summary | refs | log | tree | commit | diff
path: root/test/CodeGen/X86/vector-shift-ashr-512.ll
diff options
context:
space:
mode:
author: Dimitry Andric <dim@FreeBSD.org> 2017-01-14 15:37:50 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-01-14 15:37:50 +0000
commit: 581a6d8501ff5614297da837b81ed3b6956361ea (patch)
tree: 985ee91d0ca1d3e6506ac5ff7e37f5b67adfec09 /test/CodeGen/X86/vector-shift-ashr-512.ll
parent: 909545a822eef491158f831688066f0ec2866938 (diff)
download: src-test-581a6d8501ff5614297da837b81ed3b6956361ea.tar.gz
src-test-581a6d8501ff5614297da837b81ed3b6956361ea.zip
Notes
Diffstat (limited to 'test/CodeGen/X86/vector-shift-ashr-512.ll')
-rw-r--r-- test/CodeGen/X86/vector-shift-ashr-512.ll 1123
1 files changed, 91 insertions, 1032 deletions
diff --git a/test/CodeGen/X86/vector-shift-ashr-512.ll b/test/CodeGen/X86/vector-shift-ashr-512.ll
index 2c9e433cfb2ca..6cc98b5f3eeb1 100644
--- a/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+
;
; Variable Shifts
;
@@ -99,399 +100,36 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %ecx
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %dl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm4
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %ecx
-; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
-; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: movzbl %dl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm5
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $3, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $4, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $5, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $8, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $9, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $12, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $13, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm3
-; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
-; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm4
-; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $2, %xmm4, %esi
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %sil
-; AVX512BW-NEXT: movzbl %dl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm5
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %sil, %eax
-; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $3, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $5, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $6, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $7, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $10, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $11, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $14, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
-; AVX512BW-NEXT: vpextrb $15, %xmm4, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %sil
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: movzbl %sil, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm4
-; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx
-; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0
-; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
+; AVX512BW-NEXT: vpsraw $4, %zmm2, %zmm3
+; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
+; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3
+; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3
+; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
+; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm3
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
+; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
+; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
+; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
+; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
+; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%shift = ashr <64 x i8> %a, %b
ret <64 x i8> %shift
@@ -590,399 +228,36 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm3
-; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %edx
-; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %dl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm4
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
-; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
-; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
-; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
-; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
-; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
-; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
-; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4
-; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: movzbl %dl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm5
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $5, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $9, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $13, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm3
-; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
-; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4
-; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpextrb $2, %xmm3, %esi
-; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %sil
-; AVX512BW-NEXT: movzbl %dl, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm5
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %sil, %eax
-; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $6, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $10, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax
-; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $14, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx
-; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
-; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %sil
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: movzbl %sil, %ecx
-; AVX512BW-NEXT: vmovd %ecx, %xmm4
-; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
-; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
-; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
-; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
-; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
-; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
-; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
-; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
-; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
-; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
-; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
-; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
-; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
-; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %dl
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: movzbl %dl, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
-; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
-; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx
-; AVX512BW-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
-; AVX512BW-NEXT: sarb %cl, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0
-; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
+; AVX512BW-NEXT: vpsraw $4, %zmm2, %zmm3
+; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
+; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3
+; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3
+; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
+; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm3
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
+; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
+; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
+; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
+; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
+; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
%shift = ashr <64 x i8> %a, %splat
@@ -1080,252 +355,36 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm2
-; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax
-; AVX512BW-NEXT: sarb %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax
-; AVX512BW-NEXT: sarb $3, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax
-; AVX512BW-NEXT: sarb $4, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax
-; AVX512BW-NEXT: sarb $5, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax
-; AVX512BW-NEXT: sarb $6, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax
-; AVX512BW-NEXT: sarb $7, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax
-; AVX512BW-NEXT: sarb $7, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax
-; AVX512BW-NEXT: sarb $6, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax
-; AVX512BW-NEXT: sarb $5, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax
-; AVX512BW-NEXT: sarb $4, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax
-; AVX512BW-NEXT: sarb $3, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax
-; AVX512BW-NEXT: sarb %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm3
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT: sarb %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT: sarb $3, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT: sarb $4, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT: sarb $5, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT: sarb $6, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT: sarb $7, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT: sarb $7, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT: sarb $6, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT: sarb $5, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT: sarb $4, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT: sarb $3, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT: sarb %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm3
-; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
-; AVX512BW-NEXT: sarb %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
-; AVX512BW-NEXT: sarb $3, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
-; AVX512BW-NEXT: sarb $4, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
-; AVX512BW-NEXT: sarb $5, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
-; AVX512BW-NEXT: sarb $6, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
-; AVX512BW-NEXT: sarb $7, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
-; AVX512BW-NEXT: sarb $7, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
-; AVX512BW-NEXT: sarb $6, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
-; AVX512BW-NEXT: sarb $5, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
-; AVX512BW-NEXT: sarb $4, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
-; AVX512BW-NEXT: sarb $3, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
-; AVX512BW-NEXT: sarb %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
-; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
-; AVX512BW-NEXT: vmovd %eax, %xmm3
-; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
-; AVX512BW-NEXT: sarb %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
-; AVX512BW-NEXT: sarb $3, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
-; AVX512BW-NEXT: sarb $4, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
-; AVX512BW-NEXT: sarb $5, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax
-; AVX512BW-NEXT: sarb $6, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
-; AVX512BW-NEXT: sarb $7, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
-; AVX512BW-NEXT: sarb $7, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
-; AVX512BW-NEXT: sarb $6, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax
-; AVX512BW-NEXT: sarb $5, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
-; AVX512BW-NEXT: sarb $4, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
-; AVX512BW-NEXT: sarb $3, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
-; AVX512BW-NEXT: sarb $2, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
-; AVX512BW-NEXT: sarb %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
+; AVX512BW-NEXT: vpsraw $4, %zmm1, %zmm2
+; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm3
+; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm3[8],zmm0[9],zmm3[9],zmm0[10],zmm3[10],zmm0[11],zmm3[11],zmm0[12],zmm3[12],zmm0[13],zmm3[13],zmm0[14],zmm3[14],zmm0[15],zmm3[15],zmm0[24],zmm3[24],zmm0[25],zmm3[25],zmm0[26],zmm3[26],zmm0[27],zmm3[27],zmm0[28],zmm3[28],zmm0[29],zmm3[29],zmm0[30],zmm3[30],zmm0[31],zmm3[31],zmm0[40],zmm3[40],zmm0[41],zmm3[41],zmm0[42],zmm3[42],zmm0[43],zmm3[43],zmm0[44],zmm3[44],zmm0[45],zmm3[45],zmm0[46],zmm3[46],zmm0[47],zmm3[47],zmm0[56],zmm3[56],zmm0[57],zmm3[57],zmm0[58],zmm3[58],zmm0[59],zmm3[59],zmm0[60],zmm3[60],zmm0[61],zmm3[61],zmm0[62],zmm3[62],zmm0[63],zmm3[63]
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
+; AVX512BW-NEXT: vpsraw $2, %zmm1, %zmm2
+; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
+; AVX512BW-NEXT: vpsraw $1, %zmm1, %zmm2
+; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
+; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
+; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
+; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm2
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm3[0],zmm0[1],zmm3[1],zmm0[2],zmm3[2],zmm0[3],zmm3[3],zmm0[4],zmm3[4],zmm0[5],zmm3[5],zmm0[6],zmm3[6],zmm0[7],zmm3[7],zmm0[16],zmm3[16],zmm0[17],zmm3[17],zmm0[18],zmm3[18],zmm0[19],zmm3[19],zmm0[20],zmm3[20],zmm0[21],zmm3[21],zmm0[22],zmm3[22],zmm0[23],zmm3[23],zmm0[32],zmm3[32],zmm0[33],zmm3[33],zmm0[34],zmm3[34],zmm0[35],zmm3[35],zmm0[36],zmm3[36],zmm0[37],zmm3[37],zmm0[38],zmm3[38],zmm0[39],zmm3[39],zmm0[48],zmm3[48],zmm0[49],zmm3[49],zmm0[50],zmm3[50],zmm0[51],zmm3[51],zmm0[52],zmm3[52],zmm0[53],zmm3[53],zmm0[54],zmm3[54],zmm0[55],zmm3[55]
+; AVX512BW-NEXT: vpmovb2m %zmm3, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
+; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm2
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3
+; AVX512BW-NEXT: vpmovb2m %zmm3, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
+; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm2
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3
+; AVX512BW-NEXT: vpmovb2m %zmm3, %k1
+; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
+; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
+; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <64 x i8> %shift