Diffstat (limited to 'secure/lib/libcrypto/amd64/chacha-x86_64.S')
-rw-r--r--  secure/lib/libcrypto/amd64/chacha-x86_64.S   1026
1 file changed, 0 insertions, 1026 deletions
diff --git a/secure/lib/libcrypto/amd64/chacha-x86_64.S b/secure/lib/libcrypto/amd64/chacha-x86_64.S
index b01c1b87d47b..0b3d5b8b6db4 100644
--- a/secure/lib/libcrypto/amd64/chacha-x86_64.S
+++ b/secure/lib/libcrypto/amd64/chacha-x86_64.S
@@ -331,8 +331,6 @@ ChaCha20_ssse3:
.LChaCha20_ssse3:
movq %rsp,%r9
.cfi_def_cfa_register %r9
- testl $2048,%r10d
- jnz .LChaCha20_4xop
cmpq $128,%rdx
je .LChaCha20_128
ja .LChaCha20_4x
@@ -628,9 +626,6 @@ ChaCha20_4x:
movq %rsp,%r9
.cfi_def_cfa_register %r9
movq %r10,%r11
- shrq $32,%r10
- testq $32,%r10
- jnz .LChaCha20_8x
cmpq $192,%rdx
ja .Lproceed4x
@@ -1172,1024 +1167,3 @@ ChaCha20_4x:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_4x,.-ChaCha20_4x
-.type ChaCha20_4xop,@function
-.align 32
-ChaCha20_4xop:
-.cfi_startproc
-.LChaCha20_4xop:
- movq %rsp,%r9
-.cfi_def_cfa_register %r9
- subq $0x140+8,%rsp
- vzeroupper
-
- vmovdqa .Lsigma(%rip),%xmm11
- vmovdqu (%rcx),%xmm3
- vmovdqu 16(%rcx),%xmm15
- vmovdqu (%r8),%xmm7
- leaq 256(%rsp),%rcx
-
- vpshufd $0x00,%xmm11,%xmm8
- vpshufd $0x55,%xmm11,%xmm9
- vmovdqa %xmm8,64(%rsp)
- vpshufd $0xaa,%xmm11,%xmm10
- vmovdqa %xmm9,80(%rsp)
- vpshufd $0xff,%xmm11,%xmm11
- vmovdqa %xmm10,96(%rsp)
- vmovdqa %xmm11,112(%rsp)
-
- vpshufd $0x00,%xmm3,%xmm0
- vpshufd $0x55,%xmm3,%xmm1
- vmovdqa %xmm0,128-256(%rcx)
- vpshufd $0xaa,%xmm3,%xmm2
- vmovdqa %xmm1,144-256(%rcx)
- vpshufd $0xff,%xmm3,%xmm3
- vmovdqa %xmm2,160-256(%rcx)
- vmovdqa %xmm3,176-256(%rcx)
-
- vpshufd $0x00,%xmm15,%xmm12
- vpshufd $0x55,%xmm15,%xmm13
- vmovdqa %xmm12,192-256(%rcx)
- vpshufd $0xaa,%xmm15,%xmm14
- vmovdqa %xmm13,208-256(%rcx)
- vpshufd $0xff,%xmm15,%xmm15
- vmovdqa %xmm14,224-256(%rcx)
- vmovdqa %xmm15,240-256(%rcx)
-
- vpshufd $0x00,%xmm7,%xmm4
- vpshufd $0x55,%xmm7,%xmm5
- vpaddd .Linc(%rip),%xmm4,%xmm4
- vpshufd $0xaa,%xmm7,%xmm6
- vmovdqa %xmm5,272-256(%rcx)
- vpshufd $0xff,%xmm7,%xmm7
- vmovdqa %xmm6,288-256(%rcx)
- vmovdqa %xmm7,304-256(%rcx)
-
- jmp .Loop_enter4xop
-
-.align 32
-.Loop_outer4xop:
- vmovdqa 64(%rsp),%xmm8
- vmovdqa 80(%rsp),%xmm9
- vmovdqa 96(%rsp),%xmm10
- vmovdqa 112(%rsp),%xmm11
- vmovdqa 128-256(%rcx),%xmm0
- vmovdqa 144-256(%rcx),%xmm1
- vmovdqa 160-256(%rcx),%xmm2
- vmovdqa 176-256(%rcx),%xmm3
- vmovdqa 192-256(%rcx),%xmm12
- vmovdqa 208-256(%rcx),%xmm13
- vmovdqa 224-256(%rcx),%xmm14
- vmovdqa 240-256(%rcx),%xmm15
- vmovdqa 256-256(%rcx),%xmm4
- vmovdqa 272-256(%rcx),%xmm5
- vmovdqa 288-256(%rcx),%xmm6
- vmovdqa 304-256(%rcx),%xmm7
- vpaddd .Lfour(%rip),%xmm4,%xmm4
-
-.Loop_enter4xop:
- movl $10,%eax
- vmovdqa %xmm4,256-256(%rcx)
- jmp .Loop4xop
-
-.align 32
-.Loop4xop:
- vpaddd %xmm0,%xmm8,%xmm8
- vpaddd %xmm1,%xmm9,%xmm9
- vpaddd %xmm2,%xmm10,%xmm10
- vpaddd %xmm3,%xmm11,%xmm11
- vpxor %xmm4,%xmm8,%xmm4
- vpxor %xmm5,%xmm9,%xmm5
- vpxor %xmm6,%xmm10,%xmm6
- vpxor %xmm7,%xmm11,%xmm7
-.byte 143,232,120,194,228,16
-.byte 143,232,120,194,237,16
-.byte 143,232,120,194,246,16
-.byte 143,232,120,194,255,16
- vpaddd %xmm4,%xmm12,%xmm12
- vpaddd %xmm5,%xmm13,%xmm13
- vpaddd %xmm6,%xmm14,%xmm14
- vpaddd %xmm7,%xmm15,%xmm15
- vpxor %xmm0,%xmm12,%xmm0
- vpxor %xmm1,%xmm13,%xmm1
- vpxor %xmm14,%xmm2,%xmm2
- vpxor %xmm15,%xmm3,%xmm3
-.byte 143,232,120,194,192,12
-.byte 143,232,120,194,201,12
-.byte 143,232,120,194,210,12
-.byte 143,232,120,194,219,12
- vpaddd %xmm8,%xmm0,%xmm8
- vpaddd %xmm9,%xmm1,%xmm9
- vpaddd %xmm2,%xmm10,%xmm10
- vpaddd %xmm3,%xmm11,%xmm11
- vpxor %xmm4,%xmm8,%xmm4
- vpxor %xmm5,%xmm9,%xmm5
- vpxor %xmm6,%xmm10,%xmm6
- vpxor %xmm7,%xmm11,%xmm7
-.byte 143,232,120,194,228,8
-.byte 143,232,120,194,237,8
-.byte 143,232,120,194,246,8
-.byte 143,232,120,194,255,8
- vpaddd %xmm4,%xmm12,%xmm12
- vpaddd %xmm5,%xmm13,%xmm13
- vpaddd %xmm6,%xmm14,%xmm14
- vpaddd %xmm7,%xmm15,%xmm15
- vpxor %xmm0,%xmm12,%xmm0
- vpxor %xmm1,%xmm13,%xmm1
- vpxor %xmm14,%xmm2,%xmm2
- vpxor %xmm15,%xmm3,%xmm3
-.byte 143,232,120,194,192,7
-.byte 143,232,120,194,201,7
-.byte 143,232,120,194,210,7
-.byte 143,232,120,194,219,7
- vpaddd %xmm1,%xmm8,%xmm8
- vpaddd %xmm2,%xmm9,%xmm9
- vpaddd %xmm3,%xmm10,%xmm10
- vpaddd %xmm0,%xmm11,%xmm11
- vpxor %xmm7,%xmm8,%xmm7
- vpxor %xmm4,%xmm9,%xmm4
- vpxor %xmm5,%xmm10,%xmm5
- vpxor %xmm6,%xmm11,%xmm6
-.byte 143,232,120,194,255,16
-.byte 143,232,120,194,228,16
-.byte 143,232,120,194,237,16
-.byte 143,232,120,194,246,16
- vpaddd %xmm7,%xmm14,%xmm14
- vpaddd %xmm4,%xmm15,%xmm15
- vpaddd %xmm5,%xmm12,%xmm12
- vpaddd %xmm6,%xmm13,%xmm13
- vpxor %xmm1,%xmm14,%xmm1
- vpxor %xmm2,%xmm15,%xmm2
- vpxor %xmm12,%xmm3,%xmm3
- vpxor %xmm13,%xmm0,%xmm0
-.byte 143,232,120,194,201,12
-.byte 143,232,120,194,210,12
-.byte 143,232,120,194,219,12
-.byte 143,232,120,194,192,12
- vpaddd %xmm8,%xmm1,%xmm8
- vpaddd %xmm9,%xmm2,%xmm9
- vpaddd %xmm3,%xmm10,%xmm10
- vpaddd %xmm0,%xmm11,%xmm11
- vpxor %xmm7,%xmm8,%xmm7
- vpxor %xmm4,%xmm9,%xmm4
- vpxor %xmm5,%xmm10,%xmm5
- vpxor %xmm6,%xmm11,%xmm6
-.byte 143,232,120,194,255,8
-.byte 143,232,120,194,228,8
-.byte 143,232,120,194,237,8
-.byte 143,232,120,194,246,8
- vpaddd %xmm7,%xmm14,%xmm14
- vpaddd %xmm4,%xmm15,%xmm15
- vpaddd %xmm5,%xmm12,%xmm12
- vpaddd %xmm6,%xmm13,%xmm13
- vpxor %xmm1,%xmm14,%xmm1
- vpxor %xmm2,%xmm15,%xmm2
- vpxor %xmm12,%xmm3,%xmm3
- vpxor %xmm13,%xmm0,%xmm0
-.byte 143,232,120,194,201,7
-.byte 143,232,120,194,210,7
-.byte 143,232,120,194,219,7
-.byte 143,232,120,194,192,7
- decl %eax
- jnz .Loop4xop
-
- vpaddd 64(%rsp),%xmm8,%xmm8
- vpaddd 80(%rsp),%xmm9,%xmm9
- vpaddd 96(%rsp),%xmm10,%xmm10
- vpaddd 112(%rsp),%xmm11,%xmm11
-
- vmovdqa %xmm14,32(%rsp)
- vmovdqa %xmm15,48(%rsp)
-
- vpunpckldq %xmm9,%xmm8,%xmm14
- vpunpckldq %xmm11,%xmm10,%xmm15
- vpunpckhdq %xmm9,%xmm8,%xmm8
- vpunpckhdq %xmm11,%xmm10,%xmm10
- vpunpcklqdq %xmm15,%xmm14,%xmm9
- vpunpckhqdq %xmm15,%xmm14,%xmm14
- vpunpcklqdq %xmm10,%xmm8,%xmm11
- vpunpckhqdq %xmm10,%xmm8,%xmm8
- vpaddd 128-256(%rcx),%xmm0,%xmm0
- vpaddd 144-256(%rcx),%xmm1,%xmm1
- vpaddd 160-256(%rcx),%xmm2,%xmm2
- vpaddd 176-256(%rcx),%xmm3,%xmm3
-
- vmovdqa %xmm9,0(%rsp)
- vmovdqa %xmm14,16(%rsp)
- vmovdqa 32(%rsp),%xmm9
- vmovdqa 48(%rsp),%xmm14
-
- vpunpckldq %xmm1,%xmm0,%xmm10
- vpunpckldq %xmm3,%xmm2,%xmm15
- vpunpckhdq %xmm1,%xmm0,%xmm0
- vpunpckhdq %xmm3,%xmm2,%xmm2
- vpunpcklqdq %xmm15,%xmm10,%xmm1
- vpunpckhqdq %xmm15,%xmm10,%xmm10
- vpunpcklqdq %xmm2,%xmm0,%xmm3
- vpunpckhqdq %xmm2,%xmm0,%xmm0
- vpaddd 192-256(%rcx),%xmm12,%xmm12
- vpaddd 208-256(%rcx),%xmm13,%xmm13
- vpaddd 224-256(%rcx),%xmm9,%xmm9
- vpaddd 240-256(%rcx),%xmm14,%xmm14
-
- vpunpckldq %xmm13,%xmm12,%xmm2
- vpunpckldq %xmm14,%xmm9,%xmm15
- vpunpckhdq %xmm13,%xmm12,%xmm12
- vpunpckhdq %xmm14,%xmm9,%xmm9
- vpunpcklqdq %xmm15,%xmm2,%xmm13
- vpunpckhqdq %xmm15,%xmm2,%xmm2
- vpunpcklqdq %xmm9,%xmm12,%xmm14
- vpunpckhqdq %xmm9,%xmm12,%xmm12
- vpaddd 256-256(%rcx),%xmm4,%xmm4
- vpaddd 272-256(%rcx),%xmm5,%xmm5
- vpaddd 288-256(%rcx),%xmm6,%xmm6
- vpaddd 304-256(%rcx),%xmm7,%xmm7
-
- vpunpckldq %xmm5,%xmm4,%xmm9
- vpunpckldq %xmm7,%xmm6,%xmm15
- vpunpckhdq %xmm5,%xmm4,%xmm4
- vpunpckhdq %xmm7,%xmm6,%xmm6
- vpunpcklqdq %xmm15,%xmm9,%xmm5
- vpunpckhqdq %xmm15,%xmm9,%xmm9
- vpunpcklqdq %xmm6,%xmm4,%xmm7
- vpunpckhqdq %xmm6,%xmm4,%xmm4
- vmovdqa 0(%rsp),%xmm6
- vmovdqa 16(%rsp),%xmm15
-
- cmpq $256,%rdx
- jb .Ltail4xop
-
- vpxor 0(%rsi),%xmm6,%xmm6
- vpxor 16(%rsi),%xmm1,%xmm1
- vpxor 32(%rsi),%xmm13,%xmm13
- vpxor 48(%rsi),%xmm5,%xmm5
- vpxor 64(%rsi),%xmm15,%xmm15
- vpxor 80(%rsi),%xmm10,%xmm10
- vpxor 96(%rsi),%xmm2,%xmm2
- vpxor 112(%rsi),%xmm9,%xmm9
- leaq 128(%rsi),%rsi
- vpxor 0(%rsi),%xmm11,%xmm11
- vpxor 16(%rsi),%xmm3,%xmm3
- vpxor 32(%rsi),%xmm14,%xmm14
- vpxor 48(%rsi),%xmm7,%xmm7
- vpxor 64(%rsi),%xmm8,%xmm8
- vpxor 80(%rsi),%xmm0,%xmm0
- vpxor 96(%rsi),%xmm12,%xmm12
- vpxor 112(%rsi),%xmm4,%xmm4
- leaq 128(%rsi),%rsi
-
- vmovdqu %xmm6,0(%rdi)
- vmovdqu %xmm1,16(%rdi)
- vmovdqu %xmm13,32(%rdi)
- vmovdqu %xmm5,48(%rdi)
- vmovdqu %xmm15,64(%rdi)
- vmovdqu %xmm10,80(%rdi)
- vmovdqu %xmm2,96(%rdi)
- vmovdqu %xmm9,112(%rdi)
- leaq 128(%rdi),%rdi
- vmovdqu %xmm11,0(%rdi)
- vmovdqu %xmm3,16(%rdi)
- vmovdqu %xmm14,32(%rdi)
- vmovdqu %xmm7,48(%rdi)
- vmovdqu %xmm8,64(%rdi)
- vmovdqu %xmm0,80(%rdi)
- vmovdqu %xmm12,96(%rdi)
- vmovdqu %xmm4,112(%rdi)
- leaq 128(%rdi),%rdi
-
- subq $256,%rdx
- jnz .Loop_outer4xop
-
- jmp .Ldone4xop
-
-.align 32
-.Ltail4xop:
- cmpq $192,%rdx
- jae .L192_or_more4xop
- cmpq $128,%rdx
- jae .L128_or_more4xop
- cmpq $64,%rdx
- jae .L64_or_more4xop
-
- xorq %r10,%r10
- vmovdqa %xmm6,0(%rsp)
- vmovdqa %xmm1,16(%rsp)
- vmovdqa %xmm13,32(%rsp)
- vmovdqa %xmm5,48(%rsp)
- jmp .Loop_tail4xop
-
-.align 32
-.L64_or_more4xop:
- vpxor 0(%rsi),%xmm6,%xmm6
- vpxor 16(%rsi),%xmm1,%xmm1
- vpxor 32(%rsi),%xmm13,%xmm13
- vpxor 48(%rsi),%xmm5,%xmm5
- vmovdqu %xmm6,0(%rdi)
- vmovdqu %xmm1,16(%rdi)
- vmovdqu %xmm13,32(%rdi)
- vmovdqu %xmm5,48(%rdi)
- je .Ldone4xop
-
- leaq 64(%rsi),%rsi
- vmovdqa %xmm15,0(%rsp)
- xorq %r10,%r10
- vmovdqa %xmm10,16(%rsp)
- leaq 64(%rdi),%rdi
- vmovdqa %xmm2,32(%rsp)
- subq $64,%rdx
- vmovdqa %xmm9,48(%rsp)
- jmp .Loop_tail4xop
-
-.align 32
-.L128_or_more4xop:
- vpxor 0(%rsi),%xmm6,%xmm6
- vpxor 16(%rsi),%xmm1,%xmm1
- vpxor 32(%rsi),%xmm13,%xmm13
- vpxor 48(%rsi),%xmm5,%xmm5
- vpxor 64(%rsi),%xmm15,%xmm15
- vpxor 80(%rsi),%xmm10,%xmm10
- vpxor 96(%rsi),%xmm2,%xmm2
- vpxor 112(%rsi),%xmm9,%xmm9
-
- vmovdqu %xmm6,0(%rdi)
- vmovdqu %xmm1,16(%rdi)
- vmovdqu %xmm13,32(%rdi)
- vmovdqu %xmm5,48(%rdi)
- vmovdqu %xmm15,64(%rdi)
- vmovdqu %xmm10,80(%rdi)
- vmovdqu %xmm2,96(%rdi)
- vmovdqu %xmm9,112(%rdi)
- je .Ldone4xop
-
- leaq 128(%rsi),%rsi
- vmovdqa %xmm11,0(%rsp)
- xorq %r10,%r10
- vmovdqa %xmm3,16(%rsp)
- leaq 128(%rdi),%rdi
- vmovdqa %xmm14,32(%rsp)
- subq $128,%rdx
- vmovdqa %xmm7,48(%rsp)
- jmp .Loop_tail4xop
-
-.align 32
-.L192_or_more4xop:
- vpxor 0(%rsi),%xmm6,%xmm6
- vpxor 16(%rsi),%xmm1,%xmm1
- vpxor 32(%rsi),%xmm13,%xmm13
- vpxor 48(%rsi),%xmm5,%xmm5
- vpxor 64(%rsi),%xmm15,%xmm15
- vpxor 80(%rsi),%xmm10,%xmm10
- vpxor 96(%rsi),%xmm2,%xmm2
- vpxor 112(%rsi),%xmm9,%xmm9
- leaq 128(%rsi),%rsi
- vpxor 0(%rsi),%xmm11,%xmm11
- vpxor 16(%rsi),%xmm3,%xmm3
- vpxor 32(%rsi),%xmm14,%xmm14
- vpxor 48(%rsi),%xmm7,%xmm7
-
- vmovdqu %xmm6,0(%rdi)
- vmovdqu %xmm1,16(%rdi)
- vmovdqu %xmm13,32(%rdi)
- vmovdqu %xmm5,48(%rdi)
- vmovdqu %xmm15,64(%rdi)
- vmovdqu %xmm10,80(%rdi)
- vmovdqu %xmm2,96(%rdi)
- vmovdqu %xmm9,112(%rdi)
- leaq 128(%rdi),%rdi
- vmovdqu %xmm11,0(%rdi)
- vmovdqu %xmm3,16(%rdi)
- vmovdqu %xmm14,32(%rdi)
- vmovdqu %xmm7,48(%rdi)
- je .Ldone4xop
-
- leaq 64(%rsi),%rsi
- vmovdqa %xmm8,0(%rsp)
- xorq %r10,%r10
- vmovdqa %xmm0,16(%rsp)
- leaq 64(%rdi),%rdi
- vmovdqa %xmm12,32(%rsp)
- subq $192,%rdx
- vmovdqa %xmm4,48(%rsp)
-
-.Loop_tail4xop:
- movzbl (%rsi,%r10,1),%eax
- movzbl (%rsp,%r10,1),%ecx
- leaq 1(%r10),%r10
- xorl %ecx,%eax
- movb %al,-1(%rdi,%r10,1)
- decq %rdx
- jnz .Loop_tail4xop
-
-.Ldone4xop:
- vzeroupper
- leaq (%r9),%rsp
-.cfi_def_cfa_register %rsp
-.L4xop_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ChaCha20_4xop,.-ChaCha20_4xop
-.type ChaCha20_8x,@function
-.align 32
-ChaCha20_8x:
-.cfi_startproc
-.LChaCha20_8x:
- movq %rsp,%r9
-.cfi_def_cfa_register %r9
- subq $0x280+8,%rsp
- andq $-32,%rsp
- vzeroupper
-
-
-
-
-
-
-
-
-
-
- vbroadcasti128 .Lsigma(%rip),%ymm11
- vbroadcasti128 (%rcx),%ymm3
- vbroadcasti128 16(%rcx),%ymm15
- vbroadcasti128 (%r8),%ymm7
- leaq 256(%rsp),%rcx
- leaq 512(%rsp),%rax
- leaq .Lrot16(%rip),%r10
- leaq .Lrot24(%rip),%r11
-
- vpshufd $0x00,%ymm11,%ymm8
- vpshufd $0x55,%ymm11,%ymm9
- vmovdqa %ymm8,128-256(%rcx)
- vpshufd $0xaa,%ymm11,%ymm10
- vmovdqa %ymm9,160-256(%rcx)
- vpshufd $0xff,%ymm11,%ymm11
- vmovdqa %ymm10,192-256(%rcx)
- vmovdqa %ymm11,224-256(%rcx)
-
- vpshufd $0x00,%ymm3,%ymm0
- vpshufd $0x55,%ymm3,%ymm1
- vmovdqa %ymm0,256-256(%rcx)
- vpshufd $0xaa,%ymm3,%ymm2
- vmovdqa %ymm1,288-256(%rcx)
- vpshufd $0xff,%ymm3,%ymm3
- vmovdqa %ymm2,320-256(%rcx)
- vmovdqa %ymm3,352-256(%rcx)
-
- vpshufd $0x00,%ymm15,%ymm12
- vpshufd $0x55,%ymm15,%ymm13
- vmovdqa %ymm12,384-512(%rax)
- vpshufd $0xaa,%ymm15,%ymm14
- vmovdqa %ymm13,416-512(%rax)
- vpshufd $0xff,%ymm15,%ymm15
- vmovdqa %ymm14,448-512(%rax)
- vmovdqa %ymm15,480-512(%rax)
-
- vpshufd $0x00,%ymm7,%ymm4
- vpshufd $0x55,%ymm7,%ymm5
- vpaddd .Lincy(%rip),%ymm4,%ymm4
- vpshufd $0xaa,%ymm7,%ymm6
- vmovdqa %ymm5,544-512(%rax)
- vpshufd $0xff,%ymm7,%ymm7
- vmovdqa %ymm6,576-512(%rax)
- vmovdqa %ymm7,608-512(%rax)
-
- jmp .Loop_enter8x
-
-.align 32
-.Loop_outer8x:
- vmovdqa 128-256(%rcx),%ymm8
- vmovdqa 160-256(%rcx),%ymm9
- vmovdqa 192-256(%rcx),%ymm10
- vmovdqa 224-256(%rcx),%ymm11
- vmovdqa 256-256(%rcx),%ymm0
- vmovdqa 288-256(%rcx),%ymm1
- vmovdqa 320-256(%rcx),%ymm2
- vmovdqa 352-256(%rcx),%ymm3
- vmovdqa 384-512(%rax),%ymm12
- vmovdqa 416-512(%rax),%ymm13
- vmovdqa 448-512(%rax),%ymm14
- vmovdqa 480-512(%rax),%ymm15
- vmovdqa 512-512(%rax),%ymm4
- vmovdqa 544-512(%rax),%ymm5
- vmovdqa 576-512(%rax),%ymm6
- vmovdqa 608-512(%rax),%ymm7
- vpaddd .Leight(%rip),%ymm4,%ymm4
-
-.Loop_enter8x:
- vmovdqa %ymm14,64(%rsp)
- vmovdqa %ymm15,96(%rsp)
- vbroadcasti128 (%r10),%ymm15
- vmovdqa %ymm4,512-512(%rax)
- movl $10,%eax
- jmp .Loop8x
-
-.align 32
-.Loop8x:
- vpaddd %ymm0,%ymm8,%ymm8
- vpxor %ymm4,%ymm8,%ymm4
- vpshufb %ymm15,%ymm4,%ymm4
- vpaddd %ymm1,%ymm9,%ymm9
- vpxor %ymm5,%ymm9,%ymm5
- vpshufb %ymm15,%ymm5,%ymm5
- vpaddd %ymm4,%ymm12,%ymm12
- vpxor %ymm0,%ymm12,%ymm0
- vpslld $12,%ymm0,%ymm14
- vpsrld $20,%ymm0,%ymm0
- vpor %ymm0,%ymm14,%ymm0
- vbroadcasti128 (%r11),%ymm14
- vpaddd %ymm5,%ymm13,%ymm13
- vpxor %ymm1,%ymm13,%ymm1
- vpslld $12,%ymm1,%ymm15
- vpsrld $20,%ymm1,%ymm1
- vpor %ymm1,%ymm15,%ymm1
- vpaddd %ymm0,%ymm8,%ymm8
- vpxor %ymm4,%ymm8,%ymm4
- vpshufb %ymm14,%ymm4,%ymm4
- vpaddd %ymm1,%ymm9,%ymm9
- vpxor %ymm5,%ymm9,%ymm5
- vpshufb %ymm14,%ymm5,%ymm5
- vpaddd %ymm4,%ymm12,%ymm12
- vpxor %ymm0,%ymm12,%ymm0
- vpslld $7,%ymm0,%ymm15
- vpsrld $25,%ymm0,%ymm0
- vpor %ymm0,%ymm15,%ymm0
- vbroadcasti128 (%r10),%ymm15
- vpaddd %ymm5,%ymm13,%ymm13
- vpxor %ymm1,%ymm13,%ymm1
- vpslld $7,%ymm1,%ymm14
- vpsrld $25,%ymm1,%ymm1
- vpor %ymm1,%ymm14,%ymm1
- vmovdqa %ymm12,0(%rsp)
- vmovdqa %ymm13,32(%rsp)
- vmovdqa 64(%rsp),%ymm12
- vmovdqa 96(%rsp),%ymm13
- vpaddd %ymm2,%ymm10,%ymm10
- vpxor %ymm6,%ymm10,%ymm6
- vpshufb %ymm15,%ymm6,%ymm6
- vpaddd %ymm3,%ymm11,%ymm11
- vpxor %ymm7,%ymm11,%ymm7
- vpshufb %ymm15,%ymm7,%ymm7
- vpaddd %ymm6,%ymm12,%ymm12
- vpxor %ymm2,%ymm12,%ymm2
- vpslld $12,%ymm2,%ymm14
- vpsrld $20,%ymm2,%ymm2
- vpor %ymm2,%ymm14,%ymm2
- vbroadcasti128 (%r11),%ymm14
- vpaddd %ymm7,%ymm13,%ymm13
- vpxor %ymm3,%ymm13,%ymm3
- vpslld $12,%ymm3,%ymm15
- vpsrld $20,%ymm3,%ymm3
- vpor %ymm3,%ymm15,%ymm3
- vpaddd %ymm2,%ymm10,%ymm10
- vpxor %ymm6,%ymm10,%ymm6
- vpshufb %ymm14,%ymm6,%ymm6
- vpaddd %ymm3,%ymm11,%ymm11
- vpxor %ymm7,%ymm11,%ymm7
- vpshufb %ymm14,%ymm7,%ymm7
- vpaddd %ymm6,%ymm12,%ymm12
- vpxor %ymm2,%ymm12,%ymm2
- vpslld $7,%ymm2,%ymm15
- vpsrld $25,%ymm2,%ymm2
- vpor %ymm2,%ymm15,%ymm2
- vbroadcasti128 (%r10),%ymm15
- vpaddd %ymm7,%ymm13,%ymm13
- vpxor %ymm3,%ymm13,%ymm3
- vpslld $7,%ymm3,%ymm14
- vpsrld $25,%ymm3,%ymm3
- vpor %ymm3,%ymm14,%ymm3
- vpaddd %ymm1,%ymm8,%ymm8
- vpxor %ymm7,%ymm8,%ymm7
- vpshufb %ymm15,%ymm7,%ymm7
- vpaddd %ymm2,%ymm9,%ymm9
- vpxor %ymm4,%ymm9,%ymm4
- vpshufb %ymm15,%ymm4,%ymm4
- vpaddd %ymm7,%ymm12,%ymm12
- vpxor %ymm1,%ymm12,%ymm1
- vpslld $12,%ymm1,%ymm14
- vpsrld $20,%ymm1,%ymm1
- vpor %ymm1,%ymm14,%ymm1
- vbroadcasti128 (%r11),%ymm14
- vpaddd %ymm4,%ymm13,%ymm13
- vpxor %ymm2,%ymm13,%ymm2
- vpslld $12,%ymm2,%ymm15
- vpsrld $20,%ymm2,%ymm2
- vpor %ymm2,%ymm15,%ymm2
- vpaddd %ymm1,%ymm8,%ymm8
- vpxor %ymm7,%ymm8,%ymm7
- vpshufb %ymm14,%ymm7,%ymm7
- vpaddd %ymm2,%ymm9,%ymm9
- vpxor %ymm4,%ymm9,%ymm4
- vpshufb %ymm14,%ymm4,%ymm4
- vpaddd %ymm7,%ymm12,%ymm12
- vpxor %ymm1,%ymm12,%ymm1
- vpslld $7,%ymm1,%ymm15
- vpsrld $25,%ymm1,%ymm1
- vpor %ymm1,%ymm15,%ymm1
- vbroadcasti128 (%r10),%ymm15
- vpaddd %ymm4,%ymm13,%ymm13
- vpxor %ymm2,%ymm13,%ymm2
- vpslld $7,%ymm2,%ymm14
- vpsrld $25,%ymm2,%ymm2
- vpor %ymm2,%ymm14,%ymm2
- vmovdqa %ymm12,64(%rsp)
- vmovdqa %ymm13,96(%rsp)
- vmovdqa 0(%rsp),%ymm12
- vmovdqa 32(%rsp),%ymm13
- vpaddd %ymm3,%ymm10,%ymm10
- vpxor %ymm5,%ymm10,%ymm5
- vpshufb %ymm15,%ymm5,%ymm5
- vpaddd %ymm0,%ymm11,%ymm11
- vpxor %ymm6,%ymm11,%ymm6
- vpshufb %ymm15,%ymm6,%ymm6
- vpaddd %ymm5,%ymm12,%ymm12
- vpxor %ymm3,%ymm12,%ymm3
- vpslld $12,%ymm3,%ymm14
- vpsrld $20,%ymm3,%ymm3
- vpor %ymm3,%ymm14,%ymm3
- vbroadcasti128 (%r11),%ymm14
- vpaddd %ymm6,%ymm13,%ymm13
- vpxor %ymm0,%ymm13,%ymm0
- vpslld $12,%ymm0,%ymm15
- vpsrld $20,%ymm0,%ymm0
- vpor %ymm0,%ymm15,%ymm0
- vpaddd %ymm3,%ymm10,%ymm10
- vpxor %ymm5,%ymm10,%ymm5
- vpshufb %ymm14,%ymm5,%ymm5
- vpaddd %ymm0,%ymm11,%ymm11
- vpxor %ymm6,%ymm11,%ymm6
- vpshufb %ymm14,%ymm6,%ymm6
- vpaddd %ymm5,%ymm12,%ymm12
- vpxor %ymm3,%ymm12,%ymm3
- vpslld $7,%ymm3,%ymm15
- vpsrld $25,%ymm3,%ymm3
- vpor %ymm3,%ymm15,%ymm3
- vbroadcasti128 (%r10),%ymm15
- vpaddd %ymm6,%ymm13,%ymm13
- vpxor %ymm0,%ymm13,%ymm0
- vpslld $7,%ymm0,%ymm14
- vpsrld $25,%ymm0,%ymm0
- vpor %ymm0,%ymm14,%ymm0
- decl %eax
- jnz .Loop8x
-
- leaq 512(%rsp),%rax
- vpaddd 128-256(%rcx),%ymm8,%ymm8
- vpaddd 160-256(%rcx),%ymm9,%ymm9
- vpaddd 192-256(%rcx),%ymm10,%ymm10
- vpaddd 224-256(%rcx),%ymm11,%ymm11
-
- vpunpckldq %ymm9,%ymm8,%ymm14
- vpunpckldq %ymm11,%ymm10,%ymm15
- vpunpckhdq %ymm9,%ymm8,%ymm8
- vpunpckhdq %ymm11,%ymm10,%ymm10
- vpunpcklqdq %ymm15,%ymm14,%ymm9
- vpunpckhqdq %ymm15,%ymm14,%ymm14
- vpunpcklqdq %ymm10,%ymm8,%ymm11
- vpunpckhqdq %ymm10,%ymm8,%ymm8
- vpaddd 256-256(%rcx),%ymm0,%ymm0
- vpaddd 288-256(%rcx),%ymm1,%ymm1
- vpaddd 320-256(%rcx),%ymm2,%ymm2
- vpaddd 352-256(%rcx),%ymm3,%ymm3
-
- vpunpckldq %ymm1,%ymm0,%ymm10
- vpunpckldq %ymm3,%ymm2,%ymm15
- vpunpckhdq %ymm1,%ymm0,%ymm0
- vpunpckhdq %ymm3,%ymm2,%ymm2
- vpunpcklqdq %ymm15,%ymm10,%ymm1
- vpunpckhqdq %ymm15,%ymm10,%ymm10
- vpunpcklqdq %ymm2,%ymm0,%ymm3
- vpunpckhqdq %ymm2,%ymm0,%ymm0
- vperm2i128 $0x20,%ymm1,%ymm9,%ymm15
- vperm2i128 $0x31,%ymm1,%ymm9,%ymm1
- vperm2i128 $0x20,%ymm10,%ymm14,%ymm9
- vperm2i128 $0x31,%ymm10,%ymm14,%ymm10
- vperm2i128 $0x20,%ymm3,%ymm11,%ymm14
- vperm2i128 $0x31,%ymm3,%ymm11,%ymm3
- vperm2i128 $0x20,%ymm0,%ymm8,%ymm11
- vperm2i128 $0x31,%ymm0,%ymm8,%ymm0
- vmovdqa %ymm15,0(%rsp)
- vmovdqa %ymm9,32(%rsp)
- vmovdqa 64(%rsp),%ymm15
- vmovdqa 96(%rsp),%ymm9
-
- vpaddd 384-512(%rax),%ymm12,%ymm12
- vpaddd 416-512(%rax),%ymm13,%ymm13
- vpaddd 448-512(%rax),%ymm15,%ymm15
- vpaddd 480-512(%rax),%ymm9,%ymm9
-
- vpunpckldq %ymm13,%ymm12,%ymm2
- vpunpckldq %ymm9,%ymm15,%ymm8
- vpunpckhdq %ymm13,%ymm12,%ymm12
- vpunpckhdq %ymm9,%ymm15,%ymm15
- vpunpcklqdq %ymm8,%ymm2,%ymm13
- vpunpckhqdq %ymm8,%ymm2,%ymm2
- vpunpcklqdq %ymm15,%ymm12,%ymm9
- vpunpckhqdq %ymm15,%ymm12,%ymm12
- vpaddd 512-512(%rax),%ymm4,%ymm4
- vpaddd 544-512(%rax),%ymm5,%ymm5
- vpaddd 576-512(%rax),%ymm6,%ymm6
- vpaddd 608-512(%rax),%ymm7,%ymm7
-
- vpunpckldq %ymm5,%ymm4,%ymm15
- vpunpckldq %ymm7,%ymm6,%ymm8
- vpunpckhdq %ymm5,%ymm4,%ymm4
- vpunpckhdq %ymm7,%ymm6,%ymm6
- vpunpcklqdq %ymm8,%ymm15,%ymm5
- vpunpckhqdq %ymm8,%ymm15,%ymm15
- vpunpcklqdq %ymm6,%ymm4,%ymm7
- vpunpckhqdq %ymm6,%ymm4,%ymm4
- vperm2i128 $0x20,%ymm5,%ymm13,%ymm8
- vperm2i128 $0x31,%ymm5,%ymm13,%ymm5
- vperm2i128 $0x20,%ymm15,%ymm2,%ymm13
- vperm2i128 $0x31,%ymm15,%ymm2,%ymm15
- vperm2i128 $0x20,%ymm7,%ymm9,%ymm2
- vperm2i128 $0x31,%ymm7,%ymm9,%ymm7
- vperm2i128 $0x20,%ymm4,%ymm12,%ymm9
- vperm2i128 $0x31,%ymm4,%ymm12,%ymm4
- vmovdqa 0(%rsp),%ymm6
- vmovdqa 32(%rsp),%ymm12
-
- cmpq $512,%rdx
- jb .Ltail8x
-
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- leaq 128(%rsi),%rsi
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- leaq 128(%rdi),%rdi
-
- vpxor 0(%rsi),%ymm12,%ymm12
- vpxor 32(%rsi),%ymm13,%ymm13
- vpxor 64(%rsi),%ymm10,%ymm10
- vpxor 96(%rsi),%ymm15,%ymm15
- leaq 128(%rsi),%rsi
- vmovdqu %ymm12,0(%rdi)
- vmovdqu %ymm13,32(%rdi)
- vmovdqu %ymm10,64(%rdi)
- vmovdqu %ymm15,96(%rdi)
- leaq 128(%rdi),%rdi
-
- vpxor 0(%rsi),%ymm14,%ymm14
- vpxor 32(%rsi),%ymm2,%ymm2
- vpxor 64(%rsi),%ymm3,%ymm3
- vpxor 96(%rsi),%ymm7,%ymm7
- leaq 128(%rsi),%rsi
- vmovdqu %ymm14,0(%rdi)
- vmovdqu %ymm2,32(%rdi)
- vmovdqu %ymm3,64(%rdi)
- vmovdqu %ymm7,96(%rdi)
- leaq 128(%rdi),%rdi
-
- vpxor 0(%rsi),%ymm11,%ymm11
- vpxor 32(%rsi),%ymm9,%ymm9
- vpxor 64(%rsi),%ymm0,%ymm0
- vpxor 96(%rsi),%ymm4,%ymm4
- leaq 128(%rsi),%rsi
- vmovdqu %ymm11,0(%rdi)
- vmovdqu %ymm9,32(%rdi)
- vmovdqu %ymm0,64(%rdi)
- vmovdqu %ymm4,96(%rdi)
- leaq 128(%rdi),%rdi
-
- subq $512,%rdx
- jnz .Loop_outer8x
-
- jmp .Ldone8x
-
-.Ltail8x:
- cmpq $448,%rdx
- jae .L448_or_more8x
- cmpq $384,%rdx
- jae .L384_or_more8x
- cmpq $320,%rdx
- jae .L320_or_more8x
- cmpq $256,%rdx
- jae .L256_or_more8x
- cmpq $192,%rdx
- jae .L192_or_more8x
- cmpq $128,%rdx
- jae .L128_or_more8x
- cmpq $64,%rdx
- jae .L64_or_more8x
-
- xorq %r10,%r10
- vmovdqa %ymm6,0(%rsp)
- vmovdqa %ymm8,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L64_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- je .Ldone8x
-
- leaq 64(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm1,0(%rsp)
- leaq 64(%rdi),%rdi
- subq $64,%rdx
- vmovdqa %ymm5,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L128_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- je .Ldone8x
-
- leaq 128(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm12,0(%rsp)
- leaq 128(%rdi),%rdi
- subq $128,%rdx
- vmovdqa %ymm13,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L192_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vpxor 128(%rsi),%ymm12,%ymm12
- vpxor 160(%rsi),%ymm13,%ymm13
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- vmovdqu %ymm12,128(%rdi)
- vmovdqu %ymm13,160(%rdi)
- je .Ldone8x
-
- leaq 192(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm10,0(%rsp)
- leaq 192(%rdi),%rdi
- subq $192,%rdx
- vmovdqa %ymm15,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L256_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vpxor 128(%rsi),%ymm12,%ymm12
- vpxor 160(%rsi),%ymm13,%ymm13
- vpxor 192(%rsi),%ymm10,%ymm10
- vpxor 224(%rsi),%ymm15,%ymm15
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- vmovdqu %ymm12,128(%rdi)
- vmovdqu %ymm13,160(%rdi)
- vmovdqu %ymm10,192(%rdi)
- vmovdqu %ymm15,224(%rdi)
- je .Ldone8x
-
- leaq 256(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm14,0(%rsp)
- leaq 256(%rdi),%rdi
- subq $256,%rdx
- vmovdqa %ymm2,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L320_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vpxor 128(%rsi),%ymm12,%ymm12
- vpxor 160(%rsi),%ymm13,%ymm13
- vpxor 192(%rsi),%ymm10,%ymm10
- vpxor 224(%rsi),%ymm15,%ymm15
- vpxor 256(%rsi),%ymm14,%ymm14
- vpxor 288(%rsi),%ymm2,%ymm2
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- vmovdqu %ymm12,128(%rdi)
- vmovdqu %ymm13,160(%rdi)
- vmovdqu %ymm10,192(%rdi)
- vmovdqu %ymm15,224(%rdi)
- vmovdqu %ymm14,256(%rdi)
- vmovdqu %ymm2,288(%rdi)
- je .Ldone8x
-
- leaq 320(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm3,0(%rsp)
- leaq 320(%rdi),%rdi
- subq $320,%rdx
- vmovdqa %ymm7,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L384_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vpxor 128(%rsi),%ymm12,%ymm12
- vpxor 160(%rsi),%ymm13,%ymm13
- vpxor 192(%rsi),%ymm10,%ymm10
- vpxor 224(%rsi),%ymm15,%ymm15
- vpxor 256(%rsi),%ymm14,%ymm14
- vpxor 288(%rsi),%ymm2,%ymm2
- vpxor 320(%rsi),%ymm3,%ymm3
- vpxor 352(%rsi),%ymm7,%ymm7
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- vmovdqu %ymm12,128(%rdi)
- vmovdqu %ymm13,160(%rdi)
- vmovdqu %ymm10,192(%rdi)
- vmovdqu %ymm15,224(%rdi)
- vmovdqu %ymm14,256(%rdi)
- vmovdqu %ymm2,288(%rdi)
- vmovdqu %ymm3,320(%rdi)
- vmovdqu %ymm7,352(%rdi)
- je .Ldone8x
-
- leaq 384(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm11,0(%rsp)
- leaq 384(%rdi),%rdi
- subq $384,%rdx
- vmovdqa %ymm9,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L448_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vpxor 128(%rsi),%ymm12,%ymm12
- vpxor 160(%rsi),%ymm13,%ymm13
- vpxor 192(%rsi),%ymm10,%ymm10
- vpxor 224(%rsi),%ymm15,%ymm15
- vpxor 256(%rsi),%ymm14,%ymm14
- vpxor 288(%rsi),%ymm2,%ymm2
- vpxor 320(%rsi),%ymm3,%ymm3
- vpxor 352(%rsi),%ymm7,%ymm7
- vpxor 384(%rsi),%ymm11,%ymm11
- vpxor 416(%rsi),%ymm9,%ymm9
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- vmovdqu %ymm12,128(%rdi)
- vmovdqu %ymm13,160(%rdi)
- vmovdqu %ymm10,192(%rdi)
- vmovdqu %ymm15,224(%rdi)
- vmovdqu %ymm14,256(%rdi)
- vmovdqu %ymm2,288(%rdi)
- vmovdqu %ymm3,320(%rdi)
- vmovdqu %ymm7,352(%rdi)
- vmovdqu %ymm11,384(%rdi)
- vmovdqu %ymm9,416(%rdi)
- je .Ldone8x
-
- leaq 448(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm0,0(%rsp)
- leaq 448(%rdi),%rdi
- subq $448,%rdx
- vmovdqa %ymm4,32(%rsp)
-
-.Loop_tail8x:
- movzbl (%rsi,%r10,1),%eax
- movzbl (%rsp,%r10,1),%ecx
- leaq 1(%r10),%r10
- xorl %ecx,%eax
- movb %al,-1(%rdi,%r10,1)
- decq %rdx
- jnz .Loop_tail8x
-
-.Ldone8x:
- vzeroall
- leaq (%r9),%rsp
-.cfi_def_cfa_register %rsp
-.L8x_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ChaCha20_8x,.-ChaCha20_8x