author    Jung-uk Kim <jkim@FreeBSD.org>    2020-03-18 02:13:12 +0000
committer Jung-uk Kim <jkim@FreeBSD.org>    2020-03-18 02:13:12 +0000
commit    17f01e9963948a18f55eb97173123702c5dae671 (patch)
tree      bc68d611f898931c657418447120d2c674c1ff38 /secure/lib/libcrypto/i386
parent    889d304bb46d7551805fd8e79815a50a4cddda6b (diff)
parent    aa144ced5d61b5c7fb74acaebb37d85bd08f0416 (diff)
Diffstat (limited to 'secure/lib/libcrypto/i386')
-rw-r--r--  secure/lib/libcrypto/i386/chacha-x86.S          960
-rw-r--r--  secure/lib/libcrypto/i386/ecp_nistz256-x86.S     36
-rw-r--r--  secure/lib/libcrypto/i386/poly1305-x86.S       1110
-rw-r--r--  secure/lib/libcrypto/i386/sha1-586.S           2350
-rw-r--r--  secure/lib/libcrypto/i386/sha256-586.S         4496
5 files changed, 56 insertions, 8896 deletions
diff --git a/secure/lib/libcrypto/i386/chacha-x86.S b/secure/lib/libcrypto/i386/chacha-x86.S
index d6b2936a5381..566285310e06 100644
--- a/secure/lib/libcrypto/i386/chacha-x86.S
+++ b/secure/lib/libcrypto/i386/chacha-x86.S
@@ -385,8 +385,6 @@ ChaCha20_ssse3:
pushl %esi
pushl %edi
.Lssse3_shortcut:
- testl $2048,4(%ebp)
- jnz .Lxop_shortcut
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
@@ -530,484 +528,6 @@ ChaCha20_ssse3:
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
-.globl ChaCha20_xop
-.type ChaCha20_xop,@function
-.align 16
-ChaCha20_xop:
-.L_ChaCha20_xop_begin:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
-.Lxop_shortcut:
- movl 20(%esp),%edi
- movl 24(%esp),%esi
- movl 28(%esp),%ecx
- movl 32(%esp),%edx
- movl 36(%esp),%ebx
- vzeroupper
- movl %esp,%ebp
- subl $524,%esp
- andl $-64,%esp
- movl %ebp,512(%esp)
- leal .Lssse3_data-.Lpic_point(%eax),%eax
- vmovdqu (%ebx),%xmm3
- cmpl $256,%ecx
- jb .L0141x
- movl %edx,516(%esp)
- movl %ebx,520(%esp)
- subl $256,%ecx
- leal 384(%esp),%ebp
- vmovdqu (%edx),%xmm7
- vpshufd $0,%xmm3,%xmm0
- vpshufd $85,%xmm3,%xmm1
- vpshufd $170,%xmm3,%xmm2
- vpshufd $255,%xmm3,%xmm3
- vpaddd 48(%eax),%xmm0,%xmm0
- vpshufd $0,%xmm7,%xmm4
- vpshufd $85,%xmm7,%xmm5
- vpsubd 64(%eax),%xmm0,%xmm0
- vpshufd $170,%xmm7,%xmm6
- vpshufd $255,%xmm7,%xmm7
- vmovdqa %xmm0,64(%ebp)
- vmovdqa %xmm1,80(%ebp)
- vmovdqa %xmm2,96(%ebp)
- vmovdqa %xmm3,112(%ebp)
- vmovdqu 16(%edx),%xmm3
- vmovdqa %xmm4,-64(%ebp)
- vmovdqa %xmm5,-48(%ebp)
- vmovdqa %xmm6,-32(%ebp)
- vmovdqa %xmm7,-16(%ebp)
- vmovdqa 32(%eax),%xmm7
- leal 128(%esp),%ebx
- vpshufd $0,%xmm3,%xmm0
- vpshufd $85,%xmm3,%xmm1
- vpshufd $170,%xmm3,%xmm2
- vpshufd $255,%xmm3,%xmm3
- vpshufd $0,%xmm7,%xmm4
- vpshufd $85,%xmm7,%xmm5
- vpshufd $170,%xmm7,%xmm6
- vpshufd $255,%xmm7,%xmm7
- vmovdqa %xmm0,(%ebp)
- vmovdqa %xmm1,16(%ebp)
- vmovdqa %xmm2,32(%ebp)
- vmovdqa %xmm3,48(%ebp)
- vmovdqa %xmm4,-128(%ebp)
- vmovdqa %xmm5,-112(%ebp)
- vmovdqa %xmm6,-96(%ebp)
- vmovdqa %xmm7,-80(%ebp)
- leal 128(%esi),%esi
- leal 128(%edi),%edi
- jmp .L015outer_loop
-.align 32
-.L015outer_loop:
- vmovdqa -112(%ebp),%xmm1
- vmovdqa -96(%ebp),%xmm2
- vmovdqa -80(%ebp),%xmm3
- vmovdqa -48(%ebp),%xmm5
- vmovdqa -32(%ebp),%xmm6
- vmovdqa -16(%ebp),%xmm7
- vmovdqa %xmm1,-112(%ebx)
- vmovdqa %xmm2,-96(%ebx)
- vmovdqa %xmm3,-80(%ebx)
- vmovdqa %xmm5,-48(%ebx)
- vmovdqa %xmm6,-32(%ebx)
- vmovdqa %xmm7,-16(%ebx)
- vmovdqa 32(%ebp),%xmm2
- vmovdqa 48(%ebp),%xmm3
- vmovdqa 64(%ebp),%xmm4
- vmovdqa 80(%ebp),%xmm5
- vmovdqa 96(%ebp),%xmm6
- vmovdqa 112(%ebp),%xmm7
- vpaddd 64(%eax),%xmm4,%xmm4
- vmovdqa %xmm2,32(%ebx)
- vmovdqa %xmm3,48(%ebx)
- vmovdqa %xmm4,64(%ebx)
- vmovdqa %xmm5,80(%ebx)
- vmovdqa %xmm6,96(%ebx)
- vmovdqa %xmm7,112(%ebx)
- vmovdqa %xmm4,64(%ebp)
- vmovdqa -128(%ebp),%xmm0
- vmovdqa %xmm4,%xmm6
- vmovdqa -64(%ebp),%xmm3
- vmovdqa (%ebp),%xmm4
- vmovdqa 16(%ebp),%xmm5
- movl $10,%edx
- nop
-.align 32
-.L016loop:
- vpaddd %xmm3,%xmm0,%xmm0
- vpxor %xmm0,%xmm6,%xmm6
-.byte 143,232,120,194,246,16
- vpaddd %xmm6,%xmm4,%xmm4
- vpxor %xmm4,%xmm3,%xmm2
- vmovdqa -112(%ebx),%xmm1
-.byte 143,232,120,194,210,12
- vmovdqa -48(%ebx),%xmm3
- vpaddd %xmm2,%xmm0,%xmm0
- vmovdqa 80(%ebx),%xmm7
- vpxor %xmm0,%xmm6,%xmm6
- vpaddd %xmm3,%xmm1,%xmm1
-.byte 143,232,120,194,246,8
- vmovdqa %xmm0,-128(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa %xmm6,64(%ebx)
- vpxor %xmm4,%xmm2,%xmm2
- vpxor %xmm1,%xmm7,%xmm7
-.byte 143,232,120,194,210,7
- vmovdqa %xmm4,(%ebx)
-.byte 143,232,120,194,255,16
- vmovdqa %xmm2,-64(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vmovdqa 32(%ebx),%xmm4
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqa -96(%ebx),%xmm0
-.byte 143,232,120,194,219,12
- vmovdqa -32(%ebx),%xmm2
- vpaddd %xmm3,%xmm1,%xmm1
- vmovdqa 96(%ebx),%xmm6
- vpxor %xmm1,%xmm7,%xmm7
- vpaddd %xmm2,%xmm0,%xmm0
-.byte 143,232,120,194,255,8
- vmovdqa %xmm1,-112(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vmovdqa %xmm7,80(%ebx)
- vpxor %xmm5,%xmm3,%xmm3
- vpxor %xmm0,%xmm6,%xmm6
-.byte 143,232,120,194,219,7
- vmovdqa %xmm5,16(%ebx)
-.byte 143,232,120,194,246,16
- vmovdqa %xmm3,-48(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa 48(%ebx),%xmm5
- vpxor %xmm4,%xmm2,%xmm2
- vmovdqa -80(%ebx),%xmm1
-.byte 143,232,120,194,210,12
- vmovdqa -16(%ebx),%xmm3
- vpaddd %xmm2,%xmm0,%xmm0
- vmovdqa 112(%ebx),%xmm7
- vpxor %xmm0,%xmm6,%xmm6
- vpaddd %xmm3,%xmm1,%xmm1
-.byte 143,232,120,194,246,8
- vmovdqa %xmm0,-96(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa %xmm6,96(%ebx)
- vpxor %xmm4,%xmm2,%xmm2
- vpxor %xmm1,%xmm7,%xmm7
-.byte 143,232,120,194,210,7
-.byte 143,232,120,194,255,16
- vmovdqa %xmm2,-32(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqa -128(%ebx),%xmm0
-.byte 143,232,120,194,219,12
- vmovdqa -48(%ebx),%xmm2
- vpaddd %xmm3,%xmm1,%xmm1
- vpxor %xmm1,%xmm7,%xmm7
- vpaddd %xmm2,%xmm0,%xmm0
-.byte 143,232,120,194,255,8
- vmovdqa %xmm1,-80(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vpxor %xmm5,%xmm3,%xmm3
- vpxor %xmm0,%xmm7,%xmm6
-.byte 143,232,120,194,219,7
-.byte 143,232,120,194,246,16
- vmovdqa %xmm3,-16(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vpxor %xmm4,%xmm2,%xmm2
- vmovdqa -112(%ebx),%xmm1
-.byte 143,232,120,194,210,12
- vmovdqa -32(%ebx),%xmm3
- vpaddd %xmm2,%xmm0,%xmm0
- vmovdqa 64(%ebx),%xmm7
- vpxor %xmm0,%xmm6,%xmm6
- vpaddd %xmm3,%xmm1,%xmm1
-.byte 143,232,120,194,246,8
- vmovdqa %xmm0,-128(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa %xmm6,112(%ebx)
- vpxor %xmm4,%xmm2,%xmm2
- vpxor %xmm1,%xmm7,%xmm7
-.byte 143,232,120,194,210,7
- vmovdqa %xmm4,32(%ebx)
-.byte 143,232,120,194,255,16
- vmovdqa %xmm2,-48(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vmovdqa (%ebx),%xmm4
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqa -96(%ebx),%xmm0
-.byte 143,232,120,194,219,12
- vmovdqa -16(%ebx),%xmm2
- vpaddd %xmm3,%xmm1,%xmm1
- vmovdqa 80(%ebx),%xmm6
- vpxor %xmm1,%xmm7,%xmm7
- vpaddd %xmm2,%xmm0,%xmm0
-.byte 143,232,120,194,255,8
- vmovdqa %xmm1,-112(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vmovdqa %xmm7,64(%ebx)
- vpxor %xmm5,%xmm3,%xmm3
- vpxor %xmm0,%xmm6,%xmm6
-.byte 143,232,120,194,219,7
- vmovdqa %xmm5,48(%ebx)
-.byte 143,232,120,194,246,16
- vmovdqa %xmm3,-32(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa 16(%ebx),%xmm5
- vpxor %xmm4,%xmm2,%xmm2
- vmovdqa -80(%ebx),%xmm1
-.byte 143,232,120,194,210,12
- vmovdqa -64(%ebx),%xmm3
- vpaddd %xmm2,%xmm0,%xmm0
- vmovdqa 96(%ebx),%xmm7
- vpxor %xmm0,%xmm6,%xmm6
- vpaddd %xmm3,%xmm1,%xmm1
-.byte 143,232,120,194,246,8
- vmovdqa %xmm0,-96(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa %xmm6,80(%ebx)
- vpxor %xmm4,%xmm2,%xmm2
- vpxor %xmm1,%xmm7,%xmm7
-.byte 143,232,120,194,210,7
-.byte 143,232,120,194,255,16
- vmovdqa %xmm2,-16(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqa -128(%ebx),%xmm0
-.byte 143,232,120,194,219,12
- vpaddd %xmm3,%xmm1,%xmm1
- vmovdqa 64(%ebx),%xmm6
- vpxor %xmm1,%xmm7,%xmm7
-.byte 143,232,120,194,255,8
- vmovdqa %xmm1,-80(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vmovdqa %xmm7,96(%ebx)
- vpxor %xmm5,%xmm3,%xmm3
-.byte 143,232,120,194,219,7
- decl %edx
- jnz .L016loop
- vmovdqa %xmm3,-64(%ebx)
- vmovdqa %xmm4,(%ebx)
- vmovdqa %xmm5,16(%ebx)
- vmovdqa %xmm6,64(%ebx)
- vmovdqa %xmm7,96(%ebx)
- vmovdqa -112(%ebx),%xmm1
- vmovdqa -96(%ebx),%xmm2
- vmovdqa -80(%ebx),%xmm3
- vpaddd -128(%ebp),%xmm0,%xmm0
- vpaddd -112(%ebp),%xmm1,%xmm1
- vpaddd -96(%ebp),%xmm2,%xmm2
- vpaddd -80(%ebp),%xmm3,%xmm3
- vpunpckldq %xmm1,%xmm0,%xmm6
- vpunpckldq %xmm3,%xmm2,%xmm7
- vpunpckhdq %xmm1,%xmm0,%xmm0
- vpunpckhdq %xmm3,%xmm2,%xmm2
- vpunpcklqdq %xmm7,%xmm6,%xmm1
- vpunpckhqdq %xmm7,%xmm6,%xmm6
- vpunpcklqdq %xmm2,%xmm0,%xmm7
- vpunpckhqdq %xmm2,%xmm0,%xmm3
- vpxor -128(%esi),%xmm1,%xmm4
- vpxor -64(%esi),%xmm6,%xmm5
- vpxor (%esi),%xmm7,%xmm6
- vpxor 64(%esi),%xmm3,%xmm7
- leal 16(%esi),%esi
- vmovdqa -64(%ebx),%xmm0
- vmovdqa -48(%ebx),%xmm1
- vmovdqa -32(%ebx),%xmm2
- vmovdqa -16(%ebx),%xmm3
- vmovdqu %xmm4,-128(%edi)
- vmovdqu %xmm5,-64(%edi)
- vmovdqu %xmm6,(%edi)
- vmovdqu %xmm7,64(%edi)
- leal 16(%edi),%edi
- vpaddd -64(%ebp),%xmm0,%xmm0
- vpaddd -48(%ebp),%xmm1,%xmm1
- vpaddd -32(%ebp),%xmm2,%xmm2
- vpaddd -16(%ebp),%xmm3,%xmm3
- vpunpckldq %xmm1,%xmm0,%xmm6
- vpunpckldq %xmm3,%xmm2,%xmm7
- vpunpckhdq %xmm1,%xmm0,%xmm0
- vpunpckhdq %xmm3,%xmm2,%xmm2
- vpunpcklqdq %xmm7,%xmm6,%xmm1
- vpunpckhqdq %xmm7,%xmm6,%xmm6
- vpunpcklqdq %xmm2,%xmm0,%xmm7
- vpunpckhqdq %xmm2,%xmm0,%xmm3
- vpxor -128(%esi),%xmm1,%xmm4
- vpxor -64(%esi),%xmm6,%xmm5
- vpxor (%esi),%xmm7,%xmm6
- vpxor 64(%esi),%xmm3,%xmm7
- leal 16(%esi),%esi
- vmovdqa (%ebx),%xmm0
- vmovdqa 16(%ebx),%xmm1
- vmovdqa 32(%ebx),%xmm2
- vmovdqa 48(%ebx),%xmm3
- vmovdqu %xmm4,-128(%edi)
- vmovdqu %xmm5,-64(%edi)
- vmovdqu %xmm6,(%edi)
- vmovdqu %xmm7,64(%edi)
- leal 16(%edi),%edi
- vpaddd (%ebp),%xmm0,%xmm0
- vpaddd 16(%ebp),%xmm1,%xmm1
- vpaddd 32(%ebp),%xmm2,%xmm2
- vpaddd 48(%ebp),%xmm3,%xmm3
- vpunpckldq %xmm1,%xmm0,%xmm6
- vpunpckldq %xmm3,%xmm2,%xmm7
- vpunpckhdq %xmm1,%xmm0,%xmm0
- vpunpckhdq %xmm3,%xmm2,%xmm2
- vpunpcklqdq %xmm7,%xmm6,%xmm1
- vpunpckhqdq %xmm7,%xmm6,%xmm6
- vpunpcklqdq %xmm2,%xmm0,%xmm7
- vpunpckhqdq %xmm2,%xmm0,%xmm3
- vpxor -128(%esi),%xmm1,%xmm4
- vpxor -64(%esi),%xmm6,%xmm5
- vpxor (%esi),%xmm7,%xmm6
- vpxor 64(%esi),%xmm3,%xmm7
- leal 16(%esi),%esi
- vmovdqa 64(%ebx),%xmm0
- vmovdqa 80(%ebx),%xmm1
- vmovdqa 96(%ebx),%xmm2
- vmovdqa 112(%ebx),%xmm3
- vmovdqu %xmm4,-128(%edi)
- vmovdqu %xmm5,-64(%edi)
- vmovdqu %xmm6,(%edi)
- vmovdqu %xmm7,64(%edi)
- leal 16(%edi),%edi
- vpaddd 64(%ebp),%xmm0,%xmm0
- vpaddd 80(%ebp),%xmm1,%xmm1
- vpaddd 96(%ebp),%xmm2,%xmm2
- vpaddd 112(%ebp),%xmm3,%xmm3
- vpunpckldq %xmm1,%xmm0,%xmm6
- vpunpckldq %xmm3,%xmm2,%xmm7
- vpunpckhdq %xmm1,%xmm0,%xmm0
- vpunpckhdq %xmm3,%xmm2,%xmm2
- vpunpcklqdq %xmm7,%xmm6,%xmm1
- vpunpckhqdq %xmm7,%xmm6,%xmm6
- vpunpcklqdq %xmm2,%xmm0,%xmm7
- vpunpckhqdq %xmm2,%xmm0,%xmm3
- vpxor -128(%esi),%xmm1,%xmm4
- vpxor -64(%esi),%xmm6,%xmm5
- vpxor (%esi),%xmm7,%xmm6
- vpxor 64(%esi),%xmm3,%xmm7
- leal 208(%esi),%esi
- vmovdqu %xmm4,-128(%edi)
- vmovdqu %xmm5,-64(%edi)
- vmovdqu %xmm6,(%edi)
- vmovdqu %xmm7,64(%edi)
- leal 208(%edi),%edi
- subl $256,%ecx
- jnc .L015outer_loop
- addl $256,%ecx
- jz .L017done
- movl 520(%esp),%ebx
- leal -128(%esi),%esi
- movl 516(%esp),%edx
- leal -128(%edi),%edi
- vmovd 64(%ebp),%xmm2
- vmovdqu (%ebx),%xmm3
- vpaddd 96(%eax),%xmm2,%xmm2
- vpand 112(%eax),%xmm3,%xmm3
- vpor %xmm2,%xmm3,%xmm3
-.L0141x:
- vmovdqa 32(%eax),%xmm0
- vmovdqu (%edx),%xmm1
- vmovdqu 16(%edx),%xmm2
- vmovdqa (%eax),%xmm6
- vmovdqa 16(%eax),%xmm7
- movl %ebp,48(%esp)
- vmovdqa %xmm0,(%esp)
- vmovdqa %xmm1,16(%esp)
- vmovdqa %xmm2,32(%esp)
- vmovdqa %xmm3,48(%esp)
- movl $10,%edx
- jmp .L018loop1x
-.align 16
-.L019outer1x:
- vmovdqa 80(%eax),%xmm3
- vmovdqa (%esp),%xmm0
- vmovdqa 16(%esp),%xmm1
- vmovdqa 32(%esp),%xmm2
- vpaddd 48(%esp),%xmm3,%xmm3
- movl $10,%edx
- vmovdqa %xmm3,48(%esp)
- jmp .L018loop1x
-.align 16
-.L018loop1x:
- vpaddd %xmm1,%xmm0,%xmm0
- vpxor %xmm0,%xmm3,%xmm3
-.byte 143,232,120,194,219,16
- vpaddd %xmm3,%xmm2,%xmm2
- vpxor %xmm2,%xmm1,%xmm1
-.byte 143,232,120,194,201,12
- vpaddd %xmm1,%xmm0,%xmm0
- vpxor %xmm0,%xmm3,%xmm3
-.byte 143,232,120,194,219,8
- vpaddd %xmm3,%xmm2,%xmm2
- vpxor %xmm2,%xmm1,%xmm1
-.byte 143,232,120,194,201,7
- vpshufd $78,%xmm2,%xmm2
- vpshufd $57,%xmm1,%xmm1
- vpshufd $147,%xmm3,%xmm3
- vpaddd %xmm1,%xmm0,%xmm0
- vpxor %xmm0,%xmm3,%xmm3
-.byte 143,232,120,194,219,16
- vpaddd %xmm3,%xmm2,%xmm2
- vpxor %xmm2,%xmm1,%xmm1
-.byte 143,232,120,194,201,12
- vpaddd %xmm1,%xmm0,%xmm0
- vpxor %xmm0,%xmm3,%xmm3
-.byte 143,232,120,194,219,8
- vpaddd %xmm3,%xmm2,%xmm2
- vpxor %xmm2,%xmm1,%xmm1
-.byte 143,232,120,194,201,7
- vpshufd $78,%xmm2,%xmm2
- vpshufd $147,%xmm1,%xmm1
- vpshufd $57,%xmm3,%xmm3
- decl %edx
- jnz .L018loop1x
- vpaddd (%esp),%xmm0,%xmm0
- vpaddd 16(%esp),%xmm1,%xmm1
- vpaddd 32(%esp),%xmm2,%xmm2
- vpaddd 48(%esp),%xmm3,%xmm3
- cmpl $64,%ecx
- jb .L020tail
- vpxor (%esi),%xmm0,%xmm0
- vpxor 16(%esi),%xmm1,%xmm1
- vpxor 32(%esi),%xmm2,%xmm2
- vpxor 48(%esi),%xmm3,%xmm3
- leal 64(%esi),%esi
- vmovdqu %xmm0,(%edi)
- vmovdqu %xmm1,16(%edi)
- vmovdqu %xmm2,32(%edi)
- vmovdqu %xmm3,48(%edi)
- leal 64(%edi),%edi
- subl $64,%ecx
- jnz .L019outer1x
- jmp .L017done
-.L020tail:
- vmovdqa %xmm0,(%esp)
- vmovdqa %xmm1,16(%esp)
- vmovdqa %xmm2,32(%esp)
- vmovdqa %xmm3,48(%esp)
- xorl %eax,%eax
- xorl %edx,%edx
- xorl %ebp,%ebp
-.L021tail_loop:
- movb (%esp,%ebp,1),%al
- movb (%esi,%ebp,1),%dl
- leal 1(%ebp),%ebp
- xorb %dl,%al
- movb %al,-1(%edi,%ebp,1)
- decl %ecx
- jnz .L021tail_loop
-.L017done:
- vzeroupper
- movl 512(%esp),%esp
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4
#else
.text
@@ -1394,8 +914,6 @@ ChaCha20_ssse3:
pushl %esi
pushl %edi
.Lssse3_shortcut:
- testl $2048,4(%ebp)
- jnz .Lxop_shortcut
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
@@ -1539,483 +1057,5 @@ ChaCha20_ssse3:
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
-.globl ChaCha20_xop
-.type ChaCha20_xop,@function
-.align 16
-ChaCha20_xop:
-.L_ChaCha20_xop_begin:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
-.Lxop_shortcut:
- movl 20(%esp),%edi
- movl 24(%esp),%esi
- movl 28(%esp),%ecx
- movl 32(%esp),%edx
- movl 36(%esp),%ebx
- vzeroupper
- movl %esp,%ebp
- subl $524,%esp
- andl $-64,%esp
- movl %ebp,512(%esp)
- leal .Lssse3_data-.Lpic_point(%eax),%eax
- vmovdqu (%ebx),%xmm3
- cmpl $256,%ecx
- jb .L0141x
- movl %edx,516(%esp)
- movl %ebx,520(%esp)
- subl $256,%ecx
- leal 384(%esp),%ebp
- vmovdqu (%edx),%xmm7
- vpshufd $0,%xmm3,%xmm0
- vpshufd $85,%xmm3,%xmm1
- vpshufd $170,%xmm3,%xmm2
- vpshufd $255,%xmm3,%xmm3
- vpaddd 48(%eax),%xmm0,%xmm0
- vpshufd $0,%xmm7,%xmm4
- vpshufd $85,%xmm7,%xmm5
- vpsubd 64(%eax),%xmm0,%xmm0
- vpshufd $170,%xmm7,%xmm6
- vpshufd $255,%xmm7,%xmm7
- vmovdqa %xmm0,64(%ebp)
- vmovdqa %xmm1,80(%ebp)
- vmovdqa %xmm2,96(%ebp)
- vmovdqa %xmm3,112(%ebp)
- vmovdqu 16(%edx),%xmm3
- vmovdqa %xmm4,-64(%ebp)
- vmovdqa %xmm5,-48(%ebp)
- vmovdqa %xmm6,-32(%ebp)
- vmovdqa %xmm7,-16(%ebp)
- vmovdqa 32(%eax),%xmm7
- leal 128(%esp),%ebx
- vpshufd $0,%xmm3,%xmm0
- vpshufd $85,%xmm3,%xmm1
- vpshufd $170,%xmm3,%xmm2
- vpshufd $255,%xmm3,%xmm3
- vpshufd $0,%xmm7,%xmm4
- vpshufd $85,%xmm7,%xmm5
- vpshufd $170,%xmm7,%xmm6
- vpshufd $255,%xmm7,%xmm7
- vmovdqa %xmm0,(%ebp)
- vmovdqa %xmm1,16(%ebp)
- vmovdqa %xmm2,32(%ebp)
- vmovdqa %xmm3,48(%ebp)
- vmovdqa %xmm4,-128(%ebp)
- vmovdqa %xmm5,-112(%ebp)
- vmovdqa %xmm6,-96(%ebp)
- vmovdqa %xmm7,-80(%ebp)
- leal 128(%esi),%esi
- leal 128(%edi),%edi
- jmp .L015outer_loop
-.align 32
-.L015outer_loop:
- vmovdqa -112(%ebp),%xmm1
- vmovdqa -96(%ebp),%xmm2
- vmovdqa -80(%ebp),%xmm3
- vmovdqa -48(%ebp),%xmm5
- vmovdqa -32(%ebp),%xmm6
- vmovdqa -16(%ebp),%xmm7
- vmovdqa %xmm1,-112(%ebx)
- vmovdqa %xmm2,-96(%ebx)
- vmovdqa %xmm3,-80(%ebx)
- vmovdqa %xmm5,-48(%ebx)
- vmovdqa %xmm6,-32(%ebx)
- vmovdqa %xmm7,-16(%ebx)
- vmovdqa 32(%ebp),%xmm2
- vmovdqa 48(%ebp),%xmm3
- vmovdqa 64(%ebp),%xmm4
- vmovdqa 80(%ebp),%xmm5
- vmovdqa 96(%ebp),%xmm6
- vmovdqa 112(%ebp),%xmm7
- vpaddd 64(%eax),%xmm4,%xmm4
- vmovdqa %xmm2,32(%ebx)
- vmovdqa %xmm3,48(%ebx)
- vmovdqa %xmm4,64(%ebx)
- vmovdqa %xmm5,80(%ebx)
- vmovdqa %xmm6,96(%ebx)
- vmovdqa %xmm7,112(%ebx)
- vmovdqa %xmm4,64(%ebp)
- vmovdqa -128(%ebp),%xmm0
- vmovdqa %xmm4,%xmm6
- vmovdqa -64(%ebp),%xmm3
- vmovdqa (%ebp),%xmm4
- vmovdqa 16(%ebp),%xmm5
- movl $10,%edx
- nop
-.align 32
-.L016loop:
- vpaddd %xmm3,%xmm0,%xmm0
- vpxor %xmm0,%xmm6,%xmm6
-.byte 143,232,120,194,246,16
- vpaddd %xmm6,%xmm4,%xmm4
- vpxor %xmm4,%xmm3,%xmm2
- vmovdqa -112(%ebx),%xmm1
-.byte 143,232,120,194,210,12
- vmovdqa -48(%ebx),%xmm3
- vpaddd %xmm2,%xmm0,%xmm0
- vmovdqa 80(%ebx),%xmm7
- vpxor %xmm0,%xmm6,%xmm6
- vpaddd %xmm3,%xmm1,%xmm1
-.byte 143,232,120,194,246,8
- vmovdqa %xmm0,-128(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa %xmm6,64(%ebx)
- vpxor %xmm4,%xmm2,%xmm2
- vpxor %xmm1,%xmm7,%xmm7
-.byte 143,232,120,194,210,7
- vmovdqa %xmm4,(%ebx)
-.byte 143,232,120,194,255,16
- vmovdqa %xmm2,-64(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vmovdqa 32(%ebx),%xmm4
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqa -96(%ebx),%xmm0
-.byte 143,232,120,194,219,12
- vmovdqa -32(%ebx),%xmm2
- vpaddd %xmm3,%xmm1,%xmm1
- vmovdqa 96(%ebx),%xmm6
- vpxor %xmm1,%xmm7,%xmm7
- vpaddd %xmm2,%xmm0,%xmm0
-.byte 143,232,120,194,255,8
- vmovdqa %xmm1,-112(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vmovdqa %xmm7,80(%ebx)
- vpxor %xmm5,%xmm3,%xmm3
- vpxor %xmm0,%xmm6,%xmm6
-.byte 143,232,120,194,219,7
- vmovdqa %xmm5,16(%ebx)
-.byte 143,232,120,194,246,16
- vmovdqa %xmm3,-48(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa 48(%ebx),%xmm5
- vpxor %xmm4,%xmm2,%xmm2
- vmovdqa -80(%ebx),%xmm1
-.byte 143,232,120,194,210,12
- vmovdqa -16(%ebx),%xmm3
- vpaddd %xmm2,%xmm0,%xmm0
- vmovdqa 112(%ebx),%xmm7
- vpxor %xmm0,%xmm6,%xmm6
- vpaddd %xmm3,%xmm1,%xmm1
-.byte 143,232,120,194,246,8
- vmovdqa %xmm0,-96(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa %xmm6,96(%ebx)
- vpxor %xmm4,%xmm2,%xmm2
- vpxor %xmm1,%xmm7,%xmm7
-.byte 143,232,120,194,210,7
-.byte 143,232,120,194,255,16
- vmovdqa %xmm2,-32(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqa -128(%ebx),%xmm0
-.byte 143,232,120,194,219,12
- vmovdqa -48(%ebx),%xmm2
- vpaddd %xmm3,%xmm1,%xmm1
- vpxor %xmm1,%xmm7,%xmm7
- vpaddd %xmm2,%xmm0,%xmm0
-.byte 143,232,120,194,255,8
- vmovdqa %xmm1,-80(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vpxor %xmm5,%xmm3,%xmm3
- vpxor %xmm0,%xmm7,%xmm6
-.byte 143,232,120,194,219,7
-.byte 143,232,120,194,246,16
- vmovdqa %xmm3,-16(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vpxor %xmm4,%xmm2,%xmm2
- vmovdqa -112(%ebx),%xmm1
-.byte 143,232,120,194,210,12
- vmovdqa -32(%ebx),%xmm3
- vpaddd %xmm2,%xmm0,%xmm0
- vmovdqa 64(%ebx),%xmm7
- vpxor %xmm0,%xmm6,%xmm6
- vpaddd %xmm3,%xmm1,%xmm1
-.byte 143,232,120,194,246,8
- vmovdqa %xmm0,-128(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa %xmm6,112(%ebx)
- vpxor %xmm4,%xmm2,%xmm2
- vpxor %xmm1,%xmm7,%xmm7
-.byte 143,232,120,194,210,7
- vmovdqa %xmm4,32(%ebx)
-.byte 143,232,120,194,255,16
- vmovdqa %xmm2,-48(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vmovdqa (%ebx),%xmm4
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqa -96(%ebx),%xmm0
-.byte 143,232,120,194,219,12
- vmovdqa -16(%ebx),%xmm2
- vpaddd %xmm3,%xmm1,%xmm1
- vmovdqa 80(%ebx),%xmm6
- vpxor %xmm1,%xmm7,%xmm7
- vpaddd %xmm2,%xmm0,%xmm0
-.byte 143,232,120,194,255,8
- vmovdqa %xmm1,-112(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vmovdqa %xmm7,64(%ebx)
- vpxor %xmm5,%xmm3,%xmm3
- vpxor %xmm0,%xmm6,%xmm6
-.byte 143,232,120,194,219,7
- vmovdqa %xmm5,48(%ebx)
-.byte 143,232,120,194,246,16
- vmovdqa %xmm3,-32(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa 16(%ebx),%xmm5
- vpxor %xmm4,%xmm2,%xmm2
- vmovdqa -80(%ebx),%xmm1
-.byte 143,232,120,194,210,12
- vmovdqa -64(%ebx),%xmm3
- vpaddd %xmm2,%xmm0,%xmm0
- vmovdqa 96(%ebx),%xmm7
- vpxor %xmm0,%xmm6,%xmm6
- vpaddd %xmm3,%xmm1,%xmm1
-.byte 143,232,120,194,246,8
- vmovdqa %xmm0,-96(%ebx)
- vpaddd %xmm6,%xmm4,%xmm4
- vmovdqa %xmm6,80(%ebx)
- vpxor %xmm4,%xmm2,%xmm2
- vpxor %xmm1,%xmm7,%xmm7
-.byte 143,232,120,194,210,7
-.byte 143,232,120,194,255,16
- vmovdqa %xmm2,-16(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqa -128(%ebx),%xmm0
-.byte 143,232,120,194,219,12
- vpaddd %xmm3,%xmm1,%xmm1
- vmovdqa 64(%ebx),%xmm6
- vpxor %xmm1,%xmm7,%xmm7
-.byte 143,232,120,194,255,8
- vmovdqa %xmm1,-80(%ebx)
- vpaddd %xmm7,%xmm5,%xmm5
- vmovdqa %xmm7,96(%ebx)
- vpxor %xmm5,%xmm3,%xmm3
-.byte 143,232,120,194,219,7
- decl %edx
- jnz .L016loop
- vmovdqa %xmm3,-64(%ebx)
- vmovdqa %xmm4,(%ebx)
- vmovdqa %xmm5,16(%ebx)
- vmovdqa %xmm6,64(%ebx)
- vmovdqa %xmm7,96(%ebx)
- vmovdqa -112(%ebx),%xmm1
- vmovdqa -96(%ebx),%xmm2
- vmovdqa -80(%ebx),%xmm3
- vpaddd -128(%ebp),%xmm0,%xmm0
- vpaddd -112(%ebp),%xmm1,%xmm1
- vpaddd -96(%ebp),%xmm2,%xmm2
- vpaddd -80(%ebp),%xmm3,%xmm3
- vpunpckldq %xmm1,%xmm0,%xmm6
- vpunpckldq %xmm3,%xmm2,%xmm7
- vpunpckhdq %xmm1,%xmm0,%xmm0
- vpunpckhdq %xmm3,%xmm2,%xmm2
- vpunpcklqdq %xmm7,%xmm6,%xmm1
- vpunpckhqdq %xmm7,%xmm6,%xmm6
- vpunpcklqdq %xmm2,%xmm0,%xmm7
- vpunpckhqdq %xmm2,%xmm0,%xmm3
- vpxor -128(%esi),%xmm1,%xmm4
- vpxor -64(%esi),%xmm6,%xmm5
- vpxor (%esi),%xmm7,%xmm6
- vpxor 64(%esi),%xmm3,%xmm7
- leal 16(%esi),%esi
- vmovdqa -64(%ebx),%xmm0
- vmovdqa -48(%ebx),%xmm1
- vmovdqa -32(%ebx),%xmm2
- vmovdqa -16(%ebx),%xmm3
- vmovdqu %xmm4,-128(%edi)
- vmovdqu %xmm5,-64(%edi)
- vmovdqu %xmm6,(%edi)
- vmovdqu %xmm7,64(%edi)
- leal 16(%edi),%edi
- vpaddd -64(%ebp),%xmm0,%xmm0
- vpaddd -48(%ebp),%xmm1,%xmm1
- vpaddd -32(%ebp),%xmm2,%xmm2
- vpaddd -16(%ebp),%xmm3,%xmm3
- vpunpckldq %xmm1,%xmm0,%xmm6
- vpunpckldq %xmm3,%xmm2,%xmm7
- vpunpckhdq %xmm1,%xmm0,%xmm0
- vpunpckhdq %xmm3,%xmm2,%xmm2
- vpunpcklqdq %xmm7,%xmm6,%xmm1
- vpunpckhqdq %xmm7,%xmm6,%xmm6
- vpunpcklqdq %xmm2,%xmm0,%xmm7
- vpunpckhqdq %xmm2,%xmm0,%xmm3
- vpxor -128(%esi),%xmm1,%xmm4
- vpxor -64(%esi),%xmm6,%xmm5
- vpxor (%esi),%xmm7,%xmm6
- vpxor 64(%esi),%xmm3,%xmm7
- leal 16(%esi),%esi
- vmovdqa (%ebx),%xmm0
- vmovdqa 16(%ebx),%xmm1
- vmovdqa 32(%ebx),%xmm2
- vmovdqa 48(%ebx),%xmm3
- vmovdqu %xmm4,-128(%edi)
- vmovdqu %xmm5,-64(%edi)
- vmovdqu %xmm6,(%edi)
- vmovdqu %xmm7,64(%edi)
- leal 16(%edi),%edi
- vpaddd (%ebp),%xmm0,%xmm0
- vpaddd 16(%ebp),%xmm1,%xmm1
- vpaddd 32(%ebp),%xmm2,%xmm2
- vpaddd 48(%ebp),%xmm3,%xmm3
- vpunpckldq %xmm1,%xmm0,%xmm6
- vpunpckldq %xmm3,%xmm2,%xmm7
- vpunpckhdq %xmm1,%xmm0,%xmm0
- vpunpckhdq %xmm3,%xmm2,%xmm2
- vpunpcklqdq %xmm7,%xmm6,%xmm1
- vpunpckhqdq %xmm7,%xmm6,%xmm6
- vpunpcklqdq %xmm2,%xmm0,%xmm7
- vpunpckhqdq %xmm2,%xmm0,%xmm3
- vpxor -128(%esi),%xmm1,%xmm4
- vpxor -64(%esi),%xmm6,%xmm5
- vpxor (%esi),%xmm7,%xmm6
- vpxor 64(%esi),%xmm3,%xmm7
- leal 16(%esi),%esi
- vmovdqa 64(%ebx),%xmm0
- vmovdqa 80(%ebx),%xmm1
- vmovdqa 96(%ebx),%xmm2
- vmovdqa 112(%ebx),%xmm3
- vmovdqu %xmm4,-128(%edi)
- vmovdqu %xmm5,-64(%edi)
- vmovdqu %xmm6,(%edi)
- vmovdqu %xmm7,64(%edi)
- leal 16(%edi),%edi
- vpaddd 64(%ebp),%xmm0,%xmm0
- vpaddd 80(%ebp),%xmm1,%xmm1
- vpaddd 96(%ebp),%xmm2,%xmm2
- vpaddd 112(%ebp),%xmm3,%xmm3
- vpunpckldq %xmm1,%xmm0,%xmm6
- vpunpckldq %xmm3,%xmm2,%xmm7
- vpunpckhdq %xmm1,%xmm0,%xmm0
- vpunpckhdq %xmm3,%xmm2,%xmm2
- vpunpcklqdq %xmm7,%xmm6,%xmm1
- vpunpckhqdq %xmm7,%xmm6,%xmm6
- vpunpcklqdq %xmm2,%xmm0,%xmm7
- vpunpckhqdq %xmm2,%xmm0,%xmm3
- vpxor -128(%esi),%xmm1,%xmm4
- vpxor -64(%esi),%xmm6,%xmm5
- vpxor (%esi),%xmm7,%xmm6
- vpxor 64(%esi),%xmm3,%xmm7
- leal 208(%esi),%esi
- vmovdqu %xmm4,-128(%edi)
- vmovdqu %xmm5,-64(%edi)
- vmovdqu %xmm6,(%edi)
- vmovdqu %xmm7,64(%edi)
- leal 208(%edi),%edi
- subl $256,%ecx
- jnc .L015outer_loop
- addl $256,%ecx
- jz .L017done
- movl 520(%esp),%ebx
- leal -128(%esi),%esi
- movl 516(%esp),%edx
- leal -128(%edi),%edi
- vmovd 64(%ebp),%xmm2
- vmovdqu (%ebx),%xmm3
- vpaddd 96(%eax),%xmm2,%xmm2
- vpand 112(%eax),%xmm3,%xmm3
- vpor %xmm2,%xmm3,%xmm3
-.L0141x:
- vmovdqa 32(%eax),%xmm0
- vmovdqu (%edx),%xmm1
- vmovdqu 16(%edx),%xmm2
- vmovdqa (%eax),%xmm6
- vmovdqa 16(%eax),%xmm7
- movl %ebp,48(%esp)
- vmovdqa %xmm0,(%esp)
- vmovdqa %xmm1,16(%esp)
- vmovdqa %xmm2,32(%esp)
- vmovdqa %xmm3,48(%esp)
- movl $10,%edx
- jmp .L018loop1x
-.align 16
-.L019outer1x:
- vmovdqa 80(%eax),%xmm3
- vmovdqa (%esp),%xmm0
- vmovdqa 16(%esp),%xmm1
- vmovdqa 32(%esp),%xmm2
- vpaddd 48(%esp),%xmm3,%xmm3
- movl $10,%edx
- vmovdqa %xmm3,48(%esp)
- jmp .L018loop1x
-.align 16
-.L018loop1x:
- vpaddd %xmm1,%xmm0,%xmm0
- vpxor %xmm0,%xmm3,%xmm3
-.byte 143,232,120,194,219,16
- vpaddd %xmm3,%xmm2,%xmm2
- vpxor %xmm2,%xmm1,%xmm1
-.byte 143,232,120,194,201,12
- vpaddd %xmm1,%xmm0,%xmm0
- vpxor %xmm0,%xmm3,%xmm3
-.byte 143,232,120,194,219,8
- vpaddd %xmm3,%xmm2,%xmm2
- vpxor %xmm2,%xmm1,%xmm1
-.byte 143,232,120,194,201,7
- vpshufd $78,%xmm2,%xmm2
- vpshufd $57,%xmm1,%xmm1
- vpshufd $147,%xmm3,%xmm3
- vpaddd %xmm1,%xmm0,%xmm0
- vpxor %xmm0,%xmm3,%xmm3
-.byte 143,232,120,194,219,16
- vpaddd %xmm3,%xmm2,%xmm2
- vpxor %xmm2,%xmm1,%xmm1
-.byte 143,232,120,194,201,12
- vpaddd %xmm1,%xmm0,%xmm0
- vpxor %xmm0,%xmm3,%xmm3
-.byte 143,232,120,194,219,8
- vpaddd %xmm3,%xmm2,%xmm2
- vpxor %xmm2,%xmm1,%xmm1
-.byte 143,232,120,194,201,7
- vpshufd $78,%xmm2,%xmm2
- vpshufd $147,%xmm1,%xmm1
- vpshufd $57,%xmm3,%xmm3
- decl %edx
- jnz .L018loop1x
- vpaddd (%esp),%xmm0,%xmm0
- vpaddd 16(%esp),%xmm1,%xmm1
- vpaddd 32(%esp),%xmm2,%xmm2
- vpaddd 48(%esp),%xmm3,%xmm3
- cmpl $64,%ecx
- jb .L020tail
- vpxor (%esi),%xmm0,%xmm0
- vpxor 16(%esi),%xmm1,%xmm1
- vpxor 32(%esi),%xmm2,%xmm2
- vpxor 48(%esi),%xmm3,%xmm3
- leal 64(%esi),%esi
- vmovdqu %xmm0,(%edi)
- vmovdqu %xmm1,16(%edi)
- vmovdqu %xmm2,32(%edi)
- vmovdqu %xmm3,48(%edi)
- leal 64(%edi),%edi
- subl $64,%ecx
- jnz .L019outer1x
- jmp .L017done
-.L020tail:
- vmovdqa %xmm0,(%esp)
- vmovdqa %xmm1,16(%esp)
- vmovdqa %xmm2,32(%esp)
- vmovdqa %xmm3,48(%esp)
- xorl %eax,%eax
- xorl %edx,%edx
- xorl %ebp,%ebp
-.L021tail_loop:
- movb (%esp,%ebp,1),%al
- movb (%esi,%ebp,1),%dl
- leal 1(%ebp),%ebp
- xorb %dl,%al
- movb %al,-1(%edi,%ebp,1)
- decl %ecx
- jnz .L021tail_loop
-.L017done:
- vzeroupper
- movl 512(%esp),%esp
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4
#endif
diff --git a/secure/lib/libcrypto/i386/ecp_nistz256-x86.S b/secure/lib/libcrypto/i386/ecp_nistz256-x86.S
index 7d0c1b9eb9df..eb413d9f1a73 100644
--- a/secure/lib/libcrypto/i386/ecp_nistz256-x86.S
+++ b/secure/lib/libcrypto/i386/ecp_nistz256-x86.S
@@ -4422,19 +4422,15 @@ ecp_nistz256_point_add:
orl 4(%edi),%eax
orl 8(%edi),%eax
orl 12(%edi),%eax
+ movl 576(%esp),%ebx
+ notl %ebx
+ orl %ebx,%eax
+ movl 580(%esp),%ebx
+ notl %ebx
+ orl %ebx,%eax
+ orl 584(%esp),%eax
.byte 62
jnz .L010add_proceed
- movl 576(%esp),%eax
- andl 580(%esp),%eax
- movl 584(%esp),%ebx
- jz .L010add_proceed
- testl %ebx,%ebx
- jz .L011add_double
- movl 616(%esp),%edi
- xorl %eax,%eax
- movl $24,%ecx
-.byte 252,243,171
- jmp .L012add_done
.align 16
.L011add_double:
movl 620(%esp),%esi
@@ -9590,19 +9586,15 @@ ecp_nistz256_point_add:
orl 4(%edi),%eax
orl 8(%edi),%eax
orl 12(%edi),%eax
+ movl 576(%esp),%ebx
+ notl %ebx
+ orl %ebx,%eax
+ movl 580(%esp),%ebx
+ notl %ebx
+ orl %ebx,%eax
+ orl 584(%esp),%eax
.byte 62
jnz .L010add_proceed
- movl 576(%esp),%eax
- andl 580(%esp),%eax
- movl 584(%esp),%ebx
- jz .L010add_proceed
- testl %ebx,%ebx
- jz .L011add_double
- movl 616(%esp),%edi
- xorl %eax,%eax
- movl $24,%ecx
-.byte 252,243,171
- jmp .L012add_done
.align 16
.L011add_double:
movl 620(%esp),%esi
diff --git a/secure/lib/libcrypto/i386/poly1305-x86.S b/secure/lib/libcrypto/i386/poly1305-x86.S
index 100deee40bf2..b394500278d5 100644
--- a/secure/lib/libcrypto/i386/poly1305-x86.S
+++ b/secure/lib/libcrypto/i386/poly1305-x86.S
@@ -36,10 +36,6 @@ poly1305_init:
jne .L002no_sse2
leal _poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
leal _poly1305_emit_sse2-.L001pic_point(%ebx),%edx
- movl 8(%edi),%ecx
- testl $32,%ecx
- jz .L002no_sse2
- leal _poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
.L002no_sse2:
movl 20(%esp),%edi
movl %eax,(%ebp)
@@ -1348,557 +1344,6 @@ _poly1305_emit_sse2:
popl %ebp
ret
.size _poly1305_emit_sse2,.-_poly1305_emit_sse2
-.align 32
-.type _poly1305_init_avx2,@function
-.align 16
-_poly1305_init_avx2:
- vmovdqu 24(%edi),%xmm4
- leal 48(%edi),%edi
- movl %esp,%ebp
- subl $224,%esp
- andl $-16,%esp
- vmovdqa 64(%ebx),%xmm7
- vpand %xmm7,%xmm4,%xmm0
- vpsrlq $26,%xmm4,%xmm1
- vpsrldq $6,%xmm4,%xmm3
- vpand %xmm7,%xmm1,%xmm1
- vpsrlq $4,%xmm3,%xmm2
- vpsrlq $30,%xmm3,%xmm3
- vpand %xmm7,%xmm2,%xmm2
- vpand %xmm7,%xmm3,%xmm3
- vpsrldq $13,%xmm4,%xmm4
- leal 144(%esp),%edx
- movl $2,%ecx
-.L018square:
- vmovdqa %xmm0,(%esp)
- vmovdqa %xmm1,16(%esp)
- vmovdqa %xmm2,32(%esp)
- vmovdqa %xmm3,48(%esp)
- vmovdqa %xmm4,64(%esp)
- vpslld $2,%xmm1,%xmm6
- vpslld $2,%xmm2,%xmm5
- vpaddd %xmm1,%xmm6,%xmm6
- vpaddd %xmm2,%xmm5,%xmm5
- vmovdqa %xmm6,80(%esp)
- vmovdqa %xmm5,96(%esp)
- vpslld $2,%xmm3,%xmm6
- vpslld $2,%xmm4,%xmm5
- vpaddd %xmm3,%xmm6,%xmm6
- vpaddd %xmm4,%xmm5,%xmm5
- vmovdqa %xmm6,112(%esp)
- vmovdqa %xmm5,128(%esp)
- vpshufd $68,%xmm0,%xmm5
- vmovdqa %xmm1,%xmm6
- vpshufd $68,%xmm1,%xmm1
- vpshufd $68,%xmm2,%xmm2
- vpshufd $68,%xmm3,%xmm3
- vpshufd $68,%xmm4,%xmm4
- vmovdqa %xmm5,(%edx)
- vmovdqa %xmm1,16(%edx)
- vmovdqa %xmm2,32(%edx)
- vmovdqa %xmm3,48(%edx)
- vmovdqa %xmm4,64(%edx)
- vpmuludq %xmm0,%xmm4,%xmm4
- vpmuludq %xmm0,%xmm3,%xmm3
- vpmuludq %xmm0,%xmm2,%xmm2
- vpmuludq %xmm0,%xmm1,%xmm1
- vpmuludq %xmm0,%xmm5,%xmm0
- vpmuludq 48(%edx),%xmm6,%xmm5
- vpaddq %xmm5,%xmm4,%xmm4
- vpmuludq 32(%edx),%xmm6,%xmm7
- vpaddq %xmm7,%xmm3,%xmm3
- vpmuludq 16(%edx),%xmm6,%xmm5
- vpaddq %xmm5,%xmm2,%xmm2
- vmovdqa 80(%esp),%xmm7
- vpmuludq (%edx),%xmm6,%xmm6
- vpaddq %xmm6,%xmm1,%xmm1
- vmovdqa 32(%esp),%xmm5
- vpmuludq 64(%edx),%xmm7,%xmm7
- vpaddq %xmm7,%xmm0,%xmm0
- vpmuludq 32(%edx),%xmm5,%xmm6
- vpaddq %xmm6,%xmm4,%xmm4
- vpmuludq 16(%edx),%xmm5,%xmm7
- vpaddq %xmm7,%xmm3,%xmm3
- vmovdqa 96(%esp),%xmm6
- vpmuludq (%edx),%xmm5,%xmm5
- vpaddq %xmm5,%xmm2,%xmm2
- vpmuludq 64(%edx),%xmm6,%xmm7
- vpaddq %xmm7,%xmm1,%xmm1
- vmovdqa 48(%esp),%xmm5
- vpmuludq 48(%edx),%xmm6,%xmm6
- vpaddq %xmm6,%xmm0,%xmm0
- vpmuludq 16(%edx),%xmm5,%xmm7
- vpaddq %xmm7,%xmm4,%xmm4
- vmovdqa 112(%esp),%xmm6
- vpmuludq (%edx),%xmm5,%xmm5
- vpaddq %xmm5,%xmm3,%xmm3
- vpmuludq 64(%edx),%xmm6,%xmm7
- vpaddq %xmm7,%xmm2,%xmm2
- vpmuludq 48(%edx),%xmm6,%xmm5
- vpaddq %xmm5,%xmm1,%xmm1
- vmovdqa 64(%esp),%xmm7
- vpmuludq 32(%edx),%xmm6,%xmm6
- vpaddq %xmm6,%xmm0,%xmm0
- vmovdqa 128(%esp),%xmm5
- vpmuludq (%edx),%xmm7,%xmm7
- vpaddq %xmm7,%xmm4,%xmm4
- vpmuludq 64(%edx),%xmm5,%xmm6
- vpaddq %xmm6,%xmm3,%xmm3
- vpmuludq 16(%edx),%xmm5,%xmm7
- vpaddq %xmm7,%xmm0,%xmm0
- vpmuludq 32(%edx),%xmm5,%xmm6
- vpaddq %xmm6,%xmm1,%xmm1
- vmovdqa 64(%ebx),%xmm7
- vpmuludq 48(%edx),%xmm5,%xmm5
- vpaddq %xmm5,%xmm2,%xmm2
- vpsrlq $26,%xmm3,%xmm5
- vpand %xmm7,%xmm3,%xmm3
- vpsrlq $26,%xmm0,%xmm6
- vpand %xmm7,%xmm0,%xmm0
- vpaddq %xmm5,%xmm4,%xmm4
- vpaddq %xmm6,%xmm1,%xmm1
- vpsrlq $26,%xmm4,%xmm5
- vpand %xmm7,%xmm4,%xmm4
- vpsrlq $26,%xmm1,%xmm6
- vpand %xmm7,%xmm1,%xmm1
- vpaddq %xmm6,%xmm2,%xmm2
- vpaddd %xmm5,%xmm0,%xmm0
- vpsllq $2,%xmm5,%xmm5
- vpsrlq $26,%xmm2,%xmm6
- vpand %xmm7,%xmm2,%xmm2
- vpaddd %xmm5,%xmm0,%xmm0
- vpaddd %xmm6,%xmm3,%xmm3
- vpsrlq $26,%xmm3,%xmm6
- vpsrlq $26,%xmm0,%xmm5
- vpand %xmm7,%xmm0,%xmm0
- vpand %xmm7,%xmm3,%xmm3
- vpaddd %xmm5,%xmm1,%xmm1
- vpaddd %xmm6,%xmm4,%xmm4
- decl %ecx
- jz .L019square_break
- vpunpcklqdq (%esp),%xmm0,%xmm0
- vpunpcklqdq 16(%esp),%xmm1,%xmm1
- vpunpcklqdq 32(%esp),%xmm2,%xmm2
- vpunpcklqdq 48(%esp),%xmm3,%xmm3
- vpunpcklqdq 64(%esp),%xmm4,%xmm4
- jmp .L018square
-.L019square_break:
- vpsllq $32,%xmm0,%xmm0
- vpsllq $32,%xmm1,%xmm1
- vpsllq $32,%xmm2,%xmm2
- vpsllq $32,%xmm3,%xmm3
- vpsllq $32,%xmm4,%xmm4
- vpor (%esp),%xmm0,%xmm0
- vpor 16(%esp),%xmm1,%xmm1
- vpor 32(%esp),%xmm2,%xmm2
- vpor 48(%esp),%xmm3,%xmm3
- vpor 64(%esp),%xmm4,%xmm4
- vpshufd $141,%xmm0,%xmm0
- vpshufd $141,%xmm1,%xmm1
- vpshufd $141,%xmm2,%xmm2
- vpshufd $141,%xmm3,%xmm3
- vpshufd $141,%xmm4,%xmm4
- vmovdqu %xmm0,(%edi)
- vmovdqu %xmm1,16(%edi)
- vmovdqu %xmm2,32(%edi)
- vmovdqu %xmm3,48(%edi)
- vmovdqu %xmm4,64(%edi)
- vpslld $2,%xmm1,%xmm6
- vpslld $2,%xmm2,%xmm5
- vpaddd %xmm1,%xmm6,%xmm6
- vpaddd %xmm2,%xmm5,%xmm5
- vmovdqu %xmm6,80(%edi)
- vmovdqu %xmm5,96(%edi)
- vpslld $2,%xmm3,%xmm6
- vpslld $2,%xmm4,%xmm5
- vpaddd %xmm3,%xmm6,%xmm6
- vpaddd %xmm4,%xmm5,%xmm5
- vmovdqu %xmm6,112(%edi)
- vmovdqu %xmm5,128(%edi)
- movl %ebp,%esp
- leal -48(%edi),%edi
- ret
-.size _poly1305_init_avx2,.-_poly1305_init_avx2
-.align 32
-.type _poly1305_blocks_avx2,@function
-.align 16
-_poly1305_blocks_avx2:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl 20(%esp),%edi
- movl 24(%esp),%esi
- movl 28(%esp),%ecx
- movl 20(%edi),%eax
- andl $-16,%ecx
- jz .L020nodata
- cmpl $64,%ecx
- jae .L021enter_avx2
- testl %eax,%eax
- jz .Lenter_blocks
-.L021enter_avx2:
- vzeroupper
- call .L022pic_point
-.L022pic_point:
- popl %ebx
- leal .Lconst_sse2-.L022pic_point(%ebx),%ebx
- testl %eax,%eax
- jnz .L023base2_26
- call _poly1305_init_avx2
- movl (%edi),%eax
- movl 3(%edi),%ecx
- movl 6(%edi),%edx
- movl 9(%edi),%esi
- movl 13(%edi),%ebp
- shrl $2,%ecx
- andl $67108863,%eax
- shrl $4,%edx
- andl $67108863,%ecx
- shrl $6,%esi
- andl $67108863,%edx
- movl %eax,(%edi)
- movl %ecx,4(%edi)
- movl %edx,8(%edi)
- movl %esi,12(%edi)
- movl %ebp,16(%edi)
- movl $1,20(%edi)
- movl 24(%esp),%esi
- movl 28(%esp),%ecx
-.L023base2_26:
- movl 32(%esp),%eax
- movl %esp,%ebp
- subl $448,%esp
- andl $-512,%esp
- vmovdqu 48(%edi),%xmm0
- leal 288(%esp),%edx
- vmovdqu 64(%edi),%xmm1
- vmovdqu 80(%edi),%xmm2
- vmovdqu 96(%edi),%xmm3
- vmovdqu 112(%edi),%xmm4
- leal 48(%edi),%edi
- vpermq $64,%ymm0,%ymm0
- vpermq $64,%ymm1,%ymm1
- vpermq $64,%ymm2,%ymm2
- vpermq $64,%ymm3,%ymm3
- vpermq $64,%ymm4,%ymm4
- vpshufd $200,%ymm0,%ymm0
- vpshufd $200,%ymm1,%ymm1
- vpshufd $200,%ymm2,%ymm2
- vpshufd $200,%ymm3,%ymm3
- vpshufd $200,%ymm4,%ymm4
- vmovdqa %ymm0,-128(%edx)
- vmovdqu 80(%edi),%xmm0
- vmovdqa %ymm1,-96(%edx)
- vmovdqu 96(%edi),%xmm1
- vmovdqa %ymm2,-64(%edx)
- vmovdqu 112(%edi),%xmm2
- vmovdqa %ymm3,-32(%edx)
- vmovdqu 128(%edi),%xmm3
- vmovdqa %ymm4,(%edx)
- vpermq $64,%ymm0,%ymm0
- vpermq $64,%ymm1,%ymm1
- vpermq $64,%ymm2,%ymm2
- vpermq $64,%ymm3,%ymm3
- vpshufd $200,%ymm0,%ymm0
- vpshufd $200,%ymm1,%ymm1
- vpshufd $200,%ymm2,%ymm2
- vpshufd $200,%ymm3,%ymm3
- vmovdqa %ymm0,32(%edx)
- vmovd -48(%edi),%xmm0
- vmovdqa %ymm1,64(%edx)
- vmovd -44(%edi),%xmm1
- vmovdqa %ymm2,96(%edx)
- vmovd -40(%edi),%xmm2
- vmovdqa %ymm3,128(%edx)
- vmovd -36(%edi),%xmm3
- vmovd -32(%edi),%xmm4
- vmovdqa 64(%ebx),%ymm7
- negl %eax
- testl $63,%ecx
- jz .L024even
- movl %ecx,%edx
- andl $-64,%ecx
- andl $63,%edx
- vmovdqu (%esi),%xmm5
- cmpl $32,%edx
- jb .L025one
- vmovdqu 16(%esi),%xmm6
- je .L026two
- vinserti128 $1,32(%esi),%ymm5,%ymm5
- leal 48(%esi),%esi
- leal 8(%ebx),%ebx
- leal 296(%esp),%edx
- jmp .L027tail
-.L026two:
- leal 32(%esi),%esi
- leal 16(%ebx),%ebx
- leal 304(%esp),%edx
- jmp .L027tail
-.L025one:
- leal 16(%esi),%esi
- vpxor %ymm6,%ymm6,%ymm6
- leal 32(%ebx,%eax,8),%ebx
- leal 312(%esp),%edx
- jmp .L027tail
-.align 32
-.L024even:
- vmovdqu (%esi),%xmm5
- vmovdqu 16(%esi),%xmm6
- vinserti128 $1,32(%esi),%ymm5,%ymm5
- vinserti128 $1,48(%esi),%ymm6,%ymm6
- leal 64(%esi),%esi
- subl $64,%ecx
- jz .L027tail
-.L028loop:
- vmovdqa %ymm2,64(%esp)
- vpsrldq $6,%ymm5,%ymm2
- vmovdqa %ymm0,(%esp)
- vpsrldq $6,%ymm6,%ymm0
- vmovdqa %ymm1,32(%esp)
- vpunpckhqdq %ymm6,%ymm5,%ymm1
- vpunpcklqdq %ymm6,%ymm5,%ymm5
- vpunpcklqdq %ymm0,%ymm2,%ymm2
- vpsrlq $30,%ymm2,%ymm0
- vpsrlq $4,%ymm2,%ymm2
- vpsrlq $26,%ymm5,%ymm6
- vpsrlq $40,%ymm1,%ymm1
- vpand %ymm7,%ymm2,%ymm2
- vpand %ymm7,%ymm5,%ymm5
- vpand %ymm7,%ymm6,%ymm6
- vpand %ymm7,%ymm0,%ymm0
- vpor (%ebx),%ymm1,%ymm1
- vpaddq 64(%esp),%ymm2,%ymm2
- vpaddq (%esp),%ymm5,%ymm5
- vpaddq 32(%esp),%ymm6,%ymm6
- vpaddq %ymm3,%ymm0,%ymm0
- vpaddq %ymm4,%ymm1,%ymm1
- vpmuludq -96(%edx),%ymm2,%ymm3
- vmovdqa %ymm6,32(%esp)
- vpmuludq -64(%edx),%ymm2,%ymm4
- vmovdqa %ymm0,96(%esp)
- vpmuludq 96(%edx),%ymm2,%ymm0
- vmovdqa %ymm1,128(%esp)
- vpmuludq 128(%edx),%ymm2,%ymm1
- vpmuludq -128(%edx),%ymm2,%ymm2
- vpmuludq -32(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm3,%ymm3
- vpmuludq (%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm4,%ymm4
- vpmuludq -128(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm0,%ymm0
- vmovdqa 32(%esp),%ymm7
- vpmuludq -96(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm1,%ymm1
- vpmuludq -64(%edx),%ymm5,%ymm5
- vpaddq %ymm5,%ymm2,%ymm2
- vpmuludq -64(%edx),%ymm7,%ymm6
- vpaddq %ymm6,%ymm3,%ymm3
- vpmuludq -32(%edx),%ymm7,%ymm5
- vpaddq %ymm5,%ymm4,%ymm4
- vpmuludq 128(%edx),%ymm7,%ymm6
- vpaddq %ymm6,%ymm0,%ymm0
- vmovdqa 96(%esp),%ymm6
- vpmuludq -128(%edx),%ymm7,%ymm5
- vpaddq %ymm5,%ymm1,%ymm1
- vpmuludq -96(%edx),%ymm7,%ymm7
- vpaddq %ymm7,%ymm2,%ymm2
- vpmuludq -128(%edx),%ymm6,%ymm5
- vpaddq %ymm5,%ymm3,%ymm3
- vpmuludq -96(%edx),%ymm6,%ymm7
- vpaddq %ymm7,%ymm4,%ymm4
- vpmuludq 64(%edx),%ymm6,%ymm5
- vpaddq %ymm5,%ymm0,%ymm0
- vmovdqa 128(%esp),%ymm5
- vpmuludq 96(%edx),%ymm6,%ymm7
- vpaddq %ymm7,%ymm1,%ymm1
- vpmuludq 128(%edx),%ymm6,%ymm6
- vpaddq %ymm6,%ymm2,%ymm2
- vpmuludq 128(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm3,%ymm3
- vpmuludq 32(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm0,%ymm0
- vpmuludq -128(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm4,%ymm4
- vmovdqa 64(%ebx),%ymm7
- vpmuludq 64(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm1,%ymm1
- vpmuludq 96(%edx),%ymm5,%ymm5
- vpaddq %ymm5,%ymm2,%ymm2
- vpsrlq $26,%ymm3,%ymm5
- vpand %ymm7,%ymm3,%ymm3
- vpsrlq $26,%ymm0,%ymm6
- vpand %ymm7,%ymm0,%ymm0
- vpaddq %ymm5,%ymm4,%ymm4
- vpaddq %ymm6,%ymm1,%ymm1
- vpsrlq $26,%ymm4,%ymm5
- vpand %ymm7,%ymm4,%ymm4
- vpsrlq $26,%ymm1,%ymm6
- vpand %ymm7,%ymm1,%ymm1
- vpaddq %ymm6,%ymm2,%ymm2
- vpaddq %ymm5,%ymm0,%ymm0
- vpsllq $2,%ymm5,%ymm5
- vpsrlq $26,%ymm2,%ymm6
- vpand %ymm7,%ymm2,%ymm2
- vpaddq %ymm5,%ymm0,%ymm0
- vpaddq %ymm6,%ymm3,%ymm3
- vpsrlq $26,%ymm3,%ymm6
- vpsrlq $26,%ymm0,%ymm5
- vpand %ymm7,%ymm0,%ymm0
- vpand %ymm7,%ymm3,%ymm3
- vpaddq %ymm5,%ymm1,%ymm1
- vpaddq %ymm6,%ymm4,%ymm4
- vmovdqu (%esi),%xmm5
- vmovdqu 16(%esi),%xmm6
- vinserti128 $1,32(%esi),%ymm5,%ymm5
- vinserti128 $1,48(%esi),%ymm6,%ymm6
- leal 64(%esi),%esi
- subl $64,%ecx
- jnz .L028loop
-.L027tail:
- vmovdqa %ymm2,64(%esp)
- vpsrldq $6,%ymm5,%ymm2
- vmovdqa %ymm0,(%esp)
- vpsrldq $6,%ymm6,%ymm0
- vmovdqa %ymm1,32(%esp)
- vpunpckhqdq %ymm6,%ymm5,%ymm1
- vpunpcklqdq %ymm6,%ymm5,%ymm5
- vpunpcklqdq %ymm0,%ymm2,%ymm2
- vpsrlq $30,%ymm2,%ymm0
- vpsrlq $4,%ymm2,%ymm2
- vpsrlq $26,%ymm5,%ymm6
- vpsrlq $40,%ymm1,%ymm1
- vpand %ymm7,%ymm2,%ymm2
- vpand %ymm7,%ymm5,%ymm5
- vpand %ymm7,%ymm6,%ymm6
- vpand %ymm7,%ymm0,%ymm0
- vpor (%ebx),%ymm1,%ymm1
- andl $-64,%ebx
- vpaddq 64(%esp),%ymm2,%ymm2
- vpaddq (%esp),%ymm5,%ymm5
- vpaddq 32(%esp),%ymm6,%ymm6
- vpaddq %ymm3,%ymm0,%ymm0
- vpaddq %ymm4,%ymm1,%ymm1
- vpmuludq -92(%edx),%ymm2,%ymm3
- vmovdqa %ymm6,32(%esp)
- vpmuludq -60(%edx),%ymm2,%ymm4
- vmovdqa %ymm0,96(%esp)
- vpmuludq 100(%edx),%ymm2,%ymm0
- vmovdqa %ymm1,128(%esp)
- vpmuludq 132(%edx),%ymm2,%ymm1
- vpmuludq -124(%edx),%ymm2,%ymm2
- vpmuludq -28(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm3,%ymm3
- vpmuludq 4(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm4,%ymm4
- vpmuludq -124(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm0,%ymm0
- vmovdqa 32(%esp),%ymm7
- vpmuludq -92(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm1,%ymm1
- vpmuludq -60(%edx),%ymm5,%ymm5
- vpaddq %ymm5,%ymm2,%ymm2
- vpmuludq -60(%edx),%ymm7,%ymm6
- vpaddq %ymm6,%ymm3,%ymm3
- vpmuludq -28(%edx),%ymm7,%ymm5
- vpaddq %ymm5,%ymm4,%ymm4
- vpmuludq 132(%edx),%ymm7,%ymm6
- vpaddq %ymm6,%ymm0,%ymm0
- vmovdqa 96(%esp),%ymm6
- vpmuludq -124(%edx),%ymm7,%ymm5
- vpaddq %ymm5,%ymm1,%ymm1
- vpmuludq -92(%edx),%ymm7,%ymm7
- vpaddq %ymm7,%ymm2,%ymm2
- vpmuludq -124(%edx),%ymm6,%ymm5
- vpaddq %ymm5,%ymm3,%ymm3
- vpmuludq -92(%edx),%ymm6,%ymm7
- vpaddq %ymm7,%ymm4,%ymm4
- vpmuludq 68(%edx),%ymm6,%ymm5
- vpaddq %ymm5,%ymm0,%ymm0
- vmovdqa 128(%esp),%ymm5
- vpmuludq 100(%edx),%ymm6,%ymm7
- vpaddq %ymm7,%ymm1,%ymm1
- vpmuludq 132(%edx),%ymm6,%ymm6
- vpaddq %ymm6,%ymm2,%ymm2
- vpmuludq 132(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm3,%ymm3
- vpmuludq 36(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm0,%ymm0
- vpmuludq -124(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm4,%ymm4
- vmovdqa 64(%ebx),%ymm7
- vpmuludq 68(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm1,%ymm1
- vpmuludq 100(%edx),%ymm5,%ymm5
- vpaddq %ymm5,%ymm2,%ymm2
- vpsrldq $8,%ymm4,%ymm5
- vpsrldq $8,%ymm3,%ymm6
- vpaddq %ymm5,%ymm4,%ymm4
- vpsrldq $8,%ymm0,%ymm5
- vpaddq %ymm6,%ymm3,%ymm3
- vpsrldq $8,%ymm1,%ymm6
- vpaddq %ymm5,%ymm0,%ymm0
- vpsrldq $8,%ymm2,%ymm5
- vpaddq %ymm6,%ymm1,%ymm1
- vpermq $2,%ymm4,%ymm6
- vpaddq %ymm5,%ymm2,%ymm2
- vpermq $2,%ymm3,%ymm5
- vpaddq %ymm6,%ymm4,%ymm4
- vpermq $2,%ymm0,%ymm6
- vpaddq %ymm5,%ymm3,%ymm3
- vpermq $2,%ymm1,%ymm5
- vpaddq %ymm6,%ymm0,%ymm0
- vpermq $2,%ymm2,%ymm6
- vpaddq %ymm5,%ymm1,%ymm1
- vpaddq %ymm6,%ymm2,%ymm2
- vpsrlq $26,%ymm3,%ymm5
- vpand %ymm7,%ymm3,%ymm3
- vpsrlq $26,%ymm0,%ymm6
- vpand %ymm7,%ymm0,%ymm0
- vpaddq %ymm5,%ymm4,%ymm4
- vpaddq %ymm6,%ymm1,%ymm1
- vpsrlq $26,%ymm4,%ymm5
- vpand %ymm7,%ymm4,%ymm4
- vpsrlq $26,%ymm1,%ymm6
- vpand %ymm7,%ymm1,%ymm1
- vpaddq %ymm6,%ymm2,%ymm2
- vpaddq %ymm5,%ymm0,%ymm0
- vpsllq $2,%ymm5,%ymm5
- vpsrlq $26,%ymm2,%ymm6
- vpand %ymm7,%ymm2,%ymm2
- vpaddq %ymm5,%ymm0,%ymm0
- vpaddq %ymm6,%ymm3,%ymm3
- vpsrlq $26,%ymm3,%ymm6
- vpsrlq $26,%ymm0,%ymm5
- vpand %ymm7,%ymm0,%ymm0
- vpand %ymm7,%ymm3,%ymm3
- vpaddq %ymm5,%ymm1,%ymm1
- vpaddq %ymm6,%ymm4,%ymm4
- cmpl $0,%ecx
- je .L029done
- vpshufd $252,%xmm0,%xmm0
- leal 288(%esp),%edx
- vpshufd $252,%xmm1,%xmm1
- vpshufd $252,%xmm2,%xmm2
- vpshufd $252,%xmm3,%xmm3
- vpshufd $252,%xmm4,%xmm4
- jmp .L024even
-.align 16
-.L029done:
- vmovd %xmm0,-48(%edi)
- vmovd %xmm1,-44(%edi)
- vmovd %xmm2,-40(%edi)
- vmovd %xmm3,-36(%edi)
- vmovd %xmm4,-32(%edi)
- vzeroupper
- movl %ebp,%esp
-.L020nodata:
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size _poly1305_blocks_avx2,.-_poly1305_blocks_avx2
.align 64
.Lconst_sse2:
.long 16777216,0,16777216,0,16777216,0,16777216,0
@@ -1947,10 +1392,6 @@ poly1305_init:
jne .L002no_sse2
leal _poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
leal _poly1305_emit_sse2-.L001pic_point(%ebx),%edx
- movl 8(%edi),%ecx
- testl $32,%ecx
- jz .L002no_sse2
- leal _poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
.L002no_sse2:
movl 20(%esp),%edi
movl %eax,(%ebp)
@@ -3259,557 +2700,6 @@ _poly1305_emit_sse2:
popl %ebp
ret
.size _poly1305_emit_sse2,.-_poly1305_emit_sse2
-.align 32
-.type _poly1305_init_avx2,@function
-.align 16
-_poly1305_init_avx2:
- vmovdqu 24(%edi),%xmm4
- leal 48(%edi),%edi
- movl %esp,%ebp
- subl $224,%esp
- andl $-16,%esp
- vmovdqa 64(%ebx),%xmm7
- vpand %xmm7,%xmm4,%xmm0
- vpsrlq $26,%xmm4,%xmm1
- vpsrldq $6,%xmm4,%xmm3
- vpand %xmm7,%xmm1,%xmm1
- vpsrlq $4,%xmm3,%xmm2
- vpsrlq $30,%xmm3,%xmm3
- vpand %xmm7,%xmm2,%xmm2
- vpand %xmm7,%xmm3,%xmm3
- vpsrldq $13,%xmm4,%xmm4
- leal 144(%esp),%edx
- movl $2,%ecx
-.L018square:
- vmovdqa %xmm0,(%esp)
- vmovdqa %xmm1,16(%esp)
- vmovdqa %xmm2,32(%esp)
- vmovdqa %xmm3,48(%esp)
- vmovdqa %xmm4,64(%esp)
- vpslld $2,%xmm1,%xmm6
- vpslld $2,%xmm2,%xmm5
- vpaddd %xmm1,%xmm6,%xmm6
- vpaddd %xmm2,%xmm5,%xmm5
- vmovdqa %xmm6,80(%esp)
- vmovdqa %xmm5,96(%esp)
- vpslld $2,%xmm3,%xmm6
- vpslld $2,%xmm4,%xmm5
- vpaddd %xmm3,%xmm6,%xmm6
- vpaddd %xmm4,%xmm5,%xmm5
- vmovdqa %xmm6,112(%esp)
- vmovdqa %xmm5,128(%esp)
- vpshufd $68,%xmm0,%xmm5
- vmovdqa %xmm1,%xmm6
- vpshufd $68,%xmm1,%xmm1
- vpshufd $68,%xmm2,%xmm2
- vpshufd $68,%xmm3,%xmm3
- vpshufd $68,%xmm4,%xmm4
- vmovdqa %xmm5,(%edx)
- vmovdqa %xmm1,16(%edx)
- vmovdqa %xmm2,32(%edx)
- vmovdqa %xmm3,48(%edx)
- vmovdqa %xmm4,64(%edx)
- vpmuludq %xmm0,%xmm4,%xmm4
- vpmuludq %xmm0,%xmm3,%xmm3
- vpmuludq %xmm0,%xmm2,%xmm2
- vpmuludq %xmm0,%xmm1,%xmm1
- vpmuludq %xmm0,%xmm5,%xmm0
- vpmuludq 48(%edx),%xmm6,%xmm5
- vpaddq %xmm5,%xmm4,%xmm4
- vpmuludq 32(%edx),%xmm6,%xmm7
- vpaddq %xmm7,%xmm3,%xmm3
- vpmuludq 16(%edx),%xmm6,%xmm5
- vpaddq %xmm5,%xmm2,%xmm2
- vmovdqa 80(%esp),%xmm7
- vpmuludq (%edx),%xmm6,%xmm6
- vpaddq %xmm6,%xmm1,%xmm1
- vmovdqa 32(%esp),%xmm5
- vpmuludq 64(%edx),%xmm7,%xmm7
- vpaddq %xmm7,%xmm0,%xmm0
- vpmuludq 32(%edx),%xmm5,%xmm6
- vpaddq %xmm6,%xmm4,%xmm4
- vpmuludq 16(%edx),%xmm5,%xmm7
- vpaddq %xmm7,%xmm3,%xmm3
- vmovdqa 96(%esp),%xmm6
- vpmuludq (%edx),%xmm5,%xmm5
- vpaddq %xmm5,%xmm2,%xmm2
- vpmuludq 64(%edx),%xmm6,%xmm7
- vpaddq %xmm7,%xmm1,%xmm1
- vmovdqa 48(%esp),%xmm5
- vpmuludq 48(%edx),%xmm6,%xmm6
- vpaddq %xmm6,%xmm0,%xmm0
- vpmuludq 16(%edx),%xmm5,%xmm7
- vpaddq %xmm7,%xmm4,%xmm4
- vmovdqa 112(%esp),%xmm6
- vpmuludq (%edx),%xmm5,%xmm5
- vpaddq %xmm5,%xmm3,%xmm3
- vpmuludq 64(%edx),%xmm6,%xmm7
- vpaddq %xmm7,%xmm2,%xmm2
- vpmuludq 48(%edx),%xmm6,%xmm5
- vpaddq %xmm5,%xmm1,%xmm1
- vmovdqa 64(%esp),%xmm7
- vpmuludq 32(%edx),%xmm6,%xmm6
- vpaddq %xmm6,%xmm0,%xmm0
- vmovdqa 128(%esp),%xmm5
- vpmuludq (%edx),%xmm7,%xmm7
- vpaddq %xmm7,%xmm4,%xmm4
- vpmuludq 64(%edx),%xmm5,%xmm6
- vpaddq %xmm6,%xmm3,%xmm3
- vpmuludq 16(%edx),%xmm5,%xmm7
- vpaddq %xmm7,%xmm0,%xmm0
- vpmuludq 32(%edx),%xmm5,%xmm6
- vpaddq %xmm6,%xmm1,%xmm1
- vmovdqa 64(%ebx),%xmm7
- vpmuludq 48(%edx),%xmm5,%xmm5
- vpaddq %xmm5,%xmm2,%xmm2
- vpsrlq $26,%xmm3,%xmm5
- vpand %xmm7,%xmm3,%xmm3
- vpsrlq $26,%xmm0,%xmm6
- vpand %xmm7,%xmm0,%xmm0
- vpaddq %xmm5,%xmm4,%xmm4
- vpaddq %xmm6,%xmm1,%xmm1
- vpsrlq $26,%xmm4,%xmm5
- vpand %xmm7,%xmm4,%xmm4
- vpsrlq $26,%xmm1,%xmm6
- vpand %xmm7,%xmm1,%xmm1
- vpaddq %xmm6,%xmm2,%xmm2
- vpaddd %xmm5,%xmm0,%xmm0
- vpsllq $2,%xmm5,%xmm5
- vpsrlq $26,%xmm2,%xmm6
- vpand %xmm7,%xmm2,%xmm2
- vpaddd %xmm5,%xmm0,%xmm0
- vpaddd %xmm6,%xmm3,%xmm3
- vpsrlq $26,%xmm3,%xmm6
- vpsrlq $26,%xmm0,%xmm5
- vpand %xmm7,%xmm0,%xmm0
- vpand %xmm7,%xmm3,%xmm3
- vpaddd %xmm5,%xmm1,%xmm1
- vpaddd %xmm6,%xmm4,%xmm4
- decl %ecx
- jz .L019square_break
- vpunpcklqdq (%esp),%xmm0,%xmm0
- vpunpcklqdq 16(%esp),%xmm1,%xmm1
- vpunpcklqdq 32(%esp),%xmm2,%xmm2
- vpunpcklqdq 48(%esp),%xmm3,%xmm3
- vpunpcklqdq 64(%esp),%xmm4,%xmm4
- jmp .L018square
-.L019square_break:
- vpsllq $32,%xmm0,%xmm0
- vpsllq $32,%xmm1,%xmm1
- vpsllq $32,%xmm2,%xmm2
- vpsllq $32,%xmm3,%xmm3
- vpsllq $32,%xmm4,%xmm4
- vpor (%esp),%xmm0,%xmm0
- vpor 16(%esp),%xmm1,%xmm1
- vpor 32(%esp),%xmm2,%xmm2
- vpor 48(%esp),%xmm3,%xmm3
- vpor 64(%esp),%xmm4,%xmm4
- vpshufd $141,%xmm0,%xmm0
- vpshufd $141,%xmm1,%xmm1
- vpshufd $141,%xmm2,%xmm2
- vpshufd $141,%xmm3,%xmm3
- vpshufd $141,%xmm4,%xmm4
- vmovdqu %xmm0,(%edi)
- vmovdqu %xmm1,16(%edi)
- vmovdqu %xmm2,32(%edi)
- vmovdqu %xmm3,48(%edi)
- vmovdqu %xmm4,64(%edi)
- vpslld $2,%xmm1,%xmm6
- vpslld $2,%xmm2,%xmm5
- vpaddd %xmm1,%xmm6,%xmm6
- vpaddd %xmm2,%xmm5,%xmm5
- vmovdqu %xmm6,80(%edi)
- vmovdqu %xmm5,96(%edi)
- vpslld $2,%xmm3,%xmm6
- vpslld $2,%xmm4,%xmm5
- vpaddd %xmm3,%xmm6,%xmm6
- vpaddd %xmm4,%xmm5,%xmm5
- vmovdqu %xmm6,112(%edi)
- vmovdqu %xmm5,128(%edi)
- movl %ebp,%esp
- leal -48(%edi),%edi
- ret
-.size _poly1305_init_avx2,.-_poly1305_init_avx2
-.align 32
-.type _poly1305_blocks_avx2,@function
-.align 16
-_poly1305_blocks_avx2:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl 20(%esp),%edi
- movl 24(%esp),%esi
- movl 28(%esp),%ecx
- movl 20(%edi),%eax
- andl $-16,%ecx
- jz .L020nodata
- cmpl $64,%ecx
- jae .L021enter_avx2
- testl %eax,%eax
- jz .Lenter_blocks
-.L021enter_avx2:
- vzeroupper
- call .L022pic_point
-.L022pic_point:
- popl %ebx
- leal .Lconst_sse2-.L022pic_point(%ebx),%ebx
- testl %eax,%eax
- jnz .L023base2_26
- call _poly1305_init_avx2
- movl (%edi),%eax
- movl 3(%edi),%ecx
- movl 6(%edi),%edx
- movl 9(%edi),%esi
- movl 13(%edi),%ebp
- shrl $2,%ecx
- andl $67108863,%eax
- shrl $4,%edx
- andl $67108863,%ecx
- shrl $6,%esi
- andl $67108863,%edx
- movl %eax,(%edi)
- movl %ecx,4(%edi)
- movl %edx,8(%edi)
- movl %esi,12(%edi)
- movl %ebp,16(%edi)
- movl $1,20(%edi)
- movl 24(%esp),%esi
- movl 28(%esp),%ecx
-.L023base2_26:
- movl 32(%esp),%eax
- movl %esp,%ebp
- subl $448,%esp
- andl $-512,%esp
- vmovdqu 48(%edi),%xmm0
- leal 288(%esp),%edx
- vmovdqu 64(%edi),%xmm1
- vmovdqu 80(%edi),%xmm2
- vmovdqu 96(%edi),%xmm3
- vmovdqu 112(%edi),%xmm4
- leal 48(%edi),%edi
- vpermq $64,%ymm0,%ymm0
- vpermq $64,%ymm1,%ymm1
- vpermq $64,%ymm2,%ymm2
- vpermq $64,%ymm3,%ymm3
- vpermq $64,%ymm4,%ymm4
- vpshufd $200,%ymm0,%ymm0
- vpshufd $200,%ymm1,%ymm1
- vpshufd $200,%ymm2,%ymm2
- vpshufd $200,%ymm3,%ymm3
- vpshufd $200,%ymm4,%ymm4
- vmovdqa %ymm0,-128(%edx)
- vmovdqu 80(%edi),%xmm0
- vmovdqa %ymm1,-96(%edx)
- vmovdqu 96(%edi),%xmm1
- vmovdqa %ymm2,-64(%edx)
- vmovdqu 112(%edi),%xmm2
- vmovdqa %ymm3,-32(%edx)
- vmovdqu 128(%edi),%xmm3
- vmovdqa %ymm4,(%edx)
- vpermq $64,%ymm0,%ymm0
- vpermq $64,%ymm1,%ymm1
- vpermq $64,%ymm2,%ymm2
- vpermq $64,%ymm3,%ymm3
- vpshufd $200,%ymm0,%ymm0
- vpshufd $200,%ymm1,%ymm1
- vpshufd $200,%ymm2,%ymm2
- vpshufd $200,%ymm3,%ymm3
- vmovdqa %ymm0,32(%edx)
- vmovd -48(%edi),%xmm0
- vmovdqa %ymm1,64(%edx)
- vmovd -44(%edi),%xmm1
- vmovdqa %ymm2,96(%edx)
- vmovd -40(%edi),%xmm2
- vmovdqa %ymm3,128(%edx)
- vmovd -36(%edi),%xmm3
- vmovd -32(%edi),%xmm4
- vmovdqa 64(%ebx),%ymm7
- negl %eax
- testl $63,%ecx
- jz .L024even
- movl %ecx,%edx
- andl $-64,%ecx
- andl $63,%edx
- vmovdqu (%esi),%xmm5
- cmpl $32,%edx
- jb .L025one
- vmovdqu 16(%esi),%xmm6
- je .L026two
- vinserti128 $1,32(%esi),%ymm5,%ymm5
- leal 48(%esi),%esi
- leal 8(%ebx),%ebx
- leal 296(%esp),%edx
- jmp .L027tail
-.L026two:
- leal 32(%esi),%esi
- leal 16(%ebx),%ebx
- leal 304(%esp),%edx
- jmp .L027tail
-.L025one:
- leal 16(%esi),%esi
- vpxor %ymm6,%ymm6,%ymm6
- leal 32(%ebx,%eax,8),%ebx
- leal 312(%esp),%edx
- jmp .L027tail
-.align 32
-.L024even:
- vmovdqu (%esi),%xmm5
- vmovdqu 16(%esi),%xmm6
- vinserti128 $1,32(%esi),%ymm5,%ymm5
- vinserti128 $1,48(%esi),%ymm6,%ymm6
- leal 64(%esi),%esi
- subl $64,%ecx
- jz .L027tail
-.L028loop:
- vmovdqa %ymm2,64(%esp)
- vpsrldq $6,%ymm5,%ymm2
- vmovdqa %ymm0,(%esp)
- vpsrldq $6,%ymm6,%ymm0
- vmovdqa %ymm1,32(%esp)
- vpunpckhqdq %ymm6,%ymm5,%ymm1
- vpunpcklqdq %ymm6,%ymm5,%ymm5
- vpunpcklqdq %ymm0,%ymm2,%ymm2
- vpsrlq $30,%ymm2,%ymm0
- vpsrlq $4,%ymm2,%ymm2
- vpsrlq $26,%ymm5,%ymm6
- vpsrlq $40,%ymm1,%ymm1
- vpand %ymm7,%ymm2,%ymm2
- vpand %ymm7,%ymm5,%ymm5
- vpand %ymm7,%ymm6,%ymm6
- vpand %ymm7,%ymm0,%ymm0
- vpor (%ebx),%ymm1,%ymm1
- vpaddq 64(%esp),%ymm2,%ymm2
- vpaddq (%esp),%ymm5,%ymm5
- vpaddq 32(%esp),%ymm6,%ymm6
- vpaddq %ymm3,%ymm0,%ymm0
- vpaddq %ymm4,%ymm1,%ymm1
- vpmuludq -96(%edx),%ymm2,%ymm3
- vmovdqa %ymm6,32(%esp)
- vpmuludq -64(%edx),%ymm2,%ymm4
- vmovdqa %ymm0,96(%esp)
- vpmuludq 96(%edx),%ymm2,%ymm0
- vmovdqa %ymm1,128(%esp)
- vpmuludq 128(%edx),%ymm2,%ymm1
- vpmuludq -128(%edx),%ymm2,%ymm2
- vpmuludq -32(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm3,%ymm3
- vpmuludq (%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm4,%ymm4
- vpmuludq -128(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm0,%ymm0
- vmovdqa 32(%esp),%ymm7
- vpmuludq -96(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm1,%ymm1
- vpmuludq -64(%edx),%ymm5,%ymm5
- vpaddq %ymm5,%ymm2,%ymm2
- vpmuludq -64(%edx),%ymm7,%ymm6
- vpaddq %ymm6,%ymm3,%ymm3
- vpmuludq -32(%edx),%ymm7,%ymm5
- vpaddq %ymm5,%ymm4,%ymm4
- vpmuludq 128(%edx),%ymm7,%ymm6
- vpaddq %ymm6,%ymm0,%ymm0
- vmovdqa 96(%esp),%ymm6
- vpmuludq -128(%edx),%ymm7,%ymm5
- vpaddq %ymm5,%ymm1,%ymm1
- vpmuludq -96(%edx),%ymm7,%ymm7
- vpaddq %ymm7,%ymm2,%ymm2
- vpmuludq -128(%edx),%ymm6,%ymm5
- vpaddq %ymm5,%ymm3,%ymm3
- vpmuludq -96(%edx),%ymm6,%ymm7
- vpaddq %ymm7,%ymm4,%ymm4
- vpmuludq 64(%edx),%ymm6,%ymm5
- vpaddq %ymm5,%ymm0,%ymm0
- vmovdqa 128(%esp),%ymm5
- vpmuludq 96(%edx),%ymm6,%ymm7
- vpaddq %ymm7,%ymm1,%ymm1
- vpmuludq 128(%edx),%ymm6,%ymm6
- vpaddq %ymm6,%ymm2,%ymm2
- vpmuludq 128(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm3,%ymm3
- vpmuludq 32(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm0,%ymm0
- vpmuludq -128(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm4,%ymm4
- vmovdqa 64(%ebx),%ymm7
- vpmuludq 64(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm1,%ymm1
- vpmuludq 96(%edx),%ymm5,%ymm5
- vpaddq %ymm5,%ymm2,%ymm2
- vpsrlq $26,%ymm3,%ymm5
- vpand %ymm7,%ymm3,%ymm3
- vpsrlq $26,%ymm0,%ymm6
- vpand %ymm7,%ymm0,%ymm0
- vpaddq %ymm5,%ymm4,%ymm4
- vpaddq %ymm6,%ymm1,%ymm1
- vpsrlq $26,%ymm4,%ymm5
- vpand %ymm7,%ymm4,%ymm4
- vpsrlq $26,%ymm1,%ymm6
- vpand %ymm7,%ymm1,%ymm1
- vpaddq %ymm6,%ymm2,%ymm2
- vpaddq %ymm5,%ymm0,%ymm0
- vpsllq $2,%ymm5,%ymm5
- vpsrlq $26,%ymm2,%ymm6
- vpand %ymm7,%ymm2,%ymm2
- vpaddq %ymm5,%ymm0,%ymm0
- vpaddq %ymm6,%ymm3,%ymm3
- vpsrlq $26,%ymm3,%ymm6
- vpsrlq $26,%ymm0,%ymm5
- vpand %ymm7,%ymm0,%ymm0
- vpand %ymm7,%ymm3,%ymm3
- vpaddq %ymm5,%ymm1,%ymm1
- vpaddq %ymm6,%ymm4,%ymm4
- vmovdqu (%esi),%xmm5
- vmovdqu 16(%esi),%xmm6
- vinserti128 $1,32(%esi),%ymm5,%ymm5
- vinserti128 $1,48(%esi),%ymm6,%ymm6
- leal 64(%esi),%esi
- subl $64,%ecx
- jnz .L028loop
-.L027tail:
- vmovdqa %ymm2,64(%esp)
- vpsrldq $6,%ymm5,%ymm2
- vmovdqa %ymm0,(%esp)
- vpsrldq $6,%ymm6,%ymm0
- vmovdqa %ymm1,32(%esp)
- vpunpckhqdq %ymm6,%ymm5,%ymm1
- vpunpcklqdq %ymm6,%ymm5,%ymm5
- vpunpcklqdq %ymm0,%ymm2,%ymm2
- vpsrlq $30,%ymm2,%ymm0
- vpsrlq $4,%ymm2,%ymm2
- vpsrlq $26,%ymm5,%ymm6
- vpsrlq $40,%ymm1,%ymm1
- vpand %ymm7,%ymm2,%ymm2
- vpand %ymm7,%ymm5,%ymm5
- vpand %ymm7,%ymm6,%ymm6
- vpand %ymm7,%ymm0,%ymm0
- vpor (%ebx),%ymm1,%ymm1
- andl $-64,%ebx
- vpaddq 64(%esp),%ymm2,%ymm2
- vpaddq (%esp),%ymm5,%ymm5
- vpaddq 32(%esp),%ymm6,%ymm6
- vpaddq %ymm3,%ymm0,%ymm0
- vpaddq %ymm4,%ymm1,%ymm1
- vpmuludq -92(%edx),%ymm2,%ymm3
- vmovdqa %ymm6,32(%esp)
- vpmuludq -60(%edx),%ymm2,%ymm4
- vmovdqa %ymm0,96(%esp)
- vpmuludq 100(%edx),%ymm2,%ymm0
- vmovdqa %ymm1,128(%esp)
- vpmuludq 132(%edx),%ymm2,%ymm1
- vpmuludq -124(%edx),%ymm2,%ymm2
- vpmuludq -28(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm3,%ymm3
- vpmuludq 4(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm4,%ymm4
- vpmuludq -124(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm0,%ymm0
- vmovdqa 32(%esp),%ymm7
- vpmuludq -92(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm1,%ymm1
- vpmuludq -60(%edx),%ymm5,%ymm5
- vpaddq %ymm5,%ymm2,%ymm2
- vpmuludq -60(%edx),%ymm7,%ymm6
- vpaddq %ymm6,%ymm3,%ymm3
- vpmuludq -28(%edx),%ymm7,%ymm5
- vpaddq %ymm5,%ymm4,%ymm4
- vpmuludq 132(%edx),%ymm7,%ymm6
- vpaddq %ymm6,%ymm0,%ymm0
- vmovdqa 96(%esp),%ymm6
- vpmuludq -124(%edx),%ymm7,%ymm5
- vpaddq %ymm5,%ymm1,%ymm1
- vpmuludq -92(%edx),%ymm7,%ymm7
- vpaddq %ymm7,%ymm2,%ymm2
- vpmuludq -124(%edx),%ymm6,%ymm5
- vpaddq %ymm5,%ymm3,%ymm3
- vpmuludq -92(%edx),%ymm6,%ymm7
- vpaddq %ymm7,%ymm4,%ymm4
- vpmuludq 68(%edx),%ymm6,%ymm5
- vpaddq %ymm5,%ymm0,%ymm0
- vmovdqa 128(%esp),%ymm5
- vpmuludq 100(%edx),%ymm6,%ymm7
- vpaddq %ymm7,%ymm1,%ymm1
- vpmuludq 132(%edx),%ymm6,%ymm6
- vpaddq %ymm6,%ymm2,%ymm2
- vpmuludq 132(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm3,%ymm3
- vpmuludq 36(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm0,%ymm0
- vpmuludq -124(%edx),%ymm5,%ymm7
- vpaddq %ymm7,%ymm4,%ymm4
- vmovdqa 64(%ebx),%ymm7
- vpmuludq 68(%edx),%ymm5,%ymm6
- vpaddq %ymm6,%ymm1,%ymm1
- vpmuludq 100(%edx),%ymm5,%ymm5
- vpaddq %ymm5,%ymm2,%ymm2
- vpsrldq $8,%ymm4,%ymm5
- vpsrldq $8,%ymm3,%ymm6
- vpaddq %ymm5,%ymm4,%ymm4
- vpsrldq $8,%ymm0,%ymm5
- vpaddq %ymm6,%ymm3,%ymm3
- vpsrldq $8,%ymm1,%ymm6
- vpaddq %ymm5,%ymm0,%ymm0
- vpsrldq $8,%ymm2,%ymm5
- vpaddq %ymm6,%ymm1,%ymm1
- vpermq $2,%ymm4,%ymm6
- vpaddq %ymm5,%ymm2,%ymm2
- vpermq $2,%ymm3,%ymm5
- vpaddq %ymm6,%ymm4,%ymm4
- vpermq $2,%ymm0,%ymm6
- vpaddq %ymm5,%ymm3,%ymm3
- vpermq $2,%ymm1,%ymm5
- vpaddq %ymm6,%ymm0,%ymm0
- vpermq $2,%ymm2,%ymm6
- vpaddq %ymm5,%ymm1,%ymm1
- vpaddq %ymm6,%ymm2,%ymm2
- vpsrlq $26,%ymm3,%ymm5
- vpand %ymm7,%ymm3,%ymm3
- vpsrlq $26,%ymm0,%ymm6
- vpand %ymm7,%ymm0,%ymm0
- vpaddq %ymm5,%ymm4,%ymm4
- vpaddq %ymm6,%ymm1,%ymm1
- vpsrlq $26,%ymm4,%ymm5
- vpand %ymm7,%ymm4,%ymm4
- vpsrlq $26,%ymm1,%ymm6
- vpand %ymm7,%ymm1,%ymm1
- vpaddq %ymm6,%ymm2,%ymm2
- vpaddq %ymm5,%ymm0,%ymm0
- vpsllq $2,%ymm5,%ymm5
- vpsrlq $26,%ymm2,%ymm6
- vpand %ymm7,%ymm2,%ymm2
- vpaddq %ymm5,%ymm0,%ymm0
- vpaddq %ymm6,%ymm3,%ymm3
- vpsrlq $26,%ymm3,%ymm6
- vpsrlq $26,%ymm0,%ymm5
- vpand %ymm7,%ymm0,%ymm0
- vpand %ymm7,%ymm3,%ymm3
- vpaddq %ymm5,%ymm1,%ymm1
- vpaddq %ymm6,%ymm4,%ymm4
- cmpl $0,%ecx
- je .L029done
- vpshufd $252,%xmm0,%xmm0
- leal 288(%esp),%edx
- vpshufd $252,%xmm1,%xmm1
- vpshufd $252,%xmm2,%xmm2
- vpshufd $252,%xmm3,%xmm3
- vpshufd $252,%xmm4,%xmm4
- jmp .L024even
-.align 16
-.L029done:
- vmovd %xmm0,-48(%edi)
- vmovd %xmm1,-44(%edi)
- vmovd %xmm2,-40(%edi)
- vmovd %xmm3,-36(%edi)
- vmovd %xmm4,-32(%edi)
- vzeroupper
- movl %ebp,%esp
-.L020nodata:
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size _poly1305_blocks_avx2,.-_poly1305_blocks_avx2
.align 64
.Lconst_sse2:
.long 16777216,0,16777216,0,16777216,0,16777216,0
diff --git a/secure/lib/libcrypto/i386/sha1-586.S b/secure/lib/libcrypto/i386/sha1-586.S
index 7e90e2d9b1d2..49e7482b8161 100644
--- a/secure/lib/libcrypto/i386/sha1-586.S
+++ b/secure/lib/libcrypto/i386/sha1-586.S
@@ -25,11 +25,6 @@ sha1_block_data_order:
jz .L001x86
testl $536870912,%ecx
jnz .Lshaext_shortcut
- andl $268435456,%edx
- andl $1073741824,%eax
- orl %edx,%eax
- cmpl $1342177280,%eax
- je .Lavx_shortcut
jmp .Lssse3_shortcut
.align 16
.L001x86:
@@ -2787,1176 +2782,6 @@ _sha1_block_data_order_ssse3:
popl %ebp
ret
.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3
-.type _sha1_block_data_order_avx,@function
-.align 16
-_sha1_block_data_order_avx:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- call .L008pic_point
-.L008pic_point:
- popl %ebp
- leal .LK_XX_XX-.L008pic_point(%ebp),%ebp
-.Lavx_shortcut:
- vzeroall
- vmovdqa (%ebp),%xmm7
- vmovdqa 16(%ebp),%xmm0
- vmovdqa 32(%ebp),%xmm1
- vmovdqa 48(%ebp),%xmm2
- vmovdqa 64(%ebp),%xmm6
- movl 20(%esp),%edi
- movl 24(%esp),%ebp
- movl 28(%esp),%edx
- movl %esp,%esi
- subl $208,%esp
- andl $-64,%esp
- vmovdqa %xmm0,112(%esp)
- vmovdqa %xmm1,128(%esp)
- vmovdqa %xmm2,144(%esp)
- shll $6,%edx
- vmovdqa %xmm7,160(%esp)
- addl %ebp,%edx
- vmovdqa %xmm6,176(%esp)
- addl $64,%ebp
- movl %edi,192(%esp)
- movl %ebp,196(%esp)
- movl %edx,200(%esp)
- movl %esi,204(%esp)
- movl (%edi),%eax
- movl 4(%edi),%ebx
- movl 8(%edi),%ecx
- movl 12(%edi),%edx
- movl 16(%edi),%edi
- movl %ebx,%esi
- vmovdqu -64(%ebp),%xmm0
- vmovdqu -48(%ebp),%xmm1
- vmovdqu -32(%ebp),%xmm2
- vmovdqu -16(%ebp),%xmm3
- vpshufb %xmm6,%xmm0,%xmm0
- vpshufb %xmm6,%xmm1,%xmm1
- vpshufb %xmm6,%xmm2,%xmm2
- vmovdqa %xmm7,96(%esp)
- vpshufb %xmm6,%xmm3,%xmm3
- vpaddd %xmm7,%xmm0,%xmm4
- vpaddd %xmm7,%xmm1,%xmm5
- vpaddd %xmm7,%xmm2,%xmm6
- vmovdqa %xmm4,(%esp)
- movl %ecx,%ebp
- vmovdqa %xmm5,16(%esp)
- xorl %edx,%ebp
- vmovdqa %xmm6,32(%esp)
- andl %ebp,%esi
- jmp .L009loop
-.align 16
-.L009loop:
- shrdl $2,%ebx,%ebx
- xorl %edx,%esi
- vpalignr $8,%xmm0,%xmm1,%xmm4
- movl %eax,%ebp
- addl (%esp),%edi
- vpaddd %xmm3,%xmm7,%xmm7
- vmovdqa %xmm0,64(%esp)
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- vpsrldq $4,%xmm3,%xmm6
- addl %esi,%edi
- andl %ebx,%ebp
- vpxor %xmm0,%xmm4,%xmm4
- xorl %ecx,%ebx
- addl %eax,%edi
- vpxor %xmm2,%xmm6,%xmm6
- shrdl $7,%eax,%eax
- xorl %ecx,%ebp
- vmovdqa %xmm7,48(%esp)
- movl %edi,%esi
- addl 4(%esp),%edx
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ebx,%eax
- shldl $5,%edi,%edi
- addl %ebp,%edx
- andl %eax,%esi
- vpsrld $31,%xmm4,%xmm6
- xorl %ebx,%eax
- addl %edi,%edx
- shrdl $7,%edi,%edi
- xorl %ebx,%esi
- vpslldq $12,%xmm4,%xmm0
- vpaddd %xmm4,%xmm4,%xmm4
- movl %edx,%ebp
- addl 8(%esp),%ecx
- xorl %eax,%edi
- shldl $5,%edx,%edx
- vpsrld $30,%xmm0,%xmm7
- vpor %xmm6,%xmm4,%xmm4
- addl %esi,%ecx
- andl %edi,%ebp
- xorl %eax,%edi
- addl %edx,%ecx
- vpslld $2,%xmm0,%xmm0
- shrdl $7,%edx,%edx
- xorl %eax,%ebp
- vpxor %xmm7,%xmm4,%xmm4
- movl %ecx,%esi
- addl 12(%esp),%ebx
- xorl %edi,%edx
- shldl $5,%ecx,%ecx
- vpxor %xmm0,%xmm4,%xmm4
- addl %ebp,%ebx
- andl %edx,%esi
- vmovdqa 96(%esp),%xmm0
- xorl %edi,%edx
- addl %ecx,%ebx
- shrdl $7,%ecx,%ecx
- xorl %edi,%esi
- vpalignr $8,%xmm1,%xmm2,%xmm5
- movl %ebx,%ebp
- addl 16(%esp),%eax
- vpaddd %xmm4,%xmm0,%xmm0
- vmovdqa %xmm1,80(%esp)
- xorl %edx,%ecx
- shldl $5,%ebx,%ebx
- vpsrldq $4,%xmm4,%xmm7
- addl %esi,%eax
- andl %ecx,%ebp
- vpxor %xmm1,%xmm5,%xmm5
- xorl %edx,%ecx
- addl %ebx,%eax
- vpxor %xmm3,%xmm7,%xmm7
- shrdl $7,%ebx,%ebx
- xorl %edx,%ebp
- vmovdqa %xmm0,(%esp)
- movl %eax,%esi
- addl 20(%esp),%edi
- vpxor %xmm7,%xmm5,%xmm5
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- addl %ebp,%edi
- andl %ebx,%esi
- vpsrld $31,%xmm5,%xmm7
- xorl %ecx,%ebx
- addl %eax,%edi
- shrdl $7,%eax,%eax
- xorl %ecx,%esi
- vpslldq $12,%xmm5,%xmm1
- vpaddd %xmm5,%xmm5,%xmm5
- movl %edi,%ebp
- addl 24(%esp),%edx
- xorl %ebx,%eax
- shldl $5,%edi,%edi
- vpsrld $30,%xmm1,%xmm0
- vpor %xmm7,%xmm5,%xmm5
- addl %esi,%edx
- andl %eax,%ebp
- xorl %ebx,%eax
- addl %edi,%edx
- vpslld $2,%xmm1,%xmm1
- shrdl $7,%edi,%edi
- xorl %ebx,%ebp
- vpxor %xmm0,%xmm5,%xmm5
- movl %edx,%esi
- addl 28(%esp),%ecx
- xorl %eax,%edi
- shldl $5,%edx,%edx
- vpxor %xmm1,%xmm5,%xmm5
- addl %ebp,%ecx
- andl %edi,%esi
- vmovdqa 112(%esp),%xmm1
- xorl %eax,%edi
- addl %edx,%ecx
- shrdl $7,%edx,%edx
- xorl %eax,%esi
- vpalignr $8,%xmm2,%xmm3,%xmm6
- movl %ecx,%ebp
- addl 32(%esp),%ebx
- vpaddd %xmm5,%xmm1,%xmm1
- vmovdqa %xmm2,96(%esp)
- xorl %edi,%edx
- shldl $5,%ecx,%ecx
- vpsrldq $4,%xmm5,%xmm0
- addl %esi,%ebx
- andl %edx,%ebp
- vpxor %xmm2,%xmm6,%xmm6
- xorl %edi,%edx
- addl %ecx,%ebx
- vpxor %xmm4,%xmm0,%xmm0
- shrdl $7,%ecx,%ecx
- xorl %edi,%ebp
- vmovdqa %xmm1,16(%esp)
- movl %ebx,%esi
- addl 36(%esp),%eax
- vpxor %xmm0,%xmm6,%xmm6
- xorl %edx,%ecx
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- andl %ecx,%esi
- vpsrld $31,%xmm6,%xmm0
- xorl %edx,%ecx
- addl %ebx,%eax
- shrdl $7,%ebx,%ebx
- xorl %edx,%esi
- vpslldq $12,%xmm6,%xmm2
- vpaddd %xmm6,%xmm6,%xmm6
- movl %eax,%ebp
- addl 40(%esp),%edi
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- vpsrld $30,%xmm2,%xmm1
- vpor %xmm0,%xmm6,%xmm6
- addl %esi,%edi
- andl %ebx,%ebp
- xorl %ecx,%ebx
- addl %eax,%edi
- vpslld $2,%xmm2,%xmm2
- vmovdqa 64(%esp),%xmm0
- shrdl $7,%eax,%eax
- xorl %ecx,%ebp
- vpxor %xmm1,%xmm6,%xmm6
- movl %edi,%esi
- addl 44(%esp),%edx
- xorl %ebx,%eax
- shldl $5,%edi,%edi
- vpxor %xmm2,%xmm6,%xmm6
- addl %ebp,%edx
- andl %eax,%esi
- vmovdqa 112(%esp),%xmm2
- xorl %ebx,%eax
- addl %edi,%edx
- shrdl $7,%edi,%edi
- xorl %ebx,%esi
- vpalignr $8,%xmm3,%xmm4,%xmm7
- movl %edx,%ebp
- addl 48(%esp),%ecx
- vpaddd %xmm6,%xmm2,%xmm2
- vmovdqa %xmm3,64(%esp)
- xorl %eax,%edi
- shldl $5,%edx,%edx
- vpsrldq $4,%xmm6,%xmm1
- addl %esi,%ecx
- andl %edi,%ebp
- vpxor %xmm3,%xmm7,%xmm7
- xorl %eax,%edi
- addl %edx,%ecx
- vpxor %xmm5,%xmm1,%xmm1
- shrdl $7,%edx,%edx
- xorl %eax,%ebp
- vmovdqa %xmm2,32(%esp)
- movl %ecx,%esi
- addl 52(%esp),%ebx
- vpxor %xmm1,%xmm7,%xmm7
- xorl %edi,%edx
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- andl %edx,%esi
- vpsrld $31,%xmm7,%xmm1
- xorl %edi,%edx
- addl %ecx,%ebx
- shrdl $7,%ecx,%ecx
- xorl %edi,%esi
- vpslldq $12,%xmm7,%xmm3
- vpaddd %xmm7,%xmm7,%xmm7
- movl %ebx,%ebp
- addl 56(%esp),%eax
- xorl %edx,%ecx
- shldl $5,%ebx,%ebx
- vpsrld $30,%xmm3,%xmm2
- vpor %xmm1,%xmm7,%xmm7
- addl %esi,%eax
- andl %ecx,%ebp
- xorl %edx,%ecx
- addl %ebx,%eax
- vpslld $2,%xmm3,%xmm3
- vmovdqa 80(%esp),%xmm1
- shrdl $7,%ebx,%ebx
- xorl %edx,%ebp
- vpxor %xmm2,%xmm7,%xmm7
- movl %eax,%esi
- addl 60(%esp),%edi
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- vpxor %xmm3,%xmm7,%xmm7
- addl %ebp,%edi
- andl %ebx,%esi
- vmovdqa 112(%esp),%xmm3
- xorl %ecx,%ebx
- addl %eax,%edi
- vpalignr $8,%xmm6,%xmm7,%xmm2
- vpxor %xmm4,%xmm0,%xmm0
- shrdl $7,%eax,%eax
- xorl %ecx,%esi
- movl %edi,%ebp
- addl (%esp),%edx
- vpxor %xmm1,%xmm0,%xmm0
- vmovdqa %xmm4,80(%esp)
- xorl %ebx,%eax
- shldl $5,%edi,%edi
- vmovdqa %xmm3,%xmm4
- vpaddd %xmm7,%xmm3,%xmm3
- addl %esi,%edx
- andl %eax,%ebp
- vpxor %xmm2,%xmm0,%xmm0
- xorl %ebx,%eax
- addl %edi,%edx
- shrdl $7,%edi,%edi
- xorl %ebx,%ebp
- vpsrld $30,%xmm0,%xmm2
- vmovdqa %xmm3,48(%esp)
- movl %edx,%esi
- addl 4(%esp),%ecx
- xorl %eax,%edi
- shldl $5,%edx,%edx
- vpslld $2,%xmm0,%xmm0
- addl %ebp,%ecx
- andl %edi,%esi
- xorl %eax,%edi
- addl %edx,%ecx
- shrdl $7,%edx,%edx
- xorl %eax,%esi
- movl %ecx,%ebp
- addl 8(%esp),%ebx
- vpor %xmm2,%xmm0,%xmm0
- xorl %edi,%edx
- shldl $5,%ecx,%ecx
- vmovdqa 96(%esp),%xmm2
- addl %esi,%ebx
- andl %edx,%ebp
- xorl %edi,%edx
- addl %ecx,%ebx
- addl 12(%esp),%eax
- xorl %edi,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpalignr $8,%xmm7,%xmm0,%xmm3
- vpxor %xmm5,%xmm1,%xmm1
- addl 16(%esp),%edi
- xorl %ecx,%esi
- movl %eax,%ebp
- shldl $5,%eax,%eax
- vpxor %xmm2,%xmm1,%xmm1
- vmovdqa %xmm5,96(%esp)
- addl %esi,%edi
- xorl %ecx,%ebp
- vmovdqa %xmm4,%xmm5
- vpaddd %xmm0,%xmm4,%xmm4
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- vpxor %xmm3,%xmm1,%xmm1
- addl 20(%esp),%edx
- xorl %ebx,%ebp
- movl %edi,%esi
- shldl $5,%edi,%edi
- vpsrld $30,%xmm1,%xmm3
- vmovdqa %xmm4,(%esp)
- addl %ebp,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vpslld $2,%xmm1,%xmm1
- addl 24(%esp),%ecx
- xorl %eax,%esi
- movl %edx,%ebp
- shldl $5,%edx,%edx
- addl %esi,%ecx
- xorl %eax,%ebp
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vpor %xmm3,%xmm1,%xmm1
- addl 28(%esp),%ebx
- xorl %edi,%ebp
- vmovdqa 64(%esp),%xmm3
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpalignr $8,%xmm0,%xmm1,%xmm4
- vpxor %xmm6,%xmm2,%xmm2
- addl 32(%esp),%eax
- xorl %edx,%esi
- movl %ebx,%ebp
- shldl $5,%ebx,%ebx
- vpxor %xmm3,%xmm2,%xmm2
- vmovdqa %xmm6,64(%esp)
- addl %esi,%eax
- xorl %edx,%ebp
- vmovdqa 128(%esp),%xmm6
- vpaddd %xmm1,%xmm5,%xmm5
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpxor %xmm4,%xmm2,%xmm2
- addl 36(%esp),%edi
- xorl %ecx,%ebp
- movl %eax,%esi
- shldl $5,%eax,%eax
- vpsrld $30,%xmm2,%xmm4
- vmovdqa %xmm5,16(%esp)
- addl %ebp,%edi
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- vpslld $2,%xmm2,%xmm2
- addl 40(%esp),%edx
- xorl %ebx,%esi
- movl %edi,%ebp
- shldl $5,%edi,%edi
- addl %esi,%edx
- xorl %ebx,%ebp
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vpor %xmm4,%xmm2,%xmm2
- addl 44(%esp),%ecx
- xorl %eax,%ebp
- vmovdqa 80(%esp),%xmm4
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %eax,%esi
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vpalignr $8,%xmm1,%xmm2,%xmm5
- vpxor %xmm7,%xmm3,%xmm3
- addl 48(%esp),%ebx
- xorl %edi,%esi
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- vpxor %xmm4,%xmm3,%xmm3
- vmovdqa %xmm7,80(%esp)
- addl %esi,%ebx
- xorl %edi,%ebp
- vmovdqa %xmm6,%xmm7
- vpaddd %xmm2,%xmm6,%xmm6
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpxor %xmm5,%xmm3,%xmm3
- addl 52(%esp),%eax
- xorl %edx,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- vpsrld $30,%xmm3,%xmm5
- vmovdqa %xmm6,32(%esp)
- addl %ebp,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpslld $2,%xmm3,%xmm3
- addl 56(%esp),%edi
- xorl %ecx,%esi
- movl %eax,%ebp
- shldl $5,%eax,%eax
- addl %esi,%edi
- xorl %ecx,%ebp
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- vpor %xmm5,%xmm3,%xmm3
- addl 60(%esp),%edx
- xorl %ebx,%ebp
- vmovdqa 96(%esp),%xmm5
- movl %edi,%esi
- shldl $5,%edi,%edi
- addl %ebp,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vpalignr $8,%xmm2,%xmm3,%xmm6
- vpxor %xmm0,%xmm4,%xmm4
- addl (%esp),%ecx
- xorl %eax,%esi
- movl %edx,%ebp
- shldl $5,%edx,%edx
- vpxor %xmm5,%xmm4,%xmm4
- vmovdqa %xmm0,96(%esp)
- addl %esi,%ecx
- xorl %eax,%ebp
- vmovdqa %xmm7,%xmm0
- vpaddd %xmm3,%xmm7,%xmm7
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vpxor %xmm6,%xmm4,%xmm4
- addl 4(%esp),%ebx
- xorl %edi,%ebp
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- vpsrld $30,%xmm4,%xmm6
- vmovdqa %xmm7,48(%esp)
- addl %ebp,%ebx
- xorl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpslld $2,%xmm4,%xmm4
- addl 8(%esp),%eax
- xorl %edx,%esi
- movl %ebx,%ebp
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %edx,%ebp
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpor %xmm6,%xmm4,%xmm4
- addl 12(%esp),%edi
- xorl %ecx,%ebp
- vmovdqa 64(%esp),%xmm6
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- vpalignr $8,%xmm3,%xmm4,%xmm7
- vpxor %xmm1,%xmm5,%xmm5
- addl 16(%esp),%edx
- xorl %ebx,%esi
- movl %edi,%ebp
- shldl $5,%edi,%edi
- vpxor %xmm6,%xmm5,%xmm5
- vmovdqa %xmm1,64(%esp)
- addl %esi,%edx
- xorl %ebx,%ebp
- vmovdqa %xmm0,%xmm1
- vpaddd %xmm4,%xmm0,%xmm0
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vpxor %xmm7,%xmm5,%xmm5
- addl 20(%esp),%ecx
- xorl %eax,%ebp
- movl %edx,%esi
- shldl $5,%edx,%edx
- vpsrld $30,%xmm5,%xmm7
- vmovdqa %xmm0,(%esp)
- addl %ebp,%ecx
- xorl %eax,%esi
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vpslld $2,%xmm5,%xmm5
- addl 24(%esp),%ebx
- xorl %edi,%esi
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edi,%ebp
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpor %xmm7,%xmm5,%xmm5
- addl 28(%esp),%eax
- vmovdqa 80(%esp),%xmm7
- shrdl $7,%ecx,%ecx
- movl %ebx,%esi
- xorl %edx,%ebp
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %ecx,%esi
- xorl %edx,%ecx
- addl %ebx,%eax
- vpalignr $8,%xmm4,%xmm5,%xmm0
- vpxor %xmm2,%xmm6,%xmm6
- addl 32(%esp),%edi
- andl %ecx,%esi
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- vpxor %xmm7,%xmm6,%xmm6
- vmovdqa %xmm2,80(%esp)
- movl %eax,%ebp
- xorl %ecx,%esi
- vmovdqa %xmm1,%xmm2
- vpaddd %xmm5,%xmm1,%xmm1
- shldl $5,%eax,%eax
- addl %esi,%edi
- vpxor %xmm0,%xmm6,%xmm6
- xorl %ebx,%ebp
- xorl %ecx,%ebx
- addl %eax,%edi
- addl 36(%esp),%edx
- vpsrld $30,%xmm6,%xmm0
- vmovdqa %xmm1,16(%esp)
- andl %ebx,%ebp
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- movl %edi,%esi
- vpslld $2,%xmm6,%xmm6
- xorl %ebx,%ebp
- shldl $5,%edi,%edi
- addl %ebp,%edx
- xorl %eax,%esi
- xorl %ebx,%eax
- addl %edi,%edx
- addl 40(%esp),%ecx
- andl %eax,%esi
- vpor %xmm0,%xmm6,%xmm6
- xorl %ebx,%eax
- shrdl $7,%edi,%edi
- vmovdqa 96(%esp),%xmm0
- movl %edx,%ebp
- xorl %eax,%esi
- shldl $5,%edx,%edx
- addl %esi,%ecx
- xorl %edi,%ebp
- xorl %eax,%edi
- addl %edx,%ecx
- addl 44(%esp),%ebx
- andl %edi,%ebp
- xorl %eax,%edi
- shrdl $7,%edx,%edx
- movl %ecx,%esi
- xorl %edi,%ebp
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edx,%esi
- xorl %edi,%edx
- addl %ecx,%ebx
- vpalignr $8,%xmm5,%xmm6,%xmm1
- vpxor %xmm3,%xmm7,%xmm7
- addl 48(%esp),%eax
- andl %edx,%esi
- xorl %edi,%edx
- shrdl $7,%ecx,%ecx
- vpxor %xmm0,%xmm7,%xmm7
- vmovdqa %xmm3,96(%esp)
- movl %ebx,%ebp
- xorl %edx,%esi
- vmovdqa 144(%esp),%xmm3
- vpaddd %xmm6,%xmm2,%xmm2
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- vpxor %xmm1,%xmm7,%xmm7
- xorl %ecx,%ebp
- xorl %edx,%ecx
- addl %ebx,%eax
- addl 52(%esp),%edi
- vpsrld $30,%xmm7,%xmm1
- vmovdqa %xmm2,32(%esp)
- andl %ecx,%ebp
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- movl %eax,%esi
- vpslld $2,%xmm7,%xmm7
- xorl %ecx,%ebp
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ebx,%esi
- xorl %ecx,%ebx
- addl %eax,%edi
- addl 56(%esp),%edx
- andl %ebx,%esi
- vpor %xmm1,%xmm7,%xmm7
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- vmovdqa 64(%esp),%xmm1
- movl %edi,%ebp
- xorl %ebx,%esi
- shldl $5,%edi,%edi
- addl %esi,%edx
- xorl %eax,%ebp
- xorl %ebx,%eax
- addl %edi,%edx
- addl 60(%esp),%ecx
- andl %eax,%ebp
- xorl %ebx,%eax
- shrdl $7,%edi,%edi
- movl %edx,%esi
- xorl %eax,%ebp
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %edi,%esi
- xorl %eax,%edi
- addl %edx,%ecx
- vpalignr $8,%xmm6,%xmm7,%xmm2
- vpxor %xmm4,%xmm0,%xmm0
- addl (%esp),%ebx
- andl %edi,%esi
- xorl %eax,%edi
- shrdl $7,%edx,%edx
- vpxor %xmm1,%xmm0,%xmm0
- vmovdqa %xmm4,64(%esp)
- movl %ecx,%ebp
- xorl %edi,%esi
- vmovdqa %xmm3,%xmm4
- vpaddd %xmm7,%xmm3,%xmm3
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- vpxor %xmm2,%xmm0,%xmm0
- xorl %edx,%ebp
- xorl %edi,%edx
- addl %ecx,%ebx
- addl 4(%esp),%eax
- vpsrld $30,%xmm0,%xmm2
- vmovdqa %xmm3,48(%esp)
- andl %edx,%ebp
- xorl %edi,%edx
- shrdl $7,%ecx,%ecx
- movl %ebx,%esi
- vpslld $2,%xmm0,%xmm0
- xorl %edx,%ebp
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %ecx,%esi
- xorl %edx,%ecx
- addl %ebx,%eax
- addl 8(%esp),%edi
- andl %ecx,%esi
- vpor %xmm2,%xmm0,%xmm0
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- vmovdqa 80(%esp),%xmm2
- movl %eax,%ebp
- xorl %ecx,%esi
- shldl $5,%eax,%eax
- addl %esi,%edi
- xorl %ebx,%ebp
- xorl %ecx,%ebx
- addl %eax,%edi
- addl 12(%esp),%edx
- andl %ebx,%ebp
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- movl %edi,%esi
- xorl %ebx,%ebp
- shldl $5,%edi,%edi
- addl %ebp,%edx
- xorl %eax,%esi
- xorl %ebx,%eax
- addl %edi,%edx
- vpalignr $8,%xmm7,%xmm0,%xmm3
- vpxor %xmm5,%xmm1,%xmm1
- addl 16(%esp),%ecx
- andl %eax,%esi
- xorl %ebx,%eax
- shrdl $7,%edi,%edi
- vpxor %xmm2,%xmm1,%xmm1
- vmovdqa %xmm5,80(%esp)
- movl %edx,%ebp
- xorl %eax,%esi
- vmovdqa %xmm4,%xmm5
- vpaddd %xmm0,%xmm4,%xmm4
- shldl $5,%edx,%edx
- addl %esi,%ecx
- vpxor %xmm3,%xmm1,%xmm1
- xorl %edi,%ebp
- xorl %eax,%edi
- addl %edx,%ecx
- addl 20(%esp),%ebx
- vpsrld $30,%xmm1,%xmm3
- vmovdqa %xmm4,(%esp)
- andl %edi,%ebp
- xorl %eax,%edi
- shrdl $7,%edx,%edx
- movl %ecx,%esi
- vpslld $2,%xmm1,%xmm1
- xorl %edi,%ebp
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edx,%esi
- xorl %edi,%edx
- addl %ecx,%ebx
- addl 24(%esp),%eax
- andl %edx,%esi
- vpor %xmm3,%xmm1,%xmm1
- xorl %edi,%edx
- shrdl $7,%ecx,%ecx
- vmovdqa 96(%esp),%xmm3
- movl %ebx,%ebp
- xorl %edx,%esi
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %ecx,%ebp
- xorl %edx,%ecx
- addl %ebx,%eax
- addl 28(%esp),%edi
- andl %ecx,%ebp
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- movl %eax,%esi
- xorl %ecx,%ebp
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ebx,%esi
- xorl %ecx,%ebx
- addl %eax,%edi
- vpalignr $8,%xmm0,%xmm1,%xmm4
- vpxor %xmm6,%xmm2,%xmm2
- addl 32(%esp),%edx
- andl %ebx,%esi
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- vpxor %xmm3,%xmm2,%xmm2
- vmovdqa %xmm6,96(%esp)
- movl %edi,%ebp
- xorl %ebx,%esi
- vmovdqa %xmm5,%xmm6
- vpaddd %xmm1,%xmm5,%xmm5
- shldl $5,%edi,%edi
- addl %esi,%edx
- vpxor %xmm4,%xmm2,%xmm2
- xorl %eax,%ebp
- xorl %ebx,%eax
- addl %edi,%edx
- addl 36(%esp),%ecx
- vpsrld $30,%xmm2,%xmm4
- vmovdqa %xmm5,16(%esp)
- andl %eax,%ebp
- xorl %ebx,%eax
- shrdl $7,%edi,%edi
- movl %edx,%esi
- vpslld $2,%xmm2,%xmm2
- xorl %eax,%ebp
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %edi,%esi
- xorl %eax,%edi
- addl %edx,%ecx
- addl 40(%esp),%ebx
- andl %edi,%esi
- vpor %xmm4,%xmm2,%xmm2
- xorl %eax,%edi
- shrdl $7,%edx,%edx
- vmovdqa 64(%esp),%xmm4
- movl %ecx,%ebp
- xorl %edi,%esi
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edx,%ebp
- xorl %edi,%edx
- addl %ecx,%ebx
- addl 44(%esp),%eax
- andl %edx,%ebp
- xorl %edi,%edx
- shrdl $7,%ecx,%ecx
- movl %ebx,%esi
- xorl %edx,%ebp
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %edx,%esi
- addl %ebx,%eax
- vpalignr $8,%xmm1,%xmm2,%xmm5
- vpxor %xmm7,%xmm3,%xmm3
- addl 48(%esp),%edi
- xorl %ecx,%esi
- movl %eax,%ebp
- shldl $5,%eax,%eax
- vpxor %xmm4,%xmm3,%xmm3
- vmovdqa %xmm7,64(%esp)
- addl %esi,%edi
- xorl %ecx,%ebp
- vmovdqa %xmm6,%xmm7
- vpaddd %xmm2,%xmm6,%xmm6
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- vpxor %xmm5,%xmm3,%xmm3
- addl 52(%esp),%edx
- xorl %ebx,%ebp
- movl %edi,%esi
- shldl $5,%edi,%edi
- vpsrld $30,%xmm3,%xmm5
- vmovdqa %xmm6,32(%esp)
- addl %ebp,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vpslld $2,%xmm3,%xmm3
- addl 56(%esp),%ecx
- xorl %eax,%esi
- movl %edx,%ebp
- shldl $5,%edx,%edx
- addl %esi,%ecx
- xorl %eax,%ebp
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vpor %xmm5,%xmm3,%xmm3
- addl 60(%esp),%ebx
- xorl %edi,%ebp
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl (%esp),%eax
- vpaddd %xmm3,%xmm7,%xmm7
- xorl %edx,%esi
- movl %ebx,%ebp
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- vmovdqa %xmm7,48(%esp)
- xorl %edx,%ebp
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 4(%esp),%edi
- xorl %ecx,%ebp
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- addl 8(%esp),%edx
- xorl %ebx,%esi
- movl %edi,%ebp
- shldl $5,%edi,%edi
- addl %esi,%edx
- xorl %ebx,%ebp
- shrdl $7,%eax,%eax
- addl %edi,%edx
- addl 12(%esp),%ecx
- xorl %eax,%ebp
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %eax,%esi
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- movl 196(%esp),%ebp
- cmpl 200(%esp),%ebp
- je .L010done
- vmovdqa 160(%esp),%xmm7
- vmovdqa 176(%esp),%xmm6
- vmovdqu (%ebp),%xmm0
- vmovdqu 16(%ebp),%xmm1
- vmovdqu 32(%ebp),%xmm2
- vmovdqu 48(%ebp),%xmm3
- addl $64,%ebp
- vpshufb %xmm6,%xmm0,%xmm0
- movl %ebp,196(%esp)
- vmovdqa %xmm7,96(%esp)
- addl 16(%esp),%ebx
- xorl %edi,%esi
- vpshufb %xmm6,%xmm1,%xmm1
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- vpaddd %xmm7,%xmm0,%xmm4
- addl %esi,%ebx
- xorl %edi,%ebp
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vmovdqa %xmm4,(%esp)
- addl 20(%esp),%eax
- xorl %edx,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 24(%esp),%edi
- xorl %ecx,%esi
- movl %eax,%ebp
- shldl $5,%eax,%eax
- addl %esi,%edi
- xorl %ecx,%ebp
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- addl 28(%esp),%edx
- xorl %ebx,%ebp
- movl %edi,%esi
- shldl $5,%edi,%edi
- addl %ebp,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %edi,%edx
- addl 32(%esp),%ecx
- xorl %eax,%esi
- vpshufb %xmm6,%xmm2,%xmm2
- movl %edx,%ebp
- shldl $5,%edx,%edx
- vpaddd %xmm7,%xmm1,%xmm5
- addl %esi,%ecx
- xorl %eax,%ebp
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vmovdqa %xmm5,16(%esp)
- addl 36(%esp),%ebx
- xorl %edi,%ebp
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 40(%esp),%eax
- xorl %edx,%esi
- movl %ebx,%ebp
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %edx,%ebp
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 44(%esp),%edi
- xorl %ecx,%ebp
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- addl 48(%esp),%edx
- xorl %ebx,%esi
- vpshufb %xmm6,%xmm3,%xmm3
- movl %edi,%ebp
- shldl $5,%edi,%edi
- vpaddd %xmm7,%xmm2,%xmm6
- addl %esi,%edx
- xorl %ebx,%ebp
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vmovdqa %xmm6,32(%esp)
- addl 52(%esp),%ecx
- xorl %eax,%ebp
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %eax,%esi
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- addl 56(%esp),%ebx
- xorl %edi,%esi
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edi,%ebp
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 60(%esp),%eax
- xorl %edx,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- movl 192(%esp),%ebp
- addl (%ebp),%eax
- addl 4(%ebp),%esi
- addl 8(%ebp),%ecx
- movl %eax,(%ebp)
- addl 12(%ebp),%edx
- movl %esi,4(%ebp)
- addl 16(%ebp),%edi
- movl %ecx,%ebx
- movl %ecx,8(%ebp)
- xorl %edx,%ebx
- movl %edx,12(%ebp)
- movl %edi,16(%ebp)
- movl %esi,%ebp
- andl %ebx,%esi
- movl %ebp,%ebx
- jmp .L009loop
-.align 16
-.L010done:
- addl 16(%esp),%ebx
- xorl %edi,%esi
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edi,%ebp
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 20(%esp),%eax
- xorl %edx,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 24(%esp),%edi
- xorl %ecx,%esi
- movl %eax,%ebp
- shldl $5,%eax,%eax
- addl %esi,%edi
- xorl %ecx,%ebp
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- addl 28(%esp),%edx
- xorl %ebx,%ebp
- movl %edi,%esi
- shldl $5,%edi,%edi
- addl %ebp,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %edi,%edx
- addl 32(%esp),%ecx
- xorl %eax,%esi
- movl %edx,%ebp
- shldl $5,%edx,%edx
- addl %esi,%ecx
- xorl %eax,%ebp
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- addl 36(%esp),%ebx
- xorl %edi,%ebp
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 40(%esp),%eax
- xorl %edx,%esi
- movl %ebx,%ebp
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %edx,%ebp
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 44(%esp),%edi
- xorl %ecx,%ebp
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- addl 48(%esp),%edx
- xorl %ebx,%esi
- movl %edi,%ebp
- shldl $5,%edi,%edi
- addl %esi,%edx
- xorl %ebx,%ebp
- shrdl $7,%eax,%eax
- addl %edi,%edx
- addl 52(%esp),%ecx
- xorl %eax,%ebp
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %eax,%esi
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- addl 56(%esp),%ebx
- xorl %edi,%esi
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edi,%ebp
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 60(%esp),%eax
- xorl %edx,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vzeroall
- movl 192(%esp),%ebp
- addl (%ebp),%eax
- movl 204(%esp),%esp
- addl 4(%ebp),%esi
- addl 8(%ebp),%ecx
- movl %eax,(%ebp)
- addl 12(%ebp),%edx
- movl %esi,4(%ebp)
- addl 16(%ebp),%edi
- movl %ecx,8(%ebp)
- movl %edx,12(%ebp)
- movl %edi,16(%ebp)
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size _sha1_block_data_order_avx,.-_sha1_block_data_order_avx
.align 64
.LK_XX_XX:
.long 1518500249,1518500249,1518500249,1518500249
@@ -3995,11 +2820,6 @@ sha1_block_data_order:
jz .L001x86
testl $536870912,%ecx
jnz .Lshaext_shortcut
- andl $268435456,%edx
- andl $1073741824,%eax
- orl %edx,%eax
- cmpl $1342177280,%eax
- je .Lavx_shortcut
jmp .Lssse3_shortcut
.align 16
.L001x86:
@@ -6757,1176 +5577,6 @@ _sha1_block_data_order_ssse3:
popl %ebp
ret
.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3
-.type _sha1_block_data_order_avx,@function
-.align 16
-_sha1_block_data_order_avx:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- call .L008pic_point
-.L008pic_point:
- popl %ebp
- leal .LK_XX_XX-.L008pic_point(%ebp),%ebp
-.Lavx_shortcut:
- vzeroall
- vmovdqa (%ebp),%xmm7
- vmovdqa 16(%ebp),%xmm0
- vmovdqa 32(%ebp),%xmm1
- vmovdqa 48(%ebp),%xmm2
- vmovdqa 64(%ebp),%xmm6
- movl 20(%esp),%edi
- movl 24(%esp),%ebp
- movl 28(%esp),%edx
- movl %esp,%esi
- subl $208,%esp
- andl $-64,%esp
- vmovdqa %xmm0,112(%esp)
- vmovdqa %xmm1,128(%esp)
- vmovdqa %xmm2,144(%esp)
- shll $6,%edx
- vmovdqa %xmm7,160(%esp)
- addl %ebp,%edx
- vmovdqa %xmm6,176(%esp)
- addl $64,%ebp
- movl %edi,192(%esp)
- movl %ebp,196(%esp)
- movl %edx,200(%esp)
- movl %esi,204(%esp)
- movl (%edi),%eax
- movl 4(%edi),%ebx
- movl 8(%edi),%ecx
- movl 12(%edi),%edx
- movl 16(%edi),%edi
- movl %ebx,%esi
- vmovdqu -64(%ebp),%xmm0
- vmovdqu -48(%ebp),%xmm1
- vmovdqu -32(%ebp),%xmm2
- vmovdqu -16(%ebp),%xmm3
- vpshufb %xmm6,%xmm0,%xmm0
- vpshufb %xmm6,%xmm1,%xmm1
- vpshufb %xmm6,%xmm2,%xmm2
- vmovdqa %xmm7,96(%esp)
- vpshufb %xmm6,%xmm3,%xmm3
- vpaddd %xmm7,%xmm0,%xmm4
- vpaddd %xmm7,%xmm1,%xmm5
- vpaddd %xmm7,%xmm2,%xmm6
- vmovdqa %xmm4,(%esp)
- movl %ecx,%ebp
- vmovdqa %xmm5,16(%esp)
- xorl %edx,%ebp
- vmovdqa %xmm6,32(%esp)
- andl %ebp,%esi
- jmp .L009loop
-.align 16
-.L009loop:
- shrdl $2,%ebx,%ebx
- xorl %edx,%esi
- vpalignr $8,%xmm0,%xmm1,%xmm4
- movl %eax,%ebp
- addl (%esp),%edi
- vpaddd %xmm3,%xmm7,%xmm7
- vmovdqa %xmm0,64(%esp)
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- vpsrldq $4,%xmm3,%xmm6
- addl %esi,%edi
- andl %ebx,%ebp
- vpxor %xmm0,%xmm4,%xmm4
- xorl %ecx,%ebx
- addl %eax,%edi
- vpxor %xmm2,%xmm6,%xmm6
- shrdl $7,%eax,%eax
- xorl %ecx,%ebp
- vmovdqa %xmm7,48(%esp)
- movl %edi,%esi
- addl 4(%esp),%edx
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ebx,%eax
- shldl $5,%edi,%edi
- addl %ebp,%edx
- andl %eax,%esi
- vpsrld $31,%xmm4,%xmm6
- xorl %ebx,%eax
- addl %edi,%edx
- shrdl $7,%edi,%edi
- xorl %ebx,%esi
- vpslldq $12,%xmm4,%xmm0
- vpaddd %xmm4,%xmm4,%xmm4
- movl %edx,%ebp
- addl 8(%esp),%ecx
- xorl %eax,%edi
- shldl $5,%edx,%edx
- vpsrld $30,%xmm0,%xmm7
- vpor %xmm6,%xmm4,%xmm4
- addl %esi,%ecx
- andl %edi,%ebp
- xorl %eax,%edi
- addl %edx,%ecx
- vpslld $2,%xmm0,%xmm0
- shrdl $7,%edx,%edx
- xorl %eax,%ebp
- vpxor %xmm7,%xmm4,%xmm4
- movl %ecx,%esi
- addl 12(%esp),%ebx
- xorl %edi,%edx
- shldl $5,%ecx,%ecx
- vpxor %xmm0,%xmm4,%xmm4
- addl %ebp,%ebx
- andl %edx,%esi
- vmovdqa 96(%esp),%xmm0
- xorl %edi,%edx
- addl %ecx,%ebx
- shrdl $7,%ecx,%ecx
- xorl %edi,%esi
- vpalignr $8,%xmm1,%xmm2,%xmm5
- movl %ebx,%ebp
- addl 16(%esp),%eax
- vpaddd %xmm4,%xmm0,%xmm0
- vmovdqa %xmm1,80(%esp)
- xorl %edx,%ecx
- shldl $5,%ebx,%ebx
- vpsrldq $4,%xmm4,%xmm7
- addl %esi,%eax
- andl %ecx,%ebp
- vpxor %xmm1,%xmm5,%xmm5
- xorl %edx,%ecx
- addl %ebx,%eax
- vpxor %xmm3,%xmm7,%xmm7
- shrdl $7,%ebx,%ebx
- xorl %edx,%ebp
- vmovdqa %xmm0,(%esp)
- movl %eax,%esi
- addl 20(%esp),%edi
- vpxor %xmm7,%xmm5,%xmm5
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- addl %ebp,%edi
- andl %ebx,%esi
- vpsrld $31,%xmm5,%xmm7
- xorl %ecx,%ebx
- addl %eax,%edi
- shrdl $7,%eax,%eax
- xorl %ecx,%esi
- vpslldq $12,%xmm5,%xmm1
- vpaddd %xmm5,%xmm5,%xmm5
- movl %edi,%ebp
- addl 24(%esp),%edx
- xorl %ebx,%eax
- shldl $5,%edi,%edi
- vpsrld $30,%xmm1,%xmm0
- vpor %xmm7,%xmm5,%xmm5
- addl %esi,%edx
- andl %eax,%ebp
- xorl %ebx,%eax
- addl %edi,%edx
- vpslld $2,%xmm1,%xmm1
- shrdl $7,%edi,%edi
- xorl %ebx,%ebp
- vpxor %xmm0,%xmm5,%xmm5
- movl %edx,%esi
- addl 28(%esp),%ecx
- xorl %eax,%edi
- shldl $5,%edx,%edx
- vpxor %xmm1,%xmm5,%xmm5
- addl %ebp,%ecx
- andl %edi,%esi
- vmovdqa 112(%esp),%xmm1
- xorl %eax,%edi
- addl %edx,%ecx
- shrdl $7,%edx,%edx
- xorl %eax,%esi
- vpalignr $8,%xmm2,%xmm3,%xmm6
- movl %ecx,%ebp
- addl 32(%esp),%ebx
- vpaddd %xmm5,%xmm1,%xmm1
- vmovdqa %xmm2,96(%esp)
- xorl %edi,%edx
- shldl $5,%ecx,%ecx
- vpsrldq $4,%xmm5,%xmm0
- addl %esi,%ebx
- andl %edx,%ebp
- vpxor %xmm2,%xmm6,%xmm6
- xorl %edi,%edx
- addl %ecx,%ebx
- vpxor %xmm4,%xmm0,%xmm0
- shrdl $7,%ecx,%ecx
- xorl %edi,%ebp
- vmovdqa %xmm1,16(%esp)
- movl %ebx,%esi
- addl 36(%esp),%eax
- vpxor %xmm0,%xmm6,%xmm6
- xorl %edx,%ecx
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- andl %ecx,%esi
- vpsrld $31,%xmm6,%xmm0
- xorl %edx,%ecx
- addl %ebx,%eax
- shrdl $7,%ebx,%ebx
- xorl %edx,%esi
- vpslldq $12,%xmm6,%xmm2
- vpaddd %xmm6,%xmm6,%xmm6
- movl %eax,%ebp
- addl 40(%esp),%edi
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- vpsrld $30,%xmm2,%xmm1
- vpor %xmm0,%xmm6,%xmm6
- addl %esi,%edi
- andl %ebx,%ebp
- xorl %ecx,%ebx
- addl %eax,%edi
- vpslld $2,%xmm2,%xmm2
- vmovdqa 64(%esp),%xmm0
- shrdl $7,%eax,%eax
- xorl %ecx,%ebp
- vpxor %xmm1,%xmm6,%xmm6
- movl %edi,%esi
- addl 44(%esp),%edx
- xorl %ebx,%eax
- shldl $5,%edi,%edi
- vpxor %xmm2,%xmm6,%xmm6
- addl %ebp,%edx
- andl %eax,%esi
- vmovdqa 112(%esp),%xmm2
- xorl %ebx,%eax
- addl %edi,%edx
- shrdl $7,%edi,%edi
- xorl %ebx,%esi
- vpalignr $8,%xmm3,%xmm4,%xmm7
- movl %edx,%ebp
- addl 48(%esp),%ecx
- vpaddd %xmm6,%xmm2,%xmm2
- vmovdqa %xmm3,64(%esp)
- xorl %eax,%edi
- shldl $5,%edx,%edx
- vpsrldq $4,%xmm6,%xmm1
- addl %esi,%ecx
- andl %edi,%ebp
- vpxor %xmm3,%xmm7,%xmm7
- xorl %eax,%edi
- addl %edx,%ecx
- vpxor %xmm5,%xmm1,%xmm1
- shrdl $7,%edx,%edx
- xorl %eax,%ebp
- vmovdqa %xmm2,32(%esp)
- movl %ecx,%esi
- addl 52(%esp),%ebx
- vpxor %xmm1,%xmm7,%xmm7
- xorl %edi,%edx
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- andl %edx,%esi
- vpsrld $31,%xmm7,%xmm1
- xorl %edi,%edx
- addl %ecx,%ebx
- shrdl $7,%ecx,%ecx
- xorl %edi,%esi
- vpslldq $12,%xmm7,%xmm3
- vpaddd %xmm7,%xmm7,%xmm7
- movl %ebx,%ebp
- addl 56(%esp),%eax
- xorl %edx,%ecx
- shldl $5,%ebx,%ebx
- vpsrld $30,%xmm3,%xmm2
- vpor %xmm1,%xmm7,%xmm7
- addl %esi,%eax
- andl %ecx,%ebp
- xorl %edx,%ecx
- addl %ebx,%eax
- vpslld $2,%xmm3,%xmm3
- vmovdqa 80(%esp),%xmm1
- shrdl $7,%ebx,%ebx
- xorl %edx,%ebp
- vpxor %xmm2,%xmm7,%xmm7
- movl %eax,%esi
- addl 60(%esp),%edi
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- vpxor %xmm3,%xmm7,%xmm7
- addl %ebp,%edi
- andl %ebx,%esi
- vmovdqa 112(%esp),%xmm3
- xorl %ecx,%ebx
- addl %eax,%edi
- vpalignr $8,%xmm6,%xmm7,%xmm2
- vpxor %xmm4,%xmm0,%xmm0
- shrdl $7,%eax,%eax
- xorl %ecx,%esi
- movl %edi,%ebp
- addl (%esp),%edx
- vpxor %xmm1,%xmm0,%xmm0
- vmovdqa %xmm4,80(%esp)
- xorl %ebx,%eax
- shldl $5,%edi,%edi
- vmovdqa %xmm3,%xmm4
- vpaddd %xmm7,%xmm3,%xmm3
- addl %esi,%edx
- andl %eax,%ebp
- vpxor %xmm2,%xmm0,%xmm0
- xorl %ebx,%eax
- addl %edi,%edx
- shrdl $7,%edi,%edi
- xorl %ebx,%ebp
- vpsrld $30,%xmm0,%xmm2
- vmovdqa %xmm3,48(%esp)
- movl %edx,%esi
- addl 4(%esp),%ecx
- xorl %eax,%edi
- shldl $5,%edx,%edx
- vpslld $2,%xmm0,%xmm0
- addl %ebp,%ecx
- andl %edi,%esi
- xorl %eax,%edi
- addl %edx,%ecx
- shrdl $7,%edx,%edx
- xorl %eax,%esi
- movl %ecx,%ebp
- addl 8(%esp),%ebx
- vpor %xmm2,%xmm0,%xmm0
- xorl %edi,%edx
- shldl $5,%ecx,%ecx
- vmovdqa 96(%esp),%xmm2
- addl %esi,%ebx
- andl %edx,%ebp
- xorl %edi,%edx
- addl %ecx,%ebx
- addl 12(%esp),%eax
- xorl %edi,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpalignr $8,%xmm7,%xmm0,%xmm3
- vpxor %xmm5,%xmm1,%xmm1
- addl 16(%esp),%edi
- xorl %ecx,%esi
- movl %eax,%ebp
- shldl $5,%eax,%eax
- vpxor %xmm2,%xmm1,%xmm1
- vmovdqa %xmm5,96(%esp)
- addl %esi,%edi
- xorl %ecx,%ebp
- vmovdqa %xmm4,%xmm5
- vpaddd %xmm0,%xmm4,%xmm4
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- vpxor %xmm3,%xmm1,%xmm1
- addl 20(%esp),%edx
- xorl %ebx,%ebp
- movl %edi,%esi
- shldl $5,%edi,%edi
- vpsrld $30,%xmm1,%xmm3
- vmovdqa %xmm4,(%esp)
- addl %ebp,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vpslld $2,%xmm1,%xmm1
- addl 24(%esp),%ecx
- xorl %eax,%esi
- movl %edx,%ebp
- shldl $5,%edx,%edx
- addl %esi,%ecx
- xorl %eax,%ebp
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vpor %xmm3,%xmm1,%xmm1
- addl 28(%esp),%ebx
- xorl %edi,%ebp
- vmovdqa 64(%esp),%xmm3
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpalignr $8,%xmm0,%xmm1,%xmm4
- vpxor %xmm6,%xmm2,%xmm2
- addl 32(%esp),%eax
- xorl %edx,%esi
- movl %ebx,%ebp
- shldl $5,%ebx,%ebx
- vpxor %xmm3,%xmm2,%xmm2
- vmovdqa %xmm6,64(%esp)
- addl %esi,%eax
- xorl %edx,%ebp
- vmovdqa 128(%esp),%xmm6
- vpaddd %xmm1,%xmm5,%xmm5
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpxor %xmm4,%xmm2,%xmm2
- addl 36(%esp),%edi
- xorl %ecx,%ebp
- movl %eax,%esi
- shldl $5,%eax,%eax
- vpsrld $30,%xmm2,%xmm4
- vmovdqa %xmm5,16(%esp)
- addl %ebp,%edi
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- vpslld $2,%xmm2,%xmm2
- addl 40(%esp),%edx
- xorl %ebx,%esi
- movl %edi,%ebp
- shldl $5,%edi,%edi
- addl %esi,%edx
- xorl %ebx,%ebp
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vpor %xmm4,%xmm2,%xmm2
- addl 44(%esp),%ecx
- xorl %eax,%ebp
- vmovdqa 80(%esp),%xmm4
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %eax,%esi
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vpalignr $8,%xmm1,%xmm2,%xmm5
- vpxor %xmm7,%xmm3,%xmm3
- addl 48(%esp),%ebx
- xorl %edi,%esi
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- vpxor %xmm4,%xmm3,%xmm3
- vmovdqa %xmm7,80(%esp)
- addl %esi,%ebx
- xorl %edi,%ebp
- vmovdqa %xmm6,%xmm7
- vpaddd %xmm2,%xmm6,%xmm6
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpxor %xmm5,%xmm3,%xmm3
- addl 52(%esp),%eax
- xorl %edx,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- vpsrld $30,%xmm3,%xmm5
- vmovdqa %xmm6,32(%esp)
- addl %ebp,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpslld $2,%xmm3,%xmm3
- addl 56(%esp),%edi
- xorl %ecx,%esi
- movl %eax,%ebp
- shldl $5,%eax,%eax
- addl %esi,%edi
- xorl %ecx,%ebp
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- vpor %xmm5,%xmm3,%xmm3
- addl 60(%esp),%edx
- xorl %ebx,%ebp
- vmovdqa 96(%esp),%xmm5
- movl %edi,%esi
- shldl $5,%edi,%edi
- addl %ebp,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vpalignr $8,%xmm2,%xmm3,%xmm6
- vpxor %xmm0,%xmm4,%xmm4
- addl (%esp),%ecx
- xorl %eax,%esi
- movl %edx,%ebp
- shldl $5,%edx,%edx
- vpxor %xmm5,%xmm4,%xmm4
- vmovdqa %xmm0,96(%esp)
- addl %esi,%ecx
- xorl %eax,%ebp
- vmovdqa %xmm7,%xmm0
- vpaddd %xmm3,%xmm7,%xmm7
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vpxor %xmm6,%xmm4,%xmm4
- addl 4(%esp),%ebx
- xorl %edi,%ebp
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- vpsrld $30,%xmm4,%xmm6
- vmovdqa %xmm7,48(%esp)
- addl %ebp,%ebx
- xorl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpslld $2,%xmm4,%xmm4
- addl 8(%esp),%eax
- xorl %edx,%esi
- movl %ebx,%ebp
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %edx,%ebp
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpor %xmm6,%xmm4,%xmm4
- addl 12(%esp),%edi
- xorl %ecx,%ebp
- vmovdqa 64(%esp),%xmm6
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- vpalignr $8,%xmm3,%xmm4,%xmm7
- vpxor %xmm1,%xmm5,%xmm5
- addl 16(%esp),%edx
- xorl %ebx,%esi
- movl %edi,%ebp
- shldl $5,%edi,%edi
- vpxor %xmm6,%xmm5,%xmm5
- vmovdqa %xmm1,64(%esp)
- addl %esi,%edx
- xorl %ebx,%ebp
- vmovdqa %xmm0,%xmm1
- vpaddd %xmm4,%xmm0,%xmm0
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vpxor %xmm7,%xmm5,%xmm5
- addl 20(%esp),%ecx
- xorl %eax,%ebp
- movl %edx,%esi
- shldl $5,%edx,%edx
- vpsrld $30,%xmm5,%xmm7
- vmovdqa %xmm0,(%esp)
- addl %ebp,%ecx
- xorl %eax,%esi
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vpslld $2,%xmm5,%xmm5
- addl 24(%esp),%ebx
- xorl %edi,%esi
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edi,%ebp
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpor %xmm7,%xmm5,%xmm5
- addl 28(%esp),%eax
- vmovdqa 80(%esp),%xmm7
- shrdl $7,%ecx,%ecx
- movl %ebx,%esi
- xorl %edx,%ebp
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %ecx,%esi
- xorl %edx,%ecx
- addl %ebx,%eax
- vpalignr $8,%xmm4,%xmm5,%xmm0
- vpxor %xmm2,%xmm6,%xmm6
- addl 32(%esp),%edi
- andl %ecx,%esi
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- vpxor %xmm7,%xmm6,%xmm6
- vmovdqa %xmm2,80(%esp)
- movl %eax,%ebp
- xorl %ecx,%esi
- vmovdqa %xmm1,%xmm2
- vpaddd %xmm5,%xmm1,%xmm1
- shldl $5,%eax,%eax
- addl %esi,%edi
- vpxor %xmm0,%xmm6,%xmm6
- xorl %ebx,%ebp
- xorl %ecx,%ebx
- addl %eax,%edi
- addl 36(%esp),%edx
- vpsrld $30,%xmm6,%xmm0
- vmovdqa %xmm1,16(%esp)
- andl %ebx,%ebp
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- movl %edi,%esi
- vpslld $2,%xmm6,%xmm6
- xorl %ebx,%ebp
- shldl $5,%edi,%edi
- addl %ebp,%edx
- xorl %eax,%esi
- xorl %ebx,%eax
- addl %edi,%edx
- addl 40(%esp),%ecx
- andl %eax,%esi
- vpor %xmm0,%xmm6,%xmm6
- xorl %ebx,%eax
- shrdl $7,%edi,%edi
- vmovdqa 96(%esp),%xmm0
- movl %edx,%ebp
- xorl %eax,%esi
- shldl $5,%edx,%edx
- addl %esi,%ecx
- xorl %edi,%ebp
- xorl %eax,%edi
- addl %edx,%ecx
- addl 44(%esp),%ebx
- andl %edi,%ebp
- xorl %eax,%edi
- shrdl $7,%edx,%edx
- movl %ecx,%esi
- xorl %edi,%ebp
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edx,%esi
- xorl %edi,%edx
- addl %ecx,%ebx
- vpalignr $8,%xmm5,%xmm6,%xmm1
- vpxor %xmm3,%xmm7,%xmm7
- addl 48(%esp),%eax
- andl %edx,%esi
- xorl %edi,%edx
- shrdl $7,%ecx,%ecx
- vpxor %xmm0,%xmm7,%xmm7
- vmovdqa %xmm3,96(%esp)
- movl %ebx,%ebp
- xorl %edx,%esi
- vmovdqa 144(%esp),%xmm3
- vpaddd %xmm6,%xmm2,%xmm2
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- vpxor %xmm1,%xmm7,%xmm7
- xorl %ecx,%ebp
- xorl %edx,%ecx
- addl %ebx,%eax
- addl 52(%esp),%edi
- vpsrld $30,%xmm7,%xmm1
- vmovdqa %xmm2,32(%esp)
- andl %ecx,%ebp
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- movl %eax,%esi
- vpslld $2,%xmm7,%xmm7
- xorl %ecx,%ebp
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ebx,%esi
- xorl %ecx,%ebx
- addl %eax,%edi
- addl 56(%esp),%edx
- andl %ebx,%esi
- vpor %xmm1,%xmm7,%xmm7
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- vmovdqa 64(%esp),%xmm1
- movl %edi,%ebp
- xorl %ebx,%esi
- shldl $5,%edi,%edi
- addl %esi,%edx
- xorl %eax,%ebp
- xorl %ebx,%eax
- addl %edi,%edx
- addl 60(%esp),%ecx
- andl %eax,%ebp
- xorl %ebx,%eax
- shrdl $7,%edi,%edi
- movl %edx,%esi
- xorl %eax,%ebp
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %edi,%esi
- xorl %eax,%edi
- addl %edx,%ecx
- vpalignr $8,%xmm6,%xmm7,%xmm2
- vpxor %xmm4,%xmm0,%xmm0
- addl (%esp),%ebx
- andl %edi,%esi
- xorl %eax,%edi
- shrdl $7,%edx,%edx
- vpxor %xmm1,%xmm0,%xmm0
- vmovdqa %xmm4,64(%esp)
- movl %ecx,%ebp
- xorl %edi,%esi
- vmovdqa %xmm3,%xmm4
- vpaddd %xmm7,%xmm3,%xmm3
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- vpxor %xmm2,%xmm0,%xmm0
- xorl %edx,%ebp
- xorl %edi,%edx
- addl %ecx,%ebx
- addl 4(%esp),%eax
- vpsrld $30,%xmm0,%xmm2
- vmovdqa %xmm3,48(%esp)
- andl %edx,%ebp
- xorl %edi,%edx
- shrdl $7,%ecx,%ecx
- movl %ebx,%esi
- vpslld $2,%xmm0,%xmm0
- xorl %edx,%ebp
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %ecx,%esi
- xorl %edx,%ecx
- addl %ebx,%eax
- addl 8(%esp),%edi
- andl %ecx,%esi
- vpor %xmm2,%xmm0,%xmm0
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- vmovdqa 80(%esp),%xmm2
- movl %eax,%ebp
- xorl %ecx,%esi
- shldl $5,%eax,%eax
- addl %esi,%edi
- xorl %ebx,%ebp
- xorl %ecx,%ebx
- addl %eax,%edi
- addl 12(%esp),%edx
- andl %ebx,%ebp
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- movl %edi,%esi
- xorl %ebx,%ebp
- shldl $5,%edi,%edi
- addl %ebp,%edx
- xorl %eax,%esi
- xorl %ebx,%eax
- addl %edi,%edx
- vpalignr $8,%xmm7,%xmm0,%xmm3
- vpxor %xmm5,%xmm1,%xmm1
- addl 16(%esp),%ecx
- andl %eax,%esi
- xorl %ebx,%eax
- shrdl $7,%edi,%edi
- vpxor %xmm2,%xmm1,%xmm1
- vmovdqa %xmm5,80(%esp)
- movl %edx,%ebp
- xorl %eax,%esi
- vmovdqa %xmm4,%xmm5
- vpaddd %xmm0,%xmm4,%xmm4
- shldl $5,%edx,%edx
- addl %esi,%ecx
- vpxor %xmm3,%xmm1,%xmm1
- xorl %edi,%ebp
- xorl %eax,%edi
- addl %edx,%ecx
- addl 20(%esp),%ebx
- vpsrld $30,%xmm1,%xmm3
- vmovdqa %xmm4,(%esp)
- andl %edi,%ebp
- xorl %eax,%edi
- shrdl $7,%edx,%edx
- movl %ecx,%esi
- vpslld $2,%xmm1,%xmm1
- xorl %edi,%ebp
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edx,%esi
- xorl %edi,%edx
- addl %ecx,%ebx
- addl 24(%esp),%eax
- andl %edx,%esi
- vpor %xmm3,%xmm1,%xmm1
- xorl %edi,%edx
- shrdl $7,%ecx,%ecx
- vmovdqa 96(%esp),%xmm3
- movl %ebx,%ebp
- xorl %edx,%esi
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %ecx,%ebp
- xorl %edx,%ecx
- addl %ebx,%eax
- addl 28(%esp),%edi
- andl %ecx,%ebp
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- movl %eax,%esi
- xorl %ecx,%ebp
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ebx,%esi
- xorl %ecx,%ebx
- addl %eax,%edi
- vpalignr $8,%xmm0,%xmm1,%xmm4
- vpxor %xmm6,%xmm2,%xmm2
- addl 32(%esp),%edx
- andl %ebx,%esi
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- vpxor %xmm3,%xmm2,%xmm2
- vmovdqa %xmm6,96(%esp)
- movl %edi,%ebp
- xorl %ebx,%esi
- vmovdqa %xmm5,%xmm6
- vpaddd %xmm1,%xmm5,%xmm5
- shldl $5,%edi,%edi
- addl %esi,%edx
- vpxor %xmm4,%xmm2,%xmm2
- xorl %eax,%ebp
- xorl %ebx,%eax
- addl %edi,%edx
- addl 36(%esp),%ecx
- vpsrld $30,%xmm2,%xmm4
- vmovdqa %xmm5,16(%esp)
- andl %eax,%ebp
- xorl %ebx,%eax
- shrdl $7,%edi,%edi
- movl %edx,%esi
- vpslld $2,%xmm2,%xmm2
- xorl %eax,%ebp
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %edi,%esi
- xorl %eax,%edi
- addl %edx,%ecx
- addl 40(%esp),%ebx
- andl %edi,%esi
- vpor %xmm4,%xmm2,%xmm2
- xorl %eax,%edi
- shrdl $7,%edx,%edx
- vmovdqa 64(%esp),%xmm4
- movl %ecx,%ebp
- xorl %edi,%esi
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edx,%ebp
- xorl %edi,%edx
- addl %ecx,%ebx
- addl 44(%esp),%eax
- andl %edx,%ebp
- xorl %edi,%edx
- shrdl $7,%ecx,%ecx
- movl %ebx,%esi
- xorl %edx,%ebp
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %edx,%esi
- addl %ebx,%eax
- vpalignr $8,%xmm1,%xmm2,%xmm5
- vpxor %xmm7,%xmm3,%xmm3
- addl 48(%esp),%edi
- xorl %ecx,%esi
- movl %eax,%ebp
- shldl $5,%eax,%eax
- vpxor %xmm4,%xmm3,%xmm3
- vmovdqa %xmm7,64(%esp)
- addl %esi,%edi
- xorl %ecx,%ebp
- vmovdqa %xmm6,%xmm7
- vpaddd %xmm2,%xmm6,%xmm6
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- vpxor %xmm5,%xmm3,%xmm3
- addl 52(%esp),%edx
- xorl %ebx,%ebp
- movl %edi,%esi
- shldl $5,%edi,%edi
- vpsrld $30,%xmm3,%xmm5
- vmovdqa %xmm6,32(%esp)
- addl %ebp,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vpslld $2,%xmm3,%xmm3
- addl 56(%esp),%ecx
- xorl %eax,%esi
- movl %edx,%ebp
- shldl $5,%edx,%edx
- addl %esi,%ecx
- xorl %eax,%ebp
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vpor %xmm5,%xmm3,%xmm3
- addl 60(%esp),%ebx
- xorl %edi,%ebp
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl (%esp),%eax
- vpaddd %xmm3,%xmm7,%xmm7
- xorl %edx,%esi
- movl %ebx,%ebp
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- vmovdqa %xmm7,48(%esp)
- xorl %edx,%ebp
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 4(%esp),%edi
- xorl %ecx,%ebp
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- addl 8(%esp),%edx
- xorl %ebx,%esi
- movl %edi,%ebp
- shldl $5,%edi,%edi
- addl %esi,%edx
- xorl %ebx,%ebp
- shrdl $7,%eax,%eax
- addl %edi,%edx
- addl 12(%esp),%ecx
- xorl %eax,%ebp
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %eax,%esi
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- movl 196(%esp),%ebp
- cmpl 200(%esp),%ebp
- je .L010done
- vmovdqa 160(%esp),%xmm7
- vmovdqa 176(%esp),%xmm6
- vmovdqu (%ebp),%xmm0
- vmovdqu 16(%ebp),%xmm1
- vmovdqu 32(%ebp),%xmm2
- vmovdqu 48(%ebp),%xmm3
- addl $64,%ebp
- vpshufb %xmm6,%xmm0,%xmm0
- movl %ebp,196(%esp)
- vmovdqa %xmm7,96(%esp)
- addl 16(%esp),%ebx
- xorl %edi,%esi
- vpshufb %xmm6,%xmm1,%xmm1
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- vpaddd %xmm7,%xmm0,%xmm4
- addl %esi,%ebx
- xorl %edi,%ebp
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vmovdqa %xmm4,(%esp)
- addl 20(%esp),%eax
- xorl %edx,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 24(%esp),%edi
- xorl %ecx,%esi
- movl %eax,%ebp
- shldl $5,%eax,%eax
- addl %esi,%edi
- xorl %ecx,%ebp
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- addl 28(%esp),%edx
- xorl %ebx,%ebp
- movl %edi,%esi
- shldl $5,%edi,%edi
- addl %ebp,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %edi,%edx
- addl 32(%esp),%ecx
- xorl %eax,%esi
- vpshufb %xmm6,%xmm2,%xmm2
- movl %edx,%ebp
- shldl $5,%edx,%edx
- vpaddd %xmm7,%xmm1,%xmm5
- addl %esi,%ecx
- xorl %eax,%ebp
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- vmovdqa %xmm5,16(%esp)
- addl 36(%esp),%ebx
- xorl %edi,%ebp
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 40(%esp),%eax
- xorl %edx,%esi
- movl %ebx,%ebp
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %edx,%ebp
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 44(%esp),%edi
- xorl %ecx,%ebp
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- addl 48(%esp),%edx
- xorl %ebx,%esi
- vpshufb %xmm6,%xmm3,%xmm3
- movl %edi,%ebp
- shldl $5,%edi,%edi
- vpaddd %xmm7,%xmm2,%xmm6
- addl %esi,%edx
- xorl %ebx,%ebp
- shrdl $7,%eax,%eax
- addl %edi,%edx
- vmovdqa %xmm6,32(%esp)
- addl 52(%esp),%ecx
- xorl %eax,%ebp
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %eax,%esi
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- addl 56(%esp),%ebx
- xorl %edi,%esi
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edi,%ebp
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 60(%esp),%eax
- xorl %edx,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- movl 192(%esp),%ebp
- addl (%ebp),%eax
- addl 4(%ebp),%esi
- addl 8(%ebp),%ecx
- movl %eax,(%ebp)
- addl 12(%ebp),%edx
- movl %esi,4(%ebp)
- addl 16(%ebp),%edi
- movl %ecx,%ebx
- movl %ecx,8(%ebp)
- xorl %edx,%ebx
- movl %edx,12(%ebp)
- movl %edi,16(%ebp)
- movl %esi,%ebp
- andl %ebx,%esi
- movl %ebp,%ebx
- jmp .L009loop
-.align 16
-.L010done:
- addl 16(%esp),%ebx
- xorl %edi,%esi
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edi,%ebp
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 20(%esp),%eax
- xorl %edx,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 24(%esp),%edi
- xorl %ecx,%esi
- movl %eax,%ebp
- shldl $5,%eax,%eax
- addl %esi,%edi
- xorl %ecx,%ebp
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- addl 28(%esp),%edx
- xorl %ebx,%ebp
- movl %edi,%esi
- shldl $5,%edi,%edi
- addl %ebp,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %edi,%edx
- addl 32(%esp),%ecx
- xorl %eax,%esi
- movl %edx,%ebp
- shldl $5,%edx,%edx
- addl %esi,%ecx
- xorl %eax,%ebp
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- addl 36(%esp),%ebx
- xorl %edi,%ebp
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %ebp,%ebx
- xorl %edi,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 40(%esp),%eax
- xorl %edx,%esi
- movl %ebx,%ebp
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %edx,%ebp
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 44(%esp),%edi
- xorl %ecx,%ebp
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %ebp,%edi
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%edi
- addl 48(%esp),%edx
- xorl %ebx,%esi
- movl %edi,%ebp
- shldl $5,%edi,%edi
- addl %esi,%edx
- xorl %ebx,%ebp
- shrdl $7,%eax,%eax
- addl %edi,%edx
- addl 52(%esp),%ecx
- xorl %eax,%ebp
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %ebp,%ecx
- xorl %eax,%esi
- shrdl $7,%edi,%edi
- addl %edx,%ecx
- addl 56(%esp),%ebx
- xorl %edi,%esi
- movl %ecx,%ebp
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edi,%ebp
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 60(%esp),%eax
- xorl %edx,%ebp
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %ebp,%eax
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vzeroall
- movl 192(%esp),%ebp
- addl (%ebp),%eax
- movl 204(%esp),%esp
- addl 4(%ebp),%esi
- addl 8(%ebp),%ecx
- movl %eax,(%ebp)
- addl 12(%ebp),%edx
- movl %esi,4(%ebp)
- addl 16(%ebp),%edi
- movl %ecx,8(%ebp)
- movl %edx,12(%ebp)
- movl %edi,16(%ebp)
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size _sha1_block_data_order_avx,.-_sha1_block_data_order_avx
.align 64
.LK_XX_XX:
.long 1518500249,1518500249,1518500249,1518500249
diff --git a/secure/lib/libcrypto/i386/sha256-586.S b/secure/lib/libcrypto/i386/sha256-586.S
index 7b4205352bdf..5d8476c1e1bb 100644
--- a/secure/lib/libcrypto/i386/sha256-586.S
+++ b/secure/lib/libcrypto/i386/sha256-586.S
@@ -42,13 +42,12 @@ sha256_block_data_order:
orl %ebx,%ecx
andl $1342177280,%ecx
cmpl $1342177280,%ecx
- je .L005AVX
testl $512,%ebx
- jnz .L006SSSE3
+ jnz .L005SSSE3
.L003no_xmm:
subl %edi,%eax
cmpl $256,%eax
- jae .L007unrolled
+ jae .L006unrolled
jmp .L002loop
.align 16
.L002loop:
@@ -120,7 +119,7 @@ sha256_block_data_order:
movl %ecx,28(%esp)
movl %edi,32(%esp)
.align 16
-.L00800_15:
+.L00700_15:
movl %edx,%ecx
movl 24(%esp),%esi
rorl $14,%ecx
@@ -158,11 +157,11 @@ sha256_block_data_order:
addl $4,%ebp
addl %ebx,%eax
cmpl $3248222580,%esi
- jne .L00800_15
+ jne .L00700_15
movl 156(%esp),%ecx
- jmp .L00916_63
+ jmp .L00816_63
.align 16
-.L00916_63:
+.L00816_63:
movl %ecx,%ebx
movl 104(%esp),%esi
rorl $11,%ecx
@@ -217,7 +216,7 @@ sha256_block_data_order:
addl $4,%ebp
addl %ebx,%eax
cmpl $3329325298,%esi
- jne .L00916_63
+ jne .L00816_63
movl 356(%esp),%esi
movl 8(%esp),%ebx
movl 16(%esp),%ecx
@@ -261,7 +260,7 @@ sha256_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
.align 16
-.L007unrolled:
+.L006unrolled:
leal -96(%esp),%esp
movl (%esi),%eax
movl 4(%esi),%ebp
@@ -278,9 +277,9 @@ sha256_block_data_order:
movl %ebx,20(%esp)
movl %ecx,24(%esp)
movl %esi,28(%esp)
- jmp .L010grand_loop
+ jmp .L009grand_loop
.align 16
-.L010grand_loop:
+.L009grand_loop:
movl (%edi),%ebx
movl 4(%edi),%ecx
bswap %ebx
@@ -3160,7 +3159,7 @@ sha256_block_data_order:
movl %ebx,24(%esp)
movl %ecx,28(%esp)
cmpl 104(%esp),%edi
- jb .L010grand_loop
+ jb .L009grand_loop
movl 108(%esp),%esp
popl %edi
popl %esi
@@ -3179,9 +3178,9 @@ sha256_block_data_order:
pshufd $27,%xmm2,%xmm2
.byte 102,15,58,15,202,8
punpcklqdq %xmm0,%xmm2
- jmp .L011loop_shaext
+ jmp .L010loop_shaext
.align 16
-.L011loop_shaext:
+.L010loop_shaext:
movdqu (%edi),%xmm3
movdqu 16(%edi),%xmm4
movdqu 32(%edi),%xmm5
@@ -3351,7 +3350,7 @@ sha256_block_data_order:
.byte 15,56,203,202
paddd 16(%esp),%xmm2
paddd (%esp),%xmm1
- jnz .L011loop_shaext
+ jnz .L010loop_shaext
pshufd $177,%xmm2,%xmm2
pshufd $27,%xmm1,%xmm7
pshufd $177,%xmm1,%xmm1
@@ -3366,7 +3365,7 @@ sha256_block_data_order:
popl %ebp
ret
.align 32
-.L006SSSE3:
+.L005SSSE3:
leal -96(%esp),%esp
movl (%esi),%eax
movl 4(%esi),%ebx
@@ -3385,9 +3384,9 @@ sha256_block_data_order:
movl %ecx,24(%esp)
movl %esi,28(%esp)
movdqa 256(%ebp),%xmm7
- jmp .L012grand_ssse3
+ jmp .L011grand_ssse3
.align 16
-.L012grand_ssse3:
+.L011grand_ssse3:
movdqu (%edi),%xmm0
movdqu 16(%edi),%xmm1
movdqu 32(%edi),%xmm2
@@ -3410,9 +3409,9 @@ sha256_block_data_order:
paddd %xmm3,%xmm7
movdqa %xmm6,64(%esp)
movdqa %xmm7,80(%esp)
- jmp .L013ssse3_00_47
+ jmp .L012ssse3_00_47
.align 16
-.L013ssse3_00_47:
+.L012ssse3_00_47:
addl $64,%ebp
movl %edx,%ecx
movdqa %xmm1,%xmm4
@@ -4055,7 +4054,7 @@ sha256_block_data_order:
addl %ecx,%eax
movdqa %xmm6,80(%esp)
cmpl $66051,64(%ebp)
- jne .L013ssse3_00_47
+ jne .L012ssse3_00_47
movl %edx,%ecx
rorl $14,%edx
movl 20(%esp),%esi
@@ -4569,2218 +4568,13 @@ sha256_block_data_order:
movdqa 64(%ebp),%xmm7
subl $192,%ebp
cmpl 104(%esp),%edi
- jb .L012grand_ssse3
+ jb .L011grand_ssse3
movl 108(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
-.align 32
-.L005AVX:
- andl $264,%edx
- cmpl $264,%edx
- je .L014AVX_BMI
- leal -96(%esp),%esp
- vzeroall
- movl (%esi),%eax
- movl 4(%esi),%ebx
- movl 8(%esi),%ecx
- movl 12(%esi),%edi
- movl %ebx,4(%esp)
- xorl %ecx,%ebx
- movl %ecx,8(%esp)
- movl %edi,12(%esp)
- movl 16(%esi),%edx
- movl 20(%esi),%edi
- movl 24(%esi),%ecx
- movl 28(%esi),%esi
- movl %edi,20(%esp)
- movl 100(%esp),%edi
- movl %ecx,24(%esp)
- movl %esi,28(%esp)
- vmovdqa 256(%ebp),%xmm7
- jmp .L015grand_avx
-.align 32
-.L015grand_avx:
- vmovdqu (%edi),%xmm0
- vmovdqu 16(%edi),%xmm1
- vmovdqu 32(%edi),%xmm2
- vmovdqu 48(%edi),%xmm3
- addl $64,%edi
- vpshufb %xmm7,%xmm0,%xmm0
- movl %edi,100(%esp)
- vpshufb %xmm7,%xmm1,%xmm1
- vpshufb %xmm7,%xmm2,%xmm2
- vpaddd (%ebp),%xmm0,%xmm4
- vpshufb %xmm7,%xmm3,%xmm3
- vpaddd 16(%ebp),%xmm1,%xmm5
- vpaddd 32(%ebp),%xmm2,%xmm6
- vpaddd 48(%ebp),%xmm3,%xmm7
- vmovdqa %xmm4,32(%esp)
- vmovdqa %xmm5,48(%esp)
- vmovdqa %xmm6,64(%esp)
- vmovdqa %xmm7,80(%esp)
- jmp .L016avx_00_47
-.align 16
-.L016avx_00_47:
- addl $64,%ebp
- vpalignr $4,%xmm0,%xmm1,%xmm4
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 20(%esp),%esi
- vpalignr $4,%xmm2,%xmm3,%xmm7
- xorl %ecx,%edx
- movl 24(%esp),%edi
- xorl %edi,%esi
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,16(%esp)
- vpaddd %xmm7,%xmm0,%xmm0
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrld $3,%xmm4,%xmm7
- movl %eax,%ecx
- addl %edi,%edx
- movl 4(%esp),%edi
- vpslld $14,%xmm4,%xmm5
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,(%esp)
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- vpshufd $250,%xmm3,%xmm7
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpsrld $11,%xmm6,%xmm6
- addl 32(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpxor %xmm5,%xmm4,%xmm4
- addl %edx,%ebx
- addl 12(%esp),%edx
- addl %ecx,%ebx
- vpslld $11,%xmm5,%xmm5
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 16(%esp),%esi
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ecx,%edx
- movl 20(%esp),%edi
- xorl %edi,%esi
- vpsrld $10,%xmm7,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,12(%esp)
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %ebx,%ecx
- addl %edi,%edx
- movl (%esp),%edi
- vpaddd %xmm4,%xmm0,%xmm0
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,28(%esp)
- vpxor %xmm5,%xmm6,%xmm6
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- vpsrlq $19,%xmm7,%xmm7
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- addl 36(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- vpshufd $132,%xmm6,%xmm7
- addl %edx,%eax
- addl 8(%esp),%edx
- addl %ecx,%eax
- vpsrldq $8,%xmm7,%xmm7
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 12(%esp),%esi
- vpaddd %xmm7,%xmm0,%xmm0
- xorl %ecx,%edx
- movl 16(%esp),%edi
- xorl %edi,%esi
- vpshufd $80,%xmm0,%xmm7
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,8(%esp)
- vpsrld $10,%xmm7,%xmm6
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %eax,%ecx
- addl %edi,%edx
- movl 28(%esp),%edi
- vpxor %xmm5,%xmm6,%xmm6
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,24(%esp)
- vpsrlq $19,%xmm7,%xmm7
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpshufd $232,%xmm6,%xmm7
- addl 40(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpslldq $8,%xmm7,%xmm7
- addl %edx,%ebx
- addl 4(%esp),%edx
- addl %ecx,%ebx
- vpaddd %xmm7,%xmm0,%xmm0
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 8(%esp),%esi
- vpaddd (%ebp),%xmm0,%xmm6
- xorl %ecx,%edx
- movl 12(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,4(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 24(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,20(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 44(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl (%esp),%edx
- addl %ecx,%eax
- vmovdqa %xmm6,32(%esp)
- vpalignr $4,%xmm1,%xmm2,%xmm4
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 4(%esp),%esi
- vpalignr $4,%xmm3,%xmm0,%xmm7
- xorl %ecx,%edx
- movl 8(%esp),%edi
- xorl %edi,%esi
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,(%esp)
- vpaddd %xmm7,%xmm1,%xmm1
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrld $3,%xmm4,%xmm7
- movl %eax,%ecx
- addl %edi,%edx
- movl 20(%esp),%edi
- vpslld $14,%xmm4,%xmm5
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,16(%esp)
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- vpshufd $250,%xmm0,%xmm7
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpsrld $11,%xmm6,%xmm6
- addl 48(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpxor %xmm5,%xmm4,%xmm4
- addl %edx,%ebx
- addl 28(%esp),%edx
- addl %ecx,%ebx
- vpslld $11,%xmm5,%xmm5
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl (%esp),%esi
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ecx,%edx
- movl 4(%esp),%edi
- xorl %edi,%esi
- vpsrld $10,%xmm7,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,28(%esp)
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %ebx,%ecx
- addl %edi,%edx
- movl 16(%esp),%edi
- vpaddd %xmm4,%xmm1,%xmm1
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,12(%esp)
- vpxor %xmm5,%xmm6,%xmm6
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- vpsrlq $19,%xmm7,%xmm7
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- addl 52(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- vpshufd $132,%xmm6,%xmm7
- addl %edx,%eax
- addl 24(%esp),%edx
- addl %ecx,%eax
- vpsrldq $8,%xmm7,%xmm7
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 28(%esp),%esi
- vpaddd %xmm7,%xmm1,%xmm1
- xorl %ecx,%edx
- movl (%esp),%edi
- xorl %edi,%esi
- vpshufd $80,%xmm1,%xmm7
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,24(%esp)
- vpsrld $10,%xmm7,%xmm6
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %eax,%ecx
- addl %edi,%edx
- movl 12(%esp),%edi
- vpxor %xmm5,%xmm6,%xmm6
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,8(%esp)
- vpsrlq $19,%xmm7,%xmm7
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpshufd $232,%xmm6,%xmm7
- addl 56(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpslldq $8,%xmm7,%xmm7
- addl %edx,%ebx
- addl 20(%esp),%edx
- addl %ecx,%ebx
- vpaddd %xmm7,%xmm1,%xmm1
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 24(%esp),%esi
- vpaddd 16(%ebp),%xmm1,%xmm6
- xorl %ecx,%edx
- movl 28(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,20(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 8(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,4(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 60(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 16(%esp),%edx
- addl %ecx,%eax
- vmovdqa %xmm6,48(%esp)
- vpalignr $4,%xmm2,%xmm3,%xmm4
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 20(%esp),%esi
- vpalignr $4,%xmm0,%xmm1,%xmm7
- xorl %ecx,%edx
- movl 24(%esp),%edi
- xorl %edi,%esi
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,16(%esp)
- vpaddd %xmm7,%xmm2,%xmm2
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrld $3,%xmm4,%xmm7
- movl %eax,%ecx
- addl %edi,%edx
- movl 4(%esp),%edi
- vpslld $14,%xmm4,%xmm5
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,(%esp)
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- vpshufd $250,%xmm1,%xmm7
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpsrld $11,%xmm6,%xmm6
- addl 64(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpxor %xmm5,%xmm4,%xmm4
- addl %edx,%ebx
- addl 12(%esp),%edx
- addl %ecx,%ebx
- vpslld $11,%xmm5,%xmm5
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 16(%esp),%esi
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ecx,%edx
- movl 20(%esp),%edi
- xorl %edi,%esi
- vpsrld $10,%xmm7,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,12(%esp)
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %ebx,%ecx
- addl %edi,%edx
- movl (%esp),%edi
- vpaddd %xmm4,%xmm2,%xmm2
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,28(%esp)
- vpxor %xmm5,%xmm6,%xmm6
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- vpsrlq $19,%xmm7,%xmm7
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- addl 68(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- vpshufd $132,%xmm6,%xmm7
- addl %edx,%eax
- addl 8(%esp),%edx
- addl %ecx,%eax
- vpsrldq $8,%xmm7,%xmm7
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 12(%esp),%esi
- vpaddd %xmm7,%xmm2,%xmm2
- xorl %ecx,%edx
- movl 16(%esp),%edi
- xorl %edi,%esi
- vpshufd $80,%xmm2,%xmm7
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,8(%esp)
- vpsrld $10,%xmm7,%xmm6
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %eax,%ecx
- addl %edi,%edx
- movl 28(%esp),%edi
- vpxor %xmm5,%xmm6,%xmm6
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,24(%esp)
- vpsrlq $19,%xmm7,%xmm7
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpshufd $232,%xmm6,%xmm7
- addl 72(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpslldq $8,%xmm7,%xmm7
- addl %edx,%ebx
- addl 4(%esp),%edx
- addl %ecx,%ebx
- vpaddd %xmm7,%xmm2,%xmm2
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 8(%esp),%esi
- vpaddd 32(%ebp),%xmm2,%xmm6
- xorl %ecx,%edx
- movl 12(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,4(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 24(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,20(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 76(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl (%esp),%edx
- addl %ecx,%eax
- vmovdqa %xmm6,64(%esp)
- vpalignr $4,%xmm3,%xmm0,%xmm4
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 4(%esp),%esi
- vpalignr $4,%xmm1,%xmm2,%xmm7
- xorl %ecx,%edx
- movl 8(%esp),%edi
- xorl %edi,%esi
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,(%esp)
- vpaddd %xmm7,%xmm3,%xmm3
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrld $3,%xmm4,%xmm7
- movl %eax,%ecx
- addl %edi,%edx
- movl 20(%esp),%edi
- vpslld $14,%xmm4,%xmm5
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,16(%esp)
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- vpshufd $250,%xmm2,%xmm7
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpsrld $11,%xmm6,%xmm6
- addl 80(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpxor %xmm5,%xmm4,%xmm4
- addl %edx,%ebx
- addl 28(%esp),%edx
- addl %ecx,%ebx
- vpslld $11,%xmm5,%xmm5
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl (%esp),%esi
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ecx,%edx
- movl 4(%esp),%edi
- xorl %edi,%esi
- vpsrld $10,%xmm7,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,28(%esp)
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %ebx,%ecx
- addl %edi,%edx
- movl 16(%esp),%edi
- vpaddd %xmm4,%xmm3,%xmm3
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,12(%esp)
- vpxor %xmm5,%xmm6,%xmm6
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- vpsrlq $19,%xmm7,%xmm7
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- addl 84(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- vpshufd $132,%xmm6,%xmm7
- addl %edx,%eax
- addl 24(%esp),%edx
- addl %ecx,%eax
- vpsrldq $8,%xmm7,%xmm7
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 28(%esp),%esi
- vpaddd %xmm7,%xmm3,%xmm3
- xorl %ecx,%edx
- movl (%esp),%edi
- xorl %edi,%esi
- vpshufd $80,%xmm3,%xmm7
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,24(%esp)
- vpsrld $10,%xmm7,%xmm6
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %eax,%ecx
- addl %edi,%edx
- movl 12(%esp),%edi
- vpxor %xmm5,%xmm6,%xmm6
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,8(%esp)
- vpsrlq $19,%xmm7,%xmm7
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpshufd $232,%xmm6,%xmm7
- addl 88(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpslldq $8,%xmm7,%xmm7
- addl %edx,%ebx
- addl 20(%esp),%edx
- addl %ecx,%ebx
- vpaddd %xmm7,%xmm3,%xmm3
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 24(%esp),%esi
- vpaddd 48(%ebp),%xmm3,%xmm6
- xorl %ecx,%edx
- movl 28(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,20(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 8(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,4(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 92(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 16(%esp),%edx
- addl %ecx,%eax
- vmovdqa %xmm6,80(%esp)
- cmpl $66051,64(%ebp)
- jne .L016avx_00_47
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 20(%esp),%esi
- xorl %ecx,%edx
- movl 24(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,16(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 4(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 32(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 12(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 16(%esp),%esi
- xorl %ecx,%edx
- movl 20(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,12(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl (%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,28(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 36(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 8(%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 12(%esp),%esi
- xorl %ecx,%edx
- movl 16(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,8(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 28(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,24(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 40(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 4(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 8(%esp),%esi
- xorl %ecx,%edx
- movl 12(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,4(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 24(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,20(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 44(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl (%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 4(%esp),%esi
- xorl %ecx,%edx
- movl 8(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 20(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,16(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 48(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 28(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl (%esp),%esi
- xorl %ecx,%edx
- movl 4(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,28(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 16(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,12(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 52(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 24(%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 28(%esp),%esi
- xorl %ecx,%edx
- movl (%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,24(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 12(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,8(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 56(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 20(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 24(%esp),%esi
- xorl %ecx,%edx
- movl 28(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,20(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 8(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,4(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 60(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 16(%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 20(%esp),%esi
- xorl %ecx,%edx
- movl 24(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,16(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 4(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 64(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 12(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 16(%esp),%esi
- xorl %ecx,%edx
- movl 20(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,12(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl (%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,28(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 68(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 8(%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 12(%esp),%esi
- xorl %ecx,%edx
- movl 16(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,8(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 28(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,24(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 72(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 4(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 8(%esp),%esi
- xorl %ecx,%edx
- movl 12(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,4(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 24(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,20(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 76(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl (%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 4(%esp),%esi
- xorl %ecx,%edx
- movl 8(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 20(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,16(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 80(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 28(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl (%esp),%esi
- xorl %ecx,%edx
- movl 4(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,28(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 16(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,12(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 84(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 24(%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 28(%esp),%esi
- xorl %ecx,%edx
- movl (%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,24(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 12(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,8(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 88(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 20(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 24(%esp),%esi
- xorl %ecx,%edx
- movl 28(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,20(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 8(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,4(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 92(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 16(%esp),%edx
- addl %ecx,%eax
- movl 96(%esp),%esi
- xorl %edi,%ebx
- movl 12(%esp),%ecx
- addl (%esi),%eax
- addl 4(%esi),%ebx
- addl 8(%esi),%edi
- addl 12(%esi),%ecx
- movl %eax,(%esi)
- movl %ebx,4(%esi)
- movl %edi,8(%esi)
- movl %ecx,12(%esi)
- movl %ebx,4(%esp)
- xorl %edi,%ebx
- movl %edi,8(%esp)
- movl %ecx,12(%esp)
- movl 20(%esp),%edi
- movl 24(%esp),%ecx
- addl 16(%esi),%edx
- addl 20(%esi),%edi
- addl 24(%esi),%ecx
- movl %edx,16(%esi)
- movl %edi,20(%esi)
- movl %edi,20(%esp)
- movl 28(%esp),%edi
- movl %ecx,24(%esi)
- addl 28(%esi),%edi
- movl %ecx,24(%esp)
- movl %edi,28(%esi)
- movl %edi,28(%esp)
- movl 100(%esp),%edi
- vmovdqa 64(%ebp),%xmm7
- subl $192,%ebp
- cmpl 104(%esp),%edi
- jb .L015grand_avx
- movl 108(%esp),%esp
- vzeroall
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.align 32
-.L014AVX_BMI:
- leal -96(%esp),%esp
- vzeroall
- movl (%esi),%eax
- movl 4(%esi),%ebx
- movl 8(%esi),%ecx
- movl 12(%esi),%edi
- movl %ebx,4(%esp)
- xorl %ecx,%ebx
- movl %ecx,8(%esp)
- movl %edi,12(%esp)
- movl 16(%esi),%edx
- movl 20(%esi),%edi
- movl 24(%esi),%ecx
- movl 28(%esi),%esi
- movl %edi,20(%esp)
- movl 100(%esp),%edi
- movl %ecx,24(%esp)
- movl %esi,28(%esp)
- vmovdqa 256(%ebp),%xmm7
- jmp .L017grand_avx_bmi
-.align 32
-.L017grand_avx_bmi:
- vmovdqu (%edi),%xmm0
- vmovdqu 16(%edi),%xmm1
- vmovdqu 32(%edi),%xmm2
- vmovdqu 48(%edi),%xmm3
- addl $64,%edi
- vpshufb %xmm7,%xmm0,%xmm0
- movl %edi,100(%esp)
- vpshufb %xmm7,%xmm1,%xmm1
- vpshufb %xmm7,%xmm2,%xmm2
- vpaddd (%ebp),%xmm0,%xmm4
- vpshufb %xmm7,%xmm3,%xmm3
- vpaddd 16(%ebp),%xmm1,%xmm5
- vpaddd 32(%ebp),%xmm2,%xmm6
- vpaddd 48(%ebp),%xmm3,%xmm7
- vmovdqa %xmm4,32(%esp)
- vmovdqa %xmm5,48(%esp)
- vmovdqa %xmm6,64(%esp)
- vmovdqa %xmm7,80(%esp)
- jmp .L018avx_bmi_00_47
-.align 16
-.L018avx_bmi_00_47:
- addl $64,%ebp
- vpalignr $4,%xmm0,%xmm1,%xmm4
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,16(%esp)
- vpalignr $4,%xmm2,%xmm3,%xmm7
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 24(%esp),%edx,%esi
- vpsrld $7,%xmm4,%xmm6
- xorl %edi,%ecx
- andl 20(%esp),%edx
- movl %eax,(%esp)
- vpaddd %xmm7,%xmm0,%xmm0
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrld $3,%xmm4,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpslld $14,%xmm4,%xmm5
- movl 4(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpxor %xmm6,%xmm7,%xmm4
- addl 28(%esp),%edx
- andl %eax,%ebx
- addl 32(%esp),%edx
- vpshufd $250,%xmm3,%xmm7
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 12(%esp),%edx
- vpsrld $11,%xmm6,%xmm6
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl %edx,12(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpslld $11,%xmm5,%xmm5
- andnl 20(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 16(%esp),%edx
- vpxor %xmm6,%xmm4,%xmm4
- movl %ebx,28(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpsrld $10,%xmm7,%xmm6
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl (%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpsrlq $17,%xmm7,%xmm5
- addl 24(%esp),%edx
- andl %ebx,%eax
- addl 36(%esp),%edx
- vpaddd %xmm4,%xmm0,%xmm0
- xorl %edi,%eax
- addl %edx,%ecx
- addl 8(%esp),%edx
- vpxor %xmm5,%xmm6,%xmm6
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpsrlq $19,%xmm7,%xmm7
- movl %edx,8(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- andnl 16(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 12(%esp),%edx
- vpshufd $132,%xmm6,%xmm7
- movl %eax,24(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrldq $8,%xmm7,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpaddd %xmm7,%xmm0,%xmm0
- movl 28(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpshufd $80,%xmm0,%xmm7
- addl 20(%esp),%edx
- andl %eax,%ebx
- addl 40(%esp),%edx
- vpsrld $10,%xmm7,%xmm6
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 4(%esp),%edx
- vpsrlq $17,%xmm7,%xmm5
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm6,%xmm6
- movl %edx,4(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpsrlq $19,%xmm7,%xmm7
- andnl 12(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 8(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- movl %ebx,20(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpshufd $232,%xmm6,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpslldq $8,%xmm7,%xmm7
- movl 24(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpaddd %xmm7,%xmm0,%xmm0
- addl 16(%esp),%edx
- andl %ebx,%eax
- addl 44(%esp),%edx
- vpaddd (%ebp),%xmm0,%xmm6
- xorl %edi,%eax
- addl %edx,%ecx
- addl (%esp),%edx
- leal (%eax,%ecx,1),%eax
- vmovdqa %xmm6,32(%esp)
- vpalignr $4,%xmm1,%xmm2,%xmm4
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,(%esp)
- vpalignr $4,%xmm3,%xmm0,%xmm7
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 8(%esp),%edx,%esi
- vpsrld $7,%xmm4,%xmm6
- xorl %edi,%ecx
- andl 4(%esp),%edx
- movl %eax,16(%esp)
- vpaddd %xmm7,%xmm1,%xmm1
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrld $3,%xmm4,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpslld $14,%xmm4,%xmm5
- movl 20(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpxor %xmm6,%xmm7,%xmm4
- addl 12(%esp),%edx
- andl %eax,%ebx
- addl 48(%esp),%edx
- vpshufd $250,%xmm0,%xmm7
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 28(%esp),%edx
- vpsrld $11,%xmm6,%xmm6
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl %edx,28(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpslld $11,%xmm5,%xmm5
- andnl 4(%esp),%edx,%esi
- xorl %edi,%ecx
- andl (%esp),%edx
- vpxor %xmm6,%xmm4,%xmm4
- movl %ebx,12(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpsrld $10,%xmm7,%xmm6
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl 16(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpsrlq $17,%xmm7,%xmm5
- addl 8(%esp),%edx
- andl %ebx,%eax
- addl 52(%esp),%edx
- vpaddd %xmm4,%xmm1,%xmm1
- xorl %edi,%eax
- addl %edx,%ecx
- addl 24(%esp),%edx
- vpxor %xmm5,%xmm6,%xmm6
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpsrlq $19,%xmm7,%xmm7
- movl %edx,24(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- andnl (%esp),%edx,%esi
- xorl %edi,%ecx
- andl 28(%esp),%edx
- vpshufd $132,%xmm6,%xmm7
- movl %eax,8(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrldq $8,%xmm7,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpaddd %xmm7,%xmm1,%xmm1
- movl 12(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpshufd $80,%xmm1,%xmm7
- addl 4(%esp),%edx
- andl %eax,%ebx
- addl 56(%esp),%edx
- vpsrld $10,%xmm7,%xmm6
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 20(%esp),%edx
- vpsrlq $17,%xmm7,%xmm5
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm6,%xmm6
- movl %edx,20(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpsrlq $19,%xmm7,%xmm7
- andnl 28(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 24(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- movl %ebx,4(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpshufd $232,%xmm6,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpslldq $8,%xmm7,%xmm7
- movl 8(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpaddd %xmm7,%xmm1,%xmm1
- addl (%esp),%edx
- andl %ebx,%eax
- addl 60(%esp),%edx
- vpaddd 16(%ebp),%xmm1,%xmm6
- xorl %edi,%eax
- addl %edx,%ecx
- addl 16(%esp),%edx
- leal (%eax,%ecx,1),%eax
- vmovdqa %xmm6,48(%esp)
- vpalignr $4,%xmm2,%xmm3,%xmm4
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,16(%esp)
- vpalignr $4,%xmm0,%xmm1,%xmm7
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 24(%esp),%edx,%esi
- vpsrld $7,%xmm4,%xmm6
- xorl %edi,%ecx
- andl 20(%esp),%edx
- movl %eax,(%esp)
- vpaddd %xmm7,%xmm2,%xmm2
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrld $3,%xmm4,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpslld $14,%xmm4,%xmm5
- movl 4(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpxor %xmm6,%xmm7,%xmm4
- addl 28(%esp),%edx
- andl %eax,%ebx
- addl 64(%esp),%edx
- vpshufd $250,%xmm1,%xmm7
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 12(%esp),%edx
- vpsrld $11,%xmm6,%xmm6
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl %edx,12(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpslld $11,%xmm5,%xmm5
- andnl 20(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 16(%esp),%edx
- vpxor %xmm6,%xmm4,%xmm4
- movl %ebx,28(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpsrld $10,%xmm7,%xmm6
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl (%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpsrlq $17,%xmm7,%xmm5
- addl 24(%esp),%edx
- andl %ebx,%eax
- addl 68(%esp),%edx
- vpaddd %xmm4,%xmm2,%xmm2
- xorl %edi,%eax
- addl %edx,%ecx
- addl 8(%esp),%edx
- vpxor %xmm5,%xmm6,%xmm6
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpsrlq $19,%xmm7,%xmm7
- movl %edx,8(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- andnl 16(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 12(%esp),%edx
- vpshufd $132,%xmm6,%xmm7
- movl %eax,24(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrldq $8,%xmm7,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpaddd %xmm7,%xmm2,%xmm2
- movl 28(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpshufd $80,%xmm2,%xmm7
- addl 20(%esp),%edx
- andl %eax,%ebx
- addl 72(%esp),%edx
- vpsrld $10,%xmm7,%xmm6
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 4(%esp),%edx
- vpsrlq $17,%xmm7,%xmm5
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm6,%xmm6
- movl %edx,4(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpsrlq $19,%xmm7,%xmm7
- andnl 12(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 8(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- movl %ebx,20(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpshufd $232,%xmm6,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpslldq $8,%xmm7,%xmm7
- movl 24(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpaddd %xmm7,%xmm2,%xmm2
- addl 16(%esp),%edx
- andl %ebx,%eax
- addl 76(%esp),%edx
- vpaddd 32(%ebp),%xmm2,%xmm6
- xorl %edi,%eax
- addl %edx,%ecx
- addl (%esp),%edx
- leal (%eax,%ecx,1),%eax
- vmovdqa %xmm6,64(%esp)
- vpalignr $4,%xmm3,%xmm0,%xmm4
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,(%esp)
- vpalignr $4,%xmm1,%xmm2,%xmm7
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 8(%esp),%edx,%esi
- vpsrld $7,%xmm4,%xmm6
- xorl %edi,%ecx
- andl 4(%esp),%edx
- movl %eax,16(%esp)
- vpaddd %xmm7,%xmm3,%xmm3
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrld $3,%xmm4,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpslld $14,%xmm4,%xmm5
- movl 20(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpxor %xmm6,%xmm7,%xmm4
- addl 12(%esp),%edx
- andl %eax,%ebx
- addl 80(%esp),%edx
- vpshufd $250,%xmm2,%xmm7
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 28(%esp),%edx
- vpsrld $11,%xmm6,%xmm6
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl %edx,28(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpslld $11,%xmm5,%xmm5
- andnl 4(%esp),%edx,%esi
- xorl %edi,%ecx
- andl (%esp),%edx
- vpxor %xmm6,%xmm4,%xmm4
- movl %ebx,12(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpsrld $10,%xmm7,%xmm6
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl 16(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpsrlq $17,%xmm7,%xmm5
- addl 8(%esp),%edx
- andl %ebx,%eax
- addl 84(%esp),%edx
- vpaddd %xmm4,%xmm3,%xmm3
- xorl %edi,%eax
- addl %edx,%ecx
- addl 24(%esp),%edx
- vpxor %xmm5,%xmm6,%xmm6
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpsrlq $19,%xmm7,%xmm7
- movl %edx,24(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- andnl (%esp),%edx,%esi
- xorl %edi,%ecx
- andl 28(%esp),%edx
- vpshufd $132,%xmm6,%xmm7
- movl %eax,8(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrldq $8,%xmm7,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpaddd %xmm7,%xmm3,%xmm3
- movl 12(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpshufd $80,%xmm3,%xmm7
- addl 4(%esp),%edx
- andl %eax,%ebx
- addl 88(%esp),%edx
- vpsrld $10,%xmm7,%xmm6
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 20(%esp),%edx
- vpsrlq $17,%xmm7,%xmm5
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm6,%xmm6
- movl %edx,20(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpsrlq $19,%xmm7,%xmm7
- andnl 28(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 24(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- movl %ebx,4(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpshufd $232,%xmm6,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpslldq $8,%xmm7,%xmm7
- movl 8(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpaddd %xmm7,%xmm3,%xmm3
- addl (%esp),%edx
- andl %ebx,%eax
- addl 92(%esp),%edx
- vpaddd 48(%ebp),%xmm3,%xmm6
- xorl %edi,%eax
- addl %edx,%ecx
- addl 16(%esp),%edx
- leal (%eax,%ecx,1),%eax
- vmovdqa %xmm6,80(%esp)
- cmpl $66051,64(%ebp)
- jne .L018avx_bmi_00_47
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,16(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 24(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 20(%esp),%edx
- movl %eax,(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 4(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- andl %eax,%ebx
- addl 32(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 12(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,12(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 20(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 16(%esp),%edx
- movl %ebx,28(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl (%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- andl %ebx,%eax
- addl 36(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 8(%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,8(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 16(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 12(%esp),%edx
- movl %eax,24(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 28(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- andl %eax,%ebx
- addl 40(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 4(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,4(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 12(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 8(%esp),%edx
- movl %ebx,20(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 24(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- andl %ebx,%eax
- addl 44(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl (%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 8(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 4(%esp),%edx
- movl %eax,16(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 20(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- andl %eax,%ebx
- addl 48(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 28(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,28(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 4(%esp),%edx,%esi
- xorl %edi,%ecx
- andl (%esp),%edx
- movl %ebx,12(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 16(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- andl %ebx,%eax
- addl 52(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 24(%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,24(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl (%esp),%edx,%esi
- xorl %edi,%ecx
- andl 28(%esp),%edx
- movl %eax,8(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 12(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- andl %eax,%ebx
- addl 56(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 20(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,20(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 28(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 24(%esp),%edx
- movl %ebx,4(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 8(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- andl %ebx,%eax
- addl 60(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 16(%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,16(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 24(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 20(%esp),%edx
- movl %eax,(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 4(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- andl %eax,%ebx
- addl 64(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 12(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,12(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 20(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 16(%esp),%edx
- movl %ebx,28(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl (%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- andl %ebx,%eax
- addl 68(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 8(%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,8(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 16(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 12(%esp),%edx
- movl %eax,24(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 28(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- andl %eax,%ebx
- addl 72(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 4(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,4(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 12(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 8(%esp),%edx
- movl %ebx,20(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 24(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- andl %ebx,%eax
- addl 76(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl (%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 8(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 4(%esp),%edx
- movl %eax,16(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 20(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- andl %eax,%ebx
- addl 80(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 28(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,28(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 4(%esp),%edx,%esi
- xorl %edi,%ecx
- andl (%esp),%edx
- movl %ebx,12(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 16(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- andl %ebx,%eax
- addl 84(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 24(%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,24(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl (%esp),%edx,%esi
- xorl %edi,%ecx
- andl 28(%esp),%edx
- movl %eax,8(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 12(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- andl %eax,%ebx
- addl 88(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 20(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,20(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 28(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 24(%esp),%edx
- movl %ebx,4(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 8(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- andl %ebx,%eax
- addl 92(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 16(%esp),%edx
- leal (%eax,%ecx,1),%eax
- movl 96(%esp),%esi
- xorl %edi,%ebx
- movl 12(%esp),%ecx
- addl (%esi),%eax
- addl 4(%esi),%ebx
- addl 8(%esi),%edi
- addl 12(%esi),%ecx
- movl %eax,(%esi)
- movl %ebx,4(%esi)
- movl %edi,8(%esi)
- movl %ecx,12(%esi)
- movl %ebx,4(%esp)
- xorl %edi,%ebx
- movl %edi,8(%esp)
- movl %ecx,12(%esp)
- movl 20(%esp),%edi
- movl 24(%esp),%ecx
- addl 16(%esi),%edx
- addl 20(%esi),%edi
- addl 24(%esi),%ecx
- movl %edx,16(%esi)
- movl %edi,20(%esi)
- movl %edi,20(%esp)
- movl 28(%esp),%edi
- movl %ecx,24(%esi)
- addl 28(%esi),%edi
- movl %ecx,24(%esp)
- movl %edi,28(%esi)
- movl %edi,28(%esp)
- movl 100(%esp),%edi
- vmovdqa 64(%ebp),%xmm7
- subl $192,%ebp
- cmpl 104(%esp),%edi
- jb .L017grand_avx_bmi
- movl 108(%esp),%esp
- vzeroall
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
.comm OPENSSL_ia32cap_P,16,4
#else
@@ -6825,13 +4619,12 @@ sha256_block_data_order:
orl %ebx,%ecx
andl $1342177280,%ecx
cmpl $1342177280,%ecx
- je .L005AVX
testl $512,%ebx
- jnz .L006SSSE3
+ jnz .L005SSSE3
.L003no_xmm:
subl %edi,%eax
cmpl $256,%eax
- jae .L007unrolled
+ jae .L006unrolled
jmp .L002loop
.align 16
.L002loop:
@@ -6903,7 +4696,7 @@ sha256_block_data_order:
movl %ecx,28(%esp)
movl %edi,32(%esp)
.align 16
-.L00800_15:
+.L00700_15:
movl %edx,%ecx
movl 24(%esp),%esi
rorl $14,%ecx
@@ -6941,11 +4734,11 @@ sha256_block_data_order:
addl $4,%ebp
addl %ebx,%eax
cmpl $3248222580,%esi
- jne .L00800_15
+ jne .L00700_15
movl 156(%esp),%ecx
- jmp .L00916_63
+ jmp .L00816_63
.align 16
-.L00916_63:
+.L00816_63:
movl %ecx,%ebx
movl 104(%esp),%esi
rorl $11,%ecx
@@ -7000,7 +4793,7 @@ sha256_block_data_order:
addl $4,%ebp
addl %ebx,%eax
cmpl $3329325298,%esi
- jne .L00916_63
+ jne .L00816_63
movl 356(%esp),%esi
movl 8(%esp),%ebx
movl 16(%esp),%ecx
@@ -7044,7 +4837,7 @@ sha256_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
.align 16
-.L007unrolled:
+.L006unrolled:
leal -96(%esp),%esp
movl (%esi),%eax
movl 4(%esi),%ebp
@@ -7061,9 +4854,9 @@ sha256_block_data_order:
movl %ebx,20(%esp)
movl %ecx,24(%esp)
movl %esi,28(%esp)
- jmp .L010grand_loop
+ jmp .L009grand_loop
.align 16
-.L010grand_loop:
+.L009grand_loop:
movl (%edi),%ebx
movl 4(%edi),%ecx
bswap %ebx
@@ -9943,7 +7736,7 @@ sha256_block_data_order:
movl %ebx,24(%esp)
movl %ecx,28(%esp)
cmpl 104(%esp),%edi
- jb .L010grand_loop
+ jb .L009grand_loop
movl 108(%esp),%esp
popl %edi
popl %esi
@@ -9962,9 +7755,9 @@ sha256_block_data_order:
pshufd $27,%xmm2,%xmm2
.byte 102,15,58,15,202,8
punpcklqdq %xmm0,%xmm2
- jmp .L011loop_shaext
+ jmp .L010loop_shaext
.align 16
-.L011loop_shaext:
+.L010loop_shaext:
movdqu (%edi),%xmm3
movdqu 16(%edi),%xmm4
movdqu 32(%edi),%xmm5
@@ -10134,7 +7927,7 @@ sha256_block_data_order:
.byte 15,56,203,202
paddd 16(%esp),%xmm2
paddd (%esp),%xmm1
- jnz .L011loop_shaext
+ jnz .L010loop_shaext
pshufd $177,%xmm2,%xmm2
pshufd $27,%xmm1,%xmm7
pshufd $177,%xmm1,%xmm1
@@ -10149,7 +7942,7 @@ sha256_block_data_order:
popl %ebp
ret
.align 32
-.L006SSSE3:
+.L005SSSE3:
leal -96(%esp),%esp
movl (%esi),%eax
movl 4(%esi),%ebx
@@ -10168,9 +7961,9 @@ sha256_block_data_order:
movl %ecx,24(%esp)
movl %esi,28(%esp)
movdqa 256(%ebp),%xmm7
- jmp .L012grand_ssse3
+ jmp .L011grand_ssse3
.align 16
-.L012grand_ssse3:
+.L011grand_ssse3:
movdqu (%edi),%xmm0
movdqu 16(%edi),%xmm1
movdqu 32(%edi),%xmm2
@@ -10193,9 +7986,9 @@ sha256_block_data_order:
paddd %xmm3,%xmm7
movdqa %xmm6,64(%esp)
movdqa %xmm7,80(%esp)
- jmp .L013ssse3_00_47
+ jmp .L012ssse3_00_47
.align 16
-.L013ssse3_00_47:
+.L012ssse3_00_47:
addl $64,%ebp
movl %edx,%ecx
movdqa %xmm1,%xmm4
@@ -10838,7 +8631,7 @@ sha256_block_data_order:
addl %ecx,%eax
movdqa %xmm6,80(%esp)
cmpl $66051,64(%ebp)
- jne .L013ssse3_00_47
+ jne .L012ssse3_00_47
movl %edx,%ecx
rorl $14,%edx
movl 20(%esp),%esi
@@ -11352,2213 +9145,8 @@ sha256_block_data_order:
movdqa 64(%ebp),%xmm7
subl $192,%ebp
cmpl 104(%esp),%edi
- jb .L012grand_ssse3
- movl 108(%esp),%esp
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.align 32
-.L005AVX:
- andl $264,%edx
- cmpl $264,%edx
- je .L014AVX_BMI
- leal -96(%esp),%esp
- vzeroall
- movl (%esi),%eax
- movl 4(%esi),%ebx
- movl 8(%esi),%ecx
- movl 12(%esi),%edi
- movl %ebx,4(%esp)
- xorl %ecx,%ebx
- movl %ecx,8(%esp)
- movl %edi,12(%esp)
- movl 16(%esi),%edx
- movl 20(%esi),%edi
- movl 24(%esi),%ecx
- movl 28(%esi),%esi
- movl %edi,20(%esp)
- movl 100(%esp),%edi
- movl %ecx,24(%esp)
- movl %esi,28(%esp)
- vmovdqa 256(%ebp),%xmm7
- jmp .L015grand_avx
-.align 32
-.L015grand_avx:
- vmovdqu (%edi),%xmm0
- vmovdqu 16(%edi),%xmm1
- vmovdqu 32(%edi),%xmm2
- vmovdqu 48(%edi),%xmm3
- addl $64,%edi
- vpshufb %xmm7,%xmm0,%xmm0
- movl %edi,100(%esp)
- vpshufb %xmm7,%xmm1,%xmm1
- vpshufb %xmm7,%xmm2,%xmm2
- vpaddd (%ebp),%xmm0,%xmm4
- vpshufb %xmm7,%xmm3,%xmm3
- vpaddd 16(%ebp),%xmm1,%xmm5
- vpaddd 32(%ebp),%xmm2,%xmm6
- vpaddd 48(%ebp),%xmm3,%xmm7
- vmovdqa %xmm4,32(%esp)
- vmovdqa %xmm5,48(%esp)
- vmovdqa %xmm6,64(%esp)
- vmovdqa %xmm7,80(%esp)
- jmp .L016avx_00_47
-.align 16
-.L016avx_00_47:
- addl $64,%ebp
- vpalignr $4,%xmm0,%xmm1,%xmm4
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 20(%esp),%esi
- vpalignr $4,%xmm2,%xmm3,%xmm7
- xorl %ecx,%edx
- movl 24(%esp),%edi
- xorl %edi,%esi
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,16(%esp)
- vpaddd %xmm7,%xmm0,%xmm0
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrld $3,%xmm4,%xmm7
- movl %eax,%ecx
- addl %edi,%edx
- movl 4(%esp),%edi
- vpslld $14,%xmm4,%xmm5
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,(%esp)
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- vpshufd $250,%xmm3,%xmm7
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpsrld $11,%xmm6,%xmm6
- addl 32(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpxor %xmm5,%xmm4,%xmm4
- addl %edx,%ebx
- addl 12(%esp),%edx
- addl %ecx,%ebx
- vpslld $11,%xmm5,%xmm5
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 16(%esp),%esi
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ecx,%edx
- movl 20(%esp),%edi
- xorl %edi,%esi
- vpsrld $10,%xmm7,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,12(%esp)
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %ebx,%ecx
- addl %edi,%edx
- movl (%esp),%edi
- vpaddd %xmm4,%xmm0,%xmm0
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,28(%esp)
- vpxor %xmm5,%xmm6,%xmm6
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- vpsrlq $19,%xmm7,%xmm7
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- addl 36(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- vpshufd $132,%xmm6,%xmm7
- addl %edx,%eax
- addl 8(%esp),%edx
- addl %ecx,%eax
- vpsrldq $8,%xmm7,%xmm7
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 12(%esp),%esi
- vpaddd %xmm7,%xmm0,%xmm0
- xorl %ecx,%edx
- movl 16(%esp),%edi
- xorl %edi,%esi
- vpshufd $80,%xmm0,%xmm7
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,8(%esp)
- vpsrld $10,%xmm7,%xmm6
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %eax,%ecx
- addl %edi,%edx
- movl 28(%esp),%edi
- vpxor %xmm5,%xmm6,%xmm6
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,24(%esp)
- vpsrlq $19,%xmm7,%xmm7
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpshufd $232,%xmm6,%xmm7
- addl 40(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpslldq $8,%xmm7,%xmm7
- addl %edx,%ebx
- addl 4(%esp),%edx
- addl %ecx,%ebx
- vpaddd %xmm7,%xmm0,%xmm0
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 8(%esp),%esi
- vpaddd (%ebp),%xmm0,%xmm6
- xorl %ecx,%edx
- movl 12(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,4(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 24(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,20(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 44(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl (%esp),%edx
- addl %ecx,%eax
- vmovdqa %xmm6,32(%esp)
- vpalignr $4,%xmm1,%xmm2,%xmm4
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 4(%esp),%esi
- vpalignr $4,%xmm3,%xmm0,%xmm7
- xorl %ecx,%edx
- movl 8(%esp),%edi
- xorl %edi,%esi
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,(%esp)
- vpaddd %xmm7,%xmm1,%xmm1
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrld $3,%xmm4,%xmm7
- movl %eax,%ecx
- addl %edi,%edx
- movl 20(%esp),%edi
- vpslld $14,%xmm4,%xmm5
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,16(%esp)
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- vpshufd $250,%xmm0,%xmm7
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpsrld $11,%xmm6,%xmm6
- addl 48(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpxor %xmm5,%xmm4,%xmm4
- addl %edx,%ebx
- addl 28(%esp),%edx
- addl %ecx,%ebx
- vpslld $11,%xmm5,%xmm5
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl (%esp),%esi
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ecx,%edx
- movl 4(%esp),%edi
- xorl %edi,%esi
- vpsrld $10,%xmm7,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,28(%esp)
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %ebx,%ecx
- addl %edi,%edx
- movl 16(%esp),%edi
- vpaddd %xmm4,%xmm1,%xmm1
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,12(%esp)
- vpxor %xmm5,%xmm6,%xmm6
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- vpsrlq $19,%xmm7,%xmm7
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- addl 52(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- vpshufd $132,%xmm6,%xmm7
- addl %edx,%eax
- addl 24(%esp),%edx
- addl %ecx,%eax
- vpsrldq $8,%xmm7,%xmm7
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 28(%esp),%esi
- vpaddd %xmm7,%xmm1,%xmm1
- xorl %ecx,%edx
- movl (%esp),%edi
- xorl %edi,%esi
- vpshufd $80,%xmm1,%xmm7
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,24(%esp)
- vpsrld $10,%xmm7,%xmm6
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %eax,%ecx
- addl %edi,%edx
- movl 12(%esp),%edi
- vpxor %xmm5,%xmm6,%xmm6
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,8(%esp)
- vpsrlq $19,%xmm7,%xmm7
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpshufd $232,%xmm6,%xmm7
- addl 56(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpslldq $8,%xmm7,%xmm7
- addl %edx,%ebx
- addl 20(%esp),%edx
- addl %ecx,%ebx
- vpaddd %xmm7,%xmm1,%xmm1
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 24(%esp),%esi
- vpaddd 16(%ebp),%xmm1,%xmm6
- xorl %ecx,%edx
- movl 28(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,20(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 8(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,4(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 60(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 16(%esp),%edx
- addl %ecx,%eax
- vmovdqa %xmm6,48(%esp)
- vpalignr $4,%xmm2,%xmm3,%xmm4
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 20(%esp),%esi
- vpalignr $4,%xmm0,%xmm1,%xmm7
- xorl %ecx,%edx
- movl 24(%esp),%edi
- xorl %edi,%esi
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,16(%esp)
- vpaddd %xmm7,%xmm2,%xmm2
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrld $3,%xmm4,%xmm7
- movl %eax,%ecx
- addl %edi,%edx
- movl 4(%esp),%edi
- vpslld $14,%xmm4,%xmm5
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,(%esp)
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- vpshufd $250,%xmm1,%xmm7
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpsrld $11,%xmm6,%xmm6
- addl 64(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpxor %xmm5,%xmm4,%xmm4
- addl %edx,%ebx
- addl 12(%esp),%edx
- addl %ecx,%ebx
- vpslld $11,%xmm5,%xmm5
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 16(%esp),%esi
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ecx,%edx
- movl 20(%esp),%edi
- xorl %edi,%esi
- vpsrld $10,%xmm7,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,12(%esp)
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %ebx,%ecx
- addl %edi,%edx
- movl (%esp),%edi
- vpaddd %xmm4,%xmm2,%xmm2
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,28(%esp)
- vpxor %xmm5,%xmm6,%xmm6
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- vpsrlq $19,%xmm7,%xmm7
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- addl 68(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- vpshufd $132,%xmm6,%xmm7
- addl %edx,%eax
- addl 8(%esp),%edx
- addl %ecx,%eax
- vpsrldq $8,%xmm7,%xmm7
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 12(%esp),%esi
- vpaddd %xmm7,%xmm2,%xmm2
- xorl %ecx,%edx
- movl 16(%esp),%edi
- xorl %edi,%esi
- vpshufd $80,%xmm2,%xmm7
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,8(%esp)
- vpsrld $10,%xmm7,%xmm6
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %eax,%ecx
- addl %edi,%edx
- movl 28(%esp),%edi
- vpxor %xmm5,%xmm6,%xmm6
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,24(%esp)
- vpsrlq $19,%xmm7,%xmm7
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpshufd $232,%xmm6,%xmm7
- addl 72(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpslldq $8,%xmm7,%xmm7
- addl %edx,%ebx
- addl 4(%esp),%edx
- addl %ecx,%ebx
- vpaddd %xmm7,%xmm2,%xmm2
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 8(%esp),%esi
- vpaddd 32(%ebp),%xmm2,%xmm6
- xorl %ecx,%edx
- movl 12(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,4(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 24(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,20(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 76(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl (%esp),%edx
- addl %ecx,%eax
- vmovdqa %xmm6,64(%esp)
- vpalignr $4,%xmm3,%xmm0,%xmm4
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 4(%esp),%esi
- vpalignr $4,%xmm1,%xmm2,%xmm7
- xorl %ecx,%edx
- movl 8(%esp),%edi
- xorl %edi,%esi
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,(%esp)
- vpaddd %xmm7,%xmm3,%xmm3
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrld $3,%xmm4,%xmm7
- movl %eax,%ecx
- addl %edi,%edx
- movl 20(%esp),%edi
- vpslld $14,%xmm4,%xmm5
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,16(%esp)
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- vpshufd $250,%xmm2,%xmm7
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpsrld $11,%xmm6,%xmm6
- addl 80(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpxor %xmm5,%xmm4,%xmm4
- addl %edx,%ebx
- addl 28(%esp),%edx
- addl %ecx,%ebx
- vpslld $11,%xmm5,%xmm5
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl (%esp),%esi
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ecx,%edx
- movl 4(%esp),%edi
- xorl %edi,%esi
- vpsrld $10,%xmm7,%xmm6
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,28(%esp)
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %ebx,%ecx
- addl %edi,%edx
- movl 16(%esp),%edi
- vpaddd %xmm4,%xmm3,%xmm3
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,12(%esp)
- vpxor %xmm5,%xmm6,%xmm6
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- vpsrlq $19,%xmm7,%xmm7
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- addl 84(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- vpshufd $132,%xmm6,%xmm7
- addl %edx,%eax
- addl 24(%esp),%edx
- addl %ecx,%eax
- vpsrldq $8,%xmm7,%xmm7
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 28(%esp),%esi
- vpaddd %xmm7,%xmm3,%xmm3
- xorl %ecx,%edx
- movl (%esp),%edi
- xorl %edi,%esi
- vpshufd $80,%xmm3,%xmm7
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,24(%esp)
- vpsrld $10,%xmm7,%xmm6
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- vpsrlq $17,%xmm7,%xmm5
- movl %eax,%ecx
- addl %edi,%edx
- movl 12(%esp),%edi
- vpxor %xmm5,%xmm6,%xmm6
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,8(%esp)
- vpsrlq $19,%xmm7,%xmm7
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- vpshufd $232,%xmm6,%xmm7
- addl 88(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- vpslldq $8,%xmm7,%xmm7
- addl %edx,%ebx
- addl 20(%esp),%edx
- addl %ecx,%ebx
- vpaddd %xmm7,%xmm3,%xmm3
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 24(%esp),%esi
- vpaddd 48(%ebp),%xmm3,%xmm6
- xorl %ecx,%edx
- movl 28(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,20(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 8(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,4(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 92(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 16(%esp),%edx
- addl %ecx,%eax
- vmovdqa %xmm6,80(%esp)
- cmpl $66051,64(%ebp)
- jne .L016avx_00_47
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 20(%esp),%esi
- xorl %ecx,%edx
- movl 24(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,16(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 4(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 32(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 12(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 16(%esp),%esi
- xorl %ecx,%edx
- movl 20(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,12(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl (%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,28(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 36(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 8(%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 12(%esp),%esi
- xorl %ecx,%edx
- movl 16(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,8(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 28(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,24(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 40(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 4(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 8(%esp),%esi
- xorl %ecx,%edx
- movl 12(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,4(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 24(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,20(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 44(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl (%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 4(%esp),%esi
- xorl %ecx,%edx
- movl 8(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 20(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,16(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 48(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 28(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl (%esp),%esi
- xorl %ecx,%edx
- movl 4(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,28(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 16(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,12(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 52(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 24(%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 28(%esp),%esi
- xorl %ecx,%edx
- movl (%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,24(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 12(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,8(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 56(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 20(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 24(%esp),%esi
- xorl %ecx,%edx
- movl 28(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,20(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 8(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,4(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 60(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 16(%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 20(%esp),%esi
- xorl %ecx,%edx
- movl 24(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,16(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 4(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 64(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 12(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 16(%esp),%esi
- xorl %ecx,%edx
- movl 20(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,12(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl (%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,28(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 68(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 8(%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 12(%esp),%esi
- xorl %ecx,%edx
- movl 16(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,8(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 28(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,24(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 72(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 4(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 8(%esp),%esi
- xorl %ecx,%edx
- movl 12(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,4(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 24(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,20(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 76(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl (%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 4(%esp),%esi
- xorl %ecx,%edx
- movl 8(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 20(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,16(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 80(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 28(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl (%esp),%esi
- xorl %ecx,%edx
- movl 4(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,28(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 16(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,12(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 84(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 24(%esp),%edx
- addl %ecx,%eax
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 28(%esp),%esi
- xorl %ecx,%edx
- movl (%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,24(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %eax,%ecx
- addl %edi,%edx
- movl 12(%esp),%edi
- movl %eax,%esi
- shrdl $9,%ecx,%ecx
- movl %eax,8(%esp)
- xorl %eax,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %eax,%ebx
- xorl %esi,%ecx
- addl 88(%esp),%edx
- xorl %edi,%ebx
- shrdl $2,%ecx,%ecx
- addl %edx,%ebx
- addl 20(%esp),%edx
- addl %ecx,%ebx
- movl %edx,%ecx
- shrdl $14,%edx,%edx
- movl 24(%esp),%esi
- xorl %ecx,%edx
- movl 28(%esp),%edi
- xorl %edi,%esi
- shrdl $5,%edx,%edx
- andl %ecx,%esi
- movl %ecx,20(%esp)
- xorl %ecx,%edx
- xorl %esi,%edi
- shrdl $6,%edx,%edx
- movl %ebx,%ecx
- addl %edi,%edx
- movl 8(%esp),%edi
- movl %ebx,%esi
- shrdl $9,%ecx,%ecx
- movl %ebx,4(%esp)
- xorl %ebx,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- shrdl $11,%ecx,%ecx
- andl %ebx,%eax
- xorl %esi,%ecx
- addl 92(%esp),%edx
- xorl %edi,%eax
- shrdl $2,%ecx,%ecx
- addl %edx,%eax
- addl 16(%esp),%edx
- addl %ecx,%eax
- movl 96(%esp),%esi
- xorl %edi,%ebx
- movl 12(%esp),%ecx
- addl (%esi),%eax
- addl 4(%esi),%ebx
- addl 8(%esi),%edi
- addl 12(%esi),%ecx
- movl %eax,(%esi)
- movl %ebx,4(%esi)
- movl %edi,8(%esi)
- movl %ecx,12(%esi)
- movl %ebx,4(%esp)
- xorl %edi,%ebx
- movl %edi,8(%esp)
- movl %ecx,12(%esp)
- movl 20(%esp),%edi
- movl 24(%esp),%ecx
- addl 16(%esi),%edx
- addl 20(%esi),%edi
- addl 24(%esi),%ecx
- movl %edx,16(%esi)
- movl %edi,20(%esi)
- movl %edi,20(%esp)
- movl 28(%esp),%edi
- movl %ecx,24(%esi)
- addl 28(%esi),%edi
- movl %ecx,24(%esp)
- movl %edi,28(%esi)
- movl %edi,28(%esp)
- movl 100(%esp),%edi
- vmovdqa 64(%ebp),%xmm7
- subl $192,%ebp
- cmpl 104(%esp),%edi
- jb .L015grand_avx
- movl 108(%esp),%esp
- vzeroall
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.align 32
-.L014AVX_BMI:
- leal -96(%esp),%esp
- vzeroall
- movl (%esi),%eax
- movl 4(%esi),%ebx
- movl 8(%esi),%ecx
- movl 12(%esi),%edi
- movl %ebx,4(%esp)
- xorl %ecx,%ebx
- movl %ecx,8(%esp)
- movl %edi,12(%esp)
- movl 16(%esi),%edx
- movl 20(%esi),%edi
- movl 24(%esi),%ecx
- movl 28(%esi),%esi
- movl %edi,20(%esp)
- movl 100(%esp),%edi
- movl %ecx,24(%esp)
- movl %esi,28(%esp)
- vmovdqa 256(%ebp),%xmm7
- jmp .L017grand_avx_bmi
-.align 32
-.L017grand_avx_bmi:
- vmovdqu (%edi),%xmm0
- vmovdqu 16(%edi),%xmm1
- vmovdqu 32(%edi),%xmm2
- vmovdqu 48(%edi),%xmm3
- addl $64,%edi
- vpshufb %xmm7,%xmm0,%xmm0
- movl %edi,100(%esp)
- vpshufb %xmm7,%xmm1,%xmm1
- vpshufb %xmm7,%xmm2,%xmm2
- vpaddd (%ebp),%xmm0,%xmm4
- vpshufb %xmm7,%xmm3,%xmm3
- vpaddd 16(%ebp),%xmm1,%xmm5
- vpaddd 32(%ebp),%xmm2,%xmm6
- vpaddd 48(%ebp),%xmm3,%xmm7
- vmovdqa %xmm4,32(%esp)
- vmovdqa %xmm5,48(%esp)
- vmovdqa %xmm6,64(%esp)
- vmovdqa %xmm7,80(%esp)
- jmp .L018avx_bmi_00_47
-.align 16
-.L018avx_bmi_00_47:
- addl $64,%ebp
- vpalignr $4,%xmm0,%xmm1,%xmm4
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,16(%esp)
- vpalignr $4,%xmm2,%xmm3,%xmm7
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 24(%esp),%edx,%esi
- vpsrld $7,%xmm4,%xmm6
- xorl %edi,%ecx
- andl 20(%esp),%edx
- movl %eax,(%esp)
- vpaddd %xmm7,%xmm0,%xmm0
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrld $3,%xmm4,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpslld $14,%xmm4,%xmm5
- movl 4(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpxor %xmm6,%xmm7,%xmm4
- addl 28(%esp),%edx
- andl %eax,%ebx
- addl 32(%esp),%edx
- vpshufd $250,%xmm3,%xmm7
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 12(%esp),%edx
- vpsrld $11,%xmm6,%xmm6
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl %edx,12(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpslld $11,%xmm5,%xmm5
- andnl 20(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 16(%esp),%edx
- vpxor %xmm6,%xmm4,%xmm4
- movl %ebx,28(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpsrld $10,%xmm7,%xmm6
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl (%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpsrlq $17,%xmm7,%xmm5
- addl 24(%esp),%edx
- andl %ebx,%eax
- addl 36(%esp),%edx
- vpaddd %xmm4,%xmm0,%xmm0
- xorl %edi,%eax
- addl %edx,%ecx
- addl 8(%esp),%edx
- vpxor %xmm5,%xmm6,%xmm6
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpsrlq $19,%xmm7,%xmm7
- movl %edx,8(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- andnl 16(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 12(%esp),%edx
- vpshufd $132,%xmm6,%xmm7
- movl %eax,24(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrldq $8,%xmm7,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpaddd %xmm7,%xmm0,%xmm0
- movl 28(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpshufd $80,%xmm0,%xmm7
- addl 20(%esp),%edx
- andl %eax,%ebx
- addl 40(%esp),%edx
- vpsrld $10,%xmm7,%xmm6
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 4(%esp),%edx
- vpsrlq $17,%xmm7,%xmm5
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm6,%xmm6
- movl %edx,4(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpsrlq $19,%xmm7,%xmm7
- andnl 12(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 8(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- movl %ebx,20(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpshufd $232,%xmm6,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpslldq $8,%xmm7,%xmm7
- movl 24(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpaddd %xmm7,%xmm0,%xmm0
- addl 16(%esp),%edx
- andl %ebx,%eax
- addl 44(%esp),%edx
- vpaddd (%ebp),%xmm0,%xmm6
- xorl %edi,%eax
- addl %edx,%ecx
- addl (%esp),%edx
- leal (%eax,%ecx,1),%eax
- vmovdqa %xmm6,32(%esp)
- vpalignr $4,%xmm1,%xmm2,%xmm4
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,(%esp)
- vpalignr $4,%xmm3,%xmm0,%xmm7
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 8(%esp),%edx,%esi
- vpsrld $7,%xmm4,%xmm6
- xorl %edi,%ecx
- andl 4(%esp),%edx
- movl %eax,16(%esp)
- vpaddd %xmm7,%xmm1,%xmm1
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrld $3,%xmm4,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpslld $14,%xmm4,%xmm5
- movl 20(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpxor %xmm6,%xmm7,%xmm4
- addl 12(%esp),%edx
- andl %eax,%ebx
- addl 48(%esp),%edx
- vpshufd $250,%xmm0,%xmm7
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 28(%esp),%edx
- vpsrld $11,%xmm6,%xmm6
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl %edx,28(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpslld $11,%xmm5,%xmm5
- andnl 4(%esp),%edx,%esi
- xorl %edi,%ecx
- andl (%esp),%edx
- vpxor %xmm6,%xmm4,%xmm4
- movl %ebx,12(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpsrld $10,%xmm7,%xmm6
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl 16(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpsrlq $17,%xmm7,%xmm5
- addl 8(%esp),%edx
- andl %ebx,%eax
- addl 52(%esp),%edx
- vpaddd %xmm4,%xmm1,%xmm1
- xorl %edi,%eax
- addl %edx,%ecx
- addl 24(%esp),%edx
- vpxor %xmm5,%xmm6,%xmm6
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpsrlq $19,%xmm7,%xmm7
- movl %edx,24(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- andnl (%esp),%edx,%esi
- xorl %edi,%ecx
- andl 28(%esp),%edx
- vpshufd $132,%xmm6,%xmm7
- movl %eax,8(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrldq $8,%xmm7,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpaddd %xmm7,%xmm1,%xmm1
- movl 12(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpshufd $80,%xmm1,%xmm7
- addl 4(%esp),%edx
- andl %eax,%ebx
- addl 56(%esp),%edx
- vpsrld $10,%xmm7,%xmm6
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 20(%esp),%edx
- vpsrlq $17,%xmm7,%xmm5
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm6,%xmm6
- movl %edx,20(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpsrlq $19,%xmm7,%xmm7
- andnl 28(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 24(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- movl %ebx,4(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpshufd $232,%xmm6,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpslldq $8,%xmm7,%xmm7
- movl 8(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpaddd %xmm7,%xmm1,%xmm1
- addl (%esp),%edx
- andl %ebx,%eax
- addl 60(%esp),%edx
- vpaddd 16(%ebp),%xmm1,%xmm6
- xorl %edi,%eax
- addl %edx,%ecx
- addl 16(%esp),%edx
- leal (%eax,%ecx,1),%eax
- vmovdqa %xmm6,48(%esp)
- vpalignr $4,%xmm2,%xmm3,%xmm4
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,16(%esp)
- vpalignr $4,%xmm0,%xmm1,%xmm7
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 24(%esp),%edx,%esi
- vpsrld $7,%xmm4,%xmm6
- xorl %edi,%ecx
- andl 20(%esp),%edx
- movl %eax,(%esp)
- vpaddd %xmm7,%xmm2,%xmm2
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrld $3,%xmm4,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpslld $14,%xmm4,%xmm5
- movl 4(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpxor %xmm6,%xmm7,%xmm4
- addl 28(%esp),%edx
- andl %eax,%ebx
- addl 64(%esp),%edx
- vpshufd $250,%xmm1,%xmm7
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 12(%esp),%edx
- vpsrld $11,%xmm6,%xmm6
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl %edx,12(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpslld $11,%xmm5,%xmm5
- andnl 20(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 16(%esp),%edx
- vpxor %xmm6,%xmm4,%xmm4
- movl %ebx,28(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpsrld $10,%xmm7,%xmm6
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl (%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpsrlq $17,%xmm7,%xmm5
- addl 24(%esp),%edx
- andl %ebx,%eax
- addl 68(%esp),%edx
- vpaddd %xmm4,%xmm2,%xmm2
- xorl %edi,%eax
- addl %edx,%ecx
- addl 8(%esp),%edx
- vpxor %xmm5,%xmm6,%xmm6
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpsrlq $19,%xmm7,%xmm7
- movl %edx,8(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- andnl 16(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 12(%esp),%edx
- vpshufd $132,%xmm6,%xmm7
- movl %eax,24(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrldq $8,%xmm7,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpaddd %xmm7,%xmm2,%xmm2
- movl 28(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpshufd $80,%xmm2,%xmm7
- addl 20(%esp),%edx
- andl %eax,%ebx
- addl 72(%esp),%edx
- vpsrld $10,%xmm7,%xmm6
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 4(%esp),%edx
- vpsrlq $17,%xmm7,%xmm5
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm6,%xmm6
- movl %edx,4(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpsrlq $19,%xmm7,%xmm7
- andnl 12(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 8(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- movl %ebx,20(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpshufd $232,%xmm6,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpslldq $8,%xmm7,%xmm7
- movl 24(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpaddd %xmm7,%xmm2,%xmm2
- addl 16(%esp),%edx
- andl %ebx,%eax
- addl 76(%esp),%edx
- vpaddd 32(%ebp),%xmm2,%xmm6
- xorl %edi,%eax
- addl %edx,%ecx
- addl (%esp),%edx
- leal (%eax,%ecx,1),%eax
- vmovdqa %xmm6,64(%esp)
- vpalignr $4,%xmm3,%xmm0,%xmm4
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,(%esp)
- vpalignr $4,%xmm1,%xmm2,%xmm7
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 8(%esp),%edx,%esi
- vpsrld $7,%xmm4,%xmm6
- xorl %edi,%ecx
- andl 4(%esp),%edx
- movl %eax,16(%esp)
- vpaddd %xmm7,%xmm3,%xmm3
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrld $3,%xmm4,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpslld $14,%xmm4,%xmm5
- movl 20(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpxor %xmm6,%xmm7,%xmm4
- addl 12(%esp),%edx
- andl %eax,%ebx
- addl 80(%esp),%edx
- vpshufd $250,%xmm2,%xmm7
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 28(%esp),%edx
- vpsrld $11,%xmm6,%xmm6
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl %edx,28(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpslld $11,%xmm5,%xmm5
- andnl 4(%esp),%edx,%esi
- xorl %edi,%ecx
- andl (%esp),%edx
- vpxor %xmm6,%xmm4,%xmm4
- movl %ebx,12(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpsrld $10,%xmm7,%xmm6
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpxor %xmm5,%xmm4,%xmm4
- movl 16(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpsrlq $17,%xmm7,%xmm5
- addl 8(%esp),%edx
- andl %ebx,%eax
- addl 84(%esp),%edx
- vpaddd %xmm4,%xmm3,%xmm3
- xorl %edi,%eax
- addl %edx,%ecx
- addl 24(%esp),%edx
- vpxor %xmm5,%xmm6,%xmm6
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpsrlq $19,%xmm7,%xmm7
- movl %edx,24(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- andnl (%esp),%edx,%esi
- xorl %edi,%ecx
- andl 28(%esp),%edx
- vpshufd $132,%xmm6,%xmm7
- movl %eax,8(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- vpsrldq $8,%xmm7,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- vpaddd %xmm7,%xmm3,%xmm3
- movl 12(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- vpshufd $80,%xmm3,%xmm7
- addl 4(%esp),%edx
- andl %eax,%ebx
- addl 88(%esp),%edx
- vpsrld $10,%xmm7,%xmm6
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 20(%esp),%edx
- vpsrlq $17,%xmm7,%xmm5
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- vpxor %xmm5,%xmm6,%xmm6
- movl %edx,20(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- vpsrlq $19,%xmm7,%xmm7
- andnl 28(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 24(%esp),%edx
- vpxor %xmm7,%xmm6,%xmm6
- movl %ebx,4(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- vpshufd $232,%xmm6,%xmm7
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- vpslldq $8,%xmm7,%xmm7
- movl 8(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- vpaddd %xmm7,%xmm3,%xmm3
- addl (%esp),%edx
- andl %ebx,%eax
- addl 92(%esp),%edx
- vpaddd 48(%ebp),%xmm3,%xmm6
- xorl %edi,%eax
- addl %edx,%ecx
- addl 16(%esp),%edx
- leal (%eax,%ecx,1),%eax
- vmovdqa %xmm6,80(%esp)
- cmpl $66051,64(%ebp)
- jne .L018avx_bmi_00_47
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,16(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 24(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 20(%esp),%edx
- movl %eax,(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 4(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- andl %eax,%ebx
- addl 32(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 12(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,12(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 20(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 16(%esp),%edx
- movl %ebx,28(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl (%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- andl %ebx,%eax
- addl 36(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 8(%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,8(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 16(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 12(%esp),%edx
- movl %eax,24(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 28(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- andl %eax,%ebx
- addl 40(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 4(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,4(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 12(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 8(%esp),%edx
- movl %ebx,20(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 24(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- andl %ebx,%eax
- addl 44(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl (%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 8(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 4(%esp),%edx
- movl %eax,16(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 20(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- andl %eax,%ebx
- addl 48(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 28(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,28(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 4(%esp),%edx,%esi
- xorl %edi,%ecx
- andl (%esp),%edx
- movl %ebx,12(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 16(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- andl %ebx,%eax
- addl 52(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 24(%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,24(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl (%esp),%edx,%esi
- xorl %edi,%ecx
- andl 28(%esp),%edx
- movl %eax,8(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 12(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- andl %eax,%ebx
- addl 56(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 20(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,20(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 28(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 24(%esp),%edx
- movl %ebx,4(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 8(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- andl %ebx,%eax
- addl 60(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 16(%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,16(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 24(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 20(%esp),%edx
- movl %eax,(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 4(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 28(%esp),%edx
- andl %eax,%ebx
- addl 64(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 12(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,12(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 20(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 16(%esp),%edx
- movl %ebx,28(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl (%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 24(%esp),%edx
- andl %ebx,%eax
- addl 68(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 8(%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,8(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 16(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 12(%esp),%edx
- movl %eax,24(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 28(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 20(%esp),%edx
- andl %eax,%ebx
- addl 72(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 4(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,4(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 12(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 8(%esp),%edx
- movl %ebx,20(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 24(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 16(%esp),%edx
- andl %ebx,%eax
- addl 76(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl (%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 8(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 4(%esp),%edx
- movl %eax,16(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 20(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 12(%esp),%edx
- andl %eax,%ebx
- addl 80(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 28(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,28(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 4(%esp),%edx,%esi
- xorl %edi,%ecx
- andl (%esp),%edx
- movl %ebx,12(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 16(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl 8(%esp),%edx
- andl %ebx,%eax
- addl 84(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 24(%esp),%edx
- leal (%eax,%ecx,1),%eax
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,24(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl (%esp),%edx,%esi
- xorl %edi,%ecx
- andl 28(%esp),%edx
- movl %eax,8(%esp)
- orl %esi,%edx
- rorxl $2,%eax,%edi
- rorxl $13,%eax,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%eax,%ecx
- xorl %edi,%esi
- movl 12(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%eax
- addl 4(%esp),%edx
- andl %eax,%ebx
- addl 88(%esp),%edx
- xorl %edi,%ebx
- addl %edx,%ecx
- addl 20(%esp),%edx
- leal (%ebx,%ecx,1),%ebx
- rorxl $6,%edx,%ecx
- rorxl $11,%edx,%esi
- movl %edx,20(%esp)
- rorxl $25,%edx,%edi
- xorl %esi,%ecx
- andnl 28(%esp),%edx,%esi
- xorl %edi,%ecx
- andl 24(%esp),%edx
- movl %ebx,4(%esp)
- orl %esi,%edx
- rorxl $2,%ebx,%edi
- rorxl $13,%ebx,%esi
- leal (%edx,%ecx,1),%edx
- rorxl $22,%ebx,%ecx
- xorl %edi,%esi
- movl 8(%esp),%edi
- xorl %esi,%ecx
- xorl %edi,%ebx
- addl (%esp),%edx
- andl %ebx,%eax
- addl 92(%esp),%edx
- xorl %edi,%eax
- addl %edx,%ecx
- addl 16(%esp),%edx
- leal (%eax,%ecx,1),%eax
- movl 96(%esp),%esi
- xorl %edi,%ebx
- movl 12(%esp),%ecx
- addl (%esi),%eax
- addl 4(%esi),%ebx
- addl 8(%esi),%edi
- addl 12(%esi),%ecx
- movl %eax,(%esi)
- movl %ebx,4(%esi)
- movl %edi,8(%esi)
- movl %ecx,12(%esi)
- movl %ebx,4(%esp)
- xorl %edi,%ebx
- movl %edi,8(%esp)
- movl %ecx,12(%esp)
- movl 20(%esp),%edi
- movl 24(%esp),%ecx
- addl 16(%esi),%edx
- addl 20(%esi),%edi
- addl 24(%esi),%ecx
- movl %edx,16(%esi)
- movl %edi,20(%esi)
- movl %edi,20(%esp)
- movl 28(%esp),%edi
- movl %ecx,24(%esi)
- addl 28(%esi),%edi
- movl %ecx,24(%esp)
- movl %edi,28(%esi)
- movl %edi,28(%esp)
- movl 100(%esp),%edi
- vmovdqa 64(%ebp),%xmm7
- subl $192,%ebp
- cmpl 104(%esp),%edi
- jb .L017grand_avx_bmi
+ jb .L011grand_ssse3
movl 108(%esp),%esp
- vzeroall
popl %edi
popl %esi
popl %ebx