| field | value | date |
|---|---|---|
| author | Jung-uk Kim <jkim@FreeBSD.org> | 2020-03-18 02:13:12 +0000 |
| committer | Jung-uk Kim <jkim@FreeBSD.org> | 2020-03-18 02:13:12 +0000 |
| commit | 17f01e9963948a18f55eb97173123702c5dae671 (patch) | |
| tree | bc68d611f898931c657418447120d2c674c1ff38 /secure/lib/libcrypto/i386 | |
| parent | 889d304bb46d7551805fd8e79815a50a4cddda6b (diff) | |
| parent | aa144ced5d61b5c7fb74acaebb37d85bd08f0416 (diff) | |
Diffstat (limited to 'secure/lib/libcrypto/i386')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | secure/lib/libcrypto/i386/chacha-x86.S | 960 |
| -rw-r--r-- | secure/lib/libcrypto/i386/ecp_nistz256-x86.S | 36 |
| -rw-r--r-- | secure/lib/libcrypto/i386/poly1305-x86.S | 1110 |
| -rw-r--r-- | secure/lib/libcrypto/i386/sha1-586.S | 2350 |
| -rw-r--r-- | secure/lib/libcrypto/i386/sha256-586.S | 4496 |

5 files changed, 56 insertions, 8896 deletions
diff --git a/secure/lib/libcrypto/i386/chacha-x86.S b/secure/lib/libcrypto/i386/chacha-x86.S index d6b2936a5381..566285310e06 100644 --- a/secure/lib/libcrypto/i386/chacha-x86.S +++ b/secure/lib/libcrypto/i386/chacha-x86.S @@ -385,8 +385,6 @@ ChaCha20_ssse3: pushl %esi pushl %edi .Lssse3_shortcut: - testl $2048,4(%ebp) - jnz .Lxop_shortcut movl 20(%esp),%edi movl 24(%esp),%esi movl 28(%esp),%ecx @@ -530,484 +528,6 @@ ChaCha20_ssse3: .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 .byte 114,103,62,0 -.globl ChaCha20_xop -.type ChaCha20_xop,@function -.align 16 -ChaCha20_xop: -.L_ChaCha20_xop_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi -.Lxop_shortcut: - movl 20(%esp),%edi - movl 24(%esp),%esi - movl 28(%esp),%ecx - movl 32(%esp),%edx - movl 36(%esp),%ebx - vzeroupper - movl %esp,%ebp - subl $524,%esp - andl $-64,%esp - movl %ebp,512(%esp) - leal .Lssse3_data-.Lpic_point(%eax),%eax - vmovdqu (%ebx),%xmm3 - cmpl $256,%ecx - jb .L0141x - movl %edx,516(%esp) - movl %ebx,520(%esp) - subl $256,%ecx - leal 384(%esp),%ebp - vmovdqu (%edx),%xmm7 - vpshufd $0,%xmm3,%xmm0 - vpshufd $85,%xmm3,%xmm1 - vpshufd $170,%xmm3,%xmm2 - vpshufd $255,%xmm3,%xmm3 - vpaddd 48(%eax),%xmm0,%xmm0 - vpshufd $0,%xmm7,%xmm4 - vpshufd $85,%xmm7,%xmm5 - vpsubd 64(%eax),%xmm0,%xmm0 - vpshufd $170,%xmm7,%xmm6 - vpshufd $255,%xmm7,%xmm7 - vmovdqa %xmm0,64(%ebp) - vmovdqa %xmm1,80(%ebp) - vmovdqa %xmm2,96(%ebp) - vmovdqa %xmm3,112(%ebp) - vmovdqu 16(%edx),%xmm3 - vmovdqa %xmm4,-64(%ebp) - vmovdqa %xmm5,-48(%ebp) - vmovdqa %xmm6,-32(%ebp) - vmovdqa %xmm7,-16(%ebp) - vmovdqa 32(%eax),%xmm7 - leal 128(%esp),%ebx - vpshufd $0,%xmm3,%xmm0 - vpshufd $85,%xmm3,%xmm1 - vpshufd $170,%xmm3,%xmm2 - vpshufd $255,%xmm3,%xmm3 - vpshufd $0,%xmm7,%xmm4 - vpshufd $85,%xmm7,%xmm5 - vpshufd $170,%xmm7,%xmm6 - vpshufd $255,%xmm7,%xmm7 - vmovdqa %xmm0,(%ebp) - vmovdqa %xmm1,16(%ebp) - vmovdqa %xmm2,32(%ebp) - vmovdqa %xmm3,48(%ebp) - vmovdqa %xmm4,-128(%ebp) - vmovdqa %xmm5,-112(%ebp) - vmovdqa %xmm6,-96(%ebp) - vmovdqa %xmm7,-80(%ebp) - leal 128(%esi),%esi - leal 128(%edi),%edi - jmp .L015outer_loop -.align 32 -.L015outer_loop: - vmovdqa -112(%ebp),%xmm1 - vmovdqa -96(%ebp),%xmm2 - vmovdqa -80(%ebp),%xmm3 - vmovdqa -48(%ebp),%xmm5 - vmovdqa -32(%ebp),%xmm6 - vmovdqa -16(%ebp),%xmm7 - vmovdqa %xmm1,-112(%ebx) - vmovdqa %xmm2,-96(%ebx) - vmovdqa %xmm3,-80(%ebx) - vmovdqa %xmm5,-48(%ebx) - vmovdqa %xmm6,-32(%ebx) - vmovdqa %xmm7,-16(%ebx) - vmovdqa 32(%ebp),%xmm2 - vmovdqa 48(%ebp),%xmm3 - vmovdqa 64(%ebp),%xmm4 - vmovdqa 80(%ebp),%xmm5 - vmovdqa 96(%ebp),%xmm6 - vmovdqa 112(%ebp),%xmm7 - vpaddd 64(%eax),%xmm4,%xmm4 - vmovdqa %xmm2,32(%ebx) - vmovdqa %xmm3,48(%ebx) - vmovdqa %xmm4,64(%ebx) - vmovdqa %xmm5,80(%ebx) - vmovdqa %xmm6,96(%ebx) - vmovdqa %xmm7,112(%ebx) - vmovdqa %xmm4,64(%ebp) - vmovdqa -128(%ebp),%xmm0 - vmovdqa %xmm4,%xmm6 - vmovdqa -64(%ebp),%xmm3 - vmovdqa (%ebp),%xmm4 - vmovdqa 16(%ebp),%xmm5 - movl $10,%edx - nop -.align 32 -.L016loop: - vpaddd %xmm3,%xmm0,%xmm0 - vpxor %xmm0,%xmm6,%xmm6 -.byte 143,232,120,194,246,16 - vpaddd %xmm6,%xmm4,%xmm4 - vpxor %xmm4,%xmm3,%xmm2 - vmovdqa -112(%ebx),%xmm1 -.byte 143,232,120,194,210,12 - vmovdqa -48(%ebx),%xmm3 - vpaddd %xmm2,%xmm0,%xmm0 - vmovdqa 80(%ebx),%xmm7 - vpxor %xmm0,%xmm6,%xmm6 - vpaddd %xmm3,%xmm1,%xmm1 -.byte 143,232,120,194,246,8 - vmovdqa %xmm0,-128(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa %xmm6,64(%ebx) - vpxor %xmm4,%xmm2,%xmm2 - vpxor %xmm1,%xmm7,%xmm7 -.byte 143,232,120,194,210,7 - 
vmovdqa %xmm4,(%ebx) -.byte 143,232,120,194,255,16 - vmovdqa %xmm2,-64(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vmovdqa 32(%ebx),%xmm4 - vpxor %xmm5,%xmm3,%xmm3 - vmovdqa -96(%ebx),%xmm0 -.byte 143,232,120,194,219,12 - vmovdqa -32(%ebx),%xmm2 - vpaddd %xmm3,%xmm1,%xmm1 - vmovdqa 96(%ebx),%xmm6 - vpxor %xmm1,%xmm7,%xmm7 - vpaddd %xmm2,%xmm0,%xmm0 -.byte 143,232,120,194,255,8 - vmovdqa %xmm1,-112(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vmovdqa %xmm7,80(%ebx) - vpxor %xmm5,%xmm3,%xmm3 - vpxor %xmm0,%xmm6,%xmm6 -.byte 143,232,120,194,219,7 - vmovdqa %xmm5,16(%ebx) -.byte 143,232,120,194,246,16 - vmovdqa %xmm3,-48(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa 48(%ebx),%xmm5 - vpxor %xmm4,%xmm2,%xmm2 - vmovdqa -80(%ebx),%xmm1 -.byte 143,232,120,194,210,12 - vmovdqa -16(%ebx),%xmm3 - vpaddd %xmm2,%xmm0,%xmm0 - vmovdqa 112(%ebx),%xmm7 - vpxor %xmm0,%xmm6,%xmm6 - vpaddd %xmm3,%xmm1,%xmm1 -.byte 143,232,120,194,246,8 - vmovdqa %xmm0,-96(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa %xmm6,96(%ebx) - vpxor %xmm4,%xmm2,%xmm2 - vpxor %xmm1,%xmm7,%xmm7 -.byte 143,232,120,194,210,7 -.byte 143,232,120,194,255,16 - vmovdqa %xmm2,-32(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vpxor %xmm5,%xmm3,%xmm3 - vmovdqa -128(%ebx),%xmm0 -.byte 143,232,120,194,219,12 - vmovdqa -48(%ebx),%xmm2 - vpaddd %xmm3,%xmm1,%xmm1 - vpxor %xmm1,%xmm7,%xmm7 - vpaddd %xmm2,%xmm0,%xmm0 -.byte 143,232,120,194,255,8 - vmovdqa %xmm1,-80(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vpxor %xmm5,%xmm3,%xmm3 - vpxor %xmm0,%xmm7,%xmm6 -.byte 143,232,120,194,219,7 -.byte 143,232,120,194,246,16 - vmovdqa %xmm3,-16(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vpxor %xmm4,%xmm2,%xmm2 - vmovdqa -112(%ebx),%xmm1 -.byte 143,232,120,194,210,12 - vmovdqa -32(%ebx),%xmm3 - vpaddd %xmm2,%xmm0,%xmm0 - vmovdqa 64(%ebx),%xmm7 - vpxor %xmm0,%xmm6,%xmm6 - vpaddd %xmm3,%xmm1,%xmm1 -.byte 143,232,120,194,246,8 - vmovdqa %xmm0,-128(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa %xmm6,112(%ebx) - vpxor %xmm4,%xmm2,%xmm2 - vpxor %xmm1,%xmm7,%xmm7 -.byte 143,232,120,194,210,7 - vmovdqa %xmm4,32(%ebx) -.byte 143,232,120,194,255,16 - vmovdqa %xmm2,-48(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vmovdqa (%ebx),%xmm4 - vpxor %xmm5,%xmm3,%xmm3 - vmovdqa -96(%ebx),%xmm0 -.byte 143,232,120,194,219,12 - vmovdqa -16(%ebx),%xmm2 - vpaddd %xmm3,%xmm1,%xmm1 - vmovdqa 80(%ebx),%xmm6 - vpxor %xmm1,%xmm7,%xmm7 - vpaddd %xmm2,%xmm0,%xmm0 -.byte 143,232,120,194,255,8 - vmovdqa %xmm1,-112(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vmovdqa %xmm7,64(%ebx) - vpxor %xmm5,%xmm3,%xmm3 - vpxor %xmm0,%xmm6,%xmm6 -.byte 143,232,120,194,219,7 - vmovdqa %xmm5,48(%ebx) -.byte 143,232,120,194,246,16 - vmovdqa %xmm3,-32(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa 16(%ebx),%xmm5 - vpxor %xmm4,%xmm2,%xmm2 - vmovdqa -80(%ebx),%xmm1 -.byte 143,232,120,194,210,12 - vmovdqa -64(%ebx),%xmm3 - vpaddd %xmm2,%xmm0,%xmm0 - vmovdqa 96(%ebx),%xmm7 - vpxor %xmm0,%xmm6,%xmm6 - vpaddd %xmm3,%xmm1,%xmm1 -.byte 143,232,120,194,246,8 - vmovdqa %xmm0,-96(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa %xmm6,80(%ebx) - vpxor %xmm4,%xmm2,%xmm2 - vpxor %xmm1,%xmm7,%xmm7 -.byte 143,232,120,194,210,7 -.byte 143,232,120,194,255,16 - vmovdqa %xmm2,-16(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vpxor %xmm5,%xmm3,%xmm3 - vmovdqa -128(%ebx),%xmm0 -.byte 143,232,120,194,219,12 - vpaddd %xmm3,%xmm1,%xmm1 - vmovdqa 64(%ebx),%xmm6 - vpxor %xmm1,%xmm7,%xmm7 -.byte 143,232,120,194,255,8 - vmovdqa %xmm1,-80(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vmovdqa %xmm7,96(%ebx) - vpxor %xmm5,%xmm3,%xmm3 -.byte 143,232,120,194,219,7 - decl %edx - jnz .L016loop - vmovdqa %xmm3,-64(%ebx) - vmovdqa %xmm4,(%ebx) - vmovdqa 
%xmm5,16(%ebx) - vmovdqa %xmm6,64(%ebx) - vmovdqa %xmm7,96(%ebx) - vmovdqa -112(%ebx),%xmm1 - vmovdqa -96(%ebx),%xmm2 - vmovdqa -80(%ebx),%xmm3 - vpaddd -128(%ebp),%xmm0,%xmm0 - vpaddd -112(%ebp),%xmm1,%xmm1 - vpaddd -96(%ebp),%xmm2,%xmm2 - vpaddd -80(%ebp),%xmm3,%xmm3 - vpunpckldq %xmm1,%xmm0,%xmm6 - vpunpckldq %xmm3,%xmm2,%xmm7 - vpunpckhdq %xmm1,%xmm0,%xmm0 - vpunpckhdq %xmm3,%xmm2,%xmm2 - vpunpcklqdq %xmm7,%xmm6,%xmm1 - vpunpckhqdq %xmm7,%xmm6,%xmm6 - vpunpcklqdq %xmm2,%xmm0,%xmm7 - vpunpckhqdq %xmm2,%xmm0,%xmm3 - vpxor -128(%esi),%xmm1,%xmm4 - vpxor -64(%esi),%xmm6,%xmm5 - vpxor (%esi),%xmm7,%xmm6 - vpxor 64(%esi),%xmm3,%xmm7 - leal 16(%esi),%esi - vmovdqa -64(%ebx),%xmm0 - vmovdqa -48(%ebx),%xmm1 - vmovdqa -32(%ebx),%xmm2 - vmovdqa -16(%ebx),%xmm3 - vmovdqu %xmm4,-128(%edi) - vmovdqu %xmm5,-64(%edi) - vmovdqu %xmm6,(%edi) - vmovdqu %xmm7,64(%edi) - leal 16(%edi),%edi - vpaddd -64(%ebp),%xmm0,%xmm0 - vpaddd -48(%ebp),%xmm1,%xmm1 - vpaddd -32(%ebp),%xmm2,%xmm2 - vpaddd -16(%ebp),%xmm3,%xmm3 - vpunpckldq %xmm1,%xmm0,%xmm6 - vpunpckldq %xmm3,%xmm2,%xmm7 - vpunpckhdq %xmm1,%xmm0,%xmm0 - vpunpckhdq %xmm3,%xmm2,%xmm2 - vpunpcklqdq %xmm7,%xmm6,%xmm1 - vpunpckhqdq %xmm7,%xmm6,%xmm6 - vpunpcklqdq %xmm2,%xmm0,%xmm7 - vpunpckhqdq %xmm2,%xmm0,%xmm3 - vpxor -128(%esi),%xmm1,%xmm4 - vpxor -64(%esi),%xmm6,%xmm5 - vpxor (%esi),%xmm7,%xmm6 - vpxor 64(%esi),%xmm3,%xmm7 - leal 16(%esi),%esi - vmovdqa (%ebx),%xmm0 - vmovdqa 16(%ebx),%xmm1 - vmovdqa 32(%ebx),%xmm2 - vmovdqa 48(%ebx),%xmm3 - vmovdqu %xmm4,-128(%edi) - vmovdqu %xmm5,-64(%edi) - vmovdqu %xmm6,(%edi) - vmovdqu %xmm7,64(%edi) - leal 16(%edi),%edi - vpaddd (%ebp),%xmm0,%xmm0 - vpaddd 16(%ebp),%xmm1,%xmm1 - vpaddd 32(%ebp),%xmm2,%xmm2 - vpaddd 48(%ebp),%xmm3,%xmm3 - vpunpckldq %xmm1,%xmm0,%xmm6 - vpunpckldq %xmm3,%xmm2,%xmm7 - vpunpckhdq %xmm1,%xmm0,%xmm0 - vpunpckhdq %xmm3,%xmm2,%xmm2 - vpunpcklqdq %xmm7,%xmm6,%xmm1 - vpunpckhqdq %xmm7,%xmm6,%xmm6 - vpunpcklqdq %xmm2,%xmm0,%xmm7 - vpunpckhqdq %xmm2,%xmm0,%xmm3 - vpxor -128(%esi),%xmm1,%xmm4 - vpxor -64(%esi),%xmm6,%xmm5 - vpxor (%esi),%xmm7,%xmm6 - vpxor 64(%esi),%xmm3,%xmm7 - leal 16(%esi),%esi - vmovdqa 64(%ebx),%xmm0 - vmovdqa 80(%ebx),%xmm1 - vmovdqa 96(%ebx),%xmm2 - vmovdqa 112(%ebx),%xmm3 - vmovdqu %xmm4,-128(%edi) - vmovdqu %xmm5,-64(%edi) - vmovdqu %xmm6,(%edi) - vmovdqu %xmm7,64(%edi) - leal 16(%edi),%edi - vpaddd 64(%ebp),%xmm0,%xmm0 - vpaddd 80(%ebp),%xmm1,%xmm1 - vpaddd 96(%ebp),%xmm2,%xmm2 - vpaddd 112(%ebp),%xmm3,%xmm3 - vpunpckldq %xmm1,%xmm0,%xmm6 - vpunpckldq %xmm3,%xmm2,%xmm7 - vpunpckhdq %xmm1,%xmm0,%xmm0 - vpunpckhdq %xmm3,%xmm2,%xmm2 - vpunpcklqdq %xmm7,%xmm6,%xmm1 - vpunpckhqdq %xmm7,%xmm6,%xmm6 - vpunpcklqdq %xmm2,%xmm0,%xmm7 - vpunpckhqdq %xmm2,%xmm0,%xmm3 - vpxor -128(%esi),%xmm1,%xmm4 - vpxor -64(%esi),%xmm6,%xmm5 - vpxor (%esi),%xmm7,%xmm6 - vpxor 64(%esi),%xmm3,%xmm7 - leal 208(%esi),%esi - vmovdqu %xmm4,-128(%edi) - vmovdqu %xmm5,-64(%edi) - vmovdqu %xmm6,(%edi) - vmovdqu %xmm7,64(%edi) - leal 208(%edi),%edi - subl $256,%ecx - jnc .L015outer_loop - addl $256,%ecx - jz .L017done - movl 520(%esp),%ebx - leal -128(%esi),%esi - movl 516(%esp),%edx - leal -128(%edi),%edi - vmovd 64(%ebp),%xmm2 - vmovdqu (%ebx),%xmm3 - vpaddd 96(%eax),%xmm2,%xmm2 - vpand 112(%eax),%xmm3,%xmm3 - vpor %xmm2,%xmm3,%xmm3 -.L0141x: - vmovdqa 32(%eax),%xmm0 - vmovdqu (%edx),%xmm1 - vmovdqu 16(%edx),%xmm2 - vmovdqa (%eax),%xmm6 - vmovdqa 16(%eax),%xmm7 - movl %ebp,48(%esp) - vmovdqa %xmm0,(%esp) - vmovdqa %xmm1,16(%esp) - vmovdqa %xmm2,32(%esp) - vmovdqa %xmm3,48(%esp) - movl $10,%edx - jmp 
.L018loop1x -.align 16 -.L019outer1x: - vmovdqa 80(%eax),%xmm3 - vmovdqa (%esp),%xmm0 - vmovdqa 16(%esp),%xmm1 - vmovdqa 32(%esp),%xmm2 - vpaddd 48(%esp),%xmm3,%xmm3 - movl $10,%edx - vmovdqa %xmm3,48(%esp) - jmp .L018loop1x -.align 16 -.L018loop1x: - vpaddd %xmm1,%xmm0,%xmm0 - vpxor %xmm0,%xmm3,%xmm3 -.byte 143,232,120,194,219,16 - vpaddd %xmm3,%xmm2,%xmm2 - vpxor %xmm2,%xmm1,%xmm1 -.byte 143,232,120,194,201,12 - vpaddd %xmm1,%xmm0,%xmm0 - vpxor %xmm0,%xmm3,%xmm3 -.byte 143,232,120,194,219,8 - vpaddd %xmm3,%xmm2,%xmm2 - vpxor %xmm2,%xmm1,%xmm1 -.byte 143,232,120,194,201,7 - vpshufd $78,%xmm2,%xmm2 - vpshufd $57,%xmm1,%xmm1 - vpshufd $147,%xmm3,%xmm3 - vpaddd %xmm1,%xmm0,%xmm0 - vpxor %xmm0,%xmm3,%xmm3 -.byte 143,232,120,194,219,16 - vpaddd %xmm3,%xmm2,%xmm2 - vpxor %xmm2,%xmm1,%xmm1 -.byte 143,232,120,194,201,12 - vpaddd %xmm1,%xmm0,%xmm0 - vpxor %xmm0,%xmm3,%xmm3 -.byte 143,232,120,194,219,8 - vpaddd %xmm3,%xmm2,%xmm2 - vpxor %xmm2,%xmm1,%xmm1 -.byte 143,232,120,194,201,7 - vpshufd $78,%xmm2,%xmm2 - vpshufd $147,%xmm1,%xmm1 - vpshufd $57,%xmm3,%xmm3 - decl %edx - jnz .L018loop1x - vpaddd (%esp),%xmm0,%xmm0 - vpaddd 16(%esp),%xmm1,%xmm1 - vpaddd 32(%esp),%xmm2,%xmm2 - vpaddd 48(%esp),%xmm3,%xmm3 - cmpl $64,%ecx - jb .L020tail - vpxor (%esi),%xmm0,%xmm0 - vpxor 16(%esi),%xmm1,%xmm1 - vpxor 32(%esi),%xmm2,%xmm2 - vpxor 48(%esi),%xmm3,%xmm3 - leal 64(%esi),%esi - vmovdqu %xmm0,(%edi) - vmovdqu %xmm1,16(%edi) - vmovdqu %xmm2,32(%edi) - vmovdqu %xmm3,48(%edi) - leal 64(%edi),%edi - subl $64,%ecx - jnz .L019outer1x - jmp .L017done -.L020tail: - vmovdqa %xmm0,(%esp) - vmovdqa %xmm1,16(%esp) - vmovdqa %xmm2,32(%esp) - vmovdqa %xmm3,48(%esp) - xorl %eax,%eax - xorl %edx,%edx - xorl %ebp,%ebp -.L021tail_loop: - movb (%esp,%ebp,1),%al - movb (%esi,%ebp,1),%dl - leal 1(%ebp),%ebp - xorb %dl,%al - movb %al,-1(%edi,%ebp,1) - decl %ecx - jnz .L021tail_loop -.L017done: - vzeroupper - movl 512(%esp),%esp - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size ChaCha20_xop,.-.L_ChaCha20_xop_begin .comm OPENSSL_ia32cap_P,16,4 #else .text @@ -1394,8 +914,6 @@ ChaCha20_ssse3: pushl %esi pushl %edi .Lssse3_shortcut: - testl $2048,4(%ebp) - jnz .Lxop_shortcut movl 20(%esp),%edi movl 24(%esp),%esi movl 28(%esp),%ecx @@ -1539,483 +1057,5 @@ ChaCha20_ssse3: .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 .byte 114,103,62,0 -.globl ChaCha20_xop -.type ChaCha20_xop,@function -.align 16 -ChaCha20_xop: -.L_ChaCha20_xop_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi -.Lxop_shortcut: - movl 20(%esp),%edi - movl 24(%esp),%esi - movl 28(%esp),%ecx - movl 32(%esp),%edx - movl 36(%esp),%ebx - vzeroupper - movl %esp,%ebp - subl $524,%esp - andl $-64,%esp - movl %ebp,512(%esp) - leal .Lssse3_data-.Lpic_point(%eax),%eax - vmovdqu (%ebx),%xmm3 - cmpl $256,%ecx - jb .L0141x - movl %edx,516(%esp) - movl %ebx,520(%esp) - subl $256,%ecx - leal 384(%esp),%ebp - vmovdqu (%edx),%xmm7 - vpshufd $0,%xmm3,%xmm0 - vpshufd $85,%xmm3,%xmm1 - vpshufd $170,%xmm3,%xmm2 - vpshufd $255,%xmm3,%xmm3 - vpaddd 48(%eax),%xmm0,%xmm0 - vpshufd $0,%xmm7,%xmm4 - vpshufd $85,%xmm7,%xmm5 - vpsubd 64(%eax),%xmm0,%xmm0 - vpshufd $170,%xmm7,%xmm6 - vpshufd $255,%xmm7,%xmm7 - vmovdqa %xmm0,64(%ebp) - vmovdqa %xmm1,80(%ebp) - vmovdqa %xmm2,96(%ebp) - vmovdqa %xmm3,112(%ebp) - vmovdqu 16(%edx),%xmm3 - vmovdqa %xmm4,-64(%ebp) - vmovdqa %xmm5,-48(%ebp) - vmovdqa %xmm6,-32(%ebp) - vmovdqa %xmm7,-16(%ebp) - vmovdqa 32(%eax),%xmm7 - leal 128(%esp),%ebx - vpshufd $0,%xmm3,%xmm0 - 
vpshufd $85,%xmm3,%xmm1 - vpshufd $170,%xmm3,%xmm2 - vpshufd $255,%xmm3,%xmm3 - vpshufd $0,%xmm7,%xmm4 - vpshufd $85,%xmm7,%xmm5 - vpshufd $170,%xmm7,%xmm6 - vpshufd $255,%xmm7,%xmm7 - vmovdqa %xmm0,(%ebp) - vmovdqa %xmm1,16(%ebp) - vmovdqa %xmm2,32(%ebp) - vmovdqa %xmm3,48(%ebp) - vmovdqa %xmm4,-128(%ebp) - vmovdqa %xmm5,-112(%ebp) - vmovdqa %xmm6,-96(%ebp) - vmovdqa %xmm7,-80(%ebp) - leal 128(%esi),%esi - leal 128(%edi),%edi - jmp .L015outer_loop -.align 32 -.L015outer_loop: - vmovdqa -112(%ebp),%xmm1 - vmovdqa -96(%ebp),%xmm2 - vmovdqa -80(%ebp),%xmm3 - vmovdqa -48(%ebp),%xmm5 - vmovdqa -32(%ebp),%xmm6 - vmovdqa -16(%ebp),%xmm7 - vmovdqa %xmm1,-112(%ebx) - vmovdqa %xmm2,-96(%ebx) - vmovdqa %xmm3,-80(%ebx) - vmovdqa %xmm5,-48(%ebx) - vmovdqa %xmm6,-32(%ebx) - vmovdqa %xmm7,-16(%ebx) - vmovdqa 32(%ebp),%xmm2 - vmovdqa 48(%ebp),%xmm3 - vmovdqa 64(%ebp),%xmm4 - vmovdqa 80(%ebp),%xmm5 - vmovdqa 96(%ebp),%xmm6 - vmovdqa 112(%ebp),%xmm7 - vpaddd 64(%eax),%xmm4,%xmm4 - vmovdqa %xmm2,32(%ebx) - vmovdqa %xmm3,48(%ebx) - vmovdqa %xmm4,64(%ebx) - vmovdqa %xmm5,80(%ebx) - vmovdqa %xmm6,96(%ebx) - vmovdqa %xmm7,112(%ebx) - vmovdqa %xmm4,64(%ebp) - vmovdqa -128(%ebp),%xmm0 - vmovdqa %xmm4,%xmm6 - vmovdqa -64(%ebp),%xmm3 - vmovdqa (%ebp),%xmm4 - vmovdqa 16(%ebp),%xmm5 - movl $10,%edx - nop -.align 32 -.L016loop: - vpaddd %xmm3,%xmm0,%xmm0 - vpxor %xmm0,%xmm6,%xmm6 -.byte 143,232,120,194,246,16 - vpaddd %xmm6,%xmm4,%xmm4 - vpxor %xmm4,%xmm3,%xmm2 - vmovdqa -112(%ebx),%xmm1 -.byte 143,232,120,194,210,12 - vmovdqa -48(%ebx),%xmm3 - vpaddd %xmm2,%xmm0,%xmm0 - vmovdqa 80(%ebx),%xmm7 - vpxor %xmm0,%xmm6,%xmm6 - vpaddd %xmm3,%xmm1,%xmm1 -.byte 143,232,120,194,246,8 - vmovdqa %xmm0,-128(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa %xmm6,64(%ebx) - vpxor %xmm4,%xmm2,%xmm2 - vpxor %xmm1,%xmm7,%xmm7 -.byte 143,232,120,194,210,7 - vmovdqa %xmm4,(%ebx) -.byte 143,232,120,194,255,16 - vmovdqa %xmm2,-64(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vmovdqa 32(%ebx),%xmm4 - vpxor %xmm5,%xmm3,%xmm3 - vmovdqa -96(%ebx),%xmm0 -.byte 143,232,120,194,219,12 - vmovdqa -32(%ebx),%xmm2 - vpaddd %xmm3,%xmm1,%xmm1 - vmovdqa 96(%ebx),%xmm6 - vpxor %xmm1,%xmm7,%xmm7 - vpaddd %xmm2,%xmm0,%xmm0 -.byte 143,232,120,194,255,8 - vmovdqa %xmm1,-112(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vmovdqa %xmm7,80(%ebx) - vpxor %xmm5,%xmm3,%xmm3 - vpxor %xmm0,%xmm6,%xmm6 -.byte 143,232,120,194,219,7 - vmovdqa %xmm5,16(%ebx) -.byte 143,232,120,194,246,16 - vmovdqa %xmm3,-48(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa 48(%ebx),%xmm5 - vpxor %xmm4,%xmm2,%xmm2 - vmovdqa -80(%ebx),%xmm1 -.byte 143,232,120,194,210,12 - vmovdqa -16(%ebx),%xmm3 - vpaddd %xmm2,%xmm0,%xmm0 - vmovdqa 112(%ebx),%xmm7 - vpxor %xmm0,%xmm6,%xmm6 - vpaddd %xmm3,%xmm1,%xmm1 -.byte 143,232,120,194,246,8 - vmovdqa %xmm0,-96(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa %xmm6,96(%ebx) - vpxor %xmm4,%xmm2,%xmm2 - vpxor %xmm1,%xmm7,%xmm7 -.byte 143,232,120,194,210,7 -.byte 143,232,120,194,255,16 - vmovdqa %xmm2,-32(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vpxor %xmm5,%xmm3,%xmm3 - vmovdqa -128(%ebx),%xmm0 -.byte 143,232,120,194,219,12 - vmovdqa -48(%ebx),%xmm2 - vpaddd %xmm3,%xmm1,%xmm1 - vpxor %xmm1,%xmm7,%xmm7 - vpaddd %xmm2,%xmm0,%xmm0 -.byte 143,232,120,194,255,8 - vmovdqa %xmm1,-80(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vpxor %xmm5,%xmm3,%xmm3 - vpxor %xmm0,%xmm7,%xmm6 -.byte 143,232,120,194,219,7 -.byte 143,232,120,194,246,16 - vmovdqa %xmm3,-16(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vpxor %xmm4,%xmm2,%xmm2 - vmovdqa -112(%ebx),%xmm1 -.byte 143,232,120,194,210,12 - vmovdqa -32(%ebx),%xmm3 - vpaddd 
%xmm2,%xmm0,%xmm0 - vmovdqa 64(%ebx),%xmm7 - vpxor %xmm0,%xmm6,%xmm6 - vpaddd %xmm3,%xmm1,%xmm1 -.byte 143,232,120,194,246,8 - vmovdqa %xmm0,-128(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa %xmm6,112(%ebx) - vpxor %xmm4,%xmm2,%xmm2 - vpxor %xmm1,%xmm7,%xmm7 -.byte 143,232,120,194,210,7 - vmovdqa %xmm4,32(%ebx) -.byte 143,232,120,194,255,16 - vmovdqa %xmm2,-48(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vmovdqa (%ebx),%xmm4 - vpxor %xmm5,%xmm3,%xmm3 - vmovdqa -96(%ebx),%xmm0 -.byte 143,232,120,194,219,12 - vmovdqa -16(%ebx),%xmm2 - vpaddd %xmm3,%xmm1,%xmm1 - vmovdqa 80(%ebx),%xmm6 - vpxor %xmm1,%xmm7,%xmm7 - vpaddd %xmm2,%xmm0,%xmm0 -.byte 143,232,120,194,255,8 - vmovdqa %xmm1,-112(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vmovdqa %xmm7,64(%ebx) - vpxor %xmm5,%xmm3,%xmm3 - vpxor %xmm0,%xmm6,%xmm6 -.byte 143,232,120,194,219,7 - vmovdqa %xmm5,48(%ebx) -.byte 143,232,120,194,246,16 - vmovdqa %xmm3,-32(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa 16(%ebx),%xmm5 - vpxor %xmm4,%xmm2,%xmm2 - vmovdqa -80(%ebx),%xmm1 -.byte 143,232,120,194,210,12 - vmovdqa -64(%ebx),%xmm3 - vpaddd %xmm2,%xmm0,%xmm0 - vmovdqa 96(%ebx),%xmm7 - vpxor %xmm0,%xmm6,%xmm6 - vpaddd %xmm3,%xmm1,%xmm1 -.byte 143,232,120,194,246,8 - vmovdqa %xmm0,-96(%ebx) - vpaddd %xmm6,%xmm4,%xmm4 - vmovdqa %xmm6,80(%ebx) - vpxor %xmm4,%xmm2,%xmm2 - vpxor %xmm1,%xmm7,%xmm7 -.byte 143,232,120,194,210,7 -.byte 143,232,120,194,255,16 - vmovdqa %xmm2,-16(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vpxor %xmm5,%xmm3,%xmm3 - vmovdqa -128(%ebx),%xmm0 -.byte 143,232,120,194,219,12 - vpaddd %xmm3,%xmm1,%xmm1 - vmovdqa 64(%ebx),%xmm6 - vpxor %xmm1,%xmm7,%xmm7 -.byte 143,232,120,194,255,8 - vmovdqa %xmm1,-80(%ebx) - vpaddd %xmm7,%xmm5,%xmm5 - vmovdqa %xmm7,96(%ebx) - vpxor %xmm5,%xmm3,%xmm3 -.byte 143,232,120,194,219,7 - decl %edx - jnz .L016loop - vmovdqa %xmm3,-64(%ebx) - vmovdqa %xmm4,(%ebx) - vmovdqa %xmm5,16(%ebx) - vmovdqa %xmm6,64(%ebx) - vmovdqa %xmm7,96(%ebx) - vmovdqa -112(%ebx),%xmm1 - vmovdqa -96(%ebx),%xmm2 - vmovdqa -80(%ebx),%xmm3 - vpaddd -128(%ebp),%xmm0,%xmm0 - vpaddd -112(%ebp),%xmm1,%xmm1 - vpaddd -96(%ebp),%xmm2,%xmm2 - vpaddd -80(%ebp),%xmm3,%xmm3 - vpunpckldq %xmm1,%xmm0,%xmm6 - vpunpckldq %xmm3,%xmm2,%xmm7 - vpunpckhdq %xmm1,%xmm0,%xmm0 - vpunpckhdq %xmm3,%xmm2,%xmm2 - vpunpcklqdq %xmm7,%xmm6,%xmm1 - vpunpckhqdq %xmm7,%xmm6,%xmm6 - vpunpcklqdq %xmm2,%xmm0,%xmm7 - vpunpckhqdq %xmm2,%xmm0,%xmm3 - vpxor -128(%esi),%xmm1,%xmm4 - vpxor -64(%esi),%xmm6,%xmm5 - vpxor (%esi),%xmm7,%xmm6 - vpxor 64(%esi),%xmm3,%xmm7 - leal 16(%esi),%esi - vmovdqa -64(%ebx),%xmm0 - vmovdqa -48(%ebx),%xmm1 - vmovdqa -32(%ebx),%xmm2 - vmovdqa -16(%ebx),%xmm3 - vmovdqu %xmm4,-128(%edi) - vmovdqu %xmm5,-64(%edi) - vmovdqu %xmm6,(%edi) - vmovdqu %xmm7,64(%edi) - leal 16(%edi),%edi - vpaddd -64(%ebp),%xmm0,%xmm0 - vpaddd -48(%ebp),%xmm1,%xmm1 - vpaddd -32(%ebp),%xmm2,%xmm2 - vpaddd -16(%ebp),%xmm3,%xmm3 - vpunpckldq %xmm1,%xmm0,%xmm6 - vpunpckldq %xmm3,%xmm2,%xmm7 - vpunpckhdq %xmm1,%xmm0,%xmm0 - vpunpckhdq %xmm3,%xmm2,%xmm2 - vpunpcklqdq %xmm7,%xmm6,%xmm1 - vpunpckhqdq %xmm7,%xmm6,%xmm6 - vpunpcklqdq %xmm2,%xmm0,%xmm7 - vpunpckhqdq %xmm2,%xmm0,%xmm3 - vpxor -128(%esi),%xmm1,%xmm4 - vpxor -64(%esi),%xmm6,%xmm5 - vpxor (%esi),%xmm7,%xmm6 - vpxor 64(%esi),%xmm3,%xmm7 - leal 16(%esi),%esi - vmovdqa (%ebx),%xmm0 - vmovdqa 16(%ebx),%xmm1 - vmovdqa 32(%ebx),%xmm2 - vmovdqa 48(%ebx),%xmm3 - vmovdqu %xmm4,-128(%edi) - vmovdqu %xmm5,-64(%edi) - vmovdqu %xmm6,(%edi) - vmovdqu %xmm7,64(%edi) - leal 16(%edi),%edi - vpaddd (%ebp),%xmm0,%xmm0 - vpaddd 16(%ebp),%xmm1,%xmm1 - vpaddd 32(%ebp),%xmm2,%xmm2 
- vpaddd 48(%ebp),%xmm3,%xmm3 - vpunpckldq %xmm1,%xmm0,%xmm6 - vpunpckldq %xmm3,%xmm2,%xmm7 - vpunpckhdq %xmm1,%xmm0,%xmm0 - vpunpckhdq %xmm3,%xmm2,%xmm2 - vpunpcklqdq %xmm7,%xmm6,%xmm1 - vpunpckhqdq %xmm7,%xmm6,%xmm6 - vpunpcklqdq %xmm2,%xmm0,%xmm7 - vpunpckhqdq %xmm2,%xmm0,%xmm3 - vpxor -128(%esi),%xmm1,%xmm4 - vpxor -64(%esi),%xmm6,%xmm5 - vpxor (%esi),%xmm7,%xmm6 - vpxor 64(%esi),%xmm3,%xmm7 - leal 16(%esi),%esi - vmovdqa 64(%ebx),%xmm0 - vmovdqa 80(%ebx),%xmm1 - vmovdqa 96(%ebx),%xmm2 - vmovdqa 112(%ebx),%xmm3 - vmovdqu %xmm4,-128(%edi) - vmovdqu %xmm5,-64(%edi) - vmovdqu %xmm6,(%edi) - vmovdqu %xmm7,64(%edi) - leal 16(%edi),%edi - vpaddd 64(%ebp),%xmm0,%xmm0 - vpaddd 80(%ebp),%xmm1,%xmm1 - vpaddd 96(%ebp),%xmm2,%xmm2 - vpaddd 112(%ebp),%xmm3,%xmm3 - vpunpckldq %xmm1,%xmm0,%xmm6 - vpunpckldq %xmm3,%xmm2,%xmm7 - vpunpckhdq %xmm1,%xmm0,%xmm0 - vpunpckhdq %xmm3,%xmm2,%xmm2 - vpunpcklqdq %xmm7,%xmm6,%xmm1 - vpunpckhqdq %xmm7,%xmm6,%xmm6 - vpunpcklqdq %xmm2,%xmm0,%xmm7 - vpunpckhqdq %xmm2,%xmm0,%xmm3 - vpxor -128(%esi),%xmm1,%xmm4 - vpxor -64(%esi),%xmm6,%xmm5 - vpxor (%esi),%xmm7,%xmm6 - vpxor 64(%esi),%xmm3,%xmm7 - leal 208(%esi),%esi - vmovdqu %xmm4,-128(%edi) - vmovdqu %xmm5,-64(%edi) - vmovdqu %xmm6,(%edi) - vmovdqu %xmm7,64(%edi) - leal 208(%edi),%edi - subl $256,%ecx - jnc .L015outer_loop - addl $256,%ecx - jz .L017done - movl 520(%esp),%ebx - leal -128(%esi),%esi - movl 516(%esp),%edx - leal -128(%edi),%edi - vmovd 64(%ebp),%xmm2 - vmovdqu (%ebx),%xmm3 - vpaddd 96(%eax),%xmm2,%xmm2 - vpand 112(%eax),%xmm3,%xmm3 - vpor %xmm2,%xmm3,%xmm3 -.L0141x: - vmovdqa 32(%eax),%xmm0 - vmovdqu (%edx),%xmm1 - vmovdqu 16(%edx),%xmm2 - vmovdqa (%eax),%xmm6 - vmovdqa 16(%eax),%xmm7 - movl %ebp,48(%esp) - vmovdqa %xmm0,(%esp) - vmovdqa %xmm1,16(%esp) - vmovdqa %xmm2,32(%esp) - vmovdqa %xmm3,48(%esp) - movl $10,%edx - jmp .L018loop1x -.align 16 -.L019outer1x: - vmovdqa 80(%eax),%xmm3 - vmovdqa (%esp),%xmm0 - vmovdqa 16(%esp),%xmm1 - vmovdqa 32(%esp),%xmm2 - vpaddd 48(%esp),%xmm3,%xmm3 - movl $10,%edx - vmovdqa %xmm3,48(%esp) - jmp .L018loop1x -.align 16 -.L018loop1x: - vpaddd %xmm1,%xmm0,%xmm0 - vpxor %xmm0,%xmm3,%xmm3 -.byte 143,232,120,194,219,16 - vpaddd %xmm3,%xmm2,%xmm2 - vpxor %xmm2,%xmm1,%xmm1 -.byte 143,232,120,194,201,12 - vpaddd %xmm1,%xmm0,%xmm0 - vpxor %xmm0,%xmm3,%xmm3 -.byte 143,232,120,194,219,8 - vpaddd %xmm3,%xmm2,%xmm2 - vpxor %xmm2,%xmm1,%xmm1 -.byte 143,232,120,194,201,7 - vpshufd $78,%xmm2,%xmm2 - vpshufd $57,%xmm1,%xmm1 - vpshufd $147,%xmm3,%xmm3 - vpaddd %xmm1,%xmm0,%xmm0 - vpxor %xmm0,%xmm3,%xmm3 -.byte 143,232,120,194,219,16 - vpaddd %xmm3,%xmm2,%xmm2 - vpxor %xmm2,%xmm1,%xmm1 -.byte 143,232,120,194,201,12 - vpaddd %xmm1,%xmm0,%xmm0 - vpxor %xmm0,%xmm3,%xmm3 -.byte 143,232,120,194,219,8 - vpaddd %xmm3,%xmm2,%xmm2 - vpxor %xmm2,%xmm1,%xmm1 -.byte 143,232,120,194,201,7 - vpshufd $78,%xmm2,%xmm2 - vpshufd $147,%xmm1,%xmm1 - vpshufd $57,%xmm3,%xmm3 - decl %edx - jnz .L018loop1x - vpaddd (%esp),%xmm0,%xmm0 - vpaddd 16(%esp),%xmm1,%xmm1 - vpaddd 32(%esp),%xmm2,%xmm2 - vpaddd 48(%esp),%xmm3,%xmm3 - cmpl $64,%ecx - jb .L020tail - vpxor (%esi),%xmm0,%xmm0 - vpxor 16(%esi),%xmm1,%xmm1 - vpxor 32(%esi),%xmm2,%xmm2 - vpxor 48(%esi),%xmm3,%xmm3 - leal 64(%esi),%esi - vmovdqu %xmm0,(%edi) - vmovdqu %xmm1,16(%edi) - vmovdqu %xmm2,32(%edi) - vmovdqu %xmm3,48(%edi) - leal 64(%edi),%edi - subl $64,%ecx - jnz .L019outer1x - jmp .L017done -.L020tail: - vmovdqa %xmm0,(%esp) - vmovdqa %xmm1,16(%esp) - vmovdqa %xmm2,32(%esp) - vmovdqa %xmm3,48(%esp) - xorl %eax,%eax - xorl %edx,%edx - xorl %ebp,%ebp 
-.L021tail_loop: - movb (%esp,%ebp,1),%al - movb (%esi,%ebp,1),%dl - leal 1(%ebp),%ebp - xorb %dl,%al - movb %al,-1(%edi,%ebp,1) - decl %ecx - jnz .L021tail_loop -.L017done: - vzeroupper - movl 512(%esp),%esp - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size ChaCha20_xop,.-.L_ChaCha20_xop_begin .comm OPENSSL_ia32cap_P,16,4 #endif diff --git a/secure/lib/libcrypto/i386/ecp_nistz256-x86.S b/secure/lib/libcrypto/i386/ecp_nistz256-x86.S index 7d0c1b9eb9df..eb413d9f1a73 100644 --- a/secure/lib/libcrypto/i386/ecp_nistz256-x86.S +++ b/secure/lib/libcrypto/i386/ecp_nistz256-x86.S @@ -4422,19 +4422,15 @@ ecp_nistz256_point_add: orl 4(%edi),%eax orl 8(%edi),%eax orl 12(%edi),%eax + movl 576(%esp),%ebx + notl %ebx + orl %ebx,%eax + movl 580(%esp),%ebx + notl %ebx + orl %ebx,%eax + orl 584(%esp),%eax .byte 62 jnz .L010add_proceed - movl 576(%esp),%eax - andl 580(%esp),%eax - movl 584(%esp),%ebx - jz .L010add_proceed - testl %ebx,%ebx - jz .L011add_double - movl 616(%esp),%edi - xorl %eax,%eax - movl $24,%ecx -.byte 252,243,171 - jmp .L012add_done .align 16 .L011add_double: movl 620(%esp),%esi @@ -9590,19 +9586,15 @@ ecp_nistz256_point_add: orl 4(%edi),%eax orl 8(%edi),%eax orl 12(%edi),%eax + movl 576(%esp),%ebx + notl %ebx + orl %ebx,%eax + movl 580(%esp),%ebx + notl %ebx + orl %ebx,%eax + orl 584(%esp),%eax .byte 62 jnz .L010add_proceed - movl 576(%esp),%eax - andl 580(%esp),%eax - movl 584(%esp),%ebx - jz .L010add_proceed - testl %ebx,%ebx - jz .L011add_double - movl 616(%esp),%edi - xorl %eax,%eax - movl $24,%ecx -.byte 252,243,171 - jmp .L012add_done .align 16 .L011add_double: movl 620(%esp),%esi diff --git a/secure/lib/libcrypto/i386/poly1305-x86.S b/secure/lib/libcrypto/i386/poly1305-x86.S index 100deee40bf2..b394500278d5 100644 --- a/secure/lib/libcrypto/i386/poly1305-x86.S +++ b/secure/lib/libcrypto/i386/poly1305-x86.S @@ -36,10 +36,6 @@ poly1305_init: jne .L002no_sse2 leal _poly1305_blocks_sse2-.L001pic_point(%ebx),%eax leal _poly1305_emit_sse2-.L001pic_point(%ebx),%edx - movl 8(%edi),%ecx - testl $32,%ecx - jz .L002no_sse2 - leal _poly1305_blocks_avx2-.L001pic_point(%ebx),%eax .L002no_sse2: movl 20(%esp),%edi movl %eax,(%ebp) @@ -1348,557 +1344,6 @@ _poly1305_emit_sse2: popl %ebp ret .size _poly1305_emit_sse2,.-_poly1305_emit_sse2 -.align 32 -.type _poly1305_init_avx2,@function -.align 16 -_poly1305_init_avx2: - vmovdqu 24(%edi),%xmm4 - leal 48(%edi),%edi - movl %esp,%ebp - subl $224,%esp - andl $-16,%esp - vmovdqa 64(%ebx),%xmm7 - vpand %xmm7,%xmm4,%xmm0 - vpsrlq $26,%xmm4,%xmm1 - vpsrldq $6,%xmm4,%xmm3 - vpand %xmm7,%xmm1,%xmm1 - vpsrlq $4,%xmm3,%xmm2 - vpsrlq $30,%xmm3,%xmm3 - vpand %xmm7,%xmm2,%xmm2 - vpand %xmm7,%xmm3,%xmm3 - vpsrldq $13,%xmm4,%xmm4 - leal 144(%esp),%edx - movl $2,%ecx -.L018square: - vmovdqa %xmm0,(%esp) - vmovdqa %xmm1,16(%esp) - vmovdqa %xmm2,32(%esp) - vmovdqa %xmm3,48(%esp) - vmovdqa %xmm4,64(%esp) - vpslld $2,%xmm1,%xmm6 - vpslld $2,%xmm2,%xmm5 - vpaddd %xmm1,%xmm6,%xmm6 - vpaddd %xmm2,%xmm5,%xmm5 - vmovdqa %xmm6,80(%esp) - vmovdqa %xmm5,96(%esp) - vpslld $2,%xmm3,%xmm6 - vpslld $2,%xmm4,%xmm5 - vpaddd %xmm3,%xmm6,%xmm6 - vpaddd %xmm4,%xmm5,%xmm5 - vmovdqa %xmm6,112(%esp) - vmovdqa %xmm5,128(%esp) - vpshufd $68,%xmm0,%xmm5 - vmovdqa %xmm1,%xmm6 - vpshufd $68,%xmm1,%xmm1 - vpshufd $68,%xmm2,%xmm2 - vpshufd $68,%xmm3,%xmm3 - vpshufd $68,%xmm4,%xmm4 - vmovdqa %xmm5,(%edx) - vmovdqa %xmm1,16(%edx) - vmovdqa %xmm2,32(%edx) - vmovdqa %xmm3,48(%edx) - vmovdqa %xmm4,64(%edx) - vpmuludq %xmm0,%xmm4,%xmm4 - vpmuludq %xmm0,%xmm3,%xmm3 - vpmuludq %xmm0,%xmm2,%xmm2 
- vpmuludq %xmm0,%xmm1,%xmm1 - vpmuludq %xmm0,%xmm5,%xmm0 - vpmuludq 48(%edx),%xmm6,%xmm5 - vpaddq %xmm5,%xmm4,%xmm4 - vpmuludq 32(%edx),%xmm6,%xmm7 - vpaddq %xmm7,%xmm3,%xmm3 - vpmuludq 16(%edx),%xmm6,%xmm5 - vpaddq %xmm5,%xmm2,%xmm2 - vmovdqa 80(%esp),%xmm7 - vpmuludq (%edx),%xmm6,%xmm6 - vpaddq %xmm6,%xmm1,%xmm1 - vmovdqa 32(%esp),%xmm5 - vpmuludq 64(%edx),%xmm7,%xmm7 - vpaddq %xmm7,%xmm0,%xmm0 - vpmuludq 32(%edx),%xmm5,%xmm6 - vpaddq %xmm6,%xmm4,%xmm4 - vpmuludq 16(%edx),%xmm5,%xmm7 - vpaddq %xmm7,%xmm3,%xmm3 - vmovdqa 96(%esp),%xmm6 - vpmuludq (%edx),%xmm5,%xmm5 - vpaddq %xmm5,%xmm2,%xmm2 - vpmuludq 64(%edx),%xmm6,%xmm7 - vpaddq %xmm7,%xmm1,%xmm1 - vmovdqa 48(%esp),%xmm5 - vpmuludq 48(%edx),%xmm6,%xmm6 - vpaddq %xmm6,%xmm0,%xmm0 - vpmuludq 16(%edx),%xmm5,%xmm7 - vpaddq %xmm7,%xmm4,%xmm4 - vmovdqa 112(%esp),%xmm6 - vpmuludq (%edx),%xmm5,%xmm5 - vpaddq %xmm5,%xmm3,%xmm3 - vpmuludq 64(%edx),%xmm6,%xmm7 - vpaddq %xmm7,%xmm2,%xmm2 - vpmuludq 48(%edx),%xmm6,%xmm5 - vpaddq %xmm5,%xmm1,%xmm1 - vmovdqa 64(%esp),%xmm7 - vpmuludq 32(%edx),%xmm6,%xmm6 - vpaddq %xmm6,%xmm0,%xmm0 - vmovdqa 128(%esp),%xmm5 - vpmuludq (%edx),%xmm7,%xmm7 - vpaddq %xmm7,%xmm4,%xmm4 - vpmuludq 64(%edx),%xmm5,%xmm6 - vpaddq %xmm6,%xmm3,%xmm3 - vpmuludq 16(%edx),%xmm5,%xmm7 - vpaddq %xmm7,%xmm0,%xmm0 - vpmuludq 32(%edx),%xmm5,%xmm6 - vpaddq %xmm6,%xmm1,%xmm1 - vmovdqa 64(%ebx),%xmm7 - vpmuludq 48(%edx),%xmm5,%xmm5 - vpaddq %xmm5,%xmm2,%xmm2 - vpsrlq $26,%xmm3,%xmm5 - vpand %xmm7,%xmm3,%xmm3 - vpsrlq $26,%xmm0,%xmm6 - vpand %xmm7,%xmm0,%xmm0 - vpaddq %xmm5,%xmm4,%xmm4 - vpaddq %xmm6,%xmm1,%xmm1 - vpsrlq $26,%xmm4,%xmm5 - vpand %xmm7,%xmm4,%xmm4 - vpsrlq $26,%xmm1,%xmm6 - vpand %xmm7,%xmm1,%xmm1 - vpaddq %xmm6,%xmm2,%xmm2 - vpaddd %xmm5,%xmm0,%xmm0 - vpsllq $2,%xmm5,%xmm5 - vpsrlq $26,%xmm2,%xmm6 - vpand %xmm7,%xmm2,%xmm2 - vpaddd %xmm5,%xmm0,%xmm0 - vpaddd %xmm6,%xmm3,%xmm3 - vpsrlq $26,%xmm3,%xmm6 - vpsrlq $26,%xmm0,%xmm5 - vpand %xmm7,%xmm0,%xmm0 - vpand %xmm7,%xmm3,%xmm3 - vpaddd %xmm5,%xmm1,%xmm1 - vpaddd %xmm6,%xmm4,%xmm4 - decl %ecx - jz .L019square_break - vpunpcklqdq (%esp),%xmm0,%xmm0 - vpunpcklqdq 16(%esp),%xmm1,%xmm1 - vpunpcklqdq 32(%esp),%xmm2,%xmm2 - vpunpcklqdq 48(%esp),%xmm3,%xmm3 - vpunpcklqdq 64(%esp),%xmm4,%xmm4 - jmp .L018square -.L019square_break: - vpsllq $32,%xmm0,%xmm0 - vpsllq $32,%xmm1,%xmm1 - vpsllq $32,%xmm2,%xmm2 - vpsllq $32,%xmm3,%xmm3 - vpsllq $32,%xmm4,%xmm4 - vpor (%esp),%xmm0,%xmm0 - vpor 16(%esp),%xmm1,%xmm1 - vpor 32(%esp),%xmm2,%xmm2 - vpor 48(%esp),%xmm3,%xmm3 - vpor 64(%esp),%xmm4,%xmm4 - vpshufd $141,%xmm0,%xmm0 - vpshufd $141,%xmm1,%xmm1 - vpshufd $141,%xmm2,%xmm2 - vpshufd $141,%xmm3,%xmm3 - vpshufd $141,%xmm4,%xmm4 - vmovdqu %xmm0,(%edi) - vmovdqu %xmm1,16(%edi) - vmovdqu %xmm2,32(%edi) - vmovdqu %xmm3,48(%edi) - vmovdqu %xmm4,64(%edi) - vpslld $2,%xmm1,%xmm6 - vpslld $2,%xmm2,%xmm5 - vpaddd %xmm1,%xmm6,%xmm6 - vpaddd %xmm2,%xmm5,%xmm5 - vmovdqu %xmm6,80(%edi) - vmovdqu %xmm5,96(%edi) - vpslld $2,%xmm3,%xmm6 - vpslld $2,%xmm4,%xmm5 - vpaddd %xmm3,%xmm6,%xmm6 - vpaddd %xmm4,%xmm5,%xmm5 - vmovdqu %xmm6,112(%edi) - vmovdqu %xmm5,128(%edi) - movl %ebp,%esp - leal -48(%edi),%edi - ret -.size _poly1305_init_avx2,.-_poly1305_init_avx2 -.align 32 -.type _poly1305_blocks_avx2,@function -.align 16 -_poly1305_blocks_avx2: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - movl 20(%esp),%edi - movl 24(%esp),%esi - movl 28(%esp),%ecx - movl 20(%edi),%eax - andl $-16,%ecx - jz .L020nodata - cmpl $64,%ecx - jae .L021enter_avx2 - testl %eax,%eax - jz .Lenter_blocks -.L021enter_avx2: - 
vzeroupper - call .L022pic_point -.L022pic_point: - popl %ebx - leal .Lconst_sse2-.L022pic_point(%ebx),%ebx - testl %eax,%eax - jnz .L023base2_26 - call _poly1305_init_avx2 - movl (%edi),%eax - movl 3(%edi),%ecx - movl 6(%edi),%edx - movl 9(%edi),%esi - movl 13(%edi),%ebp - shrl $2,%ecx - andl $67108863,%eax - shrl $4,%edx - andl $67108863,%ecx - shrl $6,%esi - andl $67108863,%edx - movl %eax,(%edi) - movl %ecx,4(%edi) - movl %edx,8(%edi) - movl %esi,12(%edi) - movl %ebp,16(%edi) - movl $1,20(%edi) - movl 24(%esp),%esi - movl 28(%esp),%ecx -.L023base2_26: - movl 32(%esp),%eax - movl %esp,%ebp - subl $448,%esp - andl $-512,%esp - vmovdqu 48(%edi),%xmm0 - leal 288(%esp),%edx - vmovdqu 64(%edi),%xmm1 - vmovdqu 80(%edi),%xmm2 - vmovdqu 96(%edi),%xmm3 - vmovdqu 112(%edi),%xmm4 - leal 48(%edi),%edi - vpermq $64,%ymm0,%ymm0 - vpermq $64,%ymm1,%ymm1 - vpermq $64,%ymm2,%ymm2 - vpermq $64,%ymm3,%ymm3 - vpermq $64,%ymm4,%ymm4 - vpshufd $200,%ymm0,%ymm0 - vpshufd $200,%ymm1,%ymm1 - vpshufd $200,%ymm2,%ymm2 - vpshufd $200,%ymm3,%ymm3 - vpshufd $200,%ymm4,%ymm4 - vmovdqa %ymm0,-128(%edx) - vmovdqu 80(%edi),%xmm0 - vmovdqa %ymm1,-96(%edx) - vmovdqu 96(%edi),%xmm1 - vmovdqa %ymm2,-64(%edx) - vmovdqu 112(%edi),%xmm2 - vmovdqa %ymm3,-32(%edx) - vmovdqu 128(%edi),%xmm3 - vmovdqa %ymm4,(%edx) - vpermq $64,%ymm0,%ymm0 - vpermq $64,%ymm1,%ymm1 - vpermq $64,%ymm2,%ymm2 - vpermq $64,%ymm3,%ymm3 - vpshufd $200,%ymm0,%ymm0 - vpshufd $200,%ymm1,%ymm1 - vpshufd $200,%ymm2,%ymm2 - vpshufd $200,%ymm3,%ymm3 - vmovdqa %ymm0,32(%edx) - vmovd -48(%edi),%xmm0 - vmovdqa %ymm1,64(%edx) - vmovd -44(%edi),%xmm1 - vmovdqa %ymm2,96(%edx) - vmovd -40(%edi),%xmm2 - vmovdqa %ymm3,128(%edx) - vmovd -36(%edi),%xmm3 - vmovd -32(%edi),%xmm4 - vmovdqa 64(%ebx),%ymm7 - negl %eax - testl $63,%ecx - jz .L024even - movl %ecx,%edx - andl $-64,%ecx - andl $63,%edx - vmovdqu (%esi),%xmm5 - cmpl $32,%edx - jb .L025one - vmovdqu 16(%esi),%xmm6 - je .L026two - vinserti128 $1,32(%esi),%ymm5,%ymm5 - leal 48(%esi),%esi - leal 8(%ebx),%ebx - leal 296(%esp),%edx - jmp .L027tail -.L026two: - leal 32(%esi),%esi - leal 16(%ebx),%ebx - leal 304(%esp),%edx - jmp .L027tail -.L025one: - leal 16(%esi),%esi - vpxor %ymm6,%ymm6,%ymm6 - leal 32(%ebx,%eax,8),%ebx - leal 312(%esp),%edx - jmp .L027tail -.align 32 -.L024even: - vmovdqu (%esi),%xmm5 - vmovdqu 16(%esi),%xmm6 - vinserti128 $1,32(%esi),%ymm5,%ymm5 - vinserti128 $1,48(%esi),%ymm6,%ymm6 - leal 64(%esi),%esi - subl $64,%ecx - jz .L027tail -.L028loop: - vmovdqa %ymm2,64(%esp) - vpsrldq $6,%ymm5,%ymm2 - vmovdqa %ymm0,(%esp) - vpsrldq $6,%ymm6,%ymm0 - vmovdqa %ymm1,32(%esp) - vpunpckhqdq %ymm6,%ymm5,%ymm1 - vpunpcklqdq %ymm6,%ymm5,%ymm5 - vpunpcklqdq %ymm0,%ymm2,%ymm2 - vpsrlq $30,%ymm2,%ymm0 - vpsrlq $4,%ymm2,%ymm2 - vpsrlq $26,%ymm5,%ymm6 - vpsrlq $40,%ymm1,%ymm1 - vpand %ymm7,%ymm2,%ymm2 - vpand %ymm7,%ymm5,%ymm5 - vpand %ymm7,%ymm6,%ymm6 - vpand %ymm7,%ymm0,%ymm0 - vpor (%ebx),%ymm1,%ymm1 - vpaddq 64(%esp),%ymm2,%ymm2 - vpaddq (%esp),%ymm5,%ymm5 - vpaddq 32(%esp),%ymm6,%ymm6 - vpaddq %ymm3,%ymm0,%ymm0 - vpaddq %ymm4,%ymm1,%ymm1 - vpmuludq -96(%edx),%ymm2,%ymm3 - vmovdqa %ymm6,32(%esp) - vpmuludq -64(%edx),%ymm2,%ymm4 - vmovdqa %ymm0,96(%esp) - vpmuludq 96(%edx),%ymm2,%ymm0 - vmovdqa %ymm1,128(%esp) - vpmuludq 128(%edx),%ymm2,%ymm1 - vpmuludq -128(%edx),%ymm2,%ymm2 - vpmuludq -32(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm3,%ymm3 - vpmuludq (%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm4,%ymm4 - vpmuludq -128(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm0,%ymm0 - vmovdqa 32(%esp),%ymm7 - vpmuludq -96(%edx),%ymm5,%ymm6 - vpaddq 
%ymm6,%ymm1,%ymm1 - vpmuludq -64(%edx),%ymm5,%ymm5 - vpaddq %ymm5,%ymm2,%ymm2 - vpmuludq -64(%edx),%ymm7,%ymm6 - vpaddq %ymm6,%ymm3,%ymm3 - vpmuludq -32(%edx),%ymm7,%ymm5 - vpaddq %ymm5,%ymm4,%ymm4 - vpmuludq 128(%edx),%ymm7,%ymm6 - vpaddq %ymm6,%ymm0,%ymm0 - vmovdqa 96(%esp),%ymm6 - vpmuludq -128(%edx),%ymm7,%ymm5 - vpaddq %ymm5,%ymm1,%ymm1 - vpmuludq -96(%edx),%ymm7,%ymm7 - vpaddq %ymm7,%ymm2,%ymm2 - vpmuludq -128(%edx),%ymm6,%ymm5 - vpaddq %ymm5,%ymm3,%ymm3 - vpmuludq -96(%edx),%ymm6,%ymm7 - vpaddq %ymm7,%ymm4,%ymm4 - vpmuludq 64(%edx),%ymm6,%ymm5 - vpaddq %ymm5,%ymm0,%ymm0 - vmovdqa 128(%esp),%ymm5 - vpmuludq 96(%edx),%ymm6,%ymm7 - vpaddq %ymm7,%ymm1,%ymm1 - vpmuludq 128(%edx),%ymm6,%ymm6 - vpaddq %ymm6,%ymm2,%ymm2 - vpmuludq 128(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm3,%ymm3 - vpmuludq 32(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm0,%ymm0 - vpmuludq -128(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm4,%ymm4 - vmovdqa 64(%ebx),%ymm7 - vpmuludq 64(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm1,%ymm1 - vpmuludq 96(%edx),%ymm5,%ymm5 - vpaddq %ymm5,%ymm2,%ymm2 - vpsrlq $26,%ymm3,%ymm5 - vpand %ymm7,%ymm3,%ymm3 - vpsrlq $26,%ymm0,%ymm6 - vpand %ymm7,%ymm0,%ymm0 - vpaddq %ymm5,%ymm4,%ymm4 - vpaddq %ymm6,%ymm1,%ymm1 - vpsrlq $26,%ymm4,%ymm5 - vpand %ymm7,%ymm4,%ymm4 - vpsrlq $26,%ymm1,%ymm6 - vpand %ymm7,%ymm1,%ymm1 - vpaddq %ymm6,%ymm2,%ymm2 - vpaddq %ymm5,%ymm0,%ymm0 - vpsllq $2,%ymm5,%ymm5 - vpsrlq $26,%ymm2,%ymm6 - vpand %ymm7,%ymm2,%ymm2 - vpaddq %ymm5,%ymm0,%ymm0 - vpaddq %ymm6,%ymm3,%ymm3 - vpsrlq $26,%ymm3,%ymm6 - vpsrlq $26,%ymm0,%ymm5 - vpand %ymm7,%ymm0,%ymm0 - vpand %ymm7,%ymm3,%ymm3 - vpaddq %ymm5,%ymm1,%ymm1 - vpaddq %ymm6,%ymm4,%ymm4 - vmovdqu (%esi),%xmm5 - vmovdqu 16(%esi),%xmm6 - vinserti128 $1,32(%esi),%ymm5,%ymm5 - vinserti128 $1,48(%esi),%ymm6,%ymm6 - leal 64(%esi),%esi - subl $64,%ecx - jnz .L028loop -.L027tail: - vmovdqa %ymm2,64(%esp) - vpsrldq $6,%ymm5,%ymm2 - vmovdqa %ymm0,(%esp) - vpsrldq $6,%ymm6,%ymm0 - vmovdqa %ymm1,32(%esp) - vpunpckhqdq %ymm6,%ymm5,%ymm1 - vpunpcklqdq %ymm6,%ymm5,%ymm5 - vpunpcklqdq %ymm0,%ymm2,%ymm2 - vpsrlq $30,%ymm2,%ymm0 - vpsrlq $4,%ymm2,%ymm2 - vpsrlq $26,%ymm5,%ymm6 - vpsrlq $40,%ymm1,%ymm1 - vpand %ymm7,%ymm2,%ymm2 - vpand %ymm7,%ymm5,%ymm5 - vpand %ymm7,%ymm6,%ymm6 - vpand %ymm7,%ymm0,%ymm0 - vpor (%ebx),%ymm1,%ymm1 - andl $-64,%ebx - vpaddq 64(%esp),%ymm2,%ymm2 - vpaddq (%esp),%ymm5,%ymm5 - vpaddq 32(%esp),%ymm6,%ymm6 - vpaddq %ymm3,%ymm0,%ymm0 - vpaddq %ymm4,%ymm1,%ymm1 - vpmuludq -92(%edx),%ymm2,%ymm3 - vmovdqa %ymm6,32(%esp) - vpmuludq -60(%edx),%ymm2,%ymm4 - vmovdqa %ymm0,96(%esp) - vpmuludq 100(%edx),%ymm2,%ymm0 - vmovdqa %ymm1,128(%esp) - vpmuludq 132(%edx),%ymm2,%ymm1 - vpmuludq -124(%edx),%ymm2,%ymm2 - vpmuludq -28(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm3,%ymm3 - vpmuludq 4(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm4,%ymm4 - vpmuludq -124(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm0,%ymm0 - vmovdqa 32(%esp),%ymm7 - vpmuludq -92(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm1,%ymm1 - vpmuludq -60(%edx),%ymm5,%ymm5 - vpaddq %ymm5,%ymm2,%ymm2 - vpmuludq -60(%edx),%ymm7,%ymm6 - vpaddq %ymm6,%ymm3,%ymm3 - vpmuludq -28(%edx),%ymm7,%ymm5 - vpaddq %ymm5,%ymm4,%ymm4 - vpmuludq 132(%edx),%ymm7,%ymm6 - vpaddq %ymm6,%ymm0,%ymm0 - vmovdqa 96(%esp),%ymm6 - vpmuludq -124(%edx),%ymm7,%ymm5 - vpaddq %ymm5,%ymm1,%ymm1 - vpmuludq -92(%edx),%ymm7,%ymm7 - vpaddq %ymm7,%ymm2,%ymm2 - vpmuludq -124(%edx),%ymm6,%ymm5 - vpaddq %ymm5,%ymm3,%ymm3 - vpmuludq -92(%edx),%ymm6,%ymm7 - vpaddq %ymm7,%ymm4,%ymm4 - vpmuludq 68(%edx),%ymm6,%ymm5 - vpaddq %ymm5,%ymm0,%ymm0 - vmovdqa 128(%esp),%ymm5 - vpmuludq 
100(%edx),%ymm6,%ymm7 - vpaddq %ymm7,%ymm1,%ymm1 - vpmuludq 132(%edx),%ymm6,%ymm6 - vpaddq %ymm6,%ymm2,%ymm2 - vpmuludq 132(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm3,%ymm3 - vpmuludq 36(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm0,%ymm0 - vpmuludq -124(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm4,%ymm4 - vmovdqa 64(%ebx),%ymm7 - vpmuludq 68(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm1,%ymm1 - vpmuludq 100(%edx),%ymm5,%ymm5 - vpaddq %ymm5,%ymm2,%ymm2 - vpsrldq $8,%ymm4,%ymm5 - vpsrldq $8,%ymm3,%ymm6 - vpaddq %ymm5,%ymm4,%ymm4 - vpsrldq $8,%ymm0,%ymm5 - vpaddq %ymm6,%ymm3,%ymm3 - vpsrldq $8,%ymm1,%ymm6 - vpaddq %ymm5,%ymm0,%ymm0 - vpsrldq $8,%ymm2,%ymm5 - vpaddq %ymm6,%ymm1,%ymm1 - vpermq $2,%ymm4,%ymm6 - vpaddq %ymm5,%ymm2,%ymm2 - vpermq $2,%ymm3,%ymm5 - vpaddq %ymm6,%ymm4,%ymm4 - vpermq $2,%ymm0,%ymm6 - vpaddq %ymm5,%ymm3,%ymm3 - vpermq $2,%ymm1,%ymm5 - vpaddq %ymm6,%ymm0,%ymm0 - vpermq $2,%ymm2,%ymm6 - vpaddq %ymm5,%ymm1,%ymm1 - vpaddq %ymm6,%ymm2,%ymm2 - vpsrlq $26,%ymm3,%ymm5 - vpand %ymm7,%ymm3,%ymm3 - vpsrlq $26,%ymm0,%ymm6 - vpand %ymm7,%ymm0,%ymm0 - vpaddq %ymm5,%ymm4,%ymm4 - vpaddq %ymm6,%ymm1,%ymm1 - vpsrlq $26,%ymm4,%ymm5 - vpand %ymm7,%ymm4,%ymm4 - vpsrlq $26,%ymm1,%ymm6 - vpand %ymm7,%ymm1,%ymm1 - vpaddq %ymm6,%ymm2,%ymm2 - vpaddq %ymm5,%ymm0,%ymm0 - vpsllq $2,%ymm5,%ymm5 - vpsrlq $26,%ymm2,%ymm6 - vpand %ymm7,%ymm2,%ymm2 - vpaddq %ymm5,%ymm0,%ymm0 - vpaddq %ymm6,%ymm3,%ymm3 - vpsrlq $26,%ymm3,%ymm6 - vpsrlq $26,%ymm0,%ymm5 - vpand %ymm7,%ymm0,%ymm0 - vpand %ymm7,%ymm3,%ymm3 - vpaddq %ymm5,%ymm1,%ymm1 - vpaddq %ymm6,%ymm4,%ymm4 - cmpl $0,%ecx - je .L029done - vpshufd $252,%xmm0,%xmm0 - leal 288(%esp),%edx - vpshufd $252,%xmm1,%xmm1 - vpshufd $252,%xmm2,%xmm2 - vpshufd $252,%xmm3,%xmm3 - vpshufd $252,%xmm4,%xmm4 - jmp .L024even -.align 16 -.L029done: - vmovd %xmm0,-48(%edi) - vmovd %xmm1,-44(%edi) - vmovd %xmm2,-40(%edi) - vmovd %xmm3,-36(%edi) - vmovd %xmm4,-32(%edi) - vzeroupper - movl %ebp,%esp -.L020nodata: - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size _poly1305_blocks_avx2,.-_poly1305_blocks_avx2 .align 64 .Lconst_sse2: .long 16777216,0,16777216,0,16777216,0,16777216,0 @@ -1947,10 +1392,6 @@ poly1305_init: jne .L002no_sse2 leal _poly1305_blocks_sse2-.L001pic_point(%ebx),%eax leal _poly1305_emit_sse2-.L001pic_point(%ebx),%edx - movl 8(%edi),%ecx - testl $32,%ecx - jz .L002no_sse2 - leal _poly1305_blocks_avx2-.L001pic_point(%ebx),%eax .L002no_sse2: movl 20(%esp),%edi movl %eax,(%ebp) @@ -3259,557 +2700,6 @@ _poly1305_emit_sse2: popl %ebp ret .size _poly1305_emit_sse2,.-_poly1305_emit_sse2 -.align 32 -.type _poly1305_init_avx2,@function -.align 16 -_poly1305_init_avx2: - vmovdqu 24(%edi),%xmm4 - leal 48(%edi),%edi - movl %esp,%ebp - subl $224,%esp - andl $-16,%esp - vmovdqa 64(%ebx),%xmm7 - vpand %xmm7,%xmm4,%xmm0 - vpsrlq $26,%xmm4,%xmm1 - vpsrldq $6,%xmm4,%xmm3 - vpand %xmm7,%xmm1,%xmm1 - vpsrlq $4,%xmm3,%xmm2 - vpsrlq $30,%xmm3,%xmm3 - vpand %xmm7,%xmm2,%xmm2 - vpand %xmm7,%xmm3,%xmm3 - vpsrldq $13,%xmm4,%xmm4 - leal 144(%esp),%edx - movl $2,%ecx -.L018square: - vmovdqa %xmm0,(%esp) - vmovdqa %xmm1,16(%esp) - vmovdqa %xmm2,32(%esp) - vmovdqa %xmm3,48(%esp) - vmovdqa %xmm4,64(%esp) - vpslld $2,%xmm1,%xmm6 - vpslld $2,%xmm2,%xmm5 - vpaddd %xmm1,%xmm6,%xmm6 - vpaddd %xmm2,%xmm5,%xmm5 - vmovdqa %xmm6,80(%esp) - vmovdqa %xmm5,96(%esp) - vpslld $2,%xmm3,%xmm6 - vpslld $2,%xmm4,%xmm5 - vpaddd %xmm3,%xmm6,%xmm6 - vpaddd %xmm4,%xmm5,%xmm5 - vmovdqa %xmm6,112(%esp) - vmovdqa %xmm5,128(%esp) - vpshufd $68,%xmm0,%xmm5 - vmovdqa %xmm1,%xmm6 - vpshufd $68,%xmm1,%xmm1 - vpshufd $68,%xmm2,%xmm2 
- vpshufd $68,%xmm3,%xmm3 - vpshufd $68,%xmm4,%xmm4 - vmovdqa %xmm5,(%edx) - vmovdqa %xmm1,16(%edx) - vmovdqa %xmm2,32(%edx) - vmovdqa %xmm3,48(%edx) - vmovdqa %xmm4,64(%edx) - vpmuludq %xmm0,%xmm4,%xmm4 - vpmuludq %xmm0,%xmm3,%xmm3 - vpmuludq %xmm0,%xmm2,%xmm2 - vpmuludq %xmm0,%xmm1,%xmm1 - vpmuludq %xmm0,%xmm5,%xmm0 - vpmuludq 48(%edx),%xmm6,%xmm5 - vpaddq %xmm5,%xmm4,%xmm4 - vpmuludq 32(%edx),%xmm6,%xmm7 - vpaddq %xmm7,%xmm3,%xmm3 - vpmuludq 16(%edx),%xmm6,%xmm5 - vpaddq %xmm5,%xmm2,%xmm2 - vmovdqa 80(%esp),%xmm7 - vpmuludq (%edx),%xmm6,%xmm6 - vpaddq %xmm6,%xmm1,%xmm1 - vmovdqa 32(%esp),%xmm5 - vpmuludq 64(%edx),%xmm7,%xmm7 - vpaddq %xmm7,%xmm0,%xmm0 - vpmuludq 32(%edx),%xmm5,%xmm6 - vpaddq %xmm6,%xmm4,%xmm4 - vpmuludq 16(%edx),%xmm5,%xmm7 - vpaddq %xmm7,%xmm3,%xmm3 - vmovdqa 96(%esp),%xmm6 - vpmuludq (%edx),%xmm5,%xmm5 - vpaddq %xmm5,%xmm2,%xmm2 - vpmuludq 64(%edx),%xmm6,%xmm7 - vpaddq %xmm7,%xmm1,%xmm1 - vmovdqa 48(%esp),%xmm5 - vpmuludq 48(%edx),%xmm6,%xmm6 - vpaddq %xmm6,%xmm0,%xmm0 - vpmuludq 16(%edx),%xmm5,%xmm7 - vpaddq %xmm7,%xmm4,%xmm4 - vmovdqa 112(%esp),%xmm6 - vpmuludq (%edx),%xmm5,%xmm5 - vpaddq %xmm5,%xmm3,%xmm3 - vpmuludq 64(%edx),%xmm6,%xmm7 - vpaddq %xmm7,%xmm2,%xmm2 - vpmuludq 48(%edx),%xmm6,%xmm5 - vpaddq %xmm5,%xmm1,%xmm1 - vmovdqa 64(%esp),%xmm7 - vpmuludq 32(%edx),%xmm6,%xmm6 - vpaddq %xmm6,%xmm0,%xmm0 - vmovdqa 128(%esp),%xmm5 - vpmuludq (%edx),%xmm7,%xmm7 - vpaddq %xmm7,%xmm4,%xmm4 - vpmuludq 64(%edx),%xmm5,%xmm6 - vpaddq %xmm6,%xmm3,%xmm3 - vpmuludq 16(%edx),%xmm5,%xmm7 - vpaddq %xmm7,%xmm0,%xmm0 - vpmuludq 32(%edx),%xmm5,%xmm6 - vpaddq %xmm6,%xmm1,%xmm1 - vmovdqa 64(%ebx),%xmm7 - vpmuludq 48(%edx),%xmm5,%xmm5 - vpaddq %xmm5,%xmm2,%xmm2 - vpsrlq $26,%xmm3,%xmm5 - vpand %xmm7,%xmm3,%xmm3 - vpsrlq $26,%xmm0,%xmm6 - vpand %xmm7,%xmm0,%xmm0 - vpaddq %xmm5,%xmm4,%xmm4 - vpaddq %xmm6,%xmm1,%xmm1 - vpsrlq $26,%xmm4,%xmm5 - vpand %xmm7,%xmm4,%xmm4 - vpsrlq $26,%xmm1,%xmm6 - vpand %xmm7,%xmm1,%xmm1 - vpaddq %xmm6,%xmm2,%xmm2 - vpaddd %xmm5,%xmm0,%xmm0 - vpsllq $2,%xmm5,%xmm5 - vpsrlq $26,%xmm2,%xmm6 - vpand %xmm7,%xmm2,%xmm2 - vpaddd %xmm5,%xmm0,%xmm0 - vpaddd %xmm6,%xmm3,%xmm3 - vpsrlq $26,%xmm3,%xmm6 - vpsrlq $26,%xmm0,%xmm5 - vpand %xmm7,%xmm0,%xmm0 - vpand %xmm7,%xmm3,%xmm3 - vpaddd %xmm5,%xmm1,%xmm1 - vpaddd %xmm6,%xmm4,%xmm4 - decl %ecx - jz .L019square_break - vpunpcklqdq (%esp),%xmm0,%xmm0 - vpunpcklqdq 16(%esp),%xmm1,%xmm1 - vpunpcklqdq 32(%esp),%xmm2,%xmm2 - vpunpcklqdq 48(%esp),%xmm3,%xmm3 - vpunpcklqdq 64(%esp),%xmm4,%xmm4 - jmp .L018square -.L019square_break: - vpsllq $32,%xmm0,%xmm0 - vpsllq $32,%xmm1,%xmm1 - vpsllq $32,%xmm2,%xmm2 - vpsllq $32,%xmm3,%xmm3 - vpsllq $32,%xmm4,%xmm4 - vpor (%esp),%xmm0,%xmm0 - vpor 16(%esp),%xmm1,%xmm1 - vpor 32(%esp),%xmm2,%xmm2 - vpor 48(%esp),%xmm3,%xmm3 - vpor 64(%esp),%xmm4,%xmm4 - vpshufd $141,%xmm0,%xmm0 - vpshufd $141,%xmm1,%xmm1 - vpshufd $141,%xmm2,%xmm2 - vpshufd $141,%xmm3,%xmm3 - vpshufd $141,%xmm4,%xmm4 - vmovdqu %xmm0,(%edi) - vmovdqu %xmm1,16(%edi) - vmovdqu %xmm2,32(%edi) - vmovdqu %xmm3,48(%edi) - vmovdqu %xmm4,64(%edi) - vpslld $2,%xmm1,%xmm6 - vpslld $2,%xmm2,%xmm5 - vpaddd %xmm1,%xmm6,%xmm6 - vpaddd %xmm2,%xmm5,%xmm5 - vmovdqu %xmm6,80(%edi) - vmovdqu %xmm5,96(%edi) - vpslld $2,%xmm3,%xmm6 - vpslld $2,%xmm4,%xmm5 - vpaddd %xmm3,%xmm6,%xmm6 - vpaddd %xmm4,%xmm5,%xmm5 - vmovdqu %xmm6,112(%edi) - vmovdqu %xmm5,128(%edi) - movl %ebp,%esp - leal -48(%edi),%edi - ret -.size _poly1305_init_avx2,.-_poly1305_init_avx2 -.align 32 -.type _poly1305_blocks_avx2,@function -.align 16 -_poly1305_blocks_avx2: - pushl 
%ebp - pushl %ebx - pushl %esi - pushl %edi - movl 20(%esp),%edi - movl 24(%esp),%esi - movl 28(%esp),%ecx - movl 20(%edi),%eax - andl $-16,%ecx - jz .L020nodata - cmpl $64,%ecx - jae .L021enter_avx2 - testl %eax,%eax - jz .Lenter_blocks -.L021enter_avx2: - vzeroupper - call .L022pic_point -.L022pic_point: - popl %ebx - leal .Lconst_sse2-.L022pic_point(%ebx),%ebx - testl %eax,%eax - jnz .L023base2_26 - call _poly1305_init_avx2 - movl (%edi),%eax - movl 3(%edi),%ecx - movl 6(%edi),%edx - movl 9(%edi),%esi - movl 13(%edi),%ebp - shrl $2,%ecx - andl $67108863,%eax - shrl $4,%edx - andl $67108863,%ecx - shrl $6,%esi - andl $67108863,%edx - movl %eax,(%edi) - movl %ecx,4(%edi) - movl %edx,8(%edi) - movl %esi,12(%edi) - movl %ebp,16(%edi) - movl $1,20(%edi) - movl 24(%esp),%esi - movl 28(%esp),%ecx -.L023base2_26: - movl 32(%esp),%eax - movl %esp,%ebp - subl $448,%esp - andl $-512,%esp - vmovdqu 48(%edi),%xmm0 - leal 288(%esp),%edx - vmovdqu 64(%edi),%xmm1 - vmovdqu 80(%edi),%xmm2 - vmovdqu 96(%edi),%xmm3 - vmovdqu 112(%edi),%xmm4 - leal 48(%edi),%edi - vpermq $64,%ymm0,%ymm0 - vpermq $64,%ymm1,%ymm1 - vpermq $64,%ymm2,%ymm2 - vpermq $64,%ymm3,%ymm3 - vpermq $64,%ymm4,%ymm4 - vpshufd $200,%ymm0,%ymm0 - vpshufd $200,%ymm1,%ymm1 - vpshufd $200,%ymm2,%ymm2 - vpshufd $200,%ymm3,%ymm3 - vpshufd $200,%ymm4,%ymm4 - vmovdqa %ymm0,-128(%edx) - vmovdqu 80(%edi),%xmm0 - vmovdqa %ymm1,-96(%edx) - vmovdqu 96(%edi),%xmm1 - vmovdqa %ymm2,-64(%edx) - vmovdqu 112(%edi),%xmm2 - vmovdqa %ymm3,-32(%edx) - vmovdqu 128(%edi),%xmm3 - vmovdqa %ymm4,(%edx) - vpermq $64,%ymm0,%ymm0 - vpermq $64,%ymm1,%ymm1 - vpermq $64,%ymm2,%ymm2 - vpermq $64,%ymm3,%ymm3 - vpshufd $200,%ymm0,%ymm0 - vpshufd $200,%ymm1,%ymm1 - vpshufd $200,%ymm2,%ymm2 - vpshufd $200,%ymm3,%ymm3 - vmovdqa %ymm0,32(%edx) - vmovd -48(%edi),%xmm0 - vmovdqa %ymm1,64(%edx) - vmovd -44(%edi),%xmm1 - vmovdqa %ymm2,96(%edx) - vmovd -40(%edi),%xmm2 - vmovdqa %ymm3,128(%edx) - vmovd -36(%edi),%xmm3 - vmovd -32(%edi),%xmm4 - vmovdqa 64(%ebx),%ymm7 - negl %eax - testl $63,%ecx - jz .L024even - movl %ecx,%edx - andl $-64,%ecx - andl $63,%edx - vmovdqu (%esi),%xmm5 - cmpl $32,%edx - jb .L025one - vmovdqu 16(%esi),%xmm6 - je .L026two - vinserti128 $1,32(%esi),%ymm5,%ymm5 - leal 48(%esi),%esi - leal 8(%ebx),%ebx - leal 296(%esp),%edx - jmp .L027tail -.L026two: - leal 32(%esi),%esi - leal 16(%ebx),%ebx - leal 304(%esp),%edx - jmp .L027tail -.L025one: - leal 16(%esi),%esi - vpxor %ymm6,%ymm6,%ymm6 - leal 32(%ebx,%eax,8),%ebx - leal 312(%esp),%edx - jmp .L027tail -.align 32 -.L024even: - vmovdqu (%esi),%xmm5 - vmovdqu 16(%esi),%xmm6 - vinserti128 $1,32(%esi),%ymm5,%ymm5 - vinserti128 $1,48(%esi),%ymm6,%ymm6 - leal 64(%esi),%esi - subl $64,%ecx - jz .L027tail -.L028loop: - vmovdqa %ymm2,64(%esp) - vpsrldq $6,%ymm5,%ymm2 - vmovdqa %ymm0,(%esp) - vpsrldq $6,%ymm6,%ymm0 - vmovdqa %ymm1,32(%esp) - vpunpckhqdq %ymm6,%ymm5,%ymm1 - vpunpcklqdq %ymm6,%ymm5,%ymm5 - vpunpcklqdq %ymm0,%ymm2,%ymm2 - vpsrlq $30,%ymm2,%ymm0 - vpsrlq $4,%ymm2,%ymm2 - vpsrlq $26,%ymm5,%ymm6 - vpsrlq $40,%ymm1,%ymm1 - vpand %ymm7,%ymm2,%ymm2 - vpand %ymm7,%ymm5,%ymm5 - vpand %ymm7,%ymm6,%ymm6 - vpand %ymm7,%ymm0,%ymm0 - vpor (%ebx),%ymm1,%ymm1 - vpaddq 64(%esp),%ymm2,%ymm2 - vpaddq (%esp),%ymm5,%ymm5 - vpaddq 32(%esp),%ymm6,%ymm6 - vpaddq %ymm3,%ymm0,%ymm0 - vpaddq %ymm4,%ymm1,%ymm1 - vpmuludq -96(%edx),%ymm2,%ymm3 - vmovdqa %ymm6,32(%esp) - vpmuludq -64(%edx),%ymm2,%ymm4 - vmovdqa %ymm0,96(%esp) - vpmuludq 96(%edx),%ymm2,%ymm0 - vmovdqa %ymm1,128(%esp) - vpmuludq 128(%edx),%ymm2,%ymm1 - vpmuludq 
-128(%edx),%ymm2,%ymm2 - vpmuludq -32(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm3,%ymm3 - vpmuludq (%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm4,%ymm4 - vpmuludq -128(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm0,%ymm0 - vmovdqa 32(%esp),%ymm7 - vpmuludq -96(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm1,%ymm1 - vpmuludq -64(%edx),%ymm5,%ymm5 - vpaddq %ymm5,%ymm2,%ymm2 - vpmuludq -64(%edx),%ymm7,%ymm6 - vpaddq %ymm6,%ymm3,%ymm3 - vpmuludq -32(%edx),%ymm7,%ymm5 - vpaddq %ymm5,%ymm4,%ymm4 - vpmuludq 128(%edx),%ymm7,%ymm6 - vpaddq %ymm6,%ymm0,%ymm0 - vmovdqa 96(%esp),%ymm6 - vpmuludq -128(%edx),%ymm7,%ymm5 - vpaddq %ymm5,%ymm1,%ymm1 - vpmuludq -96(%edx),%ymm7,%ymm7 - vpaddq %ymm7,%ymm2,%ymm2 - vpmuludq -128(%edx),%ymm6,%ymm5 - vpaddq %ymm5,%ymm3,%ymm3 - vpmuludq -96(%edx),%ymm6,%ymm7 - vpaddq %ymm7,%ymm4,%ymm4 - vpmuludq 64(%edx),%ymm6,%ymm5 - vpaddq %ymm5,%ymm0,%ymm0 - vmovdqa 128(%esp),%ymm5 - vpmuludq 96(%edx),%ymm6,%ymm7 - vpaddq %ymm7,%ymm1,%ymm1 - vpmuludq 128(%edx),%ymm6,%ymm6 - vpaddq %ymm6,%ymm2,%ymm2 - vpmuludq 128(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm3,%ymm3 - vpmuludq 32(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm0,%ymm0 - vpmuludq -128(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm4,%ymm4 - vmovdqa 64(%ebx),%ymm7 - vpmuludq 64(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm1,%ymm1 - vpmuludq 96(%edx),%ymm5,%ymm5 - vpaddq %ymm5,%ymm2,%ymm2 - vpsrlq $26,%ymm3,%ymm5 - vpand %ymm7,%ymm3,%ymm3 - vpsrlq $26,%ymm0,%ymm6 - vpand %ymm7,%ymm0,%ymm0 - vpaddq %ymm5,%ymm4,%ymm4 - vpaddq %ymm6,%ymm1,%ymm1 - vpsrlq $26,%ymm4,%ymm5 - vpand %ymm7,%ymm4,%ymm4 - vpsrlq $26,%ymm1,%ymm6 - vpand %ymm7,%ymm1,%ymm1 - vpaddq %ymm6,%ymm2,%ymm2 - vpaddq %ymm5,%ymm0,%ymm0 - vpsllq $2,%ymm5,%ymm5 - vpsrlq $26,%ymm2,%ymm6 - vpand %ymm7,%ymm2,%ymm2 - vpaddq %ymm5,%ymm0,%ymm0 - vpaddq %ymm6,%ymm3,%ymm3 - vpsrlq $26,%ymm3,%ymm6 - vpsrlq $26,%ymm0,%ymm5 - vpand %ymm7,%ymm0,%ymm0 - vpand %ymm7,%ymm3,%ymm3 - vpaddq %ymm5,%ymm1,%ymm1 - vpaddq %ymm6,%ymm4,%ymm4 - vmovdqu (%esi),%xmm5 - vmovdqu 16(%esi),%xmm6 - vinserti128 $1,32(%esi),%ymm5,%ymm5 - vinserti128 $1,48(%esi),%ymm6,%ymm6 - leal 64(%esi),%esi - subl $64,%ecx - jnz .L028loop -.L027tail: - vmovdqa %ymm2,64(%esp) - vpsrldq $6,%ymm5,%ymm2 - vmovdqa %ymm0,(%esp) - vpsrldq $6,%ymm6,%ymm0 - vmovdqa %ymm1,32(%esp) - vpunpckhqdq %ymm6,%ymm5,%ymm1 - vpunpcklqdq %ymm6,%ymm5,%ymm5 - vpunpcklqdq %ymm0,%ymm2,%ymm2 - vpsrlq $30,%ymm2,%ymm0 - vpsrlq $4,%ymm2,%ymm2 - vpsrlq $26,%ymm5,%ymm6 - vpsrlq $40,%ymm1,%ymm1 - vpand %ymm7,%ymm2,%ymm2 - vpand %ymm7,%ymm5,%ymm5 - vpand %ymm7,%ymm6,%ymm6 - vpand %ymm7,%ymm0,%ymm0 - vpor (%ebx),%ymm1,%ymm1 - andl $-64,%ebx - vpaddq 64(%esp),%ymm2,%ymm2 - vpaddq (%esp),%ymm5,%ymm5 - vpaddq 32(%esp),%ymm6,%ymm6 - vpaddq %ymm3,%ymm0,%ymm0 - vpaddq %ymm4,%ymm1,%ymm1 - vpmuludq -92(%edx),%ymm2,%ymm3 - vmovdqa %ymm6,32(%esp) - vpmuludq -60(%edx),%ymm2,%ymm4 - vmovdqa %ymm0,96(%esp) - vpmuludq 100(%edx),%ymm2,%ymm0 - vmovdqa %ymm1,128(%esp) - vpmuludq 132(%edx),%ymm2,%ymm1 - vpmuludq -124(%edx),%ymm2,%ymm2 - vpmuludq -28(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm3,%ymm3 - vpmuludq 4(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm4,%ymm4 - vpmuludq -124(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm0,%ymm0 - vmovdqa 32(%esp),%ymm7 - vpmuludq -92(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm1,%ymm1 - vpmuludq -60(%edx),%ymm5,%ymm5 - vpaddq %ymm5,%ymm2,%ymm2 - vpmuludq -60(%edx),%ymm7,%ymm6 - vpaddq %ymm6,%ymm3,%ymm3 - vpmuludq -28(%edx),%ymm7,%ymm5 - vpaddq %ymm5,%ymm4,%ymm4 - vpmuludq 132(%edx),%ymm7,%ymm6 - vpaddq %ymm6,%ymm0,%ymm0 - vmovdqa 96(%esp),%ymm6 - vpmuludq -124(%edx),%ymm7,%ymm5 - vpaddq %ymm5,%ymm1,%ymm1 - 
vpmuludq -92(%edx),%ymm7,%ymm7 - vpaddq %ymm7,%ymm2,%ymm2 - vpmuludq -124(%edx),%ymm6,%ymm5 - vpaddq %ymm5,%ymm3,%ymm3 - vpmuludq -92(%edx),%ymm6,%ymm7 - vpaddq %ymm7,%ymm4,%ymm4 - vpmuludq 68(%edx),%ymm6,%ymm5 - vpaddq %ymm5,%ymm0,%ymm0 - vmovdqa 128(%esp),%ymm5 - vpmuludq 100(%edx),%ymm6,%ymm7 - vpaddq %ymm7,%ymm1,%ymm1 - vpmuludq 132(%edx),%ymm6,%ymm6 - vpaddq %ymm6,%ymm2,%ymm2 - vpmuludq 132(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm3,%ymm3 - vpmuludq 36(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm0,%ymm0 - vpmuludq -124(%edx),%ymm5,%ymm7 - vpaddq %ymm7,%ymm4,%ymm4 - vmovdqa 64(%ebx),%ymm7 - vpmuludq 68(%edx),%ymm5,%ymm6 - vpaddq %ymm6,%ymm1,%ymm1 - vpmuludq 100(%edx),%ymm5,%ymm5 - vpaddq %ymm5,%ymm2,%ymm2 - vpsrldq $8,%ymm4,%ymm5 - vpsrldq $8,%ymm3,%ymm6 - vpaddq %ymm5,%ymm4,%ymm4 - vpsrldq $8,%ymm0,%ymm5 - vpaddq %ymm6,%ymm3,%ymm3 - vpsrldq $8,%ymm1,%ymm6 - vpaddq %ymm5,%ymm0,%ymm0 - vpsrldq $8,%ymm2,%ymm5 - vpaddq %ymm6,%ymm1,%ymm1 - vpermq $2,%ymm4,%ymm6 - vpaddq %ymm5,%ymm2,%ymm2 - vpermq $2,%ymm3,%ymm5 - vpaddq %ymm6,%ymm4,%ymm4 - vpermq $2,%ymm0,%ymm6 - vpaddq %ymm5,%ymm3,%ymm3 - vpermq $2,%ymm1,%ymm5 - vpaddq %ymm6,%ymm0,%ymm0 - vpermq $2,%ymm2,%ymm6 - vpaddq %ymm5,%ymm1,%ymm1 - vpaddq %ymm6,%ymm2,%ymm2 - vpsrlq $26,%ymm3,%ymm5 - vpand %ymm7,%ymm3,%ymm3 - vpsrlq $26,%ymm0,%ymm6 - vpand %ymm7,%ymm0,%ymm0 - vpaddq %ymm5,%ymm4,%ymm4 - vpaddq %ymm6,%ymm1,%ymm1 - vpsrlq $26,%ymm4,%ymm5 - vpand %ymm7,%ymm4,%ymm4 - vpsrlq $26,%ymm1,%ymm6 - vpand %ymm7,%ymm1,%ymm1 - vpaddq %ymm6,%ymm2,%ymm2 - vpaddq %ymm5,%ymm0,%ymm0 - vpsllq $2,%ymm5,%ymm5 - vpsrlq $26,%ymm2,%ymm6 - vpand %ymm7,%ymm2,%ymm2 - vpaddq %ymm5,%ymm0,%ymm0 - vpaddq %ymm6,%ymm3,%ymm3 - vpsrlq $26,%ymm3,%ymm6 - vpsrlq $26,%ymm0,%ymm5 - vpand %ymm7,%ymm0,%ymm0 - vpand %ymm7,%ymm3,%ymm3 - vpaddq %ymm5,%ymm1,%ymm1 - vpaddq %ymm6,%ymm4,%ymm4 - cmpl $0,%ecx - je .L029done - vpshufd $252,%xmm0,%xmm0 - leal 288(%esp),%edx - vpshufd $252,%xmm1,%xmm1 - vpshufd $252,%xmm2,%xmm2 - vpshufd $252,%xmm3,%xmm3 - vpshufd $252,%xmm4,%xmm4 - jmp .L024even -.align 16 -.L029done: - vmovd %xmm0,-48(%edi) - vmovd %xmm1,-44(%edi) - vmovd %xmm2,-40(%edi) - vmovd %xmm3,-36(%edi) - vmovd %xmm4,-32(%edi) - vzeroupper - movl %ebp,%esp -.L020nodata: - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size _poly1305_blocks_avx2,.-_poly1305_blocks_avx2 .align 64 .Lconst_sse2: .long 16777216,0,16777216,0,16777216,0,16777216,0 diff --git a/secure/lib/libcrypto/i386/sha1-586.S b/secure/lib/libcrypto/i386/sha1-586.S index 7e90e2d9b1d2..49e7482b8161 100644 --- a/secure/lib/libcrypto/i386/sha1-586.S +++ b/secure/lib/libcrypto/i386/sha1-586.S @@ -25,11 +25,6 @@ sha1_block_data_order: jz .L001x86 testl $536870912,%ecx jnz .Lshaext_shortcut - andl $268435456,%edx - andl $1073741824,%eax - orl %edx,%eax - cmpl $1342177280,%eax - je .Lavx_shortcut jmp .Lssse3_shortcut .align 16 .L001x86: @@ -2787,1176 +2782,6 @@ _sha1_block_data_order_ssse3: popl %ebp ret .size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3 -.type _sha1_block_data_order_avx,@function -.align 16 -_sha1_block_data_order_avx: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - call .L008pic_point -.L008pic_point: - popl %ebp - leal .LK_XX_XX-.L008pic_point(%ebp),%ebp -.Lavx_shortcut: - vzeroall - vmovdqa (%ebp),%xmm7 - vmovdqa 16(%ebp),%xmm0 - vmovdqa 32(%ebp),%xmm1 - vmovdqa 48(%ebp),%xmm2 - vmovdqa 64(%ebp),%xmm6 - movl 20(%esp),%edi - movl 24(%esp),%ebp - movl 28(%esp),%edx - movl %esp,%esi - subl $208,%esp - andl $-64,%esp - vmovdqa %xmm0,112(%esp) - vmovdqa %xmm1,128(%esp) - vmovdqa 
%xmm2,144(%esp) - shll $6,%edx - vmovdqa %xmm7,160(%esp) - addl %ebp,%edx - vmovdqa %xmm6,176(%esp) - addl $64,%ebp - movl %edi,192(%esp) - movl %ebp,196(%esp) - movl %edx,200(%esp) - movl %esi,204(%esp) - movl (%edi),%eax - movl 4(%edi),%ebx - movl 8(%edi),%ecx - movl 12(%edi),%edx - movl 16(%edi),%edi - movl %ebx,%esi - vmovdqu -64(%ebp),%xmm0 - vmovdqu -48(%ebp),%xmm1 - vmovdqu -32(%ebp),%xmm2 - vmovdqu -16(%ebp),%xmm3 - vpshufb %xmm6,%xmm0,%xmm0 - vpshufb %xmm6,%xmm1,%xmm1 - vpshufb %xmm6,%xmm2,%xmm2 - vmovdqa %xmm7,96(%esp) - vpshufb %xmm6,%xmm3,%xmm3 - vpaddd %xmm7,%xmm0,%xmm4 - vpaddd %xmm7,%xmm1,%xmm5 - vpaddd %xmm7,%xmm2,%xmm6 - vmovdqa %xmm4,(%esp) - movl %ecx,%ebp - vmovdqa %xmm5,16(%esp) - xorl %edx,%ebp - vmovdqa %xmm6,32(%esp) - andl %ebp,%esi - jmp .L009loop -.align 16 -.L009loop: - shrdl $2,%ebx,%ebx - xorl %edx,%esi - vpalignr $8,%xmm0,%xmm1,%xmm4 - movl %eax,%ebp - addl (%esp),%edi - vpaddd %xmm3,%xmm7,%xmm7 - vmovdqa %xmm0,64(%esp) - xorl %ecx,%ebx - shldl $5,%eax,%eax - vpsrldq $4,%xmm3,%xmm6 - addl %esi,%edi - andl %ebx,%ebp - vpxor %xmm0,%xmm4,%xmm4 - xorl %ecx,%ebx - addl %eax,%edi - vpxor %xmm2,%xmm6,%xmm6 - shrdl $7,%eax,%eax - xorl %ecx,%ebp - vmovdqa %xmm7,48(%esp) - movl %edi,%esi - addl 4(%esp),%edx - vpxor %xmm6,%xmm4,%xmm4 - xorl %ebx,%eax - shldl $5,%edi,%edi - addl %ebp,%edx - andl %eax,%esi - vpsrld $31,%xmm4,%xmm6 - xorl %ebx,%eax - addl %edi,%edx - shrdl $7,%edi,%edi - xorl %ebx,%esi - vpslldq $12,%xmm4,%xmm0 - vpaddd %xmm4,%xmm4,%xmm4 - movl %edx,%ebp - addl 8(%esp),%ecx - xorl %eax,%edi - shldl $5,%edx,%edx - vpsrld $30,%xmm0,%xmm7 - vpor %xmm6,%xmm4,%xmm4 - addl %esi,%ecx - andl %edi,%ebp - xorl %eax,%edi - addl %edx,%ecx - vpslld $2,%xmm0,%xmm0 - shrdl $7,%edx,%edx - xorl %eax,%ebp - vpxor %xmm7,%xmm4,%xmm4 - movl %ecx,%esi - addl 12(%esp),%ebx - xorl %edi,%edx - shldl $5,%ecx,%ecx - vpxor %xmm0,%xmm4,%xmm4 - addl %ebp,%ebx - andl %edx,%esi - vmovdqa 96(%esp),%xmm0 - xorl %edi,%edx - addl %ecx,%ebx - shrdl $7,%ecx,%ecx - xorl %edi,%esi - vpalignr $8,%xmm1,%xmm2,%xmm5 - movl %ebx,%ebp - addl 16(%esp),%eax - vpaddd %xmm4,%xmm0,%xmm0 - vmovdqa %xmm1,80(%esp) - xorl %edx,%ecx - shldl $5,%ebx,%ebx - vpsrldq $4,%xmm4,%xmm7 - addl %esi,%eax - andl %ecx,%ebp - vpxor %xmm1,%xmm5,%xmm5 - xorl %edx,%ecx - addl %ebx,%eax - vpxor %xmm3,%xmm7,%xmm7 - shrdl $7,%ebx,%ebx - xorl %edx,%ebp - vmovdqa %xmm0,(%esp) - movl %eax,%esi - addl 20(%esp),%edi - vpxor %xmm7,%xmm5,%xmm5 - xorl %ecx,%ebx - shldl $5,%eax,%eax - addl %ebp,%edi - andl %ebx,%esi - vpsrld $31,%xmm5,%xmm7 - xorl %ecx,%ebx - addl %eax,%edi - shrdl $7,%eax,%eax - xorl %ecx,%esi - vpslldq $12,%xmm5,%xmm1 - vpaddd %xmm5,%xmm5,%xmm5 - movl %edi,%ebp - addl 24(%esp),%edx - xorl %ebx,%eax - shldl $5,%edi,%edi - vpsrld $30,%xmm1,%xmm0 - vpor %xmm7,%xmm5,%xmm5 - addl %esi,%edx - andl %eax,%ebp - xorl %ebx,%eax - addl %edi,%edx - vpslld $2,%xmm1,%xmm1 - shrdl $7,%edi,%edi - xorl %ebx,%ebp - vpxor %xmm0,%xmm5,%xmm5 - movl %edx,%esi - addl 28(%esp),%ecx - xorl %eax,%edi - shldl $5,%edx,%edx - vpxor %xmm1,%xmm5,%xmm5 - addl %ebp,%ecx - andl %edi,%esi - vmovdqa 112(%esp),%xmm1 - xorl %eax,%edi - addl %edx,%ecx - shrdl $7,%edx,%edx - xorl %eax,%esi - vpalignr $8,%xmm2,%xmm3,%xmm6 - movl %ecx,%ebp - addl 32(%esp),%ebx - vpaddd %xmm5,%xmm1,%xmm1 - vmovdqa %xmm2,96(%esp) - xorl %edi,%edx - shldl $5,%ecx,%ecx - vpsrldq $4,%xmm5,%xmm0 - addl %esi,%ebx - andl %edx,%ebp - vpxor %xmm2,%xmm6,%xmm6 - xorl %edi,%edx - addl %ecx,%ebx - vpxor %xmm4,%xmm0,%xmm0 - shrdl $7,%ecx,%ecx - xorl %edi,%ebp - vmovdqa %xmm1,16(%esp) - movl 
%ebx,%esi - addl 36(%esp),%eax - vpxor %xmm0,%xmm6,%xmm6 - xorl %edx,%ecx - shldl $5,%ebx,%ebx - addl %ebp,%eax - andl %ecx,%esi - vpsrld $31,%xmm6,%xmm0 - xorl %edx,%ecx - addl %ebx,%eax - shrdl $7,%ebx,%ebx - xorl %edx,%esi - vpslldq $12,%xmm6,%xmm2 - vpaddd %xmm6,%xmm6,%xmm6 - movl %eax,%ebp - addl 40(%esp),%edi - xorl %ecx,%ebx - shldl $5,%eax,%eax - vpsrld $30,%xmm2,%xmm1 - vpor %xmm0,%xmm6,%xmm6 - addl %esi,%edi - andl %ebx,%ebp - xorl %ecx,%ebx - addl %eax,%edi - vpslld $2,%xmm2,%xmm2 - vmovdqa 64(%esp),%xmm0 - shrdl $7,%eax,%eax - xorl %ecx,%ebp - vpxor %xmm1,%xmm6,%xmm6 - movl %edi,%esi - addl 44(%esp),%edx - xorl %ebx,%eax - shldl $5,%edi,%edi - vpxor %xmm2,%xmm6,%xmm6 - addl %ebp,%edx - andl %eax,%esi - vmovdqa 112(%esp),%xmm2 - xorl %ebx,%eax - addl %edi,%edx - shrdl $7,%edi,%edi - xorl %ebx,%esi - vpalignr $8,%xmm3,%xmm4,%xmm7 - movl %edx,%ebp - addl 48(%esp),%ecx - vpaddd %xmm6,%xmm2,%xmm2 - vmovdqa %xmm3,64(%esp) - xorl %eax,%edi - shldl $5,%edx,%edx - vpsrldq $4,%xmm6,%xmm1 - addl %esi,%ecx - andl %edi,%ebp - vpxor %xmm3,%xmm7,%xmm7 - xorl %eax,%edi - addl %edx,%ecx - vpxor %xmm5,%xmm1,%xmm1 - shrdl $7,%edx,%edx - xorl %eax,%ebp - vmovdqa %xmm2,32(%esp) - movl %ecx,%esi - addl 52(%esp),%ebx - vpxor %xmm1,%xmm7,%xmm7 - xorl %edi,%edx - shldl $5,%ecx,%ecx - addl %ebp,%ebx - andl %edx,%esi - vpsrld $31,%xmm7,%xmm1 - xorl %edi,%edx - addl %ecx,%ebx - shrdl $7,%ecx,%ecx - xorl %edi,%esi - vpslldq $12,%xmm7,%xmm3 - vpaddd %xmm7,%xmm7,%xmm7 - movl %ebx,%ebp - addl 56(%esp),%eax - xorl %edx,%ecx - shldl $5,%ebx,%ebx - vpsrld $30,%xmm3,%xmm2 - vpor %xmm1,%xmm7,%xmm7 - addl %esi,%eax - andl %ecx,%ebp - xorl %edx,%ecx - addl %ebx,%eax - vpslld $2,%xmm3,%xmm3 - vmovdqa 80(%esp),%xmm1 - shrdl $7,%ebx,%ebx - xorl %edx,%ebp - vpxor %xmm2,%xmm7,%xmm7 - movl %eax,%esi - addl 60(%esp),%edi - xorl %ecx,%ebx - shldl $5,%eax,%eax - vpxor %xmm3,%xmm7,%xmm7 - addl %ebp,%edi - andl %ebx,%esi - vmovdqa 112(%esp),%xmm3 - xorl %ecx,%ebx - addl %eax,%edi - vpalignr $8,%xmm6,%xmm7,%xmm2 - vpxor %xmm4,%xmm0,%xmm0 - shrdl $7,%eax,%eax - xorl %ecx,%esi - movl %edi,%ebp - addl (%esp),%edx - vpxor %xmm1,%xmm0,%xmm0 - vmovdqa %xmm4,80(%esp) - xorl %ebx,%eax - shldl $5,%edi,%edi - vmovdqa %xmm3,%xmm4 - vpaddd %xmm7,%xmm3,%xmm3 - addl %esi,%edx - andl %eax,%ebp - vpxor %xmm2,%xmm0,%xmm0 - xorl %ebx,%eax - addl %edi,%edx - shrdl $7,%edi,%edi - xorl %ebx,%ebp - vpsrld $30,%xmm0,%xmm2 - vmovdqa %xmm3,48(%esp) - movl %edx,%esi - addl 4(%esp),%ecx - xorl %eax,%edi - shldl $5,%edx,%edx - vpslld $2,%xmm0,%xmm0 - addl %ebp,%ecx - andl %edi,%esi - xorl %eax,%edi - addl %edx,%ecx - shrdl $7,%edx,%edx - xorl %eax,%esi - movl %ecx,%ebp - addl 8(%esp),%ebx - vpor %xmm2,%xmm0,%xmm0 - xorl %edi,%edx - shldl $5,%ecx,%ecx - vmovdqa 96(%esp),%xmm2 - addl %esi,%ebx - andl %edx,%ebp - xorl %edi,%edx - addl %ecx,%ebx - addl 12(%esp),%eax - xorl %edi,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %edx,%esi - shrdl $7,%ecx,%ecx - addl %ebx,%eax - vpalignr $8,%xmm7,%xmm0,%xmm3 - vpxor %xmm5,%xmm1,%xmm1 - addl 16(%esp),%edi - xorl %ecx,%esi - movl %eax,%ebp - shldl $5,%eax,%eax - vpxor %xmm2,%xmm1,%xmm1 - vmovdqa %xmm5,96(%esp) - addl %esi,%edi - xorl %ecx,%ebp - vmovdqa %xmm4,%xmm5 - vpaddd %xmm0,%xmm4,%xmm4 - shrdl $7,%ebx,%ebx - addl %eax,%edi - vpxor %xmm3,%xmm1,%xmm1 - addl 20(%esp),%edx - xorl %ebx,%ebp - movl %edi,%esi - shldl $5,%edi,%edi - vpsrld $30,%xmm1,%xmm3 - vmovdqa %xmm4,(%esp) - addl %ebp,%edx - xorl %ebx,%esi - shrdl $7,%eax,%eax - addl %edi,%edx - vpslld $2,%xmm1,%xmm1 - addl 24(%esp),%ecx - xorl 
%eax,%esi - movl %edx,%ebp - shldl $5,%edx,%edx - addl %esi,%ecx - xorl %eax,%ebp - shrdl $7,%edi,%edi - addl %edx,%ecx - vpor %xmm3,%xmm1,%xmm1 - addl 28(%esp),%ebx - xorl %edi,%ebp - vmovdqa 64(%esp),%xmm3 - movl %ecx,%esi - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edi,%esi - shrdl $7,%edx,%edx - addl %ecx,%ebx - vpalignr $8,%xmm0,%xmm1,%xmm4 - vpxor %xmm6,%xmm2,%xmm2 - addl 32(%esp),%eax - xorl %edx,%esi - movl %ebx,%ebp - shldl $5,%ebx,%ebx - vpxor %xmm3,%xmm2,%xmm2 - vmovdqa %xmm6,64(%esp) - addl %esi,%eax - xorl %edx,%ebp - vmovdqa 128(%esp),%xmm6 - vpaddd %xmm1,%xmm5,%xmm5 - shrdl $7,%ecx,%ecx - addl %ebx,%eax - vpxor %xmm4,%xmm2,%xmm2 - addl 36(%esp),%edi - xorl %ecx,%ebp - movl %eax,%esi - shldl $5,%eax,%eax - vpsrld $30,%xmm2,%xmm4 - vmovdqa %xmm5,16(%esp) - addl %ebp,%edi - xorl %ecx,%esi - shrdl $7,%ebx,%ebx - addl %eax,%edi - vpslld $2,%xmm2,%xmm2 - addl 40(%esp),%edx - xorl %ebx,%esi - movl %edi,%ebp - shldl $5,%edi,%edi - addl %esi,%edx - xorl %ebx,%ebp - shrdl $7,%eax,%eax - addl %edi,%edx - vpor %xmm4,%xmm2,%xmm2 - addl 44(%esp),%ecx - xorl %eax,%ebp - vmovdqa 80(%esp),%xmm4 - movl %edx,%esi - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %eax,%esi - shrdl $7,%edi,%edi - addl %edx,%ecx - vpalignr $8,%xmm1,%xmm2,%xmm5 - vpxor %xmm7,%xmm3,%xmm3 - addl 48(%esp),%ebx - xorl %edi,%esi - movl %ecx,%ebp - shldl $5,%ecx,%ecx - vpxor %xmm4,%xmm3,%xmm3 - vmovdqa %xmm7,80(%esp) - addl %esi,%ebx - xorl %edi,%ebp - vmovdqa %xmm6,%xmm7 - vpaddd %xmm2,%xmm6,%xmm6 - shrdl $7,%edx,%edx - addl %ecx,%ebx - vpxor %xmm5,%xmm3,%xmm3 - addl 52(%esp),%eax - xorl %edx,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - vpsrld $30,%xmm3,%xmm5 - vmovdqa %xmm6,32(%esp) - addl %ebp,%eax - xorl %edx,%esi - shrdl $7,%ecx,%ecx - addl %ebx,%eax - vpslld $2,%xmm3,%xmm3 - addl 56(%esp),%edi - xorl %ecx,%esi - movl %eax,%ebp - shldl $5,%eax,%eax - addl %esi,%edi - xorl %ecx,%ebp - shrdl $7,%ebx,%ebx - addl %eax,%edi - vpor %xmm5,%xmm3,%xmm3 - addl 60(%esp),%edx - xorl %ebx,%ebp - vmovdqa 96(%esp),%xmm5 - movl %edi,%esi - shldl $5,%edi,%edi - addl %ebp,%edx - xorl %ebx,%esi - shrdl $7,%eax,%eax - addl %edi,%edx - vpalignr $8,%xmm2,%xmm3,%xmm6 - vpxor %xmm0,%xmm4,%xmm4 - addl (%esp),%ecx - xorl %eax,%esi - movl %edx,%ebp - shldl $5,%edx,%edx - vpxor %xmm5,%xmm4,%xmm4 - vmovdqa %xmm0,96(%esp) - addl %esi,%ecx - xorl %eax,%ebp - vmovdqa %xmm7,%xmm0 - vpaddd %xmm3,%xmm7,%xmm7 - shrdl $7,%edi,%edi - addl %edx,%ecx - vpxor %xmm6,%xmm4,%xmm4 - addl 4(%esp),%ebx - xorl %edi,%ebp - movl %ecx,%esi - shldl $5,%ecx,%ecx - vpsrld $30,%xmm4,%xmm6 - vmovdqa %xmm7,48(%esp) - addl %ebp,%ebx - xorl %edi,%esi - shrdl $7,%edx,%edx - addl %ecx,%ebx - vpslld $2,%xmm4,%xmm4 - addl 8(%esp),%eax - xorl %edx,%esi - movl %ebx,%ebp - shldl $5,%ebx,%ebx - addl %esi,%eax - xorl %edx,%ebp - shrdl $7,%ecx,%ecx - addl %ebx,%eax - vpor %xmm6,%xmm4,%xmm4 - addl 12(%esp),%edi - xorl %ecx,%ebp - vmovdqa 64(%esp),%xmm6 - movl %eax,%esi - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ecx,%esi - shrdl $7,%ebx,%ebx - addl %eax,%edi - vpalignr $8,%xmm3,%xmm4,%xmm7 - vpxor %xmm1,%xmm5,%xmm5 - addl 16(%esp),%edx - xorl %ebx,%esi - movl %edi,%ebp - shldl $5,%edi,%edi - vpxor %xmm6,%xmm5,%xmm5 - vmovdqa %xmm1,64(%esp) - addl %esi,%edx - xorl %ebx,%ebp - vmovdqa %xmm0,%xmm1 - vpaddd %xmm4,%xmm0,%xmm0 - shrdl $7,%eax,%eax - addl %edi,%edx - vpxor %xmm7,%xmm5,%xmm5 - addl 20(%esp),%ecx - xorl %eax,%ebp - movl %edx,%esi - shldl $5,%edx,%edx - vpsrld $30,%xmm5,%xmm7 - vmovdqa %xmm0,(%esp) - addl %ebp,%ecx - xorl %eax,%esi - shrdl $7,%edi,%edi - addl %edx,%ecx - vpslld 
$2,%xmm5,%xmm5 - addl 24(%esp),%ebx - xorl %edi,%esi - movl %ecx,%ebp - shldl $5,%ecx,%ecx - addl %esi,%ebx - xorl %edi,%ebp - shrdl $7,%edx,%edx - addl %ecx,%ebx - vpor %xmm7,%xmm5,%xmm5 - addl 28(%esp),%eax - vmovdqa 80(%esp),%xmm7 - shrdl $7,%ecx,%ecx - movl %ebx,%esi - xorl %edx,%ebp - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %ecx,%esi - xorl %edx,%ecx - addl %ebx,%eax - vpalignr $8,%xmm4,%xmm5,%xmm0 - vpxor %xmm2,%xmm6,%xmm6 - addl 32(%esp),%edi - andl %ecx,%esi - xorl %edx,%ecx - shrdl $7,%ebx,%ebx - vpxor %xmm7,%xmm6,%xmm6 - vmovdqa %xmm2,80(%esp) - movl %eax,%ebp - xorl %ecx,%esi - vmovdqa %xmm1,%xmm2 - vpaddd %xmm5,%xmm1,%xmm1 - shldl $5,%eax,%eax - addl %esi,%edi - vpxor %xmm0,%xmm6,%xmm6 - xorl %ebx,%ebp - xorl %ecx,%ebx - addl %eax,%edi - addl 36(%esp),%edx - vpsrld $30,%xmm6,%xmm0 - vmovdqa %xmm1,16(%esp) - andl %ebx,%ebp - xorl %ecx,%ebx - shrdl $7,%eax,%eax - movl %edi,%esi - vpslld $2,%xmm6,%xmm6 - xorl %ebx,%ebp - shldl $5,%edi,%edi - addl %ebp,%edx - xorl %eax,%esi - xorl %ebx,%eax - addl %edi,%edx - addl 40(%esp),%ecx - andl %eax,%esi - vpor %xmm0,%xmm6,%xmm6 - xorl %ebx,%eax - shrdl $7,%edi,%edi - vmovdqa 96(%esp),%xmm0 - movl %edx,%ebp - xorl %eax,%esi - shldl $5,%edx,%edx - addl %esi,%ecx - xorl %edi,%ebp - xorl %eax,%edi - addl %edx,%ecx - addl 44(%esp),%ebx - andl %edi,%ebp - xorl %eax,%edi - shrdl $7,%edx,%edx - movl %ecx,%esi - xorl %edi,%ebp - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edx,%esi - xorl %edi,%edx - addl %ecx,%ebx - vpalignr $8,%xmm5,%xmm6,%xmm1 - vpxor %xmm3,%xmm7,%xmm7 - addl 48(%esp),%eax - andl %edx,%esi - xorl %edi,%edx - shrdl $7,%ecx,%ecx - vpxor %xmm0,%xmm7,%xmm7 - vmovdqa %xmm3,96(%esp) - movl %ebx,%ebp - xorl %edx,%esi - vmovdqa 144(%esp),%xmm3 - vpaddd %xmm6,%xmm2,%xmm2 - shldl $5,%ebx,%ebx - addl %esi,%eax - vpxor %xmm1,%xmm7,%xmm7 - xorl %ecx,%ebp - xorl %edx,%ecx - addl %ebx,%eax - addl 52(%esp),%edi - vpsrld $30,%xmm7,%xmm1 - vmovdqa %xmm2,32(%esp) - andl %ecx,%ebp - xorl %edx,%ecx - shrdl $7,%ebx,%ebx - movl %eax,%esi - vpslld $2,%xmm7,%xmm7 - xorl %ecx,%ebp - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ebx,%esi - xorl %ecx,%ebx - addl %eax,%edi - addl 56(%esp),%edx - andl %ebx,%esi - vpor %xmm1,%xmm7,%xmm7 - xorl %ecx,%ebx - shrdl $7,%eax,%eax - vmovdqa 64(%esp),%xmm1 - movl %edi,%ebp - xorl %ebx,%esi - shldl $5,%edi,%edi - addl %esi,%edx - xorl %eax,%ebp - xorl %ebx,%eax - addl %edi,%edx - addl 60(%esp),%ecx - andl %eax,%ebp - xorl %ebx,%eax - shrdl $7,%edi,%edi - movl %edx,%esi - xorl %eax,%ebp - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %edi,%esi - xorl %eax,%edi - addl %edx,%ecx - vpalignr $8,%xmm6,%xmm7,%xmm2 - vpxor %xmm4,%xmm0,%xmm0 - addl (%esp),%ebx - andl %edi,%esi - xorl %eax,%edi - shrdl $7,%edx,%edx - vpxor %xmm1,%xmm0,%xmm0 - vmovdqa %xmm4,64(%esp) - movl %ecx,%ebp - xorl %edi,%esi - vmovdqa %xmm3,%xmm4 - vpaddd %xmm7,%xmm3,%xmm3 - shldl $5,%ecx,%ecx - addl %esi,%ebx - vpxor %xmm2,%xmm0,%xmm0 - xorl %edx,%ebp - xorl %edi,%edx - addl %ecx,%ebx - addl 4(%esp),%eax - vpsrld $30,%xmm0,%xmm2 - vmovdqa %xmm3,48(%esp) - andl %edx,%ebp - xorl %edi,%edx - shrdl $7,%ecx,%ecx - movl %ebx,%esi - vpslld $2,%xmm0,%xmm0 - xorl %edx,%ebp - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %ecx,%esi - xorl %edx,%ecx - addl %ebx,%eax - addl 8(%esp),%edi - andl %ecx,%esi - vpor %xmm2,%xmm0,%xmm0 - xorl %edx,%ecx - shrdl $7,%ebx,%ebx - vmovdqa 80(%esp),%xmm2 - movl %eax,%ebp - xorl %ecx,%esi - shldl $5,%eax,%eax - addl %esi,%edi - xorl %ebx,%ebp - xorl %ecx,%ebx - addl %eax,%edi - addl 12(%esp),%edx - andl %ebx,%ebp - xorl %ecx,%ebx - shrdl 
$7,%eax,%eax - movl %edi,%esi - xorl %ebx,%ebp - shldl $5,%edi,%edi - addl %ebp,%edx - xorl %eax,%esi - xorl %ebx,%eax - addl %edi,%edx - vpalignr $8,%xmm7,%xmm0,%xmm3 - vpxor %xmm5,%xmm1,%xmm1 - addl 16(%esp),%ecx - andl %eax,%esi - xorl %ebx,%eax - shrdl $7,%edi,%edi - vpxor %xmm2,%xmm1,%xmm1 - vmovdqa %xmm5,80(%esp) - movl %edx,%ebp - xorl %eax,%esi - vmovdqa %xmm4,%xmm5 - vpaddd %xmm0,%xmm4,%xmm4 - shldl $5,%edx,%edx - addl %esi,%ecx - vpxor %xmm3,%xmm1,%xmm1 - xorl %edi,%ebp - xorl %eax,%edi - addl %edx,%ecx - addl 20(%esp),%ebx - vpsrld $30,%xmm1,%xmm3 - vmovdqa %xmm4,(%esp) - andl %edi,%ebp - xorl %eax,%edi - shrdl $7,%edx,%edx - movl %ecx,%esi - vpslld $2,%xmm1,%xmm1 - xorl %edi,%ebp - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edx,%esi - xorl %edi,%edx - addl %ecx,%ebx - addl 24(%esp),%eax - andl %edx,%esi - vpor %xmm3,%xmm1,%xmm1 - xorl %edi,%edx - shrdl $7,%ecx,%ecx - vmovdqa 96(%esp),%xmm3 - movl %ebx,%ebp - xorl %edx,%esi - shldl $5,%ebx,%ebx - addl %esi,%eax - xorl %ecx,%ebp - xorl %edx,%ecx - addl %ebx,%eax - addl 28(%esp),%edi - andl %ecx,%ebp - xorl %edx,%ecx - shrdl $7,%ebx,%ebx - movl %eax,%esi - xorl %ecx,%ebp - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ebx,%esi - xorl %ecx,%ebx - addl %eax,%edi - vpalignr $8,%xmm0,%xmm1,%xmm4 - vpxor %xmm6,%xmm2,%xmm2 - addl 32(%esp),%edx - andl %ebx,%esi - xorl %ecx,%ebx - shrdl $7,%eax,%eax - vpxor %xmm3,%xmm2,%xmm2 - vmovdqa %xmm6,96(%esp) - movl %edi,%ebp - xorl %ebx,%esi - vmovdqa %xmm5,%xmm6 - vpaddd %xmm1,%xmm5,%xmm5 - shldl $5,%edi,%edi - addl %esi,%edx - vpxor %xmm4,%xmm2,%xmm2 - xorl %eax,%ebp - xorl %ebx,%eax - addl %edi,%edx - addl 36(%esp),%ecx - vpsrld $30,%xmm2,%xmm4 - vmovdqa %xmm5,16(%esp) - andl %eax,%ebp - xorl %ebx,%eax - shrdl $7,%edi,%edi - movl %edx,%esi - vpslld $2,%xmm2,%xmm2 - xorl %eax,%ebp - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %edi,%esi - xorl %eax,%edi - addl %edx,%ecx - addl 40(%esp),%ebx - andl %edi,%esi - vpor %xmm4,%xmm2,%xmm2 - xorl %eax,%edi - shrdl $7,%edx,%edx - vmovdqa 64(%esp),%xmm4 - movl %ecx,%ebp - xorl %edi,%esi - shldl $5,%ecx,%ecx - addl %esi,%ebx - xorl %edx,%ebp - xorl %edi,%edx - addl %ecx,%ebx - addl 44(%esp),%eax - andl %edx,%ebp - xorl %edi,%edx - shrdl $7,%ecx,%ecx - movl %ebx,%esi - xorl %edx,%ebp - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %edx,%esi - addl %ebx,%eax - vpalignr $8,%xmm1,%xmm2,%xmm5 - vpxor %xmm7,%xmm3,%xmm3 - addl 48(%esp),%edi - xorl %ecx,%esi - movl %eax,%ebp - shldl $5,%eax,%eax - vpxor %xmm4,%xmm3,%xmm3 - vmovdqa %xmm7,64(%esp) - addl %esi,%edi - xorl %ecx,%ebp - vmovdqa %xmm6,%xmm7 - vpaddd %xmm2,%xmm6,%xmm6 - shrdl $7,%ebx,%ebx - addl %eax,%edi - vpxor %xmm5,%xmm3,%xmm3 - addl 52(%esp),%edx - xorl %ebx,%ebp - movl %edi,%esi - shldl $5,%edi,%edi - vpsrld $30,%xmm3,%xmm5 - vmovdqa %xmm6,32(%esp) - addl %ebp,%edx - xorl %ebx,%esi - shrdl $7,%eax,%eax - addl %edi,%edx - vpslld $2,%xmm3,%xmm3 - addl 56(%esp),%ecx - xorl %eax,%esi - movl %edx,%ebp - shldl $5,%edx,%edx - addl %esi,%ecx - xorl %eax,%ebp - shrdl $7,%edi,%edi - addl %edx,%ecx - vpor %xmm5,%xmm3,%xmm3 - addl 60(%esp),%ebx - xorl %edi,%ebp - movl %ecx,%esi - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edi,%esi - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl (%esp),%eax - vpaddd %xmm3,%xmm7,%xmm7 - xorl %edx,%esi - movl %ebx,%ebp - shldl $5,%ebx,%ebx - addl %esi,%eax - vmovdqa %xmm7,48(%esp) - xorl %edx,%ebp - shrdl $7,%ecx,%ecx - addl %ebx,%eax - addl 4(%esp),%edi - xorl %ecx,%ebp - movl %eax,%esi - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ecx,%esi - shrdl $7,%ebx,%ebx - addl %eax,%edi - addl 
8(%esp),%edx - xorl %ebx,%esi - movl %edi,%ebp - shldl $5,%edi,%edi - addl %esi,%edx - xorl %ebx,%ebp - shrdl $7,%eax,%eax - addl %edi,%edx - addl 12(%esp),%ecx - xorl %eax,%ebp - movl %edx,%esi - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %eax,%esi - shrdl $7,%edi,%edi - addl %edx,%ecx - movl 196(%esp),%ebp - cmpl 200(%esp),%ebp - je .L010done - vmovdqa 160(%esp),%xmm7 - vmovdqa 176(%esp),%xmm6 - vmovdqu (%ebp),%xmm0 - vmovdqu 16(%ebp),%xmm1 - vmovdqu 32(%ebp),%xmm2 - vmovdqu 48(%ebp),%xmm3 - addl $64,%ebp - vpshufb %xmm6,%xmm0,%xmm0 - movl %ebp,196(%esp) - vmovdqa %xmm7,96(%esp) - addl 16(%esp),%ebx - xorl %edi,%esi - vpshufb %xmm6,%xmm1,%xmm1 - movl %ecx,%ebp - shldl $5,%ecx,%ecx - vpaddd %xmm7,%xmm0,%xmm4 - addl %esi,%ebx - xorl %edi,%ebp - shrdl $7,%edx,%edx - addl %ecx,%ebx - vmovdqa %xmm4,(%esp) - addl 20(%esp),%eax - xorl %edx,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %edx,%esi - shrdl $7,%ecx,%ecx - addl %ebx,%eax - addl 24(%esp),%edi - xorl %ecx,%esi - movl %eax,%ebp - shldl $5,%eax,%eax - addl %esi,%edi - xorl %ecx,%ebp - shrdl $7,%ebx,%ebx - addl %eax,%edi - addl 28(%esp),%edx - xorl %ebx,%ebp - movl %edi,%esi - shldl $5,%edi,%edi - addl %ebp,%edx - xorl %ebx,%esi - shrdl $7,%eax,%eax - addl %edi,%edx - addl 32(%esp),%ecx - xorl %eax,%esi - vpshufb %xmm6,%xmm2,%xmm2 - movl %edx,%ebp - shldl $5,%edx,%edx - vpaddd %xmm7,%xmm1,%xmm5 - addl %esi,%ecx - xorl %eax,%ebp - shrdl $7,%edi,%edi - addl %edx,%ecx - vmovdqa %xmm5,16(%esp) - addl 36(%esp),%ebx - xorl %edi,%ebp - movl %ecx,%esi - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edi,%esi - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl 40(%esp),%eax - xorl %edx,%esi - movl %ebx,%ebp - shldl $5,%ebx,%ebx - addl %esi,%eax - xorl %edx,%ebp - shrdl $7,%ecx,%ecx - addl %ebx,%eax - addl 44(%esp),%edi - xorl %ecx,%ebp - movl %eax,%esi - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ecx,%esi - shrdl $7,%ebx,%ebx - addl %eax,%edi - addl 48(%esp),%edx - xorl %ebx,%esi - vpshufb %xmm6,%xmm3,%xmm3 - movl %edi,%ebp - shldl $5,%edi,%edi - vpaddd %xmm7,%xmm2,%xmm6 - addl %esi,%edx - xorl %ebx,%ebp - shrdl $7,%eax,%eax - addl %edi,%edx - vmovdqa %xmm6,32(%esp) - addl 52(%esp),%ecx - xorl %eax,%ebp - movl %edx,%esi - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %eax,%esi - shrdl $7,%edi,%edi - addl %edx,%ecx - addl 56(%esp),%ebx - xorl %edi,%esi - movl %ecx,%ebp - shldl $5,%ecx,%ecx - addl %esi,%ebx - xorl %edi,%ebp - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl 60(%esp),%eax - xorl %edx,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - addl %ebp,%eax - shrdl $7,%ecx,%ecx - addl %ebx,%eax - movl 192(%esp),%ebp - addl (%ebp),%eax - addl 4(%ebp),%esi - addl 8(%ebp),%ecx - movl %eax,(%ebp) - addl 12(%ebp),%edx - movl %esi,4(%ebp) - addl 16(%ebp),%edi - movl %ecx,%ebx - movl %ecx,8(%ebp) - xorl %edx,%ebx - movl %edx,12(%ebp) - movl %edi,16(%ebp) - movl %esi,%ebp - andl %ebx,%esi - movl %ebp,%ebx - jmp .L009loop -.align 16 -.L010done: - addl 16(%esp),%ebx - xorl %edi,%esi - movl %ecx,%ebp - shldl $5,%ecx,%ecx - addl %esi,%ebx - xorl %edi,%ebp - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl 20(%esp),%eax - xorl %edx,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %edx,%esi - shrdl $7,%ecx,%ecx - addl %ebx,%eax - addl 24(%esp),%edi - xorl %ecx,%esi - movl %eax,%ebp - shldl $5,%eax,%eax - addl %esi,%edi - xorl %ecx,%ebp - shrdl $7,%ebx,%ebx - addl %eax,%edi - addl 28(%esp),%edx - xorl %ebx,%ebp - movl %edi,%esi - shldl $5,%edi,%edi - addl %ebp,%edx - xorl %ebx,%esi - shrdl $7,%eax,%eax - addl %edi,%edx - addl 32(%esp),%ecx - xorl 
%eax,%esi - movl %edx,%ebp - shldl $5,%edx,%edx - addl %esi,%ecx - xorl %eax,%ebp - shrdl $7,%edi,%edi - addl %edx,%ecx - addl 36(%esp),%ebx - xorl %edi,%ebp - movl %ecx,%esi - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edi,%esi - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl 40(%esp),%eax - xorl %edx,%esi - movl %ebx,%ebp - shldl $5,%ebx,%ebx - addl %esi,%eax - xorl %edx,%ebp - shrdl $7,%ecx,%ecx - addl %ebx,%eax - addl 44(%esp),%edi - xorl %ecx,%ebp - movl %eax,%esi - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ecx,%esi - shrdl $7,%ebx,%ebx - addl %eax,%edi - addl 48(%esp),%edx - xorl %ebx,%esi - movl %edi,%ebp - shldl $5,%edi,%edi - addl %esi,%edx - xorl %ebx,%ebp - shrdl $7,%eax,%eax - addl %edi,%edx - addl 52(%esp),%ecx - xorl %eax,%ebp - movl %edx,%esi - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %eax,%esi - shrdl $7,%edi,%edi - addl %edx,%ecx - addl 56(%esp),%ebx - xorl %edi,%esi - movl %ecx,%ebp - shldl $5,%ecx,%ecx - addl %esi,%ebx - xorl %edi,%ebp - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl 60(%esp),%eax - xorl %edx,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - addl %ebp,%eax - shrdl $7,%ecx,%ecx - addl %ebx,%eax - vzeroall - movl 192(%esp),%ebp - addl (%ebp),%eax - movl 204(%esp),%esp - addl 4(%ebp),%esi - addl 8(%ebp),%ecx - movl %eax,(%ebp) - addl 12(%ebp),%edx - movl %esi,4(%ebp) - addl 16(%ebp),%edi - movl %ecx,8(%ebp) - movl %edx,12(%ebp) - movl %edi,16(%ebp) - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size _sha1_block_data_order_avx,.-_sha1_block_data_order_avx .align 64 .LK_XX_XX: .long 1518500249,1518500249,1518500249,1518500249 @@ -3995,11 +2820,6 @@ sha1_block_data_order: jz .L001x86 testl $536870912,%ecx jnz .Lshaext_shortcut - andl $268435456,%edx - andl $1073741824,%eax - orl %edx,%eax - cmpl $1342177280,%eax - je .Lavx_shortcut jmp .Lssse3_shortcut .align 16 .L001x86: @@ -6757,1176 +5577,6 @@ _sha1_block_data_order_ssse3: popl %ebp ret .size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3 -.type _sha1_block_data_order_avx,@function -.align 16 -_sha1_block_data_order_avx: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - call .L008pic_point -.L008pic_point: - popl %ebp - leal .LK_XX_XX-.L008pic_point(%ebp),%ebp -.Lavx_shortcut: - vzeroall - vmovdqa (%ebp),%xmm7 - vmovdqa 16(%ebp),%xmm0 - vmovdqa 32(%ebp),%xmm1 - vmovdqa 48(%ebp),%xmm2 - vmovdqa 64(%ebp),%xmm6 - movl 20(%esp),%edi - movl 24(%esp),%ebp - movl 28(%esp),%edx - movl %esp,%esi - subl $208,%esp - andl $-64,%esp - vmovdqa %xmm0,112(%esp) - vmovdqa %xmm1,128(%esp) - vmovdqa %xmm2,144(%esp) - shll $6,%edx - vmovdqa %xmm7,160(%esp) - addl %ebp,%edx - vmovdqa %xmm6,176(%esp) - addl $64,%ebp - movl %edi,192(%esp) - movl %ebp,196(%esp) - movl %edx,200(%esp) - movl %esi,204(%esp) - movl (%edi),%eax - movl 4(%edi),%ebx - movl 8(%edi),%ecx - movl 12(%edi),%edx - movl 16(%edi),%edi - movl %ebx,%esi - vmovdqu -64(%ebp),%xmm0 - vmovdqu -48(%ebp),%xmm1 - vmovdqu -32(%ebp),%xmm2 - vmovdqu -16(%ebp),%xmm3 - vpshufb %xmm6,%xmm0,%xmm0 - vpshufb %xmm6,%xmm1,%xmm1 - vpshufb %xmm6,%xmm2,%xmm2 - vmovdqa %xmm7,96(%esp) - vpshufb %xmm6,%xmm3,%xmm3 - vpaddd %xmm7,%xmm0,%xmm4 - vpaddd %xmm7,%xmm1,%xmm5 - vpaddd %xmm7,%xmm2,%xmm6 - vmovdqa %xmm4,(%esp) - movl %ecx,%ebp - vmovdqa %xmm5,16(%esp) - xorl %edx,%ebp - vmovdqa %xmm6,32(%esp) - andl %ebp,%esi - jmp .L009loop -.align 16 -.L009loop: - shrdl $2,%ebx,%ebx - xorl %edx,%esi - vpalignr $8,%xmm0,%xmm1,%xmm4 - movl %eax,%ebp - addl (%esp),%edi - vpaddd %xmm3,%xmm7,%xmm7 - vmovdqa %xmm0,64(%esp) - xorl %ecx,%ebx - shldl $5,%eax,%eax - vpsrldq $4,%xmm3,%xmm6 
- addl %esi,%edi - andl %ebx,%ebp - vpxor %xmm0,%xmm4,%xmm4 - xorl %ecx,%ebx - addl %eax,%edi - vpxor %xmm2,%xmm6,%xmm6 - shrdl $7,%eax,%eax - xorl %ecx,%ebp - vmovdqa %xmm7,48(%esp) - movl %edi,%esi - addl 4(%esp),%edx - vpxor %xmm6,%xmm4,%xmm4 - xorl %ebx,%eax - shldl $5,%edi,%edi - addl %ebp,%edx - andl %eax,%esi - vpsrld $31,%xmm4,%xmm6 - xorl %ebx,%eax - addl %edi,%edx - shrdl $7,%edi,%edi - xorl %ebx,%esi - vpslldq $12,%xmm4,%xmm0 - vpaddd %xmm4,%xmm4,%xmm4 - movl %edx,%ebp - addl 8(%esp),%ecx - xorl %eax,%edi - shldl $5,%edx,%edx - vpsrld $30,%xmm0,%xmm7 - vpor %xmm6,%xmm4,%xmm4 - addl %esi,%ecx - andl %edi,%ebp - xorl %eax,%edi - addl %edx,%ecx - vpslld $2,%xmm0,%xmm0 - shrdl $7,%edx,%edx - xorl %eax,%ebp - vpxor %xmm7,%xmm4,%xmm4 - movl %ecx,%esi - addl 12(%esp),%ebx - xorl %edi,%edx - shldl $5,%ecx,%ecx - vpxor %xmm0,%xmm4,%xmm4 - addl %ebp,%ebx - andl %edx,%esi - vmovdqa 96(%esp),%xmm0 - xorl %edi,%edx - addl %ecx,%ebx - shrdl $7,%ecx,%ecx - xorl %edi,%esi - vpalignr $8,%xmm1,%xmm2,%xmm5 - movl %ebx,%ebp - addl 16(%esp),%eax - vpaddd %xmm4,%xmm0,%xmm0 - vmovdqa %xmm1,80(%esp) - xorl %edx,%ecx - shldl $5,%ebx,%ebx - vpsrldq $4,%xmm4,%xmm7 - addl %esi,%eax - andl %ecx,%ebp - vpxor %xmm1,%xmm5,%xmm5 - xorl %edx,%ecx - addl %ebx,%eax - vpxor %xmm3,%xmm7,%xmm7 - shrdl $7,%ebx,%ebx - xorl %edx,%ebp - vmovdqa %xmm0,(%esp) - movl %eax,%esi - addl 20(%esp),%edi - vpxor %xmm7,%xmm5,%xmm5 - xorl %ecx,%ebx - shldl $5,%eax,%eax - addl %ebp,%edi - andl %ebx,%esi - vpsrld $31,%xmm5,%xmm7 - xorl %ecx,%ebx - addl %eax,%edi - shrdl $7,%eax,%eax - xorl %ecx,%esi - vpslldq $12,%xmm5,%xmm1 - vpaddd %xmm5,%xmm5,%xmm5 - movl %edi,%ebp - addl 24(%esp),%edx - xorl %ebx,%eax - shldl $5,%edi,%edi - vpsrld $30,%xmm1,%xmm0 - vpor %xmm7,%xmm5,%xmm5 - addl %esi,%edx - andl %eax,%ebp - xorl %ebx,%eax - addl %edi,%edx - vpslld $2,%xmm1,%xmm1 - shrdl $7,%edi,%edi - xorl %ebx,%ebp - vpxor %xmm0,%xmm5,%xmm5 - movl %edx,%esi - addl 28(%esp),%ecx - xorl %eax,%edi - shldl $5,%edx,%edx - vpxor %xmm1,%xmm5,%xmm5 - addl %ebp,%ecx - andl %edi,%esi - vmovdqa 112(%esp),%xmm1 - xorl %eax,%edi - addl %edx,%ecx - shrdl $7,%edx,%edx - xorl %eax,%esi - vpalignr $8,%xmm2,%xmm3,%xmm6 - movl %ecx,%ebp - addl 32(%esp),%ebx - vpaddd %xmm5,%xmm1,%xmm1 - vmovdqa %xmm2,96(%esp) - xorl %edi,%edx - shldl $5,%ecx,%ecx - vpsrldq $4,%xmm5,%xmm0 - addl %esi,%ebx - andl %edx,%ebp - vpxor %xmm2,%xmm6,%xmm6 - xorl %edi,%edx - addl %ecx,%ebx - vpxor %xmm4,%xmm0,%xmm0 - shrdl $7,%ecx,%ecx - xorl %edi,%ebp - vmovdqa %xmm1,16(%esp) - movl %ebx,%esi - addl 36(%esp),%eax - vpxor %xmm0,%xmm6,%xmm6 - xorl %edx,%ecx - shldl $5,%ebx,%ebx - addl %ebp,%eax - andl %ecx,%esi - vpsrld $31,%xmm6,%xmm0 - xorl %edx,%ecx - addl %ebx,%eax - shrdl $7,%ebx,%ebx - xorl %edx,%esi - vpslldq $12,%xmm6,%xmm2 - vpaddd %xmm6,%xmm6,%xmm6 - movl %eax,%ebp - addl 40(%esp),%edi - xorl %ecx,%ebx - shldl $5,%eax,%eax - vpsrld $30,%xmm2,%xmm1 - vpor %xmm0,%xmm6,%xmm6 - addl %esi,%edi - andl %ebx,%ebp - xorl %ecx,%ebx - addl %eax,%edi - vpslld $2,%xmm2,%xmm2 - vmovdqa 64(%esp),%xmm0 - shrdl $7,%eax,%eax - xorl %ecx,%ebp - vpxor %xmm1,%xmm6,%xmm6 - movl %edi,%esi - addl 44(%esp),%edx - xorl %ebx,%eax - shldl $5,%edi,%edi - vpxor %xmm2,%xmm6,%xmm6 - addl %ebp,%edx - andl %eax,%esi - vmovdqa 112(%esp),%xmm2 - xorl %ebx,%eax - addl %edi,%edx - shrdl $7,%edi,%edi - xorl %ebx,%esi - vpalignr $8,%xmm3,%xmm4,%xmm7 - movl %edx,%ebp - addl 48(%esp),%ecx - vpaddd %xmm6,%xmm2,%xmm2 - vmovdqa %xmm3,64(%esp) - xorl %eax,%edi - shldl $5,%edx,%edx - vpsrldq $4,%xmm6,%xmm1 - addl %esi,%ecx - andl 
%edi,%ebp - vpxor %xmm3,%xmm7,%xmm7 - xorl %eax,%edi - addl %edx,%ecx - vpxor %xmm5,%xmm1,%xmm1 - shrdl $7,%edx,%edx - xorl %eax,%ebp - vmovdqa %xmm2,32(%esp) - movl %ecx,%esi - addl 52(%esp),%ebx - vpxor %xmm1,%xmm7,%xmm7 - xorl %edi,%edx - shldl $5,%ecx,%ecx - addl %ebp,%ebx - andl %edx,%esi - vpsrld $31,%xmm7,%xmm1 - xorl %edi,%edx - addl %ecx,%ebx - shrdl $7,%ecx,%ecx - xorl %edi,%esi - vpslldq $12,%xmm7,%xmm3 - vpaddd %xmm7,%xmm7,%xmm7 - movl %ebx,%ebp - addl 56(%esp),%eax - xorl %edx,%ecx - shldl $5,%ebx,%ebx - vpsrld $30,%xmm3,%xmm2 - vpor %xmm1,%xmm7,%xmm7 - addl %esi,%eax - andl %ecx,%ebp - xorl %edx,%ecx - addl %ebx,%eax - vpslld $2,%xmm3,%xmm3 - vmovdqa 80(%esp),%xmm1 - shrdl $7,%ebx,%ebx - xorl %edx,%ebp - vpxor %xmm2,%xmm7,%xmm7 - movl %eax,%esi - addl 60(%esp),%edi - xorl %ecx,%ebx - shldl $5,%eax,%eax - vpxor %xmm3,%xmm7,%xmm7 - addl %ebp,%edi - andl %ebx,%esi - vmovdqa 112(%esp),%xmm3 - xorl %ecx,%ebx - addl %eax,%edi - vpalignr $8,%xmm6,%xmm7,%xmm2 - vpxor %xmm4,%xmm0,%xmm0 - shrdl $7,%eax,%eax - xorl %ecx,%esi - movl %edi,%ebp - addl (%esp),%edx - vpxor %xmm1,%xmm0,%xmm0 - vmovdqa %xmm4,80(%esp) - xorl %ebx,%eax - shldl $5,%edi,%edi - vmovdqa %xmm3,%xmm4 - vpaddd %xmm7,%xmm3,%xmm3 - addl %esi,%edx - andl %eax,%ebp - vpxor %xmm2,%xmm0,%xmm0 - xorl %ebx,%eax - addl %edi,%edx - shrdl $7,%edi,%edi - xorl %ebx,%ebp - vpsrld $30,%xmm0,%xmm2 - vmovdqa %xmm3,48(%esp) - movl %edx,%esi - addl 4(%esp),%ecx - xorl %eax,%edi - shldl $5,%edx,%edx - vpslld $2,%xmm0,%xmm0 - addl %ebp,%ecx - andl %edi,%esi - xorl %eax,%edi - addl %edx,%ecx - shrdl $7,%edx,%edx - xorl %eax,%esi - movl %ecx,%ebp - addl 8(%esp),%ebx - vpor %xmm2,%xmm0,%xmm0 - xorl %edi,%edx - shldl $5,%ecx,%ecx - vmovdqa 96(%esp),%xmm2 - addl %esi,%ebx - andl %edx,%ebp - xorl %edi,%edx - addl %ecx,%ebx - addl 12(%esp),%eax - xorl %edi,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %edx,%esi - shrdl $7,%ecx,%ecx - addl %ebx,%eax - vpalignr $8,%xmm7,%xmm0,%xmm3 - vpxor %xmm5,%xmm1,%xmm1 - addl 16(%esp),%edi - xorl %ecx,%esi - movl %eax,%ebp - shldl $5,%eax,%eax - vpxor %xmm2,%xmm1,%xmm1 - vmovdqa %xmm5,96(%esp) - addl %esi,%edi - xorl %ecx,%ebp - vmovdqa %xmm4,%xmm5 - vpaddd %xmm0,%xmm4,%xmm4 - shrdl $7,%ebx,%ebx - addl %eax,%edi - vpxor %xmm3,%xmm1,%xmm1 - addl 20(%esp),%edx - xorl %ebx,%ebp - movl %edi,%esi - shldl $5,%edi,%edi - vpsrld $30,%xmm1,%xmm3 - vmovdqa %xmm4,(%esp) - addl %ebp,%edx - xorl %ebx,%esi - shrdl $7,%eax,%eax - addl %edi,%edx - vpslld $2,%xmm1,%xmm1 - addl 24(%esp),%ecx - xorl %eax,%esi - movl %edx,%ebp - shldl $5,%edx,%edx - addl %esi,%ecx - xorl %eax,%ebp - shrdl $7,%edi,%edi - addl %edx,%ecx - vpor %xmm3,%xmm1,%xmm1 - addl 28(%esp),%ebx - xorl %edi,%ebp - vmovdqa 64(%esp),%xmm3 - movl %ecx,%esi - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edi,%esi - shrdl $7,%edx,%edx - addl %ecx,%ebx - vpalignr $8,%xmm0,%xmm1,%xmm4 - vpxor %xmm6,%xmm2,%xmm2 - addl 32(%esp),%eax - xorl %edx,%esi - movl %ebx,%ebp - shldl $5,%ebx,%ebx - vpxor %xmm3,%xmm2,%xmm2 - vmovdqa %xmm6,64(%esp) - addl %esi,%eax - xorl %edx,%ebp - vmovdqa 128(%esp),%xmm6 - vpaddd %xmm1,%xmm5,%xmm5 - shrdl $7,%ecx,%ecx - addl %ebx,%eax - vpxor %xmm4,%xmm2,%xmm2 - addl 36(%esp),%edi - xorl %ecx,%ebp - movl %eax,%esi - shldl $5,%eax,%eax - vpsrld $30,%xmm2,%xmm4 - vmovdqa %xmm5,16(%esp) - addl %ebp,%edi - xorl %ecx,%esi - shrdl $7,%ebx,%ebx - addl %eax,%edi - vpslld $2,%xmm2,%xmm2 - addl 40(%esp),%edx - xorl %ebx,%esi - movl %edi,%ebp - shldl $5,%edi,%edi - addl %esi,%edx - xorl %ebx,%ebp - shrdl $7,%eax,%eax - addl %edi,%edx - 
vpor %xmm4,%xmm2,%xmm2 - addl 44(%esp),%ecx - xorl %eax,%ebp - vmovdqa 80(%esp),%xmm4 - movl %edx,%esi - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %eax,%esi - shrdl $7,%edi,%edi - addl %edx,%ecx - vpalignr $8,%xmm1,%xmm2,%xmm5 - vpxor %xmm7,%xmm3,%xmm3 - addl 48(%esp),%ebx - xorl %edi,%esi - movl %ecx,%ebp - shldl $5,%ecx,%ecx - vpxor %xmm4,%xmm3,%xmm3 - vmovdqa %xmm7,80(%esp) - addl %esi,%ebx - xorl %edi,%ebp - vmovdqa %xmm6,%xmm7 - vpaddd %xmm2,%xmm6,%xmm6 - shrdl $7,%edx,%edx - addl %ecx,%ebx - vpxor %xmm5,%xmm3,%xmm3 - addl 52(%esp),%eax - xorl %edx,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - vpsrld $30,%xmm3,%xmm5 - vmovdqa %xmm6,32(%esp) - addl %ebp,%eax - xorl %edx,%esi - shrdl $7,%ecx,%ecx - addl %ebx,%eax - vpslld $2,%xmm3,%xmm3 - addl 56(%esp),%edi - xorl %ecx,%esi - movl %eax,%ebp - shldl $5,%eax,%eax - addl %esi,%edi - xorl %ecx,%ebp - shrdl $7,%ebx,%ebx - addl %eax,%edi - vpor %xmm5,%xmm3,%xmm3 - addl 60(%esp),%edx - xorl %ebx,%ebp - vmovdqa 96(%esp),%xmm5 - movl %edi,%esi - shldl $5,%edi,%edi - addl %ebp,%edx - xorl %ebx,%esi - shrdl $7,%eax,%eax - addl %edi,%edx - vpalignr $8,%xmm2,%xmm3,%xmm6 - vpxor %xmm0,%xmm4,%xmm4 - addl (%esp),%ecx - xorl %eax,%esi - movl %edx,%ebp - shldl $5,%edx,%edx - vpxor %xmm5,%xmm4,%xmm4 - vmovdqa %xmm0,96(%esp) - addl %esi,%ecx - xorl %eax,%ebp - vmovdqa %xmm7,%xmm0 - vpaddd %xmm3,%xmm7,%xmm7 - shrdl $7,%edi,%edi - addl %edx,%ecx - vpxor %xmm6,%xmm4,%xmm4 - addl 4(%esp),%ebx - xorl %edi,%ebp - movl %ecx,%esi - shldl $5,%ecx,%ecx - vpsrld $30,%xmm4,%xmm6 - vmovdqa %xmm7,48(%esp) - addl %ebp,%ebx - xorl %edi,%esi - shrdl $7,%edx,%edx - addl %ecx,%ebx - vpslld $2,%xmm4,%xmm4 - addl 8(%esp),%eax - xorl %edx,%esi - movl %ebx,%ebp - shldl $5,%ebx,%ebx - addl %esi,%eax - xorl %edx,%ebp - shrdl $7,%ecx,%ecx - addl %ebx,%eax - vpor %xmm6,%xmm4,%xmm4 - addl 12(%esp),%edi - xorl %ecx,%ebp - vmovdqa 64(%esp),%xmm6 - movl %eax,%esi - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ecx,%esi - shrdl $7,%ebx,%ebx - addl %eax,%edi - vpalignr $8,%xmm3,%xmm4,%xmm7 - vpxor %xmm1,%xmm5,%xmm5 - addl 16(%esp),%edx - xorl %ebx,%esi - movl %edi,%ebp - shldl $5,%edi,%edi - vpxor %xmm6,%xmm5,%xmm5 - vmovdqa %xmm1,64(%esp) - addl %esi,%edx - xorl %ebx,%ebp - vmovdqa %xmm0,%xmm1 - vpaddd %xmm4,%xmm0,%xmm0 - shrdl $7,%eax,%eax - addl %edi,%edx - vpxor %xmm7,%xmm5,%xmm5 - addl 20(%esp),%ecx - xorl %eax,%ebp - movl %edx,%esi - shldl $5,%edx,%edx - vpsrld $30,%xmm5,%xmm7 - vmovdqa %xmm0,(%esp) - addl %ebp,%ecx - xorl %eax,%esi - shrdl $7,%edi,%edi - addl %edx,%ecx - vpslld $2,%xmm5,%xmm5 - addl 24(%esp),%ebx - xorl %edi,%esi - movl %ecx,%ebp - shldl $5,%ecx,%ecx - addl %esi,%ebx - xorl %edi,%ebp - shrdl $7,%edx,%edx - addl %ecx,%ebx - vpor %xmm7,%xmm5,%xmm5 - addl 28(%esp),%eax - vmovdqa 80(%esp),%xmm7 - shrdl $7,%ecx,%ecx - movl %ebx,%esi - xorl %edx,%ebp - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %ecx,%esi - xorl %edx,%ecx - addl %ebx,%eax - vpalignr $8,%xmm4,%xmm5,%xmm0 - vpxor %xmm2,%xmm6,%xmm6 - addl 32(%esp),%edi - andl %ecx,%esi - xorl %edx,%ecx - shrdl $7,%ebx,%ebx - vpxor %xmm7,%xmm6,%xmm6 - vmovdqa %xmm2,80(%esp) - movl %eax,%ebp - xorl %ecx,%esi - vmovdqa %xmm1,%xmm2 - vpaddd %xmm5,%xmm1,%xmm1 - shldl $5,%eax,%eax - addl %esi,%edi - vpxor %xmm0,%xmm6,%xmm6 - xorl %ebx,%ebp - xorl %ecx,%ebx - addl %eax,%edi - addl 36(%esp),%edx - vpsrld $30,%xmm6,%xmm0 - vmovdqa %xmm1,16(%esp) - andl %ebx,%ebp - xorl %ecx,%ebx - shrdl $7,%eax,%eax - movl %edi,%esi - vpslld $2,%xmm6,%xmm6 - xorl %ebx,%ebp - shldl $5,%edi,%edi - addl %ebp,%edx - xorl %eax,%esi - xorl %ebx,%eax - addl 
%edi,%edx - addl 40(%esp),%ecx - andl %eax,%esi - vpor %xmm0,%xmm6,%xmm6 - xorl %ebx,%eax - shrdl $7,%edi,%edi - vmovdqa 96(%esp),%xmm0 - movl %edx,%ebp - xorl %eax,%esi - shldl $5,%edx,%edx - addl %esi,%ecx - xorl %edi,%ebp - xorl %eax,%edi - addl %edx,%ecx - addl 44(%esp),%ebx - andl %edi,%ebp - xorl %eax,%edi - shrdl $7,%edx,%edx - movl %ecx,%esi - xorl %edi,%ebp - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edx,%esi - xorl %edi,%edx - addl %ecx,%ebx - vpalignr $8,%xmm5,%xmm6,%xmm1 - vpxor %xmm3,%xmm7,%xmm7 - addl 48(%esp),%eax - andl %edx,%esi - xorl %edi,%edx - shrdl $7,%ecx,%ecx - vpxor %xmm0,%xmm7,%xmm7 - vmovdqa %xmm3,96(%esp) - movl %ebx,%ebp - xorl %edx,%esi - vmovdqa 144(%esp),%xmm3 - vpaddd %xmm6,%xmm2,%xmm2 - shldl $5,%ebx,%ebx - addl %esi,%eax - vpxor %xmm1,%xmm7,%xmm7 - xorl %ecx,%ebp - xorl %edx,%ecx - addl %ebx,%eax - addl 52(%esp),%edi - vpsrld $30,%xmm7,%xmm1 - vmovdqa %xmm2,32(%esp) - andl %ecx,%ebp - xorl %edx,%ecx - shrdl $7,%ebx,%ebx - movl %eax,%esi - vpslld $2,%xmm7,%xmm7 - xorl %ecx,%ebp - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ebx,%esi - xorl %ecx,%ebx - addl %eax,%edi - addl 56(%esp),%edx - andl %ebx,%esi - vpor %xmm1,%xmm7,%xmm7 - xorl %ecx,%ebx - shrdl $7,%eax,%eax - vmovdqa 64(%esp),%xmm1 - movl %edi,%ebp - xorl %ebx,%esi - shldl $5,%edi,%edi - addl %esi,%edx - xorl %eax,%ebp - xorl %ebx,%eax - addl %edi,%edx - addl 60(%esp),%ecx - andl %eax,%ebp - xorl %ebx,%eax - shrdl $7,%edi,%edi - movl %edx,%esi - xorl %eax,%ebp - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %edi,%esi - xorl %eax,%edi - addl %edx,%ecx - vpalignr $8,%xmm6,%xmm7,%xmm2 - vpxor %xmm4,%xmm0,%xmm0 - addl (%esp),%ebx - andl %edi,%esi - xorl %eax,%edi - shrdl $7,%edx,%edx - vpxor %xmm1,%xmm0,%xmm0 - vmovdqa %xmm4,64(%esp) - movl %ecx,%ebp - xorl %edi,%esi - vmovdqa %xmm3,%xmm4 - vpaddd %xmm7,%xmm3,%xmm3 - shldl $5,%ecx,%ecx - addl %esi,%ebx - vpxor %xmm2,%xmm0,%xmm0 - xorl %edx,%ebp - xorl %edi,%edx - addl %ecx,%ebx - addl 4(%esp),%eax - vpsrld $30,%xmm0,%xmm2 - vmovdqa %xmm3,48(%esp) - andl %edx,%ebp - xorl %edi,%edx - shrdl $7,%ecx,%ecx - movl %ebx,%esi - vpslld $2,%xmm0,%xmm0 - xorl %edx,%ebp - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %ecx,%esi - xorl %edx,%ecx - addl %ebx,%eax - addl 8(%esp),%edi - andl %ecx,%esi - vpor %xmm2,%xmm0,%xmm0 - xorl %edx,%ecx - shrdl $7,%ebx,%ebx - vmovdqa 80(%esp),%xmm2 - movl %eax,%ebp - xorl %ecx,%esi - shldl $5,%eax,%eax - addl %esi,%edi - xorl %ebx,%ebp - xorl %ecx,%ebx - addl %eax,%edi - addl 12(%esp),%edx - andl %ebx,%ebp - xorl %ecx,%ebx - shrdl $7,%eax,%eax - movl %edi,%esi - xorl %ebx,%ebp - shldl $5,%edi,%edi - addl %ebp,%edx - xorl %eax,%esi - xorl %ebx,%eax - addl %edi,%edx - vpalignr $8,%xmm7,%xmm0,%xmm3 - vpxor %xmm5,%xmm1,%xmm1 - addl 16(%esp),%ecx - andl %eax,%esi - xorl %ebx,%eax - shrdl $7,%edi,%edi - vpxor %xmm2,%xmm1,%xmm1 - vmovdqa %xmm5,80(%esp) - movl %edx,%ebp - xorl %eax,%esi - vmovdqa %xmm4,%xmm5 - vpaddd %xmm0,%xmm4,%xmm4 - shldl $5,%edx,%edx - addl %esi,%ecx - vpxor %xmm3,%xmm1,%xmm1 - xorl %edi,%ebp - xorl %eax,%edi - addl %edx,%ecx - addl 20(%esp),%ebx - vpsrld $30,%xmm1,%xmm3 - vmovdqa %xmm4,(%esp) - andl %edi,%ebp - xorl %eax,%edi - shrdl $7,%edx,%edx - movl %ecx,%esi - vpslld $2,%xmm1,%xmm1 - xorl %edi,%ebp - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edx,%esi - xorl %edi,%edx - addl %ecx,%ebx - addl 24(%esp),%eax - andl %edx,%esi - vpor %xmm3,%xmm1,%xmm1 - xorl %edi,%edx - shrdl $7,%ecx,%ecx - vmovdqa 96(%esp),%xmm3 - movl %ebx,%ebp - xorl %edx,%esi - shldl $5,%ebx,%ebx - addl %esi,%eax - xorl %ecx,%ebp - xorl %edx,%ecx - 
addl %ebx,%eax - addl 28(%esp),%edi - andl %ecx,%ebp - xorl %edx,%ecx - shrdl $7,%ebx,%ebx - movl %eax,%esi - xorl %ecx,%ebp - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ebx,%esi - xorl %ecx,%ebx - addl %eax,%edi - vpalignr $8,%xmm0,%xmm1,%xmm4 - vpxor %xmm6,%xmm2,%xmm2 - addl 32(%esp),%edx - andl %ebx,%esi - xorl %ecx,%ebx - shrdl $7,%eax,%eax - vpxor %xmm3,%xmm2,%xmm2 - vmovdqa %xmm6,96(%esp) - movl %edi,%ebp - xorl %ebx,%esi - vmovdqa %xmm5,%xmm6 - vpaddd %xmm1,%xmm5,%xmm5 - shldl $5,%edi,%edi - addl %esi,%edx - vpxor %xmm4,%xmm2,%xmm2 - xorl %eax,%ebp - xorl %ebx,%eax - addl %edi,%edx - addl 36(%esp),%ecx - vpsrld $30,%xmm2,%xmm4 - vmovdqa %xmm5,16(%esp) - andl %eax,%ebp - xorl %ebx,%eax - shrdl $7,%edi,%edi - movl %edx,%esi - vpslld $2,%xmm2,%xmm2 - xorl %eax,%ebp - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %edi,%esi - xorl %eax,%edi - addl %edx,%ecx - addl 40(%esp),%ebx - andl %edi,%esi - vpor %xmm4,%xmm2,%xmm2 - xorl %eax,%edi - shrdl $7,%edx,%edx - vmovdqa 64(%esp),%xmm4 - movl %ecx,%ebp - xorl %edi,%esi - shldl $5,%ecx,%ecx - addl %esi,%ebx - xorl %edx,%ebp - xorl %edi,%edx - addl %ecx,%ebx - addl 44(%esp),%eax - andl %edx,%ebp - xorl %edi,%edx - shrdl $7,%ecx,%ecx - movl %ebx,%esi - xorl %edx,%ebp - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %edx,%esi - addl %ebx,%eax - vpalignr $8,%xmm1,%xmm2,%xmm5 - vpxor %xmm7,%xmm3,%xmm3 - addl 48(%esp),%edi - xorl %ecx,%esi - movl %eax,%ebp - shldl $5,%eax,%eax - vpxor %xmm4,%xmm3,%xmm3 - vmovdqa %xmm7,64(%esp) - addl %esi,%edi - xorl %ecx,%ebp - vmovdqa %xmm6,%xmm7 - vpaddd %xmm2,%xmm6,%xmm6 - shrdl $7,%ebx,%ebx - addl %eax,%edi - vpxor %xmm5,%xmm3,%xmm3 - addl 52(%esp),%edx - xorl %ebx,%ebp - movl %edi,%esi - shldl $5,%edi,%edi - vpsrld $30,%xmm3,%xmm5 - vmovdqa %xmm6,32(%esp) - addl %ebp,%edx - xorl %ebx,%esi - shrdl $7,%eax,%eax - addl %edi,%edx - vpslld $2,%xmm3,%xmm3 - addl 56(%esp),%ecx - xorl %eax,%esi - movl %edx,%ebp - shldl $5,%edx,%edx - addl %esi,%ecx - xorl %eax,%ebp - shrdl $7,%edi,%edi - addl %edx,%ecx - vpor %xmm5,%xmm3,%xmm3 - addl 60(%esp),%ebx - xorl %edi,%ebp - movl %ecx,%esi - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edi,%esi - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl (%esp),%eax - vpaddd %xmm3,%xmm7,%xmm7 - xorl %edx,%esi - movl %ebx,%ebp - shldl $5,%ebx,%ebx - addl %esi,%eax - vmovdqa %xmm7,48(%esp) - xorl %edx,%ebp - shrdl $7,%ecx,%ecx - addl %ebx,%eax - addl 4(%esp),%edi - xorl %ecx,%ebp - movl %eax,%esi - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ecx,%esi - shrdl $7,%ebx,%ebx - addl %eax,%edi - addl 8(%esp),%edx - xorl %ebx,%esi - movl %edi,%ebp - shldl $5,%edi,%edi - addl %esi,%edx - xorl %ebx,%ebp - shrdl $7,%eax,%eax - addl %edi,%edx - addl 12(%esp),%ecx - xorl %eax,%ebp - movl %edx,%esi - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %eax,%esi - shrdl $7,%edi,%edi - addl %edx,%ecx - movl 196(%esp),%ebp - cmpl 200(%esp),%ebp - je .L010done - vmovdqa 160(%esp),%xmm7 - vmovdqa 176(%esp),%xmm6 - vmovdqu (%ebp),%xmm0 - vmovdqu 16(%ebp),%xmm1 - vmovdqu 32(%ebp),%xmm2 - vmovdqu 48(%ebp),%xmm3 - addl $64,%ebp - vpshufb %xmm6,%xmm0,%xmm0 - movl %ebp,196(%esp) - vmovdqa %xmm7,96(%esp) - addl 16(%esp),%ebx - xorl %edi,%esi - vpshufb %xmm6,%xmm1,%xmm1 - movl %ecx,%ebp - shldl $5,%ecx,%ecx - vpaddd %xmm7,%xmm0,%xmm4 - addl %esi,%ebx - xorl %edi,%ebp - shrdl $7,%edx,%edx - addl %ecx,%ebx - vmovdqa %xmm4,(%esp) - addl 20(%esp),%eax - xorl %edx,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %edx,%esi - shrdl $7,%ecx,%ecx - addl %ebx,%eax - addl 24(%esp),%edi - xorl %ecx,%esi - movl %eax,%ebp - shldl 
$5,%eax,%eax - addl %esi,%edi - xorl %ecx,%ebp - shrdl $7,%ebx,%ebx - addl %eax,%edi - addl 28(%esp),%edx - xorl %ebx,%ebp - movl %edi,%esi - shldl $5,%edi,%edi - addl %ebp,%edx - xorl %ebx,%esi - shrdl $7,%eax,%eax - addl %edi,%edx - addl 32(%esp),%ecx - xorl %eax,%esi - vpshufb %xmm6,%xmm2,%xmm2 - movl %edx,%ebp - shldl $5,%edx,%edx - vpaddd %xmm7,%xmm1,%xmm5 - addl %esi,%ecx - xorl %eax,%ebp - shrdl $7,%edi,%edi - addl %edx,%ecx - vmovdqa %xmm5,16(%esp) - addl 36(%esp),%ebx - xorl %edi,%ebp - movl %ecx,%esi - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edi,%esi - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl 40(%esp),%eax - xorl %edx,%esi - movl %ebx,%ebp - shldl $5,%ebx,%ebx - addl %esi,%eax - xorl %edx,%ebp - shrdl $7,%ecx,%ecx - addl %ebx,%eax - addl 44(%esp),%edi - xorl %ecx,%ebp - movl %eax,%esi - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ecx,%esi - shrdl $7,%ebx,%ebx - addl %eax,%edi - addl 48(%esp),%edx - xorl %ebx,%esi - vpshufb %xmm6,%xmm3,%xmm3 - movl %edi,%ebp - shldl $5,%edi,%edi - vpaddd %xmm7,%xmm2,%xmm6 - addl %esi,%edx - xorl %ebx,%ebp - shrdl $7,%eax,%eax - addl %edi,%edx - vmovdqa %xmm6,32(%esp) - addl 52(%esp),%ecx - xorl %eax,%ebp - movl %edx,%esi - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %eax,%esi - shrdl $7,%edi,%edi - addl %edx,%ecx - addl 56(%esp),%ebx - xorl %edi,%esi - movl %ecx,%ebp - shldl $5,%ecx,%ecx - addl %esi,%ebx - xorl %edi,%ebp - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl 60(%esp),%eax - xorl %edx,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - addl %ebp,%eax - shrdl $7,%ecx,%ecx - addl %ebx,%eax - movl 192(%esp),%ebp - addl (%ebp),%eax - addl 4(%ebp),%esi - addl 8(%ebp),%ecx - movl %eax,(%ebp) - addl 12(%ebp),%edx - movl %esi,4(%ebp) - addl 16(%ebp),%edi - movl %ecx,%ebx - movl %ecx,8(%ebp) - xorl %edx,%ebx - movl %edx,12(%ebp) - movl %edi,16(%ebp) - movl %esi,%ebp - andl %ebx,%esi - movl %ebp,%ebx - jmp .L009loop -.align 16 -.L010done: - addl 16(%esp),%ebx - xorl %edi,%esi - movl %ecx,%ebp - shldl $5,%ecx,%ecx - addl %esi,%ebx - xorl %edi,%ebp - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl 20(%esp),%eax - xorl %edx,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - addl %ebp,%eax - xorl %edx,%esi - shrdl $7,%ecx,%ecx - addl %ebx,%eax - addl 24(%esp),%edi - xorl %ecx,%esi - movl %eax,%ebp - shldl $5,%eax,%eax - addl %esi,%edi - xorl %ecx,%ebp - shrdl $7,%ebx,%ebx - addl %eax,%edi - addl 28(%esp),%edx - xorl %ebx,%ebp - movl %edi,%esi - shldl $5,%edi,%edi - addl %ebp,%edx - xorl %ebx,%esi - shrdl $7,%eax,%eax - addl %edi,%edx - addl 32(%esp),%ecx - xorl %eax,%esi - movl %edx,%ebp - shldl $5,%edx,%edx - addl %esi,%ecx - xorl %eax,%ebp - shrdl $7,%edi,%edi - addl %edx,%ecx - addl 36(%esp),%ebx - xorl %edi,%ebp - movl %ecx,%esi - shldl $5,%ecx,%ecx - addl %ebp,%ebx - xorl %edi,%esi - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl 40(%esp),%eax - xorl %edx,%esi - movl %ebx,%ebp - shldl $5,%ebx,%ebx - addl %esi,%eax - xorl %edx,%ebp - shrdl $7,%ecx,%ecx - addl %ebx,%eax - addl 44(%esp),%edi - xorl %ecx,%ebp - movl %eax,%esi - shldl $5,%eax,%eax - addl %ebp,%edi - xorl %ecx,%esi - shrdl $7,%ebx,%ebx - addl %eax,%edi - addl 48(%esp),%edx - xorl %ebx,%esi - movl %edi,%ebp - shldl $5,%edi,%edi - addl %esi,%edx - xorl %ebx,%ebp - shrdl $7,%eax,%eax - addl %edi,%edx - addl 52(%esp),%ecx - xorl %eax,%ebp - movl %edx,%esi - shldl $5,%edx,%edx - addl %ebp,%ecx - xorl %eax,%esi - shrdl $7,%edi,%edi - addl %edx,%ecx - addl 56(%esp),%ebx - xorl %edi,%esi - movl %ecx,%ebp - shldl $5,%ecx,%ecx - addl %esi,%ebx - xorl %edi,%ebp - shrdl $7,%edx,%edx - addl %ecx,%ebx - addl 
60(%esp),%eax - xorl %edx,%ebp - movl %ebx,%esi - shldl $5,%ebx,%ebx - addl %ebp,%eax - shrdl $7,%ecx,%ecx - addl %ebx,%eax - vzeroall - movl 192(%esp),%ebp - addl (%ebp),%eax - movl 204(%esp),%esp - addl 4(%ebp),%esi - addl 8(%ebp),%ecx - movl %eax,(%ebp) - addl 12(%ebp),%edx - movl %esi,4(%ebp) - addl 16(%ebp),%edi - movl %ecx,8(%ebp) - movl %edx,12(%ebp) - movl %edi,16(%ebp) - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size _sha1_block_data_order_avx,.-_sha1_block_data_order_avx .align 64 .LK_XX_XX: .long 1518500249,1518500249,1518500249,1518500249 diff --git a/secure/lib/libcrypto/i386/sha256-586.S b/secure/lib/libcrypto/i386/sha256-586.S index 7b4205352bdf..5d8476c1e1bb 100644 --- a/secure/lib/libcrypto/i386/sha256-586.S +++ b/secure/lib/libcrypto/i386/sha256-586.S @@ -42,13 +42,12 @@ sha256_block_data_order: orl %ebx,%ecx andl $1342177280,%ecx cmpl $1342177280,%ecx - je .L005AVX testl $512,%ebx - jnz .L006SSSE3 + jnz .L005SSSE3 .L003no_xmm: subl %edi,%eax cmpl $256,%eax - jae .L007unrolled + jae .L006unrolled jmp .L002loop .align 16 .L002loop: @@ -120,7 +119,7 @@ sha256_block_data_order: movl %ecx,28(%esp) movl %edi,32(%esp) .align 16 -.L00800_15: +.L00700_15: movl %edx,%ecx movl 24(%esp),%esi rorl $14,%ecx @@ -158,11 +157,11 @@ sha256_block_data_order: addl $4,%ebp addl %ebx,%eax cmpl $3248222580,%esi - jne .L00800_15 + jne .L00700_15 movl 156(%esp),%ecx - jmp .L00916_63 + jmp .L00816_63 .align 16 -.L00916_63: +.L00816_63: movl %ecx,%ebx movl 104(%esp),%esi rorl $11,%ecx @@ -217,7 +216,7 @@ sha256_block_data_order: addl $4,%ebp addl %ebx,%eax cmpl $3329325298,%esi - jne .L00916_63 + jne .L00816_63 movl 356(%esp),%esi movl 8(%esp),%ebx movl 16(%esp),%ecx @@ -261,7 +260,7 @@ sha256_block_data_order: .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .align 16 -.L007unrolled: +.L006unrolled: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebp @@ -278,9 +277,9 @@ sha256_block_data_order: movl %ebx,20(%esp) movl %ecx,24(%esp) movl %esi,28(%esp) - jmp .L010grand_loop + jmp .L009grand_loop .align 16 -.L010grand_loop: +.L009grand_loop: movl (%edi),%ebx movl 4(%edi),%ecx bswap %ebx @@ -3160,7 +3159,7 @@ sha256_block_data_order: movl %ebx,24(%esp) movl %ecx,28(%esp) cmpl 104(%esp),%edi - jb .L010grand_loop + jb .L009grand_loop movl 108(%esp),%esp popl %edi popl %esi @@ -3179,9 +3178,9 @@ sha256_block_data_order: pshufd $27,%xmm2,%xmm2 .byte 102,15,58,15,202,8 punpcklqdq %xmm0,%xmm2 - jmp .L011loop_shaext + jmp .L010loop_shaext .align 16 -.L011loop_shaext: +.L010loop_shaext: movdqu (%edi),%xmm3 movdqu 16(%edi),%xmm4 movdqu 32(%edi),%xmm5 @@ -3351,7 +3350,7 @@ sha256_block_data_order: .byte 15,56,203,202 paddd 16(%esp),%xmm2 paddd (%esp),%xmm1 - jnz .L011loop_shaext + jnz .L010loop_shaext pshufd $177,%xmm2,%xmm2 pshufd $27,%xmm1,%xmm7 pshufd $177,%xmm1,%xmm1 @@ -3366,7 +3365,7 @@ sha256_block_data_order: popl %ebp ret .align 32 -.L006SSSE3: +.L005SSSE3: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebx @@ -3385,9 +3384,9 @@ sha256_block_data_order: movl %ecx,24(%esp) movl %esi,28(%esp) movdqa 256(%ebp),%xmm7 - jmp .L012grand_ssse3 + jmp .L011grand_ssse3 .align 16 -.L012grand_ssse3: +.L011grand_ssse3: movdqu (%edi),%xmm0 movdqu 16(%edi),%xmm1 movdqu 32(%edi),%xmm2 @@ -3410,9 +3409,9 @@ sha256_block_data_order: paddd %xmm3,%xmm7 movdqa %xmm6,64(%esp) movdqa %xmm7,80(%esp) - jmp .L013ssse3_00_47 + jmp .L012ssse3_00_47 .align 16 -.L013ssse3_00_47: +.L012ssse3_00_47: addl $64,%ebp movl %edx,%ecx movdqa %xmm1,%xmm4 @@ -4055,7 +4054,7 @@ 
sha256_block_data_order: addl %ecx,%eax movdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne .L013ssse3_00_47 + jne .L012ssse3_00_47 movl %edx,%ecx rorl $14,%edx movl 20(%esp),%esi @@ -4569,2218 +4568,13 @@ sha256_block_data_order: movdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb .L012grand_ssse3 + jb .L011grand_ssse3 movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.align 32 -.L005AVX: - andl $264,%edx - cmpl $264,%edx - je .L014AVX_BMI - leal -96(%esp),%esp - vzeroall - movl (%esi),%eax - movl 4(%esi),%ebx - movl 8(%esi),%ecx - movl 12(%esi),%edi - movl %ebx,4(%esp) - xorl %ecx,%ebx - movl %ecx,8(%esp) - movl %edi,12(%esp) - movl 16(%esi),%edx - movl 20(%esi),%edi - movl 24(%esi),%ecx - movl 28(%esi),%esi - movl %edi,20(%esp) - movl 100(%esp),%edi - movl %ecx,24(%esp) - movl %esi,28(%esp) - vmovdqa 256(%ebp),%xmm7 - jmp .L015grand_avx -.align 32 -.L015grand_avx: - vmovdqu (%edi),%xmm0 - vmovdqu 16(%edi),%xmm1 - vmovdqu 32(%edi),%xmm2 - vmovdqu 48(%edi),%xmm3 - addl $64,%edi - vpshufb %xmm7,%xmm0,%xmm0 - movl %edi,100(%esp) - vpshufb %xmm7,%xmm1,%xmm1 - vpshufb %xmm7,%xmm2,%xmm2 - vpaddd (%ebp),%xmm0,%xmm4 - vpshufb %xmm7,%xmm3,%xmm3 - vpaddd 16(%ebp),%xmm1,%xmm5 - vpaddd 32(%ebp),%xmm2,%xmm6 - vpaddd 48(%ebp),%xmm3,%xmm7 - vmovdqa %xmm4,32(%esp) - vmovdqa %xmm5,48(%esp) - vmovdqa %xmm6,64(%esp) - vmovdqa %xmm7,80(%esp) - jmp .L016avx_00_47 -.align 16 -.L016avx_00_47: - addl $64,%ebp - vpalignr $4,%xmm0,%xmm1,%xmm4 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 20(%esp),%esi - vpalignr $4,%xmm2,%xmm3,%xmm7 - xorl %ecx,%edx - movl 24(%esp),%edi - xorl %edi,%esi - vpsrld $7,%xmm4,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - vpaddd %xmm7,%xmm0,%xmm0 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrld $3,%xmm4,%xmm7 - movl %eax,%ecx - addl %edi,%edx - movl 4(%esp),%edi - vpslld $14,%xmm4,%xmm5 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,(%esp) - vpxor %xmm6,%xmm7,%xmm4 - xorl %eax,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - vpshufd $250,%xmm3,%xmm7 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpsrld $11,%xmm6,%xmm6 - addl 32(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpxor %xmm5,%xmm4,%xmm4 - addl %edx,%ebx - addl 12(%esp),%edx - addl %ecx,%ebx - vpslld $11,%xmm5,%xmm5 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 16(%esp),%esi - vpxor %xmm6,%xmm4,%xmm4 - xorl %ecx,%edx - movl 20(%esp),%edi - xorl %edi,%esi - vpsrld $10,%xmm7,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,12(%esp) - vpxor %xmm5,%xmm4,%xmm4 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %ebx,%ecx - addl %edi,%edx - movl (%esp),%edi - vpaddd %xmm4,%xmm0,%xmm0 - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,28(%esp) - vpxor %xmm5,%xmm6,%xmm6 - xorl %ebx,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - vpsrlq $19,%xmm7,%xmm7 - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - addl 36(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - vpshufd $132,%xmm6,%xmm7 - addl %edx,%eax - addl 8(%esp),%edx - addl %ecx,%eax - vpsrldq $8,%xmm7,%xmm7 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 12(%esp),%esi - vpaddd %xmm7,%xmm0,%xmm0 - xorl %ecx,%edx - movl 16(%esp),%edi - xorl %edi,%esi - vpshufd $80,%xmm0,%xmm7 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - vpsrld $10,%xmm7,%xmm6 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %eax,%ecx - addl %edi,%edx - movl 28(%esp),%edi - vpxor %xmm5,%xmm6,%xmm6 - movl %eax,%esi - shrdl 
$9,%ecx,%ecx - movl %eax,24(%esp) - vpsrlq $19,%xmm7,%xmm7 - xorl %eax,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpshufd $232,%xmm6,%xmm7 - addl 40(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpslldq $8,%xmm7,%xmm7 - addl %edx,%ebx - addl 4(%esp),%edx - addl %ecx,%ebx - vpaddd %xmm7,%xmm0,%xmm0 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 8(%esp),%esi - vpaddd (%ebp),%xmm0,%xmm6 - xorl %ecx,%edx - movl 12(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,4(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 24(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,20(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 44(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl (%esp),%edx - addl %ecx,%eax - vmovdqa %xmm6,32(%esp) - vpalignr $4,%xmm1,%xmm2,%xmm4 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 4(%esp),%esi - vpalignr $4,%xmm3,%xmm0,%xmm7 - xorl %ecx,%edx - movl 8(%esp),%edi - xorl %edi,%esi - vpsrld $7,%xmm4,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,(%esp) - vpaddd %xmm7,%xmm1,%xmm1 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrld $3,%xmm4,%xmm7 - movl %eax,%ecx - addl %edi,%edx - movl 20(%esp),%edi - vpslld $14,%xmm4,%xmm5 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,16(%esp) - vpxor %xmm6,%xmm7,%xmm4 - xorl %eax,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - vpshufd $250,%xmm0,%xmm7 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpsrld $11,%xmm6,%xmm6 - addl 48(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpxor %xmm5,%xmm4,%xmm4 - addl %edx,%ebx - addl 28(%esp),%edx - addl %ecx,%ebx - vpslld $11,%xmm5,%xmm5 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl (%esp),%esi - vpxor %xmm6,%xmm4,%xmm4 - xorl %ecx,%edx - movl 4(%esp),%edi - xorl %edi,%esi - vpsrld $10,%xmm7,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,28(%esp) - vpxor %xmm5,%xmm4,%xmm4 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %ebx,%ecx - addl %edi,%edx - movl 16(%esp),%edi - vpaddd %xmm4,%xmm1,%xmm1 - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,12(%esp) - vpxor %xmm5,%xmm6,%xmm6 - xorl %ebx,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - vpsrlq $19,%xmm7,%xmm7 - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - addl 52(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - vpshufd $132,%xmm6,%xmm7 - addl %edx,%eax - addl 24(%esp),%edx - addl %ecx,%eax - vpsrldq $8,%xmm7,%xmm7 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 28(%esp),%esi - vpaddd %xmm7,%xmm1,%xmm1 - xorl %ecx,%edx - movl (%esp),%edi - xorl %edi,%esi - vpshufd $80,%xmm1,%xmm7 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - vpsrld $10,%xmm7,%xmm6 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %eax,%ecx - addl %edi,%edx - movl 12(%esp),%edi - vpxor %xmm5,%xmm6,%xmm6 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,8(%esp) - vpsrlq $19,%xmm7,%xmm7 - xorl %eax,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpshufd $232,%xmm6,%xmm7 - addl 56(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpslldq $8,%xmm7,%xmm7 - addl %edx,%ebx - addl 20(%esp),%edx - addl %ecx,%ebx - vpaddd %xmm7,%xmm1,%xmm1 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 
24(%esp),%esi - vpaddd 16(%ebp),%xmm1,%xmm6 - xorl %ecx,%edx - movl 28(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,20(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 8(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,4(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl (%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 60(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 16(%esp),%edx - addl %ecx,%eax - vmovdqa %xmm6,48(%esp) - vpalignr $4,%xmm2,%xmm3,%xmm4 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 20(%esp),%esi - vpalignr $4,%xmm0,%xmm1,%xmm7 - xorl %ecx,%edx - movl 24(%esp),%edi - xorl %edi,%esi - vpsrld $7,%xmm4,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - vpaddd %xmm7,%xmm2,%xmm2 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrld $3,%xmm4,%xmm7 - movl %eax,%ecx - addl %edi,%edx - movl 4(%esp),%edi - vpslld $14,%xmm4,%xmm5 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,(%esp) - vpxor %xmm6,%xmm7,%xmm4 - xorl %eax,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - vpshufd $250,%xmm1,%xmm7 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpsrld $11,%xmm6,%xmm6 - addl 64(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpxor %xmm5,%xmm4,%xmm4 - addl %edx,%ebx - addl 12(%esp),%edx - addl %ecx,%ebx - vpslld $11,%xmm5,%xmm5 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 16(%esp),%esi - vpxor %xmm6,%xmm4,%xmm4 - xorl %ecx,%edx - movl 20(%esp),%edi - xorl %edi,%esi - vpsrld $10,%xmm7,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,12(%esp) - vpxor %xmm5,%xmm4,%xmm4 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %ebx,%ecx - addl %edi,%edx - movl (%esp),%edi - vpaddd %xmm4,%xmm2,%xmm2 - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,28(%esp) - vpxor %xmm5,%xmm6,%xmm6 - xorl %ebx,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - vpsrlq $19,%xmm7,%xmm7 - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - addl 68(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - vpshufd $132,%xmm6,%xmm7 - addl %edx,%eax - addl 8(%esp),%edx - addl %ecx,%eax - vpsrldq $8,%xmm7,%xmm7 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 12(%esp),%esi - vpaddd %xmm7,%xmm2,%xmm2 - xorl %ecx,%edx - movl 16(%esp),%edi - xorl %edi,%esi - vpshufd $80,%xmm2,%xmm7 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - vpsrld $10,%xmm7,%xmm6 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %eax,%ecx - addl %edi,%edx - movl 28(%esp),%edi - vpxor %xmm5,%xmm6,%xmm6 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,24(%esp) - vpsrlq $19,%xmm7,%xmm7 - xorl %eax,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpshufd $232,%xmm6,%xmm7 - addl 72(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpslldq $8,%xmm7,%xmm7 - addl %edx,%ebx - addl 4(%esp),%edx - addl %ecx,%ebx - vpaddd %xmm7,%xmm2,%xmm2 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 8(%esp),%esi - vpaddd 32(%ebp),%xmm2,%xmm6 - xorl %ecx,%edx - movl 12(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,4(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 24(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,20(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl 
%esi,%ecx - addl 76(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl (%esp),%edx - addl %ecx,%eax - vmovdqa %xmm6,64(%esp) - vpalignr $4,%xmm3,%xmm0,%xmm4 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 4(%esp),%esi - vpalignr $4,%xmm1,%xmm2,%xmm7 - xorl %ecx,%edx - movl 8(%esp),%edi - xorl %edi,%esi - vpsrld $7,%xmm4,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,(%esp) - vpaddd %xmm7,%xmm3,%xmm3 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrld $3,%xmm4,%xmm7 - movl %eax,%ecx - addl %edi,%edx - movl 20(%esp),%edi - vpslld $14,%xmm4,%xmm5 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,16(%esp) - vpxor %xmm6,%xmm7,%xmm4 - xorl %eax,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - vpshufd $250,%xmm2,%xmm7 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpsrld $11,%xmm6,%xmm6 - addl 80(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpxor %xmm5,%xmm4,%xmm4 - addl %edx,%ebx - addl 28(%esp),%edx - addl %ecx,%ebx - vpslld $11,%xmm5,%xmm5 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl (%esp),%esi - vpxor %xmm6,%xmm4,%xmm4 - xorl %ecx,%edx - movl 4(%esp),%edi - xorl %edi,%esi - vpsrld $10,%xmm7,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,28(%esp) - vpxor %xmm5,%xmm4,%xmm4 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %ebx,%ecx - addl %edi,%edx - movl 16(%esp),%edi - vpaddd %xmm4,%xmm3,%xmm3 - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,12(%esp) - vpxor %xmm5,%xmm6,%xmm6 - xorl %ebx,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - vpsrlq $19,%xmm7,%xmm7 - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - addl 84(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - vpshufd $132,%xmm6,%xmm7 - addl %edx,%eax - addl 24(%esp),%edx - addl %ecx,%eax - vpsrldq $8,%xmm7,%xmm7 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 28(%esp),%esi - vpaddd %xmm7,%xmm3,%xmm3 - xorl %ecx,%edx - movl (%esp),%edi - xorl %edi,%esi - vpshufd $80,%xmm3,%xmm7 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - vpsrld $10,%xmm7,%xmm6 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %eax,%ecx - addl %edi,%edx - movl 12(%esp),%edi - vpxor %xmm5,%xmm6,%xmm6 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,8(%esp) - vpsrlq $19,%xmm7,%xmm7 - xorl %eax,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpshufd $232,%xmm6,%xmm7 - addl 88(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpslldq $8,%xmm7,%xmm7 - addl %edx,%ebx - addl 20(%esp),%edx - addl %ecx,%ebx - vpaddd %xmm7,%xmm3,%xmm3 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 24(%esp),%esi - vpaddd 48(%ebp),%xmm3,%xmm6 - xorl %ecx,%edx - movl 28(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,20(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 8(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,4(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl (%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 92(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 16(%esp),%edx - addl %ecx,%eax - vmovdqa %xmm6,80(%esp) - cmpl $66051,64(%ebp) - jne .L016avx_00_47 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 20(%esp),%esi - xorl %ecx,%edx - movl 24(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl 
%eax,%ecx - addl %edi,%edx - movl 4(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 32(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 12(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 16(%esp),%esi - xorl %ecx,%edx - movl 20(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,12(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl (%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,28(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 36(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 8(%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 12(%esp),%esi - xorl %ecx,%edx - movl 16(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 28(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,24(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 40(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 4(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 8(%esp),%esi - xorl %ecx,%edx - movl 12(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,4(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 24(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,20(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 44(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl (%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 4(%esp),%esi - xorl %ecx,%edx - movl 8(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 20(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,16(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 48(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 28(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl (%esp),%esi - xorl %ecx,%edx - movl 4(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,28(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 16(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,12(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 52(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 24(%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 28(%esp),%esi - xorl %ecx,%edx - movl (%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 12(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,8(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - shrdl 
$11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 56(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 20(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 24(%esp),%esi - xorl %ecx,%edx - movl 28(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,20(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 8(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,4(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl (%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 60(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 16(%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 20(%esp),%esi - xorl %ecx,%edx - movl 24(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 4(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 64(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 12(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 16(%esp),%esi - xorl %ecx,%edx - movl 20(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,12(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl (%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,28(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 68(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 8(%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 12(%esp),%esi - xorl %ecx,%edx - movl 16(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 28(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,24(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 72(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 4(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 8(%esp),%esi - xorl %ecx,%edx - movl 12(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,4(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 24(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,20(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 76(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl (%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 4(%esp),%esi - xorl %ecx,%edx - movl 8(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 20(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,16(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 80(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 28(%esp),%edx - addl %ecx,%ebx - 
movl %edx,%ecx - shrdl $14,%edx,%edx - movl (%esp),%esi - xorl %ecx,%edx - movl 4(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,28(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 16(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,12(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 84(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 24(%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 28(%esp),%esi - xorl %ecx,%edx - movl (%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 12(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,8(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 88(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 20(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 24(%esp),%esi - xorl %ecx,%edx - movl 28(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,20(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 8(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,4(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl (%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 92(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 16(%esp),%edx - addl %ecx,%eax - movl 96(%esp),%esi - xorl %edi,%ebx - movl 12(%esp),%ecx - addl (%esi),%eax - addl 4(%esi),%ebx - addl 8(%esi),%edi - addl 12(%esi),%ecx - movl %eax,(%esi) - movl %ebx,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl %ebx,4(%esp) - xorl %edi,%ebx - movl %edi,8(%esp) - movl %ecx,12(%esp) - movl 20(%esp),%edi - movl 24(%esp),%ecx - addl 16(%esi),%edx - addl 20(%esi),%edi - addl 24(%esi),%ecx - movl %edx,16(%esi) - movl %edi,20(%esi) - movl %edi,20(%esp) - movl 28(%esp),%edi - movl %ecx,24(%esi) - addl 28(%esi),%edi - movl %ecx,24(%esp) - movl %edi,28(%esi) - movl %edi,28(%esp) - movl 100(%esp),%edi - vmovdqa 64(%ebp),%xmm7 - subl $192,%ebp - cmpl 104(%esp),%edi - jb .L015grand_avx - movl 108(%esp),%esp - vzeroall - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.align 32 -.L014AVX_BMI: - leal -96(%esp),%esp - vzeroall - movl (%esi),%eax - movl 4(%esi),%ebx - movl 8(%esi),%ecx - movl 12(%esi),%edi - movl %ebx,4(%esp) - xorl %ecx,%ebx - movl %ecx,8(%esp) - movl %edi,12(%esp) - movl 16(%esi),%edx - movl 20(%esi),%edi - movl 24(%esi),%ecx - movl 28(%esi),%esi - movl %edi,20(%esp) - movl 100(%esp),%edi - movl %ecx,24(%esp) - movl %esi,28(%esp) - vmovdqa 256(%ebp),%xmm7 - jmp .L017grand_avx_bmi -.align 32 -.L017grand_avx_bmi: - vmovdqu (%edi),%xmm0 - vmovdqu 16(%edi),%xmm1 - vmovdqu 32(%edi),%xmm2 - vmovdqu 48(%edi),%xmm3 - addl $64,%edi - vpshufb %xmm7,%xmm0,%xmm0 - movl %edi,100(%esp) - vpshufb %xmm7,%xmm1,%xmm1 - vpshufb %xmm7,%xmm2,%xmm2 - vpaddd (%ebp),%xmm0,%xmm4 - vpshufb %xmm7,%xmm3,%xmm3 - vpaddd 16(%ebp),%xmm1,%xmm5 - vpaddd 32(%ebp),%xmm2,%xmm6 - vpaddd 48(%ebp),%xmm3,%xmm7 - vmovdqa %xmm4,32(%esp) - vmovdqa %xmm5,48(%esp) - vmovdqa %xmm6,64(%esp) - vmovdqa %xmm7,80(%esp) - jmp .L018avx_bmi_00_47 -.align 16 -.L018avx_bmi_00_47: - addl $64,%ebp - vpalignr $4,%xmm0,%xmm1,%xmm4 - rorxl $6,%edx,%ecx - rorxl 
$11,%edx,%esi - movl %edx,16(%esp) - vpalignr $4,%xmm2,%xmm3,%xmm7 - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 24(%esp),%edx,%esi - vpsrld $7,%xmm4,%xmm6 - xorl %edi,%ecx - andl 20(%esp),%edx - movl %eax,(%esp) - vpaddd %xmm7,%xmm0,%xmm0 - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrld $3,%xmm4,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpslld $14,%xmm4,%xmm5 - movl 4(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpxor %xmm6,%xmm7,%xmm4 - addl 28(%esp),%edx - andl %eax,%ebx - addl 32(%esp),%edx - vpshufd $250,%xmm3,%xmm7 - xorl %edi,%ebx - addl %edx,%ecx - addl 12(%esp),%edx - vpsrld $11,%xmm6,%xmm6 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl %edx,12(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpslld $11,%xmm5,%xmm5 - andnl 20(%esp),%edx,%esi - xorl %edi,%ecx - andl 16(%esp),%edx - vpxor %xmm6,%xmm4,%xmm4 - movl %ebx,28(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpsrld $10,%xmm7,%xmm6 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl (%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpsrlq $17,%xmm7,%xmm5 - addl 24(%esp),%edx - andl %ebx,%eax - addl 36(%esp),%edx - vpaddd %xmm4,%xmm0,%xmm0 - xorl %edi,%eax - addl %edx,%ecx - addl 8(%esp),%edx - vpxor %xmm5,%xmm6,%xmm6 - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpsrlq $19,%xmm7,%xmm7 - movl %edx,8(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - andnl 16(%esp),%edx,%esi - xorl %edi,%ecx - andl 12(%esp),%edx - vpshufd $132,%xmm6,%xmm7 - movl %eax,24(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrldq $8,%xmm7,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpaddd %xmm7,%xmm0,%xmm0 - movl 28(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpshufd $80,%xmm0,%xmm7 - addl 20(%esp),%edx - andl %eax,%ebx - addl 40(%esp),%edx - vpsrld $10,%xmm7,%xmm6 - xorl %edi,%ebx - addl %edx,%ecx - addl 4(%esp),%edx - vpsrlq $17,%xmm7,%xmm5 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm6,%xmm6 - movl %edx,4(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpsrlq $19,%xmm7,%xmm7 - andnl 12(%esp),%edx,%esi - xorl %edi,%ecx - andl 8(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - movl %ebx,20(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpshufd $232,%xmm6,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpslldq $8,%xmm7,%xmm7 - movl 24(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpaddd %xmm7,%xmm0,%xmm0 - addl 16(%esp),%edx - andl %ebx,%eax - addl 44(%esp),%edx - vpaddd (%ebp),%xmm0,%xmm6 - xorl %edi,%eax - addl %edx,%ecx - addl (%esp),%edx - leal (%eax,%ecx,1),%eax - vmovdqa %xmm6,32(%esp) - vpalignr $4,%xmm1,%xmm2,%xmm4 - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,(%esp) - vpalignr $4,%xmm3,%xmm0,%xmm7 - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 8(%esp),%edx,%esi - vpsrld $7,%xmm4,%xmm6 - xorl %edi,%ecx - andl 4(%esp),%edx - movl %eax,16(%esp) - vpaddd %xmm7,%xmm1,%xmm1 - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrld $3,%xmm4,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpslld $14,%xmm4,%xmm5 - movl 20(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpxor %xmm6,%xmm7,%xmm4 - addl 12(%esp),%edx - andl %eax,%ebx - addl 48(%esp),%edx - vpshufd $250,%xmm0,%xmm7 - xorl %edi,%ebx - addl %edx,%ecx - addl 28(%esp),%edx - vpsrld $11,%xmm6,%xmm6 - leal 
(%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl %edx,28(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpslld $11,%xmm5,%xmm5 - andnl 4(%esp),%edx,%esi - xorl %edi,%ecx - andl (%esp),%edx - vpxor %xmm6,%xmm4,%xmm4 - movl %ebx,12(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpsrld $10,%xmm7,%xmm6 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl 16(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpsrlq $17,%xmm7,%xmm5 - addl 8(%esp),%edx - andl %ebx,%eax - addl 52(%esp),%edx - vpaddd %xmm4,%xmm1,%xmm1 - xorl %edi,%eax - addl %edx,%ecx - addl 24(%esp),%edx - vpxor %xmm5,%xmm6,%xmm6 - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpsrlq $19,%xmm7,%xmm7 - movl %edx,24(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - andnl (%esp),%edx,%esi - xorl %edi,%ecx - andl 28(%esp),%edx - vpshufd $132,%xmm6,%xmm7 - movl %eax,8(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrldq $8,%xmm7,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpaddd %xmm7,%xmm1,%xmm1 - movl 12(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpshufd $80,%xmm1,%xmm7 - addl 4(%esp),%edx - andl %eax,%ebx - addl 56(%esp),%edx - vpsrld $10,%xmm7,%xmm6 - xorl %edi,%ebx - addl %edx,%ecx - addl 20(%esp),%edx - vpsrlq $17,%xmm7,%xmm5 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm6,%xmm6 - movl %edx,20(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpsrlq $19,%xmm7,%xmm7 - andnl 28(%esp),%edx,%esi - xorl %edi,%ecx - andl 24(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - movl %ebx,4(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpshufd $232,%xmm6,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpslldq $8,%xmm7,%xmm7 - movl 8(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpaddd %xmm7,%xmm1,%xmm1 - addl (%esp),%edx - andl %ebx,%eax - addl 60(%esp),%edx - vpaddd 16(%ebp),%xmm1,%xmm6 - xorl %edi,%eax - addl %edx,%ecx - addl 16(%esp),%edx - leal (%eax,%ecx,1),%eax - vmovdqa %xmm6,48(%esp) - vpalignr $4,%xmm2,%xmm3,%xmm4 - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,16(%esp) - vpalignr $4,%xmm0,%xmm1,%xmm7 - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 24(%esp),%edx,%esi - vpsrld $7,%xmm4,%xmm6 - xorl %edi,%ecx - andl 20(%esp),%edx - movl %eax,(%esp) - vpaddd %xmm7,%xmm2,%xmm2 - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrld $3,%xmm4,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpslld $14,%xmm4,%xmm5 - movl 4(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpxor %xmm6,%xmm7,%xmm4 - addl 28(%esp),%edx - andl %eax,%ebx - addl 64(%esp),%edx - vpshufd $250,%xmm1,%xmm7 - xorl %edi,%ebx - addl %edx,%ecx - addl 12(%esp),%edx - vpsrld $11,%xmm6,%xmm6 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl %edx,12(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpslld $11,%xmm5,%xmm5 - andnl 20(%esp),%edx,%esi - xorl %edi,%ecx - andl 16(%esp),%edx - vpxor %xmm6,%xmm4,%xmm4 - movl %ebx,28(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpsrld $10,%xmm7,%xmm6 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl (%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpsrlq $17,%xmm7,%xmm5 - addl 24(%esp),%edx - andl %ebx,%eax - addl 68(%esp),%edx - vpaddd %xmm4,%xmm2,%xmm2 - xorl %edi,%eax - addl %edx,%ecx - addl 
8(%esp),%edx - vpxor %xmm5,%xmm6,%xmm6 - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpsrlq $19,%xmm7,%xmm7 - movl %edx,8(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - andnl 16(%esp),%edx,%esi - xorl %edi,%ecx - andl 12(%esp),%edx - vpshufd $132,%xmm6,%xmm7 - movl %eax,24(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrldq $8,%xmm7,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpaddd %xmm7,%xmm2,%xmm2 - movl 28(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpshufd $80,%xmm2,%xmm7 - addl 20(%esp),%edx - andl %eax,%ebx - addl 72(%esp),%edx - vpsrld $10,%xmm7,%xmm6 - xorl %edi,%ebx - addl %edx,%ecx - addl 4(%esp),%edx - vpsrlq $17,%xmm7,%xmm5 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm6,%xmm6 - movl %edx,4(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpsrlq $19,%xmm7,%xmm7 - andnl 12(%esp),%edx,%esi - xorl %edi,%ecx - andl 8(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - movl %ebx,20(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpshufd $232,%xmm6,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpslldq $8,%xmm7,%xmm7 - movl 24(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpaddd %xmm7,%xmm2,%xmm2 - addl 16(%esp),%edx - andl %ebx,%eax - addl 76(%esp),%edx - vpaddd 32(%ebp),%xmm2,%xmm6 - xorl %edi,%eax - addl %edx,%ecx - addl (%esp),%edx - leal (%eax,%ecx,1),%eax - vmovdqa %xmm6,64(%esp) - vpalignr $4,%xmm3,%xmm0,%xmm4 - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,(%esp) - vpalignr $4,%xmm1,%xmm2,%xmm7 - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 8(%esp),%edx,%esi - vpsrld $7,%xmm4,%xmm6 - xorl %edi,%ecx - andl 4(%esp),%edx - movl %eax,16(%esp) - vpaddd %xmm7,%xmm3,%xmm3 - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrld $3,%xmm4,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpslld $14,%xmm4,%xmm5 - movl 20(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpxor %xmm6,%xmm7,%xmm4 - addl 12(%esp),%edx - andl %eax,%ebx - addl 80(%esp),%edx - vpshufd $250,%xmm2,%xmm7 - xorl %edi,%ebx - addl %edx,%ecx - addl 28(%esp),%edx - vpsrld $11,%xmm6,%xmm6 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl %edx,28(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpslld $11,%xmm5,%xmm5 - andnl 4(%esp),%edx,%esi - xorl %edi,%ecx - andl (%esp),%edx - vpxor %xmm6,%xmm4,%xmm4 - movl %ebx,12(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpsrld $10,%xmm7,%xmm6 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl 16(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpsrlq $17,%xmm7,%xmm5 - addl 8(%esp),%edx - andl %ebx,%eax - addl 84(%esp),%edx - vpaddd %xmm4,%xmm3,%xmm3 - xorl %edi,%eax - addl %edx,%ecx - addl 24(%esp),%edx - vpxor %xmm5,%xmm6,%xmm6 - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpsrlq $19,%xmm7,%xmm7 - movl %edx,24(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - andnl (%esp),%edx,%esi - xorl %edi,%ecx - andl 28(%esp),%edx - vpshufd $132,%xmm6,%xmm7 - movl %eax,8(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrldq $8,%xmm7,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpaddd %xmm7,%xmm3,%xmm3 - movl 12(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpshufd $80,%xmm3,%xmm7 - addl 4(%esp),%edx - andl %eax,%ebx - addl 88(%esp),%edx - vpsrld $10,%xmm7,%xmm6 - 
xorl %edi,%ebx - addl %edx,%ecx - addl 20(%esp),%edx - vpsrlq $17,%xmm7,%xmm5 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm6,%xmm6 - movl %edx,20(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpsrlq $19,%xmm7,%xmm7 - andnl 28(%esp),%edx,%esi - xorl %edi,%ecx - andl 24(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - movl %ebx,4(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpshufd $232,%xmm6,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpslldq $8,%xmm7,%xmm7 - movl 8(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpaddd %xmm7,%xmm3,%xmm3 - addl (%esp),%edx - andl %ebx,%eax - addl 92(%esp),%edx - vpaddd 48(%ebp),%xmm3,%xmm6 - xorl %edi,%eax - addl %edx,%ecx - addl 16(%esp),%edx - leal (%eax,%ecx,1),%eax - vmovdqa %xmm6,80(%esp) - cmpl $66051,64(%ebp) - jne .L018avx_bmi_00_47 - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,16(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 24(%esp),%edx,%esi - xorl %edi,%ecx - andl 20(%esp),%edx - movl %eax,(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 4(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - andl %eax,%ebx - addl 32(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 12(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,12(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 20(%esp),%edx,%esi - xorl %edi,%ecx - andl 16(%esp),%edx - movl %ebx,28(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl (%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - andl %ebx,%eax - addl 36(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 8(%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,8(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 16(%esp),%edx,%esi - xorl %edi,%ecx - andl 12(%esp),%edx - movl %eax,24(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 28(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - andl %eax,%ebx - addl 40(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 4(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,4(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 12(%esp),%edx,%esi - xorl %edi,%ecx - andl 8(%esp),%edx - movl %ebx,20(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 24(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - andl %ebx,%eax - addl 44(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl (%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 8(%esp),%edx,%esi - xorl %edi,%ecx - andl 4(%esp),%edx - movl %eax,16(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 20(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - andl %eax,%ebx - addl 48(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 28(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,28(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 4(%esp),%edx,%esi - xorl %edi,%ecx - andl 
(%esp),%edx - movl %ebx,12(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 16(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - andl %ebx,%eax - addl 52(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 24(%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,24(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl (%esp),%edx,%esi - xorl %edi,%ecx - andl 28(%esp),%edx - movl %eax,8(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 12(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - andl %eax,%ebx - addl 56(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 20(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,20(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 28(%esp),%edx,%esi - xorl %edi,%ecx - andl 24(%esp),%edx - movl %ebx,4(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 8(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl (%esp),%edx - andl %ebx,%eax - addl 60(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 16(%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,16(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 24(%esp),%edx,%esi - xorl %edi,%ecx - andl 20(%esp),%edx - movl %eax,(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 4(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - andl %eax,%ebx - addl 64(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 12(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,12(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 20(%esp),%edx,%esi - xorl %edi,%ecx - andl 16(%esp),%edx - movl %ebx,28(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl (%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - andl %ebx,%eax - addl 68(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 8(%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,8(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 16(%esp),%edx,%esi - xorl %edi,%ecx - andl 12(%esp),%edx - movl %eax,24(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 28(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - andl %eax,%ebx - addl 72(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 4(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,4(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 12(%esp),%edx,%esi - xorl %edi,%ecx - andl 8(%esp),%edx - movl %ebx,20(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 24(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - andl %ebx,%eax - addl 76(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl (%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 8(%esp),%edx,%esi - xorl %edi,%ecx - andl 4(%esp),%edx - movl 
%eax,16(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 20(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - andl %eax,%ebx - addl 80(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 28(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,28(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 4(%esp),%edx,%esi - xorl %edi,%ecx - andl (%esp),%edx - movl %ebx,12(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 16(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - andl %ebx,%eax - addl 84(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 24(%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,24(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl (%esp),%edx,%esi - xorl %edi,%ecx - andl 28(%esp),%edx - movl %eax,8(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 12(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - andl %eax,%ebx - addl 88(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 20(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,20(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 28(%esp),%edx,%esi - xorl %edi,%ecx - andl 24(%esp),%edx - movl %ebx,4(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 8(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl (%esp),%edx - andl %ebx,%eax - addl 92(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 16(%esp),%edx - leal (%eax,%ecx,1),%eax - movl 96(%esp),%esi - xorl %edi,%ebx - movl 12(%esp),%ecx - addl (%esi),%eax - addl 4(%esi),%ebx - addl 8(%esi),%edi - addl 12(%esi),%ecx - movl %eax,(%esi) - movl %ebx,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl %ebx,4(%esp) - xorl %edi,%ebx - movl %edi,8(%esp) - movl %ecx,12(%esp) - movl 20(%esp),%edi - movl 24(%esp),%ecx - addl 16(%esi),%edx - addl 20(%esi),%edi - addl 24(%esi),%ecx - movl %edx,16(%esi) - movl %edi,20(%esi) - movl %edi,20(%esp) - movl 28(%esp),%edi - movl %ecx,24(%esi) - addl 28(%esi),%edi - movl %ecx,24(%esp) - movl %edi,28(%esi) - movl %edi,28(%esp) - movl 100(%esp),%edi - vmovdqa 64(%ebp),%xmm7 - subl $192,%ebp - cmpl 104(%esp),%edi - jb .L017grand_avx_bmi - movl 108(%esp),%esp - vzeroall - popl %edi - popl %esi - popl %ebx - popl %ebp - ret .size sha256_block_data_order,.-.L_sha256_block_data_order_begin .comm OPENSSL_ia32cap_P,16,4 #else @@ -6825,13 +4619,12 @@ sha256_block_data_order: orl %ebx,%ecx andl $1342177280,%ecx cmpl $1342177280,%ecx - je .L005AVX testl $512,%ebx - jnz .L006SSSE3 + jnz .L005SSSE3 .L003no_xmm: subl %edi,%eax cmpl $256,%eax - jae .L007unrolled + jae .L006unrolled jmp .L002loop .align 16 .L002loop: @@ -6903,7 +4696,7 @@ sha256_block_data_order: movl %ecx,28(%esp) movl %edi,32(%esp) .align 16 -.L00800_15: +.L00700_15: movl %edx,%ecx movl 24(%esp),%esi rorl $14,%ecx @@ -6941,11 +4734,11 @@ sha256_block_data_order: addl $4,%ebp addl %ebx,%eax cmpl $3248222580,%esi - jne .L00800_15 + jne .L00700_15 movl 156(%esp),%ecx - jmp .L00916_63 + jmp .L00816_63 .align 16 -.L00916_63: +.L00816_63: movl %ecx,%ebx movl 104(%esp),%esi rorl $11,%ecx @@ -7000,7 +4793,7 @@ sha256_block_data_order: addl $4,%ebp addl %ebx,%eax cmpl 
$3329325298,%esi - jne .L00916_63 + jne .L00816_63 movl 356(%esp),%esi movl 8(%esp),%ebx movl 16(%esp),%ecx @@ -7044,7 +4837,7 @@ sha256_block_data_order: .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .align 16 -.L007unrolled: +.L006unrolled: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebp @@ -7061,9 +4854,9 @@ sha256_block_data_order: movl %ebx,20(%esp) movl %ecx,24(%esp) movl %esi,28(%esp) - jmp .L010grand_loop + jmp .L009grand_loop .align 16 -.L010grand_loop: +.L009grand_loop: movl (%edi),%ebx movl 4(%edi),%ecx bswap %ebx @@ -9943,7 +7736,7 @@ sha256_block_data_order: movl %ebx,24(%esp) movl %ecx,28(%esp) cmpl 104(%esp),%edi - jb .L010grand_loop + jb .L009grand_loop movl 108(%esp),%esp popl %edi popl %esi @@ -9962,9 +7755,9 @@ sha256_block_data_order: pshufd $27,%xmm2,%xmm2 .byte 102,15,58,15,202,8 punpcklqdq %xmm0,%xmm2 - jmp .L011loop_shaext + jmp .L010loop_shaext .align 16 -.L011loop_shaext: +.L010loop_shaext: movdqu (%edi),%xmm3 movdqu 16(%edi),%xmm4 movdqu 32(%edi),%xmm5 @@ -10134,7 +7927,7 @@ sha256_block_data_order: .byte 15,56,203,202 paddd 16(%esp),%xmm2 paddd (%esp),%xmm1 - jnz .L011loop_shaext + jnz .L010loop_shaext pshufd $177,%xmm2,%xmm2 pshufd $27,%xmm1,%xmm7 pshufd $177,%xmm1,%xmm1 @@ -10149,7 +7942,7 @@ sha256_block_data_order: popl %ebp ret .align 32 -.L006SSSE3: +.L005SSSE3: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebx @@ -10168,9 +7961,9 @@ sha256_block_data_order: movl %ecx,24(%esp) movl %esi,28(%esp) movdqa 256(%ebp),%xmm7 - jmp .L012grand_ssse3 + jmp .L011grand_ssse3 .align 16 -.L012grand_ssse3: +.L011grand_ssse3: movdqu (%edi),%xmm0 movdqu 16(%edi),%xmm1 movdqu 32(%edi),%xmm2 @@ -10193,9 +7986,9 @@ sha256_block_data_order: paddd %xmm3,%xmm7 movdqa %xmm6,64(%esp) movdqa %xmm7,80(%esp) - jmp .L013ssse3_00_47 + jmp .L012ssse3_00_47 .align 16 -.L013ssse3_00_47: +.L012ssse3_00_47: addl $64,%ebp movl %edx,%ecx movdqa %xmm1,%xmm4 @@ -10838,7 +8631,7 @@ sha256_block_data_order: addl %ecx,%eax movdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne .L013ssse3_00_47 + jne .L012ssse3_00_47 movl %edx,%ecx rorl $14,%edx movl 20(%esp),%esi @@ -11352,2213 +9145,8 @@ sha256_block_data_order: movdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb .L012grand_ssse3 - movl 108(%esp),%esp - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.align 32 -.L005AVX: - andl $264,%edx - cmpl $264,%edx - je .L014AVX_BMI - leal -96(%esp),%esp - vzeroall - movl (%esi),%eax - movl 4(%esi),%ebx - movl 8(%esi),%ecx - movl 12(%esi),%edi - movl %ebx,4(%esp) - xorl %ecx,%ebx - movl %ecx,8(%esp) - movl %edi,12(%esp) - movl 16(%esi),%edx - movl 20(%esi),%edi - movl 24(%esi),%ecx - movl 28(%esi),%esi - movl %edi,20(%esp) - movl 100(%esp),%edi - movl %ecx,24(%esp) - movl %esi,28(%esp) - vmovdqa 256(%ebp),%xmm7 - jmp .L015grand_avx -.align 32 -.L015grand_avx: - vmovdqu (%edi),%xmm0 - vmovdqu 16(%edi),%xmm1 - vmovdqu 32(%edi),%xmm2 - vmovdqu 48(%edi),%xmm3 - addl $64,%edi - vpshufb %xmm7,%xmm0,%xmm0 - movl %edi,100(%esp) - vpshufb %xmm7,%xmm1,%xmm1 - vpshufb %xmm7,%xmm2,%xmm2 - vpaddd (%ebp),%xmm0,%xmm4 - vpshufb %xmm7,%xmm3,%xmm3 - vpaddd 16(%ebp),%xmm1,%xmm5 - vpaddd 32(%ebp),%xmm2,%xmm6 - vpaddd 48(%ebp),%xmm3,%xmm7 - vmovdqa %xmm4,32(%esp) - vmovdqa %xmm5,48(%esp) - vmovdqa %xmm6,64(%esp) - vmovdqa %xmm7,80(%esp) - jmp .L016avx_00_47 -.align 16 -.L016avx_00_47: - addl $64,%ebp - vpalignr $4,%xmm0,%xmm1,%xmm4 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 20(%esp),%esi - vpalignr $4,%xmm2,%xmm3,%xmm7 - xorl %ecx,%edx - movl 24(%esp),%edi - xorl 
%edi,%esi - vpsrld $7,%xmm4,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - vpaddd %xmm7,%xmm0,%xmm0 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrld $3,%xmm4,%xmm7 - movl %eax,%ecx - addl %edi,%edx - movl 4(%esp),%edi - vpslld $14,%xmm4,%xmm5 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,(%esp) - vpxor %xmm6,%xmm7,%xmm4 - xorl %eax,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - vpshufd $250,%xmm3,%xmm7 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpsrld $11,%xmm6,%xmm6 - addl 32(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpxor %xmm5,%xmm4,%xmm4 - addl %edx,%ebx - addl 12(%esp),%edx - addl %ecx,%ebx - vpslld $11,%xmm5,%xmm5 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 16(%esp),%esi - vpxor %xmm6,%xmm4,%xmm4 - xorl %ecx,%edx - movl 20(%esp),%edi - xorl %edi,%esi - vpsrld $10,%xmm7,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,12(%esp) - vpxor %xmm5,%xmm4,%xmm4 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %ebx,%ecx - addl %edi,%edx - movl (%esp),%edi - vpaddd %xmm4,%xmm0,%xmm0 - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,28(%esp) - vpxor %xmm5,%xmm6,%xmm6 - xorl %ebx,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - vpsrlq $19,%xmm7,%xmm7 - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - addl 36(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - vpshufd $132,%xmm6,%xmm7 - addl %edx,%eax - addl 8(%esp),%edx - addl %ecx,%eax - vpsrldq $8,%xmm7,%xmm7 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 12(%esp),%esi - vpaddd %xmm7,%xmm0,%xmm0 - xorl %ecx,%edx - movl 16(%esp),%edi - xorl %edi,%esi - vpshufd $80,%xmm0,%xmm7 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - vpsrld $10,%xmm7,%xmm6 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %eax,%ecx - addl %edi,%edx - movl 28(%esp),%edi - vpxor %xmm5,%xmm6,%xmm6 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,24(%esp) - vpsrlq $19,%xmm7,%xmm7 - xorl %eax,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpshufd $232,%xmm6,%xmm7 - addl 40(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpslldq $8,%xmm7,%xmm7 - addl %edx,%ebx - addl 4(%esp),%edx - addl %ecx,%ebx - vpaddd %xmm7,%xmm0,%xmm0 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 8(%esp),%esi - vpaddd (%ebp),%xmm0,%xmm6 - xorl %ecx,%edx - movl 12(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,4(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 24(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,20(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 44(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl (%esp),%edx - addl %ecx,%eax - vmovdqa %xmm6,32(%esp) - vpalignr $4,%xmm1,%xmm2,%xmm4 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 4(%esp),%esi - vpalignr $4,%xmm3,%xmm0,%xmm7 - xorl %ecx,%edx - movl 8(%esp),%edi - xorl %edi,%esi - vpsrld $7,%xmm4,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,(%esp) - vpaddd %xmm7,%xmm1,%xmm1 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrld $3,%xmm4,%xmm7 - movl %eax,%ecx - addl %edi,%edx - movl 20(%esp),%edi - vpslld $14,%xmm4,%xmm5 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,16(%esp) - vpxor %xmm6,%xmm7,%xmm4 - xorl %eax,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - vpshufd 
$250,%xmm0,%xmm7 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpsrld $11,%xmm6,%xmm6 - addl 48(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpxor %xmm5,%xmm4,%xmm4 - addl %edx,%ebx - addl 28(%esp),%edx - addl %ecx,%ebx - vpslld $11,%xmm5,%xmm5 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl (%esp),%esi - vpxor %xmm6,%xmm4,%xmm4 - xorl %ecx,%edx - movl 4(%esp),%edi - xorl %edi,%esi - vpsrld $10,%xmm7,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,28(%esp) - vpxor %xmm5,%xmm4,%xmm4 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %ebx,%ecx - addl %edi,%edx - movl 16(%esp),%edi - vpaddd %xmm4,%xmm1,%xmm1 - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,12(%esp) - vpxor %xmm5,%xmm6,%xmm6 - xorl %ebx,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - vpsrlq $19,%xmm7,%xmm7 - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - addl 52(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - vpshufd $132,%xmm6,%xmm7 - addl %edx,%eax - addl 24(%esp),%edx - addl %ecx,%eax - vpsrldq $8,%xmm7,%xmm7 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 28(%esp),%esi - vpaddd %xmm7,%xmm1,%xmm1 - xorl %ecx,%edx - movl (%esp),%edi - xorl %edi,%esi - vpshufd $80,%xmm1,%xmm7 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - vpsrld $10,%xmm7,%xmm6 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %eax,%ecx - addl %edi,%edx - movl 12(%esp),%edi - vpxor %xmm5,%xmm6,%xmm6 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,8(%esp) - vpsrlq $19,%xmm7,%xmm7 - xorl %eax,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpshufd $232,%xmm6,%xmm7 - addl 56(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpslldq $8,%xmm7,%xmm7 - addl %edx,%ebx - addl 20(%esp),%edx - addl %ecx,%ebx - vpaddd %xmm7,%xmm1,%xmm1 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 24(%esp),%esi - vpaddd 16(%ebp),%xmm1,%xmm6 - xorl %ecx,%edx - movl 28(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,20(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 8(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,4(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl (%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 60(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 16(%esp),%edx - addl %ecx,%eax - vmovdqa %xmm6,48(%esp) - vpalignr $4,%xmm2,%xmm3,%xmm4 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 20(%esp),%esi - vpalignr $4,%xmm0,%xmm1,%xmm7 - xorl %ecx,%edx - movl 24(%esp),%edi - xorl %edi,%esi - vpsrld $7,%xmm4,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - vpaddd %xmm7,%xmm2,%xmm2 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrld $3,%xmm4,%xmm7 - movl %eax,%ecx - addl %edi,%edx - movl 4(%esp),%edi - vpslld $14,%xmm4,%xmm5 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,(%esp) - vpxor %xmm6,%xmm7,%xmm4 - xorl %eax,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - vpshufd $250,%xmm1,%xmm7 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpsrld $11,%xmm6,%xmm6 - addl 64(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpxor %xmm5,%xmm4,%xmm4 - addl %edx,%ebx - addl 12(%esp),%edx - addl %ecx,%ebx - vpslld $11,%xmm5,%xmm5 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 16(%esp),%esi - vpxor %xmm6,%xmm4,%xmm4 - xorl %ecx,%edx - movl 20(%esp),%edi - xorl %edi,%esi - vpsrld $10,%xmm7,%xmm6 - 
shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,12(%esp) - vpxor %xmm5,%xmm4,%xmm4 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %ebx,%ecx - addl %edi,%edx - movl (%esp),%edi - vpaddd %xmm4,%xmm2,%xmm2 - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,28(%esp) - vpxor %xmm5,%xmm6,%xmm6 - xorl %ebx,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - vpsrlq $19,%xmm7,%xmm7 - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - addl 68(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - vpshufd $132,%xmm6,%xmm7 - addl %edx,%eax - addl 8(%esp),%edx - addl %ecx,%eax - vpsrldq $8,%xmm7,%xmm7 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 12(%esp),%esi - vpaddd %xmm7,%xmm2,%xmm2 - xorl %ecx,%edx - movl 16(%esp),%edi - xorl %edi,%esi - vpshufd $80,%xmm2,%xmm7 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - vpsrld $10,%xmm7,%xmm6 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %eax,%ecx - addl %edi,%edx - movl 28(%esp),%edi - vpxor %xmm5,%xmm6,%xmm6 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,24(%esp) - vpsrlq $19,%xmm7,%xmm7 - xorl %eax,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpshufd $232,%xmm6,%xmm7 - addl 72(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpslldq $8,%xmm7,%xmm7 - addl %edx,%ebx - addl 4(%esp),%edx - addl %ecx,%ebx - vpaddd %xmm7,%xmm2,%xmm2 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 8(%esp),%esi - vpaddd 32(%ebp),%xmm2,%xmm6 - xorl %ecx,%edx - movl 12(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,4(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 24(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,20(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 76(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl (%esp),%edx - addl %ecx,%eax - vmovdqa %xmm6,64(%esp) - vpalignr $4,%xmm3,%xmm0,%xmm4 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 4(%esp),%esi - vpalignr $4,%xmm1,%xmm2,%xmm7 - xorl %ecx,%edx - movl 8(%esp),%edi - xorl %edi,%esi - vpsrld $7,%xmm4,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,(%esp) - vpaddd %xmm7,%xmm3,%xmm3 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrld $3,%xmm4,%xmm7 - movl %eax,%ecx - addl %edi,%edx - movl 20(%esp),%edi - vpslld $14,%xmm4,%xmm5 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,16(%esp) - vpxor %xmm6,%xmm7,%xmm4 - xorl %eax,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - vpshufd $250,%xmm2,%xmm7 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpsrld $11,%xmm6,%xmm6 - addl 80(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpxor %xmm5,%xmm4,%xmm4 - addl %edx,%ebx - addl 28(%esp),%edx - addl %ecx,%ebx - vpslld $11,%xmm5,%xmm5 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl (%esp),%esi - vpxor %xmm6,%xmm4,%xmm4 - xorl %ecx,%edx - movl 4(%esp),%edi - xorl %edi,%esi - vpsrld $10,%xmm7,%xmm6 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,28(%esp) - vpxor %xmm5,%xmm4,%xmm4 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %ebx,%ecx - addl %edi,%edx - movl 16(%esp),%edi - vpaddd %xmm4,%xmm3,%xmm3 - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,12(%esp) - vpxor %xmm5,%xmm6,%xmm6 - xorl %ebx,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - vpsrlq $19,%xmm7,%xmm7 - shrdl $11,%ecx,%ecx - 
andl %ebx,%eax - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - addl 84(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - vpshufd $132,%xmm6,%xmm7 - addl %edx,%eax - addl 24(%esp),%edx - addl %ecx,%eax - vpsrldq $8,%xmm7,%xmm7 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 28(%esp),%esi - vpaddd %xmm7,%xmm3,%xmm3 - xorl %ecx,%edx - movl (%esp),%edi - xorl %edi,%esi - vpshufd $80,%xmm3,%xmm7 - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - vpsrld $10,%xmm7,%xmm6 - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - vpsrlq $17,%xmm7,%xmm5 - movl %eax,%ecx - addl %edi,%edx - movl 12(%esp),%edi - vpxor %xmm5,%xmm6,%xmm6 - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,8(%esp) - vpsrlq $19,%xmm7,%xmm7 - xorl %eax,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - vpshufd $232,%xmm6,%xmm7 - addl 88(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - vpslldq $8,%xmm7,%xmm7 - addl %edx,%ebx - addl 20(%esp),%edx - addl %ecx,%ebx - vpaddd %xmm7,%xmm3,%xmm3 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 24(%esp),%esi - vpaddd 48(%ebp),%xmm3,%xmm6 - xorl %ecx,%edx - movl 28(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,20(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 8(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,4(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl (%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 92(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 16(%esp),%edx - addl %ecx,%eax - vmovdqa %xmm6,80(%esp) - cmpl $66051,64(%ebp) - jne .L016avx_00_47 - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 20(%esp),%esi - xorl %ecx,%edx - movl 24(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 4(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 32(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 12(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 16(%esp),%esi - xorl %ecx,%edx - movl 20(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,12(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl (%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,28(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 36(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 8(%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 12(%esp),%esi - xorl %ecx,%edx - movl 16(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 28(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,24(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 40(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 4(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 8(%esp),%esi - xorl %ecx,%edx - movl 12(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl 
%ecx,%esi - movl %ecx,4(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 24(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,20(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 44(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl (%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 4(%esp),%esi - xorl %ecx,%edx - movl 8(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 20(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,16(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 48(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 28(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl (%esp),%esi - xorl %ecx,%edx - movl 4(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,28(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 16(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,12(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 52(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 24(%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 28(%esp),%esi - xorl %ecx,%edx - movl (%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 12(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,8(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 56(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 20(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 24(%esp),%esi - xorl %ecx,%edx - movl 28(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,20(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 8(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,4(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl (%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 60(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 16(%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 20(%esp),%esi - xorl %ecx,%edx - movl 24(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 4(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 64(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 12(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 16(%esp),%esi - xorl %ecx,%edx - movl 20(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,12(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl (%esp),%edi - movl %ebx,%esi - shrdl 
$9,%ecx,%ecx - movl %ebx,28(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 68(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 8(%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 12(%esp),%esi - xorl %ecx,%edx - movl 16(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 28(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,24(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 72(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 4(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 8(%esp),%esi - xorl %ecx,%edx - movl 12(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,4(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 24(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,20(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 76(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl (%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 4(%esp),%esi - xorl %ecx,%edx - movl 8(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 20(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,16(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 80(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 28(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl (%esp),%esi - xorl %ecx,%edx - movl 4(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,28(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 16(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,12(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 84(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 24(%esp),%edx - addl %ecx,%eax - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 28(%esp),%esi - xorl %ecx,%edx - movl (%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %edi,%edx - movl 12(%esp),%edi - movl %eax,%esi - shrdl $9,%ecx,%ecx - movl %eax,8(%esp) - xorl %eax,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - shrdl $11,%ecx,%ecx - andl %eax,%ebx - xorl %esi,%ecx - addl 88(%esp),%edx - xorl %edi,%ebx - shrdl $2,%ecx,%ecx - addl %edx,%ebx - addl 20(%esp),%edx - addl %ecx,%ebx - movl %edx,%ecx - shrdl $14,%edx,%edx - movl 24(%esp),%esi - xorl %ecx,%edx - movl 28(%esp),%edi - xorl %edi,%esi - shrdl $5,%edx,%edx - andl %ecx,%esi - movl %ecx,20(%esp) - xorl %ecx,%edx - xorl %esi,%edi - shrdl $6,%edx,%edx - movl %ebx,%ecx - addl %edi,%edx - movl 8(%esp),%edi - movl %ebx,%esi - shrdl $9,%ecx,%ecx - movl %ebx,4(%esp) - xorl %ebx,%ecx - xorl %edi,%ebx - addl (%esp),%edx - shrdl $11,%ecx,%ecx - andl %ebx,%eax - xorl %esi,%ecx - addl 92(%esp),%edx - 
xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %edx,%eax - addl 16(%esp),%edx - addl %ecx,%eax - movl 96(%esp),%esi - xorl %edi,%ebx - movl 12(%esp),%ecx - addl (%esi),%eax - addl 4(%esi),%ebx - addl 8(%esi),%edi - addl 12(%esi),%ecx - movl %eax,(%esi) - movl %ebx,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl %ebx,4(%esp) - xorl %edi,%ebx - movl %edi,8(%esp) - movl %ecx,12(%esp) - movl 20(%esp),%edi - movl 24(%esp),%ecx - addl 16(%esi),%edx - addl 20(%esi),%edi - addl 24(%esi),%ecx - movl %edx,16(%esi) - movl %edi,20(%esi) - movl %edi,20(%esp) - movl 28(%esp),%edi - movl %ecx,24(%esi) - addl 28(%esi),%edi - movl %ecx,24(%esp) - movl %edi,28(%esi) - movl %edi,28(%esp) - movl 100(%esp),%edi - vmovdqa 64(%ebp),%xmm7 - subl $192,%ebp - cmpl 104(%esp),%edi - jb .L015grand_avx - movl 108(%esp),%esp - vzeroall - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.align 32 -.L014AVX_BMI: - leal -96(%esp),%esp - vzeroall - movl (%esi),%eax - movl 4(%esi),%ebx - movl 8(%esi),%ecx - movl 12(%esi),%edi - movl %ebx,4(%esp) - xorl %ecx,%ebx - movl %ecx,8(%esp) - movl %edi,12(%esp) - movl 16(%esi),%edx - movl 20(%esi),%edi - movl 24(%esi),%ecx - movl 28(%esi),%esi - movl %edi,20(%esp) - movl 100(%esp),%edi - movl %ecx,24(%esp) - movl %esi,28(%esp) - vmovdqa 256(%ebp),%xmm7 - jmp .L017grand_avx_bmi -.align 32 -.L017grand_avx_bmi: - vmovdqu (%edi),%xmm0 - vmovdqu 16(%edi),%xmm1 - vmovdqu 32(%edi),%xmm2 - vmovdqu 48(%edi),%xmm3 - addl $64,%edi - vpshufb %xmm7,%xmm0,%xmm0 - movl %edi,100(%esp) - vpshufb %xmm7,%xmm1,%xmm1 - vpshufb %xmm7,%xmm2,%xmm2 - vpaddd (%ebp),%xmm0,%xmm4 - vpshufb %xmm7,%xmm3,%xmm3 - vpaddd 16(%ebp),%xmm1,%xmm5 - vpaddd 32(%ebp),%xmm2,%xmm6 - vpaddd 48(%ebp),%xmm3,%xmm7 - vmovdqa %xmm4,32(%esp) - vmovdqa %xmm5,48(%esp) - vmovdqa %xmm6,64(%esp) - vmovdqa %xmm7,80(%esp) - jmp .L018avx_bmi_00_47 -.align 16 -.L018avx_bmi_00_47: - addl $64,%ebp - vpalignr $4,%xmm0,%xmm1,%xmm4 - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,16(%esp) - vpalignr $4,%xmm2,%xmm3,%xmm7 - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 24(%esp),%edx,%esi - vpsrld $7,%xmm4,%xmm6 - xorl %edi,%ecx - andl 20(%esp),%edx - movl %eax,(%esp) - vpaddd %xmm7,%xmm0,%xmm0 - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrld $3,%xmm4,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpslld $14,%xmm4,%xmm5 - movl 4(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpxor %xmm6,%xmm7,%xmm4 - addl 28(%esp),%edx - andl %eax,%ebx - addl 32(%esp),%edx - vpshufd $250,%xmm3,%xmm7 - xorl %edi,%ebx - addl %edx,%ecx - addl 12(%esp),%edx - vpsrld $11,%xmm6,%xmm6 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl %edx,12(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpslld $11,%xmm5,%xmm5 - andnl 20(%esp),%edx,%esi - xorl %edi,%ecx - andl 16(%esp),%edx - vpxor %xmm6,%xmm4,%xmm4 - movl %ebx,28(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpsrld $10,%xmm7,%xmm6 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl (%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpsrlq $17,%xmm7,%xmm5 - addl 24(%esp),%edx - andl %ebx,%eax - addl 36(%esp),%edx - vpaddd %xmm4,%xmm0,%xmm0 - xorl %edi,%eax - addl %edx,%ecx - addl 8(%esp),%edx - vpxor %xmm5,%xmm6,%xmm6 - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpsrlq $19,%xmm7,%xmm7 - movl %edx,8(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - andnl 16(%esp),%edx,%esi - xorl %edi,%ecx - andl 
12(%esp),%edx - vpshufd $132,%xmm6,%xmm7 - movl %eax,24(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrldq $8,%xmm7,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpaddd %xmm7,%xmm0,%xmm0 - movl 28(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpshufd $80,%xmm0,%xmm7 - addl 20(%esp),%edx - andl %eax,%ebx - addl 40(%esp),%edx - vpsrld $10,%xmm7,%xmm6 - xorl %edi,%ebx - addl %edx,%ecx - addl 4(%esp),%edx - vpsrlq $17,%xmm7,%xmm5 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm6,%xmm6 - movl %edx,4(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpsrlq $19,%xmm7,%xmm7 - andnl 12(%esp),%edx,%esi - xorl %edi,%ecx - andl 8(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - movl %ebx,20(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpshufd $232,%xmm6,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpslldq $8,%xmm7,%xmm7 - movl 24(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpaddd %xmm7,%xmm0,%xmm0 - addl 16(%esp),%edx - andl %ebx,%eax - addl 44(%esp),%edx - vpaddd (%ebp),%xmm0,%xmm6 - xorl %edi,%eax - addl %edx,%ecx - addl (%esp),%edx - leal (%eax,%ecx,1),%eax - vmovdqa %xmm6,32(%esp) - vpalignr $4,%xmm1,%xmm2,%xmm4 - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,(%esp) - vpalignr $4,%xmm3,%xmm0,%xmm7 - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 8(%esp),%edx,%esi - vpsrld $7,%xmm4,%xmm6 - xorl %edi,%ecx - andl 4(%esp),%edx - movl %eax,16(%esp) - vpaddd %xmm7,%xmm1,%xmm1 - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrld $3,%xmm4,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpslld $14,%xmm4,%xmm5 - movl 20(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpxor %xmm6,%xmm7,%xmm4 - addl 12(%esp),%edx - andl %eax,%ebx - addl 48(%esp),%edx - vpshufd $250,%xmm0,%xmm7 - xorl %edi,%ebx - addl %edx,%ecx - addl 28(%esp),%edx - vpsrld $11,%xmm6,%xmm6 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl %edx,28(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpslld $11,%xmm5,%xmm5 - andnl 4(%esp),%edx,%esi - xorl %edi,%ecx - andl (%esp),%edx - vpxor %xmm6,%xmm4,%xmm4 - movl %ebx,12(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpsrld $10,%xmm7,%xmm6 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl 16(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpsrlq $17,%xmm7,%xmm5 - addl 8(%esp),%edx - andl %ebx,%eax - addl 52(%esp),%edx - vpaddd %xmm4,%xmm1,%xmm1 - xorl %edi,%eax - addl %edx,%ecx - addl 24(%esp),%edx - vpxor %xmm5,%xmm6,%xmm6 - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpsrlq $19,%xmm7,%xmm7 - movl %edx,24(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - andnl (%esp),%edx,%esi - xorl %edi,%ecx - andl 28(%esp),%edx - vpshufd $132,%xmm6,%xmm7 - movl %eax,8(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrldq $8,%xmm7,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpaddd %xmm7,%xmm1,%xmm1 - movl 12(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpshufd $80,%xmm1,%xmm7 - addl 4(%esp),%edx - andl %eax,%ebx - addl 56(%esp),%edx - vpsrld $10,%xmm7,%xmm6 - xorl %edi,%ebx - addl %edx,%ecx - addl 20(%esp),%edx - vpsrlq $17,%xmm7,%xmm5 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm6,%xmm6 - movl %edx,20(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpsrlq $19,%xmm7,%xmm7 - andnl 
28(%esp),%edx,%esi - xorl %edi,%ecx - andl 24(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - movl %ebx,4(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpshufd $232,%xmm6,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpslldq $8,%xmm7,%xmm7 - movl 8(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpaddd %xmm7,%xmm1,%xmm1 - addl (%esp),%edx - andl %ebx,%eax - addl 60(%esp),%edx - vpaddd 16(%ebp),%xmm1,%xmm6 - xorl %edi,%eax - addl %edx,%ecx - addl 16(%esp),%edx - leal (%eax,%ecx,1),%eax - vmovdqa %xmm6,48(%esp) - vpalignr $4,%xmm2,%xmm3,%xmm4 - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,16(%esp) - vpalignr $4,%xmm0,%xmm1,%xmm7 - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 24(%esp),%edx,%esi - vpsrld $7,%xmm4,%xmm6 - xorl %edi,%ecx - andl 20(%esp),%edx - movl %eax,(%esp) - vpaddd %xmm7,%xmm2,%xmm2 - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrld $3,%xmm4,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpslld $14,%xmm4,%xmm5 - movl 4(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpxor %xmm6,%xmm7,%xmm4 - addl 28(%esp),%edx - andl %eax,%ebx - addl 64(%esp),%edx - vpshufd $250,%xmm1,%xmm7 - xorl %edi,%ebx - addl %edx,%ecx - addl 12(%esp),%edx - vpsrld $11,%xmm6,%xmm6 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl %edx,12(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpslld $11,%xmm5,%xmm5 - andnl 20(%esp),%edx,%esi - xorl %edi,%ecx - andl 16(%esp),%edx - vpxor %xmm6,%xmm4,%xmm4 - movl %ebx,28(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpsrld $10,%xmm7,%xmm6 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl (%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpsrlq $17,%xmm7,%xmm5 - addl 24(%esp),%edx - andl %ebx,%eax - addl 68(%esp),%edx - vpaddd %xmm4,%xmm2,%xmm2 - xorl %edi,%eax - addl %edx,%ecx - addl 8(%esp),%edx - vpxor %xmm5,%xmm6,%xmm6 - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpsrlq $19,%xmm7,%xmm7 - movl %edx,8(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - andnl 16(%esp),%edx,%esi - xorl %edi,%ecx - andl 12(%esp),%edx - vpshufd $132,%xmm6,%xmm7 - movl %eax,24(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrldq $8,%xmm7,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpaddd %xmm7,%xmm2,%xmm2 - movl 28(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpshufd $80,%xmm2,%xmm7 - addl 20(%esp),%edx - andl %eax,%ebx - addl 72(%esp),%edx - vpsrld $10,%xmm7,%xmm6 - xorl %edi,%ebx - addl %edx,%ecx - addl 4(%esp),%edx - vpsrlq $17,%xmm7,%xmm5 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm6,%xmm6 - movl %edx,4(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpsrlq $19,%xmm7,%xmm7 - andnl 12(%esp),%edx,%esi - xorl %edi,%ecx - andl 8(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - movl %ebx,20(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpshufd $232,%xmm6,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpslldq $8,%xmm7,%xmm7 - movl 24(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpaddd %xmm7,%xmm2,%xmm2 - addl 16(%esp),%edx - andl %ebx,%eax - addl 76(%esp),%edx - vpaddd 32(%ebp),%xmm2,%xmm6 - xorl %edi,%eax - addl %edx,%ecx - addl (%esp),%edx - leal (%eax,%ecx,1),%eax - vmovdqa %xmm6,64(%esp) - vpalignr $4,%xmm3,%xmm0,%xmm4 - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,(%esp) - vpalignr 
$4,%xmm1,%xmm2,%xmm7 - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 8(%esp),%edx,%esi - vpsrld $7,%xmm4,%xmm6 - xorl %edi,%ecx - andl 4(%esp),%edx - movl %eax,16(%esp) - vpaddd %xmm7,%xmm3,%xmm3 - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrld $3,%xmm4,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpslld $14,%xmm4,%xmm5 - movl 20(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpxor %xmm6,%xmm7,%xmm4 - addl 12(%esp),%edx - andl %eax,%ebx - addl 80(%esp),%edx - vpshufd $250,%xmm2,%xmm7 - xorl %edi,%ebx - addl %edx,%ecx - addl 28(%esp),%edx - vpsrld $11,%xmm6,%xmm6 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl %edx,28(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpslld $11,%xmm5,%xmm5 - andnl 4(%esp),%edx,%esi - xorl %edi,%ecx - andl (%esp),%edx - vpxor %xmm6,%xmm4,%xmm4 - movl %ebx,12(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpsrld $10,%xmm7,%xmm6 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpxor %xmm5,%xmm4,%xmm4 - movl 16(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpsrlq $17,%xmm7,%xmm5 - addl 8(%esp),%edx - andl %ebx,%eax - addl 84(%esp),%edx - vpaddd %xmm4,%xmm3,%xmm3 - xorl %edi,%eax - addl %edx,%ecx - addl 24(%esp),%edx - vpxor %xmm5,%xmm6,%xmm6 - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpsrlq $19,%xmm7,%xmm7 - movl %edx,24(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpxor %xmm7,%xmm6,%xmm6 - andnl (%esp),%edx,%esi - xorl %edi,%ecx - andl 28(%esp),%edx - vpshufd $132,%xmm6,%xmm7 - movl %eax,8(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - vpsrldq $8,%xmm7,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - vpaddd %xmm7,%xmm3,%xmm3 - movl 12(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - vpshufd $80,%xmm3,%xmm7 - addl 4(%esp),%edx - andl %eax,%ebx - addl 88(%esp),%edx - vpsrld $10,%xmm7,%xmm6 - xorl %edi,%ebx - addl %edx,%ecx - addl 20(%esp),%edx - vpsrlq $17,%xmm7,%xmm5 - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - vpxor %xmm5,%xmm6,%xmm6 - movl %edx,20(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - vpsrlq $19,%xmm7,%xmm7 - andnl 28(%esp),%edx,%esi - xorl %edi,%ecx - andl 24(%esp),%edx - vpxor %xmm7,%xmm6,%xmm6 - movl %ebx,4(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - vpshufd $232,%xmm6,%xmm7 - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - vpslldq $8,%xmm7,%xmm7 - movl 8(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - vpaddd %xmm7,%xmm3,%xmm3 - addl (%esp),%edx - andl %ebx,%eax - addl 92(%esp),%edx - vpaddd 48(%ebp),%xmm3,%xmm6 - xorl %edi,%eax - addl %edx,%ecx - addl 16(%esp),%edx - leal (%eax,%ecx,1),%eax - vmovdqa %xmm6,80(%esp) - cmpl $66051,64(%ebp) - jne .L018avx_bmi_00_47 - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,16(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 24(%esp),%edx,%esi - xorl %edi,%ecx - andl 20(%esp),%edx - movl %eax,(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 4(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - andl %eax,%ebx - addl 32(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 12(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,12(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 20(%esp),%edx,%esi - xorl %edi,%ecx - andl 16(%esp),%edx - movl %ebx,28(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi 
- rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl (%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - andl %ebx,%eax - addl 36(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 8(%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,8(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 16(%esp),%edx,%esi - xorl %edi,%ecx - andl 12(%esp),%edx - movl %eax,24(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 28(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - andl %eax,%ebx - addl 40(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 4(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,4(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 12(%esp),%edx,%esi - xorl %edi,%ecx - andl 8(%esp),%edx - movl %ebx,20(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 24(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - andl %ebx,%eax - addl 44(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl (%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 8(%esp),%edx,%esi - xorl %edi,%ecx - andl 4(%esp),%edx - movl %eax,16(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 20(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - andl %eax,%ebx - addl 48(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 28(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,28(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 4(%esp),%edx,%esi - xorl %edi,%ecx - andl (%esp),%edx - movl %ebx,12(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 16(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - andl %ebx,%eax - addl 52(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 24(%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,24(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl (%esp),%edx,%esi - xorl %edi,%ecx - andl 28(%esp),%edx - movl %eax,8(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 12(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - andl %eax,%ebx - addl 56(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 20(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,20(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 28(%esp),%edx,%esi - xorl %edi,%ecx - andl 24(%esp),%edx - movl %ebx,4(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 8(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl (%esp),%edx - andl %ebx,%eax - addl 60(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 16(%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,16(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 24(%esp),%edx,%esi - xorl %edi,%ecx - andl 20(%esp),%edx - movl %eax,(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - 
leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 4(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 28(%esp),%edx - andl %eax,%ebx - addl 64(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 12(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,12(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 20(%esp),%edx,%esi - xorl %edi,%ecx - andl 16(%esp),%edx - movl %ebx,28(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl (%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 24(%esp),%edx - andl %ebx,%eax - addl 68(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 8(%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,8(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 16(%esp),%edx,%esi - xorl %edi,%ecx - andl 12(%esp),%edx - movl %eax,24(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 28(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 20(%esp),%edx - andl %eax,%ebx - addl 72(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 4(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,4(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 12(%esp),%edx,%esi - xorl %edi,%ecx - andl 8(%esp),%edx - movl %ebx,20(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 24(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 16(%esp),%edx - andl %ebx,%eax - addl 76(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl (%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 8(%esp),%edx,%esi - xorl %edi,%ecx - andl 4(%esp),%edx - movl %eax,16(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 20(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 12(%esp),%edx - andl %eax,%ebx - addl 80(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 28(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,28(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 4(%esp),%edx,%esi - xorl %edi,%ecx - andl (%esp),%edx - movl %ebx,12(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 16(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl 8(%esp),%edx - andl %ebx,%eax - addl 84(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 24(%esp),%edx - leal (%eax,%ecx,1),%eax - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,24(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl (%esp),%edx,%esi - xorl %edi,%ecx - andl 28(%esp),%edx - movl %eax,8(%esp) - orl %esi,%edx - rorxl $2,%eax,%edi - rorxl $13,%eax,%esi - leal (%edx,%ecx,1),%edx - rorxl $22,%eax,%ecx - xorl %edi,%esi - movl 12(%esp),%edi - xorl %esi,%ecx - xorl %edi,%eax - addl 4(%esp),%edx - andl %eax,%ebx - addl 88(%esp),%edx - xorl %edi,%ebx - addl %edx,%ecx - addl 20(%esp),%edx - leal (%ebx,%ecx,1),%ebx - rorxl $6,%edx,%ecx - rorxl $11,%edx,%esi - movl %edx,20(%esp) - rorxl $25,%edx,%edi - xorl %esi,%ecx - andnl 28(%esp),%edx,%esi - xorl %edi,%ecx - andl 24(%esp),%edx - movl %ebx,4(%esp) - orl %esi,%edx - rorxl $2,%ebx,%edi - rorxl $13,%ebx,%esi - leal 
(%edx,%ecx,1),%edx - rorxl $22,%ebx,%ecx - xorl %edi,%esi - movl 8(%esp),%edi - xorl %esi,%ecx - xorl %edi,%ebx - addl (%esp),%edx - andl %ebx,%eax - addl 92(%esp),%edx - xorl %edi,%eax - addl %edx,%ecx - addl 16(%esp),%edx - leal (%eax,%ecx,1),%eax - movl 96(%esp),%esi - xorl %edi,%ebx - movl 12(%esp),%ecx - addl (%esi),%eax - addl 4(%esi),%ebx - addl 8(%esi),%edi - addl 12(%esi),%ecx - movl %eax,(%esi) - movl %ebx,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl %ebx,4(%esp) - xorl %edi,%ebx - movl %edi,8(%esp) - movl %ecx,12(%esp) - movl 20(%esp),%edi - movl 24(%esp),%ecx - addl 16(%esi),%edx - addl 20(%esi),%edi - addl 24(%esi),%ecx - movl %edx,16(%esi) - movl %edi,20(%esi) - movl %edi,20(%esp) - movl 28(%esp),%edi - movl %ecx,24(%esi) - addl 28(%esi),%edi - movl %ecx,24(%esp) - movl %edi,28(%esi) - movl %edi,28(%esp) - movl 100(%esp),%edi - vmovdqa 64(%ebp),%xmm7 - subl $192,%ebp - cmpl 104(%esp),%edi - jb .L017grand_avx_bmi + jb .L011grand_ssse3 movl 108(%esp),%esp - vzeroall popl %edi popl %esi popl %ebx |
