Diffstat (limited to 'secure/lib/libcrypto/amd64/ghash-x86_64.S')
-rw-r--r-- | secure/lib/libcrypto/amd64/ghash-x86_64.S | 475
1 file changed, 2 insertions, 473 deletions
diff --git a/secure/lib/libcrypto/amd64/ghash-x86_64.S b/secure/lib/libcrypto/amd64/ghash-x86_64.S
index 078353528d5f..55ad7db1f240 100644
--- a/secure/lib/libcrypto/amd64/ghash-x86_64.S
+++ b/secure/lib/libcrypto/amd64/ghash-x86_64.S
@@ -1304,108 +1304,7 @@ gcm_ghash_clmul:
 .align 32
 gcm_init_avx:
 .cfi_startproc
- vzeroupper
-
- vmovdqu (%rsi),%xmm2
- vpshufd $78,%xmm2,%xmm2
-
-
- vpshufd $255,%xmm2,%xmm4
- vpsrlq $63,%xmm2,%xmm3
- vpsllq $1,%xmm2,%xmm2
- vpxor %xmm5,%xmm5,%xmm5
- vpcmpgtd %xmm4,%xmm5,%xmm5
- vpslldq $8,%xmm3,%xmm3
- vpor %xmm3,%xmm2,%xmm2
-
-
- vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
- vpxor %xmm5,%xmm2,%xmm2
-
- vpunpckhqdq %xmm2,%xmm2,%xmm6
- vmovdqa %xmm2,%xmm0
- vpxor %xmm2,%xmm6,%xmm6
- movq $4,%r10
- jmp .Linit_start_avx
-.align 32
-.Linit_loop_avx:
- vpalignr $8,%xmm3,%xmm4,%xmm5
- vmovdqu %xmm5,-16(%rdi)
- vpunpckhqdq %xmm0,%xmm0,%xmm3
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
- vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
- vpxor %xmm0,%xmm1,%xmm4
- vpxor %xmm4,%xmm3,%xmm3
-
- vpslldq $8,%xmm3,%xmm4
- vpsrldq $8,%xmm3,%xmm3
- vpxor %xmm4,%xmm0,%xmm0
- vpxor %xmm3,%xmm1,%xmm1
- vpsllq $57,%xmm0,%xmm3
- vpsllq $62,%xmm0,%xmm4
- vpxor %xmm3,%xmm4,%xmm4
- vpsllq $63,%xmm0,%xmm3
- vpxor %xmm3,%xmm4,%xmm4
- vpslldq $8,%xmm4,%xmm3
- vpsrldq $8,%xmm4,%xmm4
- vpxor %xmm3,%xmm0,%xmm0
- vpxor %xmm4,%xmm1,%xmm1
-
- vpsrlq $1,%xmm0,%xmm4
- vpxor %xmm0,%xmm1,%xmm1
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $5,%xmm4,%xmm4
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $1,%xmm0,%xmm0
- vpxor %xmm1,%xmm0,%xmm0
-.Linit_start_avx:
- vmovdqa %xmm0,%xmm5
- vpunpckhqdq %xmm0,%xmm0,%xmm3
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
- vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
- vpxor %xmm0,%xmm1,%xmm4
- vpxor %xmm4,%xmm3,%xmm3
-
- vpslldq $8,%xmm3,%xmm4
- vpsrldq $8,%xmm3,%xmm3
- vpxor %xmm4,%xmm0,%xmm0
- vpxor %xmm3,%xmm1,%xmm1
- vpsllq $57,%xmm0,%xmm3
- vpsllq $62,%xmm0,%xmm4
- vpxor %xmm3,%xmm4,%xmm4
- vpsllq $63,%xmm0,%xmm3
- vpxor %xmm3,%xmm4,%xmm4
- vpslldq $8,%xmm4,%xmm3
- vpsrldq $8,%xmm4,%xmm4
- vpxor %xmm3,%xmm0,%xmm0
- vpxor %xmm4,%xmm1,%xmm1
-
- vpsrlq $1,%xmm0,%xmm4
- vpxor %xmm0,%xmm1,%xmm1
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $5,%xmm4,%xmm4
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $1,%xmm0,%xmm0
- vpxor %xmm1,%xmm0,%xmm0
- vpshufd $78,%xmm5,%xmm3
- vpshufd $78,%xmm0,%xmm4
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqu %xmm5,0(%rdi)
- vpxor %xmm0,%xmm4,%xmm4
- vmovdqu %xmm0,16(%rdi)
- leaq 48(%rdi),%rdi
- subq $1,%r10
- jnz .Linit_loop_avx
-
- vpalignr $8,%xmm4,%xmm3,%xmm5
- vmovdqu %xmm5,-16(%rdi)
-
- vzeroupper
- .byte 0xf3,0xc3
+ jmp .L_init_clmul
 .cfi_endproc
 .size gcm_init_avx,.-gcm_init_avx
 .globl gcm_gmult_avx
@@ -1421,377 +1320,7 @@ gcm_gmult_avx:
 .align 32
 gcm_ghash_avx:
 .cfi_startproc
- vzeroupper
-
- vmovdqu (%rdi),%xmm10
- leaq .L0x1c2_polynomial(%rip),%r10
- leaq 64(%rsi),%rsi
- vmovdqu .Lbswap_mask(%rip),%xmm13
- vpshufb %xmm13,%xmm10,%xmm10
- cmpq $0x80,%rcx
- jb .Lshort_avx
- subq $0x80,%rcx
-
- vmovdqu 112(%rdx),%xmm14
- vmovdqu 0-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm14
- vmovdqu 32-64(%rsi),%xmm7
-
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vmovdqu 96(%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm14,%xmm9,%xmm9
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 16-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vmovdqu 80(%rdx),%xmm14
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
-
- vpshufb %xmm13,%xmm14,%xmm14
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 48-64(%rsi),%xmm6
- vpxor %xmm14,%xmm9,%xmm9
- vmovdqu 64(%rdx),%xmm15
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 80-64(%rsi),%xmm7
-
- vpshufb %xmm13,%xmm15,%xmm15
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm1,%xmm4,%xmm4
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 64-64(%rsi),%xmm6
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
-
- vmovdqu 48(%rdx),%xmm14
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpxor %xmm4,%xmm1,%xmm1
- vpshufb %xmm13,%xmm14,%xmm14
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 96-64(%rsi),%xmm6
- vpxor %xmm5,%xmm2,%xmm2
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 128-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
-
- vmovdqu 32(%rdx),%xmm15
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm1,%xmm4,%xmm4
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 112-64(%rsi),%xmm6
- vpxor %xmm2,%xmm5,%xmm5
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
-
- vmovdqu 16(%rdx),%xmm14
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpxor %xmm4,%xmm1,%xmm1
- vpshufb %xmm13,%xmm14,%xmm14
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 144-64(%rsi),%xmm6
- vpxor %xmm5,%xmm2,%xmm2
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 176-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
-
- vmovdqu (%rdx),%xmm15
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm1,%xmm4,%xmm4
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 160-64(%rsi),%xmm6
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
-
- leaq 128(%rdx),%rdx
- cmpq $0x80,%rcx
- jb .Ltail_avx
-
- vpxor %xmm10,%xmm15,%xmm15
- subq $0x80,%rcx
- jmp .Loop8x_avx
-
-.align 32
-.Loop8x_avx:
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vmovdqu 112(%rdx),%xmm14
- vpxor %xmm0,%xmm3,%xmm3
- vpxor %xmm15,%xmm8,%xmm8
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
- vpshufb %xmm13,%xmm14,%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
- vmovdqu 0-64(%rsi),%xmm6
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
- vmovdqu 32-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
-
- vmovdqu 96(%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm3,%xmm10,%xmm10
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vxorps %xmm4,%xmm11,%xmm11
- vmovdqu 16-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm5,%xmm12,%xmm12
- vxorps %xmm15,%xmm8,%xmm8
-
- vmovdqu 80(%rdx),%xmm14
- vpxor %xmm10,%xmm12,%xmm12
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpxor %xmm11,%xmm12,%xmm12
- vpslldq $8,%xmm12,%xmm9
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vpsrldq $8,%xmm12,%xmm12
- vpxor %xmm9,%xmm10,%xmm10
- vmovdqu 48-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm14
- vxorps %xmm12,%xmm11,%xmm11
- vpxor %xmm1,%xmm4,%xmm4
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 80-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
-
- vmovdqu 64(%rdx),%xmm15
- vpalignr $8,%xmm10,%xmm10,%xmm12
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpshufb %xmm13,%xmm15,%xmm15
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 64-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm4,%xmm1,%xmm1
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vxorps %xmm15,%xmm8,%xmm8
- vpxor %xmm5,%xmm2,%xmm2
-
- vmovdqu 48(%rdx),%xmm14
- vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpshufb %xmm13,%xmm14,%xmm14
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 96-64(%rsi),%xmm6
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 128-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
-
- vmovdqu 32(%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpshufb %xmm13,%xmm15,%xmm15
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 112-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm4,%xmm1,%xmm1
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
- vpxor %xmm5,%xmm2,%xmm2
- vxorps %xmm12,%xmm10,%xmm10
-
- vmovdqu 16(%rdx),%xmm14
- vpalignr $8,%xmm10,%xmm10,%xmm12
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpshufb %xmm13,%xmm14,%xmm14
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 144-64(%rsi),%xmm6
- vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
- vxorps %xmm11,%xmm12,%xmm12
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 176-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
-
- vmovdqu (%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 160-64(%rsi),%xmm6
- vpxor %xmm12,%xmm15,%xmm15
- vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
- vpxor %xmm10,%xmm15,%xmm15
-
- leaq 128(%rdx),%rdx
- subq $0x80,%rcx
- jnc .Loop8x_avx
-
- addq $0x80,%rcx
- jmp .Ltail_no_xor_avx
-
-.align 32
-.Lshort_avx:
- vmovdqu -16(%rdx,%rcx,1),%xmm14
- leaq (%rdx,%rcx,1),%rdx
- vmovdqu 0-64(%rsi),%xmm6
- vmovdqu 32-64(%rsi),%xmm7
- vpshufb %xmm13,%xmm14,%xmm15
-
- vmovdqa %xmm0,%xmm3
- vmovdqa %xmm1,%xmm4
- vmovdqa %xmm2,%xmm5
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -32(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 16-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vpsrldq $8,%xmm7,%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -48(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 48-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vmovdqu 80-64(%rsi),%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -64(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 64-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vpsrldq $8,%xmm7,%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -80(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 96-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vmovdqu 128-64(%rsi),%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -96(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 112-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vpsrldq $8,%xmm7,%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -112(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 144-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vmovq 184-64(%rsi),%xmm7
- subq $0x10,%rcx
- jmp .Ltail_avx
-
-.align 32
-.Ltail_avx:
- vpxor %xmm10,%xmm15,%xmm15
-.Ltail_no_xor_avx:
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
-
- vmovdqu (%r10),%xmm12
-
- vpxor %xmm0,%xmm3,%xmm10
- vpxor %xmm1,%xmm4,%xmm11
- vpxor %xmm2,%xmm5,%xmm5
-
- vpxor %xmm10,%xmm5,%xmm5
- vpxor %xmm11,%xmm5,%xmm5
- vpslldq $8,%xmm5,%xmm9
- vpsrldq $8,%xmm5,%xmm5
- vpxor %xmm9,%xmm10,%xmm10
- vpxor %xmm5,%xmm11,%xmm11
-
- vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
- vpalignr $8,%xmm10,%xmm10,%xmm10
- vpxor %xmm9,%xmm10,%xmm10
-
- vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
- vpalignr $8,%xmm10,%xmm10,%xmm10
- vpxor %xmm11,%xmm10,%xmm10
- vpxor %xmm9,%xmm10,%xmm10
-
- cmpq $0,%rcx
- jne .Lshort_avx
-
- vpshufb %xmm13,%xmm10,%xmm10
- vmovdqu %xmm10,(%rdi)
- vzeroupper
- .byte 0xf3,0xc3
+ jmp .L_ghash_clmul
 .cfi_endproc
 .size gcm_ghash_avx,.-gcm_ghash_avx
 .align 64