diff options
Diffstat (limited to 'crypto/aes/asm/vpaes-x86_64.pl')
-rwxr-xr-x | crypto/aes/asm/vpaes-x86_64.pl | 32 |
1 files changed, 20 insertions, 12 deletions
diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl index f2ef318fae4e..b715aca167d1 100755 --- a/crypto/aes/asm/vpaes-x86_64.pl +++ b/crypto/aes/asm/vpaes-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + ###################################################################### ## Constant-time SSSE3 AES core implementation. @@ -31,6 +38,7 @@ # Nehalem 29.6/40.3/14.6 10.0/11.8 # Atom 57.3/74.2/32.1 60.9/77.2(***) # Silvermont 52.7/64.0/19.5 48.8/60.8(***) +# Goldmont 38.9/49.0/17.8 10.6/12.6 # # (*) "Hyper-threading" in the context refers rather to cache shared # among multiple cores, than to specifically Intel HTT. As vast @@ -57,7 +65,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; $PREFIX="vpaes"; @@ -164,7 +172,7 @@ _vpaes_encrypt_core: pshufb %xmm1, %xmm0 ret .size _vpaes_encrypt_core,.-_vpaes_encrypt_core - + ## ## Decryption core ## @@ -325,7 +333,7 @@ _vpaes_schedule_core: ## .Lschedule_128: mov \$10, %esi - + .Loop_schedule_128: call _vpaes_schedule_round dec %rsi @@ -359,7 +367,7 @@ _vpaes_schedule_core: .Loop_schedule_192: call _vpaes_schedule_round - palignr \$8,%xmm6,%xmm0 + palignr \$8,%xmm6,%xmm0 call _vpaes_schedule_mangle # save key n call _vpaes_schedule_192_smear call _vpaes_schedule_mangle # save key n+1 @@ -385,7 +393,7 @@ _vpaes_schedule_core: movdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) call _vpaes_schedule_transform # input transform mov \$7, %esi - + .Loop_schedule_256: call _vpaes_schedule_mangle # output low result movdqa %xmm0, %xmm6 # save cur_lo in xmm6 @@ -394,7 +402,7 @@ _vpaes_schedule_core: call _vpaes_schedule_round dec %rsi jz .Lschedule_mangle_last - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle # low round. swap xmm7 and xmm6 pshufd \$0xFF, %xmm0, %xmm0 @@ -402,10 +410,10 @@ _vpaes_schedule_core: movdqa %xmm6, %xmm7 call _vpaes_schedule_low_round movdqa %xmm5, %xmm7 - + jmp .Loop_schedule_256 - + ## ## .aes_schedule_mangle_last ## @@ -504,9 +512,9 @@ _vpaes_schedule_round: # rotate pshufd \$0xFF, %xmm0, %xmm0 palignr \$1, %xmm0, %xmm0 - + # fall through... - + # low round: same as high round, but no rotation and no rcon. _vpaes_schedule_low_round: # smear xmm7 @@ -545,7 +553,7 @@ _vpaes_schedule_low_round: pxor %xmm4, %xmm0 # 0 = sbox output # add in smeared stuff - pxor %xmm7, %xmm0 + pxor %xmm7, %xmm0 movdqa %xmm0, %xmm7 ret .size _vpaes_schedule_round,.-_vpaes_schedule_round |