summaryrefslogtreecommitdiff
path: root/crypto/aes/asm/vpaes-x86_64.pl
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/aes/asm/vpaes-x86_64.pl')
-rwxr-xr-xcrypto/aes/asm/vpaes-x86_64.pl32
1 files changed, 20 insertions, 12 deletions
diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl
index f2ef318fae4e..b715aca167d1 100755
--- a/crypto/aes/asm/vpaes-x86_64.pl
+++ b/crypto/aes/asm/vpaes-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
######################################################################
## Constant-time SSSE3 AES core implementation.
@@ -31,6 +38,7 @@
# Nehalem 29.6/40.3/14.6 10.0/11.8
# Atom 57.3/74.2/32.1 60.9/77.2(***)
# Silvermont 52.7/64.0/19.5 48.8/60.8(***)
+# Goldmont 38.9/49.0/17.8 10.6/12.6
#
# (*) "Hyper-threading" in the context refers rather to cache shared
# among multiple cores, than to specifically Intel HTT. As vast
@@ -57,7 +65,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
$PREFIX="vpaes";
@@ -164,7 +172,7 @@ _vpaes_encrypt_core:
pshufb %xmm1, %xmm0
ret
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
-
+
##
## Decryption core
##
@@ -325,7 +333,7 @@ _vpaes_schedule_core:
##
.Lschedule_128:
mov \$10, %esi
-
+
.Loop_schedule_128:
call _vpaes_schedule_round
dec %rsi
@@ -359,7 +367,7 @@ _vpaes_schedule_core:
.Loop_schedule_192:
call _vpaes_schedule_round
- palignr \$8,%xmm6,%xmm0
+ palignr \$8,%xmm6,%xmm0
call _vpaes_schedule_mangle # save key n
call _vpaes_schedule_192_smear
call _vpaes_schedule_mangle # save key n+1
@@ -385,7 +393,7 @@ _vpaes_schedule_core:
movdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
call _vpaes_schedule_transform # input transform
mov \$7, %esi
-
+
.Loop_schedule_256:
call _vpaes_schedule_mangle # output low result
movdqa %xmm0, %xmm6 # save cur_lo in xmm6
@@ -394,7 +402,7 @@ _vpaes_schedule_core:
call _vpaes_schedule_round
dec %rsi
jz .Lschedule_mangle_last
- call _vpaes_schedule_mangle
+ call _vpaes_schedule_mangle
# low round. swap xmm7 and xmm6
pshufd \$0xFF, %xmm0, %xmm0
@@ -402,10 +410,10 @@ _vpaes_schedule_core:
movdqa %xmm6, %xmm7
call _vpaes_schedule_low_round
movdqa %xmm5, %xmm7
-
+
jmp .Loop_schedule_256
-
+
##
## .aes_schedule_mangle_last
##
@@ -504,9 +512,9 @@ _vpaes_schedule_round:
# rotate
pshufd \$0xFF, %xmm0, %xmm0
palignr \$1, %xmm0, %xmm0
-
+
# fall through...
-
+
# low round: same as high round, but no rotation and no rcon.
_vpaes_schedule_low_round:
# smear xmm7
@@ -545,7 +553,7 @@ _vpaes_schedule_low_round:
pxor %xmm4, %xmm0 # 0 = sbox output
# add in smeared stuff
- pxor %xmm7, %xmm0
+ pxor %xmm7, %xmm0
movdqa %xmm0, %xmm7
ret
.size _vpaes_schedule_round,.-_vpaes_schedule_round