1 files changed, 20 insertions, 12 deletions
diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl
index f2ef318fae4e..b715aca167d1 100755
--- a/crypto/aes/asm/vpaes-x86_64.pl
+++ b/crypto/aes/asm/vpaes-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
 
 ######################################################################
 ## Constant-time SSSE3 AES core implementation.
@@ -31,6 +38,7 @@
 # Nehalem	29.6/40.3/14.6		10.0/11.8
 # Atom		57.3/74.2/32.1		60.9/77.2(***)
 # Silvermont	52.7/64.0/19.5		48.8/60.8(***)
+# Goldmont	38.9/49.0/17.8		10.6/12.6
 #
 # (*)	"Hyper-threading" in the context refers rather to cache shared
 #	among multiple cores, than to specifically Intel HTT. As vast
@@ -57,7 +65,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
 
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
 *STDOUT=*OUT;
 
 $PREFIX="vpaes";
@@ -164,7 +172,7 @@ _vpaes_encrypt_core:
 	pshufb	%xmm1,	%xmm0
 	ret
 .size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
-	
+
 ##
 ##  Decryption core
 ##
@@ -325,7 +333,7 @@ _vpaes_schedule_core:
 ##
 .Lschedule_128:
 	mov	\$10, %esi
-	
+
 .Loop_schedule_128:
 	call 	_vpaes_schedule_round
 	dec	%rsi
@@ -359,7 +367,7 @@ _vpaes_schedule_core:
 
 .Loop_schedule_192:
 	call	_vpaes_schedule_round
-	palignr	\$8,%xmm6,%xmm0	
+	palignr	\$8,%xmm6,%xmm0
 	call	_vpaes_schedule_mangle	# save key n
 	call	_vpaes_schedule_192_smear
 	call	_vpaes_schedule_mangle	# save key n+1
@@ -385,7 +393,7 @@ _vpaes_schedule_core:
 	movdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
 	call	_vpaes_schedule_transform	# input transform
 	mov	\$7, %esi
-	
+
 .Loop_schedule_256:
 	call	_vpaes_schedule_mangle	# output low result
 	movdqa	%xmm0,	%xmm6		# save cur_lo in xmm6
@@ -394,7 +402,7 @@ _vpaes_schedule_core:
 	call	_vpaes_schedule_round
 	dec	%rsi
 	jz 	.Lschedule_mangle_last
-	call	_vpaes_schedule_mangle	
+	call	_vpaes_schedule_mangle
 
 	# low round. swap xmm7 and xmm6
 	pshufd	\$0xFF,	%xmm0,	%xmm0
@@ -402,10 +410,10 @@ _vpaes_schedule_core:
 	movdqa	%xmm6,	%xmm7
 	call	_vpaes_schedule_low_round
 	movdqa	%xmm5,	%xmm7
-	
+
 	jmp	.Loop_schedule_256
 
-	
+
 ##
 ##  .aes_schedule_mangle_last
 ##
@@ -504,9 +512,9 @@ _vpaes_schedule_round:
 	# rotate
 	pshufd	\$0xFF,	%xmm0,	%xmm0
 	palignr	\$1,	%xmm0,	%xmm0
-	
+
 	# fall through...
-	
+
 	# low round: same as high round, but no rotation and no rcon.
 _vpaes_schedule_low_round:
 	# smear xmm7
@@ -545,7 +553,7 @@ _vpaes_schedule_low_round:
 	pxor	%xmm4, 	%xmm0		# 0 = sbox output
 
 	# add in smeared stuff
-	pxor	%xmm7,	%xmm0	
+	pxor	%xmm7,	%xmm0
 	movdqa	%xmm0,	%xmm7
 	ret
 .size	_vpaes_schedule_round,.-_vpaes_schedule_round