summaryrefslogtreecommitdiff
path: root/crypto/rc4/asm/rc4-md5-x86_64.pl
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/rc4/asm/rc4-md5-x86_64.pl')
-rwxr-xr-xcrypto/rc4/asm/rc4-md5-x86_64.pl37
1 files changed, 33 insertions, 4 deletions
diff --git a/crypto/rc4/asm/rc4-md5-x86_64.pl b/crypto/rc4/asm/rc4-md5-x86_64.pl
index 272fa91e1a1e..74e5191051eb 100755
--- a/crypto/rc4/asm/rc4-md5-x86_64.pl
+++ b/crypto/rc4/asm/rc4-md5-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -29,16 +36,22 @@
# Core2 6.5 5.8 12.3 7.7 +60%
# Westmere 4.3 5.2 9.5 7.0 +36%
# Sandy Bridge 4.2 5.5 9.7 6.8 +43%
+# Ivy Bridge 4.1 5.2 9.3 6.0 +54%
+# Haswell 4.0 5.0 9.0 5.7 +60%
+# Skylake 6.3(**) 5.0 11.3 5.3 +110%
# Atom 9.3 6.5 15.8 11.1 +42%
+# VIA Nano 6.3 5.4 11.7 8.6 +37%
+# Bulldozer 4.5 5.4 9.9 7.7 +29%
#
# (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
# is +53%...
+# (**) unidentified anomaly;
my ($rc4,$md5)=(1,1); # what to generate?
my $D="#" if (!$md5); # if set to "#", MD5 is stitched into RC4(),
# but its result is discarded. Idea here is
# to be able to use 'openssl speed rc4' for
- # benchmarking the stitched subroutine...
+ # benchmarking the stitched subroutine...
my $flavour = shift;
my $output = shift;
@@ -51,7 +64,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs);
@@ -111,15 +124,23 @@ $code.=<<___;
.globl $func
.type $func,\@function,$nargs
$func:
+.cfi_startproc
cmp \$0,$len
je .Labort
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
sub \$40,%rsp
+.cfi_adjust_cfa_offset 40
.Lbody:
___
if ($rc4) {
@@ -406,7 +427,7 @@ $code.=<<___ if ($rc4 && (!$md5 || $D));
and \$63,$len # remaining bytes
jnz .Loop1
jmp .Ldone
-
+
.align 16
.Loop1:
add $TX[0]#b,$YY#b
@@ -431,15 +452,23 @@ $code.=<<___;
#rc4# movl $YY#d,-4($dat)
mov 40(%rsp),%r15
+.cfi_restore %r15
mov 48(%rsp),%r14
+.cfi_restore %r14
mov 56(%rsp),%r13
+.cfi_restore %r13
mov 64(%rsp),%r12
+.cfi_restore %r12
mov 72(%rsp),%rbp
+.cfi_restore %rbp
mov 80(%rsp),%rbx
+.cfi_restore %rbx
lea 88(%rsp),%rsp
+.cfi_adjust_cfa_offset -88
.Lepilogue:
.Labort:
ret
+.cfi_endproc
.size $func,.-$func
___