summaryrefslogtreecommitdiff
path: root/crypto/bn/asm/ppc64-mont.pl
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/asm/ppc64-mont.pl')
-rwxr-xr-xcrypto/bn/asm/ppc64-mont.pl54
1 files changed, 39 insertions, 15 deletions
diff --git a/crypto/bn/asm/ppc64-mont.pl b/crypto/bn/asm/ppc64-mont.pl
index 595fc6d31f60..c41b620bc23e 100755
--- a/crypto/bn/asm/ppc64-mont.pl
+++ b/crypto/bn/asm/ppc64-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -28,7 +35,7 @@
# key lengths. As it's obviously inappropriate as "best all-round"
# alternative, it has to be complemented with run-time CPU family
# detection. Oh! It should also be noted that unlike other PowerPC
-# implementation IALU ppc-mont.pl module performs *suboptimaly* on
+# implementation IALU ppc-mont.pl module performs *suboptimally* on
# >=1024-bit key lengths on Power 6. It should also be noted that
# *everything* said so far applies to 64-bit builds! As far as 32-bit
# application executed on 64-bit CPU goes, this module is likely to
@@ -1346,7 +1353,7 @@ $code.=<<___;
std $t3,-16($tp) ; tp[j-1]
std $t5,-8($tp) ; tp[j]
- add $carry,$carry,$ovf ; comsume upmost overflow
+ add $carry,$carry,$ovf ; consume upmost overflow
add $t6,$t6,$carry ; can not overflow
srdi $carry,$t6,16
add $t7,$t7,$carry
@@ -1494,16 +1501,14 @@ Lsub: ldx $t0,$tp,$i
li $i,0
subfe $ovf,$i,$ovf ; handle upmost overflow bit
- and $ap,$tp,$ovf
- andc $np,$rp,$ovf
- or $ap,$ap,$np ; ap=borrow?tp:rp
- addi $t7,$ap,8
mtctr $j
.align 4
-Lcopy: ; copy or in-place refresh
- ldx $t0,$ap,$i
- ldx $t1,$t7,$i
+Lcopy: ; conditional copy
+ ldx $t0,$tp,$i
+ ldx $t1,$t4,$i
+ ldx $t2,$rp,$i
+ ldx $t3,$t6,$i
std $i,8($nap_d) ; zap nap_d
std $i,16($nap_d)
std $i,24($nap_d)
@@ -1512,6 +1517,12 @@ Lcopy: ; copy or in-place refresh
std $i,48($nap_d)
std $i,56($nap_d)
stdu $i,64($nap_d)
+ and $t0,$t0,$ovf
+ and $t1,$t1,$ovf
+ andc $t2,$t2,$ovf
+ andc $t3,$t3,$ovf
+ or $t0,$t0,$t2
+ or $t1,$t1,$t3
stdx $t0,$rp,$i
stdx $t1,$t6,$i
stdx $i,$tp,$i ; zap tp at once
@@ -1554,20 +1565,21 @@ Lsub: lwz $t0,12($tp) ; load tp[j..j+3] in 64-bit word order
li $i,0
subfe $ovf,$i,$ovf ; handle upmost overflow bit
- addi $tp,$sp,`$FRAME+$TRANSFER+4`
+ addi $ap,$sp,`$FRAME+$TRANSFER+4`
subf $rp,$num,$rp ; rewind rp
- and $ap,$tp,$ovf
- andc $np,$rp,$ovf
- or $ap,$ap,$np ; ap=borrow?tp:rp
addi $tp,$sp,`$FRAME+$TRANSFER`
mtctr $j
.align 4
-Lcopy: ; copy or in-place refresh
+Lcopy: ; conditional copy
lwz $t0,4($ap)
lwz $t1,8($ap)
lwz $t2,12($ap)
lwzu $t3,16($ap)
+ lwz $t4,4($rp)
+ lwz $t5,8($rp)
+ lwz $t6,12($rp)
+ lwz $t7,16($rp)
std $i,8($nap_d) ; zap nap_d
std $i,16($nap_d)
std $i,24($nap_d)
@@ -1576,6 +1588,18 @@ Lcopy: ; copy or in-place refresh
std $i,48($nap_d)
std $i,56($nap_d)
stdu $i,64($nap_d)
+ and $t0,$t0,$ovf
+ and $t1,$t1,$ovf
+ and $t2,$t2,$ovf
+ and $t3,$t3,$ovf
+ andc $t4,$t4,$ovf
+ andc $t5,$t5,$ovf
+ andc $t6,$t6,$ovf
+ andc $t7,$t7,$ovf
+ or $t0,$t0,$t4
+ or $t1,$t1,$t5
+ or $t2,$t2,$t6
+ or $t3,$t3,$t7
stw $t0,4($rp)
stw $t1,8($rp)
stw $t2,12($rp)