diff options
Diffstat (limited to 'crypto/bn/asm/sparcv9-mont.pl')
-rwxr-xr-x | crypto/bn/asm/sparcv9-mont.pl | 43 |
1 files changed, 27 insertions, 16 deletions
diff --git a/crypto/bn/asm/sparcv9-mont.pl b/crypto/bn/asm/sparcv9-mont.pl index d8662878006e..b41903af985f 100755 --- a/crypto/bn/asm/sparcv9-mont.pl +++ b/crypto/bn/asm/sparcv9-mont.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -13,7 +20,7 @@ # for undertaken effort are multiple. First of all, UltraSPARC is not # the whole SPARCv9 universe and other VIS-free implementations deserve # optimized code as much. Secondly, newly introduced UltraSPARC T1, -# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes, +# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths, # such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with # several integrated RSA/DSA accelerator circuits accessible through # kernel driver [only(*)], but having decent user-land software @@ -23,7 +30,7 @@ # instructions... # (*) Engine accessing the driver in question is on my TODO list. -# For reference, acceleator is estimated to give 6 to 10 times +# For reference, accelerator is estimated to give 6 to 10 times # improvement on single-threaded RSA sign. It should be noted # that 6-10x improvement coefficient does not actually mean # something extraordinary in terms of absolute [single-threaded] @@ -42,6 +49,9 @@ # module still have hidden potential [see TODO list there], which is # estimated to be larger than 20%... +$output = pop; +open STDOUT,">$output"; + # int bn_mul_mont( $rp="%i0"; # BN_ULONG *rp, $ap="%i1"; # const BN_ULONG *ap, @@ -50,10 +60,8 @@ $np="%i3"; # const BN_ULONG *np, $n0="%i4"; # const BN_ULONG *n0, $num="%i5"; # int num); -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=128; } +$frame="STACK_FRAME"; +$bias="STACK_BIAS"; $car0="%o0"; $car1="%o1"; @@ -76,6 +84,8 @@ $tpj="%l7"; $fname="bn_mul_mont_int"; $code=<<___; +#include "sparc_arch.h" + .section ".text",#alloc,#execinstr .global $fname @@ -105,7 +115,7 @@ $fname: ld [$np],$car1 ! np[0] sub %o7,$bias,%sp ! alloca ld [$np+4],$npj ! np[1] - be,pt `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont + be,pt SIZE_T_CC,.Lbn_sqr_mont mov 12,$j mulx $car0,$mul0,$car0 ! ap[0]*bp[0] @@ -255,7 +265,6 @@ $fname: .Ltail: add $np,$num,$np add $rp,$num,$rp - mov $tp,$ap sub %g0,$num,%o7 ! k=-num ba .Lsub subcc %g0,%g0,%g0 ! clear %icc.c @@ -268,15 +277,14 @@ $fname: add %o7,4,%o7 brnz %o7,.Lsub st %o1,[$i] - subc $car2,0,$car2 ! handle upmost overflow bit - and $tp,$car2,$ap - andn $rp,$car2,$np - or $ap,$np,$ap + subccc $car2,0,$car2 ! handle upmost overflow bit sub %g0,$num,%o7 .Lcopy: - ld [$ap+%o7],%o0 ! copy or in-place refresh + ld [$tp+%o7],%o1 ! conditional copy + ld [$rp+%o7],%o0 st %g0,[$tp+%o7] ! zap tp + movcs %icc,%o1,%o0 st %o0,[$rp+%o7] add %o7,4,%o7 brnz %o7,.Lcopy @@ -485,6 +493,9 @@ $code.=<<___; mulx $npj,$mul1,$acc1 add $tpj,$car1,$car1 ld [$np+$j],$npj ! np[j] + srlx $car1,32,$tmp0 + and $car1,$mask,$car1 + add $tmp0,$sbit,$sbit add $acc0,$car1,$car1 ld [$tp+8],$tpj ! tp[j] add $acc1,$car1,$car1 @@ -601,7 +612,7 @@ $code.=<<___; add $tp,8,$tp .type $fname,#function .size $fname,(.-$fname) -.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" +.asciz "Montgomery Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" .align 32 ___ $code =~ s/\`([^\`]*)\`/eval($1)/gem; |