diff options
Diffstat (limited to 'crypto/bn/asm/vis3-mont.pl')
-rwxr-xr-x | crypto/bn/asm/vis3-mont.pl | 47 |
1 files changed, 29 insertions, 18 deletions
diff --git a/crypto/bn/asm/vis3-mont.pl b/crypto/bn/asm/vis3-mont.pl index 263ac02b6f45..04833a0c876d 100755 --- a/crypto/bn/asm/vis3-mont.pl +++ b/crypto/bn/asm/vis3-mont.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2012-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -9,7 +16,7 @@ # October 2012. # -# SPARCv9 VIS3 Montgomery multiplicaion procedure suitable for T3 and +# SPARCv9 VIS3 Montgomery multiplication procedure suitable for T3 and # onward. There are three new instructions used here: umulxhi, # addxc[cc] and initializing store. On T3 RSA private key operations # are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key @@ -18,16 +25,20 @@ # for reference purposes, because T4 has dedicated Montgomery # multiplication and squaring *instructions* that deliver even more. -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } +$output = pop; +open STDOUT,">$output"; + +$frame = "STACK_FRAME"; +$bias = "STACK_BIAS"; + +$code.=<<___; +#include "sparc_arch.h" -$code.=<<___ if ($bits==64); +#ifdef __arch64__ .register %g2,#scratch .register %g3,#scratch -___ -$code.=<<___; +#endif + .section ".text",#alloc,#execinstr ___ @@ -299,23 +310,23 @@ $code.=<<___; sub $anp, $num, $anp sub $rp, $num, $rp - subc $ovf, %g0, $ovf ! handle upmost overflow bit - and $tp, $ovf, $ap - andn $rp, $ovf, $np - or $np, $ap, $ap ! ap=borrow?tp:rp + subccc $ovf, %g0, $ovf ! handle upmost overflow bit ba .Lcopy sub $num, 8, $cnt .align 16 -.Lcopy: ! copy or in-place refresh - ld [$ap+0], $t2 - ld [$ap+4], $t3 - add $ap, 8, $ap +.Lcopy: ! conditional copy + ld [$tp+0], $t0 + ld [$tp+4], $t1 + ld [$rp+0], $t2 + ld [$rp+4], $t3 stx %g0, [$tp] ! zap add $tp, 8, $tp stx %g0, [$anp] ! zap stx %g0, [$anp+8] add $anp, 16, $anp + movcs %icc, $t0, $t2 + movcs %icc, $t1, $t3 st $t3, [$rp+0] ! flip order st $t2, [$rp+4] add $rp, 8, $rp @@ -333,7 +344,7 @@ ___ # Purpose of these subroutines is to explicitly encode VIS instructions, # so that one can compile the module without having to specify VIS -# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. # Idea is to reserve for option to produce "universal" binary and let # programmer detect if current CPU is VIS capable at run-time. sub unvis3 { |