diff options
author | Jung-uk Kim <jkim@FreeBSD.org> | 2017-12-07 18:02:57 +0000 |
---|---|---|
committer | Jung-uk Kim <jkim@FreeBSD.org> | 2017-12-07 18:02:57 +0000 |
commit | c4ad4dffb36f4c6a204680ed8d2adfbdb6210297 (patch) | |
tree | cd7bd81ef925aea7e7f5fd272e0538659f3d390f /crypto/openssl/crypto/bn | |
parent | 8f2246242eb7dddea36c7cda048774d7114ab051 (diff) | |
parent | 4f94f84d8491a2455678402b5c7c92e692a272bc (diff) | |
download | src-test2-c4ad4dffb36f4c6a204680ed8d2adfbdb6210297.tar.gz src-test2-c4ad4dffb36f4c6a204680ed8d2adfbdb6210297.zip |
Notes
Diffstat (limited to 'crypto/openssl/crypto/bn')
-rwxr-xr-x | crypto/openssl/crypto/bn/asm/rsaz-avx2.pl | 15 | ||||
-rw-r--r-- | crypto/openssl/crypto/bn/bn_exp.c | 8 |
2 files changed, 11 insertions, 12 deletions
diff --git a/crypto/openssl/crypto/bn/asm/rsaz-avx2.pl b/crypto/openssl/crypto/bn/asm/rsaz-avx2.pl index 712a77fe8ca3..2b3f8b0e21ec 100755 --- a/crypto/openssl/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/openssl/crypto/bn/asm/rsaz-avx2.pl @@ -239,7 +239,7 @@ $code.=<<___; vmovdqu 32*8-128($ap), $ACC8 lea 192(%rsp), $tp0 # 64+128=192 - vpbroadcastq .Land_mask(%rip), $AND_MASK + vmovdqu .Land_mask(%rip), $AND_MASK jmp .LOOP_GRANDE_SQR_1024 .align 32 @@ -1070,10 +1070,10 @@ $code.=<<___; vpmuludq 32*6-128($np),$Yi,$TEMP1 vpaddq $TEMP1,$ACC6,$ACC6 vpmuludq 32*7-128($np),$Yi,$TEMP2 - vpblendd \$3, $ZERO, $ACC9, $ACC9 # correct $ACC3 + vpblendd \$3, $ZERO, $ACC9, $TEMP1 # correct $ACC3 vpaddq $TEMP2,$ACC7,$ACC7 vpmuludq 32*8-128($np),$Yi,$TEMP0 - vpaddq $ACC9, $ACC3, $ACC3 # correct $ACC3 + vpaddq $TEMP1, $ACC3, $ACC3 # correct $ACC3 vpaddq $TEMP0,$ACC8,$ACC8 mov %rbx, %rax @@ -1086,7 +1086,9 @@ $code.=<<___; vmovdqu -8+32*2-128($ap),$TEMP2 mov $r1, %rax + vpblendd \$0xfc, $ZERO, $ACC9, $ACC9 # correct $ACC3 imull $n0, %eax + vpaddq $ACC9,$ACC4,$ACC4 # correct $ACC3 and \$0x1fffffff, %eax imulq 16-128($ap),%rbx @@ -1322,15 +1324,12 @@ ___ # But as we underutilize resources, it's possible to correct in # each iteration with marginal performance loss. But then, as # we do it in each iteration, we can correct less digits, and -# avoid performance penalties completely. Also note that we -# correct only three digits out of four. This works because -# most significant digit is subjected to less additions. +# avoid performance penalties completely. $TEMP0 = $ACC9; $TEMP3 = $Bi; $TEMP4 = $Yi; $code.=<<___; - vpermq \$0, $AND_MASK, $AND_MASK vpaddq (%rsp), $TEMP1, $ACC0 vpsrlq \$29, $ACC0, $TEMP1 @@ -1763,7 +1762,7 @@ $code.=<<___; .align 64 .Land_mask: - .quad 0x1fffffff,0x1fffffff,0x1fffffff,-1 + .quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff .Lscatter_permd: .long 0,2,4,6,7,7,7,7 .Lgather_permd: diff --git a/crypto/openssl/crypto/bn/bn_exp.c b/crypto/openssl/crypto/bn/bn_exp.c index 35facd213a25..c4b63e44ba36 100644 --- a/crypto/openssl/crypto/bn/bn_exp.c +++ b/crypto/openssl/crypto/bn/bn_exp.c @@ -149,7 +149,7 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) || BN_get_flags(a, BN_FLG_CONSTTIME) != 0) { /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); - return -1; + return 0; } BN_CTX_start(ctx); @@ -285,7 +285,7 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, || BN_get_flags(m, BN_FLG_CONSTTIME) != 0) { /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); - return -1; + return 0; } bits = BN_num_bits(p); @@ -1228,7 +1228,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, || BN_get_flags(m, BN_FLG_CONSTTIME) != 0) { /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); - return -1; + return 0; } bn_check_top(p); @@ -1361,7 +1361,7 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, || BN_get_flags(m, BN_FLG_CONSTTIME) != 0) { /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); - return -1; + return 0; } bits = BN_num_bits(p); |