summaryrefslogtreecommitdiff
path: root/crypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn')
-rw-r--r--crypto/bn/Makefile9
-rw-r--r--crypto/bn/asm/ia64.S35
-rwxr-xr-xcrypto/bn/asm/mo-586.pl603
-rwxr-xr-xcrypto/bn/asm/x86_64-mont.pl214
-rw-r--r--crypto/bn/bn.h25
-rw-r--r--crypto/bn/bn_blind.c12
-rw-r--r--crypto/bn/bn_div.c249
-rw-r--r--crypto/bn/bn_err.c4
-rw-r--r--crypto/bn/bn_exp.c20
-rw-r--r--crypto/bn/bn_gcd.c161
-rw-r--r--crypto/bn/bn_gf2m.c6
-rw-r--r--crypto/bn/bn_lcl.h1
-rw-r--r--crypto/bn/bn_lib.c2
-rw-r--r--crypto/bn/bn_mont.c321
-rw-r--r--crypto/bn/bn_mul.c15
-rw-r--r--crypto/bn/bn_nist.c653
-rw-r--r--crypto/bn/bn_prime.c4
-rw-r--r--crypto/bn/bn_prime.h4
-rw-r--r--crypto/bn/bn_prime.pl4
-rw-r--r--crypto/bn/bntest.c60
20 files changed, 1981 insertions, 421 deletions
diff --git a/crypto/bn/Makefile b/crypto/bn/Makefile
index 5c3e08fa8085..0491e3db4c47 100644
--- a/crypto/bn/Makefile
+++ b/crypto/bn/Makefile
@@ -67,16 +67,22 @@ bn86-elf.s: asm/bn-586.pl ../perlasm/x86asm.pl
(cd asm; $(PERL) bn-586.pl elf $(CFLAGS) > ../$@)
co86-elf.s: asm/co-586.pl ../perlasm/x86asm.pl
(cd asm; $(PERL) co-586.pl elf $(CFLAGS) > ../$@)
+mo86-elf.s: asm/mo-586.pl ../perlasm/x86asm.pl
+ (cd asm; $(PERL) mo-586.pl elf $(CFLAGS) > ../$@)
# COFF
bn86-cof.s: asm/bn-586.pl ../perlasm/x86asm.pl
(cd asm; $(PERL) bn-586.pl coff $(CFLAGS) > ../$@)
co86-cof.s: asm/co-586.pl ../perlasm/x86asm.pl
(cd asm; $(PERL) co-586.pl coff $(CFLAGS) > ../$@)
+mo86-cof.s: asm/mo-586.pl ../perlasm/x86asm.pl
+ (cd asm; $(PERL) mo-586.pl coff $(CFLAGS) > ../$@)
# a.out
bn86-out.s: asm/bn-586.pl ../perlasm/x86asm.pl
(cd asm; $(PERL) bn-586.pl a.out $(CFLAGS) > ../$@)
co86-out.s: asm/co-586.pl ../perlasm/x86asm.pl
(cd asm; $(PERL) co-586.pl a.out $(CFLAGS) > ../$@)
+mo86-out.s: asm/mo-586.pl ../perlasm/x86asm.pl
+ (cd asm; $(PERL) mo-586.pl a.out $(CFLAGS) > ../$@)
sparcv8.o: asm/sparcv8.S
$(CC) $(CFLAGS) -c asm/sparcv8.S
@@ -91,6 +97,8 @@ bn-mips3.o: asm/mips3.s
x86_64-gcc.o: asm/x86_64-gcc.c
$(CC) $(CFLAGS) -c -o $@ asm/x86_64-gcc.c
+x86_64-mont.s: asm/x86_64-mont.pl
+ $(PERL) asm/x86_64-mont.pl $@
bn-ia64.s: asm/ia64.S
$(CC) $(CFLAGS) -E asm/ia64.S > $@
@@ -108,6 +116,7 @@ linux_ppc64.s: asm/ppc.pl; $(PERL) $< $@
aix_ppc32.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
aix_ppc64.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
osx_ppc32.s: asm/ppc.pl; $(PERL) $< $@
+osx_ppc64.s: asm/ppc.pl; $(PERL) $< $@
files:
$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
diff --git a/crypto/bn/asm/ia64.S b/crypto/bn/asm/ia64.S
index 7b82b820e62a..951abc53ea5b 100644
--- a/crypto/bn/asm/ia64.S
+++ b/crypto/bn/asm/ia64.S
@@ -171,21 +171,21 @@
.skip 32 // makes the loop body aligned at 64-byte boundary
bn_add_words:
.prologue
- .fframe 0
.save ar.pfs,r2
{ .mii; alloc r2=ar.pfs,4,12,0,16
cmp4.le p6,p0=r35,r0 };;
{ .mfb; mov r8=r0 // return value
(p6) br.ret.spnt.many b0 };;
- .save ar.lc,r3
{ .mib; sub r10=r35,r0,1
+ .save ar.lc,r3
mov r3=ar.lc
brp.loop.imp .L_bn_add_words_ctop,.L_bn_add_words_cend-16
}
- .body
{ .mib; ADDP r14=0,r32 // rp
+ .save pr,r9
mov r9=pr };;
+ .body
{ .mii; ADDP r15=0,r33 // ap
mov ar.lc=r10
mov ar.ec=6 }
@@ -224,21 +224,21 @@ bn_add_words:
.skip 32 // makes the loop body aligned at 64-byte boundary
bn_sub_words:
.prologue
- .fframe 0
.save ar.pfs,r2
{ .mii; alloc r2=ar.pfs,4,12,0,16
cmp4.le p6,p0=r35,r0 };;
{ .mfb; mov r8=r0 // return value
(p6) br.ret.spnt.many b0 };;
- .save ar.lc,r3
{ .mib; sub r10=r35,r0,1
+ .save ar.lc,r3
mov r3=ar.lc
brp.loop.imp .L_bn_sub_words_ctop,.L_bn_sub_words_cend-16
}
- .body
{ .mib; ADDP r14=0,r32 // rp
+ .save pr,r9
mov r9=pr };;
+ .body
{ .mii; ADDP r15=0,r33 // ap
mov ar.lc=r10
mov ar.ec=6 }
@@ -283,7 +283,6 @@ bn_sub_words:
.skip 32 // makes the loop body aligned at 64-byte boundary
bn_mul_words:
.prologue
- .fframe 0
.save ar.pfs,r2
#ifdef XMA_TEMPTATION
{ .mfi; alloc r2=ar.pfs,4,0,0,0 };;
@@ -294,9 +293,10 @@ bn_mul_words:
cmp4.le p6,p0=r34,r0
(p6) br.ret.spnt.many b0 };;
- .save ar.lc,r3
{ .mii; sub r10=r34,r0,1
+ .save ar.lc,r3
mov r3=ar.lc
+ .save pr,r9
mov r9=pr };;
.body
@@ -397,22 +397,21 @@ bn_mul_words:
.skip 48 // makes the loop body aligned at 64-byte boundary
bn_mul_add_words:
.prologue
- .fframe 0
.save ar.pfs,r2
- .save ar.lc,r3
- .save pr,r9
{ .mmi; alloc r2=ar.pfs,4,4,0,8
cmp4.le p6,p0=r34,r0
+ .save ar.lc,r3
mov r3=ar.lc };;
{ .mib; mov r8=r0 // return value
sub r10=r34,r0,1
(p6) br.ret.spnt.many b0 };;
- .body
{ .mib; setf.sig f8=r35 // w
+ .save pr,r9
mov r9=pr
brp.loop.imp .L_bn_mul_add_words_ctop,.L_bn_mul_add_words_cend-16
}
+ .body
{ .mmi; ADDP r14=0,r32 // rp
ADDP r15=0,r33 // ap
mov ar.lc=r10 }
@@ -466,7 +465,6 @@ bn_mul_add_words:
.skip 32 // makes the loop body aligned at 64-byte boundary
bn_sqr_words:
.prologue
- .fframe 0
.save ar.pfs,r2
{ .mii; alloc r2=ar.pfs,3,0,0,0
sxt4 r34=r34 };;
@@ -476,9 +474,10 @@ bn_sqr_words:
nop.f 0x0
(p6) br.ret.spnt.many b0 };;
- .save ar.lc,r3
{ .mii; sub r10=r34,r0,1
+ .save ar.lc,r3
mov r3=ar.lc
+ .save pr,r9
mov r9=pr };;
.body
@@ -545,7 +544,6 @@ bn_sqr_words:
.align 64
bn_sqr_comba8:
.prologue
- .fframe 0
.save ar.pfs,r2
#if defined(_HPUX_SOURCE) && !defined(_LP64)
{ .mii; alloc r2=ar.pfs,2,1,0,0
@@ -617,7 +615,6 @@ bn_sqr_comba8:
.align 64
bn_mul_comba8:
.prologue
- .fframe 0
.save ar.pfs,r2
#if defined(_HPUX_SOURCE) && !defined(_LP64)
{ .mii; alloc r2=ar.pfs,3,0,0,0
@@ -1175,7 +1172,6 @@ bn_mul_comba8:
.align 64
bn_sqr_comba4:
.prologue
- .fframe 0
.save ar.pfs,r2
#if defined(_HPUX_SOURCE) && !defined(_LP64)
{ .mii; alloc r2=ar.pfs,2,1,0,0
@@ -1208,7 +1204,6 @@ bn_sqr_comba4:
.align 64
bn_mul_comba4:
.prologue
- .fframe 0
.save ar.pfs,r2
#if defined(_HPUX_SOURCE) && !defined(_LP64)
{ .mii; alloc r2=ar.pfs,3,0,0,0
@@ -1411,11 +1406,11 @@ equ=p24
.align 64
bn_div_words:
.prologue
- .fframe 0
.save ar.pfs,r2
- .save b0,r3
{ .mii; alloc r2=ar.pfs,3,5,0,8
+ .save b0,r3
mov r3=b0
+ .save pr,r10
mov r10=pr };;
{ .mmb; cmp.eq p6,p0=r34,r0
mov r8=-1
diff --git a/crypto/bn/asm/mo-586.pl b/crypto/bn/asm/mo-586.pl
new file mode 100755
index 000000000000..0982293094d9
--- /dev/null
+++ b/crypto/bn/asm/mo-586.pl
@@ -0,0 +1,603 @@
+#!/usr/bin/env perl
+
+# This is crypto/bn/asm/x86-mont.pl (with asciz from crypto/perlasm/x86asm.pl)
+# from OpenSSL 0.9.9-dev
+
+sub ::asciz	# pass a string plus a terminating 0 to data_byte, at most 16 values per call
+{ my @str=unpack("C*",shift);	# first argument -> list of unsigned byte values
+ push @str,0;	# append the terminating NUL
+ while ($#str>15) {	# more than 16 values still pending
+ &data_byte(@str[0..15]);	# hand over the next 16 values
+ foreach (0..15) { shift @str; }	# ...and drop them from the list
+ }
+ &data_byte(@str) if (@str);	# remaining tail (1..16 values)
+}
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# October 2005
+#
+# This is a "teaser" code, as it can be improved in several ways...
+# First of all non-SSE2 path should be implemented (yes, for now it
+# performs Montgomery multiplication/convolution only on SSE2-capable
+# CPUs such as P4, others fall down to original code). Then inner loop
+# can be unrolled and modulo-scheduled to improve ILP and possibly
+# moved to 128-bit XMM register bank (though it would require input
+# rearrangement and/or increase bus bandwidth utilization). Dedicated
+# squaring procedure should give further performance improvement...
+# Yet, for being draft, the code improves rsa512 *sign* benchmark by
+# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-)
+
+# December 2006
+#
+# Modulo-scheduling SSE2 loops results in further 15-20% improvement.
+# Integer-only code [being equipped with dedicated squaring procedure]
+# gives ~40% on rsa512 sign benchmark...
+
+push(@INC,"perlasm","../../perlasm");
+require "x86asm.pl";
+
+&asm_init($ARGV[0],$0);
+
+$sse2=0;
+for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
+
+&external_label("OPENSSL_ia32cap_P") if ($sse2);
+
+&function_begin("bn_mul_mont");
+
+$i="edx";
+$j="ecx";
+$ap="esi"; $tp="esi"; # overlapping variables!!!
+$rp="edi"; $bp="edi"; # overlapping variables!!!
+$np="ebp";
+$num="ebx";
+
+$_num=&DWP(4*0,"esp"); # stack top layout
+$_rp=&DWP(4*1,"esp");
+$_ap=&DWP(4*2,"esp");
+$_bp=&DWP(4*3,"esp");
+$_np=&DWP(4*4,"esp");
+$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp");
+$_sp=&DWP(4*6,"esp");
+$_bpend=&DWP(4*7,"esp");
+$frame=32; # size of above frame rounded up to 16n
+
+ &xor ("eax","eax");
+ &mov ("edi",&wparam(5)); # int num
+ &cmp ("edi",4);
+ &jl (&label("just_leave"));
+
+ &lea ("esi",&wparam(0)); # put aside pointer to argument block
+ &lea ("edx",&wparam(1)); # load ap
+ &mov ("ebp","esp"); # saved stack pointer!
+ &add ("edi",2); # extra two words on top of tp
+ &neg ("edi");
+ &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
+ &neg ("edi");
+
+ # minimize cache contention by arranging 2K window between stack
+ # pointer and ap argument [np is also position sensitive vector,
+ # but it's assumed to be near ap, as it's allocated at ~same
+ # time].
+ &mov ("eax","esp");
+ &sub ("eax","edx");
+ &and ("eax",2047);
+ &sub ("esp","eax"); # this aligns sp and ap modulo 2048
+
+ &xor ("edx","esp");
+ &and ("edx",2048);
+ &xor ("edx",2048);
+ &sub ("esp","edx"); # this splits them apart modulo 4096
+
+ &and ("esp",-64); # align to cache line
+
+ ################################# load argument block...
+ &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
+ &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
+ &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
+ &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
+ &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
+ #&mov ("edi",&DWP(5*4,"esi"));# int num
+
+ &mov ("esi",&DWP(0,"esi")); # pull n0[0]
+ &mov ($_rp,"eax"); # ... save a copy of argument block
+ &mov ($_ap,"ebx");
+ &mov ($_bp,"ecx");
+ &mov ($_np,"edx");
+ &mov ($_n0,"esi");
+ &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
+ #&mov ($_num,$num); # redundant as $num is not reused
+ &mov ($_sp,"ebp"); # saved stack pointer!
+
+if($sse2) {
+$acc0="mm0"; # mmx register bank layout
+$acc1="mm1";
+$car0="mm2";
+$car1="mm3";
+$mul0="mm4";
+$mul1="mm5";
+$temp="mm6";
+$mask="mm7";
+
+ &picmeup("eax","OPENSSL_ia32cap_P");
+ &bt (&DWP(0,"eax"),26);
+ &jnc (&label("non_sse2"));
+
+ &mov ("eax",-1);
+ &movd ($mask,"eax"); # mask 32 lower bits
+
+ &mov ($ap,$_ap); # load input pointers
+ &mov ($bp,$_bp);
+ &mov ($np,$_np);
+
+ &xor ($i,$i); # i=0
+ &xor ($j,$j); # j=0
+
+ &movd ($mul0,&DWP(0,$bp)); # bp[0]
+ &movd ($mul1,&DWP(0,$ap)); # ap[0]
+ &movd ($car1,&DWP(0,$np)); # np[0]
+
+ &pmuludq($mul1,$mul0); # ap[0]*bp[0]
+ &movq ($car0,$mul1);
+ &movq ($acc0,$mul1); # I wish movd worked for
+ &pand ($acc0,$mask); # inter-register transfers
+
+ &pmuludq($mul1,$_n0q); # *=n0
+
+ &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
+ &paddq ($car1,$acc0);
+
+ &movd ($acc1,&DWP(4,$np)); # np[1]
+ &movd ($acc0,&DWP(4,$ap)); # ap[1]
+
+ &psrlq ($car0,32);
+ &psrlq ($car1,32);
+
+ &inc ($j); # j++
+&set_label("1st",16);
+ &pmuludq($acc0,$mul0); # ap[j]*bp[0]
+ &pmuludq($acc1,$mul1); # np[j]*m1
+ &paddq ($car0,$acc0); # +=c0
+ &paddq ($car1,$acc1); # +=c1
+
+ &movq ($acc0,$car0);
+ &pand ($acc0,$mask);
+ &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
+ &paddq ($car1,$acc0); # +=ap[j]*bp[0];
+ &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
+ &psrlq ($car0,32);
+ &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]=
+ &psrlq ($car1,32);
+
+ &lea ($j,&DWP(1,$j));
+ &cmp ($j,$num);
+ &jl (&label("1st"));
+
+ &pmuludq($acc0,$mul0); # ap[num-1]*bp[0]
+ &pmuludq($acc1,$mul1); # np[num-1]*m1
+ &paddq ($car0,$acc0); # +=c0
+ &paddq ($car1,$acc1); # +=c1
+
+ &movq ($acc0,$car0);
+ &pand ($acc0,$mask);
+ &paddq ($car1,$acc0); # +=ap[num-1]*bp[0];
+ &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
+
+ &psrlq ($car0,32);
+ &psrlq ($car1,32);
+
+ &paddq ($car1,$car0);
+ &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
+
+ &inc ($i); # i++
+&set_label("outer");
+ &xor ($j,$j); # j=0
+
+ &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i]
+ &movd ($mul1,&DWP(0,$ap)); # ap[0]
+ &movd ($temp,&DWP($frame,"esp")); # tp[0]
+ &movd ($car1,&DWP(0,$np)); # np[0]
+ &pmuludq($mul1,$mul0); # ap[0]*bp[i]
+
+ &paddq ($mul1,$temp); # +=tp[0]
+ &movq ($acc0,$mul1);
+ &movq ($car0,$mul1);
+ &pand ($acc0,$mask);
+
+ &pmuludq($mul1,$_n0q); # *=n0
+
+ &pmuludq($car1,$mul1);
+ &paddq ($car1,$acc0);
+
+ &movd ($temp,&DWP($frame+4,"esp")); # tp[1]
+ &movd ($acc1,&DWP(4,$np)); # np[1]
+ &movd ($acc0,&DWP(4,$ap)); # ap[1]
+
+ &psrlq ($car0,32);
+ &psrlq ($car1,32);
+ &paddq ($car0,$temp); # +=tp[1]
+
+ &inc ($j); # j++
+ &dec ($num);
+&set_label("inner");
+ &pmuludq($acc0,$mul0); # ap[j]*bp[i]
+ &pmuludq($acc1,$mul1); # np[j]*m1
+ &paddq ($car0,$acc0); # +=c0
+ &paddq ($car1,$acc1); # +=c1
+
+ &movq ($acc0,$car0);
+ &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
+ &pand ($acc0,$mask);
+ &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
+ &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j]
+ &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
+ &psrlq ($car0,32);
+ &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
+ &psrlq ($car1,32);
+ &paddq ($car0,$temp); # +=tp[j+1]
+
+ &dec ($num);
+ &lea ($j,&DWP(1,$j)); # j++
+ &jnz (&label("inner"));
+
+ &mov ($num,$j);
+ &pmuludq($acc0,$mul0); # ap[num-1]*bp[i]
+ &pmuludq($acc1,$mul1); # np[num-1]*m1
+ &paddq ($car0,$acc0); # +=c0
+ &paddq ($car1,$acc1); # +=c1
+
+ &movq ($acc0,$car0);
+ &pand ($acc0,$mask);
+ &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1]
+ &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
+ &psrlq ($car0,32);
+ &psrlq ($car1,32);
+
+ &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
+ &paddq ($car1,$car0);
+ &paddq ($car1,$temp);
+ &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
+
+ &lea ($i,&DWP(1,$i)); # i++
+ &cmp ($i,$num);
+ &jle (&label("outer"));
+
+ &emms (); # done with mmx bank
+ &jmp (&label("common_tail"));
+
+&set_label("non_sse2",16);
+}
+
+if (0) {
+ &mov ("esp",$_sp);
+ &xor ("eax","eax"); # signal "not fast enough [yet]"
+ &jmp (&label("just_leave"));
+ # While the below code provides competitive performance for
+ # all key lengths on modern Intel cores, it's still more
+ # than 10% slower for 4096-bit key elsewhere:-( "Competitive"
+ # means compared to the original integer-only assembler.
+ # 512-bit RSA sign is better by ~40%, but that's about all
+ # one can say about all CPUs...
+} else {
+$inp="esi"; # integer path uses these registers differently
+$word="edi";
+$carry="ebp";
+
+ &mov ($inp,$_ap);
+ &lea ($carry,&DWP(1,$num));
+ &mov ($word,$_bp);
+ &xor ($j,$j); # j=0
+ &mov ("edx",$inp);
+ &and ($carry,1); # see if num is even
+ &sub ("edx",$word); # see if ap==bp
+ &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num]
+ &or ($carry,"edx");
+ &mov ($word,&DWP(0,$word)); # bp[0]
+ &jz (&label("bn_sqr_mont"));
+ &mov ($_bpend,"eax");
+ &mov ("eax",&DWP(0,$inp));
+ &xor ("edx","edx");
+
+&set_label("mull",16);
+ &mov ($carry,"edx");
+ &mul ($word); # ap[j]*bp[0]
+ &add ($carry,"eax");
+ &lea ($j,&DWP(1,$j));
+ &adc ("edx",0);
+ &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
+ &cmp ($j,$num);
+ &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
+ &jl (&label("mull"));
+
+ &mov ($carry,"edx");
+ &mul ($word); # ap[num-1]*bp[0]
+ &mov ($word,$_n0);
+ &add ("eax",$carry);
+ &mov ($inp,$_np);
+ &adc ("edx",0);
+ &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
+
+ &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]=
+ &xor ($j,$j);
+ &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
+ &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
+
+ &mov ("eax",&DWP(0,$inp)); # np[0]
+ &mul ($word); # np[0]*m
+ &add ("eax",&DWP($frame,"esp")); # +=tp[0]
+ &mov ("eax",&DWP(4,$inp)); # np[1]
+ &adc ("edx",0);
+ &inc ($j);
+
+ &jmp (&label("2ndmadd"));
+
+&set_label("1stmadd",16);
+ &mov ($carry,"edx");
+ &mul ($word); # ap[j]*bp[i]
+ &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
+ &lea ($j,&DWP(1,$j));
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
+ &adc ("edx",0);
+ &cmp ($j,$num);
+ &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
+ &jl (&label("1stmadd"));
+
+ &mov ($carry,"edx");
+ &mul ($word); # ap[num-1]*bp[i]
+ &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1]
+ &mov ($word,$_n0);
+ &adc ("edx",0);
+ &mov ($inp,$_np);
+ &add ($carry,"eax");
+ &adc ("edx",0);
+ &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
+
+ &xor ($j,$j);
+ &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
+ &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]=
+ &adc ($j,0);
+ &mov ("eax",&DWP(0,$inp)); # np[0]
+ &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
+ &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
+
+ &mul ($word); # np[0]*m
+ &add ("eax",&DWP($frame,"esp")); # +=tp[0]
+ &mov ("eax",&DWP(4,$inp)); # np[1]
+ &adc ("edx",0);
+ &mov ($j,1);
+
+&set_label("2ndmadd",16);
+ &mov ($carry,"edx");
+ &mul ($word); # np[j]*m
+ &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
+ &lea ($j,&DWP(1,$j));
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1]
+ &adc ("edx",0);
+ &cmp ($j,$num);
+ &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]=
+ &jl (&label("2ndmadd"));
+
+ &mov ($carry,"edx");
+ &mul ($word); # np[j]*m
+ &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &adc ("edx",0);
+ &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
+
+ &xor ("eax","eax");
+ &mov ($j,$_bp); # &bp[i]
+ &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
+ &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
+ &lea ($j,&DWP(4,$j));
+ &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
+ &cmp ($j,$_bpend);
+ &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
+ &je (&label("common_tail"));
+
+ &mov ($word,&DWP(0,$j)); # bp[i+1]
+ &mov ($inp,$_ap);
+ &mov ($_bp,$j); # &bp[++i]
+ &xor ($j,$j);
+ &xor ("edx","edx");
+ &mov ("eax",&DWP(0,$inp));
+ &jmp (&label("1stmadd"));
+
+&set_label("bn_sqr_mont",16);
+$sbit=$num;
+ &mov ($_num,$num);
+ &mov ($_bp,$j); # i=0
+
+ &mov ("eax",$word); # ap[0]
+ &mul ($word); # ap[0]*ap[0]
+ &mov (&DWP($frame,"esp"),"eax"); # tp[0]=
+ &mov ($sbit,"edx");
+ &shr ("edx",1);
+ &and ($sbit,1);
+ &inc ($j);
+&set_label("sqr",16);
+ &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
+ &mov ($carry,"edx");
+ &mul ($word); # ap[j]*ap[0]
+ &add ("eax",$carry);
+ &lea ($j,&DWP(1,$j));
+ &adc ("edx",0);
+ &lea ($carry,&DWP(0,$sbit,"eax",2));
+ &shr ("eax",31);
+ &cmp ($j,$_num);
+ &mov ($sbit,"eax");
+ &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
+ &jl (&label("sqr"));
+
+ &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1]
+ &mov ($carry,"edx");
+ &mul ($word); # ap[num-1]*ap[0]
+ &add ("eax",$carry);
+ &mov ($word,$_n0);
+ &adc ("edx",0);
+ &mov ($inp,$_np);
+ &lea ($carry,&DWP(0,$sbit,"eax",2));
+ &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
+ &shr ("eax",31);
+ &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]=
+
+ &lea ($carry,&DWP(0,"eax","edx",2));
+ &mov ("eax",&DWP(0,$inp)); # np[0]
+ &shr ("edx",31);
+ &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]=
+ &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]=
+
+ &mul ($word); # np[0]*m
+ &add ("eax",&DWP($frame,"esp")); # +=tp[0]
+ &mov ($num,$j);
+ &adc ("edx",0);
+ &mov ("eax",&DWP(4,$inp)); # np[1]
+ &mov ($j,1);
+
+&set_label("3rdmadd",16);
+ &mov ($carry,"edx");
+ &mul ($word); # np[j]*m
+ &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1]
+ &adc ("edx",0);
+ &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]=
+
+ &mov ($carry,"edx");
+ &mul ($word); # np[j+1]*m
+ &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1]
+ &lea ($j,&DWP(2,$j));
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2]
+ &adc ("edx",0);
+ &cmp ($j,$num);
+ &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]=
+ &jl (&label("3rdmadd"));
+
+ &mov ($carry,"edx");
+ &mul ($word); # np[j]*m
+ &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &adc ("edx",0);
+ &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
+
+ &mov ($j,$_bp); # i
+ &xor ("eax","eax");
+ &mov ($inp,$_ap);
+ &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
+ &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
+ &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
+ &cmp ($j,$num);
+ &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
+ &je (&label("common_tail"));
+
+ &mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
+ &lea ($j,&DWP(1,$j));
+ &mov ("eax",$word);
+ &mov ($_bp,$j); # ++i
+ &mul ($word); # ap[i]*ap[i]
+ &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i]
+ &adc ("edx",0);
+ &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]=
+ &xor ($carry,$carry);
+ &cmp ($j,$num);
+ &lea ($j,&DWP(1,$j));
+ &je (&label("sqrlast"));
+
+ &mov ($sbit,"edx"); # zaps $num
+ &shr ("edx",1);
+ &and ($sbit,1);
+&set_label("sqradd",16);
+ &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
+ &mov ($carry,"edx");
+ &mul ($word); # ap[j]*ap[i]
+ &add ("eax",$carry);
+ &lea ($carry,&DWP(0,"eax","eax"));
+ &adc ("edx",0);
+ &shr ("eax",31);
+ &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
+ &lea ($j,&DWP(1,$j));
+ &adc ("eax",0);
+ &add ($carry,$sbit);
+ &adc ("eax",0);
+ &cmp ($j,$_num);
+ &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
+ &mov ($sbit,"eax");
+ &jle (&label("sqradd"));
+
+ &mov ($carry,"edx");
+ &lea ("edx",&DWP(0,$sbit,"edx",2));
+ &shr ($carry,31);
+&set_label("sqrlast");
+ &mov ($word,$_n0);
+ &mov ($inp,$_np);
+ &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
+
+ &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num]
+ &mov ("eax",&DWP(0,$inp)); # np[0]
+ &adc ($carry,0);
+ &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]=
+ &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]=
+
+ &mul ($word); # np[0]*m
+ &add ("eax",&DWP($frame,"esp")); # +=tp[0]
+ &lea ($num,&DWP(-1,$j));
+ &adc ("edx",0);
+ &mov ($j,1);
+ &mov ("eax",&DWP(4,$inp)); # np[1]
+
+ &jmp (&label("3rdmadd"));
+}
+
+&set_label("common_tail",16);
+ &mov ($np,$_np); # load modulus pointer
+ &mov ($rp,$_rp); # load result pointer
+ &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped]
+
+ &mov ("eax",&DWP(0,$tp)); # tp[0]
+ &mov ($j,$num); # j=num-1
+ &xor ($i,$i); # i=0 and clear CF!
+
+&set_label("sub",16);
+ &sbb ("eax",&DWP(0,$np,$i,4));
+ &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i]
+ &dec ($j); # doesn't affect CF!
+ &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1]
+ &lea ($i,&DWP(1,$i)); # i++
+ &jge (&label("sub"));
+
+ &sbb ("eax",0); # handle upmost overflow bit
+ &and ($tp,"eax");
+ &not ("eax");
+ &mov ($np,$rp);
+ &and ($np,"eax");
+ &or ($tp,$np); # tp=carry?tp:rp
+
+&set_label("copy",16); # copy or in-place refresh
+ &mov ("eax",&DWP(0,$tp,$num,4));
+ &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i]
+ &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector
+ &dec ($num);
+ &jge (&label("copy"));
+
+ &mov ("esp",$_sp); # pull saved stack pointer
+ &mov ("eax",1);
+&set_label("just_leave");
+&function_end("bn_mul_mont");
+
+&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
+
+&asm_finish();
diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl
new file mode 100755
index 000000000000..c43b69592a5c
--- /dev/null
+++ b/crypto/bn/asm/x86_64-mont.pl
@@ -0,0 +1,214 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# October 2005.
+#
+# Montgomery multiplication routine for x86_64. While it gives modest
+# 9% improvement of rsa4096 sign on Opteron, rsa512 sign runs more
+# than twice, >2x, as fast. Most common rsa1024 sign is improved by
+# respectable 50%. It remains to be seen if loop unrolling and
+# dedicated squaring routine can provide further improvement...
+
+$output=shift;	# first command-line argument; passed on to the translator below
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;	# directory part of this script's own path
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+open STDOUT,"| $^X $xlate $output" or die "can't call $xlate: $!";	# everything printed from here on is piped through the translator
+
+# int bn_mul_mont(
+$rp="%rdi"; # BN_ULONG *rp,
+$ap="%rsi"; # const BN_ULONG *ap,
+$bp="%rdx"; # const BN_ULONG *bp,
+$np="%rcx"; # const BN_ULONG *np,
+$n0="%r8"; # const BN_ULONG *n0,
+$num="%r9"; # int num);
+$lo0="%r10";
+$hi0="%r11";
+$bp="%r12"; # reassign $bp
+$hi1="%r13";
+$i="%r14";
+$j="%r15";
+$m0="%rbx";
+$m1="%rbp";
+
+$code=<<___;
+.text
+
+.globl bn_mul_mont
+.type bn_mul_mont,\@function,6
+.align 16
+bn_mul_mont:
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov ${num}d,${num}d
+ lea 2($num),%rax
+ mov %rsp,%rbp
+ neg %rax
+ lea (%rsp,%rax,8),%rsp # tp=alloca(8*(num+2))
+ and \$-1024,%rsp # minimize TLB usage
+
+ mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp
+ mov %rdx,$bp # $bp reassigned, remember?
+
+ mov ($n0),$n0 # pull n0[0] value
+
+ xor $i,$i # i=0
+ xor $j,$j # j=0
+
+ mov ($bp),$m0 # m0=bp[0]
+ mov ($ap),%rax
+ mulq $m0 # ap[0]*bp[0]
+ mov %rax,$lo0
+ mov %rdx,$hi0
+
+ imulq $n0,%rax # "tp[0]"*n0
+ mov %rax,$m1
+
+ mulq ($np) # np[0]*m1
+ add $lo0,%rax # discarded
+ adc \$0,%rdx
+ mov %rdx,$hi1
+
+ lea 1($j),$j # j++
+.L1st:
+ mov ($ap,$j,8),%rax
+ mulq $m0 # ap[j]*bp[0]
+ add $hi0,%rax
+ adc \$0,%rdx
+ mov %rax,$lo0
+ mov ($np,$j,8),%rax
+ mov %rdx,$hi0
+
+ mulq $m1 # np[j]*m1
+ add $hi1,%rax
+ lea 1($j),$j # j++
+ adc \$0,%rdx
+ add $lo0,%rax # np[j]*m1+ap[j]*bp[0]
+ adc \$0,%rdx
+ mov %rax,-16(%rsp,$j,8) # tp[j-1]
+ cmp $num,$j
+ mov %rdx,$hi1
+ jl .L1st
+
+ xor %rdx,%rdx
+ add $hi0,$hi1
+ adc \$0,%rdx
+ mov $hi1,-8(%rsp,$num,8)
+ mov %rdx,(%rsp,$num,8) # store upmost overflow bit
+
+ lea 1($i),$i # i++
+.align 4
+.Louter:
+ xor $j,$j # j=0
+
+ mov ($bp,$i,8),$m0 # m0=bp[i]
+ mov ($ap),%rax # ap[0]
+ mulq $m0 # ap[0]*bp[i]
+ add (%rsp),%rax # ap[0]*bp[i]+tp[0]
+ adc \$0,%rdx
+ mov %rax,$lo0
+ mov %rdx,$hi0
+
+ imulq $n0,%rax # tp[0]*n0
+ mov %rax,$m1
+
+ mulq ($np,$j,8) # np[0]*m1
+ add $lo0,%rax # discarded
+ mov 8(%rsp),$lo0 # tp[1]
+ adc \$0,%rdx
+ mov %rdx,$hi1
+
+ lea 1($j),$j # j++
+.align 4
+.Linner:
+ mov ($ap,$j,8),%rax
+ mulq $m0 # ap[j]*bp[i]
+ add $hi0,%rax
+ adc \$0,%rdx
+ add %rax,$lo0 # ap[j]*bp[i]+tp[j]
+ mov ($np,$j,8),%rax
+ adc \$0,%rdx
+ mov %rdx,$hi0
+
+ mulq $m1 # np[j]*m1
+ add $hi1,%rax
+ lea 1($j),$j # j++
+ adc \$0,%rdx
+ add $lo0,%rax # np[j]*m1+ap[j]*bp[i]+tp[j]
+ adc \$0,%rdx
+ mov (%rsp,$j,8),$lo0
+ cmp $num,$j
+ mov %rax,-16(%rsp,$j,8) # tp[j-1]
+ mov %rdx,$hi1
+ jl .Linner
+
+ xor %rdx,%rdx
+ add $hi0,$hi1
+ adc \$0,%rdx
+ add $lo0,$hi1 # pull upmost overflow bit
+ adc \$0,%rdx
+ mov $hi1,-8(%rsp,$num,8)
+ mov %rdx,(%rsp,$num,8) # store upmost overflow bit
+
+ lea 1($i),$i # i++
+ cmp $num,$i
+ jl .Louter
+
+ lea (%rsp),$ap # borrow ap for tp
+ lea -1($num),$j # j=num-1
+
+ mov ($ap),%rax # tp[0]
+ xor $i,$i # i=0 and clear CF!
+ jmp .Lsub
+.align 16
+.Lsub: sbb ($np,$i,8),%rax
+ mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
+ dec $j # doesn't affect CF!
+ mov 8($ap,$i,8),%rax # tp[i+1]
+ lea 1($i),$i # i++
+ jge .Lsub
+
+ sbb \$0,%rax # handle upmost overflow bit
+ and %rax,$ap
+ not %rax
+ mov $rp,$np
+ and %rax,$np
+ lea -1($num),$j
+ or $np,$ap # ap=borrow?tp:rp
+.align 16
+.Lcopy: # copy or in-place refresh
+ mov ($ap,$j,8),%rax
+ mov %rax,($rp,$j,8) # rp[i]=tp[i]
+ mov $i,(%rsp,$j,8) # zap temporary vector
+ dec $j
+ jge .Lcopy
+
+ mov 8(%rsp,$num,8),%rsp # restore %rsp
+ mov \$1,%rax
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+.size bn_mul_mont,.-bn_mul_mont
+.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+print $code;
+close STDOUT;
diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h
index 95c5d643cbd1..6d754d554776 100644
--- a/crypto/bn/bn.h
+++ b/crypto/bn/bn.h
@@ -245,8 +245,18 @@ extern "C" {
#define BN_FLG_MALLOCED 0x01
#define BN_FLG_STATIC_DATA 0x02
-#define BN_FLG_EXP_CONSTTIME 0x04 /* avoid leaking exponent information through timings
- * (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */
+#define BN_FLG_CONSTTIME 0x04 /* avoid leaking exponent information through timing,
+ * BN_mod_exp_mont() will call BN_mod_exp_mont_consttime,
+ * BN_div() will call BN_div_no_branch,
+ * BN_mod_inverse() will call BN_mod_inverse_no_branch.
+ */
+
+#ifndef OPENSSL_NO_DEPRECATED
+#define BN_FLG_EXP_CONSTTIME BN_FLG_CONSTTIME /* deprecated name for the flag */
+ /* avoid leaking exponent information through timings
+ * (BN_mod_exp_mont() will call BN_mod_exp_mont_consttime) */
+#endif
+
#ifndef OPENSSL_NO_DEPRECATED
#define BN_FLG_FREE 0x8000 /* used for debuging */
#endif
@@ -293,7 +303,12 @@ struct bn_mont_ctx_st
BIGNUM N; /* The modulus */
BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1
* (Ni is only stored for bignum algorithm) */
+#if 0
+ /* OpenSSL 0.9.9 preview: */
+ BN_ULONG n0[2];/* least significant word(s) of Ni */
+#else
BN_ULONG n0; /* least significant word of Ni */
+#endif
int flags;
};
@@ -534,7 +549,7 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
#define BN_BLINDING_NO_UPDATE 0x00000001
#define BN_BLINDING_NO_RECREATE 0x00000002
-BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod);
+BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod);
void BN_BLINDING_free(BN_BLINDING *b);
int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);
int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
@@ -546,7 +561,7 @@ void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long);
unsigned long BN_BLINDING_get_flags(const BN_BLINDING *);
void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long);
BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
- const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
+ const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx,
int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
BN_MONT_CTX *m_ctx);
@@ -775,6 +790,7 @@ void ERR_load_BN_strings(void);
#define BN_F_BN_CTX_NEW 106
#define BN_F_BN_CTX_START 129
#define BN_F_BN_DIV 107
+#define BN_F_BN_DIV_NO_BRANCH 138
#define BN_F_BN_DIV_RECP 130
#define BN_F_BN_EXP 123
#define BN_F_BN_EXPAND2 108
@@ -793,6 +809,7 @@ void ERR_load_BN_strings(void);
#define BN_F_BN_MOD_EXP_RECP 125
#define BN_F_BN_MOD_EXP_SIMPLE 126
#define BN_F_BN_MOD_INVERSE 110
+#define BN_F_BN_MOD_INVERSE_NO_BRANCH 139
#define BN_F_BN_MOD_LSHIFT_QUICK 119
#define BN_F_BN_MOD_MUL_RECIPROCAL 111
#define BN_F_BN_MOD_SQRT 121
diff --git a/crypto/bn/bn_blind.c b/crypto/bn/bn_blind.c
index ca22d4f8bdcb..c11fb4ccc2d3 100644
--- a/crypto/bn/bn_blind.c
+++ b/crypto/bn/bn_blind.c
@@ -131,7 +131,7 @@ struct bn_blinding_st
BN_MONT_CTX *m_ctx);
};
-BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
+BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod)
{
BN_BLINDING *ret=NULL;
@@ -151,7 +151,12 @@ BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
{
if ((ret->Ai = BN_dup(Ai)) == NULL) goto err;
}
- ret->mod = mod;
+
+ /* save a copy of mod in the BN_BLINDING structure */
+ if ((ret->mod = BN_dup(mod)) == NULL) goto err;
+ if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
+ BN_set_flags(ret->mod, BN_FLG_CONSTTIME);
+
ret->counter = BN_BLINDING_COUNTER;
return(ret);
err:
@@ -167,6 +172,7 @@ void BN_BLINDING_free(BN_BLINDING *r)
if (r->A != NULL) BN_free(r->A );
if (r->Ai != NULL) BN_free(r->Ai);
if (r->e != NULL) BN_free(r->e );
+ if (r->mod != NULL) BN_free(r->mod);
OPENSSL_free(r);
}
@@ -278,7 +284,7 @@ void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags)
}
BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
- const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
+ const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx,
int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
BN_MONT_CTX *m_ctx)
diff --git a/crypto/bn/bn_div.c b/crypto/bn/bn_div.c
index 2857f44861a7..1e8e57626be3 100644
--- a/crypto/bn/bn_div.c
+++ b/crypto/bn/bn_div.c
@@ -169,13 +169,15 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
#endif /* OPENSSL_NO_ASM */
-/* BN_div computes dv := num / divisor, rounding towards zero, and sets up
- * rm such that dv*divisor + rm = num holds.
+/* BN_div[_no_branch] computes dv := num / divisor, rounding towards
+ * zero, and sets up rm such that dv*divisor + rm = num holds.
* Thus:
* dv->neg == num->neg ^ divisor->neg (unless the result is zero)
* rm->neg == num->neg (unless the remainder is zero)
* If 'dv' or 'rm' is NULL, the respective value is not returned.
*/
+static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
+ const BIGNUM *divisor, BN_CTX *ctx);
int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
BN_CTX *ctx)
{
@@ -185,9 +187,25 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
BN_ULONG d0,d1;
int num_n,div_n;
+ /* Invalid zero-padding would have particularly bad consequences
+ * in the case of 'num', so don't just rely on bn_check_top() for this one
+ * (bn_check_top() works only for BN_DEBUG builds) */
+ if (num->top > 0 && num->d[num->top - 1] == 0)
+ {
+ BNerr(BN_F_BN_DIV,BN_R_NOT_INITIALIZED);
+ return 0;
+ }
+
+ bn_check_top(num);
+
+ if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0))
+ {
+ return BN_div_no_branch(dv, rm, num, divisor, ctx);
+ }
+
bn_check_top(dv);
bn_check_top(rm);
- bn_check_top(num);
+ /* bn_check_top(num); */ /* 'num' has been checked already */
bn_check_top(divisor);
if (BN_is_zero(divisor))
@@ -397,4 +415,229 @@ err:
return(0);
}
+
+/* BN_div_no_branch is a special version of BN_div. It does not contain
+ * branches that may leak sensitive information.
+ * Returns 1 on success and 0 on error. */
+static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
+ const BIGNUM *divisor, BN_CTX *ctx)
+ {
+ int norm_shift,i,loop;
+ BIGNUM *tmp,wnum,*snum,*sdiv,*res;
+ BN_ULONG *resp,*wnump;
+ BN_ULONG d0,d1;
+ int num_n,div_n;
+
+ bn_check_top(dv);
+ bn_check_top(rm);
+ /* bn_check_top(num); */ /* 'num' has been checked in BN_div() */
+ bn_check_top(divisor);
+
+ if (BN_is_zero(divisor))
+ {
+ BNerr(BN_F_BN_DIV_NO_BRANCH,BN_R_DIV_BY_ZERO);
+ return(0);
+ }
+
+ BN_CTX_start(ctx);
+ tmp=BN_CTX_get(ctx);
+ snum=BN_CTX_get(ctx);
+ sdiv=BN_CTX_get(ctx);
+ if (dv == NULL)
+ res=BN_CTX_get(ctx);
+ else res=dv;
+ if (sdiv == NULL || res == NULL) goto err;
+
+ /* First we normalise the numbers */
+ norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
+ if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
+ sdiv->neg=0;
+ norm_shift+=BN_BITS2;
+ if (!(BN_lshift(snum,num,norm_shift))) goto err;
+ snum->neg=0;
+
+ /* Since we don't know whether snum is larger than sdiv,
+ * we pad snum with enough zeroes without changing its
+ * value.
+ */
+ if (snum->top <= sdiv->top+1)
+ {
+ if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err;
+ for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0;
+ snum->top = sdiv->top + 2;
+ }
+ else
+ {
+ if (bn_wexpand(snum, snum->top + 1) == NULL) goto err;
+ snum->d[snum->top] = 0;
+ snum->top ++;
+ }
+
+ div_n=sdiv->top;
+ num_n=snum->top;
+ loop=num_n-div_n;
+ /* Let's set up a 'window' into snum.
+ * This is the part that corresponds to the current
+ * 'area' being divided */
+ wnum.neg = 0;
+ wnum.d = &(snum->d[loop]);
+ wnum.top = div_n;
+ /* only needed when BN_ucmp messes up the values between top and max */
+ wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
+
+ /* Get the top 2 words of sdiv */
+ /* div_n=sdiv->top; */
+ d0=sdiv->d[div_n-1];
+ d1=(div_n == 1)?0:sdiv->d[div_n-2];
+
+ /* pointer to the 'top' of snum */
+ wnump= &(snum->d[num_n-1]);
+
+ /* Set up 'res' */
+ res->neg= (num->neg^divisor->neg);
+ if (!bn_wexpand(res,(loop+1))) goto err;
+ res->top=loop-1;
+ resp= &(res->d[loop-1]);
+
+ /* space for temp */
+ if (!bn_wexpand(tmp,(div_n+1))) goto err;
+
+ /* if res->top == 0 then clear the neg value otherwise decrease
+ * the resp pointer */
+ if (res->top == 0)
+ res->neg = 0;
+ else
+ resp--;
+
+ for (i=0; i<loop-1; i++, wnump--, resp--)
+ {
+ BN_ULONG q,l0;
+ /* the first part of the loop uses the top two words of
+ * snum and sdiv to calculate a BN_ULONG q such that
+ * | wnum - sdiv * q | < sdiv */
+#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
+ BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
+ q=bn_div_3_words(wnump,d1,d0);
+#else
+ BN_ULONG n0,n1,rem=0;
+
+ n0=wnump[0];
+ n1=wnump[-1];
+ if (n0 == d0)
+ q=BN_MASK2;
+ else /* n0 < d0 */
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t2;
+
+#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
+ q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
+#else
+ q=bn_div_words(n0,n1,d0);
+#ifdef BN_DEBUG_LEVITTE
+ fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
X) -> 0x%08X\n",
+ n0, n1, d0, q);
+#endif
+#endif
+
+#ifndef REMAINDER_IS_ALREADY_CALCULATED
+ /*
+ * rem doesn't have to be BN_ULLONG. At the very least we
+ * know it's less than d0, don't we?
+ */
+ rem=(n1-q*d0)&BN_MASK2;
+#endif
+ t2=(BN_ULLONG)d1*q;
+
+ for (;;)
+ {
+ if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
+ break;
+ q--;
+ rem += d0;
+ if (rem < d0) break; /* don't let rem overflow */
+ t2 -= d1;
+ }
+#else /* !BN_LLONG */
+ BN_ULONG t2l,t2h,ql,qh;
+
+ q=bn_div_words(n0,n1,d0);
+#ifdef BN_DEBUG_LEVITTE
+ fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
X) -> 0x%08X\n",
+ n0, n1, d0, q);
+#endif
+#ifndef REMAINDER_IS_ALREADY_CALCULATED
+ rem=(n1-q*d0)&BN_MASK2;
+#endif
+
+#if defined(BN_UMULT_LOHI)
+ BN_UMULT_LOHI(t2l,t2h,d1,q);
+#elif defined(BN_UMULT_HIGH)
+ t2l = d1 * q;
+ t2h = BN_UMULT_HIGH(d1,q);
+#else
+ t2l=LBITS(d1); t2h=HBITS(d1);
+ ql =LBITS(q); qh =HBITS(q);
+ mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
+#endif
+
+ for (;;)
+ {
+ if ((t2h < rem) ||
+ ((t2h == rem) && (t2l <= wnump[-2])))
+ break;
+ q--;
+ rem += d0;
+ if (rem < d0) break; /* don't let rem overflow */
+ if (t2l < d1) t2h--; t2l -= d1;
+ }
+#endif /* !BN_LLONG */
+ }
+#endif /* !BN_DIV3W */
+
+ l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
+ tmp->d[div_n]=l0;
+ wnum.d--; /* slide the window one word lower in snum */
+ /* ignore the top values of the bignums, just subtract the two
+ * BN_ULONG arrays with bn_sub_words */
+ if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
+ {
+ /* Note: As we have considered only the leading
+ * two BN_ULONGs in the calculation of q, sdiv * q
+ * might be greater than wnum (but then (q-1) * sdiv
+ * is less than or equal to wnum)
+ */
+ q--;
+ if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
+ /* we can't have an overflow here (assuming
+ * that q != 0, but if q == 0 then tmp is
+ * zero anyway) */
+ (*wnump)++;
+ }
+ /* store part of the result */
+ *resp = q;
+ }
+ bn_correct_top(snum);
+ if (rm != NULL)
+ {
+ /* Keep a copy of the neg flag in num because if rm==num
+ * BN_rshift() will overwrite it.
+ */
+ int neg = num->neg;
+ BN_rshift(rm,snum,norm_shift);
+ if (!BN_is_zero(rm))
+ rm->neg = neg;
+ bn_check_top(rm);
+ }
+ bn_correct_top(res);
+ BN_CTX_end(ctx);
+ return(1);
+err:
+ bn_check_top(rm);
+ BN_CTX_end(ctx);
+ return(0);
+ }
+
#endif
diff --git a/crypto/bn/bn_err.c b/crypto/bn/bn_err.c
index 24fbbb772d0f..cfe2eb94a0ce 100644
--- a/crypto/bn/bn_err.c
+++ b/crypto/bn/bn_err.c
@@ -1,6 +1,6 @@
/* crypto/bn/bn_err.c */
/* ====================================================================
- * Copyright (c) 1999-2005 The OpenSSL Project. All rights reserved.
+ * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -82,6 +82,7 @@ static ERR_STRING_DATA BN_str_functs[]=
{ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"},
{ERR_FUNC(BN_F_BN_CTX_START), "BN_CTX_start"},
{ERR_FUNC(BN_F_BN_DIV), "BN_div"},
+{ERR_FUNC(BN_F_BN_DIV_NO_BRANCH), "BN_div_no_branch"},
{ERR_FUNC(BN_F_BN_DIV_RECP), "BN_div_recp"},
{ERR_FUNC(BN_F_BN_EXP), "BN_exp"},
{ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"},
@@ -100,6 +101,7 @@ static ERR_STRING_DATA BN_str_functs[]=
{ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"},
{ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"},
{ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"},
+{ERR_FUNC(BN_F_BN_MOD_INVERSE_NO_BRANCH), "BN_mod_inverse_no_branch"},
{ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"},
{ERR_FUNC(BN_F_BN_MOD_MUL_RECIPROCAL), "BN_mod_mul_reciprocal"},
{ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"},
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index 8f8c69448191..70a33f0d936c 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -122,9 +122,9 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
int i,bits,ret=0;
BIGNUM *v,*rr;
- if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
+ if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
{
- /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
+ /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return -1;
}
@@ -213,7 +213,7 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
if (BN_is_odd(m))
{
# ifdef MONT_EXP_WORD
- if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) == 0))
+ if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
{
BN_ULONG A = a->d[0];
ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
@@ -245,9 +245,9 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
BIGNUM *val[TABLE_SIZE];
BN_RECP_CTX recp;
- if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
+ if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
{
- /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
+ /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return -1;
}
@@ -379,7 +379,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
BIGNUM *val[TABLE_SIZE];
BN_MONT_CTX *mont=NULL;
- if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
+ if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
{
return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
}
@@ -745,9 +745,9 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
- if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
+ if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
{
- /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
+ /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return -1;
}
@@ -881,9 +881,9 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
/* Table of variables obtained from 'ctx' */
BIGNUM *val[TABLE_SIZE];
- if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
+ if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
{
- /* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
+ /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return -1;
}
diff --git a/crypto/bn/bn_gcd.c b/crypto/bn/bn_gcd.c
index f02e6fcdb422..4a352119ba8a 100644
--- a/crypto/bn/bn_gcd.c
+++ b/crypto/bn/bn_gcd.c
@@ -203,6 +203,8 @@ err:
/* solves ax == 1 (mod n) */
+static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
+ const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx);
BIGNUM *BN_mod_inverse(BIGNUM *in,
const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
{
@@ -210,6 +212,11 @@ BIGNUM *BN_mod_inverse(BIGNUM *in,
BIGNUM *ret=NULL;
int sign;
+ if ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(n, BN_FLG_CONSTTIME) != 0))
+ {
+ return BN_mod_inverse_no_branch(in, a, n, ctx);
+ }
+
bn_check_top(a);
bn_check_top(n);
@@ -491,3 +498,157 @@ err:
bn_check_top(ret);
return(ret);
}
+
+
+/* BN_mod_inverse_no_branch is a special version of BN_mod_inverse.
+ * It does not contain branches that may leak sensitive information.
+ * Returns 'in' (or a new BIGNUM when 'in' is NULL) on success, NULL on error. */
+static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
+ const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
+ {
+ BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
+ BIGNUM local_A, local_B;
+ BIGNUM *pA, *pB;
+ BIGNUM *ret=NULL;
+ int sign;
+
+ bn_check_top(a);
+ bn_check_top(n);
+
+ BN_CTX_start(ctx);
+ A = BN_CTX_get(ctx);
+ B = BN_CTX_get(ctx);
+ X = BN_CTX_get(ctx);
+ D = BN_CTX_get(ctx);
+ M = BN_CTX_get(ctx);
+ Y = BN_CTX_get(ctx);
+ T = BN_CTX_get(ctx);
+ if (T == NULL) goto err;
+
+ if (in == NULL)
+ R=BN_new();
+ else
+ R=in;
+ if (R == NULL) goto err;
+
+ BN_one(X);
+ BN_zero(Y);
+ if (BN_copy(B,a) == NULL) goto err;
+ if (BN_copy(A,n) == NULL) goto err;
+ A->neg = 0;
+
+ if (B->neg || (BN_ucmp(B, A) >= 0))
+ {
+ /* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
+ * BN_div_no_branch will be called eventually.
+ * NOTE(review): local_B is not initialised before this call; confirm BN_with_flags() does not read pB->flags. */
+ pB = &local_B;
+ BN_with_flags(pB, B, BN_FLG_CONSTTIME);
+ if (!BN_nnmod(B, pB, A, ctx)) goto err;
+ }
+ sign = -1;
+ /* From B = a mod |n|, A = |n| it follows that
+ *
+ * 0 <= B < A,
+ * -sign*X*a == B (mod |n|),
+ * sign*Y*a == A (mod |n|).
+ */
+
+ while (!BN_is_zero(B))
+ {
+ BIGNUM *tmp;
+
+ /*
+ * 0 < B < A,
+ * (*) -sign*X*a == B (mod |n|),
+ * sign*Y*a == A (mod |n|)
+ */
+
+ /* Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked,
+ * BN_div_no_branch will be called eventually.
+ * NOTE(review): local_A is not initialised before its first use here; confirm BN_with_flags() does not read pA->flags. */
+ pA = &local_A;
+ BN_with_flags(pA, A, BN_FLG_CONSTTIME);
+
+ /* (D, M) := (A/B, A%B) ... */
+ if (!BN_div(D,M,pA,B,ctx)) goto err;
+
+ /* Now
+ * A = D*B + M;
+ * thus we have
+ * (**) sign*Y*a == D*B + M (mod |n|).
+ */
+
+ tmp=A; /* keep the BIGNUM object, the value does not matter */
+
+ /* (A, B) := (B, A mod B) ... */
+ A=B;
+ B=M;
+ /* ... so we have 0 <= B < A again */
+
+ /* Since the former M is now B and the former B is now A,
+ * (**) translates into
+ * sign*Y*a == D*A + B (mod |n|),
+ * i.e.
+ * sign*Y*a - D*A == B (mod |n|).
+ * Similarly, (*) translates into
+ * -sign*X*a == A (mod |n|).
+ *
+ * Thus,
+ * sign*Y*a + D*sign*X*a == B (mod |n|),
+ * i.e.
+ * sign*(Y + D*X)*a == B (mod |n|).
+ *
+ * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
+ * -sign*X*a == B (mod |n|),
+ * sign*Y*a == A (mod |n|).
+ * Note that X and Y stay non-negative all the time.
+ */
+
+ if (!BN_mul(tmp,D,X,ctx)) goto err;
+ if (!BN_add(tmp,tmp,Y)) goto err;
+
+ M=Y; /* keep the BIGNUM object, the value does not matter */
+ Y=X;
+ X=tmp;
+ sign = -sign;
+ }
+
+ /*
+ * The while loop (Euclid's algorithm) ends when
+ * A == gcd(a,n);
+ * we have
+ * sign*Y*a == A (mod |n|),
+ * where Y is non-negative.
+ */
+
+ if (sign < 0)
+ {
+ if (!BN_sub(Y,n,Y)) goto err;
+ }
+ /* Now Y*a == A (mod |n|). */
+
+ if (BN_is_one(A))
+ {
+ /* Y*a == 1 (mod |n|) */
+ if (!Y->neg && BN_ucmp(Y,n) < 0)
+ {
+ if (!BN_copy(R,Y)) goto err;
+ }
+ else
+ {
+ if (!BN_nnmod(R,Y,n,ctx)) goto err;
+ }
+ }
+ else
+ {
+ BNerr(BN_F_BN_MOD_INVERSE_NO_BRANCH,BN_R_NO_INVERSE);
+ goto err;
+ }
+ ret=R;
+err:
+ if ((ret == NULL) && (in == NULL)) BN_free(R);
+ BN_CTX_end(ctx);
+ bn_check_top(ret);
+ return(ret);
+ }
diff --git a/crypto/bn/bn_gf2m.c b/crypto/bn/bn_gf2m.c
index 6a793857e130..306f029f2789 100644
--- a/crypto/bn/bn_gf2m.c
+++ b/crypto/bn/bn_gf2m.c
@@ -384,7 +384,11 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
if (zz == 0) break;
d1 = BN_BITS2 - d0;
- if (d0) z[dN] = (z[dN] << d1) >> d1; /* clear up the top d1 bits */
+ /* clear up the top d1 bits */
+ if (d0)
+ z[dN] = (z[dN] << d1) >> d1;
+ else
+ z[dN] = 0;
z[0] ^= zz; /* reduction t^0 component */
for (k = 1; p[k] != 0; k++)
diff --git a/crypto/bn/bn_lcl.h b/crypto/bn/bn_lcl.h
index ad4ca7ff305a..27ac4397a151 100644
--- a/crypto/bn/bn_lcl.h
+++ b/crypto/bn/bn_lcl.h
@@ -481,6 +481,7 @@ BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl);
BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl);
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
#ifdef __cplusplus
}
diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c
index 210ccb42bba1..2649b8c53851 100644
--- a/crypto/bn/bn_lib.c
+++ b/crypto/bn/bn_lib.c
@@ -763,7 +763,7 @@ int BN_is_bit_set(const BIGNUM *a, int n)
i=n/BN_BITS2;
j=n%BN_BITS2;
if (a->top <= i) return 0;
- return((a->d[i]&(((BN_ULONG)1)<<j))?1:0);
+ return(((a->d[i])>>j)&((BN_ULONG)1));
}
int BN_mask_bits(BIGNUM *a, int n)
diff --git a/crypto/bn/bn_mont.c b/crypto/bn/bn_mont.c
index 961ca67ea1f5..4799b152ddcb 100644
--- a/crypto/bn/bn_mont.c
+++ b/crypto/bn/bn_mont.c
@@ -122,11 +122,50 @@
#define MONT_WORD /* use the faster word-based algorithm */
+#if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
+/* This condition means we have a specific non-default build:
+ * In the 0.9.8 branch, OPENSSL_BN_ASM_MONT is normally not set for any
+ * BN_BITS2<=32 platform; an explicit "enable-montasm" is required.
+ * I.e., if we are here, the user intentionally deviates from the
+ * normal stable build to get better Montgomery performance from
+ * the 0.9.9-dev backport.
+ *
+ * In this case only, we also enable BN_from_montgomery_word()
+ * (another non-stable feature from 0.9.9-dev).
+ */
+#define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
+#endif
+
+#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
+static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
+#endif
+
+
+
int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
BN_MONT_CTX *mont, BN_CTX *ctx)
{
BIGNUM *tmp;
int ret=0;
+#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
+ int num = mont->N.top;
+
+ if (num>1 && a->top==num && b->top==num)
+ {
+ if (bn_wexpand(r,num) == NULL) return(0);
+#if 0 /* for OpenSSL 0.9.9 mont->n0 */
+ if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num))
+#else
+ if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,&mont->n0,num))
+#endif
+ {
+ r->neg = a->neg^b->neg;
+ r->top = num;
+ bn_correct_top(r);
+ return(1);
+ }
+ }
+#endif
BN_CTX_start(ctx);
tmp = BN_CTX_get(ctx);
@@ -142,7 +181,11 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
if (!BN_mul(tmp,a,b,ctx)) goto err;
}
/* reduce from aRR to aR */
+#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
+ if (!BN_from_montgomery_word(r,tmp,mont)) goto err;
+#else
if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
+#endif
bn_check_top(r);
ret=1;
err:
@@ -150,6 +193,150 @@ err:
return(ret);
}
+#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
+static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
+ {
+ BIGNUM *n;
+ BN_ULONG *ap,*np,*rp,n0,v,*nrp;
+ int al,nl,max,i,x,ri;
+
+ n= &(mont->N);
+ /* Word-based BN_from_montgomery: computes ret from r, modifying r in the process.
+ Returns 1 on success, 0 if bn_wexpand fails. mont->ri is the size of mont->N in bits (rounded up to the word size) */
+ al=ri=mont->ri/BN_BITS2;
+
+ nl=n->top;
+ if ((al == 0) || (nl == 0)) { ret->top=0; return(1); }
+
+ max=(nl+al+1); /* allow for overflow (no?) XXX */
+ if (bn_wexpand(r,max) == NULL) return(0);
+
+ r->neg^=n->neg;
+ np=n->d;
+ rp=r->d;
+ nrp= &(r->d[nl]);
+
+ /* clear the top words of T (i.e. of r) */
+ for (i=r->top; i<max; i++) /* memset? XXX */
+ r->d[i]=0;
+
+ r->top=max;
+#if 0 /* for OpenSSL 0.9.9 mont->n0 */
+ n0=mont->n0[0];
+#else
+ n0=mont->n0;
+#endif
+
+#ifdef BN_COUNT
+ fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
+#endif
+ for (i=0; i<nl; i++)
+ {
+#ifdef __TANDEM
+ {
+ long long t1;
+ long long t2;
+ long long t3;
+ t1 = rp[0] * (n0 & 0177777);
+ t2 = 037777600000l;
+ t2 = n0 & t2;
+ t3 = rp[0] & 0177777;
+ t2 = (t3 * t2) & BN_MASK2;
+ t1 = t1 + t2;
+ v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1);
+ }
+#else
+ v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
+#endif
+ nrp++;
+ rp++;
+ if (((nrp[-1]+=v)&BN_MASK2) >= v)
+ continue;
+ else
+ {
+ if (((++nrp[0])&BN_MASK2) != 0) continue;
+ if (((++nrp[1])&BN_MASK2) != 0) continue;
+ for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
+ }
+ }
+ bn_correct_top(r);
+
+ /* mont->ri will be a multiple of the word size and below code
+ * is kind of BN_rshift(ret,r,mont->ri) equivalent */
+ if (r->top <= ri)
+ {
+ ret->top=0;
+ return(1);
+ }
+ al=r->top-ri;
+
+ if (bn_wexpand(ret,ri) == NULL) return(0);
+ x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
+ ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
+ ret->neg=r->neg;
+
+ rp=ret->d;
+ ap=&(r->d[ri]);
+
+ {
+ size_t m1,m2;
+
+ v=bn_sub_words(rp,ap,np,ri);
+ /* this ----------------^^ works even in al<ri case
+ * thanks to zealous zeroing of top of the vector in the
+ * beginning. */
+
+ /* if ((al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
+ /* in other words if subtraction result is real, then
+ * trick unconditional memcpy below to perform in-place
+ * "refresh" instead of actual copy. */
+ m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
+ m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
+ m1|=m2; /* (al!=ri) */
+ m1|=(0-(size_t)v); /* (al!=ri || v) */
+ m1&=~m2; /* (al!=ri || v) && !(al>ri) */
+ nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
+ }
+
+ /* 'i<ri' is chosen to eliminate dependency on input data, even
+ * though it results in redundant copy in al<ri case. */
+ for (i=0,ri-=4; i<ri; i+=4)
+ {
+ BN_ULONG t1,t2,t3,t4;
+
+ t1=nrp[i+0];
+ t2=nrp[i+1];
+ t3=nrp[i+2]; ap[i+0]=0;
+ t4=nrp[i+3]; ap[i+1]=0;
+ rp[i+0]=t1; ap[i+2]=0;
+ rp[i+1]=t2; ap[i+3]=0;
+ rp[i+2]=t3;
+ rp[i+3]=t4;
+ }
+ for (ri+=4; i<ri; i++)
+ rp[i]=nrp[i], ap[i]=0;
+ bn_correct_top(r);
+ bn_correct_top(ret);
+ bn_check_top(ret);
+
+ return(1);
+ }
+
+int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
+ BN_CTX *ctx)
+ { /* wrapper: runs BN_from_montgomery_word() on a ctx-allocated copy of a */
+ int retn=0; /* stays 0 if the temporary cannot be obtained or copied */
+ BIGNUM *t;
+
+ BN_CTX_start(ctx);
+ if ((t = BN_CTX_get(ctx)) && BN_copy(t,a)) /* a is const, but the word routine takes a non-const BIGNUM */
+ retn = BN_from_montgomery_word(ret,t,mont);
+ BN_CTX_end(ctx);
+ return retn;
+ }
+
+#else /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
+
int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
BN_CTX *ctx)
{
@@ -176,7 +363,6 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
max=(nl+al+1); /* allow for overflow (no?) XXX */
if (bn_wexpand(r,max) == NULL) goto err;
- if (bn_wexpand(ret,max) == NULL) goto err;
r->neg=a->neg^n->neg;
np=n->d;
@@ -228,19 +414,72 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
}
bn_correct_top(r);
- /* mont->ri will be a multiple of the word size */
-#if 0
- BN_rshift(ret,r,mont->ri);
-#else
- ret->neg = r->neg;
- x=ri;
+ /* mont->ri will be a multiple of the word size and below code
+ * is kind of BN_rshift(ret,r,mont->ri) equivalent */
+ if (r->top <= ri)
+ {
+ ret->top=0;
+ retn=1;
+ goto err;
+ }
+ al=r->top-ri;
+
+# define BRANCH_FREE 1
+# if BRANCH_FREE
+ if (bn_wexpand(ret,ri) == NULL) goto err;
+ x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
+ ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
+ ret->neg=r->neg;
+
rp=ret->d;
- ap= &(r->d[x]);
- if (r->top < x)
- al=0;
- else
- al=r->top-x;
+ ap=&(r->d[ri]);
+
+ {
+ size_t m1,m2;
+
+ v=bn_sub_words(rp,ap,np,ri);
+ /* this ----------------^^ works even in al<ri case
+ * thanks to zealous zeroing of top of the vector in the
+ * beginning. */
+
+ /* if ((al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
+ /* in other words if subtraction result is real, then
+ * trick unconditional memcpy below to perform in-place
+ * "refresh" instead of actual copy. */
+ m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
+ m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
+ m1|=m2; /* (al!=ri) */
+ m1|=(0-(size_t)v); /* (al!=ri || v) */
+ m1&=~m2; /* (al!=ri || v) && !al>ri */
+ nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
+ }
+
+ /* 'i<ri' is chosen to eliminate dependency on input data, even
+ * though it results in redundant copy in al<ri case. */
+ for (i=0,ri-=4; i<ri; i+=4)
+ {
+ BN_ULONG t1,t2,t3,t4;
+
+ t1=nrp[i+0];
+ t2=nrp[i+1];
+ t3=nrp[i+2]; ap[i+0]=0;
+ t4=nrp[i+3]; ap[i+1]=0;
+ rp[i+0]=t1; ap[i+2]=0;
+ rp[i+1]=t2; ap[i+3]=0;
+ rp[i+2]=t3;
+ rp[i+3]=t4;
+ }
+ for (ri+=4; i<ri; i++)
+ rp[i]=nrp[i], ap[i]=0;
+ bn_correct_top(r);
+ bn_correct_top(ret);
+# else
+ if (bn_wexpand(ret,al) == NULL) goto err;
ret->top=al;
+ ret->neg=r->neg;
+
+ rp=ret->d;
+ ap=&(r->d[ri]);
al-=4;
for (i=0; i<al; i+=4)
{
@@ -258,7 +497,7 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
al+=4;
for (; i<al; i++)
rp[i]=ap[i];
-#endif
+# endif
#else /* !MONT_WORD */
BIGNUM *t1,*t2;
@@ -278,16 +517,19 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
if (!BN_rshift(ret,t2,mont->ri)) goto err;
#endif /* MONT_WORD */
+#if !defined(BRANCH_FREE) || BRANCH_FREE==0
if (BN_ucmp(ret, &(mont->N)) >= 0)
{
if (!BN_usub(ret,ret,&(mont->N))) goto err;
}
+#endif
retn=1;
bn_check_top(ret);
err:
BN_CTX_end(ctx);
return(retn);
}
+#endif /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
BN_MONT_CTX *BN_MONT_CTX_new(void)
{
@@ -307,6 +549,11 @@ void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
BN_init(&(ctx->RR));
BN_init(&(ctx->N));
BN_init(&(ctx->Ni));
+#if 0 /* for OpenSSL 0.9.9 mont->n0 */
+ ctx->n0[0] = ctx->n0[1] = 0;
+#else
+ ctx->n0 = 0;
+#endif
ctx->flags=0;
}
@@ -340,14 +587,51 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
BN_zero(R);
+#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)",
+ only certain BN_BITS2<=32 platforms actually need this */
+ if (!(BN_set_bit(R,2*BN_BITS2))) goto err; /* R */
+#else
if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
+#endif
buf[0]=mod->d[0]; /* tmod = N mod word size */
buf[1]=0;
+
+ BN_init(&tmod);
tmod.d=buf;
tmod.top = buf[0] != 0 ? 1 : 0;
tmod.dmax=2;
tmod.neg=0;
+
+#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)";
+ only certain BN_BITS2<=32 platforms actually need this */
+ tmod.top=0;
+ if ((buf[0] = mod->d[0])) tmod.top=1;
+ if ((buf[1] = mod->top>1 ? mod->d[1] : 0)) tmod.top=2;
+
+ if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
+ goto err;
+ if (!BN_lshift(Ri,Ri,2*BN_BITS2)) goto err; /* R*Ri */
+ if (!BN_is_zero(Ri))
+ {
+ if (!BN_sub_word(Ri,1)) goto err;
+ }
+ else /* if N mod word size == 1 */
+ {
+ if (bn_expand(Ri,(int)sizeof(BN_ULONG)*2) == NULL)
+ goto err;
+ /* Ri-- (mod double word size) */
+ Ri->neg=0;
+ Ri->d[0]=BN_MASK2;
+ Ri->d[1]=BN_MASK2;
+ Ri->top=2;
+ }
+ if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
+ /* Ni = (R*Ri-1)/N,
+ * keep only couple of least significant words: */
+ mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
+ mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
+#else
/* Ri = R^-1 mod N*/
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
goto err;
@@ -363,7 +647,13 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
/* Ni = (R*Ri-1)/N,
* keep only least significant word: */
+# if 0 /* for OpenSSL 0.9.9 mont->n0 */
+ mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
+ mont->n0[1] = 0;
+# else
mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
+# endif
+#endif
}
#else /* !MONT_WORD */
{ /* bignum version */
@@ -399,7 +689,12 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
if (!BN_copy(&(to->N),&(from->N))) return NULL;
if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
to->ri=from->ri;
+#if 0 /* for OpenSSL 0.9.9 mont->n0 */
+ to->n0[0]=from->n0[0];
+ to->n0[1]=from->n0[1];
+#else
to->n0=from->n0;
+#endif
return(to);
}
diff --git a/crypto/bn/bn_mul.c b/crypto/bn/bn_mul.c
index aec1eafc65fb..b848c8cc60f4 100644
--- a/crypto/bn/bn_mul.c
+++ b/crypto/bn/bn_mul.c
@@ -389,6 +389,7 @@ BN_ULONG bn_add_part_words(BN_ULONG *r,
* a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
* a[1]*b[1]
*/
+/* dnX may not be positive, but n2/2+dnX has to be */
void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
int dna, int dnb, BN_ULONG *t)
{
@@ -398,7 +399,7 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
BN_ULONG ln,lo,*p;
# ifdef BN_COUNT
- fprintf(stderr," bn_mul_recursive %d * %d\n",n2,n2);
+ fprintf(stderr," bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
# endif
# ifdef BN_MUL_COMBA
# if 0
@@ -545,6 +546,7 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
/* n+tn is the word length
* t needs to be n*4 is size, as does r */
+/* tnX may not be negative, and must be less than n */
void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
int tna, int tnb, BN_ULONG *t)
{
@@ -553,8 +555,8 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
BN_ULONG ln,lo,*p;
# ifdef BN_COUNT
- fprintf(stderr," bn_mul_part_recursive (%d+%d) * (%d+%d)\n",
- tna, n, tnb, n);
+ fprintf(stderr," bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
+ n, tna, n, tnb);
# endif
if (n < 8)
{
@@ -655,14 +657,17 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
for (;;)
{
i/=2;
- if (i < tna && i < tnb)
+ /* these simplified conditions work
+ * exclusively because difference
+ * between tna and tnb is 1 or 0 */
+ if (i < tna || i < tnb)
{
bn_mul_part_recursive(&(r[n2]),
&(a[n]),&(b[n]),
i,tna-i,tnb-i,p);
break;
}
- else if (i <= tna && i <= tnb)
+ else if (i == tna || i == tnb)
{
bn_mul_recursive(&(r[n2]),
&(a[n]),&(b[n]),
diff --git a/crypto/bn/bn_nist.c b/crypto/bn/bn_nist.c
index f8e306bb82d3..1fc94f55c32c 100644
--- a/crypto/bn/bn_nist.c
+++ b/crypto/bn/bn_nist.c
@@ -59,6 +59,7 @@
#include "bn_lcl.h"
#include "cryptlib.h"
+
#define BN_NIST_192_TOP (192+BN_BITS2-1)/BN_BITS2
#define BN_NIST_224_TOP (224+BN_BITS2-1)/BN_BITS2
#define BN_NIST_256_TOP (256+BN_BITS2-1)/BN_BITS2
@@ -99,114 +100,106 @@ static const BN_ULONG _nist_p_521[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
0xFFFFFFFF,0x000001FF};
-#elif BN_BITS2 == 16
-static const BN_ULONG _nist_p_192[] = {0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFE,
- 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF};
-static const BN_ULONG _nist_p_224[] = {0x0001,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF};
-static const BN_ULONG _nist_p_256[] = {0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
- 0xFFFF,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0000,0xFFFF,
- 0xFFFF};
-static const BN_ULONG _nist_p_384[] = {0xFFFF,0xFFFF,0x0000,0x0000,0x0000,
- 0x0000,0xFFFF,0xFFFF,0xFFFE,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
- 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF};
-static const BN_ULONG _nist_p_521[] = {0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
- 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
- 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
- 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0x01FF};
-#elif BN_BITS2 == 8
-static const BN_ULONG _nist_p_192[] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFE,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF};
-static const BN_ULONG _nist_p_224[] = {0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
-static const BN_ULONG _nist_p_256[] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x01,0x00,0x00,0x00,0xFF,0xFF,0xFF,0xFF};
-static const BN_ULONG _nist_p_384[] = {0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
-static const BN_ULONG _nist_p_521[] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
- 0xFF,0x01};
#endif
+
+static const BIGNUM _bignum_nist_p_192 =
+ {
+ (BN_ULONG *)_nist_p_192,
+ BN_NIST_192_TOP,
+ BN_NIST_192_TOP,
+ 0,
+ BN_FLG_STATIC_DATA
+ };
+
+static const BIGNUM _bignum_nist_p_224 =
+ {
+ (BN_ULONG *)_nist_p_224,
+ BN_NIST_224_TOP,
+ BN_NIST_224_TOP,
+ 0,
+ BN_FLG_STATIC_DATA
+ };
+
+static const BIGNUM _bignum_nist_p_256 =
+ {
+ (BN_ULONG *)_nist_p_256,
+ BN_NIST_256_TOP,
+ BN_NIST_256_TOP,
+ 0,
+ BN_FLG_STATIC_DATA
+ };
+
+static const BIGNUM _bignum_nist_p_384 =
+ {
+ (BN_ULONG *)_nist_p_384,
+ BN_NIST_384_TOP,
+ BN_NIST_384_TOP,
+ 0,
+ BN_FLG_STATIC_DATA
+ };
+
+static const BIGNUM _bignum_nist_p_521 =
+ {
+ (BN_ULONG *)_nist_p_521,
+ BN_NIST_521_TOP,
+ BN_NIST_521_TOP,
+ 0,
+ BN_FLG_STATIC_DATA
+ };
+
+
const BIGNUM *BN_get0_nist_prime_192(void)
{
- static BIGNUM const_nist_192 = { (BN_ULONG *)_nist_p_192,
- BN_NIST_192_TOP, BN_NIST_192_TOP, 0, BN_FLG_STATIC_DATA };
- return &const_nist_192;
+ return &_bignum_nist_p_192;
}
const BIGNUM *BN_get0_nist_prime_224(void)
{
- static BIGNUM const_nist_224 = { (BN_ULONG *)_nist_p_224,
- BN_NIST_224_TOP, BN_NIST_224_TOP, 0, BN_FLG_STATIC_DATA };
- return &const_nist_224;
+ return &_bignum_nist_p_224;
}
const BIGNUM *BN_get0_nist_prime_256(void)
{
- static BIGNUM const_nist_256 = { (BN_ULONG *)_nist_p_256,
- BN_NIST_256_TOP, BN_NIST_256_TOP, 0, BN_FLG_STATIC_DATA };
- return &const_nist_256;
+ return &_bignum_nist_p_256;
}
const BIGNUM *BN_get0_nist_prime_384(void)
{
- static BIGNUM const_nist_384 = { (BN_ULONG *)_nist_p_384,
- BN_NIST_384_TOP, BN_NIST_384_TOP, 0, BN_FLG_STATIC_DATA };
- return &const_nist_384;
+ return &_bignum_nist_p_384;
}
const BIGNUM *BN_get0_nist_prime_521(void)
{
- static BIGNUM const_nist_521 = { (BN_ULONG *)_nist_p_521,
- BN_NIST_521_TOP, BN_NIST_521_TOP, 0, BN_FLG_STATIC_DATA };
- return &const_nist_521;
+ return &_bignum_nist_p_521;
}
-/* some misc internal functions */
-#if BN_BITS2 != 64
-static BN_ULONG _256_data[BN_NIST_256_TOP*6];
-static int _is_set_256_data = 0;
-static void _init_256_data(void);
-
-static BN_ULONG _384_data[BN_NIST_384_TOP*8];
-static int _is_set_384_data = 0;
-static void _init_384_data(void);
-#endif
-
-#define BN_NIST_ADD_ONE(a) while (!(++(*(a)))) ++(a);
static void nist_cp_bn_0(BN_ULONG *buf, BN_ULONG *a, int top, int max)
- {
+ {
int i;
- BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
- for (i = (top); i != 0; i--)
- *_tmp1++ = *_tmp2++;
- for (i = (max) - (top); i != 0; i--)
- *_tmp1++ = (BN_ULONG) 0;
- }
+ BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
+
+ OPENSSL_assert(top <= max);
+ for (i = (top); i != 0; i--)
+ *_tmp1++ = *_tmp2++;
+ for (i = (max) - (top); i != 0; i--)
+ *_tmp1++ = (BN_ULONG) 0;
+ }
static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
- {
+ {
int i;
- BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
- for (i = (top); i != 0; i--)
- *_tmp1++ = *_tmp2++;
- }
+ BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
+ for (i = (top); i != 0; i--)
+ *_tmp1++ = *_tmp2++;
+ }
#if BN_BITS2 == 64
-#define bn_cp_64(to, n, from, m) (to)[n] = (from)[m];
+#define bn_cp_64(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
#define bn_64_set_0(to, n) (to)[n] = (BN_ULONG)0;
/* TBD */
-#define bn_cp_32(to, n, from, m) (to)[n] = (from)[m];
+#define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0;
#else
#define bn_cp_64(to, n, from, m) \
@@ -220,26 +213,8 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
bn_32_set_0(to, (n)*2+1); \
}
#if BN_BITS2 == 32
-#define bn_cp_32(to, n, from, m) (to)[n] = (from)[m];
+#define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0;
-#elif BN_BITS2 == 16
-#define bn_cp_32(to, n, from, m) \
- { \
- (to)[(n)*2] = (from)[(m)*2]; \
- (to)[(n)*2+1] = (from)[(m)*2+1];\
- }
-#define bn_32_set_0(to, n) { (to)[(n)*2] = 0; (to)[(n)*2+1] = 0; }
-#elif BN_BITS2 == 8
-#define bn_cp_32(to, n, from, m) \
- { \
- (to)[(n)*4] = (from)[(m)*4]; \
- (to)[(n)*4+1] = (from)[(m)*4+1];\
- (to)[(n)*4+2] = (from)[(m)*4+2];\
- (to)[(n)*4+3] = (from)[(m)*4+3];\
- }
-#define bn_32_set_0(to, n) \
- { (to)[(n)*4] = (BN_ULONG)0; (to)[(n)*4+1] = (BN_ULONG)0; \
- (to)[(n)*4+2] = (BN_ULONG)0; (to)[(n)*4+3] = (BN_ULONG)0; }
#endif
#endif /* BN_BITS2 != 64 */
@@ -255,10 +230,18 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
BN_CTX *ctx)
{
int top = a->top, i;
- BN_ULONG carry = 0;
+ int carry;
register BN_ULONG *r_d, *a_d = a->d;
BN_ULONG t_d[BN_NIST_192_TOP],
- buf[BN_NIST_192_TOP];
+ buf[BN_NIST_192_TOP],
+ c_d[BN_NIST_192_TOP],
+ *res;
+ size_t mask;
+
+ field = &_bignum_nist_p_192; /* just to make sure */
+
+ if (BN_is_negative(a) || a->top > 2*BN_NIST_192_TOP)
+ return BN_nnmod(r, a, field, ctx); /* generic fallback: r = a mod field (dividend first, modulus second) */
i = BN_ucmp(field, a);
if (i == 0)
@@ -269,9 +252,6 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
else if (i > 0)
return (r == a) ? 1 : (BN_copy(r ,a) != NULL);
- if (top == BN_NIST_192_TOP)
- return BN_usub(r, a, field);
-
if (r != a)
{
if (!bn_wexpand(r, BN_NIST_192_TOP))
@@ -284,41 +264,33 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
nist_cp_bn_0(buf, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP);
-#if defined(OPENSSL_SYS_VMS) && defined(__DECC)
-# pragma message save
-# pragma message disable BADSUBSCRIPT
-#endif
-
nist_set_192(t_d, buf, 0, 3, 3);
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP))
- ++carry;
-
+ carry = bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_192,BN_NIST_192_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
nist_set_192(t_d, buf, 4, 4, 0);
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP))
- ++carry;
-
-#if defined(OPENSSL_SYS_VMS) && defined(__DECC)
-# pragma message restore
-#endif
+ carry = bn_add_words(r_d, res, t_d, BN_NIST_192_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_192,BN_NIST_192_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
nist_set_192(t_d, buf, 5, 5, 5)
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP))
- ++carry;
+ carry = bn_add_words(r_d, res, t_d, BN_NIST_192_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_192,BN_NIST_192_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
- while (carry)
- {
- if (bn_sub_words(r_d, r_d, _nist_p_192, BN_NIST_192_TOP))
- --carry;
- }
+ nist_cp_bn(r_d, res, BN_NIST_192_TOP);
r->top = BN_NIST_192_TOP;
bn_correct_top(r);
- if (BN_ucmp(r, field) >= 0)
+
+ if (BN_ucmp(field, r) <= 0)
{
- bn_sub_words(r_d, r_d, _nist_p_192, BN_NIST_192_TOP);
- bn_correct_top(r);
+ if (!BN_usub(r, r, field)) return 0;
}
- bn_check_top(r);
return 1;
}
@@ -336,12 +308,20 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
BN_CTX *ctx)
{
-#if BN_BITS2 != 64
+#if BN_BITS2 == 32
int top = a->top, i;
- int carry = 0;
+ int carry;
BN_ULONG *r_d, *a_d = a->d;
BN_ULONG t_d[BN_NIST_224_TOP],
- buf[BN_NIST_224_TOP];
+ buf[BN_NIST_224_TOP],
+ c_d[BN_NIST_224_TOP],
+ *res;
+ size_t mask;
+
+ field = &_bignum_nist_p_224; /* just to make sure */
+
+ if (BN_is_negative(a) || a->top > 2*BN_NIST_224_TOP)
+ return BN_nnmod(r, a, field, ctx); /* generic fallback: r = a mod field (dividend first, modulus second) */
i = BN_ucmp(field, a);
if (i == 0)
@@ -352,9 +332,6 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
else if (i > 0)
return (r == a)? 1 : (BN_copy(r ,a) != NULL);
- if (top == BN_NIST_224_TOP)
- return BN_usub(r, a, field);
-
if (r != a)
{
if (!bn_wexpand(r, BN_NIST_224_TOP))
@@ -368,65 +345,53 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0);
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP))
- ++carry;
+ carry = bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_224,BN_NIST_224_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0);
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP))
- ++carry;
+ carry = bn_add_words(r_d, res, t_d, BN_NIST_224_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_224,BN_NIST_224_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
nist_set_224(t_d, buf, 13, 12, 11, 10, 9, 8, 7);
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP))
- --carry;
+#if BRANCH_FREE
+ carry = bn_sub_words(r_d, res, t_d, BN_NIST_224_TOP);
+ bn_add_words(c_d,r_d,_nist_p_224,BN_NIST_224_TOP);
+ mask = 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+#else
+ if (bn_sub_words(r_d, res, t_d, BN_NIST_224_TOP))
+ bn_add_words(r_d,r_d,_nist_p_224,BN_NIST_224_TOP);
+#endif
nist_set_224(t_d, buf, 0, 0, 0, 0, 13, 12, 11);
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP))
- --carry;
-
- if (carry > 0)
- while (carry)
- {
- if (bn_sub_words(r_d,r_d,_nist_p_224,BN_NIST_224_TOP))
- --carry;
- }
- else if (carry < 0)
- while (carry)
- {
- if (bn_add_words(r_d,r_d,_nist_p_224,BN_NIST_224_TOP))
- ++carry;
- }
+#if BRANCH_FREE
+ carry = bn_sub_words(r_d, res, t_d, BN_NIST_224_TOP);
+ bn_add_words(c_d,r_d,_nist_p_224,BN_NIST_224_TOP);
+ mask = 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+ nist_cp_bn(r_d, res, BN_NIST_224_TOP);
+#else
+ if (bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP))
+ bn_add_words(r_d,r_d,_nist_p_224,BN_NIST_224_TOP);
+#endif
r->top = BN_NIST_224_TOP;
bn_correct_top(r);
- if (BN_ucmp(r, field) >= 0)
+
+ if (BN_ucmp(field, r) <= 0)
{
- bn_sub_words(r_d, r_d, _nist_p_224, BN_NIST_224_TOP);
- bn_correct_top(r);
+ if (!BN_usub(r, r, field)) return 0;
}
- bn_check_top(r);
+
return 1;
-#else
+#else /* BN_BITS!=32 */
return 0;
#endif
}
-#if BN_BITS2 != 64
-static void _init_256_data(void)
- {
- int i;
- BN_ULONG *tmp1 = _256_data;
- const BN_ULONG *tmp2 = tmp1;
-
- memcpy(tmp1, _nist_p_256, BN_NIST_256_TOP * sizeof(BN_ULONG));
- tmp1 += BN_NIST_256_TOP;
-
- for (i=0; i<5; i++)
- {
- bn_add_words(tmp1, _nist_p_256, tmp2, BN_NIST_256_TOP);
- tmp2 = tmp1;
- tmp1 += BN_NIST_256_TOP;
- }
- _is_set_256_data = 1;
- }
-#endif
-
#define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
{ \
if (a8 != 0) bn_cp_32(to, 0, from, (a8) - 8) else bn_32_set_0(to, 0)\
@@ -442,24 +407,21 @@ static void _init_256_data(void)
int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
BN_CTX *ctx)
{
-#if BN_BITS2 != 64
+#if BN_BITS2 == 32
int i, top = a->top;
int carry = 0;
register BN_ULONG *a_d = a->d, *r_d;
BN_ULONG t_d[BN_NIST_256_TOP],
- t_d2[BN_NIST_256_TOP],
- buf[BN_NIST_256_TOP];
+ buf[BN_NIST_256_TOP],
+ c_d[BN_NIST_256_TOP],
+ *res;
+ size_t mask;
+
+ field = &_bignum_nist_p_256; /* just to make sure */
+
+ if (BN_is_negative(a) || a->top > 2*BN_NIST_256_TOP)
+ return BN_nnmod(r, a, field, ctx); /* generic fallback: r = a mod field (dividend first, modulus second) */
- if (!_is_set_256_data)
- {
- CRYPTO_w_lock(CRYPTO_LOCK_BN);
-
- if (!_is_set_256_data)
- _init_256_data();
-
- CRYPTO_w_unlock(CRYPTO_LOCK_BN);
- }
-
i = BN_ucmp(field, a);
if (i == 0)
{
@@ -469,9 +431,6 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
else if (i > 0)
return (r == a)? 1 : (BN_copy(r ,a) != NULL);
- if (top == BN_NIST_256_TOP)
- return BN_usub(r, a, field);
-
if (r != a)
{
if (!bn_wexpand(r, BN_NIST_256_TOP))
@@ -487,98 +446,96 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
/*S1*/
nist_set_256(t_d, buf, 15, 14, 13, 12, 11, 0, 0, 0);
/*S2*/
- nist_set_256(t_d2,buf, 0, 15, 14, 13, 12, 0, 0, 0);
- if (bn_add_words(t_d, t_d, t_d2, BN_NIST_256_TOP))
- carry = 2;
- /* left shift */
- {
- register BN_ULONG *ap,t,c;
- ap = t_d;
- c=0;
- for (i = BN_NIST_256_TOP; i != 0; --i)
- {
- t= *ap;
- *(ap++)=((t<<1)|c)&BN_MASK2;
- c=(t & BN_TBIT)?1:0;
- }
- if (c)
- ++carry;
- }
+ nist_set_256(c_d,buf, 0, 15, 14, 13, 12, 0, 0, 0);
+ carry = bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,t_d,_nist_p_256,BN_NIST_256_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)t_d&~mask));
+
+ carry = bn_add_words(t_d, res, res, BN_NIST_256_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,t_d,_nist_p_256,BN_NIST_256_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)t_d&~mask));
+
+ carry = bn_add_words(r_d, r_d, res, BN_NIST_256_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP))
- ++carry;
/*S3*/
nist_set_256(t_d, buf, 15, 14, 0, 0, 0, 10, 9, 8);
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP))
- ++carry;
+ carry = bn_add_words(r_d, res, t_d, BN_NIST_256_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
/*S4*/
nist_set_256(t_d, buf, 8, 13, 15, 14, 13, 11, 10, 9);
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP))
- ++carry;
+ carry = bn_add_words(r_d, res, t_d, BN_NIST_256_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
/*D1*/
nist_set_256(t_d, buf, 10, 8, 0, 0, 0, 13, 12, 11);
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP))
- --carry;
+#if BRANCH_FREE
+ carry = bn_sub_words(r_d, res, t_d, BN_NIST_256_TOP);
+ bn_add_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+ mask = 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+#else
+ if (bn_sub_words(r_d, res, t_d, BN_NIST_256_TOP))
+ bn_add_words(r_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+#endif
/*D2*/
nist_set_256(t_d, buf, 11, 9, 0, 0, 15, 14, 13, 12);
+#if BRANCH_FREE
+ carry = bn_sub_words(r_d, res, t_d, BN_NIST_256_TOP);
+ bn_add_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+ mask = 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+#else
if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP))
- --carry;
+ bn_add_words(r_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+#endif
/*D3*/
nist_set_256(t_d, buf, 12, 0, 10, 9, 8, 15, 14, 13);
+#if BRANCH_FREE
+ carry = bn_sub_words(r_d, res, t_d, BN_NIST_256_TOP);
+ bn_add_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+ mask = 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+#else
if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP))
- --carry;
+ bn_add_words(r_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+#endif
/*D4*/
nist_set_256(t_d, buf, 13, 0, 11, 10, 9, 0, 15, 14);
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP))
- --carry;
-
- if (carry)
- {
- if (carry > 0)
- bn_sub_words(r_d, r_d, _256_data + BN_NIST_256_TOP *
- --carry, BN_NIST_256_TOP);
- else
- {
- carry = -carry;
- bn_add_words(r_d, r_d, _256_data + BN_NIST_256_TOP *
- --carry, BN_NIST_256_TOP);
- }
- }
+#if BRANCH_FREE
+ carry = bn_sub_words(r_d, res, t_d, BN_NIST_256_TOP);
+ bn_add_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+ mask = 0-(size_t)carry; /* all ones iff the subtraction borrowed */
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask)); /* pick c_d (modulus added back) on borrow, else r_d */
+ nist_cp_bn(r_d, res, BN_NIST_256_TOP); /* copy exactly BN_NIST_256_TOP words: c_d[] and r hold no more than that */
+#else
+ if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP))
+ bn_add_words(r_d,r_d,_nist_p_256,BN_NIST_256_TOP);
+#endif
r->top = BN_NIST_256_TOP;
bn_correct_top(r);
- if (BN_ucmp(r, field) >= 0)
+
+ if (BN_ucmp(field, r) <= 0)
{
- bn_sub_words(r_d, r_d, _nist_p_256, BN_NIST_256_TOP);
- bn_correct_top(r);
+ if (!BN_usub(r, r, field)) return 0;
}
- bn_check_top(r);
+
return 1;
-#else
+#else /* BN_BITS!=32 */
return 0;
#endif
}
-#if BN_BITS2 != 64
-static void _init_384_data(void)
- {
- int i;
- BN_ULONG *tmp1 = _384_data;
- const BN_ULONG *tmp2 = tmp1;
-
- memcpy(tmp1, _nist_p_384, BN_NIST_384_TOP * sizeof(BN_ULONG));
- tmp1 += BN_NIST_384_TOP;
-
- for (i=0; i<7; i++)
- {
- bn_add_words(tmp1, _nist_p_384, tmp2, BN_NIST_384_TOP);
- tmp2 = tmp1;
- tmp1 += BN_NIST_384_TOP;
- }
- _is_set_384_data = 1;
- }
-#endif
-
#define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
{ \
if (a12 != 0) bn_cp_32(to, 0, from, (a12) - 12) else bn_32_set_0(to, 0)\
@@ -598,22 +555,20 @@ static void _init_384_data(void)
int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
BN_CTX *ctx)
{
-#if BN_BITS2 != 64
+#if BN_BITS2 == 32
int i, top = a->top;
int carry = 0;
register BN_ULONG *r_d, *a_d = a->d;
BN_ULONG t_d[BN_NIST_384_TOP],
- buf[BN_NIST_384_TOP];
+ buf[BN_NIST_384_TOP],
+ c_d[BN_NIST_384_TOP],
+ *res;
+ size_t mask;
- if (!_is_set_384_data)
- {
- CRYPTO_w_lock(CRYPTO_LOCK_BN);
-
- if (!_is_set_384_data)
- _init_384_data();
+ field = &_bignum_nist_p_384; /* just to make sure */
- CRYPTO_w_unlock(CRYPTO_LOCK_BN);
- }
+ if (BN_is_negative(a) || a->top > 2*BN_NIST_384_TOP)
+ return BN_nnmod(r, a, field, ctx); /* generic fallback: r = a mod field (dividend first, modulus second) */
i = BN_ucmp(field, a);
if (i == 0)
@@ -624,9 +579,6 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
else if (i > 0)
return (r == a)? 1 : (BN_copy(r ,a) != NULL);
- if (top == BN_NIST_384_TOP)
- return BN_usub(r, a, field);
-
if (r != a)
{
if (!bn_wexpand(r, BN_NIST_384_TOP))
@@ -646,72 +598,108 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
register BN_ULONG *ap,t,c;
ap = t_d;
c=0;
- for (i = BN_NIST_256_TOP; i != 0; --i)
+ for (i = 3; i != 0; --i)
{
t= *ap;
*(ap++)=((t<<1)|c)&BN_MASK2;
c=(t & BN_TBIT)?1:0;
}
+ *ap=c;
}
- if (bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
- t_d, BN_NIST_256_TOP))
- ++carry;
+ carry = bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
+ t_d, BN_NIST_256_TOP);
+ /*
+ * we need if (result>=modulus) subtract(result,modulus);
+ * in n-bit space this can be expressed as
+ * if (carry || result>=modulus) subtract(result,modulus);
+ * the catch is that comparison implies subtraction and
+ * therefore one can write tmp=subtract(result,modulus);
+ * and then if(carry || !borrow) result=tmp; this's what
+ * happens below, but without explicit if:-) a.
+ */
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
/*S2 */
- if (bn_add_words(r_d, r_d, buf, BN_NIST_384_TOP))
- ++carry;
+ carry = bn_add_words(r_d, res, buf, BN_NIST_384_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
/*S3*/
nist_set_384(t_d,buf,20,19,18,17,16,15,14,13,12,23,22,21);
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP))
- ++carry;
+ carry = bn_add_words(r_d, res, t_d, BN_NIST_384_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
/*S4*/
nist_set_384(t_d,buf,19,18,17,16,15,14,13,12,20,0,23,0);
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP))
- ++carry;
+ carry = bn_add_words(r_d, res, t_d, BN_NIST_384_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
/*S5*/
- nist_set_256(t_d, buf, 0, 0, 0, 0, 23-4, 22-4, 21-4, 20-4);
- if (bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
- t_d, BN_NIST_256_TOP))
- ++carry;
+ nist_set_384(t_d, buf,0,0,0,0,23,22,21,20,0,0,0,0);
+ carry = bn_add_words(r_d, res, t_d, BN_NIST_384_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
/*S6*/
nist_set_384(t_d,buf,0,0,0,0,0,0,23,22,21,0,0,20);
- if (bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP))
- ++carry;
+ carry = bn_add_words(r_d, res, t_d, BN_NIST_384_TOP);
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+ mask = ~mask | (0-(size_t)carry);
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+
/*D1*/
nist_set_384(t_d,buf,22,21,20,19,18,17,16,15,14,13,12,23);
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP))
- --carry;
+#if BRANCH_FREE
+ carry = bn_sub_words(r_d, res, t_d, BN_NIST_384_TOP);
+ bn_add_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+ mask = 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+#else
+ if (bn_sub_words(r_d, res, t_d, BN_NIST_384_TOP))
+ bn_add_words(r_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+#endif
/*D2*/
nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,22,21,20,0);
+#if BRANCH_FREE
+ carry = bn_sub_words(r_d, res, t_d, BN_NIST_384_TOP);
+ bn_add_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+ mask = 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+#else
if (bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP))
- --carry;
+ bn_add_words(r_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+#endif
/*D3*/
nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,23,0,0,0);
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP))
- --carry;
-
- if (carry)
- {
- if (carry > 0)
- bn_sub_words(r_d, r_d, _384_data + BN_NIST_384_TOP *
- --carry, BN_NIST_384_TOP);
- else
- {
- carry = -carry;
- bn_add_words(r_d, r_d, _384_data + BN_NIST_384_TOP *
- --carry, BN_NIST_384_TOP);
- }
- }
+#if BRANCH_FREE
+ carry = bn_sub_words(r_d, res, t_d, BN_NIST_384_TOP);
+ bn_add_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+ mask = 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+ nist_cp_bn(r_d, res, BN_NIST_384_TOP);
+#else
+ if (bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP))
+ bn_add_words(r_d,r_d,_nist_p_384,BN_NIST_384_TOP);
+#endif
r->top = BN_NIST_384_TOP;
bn_correct_top(r);
- if (BN_ucmp(r, field) >= 0)
+
+ if (BN_ucmp(field, r) <= 0)
{
- bn_sub_words(r_d, r_d, _nist_p_384, BN_NIST_384_TOP);
- bn_correct_top(r);
+ if (!BN_usub(r, r, field)) return 0;
}
- bn_check_top(r);
+
return 1;
-#else
+#else /* BN_BITS!=32 */
return 0;
#endif
}
@@ -723,20 +711,37 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
#define BN_NIST_521_TOP_MASK (BN_ULONG)0x1FF
#elif BN_BITS2 == 32
#define BN_NIST_521_TOP_MASK (BN_ULONG)0x1FF
-#elif BN_BITS2 == 16
-#define BN_NIST_521_TOP_MASK (BN_ULONG)0x1FF
-#elif BN_BITS2 == 8
-#define BN_NIST_521_TOP_MASK (BN_ULONG)0x1
#endif
int top, ret = 0;
- BN_ULONG *r_d;
BIGNUM *tmp;
+ field = &_bignum_nist_p_521; /* just to make sure */
+
+ if (BN_is_negative(a))
+ return BN_nnmod(r, a, field, ctx); /* generic fallback: r = a mod field (dividend first, modulus second) */
+
/* check whether a reduction is necessary */
top = a->top;
if (top < BN_NIST_521_TOP || ( top == BN_NIST_521_TOP &&
- (!(a->d[BN_NIST_521_TOP-1] & ~(BN_NIST_521_TOP_MASK)))))
- return (r == a)? 1 : (BN_copy(r ,a) != NULL);
+ (!(a->d[BN_NIST_521_TOP-1] & ~(BN_NIST_521_TOP_MASK)))))
+ {
+ int i = BN_ucmp(field, a);
+ if (i == 0)
+ {
+ BN_zero(r);
+ return 1;
+ }
+ else
+ {
+#ifdef BN_DEBUG
+ OPENSSL_assert(i > 0); /* because 'field' is 1111...1111 */
+#endif
+ return (r == a)? 1 : (BN_copy(r ,a) != NULL);
+ }
+ }
+
+ if (BN_num_bits(a) > 2*521)
+ return BN_nnmod(r, a, field, ctx); /* input wider than 2*521 bits: generic r = a mod field */
BN_CTX_start(ctx);
tmp = BN_CTX_get(ctx);
@@ -756,15 +761,11 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
if (!BN_uadd(r, tmp, r))
goto err;
- top = r->top;
- r_d = r->d;
- if (top == BN_NIST_521_TOP &&
- (r_d[BN_NIST_521_TOP-1] & ~(BN_NIST_521_TOP_MASK)))
+
+ if (BN_ucmp(field, r) <= 0)
{
- BN_NIST_ADD_ONE(r_d)
- r_d[BN_NIST_521_TOP-1] &= BN_NIST_521_TOP_MASK;
+ if (!BN_usub(r, r, field)) goto err;
}
- bn_correct_top(r);
ret = 1;
err:
diff --git a/crypto/bn/bn_prime.c b/crypto/bn/bn_prime.c
index 5bab019553bf..7b25979dd1c4 100644
--- a/crypto/bn/bn_prime.c
+++ b/crypto/bn/bn_prime.c
@@ -377,14 +377,14 @@ static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
static int probable_prime(BIGNUM *rnd, int bits)
{
int i;
- BN_ULONG mods[NUMPRIMES];
+ prime_t mods[NUMPRIMES];
BN_ULONG delta,maxdelta;
again:
if (!BN_rand(rnd,bits,1,1)) return(0);
/* we now have a random number 'rand' to test. */
for (i=1; i<NUMPRIMES; i++)
- mods[i]=BN_mod_word(rnd,(BN_ULONG)primes[i]);
+ mods[i]=(prime_t)BN_mod_word(rnd,(BN_ULONG)primes[i]);
maxdelta=BN_MASK2 - primes[NUMPRIMES-1];
delta=0;
loop: for (i=1; i<NUMPRIMES; i++)
diff --git a/crypto/bn/bn_prime.h b/crypto/bn/bn_prime.h
index b7cf9a9bfe12..51d2194febdb 100644
--- a/crypto/bn/bn_prime.h
+++ b/crypto/bn/bn_prime.h
@@ -58,10 +58,12 @@
#ifndef EIGHT_BIT
#define NUMPRIMES 2048
+typedef unsigned short prime_t;
#else
#define NUMPRIMES 54
+typedef unsigned char prime_t;
#endif
-static const unsigned int primes[NUMPRIMES]=
+static const prime_t primes[NUMPRIMES]=
{
2, 3, 5, 7, 11, 13, 17, 19,
23, 29, 31, 37, 41, 43, 47, 53,
diff --git a/crypto/bn/bn_prime.pl b/crypto/bn/bn_prime.pl
index e583d1d53b9d..3fafb6f3e90a 100644
--- a/crypto/bn/bn_prime.pl
+++ b/crypto/bn/bn_prime.pl
@@ -101,10 +101,12 @@ for ($i=0; $i <= $#primes; $i++)
printf "#ifndef EIGHT_BIT\n";
printf "#define NUMPRIMES %d\n",$num;
+printf "typedef unsigned short prime_t;\n";
printf "#else\n";
printf "#define NUMPRIMES %d\n",$eight;
+printf "typedef unsigned char prime_t;\n";
printf "#endif\n";
-print "static const unsigned int primes[NUMPRIMES]=\n\t{\n\t";
+print "static const prime_t primes[NUMPRIMES]=\n\t{\n\t";
$init=0;
for ($i=0; $i <= $#primes; $i++)
{
diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c
index c885300a669f..310763eca0c6 100644
--- a/crypto/bn/bntest.c
+++ b/crypto/bn/bntest.c
@@ -184,120 +184,120 @@ int main(int argc, char *argv[])
message(out,"BN_add");
if (!test_add(out)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_sub");
if (!test_sub(out)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_lshift1");
if (!test_lshift1(out)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_lshift (fixed)");
if (!test_lshift(out,ctx,BN_bin2bn(lst,sizeof(lst)-1,NULL)))
goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_lshift");
if (!test_lshift(out,ctx,NULL)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_rshift1");
if (!test_rshift1(out)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_rshift");
if (!test_rshift(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_sqr");
if (!test_sqr(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_mul");
if (!test_mul(out)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_div");
if (!test_div(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_div_word");
if (!test_div_word(out)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_div_recp");
if (!test_div_recp(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_mod");
if (!test_mod(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_mod_mul");
if (!test_mod_mul(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_mont");
if (!test_mont(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_mod_exp");
if (!test_mod_exp(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_mod_exp_mont_consttime");
if (!test_mod_exp_mont_consttime(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_exp");
if (!test_exp(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_kronecker");
if (!test_kron(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_mod_sqrt");
if (!test_sqrt(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_GF2m_add");
if (!test_gf2m_add(out)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_GF2m_mod");
if (!test_gf2m_mod(out)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_GF2m_mod_mul");
if (!test_gf2m_mod_mul(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_GF2m_mod_sqr");
if (!test_gf2m_mod_sqr(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_GF2m_mod_inv");
if (!test_gf2m_mod_inv(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_GF2m_mod_div");
if (!test_gf2m_mod_div(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_GF2m_mod_exp");
if (!test_gf2m_mod_exp(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_GF2m_mod_sqrt");
if (!test_gf2m_mod_sqrt(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
message(out,"BN_GF2m_mod_solve_quad");
if (!test_gf2m_mod_solve_quad(out,ctx)) goto err;
- BIO_flush(out);
+ (void)BIO_flush(out);
BN_CTX_free(ctx);
BIO_free(out);
@@ -307,7 +307,7 @@ int main(int argc, char *argv[])
err:
BIO_puts(out,"1\n"); /* make sure the Perl script fed by bc notices
* the failure, see test_bn in test/Makefile.ssl*/
- BIO_flush(out);
+ (void)BIO_flush(out);
ERR_load_crypto_strings();
ERR_print_errors_fp(stderr);
EXIT(1);