diff options
author | Jung-uk Kim <jkim@FreeBSD.org> | 2019-02-26 18:06:51 +0000 |
---|---|---|
committer | Jung-uk Kim <jkim@FreeBSD.org> | 2019-02-26 18:06:51 +0000 |
commit | 851f7386fd78b9787f4f6669ad271886a2a003f1 (patch) | |
tree | 952920d27fdcd105b7f77b6e5fef3fedae8f74ea /crypto | |
parent | 8c3f9abd70b3f447a4795c1b00b386b044fb322d (diff) | |
download | src-test-851f7386fd78b9787f4f6669ad271886a2a003f1.tar.gz src-test-851f7386fd78b9787f4f6669ad271886a2a003f1.zip |
Import OpenSSL 1.1.1b.vendor/openssl/1.1.1b
Notes
Notes:
svn path=/vendor-crypto/openssl/dist/; revision=344595
svn path=/vendor-crypto/openssl/1.1.1b/; revision=344596; tag=vendor/openssl/1.1.1b
Diffstat (limited to 'crypto')
100 files changed, 4309 insertions, 2915 deletions
diff --git a/crypto/aes/asm/aes-x86_64.pl b/crypto/aes/asm/aes-x86_64.pl index 4d1dc9c70199d..d87e201147710 100755 --- a/crypto/aes/asm/aes-x86_64.pl +++ b/crypto/aes/asm/aes-x86_64.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2005-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -554,6 +554,7 @@ $code.=<<___; .type _x86_64_AES_encrypt_compact,\@abi-omnipotent .align 16 _x86_64_AES_encrypt_compact: +.cfi_startproc lea 128($sbox),$inp # size optimization mov 0-128($inp),$acc1 # prefetch Te4 mov 32-128($inp),$acc2 @@ -587,6 +588,7 @@ $code.=<<___; xor 8($key),$s2 xor 12($key),$s3 .byte 0xf3,0xc3 # rep ret +.cfi_endproc .size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact ___ @@ -1161,6 +1163,7 @@ $code.=<<___; .type _x86_64_AES_decrypt_compact,\@abi-omnipotent .align 16 _x86_64_AES_decrypt_compact: +.cfi_startproc lea 128($sbox),$inp # size optimization mov 0-128($inp),$acc1 # prefetch Td4 mov 32-128($inp),$acc2 @@ -1203,6 +1206,7 @@ $code.=<<___; xor 8($key),$s2 xor 12($key),$s3 .byte 0xf3,0xc3 # rep ret +.cfi_endproc .size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact ___ @@ -1365,6 +1369,7 @@ AES_set_encrypt_key: .type _x86_64_AES_set_encrypt_key,\@abi-omnipotent .align 16 _x86_64_AES_set_encrypt_key: +.cfi_startproc mov %esi,%ecx # %ecx=bits mov %rdi,%rsi # %rsi=userKey mov %rdx,%rdi # %rdi=key @@ -1546,6 +1551,7 @@ $code.=<<___; mov \$-1,%rax .Lexit: .byte 0xf3,0xc3 # rep ret +.cfi_endproc .size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key ___ @@ -1728,7 +1734,9 @@ AES_cbc_encrypt: cmp \$0,%rdx # check length je .Lcbc_epilogue pushfq -.cfi_push 49 # %rflags +# This could be .cfi_push 49, but libunwind fails on registers it does not +# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087. +.cfi_adjust_cfa_offset 8 push %rbx .cfi_push %rbx push %rbp @@ -1751,6 +1759,7 @@ AES_cbc_encrypt: cmp \$0,%r9 cmoveq %r10,$sbox +.cfi_remember_state mov OPENSSL_ia32cap_P(%rip),%r10d cmp \$$speed_limit,%rdx jb .Lcbc_slow_prologue @@ -1986,6 +1995,7 @@ AES_cbc_encrypt: #--------------------------- SLOW ROUTINE ---------------------------# .align 16 .Lcbc_slow_prologue: +.cfi_restore_state # allocate aligned stack frame... lea -88(%rsp),%rbp and \$-64,%rbp @@ -1997,8 +2007,10 @@ AES_cbc_encrypt: sub %r10,%rbp xchg %rsp,%rbp +.cfi_def_cfa_register %rbp #add \$8,%rsp # reserve for return address! mov %rbp,$_rsp # save %rsp +.cfi_cfa_expression $_rsp,deref,+64 .Lcbc_slow_body: #mov %rdi,$_inp # save copy of inp #mov %rsi,$_out # save copy of out @@ -2187,7 +2199,9 @@ AES_cbc_encrypt: .cfi_def_cfa %rsp,16 .Lcbc_popfq: popfq -.cfi_pop 49 # %rflags +# This could be .cfi_pop 49, but libunwind fails on registers it does not +# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087. +.cfi_adjust_cfa_offset -8 .Lcbc_epilogue: ret .cfi_endproc diff --git a/crypto/aes/asm/aesni-x86_64.pl b/crypto/aes/asm/aesni-x86_64.pl index 2a202c53e5f84..b68c14da60ca1 100755 --- a/crypto/aes/asm/aesni-x86_64.pl +++ b/crypto/aes/asm/aesni-x86_64.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2009-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -274,6 +274,7 @@ $code.=<<___; .type ${PREFIX}_encrypt,\@abi-omnipotent .align 16 ${PREFIX}_encrypt: +.cfi_startproc movups ($inp),$inout0 # load input mov 240($key),$rounds # key->rounds ___ @@ -284,12 +285,14 @@ $code.=<<___; movups $inout0,($out) # output pxor $inout0,$inout0 ret +.cfi_endproc .size ${PREFIX}_encrypt,.-${PREFIX}_encrypt .globl ${PREFIX}_decrypt .type ${PREFIX}_decrypt,\@abi-omnipotent .align 16 ${PREFIX}_decrypt: +.cfi_startproc movups ($inp),$inout0 # load input mov 240($key),$rounds # key->rounds ___ @@ -300,6 +303,7 @@ $code.=<<___; movups $inout0,($out) # output pxor $inout0,$inout0 ret +.cfi_endproc .size ${PREFIX}_decrypt, .-${PREFIX}_decrypt ___ } @@ -325,6 +329,7 @@ $code.=<<___; .type _aesni_${dir}rypt2,\@abi-omnipotent .align 16 _aesni_${dir}rypt2: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -350,6 +355,7 @@ _aesni_${dir}rypt2: aes${dir}last $rndkey0,$inout0 aes${dir}last $rndkey0,$inout1 ret +.cfi_endproc .size _aesni_${dir}rypt2,.-_aesni_${dir}rypt2 ___ } @@ -361,6 +367,7 @@ $code.=<<___; .type _aesni_${dir}rypt3,\@abi-omnipotent .align 16 _aesni_${dir}rypt3: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -391,6 +398,7 @@ _aesni_${dir}rypt3: aes${dir}last $rndkey0,$inout1 aes${dir}last $rndkey0,$inout2 ret +.cfi_endproc .size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3 ___ } @@ -406,6 +414,7 @@ $code.=<<___; .type _aesni_${dir}rypt4,\@abi-omnipotent .align 16 _aesni_${dir}rypt4: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -442,6 +451,7 @@ _aesni_${dir}rypt4: aes${dir}last $rndkey0,$inout2 aes${dir}last $rndkey0,$inout3 ret +.cfi_endproc .size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4 ___ } @@ -453,6 +463,7 @@ $code.=<<___; .type _aesni_${dir}rypt6,\@abi-omnipotent .align 16 _aesni_${dir}rypt6: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -503,6 +514,7 @@ _aesni_${dir}rypt6: aes${dir}last $rndkey0,$inout4 aes${dir}last $rndkey0,$inout5 ret +.cfi_endproc .size _aesni_${dir}rypt6,.-_aesni_${dir}rypt6 ___ } @@ -514,6 +526,7 @@ $code.=<<___; .type _aesni_${dir}rypt8,\@abi-omnipotent .align 16 _aesni_${dir}rypt8: +.cfi_startproc $movkey ($key),$rndkey0 shl \$4,$rounds $movkey 16($key),$rndkey1 @@ -574,6 +587,7 @@ _aesni_${dir}rypt8: aes${dir}last $rndkey0,$inout6 aes${dir}last $rndkey0,$inout7 ret +.cfi_endproc .size _aesni_${dir}rypt8,.-_aesni_${dir}rypt8 ___ } @@ -598,6 +612,7 @@ $code.=<<___; .type aesni_ecb_encrypt,\@function,5 .align 16 aesni_ecb_encrypt: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0x58(%rsp),%rsp @@ -943,6 +958,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size aesni_ecb_encrypt,.-aesni_ecb_encrypt ___ diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl index 8b37cfc452a02..eec0ed230a888 100755 --- a/crypto/aes/asm/aesv8-armx.pl +++ b/crypto/aes/asm/aesv8-armx.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -262,6 +262,7 @@ $code.=<<___; ${prefix}_set_decrypt_key: ___ $code.=<<___ if ($flavour =~ /64/); + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 ___ @@ -305,6 +306,7 @@ $code.=<<___ if ($flavour !~ /64/); ___ $code.=<<___ if ($flavour =~ /64/); ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret ___ $code.=<<___; diff --git a/crypto/aes/asm/bsaes-x86_64.pl b/crypto/aes/asm/bsaes-x86_64.pl index 2c79c2b67c897..e62342729e7fa 100755 --- a/crypto/aes/asm/bsaes-x86_64.pl +++ b/crypto/aes/asm/bsaes-x86_64.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -816,6 +816,7 @@ $code.=<<___; .type _bsaes_encrypt8,\@abi-omnipotent .align 64 _bsaes_encrypt8: +.cfi_startproc lea .LBS0(%rip), $const # constants table movdqa ($key), @XMM[9] # round 0 key @@ -875,11 +876,13 @@ $code.=<<___; pxor @XMM[8], @XMM[0] pxor @XMM[8], @XMM[1] ret +.cfi_endproc .size _bsaes_encrypt8,.-_bsaes_encrypt8 .type _bsaes_decrypt8,\@abi-omnipotent .align 64 _bsaes_decrypt8: +.cfi_startproc lea .LBS0(%rip), $const # constants table movdqa ($key), @XMM[9] # round 0 key @@ -937,6 +940,7 @@ $code.=<<___; pxor @XMM[8], @XMM[0] pxor @XMM[8], @XMM[1] ret +.cfi_endproc .size _bsaes_decrypt8,.-_bsaes_decrypt8 ___ } @@ -971,6 +975,7 @@ $code.=<<___; .type _bsaes_key_convert,\@abi-omnipotent .align 16 _bsaes_key_convert: +.cfi_startproc lea .Lmasks(%rip), $const movdqu ($inp), %xmm7 # load round 0 key lea 0x10($inp), $inp @@ -1049,6 +1054,7 @@ _bsaes_key_convert: movdqa 0x50($const), %xmm7 # .L63 #movdqa %xmm6, ($out) # don't save last round key ret +.cfi_endproc .size _bsaes_key_convert,.-_bsaes_key_convert ___ } diff --git a/crypto/aes/asm/vpaes-armv8.pl b/crypto/aes/asm/vpaes-armv8.pl index 5131e13a09a29..e38288af5558a 100755 --- a/crypto/aes/asm/vpaes-armv8.pl +++ b/crypto/aes/asm/vpaes-armv8.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2015-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -255,6 +255,7 @@ _vpaes_encrypt_core: .type vpaes_encrypt,%function .align 4 vpaes_encrypt: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -264,6 +265,7 @@ vpaes_encrypt: st1 {v0.16b}, [$out] ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_encrypt,.-vpaes_encrypt @@ -486,6 +488,7 @@ _vpaes_decrypt_core: .type vpaes_decrypt,%function .align 4 vpaes_decrypt: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -495,6 +498,7 @@ vpaes_decrypt: st1 {v0.16b}, [$out] ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_decrypt,.-vpaes_decrypt @@ -665,6 +669,7 @@ _vpaes_key_preheat: .type _vpaes_schedule_core,%function .align 4 _vpaes_schedule_core: + .inst 0xd503233f // paciasp stp x29, x30, [sp,#-16]! add x29,sp,#0 @@ -829,6 +834,7 @@ _vpaes_schedule_core: eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6 eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7 ldp x29, x30, [sp],#16 + .inst 0xd50323bf // autiasp ret .size _vpaes_schedule_core,.-_vpaes_schedule_core @@ -1041,6 +1047,7 @@ _vpaes_schedule_mangle: .type vpaes_set_encrypt_key,%function .align 4 vpaes_set_encrypt_key: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 stp d8,d9,[sp,#-16]! // ABI spec says so @@ -1056,6 +1063,7 @@ vpaes_set_encrypt_key: ldp d8,d9,[sp],#16 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key @@ -1063,6 +1071,7 @@ vpaes_set_encrypt_key: .type vpaes_set_decrypt_key,%function .align 4 vpaes_set_decrypt_key: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 stp d8,d9,[sp,#-16]! // ABI spec says so @@ -1082,6 +1091,7 @@ vpaes_set_decrypt_key: ldp d8,d9,[sp],#16 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key ___ @@ -1098,6 +1108,7 @@ vpaes_cbc_encrypt: cmp w5, #0 // check direction b.eq vpaes_cbc_decrypt + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -1120,6 +1131,7 @@ vpaes_cbc_encrypt: st1 {v0.16b}, [$ivec] // write ivec ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp .Lcbc_abort: ret .size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt @@ -1127,6 +1139,7 @@ vpaes_cbc_encrypt: .type vpaes_cbc_decrypt,%function .align 4 vpaes_cbc_decrypt: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 stp d8,d9,[sp,#-16]! // ABI spec says so @@ -1168,6 +1181,7 @@ vpaes_cbc_decrypt: ldp d10,d11,[sp],#16 ldp d8,d9,[sp],#16 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt ___ @@ -1177,6 +1191,7 @@ $code.=<<___; .type vpaes_ecb_encrypt,%function .align 4 vpaes_ecb_encrypt: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 stp d8,d9,[sp,#-16]! // ABI spec says so @@ -1210,6 +1225,7 @@ vpaes_ecb_encrypt: ldp d10,d11,[sp],#16 ldp d8,d9,[sp],#16 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_ecb_encrypt,.-vpaes_ecb_encrypt @@ -1217,6 +1233,7 @@ vpaes_ecb_encrypt: .type vpaes_ecb_decrypt,%function .align 4 vpaes_ecb_decrypt: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 stp d8,d9,[sp,#-16]! // ABI spec says so @@ -1250,6 +1267,7 @@ vpaes_ecb_decrypt: ldp d10,d11,[sp],#16 ldp d8,d9,[sp],#16 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size vpaes_ecb_decrypt,.-vpaes_ecb_decrypt ___ diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl index b715aca167d17..33d293e623d5b 100755 --- a/crypto/aes/asm/vpaes-x86_64.pl +++ b/crypto/aes/asm/vpaes-x86_64.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -91,6 +91,7 @@ $code.=<<___; .type _vpaes_encrypt_core,\@abi-omnipotent .align 16 _vpaes_encrypt_core: +.cfi_startproc mov %rdx, %r9 mov \$16, %r11 mov 240(%rdx),%eax @@ -171,6 +172,7 @@ _vpaes_encrypt_core: pxor %xmm4, %xmm0 # 0 = A pshufb %xmm1, %xmm0 ret +.cfi_endproc .size _vpaes_encrypt_core,.-_vpaes_encrypt_core ## @@ -181,6 +183,7 @@ _vpaes_encrypt_core: .type _vpaes_decrypt_core,\@abi-omnipotent .align 16 _vpaes_decrypt_core: +.cfi_startproc mov %rdx, %r9 # load key mov 240(%rdx),%eax movdqa %xmm9, %xmm1 @@ -277,6 +280,7 @@ _vpaes_decrypt_core: pxor %xmm4, %xmm0 # 0 = A pshufb %xmm2, %xmm0 ret +.cfi_endproc .size _vpaes_decrypt_core,.-_vpaes_decrypt_core ######################################################## @@ -287,6 +291,7 @@ _vpaes_decrypt_core: .type _vpaes_schedule_core,\@abi-omnipotent .align 16 _vpaes_schedule_core: +.cfi_startproc # rdi = key # rsi = size in bits # rdx = buffer @@ -453,6 +458,7 @@ _vpaes_schedule_core: pxor %xmm6, %xmm6 pxor %xmm7, %xmm7 ret +.cfi_endproc .size _vpaes_schedule_core,.-_vpaes_schedule_core ## @@ -472,6 +478,7 @@ _vpaes_schedule_core: .type _vpaes_schedule_192_smear,\@abi-omnipotent .align 16 _vpaes_schedule_192_smear: +.cfi_startproc pshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 pshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a pxor %xmm1, %xmm6 # -> c+d c 0 0 @@ -480,6 +487,7 @@ _vpaes_schedule_192_smear: movdqa %xmm6, %xmm0 movhlps %xmm1, %xmm6 # clobber low side with zeros ret +.cfi_endproc .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear ## @@ -503,6 +511,7 @@ _vpaes_schedule_192_smear: .type _vpaes_schedule_round,\@abi-omnipotent .align 16 _vpaes_schedule_round: +.cfi_startproc # extract rcon from xmm8 pxor %xmm1, %xmm1 palignr \$15, %xmm8, %xmm1 @@ -556,6 +565,7 @@ _vpaes_schedule_low_round: pxor %xmm7, %xmm0 movdqa %xmm0, %xmm7 ret +.cfi_endproc .size _vpaes_schedule_round,.-_vpaes_schedule_round ## @@ -570,6 +580,7 @@ _vpaes_schedule_low_round: .type _vpaes_schedule_transform,\@abi-omnipotent .align 16 _vpaes_schedule_transform: +.cfi_startproc movdqa %xmm9, %xmm1 pandn %xmm0, %xmm1 psrld \$4, %xmm1 @@ -580,6 +591,7 @@ _vpaes_schedule_transform: pshufb %xmm1, %xmm0 pxor %xmm2, %xmm0 ret +.cfi_endproc .size _vpaes_schedule_transform,.-_vpaes_schedule_transform ## @@ -608,6 +620,7 @@ _vpaes_schedule_transform: .type _vpaes_schedule_mangle,\@abi-omnipotent .align 16 _vpaes_schedule_mangle: +.cfi_startproc movdqa %xmm0, %xmm4 # save xmm0 for later movdqa .Lk_mc_forward(%rip),%xmm5 test %rcx, %rcx @@ -672,6 +685,7 @@ _vpaes_schedule_mangle: and \$0x30, %r8 movdqu %xmm3, (%rdx) ret +.cfi_endproc .size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle # @@ -681,6 +695,7 @@ _vpaes_schedule_mangle: .type ${PREFIX}_set_encrypt_key,\@function,3 .align 16 ${PREFIX}_set_encrypt_key: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -723,12 +738,14 @@ ___ $code.=<<___; xor %eax,%eax ret +.cfi_endproc .size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key .globl ${PREFIX}_set_decrypt_key .type ${PREFIX}_set_decrypt_key,\@function,3 .align 16 ${PREFIX}_set_decrypt_key: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -776,12 +793,14 @@ ___ $code.=<<___; xor %eax,%eax ret +.cfi_endproc .size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key .globl ${PREFIX}_encrypt .type ${PREFIX}_encrypt,\@function,3 .align 16 ${PREFIX}_encrypt: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -819,12 +838,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size ${PREFIX}_encrypt,.-${PREFIX}_encrypt .globl ${PREFIX}_decrypt .type ${PREFIX}_decrypt,\@function,3 .align 16 ${PREFIX}_decrypt: +.cfi_startproc ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -862,6 +883,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size ${PREFIX}_decrypt,.-${PREFIX}_decrypt ___ { @@ -874,6 +896,7 @@ $code.=<<___; .type ${PREFIX}_cbc_encrypt,\@function,6 .align 16 ${PREFIX}_cbc_encrypt: +.cfi_startproc xchg $key,$len ___ ($len,$key)=($key,$len); @@ -944,6 +967,7 @@ ___ $code.=<<___; .Lcbc_abort: ret +.cfi_endproc .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt ___ } @@ -957,6 +981,7 @@ $code.=<<___; .type _vpaes_preheat,\@abi-omnipotent .align 16 _vpaes_preheat: +.cfi_startproc lea .Lk_s0F(%rip), %r10 movdqa -0x20(%r10), %xmm10 # .Lk_inv movdqa -0x10(%r10), %xmm11 # .Lk_inv+16 @@ -966,6 +991,7 @@ _vpaes_preheat: movdqa 0x50(%r10), %xmm15 # .Lk_sb2 movdqa 0x60(%r10), %xmm14 # .Lk_sb2+16 ret +.cfi_endproc .size _vpaes_preheat,.-_vpaes_preheat ######################################################## ## ## diff --git a/crypto/armcap.c b/crypto/armcap.c index 1b3c2722d1e16..58e54f0da2e17 100644 --- a/crypto/armcap.c +++ b/crypto/armcap.c @@ -1,5 +1,5 @@ /* - * Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -62,14 +62,12 @@ uint32_t OPENSSL_rdtsc(void) # if defined(__GNUC__) && __GNUC__>=2 void OPENSSL_cpuid_setup(void) __attribute__ ((constructor)); # endif -/* - * Use a weak reference to getauxval() so we can use it if it is available but - * don't break the build if it is not. - */ -# if defined(__GNUC__) && __GNUC__>=2 && defined(__ELF__) -extern unsigned long getauxval(unsigned long type) __attribute__ ((weak)); -# else -static unsigned long (*getauxval) (unsigned long) = NULL; + +# if defined(__GLIBC__) && defined(__GLIBC_PREREQ) +# if __GLIBC_PREREQ(2, 16) +# include <sys/auxv.h> +# define OSSL_IMPLEMENT_GETAUXVAL +# endif # endif /* @@ -134,6 +132,33 @@ void OPENSSL_cpuid_setup(void) */ # endif + OPENSSL_armcap_P = 0; + +# ifdef OSSL_IMPLEMENT_GETAUXVAL + if (getauxval(HWCAP) & HWCAP_NEON) { + unsigned long hwcap = getauxval(HWCAP_CE); + + OPENSSL_armcap_P |= ARMV7_NEON; + + if (hwcap & HWCAP_CE_AES) + OPENSSL_armcap_P |= ARMV8_AES; + + if (hwcap & HWCAP_CE_PMULL) + OPENSSL_armcap_P |= ARMV8_PMULL; + + if (hwcap & HWCAP_CE_SHA1) + OPENSSL_armcap_P |= ARMV8_SHA1; + + if (hwcap & HWCAP_CE_SHA256) + OPENSSL_armcap_P |= ARMV8_SHA256; + +# ifdef __aarch64__ + if (hwcap & HWCAP_CE_SHA512) + OPENSSL_armcap_P |= ARMV8_SHA512; +# endif + } +# endif + sigfillset(&all_masked); sigdelset(&all_masked, SIGILL); sigdelset(&all_masked, SIGTRAP); @@ -141,8 +166,6 @@ void OPENSSL_cpuid_setup(void) sigdelset(&all_masked, SIGBUS); sigdelset(&all_masked, SIGSEGV); - OPENSSL_armcap_P = 0; - memset(&ill_act, 0, sizeof(ill_act)); ill_act.sa_handler = ill_handler; ill_act.sa_mask = all_masked; @@ -150,30 +173,9 @@ void OPENSSL_cpuid_setup(void) sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset); sigaction(SIGILL, &ill_act, &ill_oact); - if (getauxval != NULL) { - if (getauxval(HWCAP) & HWCAP_NEON) { - unsigned long hwcap = getauxval(HWCAP_CE); - - OPENSSL_armcap_P |= ARMV7_NEON; - - if (hwcap & HWCAP_CE_AES) - OPENSSL_armcap_P |= ARMV8_AES; - - if (hwcap & HWCAP_CE_PMULL) - OPENSSL_armcap_P |= ARMV8_PMULL; - - if (hwcap & HWCAP_CE_SHA1) - OPENSSL_armcap_P |= ARMV8_SHA1; - - if (hwcap & HWCAP_CE_SHA256) - OPENSSL_armcap_P |= ARMV8_SHA256; - -# ifdef __aarch64__ - if (hwcap & HWCAP_CE_SHA512) - OPENSSL_armcap_P |= ARMV8_SHA512; -# endif - } - } else if (sigsetjmp(ill_jmp, 1) == 0) { + /* If we used getauxval, we already have all the values */ +# ifndef OSSL_IMPLEMENT_GETAUXVAL + if (sigsetjmp(ill_jmp, 1) == 0) { _armv7_neon_probe(); OPENSSL_armcap_P |= ARMV7_NEON; if (sigsetjmp(ill_jmp, 1) == 0) { @@ -191,13 +193,16 @@ void OPENSSL_cpuid_setup(void) _armv8_sha256_probe(); OPENSSL_armcap_P |= ARMV8_SHA256; } -# if defined(__aarch64__) && !defined(__APPLE__) +# if defined(__aarch64__) && !defined(__APPLE__) if (sigsetjmp(ill_jmp, 1) == 0) { _armv8_sha512_probe(); OPENSSL_armcap_P |= ARMV8_SHA512; } -# endif +# endif } +# endif + + /* Things that getauxval didn't tell us */ if (sigsetjmp(ill_jmp, 1) == 0) { _armv7_tick(); OPENSSL_armcap_P |= ARMV7_TICK; diff --git a/crypto/asn1/a_digest.c b/crypto/asn1/a_digest.c index f4cc1f2e0eaa5..cc3532ea7df2f 100644 --- a/crypto/asn1/a_digest.c +++ b/crypto/asn1/a_digest.c @@ -23,18 +23,22 @@ int ASN1_digest(i2d_of_void *i2d, const EVP_MD *type, char *data, unsigned char *md, unsigned int *len) { - int i; + int inl; unsigned char *str, *p; - i = i2d(data, NULL); - if ((str = OPENSSL_malloc(i)) == NULL) { + inl = i2d(data, NULL); + if (inl <= 0) { + ASN1err(ASN1_F_ASN1_DIGEST, ERR_R_INTERNAL_ERROR); + return 0; + } + if ((str = OPENSSL_malloc(inl)) == NULL) { ASN1err(ASN1_F_ASN1_DIGEST, ERR_R_MALLOC_FAILURE); return 0; } p = str; i2d(data, &p); - if (!EVP_Digest(str, i, md, len, type, NULL)) { + if (!EVP_Digest(str, inl, md, len, type, NULL)) { OPENSSL_free(str); return 0; } diff --git a/crypto/asn1/a_sign.c b/crypto/asn1/a_sign.c index 130e23eaaa1e6..146fdb9626281 100644 --- a/crypto/asn1/a_sign.c +++ b/crypto/asn1/a_sign.c @@ -29,7 +29,8 @@ int ASN1_sign(i2d_of_void *i2d, X509_ALGOR *algor1, X509_ALGOR *algor2, { EVP_MD_CTX *ctx = EVP_MD_CTX_new(); unsigned char *p, *buf_in = NULL, *buf_out = NULL; - int i, inl = 0, outl = 0, outll = 0; + int i, inl = 0, outl = 0; + size_t inll = 0, outll = 0; X509_ALGOR *a; if (ctx == NULL) { @@ -70,10 +71,15 @@ int ASN1_sign(i2d_of_void *i2d, X509_ALGOR *algor1, X509_ALGOR *algor2, } } inl = i2d(data, NULL); - buf_in = OPENSSL_malloc((unsigned int)inl); + if (inl <= 0) { + ASN1err(ASN1_F_ASN1_SIGN, ERR_R_INTERNAL_ERROR); + goto err; + } + inll = (size_t)inl; + buf_in = OPENSSL_malloc(inll); outll = outl = EVP_PKEY_size(pkey); - buf_out = OPENSSL_malloc((unsigned int)outl); - if ((buf_in == NULL) || (buf_out == NULL)) { + buf_out = OPENSSL_malloc(outll); + if (buf_in == NULL || buf_out == NULL) { outl = 0; ASN1err(ASN1_F_ASN1_SIGN, ERR_R_MALLOC_FAILURE); goto err; @@ -101,7 +107,7 @@ int ASN1_sign(i2d_of_void *i2d, X509_ALGOR *algor1, X509_ALGOR *algor2, signature->flags |= ASN1_STRING_FLAG_BITS_LEFT; err: EVP_MD_CTX_free(ctx); - OPENSSL_clear_free((char *)buf_in, (unsigned int)inl); + OPENSSL_clear_free((char *)buf_in, inll); OPENSSL_clear_free((char *)buf_out, outll); return outl; } @@ -138,7 +144,7 @@ int ASN1_item_sign_ctx(const ASN1_ITEM *it, EVP_PKEY *pkey; unsigned char *buf_in = NULL, *buf_out = NULL; size_t inl = 0, outl = 0, outll = 0; - int signid, paramtype; + int signid, paramtype, buf_len = 0; int rv; type = EVP_MD_CTX_md(ctx); @@ -198,10 +204,16 @@ int ASN1_item_sign_ctx(const ASN1_ITEM *it, } - inl = ASN1_item_i2d(asn, &buf_in, it); + buf_len = ASN1_item_i2d(asn, &buf_in, it); + if (buf_len <= 0) { + outl = 0; + ASN1err(ASN1_F_ASN1_ITEM_SIGN_CTX, ERR_R_INTERNAL_ERROR); + goto err; + } + inl = buf_len; outll = outl = EVP_PKEY_size(pkey); - buf_out = OPENSSL_malloc((unsigned int)outl); - if ((buf_in == NULL) || (buf_out == NULL)) { + buf_out = OPENSSL_malloc(outll); + if (buf_in == NULL || buf_out == NULL) { outl = 0; ASN1err(ASN1_F_ASN1_ITEM_SIGN_CTX, ERR_R_MALLOC_FAILURE); goto err; @@ -223,7 +235,7 @@ int ASN1_item_sign_ctx(const ASN1_ITEM *it, signature->flags &= ~(ASN1_STRING_FLAG_BITS_LEFT | 0x07); signature->flags |= ASN1_STRING_FLAG_BITS_LEFT; err: - OPENSSL_clear_free((char *)buf_in, (unsigned int)inl); + OPENSSL_clear_free((char *)buf_in, inl); OPENSSL_clear_free((char *)buf_out, outll); return outl; } diff --git a/crypto/asn1/a_verify.c b/crypto/asn1/a_verify.c index 973d50d24de90..cdaf17c3cbc1b 100644 --- a/crypto/asn1/a_verify.c +++ b/crypto/asn1/a_verify.c @@ -48,6 +48,10 @@ int ASN1_verify(i2d_of_void *i2d, X509_ALGOR *a, ASN1_BIT_STRING *signature, } inl = i2d(data, NULL); + if (inl <= 0) { + ASN1err(ASN1_F_ASN1_VERIFY, ERR_R_INTERNAL_ERROR); + goto err; + } buf_in = OPENSSL_malloc((unsigned int)inl); if (buf_in == NULL) { ASN1err(ASN1_F_ASN1_VERIFY, ERR_R_MALLOC_FAILURE); @@ -87,8 +91,8 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a, EVP_MD_CTX *ctx = NULL; unsigned char *buf_in = NULL; int ret = -1, inl = 0; - int mdnid, pknid; + size_t inll = 0; if (!pkey) { ASN1err(ASN1_F_ASN1_ITEM_VERIFY, ERR_R_PASSED_NULL_PARAMETER); @@ -127,8 +131,8 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a, goto err; ret = -1; } else { - const EVP_MD *type; - type = EVP_get_digestbynid(mdnid); + const EVP_MD *type = EVP_get_digestbynid(mdnid); + if (type == NULL) { ASN1err(ASN1_F_ASN1_ITEM_VERIFY, ASN1_R_UNKNOWN_MESSAGE_DIGEST_ALGORITHM); @@ -150,11 +154,15 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a, } inl = ASN1_item_i2d(asn, &buf_in, it); - + if (inl <= 0) { + ASN1err(ASN1_F_ASN1_ITEM_VERIFY, ERR_R_INTERNAL_ERROR); + goto err; + } if (buf_in == NULL) { ASN1err(ASN1_F_ASN1_ITEM_VERIFY, ERR_R_MALLOC_FAILURE); goto err; } + inll = inl; ret = EVP_DigestVerify(ctx, signature->data, (size_t)signature->length, buf_in, inl); @@ -164,7 +172,7 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a, } ret = 1; err: - OPENSSL_clear_free(buf_in, (unsigned int)inl); + OPENSSL_clear_free(buf_in, inll); EVP_MD_CTX_free(ctx); return ret; } diff --git a/crypto/asn1/ameth_lib.c b/crypto/asn1/ameth_lib.c index 9a1644148af5d..d7d270dbb5819 100644 --- a/crypto/asn1/ameth_lib.c +++ b/crypto/asn1/ameth_lib.c @@ -140,6 +140,22 @@ int EVP_PKEY_asn1_add0(const EVP_PKEY_ASN1_METHOD *ameth) { EVP_PKEY_ASN1_METHOD tmp = { 0, }; + /* + * One of the following must be true: + * + * pem_str == NULL AND ASN1_PKEY_ALIAS is set + * pem_str != NULL AND ASN1_PKEY_ALIAS is clear + * + * Anything else is an error and may lead to a corrupt ASN1 method table + */ + if (!((ameth->pem_str == NULL + && (ameth->pkey_flags & ASN1_PKEY_ALIAS) != 0) + || (ameth->pem_str != NULL + && (ameth->pkey_flags & ASN1_PKEY_ALIAS) == 0))) { + EVPerr(EVP_F_EVP_PKEY_ASN1_ADD0, ERR_R_PASSED_INVALID_ARGUMENT); + return 0; + } + if (app_methods == NULL) { app_methods = sk_EVP_PKEY_ASN1_METHOD_new(ameth_cmp); if (app_methods == NULL) @@ -216,18 +232,6 @@ EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_new(int id, int flags, goto err; } - /* - * One of the following must be true: - * - * pem_str == NULL AND ASN1_PKEY_ALIAS is set - * pem_str != NULL AND ASN1_PKEY_ALIAS is clear - * - * Anything else is an error and may lead to a corrupt ASN1 method table - */ - if (!((pem_str == NULL && (flags & ASN1_PKEY_ALIAS) != 0) - || (pem_str != NULL && (flags & ASN1_PKEY_ALIAS) == 0))) - goto err; - if (pem_str) { ameth->pem_str = OPENSSL_strdup(pem_str); if (!ameth->pem_str) diff --git a/crypto/asn1/charmap.h b/crypto/asn1/charmap.h index bfccac2cb4e31..f15d72d73661d 100644 --- a/crypto/asn1/charmap.h +++ b/crypto/asn1/charmap.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by crypto/asn1/charmap.pl * - * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy diff --git a/crypto/asn1/charmap.pl b/crypto/asn1/charmap.pl index fbab1f3b0ad7e..dadd8df7749df 100644 --- a/crypto/asn1/charmap.pl +++ b/crypto/asn1/charmap.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy diff --git a/crypto/asn1/d2i_pu.c b/crypto/asn1/d2i_pu.c index 9452e08a5874c..7bc16c7bceb40 100644 --- a/crypto/asn1/d2i_pu.c +++ b/crypto/asn1/d2i_pu.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -32,7 +32,7 @@ EVP_PKEY *d2i_PublicKey(int type, EVP_PKEY **a, const unsigned char **pp, } else ret = *a; - if (!EVP_PKEY_set_type(ret, type)) { + if (type != EVP_PKEY_id(ret) && !EVP_PKEY_set_type(ret, type)) { ASN1err(ASN1_F_D2I_PUBLICKEY, ERR_R_EVP_LIB); goto err; } diff --git a/crypto/bio/b_addr.c b/crypto/bio/b_addr.c index abec7bb8dbba4..4395ab7a0683e 100644 --- a/crypto/bio/b_addr.c +++ b/crypto/bio/b_addr.c @@ -1,5 +1,5 @@ /* - * Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -782,7 +782,12 @@ int BIO_lookup_ex(const char *host, const char *service, int lookup_type, * anyway [above getaddrinfo/gai_strerror is]. We just let * system administrator figure this out... */ +# if defined(OPENSSL_SYS_VXWORKS) + /* h_errno doesn't exist on VxWorks */ + SYSerr(SYS_F_GETHOSTBYNAME, 1000 ); +# else SYSerr(SYS_F_GETHOSTBYNAME, 1000 + h_errno); +# endif #else SYSerr(SYS_F_GETHOSTBYNAME, WSAGetLastError()); #endif diff --git a/crypto/bio/bss_file.c b/crypto/bio/bss_file.c index 8de2391267afe..057344783d61b 100644 --- a/crypto/bio/bss_file.c +++ b/crypto/bio/bss_file.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -253,9 +253,7 @@ static long file_ctrl(BIO *b, int cmd, long num, void *ptr) } # elif defined(OPENSSL_SYS_WIN32_CYGWIN) int fd = fileno((FILE *)ptr); - if (num & BIO_FP_TEXT) - setmode(fd, O_TEXT); - else + if (!(num & BIO_FP_TEXT)) setmode(fd, O_BINARY); # endif } @@ -279,11 +277,14 @@ static long file_ctrl(BIO *b, int cmd, long num, void *ptr) ret = 0; break; } -# if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || defined(OPENSSL_SYS_WIN32_CYGWIN) +# if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) if (!(num & BIO_FP_TEXT)) OPENSSL_strlcat(p, "b", sizeof(p)); else OPENSSL_strlcat(p, "t", sizeof(p)); +# elif defined(OPENSSL_SYS_WIN32_CYGWIN) + if (!(num & BIO_FP_TEXT)) + OPENSSL_strlcat(p, "b", sizeof(p)); # endif fp = openssl_fopen(ptr, p); if (fp == NULL) { diff --git a/crypto/bio/bss_mem.c b/crypto/bio/bss_mem.c index e0a97c3b43e18..10fcbf7a7c4f3 100644 --- a/crypto/bio/bss_mem.c +++ b/crypto/bio/bss_mem.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -20,7 +20,7 @@ static long mem_ctrl(BIO *h, int cmd, long arg1, void *arg2); static int mem_new(BIO *h); static int secmem_new(BIO *h); static int mem_free(BIO *data); -static int mem_buf_free(BIO *data, int free_all); +static int mem_buf_free(BIO *data); static int mem_buf_sync(BIO *h); static const BIO_METHOD mem_method = { @@ -140,10 +140,20 @@ static int secmem_new(BIO *bi) static int mem_free(BIO *a) { - return mem_buf_free(a, 1); + BIO_BUF_MEM *bb; + + if (a == NULL) + return 0; + + bb = (BIO_BUF_MEM *)a->ptr; + if (!mem_buf_free(a)) + return 0; + OPENSSL_free(bb->readp); + OPENSSL_free(bb); + return 1; } -static int mem_buf_free(BIO *a, int free_all) +static int mem_buf_free(BIO *a) { if (a == NULL) return 0; @@ -155,11 +165,6 @@ static int mem_buf_free(BIO *a, int free_all) if (a->flags & BIO_FLAGS_MEM_RDONLY) b->data = NULL; BUF_MEM_free(b); - if (free_all) { - OPENSSL_free(bb->readp); - OPENSSL_free(bb); - } - a->ptr = NULL; } return 1; } @@ -266,11 +271,10 @@ static long mem_ctrl(BIO *b, int cmd, long num, void *ptr) } break; case BIO_C_SET_BUF_MEM: - mem_buf_free(b, 0); + mem_buf_free(b); b->shutdown = (int)num; bbm->buf = ptr; *bbm->readp = *bbm->buf; - b->ptr = bbm; break; case BIO_C_GET_BUF_MEM_PTR: if (ptr != NULL) { diff --git a/crypto/bn/asm/armv8-mont.pl b/crypto/bn/asm/armv8-mont.pl index 5d5af1b6be259..d8347bf932492 100755 --- a/crypto/bn/asm/armv8-mont.pl +++ b/crypto/bn/asm/armv8-mont.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2015-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -287,6 +287,7 @@ __bn_sqr8x_mont: cmp $ap,$bp b.ne __bn_mul4x_mont .Lsqr8x_mont: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-128]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -1040,6 +1041,7 @@ $code.=<<___; ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldr x29,[sp],#128 + .inst 0xd50323bf // autiasp ret .size __bn_sqr8x_mont,.-__bn_sqr8x_mont ___ @@ -1063,6 +1065,7 @@ $code.=<<___; .type __bn_mul4x_mont,%function .align 5 __bn_mul4x_mont: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-128]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -1496,6 +1499,7 @@ __bn_mul4x_mont: ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldr x29,[sp],#128 + .inst 0xd50323bf // autiasp ret .size __bn_mul4x_mont,.-__bn_mul4x_mont ___ diff --git a/crypto/bn/asm/ia64.S b/crypto/bn/asm/ia64.S index d235c45e2d632..0a26735c69792 100644 --- a/crypto/bn/asm/ia64.S +++ b/crypto/bn/asm/ia64.S @@ -3,7 +3,7 @@ .ident "ia64.S, Version 2.1" .ident "IA-64 ISA artwork by Andy Polyakov <appro@openssl.org>" -// Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. +// Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the OpenSSL license (the "License"). You may not use // this file except in compliance with the License. You can obtain a copy @@ -48,7 +48,7 @@ // on Itanium2! What to do? Reschedule loops for Itanium2? But then // Itanium would exhibit anti-scalability. So I've chosen to reschedule // for worst latency for every instruction aiming for best *all-round* -// performance. +// performance. // Q. How much faster does it get? // A. Here is the output from 'openssl speed rsa dsa' for vanilla @@ -472,7 +472,7 @@ bn_mul_add_words: .global bn_sqr_words# .proc bn_sqr_words# .align 64 -.skip 32 // makes the loop body aligned at 64-byte boundary +.skip 32 // makes the loop body aligned at 64-byte boundary bn_sqr_words: .prologue .save ar.pfs,r2 diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl index da35ec1b30cea..3875132bd25d4 100755 --- a/crypto/bn/asm/mips.pl +++ b/crypto/bn/asm/mips.pl @@ -798,6 +798,11 @@ $code.=<<___; move $a0,$v0 .end bn_sub_words_internal +#if 0 +/* + * The bn_div_3_words entry point is re-used for constant-time interface. + * Implementation is retained as hystorical reference. + */ .align 5 .globl bn_div_3_words .ent bn_div_3_words @@ -877,6 +882,7 @@ $code.=<<___; jr $ra move $a0,$v0 .end bn_div_3_words_internal +#endif .align 5 .globl bn_div_words diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl index f1292cc75cfb5..85cd73c668bdd 100755 --- a/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/bn/asm/rsaz-avx2.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2013-2019 The OpenSSL Project Authors. All Rights Reserved. # Copyright (c) 2012, Intel Corporation. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use @@ -1492,6 +1492,7 @@ $code.=<<___; .type rsaz_1024_red2norm_avx2,\@abi-omnipotent .align 32 rsaz_1024_red2norm_avx2: +.cfi_startproc sub \$-128,$inp # size optimization xor %rax,%rax ___ @@ -1525,12 +1526,14 @@ ___ } $code.=<<___; ret +.cfi_endproc .size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2 .globl rsaz_1024_norm2red_avx2 .type rsaz_1024_norm2red_avx2,\@abi-omnipotent .align 32 rsaz_1024_norm2red_avx2: +.cfi_startproc sub \$-128,$out # size optimization mov ($inp),@T[0] mov \$0x1fffffff,%eax @@ -1562,6 +1565,7 @@ $code.=<<___; mov @T[0],`8*($j+2)-128`($out) mov @T[0],`8*($j+3)-128`($out) ret +.cfi_endproc .size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2 ___ } @@ -1573,6 +1577,7 @@ $code.=<<___; .type rsaz_1024_scatter5_avx2,\@abi-omnipotent .align 32 rsaz_1024_scatter5_avx2: +.cfi_startproc vzeroupper vmovdqu .Lscatter_permd(%rip),%ymm5 shl \$4,$power @@ -1592,6 +1597,7 @@ rsaz_1024_scatter5_avx2: vzeroupper ret +.cfi_endproc .size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2 .globl rsaz_1024_gather5_avx2 diff --git a/crypto/bn/asm/sparcv8plus.S b/crypto/bn/asm/sparcv8plus.S index fe4699b2bdd14..d520ffa7c2483 100644 --- a/crypto/bn/asm/sparcv8plus.S +++ b/crypto/bn/asm/sparcv8plus.S @@ -3,7 +3,7 @@ /* * ==================================================================== - * Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -154,9 +154,9 @@ .register %g2,#scratch .register %g3,#scratch # define FRAME_SIZE -192 -#else +#else # define FRAME_SIZE -96 -#endif +#endif /* * GNU assembler can't stand stuw:-( */ diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index ad6e8ada3ce75..f43e13d11643a 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -2910,6 +2910,7 @@ bn_powerx5: .align 32 bn_sqrx8x_internal: __bn_sqrx8x_internal: +.cfi_startproc ################################################################## # Squaring part: # @@ -3542,6 +3543,7 @@ __bn_sqrx8x_reduction: cmp 8+8(%rsp),%r8 # end of t[]? jb .Lsqrx8x_reduction_loop ret +.cfi_endproc .size bn_sqrx8x_internal,.-bn_sqrx8x_internal ___ } diff --git a/crypto/bn/bn_ctx.c b/crypto/bn/bn_ctx.c index aa08b31a34bb9..54b799961aa43 100644 --- a/crypto/bn/bn_ctx.c +++ b/crypto/bn/bn_ctx.c @@ -1,5 +1,5 @@ /* - * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -227,6 +227,8 @@ BIGNUM *BN_CTX_get(BN_CTX *ctx) } /* OK, make sure the returned bignum is "zero" */ BN_zero(ret); + /* clear BN_FLG_CONSTTIME if leaked from previous frames */ + ret->flags &= (~BN_FLG_CONSTTIME); ctx->used++; CTXDBG_RET(ctx, ret); return ret; @@ -256,7 +258,7 @@ static int BN_STACK_push(BN_STACK *st, unsigned int idx) unsigned int newsize = st->size ? (st->size * 3 / 2) : BN_CTX_START_FRAMES; unsigned int *newitems; - + if ((newitems = OPENSSL_malloc(sizeof(*newitems) * newsize)) == NULL) { BNerr(BN_F_BN_STACK_PUSH, ERR_R_MALLOC_FAILURE); return 0; @@ -310,7 +312,7 @@ static BIGNUM *BN_POOL_get(BN_POOL *p, int flag) /* Full; allocate a new pool item and link it in. */ if (p->used == p->size) { BN_POOL_ITEM *item; - + if ((item = OPENSSL_malloc(sizeof(*item))) == NULL) { BNerr(BN_F_BN_POOL_GET, ERR_R_MALLOC_FAILURE); return NULL; diff --git a/crypto/bn/bn_depr.c b/crypto/bn/bn_depr.c index 7d89214b1c16a..58bcf197a490e 100644 --- a/crypto/bn/bn_depr.c +++ b/crypto/bn/bn_depr.c @@ -1,5 +1,5 @@ /* - * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -40,7 +40,7 @@ BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, goto err; /* we have a prime :-) */ - return ret; + return rnd; err: BN_free(rnd); return NULL; diff --git a/crypto/bn/bn_div.c b/crypto/bn/bn_div.c index 70add10c7d6ce..3a6fa0a1b194b 100644 --- a/crypto/bn/bn_div.c +++ b/crypto/bn/bn_div.c @@ -7,6 +7,7 @@ * https://www.openssl.org/source/license.html */ +#include <assert.h> #include <openssl/bn.h> #include "internal/cryptlib.h" #include "bn_lcl.h" @@ -86,6 +87,77 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, #else +# if defined(BN_DIV3W) +BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0); +# elif 0 +/* + * This is #if-ed away, because it's a reference for assembly implementations, + * where it can and should be made constant-time. But if you want to test it, + * just replace 0 with 1. + */ +# if BN_BITS2 == 64 && defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 +# undef BN_ULLONG +# define BN_ULLONG __uint128_t +# define BN_LLONG +# endif + +# ifdef BN_LLONG +# define BN_DIV3W +/* + * Interface is somewhat quirky, |m| is pointer to most significant limb, + * and less significant limb is referred at |m[-1]|. This means that caller + * is responsible for ensuring that |m[-1]| is valid. Second condition that + * has to be met is that |d0|'s most significant bit has to be set. Or in + * other words divisor has to be "bit-aligned to the left." bn_div_fixed_top + * does all this. The subroutine considers four limbs, two of which are + * "overlapping," hence the name... + */ +static BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0) +{ + BN_ULLONG R = ((BN_ULLONG)m[0] << BN_BITS2) | m[-1]; + BN_ULLONG D = ((BN_ULLONG)d0 << BN_BITS2) | d1; + BN_ULONG Q = 0, mask; + int i; + + for (i = 0; i < BN_BITS2; i++) { + Q <<= 1; + if (R >= D) { + Q |= 1; + R -= D; + } + D >>= 1; + } + + mask = 0 - (Q >> (BN_BITS2 - 1)); /* does it overflow? */ + + Q <<= 1; + Q |= (R >= D); + + return (Q | mask) & BN_MASK2; +} +# endif +# endif + +static int bn_left_align(BIGNUM *num) +{ + BN_ULONG *d = num->d, n, m, rmask; + int top = num->top; + int rshift = BN_num_bits_word(d[top - 1]), lshift, i; + + lshift = BN_BITS2 - rshift; + rshift %= BN_BITS2; /* say no to undefined behaviour */ + rmask = (BN_ULONG)0 - rshift; /* rmask = 0 - (rshift != 0) */ + rmask |= rmask >> 8; + + for (i = 0, m = 0; i < top; i++) { + n = d[i]; + d[i] = ((n << lshift) | m) & BN_MASK2; + m = (n >> rshift) & rmask; + } + + return lshift; +} + # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \ && !defined(PEDANTIC) && !defined(BN_DIV3W) # if defined(__GNUC__) && __GNUC__>=2 @@ -137,55 +209,73 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, BN_CTX *ctx) { - int norm_shift, i, loop; - BIGNUM *tmp, wnum, *snum, *sdiv, *res; - BN_ULONG *resp, *wnump; - BN_ULONG d0, d1; - int num_n, div_n; - int no_branch = 0; + int ret; + + if (BN_is_zero(divisor)) { + BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO); + return 0; + } /* * Invalid zero-padding would have particularly bad consequences so don't * just rely on bn_check_top() here (bn_check_top() works only for * BN_DEBUG builds) */ - if ((num->top > 0 && num->d[num->top - 1] == 0) || - (divisor->top > 0 && divisor->d[divisor->top - 1] == 0)) { + if (divisor->d[divisor->top - 1] == 0) { BNerr(BN_F_BN_DIV, BN_R_NOT_INITIALIZED); return 0; } - bn_check_top(num); - bn_check_top(divisor); + ret = bn_div_fixed_top(dv, rm, num, divisor, ctx); - if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) - || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0)) { - no_branch = 1; + if (ret) { + if (dv != NULL) + bn_correct_top(dv); + if (rm != NULL) + bn_correct_top(rm); } - bn_check_top(dv); - bn_check_top(rm); - /*- bn_check_top(num); *//* - * 'num' has been checked already - */ - /*- bn_check_top(divisor); *//* - * 'divisor' has been checked already - */ + return ret; +} - if (BN_is_zero(divisor)) { - BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO); - return 0; - } +/* + * It's argued that *length* of *significant* part of divisor is public. + * Even if it's private modulus that is. Again, *length* is assumed + * public, but not *value*. Former is likely to be pre-defined by + * algorithm with bit granularity, though below subroutine is invariant + * of limb length. Thanks to this assumption we can require that |divisor| + * may not be zero-padded, yet claim this subroutine "constant-time"(*). + * This is because zero-padded dividend, |num|, is tolerated, so that + * caller can pass dividend of public length(*), but with smaller amount + * of significant limbs. This naturally means that quotient, |dv|, would + * contain correspongly less significant limbs as well, and will be zero- + * padded accordingly. Returned remainder, |rm|, will have same bit length + * as divisor, also zero-padded if needed. These actually leave sign bits + * in ambiguous state. In sense that we try to avoid negative zeros, while + * zero-padded zeros would retain sign. + * + * (*) "Constant-time-ness" has two pre-conditions: + * + * - availability of constant-time bn_div_3_words; + * - dividend is at least as "wide" as divisor, limb-wise, zero-padded + * if so requied, which shouldn't be a privacy problem, because + * divisor's length is considered public; + */ +int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, + const BIGNUM *divisor, BN_CTX *ctx) +{ + int norm_shift, i, j, loop; + BIGNUM *tmp, *snum, *sdiv, *res; + BN_ULONG *resp, *wnum, *wnumtop; + BN_ULONG d0, d1; + int num_n, div_n; - if (!no_branch && BN_ucmp(num, divisor) < 0) { - if (rm != NULL) { - if (BN_copy(rm, num) == NULL) - return 0; - } - if (dv != NULL) - BN_zero(dv); - return 1; - } + assert(divisor->top > 0 && divisor->d[divisor->top - 1] != 0); + + bn_check_top(num); + bn_check_top(divisor); + bn_check_top(dv); + bn_check_top(rm); BN_CTX_start(ctx); res = (dv == NULL) ? BN_CTX_get(ctx) : dv; @@ -196,113 +286,72 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, goto err; /* First we normalise the numbers */ - norm_shift = BN_BITS2 - ((BN_num_bits(divisor)) % BN_BITS2); - if (!(BN_lshift(sdiv, divisor, norm_shift))) + if (!BN_copy(sdiv, divisor)) goto err; + norm_shift = bn_left_align(sdiv); sdiv->neg = 0; - norm_shift += BN_BITS2; - if (!(BN_lshift(snum, num, norm_shift))) + /* + * Note that bn_lshift_fixed_top's output is always one limb longer + * than input, even when norm_shift is zero. This means that amount of + * inner loop iterations is invariant of dividend value, and that one + * doesn't need to compare dividend and divisor if they were originally + * of the same bit length. + */ + if (!(bn_lshift_fixed_top(snum, num, norm_shift))) goto err; - snum->neg = 0; - - if (no_branch) { - /* - * Since we don't know whether snum is larger than sdiv, we pad snum - * with enough zeroes without changing its value. - */ - if (snum->top <= sdiv->top + 1) { - if (bn_wexpand(snum, sdiv->top + 2) == NULL) - goto err; - for (i = snum->top; i < sdiv->top + 2; i++) - snum->d[i] = 0; - snum->top = sdiv->top + 2; - } else { - if (bn_wexpand(snum, snum->top + 1) == NULL) - goto err; - snum->d[snum->top] = 0; - snum->top++; - } - } div_n = sdiv->top; num_n = snum->top; + + if (num_n <= div_n) { + /* caller didn't pad dividend -> no constant-time guarantee... */ + if (bn_wexpand(snum, div_n + 1) == NULL) + goto err; + memset(&(snum->d[num_n]), 0, (div_n - num_n + 1) * sizeof(BN_ULONG)); + snum->top = num_n = div_n + 1; + } + loop = num_n - div_n; /* * Lets setup a 'window' into snum This is the part that corresponds to * the current 'area' being divided */ - wnum.neg = 0; - wnum.d = &(snum->d[loop]); - wnum.top = div_n; - wnum.flags = BN_FLG_STATIC_DATA; - /* - * only needed when BN_ucmp messes up the values between top and max - */ - wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */ + wnum = &(snum->d[loop]); + wnumtop = &(snum->d[num_n - 1]); /* Get the top 2 words of sdiv */ - /* div_n=sdiv->top; */ d0 = sdiv->d[div_n - 1]; d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2]; - /* pointer to the 'top' of snum */ - wnump = &(snum->d[num_n - 1]); - - /* Setup to 'res' */ - if (!bn_wexpand(res, (loop + 1))) + /* Setup quotient */ + if (!bn_wexpand(res, loop)) goto err; res->neg = (num->neg ^ divisor->neg); - res->top = loop - no_branch; - resp = &(res->d[loop - 1]); + res->top = loop; + res->flags |= BN_FLG_FIXED_TOP; + resp = &(res->d[loop]); /* space for temp */ if (!bn_wexpand(tmp, (div_n + 1))) goto err; - if (!no_branch) { - if (BN_ucmp(&wnum, sdiv) >= 0) { - /* - * If BN_DEBUG_RAND is defined BN_ucmp changes (via bn_pollute) - * the const bignum arguments => clean the values between top and - * max again - */ - bn_clear_top2max(&wnum); - bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n); - *resp = 1; - } else - res->top--; - } - - /* Increase the resp pointer so that we never create an invalid pointer. */ - resp++; - - /* - * if res->top == 0 then clear the neg value otherwise decrease the resp - * pointer - */ - if (res->top == 0) - res->neg = 0; - else - resp--; - - for (i = 0; i < loop - 1; i++, wnump--) { + for (i = 0; i < loop; i++, wnumtop--) { BN_ULONG q, l0; /* * the first part of the loop uses the top two words of snum and sdiv * to calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv */ -# if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM) - BN_ULONG bn_div_3_words(BN_ULONG *, BN_ULONG, BN_ULONG); - q = bn_div_3_words(wnump, d1, d0); +# if defined(BN_DIV3W) + q = bn_div_3_words(wnumtop, d1, d0); # else BN_ULONG n0, n1, rem = 0; - n0 = wnump[0]; - n1 = wnump[-1]; + n0 = wnumtop[0]; + n1 = wnumtop[-1]; if (n0 == d0) q = BN_MASK2; else { /* n0 < d0 */ - + BN_ULONG n2 = (wnumtop == wnum) ? 0 : wnumtop[-2]; # ifdef BN_LLONG BN_ULLONG t2; @@ -322,7 +371,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, t2 = (BN_ULLONG) d1 *q; for (;;) { - if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | wnump[-2])) + if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | n2)) break; q--; rem += d0; @@ -355,7 +404,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, # endif for (;;) { - if ((t2h < rem) || ((t2h == rem) && (t2l <= wnump[-2]))) + if ((t2h < rem) || ((t2h == rem) && (t2l <= n2))) break; q--; rem += d0; @@ -371,43 +420,33 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q); tmp->d[div_n] = l0; - wnum.d--; + wnum--; /* - * ingore top values of the bignums just sub the two BN_ULONG arrays + * ignore top values of the bignums just sub the two BN_ULONG arrays * with bn_sub_words */ - if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n + 1)) { - /* - * Note: As we have considered only the leading two BN_ULONGs in - * the calculation of q, sdiv * q might be greater than wnum (but - * then (q-1) * sdiv is less or equal than wnum) - */ - q--; - if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n)) - /* - * we can't have an overflow here (assuming that q != 0, but - * if q == 0 then tmp is zero anyway) - */ - (*wnump)++; - } - /* store part of the result */ - resp--; - *resp = q; - } - bn_correct_top(snum); - if (rm != NULL) { + l0 = bn_sub_words(wnum, wnum, tmp->d, div_n + 1); + q -= l0; /* - * Keep a copy of the neg flag in num because if rm==num BN_rshift() - * will overwrite it. + * Note: As we have considered only the leading two BN_ULONGs in + * the calculation of q, sdiv * q might be greater than wnum (but + * then (q-1) * sdiv is less or equal than wnum) */ - int neg = num->neg; - BN_rshift(rm, snum, norm_shift); - if (!BN_is_zero(rm)) - rm->neg = neg; - bn_check_top(rm); + for (l0 = 0 - l0, j = 0; j < div_n; j++) + tmp->d[j] = sdiv->d[j] & l0; + l0 = bn_add_words(wnum, wnum, tmp->d, div_n); + (*wnumtop) += l0; + assert((*wnumtop) == 0); + + /* store part of the result */ + *--resp = q; } - if (no_branch) - bn_correct_top(res); + /* snum holds remainder, it's as wide as divisor */ + snum->neg = num->neg; + snum->top = div_n; + snum->flags |= BN_FLG_FIXED_TOP; + if (rm != NULL) + bn_rshift_fixed_top(rm, snum, norm_shift); BN_CTX_end(ctx); return 1; err: diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c index c026ffcb339ce..88f2baf0e553c 100644 --- a/crypto/bn/bn_exp.c +++ b/crypto/bn/bn_exp.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -648,34 +648,41 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, goto err; } + if (a->neg || BN_ucmp(a, m) >= 0) { + BIGNUM *reduced = BN_CTX_get(ctx); + if (reduced == NULL + || !BN_nnmod(reduced, a, m, ctx)) { + goto err; + } + a = reduced; + } + #ifdef RSAZ_ENABLED - if (!a->neg) { - /* - * If the size of the operands allow it, perform the optimized - * RSAZ exponentiation. For further information see - * crypto/bn/rsaz_exp.c and accompanying assembly modules. - */ - if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) - && rsaz_avx2_eligible()) { - if (NULL == bn_wexpand(rr, 16)) - goto err; - RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, - mont->n0[0]); - rr->top = 16; - rr->neg = 0; - bn_correct_top(rr); - ret = 1; + /* + * If the size of the operands allow it, perform the optimized + * RSAZ exponentiation. For further information see + * crypto/bn/rsaz_exp.c and accompanying assembly modules. + */ + if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) + && rsaz_avx2_eligible()) { + if (NULL == bn_wexpand(rr, 16)) goto err; - } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) { - if (NULL == bn_wexpand(rr, 8)) - goto err; - RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); - rr->top = 8; - rr->neg = 0; - bn_correct_top(rr); - ret = 1; + RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, + mont->n0[0]); + rr->top = 16; + rr->neg = 0; + bn_correct_top(rr); + ret = 1; + goto err; + } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) { + if (NULL == bn_wexpand(rr, 8)) goto err; - } + RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); + rr->top = 8; + rr->neg = 0; + bn_correct_top(rr); + ret = 1; + goto err; } #endif @@ -747,12 +754,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, goto err; /* prepare a^1 in Montgomery domain */ - if (a->neg || BN_ucmp(a, m) >= 0) { - if (!BN_nnmod(&am, a, m, ctx)) - goto err; - if (!bn_to_mont_fixed_top(&am, &am, mont, ctx)) - goto err; - } else if (!bn_to_mont_fixed_top(&am, a, mont, ctx)) + if (!bn_to_mont_fixed_top(&am, a, mont, ctx)) goto err; #if defined(SPARC_T4_MONT) diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c index 80f910c807793..8286b3855a2cb 100644 --- a/crypto/bn/bn_lib.c +++ b/crypto/bn/bn_lib.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -695,6 +695,9 @@ int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n) int i; BN_ULONG aa, bb; + if (n == 0) + return 0; + aa = a[n - 1]; bb = b[n - 1]; if (aa != bb) @@ -737,26 +740,25 @@ int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl) return bn_cmp_words(a, b, cl); } -/* +/*- * Constant-time conditional swap of a and b. - * a and b are swapped if condition is not 0. The code assumes that at most one bit of condition is set. - * nwords is the number of words to swap. The code assumes that at least nwords are allocated in both a and b, - * and that no more than nwords are used by either a or b. - * a and b cannot be the same number + * a and b are swapped if condition is not 0. + * nwords is the number of words to swap. + * Assumes that at least nwords are allocated in both a and b. + * Assumes that no more than nwords are used by either a or b. */ void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords) { BN_ULONG t; int i; + if (a == b) + return; + bn_wcheck_size(a, nwords); bn_wcheck_size(b, nwords); - assert(a != b); - assert((condition & (condition - 1)) == 0); - assert(sizeof(BN_ULONG) >= sizeof(int)); - - condition = ((condition - 1) >> (BN_BITS2 - 1)) - 1; + condition = ((~condition & ((condition - 1))) >> (BN_BITS2 - 1)) - 1; t = (a->top ^ b->top) & condition; a->top ^= t; @@ -794,42 +796,16 @@ void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords) a->flags ^= t; b->flags ^= t; -#define BN_CONSTTIME_SWAP(ind) \ - do { \ - t = (a->d[ind] ^ b->d[ind]) & condition; \ - a->d[ind] ^= t; \ - b->d[ind] ^= t; \ - } while (0) - - switch (nwords) { - default: - for (i = 10; i < nwords; i++) - BN_CONSTTIME_SWAP(i); - /* Fallthrough */ - case 10: - BN_CONSTTIME_SWAP(9); /* Fallthrough */ - case 9: - BN_CONSTTIME_SWAP(8); /* Fallthrough */ - case 8: - BN_CONSTTIME_SWAP(7); /* Fallthrough */ - case 7: - BN_CONSTTIME_SWAP(6); /* Fallthrough */ - case 6: - BN_CONSTTIME_SWAP(5); /* Fallthrough */ - case 5: - BN_CONSTTIME_SWAP(4); /* Fallthrough */ - case 4: - BN_CONSTTIME_SWAP(3); /* Fallthrough */ - case 3: - BN_CONSTTIME_SWAP(2); /* Fallthrough */ - case 2: - BN_CONSTTIME_SWAP(1); /* Fallthrough */ - case 1: - BN_CONSTTIME_SWAP(0); - } -#undef BN_CONSTTIME_SWAP + /* conditionally swap the data */ + for (i = 0; i < nwords; i++) { + t = (a->d[i] ^ b->d[i]) & condition; + a->d[i] ^= t; + b->d[i] ^= t; + } } +#undef BN_CONSTTIME_SWAP_FLAGS + /* Bits of security, see SP800-57 */ int BN_security_bits(int L, int N) diff --git a/crypto/bn/bn_prime.h b/crypto/bn/bn_prime.h index a64c9630f3b04..2eb7b52f76f9a 100644 --- a/crypto/bn/bn_prime.h +++ b/crypto/bn/bn_prime.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by crypto/bn/bn_prime.pl * - * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy diff --git a/crypto/bn/bn_prime.pl b/crypto/bn/bn_prime.pl index eeca475b9366b..b0b16087429b2 100644 --- a/crypto/bn/bn_prime.pl +++ b/crypto/bn/bn_prime.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy diff --git a/crypto/bn/bn_shift.c b/crypto/bn/bn_shift.c index 15d4b321ba268..b7a1e0ff9ae3c 100644 --- a/crypto/bn/bn_shift.c +++ b/crypto/bn/bn_shift.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -7,6 +7,7 @@ * https://www.openssl.org/source/license.html */ +#include <assert.h> #include "internal/cryptlib.h" #include "bn_lcl.h" @@ -82,40 +83,70 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) { - int i, nw, lb, rb; - BN_ULONG *t, *f; - BN_ULONG l; - - bn_check_top(r); - bn_check_top(a); + int ret; if (n < 0) { BNerr(BN_F_BN_LSHIFT, BN_R_INVALID_SHIFT); return 0; } + ret = bn_lshift_fixed_top(r, a, n); + + bn_correct_top(r); + bn_check_top(r); + + return ret; +} + +/* + * In respect to shift factor the execution time is invariant of + * |n % BN_BITS2|, but not |n / BN_BITS2|. Or in other words pre-condition + * for constant-time-ness is |n < BN_BITS2| or |n / BN_BITS2| being + * non-secret. + */ +int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n) +{ + int i, nw; + unsigned int lb, rb; + BN_ULONG *t, *f; + BN_ULONG l, m, rmask = 0; + + assert(n >= 0); + + bn_check_top(r); + bn_check_top(a); + nw = n / BN_BITS2; if (bn_wexpand(r, a->top + nw + 1) == NULL) return 0; - r->neg = a->neg; - lb = n % BN_BITS2; - rb = BN_BITS2 - lb; - f = a->d; - t = r->d; - t[a->top + nw] = 0; - if (lb == 0) - for (i = a->top - 1; i >= 0; i--) - t[nw + i] = f[i]; - else - for (i = a->top - 1; i >= 0; i--) { - l = f[i]; - t[nw + i + 1] |= (l >> rb) & BN_MASK2; - t[nw + i] = (l << lb) & BN_MASK2; + + if (a->top != 0) { + lb = (unsigned int)n % BN_BITS2; + rb = BN_BITS2 - lb; + rb %= BN_BITS2; /* say no to undefined behaviour */ + rmask = (BN_ULONG)0 - rb; /* rmask = 0 - (rb != 0) */ + rmask |= rmask >> 8; + f = &(a->d[0]); + t = &(r->d[nw]); + l = f[a->top - 1]; + t[a->top] = (l >> rb) & rmask; + for (i = a->top - 1; i > 0; i--) { + m = l << lb; + l = f[i - 1]; + t[i] = (m | ((l >> rb) & rmask)) & BN_MASK2; } - memset(t, 0, sizeof(*t) * nw); + t[0] = (l << lb) & BN_MASK2; + } else { + /* shouldn't happen, but formally required */ + r->d[nw] = 0; + } + if (nw != 0) + memset(r->d, 0, sizeof(*t) * nw); + + r->neg = a->neg; r->top = a->top + nw + 1; - bn_correct_top(r); - bn_check_top(r); + r->flags |= BN_FLG_FIXED_TOP; + return 1; } @@ -173,3 +204,54 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) bn_check_top(r); return 1; } + +/* + * In respect to shift factor the execution time is invariant of + * |n % BN_BITS2|, but not |n / BN_BITS2|. Or in other words pre-condition + * for constant-time-ness for sufficiently[!] zero-padded inputs is + * |n < BN_BITS2| or |n / BN_BITS2| being non-secret. + */ +int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n) +{ + int i, top, nw; + unsigned int lb, rb; + BN_ULONG *t, *f; + BN_ULONG l, m, mask; + + bn_check_top(r); + bn_check_top(a); + + assert(n >= 0); + + nw = n / BN_BITS2; + if (nw >= a->top) { + /* shouldn't happen, but formally required */ + BN_zero(r); + return 1; + } + + rb = (unsigned int)n % BN_BITS2; + lb = BN_BITS2 - rb; + lb %= BN_BITS2; /* say no to undefined behaviour */ + mask = (BN_ULONG)0 - lb; /* mask = 0 - (lb != 0) */ + mask |= mask >> 8; + top = a->top - nw; + if (r != a && bn_wexpand(r, top) == NULL) + return 0; + + t = &(r->d[0]); + f = &(a->d[nw]); + l = f[0]; + for (i = 0; i < top - 1; i++) { + m = f[i + 1]; + t[i] = (l >> rb) | ((m << lb) & mask); + l = m; + } + t[i] = l >> rb; + + r->neg = a->neg; + r->top = top; + r->flags |= BN_FLG_FIXED_TOP; + + return 1; +} diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl index 4a838bc2b32e2..e90be6d0e5bdd 100755 --- a/crypto/chacha/asm/chacha-armv8.pl +++ b/crypto/chacha/asm/chacha-armv8.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -157,6 +157,7 @@ ChaCha20_ctr32: b.ne ChaCha20_neon .Lshort: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -276,6 +277,7 @@ $code.=<<___; ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 + .inst 0xd50323bf // autiasp .Labort: ret @@ -332,6 +334,7 @@ $code.=<<___; ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 + .inst 0xd50323bf // autiasp ret .size ChaCha20_ctr32,.-ChaCha20_ctr32 ___ @@ -377,6 +380,7 @@ $code.=<<___; .type ChaCha20_neon,%function .align 5 ChaCha20_neon: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -575,6 +579,7 @@ $code.=<<___; ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 + .inst 0xd50323bf // autiasp ret .Ltail_neon: @@ -684,6 +689,7 @@ $code.=<<___; ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 + .inst 0xd50323bf // autiasp ret .size ChaCha20_neon,.-ChaCha20_neon ___ @@ -696,6 +702,7 @@ $code.=<<___; .type ChaCha20_512_neon,%function .align 5 ChaCha20_512_neon: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -1114,6 +1121,7 @@ $code.=<<___; ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 + .inst 0xd50323bf // autiasp ret .size ChaCha20_512_neon,.-ChaCha20_512_neon ___ diff --git a/crypto/cms/cms_kari.c b/crypto/cms/cms_kari.c index 3bc46febf6403..5e83814d0fcf6 100644 --- a/crypto/cms/cms_kari.c +++ b/crypto/cms/cms_kari.c @@ -1,5 +1,5 @@ /* - * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2013-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -282,7 +282,7 @@ static int cms_kari_create_ephemeral_key(CMS_KeyAgreeRecipientInfo *kari, return rv; } -/* Initialise a ktri based on passed certificate and key */ +/* Initialise a kari based on passed certificate and key */ int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip, EVP_PKEY *pk, unsigned int flags) @@ -299,6 +299,9 @@ int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip, kari->version = 3; rek = M_ASN1_new_of(CMS_RecipientEncryptedKey); + if (rek == NULL) + return 0; + if (!sk_CMS_RecipientEncryptedKey_push(kari->recipientEncryptedKeys, rek)) { M_ASN1_free_of(rek, CMS_RecipientEncryptedKey); return 0; diff --git a/crypto/cms/cms_pwri.c b/crypto/cms/cms_pwri.c index eac9c2fc862eb..26e3bdcf9e412 100644 --- a/crypto/cms/cms_pwri.c +++ b/crypto/cms/cms_pwri.c @@ -373,6 +373,7 @@ int cms_RecipientInfo_pwri_crypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri, goto err; } + OPENSSL_clear_free(ec->key, ec->keylen); ec->key = key; ec->keylen = keylen; diff --git a/crypto/conf/conf_def.c b/crypto/conf/conf_def.c index 7f0d70ea695ec..8e3f42a0caca2 100644 --- a/crypto/conf/conf_def.c +++ b/crypto/conf/conf_def.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -348,10 +348,15 @@ static int def_load_bio(CONF *conf, BIO *in, long *line) psection = section; } p = eat_ws(conf, end); - if (strncmp(pname, ".include", 8) == 0 && p != pname + 8) { + if (strncmp(pname, ".include", 8) == 0 + && (p != pname + 8 || *p == '=')) { char *include = NULL; BIO *next; + if (*p == '=') { + p++; + p = eat_ws(conf, p); + } trim_ws(conf, p); if (!str_copy(conf, psection, &include, p)) goto err; diff --git a/crypto/conf/conf_def.h b/crypto/conf/conf_def.h index 73e88baa8ba11..2016d31b89290 100644 --- a/crypto/conf/conf_def.h +++ b/crypto/conf/conf_def.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by crypto/conf/keysets.pl * - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy * in the file LICENSE in the source distribution or at diff --git a/crypto/conf/conf_lib.c b/crypto/conf/conf_lib.c index 07110d8502a4e..2d40ac97ec275 100644 --- a/crypto/conf/conf_lib.c +++ b/crypto/conf/conf_lib.c @@ -1,5 +1,5 @@ /* - * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -358,11 +358,36 @@ OPENSSL_INIT_SETTINGS *OPENSSL_INIT_new(void) if (ret != NULL) memset(ret, 0, sizeof(*ret)); + ret->flags = DEFAULT_CONF_MFLAGS; + return ret; } #ifndef OPENSSL_NO_STDIO +int OPENSSL_INIT_set_config_filename(OPENSSL_INIT_SETTINGS *settings, + const char *filename) +{ + char *newfilename = NULL; + + if (filename != NULL) { + newfilename = strdup(filename); + if (newfilename == NULL) + return 0; + } + + free(settings->filename); + settings->filename = newfilename; + + return 1; +} + +void OPENSSL_INIT_set_config_file_flags(OPENSSL_INIT_SETTINGS *settings, + unsigned long flags) +{ + settings->flags = flags; +} + int OPENSSL_INIT_set_config_appname(OPENSSL_INIT_SETTINGS *settings, const char *appname) { @@ -383,6 +408,7 @@ int OPENSSL_INIT_set_config_appname(OPENSSL_INIT_SETTINGS *settings, void OPENSSL_INIT_free(OPENSSL_INIT_SETTINGS *settings) { + free(settings->filename); free(settings->appname); free(settings); } diff --git a/crypto/conf/conf_mod.c b/crypto/conf/conf_mod.c index 51f262e774dd6..e703d97f5451f 100644 --- a/crypto/conf/conf_mod.c +++ b/crypto/conf/conf_mod.c @@ -1,5 +1,5 @@ /* - * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -142,6 +142,9 @@ int CONF_modules_load_file(const char *filename, const char *appname, OPENSSL_free(file); NCONF_free(conf); + if (flags & CONF_MFLAGS_IGNORE_RETURN_CODES) + return 1; + return ret; } diff --git a/crypto/conf/conf_sap.c b/crypto/conf/conf_sap.c index 3d2e065e5b07c..2ce42f0c67408 100644 --- a/crypto/conf/conf_sap.c +++ b/crypto/conf/conf_sap.c @@ -1,5 +1,5 @@ /* - * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -39,10 +39,24 @@ void OPENSSL_config(const char *appname) } #endif -void openssl_config_int(const char *appname) +int openssl_config_int(const OPENSSL_INIT_SETTINGS *settings) { + int ret; + const char *filename; + const char *appname; + unsigned long flags; + if (openssl_configured) - return; + return 1; + + filename = settings ? settings->filename : NULL; + appname = settings ? settings->appname : NULL; + flags = settings ? settings->flags : DEFAULT_CONF_MFLAGS; + +#ifdef OPENSSL_INIT_DEBUG + fprintf(stderr, "OPENSSL_INIT: openssl_config_int(%s, %s, %lu)\n", + filename, appname, flags); +#endif OPENSSL_load_builtin_modules(); #ifndef OPENSSL_NO_ENGINE @@ -51,11 +65,10 @@ void openssl_config_int(const char *appname) #endif ERR_clear_error(); #ifndef OPENSSL_SYS_UEFI - CONF_modules_load_file(NULL, appname, - CONF_MFLAGS_DEFAULT_SECTION | - CONF_MFLAGS_IGNORE_MISSING_FILE); + ret = CONF_modules_load_file(filename, appname, flags); #endif openssl_configured = 1; + return ret; } void openssl_no_config_int(void) diff --git a/crypto/conf/conf_ssl.c b/crypto/conf/conf_ssl.c index 024bdb4808e39..387f2cf46c31b 100644 --- a/crypto/conf/conf_ssl.c +++ b/crypto/conf/conf_ssl.c @@ -1,5 +1,5 @@ /* - * Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2015-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -78,6 +78,8 @@ static int ssl_module_init(CONF_IMODULE *md, const CONF *cnf) cnt = sk_CONF_VALUE_num(cmd_lists); ssl_module_free(md); ssl_names = OPENSSL_zalloc(sizeof(*ssl_names) * cnt); + if (ssl_names == NULL) + goto err; ssl_names_count = cnt; for (i = 0; i < ssl_names_count; i++) { struct ssl_conf_name_st *ssl_name = ssl_names + i; diff --git a/crypto/conf/keysets.pl b/crypto/conf/keysets.pl index cfa230ec3a1af..27a7214cc5192 100644 --- a/crypto/conf/keysets.pl +++ b/crypto/conf/keysets.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c index 1cd77c96d2f7f..7b761a3adced9 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -1,5 +1,5 @@ /* - * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * * Licensed under the OpenSSL license (the "License"). You may not use @@ -352,9 +352,9 @@ void OPENSSL_showfatal(const char *fmta, ...) /* * TODO: (For non GUI and no std error cases) - * Add event logging feature here. + * Add event logging feature here. */ - + # if !defined(NDEBUG) /* * We are in a situation where we tried to report a critical @@ -393,7 +393,7 @@ void OPENSSL_showfatal(const char *fmta, ...) # endif # else MessageBox(NULL, buf, _T("OpenSSL: FATAL"), MB_OK | MB_ICONERROR); -# endif +# endif } #else void OPENSSL_showfatal(const char *fmta, ...) @@ -460,4 +460,14 @@ uint32_t OPENSSL_rdtsc(void) { return 0; } + +size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt) +{ + return 0; +} + +size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max) +{ + return 0; +} #endif diff --git a/crypto/des/asm/des_enc.m4 b/crypto/des/asm/des_enc.m4 index 4a0d15620c00d..4ada97b175d99 100644 --- a/crypto/des/asm/des_enc.m4 +++ b/crypto/des/asm/des_enc.m4 @@ -1,4 +1,4 @@ -! Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. +! Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved. ! ! Licensed under the OpenSSL license (the "License"). You may not use ! this file except in compliance with the License. You can obtain a copy @@ -313,7 +313,7 @@ $4: sll out1, 28, out1 ! rotate xor $1, local1, $1 ! 1 finished, local1 now sbox 7 - ld [global2+local2], local2 ! 2 + ld [global2+local2], local2 ! 2 srl out0, 24, local1 ! 7 or out1, local0, out1 ! rotate @@ -1392,7 +1392,7 @@ DES_ncbc_encrypt: add %o7,global1,global1 sub global1,.PIC.DES_SPtrans-.des_and,out2 - cmp in5, 0 ! enc + cmp in5, 0 ! enc be .ncbc.dec STPTR in4, IVEC diff --git a/crypto/dso/dso_dlfcn.c b/crypto/dso/dso_dlfcn.c index ad8899c289a37..4240f5f5e30c8 100644 --- a/crypto/dso/dso_dlfcn.c +++ b/crypto/dso/dso_dlfcn.c @@ -17,6 +17,7 @@ #endif #include "dso_locl.h" +#include "e_os.h" #ifdef DSO_DLFCN @@ -99,6 +100,7 @@ static int dlfcn_load(DSO *dso) /* See applicable comments in dso_dl.c */ char *filename = DSO_convert_filename(dso, NULL); int flags = DLOPEN_FLAG; + int saveerrno = get_last_sys_error(); if (filename == NULL) { DSOerr(DSO_F_DLFCN_LOAD, DSO_R_NO_FILENAME); @@ -118,6 +120,11 @@ static int dlfcn_load(DSO *dso) ERR_add_error_data(4, "filename(", filename, "): ", dlerror()); goto err; } + /* + * Some dlopen() implementations (e.g. solaris) do no preserve errno, even + * on a successful call. + */ + set_sys_error(saveerrno); if (!sk_void_push(dso->meth_data, (char *)ptr)) { DSOerr(DSO_F_DLFCN_LOAD, DSO_R_STACK_ERROR); goto err; diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl index 1361cb395ffb2..887ddfb1ea9b1 100755 --- a/crypto/ec/asm/ecp_nistz256-armv8.pl +++ b/crypto/ec/asm/ecp_nistz256-armv8.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2015-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -119,6 +119,7 @@ $code.=<<___; .type ecp_nistz256_to_mont,%function .align 6 ecp_nistz256_to_mont: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-32]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -134,6 +135,7 @@ ecp_nistz256_to_mont: ldp x19,x20,[sp,#16] ldp x29,x30,[sp],#32 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont @@ -142,6 +144,7 @@ ecp_nistz256_to_mont: .type ecp_nistz256_from_mont,%function .align 4 ecp_nistz256_from_mont: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-32]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -157,6 +160,7 @@ ecp_nistz256_from_mont: ldp x19,x20,[sp,#16] ldp x29,x30,[sp],#32 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont @@ -166,6 +170,7 @@ ecp_nistz256_from_mont: .type ecp_nistz256_mul_mont,%function .align 4 ecp_nistz256_mul_mont: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-32]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -180,6 +185,7 @@ ecp_nistz256_mul_mont: ldp x19,x20,[sp,#16] ldp x29,x30,[sp],#32 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont @@ -188,6 +194,7 @@ ecp_nistz256_mul_mont: .type ecp_nistz256_sqr_mont,%function .align 4 ecp_nistz256_sqr_mont: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-32]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -201,6 +208,7 @@ ecp_nistz256_sqr_mont: ldp x19,x20,[sp,#16] ldp x29,x30,[sp],#32 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont @@ -210,6 +218,7 @@ ecp_nistz256_sqr_mont: .type ecp_nistz256_add,%function .align 4 ecp_nistz256_add: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -223,6 +232,7 @@ ecp_nistz256_add: bl __ecp_nistz256_add ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_add,.-ecp_nistz256_add @@ -231,6 +241,7 @@ ecp_nistz256_add: .type ecp_nistz256_div_by_2,%function .align 4 ecp_nistz256_div_by_2: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -242,6 +253,7 @@ ecp_nistz256_div_by_2: bl __ecp_nistz256_div_by_2 ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2 @@ -250,6 +262,7 @@ ecp_nistz256_div_by_2: .type ecp_nistz256_mul_by_2,%function .align 4 ecp_nistz256_mul_by_2: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -265,6 +278,7 @@ ecp_nistz256_mul_by_2: bl __ecp_nistz256_add // ret = a+a // 2*a ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2 @@ -273,6 +287,7 @@ ecp_nistz256_mul_by_2: .type ecp_nistz256_mul_by_3,%function .align 4 ecp_nistz256_mul_by_3: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -299,6 +314,7 @@ ecp_nistz256_mul_by_3: bl __ecp_nistz256_add // ret += a // 2*a+a=3*a ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3 @@ -308,6 +324,7 @@ ecp_nistz256_mul_by_3: .type ecp_nistz256_sub,%function .align 4 ecp_nistz256_sub: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -319,6 +336,7 @@ ecp_nistz256_sub: bl __ecp_nistz256_sub_from ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_sub,.-ecp_nistz256_sub @@ -327,6 +345,7 @@ ecp_nistz256_sub: .type ecp_nistz256_neg,%function .align 4 ecp_nistz256_neg: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -341,6 +360,7 @@ ecp_nistz256_neg: bl __ecp_nistz256_sub_from ldp x29,x30,[sp],#16 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_neg,.-ecp_nistz256_neg @@ -701,6 +721,7 @@ $code.=<<___; .type ecp_nistz256_point_double,%function .align 5 ecp_nistz256_point_double: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-80]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -835,6 +856,7 @@ ecp_nistz256_point_double: ldp x19,x20,[x29,#16] ldp x21,x22,[x29,#32] ldp x29,x30,[sp],#80 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_point_double,.-ecp_nistz256_point_double ___ @@ -857,6 +879,7 @@ $code.=<<___; .type ecp_nistz256_point_add,%function .align 5 ecp_nistz256_point_add: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-80]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -1094,12 +1117,13 @@ $code.=<<___; stp $acc2,$acc3,[$rp_real,#$i+16] .Ladd_done: - add sp,x29,#0 // destroy frame + add sp,x29,#0 // destroy frame ldp x19,x20,[x29,#16] ldp x21,x22,[x29,#32] ldp x23,x24,[x29,#48] ldp x25,x26,[x29,#64] ldp x29,x30,[sp],#80 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_point_add,.-ecp_nistz256_point_add ___ @@ -1121,6 +1145,7 @@ $code.=<<___; .type ecp_nistz256_point_add_affine,%function .align 5 ecp_nistz256_point_add_affine: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-80]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -1309,6 +1334,7 @@ $code.=<<___; ldp x23,x24,[x29,#48] ldp x25,x26,[x29,#64] ldp x29,x30,[sp],#80 + .inst 0xd50323bf // autiasp ret .size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine ___ diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl index eba6ffd430bef..87149e7f680df 100755 --- a/crypto/ec/asm/ecp_nistz256-x86_64.pl +++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved. # Copyright (c) 2014, Intel Corporation. All Rights Reserved. # Copyright (c) 2015 CloudFlare, Inc. # @@ -1674,6 +1674,7 @@ $code.=<<___; .type __ecp_nistz256_mul_montq,\@abi-omnipotent .align 32 __ecp_nistz256_mul_montq: +.cfi_startproc ######################################################################## # Multiply a by b[0] mov %rax, $t1 @@ -1885,6 +1886,7 @@ __ecp_nistz256_mul_montq: mov $acc1, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq ################################################################################ @@ -1968,6 +1970,7 @@ $code.=<<___; .type __ecp_nistz256_sqr_montq,\@abi-omnipotent .align 32 __ecp_nistz256_sqr_montq: +.cfi_startproc mov %rax, $acc5 mulq $acc6 # a[1]*a[0] mov %rax, $acc1 @@ -2125,6 +2128,7 @@ __ecp_nistz256_sqr_montq: mov $acc7, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq ___ @@ -2133,6 +2137,7 @@ $code.=<<___; .type __ecp_nistz256_mul_montx,\@abi-omnipotent .align 32 __ecp_nistz256_mul_montx: +.cfi_startproc ######################################################################## # Multiply by b[0] mulx $acc1, $acc0, $acc1 @@ -2295,11 +2300,13 @@ __ecp_nistz256_mul_montx: mov $acc1, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx .type __ecp_nistz256_sqr_montx,\@abi-omnipotent .align 32 __ecp_nistz256_sqr_montx: +.cfi_startproc mulx $acc6, $acc1, $acc2 # a[0]*a[1] mulx $acc7, $t0, $acc3 # a[0]*a[2] xor %eax, %eax @@ -2423,6 +2430,7 @@ __ecp_nistz256_sqr_montx: mov $acc7, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx ___ } @@ -2578,6 +2586,7 @@ ecp_nistz256_scatter_w5: .type ecp_nistz256_gather_w5,\@abi-omnipotent .align 32 ecp_nistz256_gather_w5: +.cfi_startproc ___ $code.=<<___ if ($avx>1); mov OPENSSL_ia32cap_P+8(%rip), %eax @@ -2666,6 +2675,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .LSEH_end_ecp_nistz256_gather_w5: .size ecp_nistz256_gather_w5,.-ecp_nistz256_gather_w5 @@ -2694,6 +2704,7 @@ ecp_nistz256_scatter_w7: .type ecp_nistz256_gather_w7,\@abi-omnipotent .align 32 ecp_nistz256_gather_w7: +.cfi_startproc ___ $code.=<<___ if ($avx>1); mov OPENSSL_ia32cap_P+8(%rip), %eax @@ -2771,6 +2782,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .LSEH_end_ecp_nistz256_gather_w7: .size ecp_nistz256_gather_w7,.-ecp_nistz256_gather_w7 ___ @@ -2787,6 +2799,7 @@ $code.=<<___; .type ecp_nistz256_avx2_gather_w5,\@abi-omnipotent .align 32 ecp_nistz256_avx2_gather_w5: +.cfi_startproc .Lavx2_gather_w5: vzeroupper ___ @@ -2874,6 +2887,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .LSEH_end_ecp_nistz256_avx2_gather_w5: .size ecp_nistz256_avx2_gather_w5,.-ecp_nistz256_avx2_gather_w5 ___ @@ -2893,6 +2907,7 @@ $code.=<<___; .type ecp_nistz256_avx2_gather_w7,\@abi-omnipotent .align 32 ecp_nistz256_avx2_gather_w7: +.cfi_startproc .Lavx2_gather_w7: vzeroupper ___ @@ -2995,6 +3010,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .LSEH_end_ecp_nistz256_avx2_gather_w7: .size ecp_nistz256_avx2_gather_w7,.-ecp_nistz256_avx2_gather_w7 ___ @@ -3064,6 +3080,7 @@ $code.=<<___; .type __ecp_nistz256_add_toq,\@abi-omnipotent .align 32 __ecp_nistz256_add_toq: +.cfi_startproc xor $t4,$t4 add 8*0($b_ptr), $a0 adc 8*1($b_ptr), $a1 @@ -3091,11 +3108,13 @@ __ecp_nistz256_add_toq: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq .type __ecp_nistz256_sub_fromq,\@abi-omnipotent .align 32 __ecp_nistz256_sub_fromq: +.cfi_startproc sub 8*0($b_ptr), $a0 sbb 8*1($b_ptr), $a1 mov $a0, $t0 @@ -3122,11 +3141,13 @@ __ecp_nistz256_sub_fromq: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq .type __ecp_nistz256_subq,\@abi-omnipotent .align 32 __ecp_nistz256_subq: +.cfi_startproc sub $a0, $t0 sbb $a1, $t1 mov $t0, $a0 @@ -3149,11 +3170,13 @@ __ecp_nistz256_subq: cmovnz $t3, $a3 ret +.cfi_endproc .size __ecp_nistz256_subq,.-__ecp_nistz256_subq .type __ecp_nistz256_mul_by_2q,\@abi-omnipotent .align 32 __ecp_nistz256_mul_by_2q: +.cfi_startproc xor $t4, $t4 add $a0, $a0 # a0:a3+a0:a3 adc $a1, $a1 @@ -3181,6 +3204,7 @@ __ecp_nistz256_mul_by_2q: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q ___ } @@ -3620,7 +3644,9 @@ $code.=<<___; movq %xmm1, $a_ptr # restore $a_ptr movq %xmm0, $r_ptr # restore $r_ptr add \$`32*(18-5)`, %rsp # difference in frame sizes +.cfi_adjust_cfa_offset `-32*(18-5)` jmp .Lpoint_double_shortcut$x +.cfi_adjust_cfa_offset `32*(18-5)` .align 32 .Ladd_proceed$x: @@ -4156,6 +4182,7 @@ $code.=<<___; .type __ecp_nistz256_add_tox,\@abi-omnipotent .align 32 __ecp_nistz256_add_tox: +.cfi_startproc xor $t4, $t4 adc 8*0($b_ptr), $a0 adc 8*1($b_ptr), $a1 @@ -4184,11 +4211,13 @@ __ecp_nistz256_add_tox: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox .type __ecp_nistz256_sub_fromx,\@abi-omnipotent .align 32 __ecp_nistz256_sub_fromx: +.cfi_startproc xor $t4, $t4 sbb 8*0($b_ptr), $a0 sbb 8*1($b_ptr), $a1 @@ -4217,11 +4246,13 @@ __ecp_nistz256_sub_fromx: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx .type __ecp_nistz256_subx,\@abi-omnipotent .align 32 __ecp_nistz256_subx: +.cfi_startproc xor $t4, $t4 sbb $a0, $t0 sbb $a1, $t1 @@ -4246,11 +4277,13 @@ __ecp_nistz256_subx: cmovc $t3, $a3 ret +.cfi_endproc .size __ecp_nistz256_subx,.-__ecp_nistz256_subx .type __ecp_nistz256_mul_by_2x,\@abi-omnipotent .align 32 __ecp_nistz256_mul_by_2x: +.cfi_startproc xor $t4, $t4 adc $a0, $a0 # a0:a3+a0:a3 adc $a1, $a1 @@ -4279,6 +4312,7 @@ __ecp_nistz256_mul_by_2x: mov $a3, 8*3($r_ptr) ret +.cfi_endproc .size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x ___ } diff --git a/crypto/ec/curve25519.c b/crypto/ec/curve25519.c index abe9b9cbf6dd0..aa999cc5914e5 100644 --- a/crypto/ec/curve25519.c +++ b/crypto/ec/curve25519.c @@ -744,91 +744,99 @@ static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32], /* * Reference base 2^25.5 implementation. - */ -/* + * * This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP * 20141124 (http://bench.cr.yp.to/supercop.html). * * The field functions are shared by Ed25519 and X25519 where possible. */ -/* fe means field element. Here the field is \Z/(2^255-19). An element t, +/* + * fe means field element. Here the field is \Z/(2^255-19). An element t, * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 * t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on - * context. */ + * context. + */ typedef int32_t fe[10]; +static const int64_t kBottom21Bits = 0x1fffffLL; static const int64_t kBottom25Bits = 0x1ffffffLL; static const int64_t kBottom26Bits = 0x3ffffffLL; static const int64_t kTop39Bits = 0xfffffffffe000000LL; static const int64_t kTop38Bits = 0xfffffffffc000000LL; -static uint64_t load_3(const uint8_t *in) { - uint64_t result; - result = (uint64_t)in[0]; - result |= ((uint64_t)in[1]) << 8; - result |= ((uint64_t)in[2]) << 16; - return result; +static uint64_t load_3(const uint8_t *in) +{ + uint64_t result; + + result = ((uint64_t)in[0]); + result |= ((uint64_t)in[1]) << 8; + result |= ((uint64_t)in[2]) << 16; + return result; } -static uint64_t load_4(const uint8_t *in) { - uint64_t result; - result = (uint64_t)in[0]; - result |= ((uint64_t)in[1]) << 8; - result |= ((uint64_t)in[2]) << 16; - result |= ((uint64_t)in[3]) << 24; - return result; +static uint64_t load_4(const uint8_t *in) +{ + uint64_t result; + + result = ((uint64_t)in[0]); + result |= ((uint64_t)in[1]) << 8; + result |= ((uint64_t)in[2]) << 16; + result |= ((uint64_t)in[3]) << 24; + return result; } -static void fe_frombytes(fe h, const uint8_t *s) { - /* Ignores top bit of h. */ - int64_t h0 = load_4(s); - int64_t h1 = load_3(s + 4) << 6; - int64_t h2 = load_3(s + 7) << 5; - int64_t h3 = load_3(s + 10) << 3; - int64_t h4 = load_3(s + 13) << 2; - int64_t h5 = load_4(s + 16); - int64_t h6 = load_3(s + 20) << 7; - int64_t h7 = load_3(s + 23) << 5; - int64_t h8 = load_3(s + 26) << 4; - int64_t h9 = (load_3(s + 29) & 8388607) << 2; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; - carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; - carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; - carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; - carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; - - carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; - carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; - carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; - carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; - carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; - - h[0] = (int32_t)h0; - h[1] = (int32_t)h1; - h[2] = (int32_t)h2; - h[3] = (int32_t)h3; - h[4] = (int32_t)h4; - h[5] = (int32_t)h5; - h[6] = (int32_t)h6; - h[7] = (int32_t)h7; - h[8] = (int32_t)h8; - h[9] = (int32_t)h9; +static void fe_frombytes(fe h, const uint8_t *s) +{ + /* Ignores top bit of h. */ + int64_t h0 = load_4(s); + int64_t h1 = load_3(s + 4) << 6; + int64_t h2 = load_3(s + 7) << 5; + int64_t h3 = load_3(s + 10) << 3; + int64_t h4 = load_3(s + 13) << 2; + int64_t h5 = load_4(s + 16); + int64_t h6 = load_3(s + 20) << 7; + int64_t h7 = load_3(s + 23) << 5; + int64_t h8 = load_3(s + 26) << 4; + int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; + carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; + carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; + carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; + carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; + carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; } -/* Preconditions: - * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +/* + * Preconditions: + * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. * * Write p=2^255-19; q=floor(h/p). * Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). @@ -848,102 +856,112 @@ static void fe_frombytes(fe h, const uint8_t *s) { * Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. * * Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) - * so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. */ -static void fe_tobytes(uint8_t *s, const fe h) { - int32_t h0 = h[0]; - int32_t h1 = h[1]; - int32_t h2 = h[2]; - int32_t h3 = h[3]; - int32_t h4 = h[4]; - int32_t h5 = h[5]; - int32_t h6 = h[6]; - int32_t h7 = h[7]; - int32_t h8 = h[8]; - int32_t h9 = h[9]; - int32_t q; - - q = (19 * h9 + (((int32_t) 1) << 24)) >> 25; - q = (h0 + q) >> 26; - q = (h1 + q) >> 25; - q = (h2 + q) >> 26; - q = (h3 + q) >> 25; - q = (h4 + q) >> 26; - q = (h5 + q) >> 25; - q = (h6 + q) >> 26; - q = (h7 + q) >> 25; - q = (h8 + q) >> 26; - q = (h9 + q) >> 25; - - /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ - h0 += 19 * q; - /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ - - h1 += h0 >> 26; h0 &= kBottom26Bits; - h2 += h1 >> 25; h1 &= kBottom25Bits; - h3 += h2 >> 26; h2 &= kBottom26Bits; - h4 += h3 >> 25; h3 &= kBottom25Bits; - h5 += h4 >> 26; h4 &= kBottom26Bits; - h6 += h5 >> 25; h5 &= kBottom25Bits; - h7 += h6 >> 26; h6 &= kBottom26Bits; - h8 += h7 >> 25; h7 &= kBottom25Bits; - h9 += h8 >> 26; h8 &= kBottom26Bits; - h9 &= kBottom25Bits; - /* h10 = carry9 */ - - /* Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. - * Have h0+...+2^230 h9 between 0 and 2^255-1; - * evidently 2^255 h10-2^255 q = 0. - * Goal: Output h0+...+2^230 h9. */ - - s[0] = (uint8_t)(h0 >> 0); - s[1] = (uint8_t)(h0 >> 8); - s[2] = (uint8_t)(h0 >> 16); - s[3] = (uint8_t)((h0 >> 24) | ((uint32_t)(h1) << 2)); - s[4] = (uint8_t)(h1 >> 6); - s[5] = (uint8_t)(h1 >> 14); - s[6] = (uint8_t)((h1 >> 22) | ((uint32_t)(h2) << 3)); - s[7] = (uint8_t)(h2 >> 5); - s[8] = (uint8_t)(h2 >> 13); - s[9] = (uint8_t)((h2 >> 21) | ((uint32_t)(h3) << 5)); - s[10] = (uint8_t)(h3 >> 3); - s[11] = (uint8_t)(h3 >> 11); - s[12] = (uint8_t)((h3 >> 19) | ((uint32_t)(h4) << 6)); - s[13] = (uint8_t)(h4 >> 2); - s[14] = (uint8_t)(h4 >> 10); - s[15] = (uint8_t)(h4 >> 18); - s[16] = (uint8_t)(h5 >> 0); - s[17] = (uint8_t)(h5 >> 8); - s[18] = (uint8_t)(h5 >> 16); - s[19] = (uint8_t)((h5 >> 24) | ((uint32_t)(h6) << 1)); - s[20] = (uint8_t)(h6 >> 7); - s[21] = (uint8_t)(h6 >> 15); - s[22] = (uint8_t)((h6 >> 23) | ((uint32_t)(h7) << 3)); - s[23] = (uint8_t)(h7 >> 5); - s[24] = (uint8_t)(h7 >> 13); - s[25] = (uint8_t)((h7 >> 21) | ((uint32_t)(h8) << 4)); - s[26] = (uint8_t)(h8 >> 4); - s[27] = (uint8_t)(h8 >> 12); - s[28] = (uint8_t)((h8 >> 20) | ((uint32_t)(h9) << 6)); - s[29] = (uint8_t)(h9 >> 2); - s[30] = (uint8_t)(h9 >> 10); - s[31] = (uint8_t)(h9 >> 18); + * so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. + */ +static void fe_tobytes(uint8_t *s, const fe h) +{ + int32_t h0 = h[0]; + int32_t h1 = h[1]; + int32_t h2 = h[2]; + int32_t h3 = h[3]; + int32_t h4 = h[4]; + int32_t h5 = h[5]; + int32_t h6 = h[6]; + int32_t h7 = h[7]; + int32_t h8 = h[8]; + int32_t h9 = h[9]; + int32_t q; + + q = (19 * h9 + (((int32_t) 1) << 24)) >> 25; + q = (h0 + q) >> 26; + q = (h1 + q) >> 25; + q = (h2 + q) >> 26; + q = (h3 + q) >> 25; + q = (h4 + q) >> 26; + q = (h5 + q) >> 25; + q = (h6 + q) >> 26; + q = (h7 + q) >> 25; + q = (h8 + q) >> 26; + q = (h9 + q) >> 25; + + /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ + h0 += 19 * q; + /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ + + h1 += h0 >> 26; h0 &= kBottom26Bits; + h2 += h1 >> 25; h1 &= kBottom25Bits; + h3 += h2 >> 26; h2 &= kBottom26Bits; + h4 += h3 >> 25; h3 &= kBottom25Bits; + h5 += h4 >> 26; h4 &= kBottom26Bits; + h6 += h5 >> 25; h5 &= kBottom25Bits; + h7 += h6 >> 26; h6 &= kBottom26Bits; + h8 += h7 >> 25; h7 &= kBottom25Bits; + h9 += h8 >> 26; h8 &= kBottom26Bits; + h9 &= kBottom25Bits; + /* h10 = carry9 */ + + /* + * Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. + * Have h0+...+2^230 h9 between 0 and 2^255-1; + * evidently 2^255 h10-2^255 q = 0. + * Goal: Output h0+...+2^230 h9. + */ + s[ 0] = (uint8_t) (h0 >> 0); + s[ 1] = (uint8_t) (h0 >> 8); + s[ 2] = (uint8_t) (h0 >> 16); + s[ 3] = (uint8_t)((h0 >> 24) | ((uint32_t)(h1) << 2)); + s[ 4] = (uint8_t) (h1 >> 6); + s[ 5] = (uint8_t) (h1 >> 14); + s[ 6] = (uint8_t)((h1 >> 22) | ((uint32_t)(h2) << 3)); + s[ 7] = (uint8_t) (h2 >> 5); + s[ 8] = (uint8_t) (h2 >> 13); + s[ 9] = (uint8_t)((h2 >> 21) | ((uint32_t)(h3) << 5)); + s[10] = (uint8_t) (h3 >> 3); + s[11] = (uint8_t) (h3 >> 11); + s[12] = (uint8_t)((h3 >> 19) | ((uint32_t)(h4) << 6)); + s[13] = (uint8_t) (h4 >> 2); + s[14] = (uint8_t) (h4 >> 10); + s[15] = (uint8_t) (h4 >> 18); + s[16] = (uint8_t) (h5 >> 0); + s[17] = (uint8_t) (h5 >> 8); + s[18] = (uint8_t) (h5 >> 16); + s[19] = (uint8_t)((h5 >> 24) | ((uint32_t)(h6) << 1)); + s[20] = (uint8_t) (h6 >> 7); + s[21] = (uint8_t) (h6 >> 15); + s[22] = (uint8_t)((h6 >> 23) | ((uint32_t)(h7) << 3)); + s[23] = (uint8_t) (h7 >> 5); + s[24] = (uint8_t) (h7 >> 13); + s[25] = (uint8_t)((h7 >> 21) | ((uint32_t)(h8) << 4)); + s[26] = (uint8_t) (h8 >> 4); + s[27] = (uint8_t) (h8 >> 12); + s[28] = (uint8_t)((h8 >> 20) | ((uint32_t)(h9) << 6)); + s[29] = (uint8_t) (h9 >> 2); + s[30] = (uint8_t) (h9 >> 10); + s[31] = (uint8_t) (h9 >> 18); } /* h = f */ -static void fe_copy(fe h, const fe f) { - memmove(h, f, sizeof(int32_t) * 10); +static void fe_copy(fe h, const fe f) +{ + memmove(h, f, sizeof(int32_t) * 10); } /* h = 0 */ -static void fe_0(fe h) { memset(h, 0, sizeof(int32_t) * 10); } +static void fe_0(fe h) +{ + memset(h, 0, sizeof(int32_t) * 10); +} /* h = 1 */ -static void fe_1(fe h) { - memset(h, 0, sizeof(int32_t) * 10); - h[0] = 1; +static void fe_1(fe h) +{ + memset(h, 0, sizeof(int32_t) * 10); + h[0] = 1; } -/* h = f + g +/* + * h = f + g + * * Can overlap h with f or g. * * Preconditions: @@ -951,15 +969,20 @@ static void fe_1(fe h) { * |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. * * Postconditions: - * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */ -static void fe_add(fe h, const fe f, const fe g) { - unsigned i; - for (i = 0; i < 10; i++) { - h[i] = f[i] + g[i]; - } + * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. + */ +static void fe_add(fe h, const fe f, const fe g) +{ + unsigned i; + + for (i = 0; i < 10; i++) { + h[i] = f[i] + g[i]; + } } -/* h = f - g +/* + * h = f - g + * * Can overlap h with f or g. * * Preconditions: @@ -967,15 +990,20 @@ static void fe_add(fe h, const fe f, const fe g) { * |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. * * Postconditions: - * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */ -static void fe_sub(fe h, const fe f, const fe g) { - unsigned i; - for (i = 0; i < 10; i++) { - h[i] = f[i] - g[i]; - } + * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. + */ +static void fe_sub(fe h, const fe f, const fe g) +{ + unsigned i; + + for (i = 0; i < 10; i++) { + h[i] = f[i] - g[i]; + } } -/* h = f * g +/* + * h = f * g + * * Can overlap h with f or g. * * Preconditions: @@ -1001,224 +1029,228 @@ static void fe_sub(fe h, const fe f, const fe g) { * 10 of them are 2-way parallelizable and vectorizable. * Can get away with 11 carries, but then data flow is much deeper. * - * With tighter constraints on inputs can squeeze carries into int32. */ -static void fe_mul(fe h, const fe f, const fe g) { - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t g0 = g[0]; - int32_t g1 = g[1]; - int32_t g2 = g[2]; - int32_t g3 = g[3]; - int32_t g4 = g[4]; - int32_t g5 = g[5]; - int32_t g6 = g[6]; - int32_t g7 = g[7]; - int32_t g8 = g[8]; - int32_t g9 = g[9]; - int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ - int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ - int32_t g3_19 = 19 * g3; - int32_t g4_19 = 19 * g4; - int32_t g5_19 = 19 * g5; - int32_t g6_19 = 19 * g6; - int32_t g7_19 = 19 * g7; - int32_t g8_19 = 19 * g8; - int32_t g9_19 = 19 * g9; - int32_t f1_2 = 2 * f1; - int32_t f3_2 = 2 * f3; - int32_t f5_2 = 2 * f5; - int32_t f7_2 = 2 * f7; - int32_t f9_2 = 2 * f9; - int64_t f0g0 = f0 * (int64_t) g0; - int64_t f0g1 = f0 * (int64_t) g1; - int64_t f0g2 = f0 * (int64_t) g2; - int64_t f0g3 = f0 * (int64_t) g3; - int64_t f0g4 = f0 * (int64_t) g4; - int64_t f0g5 = f0 * (int64_t) g5; - int64_t f0g6 = f0 * (int64_t) g6; - int64_t f0g7 = f0 * (int64_t) g7; - int64_t f0g8 = f0 * (int64_t) g8; - int64_t f0g9 = f0 * (int64_t) g9; - int64_t f1g0 = f1 * (int64_t) g0; - int64_t f1g1_2 = f1_2 * (int64_t) g1; - int64_t f1g2 = f1 * (int64_t) g2; - int64_t f1g3_2 = f1_2 * (int64_t) g3; - int64_t f1g4 = f1 * (int64_t) g4; - int64_t f1g5_2 = f1_2 * (int64_t) g5; - int64_t f1g6 = f1 * (int64_t) g6; - int64_t f1g7_2 = f1_2 * (int64_t) g7; - int64_t f1g8 = f1 * (int64_t) g8; - int64_t f1g9_38 = f1_2 * (int64_t) g9_19; - int64_t f2g0 = f2 * (int64_t) g0; - int64_t f2g1 = f2 * (int64_t) g1; - int64_t f2g2 = f2 * (int64_t) g2; - int64_t f2g3 = f2 * (int64_t) g3; - int64_t f2g4 = f2 * (int64_t) g4; - int64_t f2g5 = f2 * (int64_t) g5; - int64_t f2g6 = f2 * (int64_t) g6; - int64_t f2g7 = f2 * (int64_t) g7; - int64_t f2g8_19 = f2 * (int64_t) g8_19; - int64_t f2g9_19 = f2 * (int64_t) g9_19; - int64_t f3g0 = f3 * (int64_t) g0; - int64_t f3g1_2 = f3_2 * (int64_t) g1; - int64_t f3g2 = f3 * (int64_t) g2; - int64_t f3g3_2 = f3_2 * (int64_t) g3; - int64_t f3g4 = f3 * (int64_t) g4; - int64_t f3g5_2 = f3_2 * (int64_t) g5; - int64_t f3g6 = f3 * (int64_t) g6; - int64_t f3g7_38 = f3_2 * (int64_t) g7_19; - int64_t f3g8_19 = f3 * (int64_t) g8_19; - int64_t f3g9_38 = f3_2 * (int64_t) g9_19; - int64_t f4g0 = f4 * (int64_t) g0; - int64_t f4g1 = f4 * (int64_t) g1; - int64_t f4g2 = f4 * (int64_t) g2; - int64_t f4g3 = f4 * (int64_t) g3; - int64_t f4g4 = f4 * (int64_t) g4; - int64_t f4g5 = f4 * (int64_t) g5; - int64_t f4g6_19 = f4 * (int64_t) g6_19; - int64_t f4g7_19 = f4 * (int64_t) g7_19; - int64_t f4g8_19 = f4 * (int64_t) g8_19; - int64_t f4g9_19 = f4 * (int64_t) g9_19; - int64_t f5g0 = f5 * (int64_t) g0; - int64_t f5g1_2 = f5_2 * (int64_t) g1; - int64_t f5g2 = f5 * (int64_t) g2; - int64_t f5g3_2 = f5_2 * (int64_t) g3; - int64_t f5g4 = f5 * (int64_t) g4; - int64_t f5g5_38 = f5_2 * (int64_t) g5_19; - int64_t f5g6_19 = f5 * (int64_t) g6_19; - int64_t f5g7_38 = f5_2 * (int64_t) g7_19; - int64_t f5g8_19 = f5 * (int64_t) g8_19; - int64_t f5g9_38 = f5_2 * (int64_t) g9_19; - int64_t f6g0 = f6 * (int64_t) g0; - int64_t f6g1 = f6 * (int64_t) g1; - int64_t f6g2 = f6 * (int64_t) g2; - int64_t f6g3 = f6 * (int64_t) g3; - int64_t f6g4_19 = f6 * (int64_t) g4_19; - int64_t f6g5_19 = f6 * (int64_t) g5_19; - int64_t f6g6_19 = f6 * (int64_t) g6_19; - int64_t f6g7_19 = f6 * (int64_t) g7_19; - int64_t f6g8_19 = f6 * (int64_t) g8_19; - int64_t f6g9_19 = f6 * (int64_t) g9_19; - int64_t f7g0 = f7 * (int64_t) g0; - int64_t f7g1_2 = f7_2 * (int64_t) g1; - int64_t f7g2 = f7 * (int64_t) g2; - int64_t f7g3_38 = f7_2 * (int64_t) g3_19; - int64_t f7g4_19 = f7 * (int64_t) g4_19; - int64_t f7g5_38 = f7_2 * (int64_t) g5_19; - int64_t f7g6_19 = f7 * (int64_t) g6_19; - int64_t f7g7_38 = f7_2 * (int64_t) g7_19; - int64_t f7g8_19 = f7 * (int64_t) g8_19; - int64_t f7g9_38 = f7_2 * (int64_t) g9_19; - int64_t f8g0 = f8 * (int64_t) g0; - int64_t f8g1 = f8 * (int64_t) g1; - int64_t f8g2_19 = f8 * (int64_t) g2_19; - int64_t f8g3_19 = f8 * (int64_t) g3_19; - int64_t f8g4_19 = f8 * (int64_t) g4_19; - int64_t f8g5_19 = f8 * (int64_t) g5_19; - int64_t f8g6_19 = f8 * (int64_t) g6_19; - int64_t f8g7_19 = f8 * (int64_t) g7_19; - int64_t f8g8_19 = f8 * (int64_t) g8_19; - int64_t f8g9_19 = f8 * (int64_t) g9_19; - int64_t f9g0 = f9 * (int64_t) g0; - int64_t f9g1_38 = f9_2 * (int64_t) g1_19; - int64_t f9g2_19 = f9 * (int64_t) g2_19; - int64_t f9g3_38 = f9_2 * (int64_t) g3_19; - int64_t f9g4_19 = f9 * (int64_t) g4_19; - int64_t f9g5_38 = f9_2 * (int64_t) g5_19; - int64_t f9g6_19 = f9 * (int64_t) g6_19; - int64_t f9g7_38 = f9_2 * (int64_t) g7_19; - int64_t f9g8_19 = f9 * (int64_t) g8_19; - int64_t f9g9_38 = f9_2 * (int64_t) g9_19; - int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; - int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19; - int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38; - int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19; - int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38; - int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19; - int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38; - int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19; - int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38; - int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - /* |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) - * i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 - * |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) - * i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 */ - - carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; - carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; - /* |h0| <= 2^25 */ - /* |h4| <= 2^25 */ - /* |h1| <= 1.71*2^59 */ - /* |h5| <= 1.71*2^59 */ - - carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; - carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; - /* |h1| <= 2^24; from now on fits into int32 */ - /* |h5| <= 2^24; from now on fits into int32 */ - /* |h2| <= 1.41*2^60 */ - /* |h6| <= 1.41*2^60 */ - - carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; - carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; - /* |h2| <= 2^25; from now on fits into int32 unchanged */ - /* |h6| <= 2^25; from now on fits into int32 unchanged */ - /* |h3| <= 1.71*2^59 */ - /* |h7| <= 1.71*2^59 */ - - carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; - carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; - /* |h3| <= 2^24; from now on fits into int32 unchanged */ - /* |h7| <= 2^24; from now on fits into int32 unchanged */ - /* |h4| <= 1.72*2^34 */ - /* |h8| <= 1.41*2^60 */ - - carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; - carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; - /* |h4| <= 2^25; from now on fits into int32 unchanged */ - /* |h8| <= 2^25; from now on fits into int32 unchanged */ - /* |h5| <= 1.01*2^24 */ - /* |h9| <= 1.71*2^59 */ - - carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; - /* |h9| <= 2^24; from now on fits into int32 unchanged */ - /* |h0| <= 1.1*2^39 */ - - carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; - /* |h0| <= 2^25; from now on fits into int32 unchanged */ - /* |h1| <= 1.01*2^24 */ - - h[0] = (int32_t)h0; - h[1] = (int32_t)h1; - h[2] = (int32_t)h2; - h[3] = (int32_t)h3; - h[4] = (int32_t)h4; - h[5] = (int32_t)h5; - h[6] = (int32_t)h6; - h[7] = (int32_t)h7; - h[8] = (int32_t)h8; - h[9] = (int32_t)h9; + * With tighter constraints on inputs can squeeze carries into int32. + */ +static void fe_mul(fe h, const fe f, const fe g) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t g0 = g[0]; + int32_t g1 = g[1]; + int32_t g2 = g[2]; + int32_t g3 = g[3]; + int32_t g4 = g[4]; + int32_t g5 = g[5]; + int32_t g6 = g[6]; + int32_t g7 = g[7]; + int32_t g8 = g[8]; + int32_t g9 = g[9]; + int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ + int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ + int32_t g3_19 = 19 * g3; + int32_t g4_19 = 19 * g4; + int32_t g5_19 = 19 * g5; + int32_t g6_19 = 19 * g6; + int32_t g7_19 = 19 * g7; + int32_t g8_19 = 19 * g8; + int32_t g9_19 = 19 * g9; + int32_t f1_2 = 2 * f1; + int32_t f3_2 = 2 * f3; + int32_t f5_2 = 2 * f5; + int32_t f7_2 = 2 * f7; + int32_t f9_2 = 2 * f9; + int64_t f0g0 = f0 * (int64_t) g0; + int64_t f0g1 = f0 * (int64_t) g1; + int64_t f0g2 = f0 * (int64_t) g2; + int64_t f0g3 = f0 * (int64_t) g3; + int64_t f0g4 = f0 * (int64_t) g4; + int64_t f0g5 = f0 * (int64_t) g5; + int64_t f0g6 = f0 * (int64_t) g6; + int64_t f0g7 = f0 * (int64_t) g7; + int64_t f0g8 = f0 * (int64_t) g8; + int64_t f0g9 = f0 * (int64_t) g9; + int64_t f1g0 = f1 * (int64_t) g0; + int64_t f1g1_2 = f1_2 * (int64_t) g1; + int64_t f1g2 = f1 * (int64_t) g2; + int64_t f1g3_2 = f1_2 * (int64_t) g3; + int64_t f1g4 = f1 * (int64_t) g4; + int64_t f1g5_2 = f1_2 * (int64_t) g5; + int64_t f1g6 = f1 * (int64_t) g6; + int64_t f1g7_2 = f1_2 * (int64_t) g7; + int64_t f1g8 = f1 * (int64_t) g8; + int64_t f1g9_38 = f1_2 * (int64_t) g9_19; + int64_t f2g0 = f2 * (int64_t) g0; + int64_t f2g1 = f2 * (int64_t) g1; + int64_t f2g2 = f2 * (int64_t) g2; + int64_t f2g3 = f2 * (int64_t) g3; + int64_t f2g4 = f2 * (int64_t) g4; + int64_t f2g5 = f2 * (int64_t) g5; + int64_t f2g6 = f2 * (int64_t) g6; + int64_t f2g7 = f2 * (int64_t) g7; + int64_t f2g8_19 = f2 * (int64_t) g8_19; + int64_t f2g9_19 = f2 * (int64_t) g9_19; + int64_t f3g0 = f3 * (int64_t) g0; + int64_t f3g1_2 = f3_2 * (int64_t) g1; + int64_t f3g2 = f3 * (int64_t) g2; + int64_t f3g3_2 = f3_2 * (int64_t) g3; + int64_t f3g4 = f3 * (int64_t) g4; + int64_t f3g5_2 = f3_2 * (int64_t) g5; + int64_t f3g6 = f3 * (int64_t) g6; + int64_t f3g7_38 = f3_2 * (int64_t) g7_19; + int64_t f3g8_19 = f3 * (int64_t) g8_19; + int64_t f3g9_38 = f3_2 * (int64_t) g9_19; + int64_t f4g0 = f4 * (int64_t) g0; + int64_t f4g1 = f4 * (int64_t) g1; + int64_t f4g2 = f4 * (int64_t) g2; + int64_t f4g3 = f4 * (int64_t) g3; + int64_t f4g4 = f4 * (int64_t) g4; + int64_t f4g5 = f4 * (int64_t) g5; + int64_t f4g6_19 = f4 * (int64_t) g6_19; + int64_t f4g7_19 = f4 * (int64_t) g7_19; + int64_t f4g8_19 = f4 * (int64_t) g8_19; + int64_t f4g9_19 = f4 * (int64_t) g9_19; + int64_t f5g0 = f5 * (int64_t) g0; + int64_t f5g1_2 = f5_2 * (int64_t) g1; + int64_t f5g2 = f5 * (int64_t) g2; + int64_t f5g3_2 = f5_2 * (int64_t) g3; + int64_t f5g4 = f5 * (int64_t) g4; + int64_t f5g5_38 = f5_2 * (int64_t) g5_19; + int64_t f5g6_19 = f5 * (int64_t) g6_19; + int64_t f5g7_38 = f5_2 * (int64_t) g7_19; + int64_t f5g8_19 = f5 * (int64_t) g8_19; + int64_t f5g9_38 = f5_2 * (int64_t) g9_19; + int64_t f6g0 = f6 * (int64_t) g0; + int64_t f6g1 = f6 * (int64_t) g1; + int64_t f6g2 = f6 * (int64_t) g2; + int64_t f6g3 = f6 * (int64_t) g3; + int64_t f6g4_19 = f6 * (int64_t) g4_19; + int64_t f6g5_19 = f6 * (int64_t) g5_19; + int64_t f6g6_19 = f6 * (int64_t) g6_19; + int64_t f6g7_19 = f6 * (int64_t) g7_19; + int64_t f6g8_19 = f6 * (int64_t) g8_19; + int64_t f6g9_19 = f6 * (int64_t) g9_19; + int64_t f7g0 = f7 * (int64_t) g0; + int64_t f7g1_2 = f7_2 * (int64_t) g1; + int64_t f7g2 = f7 * (int64_t) g2; + int64_t f7g3_38 = f7_2 * (int64_t) g3_19; + int64_t f7g4_19 = f7 * (int64_t) g4_19; + int64_t f7g5_38 = f7_2 * (int64_t) g5_19; + int64_t f7g6_19 = f7 * (int64_t) g6_19; + int64_t f7g7_38 = f7_2 * (int64_t) g7_19; + int64_t f7g8_19 = f7 * (int64_t) g8_19; + int64_t f7g9_38 = f7_2 * (int64_t) g9_19; + int64_t f8g0 = f8 * (int64_t) g0; + int64_t f8g1 = f8 * (int64_t) g1; + int64_t f8g2_19 = f8 * (int64_t) g2_19; + int64_t f8g3_19 = f8 * (int64_t) g3_19; + int64_t f8g4_19 = f8 * (int64_t) g4_19; + int64_t f8g5_19 = f8 * (int64_t) g5_19; + int64_t f8g6_19 = f8 * (int64_t) g6_19; + int64_t f8g7_19 = f8 * (int64_t) g7_19; + int64_t f8g8_19 = f8 * (int64_t) g8_19; + int64_t f8g9_19 = f8 * (int64_t) g9_19; + int64_t f9g0 = f9 * (int64_t) g0; + int64_t f9g1_38 = f9_2 * (int64_t) g1_19; + int64_t f9g2_19 = f9 * (int64_t) g2_19; + int64_t f9g3_38 = f9_2 * (int64_t) g3_19; + int64_t f9g4_19 = f9 * (int64_t) g4_19; + int64_t f9g5_38 = f9_2 * (int64_t) g5_19; + int64_t f9g6_19 = f9 * (int64_t) g6_19; + int64_t f9g7_38 = f9_2 * (int64_t) g7_19; + int64_t f9g8_19 = f9 * (int64_t) g8_19; + int64_t f9g9_38 = f9_2 * (int64_t) g9_19; + int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38; + int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19; + int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38; + int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19; + int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38; + int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19; + int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38; + int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19; + int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38; + int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 ; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + /* |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) + * i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 + * |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) + * i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 */ + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + /* |h0| <= 2^25 */ + /* |h4| <= 2^25 */ + /* |h1| <= 1.71*2^59 */ + /* |h5| <= 1.71*2^59 */ + + carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; + carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; + /* |h1| <= 2^24; from now on fits into int32 */ + /* |h5| <= 2^24; from now on fits into int32 */ + /* |h2| <= 1.41*2^60 */ + /* |h6| <= 1.41*2^60 */ + + carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; + carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; + /* |h2| <= 2^25; from now on fits into int32 unchanged */ + /* |h6| <= 2^25; from now on fits into int32 unchanged */ + /* |h3| <= 1.71*2^59 */ + /* |h7| <= 1.71*2^59 */ + + carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; + carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; + /* |h3| <= 2^24; from now on fits into int32 unchanged */ + /* |h7| <= 2^24; from now on fits into int32 unchanged */ + /* |h4| <= 1.72*2^34 */ + /* |h8| <= 1.41*2^60 */ + + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; + /* |h4| <= 2^25; from now on fits into int32 unchanged */ + /* |h8| <= 2^25; from now on fits into int32 unchanged */ + /* |h5| <= 1.01*2^24 */ + /* |h9| <= 1.71*2^59 */ + + carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; + /* |h9| <= 2^24; from now on fits into int32 unchanged */ + /* |h0| <= 1.1*2^39 */ + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + /* |h0| <= 2^25; from now on fits into int32 unchanged */ + /* |h1| <= 1.01*2^24 */ + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; } -/* h = f * f +/* + * h = f * f + * * Can overlap h with f. * * Preconditions: @@ -1227,286 +1259,307 @@ static void fe_mul(fe h, const fe f, const fe g) { * Postconditions: * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. * - * See fe_mul.c for discussion of implementation strategy. */ -static void fe_sq(fe h, const fe f) { - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t f0_2 = 2 * f0; - int32_t f1_2 = 2 * f1; - int32_t f2_2 = 2 * f2; - int32_t f3_2 = 2 * f3; - int32_t f4_2 = 2 * f4; - int32_t f5_2 = 2 * f5; - int32_t f6_2 = 2 * f6; - int32_t f7_2 = 2 * f7; - int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ - int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ - int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ - int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ - int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ - int64_t f0f0 = f0 * (int64_t) f0; - int64_t f0f1_2 = f0_2 * (int64_t) f1; - int64_t f0f2_2 = f0_2 * (int64_t) f2; - int64_t f0f3_2 = f0_2 * (int64_t) f3; - int64_t f0f4_2 = f0_2 * (int64_t) f4; - int64_t f0f5_2 = f0_2 * (int64_t) f5; - int64_t f0f6_2 = f0_2 * (int64_t) f6; - int64_t f0f7_2 = f0_2 * (int64_t) f7; - int64_t f0f8_2 = f0_2 * (int64_t) f8; - int64_t f0f9_2 = f0_2 * (int64_t) f9; - int64_t f1f1_2 = f1_2 * (int64_t) f1; - int64_t f1f2_2 = f1_2 * (int64_t) f2; - int64_t f1f3_4 = f1_2 * (int64_t) f3_2; - int64_t f1f4_2 = f1_2 * (int64_t) f4; - int64_t f1f5_4 = f1_2 * (int64_t) f5_2; - int64_t f1f6_2 = f1_2 * (int64_t) f6; - int64_t f1f7_4 = f1_2 * (int64_t) f7_2; - int64_t f1f8_2 = f1_2 * (int64_t) f8; - int64_t f1f9_76 = f1_2 * (int64_t) f9_38; - int64_t f2f2 = f2 * (int64_t) f2; - int64_t f2f3_2 = f2_2 * (int64_t) f3; - int64_t f2f4_2 = f2_2 * (int64_t) f4; - int64_t f2f5_2 = f2_2 * (int64_t) f5; - int64_t f2f6_2 = f2_2 * (int64_t) f6; - int64_t f2f7_2 = f2_2 * (int64_t) f7; - int64_t f2f8_38 = f2_2 * (int64_t) f8_19; - int64_t f2f9_38 = f2 * (int64_t) f9_38; - int64_t f3f3_2 = f3_2 * (int64_t) f3; - int64_t f3f4_2 = f3_2 * (int64_t) f4; - int64_t f3f5_4 = f3_2 * (int64_t) f5_2; - int64_t f3f6_2 = f3_2 * (int64_t) f6; - int64_t f3f7_76 = f3_2 * (int64_t) f7_38; - int64_t f3f8_38 = f3_2 * (int64_t) f8_19; - int64_t f3f9_76 = f3_2 * (int64_t) f9_38; - int64_t f4f4 = f4 * (int64_t) f4; - int64_t f4f5_2 = f4_2 * (int64_t) f5; - int64_t f4f6_38 = f4_2 * (int64_t) f6_19; - int64_t f4f7_38 = f4 * (int64_t) f7_38; - int64_t f4f8_38 = f4_2 * (int64_t) f8_19; - int64_t f4f9_38 = f4 * (int64_t) f9_38; - int64_t f5f5_38 = f5 * (int64_t) f5_38; - int64_t f5f6_38 = f5_2 * (int64_t) f6_19; - int64_t f5f7_76 = f5_2 * (int64_t) f7_38; - int64_t f5f8_38 = f5_2 * (int64_t) f8_19; - int64_t f5f9_76 = f5_2 * (int64_t) f9_38; - int64_t f6f6_19 = f6 * (int64_t) f6_19; - int64_t f6f7_38 = f6 * (int64_t) f7_38; - int64_t f6f8_38 = f6_2 * (int64_t) f8_19; - int64_t f6f9_38 = f6 * (int64_t) f9_38; - int64_t f7f7_38 = f7 * (int64_t) f7_38; - int64_t f7f8_38 = f7_2 * (int64_t) f8_19; - int64_t f7f9_76 = f7_2 * (int64_t) f9_38; - int64_t f8f8_19 = f8 * (int64_t) f8_19; - int64_t f8f9_38 = f8 * (int64_t) f9_38; - int64_t f9f9_38 = f9 * (int64_t) f9_38; - int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; - int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; - int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; - int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; - int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; - int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; - int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; - int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; - int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; - int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; - carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; - - carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; - carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; - - carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; - carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; - - carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; - carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; - - carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; - carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; - - carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; - - carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; - - h[0] = (int32_t)h0; - h[1] = (int32_t)h1; - h[2] = (int32_t)h2; - h[3] = (int32_t)h3; - h[4] = (int32_t)h4; - h[5] = (int32_t)h5; - h[6] = (int32_t)h6; - h[7] = (int32_t)h7; - h[8] = (int32_t)h8; - h[9] = (int32_t)h9; + * See fe_mul.c for discussion of implementation strategy. + */ +static void fe_sq(fe h, const fe f) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t f0_2 = 2 * f0; + int32_t f1_2 = 2 * f1; + int32_t f2_2 = 2 * f2; + int32_t f3_2 = 2 * f3; + int32_t f4_2 = 2 * f4; + int32_t f5_2 = 2 * f5; + int32_t f6_2 = 2 * f6; + int32_t f7_2 = 2 * f7; + int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ + int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ + int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ + int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ + int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ + int64_t f0f0 = f0 * (int64_t) f0; + int64_t f0f1_2 = f0_2 * (int64_t) f1; + int64_t f0f2_2 = f0_2 * (int64_t) f2; + int64_t f0f3_2 = f0_2 * (int64_t) f3; + int64_t f0f4_2 = f0_2 * (int64_t) f4; + int64_t f0f5_2 = f0_2 * (int64_t) f5; + int64_t f0f6_2 = f0_2 * (int64_t) f6; + int64_t f0f7_2 = f0_2 * (int64_t) f7; + int64_t f0f8_2 = f0_2 * (int64_t) f8; + int64_t f0f9_2 = f0_2 * (int64_t) f9; + int64_t f1f1_2 = f1_2 * (int64_t) f1; + int64_t f1f2_2 = f1_2 * (int64_t) f2; + int64_t f1f3_4 = f1_2 * (int64_t) f3_2; + int64_t f1f4_2 = f1_2 * (int64_t) f4; + int64_t f1f5_4 = f1_2 * (int64_t) f5_2; + int64_t f1f6_2 = f1_2 * (int64_t) f6; + int64_t f1f7_4 = f1_2 * (int64_t) f7_2; + int64_t f1f8_2 = f1_2 * (int64_t) f8; + int64_t f1f9_76 = f1_2 * (int64_t) f9_38; + int64_t f2f2 = f2 * (int64_t) f2; + int64_t f2f3_2 = f2_2 * (int64_t) f3; + int64_t f2f4_2 = f2_2 * (int64_t) f4; + int64_t f2f5_2 = f2_2 * (int64_t) f5; + int64_t f2f6_2 = f2_2 * (int64_t) f6; + int64_t f2f7_2 = f2_2 * (int64_t) f7; + int64_t f2f8_38 = f2_2 * (int64_t) f8_19; + int64_t f2f9_38 = f2 * (int64_t) f9_38; + int64_t f3f3_2 = f3_2 * (int64_t) f3; + int64_t f3f4_2 = f3_2 * (int64_t) f4; + int64_t f3f5_4 = f3_2 * (int64_t) f5_2; + int64_t f3f6_2 = f3_2 * (int64_t) f6; + int64_t f3f7_76 = f3_2 * (int64_t) f7_38; + int64_t f3f8_38 = f3_2 * (int64_t) f8_19; + int64_t f3f9_76 = f3_2 * (int64_t) f9_38; + int64_t f4f4 = f4 * (int64_t) f4; + int64_t f4f5_2 = f4_2 * (int64_t) f5; + int64_t f4f6_38 = f4_2 * (int64_t) f6_19; + int64_t f4f7_38 = f4 * (int64_t) f7_38; + int64_t f4f8_38 = f4_2 * (int64_t) f8_19; + int64_t f4f9_38 = f4 * (int64_t) f9_38; + int64_t f5f5_38 = f5 * (int64_t) f5_38; + int64_t f5f6_38 = f5_2 * (int64_t) f6_19; + int64_t f5f7_76 = f5_2 * (int64_t) f7_38; + int64_t f5f8_38 = f5_2 * (int64_t) f8_19; + int64_t f5f9_76 = f5_2 * (int64_t) f9_38; + int64_t f6f6_19 = f6 * (int64_t) f6_19; + int64_t f6f7_38 = f6 * (int64_t) f7_38; + int64_t f6f8_38 = f6_2 * (int64_t) f8_19; + int64_t f6f9_38 = f6 * (int64_t) f9_38; + int64_t f7f7_38 = f7 * (int64_t) f7_38; + int64_t f7f8_38 = f7_2 * (int64_t) f8_19; + int64_t f7f9_76 = f7_2 * (int64_t) f9_38; + int64_t f8f8_19 = f8 * (int64_t) f8_19; + int64_t f8f9_38 = f8 * (int64_t) f9_38; + int64_t f9f9_38 = f9 * (int64_t) f9_38; + int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; + int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; + int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19; + int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38; + int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; + int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38; + int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19; + int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38; + int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38; + int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + + carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; + carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; + + carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; + carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; + + carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; + carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; + + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; + + carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; } -static void fe_invert(fe out, const fe z) { - fe t0; - fe t1; - fe t2; - fe t3; - int i; +static void fe_invert(fe out, const fe z) +{ + fe t0; + fe t1; + fe t2; + fe t3; + int i; - /* - * Compute z ** -1 = z ** (2 ** 255 - 19 - 2) with the exponent as - * 2 ** 255 - 21 = (2 ** 5) * (2 ** 250 - 1) + 11. - */ + /* + * Compute z ** -1 = z ** (2 ** 255 - 19 - 2) with the exponent as + * 2 ** 255 - 21 = (2 ** 5) * (2 ** 250 - 1) + 11. + */ - /* t0 = z ** 2 */ - fe_sq(t0, z); + /* t0 = z ** 2 */ + fe_sq(t0, z); - /* t1 = t0 ** (2 ** 2) = z ** 8 */ - fe_sq(t1, t0); - fe_sq(t1, t1); + /* t1 = t0 ** (2 ** 2) = z ** 8 */ + fe_sq(t1, t0); + fe_sq(t1, t1); - /* t1 = z * t1 = z ** 9 */ - fe_mul(t1, z, t1); - /* t0 = t0 * t1 = z ** 11 -- stash t0 away for the end. */ - fe_mul(t0, t0, t1); + /* t1 = z * t1 = z ** 9 */ + fe_mul(t1, z, t1); + /* t0 = t0 * t1 = z ** 11 -- stash t0 away for the end. */ + fe_mul(t0, t0, t1); - /* t2 = t0 ** 2 = z ** 22 */ - fe_sq(t2, t0); + /* t2 = t0 ** 2 = z ** 22 */ + fe_sq(t2, t0); - /* t1 = t1 * t2 = z ** (2 ** 5 - 1) */ - fe_mul(t1, t1, t2); + /* t1 = t1 * t2 = z ** (2 ** 5 - 1) */ + fe_mul(t1, t1, t2); - /* t2 = t1 ** (2 ** 5) = z ** ((2 ** 5) * (2 ** 5 - 1)) */ - fe_sq(t2, t1); - for (i = 1; i < 5; ++i) { - fe_sq(t2, t2); - } + /* t2 = t1 ** (2 ** 5) = z ** ((2 ** 5) * (2 ** 5 - 1)) */ + fe_sq(t2, t1); + for (i = 1; i < 5; ++i) { + fe_sq(t2, t2); + } - /* t1 = t1 * t2 = z ** ((2 ** 5 + 1) * (2 ** 5 - 1)) = z ** (2 ** 10 - 1) */ - fe_mul(t1, t2, t1); + /* t1 = t1 * t2 = z ** ((2 ** 5 + 1) * (2 ** 5 - 1)) = z ** (2 ** 10 - 1) */ + fe_mul(t1, t2, t1); - /* Continuing similarly... */ + /* Continuing similarly... */ - /* t2 = z ** (2 ** 20 - 1) */ - fe_sq(t2, t1); - for (i = 1; i < 10; ++i) { - fe_sq(t2, t2); - } - fe_mul(t2, t2, t1); - - /* t2 = z ** (2 ** 40 - 1) */ - fe_sq(t3, t2); - for (i = 1; i < 20; ++i) { - fe_sq(t3, t3); - } - fe_mul(t2, t3, t2); - - /* t2 = z ** (2 ** 10) * (2 ** 40 - 1) */ - for (i = 0; i < 10; ++i) { - fe_sq(t2, t2); - } - /* t1 = z ** (2 ** 50 - 1) */ - fe_mul(t1, t2, t1); + /* t2 = z ** (2 ** 20 - 1) */ + fe_sq(t2, t1); + for (i = 1; i < 10; ++i) { + fe_sq(t2, t2); + } + fe_mul(t2, t2, t1); - /* t2 = z ** (2 ** 100 - 1) */ - fe_sq(t2, t1); - for (i = 1; i < 50; ++i) { - fe_sq(t2, t2); - } - fe_mul(t2, t2, t1); - - /* t2 = z ** (2 ** 200 - 1) */ - fe_sq(t3, t2); - for (i = 1; i < 100; ++i) { - fe_sq(t3, t3); - } - fe_mul(t2, t3, t2); - - /* t2 = z ** ((2 ** 50) * (2 ** 200 - 1) */ - fe_sq(t2, t2); - for (i = 1; i < 50; ++i) { + /* t2 = z ** (2 ** 40 - 1) */ + fe_sq(t3, t2); + for (i = 1; i < 20; ++i) { + fe_sq(t3, t3); + } + fe_mul(t2, t3, t2); + + /* t2 = z ** (2 ** 10) * (2 ** 40 - 1) */ + for (i = 0; i < 10; ++i) { + fe_sq(t2, t2); + } + /* t1 = z ** (2 ** 50 - 1) */ + fe_mul(t1, t2, t1); + + /* t2 = z ** (2 ** 100 - 1) */ + fe_sq(t2, t1); + for (i = 1; i < 50; ++i) { + fe_sq(t2, t2); + } + fe_mul(t2, t2, t1); + + /* t2 = z ** (2 ** 200 - 1) */ + fe_sq(t3, t2); + for (i = 1; i < 100; ++i) { + fe_sq(t3, t3); + } + fe_mul(t2, t3, t2); + + /* t2 = z ** ((2 ** 50) * (2 ** 200 - 1) */ fe_sq(t2, t2); - } + for (i = 1; i < 50; ++i) { + fe_sq(t2, t2); + } - /* t1 = z ** (2 ** 250 - 1) */ - fe_mul(t1, t2, t1); + /* t1 = z ** (2 ** 250 - 1) */ + fe_mul(t1, t2, t1); - /* t1 = z ** ((2 ** 5) * (2 ** 250 - 1)) */ - fe_sq(t1, t1); - for (i = 1; i < 5; ++i) { + /* t1 = z ** ((2 ** 5) * (2 ** 250 - 1)) */ fe_sq(t1, t1); - } + for (i = 1; i < 5; ++i) { + fe_sq(t1, t1); + } - /* Recall t0 = z ** 11; out = z ** (2 ** 255 - 21) */ - fe_mul(out, t1, t0); + /* Recall t0 = z ** 11; out = z ** (2 ** 255 - 21) */ + fe_mul(out, t1, t0); } -/* h = -f +/* + * h = -f * * Preconditions: * |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. * * Postconditions: - * |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */ -static void fe_neg(fe h, const fe f) { - unsigned i; - for (i = 0; i < 10; i++) { - h[i] = -f[i]; - } + * |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + */ +static void fe_neg(fe h, const fe f) +{ + unsigned i; + + for (i = 0; i < 10; i++) { + h[i] = -f[i]; + } } -/* Replace (f,g) with (g,g) if b == 1; +/* + * Replace (f,g) with (g,g) if b == 1; * replace (f,g) with (f,g) if b == 0. * - * Preconditions: b in {0,1}. */ -static void fe_cmov(fe f, const fe g, unsigned b) { - size_t i; - b = 0-b; - for (i = 0; i < 10; i++) { - int32_t x = f[i] ^ g[i]; - x &= b; - f[i] ^= x; - } + * Preconditions: b in {0,1}. + */ +static void fe_cmov(fe f, const fe g, unsigned b) +{ + size_t i; + + b = 0-b; + for (i = 0; i < 10; i++) { + int32_t x = f[i] ^ g[i]; + x &= b; + f[i] ^= x; + } } -/* return 0 if f == 0 +/* + * return 0 if f == 0 * return 1 if f != 0 * * Preconditions: - * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */ -static int fe_isnonzero(const fe f) { - uint8_t s[32]; - static const uint8_t zero[32] = {0}; - fe_tobytes(s, f); + * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. + */ +static int fe_isnonzero(const fe f) +{ + uint8_t s[32]; + static const uint8_t zero[32] = {0}; - return CRYPTO_memcmp(s, zero, sizeof(zero)) != 0; + fe_tobytes(s, f); + + return CRYPTO_memcmp(s, zero, sizeof(zero)) != 0; } -/* return 1 if f is in {1,3,5,...,q-2} +/* + * return 1 if f is in {1,3,5,...,q-2} * return 0 if f is in {0,2,4,...,q-1} * * Preconditions: - * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */ -static int fe_isnegative(const fe f) { - uint8_t s[32]; - fe_tobytes(s, f); - return s[0] & 1; + * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. + */ +static int fe_isnegative(const fe f) +{ + uint8_t s[32]; + + fe_tobytes(s, f); + return s[0] & 1; } -/* h = 2 * f * f +/* + * h = 2 * f * f + * * Can overlap h with f. * * Preconditions: @@ -1515,208 +1568,212 @@ static int fe_isnegative(const fe f) { * Postconditions: * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. * - * See fe_mul.c for discussion of implementation strategy. */ -static void fe_sq2(fe h, const fe f) { - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int32_t f0_2 = 2 * f0; - int32_t f1_2 = 2 * f1; - int32_t f2_2 = 2 * f2; - int32_t f3_2 = 2 * f3; - int32_t f4_2 = 2 * f4; - int32_t f5_2 = 2 * f5; - int32_t f6_2 = 2 * f6; - int32_t f7_2 = 2 * f7; - int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ - int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ - int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ - int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ - int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ - int64_t f0f0 = f0 * (int64_t) f0; - int64_t f0f1_2 = f0_2 * (int64_t) f1; - int64_t f0f2_2 = f0_2 * (int64_t) f2; - int64_t f0f3_2 = f0_2 * (int64_t) f3; - int64_t f0f4_2 = f0_2 * (int64_t) f4; - int64_t f0f5_2 = f0_2 * (int64_t) f5; - int64_t f0f6_2 = f0_2 * (int64_t) f6; - int64_t f0f7_2 = f0_2 * (int64_t) f7; - int64_t f0f8_2 = f0_2 * (int64_t) f8; - int64_t f0f9_2 = f0_2 * (int64_t) f9; - int64_t f1f1_2 = f1_2 * (int64_t) f1; - int64_t f1f2_2 = f1_2 * (int64_t) f2; - int64_t f1f3_4 = f1_2 * (int64_t) f3_2; - int64_t f1f4_2 = f1_2 * (int64_t) f4; - int64_t f1f5_4 = f1_2 * (int64_t) f5_2; - int64_t f1f6_2 = f1_2 * (int64_t) f6; - int64_t f1f7_4 = f1_2 * (int64_t) f7_2; - int64_t f1f8_2 = f1_2 * (int64_t) f8; - int64_t f1f9_76 = f1_2 * (int64_t) f9_38; - int64_t f2f2 = f2 * (int64_t) f2; - int64_t f2f3_2 = f2_2 * (int64_t) f3; - int64_t f2f4_2 = f2_2 * (int64_t) f4; - int64_t f2f5_2 = f2_2 * (int64_t) f5; - int64_t f2f6_2 = f2_2 * (int64_t) f6; - int64_t f2f7_2 = f2_2 * (int64_t) f7; - int64_t f2f8_38 = f2_2 * (int64_t) f8_19; - int64_t f2f9_38 = f2 * (int64_t) f9_38; - int64_t f3f3_2 = f3_2 * (int64_t) f3; - int64_t f3f4_2 = f3_2 * (int64_t) f4; - int64_t f3f5_4 = f3_2 * (int64_t) f5_2; - int64_t f3f6_2 = f3_2 * (int64_t) f6; - int64_t f3f7_76 = f3_2 * (int64_t) f7_38; - int64_t f3f8_38 = f3_2 * (int64_t) f8_19; - int64_t f3f9_76 = f3_2 * (int64_t) f9_38; - int64_t f4f4 = f4 * (int64_t) f4; - int64_t f4f5_2 = f4_2 * (int64_t) f5; - int64_t f4f6_38 = f4_2 * (int64_t) f6_19; - int64_t f4f7_38 = f4 * (int64_t) f7_38; - int64_t f4f8_38 = f4_2 * (int64_t) f8_19; - int64_t f4f9_38 = f4 * (int64_t) f9_38; - int64_t f5f5_38 = f5 * (int64_t) f5_38; - int64_t f5f6_38 = f5_2 * (int64_t) f6_19; - int64_t f5f7_76 = f5_2 * (int64_t) f7_38; - int64_t f5f8_38 = f5_2 * (int64_t) f8_19; - int64_t f5f9_76 = f5_2 * (int64_t) f9_38; - int64_t f6f6_19 = f6 * (int64_t) f6_19; - int64_t f6f7_38 = f6 * (int64_t) f7_38; - int64_t f6f8_38 = f6_2 * (int64_t) f8_19; - int64_t f6f9_38 = f6 * (int64_t) f9_38; - int64_t f7f7_38 = f7 * (int64_t) f7_38; - int64_t f7f8_38 = f7_2 * (int64_t) f8_19; - int64_t f7f9_76 = f7_2 * (int64_t) f9_38; - int64_t f8f8_19 = f8 * (int64_t) f8_19; - int64_t f8f9_38 = f8 * (int64_t) f9_38; - int64_t f9f9_38 = f9 * (int64_t) f9_38; - int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; - int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; - int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; - int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; - int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; - int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; - int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; - int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; - int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; - int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - h0 += h0; - h1 += h1; - h2 += h2; - h3 += h3; - h4 += h4; - h5 += h5; - h6 += h6; - h7 += h7; - h8 += h8; - h9 += h9; - - carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; - carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; - - carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; - carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; - - carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; - carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; - - carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; - carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; - - carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; - carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; - - carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; - - carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; - - h[0] = (int32_t)h0; - h[1] = (int32_t)h1; - h[2] = (int32_t)h2; - h[3] = (int32_t)h3; - h[4] = (int32_t)h4; - h[5] = (int32_t)h5; - h[6] = (int32_t)h6; - h[7] = (int32_t)h7; - h[8] = (int32_t)h8; - h[9] = (int32_t)h9; + * See fe_mul.c for discussion of implementation strategy. + */ +static void fe_sq2(fe h, const fe f) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t f0_2 = 2 * f0; + int32_t f1_2 = 2 * f1; + int32_t f2_2 = 2 * f2; + int32_t f3_2 = 2 * f3; + int32_t f4_2 = 2 * f4; + int32_t f5_2 = 2 * f5; + int32_t f6_2 = 2 * f6; + int32_t f7_2 = 2 * f7; + int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ + int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ + int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ + int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ + int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ + int64_t f0f0 = f0 * (int64_t) f0; + int64_t f0f1_2 = f0_2 * (int64_t) f1; + int64_t f0f2_2 = f0_2 * (int64_t) f2; + int64_t f0f3_2 = f0_2 * (int64_t) f3; + int64_t f0f4_2 = f0_2 * (int64_t) f4; + int64_t f0f5_2 = f0_2 * (int64_t) f5; + int64_t f0f6_2 = f0_2 * (int64_t) f6; + int64_t f0f7_2 = f0_2 * (int64_t) f7; + int64_t f0f8_2 = f0_2 * (int64_t) f8; + int64_t f0f9_2 = f0_2 * (int64_t) f9; + int64_t f1f1_2 = f1_2 * (int64_t) f1; + int64_t f1f2_2 = f1_2 * (int64_t) f2; + int64_t f1f3_4 = f1_2 * (int64_t) f3_2; + int64_t f1f4_2 = f1_2 * (int64_t) f4; + int64_t f1f5_4 = f1_2 * (int64_t) f5_2; + int64_t f1f6_2 = f1_2 * (int64_t) f6; + int64_t f1f7_4 = f1_2 * (int64_t) f7_2; + int64_t f1f8_2 = f1_2 * (int64_t) f8; + int64_t f1f9_76 = f1_2 * (int64_t) f9_38; + int64_t f2f2 = f2 * (int64_t) f2; + int64_t f2f3_2 = f2_2 * (int64_t) f3; + int64_t f2f4_2 = f2_2 * (int64_t) f4; + int64_t f2f5_2 = f2_2 * (int64_t) f5; + int64_t f2f6_2 = f2_2 * (int64_t) f6; + int64_t f2f7_2 = f2_2 * (int64_t) f7; + int64_t f2f8_38 = f2_2 * (int64_t) f8_19; + int64_t f2f9_38 = f2 * (int64_t) f9_38; + int64_t f3f3_2 = f3_2 * (int64_t) f3; + int64_t f3f4_2 = f3_2 * (int64_t) f4; + int64_t f3f5_4 = f3_2 * (int64_t) f5_2; + int64_t f3f6_2 = f3_2 * (int64_t) f6; + int64_t f3f7_76 = f3_2 * (int64_t) f7_38; + int64_t f3f8_38 = f3_2 * (int64_t) f8_19; + int64_t f3f9_76 = f3_2 * (int64_t) f9_38; + int64_t f4f4 = f4 * (int64_t) f4; + int64_t f4f5_2 = f4_2 * (int64_t) f5; + int64_t f4f6_38 = f4_2 * (int64_t) f6_19; + int64_t f4f7_38 = f4 * (int64_t) f7_38; + int64_t f4f8_38 = f4_2 * (int64_t) f8_19; + int64_t f4f9_38 = f4 * (int64_t) f9_38; + int64_t f5f5_38 = f5 * (int64_t) f5_38; + int64_t f5f6_38 = f5_2 * (int64_t) f6_19; + int64_t f5f7_76 = f5_2 * (int64_t) f7_38; + int64_t f5f8_38 = f5_2 * (int64_t) f8_19; + int64_t f5f9_76 = f5_2 * (int64_t) f9_38; + int64_t f6f6_19 = f6 * (int64_t) f6_19; + int64_t f6f7_38 = f6 * (int64_t) f7_38; + int64_t f6f8_38 = f6_2 * (int64_t) f8_19; + int64_t f6f9_38 = f6 * (int64_t) f9_38; + int64_t f7f7_38 = f7 * (int64_t) f7_38; + int64_t f7f8_38 = f7_2 * (int64_t) f8_19; + int64_t f7f9_76 = f7_2 * (int64_t) f9_38; + int64_t f8f8_19 = f8 * (int64_t) f8_19; + int64_t f8f9_38 = f8 * (int64_t) f9_38; + int64_t f9f9_38 = f9 * (int64_t) f9_38; + int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; + int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; + int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19; + int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38; + int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; + int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38; + int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19; + int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38; + int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38; + int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + h0 += h0; + h1 += h1; + h2 += h2; + h3 += h3; + h4 += h4; + h5 += h5; + h6 += h6; + h7 += h7; + h8 += h8; + h9 += h9; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + + carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; + carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; + + carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; + carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; + + carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; + carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; + + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; + + carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; } -static void fe_pow22523(fe out, const fe z) { - fe t0; - fe t1; - fe t2; - int i; +static void fe_pow22523(fe out, const fe z) +{ + fe t0; + fe t1; + fe t2; + int i; - fe_sq(t0, z); - fe_sq(t1, t0); - for (i = 1; i < 2; ++i) { - fe_sq(t1, t1); - } - fe_mul(t1, z, t1); - fe_mul(t0, t0, t1); - fe_sq(t0, t0); - fe_mul(t0, t1, t0); - fe_sq(t1, t0); - for (i = 1; i < 5; ++i) { - fe_sq(t1, t1); - } - fe_mul(t0, t1, t0); - fe_sq(t1, t0); - for (i = 1; i < 10; ++i) { - fe_sq(t1, t1); - } - fe_mul(t1, t1, t0); - fe_sq(t2, t1); - for (i = 1; i < 20; ++i) { - fe_sq(t2, t2); - } - fe_mul(t1, t2, t1); - fe_sq(t1, t1); - for (i = 1; i < 10; ++i) { - fe_sq(t1, t1); - } - fe_mul(t0, t1, t0); - fe_sq(t1, t0); - for (i = 1; i < 50; ++i) { + fe_sq(t0, z); + fe_sq(t1, t0); + for (i = 1; i < 2; ++i) { + fe_sq(t1, t1); + } + fe_mul(t1, z, t1); + fe_mul(t0, t0, t1); + fe_sq(t0, t0); + fe_mul(t0, t1, t0); + fe_sq(t1, t0); + for (i = 1; i < 5; ++i) { + fe_sq(t1, t1); + } + fe_mul(t0, t1, t0); + fe_sq(t1, t0); + for (i = 1; i < 10; ++i) { + fe_sq(t1, t1); + } + fe_mul(t1, t1, t0); + fe_sq(t2, t1); + for (i = 1; i < 20; ++i) { + fe_sq(t2, t2); + } + fe_mul(t1, t2, t1); fe_sq(t1, t1); - } - fe_mul(t1, t1, t0); - fe_sq(t2, t1); - for (i = 1; i < 100; ++i) { - fe_sq(t2, t2); - } - fe_mul(t1, t2, t1); - fe_sq(t1, t1); - for (i = 1; i < 50; ++i) { + for (i = 1; i < 10; ++i) { + fe_sq(t1, t1); + } + fe_mul(t0, t1, t0); + fe_sq(t1, t0); + for (i = 1; i < 50; ++i) { + fe_sq(t1, t1); + } + fe_mul(t1, t1, t0); + fe_sq(t2, t1); + for (i = 1; i < 100; ++i) { + fe_sq(t2, t2); + } + fe_mul(t1, t2, t1); fe_sq(t1, t1); - } - fe_mul(t0, t1, t0); - fe_sq(t0, t0); - for (i = 1; i < 2; ++i) { + for (i = 1; i < 50; ++i) { + fe_sq(t1, t1); + } + fe_mul(t0, t1, t0); fe_sq(t0, t0); - } - fe_mul(out, t0, z); + for (i = 1; i < 2; ++i) { + fe_sq(t0, t0); + } + fe_mul(out, t0, z); } -/* ge means group element. - +/* + * ge means group element. + * * Here the group is the set of pairs (x,y) of field elements (see fe.h) * satisfying -x^2 + y^2 = 1 + d x^2y^2 * where d = -121665/121666. @@ -1725,268 +1782,292 @@ static void fe_pow22523(fe out, const fe z) { * ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z * ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT * ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T - * ge_precomp (Duif): (y+x,y-x,2dxy) */ - + * ge_precomp (Duif): (y+x,y-x,2dxy) + */ typedef struct { - fe X; - fe Y; - fe Z; + fe X; + fe Y; + fe Z; } ge_p2; typedef struct { - fe X; - fe Y; - fe Z; - fe T; + fe X; + fe Y; + fe Z; + fe T; } ge_p3; typedef struct { - fe X; - fe Y; - fe Z; - fe T; + fe X; + fe Y; + fe Z; + fe T; } ge_p1p1; typedef struct { - fe yplusx; - fe yminusx; - fe xy2d; + fe yplusx; + fe yminusx; + fe xy2d; } ge_precomp; typedef struct { - fe YplusX; - fe YminusX; - fe Z; - fe T2d; + fe YplusX; + fe YminusX; + fe Z; + fe T2d; } ge_cached; -static void ge_tobytes(uint8_t *s, const ge_p2 *h) { - fe recip; - fe x; - fe y; +static void ge_tobytes(uint8_t *s, const ge_p2 *h) +{ + fe recip; + fe x; + fe y; + + fe_invert(recip, h->Z); + fe_mul(x, h->X, recip); + fe_mul(y, h->Y, recip); + fe_tobytes(s, y); + s[31] ^= fe_isnegative(x) << 7; +} - fe_invert(recip, h->Z); - fe_mul(x, h->X, recip); - fe_mul(y, h->Y, recip); - fe_tobytes(s, y); - s[31] ^= fe_isnegative(x) << 7; +static void ge_p3_tobytes(uint8_t *s, const ge_p3 *h) +{ + fe recip; + fe x; + fe y; + + fe_invert(recip, h->Z); + fe_mul(x, h->X, recip); + fe_mul(y, h->Y, recip); + fe_tobytes(s, y); + s[31] ^= fe_isnegative(x) << 7; } -static void ge_p3_tobytes(uint8_t *s, const ge_p3 *h) { - fe recip; - fe x; - fe y; +static const fe d = { + -10913610, 13857413, -15372611, 6949391, 114729, + -8787816, -6275908, -3247719, -18696448, -12055116 +}; - fe_invert(recip, h->Z); - fe_mul(x, h->X, recip); - fe_mul(y, h->Y, recip); - fe_tobytes(s, y); - s[31] ^= fe_isnegative(x) << 7; -} +static const fe sqrtm1 = { + -32595792, -7943725, 9377950, 3500415, 12389472, + -272473, -25146209, -2005654, 326686, 11406482 +}; -static const fe d = {-10913610, 13857413, -15372611, 6949391, 114729, - -8787816, -6275908, -3247719, -18696448, -12055116}; - -static const fe sqrtm1 = {-32595792, -7943725, 9377950, 3500415, 12389472, - -272473, -25146209, -2005654, 326686, 11406482}; - -static int ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) { - fe u; - fe v; - fe v3; - fe vxx; - fe check; - - fe_frombytes(h->Y, s); - fe_1(h->Z); - fe_sq(u, h->Y); - fe_mul(v, u, d); - fe_sub(u, u, h->Z); /* u = y^2-1 */ - fe_add(v, v, h->Z); /* v = dy^2+1 */ - - fe_sq(v3, v); - fe_mul(v3, v3, v); /* v3 = v^3 */ - fe_sq(h->X, v3); - fe_mul(h->X, h->X, v); - fe_mul(h->X, h->X, u); /* x = uv^7 */ - - fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */ - fe_mul(h->X, h->X, v3); - fe_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */ - - fe_sq(vxx, h->X); - fe_mul(vxx, vxx, v); - fe_sub(check, vxx, u); /* vx^2-u */ - if (fe_isnonzero(check)) { - fe_add(check, vxx, u); /* vx^2+u */ +static int ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) +{ + fe u; + fe v; + fe v3; + fe vxx; + fe check; + + fe_frombytes(h->Y, s); + fe_1(h->Z); + fe_sq(u, h->Y); + fe_mul(v, u, d); + fe_sub(u, u, h->Z); /* u = y^2-1 */ + fe_add(v, v, h->Z); /* v = dy^2+1 */ + + fe_sq(v3, v); + fe_mul(v3, v3, v); /* v3 = v^3 */ + fe_sq(h->X, v3); + fe_mul(h->X, h->X, v); + fe_mul(h->X, h->X, u); /* x = uv^7 */ + + fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */ + fe_mul(h->X, h->X, v3); + fe_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */ + + fe_sq(vxx, h->X); + fe_mul(vxx, vxx, v); + fe_sub(check, vxx, u); /* vx^2-u */ if (fe_isnonzero(check)) { - return -1; + fe_add(check, vxx, u); /* vx^2+u */ + if (fe_isnonzero(check)) { + return -1; + } + fe_mul(h->X, h->X, sqrtm1); } - fe_mul(h->X, h->X, sqrtm1); - } - if (fe_isnegative(h->X) != (s[31] >> 7)) { - fe_neg(h->X, h->X); - } + if (fe_isnegative(h->X) != (s[31] >> 7)) { + fe_neg(h->X, h->X); + } - fe_mul(h->T, h->X, h->Y); - return 0; + fe_mul(h->T, h->X, h->Y); + return 0; } -static void ge_p2_0(ge_p2 *h) { - fe_0(h->X); - fe_1(h->Y); - fe_1(h->Z); +static void ge_p2_0(ge_p2 *h) +{ + fe_0(h->X); + fe_1(h->Y); + fe_1(h->Z); } -static void ge_p3_0(ge_p3 *h) { - fe_0(h->X); - fe_1(h->Y); - fe_1(h->Z); - fe_0(h->T); +static void ge_p3_0(ge_p3 *h) +{ + fe_0(h->X); + fe_1(h->Y); + fe_1(h->Z); + fe_0(h->T); } -static void ge_precomp_0(ge_precomp *h) { - fe_1(h->yplusx); - fe_1(h->yminusx); - fe_0(h->xy2d); +static void ge_precomp_0(ge_precomp *h) +{ + fe_1(h->yplusx); + fe_1(h->yminusx); + fe_0(h->xy2d); } /* r = p */ -static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) { - fe_copy(r->X, p->X); - fe_copy(r->Y, p->Y); - fe_copy(r->Z, p->Z); +static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) +{ + fe_copy(r->X, p->X); + fe_copy(r->Y, p->Y); + fe_copy(r->Z, p->Z); } -static const fe d2 = {-21827239, -5839606, -30745221, 13898782, 229458, - 15978800, -12551817, -6495438, 29715968, 9444199}; +static const fe d2 = { + -21827239, -5839606, -30745221, 13898782, 229458, + 15978800, -12551817, -6495438, 29715968, 9444199 +}; /* r = p */ -static void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) { - fe_add(r->YplusX, p->Y, p->X); - fe_sub(r->YminusX, p->Y, p->X); - fe_copy(r->Z, p->Z); - fe_mul(r->T2d, p->T, d2); +static void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) +{ + fe_add(r->YplusX, p->Y, p->X); + fe_sub(r->YminusX, p->Y, p->X); + fe_copy(r->Z, p->Z); + fe_mul(r->T2d, p->T, d2); } /* r = p */ -static void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) { - fe_mul(r->X, p->X, p->T); - fe_mul(r->Y, p->Y, p->Z); - fe_mul(r->Z, p->Z, p->T); +static void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) +{ + fe_mul(r->X, p->X, p->T); + fe_mul(r->Y, p->Y, p->Z); + fe_mul(r->Z, p->Z, p->T); } /* r = p */ -static void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) { - fe_mul(r->X, p->X, p->T); - fe_mul(r->Y, p->Y, p->Z); - fe_mul(r->Z, p->Z, p->T); - fe_mul(r->T, p->X, p->Y); +static void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) +{ + fe_mul(r->X, p->X, p->T); + fe_mul(r->Y, p->Y, p->Z); + fe_mul(r->Z, p->Z, p->T); + fe_mul(r->T, p->X, p->Y); } /* r = 2 * p */ -static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) { - fe t0; - - fe_sq(r->X, p->X); - fe_sq(r->Z, p->Y); - fe_sq2(r->T, p->Z); - fe_add(r->Y, p->X, p->Y); - fe_sq(t0, r->Y); - fe_add(r->Y, r->Z, r->X); - fe_sub(r->Z, r->Z, r->X); - fe_sub(r->X, t0, r->Y); - fe_sub(r->T, r->T, r->Z); +static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) +{ + fe t0; + + fe_sq(r->X, p->X); + fe_sq(r->Z, p->Y); + fe_sq2(r->T, p->Z); + fe_add(r->Y, p->X, p->Y); + fe_sq(t0, r->Y); + fe_add(r->Y, r->Z, r->X); + fe_sub(r->Z, r->Z, r->X); + fe_sub(r->X, t0, r->Y); + fe_sub(r->T, r->T, r->Z); } /* r = 2 * p */ -static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) { - ge_p2 q; - ge_p3_to_p2(&q, p); - ge_p2_dbl(r, &q); +static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) +{ + ge_p2 q; + ge_p3_to_p2(&q, p); + ge_p2_dbl(r, &q); } /* r = p + q */ -static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { - fe t0; - - fe_add(r->X, p->Y, p->X); - fe_sub(r->Y, p->Y, p->X); - fe_mul(r->Z, r->X, q->yplusx); - fe_mul(r->Y, r->Y, q->yminusx); - fe_mul(r->T, q->xy2d, p->T); - fe_add(t0, p->Z, p->Z); - fe_sub(r->X, r->Z, r->Y); - fe_add(r->Y, r->Z, r->Y); - fe_add(r->Z, t0, r->T); - fe_sub(r->T, t0, r->T); +static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) +{ + fe t0; + + fe_add(r->X, p->Y, p->X); + fe_sub(r->Y, p->Y, p->X); + fe_mul(r->Z, r->X, q->yplusx); + fe_mul(r->Y, r->Y, q->yminusx); + fe_mul(r->T, q->xy2d, p->T); + fe_add(t0, p->Z, p->Z); + fe_sub(r->X, r->Z, r->Y); + fe_add(r->Y, r->Z, r->Y); + fe_add(r->Z, t0, r->T); + fe_sub(r->T, t0, r->T); } /* r = p - q */ -static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { - fe t0; - - fe_add(r->X, p->Y, p->X); - fe_sub(r->Y, p->Y, p->X); - fe_mul(r->Z, r->X, q->yminusx); - fe_mul(r->Y, r->Y, q->yplusx); - fe_mul(r->T, q->xy2d, p->T); - fe_add(t0, p->Z, p->Z); - fe_sub(r->X, r->Z, r->Y); - fe_add(r->Y, r->Z, r->Y); - fe_sub(r->Z, t0, r->T); - fe_add(r->T, t0, r->T); +static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) +{ + fe t0; + + fe_add(r->X, p->Y, p->X); + fe_sub(r->Y, p->Y, p->X); + fe_mul(r->Z, r->X, q->yminusx); + fe_mul(r->Y, r->Y, q->yplusx); + fe_mul(r->T, q->xy2d, p->T); + fe_add(t0, p->Z, p->Z); + fe_sub(r->X, r->Z, r->Y); + fe_add(r->Y, r->Z, r->Y); + fe_sub(r->Z, t0, r->T); + fe_add(r->T, t0, r->T); } /* r = p + q */ -static void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { - fe t0; - - fe_add(r->X, p->Y, p->X); - fe_sub(r->Y, p->Y, p->X); - fe_mul(r->Z, r->X, q->YplusX); - fe_mul(r->Y, r->Y, q->YminusX); - fe_mul(r->T, q->T2d, p->T); - fe_mul(r->X, p->Z, q->Z); - fe_add(t0, r->X, r->X); - fe_sub(r->X, r->Z, r->Y); - fe_add(r->Y, r->Z, r->Y); - fe_add(r->Z, t0, r->T); - fe_sub(r->T, t0, r->T); +static void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) +{ + fe t0; + + fe_add(r->X, p->Y, p->X); + fe_sub(r->Y, p->Y, p->X); + fe_mul(r->Z, r->X, q->YplusX); + fe_mul(r->Y, r->Y, q->YminusX); + fe_mul(r->T, q->T2d, p->T); + fe_mul(r->X, p->Z, q->Z); + fe_add(t0, r->X, r->X); + fe_sub(r->X, r->Z, r->Y); + fe_add(r->Y, r->Z, r->Y); + fe_add(r->Z, t0, r->T); + fe_sub(r->T, t0, r->T); } /* r = p - q */ -static void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { - fe t0; - - fe_add(r->X, p->Y, p->X); - fe_sub(r->Y, p->Y, p->X); - fe_mul(r->Z, r->X, q->YminusX); - fe_mul(r->Y, r->Y, q->YplusX); - fe_mul(r->T, q->T2d, p->T); - fe_mul(r->X, p->Z, q->Z); - fe_add(t0, r->X, r->X); - fe_sub(r->X, r->Z, r->Y); - fe_add(r->Y, r->Z, r->Y); - fe_sub(r->Z, t0, r->T); - fe_add(r->T, t0, r->T); +static void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) +{ + fe t0; + + fe_add(r->X, p->Y, p->X); + fe_sub(r->Y, p->Y, p->X); + fe_mul(r->Z, r->X, q->YminusX); + fe_mul(r->Y, r->Y, q->YplusX); + fe_mul(r->T, q->T2d, p->T); + fe_mul(r->X, p->Z, q->Z); + fe_add(t0, r->X, r->X); + fe_sub(r->X, r->Z, r->Y); + fe_add(r->Y, r->Z, r->Y); + fe_sub(r->Z, t0, r->T); + fe_add(r->T, t0, r->T); } -static uint8_t equal(signed char b, signed char c) { - uint8_t ub = b; - uint8_t uc = c; - uint8_t x = ub ^ uc; /* 0: yes; 1..255: no */ - uint32_t y = x; /* 0: yes; 1..255: no */ - y -= 1; /* 4294967295: yes; 0..254: no */ - y >>= 31; /* 1: yes; 0: no */ - return y; +static uint8_t equal(signed char b, signed char c) +{ + uint8_t ub = b; + uint8_t uc = c; + uint8_t x = ub ^ uc; /* 0: yes; 1..255: no */ + uint32_t y = x; /* 0: yes; 1..255: no */ + y -= 1; /* 4294967295: yes; 0..254: no */ + y >>= 31; /* 1: yes; 0: no */ + return y; } -static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) { - fe_cmov(t->yplusx, u->yplusx, b); - fe_cmov(t->yminusx, u->yminusx, b); - fe_cmov(t->xy2d, u->xy2d, b); +static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) +{ + fe_cmov(t->yplusx, u->yplusx, b); + fe_cmov(t->yminusx, u->yminusx, b); + fe_cmov(t->xy2d, u->xy2d, b); } /* k25519Precomp[i][j] = (j+1)*256^i*B */ @@ -4105,257 +4186,273 @@ static const ge_precomp k25519Precomp[32][8] = { }, }; -static uint8_t negative(signed char b) { - uint32_t x = b; - x >>= 31; /* 1: yes; 0: no */ - return x; +static uint8_t negative(signed char b) +{ + uint32_t x = b; + + x >>= 31; /* 1: yes; 0: no */ + return x; } -static void table_select(ge_precomp *t, int pos, signed char b) { - ge_precomp minust; - uint8_t bnegative = negative(b); - uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1); - - ge_precomp_0(t); - cmov(t, &k25519Precomp[pos][0], equal(babs, 1)); - cmov(t, &k25519Precomp[pos][1], equal(babs, 2)); - cmov(t, &k25519Precomp[pos][2], equal(babs, 3)); - cmov(t, &k25519Precomp[pos][3], equal(babs, 4)); - cmov(t, &k25519Precomp[pos][4], equal(babs, 5)); - cmov(t, &k25519Precomp[pos][5], equal(babs, 6)); - cmov(t, &k25519Precomp[pos][6], equal(babs, 7)); - cmov(t, &k25519Precomp[pos][7], equal(babs, 8)); - fe_copy(minust.yplusx, t->yminusx); - fe_copy(minust.yminusx, t->yplusx); - fe_neg(minust.xy2d, t->xy2d); - cmov(t, &minust, bnegative); +static void table_select(ge_precomp *t, int pos, signed char b) +{ + ge_precomp minust; + uint8_t bnegative = negative(b); + uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1); + + ge_precomp_0(t); + cmov(t, &k25519Precomp[pos][0], equal(babs, 1)); + cmov(t, &k25519Precomp[pos][1], equal(babs, 2)); + cmov(t, &k25519Precomp[pos][2], equal(babs, 3)); + cmov(t, &k25519Precomp[pos][3], equal(babs, 4)); + cmov(t, &k25519Precomp[pos][4], equal(babs, 5)); + cmov(t, &k25519Precomp[pos][5], equal(babs, 6)); + cmov(t, &k25519Precomp[pos][6], equal(babs, 7)); + cmov(t, &k25519Precomp[pos][7], equal(babs, 8)); + fe_copy(minust.yplusx, t->yminusx); + fe_copy(minust.yminusx, t->yplusx); + fe_neg(minust.xy2d, t->xy2d); + cmov(t, &minust, bnegative); } -/* h = a * B +/* + * h = a * B + * * where a = a[0]+256*a[1]+...+256^31 a[31] * B is the Ed25519 base point (x,4/5) with x positive. * * Preconditions: - * a[31] <= 127 */ -static void ge_scalarmult_base(ge_p3 *h, const uint8_t *a) { - signed char e[64]; - signed char carry; - ge_p1p1 r; - ge_p2 s; - ge_precomp t; - int i; - - for (i = 0; i < 32; ++i) { - e[2 * i + 0] = (a[i] >> 0) & 15; - e[2 * i + 1] = (a[i] >> 4) & 15; - } - /* each e[i] is between 0 and 15 */ - /* e[63] is between 0 and 7 */ - - carry = 0; - for (i = 0; i < 63; ++i) { - e[i] += carry; - carry = e[i] + 8; - carry >>= 4; - e[i] -= carry << 4; - } - e[63] += carry; - /* each e[i] is between -8 and 8 */ - - ge_p3_0(h); - for (i = 1; i < 64; i += 2) { - table_select(&t, i / 2, e[i]); - ge_madd(&r, h, &t); - ge_p1p1_to_p3(h, &r); - } - - ge_p3_dbl(&r, h); - ge_p1p1_to_p2(&s, &r); - ge_p2_dbl(&r, &s); - ge_p1p1_to_p2(&s, &r); - ge_p2_dbl(&r, &s); - ge_p1p1_to_p2(&s, &r); - ge_p2_dbl(&r, &s); - ge_p1p1_to_p3(h, &r); - - for (i = 0; i < 64; i += 2) { - table_select(&t, i / 2, e[i]); - ge_madd(&r, h, &t); + * a[31] <= 127 + */ +static void ge_scalarmult_base(ge_p3 *h, const uint8_t *a) +{ + signed char e[64]; + signed char carry; + ge_p1p1 r; + ge_p2 s; + ge_precomp t; + int i; + + for (i = 0; i < 32; ++i) { + e[2 * i + 0] = (a[i] >> 0) & 15; + e[2 * i + 1] = (a[i] >> 4) & 15; + } + /* each e[i] is between 0 and 15 */ + /* e[63] is between 0 and 7 */ + + carry = 0; + for (i = 0; i < 63; ++i) { + e[i] += carry; + carry = e[i] + 8; + carry >>= 4; + e[i] -= carry << 4; + } + e[63] += carry; + /* each e[i] is between -8 and 8 */ + + ge_p3_0(h); + for (i = 1; i < 64; i += 2) { + table_select(&t, i / 2, e[i]); + ge_madd(&r, h, &t); + ge_p1p1_to_p3(h, &r); + } + + ge_p3_dbl(&r, h); + ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); + ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); + ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); ge_p1p1_to_p3(h, &r); - } - OPENSSL_cleanse(e, sizeof(e)); + for (i = 0; i < 64; i += 2) { + table_select(&t, i / 2, e[i]); + ge_madd(&r, h, &t); + ge_p1p1_to_p3(h, &r); + } + + OPENSSL_cleanse(e, sizeof(e)); } #if !defined(BASE_2_51_IMPLEMENTED) -/* Replace (f,g) with (g,f) if b == 1; +/* + * Replace (f,g) with (g,f) if b == 1; * replace (f,g) with (f,g) if b == 0. * - * Preconditions: b in {0,1}. */ -static void fe_cswap(fe f, fe g, unsigned int b) { - size_t i; - b = 0-b; - for (i = 0; i < 10; i++) { - int32_t x = f[i] ^ g[i]; - x &= b; - f[i] ^= x; - g[i] ^= x; - } + * Preconditions: b in {0,1}. + */ +static void fe_cswap(fe f, fe g, unsigned int b) +{ + size_t i; + + b = 0-b; + for (i = 0; i < 10; i++) { + int32_t x = f[i] ^ g[i]; + x &= b; + f[i] ^= x; + g[i] ^= x; + } } -/* h = f * 121666 +/* + * h = f * 121666 + * * Can overlap h with f. * * Preconditions: * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. * * Postconditions: - * |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */ -static void fe_mul121666(fe h, fe f) { - int32_t f0 = f[0]; - int32_t f1 = f[1]; - int32_t f2 = f[2]; - int32_t f3 = f[3]; - int32_t f4 = f[4]; - int32_t f5 = f[5]; - int32_t f6 = f[6]; - int32_t f7 = f[7]; - int32_t f8 = f[8]; - int32_t f9 = f[9]; - int64_t h0 = f0 * (int64_t) 121666; - int64_t h1 = f1 * (int64_t) 121666; - int64_t h2 = f2 * (int64_t) 121666; - int64_t h3 = f3 * (int64_t) 121666; - int64_t h4 = f4 * (int64_t) 121666; - int64_t h5 = f5 * (int64_t) 121666; - int64_t h6 = f6 * (int64_t) 121666; - int64_t h7 = f7 * (int64_t) 121666; - int64_t h8 = f8 * (int64_t) 121666; - int64_t h9 = f9 * (int64_t) 121666; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; - carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; - carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; - carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; - carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; - - carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; - carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; - carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; - carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; - carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; - - h[0] = (int32_t)h0; - h[1] = (int32_t)h1; - h[2] = (int32_t)h2; - h[3] = (int32_t)h3; - h[4] = (int32_t)h4; - h[5] = (int32_t)h5; - h[6] = (int32_t)h6; - h[7] = (int32_t)h7; - h[8] = (int32_t)h8; - h[9] = (int32_t)h9; + * |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + */ +static void fe_mul121666(fe h, fe f) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int64_t h0 = f0 * (int64_t) 121666; + int64_t h1 = f1 * (int64_t) 121666; + int64_t h2 = f2 * (int64_t) 121666; + int64_t h3 = f3 * (int64_t) 121666; + int64_t h4 = f4 * (int64_t) 121666; + int64_t h5 = f5 * (int64_t) 121666; + int64_t h6 = f6 * (int64_t) 121666; + int64_t h7 = f7 * (int64_t) 121666; + int64_t h8 = f8 * (int64_t) 121666; + int64_t h9 = f9 * (int64_t) 121666; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; + carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; + carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; + carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; + carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; + carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; } static void x25519_scalar_mult_generic(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32]) { - fe x1, x2, z2, x3, z3, tmp0, tmp1; - uint8_t e[32]; - unsigned swap = 0; - int pos; - - memcpy(e, scalar, 32); - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - fe_frombytes(x1, point); - fe_1(x2); - fe_0(z2); - fe_copy(x3, x1); - fe_1(z3); - - for (pos = 254; pos >= 0; --pos) { - unsigned b = 1 & (e[pos / 8] >> (pos & 7)); - swap ^= b; - fe_cswap(x2, x3, swap); - fe_cswap(z2, z3, swap); - swap = b; - fe_sub(tmp0, x3, z3); - fe_sub(tmp1, x2, z2); - fe_add(x2, x2, z2); - fe_add(z2, x3, z3); - fe_mul(z3, tmp0, x2); - fe_mul(z2, z2, tmp1); - fe_sq(tmp0, tmp1); - fe_sq(tmp1, x2); - fe_add(x3, z3, z2); - fe_sub(z2, z3, z2); - fe_mul(x2, tmp1, tmp0); - fe_sub(tmp1, tmp1, tmp0); - fe_sq(z2, z2); - fe_mul121666(z3, tmp1); - fe_sq(x3, x3); - fe_add(tmp0, tmp0, z3); - fe_mul(z3, x1, z2); - fe_mul(z2, tmp1, tmp0); - } - - fe_invert(z2, z2); - fe_mul(x2, x2, z2); - fe_tobytes(out, x2); - - OPENSSL_cleanse(e, sizeof(e)); + fe x1, x2, z2, x3, z3, tmp0, tmp1; + uint8_t e[32]; + unsigned swap = 0; + int pos; + + memcpy(e, scalar, 32); + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + fe_frombytes(x1, point); + fe_1(x2); + fe_0(z2); + fe_copy(x3, x1); + fe_1(z3); + + for (pos = 254; pos >= 0; --pos) { + unsigned b = 1 & (e[pos / 8] >> (pos & 7)); + swap ^= b; + fe_cswap(x2, x3, swap); + fe_cswap(z2, z3, swap); + swap = b; + fe_sub(tmp0, x3, z3); + fe_sub(tmp1, x2, z2); + fe_add(x2, x2, z2); + fe_add(z2, x3, z3); + fe_mul(z3, tmp0, x2); + fe_mul(z2, z2, tmp1); + fe_sq(tmp0, tmp1); + fe_sq(tmp1, x2); + fe_add(x3, z3, z2); + fe_sub(z2, z3, z2); + fe_mul(x2, tmp1, tmp0); + fe_sub(tmp1, tmp1, tmp0); + fe_sq(z2, z2); + fe_mul121666(z3, tmp1); + fe_sq(x3, x3); + fe_add(tmp0, tmp0, z3); + fe_mul(z3, x1, z2); + fe_mul(z2, tmp1, tmp0); + } + + fe_invert(z2, z2); + fe_mul(x2, x2, z2); + fe_tobytes(out, x2); + + OPENSSL_cleanse(e, sizeof(e)); } static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32]) { - x25519_scalar_mult_generic(out, scalar, point); + x25519_scalar_mult_generic(out, scalar, point); } #endif -static void slide(signed char *r, const uint8_t *a) { - int i; - int b; - int k; - - for (i = 0; i < 256; ++i) { - r[i] = 1 & (a[i >> 3] >> (i & 7)); - } - - for (i = 0; i < 256; ++i) { - if (r[i]) { - for (b = 1; b <= 6 && i + b < 256; ++b) { - if (r[i + b]) { - if (r[i] + (r[i + b] << b) <= 15) { - r[i] += r[i + b] << b; - r[i + b] = 0; - } else if (r[i] - (r[i + b] << b) >= -15) { - r[i] -= r[i + b] << b; - for (k = i + b; k < 256; ++k) { - if (!r[k]) { - r[k] = 1; - break; - } - r[k] = 0; +static void slide(signed char *r, const uint8_t *a) +{ + int i; + int b; + int k; + + for (i = 0; i < 256; ++i) { + r[i] = 1 & (a[i >> 3] >> (i & 7)); + } + + for (i = 0; i < 256; ++i) { + if (r[i]) { + for (b = 1; b <= 6 && i + b < 256; ++b) { + if (r[i + b]) { + if (r[i] + (r[i + b] << b) <= 15) { + r[i] += r[i + b] << b; + r[i + b] = 0; + } else if (r[i] - (r[i + b] << b) >= -15) { + r[i] -= r[i + b] << b; + for (k = i + b; k < 256; ++k) { + if (!r[k]) { + r[k] = 1; + break; + } + r[k] = 0; + } + } else { + break; + } + } } - } else { - break; - } } - } } - } } static const ge_precomp Bi[8] = { @@ -4425,1033 +4522,1075 @@ static const ge_precomp Bi[8] = { }, }; -/* r = a * A + b * B +/* + * r = a * A + b * B + * * where a = a[0]+256*a[1]+...+256^31 a[31]. * and b = b[0]+256*b[1]+...+256^31 b[31]. - * B is the Ed25519 base point (x,4/5) with x positive. */ + * B is the Ed25519 base point (x,4/5) with x positive. + */ static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a, - const ge_p3 *A, const uint8_t *b) { - signed char aslide[256]; - signed char bslide[256]; - ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */ - ge_p1p1 t; - ge_p3 u; - ge_p3 A2; - int i; - - slide(aslide, a); - slide(bslide, b); - - ge_p3_to_cached(&Ai[0], A); - ge_p3_dbl(&t, A); - ge_p1p1_to_p3(&A2, &t); - ge_add(&t, &A2, &Ai[0]); - ge_p1p1_to_p3(&u, &t); - ge_p3_to_cached(&Ai[1], &u); - ge_add(&t, &A2, &Ai[1]); - ge_p1p1_to_p3(&u, &t); - ge_p3_to_cached(&Ai[2], &u); - ge_add(&t, &A2, &Ai[2]); - ge_p1p1_to_p3(&u, &t); - ge_p3_to_cached(&Ai[3], &u); - ge_add(&t, &A2, &Ai[3]); - ge_p1p1_to_p3(&u, &t); - ge_p3_to_cached(&Ai[4], &u); - ge_add(&t, &A2, &Ai[4]); - ge_p1p1_to_p3(&u, &t); - ge_p3_to_cached(&Ai[5], &u); - ge_add(&t, &A2, &Ai[5]); - ge_p1p1_to_p3(&u, &t); - ge_p3_to_cached(&Ai[6], &u); - ge_add(&t, &A2, &Ai[6]); - ge_p1p1_to_p3(&u, &t); - ge_p3_to_cached(&Ai[7], &u); - - ge_p2_0(r); - - for (i = 255; i >= 0; --i) { - if (aslide[i] || bslide[i]) { - break; + const ge_p3 *A, const uint8_t *b) +{ + signed char aslide[256]; + signed char bslide[256]; + ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */ + ge_p1p1 t; + ge_p3 u; + ge_p3 A2; + int i; + + slide(aslide, a); + slide(bslide, b); + + ge_p3_to_cached(&Ai[0], A); + ge_p3_dbl(&t, A); + ge_p1p1_to_p3(&A2, &t); + ge_add(&t, &A2, &Ai[0]); + ge_p1p1_to_p3(&u, &t); + ge_p3_to_cached(&Ai[1], &u); + ge_add(&t, &A2, &Ai[1]); + ge_p1p1_to_p3(&u, &t); + ge_p3_to_cached(&Ai[2], &u); + ge_add(&t, &A2, &Ai[2]); + ge_p1p1_to_p3(&u, &t); + ge_p3_to_cached(&Ai[3], &u); + ge_add(&t, &A2, &Ai[3]); + ge_p1p1_to_p3(&u, &t); + ge_p3_to_cached(&Ai[4], &u); + ge_add(&t, &A2, &Ai[4]); + ge_p1p1_to_p3(&u, &t); + ge_p3_to_cached(&Ai[5], &u); + ge_add(&t, &A2, &Ai[5]); + ge_p1p1_to_p3(&u, &t); + ge_p3_to_cached(&Ai[6], &u); + ge_add(&t, &A2, &Ai[6]); + ge_p1p1_to_p3(&u, &t); + ge_p3_to_cached(&Ai[7], &u); + + ge_p2_0(r); + + for (i = 255; i >= 0; --i) { + if (aslide[i] || bslide[i]) { + break; + } } - } - for (; i >= 0; --i) { - ge_p2_dbl(&t, r); + for (; i >= 0; --i) { + ge_p2_dbl(&t, r); - if (aslide[i] > 0) { - ge_p1p1_to_p3(&u, &t); - ge_add(&t, &u, &Ai[aslide[i] / 2]); - } else if (aslide[i] < 0) { - ge_p1p1_to_p3(&u, &t); - ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]); - } + if (aslide[i] > 0) { + ge_p1p1_to_p3(&u, &t); + ge_add(&t, &u, &Ai[aslide[i] / 2]); + } else if (aslide[i] < 0) { + ge_p1p1_to_p3(&u, &t); + ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]); + } - if (bslide[i] > 0) { - ge_p1p1_to_p3(&u, &t); - ge_madd(&t, &u, &Bi[bslide[i] / 2]); - } else if (bslide[i] < 0) { - ge_p1p1_to_p3(&u, &t); - ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]); - } + if (bslide[i] > 0) { + ge_p1p1_to_p3(&u, &t); + ge_madd(&t, &u, &Bi[bslide[i] / 2]); + } else if (bslide[i] < 0) { + ge_p1p1_to_p3(&u, &t); + ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]); + } - ge_p1p1_to_p2(r, &t); - } + ge_p1p1_to_p2(r, &t); + } } -/* The set of scalars is \Z/l - * where l = 2^252 + 27742317777372353535851937790883648493. */ - -/* Input: +/* + * The set of scalars is \Z/l + * where l = 2^252 + 27742317777372353535851937790883648493. + * + * Input: * s[0]+256*s[1]+...+256^63*s[63] = s * * Output: * s[0]+256*s[1]+...+256^31*s[31] = s mod l * where l = 2^252 + 27742317777372353535851937790883648493. - * Overwrites s in place. */ -static void x25519_sc_reduce(uint8_t *s) { - int64_t s0 = 2097151 & load_3(s); - int64_t s1 = 2097151 & (load_4(s + 2) >> 5); - int64_t s2 = 2097151 & (load_3(s + 5) >> 2); - int64_t s3 = 2097151 & (load_4(s + 7) >> 7); - int64_t s4 = 2097151 & (load_4(s + 10) >> 4); - int64_t s5 = 2097151 & (load_3(s + 13) >> 1); - int64_t s6 = 2097151 & (load_4(s + 15) >> 6); - int64_t s7 = 2097151 & (load_3(s + 18) >> 3); - int64_t s8 = 2097151 & load_3(s + 21); - int64_t s9 = 2097151 & (load_4(s + 23) >> 5); - int64_t s10 = 2097151 & (load_3(s + 26) >> 2); - int64_t s11 = 2097151 & (load_4(s + 28) >> 7); - int64_t s12 = 2097151 & (load_4(s + 31) >> 4); - int64_t s13 = 2097151 & (load_3(s + 34) >> 1); - int64_t s14 = 2097151 & (load_4(s + 36) >> 6); - int64_t s15 = 2097151 & (load_3(s + 39) >> 3); - int64_t s16 = 2097151 & load_3(s + 42); - int64_t s17 = 2097151 & (load_4(s + 44) >> 5); - int64_t s18 = 2097151 & (load_3(s + 47) >> 2); - int64_t s19 = 2097151 & (load_4(s + 49) >> 7); - int64_t s20 = 2097151 & (load_4(s + 52) >> 4); - int64_t s21 = 2097151 & (load_3(s + 55) >> 1); - int64_t s22 = 2097151 & (load_4(s + 57) >> 6); - int64_t s23 = (load_4(s + 60) >> 3); - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - int64_t carry10; - int64_t carry11; - int64_t carry12; - int64_t carry13; - int64_t carry14; - int64_t carry15; - int64_t carry16; - - s11 += s23 * 666643; - s12 += s23 * 470296; - s13 += s23 * 654183; - s14 -= s23 * 997805; - s15 += s23 * 136657; - s16 -= s23 * 683901; - s23 = 0; - - s10 += s22 * 666643; - s11 += s22 * 470296; - s12 += s22 * 654183; - s13 -= s22 * 997805; - s14 += s22 * 136657; - s15 -= s22 * 683901; - s22 = 0; - - s9 += s21 * 666643; - s10 += s21 * 470296; - s11 += s21 * 654183; - s12 -= s21 * 997805; - s13 += s21 * 136657; - s14 -= s21 * 683901; - s21 = 0; - - s8 += s20 * 666643; - s9 += s20 * 470296; - s10 += s20 * 654183; - s11 -= s20 * 997805; - s12 += s20 * 136657; - s13 -= s20 * 683901; - s20 = 0; - - s7 += s19 * 666643; - s8 += s19 * 470296; - s9 += s19 * 654183; - s10 -= s19 * 997805; - s11 += s19 * 136657; - s12 -= s19 * 683901; - s19 = 0; - - s6 += s18 * 666643; - s7 += s18 * 470296; - s8 += s18 * 654183; - s9 -= s18 * 997805; - s10 += s18 * 136657; - s11 -= s18 * 683901; - s18 = 0; - - carry6 = (s6 + (1 << 20)) >> 21; - s7 += carry6; - s6 -= carry6 * (1 << 21); - carry8 = (s8 + (1 << 20)) >> 21; - s9 += carry8; - s8 -= carry8 * (1 << 21); - carry10 = (s10 + (1 << 20)) >> 21; - s11 += carry10; - s10 -= carry10 * (1 << 21); - carry12 = (s12 + (1 << 20)) >> 21; - s13 += carry12; - s12 -= carry12 * (1 << 21); - carry14 = (s14 + (1 << 20)) >> 21; - s15 += carry14; - s14 -= carry14 * (1 << 21); - carry16 = (s16 + (1 << 20)) >> 21; - s17 += carry16; - s16 -= carry16 * (1 << 21); - - carry7 = (s7 + (1 << 20)) >> 21; - s8 += carry7; - s7 -= carry7 * (1 << 21); - carry9 = (s9 + (1 << 20)) >> 21; - s10 += carry9; - s9 -= carry9 * (1 << 21); - carry11 = (s11 + (1 << 20)) >> 21; - s12 += carry11; - s11 -= carry11 * (1 << 21); - carry13 = (s13 + (1 << 20)) >> 21; - s14 += carry13; - s13 -= carry13 * (1 << 21); - carry15 = (s15 + (1 << 20)) >> 21; - s16 += carry15; - s15 -= carry15 * (1 << 21); - - s5 += s17 * 666643; - s6 += s17 * 470296; - s7 += s17 * 654183; - s8 -= s17 * 997805; - s9 += s17 * 136657; - s10 -= s17 * 683901; - s17 = 0; - - s4 += s16 * 666643; - s5 += s16 * 470296; - s6 += s16 * 654183; - s7 -= s16 * 997805; - s8 += s16 * 136657; - s9 -= s16 * 683901; - s16 = 0; - - s3 += s15 * 666643; - s4 += s15 * 470296; - s5 += s15 * 654183; - s6 -= s15 * 997805; - s7 += s15 * 136657; - s8 -= s15 * 683901; - s15 = 0; - - s2 += s14 * 666643; - s3 += s14 * 470296; - s4 += s14 * 654183; - s5 -= s14 * 997805; - s6 += s14 * 136657; - s7 -= s14 * 683901; - s14 = 0; - - s1 += s13 * 666643; - s2 += s13 * 470296; - s3 += s13 * 654183; - s4 -= s13 * 997805; - s5 += s13 * 136657; - s6 -= s13 * 683901; - s13 = 0; - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = (s0 + (1 << 20)) >> 21; - s1 += carry0; - s0 -= carry0 * (1 << 21); - carry2 = (s2 + (1 << 20)) >> 21; - s3 += carry2; - s2 -= carry2 * (1 << 21); - carry4 = (s4 + (1 << 20)) >> 21; - s5 += carry4; - s4 -= carry4 * (1 << 21); - carry6 = (s6 + (1 << 20)) >> 21; - s7 += carry6; - s6 -= carry6 * (1 << 21); - carry8 = (s8 + (1 << 20)) >> 21; - s9 += carry8; - s8 -= carry8 * (1 << 21); - carry10 = (s10 + (1 << 20)) >> 21; - s11 += carry10; - s10 -= carry10 * (1 << 21); - - carry1 = (s1 + (1 << 20)) >> 21; - s2 += carry1; - s1 -= carry1 * (1 << 21); - carry3 = (s3 + (1 << 20)) >> 21; - s4 += carry3; - s3 -= carry3 * (1 << 21); - carry5 = (s5 + (1 << 20)) >> 21; - s6 += carry5; - s5 -= carry5 * (1 << 21); - carry7 = (s7 + (1 << 20)) >> 21; - s8 += carry7; - s7 -= carry7 * (1 << 21); - carry9 = (s9 + (1 << 20)) >> 21; - s10 += carry9; - s9 -= carry9 * (1 << 21); - carry11 = (s11 + (1 << 20)) >> 21; - s12 += carry11; - s11 -= carry11 * (1 << 21); - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = s0 >> 21; - s1 += carry0; - s0 -= carry0 * (1 << 21); - carry1 = s1 >> 21; - s2 += carry1; - s1 -= carry1 * (1 << 21); - carry2 = s2 >> 21; - s3 += carry2; - s2 -= carry2 * (1 << 21); - carry3 = s3 >> 21; - s4 += carry3; - s3 -= carry3 * (1 << 21); - carry4 = s4 >> 21; - s5 += carry4; - s4 -= carry4 * (1 << 21); - carry5 = s5 >> 21; - s6 += carry5; - s5 -= carry5 * (1 << 21); - carry6 = s6 >> 21; - s7 += carry6; - s6 -= carry6 * (1 << 21); - carry7 = s7 >> 21; - s8 += carry7; - s7 -= carry7 * (1 << 21); - carry8 = s8 >> 21; - s9 += carry8; - s8 -= carry8 * (1 << 21); - carry9 = s9 >> 21; - s10 += carry9; - s9 -= carry9 * (1 << 21); - carry10 = s10 >> 21; - s11 += carry10; - s10 -= carry10 * (1 << 21); - carry11 = s11 >> 21; - s12 += carry11; - s11 -= carry11 * (1 << 21); - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = s0 >> 21; - s1 += carry0; - s0 -= carry0 * (1 << 21); - carry1 = s1 >> 21; - s2 += carry1; - s1 -= carry1 * (1 << 21); - carry2 = s2 >> 21; - s3 += carry2; - s2 -= carry2 * (1 << 21); - carry3 = s3 >> 21; - s4 += carry3; - s3 -= carry3 * (1 << 21); - carry4 = s4 >> 21; - s5 += carry4; - s4 -= carry4 * (1 << 21); - carry5 = s5 >> 21; - s6 += carry5; - s5 -= carry5 * (1 << 21); - carry6 = s6 >> 21; - s7 += carry6; - s6 -= carry6 * (1 << 21); - carry7 = s7 >> 21; - s8 += carry7; - s7 -= carry7 * (1 << 21); - carry8 = s8 >> 21; - s9 += carry8; - s8 -= carry8 * (1 << 21); - carry9 = s9 >> 21; - s10 += carry9; - s9 -= carry9 * (1 << 21); - carry10 = s10 >> 21; - s11 += carry10; - s10 -= carry10 * (1 << 21); - - s[0] = (uint8_t)(s0 >> 0); - s[1] = (uint8_t)(s0 >> 8); - s[2] = (uint8_t)((s0 >> 16) | (s1 << 5)); - s[3] = (uint8_t)(s1 >> 3); - s[4] = (uint8_t)(s1 >> 11); - s[5] = (uint8_t)((s1 >> 19) | (s2 << 2)); - s[6] = (uint8_t)(s2 >> 6); - s[7] = (uint8_t)((s2 >> 14) | (s3 << 7)); - s[8] = (uint8_t)(s3 >> 1); - s[9] = (uint8_t)(s3 >> 9); - s[10] = (uint8_t)((s3 >> 17) | (s4 << 4)); - s[11] = (uint8_t)(s4 >> 4); - s[12] = (uint8_t)(s4 >> 12); - s[13] = (uint8_t)((s4 >> 20) | (s5 << 1)); - s[14] = (uint8_t)(s5 >> 7); - s[15] = (uint8_t)((s5 >> 15) | (s6 << 6)); - s[16] = (uint8_t)(s6 >> 2); - s[17] = (uint8_t)(s6 >> 10); - s[18] = (uint8_t)((s6 >> 18) | (s7 << 3)); - s[19] = (uint8_t)(s7 >> 5); - s[20] = (uint8_t)(s7 >> 13); - s[21] = (uint8_t)(s8 >> 0); - s[22] = (uint8_t)(s8 >> 8); - s[23] = (uint8_t)((s8 >> 16) | (s9 << 5)); - s[24] = (uint8_t)(s9 >> 3); - s[25] = (uint8_t)(s9 >> 11); - s[26] = (uint8_t)((s9 >> 19) | (s10 << 2)); - s[27] = (uint8_t)(s10 >> 6); - s[28] = (uint8_t)((s10 >> 14) | (s11 << 7)); - s[29] = (uint8_t)(s11 >> 1); - s[30] = (uint8_t)(s11 >> 9); - s[31] = (uint8_t)(s11 >> 17); + * Overwrites s in place. +*/ +static void x25519_sc_reduce(uint8_t *s) +{ + int64_t s0 = kBottom21Bits & load_3(s); + int64_t s1 = kBottom21Bits & (load_4(s + 2) >> 5); + int64_t s2 = kBottom21Bits & (load_3(s + 5) >> 2); + int64_t s3 = kBottom21Bits & (load_4(s + 7) >> 7); + int64_t s4 = kBottom21Bits & (load_4(s + 10) >> 4); + int64_t s5 = kBottom21Bits & (load_3(s + 13) >> 1); + int64_t s6 = kBottom21Bits & (load_4(s + 15) >> 6); + int64_t s7 = kBottom21Bits & (load_3(s + 18) >> 3); + int64_t s8 = kBottom21Bits & load_3(s + 21); + int64_t s9 = kBottom21Bits & (load_4(s + 23) >> 5); + int64_t s10 = kBottom21Bits & (load_3(s + 26) >> 2); + int64_t s11 = kBottom21Bits & (load_4(s + 28) >> 7); + int64_t s12 = kBottom21Bits & (load_4(s + 31) >> 4); + int64_t s13 = kBottom21Bits & (load_3(s + 34) >> 1); + int64_t s14 = kBottom21Bits & (load_4(s + 36) >> 6); + int64_t s15 = kBottom21Bits & (load_3(s + 39) >> 3); + int64_t s16 = kBottom21Bits & load_3(s + 42); + int64_t s17 = kBottom21Bits & (load_4(s + 44) >> 5); + int64_t s18 = kBottom21Bits & (load_3(s + 47) >> 2); + int64_t s19 = kBottom21Bits & (load_4(s + 49) >> 7); + int64_t s20 = kBottom21Bits & (load_4(s + 52) >> 4); + int64_t s21 = kBottom21Bits & (load_3(s + 55) >> 1); + int64_t s22 = kBottom21Bits & (load_4(s + 57) >> 6); + int64_t s23 = (load_4(s + 60) >> 3); + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + int64_t carry10; + int64_t carry11; + int64_t carry12; + int64_t carry13; + int64_t carry14; + int64_t carry15; + int64_t carry16; + + s11 += s23 * 666643; + s12 += s23 * 470296; + s13 += s23 * 654183; + s14 -= s23 * 997805; + s15 += s23 * 136657; + s16 -= s23 * 683901; + s23 = 0; + + s10 += s22 * 666643; + s11 += s22 * 470296; + s12 += s22 * 654183; + s13 -= s22 * 997805; + s14 += s22 * 136657; + s15 -= s22 * 683901; + s22 = 0; + + s9 += s21 * 666643; + s10 += s21 * 470296; + s11 += s21 * 654183; + s12 -= s21 * 997805; + s13 += s21 * 136657; + s14 -= s21 * 683901; + s21 = 0; + + s8 += s20 * 666643; + s9 += s20 * 470296; + s10 += s20 * 654183; + s11 -= s20 * 997805; + s12 += s20 * 136657; + s13 -= s20 * 683901; + s20 = 0; + + s7 += s19 * 666643; + s8 += s19 * 470296; + s9 += s19 * 654183; + s10 -= s19 * 997805; + s11 += s19 * 136657; + s12 -= s19 * 683901; + s19 = 0; + + s6 += s18 * 666643; + s7 += s18 * 470296; + s8 += s18 * 654183; + s9 -= s18 * 997805; + s10 += s18 * 136657; + s11 -= s18 * 683901; + s18 = 0; + + carry6 = (s6 + (1 << 20)) >> 21; + s7 += carry6; + s6 -= carry6 * (1 << 21); + carry8 = (s8 + (1 << 20)) >> 21; + s9 += carry8; + s8 -= carry8 * (1 << 21); + carry10 = (s10 + (1 << 20)) >> 21; + s11 += carry10; + s10 -= carry10 * (1 << 21); + carry12 = (s12 + (1 << 20)) >> 21; + s13 += carry12; + s12 -= carry12 * (1 << 21); + carry14 = (s14 + (1 << 20)) >> 21; + s15 += carry14; + s14 -= carry14 * (1 << 21); + carry16 = (s16 + (1 << 20)) >> 21; + s17 += carry16; + s16 -= carry16 * (1 << 21); + + carry7 = (s7 + (1 << 20)) >> 21; + s8 += carry7; + s7 -= carry7 * (1 << 21); + carry9 = (s9 + (1 << 20)) >> 21; + s10 += carry9; + s9 -= carry9 * (1 << 21); + carry11 = (s11 + (1 << 20)) >> 21; + s12 += carry11; + s11 -= carry11 * (1 << 21); + carry13 = (s13 + (1 << 20)) >> 21; + s14 += carry13; + s13 -= carry13 * (1 << 21); + carry15 = (s15 + (1 << 20)) >> 21; + s16 += carry15; + s15 -= carry15 * (1 << 21); + + s5 += s17 * 666643; + s6 += s17 * 470296; + s7 += s17 * 654183; + s8 -= s17 * 997805; + s9 += s17 * 136657; + s10 -= s17 * 683901; + s17 = 0; + + s4 += s16 * 666643; + s5 += s16 * 470296; + s6 += s16 * 654183; + s7 -= s16 * 997805; + s8 += s16 * 136657; + s9 -= s16 * 683901; + s16 = 0; + + s3 += s15 * 666643; + s4 += s15 * 470296; + s5 += s15 * 654183; + s6 -= s15 * 997805; + s7 += s15 * 136657; + s8 -= s15 * 683901; + s15 = 0; + + s2 += s14 * 666643; + s3 += s14 * 470296; + s4 += s14 * 654183; + s5 -= s14 * 997805; + s6 += s14 * 136657; + s7 -= s14 * 683901; + s14 = 0; + + s1 += s13 * 666643; + s2 += s13 * 470296; + s3 += s13 * 654183; + s4 -= s13 * 997805; + s5 += s13 * 136657; + s6 -= s13 * 683901; + s13 = 0; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = (s0 + (1 << 20)) >> 21; + s1 += carry0; + s0 -= carry0 * (1 << 21); + carry2 = (s2 + (1 << 20)) >> 21; + s3 += carry2; + s2 -= carry2 * (1 << 21); + carry4 = (s4 + (1 << 20)) >> 21; + s5 += carry4; + s4 -= carry4 * (1 << 21); + carry6 = (s6 + (1 << 20)) >> 21; + s7 += carry6; + s6 -= carry6 * (1 << 21); + carry8 = (s8 + (1 << 20)) >> 21; + s9 += carry8; + s8 -= carry8 * (1 << 21); + carry10 = (s10 + (1 << 20)) >> 21; + s11 += carry10; + s10 -= carry10 * (1 << 21); + + carry1 = (s1 + (1 << 20)) >> 21; + s2 += carry1; + s1 -= carry1 * (1 << 21); + carry3 = (s3 + (1 << 20)) >> 21; + s4 += carry3; + s3 -= carry3 * (1 << 21); + carry5 = (s5 + (1 << 20)) >> 21; + s6 += carry5; + s5 -= carry5 * (1 << 21); + carry7 = (s7 + (1 << 20)) >> 21; + s8 += carry7; + s7 -= carry7 * (1 << 21); + carry9 = (s9 + (1 << 20)) >> 21; + s10 += carry9; + s9 -= carry9 * (1 << 21); + carry11 = (s11 + (1 << 20)) >> 21; + s12 += carry11; + s11 -= carry11 * (1 << 21); + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; + s1 += carry0; + s0 -= carry0 * (1 << 21); + carry1 = s1 >> 21; + s2 += carry1; + s1 -= carry1 * (1 << 21); + carry2 = s2 >> 21; + s3 += carry2; + s2 -= carry2 * (1 << 21); + carry3 = s3 >> 21; + s4 += carry3; + s3 -= carry3 * (1 << 21); + carry4 = s4 >> 21; + s5 += carry4; + s4 -= carry4 * (1 << 21); + carry5 = s5 >> 21; + s6 += carry5; + s5 -= carry5 * (1 << 21); + carry6 = s6 >> 21; + s7 += carry6; + s6 -= carry6 * (1 << 21); + carry7 = s7 >> 21; + s8 += carry7; + s7 -= carry7 * (1 << 21); + carry8 = s8 >> 21; + s9 += carry8; + s8 -= carry8 * (1 << 21); + carry9 = s9 >> 21; + s10 += carry9; + s9 -= carry9 * (1 << 21); + carry10 = s10 >> 21; + s11 += carry10; + s10 -= carry10 * (1 << 21); + carry11 = s11 >> 21; + s12 += carry11; + s11 -= carry11 * (1 << 21); + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; + s1 += carry0; + s0 -= carry0 * (1 << 21); + carry1 = s1 >> 21; + s2 += carry1; + s1 -= carry1 * (1 << 21); + carry2 = s2 >> 21; + s3 += carry2; + s2 -= carry2 * (1 << 21); + carry3 = s3 >> 21; + s4 += carry3; + s3 -= carry3 * (1 << 21); + carry4 = s4 >> 21; + s5 += carry4; + s4 -= carry4 * (1 << 21); + carry5 = s5 >> 21; + s6 += carry5; + s5 -= carry5 * (1 << 21); + carry6 = s6 >> 21; + s7 += carry6; + s6 -= carry6 * (1 << 21); + carry7 = s7 >> 21; + s8 += carry7; + s7 -= carry7 * (1 << 21); + carry8 = s8 >> 21; + s9 += carry8; + s8 -= carry8 * (1 << 21); + carry9 = s9 >> 21; + s10 += carry9; + s9 -= carry9 * (1 << 21); + carry10 = s10 >> 21; + s11 += carry10; + s10 -= carry10 * (1 << 21); + + s[ 0] = (uint8_t) (s0 >> 0); + s[ 1] = (uint8_t) (s0 >> 8); + s[ 2] = (uint8_t)((s0 >> 16) | (s1 << 5)); + s[ 3] = (uint8_t) (s1 >> 3); + s[ 4] = (uint8_t) (s1 >> 11); + s[ 5] = (uint8_t)((s1 >> 19) | (s2 << 2)); + s[ 6] = (uint8_t) (s2 >> 6); + s[ 7] = (uint8_t)((s2 >> 14) | (s3 << 7)); + s[ 8] = (uint8_t) (s3 >> 1); + s[ 9] = (uint8_t) (s3 >> 9); + s[10] = (uint8_t)((s3 >> 17) | (s4 << 4)); + s[11] = (uint8_t) (s4 >> 4); + s[12] = (uint8_t) (s4 >> 12); + s[13] = (uint8_t)((s4 >> 20) | (s5 << 1)); + s[14] = (uint8_t) (s5 >> 7); + s[15] = (uint8_t)((s5 >> 15) | (s6 << 6)); + s[16] = (uint8_t) (s6 >> 2); + s[17] = (uint8_t) (s6 >> 10); + s[18] = (uint8_t)((s6 >> 18) | (s7 << 3)); + s[19] = (uint8_t) (s7 >> 5); + s[20] = (uint8_t) (s7 >> 13); + s[21] = (uint8_t) (s8 >> 0); + s[22] = (uint8_t) (s8 >> 8); + s[23] = (uint8_t)((s8 >> 16) | (s9 << 5)); + s[24] = (uint8_t) (s9 >> 3); + s[25] = (uint8_t) (s9 >> 11); + s[26] = (uint8_t)((s9 >> 19) | (s10 << 2)); + s[27] = (uint8_t) (s10 >> 6); + s[28] = (uint8_t)((s10 >> 14) | (s11 << 7)); + s[29] = (uint8_t) (s11 >> 1); + s[30] = (uint8_t) (s11 >> 9); + s[31] = (uint8_t) (s11 >> 17); } -/* Input: +/* + * Input: * a[0]+256*a[1]+...+256^31*a[31] = a * b[0]+256*b[1]+...+256^31*b[31] = b * c[0]+256*c[1]+...+256^31*c[31] = c * * Output: * s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l - * where l = 2^252 + 27742317777372353535851937790883648493. */ + * where l = 2^252 + 27742317777372353535851937790883648493. + */ static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b, - const uint8_t *c) { - int64_t a0 = 2097151 & load_3(a); - int64_t a1 = 2097151 & (load_4(a + 2) >> 5); - int64_t a2 = 2097151 & (load_3(a + 5) >> 2); - int64_t a3 = 2097151 & (load_4(a + 7) >> 7); - int64_t a4 = 2097151 & (load_4(a + 10) >> 4); - int64_t a5 = 2097151 & (load_3(a + 13) >> 1); - int64_t a6 = 2097151 & (load_4(a + 15) >> 6); - int64_t a7 = 2097151 & (load_3(a + 18) >> 3); - int64_t a8 = 2097151 & load_3(a + 21); - int64_t a9 = 2097151 & (load_4(a + 23) >> 5); - int64_t a10 = 2097151 & (load_3(a + 26) >> 2); - int64_t a11 = (load_4(a + 28) >> 7); - int64_t b0 = 2097151 & load_3(b); - int64_t b1 = 2097151 & (load_4(b + 2) >> 5); - int64_t b2 = 2097151 & (load_3(b + 5) >> 2); - int64_t b3 = 2097151 & (load_4(b + 7) >> 7); - int64_t b4 = 2097151 & (load_4(b + 10) >> 4); - int64_t b5 = 2097151 & (load_3(b + 13) >> 1); - int64_t b6 = 2097151 & (load_4(b + 15) >> 6); - int64_t b7 = 2097151 & (load_3(b + 18) >> 3); - int64_t b8 = 2097151 & load_3(b + 21); - int64_t b9 = 2097151 & (load_4(b + 23) >> 5); - int64_t b10 = 2097151 & (load_3(b + 26) >> 2); - int64_t b11 = (load_4(b + 28) >> 7); - int64_t c0 = 2097151 & load_3(c); - int64_t c1 = 2097151 & (load_4(c + 2) >> 5); - int64_t c2 = 2097151 & (load_3(c + 5) >> 2); - int64_t c3 = 2097151 & (load_4(c + 7) >> 7); - int64_t c4 = 2097151 & (load_4(c + 10) >> 4); - int64_t c5 = 2097151 & (load_3(c + 13) >> 1); - int64_t c6 = 2097151 & (load_4(c + 15) >> 6); - int64_t c7 = 2097151 & (load_3(c + 18) >> 3); - int64_t c8 = 2097151 & load_3(c + 21); - int64_t c9 = 2097151 & (load_4(c + 23) >> 5); - int64_t c10 = 2097151 & (load_3(c + 26) >> 2); - int64_t c11 = (load_4(c + 28) >> 7); - int64_t s0; - int64_t s1; - int64_t s2; - int64_t s3; - int64_t s4; - int64_t s5; - int64_t s6; - int64_t s7; - int64_t s8; - int64_t s9; - int64_t s10; - int64_t s11; - int64_t s12; - int64_t s13; - int64_t s14; - int64_t s15; - int64_t s16; - int64_t s17; - int64_t s18; - int64_t s19; - int64_t s20; - int64_t s21; - int64_t s22; - int64_t s23; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - int64_t carry10; - int64_t carry11; - int64_t carry12; - int64_t carry13; - int64_t carry14; - int64_t carry15; - int64_t carry16; - int64_t carry17; - int64_t carry18; - int64_t carry19; - int64_t carry20; - int64_t carry21; - int64_t carry22; - - s0 = c0 + a0 * b0; - s1 = c1 + a0 * b1 + a1 * b0; - s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0; - s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0; - s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0; - s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0; - s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0; - s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + - a6 * b1 + a7 * b0; - s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 + - a6 * b2 + a7 * b1 + a8 * b0; - s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 + - a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0; - s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 + - a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0; - s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 + - a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0; - s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 + - a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1; - s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 + - a9 * b4 + a10 * b3 + a11 * b2; - s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 + - a10 * b4 + a11 * b3; - s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 + - a11 * b4; - s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5; - s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6; - s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7; - s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8; - s20 = a9 * b11 + a10 * b10 + a11 * b9; - s21 = a10 * b11 + a11 * b10; - s22 = a11 * b11; - s23 = 0; - - carry0 = (s0 + (1 << 20)) >> 21; - s1 += carry0; - s0 -= carry0 * (1 << 21); - carry2 = (s2 + (1 << 20)) >> 21; - s3 += carry2; - s2 -= carry2 * (1 << 21); - carry4 = (s4 + (1 << 20)) >> 21; - s5 += carry4; - s4 -= carry4 * (1 << 21); - carry6 = (s6 + (1 << 20)) >> 21; - s7 += carry6; - s6 -= carry6 * (1 << 21); - carry8 = (s8 + (1 << 20)) >> 21; - s9 += carry8; - s8 -= carry8 * (1 << 21); - carry10 = (s10 + (1 << 20)) >> 21; - s11 += carry10; - s10 -= carry10 * (1 << 21); - carry12 = (s12 + (1 << 20)) >> 21; - s13 += carry12; - s12 -= carry12 * (1 << 21); - carry14 = (s14 + (1 << 20)) >> 21; - s15 += carry14; - s14 -= carry14 * (1 << 21); - carry16 = (s16 + (1 << 20)) >> 21; - s17 += carry16; - s16 -= carry16 * (1 << 21); - carry18 = (s18 + (1 << 20)) >> 21; - s19 += carry18; - s18 -= carry18 * (1 << 21); - carry20 = (s20 + (1 << 20)) >> 21; - s21 += carry20; - s20 -= carry20 * (1 << 21); - carry22 = (s22 + (1 << 20)) >> 21; - s23 += carry22; - s22 -= carry22 * (1 << 21); - - carry1 = (s1 + (1 << 20)) >> 21; - s2 += carry1; - s1 -= carry1 * (1 << 21); - carry3 = (s3 + (1 << 20)) >> 21; - s4 += carry3; - s3 -= carry3 * (1 << 21); - carry5 = (s5 + (1 << 20)) >> 21; - s6 += carry5; - s5 -= carry5 * (1 << 21); - carry7 = (s7 + (1 << 20)) >> 21; - s8 += carry7; - s7 -= carry7 * (1 << 21); - carry9 = (s9 + (1 << 20)) >> 21; - s10 += carry9; - s9 -= carry9 * (1 << 21); - carry11 = (s11 + (1 << 20)) >> 21; - s12 += carry11; - s11 -= carry11 * (1 << 21); - carry13 = (s13 + (1 << 20)) >> 21; - s14 += carry13; - s13 -= carry13 * (1 << 21); - carry15 = (s15 + (1 << 20)) >> 21; - s16 += carry15; - s15 -= carry15 * (1 << 21); - carry17 = (s17 + (1 << 20)) >> 21; - s18 += carry17; - s17 -= carry17 * (1 << 21); - carry19 = (s19 + (1 << 20)) >> 21; - s20 += carry19; - s19 -= carry19 * (1 << 21); - carry21 = (s21 + (1 << 20)) >> 21; - s22 += carry21; - s21 -= carry21 * (1 << 21); - - s11 += s23 * 666643; - s12 += s23 * 470296; - s13 += s23 * 654183; - s14 -= s23 * 997805; - s15 += s23 * 136657; - s16 -= s23 * 683901; - s23 = 0; - - s10 += s22 * 666643; - s11 += s22 * 470296; - s12 += s22 * 654183; - s13 -= s22 * 997805; - s14 += s22 * 136657; - s15 -= s22 * 683901; - s22 = 0; - - s9 += s21 * 666643; - s10 += s21 * 470296; - s11 += s21 * 654183; - s12 -= s21 * 997805; - s13 += s21 * 136657; - s14 -= s21 * 683901; - s21 = 0; - - s8 += s20 * 666643; - s9 += s20 * 470296; - s10 += s20 * 654183; - s11 -= s20 * 997805; - s12 += s20 * 136657; - s13 -= s20 * 683901; - s20 = 0; - - s7 += s19 * 666643; - s8 += s19 * 470296; - s9 += s19 * 654183; - s10 -= s19 * 997805; - s11 += s19 * 136657; - s12 -= s19 * 683901; - s19 = 0; - - s6 += s18 * 666643; - s7 += s18 * 470296; - s8 += s18 * 654183; - s9 -= s18 * 997805; - s10 += s18 * 136657; - s11 -= s18 * 683901; - s18 = 0; - - carry6 = (s6 + (1 << 20)) >> 21; - s7 += carry6; - s6 -= carry6 * (1 << 21); - carry8 = (s8 + (1 << 20)) >> 21; - s9 += carry8; - s8 -= carry8 * (1 << 21); - carry10 = (s10 + (1 << 20)) >> 21; - s11 += carry10; - s10 -= carry10 * (1 << 21); - carry12 = (s12 + (1 << 20)) >> 21; - s13 += carry12; - s12 -= carry12 * (1 << 21); - carry14 = (s14 + (1 << 20)) >> 21; - s15 += carry14; - s14 -= carry14 * (1 << 21); - carry16 = (s16 + (1 << 20)) >> 21; - s17 += carry16; - s16 -= carry16 * (1 << 21); - - carry7 = (s7 + (1 << 20)) >> 21; - s8 += carry7; - s7 -= carry7 * (1 << 21); - carry9 = (s9 + (1 << 20)) >> 21; - s10 += carry9; - s9 -= carry9 * (1 << 21); - carry11 = (s11 + (1 << 20)) >> 21; - s12 += carry11; - s11 -= carry11 * (1 << 21); - carry13 = (s13 + (1 << 20)) >> 21; - s14 += carry13; - s13 -= carry13 * (1 << 21); - carry15 = (s15 + (1 << 20)) >> 21; - s16 += carry15; - s15 -= carry15 * (1 << 21); - - s5 += s17 * 666643; - s6 += s17 * 470296; - s7 += s17 * 654183; - s8 -= s17 * 997805; - s9 += s17 * 136657; - s10 -= s17 * 683901; - s17 = 0; - - s4 += s16 * 666643; - s5 += s16 * 470296; - s6 += s16 * 654183; - s7 -= s16 * 997805; - s8 += s16 * 136657; - s9 -= s16 * 683901; - s16 = 0; - - s3 += s15 * 666643; - s4 += s15 * 470296; - s5 += s15 * 654183; - s6 -= s15 * 997805; - s7 += s15 * 136657; - s8 -= s15 * 683901; - s15 = 0; - - s2 += s14 * 666643; - s3 += s14 * 470296; - s4 += s14 * 654183; - s5 -= s14 * 997805; - s6 += s14 * 136657; - s7 -= s14 * 683901; - s14 = 0; - - s1 += s13 * 666643; - s2 += s13 * 470296; - s3 += s13 * 654183; - s4 -= s13 * 997805; - s5 += s13 * 136657; - s6 -= s13 * 683901; - s13 = 0; - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = (s0 + (1 << 20)) >> 21; - s1 += carry0; - s0 -= carry0 * (1 << 21); - carry2 = (s2 + (1 << 20)) >> 21; - s3 += carry2; - s2 -= carry2 * (1 << 21); - carry4 = (s4 + (1 << 20)) >> 21; - s5 += carry4; - s4 -= carry4 * (1 << 21); - carry6 = (s6 + (1 << 20)) >> 21; - s7 += carry6; - s6 -= carry6 * (1 << 21); - carry8 = (s8 + (1 << 20)) >> 21; - s9 += carry8; - s8 -= carry8 * (1 << 21); - carry10 = (s10 + (1 << 20)) >> 21; - s11 += carry10; - s10 -= carry10 * (1 << 21); - - carry1 = (s1 + (1 << 20)) >> 21; - s2 += carry1; - s1 -= carry1 * (1 << 21); - carry3 = (s3 + (1 << 20)) >> 21; - s4 += carry3; - s3 -= carry3 * (1 << 21); - carry5 = (s5 + (1 << 20)) >> 21; - s6 += carry5; - s5 -= carry5 * (1 << 21); - carry7 = (s7 + (1 << 20)) >> 21; - s8 += carry7; - s7 -= carry7 * (1 << 21); - carry9 = (s9 + (1 << 20)) >> 21; - s10 += carry9; - s9 -= carry9 * (1 << 21); - carry11 = (s11 + (1 << 20)) >> 21; - s12 += carry11; - s11 -= carry11 * (1 << 21); - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = s0 >> 21; - s1 += carry0; - s0 -= carry0 * (1 << 21); - carry1 = s1 >> 21; - s2 += carry1; - s1 -= carry1 * (1 << 21); - carry2 = s2 >> 21; - s3 += carry2; - s2 -= carry2 * (1 << 21); - carry3 = s3 >> 21; - s4 += carry3; - s3 -= carry3 * (1 << 21); - carry4 = s4 >> 21; - s5 += carry4; - s4 -= carry4 * (1 << 21); - carry5 = s5 >> 21; - s6 += carry5; - s5 -= carry5 * (1 << 21); - carry6 = s6 >> 21; - s7 += carry6; - s6 -= carry6 * (1 << 21); - carry7 = s7 >> 21; - s8 += carry7; - s7 -= carry7 * (1 << 21); - carry8 = s8 >> 21; - s9 += carry8; - s8 -= carry8 * (1 << 21); - carry9 = s9 >> 21; - s10 += carry9; - s9 -= carry9 * (1 << 21); - carry10 = s10 >> 21; - s11 += carry10; - s10 -= carry10 * (1 << 21); - carry11 = s11 >> 21; - s12 += carry11; - s11 -= carry11 * (1 << 21); - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = s0 >> 21; - s1 += carry0; - s0 -= carry0 * (1 << 21); - carry1 = s1 >> 21; - s2 += carry1; - s1 -= carry1 * (1 << 21); - carry2 = s2 >> 21; - s3 += carry2; - s2 -= carry2 * (1 << 21); - carry3 = s3 >> 21; - s4 += carry3; - s3 -= carry3 * (1 << 21); - carry4 = s4 >> 21; - s5 += carry4; - s4 -= carry4 * (1 << 21); - carry5 = s5 >> 21; - s6 += carry5; - s5 -= carry5 * (1 << 21); - carry6 = s6 >> 21; - s7 += carry6; - s6 -= carry6 * (1 << 21); - carry7 = s7 >> 21; - s8 += carry7; - s7 -= carry7 * (1 << 21); - carry8 = s8 >> 21; - s9 += carry8; - s8 -= carry8 * (1 << 21); - carry9 = s9 >> 21; - s10 += carry9; - s9 -= carry9 * (1 << 21); - carry10 = s10 >> 21; - s11 += carry10; - s10 -= carry10 * (1 << 21); - - s[0] = (uint8_t)(s0 >> 0); - s[1] = (uint8_t)(s0 >> 8); - s[2] = (uint8_t)((s0 >> 16) | (s1 << 5)); - s[3] = (uint8_t)(s1 >> 3); - s[4] = (uint8_t)(s1 >> 11); - s[5] = (uint8_t)((s1 >> 19) | (s2 << 2)); - s[6] = (uint8_t)(s2 >> 6); - s[7] = (uint8_t)((s2 >> 14) | (s3 << 7)); - s[8] = (uint8_t)(s3 >> 1); - s[9] = (uint8_t)(s3 >> 9); - s[10] = (uint8_t)((s3 >> 17) | (s4 << 4)); - s[11] = (uint8_t)(s4 >> 4); - s[12] = (uint8_t)(s4 >> 12); - s[13] = (uint8_t)((s4 >> 20) | (s5 << 1)); - s[14] = (uint8_t)(s5 >> 7); - s[15] = (uint8_t)((s5 >> 15) | (s6 << 6)); - s[16] = (uint8_t)(s6 >> 2); - s[17] = (uint8_t)(s6 >> 10); - s[18] = (uint8_t)((s6 >> 18) | (s7 << 3)); - s[19] = (uint8_t)(s7 >> 5); - s[20] = (uint8_t)(s7 >> 13); - s[21] = (uint8_t)(s8 >> 0); - s[22] = (uint8_t)(s8 >> 8); - s[23] = (uint8_t)((s8 >> 16) | (s9 << 5)); - s[24] = (uint8_t)(s9 >> 3); - s[25] = (uint8_t)(s9 >> 11); - s[26] = (uint8_t)((s9 >> 19) | (s10 << 2)); - s[27] = (uint8_t)(s10 >> 6); - s[28] = (uint8_t)((s10 >> 14) | (s11 << 7)); - s[29] = (uint8_t)(s11 >> 1); - s[30] = (uint8_t)(s11 >> 9); - s[31] = (uint8_t)(s11 >> 17); + const uint8_t *c) +{ + int64_t a0 = kBottom21Bits & load_3(a); + int64_t a1 = kBottom21Bits & (load_4(a + 2) >> 5); + int64_t a2 = kBottom21Bits & (load_3(a + 5) >> 2); + int64_t a3 = kBottom21Bits & (load_4(a + 7) >> 7); + int64_t a4 = kBottom21Bits & (load_4(a + 10) >> 4); + int64_t a5 = kBottom21Bits & (load_3(a + 13) >> 1); + int64_t a6 = kBottom21Bits & (load_4(a + 15) >> 6); + int64_t a7 = kBottom21Bits & (load_3(a + 18) >> 3); + int64_t a8 = kBottom21Bits & load_3(a + 21); + int64_t a9 = kBottom21Bits & (load_4(a + 23) >> 5); + int64_t a10 = kBottom21Bits & (load_3(a + 26) >> 2); + int64_t a11 = (load_4(a + 28) >> 7); + int64_t b0 = kBottom21Bits & load_3(b); + int64_t b1 = kBottom21Bits & (load_4(b + 2) >> 5); + int64_t b2 = kBottom21Bits & (load_3(b + 5) >> 2); + int64_t b3 = kBottom21Bits & (load_4(b + 7) >> 7); + int64_t b4 = kBottom21Bits & (load_4(b + 10) >> 4); + int64_t b5 = kBottom21Bits & (load_3(b + 13) >> 1); + int64_t b6 = kBottom21Bits & (load_4(b + 15) >> 6); + int64_t b7 = kBottom21Bits & (load_3(b + 18) >> 3); + int64_t b8 = kBottom21Bits & load_3(b + 21); + int64_t b9 = kBottom21Bits & (load_4(b + 23) >> 5); + int64_t b10 = kBottom21Bits & (load_3(b + 26) >> 2); + int64_t b11 = (load_4(b + 28) >> 7); + int64_t c0 = kBottom21Bits & load_3(c); + int64_t c1 = kBottom21Bits & (load_4(c + 2) >> 5); + int64_t c2 = kBottom21Bits & (load_3(c + 5) >> 2); + int64_t c3 = kBottom21Bits & (load_4(c + 7) >> 7); + int64_t c4 = kBottom21Bits & (load_4(c + 10) >> 4); + int64_t c5 = kBottom21Bits & (load_3(c + 13) >> 1); + int64_t c6 = kBottom21Bits & (load_4(c + 15) >> 6); + int64_t c7 = kBottom21Bits & (load_3(c + 18) >> 3); + int64_t c8 = kBottom21Bits & load_3(c + 21); + int64_t c9 = kBottom21Bits & (load_4(c + 23) >> 5); + int64_t c10 = kBottom21Bits & (load_3(c + 26) >> 2); + int64_t c11 = (load_4(c + 28) >> 7); + int64_t s0; + int64_t s1; + int64_t s2; + int64_t s3; + int64_t s4; + int64_t s5; + int64_t s6; + int64_t s7; + int64_t s8; + int64_t s9; + int64_t s10; + int64_t s11; + int64_t s12; + int64_t s13; + int64_t s14; + int64_t s15; + int64_t s16; + int64_t s17; + int64_t s18; + int64_t s19; + int64_t s20; + int64_t s21; + int64_t s22; + int64_t s23; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + int64_t carry10; + int64_t carry11; + int64_t carry12; + int64_t carry13; + int64_t carry14; + int64_t carry15; + int64_t carry16; + int64_t carry17; + int64_t carry18; + int64_t carry19; + int64_t carry20; + int64_t carry21; + int64_t carry22; + + s0 = c0 + a0 * b0; + s1 = c1 + a0 * b1 + a1 * b0; + s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0; + s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0; + s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0; + s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0; + s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0; + s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + a6 * b1 + a7 * b0; + s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 + a6 * b2 + a7 * b1 + a8 * b0; + s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 + a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0; + s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 + a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0; + s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 + a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0; + s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1; + s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 + a9 * b4 + a10 * b3 + a11 * b2; + s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 + a10 * b4 + a11 * b3; + s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 + a11 * b4; + s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5; + s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6; + s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7; + s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8; + s20 = a9 * b11 + a10 * b10 + a11 * b9; + s21 = a10 * b11 + a11 * b10; + s22 = a11 * b11; + s23 = 0; + + carry0 = (s0 + (1 << 20)) >> 21; + s1 += carry0; + s0 -= carry0 * (1 << 21); + carry2 = (s2 + (1 << 20)) >> 21; + s3 += carry2; + s2 -= carry2 * (1 << 21); + carry4 = (s4 + (1 << 20)) >> 21; + s5 += carry4; + s4 -= carry4 * (1 << 21); + carry6 = (s6 + (1 << 20)) >> 21; + s7 += carry6; + s6 -= carry6 * (1 << 21); + carry8 = (s8 + (1 << 20)) >> 21; + s9 += carry8; + s8 -= carry8 * (1 << 21); + carry10 = (s10 + (1 << 20)) >> 21; + s11 += carry10; + s10 -= carry10 * (1 << 21); + carry12 = (s12 + (1 << 20)) >> 21; + s13 += carry12; + s12 -= carry12 * (1 << 21); + carry14 = (s14 + (1 << 20)) >> 21; + s15 += carry14; + s14 -= carry14 * (1 << 21); + carry16 = (s16 + (1 << 20)) >> 21; + s17 += carry16; + s16 -= carry16 * (1 << 21); + carry18 = (s18 + (1 << 20)) >> 21; + s19 += carry18; + s18 -= carry18 * (1 << 21); + carry20 = (s20 + (1 << 20)) >> 21; + s21 += carry20; + s20 -= carry20 * (1 << 21); + carry22 = (s22 + (1 << 20)) >> 21; + s23 += carry22; + s22 -= carry22 * (1 << 21); + + carry1 = (s1 + (1 << 20)) >> 21; + s2 += carry1; + s1 -= carry1 * (1 << 21); + carry3 = (s3 + (1 << 20)) >> 21; + s4 += carry3; + s3 -= carry3 * (1 << 21); + carry5 = (s5 + (1 << 20)) >> 21; + s6 += carry5; + s5 -= carry5 * (1 << 21); + carry7 = (s7 + (1 << 20)) >> 21; + s8 += carry7; + s7 -= carry7 * (1 << 21); + carry9 = (s9 + (1 << 20)) >> 21; + s10 += carry9; + s9 -= carry9 * (1 << 21); + carry11 = (s11 + (1 << 20)) >> 21; + s12 += carry11; + s11 -= carry11 * (1 << 21); + carry13 = (s13 + (1 << 20)) >> 21; + s14 += carry13; + s13 -= carry13 * (1 << 21); + carry15 = (s15 + (1 << 20)) >> 21; + s16 += carry15; + s15 -= carry15 * (1 << 21); + carry17 = (s17 + (1 << 20)) >> 21; + s18 += carry17; + s17 -= carry17 * (1 << 21); + carry19 = (s19 + (1 << 20)) >> 21; + s20 += carry19; + s19 -= carry19 * (1 << 21); + carry21 = (s21 + (1 << 20)) >> 21; + s22 += carry21; + s21 -= carry21 * (1 << 21); + + s11 += s23 * 666643; + s12 += s23 * 470296; + s13 += s23 * 654183; + s14 -= s23 * 997805; + s15 += s23 * 136657; + s16 -= s23 * 683901; + s23 = 0; + + s10 += s22 * 666643; + s11 += s22 * 470296; + s12 += s22 * 654183; + s13 -= s22 * 997805; + s14 += s22 * 136657; + s15 -= s22 * 683901; + s22 = 0; + + s9 += s21 * 666643; + s10 += s21 * 470296; + s11 += s21 * 654183; + s12 -= s21 * 997805; + s13 += s21 * 136657; + s14 -= s21 * 683901; + s21 = 0; + + s8 += s20 * 666643; + s9 += s20 * 470296; + s10 += s20 * 654183; + s11 -= s20 * 997805; + s12 += s20 * 136657; + s13 -= s20 * 683901; + s20 = 0; + + s7 += s19 * 666643; + s8 += s19 * 470296; + s9 += s19 * 654183; + s10 -= s19 * 997805; + s11 += s19 * 136657; + s12 -= s19 * 683901; + s19 = 0; + + s6 += s18 * 666643; + s7 += s18 * 470296; + s8 += s18 * 654183; + s9 -= s18 * 997805; + s10 += s18 * 136657; + s11 -= s18 * 683901; + s18 = 0; + + carry6 = (s6 + (1 << 20)) >> 21; + s7 += carry6; + s6 -= carry6 * (1 << 21); + carry8 = (s8 + (1 << 20)) >> 21; + s9 += carry8; + s8 -= carry8 * (1 << 21); + carry10 = (s10 + (1 << 20)) >> 21; + s11 += carry10; + s10 -= carry10 * (1 << 21); + carry12 = (s12 + (1 << 20)) >> 21; + s13 += carry12; + s12 -= carry12 * (1 << 21); + carry14 = (s14 + (1 << 20)) >> 21; + s15 += carry14; + s14 -= carry14 * (1 << 21); + carry16 = (s16 + (1 << 20)) >> 21; + s17 += carry16; + s16 -= carry16 * (1 << 21); + + carry7 = (s7 + (1 << 20)) >> 21; + s8 += carry7; + s7 -= carry7 * (1 << 21); + carry9 = (s9 + (1 << 20)) >> 21; + s10 += carry9; + s9 -= carry9 * (1 << 21); + carry11 = (s11 + (1 << 20)) >> 21; + s12 += carry11; + s11 -= carry11 * (1 << 21); + carry13 = (s13 + (1 << 20)) >> 21; + s14 += carry13; + s13 -= carry13 * (1 << 21); + carry15 = (s15 + (1 << 20)) >> 21; + s16 += carry15; + s15 -= carry15 * (1 << 21); + + s5 += s17 * 666643; + s6 += s17 * 470296; + s7 += s17 * 654183; + s8 -= s17 * 997805; + s9 += s17 * 136657; + s10 -= s17 * 683901; + s17 = 0; + + s4 += s16 * 666643; + s5 += s16 * 470296; + s6 += s16 * 654183; + s7 -= s16 * 997805; + s8 += s16 * 136657; + s9 -= s16 * 683901; + s16 = 0; + + s3 += s15 * 666643; + s4 += s15 * 470296; + s5 += s15 * 654183; + s6 -= s15 * 997805; + s7 += s15 * 136657; + s8 -= s15 * 683901; + s15 = 0; + + s2 += s14 * 666643; + s3 += s14 * 470296; + s4 += s14 * 654183; + s5 -= s14 * 997805; + s6 += s14 * 136657; + s7 -= s14 * 683901; + s14 = 0; + + s1 += s13 * 666643; + s2 += s13 * 470296; + s3 += s13 * 654183; + s4 -= s13 * 997805; + s5 += s13 * 136657; + s6 -= s13 * 683901; + s13 = 0; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = (s0 + (1 << 20)) >> 21; + s1 += carry0; + s0 -= carry0 * (1 << 21); + carry2 = (s2 + (1 << 20)) >> 21; + s3 += carry2; + s2 -= carry2 * (1 << 21); + carry4 = (s4 + (1 << 20)) >> 21; + s5 += carry4; + s4 -= carry4 * (1 << 21); + carry6 = (s6 + (1 << 20)) >> 21; + s7 += carry6; + s6 -= carry6 * (1 << 21); + carry8 = (s8 + (1 << 20)) >> 21; + s9 += carry8; + s8 -= carry8 * (1 << 21); + carry10 = (s10 + (1 << 20)) >> 21; + s11 += carry10; + s10 -= carry10 * (1 << 21); + + carry1 = (s1 + (1 << 20)) >> 21; + s2 += carry1; + s1 -= carry1 * (1 << 21); + carry3 = (s3 + (1 << 20)) >> 21; + s4 += carry3; + s3 -= carry3 * (1 << 21); + carry5 = (s5 + (1 << 20)) >> 21; + s6 += carry5; + s5 -= carry5 * (1 << 21); + carry7 = (s7 + (1 << 20)) >> 21; + s8 += carry7; + s7 -= carry7 * (1 << 21); + carry9 = (s9 + (1 << 20)) >> 21; + s10 += carry9; + s9 -= carry9 * (1 << 21); + carry11 = (s11 + (1 << 20)) >> 21; + s12 += carry11; + s11 -= carry11 * (1 << 21); + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; + s1 += carry0; + s0 -= carry0 * (1 << 21); + carry1 = s1 >> 21; + s2 += carry1; + s1 -= carry1 * (1 << 21); + carry2 = s2 >> 21; + s3 += carry2; + s2 -= carry2 * (1 << 21); + carry3 = s3 >> 21; + s4 += carry3; + s3 -= carry3 * (1 << 21); + carry4 = s4 >> 21; + s5 += carry4; + s4 -= carry4 * (1 << 21); + carry5 = s5 >> 21; + s6 += carry5; + s5 -= carry5 * (1 << 21); + carry6 = s6 >> 21; + s7 += carry6; + s6 -= carry6 * (1 << 21); + carry7 = s7 >> 21; + s8 += carry7; + s7 -= carry7 * (1 << 21); + carry8 = s8 >> 21; + s9 += carry8; + s8 -= carry8 * (1 << 21); + carry9 = s9 >> 21; + s10 += carry9; + s9 -= carry9 * (1 << 21); + carry10 = s10 >> 21; + s11 += carry10; + s10 -= carry10 * (1 << 21); + carry11 = s11 >> 21; + s12 += carry11; + s11 -= carry11 * (1 << 21); + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; + s1 += carry0; + s0 -= carry0 * (1 << 21); + carry1 = s1 >> 21; + s2 += carry1; + s1 -= carry1 * (1 << 21); + carry2 = s2 >> 21; + s3 += carry2; + s2 -= carry2 * (1 << 21); + carry3 = s3 >> 21; + s4 += carry3; + s3 -= carry3 * (1 << 21); + carry4 = s4 >> 21; + s5 += carry4; + s4 -= carry4 * (1 << 21); + carry5 = s5 >> 21; + s6 += carry5; + s5 -= carry5 * (1 << 21); + carry6 = s6 >> 21; + s7 += carry6; + s6 -= carry6 * (1 << 21); + carry7 = s7 >> 21; + s8 += carry7; + s7 -= carry7 * (1 << 21); + carry8 = s8 >> 21; + s9 += carry8; + s8 -= carry8 * (1 << 21); + carry9 = s9 >> 21; + s10 += carry9; + s9 -= carry9 * (1 << 21); + carry10 = s10 >> 21; + s11 += carry10; + s10 -= carry10 * (1 << 21); + + s[ 0] = (uint8_t) (s0 >> 0); + s[ 1] = (uint8_t) (s0 >> 8); + s[ 2] = (uint8_t)((s0 >> 16) | (s1 << 5)); + s[ 3] = (uint8_t) (s1 >> 3); + s[ 4] = (uint8_t) (s1 >> 11); + s[ 5] = (uint8_t)((s1 >> 19) | (s2 << 2)); + s[ 6] = (uint8_t) (s2 >> 6); + s[ 7] = (uint8_t)((s2 >> 14) | (s3 << 7)); + s[ 8] = (uint8_t) (s3 >> 1); + s[ 9] = (uint8_t) (s3 >> 9); + s[10] = (uint8_t)((s3 >> 17) | (s4 << 4)); + s[11] = (uint8_t) (s4 >> 4); + s[12] = (uint8_t) (s4 >> 12); + s[13] = (uint8_t)((s4 >> 20) | (s5 << 1)); + s[14] = (uint8_t) (s5 >> 7); + s[15] = (uint8_t)((s5 >> 15) | (s6 << 6)); + s[16] = (uint8_t) (s6 >> 2); + s[17] = (uint8_t) (s6 >> 10); + s[18] = (uint8_t)((s6 >> 18) | (s7 << 3)); + s[19] = (uint8_t) (s7 >> 5); + s[20] = (uint8_t) (s7 >> 13); + s[21] = (uint8_t) (s8 >> 0); + s[22] = (uint8_t) (s8 >> 8); + s[23] = (uint8_t)((s8 >> 16) | (s9 << 5)); + s[24] = (uint8_t) (s9 >> 3); + s[25] = (uint8_t) (s9 >> 11); + s[26] = (uint8_t)((s9 >> 19) | (s10 << 2)); + s[27] = (uint8_t) (s10 >> 6); + s[28] = (uint8_t)((s10 >> 14) | (s11 << 7)); + s[29] = (uint8_t) (s11 >> 1); + s[30] = (uint8_t) (s11 >> 9); + s[31] = (uint8_t) (s11 >> 17); } int ED25519_sign(uint8_t *out_sig, const uint8_t *message, size_t message_len, - const uint8_t public_key[32], const uint8_t private_key[32]) { - uint8_t az[SHA512_DIGEST_LENGTH]; - uint8_t nonce[SHA512_DIGEST_LENGTH]; - ge_p3 R; - uint8_t hram[SHA512_DIGEST_LENGTH]; - SHA512_CTX hash_ctx; - - SHA512_Init(&hash_ctx); - SHA512_Update(&hash_ctx, private_key, 32); - SHA512_Final(az, &hash_ctx); - - az[0] &= 248; - az[31] &= 63; - az[31] |= 64; - - SHA512_Init(&hash_ctx); - SHA512_Update(&hash_ctx, az + 32, 32); - SHA512_Update(&hash_ctx, message, message_len); - SHA512_Final(nonce, &hash_ctx); - - x25519_sc_reduce(nonce); - ge_scalarmult_base(&R, nonce); - ge_p3_tobytes(out_sig, &R); - - SHA512_Init(&hash_ctx); - SHA512_Update(&hash_ctx, out_sig, 32); - SHA512_Update(&hash_ctx, public_key, 32); - SHA512_Update(&hash_ctx, message, message_len); - SHA512_Final(hram, &hash_ctx); - - x25519_sc_reduce(hram); - sc_muladd(out_sig + 32, hram, az, nonce); - - OPENSSL_cleanse(&hash_ctx, sizeof(hash_ctx)); - OPENSSL_cleanse(nonce, sizeof(nonce)); - OPENSSL_cleanse(az, sizeof(az)); - - return 1; + const uint8_t public_key[32], const uint8_t private_key[32]) +{ + uint8_t az[SHA512_DIGEST_LENGTH]; + uint8_t nonce[SHA512_DIGEST_LENGTH]; + ge_p3 R; + uint8_t hram[SHA512_DIGEST_LENGTH]; + SHA512_CTX hash_ctx; + + SHA512_Init(&hash_ctx); + SHA512_Update(&hash_ctx, private_key, 32); + SHA512_Final(az, &hash_ctx); + + az[0] &= 248; + az[31] &= 63; + az[31] |= 64; + + SHA512_Init(&hash_ctx); + SHA512_Update(&hash_ctx, az + 32, 32); + SHA512_Update(&hash_ctx, message, message_len); + SHA512_Final(nonce, &hash_ctx); + + x25519_sc_reduce(nonce); + ge_scalarmult_base(&R, nonce); + ge_p3_tobytes(out_sig, &R); + + SHA512_Init(&hash_ctx); + SHA512_Update(&hash_ctx, out_sig, 32); + SHA512_Update(&hash_ctx, public_key, 32); + SHA512_Update(&hash_ctx, message, message_len); + SHA512_Final(hram, &hash_ctx); + + x25519_sc_reduce(hram); + sc_muladd(out_sig + 32, hram, az, nonce); + + OPENSSL_cleanse(&hash_ctx, sizeof(hash_ctx)); + OPENSSL_cleanse(nonce, sizeof(nonce)); + OPENSSL_cleanse(az, sizeof(az)); + + return 1; } +static const char allzeroes[15]; + int ED25519_verify(const uint8_t *message, size_t message_len, - const uint8_t signature[64], const uint8_t public_key[32]) { - ge_p3 A; - uint8_t rcopy[32]; - uint8_t scopy[32]; - SHA512_CTX hash_ctx; - ge_p2 R; - uint8_t rcheck[32]; - uint8_t h[SHA512_DIGEST_LENGTH]; - - if ((signature[63] & 224) != 0 || - ge_frombytes_vartime(&A, public_key) != 0) { - return 0; - } + const uint8_t signature[64], const uint8_t public_key[32]) +{ + int i; + ge_p3 A; + const uint8_t *r, *s; + SHA512_CTX hash_ctx; + ge_p2 R; + uint8_t rcheck[32]; + uint8_t h[SHA512_DIGEST_LENGTH]; + /* 27742317777372353535851937790883648493 in little endian format */ + const uint8_t l_low[16] = { + 0xED, 0xD3, 0xF5, 0x5C, 0x1A, 0x63, 0x12, 0x58, 0xD6, 0x9C, 0xF7, 0xA2, + 0xDE, 0xF9, 0xDE, 0x14 + }; + + r = signature; + s = signature + 32; + + /* + * Check 0 <= s < L where L = 2^252 + 27742317777372353535851937790883648493 + * + * If not the signature is publicly invalid. Since it's public we can do the + * check in variable time. + * + * First check the most significant byte + */ + if (s[31] > 0x10) + return 0; + if (s[31] == 0x10) { + /* + * Most significant byte indicates a value close to 2^252 so check the + * rest + */ + if (memcmp(s + 16, allzeroes, sizeof(allzeroes)) != 0) + return 0; + for (i = 15; i >= 0; i--) { + if (s[i] < l_low[i]) + break; + if (s[i] > l_low[i]) + return 0; + } + if (i < 0) + return 0; + } - fe_neg(A.X, A.X); - fe_neg(A.T, A.T); + if (ge_frombytes_vartime(&A, public_key) != 0) { + return 0; + } - memcpy(rcopy, signature, 32); - memcpy(scopy, signature + 32, 32); + fe_neg(A.X, A.X); + fe_neg(A.T, A.T); - SHA512_Init(&hash_ctx); - SHA512_Update(&hash_ctx, signature, 32); - SHA512_Update(&hash_ctx, public_key, 32); - SHA512_Update(&hash_ctx, message, message_len); - SHA512_Final(h, &hash_ctx); + SHA512_Init(&hash_ctx); + SHA512_Update(&hash_ctx, r, 32); + SHA512_Update(&hash_ctx, public_key, 32); + SHA512_Update(&hash_ctx, message, message_len); + SHA512_Final(h, &hash_ctx); - x25519_sc_reduce(h); + x25519_sc_reduce(h); - ge_double_scalarmult_vartime(&R, h, &A, scopy); + ge_double_scalarmult_vartime(&R, h, &A, s); - ge_tobytes(rcheck, &R); + ge_tobytes(rcheck, &R); - return CRYPTO_memcmp(rcheck, rcopy, sizeof(rcheck)) == 0; + return CRYPTO_memcmp(rcheck, r, sizeof(rcheck)) == 0; } void ED25519_public_from_private(uint8_t out_public_key[32], - const uint8_t private_key[32]) { - uint8_t az[SHA512_DIGEST_LENGTH]; - ge_p3 A; + const uint8_t private_key[32]) +{ + uint8_t az[SHA512_DIGEST_LENGTH]; + ge_p3 A; - SHA512(private_key, 32, az); + SHA512(private_key, 32, az); - az[0] &= 248; - az[31] &= 63; - az[31] |= 64; + az[0] &= 248; + az[31] &= 63; + az[31] |= 64; - ge_scalarmult_base(&A, az); - ge_p3_tobytes(out_public_key, &A); + ge_scalarmult_base(&A, az); + ge_p3_tobytes(out_public_key, &A); - OPENSSL_cleanse(az, sizeof(az)); + OPENSSL_cleanse(az, sizeof(az)); } int X25519(uint8_t out_shared_key[32], const uint8_t private_key[32], - const uint8_t peer_public_value[32]) { - static const uint8_t kZeros[32] = {0}; - x25519_scalar_mult(out_shared_key, private_key, peer_public_value); - /* The all-zero output results when the input is a point of small order. */ - return CRYPTO_memcmp(kZeros, out_shared_key, 32) != 0; + const uint8_t peer_public_value[32]) +{ + static const uint8_t kZeros[32] = {0}; + x25519_scalar_mult(out_shared_key, private_key, peer_public_value); + /* The all-zero output results when the input is a point of small order. */ + return CRYPTO_memcmp(kZeros, out_shared_key, 32) != 0; } void X25519_public_from_private(uint8_t out_public_value[32], - const uint8_t private_key[32]) { - uint8_t e[32]; - ge_p3 A; - fe zplusy, zminusy, zminusy_inv; - - memcpy(e, private_key, 32); - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - - ge_scalarmult_base(&A, e); - - /* We only need the u-coordinate of the curve25519 point. The map is - * u=(y+1)/(1-y). Since y=Y/Z, this gives u=(Z+Y)/(Z-Y). */ - fe_add(zplusy, A.Z, A.Y); - fe_sub(zminusy, A.Z, A.Y); - fe_invert(zminusy_inv, zminusy); - fe_mul(zplusy, zplusy, zminusy_inv); - fe_tobytes(out_public_value, zplusy); - - OPENSSL_cleanse(e, sizeof(e)); + const uint8_t private_key[32]) +{ + uint8_t e[32]; + ge_p3 A; + fe zplusy, zminusy, zminusy_inv; + + memcpy(e, private_key, 32); + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + + ge_scalarmult_base(&A, e); + + /* + * We only need the u-coordinate of the curve25519 point. + * The map is u=(y+1)/(1-y). Since y=Y/Z, this gives + * u=(Z+Y)/(Z-Y). + */ + fe_add(zplusy, A.Z, A.Y); + fe_sub(zminusy, A.Z, A.Y); + fe_invert(zminusy_inv, zminusy); + fe_mul(zplusy, zplusy, zminusy_inv); + fe_tobytes(out_public_value, zplusy); + + OPENSSL_cleanse(e, sizeof(e)); } diff --git a/crypto/ec/curve448/eddsa.c b/crypto/ec/curve448/eddsa.c index 909413a535a8e..b28f7dff91386 100644 --- a/crypto/ec/curve448/eddsa.c +++ b/crypto/ec/curve448/eddsa.c @@ -246,10 +246,36 @@ c448_error_t c448_ed448_verify( uint8_t context_len) { curve448_point_t pk_point, r_point; - c448_error_t error = - curve448_point_decode_like_eddsa_and_mul_by_ratio(pk_point, pubkey); + c448_error_t error; curve448_scalar_t challenge_scalar; curve448_scalar_t response_scalar; + /* Order in little endian format */ + static const uint8_t order[] = { + 0xF3, 0x44, 0x58, 0xAB, 0x92, 0xC2, 0x78, 0x23, 0x55, 0x8F, 0xC5, 0x8D, + 0x72, 0xC2, 0x6C, 0x21, 0x90, 0x36, 0xD6, 0xAE, 0x49, 0xDB, 0x4E, 0xC4, + 0xE9, 0x23, 0xCA, 0x7C, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00 + }; + int i; + + /* + * Check that s (second 57 bytes of the sig) is less than the order. Both + * s and the order are in little-endian format. This can be done in + * variable time, since if this is not the case the signature if publicly + * invalid. + */ + for (i = EDDSA_448_PUBLIC_BYTES - 1; i >= 0; i--) { + if (signature[i + EDDSA_448_PUBLIC_BYTES] > order[i]) + return C448_FAILURE; + if (signature[i + EDDSA_448_PUBLIC_BYTES] < order[i]) + break; + } + if (i < 0) + return C448_FAILURE; + + error = + curve448_point_decode_like_eddsa_and_mul_by_ratio(pk_point, pubkey); if (C448_SUCCESS != error) return error; diff --git a/crypto/ec/curve448/point_448.h b/crypto/ec/curve448/point_448.h index 0ef3b8714e263..399f91b9a1d98 100644 --- a/crypto/ec/curve448/point_448.h +++ b/crypto/ec/curve448/point_448.h @@ -1,5 +1,5 @@ /* - * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2017-2019 The OpenSSL Project Authors. All Rights Reserved. * Copyright 2015-2016 Cryptography Research, Inc. * * Licensed under the OpenSSL license (the "License"). You may not use @@ -116,7 +116,7 @@ void curve448_scalar_encode(unsigned char ser[C448_SCALAR_BYTES], /* * Add two scalars. |a|, |b| and |out| may alias each other. - * + * * a (in): One scalar. * b (in): Another scalar. * out (out): a+b. @@ -135,7 +135,7 @@ void curve448_scalar_sub(curve448_scalar_t out, /* * Multiply two scalars. |a|, |b| and |out| may alias each other. - * + * * a (in): One scalar. * b (in): Another scalar. * out (out): a*b. @@ -145,7 +145,7 @@ void curve448_scalar_mul(curve448_scalar_t out, /* * Halve a scalar. |a| and |out| may alias each other. -* +* * a (in): A scalar. * out (out): a/2. */ @@ -154,7 +154,7 @@ void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a); /* * Copy a scalar. The scalars may alias each other, in which case this * function does nothing. - * + * * a (in): A scalar. * out (out): Will become a copy of a. */ @@ -183,7 +183,7 @@ static ossl_inline void curve448_point_copy(curve448_point_t a, * * a (in): A point. * b (in): Another point. - * + * * Returns: * C448_TRUE: The points are equal. * C448_FALSE: The points are not equal. @@ -243,7 +243,7 @@ void curve448_point_mul_by_ratio_and_encode_like_x448( /* * RFC 7748 Diffie-Hellman base point scalarmul. This function uses a different * (non-Decaf) encoding. - * + * * out (out): The scaled point base*scalar * scalar (in): The scalar to multiply by. */ @@ -273,7 +273,7 @@ void curve448_precomputed_scalarmul(curve448_point_t scaled, * base2 (in): A second point to be scaled. * scalar2 (in) A second scalar to multiply by. * - * Warning: This function takes variable time, and may leak the scalars used. + * Warning: This function takes variable time, and may leak the scalars used. * It is designed for signature verification. */ void curve448_base_double_scalarmul_non_secret(curve448_point_t combo, diff --git a/crypto/ec/ec2_smpl.c b/crypto/ec/ec2_smpl.c index 87f7ce56911d9..0a05a7aeea61c 100644 --- a/crypto/ec/ec2_smpl.c +++ b/crypto/ec/ec2_smpl.c @@ -1,5 +1,5 @@ /* - * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * * Licensed under the OpenSSL license (the "License"). You may not use @@ -810,7 +810,7 @@ int ec_GF2m_simple_ladder_post(const EC_GROUP *group, || !group->meth->field_mul(group, t2, t2, t0, ctx) || !BN_GF2m_add(t1, t2, t1) || !group->meth->field_mul(group, t2, p->X, t0, ctx) - || !BN_GF2m_mod_inv(t2, t2, group->field, ctx) + || !group->meth->field_inv(group, t2, t2, ctx) || !group->meth->field_mul(group, t1, t1, t2, ctx) || !group->meth->field_mul(group, r->X, r->Z, t2, ctx) || !BN_GF2m_add(t2, p->X, r->X) @@ -889,6 +889,21 @@ int ec_GF2m_simple_points_mul(const EC_GROUP *group, EC_POINT *r, return ret; } +/*- + * Computes the multiplicative inverse of a in GF(2^m), storing the result in r. + * If a is zero (or equivalent), you'll get a EC_R_CANNOT_INVERT error. + * SCA hardening is with blinding: BN_GF2m_mod_inv does that. + */ +static int ec_GF2m_simple_field_inv(const EC_GROUP *group, BIGNUM *r, + const BIGNUM *a, BN_CTX *ctx) +{ + int ret; + + if (!(ret = BN_GF2m_mod_inv(r, a, group->field, ctx))) + ECerr(EC_F_EC_GF2M_SIMPLE_FIELD_INV, EC_R_CANNOT_INVERT); + return ret; +} + const EC_METHOD *EC_GF2m_simple_method(void) { static const EC_METHOD ret = { @@ -929,6 +944,7 @@ const EC_METHOD *EC_GF2m_simple_method(void) ec_GF2m_simple_field_mul, ec_GF2m_simple_field_sqr, ec_GF2m_simple_field_div, + ec_GF2m_simple_field_inv, 0, /* field_encode */ 0, /* field_decode */ 0, /* field_set_to_one */ diff --git a/crypto/ec/ec_ameth.c b/crypto/ec/ec_ameth.c index a3164b5b2ed97..8b363e096beda 100644 --- a/crypto/ec/ec_ameth.c +++ b/crypto/ec/ec_ameth.c @@ -505,7 +505,7 @@ static int ec_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) case ASN1_PKEY_CTRL_DEFAULT_MD_NID: *(int *)arg2 = NID_sha256; - return 2; + return 1; case ASN1_PKEY_CTRL_SET1_TLS_ENCPT: return EC_KEY_oct2key(EVP_PKEY_get0_EC_KEY(pkey), arg2, arg1, NULL); diff --git a/crypto/ec/ec_err.c b/crypto/ec/ec_err.c index 8f4911abec79a..ce3493823218f 100644 --- a/crypto/ec/ec_err.c +++ b/crypto/ec/ec_err.c @@ -1,6 +1,6 @@ /* * Generated by util/mkerr.pl DO NOT EDIT - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -66,6 +66,8 @@ static const ERR_STRING_DATA EC_str_functs[] = { "ec_asn1_group2fieldid"}, {ERR_PACK(ERR_LIB_EC, EC_F_EC_GF2M_MONTGOMERY_POINT_MULTIPLY, 0), "ec_GF2m_montgomery_point_multiply"}, + {ERR_PACK(ERR_LIB_EC, EC_F_EC_GF2M_SIMPLE_FIELD_INV, 0), + "ec_GF2m_simple_field_inv"}, {ERR_PACK(ERR_LIB_EC, EC_F_EC_GF2M_SIMPLE_GROUP_CHECK_DISCRIMINANT, 0), "ec_GF2m_simple_group_check_discriminant"}, {ERR_PACK(ERR_LIB_EC, EC_F_EC_GF2M_SIMPLE_GROUP_SET_CURVE, 0), @@ -90,6 +92,8 @@ static const ERR_STRING_DATA EC_str_functs[] = { "ec_GFp_mont_field_decode"}, {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_MONT_FIELD_ENCODE, 0), "ec_GFp_mont_field_encode"}, + {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_MONT_FIELD_INV, 0), + "ec_GFp_mont_field_inv"}, {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_MONT_FIELD_MUL, 0), "ec_GFp_mont_field_mul"}, {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_MONT_FIELD_SET_TO_ONE, 0), @@ -124,6 +128,8 @@ static const ERR_STRING_DATA EC_str_functs[] = { "ec_GFp_nist_group_set_curve"}, {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_SIMPLE_BLIND_COORDINATES, 0), "ec_GFp_simple_blind_coordinates"}, + {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_SIMPLE_FIELD_INV, 0), + "ec_GFp_simple_field_inv"}, {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_SIMPLE_GROUP_CHECK_DISCRIMINANT, 0), "ec_GFp_simple_group_check_discriminant"}, {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_SIMPLE_GROUP_SET_CURVE, 0), @@ -287,6 +293,7 @@ static const ERR_STRING_DATA EC_str_reasons[] = { {ERR_PACK(ERR_LIB_EC, 0, EC_R_BAD_SIGNATURE), "bad signature"}, {ERR_PACK(ERR_LIB_EC, 0, EC_R_BIGNUM_OUT_OF_RANGE), "bignum out of range"}, {ERR_PACK(ERR_LIB_EC, 0, EC_R_BUFFER_TOO_SMALL), "buffer too small"}, + {ERR_PACK(ERR_LIB_EC, 0, EC_R_CANNOT_INVERT), "cannot invert"}, {ERR_PACK(ERR_LIB_EC, 0, EC_R_COORDINATES_OUT_OF_RANGE), "coordinates out of range"}, {ERR_PACK(ERR_LIB_EC, 0, EC_R_CURVE_DOES_NOT_SUPPORT_ECDH), diff --git a/crypto/ec/ec_lcl.h b/crypto/ec/ec_lcl.h index e055ddab1c76e..119255f1dc832 100644 --- a/crypto/ec/ec_lcl.h +++ b/crypto/ec/ec_lcl.h @@ -1,5 +1,5 @@ /* - * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * * Licensed under the OpenSSL license (the "License"). You may not use @@ -15,7 +15,6 @@ #include <openssl/bn.h> #include "internal/refcount.h" #include "internal/ec_int.h" -#include "curve448/curve448_lcl.h" #if defined(__SUNPRO_C) # if __SUNPRO_C >= 0x520 @@ -154,6 +153,13 @@ struct ec_method_st { int (*field_sqr) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *); int (*field_div) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *); + /*- + * 'field_inv' computes the multipicative inverse of a in the field, + * storing the result in r. + * + * If 'a' is zero (or equivalent), you'll get an EC_R_CANNOT_INVERT error. + */ + int (*field_inv) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *); /* e.g. to Montgomery */ int (*field_encode) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *); @@ -390,6 +396,8 @@ int ec_GFp_simple_field_mul(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *); int ec_GFp_simple_field_sqr(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *); +int ec_GFp_simple_field_inv(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, + BN_CTX *); int ec_GFp_simple_blind_coordinates(const EC_GROUP *group, EC_POINT *p, BN_CTX *ctx); int ec_GFp_simple_ladder_pre(const EC_GROUP *group, @@ -413,6 +421,8 @@ int ec_GFp_mont_field_mul(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *); int ec_GFp_mont_field_sqr(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *); +int ec_GFp_mont_field_inv(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, + BN_CTX *); int ec_GFp_mont_field_encode(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *); int ec_GFp_mont_field_decode(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, diff --git a/crypto/ec/ecp_mont.c b/crypto/ec/ecp_mont.c index 36682e5cfbd18..252e66ef3791c 100644 --- a/crypto/ec/ecp_mont.c +++ b/crypto/ec/ecp_mont.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * * Licensed under the OpenSSL license (the "License"). You may not use @@ -50,6 +50,7 @@ const EC_METHOD *EC_GFp_mont_method(void) ec_GFp_mont_field_mul, ec_GFp_mont_field_sqr, 0 /* field_div */ , + ec_GFp_mont_field_inv, ec_GFp_mont_field_encode, ec_GFp_mont_field_decode, ec_GFp_mont_field_set_to_one, @@ -206,6 +207,54 @@ int ec_GFp_mont_field_sqr(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a, return BN_mod_mul_montgomery(r, a, a, group->field_data1, ctx); } +/*- + * Computes the multiplicative inverse of a in GF(p), storing the result in r. + * If a is zero (or equivalent), you'll get a EC_R_CANNOT_INVERT error. + * We have a Mont structure, so SCA hardening is FLT inversion. + */ +int ec_GFp_mont_field_inv(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a, + BN_CTX *ctx) +{ + BIGNUM *e = NULL; + BN_CTX *new_ctx = NULL; + int ret = 0; + + if (group->field_data1 == NULL) + return 0; + + if (ctx == NULL && (ctx = new_ctx = BN_CTX_secure_new()) == NULL) + return 0; + + BN_CTX_start(ctx); + if ((e = BN_CTX_get(ctx)) == NULL) + goto err; + + /* Inverse in constant time with Fermats Little Theorem */ + if (!BN_set_word(e, 2)) + goto err; + if (!BN_sub(e, group->field, e)) + goto err; + /*- + * Exponent e is public. + * No need for scatter-gather or BN_FLG_CONSTTIME. + */ + if (!BN_mod_exp_mont(r, a, e, group->field, ctx, group->field_data1)) + goto err; + + /* throw an error on zero */ + if (BN_is_zero(r)) { + ECerr(EC_F_EC_GFP_MONT_FIELD_INV, EC_R_CANNOT_INVERT); + goto err; + } + + ret = 1; + + err: + BN_CTX_end(ctx); + BN_CTX_free(new_ctx); + return ret; +} + int ec_GFp_mont_field_encode(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) { diff --git a/crypto/ec/ecp_nist.c b/crypto/ec/ecp_nist.c index f53de1a1638bd..5eaa99d8402b6 100644 --- a/crypto/ec/ecp_nist.c +++ b/crypto/ec/ecp_nist.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * * Licensed under the OpenSSL license (the "License"). You may not use @@ -52,6 +52,7 @@ const EC_METHOD *EC_GFp_nist_method(void) ec_GFp_nist_field_mul, ec_GFp_nist_field_sqr, 0 /* field_div */ , + ec_GFp_simple_field_inv, 0 /* field_encode */ , 0 /* field_decode */ , 0, /* field_set_to_one */ diff --git a/crypto/ec/ecp_nistp224.c b/crypto/ec/ecp_nistp224.c index 555bf307dd031..025273a144408 100644 --- a/crypto/ec/ecp_nistp224.c +++ b/crypto/ec/ecp_nistp224.c @@ -1,5 +1,5 @@ /* - * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2010-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -279,6 +279,7 @@ const EC_METHOD *EC_GFp_nistp224_method(void) ec_GFp_nist_field_mul, ec_GFp_nist_field_sqr, 0 /* field_div */ , + ec_GFp_simple_field_inv, 0 /* field_encode */ , 0 /* field_decode */ , 0, /* field_set_to_one */ diff --git a/crypto/ec/ecp_nistp256.c b/crypto/ec/ecp_nistp256.c index c87a5e548d369..a21e5f78fc906 100644 --- a/crypto/ec/ecp_nistp256.c +++ b/crypto/ec/ecp_nistp256.c @@ -1,5 +1,5 @@ /* - * Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -1810,6 +1810,7 @@ const EC_METHOD *EC_GFp_nistp256_method(void) ec_GFp_nist_field_mul, ec_GFp_nist_field_sqr, 0 /* field_div */ , + ec_GFp_simple_field_inv, 0 /* field_encode */ , 0 /* field_decode */ , 0, /* field_set_to_one */ diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c index 14f2feeb69996..2f47772a3477f 100644 --- a/crypto/ec/ecp_nistp521.c +++ b/crypto/ec/ecp_nistp521.c @@ -1,5 +1,5 @@ /* - * Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -1647,6 +1647,7 @@ const EC_METHOD *EC_GFp_nistp521_method(void) ec_GFp_nist_field_mul, ec_GFp_nist_field_sqr, 0 /* field_div */ , + ec_GFp_simple_field_inv, 0 /* field_encode */ , 0 /* field_decode */ , 0, /* field_set_to_one */ diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c index b0564bdbd04c5..aea6394169ce9 100644 --- a/crypto/ec/ecp_nistz256.c +++ b/crypto/ec/ecp_nistz256.c @@ -1,5 +1,5 @@ /* - * Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2014, Intel Corporation. All Rights Reserved. * Copyright (c) 2015, CloudFlare, Inc. * @@ -1677,6 +1677,7 @@ const EC_METHOD *EC_GFp_nistz256_method(void) ec_GFp_mont_field_mul, ec_GFp_mont_field_sqr, 0, /* field_div */ + ec_GFp_mont_field_inv, ec_GFp_mont_field_encode, ec_GFp_mont_field_decode, ec_GFp_mont_field_set_to_one, diff --git a/crypto/ec/ecp_smpl.c b/crypto/ec/ecp_smpl.c index d0c5557ff4dda..f6a6cedb0ae36 100644 --- a/crypto/ec/ecp_smpl.c +++ b/crypto/ec/ecp_smpl.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * * Licensed under the OpenSSL license (the "License"). You may not use @@ -51,6 +51,7 @@ const EC_METHOD *EC_GFp_simple_method(void) ec_GFp_simple_field_mul, ec_GFp_simple_field_sqr, 0 /* field_div */ , + ec_GFp_simple_field_inv, 0 /* field_encode */ , 0 /* field_decode */ , 0, /* field_set_to_one */ @@ -553,7 +554,7 @@ int ec_GFp_simple_point_get_affine_coordinates(const EC_GROUP *group, } } } else { - if (!BN_mod_inverse(Z_1, Z_, group->field, ctx)) { + if (!group->meth->field_inv(group, Z_1, Z_, ctx)) { ECerr(EC_F_EC_GFP_SIMPLE_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB); goto err; @@ -1266,7 +1267,7 @@ int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num, * points[i]->Z by its inverse. */ - if (!BN_mod_inverse(tmp, prod_Z[num - 1], group->field, ctx)) { + if (!group->meth->field_inv(group, tmp, prod_Z[num - 1], ctx)) { ECerr(EC_F_EC_GFP_SIMPLE_POINTS_MAKE_AFFINE, ERR_R_BN_LIB); goto err; } @@ -1370,6 +1371,50 @@ int ec_GFp_simple_field_sqr(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a, } /*- + * Computes the multiplicative inverse of a in GF(p), storing the result in r. + * If a is zero (or equivalent), you'll get a EC_R_CANNOT_INVERT error. + * Since we don't have a Mont structure here, SCA hardening is with blinding. + */ +int ec_GFp_simple_field_inv(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a, + BN_CTX *ctx) +{ + BIGNUM *e = NULL; + BN_CTX *new_ctx = NULL; + int ret = 0; + + if (ctx == NULL && (ctx = new_ctx = BN_CTX_secure_new()) == NULL) + return 0; + + BN_CTX_start(ctx); + if ((e = BN_CTX_get(ctx)) == NULL) + goto err; + + do { + if (!BN_priv_rand_range(e, group->field)) + goto err; + } while (BN_is_zero(e)); + + /* r := a * e */ + if (!group->meth->field_mul(group, r, a, e, ctx)) + goto err; + /* r := 1/(a * e) */ + if (!BN_mod_inverse(r, r, group->field, ctx)) { + ECerr(EC_F_EC_GFP_SIMPLE_FIELD_INV, EC_R_CANNOT_INVERT); + goto err; + } + /* r := e/(a * e) = 1/a */ + if (!group->meth->field_mul(group, r, r, e, ctx)) + goto err; + + ret = 1; + + err: + BN_CTX_end(ctx); + BN_CTX_free(new_ctx); + return ret; +} + +/*- * Apply randomization of EC point projective coordinates: * * (X, Y ,Z ) = (lambda^2*X, lambda^3*Y, lambda*Z) diff --git a/crypto/ec/ecx_meth.c b/crypto/ec/ecx_meth.c index b76bfdb6dc342..e4cac99e2d2ac 100644 --- a/crypto/ec/ecx_meth.c +++ b/crypto/ec/ecx_meth.c @@ -1,5 +1,5 @@ /* - * Copyright 2006-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2006-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -15,6 +15,7 @@ #include "internal/asn1_int.h" #include "internal/evp_int.h" #include "ec_lcl.h" +#include "curve448/curve448_lcl.h" #define X25519_BITS 253 #define X25519_SECURITY_BITS 128 diff --git a/crypto/engine/README b/crypto/engine/README index 0050b9e50951c..c7a5696ca14cd 100644 --- a/crypto/engine/README +++ b/crypto/engine/README @@ -26,7 +26,7 @@ algorithm/mode pair are; EVP_EncryptInit(&ctx, cipher, key, iv); [ ... use EVP_EncryptUpdate() and EVP_EncryptFinal() ...] -(ii) indirectly; +(ii) indirectly; OpenSSL_add_all_ciphers(); cipher = EVP_get_cipherbyname("des_cbc"); EVP_EncryptInit(&ctx, cipher, key, iv); diff --git a/crypto/engine/eng_devcrypto.c b/crypto/engine/eng_devcrypto.c index 4a0ba09a38bec..717d7c27794ff 100644 --- a/crypto/engine/eng_devcrypto.c +++ b/crypto/engine/eng_devcrypto.c @@ -1,5 +1,5 @@ /* - * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2017-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -24,6 +24,8 @@ #include "internal/engine.h" +/* #define ENGINE_DEVCRYPTO_DEBUG */ + #ifdef CRYPTO_ALGORITHM_MIN # define CHECK_BSD_STYLE_MACROS #endif @@ -35,6 +37,15 @@ */ static int cfd; +static int clean_devcrypto_session(struct session_op *sess) { + if (ioctl(cfd, CIOCFSESSION, &sess->ses) < 0) { + SYSerr(SYS_F_IOCTL, errno); + return 0; + } + memset(sess, 0, sizeof(struct session_op)); + return 1; +} + /****************************************************************************** * * Ciphers @@ -47,10 +58,12 @@ static int cfd; struct cipher_ctx { struct session_op sess; - - /* to pass from init to do_cipher */ - const unsigned char *iv; int op; /* COP_ENCRYPT or COP_DECRYPT */ + unsigned long mode; /* EVP_CIPH_*_MODE */ + + /* to handle ctr mode being a stream cipher */ + unsigned char partial[EVP_MAX_BLOCK_LENGTH]; + unsigned int blocksize, num; }; static const struct cipher_data_st { @@ -87,9 +100,9 @@ static const struct cipher_data_st { { NID_aes_256_xts, 16, 256 / 8 * 2, 16, EVP_CIPH_XTS_MODE, CRYPTO_AES_XTS }, #endif #if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_AES_ECB) - { NID_aes_128_ecb, 16, 128 / 8, 16, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB }, - { NID_aes_192_ecb, 16, 192 / 8, 16, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB }, - { NID_aes_256_ecb, 16, 256 / 8, 16, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB }, + { NID_aes_128_ecb, 16, 128 / 8, 0, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB }, + { NID_aes_192_ecb, 16, 192 / 8, 0, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB }, + { NID_aes_256_ecb, 16, 256 / 8, 0, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB }, #endif #if 0 /* Not yet supported */ { NID_aes_128_gcm, 16, 128 / 8, 16, EVP_CIPH_GCM_MODE, CRYPTO_AES_GCM }, @@ -141,11 +154,17 @@ static int cipher_init(EVP_CIPHER_CTX *ctx, const unsigned char *key, const struct cipher_data_st *cipher_d = get_cipher_data(EVP_CIPHER_CTX_nid(ctx)); - memset(&cipher_ctx->sess, 0, sizeof(cipher_ctx->sess)); + /* cleanup a previous session */ + if (cipher_ctx->sess.ses != 0 && + clean_devcrypto_session(&cipher_ctx->sess) == 0) + return 0; + cipher_ctx->sess.cipher = cipher_d->devcryptoid; cipher_ctx->sess.keylen = cipher_d->keylen; cipher_ctx->sess.key = (void *)key; cipher_ctx->op = enc ? COP_ENCRYPT : COP_DECRYPT; + cipher_ctx->mode = cipher_d->flags & EVP_CIPH_MODE; + cipher_ctx->blocksize = cipher_d->blocksize; if (ioctl(cfd, CIOCGSESSION, &cipher_ctx->sess) < 0) { SYSerr(SYS_F_IOCTL, errno); return 0; @@ -160,8 +179,11 @@ static int cipher_do_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, struct cipher_ctx *cipher_ctx = (struct cipher_ctx *)EVP_CIPHER_CTX_get_cipher_data(ctx); struct crypt_op cryp; + unsigned char *iv = EVP_CIPHER_CTX_iv_noconst(ctx); #if !defined(COP_FLAG_WRITE_IV) unsigned char saved_iv[EVP_MAX_IV_LENGTH]; + const unsigned char *ivptr; + size_t nblocks, ivlen; #endif memset(&cryp, 0, sizeof(cryp)); @@ -169,19 +191,28 @@ static int cipher_do_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, cryp.len = inl; cryp.src = (void *)in; cryp.dst = (void *)out; - cryp.iv = (void *)EVP_CIPHER_CTX_iv_noconst(ctx); + cryp.iv = (void *)iv; cryp.op = cipher_ctx->op; #if !defined(COP_FLAG_WRITE_IV) cryp.flags = 0; - if (EVP_CIPHER_CTX_iv_length(ctx) > 0) { - assert(inl >= EVP_CIPHER_CTX_iv_length(ctx)); - if (!EVP_CIPHER_CTX_encrypting(ctx)) { - unsigned char *ivptr = in + inl - EVP_CIPHER_CTX_iv_length(ctx); - - memcpy(saved_iv, ivptr, EVP_CIPHER_CTX_iv_length(ctx)); + ivlen = EVP_CIPHER_CTX_iv_length(ctx); + if (ivlen > 0) + switch (cipher_ctx->mode) { + case EVP_CIPH_CBC_MODE: + assert(inl >= ivlen); + if (!EVP_CIPHER_CTX_encrypting(ctx)) { + ivptr = in + inl - ivlen; + memcpy(saved_iv, ivptr, ivlen); + } + break; + + case EVP_CIPH_CTR_MODE: + break; + + default: /* should not happen */ + return 0; } - } #else cryp.flags = COP_FLAG_WRITE_IV; #endif @@ -192,32 +223,113 @@ static int cipher_do_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } #if !defined(COP_FLAG_WRITE_IV) - if (EVP_CIPHER_CTX_iv_length(ctx) > 0) { - unsigned char *ivptr = saved_iv; + if (ivlen > 0) + switch (cipher_ctx->mode) { + case EVP_CIPH_CBC_MODE: + assert(inl >= ivlen); + if (EVP_CIPHER_CTX_encrypting(ctx)) + ivptr = out + inl - ivlen; + else + ivptr = saved_iv; + + memcpy(iv, ivptr, ivlen); + break; + + case EVP_CIPH_CTR_MODE: + nblocks = (inl + cipher_ctx->blocksize - 1) + / cipher_ctx->blocksize; + do { + ivlen--; + nblocks += iv[ivlen]; + iv[ivlen] = (uint8_t) nblocks; + nblocks >>= 8; + } while (ivlen); + break; + + default: /* should not happen */ + return 0; + } +#endif + + return 1; +} - assert(inl >= EVP_CIPHER_CTX_iv_length(ctx)); - if (!EVP_CIPHER_CTX_encrypting(ctx)) - ivptr = out + inl - EVP_CIPHER_CTX_iv_length(ctx); +static int ctr_do_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t inl) +{ + struct cipher_ctx *cipher_ctx = + (struct cipher_ctx *)EVP_CIPHER_CTX_get_cipher_data(ctx); + size_t nblocks, len; - memcpy(EVP_CIPHER_CTX_iv_noconst(ctx), ivptr, - EVP_CIPHER_CTX_iv_length(ctx)); + /* initial partial block */ + while (cipher_ctx->num && inl) { + (*out++) = *(in++) ^ cipher_ctx->partial[cipher_ctx->num]; + --inl; + cipher_ctx->num = (cipher_ctx->num + 1) % cipher_ctx->blocksize; + } + + /* full blocks */ + if (inl > (unsigned int) cipher_ctx->blocksize) { + nblocks = inl/cipher_ctx->blocksize; + len = nblocks * cipher_ctx->blocksize; + if (cipher_do_cipher(ctx, out, in, len) < 1) + return 0; + inl -= len; + out += len; + in += len; + } + + /* final partial block */ + if (inl) { + memset(cipher_ctx->partial, 0, cipher_ctx->blocksize); + if (cipher_do_cipher(ctx, cipher_ctx->partial, cipher_ctx->partial, + cipher_ctx->blocksize) < 1) + return 0; + while (inl--) { + out[cipher_ctx->num] = in[cipher_ctx->num] + ^ cipher_ctx->partial[cipher_ctx->num]; + cipher_ctx->num++; + } } -#endif return 1; } -static int cipher_cleanup(EVP_CIPHER_CTX *ctx) +static int cipher_ctrl(EVP_CIPHER_CTX *ctx, int type, int p1, void* p2) { struct cipher_ctx *cipher_ctx = (struct cipher_ctx *)EVP_CIPHER_CTX_get_cipher_data(ctx); + EVP_CIPHER_CTX *to_ctx = (EVP_CIPHER_CTX *)p2; + struct cipher_ctx *to_cipher_ctx; + + switch (type) { + case EVP_CTRL_COPY: + if (cipher_ctx == NULL) + return 1; + /* when copying the context, a new session needs to be initialized */ + to_cipher_ctx = + (struct cipher_ctx *)EVP_CIPHER_CTX_get_cipher_data(to_ctx); + memset(&to_cipher_ctx->sess, 0, sizeof(to_cipher_ctx->sess)); + return cipher_init(to_ctx, cipher_ctx->sess.key, EVP_CIPHER_CTX_iv(ctx), + (cipher_ctx->op == COP_ENCRYPT)); + + case EVP_CTRL_INIT: + memset(&cipher_ctx->sess, 0, sizeof(cipher_ctx->sess)); + return 1; - if (ioctl(cfd, CIOCFSESSION, &cipher_ctx->sess.ses) < 0) { - SYSerr(SYS_F_IOCTL, errno); - return 0; + default: + break; } - return 1; + return -1; +} + +static int cipher_cleanup(EVP_CIPHER_CTX *ctx) +{ + struct cipher_ctx *cipher_ctx = + (struct cipher_ctx *)EVP_CIPHER_CTX_get_cipher_data(ctx); + + return clean_devcrypto_session(&cipher_ctx->sess); } /* @@ -233,6 +345,7 @@ static void prepare_cipher_methods(void) { size_t i; struct session_op sess; + unsigned long cipher_mode; memset(&sess, 0, sizeof(sess)); sess.key = (void *)"01234567890123456789012345678901234567890123456789"; @@ -250,18 +363,26 @@ static void prepare_cipher_methods(void) || ioctl(cfd, CIOCFSESSION, &sess.ses) < 0) continue; + cipher_mode = cipher_data[i].flags & EVP_CIPH_MODE; + if ((known_cipher_methods[i] = EVP_CIPHER_meth_new(cipher_data[i].nid, - cipher_data[i].blocksize, + cipher_mode == EVP_CIPH_CTR_MODE ? 1 : + cipher_data[i].blocksize, cipher_data[i].keylen)) == NULL || !EVP_CIPHER_meth_set_iv_length(known_cipher_methods[i], cipher_data[i].ivlen) || !EVP_CIPHER_meth_set_flags(known_cipher_methods[i], cipher_data[i].flags + | EVP_CIPH_CUSTOM_COPY + | EVP_CIPH_CTRL_INIT | EVP_CIPH_FLAG_DEFAULT_ASN1) || !EVP_CIPHER_meth_set_init(known_cipher_methods[i], cipher_init) || !EVP_CIPHER_meth_set_do_cipher(known_cipher_methods[i], + cipher_mode == EVP_CIPH_CTR_MODE ? + ctr_do_cipher : cipher_do_cipher) + || !EVP_CIPHER_meth_set_ctrl(known_cipher_methods[i], cipher_ctrl) || !EVP_CIPHER_meth_set_cleanup(known_cipher_methods[i], cipher_cleanup) || !EVP_CIPHER_meth_set_impl_ctx_size(known_cipher_methods[i], @@ -338,34 +459,36 @@ static int devcrypto_ciphers(ENGINE *e, const EVP_CIPHER **cipher, struct digest_ctx { struct session_op sess; - int init; + /* This signals that the init function was called, not that it succeeded. */ + int init_called; }; static const struct digest_data_st { int nid; + int blocksize; int digestlen; int devcryptoid; } digest_data[] = { #ifndef OPENSSL_NO_MD5 - { NID_md5, 16, CRYPTO_MD5 }, + { NID_md5, /* MD5_CBLOCK */ 64, 16, CRYPTO_MD5 }, #endif - { NID_sha1, 20, CRYPTO_SHA1 }, + { NID_sha1, SHA_CBLOCK, 20, CRYPTO_SHA1 }, #ifndef OPENSSL_NO_RMD160 # if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_RIPEMD160) - { NID_ripemd160, 20, CRYPTO_RIPEMD160 }, + { NID_ripemd160, /* RIPEMD160_CBLOCK */ 64, 20, CRYPTO_RIPEMD160 }, # endif #endif #if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_SHA2_224) - { NID_sha224, 224 / 8, CRYPTO_SHA2_224 }, + { NID_sha224, SHA256_CBLOCK, 224 / 8, CRYPTO_SHA2_224 }, #endif #if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_SHA2_256) - { NID_sha256, 256 / 8, CRYPTO_SHA2_256 }, + { NID_sha256, SHA256_CBLOCK, 256 / 8, CRYPTO_SHA2_256 }, #endif #if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_SHA2_384) - { NID_sha384, 384 / 8, CRYPTO_SHA2_384 }, + { NID_sha384, SHA512_CBLOCK, 384 / 8, CRYPTO_SHA2_384 }, #endif #if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_SHA2_512) - { NID_sha512, 512 / 8, CRYPTO_SHA2_512 }, + { NID_sha512, SHA512_CBLOCK, 512 / 8, CRYPTO_SHA2_512 }, #endif }; @@ -403,7 +526,7 @@ static int digest_init(EVP_MD_CTX *ctx) const struct digest_data_st *digest_d = get_digest_data(EVP_MD_CTX_type(ctx)); - digest_ctx->init = 1; + digest_ctx->init_called = 1; memset(&digest_ctx->sess, 0, sizeof(digest_ctx->sess)); digest_ctx->sess.mac = digest_d->devcryptoid; @@ -438,6 +561,9 @@ static int digest_update(EVP_MD_CTX *ctx, const void *data, size_t count) if (count == 0) return 1; + if (digest_ctx == NULL) + return 0; + if (digest_op(digest_ctx, data, count, NULL, COP_FLAG_UPDATE) < 0) { SYSerr(SYS_F_IOCTL, errno); return 0; @@ -451,11 +577,9 @@ static int digest_final(EVP_MD_CTX *ctx, unsigned char *md) struct digest_ctx *digest_ctx = (struct digest_ctx *)EVP_MD_CTX_md_data(ctx); - if (digest_op(digest_ctx, NULL, 0, md, COP_FLAG_FINAL) < 0) { - SYSerr(SYS_F_IOCTL, errno); + if (md == NULL || digest_ctx == NULL) return 0; - } - if (ioctl(cfd, CIOCFSESSION, &digest_ctx->sess.ses) < 0) { + if (digest_op(digest_ctx, NULL, 0, md, COP_FLAG_FINAL) < 0) { SYSerr(SYS_F_IOCTL, errno); return 0; } @@ -471,14 +595,9 @@ static int digest_copy(EVP_MD_CTX *to, const EVP_MD_CTX *from) (struct digest_ctx *)EVP_MD_CTX_md_data(to); struct cphash_op cphash; - if (digest_from == NULL) + if (digest_from == NULL || digest_from->init_called != 1) return 1; - if (digest_from->init != 1) { - SYSerr(SYS_F_IOCTL, EINVAL); - return 0; - } - if (!digest_init(to)) { SYSerr(SYS_F_IOCTL, errno); return 0; @@ -495,7 +614,37 @@ static int digest_copy(EVP_MD_CTX *to, const EVP_MD_CTX *from) static int digest_cleanup(EVP_MD_CTX *ctx) { - return 1; + struct digest_ctx *digest_ctx = + (struct digest_ctx *)EVP_MD_CTX_md_data(ctx); + + if (digest_ctx == NULL) + return 1; + + return clean_devcrypto_session(&digest_ctx->sess); +} + +static int devcrypto_test_digest(size_t digest_data_index) +{ + struct session_op sess1, sess2; + struct cphash_op cphash; + int ret=0; + + memset(&sess1, 0, sizeof(sess1)); + memset(&sess2, 0, sizeof(sess2)); + sess1.mac = digest_data[digest_data_index].devcryptoid; + if (ioctl(cfd, CIOCGSESSION, &sess1) < 0) + return 0; + /* Make sure the driver is capable of hash state copy */ + sess2.mac = sess1.mac; + if (ioctl(cfd, CIOCGSESSION, &sess2) >= 0) { + cphash.src_ses = sess1.ses; + cphash.dst_ses = sess2.ses; + if (ioctl(cfd, CIOCCPHASH, &cphash) >= 0) + ret = 1; + ioctl(cfd, CIOCFSESSION, &sess2.ses); + } + ioctl(cfd, CIOCFSESSION, &sess1.ses); + return ret; } /* @@ -510,24 +659,20 @@ static EVP_MD *known_digest_methods[OSSL_NELEM(digest_data)] = { NULL, }; static void prepare_digest_methods(void) { size_t i; - struct session_op sess; - - memset(&sess, 0, sizeof(sess)); for (i = 0, known_digest_nids_amount = 0; i < OSSL_NELEM(digest_data); i++) { /* - * Check that the algo is really availably by trying to open and close - * a session. + * Check that the algo is usable */ - sess.mac = digest_data[i].devcryptoid; - if (ioctl(cfd, CIOCGSESSION, &sess) < 0 - || ioctl(cfd, CIOCFSESSION, &sess.ses) < 0) + if (!devcrypto_test_digest(i)) continue; if ((known_digest_methods[i] = EVP_MD_meth_new(digest_data[i].nid, NID_undef)) == NULL + || !EVP_MD_meth_set_input_blocksize(known_digest_methods[i], + digest_data[i].blocksize) || !EVP_MD_meth_set_result_size(known_digest_methods[i], digest_data[i].digestlen) || !EVP_MD_meth_set_init(known_digest_methods[i], digest_init) @@ -615,15 +760,13 @@ void engine_load_devcrypto_int() ENGINE *e = NULL; if ((cfd = open("/dev/crypto", O_RDWR, 0)) < 0) { - fprintf(stderr, "Could not open /dev/crypto: %s\n", strerror(errno)); +#ifndef ENGINE_DEVCRYPTO_DEBUG + if (errno != ENOENT) +#endif + fprintf(stderr, "Could not open /dev/crypto: %s\n", strerror(errno)); return; } - prepare_cipher_methods(); -#ifdef IMPLEMENT_DIGEST - prepare_digest_methods(); -#endif - if ((e = ENGINE_new()) == NULL || !ENGINE_set_destroy_function(e, devcrypto_unload)) { ENGINE_free(e); @@ -636,6 +779,11 @@ void engine_load_devcrypto_int() return; } + prepare_cipher_methods(); +#ifdef IMPLEMENT_DIGEST + prepare_digest_methods(); +#endif + if (!ENGINE_set_id(e, "devcrypto") || !ENGINE_set_name(e, "/dev/crypto engine") diff --git a/crypto/engine/eng_lib.c b/crypto/engine/eng_lib.c index 3ef3aae28a210..d7f2026fac546 100644 --- a/crypto/engine/eng_lib.c +++ b/crypto/engine/eng_lib.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -124,7 +124,7 @@ static int int_cleanup_check(int create) static ENGINE_CLEANUP_ITEM *int_cleanup_item(ENGINE_CLEANUP_CB *cb) { ENGINE_CLEANUP_ITEM *item; - + if ((item = OPENSSL_malloc(sizeof(*item))) == NULL) { ENGINEerr(ENGINE_F_INT_CLEANUP_ITEM, ERR_R_MALLOC_FAILURE); return NULL; diff --git a/crypto/err/err.c b/crypto/err/err.c index 03cbd738e1932..c737b2a9c3e6c 100644 --- a/crypto/err/err.c +++ b/crypto/err/err.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -19,6 +19,9 @@ #include <openssl/bio.h> #include <openssl/opensslconf.h> #include "internal/thread_once.h" +#include "internal/ctype.h" +#include "internal/constant_time_locl.h" +#include "e_os.h" static int err_load_strings(const ERR_STRING_DATA *str); @@ -181,8 +184,9 @@ static ERR_STRING_DATA *int_err_get_item(const ERR_STRING_DATA *d) } #ifndef OPENSSL_NO_ERR +/* A measurement on Linux 2018-11-21 showed about 3.5kib */ +# define SPACE_SYS_STR_REASONS 4 * 1024 # define NUM_SYS_STR_REASONS 127 -# define LEN_SYS_STR_REASON 32 static ERR_STRING_DATA SYS_str_reasons[NUM_SYS_STR_REASONS + 1]; /* @@ -198,9 +202,12 @@ static ERR_STRING_DATA SYS_str_reasons[NUM_SYS_STR_REASONS + 1]; static void build_SYS_str_reasons(void) { /* OPENSSL_malloc cannot be used here, use static storage instead */ - static char strerror_tab[NUM_SYS_STR_REASONS][LEN_SYS_STR_REASON]; + static char strerror_pool[SPACE_SYS_STR_REASONS]; + char *cur = strerror_pool; + size_t cnt = 0; static int init = 1; int i; + int saveerrno = get_last_sys_error(); CRYPTO_THREAD_write_lock(err_string_lock); if (!init) { @@ -213,9 +220,26 @@ static void build_SYS_str_reasons(void) str->error = ERR_PACK(ERR_LIB_SYS, 0, i); if (str->string == NULL) { - char (*dest)[LEN_SYS_STR_REASON] = &(strerror_tab[i - 1]); - if (openssl_strerror_r(i, *dest, sizeof(*dest))) - str->string = *dest; + if (openssl_strerror_r(i, cur, sizeof(strerror_pool) - cnt)) { + size_t l = strlen(cur); + + str->string = cur; + cnt += l; + if (cnt > sizeof(strerror_pool)) + cnt = sizeof(strerror_pool); + cur += l; + + /* + * VMS has an unusual quirk of adding spaces at the end of + * some (most? all?) messages. Lets trim them off. + */ + while (ossl_isspace(cur[-1])) { + cur--; + cnt--; + } + *cur++ = '\0'; + cnt++; + } } if (str->string == NULL) str->string = "unknown"; @@ -229,6 +253,8 @@ static void build_SYS_str_reasons(void) init = 0; CRYPTO_THREAD_unlock(err_string_lock); + /* openssl_strerror_r could change errno, but we want to preserve it */ + set_sys_error(saveerrno); err_load_strings(SYS_str_reasons); } #endif @@ -671,6 +697,7 @@ DEFINE_RUN_ONCE_STATIC(err_do_init) ERR_STATE *ERR_get_state(void) { ERR_STATE *state; + int saveerrno = get_last_sys_error(); if (!OPENSSL_init_crypto(OPENSSL_INIT_BASE_ONLY, NULL)) return NULL; @@ -702,6 +729,7 @@ ERR_STATE *ERR_get_state(void) OPENSSL_init_crypto(OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL); } + set_sys_error(saveerrno); return state; } @@ -711,6 +739,20 @@ ERR_STATE *ERR_get_state(void) */ int err_shelve_state(void **state) { + int saveerrno = get_last_sys_error(); + + /* + * Note, at present our only caller is OPENSSL_init_crypto(), indirectly + * via ossl_init_load_crypto_nodelete(), by which point the requested + * "base" initialization has already been performed, so the below call is a + * NOOP, that re-enters OPENSSL_init_crypto() only to quickly return. + * + * If are no other valid callers of this function, the call below can be + * removed, avoiding the re-entry into OPENSSL_init_crypto(). If there are + * potential uses that are not from inside OPENSSL_init_crypto(), then this + * call is needed, but some care is required to make sure that the re-entry + * remains a NOOP. + */ if (!OPENSSL_init_crypto(OPENSSL_INIT_BASE_ONLY, NULL)) return 0; @@ -721,6 +763,7 @@ int err_shelve_state(void **state) if (!CRYPTO_THREAD_set_local(&err_thread_local, (ERR_STATE*)-1)) return 0; + set_sys_error(saveerrno); return 1; } @@ -747,20 +790,31 @@ int ERR_get_next_error_library(void) return ret; } -void ERR_set_error_data(char *data, int flags) +static int err_set_error_data_int(char *data, int flags) { ERR_STATE *es; int i; es = ERR_get_state(); if (es == NULL) - return; + return 0; i = es->top; err_clear_data(es, i); es->err_data[i] = data; es->err_data_flags[i] = flags; + + return 1; +} + +void ERR_set_error_data(char *data, int flags) +{ + /* + * This function is void so we cannot propagate the error return. Since it + * is also in the public API we can't change the return type. + */ + err_set_error_data_int(data, flags); } void ERR_add_error_data(int num, ...) @@ -800,7 +854,8 @@ void ERR_add_error_vdata(int num, va_list args) } OPENSSL_strlcat(str, a, (size_t)s + 1); } - ERR_set_error_data(str, ERR_TXT_MALLOCED | ERR_TXT_STRING); + if (!err_set_error_data_int(str, ERR_TXT_MALLOCED | ERR_TXT_STRING)) + OPENSSL_free(str); } int ERR_set_mark(void) @@ -857,3 +912,42 @@ int ERR_clear_last_mark(void) es->err_flags[top] &= ~ERR_FLAG_MARK; return 1; } + +#ifdef UINTPTR_T +# undef UINTPTR_T +#endif +/* + * uintptr_t is the answer, but unfortunately C89, current "least common + * denominator" doesn't define it. Most legacy platforms typedef it anyway, + * so that attempt to fill the gaps means that one would have to identify + * that track these gaps, which would be undesirable. Macro it is... + */ +#if defined(__VMS) && __INITIAL_POINTER_SIZE==64 +/* + * But we can't use size_t on VMS, because it adheres to sizeof(size_t)==4 + * even in 64-bit builds, which means that it won't work as mask. + */ +# define UINTPTR_T unsigned long long +#else +# define UINTPTR_T size_t +#endif + +void err_clear_last_constant_time(int clear) +{ + ERR_STATE *es; + int top; + + es = ERR_get_state(); + if (es == NULL) + return; + + top = es->top; + + es->err_flags[top] &= ~(0 - clear); + es->err_buffer[top] &= ~(0UL - clear); + es->err_file[top] = (const char *)((UINTPTR_T)es->err_file[top] & + ~((UINTPTR_T)0 - clear)); + es->err_line[top] |= 0 - clear; + + es->top = (top + ERR_NUM_ERRORS - clear) % ERR_NUM_ERRORS; +} diff --git a/crypto/err/openssl.txt b/crypto/err/openssl.txt index 5003d8735a4d2..feff1dccded72 100644 --- a/crypto/err/openssl.txt +++ b/crypto/err/openssl.txt @@ -1,4 +1,4 @@ -# Copyright 1999-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 1999-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -519,6 +519,7 @@ EC_F_ECX_PUB_ENCODE:268:ecx_pub_encode EC_F_EC_ASN1_GROUP2CURVE:153:ec_asn1_group2curve EC_F_EC_ASN1_GROUP2FIELDID:154:ec_asn1_group2fieldid EC_F_EC_GF2M_MONTGOMERY_POINT_MULTIPLY:208:ec_GF2m_montgomery_point_multiply +EC_F_EC_GF2M_SIMPLE_FIELD_INV:296:ec_GF2m_simple_field_inv EC_F_EC_GF2M_SIMPLE_GROUP_CHECK_DISCRIMINANT:159:\ ec_GF2m_simple_group_check_discriminant EC_F_EC_GF2M_SIMPLE_GROUP_SET_CURVE:195:ec_GF2m_simple_group_set_curve @@ -535,6 +536,7 @@ EC_F_EC_GF2M_SIMPLE_SET_COMPRESSED_COORDINATES:164:\ ec_GF2m_simple_set_compressed_coordinates EC_F_EC_GFP_MONT_FIELD_DECODE:133:ec_GFp_mont_field_decode EC_F_EC_GFP_MONT_FIELD_ENCODE:134:ec_GFp_mont_field_encode +EC_F_EC_GFP_MONT_FIELD_INV:297:ec_GFp_mont_field_inv EC_F_EC_GFP_MONT_FIELD_MUL:131:ec_GFp_mont_field_mul EC_F_EC_GFP_MONT_FIELD_SET_TO_ONE:209:ec_GFp_mont_field_set_to_one EC_F_EC_GFP_MONT_FIELD_SQR:132:ec_GFp_mont_field_sqr @@ -555,6 +557,7 @@ EC_F_EC_GFP_NIST_FIELD_MUL:200:ec_GFp_nist_field_mul EC_F_EC_GFP_NIST_FIELD_SQR:201:ec_GFp_nist_field_sqr EC_F_EC_GFP_NIST_GROUP_SET_CURVE:202:ec_GFp_nist_group_set_curve EC_F_EC_GFP_SIMPLE_BLIND_COORDINATES:287:ec_GFp_simple_blind_coordinates +EC_F_EC_GFP_SIMPLE_FIELD_INV:298:ec_GFp_simple_field_inv EC_F_EC_GFP_SIMPLE_GROUP_CHECK_DISCRIMINANT:165:\ ec_GFp_simple_group_check_discriminant EC_F_EC_GFP_SIMPLE_GROUP_SET_CURVE:166:ec_GFp_simple_group_set_curve @@ -737,6 +740,7 @@ EVP_F_EVP_DECRYPTFINAL_EX:101:EVP_DecryptFinal_ex EVP_F_EVP_DECRYPTUPDATE:166:EVP_DecryptUpdate EVP_F_EVP_DIGESTFINALXOF:174:EVP_DigestFinalXOF EVP_F_EVP_DIGESTINIT_EX:128:EVP_DigestInit_ex +EVP_F_EVP_ENCRYPTDECRYPTUPDATE:219:evp_EncryptDecryptUpdate EVP_F_EVP_ENCRYPTFINAL_EX:127:EVP_EncryptFinal_ex EVP_F_EVP_ENCRYPTUPDATE:167:EVP_EncryptUpdate EVP_F_EVP_MD_CTX_COPY_EX:110:EVP_MD_CTX_copy_ex @@ -2115,6 +2119,7 @@ EC_R_ASN1_ERROR:115:asn1 error EC_R_BAD_SIGNATURE:156:bad signature EC_R_BIGNUM_OUT_OF_RANGE:144:bignum out of range EC_R_BUFFER_TOO_SMALL:100:buffer too small +EC_R_CANNOT_INVERT:165:cannot invert EC_R_COORDINATES_OUT_OF_RANGE:146:coordinates out of range EC_R_CURVE_DOES_NOT_SUPPORT_ECDH:160:curve does not support ecdh EC_R_CURVE_DOES_NOT_SUPPORT_SIGNING:159:curve does not support signing @@ -2722,6 +2727,8 @@ SSL_R_MISSING_SRP_PARAM:358:can't find SRP server param SSL_R_MISSING_SUPPORTED_GROUPS_EXTENSION:209:missing supported groups extension SSL_R_MISSING_TMP_DH_KEY:171:missing tmp dh key SSL_R_MISSING_TMP_ECDH_KEY:311:missing tmp ecdh key +SSL_R_MIXED_HANDSHAKE_AND_NON_HANDSHAKE_DATA:293:\ + mixed handshake and non handshake data SSL_R_NOT_ON_RECORD_BOUNDARY:182:not on record boundary SSL_R_NOT_REPLACING_CERTIFICATE:289:not replacing certificate SSL_R_NOT_SERVER:284:not server diff --git a/crypto/evp/evp_enc.c b/crypto/evp/evp_enc.c index 38633410cd1ab..05dd791b6cb07 100644 --- a/crypto/evp/evp_enc.c +++ b/crypto/evp/evp_enc.c @@ -294,8 +294,9 @@ int is_partially_overlapping(const void *ptr1, const void *ptr2, int len) return overlapped; } -int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, - const unsigned char *in, int inl) +static int evp_EncryptDecryptUpdate(EVP_CIPHER_CTX *ctx, + unsigned char *out, int *outl, + const unsigned char *in, int inl) { int i, j, bl, cmpl = inl; @@ -307,7 +308,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) { /* If block size > 1 then the cipher will have to do this check */ if (bl == 1 && is_partially_overlapping(out, in, cmpl)) { - EVPerr(EVP_F_EVP_ENCRYPTUPDATE, EVP_R_PARTIALLY_OVERLAPPING); + EVPerr(EVP_F_EVP_ENCRYPTDECRYPTUPDATE, EVP_R_PARTIALLY_OVERLAPPING); return 0; } @@ -324,7 +325,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, return inl == 0; } if (is_partially_overlapping(out + ctx->buf_len, in, cmpl)) { - EVPerr(EVP_F_EVP_ENCRYPTUPDATE, EVP_R_PARTIALLY_OVERLAPPING); + EVPerr(EVP_F_EVP_ENCRYPTDECRYPTUPDATE, EVP_R_PARTIALLY_OVERLAPPING); return 0; } @@ -371,6 +372,19 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, return 1; } + +int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, + const unsigned char *in, int inl) +{ + /* Prevent accidental use of decryption context when encrypting */ + if (!ctx->encrypt) { + EVPerr(EVP_F_EVP_ENCRYPTUPDATE, EVP_R_INVALID_OPERATION); + return 0; + } + + return evp_EncryptDecryptUpdate(ctx, out, outl, in, inl); +} + int EVP_EncryptFinal(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) { int ret; @@ -383,6 +397,12 @@ int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) int n, ret; unsigned int i, b, bl; + /* Prevent accidental use of decryption context when encrypting */ + if (!ctx->encrypt) { + EVPerr(EVP_F_EVP_ENCRYPTFINAL_EX, EVP_R_INVALID_OPERATION); + return 0; + } + if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) { ret = ctx->cipher->do_cipher(ctx, out, NULL, 0); if (ret < 0) @@ -426,6 +446,12 @@ int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, int fix_len, cmpl = inl; unsigned int b; + /* Prevent accidental use of encryption context when decrypting */ + if (ctx->encrypt) { + EVPerr(EVP_F_EVP_DECRYPTUPDATE, EVP_R_INVALID_OPERATION); + return 0; + } + b = ctx->cipher->block_size; if (EVP_CIPHER_CTX_test_flags(ctx, EVP_CIPH_FLAG_LENGTH_BITS)) @@ -452,7 +478,7 @@ int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, } if (ctx->flags & EVP_CIPH_NO_PADDING) - return EVP_EncryptUpdate(ctx, out, outl, in, inl); + return evp_EncryptDecryptUpdate(ctx, out, outl, in, inl); OPENSSL_assert(b <= sizeof(ctx->final)); @@ -469,7 +495,7 @@ int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, } else fix_len = 0; - if (!EVP_EncryptUpdate(ctx, out, outl, in, inl)) + if (!evp_EncryptDecryptUpdate(ctx, out, outl, in, inl)) return 0; /* @@ -500,6 +526,13 @@ int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) { int i, n; unsigned int b; + + /* Prevent accidental use of encryption context when decrypting */ + if (ctx->encrypt) { + EVPerr(EVP_F_EVP_DECRYPTFINAL_EX, EVP_R_INVALID_OPERATION); + return 0; + } + *outl = 0; if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) { diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c index 3e14a7b509496..60df27cbc20ac 100644 --- a/crypto/evp/evp_err.c +++ b/crypto/evp/evp_err.c @@ -50,6 +50,8 @@ static const ERR_STRING_DATA EVP_str_functs[] = { {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_DECRYPTUPDATE, 0), "EVP_DecryptUpdate"}, {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_DIGESTFINALXOF, 0), "EVP_DigestFinalXOF"}, {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_DIGESTINIT_EX, 0), "EVP_DigestInit_ex"}, + {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_ENCRYPTDECRYPTUPDATE, 0), + "evp_EncryptDecryptUpdate"}, {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_ENCRYPTFINAL_EX, 0), "EVP_EncryptFinal_ex"}, {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_ENCRYPTUPDATE, 0), "EVP_EncryptUpdate"}, diff --git a/crypto/evp/p_lib.c b/crypto/evp/p_lib.c index 9429be97e3f93..148df90f84b13 100644 --- a/crypto/evp/p_lib.c +++ b/crypto/evp/p_lib.c @@ -42,7 +42,7 @@ int EVP_PKEY_security_bits(const EVP_PKEY *pkey) return pkey->ameth->pkey_security_bits(pkey); } -int EVP_PKEY_size(EVP_PKEY *pkey) +int EVP_PKEY_size(const EVP_PKEY *pkey) { if (pkey && pkey->ameth && pkey->ameth->pkey_size) return pkey->ameth->pkey_size(pkey); diff --git a/crypto/include/internal/bn_int.h b/crypto/include/internal/bn_int.h index cffe5cfc16507..30be7efe14d8d 100644 --- a/crypto/include/internal/bn_int.h +++ b/crypto/include/internal/bn_int.h @@ -65,7 +65,10 @@ int bn_set_words(BIGNUM *a, const BN_ULONG *words, int num_words); * is customarily arranged by bn_correct_top. Output from below functions * is not processed with bn_correct_top, and for this reason it may not be * returned out of public API. It may only be passed internally into other - * functions known to support non-minimal or zero-padded BIGNUMs. + * functions known to support non-minimal or zero-padded BIGNUMs. Even + * though the goal is to facilitate constant-time-ness, not each subroutine + * is constant-time by itself. They all have pre-conditions, consult source + * code... */ int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_MONT_CTX *mont, BN_CTX *ctx); @@ -79,5 +82,9 @@ int bn_mod_sub_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m); int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx); +int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n); +int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n); +int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, + const BIGNUM *d, BN_CTX *ctx); #endif diff --git a/crypto/init.c b/crypto/init.c index 209d1a483daeb..b9a7334a7ed70 100644 --- a/crypto/init.c +++ b/crypto/init.c @@ -1,5 +1,5 @@ /* - * Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -100,10 +100,6 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_base) return 0; if ((init_lock = CRYPTO_THREAD_lock_new()) == NULL) goto err; -#ifndef OPENSSL_SYS_UEFI - if (atexit(OPENSSL_cleanup) != 0) - goto err; -#endif OPENSSL_cpuid_setup(); destructor_key.value = key; @@ -121,13 +117,53 @@ err: return 0; } +static CRYPTO_ONCE register_atexit = CRYPTO_ONCE_STATIC_INIT; +#if !defined(OPENSSL_SYS_UEFI) && defined(_WIN32) +static int win32atexit(void) +{ + OPENSSL_cleanup(); + return 0; +} +#endif + +DEFINE_RUN_ONCE_STATIC(ossl_init_register_atexit) +{ +#ifdef OPENSSL_INIT_DEBUG + fprintf(stderr, "OPENSSL_INIT: ossl_init_register_atexit()\n"); +#endif +#ifndef OPENSSL_SYS_UEFI +# ifdef _WIN32 + /* We use _onexit() in preference because it gets called on DLL unload */ + if (_onexit(win32atexit) == NULL) + return 0; +# else + if (atexit(OPENSSL_cleanup) != 0) + return 0; +# endif +#endif + + return 1; +} + +DEFINE_RUN_ONCE_STATIC_ALT(ossl_init_no_register_atexit, + ossl_init_register_atexit) +{ +#ifdef OPENSSL_INIT_DEBUG + fprintf(stderr, "OPENSSL_INIT: ossl_init_no_register_atexit ok!\n"); +#endif + /* Do nothing in this case */ + return 1; +} + static CRYPTO_ONCE load_crypto_nodelete = CRYPTO_ONCE_STATIC_INIT; DEFINE_RUN_ONCE_STATIC(ossl_init_load_crypto_nodelete) { #ifdef OPENSSL_INIT_DEBUG fprintf(stderr, "OPENSSL_INIT: ossl_init_load_crypto_nodelete()\n"); #endif -#if !defined(OPENSSL_NO_DSO) && !defined(OPENSSL_USE_NODELETE) +#if !defined(OPENSSL_NO_DSO) \ + && !defined(OPENSSL_USE_NODELETE) \ + && !defined(OPENSSL_NO_PINSHARED) # ifdef DSO_WIN32 { HMODULE handle = NULL; @@ -177,12 +213,6 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_load_crypto_nodelete) static CRYPTO_ONCE load_crypto_strings = CRYPTO_ONCE_STATIC_INIT; static int load_crypto_strings_inited = 0; -DEFINE_RUN_ONCE_STATIC(ossl_init_no_load_crypto_strings) -{ - /* Do nothing in this case */ - return 1; -} - DEFINE_RUN_ONCE_STATIC(ossl_init_load_crypto_strings) { int ret = 1; @@ -201,6 +231,13 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_load_crypto_strings) return ret; } +DEFINE_RUN_ONCE_STATIC_ALT(ossl_init_no_load_crypto_strings, + ossl_init_load_crypto_strings) +{ + /* Do nothing in this case */ + return 1; +} + static CRYPTO_ONCE add_all_ciphers = CRYPTO_ONCE_STATIC_INIT; DEFINE_RUN_ONCE_STATIC(ossl_init_add_all_ciphers) { @@ -218,6 +255,13 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_add_all_ciphers) return 1; } +DEFINE_RUN_ONCE_STATIC_ALT(ossl_init_no_add_all_ciphers, + ossl_init_add_all_ciphers) +{ + /* Do nothing */ + return 1; +} + static CRYPTO_ONCE add_all_digests = CRYPTO_ONCE_STATIC_INIT; DEFINE_RUN_ONCE_STATIC(ossl_init_add_all_digests) { @@ -235,7 +279,8 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_add_all_digests) return 1; } -DEFINE_RUN_ONCE_STATIC(ossl_init_no_add_algs) +DEFINE_RUN_ONCE_STATIC_ALT(ossl_init_no_add_all_digests, + ossl_init_add_all_digests) { /* Do nothing */ return 1; @@ -243,19 +288,14 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_no_add_algs) static CRYPTO_ONCE config = CRYPTO_ONCE_STATIC_INIT; static int config_inited = 0; -static const char *appname; +static const OPENSSL_INIT_SETTINGS *conf_settings = NULL; DEFINE_RUN_ONCE_STATIC(ossl_init_config) { -#ifdef OPENSSL_INIT_DEBUG - fprintf(stderr, - "OPENSSL_INIT: ossl_init_config: openssl_config(%s)\n", - appname == NULL ? "NULL" : appname); -#endif - openssl_config_int(appname); + int ret = openssl_config_int(conf_settings); config_inited = 1; - return 1; + return ret; } -DEFINE_RUN_ONCE_STATIC(ossl_init_no_config) +DEFINE_RUN_ONCE_STATIC_ALT(ossl_init_no_config, ossl_init_config) { #ifdef OPENSSL_INIT_DEBUG fprintf(stderr, @@ -586,17 +626,43 @@ int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings) return 0; } + /* + * When the caller specifies OPENSSL_INIT_BASE_ONLY, that should be the + * *only* option specified. With that option we return immediately after + * doing the requested limited initialization. Note that + * err_shelve_state() called by us via ossl_init_load_crypto_nodelete() + * re-enters OPENSSL_init_crypto() with OPENSSL_INIT_BASE_ONLY, but with + * base already initialized this is a harmless NOOP. + * + * If we remain the only caller of err_shelve_state() the recursion should + * perhaps be removed, but if in doubt, it can be left in place. + */ if (!RUN_ONCE(&base, ossl_init_base)) return 0; + if (opts & OPENSSL_INIT_BASE_ONLY) + return 1; + + /* + * Now we don't always set up exit handlers, the INIT_BASE_ONLY calls + * should not have the side-effect of setting up exit handlers, and + * therefore, this code block is below the INIT_BASE_ONLY-conditioned early + * return above. + */ + if ((opts & OPENSSL_INIT_NO_ATEXIT) != 0) { + if (!RUN_ONCE_ALT(®ister_atexit, ossl_init_no_register_atexit, + ossl_init_register_atexit)) + return 0; + } else if (!RUN_ONCE(®ister_atexit, ossl_init_register_atexit)) { + return 0; + } - if (!(opts & OPENSSL_INIT_BASE_ONLY) - && !RUN_ONCE(&load_crypto_nodelete, - ossl_init_load_crypto_nodelete)) + if (!RUN_ONCE(&load_crypto_nodelete, ossl_init_load_crypto_nodelete)) return 0; if ((opts & OPENSSL_INIT_NO_LOAD_CRYPTO_STRINGS) - && !RUN_ONCE(&load_crypto_strings, - ossl_init_no_load_crypto_strings)) + && !RUN_ONCE_ALT(&load_crypto_strings, + ossl_init_no_load_crypto_strings, + ossl_init_load_crypto_strings)) return 0; if ((opts & OPENSSL_INIT_LOAD_CRYPTO_STRINGS) @@ -604,7 +670,8 @@ int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings) return 0; if ((opts & OPENSSL_INIT_NO_ADD_ALL_CIPHERS) - && !RUN_ONCE(&add_all_ciphers, ossl_init_no_add_algs)) + && !RUN_ONCE_ALT(&add_all_ciphers, ossl_init_no_add_all_ciphers, + ossl_init_add_all_ciphers)) return 0; if ((opts & OPENSSL_INIT_ADD_ALL_CIPHERS) @@ -612,7 +679,8 @@ int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings) return 0; if ((opts & OPENSSL_INIT_NO_ADD_ALL_DIGESTS) - && !RUN_ONCE(&add_all_digests, ossl_init_no_add_algs)) + && !RUN_ONCE_ALT(&add_all_digests, ossl_init_no_add_all_digests, + ossl_init_add_all_digests)) return 0; if ((opts & OPENSSL_INIT_ADD_ALL_DIGESTS) @@ -624,14 +692,15 @@ int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings) return 0; if ((opts & OPENSSL_INIT_NO_LOAD_CONFIG) - && !RUN_ONCE(&config, ossl_init_no_config)) + && !RUN_ONCE_ALT(&config, ossl_init_no_config, ossl_init_config)) return 0; if (opts & OPENSSL_INIT_LOAD_CONFIG) { int ret; CRYPTO_THREAD_write_lock(init_lock); - appname = (settings == NULL) ? NULL : settings->appname; + conf_settings = settings; ret = RUN_ONCE(&config, ossl_init_config); + conf_settings = NULL; CRYPTO_THREAD_unlock(init_lock); if (!ret) return 0; @@ -695,7 +764,9 @@ int OPENSSL_atexit(void (*handler)(void)) { OPENSSL_INIT_STOP *newhand; -#if !defined(OPENSSL_NO_DSO) && !defined(OPENSSL_USE_NODELETE) +#if !defined(OPENSSL_NO_DSO) \ + && !defined(OPENSSL_USE_NODELETE)\ + && !defined(OPENSSL_NO_PINSHARED) { union { void *sym; diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl index afc30c3e72a43..30158aa076da9 100755 --- a/crypto/modes/asm/ghash-x86_64.pl +++ b/crypto/modes/asm/ghash-x86_64.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2010-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -529,6 +529,7 @@ $code.=<<___; .type gcm_init_clmul,\@abi-omnipotent .align 16 gcm_init_clmul: +.cfi_startproc .L_init_clmul: ___ $code.=<<___ if ($win64); @@ -598,6 +599,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size gcm_init_clmul,.-gcm_init_clmul ___ } @@ -609,6 +611,7 @@ $code.=<<___; .type gcm_gmult_clmul,\@abi-omnipotent .align 16 gcm_gmult_clmul: +.cfi_startproc .L_gmult_clmul: movdqu ($Xip),$Xi movdqa .Lbswap_mask(%rip),$T3 @@ -645,6 +648,7 @@ $code.=<<___; pshufb $T3,$Xi movdqu $Xi,($Xip) ret +.cfi_endproc .size gcm_gmult_clmul,.-gcm_gmult_clmul ___ } @@ -658,6 +662,7 @@ $code.=<<___; .type gcm_ghash_clmul,\@abi-omnipotent .align 32 gcm_ghash_clmul: +.cfi_startproc .L_ghash_clmul: ___ $code.=<<___ if ($win64); @@ -1005,6 +1010,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size gcm_ghash_clmul,.-gcm_ghash_clmul ___ } @@ -1014,6 +1020,7 @@ $code.=<<___; .type gcm_init_avx,\@abi-omnipotent .align 32 gcm_init_avx: +.cfi_startproc ___ if ($avx) { my ($Htbl,$Xip)=@_4args; @@ -1142,6 +1149,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size gcm_init_avx,.-gcm_init_avx ___ } else { @@ -1156,7 +1164,9 @@ $code.=<<___; .type gcm_gmult_avx,\@abi-omnipotent .align 32 gcm_gmult_avx: +.cfi_startproc jmp .L_gmult_clmul +.cfi_endproc .size gcm_gmult_avx,.-gcm_gmult_avx ___ @@ -1165,6 +1175,7 @@ $code.=<<___; .type gcm_ghash_avx,\@abi-omnipotent .align 32 gcm_ghash_avx: +.cfi_startproc ___ if ($avx) { my ($Xip,$Htbl,$inp,$len)=@_4args; @@ -1577,6 +1588,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; ret +.cfi_endproc .size gcm_ghash_avx,.-gcm_ghash_avx ___ } else { diff --git a/crypto/objects/obj_dat.h b/crypto/objects/obj_dat.h index e931f7f516ca8..9ab1a14b9e327 100644 --- a/crypto/objects/obj_dat.h +++ b/crypto/objects/obj_dat.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by crypto/objects/obj_dat.pl * - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy * in the file LICENSE in the source distribution or at diff --git a/crypto/objects/obj_dat.pl b/crypto/objects/obj_dat.pl index e80900d09d268..e5d38147eccf0 100644 --- a/crypto/objects/obj_dat.pl +++ b/crypto/objects/obj_dat.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy diff --git a/crypto/objects/obj_xref.h b/crypto/objects/obj_xref.h index 9606e57d6191e..9144d569dcd04 100644 --- a/crypto/objects/obj_xref.h +++ b/crypto/objects/obj_xref.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by objxref.pl * - * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy diff --git a/crypto/objects/objects.pl b/crypto/objects/objects.pl index 8f9b67f95991a..d7d1962c9999f 100644 --- a/crypto/objects/objects.pl +++ b/crypto/objects/objects.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy diff --git a/crypto/objects/objxref.pl b/crypto/objects/objxref.pl index 0ec63f067e3cf..ce76cadae31c8 100755 --- a/crypto/objects/objxref.pl +++ b/crypto/objects/objxref.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy diff --git a/crypto/pem/pem_info.c b/crypto/pem/pem_info.c index a45fe83001b35..f90cb4465096c 100644 --- a/crypto/pem/pem_info.c +++ b/crypto/pem/pem_info.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -297,7 +297,7 @@ int PEM_X509_INFO_write_bio(BIO *bp, X509_INFO *xi, EVP_CIPHER *enc, goto err; } - /* Create the right magic header stuff */ + /* Create the right magic header stuff */ buf[0] = '\0'; PEM_proc_type(buf, PEM_TYPE_ENCRYPTED); PEM_dek_info(buf, objstr, EVP_CIPHER_iv_length(enc), diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl index f8380f2e9cfa9..29a0eacfd532a 100755 --- a/crypto/perlasm/x86_64-xlate.pl +++ b/crypto/perlasm/x86_64-xlate.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2005-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -541,6 +541,7 @@ my %globals; ); my ($cfa_reg, $cfa_rsp); + my @cfa_stack; # [us]leb128 format is variable-length integer representation base # 2^128, with most significant bit of each byte being 0 denoting @@ -648,7 +649,13 @@ my %globals; # why it starts with -8. Recall that CFA is top of caller's # stack... /startproc/ && do { ($cfa_reg, $cfa_rsp) = ("%rsp", -8); last; }; - /endproc/ && do { ($cfa_reg, $cfa_rsp) = ("%rsp", 0); last; }; + /endproc/ && do { ($cfa_reg, $cfa_rsp) = ("%rsp", 0); + # .cfi_remember_state directives that are not + # matched with .cfi_restore_state are + # unnecessary. + die "unpaired .cfi_remember_state" if (@cfa_stack); + last; + }; /def_cfa_register/ && do { $cfa_reg = $$line; last; }; /def_cfa_offset/ @@ -688,6 +695,14 @@ my %globals; cfa_expression($$line))); last; }; + /remember_state/ + && do { push @cfa_stack, [$cfa_reg, $cfa_rsp]; + last; + }; + /restore_state/ + && do { ($cfa_reg, $cfa_rsp) = @{pop @cfa_stack}; + last; + }; } $self->{value} = ".cfi_$dir\t$$line" if ($dir); diff --git a/crypto/poly1305/asm/poly1305-armv8.pl b/crypto/poly1305/asm/poly1305-armv8.pl index ac06457b65301..6c6c9bb05be07 100755 --- a/crypto/poly1305/asm/poly1305-armv8.pl +++ b/crypto/poly1305/asm/poly1305-armv8.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -291,6 +291,7 @@ poly1305_blocks_neon: cbz $is_base2_26,poly1305_blocks .Lblocks_neon: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-80]! add x29,sp,#0 @@ -859,6 +860,7 @@ poly1305_blocks_neon: st1 {$ACC4}[0],[$ctx] .Lno_data_neon: + .inst 0xd50323bf // autiasp ldr x29,[sp],#80 ret .size poly1305_blocks_neon,.-poly1305_blocks_neon diff --git a/crypto/ppc_arch.h b/crypto/ppc_arch.h index 65cf96fc1fe8b..72bd7468745c7 100644 --- a/crypto/ppc_arch.h +++ b/crypto/ppc_arch.h @@ -1,5 +1,5 @@ /* - * Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -22,5 +22,7 @@ extern unsigned int OPENSSL_ppccap_P; # define PPC_CRYPTO207 (1<<2) # define PPC_FPU (1<<3) # define PPC_MADD300 (1<<4) +# define PPC_MFTB (1<<5) +# define PPC_MFSPR268 (1<<6) #endif diff --git a/crypto/ppccap.c b/crypto/ppccap.c index 8b7d765c3aa23..afb9e31b00288 100644 --- a/crypto/ppccap.c +++ b/crypto/ppccap.c @@ -1,5 +1,5 @@ /* - * Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2009-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -168,16 +168,50 @@ void OPENSSL_altivec_probe(void); void OPENSSL_crypto207_probe(void); void OPENSSL_madd300_probe(void); -/* - * Use a weak reference to getauxval() so we can use it if it is available - * but don't break the build if it is not. Note that this is *link-time* - * feature detection, not *run-time*. In other words if we link with - * symbol present, it's expected to be present even at run-time. - */ -#if defined(__GNUC__) && __GNUC__>=2 && defined(__ELF__) -extern unsigned long getauxval(unsigned long type) __attribute__ ((weak)); -#else -static unsigned long (*getauxval) (unsigned long) = NULL; +long OPENSSL_rdtsc_mftb(void); +long OPENSSL_rdtsc_mfspr268(void); + +uint32_t OPENSSL_rdtsc(void) +{ + if (OPENSSL_ppccap_P & PPC_MFTB) + return OPENSSL_rdtsc_mftb(); + else if (OPENSSL_ppccap_P & PPC_MFSPR268) + return OPENSSL_rdtsc_mfspr268(); + else + return 0; +} + +size_t OPENSSL_instrument_bus_mftb(unsigned int *, size_t); +size_t OPENSSL_instrument_bus_mfspr268(unsigned int *, size_t); + +size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt) +{ + if (OPENSSL_ppccap_P & PPC_MFTB) + return OPENSSL_instrument_bus_mftb(out, cnt); + else if (OPENSSL_ppccap_P & PPC_MFSPR268) + return OPENSSL_instrument_bus_mfspr268(out, cnt); + else + return 0; +} + +size_t OPENSSL_instrument_bus2_mftb(unsigned int *, size_t, size_t); +size_t OPENSSL_instrument_bus2_mfspr268(unsigned int *, size_t, size_t); + +size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max) +{ + if (OPENSSL_ppccap_P & PPC_MFTB) + return OPENSSL_instrument_bus2_mftb(out, cnt, max); + else if (OPENSSL_ppccap_P & PPC_MFSPR268) + return OPENSSL_instrument_bus2_mfspr268(out, cnt, max); + else + return 0; +} + +#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) +# if __GLIBC_PREREQ(2, 16) +# include <sys/auxv.h> +# define OSSL_IMPLEMENT_GETAUXVAL +# endif #endif /* I wish <sys/auxv.h> was universally available */ @@ -277,7 +311,8 @@ void OPENSSL_cpuid_setup(void) } #endif - if (getauxval != NULL) { +#ifdef OSSL_IMPLEMENT_GETAUXVAL + { unsigned long hwcap = getauxval(HWCAP); if (hwcap & HWCAP_FPU) { @@ -304,9 +339,8 @@ void OPENSSL_cpuid_setup(void) if (hwcap & HWCAP_ARCH_3_00) { OPENSSL_ppccap_P |= PPC_MADD300; } - - return; } +#endif sigfillset(&all_masked); sigdelset(&all_masked, SIGILL); @@ -325,15 +359,16 @@ void OPENSSL_cpuid_setup(void) sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset); sigaction(SIGILL, &ill_act, &ill_oact); +#ifndef OSSL_IMPLEMENT_GETAUXVAL if (sigsetjmp(ill_jmp,1) == 0) { OPENSSL_fpu_probe(); OPENSSL_ppccap_P |= PPC_FPU; if (sizeof(size_t) == 4) { -#ifdef __linux +# ifdef __linux struct utsname uts; if (uname(&uts) == 0 && strcmp(uts.machine, "ppc64") == 0) -#endif +# endif if (sigsetjmp(ill_jmp, 1) == 0) { OPENSSL_ppc64_probe(); OPENSSL_ppccap_P |= PPC_FPU64; @@ -358,6 +393,15 @@ void OPENSSL_cpuid_setup(void) OPENSSL_madd300_probe(); OPENSSL_ppccap_P |= PPC_MADD300; } +#endif + + if (sigsetjmp(ill_jmp, 1) == 0) { + OPENSSL_rdtsc_mftb(); + OPENSSL_ppccap_P |= PPC_MFTB; + } else if (sigsetjmp(ill_jmp, 1) == 0) { + OPENSSL_rdtsc_mfspr268(); + OPENSSL_ppccap_P |= PPC_MFSPR268; + } sigaction(SIGILL, &ill_oact, NULL); sigprocmask(SIG_SETMASK, &oset, NULL); diff --git a/crypto/ppccpuid.pl b/crypto/ppccpuid.pl index 9d1cada4dc4c9..a38445fd3c529 100755 --- a/crypto/ppccpuid.pl +++ b/crypto/ppccpuid.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2007-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -124,26 +124,23 @@ Ladd: lwarx r5,0,r3 .long 0 .size .OPENSSL_atomic_add,.-.OPENSSL_atomic_add -.globl .OPENSSL_rdtsc +.globl .OPENSSL_rdtsc_mftb .align 4 -.OPENSSL_rdtsc: -___ -$code.=<<___ if ($flavour =~ /64/); - mftb r3 -___ -$code.=<<___ if ($flavour !~ /64/); -Loop_rdtsc: - mftbu r5 +.OPENSSL_rdtsc_mftb: mftb r3 - mftbu r4 - cmplw r4,r5 - bne Loop_rdtsc -___ -$code.=<<___; blr .long 0 .byte 0,12,0x14,0,0,0,0,0 -.size .OPENSSL_rdtsc,.-.OPENSSL_rdtsc +.size .OPENSSL_rdtsc_mftb,.-.OPENSSL_rdtsc_mftb + +.globl .OPENSSL_rdtsc_mfspr268 +.align 4 +.OPENSSL_rdtsc_mfspr268: + mfspr r3,268 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.size .OPENSSL_rdtsc_mfspr268,.-.OPENSSL_rdtsc_mfspr268 .globl .OPENSSL_cleanse .align 4 @@ -210,9 +207,9 @@ my ($tick,$lasttick)=("r6","r7"); my ($diff,$lastdiff)=("r8","r9"); $code.=<<___; -.globl .OPENSSL_instrument_bus +.globl .OPENSSL_instrument_bus_mftb .align 4 -.OPENSSL_instrument_bus: +.OPENSSL_instrument_bus_mftb: mtctr $cnt mftb $lasttick # collect 1st tick @@ -240,11 +237,11 @@ Loop: mftb $tick .long 0 .byte 0,12,0x14,0,0,0,2,0 .long 0 -.size .OPENSSL_instrument_bus,.-.OPENSSL_instrument_bus +.size .OPENSSL_instrument_bus_mftb,.-.OPENSSL_instrument_bus_mftb -.globl .OPENSSL_instrument_bus2 +.globl .OPENSSL_instrument_bus2_mftb .align 4 -.OPENSSL_instrument_bus2: +.OPENSSL_instrument_bus2_mftb: mr r0,$cnt slwi $cnt,$cnt,2 @@ -292,7 +289,91 @@ Ldone2: .long 0 .byte 0,12,0x14,0,0,0,3,0 .long 0 -.size .OPENSSL_instrument_bus2,.-.OPENSSL_instrument_bus2 +.size .OPENSSL_instrument_bus2_mftb,.-.OPENSSL_instrument_bus2_mftb + +.globl .OPENSSL_instrument_bus_mfspr268 +.align 4 +.OPENSSL_instrument_bus_mfspr268: + mtctr $cnt + + mfspr $lasttick,268 # collect 1st tick + li $diff,0 + + dcbf 0,$out # flush cache line + lwarx $tick,0,$out # load and lock + add $tick,$tick,$diff + stwcx. $tick,0,$out + stwx $tick,0,$out + +Loop3: mfspr $tick,268 + sub $diff,$tick,$lasttick + mr $lasttick,$tick + dcbf 0,$out # flush cache line + lwarx $tick,0,$out # load and lock + add $tick,$tick,$diff + stwcx. $tick,0,$out + stwx $tick,0,$out + addi $out,$out,4 # ++$out + bdnz Loop3 + + mr r3,$cnt + blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 +.size .OPENSSL_instrument_bus_mfspr268,.-.OPENSSL_instrument_bus_mfspr268 + +.globl .OPENSSL_instrument_bus2_mfspr268 +.align 4 +.OPENSSL_instrument_bus2_mfspr268: + mr r0,$cnt + slwi $cnt,$cnt,2 + + mfspr $lasttick,268 # collect 1st tick + li $diff,0 + + dcbf 0,$out # flush cache line + lwarx $tick,0,$out # load and lock + add $tick,$tick,$diff + stwcx. $tick,0,$out + stwx $tick,0,$out + + mfspr $tick,268 # collect 1st diff + sub $diff,$tick,$lasttick + mr $lasttick,$tick + mr $lastdiff,$diff +Loop4: + dcbf 0,$out # flush cache line + lwarx $tick,0,$out # load and lock + add $tick,$tick,$diff + stwcx. $tick,0,$out + stwx $tick,0,$out + + addic. $max,$max,-1 + beq Ldone4 + + mfspr $tick,268 + sub $diff,$tick,$lasttick + mr $lasttick,$tick + cmplw 7,$diff,$lastdiff + mr $lastdiff,$diff + + mfcr $tick # pull cr + not $tick,$tick # flip bits + rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale + + sub. $cnt,$cnt,$tick # conditional --$cnt + add $out,$out,$tick # conditional ++$out + bne Loop4 + +Ldone4: + srwi $cnt,$cnt,2 + sub r3,r0,$cnt + blr + .long 0 + .byte 0,12,0x14,0,0,0,3,0 + .long 0 +.size .OPENSSL_instrument_bus2_mfspr268,.-.OPENSSL_instrument_bus2_mfspr268 ___ } diff --git a/crypto/rand/rand_unix.c b/crypto/rand/rand_unix.c index 9d8ffdd537965..9cbc9ade77fa2 100644 --- a/crypto/rand/rand_unix.c +++ b/crypto/rand/rand_unix.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -93,6 +93,27 @@ static uint64_t get_timer_bits(void); # error "UEFI and VXWorks only support seeding NONE" #endif +#if defined(OPENSSL_SYS_VXWORKS) +/* empty implementation */ +int rand_pool_init(void) +{ + return 1; +} + +void rand_pool_cleanup(void) +{ +} + +void rand_pool_keep_random_devices_open(int keep) +{ +} + +size_t rand_pool_acquire_entropy(RAND_POOL *pool) +{ + return rand_pool_entropy_available(pool); +} +#endif + #if !(defined(OPENSSL_SYS_WINDOWS) || defined(OPENSSL_SYS_WIN32) \ || defined(OPENSSL_SYS_VMS) || defined(OPENSSL_SYS_VXWORKS) \ || defined(OPENSSL_SYS_UEFI)) diff --git a/crypto/rsa/rsa_ameth.c b/crypto/rsa/rsa_ameth.c index a6595aec05420..75debb3e0a9d2 100644 --- a/crypto/rsa/rsa_ameth.c +++ b/crypto/rsa/rsa_ameth.c @@ -34,7 +34,7 @@ static int rsa_param_encode(const EVP_PKEY *pkey, *pstr = NULL; /* If RSA it's just NULL type */ - if (pkey->ameth->pkey_id == EVP_PKEY_RSA) { + if (pkey->ameth->pkey_id != EVP_PKEY_RSA_PSS) { *pstrtype = V_ASN1_NULL; return 1; } @@ -58,7 +58,7 @@ static int rsa_param_decode(RSA *rsa, const X509_ALGOR *alg) int algptype; X509_ALGOR_get0(&algoid, &algptype, &algp, alg); - if (OBJ_obj2nid(algoid) == EVP_PKEY_RSA) + if (OBJ_obj2nid(algoid) != EVP_PKEY_RSA_PSS) return 1; if (algptype == V_ASN1_UNDEF) return 1; @@ -109,7 +109,10 @@ static int rsa_pub_decode(EVP_PKEY *pkey, X509_PUBKEY *pubkey) RSA_free(rsa); return 0; } - EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, rsa); + if (!EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, rsa)) { + RSA_free(rsa); + return 0; + } return 1; } diff --git a/crypto/rsa/rsa_oaep.c b/crypto/rsa/rsa_oaep.c index f13c6fc9e5063..689e6dc222922 100644 --- a/crypto/rsa/rsa_oaep.c +++ b/crypto/rsa/rsa_oaep.c @@ -120,7 +120,7 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, const EVP_MD *mgf1md) { int i, dblen = 0, mlen = -1, one_index = 0, msg_index; - unsigned int good, found_one_byte; + unsigned int good = 0, found_one_byte, mask; const unsigned char *maskedseed, *maskeddb; /* * |em| is the encoded message, zero-padded to exactly |num| bytes: em = @@ -147,8 +147,11 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, * the ciphertext, see PKCS #1 v2.2, section 7.1.2. * This does not leak any side-channel information. */ - if (num < flen || num < 2 * mdlen + 2) - goto decoding_err; + if (num < flen || num < 2 * mdlen + 2) { + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, + RSA_R_OAEP_DECODING_ERROR); + return -1; + } dblen = num - mdlen - 1; db = OPENSSL_malloc(dblen); @@ -157,25 +160,26 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, goto cleanup; } - if (flen != num) { - em = OPENSSL_zalloc(num); - if (em == NULL) { - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, - ERR_R_MALLOC_FAILURE); - goto cleanup; - } + em = OPENSSL_malloc(num); + if (em == NULL) { + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, + ERR_R_MALLOC_FAILURE); + goto cleanup; + } - /* - * Caller is encouraged to pass zero-padded message created with - * BN_bn2binpad, but if it doesn't, we do this zero-padding copy - * to avoid leaking that information. The copy still leaks some - * side-channel information, but it's impossible to have a fixed - * memory access pattern since we can't read out of the bounds of - * |from|. - */ - memcpy(em + num - flen, from, flen); - from = em; + /* + * Caller is encouraged to pass zero-padded message created with + * BN_bn2binpad. Trouble is that since we can't read out of |from|'s + * bounds, it's impossible to have an invariant memory access pattern + * in case |from| was not zero-padded in advance. + */ + for (from += flen, em += num, i = 0; i < num; i++) { + mask = ~constant_time_is_zero(flen); + flen -= 1 & mask; + from -= 1 & mask; + *--em = *from & mask; } + from = em; /* * The first byte must be zero, however we must not leak if this is @@ -222,32 +226,48 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, * so plaintext-awareness ensures timing side-channels are no longer a * concern. */ - if (!good) - goto decoding_err; - msg_index = one_index + 1; mlen = dblen - msg_index; - if (tlen < mlen) { - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, RSA_R_DATA_TOO_LARGE); - mlen = -1; - } else { - memcpy(to, db + msg_index, mlen); - goto cleanup; + /* + * For good measure, do this check in constant tine as well. + */ + good &= constant_time_ge(tlen, mlen); + + /* + * Even though we can't fake result's length, we can pretend copying + * |tlen| bytes where |mlen| bytes would be real. Last |tlen| of |dblen| + * bytes are viewed as circular buffer with start at |tlen|-|mlen'|, + * where |mlen'| is "saturated" |mlen| value. Deducing information + * about failure or |mlen| would take attacker's ability to observe + * memory access pattern with byte granularity *as it occurs*. It + * should be noted that failure is indistinguishable from normal + * operation if |tlen| is fixed by protocol. + */ + tlen = constant_time_select_int(constant_time_lt(dblen, tlen), dblen, tlen); + msg_index = constant_time_select_int(good, msg_index, dblen - tlen); + mlen = dblen - msg_index; + for (from = db + msg_index, mask = good, i = 0; i < tlen; i++) { + unsigned int equals = constant_time_eq(i, mlen); + + from -= dblen & equals; /* if (i == dblen) rewind */ + mask &= mask ^ equals; /* if (i == dblen) mask = 0 */ + to[i] = constant_time_select_8(mask, from[i], to[i]); } - decoding_err: /* * To avoid chosen ciphertext attacks, the error message should not * reveal which kind of decoding error happened. */ RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, RSA_R_OAEP_DECODING_ERROR); + err_clear_last_constant_time(1 & good); cleanup: OPENSSL_cleanse(seed, sizeof(seed)); OPENSSL_clear_free(db, dblen); OPENSSL_clear_free(em, num); - return mlen; + + return constant_time_select_int(good, mlen, -1); } int PKCS1_MGF1(unsigned char *mask, long len, diff --git a/crypto/rsa/rsa_ossl.c b/crypto/rsa/rsa_ossl.c index 2b1b006c2801d..465134257fcc7 100644 --- a/crypto/rsa/rsa_ossl.c +++ b/crypto/rsa/rsa_ossl.c @@ -10,6 +10,7 @@ #include "internal/cryptlib.h" #include "internal/bn_int.h" #include "rsa_locl.h" +#include "internal/constant_time_locl.h" static int rsa_ossl_public_encrypt(int flen, const unsigned char *from, unsigned char *to, RSA *rsa, int padding); @@ -286,6 +287,11 @@ static int rsa_ossl_private_encrypt(int flen, const unsigned char *from, goto err; } + if (rsa->flags & RSA_FLAG_CACHE_PUBLIC) + if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock, + rsa->n, ctx)) + goto err; + if (!(rsa->flags & RSA_FLAG_NO_BLINDING)) { blinding = rsa_get_blinding(rsa, &local_blinding, ctx); if (blinding == NULL) { @@ -318,13 +324,6 @@ static int rsa_ossl_private_encrypt(int flen, const unsigned char *from, } BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME); - if (rsa->flags & RSA_FLAG_CACHE_PUBLIC) - if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock, - rsa->n, ctx)) { - BN_free(d); - goto err; - } - if (!rsa->meth->bn_mod_exp(ret, f, d, rsa->n, ctx, rsa->_method_mod_n)) { BN_free(d); @@ -481,8 +480,8 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from, RSAerr(RSA_F_RSA_OSSL_PRIVATE_DECRYPT, RSA_R_UNKNOWN_PADDING_TYPE); goto err; } - if (r < 0) - RSAerr(RSA_F_RSA_OSSL_PRIVATE_DECRYPT, RSA_R_PADDING_CHECK_FAILED); + RSAerr(RSA_F_RSA_OSSL_PRIVATE_DECRYPT, RSA_R_PADDING_CHECK_FAILED); + err_clear_last_constant_time(r >= 0); err: if (ctx != NULL) diff --git a/crypto/rsa/rsa_pk1.c b/crypto/rsa/rsa_pk1.c index d07c0d6f852b9..0626907418095 100644 --- a/crypto/rsa/rsa_pk1.c +++ b/crypto/rsa/rsa_pk1.c @@ -158,10 +158,10 @@ int RSA_padding_check_PKCS1_type_2(unsigned char *to, int tlen, int i; /* |em| is the encoded message, zero-padded to exactly |num| bytes */ unsigned char *em = NULL; - unsigned int good, found_zero_byte; + unsigned int good, found_zero_byte, mask; int zero_index = 0, msg_index, mlen = -1; - if (tlen < 0 || flen < 0) + if (tlen <= 0 || flen <= 0) return -1; /* @@ -169,39 +169,41 @@ int RSA_padding_check_PKCS1_type_2(unsigned char *to, int tlen, * section 7.2.2. */ - if (flen > num) - goto err; - - if (num < 11) - goto err; + if (flen > num || num < 11) { + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, + RSA_R_PKCS_DECODING_ERROR); + return -1; + } - if (flen != num) { - em = OPENSSL_zalloc(num); - if (em == NULL) { - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, ERR_R_MALLOC_FAILURE); - return -1; - } - /* - * Caller is encouraged to pass zero-padded message created with - * BN_bn2binpad, but if it doesn't, we do this zero-padding copy - * to avoid leaking that information. The copy still leaks some - * side-channel information, but it's impossible to have a fixed - * memory access pattern since we can't read out of the bounds of - * |from|. - */ - memcpy(em + num - flen, from, flen); - from = em; + em = OPENSSL_malloc(num); + if (em == NULL) { + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, ERR_R_MALLOC_FAILURE); + return -1; + } + /* + * Caller is encouraged to pass zero-padded message created with + * BN_bn2binpad. Trouble is that since we can't read out of |from|'s + * bounds, it's impossible to have an invariant memory access pattern + * in case |from| was not zero-padded in advance. + */ + for (from += flen, em += num, i = 0; i < num; i++) { + mask = ~constant_time_is_zero(flen); + flen -= 1 & mask; + from -= 1 & mask; + *--em = *from & mask; } + from = em; good = constant_time_is_zero(from[0]); good &= constant_time_eq(from[1], 2); + /* scan over padding data */ found_zero_byte = 0; for (i = 2; i < num; i++) { unsigned int equals0 = constant_time_is_zero(from[i]); - zero_index = - constant_time_select_int(~found_zero_byte & equals0, i, - zero_index); + + zero_index = constant_time_select_int(~found_zero_byte & equals0, + i, zero_index); found_zero_byte |= equals0; } @@ -210,7 +212,7 @@ int RSA_padding_check_PKCS1_type_2(unsigned char *to, int tlen, * If we never found a 0-byte, then |zero_index| is 0 and the check * also fails. */ - good &= constant_time_ge((unsigned int)(zero_index), 2 + 8); + good &= constant_time_ge(zero_index, 2 + 8); /* * Skip the zero byte. This is incorrect if we never found a zero-byte @@ -220,27 +222,34 @@ int RSA_padding_check_PKCS1_type_2(unsigned char *to, int tlen, mlen = num - msg_index; /* - * For good measure, do this check in constant time as well; it could - * leak something if |tlen| was assuming valid padding. + * For good measure, do this check in constant time as well. */ - good &= constant_time_ge((unsigned int)(tlen), (unsigned int)(mlen)); + good &= constant_time_ge(tlen, mlen); /* - * We can't continue in constant-time because we need to copy the result - * and we cannot fake its length. This unavoidably leaks timing - * information at the API boundary. + * Even though we can't fake result's length, we can pretend copying + * |tlen| bytes where |mlen| bytes would be real. Last |tlen| of |num| + * bytes are viewed as circular buffer with start at |tlen|-|mlen'|, + * where |mlen'| is "saturated" |mlen| value. Deducing information + * about failure or |mlen| would take attacker's ability to observe + * memory access pattern with byte granularity *as it occurs*. It + * should be noted that failure is indistinguishable from normal + * operation if |tlen| is fixed by protocol. */ - if (!good) { - mlen = -1; - goto err; - } + tlen = constant_time_select_int(constant_time_lt(num, tlen), num, tlen); + msg_index = constant_time_select_int(good, msg_index, num - tlen); + mlen = num - msg_index; + for (from += msg_index, mask = good, i = 0; i < tlen; i++) { + unsigned int equals = constant_time_eq(i, mlen); - memcpy(to, from + msg_index, mlen); + from -= tlen & equals; /* if (i == mlen) rewind */ + mask &= mask ^ equals; /* if (i == mlen) mask = 0 */ + to[i] = constant_time_select_8(mask, from[i], to[i]); + } - err: OPENSSL_clear_free(em, num); - if (mlen == -1) - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, - RSA_R_PKCS_DECODING_ERROR); - return mlen; + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, RSA_R_PKCS_DECODING_ERROR); + err_clear_last_constant_time(1 & good); + + return constant_time_select_int(good, mlen, -1); } diff --git a/crypto/rsa/rsa_ssl.c b/crypto/rsa/rsa_ssl.c index 286d0a42de0f0..c5654595fb2f3 100644 --- a/crypto/rsa/rsa_ssl.c +++ b/crypto/rsa/rsa_ssl.c @@ -12,6 +12,7 @@ #include <openssl/bn.h> #include <openssl/rsa.h> #include <openssl/rand.h> +#include "internal/constant_time_locl.h" int RSA_padding_add_SSLv23(unsigned char *to, int tlen, const unsigned char *from, int flen) @@ -52,57 +53,115 @@ int RSA_padding_add_SSLv23(unsigned char *to, int tlen, return 1; } +/* + * Copy of RSA_padding_check_PKCS1_type_2 with a twist that rejects padding + * if nul delimiter is preceded by 8 consecutive 0x03 bytes. It also + * preserves error code reporting for backward compatibility. + */ int RSA_padding_check_SSLv23(unsigned char *to, int tlen, const unsigned char *from, int flen, int num) { - int i, j, k; - const unsigned char *p; + int i; + /* |em| is the encoded message, zero-padded to exactly |num| bytes */ + unsigned char *em = NULL; + unsigned int good, found_zero_byte, mask, threes_in_row; + int zero_index = 0, msg_index, mlen = -1, err; - p = from; if (flen < 10) { RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_DATA_TOO_SMALL); return -1; } - /* Accept even zero-padded input */ - if (flen == num) { - if (*(p++) != 0) { - RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_BLOCK_TYPE_IS_NOT_02); - return -1; - } - flen--; - } - if ((num != (flen + 1)) || (*(p++) != 02)) { - RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_BLOCK_TYPE_IS_NOT_02); - return -1; - } - /* scan over padding data */ - j = flen - 1; /* one for type */ - for (i = 0; i < j; i++) - if (*(p++) == 0) - break; - - if ((i == j) || (i < 8)) { - RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, - RSA_R_NULL_BEFORE_BLOCK_MISSING); + em = OPENSSL_malloc(num); + if (em == NULL) { + RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, ERR_R_MALLOC_FAILURE); return -1; } - for (k = -9; k < -1; k++) { - if (p[k] != 0x03) - break; + /* + * Caller is encouraged to pass zero-padded message created with + * BN_bn2binpad. Trouble is that since we can't read out of |from|'s + * bounds, it's impossible to have an invariant memory access pattern + * in case |from| was not zero-padded in advance. + */ + for (from += flen, em += num, i = 0; i < num; i++) { + mask = ~constant_time_is_zero(flen); + flen -= 1 & mask; + from -= 1 & mask; + *--em = *from & mask; } - if (k == -1) { - RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_SSLV3_ROLLBACK_ATTACK); - return -1; + from = em; + + good = constant_time_is_zero(from[0]); + good &= constant_time_eq(from[1], 2); + err = constant_time_select_int(good, 0, RSA_R_BLOCK_TYPE_IS_NOT_02); + mask = ~good; + + /* scan over padding data */ + found_zero_byte = 0; + threes_in_row = 0; + for (i = 2; i < num; i++) { + unsigned int equals0 = constant_time_is_zero(from[i]); + + zero_index = constant_time_select_int(~found_zero_byte & equals0, + i, zero_index); + found_zero_byte |= equals0; + + threes_in_row += 1 & ~found_zero_byte; + threes_in_row &= found_zero_byte | constant_time_eq(from[i], 3); } - i++; /* Skip over the '\0' */ - j -= i; - if (j > tlen) { - RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_DATA_TOO_LARGE); - return -1; + /* + * PS must be at least 8 bytes long, and it starts two bytes into |from|. + * If we never found a 0-byte, then |zero_index| is 0 and the check + * also fails. + */ + good &= constant_time_ge(zero_index, 2 + 8); + err = constant_time_select_int(mask | good, err, + RSA_R_NULL_BEFORE_BLOCK_MISSING); + mask = ~good; + + good &= constant_time_lt(threes_in_row, 8); + err = constant_time_select_int(mask | good, err, + RSA_R_SSLV3_ROLLBACK_ATTACK); + mask = ~good; + + /* + * Skip the zero byte. This is incorrect if we never found a zero-byte + * but in this case we also do not copy the message out. + */ + msg_index = zero_index + 1; + mlen = num - msg_index; + + /* + * For good measure, do this check in constant time as well. + */ + good &= constant_time_ge(tlen, mlen); + err = constant_time_select_int(mask | good, err, RSA_R_DATA_TOO_LARGE); + + /* + * Even though we can't fake result's length, we can pretend copying + * |tlen| bytes where |mlen| bytes would be real. Last |tlen| of |num| + * bytes are viewed as circular buffer with start at |tlen|-|mlen'|, + * where |mlen'| is "saturated" |mlen| value. Deducing information + * about failure or |mlen| would take attacker's ability to observe + * memory access pattern with byte granularity *as it occurs*. It + * should be noted that failure is indistinguishable from normal + * operation if |tlen| is fixed by protocol. + */ + tlen = constant_time_select_int(constant_time_lt(num, tlen), num, tlen); + msg_index = constant_time_select_int(good, msg_index, num - tlen); + mlen = num - msg_index; + for (from += msg_index, mask = good, i = 0; i < tlen; i++) { + unsigned int equals = constant_time_eq(i, mlen); + + from -= tlen & equals; /* if (i == mlen) rewind */ + mask &= mask ^ equals; /* if (i == mlen) mask = 0 */ + to[i] = constant_time_select_8(mask, from[i], to[i]); } - memcpy(to, p, (unsigned int)j); - return j; + OPENSSL_clear_free(em, num); + RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, err); + err_clear_last_constant_time(1 & good); + + return constant_time_select_int(good, mlen, -1); } diff --git a/crypto/rsa/rsa_x931g.c b/crypto/rsa/rsa_x931g.c index 3563670a12aca..15e40e8d1dd73 100644 --- a/crypto/rsa/rsa_x931g.c +++ b/crypto/rsa/rsa_x931g.c @@ -128,6 +128,8 @@ int RSA_X931_derive_ex(RSA *rsa, BIGNUM *p1, BIGNUM *p2, BIGNUM *q1, /* calculate inverse of q mod p */ rsa->iqmp = BN_mod_inverse(NULL, rsa->q, rsa->p, ctx2); + if (rsa->iqmp == NULL) + goto err; ret = 1; err: diff --git a/crypto/sha/asm/keccak1600-armv8.pl b/crypto/sha/asm/keccak1600-armv8.pl index 704ab4a7e45a8..a3117bd7506dd 100755 --- a/crypto/sha/asm/keccak1600-armv8.pl +++ b/crypto/sha/asm/keccak1600-armv8.pl @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2017-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -121,6 +121,7 @@ $code.=<<___; .align 5 KeccakF1600_int: adr $C[2],iotas + .inst 0xd503233f // paciasp stp $C[2],x30,[sp,#16] // 32 bytes on top are mine b .Loop .align 4 @@ -292,12 +293,14 @@ $code.=<<___; bne .Loop ldr x30,[sp,#24] + .inst 0xd50323bf // autiasp ret .size KeccakF1600_int,.-KeccakF1600_int .type KeccakF1600,%function .align 5 KeccakF1600: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-128]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -347,6 +350,7 @@ KeccakF1600: ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#128 + .inst 0xd50323bf // autiasp ret .size KeccakF1600,.-KeccakF1600 @@ -354,6 +358,7 @@ KeccakF1600: .type SHA3_absorb,%function .align 5 SHA3_absorb: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-128]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -451,6 +456,7 @@ $code.=<<___; ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#128 + .inst 0xd50323bf // autiasp ret .size SHA3_absorb,.-SHA3_absorb ___ @@ -461,6 +467,7 @@ $code.=<<___; .type SHA3_squeeze,%function .align 5 SHA3_squeeze: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-48]! add x29,sp,#0 stp x19,x20,[sp,#16] @@ -523,6 +530,7 @@ SHA3_squeeze: ldp x19,x20,[sp,#16] ldp x21,x22,[sp,#32] ldp x29,x30,[sp],#48 + .inst 0xd50323bf // autiasp ret .size SHA3_squeeze,.-SHA3_squeeze ___ @@ -649,6 +657,7 @@ $code.=<<___; .type KeccakF1600_cext,%function .align 5 KeccakF1600_cext: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-80]! add x29,sp,#0 stp d8,d9,[sp,#16] // per ABI requirement @@ -681,6 +690,7 @@ $code.=<<___; ldp d12,d13,[sp,#48] ldp d14,d15,[sp,#64] ldr x29,[sp],#80 + .inst 0xd50323bf // autiasp ret .size KeccakF1600_cext,.-KeccakF1600_cext ___ @@ -693,6 +703,7 @@ $code.=<<___; .type SHA3_absorb_cext,%function .align 5 SHA3_absorb_cext: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-80]! add x29,sp,#0 stp d8,d9,[sp,#16] // per ABI requirement @@ -764,6 +775,7 @@ $code.=<<___; ldp d12,d13,[sp,#48] ldp d14,d15,[sp,#64] ldp x29,x30,[sp],#80 + .inst 0xd50323bf // autiasp ret .size SHA3_absorb_cext,.-SHA3_absorb_cext ___ @@ -775,6 +787,7 @@ $code.=<<___; .type SHA3_squeeze_cext,%function .align 5 SHA3_squeeze_cext: + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-16]! add x29,sp,#0 mov x9,$ctx @@ -830,6 +843,7 @@ SHA3_squeeze_cext: .Lsqueeze_done_ce: ldr x29,[sp],#16 + .inst 0xd50323bf // autiasp ret .size SHA3_squeeze_cext,.-SHA3_squeeze_cext ___ diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl index ac84ebb52e4f7..01ffe9f98c3ea 100755 --- a/crypto/sha/asm/sha512-armv8.pl +++ b/crypto/sha/asm/sha512-armv8.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -219,6 +219,7 @@ $code.=<<___ if ($SZ==8); ___ $code.=<<___; #endif + .inst 0xd503233f // paciasp stp x29,x30,[sp,#-128]! add x29,sp,#0 @@ -280,6 +281,7 @@ $code.=<<___; ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#128 + .inst 0xd50323bf // autiasp ret .size $func,.-$func diff --git a/crypto/srp/srp_lib.c b/crypto/srp/srp_lib.c index b97d630d37533..ca20f6d097981 100644 --- a/crypto/srp/srp_lib.c +++ b/crypto/srp/srp_lib.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2004-2019 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2004, EdelKey Project. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use @@ -26,6 +26,7 @@ static BIGNUM *srp_Calc_xy(const BIGNUM *x, const BIGNUM *y, const BIGNUM *N) unsigned char *tmp = NULL; int numN = BN_num_bytes(N); BIGNUM *res = NULL; + if (x != N && BN_ucmp(x, N) >= 0) return NULL; if (y != N && BN_ucmp(y, N) >= 0) @@ -139,7 +140,8 @@ BIGNUM *SRP_Calc_x(const BIGNUM *s, const char *user, const char *pass) || !EVP_DigestFinal_ex(ctxt, dig, NULL) || !EVP_DigestInit_ex(ctxt, EVP_sha1(), NULL)) goto err; - BN_bn2bin(s, cs); + if (BN_bn2bin(s, cs) < 0) + goto err; if (!EVP_DigestUpdate(ctxt, cs, BN_num_bytes(s))) goto err; diff --git a/crypto/srp/srp_vfy.c b/crypto/srp/srp_vfy.c index 17b35c00f9da8..eb279dd4187a2 100644 --- a/crypto/srp/srp_vfy.c +++ b/crypto/srp/srp_vfy.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2004-2019 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2004, EdelKey Project. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use @@ -187,7 +187,7 @@ void SRP_user_pwd_free(SRP_user_pwd *user_pwd) static SRP_user_pwd *SRP_user_pwd_new(void) { SRP_user_pwd *ret; - + if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) { /* SRPerr(SRP_F_SRP_USER_PWD_NEW, ERR_R_MALLOC_FAILURE); */ /*ckerr_ignore*/ return NULL; @@ -598,10 +598,14 @@ char *SRP_create_verifier(const char *user, const char *pass, char **salt, if ((len = t_fromb64(tmp, sizeof(tmp), N)) <= 0) goto err; N_bn_alloc = BN_bin2bn(tmp, len, NULL); + if (N_bn_alloc == NULL) + goto err; N_bn = N_bn_alloc; if ((len = t_fromb64(tmp, sizeof(tmp) ,g)) <= 0) goto err; g_bn_alloc = BN_bin2bn(tmp, len, NULL); + if (g_bn_alloc == NULL) + goto err; g_bn = g_bn_alloc; defgNid = "*"; } else { @@ -623,15 +627,19 @@ char *SRP_create_verifier(const char *user, const char *pass, char **salt, goto err; s = BN_bin2bn(tmp2, len, NULL); } + if (s == NULL) + goto err; if (!SRP_create_verifier_BN(user, pass, &s, &v, N_bn, g_bn)) goto err; - BN_bn2bin(v, tmp); + if (BN_bn2bin(v, tmp) < 0) + goto err; vfsize = BN_num_bytes(v) * 2; if (((vf = OPENSSL_malloc(vfsize)) == NULL)) goto err; - t_tob64(vf, tmp, BN_num_bytes(v)); + if (!t_tob64(vf, tmp, BN_num_bytes(v))) + goto err; if (*salt == NULL) { char *tmp_salt; @@ -639,7 +647,10 @@ char *SRP_create_verifier(const char *user, const char *pass, char **salt, if ((tmp_salt = OPENSSL_malloc(SRP_RANDOM_SALT_LEN * 2)) == NULL) { goto err; } - t_tob64(tmp_salt, tmp2, SRP_RANDOM_SALT_LEN); + if (!t_tob64(tmp_salt, tmp2, SRP_RANDOM_SALT_LEN)) { + OPENSSL_free(tmp_salt); + goto err; + } *salt = tmp_salt; } @@ -686,11 +697,15 @@ int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, goto err; salttmp = BN_bin2bn(tmp2, SRP_RANDOM_SALT_LEN, NULL); + if (salttmp == NULL) + goto err; } else { salttmp = *salt; } x = SRP_Calc_x(salttmp, user, pass); + if (x == NULL) + goto err; *verifier = BN_new(); if (*verifier == NULL) diff --git a/crypto/ui/ui_openssl.c b/crypto/ui/ui_openssl.c index 6b996134df49e..5ca418d24870e 100644 --- a/crypto/ui/ui_openssl.c +++ b/crypto/ui/ui_openssl.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -101,6 +101,12 @@ # endif +# if defined(OPENSSL_SYS_VXWORKS) +# undef TERMIOS +# undef TERMIO +# undef SGTTY +# endif + # ifdef TERMIOS # include <termios.h> # define TTY_STRUCT struct termios diff --git a/crypto/uid.c b/crypto/uid.c index f7ae2610b3607..b2b096446fb4e 100644 --- a/crypto/uid.c +++ b/crypto/uid.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -34,12 +34,13 @@ int OPENSSL_issetugid(void) # if defined(__GLIBC__) && defined(__GLIBC_PREREQ) # if __GLIBC_PREREQ(2, 16) # include <sys/auxv.h> +# define OSSL_IMPLEMENT_GETAUXVAL # endif # endif int OPENSSL_issetugid(void) { -# ifdef AT_SECURE +# ifdef OSSL_IMPLEMENT_GETAUXVAL return getauxval(AT_SECURE) != 0; # else return getuid() != geteuid() || getgid() != getegid(); diff --git a/crypto/x509/x509_vfy.c b/crypto/x509/x509_vfy.c index 61e81922b4dab..4ced716e3646b 100644 --- a/crypto/x509/x509_vfy.c +++ b/crypto/x509/x509_vfy.c @@ -3232,12 +3232,19 @@ static int check_key_level(X509_STORE_CTX *ctx, X509 *cert) EVP_PKEY *pkey = X509_get0_pubkey(cert); int level = ctx->param->auth_level; + /* + * At security level zero, return without checking for a supported public + * key type. Some engines support key types not understood outside the + * engine, and we only need to understand the key when enforcing a security + * floor. + */ + if (level <= 0) + return 1; + /* Unsupported or malformed keys are not secure */ if (pkey == NULL) return 0; - if (level <= 0) - return 1; if (level > NUM_AUTH_LEVELS) level = NUM_AUTH_LEVELS; diff --git a/crypto/x509/x_crl.c b/crypto/x509/x_crl.c index 10733b58bca28..12ab3cca42c05 100644 --- a/crypto/x509/x_crl.c +++ b/crypto/x509/x_crl.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -158,6 +158,18 @@ static int crl_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, int idx; switch (operation) { + case ASN1_OP_D2I_PRE: + if (crl->meth->crl_free) { + if (!crl->meth->crl_free(crl)) + return 0; + } + AUTHORITY_KEYID_free(crl->akid); + ISSUING_DIST_POINT_free(crl->idp); + ASN1_INTEGER_free(crl->crl_number); + ASN1_INTEGER_free(crl->base_crl_number); + sk_GENERAL_NAMES_pop_free(crl->issuers, GENERAL_NAMES_free); + /* fall thru */ + case ASN1_OP_NEW_POST: crl->idp = NULL; crl->akid = NULL; diff --git a/crypto/x509/x_pubkey.c b/crypto/x509/x_pubkey.c index d050b0b4b3e24..1c87b8268eb57 100644 --- a/crypto/x509/x_pubkey.c +++ b/crypto/x509/x_pubkey.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -36,6 +36,7 @@ static int pubkey_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, /* Attempt to decode public key and cache in pubkey structure. */ X509_PUBKEY *pubkey = (X509_PUBKEY *)*pval; EVP_PKEY_free(pubkey->pkey); + pubkey->pkey = NULL; /* * Opportunistically decode the key but remove any non fatal errors * from the queue. Subsequent explicit attempts to decode/use the key diff --git a/crypto/x509/x_x509.c b/crypto/x509/x_x509.c index 4c04f12c94940..afe59c46c5188 100644 --- a/crypto/x509/x_x509.c +++ b/crypto/x509/x_x509.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -40,12 +40,35 @@ static int x509_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, switch (operation) { + case ASN1_OP_D2I_PRE: + CRYPTO_free_ex_data(CRYPTO_EX_INDEX_X509, ret, &ret->ex_data); + X509_CERT_AUX_free(ret->aux); + ASN1_OCTET_STRING_free(ret->skid); + AUTHORITY_KEYID_free(ret->akid); + CRL_DIST_POINTS_free(ret->crldp); + policy_cache_free(ret->policy_cache); + GENERAL_NAMES_free(ret->altname); + NAME_CONSTRAINTS_free(ret->nc); +#ifndef OPENSSL_NO_RFC3779 + sk_IPAddressFamily_pop_free(ret->rfc3779_addr, IPAddressFamily_free); + ASIdentifiers_free(ret->rfc3779_asid); +#endif + + /* fall thru */ + case ASN1_OP_NEW_POST: + ret->ex_cached = 0; + ret->ex_kusage = 0; + ret->ex_xkusage = 0; + ret->ex_nscert = 0; ret->ex_flags = 0; ret->ex_pathlen = -1; ret->ex_pcpathlen = -1; ret->skid = NULL; ret->akid = NULL; + ret->policy_cache = NULL; + ret->altname = NULL; + ret->nc = NULL; #ifndef OPENSSL_NO_RFC3779 ret->rfc3779_addr = NULL; ret->rfc3779_asid = NULL; |