Import OpenSSL 1.1.1b. (vendor/openssl/1.1.1b)

author: Jung-uk Kim <jkim@FreeBSD.org> 2019-02-26 18:06:51 +0000
committer: Jung-uk Kim <jkim@FreeBSD.org> 2019-02-26 18:06:51 +0000
commit: 851f7386fd78b9787f4f6669ad271886a2a003f1 (patch)
tree: 952920d27fdcd105b7f77b6e5fef3fedae8f74ea /crypto
parent: 8c3f9abd70b3f447a4795c1b00b386b044fb322d (diff)
download: src-test-851f7386fd78b9787f4f6669ad271886a2a003f1.tar.gz
src-test-851f7386fd78b9787f4f6669ad271886a2a003f1.zip
100 files changed, 4309 insertions, 2915 deletions
diff --git a/crypto/aes/asm/aes-x86_64.pl b/crypto/aes/asm/aes-x86_64.pl
index 4d1dc9c70199d..d87e201147710 100755
--- a/crypto/aes/asm/aes-x86_64.pl
+++ b/crypto/aes/asm/aes-x86_64.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2005-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -554,6 +554,7 @@ $code.=<<___;
 .type	_x86_64_AES_encrypt_compact,\@abi-omnipotent
 .align	16
 _x86_64_AES_encrypt_compact:
+.cfi_startproc
 	lea	128($sbox),$inp			# size optimization
 	mov	0-128($inp),$acc1		# prefetch Te4
 	mov	32-128($inp),$acc2
@@ -587,6 +588,7 @@ $code.=<<___;
 	xor	8($key),$s2
 	xor	12($key),$s3
 	.byte	0xf3,0xc3			# rep ret
+.cfi_endproc
 .size	_x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
 ___
 
@@ -1161,6 +1163,7 @@ $code.=<<___;
 .type	_x86_64_AES_decrypt_compact,\@abi-omnipotent
 .align	16
 _x86_64_AES_decrypt_compact:
+.cfi_startproc
 	lea	128($sbox),$inp			# size optimization
 	mov	0-128($inp),$acc1		# prefetch Td4
 	mov	32-128($inp),$acc2
@@ -1203,6 +1206,7 @@ $code.=<<___;
 	xor	8($key),$s2
 	xor	12($key),$s3
 	.byte	0xf3,0xc3			# rep ret
+.cfi_endproc
 .size	_x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
 ___
 
@@ -1365,6 +1369,7 @@ AES_set_encrypt_key:
 .type	_x86_64_AES_set_encrypt_key,\@abi-omnipotent
 .align	16
 _x86_64_AES_set_encrypt_key:
+.cfi_startproc
 	mov	%esi,%ecx			# %ecx=bits
 	mov	%rdi,%rsi			# %rsi=userKey
 	mov	%rdx,%rdi			# %rdi=key
@@ -1546,6 +1551,7 @@ $code.=<<___;
 	mov	\$-1,%rax
 .Lexit:
 	.byte	0xf3,0xc3			# rep ret
+.cfi_endproc
 .size	_x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
 ___
 
@@ -1728,7 +1734,9 @@ AES_cbc_encrypt:
 	cmp	\$0,%rdx	# check length
 	je	.Lcbc_epilogue
 	pushfq
-.cfi_push	49		# %rflags
+# This could be .cfi_push 49, but libunwind fails on registers it does not
+# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087.
+.cfi_adjust_cfa_offset	8
 	push	%rbx
 .cfi_push	%rbx
 	push	%rbp
@@ -1751,6 +1759,7 @@ AES_cbc_encrypt:
 	cmp	\$0,%r9
 	cmoveq	%r10,$sbox
 
+.cfi_remember_state
 	mov	OPENSSL_ia32cap_P(%rip),%r10d
 	cmp	\$$speed_limit,%rdx
 	jb	.Lcbc_slow_prologue
@@ -1986,6 +1995,7 @@ AES_cbc_encrypt:
 #--------------------------- SLOW ROUTINE ---------------------------#
 .align	16
 .Lcbc_slow_prologue:
+.cfi_restore_state
 	# allocate aligned stack frame...
 	lea	-88(%rsp),%rbp
 	and	\$-64,%rbp
@@ -1997,8 +2007,10 @@ AES_cbc_encrypt:
 	sub	%r10,%rbp
 
 	xchg	%rsp,%rbp
+.cfi_def_cfa_register	%rbp
 	#add	\$8,%rsp	# reserve for return address!
 	mov	%rbp,$_rsp	# save %rsp
+.cfi_cfa_expression	$_rsp,deref,+64
 .Lcbc_slow_body:
 	#mov	%rdi,$_inp	# save copy of inp
 	#mov	%rsi,$_out	# save copy of out
@@ -2187,7 +2199,9 @@ AES_cbc_encrypt:
 .cfi_def_cfa	%rsp,16
 .Lcbc_popfq:
 	popfq
-.cfi_pop	49		# %rflags
+# This could be .cfi_pop 49, but libunwind fails on registers it does not
+# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087.
+.cfi_adjust_cfa_offset	-8
 .Lcbc_epilogue:
 	ret
 .cfi_endproc
diff --git a/crypto/aes/asm/aesni-x86_64.pl b/crypto/aes/asm/aesni-x86_64.pl
index 2a202c53e5f84..b68c14da60ca1 100755
--- a/crypto/aes/asm/aesni-x86_64.pl
+++ b/crypto/aes/asm/aesni-x86_64.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2009-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -274,6 +274,7 @@ $code.=<<___;
 .type	${PREFIX}_encrypt,\@abi-omnipotent
 .align	16
 ${PREFIX}_encrypt:
+.cfi_startproc
 	movups	($inp),$inout0		# load input
 	mov	240($key),$rounds	# key->rounds
 ___
@@ -284,12 +285,14 @@ $code.=<<___;
 	movups	$inout0,($out)		# output
 	 pxor	$inout0,$inout0
 	ret
+.cfi_endproc
 .size	${PREFIX}_encrypt,.-${PREFIX}_encrypt
 
 .globl	${PREFIX}_decrypt
 .type	${PREFIX}_decrypt,\@abi-omnipotent
 .align	16
 ${PREFIX}_decrypt:
+.cfi_startproc
 	movups	($inp),$inout0		# load input
 	mov	240($key),$rounds	# key->rounds
 ___
@@ -300,6 +303,7 @@ $code.=<<___;
 	movups	$inout0,($out)		# output
 	 pxor	$inout0,$inout0
 	ret
+.cfi_endproc
 .size	${PREFIX}_decrypt, .-${PREFIX}_decrypt
 ___
 }
@@ -325,6 +329,7 @@ $code.=<<___;
 .type	_aesni_${dir}rypt2,\@abi-omnipotent
 .align	16
 _aesni_${dir}rypt2:
+.cfi_startproc
 	$movkey	($key),$rndkey0
 	shl	\$4,$rounds
 	$movkey	16($key),$rndkey1
@@ -350,6 +355,7 @@ _aesni_${dir}rypt2:
 	aes${dir}last	$rndkey0,$inout0
 	aes${dir}last	$rndkey0,$inout1
 	ret
+.cfi_endproc
 .size	_aesni_${dir}rypt2,.-_aesni_${dir}rypt2
 ___
 }
@@ -361,6 +367,7 @@ $code.=<<___;
 .type	_aesni_${dir}rypt3,\@abi-omnipotent
 .align	16
 _aesni_${dir}rypt3:
+.cfi_startproc
 	$movkey	($key),$rndkey0
 	shl	\$4,$rounds
 	$movkey	16($key),$rndkey1
@@ -391,6 +398,7 @@ _aesni_${dir}rypt3:
 	aes${dir}last	$rndkey0,$inout1
 	aes${dir}last	$rndkey0,$inout2
 	ret
+.cfi_endproc
 .size	_aesni_${dir}rypt3,.-_aesni_${dir}rypt3
 ___
 }
@@ -406,6 +414,7 @@ $code.=<<___;
 .type	_aesni_${dir}rypt4,\@abi-omnipotent
 .align	16
 _aesni_${dir}rypt4:
+.cfi_startproc
 	$movkey	($key),$rndkey0
 	shl	\$4,$rounds
 	$movkey	16($key),$rndkey1
@@ -442,6 +451,7 @@ _aesni_${dir}rypt4:
 	aes${dir}last	$rndkey0,$inout2
 	aes${dir}last	$rndkey0,$inout3
 	ret
+.cfi_endproc
 .size	_aesni_${dir}rypt4,.-_aesni_${dir}rypt4
 ___
 }
@@ -453,6 +463,7 @@ $code.=<<___;
 .type	_aesni_${dir}rypt6,\@abi-omnipotent
 .align	16
 _aesni_${dir}rypt6:
+.cfi_startproc
 	$movkey		($key),$rndkey0
 	shl		\$4,$rounds
 	$movkey		16($key),$rndkey1
@@ -503,6 +514,7 @@ _aesni_${dir}rypt6:
 	aes${dir}last	$rndkey0,$inout4
 	aes${dir}last	$rndkey0,$inout5
 	ret
+.cfi_endproc
 .size	_aesni_${dir}rypt6,.-_aesni_${dir}rypt6
 ___
 }
@@ -514,6 +526,7 @@ $code.=<<___;
 .type	_aesni_${dir}rypt8,\@abi-omnipotent
 .align	16
 _aesni_${dir}rypt8:
+.cfi_startproc
 	$movkey		($key),$rndkey0
 	shl		\$4,$rounds
 	$movkey		16($key),$rndkey1
@@ -574,6 +587,7 @@ _aesni_${dir}rypt8:
 	aes${dir}last	$rndkey0,$inout6
 	aes${dir}last	$rndkey0,$inout7
 	ret
+.cfi_endproc
 .size	_aesni_${dir}rypt8,.-_aesni_${dir}rypt8
 ___
 }
@@ -598,6 +612,7 @@ $code.=<<___;
 .type	aesni_ecb_encrypt,\@function,5
 .align	16
 aesni_ecb_encrypt:
+.cfi_startproc
 ___
 $code.=<<___ if ($win64);
 	lea	-0x58(%rsp),%rsp
@@ -943,6 +958,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
 ___
 
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
index 8b37cfc452a02..eec0ed230a888 100755
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -262,6 +262,7 @@ $code.=<<___;
 ${prefix}_set_decrypt_key:
 ___
 $code.=<<___	if ($flavour =~ /64/);
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 ___
@@ -305,6 +306,7 @@ $code.=<<___	if ($flavour !~ /64/);
 ___
 $code.=<<___	if ($flavour =~ /64/);
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 ___
 $code.=<<___;
diff --git a/crypto/aes/asm/bsaes-x86_64.pl b/crypto/aes/asm/bsaes-x86_64.pl
index 2c79c2b67c897..e62342729e7fa 100755
--- a/crypto/aes/asm/bsaes-x86_64.pl
+++ b/crypto/aes/asm/bsaes-x86_64.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -816,6 +816,7 @@ $code.=<<___;
 .type	_bsaes_encrypt8,\@abi-omnipotent
 .align	64
 _bsaes_encrypt8:
+.cfi_startproc
 	lea	.LBS0(%rip), $const	# constants table
 
 	movdqa	($key), @XMM[9]		# round 0 key
@@ -875,11 +876,13 @@ $code.=<<___;
 	pxor	@XMM[8], @XMM[0]
 	pxor	@XMM[8], @XMM[1]
 	ret
+.cfi_endproc
 .size	_bsaes_encrypt8,.-_bsaes_encrypt8
 
 .type	_bsaes_decrypt8,\@abi-omnipotent
 .align	64
 _bsaes_decrypt8:
+.cfi_startproc
 	lea	.LBS0(%rip), $const	# constants table
 
 	movdqa	($key), @XMM[9]		# round 0 key
@@ -937,6 +940,7 @@ $code.=<<___;
 	pxor	@XMM[8], @XMM[0]
 	pxor	@XMM[8], @XMM[1]
 	ret
+.cfi_endproc
 .size	_bsaes_decrypt8,.-_bsaes_decrypt8
 ___
 }
@@ -971,6 +975,7 @@ $code.=<<___;
 .type	_bsaes_key_convert,\@abi-omnipotent
 .align	16
 _bsaes_key_convert:
+.cfi_startproc
 	lea	.Lmasks(%rip), $const
 	movdqu	($inp), %xmm7		# load round 0 key
 	lea	0x10($inp), $inp
@@ -1049,6 +1054,7 @@ _bsaes_key_convert:
 	movdqa	0x50($const), %xmm7	# .L63
 	#movdqa	%xmm6, ($out)		# don't save last round key
 	ret
+.cfi_endproc
 .size	_bsaes_key_convert,.-_bsaes_key_convert
 ___
 }
diff --git a/crypto/aes/asm/vpaes-armv8.pl b/crypto/aes/asm/vpaes-armv8.pl
index 5131e13a09a29..e38288af5558a 100755
--- a/crypto/aes/asm/vpaes-armv8.pl
+++ b/crypto/aes/asm/vpaes-armv8.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2015-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -255,6 +255,7 @@ _vpaes_encrypt_core:
 .type	vpaes_encrypt,%function
 .align	4
 vpaes_encrypt:
+	.inst	0xd503233f			// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -264,6 +265,7 @@ vpaes_encrypt:
 	st1	{v0.16b}, [$out]
 
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf			// autiasp
 	ret
 .size	vpaes_encrypt,.-vpaes_encrypt
 
@@ -486,6 +488,7 @@ _vpaes_decrypt_core:
 .type	vpaes_decrypt,%function
 .align	4
 vpaes_decrypt:
+	.inst	0xd503233f			// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -495,6 +498,7 @@ vpaes_decrypt:
 	st1	{v0.16b}, [$out]
 
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf			// autiasp
 	ret
 .size	vpaes_decrypt,.-vpaes_decrypt
 
@@ -665,6 +669,7 @@ _vpaes_key_preheat:
 .type	_vpaes_schedule_core,%function
 .align	4
 _vpaes_schedule_core:
+	.inst	0xd503233f			// paciasp
 	stp	x29, x30, [sp,#-16]!
 	add	x29,sp,#0
 
@@ -829,6 +834,7 @@ _vpaes_schedule_core:
 	eor	v6.16b, v6.16b, v6.16b		// vpxor	%xmm6,	%xmm6,	%xmm6
 	eor	v7.16b, v7.16b, v7.16b		// vpxor	%xmm7,	%xmm7,	%xmm7
 	ldp	x29, x30, [sp],#16
+	.inst	0xd50323bf			// autiasp
 	ret
 .size	_vpaes_schedule_core,.-_vpaes_schedule_core
 
@@ -1041,6 +1047,7 @@ _vpaes_schedule_mangle:
 .type	vpaes_set_encrypt_key,%function
 .align	4
 vpaes_set_encrypt_key:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1056,6 +1063,7 @@ vpaes_set_encrypt_key:
 
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
 
@@ -1063,6 +1071,7 @@ vpaes_set_encrypt_key:
 .type	vpaes_set_decrypt_key,%function
 .align	4
 vpaes_set_decrypt_key:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1082,6 +1091,7 @@ vpaes_set_decrypt_key:
 
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
 ___
@@ -1098,6 +1108,7 @@ vpaes_cbc_encrypt:
 	cmp	w5, #0			// check direction
 	b.eq	vpaes_cbc_decrypt
 
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -1120,6 +1131,7 @@ vpaes_cbc_encrypt:
 	st1	{v0.16b}, [$ivec]	// write ivec
 
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 .Lcbc_abort:
 	ret
 .size	vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
@@ -1127,6 +1139,7 @@ vpaes_cbc_encrypt:
 .type	vpaes_cbc_decrypt,%function
 .align	4
 vpaes_cbc_decrypt:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1168,6 +1181,7 @@ vpaes_cbc_decrypt:
 	ldp	d10,d11,[sp],#16
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
 ___
@@ -1177,6 +1191,7 @@ $code.=<<___;
 .type	vpaes_ecb_encrypt,%function
 .align	4
 vpaes_ecb_encrypt:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1210,6 +1225,7 @@ vpaes_ecb_encrypt:
 	ldp	d10,d11,[sp],#16
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	vpaes_ecb_encrypt,.-vpaes_ecb_encrypt
 
@@ -1217,6 +1233,7 @@ vpaes_ecb_encrypt:
 .type	vpaes_ecb_decrypt,%function
 .align	4
 vpaes_ecb_decrypt:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1250,6 +1267,7 @@ vpaes_ecb_decrypt:
 	ldp	d10,d11,[sp],#16
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	vpaes_ecb_decrypt,.-vpaes_ecb_decrypt
 ___
diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl
index b715aca167d17..33d293e623d5b 100755
--- a/crypto/aes/asm/vpaes-x86_64.pl
+++ b/crypto/aes/asm/vpaes-x86_64.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -91,6 +91,7 @@ $code.=<<___;
 .type	_vpaes_encrypt_core,\@abi-omnipotent
 .align 16
 _vpaes_encrypt_core:
+.cfi_startproc
 	mov	%rdx,	%r9
 	mov	\$16,	%r11
 	mov	240(%rdx),%eax
@@ -171,6 +172,7 @@ _vpaes_encrypt_core:
 	pxor	%xmm4,	%xmm0	# 0 = A
 	pshufb	%xmm1,	%xmm0
 	ret
+.cfi_endproc
 .size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
 
 ##
@@ -181,6 +183,7 @@ _vpaes_encrypt_core:
 .type	_vpaes_decrypt_core,\@abi-omnipotent
 .align	16
 _vpaes_decrypt_core:
+.cfi_startproc
 	mov	%rdx,	%r9		# load key
 	mov	240(%rdx),%eax
 	movdqa	%xmm9,	%xmm1
@@ -277,6 +280,7 @@ _vpaes_decrypt_core:
 	pxor	%xmm4,	%xmm0	# 0 = A
 	pshufb	%xmm2,	%xmm0
 	ret
+.cfi_endproc
 .size	_vpaes_decrypt_core,.-_vpaes_decrypt_core
 
 ########################################################
@@ -287,6 +291,7 @@ _vpaes_decrypt_core:
 .type	_vpaes_schedule_core,\@abi-omnipotent
 .align	16
 _vpaes_schedule_core:
+.cfi_startproc
 	# rdi = key
 	# rsi = size in bits
 	# rdx = buffer
@@ -453,6 +458,7 @@ _vpaes_schedule_core:
 	pxor	%xmm6,  %xmm6
 	pxor	%xmm7,  %xmm7
 	ret
+.cfi_endproc
 .size	_vpaes_schedule_core,.-_vpaes_schedule_core
 
 ##
@@ -472,6 +478,7 @@ _vpaes_schedule_core:
 .type	_vpaes_schedule_192_smear,\@abi-omnipotent
 .align	16
 _vpaes_schedule_192_smear:
+.cfi_startproc
 	pshufd	\$0x80,	%xmm6,	%xmm1	# d c 0 0 -> c 0 0 0
 	pshufd	\$0xFE,	%xmm7,	%xmm0	# b a _ _ -> b b b a
 	pxor	%xmm1,	%xmm6		# -> c+d c 0 0
@@ -480,6 +487,7 @@ _vpaes_schedule_192_smear:
 	movdqa	%xmm6,	%xmm0
 	movhlps	%xmm1,	%xmm6		# clobber low side with zeros
 	ret
+.cfi_endproc
 .size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
 
 ##
@@ -503,6 +511,7 @@ _vpaes_schedule_192_smear:
 .type	_vpaes_schedule_round,\@abi-omnipotent
 .align	16
 _vpaes_schedule_round:
+.cfi_startproc
 	# extract rcon from xmm8
 	pxor	%xmm1,	%xmm1
 	palignr	\$15,	%xmm8,	%xmm1
@@ -556,6 +565,7 @@ _vpaes_schedule_low_round:
 	pxor	%xmm7,	%xmm0
 	movdqa	%xmm0,	%xmm7
 	ret
+.cfi_endproc
 .size	_vpaes_schedule_round,.-_vpaes_schedule_round
 
 ##
@@ -570,6 +580,7 @@ _vpaes_schedule_low_round:
 .type	_vpaes_schedule_transform,\@abi-omnipotent
 .align	16
 _vpaes_schedule_transform:
+.cfi_startproc
 	movdqa	%xmm9,	%xmm1
 	pandn	%xmm0,	%xmm1
 	psrld	\$4,	%xmm1
@@ -580,6 +591,7 @@ _vpaes_schedule_transform:
 	pshufb	%xmm1,	%xmm0
 	pxor	%xmm2,	%xmm0
 	ret
+.cfi_endproc
 .size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
 
 ##
@@ -608,6 +620,7 @@ _vpaes_schedule_transform:
 .type	_vpaes_schedule_mangle,\@abi-omnipotent
 .align	16
 _vpaes_schedule_mangle:
+.cfi_startproc
 	movdqa	%xmm0,	%xmm4	# save xmm0 for later
 	movdqa	.Lk_mc_forward(%rip),%xmm5
 	test	%rcx, 	%rcx
@@ -672,6 +685,7 @@ _vpaes_schedule_mangle:
 	and	\$0x30,	%r8
 	movdqu	%xmm3,	(%rdx)
 	ret
+.cfi_endproc
 .size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
 
 #
@@ -681,6 +695,7 @@ _vpaes_schedule_mangle:
 .type	${PREFIX}_set_encrypt_key,\@function,3
 .align	16
 ${PREFIX}_set_encrypt_key:
+.cfi_startproc
 ___
 $code.=<<___ if ($win64);
 	lea	-0xb8(%rsp),%rsp
@@ -723,12 +738,14 @@ ___
 $code.=<<___;
 	xor	%eax,%eax
 	ret
+.cfi_endproc
 .size	${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key
 
 .globl	${PREFIX}_set_decrypt_key
 .type	${PREFIX}_set_decrypt_key,\@function,3
 .align	16
 ${PREFIX}_set_decrypt_key:
+.cfi_startproc
 ___
 $code.=<<___ if ($win64);
 	lea	-0xb8(%rsp),%rsp
@@ -776,12 +793,14 @@ ___
 $code.=<<___;
 	xor	%eax,%eax
 	ret
+.cfi_endproc
 .size	${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key
 
 .globl	${PREFIX}_encrypt
 .type	${PREFIX}_encrypt,\@function,3
 .align	16
 ${PREFIX}_encrypt:
+.cfi_startproc
 ___
 $code.=<<___ if ($win64);
 	lea	-0xb8(%rsp),%rsp
@@ -819,12 +838,14 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	${PREFIX}_encrypt,.-${PREFIX}_encrypt
 
 .globl	${PREFIX}_decrypt
 .type	${PREFIX}_decrypt,\@function,3
 .align	16
 ${PREFIX}_decrypt:
+.cfi_startproc
 ___
 $code.=<<___ if ($win64);
 	lea	-0xb8(%rsp),%rsp
@@ -862,6 +883,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	${PREFIX}_decrypt,.-${PREFIX}_decrypt
 ___
 {
@@ -874,6 +896,7 @@ $code.=<<___;
 .type	${PREFIX}_cbc_encrypt,\@function,6
 .align	16
 ${PREFIX}_cbc_encrypt:
+.cfi_startproc
 	xchg	$key,$len
 ___
 ($len,$key)=($key,$len);
@@ -944,6 +967,7 @@ ___
 $code.=<<___;
 .Lcbc_abort:
 	ret
+.cfi_endproc
 .size	${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
 ___
 }
@@ -957,6 +981,7 @@ $code.=<<___;
 .type	_vpaes_preheat,\@abi-omnipotent
 .align	16
 _vpaes_preheat:
+.cfi_startproc
 	lea	.Lk_s0F(%rip), %r10
 	movdqa	-0x20(%r10), %xmm10	# .Lk_inv
 	movdqa	-0x10(%r10), %xmm11	# .Lk_inv+16
@@ -966,6 +991,7 @@ _vpaes_preheat:
 	movdqa	0x50(%r10), %xmm15	# .Lk_sb2
 	movdqa	0x60(%r10), %xmm14	# .Lk_sb2+16
 	ret
+.cfi_endproc
 .size	_vpaes_preheat,.-_vpaes_preheat
 ########################################################
 ##                                                    ##
diff --git a/crypto/armcap.c b/crypto/armcap.c
index 1b3c2722d1e16..58e54f0da2e17 100644
--- a/crypto/armcap.c
+++ b/crypto/armcap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -62,14 +62,12 @@ uint32_t OPENSSL_rdtsc(void)
 # if defined(__GNUC__) && __GNUC__>=2
 void OPENSSL_cpuid_setup(void) __attribute__ ((constructor));
 # endif
-/*
- * Use a weak reference to getauxval() so we can use it if it is available but
- * don't break the build if it is not.
- */
-# if defined(__GNUC__) && __GNUC__>=2 && defined(__ELF__)
-extern unsigned long getauxval(unsigned long type) __attribute__ ((weak));
-# else
-static unsigned long (*getauxval) (unsigned long) = NULL;
+
+# if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#  if __GLIBC_PREREQ(2, 16)
+#   include <sys/auxv.h>
+#   define OSSL_IMPLEMENT_GETAUXVAL
+#  endif
 # endif
 
 /*
@@ -134,6 +132,33 @@ void OPENSSL_cpuid_setup(void)
      */
 # endif
 
+    OPENSSL_armcap_P = 0;
+
+# ifdef OSSL_IMPLEMENT_GETAUXVAL
+    if (getauxval(HWCAP) & HWCAP_NEON) {
+        unsigned long hwcap = getauxval(HWCAP_CE);
+
+        OPENSSL_armcap_P |= ARMV7_NEON;
+
+        if (hwcap & HWCAP_CE_AES)
+            OPENSSL_armcap_P |= ARMV8_AES;
+
+        if (hwcap & HWCAP_CE_PMULL)
+            OPENSSL_armcap_P |= ARMV8_PMULL;
+
+        if (hwcap & HWCAP_CE_SHA1)
+            OPENSSL_armcap_P |= ARMV8_SHA1;
+
+        if (hwcap & HWCAP_CE_SHA256)
+            OPENSSL_armcap_P |= ARMV8_SHA256;
+
+#  ifdef __aarch64__
+        if (hwcap & HWCAP_CE_SHA512)
+            OPENSSL_armcap_P |= ARMV8_SHA512;
+#  endif
+    }
+# endif
+
     sigfillset(&all_masked);
     sigdelset(&all_masked, SIGILL);
     sigdelset(&all_masked, SIGTRAP);
@@ -141,8 +166,6 @@ void OPENSSL_cpuid_setup(void)
     sigdelset(&all_masked, SIGBUS);
     sigdelset(&all_masked, SIGSEGV);
 
-    OPENSSL_armcap_P = 0;
-
     memset(&ill_act, 0, sizeof(ill_act));
     ill_act.sa_handler = ill_handler;
     ill_act.sa_mask = all_masked;
@@ -150,30 +173,9 @@ void OPENSSL_cpuid_setup(void)
     sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
     sigaction(SIGILL, &ill_act, &ill_oact);
 
-    if (getauxval != NULL) {
-        if (getauxval(HWCAP) & HWCAP_NEON) {
-            unsigned long hwcap = getauxval(HWCAP_CE);
-
-            OPENSSL_armcap_P |= ARMV7_NEON;
-
-            if (hwcap & HWCAP_CE_AES)
-                OPENSSL_armcap_P |= ARMV8_AES;
-
-            if (hwcap & HWCAP_CE_PMULL)
-                OPENSSL_armcap_P |= ARMV8_PMULL;
-
-            if (hwcap & HWCAP_CE_SHA1)
-                OPENSSL_armcap_P |= ARMV8_SHA1;
-
-            if (hwcap & HWCAP_CE_SHA256)
-                OPENSSL_armcap_P |= ARMV8_SHA256;
-
-# ifdef __aarch64__
-            if (hwcap & HWCAP_CE_SHA512)
-                OPENSSL_armcap_P |= ARMV8_SHA512;
-# endif
-        }
-    } else if (sigsetjmp(ill_jmp, 1) == 0) {
+    /* If we used getauxval, we already have all the values */
+# ifndef OSSL_IMPLEMENT_GETAUXVAL
+    if (sigsetjmp(ill_jmp, 1) == 0) {
         _armv7_neon_probe();
         OPENSSL_armcap_P |= ARMV7_NEON;
         if (sigsetjmp(ill_jmp, 1) == 0) {
@@ -191,13 +193,16 @@ void OPENSSL_cpuid_setup(void)
             _armv8_sha256_probe();
             OPENSSL_armcap_P |= ARMV8_SHA256;
         }
-# if defined(__aarch64__) && !defined(__APPLE__)
+#  if defined(__aarch64__) && !defined(__APPLE__)
         if (sigsetjmp(ill_jmp, 1) == 0) {
             _armv8_sha512_probe();
             OPENSSL_armcap_P |= ARMV8_SHA512;
         }
-# endif
+#  endif
     }
+# endif
+
+    /* Things that getauxval didn't tell us */
     if (sigsetjmp(ill_jmp, 1) == 0) {
         _armv7_tick();
         OPENSSL_armcap_P |= ARMV7_TICK;
diff --git a/crypto/asn1/a_digest.c b/crypto/asn1/a_digest.c
index f4cc1f2e0eaa5..cc3532ea7df2f 100644
--- a/crypto/asn1/a_digest.c
+++ b/crypto/asn1/a_digest.c
@@ -23,18 +23,22 @@
 int ASN1_digest(i2d_of_void *i2d, const EVP_MD *type, char *data,
                 unsigned char *md, unsigned int *len)
 {
-    int i;
+    int inl;
     unsigned char *str, *p;
 
-    i = i2d(data, NULL);
-    if ((str = OPENSSL_malloc(i)) == NULL) {
+    inl = i2d(data, NULL);
+    if (inl <= 0) {
+        ASN1err(ASN1_F_ASN1_DIGEST, ERR_R_INTERNAL_ERROR);
+        return 0;
+    }
+    if ((str = OPENSSL_malloc(inl)) == NULL) {
         ASN1err(ASN1_F_ASN1_DIGEST, ERR_R_MALLOC_FAILURE);
         return 0;
     }
     p = str;
     i2d(data, &p);
 
-    if (!EVP_Digest(str, i, md, len, type, NULL)) {
+    if (!EVP_Digest(str, inl, md, len, type, NULL)) {
         OPENSSL_free(str);
         return 0;
     }
diff --git a/crypto/asn1/a_sign.c b/crypto/asn1/a_sign.c
index 130e23eaaa1e6..146fdb9626281 100644
--- a/crypto/asn1/a_sign.c
+++ b/crypto/asn1/a_sign.c
@@ -29,7 +29,8 @@ int ASN1_sign(i2d_of_void *i2d, X509_ALGOR *algor1, X509_ALGOR *algor2,
 {
     EVP_MD_CTX *ctx = EVP_MD_CTX_new();
     unsigned char *p, *buf_in = NULL, *buf_out = NULL;
-    int i, inl = 0, outl = 0, outll = 0;
+    int i, inl = 0, outl = 0;
+    size_t inll = 0, outll = 0;
     X509_ALGOR *a;
 
     if (ctx == NULL) {
@@ -70,10 +71,15 @@ int ASN1_sign(i2d_of_void *i2d, X509_ALGOR *algor1, X509_ALGOR *algor2,
         }
     }
     inl = i2d(data, NULL);
-    buf_in = OPENSSL_malloc((unsigned int)inl);
+    if (inl <= 0) {
+        ASN1err(ASN1_F_ASN1_SIGN, ERR_R_INTERNAL_ERROR);
+        goto err;
+    }
+    inll = (size_t)inl;
+    buf_in = OPENSSL_malloc(inll);
     outll = outl = EVP_PKEY_size(pkey);
-    buf_out = OPENSSL_malloc((unsigned int)outl);
-    if ((buf_in == NULL) || (buf_out == NULL)) {
+    buf_out = OPENSSL_malloc(outll);
+    if (buf_in == NULL || buf_out == NULL) {
         outl = 0;
         ASN1err(ASN1_F_ASN1_SIGN, ERR_R_MALLOC_FAILURE);
         goto err;
@@ -101,7 +107,7 @@ int ASN1_sign(i2d_of_void *i2d, X509_ALGOR *algor1, X509_ALGOR *algor2,
     signature->flags |= ASN1_STRING_FLAG_BITS_LEFT;
  err:
     EVP_MD_CTX_free(ctx);
-    OPENSSL_clear_free((char *)buf_in, (unsigned int)inl);
+    OPENSSL_clear_free((char *)buf_in, inll);
     OPENSSL_clear_free((char *)buf_out, outll);
     return outl;
 }
@@ -138,7 +144,7 @@ int ASN1_item_sign_ctx(const ASN1_ITEM *it,
     EVP_PKEY *pkey;
     unsigned char *buf_in = NULL, *buf_out = NULL;
     size_t inl = 0, outl = 0, outll = 0;
-    int signid, paramtype;
+    int signid, paramtype, buf_len = 0;
     int rv;
 
     type = EVP_MD_CTX_md(ctx);
@@ -198,10 +204,16 @@ int ASN1_item_sign_ctx(const ASN1_ITEM *it,
 
     }
 
-    inl = ASN1_item_i2d(asn, &buf_in, it);
+    buf_len = ASN1_item_i2d(asn, &buf_in, it);
+    if (buf_len <= 0) {
+        outl = 0;
+        ASN1err(ASN1_F_ASN1_ITEM_SIGN_CTX, ERR_R_INTERNAL_ERROR);
+        goto err;
+    }
+    inl = buf_len;
     outll = outl = EVP_PKEY_size(pkey);
-    buf_out = OPENSSL_malloc((unsigned int)outl);
-    if ((buf_in == NULL) || (buf_out == NULL)) {
+    buf_out = OPENSSL_malloc(outll);
+    if (buf_in == NULL || buf_out == NULL) {
         outl = 0;
         ASN1err(ASN1_F_ASN1_ITEM_SIGN_CTX, ERR_R_MALLOC_FAILURE);
         goto err;
@@ -223,7 +235,7 @@ int ASN1_item_sign_ctx(const ASN1_ITEM *it,
     signature->flags &= ~(ASN1_STRING_FLAG_BITS_LEFT | 0x07);
     signature->flags |= ASN1_STRING_FLAG_BITS_LEFT;
  err:
-    OPENSSL_clear_free((char *)buf_in, (unsigned int)inl);
+    OPENSSL_clear_free((char *)buf_in, inl);
     OPENSSL_clear_free((char *)buf_out, outll);
     return outl;
 }
diff --git a/crypto/asn1/a_verify.c b/crypto/asn1/a_verify.c
index 973d50d24de90..cdaf17c3cbc1b 100644
--- a/crypto/asn1/a_verify.c
+++ b/crypto/asn1/a_verify.c
@@ -48,6 +48,10 @@ int ASN1_verify(i2d_of_void *i2d, X509_ALGOR *a, ASN1_BIT_STRING *signature,
     }
 
     inl = i2d(data, NULL);
+    if (inl <= 0) {
+        ASN1err(ASN1_F_ASN1_VERIFY, ERR_R_INTERNAL_ERROR);
+        goto err;
+    }
     buf_in = OPENSSL_malloc((unsigned int)inl);
     if (buf_in == NULL) {
         ASN1err(ASN1_F_ASN1_VERIFY, ERR_R_MALLOC_FAILURE);
@@ -87,8 +91,8 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a,
     EVP_MD_CTX *ctx = NULL;
     unsigned char *buf_in = NULL;
     int ret = -1, inl = 0;
-
     int mdnid, pknid;
+    size_t inll = 0;
 
     if (!pkey) {
         ASN1err(ASN1_F_ASN1_ITEM_VERIFY, ERR_R_PASSED_NULL_PARAMETER);
@@ -127,8 +131,8 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a,
             goto err;
         ret = -1;
     } else {
-        const EVP_MD *type;
-        type = EVP_get_digestbynid(mdnid);
+        const EVP_MD *type = EVP_get_digestbynid(mdnid);
+
         if (type == NULL) {
             ASN1err(ASN1_F_ASN1_ITEM_VERIFY,
                     ASN1_R_UNKNOWN_MESSAGE_DIGEST_ALGORITHM);
@@ -150,11 +154,15 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a,
     }
 
     inl = ASN1_item_i2d(asn, &buf_in, it);
-
+    if (inl <= 0) {
+        ASN1err(ASN1_F_ASN1_ITEM_VERIFY, ERR_R_INTERNAL_ERROR);
+        goto err;
+    }
     if (buf_in == NULL) {
         ASN1err(ASN1_F_ASN1_ITEM_VERIFY, ERR_R_MALLOC_FAILURE);
         goto err;
     }
+    inll = inl;
 
     ret = EVP_DigestVerify(ctx, signature->data, (size_t)signature->length,
                            buf_in, inl);
@@ -164,7 +172,7 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a,
     }
     ret = 1;
  err:
-    OPENSSL_clear_free(buf_in, (unsigned int)inl);
+    OPENSSL_clear_free(buf_in, inll);
     EVP_MD_CTX_free(ctx);
     return ret;
 }
diff --git a/crypto/asn1/ameth_lib.c b/crypto/asn1/ameth_lib.c
index 9a1644148af5d..d7d270dbb5819 100644
--- a/crypto/asn1/ameth_lib.c
+++ b/crypto/asn1/ameth_lib.c
@@ -140,6 +140,22 @@ int EVP_PKEY_asn1_add0(const EVP_PKEY_ASN1_METHOD *ameth)
 {
     EVP_PKEY_ASN1_METHOD tmp = { 0, };
 
+    /*
+     * One of the following must be true:
+     *
+     * pem_str == NULL AND ASN1_PKEY_ALIAS is set
+     * pem_str != NULL AND ASN1_PKEY_ALIAS is clear
+     *
+     * Anything else is an error and may lead to a corrupt ASN1 method table
+     */
+    if (!((ameth->pem_str == NULL
+           && (ameth->pkey_flags & ASN1_PKEY_ALIAS) != 0)
+          || (ameth->pem_str != NULL
+              && (ameth->pkey_flags & ASN1_PKEY_ALIAS) == 0))) {
+        EVPerr(EVP_F_EVP_PKEY_ASN1_ADD0, ERR_R_PASSED_INVALID_ARGUMENT);
+        return 0;
+    }
+
     if (app_methods == NULL) {
         app_methods = sk_EVP_PKEY_ASN1_METHOD_new(ameth_cmp);
         if (app_methods == NULL)
@@ -216,18 +232,6 @@ EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_new(int id, int flags,
             goto err;
     }
 
-    /*
-     * One of the following must be true:
-     *
-     * pem_str == NULL AND ASN1_PKEY_ALIAS is set
-     * pem_str != NULL AND ASN1_PKEY_ALIAS is clear
-     *
-     * Anything else is an error and may lead to a corrupt ASN1 method table
-     */
-    if (!((pem_str == NULL && (flags & ASN1_PKEY_ALIAS) != 0)
-          || (pem_str != NULL && (flags & ASN1_PKEY_ALIAS) == 0)))
-        goto err;
-
     if (pem_str) {
         ameth->pem_str = OPENSSL_strdup(pem_str);
         if (!ameth->pem_str)
diff --git a/crypto/asn1/charmap.h b/crypto/asn1/charmap.h
index bfccac2cb4e31..f15d72d73661d 100644
--- a/crypto/asn1/charmap.h
+++ b/crypto/asn1/charmap.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by crypto/asn1/charmap.pl
  *
- * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/asn1/charmap.pl b/crypto/asn1/charmap.pl
index fbab1f3b0ad7e..dadd8df7749df 100644
--- a/crypto/asn1/charmap.pl
+++ b/crypto/asn1/charmap.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/asn1/d2i_pu.c b/crypto/asn1/d2i_pu.c
index 9452e08a5874c..7bc16c7bceb40 100644
--- a/crypto/asn1/d2i_pu.c
+++ b/crypto/asn1/d2i_pu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -32,7 +32,7 @@ EVP_PKEY *d2i_PublicKey(int type, EVP_PKEY **a, const unsigned char **pp,
     } else
         ret = *a;
 
-    if (!EVP_PKEY_set_type(ret, type)) {
+    if (type != EVP_PKEY_id(ret) && !EVP_PKEY_set_type(ret, type)) {
         ASN1err(ASN1_F_D2I_PUBLICKEY, ERR_R_EVP_LIB);
         goto err;
     }
diff --git a/crypto/bio/b_addr.c b/crypto/bio/b_addr.c
index abec7bb8dbba4..4395ab7a0683e 100644
--- a/crypto/bio/b_addr.c
+++ b/crypto/bio/b_addr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -782,7 +782,12 @@ int BIO_lookup_ex(const char *host, const char *service, int lookup_type,
                  * anyway [above getaddrinfo/gai_strerror is]. We just let
                  * system administrator figure this out...
                  */
+# if defined(OPENSSL_SYS_VXWORKS)
+                /* h_errno doesn't exist on VxWorks */
+                SYSerr(SYS_F_GETHOSTBYNAME, 1000 );
+# else
                 SYSerr(SYS_F_GETHOSTBYNAME, 1000 + h_errno);
+# endif
 #else
                 SYSerr(SYS_F_GETHOSTBYNAME, WSAGetLastError());
 #endif
diff --git a/crypto/bio/bss_file.c b/crypto/bio/bss_file.c
index 8de2391267afe..057344783d61b 100644
--- a/crypto/bio/bss_file.c
+++ b/crypto/bio/bss_file.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -253,9 +253,7 @@ static long file_ctrl(BIO *b, int cmd, long num, void *ptr)
             }
 #  elif defined(OPENSSL_SYS_WIN32_CYGWIN)
             int fd = fileno((FILE *)ptr);
-            if (num & BIO_FP_TEXT)
-                setmode(fd, O_TEXT);
-            else
+            if (!(num & BIO_FP_TEXT))
                 setmode(fd, O_BINARY);
 #  endif
         }
@@ -279,11 +277,14 @@ static long file_ctrl(BIO *b, int cmd, long num, void *ptr)
             ret = 0;
             break;
         }
-#  if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || defined(OPENSSL_SYS_WIN32_CYGWIN)
+#  if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS)
         if (!(num & BIO_FP_TEXT))
             OPENSSL_strlcat(p, "b", sizeof(p));
         else
             OPENSSL_strlcat(p, "t", sizeof(p));
+#  elif defined(OPENSSL_SYS_WIN32_CYGWIN)
+        if (!(num & BIO_FP_TEXT))
+            OPENSSL_strlcat(p, "b", sizeof(p));
 #  endif
         fp = openssl_fopen(ptr, p);
         if (fp == NULL) {
diff --git a/crypto/bio/bss_mem.c b/crypto/bio/bss_mem.c
index e0a97c3b43e18..10fcbf7a7c4f3 100644
--- a/crypto/bio/bss_mem.c
+++ b/crypto/bio/bss_mem.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -20,7 +20,7 @@ static long mem_ctrl(BIO *h, int cmd, long arg1, void *arg2);
 static int mem_new(BIO *h);
 static int secmem_new(BIO *h);
 static int mem_free(BIO *data);
-static int mem_buf_free(BIO *data, int free_all);
+static int mem_buf_free(BIO *data);
 static int mem_buf_sync(BIO *h);
 
 static const BIO_METHOD mem_method = {
@@ -140,10 +140,20 @@ static int secmem_new(BIO *bi)
 
 static int mem_free(BIO *a)
 {
-    return mem_buf_free(a, 1);
+    BIO_BUF_MEM *bb;
+
+    if (a == NULL)
+        return 0;
+
+    bb = (BIO_BUF_MEM *)a->ptr;
+    if (!mem_buf_free(a))
+        return 0;
+    OPENSSL_free(bb->readp);
+    OPENSSL_free(bb);
+    return 1;
 }
 
-static int mem_buf_free(BIO *a, int free_all)
+static int mem_buf_free(BIO *a)
 {
     if (a == NULL)
         return 0;
@@ -155,11 +165,6 @@ static int mem_buf_free(BIO *a, int free_all)
         if (a->flags & BIO_FLAGS_MEM_RDONLY)
             b->data = NULL;
         BUF_MEM_free(b);
-        if (free_all) {
-            OPENSSL_free(bb->readp);
-            OPENSSL_free(bb);
-        }
-        a->ptr = NULL;
     }
     return 1;
 }
@@ -266,11 +271,10 @@ static long mem_ctrl(BIO *b, int cmd, long num, void *ptr)
         }
         break;
     case BIO_C_SET_BUF_MEM:
-        mem_buf_free(b, 0);
+        mem_buf_free(b);
         b->shutdown = (int)num;
         bbm->buf = ptr;
         *bbm->readp = *bbm->buf;
-        b->ptr = bbm;
         break;
     case BIO_C_GET_BUF_MEM_PTR:
         if (ptr != NULL) {
diff --git a/crypto/bn/asm/armv8-mont.pl b/crypto/bn/asm/armv8-mont.pl
index 5d5af1b6be259..d8347bf932492 100755
--- a/crypto/bn/asm/armv8-mont.pl
+++ b/crypto/bn/asm/armv8-mont.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2015-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -287,6 +287,7 @@ __bn_sqr8x_mont:
 	cmp	$ap,$bp
 	b.ne	__bn_mul4x_mont
 .Lsqr8x_mont:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -1040,6 +1041,7 @@ $code.=<<___;
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldr	x29,[sp],#128
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
 ___
@@ -1063,6 +1065,7 @@ $code.=<<___;
 .type	__bn_mul4x_mont,%function
 .align	5
 __bn_mul4x_mont:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -1496,6 +1499,7 @@ __bn_mul4x_mont:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldr	x29,[sp],#128
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	__bn_mul4x_mont,.-__bn_mul4x_mont
 ___
diff --git a/crypto/bn/asm/ia64.S b/crypto/bn/asm/ia64.S
index d235c45e2d632..0a26735c69792 100644
--- a/crypto/bn/asm/ia64.S
+++ b/crypto/bn/asm/ia64.S
@@ -3,7 +3,7 @@
 .ident	"ia64.S, Version 2.1"
 .ident	"IA-64 ISA artwork by Andy Polyakov <appro@openssl.org>"
 
-// Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
+// Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved.
 //
 // Licensed under the OpenSSL license (the "License").  You may not use
 // this file except in compliance with the License.  You can obtain a copy
@@ -48,7 +48,7 @@
 // on Itanium2! What to do? Reschedule loops for Itanium2? But then
 // Itanium would exhibit anti-scalability. So I've chosen to reschedule
 // for worst latency for every instruction aiming for best *all-round*
-// performance.  
+// performance.
 
 // Q.	How much faster does it get?
 // A.	Here is the output from 'openssl speed rsa dsa' for vanilla
@@ -472,7 +472,7 @@ bn_mul_add_words:
 .global	bn_sqr_words#
 .proc	bn_sqr_words#
 .align	64
-.skip	32	// makes the loop body aligned at 64-byte boundary 
+.skip	32	// makes the loop body aligned at 64-byte boundary
 bn_sqr_words:
 	.prologue
 	.save	ar.pfs,r2
diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl
index da35ec1b30cea..3875132bd25d4 100755
--- a/crypto/bn/asm/mips.pl
+++ b/crypto/bn/asm/mips.pl
@@ -798,6 +798,11 @@ $code.=<<___;
 	move	$a0,$v0
 .end	bn_sub_words_internal
 
+#if 0
+/*
+ * The bn_div_3_words entry point is re-used for constant-time interface.
+ * Implementation is retained as historical reference.
+ */
 .align 5
 .globl	bn_div_3_words
 .ent	bn_div_3_words
@@ -877,6 +882,7 @@ $code.=<<___;
 	jr	$ra
 	move	$a0,$v0
 .end	bn_div_3_words_internal
+#endif
 
 .align	5
 .globl	bn_div_words
diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl
index f1292cc75cfb5..85cd73c668bdd 100755
--- a/crypto/bn/asm/rsaz-avx2.pl
+++ b/crypto/bn/asm/rsaz-avx2.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2013-2019 The OpenSSL Project Authors. All Rights Reserved.
 # Copyright (c) 2012, Intel Corporation. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
@@ -1492,6 +1492,7 @@ $code.=<<___;
 .type	rsaz_1024_red2norm_avx2,\@abi-omnipotent
 .align	32
 rsaz_1024_red2norm_avx2:
+.cfi_startproc
 	sub	\$-128,$inp	# size optimization
 	xor	%rax,%rax
 ___
@@ -1525,12 +1526,14 @@ ___
 }
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2
 
 .globl	rsaz_1024_norm2red_avx2
 .type	rsaz_1024_norm2red_avx2,\@abi-omnipotent
 .align	32
 rsaz_1024_norm2red_avx2:
+.cfi_startproc
 	sub	\$-128,$out	# size optimization
 	mov	($inp),@T[0]
 	mov	\$0x1fffffff,%eax
@@ -1562,6 +1565,7 @@ $code.=<<___;
 	mov	@T[0],`8*($j+2)-128`($out)
 	mov	@T[0],`8*($j+3)-128`($out)
 	ret
+.cfi_endproc
 .size	rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2
 ___
 }
@@ -1573,6 +1577,7 @@ $code.=<<___;
 .type	rsaz_1024_scatter5_avx2,\@abi-omnipotent
 .align	32
 rsaz_1024_scatter5_avx2:
+.cfi_startproc
 	vzeroupper
 	vmovdqu	.Lscatter_permd(%rip),%ymm5
 	shl	\$4,$power
@@ -1592,6 +1597,7 @@ rsaz_1024_scatter5_avx2:
 
 	vzeroupper
 	ret
+.cfi_endproc
 .size	rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2
 
 .globl	rsaz_1024_gather5_avx2
diff --git a/crypto/bn/asm/sparcv8plus.S b/crypto/bn/asm/sparcv8plus.S
index fe4699b2bdd14..d520ffa7c2483 100644
--- a/crypto/bn/asm/sparcv8plus.S
+++ b/crypto/bn/asm/sparcv8plus.S
@@ -3,7 +3,7 @@
 
 /*
  * ====================================================================
- * Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1999-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -154,9 +154,9 @@
   .register	%g2,#scratch
   .register	%g3,#scratch
 # define	FRAME_SIZE	-192
-#else 
+#else
 # define	FRAME_SIZE	-96
-#endif 
+#endif
 /*
  * GNU assembler can't stand stuw:-(
  */
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index ad6e8ada3ce75..f43e13d11643a 100755
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -2910,6 +2910,7 @@ bn_powerx5:
 .align	32
 bn_sqrx8x_internal:
 __bn_sqrx8x_internal:
+.cfi_startproc
 	##################################################################
 	# Squaring part:
 	#
@@ -3542,6 +3543,7 @@ __bn_sqrx8x_reduction:
 	cmp	8+8(%rsp),%r8		# end of t[]?
 	jb	.Lsqrx8x_reduction_loop
 	ret
+.cfi_endproc
 .size	bn_sqrx8x_internal,.-bn_sqrx8x_internal
 ___
 }
diff --git a/crypto/bn/bn_ctx.c b/crypto/bn/bn_ctx.c
index aa08b31a34bb9..54b799961aa43 100644
--- a/crypto/bn/bn_ctx.c
+++ b/crypto/bn/bn_ctx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -227,6 +227,8 @@ BIGNUM *BN_CTX_get(BN_CTX *ctx)
     }
     /* OK, make sure the returned bignum is "zero" */
     BN_zero(ret);
+    /* clear BN_FLG_CONSTTIME if leaked from previous frames */
+    ret->flags &= (~BN_FLG_CONSTTIME);
     ctx->used++;
     CTXDBG_RET(ctx, ret);
     return ret;
@@ -256,7 +258,7 @@ static int BN_STACK_push(BN_STACK *st, unsigned int idx)
         unsigned int newsize =
             st->size ? (st->size * 3 / 2) : BN_CTX_START_FRAMES;
         unsigned int *newitems;
-        
+
         if ((newitems = OPENSSL_malloc(sizeof(*newitems) * newsize)) == NULL) {
             BNerr(BN_F_BN_STACK_PUSH, ERR_R_MALLOC_FAILURE);
             return 0;
@@ -310,7 +312,7 @@ static BIGNUM *BN_POOL_get(BN_POOL *p, int flag)
     /* Full; allocate a new pool item and link it in. */
     if (p->used == p->size) {
         BN_POOL_ITEM *item;
-        
+
         if ((item = OPENSSL_malloc(sizeof(*item))) == NULL) {
             BNerr(BN_F_BN_POOL_GET, ERR_R_MALLOC_FAILURE);
             return NULL;
diff --git a/crypto/bn/bn_depr.c b/crypto/bn/bn_depr.c
index 7d89214b1c16a..58bcf197a490e 100644
--- a/crypto/bn/bn_depr.c
+++ b/crypto/bn/bn_depr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -40,7 +40,7 @@ BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe,
         goto err;
 
     /* we have a prime :-) */
-    return ret;
+    return rnd;
  err:
     BN_free(rnd);
     return NULL;
diff --git a/crypto/bn/bn_div.c b/crypto/bn/bn_div.c
index 70add10c7d6ce..3a6fa0a1b194b 100644
--- a/crypto/bn/bn_div.c
+++ b/crypto/bn/bn_div.c
@@ -7,6 +7,7 @@
  * https://www.openssl.org/source/license.html
  */
 
+#include <assert.h>
 #include <openssl/bn.h>
 #include "internal/cryptlib.h"
 #include "bn_lcl.h"
@@ -86,6 +87,77 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
 
 #else
 
+# if defined(BN_DIV3W)
+BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0);
+# elif 0
+/*
+ * This is #if-ed away, because it's a reference for assembly implementations,
+ * where it can and should be made constant-time. But if you want to test it,
+ * just replace 0 with 1.
+ */
+#  if BN_BITS2 == 64 && defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
+#   undef BN_ULLONG
+#   define BN_ULLONG __uint128_t
+#   define BN_LLONG
+#  endif
+
+#  ifdef BN_LLONG
+#   define BN_DIV3W
+/*
+ * Interface is somewhat quirky, |m| is pointer to most significant limb,
+ * and less significant limb is referred at |m[-1]|. This means that caller
+ * is responsible for ensuring that |m[-1]| is valid. Second condition that
+ * has to be met is that |d0|'s most significant bit has to be set. Or in
+ * other words divisor has to be "bit-aligned to the left." bn_div_fixed_top
+ * does all this. The subroutine considers four limbs, two of which are
+ * "overlapping," hence the name...
+ */
+static BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0)
+{
+    BN_ULLONG R = ((BN_ULLONG)m[0] << BN_BITS2) | m[-1];
+    BN_ULLONG D = ((BN_ULLONG)d0 << BN_BITS2) | d1;
+    BN_ULONG Q = 0, mask;
+    int i;
+
+    for (i = 0; i < BN_BITS2; i++) {
+        Q <<= 1;
+        if (R >= D) {
+            Q |= 1;
+            R -= D;
+        }
+        D >>= 1;
+    }
+
+    mask = 0 - (Q >> (BN_BITS2 - 1));   /* does it overflow? */
+
+    Q <<= 1;
+    Q |= (R >= D);
+
+    return (Q | mask) & BN_MASK2;
+}
+#  endif
+# endif
+
+static int bn_left_align(BIGNUM *num)
+{
+    BN_ULONG *d = num->d, n, m, rmask;
+    int top = num->top;
+    int rshift = BN_num_bits_word(d[top - 1]), lshift, i;
+
+    lshift = BN_BITS2 - rshift;
+    rshift %= BN_BITS2;            /* say no to undefined behaviour */
+    rmask = (BN_ULONG)0 - rshift;  /* rmask = 0 - (rshift != 0) */
+    rmask |= rmask >> 8;
+
+    for (i = 0, m = 0; i < top; i++) {
+        n = d[i];
+        d[i] = ((n << lshift) | m) & BN_MASK2;
+        m = (n >> rshift) & rmask;
+    }
+
+    return lshift;
+}
+
 # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
     && !defined(PEDANTIC) && !defined(BN_DIV3W)
 #  if defined(__GNUC__) && __GNUC__>=2
@@ -137,55 +209,73 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
 int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
            BN_CTX *ctx)
 {
-    int norm_shift, i, loop;
-    BIGNUM *tmp, wnum, *snum, *sdiv, *res;
-    BN_ULONG *resp, *wnump;
-    BN_ULONG d0, d1;
-    int num_n, div_n;
-    int no_branch = 0;
+    int ret;
+
+    if (BN_is_zero(divisor)) {
+        BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
+        return 0;
+    }
 
     /*
      * Invalid zero-padding would have particularly bad consequences so don't
      * just rely on bn_check_top() here (bn_check_top() works only for
      * BN_DEBUG builds)
      */
-    if ((num->top > 0 && num->d[num->top - 1] == 0) ||
-        (divisor->top > 0 && divisor->d[divisor->top - 1] == 0)) {
+    if (divisor->d[divisor->top - 1] == 0) {
         BNerr(BN_F_BN_DIV, BN_R_NOT_INITIALIZED);
         return 0;
     }
 
-    bn_check_top(num);
-    bn_check_top(divisor);
+    ret = bn_div_fixed_top(dv, rm, num, divisor, ctx);
 
-    if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0)
-        || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0)) {
-        no_branch = 1;
+    if (ret) {
+        if (dv != NULL)
+            bn_correct_top(dv);
+        if (rm != NULL)
+            bn_correct_top(rm);
     }
 
-    bn_check_top(dv);
-    bn_check_top(rm);
-    /*- bn_check_top(num); *//*
-     * 'num' has been checked already
-     */
-    /*- bn_check_top(divisor); *//*
-     * 'divisor' has been checked already
-     */
+    return ret;
+}
 
-    if (BN_is_zero(divisor)) {
-        BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
-        return 0;
-    }
+/*
+ * It's argued that *length* of *significant* part of divisor is public.
+ * Even if it's private modulus that is. Again, *length* is assumed
+ * public, but not *value*. Former is likely to be pre-defined by
+ * algorithm with bit granularity, though below subroutine is invariant
+ * of limb length. Thanks to this assumption we can require that |divisor|
+ * may not be zero-padded, yet claim this subroutine "constant-time"(*).
+ * This is because zero-padded dividend, |num|, is tolerated, so that
+ * caller can pass dividend of public length(*), but with smaller amount
+ * of significant limbs. This naturally means that quotient, |dv|, would
+ * contain correspondingly less significant limbs as well, and will be zero-
+ * padded accordingly. Returned remainder, |rm|, will have same bit length
+ * as divisor, also zero-padded if needed. These actually leave sign bits
+ * in ambiguous state. In sense that we try to avoid negative zeros, while
+ * zero-padded zeros would retain sign.
+ *
+ * (*) "Constant-time-ness" has two pre-conditions:
+ *
+ *     - availability of constant-time bn_div_3_words;
+ *     - dividend is at least as "wide" as divisor, limb-wise, zero-padded
+ *       if so required, which shouldn't be a privacy problem, because
+ *       divisor's length is considered public;
+ */
+int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
+                     const BIGNUM *divisor, BN_CTX *ctx)
+{
+    int norm_shift, i, j, loop;
+    BIGNUM *tmp, *snum, *sdiv, *res;
+    BN_ULONG *resp, *wnum, *wnumtop;
+    BN_ULONG d0, d1;
+    int num_n, div_n;
 
-    if (!no_branch && BN_ucmp(num, divisor) < 0) {
-        if (rm != NULL) {
-            if (BN_copy(rm, num) == NULL)
-                return 0;
-        }
-        if (dv != NULL)
-            BN_zero(dv);
-        return 1;
-    }
+    assert(divisor->top > 0 && divisor->d[divisor->top - 1] != 0);
+
+    bn_check_top(num);
+    bn_check_top(divisor);
+    bn_check_top(dv);
+    bn_check_top(rm);
 
     BN_CTX_start(ctx);
     res = (dv == NULL) ? BN_CTX_get(ctx) : dv;
@@ -196,113 +286,72 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
         goto err;
 
     /* First we normalise the numbers */
-    norm_shift = BN_BITS2 - ((BN_num_bits(divisor)) % BN_BITS2);
-    if (!(BN_lshift(sdiv, divisor, norm_shift)))
+    if (!BN_copy(sdiv, divisor))
         goto err;
+    norm_shift = bn_left_align(sdiv);
     sdiv->neg = 0;
-    norm_shift += BN_BITS2;
-    if (!(BN_lshift(snum, num, norm_shift)))
+    /*
+     * Note that bn_lshift_fixed_top's output is always one limb longer
+     * than input, even when norm_shift is zero. This means that amount of
+     * inner loop iterations is invariant of dividend value, and that one
+     * doesn't need to compare dividend and divisor if they were originally
+     * of the same bit length.
+     */
+    if (!(bn_lshift_fixed_top(snum, num, norm_shift)))
         goto err;
-    snum->neg = 0;
-
-    if (no_branch) {
-        /*
-         * Since we don't know whether snum is larger than sdiv, we pad snum
-         * with enough zeroes without changing its value.
-         */
-        if (snum->top <= sdiv->top + 1) {
-            if (bn_wexpand(snum, sdiv->top + 2) == NULL)
-                goto err;
-            for (i = snum->top; i < sdiv->top + 2; i++)
-                snum->d[i] = 0;
-            snum->top = sdiv->top + 2;
-        } else {
-            if (bn_wexpand(snum, snum->top + 1) == NULL)
-                goto err;
-            snum->d[snum->top] = 0;
-            snum->top++;
-        }
-    }
 
     div_n = sdiv->top;
     num_n = snum->top;
+
+    if (num_n <= div_n) {
+        /* caller didn't pad dividend -> no constant-time guarantee... */
+        if (bn_wexpand(snum, div_n + 1) == NULL)
+            goto err;
+        memset(&(snum->d[num_n]), 0, (div_n - num_n + 1) * sizeof(BN_ULONG));
+        snum->top = num_n = div_n + 1;
+    }
+
     loop = num_n - div_n;
     /*
      * Lets setup a 'window' into snum This is the part that corresponds to
      * the current 'area' being divided
      */
-    wnum.neg = 0;
-    wnum.d = &(snum->d[loop]);
-    wnum.top = div_n;
-    wnum.flags = BN_FLG_STATIC_DATA;
-    /*
-     * only needed when BN_ucmp messes up the values between top and max
-     */
-    wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
+    wnum = &(snum->d[loop]);
+    wnumtop = &(snum->d[num_n - 1]);
 
     /* Get the top 2 words of sdiv */
-    /* div_n=sdiv->top; */
     d0 = sdiv->d[div_n - 1];
     d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];
 
-    /* pointer to the 'top' of snum */
-    wnump = &(snum->d[num_n - 1]);
-
-    /* Setup to 'res' */
-    if (!bn_wexpand(res, (loop + 1)))
+    /* Setup quotient */
+    if (!bn_wexpand(res, loop))
         goto err;
     res->neg = (num->neg ^ divisor->neg);
-    res->top = loop - no_branch;
-    resp = &(res->d[loop - 1]);
+    res->top = loop;
+    res->flags |= BN_FLG_FIXED_TOP;
+    resp = &(res->d[loop]);
 
     /* space for temp */
     if (!bn_wexpand(tmp, (div_n + 1)))
         goto err;
 
-    if (!no_branch) {
-        if (BN_ucmp(&wnum, sdiv) >= 0) {
-            /*
-             * If BN_DEBUG_RAND is defined BN_ucmp changes (via bn_pollute)
-             * the const bignum arguments => clean the values between top and
-             * max again
-             */
-            bn_clear_top2max(&wnum);
-            bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
-            *resp = 1;
-        } else
-            res->top--;
-    }
-
-    /* Increase the resp pointer so that we never create an invalid pointer. */
-    resp++;
-
-    /*
-     * if res->top == 0 then clear the neg value otherwise decrease the resp
-     * pointer
-     */
-    if (res->top == 0)
-        res->neg = 0;
-    else
-        resp--;
-
-    for (i = 0; i < loop - 1; i++, wnump--) {
+    for (i = 0; i < loop; i++, wnumtop--) {
         BN_ULONG q, l0;
         /*
          * the first part of the loop uses the top two words of snum and sdiv
          * to calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv
          */
-# if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
-        BN_ULONG bn_div_3_words(BN_ULONG *, BN_ULONG, BN_ULONG);
-        q = bn_div_3_words(wnump, d1, d0);
+# if defined(BN_DIV3W)
+        q = bn_div_3_words(wnumtop, d1, d0);
 # else
         BN_ULONG n0, n1, rem = 0;
 
-        n0 = wnump[0];
-        n1 = wnump[-1];
+        n0 = wnumtop[0];
+        n1 = wnumtop[-1];
         if (n0 == d0)
             q = BN_MASK2;
         else {                  /* n0 < d0 */
-
+            BN_ULONG n2 = (wnumtop == wnum) ? 0 : wnumtop[-2];
 #  ifdef BN_LLONG
             BN_ULLONG t2;
 
@@ -322,7 +371,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
             t2 = (BN_ULLONG) d1 *q;
 
             for (;;) {
-                if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | wnump[-2]))
+                if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | n2))
                     break;
                 q--;
                 rem += d0;
@@ -355,7 +404,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
 #   endif
 
             for (;;) {
-                if ((t2h < rem) || ((t2h == rem) && (t2l <= wnump[-2])))
+                if ((t2h < rem) || ((t2h == rem) && (t2l <= n2)))
                     break;
                 q--;
                 rem += d0;
@@ -371,43 +420,33 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
 
         l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q);
         tmp->d[div_n] = l0;
-        wnum.d--;
+        wnum--;
         /*
-         * ingore top values of the bignums just sub the two BN_ULONG arrays
+         * ignore top values of the bignums just sub the two BN_ULONG arrays
          * with bn_sub_words
          */
-        if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n + 1)) {
-            /*
-             * Note: As we have considered only the leading two BN_ULONGs in
-             * the calculation of q, sdiv * q might be greater than wnum (but
-             * then (q-1) * sdiv is less or equal than wnum)
-             */
-            q--;
-            if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
-                /*
-                 * we can't have an overflow here (assuming that q != 0, but
-                 * if q == 0 then tmp is zero anyway)
-                 */
-                (*wnump)++;
-        }
-        /* store part of the result */
-        resp--;
-        *resp = q;
-    }
-    bn_correct_top(snum);
-    if (rm != NULL) {
+        l0 = bn_sub_words(wnum, wnum, tmp->d, div_n + 1);
+        q -= l0;
         /*
-         * Keep a copy of the neg flag in num because if rm==num BN_rshift()
-         * will overwrite it.
+         * Note: As we have considered only the leading two BN_ULONGs in
+         * the calculation of q, sdiv * q might be greater than wnum (but
+         * then (q-1) * sdiv is less or equal than wnum)
          */
-        int neg = num->neg;
-        BN_rshift(rm, snum, norm_shift);
-        if (!BN_is_zero(rm))
-            rm->neg = neg;
-        bn_check_top(rm);
+        for (l0 = 0 - l0, j = 0; j < div_n; j++)
+            tmp->d[j] = sdiv->d[j] & l0;
+        l0 = bn_add_words(wnum, wnum, tmp->d, div_n);
+        (*wnumtop) += l0;
+        assert((*wnumtop) == 0);
+
+        /* store part of the result */
+        *--resp = q;
     }
-    if (no_branch)
-        bn_correct_top(res);
+    /* snum holds remainder, it's as wide as divisor */
+    snum->neg = num->neg;
+    snum->top = div_n;
+    snum->flags |= BN_FLG_FIXED_TOP;
+    if (rm != NULL)
+        bn_rshift_fixed_top(rm, snum, norm_shift);
     BN_CTX_end(ctx);
     return 1;
  err:
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index c026ffcb339ce..88f2baf0e553c 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -648,34 +648,41 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
             goto err;
     }
 
+    if (a->neg || BN_ucmp(a, m) >= 0) {
+        BIGNUM *reduced = BN_CTX_get(ctx);
+        if (reduced == NULL
+            || !BN_nnmod(reduced, a, m, ctx)) {
+            goto err;
+        }
+        a = reduced;
+    }
+
 #ifdef RSAZ_ENABLED
-    if (!a->neg) {
-        /*
-         * If the size of the operands allow it, perform the optimized
-         * RSAZ exponentiation. For further information see
-         * crypto/bn/rsaz_exp.c and accompanying assembly modules.
-         */
-        if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
-            && rsaz_avx2_eligible()) {
-            if (NULL == bn_wexpand(rr, 16))
-                goto err;
-            RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
-                                   mont->n0[0]);
-            rr->top = 16;
-            rr->neg = 0;
-            bn_correct_top(rr);
-            ret = 1;
+    /*
+     * If the size of the operands allow it, perform the optimized
+     * RSAZ exponentiation. For further information see
+     * crypto/bn/rsaz_exp.c and accompanying assembly modules.
+     */
+    if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
+        && rsaz_avx2_eligible()) {
+        if (NULL == bn_wexpand(rr, 16))
             goto err;
-        } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
-            if (NULL == bn_wexpand(rr, 8))
-                goto err;
-            RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
-            rr->top = 8;
-            rr->neg = 0;
-            bn_correct_top(rr);
-            ret = 1;
+        RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
+                               mont->n0[0]);
+        rr->top = 16;
+        rr->neg = 0;
+        bn_correct_top(rr);
+        ret = 1;
+        goto err;
+    } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
+        if (NULL == bn_wexpand(rr, 8))
             goto err;
-        }
+        RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
+        rr->top = 8;
+        rr->neg = 0;
+        bn_correct_top(rr);
+        ret = 1;
+        goto err;
     }
 #endif
 
@@ -747,12 +754,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
         goto err;
 
     /* prepare a^1 in Montgomery domain */
-    if (a->neg || BN_ucmp(a, m) >= 0) {
-        if (!BN_nnmod(&am, a, m, ctx))
-            goto err;
-        if (!bn_to_mont_fixed_top(&am, &am, mont, ctx))
-            goto err;
-    } else if (!bn_to_mont_fixed_top(&am, a, mont, ctx))
+    if (!bn_to_mont_fixed_top(&am, a, mont, ctx))
         goto err;
 
 #if defined(SPARC_T4_MONT)
diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c
index 80f910c807793..8286b3855a2cb 100644
--- a/crypto/bn/bn_lib.c
+++ b/crypto/bn/bn_lib.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -695,6 +695,9 @@ int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
     int i;
     BN_ULONG aa, bb;
 
+    if (n == 0)
+        return 0;
+
     aa = a[n - 1];
     bb = b[n - 1];
     if (aa != bb)
@@ -737,26 +740,25 @@ int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl)
     return bn_cmp_words(a, b, cl);
 }
 
-/*
+/*-
  * Constant-time conditional swap of a and b.
- * a and b are swapped if condition is not 0.  The code assumes that at most one bit of condition is set.
- * nwords is the number of words to swap.  The code assumes that at least nwords are allocated in both a and b,
- * and that no more than nwords are used by either a or b.
- * a and b cannot be the same number
+ * a and b are swapped if condition is not 0.
+ * nwords is the number of words to swap.
+ * Assumes that at least nwords are allocated in both a and b.
+ * Assumes that no more than nwords are used by either a or b.
  */
 void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords)
 {
     BN_ULONG t;
     int i;
 
+    if (a == b)
+        return;
+
     bn_wcheck_size(a, nwords);
     bn_wcheck_size(b, nwords);
 
-    assert(a != b);
-    assert((condition & (condition - 1)) == 0);
-    assert(sizeof(BN_ULONG) >= sizeof(int));
-
-    condition = ((condition - 1) >> (BN_BITS2 - 1)) - 1;
+    condition = ((~condition & ((condition - 1))) >> (BN_BITS2 - 1)) - 1;
 
     t = (a->top ^ b->top) & condition;
     a->top ^= t;
@@ -794,42 +796,16 @@ void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords)
     a->flags ^= t;
     b->flags ^= t;
 
-#define BN_CONSTTIME_SWAP(ind) \
-        do { \
-                t = (a->d[ind] ^ b->d[ind]) & condition; \
-                a->d[ind] ^= t; \
-                b->d[ind] ^= t; \
-        } while (0)
-
-    switch (nwords) {
-    default:
-        for (i = 10; i < nwords; i++)
-            BN_CONSTTIME_SWAP(i);
-        /* Fallthrough */
-    case 10:
-        BN_CONSTTIME_SWAP(9);   /* Fallthrough */
-    case 9:
-        BN_CONSTTIME_SWAP(8);   /* Fallthrough */
-    case 8:
-        BN_CONSTTIME_SWAP(7);   /* Fallthrough */
-    case 7:
-        BN_CONSTTIME_SWAP(6);   /* Fallthrough */
-    case 6:
-        BN_CONSTTIME_SWAP(5);   /* Fallthrough */
-    case 5:
-        BN_CONSTTIME_SWAP(4);   /* Fallthrough */
-    case 4:
-        BN_CONSTTIME_SWAP(3);   /* Fallthrough */
-    case 3:
-        BN_CONSTTIME_SWAP(2);   /* Fallthrough */
-    case 2:
-        BN_CONSTTIME_SWAP(1);   /* Fallthrough */
-    case 1:
-        BN_CONSTTIME_SWAP(0);
-    }
-#undef BN_CONSTTIME_SWAP
+    /* conditionally swap the data */
+    for (i = 0; i < nwords; i++) {
+        t = (a->d[i] ^ b->d[i]) & condition;
+        a->d[i] ^= t;
+        b->d[i] ^= t;
+    }
 }
 
+#undef BN_CONSTTIME_SWAP_FLAGS
+
 /* Bits of security, see SP800-57 */
 
 int BN_security_bits(int L, int N)
diff --git a/crypto/bn/bn_prime.h b/crypto/bn/bn_prime.h
index a64c9630f3b04..2eb7b52f76f9a 100644
--- a/crypto/bn/bn_prime.h
+++ b/crypto/bn/bn_prime.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by crypto/bn/bn_prime.pl
  *
- * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/bn/bn_prime.pl b/crypto/bn/bn_prime.pl
index eeca475b9366b..b0b16087429b2 100644
--- a/crypto/bn/bn_prime.pl
+++ b/crypto/bn/bn_prime.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/bn/bn_shift.c b/crypto/bn/bn_shift.c
index 15d4b321ba268..b7a1e0ff9ae3c 100644
--- a/crypto/bn/bn_shift.c
+++ b/crypto/bn/bn_shift.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -7,6 +7,7 @@
  * https://www.openssl.org/source/license.html
  */
 
+#include <assert.h>
 #include "internal/cryptlib.h"
 #include "bn_lcl.h"
 
@@ -82,40 +83,70 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a)
 
 int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
 {
-    int i, nw, lb, rb;
-    BN_ULONG *t, *f;
-    BN_ULONG l;
-
-    bn_check_top(r);
-    bn_check_top(a);
+    int ret;
 
     if (n < 0) {
         BNerr(BN_F_BN_LSHIFT, BN_R_INVALID_SHIFT);
         return 0;
     }
 
+    ret = bn_lshift_fixed_top(r, a, n);
+
+    bn_correct_top(r);
+    bn_check_top(r);
+
+    return ret;
+}
+
+/*
+ * With respect to the shift factor, the execution time is invariant of
+ * |n % BN_BITS2|, but not of |n / BN_BITS2|. In other words, the
+ * pre-condition for constant-time-ness is |n < BN_BITS2| or
+ * |n / BN_BITS2| being non-secret.
+ */
+int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n)
+{
+    int i, nw;
+    unsigned int lb, rb;
+    BN_ULONG *t, *f;
+    BN_ULONG l, m, rmask = 0;
+
+    assert(n >= 0);
+
+    bn_check_top(r);
+    bn_check_top(a);
+
     nw = n / BN_BITS2;
     if (bn_wexpand(r, a->top + nw + 1) == NULL)
         return 0;
-    r->neg = a->neg;
-    lb = n % BN_BITS2;
-    rb = BN_BITS2 - lb;
-    f = a->d;
-    t = r->d;
-    t[a->top + nw] = 0;
-    if (lb == 0)
-        for (i = a->top - 1; i >= 0; i--)
-            t[nw + i] = f[i];
-    else
-        for (i = a->top - 1; i >= 0; i--) {
-            l = f[i];
-            t[nw + i + 1] |= (l >> rb) & BN_MASK2;
-            t[nw + i] = (l << lb) & BN_MASK2;
+
+    if (a->top != 0) {
+        lb = (unsigned int)n % BN_BITS2;
+        rb = BN_BITS2 - lb;
+        rb %= BN_BITS2;            /* say no to undefined behaviour */
+        rmask = (BN_ULONG)0 - rb;  /* rmask = 0 - (rb != 0) */
+        rmask |= rmask >> 8;
+        f = &(a->d[0]);
+        t = &(r->d[nw]);
+        l = f[a->top - 1];
+        t[a->top] = (l >> rb) & rmask;
+        for (i = a->top - 1; i > 0; i--) {
+            m = l << lb;
+            l = f[i - 1];
+            t[i] = (m | ((l >> rb) & rmask)) & BN_MASK2;
         }
-    memset(t, 0, sizeof(*t) * nw);
+        t[0] = (l << lb) & BN_MASK2;
+    } else {
+        /* shouldn't happen, but formally required */
+        r->d[nw] = 0;
+    }
+    if (nw != 0)
+        memset(r->d, 0, sizeof(*t) * nw);
+
+    r->neg = a->neg;
     r->top = a->top + nw + 1;
-    bn_correct_top(r);
-    bn_check_top(r);
+    r->flags |= BN_FLG_FIXED_TOP;
+
     return 1;
 }
 
@@ -173,3 +204,54 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
     bn_check_top(r);
     return 1;
 }
+
+/*
+ * With respect to the shift factor, the execution time is invariant of
+ * |n % BN_BITS2|, but not of |n / BN_BITS2|. In other words, the
+ * pre-condition for constant-time-ness for sufficiently[!] zero-padded
+ * inputs is |n < BN_BITS2| or |n / BN_BITS2| being non-secret.
+ */
+int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n)
+{
+    int i, top, nw;
+    unsigned int lb, rb;
+    BN_ULONG *t, *f;
+    BN_ULONG l, m, mask;
+
+    bn_check_top(r);
+    bn_check_top(a);
+
+    assert(n >= 0);
+
+    nw = n / BN_BITS2;
+    if (nw >= a->top) {
+        /* shouldn't happen, but formally required */
+        BN_zero(r);
+        return 1;
+    }
+
+    rb = (unsigned int)n % BN_BITS2;
+    lb = BN_BITS2 - rb;
+    lb %= BN_BITS2;            /* say no to undefined behaviour */
+    mask = (BN_ULONG)0 - lb;   /* mask = 0 - (lb != 0) */
+    mask |= mask >> 8;
+    top = a->top - nw;
+    if (r != a && bn_wexpand(r, top) == NULL)
+        return 0;
+
+    t = &(r->d[0]);
+    f = &(a->d[nw]);
+    l = f[0];
+    for (i = 0; i < top - 1; i++) {
+        m = f[i + 1];
+        t[i] = (l >> rb) | ((m << lb) & mask);
+        l = m;
+    }
+    t[i] = l >> rb;
+
+    r->neg = a->neg;
+    r->top = top;
+    r->flags |= BN_FLG_FIXED_TOP;
+
+    return 1;
+}
diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl
index 4a838bc2b32e2..e90be6d0e5bdd 100755
--- a/crypto/chacha/asm/chacha-armv8.pl
+++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -157,6 +157,7 @@ ChaCha20_ctr32:
 	b.ne	ChaCha20_neon
 
 .Lshort:
+	.inst	0xd503233f			// paciasp
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -276,6 +277,7 @@ $code.=<<___;
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	.inst	0xd50323bf			// autiasp
 .Labort:
 	ret
 
@@ -332,6 +334,7 @@ $code.=<<___;
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	.inst	0xd50323bf			// autiasp
 	ret
 .size	ChaCha20_ctr32,.-ChaCha20_ctr32
 ___
@@ -377,6 +380,7 @@ $code.=<<___;
 .type	ChaCha20_neon,%function
 .align	5
 ChaCha20_neon:
+	.inst	0xd503233f			// paciasp
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -575,6 +579,7 @@ $code.=<<___;
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	.inst	0xd50323bf			// autiasp
 	ret
 
 .Ltail_neon:
@@ -684,6 +689,7 @@ $code.=<<___;
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	.inst	0xd50323bf			// autiasp
 	ret
 .size	ChaCha20_neon,.-ChaCha20_neon
 ___
@@ -696,6 +702,7 @@ $code.=<<___;
 .type	ChaCha20_512_neon,%function
 .align	5
 ChaCha20_512_neon:
+	.inst	0xd503233f			// paciasp
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -1114,6 +1121,7 @@ $code.=<<___;
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	.inst	0xd50323bf			// autiasp
 	ret
 .size	ChaCha20_512_neon,.-ChaCha20_512_neon
 ___
diff --git a/crypto/cms/cms_kari.c b/crypto/cms/cms_kari.c
index 3bc46febf6403..5e83814d0fcf6 100644
--- a/crypto/cms/cms_kari.c
+++ b/crypto/cms/cms_kari.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2013-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -282,7 +282,7 @@ static int cms_kari_create_ephemeral_key(CMS_KeyAgreeRecipientInfo *kari,
     return rv;
 }
 
-/* Initialise a ktri based on passed certificate and key */
+/* Initialise a kari based on passed certificate and key */
 
 int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip,
                                 EVP_PKEY *pk, unsigned int flags)
@@ -299,6 +299,9 @@ int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip,
     kari->version = 3;
 
     rek = M_ASN1_new_of(CMS_RecipientEncryptedKey);
+    if (rek == NULL)
+        return 0;
+
     if (!sk_CMS_RecipientEncryptedKey_push(kari->recipientEncryptedKeys, rek)) {
         M_ASN1_free_of(rek, CMS_RecipientEncryptedKey);
         return 0;
diff --git a/crypto/cms/cms_pwri.c b/crypto/cms/cms_pwri.c
index eac9c2fc862eb..26e3bdcf9e412 100644
--- a/crypto/cms/cms_pwri.c
+++ b/crypto/cms/cms_pwri.c
@@ -373,6 +373,7 @@ int cms_RecipientInfo_pwri_crypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri,
             goto err;
         }
 
+        OPENSSL_clear_free(ec->key, ec->keylen);
         ec->key = key;
         ec->keylen = keylen;
 
diff --git a/crypto/conf/conf_def.c b/crypto/conf/conf_def.c
index 7f0d70ea695ec..8e3f42a0caca2 100644
--- a/crypto/conf/conf_def.c
+++ b/crypto/conf/conf_def.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -348,10 +348,15 @@ static int def_load_bio(CONF *conf, BIO *in, long *line)
                 psection = section;
             }
             p = eat_ws(conf, end);
-            if (strncmp(pname, ".include", 8) == 0 && p != pname + 8) {
+            if (strncmp(pname, ".include", 8) == 0
+                && (p != pname + 8 || *p == '=')) {
                 char *include = NULL;
                 BIO *next;
 
+                if (*p == '=') {
+                    p++;
+                    p = eat_ws(conf, p);
+                }
                 trim_ws(conf, p);
                 if (!str_copy(conf, psection, &include, p))
                     goto err;
diff --git a/crypto/conf/conf_def.h b/crypto/conf/conf_def.h
index 73e88baa8ba11..2016d31b89290 100644
--- a/crypto/conf/conf_def.h
+++ b/crypto/conf/conf_def.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by crypto/conf/keysets.pl
  *
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
  * in the file LICENSE in the source distribution or at
diff --git a/crypto/conf/conf_lib.c b/crypto/conf/conf_lib.c
index 07110d8502a4e..2d40ac97ec275 100644
--- a/crypto/conf/conf_lib.c
+++ b/crypto/conf/conf_lib.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -358,11 +358,36 @@ OPENSSL_INIT_SETTINGS *OPENSSL_INIT_new(void)
 
     if (ret != NULL)
         memset(ret, 0, sizeof(*ret));
+    ret->flags = DEFAULT_CONF_MFLAGS;
+
     return ret;
 }
 
 
 #ifndef OPENSSL_NO_STDIO
+int OPENSSL_INIT_set_config_filename(OPENSSL_INIT_SETTINGS *settings,
+                                     const char *filename)
+{
+    char *newfilename = NULL;
+
+    if (filename != NULL) {
+        newfilename = strdup(filename);
+        if (newfilename == NULL)
+            return 0;
+    }
+
+    free(settings->filename);
+    settings->filename = newfilename;
+
+    return 1;
+}
+
+void OPENSSL_INIT_set_config_file_flags(OPENSSL_INIT_SETTINGS *settings,
+                                        unsigned long flags)
+{
+    settings->flags = flags;
+}
+
 int OPENSSL_INIT_set_config_appname(OPENSSL_INIT_SETTINGS *settings,
                                     const char *appname)
 {
@@ -383,6 +408,7 @@ int OPENSSL_INIT_set_config_appname(OPENSSL_INIT_SETTINGS *settings,
 
 void OPENSSL_INIT_free(OPENSSL_INIT_SETTINGS *settings)
 {
+    free(settings->filename);
     free(settings->appname);
     free(settings);
 }
diff --git a/crypto/conf/conf_mod.c b/crypto/conf/conf_mod.c
index 51f262e774dd6..e703d97f5451f 100644
--- a/crypto/conf/conf_mod.c
+++ b/crypto/conf/conf_mod.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -142,6 +142,9 @@ int CONF_modules_load_file(const char *filename, const char *appname,
         OPENSSL_free(file);
     NCONF_free(conf);
 
+    if (flags & CONF_MFLAGS_IGNORE_RETURN_CODES)
+        return 1;
+
     return ret;
 }
 
diff --git a/crypto/conf/conf_sap.c b/crypto/conf/conf_sap.c
index 3d2e065e5b07c..2ce42f0c67408 100644
--- a/crypto/conf/conf_sap.c
+++ b/crypto/conf/conf_sap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -39,10 +39,24 @@ void OPENSSL_config(const char *appname)
 }
 #endif
 
-void openssl_config_int(const char *appname)
+int openssl_config_int(const OPENSSL_INIT_SETTINGS *settings)
 {
+    int ret;
+    const char *filename;
+    const char *appname;
+    unsigned long flags;
+
     if (openssl_configured)
-        return;
+        return 1;
+
+    filename = settings ? settings->filename : NULL;
+    appname = settings ? settings->appname : NULL;
+    flags = settings ? settings->flags : DEFAULT_CONF_MFLAGS;
+
+#ifdef OPENSSL_INIT_DEBUG
+    fprintf(stderr, "OPENSSL_INIT: openssl_config_int(%s, %s, %lu)\n",
+            filename, appname, flags);
+#endif
 
     OPENSSL_load_builtin_modules();
 #ifndef OPENSSL_NO_ENGINE
@@ -51,11 +65,10 @@ void openssl_config_int(const char *appname)
 #endif
     ERR_clear_error();
 #ifndef OPENSSL_SYS_UEFI
-    CONF_modules_load_file(NULL, appname,
-                               CONF_MFLAGS_DEFAULT_SECTION |
-                               CONF_MFLAGS_IGNORE_MISSING_FILE);
+    ret = CONF_modules_load_file(filename, appname, flags);
 #endif
     openssl_configured = 1;
+    return ret;
 }
 
 void openssl_no_config_int(void)
diff --git a/crypto/conf/conf_ssl.c b/crypto/conf/conf_ssl.c
index 024bdb4808e39..387f2cf46c31b 100644
--- a/crypto/conf/conf_ssl.c
+++ b/crypto/conf/conf_ssl.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2015-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -78,6 +78,8 @@ static int ssl_module_init(CONF_IMODULE *md, const CONF *cnf)
     cnt = sk_CONF_VALUE_num(cmd_lists);
     ssl_module_free(md);
     ssl_names = OPENSSL_zalloc(sizeof(*ssl_names) * cnt);
+    if (ssl_names == NULL)
+        goto err;
     ssl_names_count = cnt;
     for (i = 0; i < ssl_names_count; i++) {
         struct ssl_conf_name_st *ssl_name = ssl_names + i;
diff --git a/crypto/conf/keysets.pl b/crypto/conf/keysets.pl
index cfa230ec3a1af..27a7214cc5192 100644
--- a/crypto/conf/keysets.pl
+++ b/crypto/conf/keysets.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c
index 1cd77c96d2f7f..7b761a3adced9 100644
--- a/crypto/cryptlib.c
+++ b/crypto/cryptlib.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
@@ -352,9 +352,9 @@ void OPENSSL_showfatal(const char *fmta, ...)
 
     /*
     * TODO: (For non GUI and no std error cases)
-    * Add event logging feature here. 
+    * Add event logging feature here.
     */
-    
+
 #   if !defined(NDEBUG)
         /*
         * We are in a situation where we tried to report a critical
@@ -393,7 +393,7 @@ void OPENSSL_showfatal(const char *fmta, ...)
 #  endif
 # else
     MessageBox(NULL, buf, _T("OpenSSL: FATAL"), MB_OK | MB_ICONERROR);
-# endif     
+# endif
 }
 #else
 void OPENSSL_showfatal(const char *fmta, ...)
@@ -460,4 +460,14 @@ uint32_t OPENSSL_rdtsc(void)
 {
     return 0;
 }
+
+size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt)
+{
+    return 0;
+}
+
+size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max)
+{
+    return 0;
+}
 #endif
diff --git a/crypto/des/asm/des_enc.m4 b/crypto/des/asm/des_enc.m4
index 4a0d15620c00d..4ada97b175d99 100644
--- a/crypto/des/asm/des_enc.m4
+++ b/crypto/des/asm/des_enc.m4
@@ -1,4 +1,4 @@
-! Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
+! Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
 !
 ! Licensed under the OpenSSL license (the "License").  You may not use
 ! this file except in compliance with the License.  You can obtain a copy
@@ -313,7 +313,7 @@ $4:
 	sll	out1, 28, out1            ! rotate
 	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7
 
-	ld	[global2+local2], local2  ! 2 
+	ld	[global2+local2], local2  ! 2
 	srl	out0, 24, local1          ! 7
 	or	out1, local0, out1        ! rotate
 
@@ -1392,7 +1392,7 @@ DES_ncbc_encrypt:
 	add	%o7,global1,global1
 	sub	global1,.PIC.DES_SPtrans-.des_and,out2
 
-	cmp	in5, 0                    ! enc   
+	cmp	in5, 0                    ! enc
 
 	be	.ncbc.dec
 	STPTR	in4, IVEC
diff --git a/crypto/dso/dso_dlfcn.c b/crypto/dso/dso_dlfcn.c
index ad8899c289a37..4240f5f5e30c8 100644
--- a/crypto/dso/dso_dlfcn.c
+++ b/crypto/dso/dso_dlfcn.c
@@ -17,6 +17,7 @@
 #endif
 
 #include "dso_locl.h"
+#include "e_os.h"
 
 #ifdef DSO_DLFCN
 
@@ -99,6 +100,7 @@ static int dlfcn_load(DSO *dso)
     /* See applicable comments in dso_dl.c */
     char *filename = DSO_convert_filename(dso, NULL);
     int flags = DLOPEN_FLAG;
+    int saveerrno = get_last_sys_error();
 
     if (filename == NULL) {
         DSOerr(DSO_F_DLFCN_LOAD, DSO_R_NO_FILENAME);
@@ -118,6 +120,11 @@ static int dlfcn_load(DSO *dso)
         ERR_add_error_data(4, "filename(", filename, "): ", dlerror());
         goto err;
     }
+    /*
+     * Some dlopen() implementations (e.g. solaris) do not preserve errno, even
+     * on a successful call.
+     */
+    set_sys_error(saveerrno);
     if (!sk_void_push(dso->meth_data, (char *)ptr)) {
         DSOerr(DSO_F_DLFCN_LOAD, DSO_R_STACK_ERROR);
         goto err;
diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl
index 1361cb395ffb2..887ddfb1ea9b1 100755
--- a/crypto/ec/asm/ecp_nistz256-armv8.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv8.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2015-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -119,6 +119,7 @@ $code.=<<___;
 .type	ecp_nistz256_to_mont,%function
 .align	6
 ecp_nistz256_to_mont:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-32]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -134,6 +135,7 @@ ecp_nistz256_to_mont:
 
 	ldp	x19,x20,[sp,#16]
 	ldp	x29,x30,[sp],#32
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
 
@@ -142,6 +144,7 @@ ecp_nistz256_to_mont:
 .type	ecp_nistz256_from_mont,%function
 .align	4
 ecp_nistz256_from_mont:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-32]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -157,6 +160,7 @@ ecp_nistz256_from_mont:
 
 	ldp	x19,x20,[sp,#16]
 	ldp	x29,x30,[sp],#32
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
 
@@ -166,6 +170,7 @@ ecp_nistz256_from_mont:
 .type	ecp_nistz256_mul_mont,%function
 .align	4
 ecp_nistz256_mul_mont:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-32]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -180,6 +185,7 @@ ecp_nistz256_mul_mont:
 
 	ldp	x19,x20,[sp,#16]
 	ldp	x29,x30,[sp],#32
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
 
@@ -188,6 +194,7 @@ ecp_nistz256_mul_mont:
 .type	ecp_nistz256_sqr_mont,%function
 .align	4
 ecp_nistz256_sqr_mont:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-32]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -201,6 +208,7 @@ ecp_nistz256_sqr_mont:
 
 	ldp	x19,x20,[sp,#16]
 	ldp	x29,x30,[sp],#32
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
 
@@ -210,6 +218,7 @@ ecp_nistz256_sqr_mont:
 .type	ecp_nistz256_add,%function
 .align	4
 ecp_nistz256_add:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -223,6 +232,7 @@ ecp_nistz256_add:
 	bl	__ecp_nistz256_add
 
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_add,.-ecp_nistz256_add
 
@@ -231,6 +241,7 @@ ecp_nistz256_add:
 .type	ecp_nistz256_div_by_2,%function
 .align	4
 ecp_nistz256_div_by_2:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -242,6 +253,7 @@ ecp_nistz256_div_by_2:
 	bl	__ecp_nistz256_div_by_2
 
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		//  autiasp
 	ret
 .size	ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
 
@@ -250,6 +262,7 @@ ecp_nistz256_div_by_2:
 .type	ecp_nistz256_mul_by_2,%function
 .align	4
 ecp_nistz256_mul_by_2:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -265,6 +278,7 @@ ecp_nistz256_mul_by_2:
 	bl	__ecp_nistz256_add	// ret = a+a	// 2*a
 
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
 
@@ -273,6 +287,7 @@ ecp_nistz256_mul_by_2:
 .type	ecp_nistz256_mul_by_3,%function
 .align	4
 ecp_nistz256_mul_by_3:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -299,6 +314,7 @@ ecp_nistz256_mul_by_3:
 	bl	__ecp_nistz256_add	// ret += a	// 2*a+a=3*a
 
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
 
@@ -308,6 +324,7 @@ ecp_nistz256_mul_by_3:
 .type	ecp_nistz256_sub,%function
 .align	4
 ecp_nistz256_sub:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -319,6 +336,7 @@ ecp_nistz256_sub:
 	bl	__ecp_nistz256_sub_from
 
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_sub,.-ecp_nistz256_sub
 
@@ -327,6 +345,7 @@ ecp_nistz256_sub:
 .type	ecp_nistz256_neg,%function
 .align	4
 ecp_nistz256_neg:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -341,6 +360,7 @@ ecp_nistz256_neg:
 	bl	__ecp_nistz256_sub_from
 
 	ldp	x29,x30,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_neg,.-ecp_nistz256_neg
 
@@ -701,6 +721,7 @@ $code.=<<___;
 .type	ecp_nistz256_point_double,%function
 .align	5
 ecp_nistz256_point_double:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-80]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -835,6 +856,7 @@ ecp_nistz256_point_double:
 	ldp	x19,x20,[x29,#16]
 	ldp	x21,x22,[x29,#32]
 	ldp	x29,x30,[sp],#80
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
 ___
@@ -857,6 +879,7 @@ $code.=<<___;
 .type	ecp_nistz256_point_add,%function
 .align	5
 ecp_nistz256_point_add:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-80]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -1094,12 +1117,13 @@ $code.=<<___;
 	stp	$acc2,$acc3,[$rp_real,#$i+16]
 
 .Ladd_done:
-	add	sp,x29,#0	// destroy frame
+	add	sp,x29,#0		// destroy frame
 	ldp	x19,x20,[x29,#16]
 	ldp	x21,x22,[x29,#32]
 	ldp	x23,x24,[x29,#48]
 	ldp	x25,x26,[x29,#64]
 	ldp	x29,x30,[sp],#80
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_point_add,.-ecp_nistz256_point_add
 ___
@@ -1121,6 +1145,7 @@ $code.=<<___;
 .type	ecp_nistz256_point_add_affine,%function
 .align	5
 ecp_nistz256_point_add_affine:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-80]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -1309,6 +1334,7 @@ $code.=<<___;
 	ldp	x23,x24,[x29,#48]
 	ldp	x25,x26,[x29,#64]
 	ldp	x29,x30,[sp],#80
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
 ___
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index eba6ffd430bef..87149e7f680df 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved.
 # Copyright (c) 2014, Intel Corporation. All Rights Reserved.
 # Copyright (c) 2015 CloudFlare, Inc.
 #
@@ -1674,6 +1674,7 @@ $code.=<<___;
 .type	__ecp_nistz256_mul_montq,\@abi-omnipotent
 .align	32
 __ecp_nistz256_mul_montq:
+.cfi_startproc
 	########################################################################
 	# Multiply a by b[0]
 	mov	%rax, $t1
@@ -1885,6 +1886,7 @@ __ecp_nistz256_mul_montq:
 	mov	$acc1, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
 
 ################################################################################
@@ -1968,6 +1970,7 @@ $code.=<<___;
 .type	__ecp_nistz256_sqr_montq,\@abi-omnipotent
 .align	32
 __ecp_nistz256_sqr_montq:
+.cfi_startproc
 	mov	%rax, $acc5
 	mulq	$acc6			# a[1]*a[0]
 	mov	%rax, $acc1
@@ -2125,6 +2128,7 @@ __ecp_nistz256_sqr_montq:
 	mov	$acc7, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
 ___
 
@@ -2133,6 +2137,7 @@ $code.=<<___;
 .type	__ecp_nistz256_mul_montx,\@abi-omnipotent
 .align	32
 __ecp_nistz256_mul_montx:
+.cfi_startproc
 	########################################################################
 	# Multiply by b[0]
 	mulx	$acc1, $acc0, $acc1
@@ -2295,11 +2300,13 @@ __ecp_nistz256_mul_montx:
 	mov	$acc1, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
 
 .type	__ecp_nistz256_sqr_montx,\@abi-omnipotent
 .align	32
 __ecp_nistz256_sqr_montx:
+.cfi_startproc
 	mulx	$acc6, $acc1, $acc2	# a[0]*a[1]
 	mulx	$acc7, $t0, $acc3	# a[0]*a[2]
 	xor	%eax, %eax
@@ -2423,6 +2430,7 @@ __ecp_nistz256_sqr_montx:
 	mov	$acc7, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
 ___
 }
@@ -2578,6 +2586,7 @@ ecp_nistz256_scatter_w5:
 .type	ecp_nistz256_gather_w5,\@abi-omnipotent
 .align	32
 ecp_nistz256_gather_w5:
+.cfi_startproc
 ___
 $code.=<<___	if ($avx>1);
 	mov	OPENSSL_ia32cap_P+8(%rip), %eax
@@ -2666,6 +2675,7 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .LSEH_end_ecp_nistz256_gather_w5:
 .size	ecp_nistz256_gather_w5,.-ecp_nistz256_gather_w5
 
@@ -2694,6 +2704,7 @@ ecp_nistz256_scatter_w7:
 .type	ecp_nistz256_gather_w7,\@abi-omnipotent
 .align	32
 ecp_nistz256_gather_w7:
+.cfi_startproc
 ___
 $code.=<<___	if ($avx>1);
 	mov	OPENSSL_ia32cap_P+8(%rip), %eax
@@ -2771,6 +2782,7 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .LSEH_end_ecp_nistz256_gather_w7:
 .size	ecp_nistz256_gather_w7,.-ecp_nistz256_gather_w7
 ___
@@ -2787,6 +2799,7 @@ $code.=<<___;
 .type	ecp_nistz256_avx2_gather_w5,\@abi-omnipotent
 .align	32
 ecp_nistz256_avx2_gather_w5:
+.cfi_startproc
 .Lavx2_gather_w5:
 	vzeroupper
 ___
@@ -2874,6 +2887,7 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .LSEH_end_ecp_nistz256_avx2_gather_w5:
 .size	ecp_nistz256_avx2_gather_w5,.-ecp_nistz256_avx2_gather_w5
 ___
@@ -2893,6 +2907,7 @@ $code.=<<___;
 .type	ecp_nistz256_avx2_gather_w7,\@abi-omnipotent
 .align	32
 ecp_nistz256_avx2_gather_w7:
+.cfi_startproc
 .Lavx2_gather_w7:
 	vzeroupper
 ___
@@ -2995,6 +3010,7 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .LSEH_end_ecp_nistz256_avx2_gather_w7:
 .size	ecp_nistz256_avx2_gather_w7,.-ecp_nistz256_avx2_gather_w7
 ___
@@ -3064,6 +3080,7 @@ $code.=<<___;
 .type	__ecp_nistz256_add_toq,\@abi-omnipotent
 .align	32
 __ecp_nistz256_add_toq:
+.cfi_startproc
 	xor	$t4,$t4
 	add	8*0($b_ptr), $a0
 	adc	8*1($b_ptr), $a1
@@ -3091,11 +3108,13 @@ __ecp_nistz256_add_toq:
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
 
 .type	__ecp_nistz256_sub_fromq,\@abi-omnipotent
 .align	32
 __ecp_nistz256_sub_fromq:
+.cfi_startproc
 	sub	8*0($b_ptr), $a0
 	sbb	8*1($b_ptr), $a1
 	 mov	$a0, $t0
@@ -3122,11 +3141,13 @@ __ecp_nistz256_sub_fromq:
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
 
 .type	__ecp_nistz256_subq,\@abi-omnipotent
 .align	32
 __ecp_nistz256_subq:
+.cfi_startproc
 	sub	$a0, $t0
 	sbb	$a1, $t1
 	 mov	$t0, $a0
@@ -3149,11 +3170,13 @@ __ecp_nistz256_subq:
 	cmovnz	$t3, $a3
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_subq,.-__ecp_nistz256_subq
 
 .type	__ecp_nistz256_mul_by_2q,\@abi-omnipotent
 .align	32
 __ecp_nistz256_mul_by_2q:
+.cfi_startproc
 	xor	$t4, $t4
 	add	$a0, $a0		# a0:a3+a0:a3
 	adc	$a1, $a1
@@ -3181,6 +3204,7 @@ __ecp_nistz256_mul_by_2q:
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
 ___
 									}
@@ -3620,7 +3644,9 @@ $code.=<<___;
 	movq	%xmm1, $a_ptr			# restore $a_ptr
 	movq	%xmm0, $r_ptr			# restore $r_ptr
 	add	\$`32*(18-5)`, %rsp		# difference in frame sizes
+.cfi_adjust_cfa_offset	`-32*(18-5)`
 	jmp	.Lpoint_double_shortcut$x
+.cfi_adjust_cfa_offset	`32*(18-5)`
 
 .align	32
 .Ladd_proceed$x:
@@ -4156,6 +4182,7 @@ $code.=<<___;
 .type	__ecp_nistz256_add_tox,\@abi-omnipotent
 .align	32
 __ecp_nistz256_add_tox:
+.cfi_startproc
 	xor	$t4, $t4
 	adc	8*0($b_ptr), $a0
 	adc	8*1($b_ptr), $a1
@@ -4184,11 +4211,13 @@ __ecp_nistz256_add_tox:
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
 
 .type	__ecp_nistz256_sub_fromx,\@abi-omnipotent
 .align	32
 __ecp_nistz256_sub_fromx:
+.cfi_startproc
 	xor	$t4, $t4
 	sbb	8*0($b_ptr), $a0
 	sbb	8*1($b_ptr), $a1
@@ -4217,11 +4246,13 @@ __ecp_nistz256_sub_fromx:
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
 
 .type	__ecp_nistz256_subx,\@abi-omnipotent
 .align	32
 __ecp_nistz256_subx:
+.cfi_startproc
 	xor	$t4, $t4
 	sbb	$a0, $t0
 	sbb	$a1, $t1
@@ -4246,11 +4277,13 @@ __ecp_nistz256_subx:
 	cmovc	$t3, $a3
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_subx,.-__ecp_nistz256_subx
 
 .type	__ecp_nistz256_mul_by_2x,\@abi-omnipotent
 .align	32
 __ecp_nistz256_mul_by_2x:
+.cfi_startproc
 	xor	$t4, $t4
 	adc	$a0, $a0		# a0:a3+a0:a3
 	adc	$a1, $a1
@@ -4279,6 +4312,7 @@ __ecp_nistz256_mul_by_2x:
 	mov	$a3, 8*3($r_ptr)
 
 	ret
+.cfi_endproc
 .size	__ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
 ___
 									}
diff --git a/crypto/ec/curve25519.c b/crypto/ec/curve25519.c
index abe9b9cbf6dd0..aa999cc5914e5 100644
--- a/crypto/ec/curve25519.c
+++ b/crypto/ec/curve25519.c
@@ -744,91 +744,99 @@ static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
 
 /*
  * Reference base 2^25.5 implementation.
- */
-/*
+ *
  * This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
  * 20141124 (http://bench.cr.yp.to/supercop.html).
  *
  * The field functions are shared by Ed25519 and X25519 where possible.
  */
 
-/* fe means field element. Here the field is \Z/(2^255-19). An element t,
+/*
+ * fe means field element. Here the field is \Z/(2^255-19). An element t,
  * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
  * t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
- * context.  */
+ * context.
+ */
 typedef int32_t fe[10];
 
+static const int64_t kBottom21Bits =  0x1fffffLL;
 static const int64_t kBottom25Bits = 0x1ffffffLL;
 static const int64_t kBottom26Bits = 0x3ffffffLL;
 static const int64_t kTop39Bits = 0xfffffffffe000000LL;
 static const int64_t kTop38Bits = 0xfffffffffc000000LL;
 
-static uint64_t load_3(const uint8_t *in) {
-  uint64_t result;
-  result = (uint64_t)in[0];
-  result |= ((uint64_t)in[1]) << 8;
-  result |= ((uint64_t)in[2]) << 16;
-  return result;
+static uint64_t load_3(const uint8_t *in)
+{
+    uint64_t result;
+
+    result  = ((uint64_t)in[0]);
+    result |= ((uint64_t)in[1]) << 8;
+    result |= ((uint64_t)in[2]) << 16;
+    return result;
 }
 
-static uint64_t load_4(const uint8_t *in) {
-  uint64_t result;
-  result = (uint64_t)in[0];
-  result |= ((uint64_t)in[1]) << 8;
-  result |= ((uint64_t)in[2]) << 16;
-  result |= ((uint64_t)in[3]) << 24;
-  return result;
+static uint64_t load_4(const uint8_t *in)
+{
+    uint64_t result;
+
+    result  = ((uint64_t)in[0]);
+    result |= ((uint64_t)in[1]) << 8;
+    result |= ((uint64_t)in[2]) << 16;
+    result |= ((uint64_t)in[3]) << 24;
+    return result;
 }
 
-static void fe_frombytes(fe h, const uint8_t *s) {
-  /* Ignores top bit of h. */
-  int64_t h0 = load_4(s);
-  int64_t h1 = load_3(s + 4) << 6;
-  int64_t h2 = load_3(s + 7) << 5;
-  int64_t h3 = load_3(s + 10) << 3;
-  int64_t h4 = load_3(s + 13) << 2;
-  int64_t h5 = load_4(s + 16);
-  int64_t h6 = load_3(s + 20) << 7;
-  int64_t h7 = load_3(s + 23) << 5;
-  int64_t h8 = load_3(s + 26) << 4;
-  int64_t h9 = (load_3(s + 29) & 8388607) << 2;
-  int64_t carry0;
-  int64_t carry1;
-  int64_t carry2;
-  int64_t carry3;
-  int64_t carry4;
-  int64_t carry5;
-  int64_t carry6;
-  int64_t carry7;
-  int64_t carry8;
-  int64_t carry9;
-
-  carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
-  carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
-  carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
-  carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
-  carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
-
-  carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
-  carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
-  carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
-  carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
-  carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
-
-  h[0] = (int32_t)h0;
-  h[1] = (int32_t)h1;
-  h[2] = (int32_t)h2;
-  h[3] = (int32_t)h3;
-  h[4] = (int32_t)h4;
-  h[5] = (int32_t)h5;
-  h[6] = (int32_t)h6;
-  h[7] = (int32_t)h7;
-  h[8] = (int32_t)h8;
-  h[9] = (int32_t)h9;
+static void fe_frombytes(fe h, const uint8_t *s)
+{
+    /* Ignores top bit of h. */
+    int64_t h0 =  load_4(s);
+    int64_t h1 =  load_3(s +  4) << 6;
+    int64_t h2 =  load_3(s +  7) << 5;
+    int64_t h3 =  load_3(s + 10) << 3;
+    int64_t h4 =  load_3(s + 13) << 2;
+    int64_t h5 =  load_4(s + 16);
+    int64_t h6 =  load_3(s + 20) << 7;
+    int64_t h7 =  load_3(s + 23) << 5;
+    int64_t h8 =  load_3(s + 26) << 4;
+    int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
+    int64_t carry0;
+    int64_t carry1;
+    int64_t carry2;
+    int64_t carry3;
+    int64_t carry4;
+    int64_t carry5;
+    int64_t carry6;
+    int64_t carry7;
+    int64_t carry8;
+    int64_t carry9;
+
+    carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
+    carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
+    carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
+    carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
+    carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
+
+    carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+    carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
+    carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+    carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
+    carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
+
+    h[0] = (int32_t)h0;
+    h[1] = (int32_t)h1;
+    h[2] = (int32_t)h2;
+    h[3] = (int32_t)h3;
+    h[4] = (int32_t)h4;
+    h[5] = (int32_t)h5;
+    h[6] = (int32_t)h6;
+    h[7] = (int32_t)h7;
+    h[8] = (int32_t)h8;
+    h[9] = (int32_t)h9;
 }
 
-/* Preconditions:
- *  |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+/*
+ * Preconditions:
+ *   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  *
  * Write p=2^255-19; q=floor(h/p).
  * Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
@@ -848,102 +856,112 @@ static void fe_frombytes(fe h, const uint8_t *s) {
  *   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
  *
  *   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
- *   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. */
-static void fe_tobytes(uint8_t *s, const fe h) {
-  int32_t h0 = h[0];
-  int32_t h1 = h[1];
-  int32_t h2 = h[2];
-  int32_t h3 = h[3];
-  int32_t h4 = h[4];
-  int32_t h5 = h[5];
-  int32_t h6 = h[6];
-  int32_t h7 = h[7];
-  int32_t h8 = h[8];
-  int32_t h9 = h[9];
-  int32_t q;
-
-  q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
-  q = (h0 + q) >> 26;
-  q = (h1 + q) >> 25;
-  q = (h2 + q) >> 26;
-  q = (h3 + q) >> 25;
-  q = (h4 + q) >> 26;
-  q = (h5 + q) >> 25;
-  q = (h6 + q) >> 26;
-  q = (h7 + q) >> 25;
-  q = (h8 + q) >> 26;
-  q = (h9 + q) >> 25;
-
-  /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
-  h0 += 19 * q;
-  /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
-
-  h1 += h0 >> 26; h0 &= kBottom26Bits;
-  h2 += h1 >> 25; h1 &= kBottom25Bits;
-  h3 += h2 >> 26; h2 &= kBottom26Bits;
-  h4 += h3 >> 25; h3 &= kBottom25Bits;
-  h5 += h4 >> 26; h4 &= kBottom26Bits;
-  h6 += h5 >> 25; h5 &= kBottom25Bits;
-  h7 += h6 >> 26; h6 &= kBottom26Bits;
-  h8 += h7 >> 25; h7 &= kBottom25Bits;
-  h9 += h8 >> 26; h8 &= kBottom26Bits;
-                  h9 &= kBottom25Bits;
-                  /* h10 = carry9 */
-
-  /* Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
-   * Have h0+...+2^230 h9 between 0 and 2^255-1;
-   * evidently 2^255 h10-2^255 q = 0.
-   * Goal: Output h0+...+2^230 h9.  */
-
-  s[0] = (uint8_t)(h0 >> 0);
-  s[1] = (uint8_t)(h0 >> 8);
-  s[2] = (uint8_t)(h0 >> 16);
-  s[3] = (uint8_t)((h0 >> 24) | ((uint32_t)(h1) << 2));
-  s[4] = (uint8_t)(h1 >> 6);
-  s[5] = (uint8_t)(h1 >> 14);
-  s[6] = (uint8_t)((h1 >> 22) | ((uint32_t)(h2) << 3));
-  s[7] = (uint8_t)(h2 >> 5);
-  s[8] = (uint8_t)(h2 >> 13);
-  s[9] = (uint8_t)((h2 >> 21) | ((uint32_t)(h3) << 5));
-  s[10] = (uint8_t)(h3 >> 3);
-  s[11] = (uint8_t)(h3 >> 11);
-  s[12] = (uint8_t)((h3 >> 19) | ((uint32_t)(h4) << 6));
-  s[13] = (uint8_t)(h4 >> 2);
-  s[14] = (uint8_t)(h4 >> 10);
-  s[15] = (uint8_t)(h4 >> 18);
-  s[16] = (uint8_t)(h5 >> 0);
-  s[17] = (uint8_t)(h5 >> 8);
-  s[18] = (uint8_t)(h5 >> 16);
-  s[19] = (uint8_t)((h5 >> 24) | ((uint32_t)(h6) << 1));
-  s[20] = (uint8_t)(h6 >> 7);
-  s[21] = (uint8_t)(h6 >> 15);
-  s[22] = (uint8_t)((h6 >> 23) | ((uint32_t)(h7) << 3));
-  s[23] = (uint8_t)(h7 >> 5);
-  s[24] = (uint8_t)(h7 >> 13);
-  s[25] = (uint8_t)((h7 >> 21) | ((uint32_t)(h8) << 4));
-  s[26] = (uint8_t)(h8 >> 4);
-  s[27] = (uint8_t)(h8 >> 12);
-  s[28] = (uint8_t)((h8 >> 20) | ((uint32_t)(h9) << 6));
-  s[29] = (uint8_t)(h9 >> 2);
-  s[30] = (uint8_t)(h9 >> 10);
-  s[31] = (uint8_t)(h9 >> 18);
+ *   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
+ */
+static void fe_tobytes(uint8_t *s, const fe h)
+{
+    int32_t h0 = h[0];
+    int32_t h1 = h[1];
+    int32_t h2 = h[2];
+    int32_t h3 = h[3];
+    int32_t h4 = h[4];
+    int32_t h5 = h[5];
+    int32_t h6 = h[6];
+    int32_t h7 = h[7];
+    int32_t h8 = h[8];
+    int32_t h9 = h[9];
+    int32_t q;
+
+    q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
+    q = (h0 + q) >> 26;
+    q = (h1 + q) >> 25;
+    q = (h2 + q) >> 26;
+    q = (h3 + q) >> 25;
+    q = (h4 + q) >> 26;
+    q = (h5 + q) >> 25;
+    q = (h6 + q) >> 26;
+    q = (h7 + q) >> 25;
+    q = (h8 + q) >> 26;
+    q = (h9 + q) >> 25;
+
+    /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
+    h0 += 19 * q;
+    /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
+
+    h1 += h0 >> 26; h0 &= kBottom26Bits;
+    h2 += h1 >> 25; h1 &= kBottom25Bits;
+    h3 += h2 >> 26; h2 &= kBottom26Bits;
+    h4 += h3 >> 25; h3 &= kBottom25Bits;
+    h5 += h4 >> 26; h4 &= kBottom26Bits;
+    h6 += h5 >> 25; h5 &= kBottom25Bits;
+    h7 += h6 >> 26; h6 &= kBottom26Bits;
+    h8 += h7 >> 25; h7 &= kBottom25Bits;
+    h9 += h8 >> 26; h8 &= kBottom26Bits;
+                    h9 &= kBottom25Bits;
+    /* h10 = carry9 */
+
+    /*
+     * Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
+     * Have h0+...+2^230 h9 between 0 and 2^255-1;
+     * evidently 2^255 h10-2^255 q = 0.
+     * Goal: Output h0+...+2^230 h9.
+     */
+    s[ 0] = (uint8_t) (h0 >>  0);
+    s[ 1] = (uint8_t) (h0 >>  8);
+    s[ 2] = (uint8_t) (h0 >> 16);
+    s[ 3] = (uint8_t)((h0 >> 24) | ((uint32_t)(h1) << 2));
+    s[ 4] = (uint8_t) (h1 >>  6);
+    s[ 5] = (uint8_t) (h1 >> 14);
+    s[ 6] = (uint8_t)((h1 >> 22) | ((uint32_t)(h2) << 3));
+    s[ 7] = (uint8_t) (h2 >>  5);
+    s[ 8] = (uint8_t) (h2 >> 13);
+    s[ 9] = (uint8_t)((h2 >> 21) | ((uint32_t)(h3) << 5));
+    s[10] = (uint8_t) (h3 >>  3);
+    s[11] = (uint8_t) (h3 >> 11);
+    s[12] = (uint8_t)((h3 >> 19) | ((uint32_t)(h4) << 6));
+    s[13] = (uint8_t) (h4 >>  2);
+    s[14] = (uint8_t) (h4 >> 10);
+    s[15] = (uint8_t) (h4 >> 18);
+    s[16] = (uint8_t) (h5 >>  0);
+    s[17] = (uint8_t) (h5 >>  8);
+    s[18] = (uint8_t) (h5 >> 16);
+    s[19] = (uint8_t)((h5 >> 24) | ((uint32_t)(h6) << 1));
+    s[20] = (uint8_t) (h6 >>  7);
+    s[21] = (uint8_t) (h6 >> 15);
+    s[22] = (uint8_t)((h6 >> 23) | ((uint32_t)(h7) << 3));
+    s[23] = (uint8_t) (h7 >>  5);
+    s[24] = (uint8_t) (h7 >> 13);
+    s[25] = (uint8_t)((h7 >> 21) | ((uint32_t)(h8) << 4));
+    s[26] = (uint8_t) (h8 >>  4);
+    s[27] = (uint8_t) (h8 >> 12);
+    s[28] = (uint8_t)((h8 >> 20) | ((uint32_t)(h9) << 6));
+    s[29] = (uint8_t) (h9 >>  2);
+    s[30] = (uint8_t) (h9 >> 10);
+    s[31] = (uint8_t) (h9 >> 18);
 }
 
 /* h = f */
-static void fe_copy(fe h, const fe f) {
-  memmove(h, f, sizeof(int32_t) * 10);
+static void fe_copy(fe h, const fe f)
+{
+    memmove(h, f, sizeof(int32_t) * 10);
 }
 
 /* h = 0 */
-static void fe_0(fe h) { memset(h, 0, sizeof(int32_t) * 10); }
+static void fe_0(fe h)
+{
+    memset(h, 0, sizeof(int32_t) * 10);
+}
 
 /* h = 1 */
-static void fe_1(fe h) {
-  memset(h, 0, sizeof(int32_t) * 10);
-  h[0] = 1;
+static void fe_1(fe h)
+{
+    memset(h, 0, sizeof(int32_t) * 10);
+    h[0] = 1;
 }
 
-/* h = f + g
+/*
+ * h = f + g
+ *
  * Can overlap h with f or g.
  *
  * Preconditions:
@@ -951,15 +969,20 @@ static void fe_1(fe h) {
  *    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  *
  * Postconditions:
- *    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
-static void fe_add(fe h, const fe f, const fe g) {
-  unsigned i;
-  for (i = 0; i < 10; i++) {
-    h[i] = f[i] + g[i];
-  }
+ *    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+ */
+static void fe_add(fe h, const fe f, const fe g)
+{
+    unsigned i;
+
+    for (i = 0; i < 10; i++) {
+        h[i] = f[i] + g[i];
+    }
 }
 
-/* h = f - g
+/*
+ * h = f - g
+ *
  * Can overlap h with f or g.
  *
  * Preconditions:
@@ -967,15 +990,20 @@ static void fe_add(fe h, const fe f, const fe g) {
  *    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  *
  * Postconditions:
- *    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
-static void fe_sub(fe h, const fe f, const fe g) {
-  unsigned i;
-  for (i = 0; i < 10; i++) {
-    h[i] = f[i] - g[i];
-  }
+ *    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+ */
+static void fe_sub(fe h, const fe f, const fe g)
+{
+    unsigned i;
+
+    for (i = 0; i < 10; i++) {
+        h[i] = f[i] - g[i];
+    }
 }
 
-/* h = f * g
+/*
+ * h = f * g
+ *
  * Can overlap h with f or g.
  *
  * Preconditions:
@@ -1001,224 +1029,228 @@ static void fe_sub(fe h, const fe f, const fe g) {
  * 10 of them are 2-way parallelizable and vectorizable.
  * Can get away with 11 carries, but then data flow is much deeper.
  *
- * With tighter constraints on inputs can squeeze carries into int32. */
-static void fe_mul(fe h, const fe f, const fe g) {
-  int32_t f0 = f[0];
-  int32_t f1 = f[1];
-  int32_t f2 = f[2];
-  int32_t f3 = f[3];
-  int32_t f4 = f[4];
-  int32_t f5 = f[5];
-  int32_t f6 = f[6];
-  int32_t f7 = f[7];
-  int32_t f8 = f[8];
-  int32_t f9 = f[9];
-  int32_t g0 = g[0];
-  int32_t g1 = g[1];
-  int32_t g2 = g[2];
-  int32_t g3 = g[3];
-  int32_t g4 = g[4];
-  int32_t g5 = g[5];
-  int32_t g6 = g[6];
-  int32_t g7 = g[7];
-  int32_t g8 = g[8];
-  int32_t g9 = g[9];
-  int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
-  int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
-  int32_t g3_19 = 19 * g3;
-  int32_t g4_19 = 19 * g4;
-  int32_t g5_19 = 19 * g5;
-  int32_t g6_19 = 19 * g6;
-  int32_t g7_19 = 19 * g7;
-  int32_t g8_19 = 19 * g8;
-  int32_t g9_19 = 19 * g9;
-  int32_t f1_2 = 2 * f1;
-  int32_t f3_2 = 2 * f3;
-  int32_t f5_2 = 2 * f5;
-  int32_t f7_2 = 2 * f7;
-  int32_t f9_2 = 2 * f9;
-  int64_t f0g0    = f0   * (int64_t) g0;
-  int64_t f0g1    = f0   * (int64_t) g1;
-  int64_t f0g2    = f0   * (int64_t) g2;
-  int64_t f0g3    = f0   * (int64_t) g3;
-  int64_t f0g4    = f0   * (int64_t) g4;
-  int64_t f0g5    = f0   * (int64_t) g5;
-  int64_t f0g6    = f0   * (int64_t) g6;
-  int64_t f0g7    = f0   * (int64_t) g7;
-  int64_t f0g8    = f0   * (int64_t) g8;
-  int64_t f0g9    = f0   * (int64_t) g9;
-  int64_t f1g0    = f1   * (int64_t) g0;
-  int64_t f1g1_2  = f1_2 * (int64_t) g1;
-  int64_t f1g2    = f1   * (int64_t) g2;
-  int64_t f1g3_2  = f1_2 * (int64_t) g3;
-  int64_t f1g4    = f1   * (int64_t) g4;
-  int64_t f1g5_2  = f1_2 * (int64_t) g5;
-  int64_t f1g6    = f1   * (int64_t) g6;
-  int64_t f1g7_2  = f1_2 * (int64_t) g7;
-  int64_t f1g8    = f1   * (int64_t) g8;
-  int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
-  int64_t f2g0    = f2   * (int64_t) g0;
-  int64_t f2g1    = f2   * (int64_t) g1;
-  int64_t f2g2    = f2   * (int64_t) g2;
-  int64_t f2g3    = f2   * (int64_t) g3;
-  int64_t f2g4    = f2   * (int64_t) g4;
-  int64_t f2g5    = f2   * (int64_t) g5;
-  int64_t f2g6    = f2   * (int64_t) g6;
-  int64_t f2g7    = f2   * (int64_t) g7;
-  int64_t f2g8_19 = f2   * (int64_t) g8_19;
-  int64_t f2g9_19 = f2   * (int64_t) g9_19;
-  int64_t f3g0    = f3   * (int64_t) g0;
-  int64_t f3g1_2  = f3_2 * (int64_t) g1;
-  int64_t f3g2    = f3   * (int64_t) g2;
-  int64_t f3g3_2  = f3_2 * (int64_t) g3;
-  int64_t f3g4    = f3   * (int64_t) g4;
-  int64_t f3g5_2  = f3_2 * (int64_t) g5;
-  int64_t f3g6    = f3   * (int64_t) g6;
-  int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
-  int64_t f3g8_19 = f3   * (int64_t) g8_19;
-  int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
-  int64_t f4g0    = f4   * (int64_t) g0;
-  int64_t f4g1    = f4   * (int64_t) g1;
-  int64_t f4g2    = f4   * (int64_t) g2;
-  int64_t f4g3    = f4   * (int64_t) g3;
-  int64_t f4g4    = f4   * (int64_t) g4;
-  int64_t f4g5    = f4   * (int64_t) g5;
-  int64_t f4g6_19 = f4   * (int64_t) g6_19;
-  int64_t f4g7_19 = f4   * (int64_t) g7_19;
-  int64_t f4g8_19 = f4   * (int64_t) g8_19;
-  int64_t f4g9_19 = f4   * (int64_t) g9_19;
-  int64_t f5g0    = f5   * (int64_t) g0;
-  int64_t f5g1_2  = f5_2 * (int64_t) g1;
-  int64_t f5g2    = f5   * (int64_t) g2;
-  int64_t f5g3_2  = f5_2 * (int64_t) g3;
-  int64_t f5g4    = f5   * (int64_t) g4;
-  int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
-  int64_t f5g6_19 = f5   * (int64_t) g6_19;
-  int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
-  int64_t f5g8_19 = f5   * (int64_t) g8_19;
-  int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
-  int64_t f6g0    = f6   * (int64_t) g0;
-  int64_t f6g1    = f6   * (int64_t) g1;
-  int64_t f6g2    = f6   * (int64_t) g2;
-  int64_t f6g3    = f6   * (int64_t) g3;
-  int64_t f6g4_19 = f6   * (int64_t) g4_19;
-  int64_t f6g5_19 = f6   * (int64_t) g5_19;
-  int64_t f6g6_19 = f6   * (int64_t) g6_19;
-  int64_t f6g7_19 = f6   * (int64_t) g7_19;
-  int64_t f6g8_19 = f6   * (int64_t) g8_19;
-  int64_t f6g9_19 = f6   * (int64_t) g9_19;
-  int64_t f7g0    = f7   * (int64_t) g0;
-  int64_t f7g1_2  = f7_2 * (int64_t) g1;
-  int64_t f7g2    = f7   * (int64_t) g2;
-  int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
-  int64_t f7g4_19 = f7   * (int64_t) g4_19;
-  int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
-  int64_t f7g6_19 = f7   * (int64_t) g6_19;
-  int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
-  int64_t f7g8_19 = f7   * (int64_t) g8_19;
-  int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
-  int64_t f8g0    = f8   * (int64_t) g0;
-  int64_t f8g1    = f8   * (int64_t) g1;
-  int64_t f8g2_19 = f8   * (int64_t) g2_19;
-  int64_t f8g3_19 = f8   * (int64_t) g3_19;
-  int64_t f8g4_19 = f8   * (int64_t) g4_19;
-  int64_t f8g5_19 = f8   * (int64_t) g5_19;
-  int64_t f8g6_19 = f8   * (int64_t) g6_19;
-  int64_t f8g7_19 = f8   * (int64_t) g7_19;
-  int64_t f8g8_19 = f8   * (int64_t) g8_19;
-  int64_t f8g9_19 = f8   * (int64_t) g9_19;
-  int64_t f9g0    = f9   * (int64_t) g0;
-  int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
-  int64_t f9g2_19 = f9   * (int64_t) g2_19;
-  int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
-  int64_t f9g4_19 = f9   * (int64_t) g4_19;
-  int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
-  int64_t f9g6_19 = f9   * (int64_t) g6_19;
-  int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
-  int64_t f9g8_19 = f9   * (int64_t) g8_19;
-  int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
-  int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
-  int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
-  int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
-  int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
-  int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
-  int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
-  int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
-  int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
-  int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
-  int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
-  int64_t carry0;
-  int64_t carry1;
-  int64_t carry2;
-  int64_t carry3;
-  int64_t carry4;
-  int64_t carry5;
-  int64_t carry6;
-  int64_t carry7;
-  int64_t carry8;
-  int64_t carry9;
-
-  /* |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
-   *   i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
-   * |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
-   *   i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 */
-
-  carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
-  carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
-  /* |h0| <= 2^25 */
-  /* |h4| <= 2^25 */
-  /* |h1| <= 1.71*2^59 */
-  /* |h5| <= 1.71*2^59 */
-
-  carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
-  carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
-  /* |h1| <= 2^24; from now on fits into int32 */
-  /* |h5| <= 2^24; from now on fits into int32 */
-  /* |h2| <= 1.41*2^60 */
-  /* |h6| <= 1.41*2^60 */
-
-  carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
-  carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
-  /* |h2| <= 2^25; from now on fits into int32 unchanged */
-  /* |h6| <= 2^25; from now on fits into int32 unchanged */
-  /* |h3| <= 1.71*2^59 */
-  /* |h7| <= 1.71*2^59 */
-
-  carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
-  carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
-  /* |h3| <= 2^24; from now on fits into int32 unchanged */
-  /* |h7| <= 2^24; from now on fits into int32 unchanged */
-  /* |h4| <= 1.72*2^34 */
-  /* |h8| <= 1.41*2^60 */
-
-  carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
-  carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
-  /* |h4| <= 2^25; from now on fits into int32 unchanged */
-  /* |h8| <= 2^25; from now on fits into int32 unchanged */
-  /* |h5| <= 1.01*2^24 */
-  /* |h9| <= 1.71*2^59 */
-
-  carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
-  /* |h9| <= 2^24; from now on fits into int32 unchanged */
-  /* |h0| <= 1.1*2^39 */
-
-  carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
-  /* |h0| <= 2^25; from now on fits into int32 unchanged */
-  /* |h1| <= 1.01*2^24 */
-
-  h[0] = (int32_t)h0;
-  h[1] = (int32_t)h1;
-  h[2] = (int32_t)h2;
-  h[3] = (int32_t)h3;
-  h[4] = (int32_t)h4;
-  h[5] = (int32_t)h5;
-  h[6] = (int32_t)h6;
-  h[7] = (int32_t)h7;
-  h[8] = (int32_t)h8;
-  h[9] = (int32_t)h9;
+ * With tighter constraints on inputs can squeeze carries into int32.
+ */
+static void fe_mul(fe h, const fe f, const fe g)
+{
+    int32_t f0 = f[0];
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+    int32_t g0 = g[0];
+    int32_t g1 = g[1];
+    int32_t g2 = g[2];
+    int32_t g3 = g[3];
+    int32_t g4 = g[4];
+    int32_t g5 = g[5];
+    int32_t g6 = g[6];
+    int32_t g7 = g[7];
+    int32_t g8 = g[8];
+    int32_t g9 = g[9];
+    int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
+    int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
+    int32_t g3_19 = 19 * g3;
+    int32_t g4_19 = 19 * g4;
+    int32_t g5_19 = 19 * g5;
+    int32_t g6_19 = 19 * g6;
+    int32_t g7_19 = 19 * g7;
+    int32_t g8_19 = 19 * g8;
+    int32_t g9_19 = 19 * g9;
+    int32_t f1_2 = 2 * f1;
+    int32_t f3_2 = 2 * f3;
+    int32_t f5_2 = 2 * f5;
+    int32_t f7_2 = 2 * f7;
+    int32_t f9_2 = 2 * f9;
+    int64_t f0g0    = f0   * (int64_t) g0;
+    int64_t f0g1    = f0   * (int64_t) g1;
+    int64_t f0g2    = f0   * (int64_t) g2;
+    int64_t f0g3    = f0   * (int64_t) g3;
+    int64_t f0g4    = f0   * (int64_t) g4;
+    int64_t f0g5    = f0   * (int64_t) g5;
+    int64_t f0g6    = f0   * (int64_t) g6;
+    int64_t f0g7    = f0   * (int64_t) g7;
+    int64_t f0g8    = f0   * (int64_t) g8;
+    int64_t f0g9    = f0   * (int64_t) g9;
+    int64_t f1g0    = f1   * (int64_t) g0;
+    int64_t f1g1_2  = f1_2 * (int64_t) g1;
+    int64_t f1g2    = f1   * (int64_t) g2;
+    int64_t f1g3_2  = f1_2 * (int64_t) g3;
+    int64_t f1g4    = f1   * (int64_t) g4;
+    int64_t f1g5_2  = f1_2 * (int64_t) g5;
+    int64_t f1g6    = f1   * (int64_t) g6;
+    int64_t f1g7_2  = f1_2 * (int64_t) g7;
+    int64_t f1g8    = f1   * (int64_t) g8;
+    int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
+    int64_t f2g0    = f2   * (int64_t) g0;
+    int64_t f2g1    = f2   * (int64_t) g1;
+    int64_t f2g2    = f2   * (int64_t) g2;
+    int64_t f2g3    = f2   * (int64_t) g3;
+    int64_t f2g4    = f2   * (int64_t) g4;
+    int64_t f2g5    = f2   * (int64_t) g5;
+    int64_t f2g6    = f2   * (int64_t) g6;
+    int64_t f2g7    = f2   * (int64_t) g7;
+    int64_t f2g8_19 = f2   * (int64_t) g8_19;
+    int64_t f2g9_19 = f2   * (int64_t) g9_19;
+    int64_t f3g0    = f3   * (int64_t) g0;
+    int64_t f3g1_2  = f3_2 * (int64_t) g1;
+    int64_t f3g2    = f3   * (int64_t) g2;
+    int64_t f3g3_2  = f3_2 * (int64_t) g3;
+    int64_t f3g4    = f3   * (int64_t) g4;
+    int64_t f3g5_2  = f3_2 * (int64_t) g5;
+    int64_t f3g6    = f3   * (int64_t) g6;
+    int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
+    int64_t f3g8_19 = f3   * (int64_t) g8_19;
+    int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
+    int64_t f4g0    = f4   * (int64_t) g0;
+    int64_t f4g1    = f4   * (int64_t) g1;
+    int64_t f4g2    = f4   * (int64_t) g2;
+    int64_t f4g3    = f4   * (int64_t) g3;
+    int64_t f4g4    = f4   * (int64_t) g4;
+    int64_t f4g5    = f4   * (int64_t) g5;
+    int64_t f4g6_19 = f4   * (int64_t) g6_19;
+    int64_t f4g7_19 = f4   * (int64_t) g7_19;
+    int64_t f4g8_19 = f4   * (int64_t) g8_19;
+    int64_t f4g9_19 = f4   * (int64_t) g9_19;
+    int64_t f5g0    = f5   * (int64_t) g0;
+    int64_t f5g1_2  = f5_2 * (int64_t) g1;
+    int64_t f5g2    = f5   * (int64_t) g2;
+    int64_t f5g3_2  = f5_2 * (int64_t) g3;
+    int64_t f5g4    = f5   * (int64_t) g4;
+    int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
+    int64_t f5g6_19 = f5   * (int64_t) g6_19;
+    int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
+    int64_t f5g8_19 = f5   * (int64_t) g8_19;
+    int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
+    int64_t f6g0    = f6   * (int64_t) g0;
+    int64_t f6g1    = f6   * (int64_t) g1;
+    int64_t f6g2    = f6   * (int64_t) g2;
+    int64_t f6g3    = f6   * (int64_t) g3;
+    int64_t f6g4_19 = f6   * (int64_t) g4_19;
+    int64_t f6g5_19 = f6   * (int64_t) g5_19;
+    int64_t f6g6_19 = f6   * (int64_t) g6_19;
+    int64_t f6g7_19 = f6   * (int64_t) g7_19;
+    int64_t f6g8_19 = f6   * (int64_t) g8_19;
+    int64_t f6g9_19 = f6   * (int64_t) g9_19;
+    int64_t f7g0    = f7   * (int64_t) g0;
+    int64_t f7g1_2  = f7_2 * (int64_t) g1;
+    int64_t f7g2    = f7   * (int64_t) g2;
+    int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
+    int64_t f7g4_19 = f7   * (int64_t) g4_19;
+    int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
+    int64_t f7g6_19 = f7   * (int64_t) g6_19;
+    int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
+    int64_t f7g8_19 = f7   * (int64_t) g8_19;
+    int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
+    int64_t f8g0    = f8   * (int64_t) g0;
+    int64_t f8g1    = f8   * (int64_t) g1;
+    int64_t f8g2_19 = f8   * (int64_t) g2_19;
+    int64_t f8g3_19 = f8   * (int64_t) g3_19;
+    int64_t f8g4_19 = f8   * (int64_t) g4_19;
+    int64_t f8g5_19 = f8   * (int64_t) g5_19;
+    int64_t f8g6_19 = f8   * (int64_t) g6_19;
+    int64_t f8g7_19 = f8   * (int64_t) g7_19;
+    int64_t f8g8_19 = f8   * (int64_t) g8_19;
+    int64_t f8g9_19 = f8   * (int64_t) g9_19;
+    int64_t f9g0    = f9   * (int64_t) g0;
+    int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
+    int64_t f9g2_19 = f9   * (int64_t) g2_19;
+    int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
+    int64_t f9g4_19 = f9   * (int64_t) g4_19;
+    int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
+    int64_t f9g6_19 = f9   * (int64_t) g6_19;
+    int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
+    int64_t f9g8_19 = f9   * (int64_t) g8_19;
+    int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
+    int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
+    int64_t h1 = f0g1 + f1g0    + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
+    int64_t h2 = f0g2 + f1g1_2  + f2g0    + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
+    int64_t h3 = f0g3 + f1g2    + f2g1    + f3g0    + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
+    int64_t h4 = f0g4 + f1g3_2  + f2g2    + f3g1_2  + f4g0    + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
+    int64_t h5 = f0g5 + f1g4    + f2g3    + f3g2    + f4g1    + f5g0    + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
+    int64_t h6 = f0g6 + f1g5_2  + f2g4    + f3g3_2  + f4g2    + f5g1_2  + f6g0    + f7g9_38 + f8g8_19 + f9g7_38;
+    int64_t h7 = f0g7 + f1g6    + f2g5    + f3g4    + f4g3    + f5g2    + f6g1    + f7g0    + f8g9_19 + f9g8_19;
+    int64_t h8 = f0g8 + f1g7_2  + f2g6    + f3g5_2  + f4g4    + f5g3_2  + f6g2    + f7g1_2  + f8g0    + f9g9_38;
+    int64_t h9 = f0g9 + f1g8    + f2g7    + f3g6    + f4g5    + f5g4    + f6g3    + f7g2    + f8g1    + f9g0   ;
+    int64_t carry0;
+    int64_t carry1;
+    int64_t carry2;
+    int64_t carry3;
+    int64_t carry4;
+    int64_t carry5;
+    int64_t carry6;
+    int64_t carry7;
+    int64_t carry8;
+    int64_t carry9;
+
+    /* |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
+     *   i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
+     * |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
+     *   i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 */
+
+    carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+    carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+    /* |h0| <= 2^25 */
+    /* |h4| <= 2^25 */
+    /* |h1| <= 1.71*2^59 */
+    /* |h5| <= 1.71*2^59 */
+
+    carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
+    carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
+    /* |h1| <= 2^24; from now on fits into int32 */
+    /* |h5| <= 2^24; from now on fits into int32 */
+    /* |h2| <= 1.41*2^60 */
+    /* |h6| <= 1.41*2^60 */
+
+    carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
+    carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
+    /* |h2| <= 2^25; from now on fits into int32 unchanged */
+    /* |h6| <= 2^25; from now on fits into int32 unchanged */
+    /* |h3| <= 1.71*2^59 */
+    /* |h7| <= 1.71*2^59 */
+
+    carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
+    carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
+    /* |h3| <= 2^24; from now on fits into int32 unchanged */
+    /* |h7| <= 2^24; from now on fits into int32 unchanged */
+    /* |h4| <= 1.72*2^34 */
+    /* |h8| <= 1.41*2^60 */
+
+    carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+    carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
+    /* |h4| <= 2^25; from now on fits into int32 unchanged */
+    /* |h8| <= 2^25; from now on fits into int32 unchanged */
+    /* |h5| <= 1.01*2^24 */
+    /* |h9| <= 1.71*2^59 */
+
+    carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
+    /* |h9| <= 2^24; from now on fits into int32 unchanged */
+    /* |h0| <= 1.1*2^39 */
+
+    carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+    /* |h0| <= 2^25; from now on fits into int32 unchanged */
+    /* |h1| <= 1.01*2^24 */
+
+    h[0] = (int32_t)h0;
+    h[1] = (int32_t)h1;
+    h[2] = (int32_t)h2;
+    h[3] = (int32_t)h3;
+    h[4] = (int32_t)h4;
+    h[5] = (int32_t)h5;
+    h[6] = (int32_t)h6;
+    h[7] = (int32_t)h7;
+    h[8] = (int32_t)h8;
+    h[9] = (int32_t)h9;
 }
 
-/* h = f * f
+/*
+ * h = f * f
+ *
  * Can overlap h with f.
  *
  * Preconditions:
@@ -1227,286 +1259,307 @@ static void fe_mul(fe h, const fe f, const fe g) {
  * Postconditions:
  *    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
  *
- * See fe_mul.c for discussion of implementation strategy. */
-static void fe_sq(fe h, const fe f) {
-  int32_t f0 = f[0];
-  int32_t f1 = f[1];
-  int32_t f2 = f[2];
-  int32_t f3 = f[3];
-  int32_t f4 = f[4];
-  int32_t f5 = f[5];
-  int32_t f6 = f[6];
-  int32_t f7 = f[7];
-  int32_t f8 = f[8];
-  int32_t f9 = f[9];
-  int32_t f0_2 = 2 * f0;
-  int32_t f1_2 = 2 * f1;
-  int32_t f2_2 = 2 * f2;
-  int32_t f3_2 = 2 * f3;
-  int32_t f4_2 = 2 * f4;
-  int32_t f5_2 = 2 * f5;
-  int32_t f6_2 = 2 * f6;
-  int32_t f7_2 = 2 * f7;
-  int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
-  int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
-  int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
-  int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
-  int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
-  int64_t f0f0    = f0   * (int64_t) f0;
-  int64_t f0f1_2  = f0_2 * (int64_t) f1;
-  int64_t f0f2_2  = f0_2 * (int64_t) f2;
-  int64_t f0f3_2  = f0_2 * (int64_t) f3;
-  int64_t f0f4_2  = f0_2 * (int64_t) f4;
-  int64_t f0f5_2  = f0_2 * (int64_t) f5;
-  int64_t f0f6_2  = f0_2 * (int64_t) f6;
-  int64_t f0f7_2  = f0_2 * (int64_t) f7;
-  int64_t f0f8_2  = f0_2 * (int64_t) f8;
-  int64_t f0f9_2  = f0_2 * (int64_t) f9;
-  int64_t f1f1_2  = f1_2 * (int64_t) f1;
-  int64_t f1f2_2  = f1_2 * (int64_t) f2;
-  int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
-  int64_t f1f4_2  = f1_2 * (int64_t) f4;
-  int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
-  int64_t f1f6_2  = f1_2 * (int64_t) f6;
-  int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
-  int64_t f1f8_2  = f1_2 * (int64_t) f8;
-  int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
-  int64_t f2f2    = f2   * (int64_t) f2;
-  int64_t f2f3_2  = f2_2 * (int64_t) f3;
-  int64_t f2f4_2  = f2_2 * (int64_t) f4;
-  int64_t f2f5_2  = f2_2 * (int64_t) f5;
-  int64_t f2f6_2  = f2_2 * (int64_t) f6;
-  int64_t f2f7_2  = f2_2 * (int64_t) f7;
-  int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
-  int64_t f2f9_38 = f2   * (int64_t) f9_38;
-  int64_t f3f3_2  = f3_2 * (int64_t) f3;
-  int64_t f3f4_2  = f3_2 * (int64_t) f4;
-  int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
-  int64_t f3f6_2  = f3_2 * (int64_t) f6;
-  int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
-  int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
-  int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
-  int64_t f4f4    = f4   * (int64_t) f4;
-  int64_t f4f5_2  = f4_2 * (int64_t) f5;
-  int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
-  int64_t f4f7_38 = f4   * (int64_t) f7_38;
-  int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
-  int64_t f4f9_38 = f4   * (int64_t) f9_38;
-  int64_t f5f5_38 = f5   * (int64_t) f5_38;
-  int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
-  int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
-  int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
-  int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
-  int64_t f6f6_19 = f6   * (int64_t) f6_19;
-  int64_t f6f7_38 = f6   * (int64_t) f7_38;
-  int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
-  int64_t f6f9_38 = f6   * (int64_t) f9_38;
-  int64_t f7f7_38 = f7   * (int64_t) f7_38;
-  int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
-  int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
-  int64_t f8f8_19 = f8   * (int64_t) f8_19;
-  int64_t f8f9_38 = f8   * (int64_t) f9_38;
-  int64_t f9f9_38 = f9   * (int64_t) f9_38;
-  int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
-  int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
-  int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
-  int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
-  int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
-  int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
-  int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
-  int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
-  int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
-  int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
-  int64_t carry0;
-  int64_t carry1;
-  int64_t carry2;
-  int64_t carry3;
-  int64_t carry4;
-  int64_t carry5;
-  int64_t carry6;
-  int64_t carry7;
-  int64_t carry8;
-  int64_t carry9;
-
-  carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
-  carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
-
-  carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
-  carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
-
-  carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
-  carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
-
-  carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
-  carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
-
-  carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
-  carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
-
-  carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
-
-  carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
-
-  h[0] = (int32_t)h0;
-  h[1] = (int32_t)h1;
-  h[2] = (int32_t)h2;
-  h[3] = (int32_t)h3;
-  h[4] = (int32_t)h4;
-  h[5] = (int32_t)h5;
-  h[6] = (int32_t)h6;
-  h[7] = (int32_t)h7;
-  h[8] = (int32_t)h8;
-  h[9] = (int32_t)h9;
+ * See fe_mul.c for discussion of implementation strategy.
+ */
+static void fe_sq(fe h, const fe f)
+{
+    int32_t f0 = f[0];
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+    int32_t f0_2 = 2 * f0;
+    int32_t f1_2 = 2 * f1;
+    int32_t f2_2 = 2 * f2;
+    int32_t f3_2 = 2 * f3;
+    int32_t f4_2 = 2 * f4;
+    int32_t f5_2 = 2 * f5;
+    int32_t f6_2 = 2 * f6;
+    int32_t f7_2 = 2 * f7;
+    int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
+    int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
+    int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
+    int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
+    int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
+    int64_t f0f0    = f0   * (int64_t) f0;
+    int64_t f0f1_2  = f0_2 * (int64_t) f1;
+    int64_t f0f2_2  = f0_2 * (int64_t) f2;
+    int64_t f0f3_2  = f0_2 * (int64_t) f3;
+    int64_t f0f4_2  = f0_2 * (int64_t) f4;
+    int64_t f0f5_2  = f0_2 * (int64_t) f5;
+    int64_t f0f6_2  = f0_2 * (int64_t) f6;
+    int64_t f0f7_2  = f0_2 * (int64_t) f7;
+    int64_t f0f8_2  = f0_2 * (int64_t) f8;
+    int64_t f0f9_2  = f0_2 * (int64_t) f9;
+    int64_t f1f1_2  = f1_2 * (int64_t) f1;
+    int64_t f1f2_2  = f1_2 * (int64_t) f2;
+    int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
+    int64_t f1f4_2  = f1_2 * (int64_t) f4;
+    int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
+    int64_t f1f6_2  = f1_2 * (int64_t) f6;
+    int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
+    int64_t f1f8_2  = f1_2 * (int64_t) f8;
+    int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
+    int64_t f2f2    = f2   * (int64_t) f2;
+    int64_t f2f3_2  = f2_2 * (int64_t) f3;
+    int64_t f2f4_2  = f2_2 * (int64_t) f4;
+    int64_t f2f5_2  = f2_2 * (int64_t) f5;
+    int64_t f2f6_2  = f2_2 * (int64_t) f6;
+    int64_t f2f7_2  = f2_2 * (int64_t) f7;
+    int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
+    int64_t f2f9_38 = f2   * (int64_t) f9_38;
+    int64_t f3f3_2  = f3_2 * (int64_t) f3;
+    int64_t f3f4_2  = f3_2 * (int64_t) f4;
+    int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
+    int64_t f3f6_2  = f3_2 * (int64_t) f6;
+    int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
+    int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
+    int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
+    int64_t f4f4    = f4   * (int64_t) f4;
+    int64_t f4f5_2  = f4_2 * (int64_t) f5;
+    int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
+    int64_t f4f7_38 = f4   * (int64_t) f7_38;
+    int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
+    int64_t f4f9_38 = f4   * (int64_t) f9_38;
+    int64_t f5f5_38 = f5   * (int64_t) f5_38;
+    int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
+    int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
+    int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
+    int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
+    int64_t f6f6_19 = f6   * (int64_t) f6_19;
+    int64_t f6f7_38 = f6   * (int64_t) f7_38;
+    int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
+    int64_t f6f9_38 = f6   * (int64_t) f9_38;
+    int64_t f7f7_38 = f7   * (int64_t) f7_38;
+    int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
+    int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
+    int64_t f8f8_19 = f8   * (int64_t) f8_19;
+    int64_t f8f9_38 = f8   * (int64_t) f9_38;
+    int64_t f9f9_38 = f9   * (int64_t) f9_38;
+    int64_t h0 = f0f0   + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
+    int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
+    int64_t h2 = f0f2_2 + f1f1_2  + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
+    int64_t h3 = f0f3_2 + f1f2_2  + f4f9_38 + f5f8_38 + f6f7_38;
+    int64_t h4 = f0f4_2 + f1f3_4  + f2f2    + f5f9_76 + f6f8_38 + f7f7_38;
+    int64_t h5 = f0f5_2 + f1f4_2  + f2f3_2  + f6f9_38 + f7f8_38;
+    int64_t h6 = f0f6_2 + f1f5_4  + f2f4_2  + f3f3_2  + f7f9_76 + f8f8_19;
+    int64_t h7 = f0f7_2 + f1f6_2  + f2f5_2  + f3f4_2  + f8f9_38;
+    int64_t h8 = f0f8_2 + f1f7_4  + f2f6_2  + f3f5_4  + f4f4    + f9f9_38;
+    int64_t h9 = f0f9_2 + f1f8_2  + f2f7_2  + f3f6_2  + f4f5_2;
+    int64_t carry0;
+    int64_t carry1;
+    int64_t carry2;
+    int64_t carry3;
+    int64_t carry4;
+    int64_t carry5;
+    int64_t carry6;
+    int64_t carry7;
+    int64_t carry8;
+    int64_t carry9;
+
+    carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+    carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+
+    carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
+    carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
+
+    carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
+    carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
+
+    carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
+    carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
+
+    carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+    carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
+
+    carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
+
+    carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+
+    h[0] = (int32_t)h0;
+    h[1] = (int32_t)h1;
+    h[2] = (int32_t)h2;
+    h[3] = (int32_t)h3;
+    h[4] = (int32_t)h4;
+    h[5] = (int32_t)h5;
+    h[6] = (int32_t)h6;
+    h[7] = (int32_t)h7;
+    h[8] = (int32_t)h8;
+    h[9] = (int32_t)h9;
 }
 
-static void fe_invert(fe out, const fe z) {
-  fe t0;
-  fe t1;
-  fe t2;
-  fe t3;
-  int i;
+static void fe_invert(fe out, const fe z)
+{
+    fe t0;
+    fe t1;
+    fe t2;
+    fe t3;
+    int i;
 
-  /*
-   * Compute z ** -1 = z ** (2 ** 255 - 19 - 2) with the exponent as
-   * 2 ** 255 - 21 = (2 ** 5) * (2 ** 250 - 1) + 11.
-   */
+    /*
+     * Compute z ** -1 = z ** (2 ** 255 - 19 - 2) with the exponent as
+     * 2 ** 255 - 21 = (2 ** 5) * (2 ** 250 - 1) + 11.
+     */
 
-  /* t0 = z ** 2 */
-  fe_sq(t0, z);
+    /* t0 = z ** 2 */
+    fe_sq(t0, z);
 
-  /* t1 = t0 ** (2 ** 2) = z ** 8 */
-  fe_sq(t1, t0);
-  fe_sq(t1, t1);
+    /* t1 = t0 ** (2 ** 2) = z ** 8 */
+    fe_sq(t1, t0);
+    fe_sq(t1, t1);
 
-  /* t1 = z * t1 = z ** 9 */
-  fe_mul(t1, z, t1);
-  /* t0 = t0 * t1 = z ** 11 -- stash t0 away for the end. */
-  fe_mul(t0, t0, t1);
+    /* t1 = z * t1 = z ** 9 */
+    fe_mul(t1, z, t1);
+    /* t0 = t0 * t1 = z ** 11 -- stash t0 away for the end. */
+    fe_mul(t0, t0, t1);
 
-  /* t2 = t0 ** 2 = z ** 22 */
-  fe_sq(t2, t0);
+    /* t2 = t0 ** 2 = z ** 22 */
+    fe_sq(t2, t0);
 
-  /* t1 = t1 * t2 = z ** (2 ** 5 - 1) */
-  fe_mul(t1, t1, t2);
+    /* t1 = t1 * t2 = z ** (2 ** 5 - 1) */
+    fe_mul(t1, t1, t2);
 
-  /* t2 = t1 ** (2 ** 5) = z ** ((2 ** 5) * (2 ** 5 - 1)) */
-  fe_sq(t2, t1);
-  for (i = 1; i < 5; ++i) {
-    fe_sq(t2, t2);
-  }
+    /* t2 = t1 ** (2 ** 5) = z ** ((2 ** 5) * (2 ** 5 - 1)) */
+    fe_sq(t2, t1);
+    for (i = 1; i < 5; ++i) {
+        fe_sq(t2, t2);
+    }
 
-  /* t1 = t1 * t2 = z ** ((2 ** 5 + 1) * (2 ** 5 - 1)) = z ** (2 ** 10 - 1) */
-  fe_mul(t1, t2, t1);
+    /* t1 = t1 * t2 = z ** ((2 ** 5 + 1) * (2 ** 5 - 1)) = z ** (2 ** 10 - 1) */
+    fe_mul(t1, t2, t1);
 
-  /* Continuing similarly... */
+    /* Continuing similarly... */
 
-  /* t2 = z ** (2 ** 20 - 1) */
-  fe_sq(t2, t1);
-  for (i = 1; i < 10; ++i) {
-    fe_sq(t2, t2);
-  }
-  fe_mul(t2, t2, t1);
-
-  /* t2 = z ** (2 ** 40 - 1) */
-  fe_sq(t3, t2);
-  for (i = 1; i < 20; ++i) {
-    fe_sq(t3, t3);
-  }
-  fe_mul(t2, t3, t2);
-
-  /* t2 = z ** (2 ** 10) * (2 ** 40 - 1) */
-  for (i = 0; i < 10; ++i) {
-    fe_sq(t2, t2);
-  }
-  /* t1 = z ** (2 ** 50 - 1) */
-  fe_mul(t1, t2, t1);
+    /* t2 = z ** (2 ** 20 - 1) */
+    fe_sq(t2, t1);
+    for (i = 1; i < 10; ++i) {
+        fe_sq(t2, t2);
+    }
+    fe_mul(t2, t2, t1);
 
-  /* t2 = z ** (2 ** 100 - 1) */
-  fe_sq(t2, t1);
-  for (i = 1; i < 50; ++i) {
-    fe_sq(t2, t2);
-  }
-  fe_mul(t2, t2, t1);
-
-  /* t2 = z ** (2 ** 200 - 1) */
-  fe_sq(t3, t2);
-  for (i = 1; i < 100; ++i) {
-    fe_sq(t3, t3);
-  }
-  fe_mul(t2, t3, t2);
-
-  /* t2 = z ** ((2 ** 50) * (2 ** 200 - 1) */
-  fe_sq(t2, t2);
-  for (i = 1; i < 50; ++i) {
+    /* t2 = z ** (2 ** 40 - 1) */
+    fe_sq(t3, t2);
+    for (i = 1; i < 20; ++i) {
+        fe_sq(t3, t3);
+    }
+    fe_mul(t2, t3, t2);
+
+    /* t2 = z ** ((2 ** 10) * (2 ** 40 - 1)) */
+    for (i = 0; i < 10; ++i) {
+        fe_sq(t2, t2);
+    }
+    /* t1 = z ** (2 ** 50 - 1) */
+    fe_mul(t1, t2, t1);
+
+    /* t2 = z ** (2 ** 100 - 1) */
+    fe_sq(t2, t1);
+    for (i = 1; i < 50; ++i) {
+        fe_sq(t2, t2);
+    }
+    fe_mul(t2, t2, t1);
+
+    /* t2 = z ** (2 ** 200 - 1) */
+    fe_sq(t3, t2);
+    for (i = 1; i < 100; ++i) {
+        fe_sq(t3, t3);
+    }
+    fe_mul(t2, t3, t2);
+
+    /* t2 = z ** ((2 ** 50) * (2 ** 200 - 1)) */
     fe_sq(t2, t2);
-  }
+    for (i = 1; i < 50; ++i) {
+        fe_sq(t2, t2);
+    }
 
-  /* t1 = z ** (2 ** 250 - 1) */
-  fe_mul(t1, t2, t1);
+    /* t1 = z ** (2 ** 250 - 1) */
+    fe_mul(t1, t2, t1);
 
-  /* t1 = z ** ((2 ** 5) * (2 ** 250 - 1)) */
-  fe_sq(t1, t1);
-  for (i = 1; i < 5; ++i) {
+    /* t1 = z ** ((2 ** 5) * (2 ** 250 - 1)) */
     fe_sq(t1, t1);
-  }
+    for (i = 1; i < 5; ++i) {
+        fe_sq(t1, t1);
+    }
 
-  /* Recall t0 = z ** 11; out = z ** (2 ** 255 - 21) */
-  fe_mul(out, t1, t0);
+    /* Recall t0 = z ** 11; out = z ** (2 ** 255 - 21) */
+    fe_mul(out, t1, t0);
 }
 
-/* h = -f
+/*
+ * h = -f
  *
  * Preconditions:
  *    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  *
  * Postconditions:
- *    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */
-static void fe_neg(fe h, const fe f) {
-  unsigned i;
-  for (i = 0; i < 10; i++) {
-    h[i] = -f[i];
-  }
+ *    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+ */
+static void fe_neg(fe h, const fe f)
+{
+    unsigned i;
+
+    for (i = 0; i < 10; i++) {
+        h[i] = -f[i];
+    }
 }
 
-/* Replace (f,g) with (g,g) if b == 1;
+/*
+ * Replace (f,g) with (g,g) if b == 1;
  * replace (f,g) with (f,g) if b == 0.
  *
- * Preconditions: b in {0,1}. */
-static void fe_cmov(fe f, const fe g, unsigned b) {
-  size_t i;
-  b = 0-b;
-  for (i = 0; i < 10; i++) {
-    int32_t x = f[i] ^ g[i];
-    x &= b;
-    f[i] ^= x;
-  }
+ * Preconditions: b in {0,1}.
+ */
+static void fe_cmov(fe f, const fe g, unsigned b)
+{
+    size_t i;
+
+    b = 0-b;
+    for (i = 0; i < 10; i++) {
+        int32_t x = f[i] ^ g[i];
+        x &= b;
+        f[i] ^= x;
+    }
 }
 
-/* return 0 if f == 0
+/*
+ * return 0 if f == 0
  * return 1 if f != 0
  *
  * Preconditions:
- *    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
-static int fe_isnonzero(const fe f) {
-  uint8_t s[32];
-  static const uint8_t zero[32] = {0};
-  fe_tobytes(s, f);
+ *    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+ */
+static int fe_isnonzero(const fe f)
+{
+    uint8_t s[32];
+    static const uint8_t zero[32] = {0};
 
-  return CRYPTO_memcmp(s, zero, sizeof(zero)) != 0;
+    fe_tobytes(s, f);
+
+    return CRYPTO_memcmp(s, zero, sizeof(zero)) != 0;
 }
 
-/* return 1 if f is in {1,3,5,...,q-2}
+/*
+ * return 1 if f is in {1,3,5,...,q-2}
  * return 0 if f is in {0,2,4,...,q-1}
  *
  * Preconditions:
- *    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
-static int fe_isnegative(const fe f) {
-  uint8_t s[32];
-  fe_tobytes(s, f);
-  return s[0] & 1;
+ *    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+ */
+static int fe_isnegative(const fe f)
+{
+    uint8_t s[32];
+
+    fe_tobytes(s, f);
+    return s[0] & 1;
 }
 
-/* h = 2 * f * f
+/*
+ * h = 2 * f * f
+ *
  * Can overlap h with f.
  *
  * Preconditions:
@@ -1515,208 +1568,212 @@ static int fe_isnegative(const fe f) {
  * Postconditions:
  *    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
  *
- * See fe_mul.c for discussion of implementation strategy. */
-static void fe_sq2(fe h, const fe f) {
-  int32_t f0 = f[0];
-  int32_t f1 = f[1];
-  int32_t f2 = f[2];
-  int32_t f3 = f[3];
-  int32_t f4 = f[4];
-  int32_t f5 = f[5];
-  int32_t f6 = f[6];
-  int32_t f7 = f[7];
-  int32_t f8 = f[8];
-  int32_t f9 = f[9];
-  int32_t f0_2 = 2 * f0;
-  int32_t f1_2 = 2 * f1;
-  int32_t f2_2 = 2 * f2;
-  int32_t f3_2 = 2 * f3;
-  int32_t f4_2 = 2 * f4;
-  int32_t f5_2 = 2 * f5;
-  int32_t f6_2 = 2 * f6;
-  int32_t f7_2 = 2 * f7;
-  int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
-  int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
-  int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
-  int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
-  int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
-  int64_t f0f0    = f0   * (int64_t) f0;
-  int64_t f0f1_2  = f0_2 * (int64_t) f1;
-  int64_t f0f2_2  = f0_2 * (int64_t) f2;
-  int64_t f0f3_2  = f0_2 * (int64_t) f3;
-  int64_t f0f4_2  = f0_2 * (int64_t) f4;
-  int64_t f0f5_2  = f0_2 * (int64_t) f5;
-  int64_t f0f6_2  = f0_2 * (int64_t) f6;
-  int64_t f0f7_2  = f0_2 * (int64_t) f7;
-  int64_t f0f8_2  = f0_2 * (int64_t) f8;
-  int64_t f0f9_2  = f0_2 * (int64_t) f9;
-  int64_t f1f1_2  = f1_2 * (int64_t) f1;
-  int64_t f1f2_2  = f1_2 * (int64_t) f2;
-  int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
-  int64_t f1f4_2  = f1_2 * (int64_t) f4;
-  int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
-  int64_t f1f6_2  = f1_2 * (int64_t) f6;
-  int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
-  int64_t f1f8_2  = f1_2 * (int64_t) f8;
-  int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
-  int64_t f2f2    = f2   * (int64_t) f2;
-  int64_t f2f3_2  = f2_2 * (int64_t) f3;
-  int64_t f2f4_2  = f2_2 * (int64_t) f4;
-  int64_t f2f5_2  = f2_2 * (int64_t) f5;
-  int64_t f2f6_2  = f2_2 * (int64_t) f6;
-  int64_t f2f7_2  = f2_2 * (int64_t) f7;
-  int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
-  int64_t f2f9_38 = f2   * (int64_t) f9_38;
-  int64_t f3f3_2  = f3_2 * (int64_t) f3;
-  int64_t f3f4_2  = f3_2 * (int64_t) f4;
-  int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
-  int64_t f3f6_2  = f3_2 * (int64_t) f6;
-  int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
-  int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
-  int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
-  int64_t f4f4    = f4   * (int64_t) f4;
-  int64_t f4f5_2  = f4_2 * (int64_t) f5;
-  int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
-  int64_t f4f7_38 = f4   * (int64_t) f7_38;
-  int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
-  int64_t f4f9_38 = f4   * (int64_t) f9_38;
-  int64_t f5f5_38 = f5   * (int64_t) f5_38;
-  int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
-  int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
-  int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
-  int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
-  int64_t f6f6_19 = f6   * (int64_t) f6_19;
-  int64_t f6f7_38 = f6   * (int64_t) f7_38;
-  int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
-  int64_t f6f9_38 = f6   * (int64_t) f9_38;
-  int64_t f7f7_38 = f7   * (int64_t) f7_38;
-  int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
-  int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
-  int64_t f8f8_19 = f8   * (int64_t) f8_19;
-  int64_t f8f9_38 = f8   * (int64_t) f9_38;
-  int64_t f9f9_38 = f9   * (int64_t) f9_38;
-  int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
-  int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
-  int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
-  int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
-  int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
-  int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
-  int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
-  int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
-  int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
-  int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
-  int64_t carry0;
-  int64_t carry1;
-  int64_t carry2;
-  int64_t carry3;
-  int64_t carry4;
-  int64_t carry5;
-  int64_t carry6;
-  int64_t carry7;
-  int64_t carry8;
-  int64_t carry9;
-
-  h0 += h0;
-  h1 += h1;
-  h2 += h2;
-  h3 += h3;
-  h4 += h4;
-  h5 += h5;
-  h6 += h6;
-  h7 += h7;
-  h8 += h8;
-  h9 += h9;
-
-  carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
-  carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
-
-  carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
-  carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
-
-  carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
-  carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
-
-  carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
-  carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
-
-  carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
-  carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
-
-  carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
-
-  carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
-
-  h[0] = (int32_t)h0;
-  h[1] = (int32_t)h1;
-  h[2] = (int32_t)h2;
-  h[3] = (int32_t)h3;
-  h[4] = (int32_t)h4;
-  h[5] = (int32_t)h5;
-  h[6] = (int32_t)h6;
-  h[7] = (int32_t)h7;
-  h[8] = (int32_t)h8;
-  h[9] = (int32_t)h9;
+ * See fe_mul.c for discussion of implementation strategy.
+ */
+static void fe_sq2(fe h, const fe f)
+{
+    int32_t f0 = f[0];
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+    int32_t f0_2 = 2 * f0;
+    int32_t f1_2 = 2 * f1;
+    int32_t f2_2 = 2 * f2;
+    int32_t f3_2 = 2 * f3;
+    int32_t f4_2 = 2 * f4;
+    int32_t f5_2 = 2 * f5;
+    int32_t f6_2 = 2 * f6;
+    int32_t f7_2 = 2 * f7;
+    int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
+    int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
+    int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
+    int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
+    int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
+    int64_t f0f0    = f0   * (int64_t) f0;
+    int64_t f0f1_2  = f0_2 * (int64_t) f1;
+    int64_t f0f2_2  = f0_2 * (int64_t) f2;
+    int64_t f0f3_2  = f0_2 * (int64_t) f3;
+    int64_t f0f4_2  = f0_2 * (int64_t) f4;
+    int64_t f0f5_2  = f0_2 * (int64_t) f5;
+    int64_t f0f6_2  = f0_2 * (int64_t) f6;
+    int64_t f0f7_2  = f0_2 * (int64_t) f7;
+    int64_t f0f8_2  = f0_2 * (int64_t) f8;
+    int64_t f0f9_2  = f0_2 * (int64_t) f9;
+    int64_t f1f1_2  = f1_2 * (int64_t) f1;
+    int64_t f1f2_2  = f1_2 * (int64_t) f2;
+    int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
+    int64_t f1f4_2  = f1_2 * (int64_t) f4;
+    int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
+    int64_t f1f6_2  = f1_2 * (int64_t) f6;
+    int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
+    int64_t f1f8_2  = f1_2 * (int64_t) f8;
+    int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
+    int64_t f2f2    = f2   * (int64_t) f2;
+    int64_t f2f3_2  = f2_2 * (int64_t) f3;
+    int64_t f2f4_2  = f2_2 * (int64_t) f4;
+    int64_t f2f5_2  = f2_2 * (int64_t) f5;
+    int64_t f2f6_2  = f2_2 * (int64_t) f6;
+    int64_t f2f7_2  = f2_2 * (int64_t) f7;
+    int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
+    int64_t f2f9_38 = f2   * (int64_t) f9_38;
+    int64_t f3f3_2  = f3_2 * (int64_t) f3;
+    int64_t f3f4_2  = f3_2 * (int64_t) f4;
+    int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
+    int64_t f3f6_2  = f3_2 * (int64_t) f6;
+    int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
+    int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
+    int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
+    int64_t f4f4    = f4   * (int64_t) f4;
+    int64_t f4f5_2  = f4_2 * (int64_t) f5;
+    int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
+    int64_t f4f7_38 = f4   * (int64_t) f7_38;
+    int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
+    int64_t f4f9_38 = f4   * (int64_t) f9_38;
+    int64_t f5f5_38 = f5   * (int64_t) f5_38;
+    int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
+    int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
+    int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
+    int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
+    int64_t f6f6_19 = f6   * (int64_t) f6_19;
+    int64_t f6f7_38 = f6   * (int64_t) f7_38;
+    int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
+    int64_t f6f9_38 = f6   * (int64_t) f9_38;
+    int64_t f7f7_38 = f7   * (int64_t) f7_38;
+    int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
+    int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
+    int64_t f8f8_19 = f8   * (int64_t) f8_19;
+    int64_t f8f9_38 = f8   * (int64_t) f9_38;
+    int64_t f9f9_38 = f9   * (int64_t) f9_38;
+    int64_t h0 = f0f0   + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
+    int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
+    int64_t h2 = f0f2_2 + f1f1_2  + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
+    int64_t h3 = f0f3_2 + f1f2_2  + f4f9_38 + f5f8_38 + f6f7_38;
+    int64_t h4 = f0f4_2 + f1f3_4  + f2f2    + f5f9_76 + f6f8_38 + f7f7_38;
+    int64_t h5 = f0f5_2 + f1f4_2  + f2f3_2  + f6f9_38 + f7f8_38;
+    int64_t h6 = f0f6_2 + f1f5_4  + f2f4_2  + f3f3_2  + f7f9_76 + f8f8_19;
+    int64_t h7 = f0f7_2 + f1f6_2  + f2f5_2  + f3f4_2  + f8f9_38;
+    int64_t h8 = f0f8_2 + f1f7_4  + f2f6_2  + f3f5_4  + f4f4    + f9f9_38;
+    int64_t h9 = f0f9_2 + f1f8_2  + f2f7_2  + f3f6_2  + f4f5_2;
+    int64_t carry0;
+    int64_t carry1;
+    int64_t carry2;
+    int64_t carry3;
+    int64_t carry4;
+    int64_t carry5;
+    int64_t carry6;
+    int64_t carry7;
+    int64_t carry8;
+    int64_t carry9;
+
+    h0 += h0;
+    h1 += h1;
+    h2 += h2;
+    h3 += h3;
+    h4 += h4;
+    h5 += h5;
+    h6 += h6;
+    h7 += h7;
+    h8 += h8;
+    h9 += h9;
+
+    carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+    carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+
+    carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
+    carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
+
+    carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
+    carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
+
+    carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
+    carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
+
+    carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+    carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
+
+    carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
+
+    carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+
+    h[0] = (int32_t)h0;
+    h[1] = (int32_t)h1;
+    h[2] = (int32_t)h2;
+    h[3] = (int32_t)h3;
+    h[4] = (int32_t)h4;
+    h[5] = (int32_t)h5;
+    h[6] = (int32_t)h6;
+    h[7] = (int32_t)h7;
+    h[8] = (int32_t)h8;
+    h[9] = (int32_t)h9;
 }
 
-static void fe_pow22523(fe out, const fe z) {
-  fe t0;
-  fe t1;
-  fe t2;
-  int i;
+static void fe_pow22523(fe out, const fe z)
+{
+    fe t0;
+    fe t1;
+    fe t2;
+    int i;
 
-  fe_sq(t0, z);
-  fe_sq(t1, t0);
-  for (i = 1; i < 2; ++i) {
-    fe_sq(t1, t1);
-  }
-  fe_mul(t1, z, t1);
-  fe_mul(t0, t0, t1);
-  fe_sq(t0, t0);
-  fe_mul(t0, t1, t0);
-  fe_sq(t1, t0);
-  for (i = 1; i < 5; ++i) {
-    fe_sq(t1, t1);
-  }
-  fe_mul(t0, t1, t0);
-  fe_sq(t1, t0);
-  for (i = 1; i < 10; ++i) {
-    fe_sq(t1, t1);
-  }
-  fe_mul(t1, t1, t0);
-  fe_sq(t2, t1);
-  for (i = 1; i < 20; ++i) {
-    fe_sq(t2, t2);
-  }
-  fe_mul(t1, t2, t1);
-  fe_sq(t1, t1);
-  for (i = 1; i < 10; ++i) {
-    fe_sq(t1, t1);
-  }
-  fe_mul(t0, t1, t0);
-  fe_sq(t1, t0);
-  for (i = 1; i < 50; ++i) {
+    fe_sq(t0, z);
+    fe_sq(t1, t0);
+    for (i = 1; i < 2; ++i) {
+        fe_sq(t1, t1);
+    }
+    fe_mul(t1, z, t1);
+    fe_mul(t0, t0, t1);
+    fe_sq(t0, t0);
+    fe_mul(t0, t1, t0);
+    fe_sq(t1, t0);
+    for (i = 1; i < 5; ++i) {
+        fe_sq(t1, t1);
+    }
+    fe_mul(t0, t1, t0);
+    fe_sq(t1, t0);
+    for (i = 1; i < 10; ++i) {
+        fe_sq(t1, t1);
+    }
+    fe_mul(t1, t1, t0);
+    fe_sq(t2, t1);
+    for (i = 1; i < 20; ++i) {
+        fe_sq(t2, t2);
+    }
+    fe_mul(t1, t2, t1);
     fe_sq(t1, t1);
-  }
-  fe_mul(t1, t1, t0);
-  fe_sq(t2, t1);
-  for (i = 1; i < 100; ++i) {
-    fe_sq(t2, t2);
-  }
-  fe_mul(t1, t2, t1);
-  fe_sq(t1, t1);
-  for (i = 1; i < 50; ++i) {
+    for (i = 1; i < 10; ++i) {
+        fe_sq(t1, t1);
+    }
+    fe_mul(t0, t1, t0);
+    fe_sq(t1, t0);
+    for (i = 1; i < 50; ++i) {
+        fe_sq(t1, t1);
+    }
+    fe_mul(t1, t1, t0);
+    fe_sq(t2, t1);
+    for (i = 1; i < 100; ++i) {
+        fe_sq(t2, t2);
+    }
+    fe_mul(t1, t2, t1);
     fe_sq(t1, t1);
-  }
-  fe_mul(t0, t1, t0);
-  fe_sq(t0, t0);
-  for (i = 1; i < 2; ++i) {
+    for (i = 1; i < 50; ++i) {
+        fe_sq(t1, t1);
+    }
+    fe_mul(t0, t1, t0);
     fe_sq(t0, t0);
-  }
-  fe_mul(out, t0, z);
+    for (i = 1; i < 2; ++i) {
+        fe_sq(t0, t0);
+    }
+    fe_mul(out, t0, z);
 }
 
-/* ge means group element.
-
+/*
+ * ge means group element.
+ *
  * Here the group is the set of pairs (x,y) of field elements (see fe.h)
  * satisfying -x^2 + y^2 = 1 + d x^2y^2
  * where d = -121665/121666.
@@ -1725,268 +1782,292 @@ static void fe_pow22523(fe out, const fe z) {
  *   ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
  *   ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
  *   ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
- *   ge_precomp (Duif): (y+x,y-x,2dxy) */
-
+ *   ge_precomp (Duif): (y+x,y-x,2dxy)
+ */
 typedef struct {
-  fe X;
-  fe Y;
-  fe Z;
+    fe X;
+    fe Y;
+    fe Z;
 } ge_p2;
 
 typedef struct {
-  fe X;
-  fe Y;
-  fe Z;
-  fe T;
+    fe X;
+    fe Y;
+    fe Z;
+    fe T;
 } ge_p3;
 
 typedef struct {
-  fe X;
-  fe Y;
-  fe Z;
-  fe T;
+    fe X;
+    fe Y;
+    fe Z;
+    fe T;
 } ge_p1p1;
 
 typedef struct {
-  fe yplusx;
-  fe yminusx;
-  fe xy2d;
+    fe yplusx;
+    fe yminusx;
+    fe xy2d;
 } ge_precomp;
 
 typedef struct {
-  fe YplusX;
-  fe YminusX;
-  fe Z;
-  fe T2d;
+    fe YplusX;
+    fe YminusX;
+    fe Z;
+    fe T2d;
 } ge_cached;
 
-static void ge_tobytes(uint8_t *s, const ge_p2 *h) {
-  fe recip;
-  fe x;
-  fe y;
+static void ge_tobytes(uint8_t *s, const ge_p2 *h)
+{
+    fe recip;
+    fe x;
+    fe y;
+
+    fe_invert(recip, h->Z);
+    fe_mul(x, h->X, recip);
+    fe_mul(y, h->Y, recip);
+    fe_tobytes(s, y);
+    s[31] ^= fe_isnegative(x) << 7;
+}
 
-  fe_invert(recip, h->Z);
-  fe_mul(x, h->X, recip);
-  fe_mul(y, h->Y, recip);
-  fe_tobytes(s, y);
-  s[31] ^= fe_isnegative(x) << 7;
+static void ge_p3_tobytes(uint8_t *s, const ge_p3 *h)
+{
+    fe recip;
+    fe x;
+    fe y;
+
+    fe_invert(recip, h->Z);
+    fe_mul(x, h->X, recip);
+    fe_mul(y, h->Y, recip);
+    fe_tobytes(s, y);
+    s[31] ^= fe_isnegative(x) << 7;
 }
 
-static void ge_p3_tobytes(uint8_t *s, const ge_p3 *h) {
-  fe recip;
-  fe x;
-  fe y;
+static const fe d = {
+    -10913610, 13857413, -15372611, 6949391,   114729,
+    -8787816,  -6275908, -3247719,  -18696448, -12055116
+};
 
-  fe_invert(recip, h->Z);
-  fe_mul(x, h->X, recip);
-  fe_mul(y, h->Y, recip);
-  fe_tobytes(s, y);
-  s[31] ^= fe_isnegative(x) << 7;
-}
+static const fe sqrtm1 = {
+    -32595792, -7943725,  9377950,  3500415, 12389472,
+    -272473,   -25146209, -2005654, 326686,  11406482
+};
 
-static const fe d = {-10913610, 13857413, -15372611, 6949391,   114729,
-                     -8787816,  -6275908, -3247719,  -18696448, -12055116};
-
-static const fe sqrtm1 = {-32595792, -7943725,  9377950,  3500415, 12389472,
-                          -272473,   -25146209, -2005654, 326686,  11406482};
-
-static int ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) {
-  fe u;
-  fe v;
-  fe v3;
-  fe vxx;
-  fe check;
-
-  fe_frombytes(h->Y, s);
-  fe_1(h->Z);
-  fe_sq(u, h->Y);
-  fe_mul(v, u, d);
-  fe_sub(u, u, h->Z); /* u = y^2-1 */
-  fe_add(v, v, h->Z); /* v = dy^2+1 */
-
-  fe_sq(v3, v);
-  fe_mul(v3, v3, v); /* v3 = v^3 */
-  fe_sq(h->X, v3);
-  fe_mul(h->X, h->X, v);
-  fe_mul(h->X, h->X, u); /* x = uv^7 */
-
-  fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
-  fe_mul(h->X, h->X, v3);
-  fe_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
-
-  fe_sq(vxx, h->X);
-  fe_mul(vxx, vxx, v);
-  fe_sub(check, vxx, u); /* vx^2-u */
-  if (fe_isnonzero(check)) {
-    fe_add(check, vxx, u); /* vx^2+u */
+static int ge_frombytes_vartime(ge_p3 *h, const uint8_t *s)
+{
+    fe u;
+    fe v;
+    fe v3;
+    fe vxx;
+    fe check;
+
+    fe_frombytes(h->Y, s);
+    fe_1(h->Z);
+    fe_sq(u, h->Y);
+    fe_mul(v, u, d);
+    fe_sub(u, u, h->Z); /* u = y^2-1 */
+    fe_add(v, v, h->Z); /* v = dy^2+1 */
+
+    fe_sq(v3, v);
+    fe_mul(v3, v3, v); /* v3 = v^3 */
+    fe_sq(h->X, v3);
+    fe_mul(h->X, h->X, v);
+    fe_mul(h->X, h->X, u); /* x = uv^7 */
+
+    fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
+    fe_mul(h->X, h->X, v3);
+    fe_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
+
+    fe_sq(vxx, h->X);
+    fe_mul(vxx, vxx, v);
+    fe_sub(check, vxx, u); /* vx^2-u */
     if (fe_isnonzero(check)) {
-      return -1;
+        fe_add(check, vxx, u); /* vx^2+u */
+        if (fe_isnonzero(check)) {
+            return -1;
+        }
+        fe_mul(h->X, h->X, sqrtm1);
     }
-    fe_mul(h->X, h->X, sqrtm1);
-  }
 
-  if (fe_isnegative(h->X) != (s[31] >> 7)) {
-    fe_neg(h->X, h->X);
-  }
+    if (fe_isnegative(h->X) != (s[31] >> 7)) {
+        fe_neg(h->X, h->X);
+    }
 
-  fe_mul(h->T, h->X, h->Y);
-  return 0;
+    fe_mul(h->T, h->X, h->Y);
+    return 0;
 }
 
-static void ge_p2_0(ge_p2 *h) {
-  fe_0(h->X);
-  fe_1(h->Y);
-  fe_1(h->Z);
+static void ge_p2_0(ge_p2 *h)
+{
+    fe_0(h->X);
+    fe_1(h->Y);
+    fe_1(h->Z);
 }
 
-static void ge_p3_0(ge_p3 *h) {
-  fe_0(h->X);
-  fe_1(h->Y);
-  fe_1(h->Z);
-  fe_0(h->T);
+static void ge_p3_0(ge_p3 *h)
+{
+    fe_0(h->X);
+    fe_1(h->Y);
+    fe_1(h->Z);
+    fe_0(h->T);
 }
 
-static void ge_precomp_0(ge_precomp *h) {
-  fe_1(h->yplusx);
-  fe_1(h->yminusx);
-  fe_0(h->xy2d);
+static void ge_precomp_0(ge_precomp *h)
+{
+    fe_1(h->yplusx);
+    fe_1(h->yminusx);
+    fe_0(h->xy2d);
 }
 
 /* r = p */
-static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
-  fe_copy(r->X, p->X);
-  fe_copy(r->Y, p->Y);
-  fe_copy(r->Z, p->Z);
+static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p)
+{
+    fe_copy(r->X, p->X);
+    fe_copy(r->Y, p->Y);
+    fe_copy(r->Z, p->Z);
 }
 
-static const fe d2 = {-21827239, -5839606,  -30745221, 13898782, 229458,
-                      15978800,  -12551817, -6495438,  29715968, 9444199};
+static const fe d2 = {
+    -21827239, -5839606,  -30745221, 13898782, 229458,
+    15978800,  -12551817, -6495438,  29715968, 9444199
+};
 
 /* r = p */
-static void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
-  fe_add(r->YplusX, p->Y, p->X);
-  fe_sub(r->YminusX, p->Y, p->X);
-  fe_copy(r->Z, p->Z);
-  fe_mul(r->T2d, p->T, d2);
+static void ge_p3_to_cached(ge_cached *r, const ge_p3 *p)
+{
+    fe_add(r->YplusX, p->Y, p->X);
+    fe_sub(r->YminusX, p->Y, p->X);
+    fe_copy(r->Z, p->Z);
+    fe_mul(r->T2d, p->T, d2);
 }
 
 /* r = p */
-static void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
-  fe_mul(r->X, p->X, p->T);
-  fe_mul(r->Y, p->Y, p->Z);
-  fe_mul(r->Z, p->Z, p->T);
+static void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p)
+{
+    fe_mul(r->X, p->X, p->T);
+    fe_mul(r->Y, p->Y, p->Z);
+    fe_mul(r->Z, p->Z, p->T);
 }
 
 /* r = p */
-static void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
-  fe_mul(r->X, p->X, p->T);
-  fe_mul(r->Y, p->Y, p->Z);
-  fe_mul(r->Z, p->Z, p->T);
-  fe_mul(r->T, p->X, p->Y);
+static void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p)
+{
+    fe_mul(r->X, p->X, p->T);
+    fe_mul(r->Y, p->Y, p->Z);
+    fe_mul(r->Z, p->Z, p->T);
+    fe_mul(r->T, p->X, p->Y);
 }
 
 /* r = 2 * p */
-static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
-  fe t0;
-
-  fe_sq(r->X, p->X);
-  fe_sq(r->Z, p->Y);
-  fe_sq2(r->T, p->Z);
-  fe_add(r->Y, p->X, p->Y);
-  fe_sq(t0, r->Y);
-  fe_add(r->Y, r->Z, r->X);
-  fe_sub(r->Z, r->Z, r->X);
-  fe_sub(r->X, t0, r->Y);
-  fe_sub(r->T, r->T, r->Z);
+static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p)
+{
+    fe t0;
+
+    fe_sq(r->X, p->X);
+    fe_sq(r->Z, p->Y);
+    fe_sq2(r->T, p->Z);
+    fe_add(r->Y, p->X, p->Y);
+    fe_sq(t0, r->Y);
+    fe_add(r->Y, r->Z, r->X);
+    fe_sub(r->Z, r->Z, r->X);
+    fe_sub(r->X, t0, r->Y);
+    fe_sub(r->T, r->T, r->Z);
 }
 
 /* r = 2 * p */
-static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
-  ge_p2 q;
-  ge_p3_to_p2(&q, p);
-  ge_p2_dbl(r, &q);
+static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p)
+{
+    ge_p2 q;
+    ge_p3_to_p2(&q, p);
+    ge_p2_dbl(r, &q);
 }
 
 /* r = p + q */
-static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
-  fe t0;
-
-  fe_add(r->X, p->Y, p->X);
-  fe_sub(r->Y, p->Y, p->X);
-  fe_mul(r->Z, r->X, q->yplusx);
-  fe_mul(r->Y, r->Y, q->yminusx);
-  fe_mul(r->T, q->xy2d, p->T);
-  fe_add(t0, p->Z, p->Z);
-  fe_sub(r->X, r->Z, r->Y);
-  fe_add(r->Y, r->Z, r->Y);
-  fe_add(r->Z, t0, r->T);
-  fe_sub(r->T, t0, r->T);
+static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q)
+{
+    fe t0;
+
+    fe_add(r->X, p->Y, p->X);
+    fe_sub(r->Y, p->Y, p->X);
+    fe_mul(r->Z, r->X, q->yplusx);
+    fe_mul(r->Y, r->Y, q->yminusx);
+    fe_mul(r->T, q->xy2d, p->T);
+    fe_add(t0, p->Z, p->Z);
+    fe_sub(r->X, r->Z, r->Y);
+    fe_add(r->Y, r->Z, r->Y);
+    fe_add(r->Z, t0, r->T);
+    fe_sub(r->T, t0, r->T);
 }
 
 /* r = p - q */
-static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
-  fe t0;
-
-  fe_add(r->X, p->Y, p->X);
-  fe_sub(r->Y, p->Y, p->X);
-  fe_mul(r->Z, r->X, q->yminusx);
-  fe_mul(r->Y, r->Y, q->yplusx);
-  fe_mul(r->T, q->xy2d, p->T);
-  fe_add(t0, p->Z, p->Z);
-  fe_sub(r->X, r->Z, r->Y);
-  fe_add(r->Y, r->Z, r->Y);
-  fe_sub(r->Z, t0, r->T);
-  fe_add(r->T, t0, r->T);
+static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q)
+{
+    fe t0;
+
+    fe_add(r->X, p->Y, p->X);
+    fe_sub(r->Y, p->Y, p->X);
+    fe_mul(r->Z, r->X, q->yminusx);
+    fe_mul(r->Y, r->Y, q->yplusx);
+    fe_mul(r->T, q->xy2d, p->T);
+    fe_add(t0, p->Z, p->Z);
+    fe_sub(r->X, r->Z, r->Y);
+    fe_add(r->Y, r->Z, r->Y);
+    fe_sub(r->Z, t0, r->T);
+    fe_add(r->T, t0, r->T);
 }
 
 /* r = p + q */
-static void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
-  fe t0;
-
-  fe_add(r->X, p->Y, p->X);
-  fe_sub(r->Y, p->Y, p->X);
-  fe_mul(r->Z, r->X, q->YplusX);
-  fe_mul(r->Y, r->Y, q->YminusX);
-  fe_mul(r->T, q->T2d, p->T);
-  fe_mul(r->X, p->Z, q->Z);
-  fe_add(t0, r->X, r->X);
-  fe_sub(r->X, r->Z, r->Y);
-  fe_add(r->Y, r->Z, r->Y);
-  fe_add(r->Z, t0, r->T);
-  fe_sub(r->T, t0, r->T);
+static void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q)
+{
+    fe t0;
+
+    fe_add(r->X, p->Y, p->X);
+    fe_sub(r->Y, p->Y, p->X);
+    fe_mul(r->Z, r->X, q->YplusX);
+    fe_mul(r->Y, r->Y, q->YminusX);
+    fe_mul(r->T, q->T2d, p->T);
+    fe_mul(r->X, p->Z, q->Z);
+    fe_add(t0, r->X, r->X);
+    fe_sub(r->X, r->Z, r->Y);
+    fe_add(r->Y, r->Z, r->Y);
+    fe_add(r->Z, t0, r->T);
+    fe_sub(r->T, t0, r->T);
 }
 
 /* r = p - q */
-static void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
-  fe t0;
-
-  fe_add(r->X, p->Y, p->X);
-  fe_sub(r->Y, p->Y, p->X);
-  fe_mul(r->Z, r->X, q->YminusX);
-  fe_mul(r->Y, r->Y, q->YplusX);
-  fe_mul(r->T, q->T2d, p->T);
-  fe_mul(r->X, p->Z, q->Z);
-  fe_add(t0, r->X, r->X);
-  fe_sub(r->X, r->Z, r->Y);
-  fe_add(r->Y, r->Z, r->Y);
-  fe_sub(r->Z, t0, r->T);
-  fe_add(r->T, t0, r->T);
+static void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q)
+{
+    fe t0;
+
+    fe_add(r->X, p->Y, p->X);
+    fe_sub(r->Y, p->Y, p->X);
+    fe_mul(r->Z, r->X, q->YminusX);
+    fe_mul(r->Y, r->Y, q->YplusX);
+    fe_mul(r->T, q->T2d, p->T);
+    fe_mul(r->X, p->Z, q->Z);
+    fe_add(t0, r->X, r->X);
+    fe_sub(r->X, r->Z, r->Y);
+    fe_add(r->Y, r->Z, r->Y);
+    fe_sub(r->Z, t0, r->T);
+    fe_add(r->T, t0, r->T);
 }
 
-static uint8_t equal(signed char b, signed char c) {
-  uint8_t ub = b;
-  uint8_t uc = c;
-  uint8_t x = ub ^ uc; /* 0: yes; 1..255: no */
-  uint32_t y = x;      /* 0: yes; 1..255: no */
-  y -= 1;              /* 4294967295: yes; 0..254: no */
-  y >>= 31;            /* 1: yes; 0: no */
-  return y;
+static uint8_t equal(signed char b, signed char c)
+{
+    uint8_t ub = b;
+    uint8_t uc = c;
+    uint8_t x = ub ^ uc; /* 0: yes; 1..255: no */
+    uint32_t y = x;      /* 0: yes; 1..255: no */
+    y -= 1;              /* 4294967295: yes; 0..254: no */
+    y >>= 31;            /* 1: yes; 0: no */
+    return y;
 }
 
-static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) {
-  fe_cmov(t->yplusx, u->yplusx, b);
-  fe_cmov(t->yminusx, u->yminusx, b);
-  fe_cmov(t->xy2d, u->xy2d, b);
+static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b)
+{
+    fe_cmov(t->yplusx, u->yplusx, b);
+    fe_cmov(t->yminusx, u->yminusx, b);
+    fe_cmov(t->xy2d, u->xy2d, b);
 }
 
 /* k25519Precomp[i][j] = (j+1)*256^i*B */
@@ -4105,257 +4186,273 @@ static const ge_precomp k25519Precomp[32][8] = {
     },
 };
 
-static uint8_t negative(signed char b) {
-  uint32_t x = b;
-  x >>= 31; /* 1: yes; 0: no */
-  return x;
+static uint8_t negative(signed char b)
+{
+    uint32_t x = b;
+
+    x >>= 31; /* 1: yes; 0: no */
+    return x;
 }
 
-static void table_select(ge_precomp *t, int pos, signed char b) {
-  ge_precomp minust;
-  uint8_t bnegative = negative(b);
-  uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1);
-
-  ge_precomp_0(t);
-  cmov(t, &k25519Precomp[pos][0], equal(babs, 1));
-  cmov(t, &k25519Precomp[pos][1], equal(babs, 2));
-  cmov(t, &k25519Precomp[pos][2], equal(babs, 3));
-  cmov(t, &k25519Precomp[pos][3], equal(babs, 4));
-  cmov(t, &k25519Precomp[pos][4], equal(babs, 5));
-  cmov(t, &k25519Precomp[pos][5], equal(babs, 6));
-  cmov(t, &k25519Precomp[pos][6], equal(babs, 7));
-  cmov(t, &k25519Precomp[pos][7], equal(babs, 8));
-  fe_copy(minust.yplusx, t->yminusx);
-  fe_copy(minust.yminusx, t->yplusx);
-  fe_neg(minust.xy2d, t->xy2d);
-  cmov(t, &minust, bnegative);
+static void table_select(ge_precomp *t, int pos, signed char b)
+{
+    ge_precomp minust;
+    uint8_t bnegative = negative(b);
+    uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1);
+
+    ge_precomp_0(t);
+    cmov(t, &k25519Precomp[pos][0], equal(babs, 1));
+    cmov(t, &k25519Precomp[pos][1], equal(babs, 2));
+    cmov(t, &k25519Precomp[pos][2], equal(babs, 3));
+    cmov(t, &k25519Precomp[pos][3], equal(babs, 4));
+    cmov(t, &k25519Precomp[pos][4], equal(babs, 5));
+    cmov(t, &k25519Precomp[pos][5], equal(babs, 6));
+    cmov(t, &k25519Precomp[pos][6], equal(babs, 7));
+    cmov(t, &k25519Precomp[pos][7], equal(babs, 8));
+    fe_copy(minust.yplusx, t->yminusx);
+    fe_copy(minust.yminusx, t->yplusx);
+    fe_neg(minust.xy2d, t->xy2d);
+    cmov(t, &minust, bnegative);
 }
 
-/* h = a * B
+/*
+ * h = a * B
+ *
  * where a = a[0]+256*a[1]+...+256^31 a[31]
  * B is the Ed25519 base point (x,4/5) with x positive.
  *
  * Preconditions:
- *   a[31] <= 127 */
-static void ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
-  signed char e[64];
-  signed char carry;
-  ge_p1p1 r;
-  ge_p2 s;
-  ge_precomp t;
-  int i;
-
-  for (i = 0; i < 32; ++i) {
-    e[2 * i + 0] = (a[i] >> 0) & 15;
-    e[2 * i + 1] = (a[i] >> 4) & 15;
-  }
-  /* each e[i] is between 0 and 15 */
-  /* e[63] is between 0 and 7 */
-
-  carry = 0;
-  for (i = 0; i < 63; ++i) {
-    e[i] += carry;
-    carry = e[i] + 8;
-    carry >>= 4;
-    e[i] -= carry << 4;
-  }
-  e[63] += carry;
-  /* each e[i] is between -8 and 8 */
-
-  ge_p3_0(h);
-  for (i = 1; i < 64; i += 2) {
-    table_select(&t, i / 2, e[i]);
-    ge_madd(&r, h, &t);
-    ge_p1p1_to_p3(h, &r);
-  }
-
-  ge_p3_dbl(&r, h);
-  ge_p1p1_to_p2(&s, &r);
-  ge_p2_dbl(&r, &s);
-  ge_p1p1_to_p2(&s, &r);
-  ge_p2_dbl(&r, &s);
-  ge_p1p1_to_p2(&s, &r);
-  ge_p2_dbl(&r, &s);
-  ge_p1p1_to_p3(h, &r);
-
-  for (i = 0; i < 64; i += 2) {
-    table_select(&t, i / 2, e[i]);
-    ge_madd(&r, h, &t);
+ *   a[31] <= 127
+ */
+static void ge_scalarmult_base(ge_p3 *h, const uint8_t *a)
+{
+    signed char e[64];
+    signed char carry;
+    ge_p1p1 r;
+    ge_p2 s;
+    ge_precomp t;
+    int i;
+
+    for (i = 0; i < 32; ++i) {
+        e[2 * i + 0] = (a[i] >> 0) & 15;
+        e[2 * i + 1] = (a[i] >> 4) & 15;
+    }
+    /* each e[i] is between 0 and 15 */
+    /* e[63] is between 0 and 7 */
+
+    carry = 0;
+    for (i = 0; i < 63; ++i) {
+        e[i] += carry;
+        carry = e[i] + 8;
+        carry >>= 4;
+        e[i] -= carry << 4;
+    }
+    e[63] += carry;
+    /* each e[i] is between -8 and 8 */
+
+    ge_p3_0(h);
+    for (i = 1; i < 64; i += 2) {
+        table_select(&t, i / 2, e[i]);
+        ge_madd(&r, h, &t);
+        ge_p1p1_to_p3(h, &r);
+    }
+
+    ge_p3_dbl(&r, h);
+    ge_p1p1_to_p2(&s, &r);
+    ge_p2_dbl(&r, &s);
+    ge_p1p1_to_p2(&s, &r);
+    ge_p2_dbl(&r, &s);
+    ge_p1p1_to_p2(&s, &r);
+    ge_p2_dbl(&r, &s);
     ge_p1p1_to_p3(h, &r);
-  }
 
-  OPENSSL_cleanse(e, sizeof(e));
+    for (i = 0; i < 64; i += 2) {
+        table_select(&t, i / 2, e[i]);
+        ge_madd(&r, h, &t);
+        ge_p1p1_to_p3(h, &r);
+    }
+
+    OPENSSL_cleanse(e, sizeof(e));
 }
 
 #if !defined(BASE_2_51_IMPLEMENTED)
-/* Replace (f,g) with (g,f) if b == 1;
+/*
+ * Replace (f,g) with (g,f) if b == 1;
  * replace (f,g) with (f,g) if b == 0.
  *
- * Preconditions: b in {0,1}. */
-static void fe_cswap(fe f, fe g, unsigned int b) {
-  size_t i;
-  b = 0-b;
-  for (i = 0; i < 10; i++) {
-    int32_t x = f[i] ^ g[i];
-    x &= b;
-    f[i] ^= x;
-    g[i] ^= x;
-  }
+ * Preconditions: b in {0,1}.
+ */
+static void fe_cswap(fe f, fe g, unsigned int b)
+{
+    size_t i;
+
+    b = 0-b;
+    for (i = 0; i < 10; i++) {
+        int32_t x = f[i] ^ g[i];
+        x &= b;
+        f[i] ^= x;
+        g[i] ^= x;
+    }
 }
 
-/* h = f * 121666
+/*
+ * h = f * 121666
+ *
  * Can overlap h with f.
  *
  * Preconditions:
  *    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  *
  * Postconditions:
- *    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */
-static void fe_mul121666(fe h, fe f) {
-  int32_t f0 = f[0];
-  int32_t f1 = f[1];
-  int32_t f2 = f[2];
-  int32_t f3 = f[3];
-  int32_t f4 = f[4];
-  int32_t f5 = f[5];
-  int32_t f6 = f[6];
-  int32_t f7 = f[7];
-  int32_t f8 = f[8];
-  int32_t f9 = f[9];
-  int64_t h0 = f0 * (int64_t) 121666;
-  int64_t h1 = f1 * (int64_t) 121666;
-  int64_t h2 = f2 * (int64_t) 121666;
-  int64_t h3 = f3 * (int64_t) 121666;
-  int64_t h4 = f4 * (int64_t) 121666;
-  int64_t h5 = f5 * (int64_t) 121666;
-  int64_t h6 = f6 * (int64_t) 121666;
-  int64_t h7 = f7 * (int64_t) 121666;
-  int64_t h8 = f8 * (int64_t) 121666;
-  int64_t h9 = f9 * (int64_t) 121666;
-  int64_t carry0;
-  int64_t carry1;
-  int64_t carry2;
-  int64_t carry3;
-  int64_t carry4;
-  int64_t carry5;
-  int64_t carry6;
-  int64_t carry7;
-  int64_t carry8;
-  int64_t carry9;
-
-  carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
-  carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
-  carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
-  carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
-  carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
-
-  carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
-  carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
-  carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
-  carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
-  carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
-
-  h[0] = (int32_t)h0;
-  h[1] = (int32_t)h1;
-  h[2] = (int32_t)h2;
-  h[3] = (int32_t)h3;
-  h[4] = (int32_t)h4;
-  h[5] = (int32_t)h5;
-  h[6] = (int32_t)h6;
-  h[7] = (int32_t)h7;
-  h[8] = (int32_t)h8;
-  h[9] = (int32_t)h9;
+ *    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+ */
+static void fe_mul121666(fe h, fe f)
+{
+    int32_t f0 = f[0];
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+    int64_t h0 = f0 * (int64_t) 121666;
+    int64_t h1 = f1 * (int64_t) 121666;
+    int64_t h2 = f2 * (int64_t) 121666;
+    int64_t h3 = f3 * (int64_t) 121666;
+    int64_t h4 = f4 * (int64_t) 121666;
+    int64_t h5 = f5 * (int64_t) 121666;
+    int64_t h6 = f6 * (int64_t) 121666;
+    int64_t h7 = f7 * (int64_t) 121666;
+    int64_t h8 = f8 * (int64_t) 121666;
+    int64_t h9 = f9 * (int64_t) 121666;
+    int64_t carry0;
+    int64_t carry1;
+    int64_t carry2;
+    int64_t carry3;
+    int64_t carry4;
+    int64_t carry5;
+    int64_t carry6;
+    int64_t carry7;
+    int64_t carry8;
+    int64_t carry9;
+
+    carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
+    carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
+    carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
+    carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
+    carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
+
+    carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+    carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
+    carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+    carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
+    carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
+
+    h[0] = (int32_t)h0;
+    h[1] = (int32_t)h1;
+    h[2] = (int32_t)h2;
+    h[3] = (int32_t)h3;
+    h[4] = (int32_t)h4;
+    h[5] = (int32_t)h5;
+    h[6] = (int32_t)h6;
+    h[7] = (int32_t)h7;
+    h[8] = (int32_t)h8;
+    h[9] = (int32_t)h9;
 }
 
 static void x25519_scalar_mult_generic(uint8_t out[32],
                                        const uint8_t scalar[32],
                                        const uint8_t point[32]) {
-  fe x1, x2, z2, x3, z3, tmp0, tmp1;
-  uint8_t e[32];
-  unsigned swap = 0;
-  int pos;
-
-  memcpy(e, scalar, 32);
-  e[0] &= 248;
-  e[31] &= 127;
-  e[31] |= 64;
-  fe_frombytes(x1, point);
-  fe_1(x2);
-  fe_0(z2);
-  fe_copy(x3, x1);
-  fe_1(z3);
-
-  for (pos = 254; pos >= 0; --pos) {
-    unsigned b = 1 & (e[pos / 8] >> (pos & 7));
-    swap ^= b;
-    fe_cswap(x2, x3, swap);
-    fe_cswap(z2, z3, swap);
-    swap = b;
-    fe_sub(tmp0, x3, z3);
-    fe_sub(tmp1, x2, z2);
-    fe_add(x2, x2, z2);
-    fe_add(z2, x3, z3);
-    fe_mul(z3, tmp0, x2);
-    fe_mul(z2, z2, tmp1);
-    fe_sq(tmp0, tmp1);
-    fe_sq(tmp1, x2);
-    fe_add(x3, z3, z2);
-    fe_sub(z2, z3, z2);
-    fe_mul(x2, tmp1, tmp0);
-    fe_sub(tmp1, tmp1, tmp0);
-    fe_sq(z2, z2);
-    fe_mul121666(z3, tmp1);
-    fe_sq(x3, x3);
-    fe_add(tmp0, tmp0, z3);
-    fe_mul(z3, x1, z2);
-    fe_mul(z2, tmp1, tmp0);
-  }
-
-  fe_invert(z2, z2);
-  fe_mul(x2, x2, z2);
-  fe_tobytes(out, x2);
-
-  OPENSSL_cleanse(e, sizeof(e));
+    fe x1, x2, z2, x3, z3, tmp0, tmp1;
+    uint8_t e[32];
+    unsigned swap = 0;
+    int pos;
+
+    memcpy(e, scalar, 32);
+    e[0] &= 248;
+    e[31] &= 127;
+    e[31] |= 64;
+    fe_frombytes(x1, point);
+    fe_1(x2);
+    fe_0(z2);
+    fe_copy(x3, x1);
+    fe_1(z3);
+
+    for (pos = 254; pos >= 0; --pos) {
+        unsigned b = 1 & (e[pos / 8] >> (pos & 7));
+        swap ^= b;
+        fe_cswap(x2, x3, swap);
+        fe_cswap(z2, z3, swap);
+        swap = b;
+        fe_sub(tmp0, x3, z3);
+        fe_sub(tmp1, x2, z2);
+        fe_add(x2, x2, z2);
+        fe_add(z2, x3, z3);
+        fe_mul(z3, tmp0, x2);
+        fe_mul(z2, z2, tmp1);
+        fe_sq(tmp0, tmp1);
+        fe_sq(tmp1, x2);
+        fe_add(x3, z3, z2);
+        fe_sub(z2, z3, z2);
+        fe_mul(x2, tmp1, tmp0);
+        fe_sub(tmp1, tmp1, tmp0);
+        fe_sq(z2, z2);
+        fe_mul121666(z3, tmp1);
+        fe_sq(x3, x3);
+        fe_add(tmp0, tmp0, z3);
+        fe_mul(z3, x1, z2);
+        fe_mul(z2, tmp1, tmp0);
+    }
+
+    fe_invert(z2, z2);
+    fe_mul(x2, x2, z2);
+    fe_tobytes(out, x2);
+
+    OPENSSL_cleanse(e, sizeof(e));
 }
 
 static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
                                const uint8_t point[32]) {
-  x25519_scalar_mult_generic(out, scalar, point);
+    x25519_scalar_mult_generic(out, scalar, point);
 }
 #endif
 
-static void slide(signed char *r, const uint8_t *a) {
-  int i;
-  int b;
-  int k;
-
-  for (i = 0; i < 256; ++i) {
-    r[i] = 1 & (a[i >> 3] >> (i & 7));
-  }
-
-  for (i = 0; i < 256; ++i) {
-    if (r[i]) {
-      for (b = 1; b <= 6 && i + b < 256; ++b) {
-        if (r[i + b]) {
-          if (r[i] + (r[i + b] << b) <= 15) {
-            r[i] += r[i + b] << b;
-            r[i + b] = 0;
-          } else if (r[i] - (r[i + b] << b) >= -15) {
-            r[i] -= r[i + b] << b;
-            for (k = i + b; k < 256; ++k) {
-              if (!r[k]) {
-                r[k] = 1;
-                break;
-              }
-              r[k] = 0;
+static void slide(signed char *r, const uint8_t *a)
+{
+    int i;
+    int b;
+    int k;
+
+    for (i = 0; i < 256; ++i) {
+        r[i] = 1 & (a[i >> 3] >> (i & 7));
+    }
+
+    for (i = 0; i < 256; ++i) {
+        if (r[i]) {
+            for (b = 1; b <= 6 && i + b < 256; ++b) {
+                if (r[i + b]) {
+                    if (r[i] + (r[i + b] << b) <= 15) {
+                        r[i] += r[i + b] << b;
+                        r[i + b] = 0;
+                    } else if (r[i] - (r[i + b] << b) >= -15) {
+                        r[i] -= r[i + b] << b;
+                        for (k = i + b; k < 256; ++k) {
+                            if (!r[k]) {
+                                r[k] = 1;
+                                break;
+                            }
+                            r[k] = 0;
+                        }
+                    } else {
+                        break;
+                    }
+                }
             }
-          } else {
-            break;
-          }
         }
-      }
     }
-  }
 }
 
 static const ge_precomp Bi[8] = {
@@ -4425,1033 +4522,1075 @@ static const ge_precomp Bi[8] = {
     },
 };
 
-/* r = a * A + b * B
+/*
+ * r = a * A + b * B
+ *
  * where a = a[0]+256*a[1]+...+256^31 a[31].
  * and b = b[0]+256*b[1]+...+256^31 b[31].
- * B is the Ed25519 base point (x,4/5) with x positive. */
+ * B is the Ed25519 base point (x,4/5) with x positive.
+ */
 static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,
-                                         const ge_p3 *A, const uint8_t *b) {
-  signed char aslide[256];
-  signed char bslide[256];
-  ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
-  ge_p1p1 t;
-  ge_p3 u;
-  ge_p3 A2;
-  int i;
-
-  slide(aslide, a);
-  slide(bslide, b);
-
-  ge_p3_to_cached(&Ai[0], A);
-  ge_p3_dbl(&t, A);
-  ge_p1p1_to_p3(&A2, &t);
-  ge_add(&t, &A2, &Ai[0]);
-  ge_p1p1_to_p3(&u, &t);
-  ge_p3_to_cached(&Ai[1], &u);
-  ge_add(&t, &A2, &Ai[1]);
-  ge_p1p1_to_p3(&u, &t);
-  ge_p3_to_cached(&Ai[2], &u);
-  ge_add(&t, &A2, &Ai[2]);
-  ge_p1p1_to_p3(&u, &t);
-  ge_p3_to_cached(&Ai[3], &u);
-  ge_add(&t, &A2, &Ai[3]);
-  ge_p1p1_to_p3(&u, &t);
-  ge_p3_to_cached(&Ai[4], &u);
-  ge_add(&t, &A2, &Ai[4]);
-  ge_p1p1_to_p3(&u, &t);
-  ge_p3_to_cached(&Ai[5], &u);
-  ge_add(&t, &A2, &Ai[5]);
-  ge_p1p1_to_p3(&u, &t);
-  ge_p3_to_cached(&Ai[6], &u);
-  ge_add(&t, &A2, &Ai[6]);
-  ge_p1p1_to_p3(&u, &t);
-  ge_p3_to_cached(&Ai[7], &u);
-
-  ge_p2_0(r);
-
-  for (i = 255; i >= 0; --i) {
-    if (aslide[i] || bslide[i]) {
-      break;
+                                         const ge_p3 *A, const uint8_t *b)
+{
+    signed char aslide[256];
+    signed char bslide[256];
+    ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
+    ge_p1p1 t;
+    ge_p3 u;
+    ge_p3 A2;
+    int i;
+
+    slide(aslide, a);
+    slide(bslide, b);
+
+    ge_p3_to_cached(&Ai[0], A);
+    ge_p3_dbl(&t, A);
+    ge_p1p1_to_p3(&A2, &t);
+    ge_add(&t, &A2, &Ai[0]);
+    ge_p1p1_to_p3(&u, &t);
+    ge_p3_to_cached(&Ai[1], &u);
+    ge_add(&t, &A2, &Ai[1]);
+    ge_p1p1_to_p3(&u, &t);
+    ge_p3_to_cached(&Ai[2], &u);
+    ge_add(&t, &A2, &Ai[2]);
+    ge_p1p1_to_p3(&u, &t);
+    ge_p3_to_cached(&Ai[3], &u);
+    ge_add(&t, &A2, &Ai[3]);
+    ge_p1p1_to_p3(&u, &t);
+    ge_p3_to_cached(&Ai[4], &u);
+    ge_add(&t, &A2, &Ai[4]);
+    ge_p1p1_to_p3(&u, &t);
+    ge_p3_to_cached(&Ai[5], &u);
+    ge_add(&t, &A2, &Ai[5]);
+    ge_p1p1_to_p3(&u, &t);
+    ge_p3_to_cached(&Ai[6], &u);
+    ge_add(&t, &A2, &Ai[6]);
+    ge_p1p1_to_p3(&u, &t);
+    ge_p3_to_cached(&Ai[7], &u);
+
+    ge_p2_0(r);
+
+    for (i = 255; i >= 0; --i) {
+        if (aslide[i] || bslide[i]) {
+            break;
+        }
     }
-  }
 
-  for (; i >= 0; --i) {
-    ge_p2_dbl(&t, r);
+    for (; i >= 0; --i) {
+        ge_p2_dbl(&t, r);
 
-    if (aslide[i] > 0) {
-      ge_p1p1_to_p3(&u, &t);
-      ge_add(&t, &u, &Ai[aslide[i] / 2]);
-    } else if (aslide[i] < 0) {
-      ge_p1p1_to_p3(&u, &t);
-      ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
-    }
+        if (aslide[i] > 0) {
+            ge_p1p1_to_p3(&u, &t);
+            ge_add(&t, &u, &Ai[aslide[i] / 2]);
+        } else if (aslide[i] < 0) {
+            ge_p1p1_to_p3(&u, &t);
+            ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
+        }
 
-    if (bslide[i] > 0) {
-      ge_p1p1_to_p3(&u, &t);
-      ge_madd(&t, &u, &Bi[bslide[i] / 2]);
-    } else if (bslide[i] < 0) {
-      ge_p1p1_to_p3(&u, &t);
-      ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
-    }
+        if (bslide[i] > 0) {
+            ge_p1p1_to_p3(&u, &t);
+            ge_madd(&t, &u, &Bi[bslide[i] / 2]);
+        } else if (bslide[i] < 0) {
+            ge_p1p1_to_p3(&u, &t);
+            ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
+        }
 
-    ge_p1p1_to_p2(r, &t);
-  }
+        ge_p1p1_to_p2(r, &t);
+    }
 }
 
-/* The set of scalars is \Z/l
- * where l = 2^252 + 27742317777372353535851937790883648493. */
-
-/* Input:
+/*
+ * The set of scalars is \Z/l
+ * where l = 2^252 + 27742317777372353535851937790883648493.
+ *
+ * Input:
  *   s[0]+256*s[1]+...+256^63*s[63] = s
  *
  * Output:
  *   s[0]+256*s[1]+...+256^31*s[31] = s mod l
  *   where l = 2^252 + 27742317777372353535851937790883648493.
- *   Overwrites s in place. */
-static void x25519_sc_reduce(uint8_t *s) {
-  int64_t s0 = 2097151 & load_3(s);
-  int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
-  int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
-  int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
-  int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
-  int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
-  int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
-  int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
-  int64_t s8 = 2097151 & load_3(s + 21);
-  int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
-  int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
-  int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
-  int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
-  int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
-  int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
-  int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
-  int64_t s16 = 2097151 & load_3(s + 42);
-  int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
-  int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
-  int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
-  int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
-  int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
-  int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
-  int64_t s23 = (load_4(s + 60) >> 3);
-  int64_t carry0;
-  int64_t carry1;
-  int64_t carry2;
-  int64_t carry3;
-  int64_t carry4;
-  int64_t carry5;
-  int64_t carry6;
-  int64_t carry7;
-  int64_t carry8;
-  int64_t carry9;
-  int64_t carry10;
-  int64_t carry11;
-  int64_t carry12;
-  int64_t carry13;
-  int64_t carry14;
-  int64_t carry15;
-  int64_t carry16;
-
-  s11 += s23 * 666643;
-  s12 += s23 * 470296;
-  s13 += s23 * 654183;
-  s14 -= s23 * 997805;
-  s15 += s23 * 136657;
-  s16 -= s23 * 683901;
-  s23 = 0;
-
-  s10 += s22 * 666643;
-  s11 += s22 * 470296;
-  s12 += s22 * 654183;
-  s13 -= s22 * 997805;
-  s14 += s22 * 136657;
-  s15 -= s22 * 683901;
-  s22 = 0;
-
-  s9 += s21 * 666643;
-  s10 += s21 * 470296;
-  s11 += s21 * 654183;
-  s12 -= s21 * 997805;
-  s13 += s21 * 136657;
-  s14 -= s21 * 683901;
-  s21 = 0;
-
-  s8 += s20 * 666643;
-  s9 += s20 * 470296;
-  s10 += s20 * 654183;
-  s11 -= s20 * 997805;
-  s12 += s20 * 136657;
-  s13 -= s20 * 683901;
-  s20 = 0;
-
-  s7 += s19 * 666643;
-  s8 += s19 * 470296;
-  s9 += s19 * 654183;
-  s10 -= s19 * 997805;
-  s11 += s19 * 136657;
-  s12 -= s19 * 683901;
-  s19 = 0;
-
-  s6 += s18 * 666643;
-  s7 += s18 * 470296;
-  s8 += s18 * 654183;
-  s9 -= s18 * 997805;
-  s10 += s18 * 136657;
-  s11 -= s18 * 683901;
-  s18 = 0;
-
-  carry6 = (s6 + (1 << 20)) >> 21;
-  s7 += carry6;
-  s6 -= carry6 * (1 << 21);
-  carry8 = (s8 + (1 << 20)) >> 21;
-  s9 += carry8;
-  s8 -= carry8 * (1 << 21);
-  carry10 = (s10 + (1 << 20)) >> 21;
-  s11 += carry10;
-  s10 -= carry10 * (1 << 21);
-  carry12 = (s12 + (1 << 20)) >> 21;
-  s13 += carry12;
-  s12 -= carry12 * (1 << 21);
-  carry14 = (s14 + (1 << 20)) >> 21;
-  s15 += carry14;
-  s14 -= carry14 * (1 << 21);
-  carry16 = (s16 + (1 << 20)) >> 21;
-  s17 += carry16;
-  s16 -= carry16 * (1 << 21);
-
-  carry7 = (s7 + (1 << 20)) >> 21;
-  s8 += carry7;
-  s7 -= carry7 * (1 << 21);
-  carry9 = (s9 + (1 << 20)) >> 21;
-  s10 += carry9;
-  s9 -= carry9 * (1 << 21);
-  carry11 = (s11 + (1 << 20)) >> 21;
-  s12 += carry11;
-  s11 -= carry11 * (1 << 21);
-  carry13 = (s13 + (1 << 20)) >> 21;
-  s14 += carry13;
-  s13 -= carry13 * (1 << 21);
-  carry15 = (s15 + (1 << 20)) >> 21;
-  s16 += carry15;
-  s15 -= carry15 * (1 << 21);
-
-  s5 += s17 * 666643;
-  s6 += s17 * 470296;
-  s7 += s17 * 654183;
-  s8 -= s17 * 997805;
-  s9 += s17 * 136657;
-  s10 -= s17 * 683901;
-  s17 = 0;
-
-  s4 += s16 * 666643;
-  s5 += s16 * 470296;
-  s6 += s16 * 654183;
-  s7 -= s16 * 997805;
-  s8 += s16 * 136657;
-  s9 -= s16 * 683901;
-  s16 = 0;
-
-  s3 += s15 * 666643;
-  s4 += s15 * 470296;
-  s5 += s15 * 654183;
-  s6 -= s15 * 997805;
-  s7 += s15 * 136657;
-  s8 -= s15 * 683901;
-  s15 = 0;
-
-  s2 += s14 * 666643;
-  s3 += s14 * 470296;
-  s4 += s14 * 654183;
-  s5 -= s14 * 997805;
-  s6 += s14 * 136657;
-  s7 -= s14 * 683901;
-  s14 = 0;
-
-  s1 += s13 * 666643;
-  s2 += s13 * 470296;
-  s3 += s13 * 654183;
-  s4 -= s13 * 997805;
-  s5 += s13 * 136657;
-  s6 -= s13 * 683901;
-  s13 = 0;
-
-  s0 += s12 * 666643;
-  s1 += s12 * 470296;
-  s2 += s12 * 654183;
-  s3 -= s12 * 997805;
-  s4 += s12 * 136657;
-  s5 -= s12 * 683901;
-  s12 = 0;
-
-  carry0 = (s0 + (1 << 20)) >> 21;
-  s1 += carry0;
-  s0 -= carry0 * (1 << 21);
-  carry2 = (s2 + (1 << 20)) >> 21;
-  s3 += carry2;
-  s2 -= carry2 * (1 << 21);
-  carry4 = (s4 + (1 << 20)) >> 21;
-  s5 += carry4;
-  s4 -= carry4 * (1 << 21);
-  carry6 = (s6 + (1 << 20)) >> 21;
-  s7 += carry6;
-  s6 -= carry6 * (1 << 21);
-  carry8 = (s8 + (1 << 20)) >> 21;
-  s9 += carry8;
-  s8 -= carry8 * (1 << 21);
-  carry10 = (s10 + (1 << 20)) >> 21;
-  s11 += carry10;
-  s10 -= carry10 * (1 << 21);
-
-  carry1 = (s1 + (1 << 20)) >> 21;
-  s2 += carry1;
-  s1 -= carry1 * (1 << 21);
-  carry3 = (s3 + (1 << 20)) >> 21;
-  s4 += carry3;
-  s3 -= carry3 * (1 << 21);
-  carry5 = (s5 + (1 << 20)) >> 21;
-  s6 += carry5;
-  s5 -= carry5 * (1 << 21);
-  carry7 = (s7 + (1 << 20)) >> 21;
-  s8 += carry7;
-  s7 -= carry7 * (1 << 21);
-  carry9 = (s9 + (1 << 20)) >> 21;
-  s10 += carry9;
-  s9 -= carry9 * (1 << 21);
-  carry11 = (s11 + (1 << 20)) >> 21;
-  s12 += carry11;
-  s11 -= carry11 * (1 << 21);
-
-  s0 += s12 * 666643;
-  s1 += s12 * 470296;
-  s2 += s12 * 654183;
-  s3 -= s12 * 997805;
-  s4 += s12 * 136657;
-  s5 -= s12 * 683901;
-  s12 = 0;
-
-  carry0 = s0 >> 21;
-  s1 += carry0;
-  s0 -= carry0 * (1 << 21);
-  carry1 = s1 >> 21;
-  s2 += carry1;
-  s1 -= carry1 * (1 << 21);
-  carry2 = s2 >> 21;
-  s3 += carry2;
-  s2 -= carry2 * (1 << 21);
-  carry3 = s3 >> 21;
-  s4 += carry3;
-  s3 -= carry3 * (1 << 21);
-  carry4 = s4 >> 21;
-  s5 += carry4;
-  s4 -= carry4 * (1 << 21);
-  carry5 = s5 >> 21;
-  s6 += carry5;
-  s5 -= carry5 * (1 << 21);
-  carry6 = s6 >> 21;
-  s7 += carry6;
-  s6 -= carry6 * (1 << 21);
-  carry7 = s7 >> 21;
-  s8 += carry7;
-  s7 -= carry7 * (1 << 21);
-  carry8 = s8 >> 21;
-  s9 += carry8;
-  s8 -= carry8 * (1 << 21);
-  carry9 = s9 >> 21;
-  s10 += carry9;
-  s9 -= carry9 * (1 << 21);
-  carry10 = s10 >> 21;
-  s11 += carry10;
-  s10 -= carry10 * (1 << 21);
-  carry11 = s11 >> 21;
-  s12 += carry11;
-  s11 -= carry11 * (1 << 21);
-
-  s0 += s12 * 666643;
-  s1 += s12 * 470296;
-  s2 += s12 * 654183;
-  s3 -= s12 * 997805;
-  s4 += s12 * 136657;
-  s5 -= s12 * 683901;
-  s12 = 0;
-
-  carry0 = s0 >> 21;
-  s1 += carry0;
-  s0 -= carry0 * (1 << 21);
-  carry1 = s1 >> 21;
-  s2 += carry1;
-  s1 -= carry1 * (1 << 21);
-  carry2 = s2 >> 21;
-  s3 += carry2;
-  s2 -= carry2 * (1 << 21);
-  carry3 = s3 >> 21;
-  s4 += carry3;
-  s3 -= carry3 * (1 << 21);
-  carry4 = s4 >> 21;
-  s5 += carry4;
-  s4 -= carry4 * (1 << 21);
-  carry5 = s5 >> 21;
-  s6 += carry5;
-  s5 -= carry5 * (1 << 21);
-  carry6 = s6 >> 21;
-  s7 += carry6;
-  s6 -= carry6 * (1 << 21);
-  carry7 = s7 >> 21;
-  s8 += carry7;
-  s7 -= carry7 * (1 << 21);
-  carry8 = s8 >> 21;
-  s9 += carry8;
-  s8 -= carry8 * (1 << 21);
-  carry9 = s9 >> 21;
-  s10 += carry9;
-  s9 -= carry9 * (1 << 21);
-  carry10 = s10 >> 21;
-  s11 += carry10;
-  s10 -= carry10 * (1 << 21);
-
-  s[0] = (uint8_t)(s0 >> 0);
-  s[1] = (uint8_t)(s0 >> 8);
-  s[2] = (uint8_t)((s0 >> 16) | (s1 << 5));
-  s[3] = (uint8_t)(s1 >> 3);
-  s[4] = (uint8_t)(s1 >> 11);
-  s[5] = (uint8_t)((s1 >> 19) | (s2 << 2));
-  s[6] = (uint8_t)(s2 >> 6);
-  s[7] = (uint8_t)((s2 >> 14) | (s3 << 7));
-  s[8] = (uint8_t)(s3 >> 1);
-  s[9] = (uint8_t)(s3 >> 9);
-  s[10] = (uint8_t)((s3 >> 17) | (s4 << 4));
-  s[11] = (uint8_t)(s4 >> 4);
-  s[12] = (uint8_t)(s4 >> 12);
-  s[13] = (uint8_t)((s4 >> 20) | (s5 << 1));
-  s[14] = (uint8_t)(s5 >> 7);
-  s[15] = (uint8_t)((s5 >> 15) | (s6 << 6));
-  s[16] = (uint8_t)(s6 >> 2);
-  s[17] = (uint8_t)(s6 >> 10);
-  s[18] = (uint8_t)((s6 >> 18) | (s7 << 3));
-  s[19] = (uint8_t)(s7 >> 5);
-  s[20] = (uint8_t)(s7 >> 13);
-  s[21] = (uint8_t)(s8 >> 0);
-  s[22] = (uint8_t)(s8 >> 8);
-  s[23] = (uint8_t)((s8 >> 16) | (s9 << 5));
-  s[24] = (uint8_t)(s9 >> 3);
-  s[25] = (uint8_t)(s9 >> 11);
-  s[26] = (uint8_t)((s9 >> 19) | (s10 << 2));
-  s[27] = (uint8_t)(s10 >> 6);
-  s[28] = (uint8_t)((s10 >> 14) | (s11 << 7));
-  s[29] = (uint8_t)(s11 >> 1);
-  s[30] = (uint8_t)(s11 >> 9);
-  s[31] = (uint8_t)(s11 >> 17);
+ *   Overwrites s in place.
+*/
+static void x25519_sc_reduce(uint8_t *s)
+{
+    int64_t s0  = kBottom21Bits &  load_3(s);              /* limb k = input bits 21k..21k+20 */
+    int64_t s1  = kBottom21Bits & (load_4(s +  2) >> 5);   /* (assumes load_3/load_4 are little-endian loads; */
+    int64_t s2  = kBottom21Bits & (load_3(s +  5) >> 2);   /*  they and kBottom21Bits are defined elsewhere) */
+    int64_t s3  = kBottom21Bits & (load_4(s +  7) >> 7);
+    int64_t s4  = kBottom21Bits & (load_4(s + 10) >> 4);
+    int64_t s5  = kBottom21Bits & (load_3(s + 13) >> 1);
+    int64_t s6  = kBottom21Bits & (load_4(s + 15) >> 6);
+    int64_t s7  = kBottom21Bits & (load_3(s + 18) >> 3);
+    int64_t s8  = kBottom21Bits &  load_3(s + 21);
+    int64_t s9  = kBottom21Bits & (load_4(s + 23) >> 5);
+    int64_t s10 = kBottom21Bits & (load_3(s + 26) >> 2);
+    int64_t s11 = kBottom21Bits & (load_4(s + 28) >> 7);
+    int64_t s12 = kBottom21Bits & (load_4(s + 31) >> 4);
+    int64_t s13 = kBottom21Bits & (load_3(s + 34) >> 1);
+    int64_t s14 = kBottom21Bits & (load_4(s + 36) >> 6);
+    int64_t s15 = kBottom21Bits & (load_3(s + 39) >> 3);
+    int64_t s16 = kBottom21Bits &  load_3(s + 42);
+    int64_t s17 = kBottom21Bits & (load_4(s + 44) >> 5);
+    int64_t s18 = kBottom21Bits & (load_3(s + 47) >> 2);
+    int64_t s19 = kBottom21Bits & (load_4(s + 49) >> 7);
+    int64_t s20 = kBottom21Bits & (load_4(s + 52) >> 4);
+    int64_t s21 = kBottom21Bits & (load_3(s + 55) >> 1);
+    int64_t s22 = kBottom21Bits & (load_4(s + 57) >> 6);
+    int64_t s23 =                 (load_4(s + 60) >> 3);   /* top limb, bit 483 upward: not masked */
+    int64_t carry0;
+    int64_t carry1;
+    int64_t carry2;
+    int64_t carry3;
+    int64_t carry4;
+    int64_t carry5;
+    int64_t carry6;
+    int64_t carry7;
+    int64_t carry8;
+    int64_t carry9;
+    int64_t carry10;
+    int64_t carry11;
+    int64_t carry12;
+    int64_t carry13;
+    int64_t carry14;
+    int64_t carry15;
+    int64_t carry16;
+
+    s11 += s23 * 666643;    /* fold limb 23 into limbs 11..16 */
+    s12 += s23 * 470296;    /* NOTE(review): the six multipliers look like the */
+    s13 += s23 * 654183;    /* signed radix-2^21 digits of a representation of */
+    s14 -= s23 * 997805;    /* 2^252 modulo l; confirm against the header */
+    s15 += s23 * 136657;
+    s16 -= s23 * 683901;
+    s23  = 0;
+
+    s10 += s22 * 666643;    /* fold limb 22 into limbs 10..15 */
+    s11 += s22 * 470296;
+    s12 += s22 * 654183;
+    s13 -= s22 * 997805;
+    s14 += s22 * 136657;
+    s15 -= s22 * 683901;
+    s22  = 0;
+
+    s9  += s21 * 666643;    /* fold limb 21 into limbs 9..14 */
+    s10 += s21 * 470296;
+    s11 += s21 * 654183;
+    s12 -= s21 * 997805;
+    s13 += s21 * 136657;
+    s14 -= s21 * 683901;
+    s21  = 0;
+
+    s8  += s20 * 666643;    /* fold limb 20 into limbs 8..13 */
+    s9  += s20 * 470296;
+    s10 += s20 * 654183;
+    s11 -= s20 * 997805;
+    s12 += s20 * 136657;
+    s13 -= s20 * 683901;
+    s20  = 0;
+
+    s7  += s19 * 666643;    /* fold limb 19 into limbs 7..12 */
+    s8  += s19 * 470296;
+    s9  += s19 * 654183;
+    s10 -= s19 * 997805;
+    s11 += s19 * 136657;
+    s12 -= s19 * 683901;
+    s19  = 0;
+
+    s6  += s18 * 666643;    /* fold limb 18 into limbs 6..11 */
+    s7  += s18 * 470296;
+    s8  += s18 * 654183;
+    s9  -= s18 * 997805;
+    s10 += s18 * 136657;
+    s11 -= s18 * 683901;
+    s18  = 0;
+
+    carry6 = (s6 + (1 << 20)) >> 21;    /* rounded carry over even limbs 6..16 */
+    s7  += carry6;
+    s6  -= carry6 * (1 << 21);          /* s6 is now in [-2^20, 2^20) */
+    carry8 = (s8 + (1 << 20)) >> 21;    /* (>> on a negative int64_t must be arithmetic here) */
+    s9  += carry8;
+    s8  -= carry8 * (1 << 21);
+    carry10 = (s10 + (1 << 20)) >> 21;
+    s11 += carry10;
+    s10 -= carry10 * (1 << 21);
+    carry12 = (s12 + (1 << 20)) >> 21;
+    s13 += carry12;
+    s12 -= carry12 * (1 << 21);
+    carry14 = (s14 + (1 << 20)) >> 21;
+    s15 += carry14;
+    s14 -= carry14 * (1 << 21);
+    carry16 = (s16 + (1 << 20)) >> 21;
+    s17 += carry16;                     /* limb 17 starts from the unpacked value plus this carry */
+    s16 -= carry16 * (1 << 21);
+
+    carry7 = (s7 + (1 << 20)) >> 21;    /* same rounded carry over odd limbs 7..15 */
+    s8  += carry7;
+    s7  -= carry7 * (1 << 21);
+    carry9 = (s9 + (1 << 20)) >> 21;
+    s10 += carry9;
+    s9  -= carry9 * (1 << 21);
+    carry11 = (s11 + (1 << 20)) >> 21;
+    s12 += carry11;
+    s11 -= carry11 * (1 << 21);
+    carry13 = (s13 + (1 << 20)) >> 21;
+    s14 += carry13;
+    s13 -= carry13 * (1 << 21);
+    carry15 = (s15 + (1 << 20)) >> 21;
+    s16 += carry15;
+    s15 -= carry15 * (1 << 21);
+
+    s5  += s17 * 666643;    /* fold limb 17 into limbs 5..10 */
+    s6  += s17 * 470296;
+    s7  += s17 * 654183;
+    s8  -= s17 * 997805;
+    s9  += s17 * 136657;
+    s10 -= s17 * 683901;
+    s17  = 0;
+
+    s4  += s16 * 666643;    /* fold limb 16 into limbs 4..9 */
+    s5  += s16 * 470296;
+    s6  += s16 * 654183;
+    s7  -= s16 * 997805;
+    s8  += s16 * 136657;
+    s9  -= s16 * 683901;
+    s16  = 0;
+
+    s3  += s15 * 666643;    /* fold limb 15 into limbs 3..8 */
+    s4  += s15 * 470296;
+    s5  += s15 * 654183;
+    s6  -= s15 * 997805;
+    s7  += s15 * 136657;
+    s8  -= s15 * 683901;
+    s15  = 0;
+
+    s2  += s14 * 666643;    /* fold limb 14 into limbs 2..7 */
+    s3  += s14 * 470296;
+    s4  += s14 * 654183;
+    s5  -= s14 * 997805;
+    s6  += s14 * 136657;
+    s7  -= s14 * 683901;
+    s14  = 0;
+
+    s1  += s13 * 666643;    /* fold limb 13 into limbs 1..6 */
+    s2  += s13 * 470296;
+    s3  += s13 * 654183;
+    s4  -= s13 * 997805;
+    s5  += s13 * 136657;
+    s6  -= s13 * 683901;
+    s13  = 0;
+
+    s0  += s12 * 666643;    /* fold limb 12 into limbs 0..5 */
+    s1  += s12 * 470296;
+    s2  += s12 * 654183;
+    s3  -= s12 * 997805;
+    s4  += s12 * 136657;
+    s5  -= s12 * 683901;
+    s12  = 0;
+
+    carry0 = (s0 + (1 << 20)) >> 21;    /* rounded carry over even limbs 0..10 */
+    s1  += carry0;
+    s0  -= carry0 * (1 << 21);
+    carry2 = (s2 + (1 << 20)) >> 21;
+    s3  += carry2;
+    s2  -= carry2 * (1 << 21);
+    carry4 = (s4 + (1 << 20)) >> 21;
+    s5  += carry4;
+    s4  -= carry4 * (1 << 21);
+    carry6 = (s6 + (1 << 20)) >> 21;
+    s7 += carry6;
+    s6 -= carry6 * (1 << 21);
+    carry8 = (s8 + (1 << 20)) >> 21;
+    s9  += carry8;
+    s8  -= carry8 * (1 << 21);
+    carry10 = (s10 + (1 << 20)) >> 21;
+    s11 += carry10;
+    s10 -= carry10 * (1 << 21);
+
+    carry1 = (s1 + (1 << 20)) >> 21;    /* rounded carry over odd limbs 1..11 */
+    s2  += carry1;
+    s1  -= carry1 * (1 << 21);
+    carry3 = (s3 + (1 << 20)) >> 21;
+    s4  += carry3;
+    s3  -= carry3 * (1 << 21);
+    carry5 = (s5 + (1 << 20)) >> 21;
+    s6  += carry5;
+    s5  -= carry5 * (1 << 21);
+    carry7 = (s7 + (1 << 20)) >> 21;
+    s8  += carry7;
+    s7  -= carry7 * (1 << 21);
+    carry9 = (s9 + (1 << 20)) >> 21;
+    s10 += carry9;
+    s9  -= carry9 * (1 << 21);
+    carry11 = (s11 + (1 << 20)) >> 21;
+    s12 += carry11;                     /* the carry out of limb 11 re-creates limb 12 */
+    s11 -= carry11 * (1 << 21);
+
+    s0  += s12 * 666643;    /* fold the re-created limb 12 into limbs 0..5 */
+    s1  += s12 * 470296;
+    s2  += s12 * 654183;
+    s3  -= s12 * 997805;
+    s4  += s12 * 136657;
+    s5  -= s12 * 683901;
+    s12  = 0;
+
+    carry0 = s0 >> 21;                  /* floor carry, sequentially through limbs 0..11 */
+    s1  += carry0;
+    s0  -= carry0 * (1 << 21);          /* each limb is left in [0, 2^21) */
+    carry1 = s1 >> 21;
+    s2  += carry1;
+    s1  -= carry1 * (1 << 21);
+    carry2 = s2 >> 21;
+    s3  += carry2;
+    s2  -= carry2 * (1 << 21);
+    carry3 = s3 >> 21;
+    s4  += carry3;
+    s3  -= carry3 * (1 << 21);
+    carry4 = s4 >> 21;
+    s5  += carry4;
+    s4  -= carry4 * (1 << 21);
+    carry5 = s5 >> 21;
+    s6  += carry5;
+    s5  -= carry5 * (1 << 21);
+    carry6 = s6 >> 21;
+    s7  += carry6;
+    s6  -= carry6 * (1 << 21);
+    carry7 = s7 >> 21;
+    s8  += carry7;
+    s7  -= carry7 * (1 << 21);
+    carry8 = s8 >> 21;
+    s9  += carry8;
+    s8  -= carry8 * (1 << 21);
+    carry9 = s9 >> 21;
+    s10 += carry9;
+    s9  -= carry9 * (1 << 21);
+    carry10 = s10 >> 21;
+    s11 += carry10;
+    s10 -= carry10 * (1 << 21);
+    carry11 = s11 >> 21;
+    s12 += carry11;                     /* limb 12 again receives the carry out of limb 11 */
+    s11 -= carry11 * (1 << 21);
+
+    s0  += s12 * 666643;    /* last fold of limb 12 into limbs 0..5 */
+    s1  += s12 * 470296;
+    s2  += s12 * 654183;
+    s3  -= s12 * 997805;
+    s4  += s12 * 136657;
+    s5  -= s12 * 683901;
+    s12  = 0;
+
+    carry0 = s0 >> 21;                  /* final floor-carry pass over limbs 0..10 */
+    s1  += carry0;
+    s0  -= carry0 * (1 << 21);
+    carry1 = s1 >> 21;
+    s2  += carry1;
+    s1  -= carry1 * (1 << 21);
+    carry2 = s2 >> 21;
+    s3  += carry2;
+    s2  -= carry2 * (1 << 21);
+    carry3 = s3 >> 21;
+    s4  += carry3;
+    s3  -= carry3 * (1 << 21);
+    carry4 = s4 >> 21;
+    s5  += carry4;
+    s4  -= carry4 * (1 << 21);
+    carry5 = s5 >> 21;
+    s6  += carry5;
+    s5  -= carry5 * (1 << 21);
+    carry6 = s6 >> 21;
+    s7  += carry6;
+    s6  -= carry6 * (1 << 21);
+    carry7 = s7 >> 21;
+    s8  += carry7;
+    s7  -= carry7 * (1 << 21);
+    carry8 = s8 >> 21;
+    s9  += carry8;
+    s8  -= carry8 * (1 << 21);
+    carry9 = s9 >> 21;
+    s10 += carry9;
+    s9  -= carry9 * (1 << 21);
+    carry10 = s10 >> 21;
+    s11 += carry10;                     /* no carry is taken out of limb 11 this time */
+    s10 -= carry10 * (1 << 21);
+
+    s[ 0] = (uint8_t) (s0  >>  0);                  /* repack limbs 0..11, 21 bits apart, into bytes */
+    s[ 1] = (uint8_t) (s0  >>  8);
+    s[ 2] = (uint8_t)((s0  >> 16) | (s1  <<  5));   /* byte shared by two adjacent limbs */
+    s[ 3] = (uint8_t) (s1  >>  3);
+    s[ 4] = (uint8_t) (s1  >> 11);
+    s[ 5] = (uint8_t)((s1  >> 19) | (s2  <<  2));
+    s[ 6] = (uint8_t) (s2  >>  6);
+    s[ 7] = (uint8_t)((s2  >> 14) | (s3  <<  7));
+    s[ 8] = (uint8_t) (s3  >>  1);
+    s[ 9] = (uint8_t) (s3  >>  9);
+    s[10] = (uint8_t)((s3  >> 17) | (s4  <<  4));
+    s[11] = (uint8_t) (s4  >>  4);
+    s[12] = (uint8_t) (s4  >> 12);
+    s[13] = (uint8_t)((s4  >> 20) | (s5  <<  1));
+    s[14] = (uint8_t) (s5  >>  7);
+    s[15] = (uint8_t)((s5  >> 15) | (s6  <<  6));
+    s[16] = (uint8_t) (s6  >>  2);
+    s[17] = (uint8_t) (s6  >> 10);
+    s[18] = (uint8_t)((s6  >> 18) | (s7  <<  3));
+    s[19] = (uint8_t) (s7  >>  5);
+    s[20] = (uint8_t) (s7  >> 13);
+    s[21] = (uint8_t) (s8  >>  0);
+    s[22] = (uint8_t) (s8  >>  8);
+    s[23] = (uint8_t)((s8  >> 16) | (s9  <<  5));
+    s[24] = (uint8_t) (s9  >>  3);
+    s[25] = (uint8_t) (s9  >> 11);
+    s[26] = (uint8_t)((s9  >> 19) | (s10 <<  2));
+    s[27] = (uint8_t) (s10 >>  6);
+    s[28] = (uint8_t)((s10 >> 14) | (s11 <<  7));
+    s[29] = (uint8_t) (s11 >>  1);
+    s[30] = (uint8_t) (s11 >>  9);
+    s[31] = (uint8_t) (s11 >> 17);                  /* this function stores only to s[0..31] */
 }
 
-/* Input:
+/*
+ * Input:
  *   a[0]+256*a[1]+...+256^31*a[31] = a
  *   b[0]+256*b[1]+...+256^31*b[31] = b
  *   c[0]+256*c[1]+...+256^31*c[31] = c
  *
  * Output:
  *   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
- *   where l = 2^252 + 27742317777372353535851937790883648493. */
+ *   where l = 2^252 + 27742317777372353535851937790883648493.
+ */
 static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
-                      const uint8_t *c) {
-  int64_t a0 = 2097151 & load_3(a);
-  int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
-  int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
-  int64_t a3 = 2097151 & (load_4(a + 7) >> 7);
-  int64_t a4 = 2097151 & (load_4(a + 10) >> 4);
-  int64_t a5 = 2097151 & (load_3(a + 13) >> 1);
-  int64_t a6 = 2097151 & (load_4(a + 15) >> 6);
-  int64_t a7 = 2097151 & (load_3(a + 18) >> 3);
-  int64_t a8 = 2097151 & load_3(a + 21);
-  int64_t a9 = 2097151 & (load_4(a + 23) >> 5);
-  int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
-  int64_t a11 = (load_4(a + 28) >> 7);
-  int64_t b0 = 2097151 & load_3(b);
-  int64_t b1 = 2097151 & (load_4(b + 2) >> 5);
-  int64_t b2 = 2097151 & (load_3(b + 5) >> 2);
-  int64_t b3 = 2097151 & (load_4(b + 7) >> 7);
-  int64_t b4 = 2097151 & (load_4(b + 10) >> 4);
-  int64_t b5 = 2097151 & (load_3(b + 13) >> 1);
-  int64_t b6 = 2097151 & (load_4(b + 15) >> 6);
-  int64_t b7 = 2097151 & (load_3(b + 18) >> 3);
-  int64_t b8 = 2097151 & load_3(b + 21);
-  int64_t b9 = 2097151 & (load_4(b + 23) >> 5);
-  int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
-  int64_t b11 = (load_4(b + 28) >> 7);
-  int64_t c0 = 2097151 & load_3(c);
-  int64_t c1 = 2097151 & (load_4(c + 2) >> 5);
-  int64_t c2 = 2097151 & (load_3(c + 5) >> 2);
-  int64_t c3 = 2097151 & (load_4(c + 7) >> 7);
-  int64_t c4 = 2097151 & (load_4(c + 10) >> 4);
-  int64_t c5 = 2097151 & (load_3(c + 13) >> 1);
-  int64_t c6 = 2097151 & (load_4(c + 15) >> 6);
-  int64_t c7 = 2097151 & (load_3(c + 18) >> 3);
-  int64_t c8 = 2097151 & load_3(c + 21);
-  int64_t c9 = 2097151 & (load_4(c + 23) >> 5);
-  int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
-  int64_t c11 = (load_4(c + 28) >> 7);
-  int64_t s0;
-  int64_t s1;
-  int64_t s2;
-  int64_t s3;
-  int64_t s4;
-  int64_t s5;
-  int64_t s6;
-  int64_t s7;
-  int64_t s8;
-  int64_t s9;
-  int64_t s10;
-  int64_t s11;
-  int64_t s12;
-  int64_t s13;
-  int64_t s14;
-  int64_t s15;
-  int64_t s16;
-  int64_t s17;
-  int64_t s18;
-  int64_t s19;
-  int64_t s20;
-  int64_t s21;
-  int64_t s22;
-  int64_t s23;
-  int64_t carry0;
-  int64_t carry1;
-  int64_t carry2;
-  int64_t carry3;
-  int64_t carry4;
-  int64_t carry5;
-  int64_t carry6;
-  int64_t carry7;
-  int64_t carry8;
-  int64_t carry9;
-  int64_t carry10;
-  int64_t carry11;
-  int64_t carry12;
-  int64_t carry13;
-  int64_t carry14;
-  int64_t carry15;
-  int64_t carry16;
-  int64_t carry17;
-  int64_t carry18;
-  int64_t carry19;
-  int64_t carry20;
-  int64_t carry21;
-  int64_t carry22;
-
-  s0 = c0 + a0 * b0;
-  s1 = c1 + a0 * b1 + a1 * b0;
-  s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;
-  s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
-  s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
-  s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
-  s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
-  s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 +
-       a6 * b1 + a7 * b0;
-  s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 +
-       a6 * b2 + a7 * b1 + a8 * b0;
-  s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 +
-       a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;
-  s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 +
-        a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;
-  s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 +
-        a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
-  s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 +
-        a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
-  s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 +
-        a9 * b4 + a10 * b3 + a11 * b2;
-  s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 +
-        a10 * b4 + a11 * b3;
-  s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 +
-        a11 * b4;
-  s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
-  s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
-  s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
-  s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
-  s20 = a9 * b11 + a10 * b10 + a11 * b9;
-  s21 = a10 * b11 + a11 * b10;
-  s22 = a11 * b11;
-  s23 = 0;
-
-  carry0 = (s0 + (1 << 20)) >> 21;
-  s1 += carry0;
-  s0 -= carry0 * (1 << 21);
-  carry2 = (s2 + (1 << 20)) >> 21;
-  s3 += carry2;
-  s2 -= carry2 * (1 << 21);
-  carry4 = (s4 + (1 << 20)) >> 21;
-  s5 += carry4;
-  s4 -= carry4 * (1 << 21);
-  carry6 = (s6 + (1 << 20)) >> 21;
-  s7 += carry6;
-  s6 -= carry6 * (1 << 21);
-  carry8 = (s8 + (1 << 20)) >> 21;
-  s9 += carry8;
-  s8 -= carry8 * (1 << 21);
-  carry10 = (s10 + (1 << 20)) >> 21;
-  s11 += carry10;
-  s10 -= carry10 * (1 << 21);
-  carry12 = (s12 + (1 << 20)) >> 21;
-  s13 += carry12;
-  s12 -= carry12 * (1 << 21);
-  carry14 = (s14 + (1 << 20)) >> 21;
-  s15 += carry14;
-  s14 -= carry14 * (1 << 21);
-  carry16 = (s16 + (1 << 20)) >> 21;
-  s17 += carry16;
-  s16 -= carry16 * (1 << 21);
-  carry18 = (s18 + (1 << 20)) >> 21;
-  s19 += carry18;
-  s18 -= carry18 * (1 << 21);
-  carry20 = (s20 + (1 << 20)) >> 21;
-  s21 += carry20;
-  s20 -= carry20 * (1 << 21);
-  carry22 = (s22 + (1 << 20)) >> 21;
-  s23 += carry22;
-  s22 -= carry22 * (1 << 21);
-
-  carry1 = (s1 + (1 << 20)) >> 21;
-  s2 += carry1;
-  s1 -= carry1 * (1 << 21);
-  carry3 = (s3 + (1 << 20)) >> 21;
-  s4 += carry3;
-  s3 -= carry3 * (1 << 21);
-  carry5 = (s5 + (1 << 20)) >> 21;
-  s6 += carry5;
-  s5 -= carry5 * (1 << 21);
-  carry7 = (s7 + (1 << 20)) >> 21;
-  s8 += carry7;
-  s7 -= carry7 * (1 << 21);
-  carry9 = (s9 + (1 << 20)) >> 21;
-  s10 += carry9;
-  s9 -= carry9 * (1 << 21);
-  carry11 = (s11 + (1 << 20)) >> 21;
-  s12 += carry11;
-  s11 -= carry11 * (1 << 21);
-  carry13 = (s13 + (1 << 20)) >> 21;
-  s14 += carry13;
-  s13 -= carry13 * (1 << 21);
-  carry15 = (s15 + (1 << 20)) >> 21;
-  s16 += carry15;
-  s15 -= carry15 * (1 << 21);
-  carry17 = (s17 + (1 << 20)) >> 21;
-  s18 += carry17;
-  s17 -= carry17 * (1 << 21);
-  carry19 = (s19 + (1 << 20)) >> 21;
-  s20 += carry19;
-  s19 -= carry19 * (1 << 21);
-  carry21 = (s21 + (1 << 20)) >> 21;
-  s22 += carry21;
-  s21 -= carry21 * (1 << 21);
-
-  s11 += s23 * 666643;
-  s12 += s23 * 470296;
-  s13 += s23 * 654183;
-  s14 -= s23 * 997805;
-  s15 += s23 * 136657;
-  s16 -= s23 * 683901;
-  s23 = 0;
-
-  s10 += s22 * 666643;
-  s11 += s22 * 470296;
-  s12 += s22 * 654183;
-  s13 -= s22 * 997805;
-  s14 += s22 * 136657;
-  s15 -= s22 * 683901;
-  s22 = 0;
-
-  s9 += s21 * 666643;
-  s10 += s21 * 470296;
-  s11 += s21 * 654183;
-  s12 -= s21 * 997805;
-  s13 += s21 * 136657;
-  s14 -= s21 * 683901;
-  s21 = 0;
-
-  s8 += s20 * 666643;
-  s9 += s20 * 470296;
-  s10 += s20 * 654183;
-  s11 -= s20 * 997805;
-  s12 += s20 * 136657;
-  s13 -= s20 * 683901;
-  s20 = 0;
-
-  s7 += s19 * 666643;
-  s8 += s19 * 470296;
-  s9 += s19 * 654183;
-  s10 -= s19 * 997805;
-  s11 += s19 * 136657;
-  s12 -= s19 * 683901;
-  s19 = 0;
-
-  s6 += s18 * 666643;
-  s7 += s18 * 470296;
-  s8 += s18 * 654183;
-  s9 -= s18 * 997805;
-  s10 += s18 * 136657;
-  s11 -= s18 * 683901;
-  s18 = 0;
-
-  carry6 = (s6 + (1 << 20)) >> 21;
-  s7 += carry6;
-  s6 -= carry6 * (1 << 21);
-  carry8 = (s8 + (1 << 20)) >> 21;
-  s9 += carry8;
-  s8 -= carry8 * (1 << 21);
-  carry10 = (s10 + (1 << 20)) >> 21;
-  s11 += carry10;
-  s10 -= carry10 * (1 << 21);
-  carry12 = (s12 + (1 << 20)) >> 21;
-  s13 += carry12;
-  s12 -= carry12 * (1 << 21);
-  carry14 = (s14 + (1 << 20)) >> 21;
-  s15 += carry14;
-  s14 -= carry14 * (1 << 21);
-  carry16 = (s16 + (1 << 20)) >> 21;
-  s17 += carry16;
-  s16 -= carry16 * (1 << 21);
-
-  carry7 = (s7 + (1 << 20)) >> 21;
-  s8 += carry7;
-  s7 -= carry7 * (1 << 21);
-  carry9 = (s9 + (1 << 20)) >> 21;
-  s10 += carry9;
-  s9 -= carry9 * (1 << 21);
-  carry11 = (s11 + (1 << 20)) >> 21;
-  s12 += carry11;
-  s11 -= carry11 * (1 << 21);
-  carry13 = (s13 + (1 << 20)) >> 21;
-  s14 += carry13;
-  s13 -= carry13 * (1 << 21);
-  carry15 = (s15 + (1 << 20)) >> 21;
-  s16 += carry15;
-  s15 -= carry15 * (1 << 21);
-
-  s5 += s17 * 666643;
-  s6 += s17 * 470296;
-  s7 += s17 * 654183;
-  s8 -= s17 * 997805;
-  s9 += s17 * 136657;
-  s10 -= s17 * 683901;
-  s17 = 0;
-
-  s4 += s16 * 666643;
-  s5 += s16 * 470296;
-  s6 += s16 * 654183;
-  s7 -= s16 * 997805;
-  s8 += s16 * 136657;
-  s9 -= s16 * 683901;
-  s16 = 0;
-
-  s3 += s15 * 666643;
-  s4 += s15 * 470296;
-  s5 += s15 * 654183;
-  s6 -= s15 * 997805;
-  s7 += s15 * 136657;
-  s8 -= s15 * 683901;
-  s15 = 0;
-
-  s2 += s14 * 666643;
-  s3 += s14 * 470296;
-  s4 += s14 * 654183;
-  s5 -= s14 * 997805;
-  s6 += s14 * 136657;
-  s7 -= s14 * 683901;
-  s14 = 0;
-
-  s1 += s13 * 666643;
-  s2 += s13 * 470296;
-  s3 += s13 * 654183;
-  s4 -= s13 * 997805;
-  s5 += s13 * 136657;
-  s6 -= s13 * 683901;
-  s13 = 0;
-
-  s0 += s12 * 666643;
-  s1 += s12 * 470296;
-  s2 += s12 * 654183;
-  s3 -= s12 * 997805;
-  s4 += s12 * 136657;
-  s5 -= s12 * 683901;
-  s12 = 0;
-
-  carry0 = (s0 + (1 << 20)) >> 21;
-  s1 += carry0;
-  s0 -= carry0 * (1 << 21);
-  carry2 = (s2 + (1 << 20)) >> 21;
-  s3 += carry2;
-  s2 -= carry2 * (1 << 21);
-  carry4 = (s4 + (1 << 20)) >> 21;
-  s5 += carry4;
-  s4 -= carry4 * (1 << 21);
-  carry6 = (s6 + (1 << 20)) >> 21;
-  s7 += carry6;
-  s6 -= carry6 * (1 << 21);
-  carry8 = (s8 + (1 << 20)) >> 21;
-  s9 += carry8;
-  s8 -= carry8 * (1 << 21);
-  carry10 = (s10 + (1 << 20)) >> 21;
-  s11 += carry10;
-  s10 -= carry10 * (1 << 21);
-
-  carry1 = (s1 + (1 << 20)) >> 21;
-  s2 += carry1;
-  s1 -= carry1 * (1 << 21);
-  carry3 = (s3 + (1 << 20)) >> 21;
-  s4 += carry3;
-  s3 -= carry3 * (1 << 21);
-  carry5 = (s5 + (1 << 20)) >> 21;
-  s6 += carry5;
-  s5 -= carry5 * (1 << 21);
-  carry7 = (s7 + (1 << 20)) >> 21;
-  s8 += carry7;
-  s7 -= carry7 * (1 << 21);
-  carry9 = (s9 + (1 << 20)) >> 21;
-  s10 += carry9;
-  s9 -= carry9 * (1 << 21);
-  carry11 = (s11 + (1 << 20)) >> 21;
-  s12 += carry11;
-  s11 -= carry11 * (1 << 21);
-
-  s0 += s12 * 666643;
-  s1 += s12 * 470296;
-  s2 += s12 * 654183;
-  s3 -= s12 * 997805;
-  s4 += s12 * 136657;
-  s5 -= s12 * 683901;
-  s12 = 0;
-
-  carry0 = s0 >> 21;
-  s1 += carry0;
-  s0 -= carry0 * (1 << 21);
-  carry1 = s1 >> 21;
-  s2 += carry1;
-  s1 -= carry1 * (1 << 21);
-  carry2 = s2 >> 21;
-  s3 += carry2;
-  s2 -= carry2 * (1 << 21);
-  carry3 = s3 >> 21;
-  s4 += carry3;
-  s3 -= carry3 * (1 << 21);
-  carry4 = s4 >> 21;
-  s5 += carry4;
-  s4 -= carry4 * (1 << 21);
-  carry5 = s5 >> 21;
-  s6 += carry5;
-  s5 -= carry5 * (1 << 21);
-  carry6 = s6 >> 21;
-  s7 += carry6;
-  s6 -= carry6 * (1 << 21);
-  carry7 = s7 >> 21;
-  s8 += carry7;
-  s7 -= carry7 * (1 << 21);
-  carry8 = s8 >> 21;
-  s9 += carry8;
-  s8 -= carry8 * (1 << 21);
-  carry9 = s9 >> 21;
-  s10 += carry9;
-  s9 -= carry9 * (1 << 21);
-  carry10 = s10 >> 21;
-  s11 += carry10;
-  s10 -= carry10 * (1 << 21);
-  carry11 = s11 >> 21;
-  s12 += carry11;
-  s11 -= carry11 * (1 << 21);
-
-  s0 += s12 * 666643;
-  s1 += s12 * 470296;
-  s2 += s12 * 654183;
-  s3 -= s12 * 997805;
-  s4 += s12 * 136657;
-  s5 -= s12 * 683901;
-  s12 = 0;
-
-  carry0 = s0 >> 21;
-  s1 += carry0;
-  s0 -= carry0 * (1 << 21);
-  carry1 = s1 >> 21;
-  s2 += carry1;
-  s1 -= carry1 * (1 << 21);
-  carry2 = s2 >> 21;
-  s3 += carry2;
-  s2 -= carry2 * (1 << 21);
-  carry3 = s3 >> 21;
-  s4 += carry3;
-  s3 -= carry3 * (1 << 21);
-  carry4 = s4 >> 21;
-  s5 += carry4;
-  s4 -= carry4 * (1 << 21);
-  carry5 = s5 >> 21;
-  s6 += carry5;
-  s5 -= carry5 * (1 << 21);
-  carry6 = s6 >> 21;
-  s7 += carry6;
-  s6 -= carry6 * (1 << 21);
-  carry7 = s7 >> 21;
-  s8 += carry7;
-  s7 -= carry7 * (1 << 21);
-  carry8 = s8 >> 21;
-  s9 += carry8;
-  s8 -= carry8 * (1 << 21);
-  carry9 = s9 >> 21;
-  s10 += carry9;
-  s9 -= carry9 * (1 << 21);
-  carry10 = s10 >> 21;
-  s11 += carry10;
-  s10 -= carry10 * (1 << 21);
-
-  s[0] = (uint8_t)(s0 >> 0);
-  s[1] = (uint8_t)(s0 >> 8);
-  s[2] = (uint8_t)((s0 >> 16) | (s1 << 5));
-  s[3] = (uint8_t)(s1 >> 3);
-  s[4] = (uint8_t)(s1 >> 11);
-  s[5] = (uint8_t)((s1 >> 19) | (s2 << 2));
-  s[6] = (uint8_t)(s2 >> 6);
-  s[7] = (uint8_t)((s2 >> 14) | (s3 << 7));
-  s[8] = (uint8_t)(s3 >> 1);
-  s[9] = (uint8_t)(s3 >> 9);
-  s[10] = (uint8_t)((s3 >> 17) | (s4 << 4));
-  s[11] = (uint8_t)(s4 >> 4);
-  s[12] = (uint8_t)(s4 >> 12);
-  s[13] = (uint8_t)((s4 >> 20) | (s5 << 1));
-  s[14] = (uint8_t)(s5 >> 7);
-  s[15] = (uint8_t)((s5 >> 15) | (s6 << 6));
-  s[16] = (uint8_t)(s6 >> 2);
-  s[17] = (uint8_t)(s6 >> 10);
-  s[18] = (uint8_t)((s6 >> 18) | (s7 << 3));
-  s[19] = (uint8_t)(s7 >> 5);
-  s[20] = (uint8_t)(s7 >> 13);
-  s[21] = (uint8_t)(s8 >> 0);
-  s[22] = (uint8_t)(s8 >> 8);
-  s[23] = (uint8_t)((s8 >> 16) | (s9 << 5));
-  s[24] = (uint8_t)(s9 >> 3);
-  s[25] = (uint8_t)(s9 >> 11);
-  s[26] = (uint8_t)((s9 >> 19) | (s10 << 2));
-  s[27] = (uint8_t)(s10 >> 6);
-  s[28] = (uint8_t)((s10 >> 14) | (s11 << 7));
-  s[29] = (uint8_t)(s11 >> 1);
-  s[30] = (uint8_t)(s11 >> 9);
-  s[31] = (uint8_t)(s11 >> 17);
+                      const uint8_t *c)
+{
+    int64_t a0  = kBottom21Bits &  load_3(a);
+    int64_t a1  = kBottom21Bits & (load_4(a +  2) >> 5);
+    int64_t a2  = kBottom21Bits & (load_3(a +  5) >> 2);
+    int64_t a3  = kBottom21Bits & (load_4(a +  7) >> 7);
+    int64_t a4  = kBottom21Bits & (load_4(a + 10) >> 4);
+    int64_t a5  = kBottom21Bits & (load_3(a + 13) >> 1);
+    int64_t a6  = kBottom21Bits & (load_4(a + 15) >> 6);
+    int64_t a7  = kBottom21Bits & (load_3(a + 18) >> 3);
+    int64_t a8  = kBottom21Bits &  load_3(a + 21);
+    int64_t a9  = kBottom21Bits & (load_4(a + 23) >> 5);
+    int64_t a10 = kBottom21Bits & (load_3(a + 26) >> 2);
+    int64_t a11 =                 (load_4(a + 28) >> 7);
+    int64_t b0  = kBottom21Bits &  load_3(b);
+    int64_t b1  = kBottom21Bits & (load_4(b +  2) >> 5);
+    int64_t b2  = kBottom21Bits & (load_3(b +  5) >> 2);
+    int64_t b3  = kBottom21Bits & (load_4(b +  7) >> 7);
+    int64_t b4  = kBottom21Bits & (load_4(b + 10) >> 4);
+    int64_t b5  = kBottom21Bits & (load_3(b + 13) >> 1);
+    int64_t b6  = kBottom21Bits & (load_4(b + 15) >> 6);
+    int64_t b7  = kBottom21Bits & (load_3(b + 18) >> 3);
+    int64_t b8  = kBottom21Bits &  load_3(b + 21);
+    int64_t b9  = kBottom21Bits & (load_4(b + 23) >> 5);
+    int64_t b10 = kBottom21Bits & (load_3(b + 26) >> 2);
+    int64_t b11 =                 (load_4(b + 28) >> 7);
+    int64_t c0  = kBottom21Bits &  load_3(c);
+    int64_t c1  = kBottom21Bits & (load_4(c +  2) >> 5);
+    int64_t c2  = kBottom21Bits & (load_3(c +  5) >> 2);
+    int64_t c3  = kBottom21Bits & (load_4(c +  7) >> 7);
+    int64_t c4  = kBottom21Bits & (load_4(c + 10) >> 4);
+    int64_t c5  = kBottom21Bits & (load_3(c + 13) >> 1);
+    int64_t c6  = kBottom21Bits & (load_4(c + 15) >> 6);
+    int64_t c7  = kBottom21Bits & (load_3(c + 18) >> 3);
+    int64_t c8  = kBottom21Bits &  load_3(c + 21);
+    int64_t c9  = kBottom21Bits & (load_4(c + 23) >> 5);
+    int64_t c10 = kBottom21Bits & (load_3(c + 26) >> 2);
+    int64_t c11 =                 (load_4(c + 28) >> 7);
+    int64_t s0;
+    int64_t s1;
+    int64_t s2;
+    int64_t s3;
+    int64_t s4;
+    int64_t s5;
+    int64_t s6;
+    int64_t s7;
+    int64_t s8;
+    int64_t s9;
+    int64_t s10;
+    int64_t s11;
+    int64_t s12;
+    int64_t s13;
+    int64_t s14;
+    int64_t s15;
+    int64_t s16;
+    int64_t s17;
+    int64_t s18;
+    int64_t s19;
+    int64_t s20;
+    int64_t s21;
+    int64_t s22;
+    int64_t s23;
+    int64_t carry0;
+    int64_t carry1;
+    int64_t carry2;
+    int64_t carry3;
+    int64_t carry4;
+    int64_t carry5;
+    int64_t carry6;
+    int64_t carry7;
+    int64_t carry8;
+    int64_t carry9;
+    int64_t carry10;
+    int64_t carry11;
+    int64_t carry12;
+    int64_t carry13;
+    int64_t carry14;
+    int64_t carry15;
+    int64_t carry16;
+    int64_t carry17;
+    int64_t carry18;
+    int64_t carry19;
+    int64_t carry20;
+    int64_t carry21;
+    int64_t carry22;
+
+    s0  = c0   +   a0 * b0;
+    s1  = c1   +   a0 * b1   +   a1 * b0;
+    s2  = c2   +   a0 * b2   +   a1 * b1   +   a2 * b0;
+    s3  = c3   +   a0 * b3   +   a1 * b2   +   a2 * b1  +   a3 * b0;
+    s4  = c4   +   a0 * b4   +   a1 * b3   +   a2 * b2  +   a3 * b1  +   a4 * b0;
+    s5  = c5   +   a0 * b5   +   a1 * b4   +   a2 * b3  +   a3 * b2  +   a4 * b1  +   a5 * b0;
+    s6  = c6   +   a0 * b6   +   a1 * b5   +   a2 * b4  +   a3 * b3  +   a4 * b2  +   a5 * b1 +   a6 * b0;
+    s7  = c7   +   a0 * b7   +   a1 * b6   +   a2 * b5  +   a3 * b4  +   a4 * b3  +   a5 * b2 +   a6 * b1   +   a7 * b0;
+    s8  = c8   +   a0 * b8   +   a1 * b7   +   a2 * b6  +   a3 * b5  +   a4 * b4  +   a5 * b3 +   a6 * b2   +   a7 * b1   +   a8 * b0;
+    s9  = c9   +   a0 * b9   +   a1 * b8   +   a2 * b7  +   a3 * b6  +   a4 * b5  +   a5 * b4 +   a6 * b3   +   a7 * b2   +   a8 * b1  +   a9 * b0;
+    s10 = c10  +   a0 * b10  +   a1 * b9   +   a2 * b8  +   a3 * b7  +   a4 * b6  +   a5 * b5 +   a6 * b4   +   a7 * b3   +   a8 * b2  +   a9 * b1  +  a10 * b0;
+    s11 = c11  +   a0 * b11  +   a1 * b10  +   a2 * b9  +   a3 * b8  +   a4 * b7  +   a5 * b6 +   a6 * b5   +   a7 * b4   +   a8 * b3  +   a9 * b2  +  a10 * b1  +  a11 * b0;
+    s12 =          a1 * b11  +   a2 * b10  +   a3 * b9  +   a4 * b8  +   a5 * b7  +   a6 * b6 +   a7 * b5   +   a8 * b4   +   a9 * b3  +  a10 * b2  +  a11 * b1;
+    s13 =          a2 * b11  +   a3 * b10  +   a4 * b9  +   a5 * b8  +   a6 * b7  +   a7 * b6 +   a8 * b5   +   a9 * b4   +  a10 * b3  +  a11 * b2;
+    s14 =          a3 * b11  +   a4 * b10  +   a5 * b9  +   a6 * b8  +   a7 * b7  +   a8 * b6 +   a9 * b5   +  a10 * b4   +  a11 * b3;
+    s15 =          a4 * b11  +   a5 * b10  +   a6 * b9  +   a7 * b8  +   a8 * b7  +   a9 * b6 +  a10 * b5   +  a11 * b4;
+    s16 =          a5 * b11  +   a6 * b10  +   a7 * b9  +   a8 * b8  +   a9 * b7  +  a10 * b6 +  a11 * b5;
+    s17 =          a6 * b11  +   a7 * b10  +   a8 * b9  +   a9 * b8  +  a10 * b7  +  a11 * b6;
+    s18 =          a7 * b11  +   a8 * b10  +   a9 * b9  +  a10 * b8  +  a11 * b7;
+    s19 =          a8 * b11  +   a9 * b10  +  a10 * b9  +  a11 * b8;
+    s20 =          a9 * b11  +  a10 * b10  +  a11 * b9;
+    s21 =         a10 * b11  +  a11 * b10;
+    s22 =         a11 * b11;
+    s23 =         0;
+
+    carry0 = (s0 + (1 << 20)) >> 21;
+    s1  += carry0;
+    s0  -= carry0 * (1 << 21);
+    carry2 = (s2 + (1 << 20)) >> 21;
+    s3  += carry2;
+    s2  -= carry2 * (1 << 21);
+    carry4 = (s4 + (1 << 20)) >> 21;
+    s5  += carry4;
+    s4  -= carry4 * (1 << 21);
+    carry6 = (s6 + (1 << 20)) >> 21;
+    s7  += carry6;
+    s6  -= carry6 * (1 << 21);
+    carry8 = (s8 + (1 << 20)) >> 21;
+    s9  += carry8;
+    s8  -= carry8 * (1 << 21);
+    carry10 = (s10 + (1 << 20)) >> 21;
+    s11 += carry10;
+    s10 -= carry10 * (1 << 21);
+    carry12 = (s12 + (1 << 20)) >> 21;
+    s13 += carry12;
+    s12 -= carry12 * (1 << 21);
+    carry14 = (s14 + (1 << 20)) >> 21;
+    s15 += carry14;
+    s14 -= carry14 * (1 << 21);
+    carry16 = (s16 + (1 << 20)) >> 21;
+    s17 += carry16;
+    s16 -= carry16 * (1 << 21);
+    carry18 = (s18 + (1 << 20)) >> 21;
+    s19 += carry18;
+    s18 -= carry18 * (1 << 21);
+    carry20 = (s20 + (1 << 20)) >> 21;
+    s21 += carry20;
+    s20 -= carry20 * (1 << 21);
+    carry22 = (s22 + (1 << 20)) >> 21;
+    s23 += carry22;
+    s22 -= carry22 * (1 << 21);
+
+    carry1 = (s1 + (1 << 20)) >> 21;
+    s2  += carry1;
+    s1  -= carry1 * (1 << 21);
+    carry3 = (s3 + (1 << 20)) >> 21;
+    s4  += carry3;
+    s3  -= carry3 * (1 << 21);
+    carry5 = (s5 + (1 << 20)) >> 21;
+    s6  += carry5;
+    s5  -= carry5 * (1 << 21);
+    carry7 = (s7 + (1 << 20)) >> 21;
+    s8  += carry7;
+    s7  -= carry7 * (1 << 21);
+    carry9 = (s9 + (1 << 20)) >> 21;
+    s10 += carry9;
+    s9  -= carry9 * (1 << 21);
+    carry11 = (s11 + (1 << 20)) >> 21;
+    s12 += carry11;
+    s11 -= carry11 * (1 << 21);
+    carry13 = (s13 + (1 << 20)) >> 21;
+    s14 += carry13;
+    s13 -= carry13 * (1 << 21);
+    carry15 = (s15 + (1 << 20)) >> 21;
+    s16 += carry15;
+    s15 -= carry15 * (1 << 21);
+    carry17 = (s17 + (1 << 20)) >> 21;
+    s18 += carry17;
+    s17 -= carry17 * (1 << 21);
+    carry19 = (s19 + (1 << 20)) >> 21;
+    s20 += carry19;
+    s19 -= carry19 * (1 << 21);
+    carry21 = (s21 + (1 << 20)) >> 21;
+    s22 += carry21;
+    s21 -= carry21 * (1 << 21);
+
+    s11 += s23 * 666643;
+    s12 += s23 * 470296;
+    s13 += s23 * 654183;
+    s14 -= s23 * 997805;
+    s15 += s23 * 136657;
+    s16 -= s23 * 683901;
+    s23  = 0;
+
+    s10 += s22 * 666643;
+    s11 += s22 * 470296;
+    s12 += s22 * 654183;
+    s13 -= s22 * 997805;
+    s14 += s22 * 136657;
+    s15 -= s22 * 683901;
+    s22  = 0;
+
+    s9  += s21 * 666643;
+    s10 += s21 * 470296;
+    s11 += s21 * 654183;
+    s12 -= s21 * 997805;
+    s13 += s21 * 136657;
+    s14 -= s21 * 683901;
+    s21  = 0;
+
+    s8  += s20 * 666643;
+    s9  += s20 * 470296;
+    s10 += s20 * 654183;
+    s11 -= s20 * 997805;
+    s12 += s20 * 136657;
+    s13 -= s20 * 683901;
+    s20  = 0;
+
+    s7  += s19 * 666643;
+    s8  += s19 * 470296;
+    s9  += s19 * 654183;
+    s10 -= s19 * 997805;
+    s11 += s19 * 136657;
+    s12 -= s19 * 683901;
+    s19  = 0;
+
+    s6  += s18 * 666643;
+    s7  += s18 * 470296;
+    s8  += s18 * 654183;
+    s9  -= s18 * 997805;
+    s10 += s18 * 136657;
+    s11 -= s18 * 683901;
+    s18  = 0;
+
+    carry6 = (s6 + (1 << 20)) >> 21;
+    s7  += carry6;
+    s6  -= carry6 * (1 << 21);
+    carry8 = (s8 + (1 << 20)) >> 21;
+    s9  += carry8;
+    s8  -= carry8 * (1 << 21);
+    carry10 = (s10 + (1 << 20)) >> 21;
+    s11 += carry10;
+    s10 -= carry10 * (1 << 21);
+    carry12 = (s12 + (1 << 20)) >> 21;
+    s13 += carry12;
+    s12 -= carry12 * (1 << 21);
+    carry14 = (s14 + (1 << 20)) >> 21;
+    s15 += carry14;
+    s14 -= carry14 * (1 << 21);
+    carry16 = (s16 + (1 << 20)) >> 21;
+    s17 += carry16;
+    s16 -= carry16 * (1 << 21);
+
+    carry7 = (s7 + (1 << 20)) >> 21;
+    s8  += carry7;
+    s7  -= carry7 * (1 << 21);
+    carry9 = (s9 + (1 << 20)) >> 21;
+    s10 += carry9;
+    s9  -= carry9 * (1 << 21);
+    carry11 = (s11 + (1 << 20)) >> 21;
+    s12 += carry11;
+    s11 -= carry11 * (1 << 21);
+    carry13 = (s13 + (1 << 20)) >> 21;
+    s14 += carry13;
+    s13 -= carry13 * (1 << 21);
+    carry15 = (s15 + (1 << 20)) >> 21;
+    s16 += carry15;
+    s15 -= carry15 * (1 << 21);
+
+    s5  += s17 * 666643;
+    s6  += s17 * 470296;
+    s7  += s17 * 654183;
+    s8  -= s17 * 997805;
+    s9  += s17 * 136657;
+    s10 -= s17 * 683901;
+    s17  = 0;
+
+    s4  += s16 * 666643;
+    s5  += s16 * 470296;
+    s6  += s16 * 654183;
+    s7  -= s16 * 997805;
+    s8  += s16 * 136657;
+    s9  -= s16 * 683901;
+    s16  = 0;
+
+    s3  += s15 * 666643;
+    s4  += s15 * 470296;
+    s5  += s15 * 654183;
+    s6  -= s15 * 997805;
+    s7  += s15 * 136657;
+    s8  -= s15 * 683901;
+    s15  = 0;
+
+    s2  += s14 * 666643;
+    s3  += s14 * 470296;
+    s4  += s14 * 654183;
+    s5  -= s14 * 997805;
+    s6  += s14 * 136657;
+    s7  -= s14 * 683901;
+    s14  = 0;
+
+    s1  += s13 * 666643;
+    s2  += s13 * 470296;
+    s3  += s13 * 654183;
+    s4  -= s13 * 997805;
+    s5  += s13 * 136657;
+    s6  -= s13 * 683901;
+    s13  = 0;
+
+    s0  += s12 * 666643;
+    s1  += s12 * 470296;
+    s2  += s12 * 654183;
+    s3  -= s12 * 997805;
+    s4  += s12 * 136657;
+    s5  -= s12 * 683901;
+    s12 = 0;
+
+    carry0 = (s0 + (1 << 20)) >> 21;
+    s1  += carry0;
+    s0  -= carry0 * (1 << 21);
+    carry2 = (s2 + (1 << 20)) >> 21;
+    s3  += carry2;
+    s2  -= carry2 * (1 << 21);
+    carry4 = (s4 + (1 << 20)) >> 21;
+    s5  += carry4;
+    s4  -= carry4 * (1 << 21);
+    carry6 = (s6 + (1 << 20)) >> 21;
+    s7  += carry6;
+    s6  -= carry6 * (1 << 21);
+    carry8 = (s8 + (1 << 20)) >> 21;
+    s9  += carry8;
+    s8  -= carry8 * (1 << 21);
+    carry10 = (s10 + (1 << 20)) >> 21;
+    s11 += carry10;
+    s10 -= carry10 * (1 << 21);
+
+    carry1 = (s1 + (1 << 20)) >> 21;
+    s2  += carry1;
+    s1  -= carry1 * (1 << 21);
+    carry3 = (s3 + (1 << 20)) >> 21;
+    s4  += carry3;
+    s3  -= carry3 * (1 << 21);
+    carry5 = (s5 + (1 << 20)) >> 21;
+    s6  += carry5;
+    s5  -= carry5 * (1 << 21);
+    carry7 = (s7 + (1 << 20)) >> 21;
+    s8  += carry7;
+    s7  -= carry7 * (1 << 21);
+    carry9 = (s9 + (1 << 20)) >> 21;
+    s10 += carry9;
+    s9  -= carry9 * (1 << 21);
+    carry11 = (s11 + (1 << 20)) >> 21;
+    s12 += carry11;
+    s11 -= carry11 * (1 << 21);
+
+    s0  += s12 * 666643;
+    s1  += s12 * 470296;
+    s2  += s12 * 654183;
+    s3  -= s12 * 997805;
+    s4  += s12 * 136657;
+    s5  -= s12 * 683901;
+    s12  = 0;
+
+    carry0 = s0 >> 21;
+    s1  += carry0;
+    s0  -= carry0 * (1 << 21);
+    carry1 = s1 >> 21;
+    s2  += carry1;
+    s1  -= carry1 * (1 << 21);
+    carry2 = s2 >> 21;
+    s3  += carry2;
+    s2  -= carry2 * (1 << 21);
+    carry3 = s3 >> 21;
+    s4  += carry3;
+    s3  -= carry3 * (1 << 21);
+    carry4 = s4 >> 21;
+    s5  += carry4;
+    s4  -= carry4 * (1 << 21);
+    carry5 = s5 >> 21;
+    s6  += carry5;
+    s5  -= carry5 * (1 << 21);
+    carry6 = s6 >> 21;
+    s7  += carry6;
+    s6  -= carry6 * (1 << 21);
+    carry7 = s7 >> 21;
+    s8  += carry7;
+    s7  -= carry7 * (1 << 21);
+    carry8 = s8 >> 21;
+    s9  += carry8;
+    s8  -= carry8 * (1 << 21);
+    carry9 = s9 >> 21;
+    s10 += carry9;
+    s9  -= carry9 * (1 << 21);
+    carry10 = s10 >> 21;
+    s11 += carry10;
+    s10 -= carry10 * (1 << 21);
+    carry11 = s11 >> 21;
+    s12 += carry11;
+    s11 -= carry11 * (1 << 21);
+
+    s0  += s12 * 666643;
+    s1  += s12 * 470296;
+    s2  += s12 * 654183;
+    s3  -= s12 * 997805;
+    s4  += s12 * 136657;
+    s5  -= s12 * 683901;
+    s12  = 0;
+
+    carry0 = s0 >> 21;
+    s1  += carry0;
+    s0  -= carry0 * (1 << 21);
+    carry1 = s1 >> 21;
+    s2  += carry1;
+    s1  -= carry1 * (1 << 21);
+    carry2 = s2 >> 21;
+    s3  += carry2;
+    s2  -= carry2 * (1 << 21);
+    carry3 = s3 >> 21;
+    s4  += carry3;
+    s3  -= carry3 * (1 << 21);
+    carry4 = s4 >> 21;
+    s5  += carry4;
+    s4  -= carry4 * (1 << 21);
+    carry5 = s5 >> 21;
+    s6  += carry5;
+    s5  -= carry5 * (1 << 21);
+    carry6 = s6 >> 21;
+    s7  += carry6;
+    s6  -= carry6 * (1 << 21);
+    carry7 = s7 >> 21;
+    s8  += carry7;
+    s7  -= carry7 * (1 << 21);
+    carry8 = s8 >> 21;
+    s9  += carry8;
+    s8  -= carry8 * (1 << 21);
+    carry9 = s9 >> 21;
+    s10 += carry9;
+    s9  -= carry9 * (1 << 21);
+    carry10 = s10 >> 21;
+    s11 += carry10;
+    s10 -= carry10 * (1 << 21);
+
+    s[ 0] = (uint8_t) (s0  >>  0);
+    s[ 1] = (uint8_t) (s0  >>  8);
+    s[ 2] = (uint8_t)((s0  >> 16) | (s1 << 5));
+    s[ 3] = (uint8_t) (s1  >>  3);
+    s[ 4] = (uint8_t) (s1  >> 11);
+    s[ 5] = (uint8_t)((s1  >> 19) | (s2 << 2));
+    s[ 6] = (uint8_t) (s2  >>  6);
+    s[ 7] = (uint8_t)((s2  >> 14) | (s3 << 7));
+    s[ 8] = (uint8_t) (s3  >>  1);
+    s[ 9] = (uint8_t) (s3  >>  9);
+    s[10] = (uint8_t)((s3  >> 17) | (s4 << 4));
+    s[11] = (uint8_t) (s4  >>  4);
+    s[12] = (uint8_t) (s4  >> 12);
+    s[13] = (uint8_t)((s4  >> 20) | (s5 << 1));
+    s[14] = (uint8_t) (s5  >>  7);
+    s[15] = (uint8_t)((s5  >> 15) | (s6 << 6));
+    s[16] = (uint8_t) (s6  >>  2);
+    s[17] = (uint8_t) (s6  >> 10);
+    s[18] = (uint8_t)((s6  >> 18) | (s7 << 3));
+    s[19] = (uint8_t) (s7  >>  5);
+    s[20] = (uint8_t) (s7  >> 13);
+    s[21] = (uint8_t) (s8  >>  0);
+    s[22] = (uint8_t) (s8  >>  8);
+    s[23] = (uint8_t)((s8  >> 16) | (s9 << 5));
+    s[24] = (uint8_t) (s9  >>  3);
+    s[25] = (uint8_t) (s9  >> 11);
+    s[26] = (uint8_t)((s9  >> 19) | (s10 << 2));
+    s[27] = (uint8_t) (s10 >>  6);
+    s[28] = (uint8_t)((s10 >> 14) | (s11 << 7));
+    s[29] = (uint8_t) (s11 >>  1);
+    s[30] = (uint8_t) (s11 >>  9);
+    s[31] = (uint8_t) (s11 >> 17);
 }
 
 int ED25519_sign(uint8_t *out_sig, const uint8_t *message, size_t message_len,
-                 const uint8_t public_key[32], const uint8_t private_key[32]) {
-  uint8_t az[SHA512_DIGEST_LENGTH];
-  uint8_t nonce[SHA512_DIGEST_LENGTH];
-  ge_p3 R;
-  uint8_t hram[SHA512_DIGEST_LENGTH];
-  SHA512_CTX hash_ctx;
-
-  SHA512_Init(&hash_ctx);
-  SHA512_Update(&hash_ctx, private_key, 32);
-  SHA512_Final(az, &hash_ctx);
-
-  az[0] &= 248;
-  az[31] &= 63;
-  az[31] |= 64;
-
-  SHA512_Init(&hash_ctx);
-  SHA512_Update(&hash_ctx, az + 32, 32);
-  SHA512_Update(&hash_ctx, message, message_len);
-  SHA512_Final(nonce, &hash_ctx);
-
-  x25519_sc_reduce(nonce);
-  ge_scalarmult_base(&R, nonce);
-  ge_p3_tobytes(out_sig, &R);
-
-  SHA512_Init(&hash_ctx);
-  SHA512_Update(&hash_ctx, out_sig, 32);
-  SHA512_Update(&hash_ctx, public_key, 32);
-  SHA512_Update(&hash_ctx, message, message_len);
-  SHA512_Final(hram, &hash_ctx);
-
-  x25519_sc_reduce(hram);
-  sc_muladd(out_sig + 32, hram, az, nonce);
-
-  OPENSSL_cleanse(&hash_ctx, sizeof(hash_ctx));
-  OPENSSL_cleanse(nonce, sizeof(nonce));
-  OPENSSL_cleanse(az, sizeof(az));
-
-  return 1;
+                 const uint8_t public_key[32], const uint8_t private_key[32])
+{
+    uint8_t az[SHA512_DIGEST_LENGTH];
+    uint8_t nonce[SHA512_DIGEST_LENGTH];
+    ge_p3 R;
+    uint8_t hram[SHA512_DIGEST_LENGTH];
+    SHA512_CTX hash_ctx;
+
+    SHA512_Init(&hash_ctx);
+    SHA512_Update(&hash_ctx, private_key, 32);
+    SHA512_Final(az, &hash_ctx);
+
+    az[0] &= 248;
+    az[31] &= 63;
+    az[31] |= 64;
+
+    SHA512_Init(&hash_ctx);
+    SHA512_Update(&hash_ctx, az + 32, 32);
+    SHA512_Update(&hash_ctx, message, message_len);
+    SHA512_Final(nonce, &hash_ctx);
+
+    x25519_sc_reduce(nonce);
+    ge_scalarmult_base(&R, nonce);
+    ge_p3_tobytes(out_sig, &R);
+
+    SHA512_Init(&hash_ctx);
+    SHA512_Update(&hash_ctx, out_sig, 32);
+    SHA512_Update(&hash_ctx, public_key, 32);
+    SHA512_Update(&hash_ctx, message, message_len);
+    SHA512_Final(hram, &hash_ctx);
+
+    x25519_sc_reduce(hram);
+    sc_muladd(out_sig + 32, hram, az, nonce);
+
+    OPENSSL_cleanse(&hash_ctx, sizeof(hash_ctx));
+    OPENSSL_cleanse(nonce, sizeof(nonce));
+    OPENSSL_cleanse(az, sizeof(az));
+
+    return 1;
 }
 
+static const char allzeroes[15];
+
 int ED25519_verify(const uint8_t *message, size_t message_len,
-                   const uint8_t signature[64], const uint8_t public_key[32]) {
-  ge_p3 A;
-  uint8_t rcopy[32];
-  uint8_t scopy[32];
-  SHA512_CTX hash_ctx;
-  ge_p2 R;
-  uint8_t rcheck[32];
-  uint8_t h[SHA512_DIGEST_LENGTH];
-
-  if ((signature[63] & 224) != 0 ||
-      ge_frombytes_vartime(&A, public_key) != 0) {
-    return 0;
-  }
+                   const uint8_t signature[64], const uint8_t public_key[32])
+{
+    int i;
+    ge_p3 A;
+    const uint8_t *r, *s;
+    SHA512_CTX hash_ctx;
+    ge_p2 R;
+    uint8_t rcheck[32];
+    uint8_t h[SHA512_DIGEST_LENGTH];
+    /* 27742317777372353535851937790883648493 in little endian format */
+    const uint8_t l_low[16] = {
+        0xED, 0xD3, 0xF5, 0x5C, 0x1A, 0x63, 0x12, 0x58, 0xD6, 0x9C, 0xF7, 0xA2,
+        0xDE, 0xF9, 0xDE, 0x14
+    };
+
+    r = signature;
+    s = signature + 32;
+
+    /*
+     * Check 0 <= s < L where L = 2^252 + 27742317777372353535851937790883648493
+     *
+     * If not the signature is publicly invalid. Since it's public we can do the
+     * check in variable time.
+     *
+     * First check the most significant byte
+     */
+    if (s[31] > 0x10)
+        return 0;
+    if (s[31] == 0x10) {
+        /*
+         * Most significant byte indicates a value close to 2^252 so check the
+         * rest
+         */
+        if (memcmp(s + 16, allzeroes, sizeof(allzeroes)) != 0)
+            return 0;
+        for (i = 15; i >= 0; i--) {
+            if (s[i] < l_low[i])
+                break;
+            if (s[i] > l_low[i])
+                return 0;
+        }
+        if (i < 0)
+            return 0;
+    }
 
-  fe_neg(A.X, A.X);
-  fe_neg(A.T, A.T);
+    if (ge_frombytes_vartime(&A, public_key) != 0) {
+        return 0;
+    }
 
-  memcpy(rcopy, signature, 32);
-  memcpy(scopy, signature + 32, 32);
+    fe_neg(A.X, A.X);
+    fe_neg(A.T, A.T);
 
-  SHA512_Init(&hash_ctx);
-  SHA512_Update(&hash_ctx, signature, 32);
-  SHA512_Update(&hash_ctx, public_key, 32);
-  SHA512_Update(&hash_ctx, message, message_len);
-  SHA512_Final(h, &hash_ctx);
+    SHA512_Init(&hash_ctx);
+    SHA512_Update(&hash_ctx, r, 32);
+    SHA512_Update(&hash_ctx, public_key, 32);
+    SHA512_Update(&hash_ctx, message, message_len);
+    SHA512_Final(h, &hash_ctx);
 
-  x25519_sc_reduce(h);
+    x25519_sc_reduce(h);
 
-  ge_double_scalarmult_vartime(&R, h, &A, scopy);
+    ge_double_scalarmult_vartime(&R, h, &A, s);
 
-  ge_tobytes(rcheck, &R);
+    ge_tobytes(rcheck, &R);
 
-  return CRYPTO_memcmp(rcheck, rcopy, sizeof(rcheck)) == 0;
+    return CRYPTO_memcmp(rcheck, r, sizeof(rcheck)) == 0;
 }
 
 void ED25519_public_from_private(uint8_t out_public_key[32],
-                                 const uint8_t private_key[32]) {
-  uint8_t az[SHA512_DIGEST_LENGTH];
-  ge_p3 A;
+                                 const uint8_t private_key[32])
+{
+    uint8_t az[SHA512_DIGEST_LENGTH];
+    ge_p3 A;
 
-  SHA512(private_key, 32, az);
+    SHA512(private_key, 32, az);
 
-  az[0] &= 248;
-  az[31] &= 63;
-  az[31] |= 64;
+    az[0] &= 248;
+    az[31] &= 63;
+    az[31] |= 64;
 
-  ge_scalarmult_base(&A, az);
-  ge_p3_tobytes(out_public_key, &A);
+    ge_scalarmult_base(&A, az);
+    ge_p3_tobytes(out_public_key, &A);
 
-  OPENSSL_cleanse(az, sizeof(az));
+    OPENSSL_cleanse(az, sizeof(az));
 }
 
 int X25519(uint8_t out_shared_key[32], const uint8_t private_key[32],
-           const uint8_t peer_public_value[32]) {
-  static const uint8_t kZeros[32] = {0};
-  x25519_scalar_mult(out_shared_key, private_key, peer_public_value);
-  /* The all-zero output results when the input is a point of small order. */
-  return CRYPTO_memcmp(kZeros, out_shared_key, 32) != 0;
+           const uint8_t peer_public_value[32])
+{
+    static const uint8_t kZeros[32] = {0};
+    x25519_scalar_mult(out_shared_key, private_key, peer_public_value);
+    /* The all-zero output results when the input is a point of small order. */
+    return CRYPTO_memcmp(kZeros, out_shared_key, 32) != 0;
 }
 
 void X25519_public_from_private(uint8_t out_public_value[32],
-                                const uint8_t private_key[32]) {
-  uint8_t e[32];
-  ge_p3 A;
-  fe zplusy, zminusy, zminusy_inv;
-
-  memcpy(e, private_key, 32);
-  e[0] &= 248;
-  e[31] &= 127;
-  e[31] |= 64;
-
-  ge_scalarmult_base(&A, e);
-
-  /* We only need the u-coordinate of the curve25519 point. The map is
-   * u=(y+1)/(1-y). Since y=Y/Z, this gives u=(Z+Y)/(Z-Y). */
-  fe_add(zplusy, A.Z, A.Y);
-  fe_sub(zminusy, A.Z, A.Y);
-  fe_invert(zminusy_inv, zminusy);
-  fe_mul(zplusy, zplusy, zminusy_inv);
-  fe_tobytes(out_public_value, zplusy);
-
-  OPENSSL_cleanse(e, sizeof(e));
+                                const uint8_t private_key[32])
+{
+    uint8_t e[32];
+    ge_p3 A;
+    fe zplusy, zminusy, zminusy_inv;
+
+    memcpy(e, private_key, 32);
+    e[0] &= 248;
+    e[31] &= 127;
+    e[31] |= 64;
+
+    ge_scalarmult_base(&A, e);
+
+    /*
+     * We only need the u-coordinate of the curve25519 point.
+     * The map is u=(y+1)/(1-y). Since y=Y/Z, this gives
+     * u=(Z+Y)/(Z-Y).
+     */
+    fe_add(zplusy, A.Z, A.Y);
+    fe_sub(zminusy, A.Z, A.Y);
+    fe_invert(zminusy_inv, zminusy);
+    fe_mul(zplusy, zplusy, zminusy_inv);
+    fe_tobytes(out_public_value, zplusy);
+
+    OPENSSL_cleanse(e, sizeof(e));
 }
diff --git a/crypto/ec/curve448/eddsa.c b/crypto/ec/curve448/eddsa.c
index 909413a535a8e..b28f7dff91386 100644
--- a/crypto/ec/curve448/eddsa.c
+++ b/crypto/ec/curve448/eddsa.c
@@ -246,10 +246,36 @@ c448_error_t c448_ed448_verify(
                     uint8_t context_len)
 {
     curve448_point_t pk_point, r_point;
-    c448_error_t error =
-        curve448_point_decode_like_eddsa_and_mul_by_ratio(pk_point, pubkey);
+    c448_error_t error;
     curve448_scalar_t challenge_scalar;
     curve448_scalar_t response_scalar;
+    /* Order in little endian format */
+    static const uint8_t order[] = {
+        0xF3, 0x44, 0x58, 0xAB, 0x92, 0xC2, 0x78, 0x23, 0x55, 0x8F, 0xC5, 0x8D,
+        0x72, 0xC2, 0x6C, 0x21, 0x90, 0x36, 0xD6, 0xAE, 0x49, 0xDB, 0x4E, 0xC4,
+        0xE9, 0x23, 0xCA, 0x7C, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00
+    };
+    int i;
+
+    /*
+     * Check that s (second 57 bytes of the sig) is less than the order. Both
+     * s and the order are in little-endian format. This can be done in
+     * variable time, since if this is not the case the signature is publicly
+     * invalid.
+     */
+    for (i = EDDSA_448_PUBLIC_BYTES - 1; i >= 0; i--) {
+        if (signature[i + EDDSA_448_PUBLIC_BYTES] > order[i])
+            return C448_FAILURE;
+        if (signature[i + EDDSA_448_PUBLIC_BYTES] < order[i])
+            break;
+    }
+    if (i < 0)
+        return C448_FAILURE;
+
+    error =
+        curve448_point_decode_like_eddsa_and_mul_by_ratio(pk_point, pubkey);
 
     if (C448_SUCCESS != error)
         return error;
diff --git a/crypto/ec/curve448/point_448.h b/crypto/ec/curve448/point_448.h
index 0ef3b8714e263..399f91b9a1d98 100644
--- a/crypto/ec/curve448/point_448.h
+++ b/crypto/ec/curve448/point_448.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2017-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright 2015-2016 Cryptography Research, Inc.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
@@ -116,7 +116,7 @@ void curve448_scalar_encode(unsigned char ser[C448_SCALAR_BYTES],
 
 /*
  * Add two scalars. |a|, |b| and |out| may alias each other.
- * 
+ *
  * a (in): One scalar.
  * b (in): Another scalar.
  * out (out): a+b.
@@ -135,7 +135,7 @@ void curve448_scalar_sub(curve448_scalar_t out,
 
 /*
  * Multiply two scalars. |a|, |b| and |out| may alias each other.
- * 
+ *
  * a (in): One scalar.
  * b (in): Another scalar.
  * out (out): a*b.
@@ -145,7 +145,7 @@ void curve448_scalar_mul(curve448_scalar_t out,
 
 /*
 * Halve a scalar.  |a| and |out| may alias each other.
-* 
+*
 * a (in): A scalar.
 * out (out): a/2.
 */
@@ -154,7 +154,7 @@ void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a);
 /*
  * Copy a scalar.  The scalars may alias each other, in which case this
  * function does nothing.
- * 
+ *
  * a (in): A scalar.
  * out (out): Will become a copy of a.
  */
@@ -183,7 +183,7 @@ static ossl_inline void curve448_point_copy(curve448_point_t a,
  *
  * a (in): A point.
  * b (in): Another point.
- * 
+ *
  * Returns:
  * C448_TRUE: The points are equal.
  * C448_FALSE: The points are not equal.
@@ -243,7 +243,7 @@ void curve448_point_mul_by_ratio_and_encode_like_x448(
 /*
  * RFC 7748 Diffie-Hellman base point scalarmul.  This function uses a different
  * (non-Decaf) encoding.
- * 
+ *
  * out (out): The scaled point base*scalar
  * scalar (in): The scalar to multiply by.
  */
@@ -273,7 +273,7 @@ void curve448_precomputed_scalarmul(curve448_point_t scaled,
  * base2 (in): A second point to be scaled.
  * scalar2 (in) A second scalar to multiply by.
  *
- * Warning: This function takes variable time, and may leak the scalars used. 
+ * Warning: This function takes variable time, and may leak the scalars used.
  * It is designed for signature verification.
  */
 void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,
diff --git a/crypto/ec/ec2_smpl.c b/crypto/ec/ec2_smpl.c
index 87f7ce56911d9..0a05a7aeea61c 100644
--- a/crypto/ec/ec2_smpl.c
+++ b/crypto/ec/ec2_smpl.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
@@ -810,7 +810,7 @@ int ec_GF2m_simple_ladder_post(const EC_GROUP *group,
         || !group->meth->field_mul(group, t2, t2, t0, ctx)
         || !BN_GF2m_add(t1, t2, t1)
         || !group->meth->field_mul(group, t2, p->X, t0, ctx)
-        || !BN_GF2m_mod_inv(t2, t2, group->field, ctx)
+        || !group->meth->field_inv(group, t2, t2, ctx)
         || !group->meth->field_mul(group, t1, t1, t2, ctx)
         || !group->meth->field_mul(group, r->X, r->Z, t2, ctx)
         || !BN_GF2m_add(t2, p->X, r->X)
@@ -889,6 +889,21 @@ int ec_GF2m_simple_points_mul(const EC_GROUP *group, EC_POINT *r,
     return ret;
 }
 
+/*-
+ * Computes the multiplicative inverse of a in GF(2^m), storing the result in r.
+ * If a is zero (or equivalent), you'll get an EC_R_CANNOT_INVERT error.
+ * SCA hardening is with blinding: BN_GF2m_mod_inv does that.
+ */
+static int ec_GF2m_simple_field_inv(const EC_GROUP *group, BIGNUM *r,
+                                    const BIGNUM *a, BN_CTX *ctx)
+{
+    int ret;
+
+    if (!(ret = BN_GF2m_mod_inv(r, a, group->field, ctx)))
+        ECerr(EC_F_EC_GF2M_SIMPLE_FIELD_INV, EC_R_CANNOT_INVERT);
+    return ret;
+}
+
 const EC_METHOD *EC_GF2m_simple_method(void)
 {
     static const EC_METHOD ret = {
@@ -929,6 +944,7 @@ const EC_METHOD *EC_GF2m_simple_method(void)
         ec_GF2m_simple_field_mul,
         ec_GF2m_simple_field_sqr,
         ec_GF2m_simple_field_div,
+        ec_GF2m_simple_field_inv,
         0, /* field_encode */
         0, /* field_decode */
         0, /* field_set_to_one */
diff --git a/crypto/ec/ec_ameth.c b/crypto/ec/ec_ameth.c
index a3164b5b2ed97..8b363e096beda 100644
--- a/crypto/ec/ec_ameth.c
+++ b/crypto/ec/ec_ameth.c
@@ -505,7 +505,7 @@ static int ec_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
 
     case ASN1_PKEY_CTRL_DEFAULT_MD_NID:
         *(int *)arg2 = NID_sha256;
-        return 2;
+        return 1;
 
     case ASN1_PKEY_CTRL_SET1_TLS_ENCPT:
         return EC_KEY_oct2key(EVP_PKEY_get0_EC_KEY(pkey), arg2, arg1, NULL);
diff --git a/crypto/ec/ec_err.c b/crypto/ec/ec_err.c
index 8f4911abec79a..ce3493823218f 100644
--- a/crypto/ec/ec_err.c
+++ b/crypto/ec/ec_err.c
@@ -1,6 +1,6 @@
 /*
  * Generated by util/mkerr.pl DO NOT EDIT
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -66,6 +66,8 @@ static const ERR_STRING_DATA EC_str_functs[] = {
      "ec_asn1_group2fieldid"},
     {ERR_PACK(ERR_LIB_EC, EC_F_EC_GF2M_MONTGOMERY_POINT_MULTIPLY, 0),
      "ec_GF2m_montgomery_point_multiply"},
+    {ERR_PACK(ERR_LIB_EC, EC_F_EC_GF2M_SIMPLE_FIELD_INV, 0),
+     "ec_GF2m_simple_field_inv"},
     {ERR_PACK(ERR_LIB_EC, EC_F_EC_GF2M_SIMPLE_GROUP_CHECK_DISCRIMINANT, 0),
      "ec_GF2m_simple_group_check_discriminant"},
     {ERR_PACK(ERR_LIB_EC, EC_F_EC_GF2M_SIMPLE_GROUP_SET_CURVE, 0),
@@ -90,6 +92,8 @@ static const ERR_STRING_DATA EC_str_functs[] = {
      "ec_GFp_mont_field_decode"},
     {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_MONT_FIELD_ENCODE, 0),
      "ec_GFp_mont_field_encode"},
+    {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_MONT_FIELD_INV, 0),
+     "ec_GFp_mont_field_inv"},
     {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_MONT_FIELD_MUL, 0),
      "ec_GFp_mont_field_mul"},
     {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_MONT_FIELD_SET_TO_ONE, 0),
@@ -124,6 +128,8 @@ static const ERR_STRING_DATA EC_str_functs[] = {
      "ec_GFp_nist_group_set_curve"},
     {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_SIMPLE_BLIND_COORDINATES, 0),
      "ec_GFp_simple_blind_coordinates"},
+    {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_SIMPLE_FIELD_INV, 0),
+     "ec_GFp_simple_field_inv"},
     {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_SIMPLE_GROUP_CHECK_DISCRIMINANT, 0),
      "ec_GFp_simple_group_check_discriminant"},
     {ERR_PACK(ERR_LIB_EC, EC_F_EC_GFP_SIMPLE_GROUP_SET_CURVE, 0),
@@ -287,6 +293,7 @@ static const ERR_STRING_DATA EC_str_reasons[] = {
     {ERR_PACK(ERR_LIB_EC, 0, EC_R_BAD_SIGNATURE), "bad signature"},
     {ERR_PACK(ERR_LIB_EC, 0, EC_R_BIGNUM_OUT_OF_RANGE), "bignum out of range"},
     {ERR_PACK(ERR_LIB_EC, 0, EC_R_BUFFER_TOO_SMALL), "buffer too small"},
+    {ERR_PACK(ERR_LIB_EC, 0, EC_R_CANNOT_INVERT), "cannot invert"},
     {ERR_PACK(ERR_LIB_EC, 0, EC_R_COORDINATES_OUT_OF_RANGE),
     "coordinates out of range"},
     {ERR_PACK(ERR_LIB_EC, 0, EC_R_CURVE_DOES_NOT_SUPPORT_ECDH),
diff --git a/crypto/ec/ec_lcl.h b/crypto/ec/ec_lcl.h
index e055ddab1c76e..119255f1dc832 100644
--- a/crypto/ec/ec_lcl.h
+++ b/crypto/ec/ec_lcl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
@@ -15,7 +15,6 @@
 #include <openssl/bn.h>
 #include "internal/refcount.h"
 #include "internal/ec_int.h"
-#include "curve448/curve448_lcl.h"
 
 #if defined(__SUNPRO_C)
 # if __SUNPRO_C >= 0x520
@@ -154,6 +153,13 @@ struct ec_method_st {
     int (*field_sqr) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *);
     int (*field_div) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                       const BIGNUM *b, BN_CTX *);
+    /*-
+     * 'field_inv' computes the multiplicative inverse of a in the field,
+     * storing the result in r.
+     *
+     * If 'a' is zero (or equivalent), you'll get an EC_R_CANNOT_INVERT error.
+     */
+    int (*field_inv) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a, BN_CTX *);
     /* e.g. to Montgomery */
     int (*field_encode) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                          BN_CTX *);
@@ -390,6 +396,8 @@ int ec_GFp_simple_field_mul(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                             const BIGNUM *b, BN_CTX *);
 int ec_GFp_simple_field_sqr(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                             BN_CTX *);
+int ec_GFp_simple_field_inv(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
+                            BN_CTX *);
 int ec_GFp_simple_blind_coordinates(const EC_GROUP *group, EC_POINT *p,
                                     BN_CTX *ctx);
 int ec_GFp_simple_ladder_pre(const EC_GROUP *group,
@@ -413,6 +421,8 @@ int ec_GFp_mont_field_mul(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                           const BIGNUM *b, BN_CTX *);
 int ec_GFp_mont_field_sqr(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                           BN_CTX *);
+int ec_GFp_mont_field_inv(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
+                          BN_CTX *);
 int ec_GFp_mont_field_encode(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
                              BN_CTX *);
 int ec_GFp_mont_field_decode(const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
diff --git a/crypto/ec/ecp_mont.c b/crypto/ec/ecp_mont.c
index 36682e5cfbd18..252e66ef3791c 100644
--- a/crypto/ec/ecp_mont.c
+++ b/crypto/ec/ecp_mont.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
@@ -50,6 +50,7 @@ const EC_METHOD *EC_GFp_mont_method(void)
         ec_GFp_mont_field_mul,
         ec_GFp_mont_field_sqr,
         0 /* field_div */ ,
+        ec_GFp_mont_field_inv,
         ec_GFp_mont_field_encode,
         ec_GFp_mont_field_decode,
         ec_GFp_mont_field_set_to_one,
@@ -206,6 +207,54 @@ int ec_GFp_mont_field_sqr(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a,
     return BN_mod_mul_montgomery(r, a, a, group->field_data1, ctx);
 }
 
+/*-
+ * Computes the multiplicative inverse of a in GF(p), storing the result in r.
+ * If a is zero (or equivalent), you'll get an EC_R_CANNOT_INVERT error.
+ * We have a Mont structure, so SCA hardening is FLT inversion: r = a^(p-2).
+ */
+int ec_GFp_mont_field_inv(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a,
+                            BN_CTX *ctx)
+{
+    BIGNUM *e = NULL;
+    BN_CTX *new_ctx = NULL;
+    int ret = 0;
+
+    if (group->field_data1 == NULL)
+        return 0;
+
+    if (ctx == NULL && (ctx = new_ctx = BN_CTX_secure_new()) == NULL)
+        return 0;
+
+    BN_CTX_start(ctx);
+    if ((e = BN_CTX_get(ctx)) == NULL)
+        goto err;
+
+    /* Inverse in constant time with Fermat's Little Theorem: e = p - 2 */
+    if (!BN_set_word(e, 2))
+        goto err;
+    if (!BN_sub(e, group->field, e))
+        goto err;
+    /*-
+     * Exponent e is public.
+     * No need for scatter-gather or BN_FLG_CONSTTIME.
+     */
+    if (!BN_mod_exp_mont(r, a, e, group->field, ctx, group->field_data1))
+        goto err;
+
+    /* throw an error on zero */
+    if (BN_is_zero(r)) {
+        ECerr(EC_F_EC_GFP_MONT_FIELD_INV, EC_R_CANNOT_INVERT);
+        goto err;
+    }
+
+    ret = 1;
+
+  err:
+    BN_CTX_end(ctx);
+    BN_CTX_free(new_ctx);
+    return ret;
+}
+
 int ec_GFp_mont_field_encode(const EC_GROUP *group, BIGNUM *r,
                              const BIGNUM *a, BN_CTX *ctx)
 {
diff --git a/crypto/ec/ecp_nist.c b/crypto/ec/ecp_nist.c
index f53de1a1638bd..5eaa99d8402b6 100644
--- a/crypto/ec/ecp_nist.c
+++ b/crypto/ec/ecp_nist.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
@@ -52,6 +52,7 @@ const EC_METHOD *EC_GFp_nist_method(void)
         ec_GFp_nist_field_mul,
         ec_GFp_nist_field_sqr,
         0 /* field_div */ ,
+        ec_GFp_simple_field_inv,
         0 /* field_encode */ ,
         0 /* field_decode */ ,
         0,                      /* field_set_to_one */
diff --git a/crypto/ec/ecp_nistp224.c b/crypto/ec/ecp_nistp224.c
index 555bf307dd031..025273a144408 100644
--- a/crypto/ec/ecp_nistp224.c
+++ b/crypto/ec/ecp_nistp224.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2010-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -279,6 +279,7 @@ const EC_METHOD *EC_GFp_nistp224_method(void)
         ec_GFp_nist_field_mul,
         ec_GFp_nist_field_sqr,
         0 /* field_div */ ,
+        ec_GFp_simple_field_inv,
         0 /* field_encode */ ,
         0 /* field_decode */ ,
         0,                      /* field_set_to_one */
diff --git a/crypto/ec/ecp_nistp256.c b/crypto/ec/ecp_nistp256.c
index c87a5e548d369..a21e5f78fc906 100644
--- a/crypto/ec/ecp_nistp256.c
+++ b/crypto/ec/ecp_nistp256.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -1810,6 +1810,7 @@ const EC_METHOD *EC_GFp_nistp256_method(void)
         ec_GFp_nist_field_mul,
         ec_GFp_nist_field_sqr,
         0 /* field_div */ ,
+        ec_GFp_simple_field_inv,
         0 /* field_encode */ ,
         0 /* field_decode */ ,
         0,                      /* field_set_to_one */
diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c
index 14f2feeb69996..2f47772a3477f 100644
--- a/crypto/ec/ecp_nistp521.c
+++ b/crypto/ec/ecp_nistp521.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -1647,6 +1647,7 @@ const EC_METHOD *EC_GFp_nistp521_method(void)
         ec_GFp_nist_field_mul,
         ec_GFp_nist_field_sqr,
         0 /* field_div */ ,
+        ec_GFp_simple_field_inv,
         0 /* field_encode */ ,
         0 /* field_decode */ ,
         0,                      /* field_set_to_one */
diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c
index b0564bdbd04c5..aea6394169ce9 100644
--- a/crypto/ec/ecp_nistz256.c
+++ b/crypto/ec/ecp_nistz256.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright (c) 2014, Intel Corporation. All Rights Reserved.
  * Copyright (c) 2015, CloudFlare, Inc.
  *
@@ -1677,6 +1677,7 @@ const EC_METHOD *EC_GFp_nistz256_method(void)
         ec_GFp_mont_field_mul,
         ec_GFp_mont_field_sqr,
         0,                                          /* field_div */
+        ec_GFp_mont_field_inv,
         ec_GFp_mont_field_encode,
         ec_GFp_mont_field_decode,
         ec_GFp_mont_field_set_to_one,
diff --git a/crypto/ec/ecp_smpl.c b/crypto/ec/ecp_smpl.c
index d0c5557ff4dda..f6a6cedb0ae36 100644
--- a/crypto/ec/ecp_smpl.c
+++ b/crypto/ec/ecp_smpl.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
@@ -51,6 +51,7 @@ const EC_METHOD *EC_GFp_simple_method(void)
         ec_GFp_simple_field_mul,
         ec_GFp_simple_field_sqr,
         0 /* field_div */ ,
+        ec_GFp_simple_field_inv,
         0 /* field_encode */ ,
         0 /* field_decode */ ,
         0,                      /* field_set_to_one */
@@ -553,7 +554,7 @@ int ec_GFp_simple_point_get_affine_coordinates(const EC_GROUP *group,
             }
         }
     } else {
-        if (!BN_mod_inverse(Z_1, Z_, group->field, ctx)) {
+        if (!group->meth->field_inv(group, Z_1, Z_, ctx)) {
             ECerr(EC_F_EC_GFP_SIMPLE_POINT_GET_AFFINE_COORDINATES,
                   ERR_R_BN_LIB);
             goto err;
@@ -1266,7 +1267,7 @@ int ec_GFp_simple_points_make_affine(const EC_GROUP *group, size_t num,
      * points[i]->Z by its inverse.
      */
 
-    if (!BN_mod_inverse(tmp, prod_Z[num - 1], group->field, ctx)) {
+    if (!group->meth->field_inv(group, tmp, prod_Z[num - 1], ctx)) {
         ECerr(EC_F_EC_GFP_SIMPLE_POINTS_MAKE_AFFINE, ERR_R_BN_LIB);
         goto err;
     }
@@ -1370,6 +1371,50 @@ int ec_GFp_simple_field_sqr(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a,
 }
 
 /*-
+ * Computes the multiplicative inverse of a in GF(p), storing the result in r.
+ * If a is zero (or equivalent), you'll get an EC_R_CANNOT_INVERT error.
+ * Since we don't have a Mont structure here, SCA hardening is with blinding.
+ */
+int ec_GFp_simple_field_inv(const EC_GROUP *group, BIGNUM *r, const BIGNUM *a,
+                            BN_CTX *ctx)
+{
+    BIGNUM *e = NULL;
+    BN_CTX *new_ctx = NULL;
+    int ret = 0;
+
+    if (ctx == NULL && (ctx = new_ctx = BN_CTX_secure_new()) == NULL)
+        return 0;
+
+    BN_CTX_start(ctx);
+    if ((e = BN_CTX_get(ctx)) == NULL)
+        goto err;
+
+    do {
+        if (!BN_priv_rand_range(e, group->field))
+            goto err;
+    } while (BN_is_zero(e));
+
+    /* r := a * e */
+    if (!group->meth->field_mul(group, r, a, e, ctx))
+        goto err;
+    /* r := 1/(a * e) */
+    if (!BN_mod_inverse(r, r, group->field, ctx)) {
+        ECerr(EC_F_EC_GFP_SIMPLE_FIELD_INV, EC_R_CANNOT_INVERT);
+        goto err;
+    }
+    /* r := e/(a * e) = 1/a */
+    if (!group->meth->field_mul(group, r, r, e, ctx))
+        goto err;
+
+    ret = 1;
+
+ err:
+    BN_CTX_end(ctx);
+    BN_CTX_free(new_ctx);
+    return ret;
+}
+
+/*-
  * Apply randomization of EC point projective coordinates:
  *
  *   (X, Y ,Z ) = (lambda^2*X, lambda^3*Y, lambda*Z)
diff --git a/crypto/ec/ecx_meth.c b/crypto/ec/ecx_meth.c
index b76bfdb6dc342..e4cac99e2d2ac 100644
--- a/crypto/ec/ecx_meth.c
+++ b/crypto/ec/ecx_meth.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2006-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2006-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -15,6 +15,7 @@
 #include "internal/asn1_int.h"
 #include "internal/evp_int.h"
 #include "ec_lcl.h"
+#include "curve448/curve448_lcl.h"
 
 #define X25519_BITS          253
 #define X25519_SECURITY_BITS 128
diff --git a/crypto/engine/README b/crypto/engine/README
index 0050b9e50951c..c7a5696ca14cd 100644
--- a/crypto/engine/README
+++ b/crypto/engine/README
@@ -26,7 +26,7 @@ algorithm/mode pair are;
      EVP_EncryptInit(&ctx, cipher, key, iv);
      [ ... use EVP_EncryptUpdate() and EVP_EncryptFinal() ...]
 
-(ii) indirectly; 
+(ii) indirectly;
      OpenSSL_add_all_ciphers();
      cipher = EVP_get_cipherbyname("des_cbc");
      EVP_EncryptInit(&ctx, cipher, key, iv);
diff --git a/crypto/engine/eng_devcrypto.c b/crypto/engine/eng_devcrypto.c
index 4a0ba09a38bec..717d7c27794ff 100644
--- a/crypto/engine/eng_devcrypto.c
+++ b/crypto/engine/eng_devcrypto.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2017-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -24,6 +24,8 @@
 
 #include "internal/engine.h"
 
+/* #define ENGINE_DEVCRYPTO_DEBUG */
+
 #ifdef CRYPTO_ALGORITHM_MIN
 # define CHECK_BSD_STYLE_MACROS
 #endif
@@ -35,6 +37,15 @@
  */
 static int cfd;
 
+static int clean_devcrypto_session(struct session_op *sess) {
+    if (ioctl(cfd, CIOCFSESSION, &sess->ses) < 0) {
+        SYSerr(SYS_F_IOCTL, errno);
+        return 0;
+    }
+    memset(sess, 0, sizeof(struct session_op));
+    return 1;
+}
+
 /******************************************************************************
  *
  * Ciphers
@@ -47,10 +58,12 @@ static int cfd;
 
 struct cipher_ctx {
     struct session_op sess;
-
-    /* to pass from init to do_cipher */
-    const unsigned char *iv;
     int op;                      /* COP_ENCRYPT or COP_DECRYPT */
+    unsigned long mode;          /* EVP_CIPH_*_MODE */
+
+    /* to handle ctr mode being a stream cipher */
+    unsigned char partial[EVP_MAX_BLOCK_LENGTH];
+    unsigned int blocksize, num;
 };
 
 static const struct cipher_data_st {
@@ -87,9 +100,9 @@ static const struct cipher_data_st {
     { NID_aes_256_xts, 16, 256 / 8 * 2, 16, EVP_CIPH_XTS_MODE, CRYPTO_AES_XTS },
 #endif
 #if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_AES_ECB)
-    { NID_aes_128_ecb, 16, 128 / 8, 16, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB },
-    { NID_aes_192_ecb, 16, 192 / 8, 16, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB },
-    { NID_aes_256_ecb, 16, 256 / 8, 16, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB },
+    { NID_aes_128_ecb, 16, 128 / 8, 0, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB },
+    { NID_aes_192_ecb, 16, 192 / 8, 0, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB },
+    { NID_aes_256_ecb, 16, 256 / 8, 0, EVP_CIPH_ECB_MODE, CRYPTO_AES_ECB },
 #endif
 #if 0                            /* Not yet supported */
     { NID_aes_128_gcm, 16, 128 / 8, 16, EVP_CIPH_GCM_MODE, CRYPTO_AES_GCM },
@@ -141,11 +154,17 @@ static int cipher_init(EVP_CIPHER_CTX *ctx, const unsigned char *key,
     const struct cipher_data_st *cipher_d =
         get_cipher_data(EVP_CIPHER_CTX_nid(ctx));
 
-    memset(&cipher_ctx->sess, 0, sizeof(cipher_ctx->sess));
+    /* cleanup a previous session */
+    if (cipher_ctx->sess.ses != 0 &&
+        clean_devcrypto_session(&cipher_ctx->sess) == 0)
+        return 0;
+
     cipher_ctx->sess.cipher = cipher_d->devcryptoid;
     cipher_ctx->sess.keylen = cipher_d->keylen;
     cipher_ctx->sess.key = (void *)key;
     cipher_ctx->op = enc ? COP_ENCRYPT : COP_DECRYPT;
+    cipher_ctx->mode = cipher_d->flags & EVP_CIPH_MODE;
+    cipher_ctx->blocksize = cipher_d->blocksize;
     if (ioctl(cfd, CIOCGSESSION, &cipher_ctx->sess) < 0) {
         SYSerr(SYS_F_IOCTL, errno);
         return 0;
@@ -160,8 +179,11 @@ static int cipher_do_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     struct cipher_ctx *cipher_ctx =
         (struct cipher_ctx *)EVP_CIPHER_CTX_get_cipher_data(ctx);
     struct crypt_op cryp;
+    unsigned char *iv = EVP_CIPHER_CTX_iv_noconst(ctx);
 #if !defined(COP_FLAG_WRITE_IV)
     unsigned char saved_iv[EVP_MAX_IV_LENGTH];
+    const unsigned char *ivptr;
+    size_t nblocks, ivlen;
 #endif
 
     memset(&cryp, 0, sizeof(cryp));
@@ -169,19 +191,28 @@ static int cipher_do_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     cryp.len = inl;
     cryp.src = (void *)in;
     cryp.dst = (void *)out;
-    cryp.iv = (void *)EVP_CIPHER_CTX_iv_noconst(ctx);
+    cryp.iv = (void *)iv;
     cryp.op = cipher_ctx->op;
 #if !defined(COP_FLAG_WRITE_IV)
     cryp.flags = 0;
 
-    if (EVP_CIPHER_CTX_iv_length(ctx) > 0) {
-        assert(inl >= EVP_CIPHER_CTX_iv_length(ctx));
-        if (!EVP_CIPHER_CTX_encrypting(ctx)) {
-            unsigned char *ivptr = in + inl - EVP_CIPHER_CTX_iv_length(ctx);
-
-            memcpy(saved_iv, ivptr, EVP_CIPHER_CTX_iv_length(ctx));
+    ivlen = EVP_CIPHER_CTX_iv_length(ctx);
+    if (ivlen > 0)
+        switch (cipher_ctx->mode) {
+        case EVP_CIPH_CBC_MODE:
+            assert(inl >= ivlen);
+            if (!EVP_CIPHER_CTX_encrypting(ctx)) {
+                ivptr = in + inl - ivlen;
+                memcpy(saved_iv, ivptr, ivlen);
+            }
+            break;
+
+        case EVP_CIPH_CTR_MODE:
+            break;
+
+        default: /* should not happen */
+            return 0;
         }
-    }
 #else
     cryp.flags = COP_FLAG_WRITE_IV;
 #endif
@@ -192,32 +223,113 @@ static int cipher_do_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     }
 
 #if !defined(COP_FLAG_WRITE_IV)
-    if (EVP_CIPHER_CTX_iv_length(ctx) > 0) {
-        unsigned char *ivptr = saved_iv;
+    if (ivlen > 0)
+        switch (cipher_ctx->mode) {
+        case EVP_CIPH_CBC_MODE:
+            assert(inl >= ivlen);
+            if (EVP_CIPHER_CTX_encrypting(ctx))
+                ivptr = out + inl - ivlen;
+            else
+                ivptr = saved_iv;
+
+            memcpy(iv, ivptr, ivlen);
+            break;
+
+        case EVP_CIPH_CTR_MODE:
+            nblocks = (inl + cipher_ctx->blocksize - 1)
+                      / cipher_ctx->blocksize;
+            do {
+                ivlen--;
+                nblocks += iv[ivlen];
+                iv[ivlen] = (uint8_t) nblocks;
+                nblocks >>= 8;
+            } while (ivlen);
+            break;
+
+        default: /* should not happen */
+            return 0;
+        }
+#endif
+
+    return 1;
+}
 
-        assert(inl >= EVP_CIPHER_CTX_iv_length(ctx));
-        if (!EVP_CIPHER_CTX_encrypting(ctx))
-            ivptr = out + inl - EVP_CIPHER_CTX_iv_length(ctx);
+static int ctr_do_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                         const unsigned char *in, size_t inl)
+{
+    struct cipher_ctx *cipher_ctx =
+        (struct cipher_ctx *)EVP_CIPHER_CTX_get_cipher_data(ctx);
+    size_t nblocks, len;
 
-        memcpy(EVP_CIPHER_CTX_iv_noconst(ctx), ivptr,
-               EVP_CIPHER_CTX_iv_length(ctx));
+    /* initial partial block: use up keystream left over in partial[] */
+    while (cipher_ctx->num && inl) {
+        (*out++) = *(in++) ^ cipher_ctx->partial[cipher_ctx->num];
+        --inl;
+        cipher_ctx->num = (cipher_ctx->num + 1) % cipher_ctx->blocksize;
+    }
+
+    /* full blocks, including an exact multiple of the block size */
+    if (inl >= (unsigned int) cipher_ctx->blocksize) {
+        nblocks = inl / cipher_ctx->blocksize;
+        len = nblocks * cipher_ctx->blocksize;
+        if (cipher_do_cipher(ctx, out, in, len) < 1)
+            return 0;
+        inl -= len;
+        out += len;
+        in += len;
+    }
+
+    /* final partial block: encrypt zeros to get keystream, keep the rest */
+    if (inl) {
+        memset(cipher_ctx->partial, 0, cipher_ctx->blocksize);
+        if (cipher_do_cipher(ctx, cipher_ctx->partial, cipher_ctx->partial,
+            cipher_ctx->blocksize) < 1)
+            return 0;
+        while (inl--) {
+            out[cipher_ctx->num] = in[cipher_ctx->num]
+                                   ^ cipher_ctx->partial[cipher_ctx->num];
+            cipher_ctx->num++;
+        }
     }
-#endif
 
     return 1;
 }
 
-static int cipher_cleanup(EVP_CIPHER_CTX *ctx)
+static int cipher_ctrl(EVP_CIPHER_CTX *ctx, int type, int p1, void* p2)
 {
     struct cipher_ctx *cipher_ctx =
         (struct cipher_ctx *)EVP_CIPHER_CTX_get_cipher_data(ctx);
+    EVP_CIPHER_CTX *to_ctx = (EVP_CIPHER_CTX *)p2;
+    struct cipher_ctx *to_cipher_ctx;
+
+    switch (type) {
+    case EVP_CTRL_COPY:
+        if (cipher_ctx == NULL)
+            return 1;
+        /* when copying the context, a new session needs to be initialized */
+        to_cipher_ctx =
+            (struct cipher_ctx *)EVP_CIPHER_CTX_get_cipher_data(to_ctx);
+        memset(&to_cipher_ctx->sess, 0, sizeof(to_cipher_ctx->sess));
+        return cipher_init(to_ctx, cipher_ctx->sess.key, EVP_CIPHER_CTX_iv(ctx),
+                           (cipher_ctx->op == COP_ENCRYPT));
+
+    case EVP_CTRL_INIT:
+        memset(&cipher_ctx->sess, 0, sizeof(cipher_ctx->sess));
+        return 1;
 
-    if (ioctl(cfd, CIOCFSESSION, &cipher_ctx->sess.ses) < 0) {
-        SYSerr(SYS_F_IOCTL, errno);
-        return 0;
+    default:
+        break;
     }
 
-    return 1;
+    return -1;
+}
+
+static int cipher_cleanup(EVP_CIPHER_CTX *ctx)
+{
+    struct cipher_ctx *cipher_ctx =
+        (struct cipher_ctx *)EVP_CIPHER_CTX_get_cipher_data(ctx);
+
+    return clean_devcrypto_session(&cipher_ctx->sess);
 }
 
 /*
@@ -233,6 +345,7 @@ static void prepare_cipher_methods(void)
 {
     size_t i;
     struct session_op sess;
+    unsigned long cipher_mode;
 
     memset(&sess, 0, sizeof(sess));
     sess.key = (void *)"01234567890123456789012345678901234567890123456789";
@@ -250,18 +363,26 @@ static void prepare_cipher_methods(void)
             || ioctl(cfd, CIOCFSESSION, &sess.ses) < 0)
             continue;
 
+        cipher_mode = cipher_data[i].flags & EVP_CIPH_MODE;
+
         if ((known_cipher_methods[i] =
                  EVP_CIPHER_meth_new(cipher_data[i].nid,
-                                     cipher_data[i].blocksize,
+                                     cipher_mode == EVP_CIPH_CTR_MODE ? 1 :
+                                                    cipher_data[i].blocksize,
                                      cipher_data[i].keylen)) == NULL
             || !EVP_CIPHER_meth_set_iv_length(known_cipher_methods[i],
                                               cipher_data[i].ivlen)
             || !EVP_CIPHER_meth_set_flags(known_cipher_methods[i],
                                           cipher_data[i].flags
+                                          | EVP_CIPH_CUSTOM_COPY
+                                          | EVP_CIPH_CTRL_INIT
                                           | EVP_CIPH_FLAG_DEFAULT_ASN1)
             || !EVP_CIPHER_meth_set_init(known_cipher_methods[i], cipher_init)
             || !EVP_CIPHER_meth_set_do_cipher(known_cipher_methods[i],
+                                     cipher_mode == EVP_CIPH_CTR_MODE ?
+                                              ctr_do_cipher :
                                               cipher_do_cipher)
+            || !EVP_CIPHER_meth_set_ctrl(known_cipher_methods[i], cipher_ctrl)
             || !EVP_CIPHER_meth_set_cleanup(known_cipher_methods[i],
                                             cipher_cleanup)
             || !EVP_CIPHER_meth_set_impl_ctx_size(known_cipher_methods[i],
@@ -338,34 +459,36 @@ static int devcrypto_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
 
 struct digest_ctx {
     struct session_op sess;
-    int init;
+    /* This signals that the init function was called, not that it succeeded. */
+    int init_called;
 };
 
 static const struct digest_data_st {
     int nid;
+    int blocksize;
     int digestlen;
     int devcryptoid;
 } digest_data[] = {
 #ifndef OPENSSL_NO_MD5
-    { NID_md5, 16, CRYPTO_MD5 },
+    { NID_md5, /* MD5_CBLOCK */ 64, 16, CRYPTO_MD5 },
 #endif
-    { NID_sha1, 20, CRYPTO_SHA1 },
+    { NID_sha1, SHA_CBLOCK, 20, CRYPTO_SHA1 },
 #ifndef OPENSSL_NO_RMD160
 # if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_RIPEMD160)
-    { NID_ripemd160, 20, CRYPTO_RIPEMD160 },
+    { NID_ripemd160, /* RIPEMD160_CBLOCK */ 64, 20, CRYPTO_RIPEMD160 },
 # endif
 #endif
 #if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_SHA2_224)
-    { NID_sha224, 224 / 8, CRYPTO_SHA2_224 },
+    { NID_sha224, SHA256_CBLOCK, 224 / 8, CRYPTO_SHA2_224 },
 #endif
 #if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_SHA2_256)
-    { NID_sha256, 256 / 8, CRYPTO_SHA2_256 },
+    { NID_sha256, SHA256_CBLOCK, 256 / 8, CRYPTO_SHA2_256 },
 #endif
 #if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_SHA2_384)
-    { NID_sha384, 384 / 8, CRYPTO_SHA2_384 },
+    { NID_sha384, SHA512_CBLOCK, 384 / 8, CRYPTO_SHA2_384 },
 #endif
 #if !defined(CHECK_BSD_STYLE_MACROS) || defined(CRYPTO_SHA2_512)
-    { NID_sha512, 512 / 8, CRYPTO_SHA2_512 },
+    { NID_sha512, SHA512_CBLOCK, 512 / 8, CRYPTO_SHA2_512 },
 #endif
 };
 
@@ -403,7 +526,7 @@ static int digest_init(EVP_MD_CTX *ctx)
     const struct digest_data_st *digest_d =
         get_digest_data(EVP_MD_CTX_type(ctx));
 
-    digest_ctx->init = 1;
+    digest_ctx->init_called = 1;
 
     memset(&digest_ctx->sess, 0, sizeof(digest_ctx->sess));
     digest_ctx->sess.mac = digest_d->devcryptoid;
@@ -438,6 +561,9 @@ static int digest_update(EVP_MD_CTX *ctx, const void *data, size_t count)
     if (count == 0)
         return 1;
 
+    if (digest_ctx == NULL)
+        return 0;
+
     if (digest_op(digest_ctx, data, count, NULL, COP_FLAG_UPDATE) < 0) {
         SYSerr(SYS_F_IOCTL, errno);
         return 0;
@@ -451,11 +577,9 @@ static int digest_final(EVP_MD_CTX *ctx, unsigned char *md)
     struct digest_ctx *digest_ctx =
         (struct digest_ctx *)EVP_MD_CTX_md_data(ctx);
 
-    if (digest_op(digest_ctx, NULL, 0, md, COP_FLAG_FINAL) < 0) {
-        SYSerr(SYS_F_IOCTL, errno);
+    if (md == NULL || digest_ctx == NULL)
         return 0;
-    }
-    if (ioctl(cfd, CIOCFSESSION, &digest_ctx->sess.ses) < 0) {
+    if (digest_op(digest_ctx, NULL, 0, md, COP_FLAG_FINAL) < 0) {
         SYSerr(SYS_F_IOCTL, errno);
         return 0;
     }
@@ -471,14 +595,9 @@ static int digest_copy(EVP_MD_CTX *to, const EVP_MD_CTX *from)
         (struct digest_ctx *)EVP_MD_CTX_md_data(to);
     struct cphash_op cphash;
 
-    if (digest_from == NULL)
+    if (digest_from == NULL || digest_from->init_called != 1)
         return 1;
 
-    if (digest_from->init != 1) {
-        SYSerr(SYS_F_IOCTL, EINVAL);
-        return 0;
-    }
-
     if (!digest_init(to)) {
         SYSerr(SYS_F_IOCTL, errno);
         return 0;
@@ -495,7 +614,37 @@ static int digest_copy(EVP_MD_CTX *to, const EVP_MD_CTX *from)
 
 static int digest_cleanup(EVP_MD_CTX *ctx)
 {
-    return 1;
+    struct digest_ctx *digest_ctx =
+        (struct digest_ctx *)EVP_MD_CTX_md_data(ctx);
+
+    if (digest_ctx == NULL)
+        return 1;
+
+    return clean_devcrypto_session(&digest_ctx->sess);
+}
+
+static int devcrypto_test_digest(size_t digest_data_index)
+{
+    struct session_op sess1, sess2;
+    struct cphash_op cphash;
+    int ret=0;
+
+    memset(&sess1, 0, sizeof(sess1));
+    memset(&sess2, 0, sizeof(sess2));
+    sess1.mac = digest_data[digest_data_index].devcryptoid;
+    if (ioctl(cfd, CIOCGSESSION, &sess1) < 0)
+        return 0;
+    /* Make sure the driver is capable of hash state copy */
+    sess2.mac = sess1.mac;
+    if (ioctl(cfd, CIOCGSESSION, &sess2) >= 0) {
+        cphash.src_ses = sess1.ses;
+        cphash.dst_ses = sess2.ses;
+        if (ioctl(cfd, CIOCCPHASH, &cphash) >= 0)
+            ret = 1;
+        ioctl(cfd, CIOCFSESSION, &sess2.ses);
+    }
+    ioctl(cfd, CIOCFSESSION, &sess1.ses);
+    return ret;
 }
 
 /*
@@ -510,24 +659,20 @@ static EVP_MD *known_digest_methods[OSSL_NELEM(digest_data)] = { NULL, };
 static void prepare_digest_methods(void)
 {
     size_t i;
-    struct session_op sess;
-
-    memset(&sess, 0, sizeof(sess));
 
     for (i = 0, known_digest_nids_amount = 0; i < OSSL_NELEM(digest_data);
          i++) {
 
         /*
-         * Check that the algo is really availably by trying to open and close
-         * a session.
+         * Check that the algo is usable
          */
-        sess.mac = digest_data[i].devcryptoid;
-        if (ioctl(cfd, CIOCGSESSION, &sess) < 0
-            || ioctl(cfd, CIOCFSESSION, &sess.ses) < 0)
+        if (!devcrypto_test_digest(i))
             continue;
 
         if ((known_digest_methods[i] = EVP_MD_meth_new(digest_data[i].nid,
                                                        NID_undef)) == NULL
+            || !EVP_MD_meth_set_input_blocksize(known_digest_methods[i],
+                                                digest_data[i].blocksize)
             || !EVP_MD_meth_set_result_size(known_digest_methods[i],
                                             digest_data[i].digestlen)
             || !EVP_MD_meth_set_init(known_digest_methods[i], digest_init)
@@ -615,15 +760,13 @@ void engine_load_devcrypto_int()
     ENGINE *e = NULL;
 
     if ((cfd = open("/dev/crypto", O_RDWR, 0)) < 0) {
-        fprintf(stderr, "Could not open /dev/crypto: %s\n", strerror(errno));
+#ifndef ENGINE_DEVCRYPTO_DEBUG
+        if (errno != ENOENT)
+#endif
+            fprintf(stderr, "Could not open /dev/crypto: %s\n", strerror(errno));
         return;
     }
 
-    prepare_cipher_methods();
-#ifdef IMPLEMENT_DIGEST
-    prepare_digest_methods();
-#endif
-
     if ((e = ENGINE_new()) == NULL
         || !ENGINE_set_destroy_function(e, devcrypto_unload)) {
         ENGINE_free(e);
@@ -636,6 +779,11 @@ void engine_load_devcrypto_int()
         return;
     }
 
+    prepare_cipher_methods();
+#ifdef IMPLEMENT_DIGEST
+    prepare_digest_methods();
+#endif
+
     if (!ENGINE_set_id(e, "devcrypto")
         || !ENGINE_set_name(e, "/dev/crypto engine")
 
diff --git a/crypto/engine/eng_lib.c b/crypto/engine/eng_lib.c
index 3ef3aae28a210..d7f2026fac546 100644
--- a/crypto/engine/eng_lib.c
+++ b/crypto/engine/eng_lib.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -124,7 +124,7 @@ static int int_cleanup_check(int create)
 static ENGINE_CLEANUP_ITEM *int_cleanup_item(ENGINE_CLEANUP_CB *cb)
 {
     ENGINE_CLEANUP_ITEM *item;
-    
+
     if ((item = OPENSSL_malloc(sizeof(*item))) == NULL) {
         ENGINEerr(ENGINE_F_INT_CLEANUP_ITEM, ERR_R_MALLOC_FAILURE);
         return NULL;
diff --git a/crypto/err/err.c b/crypto/err/err.c
index 03cbd738e1932..c737b2a9c3e6c 100644
--- a/crypto/err/err.c
+++ b/crypto/err/err.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -19,6 +19,9 @@
 #include <openssl/bio.h>
 #include <openssl/opensslconf.h>
 #include "internal/thread_once.h"
+#include "internal/ctype.h"
+#include "internal/constant_time_locl.h"
+#include "e_os.h"
 
 static int err_load_strings(const ERR_STRING_DATA *str);
 
@@ -181,8 +184,9 @@ static ERR_STRING_DATA *int_err_get_item(const ERR_STRING_DATA *d)
 }
 
 #ifndef OPENSSL_NO_ERR
+/* A measurement on Linux 2018-11-21 showed about 3.5kib */
+# define SPACE_SYS_STR_REASONS (4 * 1024)
 # define NUM_SYS_STR_REASONS 127
-# define LEN_SYS_STR_REASON 32
 
 static ERR_STRING_DATA SYS_str_reasons[NUM_SYS_STR_REASONS + 1];
 /*
@@ -198,9 +202,12 @@ static ERR_STRING_DATA SYS_str_reasons[NUM_SYS_STR_REASONS + 1];
 static void build_SYS_str_reasons(void)
 {
     /* OPENSSL_malloc cannot be used here, use static storage instead */
-    static char strerror_tab[NUM_SYS_STR_REASONS][LEN_SYS_STR_REASON];
+    static char strerror_pool[SPACE_SYS_STR_REASONS];
+    char *cur = strerror_pool;
+    size_t cnt = 0;
     static int init = 1;
     int i;
+    int saveerrno = get_last_sys_error();
 
     CRYPTO_THREAD_write_lock(err_string_lock);
     if (!init) {
@@ -213,9 +220,26 @@ static void build_SYS_str_reasons(void)
 
         str->error = ERR_PACK(ERR_LIB_SYS, 0, i);
         if (str->string == NULL) {
-            char (*dest)[LEN_SYS_STR_REASON] = &(strerror_tab[i - 1]);
-            if (openssl_strerror_r(i, *dest, sizeof(*dest)))
-                str->string = *dest;
+            /* Only ask for a message while the pool still has room. */
+            if (cnt < sizeof(strerror_pool)
+                && openssl_strerror_r(i, cur, sizeof(strerror_pool) - cnt)) {
+                size_t l = strlen(cur);
+
+                str->string = cur;
+                cnt += l;
+                cur += l;
+
+                /*
+                 * VMS has an unusual quirk of adding spaces at the end of
+                 * some (most? all?) messages.  Let's trim them off.
+                 */
+                while (cur > strerror_pool && ossl_isspace(cur[-1])) {
+                    cur--;
+                    cnt--;
+                }
+                *cur++ = '\0';
+                cnt++;
+            }
         }
         if (str->string == NULL)
             str->string = "unknown";
@@ -229,6 +253,8 @@ static void build_SYS_str_reasons(void)
     init = 0;
 
     CRYPTO_THREAD_unlock(err_string_lock);
+    /* openssl_strerror_r could change errno, but we want to preserve it */
+    set_sys_error(saveerrno);
     err_load_strings(SYS_str_reasons);
 }
 #endif
@@ -671,6 +697,7 @@ DEFINE_RUN_ONCE_STATIC(err_do_init)
 ERR_STATE *ERR_get_state(void)
 {
     ERR_STATE *state;
+    int saveerrno = get_last_sys_error();
 
     if (!OPENSSL_init_crypto(OPENSSL_INIT_BASE_ONLY, NULL))
         return NULL;
@@ -702,6 +729,7 @@ ERR_STATE *ERR_get_state(void)
         OPENSSL_init_crypto(OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL);
     }
 
+    set_sys_error(saveerrno);
     return state;
 }
 
@@ -711,6 +739,20 @@ ERR_STATE *ERR_get_state(void)
  */
 int err_shelve_state(void **state)
 {
+    int saveerrno = get_last_sys_error();
+
+    /*
+     * Note, at present our only caller is OPENSSL_init_crypto(), indirectly
+     * via ossl_init_load_crypto_nodelete(), by which point the requested
+     * "base" initialization has already been performed, so the below call is a
+     * NOOP, that re-enters OPENSSL_init_crypto() only to quickly return.
+     *
+     * If there are no other valid callers of this function, the call below can be
+     * removed, avoiding the re-entry into OPENSSL_init_crypto().  If there are
+     * potential uses that are not from inside OPENSSL_init_crypto(), then this
+     * call is needed, but some care is required to make sure that the re-entry
+     * remains a NOOP.
+     */
     if (!OPENSSL_init_crypto(OPENSSL_INIT_BASE_ONLY, NULL))
         return 0;
 
@@ -721,6 +763,7 @@ int err_shelve_state(void **state)
     if (!CRYPTO_THREAD_set_local(&err_thread_local, (ERR_STATE*)-1))
         return 0;
 
+    set_sys_error(saveerrno);
     return 1;
 }
 
@@ -747,20 +790,31 @@ int ERR_get_next_error_library(void)
     return ret;
 }
 
-void ERR_set_error_data(char *data, int flags)
+static int err_set_error_data_int(char *data, int flags)
 {
     ERR_STATE *es;
     int i;
 
     es = ERR_get_state();
     if (es == NULL)
-        return;
+        return 0;
 
     i = es->top;
 
     err_clear_data(es, i);
     es->err_data[i] = data;
     es->err_data_flags[i] = flags;
+
+    return 1;
+}
+
+void ERR_set_error_data(char *data, int flags)
+{
+    /*
+     * This function is void so we cannot propagate the error return. Since it
+     * is also in the public API we can't change the return type.
+     */
+    err_set_error_data_int(data, flags);
 }
 
 void ERR_add_error_data(int num, ...)
@@ -800,7 +854,8 @@ void ERR_add_error_vdata(int num, va_list args)
         }
         OPENSSL_strlcat(str, a, (size_t)s + 1);
     }
-    ERR_set_error_data(str, ERR_TXT_MALLOCED | ERR_TXT_STRING);
+    if (!err_set_error_data_int(str, ERR_TXT_MALLOCED | ERR_TXT_STRING))
+        OPENSSL_free(str);
 }
 
 int ERR_set_mark(void)
@@ -857,3 +912,42 @@ int ERR_clear_last_mark(void)
     es->err_flags[top] &= ~ERR_FLAG_MARK;
     return 1;
 }
+
+#ifdef UINTPTR_T
+# undef UINTPTR_T
+#endif
+/*
+ * uintptr_t is the answer, but unfortunately C89, the current "least common
+ * denominator", doesn't define it. Most legacy platforms typedef it anyway,
+ * so an attempt to fill the gaps would mean having to identify and keep
+ * track of these gaps, which would be undesirable. Macro it is...
+ */
+#if defined(__VMS) && __INITIAL_POINTER_SIZE==64
+/*
+ * But we can't use size_t on VMS, because it adheres to sizeof(size_t)==4
+ * even in 64-bit builds, which means that it won't work as a mask.
+ */
+# define UINTPTR_T unsigned long long
+#else
+# define UINTPTR_T size_t
+#endif
+
+void err_clear_last_constant_time(int clear)
+{
+    ERR_STATE *es;
+    int top;
+
+    es = ERR_get_state();
+    if (es == NULL)
+        return;
+
+    top = es->top;
+
+    es->err_flags[top] &= ~(0 - clear);
+    es->err_buffer[top] &= ~(0UL - clear);
+    es->err_file[top] = (const char *)((UINTPTR_T)es->err_file[top] &
+                                       ~((UINTPTR_T)0 - clear));
+    es->err_line[top] |= 0 - clear;
+
+    es->top = (top + ERR_NUM_ERRORS - clear) % ERR_NUM_ERRORS;
+}
diff --git a/crypto/err/openssl.txt b/crypto/err/openssl.txt
index 5003d8735a4d2..feff1dccded72 100644
--- a/crypto/err/openssl.txt
+++ b/crypto/err/openssl.txt
@@ -1,4 +1,4 @@
-# Copyright 1999-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 1999-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -519,6 +519,7 @@ EC_F_ECX_PUB_ENCODE:268:ecx_pub_encode
 EC_F_EC_ASN1_GROUP2CURVE:153:ec_asn1_group2curve
 EC_F_EC_ASN1_GROUP2FIELDID:154:ec_asn1_group2fieldid
 EC_F_EC_GF2M_MONTGOMERY_POINT_MULTIPLY:208:ec_GF2m_montgomery_point_multiply
+EC_F_EC_GF2M_SIMPLE_FIELD_INV:296:ec_GF2m_simple_field_inv
 EC_F_EC_GF2M_SIMPLE_GROUP_CHECK_DISCRIMINANT:159:\
 	ec_GF2m_simple_group_check_discriminant
 EC_F_EC_GF2M_SIMPLE_GROUP_SET_CURVE:195:ec_GF2m_simple_group_set_curve
@@ -535,6 +536,7 @@ EC_F_EC_GF2M_SIMPLE_SET_COMPRESSED_COORDINATES:164:\
 	ec_GF2m_simple_set_compressed_coordinates
 EC_F_EC_GFP_MONT_FIELD_DECODE:133:ec_GFp_mont_field_decode
 EC_F_EC_GFP_MONT_FIELD_ENCODE:134:ec_GFp_mont_field_encode
+EC_F_EC_GFP_MONT_FIELD_INV:297:ec_GFp_mont_field_inv
 EC_F_EC_GFP_MONT_FIELD_MUL:131:ec_GFp_mont_field_mul
 EC_F_EC_GFP_MONT_FIELD_SET_TO_ONE:209:ec_GFp_mont_field_set_to_one
 EC_F_EC_GFP_MONT_FIELD_SQR:132:ec_GFp_mont_field_sqr
@@ -555,6 +557,7 @@ EC_F_EC_GFP_NIST_FIELD_MUL:200:ec_GFp_nist_field_mul
 EC_F_EC_GFP_NIST_FIELD_SQR:201:ec_GFp_nist_field_sqr
 EC_F_EC_GFP_NIST_GROUP_SET_CURVE:202:ec_GFp_nist_group_set_curve
 EC_F_EC_GFP_SIMPLE_BLIND_COORDINATES:287:ec_GFp_simple_blind_coordinates
+EC_F_EC_GFP_SIMPLE_FIELD_INV:298:ec_GFp_simple_field_inv
 EC_F_EC_GFP_SIMPLE_GROUP_CHECK_DISCRIMINANT:165:\
 	ec_GFp_simple_group_check_discriminant
 EC_F_EC_GFP_SIMPLE_GROUP_SET_CURVE:166:ec_GFp_simple_group_set_curve
@@ -737,6 +740,7 @@ EVP_F_EVP_DECRYPTFINAL_EX:101:EVP_DecryptFinal_ex
 EVP_F_EVP_DECRYPTUPDATE:166:EVP_DecryptUpdate
 EVP_F_EVP_DIGESTFINALXOF:174:EVP_DigestFinalXOF
 EVP_F_EVP_DIGESTINIT_EX:128:EVP_DigestInit_ex
+EVP_F_EVP_ENCRYPTDECRYPTUPDATE:219:evp_EncryptDecryptUpdate
 EVP_F_EVP_ENCRYPTFINAL_EX:127:EVP_EncryptFinal_ex
 EVP_F_EVP_ENCRYPTUPDATE:167:EVP_EncryptUpdate
 EVP_F_EVP_MD_CTX_COPY_EX:110:EVP_MD_CTX_copy_ex
@@ -2115,6 +2119,7 @@ EC_R_ASN1_ERROR:115:asn1 error
 EC_R_BAD_SIGNATURE:156:bad signature
 EC_R_BIGNUM_OUT_OF_RANGE:144:bignum out of range
 EC_R_BUFFER_TOO_SMALL:100:buffer too small
+EC_R_CANNOT_INVERT:165:cannot invert
 EC_R_COORDINATES_OUT_OF_RANGE:146:coordinates out of range
 EC_R_CURVE_DOES_NOT_SUPPORT_ECDH:160:curve does not support ecdh
 EC_R_CURVE_DOES_NOT_SUPPORT_SIGNING:159:curve does not support signing
@@ -2722,6 +2727,8 @@ SSL_R_MISSING_SRP_PARAM:358:can't find SRP server param
 SSL_R_MISSING_SUPPORTED_GROUPS_EXTENSION:209:missing supported groups extension
 SSL_R_MISSING_TMP_DH_KEY:171:missing tmp dh key
 SSL_R_MISSING_TMP_ECDH_KEY:311:missing tmp ecdh key
+SSL_R_MIXED_HANDSHAKE_AND_NON_HANDSHAKE_DATA:293:\
+	mixed handshake and non handshake data
 SSL_R_NOT_ON_RECORD_BOUNDARY:182:not on record boundary
 SSL_R_NOT_REPLACING_CERTIFICATE:289:not replacing certificate
 SSL_R_NOT_SERVER:284:not server
diff --git a/crypto/evp/evp_enc.c b/crypto/evp/evp_enc.c
index 38633410cd1ab..05dd791b6cb07 100644
--- a/crypto/evp/evp_enc.c
+++ b/crypto/evp/evp_enc.c
@@ -294,8 +294,9 @@ int is_partially_overlapping(const void *ptr1, const void *ptr2, int len)
     return overlapped;
 }
 
-int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
-                      const unsigned char *in, int inl)
+static int evp_EncryptDecryptUpdate(EVP_CIPHER_CTX *ctx,
+                                    unsigned char *out, int *outl,
+                                    const unsigned char *in, int inl)
 {
     int i, j, bl, cmpl = inl;
 
@@ -307,7 +308,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
     if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
         /* If block size > 1 then the cipher will have to do this check */
         if (bl == 1 && is_partially_overlapping(out, in, cmpl)) {
-            EVPerr(EVP_F_EVP_ENCRYPTUPDATE, EVP_R_PARTIALLY_OVERLAPPING);
+            EVPerr(EVP_F_EVP_ENCRYPTDECRYPTUPDATE, EVP_R_PARTIALLY_OVERLAPPING);
             return 0;
         }
 
@@ -324,7 +325,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
         return inl == 0;
     }
     if (is_partially_overlapping(out + ctx->buf_len, in, cmpl)) {
-        EVPerr(EVP_F_EVP_ENCRYPTUPDATE, EVP_R_PARTIALLY_OVERLAPPING);
+        EVPerr(EVP_F_EVP_ENCRYPTDECRYPTUPDATE, EVP_R_PARTIALLY_OVERLAPPING);
         return 0;
     }
 
@@ -371,6 +372,19 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
     return 1;
 }
 
+
+int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
+                      const unsigned char *in, int inl)
+{
+    /* Prevent accidental use of decryption context when encrypting */
+    if (!ctx->encrypt) {
+        EVPerr(EVP_F_EVP_ENCRYPTUPDATE, EVP_R_INVALID_OPERATION);
+        return 0;
+    }
+
+    return evp_EncryptDecryptUpdate(ctx, out, outl, in, inl);
+}
+
 int EVP_EncryptFinal(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
 {
     int ret;
@@ -383,6 +397,12 @@ int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
     int n, ret;
     unsigned int i, b, bl;
 
+    /* Prevent accidental use of decryption context when encrypting */
+    if (!ctx->encrypt) {
+        EVPerr(EVP_F_EVP_ENCRYPTFINAL_EX, EVP_R_INVALID_OPERATION);
+        return 0;
+    }
+
     if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
         ret = ctx->cipher->do_cipher(ctx, out, NULL, 0);
         if (ret < 0)
@@ -426,6 +446,12 @@ int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
     int fix_len, cmpl = inl;
     unsigned int b;
 
+    /* Prevent accidental use of encryption context when decrypting */
+    if (ctx->encrypt) {
+        EVPerr(EVP_F_EVP_DECRYPTUPDATE, EVP_R_INVALID_OPERATION);
+        return 0;
+    }
+
     b = ctx->cipher->block_size;
 
     if (EVP_CIPHER_CTX_test_flags(ctx, EVP_CIPH_FLAG_LENGTH_BITS))
@@ -452,7 +478,7 @@ int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
     }
 
     if (ctx->flags & EVP_CIPH_NO_PADDING)
-        return EVP_EncryptUpdate(ctx, out, outl, in, inl);
+        return evp_EncryptDecryptUpdate(ctx, out, outl, in, inl);
 
     OPENSSL_assert(b <= sizeof(ctx->final));
 
@@ -469,7 +495,7 @@ int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
     } else
         fix_len = 0;
 
-    if (!EVP_EncryptUpdate(ctx, out, outl, in, inl))
+    if (!evp_EncryptDecryptUpdate(ctx, out, outl, in, inl))
         return 0;
 
     /*
@@ -500,6 +526,13 @@ int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
 {
     int i, n;
     unsigned int b;
+
+    /* Prevent accidental use of encryption context when decrypting */
+    if (ctx->encrypt) {
+        EVPerr(EVP_F_EVP_DECRYPTFINAL_EX, EVP_R_INVALID_OPERATION);
+        return 0;
+    }
+
     *outl = 0;
 
     if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c
index 3e14a7b509496..60df27cbc20ac 100644
--- a/crypto/evp/evp_err.c
+++ b/crypto/evp/evp_err.c
@@ -50,6 +50,8 @@ static const ERR_STRING_DATA EVP_str_functs[] = {
     {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_DECRYPTUPDATE, 0), "EVP_DecryptUpdate"},
     {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_DIGESTFINALXOF, 0), "EVP_DigestFinalXOF"},
     {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_DIGESTINIT_EX, 0), "EVP_DigestInit_ex"},
+    {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_ENCRYPTDECRYPTUPDATE, 0),
+     "evp_EncryptDecryptUpdate"},
     {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_ENCRYPTFINAL_EX, 0),
      "EVP_EncryptFinal_ex"},
     {ERR_PACK(ERR_LIB_EVP, EVP_F_EVP_ENCRYPTUPDATE, 0), "EVP_EncryptUpdate"},
diff --git a/crypto/evp/p_lib.c b/crypto/evp/p_lib.c
index 9429be97e3f93..148df90f84b13 100644
--- a/crypto/evp/p_lib.c
+++ b/crypto/evp/p_lib.c
@@ -42,7 +42,7 @@ int EVP_PKEY_security_bits(const EVP_PKEY *pkey)
     return pkey->ameth->pkey_security_bits(pkey);
 }
 
-int EVP_PKEY_size(EVP_PKEY *pkey)
+int EVP_PKEY_size(const EVP_PKEY *pkey)
 {
     if (pkey && pkey->ameth && pkey->ameth->pkey_size)
         return pkey->ameth->pkey_size(pkey);
diff --git a/crypto/include/internal/bn_int.h b/crypto/include/internal/bn_int.h
index cffe5cfc16507..30be7efe14d8d 100644
--- a/crypto/include/internal/bn_int.h
+++ b/crypto/include/internal/bn_int.h
@@ -65,7 +65,10 @@ int bn_set_words(BIGNUM *a, const BN_ULONG *words, int num_words);
  * is customarily arranged by bn_correct_top. Output from below functions
  * is not processed with bn_correct_top, and for this reason it may not be
  * returned out of public API. It may only be passed internally into other
- * functions known to support non-minimal or zero-padded BIGNUMs.
+ * functions known to support non-minimal or zero-padded BIGNUMs. Even
+ * though the goal is to facilitate constant-time-ness, not each subroutine
+ * is constant-time by itself. They all have pre-conditions, consult source
+ * code...
  */
 int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                           BN_MONT_CTX *mont, BN_CTX *ctx);
@@ -79,5 +82,9 @@ int bn_mod_sub_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                          const BIGNUM *m);
 int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
 int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx);
+int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n);
+int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n);
+int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
+                     const BIGNUM *d, BN_CTX *ctx);
 
 #endif
diff --git a/crypto/init.c b/crypto/init.c
index 209d1a483daeb..b9a7334a7ed70 100644
--- a/crypto/init.c
+++ b/crypto/init.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -100,10 +100,6 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_base)
         return 0;
     if ((init_lock = CRYPTO_THREAD_lock_new()) == NULL)
         goto err;
-#ifndef OPENSSL_SYS_UEFI
-    if (atexit(OPENSSL_cleanup) != 0)
-        goto err;
-#endif
     OPENSSL_cpuid_setup();
 
     destructor_key.value = key;
@@ -121,13 +117,53 @@ err:
     return 0;
 }
 
+static CRYPTO_ONCE register_atexit = CRYPTO_ONCE_STATIC_INIT;
+#if !defined(OPENSSL_SYS_UEFI) && defined(_WIN32)
+static int win32atexit(void)
+{
+    OPENSSL_cleanup();
+    return 0;
+}
+#endif
+
+DEFINE_RUN_ONCE_STATIC(ossl_init_register_atexit)
+{
+#ifdef OPENSSL_INIT_DEBUG
+    fprintf(stderr, "OPENSSL_INIT: ossl_init_register_atexit()\n");
+#endif
+#ifndef OPENSSL_SYS_UEFI
+# ifdef _WIN32
+    /* We use _onexit() in preference because it gets called on DLL unload */
+    if (_onexit(win32atexit) == NULL)
+        return 0;
+# else
+    if (atexit(OPENSSL_cleanup) != 0)
+        return 0;
+# endif
+#endif
+
+    return 1;
+}
+
+DEFINE_RUN_ONCE_STATIC_ALT(ossl_init_no_register_atexit,
+                           ossl_init_register_atexit)
+{
+#ifdef OPENSSL_INIT_DEBUG
+    fprintf(stderr, "OPENSSL_INIT: ossl_init_no_register_atexit ok!\n");
+#endif
+    /* Do nothing in this case */
+    return 1;
+}
+
 static CRYPTO_ONCE load_crypto_nodelete = CRYPTO_ONCE_STATIC_INIT;
 DEFINE_RUN_ONCE_STATIC(ossl_init_load_crypto_nodelete)
 {
 #ifdef OPENSSL_INIT_DEBUG
     fprintf(stderr, "OPENSSL_INIT: ossl_init_load_crypto_nodelete()\n");
 #endif
-#if !defined(OPENSSL_NO_DSO) && !defined(OPENSSL_USE_NODELETE)
+#if !defined(OPENSSL_NO_DSO) \
+    && !defined(OPENSSL_USE_NODELETE) \
+    && !defined(OPENSSL_NO_PINSHARED)
 # ifdef DSO_WIN32
     {
         HMODULE handle = NULL;
@@ -177,12 +213,6 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_load_crypto_nodelete)
 
 static CRYPTO_ONCE load_crypto_strings = CRYPTO_ONCE_STATIC_INIT;
 static int load_crypto_strings_inited = 0;
-DEFINE_RUN_ONCE_STATIC(ossl_init_no_load_crypto_strings)
-{
-    /* Do nothing in this case */
-    return 1;
-}
-
 DEFINE_RUN_ONCE_STATIC(ossl_init_load_crypto_strings)
 {
     int ret = 1;
@@ -201,6 +231,13 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_load_crypto_strings)
     return ret;
 }
 
+DEFINE_RUN_ONCE_STATIC_ALT(ossl_init_no_load_crypto_strings,
+                           ossl_init_load_crypto_strings)
+{
+    /* Do nothing in this case */
+    return 1;
+}
+
 static CRYPTO_ONCE add_all_ciphers = CRYPTO_ONCE_STATIC_INIT;
 DEFINE_RUN_ONCE_STATIC(ossl_init_add_all_ciphers)
 {
@@ -218,6 +255,13 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_add_all_ciphers)
     return 1;
 }
 
+DEFINE_RUN_ONCE_STATIC_ALT(ossl_init_no_add_all_ciphers,
+                           ossl_init_add_all_ciphers)
+{
+    /* Do nothing */
+    return 1;
+}
+
 static CRYPTO_ONCE add_all_digests = CRYPTO_ONCE_STATIC_INIT;
 DEFINE_RUN_ONCE_STATIC(ossl_init_add_all_digests)
 {
@@ -235,7 +279,8 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_add_all_digests)
     return 1;
 }
 
-DEFINE_RUN_ONCE_STATIC(ossl_init_no_add_algs)
+DEFINE_RUN_ONCE_STATIC_ALT(ossl_init_no_add_all_digests,
+                           ossl_init_add_all_digests)
 {
     /* Do nothing */
     return 1;
@@ -243,19 +288,14 @@ DEFINE_RUN_ONCE_STATIC(ossl_init_no_add_algs)
 
 static CRYPTO_ONCE config = CRYPTO_ONCE_STATIC_INIT;
 static int config_inited = 0;
-static const char *appname;
+static const OPENSSL_INIT_SETTINGS *conf_settings = NULL;
 DEFINE_RUN_ONCE_STATIC(ossl_init_config)
 {
-#ifdef OPENSSL_INIT_DEBUG
-    fprintf(stderr,
-            "OPENSSL_INIT: ossl_init_config: openssl_config(%s)\n",
-            appname == NULL ? "NULL" : appname);
-#endif
-    openssl_config_int(appname);
+    int ret = openssl_config_int(conf_settings);
     config_inited = 1;
-    return 1;
+    return ret;
 }
-DEFINE_RUN_ONCE_STATIC(ossl_init_no_config)
+DEFINE_RUN_ONCE_STATIC_ALT(ossl_init_no_config, ossl_init_config)
 {
 #ifdef OPENSSL_INIT_DEBUG
     fprintf(stderr,
@@ -586,17 +626,43 @@ int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings)
         return 0;
     }
 
+    /*
+     * When the caller specifies OPENSSL_INIT_BASE_ONLY, that should be the
+     * *only* option specified.  With that option we return immediately after
+     * doing the requested limited initialization.  Note that
+     * err_shelve_state() called by us via ossl_init_load_crypto_nodelete()
+     * re-enters OPENSSL_init_crypto() with OPENSSL_INIT_BASE_ONLY, but with
+     * base already initialized this is a harmless NOOP.
+     *
+     * If we remain the only caller of err_shelve_state() the recursion should
+     * perhaps be removed, but if in doubt, it can be left in place.
+     */
     if (!RUN_ONCE(&base, ossl_init_base))
         return 0;
+    if (opts & OPENSSL_INIT_BASE_ONLY)
+        return 1;
+
+    /*
+     * Now that we don't always set up exit handlers, the INIT_BASE_ONLY calls
+     * should not have the side-effect of setting up exit handlers, and
+     * therefore, this code block is below the INIT_BASE_ONLY-conditioned early
+     * return above.
+     */
+    if ((opts & OPENSSL_INIT_NO_ATEXIT) != 0) {
+        if (!RUN_ONCE_ALT(&register_atexit, ossl_init_no_register_atexit,
+                          ossl_init_register_atexit))
+            return 0;
+    } else if (!RUN_ONCE(&register_atexit, ossl_init_register_atexit)) {
+        return 0;
+    }
 
-    if (!(opts & OPENSSL_INIT_BASE_ONLY)
-            && !RUN_ONCE(&load_crypto_nodelete,
-                         ossl_init_load_crypto_nodelete))
+    if (!RUN_ONCE(&load_crypto_nodelete, ossl_init_load_crypto_nodelete))
         return 0;
 
     if ((opts & OPENSSL_INIT_NO_LOAD_CRYPTO_STRINGS)
-            && !RUN_ONCE(&load_crypto_strings,
-                         ossl_init_no_load_crypto_strings))
+            && !RUN_ONCE_ALT(&load_crypto_strings,
+                             ossl_init_no_load_crypto_strings,
+                             ossl_init_load_crypto_strings))
         return 0;
 
     if ((opts & OPENSSL_INIT_LOAD_CRYPTO_STRINGS)
@@ -604,7 +670,8 @@ int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings)
         return 0;
 
     if ((opts & OPENSSL_INIT_NO_ADD_ALL_CIPHERS)
-            && !RUN_ONCE(&add_all_ciphers, ossl_init_no_add_algs))
+            && !RUN_ONCE_ALT(&add_all_ciphers, ossl_init_no_add_all_ciphers,
+                             ossl_init_add_all_ciphers))
         return 0;
 
     if ((opts & OPENSSL_INIT_ADD_ALL_CIPHERS)
@@ -612,7 +679,8 @@ int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings)
         return 0;
 
     if ((opts & OPENSSL_INIT_NO_ADD_ALL_DIGESTS)
-            && !RUN_ONCE(&add_all_digests, ossl_init_no_add_algs))
+            && !RUN_ONCE_ALT(&add_all_digests, ossl_init_no_add_all_digests,
+                             ossl_init_add_all_digests))
         return 0;
 
     if ((opts & OPENSSL_INIT_ADD_ALL_DIGESTS)
@@ -624,14 +692,15 @@ int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings)
         return 0;
 
     if ((opts & OPENSSL_INIT_NO_LOAD_CONFIG)
-            && !RUN_ONCE(&config, ossl_init_no_config))
+            && !RUN_ONCE_ALT(&config, ossl_init_no_config, ossl_init_config))
         return 0;
 
     if (opts & OPENSSL_INIT_LOAD_CONFIG) {
         int ret;
         CRYPTO_THREAD_write_lock(init_lock);
-        appname = (settings == NULL) ? NULL : settings->appname;
+        conf_settings = settings;
         ret = RUN_ONCE(&config, ossl_init_config);
+        conf_settings = NULL;
         CRYPTO_THREAD_unlock(init_lock);
         if (!ret)
             return 0;
@@ -695,7 +764,9 @@ int OPENSSL_atexit(void (*handler)(void))
 {
     OPENSSL_INIT_STOP *newhand;
 
-#if !defined(OPENSSL_NO_DSO) && !defined(OPENSSL_USE_NODELETE)
+#if !defined(OPENSSL_NO_DSO) \
+    && !defined(OPENSSL_USE_NODELETE)\
+    && !defined(OPENSSL_NO_PINSHARED)
     {
         union {
             void *sym;
diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl
index afc30c3e72a43..30158aa076da9 100755
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/modes/asm/ghash-x86_64.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2010-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -529,6 +529,7 @@ $code.=<<___;
 .type	gcm_init_clmul,\@abi-omnipotent
 .align	16
 gcm_init_clmul:
+.cfi_startproc
 .L_init_clmul:
 ___
 $code.=<<___ if ($win64);
@@ -598,6 +599,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_init_clmul,.-gcm_init_clmul
 ___
 }
@@ -609,6 +611,7 @@ $code.=<<___;
 .type	gcm_gmult_clmul,\@abi-omnipotent
 .align	16
 gcm_gmult_clmul:
+.cfi_startproc
 .L_gmult_clmul:
 	movdqu		($Xip),$Xi
 	movdqa		.Lbswap_mask(%rip),$T3
@@ -645,6 +648,7 @@ $code.=<<___;
 	pshufb		$T3,$Xi
 	movdqu		$Xi,($Xip)
 	ret
+.cfi_endproc
 .size	gcm_gmult_clmul,.-gcm_gmult_clmul
 ___
 }
@@ -658,6 +662,7 @@ $code.=<<___;
 .type	gcm_ghash_clmul,\@abi-omnipotent
 .align	32
 gcm_ghash_clmul:
+.cfi_startproc
 .L_ghash_clmul:
 ___
 $code.=<<___ if ($win64);
@@ -1005,6 +1010,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_ghash_clmul,.-gcm_ghash_clmul
 ___
 }
@@ -1014,6 +1020,7 @@ $code.=<<___;
 .type	gcm_init_avx,\@abi-omnipotent
 .align	32
 gcm_init_avx:
+.cfi_startproc
 ___
 if ($avx) {
 my ($Htbl,$Xip)=@_4args;
@@ -1142,6 +1149,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_init_avx,.-gcm_init_avx
 ___
 } else {
@@ -1156,7 +1164,9 @@ $code.=<<___;
 .type	gcm_gmult_avx,\@abi-omnipotent
 .align	32
 gcm_gmult_avx:
+.cfi_startproc
 	jmp	.L_gmult_clmul
+.cfi_endproc
 .size	gcm_gmult_avx,.-gcm_gmult_avx
 ___
 
@@ -1165,6 +1175,7 @@ $code.=<<___;
 .type	gcm_ghash_avx,\@abi-omnipotent
 .align	32
 gcm_ghash_avx:
+.cfi_startproc
 ___
 if ($avx) {
 my ($Xip,$Htbl,$inp,$len)=@_4args;
@@ -1577,6 +1588,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_ghash_avx,.-gcm_ghash_avx
 ___
 } else {
diff --git a/crypto/objects/obj_dat.h b/crypto/objects/obj_dat.h
index e931f7f516ca8..9ab1a14b9e327 100644
--- a/crypto/objects/obj_dat.h
+++ b/crypto/objects/obj_dat.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by crypto/objects/obj_dat.pl
  *
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
  * in the file LICENSE in the source distribution or at
diff --git a/crypto/objects/obj_dat.pl b/crypto/objects/obj_dat.pl
index e80900d09d268..e5d38147eccf0 100644
--- a/crypto/objects/obj_dat.pl
+++ b/crypto/objects/obj_dat.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/objects/obj_xref.h b/crypto/objects/obj_xref.h
index 9606e57d6191e..9144d569dcd04 100644
--- a/crypto/objects/obj_xref.h
+++ b/crypto/objects/obj_xref.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by objxref.pl
  *
- * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/objects/objects.pl b/crypto/objects/objects.pl
index 8f9b67f95991a..d7d1962c9999f 100644
--- a/crypto/objects/objects.pl
+++ b/crypto/objects/objects.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/objects/objxref.pl b/crypto/objects/objxref.pl
index 0ec63f067e3cf..ce76cadae31c8 100755
--- a/crypto/objects/objxref.pl
+++ b/crypto/objects/objxref.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 1998-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/pem/pem_info.c b/crypto/pem/pem_info.c
index a45fe83001b35..f90cb4465096c 100644
--- a/crypto/pem/pem_info.c
+++ b/crypto/pem/pem_info.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -297,7 +297,7 @@ int PEM_X509_INFO_write_bio(BIO *bp, X509_INFO *xi, EVP_CIPHER *enc,
                 goto err;
             }
 
-            /* Create the right magic header stuff */ 
+            /* Create the right magic header stuff */
             buf[0] = '\0';
             PEM_proc_type(buf, PEM_TYPE_ENCRYPTED);
             PEM_dek_info(buf, objstr, EVP_CIPHER_iv_length(enc),
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index f8380f2e9cfa9..29a0eacfd532a 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2005-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -541,6 +541,7 @@ my %globals;
 	);
 
     my ($cfa_reg, $cfa_rsp);
+    my @cfa_stack;
 
     # [us]leb128 format is variable-length integer representation base
     # 2^128, with most significant bit of each byte being 0 denoting
@@ -648,7 +649,13 @@ my %globals;
 	    # why it starts with -8. Recall that CFA is top of caller's
 	    # stack...
 	    /startproc/	&& do {	($cfa_reg, $cfa_rsp) = ("%rsp", -8); last; };
-	    /endproc/	&& do {	($cfa_reg, $cfa_rsp) = ("%rsp",  0); last; };
+	    /endproc/	&& do {	($cfa_reg, $cfa_rsp) = ("%rsp",  0);
+				# .cfi_remember_state directives that are not
+				# matched with .cfi_restore_state are
+				# unnecessary.
+				die "unpaired .cfi_remember_state" if (@cfa_stack);
+				last;
+			      };
 	    /def_cfa_register/
 			&& do {	$cfa_reg = $$line; last; };
 	    /def_cfa_offset/
@@ -688,6 +695,14 @@ my %globals;
 						      cfa_expression($$line)));
 				last;
 			      };
+	    /remember_state/
+			&& do {	push @cfa_stack, [$cfa_reg, $cfa_rsp];
+				last;
+			      };
+	    /restore_state/
+			&& do {	($cfa_reg, $cfa_rsp) = @{pop @cfa_stack};
+				last;
+			      };
 	    }
 
 	    $self->{value} = ".cfi_$dir\t$$line" if ($dir);
diff --git a/crypto/poly1305/asm/poly1305-armv8.pl b/crypto/poly1305/asm/poly1305-armv8.pl
index ac06457b65301..6c6c9bb05be07 100755
--- a/crypto/poly1305/asm/poly1305-armv8.pl
+++ b/crypto/poly1305/asm/poly1305-armv8.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -291,6 +291,7 @@ poly1305_blocks_neon:
 	cbz	$is_base2_26,poly1305_blocks
 
 .Lblocks_neon:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-80]!
 	add	x29,sp,#0
 
@@ -859,6 +860,7 @@ poly1305_blocks_neon:
 	st1	{$ACC4}[0],[$ctx]
 
 .Lno_data_neon:
+	.inst	0xd50323bf		// autiasp
 	ldr	x29,[sp],#80
 	ret
 .size	poly1305_blocks_neon,.-poly1305_blocks_neon
diff --git a/crypto/ppc_arch.h b/crypto/ppc_arch.h
index 65cf96fc1fe8b..72bd7468745c7 100644
--- a/crypto/ppc_arch.h
+++ b/crypto/ppc_arch.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -22,5 +22,7 @@ extern unsigned int OPENSSL_ppccap_P;
 # define PPC_CRYPTO207   (1<<2)
 # define PPC_FPU         (1<<3)
 # define PPC_MADD300     (1<<4)
+# define PPC_MFTB        (1<<5)
+# define PPC_MFSPR268    (1<<6)
 
 #endif
diff --git a/crypto/ppccap.c b/crypto/ppccap.c
index 8b7d765c3aa23..afb9e31b00288 100644
--- a/crypto/ppccap.c
+++ b/crypto/ppccap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2009-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -168,16 +168,50 @@ void OPENSSL_altivec_probe(void);
 void OPENSSL_crypto207_probe(void);
 void OPENSSL_madd300_probe(void);
 
-/*
- * Use a weak reference to getauxval() so we can use it if it is available
- * but don't break the build if it is not. Note that this is *link-time*
- * feature detection, not *run-time*. In other words if we link with
- * symbol present, it's expected to be present even at run-time.
- */
-#if defined(__GNUC__) && __GNUC__>=2 && defined(__ELF__)
-extern unsigned long getauxval(unsigned long type) __attribute__ ((weak));
-#else
-static unsigned long (*getauxval) (unsigned long) = NULL;
+long OPENSSL_rdtsc_mftb(void);
+long OPENSSL_rdtsc_mfspr268(void);
+
+uint32_t OPENSSL_rdtsc(void)
+{
+    if (OPENSSL_ppccap_P & PPC_MFTB)
+        return OPENSSL_rdtsc_mftb();
+    else if (OPENSSL_ppccap_P & PPC_MFSPR268)
+        return OPENSSL_rdtsc_mfspr268();
+    else
+        return 0;
+}
+
+size_t OPENSSL_instrument_bus_mftb(unsigned int *, size_t);
+size_t OPENSSL_instrument_bus_mfspr268(unsigned int *, size_t);
+
+size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt)
+{
+    if (OPENSSL_ppccap_P & PPC_MFTB)
+        return OPENSSL_instrument_bus_mftb(out, cnt);
+    else if (OPENSSL_ppccap_P & PPC_MFSPR268)
+        return OPENSSL_instrument_bus_mfspr268(out, cnt);
+    else
+        return 0;
+}
+
+size_t OPENSSL_instrument_bus2_mftb(unsigned int *, size_t, size_t);
+size_t OPENSSL_instrument_bus2_mfspr268(unsigned int *, size_t, size_t);
+
+size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max)
+{
+    if (OPENSSL_ppccap_P & PPC_MFTB)
+        return OPENSSL_instrument_bus2_mftb(out, cnt, max);
+    else if (OPENSSL_ppccap_P & PPC_MFSPR268)
+        return OPENSSL_instrument_bus2_mfspr268(out, cnt, max);
+    else
+        return 0;
+}
+
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+# if __GLIBC_PREREQ(2, 16)
+#  include <sys/auxv.h>
+#  define OSSL_IMPLEMENT_GETAUXVAL
+# endif
 #endif
 
 /* I wish <sys/auxv.h> was universally available */
@@ -277,7 +311,8 @@ void OPENSSL_cpuid_setup(void)
     }
 #endif
 
-    if (getauxval != NULL) {
+#ifdef OSSL_IMPLEMENT_GETAUXVAL
+    {
         unsigned long hwcap = getauxval(HWCAP);
 
         if (hwcap & HWCAP_FPU) {
@@ -304,9 +339,8 @@ void OPENSSL_cpuid_setup(void)
         if (hwcap & HWCAP_ARCH_3_00) {
             OPENSSL_ppccap_P |= PPC_MADD300;
         }
-
-        return;
     }
+#endif
 
     sigfillset(&all_masked);
     sigdelset(&all_masked, SIGILL);
@@ -325,15 +359,16 @@ void OPENSSL_cpuid_setup(void)
     sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
     sigaction(SIGILL, &ill_act, &ill_oact);
 
+#ifndef OSSL_IMPLEMENT_GETAUXVAL
     if (sigsetjmp(ill_jmp,1) == 0) {
         OPENSSL_fpu_probe();
         OPENSSL_ppccap_P |= PPC_FPU;
 
         if (sizeof(size_t) == 4) {
-#ifdef __linux
+# ifdef __linux
             struct utsname uts;
             if (uname(&uts) == 0 && strcmp(uts.machine, "ppc64") == 0)
-#endif
+# endif
                 if (sigsetjmp(ill_jmp, 1) == 0) {
                     OPENSSL_ppc64_probe();
                     OPENSSL_ppccap_P |= PPC_FPU64;
@@ -358,6 +393,15 @@ void OPENSSL_cpuid_setup(void)
         OPENSSL_madd300_probe();
         OPENSSL_ppccap_P |= PPC_MADD300;
     }
+#endif
+
+    if (sigsetjmp(ill_jmp, 1) == 0) {
+        OPENSSL_rdtsc_mftb();
+        OPENSSL_ppccap_P |= PPC_MFTB;
+    } else if (sigsetjmp(ill_jmp, 1) == 0) {
+        OPENSSL_rdtsc_mfspr268();
+        OPENSSL_ppccap_P |= PPC_MFSPR268;
+    }
 
     sigaction(SIGILL, &ill_oact, NULL);
     sigprocmask(SIG_SETMASK, &oset, NULL);
diff --git a/crypto/ppccpuid.pl b/crypto/ppccpuid.pl
index 9d1cada4dc4c9..a38445fd3c529 100755
--- a/crypto/ppccpuid.pl
+++ b/crypto/ppccpuid.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2007-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -124,26 +124,23 @@ Ladd:	lwarx	r5,0,r3
 	.long	0
 .size	.OPENSSL_atomic_add,.-.OPENSSL_atomic_add
 
-.globl	.OPENSSL_rdtsc
+.globl	.OPENSSL_rdtsc_mftb
 .align	4
-.OPENSSL_rdtsc:
-___
-$code.=<<___	if ($flavour =~ /64/);
-	mftb	r3
-___
-$code.=<<___	if ($flavour !~ /64/);
-Loop_rdtsc:
-	mftbu	r5
+.OPENSSL_rdtsc_mftb:
 	mftb	r3
-	mftbu	r4
-	cmplw	r4,r5
-	bne	Loop_rdtsc
-___
-$code.=<<___;
 	blr
 	.long	0
 	.byte	0,12,0x14,0,0,0,0,0
-.size	.OPENSSL_rdtsc,.-.OPENSSL_rdtsc
+.size	.OPENSSL_rdtsc_mftb,.-.OPENSSL_rdtsc_mftb
+
+.globl	.OPENSSL_rdtsc_mfspr268
+.align	4
+.OPENSSL_rdtsc_mfspr268:
+	mfspr	r3,268
+	blr
+	.long	0
+	.byte	0,12,0x14,0,0,0,0,0
+.size	.OPENSSL_rdtsc_mfspr268,.-.OPENSSL_rdtsc_mfspr268
 
 .globl	.OPENSSL_cleanse
 .align	4
@@ -210,9 +207,9 @@ my ($tick,$lasttick)=("r6","r7");
 my ($diff,$lastdiff)=("r8","r9");
 
 $code.=<<___;
-.globl	.OPENSSL_instrument_bus
+.globl	.OPENSSL_instrument_bus_mftb
 .align	4
-.OPENSSL_instrument_bus:
+.OPENSSL_instrument_bus_mftb:
 	mtctr	$cnt
 
 	mftb	$lasttick		# collect 1st tick
@@ -240,11 +237,11 @@ Loop:	mftb	$tick
 	.long	0
 	.byte	0,12,0x14,0,0,0,2,0
 	.long	0
-.size	.OPENSSL_instrument_bus,.-.OPENSSL_instrument_bus
+.size	.OPENSSL_instrument_bus_mftb,.-.OPENSSL_instrument_bus_mftb
 
-.globl	.OPENSSL_instrument_bus2
+.globl	.OPENSSL_instrument_bus2_mftb
 .align	4
-.OPENSSL_instrument_bus2:
+.OPENSSL_instrument_bus2_mftb:
 	mr	r0,$cnt
 	slwi	$cnt,$cnt,2
 
@@ -292,7 +289,91 @@ Ldone2:
 	.long	0
 	.byte	0,12,0x14,0,0,0,3,0
 	.long	0
-.size	.OPENSSL_instrument_bus2,.-.OPENSSL_instrument_bus2
+.size	.OPENSSL_instrument_bus2_mftb,.-.OPENSSL_instrument_bus2_mftb
+
+.globl	.OPENSSL_instrument_bus_mfspr268
+.align	4
+.OPENSSL_instrument_bus_mfspr268:
+	mtctr	$cnt
+
+	mfspr	$lasttick,268		# collect 1st tick
+	li	$diff,0
+
+	dcbf	0,$out			# flush cache line
+	lwarx	$tick,0,$out		# load and lock
+	add	$tick,$tick,$diff
+	stwcx.	$tick,0,$out
+	stwx	$tick,0,$out
+
+Loop3:	mfspr	$tick,268
+	sub	$diff,$tick,$lasttick
+	mr	$lasttick,$tick
+	dcbf	0,$out			# flush cache line
+	lwarx	$tick,0,$out		# load and lock
+	add	$tick,$tick,$diff
+	stwcx.	$tick,0,$out
+	stwx	$tick,0,$out
+	addi	$out,$out,4		# ++$out
+	bdnz	Loop3
+
+	mr	r3,$cnt
+	blr
+	.long	0
+	.byte	0,12,0x14,0,0,0,2,0
+	.long	0
+.size	.OPENSSL_instrument_bus_mfspr268,.-.OPENSSL_instrument_bus_mfspr268
+
+.globl	.OPENSSL_instrument_bus2_mfspr268
+.align	4
+.OPENSSL_instrument_bus2_mfspr268:
+	mr	r0,$cnt
+	slwi	$cnt,$cnt,2
+
+	mfspr	$lasttick,268		# collect 1st tick
+	li	$diff,0
+
+	dcbf	0,$out			# flush cache line
+	lwarx	$tick,0,$out		# load and lock
+	add	$tick,$tick,$diff
+	stwcx.	$tick,0,$out
+	stwx	$tick,0,$out
+
+	mfspr	$tick,268		# collect 1st diff
+	sub	$diff,$tick,$lasttick
+	mr	$lasttick,$tick
+	mr	$lastdiff,$diff
+Loop4:
+	dcbf	0,$out			# flush cache line
+	lwarx	$tick,0,$out		# load and lock
+	add	$tick,$tick,$diff
+	stwcx.	$tick,0,$out
+	stwx	$tick,0,$out
+
+	addic.	$max,$max,-1
+	beq	Ldone4
+
+	mfspr	$tick,268
+	sub	$diff,$tick,$lasttick
+	mr	$lasttick,$tick
+	cmplw	7,$diff,$lastdiff
+	mr	$lastdiff,$diff
+
+	mfcr	$tick			# pull cr
+	not	$tick,$tick		# flip bits
+	rlwinm	$tick,$tick,1,29,29	# isolate flipped eq bit and scale
+
+	sub.	$cnt,$cnt,$tick		# conditional --$cnt
+	add	$out,$out,$tick		# conditional ++$out
+	bne	Loop4
+
+Ldone4:
+	srwi	$cnt,$cnt,2
+	sub	r3,r0,$cnt
+	blr
+	.long	0
+	.byte	0,12,0x14,0,0,0,3,0
+	.long	0
+.size	.OPENSSL_instrument_bus2_mfspr268,.-.OPENSSL_instrument_bus2_mfspr268
 ___
 }
 
diff --git a/crypto/rand/rand_unix.c b/crypto/rand/rand_unix.c
index 9d8ffdd537965..9cbc9ade77fa2 100644
--- a/crypto/rand/rand_unix.c
+++ b/crypto/rand/rand_unix.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -93,6 +93,27 @@ static uint64_t get_timer_bits(void);
 # error "UEFI and VXWorks only support seeding NONE"
 #endif
 
+#if defined(OPENSSL_SYS_VXWORKS)
+/* empty implementation */
+int rand_pool_init(void)
+{
+    return 1;
+}
+
+void rand_pool_cleanup(void)
+{
+}
+
+void rand_pool_keep_random_devices_open(int keep)
+{
+}
+
+size_t rand_pool_acquire_entropy(RAND_POOL *pool)
+{
+    return rand_pool_entropy_available(pool);
+}
+#endif
+
 #if !(defined(OPENSSL_SYS_WINDOWS) || defined(OPENSSL_SYS_WIN32) \
     || defined(OPENSSL_SYS_VMS) || defined(OPENSSL_SYS_VXWORKS) \
     || defined(OPENSSL_SYS_UEFI))
diff --git a/crypto/rsa/rsa_ameth.c b/crypto/rsa/rsa_ameth.c
index a6595aec05420..75debb3e0a9d2 100644
--- a/crypto/rsa/rsa_ameth.c
+++ b/crypto/rsa/rsa_ameth.c
@@ -34,7 +34,7 @@ static int rsa_param_encode(const EVP_PKEY *pkey,
 
     *pstr = NULL;
     /* If RSA it's just NULL type */
-    if (pkey->ameth->pkey_id == EVP_PKEY_RSA) {
+    if (pkey->ameth->pkey_id != EVP_PKEY_RSA_PSS) {
         *pstrtype = V_ASN1_NULL;
         return 1;
     }
@@ -58,7 +58,7 @@ static int rsa_param_decode(RSA *rsa, const X509_ALGOR *alg)
     int algptype;
 
     X509_ALGOR_get0(&algoid, &algptype, &algp, alg);
-    if (OBJ_obj2nid(algoid) == EVP_PKEY_RSA)
+    if (OBJ_obj2nid(algoid) != EVP_PKEY_RSA_PSS)
         return 1;
     if (algptype == V_ASN1_UNDEF)
         return 1;
@@ -109,7 +109,10 @@ static int rsa_pub_decode(EVP_PKEY *pkey, X509_PUBKEY *pubkey)
         RSA_free(rsa);
         return 0;
     }
-    EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, rsa);
+    if (!EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, rsa)) {
+        RSA_free(rsa);
+        return 0;
+    }
     return 1;
 }
 
diff --git a/crypto/rsa/rsa_oaep.c b/crypto/rsa/rsa_oaep.c
index f13c6fc9e5063..689e6dc222922 100644
--- a/crypto/rsa/rsa_oaep.c
+++ b/crypto/rsa/rsa_oaep.c
@@ -120,7 +120,7 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen,
                                       const EVP_MD *mgf1md)
 {
     int i, dblen = 0, mlen = -1, one_index = 0, msg_index;
-    unsigned int good, found_one_byte;
+    unsigned int good = 0, found_one_byte, mask;
     const unsigned char *maskedseed, *maskeddb;
     /*
      * |em| is the encoded message, zero-padded to exactly |num| bytes: em =
@@ -147,8 +147,11 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen,
      * the ciphertext, see PKCS #1 v2.2, section 7.1.2.
      * This does not leak any side-channel information.
      */
-    if (num < flen || num < 2 * mdlen + 2)
-        goto decoding_err;
+    if (num < flen || num < 2 * mdlen + 2) {
+        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1,
+               RSA_R_OAEP_DECODING_ERROR);
+        return -1;
+    }
 
     dblen = num - mdlen - 1;
     db = OPENSSL_malloc(dblen);
@@ -157,25 +160,26 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen,
         goto cleanup;
     }
 
-    if (flen != num) {
-        em = OPENSSL_zalloc(num);
-        if (em == NULL) {
-            RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1,
-                   ERR_R_MALLOC_FAILURE);
-            goto cleanup;
-        }
+    em = OPENSSL_malloc(num);
+    if (em == NULL) {
+        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1,
+               ERR_R_MALLOC_FAILURE);
+        goto cleanup;
+    }
 
-        /*
-         * Caller is encouraged to pass zero-padded message created with
-         * BN_bn2binpad, but if it doesn't, we do this zero-padding copy
-         * to avoid leaking that information. The copy still leaks some
-         * side-channel information, but it's impossible to have a fixed
-         * memory access pattern since we can't read out of the bounds of
-         * |from|.
-         */
-        memcpy(em + num - flen, from, flen);
-        from = em;
+    /*
+     * Caller is encouraged to pass zero-padded message created with
+     * BN_bn2binpad. Trouble is that since we can't read out of |from|'s
+     * bounds, it's impossible to have an invariant memory access pattern
+     * in case |from| was not zero-padded in advance.
+     */
+    for (from += flen, em += num, i = 0; i < num; i++) {
+        mask = ~constant_time_is_zero(flen);
+        flen -= 1 & mask;
+        from -= 1 & mask;
+        *--em = *from & mask;
     }
+    from = em;
 
     /*
      * The first byte must be zero, however we must not leak if this is
@@ -222,32 +226,48 @@ int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen,
      * so plaintext-awareness ensures timing side-channels are no longer a
      * concern.
      */
-    if (!good)
-        goto decoding_err;
-
     msg_index = one_index + 1;
     mlen = dblen - msg_index;
 
-    if (tlen < mlen) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, RSA_R_DATA_TOO_LARGE);
-        mlen = -1;
-    } else {
-        memcpy(to, db + msg_index, mlen);
-        goto cleanup;
+    /*
+     * For good measure, do this check in constant time as well.
+     */
+    good &= constant_time_ge(tlen, mlen);
+
+    /*
+     * Even though we can't fake result's length, we can pretend copying
+     * |tlen| bytes where |mlen| bytes would be real. Last |tlen| of |dblen|
+     * bytes are viewed as circular buffer with start at |tlen|-|mlen'|,
+     * where |mlen'| is "saturated" |mlen| value. Deducing information
+     * about failure or |mlen| would take attacker's ability to observe
+     * memory access pattern with byte granularity *as it occurs*. It
+     * should be noted that failure is indistinguishable from normal
+     * operation if |tlen| is fixed by protocol.
+     */
+    tlen = constant_time_select_int(constant_time_lt(dblen, tlen), dblen, tlen);
+    msg_index = constant_time_select_int(good, msg_index, dblen - tlen);
+    mlen = dblen - msg_index;
+    for (from = db + msg_index, mask = good, i = 0; i < tlen; i++) {
+        unsigned int equals = constant_time_eq(i, mlen);
+
+        from -= dblen & equals; /* if (i == mlen) rewind   */
+        mask &= mask ^ equals;  /* if (i == mlen) mask = 0 */
+        to[i] = constant_time_select_8(mask, from[i], to[i]);
     }
 
- decoding_err:
     /*
      * To avoid chosen ciphertext attacks, the error message should not
      * reveal which kind of decoding error happened.
      */
     RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1,
            RSA_R_OAEP_DECODING_ERROR);
+    err_clear_last_constant_time(1 & good);
  cleanup:
     OPENSSL_cleanse(seed, sizeof(seed));
     OPENSSL_clear_free(db, dblen);
     OPENSSL_clear_free(em, num);
-    return mlen;
+
+    return constant_time_select_int(good, mlen, -1);
 }
 
 int PKCS1_MGF1(unsigned char *mask, long len,
diff --git a/crypto/rsa/rsa_ossl.c b/crypto/rsa/rsa_ossl.c
index 2b1b006c2801d..465134257fcc7 100644
--- a/crypto/rsa/rsa_ossl.c
+++ b/crypto/rsa/rsa_ossl.c
@@ -10,6 +10,7 @@
 #include "internal/cryptlib.h"
 #include "internal/bn_int.h"
 #include "rsa_locl.h"
+#include "internal/constant_time_locl.h"
 
 static int rsa_ossl_public_encrypt(int flen, const unsigned char *from,
                                   unsigned char *to, RSA *rsa, int padding);
@@ -286,6 +287,11 @@ static int rsa_ossl_private_encrypt(int flen, const unsigned char *from,
         goto err;
     }
 
+    if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
+        if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock,
+                                    rsa->n, ctx))
+            goto err;
+
     if (!(rsa->flags & RSA_FLAG_NO_BLINDING)) {
         blinding = rsa_get_blinding(rsa, &local_blinding, ctx);
         if (blinding == NULL) {
@@ -318,13 +324,6 @@ static int rsa_ossl_private_encrypt(int flen, const unsigned char *from,
         }
         BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
 
-        if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
-            if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock,
-                                        rsa->n, ctx)) {
-                BN_free(d);
-                goto err;
-            }
-
         if (!rsa->meth->bn_mod_exp(ret, f, d, rsa->n, ctx,
                                    rsa->_method_mod_n)) {
             BN_free(d);
@@ -481,8 +480,8 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from,
         RSAerr(RSA_F_RSA_OSSL_PRIVATE_DECRYPT, RSA_R_UNKNOWN_PADDING_TYPE);
         goto err;
     }
-    if (r < 0)
-        RSAerr(RSA_F_RSA_OSSL_PRIVATE_DECRYPT, RSA_R_PADDING_CHECK_FAILED);
+    RSAerr(RSA_F_RSA_OSSL_PRIVATE_DECRYPT, RSA_R_PADDING_CHECK_FAILED);
+    err_clear_last_constant_time(r >= 0);
 
  err:
     if (ctx != NULL)
diff --git a/crypto/rsa/rsa_pk1.c b/crypto/rsa/rsa_pk1.c
index d07c0d6f852b9..0626907418095 100644
--- a/crypto/rsa/rsa_pk1.c
+++ b/crypto/rsa/rsa_pk1.c
@@ -158,10 +158,10 @@ int RSA_padding_check_PKCS1_type_2(unsigned char *to, int tlen,
     int i;
     /* |em| is the encoded message, zero-padded to exactly |num| bytes */
     unsigned char *em = NULL;
-    unsigned int good, found_zero_byte;
+    unsigned int good, found_zero_byte, mask;
     int zero_index = 0, msg_index, mlen = -1;
 
-    if (tlen < 0 || flen < 0)
+    if (tlen <= 0 || flen <= 0)
         return -1;
 
     /*
@@ -169,39 +169,41 @@ int RSA_padding_check_PKCS1_type_2(unsigned char *to, int tlen,
      * section 7.2.2.
      */
 
-    if (flen > num)
-        goto err;
-
-    if (num < 11)
-        goto err;
+    if (flen > num || num < 11) {
+        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2,
+               RSA_R_PKCS_DECODING_ERROR);
+        return -1;
+    }
 
-    if (flen != num) {
-        em = OPENSSL_zalloc(num);
-        if (em == NULL) {
-            RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, ERR_R_MALLOC_FAILURE);
-            return -1;
-        }
-        /*
-         * Caller is encouraged to pass zero-padded message created with
-         * BN_bn2binpad, but if it doesn't, we do this zero-padding copy
-         * to avoid leaking that information. The copy still leaks some
-         * side-channel information, but it's impossible to have a fixed
-         * memory access pattern since we can't read out of the bounds of
-         * |from|.
-         */
-        memcpy(em + num - flen, from, flen);
-        from = em;
+    em = OPENSSL_malloc(num);
+    if (em == NULL) {
+        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, ERR_R_MALLOC_FAILURE);
+        return -1;
+    }
+    /*
+     * Caller is encouraged to pass zero-padded message created with
+     * BN_bn2binpad. Trouble is that since we can't read out of |from|'s
+     * bounds, it's impossible to have an invariant memory access pattern
+     * in case |from| was not zero-padded in advance.
+     */
+    for (from += flen, em += num, i = 0; i < num; i++) {
+        mask = ~constant_time_is_zero(flen);
+        flen -= 1 & mask;
+        from -= 1 & mask;
+        *--em = *from & mask;
     }
+    from = em;
 
     good = constant_time_is_zero(from[0]);
     good &= constant_time_eq(from[1], 2);
 
+    /* scan over padding data */
     found_zero_byte = 0;
     for (i = 2; i < num; i++) {
         unsigned int equals0 = constant_time_is_zero(from[i]);
-        zero_index =
-            constant_time_select_int(~found_zero_byte & equals0, i,
-                                     zero_index);
+
+        zero_index = constant_time_select_int(~found_zero_byte & equals0,
+                                              i, zero_index);
         found_zero_byte |= equals0;
     }
 
@@ -210,7 +212,7 @@ int RSA_padding_check_PKCS1_type_2(unsigned char *to, int tlen,
      * If we never found a 0-byte, then |zero_index| is 0 and the check
      * also fails.
      */
-    good &= constant_time_ge((unsigned int)(zero_index), 2 + 8);
+    good &= constant_time_ge(zero_index, 2 + 8);
 
     /*
      * Skip the zero byte. This is incorrect if we never found a zero-byte
@@ -220,27 +222,34 @@ int RSA_padding_check_PKCS1_type_2(unsigned char *to, int tlen,
     mlen = num - msg_index;
 
     /*
-     * For good measure, do this check in constant time as well; it could
-     * leak something if |tlen| was assuming valid padding.
+     * For good measure, do this check in constant time as well.
      */
-    good &= constant_time_ge((unsigned int)(tlen), (unsigned int)(mlen));
+    good &= constant_time_ge(tlen, mlen);
 
     /*
-     * We can't continue in constant-time because we need to copy the result
-     * and we cannot fake its length. This unavoidably leaks timing
-     * information at the API boundary.
+     * Even though we can't fake result's length, we can pretend copying
+     * |tlen| bytes where |mlen| bytes would be real. Last |tlen| of |num|
+     * bytes are viewed as circular buffer with start at |tlen|-|mlen'|,
+     * where |mlen'| is "saturated" |mlen| value. Deducing information
+     * about failure or |mlen| would take attacker's ability to observe
+     * memory access pattern with byte granularity *as it occurs*. It
+     * should be noted that failure is indistinguishable from normal
+     * operation if |tlen| is fixed by protocol.
      */
-    if (!good) {
-        mlen = -1;
-        goto err;
-    }
+    tlen = constant_time_select_int(constant_time_lt(num, tlen), num, tlen);
+    msg_index = constant_time_select_int(good, msg_index, num - tlen);
+    mlen = num - msg_index;
+    for (from += msg_index, mask = good, i = 0; i < tlen; i++) {
+        unsigned int equals = constant_time_eq(i, mlen);
 
-    memcpy(to, from + msg_index, mlen);
+        from -= tlen & equals;  /* if (i == mlen) rewind   */
+        mask &= mask ^ equals;  /* if (i == mlen) mask = 0 */
+        to[i] = constant_time_select_8(mask, from[i], to[i]);
+    }
 
- err:
     OPENSSL_clear_free(em, num);
-    if (mlen == -1)
-        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2,
-               RSA_R_PKCS_DECODING_ERROR);
-    return mlen;
+    RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2, RSA_R_PKCS_DECODING_ERROR);
+    err_clear_last_constant_time(1 & good);
+
+    return constant_time_select_int(good, mlen, -1);
 }
diff --git a/crypto/rsa/rsa_ssl.c b/crypto/rsa/rsa_ssl.c
index 286d0a42de0f0..c5654595fb2f3 100644
--- a/crypto/rsa/rsa_ssl.c
+++ b/crypto/rsa/rsa_ssl.c
@@ -12,6 +12,7 @@
 #include <openssl/bn.h>
 #include <openssl/rsa.h>
 #include <openssl/rand.h>
+#include "internal/constant_time_locl.h"
 
 int RSA_padding_add_SSLv23(unsigned char *to, int tlen,
                            const unsigned char *from, int flen)
@@ -52,57 +53,115 @@ int RSA_padding_add_SSLv23(unsigned char *to, int tlen,
     return 1;
 }
 
+/*
+ * Copy of RSA_padding_check_PKCS1_type_2 with a twist that rejects padding
+ * if nul delimiter is preceded by 8 consecutive 0x03 bytes. It also
+ * preserves error code reporting for backward compatibility.
+ */
 int RSA_padding_check_SSLv23(unsigned char *to, int tlen,
                              const unsigned char *from, int flen, int num)
 {
-    int i, j, k;
-    const unsigned char *p;
+    int i;
+    /* |em| is the encoded message, zero-padded to exactly |num| bytes */
+    unsigned char *em = NULL;
+    unsigned int good, found_zero_byte, mask, threes_in_row;
+    int zero_index = 0, msg_index, mlen = -1, err;
 
-    p = from;
     if (flen < 10) {
         RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_DATA_TOO_SMALL);
         return -1;
     }
-    /* Accept even zero-padded input */
-    if (flen == num) {
-        if (*(p++) != 0) {
-            RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_BLOCK_TYPE_IS_NOT_02);
-            return -1;
-        }
-        flen--;
-    }
-    if ((num != (flen + 1)) || (*(p++) != 02)) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_BLOCK_TYPE_IS_NOT_02);
-        return -1;
-    }
 
-    /* scan over padding data */
-    j = flen - 1;               /* one for type */
-    for (i = 0; i < j; i++)
-        if (*(p++) == 0)
-            break;
-
-    if ((i == j) || (i < 8)) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23,
-               RSA_R_NULL_BEFORE_BLOCK_MISSING);
+    em = OPENSSL_malloc(num);
+    if (em == NULL) {
+        RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, ERR_R_MALLOC_FAILURE);
         return -1;
     }
-    for (k = -9; k < -1; k++) {
-        if (p[k] != 0x03)
-            break;
+    /*
+     * Caller is encouraged to pass zero-padded message created with
+     * BN_bn2binpad. Trouble is that since we can't read out of |from|'s
+     * bounds, it's impossible to have an invariant memory access pattern
+     * in case |from| was not zero-padded in advance.
+     */
+    for (from += flen, em += num, i = 0; i < num; i++) {
+        mask = ~constant_time_is_zero(flen);
+        flen -= 1 & mask;
+        from -= 1 & mask;
+        *--em = *from & mask;
     }
-    if (k == -1) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_SSLV3_ROLLBACK_ATTACK);
-        return -1;
+    from = em;
+
+    good = constant_time_is_zero(from[0]);
+    good &= constant_time_eq(from[1], 2);
+    err = constant_time_select_int(good, 0, RSA_R_BLOCK_TYPE_IS_NOT_02);
+    mask = ~good;
+
+    /* scan over padding data */
+    found_zero_byte = 0;
+    threes_in_row = 0;
+    for (i = 2; i < num; i++) {
+        unsigned int equals0 = constant_time_is_zero(from[i]);
+
+        zero_index = constant_time_select_int(~found_zero_byte & equals0,
+                                              i, zero_index);
+        found_zero_byte |= equals0;
+
+        threes_in_row += 1 & ~found_zero_byte;
+        threes_in_row &= found_zero_byte | constant_time_eq(from[i], 3);
     }
 
-    i++;                        /* Skip over the '\0' */
-    j -= i;
-    if (j > tlen) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, RSA_R_DATA_TOO_LARGE);
-        return -1;
+    /*
+     * PS must be at least 8 bytes long, and it starts two bytes into |from|.
+     * If we never found a 0-byte, then |zero_index| is 0 and the check
+     * also fails.
+     */
+    good &= constant_time_ge(zero_index, 2 + 8);
+    err = constant_time_select_int(mask | good, err,
+                                   RSA_R_NULL_BEFORE_BLOCK_MISSING);
+    mask = ~good;
+
+    good &= constant_time_lt(threes_in_row, 8);
+    err = constant_time_select_int(mask | good, err,
+                                   RSA_R_SSLV3_ROLLBACK_ATTACK);
+    mask = ~good;
+
+    /*
+     * Skip the zero byte. This is incorrect if we never found a zero-byte
+     * but in this case we also do not copy the message out.
+     */
+    msg_index = zero_index + 1;
+    mlen = num - msg_index;
+
+    /*
+     * For good measure, do this check in constant time as well.
+     */
+    good &= constant_time_ge(tlen, mlen);
+    err = constant_time_select_int(mask | good, err, RSA_R_DATA_TOO_LARGE);
+
+    /*
+     * Even though we can't fake result's length, we can pretend copying
+     * |tlen| bytes where |mlen| bytes would be real. Last |tlen| of |num|
+     * bytes are viewed as circular buffer with start at |tlen|-|mlen'|,
+     * where |mlen'| is "saturated" |mlen| value. Deducing information
+     * about failure or |mlen| would take attacker's ability to observe
+     * memory access pattern with byte granularity *as it occurs*. It
+     * should be noted that failure is indistinguishable from normal
+     * operation if |tlen| is fixed by protocol.
+     */
+    tlen = constant_time_select_int(constant_time_lt(num, tlen), num, tlen);
+    msg_index = constant_time_select_int(good, msg_index, num - tlen);
+    mlen = num - msg_index;
+    for (from += msg_index, mask = good, i = 0; i < tlen; i++) {
+        unsigned int equals = constant_time_eq(i, mlen);
+
+        from -= tlen & equals;  /* if (i == mlen) rewind   */
+        mask &= mask ^ equals;  /* if (i == mlen) mask = 0 */
+        to[i] = constant_time_select_8(mask, from[i], to[i]);
     }
-    memcpy(to, p, (unsigned int)j);
 
-    return j;
+    OPENSSL_clear_free(em, num);
+    RSAerr(RSA_F_RSA_PADDING_CHECK_SSLV23, err);
+    err_clear_last_constant_time(1 & good);
+
+    return constant_time_select_int(good, mlen, -1);
 }
diff --git a/crypto/rsa/rsa_x931g.c b/crypto/rsa/rsa_x931g.c
index 3563670a12aca..15e40e8d1dd73 100644
--- a/crypto/rsa/rsa_x931g.c
+++ b/crypto/rsa/rsa_x931g.c
@@ -128,6 +128,8 @@ int RSA_X931_derive_ex(RSA *rsa, BIGNUM *p1, BIGNUM *p2, BIGNUM *q1,
 
     /* calculate inverse of q mod p */
     rsa->iqmp = BN_mod_inverse(NULL, rsa->q, rsa->p, ctx2);
+    if (rsa->iqmp == NULL)
+        goto err;
 
     ret = 1;
  err:
diff --git a/crypto/sha/asm/keccak1600-armv8.pl b/crypto/sha/asm/keccak1600-armv8.pl
index 704ab4a7e45a8..a3117bd7506dd 100755
--- a/crypto/sha/asm/keccak1600-armv8.pl
+++ b/crypto/sha/asm/keccak1600-armv8.pl
@@ -1,5 +1,5 @@
 #!/usr/bin/env perl
-# Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2017-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -121,6 +121,7 @@ $code.=<<___;
 .align	5
 KeccakF1600_int:
 	adr	$C[2],iotas
+	.inst	0xd503233f			// paciasp
 	stp	$C[2],x30,[sp,#16]		// 32 bytes on top are mine
 	b	.Loop
 .align	4
@@ -292,12 +293,14 @@ $code.=<<___;
 	bne	.Loop
 
 	ldr	x30,[sp,#24]
+	.inst	0xd50323bf			// autiasp
 	ret
 .size	KeccakF1600_int,.-KeccakF1600_int
 
 .type	KeccakF1600,%function
 .align	5
 KeccakF1600:
+	.inst	0xd503233f			// paciasp
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -347,6 +350,7 @@ KeccakF1600:
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#128
+	.inst	0xd50323bf			// autiasp
 	ret
 .size	KeccakF1600,.-KeccakF1600
 
@@ -354,6 +358,7 @@ KeccakF1600:
 .type	SHA3_absorb,%function
 .align	5
 SHA3_absorb:
+	.inst	0xd503233f			// paciasp
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -451,6 +456,7 @@ $code.=<<___;
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#128
+	.inst	0xd50323bf			// autiasp
 	ret
 .size	SHA3_absorb,.-SHA3_absorb
 ___
@@ -461,6 +467,7 @@ $code.=<<___;
 .type	SHA3_squeeze,%function
 .align	5
 SHA3_squeeze:
+	.inst	0xd503233f			// paciasp
 	stp	x29,x30,[sp,#-48]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -523,6 +530,7 @@ SHA3_squeeze:
 	ldp	x19,x20,[sp,#16]
 	ldp	x21,x22,[sp,#32]
 	ldp	x29,x30,[sp],#48
+	.inst	0xd50323bf			// autiasp
 	ret
 .size	SHA3_squeeze,.-SHA3_squeeze
 ___
@@ -649,6 +657,7 @@ $code.=<<___;
 .type	KeccakF1600_cext,%function
 .align	5
 KeccakF1600_cext:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-80]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#16]		// per ABI requirement
@@ -681,6 +690,7 @@ $code.=<<___;
 	ldp	d12,d13,[sp,#48]
 	ldp	d14,d15,[sp,#64]
 	ldr	x29,[sp],#80
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	KeccakF1600_cext,.-KeccakF1600_cext
 ___
@@ -693,6 +703,7 @@ $code.=<<___;
 .type	SHA3_absorb_cext,%function
 .align	5
 SHA3_absorb_cext:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-80]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#16]		// per ABI requirement
@@ -764,6 +775,7 @@ $code.=<<___;
 	ldp	d12,d13,[sp,#48]
 	ldp	d14,d15,[sp,#64]
 	ldp	x29,x30,[sp],#80
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	SHA3_absorb_cext,.-SHA3_absorb_cext
 ___
@@ -775,6 +787,7 @@ $code.=<<___;
 .type	SHA3_squeeze_cext,%function
 .align	5
 SHA3_squeeze_cext:
+	.inst	0xd503233f		// paciasp
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	mov	x9,$ctx
@@ -830,6 +843,7 @@ SHA3_squeeze_cext:
 
 .Lsqueeze_done_ce:
 	ldr	x29,[sp],#16
+	.inst	0xd50323bf		// autiasp
 	ret
 .size	SHA3_squeeze_cext,.-SHA3_squeeze_cext
 ___
diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl
index ac84ebb52e4f7..01ffe9f98c3ea 100755
--- a/crypto/sha/asm/sha512-armv8.pl
+++ b/crypto/sha/asm/sha512-armv8.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -219,6 +219,7 @@ $code.=<<___	if ($SZ==8);
 ___
 $code.=<<___;
 #endif
+	.inst	0xd503233f				// paciasp
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 
@@ -280,6 +281,7 @@ $code.=<<___;
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#128
+	.inst	0xd50323bf				// autiasp
 	ret
 .size	$func,.-$func
 
diff --git a/crypto/srp/srp_lib.c b/crypto/srp/srp_lib.c
index b97d630d37533..ca20f6d097981 100644
--- a/crypto/srp/srp_lib.c
+++ b/crypto/srp/srp_lib.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2004-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright (c) 2004, EdelKey Project. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
@@ -26,6 +26,7 @@ static BIGNUM *srp_Calc_xy(const BIGNUM *x, const BIGNUM *y, const BIGNUM *N)
     unsigned char *tmp = NULL;
     int numN = BN_num_bytes(N);
     BIGNUM *res = NULL;
+
     if (x != N && BN_ucmp(x, N) >= 0)
         return NULL;
     if (y != N && BN_ucmp(y, N) >= 0)
@@ -139,7 +140,8 @@ BIGNUM *SRP_Calc_x(const BIGNUM *s, const char *user, const char *pass)
         || !EVP_DigestFinal_ex(ctxt, dig, NULL)
         || !EVP_DigestInit_ex(ctxt, EVP_sha1(), NULL))
         goto err;
-    BN_bn2bin(s, cs);
+    if (BN_bn2bin(s, cs) < 0)
+        goto err;
     if (!EVP_DigestUpdate(ctxt, cs, BN_num_bytes(s)))
         goto err;
 
diff --git a/crypto/srp/srp_vfy.c b/crypto/srp/srp_vfy.c
index 17b35c00f9da8..eb279dd4187a2 100644
--- a/crypto/srp/srp_vfy.c
+++ b/crypto/srp/srp_vfy.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2004-2019 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright (c) 2004, EdelKey Project. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
@@ -187,7 +187,7 @@ void SRP_user_pwd_free(SRP_user_pwd *user_pwd)
 static SRP_user_pwd *SRP_user_pwd_new(void)
 {
     SRP_user_pwd *ret;
-    
+
     if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) {
         /* SRPerr(SRP_F_SRP_USER_PWD_NEW, ERR_R_MALLOC_FAILURE); */ /*ckerr_ignore*/
         return NULL;
@@ -598,10 +598,14 @@ char *SRP_create_verifier(const char *user, const char *pass, char **salt,
         if ((len = t_fromb64(tmp, sizeof(tmp), N)) <= 0)
             goto err;
         N_bn_alloc = BN_bin2bn(tmp, len, NULL);
+        if (N_bn_alloc == NULL)
+            goto err;
         N_bn = N_bn_alloc;
         if ((len = t_fromb64(tmp, sizeof(tmp) ,g)) <= 0)
             goto err;
         g_bn_alloc = BN_bin2bn(tmp, len, NULL);
+        if (g_bn_alloc == NULL)
+            goto err;
         g_bn = g_bn_alloc;
         defgNid = "*";
     } else {
@@ -623,15 +627,19 @@ char *SRP_create_verifier(const char *user, const char *pass, char **salt,
             goto err;
         s = BN_bin2bn(tmp2, len, NULL);
     }
+    if (s == NULL)
+        goto err;
 
     if (!SRP_create_verifier_BN(user, pass, &s, &v, N_bn, g_bn))
         goto err;
 
-    BN_bn2bin(v, tmp);
+    if (BN_bn2bin(v, tmp) < 0)
+        goto err;
     vfsize = BN_num_bytes(v) * 2;
     if (((vf = OPENSSL_malloc(vfsize)) == NULL))
         goto err;
-    t_tob64(vf, tmp, BN_num_bytes(v));
+    if (!t_tob64(vf, tmp, BN_num_bytes(v)))
+        goto err;
 
     if (*salt == NULL) {
         char *tmp_salt;
@@ -639,7 +647,10 @@ char *SRP_create_verifier(const char *user, const char *pass, char **salt,
         if ((tmp_salt = OPENSSL_malloc(SRP_RANDOM_SALT_LEN * 2)) == NULL) {
             goto err;
         }
-        t_tob64(tmp_salt, tmp2, SRP_RANDOM_SALT_LEN);
+        if (!t_tob64(tmp_salt, tmp2, SRP_RANDOM_SALT_LEN)) {
+            OPENSSL_free(tmp_salt);
+            goto err;
+        }
         *salt = tmp_salt;
     }
 
@@ -686,11 +697,15 @@ int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt,
             goto err;
 
         salttmp = BN_bin2bn(tmp2, SRP_RANDOM_SALT_LEN, NULL);
+        if (salttmp == NULL)
+            goto err;
     } else {
         salttmp = *salt;
     }
 
     x = SRP_Calc_x(salttmp, user, pass);
+    if (x == NULL)
+        goto err;
 
     *verifier = BN_new();
     if (*verifier == NULL)
diff --git a/crypto/ui/ui_openssl.c b/crypto/ui/ui_openssl.c
index 6b996134df49e..5ca418d24870e 100644
--- a/crypto/ui/ui_openssl.c
+++ b/crypto/ui/ui_openssl.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -101,6 +101,12 @@
 
 # endif
 
+# if defined(OPENSSL_SYS_VXWORKS)
+#  undef TERMIOS
+#  undef TERMIO
+#  undef SGTTY
+# endif
+
 # ifdef TERMIOS
 #  include <termios.h>
 #  define TTY_STRUCT             struct termios
diff --git a/crypto/uid.c b/crypto/uid.c
index f7ae2610b3607..b2b096446fb4e 100644
--- a/crypto/uid.c
+++ b/crypto/uid.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -34,12 +34,13 @@ int OPENSSL_issetugid(void)
 # if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
 #  if __GLIBC_PREREQ(2, 16)
 #   include <sys/auxv.h>
+#   define OSSL_IMPLEMENT_GETAUXVAL
 #  endif
 # endif
 
 int OPENSSL_issetugid(void)
 {
-# ifdef AT_SECURE
+# ifdef OSSL_IMPLEMENT_GETAUXVAL
     return getauxval(AT_SECURE) != 0;
 # else
     return getuid() != geteuid() || getgid() != getegid();
diff --git a/crypto/x509/x509_vfy.c b/crypto/x509/x509_vfy.c
index 61e81922b4dab..4ced716e3646b 100644
--- a/crypto/x509/x509_vfy.c
+++ b/crypto/x509/x509_vfy.c
@@ -3232,12 +3232,19 @@ static int check_key_level(X509_STORE_CTX *ctx, X509 *cert)
     EVP_PKEY *pkey = X509_get0_pubkey(cert);
     int level = ctx->param->auth_level;
 
+    /*
+     * At security level zero, return without checking for a supported public
+     * key type.  Some engines support key types not understood outside the
+     * engine, and we only need to understand the key when enforcing a security
+     * floor.
+     */
+    if (level <= 0)
+        return 1;
+
     /* Unsupported or malformed keys are not secure */
     if (pkey == NULL)
         return 0;
 
-    if (level <= 0)
-        return 1;
     if (level > NUM_AUTH_LEVELS)
         level = NUM_AUTH_LEVELS;
 
diff --git a/crypto/x509/x_crl.c b/crypto/x509/x_crl.c
index 10733b58bca28..12ab3cca42c05 100644
--- a/crypto/x509/x_crl.c
+++ b/crypto/x509/x_crl.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -158,6 +158,18 @@ static int crl_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it,
     int idx;
 
     switch (operation) {
+    case ASN1_OP_D2I_PRE:
+        if (crl->meth->crl_free) {
+            if (!crl->meth->crl_free(crl))
+                return 0;
+        }
+        AUTHORITY_KEYID_free(crl->akid);
+        ISSUING_DIST_POINT_free(crl->idp);
+        ASN1_INTEGER_free(crl->crl_number);
+        ASN1_INTEGER_free(crl->base_crl_number);
+        sk_GENERAL_NAMES_pop_free(crl->issuers, GENERAL_NAMES_free);
+        /* fall thru */
+
     case ASN1_OP_NEW_POST:
         crl->idp = NULL;
         crl->akid = NULL;
diff --git a/crypto/x509/x_pubkey.c b/crypto/x509/x_pubkey.c
index d050b0b4b3e24..1c87b8268eb57 100644
--- a/crypto/x509/x_pubkey.c
+++ b/crypto/x509/x_pubkey.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -36,6 +36,7 @@ static int pubkey_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it,
         /* Attempt to decode public key and cache in pubkey structure. */
         X509_PUBKEY *pubkey = (X509_PUBKEY *)*pval;
         EVP_PKEY_free(pubkey->pkey);
+        pubkey->pkey = NULL;
         /*
          * Opportunistically decode the key but remove any non fatal errors
          * from the queue. Subsequent explicit attempts to decode/use the key
diff --git a/crypto/x509/x_x509.c b/crypto/x509/x_x509.c
index 4c04f12c94940..afe59c46c5188 100644
--- a/crypto/x509/x_x509.c
+++ b/crypto/x509/x_x509.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -40,12 +40,35 @@ static int x509_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it,
 
     switch (operation) {
 
+    case ASN1_OP_D2I_PRE:
+        CRYPTO_free_ex_data(CRYPTO_EX_INDEX_X509, ret, &ret->ex_data);
+        X509_CERT_AUX_free(ret->aux);
+        ASN1_OCTET_STRING_free(ret->skid);
+        AUTHORITY_KEYID_free(ret->akid);
+        CRL_DIST_POINTS_free(ret->crldp);
+        policy_cache_free(ret->policy_cache);
+        GENERAL_NAMES_free(ret->altname);
+        NAME_CONSTRAINTS_free(ret->nc);
+#ifndef OPENSSL_NO_RFC3779
+        sk_IPAddressFamily_pop_free(ret->rfc3779_addr, IPAddressFamily_free);
+        ASIdentifiers_free(ret->rfc3779_asid);
+#endif
+
+        /* fall thru */
+
     case ASN1_OP_NEW_POST:
+        ret->ex_cached = 0;
+        ret->ex_kusage = 0;
+        ret->ex_xkusage = 0;
+        ret->ex_nscert = 0;
         ret->ex_flags = 0;
         ret->ex_pathlen = -1;
         ret->ex_pcpathlen = -1;
         ret->skid = NULL;
         ret->akid = NULL;
+        ret->policy_cache = NULL;
+        ret->altname = NULL;
+        ret->nc = NULL;
 #ifndef OPENSSL_NO_RFC3779
         ret->rfc3779_addr = NULL;
         ret->rfc3779_asid = NULL;
author	Jung-uk Kim <jkim@FreeBSD.org>	2019-02-26 18:06:51 +0000
committer	Jung-uk Kim <jkim@FreeBSD.org>	2019-02-26 18:06:51 +0000
commit	851f7386fd78b9787f4f6669ad271886a2a003f1 (patch)
tree	952920d27fdcd105b7f77b6e5fef3fedae8f74ea /crypto
parent	8c3f9abd70b3f447a4795c1b00b386b044fb322d (diff)
download	src-test-851f7386fd78b9787f4f6669ad271886a2a003f1.tar.gz src-test-851f7386fd78b9787f4f6669ad271886a2a003f1.zip