diff options
Diffstat (limited to 'src/int/i32_muladd.c')
-rw-r--r-- | src/int/i32_muladd.c | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/src/int/i32_muladd.c b/src/int/i32_muladd.c new file mode 100644 index 000000000000..dd526ad5ef20 --- /dev/null +++ b/src/int/i32_muladd.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see inner.h */ +void +br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m) +{ + uint32_t m_bitlen; + size_t u, mlen; + uint32_t a0, a1, b0, hi, g, q, tb; + uint32_t chf, clow, under, over; + uint64_t cc; + + /* + * We can test on the modulus bit length since we accept to + * leak that length. + */ + m_bitlen = m[0]; + if (m_bitlen == 0) { + return; + } + if (m_bitlen <= 32) { + x[1] = br_rem(x[1], z, m[1]); + return; + } + mlen = (m_bitlen + 31) >> 5; + + /* + * Principle: we estimate the quotient (x*2^32+z)/m by + * doing a 64/32 division with the high words. + * + * Let: + * w = 2^32 + * a = (w*a0 + a1) * w^N + a2 + * b = b0 * w^N + b2 + * such that: + * 0 <= a0 < w + * 0 <= a1 < w + * 0 <= a2 < w^N + * w/2 <= b0 < w + * 0 <= b2 < w^N + * a < w*b + * I.e. the two top words of a are a0:a1, the top word of b is + * b0, we ensured that b0 is "full" (high bit set), and a is + * such that the quotient q = a/b fits on one word (0 <= q < w). + * + * If a = b*q + r (with 0 <= r < q), we can estimate q by + * doing an Euclidean division on the top words: + * a0*w+a1 = b0*u + v (with 0 <= v < w) + * Then the following holds: + * 0 <= u <= w + * u-2 <= q <= u + */ + a0 = br_i32_word(x, m_bitlen - 32); + hi = x[mlen]; + memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); + x[1] = z; + a1 = br_i32_word(x, m_bitlen - 32); + b0 = br_i32_word(m, m_bitlen - 32); + + /* + * We estimate a divisor q. If the quotient returned by br_div() + * is g: + * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF. + * -- Otherwise: + * -- if g == 0 then we set q = 0; + * -- otherwise, we set q = g - 1. + * The properties described above then ensure that the true + * quotient is q-1, q or q+1. + */ + g = br_div(a0, a1, b0); + q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1)); + + /* + * We subtract q*m from x (with the extra high word of value 'hi'). + * Since q may be off by 1 (in either direction), we may have to + * add or subtract m afterwards. + * + * The 'tb' flag will be true (1) at the end of the loop if the + * result is greater than or equal to the modulus (not counting + * 'hi' or the carry). + */ + cc = 0; + tb = 1; + for (u = 1; u <= mlen; u ++) { + uint32_t mw, zw, xw, nxw; + uint64_t zl; + + mw = m[u]; + zl = MUL(mw, q) + cc; + cc = (uint32_t)(zl >> 32); + zw = (uint32_t)zl; + xw = x[u]; + nxw = xw - zw; + cc += (uint64_t)GT(nxw, xw); + x[u] = nxw; + tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw)); + } + + /* + * If we underestimated q, then either cc < hi (one extra bit + * beyond the top array word), or cc == hi and tb is true (no + * extra bit, but the result is not lower than the modulus). In + * these cases we must subtract m once. + * + * Otherwise, we may have overestimated, which will show as + * cc > hi (thus a negative result). Correction is adding m once. + */ + chf = (uint32_t)(cc >> 32); + clow = (uint32_t)cc; + over = chf | GT(clow, hi); + under = ~over & (tb | (~chf & LT(clow, hi))); + br_i32_add(x, m, over); + br_i32_sub(x, m, under); +} |