Diffstat (limited to 'contrib/llvm-project/compiler-rt/lib/builtins/udivmodti4.c')
-rw-r--r--  contrib/llvm-project/compiler-rt/lib/builtins/udivmodti4.c  158
1 file changed, 158 insertions, 0 deletions
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/udivmodti4.c b/contrib/llvm-project/compiler-rt/lib/builtins/udivmodti4.c
new file mode 100644
index 000000000000..55def37c9e1f
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/udivmodti4.c
@@ -0,0 +1,158 @@
+//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __udivmodti4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Returns the quotient of the 128-bit value (u1:u0) divided by the 64-bit
+// value v. The quotient must fit in 64 bits; the remainder is stored in *r.
+// Taken and adjusted from libdivide's libdivide_128_div_64_to_64 division
+// fallback. For a correctness proof see the reference for this algorithm
+// in Knuth, Volume 2, section 4.3.1, Algorithm D.
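+// (The quotient fits in 64 bits exactly when u1 < v; the callers below ensure
+// this by first reducing the high word modulo the divisor when necessary.)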
+UNUSED
+static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
+                                            du_int *r) {
+  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+  const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
+  du_int un1, un0;                                // Norm. dividend LSD's
+  du_int vn1, vn0;                                // Norm. divisor digits
+  du_int q1, q0;                                  // Quotient digits
+  du_int un64, un21, un10;                        // Dividend digit pairs
+  du_int rhat;                                    // A remainder
+  si_int s;                                       // Shift amount for normalization
+
+  s = __builtin_clzll(v);
+  if (s > 0) {
+    // Normalize the divisor.
+    v = v << s;
+    un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
+    un10 = u0 << s; // Shift dividend left
+  } else {
+    // Avoid undefined behavior of (u0 >> 64).
+    un64 = u1;
+    un10 = u0;
+  }
+
+  // Break divisor up into two 32-bit digits.
+  vn1 = v >> (n_udword_bits / 2);
+  vn0 = v & 0xFFFFFFFF;
+
+  // Break right half of dividend into two digits.
+  un1 = un10 >> (n_udword_bits / 2);
+  un0 = un10 & 0xFFFFFFFF;
+
+  // Compute the first quotient digit, q1.
+  q1 = un64 / vn1;
+  rhat = un64 - q1 * vn1;
+
+  // q1 has at most error 2. No more than 2 iterations.
+  while (q1 >= b || q1 * vn0 > b * rhat + un1) {
+    q1 = q1 - 1;
+    rhat = rhat + vn1;
+    if (rhat >= b)
+      break;
+  }
+
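+  // Multiply and subtract (Knuth's step D4): remove q1 * v and bring down the
+  // next dividend digit.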
+  un21 = un64 * b + un1 - q1 * v;
+
+  // Compute the second quotient digit.
+  q0 = un21 / vn1;
+  rhat = un21 - q0 * vn1;
+
+  // q0 has at most error 2. No more than 2 iterations.
+  while (q0 >= b || q0 * vn0 > b * rhat + un0) {
+    q0 = q0 - 1;
+    rhat = rhat + vn1;
+    if (rhat >= b)
+      break;
+  }
+
+  *r = (un21 * b + un0 - q0 * v) >> s;
+  return q1 * b + q0;
+}
+
+static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
+                                     du_int *r) {
+#if defined(__x86_64__)
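+  // x86-64 divq divides the 128-bit value in rdx:rax by a 64-bit operand,
+  // leaving the quotient in rax and the remainder in rdx. The quotient must
+  // fit in 64 bits, which the callers of this helper guarantee.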
+  du_int result;
+  __asm__("divq %[v]"
+          : "=a"(result), "=d"(*r)
+          : [ v ] "r"(v), "a"(u0), "d"(u1));
+  return result;
+#else
+  return udiv128by64to64default(u1, u0, v, r);
+#endif
+}
+
+// Effects: if rem != 0, *rem = a % b
+// Returns: a / b
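+// In compiler-rt, __udivti3 and __umodti3 are implemented on top of this
+// routine; the compiler emits calls to these builtins when lowering unsigned
+// 128-bit (tu_int) division on targets without a native 128-bit divide.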
+
+COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
+  const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
+  utwords dividend;
+  dividend.all = a;
+  utwords divisor;
+  divisor.all = b;
+  utwords quotient;
+  utwords remainder;
+  if (divisor.all > dividend.all) {
+    if (rem)
+      *rem = dividend.all;
+    return 0;
+  }
+  // When the divisor fits in 64 bits, we can use an optimized path.
+  if (divisor.s.high == 0) {
+    remainder.s.high = 0;
+    if (dividend.s.high < divisor.s.low) {
+      // The result fits in 64 bits.
+      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
+                                       divisor.s.low, &remainder.s.low);
+      quotient.s.high = 0;
+    } else {
+      // First, divide the high part to leave its remainder in dividend.s.high.
+      // After that, dividend.s.high < divisor.s.low.
+      quotient.s.high = dividend.s.high / divisor.s.low;
+      dividend.s.high = dividend.s.high % divisor.s.low;
+      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
+                                       divisor.s.low, &remainder.s.low);
+    }
+    if (rem)
+      *rem = remainder.all;
+    return quotient.all;
+  }
+  // 0 <= shift <= 63.
+  si_int shift =
+      __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
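+  // shift is non-negative: dividend.all >= divisor.all together with
+  // divisor.s.high != 0 implies dividend.s.high >= divisor.s.high, so the
+  // dividend's high word has no more leading zeros than the divisor's.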
+  divisor.all <<= shift;
+  quotient.s.high = 0;
+  quotient.s.low = 0;
+  for (; shift >= 0; --shift) {
+    quotient.s.low <<= 1;
+    // Branch-free version of:
+    // if (dividend.all >= divisor.all)
+    // {
+    //   dividend.all -= divisor.all;
+    //   quotient.s.low |= 1;
+    // }
+    const ti_int s =
+        (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
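+    // s is all ones when dividend.all >= divisor.all (the subtraction wraps
+    // negative) and zero otherwise, so the next two lines conditionally set
+    // the quotient bit and subtract the divisor without branching.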
+    quotient.s.low |= s & 1;
+    dividend.all -= divisor.all & s;
+    divisor.all >>= 1;
+  }
+  if (rem)
+    *rem = dividend.all;
+  return quotient.all;
+}
+
+#endif // CRT_HAS_128BIT
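
A minimal caller sketch (not part of the diff above; main(), the declaration, and the test values are illustrative assumptions): it exercises the quotient/remainder contract documented for __udivmodti4, assuming a 64-bit target where tu_int corresponds to unsigned __int128 and a runtime library (compiler-rt or libgcc) that provides the symbol at link time.

    #include <stdio.h>

    typedef unsigned __int128 tu_int;

    // Declaration mirroring the definition in the diff (assumes the default C
    // calling convention for unsigned __int128 arguments on this target).
    tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem);

    int main(void) {
      tu_int a = ((tu_int)0x0123456789abcdefULL << 64) | 0xfedcba9876543210ULL;
      tu_int b = 0x100000007ULL;
      tu_int r;
      tu_int q = __udivmodti4(a, b, &r);
      // Contract: q == a / b and r == a % b, so q * b + r reconstructs a.
      printf("%s\n", (q * b + r == a && r < b) ? "ok" : "mismatch");
      return 0;
    }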