aboutsummaryrefslogtreecommitdiff
path: root/lib/msun
diff options
context:
space:
mode:
author	Bruce Evans <bde@FreeBSD.org>	2018-07-20 12:42:24 +0000
committer	Bruce Evans <bde@FreeBSD.org>	2018-07-20 12:42:24 +0000
commit	27aa844253c86af777298be505c7b2b01bb69ed3 (patch)
tree	4b24e3058b5660cc433bc466ac44d2aa1a5c0c92 /lib/msun
parent	1a59bccc425fc8af01aee3086ab09616d1bb8936 (diff)
downloadsrc-27aa844253c86af777298be505c7b2b01bb69ed3.tar.gz
src-27aa844253c86af777298be505c7b2b01bb69ed3.zip
Notes
Diffstat (limited to 'lib/msun')
-rw-r--r--	lib/msun/ld128/e_rem_pio2l.h	9
-rw-r--r--	lib/msun/ld128/k_expl.h	10
-rw-r--r--	lib/msun/ld128/s_expl.c	7
-rw-r--r--	lib/msun/ld80/e_rem_pio2l.h	8
-rw-r--r--	lib/msun/ld80/k_expl.h	9
-rw-r--r--	lib/msun/ld80/s_expl.c	9
-rw-r--r--	lib/msun/src/e_rem_pio2.c	8
-rw-r--r--	lib/msun/src/e_rem_pio2f.c	8
-rw-r--r--	lib/msun/src/math_private.h	100
9 files changed, 93 insertions, 75 deletions
diff --git a/lib/msun/ld128/e_rem_pio2l.h b/lib/msun/ld128/e_rem_pio2l.h
index 5d78c4de0905..fcef399a83a6 100644
--- a/lib/msun/ld128/e_rem_pio2l.h
+++ b/lib/msun/ld128/e_rem_pio2l.h
@@ -74,14 +74,9 @@ __ieee754_rem_pio2l(long double x, long double *y)
if (ex < BIAS + 45 || ex == BIAS + 45 &&
u.bits.manh < 0x921fb54442d1LL) {
/* |x| ~< 2^45*(pi/2), medium size */
- /* Use a specialized rint() to get fn. Assume round-to-nearest. */
- fn = x*invpio2+0x1.8p112;
- fn = fn-0x1.8p112;
-#ifdef HAVE_EFFICIENT_I64RINT
+ /* TODO: use only double precision for fn, as in expl(). */
+ fn = rnintl(x * invpio2);
n = i64rint(fn);
-#else
- n = fn;
-#endif
r = x-fn*pio2_1;
w = fn*pio2_1t; /* 1st round good to 180 bit */
{
diff --git a/lib/msun/ld128/k_expl.h b/lib/msun/ld128/k_expl.h
index 38a9cb98b33d..913107652272 100644
--- a/lib/msun/ld128/k_expl.h
+++ b/lib/msun/ld128/k_expl.h
@@ -244,16 +244,8 @@ __k_expl(long double x, long double *hip, long double *lop, int *kp)
int n, n2;
/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
- /* Use a specialized rint() to get fn. Assume round-to-nearest. */
- /* XXX assume no extra precision for the additions, as for trig fns. */
- /* XXX this set of comments is now quadruplicated. */
- /* XXX but see ../src/e_exp.c for a fix using double_t. */
- fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52;
-#if defined(HAVE_EFFICIENT_IRINT)
+ fn = rnint((double)x * INV_L);
n = irint(fn);
-#else
- n = (int)fn;
-#endif
n2 = (unsigned)n % INTERVALS;
/* Depend on the sign bit being propagated: */
*kp = n >> LOG2_INTERVALS;
diff --git a/lib/msun/ld128/s_expl.c b/lib/msun/ld128/s_expl.c
index f624d5143902..b47402b0fb4e 100644
--- a/lib/msun/ld128/s_expl.c
+++ b/lib/msun/ld128/s_expl.c
@@ -268,13 +268,8 @@ expm1l(long double x)
}
/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
- /* Use a specialized rint() to get fn. Assume round-to-nearest. */
- fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52;
-#if defined(HAVE_EFFICIENT_IRINT)
+ fn = rnint((double)x * INV_L);
n = irint(fn);
-#else
- n = (int)fn;
-#endif
n2 = (unsigned)n % INTERVALS;
k = n >> LOG2_INTERVALS;
r1 = x - fn * L1;
diff --git a/lib/msun/ld80/e_rem_pio2l.h b/lib/msun/ld80/e_rem_pio2l.h
index 75e4e4bc1abc..9a5c64ca9943 100644
--- a/lib/msun/ld80/e_rem_pio2l.h
+++ b/lib/msun/ld80/e_rem_pio2l.h
@@ -84,14 +84,8 @@ __ieee754_rem_pio2l(long double x, long double *y)
ex = expsign & 0x7fff;
if (ex < BIAS + 25 || (ex == BIAS + 25 && u.bits.manh < 0xc90fdaa2)) {
/* |x| ~< 2^25*(pi/2), medium size */
- /* Use a specialized rint() to get fn. Assume round-to-nearest. */
- fn = x*invpio2+0x1.8p63;
- fn = fn-0x1.8p63;
-#ifdef HAVE_EFFICIENT_IRINT
+ fn = rnintl(x*invpio2);
n = irint(fn);
-#else
- n = fn;
-#endif
r = x-fn*pio2_1;
w = fn*pio2_1t; /* 1st round good to 102 bit */
{
diff --git a/lib/msun/ld80/k_expl.h b/lib/msun/ld80/k_expl.h
index 2d803ea647ed..63c65b65cf6e 100644
--- a/lib/msun/ld80/k_expl.h
+++ b/lib/msun/ld80/k_expl.h
@@ -225,16 +225,9 @@ __k_expl(long double x, long double *hip, long double *lop, int *kp)
int n, n2;
/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
- /* Use a specialized rint() to get fn. Assume round-to-nearest. */
- fn = x * INV_L + 0x1.8p63 - 0x1.8p63;
+ fn = rnintl(x * INV_L);
r = x - fn * L1 - fn * L2; /* r = r1 + r2 done independently. */
-#if defined(HAVE_EFFICIENT_IRINTL)
- n = irintl(fn);
-#elif defined(HAVE_EFFICIENT_IRINT)
n = irint(fn);
-#else
- n = (int)fn;
-#endif
n2 = (unsigned)n % INTERVALS;
/* Depend on the sign bit being propagated: */
*kp = n >> LOG2_INTERVALS;
diff --git a/lib/msun/ld80/s_expl.c b/lib/msun/ld80/s_expl.c
index d28b7dd0317c..e46e73f0c1fa 100644
--- a/lib/msun/ld80/s_expl.c
+++ b/lib/msun/ld80/s_expl.c
@@ -225,15 +225,8 @@ expm1l(long double x)
}
/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
- /* Use a specialized rint() to get fn. Assume round-to-nearest. */
- fn = x * INV_L + 0x1.8p63 - 0x1.8p63;
-#if defined(HAVE_EFFICIENT_IRINTL)
- n = irintl(fn);
-#elif defined(HAVE_EFFICIENT_IRINT)
+ fn = rnintl(x * INV_L);
n = irint(fn);
-#else
- n = (int)fn;
-#endif
n2 = (unsigned)n % INTERVALS;
k = n >> LOG2_INTERVALS;
r1 = x - fn * L1;
diff --git a/lib/msun/src/e_rem_pio2.c b/lib/msun/src/e_rem_pio2.c
index be2630b26fba..47b651351a8c 100644
--- a/lib/msun/src/e_rem_pio2.c
+++ b/lib/msun/src/e_rem_pio2.c
@@ -127,14 +127,8 @@ __ieee754_rem_pio2(double x, double *y)
}
if(ix<0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
- /* Use a specialized rint() to get fn. Assume round-to-nearest. */
- STRICT_ASSIGN(double,fn,x*invpio2+0x1.8p52);
- fn = fn-0x1.8p52;
-#ifdef HAVE_EFFICIENT_IRINT
+ fn = rnint((double_t)x*invpio2);
n = irint(fn);
-#else
- n = (int32_t)fn;
-#endif
r = x-fn*pio2_1;
w = fn*pio2_1t; /* 1st round good to 85 bit */
{
diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c
index f1ee7a042b7c..597f6139f248 100644
--- a/lib/msun/src/e_rem_pio2f.c
+++ b/lib/msun/src/e_rem_pio2f.c
@@ -55,14 +55,8 @@ __ieee754_rem_pio2f(float x, double *y)
ix = hx&0x7fffffff;
/* 33+53 bit pi is good enough for medium size */
if(ix<0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
- /* Use a specialized rint() to get fn. Assume round-to-nearest. */
- STRICT_ASSIGN(double,fn,x*invpio2+0x1.8p52);
- fn = fn-0x1.8p52;
-#ifdef HAVE_EFFICIENT_IRINT
+ fn = rnint((float_t)x*invpio2);
n = irint(fn);
-#else
- n = (int32_t)fn;
-#endif
r = x-fn*pio2_1;
w = fn*pio2_1t;
*y = r-w;
diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h
index 1b1105e1c3ac..91c1b92969cd 100644
--- a/lib/msun/src/math_private.h
+++ b/lib/msun/src/math_private.h
@@ -571,48 +571,116 @@ CMPLXL(long double x, long double y)
#endif /* _COMPLEX_H */
-#ifdef __GNUCLIKE_ASM
+/*
+ * The rnint() family rounds to the nearest integer for a restricted range
+ * of args (up to about 2**MANT_DIG).  We assume that the current
+ * rounding mode is FE_TONEAREST so that this can be done efficiently.
+ * Extra precision causes more problems in practice, and we only centralize
+ * this here to reduce those problems, and have not solved the efficiency
+ * problems. The exp2() family uses a more delicate version of this that
+ * requires extracting bits from the intermediate value, so it is not
+ * centralized here and should copy any solution of the efficiency problems.
+ */
+
+static inline double
+rnint(__double_t x)
+{
+ /*
+ * This casts to double to kill any extra precision. This depends
+ * on the cast being applied to a double_t to avoid compiler bugs
+ * (this is a cleaner version of STRICT_ASSIGN()). This is
+ * inefficient if there actually is extra precision, but is hard
+ * to improve on. We use double_t in the API to minimise conversions
+ * for just calling here. Note that we cannot easily change the
+ * magic number to the one that works directly with double_t, since
+ * the rounding precision is variable at runtime on x86 so the
+ * magic number would need to be variable. Assuming that the
+ * rounding precision is always the default is too fragile. This
+ * and many other complications will move when the default is
+ * changed to FP_PE.
+ */
+ return ((double)(x + 0x1.8p52) - 0x1.8p52);
+}
+
+static inline float
+rnintf(__float_t x)
+{
+ /*
+ * As for rnint(), except we could just call that to handle the
+ * extra precision case, usually without losing efficiency.
+ */
+ return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
+}
+
+#ifdef LDBL_MANT_DIG
+/*
+ * The complications for extra precision are smaller for rnintl() since it
+ * can safely assume that the rounding precision has been increased from
+ * its default to FP_PE on x86. We don't exploit that here to get small
+ * optimizations from limiting the range to double. We just need it for
+ * the magic number to work with long doubles. ld128 callers should use
+ * rnint() instead of this if possible. ld80 callers should prefer
+ * rnintl() since for amd64 this avoids swapping the register set, while
+ * for i386 it makes no difference (assuming FP_PE), and for other arches
+ * it makes little difference.
+ */
+static inline long double
+rnintl(long double x)
+{
+ return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -
+ __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);
+}
+#endif /* LDBL_MANT_DIG */
-/* Asm versions of some functions. */
+/*
+ * irint() and i64rint() give the same result as casting to their integer
+ * return type provided their arg is a floating point integer. They can
+ * sometimes be more efficient because no rounding is required.
+ */
+#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM)
+#define irint(x) \
+ (sizeof(x) == sizeof(float) && \
+ sizeof(__float_t) == sizeof(long double) ? irintf(x) : \
+ sizeof(x) == sizeof(double) && \
+ sizeof(__double_t) == sizeof(long double) ? irintd(x) : \
+ sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
+#else
+#define irint(x) ((int)(x))
+#endif
-#ifdef __amd64__
+#define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */
+
+#if defined(__i386__) && defined(__GNUCLIKE_ASM)
static __inline int
-irint(double x)
+irintf(float x)
{
int n;
- asm("cvtsd2si %1,%0" : "=r" (n) : "x" (x));
+ __asm("fistl %0" : "=m" (n) : "t" (x));
return (n);
}
-#define HAVE_EFFICIENT_IRINT
-#endif
-#ifdef __i386__
static __inline int
-irint(double x)
+irintd(double x)
{
int n;
- asm("fistl %0" : "=m" (n) : "t" (x));
+ __asm("fistl %0" : "=m" (n) : "t" (x));
return (n);
}
-#define HAVE_EFFICIENT_IRINT
#endif
-#if defined(__amd64__) || defined(__i386__)
+#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM)
static __inline int
irintl(long double x)
{
int n;
- asm("fistl %0" : "=m" (n) : "t" (x));
+ __asm("fistl %0" : "=m" (n) : "t" (x));
return (n);
}
-#define HAVE_EFFICIENT_IRINTL
#endif
-#endif /* __GNUCLIKE_ASM */
-
#ifdef DEBUG
#if defined(__amd64__) || defined(__i386__)
#define breakpoint() asm("int $3")