src - FreeBSD source tree

diff options


context:
space:
mode:

author	Bruce Evans <bde@FreeBSD.org>	2018-07-20 12:42:24 +0000
committer	Bruce Evans <bde@FreeBSD.org>	2018-07-20 12:42:24 +0000
commit	27aa844253c86af777298be505c7b2b01bb69ed3 (patch)
tree	4b24e3058b5660cc433bc466ac44d2aa1a5c0c92 /lib/msun
parent	1a59bccc425fc8af01aee3086ab09616d1bb8936 (diff)

Notes

Diffstat (limited to 'lib/msun')

-rw-r--r--

lib/msun/ld128/e_rem_pio2l.h

-rw-r--r--

lib/msun/ld128/k_expl.h

-rw-r--r--

lib/msun/ld128/s_expl.c

-rw-r--r--

lib/msun/ld80/e_rem_pio2l.h

-rw-r--r--

lib/msun/ld80/k_expl.h

-rw-r--r--

lib/msun/ld80/s_expl.c

-rw-r--r--

lib/msun/src/e_rem_pio2.c

-rw-r--r--

lib/msun/src/e_rem_pio2f.c

-rw-r--r--

lib/msun/src/math_private.h

100

9 files changed, 93 insertions, 75 deletions

diff --git a/lib/msun/ld128/e_rem_pio2l.h b/lib/msun/ld128/e_rem_pio2l.h
index 5d78c4de0905..fcef399a83a6 100644
--- a/lib/msun/ld128/e_rem_pio2l.h
+++ b/lib/msun/ld128/e_rem_pio2l.h

@@ -74,14 +74,9 @@ __ieee754_rem_pio2l(long double x, long double *y)

if (ex < BIAS + 45 || ex == BIAS + 45 &&

u.bits.manh < 0x921fb54442d1LL) {

/* |x| ~< 2^45*(pi/2), medium size */

- /* Use a specialized rint() to get fn. Assume round-to-nearest. */

- fn = x*invpio2+0x1.8p112;

- fn = fn-0x1.8p112;

-#ifdef HAVE_EFFICIENT_I64RINT

+ /* TODO: use only double precision for fn, as in expl(). */

+ fn = rnintl(x * invpio2);

n = i64rint(fn);

-#else

- n = fn;

-#endif

r = x-fn*pio2_1;

w = fn*pio2_1t; /* 1st round good to 180 bit */

{

diff --git a/lib/msun/ld128/k_expl.h b/lib/msun/ld128/k_expl.h
index 38a9cb98b33d..913107652272 100644
--- a/lib/msun/ld128/k_expl.h
+++ b/lib/msun/ld128/k_expl.h

@@ -244,16 +244,8 @@ __k_expl(long double x, long double *hip, long double *lop, int *kp)

int n, n2;

/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */

- /* Use a specialized rint() to get fn. Assume round-to-nearest. */

- /* XXX assume no extra precision for the additions, as for trig fns. */

- /* XXX this set of comments is now quadruplicated. */

- /* XXX but see ../src/e_exp.c for a fix using double_t. */

- fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52;

-#if defined(HAVE_EFFICIENT_IRINT)

+ fn = rnint((double)x * INV_L);

n = irint(fn);

-#else

- n = (int)fn;

-#endif

n2 = (unsigned)n % INTERVALS;

/* Depend on the sign bit being propagated: */

*kp = n >> LOG2_INTERVALS;

diff --git a/lib/msun/ld128/s_expl.c b/lib/msun/ld128/s_expl.c
index f624d5143902..b47402b0fb4e 100644
--- a/lib/msun/ld128/s_expl.c
+++ b/lib/msun/ld128/s_expl.c

@@ -268,13 +268,8 @@ expm1l(long double x)

}

/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */

- /* Use a specialized rint() to get fn. Assume round-to-nearest. */

- fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52;

-#if defined(HAVE_EFFICIENT_IRINT)

+ fn = rnint((double)x * INV_L);

n = irint(fn);

-#else

- n = (int)fn;

-#endif

n2 = (unsigned)n % INTERVALS;

k = n >> LOG2_INTERVALS;

r1 = x - fn * L1;

diff --git a/lib/msun/ld80/e_rem_pio2l.h b/lib/msun/ld80/e_rem_pio2l.h
index 75e4e4bc1abc..9a5c64ca9943 100644
--- a/lib/msun/ld80/e_rem_pio2l.h
+++ b/lib/msun/ld80/e_rem_pio2l.h

@@ -84,14 +84,8 @@ __ieee754_rem_pio2l(long double x, long double *y)

ex = expsign & 0x7fff;

if (ex < BIAS + 25 || (ex == BIAS + 25 && u.bits.manh < 0xc90fdaa2)) {

/* |x| ~< 2^25*(pi/2), medium size */

- /* Use a specialized rint() to get fn. Assume round-to-nearest. */

- fn = x*invpio2+0x1.8p63;

- fn = fn-0x1.8p63;

-#ifdef HAVE_EFFICIENT_IRINT

+ fn = rnintl(x*invpio2);

n = irint(fn);

-#else

- n = fn;

-#endif

r = x-fn*pio2_1;

w = fn*pio2_1t; /* 1st round good to 102 bit */

{

diff --git a/lib/msun/ld80/k_expl.h b/lib/msun/ld80/k_expl.h
index 2d803ea647ed..63c65b65cf6e 100644
--- a/lib/msun/ld80/k_expl.h
+++ b/lib/msun/ld80/k_expl.h

@@ -225,16 +225,9 @@ __k_expl(long double x, long double *hip, long double *lop, int *kp)

int n, n2;

/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */

- /* Use a specialized rint() to get fn. Assume round-to-nearest. */

- fn = x * INV_L + 0x1.8p63 - 0x1.8p63;

+ fn = rnintl(x * INV_L);

r = x - fn * L1 - fn * L2; /* r = r1 + r2 done independently. */

-#if defined(HAVE_EFFICIENT_IRINTL)

- n = irintl(fn);

-#elif defined(HAVE_EFFICIENT_IRINT)

n = irint(fn);

-#else

- n = (int)fn;

-#endif

n2 = (unsigned)n % INTERVALS;

/* Depend on the sign bit being propagated: */

*kp = n >> LOG2_INTERVALS;

diff --git a/lib/msun/ld80/s_expl.c b/lib/msun/ld80/s_expl.c
index d28b7dd0317c..e46e73f0c1fa 100644
--- a/lib/msun/ld80/s_expl.c
+++ b/lib/msun/ld80/s_expl.c

@@ -225,15 +225,8 @@ expm1l(long double x)

}

/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */

- /* Use a specialized rint() to get fn. Assume round-to-nearest. */

- fn = x * INV_L + 0x1.8p63 - 0x1.8p63;

-#if defined(HAVE_EFFICIENT_IRINTL)

- n = irintl(fn);

-#elif defined(HAVE_EFFICIENT_IRINT)

+ fn = rnintl(x * INV_L);

n = irint(fn);

-#else

- n = (int)fn;

-#endif

n2 = (unsigned)n % INTERVALS;

k = n >> LOG2_INTERVALS;

r1 = x - fn * L1;

diff --git a/lib/msun/src/e_rem_pio2.c b/lib/msun/src/e_rem_pio2.c
index be2630b26fba..47b651351a8c 100644
--- a/lib/msun/src/e_rem_pio2.c
+++ b/lib/msun/src/e_rem_pio2.c

@@ -127,14 +127,8 @@ __ieee754_rem_pio2(double x, double *y)

}

if(ix<0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */

medium:

- /* Use a specialized rint() to get fn. Assume round-to-nearest. */

- STRICT_ASSIGN(double,fn,x*invpio2+0x1.8p52);

- fn = fn-0x1.8p52;

-#ifdef HAVE_EFFICIENT_IRINT

+ fn = rnint((double_t)x*invpio2);

n = irint(fn);

-#else

- n = (int32_t)fn;

-#endif

r = x-fn*pio2_1;

w = fn*pio2_1t; /* 1st round good to 85 bit */

{

diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c
index f1ee7a042b7c..597f6139f248 100644
--- a/lib/msun/src/e_rem_pio2f.c
+++ b/lib/msun/src/e_rem_pio2f.c

@@ -55,14 +55,8 @@ __ieee754_rem_pio2f(float x, double *y)

ix = hx&0x7fffffff;

/* 33+53 bit pi is good enough for medium size */

if(ix<0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */

- /* Use a specialized rint() to get fn. Assume round-to-nearest. */

- STRICT_ASSIGN(double,fn,x*invpio2+0x1.8p52);

- fn = fn-0x1.8p52;

-#ifdef HAVE_EFFICIENT_IRINT

+ fn = rnint((float_t)x*invpio2);

n = irint(fn);

-#else

- n = (int32_t)fn;

-#endif

r = x-fn*pio2_1;

w = fn*pio2_1t;

*y = r-w;

diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h
index 1b1105e1c3ac..91c1b92969cd 100644
--- a/lib/msun/src/math_private.h
+++ b/lib/msun/src/math_private.h

@@ -571,48 +571,116 @@ CMPLXL(long double x, long double y)

#endif /* _COMPLEX_H */

-#ifdef __GNUCLIKE_ASM

+/*

+ * The rnint() family rounds to the nearest integer for a restricted range

+ * range of args (up to about 2**MANT_DIG). We assume that the current

+ * rounding mode is FE_TONEAREST so that this can be done efficiently.

+ * Extra precision causes more problems in practice, and we only centralize

+ * this here to reduce those problems, and have not solved the efficiency

+ * problems. The exp2() family uses a more delicate version of this that

+ * requires extracting bits from the intermediate value, so it is not

+ * centralized here and should copy any solution of the efficiency problems.

+ */

+static inline double

+rnint(__double_t x)

+ /*

+ * This casts to double to kill any extra precision. This depends

+ * on the cast being applied to a double_t to avoid compiler bugs

+ * (this is a cleaner version of STRICT_ASSIGN()). This is

+ * inefficient if there actually is extra precision, but is hard

+ * to improve on. We use double_t in the API to minimise conversions

+ * for just calling here. Note that we cannot easily change the

+ * magic number to the one that works directly with double_t, since

+ * the rounding precision is variable at runtime on x86 so the

+ * magic number would need to be variable. Assuming that the

+ * rounding precision is always the default is too fragile. This

+ * and many other complications will move when the default is

+ * changed to FP_PE.

+ */

+ return ((double)(x + 0x1.8p52) - 0x1.8p52);

+static inline float

+rnintf(__float_t x)

+ /*

+ * As for rnint(), except we could just call that to handle the

+ * extra precision case, usually without losing efficiency.

+ */

+ return ((float)(x + 0x1.8p23F) - 0x1.8p23F);

+#ifdef LDBL_MANT_DIG

+/*

+ * The complications for extra precision are smaller for rnintl() since it

+ * can safely assume that the rounding precision has been increased from

+ * its default to FP_PE on x86. We don't exploit that here to get small

+ * optimizations from limiting the rangle to double. We just need it for

+ * the magic number to work with long doubles. ld128 callers should use

+ * rnint() instead of this if possible. ld80 callers should prefer

+ * rnintl() since for amd64 this avoids swapping the register set, while

+ * for i386 it makes no difference (assuming FP_PE), and for other arches

+ * it makes little difference.

+ */

+static inline long double

+rnintl(long double x)

+ return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -

+ __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);

+#endif /* LDBL_MANT_DIG */

-/* Asm versions of some functions. */

+/*

+ * irint() and i64rint() give the same result as casting to their integer

+ * return type provided their arg is a floating point integer. They can

+ * sometimes be more efficient because no rounding is required.

+ */

+#if (defined(amd64) || defined(__i386__)) && defined(__GNUCLIKE_ASM)

+#define irint(x) \

+ (sizeof(x) == sizeof(float) && \

+ sizeof(__float_t) == sizeof(long double) ? irintf(x) : \

+ sizeof(x) == sizeof(double) && \

+ sizeof(__double_t) == sizeof(long double) ? irintd(x) : \

+ sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))

+#else

+#define irint(x) ((int)(x))

+#endif

-#ifdef __amd64__

+#define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */

+#if defined(__i386__) && defined(__GNUCLIKE_ASM)

static __inline int

-irint(double x)

+irintf(float x)

{

int n;

- asm("cvtsd2si %1,%0" : "=r" (n) : "x" (x));

+ __asm("fistl %0" : "=m" (n) : "t" (x));

return (n);

}

-#define HAVE_EFFICIENT_IRINT

-#endif

-#ifdef __i386__

static __inline int

-irint(double x)

+irintd(double x)

{

int n;

- asm("fistl %0" : "=m" (n) : "t" (x));

+ __asm("fistl %0" : "=m" (n) : "t" (x));

return (n);

}

-#define HAVE_EFFICIENT_IRINT

#endif

-#if defined(__amd64__) || defined(__i386__)

+#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM)

static __inline int

irintl(long double x)

{

int n;

- asm("fistl %0" : "=m" (n) : "t" (x));

+ __asm("fistl %0" : "=m" (n) : "t" (x));

return (n);

}

-#define HAVE_EFFICIENT_IRINTL

#endif

-#endif /* __GNUCLIKE_ASM */

#ifdef DEBUG

#if defined(__amd64__) || defined(__i386__)

#define breakpoint() asm("int $3")