diff options
author | Bruce Evans <bde@FreeBSD.org> | 2018-07-20 12:42:24 +0000 |
---|---|---|
committer | Bruce Evans <bde@FreeBSD.org> | 2018-07-20 12:42:24 +0000 |
commit | 27aa844253c86af777298be505c7b2b01bb69ed3 (patch) | |
tree | 4b24e3058b5660cc433bc466ac44d2aa1a5c0c92 /lib/msun | |
parent | 1a59bccc425fc8af01aee3086ab09616d1bb8936 (diff) | |
download | src-27aa844253c86af777298be505c7b2b01bb69ed3.tar.gz src-27aa844253c86af777298be505c7b2b01bb69ed3.zip |
Notes
Diffstat (limited to 'lib/msun')
-rw-r--r-- | lib/msun/ld128/e_rem_pio2l.h | 9 | ||||
-rw-r--r-- | lib/msun/ld128/k_expl.h | 10 | ||||
-rw-r--r-- | lib/msun/ld128/s_expl.c | 7 | ||||
-rw-r--r-- | lib/msun/ld80/e_rem_pio2l.h | 8 | ||||
-rw-r--r-- | lib/msun/ld80/k_expl.h | 9 | ||||
-rw-r--r-- | lib/msun/ld80/s_expl.c | 9 | ||||
-rw-r--r-- | lib/msun/src/e_rem_pio2.c | 8 | ||||
-rw-r--r-- | lib/msun/src/e_rem_pio2f.c | 8 | ||||
-rw-r--r-- | lib/msun/src/math_private.h | 100 |
9 files changed, 93 insertions, 75 deletions
diff --git a/lib/msun/ld128/e_rem_pio2l.h b/lib/msun/ld128/e_rem_pio2l.h index 5d78c4de0905..fcef399a83a6 100644 --- a/lib/msun/ld128/e_rem_pio2l.h +++ b/lib/msun/ld128/e_rem_pio2l.h @@ -74,14 +74,9 @@ __ieee754_rem_pio2l(long double x, long double *y) if (ex < BIAS + 45 || ex == BIAS + 45 && u.bits.manh < 0x921fb54442d1LL) { /* |x| ~< 2^45*(pi/2), medium size */ - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - fn = x*invpio2+0x1.8p112; - fn = fn-0x1.8p112; -#ifdef HAVE_EFFICIENT_I64RINT + /* TODO: use only double precision for fn, as in expl(). */ + fn = rnintl(x * invpio2); n = i64rint(fn); -#else - n = fn; -#endif r = x-fn*pio2_1; w = fn*pio2_1t; /* 1st round good to 180 bit */ { diff --git a/lib/msun/ld128/k_expl.h b/lib/msun/ld128/k_expl.h index 38a9cb98b33d..913107652272 100644 --- a/lib/msun/ld128/k_expl.h +++ b/lib/msun/ld128/k_expl.h @@ -244,16 +244,8 @@ __k_expl(long double x, long double *hip, long double *lop, int *kp) int n, n2; /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */ - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - /* XXX assume no extra precision for the additions, as for trig fns. */ - /* XXX this set of comments is now quadruplicated. */ - /* XXX but see ../src/e_exp.c for a fix using double_t. */ - fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52; -#if defined(HAVE_EFFICIENT_IRINT) + fn = rnint((double)x * INV_L); n = irint(fn); -#else - n = (int)fn; -#endif n2 = (unsigned)n % INTERVALS; /* Depend on the sign bit being propagated: */ *kp = n >> LOG2_INTERVALS; diff --git a/lib/msun/ld128/s_expl.c b/lib/msun/ld128/s_expl.c index f624d5143902..b47402b0fb4e 100644 --- a/lib/msun/ld128/s_expl.c +++ b/lib/msun/ld128/s_expl.c @@ -268,13 +268,8 @@ expm1l(long double x) } /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */ - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52; -#if defined(HAVE_EFFICIENT_IRINT) + fn = rnint((double)x * INV_L); n = irint(fn); -#else - n = (int)fn; -#endif n2 = (unsigned)n % INTERVALS; k = n >> LOG2_INTERVALS; r1 = x - fn * L1; diff --git a/lib/msun/ld80/e_rem_pio2l.h b/lib/msun/ld80/e_rem_pio2l.h index 75e4e4bc1abc..9a5c64ca9943 100644 --- a/lib/msun/ld80/e_rem_pio2l.h +++ b/lib/msun/ld80/e_rem_pio2l.h @@ -84,14 +84,8 @@ __ieee754_rem_pio2l(long double x, long double *y) ex = expsign & 0x7fff; if (ex < BIAS + 25 || (ex == BIAS + 25 && u.bits.manh < 0xc90fdaa2)) { /* |x| ~< 2^25*(pi/2), medium size */ - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - fn = x*invpio2+0x1.8p63; - fn = fn-0x1.8p63; -#ifdef HAVE_EFFICIENT_IRINT + fn = rnintl(x*invpio2); n = irint(fn); -#else - n = fn; -#endif r = x-fn*pio2_1; w = fn*pio2_1t; /* 1st round good to 102 bit */ { diff --git a/lib/msun/ld80/k_expl.h b/lib/msun/ld80/k_expl.h index 2d803ea647ed..63c65b65cf6e 100644 --- a/lib/msun/ld80/k_expl.h +++ b/lib/msun/ld80/k_expl.h @@ -225,16 +225,9 @@ __k_expl(long double x, long double *hip, long double *lop, int *kp) int n, n2; /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */ - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - fn = x * INV_L + 0x1.8p63 - 0x1.8p63; + fn = rnintl(x * INV_L); r = x - fn * L1 - fn * L2; /* r = r1 + r2 done independently. */ -#if defined(HAVE_EFFICIENT_IRINTL) - n = irintl(fn); -#elif defined(HAVE_EFFICIENT_IRINT) n = irint(fn); -#else - n = (int)fn; -#endif n2 = (unsigned)n % INTERVALS; /* Depend on the sign bit being propagated: */ *kp = n >> LOG2_INTERVALS; diff --git a/lib/msun/ld80/s_expl.c b/lib/msun/ld80/s_expl.c index d28b7dd0317c..e46e73f0c1fa 100644 --- a/lib/msun/ld80/s_expl.c +++ b/lib/msun/ld80/s_expl.c @@ -225,15 +225,8 @@ expm1l(long double x) } /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */ - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - fn = x * INV_L + 0x1.8p63 - 0x1.8p63; -#if defined(HAVE_EFFICIENT_IRINTL) - n = irintl(fn); -#elif defined(HAVE_EFFICIENT_IRINT) + fn = rnintl(x * INV_L); n = irint(fn); -#else - n = (int)fn; -#endif n2 = (unsigned)n % INTERVALS; k = n >> LOG2_INTERVALS; r1 = x - fn * L1; diff --git a/lib/msun/src/e_rem_pio2.c b/lib/msun/src/e_rem_pio2.c index be2630b26fba..47b651351a8c 100644 --- a/lib/msun/src/e_rem_pio2.c +++ b/lib/msun/src/e_rem_pio2.c @@ -127,14 +127,8 @@ __ieee754_rem_pio2(double x, double *y) } if(ix<0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ medium: - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - STRICT_ASSIGN(double,fn,x*invpio2+0x1.8p52); - fn = fn-0x1.8p52; -#ifdef HAVE_EFFICIENT_IRINT + fn = rnint((double_t)x*invpio2); n = irint(fn); -#else - n = (int32_t)fn; -#endif r = x-fn*pio2_1; w = fn*pio2_1t; /* 1st round good to 85 bit */ { diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c index f1ee7a042b7c..597f6139f248 100644 --- a/lib/msun/src/e_rem_pio2f.c +++ b/lib/msun/src/e_rem_pio2f.c @@ -55,14 +55,8 @@ __ieee754_rem_pio2f(float x, double *y) ix = hx&0x7fffffff; /* 33+53 bit pi is good enough for medium size */ if(ix<0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */ - /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - STRICT_ASSIGN(double,fn,x*invpio2+0x1.8p52); - fn = fn-0x1.8p52; -#ifdef HAVE_EFFICIENT_IRINT + fn = rnint((float_t)x*invpio2); n = irint(fn); -#else - n = (int32_t)fn; -#endif r = x-fn*pio2_1; w = fn*pio2_1t; *y = r-w; diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h index 1b1105e1c3ac..91c1b92969cd 100644 --- a/lib/msun/src/math_private.h +++ b/lib/msun/src/math_private.h @@ -571,48 +571,116 @@ CMPLXL(long double x, long double y) #endif /* _COMPLEX_H */ -#ifdef __GNUCLIKE_ASM +/* + * The rnint() family rounds to the nearest integer for a restricted range + * range of args (up to about 2**MANT_DIG). We assume that the current + * rounding mode is FE_TONEAREST so that this can be done efficiently. + * Extra precision causes more problems in practice, and we only centralize + * this here to reduce those problems, and have not solved the efficiency + * problems. The exp2() family uses a more delicate version of this that + * requires extracting bits from the intermediate value, so it is not + * centralized here and should copy any solution of the efficiency problems. + */ + +static inline double +rnint(__double_t x) +{ + /* + * This casts to double to kill any extra precision. This depends + * on the cast being applied to a double_t to avoid compiler bugs + * (this is a cleaner version of STRICT_ASSIGN()). This is + * inefficient if there actually is extra precision, but is hard + * to improve on. We use double_t in the API to minimise conversions + * for just calling here. Note that we cannot easily change the + * magic number to the one that works directly with double_t, since + * the rounding precision is variable at runtime on x86 so the + * magic number would need to be variable. Assuming that the + * rounding precision is always the default is too fragile. This + * and many other complications will move when the default is + * changed to FP_PE. + */ + return ((double)(x + 0x1.8p52) - 0x1.8p52); +} + +static inline float +rnintf(__float_t x) +{ + /* + * As for rnint(), except we could just call that to handle the + * extra precision case, usually without losing efficiency. + */ + return ((float)(x + 0x1.8p23F) - 0x1.8p23F); +} + +#ifdef LDBL_MANT_DIG +/* + * The complications for extra precision are smaller for rnintl() since it + * can safely assume that the rounding precision has been increased from + * its default to FP_PE on x86. We don't exploit that here to get small + * optimizations from limiting the rangle to double. We just need it for + * the magic number to work with long doubles. ld128 callers should use + * rnint() instead of this if possible. ld80 callers should prefer + * rnintl() since for amd64 this avoids swapping the register set, while + * for i386 it makes no difference (assuming FP_PE), and for other arches + * it makes little difference. + */ +static inline long double +rnintl(long double x) +{ + return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 - + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2); +} +#endif /* LDBL_MANT_DIG */ -/* Asm versions of some functions. */ +/* + * irint() and i64rint() give the same result as casting to their integer + * return type provided their arg is a floating point integer. They can + * sometimes be more efficient because no rounding is required. + */ +#if (defined(amd64) || defined(__i386__)) && defined(__GNUCLIKE_ASM) +#define irint(x) \ + (sizeof(x) == sizeof(float) && \ + sizeof(__float_t) == sizeof(long double) ? irintf(x) : \ + sizeof(x) == sizeof(double) && \ + sizeof(__double_t) == sizeof(long double) ? irintd(x) : \ + sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x)) +#else +#define irint(x) ((int)(x)) +#endif -#ifdef __amd64__ +#define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */ + +#if defined(__i386__) && defined(__GNUCLIKE_ASM) static __inline int -irint(double x) +irintf(float x) { int n; - asm("cvtsd2si %1,%0" : "=r" (n) : "x" (x)); + __asm("fistl %0" : "=m" (n) : "t" (x)); return (n); } -#define HAVE_EFFICIENT_IRINT -#endif -#ifdef __i386__ static __inline int -irint(double x) +irintd(double x) { int n; - asm("fistl %0" : "=m" (n) : "t" (x)); + __asm("fistl %0" : "=m" (n) : "t" (x)); return (n); } -#define HAVE_EFFICIENT_IRINT #endif -#if defined(__amd64__) || defined(__i386__) +#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM) static __inline int irintl(long double x) { int n; - asm("fistl %0" : "=m" (n) : "t" (x)); + __asm("fistl %0" : "=m" (n) : "t" (x)); return (n); } -#define HAVE_EFFICIENT_IRINTL #endif -#endif /* __GNUCLIKE_ASM */ - #ifdef DEBUG #if defined(__amd64__) || defined(__i386__) #define breakpoint() asm("int $3") |