diff options
Diffstat (limited to 'pl/math/math_config.h')
-rw-r--r-- | pl/math/math_config.h | 252 |
1 files changed, 152 insertions, 100 deletions
diff --git a/pl/math/math_config.h b/pl/math/math_config.h index dccb3ce4c775..c3dd8f2db8c7 100644 --- a/pl/math/math_config.h +++ b/pl/math/math_config.h @@ -13,9 +13,9 @@ #ifndef WANT_ROUNDING /* If defined to 1, return correct results for special cases in non-nearest - rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f). - This may be set to 0 if there is no fenv support or if math functions only - get called in round to nearest mode. */ + rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than + -0.0f). This may be set to 0 if there is no fenv support or if math + functions only get called in round to nearest mode. */ # define WANT_ROUNDING 1 #endif #ifndef WANT_ERRNO @@ -27,33 +27,34 @@ #ifndef WANT_SIMD_EXCEPT /* If defined to 1, trigger fp exceptions in vector routines, consistently with behaviour expected from the corresponding scalar routine. */ -#define WANT_SIMD_EXCEPT 0 +# define WANT_SIMD_EXCEPT 0 #endif /* Compiler can inline round as a single instruction. */ #ifndef HAVE_FAST_ROUND # if __aarch64__ -# define HAVE_FAST_ROUND 1 +# define HAVE_FAST_ROUND 1 # else -# define HAVE_FAST_ROUND 0 +# define HAVE_FAST_ROUND 0 # endif #endif /* Compiler can inline lround, but not (long)round(x). */ #ifndef HAVE_FAST_LROUND -# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__ -# define HAVE_FAST_LROUND 1 +# if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408 \ + && __NO_MATH_ERRNO__ +# define HAVE_FAST_LROUND 1 # else -# define HAVE_FAST_LROUND 0 +# define HAVE_FAST_LROUND 0 # endif #endif /* Compiler can inline fma as a single instruction. */ #ifndef HAVE_FAST_FMA # if defined FP_FAST_FMA || __aarch64__ -# define HAVE_FAST_FMA 1 +# define HAVE_FAST_FMA 1 # else -# define HAVE_FAST_FMA 0 +# define HAVE_FAST_FMA 0 # endif #endif @@ -62,9 +63,9 @@ to interpose math functions with both static and dynamic linking. */ #ifndef USE_GLIBC_ABI # if __GNUC__ -# define USE_GLIBC_ABI 1 +# define USE_GLIBC_ABI 1 # else -# define USE_GLIBC_ABI 0 +# define USE_GLIBC_ABI 0 # endif #endif @@ -76,15 +77,15 @@ # define likely(x) __builtin_expect (!!(x), 1) # define unlikely(x) __builtin_expect (x, 0) # if __GNUC__ >= 9 -# define attribute_copy(f) __attribute__ ((copy (f))) +# define attribute_copy(f) __attribute__ ((copy (f))) # else -# define attribute_copy(f) +# define attribute_copy(f) # endif -# define strong_alias(f, a) \ - extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f); -# define hidden_alias(f, a) \ - extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \ - attribute_copy (f); +# define strong_alias(f, a) \ + extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f); +# define hidden_alias(f, a) \ + extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \ + attribute_copy (f); #else # define HIDDEN # define NOINLINE @@ -93,6 +94,31 @@ # define unlikely(x) (x) #endif +/* Return ptr but hide its value from the compiler so accesses through it + cannot be optimized based on the contents. */ +#define ptr_barrier(ptr) \ + ({ \ + __typeof (ptr) __ptr = (ptr); \ + __asm("" : "+r"(__ptr)); \ + __ptr; \ + }) + +/* Symbol renames to avoid libc conflicts. */ +#define __math_oflowf arm_math_oflowf +#define __math_uflowf arm_math_uflowf +#define __math_may_uflowf arm_math_may_uflowf +#define __math_divzerof arm_math_divzerof +#define __math_oflow arm_math_oflow +#define __math_uflow arm_math_uflow +#define __math_may_uflow arm_math_may_uflow +#define __math_divzero arm_math_divzero +#define __math_invalidf arm_math_invalidf +#define __math_invalid arm_math_invalid +#define __math_check_oflow arm_math_check_oflow +#define __math_check_uflow arm_math_check_uflow +#define __math_check_oflowf arm_math_check_oflowf +#define __math_check_uflowf arm_math_check_uflowf + #if HAVE_FAST_ROUND /* When set, the roundtoint and converttoint functions are provided with the semantics documented below. */ @@ -128,7 +154,7 @@ asuint (float f) { float f; uint32_t i; - } u = {f}; + } u = { f }; return u.i; } @@ -139,7 +165,7 @@ asfloat (uint32_t i) { uint32_t i; float f; - } u = {i}; + } u = { i }; return u.f; } @@ -150,7 +176,7 @@ asuint64 (double f) { double f; uint64_t i; - } u = {f}; + } u = { f }; return u.i; } @@ -161,7 +187,7 @@ asdouble (uint64_t i) { uint64_t i; double f; - } u = {i}; + } u = { i }; return u.f; } @@ -320,10 +346,26 @@ check_uflowf (float x) extern const struct erff_data { - float erff_poly_A[6]; - float erff_poly_B[7]; + struct + { + float erf, scale; + } tab[513]; } __erff_data HIDDEN; +extern const struct sv_erff_data +{ + float erf[513]; + float scale[513]; +} __sv_erff_data HIDDEN; + +extern const struct erfcf_data +{ + struct + { + float erfc, scale; + } tab[645]; +} __erfcf_data HIDDEN; + /* Data for logf and log10f. */ #define LOGF_TABLE_BITS 4 #define LOGF_POLY_ORDER 4 @@ -349,9 +391,15 @@ extern const struct log10_data double invln10; double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10). */ double poly1[LOG10_POLY1_ORDER - 1]; - struct {double invc, logc;} tab[1 << LOG10_TABLE_BITS]; + struct + { + double invc, logc; + } tab[1 << LOG10_TABLE_BITS]; #if !HAVE_FAST_FMA - struct {double chi, clo;} tab2[1 << LOG10_TABLE_BITS]; + struct + { + double chi, clo; + } tab2[1 << LOG10_TABLE_BITS]; #endif } __log10_data HIDDEN; @@ -374,44 +422,38 @@ extern const struct exp_data double poly[4]; /* Last four coefficients. */ double exp2_shift; double exp2_poly[EXP2_POLY_ORDER]; - uint64_t tab[2*(1 << EXP_TABLE_BITS)]; + uint64_t tab[2 * (1 << EXP_TABLE_BITS)]; } __exp_data HIDDEN; -#define ERFC_NUM_INTERVALS 20 -#define ERFC_POLY_ORDER 12 -extern const struct erfc_data -{ - double interval_bounds[ERFC_NUM_INTERVALS + 1]; - double poly[ERFC_NUM_INTERVALS][ERFC_POLY_ORDER + 1]; -} __erfc_data HIDDEN; -extern const struct v_erfc_data -{ - double interval_bounds[ERFC_NUM_INTERVALS + 1]; - double poly[ERFC_NUM_INTERVALS + 1][ERFC_POLY_ORDER + 1]; -} __v_erfc_data HIDDEN; - -#define ERFCF_POLY_NCOEFFS 16 -extern const struct erfcf_poly_data -{ - double poly[4][ERFCF_POLY_NCOEFFS]; -} __erfcf_poly_data HIDDEN; - +/* Copied from math/v_exp.h for use in vector exp_tail. */ #define V_EXP_TAIL_TABLE_BITS 8 extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN; -#define V_ERF_NINTS 49 -#define V_ERF_NCOEFFS 10 -extern const struct v_erf_data +/* Copied from math/v_exp.h for use in vector exp2. */ +#define V_EXP_TABLE_BITS 7 +extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN; + +extern const struct erf_data +{ + struct + { + double erf, scale; + } tab[769]; +} __erf_data HIDDEN; + +extern const struct sv_erf_data { - double shifts[V_ERF_NINTS]; - double coeffs[V_ERF_NCOEFFS][V_ERF_NINTS]; -} __v_erf_data HIDDEN; + double erf[769]; + double scale[769]; +} __sv_erf_data HIDDEN; -#define V_ERFF_NCOEFFS 7 -extern const struct v_erff_data +extern const struct erfc_data { - float coeffs[V_ERFF_NCOEFFS][2]; -} __v_erff_data HIDDEN; + struct + { + double erfc, scale; + } tab[3488]; +} __erfc_data HIDDEN; #define ATAN_POLY_NCOEFFS 20 extern const struct atan_poly_data @@ -465,7 +507,6 @@ extern const struct log1p_data } __log1p_data HIDDEN; #define LOG1PF_2U5 -#define V_LOG1PF_2U5 #define LOG1PF_NCOEFFS 9 extern const struct log1pf_data { @@ -481,61 +522,52 @@ extern const struct tanf_poly_data float poly_cotan[TANF_Q_POLY_NCOEFFS]; } __tanf_poly_data HIDDEN; -#define V_LOG2F_POLY_NCOEFFS 9 -extern const struct v_log2f_data -{ - float poly[V_LOG2F_POLY_NCOEFFS]; -} __v_log2f_data HIDDEN; - #define V_LOG2_TABLE_BITS 7 -#define V_LOG2_POLY_ORDER 6 extern const struct v_log2_data { - double poly[V_LOG2_POLY_ORDER - 1]; + double poly[5]; + double invln2; struct { double invc, log2c; - } tab[1 << V_LOG2_TABLE_BITS]; + } table[1 << V_LOG2_TABLE_BITS]; } __v_log2_data HIDDEN; -#define V_SINF_NCOEFFS 4 -extern const struct sv_sinf_data -{ - float coeffs[V_SINF_NCOEFFS]; -} __sv_sinf_data HIDDEN; - #define V_LOG10_TABLE_BITS 7 -#define V_LOG10_POLY_ORDER 6 extern const struct v_log10_data { + double poly[5]; + double invln10, log10_2; struct { double invc, log10c; - } tab[1 << V_LOG10_TABLE_BITS]; - double poly[V_LOG10_POLY_ORDER - 1]; - double invln10, log10_2; + } table[1 << V_LOG10_TABLE_BITS]; } __v_log10_data HIDDEN; -#define V_LOG10F_POLY_ORDER 9 -extern const float __v_log10f_poly[V_LOG10F_POLY_ORDER - 1] HIDDEN; - -#define SV_LOGF_POLY_ORDER 8 -extern const float __sv_logf_poly[SV_LOGF_POLY_ORDER - 1] HIDDEN; - -#define SV_LOG_POLY_ORDER 6 -#define SV_LOG_TABLE_BITS 7 -extern const struct sv_log_data +/* Some data for SVE powf's internal exp and log. */ +#define V_POWF_EXP2_TABLE_BITS 5 +#define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS) +#define V_POWF_LOG2_TABLE_BITS 5 +#define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS) +extern const struct v_powf_data { - double invc[1 << SV_LOG_TABLE_BITS]; - double logc[1 << SV_LOG_TABLE_BITS]; - double poly[SV_LOG_POLY_ORDER - 1]; -} __sv_log_data HIDDEN; + double invc[V_POWF_LOG2_N]; + double logc[V_POWF_LOG2_N]; + uint64_t scale[V_POWF_EXP2_N]; +} __v_powf_data HIDDEN; -#ifndef SV_EXPF_USE_FEXPA -#define SV_EXPF_USE_FEXPA 0 -#endif -#define SV_EXPF_POLY_ORDER 6 -extern const float __sv_expf_poly[SV_EXPF_POLY_ORDER - 1] HIDDEN; +#define V_LOG_POLY_ORDER 6 +#define V_LOG_TABLE_BITS 7 +extern const struct v_log_data +{ + /* Shared data for vector log and log-derived routines (e.g. asinh). */ + double poly[V_LOG_POLY_ORDER - 1]; + double ln2; + struct + { + double invc, logc; + } table[1 << V_LOG_TABLE_BITS]; +} __v_log_data HIDDEN; #define EXPM1F_POLY_ORDER 5 extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN; @@ -564,9 +596,29 @@ extern const struct cbrt_data double table[5]; } __cbrt_data HIDDEN; -extern const struct v_tan_data +#define ASINF_POLY_ORDER 4 +extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN; + +#define ASIN_POLY_ORDER 11 +extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN; + +/* Some data for AdvSIMD and SVE pow's internal exp and log. */ +#define V_POW_EXP_TABLE_BITS 8 +extern const struct v_pow_exp_data { - double neg_half_pi_hi, neg_half_pi_lo; - double poly[9]; -} __v_tan_data HIDDEN; + double poly[3]; + double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift; + uint64_t sbits[1 << V_POW_EXP_TABLE_BITS]; +} __v_pow_exp_data HIDDEN; + +#define V_POW_LOG_TABLE_BITS 7 +extern const struct v_pow_log_data +{ + double poly[7]; /* First coefficient is 1. */ + double ln2_hi, ln2_lo; + double invc[1 << V_POW_LOG_TABLE_BITS]; + double logc[1 << V_POW_LOG_TABLE_BITS]; + double logctail[1 << V_POW_LOG_TABLE_BITS]; +} __v_pow_log_data HIDDEN; + #endif |