aboutsummaryrefslogtreecommitdiff
path: root/pl/math/math_config.h
diff options
context:
space:
mode:
Diffstat (limited to 'pl/math/math_config.h')
-rw-r--r--pl/math/math_config.h252
1 files changed, 152 insertions, 100 deletions
diff --git a/pl/math/math_config.h b/pl/math/math_config.h
index dccb3ce4c775..c3dd8f2db8c7 100644
--- a/pl/math/math_config.h
+++ b/pl/math/math_config.h
@@ -13,9 +13,9 @@
#ifndef WANT_ROUNDING
/* If defined to 1, return correct results for special cases in non-nearest
- rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
- This may be set to 0 if there is no fenv support or if math functions only
- get called in round to nearest mode. */
+ rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than
+ -0.0f). This may be set to 0 if there is no fenv support or if math
+ functions only get called in round to nearest mode. */
# define WANT_ROUNDING 1
#endif
#ifndef WANT_ERRNO
@@ -27,33 +27,34 @@
#ifndef WANT_SIMD_EXCEPT
/* If defined to 1, trigger fp exceptions in vector routines, consistently with
behaviour expected from the corresponding scalar routine. */
-#define WANT_SIMD_EXCEPT 0
+# define WANT_SIMD_EXCEPT 0
#endif
/* Compiler can inline round as a single instruction. */
#ifndef HAVE_FAST_ROUND
# if __aarch64__
-# define HAVE_FAST_ROUND 1
+# define HAVE_FAST_ROUND 1
# else
-# define HAVE_FAST_ROUND 0
+# define HAVE_FAST_ROUND 0
# endif
#endif
/* Compiler can inline lround, but not (long)round(x). */
#ifndef HAVE_FAST_LROUND
-# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
-# define HAVE_FAST_LROUND 1
+# if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408 \
+ && __NO_MATH_ERRNO__
+# define HAVE_FAST_LROUND 1
# else
-# define HAVE_FAST_LROUND 0
+# define HAVE_FAST_LROUND 0
# endif
#endif
/* Compiler can inline fma as a single instruction. */
#ifndef HAVE_FAST_FMA
# if defined FP_FAST_FMA || __aarch64__
-# define HAVE_FAST_FMA 1
+# define HAVE_FAST_FMA 1
# else
-# define HAVE_FAST_FMA 0
+# define HAVE_FAST_FMA 0
# endif
#endif
@@ -62,9 +63,9 @@
to interpose math functions with both static and dynamic linking. */
#ifndef USE_GLIBC_ABI
# if __GNUC__
-# define USE_GLIBC_ABI 1
+# define USE_GLIBC_ABI 1
# else
-# define USE_GLIBC_ABI 0
+# define USE_GLIBC_ABI 0
# endif
#endif
@@ -76,15 +77,15 @@
# define likely(x) __builtin_expect (!!(x), 1)
# define unlikely(x) __builtin_expect (x, 0)
# if __GNUC__ >= 9
-# define attribute_copy(f) __attribute__ ((copy (f)))
+# define attribute_copy(f) __attribute__ ((copy (f)))
# else
-# define attribute_copy(f)
+# define attribute_copy(f)
# endif
-# define strong_alias(f, a) \
- extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
-# define hidden_alias(f, a) \
- extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
- attribute_copy (f);
+# define strong_alias(f, a) \
+ extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
+# define hidden_alias(f, a) \
+ extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
+ attribute_copy (f);
#else
# define HIDDEN
# define NOINLINE
@@ -93,6 +94,31 @@
# define unlikely(x) (x)
#endif
+/* Return ptr but hide its value from the compiler so accesses through it
+ cannot be optimized based on the contents.
+
+ The argument is copied into a local of the same type (via __typeof), the
+ local is passed through an empty asm statement as a read-write register
+ operand ("+r"), and the local is then the value of the whole expression.
+ The body is a GNU statement expression, so this needs a compiler that
+ supports ({ ... }), __typeof and __asm. ptr is evaluated once. */
+#define ptr_barrier(ptr) \
+ ({ \
+ __typeof (ptr) __ptr = (ptr); \
+ __asm("" : "+r"(__ptr)); \
+ __ptr; \
+ })
+
+/* Symbol renames to avoid libc conflicts.
+ In any file that includes this header, each __math_* name listed below is
+ replaced by the preprocessor with the corresponding arm_math_* name, for
+ both the float (f-suffixed) and double variants. */
+#define __math_oflowf arm_math_oflowf
+#define __math_uflowf arm_math_uflowf
+#define __math_may_uflowf arm_math_may_uflowf
+#define __math_divzerof arm_math_divzerof
+#define __math_oflow arm_math_oflow
+#define __math_uflow arm_math_uflow
+#define __math_may_uflow arm_math_may_uflow
+#define __math_divzero arm_math_divzero
+#define __math_invalidf arm_math_invalidf
+#define __math_invalid arm_math_invalid
+#define __math_check_oflow arm_math_check_oflow
+#define __math_check_uflow arm_math_check_uflow
+#define __math_check_oflowf arm_math_check_oflowf
+#define __math_check_uflowf arm_math_check_uflowf
+
#if HAVE_FAST_ROUND
/* When set, the roundtoint and converttoint functions are provided with
the semantics documented below. */
@@ -128,7 +154,7 @@ asuint (float f)
{
float f;
uint32_t i;
- } u = {f};
+ } u = { f };
return u.i;
}
@@ -139,7 +165,7 @@ asfloat (uint32_t i)
{
uint32_t i;
float f;
- } u = {i};
+ } u = { i };
return u.f;
}
@@ -150,7 +176,7 @@ asuint64 (double f)
{
double f;
uint64_t i;
- } u = {f};
+ } u = { f };
return u.i;
}
@@ -161,7 +187,7 @@ asdouble (uint64_t i)
{
uint64_t i;
double f;
- } u = {i};
+ } u = { i };
return u.f;
}
@@ -320,10 +346,26 @@ check_uflowf (float x)
extern const struct erff_data
{
- float erff_poly_A[6];
- float erff_poly_B[7];
+ struct
+ {
+ float erf, scale;
+ } tab[513];
} __erff_data HIDDEN;
+extern const struct sv_erff_data
+{
+ float erf[513]; /* Same field names and element count as tab[513] in
+ struct erff_data, but stored as two separate arrays
+ instead of one array of { erf, scale } pairs.
+ NOTE(review): the sv_ prefix suggests this layout is
+ for the SVE routines - confirm against the users. */
+ float scale[513];
+} __sv_erff_data HIDDEN;
+
+extern const struct erfcf_data
+{
+ struct
+ {
+ float erfc, scale;
+ } tab[645];
+} __erfcf_data HIDDEN;
+
/* Data for logf and log10f. */
#define LOGF_TABLE_BITS 4
#define LOGF_POLY_ORDER 4
@@ -349,9 +391,15 @@ extern const struct log10_data
double invln10;
double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10). */
double poly1[LOG10_POLY1_ORDER - 1];
- struct {double invc, logc;} tab[1 << LOG10_TABLE_BITS];
+ struct
+ {
+ double invc, logc;
+ } tab[1 << LOG10_TABLE_BITS];
#if !HAVE_FAST_FMA
- struct {double chi, clo;} tab2[1 << LOG10_TABLE_BITS];
+ struct
+ {
+ double chi, clo;
+ } tab2[1 << LOG10_TABLE_BITS];
#endif
} __log10_data HIDDEN;
@@ -374,44 +422,38 @@ extern const struct exp_data
double poly[4]; /* Last four coefficients. */
double exp2_shift;
double exp2_poly[EXP2_POLY_ORDER];
- uint64_t tab[2*(1 << EXP_TABLE_BITS)];
+ uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
} __exp_data HIDDEN;
-#define ERFC_NUM_INTERVALS 20
-#define ERFC_POLY_ORDER 12
-extern const struct erfc_data
-{
- double interval_bounds[ERFC_NUM_INTERVALS + 1];
- double poly[ERFC_NUM_INTERVALS][ERFC_POLY_ORDER + 1];
-} __erfc_data HIDDEN;
-extern const struct v_erfc_data
-{
- double interval_bounds[ERFC_NUM_INTERVALS + 1];
- double poly[ERFC_NUM_INTERVALS + 1][ERFC_POLY_ORDER + 1];
-} __v_erfc_data HIDDEN;
-
-#define ERFCF_POLY_NCOEFFS 16
-extern const struct erfcf_poly_data
-{
- double poly[4][ERFCF_POLY_NCOEFFS];
-} __erfcf_poly_data HIDDEN;
-
+/* Copied from math/v_exp.h for use in vector exp_tail. */
#define V_EXP_TAIL_TABLE_BITS 8
extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN;
-#define V_ERF_NINTS 49
-#define V_ERF_NCOEFFS 10
-extern const struct v_erf_data
+/* Copied from math/v_exp.h for use in vector exp2. */
+#define V_EXP_TABLE_BITS 7
+extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
+
+extern const struct erf_data
+{
+ struct
+ {
+ double erf, scale;
+ } tab[769];
+} __erf_data HIDDEN;
+
+extern const struct sv_erf_data
{
- double shifts[V_ERF_NINTS];
- double coeffs[V_ERF_NCOEFFS][V_ERF_NINTS];
-} __v_erf_data HIDDEN;
+ double erf[769];
+ double scale[769];
+} __sv_erf_data HIDDEN;
-#define V_ERFF_NCOEFFS 7
-extern const struct v_erff_data
+extern const struct erfc_data
{
- float coeffs[V_ERFF_NCOEFFS][2];
-} __v_erff_data HIDDEN;
+ struct
+ {
+ double erfc, scale;
+ } tab[3488];
+} __erfc_data HIDDEN;
#define ATAN_POLY_NCOEFFS 20
extern const struct atan_poly_data
@@ -465,7 +507,6 @@ extern const struct log1p_data
} __log1p_data HIDDEN;
#define LOG1PF_2U5
-#define V_LOG1PF_2U5
#define LOG1PF_NCOEFFS 9
extern const struct log1pf_data
{
@@ -481,61 +522,52 @@ extern const struct tanf_poly_data
float poly_cotan[TANF_Q_POLY_NCOEFFS];
} __tanf_poly_data HIDDEN;
-#define V_LOG2F_POLY_NCOEFFS 9
-extern const struct v_log2f_data
-{
- float poly[V_LOG2F_POLY_NCOEFFS];
-} __v_log2f_data HIDDEN;
-
#define V_LOG2_TABLE_BITS 7
-#define V_LOG2_POLY_ORDER 6
extern const struct v_log2_data
{
- double poly[V_LOG2_POLY_ORDER - 1];
+ double poly[5];
+ double invln2;
struct
{
double invc, log2c;
- } tab[1 << V_LOG2_TABLE_BITS];
+ } table[1 << V_LOG2_TABLE_BITS];
} __v_log2_data HIDDEN;
-#define V_SINF_NCOEFFS 4
-extern const struct sv_sinf_data
-{
- float coeffs[V_SINF_NCOEFFS];
-} __sv_sinf_data HIDDEN;
-
#define V_LOG10_TABLE_BITS 7
-#define V_LOG10_POLY_ORDER 6
extern const struct v_log10_data
{
+ double poly[5];
+ double invln10, log10_2;
struct
{
double invc, log10c;
- } tab[1 << V_LOG10_TABLE_BITS];
- double poly[V_LOG10_POLY_ORDER - 1];
- double invln10, log10_2;
+ } table[1 << V_LOG10_TABLE_BITS];
} __v_log10_data HIDDEN;
-#define V_LOG10F_POLY_ORDER 9
-extern const float __v_log10f_poly[V_LOG10F_POLY_ORDER - 1] HIDDEN;
-
-#define SV_LOGF_POLY_ORDER 8
-extern const float __sv_logf_poly[SV_LOGF_POLY_ORDER - 1] HIDDEN;
-
-#define SV_LOG_POLY_ORDER 6
-#define SV_LOG_TABLE_BITS 7
-extern const struct sv_log_data
+/* Some data for SVE powf's internal exp and log. */
+#define V_POWF_EXP2_TABLE_BITS 5
+#define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS)
+#define V_POWF_LOG2_TABLE_BITS 5
+#define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS)
+extern const struct v_powf_data
{
- double invc[1 << SV_LOG_TABLE_BITS];
- double logc[1 << SV_LOG_TABLE_BITS];
- double poly[SV_LOG_POLY_ORDER - 1];
-} __sv_log_data HIDDEN;
+ double invc[V_POWF_LOG2_N];
+ double logc[V_POWF_LOG2_N];
+ uint64_t scale[V_POWF_EXP2_N];
+} __v_powf_data HIDDEN;
-#ifndef SV_EXPF_USE_FEXPA
-#define SV_EXPF_USE_FEXPA 0
-#endif
-#define SV_EXPF_POLY_ORDER 6
-extern const float __sv_expf_poly[SV_EXPF_POLY_ORDER - 1] HIDDEN;
+#define V_LOG_POLY_ORDER 6
+#define V_LOG_TABLE_BITS 7
+extern const struct v_log_data
+{
+ /* Shared data for vector log and log-derived routines (e.g. asinh). */
+ double poly[V_LOG_POLY_ORDER - 1]; /* 5 entries: V_LOG_POLY_ORDER is 6. */
+ double ln2;
+ struct
+ {
+ double invc, logc;
+ } table[1 << V_LOG_TABLE_BITS]; /* 128 { invc, logc } pairs:
+ V_LOG_TABLE_BITS is 7. */
+} __v_log_data HIDDEN;
#define EXPM1F_POLY_ORDER 5
extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN;
@@ -564,9 +596,29 @@ extern const struct cbrt_data
double table[5];
} __cbrt_data HIDDEN;
-extern const struct v_tan_data
+#define ASINF_POLY_ORDER 4
+extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN;
+
+#define ASIN_POLY_ORDER 11
+extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN;
+
+/* Some data for AdvSIMD and SVE pow's internal exp and log. */
+#define V_POW_EXP_TABLE_BITS 8
+extern const struct v_pow_exp_data
{
- double neg_half_pi_hi, neg_half_pi_lo;
- double poly[9];
-} __v_tan_data HIDDEN;
+ double poly[3];
+ double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift;
+ uint64_t sbits[1 << V_POW_EXP_TABLE_BITS];
+} __v_pow_exp_data HIDDEN;
+
+#define V_POW_LOG_TABLE_BITS 7
+extern const struct v_pow_log_data
+{
+ double poly[7]; /* First coefficient is 1. */
+ double ln2_hi, ln2_lo; /* NOTE(review): presumably a high/low split of
+ ln(2) - confirm against the table definition. */
+ double invc[1 << V_POW_LOG_TABLE_BITS]; /* invc, logc and logctail are three
+ separate arrays of 128 entries
+ each: V_POW_LOG_TABLE_BITS is 7. */
+ double logc[1 << V_POW_LOG_TABLE_BITS];
+ double logctail[1 << V_POW_LOG_TABLE_BITS];
+} __v_pow_log_data HIDDEN;
+
#endif