diff options
Diffstat (limited to 'math/include')
-rw-r--r-- | math/include/mathlib.h | 294 | ||||
-rw-r--r-- | math/include/test_defs.h | 21 | ||||
-rw-r--r-- | math/include/test_sig.h | 47 |
3 files changed, 320 insertions, 42 deletions
diff --git a/math/include/mathlib.h b/math/include/mathlib.h index 64cbb9c1f850..23d04da99d93 100644 --- a/math/include/mathlib.h +++ b/math/include/mathlib.h @@ -1,58 +1,268 @@ /* * Public API. * - * Copyright (c) 2015-2023, Arm Limited. + * Copyright (c) 2015-2024, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #ifndef _MATHLIB_H #define _MATHLIB_H -float expf (float); -float exp2f (float); -float logf (float); -float log2f (float); -float powf (float, float); -float sinf (float); -float cosf (float); -void sincosf (float, float*, float*); - -double exp (double); -double exp10 (double); -double exp2 (double); -double log (double); -double log2 (double); -double pow (double, double); - #if __aarch64__ -# if __GNUC__ >= 5 -typedef __Float32x4_t __f32x4_t; -typedef __Float64x2_t __f64x2_t; -# elif __clang_major__*100+__clang_minor__ >= 305 -typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t; -typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t; -# else -# error Unsupported compiler -# endif +/* Low-accuracy scalar implementations of C23 routines. */ +float arm_math_cospif (float); +double arm_math_cospi (double); +float arm_math_sinpif (float); +double arm_math_sinpi (double); +float arm_math_tanpif (float); +double arm_math_tanpi (double); +void arm_math_sincospif (float, float *, float *); +void arm_math_sincospi (double, double *, double *); +#endif + +/* SIMD declaration for autovectorisation with fast-math enabled. Only GCC is + supported, and vector routines are only supported on Linux on AArch64. 
*/ +#if defined __aarch64__ && __linux__ && defined(__GNUC__) \ + && !defined(__clang__) && defined(__FAST_MATH__) +# define DECL_SIMD_aarch64 __attribute__ ((__simd__ ("notinbranch"), const)) +#else +# define DECL_SIMD_aarch64 +#endif + +#if WANT_EXPERIMENTAL_MATH + +float arm_math_erff (float); +DECL_SIMD_aarch64 float cospif (float); +DECL_SIMD_aarch64 float erfinvf (float); +DECL_SIMD_aarch64 float sinpif (float); +DECL_SIMD_aarch64 float tanpif (float); + +double arm_math_erf (double); +DECL_SIMD_aarch64 double cospi (double); +DECL_SIMD_aarch64 double erfinv (double); +DECL_SIMD_aarch64 double sinpi (double); +DECL_SIMD_aarch64 double tanpi (double); + +long double erfinvl (long double); + +#endif -# if __GNUC__ >= 9 || __clang_major__ >= 8 -# undef __vpcs -# define __vpcs __attribute__((__aarch64_vector_pcs__)) +/* Note these routines may not be provided by AOR (some are only available with + WANT_EXPERIMENTAL_MATH, some are not provided at all). Redeclare them here to + add vector annotations. 
*/ +DECL_SIMD_aarch64 float acosf (float); +DECL_SIMD_aarch64 float acoshf (float); +DECL_SIMD_aarch64 float asinf (float); +DECL_SIMD_aarch64 float asinhf (float); +DECL_SIMD_aarch64 float atan2f (float, float); +DECL_SIMD_aarch64 float atanf (float); +DECL_SIMD_aarch64 float atanhf (float); +DECL_SIMD_aarch64 float cbrtf (float); +DECL_SIMD_aarch64 float cosf (float); +DECL_SIMD_aarch64 float coshf (float); +DECL_SIMD_aarch64 float erfcf (float); +DECL_SIMD_aarch64 float erff (float); +DECL_SIMD_aarch64 float exp10f (float); +DECL_SIMD_aarch64 float exp2f (float); +DECL_SIMD_aarch64 float expf (float); +DECL_SIMD_aarch64 float expm1f (float); +DECL_SIMD_aarch64 float hypotf (float, float); +DECL_SIMD_aarch64 float log10f (float); +DECL_SIMD_aarch64 float log1pf (float); +DECL_SIMD_aarch64 float log2f (float); +DECL_SIMD_aarch64 float logf (float); +DECL_SIMD_aarch64 float powf (float, float); +DECL_SIMD_aarch64 float sinf (float); +void sincosf (float, float *, float *); +DECL_SIMD_aarch64 float sinhf (float); +DECL_SIMD_aarch64 float tanf (float); +DECL_SIMD_aarch64 float tanhf (float); + +DECL_SIMD_aarch64 double acos (double); +DECL_SIMD_aarch64 double acosh (double); +DECL_SIMD_aarch64 double asin (double); +DECL_SIMD_aarch64 double asinh (double); +DECL_SIMD_aarch64 double atan2 (double, double); +DECL_SIMD_aarch64 double atan (double); +DECL_SIMD_aarch64 double atanh (double); +DECL_SIMD_aarch64 double cbrt (double); +DECL_SIMD_aarch64 double cos (double); +DECL_SIMD_aarch64 double cosh (double); +DECL_SIMD_aarch64 double erfc (double); +DECL_SIMD_aarch64 double erf (double); +DECL_SIMD_aarch64 double exp10 (double); +DECL_SIMD_aarch64 double exp2 (double); +DECL_SIMD_aarch64 double exp (double); +DECL_SIMD_aarch64 double expm1 (double); +DECL_SIMD_aarch64 double hypot (double, double); +DECL_SIMD_aarch64 double log10 (double); +DECL_SIMD_aarch64 double log1p (double); +DECL_SIMD_aarch64 double log2 (double); +DECL_SIMD_aarch64 double log (double); 
+DECL_SIMD_aarch64 double pow (double, double); +DECL_SIMD_aarch64 double sin (double); +DECL_SIMD_aarch64 double sinh (double); +DECL_SIMD_aarch64 double tan (double); +DECL_SIMD_aarch64 double tanh (double); + +#if __aarch64__ && __linux__ +# include <arm_neon.h> +# undef __vpcs +# define __vpcs __attribute__((__aarch64_vector_pcs__)) /* Vector functions following the vector PCS using ABI names. */ -__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_expf_1u (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_exp2f_1u (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); -__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); -__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t); -__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); +__vpcs float32x4_t _ZGVnN4v_acosf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_acoshf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_asinf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_asinhf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_atanf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_atanhf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_cbrtf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_cosf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_coshf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_cospif (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_erfcf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_erff (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_exp10f (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_exp2f (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_exp2f_1u (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_expf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_expf_1u (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_expm1f (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_log10f (float32x4_t); +__vpcs float32x4_t 
_ZGVnN4v_log1pf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_log2f (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_logf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_sinf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_sinhf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_sinpif (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_tanf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_tanhf (float32x4_t); +__vpcs float32x4_t _ZGVnN4v_tanpif (float32x4_t); +__vpcs float32x4_t _ZGVnN4vl4_modff (float32x4_t, float *); +__vpcs float32x4_t _ZGVnN4vv_atan2f (float32x4_t, float32x4_t); +__vpcs float32x4_t _ZGVnN4vv_hypotf (float32x4_t, float32x4_t); +__vpcs float32x4_t _ZGVnN4vv_powf (float32x4_t, float32x4_t); +__vpcs float32x4x2_t _ZGVnN4v_cexpif (float32x4_t); +__vpcs void _ZGVnN4vl4l4_sincosf (float32x4_t, float *, float *); +__vpcs void _ZGVnN4vl4l4_sincospif (float32x4_t, float *, float *); + +__vpcs float64x2_t _ZGVnN2v_acos (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_acosh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_asin (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_asinh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_atan (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_atanh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_cbrt (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_cos (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_cosh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_cospi (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_erf (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_erfc (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_exp (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_exp10 (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_exp2 (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_expm1 (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_log (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_log10 (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_log1p (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_log2 (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_sin (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_sinh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_sinpi 
(float64x2_t); +__vpcs float64x2_t _ZGVnN2v_tan (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_tanh (float64x2_t); +__vpcs float64x2_t _ZGVnN2v_tanpi (float64x2_t); +__vpcs float64x2_t _ZGVnN2vl8_modf (float64x2_t, double *); +__vpcs float64x2_t _ZGVnN2vv_atan2 (float64x2_t, float64x2_t); +__vpcs float64x2_t _ZGVnN2vv_hypot (float64x2_t, float64x2_t); +__vpcs float64x2_t _ZGVnN2vv_pow (float64x2_t, float64x2_t); +__vpcs float64x2x2_t _ZGVnN2v_cexpi (float64x2_t); +__vpcs void _ZGVnN2vl8l8_sincos (float64x2_t, double *, double *); +__vpcs void _ZGVnN2vl8l8_sincospi (float64x2_t, double *, double *); + +# if WANT_EXPERIMENTAL_MATH +__vpcs float32x4_t _ZGVnN4v_erfinvf (float32x4_t); +__vpcs float64x2_t _ZGVnN2v_erfinv (float64x2_t); +# endif + +# include <arm_sve.h> +svfloat32_t _ZGVsMxv_acosf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_acoshf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_asinf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_asinhf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_atanf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_atanhf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_cbrtf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_cosf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_coshf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_cospif (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_erfcf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_erff (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_exp10f (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_exp2f (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_expf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_expm1f (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_log10f (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_log1pf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_log2f (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_logf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_sinf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_sinhf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_sinpif (svfloat32_t, svbool_t); 
+svfloat32_t _ZGVsMxv_tanf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_tanhf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxv_tanpif (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxvl4_modff (svfloat32_t, float *, svbool_t); +svfloat32_t _ZGVsMxvv_atan2f (svfloat32_t, svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxvv_hypotf (svfloat32_t, svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxvv_powf (svfloat32_t, svfloat32_t, svbool_t); +svfloat32x2_t _ZGVsMxv_cexpif (svfloat32_t, svbool_t); +void _ZGVsMxvl4l4_sincosf (svfloat32_t, float *, float *, svbool_t); +void _ZGVsMxvl4l4_sincospif (svfloat32_t, float *, float *, svbool_t); + +svfloat64_t _ZGVsMxv_acos (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_acosh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_asin (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_asinh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_atan (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_atanh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_cbrt (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_cos (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_cosh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_cospi (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_erf (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_erfc (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_exp (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_exp10 (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_exp2 (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_expm1 (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_log (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_log10 (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_log1p (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_log2 (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_sin (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_sinh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_sinpi (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_tan (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_tanh (svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxv_tanpi (svfloat64_t, svbool_t); +svfloat64_t 
_ZGVsMxvl8_modf (svfloat64_t, double *, svbool_t); +svfloat64_t _ZGVsMxvv_atan2 (svfloat64_t, svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxvv_hypot (svfloat64_t, svfloat64_t, svbool_t); +svfloat64_t _ZGVsMxvv_pow (svfloat64_t, svfloat64_t, svbool_t); +svfloat64x2_t _ZGVsMxv_cexpi (svfloat64_t, svbool_t); +void _ZGVsMxvl8l8_sincos (svfloat64_t, double *, double *, svbool_t); +void _ZGVsMxvl8l8_sincospi (svfloat64_t, double *, double *, svbool_t); + +# if WANT_EXPERIMENTAL_MATH + +svfloat32_t _ZGVsMxv_erfinvf (svfloat32_t, svbool_t); +svfloat32_t _ZGVsMxvv_powi (svfloat32_t, svint32_t, svbool_t); + +svfloat64_t _ZGVsMxvv_powk (svfloat64_t, svint64_t, svbool_t); +svfloat64_t _ZGVsMxv_erfinv (svfloat64_t, svbool_t); + # endif #endif diff --git a/math/include/test_defs.h b/math/include/test_defs.h new file mode 100644 index 000000000000..2fe66fa6f14c --- /dev/null +++ b/math/include/test_defs.h @@ -0,0 +1,21 @@ +/* + * Helper macros for emitting various details about routines for consumption by + * runulp.sh. This version of the file is for inclusion when building routines, + * so expansions are empty - see math/test/test_defs for versions used by the + * build system. + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception. + */ + +#define TEST_ULP(f, l) +#define TEST_ULP_NONNEAREST(f, l) + +#define TEST_DISABLE_FENV(f) +#define TEST_DISABLE_FENV_IF_NOT(f, e) + +#define TEST_INTERVAL(f, lo, hi, n) +#define TEST_SYM_INTERVAL(f, lo, hi, n) +#define TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n) + +#define TEST_CONTROL_VALUE(f, c) diff --git a/math/include/test_sig.h b/math/include/test_sig.h new file mode 100644 index 000000000000..a967829098d6 --- /dev/null +++ b/math/include/test_sig.h @@ -0,0 +1,47 @@ +/* + * Macros for emitting various ulp/bench entries based on function signature + * + * Copyright (c) 2024, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception. 
+ */ + +#define TEST_DECL_SF1(fun) float fun##f (float); +#define TEST_DECL_SF2(fun) float fun##f (float, float); +#define TEST_DECL_SD1(fun) double fun (double); +#define TEST_DECL_SD2(fun) double fun (double, double); + +#define TEST_DECL_VF1(fun) \ + float32x4_t VPCS_ATTR V_NAME_F1 (fun##f) (float32x4_t); +#define TEST_DECL_VF2(fun) \ + float32x4_t VPCS_ATTR V_NAME_F2 (fun##f) (float32x4_t, float32x4_t); +#define TEST_DECL_VD1(fun) VPCS_ATTR float64x2_t V_NAME_D1 (fun) (float64x2_t); +#define TEST_DECL_VD2(fun) \ + VPCS_ATTR float64x2_t V_NAME_D2 (fun) (float64x2_t, float64x2_t); + +#define TEST_DECL_SVF1(fun) \ + svfloat32_t SV_NAME_F1 (fun) (svfloat32_t, svbool_t); +#define TEST_DECL_SVF2(fun) \ + svfloat32_t SV_NAME_F2 (fun) (svfloat32_t, svfloat32_t, svbool_t); +#define TEST_DECL_SVD1(fun) \ + svfloat64_t SV_NAME_D1 (fun) (svfloat64_t, svbool_t); +#define TEST_DECL_SVD2(fun) \ + svfloat64_t SV_NAME_D2 (fun) (svfloat64_t, svfloat64_t, svbool_t); + +/* For building the routines, emit function prototype from TEST_SIG. This + ensures that the correct signature has been chosen (wrong one will be a + compile error). TEST_SIG is defined differently by various components of the + build system to emit entries in the wrappers and entries for mathbench and + ulp. */ +#ifndef _TEST_SIG +# if defined(EMIT_ULP_FUNCS) +# define _TEST_SIG(v, t, a, f, ...) TEST_SIG _Z##v##t##a (f) +# elif defined(EMIT_ULP_WRAPPERS) +# define _TEST_SIG(v, t, a, f, ...) TEST_SIG Z##v##N##t##a##_WRAP (f) +# elif defined(EMIT_MATHBENCH_FUNCS) +# define _TEST_SIG(v, t, a, f, ...) TEST_SIG _Z##v##t##a (f, ##__VA_ARGS__) +# else +# define _TEST_SIG(v, t, a, f, ...) TEST_DECL_##v##t##a (f) +# endif +#endif + +#define TEST_SIG(...) _TEST_SIG (__VA_ARGS__) |