1190225Srpaulo/* 2190225Srpaulo * Configuration for math routines. 3190225Srpaulo * 4190225Srpaulo * Copyright (c) 2017-2023, Arm Limited. 5190225Srpaulo * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6190225Srpaulo */ 7190225Srpaulo 8190225Srpaulo#ifndef _MATH_CONFIG_H 9190225Srpaulo#define _MATH_CONFIG_H 10190225Srpaulo 11190225Srpaulo#include <math.h> 12190225Srpaulo#include <stdint.h> 13190225Srpaulo 14190225Srpaulo#ifndef WANT_ROUNDING 15190225Srpaulo/* If defined to 1, return correct results for special cases in non-nearest 16190225Srpaulo rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f). 17190225Srpaulo This may be set to 0 if there is no fenv support or if math functions only 18190225Srpaulo get called in round to nearest mode. */ 19190225Srpaulo# define WANT_ROUNDING 1 20276768Sdelphij#endif 21190225Srpaulo#ifndef WANT_ERRNO 22190225Srpaulo/* If defined to 1, set errno in math functions according to ISO C. Many math 23190225Srpaulo libraries do not set errno, so this is 0 by default. It may need to be 24190225Srpaulo set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */ 25190225Srpaulo# define WANT_ERRNO 0 26190225Srpaulo#endif 27190225Srpaulo#ifndef WANT_ERRNO_UFLOW 28190225Srpaulo/* Set errno to ERANGE if result underflows to 0 (in all rounding modes). */ 29190225Srpaulo# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO) 30190225Srpaulo#endif 31190225Srpaulo 32190225Srpaulo/* Compiler can inline round as a single instruction. */ 33190225Srpaulo#ifndef HAVE_FAST_ROUND 34190225Srpaulo# if __aarch64__ 35190225Srpaulo# define HAVE_FAST_ROUND 1 36190225Srpaulo# else 37190225Srpaulo# define HAVE_FAST_ROUND 0 38190225Srpaulo# endif 39190225Srpaulo#endif 40190225Srpaulo 41190225Srpaulo/* Compiler can inline lround, but not (long)round(x). */ 42190225Srpaulo#ifndef HAVE_FAST_LROUND 43190225Srpaulo# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__ 44190225Srpaulo# define HAVE_FAST_LROUND 1 45190225Srpaulo# else 46190225Srpaulo# define HAVE_FAST_LROUND 0 47190225Srpaulo# endif 48190225Srpaulo#endif 49190225Srpaulo 50190225Srpaulo/* Compiler can inline fma as a single instruction. */ 51190225Srpaulo#ifndef HAVE_FAST_FMA 52190225Srpaulo# if defined FP_FAST_FMA || __aarch64__ 53190225Srpaulo# define HAVE_FAST_FMA 1 54190225Srpaulo# else 55276768Sdelphij# define HAVE_FAST_FMA 0 56276768Sdelphij# endif 57276768Sdelphij#endif 58276768Sdelphij 59276768Sdelphij/* Provide *_finite symbols and some of the glibc hidden symbols 60190225Srpaulo so libmathlib can be used with binaries compiled against glibc 61190225Srpaulo to interpose math functions with both static and dynamic linking. */ 62190225Srpaulo#ifndef USE_GLIBC_ABI 63190225Srpaulo# if __GNUC__ 64190225Srpaulo# define USE_GLIBC_ABI 1 65276768Sdelphij# else 66276768Sdelphij# define USE_GLIBC_ABI 0 67276768Sdelphij# endif 68276768Sdelphij#endif 69276768Sdelphij 70276768Sdelphij/* Optionally used extensions. */ 71276768Sdelphij#ifdef __GNUC__ 72276768Sdelphij# define HIDDEN __attribute__ ((__visibility__ ("hidden"))) 73190225Srpaulo# define NOINLINE __attribute__ ((noinline)) 74190225Srpaulo# define UNUSED __attribute__ ((unused)) 75190225Srpaulo# define likely(x) __builtin_expect (!!(x), 1) 76190225Srpaulo# define unlikely(x) __builtin_expect (x, 0) 77190225Srpaulo# if __GNUC__ >= 9 78190225Srpaulo# define attribute_copy(f) __attribute__ ((copy (f))) 79190225Srpaulo# else 80190225Srpaulo# define attribute_copy(f) 81190225Srpaulo# endif 82190225Srpaulo# define strong_alias(f, a) \ 83190225Srpaulo extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f); 84190225Srpaulo# define hidden_alias(f, a) \ 85190225Srpaulo extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \ 86190225Srpaulo attribute_copy (f); 87190225Srpaulo#else 88190225Srpaulo# define HIDDEN 89190225Srpaulo# define NOINLINE 90190225Srpaulo# define UNUSED 91190225Srpaulo# define likely(x) (x) 92190225Srpaulo# define unlikely(x) (x) 93190225Srpaulo#endif 94190225Srpaulo 95190225Srpaulo/* Return ptr but hide its value from the compiler so accesses through it 96190225Srpaulo cannot be optimized based on the contents. */ 97190225Srpaulo#define ptr_barrier(ptr) \ 98190225Srpaulo ({ \ 99190225Srpaulo __typeof (ptr) __ptr = (ptr); \ 100190225Srpaulo __asm("" : "+r"(__ptr)); \ 101190225Srpaulo __ptr; \ 102190225Srpaulo }) 103190225Srpaulo 104190225Srpaulo/* Symbol renames to avoid libc conflicts. */ 105190225Srpaulo#define __math_oflowf arm_math_oflowf 106190225Srpaulo#define __math_uflowf arm_math_uflowf 107190225Srpaulo#define __math_may_uflowf arm_math_may_uflowf 108190225Srpaulo#define __math_divzerof arm_math_divzerof 109190225Srpaulo#define __math_oflow arm_math_oflow 110190225Srpaulo#define __math_uflow arm_math_uflow 111190225Srpaulo#define __math_may_uflow arm_math_may_uflow 112276768Sdelphij#define __math_divzero arm_math_divzero 113190225Srpaulo#define __math_invalidf arm_math_invalidf 114190225Srpaulo#define __math_invalid arm_math_invalid 115190225Srpaulo#define __math_check_oflow arm_math_check_oflow 116190225Srpaulo#define __math_check_uflow arm_math_check_uflow 117190225Srpaulo#define __math_check_oflowf arm_math_check_oflowf 118190225Srpaulo#define __math_check_uflowf arm_math_check_uflowf 119190225Srpaulo 120190225Srpaulo#define __sincosf_table arm_math_sincosf_table 121190225Srpaulo#define __inv_pio4 arm_math_inv_pio4 122190225Srpaulo#define __exp2f_data arm_math_exp2f_data 123190225Srpaulo#define __logf_data arm_math_logf_data 124190225Srpaulo#define __log2f_data arm_math_log2f_data 125276768Sdelphij#define __powf_log2_data arm_math_powf_log2_data 126276768Sdelphij#define __exp_data arm_math_exp_data 127276768Sdelphij#define __log_data arm_math_log_data 128276768Sdelphij#define __log2_data arm_math_log2_data 129276768Sdelphij#define __pow_log_data arm_math_pow_log_data 130276768Sdelphij#define __erff_data arm_math_erff_data 131276768Sdelphij#define __erf_data arm_math_erf_data 132190225Srpaulo#define __v_exp_data arm_math_v_exp_data 133190225Srpaulo#define __v_log_data arm_math_v_log_data 134 135#if HAVE_FAST_ROUND 136/* When set, the roundtoint and converttoint functions are provided with 137 the semantics documented below. */ 138# define TOINT_INTRINSICS 1 139 140/* Round x to nearest int in all rounding modes, ties have to be rounded 141 consistently with converttoint so the results match. If the result 142 would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */ 143static inline double_t 144roundtoint (double_t x) 145{ 146 return round (x); 147} 148 149/* Convert x to nearest int in all rounding modes, ties have to be rounded 150 consistently with roundtoint. If the result is not representible in an 151 int32_t then the semantics is unspecified. */ 152static inline int32_t 153converttoint (double_t x) 154{ 155# if HAVE_FAST_LROUND 156 return lround (x); 157# else 158 return (long) round (x); 159# endif 160} 161#endif 162 163static inline uint32_t 164asuint (float f) 165{ 166 union 167 { 168 float f; 169 uint32_t i; 170 } u = {f}; 171 return u.i; 172} 173 174static inline float 175asfloat (uint32_t i) 176{ 177 union 178 { 179 uint32_t i; 180 float f; 181 } u = {i}; 182 return u.f; 183} 184 185static inline uint64_t 186asuint64 (double f) 187{ 188 union 189 { 190 double f; 191 uint64_t i; 192 } u = {f}; 193 return u.i; 194} 195 196static inline double 197asdouble (uint64_t i) 198{ 199 union 200 { 201 uint64_t i; 202 double f; 203 } u = {i}; 204 return u.f; 205} 206 207#ifndef IEEE_754_2008_SNAN 208# define IEEE_754_2008_SNAN 1 209#endif 210static inline int 211issignalingf_inline (float x) 212{ 213 uint32_t ix = asuint (x); 214 if (!IEEE_754_2008_SNAN) 215 return (ix & 0x7fc00000) == 0x7fc00000; 216 return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000; 217} 218 219static inline int 220issignaling_inline (double x) 221{ 222 uint64_t ix = asuint64 (x); 223 if (!IEEE_754_2008_SNAN) 224 return (ix & 0x7ff8000000000000) == 0x7ff8000000000000; 225 return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL; 226} 227 228#if __aarch64__ && __GNUC__ 229/* Prevent the optimization of a floating-point expression. */ 230static inline float 231opt_barrier_float (float x) 232{ 233 __asm__ __volatile__ ("" : "+w" (x)); 234 return x; 235} 236static inline double 237opt_barrier_double (double x) 238{ 239 __asm__ __volatile__ ("" : "+w" (x)); 240 return x; 241} 242/* Force the evaluation of a floating-point expression for its side-effect. */ 243static inline void 244force_eval_float (float x) 245{ 246 __asm__ __volatile__ ("" : "+w" (x)); 247} 248static inline void 249force_eval_double (double x) 250{ 251 __asm__ __volatile__ ("" : "+w" (x)); 252} 253#else 254static inline float 255opt_barrier_float (float x) 256{ 257 volatile float y = x; 258 return y; 259} 260static inline double 261opt_barrier_double (double x) 262{ 263 volatile double y = x; 264 return y; 265} 266static inline void 267force_eval_float (float x) 268{ 269 volatile float y UNUSED = x; 270} 271static inline void 272force_eval_double (double x) 273{ 274 volatile double y UNUSED = x; 275} 276#endif 277 278/* Evaluate an expression as the specified type, normally a type 279 cast should be enough, but compilers implement non-standard 280 excess-precision handling, so when FLT_EVAL_METHOD != 0 then 281 these functions may need to be customized. */ 282static inline float 283eval_as_float (float x) 284{ 285 return x; 286} 287static inline double 288eval_as_double (double x) 289{ 290 return x; 291} 292 293/* Error handling tail calls for special cases, with a sign argument. 294 The sign of the return value is set if the argument is non-zero. */ 295 296/* The result overflows. */ 297HIDDEN float __math_oflowf (uint32_t); 298/* The result underflows to 0 in nearest rounding mode. */ 299HIDDEN float __math_uflowf (uint32_t); 300/* The result underflows to 0 in some directed rounding mode only. */ 301HIDDEN float __math_may_uflowf (uint32_t); 302/* Division by zero. */ 303HIDDEN float __math_divzerof (uint32_t); 304/* The result overflows. */ 305HIDDEN double __math_oflow (uint32_t); 306/* The result underflows to 0 in nearest rounding mode. */ 307HIDDEN double __math_uflow (uint32_t); 308/* The result underflows to 0 in some directed rounding mode only. */ 309HIDDEN double __math_may_uflow (uint32_t); 310/* Division by zero. */ 311HIDDEN double __math_divzero (uint32_t); 312 313/* Error handling using input checking. */ 314 315/* Invalid input unless it is a quiet NaN. */ 316HIDDEN float __math_invalidf (float); 317/* Invalid input unless it is a quiet NaN. */ 318HIDDEN double __math_invalid (double); 319 320/* Error handling using output checking, only for errno setting. */ 321 322/* Check if the result overflowed to infinity. */ 323HIDDEN double __math_check_oflow (double); 324/* Check if the result underflowed to 0. */ 325HIDDEN double __math_check_uflow (double); 326 327/* Check if the result overflowed to infinity. */ 328static inline double 329check_oflow (double x) 330{ 331 return WANT_ERRNO ? __math_check_oflow (x) : x; 332} 333 334/* Check if the result underflowed to 0. */ 335static inline double 336check_uflow (double x) 337{ 338 return WANT_ERRNO ? __math_check_uflow (x) : x; 339} 340 341/* Check if the result overflowed to infinity. */ 342HIDDEN float __math_check_oflowf (float); 343/* Check if the result underflowed to 0. */ 344HIDDEN float __math_check_uflowf (float); 345 346/* Check if the result overflowed to infinity. */ 347static inline float 348check_oflowf (float x) 349{ 350 return WANT_ERRNO ? __math_check_oflowf (x) : x; 351} 352 353/* Check if the result underflowed to 0. */ 354static inline float 355check_uflowf (float x) 356{ 357 return WANT_ERRNO ? __math_check_uflowf (x) : x; 358} 359 360/* Shared between expf, exp2f and powf. */ 361#define EXP2F_TABLE_BITS 5 362#define EXP2F_POLY_ORDER 3 363extern const struct exp2f_data 364{ 365 uint64_t tab[1 << EXP2F_TABLE_BITS]; 366 double shift_scaled; 367 double poly[EXP2F_POLY_ORDER]; 368 double shift; 369 double invln2_scaled; 370 double poly_scaled[EXP2F_POLY_ORDER]; 371} __exp2f_data HIDDEN; 372 373#define LOGF_TABLE_BITS 4 374#define LOGF_POLY_ORDER 4 375extern const struct logf_data 376{ 377 struct 378 { 379 double invc, logc; 380 } tab[1 << LOGF_TABLE_BITS]; 381 double ln2; 382 double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */ 383} __logf_data HIDDEN; 384 385#define LOG2F_TABLE_BITS 4 386#define LOG2F_POLY_ORDER 4 387extern const struct log2f_data 388{ 389 struct 390 { 391 double invc, logc; 392 } tab[1 << LOG2F_TABLE_BITS]; 393 double poly[LOG2F_POLY_ORDER]; 394} __log2f_data HIDDEN; 395 396#define POWF_LOG2_TABLE_BITS 4 397#define POWF_LOG2_POLY_ORDER 5 398#if TOINT_INTRINSICS 399# define POWF_SCALE_BITS EXP2F_TABLE_BITS 400#else 401# define POWF_SCALE_BITS 0 402#endif 403#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS)) 404extern const struct powf_log2_data 405{ 406 struct 407 { 408 double invc, logc; 409 } tab[1 << POWF_LOG2_TABLE_BITS]; 410 double poly[POWF_LOG2_POLY_ORDER]; 411} __powf_log2_data HIDDEN; 412 413 414#define EXP_TABLE_BITS 7 415#define EXP_POLY_ORDER 5 416/* Use polynomial that is optimized for a wider input range. This may be 417 needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */ 418#define EXP_POLY_WIDE 0 419/* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be 420 needed for good precision in non-nearest rouning and !EXP_POLY_WIDE. */ 421#define EXP_USE_TOINT_NARROW 0 422#define EXP2_POLY_ORDER 5 423#define EXP2_POLY_WIDE 0 424/* Wider exp10 polynomial necessary for good precision in non-nearest rounding 425 and !TOINT_INTRINSICS. */ 426#define EXP10_POLY_WIDE 0 427extern const struct exp_data 428{ 429 double invln2N; 430 double invlog10_2N; 431 double shift; 432 double negln2hiN; 433 double negln2loN; 434 double neglog10_2hiN; 435 double neglog10_2loN; 436 double poly[4]; /* Last four coefficients. */ 437 double exp2_shift; 438 double exp2_poly[EXP2_POLY_ORDER]; 439 double exp10_poly[5]; 440 uint64_t tab[2*(1 << EXP_TABLE_BITS)]; 441} __exp_data HIDDEN; 442 443#define LOG_TABLE_BITS 7 444#define LOG_POLY_ORDER 6 445#define LOG_POLY1_ORDER 12 446extern const struct log_data 447{ 448 double ln2hi; 449 double ln2lo; 450 double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ 451 double poly1[LOG_POLY1_ORDER - 1]; 452 struct {double invc, logc;} tab[1 << LOG_TABLE_BITS]; 453#if !HAVE_FAST_FMA 454 struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS]; 455#endif 456} __log_data HIDDEN; 457 458#define LOG2_TABLE_BITS 6 459#define LOG2_POLY_ORDER 7 460#define LOG2_POLY1_ORDER 11 461extern const struct log2_data 462{ 463 double invln2hi; 464 double invln2lo; 465 double poly[LOG2_POLY_ORDER - 1]; 466 double poly1[LOG2_POLY1_ORDER - 1]; 467 struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS]; 468#if !HAVE_FAST_FMA 469 struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS]; 470#endif 471} __log2_data HIDDEN; 472 473#define POW_LOG_TABLE_BITS 7 474#define POW_LOG_POLY_ORDER 8 475extern const struct pow_log_data 476{ 477 double ln2hi; 478 double ln2lo; 479 double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ 480 /* Note: the pad field is unused, but allows slightly faster indexing. */ 481 struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS]; 482} __pow_log_data HIDDEN; 483 484extern const struct erff_data 485{ 486 float erff_poly_A[6]; 487 float erff_poly_B[7]; 488} __erff_data HIDDEN; 489 490#define ERF_POLY_A_ORDER 19 491#define ERF_POLY_A_NCOEFFS 10 492#define ERFC_POLY_C_NCOEFFS 16 493#define ERFC_POLY_D_NCOEFFS 18 494#define ERFC_POLY_E_NCOEFFS 14 495#define ERFC_POLY_F_NCOEFFS 17 496extern const struct erf_data 497{ 498 double erf_poly_A[ERF_POLY_A_NCOEFFS]; 499 double erf_ratio_N_A[5]; 500 double erf_ratio_D_A[5]; 501 double erf_ratio_N_B[7]; 502 double erf_ratio_D_B[6]; 503 double erfc_poly_C[ERFC_POLY_C_NCOEFFS]; 504 double erfc_poly_D[ERFC_POLY_D_NCOEFFS]; 505 double erfc_poly_E[ERFC_POLY_E_NCOEFFS]; 506 double erfc_poly_F[ERFC_POLY_F_NCOEFFS]; 507} __erf_data HIDDEN; 508 509#define V_EXP_TABLE_BITS 7 510extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN; 511 512#define V_LOG_TABLE_BITS 7 513extern const struct v_log_data 514{ 515 struct 516 { 517 double invc, logc; 518 } table[1 << V_LOG_TABLE_BITS]; 519} __v_log_data HIDDEN; 520 521#endif 522