1/*
2 * Configuration for math routines.
3 *
4 * Copyright (c) 2017-2023, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8#ifndef _MATH_CONFIG_H
9#define _MATH_CONFIG_H
10
11#include <math.h>
12#include <stdint.h>
13
14#ifndef WANT_ROUNDING
15/* If defined to 1, return correct results for special cases in non-nearest
16   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than
17   -0.0f). This may be set to 0 if there is no fenv support or if math
18   functions only get called in round to nearest mode.  */
19# define WANT_ROUNDING 1
20#endif
21#ifndef WANT_ERRNO
22/* If defined to 1, set errno in math functions according to ISO C.  Many math
23   libraries do not set errno, so this is 0 by default.  It may need to be
24   set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
25# define WANT_ERRNO 0
26#endif
27#ifndef WANT_SIMD_EXCEPT
28/* If defined to 1, trigger fp exceptions in vector routines, consistently with
29   behaviour expected from the corresponding scalar routine.  */
30# define WANT_SIMD_EXCEPT 0
31#endif
32
33/* Compiler can inline round as a single instruction.  */
34#ifndef HAVE_FAST_ROUND
35# if __aarch64__
36#  define HAVE_FAST_ROUND 1
37# else
38#  define HAVE_FAST_ROUND 0
39# endif
40#endif
41
42/* Compiler can inline lround, but not (long)round(x).  */
43#ifndef HAVE_FAST_LROUND
44# if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408                 \
45      && __NO_MATH_ERRNO__
46#  define HAVE_FAST_LROUND 1
47# else
48#  define HAVE_FAST_LROUND 0
49# endif
50#endif
51
52/* Compiler can inline fma as a single instruction.  */
53#ifndef HAVE_FAST_FMA
54# if defined FP_FAST_FMA || __aarch64__
55#  define HAVE_FAST_FMA 1
56# else
57#  define HAVE_FAST_FMA 0
58# endif
59#endif
60
61/* Provide *_finite symbols and some of the glibc hidden symbols
62   so libmathlib can be used with binaries compiled against glibc
63   to interpose math functions with both static and dynamic linking.  */
64#ifndef USE_GLIBC_ABI
65# if __GNUC__
66#  define USE_GLIBC_ABI 1
67# else
68#  define USE_GLIBC_ABI 0
69# endif
70#endif
71
72/* Optionally used extensions.  */
73#ifdef __GNUC__
74# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
75# define NOINLINE __attribute__ ((noinline))
76# define UNUSED __attribute__ ((unused))
77# define likely(x) __builtin_expect (!!(x), 1)
78# define unlikely(x) __builtin_expect (x, 0)
79# if __GNUC__ >= 9
80#  define attribute_copy(f) __attribute__ ((copy (f)))
81# else
82#  define attribute_copy(f)
83# endif
84# define strong_alias(f, a)                                                   \
85    extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
86# define hidden_alias(f, a)                                                   \
87    extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
88	attribute_copy (f);
89#else
90# define HIDDEN
91# define NOINLINE
92# define UNUSED
93# define likely(x) (x)
94# define unlikely(x) (x)
95#endif
96
97/* Return ptr but hide its value from the compiler so accesses through it
98   cannot be optimized based on the contents.  */
99#define ptr_barrier(ptr)                                                      \
100  ({                                                                          \
101    __typeof (ptr) __ptr = (ptr);                                             \
102    __asm("" : "+r"(__ptr));                                                  \
103    __ptr;                                                                    \
104  })
105
106/* Symbol renames to avoid libc conflicts.  */
107#define __math_oflowf arm_math_oflowf
108#define __math_uflowf arm_math_uflowf
109#define __math_may_uflowf arm_math_may_uflowf
110#define __math_divzerof arm_math_divzerof
111#define __math_oflow arm_math_oflow
112#define __math_uflow arm_math_uflow
113#define __math_may_uflow arm_math_may_uflow
114#define __math_divzero arm_math_divzero
115#define __math_invalidf arm_math_invalidf
116#define __math_invalid arm_math_invalid
117#define __math_check_oflow arm_math_check_oflow
118#define __math_check_uflow arm_math_check_uflow
119#define __math_check_oflowf arm_math_check_oflowf
120#define __math_check_uflowf arm_math_check_uflowf
121
122#if HAVE_FAST_ROUND
123/* When set, the roundtoint and converttoint functions are provided with
124   the semantics documented below.  */
125# define TOINT_INTRINSICS 1
126
127/* Round x to nearest int in all rounding modes, ties have to be rounded
128   consistently with converttoint so the results match.  If the result
129   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
130static inline double_t
131roundtoint (double_t x)
132{
133  return round (x);
134}
135
136/* Convert x to nearest int in all rounding modes, ties have to be rounded
137   consistently with roundtoint.  If the result is not representible in an
138   int32_t then the semantics is unspecified.  */
139static inline int32_t
140converttoint (double_t x)
141{
142# if HAVE_FAST_LROUND
143  return lround (x);
144# else
145  return (long) round (x);
146# endif
147}
148#endif
149
150static inline uint32_t
151asuint (float f)
152{
153  union
154  {
155    float f;
156    uint32_t i;
157  } u = { f };
158  return u.i;
159}
160
161static inline float
162asfloat (uint32_t i)
163{
164  union
165  {
166    uint32_t i;
167    float f;
168  } u = { i };
169  return u.f;
170}
171
172static inline uint64_t
173asuint64 (double f)
174{
175  union
176  {
177    double f;
178    uint64_t i;
179  } u = { f };
180  return u.i;
181}
182
183static inline double
184asdouble (uint64_t i)
185{
186  union
187  {
188    uint64_t i;
189    double f;
190  } u = { i };
191  return u.f;
192}
193
194#ifndef IEEE_754_2008_SNAN
195# define IEEE_754_2008_SNAN 1
196#endif
197static inline int
198issignalingf_inline (float x)
199{
200  uint32_t ix = asuint (x);
201  if (!IEEE_754_2008_SNAN)
202    return (ix & 0x7fc00000) == 0x7fc00000;
203  return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
204}
205
206static inline int
207issignaling_inline (double x)
208{
209  uint64_t ix = asuint64 (x);
210  if (!IEEE_754_2008_SNAN)
211    return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
212  return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
213}
214
215#if __aarch64__ && __GNUC__
216/* Prevent the optimization of a floating-point expression.  */
217static inline float
218opt_barrier_float (float x)
219{
220  __asm__ __volatile__ ("" : "+w" (x));
221  return x;
222}
223static inline double
224opt_barrier_double (double x)
225{
226  __asm__ __volatile__ ("" : "+w" (x));
227  return x;
228}
229/* Force the evaluation of a floating-point expression for its side-effect.  */
230static inline void
231force_eval_float (float x)
232{
233  __asm__ __volatile__ ("" : "+w" (x));
234}
235static inline void
236force_eval_double (double x)
237{
238  __asm__ __volatile__ ("" : "+w" (x));
239}
240#else
241static inline float
242opt_barrier_float (float x)
243{
244  volatile float y = x;
245  return y;
246}
247static inline double
248opt_barrier_double (double x)
249{
250  volatile double y = x;
251  return y;
252}
253static inline void
254force_eval_float (float x)
255{
256  volatile float y UNUSED = x;
257}
258static inline void
259force_eval_double (double x)
260{
261  volatile double y UNUSED = x;
262}
263#endif
264
265/* Evaluate an expression as the specified type, normally a type
266   cast should be enough, but compilers implement non-standard
267   excess-precision handling, so when FLT_EVAL_METHOD != 0 then
268   these functions may need to be customized.  */
269static inline float
270eval_as_float (float x)
271{
272  return x;
273}
274static inline double
275eval_as_double (double x)
276{
277  return x;
278}
279
280/* Error handling tail calls for special cases, with a sign argument.
281   The sign of the return value is set if the argument is non-zero.  */
282
283/* The result overflows.  */
284HIDDEN float __math_oflowf (uint32_t);
285/* The result underflows to 0 in nearest rounding mode.  */
286HIDDEN float __math_uflowf (uint32_t);
287/* The result underflows to 0 in some directed rounding mode only.  */
288HIDDEN float __math_may_uflowf (uint32_t);
289/* Division by zero.  */
290HIDDEN float __math_divzerof (uint32_t);
291/* The result overflows.  */
292HIDDEN double __math_oflow (uint32_t);
293/* The result underflows to 0 in nearest rounding mode.  */
294HIDDEN double __math_uflow (uint32_t);
295/* The result underflows to 0 in some directed rounding mode only.  */
296HIDDEN double __math_may_uflow (uint32_t);
297/* Division by zero.  */
298HIDDEN double __math_divzero (uint32_t);
299
300/* Error handling using input checking.  */
301
302/* Invalid input unless it is a quiet NaN.  */
303HIDDEN float __math_invalidf (float);
304/* Invalid input unless it is a quiet NaN.  */
305HIDDEN double __math_invalid (double);
306
307/* Error handling using output checking, only for errno setting.  */
308
309/* Check if the result overflowed to infinity.  */
310HIDDEN double __math_check_oflow (double);
311/* Check if the result underflowed to 0.  */
312HIDDEN double __math_check_uflow (double);
313
314/* Check if the result overflowed to infinity.  */
315static inline double
316check_oflow (double x)
317{
318  return WANT_ERRNO ? __math_check_oflow (x) : x;
319}
320
321/* Check if the result underflowed to 0.  */
322static inline double
323check_uflow (double x)
324{
325  return WANT_ERRNO ? __math_check_uflow (x) : x;
326}
327
328/* Check if the result overflowed to infinity.  */
329HIDDEN float __math_check_oflowf (float);
330/* Check if the result underflowed to 0.  */
331HIDDEN float __math_check_uflowf (float);
332
333/* Check if the result overflowed to infinity.  */
334static inline float
335check_oflowf (float x)
336{
337  return WANT_ERRNO ? __math_check_oflowf (x) : x;
338}
339
340/* Check if the result underflowed to 0.  */
341static inline float
342check_uflowf (float x)
343{
344  return WANT_ERRNO ? __math_check_uflowf (x) : x;
345}
346
347extern const struct erff_data
348{
349  struct
350  {
351    float erf, scale;
352  } tab[513];
353} __erff_data HIDDEN;
354
355extern const struct sv_erff_data
356{
357  float erf[513];
358  float scale[513];
359} __sv_erff_data HIDDEN;
360
361extern const struct erfcf_data
362{
363  struct
364  {
365    float erfc, scale;
366  } tab[645];
367} __erfcf_data HIDDEN;
368
369/* Data for logf and log10f.  */
370#define LOGF_TABLE_BITS 4
371#define LOGF_POLY_ORDER 4
372extern const struct logf_data
373{
374  struct
375  {
376    double invc, logc;
377  } tab[1 << LOGF_TABLE_BITS];
378  double ln2;
379  double invln10;
380  double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
381} __logf_data HIDDEN;
382
383/* Data for low accuracy log10 (with 1/ln(10) included in coefficients).  */
384#define LOG10_TABLE_BITS 7
385#define LOG10_POLY_ORDER 6
386#define LOG10_POLY1_ORDER 12
387extern const struct log10_data
388{
389  double ln2hi;
390  double ln2lo;
391  double invln10;
392  double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10).  */
393  double poly1[LOG10_POLY1_ORDER - 1];
394  struct
395  {
396    double invc, logc;
397  } tab[1 << LOG10_TABLE_BITS];
398#if !HAVE_FAST_FMA
399  struct
400  {
401    double chi, clo;
402  } tab2[1 << LOG10_TABLE_BITS];
403#endif
404} __log10_data HIDDEN;
405
406#define EXP_TABLE_BITS 7
407#define EXP_POLY_ORDER 5
408/* Use polynomial that is optimized for a wider input range.  This may be
409   needed for good precision in non-nearest rounding and !TOINT_INTRINSICS.  */
410#define EXP_POLY_WIDE 0
411/* Use close to nearest rounding toint when !TOINT_INTRINSICS.  This may be
412   needed for good precision in non-nearest rouning and !EXP_POLY_WIDE.  */
413#define EXP_USE_TOINT_NARROW 0
414#define EXP2_POLY_ORDER 5
415#define EXP2_POLY_WIDE 0
416extern const struct exp_data
417{
418  double invln2N;
419  double shift;
420  double negln2hiN;
421  double negln2loN;
422  double poly[4]; /* Last four coefficients.  */
423  double exp2_shift;
424  double exp2_poly[EXP2_POLY_ORDER];
425  uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
426} __exp_data HIDDEN;
427
428/* Copied from math/v_exp.h for use in vector exp_tail.  */
429#define V_EXP_TAIL_TABLE_BITS 8
430extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN;
431
432/* Copied from math/v_exp.h for use in vector exp2.  */
433#define V_EXP_TABLE_BITS 7
434extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
435
436extern const struct erf_data
437{
438  struct
439  {
440    double erf, scale;
441  } tab[769];
442} __erf_data HIDDEN;
443
444extern const struct sv_erf_data
445{
446  double erf[769];
447  double scale[769];
448} __sv_erf_data HIDDEN;
449
450extern const struct erfc_data
451{
452  struct
453  {
454    double erfc, scale;
455  } tab[3488];
456} __erfc_data HIDDEN;
457
458#define ATAN_POLY_NCOEFFS 20
459extern const struct atan_poly_data
460{
461  double poly[ATAN_POLY_NCOEFFS];
462} __atan_poly_data HIDDEN;
463
464#define ATANF_POLY_NCOEFFS 8
465extern const struct atanf_poly_data
466{
467  float poly[ATANF_POLY_NCOEFFS];
468} __atanf_poly_data HIDDEN;
469
470#define ASINHF_NCOEFFS 8
471extern const struct asinhf_data
472{
473  float coeffs[ASINHF_NCOEFFS];
474} __asinhf_data HIDDEN;
475
476#define LOG_TABLE_BITS 7
477#define LOG_POLY_ORDER 6
478#define LOG_POLY1_ORDER 12
479extern const struct log_data
480{
481  double ln2hi;
482  double ln2lo;
483  double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
484  double poly1[LOG_POLY1_ORDER - 1];
485  struct
486  {
487    double invc, logc;
488  } tab[1 << LOG_TABLE_BITS];
489#if !HAVE_FAST_FMA
490  struct
491  {
492    double chi, clo;
493  } tab2[1 << LOG_TABLE_BITS];
494#endif
495} __log_data HIDDEN;
496
497#define ASINH_NCOEFFS 18
498extern const struct asinh_data
499{
500  double poly[ASINH_NCOEFFS];
501} __asinh_data HIDDEN;
502
503#define LOG1P_NCOEFFS 19
504extern const struct log1p_data
505{
506  double coeffs[LOG1P_NCOEFFS];
507} __log1p_data HIDDEN;
508
509#define LOG1PF_2U5
510#define LOG1PF_NCOEFFS 9
511extern const struct log1pf_data
512{
513  float coeffs[LOG1PF_NCOEFFS];
514} __log1pf_data HIDDEN;
515
516#define TANF_P_POLY_NCOEFFS 6
517/* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps.  */
518#define TANF_Q_POLY_NCOEFFS 4
519extern const struct tanf_poly_data
520{
521  float poly_tan[TANF_P_POLY_NCOEFFS];
522  float poly_cotan[TANF_Q_POLY_NCOEFFS];
523} __tanf_poly_data HIDDEN;
524
525#define V_LOG2_TABLE_BITS 7
526extern const struct v_log2_data
527{
528  double poly[5];
529  double invln2;
530  struct
531  {
532    double invc, log2c;
533  } table[1 << V_LOG2_TABLE_BITS];
534} __v_log2_data HIDDEN;
535
536#define V_LOG10_TABLE_BITS 7
537extern const struct v_log10_data
538{
539  double poly[5];
540  double invln10, log10_2;
541  struct
542  {
543    double invc, log10c;
544  } table[1 << V_LOG10_TABLE_BITS];
545} __v_log10_data HIDDEN;
546
547/* Some data for SVE powf's internal exp and log.  */
548#define V_POWF_EXP2_TABLE_BITS 5
549#define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS)
550#define V_POWF_LOG2_TABLE_BITS 5
551#define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS)
552extern const struct v_powf_data
553{
554  double invc[V_POWF_LOG2_N];
555  double logc[V_POWF_LOG2_N];
556  uint64_t scale[V_POWF_EXP2_N];
557} __v_powf_data HIDDEN;
558
559#define V_LOG_POLY_ORDER 6
560#define V_LOG_TABLE_BITS 7
561extern const struct v_log_data
562{
563  /* Shared data for vector log and log-derived routines (e.g. asinh).  */
564  double poly[V_LOG_POLY_ORDER - 1];
565  double ln2;
566  struct
567  {
568    double invc, logc;
569  } table[1 << V_LOG_TABLE_BITS];
570} __v_log_data HIDDEN;
571
572#define EXPM1F_POLY_ORDER 5
573extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN;
574
575#define EXPF_TABLE_BITS 5
576#define EXPF_POLY_ORDER 3
577extern const struct expf_data
578{
579  uint64_t tab[1 << EXPF_TABLE_BITS];
580  double invln2_scaled;
581  double poly_scaled[EXPF_POLY_ORDER];
582} __expf_data HIDDEN;
583
584#define EXPM1_POLY_ORDER 11
585extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN;
586
587extern const struct cbrtf_data
588{
589  float poly[4];
590  float table[5];
591} __cbrtf_data HIDDEN;
592
593extern const struct cbrt_data
594{
595  double poly[4];
596  double table[5];
597} __cbrt_data HIDDEN;
598
599#define ASINF_POLY_ORDER 4
600extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN;
601
602#define ASIN_POLY_ORDER 11
603extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN;
604
605/* Some data for AdvSIMD and SVE pow's internal exp and log.  */
606#define V_POW_EXP_TABLE_BITS 8
607extern const struct v_pow_exp_data
608{
609  double poly[3];
610  double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift;
611  uint64_t sbits[1 << V_POW_EXP_TABLE_BITS];
612} __v_pow_exp_data HIDDEN;
613
614#define V_POW_LOG_TABLE_BITS 7
615extern const struct v_pow_log_data
616{
617  double poly[7]; /* First coefficient is 1.  */
618  double ln2_hi, ln2_lo;
619  double invc[1 << V_POW_LOG_TABLE_BITS];
620  double logc[1 << V_POW_LOG_TABLE_BITS];
621  double logctail[1 << V_POW_LOG_TABLE_BITS];
622} __v_pow_log_data HIDDEN;
623
624#endif
625