1190225Srpaulo/*
2190225Srpaulo * Configuration for math routines.
3190225Srpaulo *
4190225Srpaulo * Copyright (c) 2017-2023, Arm Limited.
5190225Srpaulo * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6190225Srpaulo */
7190225Srpaulo
8190225Srpaulo#ifndef _MATH_CONFIG_H
9190225Srpaulo#define _MATH_CONFIG_H
10190225Srpaulo
11190225Srpaulo#include <math.h>
12190225Srpaulo#include <stdint.h>
13190225Srpaulo
14190225Srpaulo#ifndef WANT_ROUNDING
15190225Srpaulo/* If defined to 1, return correct results for special cases in non-nearest
16190225Srpaulo   rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than -0.0f).
17190225Srpaulo   This may be set to 0 if there is no fenv support or if math functions only
18190225Srpaulo   get called in round to nearest mode.  */
19190225Srpaulo# define WANT_ROUNDING 1
20276768Sdelphij#endif
21190225Srpaulo#ifndef WANT_ERRNO
22190225Srpaulo/* If defined to 1, set errno in math functions according to ISO C.  Many math
23190225Srpaulo   libraries do not set errno, so this is 0 by default.  It may need to be
24190225Srpaulo   set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
25190225Srpaulo# define WANT_ERRNO 0
26190225Srpaulo#endif
27190225Srpaulo#ifndef WANT_ERRNO_UFLOW
28190225Srpaulo/* Set errno to ERANGE if result underflows to 0 (in all rounding modes).  */
29190225Srpaulo# define WANT_ERRNO_UFLOW (WANT_ROUNDING && WANT_ERRNO)
30190225Srpaulo#endif
31190225Srpaulo
32190225Srpaulo/* Compiler can inline round as a single instruction.  */
33190225Srpaulo#ifndef HAVE_FAST_ROUND
34190225Srpaulo# if __aarch64__
35190225Srpaulo#   define HAVE_FAST_ROUND 1
36190225Srpaulo# else
37190225Srpaulo#   define HAVE_FAST_ROUND 0
38190225Srpaulo# endif
39190225Srpaulo#endif
40190225Srpaulo
41190225Srpaulo/* Compiler can inline lround, but not (long)round(x).  */
42190225Srpaulo#ifndef HAVE_FAST_LROUND
43190225Srpaulo# if __aarch64__ && (100*__GNUC__ + __GNUC_MINOR__) >= 408 && __NO_MATH_ERRNO__
44190225Srpaulo#   define HAVE_FAST_LROUND 1
45190225Srpaulo# else
46190225Srpaulo#   define HAVE_FAST_LROUND 0
47190225Srpaulo# endif
48190225Srpaulo#endif
49190225Srpaulo
50190225Srpaulo/* Compiler can inline fma as a single instruction.  */
51190225Srpaulo#ifndef HAVE_FAST_FMA
52190225Srpaulo# if defined FP_FAST_FMA || __aarch64__
53190225Srpaulo#   define HAVE_FAST_FMA 1
54190225Srpaulo# else
55276768Sdelphij#   define HAVE_FAST_FMA 0
56276768Sdelphij# endif
57276768Sdelphij#endif
58276768Sdelphij
59276768Sdelphij/* Provide *_finite symbols and some of the glibc hidden symbols
60190225Srpaulo   so libmathlib can be used with binaries compiled against glibc
61190225Srpaulo   to interpose math functions with both static and dynamic linking.  */
62190225Srpaulo#ifndef USE_GLIBC_ABI
63190225Srpaulo# if __GNUC__
64190225Srpaulo#   define USE_GLIBC_ABI 1
65276768Sdelphij# else
66276768Sdelphij#   define USE_GLIBC_ABI 0
67276768Sdelphij# endif
68276768Sdelphij#endif
69276768Sdelphij
70276768Sdelphij/* Optionally used extensions.  */
71276768Sdelphij#ifdef __GNUC__
72276768Sdelphij# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
73190225Srpaulo# define NOINLINE __attribute__ ((noinline))
74190225Srpaulo# define UNUSED __attribute__ ((unused))
75190225Srpaulo# define likely(x) __builtin_expect (!!(x), 1)
76190225Srpaulo# define unlikely(x) __builtin_expect (x, 0)
77190225Srpaulo# if __GNUC__ >= 9
78190225Srpaulo#   define attribute_copy(f) __attribute__ ((copy (f)))
79190225Srpaulo# else
80190225Srpaulo#   define attribute_copy(f)
81190225Srpaulo# endif
82190225Srpaulo# define strong_alias(f, a) \
83190225Srpaulo  extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
84190225Srpaulo# define hidden_alias(f, a) \
85190225Srpaulo  extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
86190225Srpaulo  attribute_copy (f);
87190225Srpaulo#else
88190225Srpaulo# define HIDDEN
89190225Srpaulo# define NOINLINE
90190225Srpaulo# define UNUSED
91190225Srpaulo# define likely(x) (x)
92190225Srpaulo# define unlikely(x) (x)
93190225Srpaulo#endif
94190225Srpaulo
95190225Srpaulo/* Return ptr but hide its value from the compiler so accesses through it
96190225Srpaulo   cannot be optimized based on the contents.  */
97190225Srpaulo#define ptr_barrier(ptr)                                                      \
98190225Srpaulo  ({                                                                          \
99190225Srpaulo    __typeof (ptr) __ptr = (ptr);                                             \
100190225Srpaulo    __asm("" : "+r"(__ptr));                                                  \
101190225Srpaulo    __ptr;                                                                    \
102190225Srpaulo  })
103190225Srpaulo
104190225Srpaulo/* Symbol renames to avoid libc conflicts.  */
105190225Srpaulo#define __math_oflowf arm_math_oflowf
106190225Srpaulo#define __math_uflowf arm_math_uflowf
107190225Srpaulo#define __math_may_uflowf arm_math_may_uflowf
108190225Srpaulo#define __math_divzerof arm_math_divzerof
109190225Srpaulo#define __math_oflow arm_math_oflow
110190225Srpaulo#define __math_uflow arm_math_uflow
111190225Srpaulo#define __math_may_uflow arm_math_may_uflow
112276768Sdelphij#define __math_divzero arm_math_divzero
113190225Srpaulo#define __math_invalidf arm_math_invalidf
114190225Srpaulo#define __math_invalid arm_math_invalid
115190225Srpaulo#define __math_check_oflow arm_math_check_oflow
116190225Srpaulo#define __math_check_uflow arm_math_check_uflow
117190225Srpaulo#define __math_check_oflowf arm_math_check_oflowf
118190225Srpaulo#define __math_check_uflowf arm_math_check_uflowf
119190225Srpaulo
120190225Srpaulo#define __sincosf_table arm_math_sincosf_table
121190225Srpaulo#define __inv_pio4 arm_math_inv_pio4
122190225Srpaulo#define __exp2f_data arm_math_exp2f_data
123190225Srpaulo#define __logf_data arm_math_logf_data
124190225Srpaulo#define __log2f_data arm_math_log2f_data
125276768Sdelphij#define __powf_log2_data arm_math_powf_log2_data
126276768Sdelphij#define __exp_data arm_math_exp_data
127276768Sdelphij#define __log_data arm_math_log_data
128276768Sdelphij#define __log2_data arm_math_log2_data
129276768Sdelphij#define __pow_log_data arm_math_pow_log_data
130276768Sdelphij#define __erff_data arm_math_erff_data
131276768Sdelphij#define __erf_data arm_math_erf_data
132190225Srpaulo#define __v_exp_data arm_math_v_exp_data
133190225Srpaulo#define __v_log_data arm_math_v_log_data
134
135#if HAVE_FAST_ROUND
136/* When set, the roundtoint and converttoint functions are provided with
137   the semantics documented below.  */
138# define TOINT_INTRINSICS 1
139
140/* Round x to nearest int in all rounding modes, ties have to be rounded
141   consistently with converttoint so the results match.  If the result
142   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
143static inline double_t
144roundtoint (double_t x)
145{
146  return round (x);
147}
148
149/* Convert x to nearest int in all rounding modes, ties have to be rounded
150   consistently with roundtoint.  If the result is not representible in an
151   int32_t then the semantics is unspecified.  */
152static inline int32_t
153converttoint (double_t x)
154{
155# if HAVE_FAST_LROUND
156  return lround (x);
157# else
158  return (long) round (x);
159# endif
160}
161#endif
162
163static inline uint32_t
164asuint (float f)
165{
166  union
167  {
168    float f;
169    uint32_t i;
170  } u = {f};
171  return u.i;
172}
173
174static inline float
175asfloat (uint32_t i)
176{
177  union
178  {
179    uint32_t i;
180    float f;
181  } u = {i};
182  return u.f;
183}
184
185static inline uint64_t
186asuint64 (double f)
187{
188  union
189  {
190    double f;
191    uint64_t i;
192  } u = {f};
193  return u.i;
194}
195
196static inline double
197asdouble (uint64_t i)
198{
199  union
200  {
201    uint64_t i;
202    double f;
203  } u = {i};
204  return u.f;
205}
206
207#ifndef IEEE_754_2008_SNAN
208# define IEEE_754_2008_SNAN 1
209#endif
210static inline int
211issignalingf_inline (float x)
212{
213  uint32_t ix = asuint (x);
214  if (!IEEE_754_2008_SNAN)
215    return (ix & 0x7fc00000) == 0x7fc00000;
216  return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
217}
218
219static inline int
220issignaling_inline (double x)
221{
222  uint64_t ix = asuint64 (x);
223  if (!IEEE_754_2008_SNAN)
224    return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
225  return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
226}
227
228#if __aarch64__ && __GNUC__
229/* Prevent the optimization of a floating-point expression.  */
230static inline float
231opt_barrier_float (float x)
232{
233  __asm__ __volatile__ ("" : "+w" (x));
234  return x;
235}
236static inline double
237opt_barrier_double (double x)
238{
239  __asm__ __volatile__ ("" : "+w" (x));
240  return x;
241}
242/* Force the evaluation of a floating-point expression for its side-effect.  */
243static inline void
244force_eval_float (float x)
245{
246  __asm__ __volatile__ ("" : "+w" (x));
247}
248static inline void
249force_eval_double (double x)
250{
251  __asm__ __volatile__ ("" : "+w" (x));
252}
253#else
254static inline float
255opt_barrier_float (float x)
256{
257  volatile float y = x;
258  return y;
259}
260static inline double
261opt_barrier_double (double x)
262{
263  volatile double y = x;
264  return y;
265}
266static inline void
267force_eval_float (float x)
268{
269  volatile float y UNUSED = x;
270}
271static inline void
272force_eval_double (double x)
273{
274  volatile double y UNUSED = x;
275}
276#endif
277
278/* Evaluate an expression as the specified type, normally a type
279   cast should be enough, but compilers implement non-standard
280   excess-precision handling, so when FLT_EVAL_METHOD != 0 then
281   these functions may need to be customized.  */
282static inline float
283eval_as_float (float x)
284{
285  return x;
286}
287static inline double
288eval_as_double (double x)
289{
290  return x;
291}
292
293/* Error handling tail calls for special cases, with a sign argument.
294   The sign of the return value is set if the argument is non-zero.  */
295
296/* The result overflows.  */
297HIDDEN float __math_oflowf (uint32_t);
298/* The result underflows to 0 in nearest rounding mode.  */
299HIDDEN float __math_uflowf (uint32_t);
300/* The result underflows to 0 in some directed rounding mode only.  */
301HIDDEN float __math_may_uflowf (uint32_t);
302/* Division by zero.  */
303HIDDEN float __math_divzerof (uint32_t);
304/* The result overflows.  */
305HIDDEN double __math_oflow (uint32_t);
306/* The result underflows to 0 in nearest rounding mode.  */
307HIDDEN double __math_uflow (uint32_t);
308/* The result underflows to 0 in some directed rounding mode only.  */
309HIDDEN double __math_may_uflow (uint32_t);
310/* Division by zero.  */
311HIDDEN double __math_divzero (uint32_t);
312
313/* Error handling using input checking.  */
314
315/* Invalid input unless it is a quiet NaN.  */
316HIDDEN float __math_invalidf (float);
317/* Invalid input unless it is a quiet NaN.  */
318HIDDEN double __math_invalid (double);
319
320/* Error handling using output checking, only for errno setting.  */
321
322/* Check if the result overflowed to infinity.  */
323HIDDEN double __math_check_oflow (double);
324/* Check if the result underflowed to 0.  */
325HIDDEN double __math_check_uflow (double);
326
327/* Check if the result overflowed to infinity.  */
328static inline double
329check_oflow (double x)
330{
331  return WANT_ERRNO ? __math_check_oflow (x) : x;
332}
333
334/* Check if the result underflowed to 0.  */
335static inline double
336check_uflow (double x)
337{
338  return WANT_ERRNO ? __math_check_uflow (x) : x;
339}
340
341/* Check if the result overflowed to infinity.  */
342HIDDEN float __math_check_oflowf (float);
343/* Check if the result underflowed to 0.  */
344HIDDEN float __math_check_uflowf (float);
345
346/* Check if the result overflowed to infinity.  */
347static inline float
348check_oflowf (float x)
349{
350  return WANT_ERRNO ? __math_check_oflowf (x) : x;
351}
352
353/* Check if the result underflowed to 0.  */
354static inline float
355check_uflowf (float x)
356{
357  return WANT_ERRNO ? __math_check_uflowf (x) : x;
358}
359
360/* Shared between expf, exp2f and powf.  */
361#define EXP2F_TABLE_BITS 5
362#define EXP2F_POLY_ORDER 3
363extern const struct exp2f_data
364{
365  uint64_t tab[1 << EXP2F_TABLE_BITS];
366  double shift_scaled;
367  double poly[EXP2F_POLY_ORDER];
368  double shift;
369  double invln2_scaled;
370  double poly_scaled[EXP2F_POLY_ORDER];
371} __exp2f_data HIDDEN;
372
373#define LOGF_TABLE_BITS 4
374#define LOGF_POLY_ORDER 4
375extern const struct logf_data
376{
377  struct
378  {
379    double invc, logc;
380  } tab[1 << LOGF_TABLE_BITS];
381  double ln2;
382  double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
383} __logf_data HIDDEN;
384
385#define LOG2F_TABLE_BITS 4
386#define LOG2F_POLY_ORDER 4
387extern const struct log2f_data
388{
389  struct
390  {
391    double invc, logc;
392  } tab[1 << LOG2F_TABLE_BITS];
393  double poly[LOG2F_POLY_ORDER];
394} __log2f_data HIDDEN;
395
396#define POWF_LOG2_TABLE_BITS 4
397#define POWF_LOG2_POLY_ORDER 5
398#if TOINT_INTRINSICS
399# define POWF_SCALE_BITS EXP2F_TABLE_BITS
400#else
401# define POWF_SCALE_BITS 0
402#endif
403#define POWF_SCALE ((double) (1 << POWF_SCALE_BITS))
404extern const struct powf_log2_data
405{
406  struct
407  {
408    double invc, logc;
409  } tab[1 << POWF_LOG2_TABLE_BITS];
410  double poly[POWF_LOG2_POLY_ORDER];
411} __powf_log2_data HIDDEN;
412
413
414#define EXP_TABLE_BITS 7
415#define EXP_POLY_ORDER 5
416/* Use polynomial that is optimized for a wider input range.  This may be
417   needed for good precision in non-nearest rounding and !TOINT_INTRINSICS.  */
418#define EXP_POLY_WIDE 0
419/* Use close to nearest rounding toint when !TOINT_INTRINSICS.  This may be
420   needed for good precision in non-nearest rouning and !EXP_POLY_WIDE.  */
421#define EXP_USE_TOINT_NARROW 0
422#define EXP2_POLY_ORDER 5
423#define EXP2_POLY_WIDE 0
424/* Wider exp10 polynomial necessary for good precision in non-nearest rounding
425   and !TOINT_INTRINSICS.  */
426#define EXP10_POLY_WIDE 0
427extern const struct exp_data
428{
429  double invln2N;
430  double invlog10_2N;
431  double shift;
432  double negln2hiN;
433  double negln2loN;
434  double neglog10_2hiN;
435  double neglog10_2loN;
436  double poly[4]; /* Last four coefficients.  */
437  double exp2_shift;
438  double exp2_poly[EXP2_POLY_ORDER];
439  double exp10_poly[5];
440  uint64_t tab[2*(1 << EXP_TABLE_BITS)];
441} __exp_data HIDDEN;
442
443#define LOG_TABLE_BITS 7
444#define LOG_POLY_ORDER 6
445#define LOG_POLY1_ORDER 12
446extern const struct log_data
447{
448  double ln2hi;
449  double ln2lo;
450  double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
451  double poly1[LOG_POLY1_ORDER - 1];
452  struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
453#if !HAVE_FAST_FMA
454  struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
455#endif
456} __log_data HIDDEN;
457
458#define LOG2_TABLE_BITS 6
459#define LOG2_POLY_ORDER 7
460#define LOG2_POLY1_ORDER 11
461extern const struct log2_data
462{
463  double invln2hi;
464  double invln2lo;
465  double poly[LOG2_POLY_ORDER - 1];
466  double poly1[LOG2_POLY1_ORDER - 1];
467  struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
468#if !HAVE_FAST_FMA
469  struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
470#endif
471} __log2_data HIDDEN;
472
473#define POW_LOG_TABLE_BITS 7
474#define POW_LOG_POLY_ORDER 8
475extern const struct pow_log_data
476{
477  double ln2hi;
478  double ln2lo;
479  double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
480  /* Note: the pad field is unused, but allows slightly faster indexing.  */
481  struct {double invc, pad, logc, logctail;} tab[1 << POW_LOG_TABLE_BITS];
482} __pow_log_data HIDDEN;
483
484extern const struct erff_data
485{
486  float erff_poly_A[6];
487  float erff_poly_B[7];
488} __erff_data HIDDEN;
489
490#define ERF_POLY_A_ORDER 19
491#define ERF_POLY_A_NCOEFFS 10
492#define ERFC_POLY_C_NCOEFFS 16
493#define ERFC_POLY_D_NCOEFFS 18
494#define ERFC_POLY_E_NCOEFFS 14
495#define ERFC_POLY_F_NCOEFFS 17
496extern const struct erf_data
497{
498  double erf_poly_A[ERF_POLY_A_NCOEFFS];
499  double erf_ratio_N_A[5];
500  double erf_ratio_D_A[5];
501  double erf_ratio_N_B[7];
502  double erf_ratio_D_B[6];
503  double erfc_poly_C[ERFC_POLY_C_NCOEFFS];
504  double erfc_poly_D[ERFC_POLY_D_NCOEFFS];
505  double erfc_poly_E[ERFC_POLY_E_NCOEFFS];
506  double erfc_poly_F[ERFC_POLY_F_NCOEFFS];
507} __erf_data HIDDEN;
508
509#define V_EXP_TABLE_BITS 7
510extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
511
512#define V_LOG_TABLE_BITS 7
513extern const struct v_log_data
514{
515  struct
516  {
517    double invc, logc;
518  } table[1 << V_LOG_TABLE_BITS];
519} __v_log_data HIDDEN;
520
521#endif
522