1/*
2 * Generic functions for ULP error estimation.
3 *
4 * Copyright (c) 2019-2023, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8/* For each different math function type,
9   T(x) should add a different suffix to x.
10   RT(x) should add a return type specific suffix to x. */
11
12#ifdef NEW_RT
13#undef NEW_RT
14
15# if USE_MPFR
16static int RT(ulpscale_mpfr) (mpfr_t x, int t)
17{
18  /* TODO: pow of 2 cases.  */
19  if (mpfr_regular_p (x))
20    {
21      mpfr_exp_t e = mpfr_get_exp (x) - RT(prec);
22      if (e < RT(emin))
23	e = RT(emin) - 1;
24      if (e > RT(emax) - RT(prec))
25	e = RT(emax) - RT(prec);
26      return e;
27    }
28  if (mpfr_zero_p (x))
29    return RT(emin) - 1;
30  if (mpfr_inf_p (x))
31    return RT(emax) - RT(prec);
32  /* NaN.  */
33  return 0;
34}
35# endif
36
37/* Difference between exact result and closest real number that
38   gets rounded to got, i.e. error before rounding, for a correctly
39   rounded result the difference is 0.  */
40static double RT (ulperr) (RT (float) got, const struct RT (ret) * p, int r,
41			   int ignore_zero_sign)
42{
43  RT(float) want = p->y;
44  RT(float) d;
45  double e;
46
47  if (RT(asuint) (got) == RT(asuint) (want))
48    return 0.0;
49  if (isnan (got) && isnan (want))
50    /* Ignore sign of NaN.  */
51    return RT (issignaling) (got) == RT (issignaling) (want) ? 0 : INFINITY;
52  if (signbit (got) != signbit (want))
53    {
54      /* Fall through to ULP calculation if ignoring sign of zero and at
55	 exactly one of want and got is non-zero.  */
56      if (ignore_zero_sign && want == got)
57	return 0.0;
58      if (!ignore_zero_sign || (want != 0 && got != 0))
59	return INFINITY;
60    }
61  if (!isfinite (want) || !isfinite (got))
62    {
63      if (isnan (got) != isnan (want))
64	return INFINITY;
65      if (isnan (want))
66	return 0;
67      if (isinf (got))
68	{
69	  got = RT(copysign) (RT(halfinf), got);
70	  want *= 0.5f;
71	}
72      if (isinf (want))
73	{
74	  want = RT(copysign) (RT(halfinf), want);
75	  got *= 0.5f;
76	}
77    }
78  if (r == FE_TONEAREST)
79    {
80      // TODO: incorrect when got vs want cross a powof2 boundary
81      /* error = got > want
82	      ? got - want - tail ulp - 0.5 ulp
83	      : got - want - tail ulp + 0.5 ulp;  */
84      d = got - want;
85      e = d > 0 ? -p->tail - 0.5 : -p->tail + 0.5;
86    }
87  else
88    {
89      if ((r == FE_DOWNWARD && got < want) || (r == FE_UPWARD && got > want)
90	  || (r == FE_TOWARDZERO && fabs (got) < fabs (want)))
91	got = RT(nextafter) (got, want);
92      d = got - want;
93      e = -p->tail;
94    }
95  return RT(scalbn) (d, -p->ulpexp) + e;
96}
97
98static int RT(isok) (RT(float) ygot, int exgot, RT(float) ywant, int exwant,
99		      int exmay)
100{
101  return RT(asuint) (ygot) == RT(asuint) (ywant)
102	 && ((exgot ^ exwant) & ~exmay) == 0;
103}
104
105static int RT(isok_nofenv) (RT(float) ygot, RT(float) ywant)
106{
107  return RT(asuint) (ygot) == RT(asuint) (ywant);
108}
109#endif
110
111static inline void T(call_fenv) (const struct fun *f, struct T(args) a, int r,
112				  RT(float) * y, int *ex)
113{
114  if (r != FE_TONEAREST)
115    fesetround (r);
116  feclearexcept (FE_ALL_EXCEPT);
117  *y = T(call) (f, a);
118  *ex = fetestexcept (FE_ALL_EXCEPT);
119  if (r != FE_TONEAREST)
120    fesetround (FE_TONEAREST);
121}
122
123static inline void T(call_nofenv) (const struct fun *f, struct T(args) a,
124				    int r, RT(float) * y, int *ex)
125{
126  if (r != FE_TONEAREST)
127    fesetround (r);
128  *y = T(call) (f, a);
129  *ex = 0;
130  if (r != FE_TONEAREST)
131    fesetround (FE_TONEAREST);
132}
133
134static inline int T(call_long_fenv) (const struct fun *f, struct T(args) a,
135				      int r, struct RT(ret) * p,
136				      RT(float) ygot, int exgot)
137{
138  if (r != FE_TONEAREST)
139    fesetround (r);
140  feclearexcept (FE_ALL_EXCEPT);
141  volatile struct T(args) va = a; // TODO: barrier
142  a = va;
143  RT(double) yl = T(call_long) (f, a);
144  p->y = (RT(float)) yl;
145  volatile RT(float) vy = p->y; // TODO: barrier
146  (void) vy;
147  p->ex = fetestexcept (FE_ALL_EXCEPT);
148  if (r != FE_TONEAREST)
149    fesetround (FE_TONEAREST);
150  p->ex_may = FE_INEXACT;
151  if (RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may))
152    return 1;
153  p->ulpexp = RT(ulpscale) (p->y);
154  if (isinf (p->y))
155    p->tail = RT(lscalbn) (yl - (RT(double)) 2 * RT(halfinf), -p->ulpexp);
156  else
157    p->tail = RT(lscalbn) (yl - p->y, -p->ulpexp);
158  if (RT(fabs) (p->y) < RT(min_normal))
159    {
160      /* TODO: subnormal result is treated as undeflow even if it's
161	 exact since call_long may not raise inexact correctly.  */
162      if (p->y != 0 || (p->ex & FE_INEXACT))
163	p->ex |= FE_UNDERFLOW | FE_INEXACT;
164    }
165  return 0;
166}
167static inline int T(call_long_nofenv) (const struct fun *f, struct T(args) a,
168					int r, struct RT(ret) * p,
169					RT(float) ygot, int exgot)
170{
171  if (r != FE_TONEAREST)
172    fesetround (r);
173  RT(double) yl = T(call_long) (f, a);
174  p->y = (RT(float)) yl;
175  if (r != FE_TONEAREST)
176    fesetround (FE_TONEAREST);
177  if (RT(isok_nofenv) (ygot, p->y))
178    return 1;
179  p->ulpexp = RT(ulpscale) (p->y);
180  if (isinf (p->y))
181    p->tail = RT(lscalbn) (yl - (RT(double)) 2 * RT(halfinf), -p->ulpexp);
182  else
183    p->tail = RT(lscalbn) (yl - p->y, -p->ulpexp);
184  return 0;
185}
186
187/* There are nan input args and all quiet.  */
188static inline int T(qnanpropagation) (struct T(args) a)
189{
190  return T(reduce) (a, isnan, ||) && !T(reduce) (a, RT(issignaling), ||);
191}
192static inline RT(float) T(sum) (struct T(args) a)
193{
194  return T(reduce) (a, , +);
195}
196
197/* returns 1 if the got result is ok.  */
198static inline int T(call_mpfr_fix) (const struct fun *f, struct T(args) a,
199				     int r_fenv, struct RT(ret) * p,
200				     RT(float) ygot, int exgot)
201{
202#if USE_MPFR
203  int t, t2;
204  mpfr_rnd_t r = rmap (r_fenv);
205  MPFR_DECL_INIT(my, RT(prec_mpfr));
206  MPFR_DECL_INIT(mr, RT(prec));
207  MPFR_DECL_INIT(me, RT(prec_mpfr));
208  mpfr_clear_flags ();
209  t = T(call_mpfr) (my, f, a, r);
210  /* Double rounding.  */
211  t2 = mpfr_set (mr, my, r);
212  if (t2)
213    t = t2;
214  mpfr_set_emin (RT(emin));
215  mpfr_set_emax (RT(emax));
216  t = mpfr_check_range (mr, t, r);
217  t = mpfr_subnormalize (mr, t, r);
218  mpfr_set_emax (MPFR_EMAX_DEFAULT);
219  mpfr_set_emin (MPFR_EMIN_DEFAULT);
220  p->y = mpfr_get_d (mr, r);
221  p->ex = t ? FE_INEXACT : 0;
222  p->ex_may = FE_INEXACT;
223  if (mpfr_underflow_p () && (p->ex & FE_INEXACT))
224    /* TODO: handle before and after rounding uflow cases.  */
225    p->ex |= FE_UNDERFLOW;
226  if (mpfr_overflow_p ())
227    p->ex |= FE_OVERFLOW | FE_INEXACT;
228  if (mpfr_divby0_p ())
229    p->ex |= FE_DIVBYZERO;
230  //if (mpfr_erangeflag_p ())
231  //  p->ex |= FE_INVALID;
232  if (!mpfr_nanflag_p () && RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may))
233    return 1;
234  if (mpfr_nanflag_p () && !T(qnanpropagation) (a))
235    p->ex |= FE_INVALID;
236  p->ulpexp = RT(ulpscale_mpfr) (my, t);
237  if (!isfinite (p->y))
238    {
239      p->tail = 0;
240      if (isnan (p->y))
241	{
242	  /* If an input was nan keep its sign.  */
243	  p->y = T(sum) (a);
244	  if (!isnan (p->y))
245	    p->y = (p->y - p->y) / (p->y - p->y);
246	  return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may);
247	}
248      mpfr_set_si_2exp (mr, signbit (p->y) ? -1 : 1, 1024, MPFR_RNDN);
249      if (mpfr_cmpabs (my, mr) >= 0)
250	return RT(isok) (ygot, exgot, p->y, p->ex, p->ex_may);
251    }
252  mpfr_sub (me, my, mr, MPFR_RNDN);
253  mpfr_mul_2si (me, me, -p->ulpexp, MPFR_RNDN);
254  p->tail = mpfr_get_d (me, MPFR_RNDN);
255  return 0;
256#else
257  abort ();
258#endif
259}
260
261static int T(cmp) (const struct fun *f, struct gen *gen,
262		     const struct conf *conf)
263{
264  double maxerr = 0;
265  uint64_t cnt = 0;
266  uint64_t cnt1 = 0;
267  uint64_t cnt2 = 0;
268  uint64_t cntfail = 0;
269  int r = conf->r;
270  int use_mpfr = conf->mpfr;
271  int fenv = conf->fenv;
272  for (;;)
273    {
274      struct RT(ret) want;
275      struct T(args) a = T(next) (gen);
276      int exgot;
277      int exgot2;
278      RT(float) ygot;
279      RT(float) ygot2;
280      int fail = 0;
281      if (fenv)
282	T(call_fenv) (f, a, r, &ygot, &exgot);
283      else
284	T(call_nofenv) (f, a, r, &ygot, &exgot);
285      if (f->twice) {
286	secondcall = 1;
287	if (fenv)
288	  T(call_fenv) (f, a, r, &ygot2, &exgot2);
289	else
290	  T(call_nofenv) (f, a, r, &ygot2, &exgot2);
291	secondcall = 0;
292	if (RT(asuint) (ygot) != RT(asuint) (ygot2))
293	  {
294	    fail = 1;
295	    cntfail++;
296	    T(printcall) (f, a);
297	    printf (" got %a then %a for same input\n", ygot, ygot2);
298	  }
299      }
300      cnt++;
301      int ok = use_mpfr
302		 ? T(call_mpfr_fix) (f, a, r, &want, ygot, exgot)
303		 : (fenv ? T(call_long_fenv) (f, a, r, &want, ygot, exgot)
304			 : T(call_long_nofenv) (f, a, r, &want, ygot, exgot));
305      if (!ok)
306	{
307	  int print = 0;
308	  double err = RT (ulperr) (ygot, &want, r, conf->ignore_zero_sign);
309	  double abserr = fabs (err);
310	  // TODO: count errors below accuracy limit.
311	  if (abserr > 0)
312	    cnt1++;
313	  if (abserr > 1)
314	    cnt2++;
315	  if (abserr > conf->errlim)
316	    {
317	      print = 1;
318	      if (!fail)
319		{
320		  fail = 1;
321		  cntfail++;
322		}
323	    }
324	  if (abserr > maxerr)
325	    {
326	      maxerr = abserr;
327	      if (!conf->quiet && abserr > conf->softlim)
328		print = 1;
329	    }
330	  if (print)
331	    {
332	      T(printcall) (f, a);
333	      // TODO: inf ulp handling
334	      printf (" got %a want %a %+g ulp err %g\n", ygot, want.y,
335		      want.tail, err);
336	    }
337	  int diff = fenv ? exgot ^ want.ex : 0;
338	  if (fenv && (diff & ~want.ex_may))
339	    {
340	      if (!fail)
341		{
342		  fail = 1;
343		  cntfail++;
344		}
345	      T(printcall) (f, a);
346	      printf (" is %a %+g ulp, got except 0x%0x", want.y, want.tail,
347		      exgot);
348	      if (diff & exgot)
349		printf (" wrongly set: 0x%x", diff & exgot);
350	      if (diff & ~exgot)
351		printf (" wrongly clear: 0x%x", diff & ~exgot);
352	      putchar ('\n');
353	    }
354	}
355      if (cnt >= conf->n)
356	break;
357      if (!conf->quiet && cnt % 0x100000 == 0)
358	printf ("progress: %6.3f%% cnt %llu cnt1 %llu cnt2 %llu cntfail %llu "
359		"maxerr %g\n",
360		100.0 * cnt / conf->n, (unsigned long long) cnt,
361		(unsigned long long) cnt1, (unsigned long long) cnt2,
362		(unsigned long long) cntfail, maxerr);
363    }
364  double cc = cnt;
365  if (cntfail)
366    printf ("FAIL ");
367  else
368    printf ("PASS ");
369  T(printgen) (f, gen);
370  printf (" round %c errlim %g maxerr %g %s cnt %llu cnt1 %llu %g%% cnt2 %llu "
371	  "%g%% cntfail %llu %g%%\n",
372	  conf->rc, conf->errlim,
373	  maxerr, conf->r == FE_TONEAREST ? "+0.5" : "+1.0",
374	  (unsigned long long) cnt,
375	  (unsigned long long) cnt1, 100.0 * cnt1 / cc,
376	  (unsigned long long) cnt2, 100.0 * cnt2 / cc,
377	  (unsigned long long) cntfail, 100.0 * cntfail / cc);
378  return !!cntfail;
379}
380