1/* Sum -- efficiently sum a list of floating-point numbers
2
3Copyright 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4Contributed by the AriC and Caramel projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23/* Reference: James Demmel and Yozo Hida, Fast and accurate floating-point
24   summation with application to computational geometry, Numerical Algorithms,
25   volume 37, number 1-4, pages 101--112, 2004. */
26
27#define MPFR_NEED_LONGLONG_H
28#include "mpfr-impl.h"
29
30/* I would really like to use "mpfr_srcptr const []" but the norm is buggy:
31   it doesn't automaticaly cast a "mpfr_ptr []" to "mpfr_srcptr const []"
32   if necessary. So the choice are:
33     mpfr_s **                : ok
34     mpfr_s *const*           : ok
35     mpfr_s **const           : ok
36     mpfr_s *const*const      : ok
37     const mpfr_s *const*     : no
38     const mpfr_s **const     : no
39     const mpfr_s *const*const: no
40   VL: this is not a bug, but a feature. See the reason here:
41     http://c-faq.com/ansi/constmismatch.html
42*/
43static void heap_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *);
44static void count_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *,
45                        mpfr_exp_t, mpfr_uexp_t);
46
47/* Either sort the tab in perm and returns 0
48   Or returns 1 for +INF, -1 for -INF and 2 for NAN */
49int
50mpfr_sum_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm)
51{
52  mpfr_exp_t min, max;
53  mpfr_uexp_t exp_num;
54  unsigned long i;
55  int sign_inf;
56
57  sign_inf = 0;
58  min = MPFR_EMIN_MAX;
59  max = MPFR_EMAX_MIN;
60  for (i = 0; i < n; i++)
61    {
62      if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (tab[i])))
63        {
64          if (MPFR_IS_NAN (tab[i]))
65            return 2; /* Return NAN code */
66          else if (MPFR_IS_INF (tab[i]))
67            {
68              if (sign_inf == 0) /* No previous INF */
69                sign_inf = MPFR_SIGN (tab[i]);
70              else if (sign_inf != MPFR_SIGN (tab[i]))
71                return 2; /* Return NAN */
72            }
73        }
74      else
75        {
76          MPFR_ASSERTD (MPFR_IS_PURE_FP (tab[i]));
77          if (MPFR_GET_EXP (tab[i]) < min)
78            min = MPFR_GET_EXP(tab[i]);
79          if (MPFR_GET_EXP (tab[i]) > max)
80            max = MPFR_GET_EXP(tab[i]);
81        }
82    }
83  if (MPFR_UNLIKELY (sign_inf != 0))
84    return sign_inf;
85
86  exp_num = max - min + 1;
87  /* FIXME : better test */
88  if (exp_num > n * MPFR_INT_CEIL_LOG2 (n))
89    heap_sort (tab, n, perm);
90  else
91    count_sort (tab, n, perm, min, exp_num);
92  return 0;
93}
94
95#define GET_EXP1(x) (MPFR_IS_ZERO (x) ? min : MPFR_GET_EXP (x))
96/* Performs a count sort of the entries */
97static void
98count_sort (mpfr_srcptr *const tab, unsigned long n,
99            mpfr_srcptr *perm, mpfr_exp_t min, mpfr_uexp_t exp_num)
100{
101  unsigned long *account;
102  unsigned long target_rank, i;
103  MPFR_TMP_DECL(marker);
104
105  /* Reserve a place for potential 0 (with EXP min-1)
106     If there is no zero, we only lose one unused entry */
107  min--;
108  exp_num++;
109
110  /* Performs a counting sort of the entries */
111  MPFR_TMP_MARK (marker);
112  account = (unsigned long *) MPFR_TMP_ALLOC (exp_num * sizeof *account);
113  for (i = 0; i < exp_num; i++)
114    account[i] = 0;
115  for (i = 0; i < n; i++)
116    account[GET_EXP1 (tab[i]) - min]++;
117  for (i = exp_num - 1; i >= 1; i--)
118    account[i - 1] += account[i];
119  for (i = 0; i < n; i++)
120    {
121      target_rank = --account[GET_EXP1 (tab[i]) - min];
122      perm[target_rank] = tab[i];
123    }
124  MPFR_TMP_FREE (marker);
125}
126
127
128#define GET_EXP2(x) (MPFR_IS_ZERO (x) ? MPFR_EMIN_MIN : MPFR_GET_EXP (x))
129
130/* Performs a heap sort of the entries */
131static void
132heap_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm)
133{
134  unsigned long dernier_traite;
135  unsigned long i, pere;
136  mpfr_srcptr tmp;
137  unsigned long fils_gauche, fils_droit, fils_indigne;
138  /* Reminder of a heap structure :
139     node(i) has for left son node(2i +1) and right son node(2i)
140     and father(node(i)) = node((i - 1) / 2)
141  */
142
143  /* initialize the permutation to identity */
144  for (i = 0; i < n; i++)
145    perm[i] = tab[i];
146
147  /* insertion phase */
148  for (dernier_traite = 1; dernier_traite < n; dernier_traite++)
149    {
150      i = dernier_traite;
151      while (i > 0)
152        {
153          pere = (i - 1) / 2;
154          if (GET_EXP2 (perm[pere]) > GET_EXP2 (perm[i]))
155            {
156              tmp = perm[pere];
157              perm[pere] = perm[i];
158              perm[i] = tmp;
159              i = pere;
160            }
161          else
162            break;
163        }
164    }
165
166  /* extraction phase */
167  for (dernier_traite = n - 1; dernier_traite > 0; dernier_traite--)
168    {
169      tmp = perm[0];
170      perm[0] = perm[dernier_traite];
171      perm[dernier_traite] = tmp;
172
173      i = 0;
174      while (1)
175        {
176          fils_gauche = 2 * i + 1;
177          fils_droit = fils_gauche + 1;
178          if (fils_gauche < dernier_traite)
179            {
180              if (fils_droit < dernier_traite)
181                {
182                  if (GET_EXP2(perm[fils_droit]) < GET_EXP2(perm[fils_gauche]))
183                    fils_indigne = fils_droit;
184                  else
185                    fils_indigne = fils_gauche;
186
187                  if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_indigne]))
188                    {
189                      tmp = perm[i];
190                      perm[i] = perm[fils_indigne];
191                      perm[fils_indigne] = tmp;
192                      i = fils_indigne;
193                    }
194                  else
195                    break;
196                }
197              else /* on a un fils gauche, pas de fils droit */
198                {
199                  if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_gauche]))
200                    {
201                      tmp = perm[i];
202                      perm[i] = perm[fils_gauche];
203                      perm[fils_gauche] = tmp;
204                    }
205                  break;
206                }
207            }
208          else /* on n'a pas de fils */
209            break;
210        }
211    }
212}
213
214
215/* Sum a list of float with order given by permutation perm,
216 * intermediate size set to F.
217 * Internal use function.
218 */
219static int
220sum_once (mpfr_ptr ret, mpfr_srcptr *const tab, unsigned long n, mpfr_prec_t F)
221{
222  mpfr_t sum;
223  unsigned long i;
224  int error_trap;
225
226  MPFR_ASSERTD (n >= 2);
227
228  mpfr_init2 (sum, F);
229  error_trap = mpfr_set (sum, tab[0], MPFR_RNDN);
230  for (i = 1; i < n - 1; i++)
231    {
232      MPFR_ASSERTD (!MPFR_IS_NAN (sum) && !MPFR_IS_INF (sum));
233      error_trap |= mpfr_add (sum, sum, tab[i], MPFR_RNDN);
234    }
235  error_trap |= mpfr_add (ret, sum, tab[n - 1], MPFR_RNDN);
236  mpfr_clear (sum);
237  return error_trap;
238}
239
240/* Sum a list of floating-point numbers.
241 */
242
243int
244mpfr_sum (mpfr_ptr ret, mpfr_ptr *const tab_p, unsigned long n, mpfr_rnd_t rnd)
245{
246  mpfr_t cur_sum;
247  mpfr_prec_t prec;
248  mpfr_srcptr *perm, *const tab = (mpfr_srcptr *) tab_p;
249  int k, error_trap;
250  MPFR_ZIV_DECL (loop);
251  MPFR_SAVE_EXPO_DECL (expo);
252  MPFR_TMP_DECL (marker);
253
254  if (MPFR_UNLIKELY (n <= 1))
255    {
256      if (n < 1)
257        {
258          MPFR_SET_ZERO (ret);
259          MPFR_SET_POS (ret);
260          return 0;
261        }
262      else
263        return mpfr_set (ret, tab[0], rnd);
264    }
265
266  /* Sort and treat special cases */
267  MPFR_TMP_MARK (marker);
268  perm = (mpfr_srcptr *) MPFR_TMP_ALLOC (n * sizeof *perm);
269  error_trap = mpfr_sum_sort (tab, n, perm);
270  /* Check if there was a NAN or a INF */
271  if (MPFR_UNLIKELY (error_trap != 0))
272    {
273      MPFR_TMP_FREE (marker);
274      if (error_trap == 2)
275        {
276          MPFR_SET_NAN (ret);
277          MPFR_RET_NAN;
278        }
279      MPFR_SET_INF (ret);
280      MPFR_SET_SIGN (ret, error_trap);
281      MPFR_RET (0);
282    }
283
284  /* Initial precision */
285  prec = MAX (MPFR_PREC (tab[0]), MPFR_PREC (ret));
286  k = MPFR_INT_CEIL_LOG2 (n) + 1;
287  prec += k + 2;
288  mpfr_init2 (cur_sum, prec);
289
290  /* Ziv Loop */
291  MPFR_SAVE_EXPO_MARK (expo);
292  MPFR_ZIV_INIT (loop, prec);
293  for (;;)
294    {
295      error_trap = sum_once (cur_sum, perm, n, prec + k);
296      if (MPFR_LIKELY (error_trap == 0 ||
297                       (!MPFR_IS_ZERO (cur_sum) &&
298                        mpfr_can_round (cur_sum,
299                                        MPFR_GET_EXP (cur_sum) - prec + 2,
300                                        MPFR_RNDN, rnd, MPFR_PREC (ret)))))
301        break;
302      MPFR_ZIV_NEXT (loop, prec);
303      mpfr_set_prec (cur_sum, prec);
304    }
305  MPFR_ZIV_FREE (loop);
306  MPFR_TMP_FREE (marker);
307
308  error_trap |= mpfr_set (ret, cur_sum, rnd);
309  mpfr_clear (cur_sum);
310
311  MPFR_SAVE_EXPO_FREE (expo);
312  error_trap |= mpfr_check_range (ret, 0, rnd);
313  return error_trap; /* It doesn't return the ternary value */
314}
315
316/* __END__ */
317