1/* Sum -- efficiently sum a list of floating-point numbers 2 3Copyright 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. 4Contributed by the AriC and Caramel projects, INRIA. 5 6This file is part of the GNU MPFR Library. 7 8The GNU MPFR Library is free software; you can redistribute it and/or modify 9it under the terms of the GNU Lesser General Public License as published by 10the Free Software Foundation; either version 3 of the License, or (at your 11option) any later version. 12 13The GNU MPFR Library is distributed in the hope that it will be useful, but 14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16License for more details. 17 18You should have received a copy of the GNU Lesser General Public License 19along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23/* Reference: James Demmel and Yozo Hida, Fast and accurate floating-point 24 summation with application to computational geometry, Numerical Algorithms, 25 volume 37, number 1-4, pages 101--112, 2004. */ 26 27#define MPFR_NEED_LONGLONG_H 28#include "mpfr-impl.h" 29 30/* I would really like to use "mpfr_srcptr const []" but the norm is buggy: 31 it doesn't automaticaly cast a "mpfr_ptr []" to "mpfr_srcptr const []" 32 if necessary. So the choice are: 33 mpfr_s ** : ok 34 mpfr_s *const* : ok 35 mpfr_s **const : ok 36 mpfr_s *const*const : ok 37 const mpfr_s *const* : no 38 const mpfr_s **const : no 39 const mpfr_s *const*const: no 40 VL: this is not a bug, but a feature. See the reason here: 41 http://c-faq.com/ansi/constmismatch.html 42*/ 43static void heap_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *); 44static void count_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *, 45 mpfr_exp_t, mpfr_uexp_t); 46 47/* Either sort the tab in perm and returns 0 48 Or returns 1 for +INF, -1 for -INF and 2 for NAN */ 49int 50mpfr_sum_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm) 51{ 52 mpfr_exp_t min, max; 53 mpfr_uexp_t exp_num; 54 unsigned long i; 55 int sign_inf; 56 57 sign_inf = 0; 58 min = MPFR_EMIN_MAX; 59 max = MPFR_EMAX_MIN; 60 for (i = 0; i < n; i++) 61 { 62 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (tab[i]))) 63 { 64 if (MPFR_IS_NAN (tab[i])) 65 return 2; /* Return NAN code */ 66 else if (MPFR_IS_INF (tab[i])) 67 { 68 if (sign_inf == 0) /* No previous INF */ 69 sign_inf = MPFR_SIGN (tab[i]); 70 else if (sign_inf != MPFR_SIGN (tab[i])) 71 return 2; /* Return NAN */ 72 } 73 } 74 else 75 { 76 MPFR_ASSERTD (MPFR_IS_PURE_FP (tab[i])); 77 if (MPFR_GET_EXP (tab[i]) < min) 78 min = MPFR_GET_EXP(tab[i]); 79 if (MPFR_GET_EXP (tab[i]) > max) 80 max = MPFR_GET_EXP(tab[i]); 81 } 82 } 83 if (MPFR_UNLIKELY (sign_inf != 0)) 84 return sign_inf; 85 86 exp_num = max - min + 1; 87 /* FIXME : better test */ 88 if (exp_num > n * MPFR_INT_CEIL_LOG2 (n)) 89 heap_sort (tab, n, perm); 90 else 91 count_sort (tab, n, perm, min, exp_num); 92 return 0; 93} 94 95#define GET_EXP1(x) (MPFR_IS_ZERO (x) ? min : MPFR_GET_EXP (x)) 96/* Performs a count sort of the entries */ 97static void 98count_sort (mpfr_srcptr *const tab, unsigned long n, 99 mpfr_srcptr *perm, mpfr_exp_t min, mpfr_uexp_t exp_num) 100{ 101 unsigned long *account; 102 unsigned long target_rank, i; 103 MPFR_TMP_DECL(marker); 104 105 /* Reserve a place for potential 0 (with EXP min-1) 106 If there is no zero, we only lose one unused entry */ 107 min--; 108 exp_num++; 109 110 /* Performs a counting sort of the entries */ 111 MPFR_TMP_MARK (marker); 112 account = (unsigned long *) MPFR_TMP_ALLOC (exp_num * sizeof *account); 113 for (i = 0; i < exp_num; i++) 114 account[i] = 0; 115 for (i = 0; i < n; i++) 116 account[GET_EXP1 (tab[i]) - min]++; 117 for (i = exp_num - 1; i >= 1; i--) 118 account[i - 1] += account[i]; 119 for (i = 0; i < n; i++) 120 { 121 target_rank = --account[GET_EXP1 (tab[i]) - min]; 122 perm[target_rank] = tab[i]; 123 } 124 MPFR_TMP_FREE (marker); 125} 126 127 128#define GET_EXP2(x) (MPFR_IS_ZERO (x) ? MPFR_EMIN_MIN : MPFR_GET_EXP (x)) 129 130/* Performs a heap sort of the entries */ 131static void 132heap_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm) 133{ 134 unsigned long dernier_traite; 135 unsigned long i, pere; 136 mpfr_srcptr tmp; 137 unsigned long fils_gauche, fils_droit, fils_indigne; 138 /* Reminder of a heap structure : 139 node(i) has for left son node(2i +1) and right son node(2i) 140 and father(node(i)) = node((i - 1) / 2) 141 */ 142 143 /* initialize the permutation to identity */ 144 for (i = 0; i < n; i++) 145 perm[i] = tab[i]; 146 147 /* insertion phase */ 148 for (dernier_traite = 1; dernier_traite < n; dernier_traite++) 149 { 150 i = dernier_traite; 151 while (i > 0) 152 { 153 pere = (i - 1) / 2; 154 if (GET_EXP2 (perm[pere]) > GET_EXP2 (perm[i])) 155 { 156 tmp = perm[pere]; 157 perm[pere] = perm[i]; 158 perm[i] = tmp; 159 i = pere; 160 } 161 else 162 break; 163 } 164 } 165 166 /* extraction phase */ 167 for (dernier_traite = n - 1; dernier_traite > 0; dernier_traite--) 168 { 169 tmp = perm[0]; 170 perm[0] = perm[dernier_traite]; 171 perm[dernier_traite] = tmp; 172 173 i = 0; 174 while (1) 175 { 176 fils_gauche = 2 * i + 1; 177 fils_droit = fils_gauche + 1; 178 if (fils_gauche < dernier_traite) 179 { 180 if (fils_droit < dernier_traite) 181 { 182 if (GET_EXP2(perm[fils_droit]) < GET_EXP2(perm[fils_gauche])) 183 fils_indigne = fils_droit; 184 else 185 fils_indigne = fils_gauche; 186 187 if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_indigne])) 188 { 189 tmp = perm[i]; 190 perm[i] = perm[fils_indigne]; 191 perm[fils_indigne] = tmp; 192 i = fils_indigne; 193 } 194 else 195 break; 196 } 197 else /* on a un fils gauche, pas de fils droit */ 198 { 199 if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_gauche])) 200 { 201 tmp = perm[i]; 202 perm[i] = perm[fils_gauche]; 203 perm[fils_gauche] = tmp; 204 } 205 break; 206 } 207 } 208 else /* on n'a pas de fils */ 209 break; 210 } 211 } 212} 213 214 215/* Sum a list of float with order given by permutation perm, 216 * intermediate size set to F. 217 * Internal use function. 218 */ 219static int 220sum_once (mpfr_ptr ret, mpfr_srcptr *const tab, unsigned long n, mpfr_prec_t F) 221{ 222 mpfr_t sum; 223 unsigned long i; 224 int error_trap; 225 226 MPFR_ASSERTD (n >= 2); 227 228 mpfr_init2 (sum, F); 229 error_trap = mpfr_set (sum, tab[0], MPFR_RNDN); 230 for (i = 1; i < n - 1; i++) 231 { 232 MPFR_ASSERTD (!MPFR_IS_NAN (sum) && !MPFR_IS_INF (sum)); 233 error_trap |= mpfr_add (sum, sum, tab[i], MPFR_RNDN); 234 } 235 error_trap |= mpfr_add (ret, sum, tab[n - 1], MPFR_RNDN); 236 mpfr_clear (sum); 237 return error_trap; 238} 239 240/* Sum a list of floating-point numbers. 241 */ 242 243int 244mpfr_sum (mpfr_ptr ret, mpfr_ptr *const tab_p, unsigned long n, mpfr_rnd_t rnd) 245{ 246 mpfr_t cur_sum; 247 mpfr_prec_t prec; 248 mpfr_srcptr *perm, *const tab = (mpfr_srcptr *) tab_p; 249 int k, error_trap; 250 MPFR_ZIV_DECL (loop); 251 MPFR_SAVE_EXPO_DECL (expo); 252 MPFR_TMP_DECL (marker); 253 254 if (MPFR_UNLIKELY (n <= 1)) 255 { 256 if (n < 1) 257 { 258 MPFR_SET_ZERO (ret); 259 MPFR_SET_POS (ret); 260 return 0; 261 } 262 else 263 return mpfr_set (ret, tab[0], rnd); 264 } 265 266 /* Sort and treat special cases */ 267 MPFR_TMP_MARK (marker); 268 perm = (mpfr_srcptr *) MPFR_TMP_ALLOC (n * sizeof *perm); 269 error_trap = mpfr_sum_sort (tab, n, perm); 270 /* Check if there was a NAN or a INF */ 271 if (MPFR_UNLIKELY (error_trap != 0)) 272 { 273 MPFR_TMP_FREE (marker); 274 if (error_trap == 2) 275 { 276 MPFR_SET_NAN (ret); 277 MPFR_RET_NAN; 278 } 279 MPFR_SET_INF (ret); 280 MPFR_SET_SIGN (ret, error_trap); 281 MPFR_RET (0); 282 } 283 284 /* Initial precision */ 285 prec = MAX (MPFR_PREC (tab[0]), MPFR_PREC (ret)); 286 k = MPFR_INT_CEIL_LOG2 (n) + 1; 287 prec += k + 2; 288 mpfr_init2 (cur_sum, prec); 289 290 /* Ziv Loop */ 291 MPFR_SAVE_EXPO_MARK (expo); 292 MPFR_ZIV_INIT (loop, prec); 293 for (;;) 294 { 295 error_trap = sum_once (cur_sum, perm, n, prec + k); 296 if (MPFR_LIKELY (error_trap == 0 || 297 (!MPFR_IS_ZERO (cur_sum) && 298 mpfr_can_round (cur_sum, 299 MPFR_GET_EXP (cur_sum) - prec + 2, 300 MPFR_RNDN, rnd, MPFR_PREC (ret))))) 301 break; 302 MPFR_ZIV_NEXT (loop, prec); 303 mpfr_set_prec (cur_sum, prec); 304 } 305 MPFR_ZIV_FREE (loop); 306 MPFR_TMP_FREE (marker); 307 308 error_trap |= mpfr_set (ret, cur_sum, rnd); 309 mpfr_clear (cur_sum); 310 311 MPFR_SAVE_EXPO_FREE (expo); 312 error_trap |= mpfr_check_range (ret, 0, rnd); 313 return error_trap; /* It doesn't return the ternary value */ 314} 315 316/* __END__ */ 317