1/* mpf_set_str (dest, string, base) -- Convert the string STRING 2 in base BASE to a float in dest. If BASE is zero, the leading characters 3 of STRING is used to figure out the base. 4 5Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2007, 62008 Free Software Foundation, Inc. 7 8This file is part of the GNU MP Library. 9 10The GNU MP Library is free software; you can redistribute it and/or modify 11it under the terms of the GNU Lesser General Public License as published by 12the Free Software Foundation; either version 3 of the License, or (at your 13option) any later version. 14 15The GNU MP Library is distributed in the hope that it will be useful, but 16WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 17or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 18License for more details. 19 20You should have received a copy of the GNU Lesser General Public License 21along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 22 23/* 24 This still needs work, as suggested by some FIXME comments. 25 1. Don't depend on superfluous mantissa digits. 26 2. Allocate temp space more cleverly. 27 3. Use mpn_tdiv_qr instead of mpn_lshift+mpn_divrem. 28*/ 29 30#define _GNU_SOURCE /* for DECIMAL_POINT in langinfo.h */ 31 32#include "config.h" 33 34#include <stdlib.h> 35#include <string.h> 36#include <ctype.h> 37 38#if HAVE_LANGINFO_H 39#include <langinfo.h> /* for nl_langinfo */ 40#endif 41 42#if HAVE_LOCALE_H 43#include <locale.h> /* for localeconv */ 44#endif 45 46#include "gmp.h" 47#include "gmp-impl.h" 48#include "longlong.h" 49 50extern const unsigned char __gmp_digit_value_tab[]; 51#define digit_value_tab __gmp_digit_value_tab 52 53/* Compute base^exp and return the most significant prec limbs in rp[]. 54 Put the count of omitted low limbs in *ign. 55 Return the actual size (which might be less than prec). */ 56static mp_size_t 57mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp, 58 mp_limb_t base, mp_exp_t exp, 59 mp_size_t prec, mp_ptr tp) 60{ 61 mp_size_t ign; /* counts number of ignored low limbs in r */ 62 mp_size_t off; /* keeps track of offset where value starts */ 63 mp_ptr passed_rp = rp; 64 mp_size_t rn; 65 int cnt; 66 int i; 67 68 rp[0] = base; 69 rn = 1; 70 off = 0; 71 ign = 0; 72 count_leading_zeros (cnt, exp); 73 for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--) 74 { 75 mpn_sqr (tp, rp + off, rn); 76 rn = 2 * rn; 77 rn -= tp[rn - 1] == 0; 78 ign <<= 1; 79 80 off = 0; 81 if (rn > prec) 82 { 83 ign += rn - prec; 84 off = rn - prec; 85 rn = prec; 86 } 87 MP_PTR_SWAP (rp, tp); 88 89 if (((exp >> i) & 1) != 0) 90 { 91 mp_limb_t cy; 92 cy = mpn_mul_1 (rp, rp + off, rn, base); 93 rp[rn] = cy; 94 rn += cy != 0; 95 off = 0; 96 } 97 } 98 99 if (rn > prec) 100 { 101 ign += rn - prec; 102 rp += rn - prec; 103 rn = prec; 104 } 105 106 MPN_COPY_INCR (passed_rp, rp + off, rn); 107 *ignp = ign; 108 return rn; 109} 110 111int 112mpf_set_str (mpf_ptr x, const char *str, int base) 113{ 114 size_t str_size; 115 char *s, *begs; 116 size_t i, j; 117 int c; 118 int negative; 119 char *dotpos = 0; 120 const char *expptr; 121 int exp_base; 122 const char *point = GMP_DECIMAL_POINT; 123 size_t pointlen = strlen (point); 124 const unsigned char *digit_value; 125 TMP_DECL; 126 127 c = (unsigned char) *str; 128 129 /* Skip whitespace. */ 130 while (isspace (c)) 131 c = (unsigned char) *++str; 132 133 negative = 0; 134 if (c == '-') 135 { 136 negative = 1; 137 c = (unsigned char) *++str; 138 } 139 140 /* Default base to decimal. */ 141 if (base == 0) 142 base = 10; 143 144 exp_base = base; 145 146 if (base < 0) 147 { 148 exp_base = 10; 149 base = -base; 150 } 151 152 digit_value = digit_value_tab; 153 if (base > 36) 154 { 155 /* For bases > 36, use the collating sequence 156 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz. */ 157 digit_value += 224; 158 if (base > 62) 159 return -1; /* too large base */ 160 } 161 162 /* Require at least one digit, possibly after an initial decimal point. */ 163 if (digit_value[c] >= (base == 0 ? 10 : base)) 164 { 165 /* not a digit, must be a decimal point */ 166 for (i = 0; i < pointlen; i++) 167 if (str[i] != point[i]) 168 return -1; 169 if (digit_value[(unsigned char) str[pointlen]] >= (base == 0 ? 10 : base)) 170 return -1; 171 } 172 173 /* Locate exponent part of the input. Look from the right of the string, 174 since the exponent is usually a lot shorter than the mantissa. */ 175 expptr = NULL; 176 str_size = strlen (str); 177 for (i = str_size - 1; i > 0; i--) 178 { 179 c = (unsigned char) str[i]; 180 if (c == '@' || (base <= 10 && (c == 'e' || c == 'E'))) 181 { 182 expptr = str + i + 1; 183 str_size = i; 184 break; 185 } 186 } 187 188 TMP_MARK; 189 s = begs = (char *) TMP_ALLOC (str_size + 1); 190 191 /* Loop through mantissa, converting it from ASCII to raw byte values. */ 192 for (i = 0; i < str_size; i++) 193 { 194 c = (unsigned char) *str; 195 if (!isspace (c)) 196 { 197 int dig; 198 199 for (j = 0; j < pointlen; j++) 200 if (str[j] != point[j]) 201 goto not_point; 202 if (1) 203 { 204 if (dotpos != 0) 205 { 206 /* already saw a decimal point, another is invalid */ 207 TMP_FREE; 208 return -1; 209 } 210 dotpos = s; 211 str += pointlen - 1; 212 i += pointlen - 1; 213 } 214 else 215 { 216 not_point: 217 dig = digit_value[c]; 218 if (dig >= base) 219 { 220 TMP_FREE; 221 return -1; 222 } 223 *s++ = dig; 224 } 225 } 226 c = (unsigned char) *++str; 227 } 228 229 str_size = s - begs; 230 231 { 232 long exp_in_base; 233 mp_size_t ra, ma, rn, mn; 234 int cnt; 235 mp_ptr mp, tp, rp; 236 mp_exp_t exp_in_limbs; 237 mp_size_t prec = PREC(x) + 1; 238 int divflag; 239 mp_size_t madj, radj; 240 241#if 0 242 size_t n_chars_needed; 243 244 /* This breaks things like 0.000...0001. To safely ignore superfluous 245 digits, we need to skip over leading zeros. */ 246 /* Just consider the relevant leading digits of the mantissa. */ 247 n_chars_needed = 2 + (size_t) 248 (((size_t) prec * GMP_NUMB_BITS) * mp_bases[base].chars_per_bit_exactly); 249 if (str_size > n_chars_needed) 250 str_size = n_chars_needed; 251#endif 252 253 ma = 2 + (mp_size_t) 254 (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly)); 255 mp = TMP_ALLOC_LIMBS (ma); 256 mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base); 257 258 if (mn == 0) 259 { 260 SIZ(x) = 0; 261 EXP(x) = 0; 262 TMP_FREE; 263 return 0; 264 } 265 266 madj = 0; 267 /* Ignore excess limbs in MP,MSIZE. */ 268 if (mn > prec) 269 { 270 madj = mn - prec; 271 mp += mn - prec; 272 mn = prec; 273 } 274 275 if (expptr != 0) 276 { 277 /* Scan and convert the exponent, in base exp_base. */ 278 long dig, minus, plusminus; 279 c = (unsigned char) *expptr; 280 minus = -(long) (c == '-'); 281 plusminus = minus | -(long) (c == '+'); 282 expptr -= plusminus; /* conditional increment */ 283 c = (unsigned char) *expptr++; 284 dig = digit_value[c]; 285 if (dig >= exp_base) 286 { 287 TMP_FREE; 288 return -1; 289 } 290 exp_in_base = dig; 291 c = (unsigned char) *expptr++; 292 dig = digit_value[c]; 293 while (dig < exp_base) 294 { 295 exp_in_base = exp_in_base * exp_base; 296 exp_in_base += dig; 297 c = (unsigned char) *expptr++; 298 dig = digit_value[c]; 299 } 300 exp_in_base = (exp_in_base ^ minus) - minus; /* conditional negation */ 301 } 302 else 303 exp_in_base = 0; 304 if (dotpos != 0) 305 exp_in_base -= s - dotpos; 306 divflag = exp_in_base < 0; 307 exp_in_base = ABS (exp_in_base); 308 309 if (exp_in_base == 0) 310 { 311 MPN_COPY (PTR(x), mp, mn); 312 SIZ(x) = negative ? -mn : mn; 313 EXP(x) = mn + madj; 314 TMP_FREE; 315 return 0; 316 } 317 318 ra = 2 * (prec + 1); 319 rp = TMP_ALLOC_LIMBS (ra); 320 tp = TMP_ALLOC_LIMBS (ra); 321 rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp); 322 323 if (divflag) 324 { 325#if 0 326 /* FIXME: Should use mpn_tdiv here. */ 327 mpn_tdiv_qr (qp, mp, 0L, mp, mn, rp, rn); 328#else 329 mp_ptr qp; 330 mp_limb_t qlimb; 331 if (mn < rn) 332 { 333 /* Pad out MP,MSIZE for current divrem semantics. */ 334 mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1); 335 MPN_ZERO (tmp, rn - mn); 336 MPN_COPY (tmp + rn - mn, mp, mn); 337 mp = tmp; 338 madj -= rn - mn; 339 mn = rn; 340 } 341 if ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0) 342 { 343 mp_limb_t cy; 344 count_leading_zeros (cnt, rp[rn - 1]); 345 cnt -= GMP_NAIL_BITS; 346 mpn_lshift (rp, rp, rn, cnt); 347 cy = mpn_lshift (mp, mp, mn, cnt); 348 if (cy) 349 mp[mn++] = cy; 350 } 351 352 qp = TMP_ALLOC_LIMBS (prec + 1); 353 qlimb = mpn_divrem (qp, prec - (mn - rn), mp, mn, rp, rn); 354 tp = qp; 355 exp_in_limbs = qlimb + (mn - rn) + (madj - radj); 356 rn = prec; 357 if (qlimb != 0) 358 { 359 tp[prec] = qlimb; 360 /* Skip the least significant limb not to overrun the destination 361 variable. */ 362 tp++; 363 } 364#endif 365 } 366 else 367 { 368 tp = TMP_ALLOC_LIMBS (rn + mn); 369 if (rn > mn) 370 mpn_mul (tp, rp, rn, mp, mn); 371 else 372 mpn_mul (tp, mp, mn, rp, rn); 373 rn += mn; 374 rn -= tp[rn - 1] == 0; 375 exp_in_limbs = rn + madj + radj; 376 377 if (rn > prec) 378 { 379 tp += rn - prec; 380 rn = prec; 381 exp_in_limbs += 0; 382 } 383 } 384 385 MPN_COPY (PTR(x), tp, rn); 386 SIZ(x) = negative ? -rn : rn; 387 EXP(x) = exp_in_limbs; 388 TMP_FREE; 389 return 0; 390 } 391} 392