1/* mpf_set_str (dest, string, base) -- Convert the string STRING
2   in base BASE to a float in dest.  If BASE is zero, the leading characters
3   of STRING is used to figure out the base.
4
5Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2007,
62008 Free Software Foundation, Inc.
7
8This file is part of the GNU MP Library.
9
10The GNU MP Library is free software; you can redistribute it and/or modify
11it under the terms of the GNU Lesser General Public License as published by
12the Free Software Foundation; either version 3 of the License, or (at your
13option) any later version.
14
15The GNU MP Library is distributed in the hope that it will be useful, but
16WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
18License for more details.
19
20You should have received a copy of the GNU Lesser General Public License
21along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
22
23/*
24  This still needs work, as suggested by some FIXME comments.
25  1. Don't depend on superfluous mantissa digits.
26  2. Allocate temp space more cleverly.
27  3. Use mpn_tdiv_qr instead of mpn_lshift+mpn_divrem.
28*/
29
30#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
31
32#include "config.h"
33
34#include <stdlib.h>
35#include <string.h>
36#include <ctype.h>
37
38#if HAVE_LANGINFO_H
39#include <langinfo.h>  /* for nl_langinfo */
40#endif
41
42#if HAVE_LOCALE_H
43#include <locale.h>    /* for localeconv */
44#endif
45
46#include "gmp.h"
47#include "gmp-impl.h"
48#include "longlong.h"
49
50extern const unsigned char __gmp_digit_value_tab[];
51#define digit_value_tab __gmp_digit_value_tab
52
53/* Compute base^exp and return the most significant prec limbs in rp[].
54   Put the count of omitted low limbs in *ign.
55   Return the actual size (which might be less than prec).  */
56static mp_size_t
57mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,
58		    mp_limb_t base, mp_exp_t exp,
59		    mp_size_t prec, mp_ptr tp)
60{
61  mp_size_t ign;		/* counts number of ignored low limbs in r */
62  mp_size_t off;		/* keeps track of offset where value starts */
63  mp_ptr passed_rp = rp;
64  mp_size_t rn;
65  int cnt;
66  int i;
67
68  rp[0] = base;
69  rn = 1;
70  off = 0;
71  ign = 0;
72  count_leading_zeros (cnt, exp);
73  for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)
74    {
75      mpn_sqr (tp, rp + off, rn);
76      rn = 2 * rn;
77      rn -= tp[rn - 1] == 0;
78      ign <<= 1;
79
80      off = 0;
81      if (rn > prec)
82	{
83	  ign += rn - prec;
84	  off = rn - prec;
85	  rn = prec;
86	}
87      MP_PTR_SWAP (rp, tp);
88
89      if (((exp >> i) & 1) != 0)
90	{
91	  mp_limb_t cy;
92	  cy = mpn_mul_1 (rp, rp + off, rn, base);
93	  rp[rn] = cy;
94	  rn += cy != 0;
95	  off = 0;
96	}
97    }
98
99  if (rn > prec)
100    {
101      ign += rn - prec;
102      rp += rn - prec;
103      rn = prec;
104    }
105
106  MPN_COPY_INCR (passed_rp, rp + off, rn);
107  *ignp = ign;
108  return rn;
109}
110
111int
112mpf_set_str (mpf_ptr x, const char *str, int base)
113{
114  size_t str_size;
115  char *s, *begs;
116  size_t i, j;
117  int c;
118  int negative;
119  char *dotpos = 0;
120  const char *expptr;
121  int exp_base;
122  const char  *point = GMP_DECIMAL_POINT;
123  size_t      pointlen = strlen (point);
124  const unsigned char *digit_value;
125  TMP_DECL;
126
127  c = (unsigned char) *str;
128
129  /* Skip whitespace.  */
130  while (isspace (c))
131    c = (unsigned char) *++str;
132
133  negative = 0;
134  if (c == '-')
135    {
136      negative = 1;
137      c = (unsigned char) *++str;
138    }
139
140  /* Default base to decimal.  */
141  if (base == 0)
142    base = 10;
143
144  exp_base = base;
145
146  if (base < 0)
147    {
148      exp_base = 10;
149      base = -base;
150    }
151
152  digit_value = digit_value_tab;
153  if (base > 36)
154    {
155      /* For bases > 36, use the collating sequence
156	 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
157      digit_value += 224;
158      if (base > 62)
159	return -1;		/* too large base */
160    }
161
162  /* Require at least one digit, possibly after an initial decimal point.  */
163  if (digit_value[c] >= (base == 0 ? 10 : base))
164    {
165      /* not a digit, must be a decimal point */
166      for (i = 0; i < pointlen; i++)
167        if (str[i] != point[i])
168          return -1;
169      if (digit_value[(unsigned char) str[pointlen]] >= (base == 0 ? 10 : base))
170	return -1;
171    }
172
173  /* Locate exponent part of the input.  Look from the right of the string,
174     since the exponent is usually a lot shorter than the mantissa.  */
175  expptr = NULL;
176  str_size = strlen (str);
177  for (i = str_size - 1; i > 0; i--)
178    {
179      c = (unsigned char) str[i];
180      if (c == '@' || (base <= 10 && (c == 'e' || c == 'E')))
181	{
182	  expptr = str + i + 1;
183	  str_size = i;
184	  break;
185	}
186    }
187
188  TMP_MARK;
189  s = begs = (char *) TMP_ALLOC (str_size + 1);
190
191  /* Loop through mantissa, converting it from ASCII to raw byte values.  */
192  for (i = 0; i < str_size; i++)
193    {
194      c = (unsigned char) *str;
195      if (!isspace (c))
196	{
197	  int dig;
198
199          for (j = 0; j < pointlen; j++)
200            if (str[j] != point[j])
201              goto not_point;
202          if (1)
203	    {
204	      if (dotpos != 0)
205		{
206		  /* already saw a decimal point, another is invalid */
207		  TMP_FREE;
208		  return -1;
209		}
210	      dotpos = s;
211	      str += pointlen - 1;
212	      i += pointlen - 1;
213	    }
214	  else
215	    {
216            not_point:
217	      dig = digit_value[c];
218	      if (dig >= base)
219		{
220		  TMP_FREE;
221		  return -1;
222		}
223	      *s++ = dig;
224	    }
225	}
226      c = (unsigned char) *++str;
227    }
228
229  str_size = s - begs;
230
231  {
232    long exp_in_base;
233    mp_size_t ra, ma, rn, mn;
234    int cnt;
235    mp_ptr mp, tp, rp;
236    mp_exp_t exp_in_limbs;
237    mp_size_t prec = PREC(x) + 1;
238    int divflag;
239    mp_size_t madj, radj;
240
241#if 0
242    size_t n_chars_needed;
243
244    /* This breaks things like 0.000...0001.  To safely ignore superfluous
245       digits, we need to skip over leading zeros.  */
246    /* Just consider the relevant leading digits of the mantissa.  */
247    n_chars_needed = 2 + (size_t)
248      (((size_t) prec * GMP_NUMB_BITS) * mp_bases[base].chars_per_bit_exactly);
249    if (str_size > n_chars_needed)
250      str_size = n_chars_needed;
251#endif
252
253    ma = 2 + (mp_size_t)
254      (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
255    mp = TMP_ALLOC_LIMBS (ma);
256    mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);
257
258    if (mn == 0)
259      {
260	SIZ(x) = 0;
261	EXP(x) = 0;
262	TMP_FREE;
263	return 0;
264      }
265
266    madj = 0;
267    /* Ignore excess limbs in MP,MSIZE.  */
268    if (mn > prec)
269      {
270	madj = mn - prec;
271	mp += mn - prec;
272	mn = prec;
273      }
274
275    if (expptr != 0)
276      {
277	/* Scan and convert the exponent, in base exp_base.  */
278	long dig, minus, plusminus;
279	c = (unsigned char) *expptr;
280	minus = -(long) (c == '-');
281	plusminus = minus | -(long) (c == '+');
282	expptr -= plusminus;			/* conditional increment */
283	c = (unsigned char) *expptr++;
284	dig = digit_value[c];
285	if (dig >= exp_base)
286	  {
287	    TMP_FREE;
288	    return -1;
289	  }
290	exp_in_base = dig;
291	c = (unsigned char) *expptr++;
292	dig = digit_value[c];
293	while (dig < exp_base)
294	  {
295	    exp_in_base = exp_in_base * exp_base;
296	    exp_in_base += dig;
297	    c = (unsigned char) *expptr++;
298	    dig = digit_value[c];
299	  }
300	exp_in_base = (exp_in_base ^ minus) - minus; /* conditional negation */
301      }
302    else
303      exp_in_base = 0;
304    if (dotpos != 0)
305      exp_in_base -= s - dotpos;
306    divflag = exp_in_base < 0;
307    exp_in_base = ABS (exp_in_base);
308
309    if (exp_in_base == 0)
310      {
311	MPN_COPY (PTR(x), mp, mn);
312	SIZ(x) = negative ? -mn : mn;
313	EXP(x) = mn + madj;
314	TMP_FREE;
315	return 0;
316      }
317
318    ra = 2 * (prec + 1);
319    rp = TMP_ALLOC_LIMBS (ra);
320    tp = TMP_ALLOC_LIMBS (ra);
321    rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp);
322
323    if (divflag)
324      {
325#if 0
326	/* FIXME: Should use mpn_tdiv here.  */
327	mpn_tdiv_qr (qp, mp, 0L, mp, mn, rp, rn);
328#else
329	mp_ptr qp;
330	mp_limb_t qlimb;
331	if (mn < rn)
332	  {
333	    /* Pad out MP,MSIZE for current divrem semantics.  */
334	    mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1);
335	    MPN_ZERO (tmp, rn - mn);
336	    MPN_COPY (tmp + rn - mn, mp, mn);
337	    mp = tmp;
338	    madj -= rn - mn;
339	    mn = rn;
340	  }
341	if ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0)
342	  {
343	    mp_limb_t cy;
344	    count_leading_zeros (cnt, rp[rn - 1]);
345	    cnt -= GMP_NAIL_BITS;
346	    mpn_lshift (rp, rp, rn, cnt);
347	    cy = mpn_lshift (mp, mp, mn, cnt);
348	    if (cy)
349	      mp[mn++] = cy;
350	  }
351
352	qp = TMP_ALLOC_LIMBS (prec + 1);
353	qlimb = mpn_divrem (qp, prec - (mn - rn), mp, mn, rp, rn);
354	tp = qp;
355	exp_in_limbs = qlimb + (mn - rn) + (madj - radj);
356	rn = prec;
357	if (qlimb != 0)
358	  {
359	    tp[prec] = qlimb;
360	    /* Skip the least significant limb not to overrun the destination
361	       variable.  */
362	    tp++;
363	  }
364#endif
365      }
366    else
367      {
368	tp = TMP_ALLOC_LIMBS (rn + mn);
369	if (rn > mn)
370	  mpn_mul (tp, rp, rn, mp, mn);
371	else
372	  mpn_mul (tp, mp, mn, rp, rn);
373	rn += mn;
374	rn -= tp[rn - 1] == 0;
375	exp_in_limbs = rn + madj + radj;
376
377	if (rn > prec)
378	  {
379	    tp += rn - prec;
380	    rn = prec;
381	    exp_in_limbs += 0;
382	  }
383      }
384
385    MPN_COPY (PTR(x), tp, rn);
386    SIZ(x) = negative ? -rn : rn;
387    EXP(x) = exp_in_limbs;
388    TMP_FREE;
389    return 0;
390  }
391}
392