1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Copyright (c) 2011 The FreeBSD Foundation
8 *
 * Copyright (c) 2023 Dag-Erling Smørgrav
10 *
11 * Portions of this software were developed by David Chisnall
12 * under sponsorship from the FreeBSD Foundation.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Chris Torek.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 *    notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 *    notice, this list of conditions and the following disclaimer in the
24 *    documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 */
41
42#include "namespace.h"
43#include <ctype.h>
44#include <inttypes.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <stddef.h>
48#include <stdarg.h>
49#include <string.h>
50#include <wchar.h>
51#include <wctype.h>
52#include "un-namespace.h"
53
54#include "collate.h"
55#include "libc_private.h"
56#include "local.h"
57#include "xlocale_private.h"
58
59#ifndef NO_FLOATING_POINT
60#include <locale.h>
61#endif
62
/*
 * Size of the scratch buffer __svfscanf() uses for integer and
 * floating-point conversions.  Field widths for those conversions are
 * clamped so that the scanned text plus a terminating NUL fits in it.
 */
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * These are bit values OR'ed together in __svfscanf()'s `flags'
 * variable.  The values 0x40, 0x80 and 0x100 are not assigned here.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define FASTINT		0x200	/* wfN: int_fastN_t */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * Conversion types.
 *
 * After the format directive has been parsed, __svfscanf() replaces the
 * conversion character with one of these codes and switches on it.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */

/* Builds the 256-entry %[...] membership table; defined later in this file. */
static const u_char *__sccl(char *, const u_char *);
#ifndef NO_FLOATING_POINT
/* Collects the text of a floating-point number; defined later in this file. */
static int parsefloat(FILE *, char *, char *, locale_t);
#endif

/* NOTE(review): presumably exports __vfscanf under the public name vfscanf. */
__weak_reference(__vfscanf, vfscanf);

/*
 * Conversion functions are passed a pointer to this object instead of
 * a real parameter to indicate that the assignment-suppression (*)
 * flag was specified.  We could use a NULL pointer to indicate this,
 * but that would mask bugs in applications that call scanf() with a
 * NULL pointer.
 */
static const int suppress;
#define	SUPPRESS_PTR	((void *)&suppress)

/*
 * Zero-initialized (static storage) conversion state; each wide
 * conversion function copies it into its local mbstate_t before reading.
 */
static const mbstate_t initial_mbs;
109
110/*
111 * The following conversion functions return the number of characters consumed,
112 * or -1 on input failure.  Character class conversion returns 0 on match
113 * failure.
114 */
115
116static __inline int
117convert_char(FILE *fp, char * p, int width)
118{
119	int n;
120
121	if (p == SUPPRESS_PTR) {
122		size_t sum = 0;
123		for (;;) {
124			if ((n = fp->_r) < width) {
125				sum += n;
126				width -= n;
127				fp->_p += n;
128				if (__srefill(fp)) {
129					if (sum == 0)
130						return (-1);
131					break;
132				}
133			} else {
134				sum += width;
135				fp->_r -= width;
136				fp->_p += width;
137				break;
138			}
139		}
140		return (sum);
141	} else {
142		size_t r = __fread(p, 1, width, fp);
143
144		if (r == 0)
145			return (-1);
146		return (r);
147	}
148}
149
150static __inline int
151convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
152{
153	mbstate_t mbs;
154	int n, nread;
155	wint_t wi;
156
157	mbs = initial_mbs;
158	n = 0;
159	while (width-- != 0 &&
160	    (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) {
161		if (wcp != SUPPRESS_PTR)
162			*wcp++ = (wchar_t)wi;
163		n += nread;
164	}
165	if (n == 0)
166		return (-1);
167	return (n);
168}
169
170static __inline int
171convert_ccl(FILE *fp, char * p, int width, const char *ccltab)
172{
173	char *p0;
174	int n;
175
176	if (p == SUPPRESS_PTR) {
177		n = 0;
178		while (ccltab[*fp->_p]) {
179			n++, fp->_r--, fp->_p++;
180			if (--width == 0)
181				break;
182			if (fp->_r <= 0 && __srefill(fp)) {
183				if (n == 0)
184					return (-1);
185				break;
186			}
187		}
188	} else {
189		p0 = p;
190		while (ccltab[*fp->_p]) {
191			fp->_r--;
192			*p++ = *fp->_p++;
193			if (--width == 0)
194				break;
195			if (fp->_r <= 0 && __srefill(fp)) {
196				if (p == p0)
197					return (-1);
198				break;
199			}
200		}
201		n = p - p0;
202		if (n == 0)
203			return (0);
204		*p = 0;
205	}
206	return (n);
207}
208
209static __inline int
210convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab,
211    locale_t locale)
212{
213	mbstate_t mbs;
214	wint_t wi;
215	int n, nread;
216
217	mbs = initial_mbs;
218	n = 0;
219	if (wcp == SUPPRESS_PTR) {
220		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
221		    width-- != 0 && ccltab[wctob(wi)])
222			n += nread;
223		if (wi != WEOF)
224			__ungetwc(wi, fp, __get_locale());
225	} else {
226		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
227		    width-- != 0 && ccltab[wctob(wi)]) {
228			*wcp++ = (wchar_t)wi;
229			n += nread;
230		}
231		if (wi != WEOF)
232			__ungetwc(wi, fp, __get_locale());
233		if (n == 0)
234			return (0);
235		*wcp = 0;
236	}
237	return (n);
238}
239
240static __inline int
241convert_string(FILE *fp, char * p, int width)
242{
243	char *p0;
244	int n;
245
246	if (p == SUPPRESS_PTR) {
247		n = 0;
248		while (!isspace(*fp->_p)) {
249			n++, fp->_r--, fp->_p++;
250			if (--width == 0)
251				break;
252			if (fp->_r <= 0 && __srefill(fp))
253				break;
254		}
255	} else {
256		p0 = p;
257		while (!isspace(*fp->_p)) {
258			fp->_r--;
259			*p++ = *fp->_p++;
260			if (--width == 0)
261				break;
262			if (fp->_r <= 0 && __srefill(fp))
263				break;
264		}
265		*p = 0;
266		n = p - p0;
267	}
268	return (n);
269}
270
271static __inline int
272convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
273{
274	mbstate_t mbs;
275	wint_t wi;
276	int n, nread;
277
278	mbs = initial_mbs;
279	n = 0;
280	if (wcp == SUPPRESS_PTR) {
281		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
282		    width-- != 0 && !iswspace(wi))
283			n += nread;
284		if (wi != WEOF)
285			__ungetwc(wi, fp, __get_locale());
286	} else {
287		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
288		    width-- != 0 && !iswspace(wi)) {
289			*wcp++ = (wchar_t)wi;
290			n += nread;
291		}
292		if (wi != WEOF)
293			__ungetwc(wi, fp, __get_locale());
294		*wcp = '\0';
295	}
296	return (n);
297}
298
/*
 * States of the integer recognizer driven by parseint_fsm().
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* accepted a leading '+' or '-' and nothing else */
	havezero,	/* accepted one leading '0' (after an optional sign) */
	haveprefix,	/* accepted "0b"/"0B"/"0x"/"0X", no digit after it yet */
	any,		/* accepted at least one further digit */
};

/*
 * Feed one character to the integer recognizer.
 *
 * c is the next input character, *state the recognizer state and *base
 * the conversion base.  A *base of 0 means "detect from the input"; it
 * is replaced by 2, 8, 10 or 16 as soon as the input determines it.
 *
 * Returns 1 and updates *state (and possibly *base) if c extends the
 * number, or 0 if c cannot be part of it.  When 0 is returned neither
 * *state nor the caller's view of the accepted text changes, although
 * *base may already have been resolved.
 */
static __inline int
parseint_fsm(int c, enum parseint_state *state, int *base)
{
	int digit;

	switch (c) {
	case '+':
	case '-':
		/* A sign is only valid as the very first character. */
		if (*state != begin)
			return (0);
		*state = havesign;
		return (1);
	case '0':
		if (*state == begin || *state == havesign) {
			*state = havezero;
		} else {
			/*
			 * A digit right after a lone leading zero means
			 * octal when the base is being detected.  Without
			 * this, "007" would leave the base at 0 and every
			 * digit after the zeros would be rejected.
			 */
			if (*state == havezero && *base == 0)
				*base = 8;
			*state = any;
		}
		return (1);
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		if (*state == havezero && *base == 0)
			*base = 8;
		/* FALLTHROUGH */
	case '8':
	case '9':
		/* A first digit other than zero selects decimal. */
		if ((*state == begin || *state == havesign) && *base == 0)
			*base = 10;
		digit = c - '0';
		break;
	case 'b':
	case 'B':
		/* "0b" is a binary prefix unless the base rules that out. */
		if (*state == havezero && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return (1);
		}
		/* Otherwise it may still be the hexadecimal digit B. */
		/* FALLTHROUGH */
	case 'a':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'A':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		digit = (c >= 'a' ? c - 'a' : c - 'A') + 10;
		break;
	case 'x':
	case 'X':
		/* "0x" is a hexadecimal prefix; 'x' is never a digit. */
		if (*state == havezero && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return (1);
		}
		return (0);
	default:
		return (0);
	}

	/* Accept the digit only if it is valid in the current base. */
	if (*base > digit) {
		*state = any;
		return (1);
	}
	return (0);
}
418
419/*
420 * Read an integer, storing it in buf.
421 *
422 * Return 0 on a match failure, and the number of characters read
423 * otherwise.
424 */
425static __inline int
426parseint(FILE *fp, char * __restrict buf, int width, int base)
427{
428	enum parseint_state state = begin;
429	char *p;
430	int c;
431
432	for (p = buf; width; width--) {
433		c = __sgetc(fp);
434		if (c == EOF)
435			break;
436		if (!parseint_fsm(c, &state, &base))
437			break;
438		*p++ = c;
439	}
440	/*
441	 * If we only had a sign, push it back.  If we only had a 0b or 0x
442	 * prefix (possibly preceded by a sign), we view it as "0" and
443	 * push back the letter.  In all other cases, if we stopped
444	 * because we read a non-number character, push it back.
445	 */
446	if (state == havesign) {
447		p--;
448		(void) __ungetc(*(u_char *)p, fp);
449	} else if (state == haveprefix) {
450		p--;
451		(void) __ungetc(c, fp);
452	} else if (width && c != EOF) {
453		(void) __ungetc(c, fp);
454	}
455	return (p - buf);
456}
457
/*
 * __vfscanf - MT-safe version
 *
 * Runs __svfscanf() with the locale returned by __get_locale(), with
 * the call bracketed by FLOCKFILE_CANCELSAFE/FUNLOCKFILE_CANCELSAFE.
 * The result of __svfscanf() is returned unchanged.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int rv;

	FLOCKFILE_CANCELSAFE(fp);
	rv = __svfscanf(fp, __get_locale(), fmt0, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (rv);
}
471int
472vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap)
473{
474	int ret;
475	FIX_LOCALE(locale);
476
477	FLOCKFILE_CANCELSAFE(fp);
478	ret = __svfscanf(fp, locale, fmt0, ap);
479	FUNLOCKFILE_CANCELSAFE();
480	return (ret);
481}
482
/*
 * __svfscanf - non-MT-safe version of __vfscanf
 *
 * Interprets the format fmt0 against input read from fp, storing
 * converted values through the pointers taken from ap.  The given
 * locale is handed to the wide-character and numeric conversion
 * helpers.  No stream locking is done here.
 *
 * Returns the number of fields assigned.  EOF is returned instead if
 * an input failure occurs before any conversion has completed, or if
 * the format string ends in the middle of a conversion specification.
 */
int
__svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
{
/*
 * Fetch the destination pointer for the current conversion: the
 * SUPPRESS_PTR sentinel when assignment is suppressed (no argument is
 * consumed in that case), otherwise the next argument from ap.
 */
#define	GETARG(type)	((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
	const u_char *fmt = (const u_char *)fmt0;
	int c;			/* character from format, or conversion */
	size_t width;		/* field width, or 0 */
	int flags;		/* flags as defined above */
	int nassigned;		/* number of fields assigned */
	int nconversions;	/* number of conversions */
	int nr;			/* characters read by the current conversion */
	int nread;		/* number of characters consumed from fp */
	int base;		/* base argument to conversion function */
	char ccltab[256];	/* character class table for %[...] */
	char buf[BUF];		/* buffer for numeric conversions */

	ORIENT(fp, -1);

	nassigned = 0;
	nconversions = 0;
	nread = 0;
	for (;;) {
		c = *fmt++;
		if (c == 0)
			return (nassigned);
		/*
		 * White space in the format matches any amount of input
		 * white space, including none.
		 */
		if (isspace(c)) {
			while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
				nread++, fp->_r--, fp->_p++;
			continue;
		}
		if (c != '%')
			goto literal;
		width = 0;
		flags = 0;
		/*
		 * switch on the format.  continue if done;
		 * break once format type is derived.
		 */
again:		c = *fmt++;
		switch (c) {
		case '%':
literal:
			/* Match one input character exactly against c. */
			if (fp->_r <= 0 && __srefill(fp))
				goto input_failure;
			if (*fp->_p != c)
				goto match_failure;
			fp->_r--, fp->_p++;
			nread++;
			continue;

		case '*':
			flags |= SUPPRESS;
			goto again;
		case 'j':
			flags |= INTMAXT;
			goto again;
		case 'l':
			/* A second 'l' upgrades LONG to LONGLONG. */
			if (flags & LONG) {
				flags &= ~LONG;
				flags |= LONGLONG;
			} else
				flags |= LONG;
			goto again;
		case 'q':
			flags |= LONGLONG;	/* not quite */
			goto again;
		case 't':
			flags |= PTRDIFFT;
			goto again;
		case 'w':
			/*
			 * Fixed-width integer types.  On all platforms we
			 * support, int8_t is equivalent to char, int16_t
			 * is equivalent to short, int32_t is equivalent
			 * to int, int64_t is equivalent to long long int.
			 * Furthermore, int_fast8_t, int_fast16_t and
			 * int_fast32_t are equivalent to int, and
			 * int_fast64_t is equivalent to long long int.
			 */
			flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
			if (fmt[0] == 'f') {
				flags |= FASTINT;
				fmt++;
			} else {
				flags &= ~FASTINT;
			}
			if (fmt[0] == '8') {
				if (!(flags & FASTINT))
					flags |= SHORTSHORT;
				else
					/* no flag set = 32 */ ;
				fmt += 1;
			} else if (fmt[0] == '1' && fmt[1] == '6') {
				if (!(flags & FASTINT))
					flags |= SHORT;
				else
					/* no flag set = 32 */ ;
				fmt += 2;
			} else if (fmt[0] == '3' && fmt[1] == '2') {
				/* no flag set = 32 */ ;
				fmt += 2;
			} else if (fmt[0] == '6' && fmt[1] == '4') {
				flags |= LONGLONG;
				fmt += 2;
			} else {
				/* Any other width after 'w' is not supported. */
				goto match_failure;
			}
			goto again;
		case 'z':
			flags |= SIZET;
			goto again;
		case 'L':
			flags |= LONGDBL;
			goto again;
		case 'h':
			/* A second 'h' upgrades SHORT to SHORTSHORT. */
			if (flags & SHORT) {
				flags &= ~SHORT;
				flags |= SHORTSHORT;
			} else
				flags |= SHORT;
			goto again;

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* Accumulate the decimal field width. */
			width = width * 10 + c - '0';
			goto again;

		/*
		 * Conversions.
		 */
		case 'B':
		case 'b':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 2;
			break;

		case 'd':
			c = CT_INT;
			base = 10;
			break;

		case 'i':
			c = CT_INT;
			base = 0;
			break;

		case 'o':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 8;
			break;

		case 'u':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 10;
			break;

		case 'X':
		case 'x':
			c = CT_INT;
			flags |= UNSIGNED;
			base = 16;
			break;

#ifndef NO_FLOATING_POINT
		case 'A': case 'E': case 'F': case 'G':
		case 'a': case 'e': case 'f': case 'g':
			c = CT_FLOAT;
			break;
#endif

		case 'S':
			flags |= LONG;
			/* FALLTHROUGH */
		case 's':
			c = CT_STRING;
			break;

		case '[':
			/* __sccl() fills ccltab and advances fmt past the set. */
			fmt = __sccl(ccltab, fmt);
			flags |= NOSKIP;
			c = CT_CCL;
			break;

		case 'C':
			flags |= LONG;
			/* FALLTHROUGH */
		case 'c':
			flags |= NOSKIP;
			c = CT_CHAR;
			break;

		case 'p':	/* pointer format is like hex */
			flags |= POINTER;
			c = CT_INT;		/* assumes sizeof(uintmax_t) */
			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
			base = 16;
			break;

		case 'n':
			/*
			 * Store the number of characters consumed so far.
			 * This reads no input and does not count as an
			 * assignment or a conversion.
			 */
			if (flags & SUPPRESS)	/* ??? */
				continue;
			if (flags & SHORTSHORT)
				*va_arg(ap, char *) = nread;
			else if (flags & SHORT)
				*va_arg(ap, short *) = nread;
			else if (flags & LONG)
				*va_arg(ap, long *) = nread;
			else if (flags & LONGLONG)
				*va_arg(ap, long long *) = nread;
			else if (flags & INTMAXT)
				*va_arg(ap, intmax_t *) = nread;
			else if (flags & SIZET)
				*va_arg(ap, size_t *) = nread;
			else if (flags & PTRDIFFT)
				*va_arg(ap, ptrdiff_t *) = nread;
			else
				*va_arg(ap, int *) = nread;
			continue;

		default:
			goto match_failure;

		/*
		 * Disgusting backwards compatibility hack.	XXX
		 */
		case '\0':	/* compat */
			return (EOF);
		}

		/*
		 * We have a conversion that requires input.
		 */
		if (fp->_r <= 0 && __srefill(fp))
			goto input_failure;

		/*
		 * Consume leading white space, except for formats
		 * that suppress this.
		 */
		if ((flags & NOSKIP) == 0) {
			while (isspace(*fp->_p)) {
				nread++;
				if (--fp->_r > 0)
					fp->_p++;
				else if (__srefill(fp))
					goto input_failure;
			}
			/*
			 * Note that there is at least one character in
			 * the buffer, so conversions that do not set NOSKIP
			 * can no longer result in an input failure.
			 */
		}

		/*
		 * Do the conversion.
		 */
		switch (c) {

		case CT_CHAR:
			/* scan arbitrary characters (sets NOSKIP) */
			if (width == 0)
				width = 1;
			if (flags & LONG) {
				nr = convert_wchar(fp, GETARG(wchar_t *),
				    width, locale);
			} else {
				nr = convert_char(fp, GETARG(char *), width);
			}
			if (nr < 0)
				goto input_failure;
			break;

		case CT_CCL:
			/* scan a (nonempty) character class (sets NOSKIP) */
			if (width == 0)
				width = (size_t)~0;	/* `infinity' */
			if (flags & LONG) {
				nr = convert_wccl(fp, GETARG(wchar_t *), width,
				    ccltab, locale);
			} else {
				nr = convert_ccl(fp, GETARG(char *), width,
				    ccltab);
			}
			if (nr <= 0) {
				if (nr < 0)
					goto input_failure;
				else /* nr == 0 */
					goto match_failure;
			}
			break;

		case CT_STRING:
			/* like CCL, but zero-length string OK, & no NOSKIP */
			if (width == 0)
				width = (size_t)~0;
			if (flags & LONG) {
				nr = convert_wstring(fp, GETARG(wchar_t *),
				    width, locale);
			} else {
				nr = convert_string(fp, GETARG(char *), width);
			}
			if (nr < 0)
				goto input_failure;
			break;

		case CT_INT:
			/* scan an integer as if by the conversion function */
#ifdef hardway
			if (width == 0 || width > sizeof(buf) - 1)
				width = sizeof(buf) - 1;
#else
			/* size_t is unsigned, hence this optimisation */
			/*
			 * Decrementing a width of 0 wraps to the largest
			 * size_t, so one comparison catches both "no width"
			 * and "width too large for buf".
			 */
			if (--width > sizeof(buf) - 2)
				width = sizeof(buf) - 2;
			width++;
#endif
			nr = parseint(fp, buf, width, base);
			if (nr == 0)
				goto match_failure;
			if ((flags & SUPPRESS) == 0) {
				uintmax_t res;

				/* parseint() does not terminate buf. */
				buf[nr] = '\0';
				if ((flags & UNSIGNED) == 0)
				    res = strtoimax_l(buf, (char **)NULL, base, locale);
				else
				    res = strtoumax_l(buf, (char **)NULL, base, locale);
				if (flags & POINTER)
					*va_arg(ap, void **) =
							(void *)(uintptr_t)res;
				else if (flags & SHORTSHORT)
					*va_arg(ap, char *) = res;
				else if (flags & SHORT)
					*va_arg(ap, short *) = res;
				else if (flags & LONG)
					*va_arg(ap, long *) = res;
				else if (flags & LONGLONG)
					*va_arg(ap, long long *) = res;
				else if (flags & INTMAXT)
					*va_arg(ap, intmax_t *) = res;
				else if (flags & PTRDIFFT)
					*va_arg(ap, ptrdiff_t *) = res;
				else if (flags & SIZET)
					*va_arg(ap, size_t *) = res;
				else
					*va_arg(ap, int *) = res;
			}
			break;

#ifndef NO_FLOATING_POINT
		case CT_FLOAT:
			/* scan a floating point number as if by strtod */
			if (width == 0 || width > sizeof(buf) - 1)
				width = sizeof(buf) - 1;
			nr = parsefloat(fp, buf, buf + width, locale);
			if (nr == 0)
				goto match_failure;
			if ((flags & SUPPRESS) == 0) {
				if (flags & LONGDBL) {
					long double res = strtold_l(buf, NULL,
					    locale);
					*va_arg(ap, long double *) = res;
				} else if (flags & LONG) {
					double res = strtod_l(buf, NULL,
					    locale);
					*va_arg(ap, double *) = res;
				} else {
					float res = strtof_l(buf, NULL, locale);
					*va_arg(ap, float *) = res;
				}
			}
			break;
#endif /* !NO_FLOATING_POINT */
		}
		/* The conversion succeeded; update the running totals. */
		if (!(flags & SUPPRESS))
			nassigned++;
		nread += nr;
		nconversions++;
	}
input_failure:
	/* EOF is reported only if no conversion completed before the failure. */
	return (nconversions != 0 ? nassigned : EOF);
match_failure:
	return (nassigned);
}
874
875/*
876 * Fill in the given table from the scanset at the given format
877 * (just after `[').  Return a pointer to the character past the
878 * closing `]'.  The table has a 1 wherever characters should be
879 * considered part of the scanset.
880 */
881static const u_char *
882__sccl(char *tab, const u_char *fmt)
883{
884	int c, n, v, i;
885	struct xlocale_collate *table =
886		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
887
888	/* first `clear' the whole table */
889	c = *fmt++;		/* first char hat => negated scanset */
890	if (c == '^') {
891		v = 1;		/* default => accept */
892		c = *fmt++;	/* get new first char */
893	} else
894		v = 0;		/* default => reject */
895
896	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
897	(void) memset(tab, v, 256);
898
899	if (c == 0)
900		return (fmt - 1);/* format ended before closing ] */
901
902	/*
903	 * Now set the entries corresponding to the actual scanset
904	 * to the opposite of the above.
905	 *
906	 * The first character may be ']' (or '-') without being special;
907	 * the last character may be '-'.
908	 */
909	v = 1 - v;
910	for (;;) {
911		tab[c] = v;		/* take character c */
912doswitch:
913		n = *fmt++;		/* and examine the next */
914		switch (n) {
915
916		case 0:			/* format ended too soon */
917			return (fmt - 1);
918
919		case '-':
920			/*
921			 * A scanset of the form
922			 *	[01+-]
923			 * is defined as `the digit 0, the digit 1,
924			 * the character +, the character -', but
925			 * the effect of a scanset such as
926			 *	[a-zA-Z0-9]
927			 * is implementation defined.  The V7 Unix
928			 * scanf treats `a-z' as `the letters a through
929			 * z', but treats `a-a' as `the letter a, the
930			 * character -, and the letter a'.
931			 *
932			 * For compatibility, the `-' is not considered
933			 * to define a range if the character following
934			 * it is either a close bracket (required by ANSI)
935			 * or is not numerically greater than the character
936			 * we just stored in the table (c).
937			 */
938			n = *fmt;
939			if (n == ']'
940			    || (table->__collate_load_error ? n < c :
941				__collate_range_cmp(n, c) < 0
942			       )
943			   ) {
944				c = '-';
945				break;	/* resume the for(;;) */
946			}
947			fmt++;
948			/* fill in the range */
949			if (table->__collate_load_error) {
950				do {
951					tab[++c] = v;
952				} while (c < n);
953			} else {
954				for (i = 0; i < 256; i ++)
955					if (__collate_range_cmp(c, i) <= 0 &&
956					    __collate_range_cmp(i, n) <= 0
957					   )
958						tab[i] = v;
959			}
960#if 1	/* XXX another disgusting compatibility hack */
961			c = n;
962			/*
963			 * Alas, the V7 Unix scanf also treats formats
964			 * such as [a-c-e] as `the letters a through e'.
965			 * This too is permitted by the standard....
966			 */
967			goto doswitch;
968#else
969			c = *fmt++;
970			if (c == 0)
971				return (fmt - 1);
972			if (c == ']')
973				return (fmt);
974#endif
975			break;
976
977		case ']':		/* end of scanset */
978			return (fmt);
979
980		default:		/* just another character */
981			c = n;
982			break;
983		}
984	}
985	/* NOTREACHED */
986}
987
988#ifndef NO_FLOATING_POINT
/*
 * Collect the text of a floating-point number from fp into buf.
 *
 * Characters are copied into buf while they can still extend a valid
 * number (decimal or hexadecimal, "inf"/"infinity", "nan" or
 * "nan(...)"), stopping before `end'.  Anything read beyond the longest
 * valid prefix is pushed back with __ungetc(), and buf is
 * NUL-terminated right after that prefix.  The decimal point sequence
 * is taken from localeconv_l(locale).
 *
 * The first input byte is read from fp->_p without a refill, so the
 * stream buffer must already hold at least one unread byte.
 *
 * Returns the length of the text left in buf, or 0 if no valid number
 * was found.
 */
static int
parsefloat(FILE *fp, char *buf, char *end, locale_t locale)
{
	char *commit, *p;
	int infnanpos = 0, decptpos = 0;
	enum {
		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
		S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
	} state = S_START;
	unsigned char c;
	const char *decpt = localeconv_l(locale)->decimal_point;
	_Bool gotmantdig = 0, ishex = 0;

	/*
	 * We set commit = p whenever the string we have read so far
	 * constitutes a valid representation of a floating point
	 * number by itself.  At some point, the parse will complete
	 * or fail, and we will ungetc() back to the last commit point.
	 * To ensure that the file offset gets updated properly, it is
	 * always necessary to read at least one character that doesn't
	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
	 */
	commit = buf - 1;
	for (p = buf; p < end; ) {
		c = *fp->_p;
reswitch:
		/*
		 * Within this switch, `break' accepts c (it is stored and
		 * the stream advanced below), `goto reswitch' re-examines
		 * c in the new state, and `goto parsedone' rejects it.
		 */
		switch (state) {
		case S_START:
			/* Optional sign. */
			state = S_GOTSIGN;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_GOTSIGN:
			switch (c) {
			case '0':
				state = S_MAYBEHEX;
				commit = p;
				break;
			case 'I':
			case 'i':
				state = S_INF;
				break;
			case 'N':
			case 'n':
				state = S_NAN;
				break;
			default:
				state = S_DIGITS;
				goto reswitch;
			}
			break;
		case S_INF:
			/* infnanpos indexes the letters after the initial 'i'. */
			if (infnanpos > 6 ||
			    (c != "nfinity"[infnanpos] &&
			     c != "NFINITY"[infnanpos]))
				goto parsedone;
			if (infnanpos == 1 || infnanpos == 6)
				commit = p;	/* inf or infinity */
			infnanpos++;
			break;
		case S_NAN:
			/* infnanpos counts the characters after the initial 'n'. */
			switch (infnanpos) {
			case 0:
				if (c != 'A' && c != 'a')
					goto parsedone;
				break;
			case 1:
				if (c != 'N' && c != 'n')
					goto parsedone;
				else
					commit = p;
				break;
			case 2:
				if (c != '(')
					goto parsedone;
				break;
			default:
				/* Inside "nan(...)": alphanumerics and '_' up to ')'. */
				if (c == ')') {
					commit = p;
					state = S_DONE;
				} else if (!isalnum(c) && c != '_')
					goto parsedone;
				break;
			}
			infnanpos++;
			break;
		case S_DONE:
			goto parsedone;
		case S_MAYBEHEX:
			state = S_DIGITS;
			if (c == 'X' || c == 'x') {
				ishex = 1;
				break;
			} else {	/* we saw a '0', but no 'x' */
				gotmantdig = 1;
				goto reswitch;
			}
		case S_DIGITS:
			if ((ishex && isxdigit(c)) || isdigit(c)) {
				gotmantdig = 1;
				commit = p;
				break;
			} else {
				state = S_DECPT;
				goto reswitch;
			}
		case S_DECPT:
			/* decptpos counts the decimal point bytes matched so far. */
			if (c == decpt[decptpos]) {
				if (decpt[++decptpos] == '\0') {
					/* We read the complete decpt seq. */
					state = S_FRAC;
					if (gotmantdig)
						commit = p;
				}
				break;
			} else if (!decptpos) {
				/* We didn't read any decpt characters. */
				state = S_FRAC;
				goto reswitch;
			} else {
				/*
				 * We read part of a multibyte decimal point,
				 * but the rest is invalid, so bail.
				 */
				goto parsedone;
			}
		case S_FRAC:
			/* An exponent marker needs at least one mantissa digit. */
			if (((c == 'E' || c == 'e') && !ishex) ||
			    ((c == 'P' || c == 'p') && ishex)) {
				if (!gotmantdig)
					goto parsedone;
				else
					state = S_EXP;
			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
				commit = p;
				gotmantdig = 1;
			} else
				goto parsedone;
			break;
		case S_EXP:
			/* Optional exponent sign. */
			state = S_EXPDIGITS;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_EXPDIGITS:
			if (isdigit(c))
				commit = p;
			else
				goto parsedone;
			break;
		default:
			abort();
		}
		/* c was accepted: store it and advance the stream. */
		*p++ = c;
		if (--fp->_r > 0)
			fp->_p++;
		else if (__srefill(fp))
			break;	/* EOF */
	}

parsedone:
	/* Push back, last character first, everything after the commit point. */
	while (commit < --p)
		__ungetc(*(u_char *)p, fp);
	*++commit = '\0';
	return (commit - buf);
}
1157#endif
1158