1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Chris Torek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp
35 * From: static char sccsid[] = "@(#)strtol.c	8.1 (Berkeley) 6/4/93";
36 * From: static char sccsid[] = "@(#)strtoul.c	8.1 (Berkeley) 6/4/93";
37 */
38
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD$");
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/ctype.h>
45#include <sys/limits.h>
46#include <sys/stddef.h>
47
/*
 * Note that stdarg.h and the ANSI style va_start macro are used for both
 * ANSI and traditional C compilers.
 */
52#include <machine/stdarg.h>
53
#define	BUF		32 	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * The length-modifier flags (LONG, SHORT, QUAD, INTMAXT, PTRDIFFT, SIZET,
 * SHORTSHORT) select the type of the destination object for the integer
 * and %n conversions.
 */
#define	LONG		0x01	/* l: long */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* suppress assignment */
#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x20	/* do not skip blanks */
#define	QUAD		0x400	/* q or ll: quad_t */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/EXPOK deliberately share bit values with PFXOK/NZDIGITS.  The
 * code visible in this file performs no floating point conversions, so
 * DPTOK and EXPOK are defined but not referenced here.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtoq or strtouq */
/* Signature shared by the string-to-integer converters (strtoq/strtouq). */
typedef u_quad_t (*ccfntype)(const char *, char **, int);

/* Builds the %[...] scanset table; defined at the end of this file. */
static const u_char *__sccl(char *, const u_char *);
94
/*
 * Scan the NUL-terminated string `ibuf' according to `fmt'.
 *
 * This is the variadic front end: it packages the trailing arguments
 * into a va_list and lets vsscanf() do the work.  The value returned is
 * whatever vsscanf() returns.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	int nmatched;
	va_list args;

	va_start(args, fmt);
	nmatched = vsscanf(ibuf, fmt, args);
	va_end(args);

	return (nmatched);
}
106
107int
108vsscanf(const char *inp, char const *fmt0, va_list ap)
109{
110	int inr;
111	const u_char *fmt = (const u_char *)fmt0;
112	int c;			/* character from format, or conversion */
113	size_t width;		/* field width, or 0 */
114	char *p;		/* points into all kinds of strings */
115	int n;			/* handy integer */
116	int flags;		/* flags as defined above */
117	char *p0;		/* saves original value of p when necessary */
118	int nassigned;		/* number of fields assigned */
119	int nconversions;	/* number of conversions */
120	int nread;		/* number of characters consumed from fp */
121	int base;		/* base argument to strtoq/strtouq */
122	ccfntype ccfn;		/* conversion function (strtoq/strtouq) */
123	char ccltab[256];	/* character class table for %[...] */
124	char buf[BUF];		/* buffer for numeric conversions */
125
126	/* `basefix' is used to avoid `if' tests in the integer scanner */
127	static short basefix[17] =
128		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
129
130	inr = strlen(inp);
131
132	nassigned = 0;
133	nconversions = 0;
134	nread = 0;
135	base = 0;		/* XXX just to keep gcc happy */
136	ccfn = NULL;		/* XXX just to keep gcc happy */
137	for (;;) {
138		c = *fmt++;
139		if (c == 0)
140			return (nassigned);
141		if (isspace(c)) {
142			while (inr > 0 && isspace(*inp))
143				nread++, inr--, inp++;
144			continue;
145		}
146		if (c != '%')
147			goto literal;
148		width = 0;
149		flags = 0;
150		/*
151		 * switch on the format.  continue if done;
152		 * break once format type is derived.
153		 */
154again:		c = *fmt++;
155		switch (c) {
156		case '%':
157literal:
158			if (inr <= 0)
159				goto input_failure;
160			if (*inp != c)
161				goto match_failure;
162			inr--, inp++;
163			nread++;
164			continue;
165
166		case '*':
167			flags |= SUPPRESS;
168			goto again;
169		case 'j':
170			flags |= INTMAXT;
171			goto again;
172		case 'l':
173			if (flags & LONG){
174				flags &= ~LONG;
175				flags |= QUAD;
176			} else {
177				flags |= LONG;
178			}
179			goto again;
180		case 'q':
181			flags |= QUAD;
182			goto again;
183		case 't':
184			flags |= PTRDIFFT;
185			goto again;
186		case 'z':
187			flags |= SIZET;
188			goto again;
189		case 'h':
190			if (flags & SHORT){
191				flags &= ~SHORT;
192				flags |= SHORTSHORT;
193			} else {
194				flags |= SHORT;
195			}
196			goto again;
197
198		case '0': case '1': case '2': case '3': case '4':
199		case '5': case '6': case '7': case '8': case '9':
200			width = width * 10 + c - '0';
201			goto again;
202
203		/*
204		 * Conversions.
205		 *
206		 */
207		case 'd':
208			c = CT_INT;
209			ccfn = (ccfntype)strtoq;
210			base = 10;
211			break;
212
213		case 'i':
214			c = CT_INT;
215			ccfn = (ccfntype)strtoq;
216			base = 0;
217			break;
218
219		case 'o':
220			c = CT_INT;
221			ccfn = strtouq;
222			base = 8;
223			break;
224
225		case 'u':
226			c = CT_INT;
227			ccfn = strtouq;
228			base = 10;
229			break;
230
231		case 'x':
232			flags |= PFXOK;	/* enable 0x prefixing */
233			c = CT_INT;
234			ccfn = strtouq;
235			base = 16;
236			break;
237
238		case 's':
239			c = CT_STRING;
240			break;
241
242		case '[':
243			fmt = __sccl(ccltab, fmt);
244			flags |= NOSKIP;
245			c = CT_CCL;
246			break;
247
248		case 'c':
249			flags |= NOSKIP;
250			c = CT_CHAR;
251			break;
252
253		case 'p':	/* pointer format is like hex */
254			flags |= POINTER | PFXOK;
255			c = CT_INT;
256			ccfn = strtouq;
257			base = 16;
258			break;
259
260		case 'n':
261			nconversions++;
262			if (flags & SUPPRESS)	/* ??? */
263				continue;
264			if (flags & SHORTSHORT)
265				*va_arg(ap, char *) = nread;
266			else if (flags & SHORT)
267				*va_arg(ap, short *) = nread;
268			else if (flags & LONG)
269				*va_arg(ap, long *) = nread;
270			else if (flags & QUAD)
271				*va_arg(ap, quad_t *) = nread;
272			else if (flags & INTMAXT)
273				*va_arg(ap, intmax_t *) = nread;
274			else if (flags & SIZET)
275				*va_arg(ap, size_t *) = nread;
276			else if (flags & PTRDIFFT)
277				*va_arg(ap, ptrdiff_t *) = nread;
278			else
279				*va_arg(ap, int *) = nread;
280			continue;
281		}
282
283		/*
284		 * We have a conversion that requires input.
285		 */
286		if (inr <= 0)
287			goto input_failure;
288
289		/*
290		 * Consume leading white space, except for formats
291		 * that suppress this.
292		 */
293		if ((flags & NOSKIP) == 0) {
294			while (isspace(*inp)) {
295				nread++;
296				if (--inr > 0)
297					inp++;
298				else
299					goto input_failure;
300			}
301			/*
302			 * Note that there is at least one character in
303			 * the buffer, so conversions that do not set NOSKIP
304			 * can no longer result in an input failure.
305			 */
306		}
307
308		/*
309		 * Do the conversion.
310		 */
311		switch (c) {
312		case CT_CHAR:
313			/* scan arbitrary characters (sets NOSKIP) */
314			if (width == 0)
315				width = 1;
316			if (flags & SUPPRESS) {
317				size_t sum = 0;
318				for (;;) {
319					if ((n = inr) < width) {
320						sum += n;
321						width -= n;
322						inp += n;
323						if (sum == 0)
324							goto input_failure;
325						break;
326					} else {
327						sum += width;
328						inr -= width;
329						inp += width;
330						break;
331					}
332				}
333				nread += sum;
334			} else {
335				bcopy(inp, va_arg(ap, char *), width);
336				inr -= width;
337				inp += width;
338				nread += width;
339				nassigned++;
340			}
341			nconversions++;
342			break;
343
344		case CT_CCL:
345			/* scan a (nonempty) character class (sets NOSKIP) */
346			if (width == 0)
347				width = (size_t)~0;	/* `infinity' */
348			/* take only those things in the class */
349			if (flags & SUPPRESS) {
350				n = 0;
351				while (ccltab[(unsigned char)*inp]) {
352					n++, inr--, inp++;
353					if (--width == 0)
354						break;
355					if (inr <= 0) {
356						if (n == 0)
357							goto input_failure;
358						break;
359					}
360				}
361				if (n == 0)
362					goto match_failure;
363			} else {
364				p0 = p = va_arg(ap, char *);
365				while (ccltab[(unsigned char)*inp]) {
366					inr--;
367					*p++ = *inp++;
368					if (--width == 0)
369						break;
370					if (inr <= 0) {
371						if (p == p0)
372							goto input_failure;
373						break;
374					}
375				}
376				n = p - p0;
377				if (n == 0)
378					goto match_failure;
379				*p = 0;
380				nassigned++;
381			}
382			nread += n;
383			nconversions++;
384			break;
385
386		case CT_STRING:
387			/* like CCL, but zero-length string OK, & no NOSKIP */
388			if (width == 0)
389				width = (size_t)~0;
390			if (flags & SUPPRESS) {
391				n = 0;
392				while (!isspace(*inp)) {
393					n++, inr--, inp++;
394					if (--width == 0)
395						break;
396					if (inr <= 0)
397						break;
398				}
399				nread += n;
400			} else {
401				p0 = p = va_arg(ap, char *);
402				while (!isspace(*inp)) {
403					inr--;
404					*p++ = *inp++;
405					if (--width == 0)
406						break;
407					if (inr <= 0)
408						break;
409				}
410				*p = 0;
411				nread += p - p0;
412				nassigned++;
413			}
414			nconversions++;
415			continue;
416
417		case CT_INT:
418			/* scan an integer as if by strtoq/strtouq */
419#ifdef hardway
420			if (width == 0 || width > sizeof(buf) - 1)
421				width = sizeof(buf) - 1;
422#else
423			/* size_t is unsigned, hence this optimisation */
424			if (--width > sizeof(buf) - 2)
425				width = sizeof(buf) - 2;
426			width++;
427#endif
428			flags |= SIGNOK | NDIGITS | NZDIGITS;
429			for (p = buf; width; width--) {
430				c = *inp;
431				/*
432				 * Switch on the character; `goto ok'
433				 * if we accept it as a part of number.
434				 */
435				switch (c) {
436				/*
437				 * The digit 0 is always legal, but is
438				 * special.  For %i conversions, if no
439				 * digits (zero or nonzero) have been
440				 * scanned (only signs), we will have
441				 * base==0.  In that case, we should set
442				 * it to 8 and enable 0x prefixing.
443				 * Also, if we have not scanned zero digits
444				 * before this, do not turn off prefixing
445				 * (someone else will turn it off if we
446				 * have scanned any nonzero digits).
447				 */
448				case '0':
449					if (base == 0) {
450						base = 8;
451						flags |= PFXOK;
452					}
453					if (flags & NZDIGITS)
454					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
455					else
456					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
457					goto ok;
458
459				/* 1 through 7 always legal */
460				case '1': case '2': case '3':
461				case '4': case '5': case '6': case '7':
462					base = basefix[base];
463					flags &= ~(SIGNOK | PFXOK | NDIGITS);
464					goto ok;
465
466				/* digits 8 and 9 ok iff decimal or hex */
467				case '8': case '9':
468					base = basefix[base];
469					if (base <= 8)
470						break;	/* not legal here */
471					flags &= ~(SIGNOK | PFXOK | NDIGITS);
472					goto ok;
473
474				/* letters ok iff hex */
475				case 'A': case 'B': case 'C':
476				case 'D': case 'E': case 'F':
477				case 'a': case 'b': case 'c':
478				case 'd': case 'e': case 'f':
479					/* no need to fix base here */
480					if (base <= 10)
481						break;	/* not legal here */
482					flags &= ~(SIGNOK | PFXOK | NDIGITS);
483					goto ok;
484
485				/* sign ok only as first character */
486				case '+': case '-':
487					if (flags & SIGNOK) {
488						flags &= ~SIGNOK;
489						goto ok;
490					}
491					break;
492
493				/* x ok iff flag still set & 2nd char */
494				case 'x': case 'X':
495					if (flags & PFXOK && p == buf + 1) {
496						base = 16;	/* if %i */
497						flags &= ~PFXOK;
498						goto ok;
499					}
500					break;
501				}
502
503				/*
504				 * If we got here, c is not a legal character
505				 * for a number.  Stop accumulating digits.
506				 */
507				break;
508		ok:
509				/*
510				 * c is legal: store it and look at the next.
511				 */
512				*p++ = c;
513				if (--inr > 0)
514					inp++;
515				else
516					break;		/* end of input */
517			}
518			/*
519			 * If we had only a sign, it is no good; push
520			 * back the sign.  If the number ends in `x',
521			 * it was [sign] '0' 'x', so push back the x
522			 * and treat it as [sign] '0'.
523			 */
524			if (flags & NDIGITS) {
525				if (p > buf) {
526					inp--;
527					inr++;
528				}
529				goto match_failure;
530			}
531			c = ((u_char *)p)[-1];
532			if (c == 'x' || c == 'X') {
533				--p;
534				inp--;
535				inr++;
536			}
537			if ((flags & SUPPRESS) == 0) {
538				u_quad_t res;
539
540				*p = 0;
541				res = (*ccfn)(buf, (char **)NULL, base);
542				if (flags & POINTER)
543					*va_arg(ap, void **) =
544						(void *)(uintptr_t)res;
545				else if (flags & SHORTSHORT)
546					*va_arg(ap, char *) = res;
547				else if (flags & SHORT)
548					*va_arg(ap, short *) = res;
549				else if (flags & LONG)
550					*va_arg(ap, long *) = res;
551				else if (flags & QUAD)
552					*va_arg(ap, quad_t *) = res;
553				else if (flags & INTMAXT)
554					*va_arg(ap, intmax_t *) = res;
555				else if (flags & PTRDIFFT)
556					*va_arg(ap, ptrdiff_t *) = res;
557				else if (flags & SIZET)
558					*va_arg(ap, size_t *) = res;
559				else
560					*va_arg(ap, int *) = res;
561				nassigned++;
562			}
563			nread += p - buf;
564			nconversions++;
565			break;
566		}
567	}
568input_failure:
569	return (nconversions != 0 ? nassigned : -1);
570match_failure:
571	return (nassigned);
572}
573
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table has a 1 wherever characters
 * should be considered part of the scanset and a 0 elsewhere.
 *
 * `tab' must have room for 256 entries, one per possible u_char value.
 * A leading `^' negates the set.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	int c, n, v;

	/* first work out the default (`cleared') value of the table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/* equivalent to memset(tab, v, 256) */
	for (n = 0; n < 256; n++)
		tab[n] = v;

	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {
		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': the fill loop below
			 * increments before it stores, so with n == c it
			 * would mark c + 1, and for c == 255 it would write
			 * one element past the end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range (c + 1 .. n; c itself is set) */
			do {
				tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
664