1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Chris Torek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/ctype.h>
40#include <sys/limits.h>
41#include <sys/stddef.h>
42
/*
 * Note that stdarg.h and the ANSI style va_start macro are used for both
 * ANSI and traditional C compilers.
 */
47#include <machine/stdarg.h>
48
#define	BUF		32 	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.  They are accumulated in `flags' while a
 * single `%' directive is parsed and reset for every new directive.
 */
#define	LONG		0x01	/* l: long */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x20	/* do not skip blanks */
#define	QUAD		0x400	/* ll or q: quad_t */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/EXPOK deliberately share their bit values with PFXOK/NZDIGITS.
 * Nothing in this file references DPTOK or EXPOK.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.  Once a directive has been parsed, the variable that
 * held the format character is overwritten with one of these codes.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtoq or strtouq */

/* Common signature used to call either strtoq or strtouq. */
typedef u_quad_t (*ccfntype)(const char *, char **, int);

/* Builds the 256-entry membership table for a %[...] scanset. */
static const u_char *__sccl(char *, const u_char *);
89
/*
 * Variadic front end for vsscanf(): scan the string `ibuf' according to
 * `fmt', storing results through the pointer arguments that follow.
 * Returns whatever vsscanf() returns.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list args;
	int nmatched;

	va_start(args, fmt);
	nmatched = vsscanf(ibuf, fmt, args);
	va_end(args);

	return (nmatched);
}
101
102int
103vsscanf(const char *inp, char const *fmt0, va_list ap)
104{
105	int inr;
106	const u_char *fmt = (const u_char *)fmt0;
107	int c;			/* character from format, or conversion */
108	size_t width;		/* field width, or 0 */
109	char *p;		/* points into all kinds of strings */
110	int n;			/* handy integer */
111	int flags;		/* flags as defined above */
112	char *p0;		/* saves original value of p when necessary */
113	int nassigned;		/* number of fields assigned */
114	int nconversions;	/* number of conversions */
115	int nread;		/* number of characters consumed from fp */
116	int base;		/* base argument to strtoq/strtouq */
117	ccfntype ccfn;		/* conversion function (strtoq/strtouq) */
118	char ccltab[256];	/* character class table for %[...] */
119	char buf[BUF];		/* buffer for numeric conversions */
120
121	/* `basefix' is used to avoid `if' tests in the integer scanner */
122	static short basefix[17] =
123		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
124
125	inr = strlen(inp);
126
127	nassigned = 0;
128	nconversions = 0;
129	nread = 0;
130	base = 0;		/* XXX just to keep gcc happy */
131	ccfn = NULL;		/* XXX just to keep gcc happy */
132	for (;;) {
133		c = *fmt++;
134		if (c == 0)
135			return (nassigned);
136		if (isspace(c)) {
137			while (inr > 0 && isspace(*inp))
138				nread++, inr--, inp++;
139			continue;
140		}
141		if (c != '%')
142			goto literal;
143		width = 0;
144		flags = 0;
145		/*
146		 * switch on the format.  continue if done;
147		 * break once format type is derived.
148		 */
149again:		c = *fmt++;
150		switch (c) {
151		case '%':
152literal:
153			if (inr <= 0)
154				goto input_failure;
155			if (*inp != c)
156				goto match_failure;
157			inr--, inp++;
158			nread++;
159			continue;
160
161		case '*':
162			flags |= SUPPRESS;
163			goto again;
164		case 'j':
165			flags |= INTMAXT;
166			goto again;
167		case 'l':
168			if (flags & LONG){
169				flags &= ~LONG;
170				flags |= QUAD;
171			} else {
172				flags |= LONG;
173			}
174			goto again;
175		case 'q':
176			flags |= QUAD;
177			goto again;
178		case 't':
179			flags |= PTRDIFFT;
180			goto again;
181		case 'z':
182			flags |= SIZET;
183			goto again;
184		case 'h':
185			if (flags & SHORT){
186				flags &= ~SHORT;
187				flags |= SHORTSHORT;
188			} else {
189				flags |= SHORT;
190			}
191			goto again;
192
193		case '0': case '1': case '2': case '3': case '4':
194		case '5': case '6': case '7': case '8': case '9':
195			width = width * 10 + c - '0';
196			goto again;
197
198		/*
199		 * Conversions.
200		 *
201		 */
202		case 'd':
203			c = CT_INT;
204			ccfn = (ccfntype)strtoq;
205			base = 10;
206			break;
207
208		case 'i':
209			c = CT_INT;
210			ccfn = (ccfntype)strtoq;
211			base = 0;
212			break;
213
214		case 'o':
215			c = CT_INT;
216			ccfn = strtouq;
217			base = 8;
218			break;
219
220		case 'u':
221			c = CT_INT;
222			ccfn = strtouq;
223			base = 10;
224			break;
225
226		case 'x':
227			flags |= PFXOK;	/* enable 0x prefixing */
228			c = CT_INT;
229			ccfn = strtouq;
230			base = 16;
231			break;
232
233		case 's':
234			c = CT_STRING;
235			break;
236
237		case '[':
238			fmt = __sccl(ccltab, fmt);
239			flags |= NOSKIP;
240			c = CT_CCL;
241			break;
242
243		case 'c':
244			flags |= NOSKIP;
245			c = CT_CHAR;
246			break;
247
248		case 'p':	/* pointer format is like hex */
249			flags |= POINTER | PFXOK;
250			c = CT_INT;
251			ccfn = strtouq;
252			base = 16;
253			break;
254
255		case 'n':
256			nconversions++;
257			if (flags & SUPPRESS)	/* ??? */
258				continue;
259			if (flags & SHORTSHORT)
260				*va_arg(ap, char *) = nread;
261			else if (flags & SHORT)
262				*va_arg(ap, short *) = nread;
263			else if (flags & LONG)
264				*va_arg(ap, long *) = nread;
265			else if (flags & QUAD)
266				*va_arg(ap, quad_t *) = nread;
267			else if (flags & INTMAXT)
268				*va_arg(ap, intmax_t *) = nread;
269			else if (flags & SIZET)
270				*va_arg(ap, size_t *) = nread;
271			else if (flags & PTRDIFFT)
272				*va_arg(ap, ptrdiff_t *) = nread;
273			else
274				*va_arg(ap, int *) = nread;
275			continue;
276		}
277
278		/*
279		 * We have a conversion that requires input.
280		 */
281		if (inr <= 0)
282			goto input_failure;
283
284		/*
285		 * Consume leading white space, except for formats
286		 * that suppress this.
287		 */
288		if ((flags & NOSKIP) == 0) {
289			while (isspace(*inp)) {
290				nread++;
291				if (--inr > 0)
292					inp++;
293				else
294					goto input_failure;
295			}
296			/*
297			 * Note that there is at least one character in
298			 * the buffer, so conversions that do not set NOSKIP
299			 * can no longer result in an input failure.
300			 */
301		}
302
303		/*
304		 * Do the conversion.
305		 */
306		switch (c) {
307		case CT_CHAR:
308			/* scan arbitrary characters (sets NOSKIP) */
309			if (width == 0)
310				width = 1;
311			if (flags & SUPPRESS) {
312				size_t sum = 0;
313				for (;;) {
314					if ((n = inr) < width) {
315						sum += n;
316						width -= n;
317						inp += n;
318						if (sum == 0)
319							goto input_failure;
320						break;
321					} else {
322						sum += width;
323						inr -= width;
324						inp += width;
325						break;
326					}
327				}
328				nread += sum;
329			} else {
330				bcopy(inp, va_arg(ap, char *), width);
331				inr -= width;
332				inp += width;
333				nread += width;
334				nassigned++;
335			}
336			nconversions++;
337			break;
338
339		case CT_CCL:
340			/* scan a (nonempty) character class (sets NOSKIP) */
341			if (width == 0)
342				width = (size_t)~0;	/* `infinity' */
343			/* take only those things in the class */
344			if (flags & SUPPRESS) {
345				n = 0;
346				while (ccltab[(unsigned char)*inp]) {
347					n++, inr--, inp++;
348					if (--width == 0)
349						break;
350					if (inr <= 0) {
351						if (n == 0)
352							goto input_failure;
353						break;
354					}
355				}
356				if (n == 0)
357					goto match_failure;
358			} else {
359				p0 = p = va_arg(ap, char *);
360				while (ccltab[(unsigned char)*inp]) {
361					inr--;
362					*p++ = *inp++;
363					if (--width == 0)
364						break;
365					if (inr <= 0) {
366						if (p == p0)
367							goto input_failure;
368						break;
369					}
370				}
371				n = p - p0;
372				if (n == 0)
373					goto match_failure;
374				*p = 0;
375				nassigned++;
376			}
377			nread += n;
378			nconversions++;
379			break;
380
381		case CT_STRING:
382			/* like CCL, but zero-length string OK, & no NOSKIP */
383			if (width == 0)
384				width = (size_t)~0;
385			if (flags & SUPPRESS) {
386				n = 0;
387				while (!isspace(*inp)) {
388					n++, inr--, inp++;
389					if (--width == 0)
390						break;
391					if (inr <= 0)
392						break;
393				}
394				nread += n;
395			} else {
396				p0 = p = va_arg(ap, char *);
397				while (!isspace(*inp)) {
398					inr--;
399					*p++ = *inp++;
400					if (--width == 0)
401						break;
402					if (inr <= 0)
403						break;
404				}
405				*p = 0;
406				nread += p - p0;
407				nassigned++;
408			}
409			nconversions++;
410			continue;
411
412		case CT_INT:
413			/* scan an integer as if by strtoq/strtouq */
414#ifdef hardway
415			if (width == 0 || width > sizeof(buf) - 1)
416				width = sizeof(buf) - 1;
417#else
418			/* size_t is unsigned, hence this optimisation */
419			if (--width > sizeof(buf) - 2)
420				width = sizeof(buf) - 2;
421			width++;
422#endif
423			flags |= SIGNOK | NDIGITS | NZDIGITS;
424			for (p = buf; width; width--) {
425				c = *inp;
426				/*
427				 * Switch on the character; `goto ok'
428				 * if we accept it as a part of number.
429				 */
430				switch (c) {
431				/*
432				 * The digit 0 is always legal, but is
433				 * special.  For %i conversions, if no
434				 * digits (zero or nonzero) have been
435				 * scanned (only signs), we will have
436				 * base==0.  In that case, we should set
437				 * it to 8 and enable 0x prefixing.
438				 * Also, if we have not scanned zero digits
439				 * before this, do not turn off prefixing
440				 * (someone else will turn it off if we
441				 * have scanned any nonzero digits).
442				 */
443				case '0':
444					if (base == 0) {
445						base = 8;
446						flags |= PFXOK;
447					}
448					if (flags & NZDIGITS)
449					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
450					else
451					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
452					goto ok;
453
454				/* 1 through 7 always legal */
455				case '1': case '2': case '3':
456				case '4': case '5': case '6': case '7':
457					base = basefix[base];
458					flags &= ~(SIGNOK | PFXOK | NDIGITS);
459					goto ok;
460
461				/* digits 8 and 9 ok iff decimal or hex */
462				case '8': case '9':
463					base = basefix[base];
464					if (base <= 8)
465						break;	/* not legal here */
466					flags &= ~(SIGNOK | PFXOK | NDIGITS);
467					goto ok;
468
469				/* letters ok iff hex */
470				case 'A': case 'B': case 'C':
471				case 'D': case 'E': case 'F':
472				case 'a': case 'b': case 'c':
473				case 'd': case 'e': case 'f':
474					/* no need to fix base here */
475					if (base <= 10)
476						break;	/* not legal here */
477					flags &= ~(SIGNOK | PFXOK | NDIGITS);
478					goto ok;
479
480				/* sign ok only as first character */
481				case '+': case '-':
482					if (flags & SIGNOK) {
483						flags &= ~SIGNOK;
484						goto ok;
485					}
486					break;
487
488				/* x ok iff flag still set & 2nd char */
489				case 'x': case 'X':
490					if (flags & PFXOK && p == buf + 1) {
491						base = 16;	/* if %i */
492						flags &= ~PFXOK;
493						goto ok;
494					}
495					break;
496				}
497
498				/*
499				 * If we got here, c is not a legal character
500				 * for a number.  Stop accumulating digits.
501				 */
502				break;
503		ok:
504				/*
505				 * c is legal: store it and look at the next.
506				 */
507				*p++ = c;
508				if (--inr > 0)
509					inp++;
510				else
511					break;		/* end of input */
512			}
513			/*
514			 * If we had only a sign, it is no good; push
515			 * back the sign.  If the number ends in `x',
516			 * it was [sign] '0' 'x', so push back the x
517			 * and treat it as [sign] '0'.
518			 */
519			if (flags & NDIGITS) {
520				if (p > buf) {
521					inp--;
522					inr++;
523				}
524				goto match_failure;
525			}
526			c = ((u_char *)p)[-1];
527			if (c == 'x' || c == 'X') {
528				--p;
529				inp--;
530				inr++;
531			}
532			if ((flags & SUPPRESS) == 0) {
533				u_quad_t res;
534
535				*p = 0;
536				res = (*ccfn)(buf, (char **)NULL, base);
537				if (flags & POINTER)
538					*va_arg(ap, void **) =
539						(void *)(uintptr_t)res;
540				else if (flags & SHORTSHORT)
541					*va_arg(ap, char *) = res;
542				else if (flags & SHORT)
543					*va_arg(ap, short *) = res;
544				else if (flags & LONG)
545					*va_arg(ap, long *) = res;
546				else if (flags & QUAD)
547					*va_arg(ap, quad_t *) = res;
548				else if (flags & INTMAXT)
549					*va_arg(ap, intmax_t *) = res;
550				else if (flags & PTRDIFFT)
551					*va_arg(ap, ptrdiff_t *) = res;
552				else if (flags & SIZET)
553					*va_arg(ap, size_t *) = res;
554				else
555					*va_arg(ap, int *) = res;
556				nassigned++;
557			}
558			nread += p - buf;
559			nconversions++;
560			break;
561		}
562	}
563input_failure:
564	return (nconversions != 0 ? nassigned : -1);
565match_failure:
566	return (nassigned);
567}
568
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table (256 entries, indexed by unsigned
 * character value) has a 1 wherever characters should be considered
 * part of the scanset and a 0 everywhere else.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	int c, n, v;

	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/*
	 * Preset the whole table to the default.
	 * XXX: Will not work if sizeof(tab*) > sizeof(char)
	 */
	for (n = 0; n < 256; n++)
		tab[n] = v;	/* memset(tab, v, 256) */

	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {
		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': letting n == c
			 * through would make the fill loop below store
			 * tab[c + 1], adding a character that was never
			 * requested and, for c == 255, writing past the
			 * end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range; c < n is guaranteed here */
			do {
			    tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
659