subr_scanf.c revision 330897
1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Chris Torek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp
35 * From: static char sccsid[] = "@(#)strtol.c	8.1 (Berkeley) 6/4/93";
36 * From: static char sccsid[] = "@(#)strtoul.c	8.1 (Berkeley) 6/4/93";
37 */
38
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD: stable/11/sys/kern/subr_scanf.c 330897 2018-03-14 03:19:51Z eadler $");
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/ctype.h>
45#include <sys/limits.h>
46
47/*
48 * Note that stdarg.h and the ANSI style va_start macro is used for both
49 * ANSI and traditional C compilers.
50 */
51#include <machine/stdarg.h>
52
#define	BUF		32 	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.  They are OR-ed into the per-directive
 * `flags' word while a `%' directive is being parsed.
 */
#define	LONG		0x01	/* l: long */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x20	/* do not skip blanks */
#define	QUAD		0x400	/* q or ll: quad_t */
#define	SHORTSHORT	0x4000	/* hh: char */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/PFXOK and EXPOK/NZDIGITS deliberately share bit values, since a
 * single conversion is either floating point or integral, never both.
 * NOTE(review): no floating point conversion is visible in this file, so
 * DPTOK and EXPOK look unused here -- confirm before removing.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.  After a directive has been parsed, the conversion
 * character is replaced by one of these codes.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtoq or strtouq */

/* Signature shared by the string-to-integer routines used for CT_INT. */
typedef u_quad_t (*ccfntype)(const char *, char **, int);

/* Builds the 256-entry membership table for a %[...] scanset. */
static const u_char *__sccl(char *, const u_char *);
90
/*
 * Scan the string `ibuf' under control of the format `fmt'.
 *
 * This is only a variadic front end: the trailing arguments are packed
 * into a va_list and handed to vsscanf(), whose return value is passed
 * back to the caller unchanged.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list args;
	int nfields;

	va_start(args, fmt);
	nfields = vsscanf(ibuf, fmt, args);
	va_end(args);

	return (nfields);
}
102
103int
104vsscanf(const char *inp, char const *fmt0, va_list ap)
105{
106	int inr;
107	const u_char *fmt = (const u_char *)fmt0;
108	int c;			/* character from format, or conversion */
109	size_t width;		/* field width, or 0 */
110	char *p;		/* points into all kinds of strings */
111	int n;			/* handy integer */
112	int flags;		/* flags as defined above */
113	char *p0;		/* saves original value of p when necessary */
114	int nassigned;		/* number of fields assigned */
115	int nconversions;	/* number of conversions */
116	int nread;		/* number of characters consumed from fp */
117	int base;		/* base argument to strtoq/strtouq */
118	ccfntype ccfn;		/* conversion function (strtoq/strtouq) */
119	char ccltab[256];	/* character class table for %[...] */
120	char buf[BUF];		/* buffer for numeric conversions */
121
122	/* `basefix' is used to avoid `if' tests in the integer scanner */
123	static short basefix[17] =
124		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
125
126	inr = strlen(inp);
127
128	nassigned = 0;
129	nconversions = 0;
130	nread = 0;
131	base = 0;		/* XXX just to keep gcc happy */
132	ccfn = NULL;		/* XXX just to keep gcc happy */
133	for (;;) {
134		c = *fmt++;
135		if (c == 0)
136			return (nassigned);
137		if (isspace(c)) {
138			while (inr > 0 && isspace(*inp))
139				nread++, inr--, inp++;
140			continue;
141		}
142		if (c != '%')
143			goto literal;
144		width = 0;
145		flags = 0;
146		/*
147		 * switch on the format.  continue if done;
148		 * break once format type is derived.
149		 */
150again:		c = *fmt++;
151		switch (c) {
152		case '%':
153literal:
154			if (inr <= 0)
155				goto input_failure;
156			if (*inp != c)
157				goto match_failure;
158			inr--, inp++;
159			nread++;
160			continue;
161
162		case '*':
163			flags |= SUPPRESS;
164			goto again;
165		case 'l':
166			if (flags & LONG){
167				flags &= ~LONG;
168				flags |= QUAD;
169			} else {
170				flags |= LONG;
171			}
172			goto again;
173		case 'q':
174			flags |= QUAD;
175			goto again;
176		case 'h':
177			if (flags & SHORT){
178				flags &= ~SHORT;
179				flags |= SHORTSHORT;
180			} else {
181				flags |= SHORT;
182			}
183			goto again;
184
185		case '0': case '1': case '2': case '3': case '4':
186		case '5': case '6': case '7': case '8': case '9':
187			width = width * 10 + c - '0';
188			goto again;
189
190		/*
191		 * Conversions.
192		 *
193		 */
194		case 'd':
195			c = CT_INT;
196			ccfn = (ccfntype)strtoq;
197			base = 10;
198			break;
199
200		case 'i':
201			c = CT_INT;
202			ccfn = (ccfntype)strtoq;
203			base = 0;
204			break;
205
206		case 'o':
207			c = CT_INT;
208			ccfn = strtouq;
209			base = 8;
210			break;
211
212		case 'u':
213			c = CT_INT;
214			ccfn = strtouq;
215			base = 10;
216			break;
217
218		case 'x':
219			flags |= PFXOK;	/* enable 0x prefixing */
220			c = CT_INT;
221			ccfn = strtouq;
222			base = 16;
223			break;
224
225		case 's':
226			c = CT_STRING;
227			break;
228
229		case '[':
230			fmt = __sccl(ccltab, fmt);
231			flags |= NOSKIP;
232			c = CT_CCL;
233			break;
234
235		case 'c':
236			flags |= NOSKIP;
237			c = CT_CHAR;
238			break;
239
240		case 'p':	/* pointer format is like hex */
241			flags |= POINTER | PFXOK;
242			c = CT_INT;
243			ccfn = strtouq;
244			base = 16;
245			break;
246
247		case 'n':
248			nconversions++;
249			if (flags & SUPPRESS)	/* ??? */
250				continue;
251			if (flags & SHORTSHORT)
252				*va_arg(ap, char *) = nread;
253			else if (flags & SHORT)
254				*va_arg(ap, short *) = nread;
255			else if (flags & LONG)
256				*va_arg(ap, long *) = nread;
257			else if (flags & QUAD)
258				*va_arg(ap, quad_t *) = nread;
259			else
260				*va_arg(ap, int *) = nread;
261			continue;
262		}
263
264		/*
265		 * We have a conversion that requires input.
266		 */
267		if (inr <= 0)
268			goto input_failure;
269
270		/*
271		 * Consume leading white space, except for formats
272		 * that suppress this.
273		 */
274		if ((flags & NOSKIP) == 0) {
275			while (isspace(*inp)) {
276				nread++;
277				if (--inr > 0)
278					inp++;
279				else
280					goto input_failure;
281			}
282			/*
283			 * Note that there is at least one character in
284			 * the buffer, so conversions that do not set NOSKIP
285			 * can no longer result in an input failure.
286			 */
287		}
288
289		/*
290		 * Do the conversion.
291		 */
292		switch (c) {
293
294		case CT_CHAR:
295			/* scan arbitrary characters (sets NOSKIP) */
296			if (width == 0)
297				width = 1;
298			if (flags & SUPPRESS) {
299				size_t sum = 0;
300				for (;;) {
301					if ((n = inr) < width) {
302						sum += n;
303						width -= n;
304						inp += n;
305						if (sum == 0)
306							goto input_failure;
307						break;
308					} else {
309						sum += width;
310						inr -= width;
311						inp += width;
312						break;
313					}
314				}
315				nread += sum;
316			} else {
317				bcopy(inp, va_arg(ap, char *), width);
318				inr -= width;
319				inp += width;
320				nread += width;
321				nassigned++;
322			}
323			nconversions++;
324			break;
325
326		case CT_CCL:
327			/* scan a (nonempty) character class (sets NOSKIP) */
328			if (width == 0)
329				width = (size_t)~0;	/* `infinity' */
330			/* take only those things in the class */
331			if (flags & SUPPRESS) {
332				n = 0;
333				while (ccltab[(unsigned char)*inp]) {
334					n++, inr--, inp++;
335					if (--width == 0)
336						break;
337					if (inr <= 0) {
338						if (n == 0)
339							goto input_failure;
340						break;
341					}
342				}
343				if (n == 0)
344					goto match_failure;
345			} else {
346				p0 = p = va_arg(ap, char *);
347				while (ccltab[(unsigned char)*inp]) {
348					inr--;
349					*p++ = *inp++;
350					if (--width == 0)
351						break;
352					if (inr <= 0) {
353						if (p == p0)
354							goto input_failure;
355						break;
356					}
357				}
358				n = p - p0;
359				if (n == 0)
360					goto match_failure;
361				*p = 0;
362				nassigned++;
363			}
364			nread += n;
365			nconversions++;
366			break;
367
368		case CT_STRING:
369			/* like CCL, but zero-length string OK, & no NOSKIP */
370			if (width == 0)
371				width = (size_t)~0;
372			if (flags & SUPPRESS) {
373				n = 0;
374				while (!isspace(*inp)) {
375					n++, inr--, inp++;
376					if (--width == 0)
377						break;
378					if (inr <= 0)
379						break;
380				}
381				nread += n;
382			} else {
383				p0 = p = va_arg(ap, char *);
384				while (!isspace(*inp)) {
385					inr--;
386					*p++ = *inp++;
387					if (--width == 0)
388						break;
389					if (inr <= 0)
390						break;
391				}
392				*p = 0;
393				nread += p - p0;
394				nassigned++;
395			}
396			nconversions++;
397			continue;
398
399		case CT_INT:
400			/* scan an integer as if by strtoq/strtouq */
401#ifdef hardway
402			if (width == 0 || width > sizeof(buf) - 1)
403				width = sizeof(buf) - 1;
404#else
405			/* size_t is unsigned, hence this optimisation */
406			if (--width > sizeof(buf) - 2)
407				width = sizeof(buf) - 2;
408			width++;
409#endif
410			flags |= SIGNOK | NDIGITS | NZDIGITS;
411			for (p = buf; width; width--) {
412				c = *inp;
413				/*
414				 * Switch on the character; `goto ok'
415				 * if we accept it as a part of number.
416				 */
417				switch (c) {
418
419				/*
420				 * The digit 0 is always legal, but is
421				 * special.  For %i conversions, if no
422				 * digits (zero or nonzero) have been
423				 * scanned (only signs), we will have
424				 * base==0.  In that case, we should set
425				 * it to 8 and enable 0x prefixing.
426				 * Also, if we have not scanned zero digits
427				 * before this, do not turn off prefixing
428				 * (someone else will turn it off if we
429				 * have scanned any nonzero digits).
430				 */
431				case '0':
432					if (base == 0) {
433						base = 8;
434						flags |= PFXOK;
435					}
436					if (flags & NZDIGITS)
437					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
438					else
439					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
440					goto ok;
441
442				/* 1 through 7 always legal */
443				case '1': case '2': case '3':
444				case '4': case '5': case '6': case '7':
445					base = basefix[base];
446					flags &= ~(SIGNOK | PFXOK | NDIGITS);
447					goto ok;
448
449				/* digits 8 and 9 ok iff decimal or hex */
450				case '8': case '9':
451					base = basefix[base];
452					if (base <= 8)
453						break;	/* not legal here */
454					flags &= ~(SIGNOK | PFXOK | NDIGITS);
455					goto ok;
456
457				/* letters ok iff hex */
458				case 'A': case 'B': case 'C':
459				case 'D': case 'E': case 'F':
460				case 'a': case 'b': case 'c':
461				case 'd': case 'e': case 'f':
462					/* no need to fix base here */
463					if (base <= 10)
464						break;	/* not legal here */
465					flags &= ~(SIGNOK | PFXOK | NDIGITS);
466					goto ok;
467
468				/* sign ok only as first character */
469				case '+': case '-':
470					if (flags & SIGNOK) {
471						flags &= ~SIGNOK;
472						goto ok;
473					}
474					break;
475
476				/* x ok iff flag still set & 2nd char */
477				case 'x': case 'X':
478					if (flags & PFXOK && p == buf + 1) {
479						base = 16;	/* if %i */
480						flags &= ~PFXOK;
481						goto ok;
482					}
483					break;
484				}
485
486				/*
487				 * If we got here, c is not a legal character
488				 * for a number.  Stop accumulating digits.
489				 */
490				break;
491		ok:
492				/*
493				 * c is legal: store it and look at the next.
494				 */
495				*p++ = c;
496				if (--inr > 0)
497					inp++;
498				else
499					break;		/* end of input */
500			}
501			/*
502			 * If we had only a sign, it is no good; push
503			 * back the sign.  If the number ends in `x',
504			 * it was [sign] '0' 'x', so push back the x
505			 * and treat it as [sign] '0'.
506			 */
507			if (flags & NDIGITS) {
508				if (p > buf) {
509					inp--;
510					inr++;
511				}
512				goto match_failure;
513			}
514			c = ((u_char *)p)[-1];
515			if (c == 'x' || c == 'X') {
516				--p;
517				inp--;
518				inr++;
519			}
520			if ((flags & SUPPRESS) == 0) {
521				u_quad_t res;
522
523				*p = 0;
524				res = (*ccfn)(buf, (char **)NULL, base);
525				if (flags & POINTER)
526					*va_arg(ap, void **) =
527						(void *)(uintptr_t)res;
528				else if (flags & SHORTSHORT)
529					*va_arg(ap, char *) = res;
530				else if (flags & SHORT)
531					*va_arg(ap, short *) = res;
532				else if (flags & LONG)
533					*va_arg(ap, long *) = res;
534				else if (flags & QUAD)
535					*va_arg(ap, quad_t *) = res;
536				else
537					*va_arg(ap, int *) = res;
538				nassigned++;
539			}
540			nread += p - buf;
541			nconversions++;
542			break;
543
544		}
545	}
546input_failure:
547	return (nconversions != 0 ? nassigned : -1);
548match_failure:
549	return (nassigned);
550}
551
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table has a 1 wherever characters
 * should be considered part of the scanset and a 0 elsewhere.
 *
 * `tab' must have room for 256 entries, one per unsigned char value.
 */
static const unsigned char *
__sccl(char *tab, const unsigned char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
	for (n = 0; n < 256; n++)
		tab[n] = v;	/* memset(tab, v, 256) */

	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': with n == c the
			 * fill loop below would still store tab[c + 1],
			 * which adds a character that was never asked for
			 * and, for c == 255, writes past the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range; n > c, so c stops at n <= 255 */
			do {
				tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
643
644