1/*	$NetBSD: parse.c,v 1.26 2009/01/18 21:34:32 apb Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#if HAVE_NBTOOL_CONFIG_H
33#include "nbtool_config.h"
34#endif
35
36#include <sys/cdefs.h>
37#if !defined(lint)
38#if 0
39static char sccsid[] = "@(#)parse.c	8.1 (Berkeley) 6/6/93";
40#else
41__RCSID("$NetBSD: parse.c,v 1.26 2009/01/18 21:34:32 apb Exp $");
42#endif
43#endif /* not lint */
44
45#include <sys/types.h>
46#include <sys/file.h>
47
48#include <ctype.h>
49#include <err.h>
50#include <errno.h>
51#include <fcntl.h>
52#include <inttypes.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56#include <util.h>
57
58#include "hexdump.h"
59
60__dead static void	 badcnt(char *);
61__dead static void	 badconv(char *);
62__dead static void	 badfmt(const char *);
63__dead static void	 badsfmt(void);
64
65FU *endfu;					/* format at end-of-data */
66
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass
 *	each of them to add().
 *
 *	Leading white space on a line is skipped.  Empty lines and lines
 *	whose first non-blank character is '#' are ignored.  A line that
 *	does not fit into the 2048-character buffer is reported with a
 *	warning and discarded in full.
 *
 *	Exits through err() when the file cannot be opened.
 */
void
addfile(char *name)
{
	char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "fopen %s", name);
	while (fgets(buf, sizeof(buf), fp)) {
		if ((p = strchr(buf, '\n')) != NULL)
			*p = '\0';
		else if (strlen(buf) == sizeof(buf) - 1) {
			/*
			 * The buffer filled up before a newline was seen.
			 * Throw away the remainder of the line; it has to be
			 * read from the format file itself, not from stdin.
			 */
			warnx("line too long.");
			while ((ch = getc(fp)) != '\n' && ch != EOF)
				continue;
			continue;
		}
		/*
		 * Otherwise the line is complete: either the newline was
		 * stripped above, or this is a final line that simply ends
		 * at end-of-file without one.
		 */
		for (p = buf; *p && isspace((unsigned char)*p); ++p)
			continue;
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
}
91
92void
93add(const char *fmt)
94{
95	const char *p;
96	static FS **nextfs;
97	FS *tfs;
98	FU *tfu, **nextfu;
99	const char *savep;
100
101	/* start new linked list of format units */
102	tfs = ecalloc(1, sizeof(FS));
103	if (!fshead)
104		fshead = tfs;
105	else
106		*nextfs = tfs;
107	nextfs = &tfs->nextfs;
108	nextfu = &tfs->nextfu;
109
110	/* take the format string and break it up into format units */
111	for (p = fmt;;) {
112		/* skip leading white space */
113		for (; isspace((unsigned char)*p); ++p);
114		if (!*p)
115			break;
116
117		/* allocate a new format unit and link it in */
118		tfu = ecalloc(1, sizeof(FU));
119		*nextfu = tfu;
120		nextfu = &tfu->nextfu;
121		tfu->reps = 1;
122
123		/* if leading digit, repetition count */
124		if (isdigit((unsigned char)*p)) {
125			for (savep = p; isdigit((unsigned char)*p); ++p);
126			if (!isspace((unsigned char)*p) && *p != '/')
127				badfmt(fmt);
128			/* may overwrite either white space or slash */
129			tfu->reps = atoi(savep);
130			tfu->flags = F_SETREP;
131			/* skip trailing white space */
132			for (++p; isspace((unsigned char)*p); ++p);
133		}
134
135		/* skip slash and trailing white space */
136		if (*p == '/')
137			while (isspace((unsigned char)*++p));
138
139		/* byte count */
140		if (isdigit((unsigned char)*p)) {
141			for (savep = p; isdigit((unsigned char)*p); ++p);
142			if (!isspace((unsigned char)*p))
143				badfmt(fmt);
144			tfu->bcnt = atoi(savep);
145			/* skip trailing white space */
146			for (++p; isspace((unsigned char)*p); ++p);
147		}
148
149		/* format */
150		if (*p != '"')
151			badfmt(fmt);
152		for (savep = ++p; *p != '"';)
153			if (*p++ == 0)
154				badfmt(fmt);
155		tfu->fmt = emalloc(p - savep + 1);
156		(void) strncpy(tfu->fmt, savep, p - savep);
157		tfu->fmt[p - savep] = '\0';
158		escape(tfu->fmt);
159		p++;
160	}
161}
162
163static const char *spec = ".#-+ 0123456789";
164
165int
166size(FS *fs)
167{
168	FU *fu;
169	int bcnt, cursize;
170	char *fmt;
171	int prec;
172
173	/* figure out the data block size needed for each format unit */
174	for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
175		if (fu->bcnt) {
176			cursize += fu->bcnt * fu->reps;
177			continue;
178		}
179		for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
180			if (*fmt != '%')
181				continue;
182			/*
183			 * skip any special chars -- save precision in
184			 * case it's a %s format.
185			 */
186			while (strchr(spec + 1, *++fmt));
187			if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
188				prec = atoi(fmt);
189				while (isdigit((unsigned char)*++fmt));
190			}
191			switch(*fmt) {
192			case 'c':
193				bcnt += 1;
194				break;
195			case 'd': case 'i': case 'o': case 'u':
196			case 'x': case 'X':
197				bcnt += 4;
198				break;
199			case 'e': case 'E': case 'f': case 'g': case 'G':
200				bcnt += 8;
201				break;
202			case 's':
203				bcnt += prec;
204				break;
205			case '_':
206				switch(*++fmt) {
207				case 'c': case 'p': case 'u':
208					bcnt += 1;
209					break;
210				}
211			}
212		}
213		cursize += bcnt * fu->reps;
214	}
215	return (cursize);
216}
217
218void
219rewrite(FS *fs)
220{
221	enum { NOTOKAY, USEBCNT, USEPREC } sokay;
222	PR *pr, **nextpr;
223	FU *fu;
224	char *p1, *p2;
225	char savech, *fmtp, cs[sizeof(PRId64)];
226	int nconv, prec;
227
228	prec = 0;
229	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
230		/*
231		 * Break each format unit into print units; each conversion
232		 * character gets its own.
233		 */
234		nextpr = &fu->nextpr;
235		for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
236			pr = ecalloc(1, sizeof(*pr));
237			*nextpr = pr;
238
239			/* Skip preceding text and up to the next % sign. */
240			for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
241
242			/* Only text in the string. */
243			if (!*p1) {
244				pr->fmt = fmtp;
245				pr->flags = F_TEXT;
246				break;
247			}
248
249			/*
250			 * Get precision for %s -- if have a byte count, don't
251			 * need it.
252			 */
253			if (fu->bcnt) {
254				sokay = USEBCNT;
255				/* Skip to conversion character. */
256				for (++p1; *p1 && strchr(spec, *p1); ++p1);
257			} else {
258				/* Skip any special chars, field width. */
259				while (*++p1 && strchr(spec + 1, *p1));
260				if (*p1 == '.' &&
261				    isdigit((unsigned char)*++p1)) {
262					sokay = USEPREC;
263					prec = atoi(p1);
264					while (isdigit((unsigned char)*++p1))
265						continue;
266				} else
267					sokay = NOTOKAY;
268			}
269
270			p2 = *p1 ? p1 + 1 : p1;	/* Set end pointer. */
271			cs[0] = *p1;		/* Set conversion string. */
272			cs[1] = '\0';
273
274			/*
275			 * Figure out the byte count for each conversion;
276			 * rewrite the format as necessary, set up blank-
277			 * padding for end of data.
278			 */
279			switch(cs[0]) {
280			case 'c':
281				pr->flags = F_CHAR;
282				switch(fu->bcnt) {
283				case 0: case 1:
284					pr->bcnt = 1;
285					break;
286				default:
287					p1[1] = '\0';
288					badcnt(p1);
289				}
290				break;
291			case 'd': case 'i':
292				pr->flags = F_INT;
293				goto isint;
294			case 'o': case 'u': case 'x': case 'X':
295				pr->flags = F_UINT;
296isint:
297				/*
298				 * Regardless of pr->bcnt, all integer
299				 * values are cast to [u]int64_t before
300				 * being printed by display().  We
301				 * therefore need to use PRI?64 as the
302				 * format, where '?' could actually
303				 * be any of [diouxX].  We make the
304				 * assumption (not guaranteed by the
305				 * C99 standard) that we can derive
306				 * all the other PRI?64 values from
307				 * PRId64 simply by changing the last
308				 * character.  For example, if PRId64 is
309				 * "lld" or "qd", and cs[0] is 'o', then
310				 * we end up with "llo" or "qo".
311				 */
312				savech = cs[0];
313				strncpy(cs, PRId64, sizeof(PRId64) - 2);
314				cs[sizeof(PRId64) - 2] = savech;
315				cs[sizeof(PRId64) - 1] = '\0';
316				switch(fu->bcnt) {
317				case 0: case 4:
318					pr->bcnt = 4;
319					break;
320				case 1:
321					pr->bcnt = 1;
322					break;
323				case 2:
324					pr->bcnt = 2;
325					break;
326				case 8:
327					pr->bcnt = 8;
328					break;
329				default:
330					p1[1] = '\0';
331					badcnt(p1);
332				}
333				break;
334			case 'e': case 'E': case 'f': case 'g': case 'G':
335				pr->flags = F_DBL;
336				switch(fu->bcnt) {
337				case 0: case 8:
338					pr->bcnt = 8;
339					break;
340				case 4:
341					pr->bcnt = 4;
342					break;
343				default:
344					p1[1] = '\0';
345					badcnt(p1);
346				}
347				break;
348			case 's':
349				pr->flags = F_STR;
350				switch(sokay) {
351				case NOTOKAY:
352					badsfmt();
353				case USEBCNT:
354					pr->bcnt = fu->bcnt;
355					break;
356				case USEPREC:
357					pr->bcnt = prec;
358					break;
359				}
360				break;
361			case '_':
362				++p2;
363				switch(p1[1]) {
364				case 'A':
365					endfu = fu;
366					fu->flags |= F_IGNORE;
367					/* FALLTHROUGH */
368				case 'a':
369					pr->flags = F_ADDRESS;
370					++p2;
371					switch(p1[2]) {
372					case 'd': case 'o': case'x':
373						/*
374						 * See comments above for
375						 * the way we use PRId64.
376						 */
377						strncpy(cs, PRId64,
378							sizeof(PRId64) - 2);
379						cs[sizeof(PRId64) - 2] = p1[2];
380						cs[sizeof(PRId64) - 1] = '\0';
381						break;
382					default:
383						p1[3] = '\0';
384						badconv(p1);
385					}
386					break;
387				case 'c':
388					pr->flags = F_C;
389					/* cs[0] = 'c';	set in conv_c */
390					goto isint2;
391				case 'p':
392					pr->flags = F_P;
393					cs[0] = 'c';
394					goto isint2;
395				case 'u':
396					pr->flags = F_U;
397					/* cs[0] = 'c';	set in conv_u */
398isint2:					switch(fu->bcnt) {
399					case 0: case 1:
400						pr->bcnt = 1;
401						break;
402					default:
403						p1[2] = '\0';
404						badcnt(p1);
405					}
406					break;
407				default:
408					p1[2] = '\0';
409					badconv(p1);
410				}
411				break;
412			default:
413				p1[1] = '\0';
414				badconv(p1);
415			}
416
417			/*
418			 * Copy to PR format string, set conversion character
419			 * pointer, update original.
420			 */
421			savech = *p2;
422			p1[0] = '\0';
423			pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1);
424			(void)strcpy(pr->fmt, fmtp);
425			(void)strcat(pr->fmt, cs);
426			*p2 = savech;
427			pr->cchar = pr->fmt + (p1 - fmtp);
428			fmtp = p2;
429
430			/* Only one conversion character if byte count. */
431			if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
432				errx(1,
433			    "byte count with multiple conversion characters");
434		}
435		/*
436		 * If format unit byte count not specified, figure it out
437		 * so can adjust rep count later.
438		 */
439		if (!fu->bcnt)
440			for (pr = fu->nextpr; pr; pr = pr->nextpr)
441				fu->bcnt += pr->bcnt;
442	}
443	/*
444	 * If the format string interprets any data at all, and it's
445	 * not the same as the blocksize, and its last format unit
446	 * interprets any data at all, and has no iteration count,
447	 * repeat it as necessary.
448	 *
449	 * If, rep count is greater than 1, no trailing whitespace
450	 * gets output from the last iteration of the format unit.
451	 */
452	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
453		if (!fu->nextfu && fs->bcnt < blocksize &&
454		    !(fu->flags&F_SETREP) && fu->bcnt)
455			fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
456		if (fu->reps > 1) {
457			if (!fu->nextpr)
458				break;
459			for (pr = fu->nextpr;; pr = pr->nextpr)
460				if (!pr->nextpr)
461					break;
462			for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
463				p2 = isspace((unsigned char)*p1) ? p1 : NULL;
464			if (p2)
465				pr->nospace = p2;
466		}
467	}
468#ifdef DEBUG
469	for (fu = fs->nextfu; fu; fu = fu->nextfu) {
470		(void)printf("fmt:");
471		for (pr = fu->nextpr; pr; pr = pr->nextpr)
472			(void)printf(" {%s}", pr->fmt);
473		(void)printf("\n");
474	}
475#endif
476}
477
/*
 * escape --
 *	Translate backslash escape sequences in the string `p1', in place.
 *
 *	\a \b \f \n \r \t \v become the corresponding control characters;
 *	a backslash followed by any other character yields that character
 *	alone.  A lone backslash at the very end of the string is kept.
 *	The result is never longer than the input.
 */
void
escape(char *p1)
{
	char *in, *out;
	char ch;

	in = out = p1;
	while ((ch = *in++) != '\0') {
		/* Ordinary characters are copied straight through. */
		if (ch != '\\') {
			*out++ = ch;
			continue;
		}

		ch = *in++;
		if (ch == '\0') {
			/* incomplete escape sequence: keep the backslash */
			*out++ = '\\';
			break;
		}

		switch (ch) {
		case 'a':
			ch = '\007';	/* alert */
			break;
		case 'b':
			ch = '\b';
			break;
		case 'f':
			ch = '\f';
			break;
		case 'n':
			ch = '\n';
			break;
		case 'r':
			ch = '\r';
			break;
		case 't':
			ch = '\t';
			break;
		case 'v':
			ch = '\v';
			break;
		default:
			/* unknown escape: the character stands for itself */
			break;
		}
		*out++ = ch;
	}
	*out = '\0';
}
525
/*
 * badcnt --
 *	Complain that the byte count is not usable with the conversion
 *	named by `s', then exit with status 1.
 */
static void
badcnt(char *s)
{
	warnx("%s: bad byte count", s);
	exit(1);
}
531
/*
 * badsfmt --
 *	Complain about a %s conversion that has neither a precision nor a
 *	byte count, then exit with status 1.
 */
static void
badsfmt(void)
{
	warnx("%%s: requires a precision or a byte count");
	exit(1);
}
537
/*
 * badfmt --
 *	Complain that the format string `fmt' is malformed, quoting it in
 *	the message, then exit with status 1.
 */
static void
badfmt(const char *fmt)
{
	warnx("\"%s\": bad format", fmt);
	exit(1);
}
543
/*
 * badconv --
 *	Complain about the unsupported conversion `ch' (printed with a
 *	leading '%'), then exit with status 1.
 */
static void
badconv(char *ch)
{
	warnx("%%%s: bad conversion character", ch);
	exit(1);
}
549