unifdef.c revision 279639
1/*
2 * Copyright (c) 2002 - 2014 Tony Finch <dot@dotat.at>
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26/*
27 * unifdef - remove ifdef'ed lines
28 *
29 * This code was derived from software contributed to Berkeley by Dave Yost.
30 * It was rewritten to support ANSI C by Tony Finch. The original version
31 * of unifdef carried the 4-clause BSD copyright licence. None of its code
32 * remains in this version (though some of the names remain) so it now
33 * carries a more liberal licence.
34 *
35 *  Wishlist:
36 *      provide an option which will append the name of the
37 *        appropriate symbol after #else's and #endif's
38 *      provide an option which will check symbols after
39 *        #else's and #endif's to see that they match their
40 *        corresponding #ifdef or #ifndef
41 *
42 *   These require better buffer handling, which would also make
43 *   it possible to handle all "dodgy" directives correctly.
44 */
45
46#include "unifdef.h"
47
/*
 * Identification strings. Each line carries one "$Keyword: text $" item;
 * version() prints the text found between each pair of '$' characters.
 */
static const char copyright[] =
	"@(#) $Version: unifdef-2.10 $\n"
	"@(#) $FreeBSD: stable/10/usr.bin/unifdef/unifdef.c 279639 2015-03-05 09:39:29Z hselasky $\n"
	"@(#) $Author: Tony Finch (dot@dotat.at) $\n"
	"@(#) $URL: http://dotat.at/prog/unifdef $\n";
54
/*
 * Types of input lines.
 *
 * The order matters: LT_IF, LT_TRUE, LT_FALSE are followed in the same
 * order by LT_ELIF, LT_ELTRUE, LT_ELFALSE, and the values from LT_DODGY
 * to LT_DODGY_LAST are the "dodgy" twins of LT_TRUEI .. LT_ENDIF.
 */
typedef enum {
	LT_TRUEI,	/* a true #if with ignore flag */
	LT_FALSEI,	/* a false #if with ignore flag */
	LT_IF,		/* an unknown #if */
	LT_TRUE,	/* a true #if */
	LT_FALSE,	/* a false #if */
	LT_ELIF,	/* an unknown #elif */
	LT_ELTRUE,	/* a true #elif */
	LT_ELFALSE,	/* a false #elif */
	LT_ELSE,	/* #else */
	LT_ENDIF,	/* #endif */
	LT_DODGY,	/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,	/* ordinary line */
	LT_EOF,		/* end of file */
	LT_ERROR,	/* unevaluable #if */
	LT_COUNT	/* number of line types */
} Linetype;

/* Printable name of each Linetype value, used in debug output. */
static char const * const linetype_name[] = {
	[LT_TRUEI]		= "TRUEI",
	[LT_FALSEI]		= "FALSEI",
	[LT_IF]			= "IF",
	[LT_TRUE]		= "TRUE",
	[LT_FALSE]		= "FALSE",
	[LT_ELIF]		= "ELIF",
	[LT_ELTRUE]		= "ELTRUE",
	[LT_ELFALSE]		= "ELFALSE",
	[LT_ELSE]		= "ELSE",
	[LT_ENDIF]		= "ENDIF",
	[LT_DODGY + LT_TRUEI]	= "DODGY TRUEI",
	[LT_DODGY + LT_FALSEI]	= "DODGY FALSEI",
	[LT_DODGY + LT_IF]	= "DODGY IF",
	[LT_DODGY + LT_TRUE]	= "DODGY TRUE",
	[LT_DODGY + LT_FALSE]	= "DODGY FALSE",
	[LT_DODGY + LT_ELIF]	= "DODGY ELIF",
	[LT_DODGY + LT_ELTRUE]	= "DODGY ELTRUE",
	[LT_DODGY + LT_ELFALSE]	= "DODGY ELFALSE",
	[LT_DODGY + LT_ELSE]	= "DODGY ELSE",
	[LT_DODGY + LT_ENDIF]	= "DODGY ENDIF",
	[LT_PLAIN]		= "PLAIN",
	[LT_EOF]		= "EOF",
	[LT_ERROR]		= "ERROR"
};
84
/*
 * Linetype conversions. Both rely on the layout of the Linetype enum:
 * linetype_if2elif() maps LT_IF/LT_TRUE/LT_FALSE onto the corresponding
 * LT_ELIF/LT_ELTRUE/LT_ELFALSE, and linetype_2dodgy() maps a directive
 * type onto its "dodgy" twin, which is LT_DODGY further on.
 * The argument is parenthesized so that any expression can be passed.
 */
#define linetype_if2elif(lt) ((Linetype)((lt) - LT_IF + LT_ELIF))
#define linetype_2dodgy(lt) ((Linetype)((lt) + LT_DODGY))
87
/*
 * State of #if processing. One of these is kept for every level of
 * #if nesting; IS_COUNT is the number of states.
 */
typedef enum {
	IS_OUTSIDE,		/* not inside any #if group at this level */
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable name of each Ifstate value, used in debug output. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE]		= "OUTSIDE",
	[IS_FALSE_PREFIX]	= "FALSE_PREFIX",
	[IS_TRUE_PREFIX]	= "TRUE_PREFIX",
	[IS_PASS_MIDDLE]	= "PASS_MIDDLE",
	[IS_FALSE_MIDDLE]	= "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE]	= "TRUE_MIDDLE",
	[IS_PASS_ELSE]		= "PASS_ELSE",
	[IS_FALSE_ELSE]		= "FALSE_ELSE",
	[IS_TRUE_ELSE]		= "TRUE_ELSE",
	[IS_FALSE_TRAILER]	= "FALSE_TRAILER"
};
109
/*
 * State of the comment parser. NO_COMMENT is zero so that a
 * Comment_state can be tested directly as a truth value.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;

/* Printable name of each Comment_state value, used in debug output. */
static char const * const comment_name[] = {
	[NO_COMMENT]		= "NO",
	[C_COMMENT]		= "C",
	[CXX_COMMENT]		= "CXX",
	[STARTING_COMMENT]	= "STARTING",
	[FINISHING_COMMENT]	= "FINISHING",
	[CHAR_LITERAL]		= "CHAR",
	[STRING_LITERAL]	= "STRING"
};
124
/* State of the preprocessor line parser. */
typedef enum {
	LS_START,	/* only space and comments on this line */
	LS_HASH,	/* only space, comments, and a hash */
	LS_DIRTY	/* this line can't be a preprocessor line */
} Line_state;

/* Printable name of each Line_state value, used in debug output. */
static char const * const linestate_name[] = {
	[LS_START]	= "START",
	[LS_HASH]	= "HASH",
	[LS_DIRTY]	= "DIRTY"
};
135
/*
 * Fixed capacities, based on the minimum translation limits in
 * ISO/IEC 9899:1999 5.2.4.1.
 */
#define	MAXDEPTH	64	/* maximum #if nesting */
#define	MAXLINE		4096	/* maximum length of line */
#define	MAXSYMS		16384	/* maximum number of symbols */

/*
 * Editing a keyword can make the line longer (the replacement text may
 * be longer than the original keyword), so the tline buffer is given
 * this many spare bytes beyond MAXLINE to accommodate the growth.
 */
#define	EDITSLOP	10
148
149/*
150 * Globals.
151 */
152
153static bool             compblank;		/* -B: compress blank lines */
154static bool             lnblank;		/* -b: blank deleted lines */
155static bool             complement;		/* -c: do the complement */
156static bool             debugging;		/* -d: debugging reports */
157static bool             inplace;		/* -m: modify in place */
158static bool             iocccok;		/* -e: fewer IOCCC errors */
159static bool             strictlogic;		/* -K: keep ambiguous #ifs */
160static bool             killconsts;		/* -k: eval constant #ifs */
161static bool             lnnum;			/* -n: add #line directives */
162static bool             symlist;		/* -s: output symbol list */
163static bool             symdepth;		/* -S: output symbol depth */
164static bool             text;			/* -t: this is a text file */
165
166static const char      *symname[MAXSYMS];	/* symbol name */
167static const char      *value[MAXSYMS];		/* -Dsym=value */
168static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
169static int              nsyms;			/* number of symbols */
170
171static FILE            *input;			/* input file pointer */
172static const char      *filename;		/* input file name */
173static int              linenum;		/* current line number */
174static const char      *linefile;		/* file name for #line */
175static FILE            *output;			/* output file pointer */
176static const char      *ofilename;		/* output file name */
177static const char      *backext;		/* backup extension */
178static char            *tempname;		/* avoid splatting input */
179
180static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
181static char            *keyword;		/* used for editing #elif's */
182
183/*
184 * When processing a file, the output's newline style will match the
185 * input's, and unifdef correctly handles CRLF or LF endings whatever
186 * the platform's native style. The stdio streams are opened in binary
187 * mode to accommodate platforms whose native newline style is CRLF.
188 * When the output isn't a processed input file (when it is error /
189 * debug / diagnostic messages) then unifdef uses native line endings.
190 */
191
192static const char      *newline;		/* input file format */
193static const char       newline_unix[] = "\n";
194static const char       newline_crlf[] = "\r\n";
195
196static Comment_state    incomment;		/* comment parser state */
197static Line_state       linestate;		/* #if line parser state */
198static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
199static bool             ignoring[MAXDEPTH];	/* ignore comments state */
200static int              stifline[MAXDEPTH];	/* start of current #if */
201static int              depth;			/* current #if nesting */
202static int              delcount;		/* count of deleted lines */
203static unsigned         blankcount;		/* count of blank lines */
204static unsigned         blankmax;		/* maximum recent blankcount */
205static bool             constexpr;		/* constant #if expression */
206static bool             zerosyms;		/* to format symdepth output */
207static bool             firstsym;		/* ditto */
208
209static int              exitmode;		/* exit status mode */
210static int              exitstat;		/* program exit status */
211
212static void             addsym1(bool, bool, char *);
213static void             addsym2(bool, const char *, const char *);
214static char            *astrcat(const char *, const char *);
215static void             cleantemp(void);
216static void             closeio(void);
217static void             debug(const char *, ...);
218static void             debugsym(const char *, int);
219static bool             defundef(void);
220static void             defundefile(const char *);
221static void             done(void);
222static void             error(const char *);
223static int              findsym(const char **);
224static void             flushline(bool);
225static void             hashline(void);
226static void             help(void);
227static Linetype         ifeval(const char **);
228static void             ignoreoff(void);
229static void             ignoreon(void);
230static void             indirectsym(void);
231static void             keywordedit(const char *);
232static const char      *matchsym(const char *, const char *);
233static void             nest(void);
234static Linetype         parseline(void);
235static void             process(void);
236static void             processinout(const char *, const char *);
237static const char      *skipargs(const char *);
238static const char      *skipcomment(const char *);
239static const char      *skiphash(void);
240static const char      *skipline(const char *);
241static const char      *skipsym(const char *);
242static void             state(Ifstate);
243static void             unnest(void);
244static void             usage(void);
245static void             version(void);
246static const char      *xstrdup(const char *, const char *);
247
/*
 * True if character c cannot be part of an identifier, i.e. it is
 * neither alphanumeric nor an underscore. The argument is parenthesized
 * so that any expression can be passed; note that it is still evaluated
 * twice, so it must not have side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
249
250/*
251 * The main program.
252 */
253int
254main(int argc, char *argv[])
255{
256	int opt;
257
258	while ((opt = getopt(argc, argv, "i:D:U:f:I:M:o:x:bBcdehKklmnsStV")) != -1)
259		switch (opt) {
260		case 'i': /* treat stuff controlled by these symbols as text */
261			/*
262			 * For strict backwards-compatibility the U or D
263			 * should be immediately after the -i but it doesn't
264			 * matter much if we relax that requirement.
265			 */
266			opt = *optarg++;
267			if (opt == 'D')
268				addsym1(true, true, optarg);
269			else if (opt == 'U')
270				addsym1(true, false, optarg);
271			else
272				usage();
273			break;
274		case 'D': /* define a symbol */
275			addsym1(false, true, optarg);
276			break;
277		case 'U': /* undef a symbol */
278			addsym1(false, false, optarg);
279			break;
280		case 'I': /* no-op for compatibility with cpp */
281			break;
282		case 'b': /* blank deleted lines instead of omitting them */
283		case 'l': /* backwards compatibility */
284			lnblank = true;
285			break;
286		case 'B': /* compress blank lines around removed section */
287			compblank = true;
288			break;
289		case 'c': /* treat -D as -U and vice versa */
290			complement = true;
291			break;
292		case 'd':
293			debugging = true;
294			break;
295		case 'e': /* fewer errors from dodgy lines */
296			iocccok = true;
297			break;
298		case 'f': /* definitions file */
299			defundefile(optarg);
300			break;
301		case 'h':
302			help();
303			break;
304		case 'K': /* keep ambiguous #ifs */
305			strictlogic = true;
306			break;
307		case 'k': /* process constant #ifs */
308			killconsts = true;
309			break;
310		case 'm': /* modify in place */
311			inplace = true;
312			break;
313		case 'M': /* modify in place and keep backup */
314			inplace = true;
315			backext = optarg;
316			break;
317		case 'n': /* add #line directive after deleted lines */
318			lnnum = true;
319			break;
320		case 'o': /* output to a file */
321			ofilename = optarg;
322			break;
323		case 's': /* only output list of symbols that control #ifs */
324			symlist = true;
325			break;
326		case 'S': /* list symbols with their nesting depth */
327			symlist = symdepth = true;
328			break;
329		case 't': /* don't parse C comments */
330			text = true;
331			break;
332		case 'V':
333			version();
334			break;
335		case 'x':
336			exitmode = atoi(optarg);
337			if(exitmode < 0 || exitmode > 2)
338				usage();
339			break;
340		default:
341			usage();
342		}
343	argc -= optind;
344	argv += optind;
345	if (compblank && lnblank)
346		errx(2, "-B and -b are mutually exclusive");
347	if (symlist && (ofilename != NULL || inplace || argc > 1))
348		errx(2, "-s only works with one input file");
349	if (argc > 1 && ofilename != NULL)
350		errx(2, "-o cannot be used with multiple input files");
351	if (argc > 1 && !inplace)
352		errx(2, "multiple input files require -m or -M");
353	if (argc == 0)
354		argc = 1;
355	if (argc == 1 && !inplace && ofilename == NULL)
356		ofilename = "-";
357	indirectsym();
358
359	atexit(cleantemp);
360	if (ofilename != NULL)
361		processinout(*argv, ofilename);
362	else while (argc-- > 0) {
363		processinout(*argv, *argv);
364		argv++;
365	}
366	switch(exitmode) {
367	case(0): exit(exitstat);
368	case(1): exit(!exitstat);
369	case(2): exit(0);
370	default: abort(); /* bug */
371	}
372}
373
374/*
375 * File logistics.
376 */
377static void
378processinout(const char *ifn, const char *ofn)
379{
380	struct stat st;
381
382	if (ifn == NULL || strcmp(ifn, "-") == 0) {
383		filename = "[stdin]";
384		linefile = NULL;
385		input = fbinmode(stdin);
386	} else {
387		filename = ifn;
388		linefile = ifn;
389		input = fopen(ifn, "rb");
390		if (input == NULL)
391			err(2, "can't open %s", ifn);
392	}
393	if (strcmp(ofn, "-") == 0) {
394		output = fbinmode(stdout);
395		process();
396		return;
397	}
398	if (stat(ofn, &st) < 0) {
399		output = fopen(ofn, "wb");
400		if (output == NULL)
401			err(2, "can't create %s", ofn);
402		process();
403		return;
404	}
405
406	tempname = astrcat(ofn, ".XXXXXX");
407	output = mktempmode(tempname, st.st_mode);
408	if (output == NULL)
409		err(2, "can't create %s", tempname);
410
411	process();
412
413	if (backext != NULL) {
414		char *backname = astrcat(ofn, backext);
415		if (rename(ofn, backname) < 0)
416			err(2, "can't rename \"%s\" to \"%s\"", ofn, backname);
417		free(backname);
418	}
419	if (replace(tempname, ofn) < 0)
420		err(2, "can't rename \"%s\" to \"%s\"", tempname, ofn);
421	free(tempname);
422	tempname = NULL;
423}
424
425/*
426 * For cleaning up if there is an error.
427 */
428static void
429cleantemp(void)
430{
431	if (tempname != NULL)
432		remove(tempname);
433}
434
435/*
436 * Self-identification functions.
437 */
438
439static void
440version(void)
441{
442	const char *c = copyright;
443	for (;;) {
444		while (*++c != '$')
445			if (*c == '\0')
446				exit(0);
447		while (*++c != '$')
448			putc(*c, stderr);
449		putc('\n', stderr);
450	}
451}
452
/* Write the two-line command synopsis to the stream fp. */
static void
synopsis(FILE *fp)
{
	static const char summary[] =
	    "usage:	unifdef [-bBcdehKkmnsStV] [-x{012}] [-Mext] [-opath] \\\n"
	    "		[-[i]Dsym[=val]] [-[i]Usym] [-fpath] ... [file] ...\n";

	fputs(summary, fp);
}
460
/* Report a command-line error: synopsis on stderr, exit status 2. */
static void
usage(void)
{
	FILE *const where = stderr;

	synopsis(where);
	exit(2);
}
467
/*
 * Print the synopsis followed by a one-line description of every
 * option to stdout, then exit with status 0.
 */
static void
help(void)
{
	static const char options[] =
	    "	-Dsym=val  define preprocessor symbol with given value\n"
	    "	-Dsym      define preprocessor symbol with value 1\n"
	    "	-Usym	   preprocessor symbol is undefined\n"
	    "	-iDsym=val \\  ignore C strings and comments\n"
	    "	-iDsym      ) in sections controlled by these\n"
	    "	-iUsym	   /  preprocessor symbols\n"
	    "	-fpath	file containing #define and #undef directives\n"
	    "	-b	blank lines instead of deleting them\n"
	    "	-B	compress blank lines around deleted section\n"
	    "	-c	complement (invert) keep vs. delete\n"
	    "	-d	debugging mode\n"
	    "	-e	ignore multiline preprocessor directives\n"
	    "	-h	print help\n"
	    "	-Ipath	extra include file path (ignored)\n"
	    "	-K	disable && and || short-circuiting\n"
	    "	-k	process constant #if expressions\n"
	    "	-Mext	modify in place and keep backups\n"
	    "	-m	modify input files in place\n"
	    "	-n	add #line directives to output\n"
	    "	-opath	output file name\n"
	    "	-S	list #if control symbols with nesting\n"
	    "	-s	list #if control symbols\n"
	    "	-t	ignore C strings and comments\n"
	    "	-V	print version\n"
	    "	-x{012}	exit status mode\n";

	synopsis(stdout);
	fputs(options, stdout);
	exit(0);
}
501
/*
 * A state transition function alters the global #if processing state
 * in a particular way. The table below is indexed by the current
 * processing state and the type of the current line.
 *
 * Nesting is handled by keeping a stack of states; some transition
 * functions increase or decrease the depth. They also maintain the
 * ignore state on a stack. In some complicated cases they have to
 * alter the preprocessor directive, as follows.
 *
 * When we have processed a group that starts off with a known-false
 * #if/#elif sequence (which has therefore been deleted) followed by a
 * #elif that we don't understand and therefore must keep, we edit the
 * latter into a #if to keep the nesting correct. We use memcpy() to
 * overwrite the 4 byte token "elif" with "if  " without a '\0' byte.
 *
 * When we find a true #elif in a group, the following block will
 * always be kept and the rest of the sequence after the next #elif or
 * #else will be discarded. We edit the #elif into a #else and the
 * following directive to #endif since this has the desired behaviour.
 *
 * "Dodgy" directives are split across multiple lines, the most common
 * example being a multi-line comment hanging off the right of the
 * directive. We can handle them correctly only if there is no change
 * from printing to dropping (or vice versa) caused by that directive.
 * If the directive is the first of a group we have a choice between
 * failing with an error, or passing it through unchanged instead of
 * evaluating it. The latter is not the default to avoid questions from
 * users about unifdef unexpectedly leaving behind preprocessor directives.
 */
typedef void state_fn(void);

/*
 * Report an error.
 */
static void
Eelif(void)
{
	error("Inappropriate #elif");
}

static void
Eelse(void)
{
	error("Inappropriate #else");
}

static void
Eendif(void)
{
	error("Inappropriate #endif");
}

static void
Eeof(void)
{
	error("Premature EOF");
}

static void
Eioccc(void)
{
	error("Obfuscated preprocessor control line");
}

/*
 * Plain line handling: flush the current line as kept or as dropped.
 */
static void
print(void)
{
	flushline(true);
}

static void
drop(void)
{
	flushline(false);
}
543/* output lacks group's start line */
544static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
545static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
546static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
547/* print/pass this block */
548static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
549static void Pelse (void) { print();              state(IS_PASS_ELSE); }
550static void Pendif(void) { print(); unnest(); }
551/* discard this block */
552static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
553static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
554static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
555static void Dendif(void) { drop();  unnest(); }
556/* first line of group */
557static void Fdrop (void) { nest();  Dfalse(); }
558static void Fpass (void) { nest();  Pelif(); }
559static void Ftrue (void) { nest();  Strue(); }
560static void Ffalse(void) { nest();  Sfalse(); }
561/* variable pedantry for obfuscated lines */
562static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
563static void Oif   (void) { if (!iocccok) Eioccc(); Fpass(); }
564static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
565/* ignore comments in this block */
566static void Idrop (void) { Fdrop();  ignoreon(); }
567static void Itrue (void) { Ftrue();  ignoreon(); }
568static void Ifalse(void) { Ffalse(); ignoreon(); }
569/* modify this line */
570static void Mpass (void) { memcpy(keyword, "if  ", 4); Pelif(); }
571static void Mtrue (void) { keywordedit("else");  state(IS_TRUE_MIDDLE); }
572static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
573static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
574
575static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
576/* IS_OUTSIDE */
577{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
578  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
579  print, done,  abort },
580/* IS_FALSE_PREFIX */
581{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
582  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
583  drop,  Eeof,  abort },
584/* IS_TRUE_PREFIX */
585{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
586  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
587  print, Eeof,  abort },
588/* IS_PASS_MIDDLE */
589{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
590  Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
591  print, Eeof,  abort },
592/* IS_FALSE_MIDDLE */
593{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
594  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
595  drop,  Eeof,  abort },
596/* IS_TRUE_MIDDLE */
597{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
598  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
599  print, Eeof,  abort },
600/* IS_PASS_ELSE */
601{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
602  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
603  print, Eeof,  abort },
604/* IS_FALSE_ELSE */
605{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
606  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
607  drop,  Eeof,  abort },
608/* IS_TRUE_ELSE */
609{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
610  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
611  print, Eeof,  abort },
612/* IS_FALSE_TRAILER */
613{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
614  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
615  drop,  Eeof,  abort }
616/*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
617  TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
618  PLAIN  EOF    ERROR */
619};
620
621/*
622 * State machine utility functions
623 */
624static void
625ignoreoff(void)
626{
627	if (depth == 0)
628		abort(); /* bug */
629	ignoring[depth] = ignoring[depth-1];
630}
631static void
632ignoreon(void)
633{
634	ignoring[depth] = true;
635}
636static void
637keywordedit(const char *replacement)
638{
639	snprintf(keyword, tline + sizeof(tline) - keyword,
640	    "%s%s", replacement, newline);
641	print();
642}
643static void
644nest(void)
645{
646	if (depth > MAXDEPTH-1)
647		abort(); /* bug */
648	if (depth == MAXDEPTH-1)
649		error("Too many levels of nesting");
650	depth += 1;
651	stifline[depth] = linenum;
652}
653static void
654unnest(void)
655{
656	if (depth == 0)
657		abort(); /* bug */
658	depth -= 1;
659}
660static void
661state(Ifstate is)
662{
663	ifstate[depth] = is;
664}
665
666/*
667 * The last state transition function. When this is called,
668 * lineval == LT_EOF, so the process() loop will terminate.
669 */
670static void
671done(void)
672{
673	if (incomment)
674		error("EOF in comment");
675	closeio();
676}
677
678/*
679 * Write a line to the output or not, according to command line options.
680 * If writing fails, closeio() will print the error and exit.
681 */
682static void
683flushline(bool keep)
684{
685	if (symlist)
686		return;
687	if (keep ^ complement) {
688		bool blankline = tline[strspn(tline, " \t\r\n")] == '\0';
689		if (blankline && compblank && blankcount != blankmax) {
690			delcount += 1;
691			blankcount += 1;
692		} else {
693			if (lnnum && delcount > 0)
694				hashline();
695			if (fputs(tline, output) == EOF)
696				closeio();
697			delcount = 0;
698			blankmax = blankcount = blankline ? blankcount + 1 : 0;
699		}
700	} else {
701		if (lnblank && fputs(newline, output) == EOF)
702			closeio();
703		exitstat = 1;
704		delcount += 1;
705		blankcount = 0;
706	}
707	if (debugging && fflush(output) == EOF)
708		closeio();
709}
710
711/*
712 * Format of #line directives depends on whether we know the input filename.
713 */
714static void
715hashline(void)
716{
717	int e;
718
719	if (linefile == NULL)
720		e = fprintf(output, "#line %d%s", linenum, newline);
721	else
722		e = fprintf(output, "#line %d \"%s\"%s",
723		    linenum, linefile, newline);
724	if (e < 0)
725		closeio();
726}
727
728/*
729 * Flush the output and handle errors.
730 */
731static void
732closeio(void)
733{
734	/* Tidy up after findsym(). */
735	if (symdepth && !zerosyms)
736		printf("\n");
737	if (output != NULL && (ferror(output) || fclose(output) == EOF))
738			err(2, "%s: can't write to output", filename);
739	fclose(input);
740}
741
742/*
743 * The driver for the state machine.
744 */
745static void
746process(void)
747{
748	Linetype lineval = LT_PLAIN;
749	/* When compressing blank lines, act as if the file
750	   is preceded by a large number of blank lines. */
751	blankmax = blankcount = 1000;
752	zerosyms = true;
753	newline = NULL;
754	linenum = 0;
755	while (lineval != LT_EOF) {
756		lineval = parseline();
757		trans_table[ifstate[depth]][lineval]();
758		debug("process line %d %s -> %s depth %d",
759		    linenum, linetype_name[lineval],
760		    ifstate_name[ifstate[depth]], depth);
761	}
762}
763
764/*
765 * Parse a line and determine its type. We keep the preprocessor line
766 * parser state between calls in the global variable linestate, with
767 * help from skipcomment().
768 */
769static Linetype
770parseline(void)
771{
772	const char *cp;
773	int cursym;
774	Linetype retval;
775	Comment_state wascomment;
776
777	wascomment = incomment;
778	cp = skiphash();
779	if (cp == NULL)
780		return (LT_EOF);
781	if (newline == NULL) {
782		if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
783			newline = newline_crlf;
784		else
785			newline = newline_unix;
786	}
787	if (*cp == '\0') {
788		retval = LT_PLAIN;
789		goto done;
790	}
791	keyword = tline + (cp - tline);
792	if ((cp = matchsym("ifdef", keyword)) != NULL ||
793	    (cp = matchsym("ifndef", keyword)) != NULL) {
794		cp = skipcomment(cp);
795		if ((cursym = findsym(&cp)) < 0)
796			retval = LT_IF;
797		else {
798			retval = (keyword[2] == 'n')
799			    ? LT_FALSE : LT_TRUE;
800			if (value[cursym] == NULL)
801				retval = (retval == LT_TRUE)
802				    ? LT_FALSE : LT_TRUE;
803			if (ignore[cursym])
804				retval = (retval == LT_TRUE)
805				    ? LT_TRUEI : LT_FALSEI;
806		}
807	} else if ((cp = matchsym("if", keyword)) != NULL)
808		retval = ifeval(&cp);
809	else if ((cp = matchsym("elif", keyword)) != NULL)
810		retval = linetype_if2elif(ifeval(&cp));
811	else if ((cp = matchsym("else", keyword)) != NULL)
812		retval = LT_ELSE;
813	else if ((cp = matchsym("endif", keyword)) != NULL)
814		retval = LT_ENDIF;
815	else {
816		cp = skipsym(keyword);
817		/* no way can we deal with a continuation inside a keyword */
818		if (strncmp(cp, "\\\r\n", 3) == 0 ||
819		    strncmp(cp, "\\\n", 2) == 0)
820			Eioccc();
821		cp = skipline(cp);
822		retval = LT_PLAIN;
823		goto done;
824	}
825	cp = skipcomment(cp);
826	if (*cp != '\0') {
827		cp = skipline(cp);
828		if (retval == LT_TRUE || retval == LT_FALSE ||
829		    retval == LT_TRUEI || retval == LT_FALSEI)
830			retval = LT_IF;
831		if (retval == LT_ELTRUE || retval == LT_ELFALSE)
832			retval = LT_ELIF;
833	}
834	/* the following can happen if the last line of the file lacks a
835	   newline or if there is too much whitespace in a directive */
836	if (linestate == LS_HASH) {
837		long len = cp - tline;
838		if (fgets(tline + len, MAXLINE - len, input) == NULL) {
839			if (ferror(input))
840				err(2, "can't read %s", filename);
841			/* append the missing newline at eof */
842			strcpy(tline + len, newline);
843			cp += strlen(newline);
844			linestate = LS_START;
845		} else {
846			linestate = LS_DIRTY;
847		}
848	}
849	if (retval != LT_PLAIN && (wascomment || linestate != LS_START)) {
850		retval = linetype_2dodgy(retval);
851		linestate = LS_DIRTY;
852	}
853done:
854	debug("parser line %d state %s comment %s line", linenum,
855	    comment_name[incomment], linestate_name[linestate]);
856	return (retval);
857}
858
859/*
860 * These are the binary operators that are supported by the expression
861 * evaluator.
862 */
863static Linetype op_strict(long *p, long v, Linetype at, Linetype bt) {
864	if(at == LT_IF || bt == LT_IF) return (LT_IF);
865	return (*p = v, v ? LT_TRUE : LT_FALSE);
866}
867static Linetype op_lt(long *p, Linetype at, long a, Linetype bt, long b) {
868	return op_strict(p, a < b, at, bt);
869}
870static Linetype op_gt(long *p, Linetype at, long a, Linetype bt, long b) {
871	return op_strict(p, a > b, at, bt);
872}
873static Linetype op_le(long *p, Linetype at, long a, Linetype bt, long b) {
874	return op_strict(p, a <= b, at, bt);
875}
876static Linetype op_ge(long *p, Linetype at, long a, Linetype bt, long b) {
877	return op_strict(p, a >= b, at, bt);
878}
879static Linetype op_eq(long *p, Linetype at, long a, Linetype bt, long b) {
880	return op_strict(p, a == b, at, bt);
881}
882static Linetype op_ne(long *p, Linetype at, long a, Linetype bt, long b) {
883	return op_strict(p, a != b, at, bt);
884}
885static Linetype op_or(long *p, Linetype at, long a, Linetype bt, long b) {
886	if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE))
887		return (*p = 1, LT_TRUE);
888	return op_strict(p, a || b, at, bt);
889}
890static Linetype op_and(long *p, Linetype at, long a, Linetype bt, long b) {
891	if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE))
892		return (*p = 0, LT_FALSE);
893	return op_strict(p, a && b, at, bt);
894}
895
896/*
897 * An evaluation function takes three arguments, as follows: (1) a pointer to
898 * an element of the precedence table which lists the operators at the current
899 * level of precedence; (2) a pointer to an integer which will receive the
900 * value of the expression; and (3) a pointer to a char* that points to the
901 * expression to be evaluated and that is updated to the end of the expression
902 * when evaluation is complete. The function returns LT_FALSE if the value of
903 * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
904 * depends on an unknown symbol, or LT_ERROR if there is a parse failure.
905 */
906struct ops;
907
908typedef Linetype eval_fn(const struct ops *, long *, const char **);
909
910static eval_fn eval_table, eval_unary;
911
912/*
913 * The precedence table. Expressions involving binary operators are evaluated
914 * in a table-driven way by eval_table. When it evaluates a subexpression it
915 * calls the inner function with its first argument pointing to the next
916 * element of the table. Innermost expressions have special non-table-driven
917 * handling.
918 */
struct op {
	/* Operator text that eval_table matches as a prefix of the input;
	   a NULL str marks the end of a precedence level's operator list. */
	const char *str;
	/* Combines two operands: (result pointer, left type, left value,
	   right type, right value) -> type of the result. */
	Linetype (*fn)(long *, Linetype, long, Linetype, long);
};
struct ops {
	/* Evaluator that eval_table calls for each operand at this level. */
	eval_fn *inner;
	/* Operators at this level. eval_table scans until it meets an entry
	   whose str is NULL, so at least one entry must be left zeroed. */
	struct op op[5];
};
927static const struct ops eval_ops[] = {
928	{ eval_table, { { "||", op_or } } },
929	{ eval_table, { { "&&", op_and } } },
930	{ eval_table, { { "==", op_eq },
931			{ "!=", op_ne } } },
932	{ eval_unary, { { "<=", op_le },
933			{ ">=", op_ge },
934			{ "<", op_lt },
935			{ ">", op_gt } } }
936};
937
938/* Current operator precedence level */
939static long prec(const struct ops *ops)
940{
941	return (ops - eval_ops);
942}
943
944/*
945 * Function for evaluating the innermost parts of expressions,
946 * viz. !expr (expr) number defined(symbol) symbol
947 * We reset the constexpr flag in the last two cases.
948 */
949static Linetype
950eval_unary(const struct ops *ops, long *valp, const char **cpp)
951{
952	const char *cp;
953	char *ep;
954	int sym;
955	bool defparen;
956	Linetype lt;
957
958	cp = skipcomment(*cpp);
959	if (*cp == '!') {
960		debug("eval%d !", prec(ops));
961		cp++;
962		lt = eval_unary(ops, valp, &cp);
963		if (lt == LT_ERROR)
964			return (LT_ERROR);
965		if (lt != LT_IF) {
966			*valp = !*valp;
967			lt = *valp ? LT_TRUE : LT_FALSE;
968		}
969	} else if (*cp == '(') {
970		cp++;
971		debug("eval%d (", prec(ops));
972		lt = eval_table(eval_ops, valp, &cp);
973		if (lt == LT_ERROR)
974			return (LT_ERROR);
975		cp = skipcomment(cp);
976		if (*cp++ != ')')
977			return (LT_ERROR);
978	} else if (isdigit((unsigned char)*cp)) {
979		debug("eval%d number", prec(ops));
980		*valp = strtol(cp, &ep, 0);
981		if (ep == cp)
982			return (LT_ERROR);
983		lt = *valp ? LT_TRUE : LT_FALSE;
984		cp = ep;
985	} else if (matchsym("defined", cp) != NULL) {
986		cp = skipcomment(cp+7);
987		if (*cp == '(') {
988			cp = skipcomment(cp+1);
989			defparen = true;
990		} else {
991			defparen = false;
992		}
993		sym = findsym(&cp);
994		cp = skipcomment(cp);
995		if (defparen && *cp++ != ')') {
996			debug("eval%d defined missing ')'", prec(ops));
997			return (LT_ERROR);
998		}
999		if (sym < 0) {
1000			debug("eval%d defined unknown", prec(ops));
1001			lt = LT_IF;
1002		} else {
1003			debug("eval%d defined %s", prec(ops), symname[sym]);
1004			*valp = (value[sym] != NULL);
1005			lt = *valp ? LT_TRUE : LT_FALSE;
1006		}
1007		constexpr = false;
1008	} else if (!endsym(*cp)) {
1009		debug("eval%d symbol", prec(ops));
1010		sym = findsym(&cp);
1011		if (sym < 0) {
1012			lt = LT_IF;
1013			cp = skipargs(cp);
1014		} else if (value[sym] == NULL) {
1015			*valp = 0;
1016			lt = LT_FALSE;
1017		} else {
1018			*valp = strtol(value[sym], &ep, 0);
1019			if (*ep != '\0' || ep == value[sym])
1020				return (LT_ERROR);
1021			lt = *valp ? LT_TRUE : LT_FALSE;
1022			cp = skipargs(cp);
1023		}
1024		constexpr = false;
1025	} else {
1026		debug("eval%d bad expr", prec(ops));
1027		return (LT_ERROR);
1028	}
1029
1030	*cpp = cp;
1031	debug("eval%d = %d", prec(ops), *valp);
1032	return (lt);
1033}
1034
1035/*
1036 * Table-driven evaluation of binary operators.
1037 */
1038static Linetype
1039eval_table(const struct ops *ops, long *valp, const char **cpp)
1040{
1041	const struct op *op;
1042	const char *cp;
1043	long val;
1044	Linetype lt, rt;
1045
1046	debug("eval%d", prec(ops));
1047	cp = *cpp;
1048	lt = ops->inner(ops+1, valp, &cp);
1049	if (lt == LT_ERROR)
1050		return (LT_ERROR);
1051	for (;;) {
1052		cp = skipcomment(cp);
1053		for (op = ops->op; op->str != NULL; op++)
1054			if (strncmp(cp, op->str, strlen(op->str)) == 0)
1055				break;
1056		if (op->str == NULL)
1057			break;
1058		cp += strlen(op->str);
1059		debug("eval%d %s", prec(ops), op->str);
1060		rt = ops->inner(ops+1, &val, &cp);
1061		if (rt == LT_ERROR)
1062			return (LT_ERROR);
1063		lt = op->fn(valp, lt, *valp, rt, val);
1064	}
1065
1066	*cpp = cp;
1067	debug("eval%d = %d", prec(ops), *valp);
1068	debug("eval%d lt = %s", prec(ops), linetype_name[lt]);
1069	return (lt);
1070}
1071
1072/*
1073 * Evaluate the expression on a #if or #elif line. If we can work out
1074 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1075 * return just a generic LT_IF.
1076 */
1077static Linetype
1078ifeval(const char **cpp)
1079{
1080	Linetype ret;
1081	long val = 0;
1082
1083	debug("eval %s", *cpp);
1084	constexpr = killconsts ? false : true;
1085	ret = eval_table(eval_ops, &val, cpp);
1086	debug("eval = %d", val);
1087	return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
1088}
1089
1090/*
1091 * Read a line and examine its initial part to determine if it is a
1092 * preprocessor directive. Returns NULL on EOF, or a pointer to a
1093 * preprocessor directive name, or a pointer to the zero byte at the
1094 * end of the line.
1095 */
1096static const char *
1097skiphash(void)
1098{
1099	const char *cp;
1100
1101	linenum++;
1102	if (fgets(tline, MAXLINE, input) == NULL) {
1103		if (ferror(input))
1104			err(2, "can't read %s", filename);
1105		else
1106			return (NULL);
1107	}
1108	cp = skipcomment(tline);
1109	if (linestate == LS_START && *cp == '#') {
1110		linestate = LS_HASH;
1111		return (skipcomment(cp + 1));
1112	} else if (*cp == '\0') {
1113		return (cp);
1114	} else {
1115		return (skipline(cp));
1116	}
1117}
1118
1119/*
1120 * Mark a line dirty and consume the rest of it, keeping track of the
1121 * lexical state.
1122 */
1123static const char *
1124skipline(const char *cp)
1125{
1126	if (*cp != '\0')
1127		linestate = LS_DIRTY;
1128	while (*cp != '\0')
1129		cp = skipcomment(cp + 1);
1130	return (cp);
1131}
1132
1133/*
1134 * Skip over comments, strings, and character literals and stop at the
1135 * next character position that is not whitespace. Between calls we keep
1136 * the comment state in the global variable incomment, and we also adjust
1137 * the global variable linestate when we see a newline.
1138 * XXX: doesn't cope with the buffer splitting inside a state transition.
1139 */
1140static const char *
1141skipcomment(const char *cp)
1142{
1143	if (text || ignoring[depth]) {
1144		for (; isspace((unsigned char)*cp); cp++)
1145			if (*cp == '\n')
1146				linestate = LS_START;
1147		return (cp);
1148	}
1149	while (*cp != '\0')
1150		/* don't reset to LS_START after a line continuation */
1151		if (strncmp(cp, "\\\r\n", 3) == 0)
1152			cp += 3;
1153		else if (strncmp(cp, "\\\n", 2) == 0)
1154			cp += 2;
1155		else switch (incomment) {
1156		case NO_COMMENT:
1157			if (strncmp(cp, "/\\\r\n", 4) == 0) {
1158				incomment = STARTING_COMMENT;
1159				cp += 4;
1160			} else if (strncmp(cp, "/\\\n", 3) == 0) {
1161				incomment = STARTING_COMMENT;
1162				cp += 3;
1163			} else if (strncmp(cp, "/*", 2) == 0) {
1164				incomment = C_COMMENT;
1165				cp += 2;
1166			} else if (strncmp(cp, "//", 2) == 0) {
1167				incomment = CXX_COMMENT;
1168				cp += 2;
1169			} else if (strncmp(cp, "\'", 1) == 0) {
1170				incomment = CHAR_LITERAL;
1171				linestate = LS_DIRTY;
1172				cp += 1;
1173			} else if (strncmp(cp, "\"", 1) == 0) {
1174				incomment = STRING_LITERAL;
1175				linestate = LS_DIRTY;
1176				cp += 1;
1177			} else if (strncmp(cp, "\n", 1) == 0) {
1178				linestate = LS_START;
1179				cp += 1;
1180			} else if (strchr(" \r\t", *cp) != NULL) {
1181				cp += 1;
1182			} else
1183				return (cp);
1184			continue;
1185		case CXX_COMMENT:
1186			if (strncmp(cp, "\n", 1) == 0) {
1187				incomment = NO_COMMENT;
1188				linestate = LS_START;
1189			}
1190			cp += 1;
1191			continue;
1192		case CHAR_LITERAL:
1193		case STRING_LITERAL:
1194			if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
1195			    (incomment == STRING_LITERAL && cp[0] == '\"')) {
1196				incomment = NO_COMMENT;
1197				cp += 1;
1198			} else if (cp[0] == '\\') {
1199				if (cp[1] == '\0')
1200					cp += 1;
1201				else
1202					cp += 2;
1203			} else if (strncmp(cp, "\n", 1) == 0) {
1204				if (incomment == CHAR_LITERAL)
1205					error("unterminated char literal");
1206				else
1207					error("unterminated string literal");
1208			} else
1209				cp += 1;
1210			continue;
1211		case C_COMMENT:
1212			if (strncmp(cp, "*\\\r\n", 4) == 0) {
1213				incomment = FINISHING_COMMENT;
1214				cp += 4;
1215			} else if (strncmp(cp, "*\\\n", 3) == 0) {
1216				incomment = FINISHING_COMMENT;
1217				cp += 3;
1218			} else if (strncmp(cp, "*/", 2) == 0) {
1219				incomment = NO_COMMENT;
1220				cp += 2;
1221			} else
1222				cp += 1;
1223			continue;
1224		case STARTING_COMMENT:
1225			if (*cp == '*') {
1226				incomment = C_COMMENT;
1227				cp += 1;
1228			} else if (*cp == '/') {
1229				incomment = CXX_COMMENT;
1230				cp += 1;
1231			} else {
1232				incomment = NO_COMMENT;
1233				linestate = LS_DIRTY;
1234			}
1235			continue;
1236		case FINISHING_COMMENT:
1237			if (*cp == '/') {
1238				incomment = NO_COMMENT;
1239				cp += 1;
1240			} else
1241				incomment = C_COMMENT;
1242			continue;
1243		default:
1244			abort(); /* bug */
1245		}
1246	return (cp);
1247}
1248
/*
 * Skip macro arguments: a parenthesised, possibly nested, list following
 * optional whitespace and comments. Returns the position after the list,
 * or the position of the first significant character if no '(' follows.
 * If the parentheses never balance the original cp is returned.
 */
static const char *
skipargs(const char *cp)
{
	const char *const start = cp;
	int nesting = 0;

	cp = skipcomment(cp);
	if (*cp != '(')
		return (cp);
	for (;;) {
		switch (*cp) {
		case '(':
			nesting++;
			break;
		case ')':
			nesting--;
			break;
		default:
			break;
		}
		cp = skipcomment(cp + 1);
		if (nesting == 0 || *cp == '\0')
			break;
	}
	/* Rewind on imbalance and re-detect the syntax error later. */
	return (nesting == 0 ? cp : start);
}
1273
/*
 * Skip over an identifier: advance until endsym() reports that the
 * current character ends a symbol, and return that position.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
1284
/*
 * Skip whitespace and comments, then take a copy of the identifier that
 * follows. Returns NULL, leaving *cpp unchanged, when no identifier is
 * found; otherwise *cpp is moved past the identifier and a newly
 * allocated copy of it is returned.
 */
static const char *
getsym(const char **cpp)
{
	const char *first, *last;

	first = skipcomment(*cpp);
	last = skipsym(first);
	if (last == first)
		return NULL;
	*cpp = last;
	return (xstrdup(first, last));
}
1300
/*
 * Check that the symbol s is a prefix of t and that the character
 * following it in t is not a symbol character. Returns a pointer to
 * that following character on a match, otherwise NULL.
 */
static const char *
matchsym(const char *s, const char *t)
{
	for (; *s != '\0' && *t != '\0'; ++s, ++t) {
		if (*s != *t)
			return (NULL);
	}
	/* t ran out before the whole of s was matched */
	if (*s != '\0')
		return (NULL);
	if (endsym(*t))
		return(t);
	return(NULL);
}
1320
1321/*
1322 * Look for the symbol in the symbol table. If it is found, we return
1323 * the symbol table index, else we return -1.
1324 */
1325static int
1326findsym(const char **strp)
1327{
1328	const char *str;
1329	int symind;
1330
1331	str = *strp;
1332	*strp = skipsym(str);
1333	if (symlist) {
1334		if (*strp == str)
1335			return (-1);
1336		if (symdepth && firstsym)
1337			printf("%s%3d", zerosyms ? "" : "\n", depth);
1338		firstsym = zerosyms = false;
1339		printf("%s%.*s%s",
1340		       symdepth ? " " : "",
1341		       (int)(*strp-str), str,
1342		       symdepth ? "" : "\n");
1343		/* we don't care about the value of the symbol */
1344		return (0);
1345	}
1346	for (symind = 0; symind < nsyms; ++symind) {
1347		if (matchsym(symname[symind], str) != NULL) {
1348			debugsym("findsym", symind);
1349			return (symind);
1350		}
1351	}
1352	return (-1);
1353}
1354
1355/*
1356 * Resolve indirect symbol values to their final definitions.
1357 */
1358static void
1359indirectsym(void)
1360{
1361	const char *cp;
1362	int changed, sym, ind;
1363
1364	do {
1365		changed = 0;
1366		for (sym = 0; sym < nsyms; ++sym) {
1367			if (value[sym] == NULL)
1368				continue;
1369			cp = value[sym];
1370			ind = findsym(&cp);
1371			if (ind == -1 || ind == sym ||
1372			    *cp != '\0' ||
1373			    value[ind] == NULL ||
1374			    value[ind] == value[sym])
1375				continue;
1376			debugsym("indir...", sym);
1377			value[sym] = value[ind];
1378			debugsym("...ectsym", sym);
1379			changed++;
1380		}
1381	} while (changed);
1382}
1383
/*
 * Add a symbol given in the form "sym" or, when definethis is set,
 * "sym=val". A bare symbol gets the value "1" when defining and NULL
 * otherwise. symval is cut in place at the '='. Any other text after
 * the symbol name goes to usage().
 */
static void
addsym1(bool ignorethis, bool definethis, char *symval)
{
	const char *name = symval;
	const char *rest = skipsym(name);

	if (*rest == '\0') {
		rest = definethis ? "1" : NULL;
	} else if (definethis && *rest == '=') {
		/* terminate the name where the '=' was */
		symval[rest - name] = '\0';
		rest++;
	} else {
		usage();
	}
	addsym2(ignorethis, name, rest);
}
1404
1405/*
1406 * Add a symbol to the symbol table.
1407 */
1408static void
1409addsym2(bool ignorethis, const char *sym, const char *val)
1410{
1411	const char *cp = sym;
1412	int symind;
1413
1414	symind = findsym(&cp);
1415	if (symind < 0) {
1416		if (nsyms >= MAXSYMS)
1417			errx(2, "too many symbols");
1418		symind = nsyms++;
1419	}
1420	ignore[symind] = ignorethis;
1421	symname[symind] = sym;
1422	value[symind] = val;
1423	debugsym("addsym", symind);
1424}
1425
1426static void
1427debugsym(const char *why, int symind)
1428{
1429	debug("%s %s%c%s", why, symname[symind],
1430	    value[symind] ? '=' : ' ',
1431	    value[symind] ? value[symind] : "undef");
1432}
1433
1434/*
1435 * Add symbols to the symbol table from a file containing
1436 * #define and #undef preprocessor directives.
1437 */
1438static void
1439defundefile(const char *fn)
1440{
1441	filename = fn;
1442	input = fopen(fn, "rb");
1443	if (input == NULL)
1444		err(2, "can't open %s", fn);
1445	linenum = 0;
1446	while (defundef())
1447		;
1448	if (ferror(input))
1449		err(2, "can't read %s", filename);
1450	else
1451		fclose(input);
1452	if (incomment)
1453		error("EOF in comment");
1454}
1455
1456/*
1457 * Read and process one #define or #undef directive
1458 */
1459static bool
1460defundef(void)
1461{
1462	const char *cp, *kw, *sym, *val, *end;
1463
1464	cp = skiphash();
1465	if (cp == NULL)
1466		return (false);
1467	if (*cp == '\0')
1468		goto done;
1469	/* strip trailing whitespace, and do a fairly rough check to
1470	   avoid unsupported multi-line preprocessor directives */
1471	end = cp + strlen(cp);
1472	while (end > tline && strchr(" \t\n\r", end[-1]) != NULL)
1473		--end;
1474	if (end > tline && end[-1] == '\\')
1475		Eioccc();
1476
1477	kw = cp;
1478	if ((cp = matchsym("define", kw)) != NULL) {
1479		sym = getsym(&cp);
1480		if (sym == NULL)
1481			error("missing macro name in #define");
1482		if (*cp == '(') {
1483			val = "1";
1484		} else {
1485			cp = skipcomment(cp);
1486			val = (cp < end) ? xstrdup(cp, end) : "";
1487		}
1488		debug("#define");
1489		addsym2(false, sym, val);
1490	} else if ((cp = matchsym("undef", kw)) != NULL) {
1491		sym = getsym(&cp);
1492		if (sym == NULL)
1493			error("missing macro name in #undef");
1494		cp = skipcomment(cp);
1495		debug("#undef");
1496		addsym2(false, sym, NULL);
1497	} else {
1498		error("unrecognized preprocessor directive");
1499	}
1500	skipline(cp);
1501done:
1502	debug("parser line %d state %s comment %s line", linenum,
1503	    comment_name[incomment], linestate_name[linestate]);
1504	return (true);
1505}
1506
/*
 * Concatenate two strings into newly allocated memory. The length is
 * measured with a sizing call to snprintf; a formatting or allocation
 * failure is fatal. The caller owns the returned buffer.
 */
static char *
astrcat(const char *s1, const char *s2)
{
	int need;
	size_t cap;
	char *joined;

	need = snprintf(NULL, 0, "%s%s", s1, s2);
	if (need < 0)
		err(2, "snprintf");
	/* one extra byte for the terminator */
	cap = (size_t)need + 1;
	joined = malloc(cap);
	if (joined == NULL)
		err(2, "malloc");
	snprintf(joined, cap, "%s%s", s1, s2);
	return (joined);
}
1527
/*
 * Duplicate the segment [start, end) of a string into newly allocated,
 * NUL-terminated memory. Allocation failure is fatal, and end < start is
 * treated as an internal bug.
 *
 * Exactly end - start bytes are copied, so the source does not need a
 * terminator and nothing beyond end is read.
 */
static const char *
xstrdup(const char *start, const char *end)
{
	size_t len;
	char *s;

	if (end < start) abort(); /* bug */
	len = (size_t)(end - start);
	s = malloc(len + 1);
	if (s == NULL)
		err(2, "malloc");
	memcpy(s, start, len);
	s[len] = '\0';
	return (s);
}
1545
1546/*
1547 * Diagnostics.
1548 */
1549static void
1550debug(const char *msg, ...)
1551{
1552	va_list ap;
1553
1554	if (debugging) {
1555		va_start(ap, msg);
1556		vwarnx(msg, ap);
1557		va_end(ap);
1558	}
1559}
1560
1561static void
1562error(const char *msg)
1563{
1564	if (depth == 0)
1565		warnx("%s: %d: %s", filename, linenum, msg);
1566	else
1567		warnx("%s: %d: %s (#if line %d depth %d)",
1568		    filename, linenum, msg, stifline[depth], depth);
1569	closeio();
1570	errx(2, "output may be truncated");
1571}
1572