1/*-
2 * SPDX-License-Identifier: BSD-4-Clause
3 *
4 * Copyright (c) 1985 Sun Microsystems, Inc.
5 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
6 * Copyright (c) 1980, 1993
7 *	The Regents of the University of California.  All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38#include <sys/param.h>
39#include <sys/capsicum.h>
40#include <capsicum_helpers.h>
41#include <err.h>
42#include <errno.h>
43#include <fcntl.h>
44#include <unistd.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <ctype.h>
49#include "indent_globs.h"
50#include "indent_codes.h"
51#include "indent.h"
52
53/* Globals */
/*
 * Input and output streams.  main() opens them from the first and second
 * non-flag arguments; input falls back to stdin when no file is named, and
 * output falls back to stdout when input is stdin.
 */
54FILE	*input, *output;
/*
 * Label, code, comment and token buffers.  For each one main() allocates
 * bufsize bytes at <name>buf, starts s_<name>/e_<name> at <name>buf + 1 and
 * sets l_<name> to <name>buf + bufsize - 5.  In main() the text between
 * s_code and e_code is what has been accumulated for the current line.
 */
55char	*labbuf, *s_lab, *e_lab, *l_lab;
56char	*codebuf, *s_code, *e_code, *l_code;
57char	*combuf, *s_com, *e_com, *l_com;
58char	*tokenbuf, *s_token, *e_token, *l_token;
/*
 * Raw input buffer: main() allocates 10 bytes for in_buffer and places
 * in_buffer_limit at in_buffer + 8.  buf_ptr is the current read position
 * and buf_end the end of valid data; fill_buffer() is called once buf_ptr
 * reaches buf_end.
 */
59char	*in_buffer, *in_buffer_limit;
60char	*buf_ptr, *buf_end;
61
/*
 * Holding area for newlines and comments that follow if (), while (), else,
 * etc., buffered until the start of the following statement is seen.
 */
62char	 sc_buf[sc_size];
63
/*
 * save_com points at the start of the saved text inside sc_buf and sc_end at
 * the next free byte; sc_end is NULL while nothing is being buffered.
 * bp_save/be_save keep the real buf_ptr/buf_end while lexi() is redirected
 * to read tokens out of save_com.
 */
64char	*save_com, *sc_end;
65char	*bp_save;
66char	*be_save;
67
/* Formatting options (filled in by set_defaults(), set_profile(), set_option()). */
68struct options		opt;
/* Current input line number; starts at 1 and is bumped on each newline token. */
69int	 line_no;
70
/* The live parser state handed to lexi() and updated throughout main(). */
71struct parser_state	ps;
/*
 * NOTE(review): ifdef_level, state_stack and match_state are not referenced
 * in the visible part of this file; presumably they track parser state
 * across nested preprocessor conditionals -- confirm against their users.
 */
72int	 ifdef_level;
73struct parser_state	state_stack[5];
74struct parser_state	match_state[5];
75
76
/*
 * Forward declarations for helpers defined later in this file.
 *
 * bakcopy() is called by main() when an input file is named but no output
 * file is (out_name is then set to in_name).  NOTE(review): presumably it
 * creates the backup copy and reopens the streams -- confirm at its
 * definition.
 *
 * indent_declaration() is called by main() with a declaration indentation
 * (derived from dec_ind) and the tabs_to_var flag.
 */
77static void bakcopy(void);
78static void indent_declaration(int, int);
79
80const char *in_name = "Standard Input";	/* will always point to name of input
81					 * file */
82const char *out_name = "Standard Output";	/* will always point to name
83						 * of output file */
/* main() overrides this with $SIMPLE_BACKUP_SUFFIX when that variable is set. */
84const char *simple_backup_suffix = ".BAK";	/* Suffix to use for backup
85						 * files */
/*
 * NOTE(review): presumably holds the path of the backup file built by
 * bakcopy(); starts out empty -- confirm against bakcopy().
 */
86char        bakfile[MAXPATHLEN] = "";
87
88int
89main(int argc, char **argv)
90{
91    cap_rights_t rights;
92
93    int         dec_ind;	/* current indentation for declarations */
94    int         di_stack[20];	/* a stack of structure indentation levels */
95    int         force_nl;	/* when true, code must be broken */
96    int         hd_type = 0;	/* used to store type of stmt for if (...),
97				 * for (...), etc */
98    int		i;		/* local loop counter */
99    int         scase;		/* set to true when we see a case, so we will
100				 * know what to do with the following colon */
101    int         sp_sw;		/* when true, we are in the expression of
102				 * if(...), while(...), etc. */
103    int         squest;		/* when this is positive, we have seen a ?
104				 * without the matching : in a <c>?<s>:<s>
105				 * construct */
106    const char *t_ptr;		/* used for copying tokens */
107    int		tabs_to_var;	/* true if using tabs to indent to var name */
108    int         type_code;	/* the type of token, returned by lexi */
109
110    int         last_else = 0;	/* true iff last keyword was an else */
111    const char *profile_name = NULL;
112    const char *envval = NULL;
113    struct parser_state transient_state; /* a copy for lookup */
114
115    /*-----------------------------------------------*\
116    |		      INITIALIZATION		      |
117    \*-----------------------------------------------*/
118
119    found_err = 0;
120
121    ps.p_stack[0] = stmt;	/* this is the parser's stack */
122    ps.last_nl = true;		/* this is true if the last thing scanned was
123				 * a newline */
124    ps.last_token = semicolon;
125    combuf = (char *) malloc(bufsize);
126    if (combuf == NULL)
127	err(1, NULL);
128    labbuf = (char *) malloc(bufsize);
129    if (labbuf == NULL)
130	err(1, NULL);
131    codebuf = (char *) malloc(bufsize);
132    if (codebuf == NULL)
133	err(1, NULL);
134    tokenbuf = (char *) malloc(bufsize);
135    if (tokenbuf == NULL)
136	err(1, NULL);
137    alloc_typenames();
138    init_constant_tt();
139    l_com = combuf + bufsize - 5;
140    l_lab = labbuf + bufsize - 5;
141    l_code = codebuf + bufsize - 5;
142    l_token = tokenbuf + bufsize - 5;
143    combuf[0] = codebuf[0] = labbuf[0] = ' ';	/* set up code, label, and
144						 * comment buffers */
145    combuf[1] = codebuf[1] = labbuf[1] = '\0';
146    opt.else_if = 1;		/* Default else-if special processing to on */
147    s_lab = e_lab = labbuf + 1;
148    s_code = e_code = codebuf + 1;
149    s_com = e_com = combuf + 1;
150    s_token = e_token = tokenbuf + 1;
151
152    in_buffer = (char *) malloc(10);
153    if (in_buffer == NULL)
154	err(1, NULL);
155    in_buffer_limit = in_buffer + 8;
156    buf_ptr = buf_end = in_buffer;
157    line_no = 1;
158    had_eof = ps.in_decl = ps.decl_on_line = break_comma = false;
159    sp_sw = force_nl = false;
160    ps.in_or_st = false;
161    ps.bl_line = true;
162    dec_ind = 0;
163    di_stack[ps.dec_nest = 0] = 0;
164    ps.want_blank = ps.in_stmt = ps.ind_stmt = false;
165
166    scase = ps.pcase = false;
167    squest = 0;
168    sc_end = NULL;
169    bp_save = NULL;
170    be_save = NULL;
171
172    output = NULL;
173    tabs_to_var = 0;
174
175    envval = getenv("SIMPLE_BACKUP_SUFFIX");
176    if (envval)
177        simple_backup_suffix = envval;
178
179    /*--------------------------------------------------*\
180    |   		COMMAND LINE SCAN		 |
181    \*--------------------------------------------------*/
182
183#ifdef undef
184    max_col = 78;		/* -l78 */
185    lineup_to_parens = 1;	/* -lp */
186    lineup_to_parens_always = 0;	/* -nlpl */
187    ps.ljust_decl = 0;		/* -ndj */
188    ps.com_ind = 33;		/* -c33 */
189    star_comment_cont = 1;	/* -sc */
190    ps.ind_size = 8;		/* -i8 */
191    verbose = 0;
192    ps.decl_indent = 16;	/* -di16 */
193    ps.local_decl_indent = -1;	/* if this is not set to some nonnegative value
194				 * by an arg, we will set this equal to
195				 * ps.decl_ind */
196    ps.indent_parameters = 1;	/* -ip */
197    ps.decl_com_ind = 0;	/* if this is not set to some positive value
198				 * by an arg, we will set this equal to
199				 * ps.com_ind */
200    btype_2 = 1;		/* -br */
201    cuddle_else = 1;		/* -ce */
202    ps.unindent_displace = 0;	/* -d0 */
203    ps.case_indent = 0;		/* -cli0 */
204    format_block_comments = 1;	/* -fcb */
205    format_col1_comments = 1;	/* -fc1 */
206    procnames_start_line = 1;	/* -psl */
207    proc_calls_space = 0;	/* -npcs */
208    comment_delimiter_on_blankline = 1;	/* -cdb */
209    ps.leave_comma = 1;		/* -nbc */
210#endif
211
212    for (i = 1; i < argc; ++i)
213	if (strcmp(argv[i], "-npro") == 0)
214	    break;
215	else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0')
216	    profile_name = argv[i];	/* non-empty -P (set profile) */
217    set_defaults();
218    if (i >= argc)
219	set_profile(profile_name);
220
221    for (i = 1; i < argc; ++i) {
222
223	/*
224	 * look thru args (if any) for changes to defaults
225	 */
226	if (argv[i][0] != '-') {/* no flag on parameter */
227	    if (input == NULL) {	/* we must have the input file */
228		in_name = argv[i];	/* remember name of input file */
229		input = fopen(in_name, "r");
230		if (input == NULL)	/* check for open error */
231			err(1, "%s", in_name);
232		continue;
233	    }
234	    else if (output == NULL) {	/* we have the output file */
235		out_name = argv[i];	/* remember name of output file */
236		if (strcmp(in_name, out_name) == 0) {	/* attempt to overwrite
237							 * the file */
238		    errx(1, "input and output files must be different");
239		}
240		output = fopen(out_name, "w");
241		if (output == NULL)	/* check for create error */
242			err(1, "%s", out_name);
243		continue;
244	    }
245	    errx(1, "unknown parameter: %s", argv[i]);
246	}
247	else
248	    set_option(argv[i]);
249    }				/* end of for */
250    if (input == NULL)
251	input = stdin;
252    if (output == NULL) {
253	if (input == stdin)
254	    output = stdout;
255	else {
256	    out_name = in_name;
257	    bakcopy();
258	}
259    }
260
261    /* Restrict input/output descriptors and enter Capsicum sandbox. */
262    cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE);
263    if (caph_rights_limit(fileno(output), &rights) < 0)
264	err(EXIT_FAILURE, "unable to limit rights for %s", out_name);
265    cap_rights_init(&rights, CAP_FSTAT, CAP_READ);
266    if (caph_rights_limit(fileno(input), &rights) < 0)
267	err(EXIT_FAILURE, "unable to limit rights for %s", in_name);
268    if (caph_enter() < 0)
269	err(EXIT_FAILURE, "unable to enter capability mode");
270
271    if (opt.com_ind <= 1)
272	opt.com_ind = 2;	/* don't put normal comments before column 2 */
273    if (opt.block_comment_max_col <= 0)
274	opt.block_comment_max_col = opt.max_col;
275    if (opt.local_decl_indent < 0) /* if not specified by user, set this */
276	opt.local_decl_indent = opt.decl_indent;
277    if (opt.decl_com_ind <= 0)	/* if not specified by user, set this */
278	opt.decl_com_ind = opt.ljust_decl ? (opt.com_ind <= 10 ? 2 : opt.com_ind - 8) : opt.com_ind;
279    if (opt.continuation_indent == 0)
280	opt.continuation_indent = opt.ind_size;
281    fill_buffer();		/* get first batch of stuff into input buffer */
282
283    parse(semicolon);
284    {
285	char *p = buf_ptr;
286	int col = 1;
287
288	while (1) {
289	    if (*p == ' ')
290		col++;
291	    else if (*p == '\t')
292		col = opt.tabsize * (1 + (col - 1) / opt.tabsize) + 1;
293	    else
294		break;
295	    p++;
296	}
297	if (col > opt.ind_size)
298	    ps.ind_level = ps.i_l_follow = col / opt.ind_size;
299    }
300
301    /*
302     * START OF MAIN LOOP
303     */
304
305    while (1) {			/* this is the main loop.  it will go until we
306				 * reach eof */
307	int comment_buffered = false;
308
309	type_code = lexi(&ps);	/* lexi reads one token.  The actual
310				 * characters read are stored in "token". lexi
311				 * returns a code indicating the type of token */
312
313	/*
314	 * The following code moves newlines and comments following an if (),
315	 * while (), else, etc. up to the start of the following stmt to
316	 * a buffer. This allows proper handling of both kinds of brace
317	 * placement (-br, -bl) and cuddling "else" (-ce).
318	 */
319
320	while (ps.search_brace) {
321	    switch (type_code) {
322	    case newline:
323		if (sc_end == NULL) {
324		    save_com = sc_buf;
325		    save_com[0] = save_com[1] = ' ';
326		    sc_end = &save_com[2];
327		}
328		*sc_end++ = '\n';
329		/*
330		 * We may have inherited a force_nl == true from the previous
331		 * token (like a semicolon). But once we know that a newline
332		 * has been scanned in this loop, force_nl should be false.
333		 *
334		 * However, the force_nl == true must be preserved if newline
335		 * is never scanned in this loop, so this assignment cannot be
336		 * done earlier.
337		 */
338		force_nl = false;
339	    case form_feed:
340		break;
341	    case comment:
342		if (sc_end == NULL) {
343		    /*
344		     * Copy everything from the start of the line, because
345		     * pr_comment() will use that to calculate original
346		     * indentation of a boxed comment.
347		     */
348		    memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4);
349		    save_com = sc_buf + (buf_ptr - in_buffer - 4);
350		    save_com[0] = save_com[1] = ' ';
351		    sc_end = &save_com[2];
352		}
353		comment_buffered = true;
354		*sc_end++ = '/';	/* copy in start of comment */
355		*sc_end++ = '*';
356		for (;;) {	/* loop until we get to the end of the comment */
357		    *sc_end = *buf_ptr++;
358		    if (buf_ptr >= buf_end)
359			fill_buffer();
360		    if (*sc_end++ == '*' && *buf_ptr == '/')
361			break;	/* we are at end of comment */
362		    if (sc_end >= &save_com[sc_size]) {	/* check for temp buffer
363							 * overflow */
364			diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever");
365			fflush(output);
366			exit(1);
367		    }
368		}
369		*sc_end++ = '/';	/* add ending slash */
370		if (++buf_ptr >= buf_end)	/* get past / in buffer */
371		    fill_buffer();
372		break;
373	    case lbrace:
374		/*
375		 * Put KNF-style lbraces before the buffered up tokens and
376		 * jump out of this loop in order to avoid copying the token
377		 * again under the default case of the switch below.
378		 */
379		if (sc_end != NULL && opt.btype_2) {
380		    save_com[0] = '{';
381		    /*
382		     * Originally the lbrace may have been alone on its own
383		     * line, but it will be moved into "the else's line", so
384		     * if there was a newline resulting from the "{" before,
385		     * it must be scanned now and ignored.
386		     */
387		    while (isspace((unsigned char)*buf_ptr)) {
388			if (++buf_ptr >= buf_end)
389			    fill_buffer();
390			if (*buf_ptr == '\n')
391			    break;
392		    }
393		    goto sw_buffer;
394		}
395		/* FALLTHROUGH */
396	    default:		/* it is the start of a normal statement */
397		{
398		    int remove_newlines;
399
400		    remove_newlines =
401			/* "} else" */
402			(type_code == sp_nparen && *token == 'e' &&
403			    e_code != s_code && e_code[-1] == '}')
404			/* "else if" */
405			|| (type_code == sp_paren && *token == 'i' &&
406			    last_else && opt.else_if);
407		    if (remove_newlines)
408			force_nl = false;
409		    if (sc_end == NULL) {	/* ignore buffering if
410						 * comment wasn't saved up */
411			ps.search_brace = false;
412			goto check_type;
413		    }
414		    while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) {
415			sc_end--;
416		    }
417		    if (opt.swallow_optional_blanklines ||
418			(!comment_buffered && remove_newlines)) {
419			force_nl = !remove_newlines;
420			while (sc_end > save_com && sc_end[-1] == '\n') {
421			    sc_end--;
422			}
423		    }
424		    if (force_nl) {	/* if we should insert a nl here, put
425					 * it into the buffer */
426			force_nl = false;
427			--line_no;	/* this will be re-increased when the
428					 * newline is read from the buffer */
429			*sc_end++ = '\n';
430			*sc_end++ = ' ';
431			if (opt.verbose) /* print error msg if the line was
432					 * not already broken */
433			    diag2(0, "Line broken");
434		    }
435		    for (t_ptr = token; *t_ptr; ++t_ptr)
436			*sc_end++ = *t_ptr;
437
438	    sw_buffer:
439		    ps.search_brace = false;	/* stop looking for start of
440						 * stmt */
441		    bp_save = buf_ptr;	/* save current input buffer */
442		    be_save = buf_end;
443		    buf_ptr = save_com;	/* fix so that subsequent calls to
444					 * lexi will take tokens out of
445					 * save_com */
446		    *sc_end++ = ' ';/* add trailing blank, just in case */
447		    buf_end = sc_end;
448		    sc_end = NULL;
449		    break;
450		}
451	    }			/* end of switch */
452	    /*
453	     * We must make this check, just in case there was an unexpected
454	     * EOF.
455	     */
456	    if (type_code != 0) {
457		/*
458		 * The only intended purpose of calling lexi() below is to
459		 * categorize the next token in order to decide whether to
460		 * continue buffering forthcoming tokens. Once the buffering
461		 * is over, lexi() will be called again elsewhere on all of
462		 * the tokens - this time for normal processing.
463		 *
464		 * Calling it for this purpose is a bug, because lexi() also
465		 * changes the parser state and discards leading whitespace,
466		 * which is needed mostly for comment-related considerations.
467		 *
468		 * Work around the former problem by giving lexi() a copy of
469		 * the current parser state and discard it if the call turned
470		 * out to be just a look ahead.
471		 *
472		 * Work around the latter problem by copying all whitespace
473		 * characters into the buffer so that the later lexi() call
474		 * will read them.
475		 */
476		if (sc_end != NULL) {
477		    while (*buf_ptr == ' ' || *buf_ptr == '\t') {
478			*sc_end++ = *buf_ptr++;
479			if (sc_end >= &save_com[sc_size]) {
480			    errx(1, "input too long");
481			}
482		    }
483		    if (buf_ptr >= buf_end) {
484			fill_buffer();
485		    }
486		}
487		transient_state = ps;
488		type_code = lexi(&transient_state);	/* read another token */
489		if (type_code != newline && type_code != form_feed &&
490		    type_code != comment && !transient_state.search_brace) {
491		    ps = transient_state;
492		}
493	    }
494	}			/* end of while (search_brace) */
495	last_else = 0;
496check_type:
497	if (type_code == 0) {	/* we got eof */
498	    if (s_lab != e_lab || s_code != e_code
499		    || s_com != e_com)	/* must dump end of line */
500		dump_line();
501	    if (ps.tos > 1)	/* check for balanced braces */
502		diag2(1, "Stuff missing from end of file");
503
504	    if (opt.verbose) {
505		printf("There were %d output lines and %d comments\n",
506		       ps.out_lines, ps.out_coms);
507		printf("(Lines with comments)/(Lines with code): %6.3f\n",
508		       (1.0 * ps.com_lines) / code_lines);
509	    }
510	    fflush(output);
511	    exit(found_err);
512	}
513	if (
514		(type_code != comment) &&
515		(type_code != newline) &&
516		(type_code != preesc) &&
517		(type_code != form_feed)) {
518	    if (force_nl &&
519		    (type_code != semicolon) &&
520		    (type_code != lbrace || !opt.btype_2)) {
521		/* we should force a broken line here */
522		if (opt.verbose)
523		    diag2(0, "Line broken");
524		dump_line();
525		ps.want_blank = false;	/* dont insert blank at line start */
526		force_nl = false;
527	    }
528	    ps.in_stmt = true;	/* turn on flag which causes an extra level of
529				 * indentation. this is turned off by a ; or
530				 * '}' */
531	    if (s_com != e_com) {	/* the turkey has embedded a comment
532					 * in a line. fix it */
533		int len = e_com - s_com;
534
535		CHECK_SIZE_CODE(len + 3);
536		*e_code++ = ' ';
537		memcpy(e_code, s_com, len);
538		e_code += len;
539		*e_code++ = ' ';
540		*e_code = '\0';	/* null terminate code sect */
541		ps.want_blank = false;
542		e_com = s_com;
543	    }
544	}
545	else if (type_code != comment)	/* preserve force_nl thru a comment */
546	    force_nl = false;	/* cancel forced newline after newline, form
547				 * feed, etc */
548
549
550
551	/*-----------------------------------------------------*\
552	|	   do switch on type of token scanned		|
553	\*-----------------------------------------------------*/
554	CHECK_SIZE_CODE(3);	/* maximum number of increments of e_code
555				 * before the next CHECK_SIZE_CODE or
556				 * dump_line() is 2. After that there's the
557				 * final increment for the null character. */
558	switch (type_code) {	/* now, decide what to do with the token */
559
560	case form_feed:	/* found a form feed in line */
561	    ps.use_ff = true;	/* a form feed is treated much like a newline */
562	    dump_line();
563	    ps.want_blank = false;
564	    break;
565
566	case newline:
567	    if (ps.last_token != comma || ps.p_l_follow > 0
568		    || !opt.leave_comma || ps.block_init || !break_comma || s_com != e_com) {
569		dump_line();
570		ps.want_blank = false;
571	    }
572	    ++line_no;		/* keep track of input line number */
573	    break;
574
575	case lparen:		/* got a '(' or '[' */
576	    /* count parens to make Healy happy */
577	    if (++ps.p_l_follow == nitems(ps.paren_indents)) {
578		diag3(0, "Reached internal limit of %d unclosed parens",
579		    nitems(ps.paren_indents));
580		ps.p_l_follow--;
581	    }
582	    if (*token == '[')
583		/* not a function pointer declaration or a function call */;
584	    else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent &&
585		ps.procname[0] == '\0' && ps.paren_level == 0) {
586		/* function pointer declarations */
587		indent_declaration(dec_ind, tabs_to_var);
588		ps.dumped_decl_indent = true;
589	    }
590	    else if (ps.want_blank &&
591		    ((ps.last_token != ident && ps.last_token != funcname) ||
592		    opt.proc_calls_space ||
593		    /* offsetof (1) is never allowed a space; sizeof (2) gets
594		     * one iff -bs; all other keywords (>2) always get a space
595		     * before lparen */
596			ps.keyword + opt.Bill_Shannon > 2))
597		*e_code++ = ' ';
598	    ps.want_blank = false;
599	    *e_code++ = token[0];
600	    ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1;
601	    if (sp_sw && ps.p_l_follow == 1 && opt.extra_expression_indent
602		    && ps.paren_indents[0] < 2 * opt.ind_size)
603		ps.paren_indents[0] = 2 * opt.ind_size;
604	    if (ps.in_or_st && *token == '(' && ps.tos <= 2) {
605		/*
606		 * this is a kluge to make sure that declarations will be
607		 * aligned right if proc decl has an explicit type on it, i.e.
608		 * "int a(x) {..."
609		 */
610		parse(semicolon);	/* I said this was a kluge... */
611		ps.in_or_st = false;	/* turn off flag for structure decl or
612					 * initialization */
613	    }
614	    /* parenthesized type following sizeof or offsetof is not a cast */
615	    if (ps.keyword == 1 || ps.keyword == 2)
616		ps.not_cast_mask |= 1 << ps.p_l_follow;
617	    break;
618
619	case rparen:		/* got a ')' or ']' */
620	    if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) {
621		ps.last_u_d = true;
622		ps.cast_mask &= (1 << ps.p_l_follow) - 1;
623		ps.want_blank = opt.space_after_cast;
624	    } else
625		ps.want_blank = true;
626	    ps.not_cast_mask &= (1 << ps.p_l_follow) - 1;
627	    if (--ps.p_l_follow < 0) {
628		ps.p_l_follow = 0;
629		diag3(0, "Extra %c", *token);
630	    }
631	    if (e_code == s_code)	/* if the paren starts the line */
632		ps.paren_level = ps.p_l_follow;	/* then indent it */
633
634	    *e_code++ = token[0];
635
636	    if (sp_sw && (ps.p_l_follow == 0)) {	/* check for end of if
637							 * (...), or some such */
638		sp_sw = false;
639		force_nl = true;/* must force newline after if */
640		ps.last_u_d = true;	/* inform lexi that a following
641					 * operator is unary */
642		ps.in_stmt = false;	/* dont use stmt continuation
643					 * indentation */
644
645		parse(hd_type);	/* let parser worry about if, or whatever */
646	    }
647	    ps.search_brace = opt.btype_2; /* this should ensure that
648					 * constructs such as main(){...}
649					 * and int[]{...} have their braces
650					 * put in the right place */
651	    break;
652
653	case unary_op:		/* this could be any unary operation */
654	    if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init &&
655		ps.procname[0] == '\0' && ps.paren_level == 0) {
656		/* pointer declarations */
657
658		/*
659		 * if this is a unary op in a declaration, we should indent
660		 * this token
661		 */
662		for (i = 0; token[i]; ++i)
663		    /* find length of token */;
664		indent_declaration(dec_ind - i, tabs_to_var);
665		ps.dumped_decl_indent = true;
666	    }
667	    else if (ps.want_blank)
668		*e_code++ = ' ';
669
670	    {
671		int len = e_token - s_token;
672
673		CHECK_SIZE_CODE(len);
674		memcpy(e_code, token, len);
675		e_code += len;
676	    }
677	    ps.want_blank = false;
678	    break;
679
680	case binary_op:	/* any binary operation */
681	    {
682		int len = e_token - s_token;
683
684		CHECK_SIZE_CODE(len + 1);
685		if (ps.want_blank)
686		    *e_code++ = ' ';
687		memcpy(e_code, token, len);
688		e_code += len;
689	    }
690	    ps.want_blank = true;
691	    break;
692
693	case postop:		/* got a trailing ++ or -- */
694	    *e_code++ = token[0];
695	    *e_code++ = token[1];
696	    ps.want_blank = true;
697	    break;
698
699	case question:		/* got a ? */
700	    squest++;		/* this will be used when a later colon
701				 * appears so we can distinguish the
702				 * <c>?<n>:<n> construct */
703	    if (ps.want_blank)
704		*e_code++ = ' ';
705	    *e_code++ = '?';
706	    ps.want_blank = true;
707	    break;
708
709	case casestmt:		/* got word 'case' or 'default' */
710	    scase = true;	/* so we can process the later colon properly */
711	    goto copy_id;
712
713	case colon:		/* got a ':' */
714	    if (squest > 0) {	/* it is part of the <c>?<n>: <n> construct */
715		--squest;
716		if (ps.want_blank)
717		    *e_code++ = ' ';
718		*e_code++ = ':';
719		ps.want_blank = true;
720		break;
721	    }
722	    if (ps.in_or_st) {
723		*e_code++ = ':';
724		ps.want_blank = false;
725		break;
726	    }
727	    ps.in_stmt = false;	/* seeing a label does not imply we are in a
728				 * stmt */
729	    /*
730	     * turn everything so far into a label
731	     */
732	    {
733		int len = e_code - s_code;
734
735		CHECK_SIZE_LAB(len + 3);
736		memcpy(e_lab, s_code, len);
737		e_lab += len;
738		*e_lab++ = ':';
739		*e_lab = '\0';
740		e_code = s_code;
741	    }
742	    force_nl = ps.pcase = scase;	/* ps.pcase will be used by
743						 * dump_line to decide how to
744						 * indent the label. force_nl
745						 * will force a case n: to be
746						 * on a line by itself */
747	    scase = false;
748	    ps.want_blank = false;
749	    break;
750
751	case semicolon:	/* got a ';' */
752	    if (ps.dec_nest == 0)
753		ps.in_or_st = false;/* we are not in an initialization or
754				     * structure declaration */
755	    scase = false;	/* these will only need resetting in an error */
756	    squest = 0;
757	    if (ps.last_token == rparen)
758		ps.in_parameter_declaration = 0;
759	    ps.cast_mask = 0;
760	    ps.not_cast_mask = 0;
761	    ps.block_init = 0;
762	    ps.block_init_level = 0;
763	    ps.just_saw_decl--;
764
765	    if (ps.in_decl && s_code == e_code && !ps.block_init &&
766		!ps.dumped_decl_indent && ps.paren_level == 0) {
767		/* indent stray semicolons in declarations */
768		indent_declaration(dec_ind - 1, tabs_to_var);
769		ps.dumped_decl_indent = true;
770	    }
771
772	    ps.in_decl = (ps.dec_nest > 0);	/* if we were in a first level
773						 * structure declaration, we
774						 * arent any more */
775
776	    if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) {
777
778		/*
779		 * This should be true iff there were unbalanced parens in the
780		 * stmt.  It is a bit complicated, because the semicolon might
781		 * be in a for stmt
782		 */
783		diag2(1, "Unbalanced parens");
784		ps.p_l_follow = 0;
785		if (sp_sw) {	/* this is a check for an if, while, etc. with
786				 * unbalanced parens */
787		    sp_sw = false;
788		    parse(hd_type);	/* dont lose the if, or whatever */
789		}
790	    }
791	    *e_code++ = ';';
792	    ps.want_blank = true;
793	    ps.in_stmt = (ps.p_l_follow > 0);	/* we are no longer in the
794						 * middle of a stmt */
795
796	    if (!sp_sw) {	/* if not if for (;;) */
797		parse(semicolon);	/* let parser know about end of stmt */
798		force_nl = true;/* force newline after an end of stmt */
799	    }
800	    break;
801
802	case lbrace:		/* got a '{' */
803	    ps.in_stmt = false;	/* dont indent the {} */
804	    if (!ps.block_init)
805		force_nl = true;/* force other stuff on same line as '{' onto
806				 * new line */
807	    else if (ps.block_init_level <= 0)
808		ps.block_init_level = 1;
809	    else
810		ps.block_init_level++;
811
812	    if (s_code != e_code && !ps.block_init) {
813		if (!opt.btype_2) {
814		    dump_line();
815		    ps.want_blank = false;
816		}
817		else if (ps.in_parameter_declaration && !ps.in_or_st) {
818		    ps.i_l_follow = 0;
819		    if (opt.function_brace_split) { /* dump the line prior
820				 * to the brace ... */
821			dump_line();
822			ps.want_blank = false;
823		    } else	/* add a space between the decl and brace */
824			ps.want_blank = true;
825		}
826	    }
827	    if (ps.in_parameter_declaration)
828		prefix_blankline_requested = 0;
829
830	    if (ps.p_l_follow > 0) {	/* check for preceding unbalanced
831					 * parens */
832		diag2(1, "Unbalanced parens");
833		ps.p_l_follow = 0;
834		if (sp_sw) {	/* check for unclosed if, for, etc. */
835		    sp_sw = false;
836		    parse(hd_type);
837		    ps.ind_level = ps.i_l_follow;
838		}
839	    }
840	    if (s_code == e_code)
841		ps.ind_stmt = false;	/* dont put extra indentation on line
842					 * with '{' */
843	    if (ps.in_decl && ps.in_or_st) {	/* this is either a structure
844						 * declaration or an init */
845		di_stack[ps.dec_nest] = dec_ind;
846		if (++ps.dec_nest == nitems(di_stack)) {
847		    diag3(0, "Reached internal limit of %d struct levels",
848			nitems(di_stack));
849		    ps.dec_nest--;
850		}
851		/* ?		dec_ind = 0; */
852	    }
853	    else {
854		ps.decl_on_line = false;	/* we can't be in the middle of
855						 * a declaration, so don't do
856						 * special indentation of
857						 * comments */
858		if (opt.blanklines_after_declarations_at_proctop
859			&& ps.in_parameter_declaration)
860		    postfix_blankline_requested = 1;
861		ps.in_parameter_declaration = 0;
862		ps.in_decl = false;
863	    }
864	    dec_ind = 0;
865	    parse(lbrace);	/* let parser know about this */
866	    if (ps.want_blank)	/* put a blank before '{' if '{' is not at
867				 * start of line */
868		*e_code++ = ' ';
869	    ps.want_blank = false;
870	    *e_code++ = '{';
871	    ps.just_saw_decl = 0;
872	    break;
873
874	case rbrace:		/* got a '}' */
875	    if (ps.p_stack[ps.tos] == decl && !ps.block_init)	/* semicolons can be
876								 * omitted in
877								 * declarations */
878		parse(semicolon);
879	    if (ps.p_l_follow) {/* check for unclosed if, for, else. */
880		diag2(1, "Unbalanced parens");
881		ps.p_l_follow = 0;
882		sp_sw = false;
883	    }
884	    ps.just_saw_decl = 0;
885	    ps.block_init_level--;
886	    if (s_code != e_code && !ps.block_init) {	/* '}' must be first on
887							 * line */
888		if (opt.verbose)
889		    diag2(0, "Line broken");
890		dump_line();
891	    }
892	    *e_code++ = '}';
893	    ps.want_blank = true;
894	    ps.in_stmt = ps.ind_stmt = false;
895	    if (ps.dec_nest > 0) {	/* we are in multi-level structure
896					 * declaration */
897		dec_ind = di_stack[--ps.dec_nest];
898		if (ps.dec_nest == 0 && !ps.in_parameter_declaration)
899		    ps.just_saw_decl = 2;
900		ps.in_decl = true;
901	    }
902	    prefix_blankline_requested = 0;
903	    parse(rbrace);	/* let parser know about this */
904	    ps.search_brace = opt.cuddle_else && ps.p_stack[ps.tos] == ifhead
905		&& ps.il[ps.tos] >= ps.ind_level;
906	    if (ps.tos <= 1 && opt.blanklines_after_procs && ps.dec_nest <= 0)
907		postfix_blankline_requested = 1;
908	    break;
909
910	case swstmt:		/* got keyword "switch" */
911	    sp_sw = true;
912	    hd_type = swstmt;	/* keep this for when we have seen the
913				 * expression */
914	    goto copy_id;	/* go move the token into buffer */
915
916	case sp_paren:		/* token is if, while, for */
917	    sp_sw = true;	/* the interesting stuff is done after the
918				 * expression is scanned */
919	    hd_type = (*token == 'i' ? ifstmt :
920		       (*token == 'w' ? whilestmt : forstmt));
921
922	    /*
923	     * remember the type of header for later use by parser
924	     */
925	    goto copy_id;	/* copy the token into line */
926
927	case sp_nparen:	/* got else, do */
928	    ps.in_stmt = false;
929	    if (*token == 'e') {
930		if (e_code != s_code && (!opt.cuddle_else || e_code[-1] != '}')) {
931		    if (opt.verbose)
932			diag2(0, "Line broken");
933		    dump_line();/* make sure this starts a line */
934		    ps.want_blank = false;
935		}
936		force_nl = true;/* also, following stuff must go onto new line */
937		last_else = 1;
938		parse(elselit);
939	    }
940	    else {
941		if (e_code != s_code) {	/* make sure this starts a line */
942		    if (opt.verbose)
943			diag2(0, "Line broken");
944		    dump_line();
945		    ps.want_blank = false;
946		}
947		force_nl = true;/* also, following stuff must go onto new line */
948		last_else = 0;
949		parse(dolit);
950	    }
951	    goto copy_id;	/* move the token into line */
952
953	case type_def:
954	case storage:
955	    prefix_blankline_requested = 0;
956	    goto copy_id;
957
958	case structure:
959	    if (ps.p_l_follow > 0)
960		goto copy_id;
961	    /* FALLTHROUGH */
962	case decl:		/* we have a declaration type (int, etc.) */
963	    parse(decl);	/* let parser worry about indentation */
964	    if (ps.last_token == rparen && ps.tos <= 1) {
965		if (s_code != e_code) {
966		    dump_line();
967		    ps.want_blank = 0;
968		}
969	    }
970	    if (ps.in_parameter_declaration && opt.indent_parameters && ps.dec_nest == 0) {
971		ps.ind_level = ps.i_l_follow = 1;
972		ps.ind_stmt = 0;
973	    }
974	    ps.in_or_st = true;	/* this might be a structure or initialization
975				 * declaration */
976	    ps.in_decl = ps.decl_on_line = ps.last_token != type_def;
977	    if ( /* !ps.in_or_st && */ ps.dec_nest <= 0)
978		ps.just_saw_decl = 2;
979	    prefix_blankline_requested = 0;
980	    for (i = 0; token[i++];);	/* get length of token */
981
982	    if (ps.ind_level == 0 || ps.dec_nest > 0) {
983		/* global variable or struct member in local variable */
984		dec_ind = opt.decl_indent > 0 ? opt.decl_indent : i;
985		tabs_to_var = (opt.use_tabs ? opt.decl_indent > 0 : 0);
986	    } else {
987		/* local variable */
988		dec_ind = opt.local_decl_indent > 0 ? opt.local_decl_indent : i;
989		tabs_to_var = (opt.use_tabs ? opt.local_decl_indent > 0 : 0);
990	    }
991	    goto copy_id;
992
993	case funcname:
994	case ident:		/* got an identifier or constant */
995	    if (ps.in_decl) {
996		if (type_code == funcname) {
997		    ps.in_decl = false;
998		    if (opt.procnames_start_line && s_code != e_code) {
999			*e_code = '\0';
1000			dump_line();
1001		    }
1002		    else if (ps.want_blank) {
1003			*e_code++ = ' ';
1004		    }
1005		    ps.want_blank = false;
1006		}
1007		else if (!ps.block_init && !ps.dumped_decl_indent &&
1008		    ps.paren_level == 0) { /* if we are in a declaration, we
1009					    * must indent identifier */
1010		    indent_declaration(dec_ind, tabs_to_var);
1011		    ps.dumped_decl_indent = true;
1012		    ps.want_blank = false;
1013		}
1014	    }
1015	    else if (sp_sw && ps.p_l_follow == 0) {
1016		sp_sw = false;
1017		force_nl = true;
1018		ps.last_u_d = true;
1019		ps.in_stmt = false;
1020		parse(hd_type);
1021	    }
1022    copy_id:
1023	    {
1024		int len = e_token - s_token;
1025
1026		CHECK_SIZE_CODE(len + 1);
1027		if (ps.want_blank)
1028		    *e_code++ = ' ';
1029		memcpy(e_code, s_token, len);
1030		e_code += len;
1031	    }
1032	    if (type_code != funcname)
1033		ps.want_blank = true;
1034	    break;
1035
1036	case strpfx:
1037	    {
1038		int len = e_token - s_token;
1039
1040		CHECK_SIZE_CODE(len + 1);
1041		if (ps.want_blank)
1042		    *e_code++ = ' ';
1043		memcpy(e_code, token, len);
1044		e_code += len;
1045	    }
1046	    ps.want_blank = false;
1047	    break;
1048
1049	case period:		/* treat a period kind of like a binary
1050				 * operation */
1051	    *e_code++ = '.';	/* move the period into line */
1052	    ps.want_blank = false;	/* dont put a blank after a period */
1053	    break;
1054
1055	case comma:
1056	    ps.want_blank = (s_code != e_code);	/* only put blank after comma
1057						 * if comma does not start the
1058						 * line */
1059	    if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init &&
1060		!ps.dumped_decl_indent && ps.paren_level == 0) {
1061		/* indent leading commas and not the actual identifiers */
1062		indent_declaration(dec_ind - 1, tabs_to_var);
1063		ps.dumped_decl_indent = true;
1064	    }
1065	    *e_code++ = ',';
1066	    if (ps.p_l_follow == 0) {
1067		if (ps.block_init_level <= 0)
1068		    ps.block_init = 0;
1069		if (break_comma && (!opt.leave_comma ||
1070		    count_spaces_until(compute_code_target(), s_code, e_code) >
1071		    opt.max_col - opt.tabsize))
1072		    force_nl = true;
1073	    }
1074	    break;
1075
1076	case preesc:		/* got the character '#' */
1077	    if ((s_com != e_com) ||
1078		    (s_lab != e_lab) ||
1079		    (s_code != e_code))
1080		dump_line();
1081	    CHECK_SIZE_LAB(1);
1082	    *e_lab++ = '#';	/* move whole line to 'label' buffer */
1083	    {
1084		int         in_comment = 0;
1085		int         com_start = 0;
1086		char        quote = 0;
1087		int         com_end = 0;
1088
1089		while (*buf_ptr == ' ' || *buf_ptr == '\t') {
1090		    buf_ptr++;
1091		    if (buf_ptr >= buf_end)
1092			fill_buffer();
1093		}
1094		while (*buf_ptr != '\n' || (in_comment && !had_eof)) {
1095		    CHECK_SIZE_LAB(2);
1096		    *e_lab = *buf_ptr++;
1097		    if (buf_ptr >= buf_end)
1098			fill_buffer();
1099		    switch (*e_lab++) {
1100		    case BACKSLASH:
1101			if (!in_comment) {
1102			    *e_lab++ = *buf_ptr++;
1103			    if (buf_ptr >= buf_end)
1104				fill_buffer();
1105			}
1106			break;
1107		    case '/':
1108			if (*buf_ptr == '*' && !in_comment && !quote) {
1109			    in_comment = 1;
1110			    *e_lab++ = *buf_ptr++;
1111			    com_start = e_lab - s_lab - 2;
1112			}
1113			break;
1114		    case '"':
1115			if (quote == '"')
1116			    quote = 0;
1117			break;
1118		    case '\'':
1119			if (quote == '\'')
1120			    quote = 0;
1121			break;
1122		    case '*':
1123			if (*buf_ptr == '/' && in_comment) {
1124			    in_comment = 0;
1125			    *e_lab++ = *buf_ptr++;
1126			    com_end = e_lab - s_lab;
1127			}
1128			break;
1129		    }
1130		}
1131
1132		while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1133		    e_lab--;
1134		if (e_lab - s_lab == com_end && bp_save == NULL) {
1135		    /* comment on preprocessor line */
1136		    if (sc_end == NULL) {	/* if this is the first comment,
1137						 * we must set up the buffer */
1138			save_com = sc_buf;
1139			sc_end = &save_com[0];
1140		    }
1141		    else {
1142			*sc_end++ = '\n';	/* add newline between
1143						 * comments */
1144			*sc_end++ = ' ';
1145			--line_no;
1146		    }
1147		    if (sc_end - save_com + com_end - com_start > sc_size)
1148			errx(1, "input too long");
1149		    memmove(sc_end, s_lab + com_start, com_end - com_start);
1150		    sc_end += com_end - com_start;
1151		    e_lab = s_lab + com_start;
1152		    while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1153			e_lab--;
1154		    bp_save = buf_ptr;	/* save current input buffer */
1155		    be_save = buf_end;
1156		    buf_ptr = save_com;	/* fix so that subsequent calls to
1157					 * lexi will take tokens out of
1158					 * save_com */
1159		    *sc_end++ = ' ';	/* add trailing blank, just in case */
1160		    buf_end = sc_end;
1161		    sc_end = NULL;
1162		}
1163		CHECK_SIZE_LAB(1);
1164		*e_lab = '\0';	/* null terminate line */
1165		ps.pcase = false;
1166	    }
1167
1168	    if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */
1169		if ((size_t)ifdef_level < nitems(state_stack)) {
1170		    match_state[ifdef_level].tos = -1;
1171		    state_stack[ifdef_level++] = ps;
1172		}
1173		else
1174		    diag2(1, "#if stack overflow");
1175	    }
1176	    else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */
1177		if (ifdef_level <= 0)
1178		    diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else");
1179		else {
1180		    match_state[ifdef_level - 1] = ps;
1181		    ps = state_stack[ifdef_level - 1];
1182		}
1183	    }
1184	    else if (strncmp(s_lab, "#endif", 6) == 0) {
1185		if (ifdef_level <= 0)
1186		    diag2(1, "Unmatched #endif");
1187		else
1188		    ifdef_level--;
1189	    } else {
1190		struct directives {
1191		    int size;
1192		    const char *string;
1193		}
1194		recognized[] = {
1195		    {7, "include"},
1196		    {6, "define"},
1197		    {5, "undef"},
1198		    {4, "line"},
1199		    {5, "error"},
1200		    {6, "pragma"}
1201		};
1202		int d = nitems(recognized);
1203		while (--d >= 0)
1204		    if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0)
1205			break;
1206		if (d < 0) {
1207		    diag2(1, "Unrecognized cpp directive");
1208		    break;
1209		}
1210	    }
1211	    if (opt.blanklines_around_conditional_compilation) {
1212		postfix_blankline_requested++;
1213		n_real_blanklines = 0;
1214	    }
1215	    else {
1216		postfix_blankline_requested = 0;
1217		prefix_blankline_requested = 0;
1218	    }
1219	    break;		/* subsequent processing of the newline
1220				 * character will cause the line to be printed */
1221
1222	case comment:		/* we have gotten a / followed by * this is a biggie */
1223	    pr_comment();
1224	    break;
1225	}			/* end of big switch stmt */
1226
1227	*e_code = '\0';		/* make sure code section is null terminated */
1228	if (type_code != comment && type_code != newline && type_code != preesc)
1229	    ps.last_token = type_code;
1230    }				/* end of main while (1) loop */
1231}
1232
1233/*
1234 * copy input file to backup file if in_name is /blah/blah/blah/file, then
1235 * backup file will be ".Bfile" then make the backup file the input and
1236 * original input file the output
1237 */
1238static void
1239bakcopy(void)
1240{
1241    int         n,
1242                bakchn;
1243    char        buff[8 * 1024];
1244    const char *p;
1245
1246    /* construct file name .Bfile */
1247    for (p = in_name; *p; p++);	/* skip to end of string */
1248    while (p > in_name && *p != '/')	/* find last '/' */
1249	p--;
1250    if (*p == '/')
1251	p++;
1252    sprintf(bakfile, "%s%s", p, simple_backup_suffix);
1253
1254    /* copy in_name to backup file */
1255    bakchn = creat(bakfile, 0600);
1256    if (bakchn < 0)
1257	err(1, "%s", bakfile);
1258    while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
1259	if (write(bakchn, buff, n) != n)
1260	    err(1, "%s", bakfile);
1261    if (n < 0)
1262	err(1, "%s", in_name);
1263    close(bakchn);
1264    fclose(input);
1265
1266    /* re-open backup file as the input file */
1267    input = fopen(bakfile, "r");
1268    if (input == NULL)
1269	err(1, "%s", bakfile);
1270    /* now the original input file will be the output */
1271    output = fopen(in_name, "w");
1272    if (output == NULL) {
1273	unlink(bakfile);
1274	err(1, "%s", in_name);
1275    }
1276}
1277
1278static void
1279indent_declaration(int cur_dec_ind, int tabs_to_var)
1280{
1281    int pos = e_code - s_code;
1282    char *startpos = e_code;
1283
1284    /*
1285     * get the tab math right for indentations that are not multiples of tabsize
1286     */
1287    if ((ps.ind_level * opt.ind_size) % opt.tabsize != 0) {
1288	pos += (ps.ind_level * opt.ind_size) % opt.tabsize;
1289	cur_dec_ind += (ps.ind_level * opt.ind_size) % opt.tabsize;
1290    }
1291    if (tabs_to_var) {
1292	int tpos;
1293
1294	CHECK_SIZE_CODE(cur_dec_ind / opt.tabsize);
1295	while ((tpos = opt.tabsize * (1 + pos / opt.tabsize)) <= cur_dec_ind) {
1296	    *e_code++ = '\t';
1297	    pos = tpos;
1298	}
1299    }
1300    CHECK_SIZE_CODE(cur_dec_ind - pos + 1);
1301    while (pos < cur_dec_ind) {
1302	*e_code++ = ' ';
1303	pos++;
1304    }
1305    if (e_code == startpos && ps.want_blank) {
1306	*e_code++ = ' ';
1307	ps.want_blank = false;
1308    }
1309}
1310