11590Srgrimes/*
21590Srgrimes * Copyright (c) 1985 Sun Microsystems, Inc.
31590Srgrimes * Copyright (c) 1980, 1993
41590Srgrimes *	The Regents of the University of California.  All rights reserved.
51590Srgrimes * All rights reserved.
61590Srgrimes *
71590Srgrimes * Redistribution and use in source and binary forms, with or without
81590Srgrimes * modification, are permitted provided that the following conditions
91590Srgrimes * are met:
101590Srgrimes * 1. Redistributions of source code must retain the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer.
121590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
131590Srgrimes *    notice, this list of conditions and the following disclaimer in the
141590Srgrimes *    documentation and/or other materials provided with the distribution.
151590Srgrimes * 3. All advertising materials mentioning features or use of this software
161590Srgrimes *    must display the following acknowledgement:
171590Srgrimes *	This product includes software developed by the University of
181590Srgrimes *	California, Berkeley and its contributors.
191590Srgrimes * 4. Neither the name of the University nor the names of its contributors
201590Srgrimes *    may be used to endorse or promote products derived from this software
211590Srgrimes *    without specific prior written permission.
221590Srgrimes *
231590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
241590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
251590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
261590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
271590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
281590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
291590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
301590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
311590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
321590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
331590Srgrimes * SUCH DAMAGE.
341590Srgrimes */
351590Srgrimes
3685632Sschweikh#if 0
371590Srgrimes#ifndef lint
381590Srgrimesstatic char sccsid[] = "@(#)lexi.c	8.1 (Berkeley) 6/6/93";
391590Srgrimes#endif /* not lint */
4085632Sschweikh#endif
4199112Sobrien#include <sys/cdefs.h>
4299112Sobrien__FBSDID("$FreeBSD$");
431590Srgrimes
441590Srgrimes/*
451590Srgrimes * Here we have the token scanner for indent.  It scans off one token and puts
461590Srgrimes * it in the global variable "token".  It returns a code, indicating the type
471590Srgrimes * of token scanned.
481590Srgrimes */
491590Srgrimes
50116390Scharnier#include <err.h>
511590Srgrimes#include <stdio.h>
521590Srgrimes#include <ctype.h>
531590Srgrimes#include <stdlib.h>
541590Srgrimes#include <string.h>
551590Srgrimes#include "indent_globs.h"
561590Srgrimes#include "indent_codes.h"
5793440Sdwmalone#include "indent.h"
581590Srgrimes
/*
 * Character class codes stored in chartype[]; a table value of 0 means the
 * character belongs to neither class.
 */
#define alphanum 1		/* letters, digits, '_' and '$' */
#define opchar 3		/* characters that can be part of an operator */
611590Srgrimes
/*
 * One entry of the reserved-word table: the spelling of the word and the
 * class code that lexi() switches on once the word has been recognized.
 */
struct templ {
    const char *rwd;		/* keyword text; a null pointer ends the table */
    int         rwcode;		/* keyword class, see the list below */
};

/*
 * Reserved words known to the scanner.  The class codes are interpreted by
 * the switch in lexi() as follows:
 *
 *	1	switch
 *	2	case, default
 *	3	struct, union, enum
 *	4	declaration keyword (type names, storage classes, qualifiers)
 *	5	if, while, for
 *	6	else, do
 *	7	sizeof
 *	other	treated like an ordinary identifier
 *
 * The array is much larger than the initial list so that addkey() can append
 * entries at run time; the used part always ends with a {0, 0} entry.
 */
struct templ specials[1000] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" and never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"const", 4},
    {"volatile", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
1011590Srgrimes
/*
 * Classification table for the 128 ASCII codes, indexed by character value:
 * 1 marks characters that may appear in identifiers and numbers, 3 marks
 * characters that may appear in operators, and 0 marks everything else.
 * Each row below covers sixteen consecutive codes.
 */
char        chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . / */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: P Q R S T U V W X Y Z [ \ ] ^ _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: p q r s t u v w x y z { | } ~ DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
1231590Srgrimes
1241590Srgrimesint
12585632Sschweikhlexi(void)
1261590Srgrimes{
1271590Srgrimes    int         unary_delim;	/* this is set to 1 if the current token
1281590Srgrimes				 * forces a following operator to be unary */
1291590Srgrimes    static int  last_code;	/* the last token type returned */
1301590Srgrimes    static int  l_struct;	/* set to 1 if the last token was 'struct' */
1311590Srgrimes    int         code;		/* internal code to be returned */
1321590Srgrimes    char        qchar;		/* the delimiter character for a string */
1331590Srgrimes
1341590Srgrimes    e_token = s_token;		/* point to start of place to save token */
1351590Srgrimes    unary_delim = false;
1361590Srgrimes    ps.col_1 = ps.last_nl;	/* tell world that this token started in
1371590Srgrimes				 * column 1 iff the last thing scanned was nl */
1381590Srgrimes    ps.last_nl = false;
1391590Srgrimes
1401590Srgrimes    while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
1411590Srgrimes	ps.col_1 = false;	/* leading blanks imply token is not in column
1421590Srgrimes				 * 1 */
1431590Srgrimes	if (++buf_ptr >= buf_end)
1441590Srgrimes	    fill_buffer();
1451590Srgrimes    }
1461590Srgrimes
1471590Srgrimes    /* Scan an alphanumeric token */
14885632Sschweikh    if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
1491590Srgrimes	/*
1501590Srgrimes	 * we have a character or number
1511590Srgrimes	 */
15293440Sdwmalone	const char *j;		/* used for searching thru list of
1538874Srgrimes				 *
1541590Srgrimes				 * reserved words */
15598771Sjmallett	struct templ *p;
1561590Srgrimes
15785632Sschweikh	if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
1581590Srgrimes	    int         seendot = 0,
15936211Srnordier	                seenexp = 0,
16036211Srnordier			seensfx = 0;
1611590Srgrimes	    if (*buf_ptr == '0' &&
1621590Srgrimes		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
1631590Srgrimes		*e_token++ = *buf_ptr++;
1641590Srgrimes		*e_token++ = *buf_ptr++;
1651590Srgrimes		while (isxdigit(*buf_ptr)) {
1661590Srgrimes		    CHECK_SIZE_TOKEN;
1671590Srgrimes		    *e_token++ = *buf_ptr++;
1681590Srgrimes		}
1691590Srgrimes	    }
1701590Srgrimes	    else
1711590Srgrimes		while (1) {
17285632Sschweikh		    if (*buf_ptr == '.') {
1731590Srgrimes			if (seendot)
1741590Srgrimes			    break;
1751590Srgrimes			else
1761590Srgrimes			    seendot++;
17785632Sschweikh		    }
1781590Srgrimes		    CHECK_SIZE_TOKEN;
1791590Srgrimes		    *e_token++ = *buf_ptr++;
18085632Sschweikh		    if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
1811590Srgrimes			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
1821590Srgrimes			    break;
1831590Srgrimes			else {
1841590Srgrimes			    seenexp++;
1851590Srgrimes			    seendot++;
1861590Srgrimes			    CHECK_SIZE_TOKEN;
1871590Srgrimes			    *e_token++ = *buf_ptr++;
1881590Srgrimes			    if (*buf_ptr == '+' || *buf_ptr == '-')
1891590Srgrimes				*e_token++ = *buf_ptr++;
1901590Srgrimes			}
19185632Sschweikh		    }
1921590Srgrimes		}
19336211Srnordier	    while (1) {
19436211Srnordier		if (!(seensfx & 1) &&
19536211Srnordier			(*buf_ptr == 'U' || *buf_ptr == 'u')) {
19636211Srnordier		    CHECK_SIZE_TOKEN;
19736211Srnordier		    *e_token++ = *buf_ptr++;
19836211Srnordier		    seensfx |= 1;
19936211Srnordier		    continue;
20036211Srnordier		}
20136211Srnordier        	if (!(seensfx & 2) &&
20236211Srnordier			(*buf_ptr == 'L' || *buf_ptr == 'l')) {
20336211Srnordier		    CHECK_SIZE_TOKEN;
20436211Srnordier		    if (buf_ptr[1] == buf_ptr[0])
20536211Srnordier		        *e_token++ = *buf_ptr++;
20636211Srnordier		    *e_token++ = *buf_ptr++;
20736211Srnordier		    seensfx |= 2;
20836211Srnordier		    continue;
20936211Srnordier		}
21036211Srnordier		break;
21136211Srnordier	    }
2121590Srgrimes	}
2131590Srgrimes	else
21485632Sschweikh	    while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
21585182Sschweikh		/* fill_buffer() terminates buffer with newline */
21685182Sschweikh		if (*buf_ptr == BACKSLASH) {
21785182Sschweikh		    if (*(buf_ptr + 1) == '\n') {
21885182Sschweikh			buf_ptr += 2;
21985182Sschweikh			if (buf_ptr >= buf_end)
22085182Sschweikh			    fill_buffer();
22185182Sschweikh			} else
22285182Sschweikh			    break;
22385182Sschweikh		}
2241590Srgrimes		CHECK_SIZE_TOKEN;
22585182Sschweikh		/* copy it over */
2261590Srgrimes		*e_token++ = *buf_ptr++;
2271590Srgrimes		if (buf_ptr >= buf_end)
2281590Srgrimes		    fill_buffer();
2291590Srgrimes	    }
2301590Srgrimes	*e_token++ = '\0';
2311590Srgrimes	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
2321590Srgrimes	    if (++buf_ptr >= buf_end)
2331590Srgrimes		fill_buffer();
2341590Srgrimes	}
2351590Srgrimes	ps.its_a_keyword = false;
2361590Srgrimes	ps.sizeof_keyword = false;
237125623Sbde	if (l_struct && !ps.p_l_follow) {
238125623Sbde				/* if last token was 'struct' and we're not
239125623Sbde				 * in parentheses, then this token
2401590Srgrimes				 * should be treated as a declaration */
2411590Srgrimes	    l_struct = false;
2421590Srgrimes	    last_code = ident;
2431590Srgrimes	    ps.last_u_d = true;
2441590Srgrimes	    return (decl);
2451590Srgrimes	}
246125623Sbde	ps.last_u_d = l_struct;	/* Operator after identifier is binary
247125623Sbde				 * unless last token was 'struct' */
248125623Sbde	l_struct = false;
2491590Srgrimes	last_code = ident;	/* Remember that this is the code we will
2501590Srgrimes				 * return */
2511590Srgrimes
252205989Savg	if (auto_typedefs) {
253205989Savg	    const char *q = s_token;
254206687Savg	    size_t q_len = strlen(q);
255205989Savg	    /* Check if we have an "_t" in the end */
256206687Savg	    if (q_len > 2 &&
257206687Savg	        (strcmp(q + q_len - 2, "_t") == 0)) {
258205989Savg	        ps.its_a_keyword = true;
259205989Savg		ps.last_u_d = true;
260205989Savg	        goto found_auto_typedef;
261205989Savg	    }
262205989Savg	}
263205989Savg
2641590Srgrimes	/*
2651590Srgrimes	 * This loop will check if the token is a keyword.
2661590Srgrimes	 */
2671590Srgrimes	for (p = specials; (j = p->rwd) != 0; p++) {
26893440Sdwmalone	    const char *q = s_token;	/* point at scanned token */
26993440Sdwmalone	    if (*j++ != *q++ || *j++ != *q++)
2701590Srgrimes		continue;	/* This test depends on the fact that
2711590Srgrimes				 * identifiers are always at least 1 character
2721590Srgrimes				 * long (ie. the first two bytes of the
2731590Srgrimes				 * identifier are always meaningful) */
27493440Sdwmalone	    if (q[-1] == 0)
2751590Srgrimes		break;		/* If its a one-character identifier */
27693440Sdwmalone	    while (*q++ == *j)
2771590Srgrimes		if (*j++ == 0)
2781590Srgrimes		    goto found_keyword;	/* I wish that C had a multi-level
2791590Srgrimes					 * break... */
2801590Srgrimes	}
2811590Srgrimes	if (p->rwd) {		/* we have a keyword */
2821590Srgrimes    found_keyword:
2831590Srgrimes	    ps.its_a_keyword = true;
2841590Srgrimes	    ps.last_u_d = true;
2851590Srgrimes	    switch (p->rwcode) {
2861590Srgrimes	    case 1:		/* it is a switch */
2871590Srgrimes		return (swstmt);
2881590Srgrimes	    case 2:		/* a case or default */
2891590Srgrimes		return (casestmt);
2901590Srgrimes
2911590Srgrimes	    case 3:		/* a "struct" */
2921590Srgrimes		/*
293125618Sbde		 * Next time around, we will want to know that we have had a
294125618Sbde		 * 'struct'
2951590Srgrimes		 */
296125623Sbde		l_struct = true;
297125623Sbde		/* FALLTHROUGH */
298125623Sbde
2991590Srgrimes	    case 4:		/* one of the declaration keywords */
300205989Savg	    found_auto_typedef:
3011590Srgrimes		if (ps.p_l_follow) {
302125623Sbde		    ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask;
303125623Sbde		    break;	/* inside parens: cast, param list or sizeof */
3041590Srgrimes		}
3051590Srgrimes		last_code = decl;
3061590Srgrimes		return (decl);
3071590Srgrimes
3081590Srgrimes	    case 5:		/* if, while, for */
3091590Srgrimes		return (sp_paren);
3101590Srgrimes
3111590Srgrimes	    case 6:		/* do, else */
3121590Srgrimes		return (sp_nparen);
3131590Srgrimes
3141590Srgrimes	    case 7:
3151590Srgrimes		ps.sizeof_keyword = true;
3161590Srgrimes	    default:		/* all others are treated like any other
3171590Srgrimes				 * identifier */
3181590Srgrimes		return (ident);
3191590Srgrimes	    }			/* end of switch */
3201590Srgrimes	}			/* end of if (found_it) */
3211590Srgrimes	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
32298771Sjmallett	    char *tp = buf_ptr;
3231590Srgrimes	    while (tp < buf_end)
3241590Srgrimes		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
3251590Srgrimes		    goto not_proc;
3261590Srgrimes	    strncpy(ps.procname, token, sizeof ps.procname - 1);
3271590Srgrimes	    ps.in_parameter_declaration = 1;
3281590Srgrimes	    rparen_count = 1;
3291590Srgrimes    not_proc:;
3301590Srgrimes	}
3311590Srgrimes	/*
3321590Srgrimes	 * The following hack attempts to guess whether or not the current
3331590Srgrimes	 * token is in fact a declaration keyword -- one that has been
3341590Srgrimes	 * typedefd
3351590Srgrimes	 */
3361590Srgrimes	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
3371590Srgrimes		&& !ps.p_l_follow
3381590Srgrimes	        && !ps.block_init
3391590Srgrimes		&& (ps.last_token == rparen || ps.last_token == semicolon ||
3401590Srgrimes		    ps.last_token == decl ||
3411590Srgrimes		    ps.last_token == lbrace || ps.last_token == rbrace)) {
3421590Srgrimes	    ps.its_a_keyword = true;
3431590Srgrimes	    ps.last_u_d = true;
3441590Srgrimes	    last_code = decl;
3451590Srgrimes	    return decl;
3461590Srgrimes	}
3471590Srgrimes	if (last_code == decl)	/* if this is a declared variable, then
3481590Srgrimes				 * following sign is unary */
3491590Srgrimes	    ps.last_u_d = true;	/* will make "int a -1" work */
3501590Srgrimes	last_code = ident;
3511590Srgrimes	return (ident);		/* the ident is not in the list */
3521590Srgrimes    }				/* end of procesing for alpanum character */
3531590Srgrimes
3541590Srgrimes    /* Scan a non-alphanumeric token */
3551590Srgrimes
3561590Srgrimes    *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
3571590Srgrimes				 * moved here */
3581590Srgrimes    *e_token = '\0';
3591590Srgrimes    if (++buf_ptr >= buf_end)
3601590Srgrimes	fill_buffer();
3611590Srgrimes
3621590Srgrimes    switch (*token) {
3631590Srgrimes    case '\n':
3641590Srgrimes	unary_delim = ps.last_u_d;
3651590Srgrimes	ps.last_nl = true;	/* remember that we just had a newline */
3661590Srgrimes	code = (had_eof ? 0 : newline);
3671590Srgrimes
3681590Srgrimes	/*
369105244Scharnier	 * if data has been exhausted, the newline is a dummy, and we should
3701590Srgrimes	 * return code to stop
3711590Srgrimes	 */
3721590Srgrimes	break;
3731590Srgrimes
3741590Srgrimes    case '\'':			/* start of quoted character */
3751590Srgrimes    case '"':			/* start of string */
3761590Srgrimes	qchar = *token;
3771590Srgrimes	if (troff) {
3781590Srgrimes	    e_token[-1] = '`';
3791590Srgrimes	    if (qchar == '"')
3801590Srgrimes		*e_token++ = '`';
3811590Srgrimes	    e_token = chfont(&bodyf, &stringf, e_token);
3821590Srgrimes	}
3831590Srgrimes	do {			/* copy the string */
3841590Srgrimes	    while (1) {		/* move one character or [/<char>]<char> */
3851590Srgrimes		if (*buf_ptr == '\n') {
386152635Sdds		    diag2(1, "Unterminated literal");
3871590Srgrimes		    goto stop_lit;
3881590Srgrimes		}
3891590Srgrimes		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
3901590Srgrimes					 * since CHECK_SIZE guarantees that there
3911590Srgrimes					 * are at least 5 entries left */
3921590Srgrimes		*e_token = *buf_ptr++;
3931590Srgrimes		if (buf_ptr >= buf_end)
3941590Srgrimes		    fill_buffer();
3951590Srgrimes		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */
3961590Srgrimes		    if (*buf_ptr == '\n')	/* check for escaped newline */
3971590Srgrimes			++line_no;
3981590Srgrimes		    if (troff) {
3991590Srgrimes			*++e_token = BACKSLASH;
4001590Srgrimes			if (*buf_ptr == BACKSLASH)
4011590Srgrimes			    *++e_token = BACKSLASH;
4021590Srgrimes		    }
4031590Srgrimes		    *++e_token = *buf_ptr++;
4041590Srgrimes		    ++e_token;	/* we must increment this again because we
4051590Srgrimes				 * copied two chars */
4061590Srgrimes		    if (buf_ptr >= buf_end)
4071590Srgrimes			fill_buffer();
4081590Srgrimes		}
4091590Srgrimes		else
4101590Srgrimes		    break;	/* we copied one character */
4111590Srgrimes	    }			/* end of while (1) */
4121590Srgrimes	} while (*e_token++ != qchar);
4131590Srgrimes	if (troff) {
4141590Srgrimes	    e_token = chfont(&stringf, &bodyf, e_token - 1);
4151590Srgrimes	    if (qchar == '"')
4161590Srgrimes		*e_token++ = '\'';
4171590Srgrimes	}
4181590Srgrimesstop_lit:
4191590Srgrimes	code = ident;
4201590Srgrimes	break;
4211590Srgrimes
4221590Srgrimes    case ('('):
4231590Srgrimes    case ('['):
4241590Srgrimes	unary_delim = true;
4251590Srgrimes	code = lparen;
4261590Srgrimes	break;
4271590Srgrimes
4281590Srgrimes    case (')'):
4291590Srgrimes    case (']'):
4301590Srgrimes	code = rparen;
4311590Srgrimes	break;
4321590Srgrimes
4331590Srgrimes    case '#':
4341590Srgrimes	unary_delim = ps.last_u_d;
4351590Srgrimes	code = preesc;
4361590Srgrimes	break;
4371590Srgrimes
4381590Srgrimes    case '?':
4391590Srgrimes	unary_delim = true;
4401590Srgrimes	code = question;
4411590Srgrimes	break;
4421590Srgrimes
4431590Srgrimes    case (':'):
4441590Srgrimes	code = colon;
4451590Srgrimes	unary_delim = true;
4461590Srgrimes	break;
4471590Srgrimes
4481590Srgrimes    case (';'):
4491590Srgrimes	unary_delim = true;
4501590Srgrimes	code = semicolon;
4511590Srgrimes	break;
4521590Srgrimes
4531590Srgrimes    case ('{'):
4541590Srgrimes	unary_delim = true;
4551590Srgrimes
4561590Srgrimes	/*
4571590Srgrimes	 * if (ps.in_or_st) ps.block_init = 1;
4581590Srgrimes	 */
4591590Srgrimes	/* ?	code = ps.block_init ? lparen : lbrace; */
4601590Srgrimes	code = lbrace;
4611590Srgrimes	break;
4621590Srgrimes
4631590Srgrimes    case ('}'):
4641590Srgrimes	unary_delim = true;
4651590Srgrimes	/* ?	code = ps.block_init ? rparen : rbrace; */
4661590Srgrimes	code = rbrace;
4671590Srgrimes	break;
4681590Srgrimes
4691590Srgrimes    case 014:			/* a form feed */
4701590Srgrimes	unary_delim = ps.last_u_d;
4711590Srgrimes	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
4721590Srgrimes				 * right */
4731590Srgrimes	code = form_feed;
4741590Srgrimes	break;
4751590Srgrimes
4761590Srgrimes    case (','):
4771590Srgrimes	unary_delim = true;
4781590Srgrimes	code = comma;
4791590Srgrimes	break;
4801590Srgrimes
4811590Srgrimes    case '.':
4821590Srgrimes	unary_delim = false;
4831590Srgrimes	code = period;
4841590Srgrimes	break;
4851590Srgrimes
4861590Srgrimes    case '-':
4871590Srgrimes    case '+':			/* check for -, +, --, ++ */
4881590Srgrimes	code = (ps.last_u_d ? unary_op : binary_op);
4891590Srgrimes	unary_delim = true;
4901590Srgrimes
4911590Srgrimes	if (*buf_ptr == token[0]) {
4921590Srgrimes	    /* check for doubled character */
4931590Srgrimes	    *e_token++ = *buf_ptr++;
4941590Srgrimes	    /* buffer overflow will be checked at end of loop */
4951590Srgrimes	    if (last_code == ident || last_code == rparen) {
4961590Srgrimes		code = (ps.last_u_d ? unary_op : postop);
4971590Srgrimes		/* check for following ++ or -- */
4981590Srgrimes		unary_delim = false;
4991590Srgrimes	    }
5001590Srgrimes	}
5011590Srgrimes	else if (*buf_ptr == '=')
5021590Srgrimes	    /* check for operator += */
5031590Srgrimes	    *e_token++ = *buf_ptr++;
5041590Srgrimes	else if (*buf_ptr == '>') {
5051590Srgrimes	    /* check for operator -> */
5061590Srgrimes	    *e_token++ = *buf_ptr++;
5071590Srgrimes	    if (!pointer_as_binop) {
5081590Srgrimes		unary_delim = false;
5091590Srgrimes		code = unary_op;
5101590Srgrimes		ps.want_blank = false;
5111590Srgrimes	    }
5121590Srgrimes	}
5131590Srgrimes	break;			/* buffer overflow will be checked at end of
5141590Srgrimes				 * switch */
5151590Srgrimes
5161590Srgrimes    case '=':
5171590Srgrimes	if (ps.in_or_st)
5181590Srgrimes	    ps.block_init = 1;
5191590Srgrimes#ifdef undef
5201590Srgrimes	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */
5211590Srgrimes	    e_token[-1] = *buf_ptr++;
5221590Srgrimes	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
5231590Srgrimes		*e_token++ = *buf_ptr++;
5241590Srgrimes	    *e_token++ = '=';	/* Flip =+ to += */
5251590Srgrimes	    *e_token = 0;
5261590Srgrimes	}
5271590Srgrimes#else
5281590Srgrimes	if (*buf_ptr == '=') {/* == */
5291590Srgrimes	    *e_token++ = '=';	/* Flip =+ to += */
5301590Srgrimes	    buf_ptr++;
5311590Srgrimes	    *e_token = 0;
5321590Srgrimes	}
5331590Srgrimes#endif
5341590Srgrimes	code = binary_op;
5351590Srgrimes	unary_delim = true;
5361590Srgrimes	break;
5371590Srgrimes	/* can drop thru!!! */
5381590Srgrimes
5391590Srgrimes    case '>':
5401590Srgrimes    case '<':
5411590Srgrimes    case '!':			/* ops like <, <<, <=, !=, etc */
5421590Srgrimes	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
5431590Srgrimes	    *e_token++ = *buf_ptr;
5441590Srgrimes	    if (++buf_ptr >= buf_end)
5451590Srgrimes		fill_buffer();
5461590Srgrimes	}
5471590Srgrimes	if (*buf_ptr == '=')
5481590Srgrimes	    *e_token++ = *buf_ptr++;
5491590Srgrimes	code = (ps.last_u_d ? unary_op : binary_op);
5501590Srgrimes	unary_delim = true;
5511590Srgrimes	break;
5521590Srgrimes
5531590Srgrimes    default:
5541590Srgrimes	if (token[0] == '/' && *buf_ptr == '*') {
5551590Srgrimes	    /* it is start of comment */
5561590Srgrimes	    *e_token++ = '*';
5571590Srgrimes
5581590Srgrimes	    if (++buf_ptr >= buf_end)
5591590Srgrimes		fill_buffer();
5601590Srgrimes
5611590Srgrimes	    code = comment;
5621590Srgrimes	    unary_delim = ps.last_u_d;
5631590Srgrimes	    break;
5641590Srgrimes	}
5651590Srgrimes	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
5661590Srgrimes	    /*
5671590Srgrimes	     * handle ||, &&, etc, and also things as in int *****i
5681590Srgrimes	     */
5691590Srgrimes	    *e_token++ = *buf_ptr;
5701590Srgrimes	    if (++buf_ptr >= buf_end)
5711590Srgrimes		fill_buffer();
5721590Srgrimes	}
5731590Srgrimes	code = (ps.last_u_d ? unary_op : binary_op);
5741590Srgrimes	unary_delim = true;
5751590Srgrimes
5761590Srgrimes
5771590Srgrimes    }				/* end of switch */
5781590Srgrimes    if (code != newline) {
5791590Srgrimes	l_struct = false;
5801590Srgrimes	last_code = code;
5811590Srgrimes    }
5821590Srgrimes    if (buf_ptr >= buf_end)	/* check for input buffer empty */
5831590Srgrimes	fill_buffer();
5841590Srgrimes    ps.last_u_d = unary_delim;
5851590Srgrimes    *e_token = '\0';		/* null terminate the token */
5861590Srgrimes    return (code);
5871590Srgrimes}
5881590Srgrimes
5891590Srgrimes/*
5901590Srgrimes * Add the given keyword to the keyword table, using val as the keyword type
5911590Srgrimes */
59285632Sschweikhvoid
59385632Sschweikhaddkey(char *key, int val)
5941590Srgrimes{
59598771Sjmallett    struct templ *p = specials;
5961590Srgrimes    while (p->rwd)
5971590Srgrimes	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
5981590Srgrimes	    return;
5991590Srgrimes	else
6001590Srgrimes	    p++;
6011590Srgrimes    if (p >= specials + sizeof specials / sizeof specials[0])
6021590Srgrimes	return;			/* For now, table overflows are silently
6031590Srgrimes				 * ignored */
6041590Srgrimes    p->rwd = key;
6051590Srgrimes    p->rwcode = val;
6061590Srgrimes    p[1].rwd = 0;
6071590Srgrimes    p[1].rwcode = 0;
6081590Srgrimes}
609