C.c revision 1590
11590Srgrimes/*
21590Srgrimes * Copyright (c) 1987, 1993, 1994
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * Redistribution and use in source and binary forms, with or without
61590Srgrimes * modification, are permitted provided that the following conditions
71590Srgrimes * are met:
81590Srgrimes * 1. Redistributions of source code must retain the above copyright
91590Srgrimes *    notice, this list of conditions and the following disclaimer.
101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer in the
121590Srgrimes *    documentation and/or other materials provided with the distribution.
131590Srgrimes * 3. All advertising materials mentioning features or use of this software
141590Srgrimes *    must display the following acknowledgement:
151590Srgrimes *	This product includes software developed by the University of
161590Srgrimes *	California, Berkeley and its contributors.
171590Srgrimes * 4. Neither the name of the University nor the names of its contributors
181590Srgrimes *    may be used to endorse or promote products derived from this software
191590Srgrimes *    without specific prior written permission.
201590Srgrimes *
211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311590Srgrimes * SUCH DAMAGE.
321590Srgrimes */
331590Srgrimes
341590Srgrimes#ifndef lint
351590Srgrimesstatic char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
361590Srgrimes#endif /* not lint */
371590Srgrimes
381590Srgrimes#include <limits.h>
391590Srgrimes#include <stdio.h>
401590Srgrimes#include <string.h>
411590Srgrimes
421590Srgrimes#include "ctags.h"
431590Srgrimes
441590Srgrimesstatic int	func_entry __P((void));
451590Srgrimesstatic void	hash_entry __P((void));
461590Srgrimesstatic void	skip_string __P((int));
471590Srgrimesstatic int	str_entry __P((int));
481590Srgrimes
491590Srgrimes/*
501590Srgrimes * c_entries --
511590Srgrimes *	read .c and .h files and call appropriate routines
521590Srgrimes */
531590Srgrimesvoid
541590Srgrimesc_entries()
551590Srgrimes{
561590Srgrimes	int	c;			/* current character */
571590Srgrimes	int	level;			/* brace level */
581590Srgrimes	int	token;			/* if reading a token */
591590Srgrimes	int	t_def;			/* if reading a typedef */
601590Srgrimes	int	t_level;		/* typedef's brace level */
611590Srgrimes	char	*sp;			/* buffer pointer */
621590Srgrimes	char	tok[MAXTOKEN];		/* token buffer */
631590Srgrimes
641590Srgrimes	lineftell = ftell(inf);
651590Srgrimes	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
661590Srgrimes	while (GETC(!=, EOF)) {
671590Srgrimes		switch (c) {
681590Srgrimes		/*
691590Srgrimes		 * Here's where it DOESN'T handle: {
701590Srgrimes		 *	foo(a)
711590Srgrimes		 *	{
721590Srgrimes		 *	#ifdef notdef
731590Srgrimes		 *		}
741590Srgrimes		 *	#endif
751590Srgrimes		 *		if (a)
761590Srgrimes		 *			puts("hello, world");
771590Srgrimes		 *	}
781590Srgrimes		 */
791590Srgrimes		case '{':
801590Srgrimes			++level;
811590Srgrimes			goto endtok;
821590Srgrimes		case '}':
831590Srgrimes			/*
841590Srgrimes			 * if level goes below zero, try and fix
851590Srgrimes			 * it, even though we've already messed up
861590Srgrimes			 */
871590Srgrimes			if (--level < 0)
881590Srgrimes				level = 0;
891590Srgrimes			goto endtok;
901590Srgrimes
911590Srgrimes		case '\n':
921590Srgrimes			SETLINE;
931590Srgrimes			/*
941590Srgrimes			 * the above 3 cases are similar in that they
951590Srgrimes			 * are special characters that also end tokens.
961590Srgrimes			 */
971590Srgrimes	endtok:			if (sp > tok) {
981590Srgrimes				*sp = EOS;
991590Srgrimes				token = YES;
1001590Srgrimes				sp = tok;
1011590Srgrimes			}
1021590Srgrimes			else
1031590Srgrimes				token = NO;
1041590Srgrimes			continue;
1051590Srgrimes
1061590Srgrimes		/*
1071590Srgrimes		 * We ignore quoted strings and character constants
1081590Srgrimes		 * completely.
1091590Srgrimes		 */
1101590Srgrimes		case '"':
1111590Srgrimes		case '\'':
1121590Srgrimes			(void)skip_string(c);
1131590Srgrimes			break;
1141590Srgrimes
1151590Srgrimes		/*
1161590Srgrimes		 * comments can be fun; note the state is unchanged after
1171590Srgrimes		 * return, in case we found:
1181590Srgrimes		 *	"foo() XX comment XX { int bar; }"
1191590Srgrimes		 */
1201590Srgrimes		case '/':
1211590Srgrimes			if (GETC(==, '*')) {
1221590Srgrimes				skip_comment();
1231590Srgrimes				continue;
1241590Srgrimes			}
1251590Srgrimes			(void)ungetc(c, inf);
1261590Srgrimes			c = '/';
1271590Srgrimes			goto storec;
1281590Srgrimes
1291590Srgrimes		/* hash marks flag #define's. */
1301590Srgrimes		case '#':
1311590Srgrimes			if (sp == tok) {
1321590Srgrimes				hash_entry();
1331590Srgrimes				break;
1341590Srgrimes			}
1351590Srgrimes			goto storec;
1361590Srgrimes
1371590Srgrimes		/*
1381590Srgrimes		 * if we have a current token, parenthesis on
1391590Srgrimes		 * level zero indicates a function.
1401590Srgrimes		 */
1411590Srgrimes		case '(':
1421590Srgrimes			if (!level && token) {
1431590Srgrimes				int	curline;
1441590Srgrimes
1451590Srgrimes				if (sp != tok)
1461590Srgrimes					*sp = EOS;
1471590Srgrimes				/*
1481590Srgrimes				 * grab the line immediately, we may
1491590Srgrimes				 * already be wrong, for example,
1501590Srgrimes				 *	foo\n
1511590Srgrimes				 *	(arg1,
1521590Srgrimes				 */
1531590Srgrimes				getline();
1541590Srgrimes				curline = lineno;
1551590Srgrimes				if (func_entry()) {
1561590Srgrimes					++level;
1571590Srgrimes					pfnote(tok, curline);
1581590Srgrimes				}
1591590Srgrimes				break;
1601590Srgrimes			}
1611590Srgrimes			goto storec;
1621590Srgrimes
1631590Srgrimes		/*
1641590Srgrimes		 * semi-colons indicate the end of a typedef; if we find a
1651590Srgrimes		 * typedef we search for the next semi-colon of the same
1661590Srgrimes		 * level as the typedef.  Ignoring "structs", they are
1671590Srgrimes		 * tricky, since you can find:
1681590Srgrimes		 *
1691590Srgrimes		 *	"typedef long time_t;"
1701590Srgrimes		 *	"typedef unsigned int u_int;"
1711590Srgrimes		 *	"typedef unsigned int u_int [10];"
1721590Srgrimes		 *
1731590Srgrimes		 * If looking at a typedef, we save a copy of the last token
1741590Srgrimes		 * found.  Then, when we find the ';' we take the current
1751590Srgrimes		 * token if it starts with a valid token name, else we take
1761590Srgrimes		 * the one we saved.  There's probably some reasonable
1771590Srgrimes		 * alternative to this...
1781590Srgrimes		 */
1791590Srgrimes		case ';':
1801590Srgrimes			if (t_def && level == t_level) {
1811590Srgrimes				t_def = NO;
1821590Srgrimes				getline();
1831590Srgrimes				if (sp != tok)
1841590Srgrimes					*sp = EOS;
1851590Srgrimes				pfnote(tok, lineno);
1861590Srgrimes				break;
1871590Srgrimes			}
1881590Srgrimes			goto storec;
1891590Srgrimes
1901590Srgrimes		/*
1911590Srgrimes		 * store characters until one that can't be part of a token
1921590Srgrimes		 * comes along; check the current token against certain
1931590Srgrimes		 * reserved words.
1941590Srgrimes		 */
1951590Srgrimes		default:
1961590Srgrimes	storec:		if (!intoken(c)) {
1971590Srgrimes				if (sp == tok)
1981590Srgrimes					break;
1991590Srgrimes				*sp = EOS;
2001590Srgrimes				if (tflag) {
2011590Srgrimes					/* no typedefs inside typedefs */
2021590Srgrimes					if (!t_def &&
2031590Srgrimes						   !memcmp(tok, "typedef",8)) {
2041590Srgrimes						t_def = YES;
2051590Srgrimes						t_level = level;
2061590Srgrimes						break;
2071590Srgrimes					}
2081590Srgrimes					/* catch "typedef struct" */
2091590Srgrimes					if ((!t_def || t_level < level)
2101590Srgrimes					    && (!memcmp(tok, "struct", 7)
2111590Srgrimes					    || !memcmp(tok, "union", 6)
2121590Srgrimes					    || !memcmp(tok, "enum", 5))) {
2131590Srgrimes						/*
2141590Srgrimes						 * get line immediately;
2151590Srgrimes						 * may change before '{'
2161590Srgrimes						 */
2171590Srgrimes						getline();
2181590Srgrimes						if (str_entry(c))
2191590Srgrimes							++level;
2201590Srgrimes						break;
2211590Srgrimes						/* } */
2221590Srgrimes					}
2231590Srgrimes				}
2241590Srgrimes				sp = tok;
2251590Srgrimes			}
2261590Srgrimes			else if (sp != tok || begtoken(c)) {
2271590Srgrimes				*sp++ = c;
2281590Srgrimes				token = YES;
2291590Srgrimes			}
2301590Srgrimes			continue;
2311590Srgrimes		}
2321590Srgrimes
2331590Srgrimes		sp = tok;
2341590Srgrimes		token = NO;
2351590Srgrimes	}
2361590Srgrimes}
2371590Srgrimes
2381590Srgrimes/*
2391590Srgrimes * func_entry --
2401590Srgrimes *	handle a function reference
2411590Srgrimes */
2421590Srgrimesstatic int
2431590Srgrimesfunc_entry()
2441590Srgrimes{
2451590Srgrimes	int	c;			/* current character */
2461590Srgrimes	int	level = 0;		/* for matching '()' */
2471590Srgrimes
2481590Srgrimes	/*
2491590Srgrimes	 * Find the end of the assumed function declaration.
2501590Srgrimes	 * Note that ANSI C functions can have type definitions so keep
2511590Srgrimes	 * track of the parentheses nesting level.
2521590Srgrimes	 */
2531590Srgrimes	while (GETC(!=, EOF)) {
2541590Srgrimes		switch (c) {
2551590Srgrimes		case '\'':
2561590Srgrimes		case '"':
2571590Srgrimes			/* skip strings and character constants */
2581590Srgrimes			skip_string(c);
2591590Srgrimes			break;
2601590Srgrimes		case '/':
2611590Srgrimes			/* skip comments */
2621590Srgrimes			if (GETC(==, '*'))
2631590Srgrimes				skip_comment();
2641590Srgrimes			break;
2651590Srgrimes		case '(':
2661590Srgrimes			level++;
2671590Srgrimes			break;
2681590Srgrimes		case ')':
2691590Srgrimes			if (level == 0)
2701590Srgrimes				goto fnd;
2711590Srgrimes			level--;
2721590Srgrimes			break;
2731590Srgrimes		case '\n':
2741590Srgrimes			SETLINE;
2751590Srgrimes		}
2761590Srgrimes	}
2771590Srgrimes	return (NO);
2781590Srgrimesfnd:
2791590Srgrimes	/*
2801590Srgrimes	 * we assume that the character after a function's right paren
2811590Srgrimes	 * is a token character if it's a function and a non-token
2821590Srgrimes	 * character if it's a declaration.  Comments don't count...
2831590Srgrimes	 */
2841590Srgrimes	for (;;) {
2851590Srgrimes		while (GETC(!=, EOF) && iswhite(c))
2861590Srgrimes			if (c == '\n')
2871590Srgrimes				SETLINE;
2881590Srgrimes		if (intoken(c) || c == '{')
2891590Srgrimes			break;
2901590Srgrimes		if (c == '/' && GETC(==, '*'))
2911590Srgrimes			skip_comment();
2921590Srgrimes		else {				/* don't ever "read" '/' */
2931590Srgrimes			(void)ungetc(c, inf);
2941590Srgrimes			return (NO);
2951590Srgrimes		}
2961590Srgrimes	}
2971590Srgrimes	if (c != '{')
2981590Srgrimes		(void)skip_key('{');
2991590Srgrimes	return (YES);
3001590Srgrimes}
3011590Srgrimes
3021590Srgrimes/*
3031590Srgrimes * hash_entry --
3041590Srgrimes *	handle a line starting with a '#'
3051590Srgrimes */
3061590Srgrimesstatic void
3071590Srgrimeshash_entry()
3081590Srgrimes{
3091590Srgrimes	int	c;			/* character read */
3101590Srgrimes	int	curline;		/* line started on */
3111590Srgrimes	char	*sp;			/* buffer pointer */
3121590Srgrimes	char	tok[MAXTOKEN];		/* storage buffer */
3131590Srgrimes
3141590Srgrimes	curline = lineno;
3151590Srgrimes	for (sp = tok;;) {		/* get next token */
3161590Srgrimes		if (GETC(==, EOF))
3171590Srgrimes			return;
3181590Srgrimes		if (iswhite(c))
3191590Srgrimes			break;
3201590Srgrimes		*sp++ = c;
3211590Srgrimes	}
3221590Srgrimes	*sp = EOS;
3231590Srgrimes	if (memcmp(tok, "define", 6))	/* only interested in #define's */
3241590Srgrimes		goto skip;
3251590Srgrimes	for (;;) {			/* this doesn't handle "#define \n" */
3261590Srgrimes		if (GETC(==, EOF))
3271590Srgrimes			return;
3281590Srgrimes		if (!iswhite(c))
3291590Srgrimes			break;
3301590Srgrimes	}
3311590Srgrimes	for (sp = tok;;) {		/* get next token */
3321590Srgrimes		*sp++ = c;
3331590Srgrimes		if (GETC(==, EOF))
3341590Srgrimes			return;
3351590Srgrimes		/*
3361590Srgrimes		 * this is where it DOESN'T handle
3371590Srgrimes		 * "#define \n"
3381590Srgrimes		 */
3391590Srgrimes		if (!intoken(c))
3401590Srgrimes			break;
3411590Srgrimes	}
3421590Srgrimes	*sp = EOS;
3431590Srgrimes	if (dflag || c == '(') {	/* only want macros */
3441590Srgrimes		getline();
3451590Srgrimes		pfnote(tok, curline);
3461590Srgrimes	}
3471590Srgrimesskip:	if (c == '\n') {		/* get rid of rest of define */
3481590Srgrimes		SETLINE
3491590Srgrimes		if (*(sp - 1) != '\\')
3501590Srgrimes			return;
3511590Srgrimes	}
3521590Srgrimes	(void)skip_key('\n');
3531590Srgrimes}
3541590Srgrimes
3551590Srgrimes/*
3561590Srgrimes * str_entry --
3571590Srgrimes *	handle a struct, union or enum entry
3581590Srgrimes */
3591590Srgrimesstatic int
3601590Srgrimesstr_entry(c)
3611590Srgrimes	int	c;			/* current character */
3621590Srgrimes{
3631590Srgrimes	int	curline;		/* line started on */
3641590Srgrimes	char	*sp;			/* buffer pointer */
3651590Srgrimes	char	tok[LINE_MAX];		/* storage buffer */
3661590Srgrimes
3671590Srgrimes	curline = lineno;
3681590Srgrimes	while (iswhite(c))
3691590Srgrimes		if (GETC(==, EOF))
3701590Srgrimes			return (NO);
3711590Srgrimes	if (c == '{')		/* it was "struct {" */
3721590Srgrimes		return (YES);
3731590Srgrimes	for (sp = tok;;) {		/* get next token */
3741590Srgrimes		*sp++ = c;
3751590Srgrimes		if (GETC(==, EOF))
3761590Srgrimes			return (NO);
3771590Srgrimes		if (!intoken(c))
3781590Srgrimes			break;
3791590Srgrimes	}
3801590Srgrimes	switch (c) {
3811590Srgrimes		case '{':		/* it was "struct foo{" */
3821590Srgrimes			--sp;
3831590Srgrimes			break;
3841590Srgrimes		case '\n':		/* it was "struct foo\n" */
3851590Srgrimes			SETLINE;
3861590Srgrimes			/*FALLTHROUGH*/
3871590Srgrimes		default:		/* probably "struct foo " */
3881590Srgrimes			while (GETC(!=, EOF))
3891590Srgrimes				if (!iswhite(c))
3901590Srgrimes					break;
3911590Srgrimes			if (c != '{') {
3921590Srgrimes				(void)ungetc(c, inf);
3931590Srgrimes				return (NO);
3941590Srgrimes			}
3951590Srgrimes	}
3961590Srgrimes	*sp = EOS;
3971590Srgrimes	pfnote(tok, curline);
3981590Srgrimes	return (YES);
3991590Srgrimes}
4001590Srgrimes
4011590Srgrimes/*
4021590Srgrimes * skip_comment --
4031590Srgrimes *	skip over comment
4041590Srgrimes */
4051590Srgrimesvoid
4061590Srgrimesskip_comment()
4071590Srgrimes{
4081590Srgrimes	int	c;			/* character read */
4091590Srgrimes	int	star;			/* '*' flag */
4101590Srgrimes
4111590Srgrimes	for (star = 0; GETC(!=, EOF);)
4121590Srgrimes		switch(c) {
4131590Srgrimes		/* comments don't nest, nor can they be escaped. */
4141590Srgrimes		case '*':
4151590Srgrimes			star = YES;
4161590Srgrimes			break;
4171590Srgrimes		case '/':
4181590Srgrimes			if (star)
4191590Srgrimes				return;
4201590Srgrimes			break;
4211590Srgrimes		case '\n':
4221590Srgrimes			SETLINE;
4231590Srgrimes			/*FALLTHROUGH*/
4241590Srgrimes		default:
4251590Srgrimes			star = NO;
4261590Srgrimes			break;
4271590Srgrimes		}
4281590Srgrimes}
4291590Srgrimes
4301590Srgrimes/*
4311590Srgrimes * skip_string --
4321590Srgrimes *	skip to the end of a string or character constant.
4331590Srgrimes */
4341590Srgrimesvoid
4351590Srgrimesskip_string(key)
4361590Srgrimes	int	key;
4371590Srgrimes{
4381590Srgrimes	int	c,
4391590Srgrimes		skip;
4401590Srgrimes
4411590Srgrimes	for (skip = NO; GETC(!=, EOF); )
4421590Srgrimes		switch (c) {
4431590Srgrimes		case '\\':		/* a backslash escapes anything */
4441590Srgrimes			skip = !skip;	/* we toggle in case it's "\\" */
4451590Srgrimes			break;
4461590Srgrimes		case '\n':
4471590Srgrimes			SETLINE;
4481590Srgrimes			/*FALLTHROUGH*/
4491590Srgrimes		default:
4501590Srgrimes			if (c == key && !skip)
4511590Srgrimes				return;
4521590Srgrimes			skip = NO;
4531590Srgrimes		}
4541590Srgrimes}
4551590Srgrimes
4561590Srgrimes/*
4571590Srgrimes * skip_key --
4581590Srgrimes *	skip to next char "key"
4591590Srgrimes */
4601590Srgrimesint
4611590Srgrimesskip_key(key)
4621590Srgrimes	int	key;
4631590Srgrimes{
4641590Srgrimes	int	c,
4651590Srgrimes		skip,
4661590Srgrimes		retval;
4671590Srgrimes
4681590Srgrimes	for (skip = retval = NO; GETC(!=, EOF);)
4691590Srgrimes		switch(c) {
4701590Srgrimes		case '\\':		/* a backslash escapes anything */
4711590Srgrimes			skip = !skip;	/* we toggle in case it's "\\" */
4721590Srgrimes			break;
4731590Srgrimes		case ';':		/* special case for yacc; if one */
4741590Srgrimes		case '|':		/* of these chars occurs, we may */
4751590Srgrimes			retval = YES;	/* have moved out of the rule */
4761590Srgrimes			break;		/* not used by C */
4771590Srgrimes		case '\'':
4781590Srgrimes		case '"':
4791590Srgrimes			/* skip strings and character constants */
4801590Srgrimes			skip_string(c);
4811590Srgrimes			break;
4821590Srgrimes		case '/':
4831590Srgrimes			/* skip comments */
4841590Srgrimes			if (GETC(==, '*')) {
4851590Srgrimes				skip_comment();
4861590Srgrimes				break;
4871590Srgrimes			}
4881590Srgrimes			(void)ungetc(c, inf);
4891590Srgrimes			c = '/';
4901590Srgrimes			goto norm;
4911590Srgrimes		case '\n':
4921590Srgrimes			SETLINE;
4931590Srgrimes			/*FALLTHROUGH*/
4941590Srgrimes		default:
4951590Srgrimes		norm:
4961590Srgrimes			if (c == key && !skip)
4971590Srgrimes				return (retval);
4981590Srgrimes			skip = NO;
4991590Srgrimes		}
5001590Srgrimes	return (retval);
5011590Srgrimes}
502