/* C.c revision 166503 */
/*
 * Copyright (c) 1987, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
331590Srgrimes
3487628Sdwmalone#if 0
351590Srgrimes#ifndef lint
361590Srgrimesstatic char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
3728625Ssteve#endif
3887628Sdwmalone#endif
391590Srgrimes
4087628Sdwmalone#include <sys/cdefs.h>
4187628Sdwmalone__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 166503 2007-02-04 20:07:07Z rse $");
4287628Sdwmalone
431590Srgrimes#include <limits.h>
441590Srgrimes#include <stdio.h>
4591382Sdwmalone#include <string.h>
461590Srgrimes
471590Srgrimes#include "ctags.h"
481590Srgrimes
4992920Simpstatic int	func_entry(void);
5092920Simpstatic void	hash_entry(void);
5192920Simpstatic void	skip_string(int);
5292920Simpstatic int	str_entry(int);
531590Srgrimes
541590Srgrimes/*
551590Srgrimes * c_entries --
561590Srgrimes *	read .c and .h files and call appropriate routines
571590Srgrimes */
581590Srgrimesvoid
59100822Sdwmalonec_entries(void)
601590Srgrimes{
611590Srgrimes	int	c;			/* current character */
621590Srgrimes	int	level;			/* brace level */
631590Srgrimes	int	token;			/* if reading a token */
641590Srgrimes	int	t_def;			/* if reading a typedef */
651590Srgrimes	int	t_level;		/* typedef's brace level */
661590Srgrimes	char	*sp;			/* buffer pointer */
671590Srgrimes	char	tok[MAXTOKEN];		/* token buffer */
681590Srgrimes
691590Srgrimes	lineftell = ftell(inf);
701590Srgrimes	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
711590Srgrimes	while (GETC(!=, EOF)) {
721590Srgrimes		switch (c) {
731590Srgrimes		/*
741590Srgrimes		 * Here's where it DOESN'T handle: {
751590Srgrimes		 *	foo(a)
761590Srgrimes		 *	{
771590Srgrimes		 *	#ifdef notdef
781590Srgrimes		 *		}
791590Srgrimes		 *	#endif
801590Srgrimes		 *		if (a)
811590Srgrimes		 *			puts("hello, world");
821590Srgrimes		 *	}
831590Srgrimes		 */
841590Srgrimes		case '{':
851590Srgrimes			++level;
861590Srgrimes			goto endtok;
871590Srgrimes		case '}':
881590Srgrimes			/*
891590Srgrimes			 * if level goes below zero, try and fix
901590Srgrimes			 * it, even though we've already messed up
911590Srgrimes			 */
921590Srgrimes			if (--level < 0)
931590Srgrimes				level = 0;
941590Srgrimes			goto endtok;
951590Srgrimes
961590Srgrimes		case '\n':
971590Srgrimes			SETLINE;
981590Srgrimes			/*
991590Srgrimes			 * the above 3 cases are similar in that they
1001590Srgrimes			 * are special characters that also end tokens.
1011590Srgrimes			 */
1021590Srgrimes	endtok:			if (sp > tok) {
1031590Srgrimes				*sp = EOS;
1041590Srgrimes				token = YES;
1051590Srgrimes				sp = tok;
1061590Srgrimes			}
1071590Srgrimes			else
1081590Srgrimes				token = NO;
1091590Srgrimes			continue;
1101590Srgrimes
1111590Srgrimes		/*
1121590Srgrimes		 * We ignore quoted strings and character constants
1131590Srgrimes		 * completely.
1141590Srgrimes		 */
1151590Srgrimes		case '"':
1161590Srgrimes		case '\'':
117166503Srse			skip_string(c);
1181590Srgrimes			break;
1191590Srgrimes
1201590Srgrimes		/*
1211590Srgrimes		 * comments can be fun; note the state is unchanged after
1221590Srgrimes		 * return, in case we found:
1231590Srgrimes		 *	"foo() XX comment XX { int bar; }"
1241590Srgrimes		 */
1251590Srgrimes		case '/':
12691189Sgshapiro			if (GETC(==, '*') || c == '/') {
12791189Sgshapiro				skip_comment(c);
1281590Srgrimes				continue;
1291590Srgrimes			}
1301590Srgrimes			(void)ungetc(c, inf);
1311590Srgrimes			c = '/';
1321590Srgrimes			goto storec;
1331590Srgrimes
1341590Srgrimes		/* hash marks flag #define's. */
1351590Srgrimes		case '#':
1361590Srgrimes			if (sp == tok) {
1371590Srgrimes				hash_entry();
1381590Srgrimes				break;
1391590Srgrimes			}
1401590Srgrimes			goto storec;
1411590Srgrimes
1421590Srgrimes		/*
1431590Srgrimes		 * if we have a current token, parenthesis on
1441590Srgrimes		 * level zero indicates a function.
1451590Srgrimes		 */
1461590Srgrimes		case '(':
1471590Srgrimes			if (!level && token) {
1481590Srgrimes				int	curline;
1491590Srgrimes
1501590Srgrimes				if (sp != tok)
1511590Srgrimes					*sp = EOS;
1521590Srgrimes				/*
1531590Srgrimes				 * grab the line immediately, we may
1541590Srgrimes				 * already be wrong, for example,
1551590Srgrimes				 *	foo\n
1561590Srgrimes				 *	(arg1,
1571590Srgrimes				 */
1581590Srgrimes				getline();
1591590Srgrimes				curline = lineno;
1601590Srgrimes				if (func_entry()) {
1611590Srgrimes					++level;
1621590Srgrimes					pfnote(tok, curline);
1631590Srgrimes				}
1641590Srgrimes				break;
1651590Srgrimes			}
1661590Srgrimes			goto storec;
1671590Srgrimes
1681590Srgrimes		/*
1691590Srgrimes		 * semi-colons indicate the end of a typedef; if we find a
1701590Srgrimes		 * typedef we search for the next semi-colon of the same
1711590Srgrimes		 * level as the typedef.  Ignoring "structs", they are
1721590Srgrimes		 * tricky, since you can find:
1731590Srgrimes		 *
1741590Srgrimes		 *	"typedef long time_t;"
1751590Srgrimes		 *	"typedef unsigned int u_int;"
1761590Srgrimes		 *	"typedef unsigned int u_int [10];"
1771590Srgrimes		 *
1781590Srgrimes		 * If looking at a typedef, we save a copy of the last token
1791590Srgrimes		 * found.  Then, when we find the ';' we take the current
1801590Srgrimes		 * token if it starts with a valid token name, else we take
1811590Srgrimes		 * the one we saved.  There's probably some reasonable
1821590Srgrimes		 * alternative to this...
1831590Srgrimes		 */
1841590Srgrimes		case ';':
1851590Srgrimes			if (t_def && level == t_level) {
1861590Srgrimes				t_def = NO;
1871590Srgrimes				getline();
1881590Srgrimes				if (sp != tok)
1891590Srgrimes					*sp = EOS;
1901590Srgrimes				pfnote(tok, lineno);
1911590Srgrimes				break;
1921590Srgrimes			}
1931590Srgrimes			goto storec;
1941590Srgrimes
1951590Srgrimes		/*
1961590Srgrimes		 * store characters until one that can't be part of a token
1971590Srgrimes		 * comes along; check the current token against certain
1981590Srgrimes		 * reserved words.
1991590Srgrimes		 */
2001590Srgrimes		default:
20128625Ssteve			/* ignore whitespace */
20228625Ssteve			if (c == ' ' || c == '\t') {
20328625Ssteve				int save = c;
20428625Ssteve				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
20528625Ssteve					;
20628625Ssteve				if (c == EOF)
20728625Ssteve					return;
20828625Ssteve				(void)ungetc(c, inf);
20928625Ssteve				c = save;
21028625Ssteve			}
2111590Srgrimes	storec:		if (!intoken(c)) {
2121590Srgrimes				if (sp == tok)
2131590Srgrimes					break;
2141590Srgrimes				*sp = EOS;
2151590Srgrimes				if (tflag) {
2161590Srgrimes					/* no typedefs inside typedefs */
2171590Srgrimes					if (!t_def &&
2181590Srgrimes						   !memcmp(tok, "typedef",8)) {
2191590Srgrimes						t_def = YES;
2201590Srgrimes						t_level = level;
2211590Srgrimes						break;
2221590Srgrimes					}
2231590Srgrimes					/* catch "typedef struct" */
2241590Srgrimes					if ((!t_def || t_level < level)
2251590Srgrimes					    && (!memcmp(tok, "struct", 7)
2261590Srgrimes					    || !memcmp(tok, "union", 6)
2271590Srgrimes					    || !memcmp(tok, "enum", 5))) {
2281590Srgrimes						/*
2291590Srgrimes						 * get line immediately;
2301590Srgrimes						 * may change before '{'
2311590Srgrimes						 */
2321590Srgrimes						getline();
2331590Srgrimes						if (str_entry(c))
2341590Srgrimes							++level;
2351590Srgrimes						break;
2361590Srgrimes						/* } */
2371590Srgrimes					}
2381590Srgrimes				}
2391590Srgrimes				sp = tok;
2401590Srgrimes			}
2411590Srgrimes			else if (sp != tok || begtoken(c)) {
24297574Stjr				if (sp == tok + sizeof tok - 1)
24397574Stjr					/* Too long -- truncate it */
24497574Stjr					*sp = EOS;
24597574Stjr				else
24697574Stjr					*sp++ = c;
2471590Srgrimes				token = YES;
2481590Srgrimes			}
2491590Srgrimes			continue;
2501590Srgrimes		}
2511590Srgrimes
2521590Srgrimes		sp = tok;
2531590Srgrimes		token = NO;
2541590Srgrimes	}
2551590Srgrimes}
2561590Srgrimes
2571590Srgrimes/*
2581590Srgrimes * func_entry --
2591590Srgrimes *	handle a function reference
2601590Srgrimes */
2611590Srgrimesstatic int
262100822Sdwmalonefunc_entry(void)
2631590Srgrimes{
2641590Srgrimes	int	c;			/* current character */
2651590Srgrimes	int	level = 0;		/* for matching '()' */
2661590Srgrimes
2671590Srgrimes	/*
2681590Srgrimes	 * Find the end of the assumed function declaration.
2691590Srgrimes	 * Note that ANSI C functions can have type definitions so keep
2701590Srgrimes	 * track of the parentheses nesting level.
2711590Srgrimes	 */
2721590Srgrimes	while (GETC(!=, EOF)) {
2731590Srgrimes		switch (c) {
2741590Srgrimes		case '\'':
2751590Srgrimes		case '"':
2761590Srgrimes			/* skip strings and character constants */
2771590Srgrimes			skip_string(c);
2781590Srgrimes			break;
2791590Srgrimes		case '/':
2801590Srgrimes			/* skip comments */
28191189Sgshapiro			if (GETC(==, '*') || c == '/')
28291189Sgshapiro				skip_comment(c);
2831590Srgrimes			break;
2841590Srgrimes		case '(':
2851590Srgrimes			level++;
2861590Srgrimes			break;
2871590Srgrimes		case ')':
2881590Srgrimes			if (level == 0)
2891590Srgrimes				goto fnd;
2901590Srgrimes			level--;
2911590Srgrimes			break;
2921590Srgrimes		case '\n':
2931590Srgrimes			SETLINE;
2941590Srgrimes		}
2951590Srgrimes	}
2961590Srgrimes	return (NO);
2971590Srgrimesfnd:
2981590Srgrimes	/*
2991590Srgrimes	 * we assume that the character after a function's right paren
3001590Srgrimes	 * is a token character if it's a function and a non-token
3011590Srgrimes	 * character if it's a declaration.  Comments don't count...
3021590Srgrimes	 */
3031590Srgrimes	for (;;) {
3041590Srgrimes		while (GETC(!=, EOF) && iswhite(c))
3051590Srgrimes			if (c == '\n')
3061590Srgrimes				SETLINE;
3071590Srgrimes		if (intoken(c) || c == '{')
3081590Srgrimes			break;
30991189Sgshapiro		if (c == '/' && (GETC(==, '*') || c == '/'))
31091189Sgshapiro			skip_comment(c);
3111590Srgrimes		else {				/* don't ever "read" '/' */
3121590Srgrimes			(void)ungetc(c, inf);
3131590Srgrimes			return (NO);
3141590Srgrimes		}
3151590Srgrimes	}
3161590Srgrimes	if (c != '{')
3171590Srgrimes		(void)skip_key('{');
3181590Srgrimes	return (YES);
3191590Srgrimes}
3201590Srgrimes
3211590Srgrimes/*
3221590Srgrimes * hash_entry --
3231590Srgrimes *	handle a line starting with a '#'
3241590Srgrimes */
3251590Srgrimesstatic void
326100822Sdwmalonehash_entry(void)
3271590Srgrimes{
3281590Srgrimes	int	c;			/* character read */
3291590Srgrimes	int	curline;		/* line started on */
3301590Srgrimes	char	*sp;			/* buffer pointer */
3311590Srgrimes	char	tok[MAXTOKEN];		/* storage buffer */
3321590Srgrimes
33328625Ssteve	/* ignore leading whitespace */
33428625Ssteve	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
33528625Ssteve		;
33628625Ssteve	(void)ungetc(c, inf);
33728625Ssteve
3381590Srgrimes	curline = lineno;
3391590Srgrimes	for (sp = tok;;) {		/* get next token */
3401590Srgrimes		if (GETC(==, EOF))
3411590Srgrimes			return;
3421590Srgrimes		if (iswhite(c))
3431590Srgrimes			break;
34497574Stjr		if (sp == tok + sizeof tok - 1)
34597574Stjr			/* Too long -- truncate it */
34697574Stjr			*sp = EOS;
34797574Stjr		else
34897574Stjr			*sp++ = c;
3491590Srgrimes	}
3501590Srgrimes	*sp = EOS;
3511590Srgrimes	if (memcmp(tok, "define", 6))	/* only interested in #define's */
3521590Srgrimes		goto skip;
3531590Srgrimes	for (;;) {			/* this doesn't handle "#define \n" */
3541590Srgrimes		if (GETC(==, EOF))
3551590Srgrimes			return;
3561590Srgrimes		if (!iswhite(c))
3571590Srgrimes			break;
3581590Srgrimes	}
3591590Srgrimes	for (sp = tok;;) {		/* get next token */
36097574Stjr		if (sp == tok + sizeof tok - 1)
36197574Stjr			/* Too long -- truncate it */
36297574Stjr			*sp = EOS;
36397574Stjr		else
36497574Stjr			*sp++ = c;
3651590Srgrimes		if (GETC(==, EOF))
3661590Srgrimes			return;
3671590Srgrimes		/*
3681590Srgrimes		 * this is where it DOESN'T handle
3691590Srgrimes		 * "#define \n"
3701590Srgrimes		 */
3711590Srgrimes		if (!intoken(c))
3721590Srgrimes			break;
3731590Srgrimes	}
3741590Srgrimes	*sp = EOS;
3751590Srgrimes	if (dflag || c == '(') {	/* only want macros */
3761590Srgrimes		getline();
3771590Srgrimes		pfnote(tok, curline);
3781590Srgrimes	}
3791590Srgrimesskip:	if (c == '\n') {		/* get rid of rest of define */
3801590Srgrimes		SETLINE
3811590Srgrimes		if (*(sp - 1) != '\\')
3821590Srgrimes			return;
3831590Srgrimes	}
3841590Srgrimes	(void)skip_key('\n');
3851590Srgrimes}
3861590Srgrimes
3871590Srgrimes/*
3881590Srgrimes * str_entry --
3891590Srgrimes *	handle a struct, union or enum entry
3901590Srgrimes */
3911590Srgrimesstatic int
392100822Sdwmalonestr_entry(int c) /* c is current character */
3931590Srgrimes{
3941590Srgrimes	int	curline;		/* line started on */
3951590Srgrimes	char	*sp;			/* buffer pointer */
3961590Srgrimes	char	tok[LINE_MAX];		/* storage buffer */
3971590Srgrimes
3981590Srgrimes	curline = lineno;
3991590Srgrimes	while (iswhite(c))
4001590Srgrimes		if (GETC(==, EOF))
4011590Srgrimes			return (NO);
4021590Srgrimes	if (c == '{')		/* it was "struct {" */
4031590Srgrimes		return (YES);
4041590Srgrimes	for (sp = tok;;) {		/* get next token */
40597574Stjr		if (sp == tok + sizeof tok - 1)
40697574Stjr			/* Too long -- truncate it */
40797574Stjr			*sp = EOS;
40897574Stjr		else
40997574Stjr			*sp++ = c;
4101590Srgrimes		if (GETC(==, EOF))
4111590Srgrimes			return (NO);
4121590Srgrimes		if (!intoken(c))
4131590Srgrimes			break;
4141590Srgrimes	}
4151590Srgrimes	switch (c) {
4161590Srgrimes		case '{':		/* it was "struct foo{" */
4171590Srgrimes			--sp;
4181590Srgrimes			break;
4191590Srgrimes		case '\n':		/* it was "struct foo\n" */
4201590Srgrimes			SETLINE;
4211590Srgrimes			/*FALLTHROUGH*/
4221590Srgrimes		default:		/* probably "struct foo " */
4231590Srgrimes			while (GETC(!=, EOF))
4241590Srgrimes				if (!iswhite(c))
4251590Srgrimes					break;
4261590Srgrimes			if (c != '{') {
4271590Srgrimes				(void)ungetc(c, inf);
4281590Srgrimes				return (NO);
4291590Srgrimes			}
4301590Srgrimes	}
4311590Srgrimes	*sp = EOS;
4321590Srgrimes	pfnote(tok, curline);
4331590Srgrimes	return (YES);
4341590Srgrimes}
4351590Srgrimes
4361590Srgrimes/*
4371590Srgrimes * skip_comment --
4381590Srgrimes *	skip over comment
4391590Srgrimes */
4401590Srgrimesvoid
441100822Sdwmaloneskip_comment(int t) /* t is comment character */
4421590Srgrimes{
4431590Srgrimes	int	c;			/* character read */
4441590Srgrimes	int	star;			/* '*' flag */
4451590Srgrimes
4461590Srgrimes	for (star = 0; GETC(!=, EOF);)
4471590Srgrimes		switch(c) {
4481590Srgrimes		/* comments don't nest, nor can they be escaped. */
4491590Srgrimes		case '*':
4501590Srgrimes			star = YES;
4511590Srgrimes			break;
4521590Srgrimes		case '/':
45391189Sgshapiro			if (star && t == '*')
4541590Srgrimes				return;
4551590Srgrimes			break;
4561590Srgrimes		case '\n':
45791189Sgshapiro			if (t == '/')
45891189Sgshapiro				return;
4591590Srgrimes			SETLINE;
4601590Srgrimes			/*FALLTHROUGH*/
4611590Srgrimes		default:
4621590Srgrimes			star = NO;
4631590Srgrimes			break;
4641590Srgrimes		}
4651590Srgrimes}
4661590Srgrimes
4671590Srgrimes/*
4681590Srgrimes * skip_string --
4691590Srgrimes *	skip to the end of a string or character constant.
4701590Srgrimes */
4711590Srgrimesvoid
472100822Sdwmaloneskip_string(int key)
4731590Srgrimes{
4741590Srgrimes	int	c,
4751590Srgrimes		skip;
4761590Srgrimes
4771590Srgrimes	for (skip = NO; GETC(!=, EOF); )
4781590Srgrimes		switch (c) {
4791590Srgrimes		case '\\':		/* a backslash escapes anything */
4801590Srgrimes			skip = !skip;	/* we toggle in case it's "\\" */
4811590Srgrimes			break;
4821590Srgrimes		case '\n':
4831590Srgrimes			SETLINE;
4841590Srgrimes			/*FALLTHROUGH*/
4851590Srgrimes		default:
4861590Srgrimes			if (c == key && !skip)
4871590Srgrimes				return;
4881590Srgrimes			skip = NO;
4891590Srgrimes		}
4901590Srgrimes}
4911590Srgrimes
4921590Srgrimes/*
4931590Srgrimes * skip_key --
4941590Srgrimes *	skip to next char "key"
4951590Srgrimes */
4961590Srgrimesint
497100822Sdwmaloneskip_key(int key)
4981590Srgrimes{
4991590Srgrimes	int	c,
5001590Srgrimes		skip,
5011590Srgrimes		retval;
5021590Srgrimes
5031590Srgrimes	for (skip = retval = NO; GETC(!=, EOF);)
5041590Srgrimes		switch(c) {
5051590Srgrimes		case '\\':		/* a backslash escapes anything */
5061590Srgrimes			skip = !skip;	/* we toggle in case it's "\\" */
5071590Srgrimes			break;
5081590Srgrimes		case ';':		/* special case for yacc; if one */
5091590Srgrimes		case '|':		/* of these chars occurs, we may */
5101590Srgrimes			retval = YES;	/* have moved out of the rule */
5111590Srgrimes			break;		/* not used by C */
5121590Srgrimes		case '\'':
5131590Srgrimes		case '"':
5141590Srgrimes			/* skip strings and character constants */
5151590Srgrimes			skip_string(c);
5161590Srgrimes			break;
5171590Srgrimes		case '/':
5181590Srgrimes			/* skip comments */
51991189Sgshapiro			if (GETC(==, '*') || c == '/') {
52091189Sgshapiro				skip_comment(c);
5211590Srgrimes				break;
5221590Srgrimes			}
5231590Srgrimes			(void)ungetc(c, inf);
5241590Srgrimes			c = '/';
5251590Srgrimes			goto norm;
5261590Srgrimes		case '\n':
5271590Srgrimes			SETLINE;
5281590Srgrimes			/*FALLTHROUGH*/
5291590Srgrimes		default:
5301590Srgrimes		norm:
5311590Srgrimes			if (c == key && !skip)
5321590Srgrimes				return (retval);
5331590Srgrimes			skip = NO;
5341590Srgrimes		}
5351590Srgrimes	return (retval);
5361590Srgrimes}
537