C.c revision 97574
/*
 * Copyright (c) 1987, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
33169689Skan
34169689Skan#if 0
35169689Skan#ifndef lint
36169689Skanstatic char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
37169689Skan#endif
38169689Skan#endif
39169689Skan
40169689Skan#include <sys/cdefs.h>
41169689Skan__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 97574 2002-05-30 10:54:53Z tjr $");
42169689Skan
43169689Skan#include <limits.h>
44169689Skan#include <stdio.h>
45169689Skan#include <string.h>
46169689Skan
47169689Skan#include "ctags.h"
48169689Skan
/* File-local helpers; each is defined further down in this file. */
static int	func_entry(void);	/* '(' seen at brace level zero */
static void	hash_entry(void);	/* '#' seen at the start of a token */
static void	skip_string(int key);	/* key is the opening quote character */
static int	str_entry(int c);	/* c is the character after the keyword */
53169689Skan
54169689Skan/*
55169689Skan * c_entries --
56169689Skan *	read .c and .h files and call appropriate routines
57169689Skan */
58169689Skanvoid
59169689Skanc_entries()
60169689Skan{
61169689Skan	int	c;			/* current character */
62169689Skan	int	level;			/* brace level */
63169689Skan	int	token;			/* if reading a token */
64169689Skan	int	t_def;			/* if reading a typedef */
65169689Skan	int	t_level;		/* typedef's brace level */
66169689Skan	char	*sp;			/* buffer pointer */
67169689Skan	char	tok[MAXTOKEN];		/* token buffer */
68169689Skan
69169689Skan	lineftell = ftell(inf);
70169689Skan	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
71169689Skan	while (GETC(!=, EOF)) {
72169689Skan		switch (c) {
73169689Skan		/*
74169689Skan		 * Here's where it DOESN'T handle: {
75169689Skan		 *	foo(a)
76169689Skan		 *	{
77169689Skan		 *	#ifdef notdef
78169689Skan		 *		}
79169689Skan		 *	#endif
80169689Skan		 *		if (a)
81169689Skan		 *			puts("hello, world");
82169689Skan		 *	}
83169689Skan		 */
84169689Skan		case '{':
85169689Skan			++level;
86169689Skan			goto endtok;
87169689Skan		case '}':
88169689Skan			/*
89169689Skan			 * if level goes below zero, try and fix
90169689Skan			 * it, even though we've already messed up
91169689Skan			 */
92169689Skan			if (--level < 0)
93169689Skan				level = 0;
94169689Skan			goto endtok;
95169689Skan
96169689Skan		case '\n':
97169689Skan			SETLINE;
98169689Skan			/*
99169689Skan			 * the above 3 cases are similar in that they
100169689Skan			 * are special characters that also end tokens.
101169689Skan			 */
102169689Skan	endtok:			if (sp > tok) {
103169689Skan				*sp = EOS;
104169689Skan				token = YES;
105169689Skan				sp = tok;
106169689Skan			}
107169689Skan			else
108169689Skan				token = NO;
109169689Skan			continue;
110169689Skan
111169689Skan		/*
112169689Skan		 * We ignore quoted strings and character constants
113169689Skan		 * completely.
114169689Skan		 */
115169689Skan		case '"':
116169689Skan		case '\'':
117169689Skan			(void)skip_string(c);
118169689Skan			break;
119169689Skan
120169689Skan		/*
121169689Skan		 * comments can be fun; note the state is unchanged after
122169689Skan		 * return, in case we found:
123169689Skan		 *	"foo() XX comment XX { int bar; }"
124169689Skan		 */
125169689Skan		case '/':
126169689Skan			if (GETC(==, '*') || c == '/') {
127169689Skan				skip_comment(c);
128169689Skan				continue;
129169689Skan			}
130169689Skan			(void)ungetc(c, inf);
131169689Skan			c = '/';
132169689Skan			goto storec;
133169689Skan
134169689Skan		/* hash marks flag #define's. */
135169689Skan		case '#':
136169689Skan			if (sp == tok) {
137169689Skan				hash_entry();
138169689Skan				break;
139169689Skan			}
140169689Skan			goto storec;
141169689Skan
142169689Skan		/*
143169689Skan		 * if we have a current token, parenthesis on
144169689Skan		 * level zero indicates a function.
145169689Skan		 */
146169689Skan		case '(':
147169689Skan			if (!level && token) {
148169689Skan				int	curline;
149169689Skan
150169689Skan				if (sp != tok)
151169689Skan					*sp = EOS;
152169689Skan				/*
153169689Skan				 * grab the line immediately, we may
154169689Skan				 * already be wrong, for example,
155169689Skan				 *	foo\n
156169689Skan				 *	(arg1,
157169689Skan				 */
158169689Skan				getline();
159169689Skan				curline = lineno;
160169689Skan				if (func_entry()) {
161169689Skan					++level;
162169689Skan					pfnote(tok, curline);
163169689Skan				}
164169689Skan				break;
165169689Skan			}
166169689Skan			goto storec;
167169689Skan
168169689Skan		/*
169169689Skan		 * semi-colons indicate the end of a typedef; if we find a
170169689Skan		 * typedef we search for the next semi-colon of the same
171169689Skan		 * level as the typedef.  Ignoring "structs", they are
172169689Skan		 * tricky, since you can find:
173169689Skan		 *
174169689Skan		 *	"typedef long time_t;"
175169689Skan		 *	"typedef unsigned int u_int;"
176169689Skan		 *	"typedef unsigned int u_int [10];"
177169689Skan		 *
178169689Skan		 * If looking at a typedef, we save a copy of the last token
179169689Skan		 * found.  Then, when we find the ';' we take the current
180169689Skan		 * token if it starts with a valid token name, else we take
181169689Skan		 * the one we saved.  There's probably some reasonable
182169689Skan		 * alternative to this...
183169689Skan		 */
184169689Skan		case ';':
185169689Skan			if (t_def && level == t_level) {
186169689Skan				t_def = NO;
187169689Skan				getline();
188169689Skan				if (sp != tok)
189169689Skan					*sp = EOS;
190169689Skan				pfnote(tok, lineno);
191169689Skan				break;
192169689Skan			}
193169689Skan			goto storec;
194169689Skan
195169689Skan		/*
196169689Skan		 * store characters until one that can't be part of a token
197169689Skan		 * comes along; check the current token against certain
198169689Skan		 * reserved words.
199169689Skan		 */
200169689Skan		default:
201169689Skan			/* ignore whitespace */
202169689Skan			if (c == ' ' || c == '\t') {
203169689Skan				int save = c;
204169689Skan				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
205169689Skan					;
206169689Skan				if (c == EOF)
207169689Skan					return;
208169689Skan				(void)ungetc(c, inf);
209169689Skan				c = save;
210169689Skan			}
211169689Skan	storec:		if (!intoken(c)) {
212169689Skan				if (sp == tok)
213169689Skan					break;
214169689Skan				*sp = EOS;
215169689Skan				if (tflag) {
216169689Skan					/* no typedefs inside typedefs */
217169689Skan					if (!t_def &&
218169689Skan						   !memcmp(tok, "typedef",8)) {
219169689Skan						t_def = YES;
220169689Skan						t_level = level;
221169689Skan						break;
222169689Skan					}
223169689Skan					/* catch "typedef struct" */
224169689Skan					if ((!t_def || t_level < level)
225169689Skan					    && (!memcmp(tok, "struct", 7)
226169689Skan					    || !memcmp(tok, "union", 6)
227169689Skan					    || !memcmp(tok, "enum", 5))) {
228169689Skan						/*
229169689Skan						 * get line immediately;
230169689Skan						 * may change before '{'
231169689Skan						 */
232169689Skan						getline();
233169689Skan						if (str_entry(c))
234169689Skan							++level;
235169689Skan						break;
236169689Skan						/* } */
237169689Skan					}
238169689Skan				}
239169689Skan				sp = tok;
240169689Skan			}
241169689Skan			else if (sp != tok || begtoken(c)) {
242169689Skan				if (sp == tok + sizeof tok - 1)
243169689Skan					/* Too long -- truncate it */
244169689Skan					*sp = EOS;
245169689Skan				else
246169689Skan					*sp++ = c;
247169689Skan				token = YES;
248169689Skan			}
249169689Skan			continue;
250169689Skan		}
251169689Skan
252169689Skan		sp = tok;
253169689Skan		token = NO;
254169689Skan	}
255169689Skan}
256169689Skan
257169689Skan/*
258169689Skan * func_entry --
259169689Skan *	handle a function reference
260169689Skan */
261169689Skanstatic int
262169689Skanfunc_entry()
263169689Skan{
264169689Skan	int	c;			/* current character */
265169689Skan	int	level = 0;		/* for matching '()' */
266169689Skan
267169689Skan	/*
268169689Skan	 * Find the end of the assumed function declaration.
269169689Skan	 * Note that ANSI C functions can have type definitions so keep
270169689Skan	 * track of the parentheses nesting level.
271169689Skan	 */
272169689Skan	while (GETC(!=, EOF)) {
273169689Skan		switch (c) {
274169689Skan		case '\'':
275169689Skan		case '"':
276169689Skan			/* skip strings and character constants */
277169689Skan			skip_string(c);
278169689Skan			break;
279169689Skan		case '/':
280169689Skan			/* skip comments */
281169689Skan			if (GETC(==, '*') || c == '/')
282169689Skan				skip_comment(c);
283169689Skan			break;
284169689Skan		case '(':
285169689Skan			level++;
286169689Skan			break;
287169689Skan		case ')':
288169689Skan			if (level == 0)
289169689Skan				goto fnd;
290169689Skan			level--;
291169689Skan			break;
292169689Skan		case '\n':
293169689Skan			SETLINE;
294169689Skan		}
295169689Skan	}
296169689Skan	return (NO);
297169689Skanfnd:
298169689Skan	/*
299169689Skan	 * we assume that the character after a function's right paren
300169689Skan	 * is a token character if it's a function and a non-token
301169689Skan	 * character if it's a declaration.  Comments don't count...
302169689Skan	 */
303169689Skan	for (;;) {
304169689Skan		while (GETC(!=, EOF) && iswhite(c))
305169689Skan			if (c == '\n')
306169689Skan				SETLINE;
307169689Skan		if (intoken(c) || c == '{')
308169689Skan			break;
309169689Skan		if (c == '/' && (GETC(==, '*') || c == '/'))
310169689Skan			skip_comment(c);
311169689Skan		else {				/* don't ever "read" '/' */
312169689Skan			(void)ungetc(c, inf);
313169689Skan			return (NO);
314169689Skan		}
315169689Skan	}
316169689Skan	if (c != '{')
317169689Skan		(void)skip_key('{');
318169689Skan	return (YES);
319169689Skan}
320169689Skan
321169689Skan/*
322169689Skan * hash_entry --
323169689Skan *	handle a line starting with a '#'
324169689Skan */
325169689Skanstatic void
326169689Skanhash_entry()
327169689Skan{
328169689Skan	int	c;			/* character read */
329169689Skan	int	curline;		/* line started on */
330169689Skan	char	*sp;			/* buffer pointer */
331169689Skan	char	tok[MAXTOKEN];		/* storage buffer */
332169689Skan
333169689Skan	/* ignore leading whitespace */
334169689Skan	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
335169689Skan		;
336169689Skan	(void)ungetc(c, inf);
337169689Skan
338169689Skan	curline = lineno;
339169689Skan	for (sp = tok;;) {		/* get next token */
340169689Skan		if (GETC(==, EOF))
341169689Skan			return;
342169689Skan		if (iswhite(c))
343169689Skan			break;
344169689Skan		if (sp == tok + sizeof tok - 1)
345169689Skan			/* Too long -- truncate it */
346169689Skan			*sp = EOS;
347169689Skan		else
348169689Skan			*sp++ = c;
349169689Skan	}
350169689Skan	*sp = EOS;
351169689Skan	if (memcmp(tok, "define", 6))	/* only interested in #define's */
352169689Skan		goto skip;
353169689Skan	for (;;) {			/* this doesn't handle "#define \n" */
354169689Skan		if (GETC(==, EOF))
355169689Skan			return;
356169689Skan		if (!iswhite(c))
357169689Skan			break;
358169689Skan	}
359169689Skan	for (sp = tok;;) {		/* get next token */
360169689Skan		if (sp == tok + sizeof tok - 1)
361169689Skan			/* Too long -- truncate it */
362169689Skan			*sp = EOS;
363169689Skan		else
364169689Skan			*sp++ = c;
365169689Skan		if (GETC(==, EOF))
366169689Skan			return;
367169689Skan		/*
368169689Skan		 * this is where it DOESN'T handle
369169689Skan		 * "#define \n"
370169689Skan		 */
371169689Skan		if (!intoken(c))
372169689Skan			break;
373169689Skan	}
374169689Skan	*sp = EOS;
375169689Skan	if (dflag || c == '(') {	/* only want macros */
376169689Skan		getline();
377169689Skan		pfnote(tok, curline);
378169689Skan	}
379169689Skanskip:	if (c == '\n') {		/* get rid of rest of define */
380169689Skan		SETLINE
381169689Skan		if (*(sp - 1) != '\\')
382169689Skan			return;
383169689Skan	}
384169689Skan	(void)skip_key('\n');
385169689Skan}
386169689Skan
387169689Skan/*
388169689Skan * str_entry --
389169689Skan *	handle a struct, union or enum entry
390169689Skan */
391169689Skanstatic int
392169689Skanstr_entry(c)
393169689Skan	int	c;			/* current character */
394169689Skan{
395169689Skan	int	curline;		/* line started on */
396169689Skan	char	*sp;			/* buffer pointer */
397169689Skan	char	tok[LINE_MAX];		/* storage buffer */
398169689Skan
399169689Skan	curline = lineno;
400169689Skan	while (iswhite(c))
401169689Skan		if (GETC(==, EOF))
402169689Skan			return (NO);
403169689Skan	if (c == '{')		/* it was "struct {" */
404169689Skan		return (YES);
405169689Skan	for (sp = tok;;) {		/* get next token */
406169689Skan		if (sp == tok + sizeof tok - 1)
407169689Skan			/* Too long -- truncate it */
408169689Skan			*sp = EOS;
409169689Skan		else
410169689Skan			*sp++ = c;
411169689Skan		if (GETC(==, EOF))
412169689Skan			return (NO);
413169689Skan		if (!intoken(c))
414169689Skan			break;
415169689Skan	}
416169689Skan	switch (c) {
417169689Skan		case '{':		/* it was "struct foo{" */
418169689Skan			--sp;
419169689Skan			break;
420169689Skan		case '\n':		/* it was "struct foo\n" */
421169689Skan			SETLINE;
422169689Skan			/*FALLTHROUGH*/
423169689Skan		default:		/* probably "struct foo " */
424169689Skan			while (GETC(!=, EOF))
425169689Skan				if (!iswhite(c))
426169689Skan					break;
427169689Skan			if (c != '{') {
428169689Skan				(void)ungetc(c, inf);
429169689Skan				return (NO);
430169689Skan			}
431169689Skan	}
432169689Skan	*sp = EOS;
433169689Skan	pfnote(tok, curline);
434169689Skan	return (YES);
435169689Skan}
436169689Skan
437169689Skan/*
438169689Skan * skip_comment --
439169689Skan *	skip over comment
440169689Skan */
441169689Skanvoid
442169689Skanskip_comment(t)
443169689Skan	int	t;			/* comment character */
444169689Skan{
445169689Skan	int	c;			/* character read */
446169689Skan	int	star;			/* '*' flag */
447169689Skan
448169689Skan	for (star = 0; GETC(!=, EOF);)
449169689Skan		switch(c) {
450169689Skan		/* comments don't nest, nor can they be escaped. */
451169689Skan		case '*':
452169689Skan			star = YES;
453169689Skan			break;
454169689Skan		case '/':
455169689Skan			if (star && t == '*')
456169689Skan				return;
457169689Skan			break;
458169689Skan		case '\n':
459169689Skan			if (t == '/')
460169689Skan				return;
461169689Skan			SETLINE;
462169689Skan			/*FALLTHROUGH*/
463169689Skan		default:
464169689Skan			star = NO;
465169689Skan			break;
466169689Skan		}
467169689Skan}
468169689Skan
469169689Skan/*
470169689Skan * skip_string --
471169689Skan *	skip to the end of a string or character constant.
472169689Skan */
473169689Skanvoid
474169689Skanskip_string(key)
475169689Skan	int	key;
476169689Skan{
477169689Skan	int	c,
478169689Skan		skip;
479169689Skan
480169689Skan	for (skip = NO; GETC(!=, EOF); )
481169689Skan		switch (c) {
482169689Skan		case '\\':		/* a backslash escapes anything */
483169689Skan			skip = !skip;	/* we toggle in case it's "\\" */
484169689Skan			break;
485169689Skan		case '\n':
486169689Skan			SETLINE;
487169689Skan			/*FALLTHROUGH*/
488169689Skan		default:
489169689Skan			if (c == key && !skip)
490169689Skan				return;
491169689Skan			skip = NO;
492169689Skan		}
493169689Skan}
494169689Skan
495169689Skan/*
496169689Skan * skip_key --
497169689Skan *	skip to next char "key"
498169689Skan */
499169689Skanint
500169689Skanskip_key(key)
501169689Skan	int	key;
502169689Skan{
503169689Skan	int	c,
504169689Skan		skip,
505169689Skan		retval;
506169689Skan
507169689Skan	for (skip = retval = NO; GETC(!=, EOF);)
508169689Skan		switch(c) {
509169689Skan		case '\\':		/* a backslash escapes anything */
510169689Skan			skip = !skip;	/* we toggle in case it's "\\" */
511169689Skan			break;
512169689Skan		case ';':		/* special case for yacc; if one */
513169689Skan		case '|':		/* of these chars occurs, we may */
514169689Skan			retval = YES;	/* have moved out of the rule */
515169689Skan			break;		/* not used by C */
516169689Skan		case '\'':
517169689Skan		case '"':
518169689Skan			/* skip strings and character constants */
519169689Skan			skip_string(c);
520169689Skan			break;
521169689Skan		case '/':
522169689Skan			/* skip comments */
523169689Skan			if (GETC(==, '*') || c == '/') {
524169689Skan				skip_comment(c);
525169689Skan				break;
526169689Skan			}
527169689Skan			(void)ungetc(c, inf);
528169689Skan			c = '/';
529169689Skan			goto norm;
530169689Skan		case '\n':
531169689Skan			SETLINE;
532169689Skan			/*FALLTHROUGH*/
533169689Skan		default:
534169689Skan		norm:
535169689Skan			if (c == key && !skip)
536169689Skan				return (retval);
537169689Skan			skip = NO;
538169689Skan		}
539169689Skan	return (retval);
540169689Skan}
541169689Skan