C.c revision 87628
190380Smsmith/*
290380Smsmith * Copyright (c) 1987, 1993, 1994
390380Smsmith *	The Regents of the University of California.  All rights reserved.
490380Smsmith *
590380Smsmith * Redistribution and use in source and binary forms, with or without
690380Smsmith * modification, are permitted provided that the following conditions
798146Siwasaki * are met:
898146Siwasaki * 1. Redistributions of source code must retain the above copyright
998146Siwasaki *    notice, this list of conditions and the following disclaimer.
1098146Siwasaki * 2. Redistributions in binary form must reproduce the above copyright
1198146Siwasaki *    notice, this list of conditions and the following disclaimer in the
1290380Smsmith *    documentation and/or other materials provided with the distribution.
13193529Sjkim * 3. All advertising materials mentioning features or use of this software
14193529Sjkim *    must display the following acknowledgement:
1590380Smsmith *	This product includes software developed by the University of
16123333Snjl *	California, Berkeley and its contributors.
17231844Sjkim * 4. Neither the name of the University nor the names of its contributors
18193529Sjkim *    may be used to endorse or promote products derived from this software
1990380Smsmith *    without specific prior written permission.
20231844Sjkim *
21193529Sjkim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22228110Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23228110Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24193529Sjkim * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25151946Sjkim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26193529Sjkim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27193529Sjkim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28193529Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29193529Sjkim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30193529Sjkim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31228110Sjkim * SUCH DAMAGE.
32197104Sjkim */
33213806Sjkim
34213806Sjkim#if 0
35193529Sjkim#ifndef lint
3690380Smsmithstatic char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
3790380Smsmith#endif
3890380Smsmith#endif
39193529Sjkim
4090380Smsmith#include <sys/cdefs.h>
4190380Smsmith__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 87628 2001-12-10 21:13:08Z dwmalone $");
4290380Smsmith
4390380Smsmith#include <limits.h>
4490380Smsmith#include <stdio.h>
4590380Smsmith#include <string.h>
4690380Smsmith
4790380Smsmith#include "ctags.h"
4890380Smsmith
4990380Smsmithstatic int	func_entry __P((void));
50209746Sjkimstatic void	hash_entry __P((void));
5190380Smsmithstatic void	skip_string __P((int));
5290380Smsmithstatic int	str_entry __P((int));
53151946Sjkim
5490380Smsmith/*
5590380Smsmith * c_entries --
56193529Sjkim *	read .c and .h files and call appropriate routines
57123333Snjl */
58123333Snjlvoid
59209746Sjkimc_entries()
60123333Snjl{
61193529Sjkim	int	c;			/* current character */
62209746Sjkim	int	level;			/* brace level */
6390380Smsmith	int	token;			/* if reading a token */
64151604Sobrien	int	t_def;			/* if reading a typedef */
65151946Sjkim	int	t_level;		/* typedef's brace level */
66220663Sjkim	char	*sp;			/* buffer pointer */
67193529Sjkim	char	tok[MAXTOKEN];		/* token buffer */
68151946Sjkim
69151946Sjkim	lineftell = ftell(inf);
70209746Sjkim	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
71151946Sjkim	while (GETC(!=, EOF)) {
72151946Sjkim		switch (c) {
73193529Sjkim		/*
74209746Sjkim		 * Here's where it DOESN'T handle: {
75193529Sjkim		 *	foo(a)
76193529Sjkim		 *	{
77151604Sobrien		 *	#ifdef notdef
7890380Smsmith		 *		}
7990380Smsmith		 *	#endif
8098146Siwasaki		 *		if (a)
81114244Snjl		 *			puts("hello, world");
82114244Snjl		 *	}
83228110Sjkim		 */
84228110Sjkim		case '{':
85228110Sjkim			++level;
86228110Sjkim			goto endtok;
87228110Sjkim		case '}':
88			/*
89			 * if level goes below zero, try and fix
90			 * it, even though we've already messed up
91			 */
92			if (--level < 0)
93				level = 0;
94			goto endtok;
95
96		case '\n':
97			SETLINE;
98			/*
99			 * the above 3 cases are similar in that they
100			 * are special characters that also end tokens.
101			 */
102	endtok:			if (sp > tok) {
103				*sp = EOS;
104				token = YES;
105				sp = tok;
106			}
107			else
108				token = NO;
109			continue;
110
111		/*
112		 * We ignore quoted strings and character constants
113		 * completely.
114		 */
115		case '"':
116		case '\'':
117			(void)skip_string(c);
118			break;
119
120		/*
121		 * comments can be fun; note the state is unchanged after
122		 * return, in case we found:
123		 *	"foo() XX comment XX { int bar; }"
124		 */
125		case '/':
126			if (GETC(==, '*')) {
127				skip_comment();
128				continue;
129			}
130			(void)ungetc(c, inf);
131			c = '/';
132			goto storec;
133
134		/* hash marks flag #define's. */
135		case '#':
136			if (sp == tok) {
137				hash_entry();
138				break;
139			}
140			goto storec;
141
142		/*
143		 * if we have a current token, parenthesis on
144		 * level zero indicates a function.
145		 */
146		case '(':
147			if (!level && token) {
148				int	curline;
149
150				if (sp != tok)
151					*sp = EOS;
152				/*
153				 * grab the line immediately, we may
154				 * already be wrong, for example,
155				 *	foo\n
156				 *	(arg1,
157				 */
158				getline();
159				curline = lineno;
160				if (func_entry()) {
161					++level;
162					pfnote(tok, curline);
163				}
164				break;
165			}
166			goto storec;
167
168		/*
169		 * semi-colons indicate the end of a typedef; if we find a
170		 * typedef we search for the next semi-colon of the same
171		 * level as the typedef.  Ignoring "structs", they are
172		 * tricky, since you can find:
173		 *
174		 *	"typedef long time_t;"
175		 *	"typedef unsigned int u_int;"
176		 *	"typedef unsigned int u_int [10];"
177		 *
178		 * If looking at a typedef, we save a copy of the last token
179		 * found.  Then, when we find the ';' we take the current
180		 * token if it starts with a valid token name, else we take
181		 * the one we saved.  There's probably some reasonable
182		 * alternative to this...
183		 */
184		case ';':
185			if (t_def && level == t_level) {
186				t_def = NO;
187				getline();
188				if (sp != tok)
189					*sp = EOS;
190				pfnote(tok, lineno);
191				break;
192			}
193			goto storec;
194
195		/*
196		 * store characters until one that can't be part of a token
197		 * comes along; check the current token against certain
198		 * reserved words.
199		 */
200		default:
201			/* ignore whitespace */
202			if (c == ' ' || c == '\t') {
203				int save = c;
204				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
205					;
206				if (c == EOF)
207					return;
208				(void)ungetc(c, inf);
209				c = save;
210			}
211	storec:		if (!intoken(c)) {
212				if (sp == tok)
213					break;
214				*sp = EOS;
215				if (tflag) {
216					/* no typedefs inside typedefs */
217					if (!t_def &&
218						   !memcmp(tok, "typedef",8)) {
219						t_def = YES;
220						t_level = level;
221						break;
222					}
223					/* catch "typedef struct" */
224					if ((!t_def || t_level < level)
225					    && (!memcmp(tok, "struct", 7)
226					    || !memcmp(tok, "union", 6)
227					    || !memcmp(tok, "enum", 5))) {
228						/*
229						 * get line immediately;
230						 * may change before '{'
231						 */
232						getline();
233						if (str_entry(c))
234							++level;
235						break;
236						/* } */
237					}
238				}
239				sp = tok;
240			}
241			else if (sp != tok || begtoken(c)) {
242				*sp++ = c;
243				token = YES;
244			}
245			continue;
246		}
247
248		sp = tok;
249		token = NO;
250	}
251}
252
253/*
254 * func_entry --
255 *	handle a function reference
256 */
257static int
258func_entry()
259{
260	int	c;			/* current character */
261	int	level = 0;		/* for matching '()' */
262
263	/*
264	 * Find the end of the assumed function declaration.
265	 * Note that ANSI C functions can have type definitions so keep
266	 * track of the parentheses nesting level.
267	 */
268	while (GETC(!=, EOF)) {
269		switch (c) {
270		case '\'':
271		case '"':
272			/* skip strings and character constants */
273			skip_string(c);
274			break;
275		case '/':
276			/* skip comments */
277			if (GETC(==, '*'))
278				skip_comment();
279			break;
280		case '(':
281			level++;
282			break;
283		case ')':
284			if (level == 0)
285				goto fnd;
286			level--;
287			break;
288		case '\n':
289			SETLINE;
290		}
291	}
292	return (NO);
293fnd:
294	/*
295	 * we assume that the character after a function's right paren
296	 * is a token character if it's a function and a non-token
297	 * character if it's a declaration.  Comments don't count...
298	 */
299	for (;;) {
300		while (GETC(!=, EOF) && iswhite(c))
301			if (c == '\n')
302				SETLINE;
303		if (intoken(c) || c == '{')
304			break;
305		if (c == '/' && GETC(==, '*'))
306			skip_comment();
307		else {				/* don't ever "read" '/' */
308			(void)ungetc(c, inf);
309			return (NO);
310		}
311	}
312	if (c != '{')
313		(void)skip_key('{');
314	return (YES);
315}
316
317/*
318 * hash_entry --
319 *	handle a line starting with a '#'
320 */
321static void
322hash_entry()
323{
324	int	c;			/* character read */
325	int	curline;		/* line started on */
326	char	*sp;			/* buffer pointer */
327	char	tok[MAXTOKEN];		/* storage buffer */
328
329	/* ignore leading whitespace */
330	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
331		;
332	(void)ungetc(c, inf);
333
334	curline = lineno;
335	for (sp = tok;;) {		/* get next token */
336		if (GETC(==, EOF))
337			return;
338		if (iswhite(c))
339			break;
340		*sp++ = c;
341	}
342	*sp = EOS;
343	if (memcmp(tok, "define", 6))	/* only interested in #define's */
344		goto skip;
345	for (;;) {			/* this doesn't handle "#define \n" */
346		if (GETC(==, EOF))
347			return;
348		if (!iswhite(c))
349			break;
350	}
351	for (sp = tok;;) {		/* get next token */
352		*sp++ = c;
353		if (GETC(==, EOF))
354			return;
355		/*
356		 * this is where it DOESN'T handle
357		 * "#define \n"
358		 */
359		if (!intoken(c))
360			break;
361	}
362	*sp = EOS;
363	if (dflag || c == '(') {	/* only want macros */
364		getline();
365		pfnote(tok, curline);
366	}
367skip:	if (c == '\n') {		/* get rid of rest of define */
368		SETLINE
369		if (*(sp - 1) != '\\')
370			return;
371	}
372	(void)skip_key('\n');
373}
374
375/*
376 * str_entry --
377 *	handle a struct, union or enum entry
378 */
379static int
380str_entry(c)
381	int	c;			/* current character */
382{
383	int	curline;		/* line started on */
384	char	*sp;			/* buffer pointer */
385	char	tok[LINE_MAX];		/* storage buffer */
386
387	curline = lineno;
388	while (iswhite(c))
389		if (GETC(==, EOF))
390			return (NO);
391	if (c == '{')		/* it was "struct {" */
392		return (YES);
393	for (sp = tok;;) {		/* get next token */
394		*sp++ = c;
395		if (GETC(==, EOF))
396			return (NO);
397		if (!intoken(c))
398			break;
399	}
400	switch (c) {
401		case '{':		/* it was "struct foo{" */
402			--sp;
403			break;
404		case '\n':		/* it was "struct foo\n" */
405			SETLINE;
406			/*FALLTHROUGH*/
407		default:		/* probably "struct foo " */
408			while (GETC(!=, EOF))
409				if (!iswhite(c))
410					break;
411			if (c != '{') {
412				(void)ungetc(c, inf);
413				return (NO);
414			}
415	}
416	*sp = EOS;
417	pfnote(tok, curline);
418	return (YES);
419}
420
421/*
422 * skip_comment --
423 *	skip over comment
424 */
425void
426skip_comment()
427{
428	int	c;			/* character read */
429	int	star;			/* '*' flag */
430
431	for (star = 0; GETC(!=, EOF);)
432		switch(c) {
433		/* comments don't nest, nor can they be escaped. */
434		case '*':
435			star = YES;
436			break;
437		case '/':
438			if (star)
439				return;
440			break;
441		case '\n':
442			SETLINE;
443			/*FALLTHROUGH*/
444		default:
445			star = NO;
446			break;
447		}
448}
449
450/*
451 * skip_string --
452 *	skip to the end of a string or character constant.
453 */
454void
455skip_string(key)
456	int	key;
457{
458	int	c,
459		skip;
460
461	for (skip = NO; GETC(!=, EOF); )
462		switch (c) {
463		case '\\':		/* a backslash escapes anything */
464			skip = !skip;	/* we toggle in case it's "\\" */
465			break;
466		case '\n':
467			SETLINE;
468			/*FALLTHROUGH*/
469		default:
470			if (c == key && !skip)
471				return;
472			skip = NO;
473		}
474}
475
476/*
477 * skip_key --
478 *	skip to next char "key"
479 */
480int
481skip_key(key)
482	int	key;
483{
484	int	c,
485		skip,
486		retval;
487
488	for (skip = retval = NO; GETC(!=, EOF);)
489		switch(c) {
490		case '\\':		/* a backslash escapes anything */
491			skip = !skip;	/* we toggle in case it's "\\" */
492			break;
493		case ';':		/* special case for yacc; if one */
494		case '|':		/* of these chars occurs, we may */
495			retval = YES;	/* have moved out of the rule */
496			break;		/* not used by C */
497		case '\'':
498		case '"':
499			/* skip strings and character constants */
500			skip_string(c);
501			break;
502		case '/':
503			/* skip comments */
504			if (GETC(==, '*')) {
505				skip_comment();
506				break;
507			}
508			(void)ungetc(c, inf);
509			c = '/';
510			goto norm;
511		case '\n':
512			SETLINE;
513			/*FALLTHROUGH*/
514		default:
515		norm:
516			if (c == key && !skip)
517				return (retval);
518			skip = NO;
519		}
520	return (retval);
521}
522