C.c revision 1590
11590Srgrimes/* 21590Srgrimes * Copyright (c) 1987, 1993, 1994 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * Redistribution and use in source and binary forms, with or without 61590Srgrimes * modification, are permitted provided that the following conditions 71590Srgrimes * are met: 81590Srgrimes * 1. Redistributions of source code must retain the above copyright 91590Srgrimes * notice, this list of conditions and the following disclaimer. 101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111590Srgrimes * notice, this list of conditions and the following disclaimer in the 121590Srgrimes * documentation and/or other materials provided with the distribution. 131590Srgrimes * 3. All advertising materials mentioning features or use of this software 141590Srgrimes * must display the following acknowledgement: 151590Srgrimes * This product includes software developed by the University of 161590Srgrimes * California, Berkeley and its contributors. 171590Srgrimes * 4. Neither the name of the University nor the names of its contributors 181590Srgrimes * may be used to endorse or promote products derived from this software 191590Srgrimes * without specific prior written permission. 201590Srgrimes * 211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311590Srgrimes * SUCH DAMAGE. 321590Srgrimes */ 331590Srgrimes 341590Srgrimes#ifndef lint 351590Srgrimesstatic char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 361590Srgrimes#endif /* not lint */ 371590Srgrimes 381590Srgrimes#include <limits.h> 391590Srgrimes#include <stdio.h> 401590Srgrimes#include <string.h> 411590Srgrimes 421590Srgrimes#include "ctags.h" 431590Srgrimes 441590Srgrimesstatic int func_entry __P((void)); 451590Srgrimesstatic void hash_entry __P((void)); 461590Srgrimesstatic void skip_string __P((int)); 471590Srgrimesstatic int str_entry __P((int)); 481590Srgrimes 491590Srgrimes/* 501590Srgrimes * c_entries -- 511590Srgrimes * read .c and .h files and call appropriate routines 521590Srgrimes */ 531590Srgrimesvoid 541590Srgrimesc_entries() 551590Srgrimes{ 561590Srgrimes int c; /* current character */ 571590Srgrimes int level; /* brace level */ 581590Srgrimes int token; /* if reading a token */ 591590Srgrimes int t_def; /* if reading a typedef */ 601590Srgrimes int t_level; /* typedef's brace level */ 611590Srgrimes char *sp; /* buffer pointer */ 621590Srgrimes char tok[MAXTOKEN]; /* token buffer */ 631590Srgrimes 641590Srgrimes lineftell = ftell(inf); 651590Srgrimes sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 661590Srgrimes while (GETC(!=, EOF)) { 671590Srgrimes switch (c) { 681590Srgrimes /* 691590Srgrimes * Here's where it DOESN'T handle: { 701590Srgrimes * foo(a) 711590Srgrimes * { 721590Srgrimes * #ifdef notdef 731590Srgrimes * } 741590Srgrimes * #endif 751590Srgrimes * if (a) 761590Srgrimes * puts("hello, world"); 771590Srgrimes * } 781590Srgrimes */ 791590Srgrimes case '{': 801590Srgrimes ++level; 811590Srgrimes goto endtok; 821590Srgrimes case '}': 831590Srgrimes /* 841590Srgrimes * if level goes below zero, try and fix 851590Srgrimes * it, even though we've already messed up 861590Srgrimes */ 871590Srgrimes if (--level < 0) 881590Srgrimes level = 0; 891590Srgrimes goto endtok; 901590Srgrimes 911590Srgrimes case '\n': 921590Srgrimes SETLINE; 931590Srgrimes /* 941590Srgrimes * the above 3 cases are similar in that they 951590Srgrimes * are special characters that also end tokens. 961590Srgrimes */ 971590Srgrimes endtok: if (sp > tok) { 981590Srgrimes *sp = EOS; 991590Srgrimes token = YES; 1001590Srgrimes sp = tok; 1011590Srgrimes } 1021590Srgrimes else 1031590Srgrimes token = NO; 1041590Srgrimes continue; 1051590Srgrimes 1061590Srgrimes /* 1071590Srgrimes * We ignore quoted strings and character constants 1081590Srgrimes * completely. 1091590Srgrimes */ 1101590Srgrimes case '"': 1111590Srgrimes case '\'': 1121590Srgrimes (void)skip_string(c); 1131590Srgrimes break; 1141590Srgrimes 1151590Srgrimes /* 1161590Srgrimes * comments can be fun; note the state is unchanged after 1171590Srgrimes * return, in case we found: 1181590Srgrimes * "foo() XX comment XX { int bar; }" 1191590Srgrimes */ 1201590Srgrimes case '/': 1211590Srgrimes if (GETC(==, '*')) { 1221590Srgrimes skip_comment(); 1231590Srgrimes continue; 1241590Srgrimes } 1251590Srgrimes (void)ungetc(c, inf); 1261590Srgrimes c = '/'; 1271590Srgrimes goto storec; 1281590Srgrimes 1291590Srgrimes /* hash marks flag #define's. */ 1301590Srgrimes case '#': 1311590Srgrimes if (sp == tok) { 1321590Srgrimes hash_entry(); 1331590Srgrimes break; 1341590Srgrimes } 1351590Srgrimes goto storec; 1361590Srgrimes 1371590Srgrimes /* 1381590Srgrimes * if we have a current token, parenthesis on 1391590Srgrimes * level zero indicates a function. 1401590Srgrimes */ 1411590Srgrimes case '(': 1421590Srgrimes if (!level && token) { 1431590Srgrimes int curline; 1441590Srgrimes 1451590Srgrimes if (sp != tok) 1461590Srgrimes *sp = EOS; 1471590Srgrimes /* 1481590Srgrimes * grab the line immediately, we may 1491590Srgrimes * already be wrong, for example, 1501590Srgrimes * foo\n 1511590Srgrimes * (arg1, 1521590Srgrimes */ 1531590Srgrimes getline(); 1541590Srgrimes curline = lineno; 1551590Srgrimes if (func_entry()) { 1561590Srgrimes ++level; 1571590Srgrimes pfnote(tok, curline); 1581590Srgrimes } 1591590Srgrimes break; 1601590Srgrimes } 1611590Srgrimes goto storec; 1621590Srgrimes 1631590Srgrimes /* 1641590Srgrimes * semi-colons indicate the end of a typedef; if we find a 1651590Srgrimes * typedef we search for the next semi-colon of the same 1661590Srgrimes * level as the typedef. Ignoring "structs", they are 1671590Srgrimes * tricky, since you can find: 1681590Srgrimes * 1691590Srgrimes * "typedef long time_t;" 1701590Srgrimes * "typedef unsigned int u_int;" 1711590Srgrimes * "typedef unsigned int u_int [10];" 1721590Srgrimes * 1731590Srgrimes * If looking at a typedef, we save a copy of the last token 1741590Srgrimes * found. Then, when we find the ';' we take the current 1751590Srgrimes * token if it starts with a valid token name, else we take 1761590Srgrimes * the one we saved. There's probably some reasonable 1771590Srgrimes * alternative to this... 1781590Srgrimes */ 1791590Srgrimes case ';': 1801590Srgrimes if (t_def && level == t_level) { 1811590Srgrimes t_def = NO; 1821590Srgrimes getline(); 1831590Srgrimes if (sp != tok) 1841590Srgrimes *sp = EOS; 1851590Srgrimes pfnote(tok, lineno); 1861590Srgrimes break; 1871590Srgrimes } 1881590Srgrimes goto storec; 1891590Srgrimes 1901590Srgrimes /* 1911590Srgrimes * store characters until one that can't be part of a token 1921590Srgrimes * comes along; check the current token against certain 1931590Srgrimes * reserved words. 1941590Srgrimes */ 1951590Srgrimes default: 1961590Srgrimes storec: if (!intoken(c)) { 1971590Srgrimes if (sp == tok) 1981590Srgrimes break; 1991590Srgrimes *sp = EOS; 2001590Srgrimes if (tflag) { 2011590Srgrimes /* no typedefs inside typedefs */ 2021590Srgrimes if (!t_def && 2031590Srgrimes !memcmp(tok, "typedef",8)) { 2041590Srgrimes t_def = YES; 2051590Srgrimes t_level = level; 2061590Srgrimes break; 2071590Srgrimes } 2081590Srgrimes /* catch "typedef struct" */ 2091590Srgrimes if ((!t_def || t_level < level) 2101590Srgrimes && (!memcmp(tok, "struct", 7) 2111590Srgrimes || !memcmp(tok, "union", 6) 2121590Srgrimes || !memcmp(tok, "enum", 5))) { 2131590Srgrimes /* 2141590Srgrimes * get line immediately; 2151590Srgrimes * may change before '{' 2161590Srgrimes */ 2171590Srgrimes getline(); 2181590Srgrimes if (str_entry(c)) 2191590Srgrimes ++level; 2201590Srgrimes break; 2211590Srgrimes /* } */ 2221590Srgrimes } 2231590Srgrimes } 2241590Srgrimes sp = tok; 2251590Srgrimes } 2261590Srgrimes else if (sp != tok || begtoken(c)) { 2271590Srgrimes *sp++ = c; 2281590Srgrimes token = YES; 2291590Srgrimes } 2301590Srgrimes continue; 2311590Srgrimes } 2321590Srgrimes 2331590Srgrimes sp = tok; 2341590Srgrimes token = NO; 2351590Srgrimes } 2361590Srgrimes} 2371590Srgrimes 2381590Srgrimes/* 2391590Srgrimes * func_entry -- 2401590Srgrimes * handle a function reference 2411590Srgrimes */ 2421590Srgrimesstatic int 2431590Srgrimesfunc_entry() 2441590Srgrimes{ 2451590Srgrimes int c; /* current character */ 2461590Srgrimes int level = 0; /* for matching '()' */ 2471590Srgrimes 2481590Srgrimes /* 2491590Srgrimes * Find the end of the assumed function declaration. 2501590Srgrimes * Note that ANSI C functions can have type definitions so keep 2511590Srgrimes * track of the parentheses nesting level. 2521590Srgrimes */ 2531590Srgrimes while (GETC(!=, EOF)) { 2541590Srgrimes switch (c) { 2551590Srgrimes case '\'': 2561590Srgrimes case '"': 2571590Srgrimes /* skip strings and character constants */ 2581590Srgrimes skip_string(c); 2591590Srgrimes break; 2601590Srgrimes case '/': 2611590Srgrimes /* skip comments */ 2621590Srgrimes if (GETC(==, '*')) 2631590Srgrimes skip_comment(); 2641590Srgrimes break; 2651590Srgrimes case '(': 2661590Srgrimes level++; 2671590Srgrimes break; 2681590Srgrimes case ')': 2691590Srgrimes if (level == 0) 2701590Srgrimes goto fnd; 2711590Srgrimes level--; 2721590Srgrimes break; 2731590Srgrimes case '\n': 2741590Srgrimes SETLINE; 2751590Srgrimes } 2761590Srgrimes } 2771590Srgrimes return (NO); 2781590Srgrimesfnd: 2791590Srgrimes /* 2801590Srgrimes * we assume that the character after a function's right paren 2811590Srgrimes * is a token character if it's a function and a non-token 2821590Srgrimes * character if it's a declaration. Comments don't count... 2831590Srgrimes */ 2841590Srgrimes for (;;) { 2851590Srgrimes while (GETC(!=, EOF) && iswhite(c)) 2861590Srgrimes if (c == '\n') 2871590Srgrimes SETLINE; 2881590Srgrimes if (intoken(c) || c == '{') 2891590Srgrimes break; 2901590Srgrimes if (c == '/' && GETC(==, '*')) 2911590Srgrimes skip_comment(); 2921590Srgrimes else { /* don't ever "read" '/' */ 2931590Srgrimes (void)ungetc(c, inf); 2941590Srgrimes return (NO); 2951590Srgrimes } 2961590Srgrimes } 2971590Srgrimes if (c != '{') 2981590Srgrimes (void)skip_key('{'); 2991590Srgrimes return (YES); 3001590Srgrimes} 3011590Srgrimes 3021590Srgrimes/* 3031590Srgrimes * hash_entry -- 3041590Srgrimes * handle a line starting with a '#' 3051590Srgrimes */ 3061590Srgrimesstatic void 3071590Srgrimeshash_entry() 3081590Srgrimes{ 3091590Srgrimes int c; /* character read */ 3101590Srgrimes int curline; /* line started on */ 3111590Srgrimes char *sp; /* buffer pointer */ 3121590Srgrimes char tok[MAXTOKEN]; /* storage buffer */ 3131590Srgrimes 3141590Srgrimes curline = lineno; 3151590Srgrimes for (sp = tok;;) { /* get next token */ 3161590Srgrimes if (GETC(==, EOF)) 3171590Srgrimes return; 3181590Srgrimes if (iswhite(c)) 3191590Srgrimes break; 3201590Srgrimes *sp++ = c; 3211590Srgrimes } 3221590Srgrimes *sp = EOS; 3231590Srgrimes if (memcmp(tok, "define", 6)) /* only interested in #define's */ 3241590Srgrimes goto skip; 3251590Srgrimes for (;;) { /* this doesn't handle "#define \n" */ 3261590Srgrimes if (GETC(==, EOF)) 3271590Srgrimes return; 3281590Srgrimes if (!iswhite(c)) 3291590Srgrimes break; 3301590Srgrimes } 3311590Srgrimes for (sp = tok;;) { /* get next token */ 3321590Srgrimes *sp++ = c; 3331590Srgrimes if (GETC(==, EOF)) 3341590Srgrimes return; 3351590Srgrimes /* 3361590Srgrimes * this is where it DOESN'T handle 3371590Srgrimes * "#define \n" 3381590Srgrimes */ 3391590Srgrimes if (!intoken(c)) 3401590Srgrimes break; 3411590Srgrimes } 3421590Srgrimes *sp = EOS; 3431590Srgrimes if (dflag || c == '(') { /* only want macros */ 3441590Srgrimes getline(); 3451590Srgrimes pfnote(tok, curline); 3461590Srgrimes } 3471590Srgrimesskip: if (c == '\n') { /* get rid of rest of define */ 3481590Srgrimes SETLINE 3491590Srgrimes if (*(sp - 1) != '\\') 3501590Srgrimes return; 3511590Srgrimes } 3521590Srgrimes (void)skip_key('\n'); 3531590Srgrimes} 3541590Srgrimes 3551590Srgrimes/* 3561590Srgrimes * str_entry -- 3571590Srgrimes * handle a struct, union or enum entry 3581590Srgrimes */ 3591590Srgrimesstatic int 3601590Srgrimesstr_entry(c) 3611590Srgrimes int c; /* current character */ 3621590Srgrimes{ 3631590Srgrimes int curline; /* line started on */ 3641590Srgrimes char *sp; /* buffer pointer */ 3651590Srgrimes char tok[LINE_MAX]; /* storage buffer */ 3661590Srgrimes 3671590Srgrimes curline = lineno; 3681590Srgrimes while (iswhite(c)) 3691590Srgrimes if (GETC(==, EOF)) 3701590Srgrimes return (NO); 3711590Srgrimes if (c == '{') /* it was "struct {" */ 3721590Srgrimes return (YES); 3731590Srgrimes for (sp = tok;;) { /* get next token */ 3741590Srgrimes *sp++ = c; 3751590Srgrimes if (GETC(==, EOF)) 3761590Srgrimes return (NO); 3771590Srgrimes if (!intoken(c)) 3781590Srgrimes break; 3791590Srgrimes } 3801590Srgrimes switch (c) { 3811590Srgrimes case '{': /* it was "struct foo{" */ 3821590Srgrimes --sp; 3831590Srgrimes break; 3841590Srgrimes case '\n': /* it was "struct foo\n" */ 3851590Srgrimes SETLINE; 3861590Srgrimes /*FALLTHROUGH*/ 3871590Srgrimes default: /* probably "struct foo " */ 3881590Srgrimes while (GETC(!=, EOF)) 3891590Srgrimes if (!iswhite(c)) 3901590Srgrimes break; 3911590Srgrimes if (c != '{') { 3921590Srgrimes (void)ungetc(c, inf); 3931590Srgrimes return (NO); 3941590Srgrimes } 3951590Srgrimes } 3961590Srgrimes *sp = EOS; 3971590Srgrimes pfnote(tok, curline); 3981590Srgrimes return (YES); 3991590Srgrimes} 4001590Srgrimes 4011590Srgrimes/* 4021590Srgrimes * skip_comment -- 4031590Srgrimes * skip over comment 4041590Srgrimes */ 4051590Srgrimesvoid 4061590Srgrimesskip_comment() 4071590Srgrimes{ 4081590Srgrimes int c; /* character read */ 4091590Srgrimes int star; /* '*' flag */ 4101590Srgrimes 4111590Srgrimes for (star = 0; GETC(!=, EOF);) 4121590Srgrimes switch(c) { 4131590Srgrimes /* comments don't nest, nor can they be escaped. */ 4141590Srgrimes case '*': 4151590Srgrimes star = YES; 4161590Srgrimes break; 4171590Srgrimes case '/': 4181590Srgrimes if (star) 4191590Srgrimes return; 4201590Srgrimes break; 4211590Srgrimes case '\n': 4221590Srgrimes SETLINE; 4231590Srgrimes /*FALLTHROUGH*/ 4241590Srgrimes default: 4251590Srgrimes star = NO; 4261590Srgrimes break; 4271590Srgrimes } 4281590Srgrimes} 4291590Srgrimes 4301590Srgrimes/* 4311590Srgrimes * skip_string -- 4321590Srgrimes * skip to the end of a string or character constant. 4331590Srgrimes */ 4341590Srgrimesvoid 4351590Srgrimesskip_string(key) 4361590Srgrimes int key; 4371590Srgrimes{ 4381590Srgrimes int c, 4391590Srgrimes skip; 4401590Srgrimes 4411590Srgrimes for (skip = NO; GETC(!=, EOF); ) 4421590Srgrimes switch (c) { 4431590Srgrimes case '\\': /* a backslash escapes anything */ 4441590Srgrimes skip = !skip; /* we toggle in case it's "\\" */ 4451590Srgrimes break; 4461590Srgrimes case '\n': 4471590Srgrimes SETLINE; 4481590Srgrimes /*FALLTHROUGH*/ 4491590Srgrimes default: 4501590Srgrimes if (c == key && !skip) 4511590Srgrimes return; 4521590Srgrimes skip = NO; 4531590Srgrimes } 4541590Srgrimes} 4551590Srgrimes 4561590Srgrimes/* 4571590Srgrimes * skip_key -- 4581590Srgrimes * skip to next char "key" 4591590Srgrimes */ 4601590Srgrimesint 4611590Srgrimesskip_key(key) 4621590Srgrimes int key; 4631590Srgrimes{ 4641590Srgrimes int c, 4651590Srgrimes skip, 4661590Srgrimes retval; 4671590Srgrimes 4681590Srgrimes for (skip = retval = NO; GETC(!=, EOF);) 4691590Srgrimes switch(c) { 4701590Srgrimes case '\\': /* a backslash escapes anything */ 4711590Srgrimes skip = !skip; /* we toggle in case it's "\\" */ 4721590Srgrimes break; 4731590Srgrimes case ';': /* special case for yacc; if one */ 4741590Srgrimes case '|': /* of these chars occurs, we may */ 4751590Srgrimes retval = YES; /* have moved out of the rule */ 4761590Srgrimes break; /* not used by C */ 4771590Srgrimes case '\'': 4781590Srgrimes case '"': 4791590Srgrimes /* skip strings and character constants */ 4801590Srgrimes skip_string(c); 4811590Srgrimes break; 4821590Srgrimes case '/': 4831590Srgrimes /* skip comments */ 4841590Srgrimes if (GETC(==, '*')) { 4851590Srgrimes skip_comment(); 4861590Srgrimes break; 4871590Srgrimes } 4881590Srgrimes (void)ungetc(c, inf); 4891590Srgrimes c = '/'; 4901590Srgrimes goto norm; 4911590Srgrimes case '\n': 4921590Srgrimes SETLINE; 4931590Srgrimes /*FALLTHROUGH*/ 4941590Srgrimes default: 4951590Srgrimes norm: 4961590Srgrimes if (c == key && !skip) 4971590Srgrimes return (retval); 4981590Srgrimes skip = NO; 4991590Srgrimes } 5001590Srgrimes return (retval); 5011590Srgrimes} 502