C.c revision 100822
11590Srgrimes/* 21590Srgrimes * Copyright (c) 1987, 1993, 1994 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * Redistribution and use in source and binary forms, with or without 61590Srgrimes * modification, are permitted provided that the following conditions 71590Srgrimes * are met: 81590Srgrimes * 1. Redistributions of source code must retain the above copyright 91590Srgrimes * notice, this list of conditions and the following disclaimer. 101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111590Srgrimes * notice, this list of conditions and the following disclaimer in the 121590Srgrimes * documentation and/or other materials provided with the distribution. 131590Srgrimes * 3. All advertising materials mentioning features or use of this software 141590Srgrimes * must display the following acknowledgement: 151590Srgrimes * This product includes software developed by the University of 161590Srgrimes * California, Berkeley and its contributors. 171590Srgrimes * 4. Neither the name of the University nor the names of its contributors 181590Srgrimes * may be used to endorse or promote products derived from this software 191590Srgrimes * without specific prior written permission. 201590Srgrimes * 211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311590Srgrimes * SUCH DAMAGE. 321590Srgrimes */ 331590Srgrimes 3487628Sdwmalone#if 0 351590Srgrimes#ifndef lint 361590Srgrimesstatic char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 3728625Ssteve#endif 3887628Sdwmalone#endif 391590Srgrimes 4087628Sdwmalone#include <sys/cdefs.h> 4187628Sdwmalone__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 100822 2002-07-28 15:50:38Z dwmalone $"); 4287628Sdwmalone 431590Srgrimes#include <limits.h> 441590Srgrimes#include <stdio.h> 4591382Sdwmalone#include <string.h> 461590Srgrimes 471590Srgrimes#include "ctags.h" 481590Srgrimes 4992920Simpstatic int func_entry(void); 5092920Simpstatic void hash_entry(void); 5192920Simpstatic void skip_string(int); 5292920Simpstatic int str_entry(int); 531590Srgrimes 541590Srgrimes/* 551590Srgrimes * c_entries -- 561590Srgrimes * read .c and .h files and call appropriate routines 571590Srgrimes */ 581590Srgrimesvoid 59100822Sdwmalonec_entries(void) 601590Srgrimes{ 611590Srgrimes int c; /* current character */ 621590Srgrimes int level; /* brace level */ 631590Srgrimes int token; /* if reading a token */ 641590Srgrimes int t_def; /* if reading a typedef */ 651590Srgrimes int t_level; /* typedef's brace level */ 661590Srgrimes char *sp; /* buffer pointer */ 671590Srgrimes char tok[MAXTOKEN]; /* token buffer */ 681590Srgrimes 691590Srgrimes lineftell = ftell(inf); 701590Srgrimes sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 711590Srgrimes while (GETC(!=, EOF)) { 721590Srgrimes switch (c) { 731590Srgrimes /* 741590Srgrimes * Here's where it DOESN'T handle: { 751590Srgrimes * foo(a) 761590Srgrimes * { 771590Srgrimes * #ifdef notdef 781590Srgrimes * } 791590Srgrimes * #endif 801590Srgrimes * if (a) 811590Srgrimes * puts("hello, world"); 821590Srgrimes * } 831590Srgrimes */ 841590Srgrimes case '{': 851590Srgrimes ++level; 861590Srgrimes goto endtok; 871590Srgrimes case '}': 881590Srgrimes /* 891590Srgrimes * if level goes below zero, try and fix 901590Srgrimes * it, even though we've already messed up 911590Srgrimes */ 921590Srgrimes if (--level < 0) 931590Srgrimes level = 0; 941590Srgrimes goto endtok; 951590Srgrimes 961590Srgrimes case '\n': 971590Srgrimes SETLINE; 981590Srgrimes /* 991590Srgrimes * the above 3 cases are similar in that they 1001590Srgrimes * are special characters that also end tokens. 1011590Srgrimes */ 1021590Srgrimes endtok: if (sp > tok) { 1031590Srgrimes *sp = EOS; 1041590Srgrimes token = YES; 1051590Srgrimes sp = tok; 1061590Srgrimes } 1071590Srgrimes else 1081590Srgrimes token = NO; 1091590Srgrimes continue; 1101590Srgrimes 1111590Srgrimes /* 1121590Srgrimes * We ignore quoted strings and character constants 1131590Srgrimes * completely. 1141590Srgrimes */ 1151590Srgrimes case '"': 1161590Srgrimes case '\'': 1171590Srgrimes (void)skip_string(c); 1181590Srgrimes break; 1191590Srgrimes 1201590Srgrimes /* 1211590Srgrimes * comments can be fun; note the state is unchanged after 1221590Srgrimes * return, in case we found: 1231590Srgrimes * "foo() XX comment XX { int bar; }" 1241590Srgrimes */ 1251590Srgrimes case '/': 12691189Sgshapiro if (GETC(==, '*') || c == '/') { 12791189Sgshapiro skip_comment(c); 1281590Srgrimes continue; 1291590Srgrimes } 1301590Srgrimes (void)ungetc(c, inf); 1311590Srgrimes c = '/'; 1321590Srgrimes goto storec; 1331590Srgrimes 1341590Srgrimes /* hash marks flag #define's. */ 1351590Srgrimes case '#': 1361590Srgrimes if (sp == tok) { 1371590Srgrimes hash_entry(); 1381590Srgrimes break; 1391590Srgrimes } 1401590Srgrimes goto storec; 1411590Srgrimes 1421590Srgrimes /* 1431590Srgrimes * if we have a current token, parenthesis on 1441590Srgrimes * level zero indicates a function. 1451590Srgrimes */ 1461590Srgrimes case '(': 1471590Srgrimes if (!level && token) { 1481590Srgrimes int curline; 1491590Srgrimes 1501590Srgrimes if (sp != tok) 1511590Srgrimes *sp = EOS; 1521590Srgrimes /* 1531590Srgrimes * grab the line immediately, we may 1541590Srgrimes * already be wrong, for example, 1551590Srgrimes * foo\n 1561590Srgrimes * (arg1, 1571590Srgrimes */ 1581590Srgrimes getline(); 1591590Srgrimes curline = lineno; 1601590Srgrimes if (func_entry()) { 1611590Srgrimes ++level; 1621590Srgrimes pfnote(tok, curline); 1631590Srgrimes } 1641590Srgrimes break; 1651590Srgrimes } 1661590Srgrimes goto storec; 1671590Srgrimes 1681590Srgrimes /* 1691590Srgrimes * semi-colons indicate the end of a typedef; if we find a 1701590Srgrimes * typedef we search for the next semi-colon of the same 1711590Srgrimes * level as the typedef. Ignoring "structs", they are 1721590Srgrimes * tricky, since you can find: 1731590Srgrimes * 1741590Srgrimes * "typedef long time_t;" 1751590Srgrimes * "typedef unsigned int u_int;" 1761590Srgrimes * "typedef unsigned int u_int [10];" 1771590Srgrimes * 1781590Srgrimes * If looking at a typedef, we save a copy of the last token 1791590Srgrimes * found. Then, when we find the ';' we take the current 1801590Srgrimes * token if it starts with a valid token name, else we take 1811590Srgrimes * the one we saved. There's probably some reasonable 1821590Srgrimes * alternative to this... 1831590Srgrimes */ 1841590Srgrimes case ';': 1851590Srgrimes if (t_def && level == t_level) { 1861590Srgrimes t_def = NO; 1871590Srgrimes getline(); 1881590Srgrimes if (sp != tok) 1891590Srgrimes *sp = EOS; 1901590Srgrimes pfnote(tok, lineno); 1911590Srgrimes break; 1921590Srgrimes } 1931590Srgrimes goto storec; 1941590Srgrimes 1951590Srgrimes /* 1961590Srgrimes * store characters until one that can't be part of a token 1971590Srgrimes * comes along; check the current token against certain 1981590Srgrimes * reserved words. 1991590Srgrimes */ 2001590Srgrimes default: 20128625Ssteve /* ignore whitespace */ 20228625Ssteve if (c == ' ' || c == '\t') { 20328625Ssteve int save = c; 20428625Ssteve while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 20528625Ssteve ; 20628625Ssteve if (c == EOF) 20728625Ssteve return; 20828625Ssteve (void)ungetc(c, inf); 20928625Ssteve c = save; 21028625Ssteve } 2111590Srgrimes storec: if (!intoken(c)) { 2121590Srgrimes if (sp == tok) 2131590Srgrimes break; 2141590Srgrimes *sp = EOS; 2151590Srgrimes if (tflag) { 2161590Srgrimes /* no typedefs inside typedefs */ 2171590Srgrimes if (!t_def && 2181590Srgrimes !memcmp(tok, "typedef",8)) { 2191590Srgrimes t_def = YES; 2201590Srgrimes t_level = level; 2211590Srgrimes break; 2221590Srgrimes } 2231590Srgrimes /* catch "typedef struct" */ 2241590Srgrimes if ((!t_def || t_level < level) 2251590Srgrimes && (!memcmp(tok, "struct", 7) 2261590Srgrimes || !memcmp(tok, "union", 6) 2271590Srgrimes || !memcmp(tok, "enum", 5))) { 2281590Srgrimes /* 2291590Srgrimes * get line immediately; 2301590Srgrimes * may change before '{' 2311590Srgrimes */ 2321590Srgrimes getline(); 2331590Srgrimes if (str_entry(c)) 2341590Srgrimes ++level; 2351590Srgrimes break; 2361590Srgrimes /* } */ 2371590Srgrimes } 2381590Srgrimes } 2391590Srgrimes sp = tok; 2401590Srgrimes } 2411590Srgrimes else if (sp != tok || begtoken(c)) { 24297574Stjr if (sp == tok + sizeof tok - 1) 24397574Stjr /* Too long -- truncate it */ 24497574Stjr *sp = EOS; 24597574Stjr else 24697574Stjr *sp++ = c; 2471590Srgrimes token = YES; 2481590Srgrimes } 2491590Srgrimes continue; 2501590Srgrimes } 2511590Srgrimes 2521590Srgrimes sp = tok; 2531590Srgrimes token = NO; 2541590Srgrimes } 2551590Srgrimes} 2561590Srgrimes 2571590Srgrimes/* 2581590Srgrimes * func_entry -- 2591590Srgrimes * handle a function reference 2601590Srgrimes */ 2611590Srgrimesstatic int 262100822Sdwmalonefunc_entry(void) 2631590Srgrimes{ 2641590Srgrimes int c; /* current character */ 2651590Srgrimes int level = 0; /* for matching '()' */ 2661590Srgrimes 2671590Srgrimes /* 2681590Srgrimes * Find the end of the assumed function declaration. 2691590Srgrimes * Note that ANSI C functions can have type definitions so keep 2701590Srgrimes * track of the parentheses nesting level. 2711590Srgrimes */ 2721590Srgrimes while (GETC(!=, EOF)) { 2731590Srgrimes switch (c) { 2741590Srgrimes case '\'': 2751590Srgrimes case '"': 2761590Srgrimes /* skip strings and character constants */ 2771590Srgrimes skip_string(c); 2781590Srgrimes break; 2791590Srgrimes case '/': 2801590Srgrimes /* skip comments */ 28191189Sgshapiro if (GETC(==, '*') || c == '/') 28291189Sgshapiro skip_comment(c); 2831590Srgrimes break; 2841590Srgrimes case '(': 2851590Srgrimes level++; 2861590Srgrimes break; 2871590Srgrimes case ')': 2881590Srgrimes if (level == 0) 2891590Srgrimes goto fnd; 2901590Srgrimes level--; 2911590Srgrimes break; 2921590Srgrimes case '\n': 2931590Srgrimes SETLINE; 2941590Srgrimes } 2951590Srgrimes } 2961590Srgrimes return (NO); 2971590Srgrimesfnd: 2981590Srgrimes /* 2991590Srgrimes * we assume that the character after a function's right paren 3001590Srgrimes * is a token character if it's a function and a non-token 3011590Srgrimes * character if it's a declaration. Comments don't count... 3021590Srgrimes */ 3031590Srgrimes for (;;) { 3041590Srgrimes while (GETC(!=, EOF) && iswhite(c)) 3051590Srgrimes if (c == '\n') 3061590Srgrimes SETLINE; 3071590Srgrimes if (intoken(c) || c == '{') 3081590Srgrimes break; 30991189Sgshapiro if (c == '/' && (GETC(==, '*') || c == '/')) 31091189Sgshapiro skip_comment(c); 3111590Srgrimes else { /* don't ever "read" '/' */ 3121590Srgrimes (void)ungetc(c, inf); 3131590Srgrimes return (NO); 3141590Srgrimes } 3151590Srgrimes } 3161590Srgrimes if (c != '{') 3171590Srgrimes (void)skip_key('{'); 3181590Srgrimes return (YES); 3191590Srgrimes} 3201590Srgrimes 3211590Srgrimes/* 3221590Srgrimes * hash_entry -- 3231590Srgrimes * handle a line starting with a '#' 3241590Srgrimes */ 3251590Srgrimesstatic void 326100822Sdwmalonehash_entry(void) 3271590Srgrimes{ 3281590Srgrimes int c; /* character read */ 3291590Srgrimes int curline; /* line started on */ 3301590Srgrimes char *sp; /* buffer pointer */ 3311590Srgrimes char tok[MAXTOKEN]; /* storage buffer */ 3321590Srgrimes 33328625Ssteve /* ignore leading whitespace */ 33428625Ssteve while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 33528625Ssteve ; 33628625Ssteve (void)ungetc(c, inf); 33728625Ssteve 3381590Srgrimes curline = lineno; 3391590Srgrimes for (sp = tok;;) { /* get next token */ 3401590Srgrimes if (GETC(==, EOF)) 3411590Srgrimes return; 3421590Srgrimes if (iswhite(c)) 3431590Srgrimes break; 34497574Stjr if (sp == tok + sizeof tok - 1) 34597574Stjr /* Too long -- truncate it */ 34697574Stjr *sp = EOS; 34797574Stjr else 34897574Stjr *sp++ = c; 3491590Srgrimes } 3501590Srgrimes *sp = EOS; 3511590Srgrimes if (memcmp(tok, "define", 6)) /* only interested in #define's */ 3521590Srgrimes goto skip; 3531590Srgrimes for (;;) { /* this doesn't handle "#define \n" */ 3541590Srgrimes if (GETC(==, EOF)) 3551590Srgrimes return; 3561590Srgrimes if (!iswhite(c)) 3571590Srgrimes break; 3581590Srgrimes } 3591590Srgrimes for (sp = tok;;) { /* get next token */ 36097574Stjr if (sp == tok + sizeof tok - 1) 36197574Stjr /* Too long -- truncate it */ 36297574Stjr *sp = EOS; 36397574Stjr else 36497574Stjr *sp++ = c; 3651590Srgrimes if (GETC(==, EOF)) 3661590Srgrimes return; 3671590Srgrimes /* 3681590Srgrimes * this is where it DOESN'T handle 3691590Srgrimes * "#define \n" 3701590Srgrimes */ 3711590Srgrimes if (!intoken(c)) 3721590Srgrimes break; 3731590Srgrimes } 3741590Srgrimes *sp = EOS; 3751590Srgrimes if (dflag || c == '(') { /* only want macros */ 3761590Srgrimes getline(); 3771590Srgrimes pfnote(tok, curline); 3781590Srgrimes } 3791590Srgrimesskip: if (c == '\n') { /* get rid of rest of define */ 3801590Srgrimes SETLINE 3811590Srgrimes if (*(sp - 1) != '\\') 3821590Srgrimes return; 3831590Srgrimes } 3841590Srgrimes (void)skip_key('\n'); 3851590Srgrimes} 3861590Srgrimes 3871590Srgrimes/* 3881590Srgrimes * str_entry -- 3891590Srgrimes * handle a struct, union or enum entry 3901590Srgrimes */ 3911590Srgrimesstatic int 392100822Sdwmalonestr_entry(int c) /* c is current character */ 3931590Srgrimes{ 3941590Srgrimes int curline; /* line started on */ 3951590Srgrimes char *sp; /* buffer pointer */ 3961590Srgrimes char tok[LINE_MAX]; /* storage buffer */ 3971590Srgrimes 3981590Srgrimes curline = lineno; 3991590Srgrimes while (iswhite(c)) 4001590Srgrimes if (GETC(==, EOF)) 4011590Srgrimes return (NO); 4021590Srgrimes if (c == '{') /* it was "struct {" */ 4031590Srgrimes return (YES); 4041590Srgrimes for (sp = tok;;) { /* get next token */ 40597574Stjr if (sp == tok + sizeof tok - 1) 40697574Stjr /* Too long -- truncate it */ 40797574Stjr *sp = EOS; 40897574Stjr else 40997574Stjr *sp++ = c; 4101590Srgrimes if (GETC(==, EOF)) 4111590Srgrimes return (NO); 4121590Srgrimes if (!intoken(c)) 4131590Srgrimes break; 4141590Srgrimes } 4151590Srgrimes switch (c) { 4161590Srgrimes case '{': /* it was "struct foo{" */ 4171590Srgrimes --sp; 4181590Srgrimes break; 4191590Srgrimes case '\n': /* it was "struct foo\n" */ 4201590Srgrimes SETLINE; 4211590Srgrimes /*FALLTHROUGH*/ 4221590Srgrimes default: /* probably "struct foo " */ 4231590Srgrimes while (GETC(!=, EOF)) 4241590Srgrimes if (!iswhite(c)) 4251590Srgrimes break; 4261590Srgrimes if (c != '{') { 4271590Srgrimes (void)ungetc(c, inf); 4281590Srgrimes return (NO); 4291590Srgrimes } 4301590Srgrimes } 4311590Srgrimes *sp = EOS; 4321590Srgrimes pfnote(tok, curline); 4331590Srgrimes return (YES); 4341590Srgrimes} 4351590Srgrimes 4361590Srgrimes/* 4371590Srgrimes * skip_comment -- 4381590Srgrimes * skip over comment 4391590Srgrimes */ 4401590Srgrimesvoid 441100822Sdwmaloneskip_comment(int t) /* t is comment character */ 4421590Srgrimes{ 4431590Srgrimes int c; /* character read */ 4441590Srgrimes int star; /* '*' flag */ 4451590Srgrimes 4461590Srgrimes for (star = 0; GETC(!=, EOF);) 4471590Srgrimes switch(c) { 4481590Srgrimes /* comments don't nest, nor can they be escaped. */ 4491590Srgrimes case '*': 4501590Srgrimes star = YES; 4511590Srgrimes break; 4521590Srgrimes case '/': 45391189Sgshapiro if (star && t == '*') 4541590Srgrimes return; 4551590Srgrimes break; 4561590Srgrimes case '\n': 45791189Sgshapiro if (t == '/') 45891189Sgshapiro return; 4591590Srgrimes SETLINE; 4601590Srgrimes /*FALLTHROUGH*/ 4611590Srgrimes default: 4621590Srgrimes star = NO; 4631590Srgrimes break; 4641590Srgrimes } 4651590Srgrimes} 4661590Srgrimes 4671590Srgrimes/* 4681590Srgrimes * skip_string -- 4691590Srgrimes * skip to the end of a string or character constant. 4701590Srgrimes */ 4711590Srgrimesvoid 472100822Sdwmaloneskip_string(int key) 4731590Srgrimes{ 4741590Srgrimes int c, 4751590Srgrimes skip; 4761590Srgrimes 4771590Srgrimes for (skip = NO; GETC(!=, EOF); ) 4781590Srgrimes switch (c) { 4791590Srgrimes case '\\': /* a backslash escapes anything */ 4801590Srgrimes skip = !skip; /* we toggle in case it's "\\" */ 4811590Srgrimes break; 4821590Srgrimes case '\n': 4831590Srgrimes SETLINE; 4841590Srgrimes /*FALLTHROUGH*/ 4851590Srgrimes default: 4861590Srgrimes if (c == key && !skip) 4871590Srgrimes return; 4881590Srgrimes skip = NO; 4891590Srgrimes } 4901590Srgrimes} 4911590Srgrimes 4921590Srgrimes/* 4931590Srgrimes * skip_key -- 4941590Srgrimes * skip to next char "key" 4951590Srgrimes */ 4961590Srgrimesint 497100822Sdwmaloneskip_key(int key) 4981590Srgrimes{ 4991590Srgrimes int c, 5001590Srgrimes skip, 5011590Srgrimes retval; 5021590Srgrimes 5031590Srgrimes for (skip = retval = NO; GETC(!=, EOF);) 5041590Srgrimes switch(c) { 5051590Srgrimes case '\\': /* a backslash escapes anything */ 5061590Srgrimes skip = !skip; /* we toggle in case it's "\\" */ 5071590Srgrimes break; 5081590Srgrimes case ';': /* special case for yacc; if one */ 5091590Srgrimes case '|': /* of these chars occurs, we may */ 5101590Srgrimes retval = YES; /* have moved out of the rule */ 5111590Srgrimes break; /* not used by C */ 5121590Srgrimes case '\'': 5131590Srgrimes case '"': 5141590Srgrimes /* skip strings and character constants */ 5151590Srgrimes skip_string(c); 5161590Srgrimes break; 5171590Srgrimes case '/': 5181590Srgrimes /* skip comments */ 51991189Sgshapiro if (GETC(==, '*') || c == '/') { 52091189Sgshapiro skip_comment(c); 5211590Srgrimes break; 5221590Srgrimes } 5231590Srgrimes (void)ungetc(c, inf); 5241590Srgrimes c = '/'; 5251590Srgrimes goto norm; 5261590Srgrimes case '\n': 5271590Srgrimes SETLINE; 5281590Srgrimes /*FALLTHROUGH*/ 5291590Srgrimes default: 5301590Srgrimes norm: 5311590Srgrimes if (c == key && !skip) 5321590Srgrimes return (retval); 5331590Srgrimes skip = NO; 5341590Srgrimes } 5351590Srgrimes return (retval); 5361590Srgrimes} 537