/* C.c revision 97574 */
1169689Skan/* 2169689Skan * Copyright (c) 1987, 1993, 1994 3169689Skan * The Regents of the University of California. All rights reserved. 4169689Skan * 5169689Skan * Redistribution and use in source and binary forms, with or without 6169689Skan * modification, are permitted provided that the following conditions 7169689Skan * are met: 8169689Skan * 1. Redistributions of source code must retain the above copyright 9169689Skan * notice, this list of conditions and the following disclaimer. 10169689Skan * 2. Redistributions in binary form must reproduce the above copyright 11169689Skan * notice, this list of conditions and the following disclaimer in the 12169689Skan * documentation and/or other materials provided with the distribution. 13169689Skan * 3. All advertising materials mentioning features or use of this software 14169689Skan * must display the following acknowledgement: 15169689Skan * This product includes software developed by the University of 16169689Skan * California, Berkeley and its contributors. 17169689Skan * 4. Neither the name of the University nor the names of its contributors 18169689Skan * may be used to endorse or promote products derived from this software 19169689Skan * without specific prior written permission. 20169689Skan * 21169689Skan * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22169689Skan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23169689Skan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24169689Skan * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25169689Skan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26169689Skan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27169689Skan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28169689Skan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29169689Skan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30169689Skan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31169689Skan * SUCH DAMAGE. 32169689Skan */ 33169689Skan 34169689Skan#if 0 35169689Skan#ifndef lint 36169689Skanstatic char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 37169689Skan#endif 38169689Skan#endif 39169689Skan 40169689Skan#include <sys/cdefs.h> 41169689Skan__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 97574 2002-05-30 10:54:53Z tjr $"); 42169689Skan 43169689Skan#include <limits.h> 44169689Skan#include <stdio.h> 45169689Skan#include <string.h> 46169689Skan 47169689Skan#include "ctags.h" 48169689Skan 49169689Skanstatic int func_entry(void); 50169689Skanstatic void hash_entry(void); 51169689Skanstatic void skip_string(int); 52169689Skanstatic int str_entry(int); 53169689Skan 54169689Skan/* 55169689Skan * c_entries -- 56169689Skan * read .c and .h files and call appropriate routines 57169689Skan */ 58169689Skanvoid 59169689Skanc_entries() 60169689Skan{ 61169689Skan int c; /* current character */ 62169689Skan int level; /* brace level */ 63169689Skan int token; /* if reading a token */ 64169689Skan int t_def; /* if reading a typedef */ 65169689Skan int t_level; /* typedef's brace level */ 66169689Skan char *sp; /* buffer pointer */ 67169689Skan char tok[MAXTOKEN]; /* token buffer */ 68169689Skan 69169689Skan lineftell = ftell(inf); 70169689Skan sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 71169689Skan while (GETC(!=, EOF)) { 72169689Skan switch (c) { 73169689Skan /* 
74169689Skan * Here's where it DOESN'T handle: { 75169689Skan * foo(a) 76169689Skan * { 77169689Skan * #ifdef notdef 78169689Skan * } 79169689Skan * #endif 80169689Skan * if (a) 81169689Skan * puts("hello, world"); 82169689Skan * } 83169689Skan */ 84169689Skan case '{': 85169689Skan ++level; 86169689Skan goto endtok; 87169689Skan case '}': 88169689Skan /* 89169689Skan * if level goes below zero, try and fix 90169689Skan * it, even though we've already messed up 91169689Skan */ 92169689Skan if (--level < 0) 93169689Skan level = 0; 94169689Skan goto endtok; 95169689Skan 96169689Skan case '\n': 97169689Skan SETLINE; 98169689Skan /* 99169689Skan * the above 3 cases are similar in that they 100169689Skan * are special characters that also end tokens. 101169689Skan */ 102169689Skan endtok: if (sp > tok) { 103169689Skan *sp = EOS; 104169689Skan token = YES; 105169689Skan sp = tok; 106169689Skan } 107169689Skan else 108169689Skan token = NO; 109169689Skan continue; 110169689Skan 111169689Skan /* 112169689Skan * We ignore quoted strings and character constants 113169689Skan * completely. 114169689Skan */ 115169689Skan case '"': 116169689Skan case '\'': 117169689Skan (void)skip_string(c); 118169689Skan break; 119169689Skan 120169689Skan /* 121169689Skan * comments can be fun; note the state is unchanged after 122169689Skan * return, in case we found: 123169689Skan * "foo() XX comment XX { int bar; }" 124169689Skan */ 125169689Skan case '/': 126169689Skan if (GETC(==, '*') || c == '/') { 127169689Skan skip_comment(c); 128169689Skan continue; 129169689Skan } 130169689Skan (void)ungetc(c, inf); 131169689Skan c = '/'; 132169689Skan goto storec; 133169689Skan 134169689Skan /* hash marks flag #define's. 
*/ 135169689Skan case '#': 136169689Skan if (sp == tok) { 137169689Skan hash_entry(); 138169689Skan break; 139169689Skan } 140169689Skan goto storec; 141169689Skan 142169689Skan /* 143169689Skan * if we have a current token, parenthesis on 144169689Skan * level zero indicates a function. 145169689Skan */ 146169689Skan case '(': 147169689Skan if (!level && token) { 148169689Skan int curline; 149169689Skan 150169689Skan if (sp != tok) 151169689Skan *sp = EOS; 152169689Skan /* 153169689Skan * grab the line immediately, we may 154169689Skan * already be wrong, for example, 155169689Skan * foo\n 156169689Skan * (arg1, 157169689Skan */ 158169689Skan getline(); 159169689Skan curline = lineno; 160169689Skan if (func_entry()) { 161169689Skan ++level; 162169689Skan pfnote(tok, curline); 163169689Skan } 164169689Skan break; 165169689Skan } 166169689Skan goto storec; 167169689Skan 168169689Skan /* 169169689Skan * semi-colons indicate the end of a typedef; if we find a 170169689Skan * typedef we search for the next semi-colon of the same 171169689Skan * level as the typedef. Ignoring "structs", they are 172169689Skan * tricky, since you can find: 173169689Skan * 174169689Skan * "typedef long time_t;" 175169689Skan * "typedef unsigned int u_int;" 176169689Skan * "typedef unsigned int u_int [10];" 177169689Skan * 178169689Skan * If looking at a typedef, we save a copy of the last token 179169689Skan * found. Then, when we find the ';' we take the current 180169689Skan * token if it starts with a valid token name, else we take 181169689Skan * the one we saved. There's probably some reasonable 182169689Skan * alternative to this... 
183169689Skan */ 184169689Skan case ';': 185169689Skan if (t_def && level == t_level) { 186169689Skan t_def = NO; 187169689Skan getline(); 188169689Skan if (sp != tok) 189169689Skan *sp = EOS; 190169689Skan pfnote(tok, lineno); 191169689Skan break; 192169689Skan } 193169689Skan goto storec; 194169689Skan 195169689Skan /* 196169689Skan * store characters until one that can't be part of a token 197169689Skan * comes along; check the current token against certain 198169689Skan * reserved words. 199169689Skan */ 200169689Skan default: 201169689Skan /* ignore whitespace */ 202169689Skan if (c == ' ' || c == '\t') { 203169689Skan int save = c; 204169689Skan while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 205169689Skan ; 206169689Skan if (c == EOF) 207169689Skan return; 208169689Skan (void)ungetc(c, inf); 209169689Skan c = save; 210169689Skan } 211169689Skan storec: if (!intoken(c)) { 212169689Skan if (sp == tok) 213169689Skan break; 214169689Skan *sp = EOS; 215169689Skan if (tflag) { 216169689Skan /* no typedefs inside typedefs */ 217169689Skan if (!t_def && 218169689Skan !memcmp(tok, "typedef",8)) { 219169689Skan t_def = YES; 220169689Skan t_level = level; 221169689Skan break; 222169689Skan } 223169689Skan /* catch "typedef struct" */ 224169689Skan if ((!t_def || t_level < level) 225169689Skan && (!memcmp(tok, "struct", 7) 226169689Skan || !memcmp(tok, "union", 6) 227169689Skan || !memcmp(tok, "enum", 5))) { 228169689Skan /* 229169689Skan * get line immediately; 230169689Skan * may change before '{' 231169689Skan */ 232169689Skan getline(); 233169689Skan if (str_entry(c)) 234169689Skan ++level; 235169689Skan break; 236169689Skan /* } */ 237169689Skan } 238169689Skan } 239169689Skan sp = tok; 240169689Skan } 241169689Skan else if (sp != tok || begtoken(c)) { 242169689Skan if (sp == tok + sizeof tok - 1) 243169689Skan /* Too long -- truncate it */ 244169689Skan *sp = EOS; 245169689Skan else 246169689Skan *sp++ = c; 247169689Skan token = YES; 248169689Skan } 249169689Skan 
continue; 250169689Skan } 251169689Skan 252169689Skan sp = tok; 253169689Skan token = NO; 254169689Skan } 255169689Skan} 256169689Skan 257169689Skan/* 258169689Skan * func_entry -- 259169689Skan * handle a function reference 260169689Skan */ 261169689Skanstatic int 262169689Skanfunc_entry() 263169689Skan{ 264169689Skan int c; /* current character */ 265169689Skan int level = 0; /* for matching '()' */ 266169689Skan 267169689Skan /* 268169689Skan * Find the end of the assumed function declaration. 269169689Skan * Note that ANSI C functions can have type definitions so keep 270169689Skan * track of the parentheses nesting level. 271169689Skan */ 272169689Skan while (GETC(!=, EOF)) { 273169689Skan switch (c) { 274169689Skan case '\'': 275169689Skan case '"': 276169689Skan /* skip strings and character constants */ 277169689Skan skip_string(c); 278169689Skan break; 279169689Skan case '/': 280169689Skan /* skip comments */ 281169689Skan if (GETC(==, '*') || c == '/') 282169689Skan skip_comment(c); 283169689Skan break; 284169689Skan case '(': 285169689Skan level++; 286169689Skan break; 287169689Skan case ')': 288169689Skan if (level == 0) 289169689Skan goto fnd; 290169689Skan level--; 291169689Skan break; 292169689Skan case '\n': 293169689Skan SETLINE; 294169689Skan } 295169689Skan } 296169689Skan return (NO); 297169689Skanfnd: 298169689Skan /* 299169689Skan * we assume that the character after a function's right paren 300169689Skan * is a token character if it's a function and a non-token 301169689Skan * character if it's a declaration. Comments don't count... 
302169689Skan */ 303169689Skan for (;;) { 304169689Skan while (GETC(!=, EOF) && iswhite(c)) 305169689Skan if (c == '\n') 306169689Skan SETLINE; 307169689Skan if (intoken(c) || c == '{') 308169689Skan break; 309169689Skan if (c == '/' && (GETC(==, '*') || c == '/')) 310169689Skan skip_comment(c); 311169689Skan else { /* don't ever "read" '/' */ 312169689Skan (void)ungetc(c, inf); 313169689Skan return (NO); 314169689Skan } 315169689Skan } 316169689Skan if (c != '{') 317169689Skan (void)skip_key('{'); 318169689Skan return (YES); 319169689Skan} 320169689Skan 321169689Skan/* 322169689Skan * hash_entry -- 323169689Skan * handle a line starting with a '#' 324169689Skan */ 325169689Skanstatic void 326169689Skanhash_entry() 327169689Skan{ 328169689Skan int c; /* character read */ 329169689Skan int curline; /* line started on */ 330169689Skan char *sp; /* buffer pointer */ 331169689Skan char tok[MAXTOKEN]; /* storage buffer */ 332169689Skan 333169689Skan /* ignore leading whitespace */ 334169689Skan while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 335169689Skan ; 336169689Skan (void)ungetc(c, inf); 337169689Skan 338169689Skan curline = lineno; 339169689Skan for (sp = tok;;) { /* get next token */ 340169689Skan if (GETC(==, EOF)) 341169689Skan return; 342169689Skan if (iswhite(c)) 343169689Skan break; 344169689Skan if (sp == tok + sizeof tok - 1) 345169689Skan /* Too long -- truncate it */ 346169689Skan *sp = EOS; 347169689Skan else 348169689Skan *sp++ = c; 349169689Skan } 350169689Skan *sp = EOS; 351169689Skan if (memcmp(tok, "define", 6)) /* only interested in #define's */ 352169689Skan goto skip; 353169689Skan for (;;) { /* this doesn't handle "#define \n" */ 354169689Skan if (GETC(==, EOF)) 355169689Skan return; 356169689Skan if (!iswhite(c)) 357169689Skan break; 358169689Skan } 359169689Skan for (sp = tok;;) { /* get next token */ 360169689Skan if (sp == tok + sizeof tok - 1) 361169689Skan /* Too long -- truncate it */ 362169689Skan *sp = EOS; 363169689Skan else 
364169689Skan *sp++ = c; 365169689Skan if (GETC(==, EOF)) 366169689Skan return; 367169689Skan /* 368169689Skan * this is where it DOESN'T handle 369169689Skan * "#define \n" 370169689Skan */ 371169689Skan if (!intoken(c)) 372169689Skan break; 373169689Skan } 374169689Skan *sp = EOS; 375169689Skan if (dflag || c == '(') { /* only want macros */ 376169689Skan getline(); 377169689Skan pfnote(tok, curline); 378169689Skan } 379169689Skanskip: if (c == '\n') { /* get rid of rest of define */ 380169689Skan SETLINE 381169689Skan if (*(sp - 1) != '\\') 382169689Skan return; 383169689Skan } 384169689Skan (void)skip_key('\n'); 385169689Skan} 386169689Skan 387169689Skan/* 388169689Skan * str_entry -- 389169689Skan * handle a struct, union or enum entry 390169689Skan */ 391169689Skanstatic int 392169689Skanstr_entry(c) 393169689Skan int c; /* current character */ 394169689Skan{ 395169689Skan int curline; /* line started on */ 396169689Skan char *sp; /* buffer pointer */ 397169689Skan char tok[LINE_MAX]; /* storage buffer */ 398169689Skan 399169689Skan curline = lineno; 400169689Skan while (iswhite(c)) 401169689Skan if (GETC(==, EOF)) 402169689Skan return (NO); 403169689Skan if (c == '{') /* it was "struct {" */ 404169689Skan return (YES); 405169689Skan for (sp = tok;;) { /* get next token */ 406169689Skan if (sp == tok + sizeof tok - 1) 407169689Skan /* Too long -- truncate it */ 408169689Skan *sp = EOS; 409169689Skan else 410169689Skan *sp++ = c; 411169689Skan if (GETC(==, EOF)) 412169689Skan return (NO); 413169689Skan if (!intoken(c)) 414169689Skan break; 415169689Skan } 416169689Skan switch (c) { 417169689Skan case '{': /* it was "struct foo{" */ 418169689Skan --sp; 419169689Skan break; 420169689Skan case '\n': /* it was "struct foo\n" */ 421169689Skan SETLINE; 422169689Skan /*FALLTHROUGH*/ 423169689Skan default: /* probably "struct foo " */ 424169689Skan while (GETC(!=, EOF)) 425169689Skan if (!iswhite(c)) 426169689Skan break; 427169689Skan if (c != '{') { 428169689Skan 
(void)ungetc(c, inf); 429169689Skan return (NO); 430169689Skan } 431169689Skan } 432169689Skan *sp = EOS; 433169689Skan pfnote(tok, curline); 434169689Skan return (YES); 435169689Skan} 436169689Skan 437169689Skan/* 438169689Skan * skip_comment -- 439169689Skan * skip over comment 440169689Skan */ 441169689Skanvoid 442169689Skanskip_comment(t) 443169689Skan int t; /* comment character */ 444169689Skan{ 445169689Skan int c; /* character read */ 446169689Skan int star; /* '*' flag */ 447169689Skan 448169689Skan for (star = 0; GETC(!=, EOF);) 449169689Skan switch(c) { 450169689Skan /* comments don't nest, nor can they be escaped. */ 451169689Skan case '*': 452169689Skan star = YES; 453169689Skan break; 454169689Skan case '/': 455169689Skan if (star && t == '*') 456169689Skan return; 457169689Skan break; 458169689Skan case '\n': 459169689Skan if (t == '/') 460169689Skan return; 461169689Skan SETLINE; 462169689Skan /*FALLTHROUGH*/ 463169689Skan default: 464169689Skan star = NO; 465169689Skan break; 466169689Skan } 467169689Skan} 468169689Skan 469169689Skan/* 470169689Skan * skip_string -- 471169689Skan * skip to the end of a string or character constant. 
472169689Skan */ 473169689Skanvoid 474169689Skanskip_string(key) 475169689Skan int key; 476169689Skan{ 477169689Skan int c, 478169689Skan skip; 479169689Skan 480169689Skan for (skip = NO; GETC(!=, EOF); ) 481169689Skan switch (c) { 482169689Skan case '\\': /* a backslash escapes anything */ 483169689Skan skip = !skip; /* we toggle in case it's "\\" */ 484169689Skan break; 485169689Skan case '\n': 486169689Skan SETLINE; 487169689Skan /*FALLTHROUGH*/ 488169689Skan default: 489169689Skan if (c == key && !skip) 490169689Skan return; 491169689Skan skip = NO; 492169689Skan } 493169689Skan} 494169689Skan 495169689Skan/* 496169689Skan * skip_key -- 497169689Skan * skip to next char "key" 498169689Skan */ 499169689Skanint 500169689Skanskip_key(key) 501169689Skan int key; 502169689Skan{ 503169689Skan int c, 504169689Skan skip, 505169689Skan retval; 506169689Skan 507169689Skan for (skip = retval = NO; GETC(!=, EOF);) 508169689Skan switch(c) { 509169689Skan case '\\': /* a backslash escapes anything */ 510169689Skan skip = !skip; /* we toggle in case it's "\\" */ 511169689Skan break; 512169689Skan case ';': /* special case for yacc; if one */ 513169689Skan case '|': /* of these chars occurs, we may */ 514169689Skan retval = YES; /* have moved out of the rule */ 515169689Skan break; /* not used by C */ 516169689Skan case '\'': 517169689Skan case '"': 518169689Skan /* skip strings and character constants */ 519169689Skan skip_string(c); 520169689Skan break; 521169689Skan case '/': 522169689Skan /* skip comments */ 523169689Skan if (GETC(==, '*') || c == '/') { 524169689Skan skip_comment(c); 525169689Skan break; 526169689Skan } 527169689Skan (void)ungetc(c, inf); 528169689Skan c = '/'; 529169689Skan goto norm; 530169689Skan case '\n': 531169689Skan SETLINE; 532169689Skan /*FALLTHROUGH*/ 533169689Skan default: 534169689Skan norm: 535169689Skan if (c == key && !skip) 536169689Skan return (retval); 537169689Skan skip = NO; 538169689Skan } 539169689Skan return (retval); 
540169689Skan} 541169689Skan