/* C.c revision 87628 */
190380Smsmith/* 290380Smsmith * Copyright (c) 1987, 1993, 1994 390380Smsmith * The Regents of the University of California. All rights reserved. 490380Smsmith * 590380Smsmith * Redistribution and use in source and binary forms, with or without 690380Smsmith * modification, are permitted provided that the following conditions 798146Siwasaki * are met: 898146Siwasaki * 1. Redistributions of source code must retain the above copyright 998146Siwasaki * notice, this list of conditions and the following disclaimer. 1098146Siwasaki * 2. Redistributions in binary form must reproduce the above copyright 1198146Siwasaki * notice, this list of conditions and the following disclaimer in the 1290380Smsmith * documentation and/or other materials provided with the distribution. 13193529Sjkim * 3. All advertising materials mentioning features or use of this software 14193529Sjkim * must display the following acknowledgement: 1590380Smsmith * This product includes software developed by the University of 16123333Snjl * California, Berkeley and its contributors. 17231844Sjkim * 4. Neither the name of the University nor the names of its contributors 18193529Sjkim * may be used to endorse or promote products derived from this software 1990380Smsmith * without specific prior written permission. 20231844Sjkim * 21193529Sjkim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22228110Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23228110Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24193529Sjkim * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25151946Sjkim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26193529Sjkim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27193529Sjkim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28193529Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29193529Sjkim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30193529Sjkim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31228110Sjkim * SUCH DAMAGE. 32197104Sjkim */ 33213806Sjkim 34213806Sjkim#if 0 35193529Sjkim#ifndef lint 3690380Smsmithstatic char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 3790380Smsmith#endif 3890380Smsmith#endif 39193529Sjkim 4090380Smsmith#include <sys/cdefs.h> 4190380Smsmith__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 87628 2001-12-10 21:13:08Z dwmalone $"); 4290380Smsmith 4390380Smsmith#include <limits.h> 4490380Smsmith#include <stdio.h> 4590380Smsmith#include <string.h> 4690380Smsmith 4790380Smsmith#include "ctags.h" 4890380Smsmith 4990380Smsmithstatic int func_entry __P((void)); 50209746Sjkimstatic void hash_entry __P((void)); 5190380Smsmithstatic void skip_string __P((int)); 5290380Smsmithstatic int str_entry __P((int)); 53151946Sjkim 5490380Smsmith/* 5590380Smsmith * c_entries -- 56193529Sjkim * read .c and .h files and call appropriate routines 57123333Snjl */ 58123333Snjlvoid 59209746Sjkimc_entries() 60123333Snjl{ 61193529Sjkim int c; /* current character */ 62209746Sjkim int level; /* brace level */ 6390380Smsmith int token; /* if reading a token */ 64151604Sobrien int t_def; /* if reading a typedef */ 65151946Sjkim int t_level; /* typedef's brace level */ 66220663Sjkim char *sp; /* buffer pointer */ 67193529Sjkim char tok[MAXTOKEN]; /* token buffer */ 68151946Sjkim 69151946Sjkim lineftell = ftell(inf); 70209746Sjkim sp = tok; token = t_def = NO; t_level = -1; level = 0; 
lineno = 1; 71151946Sjkim while (GETC(!=, EOF)) { 72151946Sjkim switch (c) { 73193529Sjkim /* 74209746Sjkim * Here's where it DOESN'T handle: { 75193529Sjkim * foo(a) 76193529Sjkim * { 77151604Sobrien * #ifdef notdef 7890380Smsmith * } 7990380Smsmith * #endif 8098146Siwasaki * if (a) 81114244Snjl * puts("hello, world"); 82114244Snjl * } 83228110Sjkim */ 84228110Sjkim case '{': 85228110Sjkim ++level; 86228110Sjkim goto endtok; 87228110Sjkim case '}': 88 /* 89 * if level goes below zero, try and fix 90 * it, even though we've already messed up 91 */ 92 if (--level < 0) 93 level = 0; 94 goto endtok; 95 96 case '\n': 97 SETLINE; 98 /* 99 * the above 3 cases are similar in that they 100 * are special characters that also end tokens. 101 */ 102 endtok: if (sp > tok) { 103 *sp = EOS; 104 token = YES; 105 sp = tok; 106 } 107 else 108 token = NO; 109 continue; 110 111 /* 112 * We ignore quoted strings and character constants 113 * completely. 114 */ 115 case '"': 116 case '\'': 117 (void)skip_string(c); 118 break; 119 120 /* 121 * comments can be fun; note the state is unchanged after 122 * return, in case we found: 123 * "foo() XX comment XX { int bar; }" 124 */ 125 case '/': 126 if (GETC(==, '*')) { 127 skip_comment(); 128 continue; 129 } 130 (void)ungetc(c, inf); 131 c = '/'; 132 goto storec; 133 134 /* hash marks flag #define's. */ 135 case '#': 136 if (sp == tok) { 137 hash_entry(); 138 break; 139 } 140 goto storec; 141 142 /* 143 * if we have a current token, parenthesis on 144 * level zero indicates a function. 
145 */ 146 case '(': 147 if (!level && token) { 148 int curline; 149 150 if (sp != tok) 151 *sp = EOS; 152 /* 153 * grab the line immediately, we may 154 * already be wrong, for example, 155 * foo\n 156 * (arg1, 157 */ 158 getline(); 159 curline = lineno; 160 if (func_entry()) { 161 ++level; 162 pfnote(tok, curline); 163 } 164 break; 165 } 166 goto storec; 167 168 /* 169 * semi-colons indicate the end of a typedef; if we find a 170 * typedef we search for the next semi-colon of the same 171 * level as the typedef. Ignoring "structs", they are 172 * tricky, since you can find: 173 * 174 * "typedef long time_t;" 175 * "typedef unsigned int u_int;" 176 * "typedef unsigned int u_int [10];" 177 * 178 * If looking at a typedef, we save a copy of the last token 179 * found. Then, when we find the ';' we take the current 180 * token if it starts with a valid token name, else we take 181 * the one we saved. There's probably some reasonable 182 * alternative to this... 183 */ 184 case ';': 185 if (t_def && level == t_level) { 186 t_def = NO; 187 getline(); 188 if (sp != tok) 189 *sp = EOS; 190 pfnote(tok, lineno); 191 break; 192 } 193 goto storec; 194 195 /* 196 * store characters until one that can't be part of a token 197 * comes along; check the current token against certain 198 * reserved words. 
199 */ 200 default: 201 /* ignore whitespace */ 202 if (c == ' ' || c == '\t') { 203 int save = c; 204 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 205 ; 206 if (c == EOF) 207 return; 208 (void)ungetc(c, inf); 209 c = save; 210 } 211 storec: if (!intoken(c)) { 212 if (sp == tok) 213 break; 214 *sp = EOS; 215 if (tflag) { 216 /* no typedefs inside typedefs */ 217 if (!t_def && 218 !memcmp(tok, "typedef",8)) { 219 t_def = YES; 220 t_level = level; 221 break; 222 } 223 /* catch "typedef struct" */ 224 if ((!t_def || t_level < level) 225 && (!memcmp(tok, "struct", 7) 226 || !memcmp(tok, "union", 6) 227 || !memcmp(tok, "enum", 5))) { 228 /* 229 * get line immediately; 230 * may change before '{' 231 */ 232 getline(); 233 if (str_entry(c)) 234 ++level; 235 break; 236 /* } */ 237 } 238 } 239 sp = tok; 240 } 241 else if (sp != tok || begtoken(c)) { 242 *sp++ = c; 243 token = YES; 244 } 245 continue; 246 } 247 248 sp = tok; 249 token = NO; 250 } 251} 252 253/* 254 * func_entry -- 255 * handle a function reference 256 */ 257static int 258func_entry() 259{ 260 int c; /* current character */ 261 int level = 0; /* for matching '()' */ 262 263 /* 264 * Find the end of the assumed function declaration. 265 * Note that ANSI C functions can have type definitions so keep 266 * track of the parentheses nesting level. 267 */ 268 while (GETC(!=, EOF)) { 269 switch (c) { 270 case '\'': 271 case '"': 272 /* skip strings and character constants */ 273 skip_string(c); 274 break; 275 case '/': 276 /* skip comments */ 277 if (GETC(==, '*')) 278 skip_comment(); 279 break; 280 case '(': 281 level++; 282 break; 283 case ')': 284 if (level == 0) 285 goto fnd; 286 level--; 287 break; 288 case '\n': 289 SETLINE; 290 } 291 } 292 return (NO); 293fnd: 294 /* 295 * we assume that the character after a function's right paren 296 * is a token character if it's a function and a non-token 297 * character if it's a declaration. Comments don't count... 
298 */ 299 for (;;) { 300 while (GETC(!=, EOF) && iswhite(c)) 301 if (c == '\n') 302 SETLINE; 303 if (intoken(c) || c == '{') 304 break; 305 if (c == '/' && GETC(==, '*')) 306 skip_comment(); 307 else { /* don't ever "read" '/' */ 308 (void)ungetc(c, inf); 309 return (NO); 310 } 311 } 312 if (c != '{') 313 (void)skip_key('{'); 314 return (YES); 315} 316 317/* 318 * hash_entry -- 319 * handle a line starting with a '#' 320 */ 321static void 322hash_entry() 323{ 324 int c; /* character read */ 325 int curline; /* line started on */ 326 char *sp; /* buffer pointer */ 327 char tok[MAXTOKEN]; /* storage buffer */ 328 329 /* ignore leading whitespace */ 330 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 331 ; 332 (void)ungetc(c, inf); 333 334 curline = lineno; 335 for (sp = tok;;) { /* get next token */ 336 if (GETC(==, EOF)) 337 return; 338 if (iswhite(c)) 339 break; 340 *sp++ = c; 341 } 342 *sp = EOS; 343 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 344 goto skip; 345 for (;;) { /* this doesn't handle "#define \n" */ 346 if (GETC(==, EOF)) 347 return; 348 if (!iswhite(c)) 349 break; 350 } 351 for (sp = tok;;) { /* get next token */ 352 *sp++ = c; 353 if (GETC(==, EOF)) 354 return; 355 /* 356 * this is where it DOESN'T handle 357 * "#define \n" 358 */ 359 if (!intoken(c)) 360 break; 361 } 362 *sp = EOS; 363 if (dflag || c == '(') { /* only want macros */ 364 getline(); 365 pfnote(tok, curline); 366 } 367skip: if (c == '\n') { /* get rid of rest of define */ 368 SETLINE 369 if (*(sp - 1) != '\\') 370 return; 371 } 372 (void)skip_key('\n'); 373} 374 375/* 376 * str_entry -- 377 * handle a struct, union or enum entry 378 */ 379static int 380str_entry(c) 381 int c; /* current character */ 382{ 383 int curline; /* line started on */ 384 char *sp; /* buffer pointer */ 385 char tok[LINE_MAX]; /* storage buffer */ 386 387 curline = lineno; 388 while (iswhite(c)) 389 if (GETC(==, EOF)) 390 return (NO); 391 if (c == '{') /* it was "struct {" */ 392 return 
(YES); 393 for (sp = tok;;) { /* get next token */ 394 *sp++ = c; 395 if (GETC(==, EOF)) 396 return (NO); 397 if (!intoken(c)) 398 break; 399 } 400 switch (c) { 401 case '{': /* it was "struct foo{" */ 402 --sp; 403 break; 404 case '\n': /* it was "struct foo\n" */ 405 SETLINE; 406 /*FALLTHROUGH*/ 407 default: /* probably "struct foo " */ 408 while (GETC(!=, EOF)) 409 if (!iswhite(c)) 410 break; 411 if (c != '{') { 412 (void)ungetc(c, inf); 413 return (NO); 414 } 415 } 416 *sp = EOS; 417 pfnote(tok, curline); 418 return (YES); 419} 420 421/* 422 * skip_comment -- 423 * skip over comment 424 */ 425void 426skip_comment() 427{ 428 int c; /* character read */ 429 int star; /* '*' flag */ 430 431 for (star = 0; GETC(!=, EOF);) 432 switch(c) { 433 /* comments don't nest, nor can they be escaped. */ 434 case '*': 435 star = YES; 436 break; 437 case '/': 438 if (star) 439 return; 440 break; 441 case '\n': 442 SETLINE; 443 /*FALLTHROUGH*/ 444 default: 445 star = NO; 446 break; 447 } 448} 449 450/* 451 * skip_string -- 452 * skip to the end of a string or character constant. 
453 */ 454void 455skip_string(key) 456 int key; 457{ 458 int c, 459 skip; 460 461 for (skip = NO; GETC(!=, EOF); ) 462 switch (c) { 463 case '\\': /* a backslash escapes anything */ 464 skip = !skip; /* we toggle in case it's "\\" */ 465 break; 466 case '\n': 467 SETLINE; 468 /*FALLTHROUGH*/ 469 default: 470 if (c == key && !skip) 471 return; 472 skip = NO; 473 } 474} 475 476/* 477 * skip_key -- 478 * skip to next char "key" 479 */ 480int 481skip_key(key) 482 int key; 483{ 484 int c, 485 skip, 486 retval; 487 488 for (skip = retval = NO; GETC(!=, EOF);) 489 switch(c) { 490 case '\\': /* a backslash escapes anything */ 491 skip = !skip; /* we toggle in case it's "\\" */ 492 break; 493 case ';': /* special case for yacc; if one */ 494 case '|': /* of these chars occurs, we may */ 495 retval = YES; /* have moved out of the rule */ 496 break; /* not used by C */ 497 case '\'': 498 case '"': 499 /* skip strings and character constants */ 500 skip_string(c); 501 break; 502 case '/': 503 /* skip comments */ 504 if (GETC(==, '*')) { 505 skip_comment(); 506 break; 507 } 508 (void)ungetc(c, inf); 509 c = '/'; 510 goto norm; 511 case '\n': 512 SETLINE; 513 /*FALLTHROUGH*/ 514 default: 515 norm: 516 if (c == key && !skip) 517 return (retval); 518 skip = NO; 519 } 520 return (retval); 521} 522