lex.c revision 259890
1/* CPP Library - lexical analysis. 2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. 3 Contributed by Per Bothner, 1994-95. 4 Based on CCCP program by Paul Rubin, June 1986 5 Adapted to ANSI C, Richard Stallman, Jan 1987 6 Broken out to separate file, Zack Weinberg, Mar 2000 7 8This program is free software; you can redistribute it and/or modify it 9under the terms of the GNU General Public License as published by the 10Free Software Foundation; either version 2, or (at your option) any 11later version. 12 13This program is distributed in the hope that it will be useful, 14but WITHOUT ANY WARRANTY; without even the implied warranty of 15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16GNU General Public License for more details. 17 18You should have received a copy of the GNU General Public License 19along with this program; if not, write to the Free Software 20Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 21 22#include "config.h" 23#include "system.h" 24#include "cpplib.h" 25#include "internal.h" 26 27enum spell_type 28{ 29 SPELL_OPERATOR = 0, 30 SPELL_IDENT, 31 SPELL_LITERAL, 32 SPELL_NONE 33}; 34 35struct token_spelling 36{ 37 enum spell_type category; 38 const unsigned char *name; 39}; 40 41static const unsigned char *const digraph_spellings[] = 42{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" }; 43 44#define OP(e, s) { SPELL_OPERATOR, U s }, 45#define TK(e, s) { SPELL_ ## s, U #e }, 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; 47#undef OP 48#undef TK 49 50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category) 51#define TOKEN_NAME(token) (token_spellings[(token)->type].name) 52 53static void add_line_note (cpp_buffer *, const uchar *, unsigned int); 54static int skip_line_comment (cpp_reader *); 55static void skip_whitespace (cpp_reader *, cppchar_t); 56static void lex_string (cpp_reader *, cpp_token *, const uchar *); 57static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t); 58static void create_literal (cpp_reader *, cpp_token *, const uchar *, 59 unsigned int, enum cpp_ttype); 60static bool warn_in_comment (cpp_reader *, _cpp_line_note *); 61static int name_p (cpp_reader *, const cpp_string *); 62static tokenrun *next_tokenrun (tokenrun *); 63 64static _cpp_buff *new_buff (size_t); 65 66 67/* Utility routine: 68 69 Compares, the token TOKEN to the NUL-terminated string STRING. 70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */ 71int 72cpp_ideq (const cpp_token *token, const char *string) 73{ 74 if (token->type != CPP_NAME) 75 return 0; 76 77 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string); 78} 79 80/* Record a note TYPE at byte POS into the current cleaned logical 81 line. */ 82static void 83add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type) 84{ 85 if (buffer->notes_used == buffer->notes_cap) 86 { 87 buffer->notes_cap = buffer->notes_cap * 2 + 200; 88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes, 89 buffer->notes_cap); 90 } 91 92 buffer->notes[buffer->notes_used].pos = pos; 93 buffer->notes[buffer->notes_used].type = type; 94 buffer->notes_used++; 95} 96 97/* Returns with a logical line that contains no escaped newlines or 98 trigraphs. This is a time-critical inner loop. */ 99void 100_cpp_clean_line (cpp_reader *pfile) 101{ 102 cpp_buffer *buffer; 103 const uchar *s; 104 uchar c, *d, *p; 105 106 buffer = pfile->buffer; 107 buffer->cur_note = buffer->notes_used = 0; 108 buffer->cur = buffer->line_base = buffer->next_line; 109 buffer->need_line = false; 110 s = buffer->next_line - 1; 111 112 if (!buffer->from_stage3) 113 { 114 const uchar *pbackslash = NULL; 115 116 /* Short circuit for the common case of an un-escaped line with 117 no trigraphs. The primary win here is by not writing any 118 data back to memory until we have to. */ 119 for (;;) 120 { 121 c = *++s; 122 if (__builtin_expect (c == '\n', false) 123 || __builtin_expect (c == '\r', false)) 124 { 125 d = (uchar *) s; 126 127 if (__builtin_expect (s == buffer->rlimit, false)) 128 goto done; 129 130 /* DOS line ending? */ 131 if (__builtin_expect (c == '\r', false) 132 && s[1] == '\n') 133 { 134 s++; 135 if (s == buffer->rlimit) 136 goto done; 137 } 138 139 if (__builtin_expect (pbackslash == NULL, true)) 140 goto done; 141 142 /* Check for escaped newline. */ 143 p = d; 144 while (is_nvspace (p[-1])) 145 p--; 146 if (p - 1 != pbackslash) 147 goto done; 148 149 /* Have an escaped newline; process it and proceed to 150 the slow path. */ 151 add_line_note (buffer, p - 1, p != d ? ' ' : '\\'); 152 d = p - 2; 153 buffer->next_line = p - 1; 154 break; 155 } 156 if (__builtin_expect (c == '\\', false)) 157 pbackslash = s; 158 else if (__builtin_expect (c == '?', false) 159 && __builtin_expect (s[1] == '?', false) 160 && _cpp_trigraph_map[s[2]]) 161 { 162 /* Have a trigraph. We may or may not have to convert 163 it. Add a line note regardless, for -Wtrigraphs. */ 164 add_line_note (buffer, s, s[2]); 165 if (CPP_OPTION (pfile, trigraphs)) 166 { 167 /* We do, and that means we have to switch to the 168 slow path. */ 169 d = (uchar *) s; 170 *d = _cpp_trigraph_map[s[2]]; 171 s += 2; 172 break; 173 } 174 } 175 } 176 177 178 for (;;) 179 { 180 c = *++s; 181 *++d = c; 182 183 if (c == '\n' || c == '\r') 184 { 185 /* Handle DOS line endings. */ 186 if (c == '\r' && s != buffer->rlimit && s[1] == '\n') 187 s++; 188 if (s == buffer->rlimit) 189 break; 190 191 /* Escaped? */ 192 p = d; 193 while (p != buffer->next_line && is_nvspace (p[-1])) 194 p--; 195 if (p == buffer->next_line || p[-1] != '\\') 196 break; 197 198 add_line_note (buffer, p - 1, p != d ? ' ': '\\'); 199 d = p - 2; 200 buffer->next_line = p - 1; 201 } 202 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) 203 { 204 /* Add a note regardless, for the benefit of -Wtrigraphs. */ 205 add_line_note (buffer, d, s[2]); 206 if (CPP_OPTION (pfile, trigraphs)) 207 { 208 *d = _cpp_trigraph_map[s[2]]; 209 s += 2; 210 } 211 } 212 } 213 } 214 else 215 { 216 do 217 s++; 218 while (*s != '\n' && *s != '\r'); 219 d = (uchar *) s; 220 221 /* Handle DOS line endings. */ 222 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') 223 s++; 224 } 225 226 done: 227 *d = '\n'; 228 /* A sentinel note that should never be processed. */ 229 add_line_note (buffer, d + 1, '\n'); 230 buffer->next_line = s + 1; 231} 232 233/* Return true if the trigraph indicated by NOTE should be warned 234 about in a comment. */ 235static bool 236warn_in_comment (cpp_reader *pfile, _cpp_line_note *note) 237{ 238 const uchar *p; 239 240 /* Within comments we don't warn about trigraphs, unless the 241 trigraph forms an escaped newline, as that may change 242 behavior. */ 243 if (note->type != '/') 244 return false; 245 246 /* If -trigraphs, then this was an escaped newline iff the next note 247 is coincident. */ 248 if (CPP_OPTION (pfile, trigraphs)) 249 return note[1].pos == note->pos; 250 251 /* Otherwise, see if this forms an escaped newline. */ 252 p = note->pos + 3; 253 while (is_nvspace (*p)) 254 p++; 255 256 /* There might have been escaped newlines between the trigraph and the 257 newline we found. Hence the position test. */ 258 return (*p == '\n' && p < note[1].pos); 259} 260 261/* Process the notes created by add_line_note as far as the current 262 location. */ 263void 264_cpp_process_line_notes (cpp_reader *pfile, int in_comment) 265{ 266 cpp_buffer *buffer = pfile->buffer; 267 268 for (;;) 269 { 270 _cpp_line_note *note = &buffer->notes[buffer->cur_note]; 271 unsigned int col; 272 273 if (note->pos > buffer->cur) 274 break; 275 276 buffer->cur_note++; 277 col = CPP_BUF_COLUMN (buffer, note->pos + 1); 278 279 if (note->type == '\\' || note->type == ' ') 280 { 281 if (note->type == ' ' && !in_comment) 282 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 283 "backslash and newline separated by space"); 284 285 if (buffer->next_line > buffer->rlimit) 286 { 287 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col, 288 "backslash-newline at end of file"); 289 /* Prevent "no newline at end of file" warning. */ 290 buffer->next_line = buffer->rlimit; 291 } 292 293 buffer->line_base = note->pos; 294 CPP_INCREMENT_LINE (pfile, 0); 295 } 296 else if (_cpp_trigraph_map[note->type]) 297 { 298 if (CPP_OPTION (pfile, warn_trigraphs) 299 && (!in_comment || warn_in_comment (pfile, note))) 300 { 301 if (CPP_OPTION (pfile, trigraphs)) 302 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 303 "trigraph ??%c converted to %c", 304 note->type, 305 (int) _cpp_trigraph_map[note->type]); 306 else 307 { 308 cpp_error_with_line 309 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 310 "trigraph ??%c ignored, use -trigraphs to enable", 311 note->type); 312 } 313 } 314 } 315 else 316 abort (); 317 } 318} 319 320/* Skip a C-style block comment. We find the end of the comment by 321 seeing if an asterisk is before every '/' we encounter. Returns 322 nonzero if comment terminated by EOF, zero otherwise. 323 324 Buffer->cur points to the initial asterisk of the comment. */ 325bool 326_cpp_skip_block_comment (cpp_reader *pfile) 327{ 328 cpp_buffer *buffer = pfile->buffer; 329 const uchar *cur = buffer->cur; 330 uchar c; 331 332 cur++; 333 if (*cur == '/') 334 cur++; 335 336 for (;;) 337 { 338 /* People like decorating comments with '*', so check for '/' 339 instead for efficiency. */ 340 c = *cur++; 341 342 if (c == '/') 343 { 344 if (cur[-2] == '*') 345 break; 346 347 /* Warn about potential nested comments, but not if the '/' 348 comes immediately before the true comment delimiter. 349 Don't bother to get it right across escaped newlines. */ 350 if (CPP_OPTION (pfile, warn_comments) 351 && cur[0] == '*' && cur[1] != '/') 352 { 353 buffer->cur = cur; 354 cpp_error_with_line (pfile, CPP_DL_WARNING, 355 pfile->line_table->highest_line, CPP_BUF_COL (buffer), 356 "\"/*\" within comment"); 357 } 358 } 359 else if (c == '\n') 360 { 361 unsigned int cols; 362 buffer->cur = cur - 1; 363 _cpp_process_line_notes (pfile, true); 364 if (buffer->next_line >= buffer->rlimit) 365 return true; 366 _cpp_clean_line (pfile); 367 368 cols = buffer->next_line - buffer->line_base; 369 CPP_INCREMENT_LINE (pfile, cols); 370 371 cur = buffer->cur; 372 } 373 } 374 375 buffer->cur = cur; 376 _cpp_process_line_notes (pfile, true); 377 return false; 378} 379 380/* Skip a C++ line comment, leaving buffer->cur pointing to the 381 terminating newline. Handles escaped newlines. Returns nonzero 382 if a multiline comment. */ 383static int 384skip_line_comment (cpp_reader *pfile) 385{ 386 cpp_buffer *buffer = pfile->buffer; 387 unsigned int orig_line = pfile->line_table->highest_line; 388 389 while (*buffer->cur != '\n') 390 buffer->cur++; 391 392 _cpp_process_line_notes (pfile, true); 393 return orig_line != pfile->line_table->highest_line; 394} 395 396/* Skips whitespace, saving the next non-whitespace character. */ 397static void 398skip_whitespace (cpp_reader *pfile, cppchar_t c) 399{ 400 cpp_buffer *buffer = pfile->buffer; 401 bool saw_NUL = false; 402 403 do 404 { 405 /* Horizontal space always OK. */ 406 if (c == ' ' || c == '\t') 407 ; 408 /* Just \f \v or \0 left. */ 409 else if (c == '\0') 410 saw_NUL = true; 411 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) 412 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, 413 CPP_BUF_COL (buffer), 414 "%s in preprocessing directive", 415 c == '\f' ? "form feed" : "vertical tab"); 416 417 c = *buffer->cur++; 418 } 419 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */ 420 while (is_nvspace (c)); 421 422 if (saw_NUL) 423 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored"); 424 425 buffer->cur--; 426} 427 428/* See if the characters of a number token are valid in a name (no 429 '.', '+' or '-'). */ 430static int 431name_p (cpp_reader *pfile, const cpp_string *string) 432{ 433 unsigned int i; 434 435 for (i = 0; i < string->len; i++) 436 if (!is_idchar (string->text[i])) 437 return 0; 438 439 return 1; 440} 441 442/* After parsing an identifier or other sequence, produce a warning about 443 sequences not in NFC/NFKC. */ 444static void 445warn_about_normalization (cpp_reader *pfile, 446 const cpp_token *token, 447 const struct normalize_state *s) 448{ 449 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s) 450 && !pfile->state.skipping) 451 { 452 /* Make sure that the token is printed using UCNs, even 453 if we'd otherwise happily print UTF-8. */ 454 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token)); 455 size_t sz; 456 457 sz = cpp_spell_token (pfile, token, buf, false) - buf; 458 if (NORMALIZE_STATE_RESULT (s) == normalized_C) 459 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, 460 "`%.*s' is not in NFKC", (int) sz, buf); 461 else 462 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, 463 "`%.*s' is not in NFC", (int) sz, buf); 464 } 465} 466 467/* Returns TRUE if the sequence starting at buffer->cur is invalid in 468 an identifier. FIRST is TRUE if this starts an identifier. */ 469static bool 470forms_identifier_p (cpp_reader *pfile, int first, 471 struct normalize_state *state) 472{ 473 cpp_buffer *buffer = pfile->buffer; 474 475 if (*buffer->cur == '$') 476 { 477 if (!CPP_OPTION (pfile, dollars_in_ident)) 478 return false; 479 480 buffer->cur++; 481 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping) 482 { 483 CPP_OPTION (pfile, warn_dollars) = 0; 484 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number"); 485 } 486 487 return true; 488 } 489 490 /* Is this a syntactically valid UCN? */ 491 if (CPP_OPTION (pfile, extended_identifiers) 492 && *buffer->cur == '\\' 493 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) 494 { 495 buffer->cur += 2; 496 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, 497 state)) 498 return true; 499 buffer->cur -= 2; 500 } 501 502 return false; 503} 504 505/* Lex an identifier starting at BUFFER->CUR - 1. */ 506static cpp_hashnode * 507lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, 508 struct normalize_state *nst) 509{ 510 cpp_hashnode *result; 511 const uchar *cur; 512 unsigned int len; 513 unsigned int hash = HT_HASHSTEP (0, *base); 514 515 cur = pfile->buffer->cur; 516 if (! starts_ucn) 517 while (ISIDNUM (*cur)) 518 { 519 hash = HT_HASHSTEP (hash, *cur); 520 cur++; 521 } 522 pfile->buffer->cur = cur; 523 if (starts_ucn || forms_identifier_p (pfile, false, nst)) 524 { 525 /* Slower version for identifiers containing UCNs (or $). */ 526 do { 527 while (ISIDNUM (*pfile->buffer->cur)) 528 { 529 pfile->buffer->cur++; 530 NORMALIZE_STATE_UPDATE_IDNUM (nst); 531 } 532 } while (forms_identifier_p (pfile, false, nst)); 533 result = _cpp_interpret_identifier (pfile, base, 534 pfile->buffer->cur - base); 535 } 536 else 537 { 538 len = cur - base; 539 hash = HT_HASHFINISH (hash, len); 540 541 result = (cpp_hashnode *) 542 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC); 543 } 544 545 /* Rarely, identifiers require diagnostics when lexed. */ 546 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) 547 && !pfile->state.skipping, 0)) 548 { 549 /* It is allowed to poison the same identifier twice. */ 550 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) 551 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", 552 NODE_NAME (result)); 553 554 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the 555 replacement list of a variadic macro. */ 556 if (result == pfile->spec_nodes.n__VA_ARGS__ 557 && !pfile->state.va_args_ok) 558 cpp_error (pfile, CPP_DL_PEDWARN, 559 "__VA_ARGS__ can only appear in the expansion" 560 " of a C99 variadic macro"); 561 } 562 563 return result; 564} 565 566/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */ 567static void 568lex_number (cpp_reader *pfile, cpp_string *number, 569 struct normalize_state *nst) 570{ 571 const uchar *cur; 572 const uchar *base; 573 uchar *dest; 574 575 base = pfile->buffer->cur - 1; 576 do 577 { 578 cur = pfile->buffer->cur; 579 580 /* N.B. ISIDNUM does not include $. */ 581 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) 582 { 583 cur++; 584 NORMALIZE_STATE_UPDATE_IDNUM (nst); 585 } 586 587 pfile->buffer->cur = cur; 588 } 589 while (forms_identifier_p (pfile, false, nst)); 590 591 number->len = cur - base; 592 dest = _cpp_unaligned_alloc (pfile, number->len + 1); 593 memcpy (dest, base, number->len); 594 dest[number->len] = '\0'; 595 number->text = dest; 596} 597 598/* Create a token of type TYPE with a literal spelling. */ 599static void 600create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, 601 unsigned int len, enum cpp_ttype type) 602{ 603 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); 604 605 memcpy (dest, base, len); 606 dest[len] = '\0'; 607 token->type = type; 608 token->val.str.len = len; 609 token->val.str.text = dest; 610} 611 612/* Lexes a string, character constant, or angle-bracketed header file 613 name. The stored string contains the spelling, including opening 614 quote and leading any leading 'L'. It returns the type of the 615 literal, or CPP_OTHER if it was not properly terminated. 616 617 The spelling is NUL-terminated, but it is not guaranteed that this 618 is the first NUL since embedded NULs are preserved. */ 619static void 620lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) 621{ 622 bool saw_NUL = false; 623 const uchar *cur; 624 cppchar_t terminator; 625 enum cpp_ttype type; 626 627 cur = base; 628 terminator = *cur++; 629 if (terminator == 'L') 630 terminator = *cur++; 631 if (terminator == '\"') 632 type = *base == 'L' ? CPP_WSTRING: CPP_STRING; 633 else if (terminator == '\'') 634 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR; 635 else 636 terminator = '>', type = CPP_HEADER_NAME; 637 638 for (;;) 639 { 640 cppchar_t c = *cur++; 641 642 /* In #include-style directives, terminators are not escapable. */ 643 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') 644 cur++; 645 else if (c == terminator) 646 break; 647 else if (c == '\n') 648 { 649 cur--; 650 type = CPP_OTHER; 651 break; 652 } 653 else if (c == '\0') 654 saw_NUL = true; 655 } 656 657 if (saw_NUL && !pfile->state.skipping) 658 cpp_error (pfile, CPP_DL_WARNING, 659 "null character(s) preserved in literal"); 660 661 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM) 662 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character", 663 (int) terminator); 664 665 pfile->buffer->cur = cur; 666 create_literal (pfile, token, base, cur - base, type); 667} 668 669/* The stored comment includes the comment start and any terminator. */ 670static void 671save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from, 672 cppchar_t type) 673{ 674 unsigned char *buffer; 675 unsigned int len, clen; 676 677 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */ 678 679 /* C++ comments probably (not definitely) have moved past a new 680 line, which we don't want to save in the comment. */ 681 if (is_vspace (pfile->buffer->cur[-1])) 682 len--; 683 684 /* If we are currently in a directive, then we need to store all 685 C++ comments as C comments internally, and so we need to 686 allocate a little extra space in that case. 687 688 Note that the only time we encounter a directive here is 689 when we are saving comments in a "#define". */ 690 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len; 691 692 buffer = _cpp_unaligned_alloc (pfile, clen); 693 694 token->type = CPP_COMMENT; 695 token->val.str.len = clen; 696 token->val.str.text = buffer; 697 698 buffer[0] = '/'; 699 memcpy (buffer + 1, from, len - 1); 700 701 /* Finish conversion to a C comment, if necessary. */ 702 if (pfile->state.in_directive && type == '/') 703 { 704 buffer[1] = '*'; 705 buffer[clen - 2] = '*'; 706 buffer[clen - 1] = '/'; 707 } 708} 709 710/* Allocate COUNT tokens for RUN. */ 711void 712_cpp_init_tokenrun (tokenrun *run, unsigned int count) 713{ 714 run->base = XNEWVEC (cpp_token, count); 715 run->limit = run->base + count; 716 run->next = NULL; 717} 718 719/* Returns the next tokenrun, or creates one if there is none. */ 720static tokenrun * 721next_tokenrun (tokenrun *run) 722{ 723 if (run->next == NULL) 724 { 725 run->next = XNEW (tokenrun); 726 run->next->prev = run; 727 _cpp_init_tokenrun (run->next, 250); 728 } 729 730 return run->next; 731} 732 733/* Allocate a single token that is invalidated at the same time as the 734 rest of the tokens on the line. Has its line and col set to the 735 same as the last lexed token, so that diagnostics appear in the 736 right place. */ 737cpp_token * 738_cpp_temp_token (cpp_reader *pfile) 739{ 740 cpp_token *old, *result; 741 742 old = pfile->cur_token - 1; 743 if (pfile->cur_token == pfile->cur_run->limit) 744 { 745 pfile->cur_run = next_tokenrun (pfile->cur_run); 746 pfile->cur_token = pfile->cur_run->base; 747 } 748 749 result = pfile->cur_token++; 750 result->src_loc = old->src_loc; 751 return result; 752} 753 754/* Lex a token into RESULT (external interface). Takes care of issues 755 like directive handling, token lookahead, multiple include 756 optimization and skipping. */ 757const cpp_token * 758_cpp_lex_token (cpp_reader *pfile) 759{ 760 cpp_token *result; 761 762 for (;;) 763 { 764 if (pfile->cur_token == pfile->cur_run->limit) 765 { 766 pfile->cur_run = next_tokenrun (pfile->cur_run); 767 pfile->cur_token = pfile->cur_run->base; 768 } 769 /* We assume that the current token is somewhere in the current 770 run. */ 771 if (pfile->cur_token < pfile->cur_run->base 772 || pfile->cur_token >= pfile->cur_run->limit) 773 abort (); 774 775 if (pfile->lookaheads) 776 { 777 pfile->lookaheads--; 778 result = pfile->cur_token++; 779 } 780 else 781 result = _cpp_lex_direct (pfile); 782 783 if (result->flags & BOL) 784 { 785 /* Is this a directive. If _cpp_handle_directive returns 786 false, it is an assembler #. */ 787 if (result->type == CPP_HASH 788 /* 6.10.3 p 11: Directives in a list of macro arguments 789 gives undefined behavior. This implementation 790 handles the directive as normal. */ 791 && pfile->state.parsing_args != 1) 792 { 793 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE)) 794 { 795 if (pfile->directive_result.type == CPP_PADDING) 796 continue; 797 result = &pfile->directive_result; 798 } 799 } 800 else if (pfile->state.in_deferred_pragma) 801 result = &pfile->directive_result; 802 803 if (pfile->cb.line_change && !pfile->state.skipping) 804 pfile->cb.line_change (pfile, result, pfile->state.parsing_args); 805 } 806 807 /* We don't skip tokens in directives. */ 808 if (pfile->state.in_directive || pfile->state.in_deferred_pragma) 809 break; 810 811 /* Outside a directive, invalidate controlling macros. At file 812 EOF, _cpp_lex_direct takes care of popping the buffer, so we never 813 get here and MI optimization works. */ 814 pfile->mi_valid = false; 815 816 if (!pfile->state.skipping || result->type == CPP_EOF) 817 break; 818 } 819 820 return result; 821} 822 823/* Returns true if a fresh line has been loaded. */ 824bool 825_cpp_get_fresh_line (cpp_reader *pfile) 826{ 827 int return_at_eof; 828 829 /* We can't get a new line until we leave the current directive. */ 830 if (pfile->state.in_directive) 831 return false; 832 833 for (;;) 834 { 835 cpp_buffer *buffer = pfile->buffer; 836 837 if (!buffer->need_line) 838 return true; 839 840 if (buffer->next_line < buffer->rlimit) 841 { 842 _cpp_clean_line (pfile); 843 return true; 844 } 845 846 /* First, get out of parsing arguments state. */ 847 if (pfile->state.parsing_args) 848 return false; 849 850 /* End of buffer. Non-empty files should end in a newline. */ 851 if (buffer->buf != buffer->rlimit 852 && buffer->next_line > buffer->rlimit 853 && !buffer->from_stage3) 854 { 855 /* Clip to buffer size. */ 856 buffer->next_line = buffer->rlimit; 857 /* APPLE LOCAL begin suppress no newline warning. */ 858 if ( CPP_OPTION (pfile, warn_newline_at_eof)) 859 { 860 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, 861 CPP_BUF_COLUMN (buffer, buffer->cur), 862 "no newline at end of file"); 863 } 864 /* APPLE LOCAL end suppress no newline warning. */ 865 } 866 867 return_at_eof = buffer->return_at_eof; 868 _cpp_pop_buffer (pfile); 869 if (pfile->buffer == NULL || return_at_eof) 870 return false; 871 } 872} 873 874#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \ 875 do \ 876 { \ 877 result->type = ELSE_TYPE; \ 878 if (*buffer->cur == CHAR) \ 879 buffer->cur++, result->type = THEN_TYPE; \ 880 } \ 881 while (0) 882 883/* Lex a token into pfile->cur_token, which is also incremented, to 884 get diagnostics pointing to the correct location. 885 886 Does not handle issues such as token lookahead, multiple-include 887 optimization, directives, skipping etc. This function is only 888 suitable for use by _cpp_lex_token, and in special cases like 889 lex_expansion_token which doesn't care for any of these issues. 890 891 When meeting a newline, returns CPP_EOF if parsing a directive, 892 otherwise returns to the start of the token buffer if permissible. 893 Returns the location of the lexed token. */ 894cpp_token * 895_cpp_lex_direct (cpp_reader *pfile) 896{ 897 cppchar_t c; 898 cpp_buffer *buffer; 899 const unsigned char *comment_start; 900 cpp_token *result = pfile->cur_token++; 901 902 fresh_line: 903 result->flags = 0; 904 buffer = pfile->buffer; 905 if (buffer->need_line) 906 { 907 if (pfile->state.in_deferred_pragma) 908 { 909 result->type = CPP_PRAGMA_EOL; 910 pfile->state.in_deferred_pragma = false; 911 if (!pfile->state.pragma_allow_expansion) 912 pfile->state.prevent_expansion--; 913 return result; 914 } 915 if (!_cpp_get_fresh_line (pfile)) 916 { 917 result->type = CPP_EOF; 918 if (!pfile->state.in_directive) 919 { 920 /* Tell the compiler the line number of the EOF token. */ 921 result->src_loc = pfile->line_table->highest_line; 922 result->flags = BOL; 923 } 924 return result; 925 } 926 if (!pfile->keep_tokens) 927 { 928 pfile->cur_run = &pfile->base_run; 929 result = pfile->base_run.base; 930 pfile->cur_token = result + 1; 931 } 932 result->flags = BOL; 933 if (pfile->state.parsing_args == 2) 934 result->flags |= PREV_WHITE; 935 } 936 buffer = pfile->buffer; 937 update_tokens_line: 938 result->src_loc = pfile->line_table->highest_line; 939 940 skipped_white: 941 if (buffer->cur >= buffer->notes[buffer->cur_note].pos 942 && !pfile->overlaid_buffer) 943 { 944 _cpp_process_line_notes (pfile, false); 945 result->src_loc = pfile->line_table->highest_line; 946 } 947 c = *buffer->cur++; 948 949 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table, 950 CPP_BUF_COLUMN (buffer, buffer->cur)); 951 952 switch (c) 953 { 954 case ' ': case '\t': case '\f': case '\v': case '\0': 955 result->flags |= PREV_WHITE; 956 skip_whitespace (pfile, c); 957 goto skipped_white; 958 959 case '\n': 960 if (buffer->cur < buffer->rlimit) 961 CPP_INCREMENT_LINE (pfile, 0); 962 buffer->need_line = true; 963 goto fresh_line; 964 965 case '0': case '1': case '2': case '3': case '4': 966 case '5': case '6': case '7': case '8': case '9': 967 { 968 struct normalize_state nst = INITIAL_NORMALIZE_STATE; 969 result->type = CPP_NUMBER; 970 lex_number (pfile, &result->val.str, &nst); 971 warn_about_normalization (pfile, result, &nst); 972 break; 973 } 974 975 case 'L': 976 /* 'L' may introduce wide characters or strings. */ 977 if (*buffer->cur == '\'' || *buffer->cur == '"') 978 { 979 lex_string (pfile, result, buffer->cur - 1); 980 break; 981 } 982 /* Fall through. */ 983 984 case '_': 985 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 986 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 987 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 988 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 989 case 'y': case 'z': 990 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 991 case 'G': case 'H': case 'I': case 'J': case 'K': 992 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 993 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 994 case 'Y': case 'Z': 995 result->type = CPP_NAME; 996 { 997 struct normalize_state nst = INITIAL_NORMALIZE_STATE; 998 result->val.node = lex_identifier (pfile, buffer->cur - 1, false, 999 &nst); 1000 warn_about_normalization (pfile, result, &nst); 1001 } 1002 1003 /* Convert named operators to their proper types. */ 1004 if (result->val.node->flags & NODE_OPERATOR) 1005 { 1006 result->flags |= NAMED_OP; 1007 result->type = (enum cpp_ttype) result->val.node->directive_index; 1008 } 1009 break; 1010 1011 case '\'': 1012 case '"': 1013 lex_string (pfile, result, buffer->cur - 1); 1014 break; 1015 1016 case '/': 1017 /* A potential block or line comment. */ 1018 comment_start = buffer->cur; 1019 c = *buffer->cur; 1020 1021 if (c == '*') 1022 { 1023 if (_cpp_skip_block_comment (pfile)) 1024 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment"); 1025 } 1026 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) 1027 || cpp_in_system_header (pfile))) 1028 { 1029 /* Warn about comments only if pedantically GNUC89, and not 1030 in system headers. */ 1031 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) 1032 && ! buffer->warned_cplusplus_comments) 1033 { 1034 cpp_error (pfile, CPP_DL_PEDWARN, 1035 "C++ style comments are not allowed in ISO C90"); 1036 cpp_error (pfile, CPP_DL_PEDWARN, 1037 "(this will be reported only once per input file)"); 1038 buffer->warned_cplusplus_comments = 1; 1039 } 1040 1041 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) 1042 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment"); 1043 } 1044 else if (c == '=') 1045 { 1046 buffer->cur++; 1047 result->type = CPP_DIV_EQ; 1048 break; 1049 } 1050 else 1051 { 1052 result->type = CPP_DIV; 1053 break; 1054 } 1055 1056 if (!pfile->state.save_comments) 1057 { 1058 result->flags |= PREV_WHITE; 1059 goto update_tokens_line; 1060 } 1061 1062 /* Save the comment as a token in its own right. */ 1063 save_comment (pfile, result, comment_start, c); 1064 break; 1065 1066 case '<': 1067 if (pfile->state.angled_headers) 1068 { 1069 lex_string (pfile, result, buffer->cur - 1); 1070 break; 1071 } 1072 1073 result->type = CPP_LESS; 1074 if (*buffer->cur == '=') 1075 buffer->cur++, result->type = CPP_LESS_EQ; 1076 else if (*buffer->cur == '<') 1077 { 1078 buffer->cur++; 1079 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); 1080 } 1081 else if (CPP_OPTION (pfile, digraphs)) 1082 { 1083 if (*buffer->cur == ':') 1084 { 1085 buffer->cur++; 1086 result->flags |= DIGRAPH; 1087 result->type = CPP_OPEN_SQUARE; 1088 } 1089 else if (*buffer->cur == '%') 1090 { 1091 buffer->cur++; 1092 result->flags |= DIGRAPH; 1093 result->type = CPP_OPEN_BRACE; 1094 } 1095 } 1096 break; 1097 1098 case '>': 1099 result->type = CPP_GREATER; 1100 if (*buffer->cur == '=') 1101 buffer->cur++, result->type = CPP_GREATER_EQ; 1102 else if (*buffer->cur == '>') 1103 { 1104 buffer->cur++; 1105 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT); 1106 } 1107 break; 1108 1109 case '%': 1110 result->type = CPP_MOD; 1111 if (*buffer->cur == '=') 1112 buffer->cur++, result->type = CPP_MOD_EQ; 1113 else if (CPP_OPTION (pfile, digraphs)) 1114 { 1115 if (*buffer->cur == ':') 1116 { 1117 buffer->cur++; 1118 result->flags |= DIGRAPH; 1119 result->type = CPP_HASH; 1120 if (*buffer->cur == '%' && buffer->cur[1] == ':') 1121 buffer->cur += 2, result->type = CPP_PASTE; 1122 } 1123 else if (*buffer->cur == '>') 1124 { 1125 buffer->cur++; 1126 result->flags |= DIGRAPH; 1127 result->type = CPP_CLOSE_BRACE; 1128 } 1129 } 1130 break; 1131 1132 case '.': 1133 result->type = CPP_DOT; 1134 if (ISDIGIT (*buffer->cur)) 1135 { 1136 struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1137 result->type = CPP_NUMBER; 1138 lex_number (pfile, &result->val.str, &nst); 1139 warn_about_normalization (pfile, result, &nst); 1140 } 1141 else if (*buffer->cur == '.' && buffer->cur[1] == '.') 1142 buffer->cur += 2, result->type = CPP_ELLIPSIS; 1143 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) 1144 buffer->cur++, result->type = CPP_DOT_STAR; 1145 break; 1146 1147 case '+': 1148 result->type = CPP_PLUS; 1149 if (*buffer->cur == '+') 1150 buffer->cur++, result->type = CPP_PLUS_PLUS; 1151 else if (*buffer->cur == '=') 1152 buffer->cur++, result->type = CPP_PLUS_EQ; 1153 break; 1154 1155 case '-': 1156 result->type = CPP_MINUS; 1157 if (*buffer->cur == '>') 1158 { 1159 buffer->cur++; 1160 result->type = CPP_DEREF; 1161 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) 1162 buffer->cur++, result->type = CPP_DEREF_STAR; 1163 } 1164 else if (*buffer->cur == '-') 1165 buffer->cur++, result->type = CPP_MINUS_MINUS; 1166 else if (*buffer->cur == '=') 1167 buffer->cur++, result->type = CPP_MINUS_EQ; 1168 break; 1169 1170 case '&': 1171 result->type = CPP_AND; 1172 if (*buffer->cur == '&') 1173 buffer->cur++, result->type = CPP_AND_AND; 1174 else if (*buffer->cur == '=') 1175 buffer->cur++, result->type = CPP_AND_EQ; 1176 break; 1177 1178 case '|': 1179 result->type = CPP_OR; 1180 if (*buffer->cur == '|') 1181 buffer->cur++, result->type = CPP_OR_OR; 1182 else if (*buffer->cur == '=') 1183 buffer->cur++, result->type = CPP_OR_EQ; 1184 break; 1185 1186 case ':': 1187 result->type = CPP_COLON; 1188 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus)) 1189 buffer->cur++, result->type = CPP_SCOPE; 1190 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs)) 1191 { 1192 buffer->cur++; 1193 result->flags |= DIGRAPH; 1194 result->type = CPP_CLOSE_SQUARE; 1195 } 1196 break; 1197 1198 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break; 1199 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break; 1200 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break; 1201 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break; 1202 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break; 1203 1204 case '?': result->type = CPP_QUERY; break; 1205 case '~': result->type = CPP_COMPL; break; 1206 case ',': result->type = CPP_COMMA; break; 1207 case '(': result->type = CPP_OPEN_PAREN; break; 1208 case ')': result->type = CPP_CLOSE_PAREN; break; 1209 case '[': result->type = CPP_OPEN_SQUARE; break; 1210 case ']': result->type = CPP_CLOSE_SQUARE; break; 1211 case '{': result->type = CPP_OPEN_BRACE; break; 1212 case '}': result->type = CPP_CLOSE_BRACE; break; 1213 case ';': result->type = CPP_SEMICOLON; break; 1214 1215 /* @ is a punctuator in Objective-C. */ 1216 case '@': result->type = CPP_ATSIGN; break; 1217 1218 case '$': 1219 case '\\': 1220 { 1221 const uchar *base = --buffer->cur; 1222 struct normalize_state nst = INITIAL_NORMALIZE_STATE; 1223 1224 if (forms_identifier_p (pfile, true, &nst)) 1225 { 1226 result->type = CPP_NAME; 1227 result->val.node = lex_identifier (pfile, base, true, &nst); 1228 warn_about_normalization (pfile, result, &nst); 1229 break; 1230 } 1231 buffer->cur++; 1232 } 1233 1234 default: 1235 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER); 1236 break; 1237 } 1238 1239 return result; 1240} 1241 1242/* An upper bound on the number of bytes needed to spell TOKEN. 1243 Does not include preceding whitespace. */ 1244unsigned int 1245cpp_token_len (const cpp_token *token) 1246{ 1247 unsigned int len; 1248 1249 switch (TOKEN_SPELL (token)) 1250 { 1251 default: len = 4; break; 1252 case SPELL_LITERAL: len = token->val.str.len; break; 1253 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break; 1254 } 1255 1256 return len; 1257} 1258 1259/* Parse UTF-8 out of NAMEP and place a \U escape in BUFFER. 1260 Return the number of bytes read out of NAME. (There are always 1261 10 bytes written to BUFFER.) */ 1262 1263static size_t 1264utf8_to_ucn (unsigned char *buffer, const unsigned char *name) 1265{ 1266 int j; 1267 int ucn_len = 0; 1268 int ucn_len_c; 1269 unsigned t; 1270 unsigned long utf32; 1271 1272 /* Compute the length of the UTF-8 sequence. */ 1273 for (t = *name; t & 0x80; t <<= 1) 1274 ucn_len++; 1275 1276 utf32 = *name & (0x7F >> ucn_len); 1277 for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++) 1278 { 1279 utf32 = (utf32 << 6) | (*++name & 0x3F); 1280 1281 /* Ill-formed UTF-8. */ 1282 if ((*name & ~0x3F) != 0x80) 1283 abort (); 1284 } 1285 1286 *buffer++ = '\\'; 1287 *buffer++ = 'U'; 1288 for (j = 7; j >= 0; j--) 1289 *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF]; 1290 return ucn_len; 1291} 1292 1293 1294/* Write the spelling of a token TOKEN to BUFFER. The buffer must 1295 already contain the enough space to hold the token's spelling. 1296 Returns a pointer to the character after the last character written. 1297 FORSTRING is true if this is to be the spelling after translation 1298 phase 1 (this is different for UCNs). 1299 FIXME: Would be nice if we didn't need the PFILE argument. */ 1300unsigned char * 1301cpp_spell_token (cpp_reader *pfile, const cpp_token *token, 1302 unsigned char *buffer, bool forstring) 1303{ 1304 switch (TOKEN_SPELL (token)) 1305 { 1306 case SPELL_OPERATOR: 1307 { 1308 const unsigned char *spelling; 1309 unsigned char c; 1310 1311 if (token->flags & DIGRAPH) 1312 spelling 1313 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; 1314 else if (token->flags & NAMED_OP) 1315 goto spell_ident; 1316 else 1317 spelling = TOKEN_NAME (token); 1318 1319 while ((c = *spelling++) != '\0') 1320 *buffer++ = c; 1321 } 1322 break; 1323 1324 spell_ident: 1325 case SPELL_IDENT: 1326 if (forstring) 1327 { 1328 memcpy (buffer, NODE_NAME (token->val.node), 1329 NODE_LEN (token->val.node)); 1330 buffer += NODE_LEN (token->val.node); 1331 } 1332 else 1333 { 1334 size_t i; 1335 const unsigned char * name = NODE_NAME (token->val.node); 1336 1337 for (i = 0; i < NODE_LEN (token->val.node); i++) 1338 if (name[i] & ~0x7F) 1339 { 1340 i += utf8_to_ucn (buffer, name + i) - 1; 1341 buffer += 10; 1342 } 1343 else 1344 *buffer++ = NODE_NAME (token->val.node)[i]; 1345 } 1346 break; 1347 1348 case SPELL_LITERAL: 1349 memcpy (buffer, token->val.str.text, token->val.str.len); 1350 buffer += token->val.str.len; 1351 break; 1352 1353 case SPELL_NONE: 1354 cpp_error (pfile, CPP_DL_ICE, 1355 "unspellable token %s", TOKEN_NAME (token)); 1356 break; 1357 } 1358 1359 return buffer; 1360} 1361 1362/* Returns TOKEN spelt as a null-terminated string. The string is 1363 freed when the reader is destroyed. Useful for diagnostics. */ 1364unsigned char * 1365cpp_token_as_text (cpp_reader *pfile, const cpp_token *token) 1366{ 1367 unsigned int len = cpp_token_len (token) + 1; 1368 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; 1369 1370 end = cpp_spell_token (pfile, token, start, false); 1371 end[0] = '\0'; 1372 1373 return start; 1374} 1375 1376/* Used by C front ends, which really should move to using 1377 cpp_token_as_text. */ 1378const char * 1379cpp_type2name (enum cpp_ttype type) 1380{ 1381 return (const char *) token_spellings[type].name; 1382} 1383 1384/* Writes the spelling of token to FP, without any preceding space. 1385 Separated from cpp_spell_token for efficiency - to avoid stdio 1386 double-buffering. */ 1387void 1388cpp_output_token (const cpp_token *token, FILE *fp) 1389{ 1390 switch (TOKEN_SPELL (token)) 1391 { 1392 case SPELL_OPERATOR: 1393 { 1394 const unsigned char *spelling; 1395 int c; 1396 1397 if (token->flags & DIGRAPH) 1398 spelling 1399 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; 1400 else if (token->flags & NAMED_OP) 1401 goto spell_ident; 1402 else 1403 spelling = TOKEN_NAME (token); 1404 1405 c = *spelling; 1406 do 1407 putc (c, fp); 1408 while ((c = *++spelling) != '\0'); 1409 } 1410 break; 1411 1412 spell_ident: 1413 case SPELL_IDENT: 1414 { 1415 size_t i; 1416 const unsigned char * name = NODE_NAME (token->val.node); 1417 1418 for (i = 0; i < NODE_LEN (token->val.node); i++) 1419 if (name[i] & ~0x7F) 1420 { 1421 unsigned char buffer[10]; 1422 i += utf8_to_ucn (buffer, name + i) - 1; 1423 fwrite (buffer, 1, 10, fp); 1424 } 1425 else 1426 fputc (NODE_NAME (token->val.node)[i], fp); 1427 } 1428 break; 1429 1430 case SPELL_LITERAL: 1431 fwrite (token->val.str.text, 1, token->val.str.len, fp); 1432 break; 1433 1434 case SPELL_NONE: 1435 /* An error, most probably. */ 1436 break; 1437 } 1438} 1439 1440/* Compare two tokens. */ 1441int 1442_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) 1443{ 1444 if (a->type == b->type && a->flags == b->flags) 1445 switch (TOKEN_SPELL (a)) 1446 { 1447 default: /* Keep compiler happy. */ 1448 case SPELL_OPERATOR: 1449 return 1; 1450 case SPELL_NONE: 1451 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); 1452 case SPELL_IDENT: 1453 return a->val.node == b->val.node; 1454 case SPELL_LITERAL: 1455 return (a->val.str.len == b->val.str.len 1456 && !memcmp (a->val.str.text, b->val.str.text, 1457 a->val.str.len)); 1458 } 1459 1460 return 0; 1461} 1462 1463/* Returns nonzero if a space should be inserted to avoid an 1464 accidental token paste for output. For simplicity, it is 1465 conservative, and occasionally advises a space where one is not 1466 needed, e.g. "." and ".2". */ 1467int 1468cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1, 1469 const cpp_token *token2) 1470{ 1471 enum cpp_ttype a = token1->type, b = token2->type; 1472 cppchar_t c; 1473 1474 if (token1->flags & NAMED_OP) 1475 a = CPP_NAME; 1476 if (token2->flags & NAMED_OP) 1477 b = CPP_NAME; 1478 1479 c = EOF; 1480 if (token2->flags & DIGRAPH) 1481 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0]; 1482 else if (token_spellings[b].category == SPELL_OPERATOR) 1483 c = token_spellings[b].name[0]; 1484 1485 /* Quickly get everything that can paste with an '='. */ 1486 if ((int) a <= (int) CPP_LAST_EQ && c == '=') 1487 return 1; 1488 1489 switch (a) 1490 { 1491 case CPP_GREATER: return c == '>'; 1492 case CPP_LESS: return c == '<' || c == '%' || c == ':'; 1493 case CPP_PLUS: return c == '+'; 1494 case CPP_MINUS: return c == '-' || c == '>'; 1495 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */ 1496 case CPP_MOD: return c == ':' || c == '>'; 1497 case CPP_AND: return c == '&'; 1498 case CPP_OR: return c == '|'; 1499 case CPP_COLON: return c == ':' || c == '>'; 1500 case CPP_DEREF: return c == '*'; 1501 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER; 1502 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */ 1503 case CPP_NAME: return ((b == CPP_NUMBER 1504 && name_p (pfile, &token2->val.str)) 1505 || b == CPP_NAME 1506 || b == CPP_CHAR || b == CPP_STRING); /* L */ 1507 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME 1508 || c == '.' || c == '+' || c == '-'); 1509 /* UCNs */ 1510 case CPP_OTHER: return ((token1->val.str.text[0] == '\\' 1511 && b == CPP_NAME) 1512 || (CPP_OPTION (pfile, objc) 1513 && token1->val.str.text[0] == '@' 1514 && (b == CPP_NAME || b == CPP_STRING))); 1515 default: break; 1516 } 1517 1518 return 0; 1519} 1520 1521/* Output all the remaining tokens on the current line, and a newline 1522 character, to FP. Leading whitespace is removed. If there are 1523 macros, special token padding is not performed. */ 1524void 1525cpp_output_line (cpp_reader *pfile, FILE *fp) 1526{ 1527 const cpp_token *token; 1528 1529 token = cpp_get_token (pfile); 1530 while (token->type != CPP_EOF) 1531 { 1532 cpp_output_token (token, fp); 1533 token = cpp_get_token (pfile); 1534 if (token->flags & PREV_WHITE) 1535 putc (' ', fp); 1536 } 1537 1538 putc ('\n', fp); 1539} 1540 1541/* Memory buffers. Changing these three constants can have a dramatic 1542 effect on performance. The values here are reasonable defaults, 1543 but might be tuned. If you adjust them, be sure to test across a 1544 range of uses of cpplib, including heavy nested function-like macro 1545 expansion. Also check the change in peak memory usage (NJAMD is a 1546 good tool for this). */ 1547#define MIN_BUFF_SIZE 8000 1548#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2) 1549#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \ 1550 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2) 1551 1552#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0) 1553 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE! 1554#endif 1555 1556/* Create a new allocation buffer. Place the control block at the end 1557 of the buffer, so that buffer overflows will cause immediate chaos. */ 1558static _cpp_buff * 1559new_buff (size_t len) 1560{ 1561 _cpp_buff *result; 1562 unsigned char *base; 1563 1564 if (len < MIN_BUFF_SIZE) 1565 len = MIN_BUFF_SIZE; 1566 len = CPP_ALIGN (len); 1567 1568 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff)); 1569 result = (_cpp_buff *) (base + len); 1570 result->base = base; 1571 result->cur = base; 1572 result->limit = base + len; 1573 result->next = NULL; 1574 return result; 1575} 1576 1577/* Place a chain of unwanted allocation buffers on the free list. */ 1578void 1579_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff) 1580{ 1581 _cpp_buff *end = buff; 1582 1583 while (end->next) 1584 end = end->next; 1585 end->next = pfile->free_buffs; 1586 pfile->free_buffs = buff; 1587} 1588 1589/* Return a free buffer of size at least MIN_SIZE. */ 1590_cpp_buff * 1591_cpp_get_buff (cpp_reader *pfile, size_t min_size) 1592{ 1593 _cpp_buff *result, **p; 1594 1595 for (p = &pfile->free_buffs;; p = &(*p)->next) 1596 { 1597 size_t size; 1598 1599 if (*p == NULL) 1600 return new_buff (min_size); 1601 result = *p; 1602 size = result->limit - result->base; 1603 /* Return a buffer that's big enough, but don't waste one that's 1604 way too big. */ 1605 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)) 1606 break; 1607 } 1608 1609 *p = result->next; 1610 result->next = NULL; 1611 result->cur = result->base; 1612 return result; 1613} 1614 1615/* Creates a new buffer with enough space to hold the uncommitted 1616 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies 1617 the excess bytes to the new buffer. Chains the new buffer after 1618 BUFF, and returns the new buffer. */ 1619_cpp_buff * 1620_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra) 1621{ 1622 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); 1623 _cpp_buff *new_buff = _cpp_get_buff (pfile, size); 1624 1625 buff->next = new_buff; 1626 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)); 1627 return new_buff; 1628} 1629 1630/* Creates a new buffer with enough space to hold the uncommitted 1631 remaining bytes of the buffer pointed to by BUFF, and at least 1632 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer. 1633 Chains the new buffer before the buffer pointed to by BUFF, and 1634 updates the pointer to point to the new buffer. */ 1635void 1636_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra) 1637{ 1638 _cpp_buff *new_buff, *old_buff = *pbuff; 1639 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); 1640 1641 new_buff = _cpp_get_buff (pfile, size); 1642 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)); 1643 new_buff->next = old_buff; 1644 *pbuff = new_buff; 1645} 1646 1647/* Free a chain of buffers starting at BUFF. */ 1648void 1649_cpp_free_buff (_cpp_buff *buff) 1650{ 1651 _cpp_buff *next; 1652 1653 for (; buff; buff = next) 1654 { 1655 next = buff->next; 1656 free (buff->base); 1657 } 1658} 1659 1660/* Allocate permanent, unaligned storage of length LEN. */ 1661unsigned char * 1662_cpp_unaligned_alloc (cpp_reader *pfile, size_t len) 1663{ 1664 _cpp_buff *buff = pfile->u_buff; 1665 unsigned char *result = buff->cur; 1666 1667 if (len > (size_t) (buff->limit - result)) 1668 { 1669 buff = _cpp_get_buff (pfile, len); 1670 buff->next = pfile->u_buff; 1671 pfile->u_buff = buff; 1672 result = buff->cur; 1673 } 1674 1675 buff->cur = result + len; 1676 return result; 1677} 1678 1679/* Allocate permanent, unaligned storage of length LEN from a_buff. 1680 That buffer is used for growing allocations when saving macro 1681 replacement lists in a #define, and when parsing an answer to an 1682 assertion in #assert, #unassert or #if (and therefore possibly 1683 whilst expanding macros). It therefore must not be used by any 1684 code that they might call: specifically the lexer and the guts of 1685 the macro expander. 1686 1687 All existing other uses clearly fit this restriction: storing 1688 registered pragmas during initialization. */ 1689unsigned char * 1690_cpp_aligned_alloc (cpp_reader *pfile, size_t len) 1691{ 1692 _cpp_buff *buff = pfile->a_buff; 1693 unsigned char *result = buff->cur; 1694 1695 if (len > (size_t) (buff->limit - result)) 1696 { 1697 buff = _cpp_get_buff (pfile, len); 1698 buff->next = pfile->a_buff; 1699 pfile->a_buff = buff; 1700 result = buff->cur; 1701 } 1702 1703 buff->cur = result + len; 1704 return result; 1705} 1706 1707/* Say which field of TOK is in use. */ 1708 1709enum cpp_token_fld_kind 1710cpp_token_val_index (cpp_token *tok) 1711{ 1712 switch (TOKEN_SPELL (tok)) 1713 { 1714 case SPELL_IDENT: 1715 return CPP_TOKEN_FLD_NODE; 1716 case SPELL_LITERAL: 1717 return CPP_TOKEN_FLD_STR; 1718 case SPELL_NONE: 1719 if (tok->type == CPP_MACRO_ARG) 1720 return CPP_TOKEN_FLD_ARG_NO; 1721 else if (tok->type == CPP_PADDING) 1722 return CPP_TOKEN_FLD_SOURCE; 1723 else if (tok->type == CPP_PRAGMA) 1724 return CPP_TOKEN_FLD_PRAGMA; 1725 /* else fall through */ 1726 default: 1727 return CPP_TOKEN_FLD_NONE; 1728 } 1729} 1730