lex.c revision 259268
1/* CPP Library - lexical analysis.
2   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3   Contributed by Per Bothner, 1994-95.
4   Based on CCCP program by Paul Rubin, June 1986
5   Adapted to ANSI C, Richard Stallman, Jan 1987
6   Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
21
22#include "config.h"
23#include "system.h"
24#include "cpplib.h"
25#include "internal.h"
26
/* Spelling category of a token type, stored in token_spellings.  */
enum spell_type
{
  SPELL_OPERATOR,	/* Category given to every OP entry of the table.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
/* One entry of the token_spellings table: the spelling category of a
   token type together with the text recorded for it.  */
struct token_spelling
{
  /* Spelling category of the token type.  */
  enum spell_type category;
  /* Text stored for the token type by the OP / TK table macros.  */
  const unsigned char *name;
};
40
/* Spellings of the digraphs.  NOTE(review): this table is presumably
   indexed from code outside the visible part of the file, so the
   order of the entries should be treated as significant.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
43
/* Build token_spellings by expanding TTYPE_TABLE: every OP (e, s)
   entry becomes { SPELL_OPERATOR, s } and every TK (e, s) entry
   becomes { SPELL_<s>, "<e>" } (the enumerator name, stringified).
   OP and TK are only needed for this expansion and are undefined
   again right after it.  */
#define OP(e, s) { SPELL_OPERATOR, U s  },
#define TK(e, s) { SPELL_ ## s,    U #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Spelling category, respectively stored spelling, for TOKEN, looked
   up in token_spellings by the token's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
/* Forward declarations of helpers local to this file.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
			    unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

/* NOTE(review): new_buff is not defined in the part of the file
   reviewed here; presumably it appears further down.  */
static _cpp_buff *new_buff (size_t);
65
66
67/* Utility routine:
68
69   Compares, the token TOKEN to the NUL-terminated string STRING.
70   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
71int
72cpp_ideq (const cpp_token *token, const char *string)
73{
74  if (token->type != CPP_NAME)
75    return 0;
76
77  return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78}
79
80/* Record a note TYPE at byte POS into the current cleaned logical
81   line.  */
82static void
83add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84{
85  if (buffer->notes_used == buffer->notes_cap)
86    {
87      buffer->notes_cap = buffer->notes_cap * 2 + 200;
88      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89                                  buffer->notes_cap);
90    }
91
92  buffer->notes[buffer->notes_used].pos = pos;
93  buffer->notes[buffer->notes_used].type = type;
94  buffer->notes_used++;
95}
96
/* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   The line starts at PFILE->buffer->next_line.  On return
   buffer->cur and buffer->line_base point at its start, the line is
   terminated by a '\n' stored at its end, buffer->next_line points
   just past the input consumed, and the note array has been reset
   and refilled: one note per escaped newline and per trigraph seen,
   followed by a sentinel note.  Trigraphs are always noted but only
   replaced in the text when the trigraphs option is enabled.  The
   buffer is edited in place.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  /* S is the read position; D is the write position (or, on the fast
     path, the place where the terminating '\n' goes); P is a scratch
     pointer for scanning backwards over trailing whitespace.  */
  const uchar *s;
  uchar c, *d, *p;

  buffer = pfile->buffer;
  /* Start a fresh set of notes for this line.  */
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  /* One before the line start, because the loops pre-increment.  */
  s = buffer->next_line - 1;

  if (!buffer->from_stage3)
    {
      /* Most recent backslash seen on the fast path, if any.  */
      const uchar *pbackslash = NULL;

      /* Short circuit for the common case of an un-escaped line with
	 no trigraphs.  The primary win here is by not writing any
	 data back to memory until we have to.  */
      for (;;)
	{
	  c = *++s;
	  if (__builtin_expect (c == '\n', false)
	      || __builtin_expect (c == '\r', false))
	    {
	      d = (uchar *) s;

	      /* Line terminator found exactly at the buffer limit.  */
	      if (__builtin_expect (s == buffer->rlimit, false))
		goto done;

	      /* DOS line ending? */
	      if (__builtin_expect (c == '\r', false)
		  && s[1] == '\n')
		{
		  s++;
		  if (s == buffer->rlimit)
		    goto done;
		}

	      /* No backslash on this line, so it cannot be escaped.  */
	      if (__builtin_expect (pbackslash == NULL, true))
		goto done;

	      /* Check for escaped newline.  */
	      p = d;
	      while (is_nvspace (p[-1]))
		p--;
	      if (p - 1 != pbackslash)
		goto done;

	      /* Have an escaped newline; process it and proceed to
		 the slow path.  */
	      /* The note type is ' ' when whitespace separated the
		 backslash from the newline, '\\' otherwise.  */
	      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
	      /* Subsequent text is written over the backslash.  From
		 here until `done', next_line marks the splice point
		 and bounds the backward scans of the slow path.  */
	      d = p - 2;
	      buffer->next_line = p - 1;
	      break;
	    }
	  if (__builtin_expect (c == '\\', false))
	    pbackslash = s;
	  else if (__builtin_expect (c == '?', false)
		   && __builtin_expect (s[1] == '?', false)
		   && _cpp_trigraph_map[s[2]])
	    {
	      /* Have a trigraph.  We may or may not have to convert
		 it.  Add a line note regardless, for -Wtrigraphs.  */
	      add_line_note (buffer, s, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  /* We do, and that means we have to switch to the
		     slow path.  */
		  d = (uchar *) s;
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		  break;
		}
	    }
	}


      /* Slow path: copy each character from S down to D, splicing
	 further escaped newlines and converting further trigraphs.  */
      for (;;)
	{
	  c = *++s;
	  *++d = c;

	  if (c == '\n' || c == '\r')
	    {
		  /* Handle DOS line endings.  */
	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
		s++;
	      if (s == buffer->rlimit)
		break;

	      /* Escaped?  */
	      p = d;
	      while (p != buffer->next_line && is_nvspace (p[-1]))
		p--;
	      if (p == buffer->next_line || p[-1] != '\\')
		break;

	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
	      d = p - 2;
	      buffer->next_line = p - 1;
	    }
	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
	    {
	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
	      add_line_note (buffer, d, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		}
	    }
	}
    }
  else
    {
      /* For a from_stage3 buffer only the end of the line is located;
	 no escaped newlines or trigraphs are looked for.  */
      do
	s++;
      while (*s != '\n' && *s != '\r');
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
	s++;
    }

 done:
  /* Terminate the cleaned line (this also turns a '\r' into '\n').  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  buffer->next_line = s + 1;
}
232
233/* Return true if the trigraph indicated by NOTE should be warned
234   about in a comment.  */
235static bool
236warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
237{
238  const uchar *p;
239
240  /* Within comments we don't warn about trigraphs, unless the
241     trigraph forms an escaped newline, as that may change
242     behavior.  */
243  if (note->type != '/')
244    return false;
245
246  /* If -trigraphs, then this was an escaped newline iff the next note
247     is coincident.  */
248  if (CPP_OPTION (pfile, trigraphs))
249    return note[1].pos == note->pos;
250
251  /* Otherwise, see if this forms an escaped newline.  */
252  p = note->pos + 3;
253  while (is_nvspace (*p))
254    p++;
255
256  /* There might have been escaped newlines between the trigraph and the
257     newline we found.  Hence the position test.  */
258  return (*p == '\n' && p < note[1].pos);
259}
260
261/* Process the notes created by add_line_note as far as the current
262   location.  */
263void
264_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
265{
266  cpp_buffer *buffer = pfile->buffer;
267
268  for (;;)
269    {
270      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
271      unsigned int col;
272
273      if (note->pos > buffer->cur)
274	break;
275
276      buffer->cur_note++;
277      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
278
279      if (note->type == '\\' || note->type == ' ')
280	{
281	  if (note->type == ' ' && !in_comment)
282	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
283				 "backslash and newline separated by space");
284
285	  if (buffer->next_line > buffer->rlimit)
286	    {
287	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
288				   "backslash-newline at end of file");
289	      /* Prevent "no newline at end of file" warning.  */
290	      buffer->next_line = buffer->rlimit;
291	    }
292
293	  buffer->line_base = note->pos;
294	  CPP_INCREMENT_LINE (pfile, 0);
295	}
296      else if (_cpp_trigraph_map[note->type])
297	{
298	  if (CPP_OPTION (pfile, warn_trigraphs)
299	      && (!in_comment || warn_in_comment (pfile, note)))
300	    {
301	      if (CPP_OPTION (pfile, trigraphs))
302		cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
303				     "trigraph ??%c converted to %c",
304				     note->type,
305				     (int) _cpp_trigraph_map[note->type]);
306	      else
307		{
308		  cpp_error_with_line
309		    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
310		     "trigraph ??%c ignored, use -trigraphs to enable",
311		     note->type);
312		}
313	    }
314	}
315      else
316	abort ();
317    }
318}
319
320/* Skip a C-style block comment.  We find the end of the comment by
321   seeing if an asterisk is before every '/' we encounter.  Returns
322   nonzero if comment terminated by EOF, zero otherwise.
323
324   Buffer->cur points to the initial asterisk of the comment.  */
325bool
326_cpp_skip_block_comment (cpp_reader *pfile)
327{
328  cpp_buffer *buffer = pfile->buffer;
329  const uchar *cur = buffer->cur;
330  uchar c;
331
332  cur++;
333  if (*cur == '/')
334    cur++;
335
336  for (;;)
337    {
338      /* People like decorating comments with '*', so check for '/'
339	 instead for efficiency.  */
340      c = *cur++;
341
342      if (c == '/')
343	{
344	  if (cur[-2] == '*')
345	    break;
346
347	  /* Warn about potential nested comments, but not if the '/'
348	     comes immediately before the true comment delimiter.
349	     Don't bother to get it right across escaped newlines.  */
350	  if (CPP_OPTION (pfile, warn_comments)
351	      && cur[0] == '*' && cur[1] != '/')
352	    {
353	      buffer->cur = cur;
354	      cpp_error_with_line (pfile, CPP_DL_WARNING,
355				   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
356				   "\"/*\" within comment");
357	    }
358	}
359      else if (c == '\n')
360	{
361	  unsigned int cols;
362	  buffer->cur = cur - 1;
363	  _cpp_process_line_notes (pfile, true);
364	  if (buffer->next_line >= buffer->rlimit)
365	    return true;
366	  _cpp_clean_line (pfile);
367
368	  cols = buffer->next_line - buffer->line_base;
369	  CPP_INCREMENT_LINE (pfile, cols);
370
371	  cur = buffer->cur;
372	}
373    }
374
375  buffer->cur = cur;
376  _cpp_process_line_notes (pfile, true);
377  return false;
378}
379
380/* Skip a C++ line comment, leaving buffer->cur pointing to the
381   terminating newline.  Handles escaped newlines.  Returns nonzero
382   if a multiline comment.  */
383static int
384skip_line_comment (cpp_reader *pfile)
385{
386  cpp_buffer *buffer = pfile->buffer;
387  unsigned int orig_line = pfile->line_table->highest_line;
388
389  while (*buffer->cur != '\n')
390    buffer->cur++;
391
392  _cpp_process_line_notes (pfile, true);
393  return orig_line != pfile->line_table->highest_line;
394}
395
396/* Skips whitespace, saving the next non-whitespace character.  */
397static void
398skip_whitespace (cpp_reader *pfile, cppchar_t c)
399{
400  cpp_buffer *buffer = pfile->buffer;
401  bool saw_NUL = false;
402
403  do
404    {
405      /* Horizontal space always OK.  */
406      if (c == ' ' || c == '\t')
407	;
408      /* Just \f \v or \0 left.  */
409      else if (c == '\0')
410	saw_NUL = true;
411      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
412	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
413			     CPP_BUF_COL (buffer),
414			     "%s in preprocessing directive",
415			     c == '\f' ? "form feed" : "vertical tab");
416
417      c = *buffer->cur++;
418    }
419  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
420  while (is_nvspace (c));
421
422  if (saw_NUL)
423    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
424
425  buffer->cur--;
426}
427
428/* See if the characters of a number token are valid in a name (no
429   '.', '+' or '-').  */
430static int
431name_p (cpp_reader *pfile, const cpp_string *string)
432{
433  unsigned int i;
434
435  for (i = 0; i < string->len; i++)
436    if (!is_idchar (string->text[i]))
437      return 0;
438
439  return 1;
440}
441
442/* After parsing an identifier or other sequence, produce a warning about
443   sequences not in NFC/NFKC.  */
444static void
445warn_about_normalization (cpp_reader *pfile,
446			  const cpp_token *token,
447			  const struct normalize_state *s)
448{
449  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
450      && !pfile->state.skipping)
451    {
452      /* Make sure that the token is printed using UCNs, even
453	 if we'd otherwise happily print UTF-8.  */
454      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
455      size_t sz;
456
457      sz = cpp_spell_token (pfile, token, buf, false) - buf;
458      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
459	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
460			     "`%.*s' is not in NFKC", (int) sz, buf);
461      else
462	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
463			     "`%.*s' is not in NFC", (int) sz, buf);
464    }
465}
466
467/* Returns TRUE if the sequence starting at buffer->cur is invalid in
468   an identifier.  FIRST is TRUE if this starts an identifier.  */
469static bool
470forms_identifier_p (cpp_reader *pfile, int first,
471		    struct normalize_state *state)
472{
473  cpp_buffer *buffer = pfile->buffer;
474
475  if (*buffer->cur == '$')
476    {
477      if (!CPP_OPTION (pfile, dollars_in_ident))
478	return false;
479
480      buffer->cur++;
481      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
482	{
483	  CPP_OPTION (pfile, warn_dollars) = 0;
484	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
485	}
486
487      return true;
488    }
489
490  /* Is this a syntactically valid UCN?  */
491  if (CPP_OPTION (pfile, extended_identifiers)
492      && *buffer->cur == '\\'
493      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
494    {
495      buffer->cur += 2;
496      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
497			  state))
498	return true;
499      buffer->cur -= 2;
500    }
501
502  return false;
503}
504
505/* Lex an identifier starting at BUFFER->CUR - 1.  */
506static cpp_hashnode *
507lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
508		struct normalize_state *nst)
509{
510  cpp_hashnode *result;
511  const uchar *cur;
512  unsigned int len;
513  unsigned int hash = HT_HASHSTEP (0, *base);
514
515  cur = pfile->buffer->cur;
516  if (! starts_ucn)
517    while (ISIDNUM (*cur))
518      {
519	hash = HT_HASHSTEP (hash, *cur);
520	cur++;
521      }
522  pfile->buffer->cur = cur;
523  if (starts_ucn || forms_identifier_p (pfile, false, nst))
524    {
525      /* Slower version for identifiers containing UCNs (or $).  */
526      do {
527	while (ISIDNUM (*pfile->buffer->cur))
528	  {
529	    pfile->buffer->cur++;
530	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
531	  }
532      } while (forms_identifier_p (pfile, false, nst));
533      result = _cpp_interpret_identifier (pfile, base,
534					  pfile->buffer->cur - base);
535    }
536  else
537    {
538      len = cur - base;
539      hash = HT_HASHFINISH (hash, len);
540
541      result = (cpp_hashnode *)
542	ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
543    }
544
545  /* Rarely, identifiers require diagnostics when lexed.  */
546  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
547			&& !pfile->state.skipping, 0))
548    {
549      /* It is allowed to poison the same identifier twice.  */
550      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
551	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
552		   NODE_NAME (result));
553
554      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
555	 replacement list of a variadic macro.  */
556      if (result == pfile->spec_nodes.n__VA_ARGS__
557	  && !pfile->state.va_args_ok)
558	cpp_error (pfile, CPP_DL_PEDWARN,
559		   "__VA_ARGS__ can only appear in the expansion"
560		   " of a C99 variadic macro");
561    }
562
563  return result;
564}
565
566/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
567static void
568lex_number (cpp_reader *pfile, cpp_string *number,
569	    struct normalize_state *nst)
570{
571  const uchar *cur;
572  const uchar *base;
573  uchar *dest;
574
575  base = pfile->buffer->cur - 1;
576  do
577    {
578      cur = pfile->buffer->cur;
579
580      /* N.B. ISIDNUM does not include $.  */
581      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
582	{
583	  cur++;
584	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
585	}
586
587      pfile->buffer->cur = cur;
588    }
589  while (forms_identifier_p (pfile, false, nst));
590
591  number->len = cur - base;
592  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
593  memcpy (dest, base, number->len);
594  dest[number->len] = '\0';
595  number->text = dest;
596}
597
598/* Create a token of type TYPE with a literal spelling.  */
599static void
600create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
601		unsigned int len, enum cpp_ttype type)
602{
603  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
604
605  memcpy (dest, base, len);
606  dest[len] = '\0';
607  token->type = type;
608  token->val.str.len = len;
609  token->val.str.text = dest;
610}
611
612/* Lexes a string, character constant, or angle-bracketed header file
613   name.  The stored string contains the spelling, including opening
614   quote and leading any leading 'L'.  It returns the type of the
615   literal, or CPP_OTHER if it was not properly terminated.
616
617   The spelling is NUL-terminated, but it is not guaranteed that this
618   is the first NUL since embedded NULs are preserved.  */
619static void
620lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
621{
622  bool saw_NUL = false;
623  const uchar *cur;
624  cppchar_t terminator;
625  enum cpp_ttype type;
626
627  cur = base;
628  terminator = *cur++;
629  if (terminator == 'L')
630    terminator = *cur++;
631  if (terminator == '\"')
632    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
633  else if (terminator == '\'')
634    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
635  else
636    terminator = '>', type = CPP_HEADER_NAME;
637
638  for (;;)
639    {
640      cppchar_t c = *cur++;
641
642      /* In #include-style directives, terminators are not escapable.  */
643      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
644	cur++;
645      else if (c == terminator)
646	break;
647      else if (c == '\n')
648	{
649	  cur--;
650	  type = CPP_OTHER;
651	  break;
652	}
653      else if (c == '\0')
654	saw_NUL = true;
655    }
656
657  if (saw_NUL && !pfile->state.skipping)
658    cpp_error (pfile, CPP_DL_WARNING,
659	       "null character(s) preserved in literal");
660
661  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
662    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
663	       (int) terminator);
664
665  pfile->buffer->cur = cur;
666  create_literal (pfile, token, base, cur - base, type);
667}
668
669/* The stored comment includes the comment start and any terminator.  */
670static void
671save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
672	      cppchar_t type)
673{
674  unsigned char *buffer;
675  unsigned int len, clen;
676
677  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
678
679  /* C++ comments probably (not definitely) have moved past a new
680     line, which we don't want to save in the comment.  */
681  if (is_vspace (pfile->buffer->cur[-1]))
682    len--;
683
684  /* If we are currently in a directive, then we need to store all
685     C++ comments as C comments internally, and so we need to
686     allocate a little extra space in that case.
687
688     Note that the only time we encounter a directive here is
689     when we are saving comments in a "#define".  */
690  clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
691
692  buffer = _cpp_unaligned_alloc (pfile, clen);
693
694  token->type = CPP_COMMENT;
695  token->val.str.len = clen;
696  token->val.str.text = buffer;
697
698  buffer[0] = '/';
699  memcpy (buffer + 1, from, len - 1);
700
701  /* Finish conversion to a C comment, if necessary.  */
702  if (pfile->state.in_directive && type == '/')
703    {
704      buffer[1] = '*';
705      buffer[clen - 2] = '*';
706      buffer[clen - 1] = '/';
707    }
708}
709
710/* Allocate COUNT tokens for RUN.  */
711void
712_cpp_init_tokenrun (tokenrun *run, unsigned int count)
713{
714  run->base = XNEWVEC (cpp_token, count);
715  run->limit = run->base + count;
716  run->next = NULL;
717}
718
719/* Returns the next tokenrun, or creates one if there is none.  */
720static tokenrun *
721next_tokenrun (tokenrun *run)
722{
723  if (run->next == NULL)
724    {
725      run->next = XNEW (tokenrun);
726      run->next->prev = run;
727      _cpp_init_tokenrun (run->next, 250);
728    }
729
730  return run->next;
731}
732
733/* Allocate a single token that is invalidated at the same time as the
734   rest of the tokens on the line.  Has its line and col set to the
735   same as the last lexed token, so that diagnostics appear in the
736   right place.  */
737cpp_token *
738_cpp_temp_token (cpp_reader *pfile)
739{
740  cpp_token *old, *result;
741
742  old = pfile->cur_token - 1;
743  if (pfile->cur_token == pfile->cur_run->limit)
744    {
745      pfile->cur_run = next_tokenrun (pfile->cur_run);
746      pfile->cur_token = pfile->cur_run->base;
747    }
748
749  result = pfile->cur_token++;
750  result->src_loc = old->src_loc;
751  return result;
752}
753
754/* Lex a token into RESULT (external interface).  Takes care of issues
755   like directive handling, token lookahead, multiple include
756   optimization and skipping.  */
757const cpp_token *
758_cpp_lex_token (cpp_reader *pfile)
759{
760  cpp_token *result;
761
762  for (;;)
763    {
764      if (pfile->cur_token == pfile->cur_run->limit)
765	{
766	  pfile->cur_run = next_tokenrun (pfile->cur_run);
767	  pfile->cur_token = pfile->cur_run->base;
768	}
769
770      if (pfile->lookaheads)
771	{
772	  pfile->lookaheads--;
773	  result = pfile->cur_token++;
774	}
775      else
776	result = _cpp_lex_direct (pfile);
777
778      if (result->flags & BOL)
779	{
780	  /* Is this a directive.  If _cpp_handle_directive returns
781	     false, it is an assembler #.  */
782	  if (result->type == CPP_HASH
783	      /* 6.10.3 p 11: Directives in a list of macro arguments
784		 gives undefined behavior.  This implementation
785		 handles the directive as normal.  */
786	      && pfile->state.parsing_args != 1)
787	    {
788	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
789		{
790		  if (pfile->directive_result.type == CPP_PADDING)
791		    continue;
792		  result = &pfile->directive_result;
793		}
794	    }
795	  else if (pfile->state.in_deferred_pragma)
796	    result = &pfile->directive_result;
797
798	  if (pfile->cb.line_change && !pfile->state.skipping)
799	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
800	}
801
802      /* We don't skip tokens in directives.  */
803      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
804	break;
805
806      /* Outside a directive, invalidate controlling macros.  At file
807	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
808	 get here and MI optimization works.  */
809      pfile->mi_valid = false;
810
811      if (!pfile->state.skipping || result->type == CPP_EOF)
812	break;
813    }
814
815  return result;
816}
817
818/* Returns true if a fresh line has been loaded.  */
819bool
820_cpp_get_fresh_line (cpp_reader *pfile)
821{
822  int return_at_eof;
823
824  /* We can't get a new line until we leave the current directive.  */
825  if (pfile->state.in_directive)
826    return false;
827
828  for (;;)
829    {
830      cpp_buffer *buffer = pfile->buffer;
831
832      if (!buffer->need_line)
833	return true;
834
835      if (buffer->next_line < buffer->rlimit)
836	{
837	  _cpp_clean_line (pfile);
838	  return true;
839	}
840
841      /* First, get out of parsing arguments state.  */
842      if (pfile->state.parsing_args)
843	return false;
844
845      /* End of buffer.  Non-empty files should end in a newline.  */
846      if (buffer->buf != buffer->rlimit
847	  && buffer->next_line > buffer->rlimit
848	  && !buffer->from_stage3)
849	{
850	  /* Only warn once.  */
851	  buffer->next_line = buffer->rlimit;
852	  cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
853			       CPP_BUF_COLUMN (buffer, buffer->cur),
854			       "no newline at end of file");
855	}
856
857      return_at_eof = buffer->return_at_eof;
858      _cpp_pop_buffer (pfile);
859      if (pfile->buffer == NULL || return_at_eof)
860	return false;
861    }
862}
863
/* Helper for lexing operators that may take one more character.
   Sets result->type to THEN_TYPE and consumes the character at
   buffer->cur if it equals CHAR; otherwise sets result->type to
   ELSE_TYPE and consumes nothing.  Uses the variables `result' and
   `buffer' of the surrounding function.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      result->type = ELSE_TYPE;				\
      if (*buffer->cur == CHAR)				\
	buffer->cur++, result->type = THEN_TYPE;	\
    }							\
  while (0)
872
873/* Lex a token into pfile->cur_token, which is also incremented, to
874   get diagnostics pointing to the correct location.
875
876   Does not handle issues such as token lookahead, multiple-include
877   optimization, directives, skipping etc.  This function is only
878   suitable for use by _cpp_lex_token, and in special cases like
879   lex_expansion_token which doesn't care for any of these issues.
880
881   When meeting a newline, returns CPP_EOF if parsing a directive,
882   otherwise returns to the start of the token buffer if permissible.
883   Returns the location of the lexed token.  */
884cpp_token *
885_cpp_lex_direct (cpp_reader *pfile)
886{
887  cppchar_t c;
888  cpp_buffer *buffer;
889  const unsigned char *comment_start;
890  cpp_token *result = pfile->cur_token++;
891
892 fresh_line:
893  result->flags = 0;
894  buffer = pfile->buffer;
895  if (buffer->need_line)
896    {
897      if (pfile->state.in_deferred_pragma)
898	{
899	  result->type = CPP_PRAGMA_EOL;
900	  pfile->state.in_deferred_pragma = false;
901	  if (!pfile->state.pragma_allow_expansion)
902	    pfile->state.prevent_expansion--;
903	  return result;
904	}
905      if (!_cpp_get_fresh_line (pfile))
906	{
907	  result->type = CPP_EOF;
908	  if (!pfile->state.in_directive)
909	    {
910	      /* Tell the compiler the line number of the EOF token.  */
911	      result->src_loc = pfile->line_table->highest_line;
912	      result->flags = BOL;
913	    }
914	  return result;
915	}
916      if (!pfile->keep_tokens)
917	{
918	  pfile->cur_run = &pfile->base_run;
919	  result = pfile->base_run.base;
920	  pfile->cur_token = result + 1;
921	}
922      result->flags = BOL;
923      if (pfile->state.parsing_args == 2)
924	result->flags |= PREV_WHITE;
925    }
926  buffer = pfile->buffer;
927 update_tokens_line:
928  result->src_loc = pfile->line_table->highest_line;
929
930 skipped_white:
931  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
932      && !pfile->overlaid_buffer)
933    {
934      _cpp_process_line_notes (pfile, false);
935      result->src_loc = pfile->line_table->highest_line;
936    }
937  c = *buffer->cur++;
938
939  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
940			       CPP_BUF_COLUMN (buffer, buffer->cur));
941
942  switch (c)
943    {
944    case ' ': case '\t': case '\f': case '\v': case '\0':
945      result->flags |= PREV_WHITE;
946      skip_whitespace (pfile, c);
947      goto skipped_white;
948
949    case '\n':
950      if (buffer->cur < buffer->rlimit)
951	CPP_INCREMENT_LINE (pfile, 0);
952      buffer->need_line = true;
953      goto fresh_line;
954
955    case '0': case '1': case '2': case '3': case '4':
956    case '5': case '6': case '7': case '8': case '9':
957      {
958	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
959	result->type = CPP_NUMBER;
960	lex_number (pfile, &result->val.str, &nst);
961	warn_about_normalization (pfile, result, &nst);
962	break;
963      }
964
965    case 'L':
966      /* 'L' may introduce wide characters or strings.  */
967      if (*buffer->cur == '\'' || *buffer->cur == '"')
968	{
969	  lex_string (pfile, result, buffer->cur - 1);
970	  break;
971	}
972      /* Fall through.  */
973
974    case '_':
975    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
976    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
977    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
978    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
979    case 'y': case 'z':
980    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
981    case 'G': case 'H': case 'I': case 'J': case 'K':
982    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
983    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
984    case 'Y': case 'Z':
985      result->type = CPP_NAME;
986      {
987	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
988	result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
989					   &nst);
990	warn_about_normalization (pfile, result, &nst);
991      }
992
993      /* Convert named operators to their proper types.  */
994      if (result->val.node->flags & NODE_OPERATOR)
995	{
996	  result->flags |= NAMED_OP;
997	  result->type = (enum cpp_ttype) result->val.node->directive_index;
998	}
999      break;
1000
1001    case '\'':
1002    case '"':
1003      lex_string (pfile, result, buffer->cur - 1);
1004      break;
1005
1006    case '/':
1007      /* A potential block or line comment.  */
1008      comment_start = buffer->cur;
1009      c = *buffer->cur;
1010
1011      if (c == '*')
1012	{
1013	  if (_cpp_skip_block_comment (pfile))
1014	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1015	}
1016      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1017			    || cpp_in_system_header (pfile)))
1018	{
1019	  /* Warn about comments only if pedantically GNUC89, and not
1020	     in system headers.  */
1021	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1022	      && ! buffer->warned_cplusplus_comments)
1023	    {
1024	      cpp_error (pfile, CPP_DL_PEDWARN,
1025			 "C++ style comments are not allowed in ISO C90");
1026	      cpp_error (pfile, CPP_DL_PEDWARN,
1027			 "(this will be reported only once per input file)");
1028	      buffer->warned_cplusplus_comments = 1;
1029	    }
1030
1031	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1032	    cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1033	}
1034      else if (c == '=')
1035	{
1036	  buffer->cur++;
1037	  result->type = CPP_DIV_EQ;
1038	  break;
1039	}
1040      else
1041	{
1042	  result->type = CPP_DIV;
1043	  break;
1044	}
1045
1046      if (!pfile->state.save_comments)
1047	{
1048	  result->flags |= PREV_WHITE;
1049	  goto update_tokens_line;
1050	}
1051
1052      /* Save the comment as a token in its own right.  */
1053      save_comment (pfile, result, comment_start, c);
1054      break;
1055
1056    case '<':
1057      if (pfile->state.angled_headers)
1058	{
1059	  lex_string (pfile, result, buffer->cur - 1);
1060	  break;
1061	}
1062
1063      result->type = CPP_LESS;
1064      if (*buffer->cur == '=')
1065	buffer->cur++, result->type = CPP_LESS_EQ;
1066      else if (*buffer->cur == '<')
1067	{
1068	  buffer->cur++;
1069	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1070	}
1071      else if (CPP_OPTION (pfile, digraphs))
1072	{
1073	  if (*buffer->cur == ':')
1074	    {
1075	      buffer->cur++;
1076	      result->flags |= DIGRAPH;
1077	      result->type = CPP_OPEN_SQUARE;
1078	    }
1079	  else if (*buffer->cur == '%')
1080	    {
1081	      buffer->cur++;
1082	      result->flags |= DIGRAPH;
1083	      result->type = CPP_OPEN_BRACE;
1084	    }
1085	}
1086      break;
1087
1088    case '>':
1089      result->type = CPP_GREATER;
1090      if (*buffer->cur == '=')
1091	buffer->cur++, result->type = CPP_GREATER_EQ;
1092      else if (*buffer->cur == '>')
1093	{
1094	  buffer->cur++;
1095	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1096	}
1097      break;
1098
1099    case '%':
1100      result->type = CPP_MOD;
1101      if (*buffer->cur == '=')
1102	buffer->cur++, result->type = CPP_MOD_EQ;
1103      else if (CPP_OPTION (pfile, digraphs))
1104	{
1105	  if (*buffer->cur == ':')
1106	    {
1107	      buffer->cur++;
1108	      result->flags |= DIGRAPH;
1109	      result->type = CPP_HASH;
1110	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
1111		buffer->cur += 2, result->type = CPP_PASTE;
1112	    }
1113	  else if (*buffer->cur == '>')
1114	    {
1115	      buffer->cur++;
1116	      result->flags |= DIGRAPH;
1117	      result->type = CPP_CLOSE_BRACE;
1118	    }
1119	}
1120      break;
1121
1122    case '.':
1123      result->type = CPP_DOT;
1124      if (ISDIGIT (*buffer->cur))
1125	{
1126	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1127	  result->type = CPP_NUMBER;
1128	  lex_number (pfile, &result->val.str, &nst);
1129	  warn_about_normalization (pfile, result, &nst);
1130	}
1131      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1132	buffer->cur += 2, result->type = CPP_ELLIPSIS;
1133      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1134	buffer->cur++, result->type = CPP_DOT_STAR;
1135      break;
1136
1137    case '+':
1138      result->type = CPP_PLUS;
1139      if (*buffer->cur == '+')
1140	buffer->cur++, result->type = CPP_PLUS_PLUS;
1141      else if (*buffer->cur == '=')
1142	buffer->cur++, result->type = CPP_PLUS_EQ;
1143      break;
1144
1145    case '-':
1146      result->type = CPP_MINUS;
1147      if (*buffer->cur == '>')
1148	{
1149	  buffer->cur++;
1150	  result->type = CPP_DEREF;
1151	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1152	    buffer->cur++, result->type = CPP_DEREF_STAR;
1153	}
1154      else if (*buffer->cur == '-')
1155	buffer->cur++, result->type = CPP_MINUS_MINUS;
1156      else if (*buffer->cur == '=')
1157	buffer->cur++, result->type = CPP_MINUS_EQ;
1158      break;
1159
1160    case '&':
1161      result->type = CPP_AND;
1162      if (*buffer->cur == '&')
1163	buffer->cur++, result->type = CPP_AND_AND;
1164      else if (*buffer->cur == '=')
1165	buffer->cur++, result->type = CPP_AND_EQ;
1166      break;
1167
1168    case '|':
1169      result->type = CPP_OR;
1170      if (*buffer->cur == '|')
1171	buffer->cur++, result->type = CPP_OR_OR;
1172      else if (*buffer->cur == '=')
1173	buffer->cur++, result->type = CPP_OR_EQ;
1174      break;
1175
1176    case ':':
1177      result->type = CPP_COLON;
1178      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1179	buffer->cur++, result->type = CPP_SCOPE;
1180      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1181	{
1182	  buffer->cur++;
1183	  result->flags |= DIGRAPH;
1184	  result->type = CPP_CLOSE_SQUARE;
1185	}
1186      break;
1187
1188    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1189    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1190    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1191    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1192    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1193
1194    case '?': result->type = CPP_QUERY; break;
1195    case '~': result->type = CPP_COMPL; break;
1196    case ',': result->type = CPP_COMMA; break;
1197    case '(': result->type = CPP_OPEN_PAREN; break;
1198    case ')': result->type = CPP_CLOSE_PAREN; break;
1199    case '[': result->type = CPP_OPEN_SQUARE; break;
1200    case ']': result->type = CPP_CLOSE_SQUARE; break;
1201    case '{': result->type = CPP_OPEN_BRACE; break;
1202    case '}': result->type = CPP_CLOSE_BRACE; break;
1203    case ';': result->type = CPP_SEMICOLON; break;
1204
1205      /* @ is a punctuator in Objective-C.  */
1206    case '@': result->type = CPP_ATSIGN; break;
1207
1208    case '$':
1209    case '\\':
1210      {
1211	const uchar *base = --buffer->cur;
1212	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1213
1214	if (forms_identifier_p (pfile, true, &nst))
1215	  {
1216	    result->type = CPP_NAME;
1217	    result->val.node = lex_identifier (pfile, base, true, &nst);
1218	    warn_about_normalization (pfile, result, &nst);
1219	    break;
1220	  }
1221	buffer->cur++;
1222      }
1223
1224    default:
1225      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1226      break;
1227    }
1228
1229  return result;
1230}
1231
1232/* An upper bound on the number of bytes needed to spell TOKEN.
1233   Does not include preceding whitespace.  */
1234unsigned int
1235cpp_token_len (const cpp_token *token)
1236{
1237  unsigned int len;
1238
1239  switch (TOKEN_SPELL (token))
1240    {
1241    default:		len = 4;				break;
1242    case SPELL_LITERAL:	len = token->val.str.len;		break;
1243    case SPELL_IDENT:	len = NODE_LEN (token->val.node) * 10;	break;
1244    }
1245
1246  return len;
1247}
1248
/* Decode the UTF-8 sequence that starts at NAME and write it to
   BUFFER as a \U escape followed by eight lower-case hex digits.
   Exactly ten bytes are always written to BUFFER; no terminating NUL
   is added.  Returns the number of bytes of NAME that made up the
   sequence, as given by the count of leading one bits in the first
   byte.  Aborts if a trailing byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned lead_bits = *name;
  int seq_len = 0;
  int consumed;
  int shift;
  unsigned long code_point;

  /* The number of leading one bits in the first byte is the length
     of the whole sequence.  */
  while (lead_bits & 0x80)
    {
      seq_len++;
      lead_bits <<= 1;
    }

  /* What remains of the first byte below the length marker holds the
     most significant payload bits.  */
  code_point = *name & (0x7F >> seq_len);

  /* Every following byte contributes six more bits.  */
  for (consumed = 1; consumed < seq_len; consumed++)
    {
      unsigned char trail = *++name;

      /* Ill-formed UTF-8.  */
      if ((trail & ~0x3F) != 0x80)
        abort ();

      code_point = (code_point << 6) | (trail & 0x3F);
    }

  *buffer++ = '\\';
  *buffer++ = 'U';

  /* Emit the eight nibbles, most significant first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1282
1283
1284/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1285   already contain the enough space to hold the token's spelling.
1286   Returns a pointer to the character after the last character written.
1287   FORSTRING is true if this is to be the spelling after translation
1288   phase 1 (this is different for UCNs).
1289   FIXME: Would be nice if we didn't need the PFILE argument.  */
1290unsigned char *
1291cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1292		 unsigned char *buffer, bool forstring)
1293{
1294  switch (TOKEN_SPELL (token))
1295    {
1296    case SPELL_OPERATOR:
1297      {
1298	const unsigned char *spelling;
1299	unsigned char c;
1300
1301	if (token->flags & DIGRAPH)
1302	  spelling
1303	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1304	else if (token->flags & NAMED_OP)
1305	  goto spell_ident;
1306	else
1307	  spelling = TOKEN_NAME (token);
1308
1309	while ((c = *spelling++) != '\0')
1310	  *buffer++ = c;
1311      }
1312      break;
1313
1314    spell_ident:
1315    case SPELL_IDENT:
1316      if (forstring)
1317	{
1318	  memcpy (buffer, NODE_NAME (token->val.node),
1319		  NODE_LEN (token->val.node));
1320	  buffer += NODE_LEN (token->val.node);
1321	}
1322      else
1323	{
1324	  size_t i;
1325	  const unsigned char * name = NODE_NAME (token->val.node);
1326
1327	  for (i = 0; i < NODE_LEN (token->val.node); i++)
1328	    if (name[i] & ~0x7F)
1329	      {
1330		i += utf8_to_ucn (buffer, name + i) - 1;
1331		buffer += 10;
1332	      }
1333	    else
1334	      *buffer++ = NODE_NAME (token->val.node)[i];
1335	}
1336      break;
1337
1338    case SPELL_LITERAL:
1339      memcpy (buffer, token->val.str.text, token->val.str.len);
1340      buffer += token->val.str.len;
1341      break;
1342
1343    case SPELL_NONE:
1344      cpp_error (pfile, CPP_DL_ICE,
1345		 "unspellable token %s", TOKEN_NAME (token));
1346      break;
1347    }
1348
1349  return buffer;
1350}
1351
1352/* Returns TOKEN spelt as a null-terminated string.  The string is
1353   freed when the reader is destroyed.  Useful for diagnostics.  */
1354unsigned char *
1355cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1356{
1357  unsigned int len = cpp_token_len (token) + 1;
1358  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1359
1360  end = cpp_spell_token (pfile, token, start, false);
1361  end[0] = '\0';
1362
1363  return start;
1364}
1365
1366/* Used by C front ends, which really should move to using
1367   cpp_token_as_text.  */
1368const char *
1369cpp_type2name (enum cpp_ttype type)
1370{
1371  return (const char *) token_spellings[type].name;
1372}
1373
1374/* Writes the spelling of token to FP, without any preceding space.
1375   Separated from cpp_spell_token for efficiency - to avoid stdio
1376   double-buffering.  */
1377void
1378cpp_output_token (const cpp_token *token, FILE *fp)
1379{
1380  switch (TOKEN_SPELL (token))
1381    {
1382    case SPELL_OPERATOR:
1383      {
1384	const unsigned char *spelling;
1385	int c;
1386
1387	if (token->flags & DIGRAPH)
1388	  spelling
1389	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1390	else if (token->flags & NAMED_OP)
1391	  goto spell_ident;
1392	else
1393	  spelling = TOKEN_NAME (token);
1394
1395	c = *spelling;
1396	do
1397	  putc (c, fp);
1398	while ((c = *++spelling) != '\0');
1399      }
1400      break;
1401
1402    spell_ident:
1403    case SPELL_IDENT:
1404      {
1405	size_t i;
1406	const unsigned char * name = NODE_NAME (token->val.node);
1407
1408	for (i = 0; i < NODE_LEN (token->val.node); i++)
1409	  if (name[i] & ~0x7F)
1410	    {
1411	      unsigned char buffer[10];
1412	      i += utf8_to_ucn (buffer, name + i) - 1;
1413	      fwrite (buffer, 1, 10, fp);
1414	    }
1415	  else
1416	    fputc (NODE_NAME (token->val.node)[i], fp);
1417      }
1418      break;
1419
1420    case SPELL_LITERAL:
1421      fwrite (token->val.str.text, 1, token->val.str.len, fp);
1422      break;
1423
1424    case SPELL_NONE:
1425      /* An error, most probably.  */
1426      break;
1427    }
1428}
1429
1430/* Compare two tokens.  */
1431int
1432_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1433{
1434  if (a->type == b->type && a->flags == b->flags)
1435    switch (TOKEN_SPELL (a))
1436      {
1437      default:			/* Keep compiler happy.  */
1438      case SPELL_OPERATOR:
1439	return 1;
1440      case SPELL_NONE:
1441	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1442      case SPELL_IDENT:
1443	return a->val.node == b->val.node;
1444      case SPELL_LITERAL:
1445	return (a->val.str.len == b->val.str.len
1446		&& !memcmp (a->val.str.text, b->val.str.text,
1447			    a->val.str.len));
1448      }
1449
1450  return 0;
1451}
1452
1453/* Returns nonzero if a space should be inserted to avoid an
1454   accidental token paste for output.  For simplicity, it is
1455   conservative, and occasionally advises a space where one is not
1456   needed, e.g. "." and ".2".  */
1457int
1458cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1459		 const cpp_token *token2)
1460{
1461  enum cpp_ttype a = token1->type, b = token2->type;
1462  cppchar_t c;
1463
1464  if (token1->flags & NAMED_OP)
1465    a = CPP_NAME;
1466  if (token2->flags & NAMED_OP)
1467    b = CPP_NAME;
1468
1469  c = EOF;
1470  if (token2->flags & DIGRAPH)
1471    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1472  else if (token_spellings[b].category == SPELL_OPERATOR)
1473    c = token_spellings[b].name[0];
1474
1475  /* Quickly get everything that can paste with an '='.  */
1476  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1477    return 1;
1478
1479  switch (a)
1480    {
1481    case CPP_GREATER:	return c == '>';
1482    case CPP_LESS:	return c == '<' || c == '%' || c == ':';
1483    case CPP_PLUS:	return c == '+';
1484    case CPP_MINUS:	return c == '-' || c == '>';
1485    case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1486    case CPP_MOD:	return c == ':' || c == '>';
1487    case CPP_AND:	return c == '&';
1488    case CPP_OR:	return c == '|';
1489    case CPP_COLON:	return c == ':' || c == '>';
1490    case CPP_DEREF:	return c == '*';
1491    case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1492    case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1493    case CPP_NAME:	return ((b == CPP_NUMBER
1494				 && name_p (pfile, &token2->val.str))
1495				|| b == CPP_NAME
1496				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1497    case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1498				|| c == '.' || c == '+' || c == '-');
1499				      /* UCNs */
1500    case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
1501				 && b == CPP_NAME)
1502				|| (CPP_OPTION (pfile, objc)
1503				    && token1->val.str.text[0] == '@'
1504				    && (b == CPP_NAME || b == CPP_STRING)));
1505    default:		break;
1506    }
1507
1508  return 0;
1509}
1510
1511/* Output all the remaining tokens on the current line, and a newline
1512   character, to FP.  Leading whitespace is removed.  If there are
1513   macros, special token padding is not performed.  */
1514void
1515cpp_output_line (cpp_reader *pfile, FILE *fp)
1516{
1517  const cpp_token *token;
1518
1519  token = cpp_get_token (pfile);
1520  while (token->type != CPP_EOF)
1521    {
1522      cpp_output_token (token, fp);
1523      token = cpp_get_token (pfile);
1524      if (token->flags & PREV_WHITE)
1525	putc (' ', fp);
1526    }
1527
1528  putc ('\n', fp);
1529}
1530
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still accepted for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit.  Every macro argument is parenthesized so that an
   argument containing a low-precedence operator expands correctly.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1545
1546/* Create a new allocation buffer.  Place the control block at the end
1547   of the buffer, so that buffer overflows will cause immediate chaos.  */
1548static _cpp_buff *
1549new_buff (size_t len)
1550{
1551  _cpp_buff *result;
1552  unsigned char *base;
1553
1554  if (len < MIN_BUFF_SIZE)
1555    len = MIN_BUFF_SIZE;
1556  len = CPP_ALIGN (len);
1557
1558  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1559  result = (_cpp_buff *) (base + len);
1560  result->base = base;
1561  result->cur = base;
1562  result->limit = base + len;
1563  result->next = NULL;
1564  return result;
1565}
1566
1567/* Place a chain of unwanted allocation buffers on the free list.  */
1568void
1569_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1570{
1571  _cpp_buff *end = buff;
1572
1573  while (end->next)
1574    end = end->next;
1575  end->next = pfile->free_buffs;
1576  pfile->free_buffs = buff;
1577}
1578
1579/* Return a free buffer of size at least MIN_SIZE.  */
1580_cpp_buff *
1581_cpp_get_buff (cpp_reader *pfile, size_t min_size)
1582{
1583  _cpp_buff *result, **p;
1584
1585  for (p = &pfile->free_buffs;; p = &(*p)->next)
1586    {
1587      size_t size;
1588
1589      if (*p == NULL)
1590	return new_buff (min_size);
1591      result = *p;
1592      size = result->limit - result->base;
1593      /* Return a buffer that's big enough, but don't waste one that's
1594         way too big.  */
1595      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1596	break;
1597    }
1598
1599  *p = result->next;
1600  result->next = NULL;
1601  result->cur = result->base;
1602  return result;
1603}
1604
1605/* Creates a new buffer with enough space to hold the uncommitted
1606   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1607   the excess bytes to the new buffer.  Chains the new buffer after
1608   BUFF, and returns the new buffer.  */
1609_cpp_buff *
1610_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1611{
1612  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1613  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1614
1615  buff->next = new_buff;
1616  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1617  return new_buff;
1618}
1619
1620/* Creates a new buffer with enough space to hold the uncommitted
1621   remaining bytes of the buffer pointed to by BUFF, and at least
1622   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1623   Chains the new buffer before the buffer pointed to by BUFF, and
1624   updates the pointer to point to the new buffer.  */
1625void
1626_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1627{
1628  _cpp_buff *new_buff, *old_buff = *pbuff;
1629  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1630
1631  new_buff = _cpp_get_buff (pfile, size);
1632  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1633  new_buff->next = old_buff;
1634  *pbuff = new_buff;
1635}
1636
1637/* Free a chain of buffers starting at BUFF.  */
1638void
1639_cpp_free_buff (_cpp_buff *buff)
1640{
1641  _cpp_buff *next;
1642
1643  for (; buff; buff = next)
1644    {
1645      next = buff->next;
1646      free (buff->base);
1647    }
1648}
1649
1650/* Allocate permanent, unaligned storage of length LEN.  */
1651unsigned char *
1652_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1653{
1654  _cpp_buff *buff = pfile->u_buff;
1655  unsigned char *result = buff->cur;
1656
1657  if (len > (size_t) (buff->limit - result))
1658    {
1659      buff = _cpp_get_buff (pfile, len);
1660      buff->next = pfile->u_buff;
1661      pfile->u_buff = buff;
1662      result = buff->cur;
1663    }
1664
1665  buff->cur = result + len;
1666  return result;
1667}
1668
1669/* Allocate permanent, unaligned storage of length LEN from a_buff.
1670   That buffer is used for growing allocations when saving macro
1671   replacement lists in a #define, and when parsing an answer to an
1672   assertion in #assert, #unassert or #if (and therefore possibly
1673   whilst expanding macros).  It therefore must not be used by any
1674   code that they might call: specifically the lexer and the guts of
1675   the macro expander.
1676
1677   All existing other uses clearly fit this restriction: storing
1678   registered pragmas during initialization.  */
1679unsigned char *
1680_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1681{
1682  _cpp_buff *buff = pfile->a_buff;
1683  unsigned char *result = buff->cur;
1684
1685  if (len > (size_t) (buff->limit - result))
1686    {
1687      buff = _cpp_get_buff (pfile, len);
1688      buff->next = pfile->a_buff;
1689      pfile->a_buff = buff;
1690      result = buff->cur;
1691    }
1692
1693  buff->cur = result + len;
1694  return result;
1695}
1696
1697/* Say which field of TOK is in use.  */
1698
1699enum cpp_token_fld_kind
1700cpp_token_val_index (cpp_token *tok)
1701{
1702  switch (TOKEN_SPELL (tok))
1703    {
1704    case SPELL_IDENT:
1705      return CPP_TOKEN_FLD_NODE;
1706    case SPELL_LITERAL:
1707      return CPP_TOKEN_FLD_STR;
1708    case SPELL_NONE:
1709      if (tok->type == CPP_MACRO_ARG)
1710	return CPP_TOKEN_FLD_ARG_NO;
1711      else if (tok->type == CPP_PADDING)
1712	return CPP_TOKEN_FLD_SOURCE;
1713      else if (tok->type == CPP_PRAGMA)
1714	return CPP_TOKEN_FLD_PRAGMA;
1715      /* else fall through */
1716    default:
1717      return CPP_TOKEN_FLD_NONE;
1718    }
1719}
1720