lex.c revision 259890
1/* CPP Library - lexical analysis.
2   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3   Contributed by Per Bothner, 1994-95.
4   Based on CCCP program by Paul Rubin, June 1986
5   Adapted to ANSI C, Richard Stallman, Jan 1987
6   Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
21
22#include "config.h"
23#include "system.h"
24#include "cpplib.h"
25#include "internal.h"
26
/* Spelling category recorded for each token type in token_spellings
   below.  NOTE(review): the names suggest operators, identifiers,
   literals and "no spelling" respectively -- confirm against the
   code that consumes TOKEN_SPELL.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};

/* One entry of the token spelling table: the spelling category of a
   token type together with a fixed name string for it.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};

/* Spellings of the digraph forms.  NOTE(review): the order presumably
   has to match whatever index the users of this table compute --
   confirm before reordering.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };

/* Build token_spellings by expanding TTYPE_TABLE with local
   definitions of OP and TK: an OP entry becomes a SPELL_OPERATOR
   record holding the string S; a TK entry becomes a SPELL_<S> record
   holding the stringified enumerator name E.  */
#define OP(e, s) { SPELL_OPERATOR, U s  },
#define TK(e, s) { SPELL_ ## s,    U #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Look up the spelling category / name string of TOKEN, indexing the
   table by TOKEN->type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Prototypes for helpers that are private to this file.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
			    unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);
65
66
67/* Utility routine:
68
69   Compares, the token TOKEN to the NUL-terminated string STRING.
70   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
71int
72cpp_ideq (const cpp_token *token, const char *string)
73{
74  if (token->type != CPP_NAME)
75    return 0;
76
77  return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78}
79
80/* Record a note TYPE at byte POS into the current cleaned logical
81   line.  */
82static void
83add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84{
85  if (buffer->notes_used == buffer->notes_cap)
86    {
87      buffer->notes_cap = buffer->notes_cap * 2 + 200;
88      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89                                  buffer->notes_cap);
90    }
91
92  buffer->notes[buffer->notes_used].pos = pos;
93  buffer->notes[buffer->notes_used].type = type;
94  buffer->notes_used++;
95}
96
/* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   Starting at BUFFER->next_line, find the end of the next logical
   line, splicing out backslash-newline sequences and, when the
   trigraphs option is on, replacing trigraphs in place.  A line note
   is recorded for every splice and for every trigraph seen, followed
   by one sentinel note.  On return BUFFER->cur and BUFFER->line_base
   point at the start of the line, the cleaned line is terminated by
   '\n', and BUFFER->next_line points just past the input consumed.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  const uchar *s;
  uchar c, *d, *p;

  buffer = pfile->buffer;
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  /* S is pre-incremented before every read below, so start one byte
     early.  */
  s = buffer->next_line - 1;

  if (!buffer->from_stage3)
    {
      /* The most recent backslash seen on this line, if any.  */
      const uchar *pbackslash = NULL;

      /* Short circuit for the common case of an un-escaped line with
	 no trigraphs.  The primary win here is by not writing any
	 data back to memory until we have to.  */
      for (;;)
	{
	  c = *++s;
	  if (__builtin_expect (c == '\n', false)
	      || __builtin_expect (c == '\r', false))
	    {
	      d = (uchar *) s;

	      /* A line terminator found at rlimit ends the scan with
		 nothing further to examine.  */
	      if (__builtin_expect (s == buffer->rlimit, false))
		goto done;

	      /* DOS line ending? */
	      if (__builtin_expect (c == '\r', false)
		  && s[1] == '\n')
		{
		  s++;
		  if (s == buffer->rlimit)
		    goto done;
		}

	      /* Without a backslash on the line the newline cannot be
		 escaped.  */
	      if (__builtin_expect (pbackslash == NULL, true))
		goto done;

	      /* Check for escaped newline.  */
	      p = d;
	      while (is_nvspace (p[-1]))
		p--;
	      if (p - 1 != pbackslash)
		goto done;

	      /* Have an escaped newline; process it and proceed to
		 the slow path.  The note type is ' ' when whitespace
		 separated the backslash from the newline, and '\\'
		 otherwise.  */
	      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
	      /* The slow loop pre-increments D before each store, so
		 this makes the next store overwrite the backslash.  */
	      d = p - 2;
	      /* From here on next_line marks where the text following
		 the latest splice begins; the slow loop uses it as the
		 lower bound of its backward whitespace scans.  */
	      buffer->next_line = p - 1;
	      break;
	    }
	  if (__builtin_expect (c == '\\', false))
	    pbackslash = s;
	  else if (__builtin_expect (c == '?', false)
		   && __builtin_expect (s[1] == '?', false)
		   && _cpp_trigraph_map[s[2]])
	    {
	      /* Have a trigraph.  We may or may not have to convert
		 it.  Add a line note regardless, for -Wtrigraphs.  */
	      add_line_note (buffer, s, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  /* We do, and that means we have to switch to the
		     slow path.  */
		  d = (uchar *) s;
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		  break;
		}
	    }
	}


      /* Slow path: copy the rest of the line down over the text that
	 was spliced out or converted, with D (write position)
	 trailing S (read position).  */
      for (;;)
	{
	  c = *++s;
	  *++d = c;

	  if (c == '\n' || c == '\r')
	    {
		  /* Handle DOS line endings.  */
	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
		s++;
	      if (s == buffer->rlimit)
		break;

	      /* Escaped?  */
	      p = d;
	      while (p != buffer->next_line && is_nvspace (p[-1]))
		p--;
	      if (p == buffer->next_line || p[-1] != '\\')
		break;

	      /* Same bookkeeping as for the splice found by the fast
		 loop above.  */
	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
	      d = p - 2;
	      buffer->next_line = p - 1;
	    }
	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
	    {
	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
	      add_line_note (buffer, d, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  /* Replace the '?' just copied with the converted
		     character and skip the other two input bytes.  */
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		}
	    }
	}
    }
  else
    {
      /* For a from_stage3 buffer no splicing or trigraph handling is
	 done; just find the end of the line.  */
      do
	s++;
      while (*s != '\n' && *s != '\r');
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
	s++;
    }

 done:
  /* Terminate the cleaned line uniformly with '\n', whatever line
     ending the input used.  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  buffer->next_line = s + 1;
}
232
233/* Return true if the trigraph indicated by NOTE should be warned
234   about in a comment.  */
235static bool
236warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
237{
238  const uchar *p;
239
240  /* Within comments we don't warn about trigraphs, unless the
241     trigraph forms an escaped newline, as that may change
242     behavior.  */
243  if (note->type != '/')
244    return false;
245
246  /* If -trigraphs, then this was an escaped newline iff the next note
247     is coincident.  */
248  if (CPP_OPTION (pfile, trigraphs))
249    return note[1].pos == note->pos;
250
251  /* Otherwise, see if this forms an escaped newline.  */
252  p = note->pos + 3;
253  while (is_nvspace (*p))
254    p++;
255
256  /* There might have been escaped newlines between the trigraph and the
257     newline we found.  Hence the position test.  */
258  return (*p == '\n' && p < note[1].pos);
259}
260
261/* Process the notes created by add_line_note as far as the current
262   location.  */
263void
264_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
265{
266  cpp_buffer *buffer = pfile->buffer;
267
268  for (;;)
269    {
270      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
271      unsigned int col;
272
273      if (note->pos > buffer->cur)
274	break;
275
276      buffer->cur_note++;
277      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
278
279      if (note->type == '\\' || note->type == ' ')
280	{
281	  if (note->type == ' ' && !in_comment)
282	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
283				 "backslash and newline separated by space");
284
285	  if (buffer->next_line > buffer->rlimit)
286	    {
287	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
288				   "backslash-newline at end of file");
289	      /* Prevent "no newline at end of file" warning.  */
290	      buffer->next_line = buffer->rlimit;
291	    }
292
293	  buffer->line_base = note->pos;
294	  CPP_INCREMENT_LINE (pfile, 0);
295	}
296      else if (_cpp_trigraph_map[note->type])
297	{
298	  if (CPP_OPTION (pfile, warn_trigraphs)
299	      && (!in_comment || warn_in_comment (pfile, note)))
300	    {
301	      if (CPP_OPTION (pfile, trigraphs))
302		cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
303				     "trigraph ??%c converted to %c",
304				     note->type,
305				     (int) _cpp_trigraph_map[note->type]);
306	      else
307		{
308		  cpp_error_with_line
309		    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
310		     "trigraph ??%c ignored, use -trigraphs to enable",
311		     note->type);
312		}
313	    }
314	}
315      else
316	abort ();
317    }
318}
319
320/* Skip a C-style block comment.  We find the end of the comment by
321   seeing if an asterisk is before every '/' we encounter.  Returns
322   nonzero if comment terminated by EOF, zero otherwise.
323
324   Buffer->cur points to the initial asterisk of the comment.  */
325bool
326_cpp_skip_block_comment (cpp_reader *pfile)
327{
328  cpp_buffer *buffer = pfile->buffer;
329  const uchar *cur = buffer->cur;
330  uchar c;
331
332  cur++;
333  if (*cur == '/')
334    cur++;
335
336  for (;;)
337    {
338      /* People like decorating comments with '*', so check for '/'
339	 instead for efficiency.  */
340      c = *cur++;
341
342      if (c == '/')
343	{
344	  if (cur[-2] == '*')
345	    break;
346
347	  /* Warn about potential nested comments, but not if the '/'
348	     comes immediately before the true comment delimiter.
349	     Don't bother to get it right across escaped newlines.  */
350	  if (CPP_OPTION (pfile, warn_comments)
351	      && cur[0] == '*' && cur[1] != '/')
352	    {
353	      buffer->cur = cur;
354	      cpp_error_with_line (pfile, CPP_DL_WARNING,
355				   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
356				   "\"/*\" within comment");
357	    }
358	}
359      else if (c == '\n')
360	{
361	  unsigned int cols;
362	  buffer->cur = cur - 1;
363	  _cpp_process_line_notes (pfile, true);
364	  if (buffer->next_line >= buffer->rlimit)
365	    return true;
366	  _cpp_clean_line (pfile);
367
368	  cols = buffer->next_line - buffer->line_base;
369	  CPP_INCREMENT_LINE (pfile, cols);
370
371	  cur = buffer->cur;
372	}
373    }
374
375  buffer->cur = cur;
376  _cpp_process_line_notes (pfile, true);
377  return false;
378}
379
380/* Skip a C++ line comment, leaving buffer->cur pointing to the
381   terminating newline.  Handles escaped newlines.  Returns nonzero
382   if a multiline comment.  */
383static int
384skip_line_comment (cpp_reader *pfile)
385{
386  cpp_buffer *buffer = pfile->buffer;
387  unsigned int orig_line = pfile->line_table->highest_line;
388
389  while (*buffer->cur != '\n')
390    buffer->cur++;
391
392  _cpp_process_line_notes (pfile, true);
393  return orig_line != pfile->line_table->highest_line;
394}
395
396/* Skips whitespace, saving the next non-whitespace character.  */
397static void
398skip_whitespace (cpp_reader *pfile, cppchar_t c)
399{
400  cpp_buffer *buffer = pfile->buffer;
401  bool saw_NUL = false;
402
403  do
404    {
405      /* Horizontal space always OK.  */
406      if (c == ' ' || c == '\t')
407	;
408      /* Just \f \v or \0 left.  */
409      else if (c == '\0')
410	saw_NUL = true;
411      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
412	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
413			     CPP_BUF_COL (buffer),
414			     "%s in preprocessing directive",
415			     c == '\f' ? "form feed" : "vertical tab");
416
417      c = *buffer->cur++;
418    }
419  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
420  while (is_nvspace (c));
421
422  if (saw_NUL)
423    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
424
425  buffer->cur--;
426}
427
428/* See if the characters of a number token are valid in a name (no
429   '.', '+' or '-').  */
430static int
431name_p (cpp_reader *pfile, const cpp_string *string)
432{
433  unsigned int i;
434
435  for (i = 0; i < string->len; i++)
436    if (!is_idchar (string->text[i]))
437      return 0;
438
439  return 1;
440}
441
442/* After parsing an identifier or other sequence, produce a warning about
443   sequences not in NFC/NFKC.  */
444static void
445warn_about_normalization (cpp_reader *pfile,
446			  const cpp_token *token,
447			  const struct normalize_state *s)
448{
449  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
450      && !pfile->state.skipping)
451    {
452      /* Make sure that the token is printed using UCNs, even
453	 if we'd otherwise happily print UTF-8.  */
454      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
455      size_t sz;
456
457      sz = cpp_spell_token (pfile, token, buf, false) - buf;
458      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
459	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
460			     "`%.*s' is not in NFKC", (int) sz, buf);
461      else
462	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
463			     "`%.*s' is not in NFC", (int) sz, buf);
464    }
465}
466
467/* Returns TRUE if the sequence starting at buffer->cur is invalid in
468   an identifier.  FIRST is TRUE if this starts an identifier.  */
469static bool
470forms_identifier_p (cpp_reader *pfile, int first,
471		    struct normalize_state *state)
472{
473  cpp_buffer *buffer = pfile->buffer;
474
475  if (*buffer->cur == '$')
476    {
477      if (!CPP_OPTION (pfile, dollars_in_ident))
478	return false;
479
480      buffer->cur++;
481      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
482	{
483	  CPP_OPTION (pfile, warn_dollars) = 0;
484	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
485	}
486
487      return true;
488    }
489
490  /* Is this a syntactically valid UCN?  */
491  if (CPP_OPTION (pfile, extended_identifiers)
492      && *buffer->cur == '\\'
493      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
494    {
495      buffer->cur += 2;
496      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
497			  state))
498	return true;
499      buffer->cur -= 2;
500    }
501
502  return false;
503}
504
505/* Lex an identifier starting at BUFFER->CUR - 1.  */
506static cpp_hashnode *
507lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
508		struct normalize_state *nst)
509{
510  cpp_hashnode *result;
511  const uchar *cur;
512  unsigned int len;
513  unsigned int hash = HT_HASHSTEP (0, *base);
514
515  cur = pfile->buffer->cur;
516  if (! starts_ucn)
517    while (ISIDNUM (*cur))
518      {
519	hash = HT_HASHSTEP (hash, *cur);
520	cur++;
521      }
522  pfile->buffer->cur = cur;
523  if (starts_ucn || forms_identifier_p (pfile, false, nst))
524    {
525      /* Slower version for identifiers containing UCNs (or $).  */
526      do {
527	while (ISIDNUM (*pfile->buffer->cur))
528	  {
529	    pfile->buffer->cur++;
530	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
531	  }
532      } while (forms_identifier_p (pfile, false, nst));
533      result = _cpp_interpret_identifier (pfile, base,
534					  pfile->buffer->cur - base);
535    }
536  else
537    {
538      len = cur - base;
539      hash = HT_HASHFINISH (hash, len);
540
541      result = (cpp_hashnode *)
542	ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
543    }
544
545  /* Rarely, identifiers require diagnostics when lexed.  */
546  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
547			&& !pfile->state.skipping, 0))
548    {
549      /* It is allowed to poison the same identifier twice.  */
550      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
551	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
552		   NODE_NAME (result));
553
554      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
555	 replacement list of a variadic macro.  */
556      if (result == pfile->spec_nodes.n__VA_ARGS__
557	  && !pfile->state.va_args_ok)
558	cpp_error (pfile, CPP_DL_PEDWARN,
559		   "__VA_ARGS__ can only appear in the expansion"
560		   " of a C99 variadic macro");
561    }
562
563  return result;
564}
565
566/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
567static void
568lex_number (cpp_reader *pfile, cpp_string *number,
569	    struct normalize_state *nst)
570{
571  const uchar *cur;
572  const uchar *base;
573  uchar *dest;
574
575  base = pfile->buffer->cur - 1;
576  do
577    {
578      cur = pfile->buffer->cur;
579
580      /* N.B. ISIDNUM does not include $.  */
581      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
582	{
583	  cur++;
584	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
585	}
586
587      pfile->buffer->cur = cur;
588    }
589  while (forms_identifier_p (pfile, false, nst));
590
591  number->len = cur - base;
592  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
593  memcpy (dest, base, number->len);
594  dest[number->len] = '\0';
595  number->text = dest;
596}
597
598/* Create a token of type TYPE with a literal spelling.  */
599static void
600create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
601		unsigned int len, enum cpp_ttype type)
602{
603  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
604
605  memcpy (dest, base, len);
606  dest[len] = '\0';
607  token->type = type;
608  token->val.str.len = len;
609  token->val.str.text = dest;
610}
611
612/* Lexes a string, character constant, or angle-bracketed header file
613   name.  The stored string contains the spelling, including opening
614   quote and leading any leading 'L'.  It returns the type of the
615   literal, or CPP_OTHER if it was not properly terminated.
616
617   The spelling is NUL-terminated, but it is not guaranteed that this
618   is the first NUL since embedded NULs are preserved.  */
619static void
620lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
621{
622  bool saw_NUL = false;
623  const uchar *cur;
624  cppchar_t terminator;
625  enum cpp_ttype type;
626
627  cur = base;
628  terminator = *cur++;
629  if (terminator == 'L')
630    terminator = *cur++;
631  if (terminator == '\"')
632    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
633  else if (terminator == '\'')
634    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
635  else
636    terminator = '>', type = CPP_HEADER_NAME;
637
638  for (;;)
639    {
640      cppchar_t c = *cur++;
641
642      /* In #include-style directives, terminators are not escapable.  */
643      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
644	cur++;
645      else if (c == terminator)
646	break;
647      else if (c == '\n')
648	{
649	  cur--;
650	  type = CPP_OTHER;
651	  break;
652	}
653      else if (c == '\0')
654	saw_NUL = true;
655    }
656
657  if (saw_NUL && !pfile->state.skipping)
658    cpp_error (pfile, CPP_DL_WARNING,
659	       "null character(s) preserved in literal");
660
661  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
662    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
663	       (int) terminator);
664
665  pfile->buffer->cur = cur;
666  create_literal (pfile, token, base, cur - base, type);
667}
668
669/* The stored comment includes the comment start and any terminator.  */
670static void
671save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
672	      cppchar_t type)
673{
674  unsigned char *buffer;
675  unsigned int len, clen;
676
677  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
678
679  /* C++ comments probably (not definitely) have moved past a new
680     line, which we don't want to save in the comment.  */
681  if (is_vspace (pfile->buffer->cur[-1]))
682    len--;
683
684  /* If we are currently in a directive, then we need to store all
685     C++ comments as C comments internally, and so we need to
686     allocate a little extra space in that case.
687
688     Note that the only time we encounter a directive here is
689     when we are saving comments in a "#define".  */
690  clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
691
692  buffer = _cpp_unaligned_alloc (pfile, clen);
693
694  token->type = CPP_COMMENT;
695  token->val.str.len = clen;
696  token->val.str.text = buffer;
697
698  buffer[0] = '/';
699  memcpy (buffer + 1, from, len - 1);
700
701  /* Finish conversion to a C comment, if necessary.  */
702  if (pfile->state.in_directive && type == '/')
703    {
704      buffer[1] = '*';
705      buffer[clen - 2] = '*';
706      buffer[clen - 1] = '/';
707    }
708}
709
710/* Allocate COUNT tokens for RUN.  */
711void
712_cpp_init_tokenrun (tokenrun *run, unsigned int count)
713{
714  run->base = XNEWVEC (cpp_token, count);
715  run->limit = run->base + count;
716  run->next = NULL;
717}
718
719/* Returns the next tokenrun, or creates one if there is none.  */
720static tokenrun *
721next_tokenrun (tokenrun *run)
722{
723  if (run->next == NULL)
724    {
725      run->next = XNEW (tokenrun);
726      run->next->prev = run;
727      _cpp_init_tokenrun (run->next, 250);
728    }
729
730  return run->next;
731}
732
733/* Allocate a single token that is invalidated at the same time as the
734   rest of the tokens on the line.  Has its line and col set to the
735   same as the last lexed token, so that diagnostics appear in the
736   right place.  */
737cpp_token *
738_cpp_temp_token (cpp_reader *pfile)
739{
740  cpp_token *old, *result;
741
742  old = pfile->cur_token - 1;
743  if (pfile->cur_token == pfile->cur_run->limit)
744    {
745      pfile->cur_run = next_tokenrun (pfile->cur_run);
746      pfile->cur_token = pfile->cur_run->base;
747    }
748
749  result = pfile->cur_token++;
750  result->src_loc = old->src_loc;
751  return result;
752}
753
754/* Lex a token into RESULT (external interface).  Takes care of issues
755   like directive handling, token lookahead, multiple include
756   optimization and skipping.  */
757const cpp_token *
758_cpp_lex_token (cpp_reader *pfile)
759{
760  cpp_token *result;
761
762  for (;;)
763    {
764      if (pfile->cur_token == pfile->cur_run->limit)
765	{
766	  pfile->cur_run = next_tokenrun (pfile->cur_run);
767	  pfile->cur_token = pfile->cur_run->base;
768	}
769      /* We assume that the current token is somewhere in the current
770	 run.  */
771      if (pfile->cur_token < pfile->cur_run->base
772	  || pfile->cur_token >= pfile->cur_run->limit)
773	abort ();
774
775      if (pfile->lookaheads)
776	{
777	  pfile->lookaheads--;
778	  result = pfile->cur_token++;
779	}
780      else
781	result = _cpp_lex_direct (pfile);
782
783      if (result->flags & BOL)
784	{
785	  /* Is this a directive.  If _cpp_handle_directive returns
786	     false, it is an assembler #.  */
787	  if (result->type == CPP_HASH
788	      /* 6.10.3 p 11: Directives in a list of macro arguments
789		 gives undefined behavior.  This implementation
790		 handles the directive as normal.  */
791	      && pfile->state.parsing_args != 1)
792	    {
793	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
794		{
795		  if (pfile->directive_result.type == CPP_PADDING)
796		    continue;
797		  result = &pfile->directive_result;
798		}
799	    }
800	  else if (pfile->state.in_deferred_pragma)
801	    result = &pfile->directive_result;
802
803	  if (pfile->cb.line_change && !pfile->state.skipping)
804	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
805	}
806
807      /* We don't skip tokens in directives.  */
808      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
809	break;
810
811      /* Outside a directive, invalidate controlling macros.  At file
812	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
813	 get here and MI optimization works.  */
814      pfile->mi_valid = false;
815
816      if (!pfile->state.skipping || result->type == CPP_EOF)
817	break;
818    }
819
820  return result;
821}
822
823/* Returns true if a fresh line has been loaded.  */
824bool
825_cpp_get_fresh_line (cpp_reader *pfile)
826{
827  int return_at_eof;
828
829  /* We can't get a new line until we leave the current directive.  */
830  if (pfile->state.in_directive)
831    return false;
832
833  for (;;)
834    {
835      cpp_buffer *buffer = pfile->buffer;
836
837      if (!buffer->need_line)
838	return true;
839
840      if (buffer->next_line < buffer->rlimit)
841	{
842	  _cpp_clean_line (pfile);
843	  return true;
844	}
845
846      /* First, get out of parsing arguments state.  */
847      if (pfile->state.parsing_args)
848	return false;
849
850      /* End of buffer.  Non-empty files should end in a newline.  */
851      if (buffer->buf != buffer->rlimit
852	  && buffer->next_line > buffer->rlimit
853	  && !buffer->from_stage3)
854	{
855	  /* Clip to buffer size.  */
856	  buffer->next_line = buffer->rlimit;
857	  /* APPLE LOCAL begin suppress no newline warning.  */
858	  if ( CPP_OPTION (pfile, warn_newline_at_eof))
859	    {
860	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
861				   CPP_BUF_COLUMN (buffer, buffer->cur),
862				   "no newline at end of file");
863	    }
864	  /* APPLE LOCAL end suppress no newline warning.  */
865	}
866
867      return_at_eof = buffer->return_at_eof;
868      _cpp_pop_buffer (pfile);
869      if (pfile->buffer == NULL || return_at_eof)
870	return false;
871    }
872}
873
/* Helper for lexing two-character operators.  If the next input
   character is CHAR, consume it and set the token type to THEN_TYPE;
   otherwise set the type to ELSE_TYPE and consume nothing.  Relies on
   variables named `buffer' and `result' being in scope at the point
   of use.  The arguments are parenthesized so that any expression
   may be passed safely.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      result->type = (ELSE_TYPE);			\
      if (*buffer->cur == (CHAR))			\
	buffer->cur++, result->type = (THEN_TYPE);	\
    }							\
  while (0)
882
883/* Lex a token into pfile->cur_token, which is also incremented, to
884   get diagnostics pointing to the correct location.
885
886   Does not handle issues such as token lookahead, multiple-include
887   optimization, directives, skipping etc.  This function is only
888   suitable for use by _cpp_lex_token, and in special cases like
889   lex_expansion_token which doesn't care for any of these issues.
890
891   When meeting a newline, returns CPP_EOF if parsing a directive,
892   otherwise returns to the start of the token buffer if permissible.
893   Returns the location of the lexed token.  */
894cpp_token *
895_cpp_lex_direct (cpp_reader *pfile)
896{
897  cppchar_t c;
898  cpp_buffer *buffer;
899  const unsigned char *comment_start;
900  cpp_token *result = pfile->cur_token++;
901
902 fresh_line:
903  result->flags = 0;
904  buffer = pfile->buffer;
905  if (buffer->need_line)
906    {
907      if (pfile->state.in_deferred_pragma)
908	{
909	  result->type = CPP_PRAGMA_EOL;
910	  pfile->state.in_deferred_pragma = false;
911	  if (!pfile->state.pragma_allow_expansion)
912	    pfile->state.prevent_expansion--;
913	  return result;
914	}
915      if (!_cpp_get_fresh_line (pfile))
916	{
917	  result->type = CPP_EOF;
918	  if (!pfile->state.in_directive)
919	    {
920	      /* Tell the compiler the line number of the EOF token.  */
921	      result->src_loc = pfile->line_table->highest_line;
922	      result->flags = BOL;
923	    }
924	  return result;
925	}
926      if (!pfile->keep_tokens)
927	{
928	  pfile->cur_run = &pfile->base_run;
929	  result = pfile->base_run.base;
930	  pfile->cur_token = result + 1;
931	}
932      result->flags = BOL;
933      if (pfile->state.parsing_args == 2)
934	result->flags |= PREV_WHITE;
935    }
936  buffer = pfile->buffer;
937 update_tokens_line:
938  result->src_loc = pfile->line_table->highest_line;
939
940 skipped_white:
941  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
942      && !pfile->overlaid_buffer)
943    {
944      _cpp_process_line_notes (pfile, false);
945      result->src_loc = pfile->line_table->highest_line;
946    }
947  c = *buffer->cur++;
948
949  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
950			       CPP_BUF_COLUMN (buffer, buffer->cur));
951
952  switch (c)
953    {
954    case ' ': case '\t': case '\f': case '\v': case '\0':
955      result->flags |= PREV_WHITE;
956      skip_whitespace (pfile, c);
957      goto skipped_white;
958
959    case '\n':
960      if (buffer->cur < buffer->rlimit)
961	CPP_INCREMENT_LINE (pfile, 0);
962      buffer->need_line = true;
963      goto fresh_line;
964
965    case '0': case '1': case '2': case '3': case '4':
966    case '5': case '6': case '7': case '8': case '9':
967      {
968	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
969	result->type = CPP_NUMBER;
970	lex_number (pfile, &result->val.str, &nst);
971	warn_about_normalization (pfile, result, &nst);
972	break;
973      }
974
975    case 'L':
976      /* 'L' may introduce wide characters or strings.  */
977      if (*buffer->cur == '\'' || *buffer->cur == '"')
978	{
979	  lex_string (pfile, result, buffer->cur - 1);
980	  break;
981	}
982      /* Fall through.  */
983
984    case '_':
985    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
986    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
987    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
988    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
989    case 'y': case 'z':
990    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
991    case 'G': case 'H': case 'I': case 'J': case 'K':
992    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
993    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
994    case 'Y': case 'Z':
995      result->type = CPP_NAME;
996      {
997	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
998	result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
999					   &nst);
1000	warn_about_normalization (pfile, result, &nst);
1001      }
1002
1003      /* Convert named operators to their proper types.  */
1004      if (result->val.node->flags & NODE_OPERATOR)
1005	{
1006	  result->flags |= NAMED_OP;
1007	  result->type = (enum cpp_ttype) result->val.node->directive_index;
1008	}
1009      break;
1010
1011    case '\'':
1012    case '"':
1013      lex_string (pfile, result, buffer->cur - 1);
1014      break;
1015
1016    case '/':
1017      /* A potential block or line comment.  */
1018      comment_start = buffer->cur;
1019      c = *buffer->cur;
1020
1021      if (c == '*')
1022	{
1023	  if (_cpp_skip_block_comment (pfile))
1024	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1025	}
1026      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1027			    || cpp_in_system_header (pfile)))
1028	{
1029	  /* Warn about comments only if pedantically GNUC89, and not
1030	     in system headers.  */
1031	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1032	      && ! buffer->warned_cplusplus_comments)
1033	    {
1034	      cpp_error (pfile, CPP_DL_PEDWARN,
1035			 "C++ style comments are not allowed in ISO C90");
1036	      cpp_error (pfile, CPP_DL_PEDWARN,
1037			 "(this will be reported only once per input file)");
1038	      buffer->warned_cplusplus_comments = 1;
1039	    }
1040
1041	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1042	    cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1043	}
1044      else if (c == '=')
1045	{
1046	  buffer->cur++;
1047	  result->type = CPP_DIV_EQ;
1048	  break;
1049	}
1050      else
1051	{
1052	  result->type = CPP_DIV;
1053	  break;
1054	}
1055
1056      if (!pfile->state.save_comments)
1057	{
1058	  result->flags |= PREV_WHITE;
1059	  goto update_tokens_line;
1060	}
1061
1062      /* Save the comment as a token in its own right.  */
1063      save_comment (pfile, result, comment_start, c);
1064      break;
1065
1066    case '<':
1067      if (pfile->state.angled_headers)
1068	{
1069	  lex_string (pfile, result, buffer->cur - 1);
1070	  break;
1071	}
1072
1073      result->type = CPP_LESS;
1074      if (*buffer->cur == '=')
1075	buffer->cur++, result->type = CPP_LESS_EQ;
1076      else if (*buffer->cur == '<')
1077	{
1078	  buffer->cur++;
1079	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1080	}
1081      else if (CPP_OPTION (pfile, digraphs))
1082	{
1083	  if (*buffer->cur == ':')
1084	    {
1085	      buffer->cur++;
1086	      result->flags |= DIGRAPH;
1087	      result->type = CPP_OPEN_SQUARE;
1088	    }
1089	  else if (*buffer->cur == '%')
1090	    {
1091	      buffer->cur++;
1092	      result->flags |= DIGRAPH;
1093	      result->type = CPP_OPEN_BRACE;
1094	    }
1095	}
1096      break;
1097
1098    case '>':
1099      result->type = CPP_GREATER;
1100      if (*buffer->cur == '=')
1101	buffer->cur++, result->type = CPP_GREATER_EQ;
1102      else if (*buffer->cur == '>')
1103	{
1104	  buffer->cur++;
1105	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1106	}
1107      break;
1108
1109    case '%':
1110      result->type = CPP_MOD;
1111      if (*buffer->cur == '=')
1112	buffer->cur++, result->type = CPP_MOD_EQ;
1113      else if (CPP_OPTION (pfile, digraphs))
1114	{
1115	  if (*buffer->cur == ':')
1116	    {
1117	      buffer->cur++;
1118	      result->flags |= DIGRAPH;
1119	      result->type = CPP_HASH;
1120	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
1121		buffer->cur += 2, result->type = CPP_PASTE;
1122	    }
1123	  else if (*buffer->cur == '>')
1124	    {
1125	      buffer->cur++;
1126	      result->flags |= DIGRAPH;
1127	      result->type = CPP_CLOSE_BRACE;
1128	    }
1129	}
1130      break;
1131
1132    case '.':
1133      result->type = CPP_DOT;
1134      if (ISDIGIT (*buffer->cur))
1135	{
1136	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1137	  result->type = CPP_NUMBER;
1138	  lex_number (pfile, &result->val.str, &nst);
1139	  warn_about_normalization (pfile, result, &nst);
1140	}
1141      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1142	buffer->cur += 2, result->type = CPP_ELLIPSIS;
1143      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1144	buffer->cur++, result->type = CPP_DOT_STAR;
1145      break;
1146
1147    case '+':
1148      result->type = CPP_PLUS;
1149      if (*buffer->cur == '+')
1150	buffer->cur++, result->type = CPP_PLUS_PLUS;
1151      else if (*buffer->cur == '=')
1152	buffer->cur++, result->type = CPP_PLUS_EQ;
1153      break;
1154
1155    case '-':
1156      result->type = CPP_MINUS;
1157      if (*buffer->cur == '>')
1158	{
1159	  buffer->cur++;
1160	  result->type = CPP_DEREF;
1161	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1162	    buffer->cur++, result->type = CPP_DEREF_STAR;
1163	}
1164      else if (*buffer->cur == '-')
1165	buffer->cur++, result->type = CPP_MINUS_MINUS;
1166      else if (*buffer->cur == '=')
1167	buffer->cur++, result->type = CPP_MINUS_EQ;
1168      break;
1169
1170    case '&':
1171      result->type = CPP_AND;
1172      if (*buffer->cur == '&')
1173	buffer->cur++, result->type = CPP_AND_AND;
1174      else if (*buffer->cur == '=')
1175	buffer->cur++, result->type = CPP_AND_EQ;
1176      break;
1177
1178    case '|':
1179      result->type = CPP_OR;
1180      if (*buffer->cur == '|')
1181	buffer->cur++, result->type = CPP_OR_OR;
1182      else if (*buffer->cur == '=')
1183	buffer->cur++, result->type = CPP_OR_EQ;
1184      break;
1185
1186    case ':':
1187      result->type = CPP_COLON;
1188      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1189	buffer->cur++, result->type = CPP_SCOPE;
1190      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1191	{
1192	  buffer->cur++;
1193	  result->flags |= DIGRAPH;
1194	  result->type = CPP_CLOSE_SQUARE;
1195	}
1196      break;
1197
1198    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1199    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1200    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1201    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1202    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1203
1204    case '?': result->type = CPP_QUERY; break;
1205    case '~': result->type = CPP_COMPL; break;
1206    case ',': result->type = CPP_COMMA; break;
1207    case '(': result->type = CPP_OPEN_PAREN; break;
1208    case ')': result->type = CPP_CLOSE_PAREN; break;
1209    case '[': result->type = CPP_OPEN_SQUARE; break;
1210    case ']': result->type = CPP_CLOSE_SQUARE; break;
1211    case '{': result->type = CPP_OPEN_BRACE; break;
1212    case '}': result->type = CPP_CLOSE_BRACE; break;
1213    case ';': result->type = CPP_SEMICOLON; break;
1214
1215      /* @ is a punctuator in Objective-C.  */
1216    case '@': result->type = CPP_ATSIGN; break;
1217
1218    case '$':
1219    case '\\':
1220      {
1221	const uchar *base = --buffer->cur;
1222	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1223
1224	if (forms_identifier_p (pfile, true, &nst))
1225	  {
1226	    result->type = CPP_NAME;
1227	    result->val.node = lex_identifier (pfile, base, true, &nst);
1228	    warn_about_normalization (pfile, result, &nst);
1229	    break;
1230	  }
1231	buffer->cur++;
1232      }
1233
1234    default:
1235      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1236      break;
1237    }
1238
1239  return result;
1240}
1241
1242/* An upper bound on the number of bytes needed to spell TOKEN.
1243   Does not include preceding whitespace.  */
1244unsigned int
1245cpp_token_len (const cpp_token *token)
1246{
1247  unsigned int len;
1248
1249  switch (TOKEN_SPELL (token))
1250    {
1251    default:		len = 4;				break;
1252    case SPELL_LITERAL:	len = token->val.str.len;		break;
1253    case SPELL_IDENT:	len = NODE_LEN (token->val.node) * 10;	break;
1254    }
1255
1256  return len;
1257}
1258
/* Decode the single UTF-8 sequence that starts at NAME and store the
   matching universal character name at BUFFER, spelt as a backslash,
   a 'U' and eight lower-case hexadecimal digits.  Exactly 10 bytes
   are written; no terminator is added.

   Returns the number of bytes of NAME that made up the sequence.
   Aborts if any byte after the first is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned long code_point;
  unsigned lead_bits;
  int seq_len = 0;
  int remaining;
  int digit;

  /* The count of leading one bits in the first byte is the length of
     the whole sequence.  */
  lead_bits = *src;
  while (lead_bits & 0x80)
    {
      seq_len++;
      lead_bits <<= 1;
    }

  /* Payload bits of the first byte, then six more from each trailing
     byte.  */
  code_point = *src & (0x7F >> seq_len);
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      unsigned char trail = *++src;

      code_point = (code_point << 6) | (trail & 0x3F);

      /* Ill-formed UTF-8: a trailing byte must look like 10xxxxxx.  */
      if ((trail & 0xC0) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  /* Most significant nibble first.  */
  for (digit = 0; digit < 8; digit++)
    buffer[2 + digit] = hex_digits[(code_point >> (28 - 4 * digit)) & 0xF];

  return seq_len;
}
1292
1293
1294/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1295   already contain the enough space to hold the token's spelling.
1296   Returns a pointer to the character after the last character written.
1297   FORSTRING is true if this is to be the spelling after translation
1298   phase 1 (this is different for UCNs).
1299   FIXME: Would be nice if we didn't need the PFILE argument.  */
1300unsigned char *
1301cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1302		 unsigned char *buffer, bool forstring)
1303{
1304  switch (TOKEN_SPELL (token))
1305    {
1306    case SPELL_OPERATOR:
1307      {
1308	const unsigned char *spelling;
1309	unsigned char c;
1310
1311	if (token->flags & DIGRAPH)
1312	  spelling
1313	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1314	else if (token->flags & NAMED_OP)
1315	  goto spell_ident;
1316	else
1317	  spelling = TOKEN_NAME (token);
1318
1319	while ((c = *spelling++) != '\0')
1320	  *buffer++ = c;
1321      }
1322      break;
1323
1324    spell_ident:
1325    case SPELL_IDENT:
1326      if (forstring)
1327	{
1328	  memcpy (buffer, NODE_NAME (token->val.node),
1329		  NODE_LEN (token->val.node));
1330	  buffer += NODE_LEN (token->val.node);
1331	}
1332      else
1333	{
1334	  size_t i;
1335	  const unsigned char * name = NODE_NAME (token->val.node);
1336
1337	  for (i = 0; i < NODE_LEN (token->val.node); i++)
1338	    if (name[i] & ~0x7F)
1339	      {
1340		i += utf8_to_ucn (buffer, name + i) - 1;
1341		buffer += 10;
1342	      }
1343	    else
1344	      *buffer++ = NODE_NAME (token->val.node)[i];
1345	}
1346      break;
1347
1348    case SPELL_LITERAL:
1349      memcpy (buffer, token->val.str.text, token->val.str.len);
1350      buffer += token->val.str.len;
1351      break;
1352
1353    case SPELL_NONE:
1354      cpp_error (pfile, CPP_DL_ICE,
1355		 "unspellable token %s", TOKEN_NAME (token));
1356      break;
1357    }
1358
1359  return buffer;
1360}
1361
1362/* Returns TOKEN spelt as a null-terminated string.  The string is
1363   freed when the reader is destroyed.  Useful for diagnostics.  */
1364unsigned char *
1365cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1366{
1367  unsigned int len = cpp_token_len (token) + 1;
1368  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1369
1370  end = cpp_spell_token (pfile, token, start, false);
1371  end[0] = '\0';
1372
1373  return start;
1374}
1375
1376/* Used by C front ends, which really should move to using
1377   cpp_token_as_text.  */
1378const char *
1379cpp_type2name (enum cpp_ttype type)
1380{
1381  return (const char *) token_spellings[type].name;
1382}
1383
1384/* Writes the spelling of token to FP, without any preceding space.
1385   Separated from cpp_spell_token for efficiency - to avoid stdio
1386   double-buffering.  */
1387void
1388cpp_output_token (const cpp_token *token, FILE *fp)
1389{
1390  switch (TOKEN_SPELL (token))
1391    {
1392    case SPELL_OPERATOR:
1393      {
1394	const unsigned char *spelling;
1395	int c;
1396
1397	if (token->flags & DIGRAPH)
1398	  spelling
1399	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1400	else if (token->flags & NAMED_OP)
1401	  goto spell_ident;
1402	else
1403	  spelling = TOKEN_NAME (token);
1404
1405	c = *spelling;
1406	do
1407	  putc (c, fp);
1408	while ((c = *++spelling) != '\0');
1409      }
1410      break;
1411
1412    spell_ident:
1413    case SPELL_IDENT:
1414      {
1415	size_t i;
1416	const unsigned char * name = NODE_NAME (token->val.node);
1417
1418	for (i = 0; i < NODE_LEN (token->val.node); i++)
1419	  if (name[i] & ~0x7F)
1420	    {
1421	      unsigned char buffer[10];
1422	      i += utf8_to_ucn (buffer, name + i) - 1;
1423	      fwrite (buffer, 1, 10, fp);
1424	    }
1425	  else
1426	    fputc (NODE_NAME (token->val.node)[i], fp);
1427      }
1428      break;
1429
1430    case SPELL_LITERAL:
1431      fwrite (token->val.str.text, 1, token->val.str.len, fp);
1432      break;
1433
1434    case SPELL_NONE:
1435      /* An error, most probably.  */
1436      break;
1437    }
1438}
1439
1440/* Compare two tokens.  */
1441int
1442_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1443{
1444  if (a->type == b->type && a->flags == b->flags)
1445    switch (TOKEN_SPELL (a))
1446      {
1447      default:			/* Keep compiler happy.  */
1448      case SPELL_OPERATOR:
1449	return 1;
1450      case SPELL_NONE:
1451	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1452      case SPELL_IDENT:
1453	return a->val.node == b->val.node;
1454      case SPELL_LITERAL:
1455	return (a->val.str.len == b->val.str.len
1456		&& !memcmp (a->val.str.text, b->val.str.text,
1457			    a->val.str.len));
1458      }
1459
1460  return 0;
1461}
1462
1463/* Returns nonzero if a space should be inserted to avoid an
1464   accidental token paste for output.  For simplicity, it is
1465   conservative, and occasionally advises a space where one is not
1466   needed, e.g. "." and ".2".  */
1467int
1468cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1469		 const cpp_token *token2)
1470{
1471  enum cpp_ttype a = token1->type, b = token2->type;
1472  cppchar_t c;
1473
1474  if (token1->flags & NAMED_OP)
1475    a = CPP_NAME;
1476  if (token2->flags & NAMED_OP)
1477    b = CPP_NAME;
1478
1479  c = EOF;
1480  if (token2->flags & DIGRAPH)
1481    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1482  else if (token_spellings[b].category == SPELL_OPERATOR)
1483    c = token_spellings[b].name[0];
1484
1485  /* Quickly get everything that can paste with an '='.  */
1486  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1487    return 1;
1488
1489  switch (a)
1490    {
1491    case CPP_GREATER:	return c == '>';
1492    case CPP_LESS:	return c == '<' || c == '%' || c == ':';
1493    case CPP_PLUS:	return c == '+';
1494    case CPP_MINUS:	return c == '-' || c == '>';
1495    case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1496    case CPP_MOD:	return c == ':' || c == '>';
1497    case CPP_AND:	return c == '&';
1498    case CPP_OR:	return c == '|';
1499    case CPP_COLON:	return c == ':' || c == '>';
1500    case CPP_DEREF:	return c == '*';
1501    case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1502    case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1503    case CPP_NAME:	return ((b == CPP_NUMBER
1504				 && name_p (pfile, &token2->val.str))
1505				|| b == CPP_NAME
1506				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1507    case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1508				|| c == '.' || c == '+' || c == '-');
1509				      /* UCNs */
1510    case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
1511				 && b == CPP_NAME)
1512				|| (CPP_OPTION (pfile, objc)
1513				    && token1->val.str.text[0] == '@'
1514				    && (b == CPP_NAME || b == CPP_STRING)));
1515    default:		break;
1516    }
1517
1518  return 0;
1519}
1520
1521/* Output all the remaining tokens on the current line, and a newline
1522   character, to FP.  Leading whitespace is removed.  If there are
1523   macros, special token padding is not performed.  */
1524void
1525cpp_output_line (cpp_reader *pfile, FILE *fp)
1526{
1527  const cpp_token *token;
1528
1529  token = cpp_get_token (pfile);
1530  while (token->type != CPP_EOF)
1531    {
1532      cpp_output_token (token, fp);
1533      token = cpp_get_token (pfile);
1534      if (token->flags & PREV_WHITE)
1535	putc (' ', fp);
1536    }
1537
1538  putc ('\n', fp);
1539}
1540
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload a newly created buffer gets.

   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   is still handed out for a request of MIN_SIZE bytes.

   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.
   Every macro argument is parenthesized in the expansion so that an
   argument containing a low-precedence operator still expands as
   intended.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1555
1556/* Create a new allocation buffer.  Place the control block at the end
1557   of the buffer, so that buffer overflows will cause immediate chaos.  */
1558static _cpp_buff *
1559new_buff (size_t len)
1560{
1561  _cpp_buff *result;
1562  unsigned char *base;
1563
1564  if (len < MIN_BUFF_SIZE)
1565    len = MIN_BUFF_SIZE;
1566  len = CPP_ALIGN (len);
1567
1568  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1569  result = (_cpp_buff *) (base + len);
1570  result->base = base;
1571  result->cur = base;
1572  result->limit = base + len;
1573  result->next = NULL;
1574  return result;
1575}
1576
1577/* Place a chain of unwanted allocation buffers on the free list.  */
1578void
1579_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1580{
1581  _cpp_buff *end = buff;
1582
1583  while (end->next)
1584    end = end->next;
1585  end->next = pfile->free_buffs;
1586  pfile->free_buffs = buff;
1587}
1588
1589/* Return a free buffer of size at least MIN_SIZE.  */
1590_cpp_buff *
1591_cpp_get_buff (cpp_reader *pfile, size_t min_size)
1592{
1593  _cpp_buff *result, **p;
1594
1595  for (p = &pfile->free_buffs;; p = &(*p)->next)
1596    {
1597      size_t size;
1598
1599      if (*p == NULL)
1600	return new_buff (min_size);
1601      result = *p;
1602      size = result->limit - result->base;
1603      /* Return a buffer that's big enough, but don't waste one that's
1604         way too big.  */
1605      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1606	break;
1607    }
1608
1609  *p = result->next;
1610  result->next = NULL;
1611  result->cur = result->base;
1612  return result;
1613}
1614
1615/* Creates a new buffer with enough space to hold the uncommitted
1616   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1617   the excess bytes to the new buffer.  Chains the new buffer after
1618   BUFF, and returns the new buffer.  */
1619_cpp_buff *
1620_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1621{
1622  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1623  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1624
1625  buff->next = new_buff;
1626  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1627  return new_buff;
1628}
1629
1630/* Creates a new buffer with enough space to hold the uncommitted
1631   remaining bytes of the buffer pointed to by BUFF, and at least
1632   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1633   Chains the new buffer before the buffer pointed to by BUFF, and
1634   updates the pointer to point to the new buffer.  */
1635void
1636_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1637{
1638  _cpp_buff *new_buff, *old_buff = *pbuff;
1639  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1640
1641  new_buff = _cpp_get_buff (pfile, size);
1642  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1643  new_buff->next = old_buff;
1644  *pbuff = new_buff;
1645}
1646
1647/* Free a chain of buffers starting at BUFF.  */
1648void
1649_cpp_free_buff (_cpp_buff *buff)
1650{
1651  _cpp_buff *next;
1652
1653  for (; buff; buff = next)
1654    {
1655      next = buff->next;
1656      free (buff->base);
1657    }
1658}
1659
1660/* Allocate permanent, unaligned storage of length LEN.  */
1661unsigned char *
1662_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1663{
1664  _cpp_buff *buff = pfile->u_buff;
1665  unsigned char *result = buff->cur;
1666
1667  if (len > (size_t) (buff->limit - result))
1668    {
1669      buff = _cpp_get_buff (pfile, len);
1670      buff->next = pfile->u_buff;
1671      pfile->u_buff = buff;
1672      result = buff->cur;
1673    }
1674
1675  buff->cur = result + len;
1676  return result;
1677}
1678
1679/* Allocate permanent, unaligned storage of length LEN from a_buff.
1680   That buffer is used for growing allocations when saving macro
1681   replacement lists in a #define, and when parsing an answer to an
1682   assertion in #assert, #unassert or #if (and therefore possibly
1683   whilst expanding macros).  It therefore must not be used by any
1684   code that they might call: specifically the lexer and the guts of
1685   the macro expander.
1686
1687   All existing other uses clearly fit this restriction: storing
1688   registered pragmas during initialization.  */
1689unsigned char *
1690_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1691{
1692  _cpp_buff *buff = pfile->a_buff;
1693  unsigned char *result = buff->cur;
1694
1695  if (len > (size_t) (buff->limit - result))
1696    {
1697      buff = _cpp_get_buff (pfile, len);
1698      buff->next = pfile->a_buff;
1699      pfile->a_buff = buff;
1700      result = buff->cur;
1701    }
1702
1703  buff->cur = result + len;
1704  return result;
1705}
1706
1707/* Say which field of TOK is in use.  */
1708
1709enum cpp_token_fld_kind
1710cpp_token_val_index (cpp_token *tok)
1711{
1712  switch (TOKEN_SPELL (tok))
1713    {
1714    case SPELL_IDENT:
1715      return CPP_TOKEN_FLD_NODE;
1716    case SPELL_LITERAL:
1717      return CPP_TOKEN_FLD_STR;
1718    case SPELL_NONE:
1719      if (tok->type == CPP_MACRO_ARG)
1720	return CPP_TOKEN_FLD_ARG_NO;
1721      else if (tok->type == CPP_PADDING)
1722	return CPP_TOKEN_FLD_SOURCE;
1723      else if (tok->type == CPP_PRAGMA)
1724	return CPP_TOKEN_FLD_PRAGMA;
1725      /* else fall through */
1726    default:
1727      return CPP_TOKEN_FLD_NONE;
1728    }
1729}
1730