1/* CPP Library - lexical analysis.
2   Copyright (C) 2000-2015 Free Software Foundation, Inc.
3   Contributed by Per Bothner, 1994-95.
4   Based on CCCP program by Paul Rubin, June 1986
5   Adapted to ANSI C, Richard Stallman, Jan 1987
6   Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 3, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22#include "config.h"
23#include "system.h"
24#include "cpplib.h"
25#include "internal.h"
26
/* Spelling category of a token type, stored per token type in the
   token_spellings table below.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};

/* One entry of the token_spellings table: the spelling category of a
   token type together with a name string for it.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};

/* Spellings of the six digraphs.  NOTE(review): how this table is
   indexed is not visible in this part of the file -- confirm against
   its users before reordering entries.  */
static const unsigned char *const digraph_spellings[] =
{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };

/* Expand TTYPE_TABLE into one token_spelling per token type.  An OP
   entry is SPELL_OPERATOR and uses the string S as its name; a TK entry
   takes its category from S (pasted onto SPELL_) and uses the
   stringified enumerator E as its name.  The helper macros are only
   needed for this one expansion and are undefined again right after.  */
#define OP(e, s) { SPELL_OPERATOR, UC s  },
#define TK(e, s) { SPELL_ ## s,    UC #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Look up the spelling category, respectively the name, of TOKEN in
   token_spellings using the token's type as index.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
/* Prototypes of the file-local (static) helper functions, so that they
   can be used before their definitions.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void store_comment (cpp_reader *, cpp_token *);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
			    unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);
66
67
68/* Utility routine:
69
70   Compares, the token TOKEN to the NUL-terminated string STRING.
71   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
72int
73cpp_ideq (const cpp_token *token, const char *string)
74{
75  if (token->type != CPP_NAME)
76    return 0;
77
78  return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
79}
80
81/* Record a note TYPE at byte POS into the current cleaned logical
82   line.  */
83static void
84add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85{
86  if (buffer->notes_used == buffer->notes_cap)
87    {
88      buffer->notes_cap = buffer->notes_cap * 2 + 200;
89      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90                                  buffer->notes_cap);
91    }
92
93  buffer->notes[buffer->notes_used].pos = pos;
94  buffer->notes[buffer->notes_used].type = type;
95  buffer->notes_used++;
96}
97
98
99/* Fast path to find line special characters using optimized character
100   scanning algorithms.  Anything complicated falls back to the slow
101   path below.  Since this loop is very hot it's worth doing these kinds
102   of optimizations.
103
104   One of the paths through the ifdefs should provide
105
106     const uchar *search_line_fast (const uchar *s, const uchar *end);
107
108   Between S and END, search for \n, \r, \\, ?.  Return a pointer to
109   the found character.
110
111   Note that the last character of the buffer is *always* a newline,
112   as forced by _cpp_convert_input.  This fact can be used to avoid
113   explicitly looking for the end of the buffer.  */
114
/* Configure gives us an ifdef test.  Give the macro the value 0 when it
   was not defined, so that it can be used in ordinary `if' conditions
   below.  */
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 0
#endif

/* We'd like the largest integer that fits into a register.  There's nothing
   in <stdint.h> that gives us that.  For most hosts this is unsigned long,
   but MS decided on an LLP64 model.  Thankfully when building with GCC we
   can get the "real" word size.  */
#ifdef __GNUC__
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below is only expecting sizes 4 or 8.
   Die at compile-time if this expectation is violated: the array bound
   below evaluates to 1 when the size is acceptable and to -1, which is
   not a valid array bound, otherwise.  */
typedef char check_word_type_size
  [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
134
135/* Return X with the first N bytes forced to values that won't match one
136   of the interesting characters.  Note that NUL is not interesting.  */
137
138static inline word_type
139acc_char_mask_misalign (word_type val, unsigned int n)
140{
141  word_type mask = -1;
142  if (WORDS_BIGENDIAN)
143    mask >>= n * 8;
144  else
145    mask <<= n * 8;
146  return val & mask;
147}
148
149/* Return X replicated to all byte positions within WORD_TYPE.  */
150
151static inline word_type
152acc_char_replicate (uchar x)
153{
154  word_type ret;
155
156  ret = (x << 24) | (x << 16) | (x << 8) | x;
157  if (sizeof(word_type) == 8)
158    ret = (ret << 16 << 16) | ret;
159  return ret;
160}
161
162/* Return non-zero if some byte of VAL is (probably) C.  */
163
164static inline word_type
165acc_char_cmp (word_type val, word_type c)
166{
167#if defined(__GNUC__) && defined(__alpha__)
168  /* We can get exact results using a compare-bytes instruction.
169     Get (val == c) via (0 >= (val ^ c)).  */
170  return __builtin_alpha_cmpbge (0, val ^ c);
171#else
172  word_type magic = 0x7efefefeU;
173  if (sizeof(word_type) == 8)
174    magic = (magic << 16 << 16) | 0xfefefefeU;
175  magic |= 1;
176
177  val ^= c;
178  return ((val + magic) ^ ~val) & ~magic;
179#endif
180}
181
182/* Given the result of acc_char_cmp is non-zero, return the index of
183   the found character.  If this was a false positive, return -1.  */
184
185static inline int
186acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187		word_type val ATTRIBUTE_UNUSED)
188{
189#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190  /* The cmpbge instruction sets *bits* of the result corresponding to
191     matches in the bytes with no false positives.  */
192  return __builtin_ctzl (cmp);
193#else
194  unsigned int i;
195
196  /* ??? It would be nice to force unrolling here,
197     and have all of these constants folded.  */
198  for (i = 0; i < sizeof(word_type); ++i)
199    {
200      uchar c;
201      if (WORDS_BIGENDIAN)
202	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203      else
204	c = (val >> i * 8) & 0xff;
205
206      if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207	return i;
208    }
209
210  return -1;
211#endif
212}
213
214/* A version of the fast scanner using bit fiddling techniques.
215
216   For 32-bit words, one would normally perform 16 comparisons and
217   16 branches.  With this algorithm one performs 24 arithmetic
218   operations and one branch.  Whether this is faster with a 32-bit
219   word size is going to be somewhat system dependent.
220
221   For 64-bit words, we eliminate twice the number of comparisons
222   and branches without increasing the number of arithmetic operations.
223   It's almost certainly going to be a win with 64-bit word size.  */
224
225static const uchar * search_line_acc_char (const uchar *, const uchar *)
226  ATTRIBUTE_UNUSED;
227
228static const uchar *
229search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230{
231  const word_type repl_nl = acc_char_replicate ('\n');
232  const word_type repl_cr = acc_char_replicate ('\r');
233  const word_type repl_bs = acc_char_replicate ('\\');
234  const word_type repl_qm = acc_char_replicate ('?');
235
236  unsigned int misalign;
237  const word_type *p;
238  word_type val, t;
239
240  /* Align the buffer.  Mask out any bytes from before the beginning.  */
241  p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242  val = *p;
243  misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244  if (misalign)
245    val = acc_char_mask_misalign (val, misalign);
246
247  /* Main loop.  */
248  while (1)
249    {
250      t  = acc_char_cmp (val, repl_nl);
251      t |= acc_char_cmp (val, repl_cr);
252      t |= acc_char_cmp (val, repl_bs);
253      t |= acc_char_cmp (val, repl_qm);
254
255      if (__builtin_expect (t != 0, 0))
256	{
257	  int i = acc_char_index (t, val);
258	  if (i >= 0)
259	    return (const uchar *)p + i;
260	}
261
262      val = *++p;
263    }
264}
265
266/* Disable on Solaris 2/x86 until the following problem can be properly
267   autoconfed:
268
269   The Solaris 10+ assembler tags objects with the instruction set
270   extensions used, so SSE4.2 executables cannot run on machines that
271   don't support that extension.  */
272
273#if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
274
/* Replicated character data to be shared between implementations.
   Recall that outside of a context with vector support we can't
   define compatible vector types, therefore these are all defined
   in terms of raw characters.

   Row 0 holds '\n', row 1 '\r', row 2 '\\' and row 3 '?'.  Each row is
   16 bytes long and the array is 16-byte aligned, so a row can be read
   through a pointer to an 8- or 16-byte vector type.  */
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
  { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
  { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
  { '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?' },
};
289
/* A version of the fast scanner using MMX vectorized byte compare insns.

   This uses the PMOVMSKB instruction which was introduced with "MMX2",
   which was packaged into SSE1; it is also present in the AMD MMX
   extension.  Mark the function as using "sse" so that we emit a real
   "emms" instruction, rather than the 3dNOW "femms" instruction.

   Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   not consulted.  */

static const uchar *
#ifndef __SSE__
__attribute__((__target__("sse")))
#endif
search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef char v8qi __attribute__ ((__vector_size__ (8)));
  typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));

  /* Only the first 8 bytes of each 16-byte row are needed here.  */
  const v8qi repl_nl = *(const v8qi *)repl_chars[0];
  const v8qi repl_cr = *(const v8qi *)repl_chars[1];
  const v8qi repl_bs = *(const v8qi *)repl_chars[2];
  const v8qi repl_qm = *(const v8qi *)repl_chars[3];

  unsigned int misalign, found, mask;
  const v8qi *p;
  v8qi data, t, c;

  /* Align the source pointer.  While MMX doesn't generate unaligned data
     faults, this allows us to safely scan to the end of the buffer without
     reading beyond the end of the last page.  */
  misalign = (uintptr_t)s & 7;
  p = (const v8qi *)((uintptr_t)s & -8);
  data = *p;

  /* Create a mask for the bytes that are valid within the first
     8-byte block.  The Idea here is that the AND with the mask
     within the loop is "free", since we need some AND or TEST
     insn in order to set the flags for the branch anyway.  */
  mask = -1u << misalign;

  /* Main loop processing 8 bytes at a time.  The first block has
     already been loaded, so jump into the middle of the loop; every
     later iteration loads the next block and uses an all-ones mask.  */
  goto start;
  do
    {
      data = *++p;
      mask = -1;

    start:
      /* OR together the byte-wise equality results for all four
	 characters, then collect them into FOUND, one bit per byte.  */
      t = __builtin_ia32_pcmpeqb(data, repl_nl);
      c = __builtin_ia32_pcmpeqb(data, repl_cr);
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
      c = __builtin_ia32_pcmpeqb(data, repl_bs);
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
      c = __builtin_ia32_pcmpeqb(data, repl_qm);
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
      found = __builtin_ia32_pmovmskb (t);
      found &= mask;
    }
  while (!found);

  __builtin_ia32_emms ();

  /* FOUND contains 1 in bits for which we matched a relevant
     character.  Conversion to the byte index is trivial.  */
  found = __builtin_ctz(found);
  return (const uchar *)p + found;
}
355
/* A version of the fast scanner using SSE2 vectorized byte compare insns.

   Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   not consulted.  */

static const uchar *
#ifndef __SSE2__
__attribute__((__target__("sse2")))
#endif
search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));

  const v16qi repl_nl = *(const v16qi *)repl_chars[0];
  const v16qi repl_cr = *(const v16qi *)repl_chars[1];
  const v16qi repl_bs = *(const v16qi *)repl_chars[2];
  const v16qi repl_qm = *(const v16qi *)repl_chars[3];

  unsigned int misalign, found, mask;
  const v16qi *p;
  v16qi data, t;

  /* Align the source pointer.  The first load therefore starts up to
     15 bytes before S; those bytes are discarded through MASK.  */
  misalign = (uintptr_t)s & 15;
  p = (const v16qi *)((uintptr_t)s & -16);
  data = *p;

  /* Create a mask for the bytes that are valid within the first
     16-byte block.  The Idea here is that the AND with the mask
     within the loop is "free", since we need some AND or TEST
     insn in order to set the flags for the branch anyway.  */
  mask = -1u << misalign;

  /* Main loop processing 16 bytes at a time.  The first block has
     already been loaded, so jump into the middle of the loop; every
     later iteration loads the next block and uses an all-ones mask.  */
  goto start;
  do
    {
      data = *++p;
      mask = -1;

    start:
      /* OR together the byte-wise equality results for all four
	 characters, then collect them into FOUND, one bit per byte.  */
      t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
      t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
      t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
      t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
      found = __builtin_ia32_pmovmskb128 (t);
      found &= mask;
    }
  while (!found);

  /* FOUND contains 1 in bits for which we matched a relevant
     character.  Conversion to the byte index is trivial.  */
  found = __builtin_ctz(found);
  return (const uchar *)p + found;
}
408
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.

   Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   only used to decide whether an initial unaligned 16-byte read is
   safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  /* The set of characters searched for; only the first 4 elements are
     used (see the length operands of 4 below).  */
  static const v16qi search = { '\n', '\r', '?', '\\' };

  uintptr_t si = (uintptr_t)s;
  uintptr_t index;

  /* Check for unaligned input.  */
  if (si & 15)
    {
      v16qi sv;

      if (__builtin_expect (end - s < 16, 0)
	  && __builtin_expect ((si & 0xfff) > 0xff0, 0))
	{
	  /* There are less than 16 bytes left in the buffer, and less
	     than 16 bytes left on the page.  Reading 16 bytes at this
	     point might generate a spurious page fault.  Defer to the
	     SSE2 implementation, which already handles alignment.  */
	  return search_line_sse2 (s, end);
	}

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
	 memory need not be aligned.  */
      sv = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);

      /* An index below 16 is the offset of a match within SV.  */
      if (__builtin_expect (index < 16, 0))
	goto found;

      /* Advance the pointer to an aligned address.  We will re-scan a
	 few bytes, but we no longer need care for reading past the
	 end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((si + 16) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  By doing the whole loop
     in inline assembly, we can make proper use of the flags set.
     S (%1) is decremented by 16 up front because the loop adds 16
     before every comparison; the loop repeats (jnc) until the compare
     instruction sets the carry flag, leaving its result in INDEX.  */
  __asm (      "sub $16, %1\n"
	"	.balign 16\n"
	"0:	add $16, %1\n"
	"	%vpcmpestri $0, (%1), %2\n"
	"	jnc 0b"
	: "=&c"(index), "+r"(s)
	: "x"(search), "a"(4), "d"(16));

 found:
  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
471
472/* Check the CPU capabilities.  */
473
474#include "../gcc/config/i386/cpuid.h"
475
476typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
477static search_line_fast_type search_line_fast;
478
479#define HAVE_init_vectorized_lexer 1
480static inline void
481init_vectorized_lexer (void)
482{
483  unsigned dummy, ecx = 0, edx = 0;
484  search_line_fast_type impl = search_line_acc_char;
485  int minimum = 0;
486
487#if defined(__SSE4_2__)
488  minimum = 3;
489#elif defined(__SSE2__)
490  minimum = 2;
491#elif defined(__SSE__)
492  minimum = 1;
493#endif
494
495  if (minimum == 3)
496    impl = search_line_sse42;
497  else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
498    {
499      if (minimum == 3 || (ecx & bit_SSE4_2))
500        impl = search_line_sse42;
501      else if (minimum == 2 || (edx & bit_SSE2))
502	impl = search_line_sse2;
503      else if (minimum == 1 || (edx & bit_SSE))
504	impl = search_line_mmx;
505    }
506  else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
507    {
508      if (minimum == 1
509	  || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
510	impl = search_line_mmx;
511    }
512
513  search_line_fast = impl;
514}
515
516#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
517
/* A version of the fast scanner using AltiVec vectorized byte compares
   and VSX unaligned loads (when VSX is available).  This is otherwise
   the same as the pre-GCC 5 version.

   Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   not consulted.  */

ATTRIBUTE_NO_SANITIZE_UNDEFINED
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef __attribute__((altivec(vector))) unsigned char vc;

  const vc repl_nl = {
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
  };
  const vc repl_cr = {
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
  };
  const vc repl_bs = {
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
  };
  const vc repl_qm = {
    '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?',
  };
  const vc zero = { 0 };

  vc data, t;

  /* Main loop processing 16 bytes at a time.  S is not aligned first;
     each block is read directly from S.  */
  do
    {
      vc m_nl, m_cr, m_bs, m_qm;

      data = *((const vc *)s);
      s += 16;

      m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
      m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
      m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
      m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
      t = (m_nl | m_cr) | (m_bs | m_qm);

      /* T now contains 0xff in bytes for which we matched one of the relevant
	 characters.  We want to exit the loop if any byte in T is non-zero.
	 Below is the expansion of vec_any_ne(t, zero).  */
    }
  while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));

  /* Restore s to point to the 16 bytes we just processed.  */
  s -= 16;

  {
#define N  (sizeof(vc) / sizeof(long))

    union {
      vc v;
      /* Statically assert that N is 2 or 4.  */
      unsigned long l[(N == 2 || N == 4) ? N : -1];
    } u;
    unsigned long l, i = 0;

    u.v = t;

    /* Find the first word of T that is non-zero.  S is advanced past
       every all-zero word that is skipped.  */
    switch (N)
      {
      case 4:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	/* Fall through to examine the remaining two words.  */
      case 2:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i];
      }

    /* L now contains 0xff in bytes for which we matched one of the
       relevant characters.  We can find the byte index by finding
       its bit index and dividing by 8.  */
#ifdef __BIG_ENDIAN__
    l = __builtin_clzl(l) >> 3;
#else
    l = __builtin_ctzl(l) >> 3;
#endif
    return s + l;

#undef N
  }
}
616
617#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
618
/* A version of the fast scanner using AltiVec vectorized byte compares.
   This cannot be used for little endian because vec_lvsl/lvsr are
   deprecated for little endian and the code won't work properly.  */
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
   so we can't compile this function without -maltivec on the command line
   (or implied by some other switch).  */

/* Starting at S, return a pointer to the first \n, \r, \\ or ?.  END is
   not consulted.  */
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef __attribute__((altivec(vector))) unsigned char vc;

  const vc repl_nl = {
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
  };
  const vc repl_cr = {
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
  };
  const vc repl_bs = {
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
  };
  const vc repl_qm = {
    '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?',
  };
  const vc ones = {
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
  };
  const vc zero = { 0 };

  vc data, mask, t;

  /* Altivec loads automatically mask addresses with -16.  This lets us
     issue the first load as early as possible.  */
  data = __builtin_vec_ld(0, (const vc *)s);

  /* Discard bytes before the beginning of the buffer.  Do this by
     beginning with all ones and shifting in zeros according to the
     mis-alignment.  The LVSR instruction pulls the exact shift we
     want from the address.  */
  mask = __builtin_vec_lvsr(0, s);
  mask = __builtin_vec_perm(zero, ones, mask);
  data &= mask;

  /* While altivec loads mask addresses, we still need to align S so
     that the offset we compute at the end is correct.  */
  s = (const uchar *)((uintptr_t)s & -16);

  /* Main loop processing 16 bytes at a time.  The first block has
     already been loaded and masked, so jump into the middle of the
     loop; later iterations advance S and load the next block.  */
  goto start;
  do
    {
      vc m_nl, m_cr, m_bs, m_qm;

      s += 16;
      data = __builtin_vec_ld(0, (const vc *)s);

    start:
      m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
      m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
      m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
      m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
      t = (m_nl | m_cr) | (m_bs | m_qm);

      /* T now contains 0xff in bytes for which we matched one of the relevant
	 characters.  We want to exit the loop if any byte in T is non-zero.
	 Below is the expansion of vec_any_ne(t, zero).  */
    }
  while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));

  {
#define N  (sizeof(vc) / sizeof(long))

    union {
      vc v;
      /* Statically assert that N is 2 or 4.  */
      unsigned long l[(N == 2 || N == 4) ? N : -1];
    } u;
    unsigned long l, i = 0;

    u.v = t;

    /* Find the first word of T that is non-zero.  S is advanced past
       every all-zero word that is skipped.  */
    switch (N)
      {
      case 4:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	/* Fall through to examine the remaining two words.  */
      case 2:
	l = u.l[i++];
	if (l != 0)
	  break;
	s += sizeof(unsigned long);
	l = u.l[i];
      }

    /* L now contains 0xff in bytes for which we matched one of the
       relevant characters.  We can find the byte index by finding
       its bit index and dividing by 8.  */
    l = __builtin_clzl(l) >> 3;
    return s + l;

#undef N
  }
}
734
735#elif defined (__ARM_NEON)
736#include "arm_neon.h"
737
/* A version of the fast scanner using ARM NEON vectorized byte
   compares.  Starting at S, return a pointer to the first \n, \r, \\
   or ?.  END is not consulted.  */
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
  const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
  const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
  const uint8x16_t repl_qm = vdupq_n_u8 ('?');
  /* Within each 8-byte half, byte I of XMASK has only bit I set.  */
  const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);

  unsigned int misalign, found, mask;
  const uint8_t *p;
  uint8x16_t data;

  /* Align the source pointer.  */
  misalign = (uintptr_t)s & 15;
  p = (const uint8_t *)((uintptr_t)s & -16);
  data = vld1q_u8 (p);

  /* Create a mask for the bytes that are valid within the first
     16-byte block.  The Idea here is that the AND with the mask
     within the loop is "free", since we need some AND or TEST
     insn in order to set the flags for the branch anyway.  */
  mask = (-1u << misalign) & 0xffff;

  /* Main loop, processing 16 bytes at a time.  The first block has
     already been loaded, so jump into the middle of the loop; every
     later iteration loads the next block and uses a full mask.  */
  goto start;

  do
    {
      uint8x8_t l;
      uint16x4_t m;
      uint32x2_t n;
      uint8x16_t t, u, v, w;

      p += 16;
      data = vld1q_u8 (p);
      mask = 0xffff;

    start:
      t = vceqq_u8 (data, repl_nl);
      u = vceqq_u8 (data, repl_cr);
      v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
      w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
      /* Keep one distinct bit per matching byte, then sum the bytes of
	 each 8-byte half with pairwise adds so that each half yields an
	 8-bit match mask.  */
      t = vandq_u8 (vorrq_u8 (v, w), xmask);
      l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
      m = vpaddl_u8 (l);
      n = vpaddl_u16 (m);

      /* Combine the masks of the two halves into the low 16 bits.  */
      found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
	      vshr_n_u64 ((uint64x1_t) n, 24)), 0);
      found &= mask;
    }
  while (!found);

  /* FOUND contains 1 in bits for which we matched a relevant
     character.  Conversion to the byte index is trivial.  */
  found = __builtin_ctz (found);
  return (const uchar *)p + found;
}
797
798#else
799
/* We only have one accelerated alternative.  Use a direct call so that
   we encourage inlining.  */
802
803#define search_line_fast  search_line_acc_char
804
805#endif
806
/* Initialize the lexer if needed.  Initialization is only required in
   configurations that define HAVE_init_vectorized_lexer; in all other
   configurations this function does nothing.  */

void
_cpp_init_lexer (void)
{
#ifdef HAVE_init_vectorized_lexer
  init_vectorized_lexer ();
#endif
}
816
/* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   The line starting at PFILE->buffer->next_line is cleaned in place.
   On return the buffer's cur and line_base point at the start of the
   line, the cleaned line is terminated by a '\n', next_line points just
   past the raw input that was consumed, and the line notes describe
   the escaped newlines and trigraphs that were seen, followed by a
   sentinel note.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  const uchar *s;
  uchar c, *d, *p;

  /* S is the read position in the raw input, D the write position in
     the cleaned line.  */
  buffer = pfile->buffer;
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  s = buffer->next_line;

  if (!buffer->from_stage3)
    {
      const uchar *pbackslash = NULL;

      /* Fast path.  This is the common case of an un-escaped line with
	 no trigraphs.  The primary win here is by not writing any
	 data back to memory until we have to.  */
      while (1)
	{
	  /* Perform an optimized search for \n, \r, \\, ?.  */
	  s = search_line_fast (s, buffer->rlimit);

	  c = *s;
	  if (c == '\\')
	    {
	      /* Record the location of the backslash and continue.  */
	      pbackslash = s++;
	    }
	  else if (__builtin_expect (c == '?', 0))
	    {
	      if (__builtin_expect (s[1] == '?', false)
		   && _cpp_trigraph_map[s[2]])
		{
		  /* Have a trigraph.  We may or may not have to convert
		     it.  Add a line note regardless, for -Wtrigraphs.  */
		  add_line_note (buffer, s, s[2]);
		  if (CPP_OPTION (pfile, trigraphs))
		    {
		      /* We do, and that means we have to switch to the
		         slow path.  The replacement character is stored
		         over the first '?', and S is left on the last
		         character of the trigraph because the slow path
		         pre-increments both S and D.  */
		      d = (uchar *) s;
		      *d = _cpp_trigraph_map[s[2]];
		      s += 2;
		      goto slow_path;
		    }
		}
	      /* Not a trigraph.  Continue on fast-path.  */
	      s++;
	    }
	  else
	    break;
	}

      /* This must be \r or \n.  We're either done, or we'll be forced
	 to write back to the buffer and continue on the slow path.  */
      d = (uchar *) s;

      if (__builtin_expect (s == buffer->rlimit, false))
	goto done;

      /* DOS line ending? */
      if (__builtin_expect (c == '\r', false) && s[1] == '\n')
	{
	  s++;
	  if (s == buffer->rlimit)
	    goto done;
	}

      /* No backslash was seen on this line, so the newline cannot be
	 escaped.  */
      if (__builtin_expect (pbackslash == NULL, true))
	goto done;

      /* Check for escaped newline.  Only the last backslash seen can
	 escape the newline, and only when nothing but non-vertical
	 whitespace separates the two.  */
      p = d;
      while (is_nvspace (p[-1]))
	p--;
      if (p - 1 != pbackslash)
	goto done;

      /* Have an escaped newline; process it and proceed to
	 the slow path.  The note type records whether whitespace
	 separated the backslash from the newline.  D is set so that
	 the next pre-incremented write overwrites the backslash, and
	 next_line is reused as the lower bound for the backward
	 whitespace scan in the slow path.  */
      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
      d = p - 2;
      buffer->next_line = p - 1;

    slow_path:
      /* Copy the remaining characters down one at a time, dropping
	 escaped newlines and converting trigraphs on the way.  */
      while (1)
	{
	  c = *++s;
	  *++d = c;

	  if (c == '\n' || c == '\r')
	    {
	      /* Handle DOS line endings.  */
	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
		s++;
	      if (s == buffer->rlimit)
		break;

	      /* Escaped?  */
	      p = d;
	      while (p != buffer->next_line && is_nvspace (p[-1]))
		p--;
	      if (p == buffer->next_line || p[-1] != '\\')
		break;

	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
	      d = p - 2;
	      buffer->next_line = p - 1;
	    }
	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
	    {
	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
	      add_line_note (buffer, d, s[2]);
	      if (CPP_OPTION (pfile, trigraphs))
		{
		  *d = _cpp_trigraph_map[s[2]];
		  s += 2;
		}
	    }
	}
    }
  else
    {
      /* Buffers flagged from_stage3 get no trigraph or escaped-newline
	 processing; just find the end of the line.  */
      while (*s != '\n' && *s != '\r')
	s++;
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
	s++;
    }

 done:
  /* Terminate the cleaned line; this also turns a '\r' terminator into
     a '\n'.  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  buffer->next_line = s + 1;
}
960
961/* Return true if the trigraph indicated by NOTE should be warned
962   about in a comment.  */
963static bool
964warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
965{
966  const uchar *p;
967
968  /* Within comments we don't warn about trigraphs, unless the
969     trigraph forms an escaped newline, as that may change
970     behavior.  */
971  if (note->type != '/')
972    return false;
973
974  /* If -trigraphs, then this was an escaped newline iff the next note
975     is coincident.  */
976  if (CPP_OPTION (pfile, trigraphs))
977    return note[1].pos == note->pos;
978
979  /* Otherwise, see if this forms an escaped newline.  */
980  p = note->pos + 3;
981  while (is_nvspace (*p))
982    p++;
983
984  /* There might have been escaped newlines between the trigraph and the
985     newline we found.  Hence the position test.  */
986  return (*p == '\n' && p < note[1].pos);
987}
988
989/* Process the notes created by add_line_note as far as the current
990   location.  */
991void
992_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
993{
994  cpp_buffer *buffer = pfile->buffer;
995
996  for (;;)
997    {
998      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
999      unsigned int col;
1000
1001      if (note->pos > buffer->cur)
1002	break;
1003
1004      buffer->cur_note++;
1005      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1006
1007      if (note->type == '\\' || note->type == ' ')
1008	{
1009	  if (note->type == ' ' && !in_comment)
1010	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1011				 "backslash and newline separated by space");
1012
1013	  if (buffer->next_line > buffer->rlimit)
1014	    {
1015	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1016				   "backslash-newline at end of file");
1017	      /* Prevent "no newline at end of file" warning.  */
1018	      buffer->next_line = buffer->rlimit;
1019	    }
1020
1021	  buffer->line_base = note->pos;
1022	  CPP_INCREMENT_LINE (pfile, 0);
1023	}
1024      else if (_cpp_trigraph_map[note->type])
1025	{
1026	  if (CPP_OPTION (pfile, warn_trigraphs)
1027	      && (!in_comment || warn_in_comment (pfile, note)))
1028	    {
1029	      if (CPP_OPTION (pfile, trigraphs))
1030		cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1031                                       pfile->line_table->highest_line, col,
1032				       "trigraph ??%c converted to %c",
1033				       note->type,
1034				       (int) _cpp_trigraph_map[note->type]);
1035	      else
1036		{
1037		  cpp_warning_with_line
1038		    (pfile, CPP_W_TRIGRAPHS,
1039                     pfile->line_table->highest_line, col,
1040		     "trigraph ??%c ignored, use -trigraphs to enable",
1041		     note->type);
1042		}
1043	    }
1044	}
1045      else if (note->type == 0)
1046	/* Already processed in lex_raw_string.  */;
1047      else
1048	abort ();
1049    }
1050}
1051
1052/* Skip a C-style block comment.  We find the end of the comment by
1053   seeing if an asterisk is before every '/' we encounter.  Returns
1054   nonzero if comment terminated by EOF, zero otherwise.
1055
1056   Buffer->cur points to the initial asterisk of the comment.  */
1057bool
1058_cpp_skip_block_comment (cpp_reader *pfile)
1059{
1060  cpp_buffer *buffer = pfile->buffer;
1061  const uchar *cur = buffer->cur;
1062  uchar c;
1063
1064  cur++;
1065  if (*cur == '/')
1066    cur++;
1067
1068  for (;;)
1069    {
1070      /* People like decorating comments with '*', so check for '/'
1071	 instead for efficiency.  */
1072      c = *cur++;
1073
1074      if (c == '/')
1075	{
1076	  if (cur[-2] == '*')
1077	    break;
1078
1079	  /* Warn about potential nested comments, but not if the '/'
1080	     comes immediately before the true comment delimiter.
1081	     Don't bother to get it right across escaped newlines.  */
1082	  if (CPP_OPTION (pfile, warn_comments)
1083	      && cur[0] == '*' && cur[1] != '/')
1084	    {
1085	      buffer->cur = cur;
1086	      cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1087				     pfile->line_table->highest_line,
1088				     CPP_BUF_COL (buffer),
1089				     "\"/*\" within comment");
1090	    }
1091	}
1092      else if (c == '\n')
1093	{
1094	  unsigned int cols;
1095	  buffer->cur = cur - 1;
1096	  _cpp_process_line_notes (pfile, true);
1097	  if (buffer->next_line >= buffer->rlimit)
1098	    return true;
1099	  _cpp_clean_line (pfile);
1100
1101	  cols = buffer->next_line - buffer->line_base;
1102	  CPP_INCREMENT_LINE (pfile, cols);
1103
1104	  cur = buffer->cur;
1105	}
1106    }
1107
1108  buffer->cur = cur;
1109  _cpp_process_line_notes (pfile, true);
1110  return false;
1111}
1112
1113/* Skip a C++ line comment, leaving buffer->cur pointing to the
1114   terminating newline.  Handles escaped newlines.  Returns nonzero
1115   if a multiline comment.  */
1116static int
1117skip_line_comment (cpp_reader *pfile)
1118{
1119  cpp_buffer *buffer = pfile->buffer;
1120  source_location orig_line = pfile->line_table->highest_line;
1121
1122  while (*buffer->cur != '\n')
1123    buffer->cur++;
1124
1125  _cpp_process_line_notes (pfile, true);
1126  return orig_line != pfile->line_table->highest_line;
1127}
1128
1129/* Skips whitespace, saving the next non-whitespace character.  */
1130static void
1131skip_whitespace (cpp_reader *pfile, cppchar_t c)
1132{
1133  cpp_buffer *buffer = pfile->buffer;
1134  bool saw_NUL = false;
1135
1136  do
1137    {
1138      /* Horizontal space always OK.  */
1139      if (c == ' ' || c == '\t')
1140	;
1141      /* Just \f \v or \0 left.  */
1142      else if (c == '\0')
1143	saw_NUL = true;
1144      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1145	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1146			     CPP_BUF_COL (buffer),
1147			     "%s in preprocessing directive",
1148			     c == '\f' ? "form feed" : "vertical tab");
1149
1150      c = *buffer->cur++;
1151    }
1152  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
1153  while (is_nvspace (c));
1154
1155  if (saw_NUL)
1156    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1157
1158  buffer->cur--;
1159}
1160
1161/* See if the characters of a number token are valid in a name (no
1162   '.', '+' or '-').  */
1163static int
1164name_p (cpp_reader *pfile, const cpp_string *string)
1165{
1166  unsigned int i;
1167
1168  for (i = 0; i < string->len; i++)
1169    if (!is_idchar (string->text[i]))
1170      return 0;
1171
1172  return 1;
1173}
1174
1175/* After parsing an identifier or other sequence, produce a warning about
1176   sequences not in NFC/NFKC.  */
1177static void
1178warn_about_normalization (cpp_reader *pfile,
1179			  const cpp_token *token,
1180			  const struct normalize_state *s)
1181{
1182  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1183      && !pfile->state.skipping)
1184    {
1185      /* Make sure that the token is printed using UCNs, even
1186	 if we'd otherwise happily print UTF-8.  */
1187      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1188      size_t sz;
1189
1190      sz = cpp_spell_token (pfile, token, buf, false) - buf;
1191      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1192	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1193			       "`%.*s' is not in NFKC", (int) sz, buf);
1194      else
1195	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1196			       "`%.*s' is not in NFC", (int) sz, buf);
1197      free (buf);
1198    }
1199}
1200
1201/* Returns TRUE if the sequence starting at buffer->cur is invalid in
1202   an identifier.  FIRST is TRUE if this starts an identifier.  */
1203static bool
1204forms_identifier_p (cpp_reader *pfile, int first,
1205		    struct normalize_state *state)
1206{
1207  cpp_buffer *buffer = pfile->buffer;
1208
1209  if (*buffer->cur == '$')
1210    {
1211      if (!CPP_OPTION (pfile, dollars_in_ident))
1212	return false;
1213
1214      buffer->cur++;
1215      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1216	{
1217	  CPP_OPTION (pfile, warn_dollars) = 0;
1218	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1219	}
1220
1221      return true;
1222    }
1223
1224  /* Is this a syntactically valid UCN?  */
1225  if (CPP_OPTION (pfile, extended_identifiers)
1226      && *buffer->cur == '\\'
1227      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1228    {
1229      buffer->cur += 2;
1230      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1231			  state))
1232	return true;
1233      buffer->cur -= 2;
1234    }
1235
1236  return false;
1237}
1238
1239/* Helper function to get the cpp_hashnode of the identifier BASE.  */
1240static cpp_hashnode *
1241lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1242{
1243  cpp_hashnode *result;
1244  const uchar *cur;
1245  unsigned int len;
1246  unsigned int hash = HT_HASHSTEP (0, *base);
1247
1248  cur = base + 1;
1249  while (ISIDNUM (*cur))
1250    {
1251      hash = HT_HASHSTEP (hash, *cur);
1252      cur++;
1253    }
1254  len = cur - base;
1255  hash = HT_HASHFINISH (hash, len);
1256  result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1257					      base, len, hash, HT_ALLOC));
1258
1259  /* Rarely, identifiers require diagnostics when lexed.  */
1260  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1261			&& !pfile->state.skipping, 0))
1262    {
1263      /* It is allowed to poison the same identifier twice.  */
1264      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1265	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1266		   NODE_NAME (result));
1267
1268      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1269	 replacement list of a variadic macro.  */
1270      if (result == pfile->spec_nodes.n__VA_ARGS__
1271	  && !pfile->state.va_args_ok)
1272	{
1273	  if (CPP_OPTION (pfile, cplusplus))
1274	    cpp_error (pfile, CPP_DL_PEDWARN,
1275		       "__VA_ARGS__ can only appear in the expansion"
1276		       " of a C++11 variadic macro");
1277	  else
1278	    cpp_error (pfile, CPP_DL_PEDWARN,
1279		       "__VA_ARGS__ can only appear in the expansion"
1280		       " of a C99 variadic macro");
1281	}
1282
1283      /* For -Wc++-compat, warn about use of C++ named operators.  */
1284      if (result->flags & NODE_WARN_OPERATOR)
1285	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1286		     "identifier \"%s\" is a special operator name in C++",
1287		     NODE_NAME (result));
1288    }
1289
1290  return result;
1291}
1292
1293/* Get the cpp_hashnode of an identifier specified by NAME in
1294   the current cpp_reader object.  If none is found, NULL is returned.  */
1295cpp_hashnode *
1296_cpp_lex_identifier (cpp_reader *pfile, const char *name)
1297{
1298  cpp_hashnode *result;
1299  result = lex_identifier_intern (pfile, (uchar *) name);
1300  return result;
1301}
1302
1303/* Lex an identifier starting at BUFFER->CUR - 1.  */
1304static cpp_hashnode *
1305lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1306		struct normalize_state *nst, cpp_hashnode **spelling)
1307{
1308  cpp_hashnode *result;
1309  const uchar *cur;
1310  unsigned int len;
1311  unsigned int hash = HT_HASHSTEP (0, *base);
1312
1313  cur = pfile->buffer->cur;
1314  if (! starts_ucn)
1315    {
1316      while (ISIDNUM (*cur))
1317	{
1318	  hash = HT_HASHSTEP (hash, *cur);
1319	  cur++;
1320	}
1321      NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1322    }
1323  pfile->buffer->cur = cur;
1324  if (starts_ucn || forms_identifier_p (pfile, false, nst))
1325    {
1326      /* Slower version for identifiers containing UCNs (or $).  */
1327      do {
1328	while (ISIDNUM (*pfile->buffer->cur))
1329	  {
1330	    NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1331	    pfile->buffer->cur++;
1332	  }
1333      } while (forms_identifier_p (pfile, false, nst));
1334      result = _cpp_interpret_identifier (pfile, base,
1335					  pfile->buffer->cur - base);
1336      *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1337    }
1338  else
1339    {
1340      len = cur - base;
1341      hash = HT_HASHFINISH (hash, len);
1342
1343      result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1344						  base, len, hash, HT_ALLOC));
1345      *spelling = result;
1346    }
1347
1348  /* Rarely, identifiers require diagnostics when lexed.  */
1349  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1350			&& !pfile->state.skipping, 0))
1351    {
1352      /* It is allowed to poison the same identifier twice.  */
1353      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1354	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1355		   NODE_NAME (result));
1356
1357      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1358	 replacement list of a variadic macro.  */
1359      if (result == pfile->spec_nodes.n__VA_ARGS__
1360	  && !pfile->state.va_args_ok)
1361	{
1362	  if (CPP_OPTION (pfile, cplusplus))
1363	    cpp_error (pfile, CPP_DL_PEDWARN,
1364		       "__VA_ARGS__ can only appear in the expansion"
1365		       " of a C++11 variadic macro");
1366	  else
1367	    cpp_error (pfile, CPP_DL_PEDWARN,
1368		       "__VA_ARGS__ can only appear in the expansion"
1369		       " of a C99 variadic macro");
1370	}
1371
1372      /* For -Wc++-compat, warn about use of C++ named operators.  */
1373      if (result->flags & NODE_WARN_OPERATOR)
1374	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1375		     "identifier \"%s\" is a special operator name in C++",
1376		     NODE_NAME (result));
1377    }
1378
1379  return result;
1380}
1381
1382/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1383static void
1384lex_number (cpp_reader *pfile, cpp_string *number,
1385	    struct normalize_state *nst)
1386{
1387  const uchar *cur;
1388  const uchar *base;
1389  uchar *dest;
1390
1391  base = pfile->buffer->cur - 1;
1392  do
1393    {
1394      cur = pfile->buffer->cur;
1395
1396      /* N.B. ISIDNUM does not include $.  */
1397      while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1398	     || VALID_SIGN (*cur, cur[-1]))
1399	{
1400	  NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1401	  cur++;
1402	}
1403      /* A number can't end with a digit separator.  */
1404      while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
1405	--cur;
1406
1407      pfile->buffer->cur = cur;
1408    }
1409  while (forms_identifier_p (pfile, false, nst));
1410
1411  number->len = cur - base;
1412  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1413  memcpy (dest, base, number->len);
1414  dest[number->len] = '\0';
1415  number->text = dest;
1416}
1417
1418/* Create a token of type TYPE with a literal spelling.  */
1419static void
1420create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1421		unsigned int len, enum cpp_ttype type)
1422{
1423  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1424
1425  memcpy (dest, base, len);
1426  dest[len] = '\0';
1427  token->type = type;
1428  token->val.str.len = len;
1429  token->val.str.text = dest;
1430}
1431
1432/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1433   sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1434
1435static void
1436bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1437		_cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1438{
1439  _cpp_buff *first_buff = *first_buff_p;
1440  _cpp_buff *last_buff = *last_buff_p;
1441
1442  if (first_buff == NULL)
1443    first_buff = last_buff = _cpp_get_buff (pfile, len);
1444  else if (len > BUFF_ROOM (last_buff))
1445    {
1446      size_t room = BUFF_ROOM (last_buff);
1447      memcpy (BUFF_FRONT (last_buff), base, room);
1448      BUFF_FRONT (last_buff) += room;
1449      base += room;
1450      len -= room;
1451      last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1452    }
1453
1454  memcpy (BUFF_FRONT (last_buff), base, len);
1455  BUFF_FRONT (last_buff) += len;
1456
1457  *first_buff_p = first_buff;
1458  *last_buff_p = last_buff;
1459}
1460
1461
1462/* Returns true if a macro has been defined.
1463   This might not work if compile with -save-temps,
1464   or preprocess separately from compilation.  */
1465
1466static bool
1467is_macro(cpp_reader *pfile, const uchar *base)
1468{
1469  const uchar *cur = base;
1470  if (! ISIDST (*cur))
1471    return false;
1472  unsigned int hash = HT_HASHSTEP (0, *cur);
1473  ++cur;
1474  while (ISIDNUM (*cur))
1475    {
1476      hash = HT_HASHSTEP (hash, *cur);
1477      ++cur;
1478    }
1479  hash = HT_HASHFINISH (hash, cur - base);
1480
1481  cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1482					base, cur - base, hash, HT_NO_INSERT));
1483
1484  return !result ? false : (result->type == NT_MACRO);
1485}
1486
1487
1488/* Lexes a raw string.  The stored string contains the spelling, including
1489   double quotes, delimiter string, '(' and ')', any leading
1490   'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
1491   literal, or CPP_OTHER if it was not properly terminated.
1492
1493   The spelling is NUL-terminated, but it is not guaranteed that this
1494   is the first NUL since embedded NULs are preserved.  */
1495
1496static void
1497lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1498		const uchar *cur)
1499{
1500  uchar raw_prefix[17];
1501  uchar temp_buffer[18];
1502  const uchar *orig_base;
1503  unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1504  enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1505  raw_str_phase phase = RAW_STR_PREFIX;
1506  enum cpp_ttype type;
1507  size_t total_len = 0;
1508  /* Index into temp_buffer during phases other than RAW_STR,
1509     during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1510     be appended to temp_buffer.  */
1511  size_t temp_buffer_len = 0;
1512  _cpp_buff *first_buff = NULL, *last_buff = NULL;
1513  size_t raw_prefix_start;
1514  _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1515
1516  type = (*base == 'L' ? CPP_WSTRING :
1517	  *base == 'U' ? CPP_STRING32 :
1518	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1519	  : CPP_STRING);
1520
1521#define BUF_APPEND(STR,LEN)					\
1522      do {							\
1523	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
1524			&first_buff, &last_buff);		\
1525	total_len += (LEN);					\
1526	if (__builtin_expect (temp_buffer_len < 17, 0)		\
1527	    && (const uchar *)(STR) != base			\
1528	    && (LEN) <= 2)					\
1529	  {							\
1530	    memcpy (temp_buffer + temp_buffer_len,		\
1531		    (const uchar *)(STR), (LEN));		\
1532	    temp_buffer_len += (LEN);				\
1533	  }							\
1534      } while (0);
1535
1536  orig_base = base;
1537  ++cur;
1538  raw_prefix_start = cur - base;
1539  for (;;)
1540    {
1541      cppchar_t c;
1542
1543      /* If we previously performed any trigraph or line splicing
1544	 transformations, undo them in between the opening and closing
1545	 double quote.  */
1546      while (note->pos < cur)
1547	++note;
1548      for (; note->pos == cur; ++note)
1549	{
1550	  switch (note->type)
1551	    {
1552	    case '\\':
1553	    case ' ':
1554	      /* Restore backslash followed by newline.  */
1555	      BUF_APPEND (base, cur - base);
1556	      base = cur;
1557	      BUF_APPEND ("\\", 1);
1558	    after_backslash:
1559	      if (note->type == ' ')
1560		{
1561		  /* GNU backslash whitespace newline extension.  FIXME
1562		     could be any sequence of non-vertical space.  When we
1563		     can properly restore any such sequence, we should mark
1564		     this note as handled so _cpp_process_line_notes
1565		     doesn't warn.  */
1566		  BUF_APPEND (" ", 1);
1567		}
1568
1569	      BUF_APPEND ("\n", 1);
1570	      break;
1571
1572	    case 0:
1573	      /* Already handled.  */
1574	      break;
1575
1576	    default:
1577	      if (_cpp_trigraph_map[note->type])
1578		{
1579		  /* Don't warn about this trigraph in
1580		     _cpp_process_line_notes, since trigraphs show up as
1581		     trigraphs in raw strings.  */
1582		  uchar type = note->type;
1583		  note->type = 0;
1584
1585		  if (!CPP_OPTION (pfile, trigraphs))
1586		    /* If we didn't convert the trigraph in the first
1587		       place, don't do anything now either.  */
1588		    break;
1589
1590		  BUF_APPEND (base, cur - base);
1591		  base = cur;
1592		  BUF_APPEND ("??", 2);
1593
1594		  /* ??/ followed by newline gets two line notes, one for
1595		     the trigraph and one for the backslash/newline.  */
1596		  if (type == '/' && note[1].pos == cur)
1597		    {
1598		      if (note[1].type != '\\'
1599			  && note[1].type != ' ')
1600			abort ();
1601		      BUF_APPEND ("/", 1);
1602		      ++note;
1603		      goto after_backslash;
1604		    }
1605		  else
1606		    {
1607		      /* Skip the replacement character.  */
1608		      base = ++cur;
1609		      BUF_APPEND (&type, 1);
1610		      c = type;
1611		      goto check_c;
1612		    }
1613		}
1614	      else
1615		abort ();
1616	      break;
1617	    }
1618	}
1619      c = *cur++;
1620      if (__builtin_expect (temp_buffer_len < 17, 0))
1621	temp_buffer[temp_buffer_len++] = c;
1622
1623     check_c:
1624      if (phase == RAW_STR_PREFIX)
1625	{
1626	  while (raw_prefix_len < temp_buffer_len)
1627	    {
1628	      raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1629	      switch (raw_prefix[raw_prefix_len])
1630		{
1631		case ' ': case '(': case ')': case '\\': case '\t':
1632		case '\v': case '\f': case '\n': default:
1633		  break;
1634		/* Basic source charset except the above chars.  */
1635		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1636		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1637		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1638		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1639		case 'y': case 'z':
1640		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1641		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1642		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1643		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1644		case 'Y': case 'Z':
1645		case '0': case '1': case '2': case '3': case '4': case '5':
1646		case '6': case '7': case '8': case '9':
1647		case '_': case '{': case '}': case '#': case '[': case ']':
1648		case '<': case '>': case '%': case ':': case ';': case '.':
1649		case '?': case '*': case '+': case '-': case '/': case '^':
1650		case '&': case '|': case '~': case '!': case '=': case ',':
1651		case '"': case '\'':
1652		  if (raw_prefix_len < 16)
1653		    {
1654		      raw_prefix_len++;
1655		      continue;
1656		    }
1657		  break;
1658		}
1659
1660	      if (raw_prefix[raw_prefix_len] != '(')
1661		{
1662		  int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1663		  if (raw_prefix_len == 16)
1664		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1665					 col, "raw string delimiter longer "
1666					      "than 16 characters");
1667		  else if (raw_prefix[raw_prefix_len] == '\n')
1668		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1669					 col, "invalid new-line in raw "
1670					      "string delimiter");
1671		  else
1672		    cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1673					 col, "invalid character '%c' in "
1674					      "raw string delimiter",
1675					 (int) raw_prefix[raw_prefix_len]);
1676		  pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1677		  create_literal (pfile, token, orig_base,
1678				  raw_prefix_start - 1, CPP_OTHER);
1679		  if (first_buff)
1680		    _cpp_release_buff (pfile, first_buff);
1681		  return;
1682		}
1683	      raw_prefix[raw_prefix_len] = '"';
1684	      phase = RAW_STR;
1685	      /* Nothing should be appended to temp_buffer during
1686		 RAW_STR phase.  */
1687	      temp_buffer_len = 17;
1688	      break;
1689	    }
1690	  continue;
1691	}
1692      else if (phase == RAW_STR_SUFFIX)
1693	{
1694	  while (raw_suffix_len <= raw_prefix_len
1695		 && raw_suffix_len < temp_buffer_len
1696		 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1697	    raw_suffix_len++;
1698	  if (raw_suffix_len > raw_prefix_len)
1699	    break;
1700	  if (raw_suffix_len == temp_buffer_len)
1701	    continue;
1702	  phase = RAW_STR;
1703	  /* Nothing should be appended to temp_buffer during
1704	     RAW_STR phase.  */
1705	  temp_buffer_len = 17;
1706	}
1707      if (c == ')')
1708	{
1709	  phase = RAW_STR_SUFFIX;
1710	  raw_suffix_len = 0;
1711	  temp_buffer_len = 0;
1712	}
1713      else if (c == '\n')
1714	{
1715	  if (pfile->state.in_directive
1716	      || (pfile->state.parsing_args
1717		  && pfile->buffer->next_line >= pfile->buffer->rlimit))
1718	    {
1719	      cur--;
1720	      type = CPP_OTHER;
1721	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1722				   "unterminated raw string");
1723	      break;
1724	    }
1725
1726	  BUF_APPEND (base, cur - base);
1727
1728	  if (pfile->buffer->cur < pfile->buffer->rlimit)
1729	    CPP_INCREMENT_LINE (pfile, 0);
1730	  pfile->buffer->need_line = true;
1731
1732	  pfile->buffer->cur = cur-1;
1733	  _cpp_process_line_notes (pfile, false);
1734	  if (!_cpp_get_fresh_line (pfile))
1735	    {
1736	      source_location src_loc = token->src_loc;
1737	      token->type = CPP_EOF;
1738	      /* Tell the compiler the line number of the EOF token.  */
1739	      token->src_loc = pfile->line_table->highest_line;
1740	      token->flags = BOL;
1741	      if (first_buff != NULL)
1742		_cpp_release_buff (pfile, first_buff);
1743	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1744				   "unterminated raw string");
1745	      return;
1746	    }
1747
1748	  cur = base = pfile->buffer->cur;
1749	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
1750	}
1751    }
1752
1753  if (CPP_OPTION (pfile, user_literals))
1754    {
1755      /* If a string format macro, say from inttypes.h, is placed touching
1756	 a string literal it could be parsed as a C++11 user-defined string
1757	 literal thus breaking the program.
1758	 Try to identify macros with is_macro. A warning is issued. */
1759      if (is_macro (pfile, cur))
1760	{
1761	  /* Raise a warning, but do not consume subsequent tokens.  */
1762	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1763	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1764				   token->src_loc, 0,
1765				   "invalid suffix on literal; C++11 requires "
1766				   "a space between literal and string macro");
1767	}
1768      /* Grab user defined literal suffix.  */
1769      else if (ISIDST (*cur))
1770	{
1771	  type = cpp_userdef_string_add_type (type);
1772	  ++cur;
1773
1774	  while (ISIDNUM (*cur))
1775	    ++cur;
1776	}
1777    }
1778
1779  pfile->buffer->cur = cur;
1780  if (first_buff == NULL)
1781    create_literal (pfile, token, base, cur - base, type);
1782  else
1783    {
1784      uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1785
1786      token->type = type;
1787      token->val.str.len = total_len + (cur - base);
1788      token->val.str.text = dest;
1789      last_buff = first_buff;
1790      while (last_buff != NULL)
1791	{
1792	  memcpy (dest, last_buff->base,
1793		  BUFF_FRONT (last_buff) - last_buff->base);
1794	  dest += BUFF_FRONT (last_buff) - last_buff->base;
1795	  last_buff = last_buff->next;
1796	}
1797      _cpp_release_buff (pfile, first_buff);
1798      memcpy (dest, base, cur - base);
1799      dest[cur - base] = '\0';
1800    }
1801}
1802
1803/* Lexes a string, character constant, or angle-bracketed header file
1804   name.  The stored string contains the spelling, including opening
1805   quote and any leading 'L', 'u', 'U' or 'u8' and optional
1806   'R' modifier.  It returns the type of the literal, or CPP_OTHER
1807   if it was not properly terminated, or CPP_LESS for an unterminated
1808   header name which must be relexed as normal tokens.
1809
1810   The spelling is NUL-terminated, but it is not guaranteed that this
1811   is the first NUL since embedded NULs are preserved.  */
1812static void
1813lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1814{
1815  bool saw_NUL = false;
1816  const uchar *cur;
1817  cppchar_t terminator;
1818  enum cpp_ttype type;
1819
1820  cur = base;
1821  terminator = *cur++;
1822  if (terminator == 'L' || terminator == 'U')
1823    terminator = *cur++;
1824  else if (terminator == 'u')
1825    {
1826      terminator = *cur++;
1827      if (terminator == '8')
1828	terminator = *cur++;
1829    }
1830  if (terminator == 'R')
1831    {
1832      lex_raw_string (pfile, token, base, cur);
1833      return;
1834    }
1835  if (terminator == '"')
1836    type = (*base == 'L' ? CPP_WSTRING :
1837	    *base == 'U' ? CPP_STRING32 :
1838	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1839			 : CPP_STRING);
1840  else if (terminator == '\'')
1841    type = (*base == 'L' ? CPP_WCHAR :
1842	    *base == 'U' ? CPP_CHAR32 :
1843	    *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1844  else
1845    terminator = '>', type = CPP_HEADER_NAME;
1846
1847  for (;;)
1848    {
1849      cppchar_t c = *cur++;
1850
1851      /* In #include-style directives, terminators are not escapable.  */
1852      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1853	cur++;
1854      else if (c == terminator)
1855	break;
1856      else if (c == '\n')
1857	{
1858	  cur--;
1859	  /* Unmatched quotes always yield undefined behavior, but
1860	     greedy lexing means that what appears to be an unterminated
1861	     header name may actually be a legitimate sequence of tokens.  */
1862	  if (terminator == '>')
1863	    {
1864	      token->type = CPP_LESS;
1865	      return;
1866	    }
1867	  type = CPP_OTHER;
1868	  break;
1869	}
1870      else if (c == '\0')
1871	saw_NUL = true;
1872    }
1873
1874  if (saw_NUL && !pfile->state.skipping)
1875    cpp_error (pfile, CPP_DL_WARNING,
1876	       "null character(s) preserved in literal");
1877
1878  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1879    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1880	       (int) terminator);
1881
1882  if (CPP_OPTION (pfile, user_literals))
1883    {
1884      /* If a string format macro, say from inttypes.h, is placed touching
1885	 a string literal it could be parsed as a C++11 user-defined string
1886	 literal thus breaking the program.
1887	 Try to identify macros with is_macro. A warning is issued. */
1888      if (is_macro (pfile, cur))
1889	{
1890	  /* Raise a warning, but do not consume subsequent tokens.  */
1891	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1892	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1893				   token->src_loc, 0,
1894				   "invalid suffix on literal; C++11 requires "
1895				   "a space between literal and string macro");
1896	}
1897      /* Grab user defined literal suffix.  */
1898      else if (ISIDST (*cur))
1899	{
1900	  type = cpp_userdef_char_add_type (type);
1901	  type = cpp_userdef_string_add_type (type);
1902          ++cur;
1903
1904	  while (ISIDNUM (*cur))
1905	    ++cur;
1906	}
1907    }
1908
1909  pfile->buffer->cur = cur;
1910  create_literal (pfile, token, base, cur - base, type);
1911}
1912
1913/* Return the comment table. The client may not make any assumption
1914   about the ordering of the table.  */
1915cpp_comment_table *
1916cpp_get_comments (cpp_reader *pfile)
1917{
1918  return &pfile->comments;
1919}
1920
1921/* Append a comment to the end of the comment table. */
1922static void
1923store_comment (cpp_reader *pfile, cpp_token *token)
1924{
1925  int len;
1926
1927  if (pfile->comments.allocated == 0)
1928    {
1929      pfile->comments.allocated = 256;
1930      pfile->comments.entries = (cpp_comment *) xmalloc
1931	(pfile->comments.allocated * sizeof (cpp_comment));
1932    }
1933
1934  if (pfile->comments.count == pfile->comments.allocated)
1935    {
1936      pfile->comments.allocated *= 2;
1937      pfile->comments.entries = (cpp_comment *) xrealloc
1938	(pfile->comments.entries,
1939	 pfile->comments.allocated * sizeof (cpp_comment));
1940    }
1941
1942  len = token->val.str.len;
1943
1944  /* Copy comment. Note, token may not be NULL terminated. */
1945  pfile->comments.entries[pfile->comments.count].comment =
1946    (char *) xmalloc (sizeof (char) * (len + 1));
1947  memcpy (pfile->comments.entries[pfile->comments.count].comment,
1948	  token->val.str.text, len);
1949  pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1950
1951  /* Set source location. */
1952  pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1953
1954  /* Increment the count of entries in the comment table. */
1955  pfile->comments.count++;
1956}
1957
1958/* The stored comment includes the comment start and any terminator.  */
1959static void
1960save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1961	      cppchar_t type)
1962{
1963  unsigned char *buffer;
1964  unsigned int len, clen, i;
1965
1966  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1967
1968  /* C++ comments probably (not definitely) have moved past a new
1969     line, which we don't want to save in the comment.  */
1970  if (is_vspace (pfile->buffer->cur[-1]))
1971    len--;
1972
1973  /* If we are currently in a directive or in argument parsing, then
1974     we need to store all C++ comments as C comments internally, and
1975     so we need to allocate a little extra space in that case.
1976
1977     Note that the only time we encounter a directive here is
1978     when we are saving comments in a "#define".  */
1979  clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1980	  && type == '/') ? len + 2 : len;
1981
1982  buffer = _cpp_unaligned_alloc (pfile, clen);
1983
1984  token->type = CPP_COMMENT;
1985  token->val.str.len = clen;
1986  token->val.str.text = buffer;
1987
1988  buffer[0] = '/';
1989  memcpy (buffer + 1, from, len - 1);
1990
1991  /* Finish conversion to a C comment, if necessary.  */
1992  if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
1993    {
1994      buffer[1] = '*';
1995      buffer[clen - 2] = '*';
1996      buffer[clen - 1] = '/';
1997      /* As there can be in a C++ comments illegal sequences for C comments
1998         we need to filter them out.  */
1999      for (i = 2; i < (clen - 2); i++)
2000        if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
2001          buffer[i] = '|';
2002    }
2003
2004  /* Finally store this comment for use by clients of libcpp. */
2005  store_comment (pfile, token);
2006}
2007
2008/* Allocate COUNT tokens for RUN.  */
2009void
2010_cpp_init_tokenrun (tokenrun *run, unsigned int count)
2011{
2012  run->base = XNEWVEC (cpp_token, count);
2013  run->limit = run->base + count;
2014  run->next = NULL;
2015}
2016
2017/* Returns the next tokenrun, or creates one if there is none.  */
2018static tokenrun *
2019next_tokenrun (tokenrun *run)
2020{
2021  if (run->next == NULL)
2022    {
2023      run->next = XNEW (tokenrun);
2024      run->next->prev = run;
2025      _cpp_init_tokenrun (run->next, 250);
2026    }
2027
2028  return run->next;
2029}
2030
2031/* Return the number of not yet processed token in a given
2032   context.  */
2033int
2034_cpp_remaining_tokens_num_in_context (cpp_context *context)
2035{
2036  if (context->tokens_kind == TOKENS_KIND_DIRECT)
2037    return (LAST (context).token - FIRST (context).token);
2038  else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2039	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
2040    return (LAST (context).ptoken - FIRST (context).ptoken);
2041  else
2042      abort ();
2043}
2044
2045/* Returns the token present at index INDEX in a given context.  If
2046   INDEX is zero, the next token to be processed is returned.  */
2047static const cpp_token*
2048_cpp_token_from_context_at (cpp_context *context, int index)
2049{
2050  if (context->tokens_kind == TOKENS_KIND_DIRECT)
2051    return &(FIRST (context).token[index]);
2052  else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2053	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
2054    return FIRST (context).ptoken[index];
2055 else
2056   abort ();
2057}
2058
2059/* Look ahead in the input stream.  */
2060const cpp_token *
2061cpp_peek_token (cpp_reader *pfile, int index)
2062{
2063  cpp_context *context = pfile->context;
2064  const cpp_token *peektok;
2065  int count;
2066
2067  /* First, scan through any pending cpp_context objects.  */
2068  while (context->prev)
2069    {
2070      ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
2071
2072      if (index < (int) sz)
2073        return _cpp_token_from_context_at (context, index);
2074      index -= (int) sz;
2075      context = context->prev;
2076    }
2077
2078  /* We will have to read some new tokens after all (and do so
2079     without invalidating preceding tokens).  */
2080  count = index;
2081  pfile->keep_tokens++;
2082
2083  /* For peeked tokens temporarily disable line_change reporting,
2084     until the tokens are parsed for real.  */
2085  void (*line_change) (cpp_reader *, const cpp_token *, int)
2086    = pfile->cb.line_change;
2087  pfile->cb.line_change = NULL;
2088
2089  do
2090    {
2091      peektok = _cpp_lex_token (pfile);
2092      if (peektok->type == CPP_EOF)
2093	{
2094	  index--;
2095	  break;
2096	}
2097    }
2098  while (index--);
2099
2100  _cpp_backup_tokens_direct (pfile, count - index);
2101  pfile->keep_tokens--;
2102  pfile->cb.line_change = line_change;
2103
2104  return peektok;
2105}
2106
2107/* Allocate a single token that is invalidated at the same time as the
2108   rest of the tokens on the line.  Has its line and col set to the
2109   same as the last lexed token, so that diagnostics appear in the
2110   right place.  */
2111cpp_token *
2112_cpp_temp_token (cpp_reader *pfile)
2113{
2114  cpp_token *old, *result;
2115  ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
2116  ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
2117
2118  old = pfile->cur_token - 1;
2119  /* Any pre-existing lookaheads must not be clobbered.  */
2120  if (la)
2121    {
2122      if (sz <= la)
2123        {
2124          tokenrun *next = next_tokenrun (pfile->cur_run);
2125
2126          if (sz < la)
2127            memmove (next->base + 1, next->base,
2128                     (la - sz) * sizeof (cpp_token));
2129
2130          next->base[0] = pfile->cur_run->limit[-1];
2131        }
2132
2133      if (sz > 1)
2134        memmove (pfile->cur_token + 1, pfile->cur_token,
2135                 MIN (la, sz - 1) * sizeof (cpp_token));
2136    }
2137
2138  if (!sz && pfile->cur_token == pfile->cur_run->limit)
2139    {
2140      pfile->cur_run = next_tokenrun (pfile->cur_run);
2141      pfile->cur_token = pfile->cur_run->base;
2142    }
2143
2144  result = pfile->cur_token++;
2145  result->src_loc = old->src_loc;
2146  return result;
2147}
2148
2149/* Lex a token into RESULT (external interface).  Takes care of issues
2150   like directive handling, token lookahead, multiple include
2151   optimization and skipping.  */
2152const cpp_token *
2153_cpp_lex_token (cpp_reader *pfile)
2154{
2155  cpp_token *result;
2156
2157  for (;;)
2158    {
2159      if (pfile->cur_token == pfile->cur_run->limit)
2160	{
2161	  pfile->cur_run = next_tokenrun (pfile->cur_run);
2162	  pfile->cur_token = pfile->cur_run->base;
2163	}
2164      /* We assume that the current token is somewhere in the current
2165	 run.  */
2166      if (pfile->cur_token < pfile->cur_run->base
2167	  || pfile->cur_token >= pfile->cur_run->limit)
2168	abort ();
2169
2170      if (pfile->lookaheads)
2171	{
2172	  pfile->lookaheads--;
2173	  result = pfile->cur_token++;
2174	}
2175      else
2176	result = _cpp_lex_direct (pfile);
2177
2178      if (result->flags & BOL)
2179	{
2180	  /* Is this a directive.  If _cpp_handle_directive returns
2181	     false, it is an assembler #.  */
2182	  if (result->type == CPP_HASH
2183	      /* 6.10.3 p 11: Directives in a list of macro arguments
2184		 gives undefined behavior.  This implementation
2185		 handles the directive as normal.  */
2186	      && pfile->state.parsing_args != 1)
2187	    {
2188	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
2189		{
2190		  if (pfile->directive_result.type == CPP_PADDING)
2191		    continue;
2192		  result = &pfile->directive_result;
2193		}
2194	    }
2195	  else if (pfile->state.in_deferred_pragma)
2196	    result = &pfile->directive_result;
2197
2198	  if (pfile->cb.line_change && !pfile->state.skipping)
2199	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
2200	}
2201
2202      /* We don't skip tokens in directives.  */
2203      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
2204	break;
2205
2206      /* Outside a directive, invalidate controlling macros.  At file
2207	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2208	 get here and MI optimization works.  */
2209      pfile->mi_valid = false;
2210
2211      if (!pfile->state.skipping || result->type == CPP_EOF)
2212	break;
2213    }
2214
2215  return result;
2216}
2217
2218/* Returns true if a fresh line has been loaded.  */
2219bool
2220_cpp_get_fresh_line (cpp_reader *pfile)
2221{
2222  int return_at_eof;
2223
2224  /* We can't get a new line until we leave the current directive.  */
2225  if (pfile->state.in_directive)
2226    return false;
2227
2228  for (;;)
2229    {
2230      cpp_buffer *buffer = pfile->buffer;
2231
2232      if (!buffer->need_line)
2233	return true;
2234
2235      if (buffer->next_line < buffer->rlimit)
2236	{
2237	  _cpp_clean_line (pfile);
2238	  return true;
2239	}
2240
2241      /* First, get out of parsing arguments state.  */
2242      if (pfile->state.parsing_args)
2243	return false;
2244
2245      /* End of buffer.  Non-empty files should end in a newline.  */
2246      if (buffer->buf != buffer->rlimit
2247	  && buffer->next_line > buffer->rlimit
2248	  && !buffer->from_stage3)
2249	{
2250	  /* Clip to buffer size.  */
2251	  buffer->next_line = buffer->rlimit;
2252	}
2253
2254      return_at_eof = buffer->return_at_eof;
2255      _cpp_pop_buffer (pfile);
2256      if (pfile->buffer == NULL || return_at_eof)
2257	return false;
2258    }
2259}
2260
/* Helper for two-character operators.  If the next input character
   is CHAR, consume it and give the token type THEN_TYPE; otherwise
   give it ELSE_TYPE.  Relies on variables named `buffer' and `result'
   being in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      if (*buffer->cur == CHAR)				\
	{						\
	  buffer->cur++;				\
	  result->type = THEN_TYPE;			\
	}						\
      else						\
	result->type = ELSE_TYPE;			\
    }							\
  while (0)
2269
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns a pointer to the lexed token.

   Control flow is organised around three labels:
     fresh_line          - entered initially and after every newline;
			   fetches a new line when the buffer needs one.
     update_tokens_line  - re-stamps the token's location; also the
			   re-entry point after a discarded comment.
     skipped_white       - re-entry point after a run of whitespace.  */
cpp_token *
_cpp_lex_direct (cpp_reader *pfile)
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  /* Claim the next slot of the current token run for the result.  */
  cpp_token *result = pfile->cur_token++;

 fresh_line:
  result->flags = 0;
  buffer = pfile->buffer;
  if (buffer->need_line)
    {
      /* The line that just ended belonged to a deferred pragma:
	 report its end before reading anything else.  */
      if (pfile->state.in_deferred_pragma)
	{
	  result->type = CPP_PRAGMA_EOL;
	  pfile->state.in_deferred_pragma = false;
	  if (!pfile->state.pragma_allow_expansion)
	    pfile->state.prevent_expansion--;
	  return result;
	}
      if (!_cpp_get_fresh_line (pfile))
	{
	  result->type = CPP_EOF;
	  if (!pfile->state.in_directive)
	    {
	      /* Tell the compiler the line number of the EOF token.  */
	      result->src_loc = pfile->line_table->highest_line;
	      result->flags = BOL;
	    }
	  return result;
	}
      /* Unless earlier tokens must be kept, start over at the
	 beginning of the base run.  */
      if (!pfile->keep_tokens)
	{
	  pfile->cur_run = &pfile->base_run;
	  result = pfile->base_run.base;
	  pfile->cur_token = result + 1;
	}
      result->flags = BOL;
      if (pfile->state.parsing_args == 2)
	result->flags |= PREV_WHITE;
    }
  /* _cpp_get_fresh_line may have popped buffers, so reload.  */
  buffer = pfile->buffer;
 update_tokens_line:
  result->src_loc = pfile->line_table->highest_line;

 skipped_white:
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
      && !pfile->overlaid_buffer)
    {
      _cpp_process_line_notes (pfile, false);
      result->src_loc = pfile->line_table->highest_line;
    }
  /* Fetch the first character of the token; buffer->cur now points
     at the character after it.  */
  c = *buffer->cur++;

  if (pfile->forced_token_location_p)
    result->src_loc = *pfile->forced_token_location_p;
  else
    result->src_loc = linemap_position_for_column (pfile->line_table,
					  CPP_BUF_COLUMN (buffer, buffer->cur));

  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      skip_whitespace (pfile, c);
      goto skipped_white;

    case '\n':
      if (buffer->cur < buffer->rlimit)
	CPP_INCREMENT_LINE (pfile, 0);
      buffer->need_line = true;
      goto fresh_line;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	result->type = CPP_NUMBER;
	lex_number (pfile, &result->val.str, &nst);
	warn_about_normalization (pfile, result, &nst);
	break;
      }

    case 'L':
    case 'u':
    case 'U':
    case 'R':
      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
	 wide strings or raw strings.  */
      if (c == 'L' || CPP_OPTION (pfile, rliterals)
	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
	{
	  /* Accept: prefix followed by a quote (no character constant
	     after R), prefix followed by R" when raw literals are
	     enabled, and u8 followed by " or R".  */
	  if ((*buffer->cur == '\'' && c != 'R')
	      || *buffer->cur == '"'
	      || (*buffer->cur == 'R'
		  && c != 'R'
		  && buffer->cur[1] == '"'
		  && CPP_OPTION (pfile, rliterals))
	      || (*buffer->cur == '8'
		  && c == 'u'
		  && (buffer->cur[1] == '"'
		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
			  && CPP_OPTION (pfile, rliterals)))))
	    {
	      lex_string (pfile, result, buffer->cur - 1);
	      break;
	    }
	}
      /* Fall through.  */

    /* Every other identifier start character; 'L', 'u', 'U' and 'R'
       are handled above and fall through to here.  */
    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't':           case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q':
    case 'S': case 'T':           case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      {
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
						&nst,
						&result->val.node.spelling);
	warn_about_normalization (pfile, result, &nst);
      }

      /* Convert named operators to their proper types.  */
      if (result->val.node.node->flags & NODE_OPERATOR)
	{
	  result->flags |= NAMED_OP;
	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
	}
      break;

    case '\'':
    case '"':
      lex_string (pfile, result, buffer->cur - 1);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = *buffer->cur;

      if (c == '*')
	{
	  if (_cpp_skip_block_comment (pfile))
	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
	}
      else if (c == '/' && ! CPP_OPTION (pfile, traditional))
	{
	  /* Don't warn for system headers.  */
	  if (cpp_in_system_header (pfile))
	    ;
	  /* Warn about comments if pedantically GNUC89, and not
	     in system headers.  */
	  else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
		   && CPP_PEDANTIC (pfile)
		   && ! buffer->warned_cplusplus_comments)
	    {
	      cpp_error (pfile, CPP_DL_PEDWARN,
			 "C++ style comments are not allowed in ISO C90");
	      cpp_error (pfile, CPP_DL_PEDWARN,
			 "(this will be reported only once per input file)");
	      buffer->warned_cplusplus_comments = 1;
	    }
	  /* Or if specifically desired via -Wc90-c99-compat.  */
	  else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
		   && ! CPP_OPTION (pfile, cplusplus)
		   && ! buffer->warned_cplusplus_comments)
	    {
	      cpp_error (pfile, CPP_DL_WARNING,
			 "C++ style comments are incompatible with C90");
	      cpp_error (pfile, CPP_DL_WARNING,
			 "(this will be reported only once per input file)");
	      buffer->warned_cplusplus_comments = 1;
	    }
	  /* In C89/C94, C++ style comments are forbidden.  */
	  else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
		    || CPP_OPTION (pfile, lang) == CLK_STDC94))
	    {
	      /* But don't be confused about valid code such as
	         - // immediately followed by *,
		 - // in a preprocessing directive,
		 - // in an #if 0 block.  */
	      if (buffer->cur[1] == '*'
		  || pfile->state.in_directive
		  || pfile->state.skipping)
		{
		  /* Treat this '/' as a plain division operator; the
		     second '/' is left in the buffer.  */
		  result->type = CPP_DIV;
		  break;
		}
	      else if (! buffer->warned_cplusplus_comments)
		{
		  cpp_error (pfile, CPP_DL_ERROR,
			     "C++ style comments are not allowed in ISO C90");
		  cpp_error (pfile, CPP_DL_ERROR,
			     "(this will be reported only once per input "
			     "file)");
		  buffer->warned_cplusplus_comments = 1;
		}
	    }
	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
	}
      else if (c == '=')
	{
	  buffer->cur++;
	  result->type = CPP_DIV_EQ;
	  break;
	}
      else
	{
	  result->type = CPP_DIV;
	  break;
	}

      /* Only real comments get here; every operator branch above has
	 already left the switch.  Unless comments are being saved, the
	 comment counts as whitespace and lexing resumes after it.  */
      if (!pfile->state.save_comments)
	{
	  result->flags |= PREV_WHITE;
	  goto update_tokens_line;
	}

      /* Save the comment as a token in its own right.  C is '*' or
	 '/' here and identifies the kind of comment.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      /* When header names are expected, let lex_string have a go
	 first.  NOTE(review): presumably it leaves the type as
	 CPP_LESS when no header name could be formed - confirm
	 against lex_string.  */
      if (pfile->state.angled_headers)
	{
	  lex_string (pfile, result, buffer->cur - 1);
	  if (result->type != CPP_LESS)
	    break;
	}

      result->type = CPP_LESS;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_LESS_EQ;
      else if (*buffer->cur == '<')
	{
	  buffer->cur++;
	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
	}
      else if (CPP_OPTION (pfile, digraphs))
	{
	  if (*buffer->cur == ':')
	    {
	      /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
		 three characters are <:: and the subsequent character
		 is neither : nor >, the < is treated as a preprocessor
		 token by itself".  */
	      if (CPP_OPTION (pfile, cplusplus)
		  && CPP_OPTION (pfile, lang) != CLK_CXX98
		  && CPP_OPTION (pfile, lang) != CLK_GNUCXX
		  && buffer->cur[1] == ':'
		  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
		break;

	      /* "<:" is the digraph for '['.  */
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_OPEN_SQUARE;
	    }
	  else if (*buffer->cur == '%')
	    {
	      /* "<%" is the digraph for '{'.  */
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_OPEN_BRACE;
	    }
	}
      break;

    case '>':
      result->type = CPP_GREATER;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_GREATER_EQ;
      else if (*buffer->cur == '>')
	{
	  buffer->cur++;
	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
	}
      break;

    case '%':
      result->type = CPP_MOD;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs))
	{
	  if (*buffer->cur == ':')
	    {
	      /* "%:" is the digraph for '#', and "%:%:" for "##".  */
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_HASH;
	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
	    }
	  else if (*buffer->cur == '>')
	    {
	      /* "%>" is the digraph for '}'.  */
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_CLOSE_BRACE;
	    }
	}
      break;

    case '.':
      result->type = CPP_DOT;
      /* A dot followed by a digit starts a number.  */
      if (ISDIGIT (*buffer->cur))
	{
	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	  result->type = CPP_NUMBER;
	  lex_number (pfile, &result->val.str, &nst);
	  warn_about_normalization (pfile, result, &nst);
	}
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
	buffer->cur += 2, result->type = CPP_ELLIPSIS;
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
	buffer->cur++, result->type = CPP_DOT_STAR;
      break;

    case '+':
      result->type = CPP_PLUS;
      if (*buffer->cur == '+')
	buffer->cur++, result->type = CPP_PLUS_PLUS;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_PLUS_EQ;
      break;

    case '-':
      result->type = CPP_MINUS;
      if (*buffer->cur == '>')
	{
	  buffer->cur++;
	  result->type = CPP_DEREF;
	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
	    buffer->cur++, result->type = CPP_DEREF_STAR;
	}
      else if (*buffer->cur == '-')
	buffer->cur++, result->type = CPP_MINUS_MINUS;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_MINUS_EQ;
      break;

    case '&':
      result->type = CPP_AND;
      if (*buffer->cur == '&')
	buffer->cur++, result->type = CPP_AND_AND;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_AND_EQ;
      break;

    case '|':
      result->type = CPP_OR;
      if (*buffer->cur == '|')
	buffer->cur++, result->type = CPP_OR_OR;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_OR_EQ;
      break;

    case ':':
      result->type = CPP_COLON;
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
	buffer->cur++, result->type = CPP_SCOPE;
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
	{
	  /* ":>" is the digraph for ']'.  */
	  buffer->cur++;
	  result->flags |= DIGRAPH;
	  result->type = CPP_CLOSE_SQUARE;
	}
      break;

    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;

    case '?': result->type = CPP_QUERY; break;
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
    case '\\':
      {
	/* Step back onto the character so that forms_identifier_p
	   can examine it.  */
	const uchar *base = --buffer->cur;
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;

	if (forms_identifier_p (pfile, true, &nst))
	  {
	    result->type = CPP_NAME;
	    result->val.node.node = lex_identifier (pfile, base, true, &nst,
						    &result->val.node.spelling);
	    warn_about_normalization (pfile, result, &nst);
	    break;
	  }
	/* Not an identifier after all: consume the character again.  */
	buffer->cur++;
      }
      /* Fall through.  */

    default:
      /* Anything else becomes a one-character CPP_OTHER token.  */
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
      break;
    }

  return result;
}
2701
2702/* An upper bound on the number of bytes needed to spell TOKEN.
2703   Does not include preceding whitespace.  */
2704unsigned int
2705cpp_token_len (const cpp_token *token)
2706{
2707  unsigned int len;
2708
2709  switch (TOKEN_SPELL (token))
2710    {
2711    default:		len = 6;				break;
2712    case SPELL_LITERAL:	len = token->val.str.len;		break;
2713    case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
2714    }
2715
2716  return len;
2717}
2718
/* Decode the UTF-8 sequence starting at NAME and write it to BUFFER
   as a \U escape with eight lower-case hexadecimal digits.  Returns
   the number of bytes of NAME that make up the sequence.  (Exactly 10
   bytes are always written to BUFFER; no terminating NUL is added.)
   Aborts if a continuation byte is ill-formed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned int lead;
  unsigned long code_point;
  int seq_len = 0;
  int k;
  int shift;

  /* The number of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  lead = *src;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The remaining low bits of the first byte are the top bits of the
     code point; each continuation byte contributes six more.  */
  code_point = *src & (0x7F >> seq_len);
  for (k = 1; k < seq_len; k++)
    {
      src++;
      code_point = (code_point << 6) | (*src & 0x3F);

      /* Ill-formed UTF-8: continuation bytes must look like 10xxxxxx.  */
      if ((*src & ~0x3F) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;

  /* Emit the eight nibbles, most significant first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2752
2753/* Given a token TYPE corresponding to a digraph, return a pointer to
2754   the spelling of the digraph.  */
2755static const unsigned char *
2756cpp_digraph2name (enum cpp_ttype type)
2757{
2758  return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2759}
2760
2761/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2762   The buffer must already contain the enough space to hold the
2763   token's spelling.  Returns a pointer to the character after the
2764   last character written.  */
2765unsigned char *
2766_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
2767{
2768  size_t i;
2769  const unsigned char *name = NODE_NAME (ident);
2770
2771  for (i = 0; i < NODE_LEN (ident); i++)
2772    if (name[i] & ~0x7F)
2773      {
2774	i += utf8_to_ucn (buffer, name + i) - 1;
2775	buffer += 10;
2776      }
2777    else
2778      *buffer++ = name[i];
2779
2780  return buffer;
2781}
2782
2783/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2784   already contain the enough space to hold the token's spelling.
2785   Returns a pointer to the character after the last character written.
2786   FORSTRING is true if this is to be the spelling after translation
2787   phase 1 (with the original spelling of extended identifiers), false
2788   if extended identifiers should always be written using UCNs (there is
2789   no option for always writing them in the internal UTF-8 form).
2790   FIXME: Would be nice if we didn't need the PFILE argument.  */
2791unsigned char *
2792cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2793		 unsigned char *buffer, bool forstring)
2794{
2795  switch (TOKEN_SPELL (token))
2796    {
2797    case SPELL_OPERATOR:
2798      {
2799	const unsigned char *spelling;
2800	unsigned char c;
2801
2802	if (token->flags & DIGRAPH)
2803	  spelling = cpp_digraph2name (token->type);
2804	else if (token->flags & NAMED_OP)
2805	  goto spell_ident;
2806	else
2807	  spelling = TOKEN_NAME (token);
2808
2809	while ((c = *spelling++) != '\0')
2810	  *buffer++ = c;
2811      }
2812      break;
2813
2814    spell_ident:
2815    case SPELL_IDENT:
2816      if (forstring)
2817	{
2818	  memcpy (buffer, NODE_NAME (token->val.node.spelling),
2819		  NODE_LEN (token->val.node.spelling));
2820	  buffer += NODE_LEN (token->val.node.spelling);
2821	}
2822      else
2823	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
2824      break;
2825
2826    case SPELL_LITERAL:
2827      memcpy (buffer, token->val.str.text, token->val.str.len);
2828      buffer += token->val.str.len;
2829      break;
2830
2831    case SPELL_NONE:
2832      cpp_error (pfile, CPP_DL_ICE,
2833		 "unspellable token %s", TOKEN_NAME (token));
2834      break;
2835    }
2836
2837  return buffer;
2838}
2839
2840/* Returns TOKEN spelt as a null-terminated string.  The string is
2841   freed when the reader is destroyed.  Useful for diagnostics.  */
2842unsigned char *
2843cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2844{
2845  unsigned int len = cpp_token_len (token) + 1;
2846  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2847
2848  end = cpp_spell_token (pfile, token, start, false);
2849  end[0] = '\0';
2850
2851  return start;
2852}
2853
2854/* Returns a pointer to a string which spells the token defined by
2855   TYPE and FLAGS.  Used by C front ends, which really should move to
2856   using cpp_token_as_text.  */
2857const char *
2858cpp_type2name (enum cpp_ttype type, unsigned char flags)
2859{
2860  if (flags & DIGRAPH)
2861    return (const char *) cpp_digraph2name (type);
2862  else if (flags & NAMED_OP)
2863    return cpp_named_operator2name (type);
2864
2865  return (const char *) token_spellings[type].name;
2866}
2867
2868/* Writes the spelling of token to FP, without any preceding space.
2869   Separated from cpp_spell_token for efficiency - to avoid stdio
2870   double-buffering.  */
2871void
2872cpp_output_token (const cpp_token *token, FILE *fp)
2873{
2874  switch (TOKEN_SPELL (token))
2875    {
2876    case SPELL_OPERATOR:
2877      {
2878	const unsigned char *spelling;
2879	int c;
2880
2881	if (token->flags & DIGRAPH)
2882	  spelling = cpp_digraph2name (token->type);
2883	else if (token->flags & NAMED_OP)
2884	  goto spell_ident;
2885	else
2886	  spelling = TOKEN_NAME (token);
2887
2888	c = *spelling;
2889	do
2890	  putc (c, fp);
2891	while ((c = *++spelling) != '\0');
2892      }
2893      break;
2894
2895    spell_ident:
2896    case SPELL_IDENT:
2897      {
2898	size_t i;
2899	const unsigned char * name = NODE_NAME (token->val.node.node);
2900
2901	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2902	  if (name[i] & ~0x7F)
2903	    {
2904	      unsigned char buffer[10];
2905	      i += utf8_to_ucn (buffer, name + i) - 1;
2906	      fwrite (buffer, 1, 10, fp);
2907	    }
2908	  else
2909	    fputc (NODE_NAME (token->val.node.node)[i], fp);
2910      }
2911      break;
2912
2913    case SPELL_LITERAL:
2914      fwrite (token->val.str.text, 1, token->val.str.len, fp);
2915      break;
2916
2917    case SPELL_NONE:
2918      /* An error, most probably.  */
2919      break;
2920    }
2921}
2922
2923/* Compare two tokens.  */
2924int
2925_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2926{
2927  if (a->type == b->type && a->flags == b->flags)
2928    switch (TOKEN_SPELL (a))
2929      {
2930      default:			/* Keep compiler happy.  */
2931      case SPELL_OPERATOR:
2932	/* token_no is used to track where multiple consecutive ##
2933	   tokens were originally located.  */
2934	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2935      case SPELL_NONE:
2936	return (a->type != CPP_MACRO_ARG
2937		|| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
2938		    && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
2939      case SPELL_IDENT:
2940	return (a->val.node.node == b->val.node.node
2941		&& a->val.node.spelling == b->val.node.spelling);
2942      case SPELL_LITERAL:
2943	return (a->val.str.len == b->val.str.len
2944		&& !memcmp (a->val.str.text, b->val.str.text,
2945			    a->val.str.len));
2946      }
2947
2948  return 0;
2949}
2950
2951/* Returns nonzero if a space should be inserted to avoid an
2952   accidental token paste for output.  For simplicity, it is
2953   conservative, and occasionally advises a space where one is not
2954   needed, e.g. "." and ".2".  */
2955int
2956cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2957		 const cpp_token *token2)
2958{
2959  enum cpp_ttype a = token1->type, b = token2->type;
2960  cppchar_t c;
2961
2962  if (token1->flags & NAMED_OP)
2963    a = CPP_NAME;
2964  if (token2->flags & NAMED_OP)
2965    b = CPP_NAME;
2966
2967  c = EOF;
2968  if (token2->flags & DIGRAPH)
2969    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2970  else if (token_spellings[b].category == SPELL_OPERATOR)
2971    c = token_spellings[b].name[0];
2972
2973  /* Quickly get everything that can paste with an '='.  */
2974  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2975    return 1;
2976
2977  switch (a)
2978    {
2979    case CPP_GREATER:	return c == '>';
2980    case CPP_LESS:	return c == '<' || c == '%' || c == ':';
2981    case CPP_PLUS:	return c == '+';
2982    case CPP_MINUS:	return c == '-' || c == '>';
2983    case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
2984    case CPP_MOD:	return c == ':' || c == '>';
2985    case CPP_AND:	return c == '&';
2986    case CPP_OR:	return c == '|';
2987    case CPP_COLON:	return c == ':' || c == '>';
2988    case CPP_DEREF:	return c == '*';
2989    case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
2990    case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
2991    case CPP_NAME:	return ((b == CPP_NUMBER
2992				 && name_p (pfile, &token2->val.str))
2993				|| b == CPP_NAME
2994				|| b == CPP_CHAR || b == CPP_STRING); /* L */
2995    case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
2996				|| c == '.' || c == '+' || c == '-');
2997				      /* UCNs */
2998    case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
2999				 && b == CPP_NAME)
3000				|| (CPP_OPTION (pfile, objc)
3001				    && token1->val.str.text[0] == '@'
3002				    && (b == CPP_NAME || b == CPP_STRING)));
3003    case CPP_STRING:
3004    case CPP_WSTRING:
3005    case CPP_UTF8STRING:
3006    case CPP_STRING16:
3007    case CPP_STRING32:	return (CPP_OPTION (pfile, user_literals)
3008				&& (b == CPP_NAME
3009				    || (TOKEN_SPELL (token2) == SPELL_LITERAL
3010					&& ISIDST (token2->val.str.text[0]))));
3011
3012    default:		break;
3013    }
3014
3015  return 0;
3016}
3017
3018/* Output all the remaining tokens on the current line, and a newline
3019   character, to FP.  Leading whitespace is removed.  If there are
3020   macros, special token padding is not performed.  */
3021void
3022cpp_output_line (cpp_reader *pfile, FILE *fp)
3023{
3024  const cpp_token *token;
3025
3026  token = cpp_get_token (pfile);
3027  while (token->type != CPP_EOF)
3028    {
3029      cpp_output_token (token, fp);
3030      token = cpp_get_token (pfile);
3031      if (token->flags & PREV_WHITE)
3032	putc (' ', fp);
3033    }
3034
3035  putc ('\n', fp);
3036}
3037
3038/* Return a string representation of all the remaining tokens on the
3039   current line.  The result is allocated using xmalloc and must be
3040   freed by the caller.  */
3041unsigned char *
3042cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3043{
3044  const cpp_token *token;
3045  unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3046  unsigned int alloced = 120 + out;
3047  unsigned char *result = (unsigned char *) xmalloc (alloced);
3048
3049  /* If DIR_NAME is empty, there are no initial contents.  */
3050  if (dir_name)
3051    {
3052      sprintf ((char *) result, "#%s ", dir_name);
3053      out += 2;
3054    }
3055
3056  token = cpp_get_token (pfile);
3057  while (token->type != CPP_EOF)
3058    {
3059      unsigned char *last;
3060      /* Include room for a possible space and the terminating nul.  */
3061      unsigned int len = cpp_token_len (token) + 2;
3062
3063      if (out + len > alloced)
3064	{
3065	  alloced *= 2;
3066	  if (out + len > alloced)
3067	    alloced = out + len;
3068	  result = (unsigned char *) xrealloc (result, alloced);
3069	}
3070
3071      last = cpp_spell_token (pfile, token, &result[out], 0);
3072      out = last - result;
3073
3074      token = cpp_get_token (pfile);
3075      if (token->flags & PREV_WHITE)
3076	result[out++] = ' ';
3077    }
3078
3079  result[out] = '\0';
3080  return result;
3081}
3082
/* Memory buffers.  Changing these three definitions can have a
   dramatic effect on performance.  The values here are reasonable
   defaults, but might be tuned.  If you adjust them, be sure to test
   across a range of uses of cpplib, including heavy nested
   function-like macro expansion.  Also check the change in peak
   memory usage (NJAMD is a good tool for this).

   MIN_BUFF_SIZE is the smallest buffer that is ever created.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.
   Every macro argument is parenthesized so that any expression can
   be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3097
3098/* Create a new allocation buffer.  Place the control block at the end
3099   of the buffer, so that buffer overflows will cause immediate chaos.  */
3100static _cpp_buff *
3101new_buff (size_t len)
3102{
3103  _cpp_buff *result;
3104  unsigned char *base;
3105
3106  if (len < MIN_BUFF_SIZE)
3107    len = MIN_BUFF_SIZE;
3108  len = CPP_ALIGN (len);
3109
3110#ifdef ENABLE_VALGRIND_CHECKING
3111  /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3112     struct first.  */
3113  size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3114  base = XNEWVEC (unsigned char, len + slen);
3115  result = (_cpp_buff *) base;
3116  base += slen;
3117#else
3118  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
3119  result = (_cpp_buff *) (base + len);
3120#endif
3121  result->base = base;
3122  result->cur = base;
3123  result->limit = base + len;
3124  result->next = NULL;
3125  return result;
3126}
3127
3128/* Place a chain of unwanted allocation buffers on the free list.  */
3129void
3130_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
3131{
3132  _cpp_buff *end = buff;
3133
3134  while (end->next)
3135    end = end->next;
3136  end->next = pfile->free_buffs;
3137  pfile->free_buffs = buff;
3138}
3139
3140/* Return a free buffer of size at least MIN_SIZE.  */
3141_cpp_buff *
3142_cpp_get_buff (cpp_reader *pfile, size_t min_size)
3143{
3144  _cpp_buff *result, **p;
3145
3146  for (p = &pfile->free_buffs;; p = &(*p)->next)
3147    {
3148      size_t size;
3149
3150      if (*p == NULL)
3151	return new_buff (min_size);
3152      result = *p;
3153      size = result->limit - result->base;
3154      /* Return a buffer that's big enough, but don't waste one that's
3155         way too big.  */
3156      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
3157	break;
3158    }
3159
3160  *p = result->next;
3161  result->next = NULL;
3162  result->cur = result->base;
3163  return result;
3164}
3165
3166/* Creates a new buffer with enough space to hold the uncommitted
3167   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
3168   the excess bytes to the new buffer.  Chains the new buffer after
3169   BUFF, and returns the new buffer.  */
3170_cpp_buff *
3171_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3172{
3173  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3174  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3175
3176  buff->next = new_buff;
3177  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3178  return new_buff;
3179}
3180
3181/* Creates a new buffer with enough space to hold the uncommitted
3182   remaining bytes of the buffer pointed to by BUFF, and at least
3183   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
3184   Chains the new buffer before the buffer pointed to by BUFF, and
3185   updates the pointer to point to the new buffer.  */
3186void
3187_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3188{
3189  _cpp_buff *new_buff, *old_buff = *pbuff;
3190  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3191
3192  new_buff = _cpp_get_buff (pfile, size);
3193  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3194  new_buff->next = old_buff;
3195  *pbuff = new_buff;
3196}
3197
3198/* Free a chain of buffers starting at BUFF.  */
3199void
3200_cpp_free_buff (_cpp_buff *buff)
3201{
3202  _cpp_buff *next;
3203
3204  for (; buff; buff = next)
3205    {
3206      next = buff->next;
3207#ifdef ENABLE_VALGRIND_CHECKING
3208      free (buff);
3209#else
3210      free (buff->base);
3211#endif
3212    }
3213}
3214
3215/* Allocate permanent, unaligned storage of length LEN.  */
3216unsigned char *
3217_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3218{
3219  _cpp_buff *buff = pfile->u_buff;
3220  unsigned char *result = buff->cur;
3221
3222  if (len > (size_t) (buff->limit - result))
3223    {
3224      buff = _cpp_get_buff (pfile, len);
3225      buff->next = pfile->u_buff;
3226      pfile->u_buff = buff;
3227      result = buff->cur;
3228    }
3229
3230  buff->cur = result + len;
3231  return result;
3232}
3233
3234/* Allocate permanent, unaligned storage of length LEN from a_buff.
3235   That buffer is used for growing allocations when saving macro
3236   replacement lists in a #define, and when parsing an answer to an
3237   assertion in #assert, #unassert or #if (and therefore possibly
3238   whilst expanding macros).  It therefore must not be used by any
3239   code that they might call: specifically the lexer and the guts of
3240   the macro expander.
3241
3242   All existing other uses clearly fit this restriction: storing
3243   registered pragmas during initialization.  */
3244unsigned char *
3245_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3246{
3247  _cpp_buff *buff = pfile->a_buff;
3248  unsigned char *result = buff->cur;
3249
3250  if (len > (size_t) (buff->limit - result))
3251    {
3252      buff = _cpp_get_buff (pfile, len);
3253      buff->next = pfile->a_buff;
3254      pfile->a_buff = buff;
3255      result = buff->cur;
3256    }
3257
3258  buff->cur = result + len;
3259  return result;
3260}
3261
3262/* Say which field of TOK is in use.  */
3263
3264enum cpp_token_fld_kind
3265cpp_token_val_index (const cpp_token *tok)
3266{
3267  switch (TOKEN_SPELL (tok))
3268    {
3269    case SPELL_IDENT:
3270      return CPP_TOKEN_FLD_NODE;
3271    case SPELL_LITERAL:
3272      return CPP_TOKEN_FLD_STR;
3273    case SPELL_OPERATOR:
3274      if (tok->type == CPP_PASTE)
3275	return CPP_TOKEN_FLD_TOKEN_NO;
3276      else
3277	return CPP_TOKEN_FLD_NONE;
3278    case SPELL_NONE:
3279      if (tok->type == CPP_MACRO_ARG)
3280	return CPP_TOKEN_FLD_ARG_NO;
3281      else if (tok->type == CPP_PADDING)
3282	return CPP_TOKEN_FLD_SOURCE;
3283      else if (tok->type == CPP_PRAGMA)
3284	return CPP_TOKEN_FLD_PRAGMA;
3285      /* else fall through */
3286    default:
3287      return CPP_TOKEN_FLD_NONE;
3288    }
3289}
3290
3291/* All tokens lexed in R after calling this function will be forced to have
3292   their source_location the same as the location referenced by P, until
3293   cpp_stop_forcing_token_locations is called for R.  */
3294
3295void
3296cpp_force_token_locations (cpp_reader *r, source_location *p)
3297{
3298  r->forced_token_location_p = p;
3299}
3300
3301/* Go back to assigning locations naturally for lexed tokens.  */
3302
3303void
3304cpp_stop_forcing_token_locations (cpp_reader *r)
3305{
3306  r->forced_token_location_p = NULL;
3307}
3308