1/* CPP Library - traditional lexical analysis and macro expansion.
2   Copyright (C) 2002-2015 Free Software Foundation, Inc.
3   Contributed by Neil Booth, May 2002
4
5This program is free software; you can redistribute it and/or modify it
6under the terms of the GNU General Public License as published by the
7Free Software Foundation; either version 3, or (at your option) any
8later version.
9
10This program is distributed in the hope that it will be useful,
11but WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program; see the file COPYING3.  If not see
17<http://www.gnu.org/licenses/>.  */
18
19#include "config.h"
20#include "system.h"
21#include "cpplib.h"
22#include "internal.h"
23
24/* The replacement text of a function-like macro is stored as a
25   contiguous sequence of aligned blocks, each representing the text
26   between subsequent parameters.
27
28   Each block comprises the text between its surrounding parameters,
29   the length of that text, and the one-based index of the following
30   parameter.  The final block in the replacement text is easily
31   recognizable as it has an argument index of zero.  */
32
33struct block
34{
  /* Number of bytes of literal text stored in TEXT.  */
35  unsigned int text_len;
  /* One-based index of the parameter that follows the text, or zero
     in the final block of a replacement list.  */
36  unsigned short arg_index;
  /* First byte of the text.  Declared with a single element, but
     readers copy TEXT_LEN bytes starting here, so the text extends
     past the declared end of the struct.  */
37  uchar text[1];
38};
39
/* Size of the fixed part of a block, i.e. the offset of its text.  */
40#define BLOCK_HEADER_LEN offsetof (struct block, text)
/* Distance from the start of a block holding TEXT_LEN bytes of text to
   the start of the block after it: header plus text, passed through
   CPP_ALIGN.  */
41#define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
42
43/* Structure holding information about a function-like macro
44   invocation.  */
45struct fun_macro
46{
47  /* Memory buffer backing the ARGS array below.  */
48  _cpp_buff *buff;
49
50  /* An array of size the number of macro parameters + 1, containing
51     the offsets of the start of each macro argument in the output
52     buffer.  The argument continues until the character before the
53     start of the next one.  */
54  size_t *args;
55
56  /* The hashnode of the macro.  */
57  cpp_hashnode *node;
58
59  /* The offset of the macro name in the output buffer.  */
60  size_t offset;
61
62  /* The line the macro name appeared on.  */
63  source_location line;
64
65  /* Number of parameters.  maybe_start_funlike() uses 1 for a builtin
     macro and the macro's own paramc otherwise.  */
66  unsigned int paramc;
67
68  /* Zero-based index of argument being currently lexed.  It is bumped
     by save_argument() each time an argument ends.  */
69  unsigned int argc;
70};
71
/* Lexing state of the traditional scanner.  It is mostly used to
   prevent macro expansion.  */
enum ls
{
  /* Normal state.  */
  ls_none = 0,
  /* When looking for '('.  */
  ls_fun_open,
  /* When looking for ')'.  */
  ls_fun_close,
  /* After defined.  */
  ls_defined,
  /* Looking for ')' of defined().  */
  ls_defined_close,
  /* After # in preprocessor conditional.  */
  ls_hash,
  /* After the predicate, maybe paren?  */
  ls_predicate,
  /* In answer to predicate.  */
  ls_answer,
  /* After __has_include__.  */
  ls_has_include,
  /* Looking for ')' of __has_include__.  */
  ls_has_include_close
};
83
84/* Lexing TODO: Maybe handle space in escaped newlines.  Stop lex.c
85   from recognizing comments and directives during its lexing pass.  */
86
/* Forward declarations of file-local helpers, so that they can be
   called before the point where they are defined.  */
87static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
88static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
89static const uchar *copy_comment (cpp_reader *, const uchar *, int);
90static void check_output_buffer (cpp_reader *, size_t);
91static void push_replacement_text (cpp_reader *, cpp_hashnode *);
92static bool scan_parameters (cpp_reader *, cpp_macro *);
93static bool recursive_macro (cpp_reader *, cpp_hashnode *);
94static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
95static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
96				 struct fun_macro *);
97static void save_argument (struct fun_macro *, size_t);
98static void replace_args_and_push (cpp_reader *, struct fun_macro *);
99static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);
100
101/* Ensures we have N bytes' space in the output buffer, and
102   reallocates it if not.  */
103static void
104check_output_buffer (cpp_reader *pfile, size_t n)
105{
106  /* We might need two bytes to terminate an unterminated comment, and
107     one more to terminate the line with a NUL.  */
108  n += 2 + 1;
109
110  if (n > (size_t) (pfile->out.limit - pfile->out.cur))
111    {
112      size_t size = pfile->out.cur - pfile->out.base;
113      size_t new_size = (size + n) * 3 / 2;
114
115      pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size);
116      pfile->out.limit = pfile->out.base + new_size;
117      pfile->out.cur = pfile->out.base + size;
118    }
119}
120
121/* Skip a C-style block comment in a macro as a result of -CC.
122   Buffer->cur points to the initial asterisk of the comment.  */
123static void
124skip_macro_block_comment (cpp_reader *pfile)
125{
126  const uchar *cur = pfile->buffer->cur;
127
128  cur++;
129  if (*cur == '/')
130    cur++;
131
132  /* People like decorating comments with '*', so check for '/'
133     instead for efficiency.  */
134  while(! (*cur++ == '/' && cur[-2] == '*') )
135    ;
136
137  pfile->buffer->cur = cur;
138}
139
140/* CUR points to the asterisk introducing a comment in the current
141   context.  IN_DEFINE is true if we are in the replacement text of a
142   macro.
143
144   The asterisk and following comment is copied to the buffer pointed
145   to by pfile->out.cur, which must be of sufficient size.
146   Unterminated comments are diagnosed, and correctly terminated in
147   the output.  pfile->out.cur is updated depending upon IN_DEFINE,
148   -C, -CC and pfile->state.in_directive.
149
150   Returns a pointer to the first character after the comment in the
151   input buffer.  */
152static const uchar *
153copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
154{
155  bool unterminated, copy = false;
156  source_location src_loc = pfile->line_table->highest_line;
157  cpp_buffer *buffer = pfile->buffer;
158
159  buffer->cur = cur;
160  if (pfile->context->prev)
161    unterminated = false, skip_macro_block_comment (pfile);
162  else
163    unterminated = _cpp_skip_block_comment (pfile);
164
165  if (unterminated)
166    cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
167			 "unterminated comment");
168
169  /* Comments in directives become spaces so that tokens are properly
170     separated when the ISO preprocessor re-lexes the line.  The
171     exception is #define.  */
172  if (pfile->state.in_directive)
173    {
174      if (in_define)
175	{
176	  if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
177	    pfile->out.cur--;
178	  else
179	    copy = true;
180	}
181      else
182	pfile->out.cur[-1] = ' ';
183    }
184  else if (CPP_OPTION (pfile, discard_comments))
185    pfile->out.cur--;
186  else
187    copy = true;
188
189  if (copy)
190    {
191      size_t len = (size_t) (buffer->cur - cur);
192      memcpy (pfile->out.cur, cur, len);
193      pfile->out.cur += len;
194      if (unterminated)
195	{
196	  *pfile->out.cur++ = '*';
197	  *pfile->out.cur++ = '/';
198	}
199    }
200
201  return buffer->cur;
202}
203
204/* CUR points to any character in the input buffer.  Skips over all
205   contiguous horizontal white space and NULs, including comments if
206   SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
207   character or the end of the current context.  Escaped newlines are
208   removed.
209
210   The whitespace is copied verbatim to the output buffer, except that
211   comments are handled as described in copy_comment().
212   pfile->out.cur is updated.
213
214   Returns a pointer to the first character after the whitespace in
215   the input buffer.  */
216static const uchar *
217skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
218{
219  uchar *out = pfile->out.cur;
220
221  for (;;)
222    {
223      unsigned int c = *cur++;
224      *out++ = c;
225
226      if (is_nvspace (c))
227	continue;
228
229      if (c == '/' && *cur == '*' && skip_comments)
230	{
231	  pfile->out.cur = out;
232	  cur = copy_comment (pfile, cur, false /* in_define */);
233	  out = pfile->out.cur;
234	  continue;
235	}
236
237      out--;
238      break;
239    }
240
241  pfile->out.cur = out;
242  return cur - 1;
243}
244
245/* Lexes and outputs an identifier starting at CUR, which is assumed
246   to point to a valid first character of an identifier.  Returns
247   the hashnode, and updates out.cur.  */
248static cpp_hashnode *
249lex_identifier (cpp_reader *pfile, const uchar *cur)
250{
251  size_t len;
252  uchar *out = pfile->out.cur;
253  cpp_hashnode *result;
254
255  do
256    *out++ = *cur++;
257  while (is_numchar (*cur));
258
259  CUR (pfile->context) = cur;
260  len = out - pfile->out.cur;
261  result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur,
262				    len, HT_ALLOC));
263  pfile->out.cur = out;
264  return result;
265}
266
267/* Overlays the true file buffer temporarily with text of length LEN
268   starting at START.  The true buffer is restored upon calling
269   restore_buff().  */
270void
271_cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
272{
273  cpp_buffer *buffer = pfile->buffer;
274
275  pfile->overlaid_buffer = buffer;
276  pfile->saved_cur = buffer->cur;
277  pfile->saved_rlimit = buffer->rlimit;
278  pfile->saved_line_base = buffer->next_line;
279  buffer->need_line = false;
280
281  buffer->cur = start;
282  buffer->line_base = start;
283  buffer->rlimit = start + len;
284}
285
286/* Restores a buffer overlaid by _cpp_overlay_buffer().  */
287void
288_cpp_remove_overlay (cpp_reader *pfile)
289{
290  cpp_buffer *buffer = pfile->overlaid_buffer;
291
292  buffer->cur = pfile->saved_cur;
293  buffer->rlimit = pfile->saved_rlimit;
294  buffer->line_base = pfile->saved_line_base;
295  buffer->need_line = true;
296
297  pfile->overlaid_buffer = NULL;
298}
299
300/* Reads a logical line into the output buffer.  Returns TRUE if there
301   is more text left in the buffer.  */
302bool
303_cpp_read_logical_line_trad (cpp_reader *pfile)
304{
305  do
306    {
307      if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
308	return false;
309    }
310  while (!_cpp_scan_out_logical_line (pfile, NULL, false)
311	 || pfile->state.skipping);
312
313  return pfile->buffer != NULL;
314}
315
316/* Return true if NODE is a fun_like macro.  */
317static inline bool
318fun_like_macro (cpp_hashnode *node)
319{
320  if (node->flags & NODE_BUILTIN)
321    return node->value.builtin == BT_HAS_ATTRIBUTE;
322  else
323    return node->value.macro->fun_like;
324}
325
326/* Set up state for finding the opening '(' of a function-like
327   macro.  */
328static void
329maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start,
330		     struct fun_macro *macro)
331{
332  unsigned int n;
333  if (node->flags & NODE_BUILTIN)
334    n = 1;
335  else
336    n = node->value.macro->paramc;
337
338  if (macro->buff)
339    _cpp_release_buff (pfile, macro->buff);
340  macro->buff = _cpp_get_buff (pfile, (n + 1) * sizeof (size_t));
341  macro->args = (size_t *) BUFF_FRONT (macro->buff);
342  macro->node = node;
343  macro->offset = start - pfile->out.base;
344  macro->paramc = n;
345  macro->argc = 0;
346}
347
348/* Save the OFFSET of the start of the next argument to MACRO.  */
349static void
350save_argument (struct fun_macro *macro, size_t offset)
351{
352  macro->argc++;
353  if (macro->argc <= macro->paramc)
354    macro->args[macro->argc] = offset;
355}
356
357/* Copies the next logical line in the current buffer (starting at
358   buffer->cur) to the output buffer.  The output is guaranteed to
359   terminate with a NUL character.  buffer->cur is updated.
360
361   If MACRO is non-NULL, then we are scanning the replacement list of
362   MACRO, and we call save_replacement_text() every time we meet an
363   argument.
364
365   If BUILTIN_MACRO_ARG is true, this is called to macro expand
366   arguments of builtin function-like macros.  */
/* Returns false when the line turned out to be a directive (including
   a null directive) that was dealt with here, and true otherwise.

   The scan is a single pass over the input: every character is first
   copied to the output buffer and only then examined, with the
   lexing state (enum ls) and the QUOTE character deciding whether an
   identifier may be macro-expanded.  Whenever a context is pushed or
   popped, control goes back to the new_context label to pick up the
   read pointer of the context that is now current.  */
367bool
368_cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro,
369			    bool builtin_macro_arg)
370{
371  bool result = true;
372  cpp_context *context;
373  const uchar *cur;
374  uchar *out;
375  struct fun_macro fmacro;
376  unsigned int c, paren_depth = 0, quote;
377  enum ls lex_state = ls_none;
378  bool header_ok;
379  const uchar *start_of_input_line;
380
  /* No function-like macro invocation is being collected yet.  */
381  fmacro.buff = NULL;
382  fmacro.args = NULL;
383  fmacro.node = NULL;
384  fmacro.offset = 0;
385  fmacro.line = 0;
386  fmacro.paramc = 0;
387  fmacro.argc = 0;
388
389  quote = 0;
390  header_ok = pfile->state.angled_headers;
391  CUR (pfile->context) = pfile->buffer->cur;
392  RLIMIT (pfile->context) = pfile->buffer->rlimit;
  /* A call made to expand the argument of a builtin macro appends to
     the output collected so far instead of starting a new line.  */
393  if (!builtin_macro_arg)
394    {
395      pfile->out.cur = pfile->out.base;
396      pfile->out.first_line = pfile->line_table->highest_line;
397    }
398  /* start_of_input_line is needed to make sure that directives really,
399     really start at the first character of the line.  */
400  start_of_input_line = pfile->buffer->cur;
  /* (Re)load the scanning state from the current context, and make
     sure the output buffer can take the rest of its text.  */
401 new_context:
402  context = pfile->context;
403  cur = CUR (context);
404  check_output_buffer (pfile, RLIMIT (context) - cur);
405  out = pfile->out.cur;
406
407  for (;;)
408    {
      /* Line notes are only processed while reading the base context
	 and not when expanding a builtin macro's argument.  */
409      if (!context->prev
410	  && !builtin_macro_arg
411	  && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
412	{
413	  pfile->buffer->cur = cur;
414	  _cpp_process_line_notes (pfile, false);
415	}
      /* Copy first, examine afterwards; the cases below back OUT up
	 again when the character must not stay in the output.  */
416      c = *cur++;
417      *out++ = c;
418
419      /* Whitespace should "continue" out of the switch,
420	 non-whitespace should "break" out of it.  */
421      switch (c)
422	{
423	case ' ':
424	case '\t':
425	case '\f':
426	case '\v':
427	case '\0':
428	  continue;
429
430	case '\n':
431	  /* If this is a macro's expansion, pop it.  */
432	  if (context->prev)
433	    {
434	      pfile->out.cur = out - 1;
435	      _cpp_pop_context (pfile);
436	      goto new_context;
437	    }
438
439	  /* Omit the newline from the output buffer.  */
440	  pfile->out.cur = out - 1;
441	  pfile->buffer->cur = cur;
442	  if (builtin_macro_arg)
443	    goto done;
444	  pfile->buffer->need_line = true;
445	  CPP_INCREMENT_LINE (pfile, 0);
446
	  /* A function-like macro invocation may continue on the next
	     line, except inside a directive.  */
447	  if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
448	      && !pfile->state.in_directive
449	      && _cpp_get_fresh_line (pfile))
450	    {
451	      /* Newlines in arguments become a space, but we don't
452		 clear any in-progress quote.  */
453	      if (lex_state == ls_fun_close)
454		out[-1] = ' ';
455	      cur = pfile->buffer->cur;
456	      continue;
457	    }
458	  goto done;
459
	/* '<' only opens a quote while HEADER_OK holds; it starts out as
	   state.angled_headers and is cleared after the first
	   non-whitespace character (see below the switch).  */
460	case '<':
461	  if (header_ok)
462	    quote = '>';
463	  break;
464	case '>':
465	  if (c == quote)
466	    quote = 0;
467	  break;
468
469	case '"':
470	case '\'':
471	  if (c == quote)
472	    quote = 0;
473	  else if (!quote)
474	    quote = c;
475	  break;
476
477	case '\\':
478	  /* Skip escaped quotes here, it's easier than above.  */
479	  if (*cur == '\\' || *cur == '"' || *cur == '\'')
480	    *out++ = *cur++;
481	  break;
482
483	case '/':
484	  /* Traditional CPP does not recognize comments within
485	     literals.  */
486	  if (!quote && *cur == '*')
487	    {
488	      pfile->out.cur = out;
489	      cur = copy_comment (pfile, cur, macro != 0);
490	      out = pfile->out.cur;
491	      continue;
492	    }
493	  break;
494
495	case '_':
496	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
497	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
498	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
499	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
500	case 'y': case 'z':
501	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
502	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
503	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
504	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
505	case 'Y': case 'Z':
	  /* Identifiers are looked up unless we are skipping; inside a
	     quote only when scanning a macro definition.  */
506	  if (!pfile->state.skipping && (quote == 0 || macro))
507	    {
508	      cpp_hashnode *node;
509	      uchar *out_start = out - 1;
510
	      /* Re-lex the whole identifier from its first character,
		 overwriting the copy of that character made above.  */
511	      pfile->out.cur = out_start;
512	      node = lex_identifier (pfile, cur - 1);
513	      out = pfile->out.cur;
514	      cur = CUR (context);
515
516	      if (node->type == NT_MACRO
517		  /* Should we expand for ls_answer?  */
518		  && (lex_state == ls_none || lex_state == ls_fun_open)
519		  && !pfile->state.prevent_expansion)
520		{
521		  /* Macros invalidate MI optimization.  */
522		  pfile->mi_valid = false;
523		  if (fun_like_macro (node))
524		    {
		      /* Nothing is expanded yet: that only happens if
			 a '(' follows.  */
525		      maybe_start_funlike (pfile, node, out_start, &fmacro);
526		      lex_state = ls_fun_open;
527		      fmacro.line = pfile->line_table->highest_line;
528		      continue;
529		    }
530		  else if (!recursive_macro (pfile, node))
531		    {
532		      /* Remove the object-like macro's name from the
533			 output, and push its replacement text.  */
534		      pfile->out.cur = out_start;
535		      push_replacement_text (pfile, node);
536		      lex_state = ls_none;
537		      goto new_context;
538		    }
539		}
540	      else if (macro && (node->flags & NODE_MACRO_ARG) != 0)
541		{
542		  /* Found a parameter in the replacement text of a
543		     #define.  Remove its name from the output.  */
544		  pfile->out.cur = out_start;
545		  save_replacement_text (pfile, macro, node->value.arg_index);
		  /* Carry on writing at the start of the output buffer.
		     NOTE(review): presumably save_replacement_text() has
		     consumed the text collected so far -- confirm
		     against its definition.  */
546		  out = pfile->out.base;
547		}
548	      else if (lex_state == ls_hash)
549		{
550		  lex_state = ls_predicate;
551		  continue;
552		}
553	      else if (pfile->state.in_expression
554		       && node == pfile->spec_nodes.n_defined)
555		{
556		  lex_state = ls_defined;
557		  continue;
558		}
559	      else if (pfile->state.in_expression
560		       && (node == pfile->spec_nodes.n__has_include__
561			|| node == pfile->spec_nodes.n__has_include_next__))
562		{
563		  lex_state = ls_has_include;
564		  continue;
565		}
566	    }
567	  break;
568
569	case '(':
570	  if (quote == 0)
571	    {
572	      paren_depth++;
573	      if (lex_state == ls_fun_open)
574		{
575		  if (recursive_macro (pfile, fmacro.node))
576		    lex_state = ls_none;
577		  else
578		    {
		      /* Start collecting arguments.  OUT is rewound to
			 where the macro name was written, so the
			 arguments overwrite the name and the '(', and
			 the first argument starts right there.  */
579		      lex_state = ls_fun_close;
580		      paren_depth = 1;
581		      out = pfile->out.base + fmacro.offset;
582		      fmacro.args[0] = fmacro.offset;
583		    }
584		}
585	      else if (lex_state == ls_predicate)
586		lex_state = ls_answer;
587	      else if (lex_state == ls_defined)
588		lex_state = ls_defined_close;
589	      else if (lex_state == ls_has_include)
590		lex_state = ls_has_include_close;
591	    }
592	  break;
593
594	case ',':
	  /* An unquoted comma at the outermost parenthesis level ends
	     the current argument.  */
595	  if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
596	    save_argument (&fmacro, out - pfile->out.base);
597	  break;
598
599	case ')':
600	  if (quote == 0)
601	    {
602	      paren_depth--;
603	      if (lex_state == ls_fun_close && paren_depth == 0)
604		{
605		  if (fmacro.node->flags & NODE_BUILTIN)
606		    {
607		      /* Handle builtin function-like macros like
608			 __has_attribute.  The already parsed arguments
609			 are put into a buffer, which is then preprocessed
610			 and the result is fed to _cpp_push_text_context
611			 with disabled expansion, where the ISO preprocessor
612			 parses it.  While in traditional preprocessing
613			 macro arguments aren't immediately expanded, they in
614			 the end are because the macro with replaced arguments
615			 is preprocessed again.  For the builtin function-like
616			 macros we need the argument immediately though,
617			 if we don't preprocess them, they would behave
618			 very differently from ISO preprocessor handling
619			 of those builtin macros.  So, this handling is
620			 more similar to traditional preprocessing of
621			 #if directives, where we also keep preprocessing
622			 until everything is expanded, and then feed the
623			 result with disabled expansion to ISO preprocessor
624			 for handling the directives.  */
625		      lex_state = ls_none;
626		      save_argument (&fmacro, out - pfile->out.base);
		      /* A zeroed macro carrying just the parameter
			 count is all that is handed to
			 _cpp_arguments_ok.  */
627		      cpp_macro m;
628		      memset (&m, '\0', sizeof (m));
629		      m.paramc = fmacro.paramc;
630		      if (_cpp_arguments_ok (pfile, &m, fmacro.node,
631					     fmacro.argc))
632			{
633			  size_t len = fmacro.args[1] - fmacro.args[0];
634			  uchar *buf;
635
636			  /* Remove the macro's invocation from the
637			     output, and push its replacement text.  */
638			  pfile->out.cur = pfile->out.base + fmacro.offset;
639			  CUR (context) = cur;
			  /* BUF is '(' followed by the LEN bytes written
			     since args[0], then a newline.  */
640			  buf = _cpp_unaligned_alloc (pfile, len + 2);
641			  buf[0] = '(';
642			  memcpy (buf + 1, pfile->out.base + fmacro.args[0],
643				  len);
644			  buf[len + 1] = '\n';
645
			  /* Temporarily make BUF the text of the current
			     buffer and cut the context off from its
			     parents, then scan BUF with this very
			     function.  Everything changed is saved first
			     and restored further down.  */
646			  const unsigned char *ctx_rlimit = RLIMIT (context);
647			  const unsigned char *saved_cur = pfile->buffer->cur;
648			  const unsigned char *saved_rlimit
649			    = pfile->buffer->rlimit;
650			  const unsigned char *saved_line_base
651			    = pfile->buffer->line_base;
652			  bool saved_need_line = pfile->buffer->need_line;
653			  cpp_buffer *saved_overlaid_buffer
654			    = pfile->overlaid_buffer;
655			  pfile->buffer->cur = buf;
656			  pfile->buffer->line_base = buf;
657			  pfile->buffer->rlimit = buf + len + 1;
658			  pfile->buffer->need_line = false;
659			  pfile->overlaid_buffer = pfile->buffer;
660			  bool saved_in_directive = pfile->state.in_directive;
661			  pfile->state.in_directive = true;
662			  cpp_context *saved_prev_context = context->prev;
663			  context->prev = NULL;
664
665			  _cpp_scan_out_logical_line (pfile, NULL, true);
666
			  /* Newline-terminate the expanded argument text
			     in the output buffer and expose it as the
			     text of the buffer and of the context while
			     _cpp_builtin_macro_text runs.  */
667			  pfile->state.in_directive = saved_in_directive;
668			  check_output_buffer (pfile, 1);
669			  *pfile->out.cur = '\n';
670			  pfile->buffer->cur = pfile->out.base + fmacro.offset;
671			  pfile->buffer->line_base = pfile->buffer->cur;
672			  pfile->buffer->rlimit = pfile->out.cur;
673			  CUR (context) = pfile->buffer->cur;
674			  RLIMIT (context) = pfile->buffer->rlimit;
675
676			  pfile->state.prevent_expansion++;
677			  const uchar *text
678			    = _cpp_builtin_macro_text (pfile, fmacro.node);
679			  pfile->state.prevent_expansion--;
680
			  /* Put back everything saved above and drop the
			     invocation from the output.  */
681			  context->prev = saved_prev_context;
682			  pfile->buffer->cur = saved_cur;
683			  pfile->buffer->rlimit = saved_rlimit;
684			  pfile->buffer->line_base = saved_line_base;
685			  pfile->buffer->need_line = saved_need_line;
686			  pfile->overlaid_buffer = saved_overlaid_buffer;
687			  pfile->out.cur = pfile->out.base + fmacro.offset;
688			  CUR (context) = cur;
689			  RLIMIT (context) = ctx_rlimit;
			  /* Push a newline-terminated copy of the
			     builtin's text as a new context.  */
690			  len = ustrlen (text);
691			  buf = _cpp_unaligned_alloc (pfile, len + 1);
692			  memcpy (buf, text, len);
693			  buf[len] = '\n';
694			  text = buf;
695			  _cpp_push_text_context (pfile, fmacro.node,
696						  text, len);
697			  goto new_context;
698			}
699		      break;
700		    }
701
702		  cpp_macro *m = fmacro.node->value.macro;
703
704		  m->used = 1;
705		  lex_state = ls_none;
706		  save_argument (&fmacro, out - pfile->out.base);
707
708		  /* A single zero-length argument is no argument.  */
709		  if (fmacro.argc == 1
710		      && m->paramc == 0
711		      && out == pfile->out.base + fmacro.offset + 1)
712		    fmacro.argc = 0;
713
714		  if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
715		    {
716		      /* Remove the macro's invocation from the
717			 output, and push its replacement text.  */
718		      pfile->out.cur = pfile->out.base + fmacro.offset;
719		      CUR (context) = cur;
720		      replace_args_and_push (pfile, &fmacro);
721		      goto new_context;
722		    }
723		}
724	      else if (lex_state == ls_answer || lex_state == ls_defined_close
725			|| lex_state == ls_has_include_close)
726		lex_state = ls_none;
727	    }
728	  break;
729
730	case '#':
731	  if (cur - 1 == start_of_input_line
732	      /* A '#' from a macro doesn't start a directive.  */
733	      && !pfile->context->prev
734	      && !pfile->state.in_directive)
735	    {
736	      /* A directive.  With the way _cpp_handle_directive
737		 currently works, we only want to call it if either we
738		 know the directive is OK, or we want it to fail and
739		 be removed from the output.  If we want it to be
740		 passed through (the assembler case) then we must not
741		 call _cpp_handle_directive.  */
742	      pfile->out.cur = out;
743	      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
744	      out = pfile->out.cur;
745
746	      if (*cur == '\n')
747		{
748		  /* Null directive.  Ignore it and don't invalidate
749		     the MI optimization.  */
750		  pfile->buffer->need_line = true;
751		  CPP_INCREMENT_LINE (pfile, 0);
752		  result = false;
753		  goto done;
754		}
755	      else
756		{
757		  bool do_it = false;
758
759		  if (is_numstart (*cur)
760		      && CPP_OPTION (pfile, lang) != CLK_ASM)
761		    do_it = true;
762		  else if (is_idstart (*cur))
763		    /* Check whether we know this directive, but don't
764		       advance.  */
765		    do_it = lex_identifier (pfile, cur)->is_directive;
766
		  /* Outside assembler mode every such line is handed
		     over; in assembler mode only a line whose
		     identifier is a known directive is.  */
767		  if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
768		    {
769		      /* This is a kludge.  We want to have the ISO
770			 preprocessor lex the next token.  */
771		      pfile->buffer->cur = cur;
772		      _cpp_handle_directive (pfile, false /* indented */);
773		      result = false;
774		      goto done;
775		    }
776		}
777	    }
778
779	  if (pfile->state.in_expression)
780	    {
781	      lex_state = ls_hash;
782	      continue;
783	    }
784	  break;
785
786	default:
787	  break;
788	}
789
790      /* Non-whitespace disables MI optimization and stops treating
791	 '<' as a quote in #include.  */
792      header_ok = false;
793      if (!pfile->state.in_directive)
794	pfile->mi_valid = false;
795
796      if (lex_state == ls_none)
797	continue;
798
799      /* Some of these transitions of state are syntax errors.  The
800	 ISO preprocessor will issue errors later.  */
801      if (lex_state == ls_fun_open)
802	/* Missing '('.  */
803	lex_state = ls_none;
804      else if (lex_state == ls_hash
805	       || lex_state == ls_predicate
806	       || lex_state == ls_defined
807	       || lex_state == ls_has_include)
808	lex_state = ls_none;
809
810      /* ls_answer and ls_defined_close keep going until ')'.  */
      /* ls_has_include_close and ls_fun_close are not reset here
	 either.  */
811    }
812
813 done:
814  if (fmacro.buff)
815    _cpp_release_buff (pfile, fmacro.buff);
816
  /* Still collecting arguments at the end of the line: the closing
     ')' of the invocation was never seen.  */
817  if (lex_state == ls_fun_close)
818    cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
819			 "unterminated argument list invoking macro \"%s\"",
820			 NODE_NAME (fmacro.node));
821  return result;
822}
823
824/* Push a context holding the replacement text of the macro NODE on
825   the context stack.  NODE is either object-like, or a function-like
826   macro with no arguments.  */
827static void
828push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
829{
830  size_t len;
831  const uchar *text;
832  uchar *buf;
833
834  if (node->flags & NODE_BUILTIN)
835    {
836      text = _cpp_builtin_macro_text (pfile, node);
837      len = ustrlen (text);
838      buf = _cpp_unaligned_alloc (pfile, len + 1);
839      memcpy (buf, text, len);
840      buf[len] = '\n';
841      text = buf;
842    }
843  else
844    {
845      cpp_macro *macro = node->value.macro;
846      macro->used = 1;
847      text = macro->exp.text;
848      macro->traditional = 1;
849      len = macro->count;
850    }
851
852  _cpp_push_text_context (pfile, node, text, len);
853}
854
855/* Returns TRUE if traditional macro recursion is detected.  */
856static bool
857recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
858{
859  bool recursing = !!(node->flags & NODE_DISABLED);
860
861  /* Object-like macros that are already expanding are necessarily
862     recursive.
863
864     However, it is possible to have traditional function-like macros
865     that are not infinitely recursive but recurse to any given depth.
866     Further, it is easy to construct examples that get ever longer
867     until the point they stop recursing.  So there is no easy way to
868     detect true recursion; instead we assume any expansion more than
869     20 deep since the first invocation of this macro must be
870     recursing.  */
871  if (recursing && fun_like_macro (node))
872    {
873      size_t depth = 0;
874      cpp_context *context = pfile->context;
875
876      do
877	{
878	  depth++;
879	  if (context->c.macro == node && depth > 20)
880	    break;
881	  context = context->prev;
882	}
883      while (context);
884      recursing = context != NULL;
885    }
886
887  if (recursing)
888    cpp_error (pfile, CPP_DL_ERROR,
889	       "detected recursion whilst expanding macro \"%s\"",
890	       NODE_NAME (node));
891
892  return recursing;
893}
894
895/* Return the length of the replacement text of a function-like or
896   object-like non-builtin macro.  */
897size_t
898_cpp_replacement_text_len (const cpp_macro *macro)
899{
900  size_t len;
901
902  if (macro->fun_like && (macro->paramc != 0))
903    {
904      const uchar *exp;
905
906      len = 0;
907      for (exp = macro->exp.text;;)
908	{
909	  struct block *b = (struct block *) exp;
910
911	  len += b->text_len;
912	  if (b->arg_index == 0)
913	    break;
914	  len += NODE_LEN (macro->params[b->arg_index - 1]);
915	  exp += BLOCK_LEN (b->text_len);
916	}
917    }
918  else
919    len = macro->count;
920
921  return len;
922}
923
924/* Copy the replacement text of MACRO to DEST, which must be of
925   sufficient size.  It is not NUL-terminated.  The next character is
926   returned.  */
927uchar *
928_cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
929{
930  if (macro->fun_like && (macro->paramc != 0))
931    {
932      const uchar *exp;
933
934      for (exp = macro->exp.text;;)
935	{
936	  struct block *b = (struct block *) exp;
937	  cpp_hashnode *param;
938
939	  memcpy (dest, b->text, b->text_len);
940	  dest += b->text_len;
941	  if (b->arg_index == 0)
942	    break;
943	  param = macro->params[b->arg_index - 1];
944	  memcpy (dest, NODE_NAME (param), NODE_LEN (param));
945	  dest += NODE_LEN (param);
946	  exp += BLOCK_LEN (b->text_len);
947	}
948    }
949  else
950    {
951      memcpy (dest, macro->exp.text, macro->count);
952      dest += macro->count;
953    }
954
955  return dest;
956}
957
958/* Push a context holding the replacement text of the macro NODE on
959   the context stack.  NODE is either object-like, or a function-like
960   macro with no arguments.  */
961static void
962replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
963{
964  cpp_macro *macro = fmacro->node->value.macro;
965
966  if (macro->paramc == 0)
967    push_replacement_text (pfile, fmacro->node);
968  else
969    {
970      const uchar *exp;
971      uchar *p;
972      _cpp_buff *buff;
973      size_t len = 0;
974      int cxtquote = 0;
975
976      /* Get an estimate of the length of the argument-replaced text.
977	 This is a worst case estimate, assuming that every replacement
978	 text character needs quoting.  */
979      for (exp = macro->exp.text;;)
980	{
981	  struct block *b = (struct block *) exp;
982
983	  len += b->text_len;
984	  if (b->arg_index == 0)
985	    break;
986	  len += 2 * (fmacro->args[b->arg_index]
987		      - fmacro->args[b->arg_index - 1] - 1);
988	  exp += BLOCK_LEN (b->text_len);
989	}
990
991      /* Allocate room for the expansion plus \n.  */
992      buff = _cpp_get_buff (pfile, len + 1);
993
994      /* Copy the expansion and replace arguments.  */
995      /* Accumulate actual length, including quoting as necessary */
996      p = BUFF_FRONT (buff);
997      len = 0;
998      for (exp = macro->exp.text;;)
999	{
1000	  struct block *b = (struct block *) exp;
1001	  size_t arglen;
1002	  int argquote;
1003	  uchar *base;
1004	  uchar *in;
1005
1006	  len += b->text_len;
1007	  /* Copy the non-argument text literally, keeping
1008	     track of whether matching quotes have been seen. */
1009	  for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
1010	    {
1011	      if (*in == '"')
1012		cxtquote = ! cxtquote;
1013	      *p++ = *in++;
1014	    }
1015	  /* Done if no more arguments */
1016	  if (b->arg_index == 0)
1017	    break;
1018	  arglen = (fmacro->args[b->arg_index]
1019		    - fmacro->args[b->arg_index - 1] - 1);
1020	  base = pfile->out.base + fmacro->args[b->arg_index - 1];
1021	  in = base;
1022#if 0
1023	  /* Skip leading whitespace in the text for the argument to
1024	     be substituted. To be compatible with gcc 2.95, we would
1025	     also need to trim trailing whitespace. Gcc 2.95 trims
1026	     leading and trailing whitespace, which may be a bug.  The
1027	     current gcc testsuite explicitly checks that this leading
1028	     and trailing whitespace in actual arguments is
1029	     preserved. */
1030	  while (arglen > 0 && is_space (*in))
1031	    {
1032	      in++;
1033	      arglen--;
1034	    }
1035#endif
1036	  for (argquote = 0; arglen > 0; arglen--)
1037	    {
1038	      if (cxtquote && *in == '"')
1039		{
1040		  if (in > base && *(in-1) != '\\')
1041		    argquote = ! argquote;
1042		  /* Always add backslash before double quote if argument
1043		     is expanded in a quoted context */
1044		  *p++ = '\\';
1045		  len++;
1046		}
1047	      else if (cxtquote && argquote && *in == '\\')
1048		{
1049		  /* Always add backslash before a backslash in an argument
1050		     that is expanded in a quoted context and also in the
1051		     range of a quoted context in the argument itself. */
1052		  *p++ = '\\';
1053		  len++;
1054		}
1055	      *p++ = *in++;
1056	      len++;
1057	    }
1058	  exp += BLOCK_LEN (b->text_len);
1059	}
1060
1061      /* \n-terminate.  */
1062      *p = '\n';
1063      _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
1064
1065      /* So we free buffer allocation when macro is left.  */
1066      pfile->context->buff = buff;
1067    }
1068}
1069
1070/* Read and record the parameters, if any, of a function-like macro
1071   definition.  Destroys pfile->out.cur.
1072
1073   Returns true on success, false on failure (syntax error or a
1074   duplicate parameter).  On success, CUR (pfile->context) is just
1075   past the closing parenthesis.  */
1076static bool
1077scan_parameters (cpp_reader *pfile, cpp_macro *macro)
1078{
1079  const uchar *cur = CUR (pfile->context) + 1;
1080  bool ok;
1081
1082  for (;;)
1083    {
1084      cur = skip_whitespace (pfile, cur, true /* skip_comments */);
1085
1086      if (is_idstart (*cur))
1087	{
1088	  struct cpp_hashnode *id = lex_identifier (pfile, cur);
1089	  ok = false;
1090	  if (_cpp_save_parameter (pfile, macro, id, id))
1091	    break;
1092	  cur = skip_whitespace (pfile, CUR (pfile->context),
1093				 true /* skip_comments */);
1094	  if (*cur == ',')
1095	    {
1096	      cur++;
1097	      continue;
1098	    }
1099	  ok = (*cur == ')');
1100	  break;
1101	}
1102
1103      ok = (*cur == ')' && macro->paramc == 0);
1104      break;
1105    }
1106
1107  if (!ok)
1108    cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list");
1109
1110  CUR (pfile->context) = cur + (*cur == ')');
1111
1112  return ok;
1113}
1114
1115/* Save the text from pfile->out.base to pfile->out.cur as
1116   the replacement text for the current macro, followed by argument
1117   ARG_INDEX, with zero indicating the end of the replacement
1118   text.  */
1119static void
1120save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
1121		       unsigned int arg_index)
1122{
1123  size_t len = pfile->out.cur - pfile->out.base;
1124  uchar *exp;
1125
1126  if (macro->paramc == 0)
1127    {
1128      /* Object-like and function-like macros without parameters
1129	 simply store their \n-terminated replacement text.  */
1130      exp = _cpp_unaligned_alloc (pfile, len + 1);
1131      memcpy (exp, pfile->out.base, len);
1132      exp[len] = '\n';
1133      macro->exp.text = exp;
1134      macro->traditional = 1;
1135      macro->count = len;
1136    }
1137  else
1138    {
1139      /* Store the text's length (unsigned int), the argument index
1140	 (unsigned short, base 1) and then the text.  */
1141      size_t blen = BLOCK_LEN (len);
1142      struct block *block;
1143
1144      if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1145	_cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1146
1147      exp = BUFF_FRONT (pfile->a_buff);
1148      block = (struct block *) (exp + macro->count);
1149      macro->exp.text = exp;
1150      macro->traditional = 1;
1151
1152      /* Write out the block information.  */
1153      block->text_len = len;
1154      block->arg_index = arg_index;
1155      memcpy (block->text, pfile->out.base, len);
1156
1157      /* Lex the rest into the start of the output buffer.  */
1158      pfile->out.cur = pfile->out.base;
1159
1160      macro->count += blen;
1161
1162      /* If we've finished, commit the memory.  */
1163      if (arg_index == 0)
1164	BUFF_FRONT (pfile->a_buff) += macro->count;
1165    }
1166}
1167
1168/* Analyze and save the replacement text of a macro.  Returns true on
1169   success.  */
1170bool
1171_cpp_create_trad_definition (cpp_reader *pfile, cpp_macro *macro)
1172{
1173  const uchar *cur;
1174  uchar *limit;
1175  cpp_context *context = pfile->context;
1176
1177  /* The context has not been set up for command line defines, and CUR
1178     has not been updated for the macro name for in-file defines.  */
1179  pfile->out.cur = pfile->out.base;
1180  CUR (context) = pfile->buffer->cur;
1181  RLIMIT (context) = pfile->buffer->rlimit;
1182  check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1183
1184  /* Is this a function-like macro?  */
1185  if (* CUR (context) == '(')
1186    {
1187      bool ok = scan_parameters (pfile, macro);
1188
1189      /* Remember the params so we can clear NODE_MACRO_ARG flags.  */
1190      macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
1191
1192      /* Setting macro to NULL indicates an error occurred, and
1193	 prevents unnecessary work in _cpp_scan_out_logical_line.  */
1194      if (!ok)
1195	macro = NULL;
1196      else
1197	{
1198	  BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];
1199	  macro->fun_like = 1;
1200	}
1201    }
1202
1203  /* Skip leading whitespace in the replacement text.  */
1204  pfile->buffer->cur
1205    = skip_whitespace (pfile, CUR (context),
1206		       CPP_OPTION (pfile, discard_comments_in_macro_exp));
1207
1208  pfile->state.prevent_expansion++;
1209  _cpp_scan_out_logical_line (pfile, macro, false);
1210  pfile->state.prevent_expansion--;
1211
1212  if (!macro)
1213    return false;
1214
1215  /* Skip trailing white space.  */
1216  cur = pfile->out.base;
1217  limit = pfile->out.cur;
1218  while (limit > cur && is_space (limit[-1]))
1219    limit--;
1220  pfile->out.cur = limit;
1221  save_replacement_text (pfile, macro, 0);
1222
1223  return true;
1224}
1225
1226/* Copy SRC of length LEN to DEST, but convert all contiguous
1227   whitespace to a single space, provided it is not in quotes.  The
1228   quote currently in effect is pointed to by PQUOTE, and is updated
1229   by the function.  Returns the number of bytes copied.  */
1230static size_t
1231canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1232{
1233  uchar *orig_dest = dest;
1234  uchar quote = *pquote;
1235
1236  while (len)
1237    {
1238      if (is_space (*src) && !quote)
1239	{
1240	  do
1241	    src++, len--;
1242	  while (len && is_space (*src));
1243	  *dest++ = ' ';
1244	}
1245      else
1246	{
1247	  if (*src == '\'' || *src == '"')
1248	    {
1249	      if (!quote)
1250		quote = *src;
1251	      else if (quote == *src)
1252		quote = 0;
1253	    }
1254	  *dest++ = *src++, len--;
1255	}
1256    }
1257
1258  *pquote = quote;
1259  return dest - orig_dest;
1260}
1261
1262/* Returns true if MACRO1 and MACRO2 have expansions different other
1263   than in the form of their whitespace.  */
1264bool
1265_cpp_expansions_different_trad (const cpp_macro *macro1,
1266				const cpp_macro *macro2)
1267{
1268  uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
1269  uchar *p2 = p1 + macro1->count;
1270  uchar quote1 = 0, quote2 = 0;
1271  bool mismatch;
1272  size_t len1, len2;
1273
1274  if (macro1->paramc > 0)
1275    {
1276      const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1277
1278      mismatch = true;
1279      for (;;)
1280	{
1281	  struct block *b1 = (struct block *) exp1;
1282	  struct block *b2 = (struct block *) exp2;
1283
1284	  if (b1->arg_index != b2->arg_index)
1285	    break;
1286
1287	  len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
1288	  len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
1289	  if (len1 != len2 || memcmp (p1, p2, len1))
1290	    break;
1291	  if (b1->arg_index == 0)
1292	    {
1293	      mismatch = false;
1294	      break;
1295	    }
1296	  exp1 += BLOCK_LEN (b1->text_len);
1297	  exp2 += BLOCK_LEN (b2->text_len);
1298	}
1299    }
1300  else
1301    {
1302      len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
1303      len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
1304      mismatch = (len1 != len2 || memcmp (p1, p2, len1));
1305    }
1306
1307  free (p1);
1308  return mismatch;
1309}
1310