1/* Character scanner.
2   Copyright (C) 2000-2015 Free Software Foundation, Inc.
3   Contributed by Andy Vaught
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21/* Set of subroutines to (ultimately) return the next character to the
22   various matching subroutines.  This file's job is to read files and
23   build up lines that are parsed by the parser.  This means that we
24   handle continuation lines and "include" lines.
25
26   The first thing the scanner does is to load an entire file into
27   memory.  We load the entire file into memory for a couple reasons.
28   The first is that we want to be able to deal with nonseekable input
29   (pipes, stdin) and there is a lot of backing up involved during
30   parsing.
31
32   The second is that we want to be able to print the locus of errors,
33   and an error on line 999999 could conflict with something on line
34   one.  Given nonseekable input, we've got to store the whole thing.
35
36   One thing that helps are the column truncation limits that give us
37   an upper bound on the size of individual lines.  We don't store the
38   truncated stuff.
39
40   From the scanner's viewpoint, the higher level subroutines ask for
41   new characters and do a lot of jumping backwards.  */
42
43#include "config.h"
44#include "system.h"
45#include "coretypes.h"
46#include "gfortran.h"
47#include "toplev.h"	/* For set_src_pwd.  */
48#include "debug.h"
49#include "flags.h"
50#include "cpp.h"
51#include "scanner.h"
52
53/* List of include file search directories.  */
54gfc_directorylist *include_dirs, *intrinsic_modules_dirs;
55
56static gfc_file *file_head, *current_file;
57
58static int continue_flag, end_flag, gcc_attribute_flag;
59/* If !$omp/!$acc occurred in current comment line.  */
60static int openmp_flag, openacc_flag;
61static int continue_count, continue_line;
62static locus openmp_locus;
63static locus openacc_locus;
64static locus gcc_attribute_locus;
65
66gfc_source_form gfc_current_form;
67static gfc_linebuf *line_head, *line_tail;
68
69locus gfc_current_locus;
70const char *gfc_source_file;
71static FILE *gfc_src_file;
72static gfc_char_t *gfc_src_preprocessor_lines[2];
73
74static struct gfc_file_change
75{
76  const char *filename;
77  gfc_linebuf *lb;
78  int line;
79} *file_changes;
80size_t file_changes_cur, file_changes_count;
81size_t file_changes_allocated;
82
83
84/* Functions dealing with our wide characters (gfc_char_t) and
85   sequences of such characters.  */
86
87int
88gfc_wide_fits_in_byte (gfc_char_t c)
89{
90  return (c <= UCHAR_MAX);
91}
92
93static inline int
94wide_is_ascii (gfc_char_t c)
95{
96  return (gfc_wide_fits_in_byte (c) && ((unsigned char) c & ~0x7f) == 0);
97}
98
99int
100gfc_wide_is_printable (gfc_char_t c)
101{
102  return (gfc_wide_fits_in_byte (c) && ISPRINT ((unsigned char) c));
103}
104
105gfc_char_t
106gfc_wide_tolower (gfc_char_t c)
107{
108  return (wide_is_ascii (c) ? (gfc_char_t) TOLOWER((unsigned char) c) : c);
109}
110
111gfc_char_t
112gfc_wide_toupper (gfc_char_t c)
113{
114  return (wide_is_ascii (c) ? (gfc_char_t) TOUPPER((unsigned char) c) : c);
115}
116
117int
118gfc_wide_is_digit (gfc_char_t c)
119{
120  return (c >= '0' && c <= '9');
121}
122
123static inline int
124wide_atoi (gfc_char_t *c)
125{
126#define MAX_DIGITS 20
127  char buf[MAX_DIGITS+1];
128  int i = 0;
129
130  while (gfc_wide_is_digit(*c) && i < MAX_DIGITS)
131    buf[i++] = *c++;
132  buf[i] = '\0';
133  return atoi (buf);
134}
135
136size_t
137gfc_wide_strlen (const gfc_char_t *str)
138{
139  size_t i;
140
141  for (i = 0; str[i]; i++)
142    ;
143
144  return i;
145}
146
147gfc_char_t *
148gfc_wide_memset (gfc_char_t *b, gfc_char_t c, size_t len)
149{
150  size_t i;
151
152  for (i = 0; i < len; i++)
153    b[i] = c;
154
155  return b;
156}
157
158static gfc_char_t *
159wide_strcpy (gfc_char_t *dest, const gfc_char_t *src)
160{
161  gfc_char_t *d;
162
163  for (d = dest; (*d = *src) != '\0'; ++src, ++d)
164    ;
165
166  return dest;
167}
168
169static gfc_char_t *
170wide_strchr (const gfc_char_t *s, gfc_char_t c)
171{
172  do {
173    if (*s == c)
174      {
175        return CONST_CAST(gfc_char_t *, s);
176      }
177  } while (*s++);
178  return 0;
179}
180
181char *
182gfc_widechar_to_char (const gfc_char_t *s, int length)
183{
184  size_t len, i;
185  char *res;
186
187  if (s == NULL)
188    return NULL;
189
190  /* Passing a negative length is used to indicate that length should be
191     calculated using gfc_wide_strlen().  */
192  len = (length >= 0 ? (size_t) length : gfc_wide_strlen (s));
193  res = XNEWVEC (char, len + 1);
194
195  for (i = 0; i < len; i++)
196    {
197      gcc_assert (gfc_wide_fits_in_byte (s[i]));
198      res[i] = (unsigned char) s[i];
199    }
200
201  res[len] = '\0';
202  return res;
203}
204
205gfc_char_t *
206gfc_char_to_widechar (const char *s)
207{
208  size_t len, i;
209  gfc_char_t *res;
210
211  if (s == NULL)
212    return NULL;
213
214  len = strlen (s);
215  res = gfc_get_wide_string (len + 1);
216
217  for (i = 0; i < len; i++)
218    res[i] = (unsigned char) s[i];
219
220  res[len] = '\0';
221  return res;
222}
223
224static int
225wide_strncmp (const gfc_char_t *s1, const char *s2, size_t n)
226{
227  gfc_char_t c1, c2;
228
229  while (n-- > 0)
230    {
231      c1 = *s1++;
232      c2 = *s2++;
233      if (c1 != c2)
234	return (c1 > c2 ? 1 : -1);
235      if (c1 == '\0')
236	return 0;
237    }
238  return 0;
239}
240
241int
242gfc_wide_strncasecmp (const gfc_char_t *s1, const char *s2, size_t n)
243{
244  gfc_char_t c1, c2;
245
246  while (n-- > 0)
247    {
248      c1 = gfc_wide_tolower (*s1++);
249      c2 = TOLOWER (*s2++);
250      if (c1 != c2)
251	return (c1 > c2 ? 1 : -1);
252      if (c1 == '\0')
253	return 0;
254    }
255  return 0;
256}
257
258
259/* Main scanner initialization.  */
260
261void
262gfc_scanner_init_1 (void)
263{
264  file_head = NULL;
265  line_head = NULL;
266  line_tail = NULL;
267
268  continue_count = 0;
269  continue_line = 0;
270
271  end_flag = 0;
272}
273
274
275/* Main scanner destructor.  */
276
277void
278gfc_scanner_done_1 (void)
279{
280  gfc_linebuf *lb;
281  gfc_file *f;
282
283  while(line_head != NULL)
284    {
285      lb = line_head->next;
286      free (line_head);
287      line_head = lb;
288    }
289
290  while(file_head != NULL)
291    {
292      f = file_head->next;
293      free (file_head->filename);
294      free (file_head);
295      file_head = f;
296    }
297}
298
299
300/* Adds path to the list pointed to by list.  */
301
302static void
303add_path_to_list (gfc_directorylist **list, const char *path,
304		  bool use_for_modules, bool head, bool warn)
305{
306  gfc_directorylist *dir;
307  const char *p;
308  char *q;
309  struct stat st;
310  size_t len;
311  int i;
312
313  p = path;
314  while (*p == ' ' || *p == '\t')  /* someone might do "-I include" */
315    if (*p++ == '\0')
316      return;
317
318  /* Strip trailing directory separators from the path, as this
319     will confuse Windows systems.  */
320  len = strlen (p);
321  q = (char *) alloca (len + 1);
322  memcpy (q, p, len + 1);
323  i = len - 1;
324  while (i >=0 && IS_DIR_SEPARATOR (q[i]))
325    q[i--] = '\0';
326
327  if (stat (q, &st))
328    {
329      if (errno != ENOENT)
330	gfc_warning_now (0, "Include directory %qs: %s", path,
331			 xstrerror(errno));
332      else if (warn)
333	gfc_warning_now (OPT_Wmissing_include_dirs,
334			 "Nonexistent include directory %qs", path);
335      return;
336    }
337  else if (!S_ISDIR (st.st_mode))
338    {
339      gfc_fatal_error ("%qs is not a directory", path);
340      return;
341    }
342
343  if (head || *list == NULL)
344    {
345      dir = XCNEW (gfc_directorylist);
346      if (!head)
347        *list = dir;
348    }
349  else
350    {
351      dir = *list;
352      while (dir->next)
353	dir = dir->next;
354
355      dir->next = XCNEW (gfc_directorylist);
356      dir = dir->next;
357    }
358
359  dir->next = head ? *list : NULL;
360  if (head)
361    *list = dir;
362  dir->use_for_modules = use_for_modules;
363  dir->path = XCNEWVEC (char, strlen (p) + 2);
364  strcpy (dir->path, p);
365  strcat (dir->path, "/");	/* make '/' last character */
366}
367
368
369void
370gfc_add_include_path (const char *path, bool use_for_modules, bool file_dir,
371		      bool warn)
372{
373  add_path_to_list (&include_dirs, path, use_for_modules, file_dir, warn);
374
375  /* For '#include "..."' these directories are automatically searched.  */
376  if (!file_dir)
377    gfc_cpp_add_include_path (xstrdup(path), true);
378}
379
380
381void
382gfc_add_intrinsic_modules_path (const char *path)
383{
384  add_path_to_list (&intrinsic_modules_dirs, path, true, false, false);
385}
386
387
388/* Release resources allocated for options.  */
389
390void
391gfc_release_include_path (void)
392{
393  gfc_directorylist *p;
394
395  while (include_dirs != NULL)
396    {
397      p = include_dirs;
398      include_dirs = include_dirs->next;
399      free (p->path);
400      free (p);
401    }
402
403  while (intrinsic_modules_dirs != NULL)
404    {
405      p = intrinsic_modules_dirs;
406      intrinsic_modules_dirs = intrinsic_modules_dirs->next;
407      free (p->path);
408      free (p);
409    }
410
411  free (gfc_option.module_dir);
412}
413
414
415static FILE *
416open_included_file (const char *name, gfc_directorylist *list,
417		    bool module, bool system)
418{
419  char *fullname;
420  gfc_directorylist *p;
421  FILE *f;
422
423  for (p = list; p; p = p->next)
424    {
425      if (module && !p->use_for_modules)
426	continue;
427
428      fullname = (char *) alloca(strlen (p->path) + strlen (name) + 1);
429      strcpy (fullname, p->path);
430      strcat (fullname, name);
431
432      f = gfc_open_file (fullname);
433      if (f != NULL)
434	{
435	  if (gfc_cpp_makedep ())
436	    gfc_cpp_add_dep (fullname, system);
437
438	  return f;
439	}
440    }
441
442  return NULL;
443}
444
445
446/* Opens file for reading, searching through the include directories
447   given if necessary.  If the include_cwd argument is true, we try
448   to open the file in the current directory first.  */
449
450FILE *
451gfc_open_included_file (const char *name, bool include_cwd, bool module)
452{
453  FILE *f = NULL;
454
455  if (IS_ABSOLUTE_PATH (name) || include_cwd)
456    {
457      f = gfc_open_file (name);
458      if (f && gfc_cpp_makedep ())
459	gfc_cpp_add_dep (name, false);
460    }
461
462  if (!f)
463    f = open_included_file (name, include_dirs, module, false);
464
465  return f;
466}
467
468
469/* Test to see if we're at the end of the main source file.  */
470
471int
472gfc_at_end (void)
473{
474  return end_flag;
475}
476
477
478/* Test to see if we're at the end of the current file.  */
479
480int
481gfc_at_eof (void)
482{
483  if (gfc_at_end ())
484    return 1;
485
486  if (line_head == NULL)
487    return 1;			/* Null file */
488
489  if (gfc_current_locus.lb == NULL)
490    return 1;
491
492  return 0;
493}
494
495
496/* Test to see if we're at the beginning of a new line.  */
497
498int
499gfc_at_bol (void)
500{
501  if (gfc_at_eof ())
502    return 1;
503
504  return (gfc_current_locus.nextc == gfc_current_locus.lb->line);
505}
506
507
508/* Test to see if we're at the end of a line.  */
509
510int
511gfc_at_eol (void)
512{
513  if (gfc_at_eof ())
514    return 1;
515
516  return (*gfc_current_locus.nextc == '\0');
517}
518
519static void
520add_file_change (const char *filename, int line)
521{
522  if (file_changes_count == file_changes_allocated)
523    {
524      if (file_changes_allocated)
525	file_changes_allocated *= 2;
526      else
527	file_changes_allocated = 16;
528      file_changes = XRESIZEVEC (struct gfc_file_change, file_changes,
529				 file_changes_allocated);
530    }
531  file_changes[file_changes_count].filename = filename;
532  file_changes[file_changes_count].lb = NULL;
533  file_changes[file_changes_count++].line = line;
534}
535
536static void
537report_file_change (gfc_linebuf *lb)
538{
539  size_t c = file_changes_cur;
540  while (c < file_changes_count
541	 && file_changes[c].lb == lb)
542    {
543      if (file_changes[c].filename)
544	(*debug_hooks->start_source_file) (file_changes[c].line,
545					   file_changes[c].filename);
546      else
547	(*debug_hooks->end_source_file) (file_changes[c].line);
548      ++c;
549    }
550  file_changes_cur = c;
551}
552
553void
554gfc_start_source_files (void)
555{
556  /* If the debugger wants the name of the main source file,
557     we give it.  */
558  if (debug_hooks->start_end_main_source_file)
559    (*debug_hooks->start_source_file) (0, gfc_source_file);
560
561  file_changes_cur = 0;
562  report_file_change (gfc_current_locus.lb);
563}
564
565void
566gfc_end_source_files (void)
567{
568  report_file_change (NULL);
569
570  if (debug_hooks->start_end_main_source_file)
571    (*debug_hooks->end_source_file) (0);
572}
573
574/* Advance the current line pointer to the next line.  */
575
576void
577gfc_advance_line (void)
578{
579  if (gfc_at_end ())
580    return;
581
582  if (gfc_current_locus.lb == NULL)
583    {
584      end_flag = 1;
585      return;
586    }
587
588  if (gfc_current_locus.lb->next
589      && !gfc_current_locus.lb->next->dbg_emitted)
590    {
591      report_file_change (gfc_current_locus.lb->next);
592      gfc_current_locus.lb->next->dbg_emitted = true;
593    }
594
595  gfc_current_locus.lb = gfc_current_locus.lb->next;
596
597  if (gfc_current_locus.lb != NULL)
598    gfc_current_locus.nextc = gfc_current_locus.lb->line;
599  else
600    {
601      gfc_current_locus.nextc = NULL;
602      end_flag = 1;
603    }
604}
605
606
607/* Get the next character from the input, advancing gfc_current_file's
608   locus.  When we hit the end of the line or the end of the file, we
609   start returning a '\n' in order to complete the current statement.
610   No Fortran line conventions are implemented here.
611
612   Requiring explicit advances to the next line prevents the parse
613   pointer from being on the wrong line if the current statement ends
614   prematurely.  */
615
616static gfc_char_t
617next_char (void)
618{
619  gfc_char_t c;
620
621  if (gfc_current_locus.nextc == NULL)
622    return '\n';
623
624  c = *gfc_current_locus.nextc++;
625  if (c == '\0')
626    {
627      gfc_current_locus.nextc--; /* Remain on this line.  */
628      c = '\n';
629    }
630
631  return c;
632}
633
634
635/* Skip a comment.  When we come here the parse pointer is positioned
636   immediately after the comment character.  If we ever implement
637   compiler directives within comments, here is where we parse the
638   directive.  */
639
640static void
641skip_comment_line (void)
642{
643  gfc_char_t c;
644
645  do
646    {
647      c = next_char ();
648    }
649  while (c != '\n');
650
651  gfc_advance_line ();
652}
653
654
655int
656gfc_define_undef_line (void)
657{
658  char *tmp;
659
660  /* All lines beginning with '#' are either #define or #undef.  */
661  if (debug_info_level != DINFO_LEVEL_VERBOSE || gfc_peek_ascii_char () != '#')
662    return 0;
663
664  if (wide_strncmp (gfc_current_locus.nextc, "#define ", 8) == 0)
665    {
666      tmp = gfc_widechar_to_char (&gfc_current_locus.nextc[8], -1);
667      (*debug_hooks->define) (gfc_linebuf_linenum (gfc_current_locus.lb),
668			      tmp);
669      free (tmp);
670    }
671
672  if (wide_strncmp (gfc_current_locus.nextc, "#undef ", 7) == 0)
673    {
674      tmp = gfc_widechar_to_char (&gfc_current_locus.nextc[7], -1);
675      (*debug_hooks->undef) (gfc_linebuf_linenum (gfc_current_locus.lb),
676			     tmp);
677      free (tmp);
678    }
679
680  /* Skip the rest of the line.  */
681  skip_comment_line ();
682
683  return 1;
684}
685
686
687/* Return true if GCC$ was matched.  */
688static bool
689skip_gcc_attribute (locus start)
690{
691  bool r = false;
692  char c;
693  locus old_loc = gfc_current_locus;
694
695  if ((c = next_char ()) == 'g' || c == 'G')
696    if ((c = next_char ()) == 'c' || c == 'C')
697      if ((c = next_char ()) == 'c' || c == 'C')
698	if ((c = next_char ()) == '$')
699	  r = true;
700
701  if (r == false)
702    gfc_current_locus = old_loc;
703  else
704   {
705      gcc_attribute_flag = 1;
706      gcc_attribute_locus = old_loc;
707      gfc_current_locus = start;
708   }
709
710  return r;
711}
712
713/* Return true if CC was matched.  */
714static bool
715skip_oacc_attribute (locus start, locus old_loc, bool continue_flag)
716{
717  bool r = false;
718  char c;
719
720  if ((c = next_char ()) == 'c' || c == 'C')
721    if ((c = next_char ()) == 'c' || c == 'C')
722      r = true;
723
724  if (r)
725   {
726      if ((c = next_char ()) == ' ' || c == '\t'
727	  || continue_flag)
728	{
729	  while (gfc_is_whitespace (c))
730	    c = next_char ();
731	  if (c != '\n' && c != '!')
732	    {
733	      openacc_flag = 1;
734	      openacc_locus = old_loc;
735	      gfc_current_locus = start;
736	    }
737	  else
738	    r = false;
739	}
740      else
741	{
742	  gfc_warning_now (0, "!$ACC at %C starts a commented "
743			   "line as it neither is followed "
744			   "by a space nor is a "
745			   "continuation line");
746	  r = false;
747	}
748   }
749
750  return r;
751}
752
753/* Return true if MP was matched.  */
754static bool
755skip_omp_attribute (locus start, locus old_loc, bool continue_flag)
756{
757  bool r = false;
758  char c;
759
760  if ((c = next_char ()) == 'm' || c == 'M')
761    if ((c = next_char ()) == 'p' || c == 'P')
762      r = true;
763
764  if (r)
765   {
766      if ((c = next_char ()) == ' ' || c == '\t'
767	  || continue_flag)
768	{
769	  while (gfc_is_whitespace (c))
770	    c = next_char ();
771	  if (c != '\n' && c != '!')
772	    {
773	      openmp_flag = 1;
774	      openmp_locus = old_loc;
775	      gfc_current_locus = start;
776	    }
777	  else
778	    r = false;
779	}
780      else
781	{
782	  gfc_warning_now (0, "!$OMP at %C starts a commented "
783			   "line as it neither is followed "
784			   "by a space nor is a "
785			   "continuation line");
786	  r = false;
787	}
788   }
789
790  return r;
791}
792
793/* Comment lines are null lines, lines containing only blanks or lines
794   on which the first nonblank line is a '!'.
795   Return true if !$ openmp or openacc conditional compilation sentinel was
796   seen.  */
797
798static bool
799skip_free_comments (void)
800{
801  locus start;
802  gfc_char_t c;
803  int at_bol;
804
805  for (;;)
806    {
807      at_bol = gfc_at_bol ();
808      start = gfc_current_locus;
809      if (gfc_at_eof ())
810	break;
811
812      do
813	c = next_char ();
814      while (gfc_is_whitespace (c));
815
816      if (c == '\n')
817	{
818	  gfc_advance_line ();
819	  continue;
820	}
821
822      if (c == '!')
823	{
824	  /* Keep the !GCC$ line.  */
825	  if (at_bol && skip_gcc_attribute (start))
826	    return false;
827
828	  /* If -fopenmp/-fopenacc, we need to handle here 2 things:
829	     1) don't treat !$omp/!$acc as comments, but directives
830	     2) handle OpenMP/OpenACC conditional compilation, where
831		!$ should be treated as 2 spaces (for initial lines
832		only if followed by space).  */
833	  if (at_bol)
834	  {
835	    if ((flag_openmp || flag_openmp_simd)
836		&& flag_openacc)
837	      {
838		locus old_loc = gfc_current_locus;
839		if (next_char () == '$')
840		  {
841		    c = next_char ();
842		    if (c == 'o' || c == 'O')
843		      {
844			if (skip_omp_attribute (start, old_loc, continue_flag))
845			  return false;
846			gfc_current_locus = old_loc;
847			next_char ();
848			c = next_char ();
849		      }
850		    else if (c == 'a' || c == 'A')
851		      {
852			if (skip_oacc_attribute (start, old_loc, continue_flag))
853			  return false;
854			gfc_current_locus = old_loc;
855			next_char ();
856			c = next_char ();
857		      }
858		    if (continue_flag || c == ' ' || c == '\t')
859		      {
860			gfc_current_locus = old_loc;
861			next_char ();
862			openmp_flag = openacc_flag = 0;
863			return true;
864		      }
865		  }
866		gfc_current_locus = old_loc;
867	      }
868	    else if ((flag_openmp || flag_openmp_simd)
869		     && !flag_openacc)
870	      {
871		locus old_loc = gfc_current_locus;
872		if (next_char () == '$')
873		  {
874		    c = next_char ();
875		    if (c == 'o' || c == 'O')
876		      {
877			if (skip_omp_attribute (start, old_loc, continue_flag))
878			  return false;
879			gfc_current_locus = old_loc;
880			next_char ();
881			c = next_char ();
882		      }
883		    if (continue_flag || c == ' ' || c == '\t')
884		      {
885			gfc_current_locus = old_loc;
886			next_char ();
887			openmp_flag = 0;
888			return true;
889		      }
890		  }
891		gfc_current_locus = old_loc;
892	      }
893	    else if (flag_openacc
894		     && !(flag_openmp || flag_openmp_simd))
895	      {
896		locus old_loc = gfc_current_locus;
897		if (next_char () == '$')
898		  {
899		    c = next_char ();
900		      if (c == 'a' || c == 'A')
901			{
902			  if (skip_oacc_attribute (start, old_loc,
903						   continue_flag))
904			    return false;
905			  gfc_current_locus = old_loc;
906			  next_char();
907			  c = next_char();
908			}
909		      if (continue_flag || c == ' ' || c == '\t')
910			{
911			  gfc_current_locus = old_loc;
912			  next_char();
913			  openacc_flag = 0;
914			  return true;
915			}
916		  }
917		gfc_current_locus = old_loc;
918	      }
919	  }
920	  skip_comment_line ();
921	  continue;
922	}
923
924      break;
925    }
926
927  if (openmp_flag && at_bol)
928    openmp_flag = 0;
929
930  if (openacc_flag && at_bol)
931    openacc_flag = 0;
932
933  gcc_attribute_flag = 0;
934  gfc_current_locus = start;
935  return false;
936}
937
938
939/* Skip comment lines in fixed source mode.  We have the same rules as
940   in skip_free_comment(), except that we can have a 'c', 'C' or '*'
941   in column 1, and a '!' cannot be in column 6.  Also, we deal with
942   lines with 'd' or 'D' in column 1, if the user requested this.  */
943
944static void
945skip_fixed_comments (void)
946{
947  locus start;
948  int col;
949  gfc_char_t c;
950
951  if (! gfc_at_bol ())
952    {
953      start = gfc_current_locus;
954      if (! gfc_at_eof ())
955	{
956	  do
957	    c = next_char ();
958	  while (gfc_is_whitespace (c));
959
960	  if (c == '\n')
961	    gfc_advance_line ();
962	  else if (c == '!')
963	    skip_comment_line ();
964	}
965
966      if (! gfc_at_bol ())
967	{
968	  gfc_current_locus = start;
969	  return;
970	}
971    }
972
973  for (;;)
974    {
975      start = gfc_current_locus;
976      if (gfc_at_eof ())
977	break;
978
979      c = next_char ();
980      if (c == '\n')
981	{
982	  gfc_advance_line ();
983	  continue;
984	}
985
986      if (c == '!' || c == 'c' || c == 'C' || c == '*')
987	{
988	  if (skip_gcc_attribute (start))
989	    {
990	      /* Canonicalize to *$omp.  */
991	      *start.nextc = '*';
992	      return;
993	    }
994
995	  /* If -fopenmp/-fopenacc, we need to handle here 2 things:
996	     1) don't treat !$omp/!$acc|c$omp/c$acc|*$omp / *$acc as comments,
997		but directives
998	     2) handle OpenMP/OpenACC conditional compilation, where
999		!$|c$|*$ should be treated as 2 spaces if the characters
1000		in columns 3 to 6 are valid fixed form label columns
1001		characters.  */
1002	  if (gfc_current_locus.lb != NULL
1003	      && continue_line < gfc_linebuf_linenum (gfc_current_locus.lb))
1004	    continue_line = gfc_linebuf_linenum (gfc_current_locus.lb);
1005
1006	  if (flag_openmp || flag_openmp_simd)
1007	    {
1008	      if (next_char () == '$')
1009		{
1010		  c = next_char ();
1011		  if (c == 'o' || c == 'O')
1012		    {
1013		      if (((c = next_char ()) == 'm' || c == 'M')
1014			  && ((c = next_char ()) == 'p' || c == 'P'))
1015			{
1016			  c = next_char ();
1017			  if (c != '\n'
1018			      && ((openmp_flag && continue_flag)
1019				  || c == ' ' || c == '\t' || c == '0'))
1020			    {
1021			      do
1022				c = next_char ();
1023			      while (gfc_is_whitespace (c));
1024			      if (c != '\n' && c != '!')
1025				{
1026				  /* Canonicalize to *$omp.  */
1027				  *start.nextc = '*';
1028				  openmp_flag = 1;
1029				  gfc_current_locus = start;
1030				  return;
1031				}
1032			    }
1033			}
1034		    }
1035		  else
1036		    {
1037		      int digit_seen = 0;
1038
1039		      for (col = 3; col < 6; col++, c = next_char ())
1040			if (c == ' ')
1041			  continue;
1042			else if (c == '\t')
1043			  {
1044			    col = 6;
1045			    break;
1046			  }
1047			else if (c < '0' || c > '9')
1048			  break;
1049			else
1050			  digit_seen = 1;
1051
1052		      if (col == 6 && c != '\n'
1053			  && ((continue_flag && !digit_seen)
1054			      || c == ' ' || c == '\t' || c == '0'))
1055			{
1056			  gfc_current_locus = start;
1057			  start.nextc[0] = ' ';
1058			  start.nextc[1] = ' ';
1059			  continue;
1060			}
1061		    }
1062		}
1063	      gfc_current_locus = start;
1064	    }
1065
1066	  if (flag_openacc)
1067	    {
1068	      if (next_char () == '$')
1069		{
1070		  c = next_char ();
1071		  if (c == 'a' || c == 'A')
1072		    {
1073		      if (((c = next_char ()) == 'c' || c == 'C')
1074			  && ((c = next_char ()) == 'c' || c == 'C'))
1075			{
1076			  c = next_char ();
1077			  if (c != '\n'
1078			      && ((openacc_flag && continue_flag)
1079				  || c == ' ' || c == '\t' || c == '0'))
1080			    {
1081			      do
1082				c = next_char ();
1083			      while (gfc_is_whitespace (c));
1084			      if (c != '\n' && c != '!')
1085				{
1086				  /* Canonicalize to *$acc. */
1087				  *start.nextc = '*';
1088				  openacc_flag = 1;
1089				  gfc_current_locus = start;
1090				  return;
1091				}
1092			    }
1093			}
1094		    }
1095		  else
1096		    {
1097		      int digit_seen = 0;
1098
1099		      for (col = 3; col < 6; col++, c = next_char ())
1100			if (c == ' ')
1101			  continue;
1102			else if (c == '\t')
1103			  {
1104			    col = 6;
1105			    break;
1106			  }
1107			else if (c < '0' || c > '9')
1108			  break;
1109			else
1110			  digit_seen = 1;
1111
1112		      if (col == 6 && c != '\n'
1113			  && ((continue_flag && !digit_seen)
1114			      || c == ' ' || c == '\t' || c == '0'))
1115			{
1116			  gfc_current_locus = start;
1117			  start.nextc[0] = ' ';
1118			  start.nextc[1] = ' ';
1119			  continue;
1120			}
1121		    }
1122		}
1123	      gfc_current_locus = start;
1124	    }
1125
1126	  skip_comment_line ();
1127	  continue;
1128	}
1129
1130      if (gfc_option.flag_d_lines != -1 && (c == 'd' || c == 'D'))
1131	{
1132	  if (gfc_option.flag_d_lines == 0)
1133	    {
1134	      skip_comment_line ();
1135	      continue;
1136	    }
1137	  else
1138	    *start.nextc = c = ' ';
1139	}
1140
1141      col = 1;
1142
1143      while (gfc_is_whitespace (c))
1144	{
1145	  c = next_char ();
1146	  col++;
1147	}
1148
1149      if (c == '\n')
1150	{
1151	  gfc_advance_line ();
1152	  continue;
1153	}
1154
1155      if (col != 6 && c == '!')
1156	{
1157	  if (gfc_current_locus.lb != NULL
1158	      && continue_line < gfc_linebuf_linenum (gfc_current_locus.lb))
1159	    continue_line = gfc_linebuf_linenum (gfc_current_locus.lb);
1160	  skip_comment_line ();
1161	  continue;
1162	}
1163
1164      break;
1165    }
1166
1167  openmp_flag = 0;
1168  openacc_flag = 0;
1169  gcc_attribute_flag = 0;
1170  gfc_current_locus = start;
1171}
1172
1173
1174/* Skips the current line if it is a comment.  */
1175
1176void
1177gfc_skip_comments (void)
1178{
1179  if (gfc_current_form == FORM_FREE)
1180    skip_free_comments ();
1181  else
1182    skip_fixed_comments ();
1183}
1184
1185
1186/* Get the next character from the input, taking continuation lines
1187   and end-of-line comments into account.  This implies that comment
1188   lines between continued lines must be eaten here.  For higher-level
1189   subroutines, this flattens continued lines into a single logical
1190   line.  The in_string flag denotes whether we're inside a character
1191   context or not.  */
1192
1193gfc_char_t
1194gfc_next_char_literal (gfc_instring in_string)
1195{
1196  locus old_loc;
1197  int i, prev_openmp_flag, prev_openacc_flag;
1198  gfc_char_t c;
1199
1200  continue_flag = 0;
1201  prev_openacc_flag = prev_openmp_flag = 0;
1202
1203restart:
1204  c = next_char ();
1205  if (gfc_at_end ())
1206    {
1207      continue_count = 0;
1208      return c;
1209    }
1210
1211  if (gfc_current_form == FORM_FREE)
1212    {
1213      bool openmp_cond_flag;
1214
1215      if (!in_string && c == '!')
1216	{
1217	  if (gcc_attribute_flag
1218	      && memcmp (&gfc_current_locus, &gcc_attribute_locus,
1219		 sizeof (gfc_current_locus)) == 0)
1220	    goto done;
1221
1222	  if (openmp_flag
1223	      && memcmp (&gfc_current_locus, &openmp_locus,
1224		 sizeof (gfc_current_locus)) == 0)
1225	    goto done;
1226
1227	  if (openacc_flag
1228	      && memcmp (&gfc_current_locus, &openacc_locus,
1229	         sizeof (gfc_current_locus)) == 0)
1230	    goto done;
1231
1232	  /* This line can't be continued */
1233	  do
1234	    {
1235	      c = next_char ();
1236	    }
1237	  while (c != '\n');
1238
1239	  /* Avoid truncation warnings for comment ending lines.  */
1240	  gfc_current_locus.lb->truncated = 0;
1241
1242	  goto done;
1243	}
1244
1245      /* Check to see if the continuation line was truncated.  */
1246      if (warn_line_truncation && gfc_current_locus.lb != NULL
1247	  && gfc_current_locus.lb->truncated)
1248	{
1249	  int maxlen = flag_free_line_length;
1250	  gfc_char_t *current_nextc = gfc_current_locus.nextc;
1251
1252	  gfc_current_locus.lb->truncated = 0;
1253	  gfc_current_locus.nextc =  gfc_current_locus.lb->line + maxlen;
1254	  gfc_warning_now (OPT_Wline_truncation,
1255			   "Line truncated at %L", &gfc_current_locus);
1256	  gfc_current_locus.nextc = current_nextc;
1257	}
1258
1259      if (c != '&')
1260	goto done;
1261
1262      /* If the next nonblank character is a ! or \n, we've got a
1263	 continuation line.  */
1264      old_loc = gfc_current_locus;
1265
1266      c = next_char ();
1267      while (gfc_is_whitespace (c))
1268	c = next_char ();
1269
1270      /* Character constants to be continued cannot have commentary
1271	 after the '&'. However, there are cases where we may think we
1272	 are still in a string and we are looking for a possible
1273	 doubled quote and we end up here. See PR64506.  */
1274
1275      if (in_string && c != '\n')
1276	{
1277	  gfc_current_locus = old_loc;
1278	  c = '&';
1279	  goto done;
1280	}
1281
1282      if (c != '!' && c != '\n')
1283	{
1284	  gfc_current_locus = old_loc;
1285	  c = '&';
1286	  goto done;
1287	}
1288
1289      if (flag_openmp)
1290	prev_openmp_flag = openmp_flag;
1291      if (flag_openacc)
1292	prev_openacc_flag = openacc_flag;
1293
1294      continue_flag = 1;
1295      if (c == '!')
1296	skip_comment_line ();
1297      else
1298	gfc_advance_line ();
1299
1300      if (gfc_at_eof ())
1301	goto not_continuation;
1302
1303      /* We've got a continuation line.  If we are on the very next line after
1304	 the last continuation, increment the continuation line count and
1305	 check whether the limit has been exceeded.  */
1306      if (gfc_linebuf_linenum (gfc_current_locus.lb) == continue_line + 1)
1307	{
1308	  if (++continue_count == gfc_option.max_continue_free)
1309	    {
1310	      if (gfc_notification_std (GFC_STD_GNU) || pedantic)
1311		gfc_warning (0, "Limit of %d continuations exceeded in "
1312			     "statement at %C", gfc_option.max_continue_free);
1313	    }
1314	}
1315
1316      /* Now find where it continues. First eat any comment lines.  */
1317      openmp_cond_flag = skip_free_comments ();
1318
1319      if (gfc_current_locus.lb != NULL
1320	  && continue_line < gfc_linebuf_linenum (gfc_current_locus.lb))
1321	continue_line = gfc_linebuf_linenum (gfc_current_locus.lb);
1322
1323      if (flag_openmp)
1324	if (prev_openmp_flag != openmp_flag)
1325	  {
1326	    gfc_current_locus = old_loc;
1327	    openmp_flag = prev_openmp_flag;
1328	    c = '&';
1329	    goto done;
1330	  }
1331
1332      if (flag_openacc)
1333	if (prev_openacc_flag != openacc_flag)
1334	  {
1335	    gfc_current_locus = old_loc;
1336	    openacc_flag = prev_openacc_flag;
1337	    c = '&';
1338	    goto done;
1339	  }
1340
1341      /* Now that we have a non-comment line, probe ahead for the
1342	 first non-whitespace character.  If it is another '&', then
1343	 reading starts at the next character, otherwise we must back
1344	 up to where the whitespace started and resume from there.  */
1345
1346      old_loc = gfc_current_locus;
1347
1348      c = next_char ();
1349      while (gfc_is_whitespace (c))
1350	c = next_char ();
1351
1352      if (openmp_flag)
1353	{
1354	  for (i = 0; i < 5; i++, c = next_char ())
1355	    {
1356	      gcc_assert (gfc_wide_tolower (c) == (unsigned char) "!$omp"[i]);
1357	      if (i == 4)
1358		old_loc = gfc_current_locus;
1359	    }
1360	  while (gfc_is_whitespace (c))
1361	    c = next_char ();
1362	}
1363      if (openacc_flag)
1364	{
1365	  for (i = 0; i < 5; i++, c = next_char ())
1366	    {
1367	      gcc_assert (gfc_wide_tolower (c) == (unsigned char) "!$acc"[i]);
1368	      if (i == 4)
1369		old_loc = gfc_current_locus;
1370	    }
1371	  while (gfc_is_whitespace (c))
1372	    c = next_char ();
1373	}
1374
1375      if (c != '&')
1376	{
1377	  if (in_string)
1378	    {
1379	      gfc_current_locus.nextc--;
1380	      if (warn_ampersand && in_string == INSTRING_WARN)
1381		gfc_warning (OPT_Wampersand,
1382			     "Missing %<&%> in continued character "
1383			     "constant at %C");
1384	    }
1385	  else if (!in_string && (c == '\'' || c == '"'))
1386	      goto done;
1387	  /* Both !$omp and !$ -fopenmp continuation lines have & on the
1388	     continuation line only optionally.  */
1389	  else if (openmp_flag || openacc_flag || openmp_cond_flag)
1390	    gfc_current_locus.nextc--;
1391	  else
1392	    {
1393	      c = ' ';
1394	      gfc_current_locus = old_loc;
1395	      goto done;
1396	    }
1397	}
1398    }
1399  else /* Fixed form.  */
1400    {
1401      /* Fixed form continuation.  */
1402      if (in_string != INSTRING_WARN && c == '!')
1403	{
1404	  /* Skip comment at end of line.  */
1405	  do
1406	    {
1407	      c = next_char ();
1408	    }
1409	  while (c != '\n');
1410
1411	  /* Avoid truncation warnings for comment ending lines.  */
1412	  gfc_current_locus.lb->truncated = 0;
1413	}
1414
1415      if (c != '\n')
1416	goto done;
1417
1418      /* Check to see if the continuation line was truncated.  */
1419      if (warn_line_truncation && gfc_current_locus.lb != NULL
1420	  && gfc_current_locus.lb->truncated)
1421	{
1422	  gfc_current_locus.lb->truncated = 0;
1423	  gfc_warning_now (OPT_Wline_truncation,
1424			   "Line truncated at %L", &gfc_current_locus);
1425	}
1426
1427      if (flag_openmp)
1428	prev_openmp_flag = openmp_flag;
1429      if (flag_openacc)
1430	prev_openacc_flag = openacc_flag;
1431
1432      continue_flag = 1;
1433      old_loc = gfc_current_locus;
1434
1435      gfc_advance_line ();
1436      skip_fixed_comments ();
1437
1438      /* See if this line is a continuation line.  */
1439      if (flag_openmp && openmp_flag != prev_openmp_flag)
1440	{
1441	  openmp_flag = prev_openmp_flag;
1442	  goto not_continuation;
1443	}
1444      if (flag_openacc && openacc_flag != prev_openacc_flag)
1445	{
1446	  openacc_flag = prev_openacc_flag;
1447	  goto not_continuation;
1448	}
1449
1450      if (!openmp_flag && !openacc_flag)
1451	for (i = 0; i < 5; i++)
1452	  {
1453	    c = next_char ();
1454	    if (c != ' ')
1455	      goto not_continuation;
1456	  }
1457      else if (openmp_flag)
1458	for (i = 0; i < 5; i++)
1459	  {
1460	    c = next_char ();
1461	    if (gfc_wide_tolower (c) != (unsigned char) "*$omp"[i])
1462	      goto not_continuation;
1463	  }
1464      else if (openacc_flag)
1465	for (i = 0; i < 5; i++)
1466	  {
1467	    c = next_char ();
1468	    if (gfc_wide_tolower (c) != (unsigned char) "*$acc"[i])
1469	      goto not_continuation;
1470	  }
1471
1472      c = next_char ();
1473      if (c == '0' || c == ' ' || c == '\n')
1474	goto not_continuation;
1475
1476      /* We've got a continuation line.  If we are on the very next line after
1477	 the last continuation, increment the continuation line count and
1478	 check whether the limit has been exceeded.  */
1479      if (gfc_linebuf_linenum (gfc_current_locus.lb) == continue_line + 1)
1480	{
1481	  if (++continue_count == gfc_option.max_continue_fixed)
1482	    {
1483	      if (gfc_notification_std (GFC_STD_GNU) || pedantic)
1484		gfc_warning (0, "Limit of %d continuations exceeded in "
1485			     "statement at %C",
1486			     gfc_option.max_continue_fixed);
1487	    }
1488	}
1489
1490      if (gfc_current_locus.lb != NULL
1491	  && continue_line < gfc_linebuf_linenum (gfc_current_locus.lb))
1492	continue_line = gfc_linebuf_linenum (gfc_current_locus.lb);
1493    }
1494
1495  /* Ready to read first character of continuation line, which might
1496     be another continuation line!  */
1497  goto restart;
1498
1499not_continuation:
1500  c = '\n';
1501  gfc_current_locus = old_loc;
1502  end_flag = 0;
1503
1504done:
1505  if (c == '\n')
1506    continue_count = 0;
1507  continue_flag = 0;
1508  return c;
1509}
1510
1511
1512/* Get the next character of input, folded to lowercase.  In fixed
1513   form mode, we also ignore spaces.  When matcher subroutines are
1514   parsing character literals, they have to call
1515   gfc_next_char_literal().  */
1516
1517gfc_char_t
1518gfc_next_char (void)
1519{
1520  gfc_char_t c;
1521
1522  do
1523    {
1524      c = gfc_next_char_literal (NONSTRING);
1525    }
1526  while (gfc_current_form == FORM_FIXED && gfc_is_whitespace (c));
1527
1528  return gfc_wide_tolower (c);
1529}
1530
1531char
1532gfc_next_ascii_char (void)
1533{
1534  gfc_char_t c = gfc_next_char ();
1535
1536  return (gfc_wide_fits_in_byte (c) ? (unsigned char) c
1537				    : (unsigned char) UCHAR_MAX);
1538}
1539
1540
1541gfc_char_t
1542gfc_peek_char (void)
1543{
1544  locus old_loc;
1545  gfc_char_t c;
1546
1547  old_loc = gfc_current_locus;
1548  c = gfc_next_char ();
1549  gfc_current_locus = old_loc;
1550
1551  return c;
1552}
1553
1554
1555char
1556gfc_peek_ascii_char (void)
1557{
1558  gfc_char_t c = gfc_peek_char ();
1559
1560  return (gfc_wide_fits_in_byte (c) ? (unsigned char) c
1561				    : (unsigned char) UCHAR_MAX);
1562}
1563
1564
1565/* Recover from an error.  We try to get past the current statement
1566   and get lined up for the next.  The next statement follows a '\n'
1567   or a ';'.  We also assume that we are not within a character
1568   constant, and deal with finding a '\'' or '"'.  */
1569
1570void
1571gfc_error_recovery (void)
1572{
1573  gfc_char_t c, delim;
1574
1575  if (gfc_at_eof ())
1576    return;
1577
1578  for (;;)
1579    {
1580      c = gfc_next_char ();
1581      if (c == '\n' || c == ';')
1582	break;
1583
1584      if (c != '\'' && c != '"')
1585	{
1586	  if (gfc_at_eof ())
1587	    break;
1588	  continue;
1589	}
1590      delim = c;
1591
1592      for (;;)
1593	{
1594	  c = next_char ();
1595
1596	  if (c == delim)
1597	    break;
1598	  if (c == '\n')
1599	    return;
1600	  if (c == '\\')
1601	    {
1602	      c = next_char ();
1603	      if (c == '\n')
1604		return;
1605	    }
1606	}
1607      if (gfc_at_eof ())
1608	break;
1609    }
1610}
1611
1612
1613/* Read ahead until the next character to be read is not whitespace.  */
1614
1615void
1616gfc_gobble_whitespace (void)
1617{
1618  static int linenum = 0;
1619  locus old_loc;
1620  gfc_char_t c;
1621
1622  do
1623    {
1624      old_loc = gfc_current_locus;
1625      c = gfc_next_char_literal (NONSTRING);
1626      /* Issue a warning for nonconforming tabs.  We keep track of the line
1627	 number because the Fortran matchers will often back up and the same
1628	 line will be scanned multiple times.  */
1629      if (warn_tabs && c == '\t')
1630	{
1631	  int cur_linenum = LOCATION_LINE (gfc_current_locus.lb->location);
1632	  if (cur_linenum != linenum)
1633	    {
1634	      linenum = cur_linenum;
1635	      gfc_warning_now (OPT_Wtabs, "Nonconforming tab character at %C");
1636	    }
1637	}
1638    }
1639  while (gfc_is_whitespace (c));
1640
1641  gfc_current_locus = old_loc;
1642}
1643
1644
1645/* Load a single line into pbuf.
1646
1647   If pbuf points to a NULL pointer, it is allocated.
1648   We truncate lines that are too long, unless we're dealing with
1649   preprocessor lines or if the option -ffixed-line-length-none is set,
1650   in which case we reallocate the buffer to fit the entire line, if
1651   need be.
1652   In fixed mode, we expand a tab that occurs within the statement
1653   label region to expand to spaces that leave the next character in
1654   the source region.
1655
1656   If first_char is not NULL, it's a pointer to a single char value holding
1657   the first character of the line, which has already been read by the
1658   caller.  This avoids the use of ungetc().
1659
1660   load_line returns whether the line was truncated.
1661
1662   NOTE: The error machinery isn't available at this point, so we can't
1663	 easily report line and column numbers consistent with other
1664	 parts of gfortran.  */
1665
1666static int
1667load_line (FILE *input, gfc_char_t **pbuf, int *pbuflen, const int *first_char)
1668{
1669  static int linenum = 0, current_line = 1;
1670  int c, maxlen, i, preprocessor_flag, buflen = *pbuflen;
1671  int trunc_flag = 0, seen_comment = 0;
1672  int seen_printable = 0, seen_ampersand = 0, quoted = ' ';
1673  gfc_char_t *buffer;
1674  bool found_tab = false;
1675
1676  /* Determine the maximum allowed line length.  */
1677  if (gfc_current_form == FORM_FREE)
1678    maxlen = flag_free_line_length;
1679  else if (gfc_current_form == FORM_FIXED)
1680    maxlen = flag_fixed_line_length;
1681  else
1682    maxlen = 72;
1683
1684  if (*pbuf == NULL)
1685    {
1686      /* Allocate the line buffer, storing its length into buflen.
1687	 Note that if maxlen==0, indicating that arbitrary-length lines
1688	 are allowed, the buffer will be reallocated if this length is
1689	 insufficient; since 132 characters is the length of a standard
1690	 free-form line, we use that as a starting guess.  */
1691      if (maxlen > 0)
1692	buflen = maxlen;
1693      else
1694	buflen = 132;
1695
1696      *pbuf = gfc_get_wide_string (buflen + 1);
1697    }
1698
1699  i = 0;
1700  buffer = *pbuf;
1701
1702  if (first_char)
1703    c = *first_char;
1704  else
1705    c = getc (input);
1706
1707  /* In order to not truncate preprocessor lines, we have to
1708     remember that this is one.  */
1709  preprocessor_flag = (c == '#' ? 1 : 0);
1710
1711  for (;;)
1712    {
1713      if (c == EOF)
1714	break;
1715
1716      if (c == '\n')
1717	{
1718	  /* Check for illegal use of ampersand. See F95 Standard 3.3.1.3.  */
1719	  if (gfc_current_form == FORM_FREE
1720	      && !seen_printable && seen_ampersand)
1721	    {
1722	      if (pedantic)
1723		gfc_error_now ("%<&%> not allowed by itself in line %d",
1724			       current_line);
1725	      else
1726		gfc_warning_now (0, "%<&%> not allowed by itself in line %d",
1727				 current_line);
1728	    }
1729	  break;
1730	}
1731
1732      if (c == '\r' || c == '\0')
1733	goto next_char;			/* Gobble characters.  */
1734
1735      if (c == '&')
1736	{
1737	  if (seen_ampersand)
1738	    {
1739	      seen_ampersand = 0;
1740	      seen_printable = 1;
1741	    }
1742	  else
1743	    seen_ampersand = 1;
1744	}
1745
1746      if ((c != '&' && c != '!' && c != ' ') || (c == '!' && !seen_ampersand))
1747	seen_printable = 1;
1748
1749      /* Is this a fixed-form comment?  */
1750      if (gfc_current_form == FORM_FIXED && i == 0
1751	  && (c == '*' || c == 'c' || c == 'd'))
1752	seen_comment = 1;
1753
1754      if (quoted == ' ')
1755	{
1756	  if (c == '\'' || c == '"')
1757	    quoted = c;
1758	}
1759      else if (c == quoted)
1760	quoted = ' ';
1761
1762      /* Is this a free-form comment?  */
1763      if (c == '!' && quoted == ' ')
1764        seen_comment = 1;
1765
1766      /* Vendor extension: "<tab>1" marks a continuation line.  */
1767      if (found_tab)
1768	{
1769	  found_tab = false;
1770	  if (c >= '1' && c <= '9')
1771	    {
1772	      *(buffer-1) = c;
1773	      goto next_char;
1774	    }
1775	}
1776
1777      if (gfc_current_form == FORM_FIXED && c == '\t' && i < 6)
1778	{
1779	  found_tab = true;
1780
1781	  if (warn_tabs && seen_comment == 0 && current_line != linenum)
1782	    {
1783	      linenum = current_line;
1784	      gfc_warning_now (OPT_Wtabs,
1785			       "Nonconforming tab character in column %d "
1786			       "of line %d", i+1, linenum);
1787	    }
1788
1789	  while (i < 6)
1790	    {
1791	      *buffer++ = ' ';
1792	      i++;
1793	    }
1794
1795	  goto next_char;
1796	}
1797
1798      *buffer++ = c;
1799      i++;
1800
1801      if (maxlen == 0 || preprocessor_flag)
1802	{
1803	  if (i >= buflen)
1804	    {
1805	      /* Reallocate line buffer to double size to hold the
1806		overlong line.  */
1807	      buflen = buflen * 2;
1808	      *pbuf = XRESIZEVEC (gfc_char_t, *pbuf, (buflen + 1));
1809	      buffer = (*pbuf) + i;
1810	    }
1811	}
1812      else if (i >= maxlen)
1813	{
1814	  bool trunc_warn = true;
1815
1816	  /* Enhancement, if the very next non-space character is an ampersand
1817	     or comment that we would otherwise warn about, don't mark as
1818	     truncated.  */
1819
1820	  /* Truncate the rest of the line.  */
1821	  for (;;)
1822	    {
1823	      c = getc (input);
1824	      if (c == '\r' || c == ' ')
1825	        continue;
1826
1827	      if (c == '\n' || c == EOF)
1828		break;
1829
1830	      if (!trunc_warn && c != '!')
1831		trunc_warn = true;
1832
1833	      if (trunc_warn && ((gfc_current_form == FORM_FIXED && c == '&')
1834		  || c == '!'))
1835		trunc_warn = false;
1836
1837	      if (c == '!')
1838		seen_comment = 1;
1839
1840	      if (trunc_warn && !seen_comment)
1841		trunc_flag = 1;
1842	    }
1843
1844	  c = '\n';
1845	  continue;
1846	}
1847
1848next_char:
1849      c = getc (input);
1850    }
1851
1852  /* Pad lines to the selected line length in fixed form.  */
1853  if (gfc_current_form == FORM_FIXED
1854      && flag_fixed_line_length != 0
1855      && !preprocessor_flag
1856      && c != EOF)
1857    {
1858      while (i++ < maxlen)
1859	*buffer++ = ' ';
1860    }
1861
1862  *buffer = '\0';
1863  *pbuflen = buflen;
1864  current_line++;
1865
1866  return trunc_flag;
1867}
1868
1869
1870/* Get a gfc_file structure, initialize it and add it to
1871   the file stack.  */
1872
1873static gfc_file *
1874get_file (const char *name, enum lc_reason reason ATTRIBUTE_UNUSED)
1875{
1876  gfc_file *f;
1877
1878  f = XCNEW (gfc_file);
1879
1880  f->filename = xstrdup (name);
1881
1882  f->next = file_head;
1883  file_head = f;
1884
1885  f->up = current_file;
1886  if (current_file != NULL)
1887    f->inclusion_line = current_file->line;
1888
1889  linemap_add (line_table, reason, false, f->filename, 1);
1890
1891  return f;
1892}
1893
1894
1895/* Deal with a line from the C preprocessor. The
1896   initial octothorp has already been seen.  */
1897
1898static void
1899preprocessor_line (gfc_char_t *c)
1900{
1901  bool flag[5];
1902  int i, line;
1903  gfc_char_t *wide_filename;
1904  gfc_file *f;
1905  int escaped, unescape;
1906  char *filename;
1907
1908  c++;
1909  while (*c == ' ' || *c == '\t')
1910    c++;
1911
1912  if (*c < '0' || *c > '9')
1913    goto bad_cpp_line;
1914
1915  line = wide_atoi (c);
1916
1917  c = wide_strchr (c, ' ');
1918  if (c == NULL)
1919    {
1920      /* No file name given.  Set new line number.  */
1921      current_file->line = line;
1922      return;
1923    }
1924
1925  /* Skip spaces.  */
1926  while (*c == ' ' || *c == '\t')
1927    c++;
1928
1929  /* Skip quote.  */
1930  if (*c != '"')
1931    goto bad_cpp_line;
1932  ++c;
1933
1934  wide_filename = c;
1935
1936  /* Make filename end at quote.  */
1937  unescape = 0;
1938  escaped = false;
1939  while (*c && ! (!escaped && *c == '"'))
1940    {
1941      if (escaped)
1942	escaped = false;
1943      else if (*c == '\\')
1944	{
1945	  escaped = true;
1946	  unescape++;
1947	}
1948      ++c;
1949    }
1950
1951  if (! *c)
1952    /* Preprocessor line has no closing quote.  */
1953    goto bad_cpp_line;
1954
1955  *c++ = '\0';
1956
1957  /* Undo effects of cpp_quote_string.  */
1958  if (unescape)
1959    {
1960      gfc_char_t *s = wide_filename;
1961      gfc_char_t *d = gfc_get_wide_string (c - wide_filename - unescape);
1962
1963      wide_filename = d;
1964      while (*s)
1965	{
1966	  if (*s == '\\')
1967	    *d++ = *++s;
1968	  else
1969	    *d++ = *s;
1970	  s++;
1971	}
1972      *d = '\0';
1973    }
1974
1975  /* Get flags.  */
1976
1977  flag[1] = flag[2] = flag[3] = flag[4] = false;
1978
1979  for (;;)
1980    {
1981      c = wide_strchr (c, ' ');
1982      if (c == NULL)
1983	break;
1984
1985      c++;
1986      i = wide_atoi (c);
1987
1988      if (1 <= i && i <= 4)
1989	flag[i] = true;
1990    }
1991
1992  /* Convert the filename in wide characters into a filename in narrow
1993     characters.  */
1994  filename = gfc_widechar_to_char (wide_filename, -1);
1995
1996  /* Interpret flags.  */
1997
1998  if (flag[1]) /* Starting new file.  */
1999    {
2000      f = get_file (filename, LC_RENAME);
2001      add_file_change (f->filename, f->inclusion_line);
2002      current_file = f;
2003    }
2004
2005  if (flag[2]) /* Ending current file.  */
2006    {
2007      if (!current_file->up
2008	  || filename_cmp (current_file->up->filename, filename) != 0)
2009	{
2010	  gfc_warning_now_1 ("%s:%d: file %s left but not entered",
2011			     current_file->filename, current_file->line,
2012			     filename);
2013	  if (unescape)
2014	    free (wide_filename);
2015	  free (filename);
2016	  return;
2017	}
2018
2019      add_file_change (NULL, line);
2020      current_file = current_file->up;
2021      linemap_add (line_table, LC_RENAME, false, current_file->filename,
2022		   current_file->line);
2023    }
2024
2025  /* The name of the file can be a temporary file produced by
2026     cpp. Replace the name if it is different.  */
2027
2028  if (filename_cmp (current_file->filename, filename) != 0)
2029    {
2030       /* FIXME: we leak the old filename because a pointer to it may be stored
2031          in the linemap.  Alternative could be using GC or updating linemap to
2032          point to the new name, but there is no API for that currently.  */
2033      current_file->filename = xstrdup (filename);
2034    }
2035
2036  /* Set new line number.  */
2037  current_file->line = line;
2038  if (unescape)
2039    free (wide_filename);
2040  free (filename);
2041  return;
2042
2043 bad_cpp_line:
2044  gfc_warning_now_1 ("%s:%d: Illegal preprocessor directive",
2045		   current_file->filename, current_file->line);
2046  current_file->line++;
2047}
2048
2049
2050static bool load_file (const char *, const char *, bool);
2051
2052/* include_line()-- Checks a line buffer to see if it is an include
2053   line.  If so, we call load_file() recursively to load the included
2054   file.  We never return a syntax error because a statement like
2055   "include = 5" is perfectly legal.  We return false if no include was
2056   processed or true if we matched an include.  */
2057
2058static bool
2059include_line (gfc_char_t *line)
2060{
2061  gfc_char_t quote, *c, *begin, *stop;
2062  char *filename;
2063
2064  c = line;
2065
2066  if (flag_openmp || flag_openmp_simd)
2067    {
2068      if (gfc_current_form == FORM_FREE)
2069	{
2070	  while (*c == ' ' || *c == '\t')
2071	    c++;
2072	  if (*c == '!' && c[1] == '$' && (c[2] == ' ' || c[2] == '\t'))
2073	    c += 3;
2074	}
2075      else
2076	{
2077	  if ((*c == '!' || *c == 'c' || *c == 'C' || *c == '*')
2078	      && c[1] == '$' && (c[2] == ' ' || c[2] == '\t'))
2079	    c += 3;
2080	}
2081    }
2082
2083  while (*c == ' ' || *c == '\t')
2084    c++;
2085
2086  if (gfc_wide_strncasecmp (c, "include", 7))
2087    return false;
2088
2089  c += 7;
2090  while (*c == ' ' || *c == '\t')
2091    c++;
2092
2093  /* Find filename between quotes.  */
2094
2095  quote = *c++;
2096  if (quote != '"' && quote != '\'')
2097    return false;
2098
2099  begin = c;
2100
2101  while (*c != quote && *c != '\0')
2102    c++;
2103
2104  if (*c == '\0')
2105    return false;
2106
2107  stop = c++;
2108
2109  while (*c == ' ' || *c == '\t')
2110    c++;
2111
2112  if (*c != '\0' && *c != '!')
2113    return false;
2114
2115  /* We have an include line at this point.  */
2116
2117  *stop = '\0'; /* It's ok to trash the buffer, as this line won't be
2118		   read by anything else.  */
2119
2120  filename = gfc_widechar_to_char (begin, -1);
2121  if (!load_file (filename, NULL, false))
2122    exit (FATAL_EXIT_CODE);
2123
2124  free (filename);
2125  return true;
2126}
2127
2128
2129/* Load a file into memory by calling load_line until the file ends.  */
2130
2131static bool
2132load_file (const char *realfilename, const char *displayedname, bool initial)
2133{
2134  gfc_char_t *line;
2135  gfc_linebuf *b;
2136  gfc_file *f;
2137  FILE *input;
2138  int len, line_len;
2139  bool first_line;
2140  const char *filename;
2141  /* If realfilename and displayedname are different and non-null then
2142     surely realfilename is the preprocessed form of
2143     displayedname.  */
2144  bool preprocessed_p = (realfilename && displayedname
2145			 && strcmp (realfilename, displayedname));
2146
2147  filename = displayedname ? displayedname : realfilename;
2148
2149  for (f = current_file; f; f = f->up)
2150    if (filename_cmp (filename, f->filename) == 0)
2151      {
2152	fprintf (stderr, "%s:%d: Error: File '%s' is being included "
2153		 "recursively\n", current_file->filename, current_file->line,
2154		 filename);
2155	return false;
2156      }
2157
2158  if (initial)
2159    {
2160      if (gfc_src_file)
2161	{
2162	  input = gfc_src_file;
2163	  gfc_src_file = NULL;
2164	}
2165      else
2166	input = gfc_open_file (realfilename);
2167      if (input == NULL)
2168	{
2169	  gfc_error_now ("Can't open file %qs", filename);
2170	  return false;
2171	}
2172    }
2173  else
2174    {
2175      input = gfc_open_included_file (realfilename, false, false);
2176      if (input == NULL)
2177	{
2178	  fprintf (stderr, "%s:%d: Error: Can't open included file '%s'\n",
2179		   current_file->filename, current_file->line, filename);
2180	  return false;
2181	}
2182    }
2183
2184  /* Load the file.
2185
2186     A "non-initial" file means a file that is being included.  In
2187     that case we are creating an LC_ENTER map.
2188
2189     An "initial" file means a main file; one that is not included.
2190     That file has already got at least one (surely more) line map(s)
2191     created by gfc_init.  So the subsequent map created in that case
2192     must have LC_RENAME reason.
2193
2194     This latter case is not true for a preprocessed file.  In that
2195     case, although the file is "initial", the line maps created by
2196     gfc_init was used during the preprocessing of the file.  Now that
2197     the preprocessing is over and we are being fed the result of that
2198     preprocessing, we need to create a brand new line map for the
2199     preprocessed file, so the reason is going to be LC_ENTER.  */
2200
2201  f = get_file (filename, (initial && !preprocessed_p) ? LC_RENAME : LC_ENTER);
2202  if (!initial)
2203    add_file_change (f->filename, f->inclusion_line);
2204  current_file = f;
2205  current_file->line = 1;
2206  line = NULL;
2207  line_len = 0;
2208  first_line = true;
2209
2210  if (initial && gfc_src_preprocessor_lines[0])
2211    {
2212      preprocessor_line (gfc_src_preprocessor_lines[0]);
2213      free (gfc_src_preprocessor_lines[0]);
2214      gfc_src_preprocessor_lines[0] = NULL;
2215      if (gfc_src_preprocessor_lines[1])
2216	{
2217	  preprocessor_line (gfc_src_preprocessor_lines[1]);
2218	  free (gfc_src_preprocessor_lines[1]);
2219	  gfc_src_preprocessor_lines[1] = NULL;
2220	}
2221    }
2222
2223  for (;;)
2224    {
2225      int trunc = load_line (input, &line, &line_len, NULL);
2226
2227      len = gfc_wide_strlen (line);
2228      if (feof (input) && len == 0)
2229	break;
2230
2231      /* If this is the first line of the file, it can contain a byte
2232	 order mark (BOM), which we will ignore:
2233	   FF FE is UTF-16 little endian,
2234	   FE FF is UTF-16 big endian,
2235	   EF BB BF is UTF-8.  */
2236      if (first_line
2237	  && ((line_len >= 2 && line[0] == (unsigned char) '\xFF'
2238			     && line[1] == (unsigned char) '\xFE')
2239	      || (line_len >= 2 && line[0] == (unsigned char) '\xFE'
2240			        && line[1] == (unsigned char) '\xFF')
2241	      || (line_len >= 3 && line[0] == (unsigned char) '\xEF'
2242				&& line[1] == (unsigned char) '\xBB'
2243				&& line[2] == (unsigned char) '\xBF')))
2244	{
2245	  int n = line[1] == (unsigned char) '\xBB' ? 3 : 2;
2246	  gfc_char_t *new_char = gfc_get_wide_string (line_len);
2247
2248	  wide_strcpy (new_char, &line[n]);
2249	  free (line);
2250	  line = new_char;
2251	  len -= n;
2252	}
2253
2254      /* There are three things this line can be: a line of Fortran
2255	 source, an include line or a C preprocessor directive.  */
2256
2257      if (line[0] == '#')
2258	{
2259	  /* When -g3 is specified, it's possible that we emit #define
2260	     and #undef lines, which we need to pass to the middle-end
2261	     so that it can emit correct debug info.  */
2262	  if (debug_info_level == DINFO_LEVEL_VERBOSE
2263	      && (wide_strncmp (line, "#define ", 8) == 0
2264		  || wide_strncmp (line, "#undef ", 7) == 0))
2265	    ;
2266	  else
2267	    {
2268	      preprocessor_line (line);
2269	      continue;
2270	    }
2271	}
2272
2273      /* Preprocessed files have preprocessor lines added before the byte
2274         order mark, so first_line is not about the first line of the file
2275	 but the first line that's not a preprocessor line.  */
2276      first_line = false;
2277
2278      if (include_line (line))
2279	{
2280	  current_file->line++;
2281	  continue;
2282	}
2283
2284      /* Add line.  */
2285
2286      b = XCNEWVAR (gfc_linebuf, gfc_linebuf_header_size
2287		    + (len + 1) * sizeof (gfc_char_t));
2288
2289
2290      b->location
2291	= linemap_line_start (line_table, current_file->line++, len);
2292      /* ??? We add the location for the maximum column possible here,
2293	 because otherwise if the next call creates a new line-map, it
2294	 will not reserve space for any offset.  */
2295      if (len > 0)
2296	linemap_position_for_column (line_table, len);
2297
2298      b->file = current_file;
2299      b->truncated = trunc;
2300      wide_strcpy (b->line, line);
2301
2302      if (line_head == NULL)
2303	line_head = b;
2304      else
2305	line_tail->next = b;
2306
2307      line_tail = b;
2308
2309      while (file_changes_cur < file_changes_count)
2310	file_changes[file_changes_cur++].lb = b;
2311    }
2312
2313  /* Release the line buffer allocated in load_line.  */
2314  free (line);
2315
2316  fclose (input);
2317
2318  if (!initial)
2319    add_file_change (NULL, current_file->inclusion_line + 1);
2320  current_file = current_file->up;
2321  linemap_add (line_table, LC_LEAVE, 0, NULL, 0);
2322  return true;
2323}
2324
2325
2326/* Open a new file and start scanning from that file. Returns true
2327   if everything went OK, false otherwise.  If form == FORM_UNKNOWN
2328   it tries to determine the source form from the filename, defaulting
2329   to free form.  */
2330
2331bool
2332gfc_new_file (void)
2333{
2334  bool result;
2335
2336  if (gfc_cpp_enabled ())
2337    {
2338      result = gfc_cpp_preprocess (gfc_source_file);
2339      if (!gfc_cpp_preprocess_only ())
2340        result = load_file (gfc_cpp_temporary_file (), gfc_source_file, true);
2341    }
2342  else
2343    result = load_file (gfc_source_file, NULL, true);
2344
2345  gfc_current_locus.lb = line_head;
2346  gfc_current_locus.nextc = (line_head == NULL) ? NULL : line_head->line;
2347
2348#if 0 /* Debugging aid.  */
2349  for (; line_head; line_head = line_head->next)
2350    printf ("%s:%3d %s\n", LOCATION_FILE (line_head->location),
2351	    LOCATION_LINE (line_head->location), line_head->line);
2352
2353  exit (SUCCESS_EXIT_CODE);
2354#endif
2355
2356  return result;
2357}
2358
2359static char *
2360unescape_filename (const char *ptr)
2361{
2362  const char *p = ptr, *s;
2363  char *d, *ret;
2364  int escaped, unescape = 0;
2365
2366  /* Make filename end at quote.  */
2367  escaped = false;
2368  while (*p && ! (! escaped && *p == '"'))
2369    {
2370      if (escaped)
2371	escaped = false;
2372      else if (*p == '\\')
2373	{
2374	  escaped = true;
2375	  unescape++;
2376	}
2377      ++p;
2378    }
2379
2380  if (!*p || p[1])
2381    return NULL;
2382
2383  /* Undo effects of cpp_quote_string.  */
2384  s = ptr;
2385  d = XCNEWVEC (char, p + 1 - ptr - unescape);
2386  ret = d;
2387
2388  while (s != p)
2389    {
2390      if (*s == '\\')
2391	*d++ = *++s;
2392      else
2393	*d++ = *s;
2394      s++;
2395    }
2396  *d = '\0';
2397  return ret;
2398}
2399
2400/* For preprocessed files, if the first tokens are of the form # NUM.
2401   handle the directives so we know the original file name.  */
2402
2403const char *
2404gfc_read_orig_filename (const char *filename, const char **canon_source_file)
2405{
2406  int c, len;
2407  char *dirname, *tmp;
2408
2409  gfc_src_file = gfc_open_file (filename);
2410  if (gfc_src_file == NULL)
2411    return NULL;
2412
2413  c = getc (gfc_src_file);
2414
2415  if (c != '#')
2416    return NULL;
2417
2418  len = 0;
2419  load_line (gfc_src_file, &gfc_src_preprocessor_lines[0], &len, &c);
2420
2421  if (wide_strncmp (gfc_src_preprocessor_lines[0], "# 1 \"", 5) != 0)
2422    return NULL;
2423
2424  tmp = gfc_widechar_to_char (&gfc_src_preprocessor_lines[0][5], -1);
2425  filename = unescape_filename (tmp);
2426  free (tmp);
2427  if (filename == NULL)
2428    return NULL;
2429
2430  c = getc (gfc_src_file);
2431
2432  if (c != '#')
2433    return filename;
2434
2435  len = 0;
2436  load_line (gfc_src_file, &gfc_src_preprocessor_lines[1], &len, &c);
2437
2438  if (wide_strncmp (gfc_src_preprocessor_lines[1], "# 1 \"", 5) != 0)
2439    return filename;
2440
2441  tmp = gfc_widechar_to_char (&gfc_src_preprocessor_lines[1][5], -1);
2442  dirname = unescape_filename (tmp);
2443  free (tmp);
2444  if (dirname == NULL)
2445    return filename;
2446
2447  len = strlen (dirname);
2448  if (len < 3 || dirname[len - 1] != '/' || dirname[len - 2] != '/')
2449    {
2450      free (dirname);
2451      return filename;
2452    }
2453  dirname[len - 2] = '\0';
2454  set_src_pwd (dirname);
2455
2456  if (! IS_ABSOLUTE_PATH (filename))
2457    {
2458      char *p = XCNEWVEC (char, len + strlen (filename));
2459
2460      memcpy (p, dirname, len - 2);
2461      p[len - 2] = '/';
2462      strcpy (p + len - 1, filename);
2463      *canon_source_file = p;
2464    }
2465
2466  free (dirname);
2467  return filename;
2468}
2469