1/* parse.y - parser for flex input */
2
3%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
5%token OPT_TABLES
6
7%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
8%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
9
10%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
11%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
12
13%left CCL_OP_DIFF CCL_OP_UNION
14
/*
 * POSIX and AT&T lex place the precedence of the repeat operator, {},
 * below that of concatenation.  Thus, ab{3} is ababab.  Most other POSIX
 * utilities use an Extended Regular Expression (ERE) precedence that has
 * the repeat operator higher than concatenation.  This causes ab{3} to
 * yield abbb.
 *
 * In order to support both the POSIX and AT&T precedence and the flex
 * precedence, we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses which
 * tokens to return based on whether posix_compat or lex_compat is
 * specified.  Specifying either posix_compat or lex_compat will cause
 * flex to parse scanner files as per the AT&T and POSIX-mandated
 * behavior.
 */
30
31%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
32
33
34%{
35/*  Copyright (c) 1990 The Regents of the University of California. */
36/*  All rights reserved. */
37
38/*  This code is derived from software contributed to Berkeley by */
39/*  Vern Paxson. */
40
41/*  The United States Government has rights in this work pursuant */
42/*  to contract no. DE-AC03-76SF00098 between the United States */
43/*  Department of Energy and the University of California. */
44
45/*  This file is part of flex. */
46
47/*  Redistribution and use in source and binary forms, with or without */
48/*  modification, are permitted provided that the following conditions */
49/*  are met: */
50
51/*  1. Redistributions of source code must retain the above copyright */
52/*     notice, this list of conditions and the following disclaimer. */
53/*  2. Redistributions in binary form must reproduce the above copyright */
54/*     notice, this list of conditions and the following disclaimer in the */
55/*     documentation and/or other materials provided with the distribution. */
56
57/*  Neither the name of the University nor the names of its contributors */
58/*  may be used to endorse or promote products derived from this software */
59/*  without specific prior written permission. */
60
61/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
62/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
63/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
64/*  PURPOSE. */
65
66#include "flexdef.h"
67#include "tables.h"
68
69int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
70int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
71
72int *scon_stk;
73int scon_stk_ptr;
74
75static int madeany = false;  /* whether we've made the '.' character class */
76static int ccldot, cclany;
77int previous_continued_action;	/* whether the previous rule's action was '|' */
78
/* format_warn3 - format a message from fmt and two arguments into a
 * MAXLINE-sized buffer and pass it to warn().
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_msg[MAXLINE]; \
		snprintf( fw3_msg, sizeof(fw3_msg), (fmt), (a1), (a2) ); \
		warn( fw3_msg ); \
	} while (0)
85
/* CCL_EXPR - expand a POSIX character class expression: add to currccl
 * every character below csize that is ASCII and satisfies func.
 */
#define CCL_EXPR(func) \
	do { \
		int ch; \
		for ( ch = 0; ch < csize; ++ch ) { \
			if ( isascii(ch) && func(ch) ) { \
				ccladd( currccl, ch ); \
			} \
		} \
	} while (0)

/* CCL_NEG_EXPR - negated class: add to currccl every character below
 * csize that does NOT satisfy func (no isascii() filter is applied here).
 */
#define CCL_NEG_EXPR(func) \
	do { \
		int ch; \
		for ( ch = 0; ch < csize; ++ch ) { \
			if ( !func(ch) ) { \
				ccladd( currccl, ch ); \
			} \
		} \
	} while (0)

/* isblank() is defined by POSIX but is not ANSI C, so spell it out:
 * a blank is a space or a horizontal tab.
 */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
106
107/* On some over-ambitious machines, such as DEC Alpha's, the default
108 * token type is "long" instead of "int"; this leads to problems with
109 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
110 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
111 * following should ensure that the default token type is "int".
112 */
113#define YYSTYPE int
114
115%}
116
117%%
/* goal - a complete flex input file.
 *
 * After both sections are parsed, and the final initforrule has run, the
 * default rule is appended: a freshly created ccl that is then negated,
 * branched into every start condition, with an action of either ECHO or
 * (when spprdflt is set) a fatal "scanner jammed" error.
 */
goal		:  initlex sect1 sect1end sect2 initforrule
			{
				int def_rule;

				pat = cclinit();
				cclnegate( pat );

				def_rule = mkstate( -pat );

				/* Record which rule is the default one so that
				 * no "can't match" warning is generated for it.
				 */
				default_rule = num_rules;

				finish_rule( def_rule, false, 0, 0, 0 );

				for ( i = 1; i <= lastsc; ++i ) {
					scset[i] = mkbranch( scset[i], def_rule );
				}

				if ( spprdflt ) {
					add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
				} else {
					add_action( "ECHO" );
				}

				add_action( ";\n\tYY_BREAK\n" );
			}
		;
146
/* initlex - empty production whose action runs before section 1 is
 * parsed; it installs the default start condition.
 */
initlex		:  /* empty */
			{
				/* "INITIAL" is installed with false in the slot
				 * where namelist1 passes xcluflg.
				 */
				scinstal( "INITIAL", false );
			}
		;
154
/* sect1 - the definitions section: any sequence (possibly empty) of
 * start-condition declarations and option lines.
 */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|  /* empty */
		|  error
			{
				synerr( _("unknown error processing section 1") );
			}
		;
161
/* sect1end - the SECTEND token closing section 1.  Options are checked and
 * the start-condition stack used by the section-2 rules is created.
 */
sect1end	:  SECTEND
			{
				check_options();

				/* The stack is indexed from 1, hence lastsc + 1
				 * entries.
				 */
				scon_stk = allocate_integer_array( lastsc + 1 );
				scon_stk_ptr = 0;
			}
		;
169
/* startconddecl - remembers in xcluflg which declaration token introduced
 * the name list that follows: false for SCDECL, true for XSCDECL.
 */
startconddecl	:  SCDECL
			{
				xcluflg = false;
			}

		|  XSCDECL
			{
				xcluflg = true;
			}
		;
176
/* namelist1 - one or more names in a start-condition declaration.  Each
 * name (taken from nmstr) is installed with the current xcluflg.
 */
namelist1	:  namelist1 NAME
			{
				scinstal( nmstr, xcluflg );
			}

		|  NAME
			{
				scinstal( nmstr, xcluflg );
			}

		|  error
			{
				synerr( _("bad start condition list") );
			}
		;
186
/* options - an OPTION_OP token followed by zero or more option settings. */
options		:  OPTION_OP optionlist
		;

/* optionlist - a possibly empty, left-recursive list of option settings. */
optionlist	:  optionlist option
		|  /* empty */
		;
193
/* option - a single "<option token> = NAME" setting.  The name's text is
 * read from nmstr and duplicated with copy_string() before it is stored.
 */
option		:  OPT_OUTFILE '=' NAME
			{
				outfilename = copy_string( nmstr );
				did_outfilename = 1;
			}

		|  OPT_EXTRA_TYPE '=' NAME
			{
				extra_type = copy_string( nmstr );
			}

		|  OPT_PREFIX '=' NAME
			{
				prefix = copy_string( nmstr );
			}

		|  OPT_YYCLASS '=' NAME
			{
				yyclass = copy_string( nmstr );
			}

		|  OPT_HEADER '=' NAME
			{
				headerfilename = copy_string( nmstr );
			}

		|  OPT_TABLES '=' NAME
			{
				tablesext = true;
				tablesfilename = copy_string( nmstr );
			}
		;
210
/* sect2 - the rules section.  $2 is the value of scon: the stack depth
 * saved before that rule's start conditions were pushed.  Restoring it
 * after the rule (or the braced group of rules) pops them again.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{
				scon_stk_ptr = $2;
			}

		|  sect2 scon '{' sect2 '}'
			{
				scon_stk_ptr = $2;
			}

		|  /* empty */
		;
217
/* initforrule - empty production whose action resets the per-rule state
 * and opens a new rule before a rule is parsed.
 */
initforrule	:  /* empty */
			{
				varlength = false;
				variable_trail_rule = false;
				trlcontxt = false;

				rulelen = 0;
				headcnt = 0;
				trailcnt = 0;

				current_state_type = STATE_NORMAL;

				/* Remember whether the preceding rule's action
				 * was '|' before moving on to this rule.
				 */
				previous_continued_action = continued_action;
				in_rule = true;

				new_rule();
			}
		;
230
/* flexrule - one rule of section 2, without its start-condition prefix.
 *
 * A '^'-anchored rule is branched into the scbol[] machines, an ordinary
 * rule into the scset[] machines.  When start conditions are on the stack
 * only those are used; otherwise every start condition whose scxclu[]
 * entry is clear receives the rule.  An EOF_OP rule builds the end-of-file
 * action instead.
 */
flexrule	:  '^' rule
			{
				pat = $2;
				finish_rule( pat, variable_trail_rule,
					headcnt, trailcnt, previous_continued_action );

				if ( scon_stk_ptr > 0 ) {
					for ( i = 1; i <= scon_stk_ptr; ++i ) {
						scbol[scon_stk[i]] =
							mkbranch( scbol[scon_stk[i]], pat );
					}
				} else {
					/* No explicit start conditions: add the
					 * rule to all non-exclusive ones.
					 */
					for ( i = 1; i <= lastsc; ++i ) {
						if ( ! scxclu[i] ) {
							scbol[i] = mkbranch( scbol[i], pat );
						}
					}
				}

				if ( ! bol_needed ) {
					bol_needed = true;

					if ( performance_report > 1 ) {
						pinpoint_message(
			"'^' operator results in sub-optimal performance" );
					}
				}
			}

		|  rule
			{
				pat = $1;
				finish_rule( pat, variable_trail_rule,
					headcnt, trailcnt, previous_continued_action );

				if ( scon_stk_ptr > 0 ) {
					for ( i = 1; i <= scon_stk_ptr; ++i ) {
						scset[scon_stk[i]] =
							mkbranch( scset[scon_stk[i]], pat );
					}
				} else {
					for ( i = 1; i <= lastsc; ++i ) {
						if ( ! scxclu[i] ) {
							scset[i] = mkbranch( scset[i], pat );
						}
					}
				}
			}

		|  EOF_OP
			{
				if ( scon_stk_ptr > 0 ) {
					build_eof_action();
				} else {
					/* With no explicit start conditions, this
					 * EOF rule applies to every start condition
					 * that does not have an EOF action yet.
					 */
					for ( i = 1; i <= lastsc; ++i ) {
						if ( ! sceof[i] ) {
							scon_stk[++scon_stk_ptr] = i;
						}
					}

					if ( scon_stk_ptr == 0 ) {
						warn(
			"all start conditions already have <<EOF>> rules" );
					} else {
						build_eof_action();
					}
				}
			}

		|  error
			{
				synerr( _("unrecognized rule") );
			}
		;
317
/* scon_stk_ptr - empty marker production whose value is the current depth
 * of the start-condition stack, captured before namelist2 pushes onto it.
 */
scon_stk_ptr	:  /* empty */
			{
				$$ = scon_stk_ptr;
			}
		;
321
/* scon - optional start-condition prefix of a rule.  In every alternative
 * the value is the stack depth from before anything was pushed, which
 * sect2 later assigns back to scon_stk_ptr.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{
				$$ = $2;
			}

		|  '<' '*' '>'
			{
				$$ = scon_stk_ptr;

				/* Push every start condition that is not on the
				 * stack already.
				 */
				for ( i = 1; i <= lastsc; ++i ) {
					int pos;

					for ( pos = 1; pos <= scon_stk_ptr; ++pos ) {
						if ( scon_stk[pos] == i ) {
							break;
						}
					}

					if ( pos > scon_stk_ptr ) {
						scon_stk[++scon_stk_ptr] = i;
					}
				}
			}

		|  /* empty */
			{
				$$ = scon_stk_ptr;
			}
		;
345
/* namelist2 - comma-separated list of start-condition names inside the
 * '<' ... '>' prefix of a rule.
 */
namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{
				synerr( _("bad start condition list") );
			}
		;
353
/* sconname - one start-condition name in a rule prefix.  The name is
 * looked up; a result of 0 is reported as undeclared, a name already on
 * the stack produces a warning, and anything else is pushed.
 */
sconname	:  NAME
			{
				scnum = sclookup( nmstr );

				if ( scnum == 0 ) {
					format_pinpoint_message(
						"undeclared start condition %s",
						nmstr );
				} else {
					for ( i = 1; i <= scon_stk_ptr; ++i ) {
						if ( scon_stk[i] == scnum ) {
							format_warn(
								"<%s> specified twice",
								scname[scnum] );
							break;
						}
					}

					/* The loop ran to completion, so the
					 * start condition is not a duplicate.
					 */
					if ( i > scon_stk_ptr ) {
						scon_stk[++scon_stk_ptr] = scnum;
					}
				}
			}
		;
376
/* rule - a complete pattern.
 *
 *   re2 re      head, '/', trailing context
 *   re2 re '$'  rejected: trailing context used twice
 *   re '$'      '$' handled as trailing context made of a single '\n'
 *   re          plain pattern
 *
 * The statement order inside each action matters, because the helper
 * calls create and link NFA states as they run.
 */
rule		:  re2 re
			{
				if ( transchar[lastst[$2]] != SYM_EPSILON ) {
					/* Provide the final transition now, so
					 * that it gets marked as a trailing
					 * context state.
					 */
					$2 = link_machines( $2,
							mkstate( SYM_EPSILON ) );
				}

				mark_beginning_as_normal( $2 );
				current_state_type = STATE_NORMAL;

				if ( previous_continued_action ) {
					/* This has to be treated as variable
					 * trailing context so that the backup
					 * happens before the action switch
					 * statement rather than in the action.
					 * Were it done in the action, the rules
					 * "falling into" this action would
					 * erroneously do the backup as well.
					 */
					if ( ! varlength || headcnt != 0 ) {
						warn(
		"trailing context made variable due to preceding '|' action" );
					}

					/* Mark as variable. */
					varlength = true;
					headcnt = 0;
				}

				if ( lex_compat || (varlength && headcnt == 0) ) {
					/* Variable trailing context rule: mark
					 * the first part as the accepting "head".
					 * This is not done at the start of the
					 * production because current_state_type
					 * was then set up for a trail rule, and
					 * add_accept() can create a new state.
					 */
					add_accept( $1,
						num_rules | YY_TRAILING_HEAD_MASK );
					variable_trail_rule = true;
				} else {
					trailcnt = rulelen;
				}

				$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{
				synerr( _("trailing context used twice") );
			}

		|  re '$'
			{
				headcnt = 0;
				trailcnt = 1;
				rulelen = 1;
				varlength = false;

				current_state_type = STATE_TRAILING_CONTEXT;

				if ( trlcontxt ) {
					synerr( _("trailing context used twice") );

					/* This value is replaced by the
					 * link_machines() result below.
					 */
					$$ = mkstate( SYM_EPSILON );
				} else if ( previous_continued_action ) {
					/* Same reasoning as in the "re2 re"
					 * alternative.
					 */
					warn(
		"trailing context made variable due to preceding '|' action" );

					varlength = true;
				}

				if ( lex_compat || varlength ) {
					/* Mark the head, as in the "re2 re"
					 * alternative.
					 */
					add_accept( $1,
						num_rules | YY_TRAILING_HEAD_MASK );
					variable_trail_rule = true;
				}

				trlcontxt = true;

				eps = mkstate( SYM_EPSILON );
				$$ = link_machines( $1,
					link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
				$$ = $1;

				if ( trlcontxt ) {
					if ( lex_compat || (varlength && headcnt == 0) ) {
						/* Both head and trail are
						 * variable-length.
						 */
						variable_trail_rule = true;
					} else {
						trailcnt = rulelen;
					}
				}
			}
		;
495
496
/* re - alternation of series.  Using '|' marks the rule as
 * variable-length.
 */
re		:  re '|' series
			{
				varlength = true;
				$$ = mkor( $1, $3 );
			}

		|  series
			{
				$$ = $1;
			}
		;
506
507
/* re2 - the head of a trailing-context rule together with its '/'.
 *
 * It is a production of its own so that the reduction, and therefore this
 * action, happens before the trailing series is parsed.
 */
re2		:  re '/'
			{
				if ( trlcontxt ) {
					synerr( _("trailing context used twice") );
				} else {
					trlcontxt = true;
				}

				if ( varlength ) {
					/* Variable-length head: hope that the
					 * trailing context is fixed-length.
					 */
					varlength = false;
				} else {
					headcnt = rulelen;
				}

				/* Start counting afresh for the trailing part. */
				rulelen = 0;

				current_state_type = STATE_TRAILING_CONTEXT;
				$$ = $1;
			}
		;
534
/* series - concatenation of singletons, plus the repeat operator in its
 * BEGIN_REPEAT_POSIX/END_REPEAT_POSIX form, where it applies to the whole
 * series to its left.
 */
series		:  series singleton
			{
				/* Adjacent patterns are concatenated here. */
				$$ = link_machines( $1, $2 );
			}

		|  singleton
			{
				$$ = $1;
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
				varlength = true;

				if ( $3 > $5 || $3 < 0 ) {
					synerr( _("bad iteration values") );
					$$ = $1;
				} else if ( $3 != 0 ) {
					$$ = mkrep( $1, $3, $5 );
				} else if ( $5 <= 0 ) {
					synerr(
					_("bad iteration values") );
					$$ = $1;
				} else {
					/* Lower bound 0: an optional 1..$5
					 * repetition.
					 */
					$$ = mkopt(
						mkrep( $1, 1, $5 ) );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
				varlength = true;

				if ( $3 <= 0 ) {
					synerr( _("iteration value must be positive") );
					$$ = $1;
				} else {
					$$ = mkrep( $1, $3, INFINITE_REPEAT );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
				/* The series might be something like "(foo)",
				 * whose length is unknown here, so the rule is
				 * simply treated as variable-length.
				 */
				varlength = true;

				if ( $3 <= 0 ) {
					synerr( _("iteration value must be positive")
						);
					$$ = $1;
				} else {
					$$ = link_machines( $1,
							copysingl( $1, $3 - 1 ) );
				}
			}

		;
609
/* singleton - the smallest pattern unit: a character, '.', a character
 * class, a quoted string or a parenthesized expression, optionally
 * followed by a closure operator or by the repeat operator in its
 * BEGIN_REPEAT_FLEX/END_REPEAT_FLEX form.
 */
singleton	:  singleton '*'
			{
				varlength = true;

				$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
				varlength = true;
				$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
				varlength = true;
				$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
				varlength = true;

				if ( $3 > $5 || $3 < 0 ) {
					synerr( _("bad iteration values") );
					$$ = $1;
				} else if ( $3 != 0 ) {
					$$ = mkrep( $1, $3, $5 );
				} else if ( $5 <= 0 ) {
					synerr(
					_("bad iteration values") );
					$$ = $1;
				} else {
					/* Lower bound 0: an optional 1..$5
					 * repetition.
					 */
					$$ = mkopt(
						mkrep( $1, 1, $5 ) );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
				varlength = true;

				if ( $3 <= 0 ) {
					synerr( _("iteration value must be positive") );
					$$ = $1;
				} else {
					$$ = mkrep( $1, $3, INFINITE_REPEAT );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
				/* The singleton might be something like "(foo)",
				 * whose length is unknown here, so the rule is
				 * simply treated as variable-length.
				 */
				varlength = true;

				if ( $3 <= 0 ) {
					synerr( _("iteration value must be positive") );
					$$ = $1;
				} else {
					$$ = link_machines( $1,
							copysingl( $1, $3 - 1 ) );
				}
			}

		|  '.'
			{
				if ( ! madeany ) {
					/* First use of '.': build both classes
					 * once.  ccldot is the negation of
					 * { '\n' }.
					 */
					ccldot = cclinit();
					ccladd( ccldot, '\n' );
					cclnegate( ccldot );

					if ( useecs ) {
						mkeccl( ccltbl + cclmap[ccldot],
							ccllen[ccldot], nextecm,
							ecgroup, csize, csize );
					}

					/* cclany, the (?s:'.') class, is the
					 * negation of the empty class.
					 */
					cclany = cclinit();
					cclnegate( cclany );

					if ( useecs ) {
						mkeccl( ccltbl + cclmap[cclany],
							ccllen[cclany], nextecm,
							ecgroup, csize, csize );
					}

					madeany = true;
				}

				++rulelen;

				if ( sf_dot_all() ) {
					$$ = mkstate( -cclany );
				} else {
					$$ = mkstate( -ccldot );
				}
			}

		|  fullccl
			{
				/* Sort the characters for fast searching. */
				qsort( ccltbl + cclmap[$1], ccllen[$1],
					sizeof (*ccltbl), cclcmp );

				if ( useecs ) {
					mkeccl( ccltbl + cclmap[$1], ccllen[$1],
						nextecm, ecgroup, csize, csize );
				}

				++rulelen;

				if ( ccl_has_nl[$1] ) {
					rule_has_nl[num_rules] = true;
				}

				$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
				++rulelen;

				if ( ccl_has_nl[$1] ) {
					rule_has_nl[num_rules] = true;
				}

				$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{
				$$ = $2;
			}

		|  '(' re ')'
			{
				$$ = $2;
			}

		|  CHAR
			{
				++rulelen;

				if ( $1 == nlch ) {
					rule_has_nl[num_rules] = true;
				}

				if ( sf_case_ins() && has_case( $1 ) ) {
					/* Build an alternation, as in (a|A). */
					$$ = mkor (mkstate($1), mkstate(reverse_case($1)));
				} else {
					$$ = mkstate( $1 );
				}
			}
		;
/* fullccl - bracketed character classes combined left to right with the
 * CCL_OP_DIFF and CCL_OP_UNION operators.  A lone braceccl keeps its own
 * value through the default action.
 */
fullccl		:  fullccl CCL_OP_DIFF braceccl
			{
				$$ = ccl_set_diff( $1, $3 );
			}

		|  fullccl CCL_OP_UNION braceccl
			{
				$$ = ccl_set_union( $1, $3 );
			}

		|  braceccl
		;
777
/* braceccl - a character class in brackets; a leading '^' negates it. */
braceccl	:  '[' ccl ']'
			{
				$$ = $2;
			}

		|  '[' '^' ccl ']'
			{
				cclnegate( $3 );
				$$ = $3;
			}
		;
788
/* ccl - the contents of a bracketed character class, built left to right
 * from ranges, single characters and [:class:] expressions.  The empty
 * alternative creates the class and makes it the current one (currccl).
 *
 * cclsorted/lastchar track whether the characters have been added in
 * increasing order so far.
 */
ccl		:  ccl CHAR '-' CHAR
			{
				if ( sf_case_ins() ) {
					int lo_cased = has_case ($2);
					int hi_cased = has_case ($4);

					/* If one end of the range has case and the
					 * other does not, or the cases differ, it
					 * is unclear which range the user meant.
					 * Examples: [@-z] or [S-t]
					 */
					if ( lo_cased != hi_cased
					     || (lo_cased && (b_islower ($2) != b_islower ($4)))
					     || (lo_cased && (b_isupper ($2) != b_isupper ($4))) ) {
						format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
							$2, $4);
					}

					/* If the range spans uppercase characters
					 * but not lowercase (or vice-versa), should
					 * the other case be included automatically?
					 * Example: [@-_] spans [a-z] but not [A-Z]
					 */
					else if ( !lo_cased && !hi_cased
						  && !range_covers_case ($2, $4) ) {
						format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
							$2, $4);
					}
				}

				if ( $2 > $4 ) {
					synerr( _("negative range in character class") );
				} else {
					for ( i = $2; i <= $4; ++i ) {
						ccladd( $1, i );
					}

					/* Keep track of whether this ccl is
					 * staying in alphabetical order.
					 */
					cclsorted = cclsorted && ($2 > lastchar);
					lastchar = $4;

					/* Add the same range again in the
					 * opposite case.
					 */
					if ( sf_case_ins() && has_case($2) && has_case($4) ) {
						$2 = reverse_case ($2);
						$4 = reverse_case ($4);

						for ( i = $2; i <= $4; ++i ) {
							ccladd( $1, i );
						}

						cclsorted = cclsorted && ($2 > lastchar);
						lastchar = $4;
					}
				}

				$$ = $1;
			}

		|  ccl CHAR
			{
				ccladd( $1, $2 );
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $2;

				/* Add the character again in the opposite case. */
				if ( sf_case_ins() && has_case($2) ) {
					$2 = reverse_case ($2);
					ccladd ($1, $2);

					cclsorted = cclsorted && ($2 > lastchar);
					lastchar = $2;
				}

				$$ = $1;
			}

		|  ccl ccl_expr
			{
				/* Maintaining cclsorted properly here would
				 * be too hard.
				 */
				cclsorted = false;
				$$ = $1;
			}

		|  /* empty */
			{
				cclsorted = true;
				lastchar = 0;
				currccl = $$ = cclinit();
			}
		;
881
/* ccl_expr - a class expression token inside a bracketed class.  Each
 * CCE_* token adds the characters satisfying the matching <ctype.h> test
 * (or IS_BLANK) to currccl; each CCE_NEG_* token adds those that fail it.
 *
 * In a case-insensitive scanner the lower and upper classes each add both
 * cases, while the negated lower/upper classes only produce a warning.
 */
ccl_expr	:  CCE_ALNUM
			{
				CCL_EXPR(isalnum);
			}
		|  CCE_ALPHA
			{
				CCL_EXPR(isalpha);
			}
		|  CCE_BLANK
			{
				CCL_EXPR(IS_BLANK);
			}
		|  CCE_CNTRL
			{
				CCL_EXPR(iscntrl);
			}
		|  CCE_DIGIT
			{
				CCL_EXPR(isdigit);
			}
		|  CCE_GRAPH
			{
				CCL_EXPR(isgraph);
			}
		|  CCE_LOWER
			{
				CCL_EXPR(islower);
				if ( sf_case_ins() ) {
					CCL_EXPR(isupper);
				}
			}
		|  CCE_PRINT
			{
				CCL_EXPR(isprint);
			}
		|  CCE_PUNCT
			{
				CCL_EXPR(ispunct);
			}
		|  CCE_SPACE
			{
				CCL_EXPR(isspace);
			}
		|  CCE_XDIGIT
			{
				CCL_EXPR(isxdigit);
			}
		|  CCE_UPPER
			{
				CCL_EXPR(isupper);
				if ( sf_case_ins() ) {
					CCL_EXPR(islower);
				}
			}

		|  CCE_NEG_ALNUM
			{
				CCL_NEG_EXPR(isalnum);
			}
		|  CCE_NEG_ALPHA
			{
				CCL_NEG_EXPR(isalpha);
			}
		|  CCE_NEG_BLANK
			{
				CCL_NEG_EXPR(IS_BLANK);
			}
		|  CCE_NEG_CNTRL
			{
				CCL_NEG_EXPR(iscntrl);
			}
		|  CCE_NEG_DIGIT
			{
				CCL_NEG_EXPR(isdigit);
			}
		|  CCE_NEG_GRAPH
			{
				CCL_NEG_EXPR(isgraph);
			}
		|  CCE_NEG_PRINT
			{
				CCL_NEG_EXPR(isprint);
			}
		|  CCE_NEG_PUNCT
			{
				CCL_NEG_EXPR(ispunct);
			}
		|  CCE_NEG_SPACE
			{
				CCL_NEG_EXPR(isspace);
			}
		|  CCE_NEG_XDIGIT
			{
				CCL_NEG_EXPR(isxdigit);
			}
		|  CCE_NEG_LOWER
			{
				if ( sf_case_ins() ) {
					warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				} else {
					CCL_NEG_EXPR(islower);
				}
			}
		|  CCE_NEG_UPPER
			{
				if ( sf_case_ins() ) {
					warn(_("[:^upper:] ambiguous in case insensitive scanner"));
				} else {
					CCL_NEG_EXPR(isupper);
				}
			}
		;
927
/* string - the characters between double quotes.  It starts out as a
 * single epsilon state, and each character is linked onto the end; in a
 * case-insensitive scanner a cased character becomes an alternation of
 * both cases.
 */
string		:  string CHAR
			{
				if ( $2 == nlch ) {
					rule_has_nl[num_rules] = true;
				}

				++rulelen;

				if ( sf_case_ins() && has_case($2) ) {
					$$ = mkor (mkstate($2), mkstate(reverse_case($2)));
				} else {
					$$ = mkstate ($2);
				}

				$$ = link_machines( $1, $$);
			}

		|  /* empty */
			{
				$$ = mkstate( SYM_EPSILON );
			}
		;
946
947%%
948
949
950/* build_eof_action - build the "<<EOF>>" action for the active start
951 *                    conditions
952 */
953
954void build_eof_action()
955	{
956	int i;
957	char action_text[MAXLINE];
958
959	for ( i = 1; i <= scon_stk_ptr; ++i )
960		{
961		if ( sceof[scon_stk[i]] )
962			format_pinpoint_message(
963				"multiple <<EOF>> rules for start condition %s",
964				scname[scon_stk[i]] );
965
966		else
967			{
968			sceof[scon_stk[i]] = true;
969
970			if (previous_continued_action /* && previous action was regular */)
971				add_action("YY_RULE_SETUP\n");
972
973			snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
974				scname[scon_stk[i]] );
975			add_action( action_text );
976			}
977		}
978
979	line_directive_out( (FILE *) 0, 1 );
980
981	/* This isn't a normal rule after all - don't count it as
982	 * such, so we don't have any holes in the rule numbering
983	 * (which make generating "rule can never match" warnings
984	 * more difficult.
985	 */
986	--num_rules;
987	++num_eof_rules;
988	}
989
990
991/* format_synerr - write out formatted syntax error */
992
993void format_synerr( msg, arg )
994const char *msg, arg[];
995	{
996	char errmsg[MAXLINE];
997
998	(void) snprintf( errmsg, sizeof(errmsg), msg, arg );
999	synerr( errmsg );
1000	}
1001
1002
1003/* synerr - report a syntax error */
1004
1005void synerr( str )
1006const char *str;
1007	{
1008	syntaxerror = true;
1009	pinpoint_message( str );
1010	}
1011
1012
1013/* format_warn - write out formatted warning */
1014
1015void format_warn( msg, arg )
1016const char *msg, arg[];
1017	{
1018	char warn_msg[MAXLINE];
1019
1020	snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1021	warn( warn_msg );
1022	}
1023
1024
1025/* warn - report a warning, unless -w was given */
1026
1027void warn( str )
1028const char *str;
1029	{
1030	line_warning( str, linenum );
1031	}
1032
1033/* format_pinpoint_message - write out a message formatted with one string,
1034 *			     pinpointing its location
1035 */
1036
1037void format_pinpoint_message( msg, arg )
1038const char *msg, arg[];
1039	{
1040	char errmsg[MAXLINE];
1041
1042	snprintf( errmsg, sizeof(errmsg), msg, arg );
1043	pinpoint_message( errmsg );
1044	}
1045
1046
1047/* pinpoint_message - write out a message, pinpointing its location */
1048
1049void pinpoint_message( str )
1050const char *str;
1051	{
1052	line_pinpoint( str, linenum );
1053	}
1054
1055
1056/* line_warning - report a warning at a given line, unless -w was given */
1057
1058void line_warning( str, line )
1059const char *str;
1060int line;
1061	{
1062	char warning[MAXLINE];
1063
1064	if ( ! nowarn )
1065		{
1066		snprintf( warning, sizeof(warning), "warning, %s", str );
1067		line_pinpoint( warning, line );
1068		}
1069	}
1070
1071
1072/* line_pinpoint - write out a message, pinpointing it at the given line */
1073
1074void line_pinpoint( str, line )
1075const char *str;
1076int line;
1077	{
1078	fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1079	}
1080
1081
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 */

void yyerror(const char *msg)
{
	(void) msg;	/* deliberately unused */
}
1090