1%{
2/*-
3 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4 * Copyright 2015 John Marino <draco@marino.st>
5 *
6 * This source code is derived from the illumos localedef command, and
7 * provided under BSD-style license terms by Nexenta Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * POSIX localedef grammar.
34 */
35
36#include <wchar.h>
37#include <stdio.h>
38#include <limits.h>
39#include "localedef.h"
40
41%}
/*
 * Semantic value carried by tokens.  The typed %token declarations in
 * this file bind <num> to T_NUMBER, <wc> to T_CHAR, <token> to T_NAME and
 * T_SYMBOL, <collsym> to T_COLLSYM and <collelem> to T_COLLELEM.
 */
42%union {
43	int		num;
44	wchar_t		wc;
45	char		*token;
46	collsym_t	*collsym;
47	collelem_t	*collelem;
48}
49
/*
 * Valueless tokens for the global settings, punctuation and section
 * framing.  T_LT, T_GT, T_NULL and T_WS are not referenced by any rule
 * visible in this grammar; presumably they exist for the scanner -- confirm
 * before removing.
 */
50%token		T_CODE_SET
51%token		T_MB_CUR_MAX
52%token		T_MB_CUR_MIN
53%token		T_COM_CHAR
54%token		T_ESC_CHAR
55%token		T_LT
56%token		T_GT
57%token		T_NL
58%token		T_SEMI
59%token		T_COMMA
60%token		T_ELLIPSIS
61%token		T_RPAREN
62%token		T_LPAREN
63%token		T_QUOTE
64%token		T_NULL
65%token		T_WS
66%token		T_END
67%token		T_COPY
68%token		T_CHARMAP
69%token		T_WIDTH
/*
 * Tokens consumed by the ctype / ctype_kw rules: the section keyword,
 * the character-class keywords and the two case-conversion keywords.
 */
70%token		T_CTYPE
71%token		T_ISUPPER
72%token		T_ISLOWER
73%token		T_ISALPHA
74%token		T_ISDIGIT
75%token		T_ISPUNCT
76%token		T_ISXDIGIT
77%token		T_ISSPACE
78%token		T_ISPRINT
79%token		T_ISGRAPH
80%token		T_ISBLANK
81%token		T_ISCNTRL
82%token		T_ISALNUM
83%token		T_ISSPECIAL
84%token		T_ISPHONOGRAM
85%token		T_ISIDEOGRAM
86%token		T_ISENGLISH
87%token		T_ISNUMBER
88%token		T_TOUPPER
89%token		T_TOLOWER
/*
 * Tokens consumed by the collate rules.  T_FORWARD, T_BACKWARD and
 * T_POSITION are also passed by value to add_order_bit() in the actions.
 */
90%token		T_COLLATE
91%token		T_COLLATING_SYMBOL
92%token		T_COLLATING_ELEMENT
93%token		T_ORDER_START
94%token		T_ORDER_END
95%token		T_FORWARD
96%token		T_BACKWARD
97%token		T_POSITION
98%token		T_FROM
99%token		T_UNDEFINED
100%token		T_IGNORE
/* Tokens consumed by the messages / messages_kw rules. */
101%token		T_MESSAGES
102%token		T_YESSTR
103%token		T_NOSTR
104%token		T_YESEXPR
105%token		T_NOEXPR
/*
 * Tokens consumed by the monetary rules: the section keyword, the
 * string-valued keywords (monetary_strkw), T_MON_GROUPING, and the
 * number-valued keywords (monetary_numkw).
 */
106%token		T_MONETARY
107%token		T_INT_CURR_SYMBOL
108%token		T_CURRENCY_SYMBOL
109%token		T_MON_DECIMAL_POINT
110%token		T_MON_THOUSANDS_SEP
111%token		T_POSITIVE_SIGN
112%token		T_NEGATIVE_SIGN
113%token		T_MON_GROUPING
114%token		T_INT_FRAC_DIGITS
115%token		T_FRAC_DIGITS
116%token		T_P_CS_PRECEDES
117%token		T_P_SEP_BY_SPACE
118%token		T_N_CS_PRECEDES
119%token		T_N_SEP_BY_SPACE
120%token		T_P_SIGN_POSN
121%token		T_N_SIGN_POSN
122%token		T_INT_P_CS_PRECEDES
123%token		T_INT_N_CS_PRECEDES
124%token		T_INT_P_SEP_BY_SPACE
125%token		T_INT_N_SEP_BY_SPACE
126%token		T_INT_P_SIGN_POSN
127%token		T_INT_N_SIGN_POSN
/* Tokens consumed by the numeric rules. */
128%token		T_NUMERIC
129%token		T_DECIMAL_POINT
130%token		T_THOUSANDS_SEP
131%token		T_GROUPING
/*
 * Tokens consumed by the time rules: the section keyword plus the
 * keywords split between time_listkw (string lists) and time_strkw
 * (single strings).
 */
132%token		T_TIME
133%token		T_ABDAY
134%token		T_DAY
135%token		T_ABMON
136%token		T_MON
137%token		T_ERA
138%token		T_ERA_D_FMT
139%token		T_ERA_T_FMT
140%token		T_ERA_D_T_FMT
141%token		T_ALT_DIGITS
142%token		T_D_T_FMT
143%token		T_D_FMT
144%token		T_T_FMT
145%token		T_AM_PM
146%token		T_T_FMT_AMPM
147%token		T_DATE_FMT
/* Tokens that carry a semantic value; the <tag> names a %union member. */
148%token	<wc>		T_CHAR
149%token	<token>		T_NAME
150%token	<num>		T_NUMBER
151%token	<token>		T_SYMBOL
152%token	<collsym>	T_COLLSYM
153%token	<collelem>	T_COLLELEM
154
155%%
156
/*
 * First rule of the grammar, hence the start symbol (no %start is declared
 * in the visible declarations): an optional run of settings followed by
 * one or more categories.
 */
157localedef	: setting_list categories
158		| categories
159		;
160
/*
 * A quoted string, possibly empty.  The rule has no action of its own;
 * the characters are handed to add_wcs() by charlist and the users of
 * this rule fetch the result with get_wcs().  For the empty form add_wcs()
 * is never called -- what get_wcs() returns then is not visible here.
 */
161string		: T_QUOTE charlist T_QUOTE
162		| T_QUOTE T_QUOTE
163		;
164
/*
 * One or more T_CHAR tokens (left-recursive, so characters are reported in
 * input order).  Each character is passed to add_wcs(); presumably that
 * appends to the buffer later returned by get_wcs() -- confirm in the
 * helper source.
 */
165charlist	: charlist T_CHAR
166		{
167			add_wcs($2);
168		}
169		| T_CHAR
170		{
171			add_wcs($1);
172		}
173		;
174
/* One or more setting lines. */
175setting_list	: setting_list setting
176		| setting
177		;
178
179
/*
 * A single global setting line, terminated by T_NL.  The character and
 * number forms store the value into com_char, esc_char, mb_cur_max or
 * mb_cur_min (declared outside this file).  The code-set form accepts
 * either a quoted string or a bare T_NAME and passes it to
 * set_wide_encoding().
 */
180setting		: T_COM_CHAR T_CHAR T_NL
181		{
182			com_char = $2;
183		}
184		| T_ESC_CHAR T_CHAR T_NL
185		{
186			esc_char = $2;
187		}
188		| T_MB_CUR_MAX T_NUMBER T_NL
189		{
190			mb_cur_max = $2;
191		}
192		| T_MB_CUR_MIN T_NUMBER T_NL
193		{
194			mb_cur_min = $2;
195		}
196		| T_CODE_SET string T_NL
197		{
198			/*
199			 * The wide buffer is released here; the value returned
200			 * by to_mb_string() is not.
201			 * NOTE(review): ownership of that value is not visible
			 * in this file -- confirm it is not leaked.
			 */
198			wchar_t *w = get_wcs();
199			set_wide_encoding(to_mb_string(w));
200			free(w);
201		}
202		| T_CODE_SET T_NAME T_NL
203		{
204			set_wide_encoding($2);
205		}
206		;
207
/*
 * "copy" directive used as the whole body of a category.  The source name
 * may be a bare T_NAME or a quoted string; either way it is passed to
 * copy_category().
 */
208copycat		: T_COPY T_NAME T_NL
209		{
210			copy_category($2);
211		}
212		| T_COPY string T_NL
213		{
214			/*
			 * Only the wide buffer is freed here.
			 * NOTE(review): ownership of the to_mb_string() result
			 * is not visible in this file -- confirm.
			 */
214			wchar_t *w = get_wcs();
215			copy_category(to_mb_string(w));
216			free(w);
217		}
218		;
219
/* One or more category sections, in any order. */
220categories	: categories category
221		| category
222		;
223
224
/* Dispatch to one of the section grammars defined further down. */
225category	: charmap
226		| messages
227		| monetary
228		| ctype
229		| collate
230		| numeric
231		| time
232		;
233
234
/*
 * A CHARMAP or WIDTH section.  Each must be closed by T_END followed by
 * the same keyword that opened it.  Neither alternative has an action;
 * all work happens in the entry rules.
 */
235charmap		: T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
236		| T_WIDTH T_NL width_list T_END T_WIDTH T_NL
237		;
238
239
/* One or more charmap entries (blank lines count as entries). */
240charmap_list	: charmap_list charmap_entry
241		| charmap_entry
242		;
243
244
/*
 * One charmap line: a single symbol/character mapping, a symbol range
 * with the character for its first element, or an empty line.  The mapping
 * alternatives do not consume a T_NL in the grammar; instead the action
 * calls scan_to_eol(), presumably to make the scanner discard the rest of
 * the line -- confirm in the scanner source.
 */
245charmap_entry	: T_SYMBOL T_CHAR
246		{
247			add_charmap($1, $2);
248			scan_to_eol();
249		}
250		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
251		{
252			add_charmap_range($1, $3, $4);
253			scan_to_eol();
254		}
255		| T_NL
256		;
257
/* One or more width entries (blank lines count as entries). */
258width_list	: width_list width_entry
259		| width_entry
260		;
261
/*
 * One WIDTH line.  An endpoint arrives either as T_CHAR or as T_SYMBOL;
 * every T_SYMBOL endpoint is reported through add_charmap_undefined()
 * (presumably the scanner returns T_SYMBOL for names without a charmap
 * definition -- confirm).  A width is only recorded for T_CHAR endpoints:
 * a full range needs both ends as T_CHAR, and a range with a single T_CHAR
 * end records the width for that one character only.
 */
262width_entry	: T_CHAR T_NUMBER T_NL
263		{
264			add_width($1, $2);
265		}
266		| T_SYMBOL T_NUMBER T_NL
267		{
268			add_charmap_undefined($1);
269		}
270		| T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
271		{
272			add_width_range($1, $3, $4);
273		}
274		| T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
275		{
276			add_charmap_undefined($1);
277			add_charmap_undefined($3);
278		}
279		| T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
280		{
281			add_width($1, $4);
282			add_charmap_undefined($3);
283		}
284		| T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
285		{
286			add_width($3, $4);
287			add_charmap_undefined($1);
288		}
289		| T_NL
290		;
291
/*
 * The ctype section.  dump_ctype() is called only when the section has an
 * explicit keyword list; the "copy" form has no action here.
 */
292ctype		: T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
293		{
294			dump_ctype();
295		}
296		| T_CTYPE T_NL copycat  T_END T_CTYPE T_NL
297		;
298
/* One or more ctype keyword lines. */
299ctype_list	: ctype_list ctype_kw
300		| ctype_kw
301		;
302
/*
 * One ctype line: a class keyword with a character list, or a conversion
 * keyword with a list of pairs.  The keyword itself is not passed to any
 * action, so add_ctype()/add_caseconv() presumably learn the active
 * keyword from scanner state -- confirm in the helper source.
 */
303ctype_kw	: T_ISUPPER cc_list T_NL
304		| T_ISLOWER cc_list T_NL
305		| T_ISALPHA cc_list T_NL
306		| T_ISDIGIT cc_list T_NL
307		| T_ISPUNCT cc_list T_NL
308		| T_ISXDIGIT cc_list T_NL
309		| T_ISSPACE cc_list T_NL
310		| T_ISPRINT cc_list T_NL
311		| T_ISGRAPH cc_list T_NL
312		| T_ISBLANK cc_list T_NL
313		| T_ISCNTRL cc_list T_NL
314		| T_ISALNUM cc_list T_NL
315		| T_ISSPECIAL cc_list T_NL
316		| T_ISENGLISH cc_list T_NL
317		| T_ISNUMBER cc_list T_NL
318		| T_ISIDEOGRAM cc_list T_NL
319		| T_ISPHONOGRAM cc_list T_NL
320		| T_TOUPPER conv_list T_NL
321		| T_TOLOWER conv_list T_NL
322		;
323
/*
 * Semicolon-separated class members.  The list must start with a cc_char;
 * a range end ("...;X") can only follow an existing element.
 */
324cc_list		: cc_list T_SEMI cc_range_end
325		| cc_list T_SEMI cc_char
326		| cc_char
327		;
328
/*
 * Tail of a range written as "<start>;...;<end>".  Only the end character
 * is passed to add_ctype_range(); the start is presumably the last
 * character given to add_ctype() -- confirm in the helper source.  Only
 * T_CHAR is accepted as the end point.
 */
329cc_range_end	: T_ELLIPSIS T_SEMI T_CHAR
330		{
331			add_ctype_range($3);
332		}
333		;
334
/*
 * A single class member: a T_CHAR is added with add_ctype(), while a
 * T_SYMBOL is only reported through add_charmap_undefined().
 */
335cc_char		: T_CHAR
336		{
337			add_ctype($1);
338		}
339		| T_SYMBOL
340		{
341			add_charmap_undefined($1);
342		}
343		;
344
/* Semicolon-separated list of one or more conversion pairs. */
345conv_list	: conv_list T_SEMI conv_pair
346		| conv_pair
347		;
348
349
/*
 * A "(from,to)" pair.  add_caseconv() is called only when both sides are
 * T_CHAR; any T_SYMBOL side is reported through add_charmap_undefined()
 * and the pair is otherwise dropped.
 */
350conv_pair	: T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
351		{
352			add_caseconv($2, $4);
353		}
354		| T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
355		{
356			add_charmap_undefined($2);
357		}
358		| T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
359		{
360			add_charmap_undefined($2);
361			add_charmap_undefined($4);
362		}
363		| T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
364		{
365			add_charmap_undefined($4);
366		}
367		;
368
/*
 * The collate section: optional symbol/element definitions followed by a
 * mandatory order block, or a "copy" directive.  dump_collate() is called
 * for the two explicit forms only; the copy form has no action here.
 */
369collate		: T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
370		{
371			dump_collate();
372		}
373		| T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
374		{
375			dump_collate();
376		}
377		| T_COLLATE T_NL copycat T_END T_COLLATE T_NL
378		;
379
380
/*
 * One or more collating-symbol / collating-element definitions, freely
 * interleaved, preceding the order block.
 */
381coll_optional	: coll_optional coll_symbols
382		| coll_optional coll_elements
383		| coll_symbols
384		| coll_elements
385		;
386
387
/* A collating-symbol line; the symbol name is passed to define_collsym(). */
388coll_symbols	: T_COLLATING_SYMBOL T_SYMBOL T_NL
389		{
390			define_collsym($2);
391		}
392		;
393
394
/*
 * A collating-element line: "<keyword> <symbol> from <string>".  The
 * buffer returned by get_wcs() is handed to define_collelem() and not
 * freed here; presumably the callee takes ownership -- confirm.
 */
395coll_elements	: T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
396		{
397			define_collelem($2, get_wcs());
398		}
399		;
400
/*
 * The order block, with or without directive arguments after the
 * order-start keyword.
 */
401coll_order	: T_ORDER_START T_NL order_list T_ORDER_END T_NL
402		{
403			/*
			 * No order_args were given after the order-start
			 * keyword: default to a single forward directive.
			 * NOTE(review): as an end-of-rule action this runs
			 * only after order_list has been fully parsed.
			 */
404			add_order_bit(T_FORWARD);
405			add_order_directive();
406		}
407		| T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
408		;
409
410
/*
 * Semicolon-separated order directives.  add_order_directive() is called
 * once for each completed order_arg.
 */
411order_args	: order_args T_SEMI order_arg
412		{
413			add_order_directive();
414		}
415		| order_arg
416		{
417			add_order_directive();
418		}
419		;
420
/* One directive: a comma-separated list of one or more order_dir flags. */
421order_arg	: order_arg T_COMMA order_dir
422		| order_dir
423		;
424
/*
 * A single directive flag.  The token code itself is what gets passed to
 * add_order_bit().
 */
425order_dir	: T_FORWARD
426		{
427			add_order_bit(T_FORWARD);
428		}
429		| T_BACKWARD
430		{
431			add_order_bit(T_BACKWARD);
432		}
433		| T_POSITION
434		{
435			add_order_bit(T_POSITION);
436		}
437		;
438
/* One or more lines of the order block. */
439order_list	: order_list order_item
440		| order_item
441		;
442
/*
 * One line of the order block.  A bare T_COLLSYM line is finished with
 * end_order_collsym(); every other line starts in order_itemkw, may carry
 * weights, and is finished with end_order().  There is no alternative for
 * an empty (T_NL-only) line.
 */
443order_item	: T_COLLSYM T_NL
444		{
445			end_order_collsym($1);
446		}
447		| order_itemkw T_NL
448		{
449			end_order();
450		}
451		| order_itemkw order_weights T_NL
452		{
453			end_order();
454		}
455		;
456
/*
 * Leading element of an order line.  Each kind calls its own start_order_*()
 * helper; because these are end-of-rule actions on a one-token rule, the
 * helper runs before any weight on the same line is reduced.
 */
457order_itemkw	: T_CHAR
458		{
459			start_order_char($1);
460		}
461		| T_ELLIPSIS
462		{
463			start_order_ellipsis();
464		}
465		| T_COLLELEM
466		{
467			start_order_collelem($1);
468		}
469		| T_UNDEFINED
470		{
471			start_order_undefined();
472		}
473		| T_SYMBOL
474		{
475			start_order_symbol($1);
476		}
477		;
478
/*
 * Semicolon-separated weights.  The second alternative accepts a
 * semicolon with nothing after it (a trailing or doubled ';') and has no
 * action; the list must still begin with a real weight.
 */
479order_weights	: order_weights T_SEMI order_weight
480		| order_weights T_SEMI
481		| order_weight
482		;
483
/*
 * A single weight.  Each kind is forwarded to its own add_order_*()
 * helper.  The quoted form collects its contents through order_str and
 * then calls add_order_subst(); it requires at least one item between the
 * quotes.
 */
484order_weight	: T_COLLELEM
485		{
486			add_order_collelem($1);
487		}
488		| T_COLLSYM
489		{
490			add_order_collsym($1);
491		}
492		| T_CHAR
493		{
494			add_order_char($1);
495		}
496		| T_ELLIPSIS
497		{
498			add_order_ellipsis();
499		}
500		| T_IGNORE
501		{
502			add_order_ignore();
503		}
504		| T_SYMBOL
505		{
506			add_order_symbol($1);
507		}
508		| T_QUOTE order_str T_QUOTE
509		{
510			add_order_subst();
511		}
512		;
513
/* One or more items inside a quoted weight. */
514order_str	: order_str order_stritem
515		| order_stritem
516		;
517
/*
 * One item inside a quoted weight, forwarded to the matching
 * add_subst_*() helper.
 */
518order_stritem	: T_CHAR
519		{
520			add_subst_char($1);
521		}
522		| T_COLLSYM
523		{
524			add_subst_collsym($1);
525		}
526		| T_COLLELEM
527		{
528			add_subst_collelem($1);
529		}
530		| T_SYMBOL
531		{
532			add_subst_symbol($1);
533		}
534		;
535
/*
 * The messages section.  dump_messages() is called only for an explicit
 * item list; the "copy" form has no action here.
 */
536messages	: T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
537		{
538			dump_messages();
539		}
540		| T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
541		;
542
/* One or more message lines. */
543messages_list	: messages_list messages_item
544		| messages_item
545		;
546
/* Keywords that take a single string value in the messages section. */
547messages_kw	: T_YESSTR
548		| T_NOSTR
549		| T_YESEXPR
550		| T_NOEXPR
551		;
552
/*
 * "<keyword> <string>" line.  Only the string reaches add_message(); the
 * keyword is not passed, so the helper presumably obtains it from scanner
 * state -- confirm in the helper source.
 */
553messages_item	: messages_kw string T_NL
554		{
555			add_message(get_wcs());
556		}
557		;
558
/*
 * The monetary section.  dump_monetary() is called only for an explicit
 * keyword list; the "copy" form has no action here.
 */
559monetary	: T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
560		{
561			dump_monetary();
562		}
563		| T_MONETARY T_NL copycat T_END T_MONETARY T_NL
564		;
565
/* One or more monetary keyword lines. */
566monetary_list	: monetary_list monetary_kw
567		| monetary_kw
568		;
569
/* Monetary keywords whose value is a quoted string. */
570monetary_strkw	: T_INT_CURR_SYMBOL
571		| T_CURRENCY_SYMBOL
572		| T_MON_DECIMAL_POINT
573		| T_MON_THOUSANDS_SEP
574		| T_POSITIVE_SIGN
575		| T_NEGATIVE_SIGN
576		;
577
/* Monetary keywords whose value is a single T_NUMBER. */
578monetary_numkw	: T_INT_FRAC_DIGITS
579		| T_FRAC_DIGITS
580		| T_P_CS_PRECEDES
581		| T_P_SEP_BY_SPACE
582		| T_N_CS_PRECEDES
583		| T_N_SEP_BY_SPACE
584		| T_P_SIGN_POSN
585		| T_N_SIGN_POSN
586		| T_INT_P_CS_PRECEDES
587		| T_INT_N_CS_PRECEDES
588		| T_INT_P_SEP_BY_SPACE
589		| T_INT_N_SEP_BY_SPACE
590		| T_INT_P_SIGN_POSN
591		| T_INT_N_SIGN_POSN
592		;
593
/*
 * One monetary line: a string keyword, a numeric keyword, or the grouping
 * list (whose work is done inside mon_group_list).  The keyword is not
 * passed to add_monetary_str()/add_monetary_num(); presumably they read it
 * from scanner state -- confirm in the helper source.
 */
594monetary_kw	: monetary_strkw string T_NL
595		{
596			add_monetary_str(get_wcs());
597		}
598		| monetary_numkw T_NUMBER T_NL
599		{
600			add_monetary_num($2);
601		}
602		| T_MON_GROUPING mon_group_list T_NL
603		;
604
/*
 * Semicolon-separated grouping numbers.  The first number triggers
 * reset_monetary_group() before being added, so each grouping line starts
 * from a reset state; later numbers are only added.
 */
605mon_group_list	: T_NUMBER
606		{
607			reset_monetary_group();
608			add_monetary_group($1);
609		}
610		| mon_group_list T_SEMI T_NUMBER
611		{
612			add_monetary_group($3);
613		}
614		;
615
616
/*
 * The numeric section.  dump_numeric() is called only for an explicit
 * item list; the "copy" form has no action here.
 */
617numeric		: T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
618		{
619			dump_numeric();
620		}
621		| T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
622		;
623
624
/* One or more numeric lines. */
625numeric_list	: numeric_list numeric_item
626		| numeric_item
627		;
628
629
/*
 * One numeric line: a string keyword with its value, or the grouping list
 * (whose work is done inside group_list).  The keyword is not passed to
 * add_numeric_str(); presumably it is read from scanner state -- confirm.
 */
630numeric_item	: numeric_strkw string T_NL
631		{
632			add_numeric_str(get_wcs());
633		}
634		| T_GROUPING group_list T_NL
635		;
636
/* Numeric keywords whose value is a quoted string. */
637numeric_strkw	: T_DECIMAL_POINT
638		| T_THOUSANDS_SEP
639		;
640
641
/*
 * Semicolon-separated grouping numbers.  The first number triggers
 * reset_numeric_group() before being added; later numbers are only added.
 */
642group_list	: T_NUMBER
643		{
644			reset_numeric_group();
645			add_numeric_group($1);
646		}
647		| group_list T_SEMI T_NUMBER
648		{
649			add_numeric_group($3);
650		}
651		;
652
653
/*
 * The time section.  dump_time() is called only for an explicit keyword
 * list; the "copy" form has no action here.  Both forms must be closed by
 * T_END followed by T_TIME, matching the keyword that opened the section
 * (the copy form previously required T_NUMERIC here, which rejected a
 * correctly terminated section and accepted a mismatched one).
 */
654time		: T_TIME T_NL time_kwlist T_END T_TIME T_NL
655		{
656			dump_time();
657		}
658		| T_TIME T_NL copycat T_END T_TIME T_NL
659		;
660
/* One or more time keyword lines. */
661time_kwlist	: time_kwlist time_kw
662		| time_kw
663		;
664
/*
 * One time line: a single-string keyword, or a list keyword followed by a
 * semicolon-separated string list.  check_time_list() runs once the whole
 * list line has been parsed.  The keyword is not passed to the helpers;
 * presumably they read it from scanner state -- confirm.
 */
665time_kw		: time_strkw string T_NL
666		{
667			add_time_str(get_wcs());
668		}
669		| time_listkw time_list T_NL
670		{
671			check_time_list();
672		}
673		;
674
/* Time keywords whose value is a list of strings. */
675time_listkw	: T_ABDAY
676		| T_DAY
677		| T_ABMON
678		| T_MON
679		| T_ERA
680		| T_ALT_DIGITS
681		| T_AM_PM
682		;
683
/* Time keywords whose value is a single quoted string. */
684time_strkw	: T_ERA_D_T_FMT
685		| T_ERA_T_FMT
686		| T_ERA_D_FMT
687		| T_D_T_FMT
688		| T_D_FMT
689		| T_T_FMT
690		| T_T_FMT_AMPM
691		| T_DATE_FMT
692		;
693
/*
 * Semicolon-separated strings.  The first string of a line triggers
 * reset_time_list() before being added; each later string is only added.
 * Every string is fetched with get_wcs() right after it is parsed.
 */
694time_list	: time_list T_SEMI string
695		{
696			add_time_list(get_wcs());
697		}
698		| string
699		{
700			reset_time_list();
701			add_time_list(get_wcs());
702		}
703		;
704