mksyntax.c revision 3044
1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Kenneth Almquist.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	$Id$
37 */
38
/* Copyright notice kept as a string constant; left out when lint is defined. */
#ifndef lint
static char copyright[] =
"@(#) Copyright (c) 1991, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

/* Source file identification string; left out when lint is defined. */
#ifndef lint
static char sccsid[] = "@(#)mksyntax.c	8.1 (Berkeley) 5/31/93";
#endif /* not lint */
48
49/*
50 * This program creates syntax.h and syntax.c.
51 */
52
53#include <stdio.h>
54#include "parser.h"
55
56
/*
 * One named class that is written to syntax.h as a "#define".
 * Tables of these are terminated by an entry whose name is NULL.
 */
struct synclass {
	char *name;	/* macro name emitted after "#define " */
	char *comment;	/* text emitted inside the trailing C comment */
};
61
62/* Syntax classes */
63struct synclass synclass[] = {
64	"CWORD",		"character is nothing special",
65	"CNL",		"newline character",
66	"CBACK",		"a backslash character",
67	"CSQUOTE",	"single quote",
68	"CDQUOTE",	"double quote",
69	"CENDQUOTE",	"a terminating quote",
70	"CBQUOTE",	"backwards single quote",
71	"CVAR",		"a dollar sign",
72	"CENDVAR",	"a '}' character",
73	"CLP",		"a left paren in arithmetic",
74	"CRP",		"a right paren in arithmetic",
75	"CEOF",		"end of file",
76	"CCTL",		"like CWORD, except it must be escaped",
77	"CSPCL",		"these terminate a word",
78	NULL, NULL
79};
80
81
82/*
83 * Syntax classes for is_ functions.  Warning:  if you add new classes
84 * you may have to change the definition of the is_in_name macro.
85 */
86struct synclass is_entry[] = {
87	"ISDIGIT",	"a digit",
88	"ISUPPER",	"an upper case letter",
89	"ISLOWER",	"a lower case letter",
90	"ISUNDER",	"an underscore",
91	"ISSPECL",	"the name of a special parameter",
92	NULL, NULL,
93};
94
/* Banner comment written at the top of both generated files. */
char writer[] =
	"/*\n"
	" * This file was generated by the mksyntax program.\n"
	" */\n"
	"\n";
100
101
/* Output streams; main() opens them before anything is written. */
FILE *hfile;		/* syntax.h */
FILE *cfile;		/* syntax.c */

/*
 * The table currently being built.  Each slot holds the text that print()
 * emits for that entry.  513 slots is (1 << 9) + 1, the largest table
 * main() allows (it refuses characters wider than 9 bits).
 */
char *syntax[513];

int nbits;		/* number of bits in a character */
int size;		/* entries per table: (1 << nbits) + 1 */
int base;		/* added to a character value to index a table */
int digit_contig;	/* true if digits are contiguous */
109
110
111main() {
112	char c;
113	char d;
114	int sign;
115	int i;
116	char buf[80];
117	int pos;
118	static char digit[] = "0123456789";
119
120	/* Create output files */
121	if ((cfile = fopen("syntax.c", "w")) == NULL) {
122		perror("syntax.c");
123		exit(2);
124	}
125	if ((hfile = fopen("syntax.h", "w")) == NULL) {
126		perror("syntax.h");
127		exit(2);
128	}
129	fputs(writer, hfile);
130	fputs(writer, cfile);
131
132	/* Determine the characteristics of chars. */
133	c = -1;
134	if (c < 0)
135		sign = 1;
136	else
137		sign = 0;
138	for (nbits = 1 ; ; nbits++) {
139		d = (1 << nbits) - 1;
140		if (d == c)
141			break;
142	}
143	printf("%s %d bit chars\n", sign? "signed" : "unsigned", nbits);
144	if (nbits > 9) {
145		fputs("Characters can't have more than 9 bits\n", stderr);
146		exit(2);
147	}
148	size = (1 << nbits) + 1;
149	base = 1;
150	if (sign)
151		base += 1 << (nbits - 1);
152	digit_contig = 1;
153	for (i = 0 ; i < 10 ; i++) {
154		if (digit[i] != '0' + i)
155			digit_contig = 0;
156	}
157
158	fputs("#include <sys/cdefs.h>\n", hfile);
159
160	/* Generate the #define statements in the header file */
161	fputs("/* Syntax classes */\n", hfile);
162	for (i = 0 ; synclass[i].name ; i++) {
163		sprintf(buf, "#define %s %d", synclass[i].name, i);
164		fputs(buf, hfile);
165		for (pos = strlen(buf) ; pos < 32 ; pos = pos + 8 &~ 07)
166			putc('\t', hfile);
167		fprintf(hfile, "/* %s */\n", synclass[i].comment);
168	}
169	putc('\n', hfile);
170	fputs("/* Syntax classes for is_ functions */\n", hfile);
171	for (i = 0 ; is_entry[i].name ; i++) {
172		sprintf(buf, "#define %s %#o", is_entry[i].name, 1 << i);
173		fputs(buf, hfile);
174		for (pos = strlen(buf) ; pos < 32 ; pos = pos + 8 &~ 07)
175			putc('\t', hfile);
176		fprintf(hfile, "/* %s */\n", is_entry[i].comment);
177	}
178	putc('\n', hfile);
179	fprintf(hfile, "#define SYNBASE %d\n", base);
180	fprintf(hfile, "#define PEOF %d\n\n", -base);
181	putc('\n', hfile);
182	fputs("#define BASESYNTAX (basesyntax + SYNBASE)\n", hfile);
183	fputs("#define DQSYNTAX (dqsyntax + SYNBASE)\n", hfile);
184	fputs("#define SQSYNTAX (sqsyntax + SYNBASE)\n", hfile);
185	fputs("#define ARISYNTAX (arisyntax + SYNBASE)\n", hfile);
186	putc('\n', hfile);
187	output_type_macros();		/* is_digit, etc. */
188	putc('\n', hfile);
189
190	/* Generate the syntax tables. */
191	fputs("#include \"shell.h\"\n", cfile);
192	fputs("#include \"syntax.h\"\n\n", cfile);
193	init();
194	fputs("/* syntax table used when not in quotes */\n", cfile);
195	add("\n", "CNL");
196	add("\\", "CBACK");
197	add("'", "CSQUOTE");
198	add("\"", "CDQUOTE");
199	add("`", "CBQUOTE");
200	add("$", "CVAR");
201	add("}", "CENDVAR");
202	add("<>();&| \t", "CSPCL");
203	print("basesyntax");
204	init();
205	fputs("\n/* syntax table used when in double quotes */\n", cfile);
206	add("\n", "CNL");
207	add("\\", "CBACK");
208	add("\"", "CENDQUOTE");
209	add("`", "CBQUOTE");
210	add("$", "CVAR");
211	add("}", "CENDVAR");
212	add("!*?[=~:/", "CCTL");	/* ':/' for tilde - yuck */
213	print("dqsyntax");
214	init();
215	fputs("\n/* syntax table used when in single quotes */\n", cfile);
216	add("\n", "CNL");
217	add("'", "CENDQUOTE");
218	add("!*?[=~:/", "CCTL");	/* ':/' for tilde - yuck */
219	print("sqsyntax");
220	init();
221	fputs("\n/* syntax table used when in arithmetic */\n", cfile);
222	add("\n", "CNL");
223	add("\\", "CBACK");
224	add("`", "CBQUOTE");
225	add("'", "CSQUOTE");
226	add("\"", "CDQUOTE");
227	add("$", "CVAR");
228	add("}", "CENDVAR");
229	add("(", "CLP");
230	add(")", "CRP");
231	print("arisyntax");
232	filltable("0");
233	fputs("\n/* character classification table */\n", cfile);
234	add("0123456789", "ISDIGIT");
235	add("abcdefghijklmnopqrstucvwxyz", "ISLOWER");
236	add("ABCDEFGHIJKLMNOPQRSTUCVWXYZ", "ISUPPER");
237	add("_", "ISUNDER");
238	add("#?$!-*@", "ISSPECL");
239	print("is_type");
240	if (! digit_contig)
241		digit_convert();
242	exit(0);
243}
244
245
246
247/*
248 * Clear the syntax table.
249 */
250
251filltable(dftval)
252	char *dftval;
253	{
254	int i;
255
256	for (i = 0 ; i < size ; i++)
257		syntax[i] = dftval;
258}
259
260
261/*
262 * Initialize the syntax table with default values.
263 */
264
265init() {
266	filltable("CWORD");
267	syntax[0] = "CEOF";
268	syntax[base + CTLESC] = "CCTL";
269	syntax[base + CTLVAR] = "CCTL";
270	syntax[base + CTLENDVAR] = "CCTL";
271	syntax[base + CTLBACKQ] = "CCTL";
272	syntax[base + CTLBACKQ + CTLQUOTE] = "CCTL";
273	syntax[base + CTLARI] = "CCTL";
274	syntax[base + CTLENDARI] = "CCTL";
275}
276
277
278/*
279 * Add entries to the syntax table.
280 */
281
282add(p, type)
283	char *p, *type;
284	{
285	while (*p)
286		syntax[*p++ + base] = type;
287}
288
289
290
291/*
292 * Output the syntax table.
293 */
294
295print(name)
296	char *name;
297	{
298	int i;
299	int col;
300
301	fprintf(hfile, "extern const char %s[];\n", name);
302	fprintf(cfile, "const char %s[%d] = {\n", name, size);
303	col = 0;
304	for (i = 0 ; i < size ; i++) {
305		if (i == 0) {
306			fputs("      ", cfile);
307		} else if ((i & 03) == 0) {
308			fputs(",\n      ", cfile);
309			col = 0;
310		} else {
311			putc(',', cfile);
312			while (++col < 9 * (i & 03))
313				putc(' ', cfile);
314		}
315		fputs(syntax[i], cfile);
316		col += strlen(syntax[i]);
317	}
318	fputs("\n};\n", cfile);
319}
320
321
322
323/*
324 * Output character classification macros (e.g. is_digit).  If digits are
325 * contiguous, we can test for them quickly.
326 */
327
/*
 * Lines written to syntax.h by output_type_macros(), ending at the NULL
 * entry.  Entry 0 (is_digit) is replaced at run time when digits are
 * contiguous.
 */
char *macro[] = {
	"#define is_digit(c)\t"
	    "((is_type+SYNBASE)[c] & ISDIGIT)",
	"#define is_alpha(c)\t"
	    "((is_type+SYNBASE)[c] & (ISUPPER|ISLOWER))",
	"#define is_name(c)\t"
	    "((is_type+SYNBASE)[c] & (ISUPPER|ISLOWER|ISUNDER))",
	"#define is_in_name(c)\t"
	    "((is_type+SYNBASE)[c] & (ISUPPER|ISLOWER|ISUNDER|ISDIGIT))",
	"#define is_special(c)\t"
	    "((is_type+SYNBASE)[c] & (ISSPECL|ISDIGIT))",
	NULL
};
336
337output_type_macros() {
338	char **pp;
339
340	if (digit_contig)
341		macro[0] = "#define is_digit(c)\t((unsigned)((c) - '0') <= 9)";
342	for (pp = macro ; *pp ; pp++)
343		fprintf(hfile, "%s\n", *pp);
344	if (digit_contig)
345		fputs("#define digit_val(c)\t((c) - '0')\n", hfile);
346	else
347		fputs("#define digit_val(c)\t(digit_value[c])\n", hfile);
348}
349
350
351
352/*
353 * Output digit conversion table (if digits are not contiguous).
354 */
355
356digit_convert() {
357	int maxdigit;
358	static char digit[] = "0123456789";
359	char *p;
360	int i;
361
362	maxdigit = 0;
363	for (p = digit ; *p ; p++)
364		if (*p > maxdigit)
365			maxdigit = *p;
366	fputs("extern const char digit_value[];\n", hfile);
367	fputs("\n\nconst char digit_value[] = {\n", cfile);
368	for (i = 0 ; i <= maxdigit ; i++) {
369		for (p = digit ; *p && *p != i ; p++);
370		if (*p == '\0')
371			p = digit;
372		fprintf(cfile, "      %d,\n", p - digit);
373	}
374	fputs("};\n", cfile);
375}
376