1/*	$OpenBSD: lalr.c,v 1.19 2017/05/25 20:11:03 tedu Exp $	*/
2/*	$NetBSD: lalr.c,v 1.4 1996/03/19 03:21:33 jtc Exp $	*/
3
4/*
5 * Copyright (c) 1989 The Regents of the University of California.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Robert Paul Corbett.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include "defs.h"
37
38typedef struct shorts {
39	struct shorts *next;
40	short value;
41} shorts;
42
43int tokensetsize;
44short *lookaheads;
45short *LAruleno;
46unsigned *LA;
47short *accessing_symbol;
48core **state_table;
49shifts **shift_table;
50reductions **reduction_table;
51short *goto_map;
52short *from_state;
53short *to_state;
54
55short **transpose(short **, int);
56void set_state_table(void);
57void set_accessing_symbol(void);
58void set_shift_table(void);
59void set_reduction_table(void);
60void set_maxrhs(void);
61void initialize_LA(void);
62void set_goto_map(void);
63void initialize_F(void);
64void build_relations(void);
65void compute_FOLLOWS(void);
66void compute_lookaheads(void);
67int map_goto(int, int);
68void digraph(short **);
69void add_lookback_edge(int, int, int);
70void traverse(int);
71
72static int infinity;
73static int maxrhs;
74static int ngotos;
75static unsigned *F;
76static short **includes;
77static shorts **lookback;
78static short **R;
79static short *INDEX;
80static short *VERTICES;
81static int top;
82
83void
84lalr(void)
85{
86	tokensetsize = WORDSIZE(ntokens);
87
88	set_state_table();
89	set_accessing_symbol();
90	set_shift_table();
91	set_reduction_table();
92	set_maxrhs();
93	initialize_LA();
94	set_goto_map();
95	initialize_F();
96	build_relations();
97	compute_FOLLOWS();
98	compute_lookaheads();
99	free_derives();
100	free_nullable();
101}
102
103
104void
105set_state_table(void)
106{
107	core *sp;
108
109	state_table = NEW2(nstates, core *);
110	for (sp = first_state; sp; sp = sp->next)
111		state_table[sp->number] = sp;
112}
113
114
115void
116set_accessing_symbol(void)
117{
118	core *sp;
119
120	accessing_symbol = NEW2(nstates, short);
121	for (sp = first_state; sp; sp = sp->next)
122		accessing_symbol[sp->number] = sp->accessing_symbol;
123}
124
125
126void
127set_shift_table(void)
128{
129	shifts *sp;
130
131	shift_table = NEW2(nstates, shifts *);
132	for (sp = first_shift; sp; sp = sp->next)
133		shift_table[sp->number] = sp;
134}
135
136
137void
138set_reduction_table(void)
139{
140	reductions *rp;
141
142	reduction_table = NEW2(nstates, reductions *);
143	for (rp = first_reduction; rp; rp = rp->next)
144		reduction_table[rp->number] = rp;
145}
146
147
148void
149set_maxrhs(void)
150{
151	short *itemp, *item_end;
152	int length, max;
153
154	length = 0;
155	max = 0;
156	item_end = ritem + nitems;
157	for (itemp = ritem; itemp < item_end; itemp++) {
158		if (*itemp >= 0) {
159			length++;
160		} else {
161			if (length > max) max = length;
162			length = 0;
163		}
164	}
165
166	maxrhs = max;
167}
168
169
170void
171initialize_LA(void)
172{
173	int i, j, k;
174	reductions *rp;
175
176	lookaheads = NEW2(nstates + 1, short);
177
178	k = 0;
179	for (i = 0; i < nstates; i++) {
180		lookaheads[i] = k;
181		rp = reduction_table[i];
182		if (rp)
183			k += rp->nreds;
184	}
185	lookaheads[nstates] = k;
186
187	LA = NEW2(k * tokensetsize, unsigned);
188	LAruleno = NEW2(k, short);
189	lookback = NEW2(k, shorts *);
190
191	k = 0;
192	for (i = 0; i < nstates; i++) {
193		rp = reduction_table[i];
194		if (rp) {
195			for (j = 0; j < rp->nreds; j++) {
196				LAruleno[k] = rp->rules[j];
197				k++;
198			}
199		}
200	}
201}
202
203void
204set_goto_map(void)
205{
206	shifts *sp;
207	int i, k, symbol;
208	int state1, state2;
209	short *temp_map;
210
211	goto_map = NEW2(nvars + 1, short) - ntokens;
212	temp_map = NEW2(nvars + 1, short) - ntokens;
213
214	ngotos = 0;
215	for (sp = first_shift; sp; sp = sp->next) {
216		for (i = sp->nshifts - 1; i >= 0; i--) {
217			symbol = accessing_symbol[sp->shift[i]];
218
219			if (ISTOKEN(symbol)) break;
220
221			if (ngotos == MAXSHORT)
222				fatal("too many gotos");
223
224			ngotos++;
225			goto_map[symbol]++;
226		}
227	}
228
229	k = 0;
230	for (i = ntokens; i < nsyms; i++) {
231		temp_map[i] = k;
232		k += goto_map[i];
233	}
234
235	for (i = ntokens; i < nsyms; i++)
236		goto_map[i] = temp_map[i];
237
238	goto_map[nsyms] = ngotos;
239	temp_map[nsyms] = ngotos;
240
241	from_state = NEW2(ngotos, short);
242	to_state = NEW2(ngotos, short);
243
244	for (sp = first_shift; sp; sp = sp->next) {
245		state1 = sp->number;
246		for (i = sp->nshifts - 1; i >= 0; i--) {
247			state2 = sp->shift[i];
248			symbol = accessing_symbol[state2];
249
250			if (ISTOKEN(symbol)) break;
251
252			k = temp_map[symbol]++;
253			from_state[k] = state1;
254			to_state[k] = state2;
255		}
256	}
257
258	free(temp_map + ntokens);
259}
260
261
262
263/*  Map_goto maps a state/symbol pair into its numeric representation.	*/
264
265int
266map_goto(int state, int symbol)
267{
268	int high, low, middle, s;
269
270	low = goto_map[symbol];
271	high = goto_map[symbol + 1];
272
273	for (;;) {
274		assert(low <= high);
275		middle = (low + high) >> 1;
276		s = from_state[middle];
277		if (s == state)
278			return (middle);
279		else if (s < state)
280			low = middle + 1;
281		else
282			high = middle - 1;
283	}
284}
285
286
287void
288initialize_F(void)
289{
290	int i, j, k;
291	shifts *sp;
292	short *edge, *rp, **reads;
293	unsigned int *rowp;
294	int nedges, stateno, symbol, nwords;
295
296	nwords = ngotos * tokensetsize;
297	F = NEW2(nwords, unsigned);
298
299	reads = NEW2(ngotos, short *);
300	edge = NEW2(ngotos + 1, short);
301	nedges = 0;
302
303	rowp = F;
304	for (i = 0; i < ngotos; i++) {
305		stateno = to_state[i];
306		sp = shift_table[stateno];
307
308		if (sp) {
309			k = sp->nshifts;
310
311			for (j = 0; j < k; j++) {
312				symbol = accessing_symbol[sp->shift[j]];
313				if (ISVAR(symbol))
314					break;
315				SETBIT(rowp, symbol);
316			}
317
318			for (; j < k; j++) {
319				symbol = accessing_symbol[sp->shift[j]];
320				if (nullable[symbol])
321					edge[nedges++] = map_goto(stateno, symbol);
322			}
323
324			if (nedges) {
325				reads[i] = rp = NEW2(nedges + 1, short);
326
327				for (j = 0; j < nedges; j++)
328					rp[j] = edge[j];
329
330				rp[nedges] = -1;
331				nedges = 0;
332			}
333		}
334
335		rowp += tokensetsize;
336	}
337
338	SETBIT(F, 0);
339	digraph(reads);
340
341	for (i = 0; i < ngotos; i++)
342		free(reads[i]);
343
344	free(reads);
345	free(edge);
346}
347
348
349void
350build_relations(void)
351{
352	int i, j, k;
353	short *rulep, *rp;
354	shifts *sp;
355	int length, nedges, done;
356	int state1, stateno, symbol1, symbol2;
357	short *shortp, *edge, *states;
358	short **new_includes;
359
360	includes = NEW2(ngotos, short *);
361	edge = NEW2(ngotos + 1, short);
362	states = NEW2(maxrhs + 1, short);
363
364	for (i = 0; i < ngotos; i++) {
365		nedges = 0;
366		state1 = from_state[i];
367		symbol1 = accessing_symbol[to_state[i]];
368
369		for (rulep = derives[symbol1]; *rulep >= 0; rulep++) {
370			length = 1;
371			states[0] = state1;
372			stateno = state1;
373
374			for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++) {
375				symbol2 = *rp;
376				sp = shift_table[stateno];
377				k = sp->nshifts;
378
379				for (j = 0; j < k; j++) {
380					stateno = sp->shift[j];
381					if (accessing_symbol[stateno] == symbol2)
382						break;
383				}
384
385				states[length++] = stateno;
386			}
387
388			add_lookback_edge(stateno, *rulep, i);
389
390			length--;
391			done = 0;
392			while (!done) {
393				done = 1;
394				rp--;
395				if (ISVAR(*rp)) {
396					stateno = states[--length];
397					edge[nedges++] = map_goto(stateno, *rp);
398					if (nullable[*rp] && length > 0)
399						done = 0;
400				}
401			}
402		}
403
404		if (nedges) {
405			includes[i] = shortp = NEW2(nedges + 1, short);
406			for (j = 0; j < nedges; j++)
407				shortp[j] = edge[j];
408			shortp[nedges] = -1;
409		}
410	}
411
412	new_includes = transpose(includes, ngotos);
413
414	for (i = 0; i < ngotos; i++)
415		free(includes[i]);
416
417	free(includes);
418
419	includes = new_includes;
420
421	free(edge);
422	free(states);
423}
424
425void
426add_lookback_edge(int stateno, int ruleno, int gotono)
427{
428	int i, k, found;
429	shorts *sp;
430
431	i = lookaheads[stateno];
432	k = lookaheads[stateno + 1];
433	found = 0;
434	while (!found && i < k) {
435		if (LAruleno[i] == ruleno)
436			found = 1;
437		else
438			++i;
439	}
440	assert(found);
441
442	sp = NEW(shorts);
443	sp->next = lookback[i];
444	sp->value = gotono;
445	lookback[i] = sp;
446}
447
448
449
450short **
451transpose(short **old_R, int n)
452{
453	short **new_R, **temp_R, *nedges, *sp;
454	int i, k;
455
456	nedges = NEW2(n, short);
457
458	for (i = 0; i < n; i++) {
459		sp = old_R[i];
460		if (sp) {
461			while (*sp >= 0)
462				nedges[*sp++]++;
463		}
464	}
465
466	new_R = NEW2(n, short *);
467	temp_R = NEW2(n, short *);
468
469	for (i = 0; i < n; i++) {
470		k = nedges[i];
471		if (k > 0) {
472			sp = NEW2(k + 1, short);
473			new_R[i] = sp;
474			temp_R[i] = sp;
475			sp[k] = -1;
476		}
477	}
478
479	free(nedges);
480
481	for (i = 0; i < n; i++) {
482		sp = old_R[i];
483		if (sp) {
484			while (*sp >= 0)
485				*temp_R[*sp++]++ = i;
486		}
487	}
488
489	free(temp_R);
490
491	return (new_R);
492}
493
494
495void
496compute_FOLLOWS(void)
497{
498	digraph(includes);
499}
500
501void
502compute_lookaheads(void)
503{
504	int i, n;
505	unsigned int *fp1, *fp2, *fp3;
506	shorts *sp, *next;
507	unsigned int *rowp;
508
509	rowp = LA;
510	n = lookaheads[nstates];
511	for (i = 0; i < n; i++) {
512		fp3 = rowp + tokensetsize;
513		for (sp = lookback[i]; sp; sp = sp->next) {
514			fp1 = rowp;
515			fp2 = F + tokensetsize * sp->value;
516			while (fp1 < fp3)
517				*fp1++ |= *fp2++;
518		}
519		rowp = fp3;
520	}
521
522	for (i = 0; i < n; i++)
523		for (sp = lookback[i]; sp; sp = next) {
524			next = sp->next;
525			free(sp);
526		}
527
528	free(lookback);
529	free(F);
530}
531
532void
533digraph(short **relation)
534{
535	int i;
536
537	infinity = ngotos + 2;
538	INDEX = NEW2(ngotos + 1, short);
539	VERTICES = NEW2(ngotos + 1, short);
540	top = 0;
541	R = relation;
542
543	memset(INDEX, 0, ngotos * sizeof(short));
544	for (i = 0; i < ngotos; i++)
545		if (R[i])
546			traverse(i);
547
548	free(INDEX);
549	free(VERTICES);
550}
551
552
553void
554traverse(int i)
555{
556	unsigned int *base, *fp1, *fp2, *fp3;
557	int j, height;
558	short *rp;
559
560	VERTICES[++top] = i;
561	INDEX[i] = height = top;
562
563	base = F + i * tokensetsize;
564	fp3 = base + tokensetsize;
565
566	rp = R[i];
567	if (rp) {
568		while ((j = *rp++) >= 0) {
569			if (INDEX[j] == 0)
570				traverse(j);
571
572			if (INDEX[i] > INDEX[j])
573				INDEX[i] = INDEX[j];
574
575			fp1 = base;
576			fp2 = F + j * tokensetsize;
577
578			while (fp1 < fp3)
579				*fp1++ |= *fp2++;
580		}
581	}
582
583	if (INDEX[i] == height) {
584		for (;;) {
585			j = VERTICES[top--];
586			INDEX[j] = infinity;
587
588			if (i == j)
589				break;
590
591			fp1 = base;
592			fp2 = F + j * tokensetsize;
593
594			while (fp1 < fp3)
595				*fp2++ = *fp1++;
596		}
597	}
598}
599