159243Sobrien/*	$NetBSD: lalr.c,v 1.11 2021/02/20 22:57:56 christos Exp $	*/
259243Sobrien
3231990Smp#include "defs.h"
4231990Smp/* Id: lalr.c,v 1.13 2020/09/10 17:26:21 tom Exp  */
5231990Smp
6231990Smp#include <sys/cdefs.h>
7231990Smp__RCSID("$NetBSD: lalr.c,v 1.11 2021/02/20 22:57:56 christos Exp $");
8231990Smp
9231990Smptypedef struct shorts
10231990Smp{
1159243Sobrien    struct shorts *next;
1259243Sobrien    Value_t value;
13231990Smp}
14231990Smpshorts;
15
16static Value_t map_goto(int state, int symbol);
17static Value_t **transpose(Value_t **R, int n);
18static void add_lookback_edge(int stateno, int ruleno, int gotono);
19static void build_relations(void);
20static void compute_FOLLOWS(void);
21static void compute_lookaheads(void);
22static void digraph(Value_t **relation);
23static void initialize_F(void);
24static void initialize_LA(void);
25static void set_accessing_symbol(void);
26static void set_goto_map(void);
27static void set_maxrhs(void);
28static void set_reduction_table(void);
29static void set_shift_table(void);
30static void set_state_table(void);
31static void traverse(int i);
32
33static int tokensetsize;
34Value_t *lookaheads;
35Value_t *LAruleno;
36unsigned *LA;
37Value_t *accessing_symbol;
38core **state_table;
39shifts **shift_table;
40reductions **reduction_table;
41Value_t *goto_base;
42Value_t *goto_map;
43Value_t *from_state;
44Value_t *to_state;
45
46static Value_t infinity;
47static int maxrhs;
48static int ngotos;
49static unsigned *F;
50static Value_t **includes;
51static shorts **lookback;
52static Value_t **R;
53static Value_t *INDEX;
54static Value_t *VERTICES;
55static Value_t top;
56
57void
58lalr(void)
59{
60    tokensetsize = WORDSIZE(ntokens);
61
62    set_state_table();
63    set_accessing_symbol();
64    set_shift_table();
65    set_reduction_table();
66    set_maxrhs();
67    initialize_LA();
68    set_goto_map();
69    initialize_F();
70    build_relations();
71    compute_FOLLOWS();
72    compute_lookaheads();
73}
74
75static void
76set_state_table(void)
77{
78    core *sp;
79
80    state_table = NEW2(nstates, core *);
81    for (sp = first_state; sp; sp = sp->next)
82	state_table[sp->number] = sp;
83}
84
85static void
86set_accessing_symbol(void)
87{
88    core *sp;
89
90    accessing_symbol = NEW2(nstates, Value_t);
91    for (sp = first_state; sp; sp = sp->next)
92	accessing_symbol[sp->number] = sp->accessing_symbol;
93}
94
95static void
96set_shift_table(void)
97{
98    shifts *sp;
99
100    shift_table = NEW2(nstates, shifts *);
101    for (sp = first_shift; sp; sp = sp->next)
102	shift_table[sp->number] = sp;
103}
104
105static void
106set_reduction_table(void)
107{
108    reductions *rp;
109
110    reduction_table = NEW2(nstates, reductions *);
111    for (rp = first_reduction; rp; rp = rp->next)
112	reduction_table[rp->number] = rp;
113}
114
115static void
116set_maxrhs(void)
117{
118    Value_t *itemp;
119    Value_t *item_end;
120    int length;
121    int max;
122
123    length = 0;
124    max = 0;
125    item_end = ritem + nitems;
126    for (itemp = ritem; itemp < item_end; itemp++)
127    {
128	if (*itemp >= 0)
129	{
130	    length++;
131	}
132	else
133	{
134	    if (length > max)
135		max = length;
136	    length = 0;
137	}
138    }
139
140    maxrhs = max;
141}
142
143static void
144initialize_LA(void)
145{
146    int i, j, k;
147    reductions *rp;
148
149    lookaheads = NEW2(nstates + 1, Value_t);
150
151    k = 0;
152    for (i = 0; i < nstates; i++)
153    {
154	lookaheads[i] = (Value_t)k;
155	rp = reduction_table[i];
156	if (rp)
157	    k += rp->nreds;
158    }
159    lookaheads[nstates] = (Value_t)k;
160
161    LA = NEW2(k * tokensetsize, unsigned);
162    LAruleno = NEW2(k, Value_t);
163    lookback = NEW2(k, shorts *);
164
165    k = 0;
166    for (i = 0; i < nstates; i++)
167    {
168	rp = reduction_table[i];
169	if (rp)
170	{
171	    for (j = 0; j < rp->nreds; j++)
172	    {
173		LAruleno[k] = rp->rules[j];
174		k++;
175	    }
176	}
177    }
178}
179
180static void
181set_goto_map(void)
182{
183    shifts *sp;
184    int i;
185    int symbol;
186    int k;
187    Value_t *temp_base;
188    Value_t *temp_map;
189    Value_t state2;
190
191    goto_base = NEW2(nvars + 1, Value_t);
192    temp_base = NEW2(nvars + 1, Value_t);
193
194    goto_map = goto_base - ntokens;
195    temp_map = temp_base - ntokens;
196
197    ngotos = 0;
198    for (sp = first_shift; sp; sp = sp->next)
199    {
200	for (i = sp->nshifts - 1; i >= 0; i--)
201	{
202	    symbol = accessing_symbol[sp->shift[i]];
203
204	    if (ISTOKEN(symbol))
205		break;
206
207	    if (ngotos == MAXYYINT)
208		fatal("too many gotos");
209
210	    ngotos++;
211	    goto_map[symbol]++;
212	}
213    }
214
215    k = 0;
216    for (i = ntokens; i < nsyms; i++)
217    {
218	temp_map[i] = (Value_t)k;
219	k += goto_map[i];
220    }
221
222    for (i = ntokens; i < nsyms; i++)
223	goto_map[i] = temp_map[i];
224
225    goto_map[nsyms] = (Value_t)ngotos;
226    temp_map[nsyms] = (Value_t)ngotos;
227
228    from_state = NEW2(ngotos, Value_t);
229    to_state = NEW2(ngotos, Value_t);
230
231    for (sp = first_shift; sp; sp = sp->next)
232    {
233	Value_t state1 = sp->number;
234
235	for (i = sp->nshifts - 1; i >= 0; i--)
236	{
237	    state2 = sp->shift[i];
238	    symbol = accessing_symbol[state2];
239
240	    if (ISTOKEN(symbol))
241		break;
242
243	    k = temp_map[symbol]++;
244	    from_state[k] = state1;
245	    to_state[k] = state2;
246	}
247    }
248
249    FREE(temp_base);
250}
251
252/*  Map_goto maps a state/symbol pair into its numeric representation.	*/
253
254static Value_t
255map_goto(int state, int symbol)
256{
257    int low = goto_map[symbol];
258    int high = goto_map[symbol + 1];
259
260    for (;;)
261    {
262	int middle;
263	int s;
264
265	assert(low <= high);
266	middle = (low + high) >> 1;
267	s = from_state[middle];
268	if (s == state)
269	    return (Value_t)(middle);
270	else if (s < state)
271	    low = middle + 1;
272	else
273	    high = middle - 1;
274    }
275}
276
277static void
278initialize_F(void)
279{
280    int i;
281    int j;
282    int k;
283    shifts *sp;
284    Value_t *edge;
285    unsigned *rowp;
286    Value_t *rp;
287    Value_t **reads;
288    int nedges;
289    int symbol;
290    int nwords;
291
292    nwords = ngotos * tokensetsize;
293    F = NEW2(nwords, unsigned);
294
295    reads = NEW2(ngotos, Value_t *);
296    edge = NEW2(ngotos + 1, Value_t);
297    nedges = 0;
298
299    rowp = F;
300    for (i = 0; i < ngotos; i++)
301    {
302	int stateno = to_state[i];
303
304	sp = shift_table[stateno];
305
306	if (sp)
307	{
308	    k = sp->nshifts;
309
310	    for (j = 0; j < k; j++)
311	    {
312		symbol = accessing_symbol[sp->shift[j]];
313		if (ISVAR(symbol))
314		    break;
315		SETBIT(rowp, symbol);
316	    }
317
318	    for (; j < k; j++)
319	    {
320		symbol = accessing_symbol[sp->shift[j]];
321		if (nullable[symbol])
322		    edge[nedges++] = map_goto(stateno, symbol);
323	    }
324
325	    if (nedges)
326	    {
327		reads[i] = rp = NEW2(nedges + 1, Value_t);
328
329		for (j = 0; j < nedges; j++)
330		    rp[j] = edge[j];
331
332		rp[nedges] = -1;
333		nedges = 0;
334	    }
335	}
336
337	rowp += tokensetsize;
338    }
339
340    SETBIT(F, 0);
341    digraph(reads);
342
343    for (i = 0; i < ngotos; i++)
344    {
345	if (reads[i])
346	    FREE(reads[i]);
347    }
348
349    FREE(reads);
350    FREE(edge);
351}
352
353static void
354build_relations(void)
355{
356    int i;
357    int j;
358    int k;
359    Value_t *rulep;
360    Value_t *rp;
361    shifts *sp;
362    int length;
363    int done_flag;
364    Value_t stateno;
365    int symbol2;
366    Value_t *shortp;
367    Value_t *edge;
368    Value_t *states;
369    Value_t **new_includes;
370
371    includes = NEW2(ngotos, Value_t *);
372    edge = NEW2(ngotos + 1, Value_t);
373    states = NEW2(maxrhs + 1, Value_t);
374
375    for (i = 0; i < ngotos; i++)
376    {
377	int nedges = 0;
378	int symbol1 = accessing_symbol[to_state[i]];
379	Value_t state1 = from_state[i];
380
381	for (rulep = derives[symbol1]; *rulep >= 0; rulep++)
382	{
383	    length = 1;
384	    states[0] = state1;
385	    stateno = state1;
386
387	    for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++)
388	    {
389		symbol2 = *rp;
390		sp = shift_table[stateno];
391		k = sp->nshifts;
392
393		for (j = 0; j < k; j++)
394		{
395		    stateno = sp->shift[j];
396		    if (accessing_symbol[stateno] == symbol2)
397			break;
398		}
399
400		states[length++] = stateno;
401	    }
402
403	    add_lookback_edge(stateno, *rulep, i);
404
405	    length--;
406	    done_flag = 0;
407	    while (!done_flag)
408	    {
409		done_flag = 1;
410		rp--;
411		if (ISVAR(*rp))
412		{
413		    stateno = states[--length];
414		    edge[nedges++] = map_goto(stateno, *rp);
415		    if (nullable[*rp] && length > 0)
416			done_flag = 0;
417		}
418	    }
419	}
420
421	if (nedges)
422	{
423	    includes[i] = shortp = NEW2(nedges + 1, Value_t);
424	    for (j = 0; j < nedges; j++)
425		shortp[j] = edge[j];
426	    shortp[nedges] = -1;
427	}
428    }
429
430    new_includes = transpose(includes, ngotos);
431
432    for (i = 0; i < ngotos; i++)
433	if (includes[i])
434	    FREE(includes[i]);
435
436    FREE(includes);
437
438    includes = new_includes;
439
440    FREE(edge);
441    FREE(states);
442}
443
444static void
445add_lookback_edge(int stateno, int ruleno, int gotono)
446{
447    int i, k;
448    int found;
449    shorts *sp;
450
451    i = lookaheads[stateno];
452    k = lookaheads[stateno + 1];
453    found = 0;
454    while (!found && i < k)
455    {
456	if (LAruleno[i] == ruleno)
457	    found = 1;
458	else
459	    ++i;
460    }
461    assert(found);
462
463    sp = NEW(shorts);
464    sp->next = lookback[i];
465    sp->value = (Value_t)gotono;
466    lookback[i] = sp;
467}
468
469static Value_t **
470transpose(Value_t **R2, int n)
471{
472    Value_t **new_R;
473    Value_t **temp_R;
474    Value_t *nedges;
475    Value_t *sp;
476    int i;
477
478    nedges = NEW2(n, Value_t);
479
480    for (i = 0; i < n; i++)
481    {
482	sp = R2[i];
483	if (sp)
484	{
485	    while (*sp >= 0)
486		nedges[*sp++]++;
487	}
488    }
489
490    new_R = NEW2(n, Value_t *);
491    temp_R = NEW2(n, Value_t *);
492
493    for (i = 0; i < n; i++)
494    {
495	int k = nedges[i];
496
497	if (k > 0)
498	{
499	    sp = NEW2(k + 1, Value_t);
500	    new_R[i] = sp;
501	    temp_R[i] = sp;
502	    sp[k] = -1;
503	}
504    }
505
506    FREE(nedges);
507
508    for (i = 0; i < n; i++)
509    {
510	sp = R2[i];
511	if (sp)
512	{
513	    while (*sp >= 0)
514		*temp_R[*sp++]++ = (Value_t)i;
515	}
516    }
517
518    FREE(temp_R);
519
520    return (new_R);
521}
522
523static void
524compute_FOLLOWS(void)
525{
526    digraph(includes);
527}
528
529static void
530compute_lookaheads(void)
531{
532    int i, n;
533    unsigned *fp1, *fp2, *fp3;
534    shorts *sp, *next;
535    unsigned *rowp;
536
537    rowp = LA;
538    n = lookaheads[nstates];
539    for (i = 0; i < n; i++)
540    {
541	fp3 = rowp + tokensetsize;
542	for (sp = lookback[i]; sp; sp = sp->next)
543	{
544	    fp1 = rowp;
545	    fp2 = F + tokensetsize * sp->value;
546	    while (fp1 < fp3)
547		*fp1++ |= *fp2++;
548	}
549	rowp = fp3;
550    }
551
552    for (i = 0; i < n; i++)
553	for (sp = lookback[i]; sp; sp = next)
554	{
555	    next = sp->next;
556	    FREE(sp);
557	}
558
559    FREE(lookback);
560    FREE(F);
561}
562
563static void
564digraph(Value_t **relation)
565{
566    int i;
567
568    infinity = (Value_t)(ngotos + 2);
569    INDEX = NEW2(ngotos + 1, Value_t);
570    VERTICES = NEW2(ngotos + 1, Value_t);
571    top = 0;
572
573    R = relation;
574
575    for (i = 0; i < ngotos; i++)
576	INDEX[i] = 0;
577
578    for (i = 0; i < ngotos; i++)
579    {
580	if (INDEX[i] == 0 && R[i])
581	    traverse(i);
582    }
583
584    FREE(INDEX);
585    FREE(VERTICES);
586}
587
588static void
589traverse(int i)
590{
591    unsigned *fp1;
592    unsigned *fp2;
593    unsigned *fp3;
594    int j;
595    Value_t *rp;
596
597    Value_t height;
598    unsigned *base;
599
600    VERTICES[++top] = (Value_t)i;
601    INDEX[i] = height = top;
602
603    base = F + i * tokensetsize;
604    fp3 = base + tokensetsize;
605
606    rp = R[i];
607    if (rp)
608    {
609	while ((j = *rp++) >= 0)
610	{
611	    if (INDEX[j] == 0)
612		traverse(j);
613
614	    if (INDEX[i] > INDEX[j])
615		INDEX[i] = INDEX[j];
616
617	    fp1 = base;
618	    fp2 = F + j * tokensetsize;
619
620	    while (fp1 < fp3)
621		*fp1++ |= *fp2++;
622	}
623    }
624
625    if (INDEX[i] == height)
626    {
627	for (;;)
628	{
629	    j = VERTICES[top--];
630	    INDEX[j] = infinity;
631
632	    if (i == j)
633		break;
634
635	    fp1 = base;
636	    fp2 = F + j * tokensetsize;
637
638	    while (fp1 < fp3)
639		*fp2++ = *fp1++;
640	}
641    }
642}
643
644#ifdef NO_LEAKS
645void
646lalr_leaks(void)
647{
648    if (includes != 0)
649    {
650	int i;
651
652	for (i = 0; i < ngotos; i++)
653	{
654	    free(includes[i]);
655	}
656	DO_FREE(includes);
657    }
658}
659#endif
660