1228072Sbapt/* ecs - equivalence class routines */
2228072Sbapt
3228072Sbapt/*  Copyright (c) 1990 The Regents of the University of California. */
4228072Sbapt/*  All rights reserved. */
5228072Sbapt
6228072Sbapt/*  This code is derived from software contributed to Berkeley by */
7228072Sbapt/*  Vern Paxson. */
8228072Sbapt
9228072Sbapt/*  The United States Government has rights in this work pursuant */
10228072Sbapt/*  to contract no. DE-AC03-76SF00098 between the United States */
11228072Sbapt/*  Department of Energy and the University of California. */
12228072Sbapt
13228072Sbapt/* This file is part of flex */
14228072Sbapt
15228072Sbapt/*  Redistribution and use in source and binary forms, with or without */
16228072Sbapt/*  modification, are permitted provided that the following conditions */
17228072Sbapt/*  are met: */
18228072Sbapt
19228072Sbapt/*  1. Redistributions of source code must retain the above copyright */
20228072Sbapt/*     notice, this list of conditions and the following disclaimer. */
21228072Sbapt/*  2. Redistributions in binary form must reproduce the above copyright */
22228072Sbapt/*     notice, this list of conditions and the following disclaimer in the */
23228072Sbapt/*     documentation and/or other materials provided with the distribution. */
24228072Sbapt
25228072Sbapt/*  Neither the name of the University nor the names of its contributors */
26228072Sbapt/*  may be used to endorse or promote products derived from this software */
27228072Sbapt/*  without specific prior written permission. */
28228072Sbapt
29228072Sbapt/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30228072Sbapt/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31228072Sbapt/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32228072Sbapt/*  PURPOSE. */
33228072Sbapt
34228072Sbapt
35228072Sbapt#include "flexdef.h"
36228072Sbapt
37228072Sbapt/* ccl2ecl - convert character classes to set of equivalence classes */
38228072Sbapt
39228072Sbaptvoid    ccl2ecl ()
40228072Sbapt{
41228072Sbapt	int     i, ich, newlen, cclp, ccls, cclmec;
42228072Sbapt
43228072Sbapt	for (i = 1; i <= lastccl; ++i) {
44228072Sbapt		/* We loop through each character class, and for each character
45228072Sbapt		 * in the class, add the character's equivalence class to the
46228072Sbapt		 * new "character" class we are creating.  Thus when we are all
47228072Sbapt		 * done, character classes will really consist of collections
48228072Sbapt		 * of equivalence classes
49228072Sbapt		 */
50228072Sbapt
51228072Sbapt		newlen = 0;
52228072Sbapt		cclp = cclmap[i];
53228072Sbapt
54228072Sbapt		for (ccls = 0; ccls < ccllen[i]; ++ccls) {
55228072Sbapt			ich = ccltbl[cclp + ccls];
56228072Sbapt			cclmec = ecgroup[ich];
57228072Sbapt
58228072Sbapt			if (cclmec > 0) {
59228072Sbapt				ccltbl[cclp + newlen] = cclmec;
60228072Sbapt				++newlen;
61228072Sbapt			}
62228072Sbapt		}
63228072Sbapt
64228072Sbapt		ccllen[i] = newlen;
65228072Sbapt	}
66228072Sbapt}
67228072Sbapt
68228072Sbapt
69228072Sbapt/* cre8ecs - associate equivalence class numbers with class members
70228072Sbapt *
71228072Sbapt * fwd is the forward linked-list of equivalence class members.  bck
72228072Sbapt * is the backward linked-list, and num is the number of class members.
73228072Sbapt *
74228072Sbapt * Returned is the number of classes.
75228072Sbapt */
76228072Sbapt
77228072Sbaptint     cre8ecs (fwd, bck, num)
78228072Sbapt     int     fwd[], bck[], num;
79228072Sbapt{
80228072Sbapt	int     i, j, numcl;
81228072Sbapt
82228072Sbapt	numcl = 0;
83228072Sbapt
84228072Sbapt	/* Create equivalence class numbers.  From now on, ABS( bck(x) )
85228072Sbapt	 * is the equivalence class number for object x.  If bck(x)
86228072Sbapt	 * is positive, then x is the representative of its equivalence
87228072Sbapt	 * class.
88228072Sbapt	 */
89228072Sbapt	for (i = 1; i <= num; ++i)
90228072Sbapt		if (bck[i] == NIL) {
91228072Sbapt			bck[i] = ++numcl;
92228072Sbapt			for (j = fwd[i]; j != NIL; j = fwd[j])
93228072Sbapt				bck[j] = -numcl;
94228072Sbapt		}
95228072Sbapt
96228072Sbapt	return numcl;
97228072Sbapt}
98228072Sbapt
99228072Sbapt
100228072Sbapt/* mkeccl - update equivalence classes based on character class xtions
101228072Sbapt *
102228072Sbapt * synopsis
103228072Sbapt *    Char ccls[];
104228072Sbapt *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
105228072Sbapt *    void mkeccl( Char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz],
106228072Sbapt *			int llsiz, int NUL_mapping );
107228072Sbapt *
108228072Sbapt * ccls contains the elements of the character class, lenccl is the
109228072Sbapt * number of elements in the ccl, fwd is the forward link-list of equivalent
110228072Sbapt * characters, bck is the backward link-list, and llsiz size of the link-list.
111228072Sbapt *
112228072Sbapt * NUL_mapping is the value which NUL (0) should be mapped to.
113228072Sbapt */
114228072Sbapt
115228072Sbaptvoid    mkeccl (ccls, lenccl, fwd, bck, llsiz, NUL_mapping)
116228072Sbapt     Char    ccls[];
117228072Sbapt     int     lenccl, fwd[], bck[], llsiz, NUL_mapping;
118228072Sbapt{
119228072Sbapt	int     cclp, oldec, newec;
120228072Sbapt	int     cclm, i, j;
121228072Sbapt	static unsigned char cclflags[CSIZE];	/* initialized to all '\0' */
122228072Sbapt
123228072Sbapt	/* Note that it doesn't matter whether or not the character class is
124228072Sbapt	 * negated.  The same results will be obtained in either case.
125228072Sbapt	 */
126228072Sbapt
127228072Sbapt	cclp = 0;
128228072Sbapt
129228072Sbapt	while (cclp < lenccl) {
130228072Sbapt		cclm = ccls[cclp];
131228072Sbapt
132228072Sbapt		if (NUL_mapping && cclm == 0)
133228072Sbapt			cclm = NUL_mapping;
134228072Sbapt
135228072Sbapt		oldec = bck[cclm];
136228072Sbapt		newec = cclm;
137228072Sbapt
138228072Sbapt		j = cclp + 1;
139228072Sbapt
140228072Sbapt		for (i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i]) {	/* look for the symbol in the character class */
141228072Sbapt			for (; j < lenccl; ++j) {
142250874Sjkim				int ccl_char;
143228072Sbapt
144228072Sbapt				if (NUL_mapping && ccls[j] == 0)
145228072Sbapt					ccl_char = NUL_mapping;
146228072Sbapt				else
147228072Sbapt					ccl_char = ccls[j];
148228072Sbapt
149228072Sbapt				if (ccl_char > i)
150228072Sbapt					break;
151228072Sbapt
152228072Sbapt				if (ccl_char == i && !cclflags[j]) {
153228072Sbapt					/* We found an old companion of cclm
154228072Sbapt					 * in the ccl.  Link it into the new
155228072Sbapt					 * equivalence class and flag it as
156228072Sbapt					 * having been processed.
157228072Sbapt					 */
158228072Sbapt
159228072Sbapt					bck[i] = newec;
160228072Sbapt					fwd[newec] = i;
161228072Sbapt					newec = i;
162228072Sbapt					/* Set flag so we don't reprocess. */
163228072Sbapt					cclflags[j] = 1;
164228072Sbapt
165228072Sbapt					/* Get next equivalence class member. */
166228072Sbapt					/* continue 2 */
167228072Sbapt					goto next_pt;
168228072Sbapt				}
169228072Sbapt			}
170228072Sbapt
171228072Sbapt			/* Symbol isn't in character class.  Put it in the old
172228072Sbapt			 * equivalence class.
173228072Sbapt			 */
174228072Sbapt
175228072Sbapt			bck[i] = oldec;
176228072Sbapt
177228072Sbapt			if (oldec != NIL)
178228072Sbapt				fwd[oldec] = i;
179228072Sbapt
180228072Sbapt			oldec = i;
181228072Sbapt
182228072Sbapt		      next_pt:;
183228072Sbapt		}
184228072Sbapt
185228072Sbapt		if (bck[cclm] != NIL || oldec != bck[cclm]) {
186228072Sbapt			bck[cclm] = NIL;
187228072Sbapt			fwd[oldec] = NIL;
188228072Sbapt		}
189228072Sbapt
190228072Sbapt		fwd[newec] = NIL;
191228072Sbapt
192228072Sbapt		/* Find next ccl member to process. */
193228072Sbapt
194228072Sbapt		for (++cclp; cclflags[cclp] && cclp < lenccl; ++cclp) {
195228072Sbapt			/* Reset "doesn't need processing" flag. */
196228072Sbapt			cclflags[cclp] = 0;
197228072Sbapt		}
198228072Sbapt	}
199228072Sbapt}
200228072Sbapt
201228072Sbapt
202228072Sbapt/* mkechar - create equivalence class for single character */
203228072Sbapt
204228072Sbaptvoid    mkechar (tch, fwd, bck)
205228072Sbapt     int     tch, fwd[], bck[];
206228072Sbapt{
207228072Sbapt	/* If until now the character has been a proper subset of
208228072Sbapt	 * an equivalence class, break it away to create a new ec
209228072Sbapt	 */
210228072Sbapt
211228072Sbapt	if (fwd[tch] != NIL)
212228072Sbapt		bck[fwd[tch]] = bck[tch];
213228072Sbapt
214228072Sbapt	if (bck[tch] != NIL)
215228072Sbapt		fwd[bck[tch]] = fwd[tch];
216228072Sbapt
217228072Sbapt	fwd[tch] = NIL;
218228072Sbapt	bck[tch] = NIL;
219228072Sbapt}
220