1254225Speter/*	$NetBSD: regcomp.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */
2254225Speter
3254225Speter/*-
4254225Speter * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5254225Speter * Copyright (c) 1992, 1993, 1994
6254225Speter *	The Regents of the University of California.  All rights reserved.
7254225Speter *
8254225Speter * This code is derived from software contributed to Berkeley by
9254225Speter * Henry Spencer of the University of Toronto.
10254225Speter *
11254225Speter * Redistribution and use in source and binary forms, with or without
12254225Speter * modification, are permitted provided that the following conditions
13254225Speter * are met:
14254225Speter * 1. Redistributions of source code must retain the above copyright
15254225Speter *    notice, this list of conditions and the following disclaimer.
16254225Speter * 2. Redistributions in binary form must reproduce the above copyright
17254225Speter *    notice, this list of conditions and the following disclaimer in the
18254225Speter *    documentation and/or other materials provided with the distribution.
19254225Speter * 3. All advertising materials mentioning features or use of this software
20254225Speter *    must display the following acknowledgement:
21254225Speter *	This product includes software developed by the University of
22254225Speter *	California, Berkeley and its contributors.
23254225Speter * 4. Neither the name of the University nor the names of its contributors
24254225Speter *    may be used to endorse or promote products derived from this software
25254225Speter *    without specific prior written permission.
26254225Speter *
27254225Speter * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28254225Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29254225Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30254225Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31254225Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32254225Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33254225Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34254225Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35254225Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36254225Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37254225Speter * SUCH DAMAGE.
38254225Speter *
39254225Speter *	@(#)regcomp.c	8.4 (Berkeley) 3/19/94
40254225Speter */
41254225Speter
42254225Speter#if defined(LIBC_SCCS) && !defined(lint)
43254225Speterstatic char sccsid[] = "@(#)regcomp.c	8.4 (Berkeley) 3/19/94";
44254225Speter#endif /* LIBC_SCCS and not lint */
45254225Speter
46254225Speter#include <sys/types.h>
47254225Speter#include <stdio.h>
48254225Speter#include <string.h>
49254225Speter#include <ctype.h>
50254225Speter#include <limits.h>
51254225Speter#include <stdlib.h>
52254225Speter#include <regex.h>
53254225Speter
54254225Speter#include "utils.h"
55254225Speter#include "regex2.h"
56254225Speter
57254225Speter#include "cclass.h"
58254225Speter#include "cname.h"
59254225Speter
60254225Speter/*
61254225Speter * parse structure, passed up and down to avoid global variables and
62254225Speter * other clumsinesses
63254225Speter */
64254225Speterstruct parse {
65254225Speter	RCHAR_T *next;		/* next character in RE */
66254225Speter	RCHAR_T *end;		/* end of string (-> NUL normally) */
67254225Speter	int error;		/* has an error been seen? */
68254225Speter	sop *strip;		/* malloced strip */
69254225Speter	RCHAR_T *stripdata;	/* malloced stripdata */
70254225Speter	sopno ssize;		/* malloced strip size (allocated) */
71254225Speter	sopno slen;		/* malloced strip length (used) */
72254225Speter	int ncsalloc;		/* number of csets allocated */
73254225Speter	struct re_guts *g;
74254225Speter#	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
75254225Speter	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
76254225Speter	sopno pend[NPAREN];	/* -> ) ([0] unused) */
77254225Speter};
78254225Speter
79254225Speter/* ========= begin header generated by ./mkh ========= */
80254225Speter#ifdef __cplusplus
81254225Speterextern "C" {
82254225Speter#endif
83254225Speter
84254225Speter/* === regcomp.c === */
85254225Speterstatic void p_ere __P((struct parse *p, int stop, size_t reclimit));
86254225Speterstatic void p_ere_exp __P((struct parse *p, size_t reclimit));
87254225Speterstatic void p_str __P((struct parse *p));
88254225Speterstatic void p_bre __P((struct parse *p, int end1, int end2, size_t reclimit));
89254225Speterstatic int p_simp_re __P((struct parse *p, int starordinary, size_t reclimit));
90254225Speterstatic int p_count __P((struct parse *p));
91254225Speterstatic void p_bracket __P((struct parse *p));
92254225Speterstatic void p_b_term __P((struct parse *p, cset *cs));
93254225Speterstatic void p_b_cclass __P((struct parse *p, cset *cs));
94254225Speterstatic void p_b_eclass __P((struct parse *p, cset *cs));
95254225Speterstatic char p_b_symbol __P((struct parse *p));
96254225Speterstatic char p_b_coll_elem __P((struct parse *p, int endc));
97254225Speterstatic char othercase __P((int ch));
98254225Speterstatic void bothcases __P((struct parse *p, int ch));
99254225Speterstatic void ordinary __P((struct parse *p, int ch));
100254225Speterstatic void nonnewline __P((struct parse *p));
101254225Speterstatic void repeat __P((struct parse *p, sopno start, int from, int to, size_t reclimit));
102254225Speterstatic int seterr __P((struct parse *p, int e));
103254225Speterstatic cset *allocset __P((struct parse *p));
104254225Speterstatic void freeset __P((struct parse *p, cset *cs));
105254225Speterstatic int freezeset __P((struct parse *p, cset *cs));
106254225Speterstatic int firstch __P((struct parse *p, cset *cs));
107254225Speterstatic int nch __P((struct parse *p, cset *cs));
108254225Speterstatic void mcadd __P((struct parse *p, cset *cs, const char *cp));
109254225Speter#ifdef notdef
110254225Speterstatic void mcsub __P((cset *cs, char *cp));
111254225Speterstatic int mcin __P((cset *cs, char *cp));
112254225Speterstatic char *mcfind __P((cset *cs, char *cp));
113254225Speter#endif
114254225Speterstatic void mcinvert __P((struct parse *p, cset *cs));
115254225Speterstatic void mccase __P((struct parse *p, cset *cs));
116254225Speter#ifdef notdef
117254225Speterstatic int isinsets __P((struct re_guts *g, int c));
118254225Speterstatic int samesets __P((struct re_guts *g, int c1, int c2));
119254225Speter#endif
120254225Speterstatic void categorize __P((struct parse *p, struct re_guts *g));
121254225Speterstatic sopno dupl __P((struct parse *p, sopno start, sopno finish));
122254225Speterstatic void doemit __P((struct parse *p, sop op, size_t opnd));
123254225Speterstatic void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
124254225Speterstatic void dofwd __P((struct parse *p, sopno pos, sop value));
125254225Speterstatic int enlarge __P((struct parse *p, sopno size));
126254225Speterstatic void stripsnug __P((struct parse *p, struct re_guts *g));
127254225Speterstatic void findmust __P((struct parse *p, struct re_guts *g));
128254225Speterstatic sopno pluscount __P((struct parse *p, struct re_guts *g));
129254225Speter
130254225Speter#ifdef __cplusplus
131254225Speter}
132254225Speter#endif
133254225Speter/* ========= end header generated by ./mkh ========= */
134254225Speter
135254225Speterstatic RCHAR_T nuls[10];		/* place to point scanner in event of error */
136254225Speter
137254225Speter/*
138254225Speter * macros for use with parse structure
139254225Speter * BEWARE:  these know that the parse structure is named `p' !!!
140254225Speter */
141254225Speter#define	PEEK()	(*p->next)
142254225Speter#define	PEEK2()	(*(p->next+1))
143254225Speter#define	MORE()	(p->next < p->end)
144254225Speter#define	MORE2()	(p->next+1 < p->end)
145254225Speter#define	SEE(c)	(MORE() && PEEK() == (c))
146254225Speter#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
147254225Speter#define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
148254225Speter#define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
149254225Speter#define	NEXT()	(p->next++)
150254225Speter#define	NEXT2()	(p->next += 2)
151254225Speter#define	NEXTn(n)	(p->next += (n))
152254225Speter#define	GETNEXT()	(*p->next++)
153254225Speter#define	SETERROR(e)	seterr(p, (e))
154254225Speter#define	REQUIRE(co, e)	((co) || SETERROR(e))
155254225Speter#define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
156254225Speter#define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
157254225Speter#define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
158254225Speter#define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
159254225Speter#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
160254225Speter#define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
161254225Speter#define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
162254225Speter#define	HERE()		(p->slen)
163254225Speter#define	THERE()		(p->slen - 1)
164254225Speter#define	THERETHERE()	(p->slen - 2)
165254225Speter#define	DROP(n)	(p->slen -= (n))
166254225Speter
167254225Speter#ifndef NDEBUG
168254225Speterstatic int never = 0;		/* for use in asserts; shuts lint up */
169254225Speter#else
170254225Speter#define	never	0		/* some <assert.h>s have bugs too */
171254225Speter#endif
172254225Speter
173254225Speter#define	MEMLIMIT	0x8000000
174254225Speter#define MEMSIZE(p) \
175254225Speter	((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
176254225Speter	(p)->ncsalloc * sizeof(cset) + \
177254225Speter	(p)->ssize * sizeof(sop))
178254225Speter#define	RECLIMIT	256
179254225Speter
180254225Speter/*
181254225Speter - regcomp - interface for parser and compilation
182254225Speter = extern int regcomp(regex_t *, const RCHAR_T *, int);
183254225Speter = #define	REG_BASIC	0000
184254225Speter = #define	REG_EXTENDED	0001
185254225Speter = #define	REG_ICASE	0002
186254225Speter = #define	REG_NOSUB	0004
187254225Speter = #define	REG_NEWLINE	0010
188254225Speter = #define	REG_NOSPEC	0020
189254225Speter = #define	REG_PEND	0040
190254225Speter = #define	REG_DUMP	0200
191254225Speter */
192254225Speterint				/* 0 success, otherwise REG_something */
193254225Speterregcomp(regex_t *preg, const RCHAR_T *pattern, int cflags)
194254225Speter{
195254225Speter	struct parse pa;
196254225Speter	register struct re_guts *g;
197254225Speter	register struct parse *p = &pa;
198254225Speter	register int i;
199254225Speter	register size_t len;
200254225Speter#ifdef REDEBUG
201254225Speter#	define	GOODFLAGS(f)	(f)
202254225Speter#else
203254225Speter#	define	GOODFLAGS(f)	((f)&~REG_DUMP)
204254225Speter#endif
205254225Speter
206254225Speter	cflags = GOODFLAGS(cflags);
207254225Speter	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
208254225Speter		return(REG_INVARG);
209254225Speter
210254225Speter	if (cflags&REG_PEND) {
211254225Speter		if (preg->re_endp < pattern)
212254225Speter			return(REG_INVARG);
213254225Speter		len = preg->re_endp - pattern;
214254225Speter	} else
215254225Speter		len = STRLEN(pattern);
216254225Speter
217254225Speter	/* do the mallocs early so failure handling is easy */
218254225Speter	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
219254225Speter							(NC-1)*sizeof(cat_t));
220254225Speter	if (g == NULL)
221254225Speter		return(REG_ESPACE);
222254225Speter	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
223254225Speter	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
224254225Speter	if (p->strip == NULL) {
225254225Speter		free((char *)g);
226254225Speter		return(REG_ESPACE);
227254225Speter	}
228254225Speter	p->stripdata = (RCHAR_T *)malloc(p->ssize * sizeof(RCHAR_T));
229254225Speter	if (p->stripdata == NULL) {
230254225Speter		free((char *)p->strip);
231254225Speter		free((char *)g);
232254225Speter		return(REG_ESPACE);
233254225Speter	}
234254225Speter	p->slen = 0;
235254225Speter
236254225Speter	/* set things up */
237254225Speter	p->g = g;
238254225Speter	p->next = (RCHAR_T *)pattern;	/* convenience; we do not modify it */
239254225Speter	p->end = p->next + len;
240254225Speter	p->error = 0;
241254225Speter	p->ncsalloc = 0;
242254225Speter	for (i = 0; i < NPAREN; i++) {
243254225Speter		p->pbegin[i] = 0;
244254225Speter		p->pend[i] = 0;
245254225Speter	}
246254225Speter	g->csetsize = NC;
247254225Speter	g->sets = NULL;
248254225Speter	g->setbits = NULL;
249254225Speter	g->ncsets = 0;
250254225Speter	g->cflags = cflags;
251254225Speter	g->iflags = 0;
252254225Speter	g->nbol = 0;
253254225Speter	g->neol = 0;
254254225Speter	g->must = NULL;
255254225Speter	g->mlen = 0;
256254225Speter	g->nsub = 0;
257254225Speter#if 0
258254225Speter	g->ncategories = 1;	/* category 0 is "everything else" */
259254225Speter	g->categories = &g->catspace[-(CHAR_MIN)];
260254225Speter	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
261254225Speter#endif
262254225Speter	g->backrefs = 0;
263254225Speter
264254225Speter	/* do it */
265254225Speter	EMIT(OEND, 0);
266254225Speter	g->firststate = THERE();
267254225Speter	if (cflags&REG_EXTENDED)
268254225Speter		p_ere(p, OUT, 0);
269254225Speter	else if (cflags&REG_NOSPEC)
270254225Speter		p_str(p);
271254225Speter	else
272254225Speter		p_bre(p, OUT, OUT, 0);
273254225Speter	EMIT(OEND, 0);
274254225Speter	g->laststate = THERE();
275254225Speter
276254225Speter	/* tidy up loose ends and fill things in */
277254225Speter	categorize(p, g);
278254225Speter	stripsnug(p, g);
279254225Speter	findmust(p, g);
280254225Speter	g->nplus = pluscount(p, g);
281254225Speter	g->magic = MAGIC2;
282254225Speter	preg->re_nsub = g->nsub;
283254225Speter	preg->re_g = g;
284254225Speter	preg->re_magic = MAGIC1;
285254225Speter#ifndef REDEBUG
286254225Speter	/* not debugging, so can't rely on the assert() in regexec() */
287254225Speter	if (g->iflags&BAD)
288254225Speter		SETERROR(REG_ASSERT);
289254225Speter#endif
290254225Speter
291254225Speter	/* win or lose, we're done */
292254225Speter	if (p->error != 0)	/* lose */
293254225Speter		regfree(preg);
294254225Speter	return(p->error);
295254225Speter}
296254225Speter
297254225Speter/*
298254225Speter - p_ere - ERE parser top level, concatenation and alternation
299254225Speter == static void p_ere(register struct parse *p, int stop, size_t reclimit);
300254225Speter */
301254225Speterstatic void
302254225Speterp_ere(register struct parse *p, int stop, size_t reclimit)
303254225Speter
304254225Speter         			/* character this ERE should end at */
305254225Speter{
306254225Speter	register char c;
307254225Speter	register sopno prevback = 0;
308254225Speter	register sopno prevfwd = 0;
309254225Speter	register sopno conc;
310254225Speter	register int first = 1;		/* is this the first alternative? */
311254225Speter
312254225Speter	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
313254225Speter		p->error = REG_ESPACE;
314254225Speter		return;
315254225Speter	}
316254225Speter
317254225Speter	for (;;) {
318254225Speter		/* do a bunch of concatenated expressions */
319254225Speter		conc = HERE();
320254225Speter		while (MORE() && (c = PEEK()) != '|' && c != stop)
321254225Speter			p_ere_exp(p, reclimit);
322254225Speter		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
323254225Speter
324254225Speter		if (!EAT('|'))
325254225Speter			break;		/* NOTE BREAK OUT */
326254225Speter
327254225Speter		if (first) {
328254225Speter			INSERT(OCH_, conc);	/* offset is wrong */
329254225Speter			prevfwd = conc;
330254225Speter			prevback = conc;
331254225Speter			first = 0;
332254225Speter		}
333254225Speter		ASTERN(OOR1, prevback);
334254225Speter		prevback = THERE();
335254225Speter		AHEAD(prevfwd);			/* fix previous offset */
336254225Speter		prevfwd = HERE();
337254225Speter		EMIT(OOR2, 0);			/* offset is very wrong */
338254225Speter	}
339254225Speter
340254225Speter	if (!first) {		/* tail-end fixups */
341254225Speter		AHEAD(prevfwd);
342254225Speter		ASTERN(O_CH, prevback);
343254225Speter	}
344254225Speter
345254225Speter	assert(!MORE() || SEE(stop));
346254225Speter}
347254225Speter
348254225Speter/*
349254225Speter - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
350254225Speter == static void p_ere_exp(register struct parse *p);
351254225Speter */
352254225Speterstatic void
353254225Speterp_ere_exp(register struct parse *p, size_t reclimit)
354254225Speter{
355254225Speter	register char c;
356254225Speter	register sopno pos;
357254225Speter	register int count;
358254225Speter	register int count2;
359254225Speter	register sopno subno;
360254225Speter	int wascaret = 0;
361254225Speter
362254225Speter	assert(MORE());		/* caller should have ensured this */
363254225Speter	c = GETNEXT();
364254225Speter
365254225Speter	pos = HERE();
366254225Speter	switch (c) {
367254225Speter	case '(':
368254225Speter		(void)REQUIRE(MORE(), REG_EPAREN);
369254225Speter		p->g->nsub++;
370254225Speter		subno = p->g->nsub;
371254225Speter		if (subno < NPAREN)
372254225Speter			p->pbegin[subno] = HERE();
373254225Speter		EMIT(OLPAREN, subno);
374254225Speter		if (!SEE(')'))
375254225Speter			p_ere(p, ')', reclimit);
376254225Speter		if (subno < NPAREN) {
377254225Speter			p->pend[subno] = HERE();
378254225Speter			assert(p->pend[subno] != 0);
379254225Speter		}
380254225Speter		EMIT(ORPAREN, subno);
381254225Speter		(void)MUSTEAT(')', REG_EPAREN);
382254225Speter		break;
383254225Speter#ifndef POSIX_MISTAKE
384254225Speter	case ')':		/* happens only if no current unmatched ( */
385254225Speter		/*
386254225Speter		 * You may ask, why the ifndef?  Because I didn't notice
387254225Speter		 * this until slightly too late for 1003.2, and none of the
388254225Speter		 * other 1003.2 regular-expression reviewers noticed it at
389254225Speter		 * all.  So an unmatched ) is legal POSIX, at least until
390254225Speter		 * we can get it fixed.
391254225Speter		 */
392254225Speter		SETERROR(REG_EPAREN);
393254225Speter		break;
394254225Speter#endif
395254225Speter	case '^':
396254225Speter		EMIT(OBOL, 0);
397254225Speter		p->g->iflags |= USEBOL;
398254225Speter		p->g->nbol++;
399254225Speter		wascaret = 1;
400254225Speter		break;
401254225Speter	case '$':
402254225Speter		EMIT(OEOL, 0);
403254225Speter		p->g->iflags |= USEEOL;
404254225Speter		p->g->neol++;
405254225Speter		break;
406254225Speter	case '|':
407254225Speter		SETERROR(REG_EMPTY);
408254225Speter		break;
409254225Speter	case '*':
410254225Speter	case '+':
411254225Speter	case '?':
412254225Speter		SETERROR(REG_BADRPT);
413254225Speter		break;
414254225Speter	case '.':
415254225Speter		if (p->g->cflags&REG_NEWLINE)
416254225Speter			nonnewline(p);
417254225Speter		else
418254225Speter			EMIT(OANY, 0);
419254225Speter		break;
420254225Speter	case '[':
421254225Speter		p_bracket(p);
422254225Speter		break;
423254225Speter	case '\\':
424254225Speter		(void)REQUIRE(MORE(), REG_EESCAPE);
425254225Speter		c = GETNEXT();
426254225Speter		ordinary(p, c);
427254225Speter		break;
428254225Speter	case '{':		/* okay as ordinary except if digit follows */
429254225Speter		(void)REQUIRE(!MORE() || !ISDIGIT((UCHAR_T)PEEK()), REG_BADRPT);
430254225Speter		/* FALLTHROUGH */
431254225Speter	default:
432254225Speter		ordinary(p, c);
433254225Speter		break;
434254225Speter	}
435254225Speter
436254225Speter	if (!MORE())
437254225Speter		return;
438254225Speter	c = PEEK();
439254225Speter	/* we call { a repetition if followed by a digit */
440254225Speter	if (!( c == '*' || c == '+' || c == '?' ||
441254225Speter				(c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ))
442254225Speter		return;		/* no repetition, we're done */
443254225Speter	NEXT();
444254225Speter
445254225Speter	(void)REQUIRE(!wascaret, REG_BADRPT);
446254225Speter	switch (c) {
447254225Speter	case '*':	/* implemented as +? */
448254225Speter		/* this case does not require the (y|) trick, noKLUDGE */
449254225Speter		INSERT(OPLUS_, pos);
450254225Speter		ASTERN(O_PLUS, pos);
451254225Speter		INSERT(OQUEST_, pos);
452254225Speter		ASTERN(O_QUEST, pos);
453254225Speter		break;
454254225Speter	case '+':
455254225Speter		INSERT(OPLUS_, pos);
456254225Speter		ASTERN(O_PLUS, pos);
457254225Speter		break;
458254225Speter	case '?':
459254225Speter		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
460254225Speter		INSERT(OCH_, pos);		/* offset slightly wrong */
461254225Speter		ASTERN(OOR1, pos);		/* this one's right */
462254225Speter		AHEAD(pos);			/* fix the OCH_ */
463254225Speter		EMIT(OOR2, 0);			/* offset very wrong... */
464254225Speter		AHEAD(THERE());			/* ...so fix it */
465254225Speter		ASTERN(O_CH, THERETHERE());
466254225Speter		break;
467254225Speter	case '{':
468254225Speter		count = p_count(p);
469254225Speter		if (EAT(',')) {
470254225Speter			if (ISDIGIT((UCHAR_T)PEEK())) {
471254225Speter				count2 = p_count(p);
472254225Speter				(void)REQUIRE(count <= count2, REG_BADBR);
473254225Speter			} else		/* single number with comma */
474254225Speter				count2 = INFINITY;
475254225Speter		} else		/* just a single number */
476254225Speter			count2 = count;
477254225Speter		repeat(p, pos, count, count2, 0);
478254225Speter		if (!EAT('}')) {	/* error heuristics */
479254225Speter			while (MORE() && PEEK() != '}')
480254225Speter				NEXT();
481254225Speter			(void)REQUIRE(MORE(), REG_EBRACE);
482254225Speter			SETERROR(REG_BADBR);
483254225Speter		}
484254225Speter		break;
485254225Speter	}
486254225Speter
487254225Speter	if (!MORE())
488254225Speter		return;
489254225Speter	c = PEEK();
490254225Speter	if (!( c == '*' || c == '+' || c == '?' ||
491254225Speter				(c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ) )
492254225Speter		return;
493254225Speter	SETERROR(REG_BADRPT);
494254225Speter}
495254225Speter
496254225Speter/*
497254225Speter - p_str - string (no metacharacters) "parser"
498254225Speter == static void p_str(register struct parse *p);
499254225Speter */
500254225Speterstatic void
501254225Speterp_str(register struct parse *p)
502254225Speter{
503254225Speter	(void)REQUIRE(MORE(), REG_EMPTY);
504254225Speter	while (MORE())
505254225Speter		ordinary(p, GETNEXT());
506254225Speter}
507254225Speter
508254225Speter/*
509254225Speter - p_bre - BRE parser top level, anchoring and concatenation
510254225Speter == static void p_bre(register struct parse *p, register int end1, \
511254225Speter ==	register int end2, size_t reclimit);
512254225Speter * Giving end1 as OUT essentially eliminates the end1/end2 check.
513254225Speter *
514254225Speter * This implementation is a bit of a kludge, in that a trailing $ is first
515254225Speter * taken as an ordinary character and then revised to be an anchor.  The
516254225Speter * only undesirable side effect is that '$' gets included as a character
517254225Speter * category in such cases.  This is fairly harmless; not worth fixing.
518254225Speter * The amount of lookahead needed to avoid this kludge is excessive.
519254225Speter */
520254225Speterstatic void
521254225Speterp_bre(register struct parse *p, register int end1, register int end2, size_t reclimit)
522254225Speter
523254225Speter                  		/* first terminating character */
524254225Speter                  		/* second terminating character */
525254225Speter{
526254225Speter	register sopno start;
527254225Speter	register int first = 1;			/* first subexpression? */
528254225Speter	register int wasdollar = 0;
529254225Speter
530254225Speter	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
531254225Speter		p->error = REG_ESPACE;
532254225Speter		return;
533254225Speter	}
534254225Speter
535254225Speter	start = HERE();
536254225Speter
537254225Speter	if (EAT('^')) {
538254225Speter		EMIT(OBOL, 0);
539254225Speter		p->g->iflags |= USEBOL;
540254225Speter		p->g->nbol++;
541254225Speter	}
542254225Speter	while (MORE() && !SEETWO(end1, end2)) {
543254225Speter		wasdollar = p_simp_re(p, first, reclimit);
544254225Speter		first = 0;
545254225Speter	}
546254225Speter	if (wasdollar) {	/* oops, that was a trailing anchor */
547254225Speter		DROP(1);
548254225Speter		EMIT(OEOL, 0);
549254225Speter		p->g->iflags |= USEEOL;
550254225Speter		p->g->neol++;
551254225Speter	}
552254225Speter
553254225Speter	(void)REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
554254225Speter}
555254225Speter
556254225Speter/*
557254225Speter - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
558254225Speter == static int p_simp_re(register struct parse *p, int starordinary, size_t reclimit);
559254225Speter */
560254225Speterstatic int			/* was the simple RE an unbackslashed $? */
561254225Speterp_simp_re(register struct parse *p, int starordinary, size_t reclimit)
562254225Speter
563254225Speter                 		/* is a leading * an ordinary character? */
564254225Speter{
565254225Speter	register int c;
566254225Speter	register int count;
567254225Speter	register int count2;
568254225Speter	register sopno pos;
569254225Speter	register int i;
570254225Speter	register sopno subno;
571254225Speter	int backsl;
572254225Speter
573254225Speter	pos = HERE();		/* repetion op, if any, covers from here */
574254225Speter
575254225Speter	assert(MORE());		/* caller should have ensured this */
576254225Speter	c = GETNEXT();
577254225Speter	backsl = c == '\\';
578254225Speter	if (backsl) {
579254225Speter		(void)REQUIRE(MORE(), REG_EESCAPE);
580254225Speter		c = (unsigned char)GETNEXT();
581254225Speter		switch (c) {
582254225Speter		case '{':
583254225Speter			SETERROR(REG_BADRPT);
584254225Speter			break;
585254225Speter		case '(':
586254225Speter			p->g->nsub++;
587254225Speter			subno = p->g->nsub;
588254225Speter			if (subno < NPAREN)
589254225Speter				p->pbegin[subno] = HERE();
590254225Speter			EMIT(OLPAREN, subno);
591254225Speter			/* the MORE here is an error heuristic */
592254225Speter			if (MORE() && !SEETWO('\\', ')'))
593254225Speter				p_bre(p, '\\', ')', reclimit);
594254225Speter			if (subno < NPAREN) {
595254225Speter				p->pend[subno] = HERE();
596254225Speter				assert(p->pend[subno] != 0);
597254225Speter			}
598254225Speter			EMIT(ORPAREN, subno);
599254225Speter			(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
600254225Speter			break;
601254225Speter		case ')':	/* should not get here -- must be user */
602254225Speter		case '}':
603254225Speter			SETERROR(REG_EPAREN);
604254225Speter			break;
605254225Speter		case '1':
606254225Speter		case '2':
607254225Speter		case '3':
608254225Speter		case '4':
609254225Speter		case '5':
610254225Speter		case '6':
611254225Speter		case '7':
612254225Speter		case '8':
613254225Speter		case '9':
614254225Speter			i = c - '0';
615254225Speter			assert(i < NPAREN);
616254225Speter			if (p->pend[i] != 0) {
617254225Speter				assert(i <= p->g->nsub);
618254225Speter				EMIT(OBACK_, i);
619254225Speter				assert(p->pbegin[i] != 0);
620254225Speter				assert(p->strip[p->pbegin[i]] == OLPAREN);
621254225Speter				assert(p->strip[p->pend[i]] == ORPAREN);
622254225Speter				(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
623254225Speter				EMIT(O_BACK, i);
624254225Speter			} else
625254225Speter				SETERROR(REG_ESUBREG);
626254225Speter			p->g->backrefs = 1;
627254225Speter			break;
628254225Speter		default:
629254225Speter			ordinary(p, c);
630254225Speter			break;
631254225Speter		}
632254225Speter	} else {
633254225Speter		switch (c) {
634254225Speter		case '.':
635254225Speter			if (p->g->cflags&REG_NEWLINE)
636254225Speter				nonnewline(p);
637254225Speter			else
638254225Speter				EMIT(OANY, 0);
639254225Speter			break;
640254225Speter		case '[':
641254225Speter			p_bracket(p);
642254225Speter			break;
643254225Speter		case '*':
644254225Speter			(void)REQUIRE(starordinary, REG_BADRPT);
645254225Speter			/* FALLTHROUGH */
646254225Speter		default:
647254225Speter			ordinary(p, c);
648254225Speter			break;
649254225Speter		}
650254225Speter	}
651254225Speter
652254225Speter	if (EAT('*')) {		/* implemented as +? */
653254225Speter		/* this case does not require the (y|) trick, noKLUDGE */
654254225Speter		INSERT(OPLUS_, pos);
655254225Speter		ASTERN(O_PLUS, pos);
656254225Speter		INSERT(OQUEST_, pos);
657254225Speter		ASTERN(O_QUEST, pos);
658254225Speter	} else if (EATTWO('\\', '{')) {
659254225Speter		count = p_count(p);
660254225Speter		if (EAT(',')) {
661254225Speter			if (MORE() && ISDIGIT((UCHAR_T)PEEK())) {
662254225Speter				count2 = p_count(p);
663254225Speter				(void)REQUIRE(count <= count2, REG_BADBR);
664254225Speter			} else		/* single number with comma */
665254225Speter				count2 = INFINITY;
666254225Speter		} else		/* just a single number */
667254225Speter			count2 = count;
668254225Speter		repeat(p, pos, count, count2, reclimit);
669254225Speter		if (!EATTWO('\\', '}')) {	/* error heuristics */
670254225Speter			while (MORE() && !SEETWO('\\', '}'))
671254225Speter				NEXT();
672254225Speter			(void)REQUIRE(MORE(), REG_EBRACE);
673254225Speter			SETERROR(REG_BADBR);
674254225Speter		}
675254225Speter	} else if (!backsl && c == (unsigned char)'$')	/* $ (but not \$) ends it */
676254225Speter		return(1);
677254225Speter
678254225Speter	return(0);
679254225Speter}
680254225Speter
681254225Speter/*
682254225Speter - p_count - parse a repetition count
683254225Speter == static int p_count(register struct parse *p);
684254225Speter */
685254225Speterstatic int			/* the value */
686254225Speterp_count(register struct parse *p)
687254225Speter{
688254225Speter	register int count = 0;
689254225Speter	register int ndigits = 0;
690254225Speter
691254225Speter	while (MORE() && ISDIGIT((UCHAR_T)PEEK()) && count <= DUPMAX) {
692254225Speter		count = count*10 + (GETNEXT() - '0');
693254225Speter		ndigits++;
694254225Speter	}
695254225Speter
696254225Speter	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
697254225Speter	return(count);
698254225Speter}
699254225Speter
700254225Speter/*
701254225Speter - p_bracket - parse a bracketed character list
702254225Speter == static void p_bracket(register struct parse *p);
703254225Speter *
704254225Speter * Note a significant property of this code:  if the allocset() did SETERROR,
705254225Speter * no set operations are done.
706254225Speter */
707254225Speterstatic void
708254225Speterp_bracket(register struct parse *p)
709254225Speter{
710254225Speter	register cset *cs;
711254225Speter	register int invert = 0;
712254225Speter	static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' };
713254225Speter	static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' };
714254225Speter
715254225Speter	cs = allocset(p);
716254225Speter	if (cs == NULL)
717254225Speter		return;
718254225Speter
719254225Speter	/* Dept of Truly Sickening Special-Case Kludges */
720254225Speter	if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) {
721254225Speter		EMIT(OBOW, 0);
722254225Speter		NEXTn(6);
723254225Speter		return;
724254225Speter	}
725254225Speter	if (p->next + 5 < p->end && MEMCMP(p->next, eow, 6) == 0) {
726254225Speter		EMIT(OEOW, 0);
727254225Speter		NEXTn(6);
728254225Speter		return;
729254225Speter	}
730254225Speter
731254225Speter	if (EAT('^'))
732254225Speter		invert++;	/* make note to invert set at end */
733254225Speter	if (EAT(']'))
734254225Speter		CHadd(cs, ']');
735254225Speter	else if (EAT('-'))
736254225Speter		CHadd(cs, '-');
737254225Speter	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
738254225Speter		p_b_term(p, cs);
739254225Speter	if (EAT('-'))
740254225Speter		CHadd(cs, '-');
741254225Speter	(void)MUSTEAT(']', REG_EBRACK);
742254225Speter
743254225Speter	if (p->error != 0)	/* don't mess things up further */
744254225Speter		return;
745254225Speter
746254225Speter	if (p->g->cflags&REG_ICASE) {
747254225Speter		register int i;
748254225Speter		register int ci;
749254225Speter
750254225Speter		for (i = p->g->csetsize - 1; i >= 0; i--)
751254225Speter			if (CHIN(cs, i) && isalpha(i)) {
752254225Speter				ci = othercase(i);
753254225Speter				if (ci != i)
754254225Speter					CHadd(cs, ci);
755254225Speter			}
756254225Speter		if (cs->multis != NULL)
757254225Speter			mccase(p, cs);
758254225Speter	}
759254225Speter	if (invert) {
760254225Speter		register int i;
761254225Speter
762254225Speter		for (i = p->g->csetsize - 1; i >= 0; i--)
763254225Speter			if (CHIN(cs, i))
764254225Speter				CHsub(cs, i);
765254225Speter			else
766254225Speter				CHadd(cs, i);
767254225Speter		if (p->g->cflags&REG_NEWLINE)
768254225Speter			CHsub(cs, '\n');
769254225Speter		if (cs->multis != NULL)
770254225Speter			mcinvert(p, cs);
771254225Speter	}
772254225Speter
773254225Speter	assert(cs->multis == NULL);		/* xxx */
774254225Speter
775254225Speter	if (nch(p, cs) == 1) {		/* optimize singleton sets */
776254225Speter		ordinary(p, firstch(p, cs));
777254225Speter		freeset(p, cs);
778254225Speter	} else
779254225Speter		EMIT(OANYOF, freezeset(p, cs));
780254225Speter}
781254225Speter
782254225Speter/*
783254225Speter - p_b_term - parse one term of a bracketed character list
784254225Speter == static void p_b_term(register struct parse *p, register cset *cs);
785254225Speter */
786254225Speterstatic void
787254225Speterp_b_term(register struct parse *p, register cset *cs)
788254225Speter{
789254225Speter	register char c;
790254225Speter	register char start, finish;
791254225Speter	register int i;
792254225Speter
793254225Speter	/* classify what we've got */
794254225Speter	switch ((MORE()) ? PEEK() : '\0') {
795254225Speter	case '[':
796254225Speter		c = (MORE2()) ? PEEK2() : '\0';
797254225Speter		break;
798254225Speter	case '-':
799254225Speter		SETERROR(REG_ERANGE);
800254225Speter		return;			/* NOTE RETURN */
801254225Speter		break;
802254225Speter	default:
803254225Speter		c = '\0';
804254225Speter		break;
805254225Speter	}
806254225Speter
807254225Speter	switch (c) {
808254225Speter	case ':':		/* character class */
809254225Speter		NEXT2();
810254225Speter		(void)REQUIRE(MORE(), REG_EBRACK);
811254225Speter		c = PEEK();
812254225Speter		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
813254225Speter		p_b_cclass(p, cs);
814254225Speter		(void)REQUIRE(MORE(), REG_EBRACK);
815254225Speter		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
816254225Speter		break;
817254225Speter	case '=':		/* equivalence class */
818254225Speter		NEXT2();
819254225Speter		(void)REQUIRE(MORE(), REG_EBRACK);
820254225Speter		c = PEEK();
821254225Speter		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
822254225Speter		p_b_eclass(p, cs);
823254225Speter		(void)REQUIRE(MORE(), REG_EBRACK);
824254225Speter		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
825254225Speter		break;
826254225Speter	default:		/* symbol, ordinary character, or range */
827254225Speter/* xxx revision needed for multichar stuff */
828254225Speter		start = p_b_symbol(p);
829254225Speter		if (SEE('-') && MORE2() && PEEK2() != ']') {
830254225Speter			/* range */
831254225Speter			NEXT();
832254225Speter			if (EAT('-'))
833254225Speter				finish = '-';
834254225Speter			else
835254225Speter				finish = p_b_symbol(p);
836254225Speter		} else
837254225Speter			finish = start;
838254225Speter/* xxx what about signed chars here... */
839254225Speter		(void)REQUIRE(start <= finish, REG_ERANGE);
840254225Speter		for (i = start; i <= finish; i++)
841254225Speter			CHadd(cs, i);
842254225Speter		break;
843254225Speter	}
844254225Speter}
845254225Speter
846254225Speter/*
847254225Speter - p_b_cclass - parse a character-class name and deal with it
848254225Speter == static void p_b_cclass(register struct parse *p, register cset *cs);
849254225Speter */
850254225Speterstatic void
851254225Speterp_b_cclass(register struct parse *p, register cset *cs)
852254225Speter{
853254225Speter	register RCHAR_T *sp = p->next;
854254225Speter	register struct cclass *cp;
855254225Speter	register size_t len;
856254225Speter	register const char *u;
857254225Speter	register char c;
858254225Speter
859254225Speter	while (MORE() && isalpha(PEEK()))
860254225Speter		NEXT();
861254225Speter	len = p->next - sp;
862254225Speter	for (cp = cclasses; cp->name != NULL; cp++)
863254225Speter		if (STRLEN(cp->name) == len && !MEMCMP(cp->name, sp, len))
864254225Speter			break;
865254225Speter	if (cp->name == NULL) {
866254225Speter		/* oops, didn't find it */
867254225Speter		SETERROR(REG_ECTYPE);
868254225Speter		return;
869254225Speter	}
870254225Speter
871254225Speter	u = cp->chars;
872254225Speter	while ((c = *u++) != '\0')
873254225Speter		CHadd(cs, c);
874254225Speter	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
875254225Speter		MCadd(p, cs, u);
876254225Speter}
877254225Speter
878254225Speter/*
879254225Speter - p_b_eclass - parse an equivalence-class name and deal with it
880254225Speter == static void p_b_eclass(register struct parse *p, register cset *cs);
881254225Speter *
882254225Speter * This implementation is incomplete. xxx
883254225Speter */
884254225Speterstatic void
885254225Speterp_b_eclass(register struct parse *p, register cset *cs)
886254225Speter{
887254225Speter	register char c;
888254225Speter
889254225Speter	c = p_b_coll_elem(p, '=');
890254225Speter	CHadd(cs, c);
891254225Speter}
892254225Speter
893254225Speter/*
894254225Speter - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
895254225Speter == static char p_b_symbol(register struct parse *p);
896254225Speter */
897254225Speterstatic char			/* value of symbol */
898254225Speterp_b_symbol(register struct parse *p)
899254225Speter{
900254225Speter	register char value;
901254225Speter
902254225Speter	(void)REQUIRE(MORE(), REG_EBRACK);
903254225Speter	if (!EATTWO('[', '.'))
904254225Speter		return(GETNEXT());
905254225Speter
906254225Speter	/* collating symbol */
907254225Speter	value = p_b_coll_elem(p, '.');
908254225Speter	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
909254225Speter	return(value);
910254225Speter}
911254225Speter
912254225Speter/*
913254225Speter - p_b_coll_elem - parse a collating-element name and look it up
914254225Speter == static char p_b_coll_elem(register struct parse *p, int endc);
915254225Speter */
916254225Speterstatic char			/* value of collating element */
917254225Speterp_b_coll_elem(register struct parse *p, int endc)
918254225Speter
919254225Speter         			/* name ended by endc,']' */
920254225Speter{
921254225Speter	register RCHAR_T *sp = p->next;
922254225Speter	register struct cname *cp;
923254225Speter	register size_t len;
924254225Speter
925254225Speter	while (MORE() && !SEETWO(endc, ']'))
926254225Speter		NEXT();
927254225Speter	if (!MORE()) {
928254225Speter		SETERROR(REG_EBRACK);
929254225Speter		return(0);
930254225Speter	}
931254225Speter	len = p->next - sp;
932254225Speter	for (cp = cnames; cp->name != NULL; cp++)
933254225Speter		if (STRLEN(cp->name) == len && MEMCMP(cp->name, sp, len))
934254225Speter			return(cp->code);	/* known name */
935254225Speter	if (len == 1)
936254225Speter		return(*sp);	/* single character */
937254225Speter	SETERROR(REG_ECOLLATE);			/* neither */
938254225Speter	return(0);
939254225Speter}
940254225Speter
941254225Speter/*
942254225Speter - othercase - return the case counterpart of an alphabetic
943254225Speter == static char othercase(int ch);
944254225Speter */
945254225Speterstatic char			/* if no counterpart, return ch */
946254225Speterothercase(int ch)
947254225Speter{
948254225Speter	assert(isalpha(ch));
949254225Speter	if (isupper(ch))
950254225Speter		return(tolower(ch));
951254225Speter	else if (islower(ch))
952254225Speter		return(toupper(ch));
953254225Speter	else			/* peculiar, but could happen */
954254225Speter		return(ch);
955254225Speter}
956254225Speter
957254225Speter/*
958254225Speter - bothcases - emit a dualcase version of a two-case character
959254225Speter == static void bothcases(register struct parse *p, int ch);
960254225Speter *
961254225Speter * Boy, is this implementation ever a kludge...
962254225Speter */
963254225Speterstatic void
964254225Speterbothcases(register struct parse *p, int ch)
965254225Speter{
966254225Speter	register RCHAR_T *oldnext = p->next;
967254225Speter	register RCHAR_T *oldend = p->end;
968254225Speter	RCHAR_T bracket[3];
969254225Speter
970254225Speter	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
971254225Speter	p->next = bracket;
972254225Speter	p->end = bracket+2;
973254225Speter	bracket[0] = ch;
974254225Speter	bracket[1] = ']';
975254225Speter	bracket[2] = '\0';
976254225Speter	p_bracket(p);
977254225Speter	assert(p->next == bracket+2);
978254225Speter	p->next = oldnext;
979254225Speter	p->end = oldend;
980254225Speter}
981254225Speter
982254225Speter/*
983254225Speter - ordinary - emit an ordinary character
984254225Speter == static void ordinary(register struct parse *p, register int ch);
985254225Speter */
986254225Speterstatic void
987254225Speterordinary(register struct parse *p, register int ch)
988254225Speter{
989254225Speter/*
990254225Speter	register cat_t *cap = p->g->categories;
991254225Speter*/
992254225Speter
993254225Speter	if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
994254225Speter		bothcases(p, ch);
995254225Speter	else {
996254225Speter		EMIT(OCHAR, (UCHAR_T)ch);
997254225Speter/*
998254225Speter		if (cap[ch] == 0)
999254225Speter			cap[ch] = p->g->ncategories++;
1000254225Speter*/
1001254225Speter	}
1002254225Speter}
1003254225Speter
1004254225Speter/*
1005254225Speter - nonnewline - emit REG_NEWLINE version of OANY
1006254225Speter == static void nonnewline(register struct parse *p);
1007254225Speter *
1008254225Speter * Boy, is this implementation ever a kludge...
1009254225Speter */
1010254225Speterstatic void
1011254225Speternonnewline(register struct parse *p)
1012254225Speter{
1013254225Speter	register RCHAR_T *oldnext = p->next;
1014254225Speter	register RCHAR_T *oldend = p->end;
1015254225Speter	RCHAR_T bracket[4];
1016254225Speter
1017254225Speter	p->next = bracket;
1018254225Speter	p->end = bracket+3;
1019254225Speter	bracket[0] = '^';
1020254225Speter	bracket[1] = '\n';
1021254225Speter	bracket[2] = ']';
1022254225Speter	bracket[3] = '\0';
1023254225Speter	p_bracket(p);
1024254225Speter	assert(p->next == bracket+3);
1025254225Speter	p->next = oldnext;
1026254225Speter	p->end = oldend;
1027254225Speter}
1028254225Speter
1029254225Speter/*
1030254225Speter - repeat - generate code for a bounded repetition, recursively if needed
1031254225Speter == static void repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit);
1032254225Speter */
1033254225Speterstatic void
1034254225Speterrepeat(register struct parse *p, sopno start, int from, int to, size_t reclimit)
1035254225Speter
1036254225Speter            			/* operand from here to end of strip */
1037254225Speter         			/* repeated from this number */
1038254225Speter       				/* to this number of times (maybe INFINITY) */
1039254225Speter{
1040254225Speter	register sopno finish;
1041254225Speter#	define	N	2
1042254225Speter#	define	INF	3
1043254225Speter#	define	REP(f, t)	((f)*8 + (t))
1044254225Speter#	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1045254225Speter	register sopno copy;
1046254225Speter
1047254225Speter	if (reclimit++ > RECLIMIT)
1048254225Speter		p->error = REG_ESPACE;
1049254225Speter	if (p->error)
1050254225Speter		return;
1051254225Speter
1052254225Speter	finish = HERE();
1053254225Speter
1054254225Speter	assert(from <= to);
1055254225Speter
1056254225Speter	switch (REP(MAP(from), MAP(to))) {
1057254225Speter	case REP(0, 0):			/* must be user doing this */
1058254225Speter		DROP(finish-start);	/* drop the operand */
1059254225Speter		break;
1060254225Speter	case REP(0, 1):			/* as x{1,1}? */
1061254225Speter	case REP(0, N):			/* as x{1,n}? */
1062254225Speter	case REP(0, INF):		/* as x{1,}? */
1063254225Speter		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1064254225Speter		INSERT(OCH_, start);		/* offset is wrong... */
1065254225Speter		repeat(p, start+1, 1, to, reclimit);
1066254225Speter		ASTERN(OOR1, start);
1067254225Speter		AHEAD(start);			/* ... fix it */
1068254225Speter		EMIT(OOR2, 0);
1069254225Speter		AHEAD(THERE());
1070254225Speter		ASTERN(O_CH, THERETHERE());
1071254225Speter		break;
1072254225Speter	case REP(1, 1):			/* trivial case */
1073254225Speter		/* done */
1074254225Speter		break;
1075254225Speter	case REP(1, N):			/* as x?x{1,n-1} */
1076254225Speter		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1077254225Speter		INSERT(OCH_, start);
1078254225Speter		ASTERN(OOR1, start);
1079254225Speter		AHEAD(start);
1080254225Speter		EMIT(OOR2, 0);			/* offset very wrong... */
1081254225Speter		AHEAD(THERE());			/* ...so fix it */
1082254225Speter		ASTERN(O_CH, THERETHERE());
1083254225Speter		copy = dupl(p, start+1, finish+1);
1084254225Speter		assert(copy == finish+4);
1085254225Speter		repeat(p, copy, 1, to-1, reclimit);
1086254225Speter		break;
1087254225Speter	case REP(1, INF):		/* as x+ */
1088254225Speter		INSERT(OPLUS_, start);
1089254225Speter		ASTERN(O_PLUS, start);
1090254225Speter		break;
1091254225Speter	case REP(N, N):			/* as xx{m-1,n-1} */
1092254225Speter		copy = dupl(p, start, finish);
1093254225Speter		repeat(p, copy, from-1, to-1, reclimit);
1094254225Speter		break;
1095254225Speter	case REP(N, INF):		/* as xx{n-1,INF} */
1096254225Speter		copy = dupl(p, start, finish);
1097254225Speter		repeat(p, copy, from-1, to, reclimit);
1098254225Speter		break;
1099254225Speter	default:			/* "can't happen" */
1100254225Speter		SETERROR(REG_ASSERT);	/* just in case */
1101254225Speter		break;
1102254225Speter	}
1103254225Speter}
1104254225Speter
1105254225Speter/*
1106254225Speter - seterr - set an error condition
1107254225Speter == static int seterr(register struct parse *p, int e);
1108254225Speter */
1109254225Speterstatic int			/* useless but makes type checking happy */
1110254225Speterseterr(register struct parse *p, int e)
1111254225Speter{
1112254225Speter	if (p->error == 0)	/* keep earliest error condition */
1113254225Speter		p->error = e;
1114254225Speter	p->next = nuls;		/* try to bring things to a halt */
1115254225Speter	p->end = nuls;
1116254225Speter	return(0);		/* make the return value well-defined */
1117254225Speter}
1118254225Speter
1119254225Speter/*
1120254225Speter - allocset - allocate a set of characters for []
1121254225Speter == static cset *allocset(register struct parse *p);
1122254225Speter */
1123254225Speterstatic cset *
1124254225Speterallocset(register struct parse *p)
1125254225Speter{
1126254225Speter	register int no = p->g->ncsets++;
1127254225Speter	register size_t nc;
1128254225Speter	register size_t nbytes;
1129254225Speter	register cset *cs;
1130254225Speter	register size_t css = (size_t)p->g->csetsize;
1131254225Speter	register int i;
1132254225Speter
1133254225Speter	if (no >= p->ncsalloc) {	/* need another column of space */
1134254225Speter		p->ncsalloc += CHAR_BIT;
1135254225Speter		nc = p->ncsalloc;
1136254225Speter		assert(nc % CHAR_BIT == 0);
1137254225Speter		nbytes = nc / CHAR_BIT * css;
1138254225Speter		if (MEMSIZE(p) > MEMLIMIT)
1139254225Speter			goto oomem;
1140254225Speter		if (p->g->sets == NULL)
1141254225Speter			p->g->sets = (cset *)malloc(nc * sizeof(cset));
1142254225Speter		else
1143254225Speter			p->g->sets = (cset *)realloc((char *)p->g->sets,
1144254225Speter							nc * sizeof(cset));
1145254225Speter		if (p->g->setbits == NULL)
1146254225Speter			p->g->setbits = (uch *)malloc(nbytes);
1147254225Speter		else {
1148254225Speter			p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1149254225Speter								nbytes);
1150254225Speter			/* xxx this isn't right if setbits is now NULL */
1151254225Speter			for (i = 0; i < no; i++)
1152254225Speter				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1153254225Speter		}
1154254225Speter		if (p->g->sets != NULL && p->g->setbits != NULL)
1155254225Speter			(void) memset((char *)p->g->setbits + (nbytes - css),
1156254225Speter								0, css);
1157254225Speter		else {
1158254225Speteroomem:
1159254225Speter			no = 0;
1160254225Speter			SETERROR(REG_ESPACE);
1161254225Speter			/* caller's responsibility not to do set ops */
1162254225Speter			return NULL;
1163254225Speter		}
1164254225Speter	}
1165254225Speter
1166254225Speter	cs = &p->g->sets[no];
1167254225Speter	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1168254225Speter	cs->mask = 1 << ((no) % CHAR_BIT);
1169254225Speter	cs->hash = 0;
1170254225Speter	cs->smultis = 0;
1171254225Speter	cs->multis = NULL;
1172254225Speter
1173254225Speter	return(cs);
1174254225Speter}
1175254225Speter
1176254225Speter/*
1177254225Speter - freeset - free a now-unused set
1178254225Speter == static void freeset(register struct parse *p, register cset *cs);
1179254225Speter */
1180254225Speterstatic void
1181254225Speterfreeset(register struct parse *p, register cset *cs)
1182254225Speter{
1183254225Speter	register size_t i;
1184254225Speter	register cset *top = &p->g->sets[p->g->ncsets];
1185254225Speter	register size_t css = (size_t)p->g->csetsize;
1186254225Speter
1187254225Speter	for (i = 0; i < css; i++)
1188254225Speter		CHsub(cs, i);
1189254225Speter	if (cs == top-1)	/* recover only the easy case */
1190254225Speter		p->g->ncsets--;
1191254225Speter}
1192254225Speter
1193254225Speter/*
1194254225Speter - freezeset - final processing on a set of characters
1195254225Speter == static int freezeset(register struct parse *p, register cset *cs);
1196254225Speter *
1197254225Speter * The main task here is merging identical sets.  This is usually a waste
1198254225Speter * of time (although the hash code minimizes the overhead), but can win
1199254225Speter * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
1200254225Speter * is done using addition rather than xor -- all ASCII [aA] sets xor to
1201254225Speter * the same value!
1202254225Speter */
1203254225Speterstatic int			/* set number */
1204254225Speterfreezeset(register struct parse *p, register cset *cs)
1205254225Speter{
1206254225Speter	register uch h = cs->hash;
1207254225Speter	register size_t i;
1208254225Speter	register cset *top = &p->g->sets[p->g->ncsets];
1209254225Speter	register cset *cs2;
1210254225Speter	register size_t css = (size_t)p->g->csetsize;
1211254225Speter
1212254225Speter	/* look for an earlier one which is the same */
1213254225Speter	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1214254225Speter		if (cs2->hash == h && cs2 != cs) {
1215254225Speter			/* maybe */
1216254225Speter			for (i = 0; i < css; i++)
1217254225Speter				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1218254225Speter					break;		/* no */
1219254225Speter			if (i == css)
1220254225Speter				break;			/* yes */
1221254225Speter		}
1222254225Speter
1223254225Speter	if (cs2 < top) {	/* found one */
1224254225Speter		freeset(p, cs);
1225254225Speter		cs = cs2;
1226254225Speter	}
1227254225Speter
1228254225Speter	return((int)(cs - p->g->sets));
1229254225Speter}
1230254225Speter
1231254225Speter/*
1232254225Speter - firstch - return first character in a set (which must have at least one)
1233254225Speter == static int firstch(register struct parse *p, register cset *cs);
1234254225Speter */
1235254225Speterstatic int			/* character; there is no "none" value */
1236254225Speterfirstch(register struct parse *p, register cset *cs)
1237254225Speter{
1238254225Speter	register size_t i;
1239254225Speter	register size_t css = (size_t)p->g->csetsize;
1240254225Speter
1241254225Speter	for (i = 0; i < css; i++)
1242254225Speter		if (CHIN(cs, i))
1243254225Speter			return((char)i);
1244254225Speter	assert(never);
1245254225Speter	return(0);		/* arbitrary */
1246254225Speter}
1247254225Speter
1248254225Speter/*
1249254225Speter - nch - number of characters in a set
1250254225Speter == static int nch(register struct parse *p, register cset *cs);
1251254225Speter */
1252254225Speterstatic int
1253254225Speternch(register struct parse *p, register cset *cs)
1254254225Speter{
1255254225Speter	register size_t i;
1256254225Speter	register size_t css = (size_t)p->g->csetsize;
1257254225Speter	register int n = 0;
1258254225Speter
1259254225Speter	for (i = 0; i < css; i++)
1260254225Speter		if (CHIN(cs, i))
1261254225Speter			n++;
1262254225Speter	return(n);
1263254225Speter}
1264254225Speter
1265254225Speter/*
1266254225Speter - mcadd - add a collating element to a cset
1267254225Speter == static void mcadd(register struct parse *p, register cset *cs, \
1268254225Speter ==	register char *cp);
1269254225Speter */
1270254225Speterstatic void
1271254225Spetermcadd(register struct parse *p, register cset *cs, register const char *cp)
1272254225Speter{
1273254225Speter	register size_t oldend = cs->smultis;
1274254225Speter	void *np;
1275254225Speter
1276254225Speter	cs->smultis += strlen(cp) + 1;
1277254225Speter	np = realloc(cs->multis, cs->smultis);
1278254225Speter	if (np == NULL) {
1279254225Speter		if (cs->multis)
1280254225Speter			free(cs->multis);
1281254225Speter		cs->multis = NULL;
1282254225Speter		SETERROR(REG_ESPACE);
1283254225Speter		return;
1284254225Speter	}
1285254225Speter	cs->multis = np;
1286254225Speter
1287254225Speter	(void) strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
1288254225Speter}
1289254225Speter
1290254225Speter#ifdef notdef
1291254225Speter/*
1292254225Speter - mcsub - subtract a collating element from a cset
1293254225Speter == static void mcsub(register cset *cs, register char *cp);
1294254225Speter */
1295254225Speterstatic void
1296254225Spetermcsub(register cset *cs, register char *cp)
1297254225Speter{
1298254225Speter	register char *fp = mcfind(cs, cp);
1299254225Speter	register size_t len = strlen(fp);
1300254225Speter
1301254225Speter	assert(fp != NULL);
1302254225Speter	(void) memmove(fp, fp + len + 1,
1303254225Speter				cs->smultis - (fp + len + 1 - cs->multis));
1304254225Speter	cs->smultis -= len;
1305254225Speter
1306254225Speter	if (cs->smultis == 0) {
1307254225Speter		free(cs->multis);
1308254225Speter		cs->multis = NULL;
1309254225Speter		return;
1310254225Speter	}
1311254225Speter
1312254225Speter	cs->multis = realloc(cs->multis, cs->smultis);
1313254225Speter	assert(cs->multis != NULL);
1314254225Speter}
1315254225Speter
1316254225Speter/*
1317254225Speter - mcin - is a collating element in a cset?
1318254225Speter == static int mcin(register cset *cs, register char *cp);
1319254225Speter */
1320254225Speterstatic int
1321254225Spetermcin(register cset *cs, register char *cp)
1322254225Speter{
1323254225Speter	return(mcfind(cs, cp) != NULL);
1324254225Speter}
1325254225Speter
1326254225Speter/*
1327254225Speter - mcfind - find a collating element in a cset
1328254225Speter == static char *mcfind(register cset *cs, register char *cp);
1329254225Speter */
1330254225Speterstatic char *
1331254225Spetermcfind(register cset *cs, register char *cp)
1332254225Speter{
1333254225Speter	register char *p;
1334254225Speter
1335254225Speter	if (cs->multis == NULL)
1336254225Speter		return(NULL);
1337254225Speter	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1338254225Speter		if (strcmp(cp, p) == 0)
1339254225Speter			return(p);
1340254225Speter	return(NULL);
1341254225Speter}
1342254225Speter#endif
1343254225Speter
1344254225Speter/*
1345254225Speter - mcinvert - invert the list of collating elements in a cset
1346254225Speter == static void mcinvert(register struct parse *p, register cset *cs);
1347254225Speter *
1348254225Speter * This would have to know the set of possibilities.  Implementation
1349254225Speter * is deferred.
1350254225Speter */
1351254225Speterstatic void
1352254225Spetermcinvert(register struct parse *p, register cset *cs)
1353254225Speter{
1354254225Speter	assert(cs->multis == NULL);	/* xxx */
1355254225Speter}
1356254225Speter
1357254225Speter/*
1358254225Speter - mccase - add case counterparts of the list of collating elements in a cset
1359254225Speter == static void mccase(register struct parse *p, register cset *cs);
1360254225Speter *
1361254225Speter * This would have to know the set of possibilities.  Implementation
1362254225Speter * is deferred.
1363254225Speter */
1364254225Speterstatic void
1365254225Spetermccase(register struct parse *p, register cset *cs)
1366254225Speter{
1367254225Speter	assert(cs->multis == NULL);	/* xxx */
1368254225Speter}
1369254225Speter
1370254225Speter#ifdef notdef
1371254225Speter/*
1372254225Speter - isinsets - is this character in any sets?
1373254225Speter == static int isinsets(register struct re_guts *g, int c);
1374254225Speter */
1375254225Speterstatic int			/* predicate */
1376254225Speterisinsets(register struct re_guts *g, int c)
1377254225Speter{
1378254225Speter	register uch *col;
1379254225Speter	register int i;
1380254225Speter	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1381254225Speter	register unsigned uc = (unsigned char)c;
1382254225Speter
1383254225Speter	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1384254225Speter		if (col[uc] != 0)
1385254225Speter			return(1);
1386254225Speter	return(0);
1387254225Speter}
1388254225Speter
1389254225Speter/*
1390254225Speter - samesets - are these two characters in exactly the same sets?
1391254225Speter == static int samesets(register struct re_guts *g, int c1, int c2);
1392254225Speter */
1393254225Speterstatic int			/* predicate */
1394254225Spetersamesets(register struct re_guts *g, int c1, int c2)
1395254225Speter{
1396254225Speter	register uch *col;
1397254225Speter	register int i;
1398254225Speter	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1399254225Speter	register unsigned uc1 = (unsigned char)c1;
1400254225Speter	register unsigned uc2 = (unsigned char)c2;
1401254225Speter
1402254225Speter	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1403254225Speter		if (col[uc1] != col[uc2])
1404254225Speter			return(0);
1405254225Speter	return(1);
1406254225Speter}
1407254225Speter#endif
1408254225Speter
1409254225Speter/*
1410254225Speter - categorize - sort out character categories
1411254225Speter == static void categorize(struct parse *p, register struct re_guts *g);
1412254225Speter */
1413254225Speterstatic void
1414254225Spetercategorize(struct parse *p, register struct re_guts *g)
1415254225Speter{
1416254225Speter#ifdef notdef
1417254225Speter	register cat_t *cats = g->categories;
1418254225Speter	register int c;
1419254225Speter	register int c2;
1420254225Speter	register cat_t cat;
1421254225Speter
1422254225Speter	/* avoid making error situations worse */
1423254225Speter	if (p->error != 0)
1424254225Speter		return;
1425254225Speter
1426254225Speter	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1427254225Speter		if (cats[c] == 0 && isinsets(g, c)) {
1428254225Speter			cat = g->ncategories++;
1429254225Speter			cats[c] = cat;
1430254225Speter			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1431254225Speter				if (cats[c2] == 0 && samesets(g, c, c2))
1432254225Speter					cats[c2] = cat;
1433254225Speter		}
1434254225Speter#endif
1435254225Speter}
1436254225Speter
1437254225Speter/*
1438254225Speter - dupl - emit a duplicate of a bunch of sops
1439254225Speter == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1440254225Speter */
1441254225Speterstatic sopno			/* start of duplicate */
1442254225Speterdupl(register struct parse *p, sopno start, sopno finish)
1443254225Speter
1444254225Speter            			/* from here */
1445254225Speter             			/* to this less one */
1446254225Speter{
1447254225Speter	register sopno ret = HERE();
1448254225Speter	register sopno len = finish - start;
1449254225Speter
1450254225Speter	assert(finish >= start);
1451254225Speter	if (len == 0)
1452254225Speter		return(ret);
1453254225Speter	if (!enlarge(p, p->ssize + len))	/* this many unexpected additions */
1454254225Speter		return ret;
1455254225Speter	assert(p->ssize >= p->slen + len);
1456254225Speter	(void) memcpy((char *)(p->strip + p->slen),
1457254225Speter		(char *)(p->strip + start), (size_t)len*sizeof(sop));
1458254225Speter	(void) memcpy((char *)(p->stripdata + p->slen),
1459254225Speter		(char *)(p->stripdata + start), (size_t)len*sizeof(RCHAR_T));
1460254225Speter	p->slen += len;
1461254225Speter	return(ret);
1462254225Speter}
1463254225Speter
1464254225Speter/*
1465254225Speter - doemit - emit a strip operator
1466254225Speter == static void doemit(register struct parse *p, sop op, size_t opnd);
1467254225Speter *
1468254225Speter * It might seem better to implement this as a macro with a function as
1469254225Speter * hard-case backup, but it's just too big and messy unless there are
1470254225Speter * some changes to the data structures.  Maybe later.
1471254225Speter */
1472254225Speterstatic void
1473254225Speterdoemit(register struct parse *p, sop op, size_t opnd)
1474254225Speter{
1475254225Speter	/* avoid making error situations worse */
1476254225Speter	if (p->error != 0)
1477254225Speter		return;
1478254225Speter
1479254225Speter	/* deal with oversize operands ("can't happen", more or less) */
1480254225Speter	assert(opnd < 1);
1481254225Speter
1482254225Speter	/* deal with undersized strip */
1483254225Speter	if (p->slen >= p->ssize)
1484254225Speter		if (!enlarge(p, (p->ssize+1) / 2 * 3))	/* +50% */
1485254225Speter			return;
1486254225Speter
1487254225Speter	/* finally, it's all reduced to the easy case */
1488254225Speter	p->strip[p->slen] = op;
1489254225Speter	p->stripdata[p->slen] = opnd;
1490254225Speter	p->slen++;
1491254225Speter}
1492254225Speter
1493254225Speter/*
1494254225Speter - doinsert - insert a sop into the strip
1495254225Speter == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1496254225Speter */
1497254225Speterstatic void
1498254225Speterdoinsert(register struct parse *p, sop op, size_t opnd, sopno pos)
1499254225Speter{
1500254225Speter	register sopno sn;
1501254225Speter	register sop s;
1502254225Speter	register RCHAR_T d;
1503254225Speter	register int i;
1504254225Speter
1505254225Speter	/* avoid making error situations worse */
1506254225Speter	if (p->error != 0)
1507254225Speter		return;
1508254225Speter
1509254225Speter	sn = HERE();
1510254225Speter	EMIT(op, opnd);		/* do checks, ensure space */
1511254225Speter	assert(HERE() == sn+1);
1512254225Speter	s = p->strip[sn];
1513254225Speter	d = p->stripdata[sn];
1514254225Speter
1515254225Speter	/* adjust paren pointers */
1516254225Speter	assert(pos > 0);
1517254225Speter	for (i = 1; i < NPAREN; i++) {
1518254225Speter		if (p->pbegin[i] >= pos) {
1519254225Speter			p->pbegin[i]++;
1520254225Speter		}
1521254225Speter		if (p->pend[i] >= pos) {
1522254225Speter			p->pend[i]++;
1523254225Speter		}
1524254225Speter	}
1525254225Speter
1526254225Speter	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1527254225Speter						(HERE()-pos-1)*sizeof(sop));
1528254225Speter	memmove((char *)&p->stripdata[pos+1], (char *)&p->stripdata[pos],
1529254225Speter						(HERE()-pos-1)*sizeof(RCHAR_T));
1530254225Speter	p->strip[pos] = s;
1531254225Speter	p->stripdata[pos] = d;
1532254225Speter}
1533254225Speter
1534254225Speter/*
1535254225Speter - dofwd - complete a forward reference
1536254225Speter == static void dofwd(register struct parse *p, sopno pos, sop value);
1537254225Speter */
1538254225Speterstatic void
1539254225Speterdofwd(register struct parse *p, register sopno pos, sop value)
1540254225Speter{
1541254225Speter	/* avoid making error situations worse */
1542254225Speter	if (p->error != 0)
1543254225Speter		return;
1544254225Speter
1545254225Speter	assert(value < 1);
1546254225Speter	p->stripdata[pos] = value;
1547254225Speter}
1548254225Speter
1549254225Speter/*
1550254225Speter - enlarge - enlarge the strip
1551254225Speter == static int enlarge(register struct parse *p, sopno size);
1552254225Speter */
1553254225Speterstatic int
1554254225Speterenlarge(register struct parse *p, register sopno size)
1555254225Speter{
1556254225Speter	register sop *sp;
1557254225Speter	register RCHAR_T *dp;
1558254225Speter	sopno osize;
1559254225Speter
1560254225Speter	if (p->ssize >= size)
1561254225Speter		return 1;
1562254225Speter
1563254225Speter	osize = p->ssize;
1564254225Speter	p->ssize = size;
1565254225Speter	if (MEMSIZE(p) > MEMLIMIT)
1566254225Speter		goto oomem;
1567254225Speter	sp = realloc(p->strip, p->ssize * sizeof(sop));
1568254225Speter	if (sp == NULL)
1569254225Speter		goto oomem;
1570254225Speter	p->strip = sp;
1571254225Speter	dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T));
1572254225Speter	if (dp == NULL) {
1573254225Speteroomem:
1574254225Speter		p->ssize = osize;
1575254225Speter		SETERROR(REG_ESPACE);
1576254225Speter		return 0;
1577254225Speter	}
1578254225Speter	p->stripdata = dp;
1579254225Speter	return 1;
1580254225Speter}
1581254225Speter
1582254225Speter/*
1583254225Speter - stripsnug - compact the strip
1584254225Speter == static void stripsnug(register struct parse *p, register struct re_guts *g);
1585254225Speter */
1586254225Speterstatic void
1587254225Speterstripsnug(register struct parse *p, register struct re_guts *g)
1588254225Speter{
1589254225Speter	g->nstates = p->slen;
1590254225Speter	g->strip = (sop *)realloc((char *)p->strip,
1591254225Speter	    p->slen * sizeof(sop));
1592254225Speter	if (g->strip == NULL) {
1593254225Speter		SETERROR(REG_ESPACE);
1594254225Speter		g->strip = p->strip;
1595254225Speter	}
1596254225Speter	g->stripdata = (RCHAR_T *)realloc((char *)p->stripdata,
1597254225Speter	    p->slen * sizeof(RCHAR_T));
1598254225Speter	if (g->stripdata == NULL) {
1599254225Speter		SETERROR(REG_ESPACE);
1600254225Speter		g->stripdata = p->stripdata;
1601254225Speter	}
1602254225Speter}
1603254225Speter
1604254225Speter/*
1605254225Speter - findmust - fill in must and mlen with longest mandatory literal string
1606254225Speter == static void findmust(register struct parse *p, register struct re_guts *g);
1607254225Speter *
1608254225Speter * This algorithm could do fancy things like analyzing the operands of |
1609254225Speter * for common subsequences.  Someday.  This code is simple and finds most
1610254225Speter * of the interesting cases.
1611254225Speter *
1612254225Speter * Note that must and mlen got initialized during setup.
1613254225Speter */
1614254225Speterstatic void
1615254225Speterfindmust(struct parse *p, register struct re_guts *g)
1616254225Speter{
1617254225Speter	register sop *scans;
1618254225Speter	register RCHAR_T *scand;
1619254225Speter	sop *starts = 0;
1620254225Speter	RCHAR_T *startd = NULL;
1621254225Speter	register sop *newstarts = 0;
1622254225Speter	register RCHAR_T *newstartd = NULL;
1623254225Speter	register sopno newlen;
1624254225Speter	register sop s;
1625254225Speter	register RCHAR_T d;
1626254225Speter	register RCHAR_T *cp;
1627254225Speter	register sopno i;
1628254225Speter
1629254225Speter	/* avoid making error situations worse */
1630254225Speter	if (p->error != 0)
1631254225Speter		return;
1632254225Speter
1633254225Speter	/* find the longest OCHAR sequence in strip */
1634254225Speter	newlen = 0;
1635254225Speter	scans = g->strip + 1;
1636254225Speter	scand = g->stripdata + 1;
1637254225Speter	do {
1638254225Speter		s = *scans++;
1639254225Speter		d = *scand++;
1640254225Speter		switch (s) {
1641254225Speter		case OCHAR:		/* sequence member */
1642254225Speter			if (newlen == 0) {		/* new sequence */
1643254225Speter				newstarts = scans - 1;
1644254225Speter				newstartd = scand - 1;
1645254225Speter			}
1646254225Speter			newlen++;
1647254225Speter			break;
1648254225Speter		case OPLUS_:		/* things that don't break one */
1649254225Speter		case OLPAREN:
1650254225Speter		case ORPAREN:
1651254225Speter			break;
1652254225Speter		case OQUEST_:		/* things that must be skipped */
1653254225Speter		case OCH_:
1654254225Speter			scans--;
1655254225Speter			scand--;
1656254225Speter			do {
1657254225Speter				scans += d;
1658254225Speter				scand += d;
1659254225Speter				s = *scans;
1660254225Speter				d = *scand;
1661254225Speter				/* assert() interferes w debug printouts */
1662254225Speter				if (s != O_QUEST && s != O_CH && s != OOR2) {
1663254225Speter					g->iflags |= BAD;
1664254225Speter					return;
1665254225Speter				}
1666254225Speter			} while (s != O_QUEST && s != O_CH);
1667254225Speter			/* fallthrough */
1668254225Speter		default:		/* things that break a sequence */
1669254225Speter			if (newlen > g->mlen) {		/* ends one */
1670254225Speter				starts = newstarts;
1671254225Speter				startd = newstartd;
1672254225Speter				g->mlen = newlen;
1673254225Speter			}
1674254225Speter			newlen = 0;
1675254225Speter			break;
1676254225Speter		}
1677254225Speter	} while (s != OEND);
1678254225Speter
1679254225Speter	if (g->mlen == 0)		/* there isn't one */
1680254225Speter		return;
1681254225Speter
1682254225Speter	/* turn it into a character string */
1683254225Speter	g->must = malloc(((size_t)g->mlen + 1) * sizeof(RCHAR_T));
1684254225Speter	if (g->must == NULL) {		/* argh; just forget it */
1685254225Speter		g->mlen = 0;
1686254225Speter		return;
1687254225Speter	}
1688254225Speter	cp = g->must;
1689254225Speter	scans = starts;
1690254225Speter	scand = startd;
1691254225Speter	for (i = g->mlen; i > 0; i--) {
1692254225Speter		for (;;) {
1693254225Speter			s = *scans++;
1694254225Speter			d = *scand++;
1695254225Speter			if (s == OCHAR)
1696254225Speter				break;
1697254225Speter		}
1698254225Speter		assert(cp < g->must + g->mlen);
1699254225Speter		*cp++ = d;
1700254225Speter	}
1701254225Speter	assert(cp == g->must + g->mlen);
1702254225Speter	*cp++ = '\0';		/* just on general principles */
1703254225Speter}
1704254225Speter
1705254225Speter/*
1706254225Speter - pluscount - count + nesting
1707254225Speter == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1708254225Speter */
1709254225Speterstatic sopno			/* nesting depth */
1710254225Speterpluscount(struct parse *p, register struct re_guts *g)
1711254225Speter{
1712254225Speter	register sop *scan;
1713254225Speter	register sop s;
1714254225Speter	register sopno plusnest = 0;
1715254225Speter	register sopno maxnest = 0;
1716254225Speter
1717254225Speter	if (p->error != 0)
1718254225Speter		return(0);	/* there may not be an OEND */
1719254225Speter
1720254225Speter	scan = g->strip + 1;
1721254225Speter	do {
1722254225Speter		s = *scan++;
1723254225Speter		switch (s) {
1724254225Speter		case OPLUS_:
1725254225Speter			plusnest++;
1726254225Speter			break;
1727254225Speter		case O_PLUS:
1728254225Speter			if (plusnest > maxnest)
1729254225Speter				maxnest = plusnest;
1730254225Speter			plusnest--;
1731254225Speter			break;
1732254225Speter		}
1733254225Speter	} while (s != OEND);
1734254225Speter	if (plusnest != 0)
1735254225Speter		g->iflags |= BAD;
1736254225Speter	return(maxnest);
1737254225Speter}
1738