160786Sps/*
260786Sps * regcomp and regexec -- regsub and regerror are elsewhere
360786Sps *
460786Sps *	Copyright (c) 1986 by University of Toronto.
560786Sps *	Written by Henry Spencer.  Not derived from licensed software.
660786Sps *
760786Sps *	Permission is granted to anyone to use this software for any
860786Sps *	purpose on any computer system, and to redistribute it freely,
960786Sps *	subject to the following restrictions:
1060786Sps *
1160786Sps *	1. The author is not responsible for the consequences of use of
1260786Sps *		this software, no matter how awful, even if they arise
1360786Sps *		from defects in it.
1460786Sps *
1560786Sps *	2. The origin of this software must not be misrepresented, either
1660786Sps *		by explicit claim or by omission.
1760786Sps *
1860786Sps *	3. Altered versions must be plainly marked as such, and must not
1960786Sps *		be misrepresented as being the original software.
2060786Sps *
2160786Sps * Beware that some of this code is subtly aware of the way operator
2260786Sps * precedence is structured in regular expressions.  Serious changes in
2360786Sps * regular-expression syntax might require a total rethink.
2460786Sps *
2560786Sps * *** NOTE: this code has been altered slightly for use in Tcl. ***
2660786Sps * Slightly modified by David MacKenzie to undo most of the changes for TCL.
2760786Sps * Added regexec2 with notbol parameter. -- 4/19/99 Mark Nudelman
2860786Sps */
2960786Sps
3060786Sps#include "less.h"
3160786Sps#if HAVE_STDIO_H
3260786Sps#include <stdio.h>
3360786Sps#endif
3460786Sps#if HAVE_STDLIB_H
3560786Sps#include <stdlib.h>
3660786Sps#endif
3760786Sps#if HAVE_STRING_H
3860786Sps#include <string.h>
3960786Sps#endif
4060786Sps#include "regexp.h"
4160786Sps
4260786Sps/*
4360786Sps * The "internal use only" fields in regexp.h are present to pass info from
4460786Sps * compile to execute that permits the execute phase to run lots faster on
4560786Sps * simple cases.  They are:
4660786Sps *
4760786Sps * regstart	char that must begin a match; '\0' if none obvious
4860786Sps * reganch	is the match anchored (at beginning-of-line only)?
4960786Sps * regmust	string (pointer into program) that match must include, or NULL
5060786Sps * regmlen	length of regmust string
5160786Sps *
5260786Sps * Regstart and reganch permit very fast decisions on suitable starting points
5360786Sps * for a match, cutting down the work a lot.  Regmust permits fast rejection
5460786Sps * of lines that cannot possibly match.  The regmust tests are costly enough
5560786Sps * that regcomp() supplies a regmust only if the r.e. contains something
5660786Sps * potentially expensive (at present, the only such thing detected is * or +
5760786Sps * at the start of the r.e., which can involve a lot of backup).  Regmlen is
5860786Sps * supplied because the test in regexec() needs it and regcomp() is
5960786Sps * computing it anyway.
6060786Sps */
6160786Sps
6260786Sps/*
6360786Sps * Structure for regexp "program".  This is essentially a linear encoding
6460786Sps * of a nondeterministic finite-state machine (aka syntax charts or
6560786Sps * "railroad normal form" in parsing technology).  Each node is an opcode
6660786Sps * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
6760786Sps * all nodes except BRANCH implement concatenation; a "next" pointer with
6860786Sps * a BRANCH on both ends of it is connecting two alternatives.  (Here we
6960786Sps * have one of the subtle syntax dependencies:  an individual BRANCH (as
7060786Sps * opposed to a collection of them) is never concatenated with anything
7160786Sps * because of operator precedence.)  The operand of some types of node is
7260786Sps * a literal string; for others, it is a node leading into a sub-FSM.  In
7360786Sps * particular, the operand of a BRANCH node is the first node of the branch.
7460786Sps * (NB this is *not* a tree structure:  the tail of the branch connects
7560786Sps * to the thing following the set of BRANCHes.)  The opcodes are:
7660786Sps */
7760786Sps
7860786Sps/* definition	number	opnd?	meaning */
7960786Sps#undef EOL
8060786Sps#define	END	0	/* no	End of program. */
8160786Sps#define	BOL	1	/* no	Match "" at beginning of line. */
8260786Sps#define	EOL	2	/* no	Match "" at end of line. */
8360786Sps#define	ANY	3	/* no	Match any one character. */
8460786Sps#define	ANYOF	4	/* str	Match any character in this string. */
8560786Sps#define	ANYBUT	5	/* str	Match any character not in this string. */
8660786Sps#define	BRANCH	6	/* node	Match this alternative, or the next... */
8760786Sps#define	BACK	7	/* no	Match "", "next" ptr points backward. */
8860786Sps#define	EXACTLY	8	/* str	Match this string. */
8960786Sps#define	NOTHING	9	/* no	Match empty string. */
9060786Sps#define	STAR	10	/* node	Match this (simple) thing 0 or more times. */
9160786Sps#define	PLUS	11	/* node	Match this (simple) thing 1 or more times. */
9260786Sps#define	OPEN	20	/* no	Mark this point in input as start of #n. */
9360786Sps			/*	OPEN+1 is number 1, etc. */
9460786Sps#define	CLOSE	30	/* no	Analogous to OPEN. */
9560786Sps
9660786Sps/*
9760786Sps * Opcode notes:
9860786Sps *
9960786Sps * BRANCH	The set of branches constituting a single choice are hooked
10060786Sps *		together with their "next" pointers, since precedence prevents
10160786Sps *		anything being concatenated to any individual branch.  The
10260786Sps *		"next" pointer of the last BRANCH in a choice points to the
10360786Sps *		thing following the whole choice.  This is also where the
10460786Sps *		final "next" pointer of each individual branch points; each
10560786Sps *		branch starts with the operand node of a BRANCH node.
10660786Sps *
10760786Sps * BACK		Normal "next" pointers all implicitly point forward; BACK
10860786Sps *		exists to make loop structures possible.
10960786Sps *
11060786Sps * STAR,PLUS	'?', and complex '*' and '+', are implemented as circular
11160786Sps *		BRANCH structures using BACK.  Simple cases (one character
11260786Sps *		per match) are implemented with STAR and PLUS for speed
11360786Sps *		and to minimize recursive plunges.
11460786Sps *
11560786Sps * OPEN,CLOSE	...are numbered at compile time.
11660786Sps */
11760786Sps
11860786Sps/*
11960786Sps * A node is one char of opcode followed by two chars of "next" pointer.
12060786Sps * "Next" pointers are stored as two 8-bit pieces, high order first.  The
12160786Sps * value is a positive offset from the opcode of the node containing it.
12260786Sps * An operand, if any, simply follows the node.  (Note that much of the
12360786Sps * code generation knows about this implicit relationship.)
12460786Sps *
12560786Sps * Using two bytes for the "next" pointer is vast overkill for most things,
12660786Sps * but allows patterns to get big without disasters.
12760786Sps */
12860786Sps#define	OP(p)	(*(p))
12960786Sps#define	NEXT(p)	(((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
13060786Sps#define	OPERAND(p)	((p) + 3)
13160786Sps
13260786Sps/*
13360786Sps * See regmagic.h for one further detail of program structure.
13460786Sps */
13560786Sps
13660786Sps
13760786Sps/*
13860786Sps * Utility definitions.
13960786Sps */
14060786Sps#ifndef CHARBITS
14160786Sps#define	UCHARAT(p)	((int)*(unsigned char *)(p))
14260786Sps#else
14360786Sps#define	UCHARAT(p)	((int)*(p)&CHARBITS)
14460786Sps#endif
14560786Sps
14660786Sps#define	FAIL(m)	{ regerror(m); return(NULL); }
14760786Sps#define	ISMULT(c)	((c) == '*' || (c) == '+' || (c) == '?')
14860786Sps#define	META	"^$.[()|?+*\\"
14960786Sps
15060786Sps/*
15160786Sps * Flags to be passed up and down.
15260786Sps */
15360786Sps#define	HASWIDTH	01	/* Known never to match null string. */
15460786Sps#define	SIMPLE		02	/* Simple enough to be STAR/PLUS operand. */
15560786Sps#define	SPSTART		04	/* Starts with * or +. */
15660786Sps#define	WORST		0	/* Worst case. */
15760786Sps
15860786Sps/*
15960786Sps * Global work variables for regcomp().
16060786Sps */
16160786Spsstatic char *regparse;		/* Input-scan pointer. */
16260786Spsstatic int regnpar;		/* () count. */
16360786Spsstatic char regdummy;
16460786Spsstatic char *regcode;		/* Code-emit pointer; &regdummy = don't. */
16560786Spsstatic long regsize;		/* Code size. */
16660786Sps
16760786Sps/*
16860786Sps * The first byte of the regexp internal "program" is actually this magic
16960786Sps * number; the start node begins in the second byte.
17060786Sps */
17160786Sps#define	MAGIC	0234
17260786Sps
17360786Sps
17460786Sps/*
17560786Sps * Forward declarations for regcomp()'s friends.
17660786Sps */
17760786Sps#ifndef STATIC
17860786Sps#define	STATIC	static
17960786Sps#endif
18060786SpsSTATIC char *reg();
18160786SpsSTATIC char *regbranch();
18260786SpsSTATIC char *regpiece();
18360786SpsSTATIC char *regatom();
18460786SpsSTATIC char *regnode();
18560786SpsSTATIC char *regnext();
18660786SpsSTATIC void regc();
18760786SpsSTATIC void reginsert();
18860786SpsSTATIC void regtail();
18960786SpsSTATIC void regoptail();
19060786Sps#ifdef STRCSPN
19160786SpsSTATIC int strcspn();
19260786Sps#endif
19360786Sps
19460786Sps/*
19560786Sps - regcomp - compile a regular expression into internal code
19660786Sps *
19760786Sps * We can't allocate space until we know how big the compiled form will be,
19860786Sps * but we can't compile it (and thus know how big it is) until we've got a
19960786Sps * place to put the code.  So we cheat:  we compile it twice, once with code
20060786Sps * generation turned off and size counting turned on, and once "for real".
20160786Sps * This also means that we don't allocate space until we are sure that the
20260786Sps * thing really will compile successfully, and we never have to move the
20360786Sps * code and thus invalidate pointers into it.  (Note that it has to be in
20460786Sps * one piece because free() must be able to free it all.)
20560786Sps *
20660786Sps * Beware that the optimization-preparation code in here knows about some
20760786Sps * of the structure of the compiled regexp.
20860786Sps */
20960786Spsregexp *
21060786Spsregcomp(exp)
21160786Spschar *exp;
21260786Sps{
21360786Sps	register regexp *r;
21460786Sps	register char *scan;
21560786Sps	register char *longest;
21660786Sps	register int len;
21760786Sps	int flags;
21860786Sps
21960786Sps	if (exp == NULL)
22060786Sps		FAIL("NULL argument");
22160786Sps
22260786Sps	/* First pass: determine size, legality. */
22360786Sps	regparse = exp;
22460786Sps	regnpar = 1;
22560786Sps	regsize = 0L;
22660786Sps	regcode = &regdummy;
22760786Sps	regc(MAGIC);
22860786Sps	if (reg(0, &flags) == NULL)
22960786Sps		return(NULL);
23060786Sps
23160786Sps	/* Small enough for pointer-storage convention? */
23260786Sps	if (regsize >= 32767L)		/* Probably could be 65535L. */
23360786Sps		FAIL("regexp too big");
23460786Sps
23560786Sps	/* Allocate space. */
23660786Sps	r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
23760786Sps	if (r == NULL)
23860786Sps		FAIL("out of space");
23960786Sps
24060786Sps	/* Second pass: emit code. */
24160786Sps	regparse = exp;
24260786Sps	regnpar = 1;
24360786Sps	regcode = r->program;
24460786Sps	regc(MAGIC);
24560786Sps	if (reg(0, &flags) == NULL)
246294286Sdelphij	{
247294286Sdelphij		free(r);
24860786Sps		return(NULL);
249294286Sdelphij	}
25060786Sps
25160786Sps	/* Dig out information for optimizations. */
25260786Sps	r->regstart = '\0';	/* Worst-case defaults. */
25360786Sps	r->reganch = 0;
25460786Sps	r->regmust = NULL;
25560786Sps	r->regmlen = 0;
25660786Sps	scan = r->program+1;			/* First BRANCH. */
25760786Sps	if (OP(regnext(scan)) == END) {		/* Only one top-level choice. */
25860786Sps		scan = OPERAND(scan);
25960786Sps
26060786Sps		/* Starting-point info. */
26160786Sps		if (OP(scan) == EXACTLY)
26260786Sps			r->regstart = *OPERAND(scan);
26360786Sps		else if (OP(scan) == BOL)
26460786Sps			r->reganch++;
26560786Sps
26660786Sps		/*
26760786Sps		 * If there's something expensive in the r.e., find the
26860786Sps		 * longest literal string that must appear and make it the
26960786Sps		 * regmust.  Resolve ties in favor of later strings, since
27060786Sps		 * the regstart check works with the beginning of the r.e.
27160786Sps		 * and avoiding duplication strengthens checking.  Not a
27260786Sps		 * strong reason, but sufficient in the absence of others.
27360786Sps		 */
27460786Sps		if (flags&SPSTART) {
27560786Sps			longest = NULL;
27660786Sps			len = 0;
27760786Sps			for (; scan != NULL; scan = regnext(scan))
27860786Sps				if (OP(scan) == EXACTLY && ((int) strlen(OPERAND(scan))) >= len) {
27960786Sps					longest = OPERAND(scan);
280294286Sdelphij					len = (int) strlen(OPERAND(scan));
28160786Sps				}
28260786Sps			r->regmust = longest;
28360786Sps			r->regmlen = len;
28460786Sps		}
28560786Sps	}
28660786Sps
28760786Sps	return(r);
28860786Sps}
28960786Sps
29060786Sps/*
29160786Sps - reg - regular expression, i.e. main body or parenthesized thing
29260786Sps *
29360786Sps * Caller must absorb opening parenthesis.
29460786Sps *
29560786Sps * Combining parenthesis handling with the base level of regular expression
29660786Sps * is a trifle forced, but the need to tie the tails of the branches to what
29760786Sps * follows makes it hard to avoid.
29860786Sps */
29960786Spsstatic char *
30060786Spsreg(paren, flagp)
30160786Spsint paren;			/* Parenthesized? */
30260786Spsint *flagp;
30360786Sps{
30460786Sps	register char *ret;
30560786Sps	register char *br;
30660786Sps	register char *ender;
30760786Sps	register int parno = 0;
30860786Sps	int flags;
30960786Sps
31060786Sps	*flagp = HASWIDTH;	/* Tentatively. */
31160786Sps
31260786Sps	/* Make an OPEN node, if parenthesized. */
31360786Sps	if (paren) {
31460786Sps		if (regnpar >= NSUBEXP)
31560786Sps			FAIL("too many ()");
31660786Sps		parno = regnpar;
31760786Sps		regnpar++;
31860786Sps		ret = regnode(OPEN+parno);
31960786Sps	} else
32060786Sps		ret = NULL;
32160786Sps
32260786Sps	/* Pick up the branches, linking them together. */
32360786Sps	br = regbranch(&flags);
32460786Sps	if (br == NULL)
32560786Sps		return(NULL);
32660786Sps	if (ret != NULL)
32760786Sps		regtail(ret, br);	/* OPEN -> first. */
32860786Sps	else
32960786Sps		ret = br;
33060786Sps	if (!(flags&HASWIDTH))
33160786Sps		*flagp &= ~HASWIDTH;
33260786Sps	*flagp |= flags&SPSTART;
33360786Sps	while (*regparse == '|') {
33460786Sps		regparse++;
33560786Sps		br = regbranch(&flags);
33660786Sps		if (br == NULL)
33760786Sps			return(NULL);
33860786Sps		regtail(ret, br);	/* BRANCH -> BRANCH. */
33960786Sps		if (!(flags&HASWIDTH))
34060786Sps			*flagp &= ~HASWIDTH;
34160786Sps		*flagp |= flags&SPSTART;
34260786Sps	}
34360786Sps
34460786Sps	/* Make a closing node, and hook it on the end. */
34560786Sps	ender = regnode((paren) ? CLOSE+parno : END);
34660786Sps	regtail(ret, ender);
34760786Sps
34860786Sps	/* Hook the tails of the branches to the closing node. */
34960786Sps	for (br = ret; br != NULL; br = regnext(br))
35060786Sps		regoptail(br, ender);
35160786Sps
35260786Sps	/* Check for proper termination. */
35360786Sps	if (paren && *regparse++ != ')') {
35460786Sps		FAIL("unmatched ()");
35560786Sps	} else if (!paren && *regparse != '\0') {
35660786Sps		if (*regparse == ')') {
35760786Sps			FAIL("unmatched ()");
35860786Sps		} else
35960786Sps			FAIL("junk on end");	/* "Can't happen". */
36060786Sps		/* NOTREACHED */
36160786Sps	}
36260786Sps
36360786Sps	return(ret);
36460786Sps}
36560786Sps
36660786Sps/*
36760786Sps - regbranch - one alternative of an | operator
36860786Sps *
36960786Sps * Implements the concatenation operator.
37060786Sps */
37160786Spsstatic char *
37260786Spsregbranch(flagp)
37360786Spsint *flagp;
37460786Sps{
37560786Sps	register char *ret;
37660786Sps	register char *chain;
37760786Sps	register char *latest;
37860786Sps	int flags;
37960786Sps
38060786Sps	*flagp = WORST;		/* Tentatively. */
38160786Sps
38260786Sps	ret = regnode(BRANCH);
38360786Sps	chain = NULL;
38460786Sps	while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
38560786Sps		latest = regpiece(&flags);
38660786Sps		if (latest == NULL)
38760786Sps			return(NULL);
38860786Sps		*flagp |= flags&HASWIDTH;
38960786Sps		if (chain == NULL)	/* First piece. */
39060786Sps			*flagp |= flags&SPSTART;
39160786Sps		else
39260786Sps			regtail(chain, latest);
39360786Sps		chain = latest;
39460786Sps	}
39560786Sps	if (chain == NULL)	/* Loop ran zero times. */
39660786Sps		(void) regnode(NOTHING);
39760786Sps
39860786Sps	return(ret);
39960786Sps}
40060786Sps
40160786Sps/*
40260786Sps - regpiece - something followed by possible [*+?]
40360786Sps *
40460786Sps * Note that the branching code sequences used for ? and the general cases
40560786Sps * of * and + are somewhat optimized:  they use the same NOTHING node as
40660786Sps * both the endmarker for their branch list and the body of the last branch.
40760786Sps * It might seem that this node could be dispensed with entirely, but the
40860786Sps * endmarker role is not redundant.
40960786Sps */
41060786Spsstatic char *
41160786Spsregpiece(flagp)
41260786Spsint *flagp;
41360786Sps{
41460786Sps	register char *ret;
41560786Sps	register char op;
41660786Sps	register char *next;
41760786Sps	int flags;
41860786Sps
41960786Sps	ret = regatom(&flags);
42060786Sps	if (ret == NULL)
42160786Sps		return(NULL);
42260786Sps
42360786Sps	op = *regparse;
42460786Sps	if (!ISMULT(op)) {
42560786Sps		*flagp = flags;
42660786Sps		return(ret);
42760786Sps	}
42860786Sps
42960786Sps	if (!(flags&HASWIDTH) && op != '?')
43060786Sps		FAIL("*+ operand could be empty");
43160786Sps	*flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
43260786Sps
43360786Sps	if (op == '*' && (flags&SIMPLE))
43460786Sps		reginsert(STAR, ret);
43560786Sps	else if (op == '*') {
43660786Sps		/* Emit x* as (x&|), where & means "self". */
43760786Sps		reginsert(BRANCH, ret);			/* Either x */
43860786Sps		regoptail(ret, regnode(BACK));		/* and loop */
43960786Sps		regoptail(ret, ret);			/* back */
44060786Sps		regtail(ret, regnode(BRANCH));		/* or */
44160786Sps		regtail(ret, regnode(NOTHING));		/* null. */
44260786Sps	} else if (op == '+' && (flags&SIMPLE))
44360786Sps		reginsert(PLUS, ret);
44460786Sps	else if (op == '+') {
44560786Sps		/* Emit x+ as x(&|), where & means "self". */
44660786Sps		next = regnode(BRANCH);			/* Either */
44760786Sps		regtail(ret, next);
44860786Sps		regtail(regnode(BACK), ret);		/* loop back */
44960786Sps		regtail(next, regnode(BRANCH));		/* or */
45060786Sps		regtail(ret, regnode(NOTHING));		/* null. */
45160786Sps	} else if (op == '?') {
45260786Sps		/* Emit x? as (x|) */
45360786Sps		reginsert(BRANCH, ret);			/* Either x */
45460786Sps		regtail(ret, regnode(BRANCH));		/* or */
45560786Sps		next = regnode(NOTHING);		/* null. */
45660786Sps		regtail(ret, next);
45760786Sps		regoptail(ret, next);
45860786Sps	}
45960786Sps	regparse++;
46060786Sps	if (ISMULT(*regparse))
46160786Sps		FAIL("nested *?+");
46260786Sps
46360786Sps	return(ret);
46460786Sps}
46560786Sps
46660786Sps/*
46760786Sps - regatom - the lowest level
46860786Sps *
46960786Sps * Optimization:  gobbles an entire sequence of ordinary characters so that
47060786Sps * it can turn them into a single node, which is smaller to store and
47160786Sps * faster to run.  Backslashed characters are exceptions, each becoming a
47260786Sps * separate node; the code is simpler that way and it's not worth fixing.
47360786Sps */
47460786Spsstatic char *
47560786Spsregatom(flagp)
47660786Spsint *flagp;
47760786Sps{
47860786Sps	register char *ret;
47960786Sps	int flags;
48060786Sps
48160786Sps	*flagp = WORST;		/* Tentatively. */
48260786Sps
48360786Sps	switch (*regparse++) {
48460786Sps	case '^':
48560786Sps		ret = regnode(BOL);
48660786Sps		break;
48760786Sps	case '$':
48860786Sps		ret = regnode(EOL);
48960786Sps		break;
49060786Sps	case '.':
49160786Sps		ret = regnode(ANY);
49260786Sps		*flagp |= HASWIDTH|SIMPLE;
49360786Sps		break;
49460786Sps	case '[': {
49560786Sps			register int clss;
49660786Sps			register int classend;
49760786Sps
49860786Sps			if (*regparse == '^') {	/* Complement of range. */
49960786Sps				ret = regnode(ANYBUT);
50060786Sps				regparse++;
50160786Sps			} else
50260786Sps				ret = regnode(ANYOF);
50360786Sps			if (*regparse == ']' || *regparse == '-')
50460786Sps				regc(*regparse++);
50560786Sps			while (*regparse != '\0' && *regparse != ']') {
50660786Sps				if (*regparse == '-') {
50760786Sps					regparse++;
50860786Sps					if (*regparse == ']' || *regparse == '\0')
50960786Sps						regc('-');
51060786Sps					else {
51160786Sps						clss = UCHARAT(regparse-2)+1;
51260786Sps						classend = UCHARAT(regparse);
51360786Sps						if (clss > classend+1)
51460786Sps							FAIL("invalid [] range");
51560786Sps						for (; clss <= classend; clss++)
51660786Sps							regc(clss);
51760786Sps						regparse++;
51860786Sps					}
51960786Sps				} else
52060786Sps					regc(*regparse++);
52160786Sps			}
52260786Sps			regc('\0');
52360786Sps			if (*regparse != ']')
52460786Sps				FAIL("unmatched []");
52560786Sps			regparse++;
52660786Sps			*flagp |= HASWIDTH|SIMPLE;
52760786Sps		}
52860786Sps		break;
52960786Sps	case '(':
53060786Sps		ret = reg(1, &flags);
53160786Sps		if (ret == NULL)
53260786Sps			return(NULL);
53360786Sps		*flagp |= flags&(HASWIDTH|SPSTART);
53460786Sps		break;
53560786Sps	case '\0':
53660786Sps	case '|':
53760786Sps	case ')':
53860786Sps		FAIL("internal urp");	/* Supposed to be caught earlier. */
53960786Sps		/* NOTREACHED */
54060786Sps		break;
54160786Sps	case '?':
54260786Sps	case '+':
54360786Sps	case '*':
54460786Sps		FAIL("?+* follows nothing");
54560786Sps		/* NOTREACHED */
54660786Sps		break;
54760786Sps	case '\\':
54860786Sps		if (*regparse == '\0')
54960786Sps			FAIL("trailing \\");
55060786Sps		ret = regnode(EXACTLY);
55160786Sps		regc(*regparse++);
55260786Sps		regc('\0');
55360786Sps		*flagp |= HASWIDTH|SIMPLE;
55460786Sps		break;
55560786Sps	default: {
55660786Sps			register int len;
55760786Sps			register char ender;
55860786Sps
55960786Sps			regparse--;
560294286Sdelphij			len = (int) strcspn(regparse, META);
56160786Sps			if (len <= 0)
56260786Sps				FAIL("internal disaster");
56360786Sps			ender = *(regparse+len);
56460786Sps			if (len > 1 && ISMULT(ender))
56560786Sps				len--;		/* Back off clear of ?+* operand. */
56660786Sps			*flagp |= HASWIDTH;
56760786Sps			if (len == 1)
56860786Sps				*flagp |= SIMPLE;
56960786Sps			ret = regnode(EXACTLY);
57060786Sps			while (len > 0) {
57160786Sps				regc(*regparse++);
57260786Sps				len--;
57360786Sps			}
57460786Sps			regc('\0');
57560786Sps		}
57660786Sps		break;
57760786Sps	}
57860786Sps
57960786Sps	return(ret);
58060786Sps}
58160786Sps
58260786Sps/*
58360786Sps - regnode - emit a node
58460786Sps */
58560786Spsstatic char *			/* Location. */
58660786Spsregnode(op)
58760786Spschar op;
58860786Sps{
58960786Sps	register char *ret;
59060786Sps	register char *ptr;
59160786Sps
59260786Sps	ret = regcode;
59360786Sps	if (ret == &regdummy) {
59460786Sps		regsize += 3;
59560786Sps		return(ret);
59660786Sps	}
59760786Sps
59860786Sps	ptr = ret;
59960786Sps	*ptr++ = op;
60060786Sps	*ptr++ = '\0';		/* Null "next" pointer. */
60160786Sps	*ptr++ = '\0';
60260786Sps	regcode = ptr;
60360786Sps
60460786Sps	return(ret);
60560786Sps}
60660786Sps
60760786Sps/*
60860786Sps - regc - emit (if appropriate) a byte of code
60960786Sps */
61060786Spsstatic void
61160786Spsregc(b)
61260786Spschar b;
61360786Sps{
61460786Sps	if (regcode != &regdummy)
61560786Sps		*regcode++ = b;
61660786Sps	else
61760786Sps		regsize++;
61860786Sps}
61960786Sps
62060786Sps/*
62160786Sps - reginsert - insert an operator in front of already-emitted operand
62260786Sps *
62360786Sps * Means relocating the operand.
62460786Sps */
62560786Spsstatic void
62660786Spsreginsert(op, opnd)
62760786Spschar op;
62860786Spschar *opnd;
62960786Sps{
63060786Sps	register char *src;
63160786Sps	register char *dst;
63260786Sps	register char *place;
63360786Sps
63460786Sps	if (regcode == &regdummy) {
63560786Sps		regsize += 3;
63660786Sps		return;
63760786Sps	}
63860786Sps
63960786Sps	src = regcode;
64060786Sps	regcode += 3;
64160786Sps	dst = regcode;
64260786Sps	while (src > opnd)
64360786Sps		*--dst = *--src;
64460786Sps
64560786Sps	place = opnd;		/* Op node, where operand used to be. */
64660786Sps	*place++ = op;
64760786Sps	*place++ = '\0';
64860786Sps	*place++ = '\0';
64960786Sps}
65060786Sps
65160786Sps/*
65260786Sps - regtail - set the next-pointer at the end of a node chain
65360786Sps */
65460786Spsstatic void
65560786Spsregtail(p, val)
65660786Spschar *p;
65760786Spschar *val;
65860786Sps{
65960786Sps	register char *scan;
66060786Sps	register char *temp;
66160786Sps	register int offset;
66260786Sps
66360786Sps	if (p == &regdummy)
66460786Sps		return;
66560786Sps
66660786Sps	/* Find last node. */
66760786Sps	scan = p;
66860786Sps	for (;;) {
66960786Sps		temp = regnext(scan);
67060786Sps		if (temp == NULL)
67160786Sps			break;
67260786Sps		scan = temp;
67360786Sps	}
67460786Sps
67560786Sps	if (OP(scan) == BACK)
676294286Sdelphij		offset = (int) (scan - val);
67760786Sps	else
678294286Sdelphij		offset = (int) (val - scan);
67960786Sps	*(scan+1) = (offset>>8)&0377;
68060786Sps	*(scan+2) = offset&0377;
68160786Sps}
68260786Sps
68360786Sps/*
68460786Sps - regoptail - regtail on operand of first argument; nop if operandless
68560786Sps */
68660786Spsstatic void
68760786Spsregoptail(p, val)
68860786Spschar *p;
68960786Spschar *val;
69060786Sps{
69160786Sps	/* "Operandless" and "op != BRANCH" are synonymous in practice. */
69260786Sps	if (p == NULL || p == &regdummy || OP(p) != BRANCH)
69360786Sps		return;
69460786Sps	regtail(OPERAND(p), val);
69560786Sps}
69660786Sps
69760786Sps/*
69860786Sps * regexec and friends
69960786Sps */
70060786Sps
70160786Sps/*
70260786Sps * Global work variables for regexec().
70360786Sps */
70460786Spsstatic char *reginput;		/* String-input pointer. */
70560786Spsstatic char *regbol;		/* Beginning of input, for ^ check. */
70660786Spsstatic char **regstartp;	/* Pointer to startp array. */
70760786Spsstatic char **regendp;		/* Ditto for endp. */
70860786Sps
70960786Sps/*
71060786Sps * Forwards.
71160786Sps */
71260786SpsSTATIC int regtry();
71360786SpsSTATIC int regmatch();
71460786SpsSTATIC int regrepeat();
71560786Sps
71660786Sps#ifdef DEBUG
71760786Spsint regnarrate = 0;
71860786Spsvoid regdump();
71960786SpsSTATIC char *regprop();
72060786Sps#endif
72160786Sps
72260786Sps/*
72360786Sps - regexec - match a regexp against a string
72460786Sps */
72560786Spsint
72660786Spsregexec2(prog, string, notbol)
72760786Spsregister regexp *prog;
72860786Spsregister char *string;
72960786Spsint notbol;
73060786Sps{
73160786Sps	register char *s;
73260786Sps
73360786Sps	/* Be paranoid... */
73460786Sps	if (prog == NULL || string == NULL) {
73560786Sps		regerror("NULL parameter");
73660786Sps		return(0);
73760786Sps	}
73860786Sps
73960786Sps	/* Check validity of program. */
74060786Sps	if (UCHARAT(prog->program) != MAGIC) {
74160786Sps		regerror("corrupted program");
74260786Sps		return(0);
74360786Sps	}
74460786Sps
74560786Sps	/* If there is a "must appear" string, look for it. */
74660786Sps	if (prog->regmust != NULL) {
74760786Sps		s = string;
74860786Sps		while ((s = strchr(s, prog->regmust[0])) != NULL) {
74960786Sps			if (strncmp(s, prog->regmust, prog->regmlen) == 0)
75060786Sps				break;	/* Found it. */
75160786Sps			s++;
75260786Sps		}
75360786Sps		if (s == NULL)	/* Not present. */
75460786Sps			return(0);
75560786Sps	}
75660786Sps
75760786Sps	/* Mark beginning of line for ^ . */
75860786Sps	if (notbol)
75960786Sps		regbol = NULL;
76060786Sps	else
76160786Sps		regbol = string;
76260786Sps
76360786Sps	/* Simplest case:  anchored match need be tried only once. */
76460786Sps	if (prog->reganch)
76560786Sps		return(regtry(prog, string));
76660786Sps
76760786Sps	/* Messy cases:  unanchored match. */
76860786Sps	s = string;
76960786Sps	if (prog->regstart != '\0')
77060786Sps		/* We know what char it must start with. */
77160786Sps		while ((s = strchr(s, prog->regstart)) != NULL) {
77260786Sps			if (regtry(prog, s))
77360786Sps				return(1);
77460786Sps			s++;
77560786Sps		}
77660786Sps	else
77760786Sps		/* We don't -- general case. */
77860786Sps		do {
77960786Sps			if (regtry(prog, s))
78060786Sps				return(1);
78160786Sps		} while (*s++ != '\0');
78260786Sps
78360786Sps	/* Failure. */
78460786Sps	return(0);
78560786Sps}
78660786Sps
78760786Spsint
78860786Spsregexec(prog, string)
78960786Spsregister regexp *prog;
79060786Spsregister char *string;
79160786Sps{
79260786Sps	return regexec2(prog, string, 0);
79360786Sps}
79460786Sps
79560786Sps/*
79660786Sps - regtry - try match at specific point
79760786Sps */
79860786Spsstatic int			/* 0 failure, 1 success */
79960786Spsregtry(prog, string)
80060786Spsregexp *prog;
80160786Spschar *string;
80260786Sps{
80360786Sps	register int i;
80460786Sps	register char **sp;
80560786Sps	register char **ep;
80660786Sps
80760786Sps	reginput = string;
80860786Sps	regstartp = prog->startp;
80960786Sps	regendp = prog->endp;
81060786Sps
81160786Sps	sp = prog->startp;
81260786Sps	ep = prog->endp;
81360786Sps	for (i = NSUBEXP; i > 0; i--) {
81460786Sps		*sp++ = NULL;
81560786Sps		*ep++ = NULL;
81660786Sps	}
81760786Sps	if (regmatch(prog->program + 1)) {
81860786Sps		prog->startp[0] = string;
81960786Sps		prog->endp[0] = reginput;
82060786Sps		return(1);
82160786Sps	} else
82260786Sps		return(0);
82360786Sps}
82460786Sps
82560786Sps/*
82660786Sps - regmatch - main matching routine
82760786Sps *
82860786Sps * Conceptually the strategy is simple:  check to see whether the current
82960786Sps * node matches, call self recursively to see whether the rest matches,
83060786Sps * and then act accordingly.  In practice we make some effort to avoid
83160786Sps * recursion, in particular by going through "ordinary" nodes (that don't
83260786Sps * need to know whether the rest of the match failed) by a loop instead of
83360786Sps * by recursion.
83460786Sps */
83560786Spsstatic int			/* 0 failure, 1 success */
83660786Spsregmatch(prog)
83760786Spschar *prog;
83860786Sps{
83960786Sps	register char *scan;	/* Current node. */
84060786Sps	char *next;		/* Next node. */
84160786Sps
84260786Sps	scan = prog;
84360786Sps#ifdef DEBUG
84460786Sps	if (scan != NULL && regnarrate)
84560786Sps		fprintf(stderr, "%s(\n", regprop(scan));
84660786Sps#endif
84760786Sps	while (scan != NULL) {
84860786Sps#ifdef DEBUG
84960786Sps		if (regnarrate)
85060786Sps			fprintf(stderr, "%s...\n", regprop(scan));
85160786Sps#endif
85260786Sps		next = regnext(scan);
85360786Sps
85460786Sps		switch (OP(scan)) {
85560786Sps		case BOL:
85660786Sps			if (reginput != regbol)
85760786Sps				return(0);
85860786Sps			break;
85960786Sps		case EOL:
86060786Sps			if (*reginput != '\0')
86160786Sps				return(0);
86260786Sps			break;
86360786Sps		case ANY:
86460786Sps			if (*reginput == '\0')
86560786Sps				return(0);
86660786Sps			reginput++;
86760786Sps			break;
86860786Sps		case EXACTLY: {
86960786Sps				register int len;
87060786Sps				register char *opnd;
87160786Sps
87260786Sps				opnd = OPERAND(scan);
87360786Sps				/* Inline the first character, for speed. */
87460786Sps				if (*opnd != *reginput)
87560786Sps					return(0);
876294286Sdelphij				len = (int) strlen(opnd);
87760786Sps				if (len > 1 && strncmp(opnd, reginput, len) != 0)
87860786Sps					return(0);
87960786Sps				reginput += len;
88060786Sps			}
88160786Sps			break;
88260786Sps		case ANYOF:
88360786Sps 			if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
88460786Sps				return(0);
88560786Sps			reginput++;
88660786Sps			break;
88760786Sps		case ANYBUT:
88860786Sps 			if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
88960786Sps				return(0);
89060786Sps			reginput++;
89160786Sps			break;
89260786Sps		case NOTHING:
89360786Sps			break;
89460786Sps		case BACK:
89560786Sps			break;
89660786Sps		case OPEN+1:
89760786Sps		case OPEN+2:
89860786Sps		case OPEN+3:
89960786Sps		case OPEN+4:
90060786Sps		case OPEN+5:
90160786Sps		case OPEN+6:
90260786Sps		case OPEN+7:
90360786Sps		case OPEN+8:
90460786Sps		case OPEN+9: {
90560786Sps				register int no;
90660786Sps				register char *save;
90760786Sps
90860786Sps				no = OP(scan) - OPEN;
90960786Sps				save = reginput;
91060786Sps
91160786Sps				if (regmatch(next)) {
91260786Sps					/*
91360786Sps					 * Don't set startp if some later
91460786Sps					 * invocation of the same parentheses
91560786Sps					 * already has.
91660786Sps					 */
91760786Sps					if (regstartp[no] == NULL)
91860786Sps						regstartp[no] = save;
91960786Sps					return(1);
92060786Sps				} else
92160786Sps					return(0);
92260786Sps			}
92360786Sps			/* NOTREACHED */
92460786Sps			break;
92560786Sps		case CLOSE+1:
92660786Sps		case CLOSE+2:
92760786Sps		case CLOSE+3:
92860786Sps		case CLOSE+4:
92960786Sps		case CLOSE+5:
93060786Sps		case CLOSE+6:
93160786Sps		case CLOSE+7:
93260786Sps		case CLOSE+8:
93360786Sps		case CLOSE+9: {
93460786Sps				register int no;
93560786Sps				register char *save;
93660786Sps
93760786Sps				no = OP(scan) - CLOSE;
93860786Sps				save = reginput;
93960786Sps
94060786Sps				if (regmatch(next)) {
94160786Sps					/*
94260786Sps					 * Don't set endp if some later
94360786Sps					 * invocation of the same parentheses
94460786Sps					 * already has.
94560786Sps					 */
94660786Sps					if (regendp[no] == NULL)
94760786Sps						regendp[no] = save;
94860786Sps					return(1);
94960786Sps				} else
95060786Sps					return(0);
95160786Sps			}
95260786Sps			/* NOTREACHED */
95360786Sps			break;
95460786Sps		case BRANCH: {
95560786Sps				register char *save;
95660786Sps
95760786Sps				if (OP(next) != BRANCH)		/* No choice. */
95860786Sps					next = OPERAND(scan);	/* Avoid recursion. */
95960786Sps				else {
96060786Sps					do {
96160786Sps						save = reginput;
96260786Sps						if (regmatch(OPERAND(scan)))
96360786Sps							return(1);
96460786Sps						reginput = save;
96560786Sps						scan = regnext(scan);
96660786Sps					} while (scan != NULL && OP(scan) == BRANCH);
96760786Sps					return(0);
96860786Sps					/* NOTREACHED */
96960786Sps				}
97060786Sps			}
97160786Sps			/* NOTREACHED */
97260786Sps			break;
97360786Sps		case STAR:
97460786Sps		case PLUS: {
97560786Sps				register char nextch;
97660786Sps				register int no;
97760786Sps				register char *save;
97860786Sps				register int min;
97960786Sps
98060786Sps				/*
98160786Sps				 * Lookahead to avoid useless match attempts
98260786Sps				 * when we know what character comes next.
98360786Sps				 */
98460786Sps				nextch = '\0';
98560786Sps				if (OP(next) == EXACTLY)
98660786Sps					nextch = *OPERAND(next);
98760786Sps				min = (OP(scan) == STAR) ? 0 : 1;
98860786Sps				save = reginput;
98960786Sps				no = regrepeat(OPERAND(scan));
99060786Sps				while (no >= min) {
99160786Sps					/* If it could work, try it. */
99260786Sps					if (nextch == '\0' || *reginput == nextch)
99360786Sps						if (regmatch(next))
99460786Sps							return(1);
99560786Sps					/* Couldn't or didn't -- back up. */
99660786Sps					no--;
99760786Sps					reginput = save + no;
99860786Sps				}
99960786Sps				return(0);
100060786Sps			}
100160786Sps			/* NOTREACHED */
100260786Sps			break;
100360786Sps		case END:
100460786Sps			return(1);	/* Success! */
100560786Sps			/* NOTREACHED */
100660786Sps			break;
100760786Sps		default:
100860786Sps			regerror("memory corruption");
100960786Sps			return(0);
101060786Sps			/* NOTREACHED */
101160786Sps			break;
101260786Sps		}
101360786Sps
101460786Sps		scan = next;
101560786Sps	}
101660786Sps
101760786Sps	/*
101860786Sps	 * We get here only if there's trouble -- normally "case END" is
101960786Sps	 * the terminating point.
102060786Sps	 */
102160786Sps	regerror("corrupted pointers");
102260786Sps	return(0);
102360786Sps}
102460786Sps
102560786Sps/*
102660786Sps - regrepeat - repeatedly match something simple, report how many
102760786Sps */
102860786Spsstatic int
102960786Spsregrepeat(p)
103060786Spschar *p;
103160786Sps{
103260786Sps	register int count = 0;
103360786Sps	register char *scan;
103460786Sps	register char *opnd;
103560786Sps
103660786Sps	scan = reginput;
103760786Sps	opnd = OPERAND(p);
103860786Sps	switch (OP(p)) {
103960786Sps	case ANY:
1040294286Sdelphij		count = (int) strlen(scan);
104160786Sps		scan += count;
104260786Sps		break;
104360786Sps	case EXACTLY:
104460786Sps		while (*opnd == *scan) {
104560786Sps			count++;
104660786Sps			scan++;
104760786Sps		}
104860786Sps		break;
104960786Sps	case ANYOF:
105060786Sps		while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
105160786Sps			count++;
105260786Sps			scan++;
105360786Sps		}
105460786Sps		break;
105560786Sps	case ANYBUT:
105660786Sps		while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
105760786Sps			count++;
105860786Sps			scan++;
105960786Sps		}
106060786Sps		break;
106160786Sps	default:		/* Oh dear.  Called inappropriately. */
106260786Sps		regerror("internal foulup");
106360786Sps		count = 0;	/* Best compromise. */
106460786Sps		break;
106560786Sps	}
106660786Sps	reginput = scan;
106760786Sps
106860786Sps	return(count);
106960786Sps}
107060786Sps
107160786Sps/*
107260786Sps - regnext - dig the "next" pointer out of a node
107360786Sps */
107460786Spsstatic char *
107560786Spsregnext(p)
107660786Spsregister char *p;
107760786Sps{
107860786Sps	register int offset;
107960786Sps
108060786Sps	if (p == &regdummy)
108160786Sps		return(NULL);
108260786Sps
108360786Sps	offset = NEXT(p);
108460786Sps	if (offset == 0)
108560786Sps		return(NULL);
108660786Sps
108760786Sps	if (OP(p) == BACK)
108860786Sps		return(p-offset);
108960786Sps	else
109060786Sps		return(p+offset);
109160786Sps}
109260786Sps
109360786Sps#ifdef DEBUG
109460786Sps
109560786SpsSTATIC char *regprop();
109660786Sps
109760786Sps/*
109860786Sps - regdump - dump a regexp onto stdout in vaguely comprehensible form
109960786Sps */
110060786Spsvoid
110160786Spsregdump(r)
110260786Spsregexp *r;
110360786Sps{
110460786Sps	register char *s;
110560786Sps	register char op = EXACTLY;	/* Arbitrary non-END op. */
110660786Sps	register char *next;
110760786Sps
110860786Sps
110960786Sps	s = r->program + 1;
111060786Sps	while (op != END) {	/* While that wasn't END last time... */
111160786Sps		op = OP(s);
111260786Sps		printf("%2d%s", s-r->program, regprop(s));	/* Where, what. */
111360786Sps		next = regnext(s);
111460786Sps		if (next == NULL)		/* Next ptr. */
111560786Sps			printf("(0)");
111660786Sps		else
111760786Sps			printf("(%d)", (s-r->program)+(next-s));
111860786Sps		s += 3;
111960786Sps		if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
112060786Sps			/* Literal string, where present. */
112160786Sps			while (*s != '\0') {
112260786Sps				putchar(*s);
112360786Sps				s++;
112460786Sps			}
112560786Sps			s++;
112660786Sps		}
112760786Sps		putchar('\n');
112860786Sps	}
112960786Sps
113060786Sps	/* Header fields of interest. */
113160786Sps	if (r->regstart != '\0')
113260786Sps		printf("start `%c' ", r->regstart);
113360786Sps	if (r->reganch)
113460786Sps		printf("anchored ");
113560786Sps	if (r->regmust != NULL)
113660786Sps		printf("must have \"%s\"", r->regmust);
113760786Sps	printf("\n");
113860786Sps}
113960786Sps
114060786Sps/*
114160786Sps - regprop - printable representation of opcode
114260786Sps */
114360786Spsstatic char *
114460786Spsregprop(op)
114560786Spschar *op;
114660786Sps{
114760786Sps	register char *p;
114860786Sps	static char buf[50];
114960786Sps
115060786Sps	(void) strcpy(buf, ":");
115160786Sps
115260786Sps	switch (OP(op)) {
115360786Sps	case BOL:
115460786Sps		p = "BOL";
115560786Sps		break;
115660786Sps	case EOL:
115760786Sps		p = "EOL";
115860786Sps		break;
115960786Sps	case ANY:
116060786Sps		p = "ANY";
116160786Sps		break;
116260786Sps	case ANYOF:
116360786Sps		p = "ANYOF";
116460786Sps		break;
116560786Sps	case ANYBUT:
116660786Sps		p = "ANYBUT";
116760786Sps		break;
116860786Sps	case BRANCH:
116960786Sps		p = "BRANCH";
117060786Sps		break;
117160786Sps	case EXACTLY:
117260786Sps		p = "EXACTLY";
117360786Sps		break;
117460786Sps	case NOTHING:
117560786Sps		p = "NOTHING";
117660786Sps		break;
117760786Sps	case BACK:
117860786Sps		p = "BACK";
117960786Sps		break;
118060786Sps	case END:
118160786Sps		p = "END";
118260786Sps		break;
118360786Sps	case OPEN+1:
118460786Sps	case OPEN+2:
118560786Sps	case OPEN+3:
118660786Sps	case OPEN+4:
118760786Sps	case OPEN+5:
118860786Sps	case OPEN+6:
118960786Sps	case OPEN+7:
119060786Sps	case OPEN+8:
119160786Sps	case OPEN+9:
119260786Sps		sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
119360786Sps		p = NULL;
119460786Sps		break;
119560786Sps	case CLOSE+1:
119660786Sps	case CLOSE+2:
119760786Sps	case CLOSE+3:
119860786Sps	case CLOSE+4:
119960786Sps	case CLOSE+5:
120060786Sps	case CLOSE+6:
120160786Sps	case CLOSE+7:
120260786Sps	case CLOSE+8:
120360786Sps	case CLOSE+9:
120460786Sps		sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
120560786Sps		p = NULL;
120660786Sps		break;
120760786Sps	case STAR:
120860786Sps		p = "STAR";
120960786Sps		break;
121060786Sps	case PLUS:
121160786Sps		p = "PLUS";
121260786Sps		break;
121360786Sps	default:
121460786Sps		regerror("corrupted opcode");
121560786Sps		break;
121660786Sps	}
121760786Sps	if (p != NULL)
121860786Sps		(void) strcat(buf, p);
121960786Sps	return(buf);
122060786Sps}
122160786Sps#endif
122260786Sps
122360786Sps/*
122460786Sps * The following is provided for those people who do not have strcspn() in
122560786Sps * their C libraries.  They should get off their butts and do something
122660786Sps * about it; at least one public-domain implementation of those (highly
122760786Sps * useful) string routines has been published on Usenet.
122860786Sps */
122960786Sps#ifdef STRCSPN
123060786Sps/*
123160786Sps * strcspn - find length of initial segment of s1 consisting entirely
123260786Sps * of characters not from s2
123360786Sps */
123460786Sps
123560786Spsstatic int
123660786Spsstrcspn(s1, s2)
123760786Spschar *s1;
123860786Spschar *s2;
123960786Sps{
124060786Sps	register char *scan1;
124160786Sps	register char *scan2;
124260786Sps	register int count;
124360786Sps
124460786Sps	count = 0;
124560786Sps	for (scan1 = s1; *scan1 != '\0'; scan1++) {
124660786Sps		for (scan2 = s2; *scan2 != '\0';)	/* ++ moved down. */
124760786Sps			if (*scan1 == *scan2++)
124860786Sps				return(count);
124960786Sps		count++;
125060786Sps	}
125160786Sps	return(count);
125260786Sps}
125360786Sps#endif
1254