185587Sobrien/****************************************************************
285587SobrienCopyright (C) Lucent Technologies 1997
385587SobrienAll Rights Reserved
485587Sobrien
585587SobrienPermission to use, copy, modify, and distribute this software and
685587Sobrienits documentation for any purpose and without fee is hereby
785587Sobriengranted, provided that the above copyright notice appear in all
885587Sobriencopies and that both that the copyright notice and this
985587Sobrienpermission notice and warranty disclaimer appear in supporting
1085587Sobriendocumentation, and that the name Lucent Technologies or any of
1185587Sobrienits entities not be used in advertising or publicity pertaining
1285587Sobriento distribution of the software without specific, written prior
1385587Sobrienpermission.
1485587Sobrien
1585587SobrienLUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
1685587SobrienINCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
1785587SobrienIN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
1885587SobrienSPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1985587SobrienWHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
2085587SobrienIN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
2185587SobrienARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
2285587SobrienTHIS SOFTWARE.
2385587Sobrien****************************************************************/
2485587Sobrien
2585587Sobrien#include <stdio.h>
2685587Sobrien#include <stdlib.h>
2785587Sobrien#include <string.h>
2885587Sobrien#include <ctype.h>
2985587Sobrien#include "awk.h"
3085587Sobrien#include "ytab.h"
3185587Sobrien
3285587Sobrienextern YYSTYPE	yylval;
3385587Sobrienextern int	infunc;
3485587Sobrien
/* Lexer bookkeeping. */
int	lineno = 1;	/* current source line: input() adds one per '\n', unput() takes it back */
int	bracecnt = 0;	/* incremented on '{', decremented on '}' */
int	brackcnt = 0;	/* incremented on '[', decremented on ']' */
int	parencnt = 0;	/* incremented on '(', decremented on ')' */
3985587Sobrien
/* One entry of the reserved-word table searched by word(). */
typedef struct Keyword {
	const char *word;	/* spelling compared with strcmp() in binsearch() */
	int sub;		/* value word() stores in yylval.i on a match */
	int type;		/* token code word() returns on a match */
} Keyword;
4585587Sobrien
4685587SobrienKeyword keywords[] ={	/* keep sorted: binary searched */
4785587Sobrien	{ "BEGIN",	XBEGIN,		XBEGIN },
4885587Sobrien	{ "END",	XEND,		XEND },
4985587Sobrien	{ "NF",		VARNF,		VARNF },
5085587Sobrien	{ "atan2",	FATAN,		BLTIN },
5185587Sobrien	{ "break",	BREAK,		BREAK },
5285587Sobrien	{ "close",	CLOSE,		CLOSE },
5385587Sobrien	{ "continue",	CONTINUE,	CONTINUE },
5485587Sobrien	{ "cos",	FCOS,		BLTIN },
5585587Sobrien	{ "delete",	DELETE,		DELETE },
5685587Sobrien	{ "do",		DO,		DO },
5785587Sobrien	{ "else",	ELSE,		ELSE },
5885587Sobrien	{ "exit",	EXIT,		EXIT },
5985587Sobrien	{ "exp",	FEXP,		BLTIN },
6085587Sobrien	{ "fflush",	FFLUSH,		BLTIN },
6185587Sobrien	{ "for",	FOR,		FOR },
6285587Sobrien	{ "func",	FUNC,		FUNC },
6385587Sobrien	{ "function",	FUNC,		FUNC },
6485587Sobrien	{ "getline",	GETLINE,	GETLINE },
6585587Sobrien	{ "gsub",	GSUB,		GSUB },
6685587Sobrien	{ "if",		IF,		IF },
6785587Sobrien	{ "in",		IN,		IN },
6885587Sobrien	{ "index",	INDEX,		INDEX },
6985587Sobrien	{ "int",	FINT,		BLTIN },
7085587Sobrien	{ "length",	FLENGTH,	BLTIN },
7185587Sobrien	{ "log",	FLOG,		BLTIN },
7285587Sobrien	{ "match",	MATCHFCN,	MATCHFCN },
7385587Sobrien	{ "next",	NEXT,		NEXT },
7485587Sobrien	{ "nextfile",	NEXTFILE,	NEXTFILE },
7585587Sobrien	{ "print",	PRINT,		PRINT },
7685587Sobrien	{ "printf",	PRINTF,		PRINTF },
7785587Sobrien	{ "rand",	FRAND,		BLTIN },
7885587Sobrien	{ "return",	RETURN,		RETURN },
7985587Sobrien	{ "sin",	FSIN,		BLTIN },
8085587Sobrien	{ "split",	SPLIT,		SPLIT },
8185587Sobrien	{ "sprintf",	SPRINTF,	SPRINTF },
8285587Sobrien	{ "sqrt",	FSQRT,		BLTIN },
8385587Sobrien	{ "srand",	FSRAND,		BLTIN },
8485587Sobrien	{ "sub",	SUB,		SUB },
8585587Sobrien	{ "substr",	SUBSTR,		SUBSTR },
8685587Sobrien	{ "system",	FSYSTEM,	BLTIN },
8785587Sobrien	{ "tolower",	FTOLOWER,	BLTIN },
8885587Sobrien	{ "toupper",	FTOUPPER,	BLTIN },
8985587Sobrien	{ "while",	WHILE,		WHILE },
9085587Sobrien};
9185587Sobrien
/*
 * RET(x): print the token name when dbg is set, then return x from the
 * enclosing function.  Wrapped in do { } while (0) so that "RET(x);" is a
 * single statement and stays correct in an unbraced if/else.  x is expanded
 * twice, so it must not have side effects.
 */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
9385587Sobrien
/* peek - return the next input character and push it straight back */
int peek(void)
{
	int ahead;

	ahead = input();
	unput(ahead);
	return ahead;
}
10085587Sobrien
/*
 * gettok - read the next raw token from the input into *pbuf.
 *
 * pbuf, psz: the caller's buffer and its size.  The buffer is grown through
 *            adjbuf() while a name or number is collected; the possibly
 *            updated pointer and size are stored back before returning.
 *            The buffer must hold at least two bytes on entry.
 *
 * Returns 0 at end of input,
 *         'a' when *pbuf holds a name (letters, digits and '_'),
 *         '0' when *pbuf holds the text of a number,
 *         otherwise the single character read, which is also left in *pbuf.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Ask for room for this character AND the terminating
			 * NUL on every pass.  Growing only once the buffer was
			 * already full let the final "*bp = 0" land one byte
			 * past the end when input ended on a full buffer.
			 */
			if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
				FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same reservation as above: character plus NUL */
			if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
				FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the tail back BEFORE truncating: once buf[1]
			 * is NUL the tail is an empty string and the
			 * characters would be lost.
			 */
			unputstr(buf+1);	/* put rest back for later */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
16385587Sobrien
/* token helpers defined later in this file */
int	word(char *w);
int	string(void);
int	regexpr(void);

/* one-shot flags tested and cleared at the top of yylex() */
int	sc = 0;		/* 1 => return a } right now */
int	reg = 0;	/* 1 => return a REGEXPR now */
16985587Sobrien
17085587Sobrienint yylex(void)
17185587Sobrien{
17285587Sobrien	int c;
17385587Sobrien	static char *buf = 0;
174170331Srafan	static int bufsize = 5; /* BUG: setting this small causes core dump! */
17585587Sobrien
17685587Sobrien	if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
17785587Sobrien		FATAL( "out of space in yylex" );
17885587Sobrien	if (sc) {
17985587Sobrien		sc = 0;
18085587Sobrien		RET('}');
18185587Sobrien	}
18285587Sobrien	if (reg) {
18385587Sobrien		reg = 0;
18485587Sobrien		return regexpr();
18585587Sobrien	}
18685587Sobrien	for (;;) {
18785587Sobrien		c = gettok(&buf, &bufsize);
18885587Sobrien		if (c == 0)
18985587Sobrien			return 0;
19085587Sobrien		if (isalpha(c) || c == '_')
19185587Sobrien			return word(buf);
19290902Sdes		if (isdigit(c)) {
19385587Sobrien			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
19485587Sobrien			/* should this also have STR set? */
19585587Sobrien			RET(NUMBER);
19685587Sobrien		}
19785587Sobrien
19885587Sobrien		yylval.i = c;
19985587Sobrien		switch (c) {
20085587Sobrien		case '\n':	/* {EOL} */
20185587Sobrien			RET(NL);
20285587Sobrien		case '\r':	/* assume \n is coming */
20385587Sobrien		case ' ':	/* {WS}+ */
20485587Sobrien		case '\t':
20585587Sobrien			break;
20685587Sobrien		case '#':	/* #.* strip comments */
20785587Sobrien			while ((c = input()) != '\n' && c != 0)
20885587Sobrien				;
20985587Sobrien			unput(c);
21085587Sobrien			break;
21185587Sobrien		case ';':
21285587Sobrien			RET(';');
21385587Sobrien		case '\\':
21485587Sobrien			if (peek() == '\n') {
21585587Sobrien				input();
21685587Sobrien			} else if (peek() == '\r') {
21785587Sobrien				input(); input();	/* \n */
21885587Sobrien				lineno++;
21985587Sobrien			} else {
22085587Sobrien				RET(c);
22185587Sobrien			}
22285587Sobrien			break;
22385587Sobrien		case '&':
22485587Sobrien			if (peek() == '&') {
22585587Sobrien				input(); RET(AND);
22685587Sobrien			} else
22785587Sobrien				RET('&');
22885587Sobrien		case '|':
22985587Sobrien			if (peek() == '|') {
23085587Sobrien				input(); RET(BOR);
23185587Sobrien			} else
23285587Sobrien				RET('|');
23385587Sobrien		case '!':
23485587Sobrien			if (peek() == '=') {
23585587Sobrien				input(); yylval.i = NE; RET(NE);
23685587Sobrien			} else if (peek() == '~') {
23785587Sobrien				input(); yylval.i = NOTMATCH; RET(MATCHOP);
23885587Sobrien			} else
23985587Sobrien				RET(NOT);
24085587Sobrien		case '~':
24185587Sobrien			yylval.i = MATCH;
24285587Sobrien			RET(MATCHOP);
24385587Sobrien		case '<':
24485587Sobrien			if (peek() == '=') {
24585587Sobrien				input(); yylval.i = LE; RET(LE);
24685587Sobrien			} else {
24785587Sobrien				yylval.i = LT; RET(LT);
24885587Sobrien			}
24985587Sobrien		case '=':
25085587Sobrien			if (peek() == '=') {
25185587Sobrien				input(); yylval.i = EQ; RET(EQ);
25285587Sobrien			} else {
25385587Sobrien				yylval.i = ASSIGN; RET(ASGNOP);
25485587Sobrien			}
25585587Sobrien		case '>':
25685587Sobrien			if (peek() == '=') {
25785587Sobrien				input(); yylval.i = GE; RET(GE);
25885587Sobrien			} else if (peek() == '>') {
25985587Sobrien				input(); yylval.i = APPEND; RET(APPEND);
26085587Sobrien			} else {
26185587Sobrien				yylval.i = GT; RET(GT);
26285587Sobrien			}
26385587Sobrien		case '+':
26485587Sobrien			if (peek() == '+') {
26585587Sobrien				input(); yylval.i = INCR; RET(INCR);
26685587Sobrien			} else if (peek() == '=') {
26785587Sobrien				input(); yylval.i = ADDEQ; RET(ASGNOP);
26885587Sobrien			} else
26985587Sobrien				RET('+');
27085587Sobrien		case '-':
27185587Sobrien			if (peek() == '-') {
27285587Sobrien				input(); yylval.i = DECR; RET(DECR);
27385587Sobrien			} else if (peek() == '=') {
27485587Sobrien				input(); yylval.i = SUBEQ; RET(ASGNOP);
27585587Sobrien			} else
27685587Sobrien				RET('-');
27785587Sobrien		case '*':
27885587Sobrien			if (peek() == '=') {	/* *= */
27985587Sobrien				input(); yylval.i = MULTEQ; RET(ASGNOP);
28085587Sobrien			} else if (peek() == '*') {	/* ** or **= */
28185587Sobrien				input();	/* eat 2nd * */
28285587Sobrien				if (peek() == '=') {
28385587Sobrien					input(); yylval.i = POWEQ; RET(ASGNOP);
28485587Sobrien				} else {
28585587Sobrien					RET(POWER);
28685587Sobrien				}
28785587Sobrien			} else
28885587Sobrien				RET('*');
28985587Sobrien		case '/':
29085587Sobrien			RET('/');
29185587Sobrien		case '%':
29285587Sobrien			if (peek() == '=') {
29385587Sobrien				input(); yylval.i = MODEQ; RET(ASGNOP);
29485587Sobrien			} else
29585587Sobrien				RET('%');
29685587Sobrien		case '^':
29785587Sobrien			if (peek() == '=') {
29885587Sobrien				input(); yylval.i = POWEQ; RET(ASGNOP);
29985587Sobrien			} else
30085587Sobrien				RET(POWER);
30185587Sobrien
30285587Sobrien		case '$':
30385587Sobrien			/* BUG: awkward, if not wrong */
30485587Sobrien			c = gettok(&buf, &bufsize);
30585587Sobrien			if (isalpha(c)) {
30685587Sobrien				if (strcmp(buf, "NF") == 0) {	/* very special */
30785587Sobrien					unputstr("(NF)");
30885587Sobrien					RET(INDIRECT);
30985587Sobrien				}
31085587Sobrien				c = peek();
31185587Sobrien				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
31285587Sobrien					unputstr(buf);
31385587Sobrien					RET(INDIRECT);
31485587Sobrien				}
31585587Sobrien				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
31685587Sobrien				RET(IVAR);
31790902Sdes			} else if (c == 0) {	/*  */
31890902Sdes				SYNTAX( "unexpected end of input after $" );
31990902Sdes				RET(';');
32085587Sobrien			} else {
32185587Sobrien				unputstr(buf);
32285587Sobrien				RET(INDIRECT);
32385587Sobrien			}
32485587Sobrien
32585587Sobrien		case '}':
32685587Sobrien			if (--bracecnt < 0)
32785587Sobrien				SYNTAX( "extra }" );
32885587Sobrien			sc = 1;
32985587Sobrien			RET(';');
33085587Sobrien		case ']':
33185587Sobrien			if (--brackcnt < 0)
33285587Sobrien				SYNTAX( "extra ]" );
33385587Sobrien			RET(']');
33485587Sobrien		case ')':
33585587Sobrien			if (--parencnt < 0)
33685587Sobrien				SYNTAX( "extra )" );
33785587Sobrien			RET(')');
33885587Sobrien		case '{':
33985587Sobrien			bracecnt++;
34085587Sobrien			RET('{');
34185587Sobrien		case '[':
34285587Sobrien			brackcnt++;
34385587Sobrien			RET('[');
34485587Sobrien		case '(':
34585587Sobrien			parencnt++;
34685587Sobrien			RET('(');
34785587Sobrien
34885587Sobrien		case '"':
34985587Sobrien			return string();	/* BUG: should be like tran.c ? */
35085587Sobrien
35185587Sobrien		default:
35285587Sobrien			RET(c);
35385587Sobrien		}
35485587Sobrien	}
35585587Sobrien}
35685587Sobrien
35785587Sobrienint string(void)
35885587Sobrien{
35985587Sobrien	int c, n;
36085587Sobrien	char *s, *bp;
36185587Sobrien	static char *buf = 0;
36285587Sobrien	static int bufsz = 500;
36385587Sobrien
36485587Sobrien	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
36585587Sobrien		FATAL("out of space for strings");
36685587Sobrien	for (bp = buf; (c = input()) != '"'; ) {
367170331Srafan		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
36885587Sobrien			FATAL("out of space for string %.10s...", buf);
36985587Sobrien		switch (c) {
37085587Sobrien		case '\n':
37185587Sobrien		case '\r':
37285587Sobrien		case 0:
37385587Sobrien			SYNTAX( "non-terminated string %.10s...", buf );
37485587Sobrien			lineno++;
37590902Sdes			if (c == 0)	/* hopeless */
37690902Sdes				FATAL( "giving up" );
37785587Sobrien			break;
37885587Sobrien		case '\\':
37985587Sobrien			c = input();
38085587Sobrien			switch (c) {
38185587Sobrien			case '"': *bp++ = '"'; break;
38285587Sobrien			case 'n': *bp++ = '\n'; break;
38385587Sobrien			case 't': *bp++ = '\t'; break;
38485587Sobrien			case 'f': *bp++ = '\f'; break;
38585587Sobrien			case 'r': *bp++ = '\r'; break;
38685587Sobrien			case 'b': *bp++ = '\b'; break;
38785587Sobrien			case 'v': *bp++ = '\v'; break;
38885587Sobrien			case 'a': *bp++ = '\007'; break;
38985587Sobrien			case '\\': *bp++ = '\\'; break;
39085587Sobrien
39185587Sobrien			case '0': case '1': case '2': /* octal: \d \dd \ddd */
39285587Sobrien			case '3': case '4': case '5': case '6': case '7':
39385587Sobrien				n = c - '0';
39485587Sobrien				if ((c = peek()) >= '0' && c < '8') {
39585587Sobrien					n = 8 * n + input() - '0';
39685587Sobrien					if ((c = peek()) >= '0' && c < '8')
39785587Sobrien						n = 8 * n + input() - '0';
39885587Sobrien				}
39985587Sobrien				*bp++ = n;
40085587Sobrien				break;
40185587Sobrien
40285587Sobrien			case 'x':	/* hex  \x0-9a-fA-F + */
40385587Sobrien			    {	char xbuf[100], *px;
40485587Sobrien				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
40585587Sobrien					if (isdigit(c)
40685587Sobrien					 || (c >= 'a' && c <= 'f')
40785587Sobrien					 || (c >= 'A' && c <= 'F'))
40885587Sobrien						*px++ = c;
40985587Sobrien					else
41085587Sobrien						break;
41185587Sobrien				}
41285587Sobrien				*px = 0;
41385587Sobrien				unput(c);
414224731Sru	  			sscanf(xbuf, "%x", (unsigned int *) &n);
41585587Sobrien				*bp++ = n;
41685587Sobrien				break;
41785587Sobrien			    }
41885587Sobrien
41985587Sobrien			default:
42085587Sobrien				*bp++ = c;
42185587Sobrien				break;
42285587Sobrien			}
42385587Sobrien			break;
42485587Sobrien		default:
42585587Sobrien			*bp++ = c;
42685587Sobrien			break;
42785587Sobrien		}
42885587Sobrien	}
42985587Sobrien	*bp = 0;
43085587Sobrien	s = tostring(buf);
43185587Sobrien	*bp++ = ' '; *bp++ = 0;
43285587Sobrien	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
43385587Sobrien	RET(STRING);
43485587Sobrien}
43585587Sobrien
43685587Sobrien
43785587Sobrienint binsearch(char *w, Keyword *kp, int n)
43885587Sobrien{
43985587Sobrien	int cond, low, mid, high;
44085587Sobrien
44185587Sobrien	low = 0;
44285587Sobrien	high = n - 1;
44385587Sobrien	while (low <= high) {
44485587Sobrien		mid = (low + high) / 2;
44585587Sobrien		if ((cond = strcmp(w, kp[mid].word)) < 0)
44685587Sobrien			high = mid - 1;
44785587Sobrien		else if (cond > 0)
44885587Sobrien			low = mid + 1;
44985587Sobrien		else
45085587Sobrien			return mid;
45185587Sobrien	}
45285587Sobrien	return -1;
45385587Sobrien}
45485587Sobrien
45585587Sobrienint word(char *w)
45685587Sobrien{
45785587Sobrien	Keyword *kp;
45885587Sobrien	int c, n;
45985587Sobrien
46085587Sobrien	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
461170331Srafan/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
46285587Sobrien	kp = keywords + n;
46385587Sobrien	if (n != -1) {	/* found in table */
46485587Sobrien		yylval.i = kp->sub;
46585587Sobrien		switch (kp->type) {	/* special handling */
466170331Srafan		case BLTIN:
467170331Srafan			if (kp->sub == FSYSTEM && safe)
46885587Sobrien				SYNTAX( "system is unsafe" );
46985587Sobrien			RET(kp->type);
47085587Sobrien		case FUNC:
47185587Sobrien			if (infunc)
47285587Sobrien				SYNTAX( "illegal nested function" );
47385587Sobrien			RET(kp->type);
47485587Sobrien		case RETURN:
47585587Sobrien			if (!infunc)
47685587Sobrien				SYNTAX( "return not in function" );
47785587Sobrien			RET(kp->type);
47885587Sobrien		case VARNF:
47985587Sobrien			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
48085587Sobrien			RET(VARNF);
48185587Sobrien		default:
48285587Sobrien			RET(kp->type);
48385587Sobrien		}
48485587Sobrien	}
48585587Sobrien	c = peek();	/* look for '(' */
48685587Sobrien	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
48785587Sobrien		yylval.i = n;
48885587Sobrien		RET(ARG);
48985587Sobrien	} else {
49085587Sobrien		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
49185587Sobrien		if (c == '(') {
49285587Sobrien			RET(CALL);
49385587Sobrien		} else {
49485587Sobrien			RET(VAR);
49585587Sobrien		}
49685587Sobrien	}
49785587Sobrien}
49885587Sobrien
499107806Sobrienvoid startreg(void)	/* next call to yylex will return a regular expression */
50085587Sobrien{
50185587Sobrien	reg = 1;
50285587Sobrien}
50385587Sobrien
50485587Sobrienint regexpr(void)
50585587Sobrien{
50685587Sobrien	int c;
50785587Sobrien	static char *buf = 0;
50885587Sobrien	static int bufsz = 500;
50985587Sobrien	char *bp;
51085587Sobrien
51185587Sobrien	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
51285587Sobrien		FATAL("out of space for rex expr");
51385587Sobrien	bp = buf;
51485587Sobrien	for ( ; (c = input()) != '/' && c != 0; ) {
515170331Srafan		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
51685587Sobrien			FATAL("out of space for reg expr %.10s...", buf);
51785587Sobrien		if (c == '\n') {
51885587Sobrien			SYNTAX( "newline in regular expression %.10s...", buf );
51985587Sobrien			unput('\n');
52085587Sobrien			break;
52185587Sobrien		} else if (c == '\\') {
52285587Sobrien			*bp++ = '\\';
52385587Sobrien			*bp++ = input();
52485587Sobrien		} else {
52585587Sobrien			*bp++ = c;
52685587Sobrien		}
52785587Sobrien	}
52885587Sobrien	*bp = 0;
529118194Sru	if (c == 0)
530118194Sru		SYNTAX("non-terminated regular expression %.10s...", buf);
53185587Sobrien	yylval.s = tostring(buf);
53285587Sobrien	unput('/');
53385587Sobrien	RET(REGEXPR);
53485587Sobrien}
53585587Sobrien
53685587Sobrien/* low-level lexical stuff, sort of inherited from lex */
53785587Sobrien
char	ebuf[300];		/* record of characters read; input() wraps to the start when full */
char	*ep = ebuf;		/* next slot of ebuf to be written by input() */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* next free slot of yysbuf; equal to yysbuf when empty */
FILE	*yyin = 0;
54385587Sobrien
54485587Sobrienint input(void)	/* get next lexical input character */
54585587Sobrien{
54685587Sobrien	int c;
54785587Sobrien	extern char *lexprog;
54885587Sobrien
54985587Sobrien	if (yysptr > yysbuf)
550125505Sru		c = (uschar)*--yysptr;
55185587Sobrien	else if (lexprog != NULL) {	/* awk '...' */
552125505Sru		if ((c = (uschar)*lexprog) != 0)
55385587Sobrien			lexprog++;
55485587Sobrien	} else				/* awk -f ... */
55585587Sobrien		c = pgetc();
55685587Sobrien	if (c == '\n')
55785587Sobrien		lineno++;
55885587Sobrien	else if (c == EOF)
55985587Sobrien		c = 0;
56085587Sobrien	if (ep >= ebuf + sizeof ebuf)
56185587Sobrien		ep = ebuf;
56285587Sobrien	return *ep++ = c;
56385587Sobrien}
56485587Sobrien
56585587Sobrienvoid unput(int c)	/* put lexical character back on input */
56685587Sobrien{
56785587Sobrien	if (c == '\n')
56885587Sobrien		lineno--;
56985587Sobrien	if (yysptr >= yysbuf + sizeof(yysbuf))
57085587Sobrien		FATAL("pushed back too much: %.20s...", yysbuf);
57185587Sobrien	*yysptr++ = c;
57285587Sobrien	if (--ep < ebuf)
57385587Sobrien		ep = ebuf + sizeof(ebuf) - 1;
57485587Sobrien}
57585587Sobrien
/*
 * unputstr - put a string back on input.
 * Characters are pushed last to first, so input() returns them in their
 * original order.
 */
void unputstr(const char *s)
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}
583