1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25#define	DEBUG
26#include <stdio.h>
27#include <math.h>
28#include <ctype.h>
29#include <string.h>
30#include <stdlib.h>
31#include "awk.h"
32
#define	FULLTAB	2	/* rehash when nelem exceeds this multiple of the table size */
#define	GROWTAB 4	/* rehash multiplies the table size by this factor */

Array	*symtab;	/* main symbol table */

/*
 * Pointers into the sval/fval members of the builtin-variable cells.
 * They are filled in by syminit() and arginit().
 */
char	**FS;		/* initial field sep */
char	**RS;		/* initial record sep */
char	**OFS;		/* output field sep */
char	**ORS;		/* output record sep */
char	**OFMT;		/* output format for numbers */
char	**CONVFMT;	/* format for conversions in getsval */
Awkfloat *NF;		/* number of fields in current record */
Awkfloat *NR;		/* number of current record */
Awkfloat *FNR;		/* number of current record in current file */
char	**FILENAME;	/* current filename argument */
Awkfloat *ARGC;		/* number of arguments from command line */
char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH;	/* length of same */

/* The cells themselves, for code that needs to compare cell identity. */
Cell	*fsloc;		/* FS */
Cell	*nrloc;		/* NR */
Cell	*nfloc;		/* NF */
Cell	*fnrloc;	/* FNR */
Cell	*ofsloc;	/* OFS */
Cell	*orsloc;	/* ORS */
Cell	*rsloc;		/* RS */
Array	*ARGVtab;	/* symbol table containing ARGV[...] */
Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
Cell	*rstartloc;	/* RSTART */
Cell	*rlengthloc;	/* RLENGTH */
Cell	*subseploc;	/* SUBSEP */
Cell	*symtabloc;	/* SYMTAB */

Cell	*nullloc;	/* a guaranteed empty cell */
Node	*nullnode;	/* zero&null, converted into a node for comparisons */
Cell	*literal0;	/* the constant cell named "0", created in syminit */

extern Cell **fldtab;	/* declared here only; the definition lives in another file */
72
/*
 * syminit: enter the builtin variables into the main symbol table and
 * cache pointers to their cells and to their string/number members in
 * the file-level globals.
 *
 * symtab must already point at a table: setsymtab() dereferences it.
 * String-valued builtins are created with DONTFREE; the numeric ones
 * (NF, NR, FNR, RSTART, RLENGTH) start as NUM with value 0.0.
 */
void syminit(void)	/* initialize symbol table with builtin vars */
{
	/* shared constant cells: the literal "0" and the empty/zero value */
	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
	/* this is used for if(x)... tests: */
	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
	nullnode = celltonode(nullloc, CCON);

	/* separators and formats: defaults are " ", "\n", " ", "\n", "%.6g" */
	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
	FS = &fsloc->sval;
	rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
	RS = &rsloc->sval;
	ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
	OFS = &ofsloc->sval;
	orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
	ORS = &orsloc->sval;
	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
	/* record/field counters */
	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
	NF = &nfloc->fval;
	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
	NR = &nrloc->fval;
	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
	FNR = &fnrloc->fval;
	subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
	SUBSEP = &subseploc->sval;
	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
	RSTART = &rstartloc->fval;
	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
	RLENGTH = &rlengthloc->fval;
	/*
	 * SYMTAB is an array cell whose sval is repurposed to hold the
	 * Array pointer of the main symbol table itself; the string copy
	 * made by setsymtab() is released first.
	 */
	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
	free(symtabloc->sval);
	symtabloc->sval = (char *) symtab;
}
107
108void arginit(int ac, char **av)	/* set up ARGV and ARGC */
109{
110	Cell *cp;
111	int i;
112	char temp[50];
113
114	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
115	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
116	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
117	free(cp->sval);
118	cp->sval = (char *) ARGVtab;
119	for (i = 0; i < ac; i++) {
120		double result;
121
122		sprintf(temp, "%d", i);
123		if (is_number(*av, & result))
124			setsymtab(temp, *av, result, STR|NUM, ARGVtab);
125		else
126			setsymtab(temp, *av, 0.0, STR, ARGVtab);
127		av++;
128	}
129}
130
131void envinit(char **envp)	/* set up ENVIRON variable */
132{
133	Cell *cp;
134	char *p;
135
136	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
137	ENVtab = makesymtab(NSYMTAB);
138	free(cp->sval);
139	cp->sval = (char *) ENVtab;
140	for ( ; *envp; envp++) {
141		double result;
142
143		if ((p = strchr(*envp, '=')) == NULL)
144			continue;
145		if( p == *envp ) /* no left hand side name in env string */
146			continue;
147		*p++ = 0;	/* split into two strings at = */
148		if (is_number(p, & result))
149			setsymtab(*envp, p, result, STR|NUM, ENVtab);
150		else
151			setsymtab(*envp, p, 0.0, STR, ENVtab);
152		p[-1] = '=';	/* restore in case env is passed down to a shell */
153	}
154}
155
156Array *makesymtab(int n)	/* make a new symbol table */
157{
158	Array *ap;
159	Cell **tp;
160
161	ap = (Array *) malloc(sizeof(*ap));
162	tp = (Cell **) calloc(n, sizeof(*tp));
163	if (ap == NULL || tp == NULL)
164		FATAL("out of space in makesymtab");
165	ap->nelem = 0;
166	ap->size = n;
167	ap->tab = tp;
168	return(ap);
169}
170
171void freesymtab(Cell *ap)	/* free a symbol table */
172{
173	Cell *cp, *temp;
174	Array *tp;
175	int i;
176
177	if (!isarr(ap))
178		return;
179	tp = (Array *) ap->sval;
180	if (tp == NULL)
181		return;
182	for (i = 0; i < tp->size; i++) {
183		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
184			xfree(cp->nval);
185			if (freeable(cp))
186				xfree(cp->sval);
187			temp = cp->cnext;	/* avoids freeing then using */
188			free(cp);
189			tp->nelem--;
190		}
191		tp->tab[i] = NULL;
192	}
193	if (tp->nelem != 0)
194		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
195	free(tp->tab);
196	free(tp);
197}
198
199void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
200{
201	Array *tp;
202	Cell *p, *prev = NULL;
203	int h;
204
205	tp = (Array *) ap->sval;
206	h = hash(s, tp->size);
207	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
208		if (strcmp(s, p->nval) == 0) {
209			if (prev == NULL)	/* 1st one */
210				tp->tab[h] = p->cnext;
211			else			/* middle somewhere */
212				prev->cnext = p->cnext;
213			if (freeable(p))
214				xfree(p->sval);
215			free(p->nval);
216			free(p);
217			tp->nelem--;
218			return;
219		}
220}
221
222Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
223{
224	int h;
225	Cell *p;
226
227	if (n != NULL && (p = lookup(n, tp)) != NULL) {
228		DPRINTF("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
229			(void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval);
230		return(p);
231	}
232	p = (Cell *) malloc(sizeof(*p));
233	if (p == NULL)
234		FATAL("out of space for symbol table at %s", n);
235	p->nval = tostring(n);
236	p->sval = s ? tostring(s) : tostring("");
237	p->fval = f;
238	p->tval = t;
239	p->csub = CUNK;
240	p->ctype = OCELL;
241	tp->nelem++;
242	if (tp->nelem > FULLTAB * tp->size)
243		rehash(tp);
244	h = hash(n, tp->size);
245	p->cnext = tp->tab[h];
246	tp->tab[h] = p;
247	DPRINTF("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
248		(void*)p, p->nval, p->sval, p->fval, p->tval);
249	return(p);
250}
251
/*
 * hash: map string s to a bucket index in the range [0, n).
 * The accumulator is unsigned, so overflow wraps around.
 * n must not be zero.
 */
int hash(const char *s, int n)	/* form hash value for string s */
{
	unsigned acc = 0;
	const char *cp = s;

	while (*cp != '\0') {
		acc = acc * 31 + *cp;
		cp++;
	}
	return acc % n;
}
260
261void rehash(Array *tp)	/* rehash items in small table into big one */
262{
263	int i, nh, nsz;
264	Cell *cp, *op, **np;
265
266	nsz = GROWTAB * tp->size;
267	np = (Cell **) calloc(nsz, sizeof(*np));
268	if (np == NULL)		/* can't do it, but can keep running. */
269		return;		/* someone else will run out later. */
270	for (i = 0; i < tp->size; i++) {
271		for (cp = tp->tab[i]; cp; cp = op) {
272			op = cp->cnext;
273			nh = hash(cp->nval, nsz);
274			cp->cnext = np[nh];
275			np[nh] = cp;
276		}
277	}
278	free(tp->tab);
279	tp->tab = np;
280	tp->size = nsz;
281}
282
283Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
284{
285	Cell *p;
286	int h;
287
288	h = hash(s, tp->size);
289	for (p = tp->tab[h]; p != NULL; p = p->cnext)
290		if (strcmp(s, p->nval) == 0)
291			return(p);	/* found it */
292	return(NULL);			/* not found */
293}
294
/*
 * setfval: store the number f in cell vp and return the stored value.
 *
 * Besides the assignment itself, the cell's identity decides which
 * record/field bookkeeping runs first:
 *   - a field:  $0 is marked stale, and the field list is extended via
 *               newfld() when the field number is beyond NF;
 *   - NF:       $0 is marked stale and setlastfld() is called;
 *   - $0:       the fields are marked stale and savefs() is called;
 *   - OFS:      a stale $0 is rebuilt with recbld() before the
 *               assignment takes effect.
 * Any previous string value is released when the cell owns it, and the
 * cell ends up flagged NUM with STR, CONVC and CONVO cleared.
 */
Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
{
	int fldno;

	f += 0.0;		/* normalise negative zero to positive zero */
	if ((vp->tval & (NUM | STR)) == 0)
		funnyvar(vp, "assign to");	/* cell is neither a number nor a string */
	if (isfld(vp)) {
		donerec = false;	/* mark $0 invalid */
		fldno = atoi(vp->nval);	/* the field number is taken from the cell's name */
		if (fldno > *NF)
			newfld(fldno);
		DPRINTF("setting field %d to %g\n", fldno, f);
	} else if (&vp->fval == NF) {
		donerec = false;	/* mark $0 invalid */
		setlastfld(f);
		DPRINTF("setfval: setting NF to %g\n", f);
	} else if (isrec(vp)) {
		donefld = false;	/* mark $1... invalid */
		donerec = true;
		savefs();
	} else if (vp == ofsloc) {
		if (!donerec)
			recbld();
	}
	if (freeable(vp))
		xfree(vp->sval); /* free any previous string */
	vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */
	vp->fmt = NULL;
	vp->tval |= NUM;	/* mark number ok */
	/*
	 * -0 is the integer constant 0, so this test is true for both
	 * +0.0 and -0.0 and stores +0.0 in either case.
	 */
	if (f == -0)  /* who would have thought this possible? */
		f = 0;
	DPRINTF("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval);
	return vp->fval = f;
}
330
331void funnyvar(Cell *vp, const char *rw)
332{
333	if (isarr(vp))
334		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
335	if (vp->tval & FCN)
336		FATAL("can't %s %s; it's a function.", rw, vp->nval);
337	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
338		(void *)vp, vp->nval, vp->sval, vp->fval, vp->tval);
339}
340
341char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
342{
343	char *t;
344	int fldno;
345	Awkfloat f;
346
347	DPRINTF("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
348		(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld);
349	if ((vp->tval & (NUM | STR)) == 0)
350		funnyvar(vp, "assign to");
351	if (CSV && (vp == rsloc))
352		WARNING("danger: don't set RS when --csv is in effect");
353	if (CSV && (vp == fsloc))
354		WARNING("danger: don't set FS when --csv is in effect");
355	if (isfld(vp)) {
356		donerec = false;	/* mark $0 invalid */
357		fldno = atoi(vp->nval);
358		if (fldno > *NF)
359			newfld(fldno);
360		DPRINTF("setting field %d to %s (%p)\n", fldno, s, (const void*)s);
361	} else if (isrec(vp)) {
362		donefld = false;	/* mark $1... invalid */
363		donerec = true;
364		savefs();
365	} else if (vp == ofsloc) {
366		if (!donerec)
367			recbld();
368	}
369	t = s ? tostring(s) : tostring("");	/* in case it's self-assign */
370	if (freeable(vp))
371		xfree(vp->sval);
372	vp->tval &= ~(NUM|DONTFREE|CONVC|CONVO);
373	vp->tval |= STR;
374	vp->fmt = NULL;
375	DPRINTF("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
376		(void*)vp, NN(vp->nval), t, (void*)t, vp->tval, donerec, donefld);
377	vp->sval = t;
378	if (&vp->fval == NF) {
379		donerec = false;	/* mark $0 invalid */
380		f = getfval(vp);
381		setlastfld(f);
382		DPRINTF("setsval: setting NF to %g\n", f);
383	}
384
385	return(vp->sval);
386}
387
388Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
389{
390	if ((vp->tval & (NUM | STR)) == 0)
391		funnyvar(vp, "read value of");
392	if (isfld(vp) && !donefld)
393		fldbld();
394	else if (isrec(vp) && !donerec)
395		recbld();
396	if (!isnum(vp)) {	/* not a number */
397		double fval;
398		bool no_trailing;
399
400		if (is_valid_number(vp->sval, true, & no_trailing, & fval)) {
401			vp->fval = fval;
402			if (no_trailing && !(vp->tval&CON))
403				vp->tval |= NUM;	/* make NUM only sparingly */
404		} else
405			vp->fval = 0.0;
406	}
407	DPRINTF("getfval %p: %s = %g, t=%o\n",
408		(void*)vp, NN(vp->nval), vp->fval, vp->tval);
409	return(vp->fval);
410}
411
/*
 * get_inf_nan: return the signed spelling of an infinite or NaN value
 * ("+inf", "-inf", "+nan" or "-nan"), or NULL when d is finite.
 */
static const char *get_inf_nan(double d)
{
	if (isinf(d))
		return d < 0 ? "-inf" : "+inf";
	if (isnan(d))
		return signbit(d) ? "-nan" : "+nan";	/* NaN's sign is only visible in its sign bit */
	return NULL;
}
421
/*
 * get_str_val: return the string value of cell vp, converting from the
 * number when necessary.
 *
 * fmt is either OFMT or CONVFMT (a pointer to the format cell's sval);
 * *fmt is the printf format used for non-integral numbers.  The
 * returned pointer is vp->sval, so it stays owned by the cell.
 */
static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
{
	char s[256];
	double dtemp;
	const char *p;

	if ((vp->tval & (NUM | STR)) == 0)
		funnyvar(vp, "read value of");
	/* bring a stale field or record up to date before reading it */
	if (isfld(vp) && ! donefld)
		fldbld();
	else if (isrec(vp) && ! donerec)
		recbld();

	/*
	 * ADR: This is complicated and more fragile than is desirable.
	 * Retrieving a string value for a number associates the string
	 * value with the scalar.  Previously, the string value was
	 * sticky, meaning if converted via OFMT that became the value
	 * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT
	 * changed after a string value was retrieved, the original value
	 * was maintained and used.  Also not per POSIX.
	 *
	 * We work around this design by adding two additional flags,
	 * CONVC and CONVO, indicating how the string value was
	 * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy
	 * of the pointer to the xFMT format string used for the
	 * conversion.  This pointer is only read, **never** dereferenced.
	 * The next time we do a conversion, if it's coming from the same
	 * xFMT as last time, and the pointer value is different, we
	 * know that the xFMT format string changed, and we need to
	 * redo the conversion. If it's the same, we don't have to.
	 *
	 * There are also several cases where we don't do a conversion,
	 * such as for a field (see the checks below).
	 */

	/*
	 * The macro relies on the locals s, p, dtemp and fmt of this
	 * function.  Infinities and NaNs get a fixed spelling, integral
	 * values are printed with "%.30g", everything else with *fmt.
	 */
	/* Don't duplicate the code for actually updating the value */
#define update_str_val(vp) \
	{ \
		if (freeable(vp)) \
			xfree(vp->sval); \
		if ((p = get_inf_nan(vp->fval)) != NULL) \
			strcpy(s, p); \
		else if (modf(vp->fval, &dtemp) == 0)	/* it's integral */ \
			snprintf(s, sizeof (s), "%.30g", vp->fval); \
		else \
			snprintf(s, sizeof (s), *fmt, vp->fval); \
		vp->sval = tostring(s); \
		vp->tval &= ~DONTFREE; \
		vp->tval |= STR; \
	}

	if (isstr(vp) == 0) {
		/* no string value yet: convert and record which format produced it */
		update_str_val(vp);
		if (fmt == OFMT) {
			vp->tval &= ~CONVC;
			vp->tval |= CONVO;
		} else {
			/* CONVFMT */
			vp->tval &= ~CONVO;
			vp->tval |= CONVC;
		}
		vp->fmt = *fmt;
	} else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp)) {
		/* the existing string is used as is: never reconverted */
		goto done;
	} else if (isstr(vp)) {
		/*
		 * Number with a cached string: reconvert only if the cache
		 * came from the other format, or from the same format whose
		 * string pointer has changed since.
		 */
		if (fmt == OFMT) {
			if ((vp->tval & CONVC) != 0
			    || ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) {
				update_str_val(vp);
				vp->tval &= ~CONVC;
				vp->tval |= CONVO;
				vp->fmt = *fmt;
			}
		} else {
			/* CONVFMT */
			if ((vp->tval & CONVO) != 0
			    || ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) {
				update_str_val(vp);
				vp->tval &= ~CONVO;
				vp->tval |= CONVC;
				vp->fmt = *fmt;
			}
		}
	}
done:
	DPRINTF("getsval %p: %s = \"%s (%p)\", t=%o\n",
		(void*)vp, NN(vp->nval), vp->sval, (void*)vp->sval, vp->tval);
	return(vp->sval);
}
512
/*
 * getsval: string value of vp, with numbers converted through CONVFMT.
 * The result is the cell's own sval; callers must not free it.
 */
char *getsval(Cell *vp)       /* get string val of a Cell */
{
      return get_str_val(vp, CONVFMT);
}
517
/*
 * getpssval: string value of vp for output, with numbers converted
 * through OFMT instead of CONVFMT.  The result is the cell's own sval;
 * callers must not free it.
 */
char *getpssval(Cell *vp)     /* get string val of a Cell for print */
{
      return get_str_val(vp, OFMT);
}
522
523
/*
 * tostring: return a freshly allocated copy of s.  Calls FATAL when
 * memory runs out.  s must not be NULL.
 */
char *tostring(const char *s)	/* make a copy of string s */
{
	char *copy;

	if ((copy = strdup(s)) == NULL)
		FATAL("out of space in tostring on %s", s);
	return copy;
}
531
/*
 * tostringN: return a copy of s in a freshly allocated buffer of at
 * least n bytes.
 *
 * s	string to copy; must not be NULL
 * n	requested buffer size in bytes
 *
 * The buffer is never smaller than strlen(s) + 1, so the copy cannot
 * run past the allocation even when the caller asks for too little.
 * Calls FATAL when memory runs out.  The caller owns the result.
 */
char *tostringN(const char *s, size_t n)	/* make a copy of string s */
{
	size_t need = strlen(s) + 1;	/* bytes required including the NUL */
	char *p;

	if (n < need)
		n = need;
	p = (char *) malloc(n);
	if (p == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(p, s, need);
	return(p);
}
542
543Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */
544{
545	Cell *c;
546	char *p;
547	char *sa = getsval(a);
548	char *sb = getsval(b);
549	size_t l = strlen(sa) + strlen(sb) + 1;
550	p = (char *) malloc(l);
551	if (p == NULL)
552		FATAL("out of space concatenating %s and %s", sa, sb);
553	snprintf(p, l, "%s%s", sa, sb);
554
555	l++;	// add room for ' '
556	char *newbuf = (char *) malloc(l);
557	if (newbuf == NULL)
558		FATAL("out of space concatenating %s and %s", sa, sb);
559	// See string() in lex.c; a string "xx" is stored in the symbol
560	// table as "xx ".
561	snprintf(newbuf, l, "%s ", p);
562	c = setsymtab(newbuf, p, 0.0, CON|STR|DONTFREE, symtab);
563	free(p);
564	free(newbuf);
565	return c;
566}
567
568char *qstring(const char *is, int delim)	/* collect string up to next delim */
569{
570	int c, n;
571	const uschar *s = (const uschar *) is;
572	uschar *buf, *bp;
573
574	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
575		FATAL( "out of space in qstring(%s)", s);
576	for (bp = buf; (c = *s) != delim; s++) {
577		if (c == '\n')
578			SYNTAX( "newline in string %.20s...", is );
579		else if (c != '\\')
580			*bp++ = c;
581		else {	/* \something */
582			c = *++s;
583			if (c == 0) {	/* \ at end */
584				*bp++ = '\\';
585				break;	/* for loop */
586			}
587			switch (c) {
588			case '\\':	*bp++ = '\\'; break;
589			case 'n':	*bp++ = '\n'; break;
590			case 't':	*bp++ = '\t'; break;
591			case 'b':	*bp++ = '\b'; break;
592			case 'f':	*bp++ = '\f'; break;
593			case 'r':	*bp++ = '\r'; break;
594			case 'v':	*bp++ = '\v'; break;
595			case 'a':	*bp++ = '\a'; break;
596			default:
597				if (!isdigit(c)) {
598					*bp++ = c;
599					break;
600				}
601				n = c - '0';
602				if (isdigit(s[1])) {
603					n = 8 * n + *++s - '0';
604					if (isdigit(s[1]))
605						n = 8 * n + *++s - '0';
606				}
607				*bp++ = n;
608				break;
609			}
610		}
611	}
612	*bp++ = 0;
613	return (char *) buf;
614}
615
616const char *flags2str(int flags)
617{
618	static const struct ftab {
619		const char *name;
620		int value;
621	} flagtab[] = {
622		{ "NUM", NUM },
623		{ "STR", STR },
624		{ "DONTFREE", DONTFREE },
625		{ "CON", CON },
626		{ "ARR", ARR },
627		{ "FCN", FCN },
628		{ "FLD", FLD },
629		{ "REC", REC },
630		{ "CONVC", CONVC },
631		{ "CONVO", CONVO },
632		{ NULL, 0 }
633	};
634	static char buf[100];
635	int i;
636	char *cp = buf;
637
638	for (i = 0; flagtab[i].name != NULL; i++) {
639		if ((flags & flagtab[i].value) != 0) {
640			if (cp > buf)
641				*cp++ = '|';
642			strcpy(cp, flagtab[i].name);
643			cp += strlen(cp);
644		}
645	}
646
647	return buf;
648}
649