1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25#define DEBUG
26#include <stdio.h>
27#include <ctype.h>
28#include <errno.h>
29#include <wctype.h>
30#include <fcntl.h>
31#include <setjmp.h>
32#include <limits.h>
33#include <math.h>
34#include <string.h>
35#include <stdlib.h>
36#include <time.h>
37#include <sys/types.h>
38#include <sys/wait.h>
39#include "awk.h"
40#include "awkgram.tab.h"
41
42
/* helpers whose definitions appear elsewhere in this file */
static void stdinit(void);
static void flush_all(void);
static char *wide_char_to_byte_str(int rune, size_t *outlen);

/*
 * tempfree(x) passes x to tfree() when istemp(x) holds and does nothing
 * otherwise.  The macro arm is the one that is compiled; the function
 * in the #else arm additionally warns when an OCELL carries a csub
 * outside the range CUNK..CFREE.
 */
#if 1
#define tempfree(x) \
	do { \
		if (istemp(x)) \
			tfree(x); \
	} while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
59
60/* do we really need these? */
61/* #ifdef _NFILE */
62/* #ifndef FOPEN_MAX */
63/* #define FOPEN_MAX _NFILE */
64/* #endif */
65/* #endif */
66/*  */
67/* #ifndef	FOPEN_MAX */
68/* #define	FOPEN_MAX	40 */	/* max number of open files */
69/* #endif */
70/*  */
71/* #ifndef RAND_MAX */
72/* #define RAND_MAX	32767 */	/* all that ansi guarantees */
73/* #endif */
74
75jmp_buf env;
76extern	int	pairstack[];
77extern	Awkfloat	srand_seed;
78
79Node	*winner = NULL;	/* root of parse tree */
80Cell	*tmps;		/* free temporary cells for execution */
81
82static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
83Cell	*True	= &truecell;
84static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
85Cell	*False	= &falsecell;
86static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
87Cell	*jbreak	= &breakcell;
88static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
89Cell	*jcont	= &contcell;
90static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
91Cell	*jnext	= &nextcell;
92static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
93Cell	*jnextfile	= &nextfilecell;
94static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
95Cell	*jexit	= &exitcell;
96static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
97Cell	*jret	= &retcell;
98static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
99
100Node	*curnode = NULL;	/* the node being executed, for debugging */
101
102/* buffer memory management */
/*
 * adjbuf - make sure a heap buffer can hold at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 means no rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal
 *          error, or NULL to have failure reported through the return
 *          value instead
 *
 * Nothing happens when the buffer is already large enough.  Otherwise
 * the buffer is realloc'ed to minlen rounded up to a multiple of
 * quantum, *pbuf and *psiz are updated, and *pbptr (if given) is moved
 * so that it keeps its offset within the buffer.
 *
 * return   0 for realloc failure, !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *tbuf;
	int rminlen, boff;

	if (minlen <= *psiz)
		return 1;	/* already big enough */

	rminlen = quantum ? minlen % quantum : 0;
	boff = pbptr ? *pbptr - *pbuf : 0;
	/*
	 * round up to next multiple of quantum; if that would overflow
	 * an int, settle for the exact size that was asked for
	 */
	if (rminlen && minlen <= INT_MAX - (quantum - rminlen))
		minlen += quantum - rminlen;
	tbuf = (char *) realloc(*pbuf, minlen);
	/* whatrtn may legitimately be NULL; never hand NULL to %s */
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
	if (tbuf == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;	/* *pbuf is untouched and still valid */
	}
	*pbuf = tbuf;
	*psiz = minlen;
	if (pbptr)
		*pbptr = tbuf + boff;
	return 1;
}
136
137void run(Node *a)	/* execution of parse tree starts here */
138{
139
140	stdinit();
141	execute(a);
142	closeall();
143}
144
145Cell *execute(Node *u)	/* execute a node of the parse tree */
146{
147	Cell *(*proc)(Node **, int);
148	Cell *x;
149	Node *a;
150
151	if (u == NULL)
152		return(True);
153	for (a = u; ; a = a->nnext) {
154		curnode = a;
155		if (isvalue(a)) {
156			x = (Cell *) (a->narg[0]);
157			if (isfld(x) && !donefld)
158				fldbld();
159			else if (isrec(x) && !donerec)
160				recbld();
161			return(x);
162		}
163		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
164			FATAL("illegal statement");
165		proc = proctab[a->nobj-FIRSTTOKEN];
166		x = (*proc)(a->narg, a->nobj);
167		if (isfld(x) && !donefld)
168			fldbld();
169		else if (isrec(x) && !donerec)
170			recbld();
171		if (isexpr(a))
172			return(x);
173		if (isjump(x))
174			return(x);
175		if (a->nnext == NULL)
176			return(x);
177		tempfree(x);
178	}
179}
180
181
182Cell *program(Node **a, int n)	/* execute an awk program */
183{				/* a[0] = BEGIN, a[1] = body, a[2] = END */
184	Cell *x;
185
186	if (setjmp(env) != 0)
187		goto ex;
188	if (a[0]) {		/* BEGIN */
189		x = execute(a[0]);
190		if (isexit(x))
191			return(True);
192		if (isjump(x))
193			FATAL("illegal break, continue, next or nextfile from BEGIN");
194		tempfree(x);
195	}
196	if (a[1] || a[2])
197		while (getrec(&record, &recsize, true) > 0) {
198			x = execute(a[1]);
199			if (isexit(x))
200				break;
201			tempfree(x);
202		}
203  ex:
204	if (setjmp(env) != 0)	/* handles exit within END */
205		goto ex1;
206	if (a[2]) {		/* END */
207		x = execute(a[2]);
208		if (isbreak(x) || isnext(x) || iscont(x))
209			FATAL("illegal break, continue, next or nextfile from END");
210		tempfree(x);
211	}
212  ex1:
213	return(True);
214}
215
216struct Frame {	/* stack frame for awk function calls */
217	int nargs;	/* number of arguments in this call */
218	Cell *fcncell;	/* pointer to Cell for function */
219	Cell **args;	/* pointer to array of arguments after execute */
220	Cell *retval;	/* return value */
221};
222
223#define	NARGS	50	/* max args in a call */
224
225struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
226int	nframe = 0;		/* number of frames allocated */
227struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
228
229Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
230{
231	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
232	int i, ncall, ndef;
233	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
234	Node *x;
235	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
236	Cell *y, *z, *fcn;
237	char *s;
238
239	fcn = execute(a[0]);	/* the function itself */
240	s = fcn->nval;
241	if (!isfcn(fcn))
242		FATAL("calling undefined function %s", s);
243	if (frame == NULL) {
244		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
245		if (frame == NULL)
246			FATAL("out of space for stack frames calling %s", s);
247	}
248	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
249		ncall++;
250	ndef = (int) fcn->fval;			/* args in defn */
251	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
252	if (ncall > ndef)
253		WARNING("function %s called with %d args, uses only %d",
254			s, ncall, ndef);
255	if (ncall + ndef > NARGS)
256		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
257	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
258		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
259		y = execute(x);
260		oargs[i] = y;
261		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
262			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
263		if (isfcn(y))
264			FATAL("can't use function %s as argument in %s", y->nval, s);
265		if (isarr(y))
266			args[i] = y;	/* arrays by ref */
267		else
268			args[i] = copycell(y);
269		tempfree(y);
270	}
271	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
272		args[i] = gettemp();
273		*args[i] = newcopycell;
274	}
275	frp++;	/* now ok to up frame */
276	if (frp >= frame + nframe) {
277		int dfp = frp - frame;	/* old index */
278		frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
279		if (frame == NULL)
280			FATAL("out of space for stack frames in %s", s);
281		frp = frame + dfp;
282	}
283	frp->fcncell = fcn;
284	frp->args = args;
285	frp->nargs = ndef;	/* number defined with (excess are locals) */
286	frp->retval = gettemp();
287
288	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
289	y = execute((Node *)(fcn->sval));	/* execute body */
290	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
291
292	for (i = 0; i < ndef; i++) {
293		Cell *t = frp->args[i];
294		if (isarr(t)) {
295			if (t->csub == CCOPY) {
296				if (i >= ncall) {
297					freesymtab(t);
298					t->csub = CTEMP;
299					tempfree(t);
300				} else {
301					oargs[i]->tval = t->tval;
302					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
303					oargs[i]->sval = t->sval;
304					tempfree(t);
305				}
306			}
307		} else if (t != y) {	/* kludge to prevent freeing twice */
308			t->csub = CTEMP;
309			tempfree(t);
310		} else if (t == y && t->csub == CCOPY) {
311			t->csub = CTEMP;
312			tempfree(t);
313			freed = 1;
314		}
315	}
316	tempfree(fcn);
317	if (isexit(y) || isnext(y))
318		return y;
319	if (freed == 0) {
320		tempfree(y);	/* don't free twice! */
321	}
322	z = frp->retval;			/* return value */
323	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
324	frp--;
325	return(z);
326}
327
328Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
329{
330	Cell *y;
331
332	/* copy is not constant or field */
333
334	y = gettemp();
335	y->tval = x->tval & ~(CON|FLD|REC);
336	y->csub = CCOPY;	/* prevents freeing until call is over */
337	y->nval = x->nval;	/* BUG? */
338	if (isstr(x) /* || x->ctype == OCELL */) {
339		y->sval = tostring(x->sval);
340		y->tval &= ~DONTFREE;
341	} else
342		y->tval |= DONTFREE;
343	y->fval = x->fval;
344	return y;
345}
346
347Cell *arg(Node **a, int n)	/* nth argument of a function */
348{
349
350	n = ptoi(a[0]);	/* argument number, counting from 0 */
351	DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
352	if (n+1 > frp->nargs)
353		FATAL("argument #%d of function %s was not supplied",
354			n+1, frp->fcncell->nval);
355	return frp->args[n];
356}
357
358Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
359{
360	Cell *y;
361
362	switch (n) {
363	case EXIT:
364		if (a[0] != NULL) {
365			y = execute(a[0]);
366			errorflag = (int) getfval(y);
367			tempfree(y);
368		}
369		longjmp(env, 1);
370	case RETURN:
371		if (a[0] != NULL) {
372			y = execute(a[0]);
373			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
374				setsval(frp->retval, getsval(y));
375				frp->retval->fval = getfval(y);
376				frp->retval->tval |= NUM;
377			}
378			else if (y->tval & STR)
379				setsval(frp->retval, getsval(y));
380			else if (y->tval & NUM)
381				setfval(frp->retval, getfval(y));
382			else		/* can't happen */
383				FATAL("bad type variable %d", y->tval);
384			tempfree(y);
385		}
386		return(jret);
387	case NEXT:
388		return(jnext);
389	case NEXTFILE:
390		nextfile();
391		return(jnextfile);
392	case BREAK:
393		return(jbreak);
394	case CONTINUE:
395		return(jcont);
396	default:	/* can't happen */
397		FATAL("illegal jump type %d", n);
398	}
399	return 0;	/* not reached */
400}
401
402Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
403{		/* a[0] is variable, a[1] is operator, a[2] is filename */
404	Cell *r, *x;
405	extern Cell **fldtab;
406	FILE *fp;
407	char *buf;
408	int bufsize = recsize;
409	int mode;
410	bool newflag;
411	double result;
412
413	if ((buf = (char *) malloc(bufsize)) == NULL)
414		FATAL("out of memory in getline");
415
416	fflush(stdout);	/* in case someone is waiting for a prompt */
417	r = gettemp();
418	if (a[1] != NULL) {		/* getline < file */
419		x = execute(a[2]);		/* filename */
420		mode = ptoi(a[1]);
421		if (mode == '|')		/* input pipe */
422			mode = LE;	/* arbitrary flag */
423		fp = openfile(mode, getsval(x), &newflag);
424		tempfree(x);
425		if (fp == NULL)
426			n = -1;
427		else
428			n = readrec(&buf, &bufsize, fp, newflag);
429		if (n <= 0) {
430			;
431		} else if (a[0] != NULL) {	/* getline var <file */
432			x = execute(a[0]);
433			setsval(x, buf);
434			if (is_number(x->sval, & result)) {
435				x->fval = result;
436				x->tval |= NUM;
437			}
438			tempfree(x);
439		} else {			/* getline <file */
440			setsval(fldtab[0], buf);
441			if (is_number(fldtab[0]->sval, & result)) {
442				fldtab[0]->fval = result;
443				fldtab[0]->tval |= NUM;
444			}
445		}
446	} else {			/* bare getline; use current input */
447		if (a[0] == NULL)	/* getline */
448			n = getrec(&record, &recsize, true);
449		else {			/* getline var */
450			n = getrec(&buf, &bufsize, false);
451			if (n > 0) {
452				x = execute(a[0]);
453				setsval(x, buf);
454				if (is_number(x->sval, & result)) {
455					x->fval = result;
456					x->tval |= NUM;
457				}
458				tempfree(x);
459			}
460		}
461	}
462	setfval(r, (Awkfloat) n);
463	free(buf);
464	return r;
465}
466
467Cell *getnf(Node **a, int n)	/* get NF */
468{
469	if (!donefld)
470		fldbld();
471	return (Cell *) a[0];
472}
473
474static char *
475makearraystring(Node *p, const char *func)
476{
477	char *buf;
478	int bufsz = recsize;
479	size_t blen;
480
481	if ((buf = (char *) malloc(bufsz)) == NULL) {
482		FATAL("%s: out of memory", func);
483	}
484
485	blen = 0;
486	buf[blen] = '\0';
487
488	for (; p; p = p->nnext) {
489		Cell *x = execute(p);	/* expr */
490		char *s = getsval(x);
491		size_t seplen = strlen(getsval(subseploc));
492		size_t nsub = p->nnext ? seplen : 0;
493		size_t slen = strlen(s);
494		size_t tlen = blen + slen + nsub;
495
496		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
497			FATAL("%s: out of memory %s[%s...]",
498			    func, x->nval, buf);
499		}
500		memcpy(buf + blen, s, slen);
501		if (nsub) {
502			memcpy(buf + blen + slen, *SUBSEP, nsub);
503		}
504		buf[tlen] = '\0';
505		blen = tlen;
506		tempfree(x);
507	}
508	return buf;
509}
510
511Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
512{
513	Cell *x, *z;
514	char *buf;
515
516	x = execute(a[0]);	/* Cell* for symbol table */
517	buf = makearraystring(a[1], __func__);
518	if (!isarr(x)) {
519		DPRINTF("making %s into an array\n", NN(x->nval));
520		if (freeable(x))
521			xfree(x->sval);
522		x->tval &= ~(STR|NUM|DONTFREE);
523		x->tval |= ARR;
524		x->sval = (char *) makesymtab(NSYMTAB);
525	}
526	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
527	z->ctype = OCELL;
528	z->csub = CVAR;
529	tempfree(x);
530	free(buf);
531	return(z);
532}
533
534Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
535{
536	Cell *x;
537
538	x = execute(a[0]);	/* Cell* for symbol table */
539	if (x == symtabloc) {
540		FATAL("cannot delete SYMTAB or its elements");
541	}
542	if (!isarr(x))
543		return True;
544	if (a[1] == NULL) {	/* delete the elements, not the table */
545		freesymtab(x);
546		x->tval &= ~STR;
547		x->tval |= ARR;
548		x->sval = (char *) makesymtab(NSYMTAB);
549	} else {
550		char *buf = makearraystring(a[1], __func__);
551		freeelem(x, buf);
552		free(buf);
553	}
554	tempfree(x);
555	return True;
556}
557
558Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
559{
560	Cell *ap, *k;
561	char *buf;
562
563	ap = execute(a[1]);	/* array name */
564	if (!isarr(ap)) {
565		DPRINTF("making %s into an array\n", ap->nval);
566		if (freeable(ap))
567			xfree(ap->sval);
568		ap->tval &= ~(STR|NUM|DONTFREE);
569		ap->tval |= ARR;
570		ap->sval = (char *) makesymtab(NSYMTAB);
571	}
572	buf = makearraystring(a[0], __func__);
573	k = lookup(buf, (Array *) ap->sval);
574	tempfree(ap);
575	free(buf);
576	if (k == NULL)
577		return(False);
578	else
579		return(True);
580}
581
582
583/* ======== utf-8 code ========== */
584
585/*
586 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
587 * or utf-8.  u8_isutf tests whether a string starts with a valid
588 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
589 * u8_nextlen returns length of next valid sequence, which is
590 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
591 * u8_strlen returns length of string in valid utf-8 sequences
592 * and/or high-bit bytes.  Conversion functions go between byte
593 * number and character number.
594 *
595 * In theory, this behaves the same as before for non-utf8 bytes.
596 *
597 * Limited checking! This is a potential security hole.
598 */
599
600/* is s the beginning of a valid utf-8 string? */
601/* return length 1..4 if yes, 0 if no */
602int u8_isutf(const char *s)
603{
604	int n, ret;
605	unsigned char c;
606
607	c = s[0];
608	if (c < 128 || awk_mb_cur_max == 1)
609		return 1; /* what if it's 0? */
610
611	n = strlen(s);
612	if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
613		ret = 2; /* 110xxxxx 10xxxxxx */
614	} else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
615			 && (s[2] & 0xC0) == 0x80) {
616		ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
617	} else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
618			 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
619		ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
620	} else {
621		ret = 0;
622	}
623	return ret;
624}
625
626/* Convert (prefix of) utf8 string to utf-32 rune. */
627/* Sets *rune to the value, returns the length. */
628/* No error checking: watch out. */
629int u8_rune(int *rune, const char *s)
630{
631	int n, ret;
632	unsigned char c;
633
634	c = s[0];
635	if (c < 128 || awk_mb_cur_max == 1) {
636		*rune = c;
637		return 1;
638	}
639
640	n = strlen(s);
641	if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
642		*rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
643		ret = 2;
644	} else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
645			  && (s[2] & 0xC0) == 0x80) {
646		*rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
647			/* 1110xxxx 10xxxxxx 10xxxxxx */
648		ret = 3;
649	} else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
650			  && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
651		*rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
652			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
653		ret = 4;
654	} else {
655		*rune = c;
656		ret = 1;
657	}
658	return ret; /* returns one byte if sequence doesn't look like utf */
659}
660
/*
 * return length of next sequence: 1 for ascii or random, 2..4 for valid utf8
 * (u8_isutf's "not utf-8" answer of 0 is mapped to 1)
 */
int u8_nextlen(const char *s)
{
	int seqlen = u8_isutf(s);

	return seqlen != 0 ? seqlen : 1;
}
671
672/* return number of utf characters or single non-utf bytes */
673int u8_strlen(const char *s)
674{
675	int i, len, n, totlen;
676	unsigned char c;
677
678	n = strlen(s);
679	totlen = 0;
680	for (i = 0; i < n; i += len) {
681		c = s[i];
682		if (c < 128 || awk_mb_cur_max == 1) {
683			len = 1;
684		} else {
685			len = u8_nextlen(&s[i]);
686		}
687		totlen++;
688		if (i > n)
689			FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
690	}
691	return totlen;
692}
693
/*
 * convert utf-8 char number in a string to its byte offset
 *
 * charnum counts characters from 0.  If the string has fewer than
 * charnum characters, the offset of the terminating NUL is returned;
 * the walk never moves past the end of the string.
 */
int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
708
/*
 * convert byte offset in s to utf-8 char number that starts there
 *
 * The result counts characters from 1: it is the number of sequences
 * that start at a byte offset <= bytenum.  An offset beyond the end of
 * the string yields -1.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int pos = 0;
	int charnum = 0; /* BUG: what origin? */
	/* should be 0 to match start==0 which means no match */

	if (bytenum > (int) strlen(s))
		return -1; /* ??? */

	while (pos <= bytenum) {
		pos += u8_nextlen(s + pos);
		charnum++;
	}
	return charnum;
}
726
/* runetochar() adapted from rune.c in the Plan 9 distribution */
728
enum
{
	Runeerror = 128, /* from somewhere else */
	Runemax = 0x10FFFF,

	Bit1    = 7,
	Bitx    = 6,
	Bit2    = 5,
	Bit3    = 4,
	Bit4    = 3,
	Bit5    = 2,

	T1      = ((1<<(Bit1+1))-1) ^ 0xFF,     /* 0000 0000 */
	Tx      = ((1<<(Bitx+1))-1) ^ 0xFF,     /* 1000 0000 */
	T2      = ((1<<(Bit2+1))-1) ^ 0xFF,     /* 1100 0000 */
	T3      = ((1<<(Bit3+1))-1) ^ 0xFF,     /* 1110 0000 */
	T4      = ((1<<(Bit4+1))-1) ^ 0xFF,     /* 1111 0000 */
	T5      = ((1<<(Bit5+1))-1) ^ 0xFF,     /* 1111 1000 */

	Rune1   = (1<<(Bit1+0*Bitx))-1,	 	/* 0000 0000 0000 0000 0111 1111 */
	Rune2   = (1<<(Bit2+1*Bitx))-1,	 	/* 0000 0000 0000 0111 1111 1111 */
	Rune3   = (1<<(Bit3+2*Bitx))-1,	 	/* 0000 0000 1111 1111 1111 1111 */
	Rune4   = (1<<(Bit4+3*Bitx))-1,	 	/* 0011 1111 1111 1111 1111 1111 */

	Maskx   = (1<<Bitx)-1,		  	/* 0011 1111 */
	Testx   = Maskx ^ 0xFF,		 	/* 1100 0000 */

};

/*
 * runetochar - encode rune c as utf-8 into str.
 *
 * str must have room for up to 4 bytes; no terminating NUL is
 * written.  Returns the number of bytes stored (1..4).  A rune above
 * Runemax is replaced by Runeerror before encoding.
 */
int runetochar(char *str, int c)
{
	/*
	 * Map out-of-range runes first.  Runeerror here is 128, which
	 * fits in two bytes; substituting it only after the two-byte
	 * case (as the Plan 9 code does for its three-byte error rune)
	 * would emit it in an overlong three-byte form.
	 */
	if (c > Runemax)
		c = Runeerror;

	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = (char) c;
		return 1;
	}

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		str[0] = (char) (T2 | (c >> 1*Bitx));
		str[1] = (char) (Tx | (c & Maskx));
		return 2;
	}

	/* three character sequence 00800-0FFFF => T3 Tx Tx */
	if (c <= Rune3) {
		str[0] = (char) (T3 |  (c >> 2*Bitx));
		str[1] = (char) (Tx | ((c >> 1*Bitx) & Maskx));
		str[2] = (char) (Tx |  (c & Maskx));
		return 3;
	}

	/* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
	str[0] = (char) (T4 |  (c >> 3*Bitx));
	str[1] = (char) (Tx | ((c >> 2*Bitx) & Maskx));
	str[2] = (char) (Tx | ((c >> 1*Bitx) & Maskx));
	str[3] = (char) (Tx |  (c & Maskx));
	return 4;
}
790
791
792/* ========== end of utf8 code =========== */
793
794
795
796Cell *matchop(Node **a, int n)	/* ~ and match() */
797{
798	Cell *x, *y, *z;
799	char *s, *t;
800	int i;
801	int cstart, cpatlen, len;
802	fa *pfa;
803	int (*mf)(fa *, const char *) = match, mode = 0;
804
805	if (n == MATCHFCN) {
806		mf = pmatch;
807		mode = 1;
808	}
809	x = execute(a[1]);	/* a[1] = target text */
810	s = getsval(x);
811	if (a[0] == NULL)	/* a[1] == 0: already-compiled reg expr */
812		i = (*mf)((fa *) a[2], s);
813	else {
814		y = execute(a[2]);	/* a[2] = regular expr */
815		t = getsval(y);
816		pfa = makedfa(t, mode);
817		i = (*mf)(pfa, s);
818		tempfree(y);
819	}
820	z = x;
821	if (n == MATCHFCN) {
822		int start = patbeg - s + 1; /* origin 1 */
823		if (patlen < 0) {
824			start = 0; /* not found */
825		} else {
826			cstart = u8_byte2char(s, start-1);
827			cpatlen = 0;
828			for (i = 0; i < patlen; i += len) {
829				len = u8_nextlen(patbeg+i);
830				cpatlen++;
831			}
832
833			start = cstart;
834			patlen = cpatlen;
835		}
836
837		setfval(rstartloc, (Awkfloat) start);
838		setfval(rlengthloc, (Awkfloat) patlen);
839		x = gettemp();
840		x->tval = NUM;
841		x->fval = start;
842	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
843		x = True;
844	else
845		x = False;
846
847	tempfree(z);
848	return x;
849}
850
851
852Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
853{
854	Cell *x, *y;
855	int i;
856
857	x = execute(a[0]);
858	i = istrue(x);
859	tempfree(x);
860	switch (n) {
861	case BOR:
862		if (i) return(True);
863		y = execute(a[1]);
864		i = istrue(y);
865		tempfree(y);
866		if (i) return(True);
867		else return(False);
868	case AND:
869		if ( !i ) return(False);
870		y = execute(a[1]);
871		i = istrue(y);
872		tempfree(y);
873		if (i) return(True);
874		else return(False);
875	case NOT:
876		if (i) return(False);
877		else return(True);
878	default:	/* can't happen */
879		FATAL("unknown boolean operator %d", n);
880	}
881	return 0;	/*NOTREACHED*/
882}
883
884Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
885{
886	int i;
887	Cell *x, *y;
888	Awkfloat j;
889	bool x_is_nan, y_is_nan;
890
891	x = execute(a[0]);
892	y = execute(a[1]);
893	x_is_nan = isnan(x->fval);
894	y_is_nan = isnan(y->fval);
895	if (x->tval&NUM && y->tval&NUM) {
896		if ((x_is_nan || y_is_nan) && n != NE)
897			return(False);
898		j = x->fval - y->fval;
899		i = j<0? -1: (j>0? 1: 0);
900	} else {
901		i = strcmp(getsval(x), getsval(y));
902	}
903	tempfree(x);
904	tempfree(y);
905	switch (n) {
906	case LT:	if (i<0) return(True);
907			else return(False);
908	case LE:	if (i<=0) return(True);
909			else return(False);
910	case NE:	if (x_is_nan && y_is_nan) return(True);
911			else if (i!=0) return(True);
912			else return(False);
913	case EQ:	if (i == 0) return(True);
914			else return(False);
915	case GE:	if (i>=0) return(True);
916			else return(False);
917	case GT:	if (i>0) return(True);
918			else return(False);
919	default:	/* can't happen */
920		FATAL("unknown relational operator %d", n);
921	}
922	return 0;	/*NOTREACHED*/
923}
924
925void tfree(Cell *a)	/* free a tempcell */
926{
927	if (freeable(a)) {
928		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
929		xfree(a->sval);
930	}
931	if (a == tmps)
932		FATAL("tempcell list is curdled");
933	a->cnext = tmps;
934	tmps = a;
935}
936
937Cell *gettemp(void)	/* get a tempcell */
938{	int i;
939	Cell *x;
940
941	if (!tmps) {
942		tmps = (Cell *) calloc(100, sizeof(*tmps));
943		if (!tmps)
944			FATAL("out of space for temporaries");
945		for (i = 1; i < 100; i++)
946			tmps[i-1].cnext = &tmps[i];
947		tmps[i-1].cnext = NULL;
948	}
949	x = tmps;
950	tmps = x->cnext;
951	*x = tempcell;
952	return(x);
953}
954
955Cell *indirect(Node **a, int n)	/* $( a[0] ) */
956{
957	Awkfloat val;
958	Cell *x;
959	int m;
960	char *s;
961
962	x = execute(a[0]);
963	val = getfval(x);	/* freebsd: defend against super large field numbers */
964	if ((Awkfloat)INT_MAX < val)
965		FATAL("trying to access out of range field %s", x->nval);
966	m = (int) val;
967	if (m == 0 && !is_number(s = getsval(x), NULL))	/* suspicion! */
968		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
969		/* BUG: can x->nval ever be null??? */
970	tempfree(x);
971	x = fieldadr(m);
972	x->ctype = OCELL;	/* BUG?  why are these needed? */
973	x->csub = CFLD;
974	return(x);
975}
976
977Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
978{
979	int k, m, n;
980	int mb, nb;
981	char *s;
982	int temp;
983	Cell *x, *y, *z = NULL;
984
985	x = execute(a[0]);
986	y = execute(a[1]);
987	if (a[2] != NULL)
988		z = execute(a[2]);
989	s = getsval(x);
990	k = u8_strlen(s) + 1;
991	if (k <= 1) {
992		tempfree(x);
993		tempfree(y);
994		if (a[2] != NULL) {
995			tempfree(z);
996		}
997		x = gettemp();
998		setsval(x, "");
999		return(x);
1000	}
1001	m = (int) getfval(y);
1002	if (m <= 0)
1003		m = 1;
1004	else if (m > k)
1005		m = k;
1006	tempfree(y);
1007	if (a[2] != NULL) {
1008		n = (int) getfval(z);
1009		tempfree(z);
1010	} else
1011		n = k - 1;
1012	if (n < 0)
1013		n = 0;
1014	else if (n > k - m)
1015		n = k - m;
1016	/* m is start, n is length from there */
1017	DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1018	y = gettemp();
1019	mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1020	nb = u8_char2byte(s, m-1+n);  /* byte offset of end+1 char in s */
1021
1022	temp = s[nb];	/* with thanks to John Linderman */
1023	s[nb] = '\0';
1024	setsval(y, s + mb);
1025	s[nb] = temp;
1026	tempfree(x);
1027	return(y);
1028}
1029
1030Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
1031{
1032	Cell *x, *y, *z;
1033	char *s1, *s2, *p1, *p2, *q;
1034	Awkfloat v = 0.0;
1035
1036	x = execute(a[0]);
1037	s1 = getsval(x);
1038	y = execute(a[1]);
1039	s2 = getsval(y);
1040
1041	z = gettemp();
1042	for (p1 = s1; *p1 != '\0'; p1++) {
1043		for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1044			continue;
1045		if (*p2 == '\0') {
1046			/* v = (Awkfloat) (p1 - s1 + 1);	 origin 1 */
1047
1048		   /* should be a function: used in match() as well */
1049			int i, len;
1050			v = 0;
1051			for (i = 0; i < p1-s1+1; i += len) {
1052				len = u8_nextlen(s1+i);
1053				v++;
1054			}
1055			break;
1056		}
1057	}
1058	tempfree(x);
1059	tempfree(y);
1060	setfval(z, v);
1061	return(z);
1062}
1063
/* return 1 if s contains any utf-8 (2 bytes or more) character, else 0 */
int has_utf8(char *s)
{
	while (*s != 0) {
		int seqlen = u8_nextlen(s);

		if (seqlen > 1)
			return 1;
		s += seqlen;
	}
	return 0;
}
1075
/*
 * Output space, in bytes, that format() makes sure is available in its
 * buffer before it copies or converts the next item; a larger field
 * width takes precedence.
 */
#define	MAXNUMSIZE	50
1077
1078int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
1079{
1080	char *fmt;
1081	char *p, *t;
1082	const char *os;
1083	Cell *x;
1084	int flag = 0, n;
1085	int fmtwd; /* format width */
1086	int fmtsz = recsize;
1087	char *buf = *pbuf;
1088	int bufsize = *pbufsize;
1089#define FMTSZ(a)   (fmtsz - ((a) - fmt))
1090#define BUFSZ(a)   (bufsize - ((a) - buf))
1091
1092	static bool first = true;
1093	static bool have_a_format = false;
1094
1095	if (first) {
1096		char xbuf[100];
1097
1098		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1099		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1100		first = false;
1101	}
1102
1103	os = s;
1104	p = buf;
1105	if ((fmt = (char *) malloc(fmtsz)) == NULL)
1106		FATAL("out of memory in format()");
1107	while (*s) {
1108		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1109		if (*s != '%') {
1110			*p++ = *s++;
1111			continue;
1112		}
1113		if (*(s+1) == '%') {
1114			*p++ = '%';
1115			s += 2;
1116			continue;
1117		}
1118		fmtwd = atoi(s+1);
1119		if (fmtwd < 0)
1120			fmtwd = -fmtwd;
1121		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1122		for (t = fmt; (*t++ = *s) != '\0'; s++) {
1123			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1124				FATAL("format item %.30s... ran format() out of memory", os);
1125			/* Ignore size specifiers */
1126			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
1127				t--;
1128				continue;
1129			}
1130			if (isalpha((uschar)*s))
1131				break;
1132			if (*s == '$') {
1133				FATAL("'$' not permitted in awk formats");
1134			}
1135			if (*s == '*') {
1136				if (a == NULL) {
1137					FATAL("not enough args in printf(%s)", os);
1138				}
1139				x = execute(a);
1140				a = a->nnext;
1141				snprintf(t - 1, FMTSZ(t - 1),
1142				    "%d", fmtwd=(int) getfval(x));
1143				if (fmtwd < 0)
1144					fmtwd = -fmtwd;
1145				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1146				t = fmt + strlen(fmt);
1147				tempfree(x);
1148			}
1149		}
1150		*t = '\0';
1151		if (fmtwd < 0)
1152			fmtwd = -fmtwd;
1153		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1154		switch (*s) {
1155		case 'a': case 'A':
1156			if (have_a_format)
1157				flag = *s;
1158			else
1159				flag = 'f';
1160			break;
1161		case 'f': case 'e': case 'g': case 'E': case 'G':
1162			flag = 'f';
1163			break;
1164		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1165			flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1166			*(t-1) = 'j';
1167			*t = *s;
1168			*++t = '\0';
1169			break;
1170		case 's':
1171			flag = 's';
1172			break;
1173		case 'c':
1174			flag = 'c';
1175			break;
1176		default:
1177			WARNING("weird printf conversion %s", fmt);
1178			flag = '?';
1179			break;
1180		}
1181		if (a == NULL)
1182			FATAL("not enough args in printf(%s)", os);
1183		x = execute(a);
1184		a = a->nnext;
1185		n = MAXNUMSIZE;
1186		if (fmtwd > n)
1187			n = fmtwd;
1188		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1189		switch (flag) {
1190		case '?':
1191			snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
1192			t = getsval(x);
1193			n = strlen(t);
1194			if (fmtwd > n)
1195				n = fmtwd;
1196			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1197			p += strlen(p);
1198			snprintf(p, BUFSZ(p), "%s", t);
1199			break;
1200		case 'a':
1201		case 'A':
1202		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1203		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1204		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1205
1206		case 's': {
1207			t = getsval(x);
1208			n = strlen(t);
1209			/* if simple format or no utf-8 in the string, sprintf works */
1210			if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1211				if (fmtwd > n)
1212					n = fmtwd;
1213				if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1214					FATAL("huge string/format (%d chars) in printf %.30s..." \
1215						" ran format() out of memory", n, t);
1216				snprintf(p, BUFSZ(p), fmt, t);
1217				break;
1218			}
1219
1220			/* get here if string has utf-8 chars and fmt is not plain %s */
1221			/* "%-w.ps", where -, w and .p are all optional */
1222			/* '0' before the w is a flag character */
1223			/* fmt points at % */
1224			int ljust = 0, wid = 0, prec = n, pad = 0;
1225			char *f = fmt+1;
1226			if (f[0] == '-') {
1227				ljust = 1;
1228				f++;
1229			}
1230			// flags '0' and '+' are recognized but skipped
1231			if (f[0] == '0') {
1232				f++;
1233				if (f[0] == '+')
1234					f++;
1235			}
1236			if (f[0] == '+') {
1237				f++;
1238				if (f[0] == '0')
1239					f++;
1240			}
1241			if (isdigit(f[0])) { /* there is a wid */
1242				wid = strtol(f, &f, 10);
1243			}
1244			if (f[0] == '.') { /* there is a .prec */
1245				prec = strtol(++f, &f, 10);
1246			}
1247			if (prec > u8_strlen(t))
1248				prec = u8_strlen(t);
1249			pad = wid>prec ? wid - prec : 0;  // has to be >= 0
1250			int i, k, n;
1251
1252			if (ljust) { // print prec chars from t, then pad blanks
1253				n = u8_char2byte(t, prec);
1254				for (k = 0; k < n; k++) {
1255					//putchar(t[k]);
1256					*p++ = t[k];
1257				}
1258				for (i = 0; i < pad; i++) {
1259					//printf(" ");
1260					*p++ = ' ';
1261				}
1262			} else { // print pad blanks, then prec chars from t
1263				for (i = 0; i < pad; i++) {
1264					//printf(" ");
1265					*p++ = ' ';
1266				}
1267				n = u8_char2byte(t, prec);
1268				for (k = 0; k < n; k++) {
1269					//putchar(t[k]);
1270					*p++ = t[k];
1271				}
1272			}
1273			*p = 0;
1274			break;
1275		}
1276
1277               case 'c': {
1278			/*
1279			 * If a numeric value is given, awk should just turn
1280			 * it into a character and print it:
1281			 *      BEGIN { printf("%c\n", 65) }
1282			 * prints "A".
1283			 *
1284			 * But what if the numeric value is > 128 and
1285			 * represents a valid Unicode code point?!? We do
1286			 * our best to convert it back into UTF-8. If we
1287			 * can't, we output the encoding of the Unicode
1288			 * "invalid character", 0xFFFD.
1289			 */
1290			if (isnum(x)) {
1291				int charval = (int) getfval(x);
1292
1293				if (charval != 0) {
1294					if (charval < 128 || awk_mb_cur_max == 1)
1295						snprintf(p, BUFSZ(p), fmt, charval);
1296					else {
1297						// possible unicode character
1298						size_t count;
1299						char *bs = wide_char_to_byte_str(charval, &count);
1300
1301						if (bs == NULL)	{ // invalid character
1302							// use unicode invalid character, 0xFFFD
1303							static char invalid_char[] = "\357\277\275";
1304							bs = invalid_char;
1305							count = 3;
1306						}
1307						t = bs;
1308						n = count;
1309						goto format_percent_c;
1310					}
1311				} else {
1312					*p++ = '\0'; /* explicit null byte */
1313					*p = '\0';   /* next output will start here */
1314				}
1315				break;
1316			}
1317			t = getsval(x);
1318			n = u8_nextlen(t);
1319		format_percent_c:
1320			if (n < 2) { /* not utf8 */
1321				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1322				break;
1323			}
1324
1325			// utf8 character, almost same song and dance as for %s
1326			int ljust = 0, wid = 0, prec = n, pad = 0;
1327			char *f = fmt+1;
1328			if (f[0] == '-') {
1329				ljust = 1;
1330				f++;
1331			}
1332			// flags '0' and '+' are recognized but skipped
1333			if (f[0] == '0') {
1334				f++;
1335				if (f[0] == '+')
1336					f++;
1337			}
1338			if (f[0] == '+') {
1339				f++;
1340				if (f[0] == '0')
1341					f++;
1342			}
1343			if (isdigit(f[0])) { /* there is a wid */
1344				wid = strtol(f, &f, 10);
1345			}
1346			if (f[0] == '.') { /* there is a .prec */
1347				prec = strtol(++f, &f, 10);
1348			}
1349			if (prec > 1)           // %c --> only one character
1350				prec = 1;
1351			pad = wid>prec ? wid - prec : 0;  // has to be >= 0
1352			int i;
1353
1354			if (ljust) { // print one char from t, then pad blanks
1355				for (i = 0; i < n; i++)
1356					*p++ = t[i];
1357				for (i = 0; i < pad; i++) {
1358					//printf(" ");
1359					*p++ = ' ';
1360				}
1361			} else { // print pad blanks, then prec chars from t
1362				for (i = 0; i < pad; i++) {
1363					//printf(" ");
1364					*p++ = ' ';
1365				}
1366				for (i = 0; i < n; i++)
1367					*p++ = t[i];
1368			}
1369			*p = 0;
1370			break;
1371		}
1372		default:
1373			FATAL("can't happen: bad conversion %c in format()", flag);
1374		}
1375
1376		tempfree(x);
1377		p += strlen(p);
1378		s++;
1379	}
1380	*p = '\0';
1381	free(fmt);
1382	for ( ; a; a = a->nnext) {		/* evaluate any remaining args */
1383		x = execute(a);
1384		tempfree(x);
1385	}
1386	*pbuf = buf;
1387	*pbufsize = bufsize;
1388	return p - buf;
1389}
1390
1391Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
1392{
1393	Cell *x;
1394	Node *y;
1395	char *buf;
1396	int bufsz=3*recsize;
1397
1398	if ((buf = (char *) malloc(bufsz)) == NULL)
1399		FATAL("out of memory in awksprintf");
1400	y = a[0]->nnext;
1401	x = execute(a[0]);
1402	if (format(&buf, &bufsz, getsval(x), y) == -1)
1403		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
1404	tempfree(x);
1405	x = gettemp();
1406	x->sval = buf;
1407	x->tval = STR;
1408	return(x);
1409}
1410
1411Cell *awkprintf(Node **a, int n)		/* printf */
1412{	/* a[0] is list of args, starting with format string */
1413	/* a[1] is redirection operator, a[2] is redirection file */
1414	FILE *fp;
1415	Cell *x;
1416	Node *y;
1417	char *buf;
1418	int len;
1419	int bufsz=3*recsize;
1420
1421	if ((buf = (char *) malloc(bufsz)) == NULL)
1422		FATAL("out of memory in awkprintf");
1423	y = a[0]->nnext;
1424	x = execute(a[0]);
1425	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1426		FATAL("printf string %.30s... too long.  can't happen.", buf);
1427	tempfree(x);
1428	if (a[1] == NULL) {
1429		/* fputs(buf, stdout); */
1430		fwrite(buf, len, 1, stdout);
1431		if (ferror(stdout))
1432			FATAL("write error on stdout");
1433	} else {
1434		fp = redirect(ptoi(a[1]), a[2]);
1435		/* fputs(buf, fp); */
1436		fwrite(buf, len, 1, fp);
1437		fflush(fp);
1438		if (ferror(fp))
1439			FATAL("write error on %s", filename(fp));
1440	}
1441	free(buf);
1442	return(True);
1443}
1444
1445Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
1446{
1447	Awkfloat i, j = 0;
1448	double v;
1449	Cell *x, *y, *z;
1450
1451	x = execute(a[0]);
1452	i = getfval(x);
1453	tempfree(x);
1454	if (n != UMINUS && n != UPLUS) {
1455		y = execute(a[1]);
1456		j = getfval(y);
1457		tempfree(y);
1458	}
1459	z = gettemp();
1460	switch (n) {
1461	case ADD:
1462		i += j;
1463		break;
1464	case MINUS:
1465		i -= j;
1466		break;
1467	case MULT:
1468		i *= j;
1469		break;
1470	case DIVIDE:
1471		if (j == 0)
1472			FATAL("division by zero");
1473		i /= j;
1474		break;
1475	case MOD:
1476		if (j == 0)
1477			FATAL("division by zero in mod");
1478		modf(i/j, &v);
1479		i = i - j * v;
1480		break;
1481	case UMINUS:
1482		i = -i;
1483		break;
1484	case UPLUS: /* handled by getfval(), above */
1485		break;
1486	case POWER:
1487		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
1488			i = ipow(i, (int) j);
1489               else {
1490			errno = 0;
1491			i = errcheck(pow(i, j), "pow");
1492               }
1493		break;
1494	default:	/* can't happen */
1495		FATAL("illegal arithmetic operator %d", n);
1496	}
1497	setfval(z, i);
1498	return(z);
1499}
1500
/*
 * ipow - raise x to the integer power n by repeated squaring.
 *
 * Returns 1 for n <= 0.  The exponent's bits are consumed from the most
 * significant one downwards, squaring the running value at each step and
 * multiplying in x when the bit is set.
 */
double ipow(double x, int n)
{
	double acc = 1;
	int bit;

	if (n <= 0)
		return 1;

	/* find the highest set bit of n (bit never exceeds n, so no overflow) */
	for (bit = 1; bit <= n / 2; bit *= 2)
		;

	for (; bit > 0; bit /= 2) {
		if (n & bit)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
1513
1514Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
1515{
1516	Cell *x, *z;
1517	int k;
1518	Awkfloat xf;
1519
1520	x = execute(a[0]);
1521	xf = getfval(x);
1522	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1523	if (n == PREINCR || n == PREDECR) {
1524		setfval(x, xf + k);
1525		return(x);
1526	}
1527	z = gettemp();
1528	setfval(z, xf);
1529	setfval(x, xf + k);
1530	tempfree(x);
1531	return(z);
1532}
1533
1534Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
1535{		/* this is subtle; don't muck with it. */
1536	Cell *x, *y;
1537	Awkfloat xf, yf;
1538	double v;
1539
1540	y = execute(a[1]);
1541	x = execute(a[0]);
1542	if (n == ASSIGN) {	/* ordinary assignment */
1543		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1544			;	/* self-assignment: leave alone unless it's a field or NF */
1545		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1546			yf = getfval(y);
1547			setsval(x, getsval(y));
1548			x->fval = yf;
1549			x->tval |= NUM;
1550		}
1551		else if (isstr(y))
1552			setsval(x, getsval(y));
1553		else if (isnum(y))
1554			setfval(x, getfval(y));
1555		else
1556			funnyvar(y, "read value of");
1557		tempfree(y);
1558		return(x);
1559	}
1560	xf = getfval(x);
1561	yf = getfval(y);
1562	switch (n) {
1563	case ADDEQ:
1564		xf += yf;
1565		break;
1566	case SUBEQ:
1567		xf -= yf;
1568		break;
1569	case MULTEQ:
1570		xf *= yf;
1571		break;
1572	case DIVEQ:
1573		if (yf == 0)
1574			FATAL("division by zero in /=");
1575		xf /= yf;
1576		break;
1577	case MODEQ:
1578		if (yf == 0)
1579			FATAL("division by zero in %%=");
1580		modf(xf/yf, &v);
1581		xf = xf - yf * v;
1582		break;
1583	case POWEQ:
1584		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
1585			xf = ipow(xf, (int) yf);
1586               else {
1587			errno = 0;
1588			xf = errcheck(pow(xf, yf), "pow");
1589               }
1590		break;
1591	default:
1592		FATAL("illegal assignment operator %d", n);
1593		break;
1594	}
1595	tempfree(y);
1596	setfval(x, xf);
1597	return(x);
1598}
1599
1600Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
1601{
1602	Cell *x, *y, *z;
1603	int n1, n2;
1604	char *s = NULL;
1605	int ssz = 0;
1606
1607	x = execute(a[0]);
1608	n1 = strlen(getsval(x));
1609	adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1610	memcpy(s, x->sval, n1);
1611
1612	tempfree(x);
1613
1614	y = execute(a[1]);
1615	n2 = strlen(getsval(y));
1616	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1617	memcpy(s + n1, y->sval, n2);
1618	s[n1 + n2] = '\0';
1619
1620	tempfree(y);
1621
1622	z = gettemp();
1623	z->sval = s;
1624	z->tval = STR;
1625
1626	return(z);
1627}
1628
1629Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
1630{
1631	Cell *x;
1632
1633	if (a[0] == NULL)
1634		x = execute(a[1]);
1635	else {
1636		x = execute(a[0]);
1637		if (istrue(x)) {
1638			tempfree(x);
1639			x = execute(a[1]);
1640		}
1641	}
1642	return x;
1643}
1644
1645Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
1646{
1647	Cell *x;
1648	int pair;
1649
1650	pair = ptoi(a[3]);
1651	if (pairstack[pair] == 0) {
1652		x = execute(a[0]);
1653		if (istrue(x))
1654			pairstack[pair] = 1;
1655		tempfree(x);
1656	}
1657	if (pairstack[pair] == 1) {
1658		x = execute(a[1]);
1659		if (istrue(x))
1660			pairstack[pair] = 0;
1661		tempfree(x);
1662		x = execute(a[2]);
1663		return(x);
1664	}
1665	return(False);
1666}
1667
1668Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
1669{
1670	Cell *x = NULL, *y, *ap;
1671	const char *s, *origs, *t;
1672	const char *fs = NULL;
1673	char *origfs = NULL;
1674	int sep;
1675	char temp, num[50];
1676	int n, tempstat, arg3type;
1677	int j;
1678	double result;
1679
1680	y = execute(a[0]);	/* source string */
1681	origs = s = strdup(getsval(y));
1682	tempfree(y);
1683	arg3type = ptoi(a[3]);
1684	if (a[2] == NULL) {		/* BUG: CSV should override implicit fs but not explicit */
1685		fs = getsval(fsloc);
1686	} else if (arg3type == STRING) {	/* split(str,arr,"string") */
1687		x = execute(a[2]);
1688		fs = origfs = strdup(getsval(x));
1689		tempfree(x);
1690	} else if (arg3type == REGEXPR) {
1691		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
1692	} else {
1693		FATAL("illegal type of split");
1694	}
1695	sep = *fs;
1696	ap = execute(a[1]);	/* array name */
1697/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1698	freesymtab(ap);
1699	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1700	ap->tval &= ~STR;
1701	ap->tval |= ARR;
1702	ap->sval = (char *) makesymtab(NSYMTAB);
1703
1704	n = 0;
1705        if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1706		/* split(s, a, //); have to arrange that it looks like empty sep */
1707		arg3type = 0;
1708		fs = "";
1709		sep = 0;
1710	}
1711	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
1712		fa *pfa;
1713		if (arg3type == REGEXPR) {	/* it's ready already */
1714			pfa = (fa *) a[2];
1715		} else {
1716			pfa = makedfa(fs, 1);
1717		}
1718		if (nematch(pfa,s)) {
1719			tempstat = pfa->initstat;
1720			pfa->initstat = 2;
1721			do {
1722				n++;
1723				snprintf(num, sizeof(num), "%d", n);
1724				temp = *patbeg;
1725				setptr(patbeg, '\0');
1726				if (is_number(s, & result))
1727					setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1728				else
1729					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1730				setptr(patbeg, temp);
1731				s = patbeg + patlen;
1732				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1733					n++;
1734					snprintf(num, sizeof(num), "%d", n);
1735					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1736					pfa->initstat = tempstat;
1737					goto spdone;
1738				}
1739			} while (nematch(pfa,s));
1740			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
1741							/* cf gsub and refldbld */
1742		}
1743		n++;
1744		snprintf(num, sizeof(num), "%d", n);
1745		if (is_number(s, & result))
1746			setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1747		else
1748			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1749  spdone:
1750		pfa = NULL;
1751
1752	} else if (a[2] == NULL && CSV) {	/* CSV only if no explicit separator */
1753		char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1754		for (;;) {
1755			char *fr = newt;
1756			n++;
1757			if (*s == '"' ) { /* start of "..." */
1758				for (s++ ; *s != '\0'; ) {
1759					if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1760						s += 2; /* doubled quote */
1761						*fr++ = '"';
1762					} else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1763						s++; /* skip over closing quote */
1764						break;
1765					} else {
1766						*fr++ = *s++;
1767					}
1768				}
1769				*fr++ = 0;
1770			} else {	/* unquoted field */
1771				while (*s != ',' && *s != '\0')
1772					*fr++ = *s++;
1773				*fr++ = 0;
1774			}
1775			snprintf(num, sizeof(num), "%d", n);
1776			if (is_number(newt, &result))
1777				setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1778			else
1779				setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1780			if (*s++ == '\0')
1781				break;
1782		}
1783		free(newt);
1784
1785	} else if (!CSV && sep == ' ') { /* usual case: split on white space */
1786		for (n = 0; ; ) {
1787#define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
1788			while (ISWS(*s))
1789				s++;
1790			if (*s == '\0')
1791				break;
1792			n++;
1793			t = s;
1794			do
1795				s++;
1796			while (*s != '\0' && !ISWS(*s));
1797			temp = *s;
1798			setptr(s, '\0');
1799			snprintf(num, sizeof(num), "%d", n);
1800			if (is_number(t, & result))
1801				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1802			else
1803				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1804			setptr(s, temp);
1805			if (*s != '\0')
1806				s++;
1807		}
1808
1809	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
1810		for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1811			char buf[10];
1812			n++;
1813			snprintf(num, sizeof(num), "%d", n);
1814
1815			for (j = 0; j < u8_nextlen(s); j++) {
1816				buf[j] = s[j];
1817			}
1818			buf[j] = '\0';
1819
1820			if (isdigit((uschar)buf[0]))
1821				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1822			else
1823				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1824		}
1825
1826	} else if (*s != '\0') {  /* some random single character */
1827		for (;;) {
1828			n++;
1829			t = s;
1830			while (*s != sep && *s != '\0')
1831				s++;
1832			temp = *s;
1833			setptr(s, '\0');
1834			snprintf(num, sizeof(num), "%d", n);
1835			if (is_number(t, & result))
1836				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1837			else
1838				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1839			setptr(s, temp);
1840			if (*s++ == '\0')
1841				break;
1842		}
1843	}
1844	tempfree(ap);
1845	xfree(origs);
1846	xfree(origfs);
1847	x = gettemp();
1848	x->tval = NUM;
1849	x->fval = n;
1850	return(x);
1851}
1852
1853Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
1854{
1855	Cell *x;
1856
1857	x = execute(a[0]);
1858	if (istrue(x)) {
1859		tempfree(x);
1860		x = execute(a[1]);
1861	} else {
1862		tempfree(x);
1863		x = execute(a[2]);
1864	}
1865	return(x);
1866}
1867
1868Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
1869{
1870	Cell *x;
1871
1872	x = execute(a[0]);
1873	if (istrue(x)) {
1874		tempfree(x);
1875		x = execute(a[1]);
1876	} else if (a[2] != NULL) {
1877		tempfree(x);
1878		x = execute(a[2]);
1879	}
1880	return(x);
1881}
1882
1883Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
1884{
1885	Cell *x;
1886
1887	for (;;) {
1888		x = execute(a[0]);
1889		if (!istrue(x))
1890			return(x);
1891		tempfree(x);
1892		x = execute(a[1]);
1893		if (isbreak(x)) {
1894			x = True;
1895			return(x);
1896		}
1897		if (isnext(x) || isexit(x) || isret(x))
1898			return(x);
1899		tempfree(x);
1900	}
1901}
1902
1903Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
1904{
1905	Cell *x;
1906
1907	for (;;) {
1908		x = execute(a[0]);
1909		if (isbreak(x))
1910			return True;
1911		if (isnext(x) || isexit(x) || isret(x))
1912			return(x);
1913		tempfree(x);
1914		x = execute(a[1]);
1915		if (!istrue(x))
1916			return(x);
1917		tempfree(x);
1918	}
1919}
1920
1921Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
1922{
1923	Cell *x;
1924
1925	x = execute(a[0]);
1926	tempfree(x);
1927	for (;;) {
1928		if (a[1]!=NULL) {
1929			x = execute(a[1]);
1930			if (!istrue(x)) return(x);
1931			else tempfree(x);
1932		}
1933		x = execute(a[3]);
1934		if (isbreak(x))		/* turn off break */
1935			return True;
1936		if (isnext(x) || isexit(x) || isret(x))
1937			return(x);
1938		tempfree(x);
1939		x = execute(a[2]);
1940		tempfree(x);
1941	}
1942}
1943
1944Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
1945{
1946	Cell *x, *vp, *arrayp, *cp, *ncp;
1947	Array *tp;
1948	int i;
1949
1950	vp = execute(a[0]);
1951	arrayp = execute(a[1]);
1952	if (!isarr(arrayp)) {
1953		return True;
1954	}
1955	tp = (Array *) arrayp->sval;
1956	tempfree(arrayp);
1957	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
1958		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1959			setsval(vp, cp->nval);
1960			ncp = cp->cnext;
1961			x = execute(a[2]);
1962			if (isbreak(x)) {
1963				tempfree(vp);
1964				return True;
1965			}
1966			if (isnext(x) || isexit(x) || isret(x)) {
1967				tempfree(vp);
1968				return(x);
1969			}
1970			tempfree(x);
1971		}
1972	}
1973	return True;
1974}
1975
1976static char *nawk_convert(const char *s, int (*fun_c)(int),
1977    wint_t (*fun_wc)(wint_t))
1978{
1979	char *buf      = NULL;
1980	char *pbuf     = NULL;
1981	const char *ps = NULL;
1982	size_t n       = 0;
1983	wchar_t wc;
1984	const size_t sz = awk_mb_cur_max;
1985	int unused;
1986
1987	if (sz == 1) {
1988		buf = tostring(s);
1989
1990		for (pbuf = buf; *pbuf; pbuf++)
1991			*pbuf = fun_c((uschar)*pbuf);
1992
1993		return buf;
1994	} else {
1995		/* upper/lower character may be shorter/longer */
1996		buf = tostringN(s, strlen(s) * sz + 1);
1997
1998		(void) mbtowc(NULL, NULL, 0);	/* reset internal state */
1999		/*
2000		 * Reset internal state here too.
2001		 * Assign result to avoid a compiler warning. (Casting to void
2002		 * doesn't work.)
2003		 * Increment said variable to avoid a different warning.
2004		 */
2005		unused = wctomb(NULL, L'\0');
2006		unused++;
2007
2008		ps   = s;
2009		pbuf = buf;
2010		while (n = mbtowc(&wc, ps, sz),
2011		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
2012		{
2013			ps += n;
2014
2015			n = wctomb(pbuf, fun_wc(wc));
2016			if (n == (size_t)-1)
2017				FATAL("illegal wide character %s", s);
2018
2019			pbuf += n;
2020		}
2021
2022		*pbuf = '\0';
2023
2024		if (n)
2025			FATAL("illegal byte sequence %s", s);
2026
2027		return buf;
2028	}
2029}
2030
2031#ifdef __DJGPP__
/*
 * towupper - fallback: upper-case wc with toupper() when it lies in
 * 0..255; any other value is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	const int in_byte_range = (wc >= 0 && wc < 256);

	return in_byte_range ? (wint_t) toupper(wc & 0xFF) : wc;
}
2039
/*
 * towlower - fallback: lower-case wc with tolower() when it lies in
 * 0..255; any other value is returned unchanged.
 */
static wint_t towlower(wint_t wc)
{
	const int in_byte_range = (wc >= 0 && wc < 256);

	return in_byte_range ? (wint_t) tolower(wc & 0xFF) : wc;
}
2047#endif
2048
/*
 * nawk_toupper - return an upper-cased copy of s.
 * Delegates to nawk_convert(), supplying toupper() as the per-byte
 * converter and towupper() as the wide-character converter.
 */
static char *nawk_toupper(const char *s)
{
	return nawk_convert(s, toupper, towupper);
}
2053
/*
 * nawk_tolower - return a lower-cased copy of s.
 * Delegates to nawk_convert(), supplying tolower() as the per-byte
 * converter and towlower() as the wide-character converter.
 */
static char *nawk_tolower(const char *s)
{
	return nawk_convert(s, tolower, towlower);
}
2058
2059
2060
2061Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
2062{
2063	Cell *x, *y;
2064	Awkfloat u;
2065	int t, sz;
2066	Awkfloat tmp;
2067	char *buf, *fmt;
2068	Node *nextarg;
2069	FILE *fp;
2070	int status = 0;
2071	time_t tv;
2072	struct tm *tm;
2073	int estatus = 0;
2074
2075	t = ptoi(a[0]);
2076	x = execute(a[1]);
2077	nextarg = a[1]->nnext;
2078	switch (t) {
2079	case FLENGTH:
2080		if (isarr(x))
2081			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
2082		else
2083			u = u8_strlen(getsval(x));
2084		break;
2085	case FLOG:
2086		errno = 0;
2087		u = errcheck(log(getfval(x)), "log");
2088		break;
2089	case FINT:
2090		modf(getfval(x), &u); break;
2091	case FEXP:
2092		errno = 0;
2093		u = errcheck(exp(getfval(x)), "exp");
2094		break;
2095	case FSQRT:
2096		errno = 0;
2097		u = errcheck(sqrt(getfval(x)), "sqrt");
2098		break;
2099	case FSIN:
2100		u = sin(getfval(x)); break;
2101	case FCOS:
2102		u = cos(getfval(x)); break;
2103	case FATAN:
2104		if (nextarg == NULL) {
2105			WARNING("atan2 requires two arguments; returning 1.0");
2106			u = 1.0;
2107		} else {
2108			y = execute(a[1]->nnext);
2109			u = atan2(getfval(x), getfval(y));
2110			tempfree(y);
2111			nextarg = nextarg->nnext;
2112		}
2113		break;
2114	case FCOMPL:
2115		u = ~((int)getfval(x));
2116		break;
2117	case FAND:
2118		if (nextarg == 0) {
2119			WARNING("and requires two arguments; returning 0");
2120			u = 0;
2121			break;
2122		}
2123		y = execute(a[1]->nnext);
2124		u = ((int)getfval(x)) & ((int)getfval(y));
2125		tempfree(y);
2126		nextarg = nextarg->nnext;
2127		break;
2128	case FFOR:
2129		if (nextarg == 0) {
2130			WARNING("or requires two arguments; returning 0");
2131			u = 0;
2132			break;
2133		}
2134		y = execute(a[1]->nnext);
2135		u = ((int)getfval(x)) | ((int)getfval(y));
2136		tempfree(y);
2137		nextarg = nextarg->nnext;
2138		break;
2139	case FXOR:
2140		if (nextarg == 0) {
2141			WARNING("xor requires two arguments; returning 0");
2142			u = 0;
2143			break;
2144		}
2145		y = execute(a[1]->nnext);
2146		u = ((int)getfval(x)) ^ ((int)getfval(y));
2147		tempfree(y);
2148		nextarg = nextarg->nnext;
2149		break;
2150	case FLSHIFT:
2151		if (nextarg == 0) {
2152			WARNING("lshift requires two arguments; returning 0");
2153			u = 0;
2154			break;
2155		}
2156		y = execute(a[1]->nnext);
2157		u = ((int)getfval(x)) << ((int)getfval(y));
2158		tempfree(y);
2159		nextarg = nextarg->nnext;
2160		break;
2161	case FRSHIFT:
2162		if (nextarg == 0) {
2163			WARNING("rshift requires two arguments; returning 0");
2164			u = 0;
2165			break;
2166		}
2167		y = execute(a[1]->nnext);
2168		u = ((int)getfval(x)) >> ((int)getfval(y));
2169		tempfree(y);
2170		nextarg = nextarg->nnext;
2171		break;
2172	case FSYSTEM:
2173		fflush(stdout);		/* in case something is buffered already */
2174		estatus = status = system(getsval(x));
2175		if (status != -1) {
2176			if (WIFEXITED(status)) {
2177				estatus = WEXITSTATUS(status);
2178			} else if (WIFSIGNALED(status)) {
2179				estatus = WTERMSIG(status) + 256;
2180#ifdef WCOREDUMP
2181				if (WCOREDUMP(status))
2182					estatus += 256;
2183#endif
2184			} else	/* something else?!? */
2185				estatus = 0;
2186		}
2187		/* else estatus was set to -1 */
2188		u = estatus;
2189		break;
2190	case FRAND:
2191		/* random() returns numbers in [0..2^31-1]
2192		 * in order to get a number in [0, 1), divide it by 2^31
2193		 */
2194		u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
2195		break;
2196	case FSRAND:
2197		if (isrec(x))	/* no argument provided */
2198			u = time((time_t *)0);
2199		else
2200			u = getfval(x);
2201		tmp = u;
2202		srandom((unsigned long) u);
2203		u = srand_seed;
2204		srand_seed = tmp;
2205		break;
2206	case FTOUPPER:
2207	case FTOLOWER:
2208		if (t == FTOUPPER)
2209			buf = nawk_toupper(getsval(x));
2210		else
2211			buf = nawk_tolower(getsval(x));
2212		tempfree(x);
2213		x = gettemp();
2214		setsval(x, buf);
2215		free(buf);
2216		return x;
2217	case FFLUSH:
2218		if (isrec(x) || strlen(getsval(x)) == 0) {
2219			flush_all();	/* fflush() or fflush("") -> all */
2220			u = 0;
2221		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
2222			u = EOF;
2223		else
2224			u = fflush(fp);
2225		break;
2226	case FSYSTIME:
2227		u = time((time_t *) 0);
2228		break;
2229	case FSTRFTIME:
2230		/* strftime([format [,timestamp]]) */
2231		if (nextarg) {
2232			y = execute(nextarg);
2233			nextarg = nextarg->nnext;
2234			tv = (time_t) getfval(y);
2235			tempfree(y);
2236		} else
2237			tv = time((time_t *) 0);
2238		tm = localtime(&tv);
2239		if (tm == NULL)
2240			FATAL("bad time %ld", (long)tv);
2241
2242		if (isrec(x)) {
2243			/* format argument not provided, use default */
2244			fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2245		} else
2246			fmt = tostring(getsval(x));
2247
2248		sz = 32;
2249		buf = NULL;
2250		do {
2251			if ((buf = realloc(buf, (sz *= 2))) == NULL)
2252				FATAL("out of memory in strftime");
2253		} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2254
2255		y = gettemp();
2256		setsval(y, buf);
2257		free(fmt);
2258		free(buf);
2259
2260		return y;
2261	default:	/* can't happen */
2262		FATAL("illegal function type %d", t);
2263		break;
2264	}
2265	tempfree(x);
2266	x = gettemp();
2267	setfval(x, u);
2268	if (nextarg != NULL) {
2269		WARNING("warning: function has too many arguments");
2270		for ( ; nextarg; nextarg = nextarg->nnext) {
2271			y = execute(nextarg);
2272			tempfree(y);
2273		}
2274	}
2275	return(x);
2276}
2277
2278Cell *printstat(Node **a, int n)	/* print a[0] */
2279{
2280	Node *x;
2281	Cell *y;
2282	FILE *fp;
2283
2284	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
2285		fp = stdout;
2286	else
2287		fp = redirect(ptoi(a[1]), a[2]);
2288	for (x = a[0]; x != NULL; x = x->nnext) {
2289		y = execute(x);
2290		fputs(getpssval(y), fp);
2291		tempfree(y);
2292		if (x->nnext == NULL)
2293			fputs(getsval(orsloc), fp);
2294		else
2295			fputs(getsval(ofsloc), fp);
2296	}
2297	if (a[1] != NULL)
2298		fflush(fp);
2299	if (ferror(fp))
2300		FATAL("write error on %s", filename(fp));
2301	return(True);
2302}
2303
2304Cell *nullproc(Node **a, int n)
2305{
2306	return 0;
2307}
2308
2309
2310FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
2311{
2312	FILE *fp;
2313	Cell *x;
2314	char *fname;
2315
2316	x = execute(b);
2317	fname = getsval(x);
2318	fp = openfile(a, fname, NULL);
2319	if (fp == NULL)
2320		FATAL("can't open file %s", fname);
2321	tempfree(x);
2322	return fp;
2323}
2324
/*
 * Table of streams opened through openfile().  A slot is free when its
 * fp is NULL; closefile() clears both fp and fname when it releases one.
 */
struct files {
	FILE	*fp;	/* the open stream, or NULL for an unused slot */
	const char	*fname;	/* copy of the name the stream was opened under */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
} *files;

/* number of slots allocated in files[]; grown by FOPEN_MAX when full */
size_t nfiles;
2332
2333static void stdinit(void)	/* in case stdin, etc., are not constants */
2334{
2335	nfiles = FOPEN_MAX;
2336	files = (struct files *) calloc(nfiles, sizeof(*files));
2337	if (files == NULL)
2338		FATAL("can't allocate file memory for %zu files", nfiles);
2339        files[0].fp = stdin;
2340	files[0].fname = tostring("/dev/stdin");
2341	files[0].mode = LT;
2342        files[1].fp = stdout;
2343	files[1].fname = tostring("/dev/stdout");
2344	files[1].mode = GT;
2345        files[2].fp = stderr;
2346	files[2].fname = tostring("/dev/stderr");
2347	files[2].mode = GT;
2348}
2349
2350FILE *openfile(int a, const char *us, bool *pnewflag)
2351{
2352	const char *s = us;
2353	size_t i;
2354	int m;
2355	FILE *fp = NULL;
2356
2357	if (*s == '\0')
2358		FATAL("null file name in print or getline");
2359	for (i = 0; i < nfiles; i++)
2360		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2361		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2362		     a == FFLUSH)) {
2363			if (pnewflag)
2364				*pnewflag = false;
2365			return files[i].fp;
2366		}
2367	if (a == FFLUSH)	/* didn't find it, so don't create it! */
2368		return NULL;
2369
2370	for (i = 0; i < nfiles; i++)
2371		if (files[i].fp == NULL)
2372			break;
2373	if (i >= nfiles) {
2374		struct files *nf;
2375		size_t nnf = nfiles + FOPEN_MAX;
2376		nf = (struct files *) realloc(files, nnf * sizeof(*nf));
2377		if (nf == NULL)
2378			FATAL("cannot grow files for %s and %zu files", s, nnf);
2379		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2380		nfiles = nnf;
2381		files = nf;
2382	}
2383	fflush(stdout);	/* force a semblance of order */
2384	m = a;
2385	if (a == GT) {
2386		fp = fopen(s, "w");
2387	} else if (a == APPEND) {
2388		fp = fopen(s, "a");
2389		m = GT;	/* so can mix > and >> */
2390	} else if (a == '|') {	/* output pipe */
2391		fp = popen(s, "w");
2392	} else if (a == LE) {	/* input pipe */
2393		fp = popen(s, "r");
2394	} else if (a == LT) {	/* getline <file */
2395		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
2396	} else	/* can't happen */
2397		FATAL("illegal redirection %d", a);
2398	if (fp != NULL) {
2399		files[i].fname = tostring(s);
2400		files[i].fp = fp;
2401		files[i].mode = m;
2402		if (pnewflag)
2403			*pnewflag = true;
2404		if (fp != stdin && fp != stdout && fp != stderr)
2405			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
2406	}
2407	return fp;
2408}
2409
2410const char *filename(FILE *fp)
2411{
2412	size_t i;
2413
2414	for (i = 0; i < nfiles; i++)
2415		if (fp == files[i].fp)
2416			return files[i].fname;
2417	return "???";
2418}
2419
2420Cell *closefile(Node **a, int n)
2421{
2422 	Cell *x;
2423	size_t i;
2424	bool stat;
2425
2426 	x = execute(a[0]);
2427 	getsval(x);
2428	stat = true;
2429 	for (i = 0; i < nfiles; i++) {
2430		if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2431			continue;
2432		if (files[i].mode == GT || files[i].mode == '|')
2433			fflush(files[i].fp);
2434		if (ferror(files[i].fp)) {
2435			if ((files[i].mode == GT && files[i].fp != stderr)
2436			  || files[i].mode == '|')
2437				FATAL("write error on %s", files[i].fname);
2438			else
2439				WARNING("i/o error occurred on %s", files[i].fname);
2440		}
2441		if (files[i].fp == stdin || files[i].fp == stdout ||
2442		    files[i].fp == stderr)
2443			stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2444		else if (files[i].mode == '|' || files[i].mode == LE)
2445			stat = pclose(files[i].fp) == -1;
2446		else
2447			stat = fclose(files[i].fp) == EOF;
2448		if (stat)
2449			WARNING("i/o error occurred closing %s", files[i].fname);
2450		xfree(files[i].fname);
2451		files[i].fname = NULL;	/* watch out for ref thru this */
2452		files[i].fp = NULL;
2453		break;
2454 	}
2455 	tempfree(x);
2456 	x = gettemp();
2457	setfval(x, (Awkfloat) (stat ? -1 : 0));
2458 	return(x);
2459}
2460
2461void closeall(void)
2462{
2463	size_t i;
2464	bool stat = false;
2465
2466	for (i = 0; i < nfiles; i++) {
2467		if (! files[i].fp)
2468			continue;
2469		if (files[i].mode == GT || files[i].mode == '|')
2470			fflush(files[i].fp);
2471		if (ferror(files[i].fp)) {
2472			if ((files[i].mode == GT && files[i].fp != stderr)
2473			  || files[i].mode == '|')
2474				FATAL("write error on %s", files[i].fname);
2475			else
2476				WARNING("i/o error occurred on %s", files[i].fname);
2477		}
2478		if (files[i].fp == stdin || files[i].fp == stdout ||
2479		    files[i].fp == stderr)
2480			continue;
2481		if (files[i].mode == '|' || files[i].mode == LE)
2482			stat = pclose(files[i].fp) == -1;
2483		else
2484			stat = fclose(files[i].fp) == EOF;
2485		if (stat)
2486			WARNING("i/o error occurred while closing %s", files[i].fname);
2487	}
2488}
2489
2490static void flush_all(void)
2491{
2492	size_t i;
2493
2494	for (i = 0; i < nfiles; i++)
2495		if (files[i].fp)
2496			fflush(files[i].fp);
2497}
2498
/* forward declaration: dosub() and gensub() call backsub(), defined further down */
void backsub(char **pb_ptr, const char **sptr_ptr);
2500
/*
 * dosub - common implementation of sub() and gsub().
 *
 * a[0]   NULL when a[1] already holds a compiled regexpr (fa *); otherwise
 *        a[1] is executed and its string value is compiled with makedfa()
 * a[2]   replacement string expression
 * a[3]   source (target) expression; its cell receives the rewritten text
 *        through setsval() if at least one match was found
 * subop  SUB selects only the first match, GSUB selects every match;
 *        any other value is reported through FATAL()
 *
 * Returns a fresh temporary cell whose numeric value is the match count m.
 */
Cell *dosub(Node **a, int subop)        /* sub and gsub */
{
	fa *pfa;
	int tempstat = 0;                /* saved pfa->initstat, restored after the scan */
	char *repl;                      /* replacement text; released with xfree() below */
	Cell *x;

	char *buf = NULL;                /* output buffer, allocated on the first match */
	char *pb = NULL;                 /* next write position inside buf */
	int bufsz = recsize;

	const char *r, *s;
	const char *start;               /* where the next pmatch() begins */
	const char *noempty = NULL;      /* empty match disallowed here */
	size_t m = 0;                    /* match count */
	size_t whichm;                   /* which match to select, 0 = global */
	int mtype;                       /* match type */

	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];
	} else {
		x = execute(a[1]);
		pfa = makedfa(getsval(x), 1);
		tempfree(x);
	}

	x = execute(a[2]);	/* replacement string */
	repl = tostring(getsval(x));	/* presumably a private copy, so x can be released right away */
	tempfree(x);

	switch (subop) {
	case SUB:
		whichm = 1;
		x = execute(a[3]);    /* source string */
		break;
	case GSUB:
		whichm = 0;
		x = execute(a[3]);    /* source string */
		break;
	default:
		/* NOTE(review): whichm and x stay unset on this path; assumes FATAL() does not return */
		FATAL("dosub: unrecognized subop: %d", subop);
	}

	start = getsval(x);
	/* patbeg/patlen are read after every successful pmatch(); presumably it sets them */
	while (pmatch(pfa, start)) {
		if (buf == NULL) {
			/* first match: allocate the output buffer and save the dfa start state */
			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
				FATAL("out of memory in dosub");
			tempstat = pfa->initstat;
			/* NOTE(review): initstat is forced to 2 for the rest of the scan; meaning of 2 to be confirmed in the fa code */
			pfa->initstat = 2;
		}

		/* match types */
		#define	MT_IGNORE  0  /* unselected or invalid */
		#define MT_INSERT  1  /* selected, empty */
		#define MT_REPLACE 2  /* selected, not empty */

		/* an empty match just after replacement is invalid */

		if (patbeg == noempty && patlen == 0) {
			mtype = MT_IGNORE;    /* invalid, not counted */
		} else if (whichm == ++m || whichm == 0) {
			mtype = patlen ? MT_REPLACE : MT_INSERT;
		} else {
			mtype = MT_IGNORE;    /* unselected, but counted */
		}

		/* leading text: */
		if (patbeg > start) {
			/* copy the unmatched bytes between start and the match */
			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
				recsize, &pb, "dosub");
			s = start;
			while (s < patbeg)
				*pb++ = *s++;
		}

		if (mtype == MT_IGNORE)
			goto matching_text;  /* skip replacement text */

		/* emit the replacement: backslash sequences go through backsub(), a bare & expands to the matched text */
		r = repl;
		while (*r != 0) {
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
			if (*r == '\\') {
				backsub(&pb, &r);
			} else if (*r == '&') {
				r++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
					&pb, "dosub");
				for (s = patbeg; s < patbeg+patlen; )
					*pb++ = *s++;
			} else {
				*pb++ = *r++;
			}
		}

matching_text:
		/* a replaced match is dropped from the output; nothing to copy at end of string either */
		if (mtype == MT_REPLACE || *patbeg == '\0')
			goto next_search;  /* skip matching text */

		/* ignored or empty match: copy it through unchanged */
		if (patlen == 0)
			patlen = u8_nextlen(patbeg);	/* empty match: step over one character (presumably its UTF-8 byte length) */
		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
		s = patbeg;
		while (s < patbeg + patlen)
			*pb++ = *s++;

next_search:
		start = patbeg + patlen;
		/* stop once the selected match was handled (never true for whichm == 0 since m >= 1 here) or at end of input */
		if (m == whichm || *patbeg == '\0')
			break;
		if (mtype == MT_REPLACE)
			noempty = start;	/* an empty match right here would be invalid */

		#undef MT_IGNORE
		#undef MT_INSERT
		#undef MT_REPLACE
	}

	xfree(repl);

	if (buf != NULL) {
		/* at least one match was seen: restore the dfa state and finish the output */
		pfa->initstat = tempstat;

		/* trailing text */
		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
		while ((*pb++ = *start++) != '\0')
			;

		setsval(x, buf);	/* store the result in the source cell; buf is freed right after */
		free(buf);
	}

	tempfree(x);
	x = gettemp();
	x->tval = NUM;
	x->fval = m;	/* result of sub()/gsub() is the match count */
	return x;
}
2639
/*
 * gensub - substitute selected matches and return the result as a new cell,
 * leaving the source cell itself unmodified.
 *
 * a[0]   0 when a[1] already holds a compiled regexpr (fa *); otherwise a[1]
 *        is executed and its string value is compiled with makedfa()
 * a[2]   replacement string expression
 * a[3]   which matches to replace: a string starting with 'g' or 'G' means
 *        all of them, otherwise the numeric value is a 1-based match index
 *        (values below 1 are treated as 1)
 * a[4]   source string expression
 * nnn    not used
 *
 * Returns res, a copy of the source cell marked CTEMP.  Its string value is
 * replaced by the substituted text only when the regexpr matches at least
 * once.  A backslash followed by a digit in the replacement is FATAL.
 */
Cell *gensub(Node **a, int nnn)	/* global selective substitute */
	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
{
	Cell *x, *y, *res, *h;
	char *rptr;
	const char *sptr;
	char *buf, *pb;
	const char *t, *q;
	fa *pfa;
	int mflag, tempstat, num, whichm;
	int bufsz = recsize;

	/* unlike dosub(), the output buffer is allocated up front */
	if ((buf = malloc(bufsz)) == NULL)
		FATAL("out of memory in gensub");
	mflag = 0;	/* if mflag == 0, can replace empty string */
	num = 0;	/* number of matches processed so far */
	x = execute(a[4]);	/* source string */
	t = getsval(x);
	res = copycell(x);	/* target string - initially copy of source */
	res->csub = CTEMP;	/* result values are temporary */
	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	h = execute(a[3]);	/* which matches should be replaced */
	sptr = getsval(h);
	if (sptr[0] == 'g' || sptr[0] == 'G')
		whichm = -1;	/* -1 => replace every match */
	else {
		/*
		 * The specified number is index of replacement, starting
		 * from 1. GNU awk treats index lower than 0 same as
		 * 1, we do same for compatibility.
		 */
		whichm = (int) getfval(h) - 1;	/* stored 0-based, compared against num */
		if (whichm < 0)
			whichm = 0;
	}
	tempfree(h);

	/* patbeg/patlen are read after every successful pmatch(); presumably it sets them */
	if (pmatch(pfa, t)) {
		char *sl;

		tempstat = pfa->initstat;
		/* NOTE(review): initstat is forced to 2 during the scan and restored at done; meaning of 2 to be confirmed in the fa code */
		pfa->initstat = 2;
		pb = buf;
		rptr = getsval(y);
		/*
		 * XXX if there are any backreferences in subst string,
		 * complain now.
		 */
		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
			if (strchr("0123456789", sl[1])) {
				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
			}
		}

		do {
			/* a specific match was requested and this is not it: copy it through */
			if (whichm >= 0 && whichm != num) {
				num++;
				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");

				/* copy the part of string up to and including
				 * match to output buffer */
				/*
				 * NOTE(review): when patlen is 0 this leaves t at
				 * patbeg, i.e. not past the match position, so the
				 * next pmatch() restarts from there - confirm that
				 * this is intended for empty matches.
				 */
				while (t < patbeg + patlen)
					*pb++ = *t++;
				continue;	/* jumps to the pmatch() test of the do/while */
			}

			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
				if (mflag == 0) {	/* can replace empty */
					num++;
					/* emit the replacement: backsub() handles backslashes, & expands to the match */
					sptr = rptr;
					while (*sptr != 0) {
						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
						if (*sptr == '\\') {
							backsub(&pb, &sptr);
						} else if (*sptr == '&') {
							sptr++;
							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
							for (q = patbeg; q < patbeg+patlen; )
								*pb++ = *q++;
						} else
							*pb++ = *sptr++;
					}
				}
				if (*t == 0)	/* at end */
					goto done;
				/* copy one source byte so the scan moves forward */
				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
				*pb++ = *t++;
				if (pb > buf + bufsz)	/* BUG: not sure of this test */
					FATAL("gensub result0 %.30s too big; can't happen", buf);
				mflag = 0;
			}
			else {	/* matched nonempty string */
				num++;
				/* copy the unmatched text in front of the match */
				sptr = t;
				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
				while (sptr < patbeg)
					*pb++ = *sptr++;
				/* then the replacement in place of the matched text */
				sptr = rptr;
				while (*sptr != 0) {
					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
					if (*sptr == '\\') {
						backsub(&pb, &sptr);
					} else if (*sptr == '&') {
						sptr++;
						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
						for (q = patbeg; q < patbeg+patlen; )
							*pb++ = *q++;
					} else
						*pb++ = *sptr++;
				}
				t = patbeg + patlen;	/* continue scanning after the match */
				if (patlen == 0 || *t == 0 || *(t-1) == 0)
					goto done;
				if (pb > buf + bufsz)
					FATAL("gensub result1 %.30s too big; can't happen", buf);
				mflag = 1;	/* an empty match right after this one is not replaced */
			}
		} while (pmatch(pfa,t));
		/* no further match: append the rest of the source, including its NUL */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
		while ((*pb++ = *sptr++) != 0)
			;
	done:	if (pb > buf + bufsz)
			FATAL("gensub result2 %.30s too big; can't happen", buf);
		*pb = '\0';
		setsval(res, buf);	/* result goes into the copy, not into x */
		pfa->initstat = tempstat;
	}
	tempfree(x);
	tempfree(y);
	free(buf);
	return(res);
}
2780
/*
 * backsub - copy one backslash sequence of a replacement string.
 *
 * On entry *sptr_ptr points at a backslash and *pb_ptr at the output
 * position; both are advanced past what was consumed and written.
 *
 *   \\\&  ->  \&   (4 bytes consumed)
 *   \\&   ->  \    (2 bytes consumed; the & is left for the caller)
 *   \\x   ->  \x   when POSIXLY_CORRECT is set in the environment,
 *             \\x  otherwise (2 bytes consumed; x is left for the caller)
 *   \&    ->  &
 *   \x    ->  \    (1 byte consumed; x is left for the caller)
 *
 * The environment is examined only on the first call.  At most two bytes
 * are written per call.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
{						/* sptr[0] == '\\' */
	static bool env_checked = false;
	static bool posix_mode = false;
	const char *in = *sptr_ptr;
	char *out = *pb_ptr;

	if (!env_checked) {
		posix_mode = (getenv("POSIXLY_CORRECT") != NULL);
		env_checked = true;
	}

	switch (in[1]) {
	case '&':			/* \& : literal & */
		*out++ = in[1];
		in += 2;
		break;

	case '\\':
		if (in[2] == '&') {			/* \\& -> \ + matched */
			*out++ = '\\';
			in += 2;
		} else if (in[2] == '\\' && in[3] == '&') {	/* \\\& -> \& */
			*out++ = '\\';
			*out++ = '&';
			in += 4;
		} else if (posix_mode) {		/* \\x -> \x */
			*out++ = in[1];
			in += 2;
		} else {				/* \\x -> \\x */
			*out++ = in[0];
			*out++ = in[1];
			in += 2;
		}
		break;

	default:			/* literal \ */
		*out++ = in[0];
		in += 1;
		break;
	}

	*sptr_ptr = in;
	*pb_ptr = out;
}
2817
/*
 * wide_char_to_byte_str - encode one code point as a UTF-8 byte sequence.
 *
 * rune    code point to encode; values outside 0 .. 0x10FFFF are rejected
 * outlen  receives the number of encoded bytes (terminating NUL excluded);
 *         it is not written when the rune is rejected
 *
 * Returns a pointer to a static, NUL-terminated buffer that the next call
 * overwrites, or NULL when rune is out of range.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	size_t nbytes, pos;
	int lead, bits;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	memset(buf, 0, sizeof(buf));

	/* sequence length and the marker bits of the leading byte */
	if (rune <= 0x0000007F) {
		nbytes = 1;
		lead = 0x00;	/* 0xxxxxxx */
	} else if (rune <= 0x000007FF) {
		nbytes = 2;
		lead = 0xC0;	/* 110xxxxx 10xxxxxx */
	} else if (rune <= 0x0000FFFF) {
		nbytes = 3;
		lead = 0xE0;	/* 1110xxxx 10xxxxxx 10xxxxxx */
	} else {
		nbytes = 4;
		lead = 0xF0;	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	}

	/* continuation bytes carry six bits each, lowest bits in the last byte */
	bits = rune;
	for (pos = nbytes - 1; pos > 0; pos--) {
		buf[pos] = 0x80 | (bits & 0x3F);
		bits >>= 6;
	}
	buf[0] = lead | bits;

	*outlen = nbytes;
	buf[nbytes] = '\0';

	return buf;
}
2855