expand.c revision 276365
1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1997-2005
5 *	Herbert Xu <herbert@gondor.apana.org.au>.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#ifndef lint
36#if 0
37static char sccsid[] = "@(#)expand.c	8.5 (Berkeley) 5/15/95";
38#endif
39#endif /* not lint */
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: stable/10/bin/sh/expand.c 276365 2014-12-29 15:33:20Z jilles $");
42
43#include <sys/types.h>
44#include <sys/time.h>
45#include <sys/stat.h>
46#include <dirent.h>
47#include <errno.h>
48#include <inttypes.h>
49#include <limits.h>
50#include <pwd.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <string.h>
54#include <unistd.h>
55#include <wchar.h>
56#include <wctype.h>
57
58/*
59 * Routines to expand arguments to commands.  We have to deal with
60 * backquotes, shell variables, and file metacharacters.
61 */
62
63#include "shell.h"
64#include "main.h"
65#include "nodes.h"
66#include "eval.h"
67#include "expand.h"
68#include "syntax.h"
69#include "parser.h"
70#include "jobs.h"
71#include "options.h"
72#include "var.h"
73#include "input.h"
74#include "output.h"
75#include "memalloc.h"
76#include "error.h"
77#include "mystring.h"
78#include "arith.h"
79#include "show.h"
80#include "builtins.h"
81
82/*
83 * Structure specifying which parts of the string should be searched
84 * for IFS characters.
85 */
86
struct ifsregion {
	struct ifsregion *next;	/* next region in list, NULL at the end */
	int begoff;		/* offset of start of region (inclusive) */
	int endoff;		/* offset of end of region (exclusive) */
	int inquotes;		/* search for nul bytes only */
};
93
94
/*
 * State shared by the expansion routines in this file.  expandarg()
 * resets expdest, argbackq, ifsfirst.next and ifslastp at the start of
 * every call.
 */
static char *expdest;			/* output of current string */
static struct nodelist *argbackq;	/* list of back quote expressions */
/* ifsfirst is static storage; later nodes are ckmalloc'ed by recordregion */
static struct ifsregion ifsfirst;	/* first struct in list of ifs regions */
/* NULL while no region has been recorded at all */
static struct ifsregion *ifslastp;	/* last struct in list */
static struct arglist exparg;		/* holds expanded arg list */
100
/* Forward declarations of file-local functions defined below. */
static char *argstr(char *, int);
static char *exptilde(char *, int);
static char *expari(char *);
static void expbackq(union node *, int, int);
static int subevalvar(char *, char *, int, int, int, int, int);
static char *evalvar(char *, int);
static int varisset(const char *, int);
static void varvalue(const char *, int, int, int);
static void recordregion(int, int, int);
static void removerecordregions(int);
static void ifsbreakup(char *, struct arglist *);
static void expandmeta(struct strlist *, int);
static void expmeta(char *, char *);
static void addfname(char *);
static struct strlist *expsort(struct strlist *);
static struct strlist *msort(struct strlist *, int);
static int patmatch(const char *, const char *, int);
static char *cvtnum(int, char *);
static int collate_range_cmp(wchar_t, wchar_t);
120
/*
 * Compare two wide characters in the collation order of the current
 * locale by passing them to wcscoll() as one-character strings.
 * The result is negative, zero or positive, exactly as for wcscoll().
 */
static int
collate_range_cmp(wchar_t c1, wchar_t c2)
{
	const wchar_t lhs[2] = { c1, L'\0' };
	const wchar_t rhs[2] = { c2, L'\0' };

	return (wcscoll(lhs, rhs));
}
130
131static char *
132stputs_quotes(const char *data, const char *syntax, char *p)
133{
134	while (*data) {
135		CHECKSTRSPACE(2, p);
136		if (syntax[(int)*data] == CCTL)
137			USTPUTC(CTLESC, p);
138		USTPUTC(*data++, p);
139	}
140	return (p);
141}
142#define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p)
143
144/*
145 * Perform expansions on an argument, placing the resulting list of arguments
146 * in arglist.  Parameter expansion, command substitution and arithmetic
147 * expansion are always performed; additional expansions can be requested
148 * via flag (EXP_*).
149 * The result is left in the stack string.
150 * When arglist is NULL, perform here document expansion.
151 *
152 * Caution: this function uses global state and is not reentrant.
153 * However, a new invocation after an interrupted invocation is safe
154 * and will reset the global state for the new call.
155 */
156void
157expandarg(union node *arg, struct arglist *arglist, int flag)
158{
159	struct strlist *sp;
160	char *p;
161
162	argbackq = arg->narg.backquote;
163	STARTSTACKSTR(expdest);
164	ifsfirst.next = NULL;
165	ifslastp = NULL;
166	argstr(arg->narg.text, flag);
167	if (arglist == NULL) {
168		STACKSTRNUL(expdest);
169		return;			/* here document expanded */
170	}
171	STPUTC('\0', expdest);
172	p = grabstackstr(expdest);
173	exparg.lastp = &exparg.list;
174	if (flag & EXP_FULL) {
175		ifsbreakup(p, &exparg);
176		*exparg.lastp = NULL;
177		exparg.lastp = &exparg.list;
178		expandmeta(exparg.list, flag);
179	} else {
180		sp = (struct strlist *)stalloc(sizeof (struct strlist));
181		sp->text = p;
182		*exparg.lastp = sp;
183		exparg.lastp = &sp->next;
184	}
185	while (ifsfirst.next != NULL) {
186		struct ifsregion *ifsp;
187		INTOFF;
188		ifsp = ifsfirst.next->next;
189		ckfree(ifsfirst.next);
190		ifsfirst.next = ifsp;
191		INTON;
192	}
193	*exparg.lastp = NULL;
194	if (exparg.list) {
195		*arglist->lastp = exparg.list;
196		arglist->lastp = exparg.lastp;
197	}
198}
199
200
201
/*
 * Perform parameter expansion, command substitution and arithmetic
 * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE.
 * Processing ends at a CTLENDVAR or CTLENDARI character as well as '\0'.
 * This is used to expand word in ${var+word} etc.
 * If EXP_FULL or EXP_CASE are set, keep and/or generate CTLESC
 * characters to allow for further processing.
 * If EXP_FULL is set, also preserve CTLQUOTEMARK characters.
 *
 * The output is appended to the stack string at expdest.  If
 * EXP_SPLIT_LIT is set, literal characters seen outside quotes are
 * recorded as IFS regions; EXP_LIT_QUOTED supplies the initial quoting
 * state for that purpose.  Both bits are consumed here and are not passed
 * on to exptilde() or evalvar().
 *
 * Returns a pointer to the terminating '\0', or to the character after
 * the CTLENDVAR/CTLENDARI that ended processing.
 */
static char *
argstr(char *p, int flag)
{
	char c;
	int quotes = flag & (EXP_FULL | EXP_CASE);	/* do CTLESC */
	int firsteq = 1;	/* cleared once a tilde after '=' was tried */
	int split_lit;
	int lit_quoted;

	split_lit = flag & EXP_SPLIT_LIT;
	lit_quoted = flag & EXP_LIT_QUOTED;
	flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED);
	if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE)))
		p = exptilde(p, flag);
	for (;;) {
		/* No case below stores more than two bytes with USTPUTC. */
		CHECKSTRSPACE(2, expdest);
		switch (c = *p++) {
		case '\0':
			/* Leave the caller pointing at the terminator. */
			return (p - 1);
		case CTLENDVAR:
		case CTLENDARI:
			return (p);
		case CTLQUOTEMARK:
			lit_quoted = 1;
			/* "$@" syntax adherence hack */
			/* p[1] is the flags byte that evalvar() reads first. */
			if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=')
				break;
			if ((flag & EXP_FULL) != 0)
				USTPUTC(c, expdest);
			break;
		case CTLQUOTEEND:
			lit_quoted = 0;
			break;
		case CTLESC:
			/* Keep the escape only if a later pass will need it. */
			if (quotes)
				USTPUTC(c, expdest);
			c = *p++;
			USTPUTC(c, expdest);
			/* The region covers the CTLESC too when it was kept. */
			if (split_lit && !lit_quoted)
				recordregion(expdest - stackblock() -
				    (quotes ? 2 : 1),
				    expdest - stackblock(), 0);
			break;
		case CTLVAR:
			p = evalvar(p, flag);
			break;
		case CTLBACKQ:
		case CTLBACKQ|CTLQUOTE:
			/* Command substitutions are consumed in list order. */
			expbackq(argbackq->n, c & CTLQUOTE, flag);
			argbackq = argbackq->next;
			break;
		case CTLARI:
			p = expari(p);
			break;
		case ':':
		case '=':
			/*
			 * sort of a hack - expand tildes in variable
			 * assignments (after the first '=' and after ':'s).
			 */
			USTPUTC(c, expdest);
			if (split_lit && !lit_quoted)
				recordregion(expdest - stackblock() - 1,
				    expdest - stackblock(), 0);
			if (flag & EXP_VARTILDE && *p == '~' &&
			    (c != '=' || firsteq)) {
				if (c == '=')
					firsteq = 0;
				p = exptilde(p, flag);
			}
			break;
		default:
			/* Ordinary character: copy it through. */
			USTPUTC(c, expdest);
			if (split_lit && !lit_quoted)
				recordregion(expdest - stackblock() - 1,
				    expdest - stackblock(), 0);
		}
	}
}
290
291/*
292 * Perform tilde expansion, placing the result in the stack string and
293 * returning the next position in the input string to process.
294 */
295static char *
296exptilde(char *p, int flag)
297{
298	char c, *startp = p;
299	struct passwd *pw;
300	char *home;
301	int quotes = flag & (EXP_FULL | EXP_CASE);
302
303	while ((c = *p) != '\0') {
304		switch(c) {
305		case CTLESC: /* This means CTL* are always considered quoted. */
306		case CTLVAR:
307		case CTLBACKQ:
308		case CTLBACKQ | CTLQUOTE:
309		case CTLARI:
310		case CTLENDARI:
311		case CTLQUOTEMARK:
312			return (startp);
313		case ':':
314			if (flag & EXP_VARTILDE)
315				goto done;
316			break;
317		case '/':
318		case CTLENDVAR:
319			goto done;
320		}
321		p++;
322	}
323done:
324	*p = '\0';
325	if (*(startp+1) == '\0') {
326		if ((home = lookupvar("HOME")) == NULL)
327			goto lose;
328	} else {
329		if ((pw = getpwnam(startp+1)) == NULL)
330			goto lose;
331		home = pw->pw_dir;
332	}
333	if (*home == '\0')
334		goto lose;
335	*p = c;
336	if (quotes)
337		STPUTS_QUOTES(home, DQSYNTAX, expdest);
338	else
339		STPUTS(home, expdest);
340	return (p);
341lose:
342	*p = c;
343	return (startp);
344}
345
346
347static void
348removerecordregions(int endoff)
349{
350	if (ifslastp == NULL)
351		return;
352
353	if (ifsfirst.endoff > endoff) {
354		while (ifsfirst.next != NULL) {
355			struct ifsregion *ifsp;
356			INTOFF;
357			ifsp = ifsfirst.next->next;
358			ckfree(ifsfirst.next);
359			ifsfirst.next = ifsp;
360			INTON;
361		}
362		if (ifsfirst.begoff > endoff)
363			ifslastp = NULL;
364		else {
365			ifslastp = &ifsfirst;
366			ifsfirst.endoff = endoff;
367		}
368		return;
369	}
370
371	ifslastp = &ifsfirst;
372	while (ifslastp->next && ifslastp->next->begoff < endoff)
373		ifslastp=ifslastp->next;
374	while (ifslastp->next != NULL) {
375		struct ifsregion *ifsp;
376		INTOFF;
377		ifsp = ifslastp->next->next;
378		ckfree(ifslastp->next);
379		ifslastp->next = ifsp;
380		INTON;
381	}
382	if (ifslastp->endoff > endoff)
383		ifslastp->endoff = endoff;
384}
385
386/*
387 * Expand arithmetic expression.
388 * Note that flag is not required as digits never require CTLESC characters.
389 */
390static char *
391expari(char *p)
392{
393	char *q, *start;
394	arith_t result;
395	int begoff;
396	int quoted;
397	int adj;
398
399	quoted = *p++ == '"';
400	begoff = expdest - stackblock();
401	p = argstr(p, 0);
402	removerecordregions(begoff);
403	STPUTC('\0', expdest);
404	start = stackblock() + begoff;
405
406	q = grabstackstr(expdest);
407	result = arith(start);
408	ungrabstackstr(q, expdest);
409
410	start = stackblock() + begoff;
411	adj = start - expdest;
412	STADJUST(adj, expdest);
413
414	CHECKSTRSPACE((int)(DIGITS(result) + 1), expdest);
415	fmtstr(expdest, DIGITS(result), ARITH_FORMAT_STR, result);
416	adj = strlen(expdest);
417	STADJUST(adj, expdest);
418	if (!quoted)
419		recordregion(begoff, expdest - stackblock(), 0);
420	return p;
421}
422
423
/*
 * Perform command substitution.
 *
 * cmd is evaluated with evalbackcmd() and its output is appended to the
 * stack string.  NUL bytes in the output are dropped and trailing
 * newlines are not copied.  If EXP_FULL or EXP_CASE is set in flag,
 * characters classified as CCTL by the syntax table (DQSYNTAX when quoted
 * is nonzero, BASESYNTAX otherwise) get a CTLESC prefix.  An unquoted
 * result is recorded as a region subject to IFS splitting.
 * The whole function runs between INTOFF and INTON.
 */
static void
expbackq(union node *cmd, int quoted, int flag)
{
	struct backcmd in;
	int i;
	char buf[128];
	char *p;
	char *dest = expdest;
	struct ifsregion saveifs, *savelastp;
	struct nodelist *saveargbackq;
	char lastc;
	int startloc = dest - stackblock();
	char const *syntax = quoted? DQSYNTAX : BASESYNTAX;
	int quotes = flag & (EXP_FULL | EXP_CASE);
	size_t nnl;

	INTOFF;
	/*
	 * Save the expansion state around evalbackcmd(), presumably because
	 * evaluating the command can perform expansions of its own.
	 */
	saveifs = ifsfirst;
	savelastp = ifslastp;
	saveargbackq = argbackq;
	p = grabstackstr(dest);
	evalbackcmd(cmd, &in);
	ungrabstackstr(p, dest);
	ifsfirst = saveifs;
	ifslastp = savelastp;
	argbackq = saveargbackq;

	p = in.buf;
	lastc = '\0';
	nnl = 0;	/* newlines seen but not yet copied */
	/* Don't copy trailing newlines */
	for (;;) {
		if (--in.nleft < 0) {
			/* Current buffer used up: refill from in.fd, if any. */
			if (in.fd < 0)
				break;
			while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR);
			TRACE(("expbackq: read returns %d\n", i));
			if (i <= 0)
				break;
			p = buf;
			in.nleft = i - 1;
		}
		lastc = *p++;
		if (lastc != '\0') {
			if (lastc == '\n') {
				/* Hold back until a non-newline follows. */
				nnl++;
			} else {
				/* Pending newlines + possible CTLESC + char. */
				CHECKSTRSPACE(nnl + 2, dest);
				while (nnl > 0) {
					nnl--;
					USTPUTC('\n', dest);
				}
				if (quotes && syntax[(int)lastc] == CCTL)
					USTPUTC(CTLESC, dest);
				USTPUTC(lastc, dest);
			}
		}
	}

	if (in.fd >= 0)
		close(in.fd);
	if (in.buf)
		ckfree(in.buf);
	if (in.jp)
		exitstatus = waitforjob(in.jp, (int *)NULL);
	if (quoted == 0)
		recordregion(startloc, dest - stackblock(), 0);
	TRACE(("expbackq: size=%td: \"%.*s\"\n",
		((dest - stackblock()) - startloc),
		(int)((dest - stackblock()) - startloc),
		stackblock() + startloc));
	/* Publish the new end of the stack string. */
	expdest = dest;
	INTON;
}
501
502
503
/*
 * Expand the word of a ${var<op>word} construct and apply the operation.
 *
 * p         input position of the word; it is expanded with argstr()
 * str       variable name for VSASSIGN/VSQUESTION; NULL for the trim
 *           operations, in which case it is set to the expanded pattern
 *           at offset strloc in the stack string
 * strloc    stack string offset of the expanded word (used if str is NULL)
 * subtype   VSASSIGN, VSQUESTION or one of the VSTRIM* operations;
 *           anything else aborts
 * startloc  stack string offset where this expansion's output starts; for
 *           the trim operations this is the NUL-terminated variable value
 * varflags  flags of the expansion; only VSNUL is looked at, for the
 *           error message
 * quotes    nonzero if the value may contain CTLESC characters
 *
 * Returns 1 if the variable was assigned (VSASSIGN) or the pattern
 * matched and the value was trimmed; returns 0 if no pattern matched.
 * VSQUESTION reports an error via error().
 */
static int
subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
  int varflags, int quotes)
{
	char *startp;
	char *loc = NULL;
	char *q;
	int c = 0;
	struct nodelist *saveargbackq = argbackq;
	int amount;

	/* Trim patterns are expanded with EXP_CASE so CTLESC is kept. */
	argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX ||
	    subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ?
	    EXP_CASE : 0) | EXP_TILDE);
	STACKSTRNUL(expdest);
	/* Undo the advance of argbackq made while expanding the word. */
	argbackq = saveargbackq;
	startp = stackblock() + startloc;
	if (str == NULL)
	    str = stackblock() + strloc;

	switch (subtype) {
	case VSASSIGN:
		setvar(str, startp, 0);
		/* Remove the expanded word from the stack string again. */
		amount = startp - expdest;
		STADJUST(amount, expdest);
		varflags &= ~VSNUL;
		return 1;

	case VSQUESTION:
		/* A non-empty word is printed as the message. */
		if (*p != CTLENDVAR) {
			outfmt(out2, "%s\n", startp);
			error((char *)NULL);
		}
		error("%.*s: parameter %snot set", (int)(p - str - 1),
		      str, (varflags & VSNUL) ? "null or "
					      : nullstr);
		return 0;

	case VSTRIMLEFT:
		/* Shortest prefix: try prefixes of increasing length. */
		for (loc = startp; loc < str; loc++) {
			c = *loc;
			*loc = '\0';	/* temporarily end the candidate prefix */
			if (patmatch(str, startp, quotes)) {
				*loc = c;
				goto recordleft;
			}
			*loc = c;
			/* Never split a CTLESC from the character it escapes. */
			if (quotes && *loc == CTLESC)
				loc++;
		}
		return 0;

	case VSTRIMLEFTMAX:
		/* Longest prefix: try prefixes of decreasing length. */
		for (loc = str - 1; loc >= startp;) {
			c = *loc;
			*loc = '\0';
			if (patmatch(str, startp, quotes)) {
				*loc = c;
				goto recordleft;
			}
			*loc = c;
			loc--;
			/*
			 * If the byte before loc is CTLESC, rescan from the
			 * start to find out whether it really escapes *loc;
			 * if so, step back over it as well.
			 */
			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
				for (q = startp; q < loc; q++)
					if (*q == CTLESC)
						q++;
				if (q > loc)
					loc--;
			}
		}
		return 0;

	case VSTRIMRIGHT:
		/* Shortest suffix: start with the empty suffix at str - 1. */
		for (loc = str - 1; loc >= startp;) {
			if (patmatch(str, loc, quotes)) {
				/* Cut the stack string off at loc. */
				amount = loc - expdest;
				STADJUST(amount, expdest);
				return 1;
			}
			loc--;
			/* Same escaped-pair check as in VSTRIMLEFTMAX. */
			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
				for (q = startp; q < loc; q++)
					if (*q == CTLESC)
						q++;
				if (q > loc)
					loc--;
			}
		}
		return 0;

	case VSTRIMRIGHTMAX:
		/* Longest suffix: start with the whole value. */
		for (loc = startp; loc < str - 1; loc++) {
			if (patmatch(str, loc, quotes)) {
				amount = loc - expdest;
				STADJUST(amount, expdest);
				return 1;
			}
			if (quotes && *loc == CTLESC)
				loc++;
		}
		return 0;


	default:
		abort();
	}

recordleft:
	/*
	 * A prefix of length loc - startp matched.  Set expdest to the new
	 * end of the value and move the remainder down to startp.
	 */
	amount = ((str - 1) - (loc - startp)) - expdest;
	STADJUST(amount, expdest);
	while (loc != str - 1)
		*startp++ = *loc++;
	return 1;
}
618
619
/*
 * Expand a variable, and return a pointer to the next character in the
 * input string.
 *
 * p points at the byte following CTLVAR.  That byte holds the flags and
 * the substitution type (VSTYPE bits); it is followed by the parameter
 * name, a '=' and, for every type except VSNORMAL, the word, terminated
 * by CTLENDVAR.  The expansion is appended to the stack string at expdest
 * and, where field splitting applies, recorded with recordregion().
 * flag carries the EXP_* bits of the enclosing expansion.
 */

static char *
evalvar(char *p, int flag)
{
	int subtype;
	int varflags;
	char *var;
	const char *val;
	int patloc;
	int c;
	int set;
	int special;
	int startloc;
	int varlen;
	int varlenb;
	int easy;
	int quotes = flag & (EXP_FULL | EXP_CASE);

	varflags = (unsigned char)*p++;
	subtype = varflags & VSTYPE;
	var = p;
	/* Anything not starting like a name is a special parameter. */
	special = 0;
	if (! is_name(*p))
		special = 1;
	/* Skip the name; p now points at the word (if any). */
	p = strchr(p, '=') + 1;
again: /* jump here after setting a variable with ${var=text} */
	if (varflags & VSLINENO) {
		set = 1;
		special = 1;
		val = NULL;
	} else if (special) {
		set = varisset(var, varflags & VSNUL);
		val = NULL;
	} else {
		val = bltinlookup(var, 1);
		/* With VSNUL an empty value counts as unset. */
		if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) {
			val = NULL;
			set = 0;
		} else
			set = 1;
	}
	varlen = 0;
	startloc = expdest - stackblock();
	/* With uflag, using an unset parameter in these forms is an error. */
	if (!set && uflag && *var != '@' && *var != '*') {
		switch (subtype) {
		case VSNORMAL:
		case VSTRIMLEFT:
		case VSTRIMLEFTMAX:
		case VSTRIMRIGHT:
		case VSTRIMRIGHTMAX:
		case VSLENGTH:
			error("%.*s: parameter not set", (int)(p - var - 1),
			    var);
		}
	}
	if (set && subtype != VSPLUS) {
		/* insert the value of the variable */
		if (special) {
			/* For VSLINENO the text before '=' is the value. */
			if (varflags & VSLINENO)
				STPUTBIN(var, p - var - 1, expdest);
			else
				varvalue(var, varflags & VSQUOTE, subtype, flag);
			if (subtype == VSLENGTH) {
				/*
				 * Measure the value just written, then remove
				 * it again.  In a UTF-8 locale bytes of the
				 * form 10xxxxxx are not counted.
				 */
				varlenb = expdest - stackblock() - startloc;
				varlen = varlenb;
				if (localeisutf8) {
					val = stackblock() + startloc;
					for (;val != expdest; val++)
						if ((*val & 0xC0) == 0x80)
							varlen--;
				}
				STADJUST(-varlenb, expdest);
			}
		} else {
			char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
								  : BASESYNTAX;

			if (subtype == VSLENGTH) {
				/* Count without copying the value. */
				for (;*val; val++)
					if (!localeisutf8 ||
					    (*val & 0xC0) != 0x80)
						varlen++;
			}
			else {
				if (quotes)
					STPUTS_QUOTES(val, syntax, expdest);
				else
					STPUTS(val, expdest);

			}
		}
	}

	/* ${var+word} uses the word exactly when the others would not. */
	if (subtype == VSPLUS)
		set = ! set;

	/* Unquoted, or "$@" with a parameter count other than one. */
	easy = ((varflags & VSQUOTE) == 0 ||
		(*var == '@' && shellparam.nparam != 1));


	switch (subtype) {
	case VSLENGTH:
		expdest = cvtnum(varlen, expdest);
		goto record;

	case VSNORMAL:
		if (!easy)
			break;
record:
		/*
		 * Record the output for IFS processing.  The region is
		 * flagged inquotes if the expansion is quoted, or if IFS is
		 * set but empty and the parameter is @ or *.
		 */
		recordregion(startloc, expdest - stackblock(),
		    varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' &&
		    (*var == '@' || *var == '*')));
		break;

	case VSPLUS:
	case VSMINUS:
		if (!set) {
			/* Expand the word in place of the value. */
			argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) |
			    (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0));
			break;
		}
		if (easy)
			goto record;
		break;

	case VSTRIMLEFT:
	case VSTRIMLEFTMAX:
	case VSTRIMRIGHT:
	case VSTRIMRIGHTMAX:
		if (!set)
			break;
		/*
		 * Terminate the string and start recording the pattern
		 * right after it
		 */
		STPUTC('\0', expdest);
		patloc = expdest - stackblock();
		if (subevalvar(p, NULL, patloc, subtype,
		    startloc, varflags, quotes) == 0) {
			/* No match: drop the pattern and the NUL before it. */
			int amount = (expdest - stackblock() - patloc) + 1;
			STADJUST(-amount, expdest);
		}
		/* Remove any recorded regions beyond start of variable */
		removerecordregions(startloc);
		goto record;

	case VSASSIGN:
	case VSQUESTION:
		if (!set) {
			if (subevalvar(p, var, 0, subtype, startloc, varflags,
			    quotes)) {
				/* Assigned: expand again with the new value. */
				varflags &= ~VSNUL;
				/*
				 * Remove any recorded regions beyond
				 * start of variable
				 */
				removerecordregions(startloc);
				goto again;
			}
			break;
		}
		if (easy)
			goto record;
		break;

	case VSERROR:
		c = p - var - 1;
		error("${%.*s%s}: Bad substitution", c, var,
		    (c > 0 && *p != CTLENDVAR) ? "..." : "");

	default:
		abort();
	}

	if (subtype != VSNORMAL) {	/* skip to end of alternative */
		int nesting = 1;
		for (;;) {
			if ((c = *p++) == CTLESC)
				p++;
			else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) {
				/*
				 * The word was not expanded, so step over
				 * the command substitutions it contains.
				 */
				if (set)
					argbackq = argbackq->next;
			} else if (c == CTLVAR) {
				/* Only non-VSNORMAL forms have a CTLENDVAR. */
				if ((*p++ & VSTYPE) != VSNORMAL)
					nesting++;
			} else if (c == CTLENDVAR) {
				if (--nesting == 0)
					break;
			}
		}
	}
	return p;
}
817
818
819
820/*
821 * Test whether a specialized variable is set.
822 */
823
824static int
825varisset(const char *name, int nulok)
826{
827
828	if (*name == '!')
829		return backgndpidset();
830	else if (*name == '@' || *name == '*') {
831		if (*shellparam.p == NULL)
832			return 0;
833
834		if (nulok) {
835			char **av;
836
837			for (av = shellparam.p; *av; av++)
838				if (**av != '\0')
839					return 1;
840			return 0;
841		}
842	} else if (is_digit(*name)) {
843		char *ap;
844		long num;
845
846		errno = 0;
847		num = strtol(name, NULL, 10);
848		if (errno != 0 || num > shellparam.nparam)
849			return 0;
850
851		if (num == 0)
852			ap = arg0;
853		else
854			ap = shellparam.p[num - 1];
855
856		if (nulok && (ap == NULL || *ap == '\0'))
857			return 0;
858	}
859	return 1;
860}
861
862static void
863strtodest(const char *p, int flag, int subtype, int quoted)
864{
865	if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH)
866		STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest);
867	else
868		STPUTS(p, expdest);
869}
870
871/*
872 * Add the value of a specialized variable to the stack string.
873 */
874
875static void
876varvalue(const char *name, int quoted, int subtype, int flag)
877{
878	int num;
879	char *p;
880	int i;
881	char sep;
882	char **ap;
883
884	switch (*name) {
885	case '$':
886		num = rootpid;
887		goto numvar;
888	case '?':
889		num = oexitstatus;
890		goto numvar;
891	case '#':
892		num = shellparam.nparam;
893		goto numvar;
894	case '!':
895		num = backgndpidval();
896numvar:
897		expdest = cvtnum(num, expdest);
898		break;
899	case '-':
900		for (i = 0 ; i < NOPTS ; i++) {
901			if (optlist[i].val)
902				STPUTC(optlist[i].letter, expdest);
903		}
904		break;
905	case '@':
906		if (flag & EXP_FULL && quoted) {
907			for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
908				strtodest(p, flag, subtype, quoted);
909				if (*ap)
910					STPUTC('\0', expdest);
911			}
912			break;
913		}
914		/* FALLTHROUGH */
915	case '*':
916		if (ifsset())
917			sep = ifsval()[0];
918		else
919			sep = ' ';
920		for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
921			strtodest(p, flag, subtype, quoted);
922			if (!*ap)
923				break;
924			if (sep || (flag & EXP_FULL && !quoted && **ap != '\0'))
925				STPUTC(sep, expdest);
926		}
927		break;
928	case '0':
929		p = arg0;
930		strtodest(p, flag, subtype, quoted);
931		break;
932	default:
933		if (is_digit(*name)) {
934			num = atoi(name);
935			if (num > 0 && num <= shellparam.nparam) {
936				p = shellparam.p[num - 1];
937				strtodest(p, flag, subtype, quoted);
938			}
939		}
940		break;
941	}
942}
943
944
945
946/*
947 * Record the fact that we have to scan this region of the
948 * string for IFS characters.
949 */
950
951static void
952recordregion(int start, int end, int inquotes)
953{
954	struct ifsregion *ifsp;
955
956	INTOFF;
957	if (ifslastp == NULL) {
958		ifsp = &ifsfirst;
959	} else {
960		if (ifslastp->endoff == start
961		    && ifslastp->inquotes == inquotes) {
962			/* extend previous area */
963			ifslastp->endoff = end;
964			INTON;
965			return;
966		}
967		ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion));
968		ifslastp->next = ifsp;
969	}
970	ifslastp = ifsp;
971	ifslastp->next = NULL;
972	ifslastp->begoff = start;
973	ifslastp->endoff = end;
974	ifslastp->inquotes = inquotes;
975	INTON;
976}
977
978
979
/*
 * Break the argument string into pieces based upon IFS and add the
 * strings to the argument list.  The regions of the string to be
 * searched for IFS characters have been stored by recordregion.
 * CTLESC characters are preserved but have little effect in this pass
 * other than escaping CTL* characters.  In particular, they do not escape
 * IFS characters: that should be done with the ifsregion mechanism.
 * CTLQUOTEMARK characters are used to preserve empty quoted strings.
 * This pass treats them as a regular character, making the string non-empty.
 * Later, they are removed along with the other CTL* characters.
 *
 * The string is split in place: separators are overwritten with '\0' and
 * the strlist entries added to arglist point into it.  The list is not
 * NULL-terminated here.
 */
static void
ifsbreakup(char *string, struct arglist *arglist)
{
	struct ifsregion *ifsp;
	struct strlist *sp;
	char *start;		/* start of the field being collected */
	char *p;
	char *q;
	const char *ifs;
	const char *ifsspc;	/* non-NULL if the separator is whitespace */
	int had_param_ch = 0;	/* saw a non-separator since the last split */

	start = string;

	if (ifslastp == NULL) {
		/* Return entire argument, IFS doesn't apply to any of it */
		sp = (struct strlist *)stalloc(sizeof *sp);
		sp->text = start;
		*arglist->lastp = sp;
		arglist->lastp = &sp->next;
		return;
	}

	ifs = ifsset() ? ifsval() : " \t\n";

	for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
		p = string + ifsp->begoff;
		while (p < string + ifsp->endoff) {
			/* q stays on the CTLESC, if any, so it is cut off too. */
			q = p;
			if (*p == CTLESC)
				p++;
			if (ifsp->inquotes) {
				/* Only NULs (should be from "$@") end args */
				had_param_ch = 1;
				if (*p != 0) {
					p++;
					continue;
				}
				ifsspc = NULL;
			} else {
				if (!strchr(ifs, *p)) {
					had_param_ch = 1;
					p++;
					continue;
				}
				ifsspc = strchr(" \t\n", *p);

				/* Ignore IFS whitespace at start */
				if (q == start && ifsspc != NULL) {
					p++;
					start = p;
					continue;
				}
				had_param_ch = 0;
			}

			/* Save this argument... */
			*q = '\0';
			sp = (struct strlist *)stalloc(sizeof *sp);
			sp->text = start;
			*arglist->lastp = sp;
			arglist->lastp = &sp->next;
			p++;

			if (ifsspc != NULL) {
				/* Ignore further trailing IFS whitespace */
				for (; p < string + ifsp->endoff; p++) {
					q = p;
					if (*p == CTLESC)
						p++;
					/* Not an IFS character: next field starts here. */
					if (strchr(ifs, *p) == NULL) {
						p = q;
						break;
					}
					/* One non-whitespace IFS character is skipped too. */
					if (strchr(" \t\n", *p) == NULL) {
						p++;
						break;
					}
				}
			}
			start = p;
		}
	}

	/*
	 * Save anything left as an argument.
	 * Traditionally we have treated 'IFS=':'; set -- x$IFS' as
	 * generating 2 arguments, the second of which is empty.
	 * Some recent clarifications of the POSIX spec say that it
	 * should only generate one....
	 */
	if (had_param_ch || *start != 0) {
		sp = (struct strlist *)stalloc(sizeof *sp);
		sp->text = start;
		*arglist->lastp = sp;
		arglist->lastp = &sp->next;
	}
}
1089
1090
/* Buffer in which expmeta() assembles the path names it generates. */
static char expdir[PATH_MAX];
/* One past the last byte of expdir, for overflow checks. */
#define expdir_end (expdir + sizeof(expdir))
1093
1094/*
1095 * Perform pathname generation and remove control characters.
1096 * At this point, the only control characters should be CTLESC and CTLQUOTEMARK.
1097 * The results are stored in the list exparg.
1098 */
1099static void
1100expandmeta(struct strlist *str, int flag __unused)
1101{
1102	char *p;
1103	struct strlist **savelastp;
1104	struct strlist *sp;
1105	char c;
1106
1107	while (str) {
1108		if (fflag)
1109			goto nometa;
1110		p = str->text;
1111		for (;;) {			/* fast check for meta chars */
1112			if ((c = *p++) == '\0')
1113				goto nometa;
1114			if (c == '*' || c == '?' || c == '[')
1115				break;
1116		}
1117		savelastp = exparg.lastp;
1118		INTOFF;
1119		expmeta(expdir, str->text);
1120		INTON;
1121		if (exparg.lastp == savelastp) {
1122			/*
1123			 * no matches
1124			 */
1125nometa:
1126			*exparg.lastp = str;
1127			rmescapes(str->text);
1128			exparg.lastp = &str->next;
1129		} else {
1130			*exparg.lastp = NULL;
1131			*savelastp = sp = expsort(*savelastp);
1132			while (sp->next != NULL)
1133				sp = sp->next;
1134			exparg.lastp = &sp->next;
1135		}
1136		str = str->next;
1137	}
1138}
1139
1140
/*
 * Do metacharacter (i.e. *, ?, [...]) expansion.
 *
 * enddir points to the end of the directory prefix assembled so far in
 * expdir (it equals expdir on the initial call); name is the part of the
 * pattern still to be processed.  Matching names are added with
 * addfname().  The function calls itself for the remaining components
 * when the current one is not the last.  name is modified temporarily
 * while the directory is scanned and restored before returning.
 */

static void
expmeta(char *enddir, char *name)
{
	const char *p;
	const char *q;
	const char *start;	/* start of the first component with a meta */
	char *endname;
	int metaflag;
	struct stat statb;
	DIR *dirp;
	struct dirent *dp;
	int atend;
	int matchdot;
	int esc;
	int namlen;

	/*
	 * Look for the first path component that contains a metacharacter.
	 * The loop stops with p at the '/' ending that component (or at its
	 * CTLESC, in which case esc is 1) or at the end of the string.
	 */
	metaflag = 0;
	start = name;
	for (p = name; esc = 0, *p; p += esc + 1) {
		if (*p == '*' || *p == '?')
			metaflag = 1;
		else if (*p == '[') {
			/* A '[' counts only if a ']' follows before '/'. */
			q = p + 1;
			if (*q == '!' || *q == '^')
				q++;
			for (;;) {
				while (*q == CTLQUOTEMARK)
					q++;
				if (*q == CTLESC)
					q++;
				if (*q == '/' || *q == '\0')
					break;
				if (*++q == ']') {
					metaflag = 1;
					break;
				}
			}
		} else if (*p == '\0')
			break;
		else if (*p == CTLQUOTEMARK)
			continue;
		else {
			if (*p == CTLESC)
				esc++;
			if (p[esc] == '/') {
				if (metaflag)
					break;
				start = p + esc + 1;
			}
		}
	}
	if (metaflag == 0) {	/* we've reached the end of the file name */
		/* metaflag now tells whether a directory prefix was built. */
		if (enddir != expdir)
			metaflag++;
		/* Copy the rest, dropping CTLQUOTEMARK and CTLESC. */
		for (p = name ; ; p++) {
			if (*p == CTLQUOTEMARK)
				continue;
			if (*p == CTLESC)
				p++;
			*enddir++ = *p;
			if (*p == '\0')
				break;
			if (enddir == expdir_end)
				return;
		}
		/* With a generated prefix, the name must actually exist. */
		if (metaflag == 0 || lstat(expdir, &statb) >= 0)
			addfname(expdir);
		return;
	}
	endname = name + (p - name);
	if (start != name) {
		/* Append the meta-free leading components to expdir. */
		p = name;
		while (p < start) {
			while (*p == CTLQUOTEMARK)
				p++;
			if (*p == CTLESC)
				p++;
			*enddir++ = *p++;
			if (enddir == expdir_end)
				return;
		}
	}
	/* Pick the directory to read. */
	if (enddir == expdir) {
		p = ".";
	} else if (enddir == expdir + 1 && *expdir == '/') {
		p = "/";
	} else {
		p = expdir;
		/* Temporarily replace the trailing '/' by a terminator. */
		enddir[-1] = '\0';
	}
	if ((dirp = opendir(p)) == NULL)
		return;
	if (enddir != expdir)
		enddir[-1] = '/';
	if (*endname == 0) {
		atend = 1;
	} else {
		/*
		 * Cut the pattern after this component; the overwritten
		 * byte is put back at the end of the function.
		 */
		atend = 0;
		*endname = '\0';
		endname += esc + 1;
	}
	/* Names starting with '.' match only a pattern starting with '.'. */
	matchdot = 0;
	p = start;
	while (*p == CTLQUOTEMARK)
		p++;
	if (*p == CTLESC)
		p++;
	if (*p == '.')
		matchdot++;
	while (! int_pending() && (dp = readdir(dirp)) != NULL) {
		if (dp->d_name[0] == '.' && ! matchdot)
			continue;
		if (patmatch(start, dp->d_name, 0)) {
			namlen = dp->d_namlen;
			/* Skip names that would not fit into expdir. */
			if (enddir + namlen + 1 > expdir_end)
				continue;
			memcpy(enddir, dp->d_name, namlen + 1);
			if (atend)
				addfname(expdir);
			else {
				/* Only these entry types can be descended into. */
				if (dp->d_type != DT_UNKNOWN &&
				    dp->d_type != DT_DIR &&
				    dp->d_type != DT_LNK)
					continue;
				if (enddir + namlen + 2 > expdir_end)
					continue;
				enddir[namlen] = '/';
				enddir[namlen + 1] = '\0';
				expmeta(enddir + namlen + 1, endname);
			}
		}
	}
	closedir(dirp);
	/* Restore the byte that was overwritten in the pattern. */
	if (! atend)
		endname[-esc - 1] = esc ? CTLESC : '/';
}
1281
1282
1283/*
1284 * Add a file name to the list.
1285 */
1286
1287static void
1288addfname(char *name)
1289{
1290	char *p;
1291	struct strlist *sp;
1292	size_t len;
1293
1294	len = strlen(name);
1295	p = stalloc(len + 1);
1296	memcpy(p, name, len + 1);
1297	sp = (struct strlist *)stalloc(sizeof *sp);
1298	sp->text = p;
1299	*exparg.lastp = sp;
1300	exparg.lastp = &sp->next;
1301}
1302
1303
1304/*
1305 * Sort the results of file name expansion.  It calculates the number of
1306 * strings to sort and then calls msort (short for merge sort) to do the
1307 * work.
1308 */
1309
1310static struct strlist *
1311expsort(struct strlist *str)
1312{
1313	int len;
1314	struct strlist *sp;
1315
1316	len = 0;
1317	for (sp = str ; sp ; sp = sp->next)
1318		len++;
1319	return msort(str, len);
1320}
1321
1322
1323static struct strlist *
1324msort(struct strlist *list, int len)
1325{
1326	struct strlist *p, *q = NULL;
1327	struct strlist **lpp;
1328	int half;
1329	int n;
1330
1331	if (len <= 1)
1332		return list;
1333	half = len >> 1;
1334	p = list;
1335	for (n = half ; --n >= 0 ; ) {
1336		q = p;
1337		p = p->next;
1338	}
1339	q->next = NULL;			/* terminate first half of list */
1340	q = msort(list, half);		/* sort first half of list */
1341	p = msort(p, len - half);		/* sort second half */
1342	lpp = &list;
1343	for (;;) {
1344		if (strcmp(p->text, q->text) < 0) {
1345			*lpp = p;
1346			lpp = &p->next;
1347			if ((p = *lpp) == NULL) {
1348				*lpp = q;
1349				break;
1350			}
1351		} else {
1352			*lpp = q;
1353			lpp = &q->next;
1354			if ((q = *lpp) == NULL) {
1355				*lpp = p;
1356				break;
1357			}
1358		}
1359	}
1360	return list;
1361}
1362
1363
1364
/*
 * Decode the multibyte character at *p, looking at no more than 4 bytes,
 * and advance *p past it.
 *
 * Returns the decoded wide character.  Returns 0 and leaves *p alone when
 * mbtowc() reports either the terminating NUL (0) or an error (-1).
 */
static wchar_t
get_wc(const char **p)
{
	wchar_t wch;
	int nbytes;

	nbytes = mbtowc(&wch, *p, 4);
	if (nbytes == 0 || nbytes == -1)
		return 0;
	*p += nbytes;
	return wch;
}
1380
1381
/*
 * Test a character against a character class.
 *
 * p points at the first colon of "[:class:]".  When a class name of 1 to
 * sizeof(name) - 1 bytes followed by ":]" is found, *end is set to the
 * character after the closing bracket; otherwise *end is set to NULL and
 * 0 is returned.
 *
 * Returns nonzero if chr belongs to the class.  A name that wctype() does
 * not know still counts as a valid class (so *end is set) but matches
 * nothing.
 */
static int
match_charclass(const char *p, wchar_t chr, const char **end)
{
	char name[20];
	const char *term;
	size_t namelen;
	wctype_t type;

	*end = NULL;
	p++;				/* step over the leading ':' */
	term = strstr(p, ":]");
	if (term == NULL)
		return 0;
	namelen = (size_t)(term - p);
	if (namelen == 0 || namelen >= sizeof(name))
		return 0;

	memcpy(name, p, namelen);
	name[namelen] = '\0';
	*end = term + 2;

	type = wctype(name);
	return (type == 0) ? 0 : iswctype(chr, type);
}
1411
1412
1413/*
1414 * Returns true if the pattern matches the string.
1415 */
1416
1417static int
1418patmatch(const char *pattern, const char *string, int squoted)
1419{
1420	const char *p, *q, *end;
1421	const char *bt_p, *bt_q;
1422	char c;
1423	wchar_t wc, wc2;
1424
1425	p = pattern;
1426	q = string;
1427	bt_p = NULL;
1428	bt_q = NULL;
1429	for (;;) {
1430		switch (c = *p++) {
1431		case '\0':
1432			if (*q != '\0')
1433				goto backtrack;
1434			return 1;
1435		case CTLESC:
1436			if (squoted && *q == CTLESC)
1437				q++;
1438			if (*q++ != *p++)
1439				goto backtrack;
1440			break;
1441		case CTLQUOTEMARK:
1442			continue;
1443		case '?':
1444			if (squoted && *q == CTLESC)
1445				q++;
1446			if (*q == '\0')
1447				return 0;
1448			if (localeisutf8) {
1449				wc = get_wc(&q);
1450				/*
1451				 * A '?' does not match invalid UTF-8 but a
1452				 * '*' does, so backtrack.
1453				 */
1454				if (wc == 0)
1455					goto backtrack;
1456			} else
1457				wc = (unsigned char)*q++;
1458			break;
1459		case '*':
1460			c = *p;
1461			while (c == CTLQUOTEMARK || c == '*')
1462				c = *++p;
1463			/*
1464			 * If the pattern ends here, we know the string
1465			 * matches without needing to look at the rest of it.
1466			 */
1467			if (c == '\0')
1468				return 1;
1469			/*
1470			 * First try the shortest match for the '*' that
1471			 * could work. We can forget any earlier '*' since
1472			 * there is no way having it match more characters
1473			 * can help us, given that we are already here.
1474			 */
1475			bt_p = p;
1476			bt_q = q;
1477			break;
1478		case '[': {
1479			const char *endp;
1480			int invert, found;
1481			wchar_t chr;
1482
1483			endp = p;
1484			if (*endp == '!' || *endp == '^')
1485				endp++;
1486			for (;;) {
1487				while (*endp == CTLQUOTEMARK)
1488					endp++;
1489				if (*endp == 0)
1490					goto dft;		/* no matching ] */
1491				if (*endp == CTLESC)
1492					endp++;
1493				if (*++endp == ']')
1494					break;
1495			}
1496			invert = 0;
1497			if (*p == '!' || *p == '^') {
1498				invert++;
1499				p++;
1500			}
1501			found = 0;
1502			if (squoted && *q == CTLESC)
1503				q++;
1504			if (*q == '\0')
1505				return 0;
1506			if (localeisutf8) {
1507				chr = get_wc(&q);
1508				if (chr == 0)
1509					goto backtrack;
1510			} else
1511				chr = (unsigned char)*q++;
1512			c = *p++;
1513			do {
1514				if (c == CTLQUOTEMARK)
1515					continue;
1516				if (c == '[' && *p == ':') {
1517					found |= match_charclass(p, chr, &end);
1518					if (end != NULL)
1519						p = end;
1520				}
1521				if (c == CTLESC)
1522					c = *p++;
1523				if (localeisutf8 && c & 0x80) {
1524					p--;
1525					wc = get_wc(&p);
1526					if (wc == 0) /* bad utf-8 */
1527						return 0;
1528				} else
1529					wc = (unsigned char)c;
1530				if (*p == '-' && p[1] != ']') {
1531					p++;
1532					while (*p == CTLQUOTEMARK)
1533						p++;
1534					if (*p == CTLESC)
1535						p++;
1536					if (localeisutf8) {
1537						wc2 = get_wc(&p);
1538						if (wc2 == 0) /* bad utf-8 */
1539							return 0;
1540					} else
1541						wc2 = (unsigned char)*p++;
1542					if (   collate_range_cmp(chr, wc) >= 0
1543					    && collate_range_cmp(chr, wc2) <= 0
1544					   )
1545						found = 1;
1546				} else {
1547					if (chr == wc)
1548						found = 1;
1549				}
1550			} while ((c = *p++) != ']');
1551			if (found == invert)
1552				goto backtrack;
1553			break;
1554		}
1555dft:	        default:
1556			if (squoted && *q == CTLESC)
1557				q++;
1558			if (*q == '\0')
1559				return 0;
1560			if (*q++ == c)
1561				break;
1562backtrack:
1563			/*
1564			 * If we have a mismatch (other than hitting the end
1565			 * of the string), go back to the last '*' seen and
1566			 * have it match one additional character.
1567			 */
1568			if (bt_p == NULL)
1569				return 0;
1570			if (squoted && *bt_q == CTLESC)
1571				bt_q++;
1572			if (*bt_q == '\0')
1573				return 0;
1574			bt_q++;
1575			p = bt_p;
1576			q = bt_q;
1577			break;
1578		}
1579	}
1580}
1581
1582
1583
1584/*
1585 * Remove any CTLESC and CTLQUOTEMARK characters from a string.
1586 */
1587
1588void
1589rmescapes(char *str)
1590{
1591	char *p, *q;
1592
1593	p = str;
1594	while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) {
1595		if (*p++ == '\0')
1596			return;
1597	}
1598	q = p;
1599	while (*p) {
1600		if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) {
1601			p++;
1602			continue;
1603		}
1604		if (*p == CTLESC)
1605			p++;
1606		*q++ = *p++;
1607	}
1608	*q = '\0';
1609}
1610
1611
1612
1613/*
1614 * See if a pattern matches in a case statement.
1615 */
1616
1617int
1618casematch(union node *pattern, const char *val)
1619{
1620	struct stackmark smark;
1621	int result;
1622	char *p;
1623
1624	setstackmark(&smark);
1625	argbackq = pattern->narg.backquote;
1626	STARTSTACKSTR(expdest);
1627	ifslastp = NULL;
1628	argstr(pattern->narg.text, EXP_TILDE | EXP_CASE);
1629	STPUTC('\0', expdest);
1630	p = grabstackstr(expdest);
1631	result = patmatch(p, val, 0);
1632	popstackmark(&smark);
1633	return result;
1634}
1635
/*
 * Our own itoa().
 *
 * Formats num in decimal (with a leading '-' when negative) and appends
 * the text to the stack string at buf with STPUTS().  Returns buf as it
 * is after STPUTS().
 */

static char *
cvtnum(int num, char *buf)
{
	char temp[32];
	int neg = num < 0;
	char *p = temp + 31;
	unsigned int mag;

	/*
	 * Take the digits from the magnitude as an unsigned value: for a
	 * negative num, num % 10 is negative and would not yield a digit,
	 * and negating INT_MIN as an int would overflow.
	 */
	mag = neg ? 0U - (unsigned int)num : (unsigned int)num;

	temp[31] = '\0';

	/* Build the digits backwards from the end of temp. */
	do {
		*--p = (char)(mag % 10 + '0');
	} while ((mag /= 10) != 0);

	if (neg)
		*--p = '-';

	STPUTS(p, buf);
	return buf;
}
1659
1660/*
1661 * Do most of the work for wordexp(3).
1662 */
1663
1664int
1665wordexpcmd(int argc, char **argv)
1666{
1667	size_t len;
1668	int i;
1669
1670	out1fmt("%08x", argc - 1);
1671	for (i = 1, len = 0; i < argc; i++)
1672		len += strlen(argv[i]);
1673	out1fmt("%08x", (int)len);
1674	for (i = 1; i < argc; i++)
1675		outbin(argv[i], strlen(argv[i]) + 1, out1);
1676        return (0);
1677}
1678