expand.c revision 287750
1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1997-2005
5 *	Herbert Xu <herbert@gondor.apana.org.au>.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#ifndef lint
36#if 0
37static char sccsid[] = "@(#)expand.c	8.5 (Berkeley) 5/15/95";
38#endif
39#endif /* not lint */
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: stable/10/bin/sh/expand.c 287750 2015-09-13 13:43:08Z jilles $");
42
43#include <sys/types.h>
44#include <sys/time.h>
45#include <sys/stat.h>
46#include <dirent.h>
47#include <errno.h>
48#include <inttypes.h>
49#include <limits.h>
50#include <pwd.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <string.h>
54#include <unistd.h>
55#include <wchar.h>
56#include <wctype.h>
57
58/*
59 * Routines to expand arguments to commands.  We have to deal with
60 * backquotes, shell variables, and file metacharacters.
61 */
62
63#include "shell.h"
64#include "main.h"
65#include "nodes.h"
66#include "eval.h"
67#include "expand.h"
68#include "syntax.h"
69#include "parser.h"
70#include "jobs.h"
71#include "options.h"
72#include "var.h"
73#include "input.h"
74#include "output.h"
75#include "memalloc.h"
76#include "error.h"
77#include "mystring.h"
78#include "arith.h"
79#include "show.h"
80#include "builtins.h"
81
/*
 * Structure specifying which parts of the string should be searched
 * for IFS characters.
 *
 * Regions are kept in a singly linked list.  begoff and endoff are byte
 * offsets into the string being expanded; ifsbreakup() scans the bytes
 * from begoff up to, but not including, endoff.
 */

struct ifsregion {
	struct ifsregion *next;	/* next region in list */
	int begoff;		/* offset of start of region */
	int endoff;		/* offset of end of region */
	int inquotes;		/* search for nul bytes only */
};
93
94
/*
 * File-scope expansion state.  expandarg() reinitializes expdest,
 * argbackq, ifsfirst.next and ifslastp at the start of every call.
 */
static char *expdest;			/* output of current string */
static struct nodelist *argbackq;	/* list of back quote expressions */
static struct ifsregion ifsfirst;	/* first struct in list of ifs regions */
static struct ifsregion *ifslastp;	/* last struct in list */
static struct arglist exparg;		/* holds expanded arg list */

/* Forward declarations of the helpers defined in this file. */
static char *argstr(char *, int);
static char *exptilde(char *, int);
static char *expari(char *);
static void expbackq(union node *, int, int);
static int subevalvar(char *, char *, int, int, int, int, int);
static char *evalvar(char *, int);
static int varisset(const char *, int);
static void varvalue(const char *, int, int, int);
static void recordregion(int, int, int);
static void removerecordregions(int);
static void ifsbreakup(char *, struct arglist *);
static void expandmeta(struct strlist *, int);
static void expmeta(char *, char *);
static void addfname(char *);
static struct strlist *expsort(struct strlist *);
static struct strlist *msort(struct strlist *, int);
static int patmatch(const char *, const char *, int);
static char *cvtnum(int, char *);
static int collate_range_cmp(wchar_t, wchar_t);
120
/*
 * Compare two wide characters by the collation order of the current
 * locale.  Each character is placed in a one-character wide string and
 * the two strings are handed to wcscoll(), whose result (negative, zero
 * or positive) is returned unchanged.
 */
static int
collate_range_cmp(wchar_t c1, wchar_t c2)
{
	wchar_t lhs[2] = { c1, L'\0' };
	wchar_t rhs[2] = { c2, L'\0' };

	return (wcscoll(lhs, rhs));
}
130
131static char *
132stputs_quotes(const char *data, const char *syntax, char *p)
133{
134	while (*data) {
135		CHECKSTRSPACE(2, p);
136		if (syntax[(int)*data] == CCTL)
137			USTPUTC(CTLESC, p);
138		USTPUTC(*data++, p);
139	}
140	return (p);
141}
142#define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p)
143
144/*
145 * Perform expansions on an argument, placing the resulting list of arguments
146 * in arglist.  Parameter expansion, command substitution and arithmetic
147 * expansion are always performed; additional expansions can be requested
148 * via flag (EXP_*).
149 * The result is left in the stack string.
150 * When arglist is NULL, perform here document expansion.
151 *
152 * Caution: this function uses global state and is not reentrant.
153 * However, a new invocation after an interrupted invocation is safe
154 * and will reset the global state for the new call.
155 */
156void
157expandarg(union node *arg, struct arglist *arglist, int flag)
158{
159	struct strlist *sp;
160	char *p;
161
162	argbackq = arg->narg.backquote;
163	STARTSTACKSTR(expdest);
164	ifsfirst.next = NULL;
165	ifslastp = NULL;
166	argstr(arg->narg.text, flag);
167	if (arglist == NULL) {
168		STACKSTRNUL(expdest);
169		return;			/* here document expanded */
170	}
171	STPUTC('\0', expdest);
172	p = grabstackstr(expdest);
173	exparg.lastp = &exparg.list;
174	if (flag & EXP_FULL) {
175		ifsbreakup(p, &exparg);
176		*exparg.lastp = NULL;
177		exparg.lastp = &exparg.list;
178		expandmeta(exparg.list, flag);
179	} else {
180		sp = (struct strlist *)stalloc(sizeof (struct strlist));
181		sp->text = p;
182		*exparg.lastp = sp;
183		exparg.lastp = &sp->next;
184	}
185	while (ifsfirst.next != NULL) {
186		struct ifsregion *ifsp;
187		INTOFF;
188		ifsp = ifsfirst.next->next;
189		ckfree(ifsfirst.next);
190		ifsfirst.next = ifsp;
191		INTON;
192	}
193	*exparg.lastp = NULL;
194	if (exparg.list) {
195		*arglist->lastp = exparg.list;
196		arglist->lastp = exparg.lastp;
197	}
198}
199
200
201
/*
 * Perform parameter expansion, command substitution and arithmetic
 * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE.
 * Processing ends at a CTLENDVAR or CTLENDARI character as well as '\0'.
 * This is used to expand word in ${var+word} etc.
 * If EXP_FULL or EXP_CASE are set, keep and/or generate CTLESC
 * characters to allow for further processing.
 * If EXP_FULL is set, also preserve CTLQUOTEMARK characters.
 *
 * Output is appended to the stack string at expdest.  The return value
 * points at the terminating '\0' of the input, or just past the
 * CTLENDVAR/CTLENDARI character that ended processing.
 */
static char *
argstr(char *p, int flag)
{
	char c;
	int quotes = flag & (EXP_FULL | EXP_CASE);	/* do CTLESC */
	int firsteq = 1;	/* cleared once a tilde after '=' was expanded */
	int split_lit;		/* record IFS regions for literal text */
	int lit_quoted;		/* currently inside a quoted literal */

	/*
	 * EXP_SPLIT_LIT and EXP_LIT_QUOTED only concern this invocation;
	 * strip them so they are not handed on to the functions called below.
	 */
	split_lit = flag & EXP_SPLIT_LIT;
	lit_quoted = flag & EXP_LIT_QUOTED;
	flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED);
	if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE)))
		p = exptilde(p, flag);
	for (;;) {
		/* Room for one character plus a possible CTLESC. */
		CHECKSTRSPACE(2, expdest);
		switch (c = *p++) {
		case '\0':
			/* Return a pointer to the NUL itself. */
			return (p - 1);
		case CTLENDVAR:
		case CTLENDARI:
			return (p);
		case CTLQUOTEMARK:
			lit_quoted = 1;
			/* "$@" syntax adherence hack */
			if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=')
				break;
			if ((flag & EXP_FULL) != 0)
				USTPUTC(c, expdest);
			break;
		case CTLQUOTEEND:
			lit_quoted = 0;
			break;
		case CTLESC:
			/* Copy the escaped character, keeping CTLESC if asked. */
			if (quotes)
				USTPUTC(c, expdest);
			c = *p++;
			USTPUTC(c, expdest);
			/* The region covers the CTLESC too when it was kept. */
			if (split_lit && !lit_quoted)
				recordregion(expdest - stackblock() -
				    (quotes ? 2 : 1),
				    expdest - stackblock(), 0);
			break;
		case CTLVAR:
			p = evalvar(p, flag);
			break;
		case CTLBACKQ:
		case CTLBACKQ|CTLQUOTE:
			/* Each marker consumes the next entry of argbackq. */
			expbackq(argbackq->n, c & CTLQUOTE, flag);
			argbackq = argbackq->next;
			break;
		case CTLARI:
			p = expari(p);
			break;
		case ':':
		case '=':
			/*
			 * sort of a hack - expand tildes in variable
			 * assignments (after the first '=' and after ':'s).
			 */
			USTPUTC(c, expdest);
			if (split_lit && !lit_quoted)
				recordregion(expdest - stackblock() - 1,
				    expdest - stackblock(), 0);
			if (flag & EXP_VARTILDE && *p == '~' &&
			    (c != '=' || firsteq)) {
				if (c == '=')
					firsteq = 0;
				p = exptilde(p, flag);
			}
			break;
		default:
			/* Ordinary character: copy it through. */
			USTPUTC(c, expdest);
			if (split_lit && !lit_quoted)
				recordregion(expdest - stackblock() - 1,
				    expdest - stackblock(), 0);
		}
	}
}
290
291/*
292 * Perform tilde expansion, placing the result in the stack string and
293 * returning the next position in the input string to process.
294 */
295static char *
296exptilde(char *p, int flag)
297{
298	char c, *startp = p;
299	struct passwd *pw;
300	char *home;
301	int quotes = flag & (EXP_FULL | EXP_CASE);
302
303	while ((c = *p) != '\0') {
304		switch(c) {
305		case CTLESC: /* This means CTL* are always considered quoted. */
306		case CTLVAR:
307		case CTLBACKQ:
308		case CTLBACKQ | CTLQUOTE:
309		case CTLARI:
310		case CTLENDARI:
311		case CTLQUOTEMARK:
312			return (startp);
313		case ':':
314			if (flag & EXP_VARTILDE)
315				goto done;
316			break;
317		case '/':
318		case CTLENDVAR:
319			goto done;
320		}
321		p++;
322	}
323done:
324	*p = '\0';
325	if (*(startp+1) == '\0') {
326		home = lookupvar("HOME");
327	} else {
328		pw = getpwnam(startp+1);
329		home = pw != NULL ? pw->pw_dir : NULL;
330	}
331	*p = c;
332	if (home == NULL || *home == '\0')
333		return (startp);
334	if (quotes)
335		STPUTS_QUOTES(home, DQSYNTAX, expdest);
336	else
337		STPUTS(home, expdest);
338	return (p);
339}
340
341
342static void
343removerecordregions(int endoff)
344{
345	if (ifslastp == NULL)
346		return;
347
348	if (ifsfirst.endoff > endoff) {
349		while (ifsfirst.next != NULL) {
350			struct ifsregion *ifsp;
351			INTOFF;
352			ifsp = ifsfirst.next->next;
353			ckfree(ifsfirst.next);
354			ifsfirst.next = ifsp;
355			INTON;
356		}
357		if (ifsfirst.begoff > endoff)
358			ifslastp = NULL;
359		else {
360			ifslastp = &ifsfirst;
361			ifsfirst.endoff = endoff;
362		}
363		return;
364	}
365
366	ifslastp = &ifsfirst;
367	while (ifslastp->next && ifslastp->next->begoff < endoff)
368		ifslastp=ifslastp->next;
369	while (ifslastp->next != NULL) {
370		struct ifsregion *ifsp;
371		INTOFF;
372		ifsp = ifslastp->next->next;
373		ckfree(ifslastp->next);
374		ifslastp->next = ifsp;
375		INTON;
376	}
377	if (ifslastp->endoff > endoff)
378		ifslastp->endoff = endoff;
379}
380
/*
 * Expand arithmetic expression.
 * Note that flag is not required as digits never require CTLESC characters.
 *
 * p points just after the CTLARI character; the byte there is '"' for a
 * quoted expression.  The expression text is expanded with argstr(),
 * evaluated with arith(), and replaced in the stack string by the
 * formatted result.  Returns the input position returned by argstr().
 */
static char *
expari(char *p)
{
	char *q, *start;
	arith_t result;
	int begoff;
	int quoted;
	int adj;

	quoted = *p++ == '"';
	begoff = expdest - stackblock();
	p = argstr(p, 0);
	/* Regions recorded while expanding the expression text are moot. */
	removerecordregions(begoff);
	STPUTC('\0', expdest);
	start = stackblock() + begoff;

	/* Hold on to the stack string while arith() runs. */
	q = grabstackstr(expdest);
	result = arith(start);
	ungrabstackstr(q, expdest);

	/* Rewind expdest to where the expression text began. */
	start = stackblock() + begoff;
	adj = start - expdest;
	STADJUST(adj, expdest);

	/* Format the result in place of the expression text. */
	CHECKSTRSPACE((int)(DIGITS(result) + 1), expdest);
	fmtstr(expdest, DIGITS(result), ARITH_FORMAT_STR, result);
	adj = strlen(expdest);
	STADJUST(adj, expdest);
	/* Only an unquoted result is subject to field splitting. */
	if (!quoted)
		recordregion(begoff, expdest - stackblock(), 0);
	return p;
}
417
418
/*
 * Perform command substitution.
 *
 * cmd is evaluated with evalbackcmd() and its output is appended to the
 * stack string.  NUL bytes in the output are dropped and trailing
 * newlines are not copied.  quoted is nonzero for a substitution marked
 * with CTLQUOTE; it selects DQSYNTAX for escaping and suppresses the
 * recording of an IFS region.  With EXP_FULL or EXP_CASE in flag,
 * characters the syntax table classifies as CCTL get a CTLESC prefix.
 */
static void
expbackq(union node *cmd, int quoted, int flag)
{
	struct backcmd in;
	int i;
	char buf[128];
	char *p;
	char *dest = expdest;
	struct ifsregion saveifs, *savelastp;
	struct nodelist *saveargbackq;
	char lastc;
	int startloc = dest - stackblock();
	char const *syntax = quoted? DQSYNTAX : BASESYNTAX;
	int quotes = flag & (EXP_FULL | EXP_CASE);
	size_t nnl;

	INTOFF;
	/*
	 * Save the file-scope expansion state and restore it once
	 * evalbackcmd() has returned.
	 */
	saveifs = ifsfirst;
	savelastp = ifslastp;
	saveargbackq = argbackq;
	p = grabstackstr(dest);
	evalbackcmd(cmd, &in);
	ungrabstackstr(p, dest);
	ifsfirst = saveifs;
	ifslastp = savelastp;
	argbackq = saveargbackq;

	p = in.buf;
	lastc = '\0';
	nnl = 0;	/* newlines seen but not yet copied */
	/* Don't copy trailing newlines */
	for (;;) {
		if (--in.nleft < 0) {
			/* Current buffer is used up; refill from in.fd if any. */
			if (in.fd < 0)
				break;
			while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR);
			TRACE(("expbackq: read returns %d\n", i));
			if (i <= 0)
				break;
			p = buf;
			in.nleft = i - 1;
		}
		lastc = *p++;
		if (lastc != '\0') {
			if (lastc == '\n') {
				nnl++;
			} else {
				/*
				 * The newlines turned out not to be trailing:
				 * emit them ahead of this character.
				 */
				CHECKSTRSPACE(nnl + 2, dest);
				while (nnl > 0) {
					nnl--;
					USTPUTC('\n', dest);
				}
				if (quotes && syntax[(int)lastc] == CCTL)
					USTPUTC(CTLESC, dest);
				USTPUTC(lastc, dest);
			}
		}
	}

	if (in.fd >= 0)
		close(in.fd);
	if (in.buf)
		ckfree(in.buf);
	if (in.jp)
		exitstatus = waitforjob(in.jp, (int *)NULL);
	if (quoted == 0)
		recordregion(startloc, dest - stackblock(), 0);
	TRACE(("expbackq: size=%td: \"%.*s\"\n",
		((dest - stackblock()) - startloc),
		(int)((dest - stackblock()) - startloc),
		stackblock() + startloc));
	expdest = dest;
	INTON;
}
496
497
498
/*
 * Expand the word part of ${var=word}, ${var?word}, ${var#pat},
 * ${var##pat}, ${var%pat} and ${var%%pat} and apply the operation.
 *
 * p        - input text of the word, ended by CTLENDVAR
 * str      - variable name for VSASSIGN/VSQUESTION; NULL for the trim
 *            operations, in which case the expanded pattern is found at
 *            offset strloc in the stack string
 * startloc - offset in the stack string where the value (trim
 *            operations) or the expanded word (VSASSIGN) starts
 * quotes   - nonzero if the value in the stack string contains CTLESC
 *
 * Returns 1 if the variable was assigned or the value was trimmed, and
 * 0 if a trim pattern did not match.  For VSQUESTION error() is called;
 * NOTE(review): error() is presumably non-returning - confirm.
 */
static int
subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
  int varflags, int quotes)
{
	char *startp;
	char *loc = NULL;
	char *q;
	int c = 0;
	struct nodelist *saveargbackq = argbackq;
	int amount;

	/* Trim patterns are expanded with EXP_CASE so CTLESC is kept. */
	argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX ||
	    subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ?
	    EXP_CASE : 0) | EXP_TILDE);
	STACKSTRNUL(expdest);
	/* Undo argstr()'s advance through the backquote list. */
	argbackq = saveargbackq;
	startp = stackblock() + startloc;
	if (str == NULL)
	    str = stackblock() + strloc;

	switch (subtype) {
	case VSASSIGN:
		setvar(str, startp, 0);
		/* Drop the expanded word from the stack string again. */
		amount = startp - expdest;
		STADJUST(amount, expdest);
		varflags &= ~VSNUL;
		return 1;

	case VSQUESTION:
		/* A non-empty word is printed as the error message. */
		if (*p != CTLENDVAR) {
			outfmt(out2, "%s\n", startp);
			error((char *)NULL);
		}
		error("%.*s: parameter %snot set", (int)(p - str - 1),
		      str, (varflags & VSNUL) ? "null or "
					      : nullstr);
		return 0;

	case VSTRIMLEFT:
		/*
		 * Try ever longer prefixes of the value by temporarily
		 * terminating it at loc; the first match is the shortest.
		 */
		for (loc = startp; loc < str; loc++) {
			c = *loc;
			*loc = '\0';
			if (patmatch(str, startp, quotes)) {
				*loc = c;
				goto recordleft;
			}
			*loc = c;
			/* Never cut between a CTLESC and its character. */
			if (quotes && *loc == CTLESC)
				loc++;
		}
		return 0;

	case VSTRIMLEFTMAX:
		/* As above, but starting with the longest prefix. */
		for (loc = str - 1; loc >= startp;) {
			c = *loc;
			*loc = '\0';
			if (patmatch(str, startp, quotes)) {
				*loc = c;
				goto recordleft;
			}
			*loc = c;
			loc--;
			/*
			 * If the preceding byte is a CTLESC, rescan from the
			 * start to learn whether it escapes *loc; if it
			 * does, step back onto the CTLESC.
			 */
			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
				for (q = startp; q < loc; q++)
					if (*q == CTLESC)
						q++;
				if (q > loc)
					loc--;
			}
		}
		return 0;

	case VSTRIMRIGHT:
		/* Try ever longer suffixes; the first match is the shortest. */
		for (loc = str - 1; loc >= startp;) {
			if (patmatch(str, loc, quotes)) {
				/* Cut the value (and the pattern) off at loc. */
				amount = loc - expdest;
				STADJUST(amount, expdest);
				return 1;
			}
			loc--;
			/* Same CTLESC handling as for VSTRIMLEFTMAX. */
			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
				for (q = startp; q < loc; q++)
					if (*q == CTLESC)
						q++;
				if (q > loc)
					loc--;
			}
		}
		return 0;

	case VSTRIMRIGHTMAX:
		/* Start with the longest suffix. */
		for (loc = startp; loc < str - 1; loc++) {
			if (patmatch(str, loc, quotes)) {
				amount = loc - expdest;
				STADJUST(amount, expdest);
				return 1;
			}
			if (quotes && *loc == CTLESC)
				loc++;
		}
		return 0;


	default:
		abort();
	}

recordleft:
	/*
	 * A prefix ending at loc matched: set expdest to the end of the
	 * shortened value and move the remainder down to startp.
	 */
	amount = ((str - 1) - (loc - startp)) - expdest;
	STADJUST(amount, expdest);
	while (loc != str - 1)
		*startp++ = *loc++;
	return 1;
}
613
614
/*
 * Expand a variable, and return a pointer to the next character in the
 * input string.
 *
 * p points just after the CTLVAR character: one byte of flags (subtype
 * in VSTYPE plus VSNUL/VSQUOTE/VSLINENO bits), the name terminated by
 * '=', and for subtypes other than VSNORMAL the word, ended by
 * CTLENDVAR.  The expansion is appended to the stack string.
 */

static char *
evalvar(char *p, int flag)
{
	int subtype;
	int varflags;
	char *var;
	const char *val;
	int patloc;
	int c;
	int set;
	int special;
	int startloc;
	int varlen;
	int varlenb;
	int easy;
	int quotes = flag & (EXP_FULL | EXP_CASE);

	varflags = (unsigned char)*p++;
	subtype = varflags & VSTYPE;
	var = p;
	special = 0;
	/* Names that do not start like an identifier go via varvalue(). */
	if (! is_name(*p))
		special = 1;
	p = strchr(p, '=') + 1;
again: /* jump here after setting a variable with ${var=text} */
	if (varflags & VSLINENO) {
		/* The "name" text itself is the value to insert. */
		set = 1;
		special = 1;
		val = NULL;
	} else if (special) {
		set = varisset(var, varflags & VSNUL);
		val = NULL;
	} else {
		val = bltinlookup(var, 1);
		/* With VSNUL an empty value counts as unset. */
		if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) {
			val = NULL;
			set = 0;
		} else
			set = 1;
	}
	varlen = 0;
	startloc = expdest - stackblock();
	/*
	 * With uflag set, an unset parameter other than @ and * is an
	 * error for the subtypes that supply no alternative.
	 */
	if (!set && uflag && *var != '@' && *var != '*') {
		switch (subtype) {
		case VSNORMAL:
		case VSTRIMLEFT:
		case VSTRIMLEFTMAX:
		case VSTRIMRIGHT:
		case VSTRIMRIGHTMAX:
		case VSLENGTH:
			error("%.*s: parameter not set", (int)(p - var - 1),
			    var);
		}
	}
	if (set && subtype != VSPLUS) {
		/* insert the value of the variable */
		if (special) {
			if (varflags & VSLINENO)
				STPUTBIN(var, p - var - 1, expdest);
			else
				varvalue(var, varflags & VSQUOTE, subtype, flag);
			if (subtype == VSLENGTH) {
				/*
				 * Measure the value just written, then remove
				 * it again.  In a UTF-8 locale continuation
				 * bytes (10xxxxxx) are not counted.
				 */
				varlenb = expdest - stackblock() - startloc;
				varlen = varlenb;
				if (localeisutf8) {
					val = stackblock() + startloc;
					for (;val != expdest; val++)
						if ((*val & 0xC0) == 0x80)
							varlen--;
				}
				STADJUST(-varlenb, expdest);
			}
		} else {
			char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
								  : BASESYNTAX;

			if (subtype == VSLENGTH) {
				/* Same counting rule, without copying the value. */
				for (;*val; val++)
					if (!localeisutf8 ||
					    (*val & 0xC0) != 0x80)
						varlen++;
			}
			else {
				if (quotes)
					STPUTS_QUOTES(val, syntax, expdest);
				else
					STPUTS(val, expdest);

			}
		}
	}

	/* ${var+word} uses the word exactly when the others would not. */
	if (subtype == VSPLUS)
		set = ! set;

	/*
	 * easy: the expansion is unquoted, or it is $@ with a number of
	 * positional parameters other than one.
	 */
	easy = ((varflags & VSQUOTE) == 0 ||
		(*var == '@' && shellparam.nparam != 1));


	switch (subtype) {
	case VSLENGTH:
		expdest = cvtnum(varlen, expdest);
		goto record;

	case VSNORMAL:
		if (!easy)
			break;
record:
		/*
		 * The region is marked inquotes for a quoted expansion, and
		 * for $@ / $* when IFS is set to the empty string.
		 */
		recordregion(startloc, expdest - stackblock(),
		    varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' &&
		    (*var == '@' || *var == '*')));
		break;

	case VSPLUS:
	case VSMINUS:
		if (!set) {
			/* Expand the word in place of the value. */
			argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) |
			    (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0));
			break;
		}
		if (easy)
			goto record;
		break;

	case VSTRIMLEFT:
	case VSTRIMLEFTMAX:
	case VSTRIMRIGHT:
	case VSTRIMRIGHTMAX:
		if (!set)
			break;
		/*
		 * Terminate the string and start recording the pattern
		 * right after it
		 */
		STPUTC('\0', expdest);
		patloc = expdest - stackblock();
		if (subevalvar(p, NULL, patloc, subtype,
		    startloc, varflags, quotes) == 0) {
			/* No match: drop the pattern and the NUL before it. */
			int amount = (expdest - stackblock() - patloc) + 1;
			STADJUST(-amount, expdest);
		}
		/* Remove any recorded regions beyond start of variable */
		removerecordregions(startloc);
		goto record;

	case VSASSIGN:
	case VSQUESTION:
		if (!set) {
			if (subevalvar(p, var, 0, subtype, startloc, varflags,
			    quotes)) {
				/* Assigned: redo the expansion with the new value. */
				varflags &= ~VSNUL;
				/*
				 * Remove any recorded regions beyond
				 * start of variable
				 */
				removerecordregions(startloc);
				goto again;
			}
			break;
		}
		if (easy)
			goto record;
		break;

	case VSERROR:
		c = p - var - 1;
		error("${%.*s%s}: Bad substitution", c, var,
		    (c > 0 && *p != CTLENDVAR) ? "..." : "");

	default:
		abort();
	}

	if (subtype != VSNORMAL) {	/* skip to end of alternative */
		int nesting = 1;
		for (;;) {
			if ((c = *p++) == CTLESC)
				p++;
			else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) {
				/*
				 * The word was not expanded, so step over its
				 * command substitutions in argbackq as well.
				 */
				if (set)
					argbackq = argbackq->next;
			} else if (c == CTLVAR) {
				/* Only non-VSNORMAL variables end in CTLENDVAR. */
				if ((*p++ & VSTYPE) != VSNORMAL)
					nesting++;
			} else if (c == CTLENDVAR) {
				if (--nesting == 0)
					break;
			}
		}
	}
	return p;
}
812
813
814
815/*
816 * Test whether a specialized variable is set.
817 */
818
819static int
820varisset(const char *name, int nulok)
821{
822
823	if (*name == '!')
824		return backgndpidset();
825	else if (*name == '@' || *name == '*') {
826		if (*shellparam.p == NULL)
827			return 0;
828
829		if (nulok) {
830			char **av;
831
832			for (av = shellparam.p; *av; av++)
833				if (**av != '\0')
834					return 1;
835			return 0;
836		}
837	} else if (is_digit(*name)) {
838		char *ap;
839		long num;
840
841		errno = 0;
842		num = strtol(name, NULL, 10);
843		if (errno != 0 || num > shellparam.nparam)
844			return 0;
845
846		if (num == 0)
847			ap = arg0;
848		else
849			ap = shellparam.p[num - 1];
850
851		if (nulok && (ap == NULL || *ap == '\0'))
852			return 0;
853	}
854	return 1;
855}
856
857static void
858strtodest(const char *p, int flag, int subtype, int quoted)
859{
860	if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH)
861		STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest);
862	else
863		STPUTS(p, expdest);
864}
865
866/*
867 * Add the value of a specialized variable to the stack string.
868 */
869
870static void
871varvalue(const char *name, int quoted, int subtype, int flag)
872{
873	int num;
874	char *p;
875	int i;
876	char sep;
877	char **ap;
878
879	switch (*name) {
880	case '$':
881		num = rootpid;
882		goto numvar;
883	case '?':
884		num = oexitstatus;
885		goto numvar;
886	case '#':
887		num = shellparam.nparam;
888		goto numvar;
889	case '!':
890		num = backgndpidval();
891numvar:
892		expdest = cvtnum(num, expdest);
893		break;
894	case '-':
895		for (i = 0 ; i < NOPTS ; i++) {
896			if (optlist[i].val)
897				STPUTC(optlist[i].letter, expdest);
898		}
899		break;
900	case '@':
901		if (flag & EXP_FULL && quoted) {
902			for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
903				strtodest(p, flag, subtype, quoted);
904				if (*ap)
905					STPUTC('\0', expdest);
906			}
907			break;
908		}
909		/* FALLTHROUGH */
910	case '*':
911		if (ifsset())
912			sep = ifsval()[0];
913		else
914			sep = ' ';
915		for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
916			strtodest(p, flag, subtype, quoted);
917			if (!*ap)
918				break;
919			if (sep || (flag & EXP_FULL && !quoted && **ap != '\0'))
920				STPUTC(sep, expdest);
921		}
922		break;
923	case '0':
924		p = arg0;
925		strtodest(p, flag, subtype, quoted);
926		break;
927	default:
928		if (is_digit(*name)) {
929			num = atoi(name);
930			if (num > 0 && num <= shellparam.nparam) {
931				p = shellparam.p[num - 1];
932				strtodest(p, flag, subtype, quoted);
933			}
934		}
935		break;
936	}
937}
938
939
940
941/*
942 * Record the fact that we have to scan this region of the
943 * string for IFS characters.
944 */
945
946static void
947recordregion(int start, int end, int inquotes)
948{
949	struct ifsregion *ifsp;
950
951	INTOFF;
952	if (ifslastp == NULL) {
953		ifsp = &ifsfirst;
954	} else {
955		if (ifslastp->endoff == start
956		    && ifslastp->inquotes == inquotes) {
957			/* extend previous area */
958			ifslastp->endoff = end;
959			INTON;
960			return;
961		}
962		ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion));
963		ifslastp->next = ifsp;
964	}
965	ifslastp = ifsp;
966	ifslastp->next = NULL;
967	ifslastp->begoff = start;
968	ifslastp->endoff = end;
969	ifslastp->inquotes = inquotes;
970	INTON;
971}
972
973
974
/*
 * Break the argument string into pieces based upon IFS and add the
 * strings to the argument list.  The regions of the string to be
 * searched for IFS characters have been stored by recordregion.
 * CTLESC characters are preserved but have little effect in this pass
 * other than escaping CTL* characters.  In particular, they do not escape
 * IFS characters: that should be done with the ifsregion mechanism.
 * CTLQUOTEMARK characters are used to preserve empty quoted strings.
 * This pass treats them as a regular character, making the string non-empty.
 * Later, they are removed along with the other CTL* characters.
 *
 * The string is split in place: separators are overwritten with '\0'
 * and the list entries point into the string.
 */
static void
ifsbreakup(char *string, struct arglist *arglist)
{
	struct ifsregion *ifsp;
	struct strlist *sp;
	char *start;		/* start of the argument being collected */
	char *p;
	char *q;
	const char *ifs;
	const char *ifsspc;	/* non-NULL if the separator is IFS whitespace */
	int had_param_ch = 0;	/* current argument contains expansion output */

	start = string;

	if (ifslastp == NULL) {
		/* Return entire argument, IFS doesn't apply to any of it */
		sp = (struct strlist *)stalloc(sizeof *sp);
		sp->text = start;
		*arglist->lastp = sp;
		arglist->lastp = &sp->next;
		return;
	}

	/* An unset IFS behaves like the default space, tab, newline. */
	ifs = ifsset() ? ifsval() : " \t\n";

	for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
		p = string + ifsp->begoff;
		while (p < string + ifsp->endoff) {
			/* q stays on a CTLESC so that it is cut off as well. */
			q = p;
			if (*p == CTLESC)
				p++;
			if (ifsp->inquotes) {
				/* Only NULs (should be from "$@") end args */
				had_param_ch = 1;
				if (*p != 0) {
					p++;
					continue;
				}
				ifsspc = NULL;
			} else {
				if (!strchr(ifs, *p)) {
					had_param_ch = 1;
					p++;
					continue;
				}
				ifsspc = strchr(" \t\n", *p);

				/* Ignore IFS whitespace at start */
				if (q == start && ifsspc != NULL) {
					p++;
					start = p;
					continue;
				}
				had_param_ch = 0;
			}

			/* Save this argument... */
			*q = '\0';
			sp = (struct strlist *)stalloc(sizeof *sp);
			sp->text = start;
			*arglist->lastp = sp;
			arglist->lastp = &sp->next;
			p++;

			if (ifsspc != NULL) {
				/* Ignore further trailing IFS whitespace */
				for (; p < string + ifsp->endoff; p++) {
					q = p;
					if (*p == CTLESC)
						p++;
					/* A non-IFS character starts the next argument. */
					if (strchr(ifs, *p) == NULL) {
						p = q;
						break;
					}
					/*
					 * One non-whitespace IFS character
					 * still belongs to this separator.
					 */
					if (strchr(" \t\n", *p) == NULL) {
						p++;
						break;
					}
				}
			}
			start = p;
		}
	}

	/*
	 * Save anything left as an argument.
	 * Traditionally we have treated 'IFS=':'; set -- x$IFS' as
	 * generating 2 arguments, the second of which is empty.
	 * Some recent clarification of the Posix spec say that it
	 * should only generate one....
	 */
	if (had_param_ch || *start != 0) {
		sp = (struct strlist *)stalloc(sizeof *sp);
		sp->text = start;
		*arglist->lastp = sp;
		arglist->lastp = &sp->next;
	}
}
1084
1085
/*
 * Buffer in which expmeta() assembles the pathname being generated;
 * expdir_end points one past its last byte.
 */
static char expdir[PATH_MAX];
#define expdir_end (expdir + sizeof(expdir))
1088
1089/*
1090 * Perform pathname generation and remove control characters.
1091 * At this point, the only control characters should be CTLESC and CTLQUOTEMARK.
1092 * The results are stored in the list exparg.
1093 */
1094static void
1095expandmeta(struct strlist *str, int flag __unused)
1096{
1097	char *p;
1098	struct strlist **savelastp;
1099	struct strlist *sp;
1100	char c;
1101
1102	while (str) {
1103		if (fflag)
1104			goto nometa;
1105		p = str->text;
1106		for (;;) {			/* fast check for meta chars */
1107			if ((c = *p++) == '\0')
1108				goto nometa;
1109			if (c == '*' || c == '?' || c == '[')
1110				break;
1111		}
1112		savelastp = exparg.lastp;
1113		INTOFF;
1114		expmeta(expdir, str->text);
1115		INTON;
1116		if (exparg.lastp == savelastp) {
1117			/*
1118			 * no matches
1119			 */
1120nometa:
1121			*exparg.lastp = str;
1122			rmescapes(str->text);
1123			exparg.lastp = &str->next;
1124		} else {
1125			*exparg.lastp = NULL;
1126			*savelastp = sp = expsort(*savelastp);
1127			while (sp->next != NULL)
1128				sp = sp->next;
1129			exparg.lastp = &sp->next;
1130		}
1131		str = str->next;
1132	}
1133}
1134
1135
/*
 * Do metacharacter (i.e. *, ?, [...]) expansion.
 *
 * enddir points into expdir, just past the directory prefix assembled
 * so far; name is the part of the pattern still to be processed.  The
 * first pathname component of name containing a metacharacter is matched
 * against the entries of the directory preceding it, and the function
 * recurses for the remainder of the pattern.  Matching pathnames are
 * added with addfname().  name is modified temporarily and restored
 * before returning from the directory scan.
 */

static void
expmeta(char *enddir, char *name)
{
	const char *p;
	const char *q;
	const char *start;
	char *endname;
	int metaflag;
	struct stat statb;
	DIR *dirp;
	struct dirent *dp;
	int atend;
	int matchdot;
	int esc;
	int namlen;

	/*
	 * Scan for the first component with a metacharacter.  On leaving
	 * the loop start is the beginning of that component and p its end
	 * (a possibly CTLESC-escaped '/', or the terminating NUL).
	 */
	metaflag = 0;
	start = name;
	for (p = name; esc = 0, *p; p += esc + 1) {
		if (*p == '*' || *p == '?')
			metaflag = 1;
		else if (*p == '[') {
			/* '[' only counts if a ']' follows in this component. */
			q = p + 1;
			if (*q == '!' || *q == '^')
				q++;
			for (;;) {
				while (*q == CTLQUOTEMARK)
					q++;
				if (*q == CTLESC)
					q++;
				if (*q == '/' || *q == '\0')
					break;
				if (*++q == ']') {
					metaflag = 1;
					break;
				}
			}
		} else if (*p == '\0')
			break;
		else if (*p == CTLQUOTEMARK)
			continue;
		else {
			if (*p == CTLESC)
				esc++;
			if (p[esc] == '/') {
				if (metaflag)
					break;
				start = p + esc + 1;
			}
		}
	}
	if (metaflag == 0) {	/* we've reached the end of the file name */
		/*
		 * On the top-level call the literal name is added without
		 * checking; in a recursive call it must exist.
		 */
		if (enddir != expdir)
			metaflag++;
		for (p = name ; ; p++) {
			if (*p == CTLQUOTEMARK)
				continue;
			if (*p == CTLESC)
				p++;
			*enddir++ = *p;
			if (*p == '\0')
				break;
			/* Give up silently if the result does not fit. */
			if (enddir == expdir_end)
				return;
		}
		if (metaflag == 0 || lstat(expdir, &statb) >= 0)
			addfname(expdir);
		return;
	}
	/* Same position as p, but through a non-const pointer. */
	endname = name + (p - name);
	if (start != name) {
		/* Copy the literal leading components into expdir. */
		p = name;
		while (p < start) {
			while (*p == CTLQUOTEMARK)
				p++;
			if (*p == CTLESC)
				p++;
			*enddir++ = *p++;
			if (enddir == expdir_end)
				return;
		}
	}
	/* Pick the directory to read. */
	if (enddir == expdir) {
		p = ".";
	} else if (enddir == expdir + 1 && *expdir == '/') {
		p = "/";
	} else {
		/* Temporarily replace the trailing '/' by a NUL. */
		p = expdir;
		enddir[-1] = '\0';
	}
	if ((dirp = opendir(p)) == NULL)
		return;
	if (enddir != expdir)
		enddir[-1] = '/';
	if (*endname == 0) {
		atend = 1;
	} else {
		/*
		 * Terminate the component pattern in place; endname then
		 * points at the rest of the pattern after the '/'.
		 */
		atend = 0;
		*endname = '\0';
		endname += esc + 1;
	}
	/* Names starting with '.' match only a pattern starting with '.'. */
	matchdot = 0;
	p = start;
	while (*p == CTLQUOTEMARK)
		p++;
	if (*p == CTLESC)
		p++;
	if (*p == '.')
		matchdot++;
	while (! int_pending() && (dp = readdir(dirp)) != NULL) {
		if (dp->d_name[0] == '.' && ! matchdot)
			continue;
		if (patmatch(start, dp->d_name, 0)) {
			namlen = dp->d_namlen;
			/* Skip names that would overflow expdir. */
			if (enddir + namlen + 1 > expdir_end)
				continue;
			memcpy(enddir, dp->d_name, namlen + 1);
			if (atend)
				addfname(expdir);
			else {
				/*
				 * More components follow, so only entries
				 * that may be directories are of interest.
				 */
				if (dp->d_type != DT_UNKNOWN &&
				    dp->d_type != DT_DIR &&
				    dp->d_type != DT_LNK)
					continue;
				if (enddir + namlen + 2 > expdir_end)
					continue;
				enddir[namlen] = '/';
				enddir[namlen + 1] = '\0';
				expmeta(enddir + namlen + 1, endname);
			}
		}
	}
	closedir(dirp);
	/* Restore the character that was overwritten with NUL above. */
	if (! atend)
		endname[-esc - 1] = esc ? CTLESC : '/';
}
1276
1277
1278/*
1279 * Add a file name to the list.
1280 */
1281
1282static void
1283addfname(char *name)
1284{
1285	char *p;
1286	struct strlist *sp;
1287	size_t len;
1288
1289	len = strlen(name);
1290	p = stalloc(len + 1);
1291	memcpy(p, name, len + 1);
1292	sp = (struct strlist *)stalloc(sizeof *sp);
1293	sp->text = p;
1294	*exparg.lastp = sp;
1295	exparg.lastp = &sp->next;
1296}
1297
1298
1299/*
1300 * Sort the results of file name expansion.  It calculates the number of
1301 * strings to sort and then calls msort (short for merge sort) to do the
1302 * work.
1303 */
1304
1305static struct strlist *
1306expsort(struct strlist *str)
1307{
1308	int len;
1309	struct strlist *sp;
1310
1311	len = 0;
1312	for (sp = str ; sp ; sp = sp->next)
1313		len++;
1314	return msort(str, len);
1315}
1316
1317
1318static struct strlist *
1319msort(struct strlist *list, int len)
1320{
1321	struct strlist *p, *q = NULL;
1322	struct strlist **lpp;
1323	int half;
1324	int n;
1325
1326	if (len <= 1)
1327		return list;
1328	half = len >> 1;
1329	p = list;
1330	for (n = half ; --n >= 0 ; ) {
1331		q = p;
1332		p = p->next;
1333	}
1334	q->next = NULL;			/* terminate first half of list */
1335	q = msort(list, half);		/* sort first half of list */
1336	p = msort(p, len - half);		/* sort second half */
1337	lpp = &list;
1338	for (;;) {
1339		if (strcmp(p->text, q->text) < 0) {
1340			*lpp = p;
1341			lpp = &p->next;
1342			if ((p = *lpp) == NULL) {
1343				*lpp = q;
1344				break;
1345			}
1346		} else {
1347			*lpp = q;
1348			lpp = &q->next;
1349			if ((q = *lpp) == NULL) {
1350				*lpp = p;
1351				break;
1352			}
1353		}
1354	}
1355	return list;
1356}
1357
1358
1359
/*
 * Decode one multibyte character at *p (looking at no more than 4 bytes)
 * with mbtowc().
 *
 * On success the wide character is returned and *p is advanced past it.
 * At the end of the string, or if the bytes do not form a valid character,
 * 0 is returned and *p is left untouched.
 */
static wchar_t
get_wc(const char **p)
{
	wchar_t wch;
	int nbytes;

	nbytes = mbtowc(&wch, *p, 4);
	if (nbytes <= 0)
		return 0;	/* NUL (0) or invalid sequence (-1) */
	*p += nbytes;
	return wch;
}
1375
1376
/*
 * Test a character against a character class.  p points at the first colon
 * of "[:class:]".
 *
 * *end receives a pointer to the character following the closing ":]" when
 * the class syntax is well-formed (non-empty name shorter than 20 bytes,
 * terminated by ":]"), and a null pointer otherwise.
 *
 * Returns non-zero if chr belongs to the class, 0 if it does not, if the
 * syntax is not recognized, or if the class name is unknown to wctype().
 */
static int
match_charclass(const char *p, wchar_t chr, const char **end)
{
	char name[20];
	const char *term;
	size_t namelen;
	wctype_t cclass;

	*end = NULL;
	p++;				/* step over the leading ':' */
	term = strstr(p, ":]");
	if (term == NULL)
		return 0;
	namelen = (size_t)(term - p);
	if (namelen == 0 || namelen >= sizeof(name))
		return 0;
	memcpy(name, p, namelen);
	name[namelen] = '\0';
	*end = term + 2;
	cclass = wctype(name);
	/* An unknown class is syntactically valid but matches nothing. */
	return cclass == 0 ? 0 : iswctype(chr, cclass);
}
1406
1407
1408/*
1409 * Returns true if the pattern matches the string.
1410 */
1411
1412static int
1413patmatch(const char *pattern, const char *string, int squoted)
1414{
1415	const char *p, *q, *end;
1416	const char *bt_p, *bt_q;
1417	char c;
1418	wchar_t wc, wc2;
1419
1420	p = pattern;
1421	q = string;
1422	bt_p = NULL;
1423	bt_q = NULL;
1424	for (;;) {
1425		switch (c = *p++) {
1426		case '\0':
1427			if (*q != '\0')
1428				goto backtrack;
1429			return 1;
1430		case CTLESC:
1431			if (squoted && *q == CTLESC)
1432				q++;
1433			if (*q++ != *p++)
1434				goto backtrack;
1435			break;
1436		case CTLQUOTEMARK:
1437			continue;
1438		case '?':
1439			if (squoted && *q == CTLESC)
1440				q++;
1441			if (*q == '\0')
1442				return 0;
1443			if (localeisutf8) {
1444				wc = get_wc(&q);
1445				/*
1446				 * A '?' does not match invalid UTF-8 but a
1447				 * '*' does, so backtrack.
1448				 */
1449				if (wc == 0)
1450					goto backtrack;
1451			} else
1452				wc = (unsigned char)*q++;
1453			break;
1454		case '*':
1455			c = *p;
1456			while (c == CTLQUOTEMARK || c == '*')
1457				c = *++p;
1458			/*
1459			 * If the pattern ends here, we know the string
1460			 * matches without needing to look at the rest of it.
1461			 */
1462			if (c == '\0')
1463				return 1;
1464			/*
1465			 * First try the shortest match for the '*' that
1466			 * could work. We can forget any earlier '*' since
1467			 * there is no way having it match more characters
1468			 * can help us, given that we are already here.
1469			 */
1470			bt_p = p;
1471			bt_q = q;
1472			break;
1473		case '[': {
1474			const char *endp;
1475			int invert, found;
1476			wchar_t chr;
1477
1478			endp = p;
1479			if (*endp == '!' || *endp == '^')
1480				endp++;
1481			for (;;) {
1482				while (*endp == CTLQUOTEMARK)
1483					endp++;
1484				if (*endp == 0)
1485					goto dft;		/* no matching ] */
1486				if (*endp == CTLESC)
1487					endp++;
1488				if (*++endp == ']')
1489					break;
1490			}
1491			invert = 0;
1492			if (*p == '!' || *p == '^') {
1493				invert++;
1494				p++;
1495			}
1496			found = 0;
1497			if (squoted && *q == CTLESC)
1498				q++;
1499			if (*q == '\0')
1500				return 0;
1501			if (localeisutf8) {
1502				chr = get_wc(&q);
1503				if (chr == 0)
1504					goto backtrack;
1505			} else
1506				chr = (unsigned char)*q++;
1507			c = *p++;
1508			do {
1509				if (c == CTLQUOTEMARK)
1510					continue;
1511				if (c == '[' && *p == ':') {
1512					found |= match_charclass(p, chr, &end);
1513					if (end != NULL)
1514						p = end;
1515				}
1516				if (c == CTLESC)
1517					c = *p++;
1518				if (localeisutf8 && c & 0x80) {
1519					p--;
1520					wc = get_wc(&p);
1521					if (wc == 0) /* bad utf-8 */
1522						return 0;
1523				} else
1524					wc = (unsigned char)c;
1525				if (*p == '-' && p[1] != ']') {
1526					p++;
1527					while (*p == CTLQUOTEMARK)
1528						p++;
1529					if (*p == CTLESC)
1530						p++;
1531					if (localeisutf8) {
1532						wc2 = get_wc(&p);
1533						if (wc2 == 0) /* bad utf-8 */
1534							return 0;
1535					} else
1536						wc2 = (unsigned char)*p++;
1537					if (   collate_range_cmp(chr, wc) >= 0
1538					    && collate_range_cmp(chr, wc2) <= 0
1539					   )
1540						found = 1;
1541				} else {
1542					if (chr == wc)
1543						found = 1;
1544				}
1545			} while ((c = *p++) != ']');
1546			if (found == invert)
1547				goto backtrack;
1548			break;
1549		}
1550dft:	        default:
1551			if (squoted && *q == CTLESC)
1552				q++;
1553			if (*q == '\0')
1554				return 0;
1555			if (*q++ == c)
1556				break;
1557backtrack:
1558			/*
1559			 * If we have a mismatch (other than hitting the end
1560			 * of the string), go back to the last '*' seen and
1561			 * have it match one additional character.
1562			 */
1563			if (bt_p == NULL)
1564				return 0;
1565			if (squoted && *bt_q == CTLESC)
1566				bt_q++;
1567			if (*bt_q == '\0')
1568				return 0;
1569			bt_q++;
1570			p = bt_p;
1571			q = bt_q;
1572			break;
1573		}
1574	}
1575}
1576
1577
1578
1579/*
1580 * Remove any CTLESC and CTLQUOTEMARK characters from a string.
1581 */
1582
1583void
1584rmescapes(char *str)
1585{
1586	char *p, *q;
1587
1588	p = str;
1589	while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) {
1590		if (*p++ == '\0')
1591			return;
1592	}
1593	q = p;
1594	while (*p) {
1595		if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) {
1596			p++;
1597			continue;
1598		}
1599		if (*p == CTLESC)
1600			p++;
1601		*q++ = *p++;
1602	}
1603	*q = '\0';
1604}
1605
1606
1607
1608/*
1609 * See if a pattern matches in a case statement.
1610 */
1611
1612int
1613casematch(union node *pattern, const char *val)
1614{
1615	struct stackmark smark;
1616	int result;
1617	char *p;
1618
1619	setstackmark(&smark);
1620	argbackq = pattern->narg.backquote;
1621	STARTSTACKSTR(expdest);
1622	ifslastp = NULL;
1623	argstr(pattern->narg.text, EXP_TILDE | EXP_CASE);
1624	STPUTC('\0', expdest);
1625	p = grabstackstr(expdest);
1626	result = patmatch(p, val, 0);
1627	popstackmark(&smark);
1628	return result;
1629}
1630
/*
 * Our own itoa().
 *
 * Formats num in decimal (with a leading '-' for negative values) and
 * passes the text to STPUTS together with buf.  Returns buf as it is after
 * STPUTS.
 */

static char *
cvtnum(int num, char *buf)
{
	char temp[32];
	int neg = num < 0;
	unsigned int mag;
	char *p = temp + 31;

	temp[31] = '\0';

	/*
	 * Convert the magnitude as an unsigned value.  Taking num % 10
	 * directly would yield negative remainders for negative num and
	 * thus characters below '0'; negating in unsigned arithmetic also
	 * copes with INT_MIN.
	 */
	mag = neg ? 0U - (unsigned int)num : (unsigned int)num;

	do {
		*--p = (char)('0' + mag % 10);
	} while ((mag /= 10) != 0);

	if (neg)
		*--p = '-';

	STPUTS(p, buf);
	return buf;
}
1654
1655/*
1656 * Do most of the work for wordexp(3).
1657 */
1658
1659int
1660wordexpcmd(int argc, char **argv)
1661{
1662	size_t len;
1663	int i;
1664
1665	out1fmt("%08x", argc - 1);
1666	for (i = 1, len = 0; i < argc; i++)
1667		len += strlen(argv[i]);
1668	out1fmt("%08x", (int)len);
1669	for (i = 1; i < argc; i++)
1670		outbin(argv[i], strlen(argv[i]) + 1, out1);
1671        return (0);
1672}
1673