expand.c revision 287752
1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1997-2005
5 *	Herbert Xu <herbert@gondor.apana.org.au>.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#ifndef lint
36#if 0
37static char sccsid[] = "@(#)expand.c	8.5 (Berkeley) 5/15/95";
38#endif
39#endif /* not lint */
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: stable/10/bin/sh/expand.c 287752 2015-09-13 13:58:46Z jilles $");
42
43#include <sys/types.h>
44#include <sys/time.h>
45#include <sys/stat.h>
46#include <dirent.h>
47#include <errno.h>
48#include <inttypes.h>
49#include <limits.h>
50#include <pwd.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <string.h>
54#include <unistd.h>
55#include <wchar.h>
56#include <wctype.h>
57
58/*
59 * Routines to expand arguments to commands.  We have to deal with
60 * backquotes, shell variables, and file metacharacters.
61 */
62
63#include "shell.h"
64#include "main.h"
65#include "nodes.h"
66#include "eval.h"
67#include "expand.h"
68#include "syntax.h"
69#include "parser.h"
70#include "jobs.h"
71#include "options.h"
72#include "var.h"
73#include "input.h"
74#include "output.h"
75#include "memalloc.h"
76#include "error.h"
77#include "mystring.h"
78#include "arith.h"
79#include "show.h"
80#include "builtins.h"
81
/*
 * Describes one span of the expansion output in which field splitting
 * has to look for separators.  Offsets are counted from the start of
 * the string being built; the span ends just before endoff.
 */

struct ifsregion {
	struct ifsregion *next;	/* following region, NULL at the tail */
	int begoff;		/* offset where the region begins */
	int endoff;		/* offset just past the region */
	int inquotes;		/* nonzero: split at nul bytes only */
};
93
94
95static char *expdest;			/* output of current string */
96static struct nodelist *argbackq;	/* list of back quote expressions */
97static struct ifsregion ifsfirst;	/* first struct in list of ifs regions */
98static struct ifsregion *ifslastp;	/* last struct in list */
99static struct arglist exparg;		/* holds expanded arg list */
100
/* expansion of the individual parts of a word */
static char *argstr(char *, int);
static char *exptilde(char *, int);
static char *expari(char *);
static void expbackq(union node *, int, int);
static int subevalvar(char *, char *, int, int, int, int, int);
static char *evalvar(char *, int);
static int varisset(const char *, int);
static void strtodest(const char *, int, int, int);
static void varvalue(const char *, int, int, int);

/* bookkeeping for the regions that undergo field splitting */
static void recordregion(int, int, int);
static void removerecordregions(int);
static void ifsbreakup(char *, struct arglist *);

/* pathname generation */
static void expandmeta(struct strlist *);
static void expmeta(char *, char *);
static void addfname(char *);
static struct strlist *expsort(struct strlist *);
static struct strlist *msort(struct strlist *, int);

/* matching and formatting helpers */
static int patmatch(const char *, const char *, int);
static char *cvtnum(int, char *);
static int collate_range_cmp(wchar_t, wchar_t);
121
/*
 * Order two wide characters by the collation rules of the current
 * locale.  Each character is turned into a one-character string and the
 * pair is handed to wcscoll(), whose result is returned unchanged.
 */
static int
collate_range_cmp(wchar_t c1, wchar_t c2)
{
	wchar_t lhs[2] = { c1, L'\0' };
	wchar_t rhs[2] = { c2, L'\0' };

	return (wcscoll(lhs, rhs));
}
131
132static char *
133stputs_quotes(const char *data, const char *syntax, char *p)
134{
135	while (*data) {
136		CHECKSTRSPACE(2, p);
137		if (syntax[(int)*data] == CCTL)
138			USTPUTC(CTLESC, p);
139		USTPUTC(*data++, p);
140	}
141	return (p);
142}
143#define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p)
144
145/*
146 * Perform expansions on an argument, placing the resulting list of arguments
147 * in arglist.  Parameter expansion, command substitution and arithmetic
148 * expansion are always performed; additional expansions can be requested
149 * via flag (EXP_*).
150 * The result is left in the stack string.
151 * When arglist is NULL, perform here document expansion.
152 *
153 * Caution: this function uses global state and is not reentrant.
154 * However, a new invocation after an interrupted invocation is safe
155 * and will reset the global state for the new call.
156 */
157void
158expandarg(union node *arg, struct arglist *arglist, int flag)
159{
160	struct strlist *sp;
161	char *p;
162
163	argbackq = arg->narg.backquote;
164	STARTSTACKSTR(expdest);
165	ifsfirst.next = NULL;
166	ifslastp = NULL;
167	argstr(arg->narg.text, flag);
168	if (arglist == NULL) {
169		STACKSTRNUL(expdest);
170		return;			/* here document expanded */
171	}
172	STPUTC('\0', expdest);
173	p = grabstackstr(expdest);
174	exparg.lastp = &exparg.list;
175	if (flag & EXP_FULL) {
176		ifsbreakup(p, &exparg);
177		*exparg.lastp = NULL;
178		exparg.lastp = &exparg.list;
179		expandmeta(exparg.list);
180	} else {
181		sp = (struct strlist *)stalloc(sizeof (struct strlist));
182		sp->text = p;
183		*exparg.lastp = sp;
184		exparg.lastp = &sp->next;
185	}
186	while (ifsfirst.next != NULL) {
187		struct ifsregion *ifsp;
188		INTOFF;
189		ifsp = ifsfirst.next->next;
190		ckfree(ifsfirst.next);
191		ifsfirst.next = ifsp;
192		INTON;
193	}
194	*exparg.lastp = NULL;
195	if (exparg.list) {
196		*arglist->lastp = exparg.list;
197		arglist->lastp = exparg.lastp;
198	}
199}
200
201
202
203/*
204 * Perform parameter expansion, command substitution and arithmetic
205 * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE.
206 * Processing ends at a CTLENDVAR or CTLENDARI character as well as '\0'.
207 * This is used to expand word in ${var+word} etc.
208 * If EXP_FULL or EXP_CASE are set, keep and/or generate CTLESC
209 * characters to allow for further processing.
210 * If EXP_FULL is set, also preserve CTLQUOTEMARK characters.
211 */
212static char *
213argstr(char *p, int flag)
214{
215	char c;
216	int quotes = flag & (EXP_FULL | EXP_CASE);	/* do CTLESC */
217	int firsteq = 1;
218	int split_lit;
219	int lit_quoted;
220
221	split_lit = flag & EXP_SPLIT_LIT;
222	lit_quoted = flag & EXP_LIT_QUOTED;
223	flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED);
224	if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE)))
225		p = exptilde(p, flag);
226	for (;;) {
227		CHECKSTRSPACE(2, expdest);
228		switch (c = *p++) {
229		case '\0':
230			return (p - 1);
231		case CTLENDVAR:
232		case CTLENDARI:
233			return (p);
234		case CTLQUOTEMARK:
235			lit_quoted = 1;
236			/* "$@" syntax adherence hack */
237			if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=')
238				break;
239			if ((flag & EXP_FULL) != 0)
240				USTPUTC(c, expdest);
241			break;
242		case CTLQUOTEEND:
243			lit_quoted = 0;
244			break;
245		case CTLESC:
246			if (quotes)
247				USTPUTC(c, expdest);
248			c = *p++;
249			USTPUTC(c, expdest);
250			if (split_lit && !lit_quoted)
251				recordregion(expdest - stackblock() -
252				    (quotes ? 2 : 1),
253				    expdest - stackblock(), 0);
254			break;
255		case CTLVAR:
256			p = evalvar(p, flag);
257			break;
258		case CTLBACKQ:
259		case CTLBACKQ|CTLQUOTE:
260			expbackq(argbackq->n, c & CTLQUOTE, flag);
261			argbackq = argbackq->next;
262			break;
263		case CTLARI:
264			p = expari(p);
265			break;
266		case ':':
267		case '=':
268			/*
269			 * sort of a hack - expand tildes in variable
270			 * assignments (after the first '=' and after ':'s).
271			 */
272			USTPUTC(c, expdest);
273			if (split_lit && !lit_quoted)
274				recordregion(expdest - stackblock() - 1,
275				    expdest - stackblock(), 0);
276			if (flag & EXP_VARTILDE && *p == '~' &&
277			    (c != '=' || firsteq)) {
278				if (c == '=')
279					firsteq = 0;
280				p = exptilde(p, flag);
281			}
282			break;
283		default:
284			USTPUTC(c, expdest);
285			if (split_lit && !lit_quoted)
286				recordregion(expdest - stackblock() - 1,
287				    expdest - stackblock(), 0);
288		}
289	}
290}
291
292/*
293 * Perform tilde expansion, placing the result in the stack string and
294 * returning the next position in the input string to process.
295 */
296static char *
297exptilde(char *p, int flag)
298{
299	char c, *startp = p;
300	struct passwd *pw;
301	char *home;
302
303	for (;;) {
304		c = *p;
305		switch(c) {
306		case CTLESC: /* This means CTL* are always considered quoted. */
307		case CTLVAR:
308		case CTLBACKQ:
309		case CTLBACKQ | CTLQUOTE:
310		case CTLARI:
311		case CTLENDARI:
312		case CTLQUOTEMARK:
313			return (startp);
314		case ':':
315			if ((flag & EXP_VARTILDE) == 0)
316				break;
317			/* FALLTHROUGH */
318		case '\0':
319		case '/':
320		case CTLENDVAR:
321			*p = '\0';
322			if (*(startp+1) == '\0') {
323				home = lookupvar("HOME");
324			} else {
325				pw = getpwnam(startp+1);
326				home = pw != NULL ? pw->pw_dir : NULL;
327			}
328			*p = c;
329			if (home == NULL || *home == '\0')
330				return (startp);
331			strtodest(home, flag, VSNORMAL, 1);
332			return (p);
333		}
334		p++;
335	}
336}
337
338
339static void
340removerecordregions(int endoff)
341{
342	if (ifslastp == NULL)
343		return;
344
345	if (ifsfirst.endoff > endoff) {
346		while (ifsfirst.next != NULL) {
347			struct ifsregion *ifsp;
348			INTOFF;
349			ifsp = ifsfirst.next->next;
350			ckfree(ifsfirst.next);
351			ifsfirst.next = ifsp;
352			INTON;
353		}
354		if (ifsfirst.begoff > endoff)
355			ifslastp = NULL;
356		else {
357			ifslastp = &ifsfirst;
358			ifsfirst.endoff = endoff;
359		}
360		return;
361	}
362
363	ifslastp = &ifsfirst;
364	while (ifslastp->next && ifslastp->next->begoff < endoff)
365		ifslastp=ifslastp->next;
366	while (ifslastp->next != NULL) {
367		struct ifsregion *ifsp;
368		INTOFF;
369		ifsp = ifslastp->next->next;
370		ckfree(ifslastp->next);
371		ifslastp->next = ifsp;
372		INTON;
373	}
374	if (ifslastp->endoff > endoff)
375		ifslastp->endoff = endoff;
376}
377
378/*
379 * Expand arithmetic expression.
380 * Note that flag is not required as digits never require CTLESC characters.
381 */
382static char *
383expari(char *p)
384{
385	char *q, *start;
386	arith_t result;
387	int begoff;
388	int quoted;
389	int adj;
390
391	quoted = *p++ == '"';
392	begoff = expdest - stackblock();
393	p = argstr(p, 0);
394	removerecordregions(begoff);
395	STPUTC('\0', expdest);
396	start = stackblock() + begoff;
397
398	q = grabstackstr(expdest);
399	result = arith(start);
400	ungrabstackstr(q, expdest);
401
402	start = stackblock() + begoff;
403	adj = start - expdest;
404	STADJUST(adj, expdest);
405
406	CHECKSTRSPACE((int)(DIGITS(result) + 1), expdest);
407	fmtstr(expdest, DIGITS(result), ARITH_FORMAT_STR, result);
408	adj = strlen(expdest);
409	STADJUST(adj, expdest);
410	if (!quoted)
411		recordregion(begoff, expdest - stackblock(), 0);
412	return p;
413}
414
415
416/*
417 * Perform command substitution.
418 */
419static void
420expbackq(union node *cmd, int quoted, int flag)
421{
422	struct backcmd in;
423	int i;
424	char buf[128];
425	char *p;
426	char *dest = expdest;
427	struct ifsregion saveifs, *savelastp;
428	struct nodelist *saveargbackq;
429	char lastc;
430	int startloc = dest - stackblock();
431	char const *syntax = quoted? DQSYNTAX : BASESYNTAX;
432	int quotes = flag & (EXP_FULL | EXP_CASE);
433	size_t nnl;
434
435	INTOFF;
436	saveifs = ifsfirst;
437	savelastp = ifslastp;
438	saveargbackq = argbackq;
439	p = grabstackstr(dest);
440	evalbackcmd(cmd, &in);
441	ungrabstackstr(p, dest);
442	ifsfirst = saveifs;
443	ifslastp = savelastp;
444	argbackq = saveargbackq;
445
446	p = in.buf;
447	lastc = '\0';
448	nnl = 0;
449	/* Don't copy trailing newlines */
450	for (;;) {
451		if (--in.nleft < 0) {
452			if (in.fd < 0)
453				break;
454			while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR);
455			TRACE(("expbackq: read returns %d\n", i));
456			if (i <= 0)
457				break;
458			p = buf;
459			in.nleft = i - 1;
460		}
461		lastc = *p++;
462		if (lastc != '\0') {
463			if (lastc == '\n') {
464				nnl++;
465			} else {
466				CHECKSTRSPACE(nnl + 2, dest);
467				while (nnl > 0) {
468					nnl--;
469					USTPUTC('\n', dest);
470				}
471				if (quotes && syntax[(int)lastc] == CCTL)
472					USTPUTC(CTLESC, dest);
473				USTPUTC(lastc, dest);
474			}
475		}
476	}
477
478	if (in.fd >= 0)
479		close(in.fd);
480	if (in.buf)
481		ckfree(in.buf);
482	if (in.jp)
483		exitstatus = waitforjob(in.jp, (int *)NULL);
484	if (quoted == 0)
485		recordregion(startloc, dest - stackblock(), 0);
486	TRACE(("expbackq: size=%td: \"%.*s\"\n",
487		((dest - stackblock()) - startloc),
488		(int)((dest - stackblock()) - startloc),
489		stackblock() + startloc));
490	expdest = dest;
491	INTON;
492}
493
494
495
496static void
497recordleft(const char *str, const char *loc, char *startp)
498{
499	int amount;
500
501	amount = ((str - 1) - (loc - startp)) - expdest;
502	STADJUST(amount, expdest);
503	while (loc != str - 1)
504		*startp++ = *loc++;
505}
506
507static int
508subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
509  int varflags, int quotes)
510{
511	char *startp;
512	char *loc = NULL;
513	char *q;
514	int c = 0;
515	struct nodelist *saveargbackq = argbackq;
516	int amount;
517
518	argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX ||
519	    subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ?
520	    EXP_CASE : 0) | EXP_TILDE);
521	STACKSTRNUL(expdest);
522	argbackq = saveargbackq;
523	startp = stackblock() + startloc;
524	if (str == NULL)
525	    str = stackblock() + strloc;
526
527	switch (subtype) {
528	case VSASSIGN:
529		setvar(str, startp, 0);
530		amount = startp - expdest;
531		STADJUST(amount, expdest);
532		varflags &= ~VSNUL;
533		return 1;
534
535	case VSQUESTION:
536		if (*p != CTLENDVAR) {
537			outfmt(out2, "%s\n", startp);
538			error((char *)NULL);
539		}
540		error("%.*s: parameter %snot set", (int)(p - str - 1),
541		      str, (varflags & VSNUL) ? "null or "
542					      : nullstr);
543		return 0;
544
545	case VSTRIMLEFT:
546		for (loc = startp; loc < str; loc++) {
547			c = *loc;
548			*loc = '\0';
549			if (patmatch(str, startp, quotes)) {
550				*loc = c;
551				recordleft(str, loc, startp);
552				return 1;
553			}
554			*loc = c;
555			if (quotes && *loc == CTLESC)
556				loc++;
557		}
558		return 0;
559
560	case VSTRIMLEFTMAX:
561		for (loc = str - 1; loc >= startp;) {
562			c = *loc;
563			*loc = '\0';
564			if (patmatch(str, startp, quotes)) {
565				*loc = c;
566				recordleft(str, loc, startp);
567				return 1;
568			}
569			*loc = c;
570			loc--;
571			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
572				for (q = startp; q < loc; q++)
573					if (*q == CTLESC)
574						q++;
575				if (q > loc)
576					loc--;
577			}
578		}
579		return 0;
580
581	case VSTRIMRIGHT:
582		for (loc = str - 1; loc >= startp;) {
583			if (patmatch(str, loc, quotes)) {
584				amount = loc - expdest;
585				STADJUST(amount, expdest);
586				return 1;
587			}
588			loc--;
589			if (quotes && loc > startp && *(loc - 1) == CTLESC) {
590				for (q = startp; q < loc; q++)
591					if (*q == CTLESC)
592						q++;
593				if (q > loc)
594					loc--;
595			}
596		}
597		return 0;
598
599	case VSTRIMRIGHTMAX:
600		for (loc = startp; loc < str - 1; loc++) {
601			if (patmatch(str, loc, quotes)) {
602				amount = loc - expdest;
603				STADJUST(amount, expdest);
604				return 1;
605			}
606			if (quotes && *loc == CTLESC)
607				loc++;
608		}
609		return 0;
610
611
612	default:
613		abort();
614	}
615}
616
617
618/*
619 * Expand a variable, and return a pointer to the next character in the
620 * input string.
621 */
622
623static char *
624evalvar(char *p, int flag)
625{
626	int subtype;
627	int varflags;
628	char *var;
629	const char *val;
630	int patloc;
631	int c;
632	int set;
633	int special;
634	int startloc;
635	int varlen;
636	int varlenb;
637	int easy;
638	int quotes = flag & (EXP_FULL | EXP_CASE);
639	int record = 0;
640
641	varflags = (unsigned char)*p++;
642	subtype = varflags & VSTYPE;
643	var = p;
644	special = 0;
645	if (! is_name(*p))
646		special = 1;
647	p = strchr(p, '=') + 1;
648again: /* jump here after setting a variable with ${var=text} */
649	if (varflags & VSLINENO) {
650		set = 1;
651		special = 1;
652		val = NULL;
653	} else if (special) {
654		set = varisset(var, varflags & VSNUL);
655		val = NULL;
656	} else {
657		val = bltinlookup(var, 1);
658		if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) {
659			val = NULL;
660			set = 0;
661		} else
662			set = 1;
663	}
664	varlen = 0;
665	startloc = expdest - stackblock();
666	if (!set && uflag && *var != '@' && *var != '*') {
667		switch (subtype) {
668		case VSNORMAL:
669		case VSTRIMLEFT:
670		case VSTRIMLEFTMAX:
671		case VSTRIMRIGHT:
672		case VSTRIMRIGHTMAX:
673		case VSLENGTH:
674			error("%.*s: parameter not set", (int)(p - var - 1),
675			    var);
676		}
677	}
678	if (set && subtype != VSPLUS) {
679		/* insert the value of the variable */
680		if (special) {
681			if (varflags & VSLINENO)
682				STPUTBIN(var, p - var - 1, expdest);
683			else
684				varvalue(var, varflags & VSQUOTE, subtype, flag);
685			if (subtype == VSLENGTH) {
686				varlenb = expdest - stackblock() - startloc;
687				varlen = varlenb;
688				if (localeisutf8) {
689					val = stackblock() + startloc;
690					for (;val != expdest; val++)
691						if ((*val & 0xC0) == 0x80)
692							varlen--;
693				}
694				STADJUST(-varlenb, expdest);
695			}
696		} else {
697			if (subtype == VSLENGTH) {
698				for (;*val; val++)
699					if (!localeisutf8 ||
700					    (*val & 0xC0) != 0x80)
701						varlen++;
702			}
703			else
704				strtodest(val, flag, subtype,
705				    varflags & VSQUOTE);
706		}
707	}
708
709	if (subtype == VSPLUS)
710		set = ! set;
711
712	easy = ((varflags & VSQUOTE) == 0 ||
713		(*var == '@' && shellparam.nparam != 1));
714
715
716	switch (subtype) {
717	case VSLENGTH:
718		expdest = cvtnum(varlen, expdest);
719		record = 1;
720		break;
721
722	case VSNORMAL:
723		record = easy;
724		break;
725
726	case VSPLUS:
727	case VSMINUS:
728		if (!set) {
729			argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) |
730			    (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0));
731			break;
732		}
733		record = easy;
734		break;
735
736	case VSTRIMLEFT:
737	case VSTRIMLEFTMAX:
738	case VSTRIMRIGHT:
739	case VSTRIMRIGHTMAX:
740		if (!set)
741			break;
742		/*
743		 * Terminate the string and start recording the pattern
744		 * right after it
745		 */
746		STPUTC('\0', expdest);
747		patloc = expdest - stackblock();
748		if (subevalvar(p, NULL, patloc, subtype,
749		    startloc, varflags, quotes) == 0) {
750			int amount = (expdest - stackblock() - patloc) + 1;
751			STADJUST(-amount, expdest);
752		}
753		/* Remove any recorded regions beyond start of variable */
754		removerecordregions(startloc);
755		record = 1;
756		break;
757
758	case VSASSIGN:
759	case VSQUESTION:
760		if (!set) {
761			if (subevalvar(p, var, 0, subtype, startloc, varflags,
762			    quotes)) {
763				varflags &= ~VSNUL;
764				/*
765				 * Remove any recorded regions beyond
766				 * start of variable
767				 */
768				removerecordregions(startloc);
769				goto again;
770			}
771			break;
772		}
773		record = easy;
774		break;
775
776	case VSERROR:
777		c = p - var - 1;
778		error("${%.*s%s}: Bad substitution", c, var,
779		    (c > 0 && *p != CTLENDVAR) ? "..." : "");
780
781	default:
782		abort();
783	}
784
785	if (record)
786		recordregion(startloc, expdest - stackblock(),
787		    varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' &&
788		    (*var == '@' || *var == '*')));
789
790	if (subtype != VSNORMAL) {	/* skip to end of alternative */
791		int nesting = 1;
792		for (;;) {
793			if ((c = *p++) == CTLESC)
794				p++;
795			else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) {
796				if (set)
797					argbackq = argbackq->next;
798			} else if (c == CTLVAR) {
799				if ((*p++ & VSTYPE) != VSNORMAL)
800					nesting++;
801			} else if (c == CTLENDVAR) {
802				if (--nesting == 0)
803					break;
804			}
805		}
806	}
807	return p;
808}
809
810
811
812/*
813 * Test whether a specialized variable is set.
814 */
815
816static int
817varisset(const char *name, int nulok)
818{
819
820	if (*name == '!')
821		return backgndpidset();
822	else if (*name == '@' || *name == '*') {
823		if (*shellparam.p == NULL)
824			return 0;
825
826		if (nulok) {
827			char **av;
828
829			for (av = shellparam.p; *av; av++)
830				if (**av != '\0')
831					return 1;
832			return 0;
833		}
834	} else if (is_digit(*name)) {
835		char *ap;
836		long num;
837
838		errno = 0;
839		num = strtol(name, NULL, 10);
840		if (errno != 0 || num > shellparam.nparam)
841			return 0;
842
843		if (num == 0)
844			ap = arg0;
845		else
846			ap = shellparam.p[num - 1];
847
848		if (nulok && (ap == NULL || *ap == '\0'))
849			return 0;
850	}
851	return 1;
852}
853
854static void
855strtodest(const char *p, int flag, int subtype, int quoted)
856{
857	if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH)
858		STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest);
859	else
860		STPUTS(p, expdest);
861}
862
863/*
864 * Add the value of a specialized variable to the stack string.
865 */
866
867static void
868varvalue(const char *name, int quoted, int subtype, int flag)
869{
870	int num;
871	char *p;
872	int i;
873	char sep;
874	char **ap;
875
876	switch (*name) {
877	case '$':
878		num = rootpid;
879		goto numvar;
880	case '?':
881		num = oexitstatus;
882		goto numvar;
883	case '#':
884		num = shellparam.nparam;
885		goto numvar;
886	case '!':
887		num = backgndpidval();
888numvar:
889		expdest = cvtnum(num, expdest);
890		break;
891	case '-':
892		for (i = 0 ; i < NOPTS ; i++) {
893			if (optlist[i].val)
894				STPUTC(optlist[i].letter, expdest);
895		}
896		break;
897	case '@':
898		if (flag & EXP_FULL && quoted) {
899			for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
900				strtodest(p, flag, subtype, quoted);
901				if (*ap)
902					STPUTC('\0', expdest);
903			}
904			break;
905		}
906		/* FALLTHROUGH */
907	case '*':
908		if (ifsset())
909			sep = ifsval()[0];
910		else
911			sep = ' ';
912		for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
913			strtodest(p, flag, subtype, quoted);
914			if (!*ap)
915				break;
916			if (sep || (flag & EXP_FULL && !quoted && **ap != '\0'))
917				STPUTC(sep, expdest);
918		}
919		break;
920	case '0':
921		p = arg0;
922		strtodest(p, flag, subtype, quoted);
923		break;
924	default:
925		if (is_digit(*name)) {
926			num = atoi(name);
927			if (num > 0 && num <= shellparam.nparam) {
928				p = shellparam.p[num - 1];
929				strtodest(p, flag, subtype, quoted);
930			}
931		}
932		break;
933	}
934}
935
936
937
938/*
939 * Record the fact that we have to scan this region of the
940 * string for IFS characters.
941 */
942
943static void
944recordregion(int start, int end, int inquotes)
945{
946	struct ifsregion *ifsp;
947
948	INTOFF;
949	if (ifslastp == NULL) {
950		ifsp = &ifsfirst;
951	} else {
952		if (ifslastp->endoff == start
953		    && ifslastp->inquotes == inquotes) {
954			/* extend previous area */
955			ifslastp->endoff = end;
956			INTON;
957			return;
958		}
959		ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion));
960		ifslastp->next = ifsp;
961	}
962	ifslastp = ifsp;
963	ifslastp->next = NULL;
964	ifslastp->begoff = start;
965	ifslastp->endoff = end;
966	ifslastp->inquotes = inquotes;
967	INTON;
968}
969
970
971
972/*
973 * Break the argument string into pieces based upon IFS and add the
974 * strings to the argument list.  The regions of the string to be
975 * searched for IFS characters have been stored by recordregion.
976 * CTLESC characters are preserved but have little effect in this pass
977 * other than escaping CTL* characters.  In particular, they do not escape
978 * IFS characters: that should be done with the ifsregion mechanism.
979 * CTLQUOTEMARK characters are used to preserve empty quoted strings.
980 * This pass treats them as a regular character, making the string non-empty.
981 * Later, they are removed along with the other CTL* characters.
982 */
983static void
984ifsbreakup(char *string, struct arglist *arglist)
985{
986	struct ifsregion *ifsp;
987	struct strlist *sp;
988	char *start;
989	char *p;
990	char *q;
991	const char *ifs;
992	const char *ifsspc;
993	int had_param_ch = 0;
994
995	start = string;
996
997	if (ifslastp == NULL) {
998		/* Return entire argument, IFS doesn't apply to any of it */
999		sp = (struct strlist *)stalloc(sizeof *sp);
1000		sp->text = start;
1001		*arglist->lastp = sp;
1002		arglist->lastp = &sp->next;
1003		return;
1004	}
1005
1006	ifs = ifsset() ? ifsval() : " \t\n";
1007
1008	for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
1009		p = string + ifsp->begoff;
1010		while (p < string + ifsp->endoff) {
1011			q = p;
1012			if (*p == CTLESC)
1013				p++;
1014			if (ifsp->inquotes) {
1015				/* Only NULs (should be from "$@") end args */
1016				had_param_ch = 1;
1017				if (*p != 0) {
1018					p++;
1019					continue;
1020				}
1021				ifsspc = NULL;
1022			} else {
1023				if (!strchr(ifs, *p)) {
1024					had_param_ch = 1;
1025					p++;
1026					continue;
1027				}
1028				ifsspc = strchr(" \t\n", *p);
1029
1030				/* Ignore IFS whitespace at start */
1031				if (q == start && ifsspc != NULL) {
1032					p++;
1033					start = p;
1034					continue;
1035				}
1036				had_param_ch = 0;
1037			}
1038
1039			/* Save this argument... */
1040			*q = '\0';
1041			sp = (struct strlist *)stalloc(sizeof *sp);
1042			sp->text = start;
1043			*arglist->lastp = sp;
1044			arglist->lastp = &sp->next;
1045			p++;
1046
1047			if (ifsspc != NULL) {
1048				/* Ignore further trailing IFS whitespace */
1049				for (; p < string + ifsp->endoff; p++) {
1050					q = p;
1051					if (*p == CTLESC)
1052						p++;
1053					if (strchr(ifs, *p) == NULL) {
1054						p = q;
1055						break;
1056					}
1057					if (strchr(" \t\n", *p) == NULL) {
1058						p++;
1059						break;
1060					}
1061				}
1062			}
1063			start = p;
1064		}
1065	}
1066
1067	/*
1068	 * Save anything left as an argument.
1069	 * Traditionally we have treated 'IFS=':'; set -- x$IFS' as
1070	 * generating 2 arguments, the second of which is empty.
1071	 * Some recent clarification of the Posix spec say that it
1072	 * should only generate one....
1073	 */
1074	if (had_param_ch || *start != 0) {
1075		sp = (struct strlist *)stalloc(sizeof *sp);
1076		sp->text = start;
1077		*arglist->lastp = sp;
1078		arglist->lastp = &sp->next;
1079	}
1080}
1081
1082
1083static char expdir[PATH_MAX];
1084#define expdir_end (expdir + sizeof(expdir))
1085
1086/*
1087 * Perform pathname generation and remove control characters.
1088 * At this point, the only control characters should be CTLESC and CTLQUOTEMARK.
1089 * The results are stored in the list exparg.
1090 */
1091static void
1092expandmeta(struct strlist *str)
1093{
1094	char *p;
1095	struct strlist **savelastp;
1096	struct strlist *sp;
1097	char c;
1098
1099	while (str) {
1100		if (fflag)
1101			goto nometa;
1102		p = str->text;
1103		for (;;) {			/* fast check for meta chars */
1104			if ((c = *p++) == '\0')
1105				goto nometa;
1106			if (c == '*' || c == '?' || c == '[')
1107				break;
1108		}
1109		savelastp = exparg.lastp;
1110		INTOFF;
1111		expmeta(expdir, str->text);
1112		INTON;
1113		if (exparg.lastp == savelastp) {
1114			/*
1115			 * no matches
1116			 */
1117nometa:
1118			*exparg.lastp = str;
1119			rmescapes(str->text);
1120			exparg.lastp = &str->next;
1121		} else {
1122			*exparg.lastp = NULL;
1123			*savelastp = sp = expsort(*savelastp);
1124			while (sp->next != NULL)
1125				sp = sp->next;
1126			exparg.lastp = &sp->next;
1127		}
1128		str = str->next;
1129	}
1130}
1131
1132
1133/*
1134 * Do metacharacter (i.e. *, ?, [...]) expansion.
1135 */
1136
1137static void
1138expmeta(char *enddir, char *name)
1139{
1140	const char *p;
1141	const char *q;
1142	const char *start;
1143	char *endname;
1144	int metaflag;
1145	struct stat statb;
1146	DIR *dirp;
1147	struct dirent *dp;
1148	int atend;
1149	int matchdot;
1150	int esc;
1151	int namlen;
1152
1153	metaflag = 0;
1154	start = name;
1155	for (p = name; esc = 0, *p; p += esc + 1) {
1156		if (*p == '*' || *p == '?')
1157			metaflag = 1;
1158		else if (*p == '[') {
1159			q = p + 1;
1160			if (*q == '!' || *q == '^')
1161				q++;
1162			for (;;) {
1163				while (*q == CTLQUOTEMARK)
1164					q++;
1165				if (*q == CTLESC)
1166					q++;
1167				if (*q == '/' || *q == '\0')
1168					break;
1169				if (*++q == ']') {
1170					metaflag = 1;
1171					break;
1172				}
1173			}
1174		} else if (*p == '\0')
1175			break;
1176		else if (*p == CTLQUOTEMARK)
1177			continue;
1178		else {
1179			if (*p == CTLESC)
1180				esc++;
1181			if (p[esc] == '/') {
1182				if (metaflag)
1183					break;
1184				start = p + esc + 1;
1185			}
1186		}
1187	}
1188	if (metaflag == 0) {	/* we've reached the end of the file name */
1189		if (enddir != expdir)
1190			metaflag++;
1191		for (p = name ; ; p++) {
1192			if (*p == CTLQUOTEMARK)
1193				continue;
1194			if (*p == CTLESC)
1195				p++;
1196			*enddir++ = *p;
1197			if (*p == '\0')
1198				break;
1199			if (enddir == expdir_end)
1200				return;
1201		}
1202		if (metaflag == 0 || lstat(expdir, &statb) >= 0)
1203			addfname(expdir);
1204		return;
1205	}
1206	endname = name + (p - name);
1207	if (start != name) {
1208		p = name;
1209		while (p < start) {
1210			while (*p == CTLQUOTEMARK)
1211				p++;
1212			if (*p == CTLESC)
1213				p++;
1214			*enddir++ = *p++;
1215			if (enddir == expdir_end)
1216				return;
1217		}
1218	}
1219	if (enddir == expdir) {
1220		p = ".";
1221	} else if (enddir == expdir + 1 && *expdir == '/') {
1222		p = "/";
1223	} else {
1224		p = expdir;
1225		enddir[-1] = '\0';
1226	}
1227	if ((dirp = opendir(p)) == NULL)
1228		return;
1229	if (enddir != expdir)
1230		enddir[-1] = '/';
1231	if (*endname == 0) {
1232		atend = 1;
1233	} else {
1234		atend = 0;
1235		*endname = '\0';
1236		endname += esc + 1;
1237	}
1238	matchdot = 0;
1239	p = start;
1240	while (*p == CTLQUOTEMARK)
1241		p++;
1242	if (*p == CTLESC)
1243		p++;
1244	if (*p == '.')
1245		matchdot++;
1246	while (! int_pending() && (dp = readdir(dirp)) != NULL) {
1247		if (dp->d_name[0] == '.' && ! matchdot)
1248			continue;
1249		if (patmatch(start, dp->d_name, 0)) {
1250			namlen = dp->d_namlen;
1251			if (enddir + namlen + 1 > expdir_end)
1252				continue;
1253			memcpy(enddir, dp->d_name, namlen + 1);
1254			if (atend)
1255				addfname(expdir);
1256			else {
1257				if (dp->d_type != DT_UNKNOWN &&
1258				    dp->d_type != DT_DIR &&
1259				    dp->d_type != DT_LNK)
1260					continue;
1261				if (enddir + namlen + 2 > expdir_end)
1262					continue;
1263				enddir[namlen] = '/';
1264				enddir[namlen + 1] = '\0';
1265				expmeta(enddir + namlen + 1, endname);
1266			}
1267		}
1268	}
1269	closedir(dirp);
1270	if (! atend)
1271		endname[-esc - 1] = esc ? CTLESC : '/';
1272}
1273
1274
1275/*
1276 * Add a file name to the list.
1277 */
1278
1279static void
1280addfname(char *name)
1281{
1282	char *p;
1283	struct strlist *sp;
1284	size_t len;
1285
1286	len = strlen(name);
1287	p = stalloc(len + 1);
1288	memcpy(p, name, len + 1);
1289	sp = (struct strlist *)stalloc(sizeof *sp);
1290	sp->text = p;
1291	*exparg.lastp = sp;
1292	exparg.lastp = &sp->next;
1293}
1294
1295
1296/*
1297 * Sort the results of file name expansion.  It calculates the number of
1298 * strings to sort and then calls msort (short for merge sort) to do the
1299 * work.
1300 */
1301
1302static struct strlist *
1303expsort(struct strlist *str)
1304{
1305	int len;
1306	struct strlist *sp;
1307
1308	len = 0;
1309	for (sp = str ; sp ; sp = sp->next)
1310		len++;
1311	return msort(str, len);
1312}
1313
1314
1315static struct strlist *
1316msort(struct strlist *list, int len)
1317{
1318	struct strlist *p, *q = NULL;
1319	struct strlist **lpp;
1320	int half;
1321	int n;
1322
1323	if (len <= 1)
1324		return list;
1325	half = len >> 1;
1326	p = list;
1327	for (n = half ; --n >= 0 ; ) {
1328		q = p;
1329		p = p->next;
1330	}
1331	q->next = NULL;			/* terminate first half of list */
1332	q = msort(list, half);		/* sort first half of list */
1333	p = msort(p, len - half);		/* sort second half */
1334	lpp = &list;
1335	for (;;) {
1336		if (strcmp(p->text, q->text) < 0) {
1337			*lpp = p;
1338			lpp = &p->next;
1339			if ((p = *lpp) == NULL) {
1340				*lpp = q;
1341				break;
1342			}
1343		} else {
1344			*lpp = q;
1345			lpp = &q->next;
1346			if ((q = *lpp) == NULL) {
1347				*lpp = p;
1348				break;
1349			}
1350		}
1351	}
1352	return list;
1353}
1354
1355
1356
/*
 * Decode one multibyte character (looking at up to 4 bytes) at *p.
 *
 * On success the wide character is returned and *p is advanced past it.
 * At a NUL byte, or when mbtowc() reports an invalid sequence, 0 is
 * returned and *p is left where it was.
 */
static wchar_t
get_wc(const char **p)
{
	wchar_t wc;
	int n;

	n = mbtowc(&wc, *p, 4);
	if (n == 0 || n == -1)
		return 0;
	*p += n;
	return wc;
}
1372
1373
/*
 * See if a character matches a character class, starting at the first colon
 * of "[:class:]".
 * If a valid character class is recognized, a pointer to the next character
 * after the final closing bracket is stored into *end, otherwise a null
 * pointer is stored into *end.  A class is rejected (null *end) when no
 * ":]" follows, when its name is empty, or when the name does not fit in
 * the local buffer.
 *
 * Returns 1 if chr belongs to the class and 0 otherwise.  The result is
 * normalized because iswctype() only promises a non-zero value for a
 * match, while the caller combines the result with a 0/1 flag.
 */
static int
match_charclass(const char *p, wchar_t chr, const char **end)
{
	char name[20];
	const char *nameend;
	size_t namelen;
	wctype_t cclass;

	*end = NULL;
	p++;				/* skip the leading colon */
	nameend = strstr(p, ":]");
	if (nameend == NULL)
		return 0;
	namelen = (size_t)(nameend - p);
	if (namelen == 0 || namelen >= sizeof(name))
		return 0;
	memcpy(name, p, namelen);
	name[namelen] = '\0';
	*end = nameend + 2;
	cclass = wctype(name);
	/* An unknown class matches nothing but is valid nevertheless. */
	if (cclass == 0)
		return 0;
	return iswctype(chr, cclass) != 0;
}
1403
1404
1405/*
1406 * Returns true if the pattern matches the string.
1407 */
1408
1409static int
1410patmatch(const char *pattern, const char *string, int squoted)
1411{
1412	const char *p, *q, *end;
1413	const char *bt_p, *bt_q;
1414	char c;
1415	wchar_t wc, wc2;
1416
1417	p = pattern;
1418	q = string;
1419	bt_p = NULL;
1420	bt_q = NULL;
1421	for (;;) {
1422		switch (c = *p++) {
1423		case '\0':
1424			if (*q != '\0')
1425				goto backtrack;
1426			return 1;
1427		case CTLESC:
1428			if (squoted && *q == CTLESC)
1429				q++;
1430			if (*q++ != *p++)
1431				goto backtrack;
1432			break;
1433		case CTLQUOTEMARK:
1434			continue;
1435		case '?':
1436			if (squoted && *q == CTLESC)
1437				q++;
1438			if (*q == '\0')
1439				return 0;
1440			if (localeisutf8) {
1441				wc = get_wc(&q);
1442				/*
1443				 * A '?' does not match invalid UTF-8 but a
1444				 * '*' does, so backtrack.
1445				 */
1446				if (wc == 0)
1447					goto backtrack;
1448			} else
1449				wc = (unsigned char)*q++;
1450			break;
1451		case '*':
1452			c = *p;
1453			while (c == CTLQUOTEMARK || c == '*')
1454				c = *++p;
1455			/*
1456			 * If the pattern ends here, we know the string
1457			 * matches without needing to look at the rest of it.
1458			 */
1459			if (c == '\0')
1460				return 1;
1461			/*
1462			 * First try the shortest match for the '*' that
1463			 * could work. We can forget any earlier '*' since
1464			 * there is no way having it match more characters
1465			 * can help us, given that we are already here.
1466			 */
1467			bt_p = p;
1468			bt_q = q;
1469			break;
1470		case '[': {
1471			const char *savep, *saveq;
1472			int invert, found;
1473			wchar_t chr;
1474
1475			savep = p, saveq = q;
1476			invert = 0;
1477			if (*p == '!' || *p == '^') {
1478				invert++;
1479				p++;
1480			}
1481			found = 0;
1482			if (squoted && *q == CTLESC)
1483				q++;
1484			if (*q == '\0')
1485				return 0;
1486			if (localeisutf8) {
1487				chr = get_wc(&q);
1488				if (chr == 0)
1489					goto backtrack;
1490			} else
1491				chr = (unsigned char)*q++;
1492			c = *p++;
1493			do {
1494				if (c == '\0') {
1495					p = savep, q = saveq;
1496					c = '[';
1497					goto dft;
1498				}
1499				if (c == CTLQUOTEMARK)
1500					continue;
1501				if (c == '[' && *p == ':') {
1502					found |= match_charclass(p, chr, &end);
1503					if (end != NULL)
1504						p = end;
1505				}
1506				if (c == CTLESC)
1507					c = *p++;
1508				if (localeisutf8 && c & 0x80) {
1509					p--;
1510					wc = get_wc(&p);
1511					if (wc == 0) /* bad utf-8 */
1512						return 0;
1513				} else
1514					wc = (unsigned char)c;
1515				if (*p == '-' && p[1] != ']') {
1516					p++;
1517					while (*p == CTLQUOTEMARK)
1518						p++;
1519					if (*p == CTLESC)
1520						p++;
1521					if (localeisutf8) {
1522						wc2 = get_wc(&p);
1523						if (wc2 == 0) /* bad utf-8 */
1524							return 0;
1525					} else
1526						wc2 = (unsigned char)*p++;
1527					if (   collate_range_cmp(chr, wc) >= 0
1528					    && collate_range_cmp(chr, wc2) <= 0
1529					   )
1530						found = 1;
1531				} else {
1532					if (chr == wc)
1533						found = 1;
1534				}
1535			} while ((c = *p++) != ']');
1536			if (found == invert)
1537				goto backtrack;
1538			break;
1539		}
1540dft:	        default:
1541			if (squoted && *q == CTLESC)
1542				q++;
1543			if (*q == '\0')
1544				return 0;
1545			if (*q++ == c)
1546				break;
1547backtrack:
1548			/*
1549			 * If we have a mismatch (other than hitting the end
1550			 * of the string), go back to the last '*' seen and
1551			 * have it match one additional character.
1552			 */
1553			if (bt_p == NULL)
1554				return 0;
1555			if (squoted && *bt_q == CTLESC)
1556				bt_q++;
1557			if (*bt_q == '\0')
1558				return 0;
1559			bt_q++;
1560			p = bt_p;
1561			q = bt_q;
1562			break;
1563		}
1564	}
1565}
1566
1567
1568
1569/*
1570 * Remove any CTLESC and CTLQUOTEMARK characters from a string.
1571 */
1572
1573void
1574rmescapes(char *str)
1575{
1576	char *p, *q;
1577
1578	p = str;
1579	while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) {
1580		if (*p++ == '\0')
1581			return;
1582	}
1583	q = p;
1584	while (*p) {
1585		if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) {
1586			p++;
1587			continue;
1588		}
1589		if (*p == CTLESC)
1590			p++;
1591		*q++ = *p++;
1592	}
1593	*q = '\0';
1594}
1595
1596
1597
1598/*
1599 * See if a pattern matches in a case statement.
1600 */
1601
1602int
1603casematch(union node *pattern, const char *val)
1604{
1605	struct stackmark smark;
1606	int result;
1607	char *p;
1608
1609	setstackmark(&smark);
1610	argbackq = pattern->narg.backquote;
1611	STARTSTACKSTR(expdest);
1612	ifslastp = NULL;
1613	argstr(pattern->narg.text, EXP_TILDE | EXP_CASE);
1614	STPUTC('\0', expdest);
1615	p = grabstackstr(expdest);
1616	result = patmatch(p, val, 0);
1617	popstackmark(&smark);
1618	return result;
1619}
1620
/*
 * Our own itoa().
 *
 * Formats num in decimal, with a leading '-' for negative values, into a
 * local buffer and passes the resulting string together with buf to
 * STPUTS.  Returns buf as it stands after STPUTS.
 */

static char *
cvtnum(int num, char *buf)
{
	char temp[32];
	int neg = num < 0;
	char *p = temp + 31;
	int digit;

	temp[31] = '\0';

	/* Generate the digits from least to most significant. */
	do {
		/*
		 * For a negative num the remainder is negative as well;
		 * use its magnitude.  Working on each remainder rather
		 * than negating num up front also copes with INT_MIN.
		 */
		digit = num % 10;
		if (digit < 0)
			digit = -digit;
		*--p = digit + '0';
	} while ((num /= 10) != 0);

	if (neg)
		*--p = '-';

	STPUTS(p, buf);
	return buf;
}
1644
1645/*
1646 * Do most of the work for wordexp(3).
1647 */
1648
1649int
1650wordexpcmd(int argc, char **argv)
1651{
1652	size_t len;
1653	int i;
1654
1655	out1fmt("%08x", argc - 1);
1656	for (i = 1, len = 0; i < argc; i++)
1657		len += strlen(argv[i]);
1658	out1fmt("%08x", (int)len);
1659	for (i = 1; i < argc; i++)
1660		outbin(argv[i], strlen(argv[i]) + 1, out1);
1661        return (0);
1662}
1663