/* expand.c revision 287752 */
1/*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1997-2005 5 * Herbert Xu <herbert@gondor.apana.org.au>. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Kenneth Almquist. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 */ 34 35#ifndef lint 36#if 0 37static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95"; 38#endif 39#endif /* not lint */ 40#include <sys/cdefs.h> 41__FBSDID("$FreeBSD: stable/10/bin/sh/expand.c 287752 2015-09-13 13:58:46Z jilles $"); 42 43#include <sys/types.h> 44#include <sys/time.h> 45#include <sys/stat.h> 46#include <dirent.h> 47#include <errno.h> 48#include <inttypes.h> 49#include <limits.h> 50#include <pwd.h> 51#include <stdio.h> 52#include <stdlib.h> 53#include <string.h> 54#include <unistd.h> 55#include <wchar.h> 56#include <wctype.h> 57 58/* 59 * Routines to expand arguments to commands. We have to deal with 60 * backquotes, shell variables, and file metacharacters. 61 */ 62 63#include "shell.h" 64#include "main.h" 65#include "nodes.h" 66#include "eval.h" 67#include "expand.h" 68#include "syntax.h" 69#include "parser.h" 70#include "jobs.h" 71#include "options.h" 72#include "var.h" 73#include "input.h" 74#include "output.h" 75#include "memalloc.h" 76#include "error.h" 77#include "mystring.h" 78#include "arith.h" 79#include "show.h" 80#include "builtins.h" 81 82/* 83 * Structure specifying which parts of the string should be searched 84 * for IFS characters. 
85 */ 86 87struct ifsregion { 88 struct ifsregion *next; /* next region in list */ 89 int begoff; /* offset of start of region */ 90 int endoff; /* offset of end of region */ 91 int inquotes; /* search for nul bytes only */ 92}; 93 94 95static char *expdest; /* output of current string */ 96static struct nodelist *argbackq; /* list of back quote expressions */ 97static struct ifsregion ifsfirst; /* first struct in list of ifs regions */ 98static struct ifsregion *ifslastp; /* last struct in list */ 99static struct arglist exparg; /* holds expanded arg list */ 100 101static char *argstr(char *, int); 102static char *exptilde(char *, int); 103static char *expari(char *); 104static void expbackq(union node *, int, int); 105static int subevalvar(char *, char *, int, int, int, int, int); 106static char *evalvar(char *, int); 107static int varisset(const char *, int); 108static void strtodest(const char *, int, int, int); 109static void varvalue(const char *, int, int, int); 110static void recordregion(int, int, int); 111static void removerecordregions(int); 112static void ifsbreakup(char *, struct arglist *); 113static void expandmeta(struct strlist *); 114static void expmeta(char *, char *); 115static void addfname(char *); 116static struct strlist *expsort(struct strlist *); 117static struct strlist *msort(struct strlist *, int); 118static int patmatch(const char *, const char *, int); 119static char *cvtnum(int, char *); 120static int collate_range_cmp(wchar_t, wchar_t); 121 122static int 123collate_range_cmp(wchar_t c1, wchar_t c2) 124{ 125 static wchar_t s1[2], s2[2]; 126 127 s1[0] = c1; 128 s2[0] = c2; 129 return (wcscoll(s1, s2)); 130} 131 132static char * 133stputs_quotes(const char *data, const char *syntax, char *p) 134{ 135 while (*data) { 136 CHECKSTRSPACE(2, p); 137 if (syntax[(int)*data] == CCTL) 138 USTPUTC(CTLESC, p); 139 USTPUTC(*data++, p); 140 } 141 return (p); 142} 143#define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p) 144 
145/* 146 * Perform expansions on an argument, placing the resulting list of arguments 147 * in arglist. Parameter expansion, command substitution and arithmetic 148 * expansion are always performed; additional expansions can be requested 149 * via flag (EXP_*). 150 * The result is left in the stack string. 151 * When arglist is NULL, perform here document expansion. 152 * 153 * Caution: this function uses global state and is not reentrant. 154 * However, a new invocation after an interrupted invocation is safe 155 * and will reset the global state for the new call. 156 */ 157void 158expandarg(union node *arg, struct arglist *arglist, int flag) 159{ 160 struct strlist *sp; 161 char *p; 162 163 argbackq = arg->narg.backquote; 164 STARTSTACKSTR(expdest); 165 ifsfirst.next = NULL; 166 ifslastp = NULL; 167 argstr(arg->narg.text, flag); 168 if (arglist == NULL) { 169 STACKSTRNUL(expdest); 170 return; /* here document expanded */ 171 } 172 STPUTC('\0', expdest); 173 p = grabstackstr(expdest); 174 exparg.lastp = &exparg.list; 175 if (flag & EXP_FULL) { 176 ifsbreakup(p, &exparg); 177 *exparg.lastp = NULL; 178 exparg.lastp = &exparg.list; 179 expandmeta(exparg.list); 180 } else { 181 sp = (struct strlist *)stalloc(sizeof (struct strlist)); 182 sp->text = p; 183 *exparg.lastp = sp; 184 exparg.lastp = &sp->next; 185 } 186 while (ifsfirst.next != NULL) { 187 struct ifsregion *ifsp; 188 INTOFF; 189 ifsp = ifsfirst.next->next; 190 ckfree(ifsfirst.next); 191 ifsfirst.next = ifsp; 192 INTON; 193 } 194 *exparg.lastp = NULL; 195 if (exparg.list) { 196 *arglist->lastp = exparg.list; 197 arglist->lastp = exparg.lastp; 198 } 199} 200 201 202 203/* 204 * Perform parameter expansion, command substitution and arithmetic 205 * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE. 206 * Processing ends at a CTLENDVAR or CTLENDARI character as well as '\0'. 207 * This is used to expand word in ${var+word} etc. 
208 * If EXP_FULL or EXP_CASE are set, keep and/or generate CTLESC 209 * characters to allow for further processing. 210 * If EXP_FULL is set, also preserve CTLQUOTEMARK characters. 211 */ 212static char * 213argstr(char *p, int flag) 214{ 215 char c; 216 int quotes = flag & (EXP_FULL | EXP_CASE); /* do CTLESC */ 217 int firsteq = 1; 218 int split_lit; 219 int lit_quoted; 220 221 split_lit = flag & EXP_SPLIT_LIT; 222 lit_quoted = flag & EXP_LIT_QUOTED; 223 flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED); 224 if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE))) 225 p = exptilde(p, flag); 226 for (;;) { 227 CHECKSTRSPACE(2, expdest); 228 switch (c = *p++) { 229 case '\0': 230 return (p - 1); 231 case CTLENDVAR: 232 case CTLENDARI: 233 return (p); 234 case CTLQUOTEMARK: 235 lit_quoted = 1; 236 /* "$@" syntax adherence hack */ 237 if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=') 238 break; 239 if ((flag & EXP_FULL) != 0) 240 USTPUTC(c, expdest); 241 break; 242 case CTLQUOTEEND: 243 lit_quoted = 0; 244 break; 245 case CTLESC: 246 if (quotes) 247 USTPUTC(c, expdest); 248 c = *p++; 249 USTPUTC(c, expdest); 250 if (split_lit && !lit_quoted) 251 recordregion(expdest - stackblock() - 252 (quotes ? 2 : 1), 253 expdest - stackblock(), 0); 254 break; 255 case CTLVAR: 256 p = evalvar(p, flag); 257 break; 258 case CTLBACKQ: 259 case CTLBACKQ|CTLQUOTE: 260 expbackq(argbackq->n, c & CTLQUOTE, flag); 261 argbackq = argbackq->next; 262 break; 263 case CTLARI: 264 p = expari(p); 265 break; 266 case ':': 267 case '=': 268 /* 269 * sort of a hack - expand tildes in variable 270 * assignments (after the first '=' and after ':'s). 
271 */ 272 USTPUTC(c, expdest); 273 if (split_lit && !lit_quoted) 274 recordregion(expdest - stackblock() - 1, 275 expdest - stackblock(), 0); 276 if (flag & EXP_VARTILDE && *p == '~' && 277 (c != '=' || firsteq)) { 278 if (c == '=') 279 firsteq = 0; 280 p = exptilde(p, flag); 281 } 282 break; 283 default: 284 USTPUTC(c, expdest); 285 if (split_lit && !lit_quoted) 286 recordregion(expdest - stackblock() - 1, 287 expdest - stackblock(), 0); 288 } 289 } 290} 291 292/* 293 * Perform tilde expansion, placing the result in the stack string and 294 * returning the next position in the input string to process. 295 */ 296static char * 297exptilde(char *p, int flag) 298{ 299 char c, *startp = p; 300 struct passwd *pw; 301 char *home; 302 303 for (;;) { 304 c = *p; 305 switch(c) { 306 case CTLESC: /* This means CTL* are always considered quoted. */ 307 case CTLVAR: 308 case CTLBACKQ: 309 case CTLBACKQ | CTLQUOTE: 310 case CTLARI: 311 case CTLENDARI: 312 case CTLQUOTEMARK: 313 return (startp); 314 case ':': 315 if ((flag & EXP_VARTILDE) == 0) 316 break; 317 /* FALLTHROUGH */ 318 case '\0': 319 case '/': 320 case CTLENDVAR: 321 *p = '\0'; 322 if (*(startp+1) == '\0') { 323 home = lookupvar("HOME"); 324 } else { 325 pw = getpwnam(startp+1); 326 home = pw != NULL ? 
pw->pw_dir : NULL; 327 } 328 *p = c; 329 if (home == NULL || *home == '\0') 330 return (startp); 331 strtodest(home, flag, VSNORMAL, 1); 332 return (p); 333 } 334 p++; 335 } 336} 337 338 339static void 340removerecordregions(int endoff) 341{ 342 if (ifslastp == NULL) 343 return; 344 345 if (ifsfirst.endoff > endoff) { 346 while (ifsfirst.next != NULL) { 347 struct ifsregion *ifsp; 348 INTOFF; 349 ifsp = ifsfirst.next->next; 350 ckfree(ifsfirst.next); 351 ifsfirst.next = ifsp; 352 INTON; 353 } 354 if (ifsfirst.begoff > endoff) 355 ifslastp = NULL; 356 else { 357 ifslastp = &ifsfirst; 358 ifsfirst.endoff = endoff; 359 } 360 return; 361 } 362 363 ifslastp = &ifsfirst; 364 while (ifslastp->next && ifslastp->next->begoff < endoff) 365 ifslastp=ifslastp->next; 366 while (ifslastp->next != NULL) { 367 struct ifsregion *ifsp; 368 INTOFF; 369 ifsp = ifslastp->next->next; 370 ckfree(ifslastp->next); 371 ifslastp->next = ifsp; 372 INTON; 373 } 374 if (ifslastp->endoff > endoff) 375 ifslastp->endoff = endoff; 376} 377 378/* 379 * Expand arithmetic expression. 380 * Note that flag is not required as digits never require CTLESC characters. 381 */ 382static char * 383expari(char *p) 384{ 385 char *q, *start; 386 arith_t result; 387 int begoff; 388 int quoted; 389 int adj; 390 391 quoted = *p++ == '"'; 392 begoff = expdest - stackblock(); 393 p = argstr(p, 0); 394 removerecordregions(begoff); 395 STPUTC('\0', expdest); 396 start = stackblock() + begoff; 397 398 q = grabstackstr(expdest); 399 result = arith(start); 400 ungrabstackstr(q, expdest); 401 402 start = stackblock() + begoff; 403 adj = start - expdest; 404 STADJUST(adj, expdest); 405 406 CHECKSTRSPACE((int)(DIGITS(result) + 1), expdest); 407 fmtstr(expdest, DIGITS(result), ARITH_FORMAT_STR, result); 408 adj = strlen(expdest); 409 STADJUST(adj, expdest); 410 if (!quoted) 411 recordregion(begoff, expdest - stackblock(), 0); 412 return p; 413} 414 415 416/* 417 * Perform command substitution. 
418 */ 419static void 420expbackq(union node *cmd, int quoted, int flag) 421{ 422 struct backcmd in; 423 int i; 424 char buf[128]; 425 char *p; 426 char *dest = expdest; 427 struct ifsregion saveifs, *savelastp; 428 struct nodelist *saveargbackq; 429 char lastc; 430 int startloc = dest - stackblock(); 431 char const *syntax = quoted? DQSYNTAX : BASESYNTAX; 432 int quotes = flag & (EXP_FULL | EXP_CASE); 433 size_t nnl; 434 435 INTOFF; 436 saveifs = ifsfirst; 437 savelastp = ifslastp; 438 saveargbackq = argbackq; 439 p = grabstackstr(dest); 440 evalbackcmd(cmd, &in); 441 ungrabstackstr(p, dest); 442 ifsfirst = saveifs; 443 ifslastp = savelastp; 444 argbackq = saveargbackq; 445 446 p = in.buf; 447 lastc = '\0'; 448 nnl = 0; 449 /* Don't copy trailing newlines */ 450 for (;;) { 451 if (--in.nleft < 0) { 452 if (in.fd < 0) 453 break; 454 while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR); 455 TRACE(("expbackq: read returns %d\n", i)); 456 if (i <= 0) 457 break; 458 p = buf; 459 in.nleft = i - 1; 460 } 461 lastc = *p++; 462 if (lastc != '\0') { 463 if (lastc == '\n') { 464 nnl++; 465 } else { 466 CHECKSTRSPACE(nnl + 2, dest); 467 while (nnl > 0) { 468 nnl--; 469 USTPUTC('\n', dest); 470 } 471 if (quotes && syntax[(int)lastc] == CCTL) 472 USTPUTC(CTLESC, dest); 473 USTPUTC(lastc, dest); 474 } 475 } 476 } 477 478 if (in.fd >= 0) 479 close(in.fd); 480 if (in.buf) 481 ckfree(in.buf); 482 if (in.jp) 483 exitstatus = waitforjob(in.jp, (int *)NULL); 484 if (quoted == 0) 485 recordregion(startloc, dest - stackblock(), 0); 486 TRACE(("expbackq: size=%td: \"%.*s\"\n", 487 ((dest - stackblock()) - startloc), 488 (int)((dest - stackblock()) - startloc), 489 stackblock() + startloc)); 490 expdest = dest; 491 INTON; 492} 493 494 495 496static void 497recordleft(const char *str, const char *loc, char *startp) 498{ 499 int amount; 500 501 amount = ((str - 1) - (loc - startp)) - expdest; 502 STADJUST(amount, expdest); 503 while (loc != str - 1) 504 *startp++ = *loc++; 505} 
506 507static int 508subevalvar(char *p, char *str, int strloc, int subtype, int startloc, 509 int varflags, int quotes) 510{ 511 char *startp; 512 char *loc = NULL; 513 char *q; 514 int c = 0; 515 struct nodelist *saveargbackq = argbackq; 516 int amount; 517 518 argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX || 519 subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ? 520 EXP_CASE : 0) | EXP_TILDE); 521 STACKSTRNUL(expdest); 522 argbackq = saveargbackq; 523 startp = stackblock() + startloc; 524 if (str == NULL) 525 str = stackblock() + strloc; 526 527 switch (subtype) { 528 case VSASSIGN: 529 setvar(str, startp, 0); 530 amount = startp - expdest; 531 STADJUST(amount, expdest); 532 varflags &= ~VSNUL; 533 return 1; 534 535 case VSQUESTION: 536 if (*p != CTLENDVAR) { 537 outfmt(out2, "%s\n", startp); 538 error((char *)NULL); 539 } 540 error("%.*s: parameter %snot set", (int)(p - str - 1), 541 str, (varflags & VSNUL) ? "null or " 542 : nullstr); 543 return 0; 544 545 case VSTRIMLEFT: 546 for (loc = startp; loc < str; loc++) { 547 c = *loc; 548 *loc = '\0'; 549 if (patmatch(str, startp, quotes)) { 550 *loc = c; 551 recordleft(str, loc, startp); 552 return 1; 553 } 554 *loc = c; 555 if (quotes && *loc == CTLESC) 556 loc++; 557 } 558 return 0; 559 560 case VSTRIMLEFTMAX: 561 for (loc = str - 1; loc >= startp;) { 562 c = *loc; 563 *loc = '\0'; 564 if (patmatch(str, startp, quotes)) { 565 *loc = c; 566 recordleft(str, loc, startp); 567 return 1; 568 } 569 *loc = c; 570 loc--; 571 if (quotes && loc > startp && *(loc - 1) == CTLESC) { 572 for (q = startp; q < loc; q++) 573 if (*q == CTLESC) 574 q++; 575 if (q > loc) 576 loc--; 577 } 578 } 579 return 0; 580 581 case VSTRIMRIGHT: 582 for (loc = str - 1; loc >= startp;) { 583 if (patmatch(str, loc, quotes)) { 584 amount = loc - expdest; 585 STADJUST(amount, expdest); 586 return 1; 587 } 588 loc--; 589 if (quotes && loc > startp && *(loc - 1) == CTLESC) { 590 for (q = startp; q < loc; q++) 591 if (*q == CTLESC) 
592 q++; 593 if (q > loc) 594 loc--; 595 } 596 } 597 return 0; 598 599 case VSTRIMRIGHTMAX: 600 for (loc = startp; loc < str - 1; loc++) { 601 if (patmatch(str, loc, quotes)) { 602 amount = loc - expdest; 603 STADJUST(amount, expdest); 604 return 1; 605 } 606 if (quotes && *loc == CTLESC) 607 loc++; 608 } 609 return 0; 610 611 612 default: 613 abort(); 614 } 615} 616 617 618/* 619 * Expand a variable, and return a pointer to the next character in the 620 * input string. 621 */ 622 623static char * 624evalvar(char *p, int flag) 625{ 626 int subtype; 627 int varflags; 628 char *var; 629 const char *val; 630 int patloc; 631 int c; 632 int set; 633 int special; 634 int startloc; 635 int varlen; 636 int varlenb; 637 int easy; 638 int quotes = flag & (EXP_FULL | EXP_CASE); 639 int record = 0; 640 641 varflags = (unsigned char)*p++; 642 subtype = varflags & VSTYPE; 643 var = p; 644 special = 0; 645 if (! is_name(*p)) 646 special = 1; 647 p = strchr(p, '=') + 1; 648again: /* jump here after setting a variable with ${var=text} */ 649 if (varflags & VSLINENO) { 650 set = 1; 651 special = 1; 652 val = NULL; 653 } else if (special) { 654 set = varisset(var, varflags & VSNUL); 655 val = NULL; 656 } else { 657 val = bltinlookup(var, 1); 658 if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) { 659 val = NULL; 660 set = 0; 661 } else 662 set = 1; 663 } 664 varlen = 0; 665 startloc = expdest - stackblock(); 666 if (!set && uflag && *var != '@' && *var != '*') { 667 switch (subtype) { 668 case VSNORMAL: 669 case VSTRIMLEFT: 670 case VSTRIMLEFTMAX: 671 case VSTRIMRIGHT: 672 case VSTRIMRIGHTMAX: 673 case VSLENGTH: 674 error("%.*s: parameter not set", (int)(p - var - 1), 675 var); 676 } 677 } 678 if (set && subtype != VSPLUS) { 679 /* insert the value of the variable */ 680 if (special) { 681 if (varflags & VSLINENO) 682 STPUTBIN(var, p - var - 1, expdest); 683 else 684 varvalue(var, varflags & VSQUOTE, subtype, flag); 685 if (subtype == VSLENGTH) { 686 varlenb = expdest - 
stackblock() - startloc; 687 varlen = varlenb; 688 if (localeisutf8) { 689 val = stackblock() + startloc; 690 for (;val != expdest; val++) 691 if ((*val & 0xC0) == 0x80) 692 varlen--; 693 } 694 STADJUST(-varlenb, expdest); 695 } 696 } else { 697 if (subtype == VSLENGTH) { 698 for (;*val; val++) 699 if (!localeisutf8 || 700 (*val & 0xC0) != 0x80) 701 varlen++; 702 } 703 else 704 strtodest(val, flag, subtype, 705 varflags & VSQUOTE); 706 } 707 } 708 709 if (subtype == VSPLUS) 710 set = ! set; 711 712 easy = ((varflags & VSQUOTE) == 0 || 713 (*var == '@' && shellparam.nparam != 1)); 714 715 716 switch (subtype) { 717 case VSLENGTH: 718 expdest = cvtnum(varlen, expdest); 719 record = 1; 720 break; 721 722 case VSNORMAL: 723 record = easy; 724 break; 725 726 case VSPLUS: 727 case VSMINUS: 728 if (!set) { 729 argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) | 730 (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0)); 731 break; 732 } 733 record = easy; 734 break; 735 736 case VSTRIMLEFT: 737 case VSTRIMLEFTMAX: 738 case VSTRIMRIGHT: 739 case VSTRIMRIGHTMAX: 740 if (!set) 741 break; 742 /* 743 * Terminate the string and start recording the pattern 744 * right after it 745 */ 746 STPUTC('\0', expdest); 747 patloc = expdest - stackblock(); 748 if (subevalvar(p, NULL, patloc, subtype, 749 startloc, varflags, quotes) == 0) { 750 int amount = (expdest - stackblock() - patloc) + 1; 751 STADJUST(-amount, expdest); 752 } 753 /* Remove any recorded regions beyond start of variable */ 754 removerecordregions(startloc); 755 record = 1; 756 break; 757 758 case VSASSIGN: 759 case VSQUESTION: 760 if (!set) { 761 if (subevalvar(p, var, 0, subtype, startloc, varflags, 762 quotes)) { 763 varflags &= ~VSNUL; 764 /* 765 * Remove any recorded regions beyond 766 * start of variable 767 */ 768 removerecordregions(startloc); 769 goto again; 770 } 771 break; 772 } 773 record = easy; 774 break; 775 776 case VSERROR: 777 c = p - var - 1; 778 error("${%.*s%s}: Bad substitution", c, var, 779 (c > 0 && *p 
!= CTLENDVAR) ? "..." : ""); 780 781 default: 782 abort(); 783 } 784 785 if (record) 786 recordregion(startloc, expdest - stackblock(), 787 varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' && 788 (*var == '@' || *var == '*'))); 789 790 if (subtype != VSNORMAL) { /* skip to end of alternative */ 791 int nesting = 1; 792 for (;;) { 793 if ((c = *p++) == CTLESC) 794 p++; 795 else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) { 796 if (set) 797 argbackq = argbackq->next; 798 } else if (c == CTLVAR) { 799 if ((*p++ & VSTYPE) != VSNORMAL) 800 nesting++; 801 } else if (c == CTLENDVAR) { 802 if (--nesting == 0) 803 break; 804 } 805 } 806 } 807 return p; 808} 809 810 811 812/* 813 * Test whether a specialized variable is set. 814 */ 815 816static int 817varisset(const char *name, int nulok) 818{ 819 820 if (*name == '!') 821 return backgndpidset(); 822 else if (*name == '@' || *name == '*') { 823 if (*shellparam.p == NULL) 824 return 0; 825 826 if (nulok) { 827 char **av; 828 829 for (av = shellparam.p; *av; av++) 830 if (**av != '\0') 831 return 1; 832 return 0; 833 } 834 } else if (is_digit(*name)) { 835 char *ap; 836 long num; 837 838 errno = 0; 839 num = strtol(name, NULL, 10); 840 if (errno != 0 || num > shellparam.nparam) 841 return 0; 842 843 if (num == 0) 844 ap = arg0; 845 else 846 ap = shellparam.p[num - 1]; 847 848 if (nulok && (ap == NULL || *ap == '\0')) 849 return 0; 850 } 851 return 1; 852} 853 854static void 855strtodest(const char *p, int flag, int subtype, int quoted) 856{ 857 if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH) 858 STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest); 859 else 860 STPUTS(p, expdest); 861} 862 863/* 864 * Add the value of a specialized variable to the stack string. 
865 */ 866 867static void 868varvalue(const char *name, int quoted, int subtype, int flag) 869{ 870 int num; 871 char *p; 872 int i; 873 char sep; 874 char **ap; 875 876 switch (*name) { 877 case '$': 878 num = rootpid; 879 goto numvar; 880 case '?': 881 num = oexitstatus; 882 goto numvar; 883 case '#': 884 num = shellparam.nparam; 885 goto numvar; 886 case '!': 887 num = backgndpidval(); 888numvar: 889 expdest = cvtnum(num, expdest); 890 break; 891 case '-': 892 for (i = 0 ; i < NOPTS ; i++) { 893 if (optlist[i].val) 894 STPUTC(optlist[i].letter, expdest); 895 } 896 break; 897 case '@': 898 if (flag & EXP_FULL && quoted) { 899 for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { 900 strtodest(p, flag, subtype, quoted); 901 if (*ap) 902 STPUTC('\0', expdest); 903 } 904 break; 905 } 906 /* FALLTHROUGH */ 907 case '*': 908 if (ifsset()) 909 sep = ifsval()[0]; 910 else 911 sep = ' '; 912 for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { 913 strtodest(p, flag, subtype, quoted); 914 if (!*ap) 915 break; 916 if (sep || (flag & EXP_FULL && !quoted && **ap != '\0')) 917 STPUTC(sep, expdest); 918 } 919 break; 920 case '0': 921 p = arg0; 922 strtodest(p, flag, subtype, quoted); 923 break; 924 default: 925 if (is_digit(*name)) { 926 num = atoi(name); 927 if (num > 0 && num <= shellparam.nparam) { 928 p = shellparam.p[num - 1]; 929 strtodest(p, flag, subtype, quoted); 930 } 931 } 932 break; 933 } 934} 935 936 937 938/* 939 * Record the fact that we have to scan this region of the 940 * string for IFS characters. 
941 */ 942 943static void 944recordregion(int start, int end, int inquotes) 945{ 946 struct ifsregion *ifsp; 947 948 INTOFF; 949 if (ifslastp == NULL) { 950 ifsp = &ifsfirst; 951 } else { 952 if (ifslastp->endoff == start 953 && ifslastp->inquotes == inquotes) { 954 /* extend previous area */ 955 ifslastp->endoff = end; 956 INTON; 957 return; 958 } 959 ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion)); 960 ifslastp->next = ifsp; 961 } 962 ifslastp = ifsp; 963 ifslastp->next = NULL; 964 ifslastp->begoff = start; 965 ifslastp->endoff = end; 966 ifslastp->inquotes = inquotes; 967 INTON; 968} 969 970 971 972/* 973 * Break the argument string into pieces based upon IFS and add the 974 * strings to the argument list. The regions of the string to be 975 * searched for IFS characters have been stored by recordregion. 976 * CTLESC characters are preserved but have little effect in this pass 977 * other than escaping CTL* characters. In particular, they do not escape 978 * IFS characters: that should be done with the ifsregion mechanism. 979 * CTLQUOTEMARK characters are used to preserve empty quoted strings. 980 * This pass treats them as a regular character, making the string non-empty. 981 * Later, they are removed along with the other CTL* characters. 982 */ 983static void 984ifsbreakup(char *string, struct arglist *arglist) 985{ 986 struct ifsregion *ifsp; 987 struct strlist *sp; 988 char *start; 989 char *p; 990 char *q; 991 const char *ifs; 992 const char *ifsspc; 993 int had_param_ch = 0; 994 995 start = string; 996 997 if (ifslastp == NULL) { 998 /* Return entire argument, IFS doesn't apply to any of it */ 999 sp = (struct strlist *)stalloc(sizeof *sp); 1000 sp->text = start; 1001 *arglist->lastp = sp; 1002 arglist->lastp = &sp->next; 1003 return; 1004 } 1005 1006 ifs = ifsset() ? 
ifsval() : " \t\n"; 1007 1008 for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) { 1009 p = string + ifsp->begoff; 1010 while (p < string + ifsp->endoff) { 1011 q = p; 1012 if (*p == CTLESC) 1013 p++; 1014 if (ifsp->inquotes) { 1015 /* Only NULs (should be from "$@") end args */ 1016 had_param_ch = 1; 1017 if (*p != 0) { 1018 p++; 1019 continue; 1020 } 1021 ifsspc = NULL; 1022 } else { 1023 if (!strchr(ifs, *p)) { 1024 had_param_ch = 1; 1025 p++; 1026 continue; 1027 } 1028 ifsspc = strchr(" \t\n", *p); 1029 1030 /* Ignore IFS whitespace at start */ 1031 if (q == start && ifsspc != NULL) { 1032 p++; 1033 start = p; 1034 continue; 1035 } 1036 had_param_ch = 0; 1037 } 1038 1039 /* Save this argument... */ 1040 *q = '\0'; 1041 sp = (struct strlist *)stalloc(sizeof *sp); 1042 sp->text = start; 1043 *arglist->lastp = sp; 1044 arglist->lastp = &sp->next; 1045 p++; 1046 1047 if (ifsspc != NULL) { 1048 /* Ignore further trailing IFS whitespace */ 1049 for (; p < string + ifsp->endoff; p++) { 1050 q = p; 1051 if (*p == CTLESC) 1052 p++; 1053 if (strchr(ifs, *p) == NULL) { 1054 p = q; 1055 break; 1056 } 1057 if (strchr(" \t\n", *p) == NULL) { 1058 p++; 1059 break; 1060 } 1061 } 1062 } 1063 start = p; 1064 } 1065 } 1066 1067 /* 1068 * Save anything left as an argument. 1069 * Traditionally we have treated 'IFS=':'; set -- x$IFS' as 1070 * generating 2 arguments, the second of which is empty. 1071 * Some recent clarification of the Posix spec say that it 1072 * should only generate one.... 1073 */ 1074 if (had_param_ch || *start != 0) { 1075 sp = (struct strlist *)stalloc(sizeof *sp); 1076 sp->text = start; 1077 *arglist->lastp = sp; 1078 arglist->lastp = &sp->next; 1079 } 1080} 1081 1082 1083static char expdir[PATH_MAX]; 1084#define expdir_end (expdir + sizeof(expdir)) 1085 1086/* 1087 * Perform pathname generation and remove control characters. 1088 * At this point, the only control characters should be CTLESC and CTLQUOTEMARK. 
1089 * The results are stored in the list exparg. 1090 */ 1091static void 1092expandmeta(struct strlist *str) 1093{ 1094 char *p; 1095 struct strlist **savelastp; 1096 struct strlist *sp; 1097 char c; 1098 1099 while (str) { 1100 if (fflag) 1101 goto nometa; 1102 p = str->text; 1103 for (;;) { /* fast check for meta chars */ 1104 if ((c = *p++) == '\0') 1105 goto nometa; 1106 if (c == '*' || c == '?' || c == '[') 1107 break; 1108 } 1109 savelastp = exparg.lastp; 1110 INTOFF; 1111 expmeta(expdir, str->text); 1112 INTON; 1113 if (exparg.lastp == savelastp) { 1114 /* 1115 * no matches 1116 */ 1117nometa: 1118 *exparg.lastp = str; 1119 rmescapes(str->text); 1120 exparg.lastp = &str->next; 1121 } else { 1122 *exparg.lastp = NULL; 1123 *savelastp = sp = expsort(*savelastp); 1124 while (sp->next != NULL) 1125 sp = sp->next; 1126 exparg.lastp = &sp->next; 1127 } 1128 str = str->next; 1129 } 1130} 1131 1132 1133/* 1134 * Do metacharacter (i.e. *, ?, [...]) expansion. 1135 */ 1136 1137static void 1138expmeta(char *enddir, char *name) 1139{ 1140 const char *p; 1141 const char *q; 1142 const char *start; 1143 char *endname; 1144 int metaflag; 1145 struct stat statb; 1146 DIR *dirp; 1147 struct dirent *dp; 1148 int atend; 1149 int matchdot; 1150 int esc; 1151 int namlen; 1152 1153 metaflag = 0; 1154 start = name; 1155 for (p = name; esc = 0, *p; p += esc + 1) { 1156 if (*p == '*' || *p == '?') 1157 metaflag = 1; 1158 else if (*p == '[') { 1159 q = p + 1; 1160 if (*q == '!' 
|| *q == '^') 1161 q++; 1162 for (;;) { 1163 while (*q == CTLQUOTEMARK) 1164 q++; 1165 if (*q == CTLESC) 1166 q++; 1167 if (*q == '/' || *q == '\0') 1168 break; 1169 if (*++q == ']') { 1170 metaflag = 1; 1171 break; 1172 } 1173 } 1174 } else if (*p == '\0') 1175 break; 1176 else if (*p == CTLQUOTEMARK) 1177 continue; 1178 else { 1179 if (*p == CTLESC) 1180 esc++; 1181 if (p[esc] == '/') { 1182 if (metaflag) 1183 break; 1184 start = p + esc + 1; 1185 } 1186 } 1187 } 1188 if (metaflag == 0) { /* we've reached the end of the file name */ 1189 if (enddir != expdir) 1190 metaflag++; 1191 for (p = name ; ; p++) { 1192 if (*p == CTLQUOTEMARK) 1193 continue; 1194 if (*p == CTLESC) 1195 p++; 1196 *enddir++ = *p; 1197 if (*p == '\0') 1198 break; 1199 if (enddir == expdir_end) 1200 return; 1201 } 1202 if (metaflag == 0 || lstat(expdir, &statb) >= 0) 1203 addfname(expdir); 1204 return; 1205 } 1206 endname = name + (p - name); 1207 if (start != name) { 1208 p = name; 1209 while (p < start) { 1210 while (*p == CTLQUOTEMARK) 1211 p++; 1212 if (*p == CTLESC) 1213 p++; 1214 *enddir++ = *p++; 1215 if (enddir == expdir_end) 1216 return; 1217 } 1218 } 1219 if (enddir == expdir) { 1220 p = "."; 1221 } else if (enddir == expdir + 1 && *expdir == '/') { 1222 p = "/"; 1223 } else { 1224 p = expdir; 1225 enddir[-1] = '\0'; 1226 } 1227 if ((dirp = opendir(p)) == NULL) 1228 return; 1229 if (enddir != expdir) 1230 enddir[-1] = '/'; 1231 if (*endname == 0) { 1232 atend = 1; 1233 } else { 1234 atend = 0; 1235 *endname = '\0'; 1236 endname += esc + 1; 1237 } 1238 matchdot = 0; 1239 p = start; 1240 while (*p == CTLQUOTEMARK) 1241 p++; 1242 if (*p == CTLESC) 1243 p++; 1244 if (*p == '.') 1245 matchdot++; 1246 while (! int_pending() && (dp = readdir(dirp)) != NULL) { 1247 if (dp->d_name[0] == '.' && ! 
matchdot) 1248 continue; 1249 if (patmatch(start, dp->d_name, 0)) { 1250 namlen = dp->d_namlen; 1251 if (enddir + namlen + 1 > expdir_end) 1252 continue; 1253 memcpy(enddir, dp->d_name, namlen + 1); 1254 if (atend) 1255 addfname(expdir); 1256 else { 1257 if (dp->d_type != DT_UNKNOWN && 1258 dp->d_type != DT_DIR && 1259 dp->d_type != DT_LNK) 1260 continue; 1261 if (enddir + namlen + 2 > expdir_end) 1262 continue; 1263 enddir[namlen] = '/'; 1264 enddir[namlen + 1] = '\0'; 1265 expmeta(enddir + namlen + 1, endname); 1266 } 1267 } 1268 } 1269 closedir(dirp); 1270 if (! atend) 1271 endname[-esc - 1] = esc ? CTLESC : '/'; 1272} 1273 1274 1275/* 1276 * Add a file name to the list. 1277 */ 1278 1279static void 1280addfname(char *name) 1281{ 1282 char *p; 1283 struct strlist *sp; 1284 size_t len; 1285 1286 len = strlen(name); 1287 p = stalloc(len + 1); 1288 memcpy(p, name, len + 1); 1289 sp = (struct strlist *)stalloc(sizeof *sp); 1290 sp->text = p; 1291 *exparg.lastp = sp; 1292 exparg.lastp = &sp->next; 1293} 1294 1295 1296/* 1297 * Sort the results of file name expansion. It calculates the number of 1298 * strings to sort and then calls msort (short for merge sort) to do the 1299 * work. 
1300 */ 1301 1302static struct strlist * 1303expsort(struct strlist *str) 1304{ 1305 int len; 1306 struct strlist *sp; 1307 1308 len = 0; 1309 for (sp = str ; sp ; sp = sp->next) 1310 len++; 1311 return msort(str, len); 1312} 1313 1314 1315static struct strlist * 1316msort(struct strlist *list, int len) 1317{ 1318 struct strlist *p, *q = NULL; 1319 struct strlist **lpp; 1320 int half; 1321 int n; 1322 1323 if (len <= 1) 1324 return list; 1325 half = len >> 1; 1326 p = list; 1327 for (n = half ; --n >= 0 ; ) { 1328 q = p; 1329 p = p->next; 1330 } 1331 q->next = NULL; /* terminate first half of list */ 1332 q = msort(list, half); /* sort first half of list */ 1333 p = msort(p, len - half); /* sort second half */ 1334 lpp = &list; 1335 for (;;) { 1336 if (strcmp(p->text, q->text) < 0) { 1337 *lpp = p; 1338 lpp = &p->next; 1339 if ((p = *lpp) == NULL) { 1340 *lpp = q; 1341 break; 1342 } 1343 } else { 1344 *lpp = q; 1345 lpp = &q->next; 1346 if ((q = *lpp) == NULL) { 1347 *lpp = p; 1348 break; 1349 } 1350 } 1351 } 1352 return list; 1353} 1354 1355 1356 1357static wchar_t 1358get_wc(const char **p) 1359{ 1360 wchar_t c; 1361 int chrlen; 1362 1363 chrlen = mbtowc(&c, *p, 4); 1364 if (chrlen == 0) 1365 return 0; 1366 else if (chrlen == -1) 1367 c = 0; 1368 else 1369 *p += chrlen; 1370 return c; 1371} 1372 1373 1374/* 1375 * See if a character matches a character class, starting at the first colon 1376 * of "[:class:]". 1377 * If a valid character class is recognized, a pointer to the next character 1378 * after the final closing bracket is stored into *end, otherwise a null 1379 * pointer is stored into *end. 
1380 */ 1381static int 1382match_charclass(const char *p, wchar_t chr, const char **end) 1383{ 1384 char name[20]; 1385 const char *nameend; 1386 wctype_t cclass; 1387 1388 *end = NULL; 1389 p++; 1390 nameend = strstr(p, ":]"); 1391 if (nameend == NULL || (size_t)(nameend - p) >= sizeof(name) || 1392 nameend == p) 1393 return 0; 1394 memcpy(name, p, nameend - p); 1395 name[nameend - p] = '\0'; 1396 *end = nameend + 2; 1397 cclass = wctype(name); 1398 /* An unknown class matches nothing but is valid nevertheless. */ 1399 if (cclass == 0) 1400 return 0; 1401 return iswctype(chr, cclass); 1402} 1403 1404 1405/* 1406 * Returns true if the pattern matches the string. 1407 */ 1408 1409static int 1410patmatch(const char *pattern, const char *string, int squoted) 1411{ 1412 const char *p, *q, *end; 1413 const char *bt_p, *bt_q; 1414 char c; 1415 wchar_t wc, wc2; 1416 1417 p = pattern; 1418 q = string; 1419 bt_p = NULL; 1420 bt_q = NULL; 1421 for (;;) { 1422 switch (c = *p++) { 1423 case '\0': 1424 if (*q != '\0') 1425 goto backtrack; 1426 return 1; 1427 case CTLESC: 1428 if (squoted && *q == CTLESC) 1429 q++; 1430 if (*q++ != *p++) 1431 goto backtrack; 1432 break; 1433 case CTLQUOTEMARK: 1434 continue; 1435 case '?': 1436 if (squoted && *q == CTLESC) 1437 q++; 1438 if (*q == '\0') 1439 return 0; 1440 if (localeisutf8) { 1441 wc = get_wc(&q); 1442 /* 1443 * A '?' does not match invalid UTF-8 but a 1444 * '*' does, so backtrack. 1445 */ 1446 if (wc == 0) 1447 goto backtrack; 1448 } else 1449 wc = (unsigned char)*q++; 1450 break; 1451 case '*': 1452 c = *p; 1453 while (c == CTLQUOTEMARK || c == '*') 1454 c = *++p; 1455 /* 1456 * If the pattern ends here, we know the string 1457 * matches without needing to look at the rest of it. 1458 */ 1459 if (c == '\0') 1460 return 1; 1461 /* 1462 * First try the shortest match for the '*' that 1463 * could work. 
We can forget any earlier '*' since 1464 * there is no way having it match more characters 1465 * can help us, given that we are already here. 1466 */ 1467 bt_p = p; 1468 bt_q = q; 1469 break; 1470 case '[': { 1471 const char *savep, *saveq; 1472 int invert, found; 1473 wchar_t chr; 1474 1475 savep = p, saveq = q; 1476 invert = 0; 1477 if (*p == '!' || *p == '^') { 1478 invert++; 1479 p++; 1480 } 1481 found = 0; 1482 if (squoted && *q == CTLESC) 1483 q++; 1484 if (*q == '\0') 1485 return 0; 1486 if (localeisutf8) { 1487 chr = get_wc(&q); 1488 if (chr == 0) 1489 goto backtrack; 1490 } else 1491 chr = (unsigned char)*q++; 1492 c = *p++; 1493 do { 1494 if (c == '\0') { 1495 p = savep, q = saveq; 1496 c = '['; 1497 goto dft; 1498 } 1499 if (c == CTLQUOTEMARK) 1500 continue; 1501 if (c == '[' && *p == ':') { 1502 found |= match_charclass(p, chr, &end); 1503 if (end != NULL) 1504 p = end; 1505 } 1506 if (c == CTLESC) 1507 c = *p++; 1508 if (localeisutf8 && c & 0x80) { 1509 p--; 1510 wc = get_wc(&p); 1511 if (wc == 0) /* bad utf-8 */ 1512 return 0; 1513 } else 1514 wc = (unsigned char)c; 1515 if (*p == '-' && p[1] != ']') { 1516 p++; 1517 while (*p == CTLQUOTEMARK) 1518 p++; 1519 if (*p == CTLESC) 1520 p++; 1521 if (localeisutf8) { 1522 wc2 = get_wc(&p); 1523 if (wc2 == 0) /* bad utf-8 */ 1524 return 0; 1525 } else 1526 wc2 = (unsigned char)*p++; 1527 if ( collate_range_cmp(chr, wc) >= 0 1528 && collate_range_cmp(chr, wc2) <= 0 1529 ) 1530 found = 1; 1531 } else { 1532 if (chr == wc) 1533 found = 1; 1534 } 1535 } while ((c = *p++) != ']'); 1536 if (found == invert) 1537 goto backtrack; 1538 break; 1539 } 1540dft: default: 1541 if (squoted && *q == CTLESC) 1542 q++; 1543 if (*q == '\0') 1544 return 0; 1545 if (*q++ == c) 1546 break; 1547backtrack: 1548 /* 1549 * If we have a mismatch (other than hitting the end 1550 * of the string), go back to the last '*' seen and 1551 * have it match one additional character. 
1552 */ 1553 if (bt_p == NULL) 1554 return 0; 1555 if (squoted && *bt_q == CTLESC) 1556 bt_q++; 1557 if (*bt_q == '\0') 1558 return 0; 1559 bt_q++; 1560 p = bt_p; 1561 q = bt_q; 1562 break; 1563 } 1564 } 1565} 1566 1567 1568 1569/* 1570 * Remove any CTLESC and CTLQUOTEMARK characters from a string. 1571 */ 1572 1573void 1574rmescapes(char *str) 1575{ 1576 char *p, *q; 1577 1578 p = str; 1579 while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) { 1580 if (*p++ == '\0') 1581 return; 1582 } 1583 q = p; 1584 while (*p) { 1585 if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) { 1586 p++; 1587 continue; 1588 } 1589 if (*p == CTLESC) 1590 p++; 1591 *q++ = *p++; 1592 } 1593 *q = '\0'; 1594} 1595 1596 1597 1598/* 1599 * See if a pattern matches in a case statement. 1600 */ 1601 1602int 1603casematch(union node *pattern, const char *val) 1604{ 1605 struct stackmark smark; 1606 int result; 1607 char *p; 1608 1609 setstackmark(&smark); 1610 argbackq = pattern->narg.backquote; 1611 STARTSTACKSTR(expdest); 1612 ifslastp = NULL; 1613 argstr(pattern->narg.text, EXP_TILDE | EXP_CASE); 1614 STPUTC('\0', expdest); 1615 p = grabstackstr(expdest); 1616 result = patmatch(p, val, 0); 1617 popstackmark(&smark); 1618 return result; 1619} 1620 1621/* 1622 * Our own itoa(). 1623 */ 1624 1625static char * 1626cvtnum(int num, char *buf) 1627{ 1628 char temp[32]; 1629 int neg = num < 0; 1630 char *p = temp + 31; 1631 1632 temp[31] = '\0'; 1633 1634 do { 1635 *--p = num % 10 + '0'; 1636 } while ((num /= 10) != 0); 1637 1638 if (neg) 1639 *--p = '-'; 1640 1641 STPUTS(p, buf); 1642 return buf; 1643} 1644 1645/* 1646 * Do most of the work for wordexp(3). 1647 */ 1648 1649int 1650wordexpcmd(int argc, char **argv) 1651{ 1652 size_t len; 1653 int i; 1654 1655 out1fmt("%08x", argc - 1); 1656 for (i = 1, len = 0; i < argc; i++) 1657 len += strlen(argv[i]); 1658 out1fmt("%08x", (int)len); 1659 for (i = 1; i < argc; i++) 1660 outbin(argv[i], strlen(argv[i]) + 1, out1); 1661 return (0); 1662} 1663