1147493Sgad/*- 2147493Sgad * Copyright (c) 2005 - Garance Alistair Drosehn <gad@FreeBSD.org>. 3147493Sgad * All rights reserved. 4147493Sgad * 5147493Sgad * Redistribution and use in source and binary forms, with or without 6147493Sgad * modification, are permitted provided that the following conditions 7147493Sgad * are met: 8147493Sgad * 1. Redistributions of source code must retain the above copyright 9147493Sgad * notice, this list of conditions and the following disclaimer. 10147493Sgad * 2. Redistributions in binary form must reproduce the above copyright 11147493Sgad * notice, this list of conditions and the following disclaimer in the 12147493Sgad * documentation and/or other materials provided with the distribution. 13147493Sgad * 14147493Sgad * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15147493Sgad * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16147493Sgad * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17147493Sgad * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18147493Sgad * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19147493Sgad * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20147493Sgad * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21147493Sgad * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22147493Sgad * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23147493Sgad * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24147493Sgad * SUCH DAMAGE. 25147493Sgad * 26147493Sgad * The views and conclusions contained in the software and documentation 27147493Sgad * are those of the authors and should not be interpreted as representing 28147493Sgad * official policies, either expressed or implied, of the FreeBSD Project. 29147493Sgad */ 30147493Sgad 31147493Sgad#include <sys/cdefs.h> 32147493Sgad__FBSDID("$FreeBSD$"); 33147493Sgad 34147493Sgad#include <sys/stat.h> 35147493Sgad#include <sys/param.h> 36147493Sgad#include <err.h> 37147493Sgad#include <errno.h> 38147493Sgad#include <ctype.h> 39147493Sgad#include <stdio.h> 40147493Sgad#include <stdlib.h> 41147493Sgad#include <string.h> 42147493Sgad#include <unistd.h> 43147493Sgad 44147493Sgad#include "envopts.h" 45147493Sgad 46147521Sgadstatic const char * 47147971Sgad expand_vars(int in_thisarg, char **thisarg_p, char **dest_p, 48147971Sgad const char **src_p); 49147493Sgadstatic int is_there(char *candidate); 50147493Sgad 51147493Sgad/* 52147493Sgad * The is*() routines take a parameter of 'int', but expect values in the range 53147493Sgad * of unsigned char. Define some wrappers which take a value of type 'char', 54147493Sgad * whether signed or unsigned, and ensure the value ends up in the right range. 55147493Sgad */ 56147493Sgad#define isalnumch(Anychar) isalnum((u_char)(Anychar)) 57147493Sgad#define isalphach(Anychar) isalpha((u_char)(Anychar)) 58147493Sgad#define isspacech(Anychar) isspace((u_char)(Anychar)) 59147493Sgad 60147493Sgad/* 61147493Sgad * Routine to determine if a given fully-qualified filename is executable. 62147493Sgad * This is copied almost verbatim from FreeBSD's usr.bin/which/which.c. 63147493Sgad */ 64147493Sgadstatic int 65147493Sgadis_there(char *candidate) 66147493Sgad{ 67147493Sgad struct stat fin; 68147493Sgad 69147493Sgad /* XXX work around access(2) false positives for superuser */ 70147493Sgad if (access(candidate, X_OK) == 0 && 71147493Sgad stat(candidate, &fin) == 0 && 72147493Sgad S_ISREG(fin.st_mode) && 73147493Sgad (getuid() != 0 || 74147493Sgad (fin.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0)) { 75147493Sgad if (env_verbosity > 1) 76147493Sgad fprintf(stderr, "#env matched:\t'%s'\n", candidate); 77147493Sgad return (1); 78147493Sgad } 79147493Sgad return (0); 80147493Sgad} 81147493Sgad 82147493Sgad/** 83147493Sgad * Routine to search through an alternate path-list, looking for a given 84147493Sgad * filename to execute. If the file is found, replace the original 85147493Sgad * unqualified name with a fully-qualified path. This allows `env' to 86147493Sgad * execute programs from a specific strict list of possible paths, without 87147493Sgad * changing the value of PATH seen by the program which will be executed. 88147493Sgad * E.G.: 89147493Sgad * #!/usr/bin/env -S-P/usr/local/bin:/usr/bin perl 90147493Sgad * will execute /usr/local/bin/perl or /usr/bin/perl (whichever is found 91147493Sgad * first), no matter what the current value of PATH is, and without 92147493Sgad * changing the value of PATH that the script will see when it runs. 93147493Sgad * 94147493Sgad * This is similar to the print_matches() routine in usr.bin/which/which.c. 95147493Sgad */ 96147493Sgadvoid 97147493Sgadsearch_paths(char *path, char **argv) 98147493Sgad{ 99147493Sgad char candidate[PATH_MAX]; 100147493Sgad const char *d; 101147493Sgad char *filename, *fqname; 102147493Sgad 103147493Sgad /* If the file has a `/' in it, then no search is done */ 104147493Sgad filename = *argv; 105147493Sgad if (strchr(filename, '/') != NULL) 106147493Sgad return; 107147493Sgad 108147493Sgad if (env_verbosity > 1) { 109147493Sgad fprintf(stderr, "#env Searching:\t'%s'\n", path); 110147493Sgad fprintf(stderr, "#env for file:\t'%s'\n", filename); 111147493Sgad } 112147493Sgad 113147493Sgad fqname = NULL; 114147493Sgad while ((d = strsep(&path, ":")) != NULL) { 115147493Sgad if (*d == '\0') 116147493Sgad d = "."; 117147493Sgad if (snprintf(candidate, sizeof(candidate), "%s/%s", d, 118147493Sgad filename) >= (int)sizeof(candidate)) 119147493Sgad continue; 120147493Sgad if (is_there(candidate)) { 121147493Sgad fqname = candidate; 122147493Sgad break; 123147493Sgad } 124147493Sgad } 125147493Sgad 126147493Sgad if (fqname == NULL) { 127147493Sgad errno = ENOENT; 128147493Sgad err(127, "%s", filename); 129147493Sgad } 130147493Sgad *argv = strdup(candidate); 131147493Sgad} 132147493Sgad 133147493Sgad/** 134147493Sgad * Routine to split a string into multiple parameters, while recognizing a 135147493Sgad * few special characters. It recognizes both single and double-quoted 136147493Sgad * strings. This processing is designed entirely for the benefit of the 137147493Sgad * parsing of "#!"-lines (aka "shebang" lines == the first line of an 138147493Sgad * executable script). Different operating systems parse that line in very 139147493Sgad * different ways, and this split-on-spaces processing is meant to provide 140147493Sgad * ways to specify arbitrary arguments on that line, no matter how the OS 141147493Sgad * parses it. 142147493Sgad * 143147493Sgad * Within a single-quoted string, the two characters "\'" are treated as 144147493Sgad * a literal "'" character to add to the string, and "\\" are treated as 145147493Sgad * a literal "\" character to add. Other than that, all characters are 146147493Sgad * copied until the processing gets to a terminating "'". 147147493Sgad * 148147493Sgad * Within a double-quoted string, many more "\"-style escape sequences 149147493Sgad * are recognized, mostly copied from what is recognized in the `printf' 150147493Sgad * command. Some OS's will not allow a literal blank character to be 151147493Sgad * included in the one argument that they recognize on a shebang-line, 152147493Sgad * so a few additional escape-sequences are defined to provide ways to 153147493Sgad * specify blanks. 154147493Sgad * 155147493Sgad * Within a double-quoted string "\_" is turned into a literal blank. 156147493Sgad * (Inside of a single-quoted string, the two characters are just copied) 157147493Sgad * Outside of a quoted string, "\_" is treated as both a blank, and the 158147493Sgad * end of the current argument. So with a shelbang-line of: 159147493Sgad * #!/usr/bin/env -SA=avalue\_perl 160147493Sgad * the -S value would be broken up into arguments "A=avalue" and "perl". 161147493Sgad */ 162147493Sgadvoid 163147493Sgadsplit_spaces(const char *str, int *origind, int *origc, char ***origv) 164147493Sgad{ 165147521Sgad static const char *nullarg = ""; 166147521Sgad const char *bq_src, *copystr, *src; 167147493Sgad char *dest, **newargv, *newstr, **nextarg, **oldarg; 168147493Sgad int addcount, bq_destlen, copychar, found_sep, in_arg, in_dq, in_sq; 169147493Sgad 170147493Sgad /* 171147493Sgad * Ignore leading space on the string, and then malloc enough room 172147493Sgad * to build a copy of it. The copy might end up shorter than the 173147493Sgad * original, due to quoted strings and '\'-processing. 174147493Sgad */ 175147493Sgad while (isspacech(*str)) 176147493Sgad str++; 177147493Sgad if (*str == '\0') 178147493Sgad return; 179147493Sgad newstr = malloc(strlen(str) + 1); 180147493Sgad 181147493Sgad /* 182147493Sgad * Allocate plenty of space for the new array of arg-pointers, 183147493Sgad * and start that array off with the first element of the old 184147493Sgad * array. 185147493Sgad */ 186147493Sgad newargv = malloc((*origc + (strlen(str) / 2) + 2) * sizeof(char *)); 187147493Sgad nextarg = newargv; 188147493Sgad *nextarg++ = **origv; 189147493Sgad 190147493Sgad /* Come up with the new args by splitting up the given string. */ 191147493Sgad addcount = 0; 192147493Sgad bq_destlen = in_arg = in_dq = in_sq = 0; 193147493Sgad bq_src = NULL; 194147493Sgad for (src = str, dest = newstr; *src != '\0'; src++) { 195147521Sgad /* 196147521Sgad * This switch will look at a character in *src, and decide 197147521Sgad * what should be copied to *dest. It only decides what 198147521Sgad * character(s) to copy, it should not modify *dest. In some 199147521Sgad * cases, it will look at multiple characters from *src. 200147521Sgad */ 201147493Sgad copychar = found_sep = 0; 202147521Sgad copystr = NULL; 203147493Sgad switch (*src) { 204147493Sgad case '"': 205147493Sgad if (in_sq) 206147493Sgad copychar = *src; 207147493Sgad else if (in_dq) 208147493Sgad in_dq = 0; 209147493Sgad else { 210147521Sgad /* 211147521Sgad * Referencing nullarg ensures that a new 212147521Sgad * argument is created, even if this quoted 213147521Sgad * string ends up with zero characters. 214147521Sgad */ 215147521Sgad copystr = nullarg; 216147493Sgad in_dq = 1; 217147493Sgad bq_destlen = dest - *(nextarg - 1); 218147493Sgad bq_src = src; 219147493Sgad } 220147493Sgad break; 221147493Sgad case '$': 222147493Sgad if (in_sq) 223147493Sgad copychar = *src; 224147493Sgad else { 225147971Sgad copystr = expand_vars(in_arg, (nextarg - 1), 226147971Sgad &dest, &src); 227147493Sgad } 228147493Sgad break; 229147493Sgad case '\'': 230147493Sgad if (in_dq) 231147493Sgad copychar = *src; 232147493Sgad else if (in_sq) 233147493Sgad in_sq = 0; 234147493Sgad else { 235147521Sgad /* 236147521Sgad * Referencing nullarg ensures that a new 237147521Sgad * argument is created, even if this quoted 238147521Sgad * string ends up with zero characters. 239147521Sgad */ 240147521Sgad copystr = nullarg; 241147493Sgad in_sq = 1; 242147493Sgad bq_destlen = dest - *(nextarg - 1); 243147493Sgad bq_src = src; 244147493Sgad } 245147493Sgad break; 246147493Sgad case '\\': 247147493Sgad if (in_sq) { 248147493Sgad /* 249147493Sgad * Inside single-quoted strings, only the 250147493Sgad * "\'" and "\\" are recognized as special 251147493Sgad * strings. 252147493Sgad */ 253147493Sgad copychar = *(src + 1); 254147493Sgad if (copychar == '\'' || copychar == '\\') 255147493Sgad src++; 256147493Sgad else 257147493Sgad copychar = *src; 258147493Sgad break; 259147493Sgad } 260147493Sgad src++; 261147493Sgad switch (*src) { 262147493Sgad case '"': 263147493Sgad case '#': 264147493Sgad case '$': 265147493Sgad case '\'': 266147493Sgad case '\\': 267147493Sgad copychar = *src; 268147493Sgad break; 269147493Sgad case '_': 270147493Sgad /* 271147493Sgad * Alternate way to get a blank, which allows 272147493Sgad * that blank be used to separate arguments 273147493Sgad * when it is not inside a quoted string. 274147493Sgad */ 275147493Sgad if (in_dq) 276147493Sgad copychar = ' '; 277147493Sgad else { 278147493Sgad found_sep = 1; 279147493Sgad src++; 280147493Sgad } 281147493Sgad break; 282147493Sgad case 'c': 283147493Sgad /* 284147493Sgad * Ignore remaining characters in the -S string. 285147493Sgad * This would not make sense if found in the 286147493Sgad * middle of a quoted string. 287147493Sgad */ 288147493Sgad if (in_dq) 289147493Sgad errx(1, "Sequence '\\%c' is not allowed" 290147493Sgad " in quoted strings", *src); 291147493Sgad goto str_done; 292147493Sgad case 'f': 293147493Sgad copychar = '\f'; 294147493Sgad break; 295147493Sgad case 'n': 296147493Sgad copychar = '\n'; 297147493Sgad break; 298147493Sgad case 'r': 299147493Sgad copychar = '\r'; 300147493Sgad break; 301147493Sgad case 't': 302147493Sgad copychar = '\t'; 303147493Sgad break; 304147493Sgad case 'v': 305147493Sgad copychar = '\v'; 306147493Sgad break; 307147493Sgad default: 308147493Sgad if (isspacech(*src)) 309147493Sgad copychar = *src; 310147493Sgad else 311147493Sgad errx(1, "Invalid sequence '\\%c' in -S", 312147493Sgad *src); 313147493Sgad } 314147493Sgad break; 315147493Sgad default: 316147493Sgad if ((in_dq || in_sq) && in_arg) 317147493Sgad copychar = *src; 318147971Sgad else if (isspacech(*src)) 319147493Sgad found_sep = 1; 320147493Sgad else { 321147493Sgad /* 322147493Sgad * If the first character of a new argument 323147493Sgad * is `#', then ignore the remaining chars. 324147493Sgad */ 325147493Sgad if (!in_arg && *src == '#') 326147493Sgad goto str_done; 327147493Sgad copychar = *src; 328147493Sgad } 329147493Sgad } 330147521Sgad /* 331147521Sgad * Now that the switch has determined what (if anything) 332147521Sgad * needs to be copied, copy whatever that is to *dest. 333147521Sgad */ 334147521Sgad if (copychar || copystr != NULL) { 335147493Sgad if (!in_arg) { 336147493Sgad /* This is the first byte of a new argument */ 337147493Sgad *nextarg++ = dest; 338147493Sgad addcount++; 339147493Sgad in_arg = 1; 340147493Sgad } 341147521Sgad if (copychar) 342147521Sgad *dest++ = (char)copychar; 343147521Sgad else if (copystr != NULL) 344147521Sgad while (*copystr != '\0') 345147521Sgad *dest++ = *copystr++; 346147493Sgad } else if (found_sep) { 347147493Sgad *dest++ = '\0'; 348147493Sgad while (isspacech(*src)) 349147493Sgad src++; 350147493Sgad --src; 351147493Sgad in_arg = 0; 352147493Sgad } 353147493Sgad } 354147493Sgadstr_done: 355147493Sgad *dest = '\0'; 356147493Sgad *nextarg = NULL; 357147493Sgad if (in_dq || in_sq) { 358147493Sgad errx(1, "No terminating quote for string: %.*s%s", 359147493Sgad bq_destlen, *(nextarg - 1), bq_src); 360147493Sgad } 361147493Sgad if (env_verbosity > 1) { 362147493Sgad fprintf(stderr, "#env split -S:\t'%s'\n", str); 363147493Sgad oldarg = newargv + 1; 364147493Sgad fprintf(stderr, "#env into:\t'%s'\n", *oldarg); 365147493Sgad for (oldarg++; *oldarg; oldarg++) 366147493Sgad fprintf(stderr, "#env &\t'%s'\n", *oldarg); 367147493Sgad } 368147493Sgad 369147493Sgad /* Copy the unprocessed arg-pointers from the original array */ 370147493Sgad for (oldarg = *origv + *origind; *oldarg; oldarg++) 371147493Sgad *nextarg++ = *oldarg; 372147493Sgad *nextarg = NULL; 373147493Sgad 374147493Sgad /* Update optind/argc/argv in the calling routine */ 375280027Sjilles *origc += addcount - *origind + 1; 376280027Sjilles *origv = newargv; 377147493Sgad *origind = 1; 378147493Sgad} 379147493Sgad 380147493Sgad/** 381147493Sgad * Routine to split expand any environment variables referenced in the string 382147493Sgad * that -S is processing. For now it only supports the form ${VARNAME}. It 383147493Sgad * explicitly does not support $VARNAME, and obviously can not handle special 384147493Sgad * shell-variables such as $?, $*, $1, etc. It is called with *src_p pointing 385147493Sgad * at the initial '$', and if successful it will update *src_p, *dest_p, and 386147493Sgad * possibly *thisarg_p in the calling routine. 387147493Sgad */ 388147521Sgadstatic const char * 389147971Sgadexpand_vars(int in_thisarg, char **thisarg_p, char **dest_p, const char **src_p) 390147493Sgad{ 391147493Sgad const char *vbegin, *vend, *vvalue; 392147521Sgad char *newstr, *vname; 393147493Sgad int bad_reference; 394147493Sgad size_t namelen, newlen; 395147493Sgad 396147493Sgad bad_reference = 1; 397147493Sgad vbegin = vend = (*src_p) + 1; 398147493Sgad if (*vbegin++ == '{') 399147493Sgad if (*vbegin == '_' || isalphach(*vbegin)) { 400147493Sgad vend = vbegin + 1; 401147493Sgad while (*vend == '_' || isalnumch(*vend)) 402147493Sgad vend++; 403147493Sgad if (*vend == '}') 404147493Sgad bad_reference = 0; 405147493Sgad } 406147493Sgad if (bad_reference) 407147493Sgad errx(1, "Only ${VARNAME} expansion is supported, error at: %s", 408147493Sgad *src_p); 409147493Sgad 410147493Sgad /* 411147493Sgad * We now know we have a valid environment variable name, so update 412147493Sgad * the caller's source-pointer to the last character in that reference, 413147493Sgad * and then pick up the matching value. If the variable is not found, 414147493Sgad * or if it has a null value, then our work here is done. 415147493Sgad */ 416147493Sgad *src_p = vend; 417147493Sgad namelen = vend - vbegin + 1; 418147493Sgad vname = malloc(namelen); 419147493Sgad strlcpy(vname, vbegin, namelen); 420147493Sgad vvalue = getenv(vname); 421147493Sgad if (vvalue == NULL || *vvalue == '\0') { 422147493Sgad if (env_verbosity > 2) 423147493Sgad fprintf(stderr, 424147493Sgad "#env replacing ${%s} with null string\n", 425147493Sgad vname); 426148142Sgad free(vname); 427147521Sgad return (NULL); 428147493Sgad } 429147493Sgad 430147493Sgad if (env_verbosity > 2) 431147493Sgad fprintf(stderr, "#env expanding ${%s} into '%s'\n", vname, 432147493Sgad vvalue); 433147493Sgad 434147493Sgad /* 435147493Sgad * There is some value to copy to the destination. If the value is 436147521Sgad * shorter than the ${VARNAME} reference that it replaces, then our 437147521Sgad * caller can just copy the value to the existing destination. 438147493Sgad */ 439148142Sgad if (strlen(vname) + 3 >= strlen(vvalue)) { 440148142Sgad free(vname); 441147521Sgad return (vvalue); 442148142Sgad } 443147493Sgad 444147493Sgad /* 445147493Sgad * The value is longer than the string it replaces, which means the 446147493Sgad * present destination area is too small to hold it. Create a new 447147971Sgad * destination area, and update the caller's 'dest' variable to match. 448147971Sgad * If the caller has already started copying some info for 'thisarg' 449147971Sgad * into the present destination, then the new destination area must 450147971Sgad * include a copy of that data, and the pointer to 'thisarg' must also 451147971Sgad * be updated. Note that it is still the caller which copies this 452147971Sgad * vvalue to the new *dest. 453147493Sgad */ 454147971Sgad newlen = strlen(vvalue) + strlen(*src_p) + 1; 455147971Sgad if (in_thisarg) { 456147971Sgad **dest_p = '\0'; /* Provide terminator for 'thisarg' */ 457147971Sgad newlen += strlen(*thisarg_p); 458147971Sgad newstr = malloc(newlen); 459147971Sgad strcpy(newstr, *thisarg_p); 460147971Sgad *thisarg_p = newstr; 461147971Sgad } else { 462147971Sgad newstr = malloc(newlen); 463147971Sgad *newstr = '\0'; 464147971Sgad } 465147493Sgad *dest_p = strchr(newstr, '\0'); 466148142Sgad free(vname); 467147521Sgad return (vvalue); 468147493Sgad} 469