1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * John B. Roll Jr.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
35 */
36
37#include <sys/types.h>
38#include <sys/wait.h>
39#include <sys/time.h>
40#include <sys/limits.h>
41#include <sys/resource.h>
42#include <err.h>
43#include <errno.h>
44#include <fcntl.h>
45#include <getopt.h>
46#include <langinfo.h>
47#include <locale.h>
48#include <paths.h>
49#include <regex.h>
50#include <stdbool.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <string.h>
54#include <unistd.h>
55
56#include "pathnames.h"
57
/* Input parsing and command execution. */
static void	parse_input(int argc, char *argv[]);
static void	prerun(int argc, char *argv[]);
static int	prompt(void);
static void	run(char **argv);
static void	usage(void);

/* Substitution helper; not defined in this file. */
bool		strnsubst(char **str, const char *match,
		    const char *replacement, size_t maxsize);

/* Child reaping. */
static pid_t	xwait(int block, int *status);
static void	xexit(const char *name, const int exit_code);
static void	waitchildren(const char *name, int waitall);

/* Bookkeeping for the table of running child pids. */
static void	pids_init(void);
static int	pids_empty(void);
static int	pids_full(void);
static void	pids_add(pid_t pid);
static int	pids_remove(pid_t pid);
static int	findslot(pid_t pid);
static int	findfreeslot(void);
static void	clearslot(int slot);
75
76static char echo[] = _PATH_ECHO;
77static char **av, **bxp, **ep, **endxp, **xp;
78static char *argp, *bbp, *ebp, *inpline, *p, *replstr;
79static const char *eofstr;
80static long eoflen;
81static int count, insingle, indouble, oflag, pflag, tflag, Rflag, rval, zflag;
82static int cnt, Iflag, jfound, Lflag, Sflag, wasquoted, xflag;
83static int curprocs, maxprocs;
84static pid_t *childpids;
85
86static volatile int childerr;
87
88extern char **environ;
89
90static const char *optstr = "+0E:I:J:L:n:oP:pR:S:s:rtx";
91
/* GNU-style long spellings; each maps onto one of the short options. */
static const struct option long_options[] = {
	{ .name = "exit",            .has_arg = no_argument,       .flag = NULL, .val = 'x' },
	{ .name = "interactive",     .has_arg = no_argument,       .flag = NULL, .val = 'p' },
	{ .name = "max-args",        .has_arg = required_argument, .flag = NULL, .val = 'n' },
	{ .name = "max-chars",       .has_arg = required_argument, .flag = NULL, .val = 's' },
	{ .name = "max-procs",       .has_arg = required_argument, .flag = NULL, .val = 'P' },
	{ .name = "no-run-if-empty", .has_arg = no_argument,       .flag = NULL, .val = 'r' },
	{ .name = "null",            .has_arg = no_argument,       .flag = NULL, .val = '0' },
	{ .name = "verbose",         .has_arg = no_argument,       .flag = NULL, .val = 't' },

	/* Terminating entry. */
	{ .name = NULL,              .has_arg = no_argument,       .flag = NULL, .val = 0 },
};
105
106int
107main(int argc, char *argv[])
108{
109	long arg_max;
110	int ch, Jflag, nargs, nflag, nline;
111	size_t linelen;
112	struct rlimit rl;
113	const char *errstr;
114
115	inpline = replstr = NULL;
116	ep = environ;
117	eofstr = "";
118	eoflen = 0;
119	Jflag = nflag = 0;
120
121	(void)setlocale(LC_ALL, "");
122
123	/*
124	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
125	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
126	 * that the smallest argument is 2 bytes in length, this means that
127	 * the number of arguments is limited to:
128	 *
129	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
130	 *
131	 * We arbitrarily limit the number of arguments to 5000.  This is
132	 * allowed by POSIX.2 as long as the resulting minimum exec line is
133	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
134	 * probably not worthwhile.
135	 */
136	nargs = 5000;
137	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
138		errx(1, "sysconf(_SC_ARG_MAX) failed");
139	nline = arg_max - 4 * 1024;
140	while (*ep != NULL) {
141		/* 1 byte for each '\0' */
142		nline -= strlen(*ep++) + 1 + sizeof(*ep);
143	}
144	maxprocs = 1;
145	while ((ch = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)
146		switch (ch) {
147		case 'E':
148			eofstr = optarg;
149			eoflen = strlen(eofstr);
150			break;
151		case 'I':
152			Jflag = 0;
153			Iflag = 1;
154			Lflag = 1;
155			replstr = optarg;
156			break;
157		case 'J':
158			Iflag = 0;
159			Jflag = 1;
160			replstr = optarg;
161			break;
162		case 'L':
163			Lflag = (int)strtonum(optarg, 1, INT_MAX, &errstr);
164			if (errstr)
165				errx(1, "-%c %s: %s", ch, optarg, errstr);
166			break;
167		case 'n':
168			nflag = 1;
169			nargs = (int)strtonum(optarg, 1, INT_MAX, &errstr);
170			if (errstr)
171				errx(1, "-%c %s: %s", ch, optarg, errstr);
172			break;
173		case 'o':
174			oflag = 1;
175			break;
176		case 'P':
177			maxprocs = (int)strtonum(optarg, 0, INT_MAX, &errstr);
178			if (errstr)
179				errx(1, "-%c %s: %s", ch, optarg, errstr);
180			if (getrlimit(RLIMIT_NPROC, &rl) != 0)
181				errx(1, "getrlimit failed");
182			if (maxprocs == 0 || maxprocs > rl.rlim_cur)
183				maxprocs = rl.rlim_cur;
184			break;
185		case 'p':
186			pflag = 1;
187			break;
188		case 'R':
189			Rflag = (int)strtonum(optarg, INT_MIN, INT_MAX, &errstr);
190			if (errstr)
191				errx(1, "-%c %s: %s", ch, optarg, errstr);
192			if (!Rflag)
193				errx(1, "-%c %s: %s", ch, optarg, "must be non-zero");
194			break;
195		case 'r':
196			/* GNU compatibility */
197			break;
198		case 'S':
199			Sflag = (int)strtonum(optarg, 0, INT_MAX, &errstr);
200			if (errstr)
201				errx(1, "-%c %s: %s", ch, optarg, errstr);
202			break;
203		case 's':
204			nline = (int)strtonum(optarg, 0, INT_MAX, &errstr);
205			if (errstr)
206				errx(1, "-%c %s: %s", ch, optarg, errstr);
207			break;
208		case 't':
209			tflag = 1;
210			break;
211		case 'x':
212			xflag = 1;
213			break;
214		case '0':
215			zflag = 1;
216			break;
217		case '?':
218		default:
219			usage();
220	}
221	argc -= optind;
222	argv += optind;
223
224	if (!Iflag && Rflag)
225		usage();
226	if (!Iflag && Sflag)
227		usage();
228	if (Iflag && !Rflag)
229		Rflag = 5;
230	if (Iflag && !Sflag)
231		Sflag = 255;
232	if (xflag && !nflag)
233		usage();
234	if (Iflag || Lflag)
235		xflag = 1;
236	if (replstr != NULL && *replstr == '\0')
237		errx(1, "replstr may not be empty");
238
239	pids_init();
240
241	/*
242	 * Allocate pointers for the utility name, the utility arguments,
243	 * the maximum arguments to be read from stdin and the trailing
244	 * NULL.
245	 */
246	linelen = 1 + argc + (size_t)nargs + 1;
247	if ((av = bxp = malloc(linelen * sizeof(char *))) == NULL)
248		errx(1, "malloc failed");
249
250	/*
251	 * Use the user's name for the utility as argv[0], just like the
252	 * shell.  Echo is the default.  Set up pointers for the user's
253	 * arguments.
254	 */
255	if (*argv == NULL)
256		cnt = strlen(*bxp++ = echo);
257	else {
258		do {
259			if (Jflag && strcmp(*argv, replstr) == 0) {
260				char **avj;
261				jfound = 1;
262				argv++;
263				for (avj = argv; *avj; avj++)
264					cnt += strlen(*avj) + 1;
265				break;
266			}
267			cnt += strlen(*bxp++ = *argv) + 1;
268		} while (*++argv != NULL);
269	}
270
271	/*
272	 * Set up begin/end/traversing pointers into the array.  The -n
273	 * count doesn't include the trailing NULL pointer, so the malloc
274	 * added in an extra slot.
275	 */
276	endxp = (xp = bxp) + nargs;
277
278	/*
279	 * Allocate buffer space for the arguments read from stdin and the
280	 * trailing NULL.  Buffer space is defined as the default or specified
281	 * space, minus the length of the utility name and arguments.  Set up
282	 * begin/end/traversing pointers into the array.  The -s count does
283	 * include the trailing NULL, so the malloc didn't add in an extra
284	 * slot.
285	 */
286	nline -= cnt;
287	if (nline <= 0)
288		errx(1, "insufficient space for command");
289
290	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
291		errx(1, "malloc failed");
292	ebp = (argp = p = bbp) + nline - 1;
293	for (;;)
294		parse_input(argc, argv);
295}
296
297static void
298parse_input(int argc, char *argv[])
299{
300	int ch, foundeof;
301	char **avj;
302
303	foundeof = 0;
304
305	switch (ch = getchar()) {
306	case EOF:
307		/* No arguments since last exec. */
308		if (p == bbp) {
309			waitchildren(*av, 1);
310			exit(rval);
311		}
312		goto arg1;
313	case ' ':
314	case '\t':
315		/* Quotes escape tabs and spaces. */
316		if (insingle || indouble || zflag)
317			goto addch;
318		goto arg2;
319	case '\0':
320		if (zflag) {
321			/*
322			 * Increment 'count', so that nulls will be treated
323			 * as end-of-line, as well as end-of-argument.  This
324			 * is needed so -0 works properly with -I and -L.
325			 */
326			count++;
327			goto arg2;
328		}
329		goto addch;
330	case '\n':
331		if (zflag)
332			goto addch;
333		count++;	    /* Indicate end-of-line (used by -L) */
334
335		/* Quotes do not escape newlines. */
336arg1:		if (insingle || indouble) {
337			warnx("unterminated quote");
338			xexit(*av, 1);
339		}
340arg2:
341		foundeof = eoflen != 0 && p - argp == eoflen &&
342		    strncmp(argp, eofstr, eoflen) == 0;
343
344		/* Do not make empty args unless they are quoted */
345		if ((argp != p || wasquoted) && !foundeof) {
346			*p++ = '\0';
347			*xp++ = argp;
348			if (Iflag) {
349				size_t curlen;
350
351				if (inpline == NULL)
352					curlen = 0;
353				else {
354					/*
355					 * If this string is not zero
356					 * length, append a space for
357					 * separation before the next
358					 * argument.
359					 */
360					if ((curlen = strlen(inpline)))
361						strcat(inpline, " ");
362				}
363				curlen++;
364				/*
365				 * Allocate enough to hold what we will
366				 * be holding in a second, and to append
367				 * a space next time through, if we have
368				 * to.
369				 */
370				inpline = realloc(inpline, curlen + 2 +
371				    strlen(argp));
372				if (inpline == NULL) {
373					warnx("realloc failed");
374					xexit(*av, 1);
375				}
376				if (curlen == 1)
377					strcpy(inpline, argp);
378				else
379					strcat(inpline, argp);
380			}
381		}
382
383		/*
384		 * If max'd out on args or buffer, or reached EOF,
385		 * run the command.  If xflag and max'd out on buffer
386		 * but not on args, object.  Having reached the limit
387		 * of input lines, as specified by -L is the same as
388		 * maxing out on arguments.
389		 */
390		if (xp == endxp || p > ebp || ch == EOF ||
391		    (Lflag <= count && xflag) || foundeof) {
392			if (xflag && xp != endxp && p > ebp) {
393				warnx("insufficient space for arguments");
394				xexit(*av, 1);
395			}
396			if (jfound) {
397				for (avj = argv; *avj; avj++)
398					*xp++ = *avj;
399			}
400			prerun(argc, av);
401			if (ch == EOF || foundeof) {
402				waitchildren(*av, 1);
403				exit(rval);
404			}
405			p = bbp;
406			xp = bxp;
407			count = 0;
408		}
409		argp = p;
410		wasquoted = 0;
411		break;
412	case '\'':
413		if (indouble || zflag)
414			goto addch;
415		insingle = !insingle;
416		wasquoted = 1;
417		break;
418	case '"':
419		if (insingle || zflag)
420			goto addch;
421		indouble = !indouble;
422		wasquoted = 1;
423		break;
424	case '\\':
425		if (zflag)
426			goto addch;
427		/* Backslash escapes anything, is escaped by quotes. */
428		if (!insingle && !indouble && (ch = getchar()) == EOF) {
429			warnx("backslash at EOF");
430			xexit(*av, 1);
431		}
432		/* FALLTHROUGH */
433	default:
434addch:		if (p < ebp) {
435			*p++ = ch;
436			break;
437		}
438
439		/* If only one argument, not enough buffer space. */
440		if (bxp == xp) {
441			warnx("insufficient space for argument");
442			xexit(*av, 1);
443		}
444		/* Didn't hit argument limit, so if xflag object. */
445		if (xflag) {
446			warnx("insufficient space for arguments");
447			xexit(*av, 1);
448		}
449
450		if (jfound) {
451			for (avj = argv; *avj; avj++)
452				*xp++ = *avj;
453		}
454		prerun(argc, av);
455		xp = bxp;
456		cnt = ebp - argp;
457		memcpy(bbp, argp, (size_t)cnt);
458		p = (argp = bbp) + cnt;
459		*p++ = ch;
460		break;
461	}
462}
463
464/*
465 * Do things necessary before run()'ing, such as -I substitution,
466 * and then call run().
467 */
468static void
469prerun(int argc, char *argv[])
470{
471	char **tmp, **tmp2, **avj;
472	int repls;
473
474	repls = Rflag;
475
476	if (argc == 0 || repls == 0) {
477		*xp = NULL;
478		run(argv);
479		return;
480	}
481
482	avj = argv;
483
484	/*
485	 * Allocate memory to hold the argument list, and
486	 * a NULL at the tail.
487	 */
488	tmp = malloc((argc + 1) * sizeof(char *));
489	if (tmp == NULL) {
490		warnx("malloc failed");
491		xexit(*argv, 1);
492	}
493	tmp2 = tmp;
494
495	/*
496	 * Save the first argument and iterate over it, we
497	 * cannot do strnsubst() to it.
498	 */
499	if ((*tmp++ = strdup(*avj++)) == NULL) {
500		warnx("strdup failed");
501		xexit(*argv, 1);
502	}
503
504	/*
505	 * For each argument to utility, if we have not used up
506	 * the number of replacements we are allowed to do, and
507	 * if the argument contains at least one occurrence of
508	 * replstr, call strnsubst(), else just save the string.
509	 * Iterations over elements of avj and tmp are done
510	 * where appropriate.
511	 */
512	while (--argc) {
513		*tmp = *avj++;
514		if (repls && strstr(*tmp, replstr) != NULL) {
515			if (strnsubst(tmp++, replstr, inpline, (size_t)Sflag)) {
516				warnx("command line cannot be assembled, too long");
517				xexit(*argv, 1);
518			}
519			if (repls > 0)
520				repls--;
521		} else {
522			if ((*tmp = strdup(*tmp)) == NULL) {
523				warnx("strdup failed");
524				xexit(*argv, 1);
525			}
526			tmp++;
527		}
528	}
529
530	/*
531	 * Run it.
532	 */
533	*tmp = NULL;
534	run(tmp2);
535
536	/*
537	 * Walk from the tail to the head, free along the way.
538	 */
539	for (; tmp2 != tmp; tmp--)
540		free(*tmp);
541	/*
542	 * Now free the list itself.
543	 */
544	free(tmp2);
545
546	/*
547	 * Free the input line buffer, if we have one.
548	 */
549	if (inpline != NULL) {
550		free(inpline);
551		inpline = NULL;
552	}
553}
554
555static void
556run(char **argv)
557{
558	pid_t pid;
559	int fd;
560	char **avec;
561
562	/*
563	 * If the user wants to be notified of each command before it is
564	 * executed, notify them.  If they want the notification to be
565	 * followed by a prompt, then prompt them.
566	 */
567	if (tflag || pflag) {
568		(void)fprintf(stderr, "%s", *argv);
569		for (avec = argv + 1; *avec != NULL; ++avec)
570			(void)fprintf(stderr, " %s", *avec);
571		/*
572		 * If the user has asked to be prompted, do so.
573		 */
574		if (pflag)
575			/*
576			 * If they asked not to exec, return without execution
577			 * but if they asked to, go to the execution.  If we
578			 * could not open their tty, break the switch and drop
579			 * back to -t behaviour.
580			 */
581			switch (prompt()) {
582			case 0:
583				return;
584			case 1:
585				goto exec;
586			case 2:
587				break;
588			}
589		(void)fprintf(stderr, "\n");
590		(void)fflush(stderr);
591	}
592exec:
593	childerr = 0;
594	switch (pid = vfork()) {
595	case -1:
596		warn("vfork");
597		xexit(*argv, 1);
598	case 0:
599		if (oflag) {
600			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1)
601				err(1, "can't open /dev/tty");
602		} else {
603			fd = open(_PATH_DEVNULL, O_RDONLY);
604		}
605		if (fd > STDIN_FILENO) {
606			if (dup2(fd, STDIN_FILENO) != 0)
607				err(1, "can't dup2 to stdin");
608			close(fd);
609		}
610		execvp(argv[0], argv);
611		childerr = errno;
612		_exit(1);
613	}
614	pids_add(pid);
615	waitchildren(*argv, 0);
616}
617
/*
 * Reap one tracked child and hand back its pid and exit status.
 *
 * Children that are not in the pid table are reaped and discarded.
 * With no tracked children at all, errno is set to ECHILD and -1 is
 * returned.  When block is non-zero the call waits until a child exits;
 * otherwise it returns 0 at once if none has exited yet.  A waitpid()
 * failure is passed through unchanged.
 */
static pid_t
xwait(int block, int *status)
{
	const int options = block ? 0 : WNOHANG;
	pid_t reaped;

	if (pids_empty()) {
		errno = ECHILD;
		return (-1);
	}

	for (;;) {
		reaped = waitpid(-1, status, options);
		/* Stop on error, on "nothing yet", or on a child of ours. */
		if (reaped <= 0 || pids_remove(reaped))
			return (reaped);
	}
}
641
/*
 * Wait for all remaining children, then terminate with exit_code.
 * name is handed to waitchildren() for use in its diagnostics.
 */
static void
xexit(const char *name, const int exit_code)
{

	waitchildren(name, 1);
	exit(exit_code);
	/* NOTREACHED */
}
647
648static void
649waitchildren(const char *name, int waitall)
650{
651	pid_t pid;
652	int status;
653	int cause_exit = 0;
654
655	while ((pid = xwait(waitall || pids_full(), &status)) > 0) {
656		/*
657		 * If we couldn't invoke the utility or if utility exited
658		 * because of a signal or with a value of 255, warn (per
659		 * POSIX), and then wait until all other children have
660		 * exited before exiting 1-125. POSIX requires us to stop
661		 * reading if child exits because of a signal or with 255,
662		 * but it does not require us to exit immediately; waiting
663		 * is preferable to orphaning.
664		 */
665		if (childerr != 0 && cause_exit == 0) {
666			errno = childerr;
667			waitall = 1;
668			cause_exit = errno == ENOENT ? 127 : 126;
669			warn("%s", name);
670		} else if (WIFSIGNALED(status)) {
671			waitall = cause_exit = 1;
672			warnx("%s: terminated with signal %d; aborting",
673			    name, WTERMSIG(status));
674		} else if (WEXITSTATUS(status) == 255) {
675			waitall = cause_exit = 1;
676			warnx("%s: exited with status 255; aborting", name);
677		} else if (WEXITSTATUS(status))
678 			rval = 1;
679	}
680
681 	if (cause_exit)
682		exit(cause_exit);
683	if (pid == -1 && errno != ECHILD)
684		err(1, "waitpid");
685}
686
687#define	NOPID	(0)
688
689static void
690pids_init(void)
691{
692	int i;
693
694	if ((childpids = malloc(maxprocs * sizeof(*childpids))) == NULL)
695		errx(1, "malloc failed");
696
697	for (i = 0; i < maxprocs; i++)
698		clearslot(i);
699}
700
701static int
702pids_empty(void)
703{
704
705	return (curprocs == 0);
706}
707
708static int
709pids_full(void)
710{
711
712	return (curprocs >= maxprocs);
713}
714
715static void
716pids_add(pid_t pid)
717{
718	int slot;
719
720	slot = findfreeslot();
721	childpids[slot] = pid;
722	curprocs++;
723}
724
725static int
726pids_remove(pid_t pid)
727{
728	int slot;
729
730	if ((slot = findslot(pid)) < 0)
731		return (0);
732
733	clearslot(slot);
734	curprocs--;
735	return (1);
736}
737
738static int
739findfreeslot(void)
740{
741	int slot;
742
743	if ((slot = findslot(NOPID)) < 0)
744		errx(1, "internal error: no free pid slot");
745	return (slot);
746}
747
748static int
749findslot(pid_t pid)
750{
751	int slot;
752
753	for (slot = 0; slot < maxprocs; slot++)
754		if (childpids[slot] == pid)
755			return (slot);
756	return (-1);
757}
758
759static void
760clearslot(int slot)
761{
762
763	childpids[slot] = NOPID;
764}
765
/*
 * Prompt the user about running a command.
 *
 * Prints "?..." on stderr, reads one line from the controlling terminal
 * and matches it against the locale's affirmative expression (YESEXPR).
 *
 * Returns 1 when the answer is affirmative, 0 when it is not (or when
 * the answer could not be read or the expression could not be compiled),
 * and 2 when the terminal could not be opened.
 */
static int
prompt(void)
{
	regex_t cre;
	size_t rsize = 0;
	int match;
	char *response = NULL;
	FILE *ttyfp;

	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
		return (2);	/* Indicate that the TTY failed to open. */
	(void)fprintf(stderr, "?...");
	(void)fflush(stderr);
	if (getline(&response, &rsize, ttyfp) < 0 ||
	    regcomp(&cre, nl_langinfo(YESEXPR), REG_EXTENDED) != 0) {
		/*
		 * getline() may have allocated a buffer even when it
		 * fails, and it certainly has when only regcomp() failed.
		 */
		free(response);
		(void)fclose(ttyfp);
		return (0);
	}
	match = regexec(&cre, response, 0, NULL, 0);
	free(response);
	(void)fclose(ttyfp);
	regfree(&cre);
	return (match == 0);
}
793
/* Print the command synopsis on stderr and exit with status 1. */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0opt] [-E eofstr] [-I replstr [-R replacements] [-S replsize]]\n"
"             [-J replstr] [-L number] [-n number [-x]] [-P maxprocs]\n"
"             [-s size] [utility [argument ...]]\n";

	fputs(synopsis, stderr);
	exit(1);
}
804