1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
 * Copyright © 2002, Jörg Wunsch
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * 4.3BSD UI-compatible whereis(1) utility.  Rewritten from scratch
30 * since the original 4.3BSD version suffers legal problems that
31 * prevent it from being redistributed, and since the 4.4BSD version
32 * was pretty inferior in functionality.
33 */
34
35#include <sys/types.h>
36#include <sys/stat.h>
37#include <sys/sysctl.h>
38
39#include <dirent.h>
40#include <err.h>
41#include <errno.h>
42#include <locale.h>
43#include <regex.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <sysexits.h>
48#include <unistd.h>
49
50#include "pathnames.h"
51
52#define	NO_BIN_FOUND	1
53#define	NO_MAN_FOUND	2
54#define	NO_SRC_FOUND	4
55
56typedef const char *ccharp;
57
58static int opt_a, opt_b, opt_m, opt_q, opt_s, opt_u, opt_x;
59static ccharp *bindirs, *mandirs, *sourcedirs;
60static char **query;
61
62static const char *sourcepath = PATH_SOURCES;
63
64static char	*colonify(ccharp *);
65static int	 contains(ccharp *, const char *);
66static void	 decolonify(char *, ccharp **, int *);
67static void	 defaults(void);
68static void	 scanopts(int, char **);
69static void	 usage(void);
70
71/*
72 * Throughout this program, a number of strings are dynamically
73 * allocated but never freed.  Their memory is written to when
74 * splitting the strings into string lists which will later be
75 * processed.  Since it's important that those string lists remain
76 * valid even after the functions allocating the memory returned,
77 * those functions cannot free them.  They could be freed only at end
78 * of main(), which is pretty pointless anyway.
79 *
 * The overall amount of memory to be allocated for processing the
 * strings is not expected to exceed a few kilobytes.  For that
 * reason, allocation can be assumed to succeed (within a virtual
 * memory environment), thus we simply bail out using abort(3) in
 * case of an allocation failure.
85 */
86
/*
 * Print the command synopsis to stderr and terminate with EX_USAGE.
 * Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: whereis [-abmqsux] [-BMS dir ... -f] program ...\n";

	(void)fputs(synopsis, stderr);
	exit(EX_USAGE);
}
94
95/*
96 * Scan options passed to program.
97 *
98 * Note that the -B/-M/-S options expect a list of directory
99 * names that must be terminated with -f.
100 */
101static void
102scanopts(int argc, char **argv)
103{
104	int c, i;
105	ccharp **dirlist;
106
107	while ((c = getopt(argc, argv, "BMSabfmqsux")) != -1)
108		switch (c) {
109		case 'B':
110			dirlist = &bindirs;
111			goto dolist;
112
113		case 'M':
114			dirlist = &mandirs;
115			goto dolist;
116
117		case 'S':
118			dirlist = &sourcedirs;
119		  dolist:
120			i = 0;
121			*dirlist = realloc(*dirlist, (i + 1) * sizeof(char *));
122			(*dirlist)[i] = NULL;
123			while (optind < argc &&
124			       strcmp(argv[optind], "-f") != 0 &&
125			       strcmp(argv[optind], "-B") != 0 &&
126			       strcmp(argv[optind], "-M") != 0 &&
127			       strcmp(argv[optind], "-S") != 0) {
128				decolonify(argv[optind], dirlist, &i);
129				optind++;
130			}
131			break;
132
133		case 'a':
134			opt_a = 1;
135			break;
136
137		case 'b':
138			opt_b = 1;
139			break;
140
141		case 'f':
142			goto breakout;
143
144		case 'm':
145			opt_m = 1;
146			break;
147
148		case 'q':
149			opt_q = 1;
150			break;
151
152		case 's':
153			opt_s = 1;
154			break;
155
156		case 'u':
157			opt_u = 1;
158			break;
159
160		case 'x':
161			opt_x = 1;
162			break;
163
164		default:
165			usage();
166		}
167  breakout:
168	if (optind == argc)
169		usage();
170	query = argv + optind;
171}
172
/*
 * Tell whether the NULL-terminated string list `cpp' holds an entry
 * equal to `s'.  Returns 1 if so, 0 otherwise; a NULL list is treated
 * as empty.
 */
static int
contains(ccharp *cpp, const char *s)
{
	size_t idx;

	if (cpp != NULL) {
		for (idx = 0; cpp[idx] != NULL; idx++) {
			if (strcmp(cpp[idx], s) == 0)
				return (1);
		}
	}
	return (0);
}
191
192/*
193 * Split string `s' at colons, and pass it to the string list pointed
194 * to by `cppp' (which has `*ip' elements).  Note that the original
195 * string is modified by replacing the colon with a NUL byte.  The
196 * partial string is only added if it has a length greater than 0, and
197 * if it's not already contained in the string list.
198 */
199static void
200decolonify(char *s, ccharp **cppp, int *ip)
201{
202	char *cp;
203
204	while ((cp = strchr(s, ':')), *s != '\0') {
205		if (cp)
206			*cp = '\0';
207		if (strlen(s) && !contains(*cppp, s)) {
208			*cppp = realloc(*cppp, (*ip + 2) * sizeof(char *));
209			if (*cppp == NULL)
210				abort();
211			(*cppp)[*ip] = s;
212			(*cppp)[*ip + 1] = NULL;
213			(*ip)++;
214		}
215		if (cp)
216			s = cp + 1;
217		else
218			break;
219	}
220}
221
/*
 * Join the NULL-terminated string list `cpp' into a single
 * colon-separated string.
 *
 * Returns a freshly allocated string owned by the caller, or NULL if
 * `cpp' itself is NULL.  A list without any entries yields an empty
 * string.  Aborts on allocation failure.
 */
static char *
colonify(ccharp *cpp)
{
	size_t len, pos, total;
	char *cp;
	int i;

	if (cpp == NULL)
		return (NULL);

	/* Each entry needs room for itself plus one separator. */
	for (total = 0, i = 0; cpp[i] != NULL; i++)
		total += strlen(cpp[i]) + 1;
	if ((cp = malloc(total + 1)) == NULL)
		abort();

	for (pos = 0, i = 0; cpp[i] != NULL; i++) {
		len = strlen(cpp[i]);
		memcpy(cp + pos, cpp[i], len);
		pos += len;
		cp[pos++] = ':';
	}
	/*
	 * Eliminate the last colon.  With an empty list there is none,
	 * and nothing before the buffer must be touched.
	 */
	if (pos > 0)
		pos--;
	cp[pos] = '\0';

	return (cp);
}
247
/*
 * Provide defaults for all options and directory lists.
 *
 * If none of -b, -m and -s was given, all three are enabled.  Each
 * directory list that is still NULL (i.e. not set up by scanopts()) is
 * filled in:
 *
 *   bindirs	the system's user.cs_path, then PATH_LIBEXEC, then the
 *		components of $PATH
 *   mandirs	the first line printed by MANPATHCMD
 *   sourcedirs	the components of `sourcepath', then the eligible
 *		subdirectories of PATH_PORTS
 *
 * Exits through err(3) if the sysctl, the manpath command or access
 * to PATH_PORTS fails unexpectedly; aborts on allocation failure.
 */
static void
defaults(void)
{
	size_t s;
	char *b, buf[BUFSIZ], *cp;
	int nele;
	FILE *p;
	DIR *dir;
	struct stat sb;
	struct dirent *dirp;
	const int oid[2] = {CTL_USER, USER_CS_PATH};

	/* default to -bms if none has been specified */
	if (!opt_b && !opt_m && !opt_s)
		opt_b = opt_m = opt_s = 1;

	/* -b defaults to default path + /usr/libexec +
	 * user's path */
	if (!bindirs) {
		/* first call only asks for the required buffer size */
		if (sysctl(oid, 2, NULL, &s, NULL, 0) == -1)
			err(EX_OSERR, "sysctl(\"user.cs_path\")");
		if ((b = malloc(s + 1)) == NULL)
			abort();
		if (sysctl(oid, 2, b, &s, NULL, 0) == -1)
			err(EX_OSERR, "sysctl(\"user.cs_path\")");
		nele = 0;
		decolonify(b, &bindirs, &nele);
		bindirs = realloc(bindirs, (nele + 2) * sizeof(char *));
		if (bindirs == NULL)
			abort();
		bindirs[nele++] = PATH_LIBEXEC;
		bindirs[nele] = NULL;
		if ((cp = getenv("PATH")) != NULL) {
			/* don't destroy the original environment... */
			b = strdup(cp);
			if (b == NULL)
				abort();
			decolonify(b, &bindirs, &nele);
		}
	}

	/* -m defaults to $(manpath) */
	if (!mandirs) {
		if ((p = popen(MANPATHCMD, "r")) == NULL)
			err(EX_OSERR, "cannot execute manpath command");
		if (fgets(buf, BUFSIZ - 1, p) == NULL ||
		    pclose(p))
			err(EX_OSERR, "error processing manpath results");
		if ((b = strchr(buf, '\n')) != NULL)
			*b = '\0';
		/* the list entries point into this copy, so it is kept */
		b = strdup(buf);
		if (b == NULL)
			abort();
		nele = 0;
		decolonify(b, &mandirs, &nele);
	}

	/* -s defaults to precompiled list, plus subdirs of /usr/ports */
	if (!sourcedirs) {
		b = strdup(sourcepath);
		if (b == NULL)
			abort();
		nele = 0;
		decolonify(b, &sourcedirs, &nele);

		/*
		 * The early returns below are fine because this is the
		 * last thing the function does.
		 */
		if (stat(PATH_PORTS, &sb) == -1) {
			if (errno == ENOENT)
				/* no /usr/ports, we are done */
				return;
			err(EX_OSERR, "stat(" PATH_PORTS ")");
		}
		if ((sb.st_mode & S_IFMT) != S_IFDIR)
			/* /usr/ports is not a directory, ignore */
			return;
		if (access(PATH_PORTS, R_OK | X_OK) != 0)
			return;
		if ((dir = opendir(PATH_PORTS)) == NULL)
			err(EX_OSERR, "opendir(" PATH_PORTS ")");
		while ((dirp = readdir(dir)) != NULL) {
			/*
			 * Not everything below PATH_PORTS is of
			 * interest.  First, all dot files and
			 * directories (e. g. .snap) can be ignored.
			 * Also, all subdirectories starting with a
			 * capital letter are not going to be
			 * examined, as they are used for internal
			 * purposes (Mk, Tools, ...).  This also
			 * matches a possible CVS subdirectory.
			 * Finally, the distfiles subdirectory is also
			 * special, and should not be considered to
			 * avoid false matches.
			 */
			if (dirp->d_name[0] == '.' ||
			    /*
			     * isupper() not used on purpose: the
			     * check is supposed to default to the C
			     * locale instead of the current user's
			     * locale.
			     */
			    (dirp->d_name[0] >= 'A' && dirp->d_name[0] <= 'Z') ||
			    strcmp(dirp->d_name, "distfiles") == 0)
				continue;
			/* sizeof covers the NUL; the extra byte is for "/" */
			if ((b = malloc(sizeof PATH_PORTS + 1 + dirp->d_namlen))
			    == NULL)
				abort();
			strcpy(b, PATH_PORTS);
			strcat(b, "/");
			strcat(b, dirp->d_name);
			/* only readable, searchable directories qualify */
			if (stat(b, &sb) == -1 ||
			    (sb.st_mode & S_IFMT) != S_IFDIR ||
			    access(b, R_OK | X_OK) != 0) {
				free(b);
				continue;
			}
			sourcedirs = realloc(sourcedirs,
					     (nele + 2) * sizeof(char *));
			if (sourcedirs == NULL)
				abort();
			/* the list takes over ownership of `b' */
			sourcedirs[nele++] = b;
			sourcedirs[nele] = NULL;
		}
		closedir(dir);
	}
}
375
376int
377main(int argc, char **argv)
378{
379	int unusual, i, printed;
380	char *bin, buf[BUFSIZ], *cp, *cp2, *man, *name, *src;
381	ccharp *dp;
382	size_t nlen, olen, s;
383	struct stat sb;
384	regex_t re, re2;
385	regmatch_t matches[2];
386	regoff_t rlen;
387	FILE *p;
388
389	setlocale(LC_ALL, "");
390
391	scanopts(argc, argv);
392	defaults();
393
394	if (mandirs == NULL)
395		opt_m = 0;
396	if (bindirs == NULL)
397		opt_b = 0;
398	if (sourcedirs == NULL)
399		opt_s = 0;
400	if (opt_m + opt_b + opt_s == 0)
401		errx(EX_DATAERR, "no directories to search");
402
403	if (opt_m) {
404		setenv("MANPATH", colonify(mandirs), 1);
405		if ((i = regcomp(&re, MANWHEREISMATCH, REG_EXTENDED)) != 0) {
406			regerror(i, &re, buf, BUFSIZ - 1);
407			errx(EX_UNAVAILABLE, "regcomp(%s) failed: %s",
408			     MANWHEREISMATCH, buf);
409		}
410	}
411
412	for (; (name = *query) != NULL; query++) {
413		/* strip leading path name component */
414		if ((cp = strrchr(name, '/')) != NULL)
415			name = cp + 1;
416		/* strip SCCS or RCS suffix/prefix */
417		if (strlen(name) > 2 && strncmp(name, "s.", 2) == 0)
418			name += 2;
419		if ((s = strlen(name)) > 2 && strcmp(name + s - 2, ",v") == 0)
420			name[s - 2] = '\0';
421		/* compression suffix */
422		s = strlen(name);
423		if (s > 2 &&
424		    (strcmp(name + s - 2, ".z") == 0 ||
425		     strcmp(name + s - 2, ".Z") == 0))
426			name[s - 2] = '\0';
427		else if (s > 3 &&
428			 strcmp(name + s - 3, ".gz") == 0)
429			name[s - 3] = '\0';
430		else if (s > 4 &&
431			 strcmp(name + s - 4, ".bz2") == 0)
432			name[s - 4] = '\0';
433
434		unusual = 0;
435		bin = man = src = NULL;
436		s = strlen(name);
437
438		if (opt_b) {
439			/*
440			 * Binaries have to match exactly, and must be regular
441			 * executable files.
442			 */
443			unusual = unusual | NO_BIN_FOUND;
444			for (dp = bindirs; *dp != NULL; dp++) {
445				cp = malloc(strlen(*dp) + 1 + s + 1);
446				if (cp == NULL)
447					abort();
448				strcpy(cp, *dp);
449				strcat(cp, "/");
450				strcat(cp, name);
451				if (stat(cp, &sb) == 0 &&
452				    (sb.st_mode & S_IFMT) == S_IFREG &&
453				    (sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))
454				    != 0) {
455					unusual = unusual & ~NO_BIN_FOUND;
456					if (bin == NULL) {
457						bin = strdup(cp);
458					} else {
459						olen = strlen(bin);
460						nlen = strlen(cp);
461						bin = realloc(bin,
462							      olen + nlen + 2);
463						if (bin == NULL)
464							abort();
465						strcat(bin, " ");
466						strcat(bin, cp);
467					}
468					if (!opt_a) {
469						free(cp);
470						break;
471					}
472				}
473				free(cp);
474			}
475		}
476
477		if (opt_m) {
478			/*
479			 * Ask the man command to perform the search for us.
480			 */
481			unusual = unusual | NO_MAN_FOUND;
482			if (opt_a)
483				cp = malloc(sizeof MANWHEREISALLCMD - 2 + s);
484			else
485				cp = malloc(sizeof MANWHEREISCMD - 2 + s);
486
487			if (cp == NULL)
488				abort();
489
490			if (opt_a)
491				sprintf(cp, MANWHEREISALLCMD, name);
492			else
493				sprintf(cp, MANWHEREISCMD, name);
494
495			if ((p = popen(cp, "r")) != NULL) {
496
497				while (fgets(buf, BUFSIZ - 1, p) != NULL) {
498					unusual = unusual & ~NO_MAN_FOUND;
499
500					if ((cp2 = strchr(buf, '\n')) != NULL)
501						*cp2 = '\0';
502					if (regexec(&re, buf, 2,
503						    matches, 0) == 0 &&
504					    (rlen = matches[1].rm_eo -
505					     matches[1].rm_so) > 0) {
506						/*
507						 * man -w found formatted
508						 * page, need to pick up
509						 * source page name.
510						 */
511						cp2 = malloc(rlen + 1);
512						if (cp2 == NULL)
513							abort();
514						memcpy(cp2,
515						       buf + matches[1].rm_so,
516						       rlen);
517						cp2[rlen] = '\0';
518					} else {
519						/*
520						 * man -w found plain source
521						 * page, use it.
522						 */
523						cp2 = strdup(buf);
524						if (cp2 == NULL)
525							abort();
526					}
527
528					if (man == NULL) {
529						man = strdup(cp2);
530					} else {
531						olen = strlen(man);
532						nlen = strlen(cp2);
533						man = realloc(man,
534							      olen + nlen + 2);
535						if (man == NULL)
536							abort();
537						strcat(man, " ");
538						strcat(man, cp2);
539					}
540
541					free(cp2);
542
543					if (!opt_a)
544						break;
545				}
546				pclose(p);
547				free(cp);
548			}
549		}
550
551		if (opt_s) {
552			/*
553			 * Sources match if a subdir with the exact
554			 * name is found.
555			 */
556			unusual = unusual | NO_SRC_FOUND;
557			for (dp = sourcedirs; *dp != NULL; dp++) {
558				cp = malloc(strlen(*dp) + 1 + s + 1);
559				if (cp == NULL)
560					abort();
561				strcpy(cp, *dp);
562				strcat(cp, "/");
563				strcat(cp, name);
564				if (stat(cp, &sb) == 0 &&
565				    (sb.st_mode & S_IFMT) == S_IFDIR) {
566					unusual = unusual & ~NO_SRC_FOUND;
567					if (src == NULL) {
568						src = strdup(cp);
569					} else {
570						olen = strlen(src);
571						nlen = strlen(cp);
572						src = realloc(src,
573							      olen + nlen + 2);
574						if (src == NULL)
575							abort();
576						strcat(src, " ");
577						strcat(src, cp);
578					}
579					if (!opt_a) {
580						free(cp);
581						break;
582					}
583				}
584				free(cp);
585			}
586			/*
587			 * If still not found, ask locate to search it
588			 * for us.  This will find sources for things
589			 * like lpr that are well hidden in the
590			 * /usr/src tree, but takes a lot longer.
591			 * Thus, option -x (`expensive') prevents this
592			 * search.
593			 *
594			 * Do only match locate output that starts
595			 * with one of our source directories, and at
596			 * least one further level of subdirectories.
597			 */
598			if (opt_x || (src && !opt_a))
599				goto done_sources;
600
601			cp = malloc(sizeof LOCATECMD - 2 + s);
602			if (cp == NULL)
603				abort();
604			sprintf(cp, LOCATECMD, name);
605			if ((p = popen(cp, "r")) == NULL)
606				goto done_sources;
607			while ((src == NULL || opt_a) &&
608			       (fgets(buf, BUFSIZ - 1, p)) != NULL) {
609				if ((cp2 = strchr(buf, '\n')) != NULL)
610					*cp2 = '\0';
611				for (dp = sourcedirs;
612				     (src == NULL || opt_a) && *dp != NULL;
613				     dp++) {
614					cp2 = malloc(strlen(*dp) + 9);
615					if (cp2 == NULL)
616						abort();
617					strcpy(cp2, "^");
618					strcat(cp2, *dp);
619					strcat(cp2, "/[^/]+/");
620					if ((i = regcomp(&re2, cp2,
621							 REG_EXTENDED|REG_NOSUB))
622					    != 0) {
623						regerror(i, &re, buf,
624							 BUFSIZ - 1);
625						errx(EX_UNAVAILABLE,
626						     "regcomp(%s) failed: %s",
627						     cp2, buf);
628					}
629					free(cp2);
630					if (regexec(&re2, buf, 0,
631						    (regmatch_t *)NULL, 0)
632					    == 0) {
633						unusual = unusual &
634						          ~NO_SRC_FOUND;
635						if (src == NULL) {
636							src = strdup(buf);
637						} else {
638							olen = strlen(src);
639							nlen = strlen(buf);
640							src = realloc(src,
641								      olen +
642								      nlen + 2);
643							if (src == NULL)
644								abort();
645							strcat(src, " ");
646							strcat(src, buf);
647						}
648					}
649					regfree(&re2);
650				}
651			}
652			pclose(p);
653			free(cp);
654		}
655	  done_sources:
656
657		if (opt_u && !unusual)
658			continue;
659
660		printed = 0;
661		if (!opt_q) {
662			printf("%s:", name);
663			printed++;
664		}
665		if (bin) {
666			if (printed++)
667				putchar(' ');
668			fputs(bin, stdout);
669		}
670		if (man) {
671			if (printed++)
672				putchar(' ');
673			fputs(man, stdout);
674		}
675		if (src) {
676			if (printed++)
677				putchar(' ');
678			fputs(src, stdout);
679		}
680		if (printed)
681			putchar('\n');
682	}
683
684	if (opt_m)
685		regfree(&re);
686
687	return (0);
688}
689