1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1988, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/types.h>
33#include <sys/capsicum.h>
34
35#include <capsicum_helpers.h>
36#include <ctype.h>
37#include <err.h>
38#include <limits.h>
39#include <locale.h>
40#include <stdint.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <unistd.h>
45#include <wchar.h>
46#include <wctype.h>
47
48#include "cmap.h"
49#include "cset.h"
50#include "extern.h"
51
/*
 * Parse state for the two operand strings.  main() and setup() point the
 * str member at the matching command-line argument and then step through
 * it with next().  The positional initializers follow the layout of STR,
 * which is declared in a project header not visible in this file.
 */
static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL };
static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL };

/* Build the character set described by one operand string. */
static struct cset *setup(char *, STR *, int, int);
/* Print the command synopsis to stderr and exit with status 1. */
static void usage(void) __dead2;
57
58int
59main(int argc, char **argv)
60{
61	static int carray[NCHARS_SB];
62	struct cmap *map;
63	struct cset *delete, *squeeze;
64	int n, *p;
65	int Cflag, cflag, dflag, sflag, isstring2;
66	wint_t ch, cnt, lastch;
67
68	(void)setlocale(LC_ALL, "");
69
70	if (caph_limit_stdio() == -1)
71		err(1, "unable to limit stdio");
72
73	if (caph_enter() < 0)
74		err(1, "unable to enter capability mode");
75
76	Cflag = cflag = dflag = sflag = 0;
77	while ((ch = getopt(argc, argv, "Ccdsu")) != -1)
78		switch((char)ch) {
79		case 'C':
80			Cflag = 1;
81			cflag = 0;
82			break;
83		case 'c':
84			cflag = 1;
85			Cflag = 0;
86			break;
87		case 'd':
88			dflag = 1;
89			break;
90		case 's':
91			sflag = 1;
92			break;
93		case 'u':
94			setbuf(stdout, (char *)NULL);
95			break;
96		case '?':
97		default:
98			usage();
99		}
100	argc -= optind;
101	argv += optind;
102
103	switch(argc) {
104	case 0:
105	default:
106		usage();
107		/* NOTREACHED */
108	case 1:
109		isstring2 = 0;
110		break;
111	case 2:
112		isstring2 = 1;
113		break;
114	}
115
116	/*
117	 * tr -ds [-Cc] string1 string2
118	 * Delete all characters (or complemented characters) in string1.
119	 * Squeeze all characters in string2.
120	 */
121	if (dflag && sflag) {
122		if (!isstring2)
123			usage();
124
125		delete = setup(argv[0], &s1, cflag, Cflag);
126		squeeze = setup(argv[1], &s2, 0, 0);
127
128		for (lastch = OOBCH; (ch = getwchar()) != WEOF;)
129			if (!cset_in(delete, ch) &&
130			    (lastch != ch || !cset_in(squeeze, ch))) {
131				lastch = ch;
132				(void)putwchar(ch);
133			}
134		if (ferror(stdin))
135			err(1, NULL);
136		exit(0);
137	}
138
139	/*
140	 * tr -d [-Cc] string1
141	 * Delete all characters (or complemented characters) in string1.
142	 */
143	if (dflag) {
144		if (isstring2)
145			usage();
146
147		delete = setup(argv[0], &s1, cflag, Cflag);
148
149		while ((ch = getwchar()) != WEOF)
150			if (!cset_in(delete, ch))
151				(void)putwchar(ch);
152		if (ferror(stdin))
153			err(1, NULL);
154		exit(0);
155	}
156
157	/*
158	 * tr -s [-Cc] string1
159	 * Squeeze all characters (or complemented characters) in string1.
160	 */
161	if (sflag && !isstring2) {
162		squeeze = setup(argv[0], &s1, cflag, Cflag);
163
164		for (lastch = OOBCH; (ch = getwchar()) != WEOF;)
165			if (lastch != ch || !cset_in(squeeze, ch)) {
166				lastch = ch;
167				(void)putwchar(ch);
168			}
169		if (ferror(stdin))
170			err(1, NULL);
171		exit(0);
172	}
173
174	/*
175	 * tr [-Ccs] string1 string2
176	 * Replace all characters (or complemented characters) in string1 with
177	 * the character in the same position in string2.  If the -s option is
178	 * specified, squeeze all the characters in string2.
179	 */
180	if (!isstring2)
181		usage();
182
183	map = cmap_alloc();
184	if (map == NULL)
185		err(1, NULL);
186	squeeze = cset_alloc();
187	if (squeeze == NULL)
188		err(1, NULL);
189
190	s1.str = argv[0];
191
192	if (Cflag || cflag) {
193		cmap_default(map, OOBCH);
194		if ((s2.str = strdup(argv[1])) == NULL)
195			errx(1, "strdup(argv[1])");
196	} else
197		s2.str = argv[1];
198
199	if (!next(&s2))
200		errx(1, "empty string2");
201
202	/*
203	 * For -s result will contain only those characters defined
204	 * as the second characters in each of the toupper or tolower
205	 * pairs.
206	 */
207
208	/* If string2 runs out of characters, use the last one specified. */
209	while (next(&s1)) {
210	again:
211		if (s1.state == CCLASS_LOWER &&
212		    s2.state == CCLASS_UPPER &&
213		    s1.cnt == 1 && s2.cnt == 1) {
214			do {
215				ch = towupper(s1.lastch);
216				cmap_add(map, s1.lastch, ch);
217				if (sflag && iswupper(ch))
218					cset_add(squeeze, ch);
219				if (!next(&s1))
220					goto endloop;
221			} while (s1.state == CCLASS_LOWER && s1.cnt > 1);
222			/* skip upper set */
223			do {
224				if (!next(&s2))
225					break;
226			} while (s2.state == CCLASS_UPPER && s2.cnt > 1);
227			goto again;
228		} else if (s1.state == CCLASS_UPPER &&
229			   s2.state == CCLASS_LOWER &&
230			   s1.cnt == 1 && s2.cnt == 1) {
231			do {
232				ch = towlower(s1.lastch);
233				cmap_add(map, s1.lastch, ch);
234				if (sflag && iswlower(ch))
235					cset_add(squeeze, ch);
236				if (!next(&s1))
237					goto endloop;
238			} while (s1.state == CCLASS_UPPER && s1.cnt > 1);
239			/* skip lower set */
240			do {
241				if (!next(&s2))
242					break;
243			} while (s2.state == CCLASS_LOWER && s2.cnt > 1);
244			goto again;
245		} else {
246			cmap_add(map, s1.lastch, s2.lastch);
247			if (sflag)
248				cset_add(squeeze, s2.lastch);
249		}
250		(void)next(&s2);
251	}
252endloop:
253	if (cflag || (Cflag && MB_CUR_MAX > 1)) {
254		/*
255		 * This is somewhat tricky: since the character set is
256		 * potentially huge, we need to avoid allocating a map
257		 * entry for every character. Our strategy is to set the
258		 * default mapping to the last character of string #2
259		 * (= the one that gets automatically repeated), then to
260		 * add back identity mappings for characters that should
261		 * remain unchanged. We don't waste space on identity mappings
262		 * for non-characters with the -C option; those are simulated
263		 * in the I/O loop.
264		 */
265		s2.str = argv[1];
266		s2.state = NORMAL;
267		for (cnt = 0; cnt < WINT_MAX; cnt++) {
268			if (Cflag && !iswrune(cnt))
269				continue;
270			if (cmap_lookup(map, cnt) == OOBCH) {
271				if (next(&s2)) {
272					cmap_add(map, cnt, s2.lastch);
273					if (sflag)
274						cset_add(squeeze, s2.lastch);
275				}
276			} else
277				cmap_add(map, cnt, cnt);
278			if ((s2.state == EOS || s2.state == INFINITE) &&
279			    cnt >= cmap_max(map))
280				break;
281		}
282		cmap_default(map, s2.lastch);
283	} else if (Cflag) {
284		for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) {
285			if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt))
286				*p++ = cnt;
287			else
288				cmap_add(map, cnt, cnt);
289		}
290		n = p - carray;
291		if (Cflag && n > 1)
292			(void)mergesort(carray, n, sizeof(*carray), charcoll);
293
294		s2.str = argv[1];
295		s2.state = NORMAL;
296		for (cnt = 0; cnt < n; cnt++) {
297			(void)next(&s2);
298			cmap_add(map, carray[cnt], s2.lastch);
299			/*
300			 * Chars taken from s2 can be different this time
301			 * due to lack of complex upper/lower processing,
302			 * so fill string2 again to not miss some.
303			 */
304			if (sflag)
305				cset_add(squeeze, s2.lastch);
306		}
307	}
308
309	cset_cache(squeeze);
310	cmap_cache(map);
311
312	if (sflag)
313		for (lastch = OOBCH; (ch = getwchar()) != WEOF;) {
314			if (!Cflag || iswrune(ch))
315				ch = cmap_lookup(map, ch);
316			if (lastch != ch || !cset_in(squeeze, ch)) {
317				lastch = ch;
318				(void)putwchar(ch);
319			}
320		}
321	else
322		while ((ch = getwchar()) != WEOF) {
323			if (!Cflag || iswrune(ch))
324				ch = cmap_lookup(map, ch);
325			(void)putwchar(ch);
326		}
327	if (ferror(stdin))
328		err(1, NULL);
329	exit (0);
330}
331
332static struct cset *
333setup(char *arg, STR *str, int cflag, int Cflag)
334{
335	struct cset *cs;
336
337	cs = cset_alloc();
338	if (cs == NULL)
339		err(1, NULL);
340	str->str = arg;
341	while (next(str))
342		cset_add(cs, str->lastch);
343	if (Cflag)
344		cset_addclass(cs, wctype("rune"), true);
345	if (cflag || Cflag)
346		cset_invert(cs);
347	cset_cache(cs);
348	return (cs);
349}
350
/*
 * charcoll --
 *	Comparison callback ordering two single-byte character codes by the
 *	collation order of the current locale.
 *
 *	a, b	pointers to int objects holding the character codes
 *
 *	Each code is narrowed to char and placed in a one-character string;
 *	the result is that of strcoll() on the two strings (negative, zero
 *	or positive).
 */
int
charcoll(const void *a, const void *b)
{
	const int lhs = *(const int *)a;
	const int rhs = *(const int *)b;
	char left[2] = { (char)lhs, '\0' };
	char right[2] = { (char)rhs, '\0' };

	return (strcoll(left, right));
}
360
/*
 * usage --
 *	Write the four-line command synopsis to stderr and exit with
 *	status 1.  Does not return.
 */
static void
usage(void)
{
	static const char *const synopsis[] = {
		"usage: tr [-Ccsu] string1 string2",
		"       tr [-Ccu] -d string1",
		"       tr [-Ccu] -s string1",
		"       tr [-Ccu] -ds string1 string2",
	};
	size_t i;

	for (i = 0; i < sizeof(synopsis) / sizeof(synopsis[0]); i++)
		(void)fprintf(stderr, "%s\n", synopsis[i]);
	exit(1);
}
371