tr.c revision 118409
11590Srgrimes/*
21590Srgrimes * Copyright (c) 1988, 1993
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * Redistribution and use in source and binary forms, with or without
61590Srgrimes * modification, are permitted provided that the following conditions
71590Srgrimes * are met:
81590Srgrimes * 1. Redistributions of source code must retain the above copyright
91590Srgrimes *    notice, this list of conditions and the following disclaimer.
101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer in the
121590Srgrimes *    documentation and/or other materials provided with the distribution.
131590Srgrimes * 3. All advertising materials mentioning features or use of this software
141590Srgrimes *    must display the following acknowledgement:
151590Srgrimes *	This product includes software developed by the University of
161590Srgrimes *	California, Berkeley and its contributors.
171590Srgrimes * 4. Neither the name of the University nor the names of its contributors
181590Srgrimes *    may be used to endorse or promote products derived from this software
191590Srgrimes *    without specific prior written permission.
201590Srgrimes *
211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311590Srgrimes * SUCH DAMAGE.
321590Srgrimes */
331590Srgrimes
3487705Smarkm#include <sys/cdefs.h>
3587705Smarkm
3687705Smarkm__FBSDID("$FreeBSD: head/usr.bin/tr/tr.c 118409 2003-08-04 02:57:17Z ache $");
3787705Smarkm
/*
 * Copyright and SCCS identification strings.  Both are compiled out
 * when `lint' is defined.
 */
#ifndef lint
static const char copyright[] =
	"@(#) Copyright (c) 1988, 1993\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";
static const char sccsid[] = "@(#)tr.c	8.2 (Berkeley) 5/4/95";
#endif /* !lint */
471590Srgrimes
481590Srgrimes#include <sys/types.h>
4923693Speter
5098214Stjr#include <ctype.h>
5128368Scharnier#include <err.h>
5287705Smarkm#include <locale.h>
531590Srgrimes#include <stdio.h>
541590Srgrimes#include <stdlib.h>
551590Srgrimes#include <string.h>
5623693Speter#include <unistd.h>
5723693Speter
581590Srgrimes#include "extern.h"
591590Srgrimes
/*
 * ISCHAR(c) -- used by the -C option.  Evaluates to 1 when the byte `c'
 * is either a printable or a control character according to the current
 * LC_CTYPE settings, and to 0 otherwise.  The argument is expanded twice,
 * so it must not have side effects.
 */
#define	ISCHAR(c)	(isprint(c) || iscntrl(c))
6598214Stjr
661590Srgrimesstatic int string1[NCHARS] = {
671590Srgrimes	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,		/* ASCII */
681590Srgrimes	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
691590Srgrimes	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
701590Srgrimes	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
711590Srgrimes	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
721590Srgrimes	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
731590Srgrimes	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
741590Srgrimes	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
751590Srgrimes	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
761590Srgrimes	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
771590Srgrimes	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
781590Srgrimes	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
791590Srgrimes	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
801590Srgrimes	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
811590Srgrimes	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
821590Srgrimes	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
831590Srgrimes	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
841590Srgrimes	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
851590Srgrimes	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
861590Srgrimes	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
871590Srgrimes	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
881590Srgrimes	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
891590Srgrimes	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
901590Srgrimes	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
911590Srgrimes	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
921590Srgrimes	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
931590Srgrimes	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
941590Srgrimes	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
951590Srgrimes	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
961590Srgrimes	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
971590Srgrimes	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
981590Srgrimes	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
991590Srgrimes}, string2[NCHARS];
1001590Srgrimes
1011590SrgrimesSTR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
1021590SrgrimesSTR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
1031590Srgrimes
10498214Stjrstatic void setup(int *, char *, STR *, int, int);
10592922Simpstatic void usage(void);
1061590Srgrimes
1071590Srgrimesint
108102944Sdwmalonemain(int argc, char **argv)
1091590Srgrimes{
110118399Sache	static int carray[NCHARS];
111118399Sache	int ch, cnt, n, lastch, *p;
11298214Stjr	int Cflag, cflag, dflag, sflag, isstring2;
1131590Srgrimes
11498210Stjr	(void)setlocale(LC_ALL, "");
11511895Sache
11698214Stjr	Cflag = cflag = dflag = sflag = 0;
11798214Stjr	while ((ch = getopt(argc, argv, "Ccdsu")) != -1)
1181590Srgrimes		switch((char)ch) {
11998214Stjr		case 'C':
12098214Stjr			Cflag = 1;
12198214Stjr			cflag = 0;
12298214Stjr			break;
1231590Srgrimes		case 'c':
1241590Srgrimes			cflag = 1;
12598214Stjr			Cflag = 0;
1261590Srgrimes			break;
1271590Srgrimes		case 'd':
1281590Srgrimes			dflag = 1;
1291590Srgrimes			break;
1301590Srgrimes		case 's':
1311590Srgrimes			sflag = 1;
1321590Srgrimes			break;
13330322Shelbig		case 'u':
13430322Shelbig			setbuf(stdout, (char *)NULL);
13530322Shelbig			break;
1361590Srgrimes		case '?':
1371590Srgrimes		default:
1381590Srgrimes			usage();
1391590Srgrimes		}
1401590Srgrimes	argc -= optind;
1411590Srgrimes	argv += optind;
1421590Srgrimes
1431590Srgrimes	switch(argc) {
1441590Srgrimes	case 0:
1451590Srgrimes	default:
1461590Srgrimes		usage();
1471590Srgrimes		/* NOTREACHED */
1481590Srgrimes	case 1:
1491590Srgrimes		isstring2 = 0;
1501590Srgrimes		break;
1511590Srgrimes	case 2:
1521590Srgrimes		isstring2 = 1;
1531590Srgrimes		break;
1541590Srgrimes	}
1551590Srgrimes
1561590Srgrimes	/*
15798214Stjr	 * tr -ds [-Cc] string1 string2
1581590Srgrimes	 * Delete all characters (or complemented characters) in string1.
1591590Srgrimes	 * Squeeze all characters in string2.
1601590Srgrimes	 */
1611590Srgrimes	if (dflag && sflag) {
1621590Srgrimes		if (!isstring2)
1631590Srgrimes			usage();
1641590Srgrimes
16598214Stjr		setup(string1, argv[0], &s1, cflag, Cflag);
16698214Stjr		setup(string2, argv[1], &s2, 0, 0);
1678874Srgrimes
1681590Srgrimes		for (lastch = OOBCH; (ch = getchar()) != EOF;)
1691590Srgrimes			if (!string1[ch] && (!string2[ch] || lastch != ch)) {
1701590Srgrimes				lastch = ch;
1711590Srgrimes				(void)putchar(ch);
1721590Srgrimes			}
1731590Srgrimes		exit(0);
1741590Srgrimes	}
1751590Srgrimes
1761590Srgrimes	/*
17798214Stjr	 * tr -d [-Cc] string1
1781590Srgrimes	 * Delete all characters (or complemented characters) in string1.
1791590Srgrimes	 */
1801590Srgrimes	if (dflag) {
1811590Srgrimes		if (isstring2)
1821590Srgrimes			usage();
1831590Srgrimes
18498214Stjr		setup(string1, argv[0], &s1, cflag, Cflag);
1851590Srgrimes
1861590Srgrimes		while ((ch = getchar()) != EOF)
1871590Srgrimes			if (!string1[ch])
1881590Srgrimes				(void)putchar(ch);
1891590Srgrimes		exit(0);
1901590Srgrimes	}
1911590Srgrimes
1921590Srgrimes	/*
19398214Stjr	 * tr -s [-Cc] string1
1941590Srgrimes	 * Squeeze all characters (or complemented characters) in string1.
1951590Srgrimes	 */
1961590Srgrimes	if (sflag && !isstring2) {
19798214Stjr		setup(string1, argv[0], &s1, cflag, Cflag);
1981590Srgrimes
1991590Srgrimes		for (lastch = OOBCH; (ch = getchar()) != EOF;)
2001590Srgrimes			if (!string1[ch] || lastch != ch) {
2011590Srgrimes				lastch = ch;
2021590Srgrimes				(void)putchar(ch);
2031590Srgrimes			}
2041590Srgrimes		exit(0);
2051590Srgrimes	}
2061590Srgrimes
2071590Srgrimes	/*
20898214Stjr	 * tr [-Ccs] string1 string2
2091590Srgrimes	 * Replace all characters (or complemented characters) in string1 with
2101590Srgrimes	 * the character in the same position in string2.  If the -s option is
2111590Srgrimes	 * specified, squeeze all the characters in string2.
2121590Srgrimes	 */
2131590Srgrimes	if (!isstring2)
2141590Srgrimes		usage();
2151590Srgrimes
2161590Srgrimes	s1.str = argv[0];
217118400Sache	if (cflag || Cflag) {
218118400Sache		if ((s2.str = strdup(argv[1])) == NULL)
219118400Sache			errx(1, "strdup(argv[1])");
2201590Srgrimes
2211590Srgrimes		for (cnt = NCHARS, p = string1; cnt--;)
2221590Srgrimes			*p++ = OOBCH;
223118400Sache	} else
224118400Sache		s2.str = argv[1];
2251590Srgrimes
2261590Srgrimes	if (!next(&s2))
22728368Scharnier		errx(1, "empty string2");
2281590Srgrimes
229118371Sache	/*
230118371Sache	 * For -s result will contain only those characters defined
231118371Sache	 * as the second characters in each of the toupper or tolower
232118371Sache	 * pairs.
233118371Sache	 */
234118371Sache
2351590Srgrimes	/* If string2 runs out of characters, use the last one specified. */
236118371Sache	while (next(&s1)) {
237118371Sache	again:
238118371Sache		if (s1.state == SET_LOWER &&
239118371Sache		    s2.state == SET_UPPER &&
240118371Sache		    s1.cnt == 1 && s2.cnt == 1) {
241118371Sache			do {
242118371Sache				string1[s1.lastch] = ch = toupper(s1.lastch);
243118371Sache				if (sflag && isupper(ch))
244118371Sache					string2[ch] = 1;
245118371Sache				if (!next(&s1))
246118371Sache					goto endloop;
247118371Sache			} while (s1.state == SET_LOWER && s1.cnt > 1);
248118371Sache			/* skip upper set */
249118371Sache			do {
250118371Sache				if (!next(&s2))
251118371Sache					break;
252118371Sache			} while (s2.state == SET_UPPER && s2.cnt > 1);
253118371Sache			goto again;
254118371Sache		} else if (s1.state == SET_UPPER &&
255118371Sache			   s2.state == SET_LOWER &&
256118371Sache			   s1.cnt == 1 && s2.cnt == 1) {
257118371Sache			do {
258118371Sache				string1[s1.lastch] = ch = tolower(s1.lastch);
259118371Sache				if (sflag && islower(ch))
260118371Sache					string2[ch] = 1;
261118371Sache				if (!next(&s1))
262118371Sache					goto endloop;
263118371Sache			} while (s1.state == SET_UPPER && s1.cnt > 1);
264118371Sache			/* skip lower set */
265118371Sache			do {
266118371Sache				if (!next(&s2))
267118371Sache					break;
268118371Sache			} while (s2.state == SET_LOWER && s2.cnt > 1);
269118371Sache			goto again;
270118371Sache		} else {
271118371Sache			string1[s1.lastch] = s2.lastch;
272118371Sache			if (sflag)
273118371Sache				string2[s2.lastch] = 1;
2741590Srgrimes		}
275118371Sache		(void)next(&s2);
276118371Sache	}
277118371Sacheendloop:
278100874Stjr	if (cflag || Cflag) {
279118399Sache		for (p = carray, cnt = 0; cnt < NCHARS; cnt++) {
280118399Sache			if (string1[cnt] == OOBCH && (!Cflag || ISCHAR(cnt)))
281118399Sache				*p++ = cnt;
282118399Sache			else
283118399Sache				string1[cnt] = cnt;
284118399Sache		}
285118399Sache		n = p - carray;
286118399Sache		if (Cflag && n > 1)
287118399Sache			(void)mergesort(carray, n, sizeof(*carray), charcoll);
288118399Sache
289100874Stjr		s2.str = argv[1];
290100874Stjr		s2.state = NORMAL;
291118399Sache		for (cnt = 0; cnt < n; cnt++) {
292118399Sache			(void)next(&s2);
293118399Sache			string1[carray[cnt]] = s2.lastch;
294118409Sache			/*
295118409Sache			 * Chars taken from s2 can be different this time
296118409Sache			 * due to lack of complex upper/lower processing,
297118409Sache			 * so fill string2 again to not miss some.
298118409Sache			 */
299118409Sache			if (sflag)
300118409Sache				string2[s2.lastch] = 1;
301100874Stjr		}
302100874Stjr	}
3031590Srgrimes
3041590Srgrimes	if (sflag)
3051590Srgrimes		for (lastch = OOBCH; (ch = getchar()) != EOF;) {
3061590Srgrimes			ch = string1[ch];
3071590Srgrimes			if (!string2[ch] || lastch != ch) {
3081590Srgrimes				lastch = ch;
3091590Srgrimes				(void)putchar(ch);
3101590Srgrimes			}
3111590Srgrimes		}
3121590Srgrimes	else
3131590Srgrimes		while ((ch = getchar()) != EOF)
3141590Srgrimes			(void)putchar(string1[ch]);
3151590Srgrimes	exit (0);
3161590Srgrimes}
3171590Srgrimes
3181590Srgrimesstatic void
319102944Sdwmalonesetup(int *string, char *arg, STR *str, int cflag, int Cflag)
3201590Srgrimes{
32187705Smarkm	int cnt, *p;
3221590Srgrimes
3231590Srgrimes	str->str = arg;
3241590Srgrimes	bzero(string, NCHARS * sizeof(int));
3251590Srgrimes	while (next(str))
3261590Srgrimes		string[str->lastch] = 1;
3271590Srgrimes	if (cflag)
3281590Srgrimes		for (p = string, cnt = NCHARS; cnt--; ++p)
3291590Srgrimes			*p = !*p;
33098214Stjr	else if (Cflag)
33198214Stjr		for (cnt = 0; cnt < NCHARS; cnt++)
33298214Stjr			string[cnt] = !string[cnt] && ISCHAR(cnt);
3331590Srgrimes}
3341590Srgrimes
/*
 * Sort callback: order two byte values by the collation rules of the
 * current locale.
 *
 * `a' and `b' each point to an int holding one byte value.  Each value
 * is placed in a one-character, NUL-terminated string and the pair is
 * compared with strcoll(); the return value is strcoll()'s result
 * (negative, zero or positive).
 */
int
charcoll(const void *a, const void *b)
{
	const char lhs[2] = { (char)*(const int *)a, '\0' };
	const char rhs[2] = { (char)*(const int *)b, '\0' };

	return (strcoll(lhs, rhs));
}
344100891Stjr
/*
 * Print the four accepted command-line forms to standard error and
 * terminate the program with exit status 1.  Does not return.
 */
static void
usage(void)
{
	static const char *const synopsis[] = {
		"usage: tr [-Ccsu] string1 string2",
		"       tr [-Ccu] -d string1",
		"       tr [-Ccu] -s string1",
		"       tr [-Ccu] -ds string1 string2"
	};

	(void)fprintf(stderr, "%s\n%s\n%s\n%s\n",
	    synopsis[0], synopsis[1], synopsis[2], synopsis[3]);
	exit(1);
}
355