tr.c revision 118399
11590Srgrimes/*
21590Srgrimes * Copyright (c) 1988, 1993
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * Redistribution and use in source and binary forms, with or without
61590Srgrimes * modification, are permitted provided that the following conditions
71590Srgrimes * are met:
81590Srgrimes * 1. Redistributions of source code must retain the above copyright
91590Srgrimes *    notice, this list of conditions and the following disclaimer.
101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer in the
121590Srgrimes *    documentation and/or other materials provided with the distribution.
131590Srgrimes * 3. All advertising materials mentioning features or use of this software
141590Srgrimes *    must display the following acknowledgement:
151590Srgrimes *	This product includes software developed by the University of
161590Srgrimes *	California, Berkeley and its contributors.
171590Srgrimes * 4. Neither the name of the University nor the names of its contributors
181590Srgrimes *    may be used to endorse or promote products derived from this software
191590Srgrimes *    without specific prior written permission.
201590Srgrimes *
211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311590Srgrimes * SUCH DAMAGE.
321590Srgrimes */
331590Srgrimes
3487705Smarkm#include <sys/cdefs.h>
3587705Smarkm
3687705Smarkm__FBSDID("$FreeBSD: head/usr.bin/tr/tr.c 118399 2003-08-03 22:02:49Z ache $");
3787705Smarkm
381590Srgrimes#ifndef lint
3928368Scharnierstatic const char copyright[] =
401590Srgrimes"@(#) Copyright (c) 1988, 1993\n\
411590Srgrimes	The Regents of the University of California.  All rights reserved.\n";
4287705Smarkm#endif
431590Srgrimes
441590Srgrimes#ifndef lint
4587705Smarkmstatic const char sccsid[] = "@(#)tr.c	8.2 (Berkeley) 5/4/95";
4628368Scharnier#endif
471590Srgrimes
481590Srgrimes#include <sys/types.h>
4923693Speter
5098214Stjr#include <ctype.h>
5128368Scharnier#include <err.h>
5287705Smarkm#include <locale.h>
531590Srgrimes#include <stdio.h>
541590Srgrimes#include <stdlib.h>
551590Srgrimes#include <string.h>
5623693Speter#include <unistd.h>
5723693Speter
581590Srgrimes#include "extern.h"
591590Srgrimes
/*
 * Used by the -C option.  A byte value counts as a character of the
 * current locale's character set (LC_CTYPE) when <ctype.h> classifies it
 * as either printable or a control character.  The argument is evaluated
 * twice, so it must be free of side effects.
 */
#define ISCHAR(c) (isprint(c) || iscntrl(c))
6598214Stjr
661590Srgrimesstatic int string1[NCHARS] = {
671590Srgrimes	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,		/* ASCII */
681590Srgrimes	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
691590Srgrimes	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
701590Srgrimes	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
711590Srgrimes	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
721590Srgrimes	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
731590Srgrimes	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
741590Srgrimes	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
751590Srgrimes	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
761590Srgrimes	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
771590Srgrimes	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
781590Srgrimes	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
791590Srgrimes	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
801590Srgrimes	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
811590Srgrimes	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
821590Srgrimes	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
831590Srgrimes	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
841590Srgrimes	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
851590Srgrimes	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
861590Srgrimes	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
871590Srgrimes	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
881590Srgrimes	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
891590Srgrimes	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
901590Srgrimes	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
911590Srgrimes	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
921590Srgrimes	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
931590Srgrimes	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
941590Srgrimes	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
951590Srgrimes	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
961590Srgrimes	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
971590Srgrimes	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
981590Srgrimes	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
991590Srgrimes}, string2[NCHARS];
1001590Srgrimes
1011590SrgrimesSTR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
1021590SrgrimesSTR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
1031590Srgrimes
10498214Stjrstatic void setup(int *, char *, STR *, int, int);
10592922Simpstatic void usage(void);
1061590Srgrimes
1071590Srgrimesint
108102944Sdwmalonemain(int argc, char **argv)
1091590Srgrimes{
110118399Sache	static int carray[NCHARS];
111118399Sache	int ch, cnt, n, lastch, *p;
11298214Stjr	int Cflag, cflag, dflag, sflag, isstring2;
1131590Srgrimes
11498210Stjr	(void)setlocale(LC_ALL, "");
11511895Sache
11698214Stjr	Cflag = cflag = dflag = sflag = 0;
11798214Stjr	while ((ch = getopt(argc, argv, "Ccdsu")) != -1)
1181590Srgrimes		switch((char)ch) {
11998214Stjr		case 'C':
12098214Stjr			Cflag = 1;
12198214Stjr			cflag = 0;
12298214Stjr			break;
1231590Srgrimes		case 'c':
1241590Srgrimes			cflag = 1;
12598214Stjr			Cflag = 0;
1261590Srgrimes			break;
1271590Srgrimes		case 'd':
1281590Srgrimes			dflag = 1;
1291590Srgrimes			break;
1301590Srgrimes		case 's':
1311590Srgrimes			sflag = 1;
1321590Srgrimes			break;
13330322Shelbig		case 'u':
13430322Shelbig			setbuf(stdout, (char *)NULL);
13530322Shelbig			break;
1361590Srgrimes		case '?':
1371590Srgrimes		default:
1381590Srgrimes			usage();
1391590Srgrimes		}
1401590Srgrimes	argc -= optind;
1411590Srgrimes	argv += optind;
1421590Srgrimes
1431590Srgrimes	switch(argc) {
1441590Srgrimes	case 0:
1451590Srgrimes	default:
1461590Srgrimes		usage();
1471590Srgrimes		/* NOTREACHED */
1481590Srgrimes	case 1:
1491590Srgrimes		isstring2 = 0;
1501590Srgrimes		break;
1511590Srgrimes	case 2:
1521590Srgrimes		isstring2 = 1;
1531590Srgrimes		break;
1541590Srgrimes	}
1551590Srgrimes
1561590Srgrimes	/*
15798214Stjr	 * tr -ds [-Cc] string1 string2
1581590Srgrimes	 * Delete all characters (or complemented characters) in string1.
1591590Srgrimes	 * Squeeze all characters in string2.
1601590Srgrimes	 */
1611590Srgrimes	if (dflag && sflag) {
1621590Srgrimes		if (!isstring2)
1631590Srgrimes			usage();
1641590Srgrimes
16598214Stjr		setup(string1, argv[0], &s1, cflag, Cflag);
16698214Stjr		setup(string2, argv[1], &s2, 0, 0);
1678874Srgrimes
1681590Srgrimes		for (lastch = OOBCH; (ch = getchar()) != EOF;)
1691590Srgrimes			if (!string1[ch] && (!string2[ch] || lastch != ch)) {
1701590Srgrimes				lastch = ch;
1711590Srgrimes				(void)putchar(ch);
1721590Srgrimes			}
1731590Srgrimes		exit(0);
1741590Srgrimes	}
1751590Srgrimes
1761590Srgrimes	/*
17798214Stjr	 * tr -d [-Cc] string1
1781590Srgrimes	 * Delete all characters (or complemented characters) in string1.
1791590Srgrimes	 */
1801590Srgrimes	if (dflag) {
1811590Srgrimes		if (isstring2)
1821590Srgrimes			usage();
1831590Srgrimes
18498214Stjr		setup(string1, argv[0], &s1, cflag, Cflag);
1851590Srgrimes
1861590Srgrimes		while ((ch = getchar()) != EOF)
1871590Srgrimes			if (!string1[ch])
1881590Srgrimes				(void)putchar(ch);
1891590Srgrimes		exit(0);
1901590Srgrimes	}
1911590Srgrimes
1921590Srgrimes	/*
19398214Stjr	 * tr -s [-Cc] string1
1941590Srgrimes	 * Squeeze all characters (or complemented characters) in string1.
1951590Srgrimes	 */
1961590Srgrimes	if (sflag && !isstring2) {
19798214Stjr		setup(string1, argv[0], &s1, cflag, Cflag);
1981590Srgrimes
1991590Srgrimes		for (lastch = OOBCH; (ch = getchar()) != EOF;)
2001590Srgrimes			if (!string1[ch] || lastch != ch) {
2011590Srgrimes				lastch = ch;
2021590Srgrimes				(void)putchar(ch);
2031590Srgrimes			}
2041590Srgrimes		exit(0);
2051590Srgrimes	}
2061590Srgrimes
2071590Srgrimes	/*
20898214Stjr	 * tr [-Ccs] string1 string2
2091590Srgrimes	 * Replace all characters (or complemented characters) in string1 with
2101590Srgrimes	 * the character in the same position in string2.  If the -s option is
2111590Srgrimes	 * specified, squeeze all the characters in string2.
2121590Srgrimes	 */
2131590Srgrimes	if (!isstring2)
2141590Srgrimes		usage();
2151590Srgrimes
2161590Srgrimes	s1.str = argv[0];
217118399Sache	if ((s2.str = strdup(argv[1])) == NULL)
218118399Sache		errx(2, "strdup(argv[1])");
2191590Srgrimes
22098214Stjr	if (cflag || Cflag)
2211590Srgrimes		for (cnt = NCHARS, p = string1; cnt--;)
2221590Srgrimes			*p++ = OOBCH;
2231590Srgrimes
2241590Srgrimes	if (!next(&s2))
22528368Scharnier		errx(1, "empty string2");
2261590Srgrimes
227118371Sache	/*
228118371Sache	 * For -s result will contain only those characters defined
229118371Sache	 * as the second characters in each of the toupper or tolower
230118371Sache	 * pairs.
231118371Sache	 */
232118371Sache
2331590Srgrimes	/* If string2 runs out of characters, use the last one specified. */
234118371Sache	while (next(&s1)) {
235118371Sache	again:
236118371Sache		if (s1.state == SET_LOWER &&
237118371Sache		    s2.state == SET_UPPER &&
238118371Sache		    s1.cnt == 1 && s2.cnt == 1) {
239118371Sache			do {
240118371Sache				string1[s1.lastch] = ch = toupper(s1.lastch);
241118371Sache				if (sflag && isupper(ch))
242118371Sache					string2[ch] = 1;
243118371Sache				if (!next(&s1))
244118371Sache					goto endloop;
245118371Sache			} while (s1.state == SET_LOWER && s1.cnt > 1);
246118371Sache			/* skip upper set */
247118371Sache			do {
248118371Sache				if (!next(&s2))
249118371Sache					break;
250118371Sache			} while (s2.state == SET_UPPER && s2.cnt > 1);
251118371Sache			goto again;
252118371Sache		} else if (s1.state == SET_UPPER &&
253118371Sache			   s2.state == SET_LOWER &&
254118371Sache			   s1.cnt == 1 && s2.cnt == 1) {
255118371Sache			do {
256118371Sache				string1[s1.lastch] = ch = tolower(s1.lastch);
257118371Sache				if (sflag && islower(ch))
258118371Sache					string2[ch] = 1;
259118371Sache				if (!next(&s1))
260118371Sache					goto endloop;
261118371Sache			} while (s1.state == SET_UPPER && s1.cnt > 1);
262118371Sache			/* skip lower set */
263118371Sache			do {
264118371Sache				if (!next(&s2))
265118371Sache					break;
266118371Sache			} while (s2.state == SET_LOWER && s2.cnt > 1);
267118371Sache			goto again;
268118371Sache		} else {
269118371Sache			string1[s1.lastch] = s2.lastch;
270118371Sache			if (sflag)
271118371Sache				string2[s2.lastch] = 1;
2721590Srgrimes		}
273118371Sache		(void)next(&s2);
274118371Sache	}
275118371Sacheendloop:
276100874Stjr	if (cflag || Cflag) {
277118399Sache		for (p = carray, cnt = 0; cnt < NCHARS; cnt++) {
278118399Sache			if (string1[cnt] == OOBCH && (!Cflag || ISCHAR(cnt)))
279118399Sache				*p++ = cnt;
280118399Sache			else
281118399Sache				string1[cnt] = cnt;
282118399Sache		}
283118399Sache		n = p - carray;
284118399Sache		if (Cflag && n > 1)
285118399Sache			(void)mergesort(carray, n, sizeof(*carray), charcoll);
286118399Sache
287100874Stjr		s2.str = argv[1];
288100874Stjr		s2.state = NORMAL;
289118399Sache		for (cnt = 0; cnt < n; cnt++) {
290118399Sache			(void)next(&s2);
291118399Sache			string1[carray[cnt]] = s2.lastch;
292100874Stjr		}
293100874Stjr	}
2941590Srgrimes
2951590Srgrimes	if (sflag)
2961590Srgrimes		for (lastch = OOBCH; (ch = getchar()) != EOF;) {
2971590Srgrimes			ch = string1[ch];
2981590Srgrimes			if (!string2[ch] || lastch != ch) {
2991590Srgrimes				lastch = ch;
3001590Srgrimes				(void)putchar(ch);
3011590Srgrimes			}
3021590Srgrimes		}
3031590Srgrimes	else
3041590Srgrimes		while ((ch = getchar()) != EOF)
3051590Srgrimes			(void)putchar(string1[ch]);
3061590Srgrimes	exit (0);
3071590Srgrimes}
3081590Srgrimes
3091590Srgrimesstatic void
310102944Sdwmalonesetup(int *string, char *arg, STR *str, int cflag, int Cflag)
3111590Srgrimes{
31287705Smarkm	int cnt, *p;
3131590Srgrimes
3141590Srgrimes	str->str = arg;
3151590Srgrimes	bzero(string, NCHARS * sizeof(int));
3161590Srgrimes	while (next(str))
3171590Srgrimes		string[str->lastch] = 1;
3181590Srgrimes	if (cflag)
3191590Srgrimes		for (p = string, cnt = NCHARS; cnt--; ++p)
3201590Srgrimes			*p = !*p;
32198214Stjr	else if (Cflag)
32298214Stjr		for (cnt = 0; cnt < NCHARS; cnt++)
32398214Stjr			string[cnt] = !string[cnt] && ISCHAR(cnt);
3241590Srgrimes}
3251590Srgrimes
/*
 * Sort comparator: a and b each point to an int holding one byte value.
 * Each value is placed in a one-character string and the two strings are
 * compared with strcoll(), so the ordering follows the current
 * LC_COLLATE locale.  Returns strcoll()'s result unchanged.
 */
int
charcoll(const void *a, const void *b)
{
	char lhs[2], rhs[2];

	lhs[0] = *(const int *)a;
	lhs[1] = '\0';
	rhs[0] = *(const int *)b;
	rhs[1] = '\0';

	return (strcoll(lhs, rhs));
}
335100891Stjr
/*
 * Write the four-line synopsis to standard error and terminate the
 * program with exit status 1.  Never returns.
 */
static void
usage(void)
{
	static const char *const synopsis[4] = {
		"usage: tr [-Ccsu] string1 string2",
		"       tr [-Ccu] -d string1",
		"       tr [-Ccu] -s string1",
		"       tr [-Ccu] -ds string1 string2"
	};

	(void)fprintf(stderr, "%s\n%s\n%s\n%s\n",
	    synopsis[0], synopsis[1], synopsis[2], synopsis[3]);
	exit(1);
}
346