11590Srgrimes/* 21590Srgrimes * Copyright (c) 1988, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * Redistribution and use in source and binary forms, with or without 61590Srgrimes * modification, are permitted provided that the following conditions 71590Srgrimes * are met: 81590Srgrimes * 1. Redistributions of source code must retain the above copyright 91590Srgrimes * notice, this list of conditions and the following disclaimer. 101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111590Srgrimes * notice, this list of conditions and the following disclaimer in the 121590Srgrimes * documentation and/or other materials provided with the distribution. 131590Srgrimes * 4. Neither the name of the University nor the names of its contributors 141590Srgrimes * may be used to endorse or promote products derived from this software 151590Srgrimes * without specific prior written permission. 161590Srgrimes * 171590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 181590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 191590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 201590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 211590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 221590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 231590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 241590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 251590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 261590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 271590Srgrimes * SUCH DAMAGE. 281590Srgrimes */ 291590Srgrimes 3087705Smarkm#include <sys/cdefs.h> 3187705Smarkm 3287705Smarkm__FBSDID("$FreeBSD$"); 3387705Smarkm 341590Srgrimes#ifndef lint 3528368Scharnierstatic const char copyright[] = 361590Srgrimes"@(#) Copyright (c) 1988, 1993\n\ 371590Srgrimes The Regents of the University of California. All rights reserved.\n"; 3887705Smarkm#endif 391590Srgrimes 401590Srgrimes#ifndef lint 4187705Smarkmstatic const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 4228368Scharnier#endif 431590Srgrimes 441590Srgrimes#include <sys/types.h> 4523693Speter 46200462Sdelphij#include <ctype.h> 4728368Scharnier#include <err.h> 48200462Sdelphij#include <limits.h> 4987705Smarkm#include <locale.h> 50245767Sandrew#include <stdint.h> 511590Srgrimes#include <stdio.h> 521590Srgrimes#include <stdlib.h> 531590Srgrimes#include <string.h> 5423693Speter#include <unistd.h> 55200462Sdelphij#include <wchar.h> 56200462Sdelphij#include <wctype.h> 5723693Speter 58131846Stjr#include "cmap.h" 59131846Stjr#include "cset.h" 601590Srgrimes#include "extern.h" 611590Srgrimes 62227188Sedstatic STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 63227188Sedstatic STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 6498214Stjr 65131846Stjrstatic struct cset *setup(char *, STR *, int, int); 6692922Simpstatic void usage(void); 671590Srgrimes 681590Srgrimesint 69102944Sdwmalonemain(int argc, char **argv) 701590Srgrimes{ 71131846Stjr static int carray[NCHARS_SB]; 72131846Stjr struct cmap *map; 73131846Stjr struct cset *delete, *squeeze; 74131846Stjr int n, *p; 7598214Stjr int Cflag, cflag, dflag, sflag, isstring2; 76144840Sstefanf wint_t ch, cnt, lastch; 771590Srgrimes 7898210Stjr (void)setlocale(LC_ALL, ""); 7911895Sache 8098214Stjr Cflag = cflag = dflag = sflag = 0; 8198214Stjr while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 821590Srgrimes switch((char)ch) { 8398214Stjr case 'C': 8498214Stjr Cflag = 1; 8598214Stjr cflag = 0; 8698214Stjr break; 871590Srgrimes case 'c': 881590Srgrimes cflag = 1; 8998214Stjr Cflag = 0; 901590Srgrimes break; 911590Srgrimes case 'd': 921590Srgrimes dflag = 1; 931590Srgrimes break; 941590Srgrimes case 's': 951590Srgrimes sflag = 1; 961590Srgrimes break; 9730322Shelbig case 'u': 9830322Shelbig setbuf(stdout, (char *)NULL); 9930322Shelbig break; 1001590Srgrimes case '?': 1011590Srgrimes default: 1021590Srgrimes usage(); 1031590Srgrimes } 1041590Srgrimes argc -= optind; 1051590Srgrimes argv += optind; 1061590Srgrimes 1071590Srgrimes switch(argc) { 1081590Srgrimes case 0: 1091590Srgrimes default: 1101590Srgrimes usage(); 1111590Srgrimes /* NOTREACHED */ 1121590Srgrimes case 1: 1131590Srgrimes isstring2 = 0; 1141590Srgrimes break; 1151590Srgrimes case 2: 1161590Srgrimes isstring2 = 1; 1171590Srgrimes break; 1181590Srgrimes } 1191590Srgrimes 1201590Srgrimes /* 12198214Stjr * tr -ds [-Cc] string1 string2 1221590Srgrimes * Delete all characters (or complemented characters) in string1. 1231590Srgrimes * Squeeze all characters in string2. 1241590Srgrimes */ 1251590Srgrimes if (dflag && sflag) { 1261590Srgrimes if (!isstring2) 1271590Srgrimes usage(); 1281590Srgrimes 129131846Stjr delete = setup(argv[0], &s1, cflag, Cflag); 130131846Stjr squeeze = setup(argv[1], &s2, 0, 0); 1318874Srgrimes 132131846Stjr for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 133131846Stjr if (!cset_in(delete, ch) && 134131846Stjr (lastch != ch || !cset_in(squeeze, ch))) { 1351590Srgrimes lastch = ch; 136131846Stjr (void)putwchar(ch); 1371590Srgrimes } 138131855Stjr if (ferror(stdin)) 139131855Stjr err(1, NULL); 1401590Srgrimes exit(0); 1411590Srgrimes } 1421590Srgrimes 1431590Srgrimes /* 14498214Stjr * tr -d [-Cc] string1 1451590Srgrimes * Delete all characters (or complemented characters) in string1. 1461590Srgrimes */ 1471590Srgrimes if (dflag) { 1481590Srgrimes if (isstring2) 1491590Srgrimes usage(); 1501590Srgrimes 151131846Stjr delete = setup(argv[0], &s1, cflag, Cflag); 1521590Srgrimes 153131846Stjr while ((ch = getwchar()) != WEOF) 154131846Stjr if (!cset_in(delete, ch)) 155131846Stjr (void)putwchar(ch); 156131855Stjr if (ferror(stdin)) 157131855Stjr err(1, NULL); 1581590Srgrimes exit(0); 1591590Srgrimes } 1601590Srgrimes 1611590Srgrimes /* 16298214Stjr * tr -s [-Cc] string1 1631590Srgrimes * Squeeze all characters (or complemented characters) in string1. 1641590Srgrimes */ 1651590Srgrimes if (sflag && !isstring2) { 166131846Stjr squeeze = setup(argv[0], &s1, cflag, Cflag); 1671590Srgrimes 168131846Stjr for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 169131846Stjr if (lastch != ch || !cset_in(squeeze, ch)) { 1701590Srgrimes lastch = ch; 171131846Stjr (void)putwchar(ch); 1721590Srgrimes } 173131855Stjr if (ferror(stdin)) 174131855Stjr err(1, NULL); 1751590Srgrimes exit(0); 1761590Srgrimes } 1771590Srgrimes 1781590Srgrimes /* 17998214Stjr * tr [-Ccs] string1 string2 1801590Srgrimes * Replace all characters (or complemented characters) in string1 with 1811590Srgrimes * the character in the same position in string2. If the -s option is 1821590Srgrimes * specified, squeeze all the characters in string2. 1831590Srgrimes */ 1841590Srgrimes if (!isstring2) 1851590Srgrimes usage(); 1861590Srgrimes 187131846Stjr map = cmap_alloc(); 188131846Stjr if (map == NULL) 189131846Stjr err(1, NULL); 190131846Stjr squeeze = cset_alloc(); 191131846Stjr if (squeeze == NULL) 192131846Stjr err(1, NULL); 193131846Stjr 1941590Srgrimes s1.str = argv[0]; 195131846Stjr 196131846Stjr if (Cflag || cflag) { 197131846Stjr cmap_default(map, OOBCH); 198118400Sache if ((s2.str = strdup(argv[1])) == NULL) 199118400Sache errx(1, "strdup(argv[1])"); 200118400Sache } else 201118400Sache s2.str = argv[1]; 2021590Srgrimes 2031590Srgrimes if (!next(&s2)) 20428368Scharnier errx(1, "empty string2"); 2051590Srgrimes 206118371Sache /* 207118371Sache * For -s result will contain only those characters defined 208118371Sache * as the second characters in each of the toupper or tolower 209118371Sache * pairs. 210118371Sache */ 211118371Sache 2121590Srgrimes /* If string2 runs out of characters, use the last one specified. */ 213118371Sache while (next(&s1)) { 214118371Sache again: 215131846Stjr if (s1.state == CCLASS_LOWER && 216131846Stjr s2.state == CCLASS_UPPER && 217118371Sache s1.cnt == 1 && s2.cnt == 1) { 218118371Sache do { 219131846Stjr ch = towupper(s1.lastch); 220131846Stjr cmap_add(map, s1.lastch, ch); 221131846Stjr if (sflag && iswupper(ch)) 222131846Stjr cset_add(squeeze, ch); 223118371Sache if (!next(&s1)) 224118371Sache goto endloop; 225131846Stjr } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 226118371Sache /* skip upper set */ 227118371Sache do { 228118371Sache if (!next(&s2)) 229118371Sache break; 230131846Stjr } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 231118371Sache goto again; 232131846Stjr } else if (s1.state == CCLASS_UPPER && 233131846Stjr s2.state == CCLASS_LOWER && 234118371Sache s1.cnt == 1 && s2.cnt == 1) { 235118371Sache do { 236131846Stjr ch = towlower(s1.lastch); 237131846Stjr cmap_add(map, s1.lastch, ch); 238131846Stjr if (sflag && iswlower(ch)) 239131846Stjr cset_add(squeeze, ch); 240118371Sache if (!next(&s1)) 241118371Sache goto endloop; 242131846Stjr } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 243118371Sache /* skip lower set */ 244118371Sache do { 245118371Sache if (!next(&s2)) 246118371Sache break; 247131846Stjr } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 248118371Sache goto again; 249118371Sache } else { 250131846Stjr cmap_add(map, s1.lastch, s2.lastch); 251118371Sache if (sflag) 252131846Stjr cset_add(squeeze, s2.lastch); 2531590Srgrimes } 254118371Sache (void)next(&s2); 255118371Sache } 256118371Sacheendloop: 257131846Stjr if (cflag || (Cflag && MB_CUR_MAX > 1)) { 258131846Stjr /* 259131846Stjr * This is somewhat tricky: since the character set is 260131846Stjr * potentially huge, we need to avoid allocating a map 261131846Stjr * entry for every character. Our strategy is to set the 262131846Stjr * default mapping to the last character of string #2 263131846Stjr * (= the one that gets automatically repeated), then to 264131846Stjr * add back identity mappings for characters that should 265131846Stjr * remain unchanged. We don't waste space on identity mappings 266131846Stjr * for non-characters with the -C option; those are simulated 267131846Stjr * in the I/O loop. 268131846Stjr */ 269131846Stjr s2.str = argv[1]; 270131846Stjr s2.state = NORMAL; 271245767Sandrew for (cnt = 0; cnt < WINT_MAX; cnt++) { 272131846Stjr if (Cflag && !iswrune(cnt)) 273131846Stjr continue; 274131846Stjr if (cmap_lookup(map, cnt) == OOBCH) { 275131846Stjr if (next(&s2)) 276131846Stjr cmap_add(map, cnt, s2.lastch); 277131846Stjr if (sflag) 278131846Stjr cset_add(squeeze, s2.lastch); 279131846Stjr } else 280131846Stjr cmap_add(map, cnt, cnt); 281131846Stjr if ((s2.state == EOS || s2.state == INFINITE) && 282131846Stjr cnt >= cmap_max(map)) 283131846Stjr break; 284131846Stjr } 285131846Stjr cmap_default(map, s2.lastch); 286131846Stjr } else if (Cflag) { 287131846Stjr for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 288131846Stjr if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 289118399Sache *p++ = cnt; 290118399Sache else 291131846Stjr cmap_add(map, cnt, cnt); 292118399Sache } 293118399Sache n = p - carray; 294118399Sache if (Cflag && n > 1) 295118399Sache (void)mergesort(carray, n, sizeof(*carray), charcoll); 296118399Sache 297100874Stjr s2.str = argv[1]; 298100874Stjr s2.state = NORMAL; 299118399Sache for (cnt = 0; cnt < n; cnt++) { 300118399Sache (void)next(&s2); 301131846Stjr cmap_add(map, carray[cnt], s2.lastch); 302118409Sache /* 303118409Sache * Chars taken from s2 can be different this time 304118409Sache * due to lack of complex upper/lower processing, 305118409Sache * so fill string2 again to not miss some. 306118409Sache */ 307118409Sache if (sflag) 308131846Stjr cset_add(squeeze, s2.lastch); 309100874Stjr } 310100874Stjr } 3111590Srgrimes 312131846Stjr cset_cache(squeeze); 313131846Stjr cmap_cache(map); 314131846Stjr 3151590Srgrimes if (sflag) 316131846Stjr for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 317131846Stjr if (!Cflag || iswrune(ch)) 318131846Stjr ch = cmap_lookup(map, ch); 319131846Stjr if (lastch != ch || !cset_in(squeeze, ch)) { 3201590Srgrimes lastch = ch; 321131846Stjr (void)putwchar(ch); 3221590Srgrimes } 3231590Srgrimes } 3241590Srgrimes else 325131846Stjr while ((ch = getwchar()) != WEOF) { 326131846Stjr if (!Cflag || iswrune(ch)) 327131846Stjr ch = cmap_lookup(map, ch); 328131846Stjr (void)putwchar(ch); 329131846Stjr } 330131855Stjr if (ferror(stdin)) 331131855Stjr err(1, NULL); 3321590Srgrimes exit (0); 3331590Srgrimes} 3341590Srgrimes 335131846Stjrstatic struct cset * 336131846Stjrsetup(char *arg, STR *str, int cflag, int Cflag) 3371590Srgrimes{ 338131846Stjr struct cset *cs; 3391590Srgrimes 340131846Stjr cs = cset_alloc(); 341131846Stjr if (cs == NULL) 342131846Stjr err(1, NULL); 3431590Srgrimes str->str = arg; 3441590Srgrimes while (next(str)) 345131846Stjr cset_add(cs, str->lastch); 346131846Stjr if (Cflag) 347131846Stjr cset_addclass(cs, wctype("rune"), true); 348131846Stjr if (cflag || Cflag) 349131846Stjr cset_invert(cs); 350131846Stjr cset_cache(cs); 351131846Stjr return (cs); 3521590Srgrimes} 3531590Srgrimes 354118371Sacheint 355100891Stjrcharcoll(const void *a, const void *b) 356100891Stjr{ 357118371Sache static char sa[2], sb[2]; 358100891Stjr 359100891Stjr sa[0] = *(const int *)a; 360100891Stjr sb[0] = *(const int *)b; 361118373Sache return (strcoll(sa, sb)); 362100891Stjr} 363100891Stjr 3641590Srgrimesstatic void 365102944Sdwmaloneusage(void) 3661590Srgrimes{ 36728368Scharnier (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 36898214Stjr "usage: tr [-Ccsu] string1 string2", 36998214Stjr " tr [-Ccu] -d string1", 37098214Stjr " tr [-Ccu] -s string1", 37198214Stjr " tr [-Ccu] -ds string1 string2"); 3721590Srgrimes exit(1); 3731590Srgrimes} 374