/* tr.c revision 330897 */
1/*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1988, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33 34__FBSDID("$FreeBSD: stable/11/usr.bin/tr/tr.c 330897 2018-03-14 03:19:51Z eadler $"); 35 36#ifndef lint 37static const char copyright[] = 38"@(#) Copyright (c) 1988, 1993\n\ 39 The Regents of the University of California. 
All rights reserved.\n"; 40#endif 41 42#ifndef lint 43static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 44#endif 45 46#include <sys/types.h> 47 48#include <ctype.h> 49#include <err.h> 50#include <limits.h> 51#include <locale.h> 52#include <stdint.h> 53#include <stdio.h> 54#include <stdlib.h> 55#include <string.h> 56#include <unistd.h> 57#include <wchar.h> 58#include <wctype.h> 59 60#include "cmap.h" 61#include "cset.h" 62#include "extern.h" 63 64static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 65static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 66 67static struct cset *setup(char *, STR *, int, int); 68static void usage(void); 69 70int 71main(int argc, char **argv) 72{ 73 static int carray[NCHARS_SB]; 74 struct cmap *map; 75 struct cset *delete, *squeeze; 76 int n, *p; 77 int Cflag, cflag, dflag, sflag, isstring2; 78 wint_t ch, cnt, lastch; 79 80 (void)setlocale(LC_ALL, ""); 81 82 Cflag = cflag = dflag = sflag = 0; 83 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 84 switch((char)ch) { 85 case 'C': 86 Cflag = 1; 87 cflag = 0; 88 break; 89 case 'c': 90 cflag = 1; 91 Cflag = 0; 92 break; 93 case 'd': 94 dflag = 1; 95 break; 96 case 's': 97 sflag = 1; 98 break; 99 case 'u': 100 setbuf(stdout, (char *)NULL); 101 break; 102 case '?': 103 default: 104 usage(); 105 } 106 argc -= optind; 107 argv += optind; 108 109 switch(argc) { 110 case 0: 111 default: 112 usage(); 113 /* NOTREACHED */ 114 case 1: 115 isstring2 = 0; 116 break; 117 case 2: 118 isstring2 = 1; 119 break; 120 } 121 122 /* 123 * tr -ds [-Cc] string1 string2 124 * Delete all characters (or complemented characters) in string1. 125 * Squeeze all characters in string2. 
126 */ 127 if (dflag && sflag) { 128 if (!isstring2) 129 usage(); 130 131 delete = setup(argv[0], &s1, cflag, Cflag); 132 squeeze = setup(argv[1], &s2, 0, 0); 133 134 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 135 if (!cset_in(delete, ch) && 136 (lastch != ch || !cset_in(squeeze, ch))) { 137 lastch = ch; 138 (void)putwchar(ch); 139 } 140 if (ferror(stdin)) 141 err(1, NULL); 142 exit(0); 143 } 144 145 /* 146 * tr -d [-Cc] string1 147 * Delete all characters (or complemented characters) in string1. 148 */ 149 if (dflag) { 150 if (isstring2) 151 usage(); 152 153 delete = setup(argv[0], &s1, cflag, Cflag); 154 155 while ((ch = getwchar()) != WEOF) 156 if (!cset_in(delete, ch)) 157 (void)putwchar(ch); 158 if (ferror(stdin)) 159 err(1, NULL); 160 exit(0); 161 } 162 163 /* 164 * tr -s [-Cc] string1 165 * Squeeze all characters (or complemented characters) in string1. 166 */ 167 if (sflag && !isstring2) { 168 squeeze = setup(argv[0], &s1, cflag, Cflag); 169 170 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 171 if (lastch != ch || !cset_in(squeeze, ch)) { 172 lastch = ch; 173 (void)putwchar(ch); 174 } 175 if (ferror(stdin)) 176 err(1, NULL); 177 exit(0); 178 } 179 180 /* 181 * tr [-Ccs] string1 string2 182 * Replace all characters (or complemented characters) in string1 with 183 * the character in the same position in string2. If the -s option is 184 * specified, squeeze all the characters in string2. 
185 */ 186 if (!isstring2) 187 usage(); 188 189 map = cmap_alloc(); 190 if (map == NULL) 191 err(1, NULL); 192 squeeze = cset_alloc(); 193 if (squeeze == NULL) 194 err(1, NULL); 195 196 s1.str = argv[0]; 197 198 if (Cflag || cflag) { 199 cmap_default(map, OOBCH); 200 if ((s2.str = strdup(argv[1])) == NULL) 201 errx(1, "strdup(argv[1])"); 202 } else 203 s2.str = argv[1]; 204 205 if (!next(&s2)) 206 errx(1, "empty string2"); 207 208 /* 209 * For -s result will contain only those characters defined 210 * as the second characters in each of the toupper or tolower 211 * pairs. 212 */ 213 214 /* If string2 runs out of characters, use the last one specified. */ 215 while (next(&s1)) { 216 again: 217 if (s1.state == CCLASS_LOWER && 218 s2.state == CCLASS_UPPER && 219 s1.cnt == 1 && s2.cnt == 1) { 220 do { 221 ch = towupper(s1.lastch); 222 cmap_add(map, s1.lastch, ch); 223 if (sflag && iswupper(ch)) 224 cset_add(squeeze, ch); 225 if (!next(&s1)) 226 goto endloop; 227 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 228 /* skip upper set */ 229 do { 230 if (!next(&s2)) 231 break; 232 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 233 goto again; 234 } else if (s1.state == CCLASS_UPPER && 235 s2.state == CCLASS_LOWER && 236 s1.cnt == 1 && s2.cnt == 1) { 237 do { 238 ch = towlower(s1.lastch); 239 cmap_add(map, s1.lastch, ch); 240 if (sflag && iswlower(ch)) 241 cset_add(squeeze, ch); 242 if (!next(&s1)) 243 goto endloop; 244 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 245 /* skip lower set */ 246 do { 247 if (!next(&s2)) 248 break; 249 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 250 goto again; 251 } else { 252 cmap_add(map, s1.lastch, s2.lastch); 253 if (sflag) 254 cset_add(squeeze, s2.lastch); 255 } 256 (void)next(&s2); 257 } 258endloop: 259 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 260 /* 261 * This is somewhat tricky: since the character set is 262 * potentially huge, we need to avoid allocating a map 263 * entry for every character. 
Our strategy is to set the 264 * default mapping to the last character of string #2 265 * (= the one that gets automatically repeated), then to 266 * add back identity mappings for characters that should 267 * remain unchanged. We don't waste space on identity mappings 268 * for non-characters with the -C option; those are simulated 269 * in the I/O loop. 270 */ 271 s2.str = argv[1]; 272 s2.state = NORMAL; 273 for (cnt = 0; cnt < WINT_MAX; cnt++) { 274 if (Cflag && !iswrune(cnt)) 275 continue; 276 if (cmap_lookup(map, cnt) == OOBCH) { 277 if (next(&s2)) { 278 cmap_add(map, cnt, s2.lastch); 279 if (sflag) 280 cset_add(squeeze, s2.lastch); 281 } 282 } else 283 cmap_add(map, cnt, cnt); 284 if ((s2.state == EOS || s2.state == INFINITE) && 285 cnt >= cmap_max(map)) 286 break; 287 } 288 cmap_default(map, s2.lastch); 289 } else if (Cflag) { 290 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 291 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 292 *p++ = cnt; 293 else 294 cmap_add(map, cnt, cnt); 295 } 296 n = p - carray; 297 if (Cflag && n > 1) 298 (void)mergesort(carray, n, sizeof(*carray), charcoll); 299 300 s2.str = argv[1]; 301 s2.state = NORMAL; 302 for (cnt = 0; cnt < n; cnt++) { 303 (void)next(&s2); 304 cmap_add(map, carray[cnt], s2.lastch); 305 /* 306 * Chars taken from s2 can be different this time 307 * due to lack of complex upper/lower processing, 308 * so fill string2 again to not miss some. 
309 */ 310 if (sflag) 311 cset_add(squeeze, s2.lastch); 312 } 313 } 314 315 cset_cache(squeeze); 316 cmap_cache(map); 317 318 if (sflag) 319 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 320 if (!Cflag || iswrune(ch)) 321 ch = cmap_lookup(map, ch); 322 if (lastch != ch || !cset_in(squeeze, ch)) { 323 lastch = ch; 324 (void)putwchar(ch); 325 } 326 } 327 else 328 while ((ch = getwchar()) != WEOF) { 329 if (!Cflag || iswrune(ch)) 330 ch = cmap_lookup(map, ch); 331 (void)putwchar(ch); 332 } 333 if (ferror(stdin)) 334 err(1, NULL); 335 exit (0); 336} 337 338static struct cset * 339setup(char *arg, STR *str, int cflag, int Cflag) 340{ 341 struct cset *cs; 342 343 cs = cset_alloc(); 344 if (cs == NULL) 345 err(1, NULL); 346 str->str = arg; 347 while (next(str)) 348 cset_add(cs, str->lastch); 349 if (Cflag) 350 cset_addclass(cs, wctype("rune"), true); 351 if (cflag || Cflag) 352 cset_invert(cs); 353 cset_cache(cs); 354 return (cs); 355} 356 357int 358charcoll(const void *a, const void *b) 359{ 360 static char sa[2], sb[2]; 361 362 sa[0] = *(const int *)a; 363 sb[0] = *(const int *)b; 364 return (strcoll(sa, sb)); 365} 366 367static void 368usage(void) 369{ 370 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 371 "usage: tr [-Ccsu] string1 string2", 372 " tr [-Ccu] -d string1", 373 " tr [-Ccu] -s string1", 374 " tr [-Ccu] -ds string1 string2"); 375 exit(1); 376} 377