1247128Sbrooks/* $NetBSD: vis.c,v 1.22 2013/02/20 17:04:45 christos Exp $ */ 2247128Sbrooks 3247128Sbrooks/*- 4247128Sbrooks * Copyright (c) 1989, 1993 5247128Sbrooks * The Regents of the University of California. All rights reserved. 6247128Sbrooks * 7247128Sbrooks * Redistribution and use in source and binary forms, with or without 8247128Sbrooks * modification, are permitted provided that the following conditions 9247128Sbrooks * are met: 10247128Sbrooks * 1. Redistributions of source code must retain the above copyright 11247128Sbrooks * notice, this list of conditions and the following disclaimer. 12247128Sbrooks * 2. Redistributions in binary form must reproduce the above copyright 13247128Sbrooks * notice, this list of conditions and the following disclaimer in the 14247128Sbrooks * documentation and/or other materials provided with the distribution. 15247128Sbrooks * 3. Neither the name of the University nor the names of its contributors 16247128Sbrooks * may be used to endorse or promote products derived from this software 17247128Sbrooks * without specific prior written permission. 18247128Sbrooks * 19247128Sbrooks * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20247128Sbrooks * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21247128Sbrooks * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22247128Sbrooks * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23247128Sbrooks * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24247128Sbrooks * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25247128Sbrooks * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26247128Sbrooks * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27247128Sbrooks * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28247128Sbrooks * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29247128Sbrooks * SUCH DAMAGE. 30247128Sbrooks */ 31247128Sbrooks 32247128Sbrooks#include <sys/cdefs.h> 33247128Sbrooks#ifndef lint 34247128Sbrooks__COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 35247128Sbrooks The Regents of the University of California. All rights reserved."); 36247128Sbrooks#endif /* not lint */ 37247128Sbrooks 38247128Sbrooks#ifndef lint 39247128Sbrooks#if 0 40247128Sbrooksstatic char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 6/6/93"; 41247128Sbrooks#endif 42247128Sbrooks__RCSID("$NetBSD: vis.c,v 1.22 2013/02/20 17:04:45 christos Exp $"); 43247128Sbrooks#endif /* not lint */ 44247128Sbrooks 45247128Sbrooks#include <stdio.h> 46247128Sbrooks#include <string.h> 47247128Sbrooks#include <stdlib.h> 48247128Sbrooks#include <string.h> 49247128Sbrooks#include <errno.h> 50247128Sbrooks#include <wchar.h> 51247128Sbrooks#include <limits.h> 52247128Sbrooks#include <unistd.h> 53247128Sbrooks#include <err.h> 54247128Sbrooks#include <vis.h> 55247128Sbrooks 56247128Sbrooks#include "extern.h" 57247128Sbrooks 58247128Sbrooksstatic int eflags, fold, foldwidth = 80, none, markeol; 59247128Sbrooks#ifdef DEBUG 60247128Sbrooksint debug; 61247128Sbrooks#endif 62247128Sbrooksstatic const char *extra = ""; 63247128Sbrooks 64247128Sbrooksstatic void process(FILE *); 65247128Sbrooks 66247128Sbrooksint 67247128Sbrooksmain(int argc, char *argv[]) 68247128Sbrooks{ 69247128Sbrooks FILE *fp; 70247128Sbrooks int ch; 71247128Sbrooks int rval; 72247128Sbrooks 73247128Sbrooks while ((ch = getopt(argc, argv, "bcde:F:fhlmnostw")) != -1) 74247128Sbrooks switch((char)ch) { 75247128Sbrooks case 'b': 76247128Sbrooks eflags |= VIS_NOSLASH; 77247128Sbrooks break; 78247128Sbrooks case 'c': 79247128Sbrooks eflags |= VIS_CSTYLE; 80247128Sbrooks break; 81247128Sbrooks#ifdef DEBUG 82247128Sbrooks case 'd': 83247128Sbrooks debug++; 84247128Sbrooks break; 85247128Sbrooks#endif 86247128Sbrooks case 'e': 87247128Sbrooks extra = optarg; 88247128Sbrooks break; 89247128Sbrooks case 'F': 90247128Sbrooks if ((foldwidth = atoi(optarg)) < 5) { 91247128Sbrooks errx(1, "can't fold lines to less than 5 cols"); 92247128Sbrooks /* NOTREACHED */ 93247128Sbrooks } 94247128Sbrooks markeol++; 95247128Sbrooks break; 96247128Sbrooks case 'f': 97247128Sbrooks fold++; /* fold output lines to 80 cols */ 98247128Sbrooks break; /* using hidden newline */ 99247128Sbrooks case 'h': 100247128Sbrooks eflags |= VIS_HTTPSTYLE; 101247128Sbrooks break; 102247128Sbrooks case 'l': 103247128Sbrooks markeol++; /* mark end of line with \$ */ 104247128Sbrooks break; 105247128Sbrooks case 'm': 106247128Sbrooks eflags |= VIS_MIMESTYLE; 107247128Sbrooks if (foldwidth == 80) 108247128Sbrooks foldwidth = 76; 109247128Sbrooks break; 110247128Sbrooks case 'n': 111247128Sbrooks none++; 112247128Sbrooks break; 113247128Sbrooks case 'o': 114247128Sbrooks eflags |= VIS_OCTAL; 115247128Sbrooks break; 116247128Sbrooks case 's': 117247128Sbrooks eflags |= VIS_SAFE; 118247128Sbrooks break; 119247128Sbrooks case 't': 120247128Sbrooks eflags |= VIS_TAB; 121247128Sbrooks break; 122247128Sbrooks case 'w': 123247128Sbrooks eflags |= VIS_WHITE; 124247128Sbrooks break; 125247128Sbrooks case '?': 126247128Sbrooks default: 127247128Sbrooks (void)fprintf(stderr, 128247128Sbrooks "Usage: %s [-bcfhlmnostw] [-e extra]" 129247128Sbrooks " [-F foldwidth] [file ...]\n", getprogname()); 130247128Sbrooks return 1; 131247128Sbrooks } 132247128Sbrooks 133247128Sbrooks if ((eflags & (VIS_HTTPSTYLE|VIS_MIMESTYLE)) == 134247128Sbrooks (VIS_HTTPSTYLE|VIS_MIMESTYLE)) 135247128Sbrooks errx(1, "Can't specify -m and -h at the same time"); 136247128Sbrooks 137247128Sbrooks argc -= optind; 138247128Sbrooks argv += optind; 139247128Sbrooks 140247128Sbrooks rval = 0; 141247128Sbrooks 142247128Sbrooks if (*argv) 143247128Sbrooks while (*argv) { 144247128Sbrooks if ((fp = fopen(*argv, "r")) != NULL) { 145247128Sbrooks process(fp); 146247128Sbrooks (void)fclose(fp); 147247128Sbrooks } else { 148247128Sbrooks warn("%s", *argv); 149247128Sbrooks rval = 1; 150247128Sbrooks } 151247128Sbrooks argv++; 152247128Sbrooks } 153247128Sbrooks else 154247128Sbrooks process(stdin); 155247128Sbrooks return rval; 156247128Sbrooks} 157247128Sbrooks 158247128Sbrooksstatic void 159247128Sbrooksprocess(FILE *fp) 160247128Sbrooks{ 161247128Sbrooks static int col = 0; 162247128Sbrooks static char nul[] = "\0"; 163247128Sbrooks char *cp = nul + 1; /* so *(cp-1) starts out != '\n' */ 164247128Sbrooks wint_t c, c1, rachar; 165247128Sbrooks char mbibuff[2 * MB_LEN_MAX + 1]; /* max space for 2 wchars */ 166247128Sbrooks char buff[4 * MB_LEN_MAX + 1]; /* max encoding length for one char */ 167247128Sbrooks int mbilen, cerr = 0, raerr = 0; 168247128Sbrooks 169247128Sbrooks /* 170247128Sbrooks * The input stream is considered to be multibyte characters. 171247128Sbrooks * The input loop will read this data inputing one character, 172247128Sbrooks * possibly multiple bytes, at a time and converting each to 173247128Sbrooks * a wide character wchar_t. 174247128Sbrooks * 175247128Sbrooks * The vis(3) functions, however, require single either bytes 176247128Sbrooks * or a multibyte string as their arguments. So we convert 177247128Sbrooks * our input wchar_t and the following look-ahead wchar_t to 178247128Sbrooks * a multibyte string for processing by vis(3). 179247128Sbrooks */ 180247128Sbrooks 181247128Sbrooks /* Read one multibyte character, store as wchar_t */ 182247128Sbrooks c = getwc(fp); 183247128Sbrooks if (c == WEOF && errno == EILSEQ) { 184247128Sbrooks /* Error in multibyte data. Read one byte. */ 185247128Sbrooks c = (wint_t)getc(fp); 186247128Sbrooks cerr = 1; 187247128Sbrooks } 188247128Sbrooks while (c != WEOF) { 189247128Sbrooks /* Clear multibyte input buffer. */ 190247128Sbrooks memset(mbibuff, 0, sizeof(mbibuff)); 191247128Sbrooks /* Read-ahead next multibyte character. */ 192247128Sbrooks if (!cerr) 193247128Sbrooks rachar = getwc(fp); 194247128Sbrooks if (cerr || (rachar == WEOF && errno == EILSEQ)) { 195247128Sbrooks /* Error in multibyte data. Read one byte. */ 196247128Sbrooks rachar = (wint_t)getc(fp); 197247128Sbrooks raerr = 1; 198247128Sbrooks } 199247128Sbrooks if (none) { 200247128Sbrooks /* Handle -n flag. */ 201247128Sbrooks cp = buff; 202247128Sbrooks *cp++ = c; 203247128Sbrooks if (c == '\\') 204247128Sbrooks *cp++ = '\\'; 205247128Sbrooks *cp = '\0'; 206247128Sbrooks } else if (markeol && c == '\n') { 207247128Sbrooks /* Handle -l flag. */ 208247128Sbrooks cp = buff; 209247128Sbrooks if ((eflags & VIS_NOSLASH) == 0) 210247128Sbrooks *cp++ = '\\'; 211247128Sbrooks *cp++ = '$'; 212247128Sbrooks *cp++ = '\n'; 213247128Sbrooks *cp = '\0'; 214247128Sbrooks } else { 215247128Sbrooks /* 216247128Sbrooks * Convert character using vis(3) library. 217247128Sbrooks * At this point we will process one character. 218247128Sbrooks * But we must pass the vis(3) library this 219247128Sbrooks * character plus the next one because the next 220247128Sbrooks * one is used as a look-ahead to decide how to 221247128Sbrooks * encode this one under certain circumstances. 222247128Sbrooks * 223247128Sbrooks * Since our characters may be multibyte, e.g., 224247128Sbrooks * in the UTF-8 locale, we cannot use vis() and 225247128Sbrooks * svis() which require byte input, so we must 226247128Sbrooks * create a multibyte string and use strvisx(). 227247128Sbrooks */ 228247128Sbrooks /* Treat EOF as a NUL char. */ 229247128Sbrooks c1 = rachar; 230247128Sbrooks if (c1 == WEOF) 231247128Sbrooks c1 = L'\0'; 232247128Sbrooks /* 233247128Sbrooks * If we hit a multibyte conversion error above, 234247128Sbrooks * insert byte directly into string buff because 235247128Sbrooks * wctomb() will fail. Else convert wchar_t to 236247128Sbrooks * multibyte using wctomb(). 237247128Sbrooks */ 238247128Sbrooks if (cerr) { 239247128Sbrooks *mbibuff = (char)c; 240247128Sbrooks mbilen = 1; 241247128Sbrooks } else 242247128Sbrooks mbilen = wctomb(mbibuff, c); 243247128Sbrooks /* Same for look-ahead character. */ 244247128Sbrooks if (raerr) 245247128Sbrooks mbibuff[mbilen] = (char)c1; 246247128Sbrooks else 247247128Sbrooks wctomb(mbibuff + mbilen, c1); 248247128Sbrooks /* Perform encoding on just first character. */ 249247128Sbrooks (void) strsenvisx(buff, 4 * MB_LEN_MAX, mbibuff, 250247128Sbrooks 1, eflags, extra, &cerr); 251247128Sbrooks } 252247128Sbrooks 253247128Sbrooks cp = buff; 254247128Sbrooks if (fold) { 255247128Sbrooks#ifdef DEBUG 256247128Sbrooks if (debug) 257247128Sbrooks (void)printf("<%02d,", col); 258247128Sbrooks#endif 259247128Sbrooks col = foldit(cp, col, foldwidth, eflags); 260247128Sbrooks#ifdef DEBUG 261247128Sbrooks if (debug) 262247128Sbrooks (void)printf("%02d>", col); 263247128Sbrooks#endif 264247128Sbrooks } 265247128Sbrooks do { 266247128Sbrooks (void)putchar(*cp); 267247128Sbrooks } while (*++cp); 268247128Sbrooks c = rachar; 269247128Sbrooks cerr = raerr; 270247128Sbrooks } 271247128Sbrooks /* 272247128Sbrooks * terminate partial line with a hidden newline 273247128Sbrooks */ 274247128Sbrooks if (fold && *(cp - 1) != '\n') 275247128Sbrooks (void)printf(eflags & VIS_MIMESTYLE ? "=\n" : "\\\n"); 276247128Sbrooks} 277