1247128Sbrooks/*	$NetBSD: vis.c,v 1.22 2013/02/20 17:04:45 christos Exp $	*/
2247128Sbrooks
3247128Sbrooks/*-
4247128Sbrooks * Copyright (c) 1989, 1993
5247128Sbrooks *	The Regents of the University of California.  All rights reserved.
6247128Sbrooks *
7247128Sbrooks * Redistribution and use in source and binary forms, with or without
8247128Sbrooks * modification, are permitted provided that the following conditions
9247128Sbrooks * are met:
10247128Sbrooks * 1. Redistributions of source code must retain the above copyright
11247128Sbrooks *    notice, this list of conditions and the following disclaimer.
12247128Sbrooks * 2. Redistributions in binary form must reproduce the above copyright
13247128Sbrooks *    notice, this list of conditions and the following disclaimer in the
14247128Sbrooks *    documentation and/or other materials provided with the distribution.
15247128Sbrooks * 3. Neither the name of the University nor the names of its contributors
16247128Sbrooks *    may be used to endorse or promote products derived from this software
17247128Sbrooks *    without specific prior written permission.
18247128Sbrooks *
19247128Sbrooks * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20247128Sbrooks * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21247128Sbrooks * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22247128Sbrooks * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23247128Sbrooks * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24247128Sbrooks * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25247128Sbrooks * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26247128Sbrooks * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27247128Sbrooks * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28247128Sbrooks * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29247128Sbrooks * SUCH DAMAGE.
30247128Sbrooks */
31247128Sbrooks
32247128Sbrooks#include <sys/cdefs.h>
33247128Sbrooks#ifndef lint
34247128Sbrooks__COPYRIGHT("@(#) Copyright (c) 1989, 1993\
35247128Sbrooks The Regents of the University of California.  All rights reserved.");
36247128Sbrooks#endif /* not lint */
37247128Sbrooks
38247128Sbrooks#ifndef lint
39247128Sbrooks#if 0
40247128Sbrooksstatic char sccsid[] = "@(#)vis.c	8.1 (Berkeley) 6/6/93";
41247128Sbrooks#endif
42247128Sbrooks__RCSID("$NetBSD: vis.c,v 1.22 2013/02/20 17:04:45 christos Exp $");
43247128Sbrooks#endif /* not lint */
44247128Sbrooks
45247128Sbrooks#include <stdio.h>
46247128Sbrooks#include <string.h>
47247128Sbrooks#include <stdlib.h>
48247128Sbrooks#include <string.h>
49247128Sbrooks#include <errno.h>
50247128Sbrooks#include <wchar.h>
51247128Sbrooks#include <limits.h>
52247128Sbrooks#include <unistd.h>
53247128Sbrooks#include <err.h>
54247128Sbrooks#include <vis.h>
55247128Sbrooks
56247128Sbrooks#include "extern.h"
57247128Sbrooks
/* Flag bits handed to the vis(3) encoder and to foldit(); built from options. */
static int eflags;
/* Nonzero when -f asked for output lines to be folded. */
static int fold;
/* Fold column; 80 by default, replaced by -F, and 76 under -m if still 80. */
static int foldwidth = 80;
/* Nonzero under -n: bypass the vis(3) encoder and only double backslashes. */
static int none;
/* Nonzero when each newline is emitted as \$ (or $ under -b) plus newline. */
static int markeol;
#ifdef DEBUG
/* Raised once per -d; when set, fold columns are traced on stdout. */
int debug;
#endif
/* Argument of -e, passed to the encoder as its "extra" string. */
static const char *extra = "";

/* Encodes one input stream onto standard output. */
static void process(FILE *);
65247128Sbrooks
66247128Sbrooksint
67247128Sbrooksmain(int argc, char *argv[])
68247128Sbrooks{
69247128Sbrooks	FILE *fp;
70247128Sbrooks	int ch;
71247128Sbrooks	int rval;
72247128Sbrooks
73247128Sbrooks	while ((ch = getopt(argc, argv, "bcde:F:fhlmnostw")) != -1)
74247128Sbrooks		switch((char)ch) {
75247128Sbrooks		case 'b':
76247128Sbrooks			eflags |= VIS_NOSLASH;
77247128Sbrooks			break;
78247128Sbrooks		case 'c':
79247128Sbrooks			eflags |= VIS_CSTYLE;
80247128Sbrooks			break;
81247128Sbrooks#ifdef DEBUG
82247128Sbrooks		case 'd':
83247128Sbrooks			debug++;
84247128Sbrooks			break;
85247128Sbrooks#endif
86247128Sbrooks		case 'e':
87247128Sbrooks			extra = optarg;
88247128Sbrooks			break;
89247128Sbrooks		case 'F':
90247128Sbrooks			if ((foldwidth = atoi(optarg)) < 5) {
91247128Sbrooks				errx(1, "can't fold lines to less than 5 cols");
92247128Sbrooks				/* NOTREACHED */
93247128Sbrooks			}
94247128Sbrooks			markeol++;
95247128Sbrooks			break;
96247128Sbrooks		case 'f':
97247128Sbrooks			fold++;		/* fold output lines to 80 cols */
98247128Sbrooks			break;		/* using hidden newline */
99247128Sbrooks		case 'h':
100247128Sbrooks			eflags |= VIS_HTTPSTYLE;
101247128Sbrooks			break;
102247128Sbrooks		case 'l':
103247128Sbrooks			markeol++;	/* mark end of line with \$ */
104247128Sbrooks			break;
105247128Sbrooks		case 'm':
106247128Sbrooks			eflags |= VIS_MIMESTYLE;
107247128Sbrooks			if (foldwidth == 80)
108247128Sbrooks				foldwidth = 76;
109247128Sbrooks			break;
110247128Sbrooks		case 'n':
111247128Sbrooks			none++;
112247128Sbrooks			break;
113247128Sbrooks		case 'o':
114247128Sbrooks			eflags |= VIS_OCTAL;
115247128Sbrooks			break;
116247128Sbrooks		case 's':
117247128Sbrooks			eflags |= VIS_SAFE;
118247128Sbrooks			break;
119247128Sbrooks		case 't':
120247128Sbrooks			eflags |= VIS_TAB;
121247128Sbrooks			break;
122247128Sbrooks		case 'w':
123247128Sbrooks			eflags |= VIS_WHITE;
124247128Sbrooks			break;
125247128Sbrooks		case '?':
126247128Sbrooks		default:
127247128Sbrooks			(void)fprintf(stderr,
128247128Sbrooks			    "Usage: %s [-bcfhlmnostw] [-e extra]"
129247128Sbrooks			    " [-F foldwidth] [file ...]\n", getprogname());
130247128Sbrooks			return 1;
131247128Sbrooks		}
132247128Sbrooks
133247128Sbrooks	if ((eflags & (VIS_HTTPSTYLE|VIS_MIMESTYLE)) ==
134247128Sbrooks	    (VIS_HTTPSTYLE|VIS_MIMESTYLE))
135247128Sbrooks		errx(1, "Can't specify -m and -h at the same time");
136247128Sbrooks
137247128Sbrooks	argc -= optind;
138247128Sbrooks	argv += optind;
139247128Sbrooks
140247128Sbrooks	rval = 0;
141247128Sbrooks
142247128Sbrooks	if (*argv)
143247128Sbrooks		while (*argv) {
144247128Sbrooks			if ((fp = fopen(*argv, "r")) != NULL) {
145247128Sbrooks				process(fp);
146247128Sbrooks				(void)fclose(fp);
147247128Sbrooks			} else {
148247128Sbrooks				warn("%s", *argv);
149247128Sbrooks				rval = 1;
150247128Sbrooks			}
151247128Sbrooks			argv++;
152247128Sbrooks		}
153247128Sbrooks	else
154247128Sbrooks		process(stdin);
155247128Sbrooks	return rval;
156247128Sbrooks}
157247128Sbrooks
158247128Sbrooksstatic void
159247128Sbrooksprocess(FILE *fp)
160247128Sbrooks{
161247128Sbrooks	static int col = 0;
162247128Sbrooks	static char nul[] = "\0";
163247128Sbrooks	char *cp = nul + 1;	/* so *(cp-1) starts out != '\n' */
164247128Sbrooks	wint_t c, c1, rachar;
165247128Sbrooks	char mbibuff[2 * MB_LEN_MAX + 1]; /* max space for 2 wchars */
166247128Sbrooks	char buff[4 * MB_LEN_MAX + 1]; /* max encoding length for one char */
167247128Sbrooks	int mbilen, cerr = 0, raerr = 0;
168247128Sbrooks
169247128Sbrooks        /*
170247128Sbrooks         * The input stream is considered to be multibyte characters.
171247128Sbrooks         * The input loop will read this data inputing one character,
172247128Sbrooks	 * possibly multiple bytes, at a time and converting each to
173247128Sbrooks	 * a wide character wchar_t.
174247128Sbrooks         *
175247128Sbrooks	 * The vis(3) functions, however, require single either bytes
176247128Sbrooks	 * or a multibyte string as their arguments.  So we convert
177247128Sbrooks	 * our input wchar_t and the following look-ahead wchar_t to
178247128Sbrooks	 * a multibyte string for processing by vis(3).
179247128Sbrooks         */
180247128Sbrooks
181247128Sbrooks	/* Read one multibyte character, store as wchar_t */
182247128Sbrooks	c = getwc(fp);
183247128Sbrooks	if (c == WEOF && errno == EILSEQ) {
184247128Sbrooks		/* Error in multibyte data.  Read one byte. */
185247128Sbrooks		c = (wint_t)getc(fp);
186247128Sbrooks		cerr = 1;
187247128Sbrooks	}
188247128Sbrooks	while (c != WEOF) {
189247128Sbrooks		/* Clear multibyte input buffer. */
190247128Sbrooks		memset(mbibuff, 0, sizeof(mbibuff));
191247128Sbrooks		/* Read-ahead next multibyte character. */
192247128Sbrooks		if (!cerr)
193247128Sbrooks			rachar = getwc(fp);
194247128Sbrooks		if (cerr || (rachar == WEOF && errno == EILSEQ)) {
195247128Sbrooks			/* Error in multibyte data.  Read one byte. */
196247128Sbrooks			rachar = (wint_t)getc(fp);
197247128Sbrooks			raerr = 1;
198247128Sbrooks		}
199247128Sbrooks		if (none) {
200247128Sbrooks			/* Handle -n flag. */
201247128Sbrooks			cp = buff;
202247128Sbrooks			*cp++ = c;
203247128Sbrooks			if (c == '\\')
204247128Sbrooks				*cp++ = '\\';
205247128Sbrooks			*cp = '\0';
206247128Sbrooks		} else if (markeol && c == '\n') {
207247128Sbrooks			/* Handle -l flag. */
208247128Sbrooks			cp = buff;
209247128Sbrooks			if ((eflags & VIS_NOSLASH) == 0)
210247128Sbrooks				*cp++ = '\\';
211247128Sbrooks			*cp++ = '$';
212247128Sbrooks			*cp++ = '\n';
213247128Sbrooks			*cp = '\0';
214247128Sbrooks		} else {
215247128Sbrooks			/*
216247128Sbrooks			 * Convert character using vis(3) library.
217247128Sbrooks			 * At this point we will process one character.
218247128Sbrooks			 * But we must pass the vis(3) library this
219247128Sbrooks			 * character plus the next one because the next
220247128Sbrooks			 * one is used as a look-ahead to decide how to
221247128Sbrooks			 * encode this one under certain circumstances.
222247128Sbrooks			 *
223247128Sbrooks			 * Since our characters may be multibyte, e.g.,
224247128Sbrooks			 * in the UTF-8 locale, we cannot use vis() and
225247128Sbrooks			 * svis() which require byte input, so we must
226247128Sbrooks			 * create a multibyte string and use strvisx().
227247128Sbrooks			 */
228247128Sbrooks			/* Treat EOF as a NUL char. */
229247128Sbrooks			c1 = rachar;
230247128Sbrooks			if (c1 == WEOF)
231247128Sbrooks				c1 = L'\0';
232247128Sbrooks			/*
233247128Sbrooks			 * If we hit a multibyte conversion error above,
234247128Sbrooks			 * insert byte directly into string buff because
235247128Sbrooks			 * wctomb() will fail.  Else convert wchar_t to
236247128Sbrooks			 * multibyte using wctomb().
237247128Sbrooks			 */
238247128Sbrooks			if (cerr) {
239247128Sbrooks				*mbibuff = (char)c;
240247128Sbrooks				mbilen = 1;
241247128Sbrooks			} else
242247128Sbrooks				mbilen = wctomb(mbibuff, c);
243247128Sbrooks			/* Same for look-ahead character. */
244247128Sbrooks			if (raerr)
245247128Sbrooks				mbibuff[mbilen] = (char)c1;
246247128Sbrooks			else
247247128Sbrooks				wctomb(mbibuff + mbilen, c1);
248247128Sbrooks			/* Perform encoding on just first character. */
249247128Sbrooks			(void) strsenvisx(buff, 4 * MB_LEN_MAX, mbibuff,
250247128Sbrooks			    1, eflags, extra, &cerr);
251247128Sbrooks		}
252247128Sbrooks
253247128Sbrooks		cp = buff;
254247128Sbrooks		if (fold) {
255247128Sbrooks#ifdef DEBUG
256247128Sbrooks			if (debug)
257247128Sbrooks				(void)printf("<%02d,", col);
258247128Sbrooks#endif
259247128Sbrooks			col = foldit(cp, col, foldwidth, eflags);
260247128Sbrooks#ifdef DEBUG
261247128Sbrooks			if (debug)
262247128Sbrooks				(void)printf("%02d>", col);
263247128Sbrooks#endif
264247128Sbrooks		}
265247128Sbrooks		do {
266247128Sbrooks			(void)putchar(*cp);
267247128Sbrooks		} while (*++cp);
268247128Sbrooks		c = rachar;
269247128Sbrooks		cerr = raerr;
270247128Sbrooks	}
271247128Sbrooks	/*
272247128Sbrooks	 * terminate partial line with a hidden newline
273247128Sbrooks	 */
274247128Sbrooks	if (fold && *(cp - 1) != '\n')
275247128Sbrooks		(void)printf(eflags & VIS_MIMESTYLE ? "=\n" : "\\\n");
276247128Sbrooks}
277