1/* $Id: term_ascii.c,v 1.66 2020/09/09 13:45:05 schwarze Exp $ */
2/*
3 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21
22#include <assert.h>
23#if HAVE_WCHAR
24#include <langinfo.h>
25#include <locale.h>
26#endif
27#include <stdint.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <unistd.h>
32#if HAVE_WCHAR
33#include <wchar.h>
34#endif
35
36#include "mandoc.h"
37#include "mandoc_aux.h"
38#include "out.h"
39#include "term.h"
40#include "manconf.h"
41#include "main.h"
42
/* Context setup and the callbacks that do not depend on the encoding. */
static struct termp	*ascii_init(enum termenc enc,
			    const struct manoutput *outopts);
static void		 ascii_begin(struct termp *p);
static void		 ascii_end(struct termp *p);
static int		 ascii_hspan(const struct termp *p,
			    const struct roffsu *su);
static void		 ascii_setwidth(struct termp *p, int iop, int width);

/* Output callbacks writing narrow characters to standard output. */
static void		 ascii_advance(struct termp *p, size_t len);
static void		 ascii_endline(struct termp *p);
static void		 ascii_letter(struct termp *p, int c);
static size_t		 ascii_width(const struct termp *p, int c);

#if HAVE_WCHAR
/* Output callbacks writing wide characters to standard output. */
static void		 locale_advance(struct termp *p, size_t len);
static void		 locale_endline(struct termp *p);
static void		 locale_letter(struct termp *p, int c);
static size_t		 locale_width(const struct termp *p, int c);
#endif
60
61
62static struct termp *
63ascii_init(enum termenc enc, const struct manoutput *outopts)
64{
65#if HAVE_WCHAR
66	char		*v;
67#endif
68	struct termp	*p;
69
70	p = mandoc_calloc(1, sizeof(*p));
71	p->tcol = p->tcols = mandoc_calloc(1, sizeof(*p->tcol));
72	p->maxtcol = 1;
73
74	p->line = 1;
75	p->defrmargin = p->lastrmargin = 78;
76	p->fontq = mandoc_reallocarray(NULL,
77	     (p->fontsz = 8), sizeof(*p->fontq));
78	p->fontq[0] = p->fontl = TERMFONT_NONE;
79
80	p->begin = ascii_begin;
81	p->end = ascii_end;
82	p->hspan = ascii_hspan;
83	p->type = TERMTYPE_CHAR;
84
85	p->enc = TERMENC_ASCII;
86	p->advance = ascii_advance;
87	p->endline = ascii_endline;
88	p->letter = ascii_letter;
89	p->setwidth = ascii_setwidth;
90	p->width = ascii_width;
91
92#if HAVE_WCHAR
93	if (enc != TERMENC_ASCII) {
94
95		/*
96		 * Do not change any of this to LC_ALL.  It might break
97		 * the formatting by subtly changing the behaviour of
98		 * various functions, for example strftime(3).  As a
99		 * worst case, it might even cause buffer overflows.
100		 */
101
102		v = enc == TERMENC_LOCALE ?
103		    setlocale(LC_CTYPE, "") :
104		    setlocale(LC_CTYPE, UTF8_LOCALE);
105
106		/*
107		 * We only support UTF-8,
108		 * so revert to ASCII for anything else.
109		 */
110
111		if (v != NULL &&
112		    strcmp(nl_langinfo(CODESET), "UTF-8") != 0)
113			v = setlocale(LC_CTYPE, "C");
114
115		if (v != NULL && MB_CUR_MAX > 1) {
116			p->enc = TERMENC_UTF8;
117			p->advance = locale_advance;
118			p->endline = locale_endline;
119			p->letter = locale_letter;
120			p->width = locale_width;
121		}
122	}
123#endif
124
125	if (outopts->mdoc) {
126		p->mdocstyle = 1;
127		p->defindent = 5;
128	}
129	if (outopts->indent)
130		p->defindent = outopts->indent;
131	if (outopts->width)
132		p->defrmargin = outopts->width;
133	if (outopts->synopsisonly)
134		p->synopsisonly = 1;
135
136	assert(p->defindent < UINT16_MAX);
137	assert(p->defrmargin < UINT16_MAX);
138	return p;
139}
140
141void *
142ascii_alloc(const struct manoutput *outopts)
143{
144
145	return ascii_init(TERMENC_ASCII, outopts);
146}
147
148void *
149utf8_alloc(const struct manoutput *outopts)
150{
151
152	return ascii_init(TERMENC_UTF8, outopts);
153}
154
155void *
156locale_alloc(const struct manoutput *outopts)
157{
158
159	return ascii_init(TERMENC_LOCALE, outopts);
160}
161
162static void
163ascii_setwidth(struct termp *p, int iop, int width)
164{
165
166	width /= 24;
167	p->tcol->rmargin = p->defrmargin;
168	if (iop > 0)
169		p->defrmargin += width;
170	else if (iop == 0)
171		p->defrmargin = width ? (size_t)width : p->lastrmargin;
172	else if (p->defrmargin > (size_t)width)
173		p->defrmargin -= width;
174	else
175		p->defrmargin = 0;
176	if (p->defrmargin > 1000)
177		p->defrmargin = 1000;
178	p->lastrmargin = p->tcol->rmargin;
179	p->tcol->rmargin = p->maxrmargin = p->defrmargin;
180}
181
182void
183terminal_sepline(void *arg)
184{
185	struct termp	*p;
186	size_t		 i;
187
188	p = (struct termp *)arg;
189	(*p->endline)(p);
190	for (i = 0; i < p->defrmargin; i++)
191		(*p->letter)(p, '-');
192	(*p->endline)(p);
193	(*p->endline)(p);
194}
195
196static size_t
197ascii_width(const struct termp *p, int c)
198{
199	return c != ASCII_BREAK;
200}
201
/*
 * Release a formatter context received as a void pointer
 * by handing it to term_free().
 */
void
ascii_free(void *arg)
{
	struct termp	*p = arg;

	term_free(p);
}
208
/*
 * Write one character to standard output.
 * The formatter context is not needed for that.
 */
static void
ascii_letter(struct termp *p, int c)
{
	(void)p;
	putchar(c);
}
215
216static void
217ascii_begin(struct termp *p)
218{
219
220	(*p->headf)(p, p->argf);
221}
222
223static void
224ascii_end(struct termp *p)
225{
226
227	(*p->footf)(p, p->argf);
228}
229
230static void
231ascii_endline(struct termp *p)
232{
233
234	p->line++;
235	if ((int)p->tcol->offset > p->ti)
236		p->tcol->offset -= p->ti;
237	else
238		p->tcol->offset = 0;
239	p->ti = 0;
240	putchar('\n');
241}
242
/*
 * Write len blanks to standard output, but never more than 256.
 */
static void
ascii_advance(struct termp *p, size_t len)
{
	size_t		 left;

	/*
	 * XXX There used to be an "assert(len < UINT16_MAX)" here.
	 * That was not quite right because an input document could
	 * trigger it merely by providing large input.
	 * For now, simply truncate.
	 */
	left = len > 256 ? 256 : len;
	while (left-- > 0)
		putchar(' ');
}
259
260static int
261ascii_hspan(const struct termp *p, const struct roffsu *su)
262{
263	double		 r;
264
265	switch (su->unit) {
266	case SCALE_BU:
267		r = su->scale;
268		break;
269	case SCALE_CM:
270		r = su->scale * 240.0 / 2.54;
271		break;
272	case SCALE_FS:
273		r = su->scale * 65536.0;
274		break;
275	case SCALE_IN:
276		r = su->scale * 240.0;
277		break;
278	case SCALE_MM:
279		r = su->scale * 0.24;
280		break;
281	case SCALE_VS:
282	case SCALE_PC:
283		r = su->scale * 40.0;
284		break;
285	case SCALE_PT:
286		r = su->scale * 10.0 / 3.0;
287		break;
288	case SCALE_EN:
289	case SCALE_EM:
290		r = su->scale * 24.0;
291		break;
292	default:
293		abort();
294	}
295	return r > 0.0 ? r + 0.01 : r - 0.01;
296}
297
298const char *
299ascii_uc2str(int uc)
300{
301	static const char nbrsp[2] = { ASCII_NBRSP, '\0' };
302	static const char *tab[] = {
303	"<NUL>","<SOH>","<STX>","<ETX>","<EOT>","<ENQ>","<ACK>","<BEL>",
304	"<BS>",	"\t",	"<LF>",	"<VT>",	"<FF>",	"<CR>",	"<SO>",	"<SI>",
305	"<DLE>","<DC1>","<DC2>","<DC3>","<DC4>","<NAK>","<SYN>","<ETB>",
306	"<CAN>","<EM>",	"<SUB>","<ESC>","<FS>",	"<GS>",	"<RS>",	"<US>",
307	" ",	"!",	"\"",	"#",	"$",	"%",	"&",	"'",
308	"(",	")",	"*",	"+",	",",	"-",	".",	"/",
309	"0",	"1",	"2",	"3",	"4",	"5",	"6",	"7",
310	"8",	"9",	":",	";",	"<",	"=",	">",	"?",
311	"@",	"A",	"B",	"C",	"D",	"E",	"F",	"G",
312	"H",	"I",	"J",	"K",	"L",	"M",	"N",	"O",
313	"P",	"Q",	"R",	"S",	"T",	"U",	"V",	"W",
314	"X",	"Y",	"Z",	"[",	"\\",	"]",	"^",	"_",
315	"`",	"a",	"b",	"c",	"d",	"e",	"f",	"g",
316	"h",	"i",	"j",	"k",	"l",	"m",	"n",	"o",
317	"p",	"q",	"r",	"s",	"t",	"u",	"v",	"w",
318	"x",	"y",	"z",	"{",	"|",	"}",	"~",	"<DEL>",
319	"<80>",	"<81>",	"<82>",	"<83>",	"<84>",	"<85>",	"<86>",	"<87>",
320	"<88>",	"<89>",	"<8A>",	"<8B>",	"<8C>",	"<8D>",	"<8E>",	"<8F>",
321	"<90>",	"<91>",	"<92>",	"<93>",	"<94>",	"<95>",	"<96>",	"<97>",
322	"<98>",	"<99>",	"<9A>",	"<9B>",	"<9C>",	"<9D>",	"<9E>",	"<9F>",
323	nbrsp,	"!",	"/\bc",	"-\bL",	"o\bx",	"=\bY",	"|",	"<section>",
324	"\"",	"(C)",	"_\ba",	"<<",	"~",	"",	"(R)",	"-",
325	"<degree>","+-","^2",	"^3",	"'","<micro>","<paragraph>",".",
326	",",	"^1",	"_\bo",	">>",	"1/4",	"1/2",	"3/4",	"?",
327	"`\bA",	"'\bA",	"^\bA",	"~\bA",	"\"\bA","o\bA",	"AE",	",\bC",
328	"`\bE",	"'\bE",	"^\bE",	"\"\bE","`\bI",	"'\bI",	"^\bI",	"\"\bI",
329	"Dh",	"~\bN",	"`\bO",	"'\bO",	"^\bO",	"~\bO",	"\"\bO","x",
330	"/\bO",	"`\bU",	"'\bU",	"^\bU",	"\"\bU","'\bY",	"Th",	"ss",
331	"`\ba",	"'\ba",	"^\ba",	"~\ba",	"\"\ba","o\ba",	"ae",	",\bc",
332	"`\be",	"'\be",	"^\be",	"\"\be","`\bi",	"'\bi",	"^\bi",	"\"\bi",
333	"dh",	"~\bn",	"`\bo",	"'\bo",	"^\bo",	"~\bo",	"\"\bo","/",
334	"/\bo",	"`\bu",	"'\bu",	"^\bu",	"\"\bu","'\by",	"th",	"\"\by",
335	"A",	"a",	"A",	"a",	"A",	"a",	"'\bC",	"'\bc",
336	"^\bC",	"^\bc",	"C",	"c",	"C",	"c",	"D",	"d",
337	"/\bD",	"/\bd",	"E",	"e",	"E",	"e",	"E",	"e",
338	"E",	"e",	"E",	"e",	"^\bG",	"^\bg",	"G",	"g",
339	"G",	"g",	",\bG",	",\bg",	"^\bH",	"^\bh",	"/\bH",	"/\bh",
340	"~\bI",	"~\bi",	"I",	"i",	"I",	"i",	"I",	"i",
341	"I",	"i",	"IJ",	"ij",	"^\bJ",	"^\bj",	",\bK",	",\bk",
342	"q",	"'\bL",	"'\bl",	",\bL",	",\bl",	"L",	"l",	"L",
343	"l",	"/\bL",	"/\bl",	"'\bN",	"'\bn",	",\bN",	",\bn",	"N",
344	"n",	"'n",	"Ng",	"ng",	"O",	"o",	"O",	"o",
345	"O",	"o",	"OE",	"oe",	"'\bR",	"'\br",	",\bR",	",\br",
346	"R",	"r",	"'\bS",	"'\bs",	"^\bS",	"^\bs",	",\bS",	",\bs",
347	"S",	"s",	",\bT",	",\bt",	"T",	"t",	"/\bT",	"/\bt",
348	"~\bU",	"~\bu",	"U",	"u",	"U",	"u",	"U",	"u",
349	"U",	"u",	"U",	"u",	"^\bW",	"^\bw",	"^\bY",	"^\by",
350	"\"\bY","'\bZ",	"'\bz",	"Z",	"z",	"Z",	"z",	"s",
351	"b",	"B",	"B",	"b",	"6",	"6",	"O",	"C",
352	"c",	"D",	"D",	"D",	"d",	"d",	"3",	"@",
353	"E",	"F",	",\bf",	"G",	"G",	"hv",	"I",	"/\bI",
354	"K",	"k",	"/\bl",	"l",	"W",	"N",	"n",	"~\bO",
355	"O",	"o",	"OI",	"oi",	"P",	"p",	"YR",	"2",
356	"2",	"SH",	"sh",	"t",	"T",	"t",	"T",	"U",
357	"u",	"Y",	"V",	"Y",	"y",	"/\bZ",	"/\bz",	"ZH",
358	"ZH",	"zh",	"zh",	"/\b2",	"5",	"5",	"ts",	"w",
359	"|",	"||",	"|=",	"!",	"DZ",	"Dz",	"dz",	"LJ",
360	"Lj",	"lj",	"NJ",	"Nj",	"nj",	"A",	"a",	"I",
361	"i",	"O",	"o",	"U",	"u",	"U",	"u",	"U",
362	"u",	"U",	"u",	"U",	"u",	"@",	"A",	"a",
363	"A",	"a",	"AE",	"ae",	"/\bG",	"/\bg",	"G",	"g",
364	"K",	"k",	"O",	"o",	"O",	"o",	"ZH",	"zh",
365	"j",	"DZ",	"Dz",	"dz",	"'\bG",	"'\bg",	"HV",	"W",
366	"`\bN",	"`\bn",	"A",	"a",	"'\bAE","'\bae","O",	"o"};
367
368	assert(uc >= 0);
369	if ((size_t)uc < sizeof(tab)/sizeof(tab[0]))
370		return tab[uc];
371	return mchars_uc2str(uc);
372}
373
374#if HAVE_WCHAR
375static size_t
376locale_width(const struct termp *p, int c)
377{
378	int		rc;
379
380	if (c == ASCII_NBRSP)
381		c = ' ';
382	rc = wcwidth(c);
383	if (rc < 0)
384		rc = 0;
385	return rc;
386}
387
/*
 * Write len wide blanks to standard output, but never more than 256.
 */
static void
locale_advance(struct termp *p, size_t len)
{
	size_t		 left;

	/*
	 * XXX There used to be an "assert(len < UINT16_MAX)" here.
	 * That was not quite right because an input document could
	 * trigger it merely by providing large input.
	 * For now, simply truncate.
	 */
	left = len > 256 ? 256 : len;
	while (left-- > 0)
		putwchar(L' ');
}
404
405static void
406locale_endline(struct termp *p)
407{
408
409	p->line++;
410	if ((int)p->tcol->offset > p->ti)
411		p->tcol->offset -= p->ti;
412	else
413		p->tcol->offset = 0;
414	p->ti = 0;
415	putwchar(L'\n');
416}
417
/*
 * Write one wide character to standard output.
 * The formatter context is not needed for that.
 */
static void
locale_letter(struct termp *p, int c)
{
	(void)p;
	putwchar(c);
}
424#endif
425