1322249Sbapt/*	$Id: chars.c,v 1.71 2017/06/14 20:57:07 schwarze Exp $ */
2241675Suqs/*
3241675Suqs * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4322249Sbapt * Copyright (c) 2011, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5241675Suqs *
6241675Suqs * Permission to use, copy, modify, and distribute this software for any
7241675Suqs * purpose with or without fee is hereby granted, provided that the above
8241675Suqs * copyright notice and this permission notice appear in all copies.
9241675Suqs *
10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17241675Suqs */
18241675Suqs#include "config.h"
19241675Suqs
20275432Sbapt#include <sys/types.h>
21275432Sbapt
22241675Suqs#include <assert.h>
23241675Suqs#include <ctype.h>
24294113Sbapt#include <stddef.h>
25294113Sbapt#include <stdint.h>
26241675Suqs#include <stdlib.h>
27241675Suqs#include <string.h>
28241675Suqs
29241675Suqs#include "mandoc.h"
30274880Sbapt#include "mandoc_aux.h"
31294113Sbapt#include "mandoc_ohash.h"
32241675Suqs#include "libmandoc.h"
33241675Suqs
/*
 * One row of the special character table below.
 * The member order matters: the table initializers are positional,
 * and mchars_alloc() hands offsetof(struct ln, roffcode) to the
 * hash table as the location of the key.
 */
struct ln {
	const char	 roffcode[16];	/* escape name; used as hash key */
	const char	*ascii;		/* rendering for ASCII output */
	int		 unicode;	/* codepoint; 0 in some table rows */
};
39241675Suqs
40294113Sbapt/* Special break control characters. */
41294113Sbaptstatic const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
42294113Sbaptstatic const char ascii_break[2] = { ASCII_BREAK, '\0' };
43241675Suqs
44294113Sbaptstatic struct ln lines[] = {
45241675Suqs
46294113Sbapt	/* Spacing. */
47294113Sbapt	{ " ",			ascii_nbrsp,	0x00a0	},
48294113Sbapt	{ "~",			ascii_nbrsp,	0x00a0	},
49294113Sbapt	{ "0",			" ",		0x2002	},
50294113Sbapt	{ "|",			"",		0	},
51294113Sbapt	{ "^",			"",		0	},
52294113Sbapt	{ "&",			"",		0	},
53294113Sbapt	{ "%",			"",		0	},
54294113Sbapt	{ ":",			ascii_break,	0	},
55294113Sbapt	/* XXX The following three do not really belong here. */
56294113Sbapt	{ "t",			"",		0	},
57294113Sbapt	{ "c",			"",		0	},
58294113Sbapt	{ "}",			"",		0	},
59241675Suqs
60294113Sbapt	/* Lines. */
61294113Sbapt	{ "ba",			"|",		0x007c	},
62294113Sbapt	{ "br",			"|",		0x2502	},
63294113Sbapt	{ "ul",			"_",		0x005f	},
64322249Sbapt	{ "ru",			"_",		0x005f	},
65294113Sbapt	{ "rn",			"-",		0x203e	},
66294113Sbapt	{ "bb",			"|",		0x00a6	},
67294113Sbapt	{ "sl",			"/",		0x002f	},
68294113Sbapt	{ "rs",			"\\",		0x005c	},
69241675Suqs
70294113Sbapt	/* Text markers. */
71294113Sbapt	{ "ci",			"O",		0x25cb	},
72294113Sbapt	{ "bu",			"+\bo",		0x2022	},
73294113Sbapt	{ "dd",			"|\b=",		0x2021	},
74294113Sbapt	{ "dg",			"|\b-",		0x2020	},
75294113Sbapt	{ "lz",			"<>",		0x25ca	},
76294113Sbapt	{ "sq",			"[]",		0x25a1	},
77294113Sbapt	{ "ps",			"<par>",	0x00b6	},
78294113Sbapt	{ "sc",			"<sec>",	0x00a7	},
79294113Sbapt	{ "lh",			"<=",		0x261c	},
80294113Sbapt	{ "rh",			"=>",		0x261e	},
81294113Sbapt	{ "at",			"@",		0x0040	},
82294113Sbapt	{ "sh",			"#",		0x0023	},
83294113Sbapt	{ "CR",			"_|",		0x21b5	},
84294113Sbapt	{ "OK",			"\\/",		0x2713	},
85322249Sbapt	{ "CL",			"<club>",	0x2663	},
86322249Sbapt	{ "SP",			"<spade>",	0x2660	},
87322249Sbapt	{ "HE",			"<heart>",	0x2665	},
88322249Sbapt	{ "DI",			"<diamond>",	0x2666	},
89294113Sbapt
90294113Sbapt	/* Legal symbols. */
91294113Sbapt	{ "co",			"(C)",		0x00a9	},
92294113Sbapt	{ "rg",			"(R)",		0x00ae	},
93294113Sbapt	{ "tm",			"tm",		0x2122	},
94294113Sbapt
95294113Sbapt	/* Punctuation. */
96294113Sbapt	{ "em",			"--",		0x2014	},
97294113Sbapt	{ "en",			"-",		0x2013	},
98294113Sbapt	{ "hy",			"-",		0x2010	},
99294113Sbapt	{ "e",			"\\",		0x005c	},
100294113Sbapt	{ ".",			".",		0x002e	},
101294113Sbapt	{ "r!",			"!",		0x00a1	},
102294113Sbapt	{ "r?",			"?",		0x00bf	},
103294113Sbapt
104294113Sbapt	/* Quotes. */
105294113Sbapt	{ "Bq",			",,",		0x201e	},
106294113Sbapt	{ "bq",			",",		0x201a	},
107294113Sbapt	{ "lq",			"\"",		0x201c	},
108294113Sbapt	{ "rq",			"\"",		0x201d	},
109322249Sbapt	{ "Lq",			"\"",		0x201c	},
110322249Sbapt	{ "Rq",			"\"",		0x201d	},
111294113Sbapt	{ "oq",			"`",		0x2018	},
112294113Sbapt	{ "cq",			"\'",		0x2019	},
113294113Sbapt	{ "aq",			"\'",		0x0027	},
114294113Sbapt	{ "dq",			"\"",		0x0022	},
115294113Sbapt	{ "Fo",			"<<",		0x00ab	},
116294113Sbapt	{ "Fc",			">>",		0x00bb	},
117294113Sbapt	{ "fo",			"<",		0x2039	},
118294113Sbapt	{ "fc",			">",		0x203a	},
119294113Sbapt
120294113Sbapt	/* Brackets. */
121294113Sbapt	{ "lB",			"[",		0x005b	},
122294113Sbapt	{ "rB",			"]",		0x005d	},
123294113Sbapt	{ "lC",			"{",		0x007b	},
124294113Sbapt	{ "rC",			"}",		0x007d	},
125294113Sbapt	{ "la",			"<",		0x27e8	},
126294113Sbapt	{ "ra",			">",		0x27e9	},
127294113Sbapt	{ "bv",			"|",		0x23aa	},
128294113Sbapt	{ "braceex",		"|",		0x23aa	},
129294113Sbapt	{ "bracketlefttp",	"|",		0x23a1	},
130294113Sbapt	{ "bracketleftbt",	"|",		0x23a3	},
131294113Sbapt	{ "bracketleftex",	"|",		0x23a2	},
132294113Sbapt	{ "bracketrighttp",	"|",		0x23a4	},
133294113Sbapt	{ "bracketrightbt",	"|",		0x23a6	},
134294113Sbapt	{ "bracketrightex",	"|",		0x23a5	},
135294113Sbapt	{ "lt",			",-",		0x23a7	},
136294113Sbapt	{ "bracelefttp",	",-",		0x23a7	},
137294113Sbapt	{ "lk",			"{",		0x23a8	},
138294113Sbapt	{ "braceleftmid",	"{",		0x23a8	},
139294113Sbapt	{ "lb",			"`-",		0x23a9	},
140294113Sbapt	{ "braceleftbt",	"`-",		0x23a9	},
141294113Sbapt	{ "braceleftex",	"|",		0x23aa	},
142294113Sbapt	{ "rt",			"-.",		0x23ab	},
143294113Sbapt	{ "bracerighttp",	"-.",		0x23ab	},
144294113Sbapt	{ "rk",			"}",		0x23ac	},
145294113Sbapt	{ "bracerightmid",	"}",		0x23ac	},
146294113Sbapt	{ "rb",			"-\'",		0x23ad	},
147294113Sbapt	{ "bracerightbt",	"-\'",		0x23ad	},
148294113Sbapt	{ "bracerightex",	"|",		0x23aa	},
149294113Sbapt	{ "parenlefttp",	"/",		0x239b	},
150294113Sbapt	{ "parenleftbt",	"\\",		0x239d	},
151294113Sbapt	{ "parenleftex",	"|",		0x239c	},
152294113Sbapt	{ "parenrighttp",	"\\",		0x239e	},
153294113Sbapt	{ "parenrightbt",	"/",		0x23a0	},
154294113Sbapt	{ "parenrightex",	"|",		0x239f	},
155294113Sbapt
156294113Sbapt	/* Arrows and lines. */
157294113Sbapt	{ "<-",			"<-",		0x2190	},
158294113Sbapt	{ "->",			"->",		0x2192	},
159294113Sbapt	{ "<>",			"<->",		0x2194	},
160294113Sbapt	{ "da",			"|\bv",		0x2193	},
161294113Sbapt	{ "ua",			"|\b^",		0x2191	},
162294113Sbapt	{ "va",			"^v",		0x2195	},
163294113Sbapt	{ "lA",			"<=",		0x21d0	},
164294113Sbapt	{ "rA",			"=>",		0x21d2	},
165294113Sbapt	{ "hA",			"<=>",		0x21d4	},
166294113Sbapt	{ "uA",			"=\b^",		0x21d1	},
167294113Sbapt	{ "dA",			"=\bv",		0x21d3	},
168294113Sbapt	{ "vA",			"^=v",		0x21d5	},
169322249Sbapt	{ "an",			"-",		0x23af	},
170294113Sbapt
171294113Sbapt	/* Logic. */
172294113Sbapt	{ "AN",			"^",		0x2227	},
173294113Sbapt	{ "OR",			"v",		0x2228	},
174294113Sbapt	{ "no",			"~",		0x00ac	},
175294113Sbapt	{ "tno",		"~",		0x00ac	},
176294113Sbapt	{ "te",			"3",		0x2203	},
177294113Sbapt	{ "fa",			"-\bV",		0x2200	},
178294113Sbapt	{ "st",			"-)",		0x220b	},
179294113Sbapt	{ "tf",			".:.",		0x2234	},
180294113Sbapt	{ "3d",			".:.",		0x2234	},
181294113Sbapt	{ "or",			"|",		0x007c	},
182294113Sbapt
183294113Sbapt	/* Mathematicals. */
184294113Sbapt	{ "pl",			"+",		0x002b	},
185294113Sbapt	{ "mi",			"-",		0x2212	},
186294113Sbapt	{ "-",			"-",		0x002d	},
187294113Sbapt	{ "-+",			"-+",		0x2213	},
188294113Sbapt	{ "+-",			"+-",		0x00b1	},
189294113Sbapt	{ "t+-",		"+-",		0x00b1	},
190294113Sbapt	{ "pc",			".",		0x00b7	},
191294113Sbapt	{ "md",			".",		0x22c5	},
192294113Sbapt	{ "mu",			"x",		0x00d7	},
193294113Sbapt	{ "tmu",		"x",		0x00d7	},
194294113Sbapt	{ "c*",			"O\bx",		0x2297	},
195294113Sbapt	{ "c+",			"O\b+",		0x2295	},
196294113Sbapt	{ "di",			"-:-",		0x00f7	},
197294113Sbapt	{ "tdi",		"-:-",		0x00f7	},
198294113Sbapt	{ "f/",			"/",		0x2044	},
199294113Sbapt	{ "**",			"*",		0x2217	},
200294113Sbapt	{ "<=",			"<=",		0x2264	},
201294113Sbapt	{ ">=",			">=",		0x2265	},
202294113Sbapt	{ "<<",			"<<",		0x226a	},
203294113Sbapt	{ ">>",			">>",		0x226b	},
204294113Sbapt	{ "eq",			"=",		0x003d	},
205294113Sbapt	{ "!=",			"!=",		0x2260	},
206294113Sbapt	{ "==",			"==",		0x2261	},
207294113Sbapt	{ "ne",			"!==",		0x2262	},
208294113Sbapt	{ "ap",			"~",		0x223c	},
209294113Sbapt	{ "|=",			"-~",		0x2243	},
210294113Sbapt	{ "=~",			"=~",		0x2245	},
211294113Sbapt	{ "~~",			"~~",		0x2248	},
212294113Sbapt	{ "~=",			"~=",		0x2248	},
213294113Sbapt	{ "pt",			"oc",		0x221d	},
214294113Sbapt	{ "es",			"{}",		0x2205	},
215294113Sbapt	{ "mo",			"E",		0x2208	},
216294113Sbapt	{ "nm",			"!E",		0x2209	},
217294113Sbapt	{ "sb",			"(=",		0x2282	},
218294113Sbapt	{ "nb",			"(!=",		0x2284	},
219294113Sbapt	{ "sp",			"=)",		0x2283	},
220294113Sbapt	{ "nc",			"!=)",		0x2285	},
221294113Sbapt	{ "ib",			"(=\b_",	0x2286	},
222294113Sbapt	{ "ip",			"=\b_)",	0x2287	},
223294113Sbapt	{ "ca",			"(^)",		0x2229	},
224294113Sbapt	{ "cu",			"U",		0x222a	},
225294113Sbapt	{ "/_",			"_\b/",		0x2220	},
226294113Sbapt	{ "pp",			"_\b|",		0x22a5	},
227294113Sbapt	{ "is",			"'\b,\bI",	0x222b	},
228294113Sbapt	{ "integral",		"'\b,\bI",	0x222b	},
229294113Sbapt	{ "sum",		"E",		0x2211	},
230294113Sbapt	{ "product",		"TT",		0x220f	},
231294113Sbapt	{ "coproduct",		"U",		0x2210	},
232294113Sbapt	{ "gr",			"V",		0x2207	},
233294113Sbapt	{ "sr",			"\\/",		0x221a	},
234294113Sbapt	{ "sqrt",		"\\/",		0x221a	},
235294113Sbapt	{ "lc",			"|~",		0x2308	},
236294113Sbapt	{ "rc",			"~|",		0x2309	},
237294113Sbapt	{ "lf",			"|_",		0x230a	},
238294113Sbapt	{ "rf",			"_|",		0x230b	},
239294113Sbapt	{ "if",			"oo",		0x221e	},
240294113Sbapt	{ "Ah",			"N",		0x2135	},
241294113Sbapt	{ "Im",			"I",		0x2111	},
242294113Sbapt	{ "Re",			"R",		0x211c	},
243322249Sbapt	{ "wp",			"P",		0x2118	},
244294113Sbapt	{ "pd",			"a",		0x2202	},
245294113Sbapt	{ "-h",			"/h",		0x210f	},
246322249Sbapt	{ "hbar",		"/h",		0x210f	},
247294113Sbapt	{ "12",			"1/2",		0x00bd	},
248294113Sbapt	{ "14",			"1/4",		0x00bc	},
249294113Sbapt	{ "34",			"3/4",		0x00be	},
250322249Sbapt	{ "18",			"1/8",		0x215B	},
251322249Sbapt	{ "38",			"3/8",		0x215C	},
252322249Sbapt	{ "58",			"5/8",		0x215D	},
253322249Sbapt	{ "78",			"7/8",		0x215E	},
254322249Sbapt	{ "S1",			"1",		0x00B9	},
255322249Sbapt	{ "S2",			"2",		0x00B2	},
256322249Sbapt	{ "S3",			"3",		0x00B3	},
257294113Sbapt
258294113Sbapt	/* Ligatures. */
259294113Sbapt	{ "ff",			"ff",		0xfb00	},
260294113Sbapt	{ "fi",			"fi",		0xfb01	},
261294113Sbapt	{ "fl",			"fl",		0xfb02	},
262294113Sbapt	{ "Fi",			"ffi",		0xfb03	},
263294113Sbapt	{ "Fl",			"ffl",		0xfb04	},
264294113Sbapt	{ "AE",			"AE",		0x00c6	},
265294113Sbapt	{ "ae",			"ae",		0x00e6	},
266294113Sbapt	{ "OE",			"OE",		0x0152	},
267294113Sbapt	{ "oe",			"oe",		0x0153	},
268294113Sbapt	{ "ss",			"ss",		0x00df	},
269294113Sbapt	{ "IJ",			"IJ",		0x0132	},
270294113Sbapt	{ "ij",			"ij",		0x0133	},
271294113Sbapt
272294113Sbapt	/* Accents. */
273294113Sbapt	{ "a\"",		"\"",		0x02dd	},
274294113Sbapt	{ "a-",			"-",		0x00af	},
275294113Sbapt	{ "a.",			".",		0x02d9	},
276294113Sbapt	{ "a^",			"^",		0x005e	},
277294113Sbapt	{ "aa",			"\'",		0x00b4	},
278294113Sbapt	{ "\'",			"\'",		0x00b4	},
279294113Sbapt	{ "ga",			"`",		0x0060	},
280294113Sbapt	{ "`",			"`",		0x0060	},
281294113Sbapt	{ "ab",			"'\b`",		0x02d8	},
282294113Sbapt	{ "ac",			",",		0x00b8	},
283294113Sbapt	{ "ad",			"\"",		0x00a8	},
284294113Sbapt	{ "ah",			"v",		0x02c7	},
285294113Sbapt	{ "ao",			"o",		0x02da	},
286294113Sbapt	{ "a~",			"~",		0x007e	},
287294113Sbapt	{ "ho",			",",		0x02db	},
288294113Sbapt	{ "ha",			"^",		0x005e	},
289294113Sbapt	{ "ti",			"~",		0x007e	},
290294113Sbapt
291294113Sbapt	/* Accented letters. */
292294113Sbapt	{ "'A",			"'\bA",		0x00c1	},
293294113Sbapt	{ "'E",			"'\bE",		0x00c9	},
294294113Sbapt	{ "'I",			"'\bI",		0x00cd	},
295294113Sbapt	{ "'O",			"'\bO",		0x00d3	},
296294113Sbapt	{ "'U",			"'\bU",		0x00da	},
297294113Sbapt	{ "'a",			"'\ba",		0x00e1	},
298294113Sbapt	{ "'e",			"'\be",		0x00e9	},
299294113Sbapt	{ "'i",			"'\bi",		0x00ed	},
300294113Sbapt	{ "'o",			"'\bo",		0x00f3	},
301294113Sbapt	{ "'u",			"'\bu",		0x00fa	},
302294113Sbapt	{ "`A",			"`\bA",		0x00c0	},
303294113Sbapt	{ "`E",			"`\bE",		0x00c8	},
304294113Sbapt	{ "`I",			"`\bI",		0x00cc	},
305294113Sbapt	{ "`O",			"`\bO",		0x00d2	},
306294113Sbapt	{ "`U",			"`\bU",		0x00d9	},
307294113Sbapt	{ "`a",			"`\ba",		0x00e0	},
308294113Sbapt	{ "`e",			"`\be",		0x00e8	},
309294113Sbapt	{ "`i",			"`\bi",		0x00ec	},
310294113Sbapt	{ "`o",			"`\bo",		0x00f2	},
311294113Sbapt	{ "`u",			"`\bu",		0x00f9	},
312294113Sbapt	{ "~A",			"~\bA",		0x00c3	},
313294113Sbapt	{ "~N",			"~\bN",		0x00d1	},
314294113Sbapt	{ "~O",			"~\bO",		0x00d5	},
315294113Sbapt	{ "~a",			"~\ba",		0x00e3	},
316294113Sbapt	{ "~n",			"~\bn",		0x00f1	},
317294113Sbapt	{ "~o",			"~\bo",		0x00f5	},
318294113Sbapt	{ ":A",			"\"\bA",	0x00c4	},
319294113Sbapt	{ ":E",			"\"\bE",	0x00cb	},
320294113Sbapt	{ ":I",			"\"\bI",	0x00cf	},
321294113Sbapt	{ ":O",			"\"\bO",	0x00d6	},
322294113Sbapt	{ ":U",			"\"\bU",	0x00dc	},
323294113Sbapt	{ ":a",			"\"\ba",	0x00e4	},
324294113Sbapt	{ ":e",			"\"\be",	0x00eb	},
325294113Sbapt	{ ":i",			"\"\bi",	0x00ef	},
326294113Sbapt	{ ":o",			"\"\bo",	0x00f6	},
327294113Sbapt	{ ":u",			"\"\bu",	0x00fc	},
328294113Sbapt	{ ":y",			"\"\by",	0x00ff	},
329294113Sbapt	{ "^A",			"^\bA",		0x00c2	},
330294113Sbapt	{ "^E",			"^\bE",		0x00ca	},
331294113Sbapt	{ "^I",			"^\bI",		0x00ce	},
332294113Sbapt	{ "^O",			"^\bO",		0x00d4	},
333294113Sbapt	{ "^U",			"^\bU",		0x00db	},
334294113Sbapt	{ "^a",			"^\ba",		0x00e2	},
335294113Sbapt	{ "^e",			"^\be",		0x00ea	},
336294113Sbapt	{ "^i",			"^\bi",		0x00ee	},
337294113Sbapt	{ "^o",			"^\bo",		0x00f4	},
338294113Sbapt	{ "^u",			"^\bu",		0x00fb	},
339294113Sbapt	{ ",C",			",\bC",		0x00c7	},
340294113Sbapt	{ ",c",			",\bc",		0x00e7	},
341294113Sbapt	{ "/L",			"/\bL",		0x0141	},
342294113Sbapt	{ "/l",			"/\bl",		0x0142	},
343294113Sbapt	{ "/O",			"/\bO",		0x00d8	},
344294113Sbapt	{ "/o",			"/\bo",		0x00f8	},
345294113Sbapt	{ "oA",			"o\bA",		0x00c5	},
346294113Sbapt	{ "oa",			"o\ba",		0x00e5	},
347294113Sbapt
348294113Sbapt	/* Special letters. */
349294113Sbapt	{ "-D",			"-\bD",		0x00d0	},
350294113Sbapt	{ "Sd",			"d",		0x00f0	},
351294113Sbapt	{ "TP",			"Th",		0x00de	},
352294113Sbapt	{ "Tp",			"th",		0x00fe	},
353294113Sbapt	{ ".i",			"i",		0x0131	},
354294113Sbapt	{ ".j",			"j",		0x0237	},
355294113Sbapt
356294113Sbapt	/* Currency. */
357294113Sbapt	{ "Do",			"$",		0x0024	},
358294113Sbapt	{ "ct",			"/\bc",		0x00a2	},
359294113Sbapt	{ "Eu",			"EUR",		0x20ac	},
360294113Sbapt	{ "eu",			"EUR",		0x20ac	},
361294113Sbapt	{ "Ye",			"=\bY",		0x00a5	},
362294113Sbapt	{ "Po",			"GBP",		0x00a3	},
363294113Sbapt	{ "Cs",			"o\bx",		0x00a4	},
364294113Sbapt	{ "Fn",			",\bf",		0x0192	},
365294113Sbapt
366294113Sbapt	/* Units. */
367294113Sbapt	{ "de",			"<deg>",	0x00b0	},
368294113Sbapt	{ "%0",			"%o",		0x2030	},
369294113Sbapt	{ "fm",			"\'",		0x2032	},
370294113Sbapt	{ "sd",			"''",		0x2033	},
371294113Sbapt	{ "mc",			",\bu",		0x00b5	},
372322249Sbapt	{ "Of",			"_\ba",		0x00aa	},
373322249Sbapt	{ "Om",			"_\bo",		0x00ba	},
374294113Sbapt
375294113Sbapt	/* Greek characters. */
376294113Sbapt	{ "*A",			"A",		0x0391	},
377294113Sbapt	{ "*B",			"B",		0x0392	},
378294113Sbapt	{ "*G",			"G",		0x0393	},
379294113Sbapt	{ "*D",			"_\b/_\b\\",	0x0394	},
380294113Sbapt	{ "*E",			"E",		0x0395	},
381294113Sbapt	{ "*Z",			"Z",		0x0396	},
382294113Sbapt	{ "*Y",			"H",		0x0397	},
383294113Sbapt	{ "*H",			"-\bO",		0x0398	},
384294113Sbapt	{ "*I",			"I",		0x0399	},
385294113Sbapt	{ "*K",			"K",		0x039a	},
386294113Sbapt	{ "*L",			"/\\",		0x039b	},
387294113Sbapt	{ "*M",			"M",		0x039c	},
388294113Sbapt	{ "*N",			"N",		0x039d	},
389294113Sbapt	{ "*C",			"_\bH",		0x039e	},
390294113Sbapt	{ "*O",			"O",		0x039f	},
391294113Sbapt	{ "*P",			"TT",		0x03a0	},
392294113Sbapt	{ "*R",			"P",		0x03a1	},
393294113Sbapt	{ "*S",			"S",		0x03a3	},
394294113Sbapt	{ "*T",			"T",		0x03a4	},
395294113Sbapt	{ "*U",			"Y",		0x03a5	},
396294113Sbapt	{ "*F",			"I\bO",		0x03a6	},
397294113Sbapt	{ "*X",			"X",		0x03a7	},
398294113Sbapt	{ "*Q",			"I\bY",		0x03a8	},
399294113Sbapt	{ "*W",			"_\bO",		0x03a9	},
400294113Sbapt	{ "*a",			"a",		0x03b1	},
401294113Sbapt	{ "*b",			"B",		0x03b2	},
402294113Sbapt	{ "*g",			"y",		0x03b3	},
403294113Sbapt	{ "*d",			"d",		0x03b4	},
404294113Sbapt	{ "*e",			"e",		0x03b5	},
405294113Sbapt	{ "*z",			",\bC",		0x03b6	},
406294113Sbapt	{ "*y",			"n",		0x03b7	},
407294113Sbapt	{ "*h",			"-\b0",		0x03b8	},
408294113Sbapt	{ "*i",			"i",		0x03b9	},
409294113Sbapt	{ "*k",			"k",		0x03ba	},
410294113Sbapt	{ "*l",			">\b\\",	0x03bb	},
411294113Sbapt	{ "*m",			",\bu",		0x03bc	},
412294113Sbapt	{ "*n",			"v",		0x03bd	},
413294113Sbapt	{ "*c",			",\bE",		0x03be	},
414294113Sbapt	{ "*o",			"o",		0x03bf	},
415294113Sbapt	{ "*p",			"-\bn",		0x03c0	},
416294113Sbapt	{ "*r",			"p",		0x03c1	},
417294113Sbapt	{ "*s",			"-\bo",		0x03c3	},
418294113Sbapt	{ "*t",			"~\bt",		0x03c4	},
419294113Sbapt	{ "*u",			"u",		0x03c5	},
420294113Sbapt	{ "*f",			"|\bo",		0x03d5	},
421294113Sbapt	{ "*x",			"x",		0x03c7	},
422294113Sbapt	{ "*q",			"|\bu",		0x03c8	},
423294113Sbapt	{ "*w",			"w",		0x03c9	},
424294113Sbapt	{ "+h",			"-\b0",		0x03d1	},
425294113Sbapt	{ "+f",			"|\bo",		0x03c6	},
426294113Sbapt	{ "+p",			"-\bw",		0x03d6	},
427294113Sbapt	{ "+e",			"e",		0x03f5	},
428294113Sbapt	{ "ts",			"s",		0x03c2	},
429241675Suqs};
430241675Suqs
431294113Sbaptstatic	struct ohash	  mchars;
432241675Suqs
433274880Sbapt
434241675Suqsvoid
435294113Sbaptmchars_free(void)
436241675Suqs{
437241675Suqs
438294113Sbapt	ohash_delete(&mchars);
439241675Suqs}
440241675Suqs
441294113Sbaptvoid
442241675Suqsmchars_alloc(void)
443241675Suqs{
444294113Sbapt	size_t		  i;
445294113Sbapt	unsigned int	  slot;
446241675Suqs
447294113Sbapt	mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode));
448294113Sbapt	for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) {
449294113Sbapt		slot = ohash_qlookup(&mchars, lines[i].roffcode);
450294113Sbapt		assert(ohash_find(&mchars, slot) == NULL);
451294113Sbapt		ohash_insert(&mchars, slot, lines + i);
452241675Suqs	}
453241675Suqs}
454241675Suqs
455241675Suqsint
456294113Sbaptmchars_spec2cp(const char *p, size_t sz)
457241675Suqs{
458241675Suqs	const struct ln	*ln;
459294113Sbapt	const char	*end;
460241675Suqs
461294113Sbapt	end = p + sz;
462294113Sbapt	ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
463294113Sbapt	return ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1;
464241675Suqs}
465241675Suqs
/*
 * Convert the sz bytes starting at p with mandoc_strntoi()
 * in base 10.  Returns the resulting number if it lies in
 * the range 0 to 255 inclusive, or -1 otherwise.
 */
int
mchars_num2char(const char *p, size_t sz)
{
	const int	 code = mandoc_strntoi(p, sz, 10);

	if (code < 0 || code > 255)
		return -1;
	return code;
}
474241675Suqs
/*
 * Convert the sz bytes starting at p with mandoc_strntoi()
 * in base 16 and return the resulting number.
 * The result is only checked by an assertion, so input that
 * does not convert to a value from 0 to 0x10FFFF aborts the
 * program when assertions are enabled.
 */
int
mchars_num2uc(const char *p, size_t sz)
{
	const int	 i = mandoc_strntoi(p, sz, 16);

	assert(i >= 0 && i <= 0x10FFFF);
	return i;
}
484241675Suqs
485241675Suqsconst char *
486294113Sbaptmchars_spec2str(const char *p, size_t sz, size_t *rsz)
487241675Suqs{
488241675Suqs	const struct ln	*ln;
489294113Sbapt	const char	*end;
490241675Suqs
491294113Sbapt	end = p + sz;
492294113Sbapt	ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
493275432Sbapt	if (ln == NULL) {
494241675Suqs		*rsz = 1;
495294113Sbapt		return sz == 1 ? p : NULL;
496241675Suqs	}
497241675Suqs
498241675Suqs	*rsz = strlen(ln->ascii);
499294113Sbapt	return ln->ascii;
500241675Suqs}
501241675Suqs
502275432Sbaptconst char *
503275432Sbaptmchars_uc2str(int uc)
504275432Sbapt{
505294113Sbapt	size_t	  i;
506275432Sbapt
507294113Sbapt	for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++)
508275432Sbapt		if (uc == lines[i].unicode)
509294113Sbapt			return lines[i].ascii;
510294113Sbapt	return "<?>";
511275432Sbapt}
512