1/* $Id: roff.c,v 1.378 2021/08/10 12:55:04 schwarze Exp $ */
2/*
3 * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20#include "config.h"
21
22#include <sys/types.h>
23
24#include <assert.h>
25#include <ctype.h>
26#include <limits.h>
27#include <stddef.h>
28#include <stdint.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32
33#include "mandoc_aux.h"
34#include "mandoc_ohash.h"
35#include "mandoc.h"
36#include "roff.h"
37#include "mandoc_parse.h"
38#include "libmandoc.h"
39#include "roff_int.h"
40#include "tbl_parse.h"
41#include "eqn_parse.h"
42
43/*
44 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
45 * that an escape sequence resulted from copy-in processing and
46 * needs to be checked or interpolated.  As it is used nowhere
47 * else, it is defined here rather than in a header file.
48 */
49#define	ASCII_ESC	27
50
51/* Maximum number of string expansions per line, to break infinite loops. */
52#define	EXPAND_LIMIT	1000
53
54/* Types of definitions of macros and strings. */
55#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
56#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
57#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
58#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
59#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
60			 ROFFDEF_REN | ROFFDEF_STD)
61#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
62
63/* --- data types --------------------------------------------------------- */
64
/*
 * An incredibly-simple string buffer.
 * In this file it serves as the key and value type of struct roffkv,
 * as the key type of struct roffreg, and as the element type of the
 * xtab array in struct roff.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
72
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps three such lists: strtab, rentab, and xmbtab.
 */
struct	roffkv {
	struct roffstr	 key; /* lookup key */
	struct roffstr	 val; /* content stored under that key */
	struct roffkv	*next; /* next in list */
};
81
/*
 * A single number register as part of a singly-linked list.
 * The head of the list is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* register value */
	int		 step; /* presumably the auto-increment step - TODO confirm */
	struct roffreg	*next; /* next in list */
};
91
/*
 * Association of request and macro names with token IDs.
 * These are the entries of the request hash table: roffhash_alloc()
 * allocates each one with room for the name and its terminating NUL,
 * and passes the offset of the name member to mandoc_ohash_init().
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID returned by roffhash_find() */
	char		 name[]; /* flexible array member holding the name */
};
99
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The contexts are stored in the mstack array of struct roff.
 */
struct	mctx {
	char		**argv; /* argument array; released by roff_free() */
	int		 argc; /* presumably the number of arguments in use */
	int		 argsz; /* presumably the allocated size of argv */
};
109
/*
 * The complete state of one roff parser.
 * Created by roff_alloc(), returned to its initial state by
 * roff_reset(), and destroyed by roff_free().
 * Both stack positions start out as -1 (see roff_alloc()), and
 * roff_free1() unwinds mstack until mstackpos is negative again.
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
137
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointer; the top of
 * the stack is the last member of struct roff.  They are created by
 * roffnode_push() and destroyed by roffnode_pop(), which also frees
 * the name and end strings.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
151
/*
 * The parameter list shared by all request handlers, and the
 * function pointer type that struct roffmac uses to store them.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	int (*roffproc)(ROFF_ARGS);
161
/*
 * The handlers belonging to one request or macro.
 * The roffs[] array holds one of these per token, initialized
 * positionally, so the member order must not be changed.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
169
/*
 * One predefined string: the name used in the input and the
 * text that replaces it.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one initializer for struct predef, including the
 * trailing comma, so that invocations can simply be listed one
 * after another inside an array initializer.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
177
178/* --- function prototypes ------------------------------------------------ */
179
180static	int		 roffnode_cleanscope(struct roff *);
181static	int		 roffnode_pop(struct roff *);
182static	void		 roffnode_push(struct roff *, enum roff_tok,
183				const char *, int, int);
184static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
185static	int		 roff_als(ROFF_ARGS);
186static	int		 roff_block(ROFF_ARGS);
187static	int		 roff_block_text(ROFF_ARGS);
188static	int		 roff_block_sub(ROFF_ARGS);
189static	int		 roff_break(ROFF_ARGS);
190static	int		 roff_cblock(ROFF_ARGS);
191static	int		 roff_cc(ROFF_ARGS);
192static	int		 roff_ccond(struct roff *, int, int);
193static	int		 roff_char(ROFF_ARGS);
194static	int		 roff_cond(ROFF_ARGS);
195static	int		 roff_cond_checkend(ROFF_ARGS);
196static	int		 roff_cond_text(ROFF_ARGS);
197static	int		 roff_cond_sub(ROFF_ARGS);
198static	int		 roff_ds(ROFF_ARGS);
199static	int		 roff_ec(ROFF_ARGS);
200static	int		 roff_eo(ROFF_ARGS);
201static	int		 roff_eqndelim(struct roff *, struct buf *, int);
202static	int		 roff_evalcond(struct roff *, int, char *, int *);
203static	int		 roff_evalnum(struct roff *, int,
204				const char *, int *, int *, int);
205static	int		 roff_evalpar(struct roff *, int,
206				const char *, int *, int *, int);
207static	int		 roff_evalstrcond(const char *, int *);
208static	int		 roff_expand(struct roff *, struct buf *,
209				int, int, char);
210static	void		 roff_free1(struct roff *);
211static	void		 roff_freereg(struct roffreg *);
212static	void		 roff_freestr(struct roffkv *);
213static	size_t		 roff_getname(struct roff *, char **, int, int);
214static	int		 roff_getnum(const char *, int *, int *, int);
215static	int		 roff_getop(const char *, int *, char *);
216static	int		 roff_getregn(struct roff *,
217				const char *, size_t, char);
218static	int		 roff_getregro(const struct roff *,
219				const char *name);
220static	const char	*roff_getstrn(struct roff *,
221				const char *, size_t, int *);
222static	int		 roff_hasregn(const struct roff *,
223				const char *, size_t);
224static	int		 roff_insec(ROFF_ARGS);
225static	int		 roff_it(ROFF_ARGS);
226static	int		 roff_line_ignore(ROFF_ARGS);
227static	void		 roff_man_alloc1(struct roff_man *);
228static	void		 roff_man_free1(struct roff_man *);
229static	int		 roff_manyarg(ROFF_ARGS);
230static	int		 roff_noarg(ROFF_ARGS);
231static	int		 roff_nop(ROFF_ARGS);
232static	int		 roff_nr(ROFF_ARGS);
233static	int		 roff_onearg(ROFF_ARGS);
234static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
235				int, int);
236static	int		 roff_parsetext(struct roff *, struct buf *,
237				int, int *);
238static	int		 roff_renamed(ROFF_ARGS);
239static	int		 roff_return(ROFF_ARGS);
240static	int		 roff_rm(ROFF_ARGS);
241static	int		 roff_rn(ROFF_ARGS);
242static	int		 roff_rr(ROFF_ARGS);
243static	void		 roff_setregn(struct roff *, const char *,
244				size_t, int, char, int);
245static	void		 roff_setstr(struct roff *,
246				const char *, const char *, int);
247static	void		 roff_setstrn(struct roffkv **, const char *,
248				size_t, const char *, size_t, int);
249static	int		 roff_shift(ROFF_ARGS);
250static	int		 roff_so(ROFF_ARGS);
251static	int		 roff_tr(ROFF_ARGS);
252static	int		 roff_Dd(ROFF_ARGS);
253static	int		 roff_TE(ROFF_ARGS);
254static	int		 roff_TS(ROFF_ARGS);
255static	int		 roff_EQ(ROFF_ARGS);
256static	int		 roff_EN(ROFF_ARGS);
257static	int		 roff_T_(ROFF_ARGS);
258static	int		 roff_unsupp(ROFF_ARGS);
259static	int		 roff_userdef(ROFF_ARGS);
260
261/* --- constant data ------------------------------------------------------ */
262
263#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
264#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
265
/*
 * Names of all requests and macros, indexed by token ID.
 * The first part runs parallel to the roffs[] handler table below;
 * the NULL entries mark positions that have no name of their own
 * and are skipped by roffhash_alloc().
 * NOTE(review): identifiers starting with a double underscore are
 * reserved for the implementation by the C standard; the array has
 * external linkage, so renaming it would need a check of all users.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
        "box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the name table; this file accesses the names through it. */
const	char *const *roff_name = __roff_name;
375
/*
 * Handler table with one struct roffmac per token below TOKEN_NONE.
 * The entries are initialized positionally in the order
 * { proc, text, sub, flags } and follow the same order as the
 * names at the beginning of __roff_name[]; the trailing comment
 * on each line gives the request the entry belongs to.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* renamed macros; presumably ROFF_RENAMED */
	{ roff_userdef, NULL, NULL, 0 }  /* user-defined macros; presumably ROFF_USERDEF */
};
622
/*
 * Array of injected predefined strings.
 * The initializers are pulled in from predefs.in, which is expected
 * to consist of PREDEF() invocations.
 * NOTE(review): PREDEFS_MAX presumably has to be kept in sync with
 * the number of entries in predefs.in - confirm when editing it.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
628
/*
 * State of the ce and it requests.
 * These are file-scope variables rather than members of struct roff,
 * so they are shared by all parser instances; roff_reset() clears them.
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
633
634
635/* --- request table ------------------------------------------------------ */
636
637struct ohash *
638roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
639{
640	struct ohash	*htab;
641	struct roffreq	*req;
642	enum roff_tok	 tok;
643	size_t		 sz;
644	unsigned int	 slot;
645
646	htab = mandoc_malloc(sizeof(*htab));
647	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
648
649	for (tok = mintok; tok < maxtok; tok++) {
650		if (roff_name[tok] == NULL)
651			continue;
652		sz = strlen(roff_name[tok]);
653		req = mandoc_malloc(sizeof(*req) + sz + 1);
654		req->tok = tok;
655		memcpy(req->name, roff_name[tok], sz + 1);
656		slot = ohash_qlookup(htab, req->name);
657		ohash_insert(htab, slot, req);
658	}
659	return htab;
660}
661
/*
 * Release a table built by roffhash_alloc(): first every entry,
 * then the hash internals, and finally the table object itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 pos;

	if (htab != NULL) {
		entry = ohash_first(htab, &pos);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &pos);
		}
		ohash_delete(htab);
		free(htab);
	}
}
676
677enum roff_tok
678roffhash_find(struct ohash *htab, const char *name, size_t sz)
679{
680	struct roffreq	*req;
681	const char	*end;
682
683	if (sz) {
684		end = name + sz;
685		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
686	} else
687		req = ohash_find(htab, ohash_qlookup(htab, name));
688	return req == NULL ? TOKEN_NONE : req->tok;
689}
690
691/* --- stack of request blocks -------------------------------------------- */
692
693/*
694 * Pop the current node off of the stack of roff instructions currently
695 * pending.  Return 1 if it is a loop or 0 otherwise.
696 */
697static int
698roffnode_pop(struct roff *r)
699{
700	struct roffnode	*p;
701	int		 inloop;
702
703	p = r->last;
704	inloop = p->tok == ROFF_while;
705	r->last = p->parent;
706	free(p->name);
707	free(p->end);
708	free(p);
709	return inloop;
710}
711
712/*
713 * Push a roff node onto the instruction stack.  This must later be
714 * removed with roffnode_pop().
715 */
716static void
717roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
718		int line, int col)
719{
720	struct roffnode	*p;
721
722	p = mandoc_calloc(1, sizeof(struct roffnode));
723	p->tok = tok;
724	if (name)
725		p->name = mandoc_strdup(name);
726	p->parent = r->last;
727	p->line = line;
728	p->col = col;
729	p->rule = p->parent ? p->parent->rule : 0;
730
731	r->last = p;
732}
733
734/* --- roff parser state data management ---------------------------------- */
735
736static void
737roff_free1(struct roff *r)
738{
739	int		 i;
740
741	tbl_free(r->first_tbl);
742	r->first_tbl = r->last_tbl = r->tbl = NULL;
743
744	eqn_free(r->last_eqn);
745	r->last_eqn = r->eqn = NULL;
746
747	while (r->mstackpos >= 0)
748		roff_userret(r);
749
750	while (r->last)
751		roffnode_pop(r);
752
753	free (r->rstack);
754	r->rstack = NULL;
755	r->rstacksz = 0;
756	r->rstackpos = -1;
757
758	roff_freereg(r->regtab);
759	r->regtab = NULL;
760
761	roff_freestr(r->strtab);
762	roff_freestr(r->rentab);
763	roff_freestr(r->xmbtab);
764	r->strtab = r->rentab = r->xmbtab = NULL;
765
766	if (r->xtab)
767		for (i = 0; i < 128; i++)
768			free(r->xtab[i].p);
769	free(r->xtab);
770	r->xtab = NULL;
771}
772
773void
774roff_reset(struct roff *r)
775{
776	roff_free1(r);
777	r->options |= MPARSE_COMMENT;
778	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
779	r->control = '\0';
780	r->escape = '\\';
781	roffce_lines = 0;
782	roffce_node = NULL;
783	roffit_lines = 0;
784	roffit_macro = NULL;
785}
786
787void
788roff_free(struct roff *r)
789{
790	int		 i;
791
792	roff_free1(r);
793	for (i = 0; i < r->mstacksz; i++)
794		free(r->mstack[i].argv);
795	free(r->mstack);
796	roffhash_free(r->reqtab);
797	free(r);
798}
799
800struct roff *
801roff_alloc(int options)
802{
803	struct roff	*r;
804
805	r = mandoc_calloc(1, sizeof(struct roff));
806	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
807	r->options = options | MPARSE_COMMENT;
808	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
809	r->mstackpos = -1;
810	r->rstackpos = -1;
811	r->escape = '\\';
812	return r;
813}
814
815/* --- syntax tree state data management ---------------------------------- */
816
817static void
818roff_man_free1(struct roff_man *man)
819{
820	if (man->meta.first != NULL)
821		roff_node_delete(man, man->meta.first);
822	free(man->meta.msec);
823	free(man->meta.vol);
824	free(man->meta.os);
825	free(man->meta.arch);
826	free(man->meta.title);
827	free(man->meta.name);
828	free(man->meta.date);
829	free(man->meta.sodest);
830}
831
832void
833roff_state_reset(struct roff_man *man)
834{
835	man->last = man->meta.first;
836	man->last_es = NULL;
837	man->flags = 0;
838	man->lastsec = man->lastnamed = SEC_NONE;
839	man->next = ROFF_NEXT_CHILD;
840	roff_setreg(man->roff, "nS", 0, '=');
841}
842
843static void
844roff_man_alloc1(struct roff_man *man)
845{
846	memset(&man->meta, 0, sizeof(man->meta));
847	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
848	man->meta.first->type = ROFFT_ROOT;
849	man->meta.macroset = MACROSET_NONE;
850	roff_state_reset(man);
851}
852
/*
 * Throw away the current syntax tree and its meta data
 * and start over with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
859
/*
 * Destroy a syntax tree object created by roff_man_alloc(),
 * including its nodes and meta data.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
866
867struct roff_man *
868roff_man_alloc(struct roff *roff, const char *os_s, int quick)
869{
870	struct roff_man *man;
871
872	man = mandoc_calloc(1, sizeof(*man));
873	man->roff = roff;
874	man->os_s = os_s;
875	man->quick = quick;
876	roff_man_alloc1(man);
877	roff->man = man;
878	return man;
879}
880
881/* --- syntax tree handling ----------------------------------------------- */
882
883struct roff_node *
884roff_node_alloc(struct roff_man *man, int line, int pos,
885	enum roff_type type, int tok)
886{
887	struct roff_node	*n;
888
889	n = mandoc_calloc(1, sizeof(*n));
890	n->line = line;
891	n->pos = pos;
892	n->tok = tok;
893	n->type = type;
894	n->sec = man->lastsec;
895
896	if (man->flags & MDOC_SYNOPSIS)
897		n->flags |= NODE_SYNPRETTY;
898	else
899		n->flags &= ~NODE_SYNPRETTY;
900	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
901		n->flags |= NODE_NOFILL;
902	else
903		n->flags &= ~NODE_NOFILL;
904	if (man->flags & MDOC_NEWLINE)
905		n->flags |= NODE_LINE;
906	man->flags &= ~MDOC_NEWLINE;
907
908	return n;
909}
910
911void
912roff_node_append(struct roff_man *man, struct roff_node *n)
913{
914
915	switch (man->next) {
916	case ROFF_NEXT_SIBLING:
917		if (man->last->next != NULL) {
918			n->next = man->last->next;
919			man->last->next->prev = n;
920		} else
921			man->last->parent->last = n;
922		man->last->next = n;
923		n->prev = man->last;
924		n->parent = man->last->parent;
925		break;
926	case ROFF_NEXT_CHILD:
927		if (man->last->child != NULL) {
928			n->next = man->last->child;
929			man->last->child->prev = n;
930		} else
931			man->last->last = n;
932		man->last->child = n;
933		n->parent = man->last;
934		break;
935	default:
936		abort();
937	}
938	man->last = n;
939
940	switch (n->type) {
941	case ROFFT_HEAD:
942		n->parent->head = n;
943		break;
944	case ROFFT_BODY:
945		if (n->end != ENDBODY_NOT)
946			return;
947		n->parent->body = n;
948		break;
949	case ROFFT_TAIL:
950		n->parent->tail = n;
951		break;
952	default:
953		return;
954	}
955
956	/*
957	 * Copy over the normalised-data pointer of our parent.  Not
958	 * everybody has one, but copying a null pointer is fine.
959	 */
960
961	n->norm = n->parent->norm;
962	assert(n->parent->type == ROFFT_BLOCK);
963}
964
965void
966roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
967{
968	struct roff_node	*n;
969
970	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
971	n->string = roff_strdup(man->roff, word);
972	roff_node_append(man, n);
973	n->flags |= NODE_VALID | NODE_ENDED;
974	man->next = ROFF_NEXT_SIBLING;
975}
976
977void
978roff_word_append(struct roff_man *man, const char *word)
979{
980	struct roff_node	*n;
981	char			*addstr, *newstr;
982
983	n = man->last;
984	addstr = roff_strdup(man->roff, word);
985	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
986	free(addstr);
987	free(n->string);
988	n->string = newstr;
989	man->next = ROFF_NEXT_SIBLING;
990}
991
992void
993roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
994{
995	struct roff_node	*n;
996
997	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
998	roff_node_append(man, n);
999	man->next = ROFF_NEXT_CHILD;
1000}
1001
1002struct roff_node *
1003roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1004{
1005	struct roff_node	*n;
1006
1007	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1008	roff_node_append(man, n);
1009	man->next = ROFF_NEXT_CHILD;
1010	return n;
1011}
1012
1013struct roff_node *
1014roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1015{
1016	struct roff_node	*n;
1017
1018	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1019	roff_node_append(man, n);
1020	man->next = ROFF_NEXT_CHILD;
1021	return n;
1022}
1023
1024struct roff_node *
1025roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1026{
1027	struct roff_node	*n;
1028
1029	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1030	roff_node_append(man, n);
1031	man->next = ROFF_NEXT_CHILD;
1032	return n;
1033}
1034
1035static void
1036roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1037{
1038	struct roff_node	*n;
1039	struct tbl_span		*span;
1040
1041	if (man->meta.macroset == MACROSET_MAN)
1042		man_breakscope(man, ROFF_TS);
1043	while ((span = tbl_span(tbl)) != NULL) {
1044		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1045		n->span = span;
1046		roff_node_append(man, n);
1047		n->flags |= NODE_VALID | NODE_ENDED;
1048		man->next = ROFF_NEXT_SIBLING;
1049	}
1050}
1051
1052void
1053roff_node_unlink(struct roff_man *man, struct roff_node *n)
1054{
1055
1056	/* Adjust siblings. */
1057
1058	if (n->prev)
1059		n->prev->next = n->next;
1060	if (n->next)
1061		n->next->prev = n->prev;
1062
1063	/* Adjust parent. */
1064
1065	if (n->parent != NULL) {
1066		if (n->parent->child == n)
1067			n->parent->child = n->next;
1068		if (n->parent->last == n)
1069			n->parent->last = n->prev;
1070	}
1071
1072	/* Adjust parse point. */
1073
1074	if (man == NULL)
1075		return;
1076	if (man->last == n) {
1077		if (n->prev == NULL) {
1078			man->last = n->parent;
1079			man->next = ROFF_NEXT_CHILD;
1080		} else {
1081			man->last = n->prev;
1082			man->next = ROFF_NEXT_SIBLING;
1083		}
1084	}
1085	if (man->meta.first == n)
1086		man->meta.first = NULL;
1087}
1088
1089void
1090roff_node_relink(struct roff_man *man, struct roff_node *n)
1091{
1092	roff_node_unlink(man, n);
1093	n->prev = n->next = NULL;
1094	roff_node_append(man, n);
1095}
1096
1097void
1098roff_node_free(struct roff_node *n)
1099{
1100
1101	if (n->args != NULL)
1102		mdoc_argv_free(n->args);
1103	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1104		free(n->norm);
1105	eqn_box_free(n->eqn);
1106	free(n->string);
1107	free(n->tag);
1108	free(n);
1109}
1110
1111void
1112roff_node_delete(struct roff_man *man, struct roff_node *n)
1113{
1114
1115	while (n->child != NULL)
1116		roff_node_delete(man, n->child);
1117	roff_node_unlink(man, n);
1118	roff_node_free(n);
1119}
1120
1121int
1122roff_node_transparent(struct roff_node *n)
1123{
1124	if (n == NULL)
1125		return 0;
1126	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1127		return 1;
1128	return roff_tok_transparent(n->tok);
1129}
1130
1131int
1132roff_tok_transparent(enum roff_tok tok)
1133{
1134	switch (tok) {
1135	case ROFF_ft:
1136	case ROFF_ll:
1137	case ROFF_mc:
1138	case ROFF_po:
1139	case ROFF_ta:
1140	case MDOC_Db:
1141	case MDOC_Es:
1142	case MDOC_Sm:
1143	case MDOC_Tg:
1144	case MAN_DT:
1145	case MAN_UC:
1146	case MAN_PD:
1147	case MAN_AT:
1148		return 1;
1149	default:
1150		return 0;
1151	}
1152}
1153
1154struct roff_node *
1155roff_node_child(struct roff_node *n)
1156{
1157	for (n = n->child; roff_node_transparent(n); n = n->next)
1158		continue;
1159	return n;
1160}
1161
1162struct roff_node *
1163roff_node_prev(struct roff_node *n)
1164{
1165	do {
1166		n = n->prev;
1167	} while (roff_node_transparent(n));
1168	return n;
1169}
1170
1171struct roff_node *
1172roff_node_next(struct roff_node *n)
1173{
1174	do {
1175		n = n->next;
1176	} while (roff_node_transparent(n));
1177	return n;
1178}
1179
1180void
1181deroff(char **dest, const struct roff_node *n)
1182{
1183	char	*cp;
1184	size_t	 sz;
1185
1186	if (n->string == NULL) {
1187		for (n = n->child; n != NULL; n = n->next)
1188			deroff(dest, n);
1189		return;
1190	}
1191
1192	/* Skip leading whitespace. */
1193
1194	for (cp = n->string; *cp != '\0'; cp++) {
1195		if (cp[0] == '\\' && cp[1] != '\0' &&
1196		    strchr(" %&0^|~", cp[1]) != NULL)
1197			cp++;
1198		else if ( ! isspace((unsigned char)*cp))
1199			break;
1200	}
1201
1202	/* Skip trailing backslash. */
1203
1204	sz = strlen(cp);
1205	if (sz > 0 && cp[sz - 1] == '\\')
1206		sz--;
1207
1208	/* Skip trailing whitespace. */
1209
1210	for (; sz; sz--)
1211		if ( ! isspace((unsigned char)cp[sz-1]))
1212			break;
1213
1214	/* Skip empty strings. */
1215
1216	if (sz == 0)
1217		return;
1218
1219	if (*dest == NULL) {
1220		*dest = mandoc_strndup(cp, sz);
1221		return;
1222	}
1223
1224	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1225	free(*dest);
1226	*dest = cp;
1227}
1228
1229/* --- main functions of the roff parser ---------------------------------- */
1230
1231/*
1232 * In the current line, expand escape sequences that produce parsable
1233 * input text.  Also check the syntax of the remaining escape sequences,
1234 * which typically produce output glyphs or change formatter state.
1235 */
1236static int
1237roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1238{
1239	struct mctx	*ctx;	/* current macro call context */
1240	char		 ubuf[24]; /* buffer to print the number */
1241	struct roff_node *n;	/* used for header comments */
1242	const char	*start;	/* start of the string to process */
1243	char		*stesc;	/* start of an escape sequence ('\\') */
1244	const char	*esct;	/* type of esccape sequence */
1245	char		*ep;	/* end of comment string */
1246	const char	*stnam;	/* start of the name, after "[(*" */
1247	const char	*cp;	/* end of the name, e.g. before ']' */
1248	const char	*res;	/* the string to be substituted */
1249	char		*nbuf;	/* new buffer to copy buf->buf to */
1250	size_t		 maxl;  /* expected length of the escape name */
1251	size_t		 naml;	/* actual length of the escape name */
1252	size_t		 asz;	/* length of the replacement */
1253	size_t		 rsz;	/* length of the rest of the string */
1254	int		 inaml;	/* length returned from mandoc_escape() */
1255	int		 expand_count;	/* to avoid infinite loops */
1256	int		 npos;	/* position in numeric expression */
1257	int		 arg_complete; /* argument not interrupted by eol */
1258	int		 quote_args; /* true for \\$@, false for \\$* */
1259	int		 done;	/* no more input available */
1260	int		 deftype; /* type of definition to paste */
1261	int		 rcsid;	/* kind of RCS id seen */
1262	enum mandocerr	 err;	/* for escape sequence problems */
1263	char		 sign;	/* increment number register */
1264	char		 term;	/* character terminating the escape */
1265
1266	/* Search forward for comments. */
1267
1268	done = 0;
1269	start = buf->buf + pos;
1270	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1271		if (stesc[0] != newesc || stesc[1] == '\0')
1272			continue;
1273		stesc++;
1274		if (*stesc != '"' && *stesc != '#')
1275			continue;
1276
1277		/* Comment found, look for RCS id. */
1278
1279		rcsid = 0;
1280		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1281			rcsid = 1 << MANDOC_OS_OPENBSD;
1282			cp += 8;
1283		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1284			rcsid = 1 << MANDOC_OS_NETBSD;
1285			cp += 7;
1286		}
1287		if (cp != NULL &&
1288		    isalnum((unsigned char)*cp) == 0 &&
1289		    strchr(cp, '$') != NULL) {
1290			if (r->man->meta.rcsids & rcsid)
1291				mandoc_msg(MANDOCERR_RCS_REP, ln,
1292				    (int)(stesc - buf->buf) + 1,
1293				    "%s", stesc + 1);
1294			r->man->meta.rcsids |= rcsid;
1295		}
1296
1297		/* Handle trailing whitespace. */
1298
1299		ep = strchr(stesc--, '\0') - 1;
1300		if (*ep == '\n') {
1301			done = 1;
1302			ep--;
1303		}
1304		if (*ep == ' ' || *ep == '\t')
1305			mandoc_msg(MANDOCERR_SPACE_EOL,
1306			    ln, (int)(ep - buf->buf), NULL);
1307
1308		/*
1309		 * Save comments preceding the title macro
1310		 * in the syntax tree.
1311		 */
1312
1313		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
1314			while (*ep == ' ' || *ep == '\t')
1315				ep--;
1316			ep[1] = '\0';
1317			n = roff_node_alloc(r->man,
1318			    ln, stesc + 1 - buf->buf,
1319			    ROFFT_COMMENT, TOKEN_NONE);
1320			n->string = mandoc_strdup(stesc + 2);
1321			roff_node_append(r->man, n);
1322			n->flags |= NODE_VALID | NODE_ENDED;
1323			r->man->next = ROFF_NEXT_SIBLING;
1324		}
1325
1326		/* Line continuation with comment. */
1327
1328		if (stesc[1] == '#') {
1329			*stesc = '\0';
1330			return ROFF_IGN | ROFF_APPEND;
1331		}
1332
1333		/* Discard normal comments. */
1334
1335		while (stesc > start && stesc[-1] == ' ' &&
1336		    (stesc == start + 1 || stesc[-2] != '\\'))
1337			stesc--;
1338		*stesc = '\0';
1339		break;
1340	}
1341	if (stesc == start)
1342		return ROFF_CONT;
1343	stesc--;
1344
1345	/* Notice the end of the input. */
1346
1347	if (*stesc == '\n') {
1348		*stesc-- = '\0';
1349		done = 1;
1350	}
1351
1352	expand_count = 0;
1353	while (stesc >= start) {
1354		if (*stesc != newesc) {
1355
1356			/*
1357			 * If we have a non-standard escape character,
1358			 * escape literal backslashes because all
1359			 * processing in subsequent functions uses
1360			 * the standard escaping rules.
1361			 */
1362
1363			if (newesc != ASCII_ESC && *stesc == '\\') {
1364				*stesc = '\0';
1365				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1366				    buf->buf, stesc + 1) + 1;
1367				start = nbuf + pos;
1368				stesc = nbuf + (stesc - buf->buf);
1369				free(buf->buf);
1370				buf->buf = nbuf;
1371			}
1372
1373			/* Search backwards for the next escape. */
1374
1375			stesc--;
1376			continue;
1377		}
1378
1379		/* If it is escaped, skip it. */
1380
1381		for (cp = stesc - 1; cp >= start; cp--)
1382			if (*cp != r->escape)
1383				break;
1384
1385		if ((stesc - cp) % 2 == 0) {
1386			while (stesc > cp)
1387				*stesc-- = '\\';
1388			continue;
1389		} else if (stesc[1] != '\0') {
1390			*stesc = '\\';
1391		} else {
1392			*stesc-- = '\0';
1393			if (done)
1394				continue;
1395			else
1396				return ROFF_IGN | ROFF_APPEND;
1397		}
1398
1399		/* Decide whether to expand or to check only. */
1400
1401		term = '\0';
1402		cp = stesc + 1;
1403		if (*cp == 'E')
1404			cp++;
1405		esct = cp;
1406		switch (*esct) {
1407		case '*':
1408		case '$':
1409			res = NULL;
1410			break;
1411		case 'B':
1412		case 'w':
1413			term = cp[1];
1414			/* FALLTHROUGH */
1415		case 'n':
1416			sign = cp[1];
1417			if (sign == '+' || sign == '-')
1418				cp++;
1419			res = ubuf;
1420			break;
1421		default:
1422			err = MANDOCERR_OK;
1423			switch(mandoc_escape(&cp, &stnam, &inaml)) {
1424			case ESCAPE_SPECIAL:
1425				if (mchars_spec2cp(stnam, inaml) >= 0)
1426					break;
1427				/* FALLTHROUGH */
1428			case ESCAPE_ERROR:
1429				err = MANDOCERR_ESC_BAD;
1430				break;
1431			case ESCAPE_UNDEF:
1432				err = MANDOCERR_ESC_UNDEF;
1433				break;
1434			case ESCAPE_UNSUPP:
1435				err = MANDOCERR_ESC_UNSUPP;
1436				break;
1437			default:
1438				break;
1439			}
1440			if (err != MANDOCERR_OK)
1441				mandoc_msg(err, ln, (int)(stesc - buf->buf),
1442				    "%.*s", (int)(cp - stesc), stesc);
1443			stesc--;
1444			continue;
1445		}
1446
1447		if (EXPAND_LIMIT < ++expand_count) {
1448			mandoc_msg(MANDOCERR_ROFFLOOP,
1449			    ln, (int)(stesc - buf->buf), NULL);
1450			return ROFF_IGN;
1451		}
1452
1453		/*
1454		 * The third character decides the length
1455		 * of the name of the string or register.
1456		 * Save a pointer to the name.
1457		 */
1458
1459		if (term == '\0') {
1460			switch (*++cp) {
1461			case '\0':
1462				maxl = 0;
1463				break;
1464			case '(':
1465				cp++;
1466				maxl = 2;
1467				break;
1468			case '[':
1469				cp++;
1470				term = ']';
1471				maxl = 0;
1472				break;
1473			default:
1474				maxl = 1;
1475				break;
1476			}
1477		} else {
1478			cp += 2;
1479			maxl = 0;
1480		}
1481		stnam = cp;
1482
1483		/* Advance to the end of the name. */
1484
1485		naml = 0;
1486		arg_complete = 1;
1487		while (maxl == 0 || naml < maxl) {
1488			if (*cp == '\0') {
1489				mandoc_msg(MANDOCERR_ESC_BAD, ln,
1490				    (int)(stesc - buf->buf), "%s", stesc);
1491				arg_complete = 0;
1492				break;
1493			}
1494			if (maxl == 0 && *cp == term) {
1495				cp++;
1496				break;
1497			}
1498			if (*cp++ != '\\' || *esct != 'w') {
1499				naml++;
1500				continue;
1501			}
1502			switch (mandoc_escape(&cp, NULL, NULL)) {
1503			case ESCAPE_SPECIAL:
1504			case ESCAPE_UNICODE:
1505			case ESCAPE_NUMBERED:
1506			case ESCAPE_UNDEF:
1507			case ESCAPE_OVERSTRIKE:
1508				naml++;
1509				break;
1510			default:
1511				break;
1512			}
1513		}
1514
1515		/*
1516		 * Retrieve the replacement string; if it is
1517		 * undefined, resume searching for escapes.
1518		 */
1519
1520		switch (*esct) {
1521		case '*':
1522			if (arg_complete) {
1523				deftype = ROFFDEF_USER | ROFFDEF_PRE;
1524				res = roff_getstrn(r, stnam, naml, &deftype);
1525
1526				/*
1527				 * If not overriden, let \*(.T
1528				 * through to the formatters.
1529				 */
1530
1531				if (res == NULL && naml == 2 &&
1532				    stnam[0] == '.' && stnam[1] == 'T') {
1533					roff_setstrn(&r->strtab,
1534					    ".T", 2, NULL, 0, 0);
1535					stesc--;
1536					continue;
1537				}
1538			}
1539			break;
1540		case '$':
1541			if (r->mstackpos < 0) {
1542				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1543				    (int)(stesc - buf->buf), "%.3s", stesc);
1544				break;
1545			}
1546			ctx = r->mstack + r->mstackpos;
1547			npos = esct[1] - '1';
1548			if (npos >= 0 && npos <= 8) {
1549				res = npos < ctx->argc ?
1550				    ctx->argv[npos] : "";
1551				break;
1552			}
1553			if (esct[1] == '*')
1554				quote_args = 0;
1555			else if (esct[1] == '@')
1556				quote_args = 1;
1557			else {
1558				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1559				    (int)(stesc - buf->buf), "%.3s", stesc);
1560				break;
1561			}
1562			asz = 0;
1563			for (npos = 0; npos < ctx->argc; npos++) {
1564				if (npos)
1565					asz++;  /* blank */
1566				if (quote_args)
1567					asz += 2;  /* quotes */
1568				asz += strlen(ctx->argv[npos]);
1569			}
1570			if (asz != 3) {
1571				rsz = buf->sz - (stesc - buf->buf) - 3;
1572				if (asz < 3)
1573					memmove(stesc + asz, stesc + 3, rsz);
1574				buf->sz += asz - 3;
1575				nbuf = mandoc_realloc(buf->buf, buf->sz);
1576				start = nbuf + pos;
1577				stesc = nbuf + (stesc - buf->buf);
1578				buf->buf = nbuf;
1579				if (asz > 3)
1580					memmove(stesc + asz, stesc + 3, rsz);
1581			}
1582			for (npos = 0; npos < ctx->argc; npos++) {
1583				if (npos)
1584					*stesc++ = ' ';
1585				if (quote_args)
1586					*stesc++ = '"';
1587				cp = ctx->argv[npos];
1588				while (*cp != '\0')
1589					*stesc++ = *cp++;
1590				if (quote_args)
1591					*stesc++ = '"';
1592			}
1593			continue;
1594		case 'B':
1595			npos = 0;
1596			ubuf[0] = arg_complete &&
1597			    roff_evalnum(r, ln, stnam, &npos,
1598			      NULL, ROFFNUM_SCALE) &&
1599			    stnam + npos + 1 == cp ? '1' : '0';
1600			ubuf[1] = '\0';
1601			break;
1602		case 'n':
1603			if (arg_complete)
1604				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1605				    roff_getregn(r, stnam, naml, sign));
1606			else
1607				ubuf[0] = '\0';
1608			break;
1609		case 'w':
1610			/* use even incomplete args */
1611			(void)snprintf(ubuf, sizeof(ubuf), "%d",
1612			    24 * (int)naml);
1613			break;
1614		}
1615
1616		if (res == NULL) {
1617			if (*esct == '*')
1618				mandoc_msg(MANDOCERR_STR_UNDEF,
1619				    ln, (int)(stesc - buf->buf),
1620				    "%.*s", (int)naml, stnam);
1621			res = "";
1622		} else if (buf->sz + strlen(res) > SHRT_MAX) {
1623			mandoc_msg(MANDOCERR_ROFFLOOP,
1624			    ln, (int)(stesc - buf->buf), NULL);
1625			return ROFF_IGN;
1626		}
1627
1628		/* Replace the escape sequence by the string. */
1629
1630		*stesc = '\0';
1631		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1632		    buf->buf, res, cp) + 1;
1633
1634		/* Prepare for the next replacement. */
1635
1636		start = nbuf + pos;
1637		stesc = nbuf + (stesc - buf->buf) + strlen(res);
1638		free(buf->buf);
1639		buf->buf = nbuf;
1640	}
1641	return ROFF_CONT;
1642}
1643
1644/*
1645 * Parse a quoted or unquoted roff-style request or macro argument.
1646 * Return a pointer to the parsed argument, which is either the original
1647 * pointer or advanced by one byte in case the argument is quoted.
1648 * NUL-terminate the argument in place.
1649 * Collapse pairs of quotes inside quoted arguments.
1650 * Advance the argument pointer to the next argument,
1651 * or to the NUL byte terminating the argument line.
1652 */
1653char *
1654roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1655{
1656	struct buf	 buf;
1657	char		*cp, *start;
1658	int		 newesc, pairs, quoted, white;
1659
1660	/* Quoting can only start with a new word. */
1661	start = *cpp;
1662	quoted = 0;
1663	if ('"' == *start) {
1664		quoted = 1;
1665		start++;
1666	}
1667
1668	newesc = pairs = white = 0;
1669	for (cp = start; '\0' != *cp; cp++) {
1670
1671		/*
1672		 * Move the following text left
1673		 * after quoted quotes and after "\\" and "\t".
1674		 */
1675		if (pairs)
1676			cp[-pairs] = cp[0];
1677
1678		if ('\\' == cp[0]) {
1679			/*
1680			 * In copy mode, translate double to single
1681			 * backslashes and backslash-t to literal tabs.
1682			 */
1683			switch (cp[1]) {
1684			case 'a':
1685			case 't':
1686				cp[-pairs] = '\t';
1687				pairs++;
1688				cp++;
1689				break;
1690			case '\\':
1691				newesc = 1;
1692				cp[-pairs] = ASCII_ESC;
1693				pairs++;
1694				cp++;
1695				break;
1696			case ' ':
1697				/* Skip escaped blanks. */
1698				if (0 == quoted)
1699					cp++;
1700				break;
1701			default:
1702				break;
1703			}
1704		} else if (0 == quoted) {
1705			if (' ' == cp[0]) {
1706				/* Unescaped blanks end unquoted args. */
1707				white = 1;
1708				break;
1709			}
1710		} else if ('"' == cp[0]) {
1711			if ('"' == cp[1]) {
1712				/* Quoted quotes collapse. */
1713				pairs++;
1714				cp++;
1715			} else {
1716				/* Unquoted quotes end quoted args. */
1717				quoted = 2;
1718				break;
1719			}
1720		}
1721	}
1722
1723	/* Quoted argument without a closing quote. */
1724	if (1 == quoted)
1725		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1726
1727	/* NUL-terminate this argument and move to the next one. */
1728	if (pairs)
1729		cp[-pairs] = '\0';
1730	if ('\0' != *cp) {
1731		*cp++ = '\0';
1732		while (' ' == *cp)
1733			cp++;
1734	}
1735	*pos += (int)(cp - start) + (quoted ? 1 : 0);
1736	*cpp = cp;
1737
1738	if ('\0' == *cp && (white || ' ' == cp[-1]))
1739		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1740
1741	start = mandoc_strdup(start);
1742	if (newesc == 0)
1743		return start;
1744
1745	buf.buf = start;
1746	buf.sz = strlen(start) + 1;
1747	buf.next = NULL;
1748	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1749		free(buf.buf);
1750		buf.buf = mandoc_strdup("");
1751	}
1752	return buf.buf;
1753}
1754
1755
1756/*
1757 * Process text streams.
1758 */
1759static int
1760roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1761{
1762	size_t		 sz;
1763	const char	*start;
1764	char		*p;
1765	int		 isz;
1766	enum mandoc_esc	 esc;
1767
1768	/* Spring the input line trap. */
1769
1770	if (roffit_lines == 1) {
1771		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1772		free(buf->buf);
1773		buf->buf = p;
1774		buf->sz = isz + 1;
1775		*offs = 0;
1776		free(roffit_macro);
1777		roffit_lines = 0;
1778		return ROFF_REPARSE;
1779	} else if (roffit_lines > 1)
1780		--roffit_lines;
1781
1782	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1783		if (roffce_lines < 1) {
1784			r->man->last = roffce_node;
1785			r->man->next = ROFF_NEXT_SIBLING;
1786			roffce_lines = 0;
1787			roffce_node = NULL;
1788		} else
1789			roffce_lines--;
1790	}
1791
1792	/* Convert all breakable hyphens into ASCII_HYPH. */
1793
1794	start = p = buf->buf + pos;
1795
1796	while (*p != '\0') {
1797		sz = strcspn(p, "-\\");
1798		p += sz;
1799
1800		if (*p == '\0')
1801			break;
1802
1803		if (*p == '\\') {
1804			/* Skip over escapes. */
1805			p++;
1806			esc = mandoc_escape((const char **)&p, NULL, NULL);
1807			if (esc == ESCAPE_ERROR)
1808				break;
1809			while (*p == '-')
1810				p++;
1811			continue;
1812		} else if (p == start) {
1813			p++;
1814			continue;
1815		}
1816
1817		if (isalpha((unsigned char)p[-1]) &&
1818		    isalpha((unsigned char)p[1]))
1819			*p = ASCII_HYPH;
1820		p++;
1821	}
1822	return ROFF_CONT;
1823}
1824
1825int
1826roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1827{
1828	enum roff_tok	 t;
1829	int		 e;
1830	int		 pos;	/* parse point */
1831	int		 spos;	/* saved parse point for messages */
1832	int		 ppos;	/* original offset in buf->buf */
1833	int		 ctl;	/* macro line (boolean) */
1834
1835	ppos = pos = *offs;
1836
1837	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1838	    (r->man->flags & ROFF_NOFILL) == 0 &&
1839	    strchr(" .\\", buf->buf[pos]) == NULL &&
1840	    buf->buf[pos] != r->control &&
1841	    strcspn(buf->buf, " ") < 80)
1842		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1843		    "%.20s...", buf->buf + pos);
1844
1845	/* Handle in-line equation delimiters. */
1846
1847	if (r->tbl == NULL &&
1848	    r->last_eqn != NULL && r->last_eqn->delim &&
1849	    (r->eqn == NULL || r->eqn_inline)) {
1850		e = roff_eqndelim(r, buf, pos);
1851		if (e == ROFF_REPARSE)
1852			return e;
1853		assert(e == ROFF_CONT);
1854	}
1855
1856	/* Expand some escape sequences. */
1857
1858	e = roff_expand(r, buf, ln, pos, r->escape);
1859	if ((e & ROFF_MASK) == ROFF_IGN)
1860		return e;
1861	assert(e == ROFF_CONT);
1862
1863	ctl = roff_getcontrol(r, buf->buf, &pos);
1864
1865	/*
1866	 * First, if a scope is open and we're not a macro, pass the
1867	 * text through the macro's filter.
1868	 * Equations process all content themselves.
1869	 * Tables process almost all content themselves, but we want
1870	 * to warn about macros before passing it there.
1871	 */
1872
1873	if (r->last != NULL && ! ctl) {
1874		t = r->last->tok;
1875		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1876		if ((e & ROFF_MASK) == ROFF_IGN)
1877			return e;
1878		e &= ~ROFF_MASK;
1879	} else
1880		e = ROFF_IGN;
1881	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1882		eqn_read(r->eqn, buf->buf + ppos);
1883		return e;
1884	}
1885	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1886		tbl_read(r->tbl, ln, buf->buf, ppos);
1887		roff_addtbl(r->man, ln, r->tbl);
1888		return e;
1889	}
1890	if ( ! ctl) {
1891		r->options &= ~MPARSE_COMMENT;
1892		return roff_parsetext(r, buf, pos, offs) | e;
1893	}
1894
1895	/* Skip empty request lines. */
1896
1897	if (buf->buf[pos] == '"') {
1898		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1899		return ROFF_IGN;
1900	} else if (buf->buf[pos] == '\0')
1901		return ROFF_IGN;
1902
1903	/*
1904	 * If a scope is open, go to the child handler for that macro,
1905	 * as it may want to preprocess before doing anything with it.
1906	 * Don't do so if an equation is open.
1907	 */
1908
1909	if (r->last) {
1910		t = r->last->tok;
1911		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1912	}
1913
1914	/* No scope is open.  This is a new request or macro. */
1915
1916	r->options &= ~MPARSE_COMMENT;
1917	spos = pos;
1918	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1919
1920	/* Tables ignore most macros. */
1921
1922	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1923	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1924		mandoc_msg(MANDOCERR_TBLMACRO,
1925		    ln, pos, "%s", buf->buf + spos);
1926		if (t != TOKEN_NONE)
1927			return ROFF_IGN;
1928		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1929			pos++;
1930		while (buf->buf[pos] == ' ')
1931			pos++;
1932		tbl_read(r->tbl, ln, buf->buf, pos);
1933		roff_addtbl(r->man, ln, r->tbl);
1934		return ROFF_IGN;
1935	}
1936
1937	/* For now, let high level macros abort .ce mode. */
1938
1939	if (ctl && roffce_node != NULL &&
1940	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1941	     t == ROFF_TH || t == ROFF_TS)) {
1942		r->man->last = roffce_node;
1943		r->man->next = ROFF_NEXT_SIBLING;
1944		roffce_lines = 0;
1945		roffce_node = NULL;
1946	}
1947
1948	/*
1949	 * This is neither a roff request nor a user-defined macro.
1950	 * Let the standard macro set parsers handle it.
1951	 */
1952
1953	if (t == TOKEN_NONE)
1954		return ROFF_CONT;
1955
1956	/* Execute a roff request or a user defined macro. */
1957
1958	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1959}
1960
1961/*
1962 * Internal interface function to tell the roff parser that execution
1963 * of the current macro ended.  This is required because macro
1964 * definitions usually do not end with a .return request.
1965 */
1966void
1967roff_userret(struct roff *r)
1968{
1969	struct mctx	*ctx;
1970	int		 i;
1971
1972	assert(r->mstackpos >= 0);
1973	ctx = r->mstack + r->mstackpos;
1974	for (i = 0; i < ctx->argc; i++)
1975		free(ctx->argv[i]);
1976	ctx->argc = 0;
1977	r->mstackpos--;
1978}
1979
1980void
1981roff_endparse(struct roff *r)
1982{
1983	if (r->last != NULL)
1984		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1985		    r->last->col, "%s", roff_name[r->last->tok]);
1986
1987	if (r->eqn != NULL) {
1988		mandoc_msg(MANDOCERR_BLK_NOEND,
1989		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1990		eqn_parse(r->eqn);
1991		r->eqn = NULL;
1992	}
1993
1994	if (r->tbl != NULL) {
1995		tbl_end(r->tbl, 1);
1996		r->tbl = NULL;
1997	}
1998}
1999
2000/*
2001 * Parse a roff node's type from the input buffer.  This must be in the
2002 * form of ".foo xxx" in the usual way.
2003 */
2004static enum roff_tok
2005roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2006{
2007	char		*cp;
2008	const char	*mac;
2009	size_t		 maclen;
2010	int		 deftype;
2011	enum roff_tok	 t;
2012
2013	cp = buf + *pos;
2014
2015	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2016		return TOKEN_NONE;
2017
2018	mac = cp;
2019	maclen = roff_getname(r, &cp, ln, ppos);
2020
2021	deftype = ROFFDEF_USER | ROFFDEF_REN;
2022	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2023	switch (deftype) {
2024	case ROFFDEF_USER:
2025		t = ROFF_USERDEF;
2026		break;
2027	case ROFFDEF_REN:
2028		t = ROFF_RENAMED;
2029		break;
2030	default:
2031		t = roffhash_find(r->reqtab, mac, maclen);
2032		break;
2033	}
2034	if (t != TOKEN_NONE)
2035		*pos = cp - buf;
2036	else if (deftype == ROFFDEF_UNDEF) {
2037		/* Using an undefined macro defines it to be empty. */
2038		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2039		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2040	}
2041	return t;
2042}
2043
2044/* --- handling of request blocks ----------------------------------------- */
2045
2046/*
2047 * Close a macro definition block or an "ignore" block.
2048 */
2049static int
2050roff_cblock(ROFF_ARGS)
2051{
2052	int	 rr;
2053
2054	if (r->last == NULL) {
2055		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2056		return ROFF_IGN;
2057	}
2058
2059	switch (r->last->tok) {
2060	case ROFF_am:
2061	case ROFF_ami:
2062	case ROFF_de:
2063	case ROFF_dei:
2064	case ROFF_ig:
2065		break;
2066	case ROFF_am1:
2067	case ROFF_de1:
2068		/* Remapped in roff_block(). */
2069		abort();
2070	default:
2071		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2072		return ROFF_IGN;
2073	}
2074
2075	roffnode_pop(r);
2076	roffnode_cleanscope(r);
2077
2078	/*
2079	 * If a conditional block with braces is still open,
2080	 * check for "\}" block end markers.
2081	 */
2082
2083	if (r->last != NULL && r->last->endspan < 0) {
2084		rr = 1;  /* If arguments follow "\}", warn about them. */
2085		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2086	}
2087
2088	if (buf->buf[pos] != '\0')
2089		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2090		    ".. %s", buf->buf + pos);
2091
2092	return ROFF_IGN;
2093}
2094
2095/*
2096 * Pop all nodes ending at the end of the current input line.
2097 * Return the number of loops ended.
2098 */
2099static int
2100roffnode_cleanscope(struct roff *r)
2101{
2102	int inloop;
2103
2104	inloop = 0;
2105	while (r->last != NULL && r->last->endspan > 0) {
2106		if (--r->last->endspan != 0)
2107			break;
2108		inloop += roffnode_pop(r);
2109	}
2110	return inloop;
2111}
2112
2113/*
2114 * Handle the closing "\}" of a conditional block.
2115 * Apart from generating warnings, this only pops nodes.
2116 * Return the number of loops ended.
2117 */
2118static int
2119roff_ccond(struct roff *r, int ln, int ppos)
2120{
2121	if (NULL == r->last) {
2122		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2123		return 0;
2124	}
2125
2126	switch (r->last->tok) {
2127	case ROFF_el:
2128	case ROFF_ie:
2129	case ROFF_if:
2130	case ROFF_while:
2131		break;
2132	default:
2133		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2134		return 0;
2135	}
2136
2137	if (r->last->endspan > -1) {
2138		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2139		return 0;
2140	}
2141
2142	return roffnode_pop(r) + roffnode_cleanscope(r);
2143}
2144
2145static int
2146roff_block(ROFF_ARGS)
2147{
2148	const char	*name, *value;
2149	char		*call, *cp, *iname, *rname;
2150	size_t		 csz, namesz, rsz;
2151	int		 deftype;
2152
2153	/* Ignore groff compatibility mode for now. */
2154
2155	if (tok == ROFF_de1)
2156		tok = ROFF_de;
2157	else if (tok == ROFF_dei1)
2158		tok = ROFF_dei;
2159	else if (tok == ROFF_am1)
2160		tok = ROFF_am;
2161	else if (tok == ROFF_ami1)
2162		tok = ROFF_ami;
2163
2164	/* Parse the macro name argument. */
2165
2166	cp = buf->buf + pos;
2167	if (tok == ROFF_ig) {
2168		iname = NULL;
2169		namesz = 0;
2170	} else {
2171		iname = cp;
2172		namesz = roff_getname(r, &cp, ln, ppos);
2173		iname[namesz] = '\0';
2174	}
2175
2176	/* Resolve the macro name argument if it is indirect. */
2177
2178	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2179		deftype = ROFFDEF_USER;
2180		name = roff_getstrn(r, iname, namesz, &deftype);
2181		if (name == NULL) {
2182			mandoc_msg(MANDOCERR_STR_UNDEF,
2183			    ln, (int)(iname - buf->buf),
2184			    "%.*s", (int)namesz, iname);
2185			namesz = 0;
2186		} else
2187			namesz = strlen(name);
2188	} else
2189		name = iname;
2190
2191	if (namesz == 0 && tok != ROFF_ig) {
2192		mandoc_msg(MANDOCERR_REQ_EMPTY,
2193		    ln, ppos, "%s", roff_name[tok]);
2194		return ROFF_IGN;
2195	}
2196
2197	roffnode_push(r, tok, name, ln, ppos);
2198
2199	/*
2200	 * At the beginning of a `de' macro, clear the existing string
2201	 * with the same name, if there is one.  New content will be
2202	 * appended from roff_block_text() in multiline mode.
2203	 */
2204
2205	if (tok == ROFF_de || tok == ROFF_dei) {
2206		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2207		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2208	} else if (tok == ROFF_am || tok == ROFF_ami) {
2209		deftype = ROFFDEF_ANY;
2210		value = roff_getstrn(r, iname, namesz, &deftype);
2211		switch (deftype) {  /* Before appending, ... */
2212		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2213			roff_setstrn(&r->strtab, name, namesz,
2214			    value, strlen(value), 0);
2215			break;
2216		case ROFFDEF_REN: /* call original standard macro. */
2217			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2218			    (int)strlen(value), value);
2219			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2220			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2221			free(call);
2222			break;
2223		case ROFFDEF_STD:  /* rename and call standard macro. */
2224			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2225			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2226			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2227			    (int)rsz, rname);
2228			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2229			free(call);
2230			free(rname);
2231			break;
2232		default:
2233			break;
2234		}
2235	}
2236
2237	if (*cp == '\0')
2238		return ROFF_IGN;
2239
2240	/* Get the custom end marker. */
2241
2242	iname = cp;
2243	namesz = roff_getname(r, &cp, ln, ppos);
2244
2245	/* Resolve the end marker if it is indirect. */
2246
2247	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2248		deftype = ROFFDEF_USER;
2249		name = roff_getstrn(r, iname, namesz, &deftype);
2250		if (name == NULL) {
2251			mandoc_msg(MANDOCERR_STR_UNDEF,
2252			    ln, (int)(iname - buf->buf),
2253			    "%.*s", (int)namesz, iname);
2254			namesz = 0;
2255		} else
2256			namesz = strlen(name);
2257	} else
2258		name = iname;
2259
2260	if (namesz)
2261		r->last->end = mandoc_strndup(name, namesz);
2262
2263	if (*cp != '\0')
2264		mandoc_msg(MANDOCERR_ARG_EXCESS,
2265		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2266
2267	return ROFF_IGN;
2268}
2269
2270static int
2271roff_block_sub(ROFF_ARGS)
2272{
2273	enum roff_tok	t;
2274	int		i, j;
2275
2276	/*
2277	 * First check whether a custom macro exists at this level.  If
2278	 * it does, then check against it.  This is some of groff's
2279	 * stranger behaviours.  If we encountered a custom end-scope
2280	 * tag and that tag also happens to be a "real" macro, then we
2281	 * need to try interpreting it again as a real macro.  If it's
2282	 * not, then return ignore.  Else continue.
2283	 */
2284
2285	if (r->last->end) {
2286		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2287			if (buf->buf[i] != r->last->end[j])
2288				break;
2289
2290		if (r->last->end[j] == '\0' &&
2291		    (buf->buf[i] == '\0' ||
2292		     buf->buf[i] == ' ' ||
2293		     buf->buf[i] == '\t')) {
2294			roffnode_pop(r);
2295			roffnode_cleanscope(r);
2296
2297			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2298				i++;
2299
2300			pos = i;
2301			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2302			    TOKEN_NONE)
2303				return ROFF_RERUN;
2304			return ROFF_IGN;
2305		}
2306	}
2307
2308	/*
2309	 * If we have no custom end-query or lookup failed, then try
2310	 * pulling it out of the hashtable.
2311	 */
2312
2313	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2314
2315	if (t != ROFF_cblock) {
2316		if (tok != ROFF_ig)
2317			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2318		return ROFF_IGN;
2319	}
2320
2321	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2322}
2323
2324static int
2325roff_block_text(ROFF_ARGS)
2326{
2327
2328	if (tok != ROFF_ig)
2329		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2330
2331	return ROFF_IGN;
2332}
2333
/*
 * Check for a closing "\}" and handle it.
 * In this function, the final "int *offs" argument is used for
 * different purposes than elsewhere:
 * Input: *offs == 0: caller wants to discard arguments following \}
 *        *offs == 1: caller wants to preserve text following \}
 * Output: *offs = 0: tell caller to discard input line
 *         *offs = 1: tell caller to use input line
 * The input buffer is modified in place: every "\}" found is either
 * cut out, replaced by "\&", or used to truncate the line.
 * Return ROFF_IGN, with ROFF_LOOPCONT or ROFF_LOOPEXIT or'ed in
 * if "tok" is ROFF_while and popping nodes ended a loop.
 */
static int
roff_cond_checkend(ROFF_ARGS)
{
	char		*ep;
	int		 endloop, irc, rr;

	irc = ROFF_IGN;
	/* Save the rule now: the node may get popped further down. */
	rr = r->last->rule;
	/* Loop control flag to report if a loop ends on this line. */
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If "\}" occurs on a macro line without a preceding macro or
	 * a text line contains nothing else, drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
		rr = 0;

	/*
	 * The closing delimiter "\}" rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
	 */

	while ((ep = strchr(ep, '\\')) != NULL) {
		switch (ep[1]) {
		case '}':
			if (ep[2] == '\0')
				/* At the end of the line: truncate. */
				ep[0] = '\0';
			else if (rr)
				/*
				 * The line is kept: turn "\}" into "\&".
				 * The next iteration finds this backslash
				 * again and skips it as a normal escape.
				 */
				ep[1] = '&';
			else
				/* Cut out the two bytes of "\}". */
				memmove(ep, ep + 2, strlen(ep + 2) + 1);
			if (roff_ccond(r, ln, ep - buf->buf))
				irc |= endloop;
			break;
		case '\0':
			/* Trailing backslash: step onto the NUL. */
			++ep;
			break;
		default:
			/* Any other escape: skip the backslash and one byte. */
			ep += 2;
			break;
		}
	}
	*offs = rr;
	return irc;
}
2393
/*
 * Parse and process a request or macro line in conditional scope.
 * Closing "\}" delimiters are handled first.  The request on the
 * line is then executed if the conditional is true or if the request
 * is flagged ROFFMAC_STRUCT; `break' gets special treatment.
 * Return the combined result flags of roff_cond_checkend()
 * and of the request handler, if one was called.
 */
static int
roff_cond_sub(ROFF_ARGS)
{
	struct roffnode	*bl;
	int		 irc, rr;
	enum roff_tok	 t;

	rr = 0;  /* If arguments follow "\}", skip them. */
	/* On return, rr tells whether the line is to be used. */
	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* For now, let high level macros abort .ce mode. */

	if (roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
             t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * Fully handle known macros when they are structurally
	 * required or when the conditional evaluated to true.
	 */

	if (t == ROFF_break) {
		if (irc & ROFF_LOOPMASK)
			/* A loop just ended on this line: leave it. */
			irc = ROFF_IGN | ROFF_LOOPEXIT;
		else if (rr) {
			/*
			 * Set the rule to false on all open nodes
			 * up to and including the innermost `while'.
			 */
			for (bl = r->last; bl != NULL; bl = bl->parent) {
				bl->rule = 0;
				if (bl->tok == ROFF_while)
					break;
			}
		}
	} else if (t != TOKEN_NONE &&
	    (rr || roffs[t].flags & ROFFMAC_STRUCT))
		irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
	else
		/* Not a request to run here: keep or drop the line. */
		irc |= rr ? ROFF_CONT : ROFF_IGN;
	return irc;
}
2441
2442/*
2443 * Parse and process a text line in conditional scope.
2444 */
2445static int
2446roff_cond_text(ROFF_ARGS)
2447{
2448	int	 irc, rr;
2449
2450	rr = 1;  /* If arguments follow "\}", preserve them. */
2451	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2452	if (rr)
2453		irc |= ROFF_CONT;
2454	return irc;
2455}
2456
2457/* --- handling of numeric and conditional expressions -------------------- */
2458
2459/*
2460 * Parse a single signed integer number.  Stop at the first non-digit.
2461 * If there is at least one digit, return success and advance the
2462 * parse point, else return failure and let the parse point unchanged.
2463 * Ignore overflows, treat them just like the C language.
2464 */
2465static int
2466roff_getnum(const char *v, int *pos, int *res, int flags)
2467{
2468	int	 myres, scaled, n, p;
2469
2470	if (NULL == res)
2471		res = &myres;
2472
2473	p = *pos;
2474	n = v[p] == '-';
2475	if (n || v[p] == '+')
2476		p++;
2477
2478	if (flags & ROFFNUM_WHITE)
2479		while (isspace((unsigned char)v[p]))
2480			p++;
2481
2482	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2483		*res = 10 * *res + v[p] - '0';
2484	if (p == *pos + n)
2485		return 0;
2486
2487	if (n)
2488		*res = -*res;
2489
2490	/* Each number may be followed by one optional scaling unit. */
2491
2492	switch (v[p]) {
2493	case 'f':
2494		scaled = *res * 65536;
2495		break;
2496	case 'i':
2497		scaled = *res * 240;
2498		break;
2499	case 'c':
2500		scaled = *res * 240 / 2.54;
2501		break;
2502	case 'v':
2503	case 'P':
2504		scaled = *res * 40;
2505		break;
2506	case 'm':
2507	case 'n':
2508		scaled = *res * 24;
2509		break;
2510	case 'p':
2511		scaled = *res * 10 / 3;
2512		break;
2513	case 'u':
2514		scaled = *res;
2515		break;
2516	case 'M':
2517		scaled = *res * 6 / 25;
2518		break;
2519	default:
2520		scaled = *res;
2521		p--;
2522		break;
2523	}
2524	if (flags & ROFFNUM_SCALE)
2525		*res = scaled;
2526
2527	*pos = p + 1;
2528	return 1;
2529}
2530
/*
 * Evaluate a string comparison condition.
 * The byte at v[*pos] is the delimiter; it must not be the
 * terminating NUL.  Succeed (return 1) if the string between
 * the first and second occurrence of the delimiter is identical
 * to the string between the second and third occurrence,
 * else return 0.
 * Advance the cursor after the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs, *rhs;
	char		 delim;
	int		 same;

	same = 0;
	delim = v[*pos];
	lhs = v + *pos + 1;		/* scans the first string */
	rhs = strchr(lhs, delim);	/* scans the second string */

	if (rhs != NULL) {
		/* Step over the middle delimiter, compare bytewise. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: find the final delimiter. */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {
				/* Both strings ended simultaneously. */
				same = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return same;
}
2573
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition starting at v[*pos].
 * A leading '!' negates the condition.
 * Advance *pos past the part of the line that was consumed.
 * Return 1 if the condition holds, 0 otherwise.
 */
static int
roff_evalcond(struct roff *r, int ln, char *v, int *pos)
{
	const char	*start, *end;
	char		*cp, *name;
	size_t		 sz;
	int		 deftype, len, number, savepos, istrue, wanttrue;

	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case '\0':
		/* An empty condition is false, even when negated. */
		return 0;
	case 'n':
	case 'o':
		/* These single-letter conditions always hold. */
		(*pos)++;
		return wanttrue;
	case 'e':
	case 't':
	case 'v':
		/* These single-letter conditions never hold. */
		(*pos)++;
		return !wanttrue;
	case 'c':
		/* Character availability; skip blanks before the character. */
		do {
			(*pos)++;
		} while (v[*pos] == ' ');

		/*
		 * Quirk for groff compatibility:
		 * The horizontal tab is neither available nor unavailable.
		 */

		if (v[*pos] == '\t') {
			(*pos)++;
			return 0;
		}

		/* Printable ASCII characters are available. */

		if (v[*pos] != '\\') {
			(*pos)++;
			return wanttrue;
		}

		/* An escape sequence: parse it starting after the backslash. */

		end = v + ++*pos;
		switch (mandoc_escape(&end, &start, &len)) {
		case ESCAPE_SPECIAL:
			istrue = mchars_spec2cp(start, len) != -1;
			break;
		case ESCAPE_UNICODE:
			istrue = 1;
			break;
		case ESCAPE_NUMBERED:
			istrue = mchars_num2char(start, len) != -1;
			break;
		default:
			/* Forces a return value of 0 below. */
			istrue = !wanttrue;
			break;
		}
		*pos = end - v;
		return istrue == wanttrue;
	case 'd':
	case 'r':
		/* Is a string or macro ('d') or a register ('r') defined? */
		cp = v + *pos + 1;
		while (*cp == ' ')
			cp++;
		name = cp;
		sz = roff_getname(r, &cp, ln, cp - v);
		if (sz == 0)
			istrue = 0;
		else if (v[*pos] == 'r')
			istrue = roff_hasregn(r, name, sz);
		else {
			/* Only the resulting deftype is of interest. */
			deftype = ROFFDEF_ANY;
		        roff_getstrn(r, name, sz, &deftype);
			istrue = !!deftype;
		}
		*pos = (name + sz) - v;
		return istrue == wanttrue;
	default:
		break;
	}

	/*
	 * Try a numerical expression first; positive values are true.
	 * If the expression fails without having consumed anything,
	 * fall back to a string comparison.  If it fails after
	 * consuming input, the condition is false.
	 */

	savepos = *pos;
	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
		return (number > 0) == wanttrue;
	else if (*pos == savepos)
		return roff_evalstrcond(v, pos) == wanttrue;
	else
		return 0;
}
2673
2674static int
2675roff_line_ignore(ROFF_ARGS)
2676{
2677
2678	return ROFF_IGN;
2679}
2680
2681static int
2682roff_insec(ROFF_ARGS)
2683{
2684
2685	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2686	return ROFF_IGN;
2687}
2688
2689static int
2690roff_unsupp(ROFF_ARGS)
2691{
2692
2693	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2694	return ROFF_IGN;
2695}
2696
/*
 * Handle the requests opening a conditional: ie, if, el, and while.
 * Push a node for the conditional, determine its rule (whether the
 * body is to be processed) and its scope: next-line, multiline
 * (introduced by "\{"), or single-line.
 * Store the position of the body in *offs and return ROFF_RERUN
 * such that the caller processes the rest of the line;
 * for `while', ROFF_WHILE is or'ed in.
 */
static int
roff_cond(ROFF_ARGS)
{
	int	 irc;

	roffnode_push(r, tok, NULL, ln, ppos);

	/*
	 * An `.el' has no conditional body: it will consume the value
	 * of the current rstack entry set in prior `ie' calls or
	 * defaults to DENY.
	 *
	 * If we're not an `el', however, then evaluate the conditional.
	 */

	r->last->rule = tok == ROFF_el ?
	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
	    roff_evalcond(r, ln, buf->buf, &pos);

	/*
	 * An if-else will put the NEGATION of the current evaluated
	 * conditional into the stack of rules.
	 */

	if (tok == ROFF_ie) {
		/* Grow the rule stack in steps of 16 entries when full. */
		if (r->rstackpos + 1 == r->rstacksz) {
			r->rstacksz += 16;
			r->rstack = mandoc_reallocarray(r->rstack,
			    r->rstacksz, sizeof(int));
		}
		r->rstack[++r->rstackpos] = !r->last->rule;
	}

	/* If the parent has false as its rule, then so do we. */

	if (r->last->parent && !r->last->parent->rule)
		r->last->rule = 0;

	/*
	 * Determine scope.
	 * If there is nothing on the line after the conditional,
	 * not even whitespace, use next-line scope.
	 * Except that .while does not support next-line scope.
	 */

	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
		/*
		 * An endspan of 2 lets the node survive the end of
		 * the current line: roffnode_cleanscope() only pops
		 * it when the counter reaches zero.
		 */
		r->last->endspan = 2;
		goto out;
	}

	while (buf->buf[pos] == ' ')
		pos++;

	/* An opening brace requests multiline scope. */

	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
		/* A negative endspan marks a block closed by "\}". */
		r->last->endspan = -1;
		pos += 2;
		while (buf->buf[pos] == ' ')
			pos++;
		goto out;
	}

	/*
	 * Anything else following the conditional causes
	 * single-line scope.  Warn if the scope contains
	 * nothing but trailing whitespace.
	 */

	if (buf->buf[pos] == '\0')
		mandoc_msg(MANDOCERR_COND_EMPTY,
		    ln, ppos, "%s", roff_name[tok]);

	r->last->endspan = 1;

out:
	/* Tell the caller where the body of the conditional starts. */
	*offs = pos;
	irc = ROFF_RERUN;
	if (tok == ROFF_while)
		irc |= ROFF_WHILE;
	return irc;
}
2779
2780static int
2781roff_ds(ROFF_ARGS)
2782{
2783	char		*string;
2784	const char	*name;
2785	size_t		 namesz;
2786
2787	/* Ignore groff compatibility mode for now. */
2788
2789	if (tok == ROFF_ds1)
2790		tok = ROFF_ds;
2791	else if (tok == ROFF_as1)
2792		tok = ROFF_as;
2793
2794	/*
2795	 * The first word is the name of the string.
2796	 * If it is empty or terminated by an escape sequence,
2797	 * abort the `ds' request without defining anything.
2798	 */
2799
2800	name = string = buf->buf + pos;
2801	if (*name == '\0')
2802		return ROFF_IGN;
2803
2804	namesz = roff_getname(r, &string, ln, pos);
2805	switch (name[namesz]) {
2806	case '\\':
2807		return ROFF_IGN;
2808	case '\t':
2809		string = buf->buf + pos + namesz;
2810		break;
2811	default:
2812		break;
2813	}
2814
2815	/* Read past the initial double-quote, if any. */
2816	if (*string == '"')
2817		string++;
2818
2819	/* The rest is the value. */
2820	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2821	    ROFF_as == tok);
2822	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2823	return ROFF_IGN;
2824}
2825
/*
 * Parse a single operator, one or two characters long,
 * at v[*pos].  Two-character operators are reported as
 * single letters in *res:
 *   "<=" as 'l', "<>" as '!', "<?" as 'i',
 *   ">=" as 'g', ">?" as 'a', and "==" as '='.
 * If the operator is recognized, return it (which is non-zero) and
 * advance the parse point; else return 0, leave the parse point
 * unchanged, and leave the offending byte in *res.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 first, second;
	int	 width;

	first = v[*pos];
	*res = first;
	width = 1;

	if (first == '<' || first == '>' || first == '=') {
		/* These may be the first half of a longer operator. */
		second = v[*pos + 1];
		if (first == '<') {
			if (second == '=') {
				*res = 'l';
				width = 2;
			} else if (second == '>') {
				*res = '!';
				width = 2;
			} else if (second == '?') {
				*res = 'i';
				width = 2;
			}
		} else if (first == '>') {
			if (second == '=') {
				*res = 'g';
				width = 2;
			} else if (second == '?') {
				*res = 'a';
				width = 2;
			}
		} else if (second == '=')
			width = 2;
	} else if (first != '+' && first != '-' && first != '*' &&
	    first != '/' && first != '%' && first != '&' &&
	    first != ':')
		return 0;

	*pos += width;
	return *res;
}
2889
2890/*
2891 * Evaluate either a parenthesized numeric expression
2892 * or a single signed integer number.
2893 */
2894static int
2895roff_evalpar(struct roff *r, int ln,
2896	const char *v, int *pos, int *res, int flags)
2897{
2898
2899	if ('(' != v[*pos])
2900		return roff_getnum(v, pos, res, flags);
2901
2902	(*pos)++;
2903	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2904		return 0;
2905
2906	/*
2907	 * Omission of the closing parenthesis
2908	 * is an error in validation mode,
2909	 * but ignored in evaluation mode.
2910	 */
2911
2912	if (')' == v[*pos])
2913		(*pos)++;
2914	else if (NULL == res)
2915		return 0;
2916
2917	return 1;
2918}
2919
2920/*
2921 * Evaluate a complete numeric expression.
2922 * Proceed left to right, there is no concept of precedence.
2923 */
2924static int
2925roff_evalnum(struct roff *r, int ln, const char *v,
2926	int *pos, int *res, int flags)
2927{
2928	int		 mypos, operand2;
2929	char		 operator;
2930
2931	if (NULL == pos) {
2932		mypos = 0;
2933		pos = &mypos;
2934	}
2935
2936	if (flags & ROFFNUM_WHITE)
2937		while (isspace((unsigned char)v[*pos]))
2938			(*pos)++;
2939
2940	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2941		return 0;
2942
2943	while (1) {
2944		if (flags & ROFFNUM_WHITE)
2945			while (isspace((unsigned char)v[*pos]))
2946				(*pos)++;
2947
2948		if ( ! roff_getop(v, pos, &operator))
2949			break;
2950
2951		if (flags & ROFFNUM_WHITE)
2952			while (isspace((unsigned char)v[*pos]))
2953				(*pos)++;
2954
2955		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2956			return 0;
2957
2958		if (flags & ROFFNUM_WHITE)
2959			while (isspace((unsigned char)v[*pos]))
2960				(*pos)++;
2961
2962		if (NULL == res)
2963			continue;
2964
2965		switch (operator) {
2966		case '+':
2967			*res += operand2;
2968			break;
2969		case '-':
2970			*res -= operand2;
2971			break;
2972		case '*':
2973			*res *= operand2;
2974			break;
2975		case '/':
2976			if (operand2 == 0) {
2977				mandoc_msg(MANDOCERR_DIVZERO,
2978					ln, *pos, "%s", v);
2979				*res = 0;
2980				break;
2981			}
2982			*res /= operand2;
2983			break;
2984		case '%':
2985			if (operand2 == 0) {
2986				mandoc_msg(MANDOCERR_DIVZERO,
2987					ln, *pos, "%s", v);
2988				*res = 0;
2989				break;
2990			}
2991			*res %= operand2;
2992			break;
2993		case '<':
2994			*res = *res < operand2;
2995			break;
2996		case '>':
2997			*res = *res > operand2;
2998			break;
2999		case 'l':
3000			*res = *res <= operand2;
3001			break;
3002		case 'g':
3003			*res = *res >= operand2;
3004			break;
3005		case '=':
3006			*res = *res == operand2;
3007			break;
3008		case '!':
3009			*res = *res != operand2;
3010			break;
3011		case '&':
3012			*res = *res && operand2;
3013			break;
3014		case ':':
3015			*res = *res || operand2;
3016			break;
3017		case 'i':
3018			if (operand2 < *res)
3019				*res = operand2;
3020			break;
3021		case 'a':
3022			if (operand2 > *res)
3023				*res = operand2;
3024			break;
3025		default:
3026			abort();
3027		}
3028	}
3029	return 1;
3030}
3031
3032/* --- register management ------------------------------------------------ */
3033
/*
 * Set the number register with the NUL-terminated name "name".
 * With a sign of '+' or '-', "val" is added to or subtracted from
 * the current value; otherwise, it replaces the value.
 * Passing INT_MIN as the step leaves the step of the register alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3039
3040static void
3041roff_setregn(struct roff *r, const char *name, size_t len,
3042    int val, char sign, int step)
3043{
3044	struct roffreg	*reg;
3045
3046	/* Search for an existing register with the same name. */
3047	reg = r->regtab;
3048
3049	while (reg != NULL && (reg->key.sz != len ||
3050	    strncmp(reg->key.p, name, len) != 0))
3051		reg = reg->next;
3052
3053	if (NULL == reg) {
3054		/* Create a new register. */
3055		reg = mandoc_malloc(sizeof(struct roffreg));
3056		reg->key.p = mandoc_strndup(name, len);
3057		reg->key.sz = len;
3058		reg->val = 0;
3059		reg->step = 0;
3060		reg->next = r->regtab;
3061		r->regtab = reg;
3062	}
3063
3064	if ('+' == sign)
3065		reg->val += val;
3066	else if ('-' == sign)
3067		reg->val -= val;
3068	else
3069		reg->val = val;
3070	if (step != INT_MIN)
3071		reg->step = step;
3072}
3073
3074/*
3075 * Handle some predefined read-only number registers.
3076 * For now, return -1 if the requested register is not predefined;
3077 * in case a predefined read-only register having the value -1
3078 * were to turn up, another special value would have to be chosen.
3079 */
3080static int
3081roff_getregro(const struct roff *r, const char *name)
3082{
3083
3084	switch (*name) {
3085	case '$':  /* Number of arguments of the last macro evaluated. */
3086		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3087	case 'A':  /* ASCII approximation mode is always off. */
3088		return 0;
3089	case 'g':  /* Groff compatibility mode is always on. */
3090		return 1;
3091	case 'H':  /* Fixed horizontal resolution. */
3092		return 24;
3093	case 'j':  /* Always adjust left margin only. */
3094		return 0;
3095	case 'T':  /* Some output device is always defined. */
3096		return 1;
3097	case 'V':  /* Fixed vertical resolution. */
3098		return 40;
3099	default:
3100		return -1;
3101	}
3102}
3103
/*
 * Return the value of the number register with the NUL-terminated
 * name "name", without incrementing or decrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3109
3110static int
3111roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3112{
3113	struct roffreg	*reg;
3114	int		 val;
3115
3116	if ('.' == name[0] && 2 == len) {
3117		val = roff_getregro(r, name + 1);
3118		if (-1 != val)
3119			return val;
3120	}
3121
3122	for (reg = r->regtab; reg; reg = reg->next) {
3123		if (len == reg->key.sz &&
3124		    0 == strncmp(name, reg->key.p, len)) {
3125			switch (sign) {
3126			case '+':
3127				reg->val += reg->step;
3128				break;
3129			case '-':
3130				reg->val -= reg->step;
3131				break;
3132			default:
3133				break;
3134			}
3135			return reg->val;
3136		}
3137	}
3138
3139	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3140	return 0;
3141}
3142
3143static int
3144roff_hasregn(const struct roff *r, const char *name, size_t len)
3145{
3146	struct roffreg	*reg;
3147	int		 val;
3148
3149	if ('.' == name[0] && 2 == len) {
3150		val = roff_getregro(r, name + 1);
3151		if (-1 != val)
3152			return 1;
3153	}
3154
3155	for (reg = r->regtab; reg; reg = reg->next)
3156		if (len == reg->key.sz &&
3157		    0 == strncmp(name, reg->key.p, len))
3158			return 1;
3159
3160	return 0;
3161}
3162
3163static void
3164roff_freereg(struct roffreg *reg)
3165{
3166	struct roffreg	*old_reg;
3167
3168	while (NULL != reg) {
3169		free(reg->key.p);
3170		old_reg = reg;
3171		reg = reg->next;
3172		free(old_reg);
3173	}
3174}
3175
3176static int
3177roff_nr(ROFF_ARGS)
3178{
3179	char		*key, *val, *step;
3180	size_t		 keysz;
3181	int		 iv, is, len;
3182	char		 sign;
3183
3184	key = val = buf->buf + pos;
3185	if (*key == '\0')
3186		return ROFF_IGN;
3187
3188	keysz = roff_getname(r, &val, ln, pos);
3189	if (key[keysz] == '\\' || key[keysz] == '\t')
3190		return ROFF_IGN;
3191
3192	sign = *val;
3193	if (sign == '+' || sign == '-')
3194		val++;
3195
3196	len = 0;
3197	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3198		return ROFF_IGN;
3199
3200	step = val + len;
3201	while (isspace((unsigned char)*step))
3202		step++;
3203	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3204		is = INT_MIN;
3205
3206	roff_setregn(r, key, keysz, iv, sign, is);
3207	return ROFF_IGN;
3208}
3209
3210static int
3211roff_rr(ROFF_ARGS)
3212{
3213	struct roffreg	*reg, **prev;
3214	char		*name, *cp;
3215	size_t		 namesz;
3216
3217	name = cp = buf->buf + pos;
3218	if (*name == '\0')
3219		return ROFF_IGN;
3220	namesz = roff_getname(r, &cp, ln, pos);
3221	name[namesz] = '\0';
3222
3223	prev = &r->regtab;
3224	while (1) {
3225		reg = *prev;
3226		if (reg == NULL || !strcmp(name, reg->key.p))
3227			break;
3228		prev = &reg->next;
3229	}
3230	if (reg != NULL) {
3231		*prev = reg->next;
3232		free(reg->key.p);
3233		free(reg);
3234	}
3235	return ROFF_IGN;
3236}
3237
3238/* --- handler functions for roff requests -------------------------------- */
3239
3240static int
3241roff_rm(ROFF_ARGS)
3242{
3243	const char	 *name;
3244	char		 *cp;
3245	size_t		  namesz;
3246
3247	cp = buf->buf + pos;
3248	while (*cp != '\0') {
3249		name = cp;
3250		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3251		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3252		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3253		if (name[namesz] == '\\' || name[namesz] == '\t')
3254			break;
3255	}
3256	return ROFF_IGN;
3257}
3258
3259static int
3260roff_it(ROFF_ARGS)
3261{
3262	int		 iv;
3263
3264	/* Parse the number of lines. */
3265
3266	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3267		mandoc_msg(MANDOCERR_IT_NONUM,
3268		    ln, ppos, "%s", buf->buf + 1);
3269		return ROFF_IGN;
3270	}
3271
3272	while (isspace((unsigned char)buf->buf[pos]))
3273		pos++;
3274
3275	/*
3276	 * Arm the input line trap.
3277	 * Special-casing "an-trap" is an ugly workaround to cope
3278	 * with DocBook stupidly fiddling with man(7) internals.
3279	 */
3280
3281	roffit_lines = iv;
3282	roffit_macro = mandoc_strdup(iv != 1 ||
3283	    strcmp(buf->buf + pos, "an-trap") ?
3284	    buf->buf + pos : "br");
3285	return ROFF_IGN;
3286}
3287
3288static int
3289roff_Dd(ROFF_ARGS)
3290{
3291	int		 mask;
3292	enum roff_tok	 t, te;
3293
3294	switch (tok) {
3295	case ROFF_Dd:
3296		tok = MDOC_Dd;
3297		te = MDOC_MAX;
3298		if (r->format == 0)
3299			r->format = MPARSE_MDOC;
3300		mask = MPARSE_MDOC | MPARSE_QUICK;
3301		break;
3302	case ROFF_TH:
3303		tok = MAN_TH;
3304		te = MAN_MAX;
3305		if (r->format == 0)
3306			r->format = MPARSE_MAN;
3307		mask = MPARSE_QUICK;
3308		break;
3309	default:
3310		abort();
3311	}
3312	if ((r->options & mask) == 0)
3313		for (t = tok; t < te; t++)
3314			roff_setstr(r, roff_name[t], NULL, 0);
3315	return ROFF_CONT;
3316}
3317
3318static int
3319roff_TE(ROFF_ARGS)
3320{
3321	r->man->flags &= ~ROFF_NONOFILL;
3322	if (r->tbl == NULL) {
3323		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3324		return ROFF_IGN;
3325	}
3326	if (tbl_end(r->tbl, 0) == 0) {
3327		r->tbl = NULL;
3328		free(buf->buf);
3329		buf->buf = mandoc_strdup(".sp");
3330		buf->sz = 4;
3331		*offs = 0;
3332		return ROFF_REPARSE;
3333	}
3334	r->tbl = NULL;
3335	return ROFF_IGN;
3336}
3337
3338static int
3339roff_T_(ROFF_ARGS)
3340{
3341
3342	if (NULL == r->tbl)
3343		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3344	else
3345		tbl_restart(ln, ppos, r->tbl);
3346
3347	return ROFF_IGN;
3348}
3349
/*
 * Handle in-line equation delimiters.
 * Search the input line for a delimiter starting at "pos".
 * If none is found, return ROFF_CONT and leave the buffer alone.
 * Otherwise, replace the buffer with a new one in which the
 * delimiter is replaced by an .EQ or .EN macro on a line of its
 * own, and return ROFF_REPARSE.
 */
static int
roff_eqndelim(struct roff *r, struct buf *buf, int pos)
{
	char		*cp1, *cp2;
	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;

	/*
	 * Outside equations, look for an opening delimiter.
	 * If we are inside an equation, we already know it is
	 * in-line, or this function wouldn't have been called;
	 * so look for a closing delimiter.
	 */

	cp1 = buf->buf + pos;
	cp2 = strchr(cp1, r->eqn == NULL ?
	    r->last_eqn->odelim : r->last_eqn->cdelim);
	if (cp2 == NULL)
		return ROFF_CONT;

	/*
	 * Split the line at the delimiter: buf->buf now ends right
	 * before it, and cp2 points to the text following it.
	 */

	*cp2++ = '\0';
	bef_pr = bef_nl = aft_nl = aft_pr = "";

	/* Handle preceding text, protecting whitespace. */

	if (*buf->buf != '\0') {
		if (r->eqn == NULL)
			bef_pr = "\\&";
		bef_nl = "\n";
	}

	/*
	 * Prepare replacing the delimiter with an equation macro
	 * and drop leading white space from the equation.
	 */

	if (r->eqn == NULL) {
		while (*cp2 == ' ')
			cp2++;
		mac = ".EQ";
	} else
		mac = ".EN";

	/* Handle following text, protecting whitespace. */

	if (*cp2 != '\0') {
		aft_nl = "\n";
		if (r->eqn != NULL)
			aft_pr = "\\&";
	}

	/* Do the actual replacement. */

	/* One is added to the size returned by mandoc_asprintf(). */
	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
	/* cp2 points into the old buffer, so free it only now. */
	free(buf->buf);
	buf->buf = cp1;

	/* Toggle the in-line state of the eqn subsystem. */

	r->eqn_inline = r->eqn == NULL;
	return ROFF_REPARSE;
}
3415
3416static int
3417roff_EQ(ROFF_ARGS)
3418{
3419	struct roff_node	*n;
3420
3421	if (r->man->meta.macroset == MACROSET_MAN)
3422		man_breakscope(r->man, ROFF_EQ);
3423	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3424	if (ln > r->man->last->line)
3425		n->flags |= NODE_LINE;
3426	n->eqn = eqn_box_new();
3427	roff_node_append(r->man, n);
3428	r->man->next = ROFF_NEXT_SIBLING;
3429
3430	assert(r->eqn == NULL);
3431	if (r->last_eqn == NULL)
3432		r->last_eqn = eqn_alloc();
3433	else
3434		eqn_reset(r->last_eqn);
3435	r->eqn = r->last_eqn;
3436	r->eqn->node = n;
3437
3438	if (buf->buf[pos] != '\0')
3439		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3440		    ".EQ %s", buf->buf + pos);
3441
3442	return ROFF_IGN;
3443}
3444
3445static int
3446roff_EN(ROFF_ARGS)
3447{
3448	if (r->eqn != NULL) {
3449		eqn_parse(r->eqn);
3450		r->eqn = NULL;
3451	} else
3452		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3453	if (buf->buf[pos] != '\0')
3454		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3455		    "EN %s", buf->buf + pos);
3456	return ROFF_IGN;
3457}
3458
3459static int
3460roff_TS(ROFF_ARGS)
3461{
3462	if (r->tbl != NULL) {
3463		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3464		tbl_end(r->tbl, 0);
3465	}
3466	r->man->flags |= ROFF_NONOFILL;
3467	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3468	if (r->last_tbl == NULL)
3469		r->first_tbl = r->tbl;
3470	r->last_tbl = r->tbl;
3471	return ROFF_IGN;
3472}
3473
3474static int
3475roff_noarg(ROFF_ARGS)
3476{
3477	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3478		man_breakscope(r->man, tok);
3479	if (tok == ROFF_brp)
3480		tok = ROFF_br;
3481	roff_elem_alloc(r->man, ln, ppos, tok);
3482	if (buf->buf[pos] != '\0')
3483		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3484		   "%s %s", roff_name[tok], buf->buf + pos);
3485	if (tok == ROFF_nf)
3486		r->man->flags |= ROFF_NOFILL;
3487	else if (tok == ROFF_fi)
3488		r->man->flags &= ~ROFF_NOFILL;
3489	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3490	r->man->next = ROFF_NEXT_SIBLING;
3491	return ROFF_IGN;
3492}
3493
/*
 * Handle requests taking at most one argument and producing an
 * element node in the syntax tree.
 * The first word of the argument string becomes a child text node;
 * further words are reported as excess and dropped.
 * For .ce and .rj, the argument is evaluated as a line count and
 * stored in roffce_lines, and roffce_node tracks the open element.
 */
static int
roff_onearg(ROFF_ARGS)
{
	struct roff_node	*n;
	char			*cp;
	int			 npos;

	/* In man(7) next-line scope, these requests break the scope. */

	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
	     tok == ROFF_ti))
		man_breakscope(r->man, tok);

	/*
	 * A new .ce or .rj while one is still being tracked:
	 * continue after the tracked node, as its sibling.
	 */

	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	roff_elem_alloc(r->man, ln, ppos, tok);
	n = r->man->last;

	/*
	 * NUL-terminate the first word in place, overwriting the
	 * blanks that follow it, and warn about anything after it.
	 */

	cp = buf->buf + pos;
	if (*cp != '\0') {
		while (*cp != '\0' && *cp != ' ')
			cp++;
		while (*cp == ' ')
			*cp++ = '\0';
		if (*cp != '\0')
			mandoc_msg(MANDOCERR_ARG_EXCESS,
			    ln, (int)(cp - buf->buf),
			    "%s ... %s", roff_name[tok], cp);
		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
	}

	if (tok == ROFF_ce || tok == ROFF_rj) {
		/*
		 * The last node still being the element means that
		 * no argument word was added above: supply "1"
		 * and mark it as not coming from the source.
		 */
		if (r->man->last->type == ROFFT_ELEM) {
			roff_word_alloc(r->man, ln, pos, "1");
			r->man->last->flags |= NODE_NOSRC;
		}
		/* Evaluate the line count; fall back to 1 on failure. */
		npos = 0;
		if (roff_evalnum(r, ln, r->man->last->string, &npos,
		    &roffce_lines, 0) == 0) {
			mandoc_msg(MANDOCERR_CE_NONUM,
			    ln, pos, "ce %s", buf->buf + pos);
			roffce_lines = 1;
		}
		/*
		 * A count below one stops tracking; otherwise,
		 * remember the parent of the argument word.
		 */
		if (roffce_lines < 1) {
			r->man->last = r->man->last->parent;
			roffce_node = NULL;
			roffce_lines = 0;
		} else
			roffce_node = r->man->last->parent;
	} else {
		/* All other requests are complete right away. */
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->last = n;
	}
	n->flags |= NODE_LINE;
	r->man->next = ROFF_NEXT_SIBLING;
	return ROFF_IGN;
}
3553
3554static int
3555roff_manyarg(ROFF_ARGS)
3556{
3557	struct roff_node	*n;
3558	char			*sp, *ep;
3559
3560	roff_elem_alloc(r->man, ln, ppos, tok);
3561	n = r->man->last;
3562
3563	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3564		while (*ep != '\0' && *ep != ' ')
3565			ep++;
3566		while (*ep == ' ')
3567			*ep++ = '\0';
3568		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3569	}
3570
3571	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3572	r->man->last = n;
3573	r->man->next = ROFF_NEXT_SIBLING;
3574	return ROFF_IGN;
3575}
3576
3577static int
3578roff_als(ROFF_ARGS)
3579{
3580	char		*oldn, *newn, *end, *value;
3581	size_t		 oldsz, newsz, valsz;
3582
3583	newn = oldn = buf->buf + pos;
3584	if (*newn == '\0')
3585		return ROFF_IGN;
3586
3587	newsz = roff_getname(r, &oldn, ln, pos);
3588	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3589		return ROFF_IGN;
3590
3591	end = oldn;
3592	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3593	if (oldsz == 0)
3594		return ROFF_IGN;
3595
3596	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3597	    (int)oldsz, oldn);
3598	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3599	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3600	free(value);
3601	return ROFF_IGN;
3602}
3603
3604/*
3605 * The .break request only makes sense inside conditionals,
3606 * and that case is already handled in roff_cond_sub().
3607 */
3608static int
3609roff_break(ROFF_ARGS)
3610{
3611	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3612	return ROFF_IGN;
3613}
3614
3615static int
3616roff_cc(ROFF_ARGS)
3617{
3618	const char	*p;
3619
3620	p = buf->buf + pos;
3621
3622	if (*p == '\0' || (r->control = *p++) == '.')
3623		r->control = '\0';
3624
3625	if (*p != '\0')
3626		mandoc_msg(MANDOCERR_ARG_EXCESS,
3627		    ln, p - buf->buf, "cc ... %s", p);
3628
3629	return ROFF_IGN;
3630}
3631
3632static int
3633roff_char(ROFF_ARGS)
3634{
3635	const char	*p, *kp, *vp;
3636	size_t		 ksz, vsz;
3637	int		 font;
3638
3639	/* Parse the character to be replaced. */
3640
3641	kp = buf->buf + pos;
3642	p = kp + 1;
3643	if (*kp == '\0' || (*kp == '\\' &&
3644	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3645	    (*p != ' ' && *p != '\0')) {
3646		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3647		return ROFF_IGN;
3648	}
3649	ksz = p - kp;
3650	while (*p == ' ')
3651		p++;
3652
3653	/*
3654	 * If the replacement string contains a font escape sequence,
3655	 * we have to restore the font at the end.
3656	 */
3657
3658	vp = p;
3659	vsz = strlen(p);
3660	font = 0;
3661	while (*p != '\0') {
3662		if (*p++ != '\\')
3663			continue;
3664		switch (mandoc_escape(&p, NULL, NULL)) {
3665		case ESCAPE_FONT:
3666		case ESCAPE_FONTROMAN:
3667		case ESCAPE_FONTITALIC:
3668		case ESCAPE_FONTBOLD:
3669		case ESCAPE_FONTBI:
3670		case ESCAPE_FONTCR:
3671		case ESCAPE_FONTCB:
3672		case ESCAPE_FONTCI:
3673		case ESCAPE_FONTPREV:
3674			font++;
3675			break;
3676		default:
3677			break;
3678		}
3679	}
3680	if (font > 1)
3681		mandoc_msg(MANDOCERR_CHAR_FONT,
3682		    ln, (int)(vp - buf->buf), "%s", vp);
3683
3684	/*
3685	 * Approximate the effect of .char using the .tr tables.
3686	 * XXX In groff, .char and .tr interact differently.
3687	 */
3688
3689	if (ksz == 1) {
3690		if (r->xtab == NULL)
3691			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3692		assert((unsigned int)*kp < 128);
3693		free(r->xtab[(int)*kp].p);
3694		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3695		    "%s%s", vp, font ? "\fP" : "");
3696	} else {
3697		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3698		if (font)
3699			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3700	}
3701	return ROFF_IGN;
3702}
3703
3704static int
3705roff_ec(ROFF_ARGS)
3706{
3707	const char	*p;
3708
3709	p = buf->buf + pos;
3710	if (*p == '\0')
3711		r->escape = '\\';
3712	else {
3713		r->escape = *p;
3714		if (*++p != '\0')
3715			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3716			    (int)(p - buf->buf), "ec ... %s", p);
3717	}
3718	return ROFF_IGN;
3719}
3720
3721static int
3722roff_eo(ROFF_ARGS)
3723{
3724	r->escape = '\0';
3725	if (buf->buf[pos] != '\0')
3726		mandoc_msg(MANDOCERR_ARG_SKIP,
3727		    ln, pos, "eo %s", buf->buf + pos);
3728	return ROFF_IGN;
3729}
3730
3731static int
3732roff_nop(ROFF_ARGS)
3733{
3734	while (buf->buf[pos] == ' ')
3735		pos++;
3736	*offs = pos;
3737	return ROFF_RERUN;
3738}
3739
/*
 * Handle the .tr request: record character translations.
 * The argument is a sequence of pairs; each member of a pair is
 * either one single byte or one escape sequence.
 * Translations of single bytes are stored in the r->xtab array,
 * translations of escape sequences in the r->xmbtab list.
 * NOTE(review): r->xtab has 128 entries and is indexed with the
 * raw byte here, so this presumably relies on the input being
 * ASCII at this point - confirm against the input preprocessing.
 */
static int
roff_tr(ROFF_ARGS)
{
	const char	*p, *first, *second;
	size_t		 fsz, ssz;
	enum mandoc_esc	 esc;

	p = buf->buf + pos;

	if (*p == '\0') {
		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
		return ROFF_IGN;
	}

	while (*p != '\0') {
		/* Unless escapes are found, both sides are one byte. */
		fsz = ssz = 1;

		/* The character to be translated. */
		first = p++;
		if (*first == '\\') {
			esc = mandoc_escape(&p, NULL, NULL);
			if (esc == ESCAPE_ERROR) {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(p - buf->buf), "%s", first);
				return ROFF_IGN;
			}
			fsz = (size_t)(p - first);
		}

		/* The character to translate it to. */
		second = p++;
		if (*second == '\\') {
			esc = mandoc_escape(&p, NULL, NULL);
			if (esc == ESCAPE_ERROR) {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(p - buf->buf), "%s", second);
				return ROFF_IGN;
			}
			ssz = (size_t)(p - second);
		} else if (*second == '\0') {
			/*
			 * Odd number of characters: translate the last
			 * one to a blank, and step back onto the NUL
			 * such that the loop terminates.
			 */
			mandoc_msg(MANDOCERR_TR_ODD, ln,
			    (int)(first - buf->buf), "tr %s", first);
			second = " ";
			p--;
		}

		/* Escape sequences go to the list. */
		if (fsz > 1) {
			roff_setstrn(&r->xmbtab, first, fsz,
			    second, ssz, 0);
			continue;
		}

		/* Single bytes go to the array, allocated on demand. */
		if (r->xtab == NULL)
			r->xtab = mandoc_calloc(128,
			    sizeof(struct roffstr));

		free(r->xtab[(int)*first].p);
		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
		r->xtab[(int)*first].sz = ssz;
	}

	return ROFF_IGN;
}
3801
3802/*
3803 * Implementation of the .return request.
3804 * There is no need to call roff_userret() from here.
3805 * The read module will call that after rewinding the reader stack
3806 * to the place from where the current macro was called.
3807 */
3808static int
3809roff_return(ROFF_ARGS)
3810{
3811	if (r->mstackpos >= 0)
3812		return ROFF_IGN | ROFF_USERRET;
3813
3814	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3815	return ROFF_IGN;
3816}
3817
3818static int
3819roff_rn(ROFF_ARGS)
3820{
3821	const char	*value;
3822	char		*oldn, *newn, *end;
3823	size_t		 oldsz, newsz;
3824	int		 deftype;
3825
3826	oldn = newn = buf->buf + pos;
3827	if (*oldn == '\0')
3828		return ROFF_IGN;
3829
3830	oldsz = roff_getname(r, &newn, ln, pos);
3831	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3832		return ROFF_IGN;
3833
3834	end = newn;
3835	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3836	if (newsz == 0)
3837		return ROFF_IGN;
3838
3839	deftype = ROFFDEF_ANY;
3840	value = roff_getstrn(r, oldn, oldsz, &deftype);
3841	switch (deftype) {
3842	case ROFFDEF_USER:
3843		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3844		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3845		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3846		break;
3847	case ROFFDEF_PRE:
3848		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3849		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3850		break;
3851	case ROFFDEF_REN:
3852		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3853		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3854		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3855		break;
3856	case ROFFDEF_STD:
3857		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3858		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3859		break;
3860	default:
3861		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3862		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3863		break;
3864	}
3865	return ROFF_IGN;
3866}
3867
3868static int
3869roff_shift(ROFF_ARGS)
3870{
3871	struct mctx	*ctx;
3872	int		 levels, i;
3873
3874	levels = 1;
3875	if (buf->buf[pos] != '\0' &&
3876	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3877		mandoc_msg(MANDOCERR_CE_NONUM,
3878		    ln, pos, "shift %s", buf->buf + pos);
3879		levels = 1;
3880	}
3881	if (r->mstackpos < 0) {
3882		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3883		return ROFF_IGN;
3884	}
3885	ctx = r->mstack + r->mstackpos;
3886	if (levels > ctx->argc) {
3887		mandoc_msg(MANDOCERR_SHIFT,
3888		    ln, pos, "%d, but max is %d", levels, ctx->argc);
3889		levels = ctx->argc;
3890	}
3891	if (levels == 0)
3892		return ROFF_IGN;
3893	for (i = 0; i < levels; i++)
3894		free(ctx->argv[i]);
3895	ctx->argc -= levels;
3896	for (i = 0; i < ctx->argc; i++)
3897		ctx->argv[i] = ctx->argv[i + levels];
3898	return ROFF_IGN;
3899}
3900
3901static int
3902roff_so(ROFF_ARGS)
3903{
3904	char *name, *cp;
3905
3906	name = buf->buf + pos;
3907	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3908
3909	/*
3910	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3911	 * opening anything that's not in our cwd or anything beneath
3912	 * it.  Thus, explicitly disallow traversing up the file-system
3913	 * or using absolute paths.
3914	 */
3915
3916	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3917		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3918		buf->sz = mandoc_asprintf(&cp,
3919		    ".sp\nSee the file %s.\n.sp", name) + 1;
3920		free(buf->buf);
3921		buf->buf = cp;
3922		*offs = 0;
3923		return ROFF_REPARSE;
3924	}
3925
3926	*offs = pos;
3927	return ROFF_SO;
3928}
3929
3930/* --- user defined strings and macros ------------------------------------ */
3931
/*
 * Call a user-defined macro: r->current_string holds the macro
 * definition, and the input line holds the invocation.
 * The arguments of the invocation are saved in a new context on
 * the macro stack, and the content of the input buffer is then
 * replaced by the macro definition.
 */
static int
roff_userdef(ROFF_ARGS)
{
	struct mctx	 *ctx;
	char		 *arg, *ap, *dst, *src;
	size_t		  sz;

	/* If the macro is empty, ignore it altogether. */

	if (*r->current_string == '\0')
		return ROFF_IGN;

	/* Initialize a new macro stack context. */

	/* The stack grows in steps of eight contexts. */
	if (++r->mstackpos == r->mstacksz) {
		r->mstack = mandoc_recallocarray(r->mstack,
		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
		r->mstacksz += 8;
	}
	ctx = r->mstack + r->mstackpos;
	ctx->argsz = 0;
	ctx->argc = 0;
	ctx->argv = NULL;

	/*
	 * Collect pointers to macro argument strings,
	 * NUL-terminating them and escaping quotes.
	 */

	src = buf->buf + pos;
	while (*src != '\0') {
		/* The argument vector grows in steps of eight, too. */
		if (ctx->argc == ctx->argsz) {
			ctx->argsz += 8;
			ctx->argv = mandoc_reallocarray(ctx->argv,
			    ctx->argsz, sizeof(*ctx->argv));
		}
		arg = roff_getarg(r, &src, ln, &pos);
		/* Each double quote expands to the four bytes "\(dq". */
		sz = 1;  /* For the terminating NUL. */
		for (ap = arg; *ap != '\0'; ap++)
			sz += *ap == '"' ? 4 : 1;
		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
		for (ap = arg; *ap != '\0'; ap++) {
			if (*ap == '"') {
				memcpy(dst, "\\(dq", 4);
				dst += 4;
			} else
				*dst++ = *ap;
		}
		*dst = '\0';
		free(arg);
	}

	/* Replace the macro invocation by the macro definition. */

	free(buf->buf);
	buf->buf = mandoc_strdup(r->current_string);
	buf->sz = strlen(buf->buf) + 1;
	*offs = 0;

	/*
	 * The definition is known to be non-empty at this point,
	 * so looking at its last byte is safe.  A definition ending
	 * in a newline is reparsed as a macro call; otherwise,
	 * ROFF_IGN | ROFF_APPEND is returned.
	 */
	return buf->buf[buf->sz - 2] == '\n' ?
	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
}
3994
3995/*
3996 * Calling a high-level macro that was renamed with .rn.
3997 * r->current_string has already been set up by roff_parse().
3998 */
3999static int
4000roff_renamed(ROFF_ARGS)
4001{
4002	char	*nbuf;
4003
4004	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4005	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4006	free(buf->buf);
4007	buf->buf = nbuf;
4008	*offs = 0;
4009	return ROFF_CONT;
4010}
4011
/*
 * Measure the length in bytes of the roff identifier at *cpp
 * and advance the pointer to the next word.
 * Returns the length of the name; zero if *cpp points to the
 * end of the string.  The arguments ln and pos are only used
 * for reporting the position of an escape sequence found in
 * the name.
 */
static size_t
roff_getname(struct roff *r, char **cpp, int ln, int pos)
{
	char	 *name, *cp;
	size_t	  namesz;

	name = *cpp;
	if (*name == '\0')
		return 0;

	/* Advance cp to the byte after the end of the name. */

	for (cp = name; 1; cp++) {
		/*
		 * Updated before inspecting each byte, such that
		 * the byte ending the name is never counted.
		 */
		namesz = cp - name;
		if (*cp == '\0')
			break;
		/* A blank or tab ends the name and is skipped. */
		if (*cp == ' ' || *cp == '\t') {
			cp++;
			break;
		}
		if (*cp != '\\')
			continue;
		/* A brace escape ends the name, but is not consumed. */
		if (cp[1] == '{' || cp[1] == '}')
			break;
		/* A doubled backslash is part of the name. */
		if (*++cp == '\\')
			continue;
		/*
		 * Any other escape sequence ends the name:
		 * report it and skip over it.
		 */
		mandoc_msg(MANDOCERR_NAMESC, ln, pos,
		    "%.*s", (int)(cp - name + 1), name);
		mandoc_escape((const char **)&cp, NULL, NULL);
		break;
	}

	/* Read past spaces. */

	while (*cp == ' ')
		cp++;

	*cpp = cp;
	return namesz;
}
4056
4057/*
4058 * Store *string into the user-defined string called *name.
4059 * To clear an existing entry, call with (*r, *name, NULL, 0).
4060 * append == 0: replace mode
4061 * append == 1: single-line append mode
4062 * append == 2: multiline append mode, append '\n' after each call
4063 */
4064static void
4065roff_setstr(struct roff *r, const char *name, const char *string,
4066	int append)
4067{
4068	size_t	 namesz;
4069
4070	namesz = strlen(name);
4071	roff_setstrn(&r->strtab, name, namesz, string,
4072	    string ? strlen(string) : 0, append);
4073	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4074}
4075
4076static void
4077roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4078		const char *string, size_t stringsz, int append)
4079{
4080	struct roffkv	*n;
4081	char		*c;
4082	int		 i;
4083	size_t		 oldch, newch;
4084
4085	/* Search for an existing string with the same name. */
4086	n = *r;
4087
4088	while (n && (namesz != n->key.sz ||
4089			strncmp(n->key.p, name, namesz)))
4090		n = n->next;
4091
4092	if (NULL == n) {
4093		/* Create a new string table entry. */
4094		n = mandoc_malloc(sizeof(struct roffkv));
4095		n->key.p = mandoc_strndup(name, namesz);
4096		n->key.sz = namesz;
4097		n->val.p = NULL;
4098		n->val.sz = 0;
4099		n->next = *r;
4100		*r = n;
4101	} else if (0 == append) {
4102		free(n->val.p);
4103		n->val.p = NULL;
4104		n->val.sz = 0;
4105	}
4106
4107	if (NULL == string)
4108		return;
4109
4110	/*
4111	 * One additional byte for the '\n' in multiline mode,
4112	 * and one for the terminating '\0'.
4113	 */
4114	newch = stringsz + (1 < append ? 2u : 1u);
4115
4116	if (NULL == n->val.p) {
4117		n->val.p = mandoc_malloc(newch);
4118		*n->val.p = '\0';
4119		oldch = 0;
4120	} else {
4121		oldch = n->val.sz;
4122		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4123	}
4124
4125	/* Skip existing content in the destination buffer. */
4126	c = n->val.p + (int)oldch;
4127
4128	/* Append new content to the destination buffer. */
4129	i = 0;
4130	while (i < (int)stringsz) {
4131		/*
4132		 * Rudimentary roff copy mode:
4133		 * Handle escaped backslashes.
4134		 */
4135		if ('\\' == string[i] && '\\' == string[i + 1])
4136			i++;
4137		*c++ = string[i++];
4138	}
4139
4140	/* Append terminating bytes. */
4141	if (1 < append)
4142		*c++ = '\n';
4143
4144	*c = '\0';
4145	n->val.sz = (int)(c - n->val.p);
4146}
4147
/*
 * Look up the name given by the first len bytes of *name.
 * On entry, *deftype is a bit mask of the ROFFDEF_* kinds of
 * definitions the caller is interested in.
 * The tables are searched in this order: user-defined strings,
 * renamed macros, predefined strings, mdoc(7) macro names unless
 * the macro set is known to be man(7), and man(7) macro names
 * unless the macro set is known to be mdoc(7).
 * On the first match of a requested kind, *deftype is set to that
 * kind and the value is returned; for standard macros, there is
 * no value and NULL is returned.
 * Without such a match, NULL is returned, and *deftype is set to
 * ROFFDEF_UNDEF or to 0 as explained at the end of the function.
 */
static const char *
roff_getstrn(struct roff *r, const char *name, size_t len,
    int *deftype)
{
	const struct roffkv	*n;
	int			 found, i;
	enum roff_tok		 tok;

	/*
	 * The variable found records that the name exists
	 * as a kind of definition that was not requested.
	 */
	found = 0;

	/* User-defined strings; entries without a value are skipped. */
	for (n = r->strtab; n != NULL; n = n->next) {
		if (strncmp(name, n->key.p, len) != 0 ||
		    n->key.p[len] != '\0' || n->val.p == NULL)
			continue;
		if (*deftype & ROFFDEF_USER) {
			*deftype = ROFFDEF_USER;
			return n->val.p;
		} else {
			found = 1;
			break;
		}
	}

	/* Renamed macros; entries without a value are skipped. */
	for (n = r->rentab; n != NULL; n = n->next) {
		if (strncmp(name, n->key.p, len) != 0 ||
		    n->key.p[len] != '\0' || n->val.p == NULL)
			continue;
		if (*deftype & ROFFDEF_REN) {
			*deftype = ROFFDEF_REN;
			return n->val.p;
		} else {
			found = 1;
			break;
		}
	}

	/* Predefined strings. */
	for (i = 0; i < PREDEFS_MAX; i++) {
		if (strncmp(name, predefs[i].name, len) != 0 ||
		    predefs[i].name[len] != '\0')
			continue;
		if (*deftype & ROFFDEF_PRE) {
			*deftype = ROFFDEF_PRE;
			return predefs[i].str;
		} else {
			found = 1;
			break;
		}
	}

	/* Standard mdoc(7) macros. */
	if (r->man->meta.macroset != MACROSET_MAN) {
		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
			if (strncmp(name, roff_name[tok], len) != 0 ||
			    roff_name[tok][len] != '\0')
				continue;
			if (*deftype & ROFFDEF_STD) {
				*deftype = ROFFDEF_STD;
				return NULL;
			} else {
				found = 1;
				break;
			}
		}
	}

	/* Standard man(7) macros. */
	if (r->man->meta.macroset != MACROSET_MDOC) {
		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
			if (strncmp(name, roff_name[tok], len) != 0 ||
			    roff_name[tok][len] != '\0')
				continue;
			if (*deftype & ROFFDEF_STD) {
				*deftype = ROFFDEF_STD;
				return NULL;
			} else {
				found = 1;
				break;
			}
		}
	}

	/*
	 * The name is not defined in any way at all, and the caller
	 * did not ask for all kinds of definitions at once.
	 */
	if (found == 0 && *deftype != ROFFDEF_ANY) {
		if (*deftype & ROFFDEF_REN) {
			/*
			 * This might still be a request,
			 * so do not treat it as undefined yet.
			 */
			*deftype = ROFFDEF_UNDEF;
			return NULL;
		}

		/* Using an undefined string defines it to be empty. */

		roff_setstrn(&r->strtab, name, len, "", 0, 0);
		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
	}

	/* No definition of a requested kind exists. */
	*deftype = 0;
	return NULL;
}
4241
4242static void
4243roff_freestr(struct roffkv *r)
4244{
4245	struct roffkv	 *n, *nn;
4246
4247	for (n = r; n; n = nn) {
4248		free(n->key.p);
4249		free(n->val.p);
4250		nn = n->next;
4251		free(n);
4252	}
4253}
4254
4255/* --- accessors and utility functions ------------------------------------ */
4256
4257/*
4258 * Duplicate an input string, making the appropriate character
4259 * conversations (as stipulated by `tr') along the way.
4260 * Returns a heap-allocated string with all the replacements made.
4261 */
4262char *
4263roff_strdup(const struct roff *r, const char *p)
4264{
4265	const struct roffkv *cp;
4266	char		*res;
4267	const char	*pp;
4268	size_t		 ssz, sz;
4269	enum mandoc_esc	 esc;
4270
4271	if (NULL == r->xmbtab && NULL == r->xtab)
4272		return mandoc_strdup(p);
4273	else if ('\0' == *p)
4274		return mandoc_strdup("");
4275
4276	/*
4277	 * Step through each character looking for term matches
4278	 * (remember that a `tr' can be invoked with an escape, which is
4279	 * a glyph but the escape is multi-character).
4280	 * We only do this if the character hash has been initialised
4281	 * and the string is >0 length.
4282	 */
4283
4284	res = NULL;
4285	ssz = 0;
4286
4287	while ('\0' != *p) {
4288		assert((unsigned int)*p < 128);
4289		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4290			sz = r->xtab[(int)*p].sz;
4291			res = mandoc_realloc(res, ssz + sz + 1);
4292			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4293			ssz += sz;
4294			p++;
4295			continue;
4296		} else if ('\\' != *p) {
4297			res = mandoc_realloc(res, ssz + 2);
4298			res[ssz++] = *p++;
4299			continue;
4300		}
4301
4302		/* Search for term matches. */
4303		for (cp = r->xmbtab; cp; cp = cp->next)
4304			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4305				break;
4306
4307		if (NULL != cp) {
4308			/*
4309			 * A match has been found.
4310			 * Append the match to the array and move
4311			 * forward by its keysize.
4312			 */
4313			res = mandoc_realloc(res,
4314			    ssz + cp->val.sz + 1);
4315			memcpy(res + ssz, cp->val.p, cp->val.sz);
4316			ssz += cp->val.sz;
4317			p += (int)cp->key.sz;
4318			continue;
4319		}
4320
4321		/*
4322		 * Handle escapes carefully: we need to copy
4323		 * over just the escape itself, or else we might
4324		 * do replacements within the escape itself.
4325		 * Make sure to pass along the bogus string.
4326		 */
4327		pp = p++;
4328		esc = mandoc_escape(&p, NULL, NULL);
4329		if (ESCAPE_ERROR == esc) {
4330			sz = strlen(pp);
4331			res = mandoc_realloc(res, ssz + sz + 1);
4332			memcpy(res + ssz, pp, sz);
4333			break;
4334		}
4335		/*
4336		 * We bail out on bad escapes.
4337		 * No need to warn: we already did so when
4338		 * roff_expand() was called.
4339		 */
4340		sz = (int)(p - pp);
4341		res = mandoc_realloc(res, ssz + sz + 1);
4342		memcpy(res + ssz, pp, sz);
4343		ssz += sz;
4344	}
4345
4346	res[(int)ssz] = '\0';
4347	return res;
4348}
4349
4350int
4351roff_getformat(const struct roff *r)
4352{
4353
4354	return r->format;
4355}
4356
4357/*
4358 * Find out whether a line is a macro line or not.
4359 * If it is, adjust the current position and return one; if it isn't,
4360 * return zero and don't change the current position.
4361 * If the control character has been set with `.cc', then let that grain
4362 * precedence.
4363 * This is slighly contrary to groff, where using the non-breaking
4364 * control character when `cc' has been invoked will cause the
4365 * non-breaking macro contents to be printed verbatim.
4366 */
4367int
4368roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4369{
4370	int		pos;
4371
4372	pos = *ppos;
4373
4374	if (r->control != '\0' && cp[pos] == r->control)
4375		pos++;
4376	else if (r->control != '\0')
4377		return 0;
4378	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4379		pos += 2;
4380	else if ('.' == cp[pos] || '\'' == cp[pos])
4381		pos++;
4382	else
4383		return 0;
4384
4385	while (' ' == cp[pos] || '\t' == cp[pos])
4386		pos++;
4387
4388	*ppos = pos;
4389	return 1;
4390}
4391