roff.c revision 316420
1/*	$Id: roff.c,v 1.288 2017/01/12 18:02:20 schwarze Exp $ */
2/*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21
22#include <assert.h>
23#include <ctype.h>
24#include <limits.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28
29#include "mandoc.h"
30#include "mandoc_aux.h"
31#include "roff.h"
32#include "libmandoc.h"
33#include "roff_int.h"
34#include "libroff.h"
35
/*
 * Upper bound on the number of string expansions performed on one
 * input line; it exists to break infinite expansion loops.
 */
#define	EXPAND_LIMIT	1000
38
39/* --- data types --------------------------------------------------------- */
40
/*
 * Tokens for the roff requests known to this file, followed by the
 * pseudo-tokens ROFF_cblock and ROFF_USERDEF and the ROFF_MAX sentinel.
 * The enumerators are used as indices into the roffs[] table, so the
 * order here must match the order of that table exactly.
 */
enum	rofft {
	/* a */
	ROFF_ab, ROFF_ad, ROFF_af, ROFF_aln, ROFF_als, ROFF_am,
	ROFF_am1, ROFF_ami, ROFF_ami1, ROFF_as, ROFF_as1, ROFF_asciify,
	/* b */
	ROFF_backtrace, ROFF_bd, ROFF_bleedat, ROFF_blm, ROFF_box,
	ROFF_boxa, ROFF_bp, ROFF_BP,
	/* MAN_br, MDOC_br */
	ROFF_break, ROFF_breakchar, ROFF_brnl, ROFF_brp, ROFF_brpnl,
	/* c */
	ROFF_c2, ROFF_cc, ROFF_ce, ROFF_cf, ROFF_cflags, ROFF_ch,
	ROFF_char, ROFF_chop, ROFF_class, ROFF_close, ROFF_CL,
	ROFF_color, ROFF_composite, ROFF_continue, ROFF_cp,
	ROFF_cropat, ROFF_cs, ROFF_cu,
	/* d */
	ROFF_da, ROFF_dch, ROFF_Dd, ROFF_de, ROFF_de1, ROFF_defcolor,
	ROFF_dei, ROFF_dei1, ROFF_device, ROFF_devicem, ROFF_di,
	ROFF_do, ROFF_ds, ROFF_ds1, ROFF_dwh, ROFF_dt,
	/* e */
	ROFF_ec, ROFF_ecr, ROFF_ecs, ROFF_el, ROFF_em, ROFF_EN,
	ROFF_eo, ROFF_EP, ROFF_EQ, ROFF_errprint, ROFF_ev, ROFF_evc,
	ROFF_ex,
	/* f */
	ROFF_fallback, ROFF_fam, ROFF_fc, ROFF_fchar, ROFF_fcolor,
	ROFF_fdeferlig, ROFF_feature,
	/* MAN_fi; ignored in mdoc(7) */
	ROFF_fkern, ROFF_fl, ROFF_flig, ROFF_fp, ROFF_fps,
	ROFF_fschar, ROFF_fspacewidth, ROFF_fspecial,
	/* MAN_ft; ignored in mdoc(7) */
	ROFF_ftr, ROFF_fzoom,
	/* g */
	ROFF_gcolor,
	/* h */
	ROFF_hc, ROFF_hcode, ROFF_hidechar, ROFF_hla, ROFF_hlm,
	ROFF_hpf, ROFF_hpfa, ROFF_hpfcode, ROFF_hw, ROFF_hy,
	ROFF_hylang, ROFF_hylen, ROFF_hym, ROFF_hypp, ROFF_hys,
	/* i */
	ROFF_ie, ROFF_if, ROFF_ig,
	/* MAN_in; ignored in mdoc(7) */
	ROFF_index, ROFF_it, ROFF_itc, ROFF_IX,
	/* k */
	ROFF_kern, ROFF_kernafter, ROFF_kernbefore, ROFF_kernpair,
	/* l */
	ROFF_lc, ROFF_lc_ctype, ROFF_lds, ROFF_length, ROFF_letadj,
	ROFF_lf, ROFF_lg, ROFF_lhang, ROFF_linetabs,
	/* MAN_ll, MDOC_ll */
	ROFF_lnr, ROFF_lnrf, ROFF_lpfx, ROFF_ls, ROFF_lsm, ROFF_lt,
	/* m */
	ROFF_mc, ROFF_mediasize, ROFF_minss, ROFF_mk, ROFF_mso,
	/* n */
	ROFF_na, ROFF_ne,
	/* MAN_nf; ignored in mdoc(7) */
	ROFF_nh, ROFF_nhychar, ROFF_nm, ROFF_nn, ROFF_nop, ROFF_nr,
	ROFF_nrf, ROFF_nroff, ROFF_ns, ROFF_nx,
	/* o */
	ROFF_open, ROFF_opena, ROFF_os, ROFF_output,
	/* p */
	ROFF_padj, ROFF_papersize, ROFF_pc, ROFF_pev, ROFF_pi,
	ROFF_PI, ROFF_pl, ROFF_pm, ROFF_pn, ROFF_pnr, ROFF_po,
	ROFF_ps, ROFF_psbb, ROFF_pshape, ROFF_pso, ROFF_ptr, ROFF_pvs,
	/* r */
	ROFF_rchar, ROFF_rd, ROFF_recursionlimit, ROFF_return,
	ROFF_rfschar, ROFF_rhang, ROFF_rj, ROFF_rm, ROFF_rn,
	ROFF_rnn, ROFF_rr, ROFF_rs, ROFF_rt,
	/* s */
	ROFF_schar, ROFF_sentchar, ROFF_shc, ROFF_shift, ROFF_sizes,
	ROFF_so,
	/* MAN_sp, MDOC_sp */
	ROFF_spacewidth, ROFF_special, ROFF_spreadwarn, ROFF_ss,
	ROFF_sty, ROFF_substring, ROFF_sv, ROFF_sy,
	/* t */
	ROFF_T_, ROFF_ta, ROFF_tc, ROFF_TE, ROFF_TH, ROFF_ti,
	ROFF_tkf, ROFF_tl, ROFF_tm, ROFF_tm1, ROFF_tmc, ROFF_tr,
	ROFF_track, ROFF_transchar, ROFF_trf, ROFF_trimat, ROFF_trin,
	ROFF_trnt, ROFF_troff, ROFF_TS,
	/* u */
	ROFF_uf, ROFF_ul, ROFF_unformat, ROFF_unwatch, ROFF_unwatchn,
	/* v */
	ROFF_vpt, ROFF_vs,
	/* w */
	ROFF_warn, ROFF_warnscale, ROFF_watch, ROFF_watchlength,
	ROFF_watchn, ROFF_wh, ROFF_while, ROFF_write, ROFF_writec,
	ROFF_writem,
	/* x */
	ROFF_xflag,
	/* pseudo-tokens and sentinel */
	ROFF_cblock,
	ROFF_USERDEF,
	ROFF_MAX
};
287
/*
 * Minimal string buffer: a character pointer plus its cached length.
 */
struct	roffstr {
	char		*p;	/* nil-terminated contents */
	size_t		 sz;	/* cached strlen(p) */
};
295
296/*
297 * A key-value roffstr pair as part of a singly-linked list.
298 */
299struct	roffkv {
300	struct roffstr	 key;
301	struct roffstr	 val;
302	struct roffkv	*next; /* next in list */
303};
304
305/*
306 * A single number register as part of a singly-linked list.
307 */
308struct	roffreg {
309	struct roffstr	 key;
310	int		 val;
311	struct roffreg	*next;
312};
313
/*
 * Complete state of one roff parser instance.
 */
struct	roff {
	struct mparse	*parse;		/* parse point */
	struct roffnode	*last;		/* leaf of the node stack */
	int		*rstack;	/* stack of inverted `ie' values */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* table currently being parsed */
	struct eqn_node	*last_eqn;	/* last equation parsed */
	struct eqn_node	*first_eqn;	/* first equation parsed */
	struct eqn_node	*eqn;		/* equation currently being parsed */
	int		 eqn_inline;	/* current equation is inline */
	int		 options;	/* parse options */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	int		 argc;		/* number of args of the last macro */
	char		 control;	/* control character */
};
337
338struct	roffnode {
339	enum rofft	 tok; /* type of node */
340	struct roffnode	*parent; /* up one in stack */
341	int		 line; /* parse line */
342	int		 col; /* parse col */
343	char		*name; /* node name, e.g. macro name */
344	char		*end; /* end-rules: custom token */
345	int		 endspan; /* end-rules: next-line or infty */
346	int		 rule; /* current evaluation rule */
347};
348
/*
 * Parameter list shared by all request handlers:
 *   r     parse context
 *   tok   token of the macro
 *   buf   input buffer
 *   ln    parse line
 *   ppos  original position in the buffer
 *   pos   current position in the buffer
 *   offs  reset offset of buffer data
 */
#define	ROFF_ARGS	 struct roff *r, enum rofft tok, \
			 struct buf *buf, int ln, int ppos, \
			 int pos, int *offs

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
358
359struct	roffmac {
360	const char	*name; /* macro name */
361	roffproc	 proc; /* process new macro */
362	roffproc	 text; /* process as child text of macro */
363	roffproc	 sub; /* process as child of macro */
364	int		 flags;
365#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
366	struct roffmac	*next;
367};
368
/*
 * A predefined string: input name and its replacement.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/*
 * Expands to one struct predef initializer, including the
 * trailing comma, so invocations can be listed back to back.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
376
377/* --- function prototypes ------------------------------------------------ */
378
static enum rofft roffhash_find(const char *, size_t);
static void roffhash_init(void);
static void roffnode_cleanscope(struct roff *);
static void roffnode_pop(struct roff *);
static void roffnode_push(struct roff *, enum rofft, const char *, int, int);
static enum rofferr roff_block(ROFF_ARGS);
static enum rofferr roff_block_text(ROFF_ARGS);
static enum rofferr roff_block_sub(ROFF_ARGS);
static enum rofferr roff_brp(ROFF_ARGS);
static enum rofferr roff_cblock(ROFF_ARGS);
static enum rofferr roff_cc(ROFF_ARGS);
static void roff_ccond(struct roff *, int, int);
static enum rofferr roff_cond(ROFF_ARGS);
static enum rofferr roff_cond_text(ROFF_ARGS);
static enum rofferr roff_cond_sub(ROFF_ARGS);
static enum rofferr roff_ds(ROFF_ARGS);
static enum rofferr roff_eqndelim(struct roff *, struct buf *, int);
static int roff_evalcond(struct roff *r, int, char *, int *);
static int roff_evalnum(struct roff *, int, const char *, int *, int *, int);
static int roff_evalpar(struct roff *, int, const char *, int *, int *, int);
static int roff_evalstrcond(const char *, int *);
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
static size_t roff_getname(struct roff *, char **, int, int);
static int roff_getnum(const char *, int *, int *, int);
static int roff_getop(const char *, int *, char *);
static int roff_getregn(const struct roff *, const char *, size_t);
static int roff_getregro(const struct roff *, const char *name);
static const char *roff_getstrn(const struct roff *, const char *, size_t);
static int roff_hasregn(const struct roff *, const char *, size_t);
static enum rofferr roff_insec(ROFF_ARGS);
static enum rofferr roff_it(ROFF_ARGS);
static enum rofferr roff_line_ignore(ROFF_ARGS);
static void roff_man_alloc1(struct roff_man *);
static void roff_man_free1(struct roff_man *);
static enum rofferr roff_nr(ROFF_ARGS);
static enum rofft roff_parse(struct roff *, char *, int *, int, int);
static enum rofferr roff_parsetext(struct buf *, int, int *);
static enum rofferr roff_res(struct roff *, struct buf *, int, int);
static enum rofferr roff_rm(ROFF_ARGS);
static enum rofferr roff_rr(ROFF_ARGS);
static void roff_setstr(struct roff *, const char *, const char *, int);
static void roff_setstrn(struct roffkv **, const char *, size_t,
		const char *, size_t, int);
static enum rofferr roff_so(ROFF_ARGS);
static enum rofferr roff_tr(ROFF_ARGS);
static enum rofferr roff_Dd(ROFF_ARGS);
static enum rofferr roff_TH(ROFF_ARGS);
static enum rofferr roff_TE(ROFF_ARGS);
static enum rofferr roff_TS(ROFF_ARGS);
static enum rofferr roff_EQ(ROFF_ARGS);
static enum rofferr roff_EN(ROFF_ARGS);
static enum rofferr roff_T_(ROFF_ARGS);
static enum rofferr roff_unsupp(ROFF_ARGS);
static enum rofferr roff_userdef(ROFF_ARGS);
444
445/* --- constant data ------------------------------------------------------ */
446
/*
 * Range of first characters accepted by roffhash_find(); the lookup
 * table has one bucket per character in this range.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Flag bits for numeric parsing. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */

/* Bucket heads of the request lookup table. */
static	struct roffmac	*hash[HASHWIDTH];
457
458static	struct roffmac	 roffs[ROFF_MAX] = {
459	{ "ab", roff_unsupp, NULL, NULL, 0, NULL },
460	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
461	{ "af", roff_line_ignore, NULL, NULL, 0, NULL },
462	{ "aln", roff_unsupp, NULL, NULL, 0, NULL },
463	{ "als", roff_unsupp, NULL, NULL, 0, NULL },
464	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
465	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
466	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
467	{ "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
468	{ "as", roff_ds, NULL, NULL, 0, NULL },
469	{ "as1", roff_ds, NULL, NULL, 0, NULL },
470	{ "asciify", roff_unsupp, NULL, NULL, 0, NULL },
471	{ "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
472	{ "bd", roff_line_ignore, NULL, NULL, 0, NULL },
473	{ "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
474	{ "blm", roff_unsupp, NULL, NULL, 0, NULL },
475	{ "box", roff_unsupp, NULL, NULL, 0, NULL },
476	{ "boxa", roff_unsupp, NULL, NULL, 0, NULL },
477	{ "bp", roff_line_ignore, NULL, NULL, 0, NULL },
478	{ "BP", roff_unsupp, NULL, NULL, 0, NULL },
479	{ "break", roff_unsupp, NULL, NULL, 0, NULL },
480	{ "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
481	{ "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
482	{ "brp", roff_brp, NULL, NULL, 0, NULL },
483	{ "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
484	{ "c2", roff_unsupp, NULL, NULL, 0, NULL },
485	{ "cc", roff_cc, NULL, NULL, 0, NULL },
486	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
487	{ "cf", roff_insec, NULL, NULL, 0, NULL },
488	{ "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
489	{ "ch", roff_line_ignore, NULL, NULL, 0, NULL },
490	{ "char", roff_unsupp, NULL, NULL, 0, NULL },
491	{ "chop", roff_unsupp, NULL, NULL, 0, NULL },
492	{ "class", roff_line_ignore, NULL, NULL, 0, NULL },
493	{ "close", roff_insec, NULL, NULL, 0, NULL },
494	{ "CL", roff_unsupp, NULL, NULL, 0, NULL },
495	{ "color", roff_line_ignore, NULL, NULL, 0, NULL },
496	{ "composite", roff_unsupp, NULL, NULL, 0, NULL },
497	{ "continue", roff_unsupp, NULL, NULL, 0, NULL },
498	{ "cp", roff_line_ignore, NULL, NULL, 0, NULL },
499	{ "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
500	{ "cs", roff_line_ignore, NULL, NULL, 0, NULL },
501	{ "cu", roff_line_ignore, NULL, NULL, 0, NULL },
502	{ "da", roff_unsupp, NULL, NULL, 0, NULL },
503	{ "dch", roff_unsupp, NULL, NULL, 0, NULL },
504	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
505	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
506	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
507	{ "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
508	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
509	{ "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
510	{ "device", roff_unsupp, NULL, NULL, 0, NULL },
511	{ "devicem", roff_unsupp, NULL, NULL, 0, NULL },
512	{ "di", roff_unsupp, NULL, NULL, 0, NULL },
513	{ "do", roff_unsupp, NULL, NULL, 0, NULL },
514	{ "ds", roff_ds, NULL, NULL, 0, NULL },
515	{ "ds1", roff_ds, NULL, NULL, 0, NULL },
516	{ "dwh", roff_unsupp, NULL, NULL, 0, NULL },
517	{ "dt", roff_unsupp, NULL, NULL, 0, NULL },
518	{ "ec", roff_unsupp, NULL, NULL, 0, NULL },
519	{ "ecr", roff_unsupp, NULL, NULL, 0, NULL },
520	{ "ecs", roff_unsupp, NULL, NULL, 0, NULL },
521	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
522	{ "em", roff_unsupp, NULL, NULL, 0, NULL },
523	{ "EN", roff_EN, NULL, NULL, 0, NULL },
524	{ "eo", roff_unsupp, NULL, NULL, 0, NULL },
525	{ "EP", roff_unsupp, NULL, NULL, 0, NULL },
526	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
527	{ "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
528	{ "ev", roff_unsupp, NULL, NULL, 0, NULL },
529	{ "evc", roff_unsupp, NULL, NULL, 0, NULL },
530	{ "ex", roff_unsupp, NULL, NULL, 0, NULL },
531	{ "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
532	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
533	{ "fc", roff_unsupp, NULL, NULL, 0, NULL },
534	{ "fchar", roff_unsupp, NULL, NULL, 0, NULL },
535	{ "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
536	{ "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
537	{ "feature", roff_line_ignore, NULL, NULL, 0, NULL },
538	{ "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
539	{ "fl", roff_line_ignore, NULL, NULL, 0, NULL },
540	{ "flig", roff_line_ignore, NULL, NULL, 0, NULL },
541	{ "fp", roff_line_ignore, NULL, NULL, 0, NULL },
542	{ "fps", roff_line_ignore, NULL, NULL, 0, NULL },
543	{ "fschar", roff_unsupp, NULL, NULL, 0, NULL },
544	{ "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
545	{ "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
546	{ "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
547	{ "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
548	{ "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
549	{ "hc", roff_line_ignore, NULL, NULL, 0, NULL },
550	{ "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
551	{ "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
552	{ "hla", roff_line_ignore, NULL, NULL, 0, NULL },
553	{ "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
554	{ "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
555	{ "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
556	{ "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
557	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
558	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
559	{ "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
560	{ "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
561	{ "hym", roff_line_ignore, NULL, NULL, 0, NULL },
562	{ "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
563	{ "hys", roff_line_ignore, NULL, NULL, 0, NULL },
564	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
565	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
566	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
567	{ "index", roff_unsupp, NULL, NULL, 0, NULL },
568	{ "it", roff_it, NULL, NULL, 0, NULL },
569	{ "itc", roff_unsupp, NULL, NULL, 0, NULL },
570	{ "IX", roff_line_ignore, NULL, NULL, 0, NULL },
571	{ "kern", roff_line_ignore, NULL, NULL, 0, NULL },
572	{ "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
573	{ "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
574	{ "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
575	{ "lc", roff_unsupp, NULL, NULL, 0, NULL },
576	{ "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
577	{ "lds", roff_unsupp, NULL, NULL, 0, NULL },
578	{ "length", roff_unsupp, NULL, NULL, 0, NULL },
579	{ "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
580	{ "lf", roff_insec, NULL, NULL, 0, NULL },
581	{ "lg", roff_line_ignore, NULL, NULL, 0, NULL },
582	{ "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
583	{ "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
584	{ "lnr", roff_unsupp, NULL, NULL, 0, NULL },
585	{ "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
586	{ "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
587	{ "ls", roff_line_ignore, NULL, NULL, 0, NULL },
588	{ "lsm", roff_unsupp, NULL, NULL, 0, NULL },
589	{ "lt", roff_line_ignore, NULL, NULL, 0, NULL },
590	{ "mc", roff_line_ignore, NULL, NULL, 0, NULL },
591	{ "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
592	{ "minss", roff_line_ignore, NULL, NULL, 0, NULL },
593	{ "mk", roff_line_ignore, NULL, NULL, 0, NULL },
594	{ "mso", roff_insec, NULL, NULL, 0, NULL },
595	{ "na", roff_line_ignore, NULL, NULL, 0, NULL },
596	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
597	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
598	{ "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
599	{ "nm", roff_unsupp, NULL, NULL, 0, NULL },
600	{ "nn", roff_unsupp, NULL, NULL, 0, NULL },
601	{ "nop", roff_unsupp, NULL, NULL, 0, NULL },
602	{ "nr", roff_nr, NULL, NULL, 0, NULL },
603	{ "nrf", roff_unsupp, NULL, NULL, 0, NULL },
604	{ "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
605	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
606	{ "nx", roff_insec, NULL, NULL, 0, NULL },
607	{ "open", roff_insec, NULL, NULL, 0, NULL },
608	{ "opena", roff_insec, NULL, NULL, 0, NULL },
609	{ "os", roff_line_ignore, NULL, NULL, 0, NULL },
610	{ "output", roff_unsupp, NULL, NULL, 0, NULL },
611	{ "padj", roff_line_ignore, NULL, NULL, 0, NULL },
612	{ "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
613	{ "pc", roff_line_ignore, NULL, NULL, 0, NULL },
614	{ "pev", roff_line_ignore, NULL, NULL, 0, NULL },
615	{ "pi", roff_insec, NULL, NULL, 0, NULL },
616	{ "PI", roff_unsupp, NULL, NULL, 0, NULL },
617	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
618	{ "pm", roff_line_ignore, NULL, NULL, 0, NULL },
619	{ "pn", roff_line_ignore, NULL, NULL, 0, NULL },
620	{ "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
621	{ "po", roff_line_ignore, NULL, NULL, 0, NULL },
622	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
623	{ "psbb", roff_unsupp, NULL, NULL, 0, NULL },
624	{ "pshape", roff_unsupp, NULL, NULL, 0, NULL },
625	{ "pso", roff_insec, NULL, NULL, 0, NULL },
626	{ "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
627	{ "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
628	{ "rchar", roff_unsupp, NULL, NULL, 0, NULL },
629	{ "rd", roff_line_ignore, NULL, NULL, 0, NULL },
630	{ "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
631	{ "return", roff_unsupp, NULL, NULL, 0, NULL },
632	{ "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
633	{ "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
634	{ "rj", roff_line_ignore, NULL, NULL, 0, NULL },
635	{ "rm", roff_rm, NULL, NULL, 0, NULL },
636	{ "rn", roff_unsupp, NULL, NULL, 0, NULL },
637	{ "rnn", roff_unsupp, NULL, NULL, 0, NULL },
638	{ "rr", roff_rr, NULL, NULL, 0, NULL },
639	{ "rs", roff_line_ignore, NULL, NULL, 0, NULL },
640	{ "rt", roff_line_ignore, NULL, NULL, 0, NULL },
641	{ "schar", roff_unsupp, NULL, NULL, 0, NULL },
642	{ "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
643	{ "shc", roff_line_ignore, NULL, NULL, 0, NULL },
644	{ "shift", roff_unsupp, NULL, NULL, 0, NULL },
645	{ "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
646	{ "so", roff_so, NULL, NULL, 0, NULL },
647	{ "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
648	{ "special", roff_line_ignore, NULL, NULL, 0, NULL },
649	{ "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
650	{ "ss", roff_line_ignore, NULL, NULL, 0, NULL },
651	{ "sty", roff_line_ignore, NULL, NULL, 0, NULL },
652	{ "substring", roff_unsupp, NULL, NULL, 0, NULL },
653	{ "sv", roff_line_ignore, NULL, NULL, 0, NULL },
654	{ "sy", roff_insec, NULL, NULL, 0, NULL },
655	{ "T&", roff_T_, NULL, NULL, 0, NULL },
656	{ "ta", roff_unsupp, NULL, NULL, 0, NULL },
657	{ "tc", roff_unsupp, NULL, NULL, 0, NULL },
658	{ "TE", roff_TE, NULL, NULL, 0, NULL },
659	{ "TH", roff_TH, NULL, NULL, 0, NULL },
660	{ "ti", roff_unsupp, NULL, NULL, 0, NULL },
661	{ "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
662	{ "tl", roff_unsupp, NULL, NULL, 0, NULL },
663	{ "tm", roff_line_ignore, NULL, NULL, 0, NULL },
664	{ "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
665	{ "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
666	{ "tr", roff_tr, NULL, NULL, 0, NULL },
667	{ "track", roff_line_ignore, NULL, NULL, 0, NULL },
668	{ "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
669	{ "trf", roff_insec, NULL, NULL, 0, NULL },
670	{ "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
671	{ "trin", roff_unsupp, NULL, NULL, 0, NULL },
672	{ "trnt", roff_unsupp, NULL, NULL, 0, NULL },
673	{ "troff", roff_line_ignore, NULL, NULL, 0, NULL },
674	{ "TS", roff_TS, NULL, NULL, 0, NULL },
675	{ "uf", roff_line_ignore, NULL, NULL, 0, NULL },
676	{ "ul", roff_line_ignore, NULL, NULL, 0, NULL },
677	{ "unformat", roff_unsupp, NULL, NULL, 0, NULL },
678	{ "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
679	{ "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
680	{ "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
681	{ "vs", roff_line_ignore, NULL, NULL, 0, NULL },
682	{ "warn", roff_line_ignore, NULL, NULL, 0, NULL },
683	{ "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
684	{ "watch", roff_line_ignore, NULL, NULL, 0, NULL },
685	{ "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
686	{ "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
687	{ "wh", roff_unsupp, NULL, NULL, 0, NULL },
688	{ "while", roff_unsupp, NULL, NULL, 0, NULL },
689	{ "write", roff_insec, NULL, NULL, 0, NULL },
690	{ "writec", roff_insec, NULL, NULL, 0, NULL },
691	{ "writem", roff_insec, NULL, NULL, 0, NULL },
692	{ "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
693	{ ".", roff_cblock, NULL, NULL, 0, NULL },
694	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
695};
696
/*
 * NULL-terminated list of mdoc macro names.
 * not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", "Brc", "Bro", "Brq",
	"Bsx", "Bt", "Bx",
	"Cd", "Cm",
	"Db", "Dc", "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "En", "Eo", "Er", "Es",
	"Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf",
	"Ic", "In", "It",
	"Lb", "Li", "Lk", "Lp",
	"Ms", "Mt",
	"Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
	"Ta", "Tn",
	"Ud", "Ux",
	"Va", "Vt",
	"Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O", "%P", "%Q",
	"%R", "%T", "%U", "%V",
	NULL
};
719
/*
 * NULL-terminated list of man macro names.
 * not currently implemented: BT DE DS ME MT PT SY TQ YS
 */
const	char *const __man_reserved[] = {
	"AT",
	"B", "BI", "BR",
	"DT",
	"EE", "EN", "EQ", "EX",
	"HP",
	"I", "IB", "IP", "IR",
	"LP",
	"OP",
	"P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS",
	"SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&",
	"UC", "UE", "UR",
	NULL
};
729
730/* Array of injected predefined strings. */
731#define	PREDEFS_MAX	 38
732static	const struct predef predefs[PREDEFS_MAX] = {
733#include "predefs.in"
734};
735
/*
 * Bucket index of a request name: the offset of its first character
 * from ASCII_LO.  See roffhash_find().
 * The argument is parenthesized so that pointer expressions such as
 * (name + 1) expand correctly.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
738
/* State of a pending `it' request. */
static	int	 roffit_lines;	/* number of lines to delay */
static	char	*roffit_macro;	/* nil-terminated macro line */
741
742
743/* --- request table ------------------------------------------------------ */
744
745static void
746roffhash_init(void)
747{
748	struct roffmac	 *n;
749	int		  buc, i;
750
751	for (i = 0; i < (int)ROFF_USERDEF; i++) {
752		assert(roffs[i].name[0] >= ASCII_LO);
753		assert(roffs[i].name[0] <= ASCII_HI);
754
755		buc = ROFF_HASH(roffs[i].name);
756
757		if (NULL != (n = hash[buc])) {
758			for ( ; n->next; n = n->next)
759				/* Do nothing. */ ;
760			n->next = &roffs[i];
761		} else
762			hash[buc] = &roffs[i];
763	}
764}
765
766/*
767 * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
768 * the nil-terminated string name could be found.
769 */
770static enum rofft
771roffhash_find(const char *p, size_t s)
772{
773	int		 buc;
774	struct roffmac	*n;
775
776	/*
777	 * libroff has an extremely simple hashtable, for the time
778	 * being, which simply keys on the first character, which must
779	 * be printable, then walks a chain.  It works well enough until
780	 * optimised.
781	 */
782
783	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
784		return ROFF_MAX;
785
786	buc = ROFF_HASH(p);
787
788	if (NULL == (n = hash[buc]))
789		return ROFF_MAX;
790	for ( ; n; n = n->next)
791		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
792			return (enum rofft)(n - roffs);
793
794	return ROFF_MAX;
795}
796
797/* --- stack of request blocks -------------------------------------------- */
798
799/*
800 * Pop the current node off of the stack of roff instructions currently
801 * pending.
802 */
803static void
804roffnode_pop(struct roff *r)
805{
806	struct roffnode	*p;
807
808	assert(r->last);
809	p = r->last;
810
811	r->last = r->last->parent;
812	free(p->name);
813	free(p->end);
814	free(p);
815}
816
817/*
818 * Push a roff node onto the instruction stack.  This must later be
819 * removed with roffnode_pop().
820 */
821static void
822roffnode_push(struct roff *r, enum rofft tok, const char *name,
823		int line, int col)
824{
825	struct roffnode	*p;
826
827	p = mandoc_calloc(1, sizeof(struct roffnode));
828	p->tok = tok;
829	if (name)
830		p->name = mandoc_strdup(name);
831	p->parent = r->last;
832	p->line = line;
833	p->col = col;
834	p->rule = p->parent ? p->parent->rule : 0;
835
836	r->last = p;
837}
838
839/* --- roff parser state data management ---------------------------------- */
840
841static void
842roff_free1(struct roff *r)
843{
844	struct tbl_node	*tbl;
845	struct eqn_node	*e;
846	int		 i;
847
848	while (NULL != (tbl = r->first_tbl)) {
849		r->first_tbl = tbl->next;
850		tbl_free(tbl);
851	}
852	r->first_tbl = r->last_tbl = r->tbl = NULL;
853
854	while (NULL != (e = r->first_eqn)) {
855		r->first_eqn = e->next;
856		eqn_free(e);
857	}
858	r->first_eqn = r->last_eqn = r->eqn = NULL;
859
860	while (r->last)
861		roffnode_pop(r);
862
863	free (r->rstack);
864	r->rstack = NULL;
865	r->rstacksz = 0;
866	r->rstackpos = -1;
867
868	roff_freereg(r->regtab);
869	r->regtab = NULL;
870
871	roff_freestr(r->strtab);
872	roff_freestr(r->xmbtab);
873	r->strtab = r->xmbtab = NULL;
874
875	if (r->xtab)
876		for (i = 0; i < 128; i++)
877			free(r->xtab[i].p);
878	free(r->xtab);
879	r->xtab = NULL;
880}
881
882void
883roff_reset(struct roff *r)
884{
885
886	roff_free1(r);
887	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
888	r->control = 0;
889}
890
/*
 * Destroy a parser: release its state, then the object itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
898
899struct roff *
900roff_alloc(struct mparse *parse, int options)
901{
902	struct roff	*r;
903
904	r = mandoc_calloc(1, sizeof(struct roff));
905	r->parse = parse;
906	r->options = options;
907	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
908	r->rstackpos = -1;
909
910	roffhash_init();
911
912	return r;
913}
914
915/* --- syntax tree state data management ---------------------------------- */
916
917static void
918roff_man_free1(struct roff_man *man)
919{
920
921	if (man->first != NULL)
922		roff_node_delete(man, man->first);
923	free(man->meta.msec);
924	free(man->meta.vol);
925	free(man->meta.os);
926	free(man->meta.arch);
927	free(man->meta.title);
928	free(man->meta.name);
929	free(man->meta.date);
930}
931
932static void
933roff_man_alloc1(struct roff_man *man)
934{
935
936	memset(&man->meta, 0, sizeof(man->meta));
937	man->first = mandoc_calloc(1, sizeof(*man->first));
938	man->first->type = ROFFT_ROOT;
939	man->last = man->first;
940	man->last_es = NULL;
941	man->flags = 0;
942	man->macroset = MACROSET_NONE;
943	man->lastsec = man->lastnamed = SEC_NONE;
944	man->next = ROFF_NEXT_CHILD;
945}
946
/*
 * Throw away the current document state and start over with
 * a freshly initialized one.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
954
/*
 * Destroy a document state object: release what it owns,
 * then the object itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
962
963struct roff_man *
964roff_man_alloc(struct roff *roff, struct mparse *parse,
965	const char *defos, int quick)
966{
967	struct roff_man *man;
968
969	man = mandoc_calloc(1, sizeof(*man));
970	man->parse = parse;
971	man->roff = roff;
972	man->defos = defos;
973	man->quick = quick;
974	roff_man_alloc1(man);
975	return man;
976}
977
978/* --- syntax tree handling ----------------------------------------------- */
979
980struct roff_node *
981roff_node_alloc(struct roff_man *man, int line, int pos,
982	enum roff_type type, int tok)
983{
984	struct roff_node	*n;
985
986	n = mandoc_calloc(1, sizeof(*n));
987	n->line = line;
988	n->pos = pos;
989	n->tok = tok;
990	n->type = type;
991	n->sec = man->lastsec;
992
993	if (man->flags & MDOC_SYNOPSIS)
994		n->flags |= NODE_SYNPRETTY;
995	else
996		n->flags &= ~NODE_SYNPRETTY;
997	if (man->flags & MDOC_NEWLINE)
998		n->flags |= NODE_LINE;
999	man->flags &= ~MDOC_NEWLINE;
1000
1001	return n;
1002}
1003
1004void
1005roff_node_append(struct roff_man *man, struct roff_node *n)
1006{
1007
1008	switch (man->next) {
1009	case ROFF_NEXT_SIBLING:
1010		if (man->last->next != NULL) {
1011			n->next = man->last->next;
1012			man->last->next->prev = n;
1013		} else
1014			man->last->parent->last = n;
1015		man->last->next = n;
1016		n->prev = man->last;
1017		n->parent = man->last->parent;
1018		break;
1019	case ROFF_NEXT_CHILD:
1020		if (man->last->child != NULL) {
1021			n->next = man->last->child;
1022			man->last->child->prev = n;
1023		} else
1024			man->last->last = n;
1025		man->last->child = n;
1026		n->parent = man->last;
1027		break;
1028	default:
1029		abort();
1030	}
1031	man->last = n;
1032
1033	switch (n->type) {
1034	case ROFFT_HEAD:
1035		n->parent->head = n;
1036		break;
1037	case ROFFT_BODY:
1038		if (n->end != ENDBODY_NOT)
1039			return;
1040		n->parent->body = n;
1041		break;
1042	case ROFFT_TAIL:
1043		n->parent->tail = n;
1044		break;
1045	default:
1046		return;
1047	}
1048
1049	/*
1050	 * Copy over the normalised-data pointer of our parent.  Not
1051	 * everybody has one, but copying a null pointer is fine.
1052	 */
1053
1054	n->norm = n->parent->norm;
1055	assert(n->parent->type == ROFFT_BLOCK);
1056}
1057
1058void
1059roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
1060{
1061	struct roff_node	*n;
1062
1063	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
1064	n->string = roff_strdup(man->roff, word);
1065	roff_node_append(man, n);
1066	n->flags |= NODE_VALID | NODE_ENDED;
1067	man->next = ROFF_NEXT_SIBLING;
1068}
1069
1070void
1071roff_word_append(struct roff_man *man, const char *word)
1072{
1073	struct roff_node	*n;
1074	char			*addstr, *newstr;
1075
1076	n = man->last;
1077	addstr = roff_strdup(man->roff, word);
1078	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
1079	free(addstr);
1080	free(n->string);
1081	n->string = newstr;
1082	man->next = ROFF_NEXT_SIBLING;
1083}
1084
1085void
1086roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
1087{
1088	struct roff_node	*n;
1089
1090	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
1091	roff_node_append(man, n);
1092	man->next = ROFF_NEXT_CHILD;
1093}
1094
1095struct roff_node *
1096roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1097{
1098	struct roff_node	*n;
1099
1100	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1101	roff_node_append(man, n);
1102	man->next = ROFF_NEXT_CHILD;
1103	return n;
1104}
1105
1106struct roff_node *
1107roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1108{
1109	struct roff_node	*n;
1110
1111	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1112	roff_node_append(man, n);
1113	man->next = ROFF_NEXT_CHILD;
1114	return n;
1115}
1116
1117struct roff_node *
1118roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1119{
1120	struct roff_node	*n;
1121
1122	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1123	roff_node_append(man, n);
1124	man->next = ROFF_NEXT_CHILD;
1125	return n;
1126}
1127
1128void
1129roff_addeqn(struct roff_man *man, const struct eqn *eqn)
1130{
1131	struct roff_node	*n;
1132
1133	n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE);
1134	n->eqn = eqn;
1135	if (eqn->ln > man->last->line)
1136		n->flags |= NODE_LINE;
1137	roff_node_append(man, n);
1138	man->next = ROFF_NEXT_SIBLING;
1139}
1140
1141void
1142roff_addtbl(struct roff_man *man, const struct tbl_span *tbl)
1143{
1144	struct roff_node	*n;
1145
1146	if (man->macroset == MACROSET_MAN)
1147		man_breakscope(man, TOKEN_NONE);
1148	n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
1149	n->span = tbl;
1150	roff_node_append(man, n);
1151	n->flags |= NODE_VALID | NODE_ENDED;
1152	man->next = ROFF_NEXT_SIBLING;
1153}
1154
1155void
1156roff_node_unlink(struct roff_man *man, struct roff_node *n)
1157{
1158
1159	/* Adjust siblings. */
1160
1161	if (n->prev)
1162		n->prev->next = n->next;
1163	if (n->next)
1164		n->next->prev = n->prev;
1165
1166	/* Adjust parent. */
1167
1168	if (n->parent != NULL) {
1169		if (n->parent->child == n)
1170			n->parent->child = n->next;
1171		if (n->parent->last == n)
1172			n->parent->last = n->prev;
1173	}
1174
1175	/* Adjust parse point. */
1176
1177	if (man == NULL)
1178		return;
1179	if (man->last == n) {
1180		if (n->prev == NULL) {
1181			man->last = n->parent;
1182			man->next = ROFF_NEXT_CHILD;
1183		} else {
1184			man->last = n->prev;
1185			man->next = ROFF_NEXT_SIBLING;
1186		}
1187	}
1188	if (man->first == n)
1189		man->first = NULL;
1190}
1191
1192void
1193roff_node_free(struct roff_node *n)
1194{
1195
1196	if (n->args != NULL)
1197		mdoc_argv_free(n->args);
1198	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1199		free(n->norm);
1200	free(n->string);
1201	free(n);
1202}
1203
1204void
1205roff_node_delete(struct roff_man *man, struct roff_node *n)
1206{
1207
1208	while (n->child != NULL)
1209		roff_node_delete(man, n->child);
1210	roff_node_unlink(man, n);
1211	roff_node_free(n);
1212}
1213
1214void
1215deroff(char **dest, const struct roff_node *n)
1216{
1217	char	*cp;
1218	size_t	 sz;
1219
1220	if (n->type != ROFFT_TEXT) {
1221		for (n = n->child; n != NULL; n = n->next)
1222			deroff(dest, n);
1223		return;
1224	}
1225
1226	/* Skip leading whitespace. */
1227
1228	for (cp = n->string; *cp != '\0'; cp++) {
1229		if (cp[0] == '\\' && strchr(" %&0^|~", cp[1]) != NULL)
1230			cp++;
1231		else if ( ! isspace((unsigned char)*cp))
1232			break;
1233	}
1234
1235	/* Skip trailing whitespace. */
1236
1237	for (sz = strlen(cp); sz; sz--)
1238		if ( ! isspace((unsigned char)cp[sz-1]))
1239			break;
1240
1241	/* Skip empty strings. */
1242
1243	if (sz == 0)
1244		return;
1245
1246	if (*dest == NULL) {
1247		*dest = mandoc_strndup(cp, sz);
1248		return;
1249	}
1250
1251	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1252	free(*dest);
1253	*dest = cp;
1254}
1255
1256/* --- main functions of the roff parser ---------------------------------- */
1257
1258/*
1259 * In the current line, expand escape sequences that tend to get
1260 * used in numerical expressions and conditional requests.
1261 * Also check the syntax of the remaining escape sequences.
 *
 * The part of the line starting at pos is scanned backwards from
 * its end.  Unescaped \* (string), \n (register), \B (numeric
 * expression test) and \w (width) sequences are replaced in a newly
 * allocated copy of the line, which then replaces buf->buf, and
 * buf->sz is updated to match.  All other escape sequences are
 * merely checked with mandoc_escape().
 *
 * Returns ROFF_CONT on success.  Returns ROFF_IGN after reporting
 * MANDOCERR_ROFFLOOP if more than EXPAND_LIMIT expansions occur on
 * the line or if buf->sz plus the length of a replacement string
 * exceeds SHRT_MAX.
1262 */
1263static enum rofferr
1264roff_res(struct roff *r, struct buf *buf, int ln, int pos)
1265{
1266	char		 ubuf[24]; /* buffer to print the number */
1267	const char	*start;	/* start of the string to process */
1268	char		*stesc;	/* start of an escape sequence ('\\') */
1269	const char	*stnam;	/* start of the name, after "[(*" */
1270	const char	*cp;	/* end of the name, e.g. before ']' */
1271	const char	*res;	/* the string to be substituted */
1272	char		*nbuf;	/* new buffer to copy buf->buf to */
1273	size_t		 maxl;  /* expected length of the escape name */
1274	size_t		 naml;	/* actual length of the escape name */
1275	enum mandoc_esc	 esc;	/* type of the escape sequence */
1276	int		 inaml;	/* length returned from mandoc_escape() */
1277	int		 expand_count;	/* to avoid infinite loops */
1278	int		 npos;	/* position in numeric expression */
1279	int		 arg_complete; /* argument not interrupted by eol */
1280	char		 term;	/* character terminating the escape */
1281
1282	expand_count = 0;
1283	start = buf->buf + pos;
	/*
	 * The final character of the line is never examined as the
	 * start of an escape sequence: the loop condition decrements
	 * stesc before each iteration of the body.
	 */
1284	stesc = strchr(start, '\0') - 1;
1285	while (stesc-- > start) {
1286
1287		/* Search backwards for the next backslash. */
1288
1289		if (*stesc != '\\')
1290			continue;
1291
1292		/* If it is escaped, skip it. */
1293
1294		for (cp = stesc - 1; cp >= start; cp--)
1295			if (*cp != '\\')
1296				break;
1297
		/*
		 * An even distance means an odd number of directly
		 * preceding backslashes, so this one is escaped.
		 * Continue the scan before the whole run.
		 */
1298		if ((stesc - cp) % 2 == 0) {
1299			stesc = (char *)cp;
1300			continue;
1301		}
1302
1303		/* Decide whether to expand or to check only. */
1304
1305		term = '\0';
1306		cp = stesc + 1;
1307		switch (*cp) {
1308		case '*':
1309			res = NULL;
1310			break;
1311		case 'B':
1312		case 'w':
			/* The argument is delimited by the next character. */
1313			term = cp[1];
1314			/* FALLTHROUGH */
1315		case 'n':
1316			res = ubuf;
1317			break;
1318		default:
			/* Not expanded here; only report syntax errors. */
1319			esc = mandoc_escape(&cp, &stnam, &inaml);
1320			if (esc == ESCAPE_ERROR ||
1321			    (esc == ESCAPE_SPECIAL &&
1322			     mchars_spec2cp(stnam, inaml) < 0))
1323				mandoc_vmsg(MANDOCERR_ESC_BAD,
1324				    r->parse, ln, (int)(stesc - buf->buf),
1325				    "%.*s", (int)(cp - stesc), stesc);
1326			continue;
1327		}
1328
1329		if (EXPAND_LIMIT < ++expand_count) {
1330			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1331			    ln, (int)(stesc - buf->buf), NULL);
1332			return ROFF_IGN;
1333		}
1334
1335		/*
1336		 * The third character decides the length
1337		 * of the name of the string or register.
1338		 * Save a pointer to the name.
1339		 */
1340
		/* maxl == 0 means the name extends up to term. */
1341		if (term == '\0') {
1342			switch (*++cp) {
1343			case '\0':
1344				maxl = 0;
1345				break;
1346			case '(':
1347				cp++;
1348				maxl = 2;
1349				break;
1350			case '[':
1351				cp++;
1352				term = ']';
1353				maxl = 0;
1354				break;
1355			default:
1356				maxl = 1;
1357				break;
1358			}
1359		} else {
1360			cp += 2;
1361			maxl = 0;
1362		}
1363		stnam = cp;
1364
1365		/* Advance to the end of the name. */
1366
1367		naml = 0;
1368		arg_complete = 1;
1369		while (maxl == 0 || naml < maxl) {
1370			if (*cp == '\0') {
1371				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1372				    ln, (int)(stesc - buf->buf), stesc);
1373				arg_complete = 0;
1374				break;
1375			}
1376			if (maxl == 0 && *cp == term) {
1377				cp++;
1378				break;
1379			}
			/*
			 * Only inside a \w argument, an escape sequence
			 * is parsed as a unit and counted at most once.
			 */
1380			if (*cp++ != '\\' || stesc[1] != 'w') {
1381				naml++;
1382				continue;
1383			}
1384			switch (mandoc_escape(&cp, NULL, NULL)) {
1385			case ESCAPE_SPECIAL:
1386			case ESCAPE_UNICODE:
1387			case ESCAPE_NUMBERED:
1388			case ESCAPE_OVERSTRIKE:
1389				naml++;
1390				break;
1391			default:
1392				break;
1393			}
1394		}
1395
1396		/*
1397		 * Retrieve the replacement string; if it is
1398		 * undefined, resume searching for escapes.
1399		 */
1400
1401		switch (stesc[1]) {
1402		case '*':
1403			if (arg_complete)
1404				res = roff_getstrn(r, stnam, naml);
1405			break;
1406		case 'B':
			/*
			 * Yield "1" only if the whole argument up to
			 * the delimiter is a valid numeric expression.
			 */
1407			npos = 0;
1408			ubuf[0] = arg_complete &&
1409			    roff_evalnum(r, ln, stnam, &npos,
1410			      NULL, ROFFNUM_SCALE) &&
1411			    stnam + npos + 1 == cp ? '1' : '0';
1412			ubuf[1] = '\0';
1413			break;
1414		case 'n':
1415			if (arg_complete)
1416				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1417				    roff_getregn(r, stnam, naml));
1418			else
1419				ubuf[0] = '\0';
1420			break;
1421		case 'w':
1422			/* use even incomplete args */
			/* The result is 24 units per counted character. */
1423			(void)snprintf(ubuf, sizeof(ubuf), "%d",
1424			    24 * (int)naml);
1425			break;
1426		}
1427
1428		if (res == NULL) {
1429			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1430			    r->parse, ln, (int)(stesc - buf->buf),
1431			    "%.*s", (int)naml, stnam);
1432			res = "";
1433		} else if (buf->sz + strlen(res) > SHRT_MAX) {
1434			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1435			    ln, (int)(stesc - buf->buf), NULL);
1436			return ROFF_IGN;
1437		}
1438
1439		/* Replace the escape sequence by the string. */
1440
		/*
		 * Cut the old buffer at the backslash such that
		 * "%s%s%s" joins the text before the escape, the
		 * replacement, and the text after the escape.
		 */
1441		*stesc = '\0';
1442		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1443		    buf->buf, res, cp) + 1;
1444
1445		/* Prepare for the next replacement. */
1446
		/*
		 * Resume at the end of the inserted text, such that
		 * the backward scan also covers the replacement.
		 */
1447		start = nbuf + pos;
1448		stesc = nbuf + (stesc - buf->buf) + strlen(res);
1449		free(buf->buf);
1450		buf->buf = nbuf;
1451	}
1452	return ROFF_CONT;
1453}
1454
1455/*
1456 * Process text streams.
1457 */
1458static enum rofferr
1459roff_parsetext(struct buf *buf, int pos, int *offs)
1460{
1461	size_t		 sz;
1462	const char	*start;
1463	char		*p;
1464	int		 isz;
1465	enum mandoc_esc	 esc;
1466
1467	/* Spring the input line trap. */
1468
1469	if (roffit_lines == 1) {
1470		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1471		free(buf->buf);
1472		buf->buf = p;
1473		buf->sz = isz + 1;
1474		*offs = 0;
1475		free(roffit_macro);
1476		roffit_lines = 0;
1477		return ROFF_REPARSE;
1478	} else if (roffit_lines > 1)
1479		--roffit_lines;
1480
1481	/* Convert all breakable hyphens into ASCII_HYPH. */
1482
1483	start = p = buf->buf + pos;
1484
1485	while (*p != '\0') {
1486		sz = strcspn(p, "-\\");
1487		p += sz;
1488
1489		if (*p == '\0')
1490			break;
1491
1492		if (*p == '\\') {
1493			/* Skip over escapes. */
1494			p++;
1495			esc = mandoc_escape((const char **)&p, NULL, NULL);
1496			if (esc == ESCAPE_ERROR)
1497				break;
1498			while (*p == '-')
1499				p++;
1500			continue;
1501		} else if (p == start) {
1502			p++;
1503			continue;
1504		}
1505
1506		if (isalpha((unsigned char)p[-1]) &&
1507		    isalpha((unsigned char)p[1]))
1508			*p = ASCII_HYPH;
1509		p++;
1510	}
1511	return ROFF_CONT;
1512}
1513
1514enum rofferr
1515roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1516{
1517	enum rofft	 t;
1518	enum rofferr	 e;
1519	int		 pos;	/* parse point */
1520	int		 spos;	/* saved parse point for messages */
1521	int		 ppos;	/* original offset in buf->buf */
1522	int		 ctl;	/* macro line (boolean) */
1523
1524	ppos = pos = *offs;
1525
1526	/* Handle in-line equation delimiters. */
1527
1528	if (r->tbl == NULL &&
1529	    r->last_eqn != NULL && r->last_eqn->delim &&
1530	    (r->eqn == NULL || r->eqn_inline)) {
1531		e = roff_eqndelim(r, buf, pos);
1532		if (e == ROFF_REPARSE)
1533			return e;
1534		assert(e == ROFF_CONT);
1535	}
1536
1537	/* Expand some escape sequences. */
1538
1539	e = roff_res(r, buf, ln, pos);
1540	if (e == ROFF_IGN)
1541		return e;
1542	assert(e == ROFF_CONT);
1543
1544	ctl = roff_getcontrol(r, buf->buf, &pos);
1545
1546	/*
1547	 * First, if a scope is open and we're not a macro, pass the
1548	 * text through the macro's filter.
1549	 * Equations process all content themselves.
1550	 * Tables process almost all content themselves, but we want
1551	 * to warn about macros before passing it there.
1552	 */
1553
1554	if (r->last != NULL && ! ctl) {
1555		t = r->last->tok;
1556		assert(roffs[t].text);
1557		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1558		assert(e == ROFF_IGN || e == ROFF_CONT);
1559		if (e != ROFF_CONT)
1560			return e;
1561	}
1562	if (r->eqn != NULL)
1563		return eqn_read(&r->eqn, ln, buf->buf, ppos, offs);
1564	if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1565		return tbl_read(r->tbl, ln, buf->buf, ppos);
1566	if ( ! ctl)
1567		return roff_parsetext(buf, pos, offs);
1568
1569	/* Skip empty request lines. */
1570
1571	if (buf->buf[pos] == '"') {
1572		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1573		    ln, pos, NULL);
1574		return ROFF_IGN;
1575	} else if (buf->buf[pos] == '\0')
1576		return ROFF_IGN;
1577
1578	/*
1579	 * If a scope is open, go to the child handler for that macro,
1580	 * as it may want to preprocess before doing anything with it.
1581	 * Don't do so if an equation is open.
1582	 */
1583
1584	if (r->last) {
1585		t = r->last->tok;
1586		assert(roffs[t].sub);
1587		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1588	}
1589
1590	/* No scope is open.  This is a new request or macro. */
1591
1592	spos = pos;
1593	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1594
1595	/* Tables ignore most macros. */
1596
1597	if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
1598		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1599		    ln, pos, buf->buf + spos);
1600		if (t == ROFF_TS)
1601			return ROFF_IGN;
1602		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1603			pos++;
1604		while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
1605			pos++;
1606		return tbl_read(r->tbl, ln, buf->buf, pos);
1607	}
1608
1609	/*
1610	 * This is neither a roff request nor a user-defined macro.
1611	 * Let the standard macro set parsers handle it.
1612	 */
1613
1614	if (t == ROFF_MAX)
1615		return ROFF_CONT;
1616
1617	/* Execute a roff request or a user defined macro. */
1618
1619	assert(roffs[t].proc);
1620	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1621}
1622
1623void
1624roff_endparse(struct roff *r)
1625{
1626
1627	if (r->last)
1628		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1629		    r->last->line, r->last->col,
1630		    roffs[r->last->tok].name);
1631
1632	if (r->eqn) {
1633		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1634		    r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1635		eqn_end(&r->eqn);
1636	}
1637
1638	if (r->tbl) {
1639		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1640		    r->tbl->line, r->tbl->pos, "TS");
1641		tbl_end(&r->tbl);
1642	}
1643}
1644
1645/*
1646 * Parse a roff node's type from the input buffer.  This must be in the
1647 * form of ".foo xxx" in the usual way.
1648 */
1649static enum rofft
1650roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1651{
1652	char		*cp;
1653	const char	*mac;
1654	size_t		 maclen;
1655	enum rofft	 t;
1656
1657	cp = buf + *pos;
1658
1659	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1660		return ROFF_MAX;
1661
1662	mac = cp;
1663	maclen = roff_getname(r, &cp, ln, ppos);
1664
1665	t = (r->current_string = roff_getstrn(r, mac, maclen))
1666	    ? ROFF_USERDEF : roffhash_find(mac, maclen);
1667
1668	if (ROFF_MAX != t)
1669		*pos = cp - buf;
1670
1671	return t;
1672}
1673
1674/* --- handling of request blocks ----------------------------------------- */
1675
1676static enum rofferr
1677roff_cblock(ROFF_ARGS)
1678{
1679
1680	/*
1681	 * A block-close `..' should only be invoked as a child of an
1682	 * ignore macro, otherwise raise a warning and just ignore it.
1683	 */
1684
1685	if (r->last == NULL) {
1686		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1687		    ln, ppos, "..");
1688		return ROFF_IGN;
1689	}
1690
1691	switch (r->last->tok) {
1692	case ROFF_am:
1693		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1694	case ROFF_ami:
1695	case ROFF_de:
1696		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1697	case ROFF_dei:
1698	case ROFF_ig:
1699		break;
1700	default:
1701		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1702		    ln, ppos, "..");
1703		return ROFF_IGN;
1704	}
1705
1706	if (buf->buf[pos] != '\0')
1707		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1708		    ".. %s", buf->buf + pos);
1709
1710	roffnode_pop(r);
1711	roffnode_cleanscope(r);
1712	return ROFF_IGN;
1713
1714}
1715
1716static void
1717roffnode_cleanscope(struct roff *r)
1718{
1719
1720	while (r->last) {
1721		if (--r->last->endspan != 0)
1722			break;
1723		roffnode_pop(r);
1724	}
1725}
1726
1727static void
1728roff_ccond(struct roff *r, int ln, int ppos)
1729{
1730
1731	if (NULL == r->last) {
1732		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1733		    ln, ppos, "\\}");
1734		return;
1735	}
1736
1737	switch (r->last->tok) {
1738	case ROFF_el:
1739	case ROFF_ie:
1740	case ROFF_if:
1741		break;
1742	default:
1743		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1744		    ln, ppos, "\\}");
1745		return;
1746	}
1747
1748	if (r->last->endspan > -1) {
1749		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1750		    ln, ppos, "\\}");
1751		return;
1752	}
1753
1754	roffnode_pop(r);
1755	roffnode_cleanscope(r);
1756	return;
1757}
1758
1759static enum rofferr
1760roff_block(ROFF_ARGS)
1761{
1762	const char	*name;
1763	char		*iname, *cp;
1764	size_t		 namesz;
1765
1766	/* Ignore groff compatibility mode for now. */
1767
1768	if (tok == ROFF_de1)
1769		tok = ROFF_de;
1770	else if (tok == ROFF_dei1)
1771		tok = ROFF_dei;
1772	else if (tok == ROFF_am1)
1773		tok = ROFF_am;
1774	else if (tok == ROFF_ami1)
1775		tok = ROFF_ami;
1776
1777	/* Parse the macro name argument. */
1778
1779	cp = buf->buf + pos;
1780	if (tok == ROFF_ig) {
1781		iname = NULL;
1782		namesz = 0;
1783	} else {
1784		iname = cp;
1785		namesz = roff_getname(r, &cp, ln, ppos);
1786		iname[namesz] = '\0';
1787	}
1788
1789	/* Resolve the macro name argument if it is indirect. */
1790
1791	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1792		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1793			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1794			    r->parse, ln, (int)(iname - buf->buf),
1795			    "%.*s", (int)namesz, iname);
1796			namesz = 0;
1797		} else
1798			namesz = strlen(name);
1799	} else
1800		name = iname;
1801
1802	if (namesz == 0 && tok != ROFF_ig) {
1803		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1804		    ln, ppos, roffs[tok].name);
1805		return ROFF_IGN;
1806	}
1807
1808	roffnode_push(r, tok, name, ln, ppos);
1809
1810	/*
1811	 * At the beginning of a `de' macro, clear the existing string
1812	 * with the same name, if there is one.  New content will be
1813	 * appended from roff_block_text() in multiline mode.
1814	 */
1815
1816	if (tok == ROFF_de || tok == ROFF_dei)
1817		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1818
1819	if (*cp == '\0')
1820		return ROFF_IGN;
1821
1822	/* Get the custom end marker. */
1823
1824	iname = cp;
1825	namesz = roff_getname(r, &cp, ln, ppos);
1826
1827	/* Resolve the end marker if it is indirect. */
1828
1829	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1830		if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1831			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1832			    r->parse, ln, (int)(iname - buf->buf),
1833			    "%.*s", (int)namesz, iname);
1834			namesz = 0;
1835		} else
1836			namesz = strlen(name);
1837	} else
1838		name = iname;
1839
1840	if (namesz)
1841		r->last->end = mandoc_strndup(name, namesz);
1842
1843	if (*cp != '\0')
1844		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1845		    ln, pos, ".%s ... %s", roffs[tok].name, cp);
1846
1847	return ROFF_IGN;
1848}
1849
1850static enum rofferr
1851roff_block_sub(ROFF_ARGS)
1852{
1853	enum rofft	t;
1854	int		i, j;
1855
1856	/*
1857	 * First check whether a custom macro exists at this level.  If
1858	 * it does, then check against it.  This is some of groff's
1859	 * stranger behaviours.  If we encountered a custom end-scope
1860	 * tag and that tag also happens to be a "real" macro, then we
1861	 * need to try interpreting it again as a real macro.  If it's
1862	 * not, then return ignore.  Else continue.
1863	 */
1864
1865	if (r->last->end) {
1866		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1867			if (buf->buf[i] != r->last->end[j])
1868				break;
1869
1870		if (r->last->end[j] == '\0' &&
1871		    (buf->buf[i] == '\0' ||
1872		     buf->buf[i] == ' ' ||
1873		     buf->buf[i] == '\t')) {
1874			roffnode_pop(r);
1875			roffnode_cleanscope(r);
1876
1877			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1878				i++;
1879
1880			pos = i;
1881			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1882			    ROFF_MAX)
1883				return ROFF_RERUN;
1884			return ROFF_IGN;
1885		}
1886	}
1887
1888	/*
1889	 * If we have no custom end-query or lookup failed, then try
1890	 * pulling it out of the hashtable.
1891	 */
1892
1893	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1894
1895	if (t != ROFF_cblock) {
1896		if (tok != ROFF_ig)
1897			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1898		return ROFF_IGN;
1899	}
1900
1901	assert(roffs[t].proc);
1902	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1903}
1904
1905static enum rofferr
1906roff_block_text(ROFF_ARGS)
1907{
1908
1909	if (tok != ROFF_ig)
1910		roff_setstr(r, r->last->name, buf->buf + pos, 2);
1911
1912	return ROFF_IGN;
1913}
1914
1915static enum rofferr
1916roff_cond_sub(ROFF_ARGS)
1917{
1918	enum rofft	 t;
1919	char		*ep;
1920	int		 rr;
1921
1922	rr = r->last->rule;
1923	roffnode_cleanscope(r);
1924	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1925
1926	/*
1927	 * Fully handle known macros when they are structurally
1928	 * required or when the conditional evaluated to true.
1929	 */
1930
1931	if ((t != ROFF_MAX) &&
1932	    (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1933		assert(roffs[t].proc);
1934		return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1935	}
1936
1937	/*
1938	 * If `\}' occurs on a macro line without a preceding macro,
1939	 * drop the line completely.
1940	 */
1941
1942	ep = buf->buf + pos;
1943	if (ep[0] == '\\' && ep[1] == '}')
1944		rr = 0;
1945
1946	/* Always check for the closing delimiter `\}'. */
1947
1948	while ((ep = strchr(ep, '\\')) != NULL) {
1949		if (*(++ep) == '}') {
1950			*ep = '&';
1951			roff_ccond(r, ln, ep - buf->buf - 1);
1952		}
1953		if (*ep != '\0')
1954			++ep;
1955	}
1956	return rr ? ROFF_CONT : ROFF_IGN;
1957}
1958
1959static enum rofferr
1960roff_cond_text(ROFF_ARGS)
1961{
1962	char		*ep;
1963	int		 rr;
1964
1965	rr = r->last->rule;
1966	roffnode_cleanscope(r);
1967
1968	ep = buf->buf + pos;
1969	while ((ep = strchr(ep, '\\')) != NULL) {
1970		if (*(++ep) == '}') {
1971			*ep = '&';
1972			roff_ccond(r, ln, ep - buf->buf - 1);
1973		}
1974		if (*ep != '\0')
1975			++ep;
1976	}
1977	return rr ? ROFF_CONT : ROFF_IGN;
1978}
1979
1980/* --- handling of numeric and conditional expressions -------------------- */
1981
1982/*
1983 * Parse a single signed integer number.  Stop at the first non-digit.
1984 * If there is at least one digit, return success and advance the
1985 * parse point, else return failure and let the parse point unchanged.
1986 * Ignore overflows, treat them just like the C language.
1987 */
1988static int
1989roff_getnum(const char *v, int *pos, int *res, int flags)
1990{
1991	int	 myres, scaled, n, p;
1992
1993	if (NULL == res)
1994		res = &myres;
1995
1996	p = *pos;
1997	n = v[p] == '-';
1998	if (n || v[p] == '+')
1999		p++;
2000
2001	if (flags & ROFFNUM_WHITE)
2002		while (isspace((unsigned char)v[p]))
2003			p++;
2004
2005	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2006		*res = 10 * *res + v[p] - '0';
2007	if (p == *pos + n)
2008		return 0;
2009
2010	if (n)
2011		*res = -*res;
2012
2013	/* Each number may be followed by one optional scaling unit. */
2014
2015	switch (v[p]) {
2016	case 'f':
2017		scaled = *res * 65536;
2018		break;
2019	case 'i':
2020		scaled = *res * 240;
2021		break;
2022	case 'c':
2023		scaled = *res * 240 / 2.54;
2024		break;
2025	case 'v':
2026	case 'P':
2027		scaled = *res * 40;
2028		break;
2029	case 'm':
2030	case 'n':
2031		scaled = *res * 24;
2032		break;
2033	case 'p':
2034		scaled = *res * 10 / 3;
2035		break;
2036	case 'u':
2037		scaled = *res;
2038		break;
2039	case 'M':
2040		scaled = *res * 6 / 25;
2041		break;
2042	default:
2043		scaled = *res;
2044		p--;
2045		break;
2046	}
2047	if (flags & ROFFNUM_SCALE)
2048		*res = scaled;
2049
2050	*pos = p + 1;
2051	return 1;
2052}
2053
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If the cursor already is at the end of the line,
 * fail without moving it.
 * Returns 1 if the strings match, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/*
	 * Without a delimiter, there is nothing to compare,
	 * and s1 + 1 would point beyond the terminating NUL.
	 */

	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')
		s3++;
	*pos = s3 - v;
	return match;
}
2096
2097/*
2098 * Evaluate an optionally negated single character, numerical,
2099 * or string condition.
2100 */
2101static int
2102roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2103{
2104	char	*cp, *name;
2105	size_t	 sz;
2106	int	 number, savepos, wanttrue;
2107
2108	if ('!' == v[*pos]) {
2109		wanttrue = 0;
2110		(*pos)++;
2111	} else
2112		wanttrue = 1;
2113
2114	switch (v[*pos]) {
2115	case '\0':
2116		return 0;
2117	case 'n':
2118	case 'o':
2119		(*pos)++;
2120		return wanttrue;
2121	case 'c':
2122	case 'd':
2123	case 'e':
2124	case 't':
2125	case 'v':
2126		(*pos)++;
2127		return !wanttrue;
2128	case 'r':
2129		cp = name = v + ++*pos;
2130		sz = roff_getname(r, &cp, ln, *pos);
2131		*pos = cp - v;
2132		return (sz && roff_hasregn(r, name, sz)) == wanttrue;
2133	default:
2134		break;
2135	}
2136
2137	savepos = *pos;
2138	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2139		return (number > 0) == wanttrue;
2140	else if (*pos == savepos)
2141		return roff_evalstrcond(v, pos) == wanttrue;
2142	else
2143		return 0;
2144}
2145
2146static enum rofferr
2147roff_line_ignore(ROFF_ARGS)
2148{
2149
2150	return ROFF_IGN;
2151}
2152
2153static enum rofferr
2154roff_insec(ROFF_ARGS)
2155{
2156
2157	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2158	    ln, ppos, roffs[tok].name);
2159	return ROFF_IGN;
2160}
2161
2162static enum rofferr
2163roff_unsupp(ROFF_ARGS)
2164{
2165
2166	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2167	    ln, ppos, roffs[tok].name);
2168	return ROFF_IGN;
2169}
2170
2171static enum rofferr
2172roff_cond(ROFF_ARGS)
2173{
2174
2175	roffnode_push(r, tok, NULL, ln, ppos);
2176
2177	/*
2178	 * An `.el' has no conditional body: it will consume the value
2179	 * of the current rstack entry set in prior `ie' calls or
2180	 * defaults to DENY.
2181	 *
2182	 * If we're not an `el', however, then evaluate the conditional.
2183	 */
2184
2185	r->last->rule = tok == ROFF_el ?
2186	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2187	    roff_evalcond(r, ln, buf->buf, &pos);
2188
2189	/*
2190	 * An if-else will put the NEGATION of the current evaluated
2191	 * conditional into the stack of rules.
2192	 */
2193
2194	if (tok == ROFF_ie) {
2195		if (r->rstackpos + 1 == r->rstacksz) {
2196			r->rstacksz += 16;
2197			r->rstack = mandoc_reallocarray(r->rstack,
2198			    r->rstacksz, sizeof(int));
2199		}
2200		r->rstack[++r->rstackpos] = !r->last->rule;
2201	}
2202
2203	/* If the parent has false as its rule, then so do we. */
2204
2205	if (r->last->parent && !r->last->parent->rule)
2206		r->last->rule = 0;
2207
2208	/*
2209	 * Determine scope.
2210	 * If there is nothing on the line after the conditional,
2211	 * not even whitespace, use next-line scope.
2212	 */
2213
2214	if (buf->buf[pos] == '\0') {
2215		r->last->endspan = 2;
2216		goto out;
2217	}
2218
2219	while (buf->buf[pos] == ' ')
2220		pos++;
2221
2222	/* An opening brace requests multiline scope. */
2223
2224	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2225		r->last->endspan = -1;
2226		pos += 2;
2227		while (buf->buf[pos] == ' ')
2228			pos++;
2229		goto out;
2230	}
2231
2232	/*
2233	 * Anything else following the conditional causes
2234	 * single-line scope.  Warn if the scope contains
2235	 * nothing but trailing whitespace.
2236	 */
2237
2238	if (buf->buf[pos] == '\0')
2239		mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2240		    ln, ppos, roffs[tok].name);
2241
2242	r->last->endspan = 1;
2243
2244out:
2245	*offs = pos;
2246	return ROFF_RERUN;
2247}
2248
2249static enum rofferr
2250roff_ds(ROFF_ARGS)
2251{
2252	char		*string;
2253	const char	*name;
2254	size_t		 namesz;
2255
2256	/* Ignore groff compatibility mode for now. */
2257
2258	if (tok == ROFF_ds1)
2259		tok = ROFF_ds;
2260	else if (tok == ROFF_as1)
2261		tok = ROFF_as;
2262
2263	/*
2264	 * The first word is the name of the string.
2265	 * If it is empty or terminated by an escape sequence,
2266	 * abort the `ds' request without defining anything.
2267	 */
2268
2269	name = string = buf->buf + pos;
2270	if (*name == '\0')
2271		return ROFF_IGN;
2272
2273	namesz = roff_getname(r, &string, ln, pos);
2274	if (name[namesz] == '\\')
2275		return ROFF_IGN;
2276
2277	/* Read past the initial double-quote, if any. */
2278	if (*string == '"')
2279		string++;
2280
2281	/* The rest is the value. */
2282	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2283	    ROFF_as == tok);
2284	return ROFF_IGN;
2285}
2286
/*
 * Parse a single operator, one or two characters long, at v + *pos.
 * If the operator is recognized, store its code in *res, advance
 * *pos behind it, and return the code, which is never zero.
 * Otherwise, return 0 and leave *pos unchanged; *res then holds
 * the character found at the parse point.
 * One-character operators are coded as themselves.  The codes of
 * the two-character operators are: `<=' is 'l', `<>' is '!',
 * `<?' is 'i', `>=' is 'g', `>?' is 'a', and `==' is '='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*op;
	int		 len;

	op = v + *pos;
	*res = op[0];
	len = 1;

	if (op[0] == '<') {
		if (op[1] == '=') {
			*res = 'l';
			len = 2;
		} else if (op[1] == '>') {
			*res = '!';
			len = 2;
		} else if (op[1] == '?') {
			*res = 'i';
			len = 2;
		}
	} else if (op[0] == '>') {
		if (op[1] == '=') {
			*res = 'g';
			len = 2;
		} else if (op[1] == '?') {
			*res = 'a';
			len = 2;
		}
	} else if (op[0] == '=') {
		if (op[1] == '=')
			len = 2;
	} else if (op[0] == '\0' || strchr("+-*/%&:", op[0]) == NULL)
		return 0;

	*pos += len;
	return *res;
}
2350
2351/*
2352 * Evaluate either a parenthesized numeric expression
2353 * or a single signed integer number.
2354 */
2355static int
2356roff_evalpar(struct roff *r, int ln,
2357	const char *v, int *pos, int *res, int flags)
2358{
2359
2360	if ('(' != v[*pos])
2361		return roff_getnum(v, pos, res, flags);
2362
2363	(*pos)++;
2364	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2365		return 0;
2366
2367	/*
2368	 * Omission of the closing parenthesis
2369	 * is an error in validation mode,
2370	 * but ignored in evaluation mode.
2371	 */
2372
2373	if (')' == v[*pos])
2374		(*pos)++;
2375	else if (NULL == res)
2376		return 0;
2377
2378	return 1;
2379}
2380
2381/*
2382 * Evaluate a complete numeric expression.
2383 * Proceed left to right, there is no concept of precedence.
2384 */
2385static int
2386roff_evalnum(struct roff *r, int ln, const char *v,
2387	int *pos, int *res, int flags)
2388{
2389	int		 mypos, operand2;
2390	char		 operator;
2391
2392	if (NULL == pos) {
2393		mypos = 0;
2394		pos = &mypos;
2395	}
2396
2397	if (flags & ROFFNUM_WHITE)
2398		while (isspace((unsigned char)v[*pos]))
2399			(*pos)++;
2400
2401	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2402		return 0;
2403
2404	while (1) {
2405		if (flags & ROFFNUM_WHITE)
2406			while (isspace((unsigned char)v[*pos]))
2407				(*pos)++;
2408
2409		if ( ! roff_getop(v, pos, &operator))
2410			break;
2411
2412		if (flags & ROFFNUM_WHITE)
2413			while (isspace((unsigned char)v[*pos]))
2414				(*pos)++;
2415
2416		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2417			return 0;
2418
2419		if (flags & ROFFNUM_WHITE)
2420			while (isspace((unsigned char)v[*pos]))
2421				(*pos)++;
2422
2423		if (NULL == res)
2424			continue;
2425
2426		switch (operator) {
2427		case '+':
2428			*res += operand2;
2429			break;
2430		case '-':
2431			*res -= operand2;
2432			break;
2433		case '*':
2434			*res *= operand2;
2435			break;
2436		case '/':
2437			if (operand2 == 0) {
2438				mandoc_msg(MANDOCERR_DIVZERO,
2439					r->parse, ln, *pos, v);
2440				*res = 0;
2441				break;
2442			}
2443			*res /= operand2;
2444			break;
2445		case '%':
2446			if (operand2 == 0) {
2447				mandoc_msg(MANDOCERR_DIVZERO,
2448					r->parse, ln, *pos, v);
2449				*res = 0;
2450				break;
2451			}
2452			*res %= operand2;
2453			break;
2454		case '<':
2455			*res = *res < operand2;
2456			break;
2457		case '>':
2458			*res = *res > operand2;
2459			break;
2460		case 'l':
2461			*res = *res <= operand2;
2462			break;
2463		case 'g':
2464			*res = *res >= operand2;
2465			break;
2466		case '=':
2467			*res = *res == operand2;
2468			break;
2469		case '!':
2470			*res = *res != operand2;
2471			break;
2472		case '&':
2473			*res = *res && operand2;
2474			break;
2475		case ':':
2476			*res = *res || operand2;
2477			break;
2478		case 'i':
2479			if (operand2 < *res)
2480				*res = operand2;
2481			break;
2482		case 'a':
2483			if (operand2 > *res)
2484				*res = operand2;
2485			break;
2486		default:
2487			abort();
2488		}
2489	}
2490	return 1;
2491}
2492
2493/* --- register management ------------------------------------------------ */
2494
2495void
2496roff_setreg(struct roff *r, const char *name, int val, char sign)
2497{
2498	struct roffreg	*reg;
2499
2500	/* Search for an existing register with the same name. */
2501	reg = r->regtab;
2502
2503	while (reg && strcmp(name, reg->key.p))
2504		reg = reg->next;
2505
2506	if (NULL == reg) {
2507		/* Create a new register. */
2508		reg = mandoc_malloc(sizeof(struct roffreg));
2509		reg->key.p = mandoc_strdup(name);
2510		reg->key.sz = strlen(name);
2511		reg->val = 0;
2512		reg->next = r->regtab;
2513		r->regtab = reg;
2514	}
2515
2516	if ('+' == sign)
2517		reg->val += val;
2518	else if ('-' == sign)
2519		reg->val -= val;
2520	else
2521		reg->val = val;
2522}
2523
2524/*
2525 * Handle some predefined read-only number registers.
2526 * For now, return -1 if the requested register is not predefined;
2527 * in case a predefined read-only register having the value -1
2528 * were to turn up, another special value would have to be chosen.
2529 */
2530static int
2531roff_getregro(const struct roff *r, const char *name)
2532{
2533
2534	switch (*name) {
2535	case '$':  /* Number of arguments of the last macro evaluated. */
2536		return r->argc;
2537	case 'A':  /* ASCII approximation mode is always off. */
2538		return 0;
2539	case 'g':  /* Groff compatibility mode is always on. */
2540		return 1;
2541	case 'H':  /* Fixed horizontal resolution. */
2542		return 24;
2543	case 'j':  /* Always adjust left margin only. */
2544		return 0;
2545	case 'T':  /* Some output device is always defined. */
2546		return 1;
2547	case 'V':  /* Fixed vertical resolution. */
2548		return 40;
2549	default:
2550		return -1;
2551	}
2552}
2553
2554int
2555roff_getreg(const struct roff *r, const char *name)
2556{
2557	struct roffreg	*reg;
2558	int		 val;
2559
2560	if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2561		val = roff_getregro(r, name + 1);
2562		if (-1 != val)
2563			return val;
2564	}
2565
2566	for (reg = r->regtab; reg; reg = reg->next)
2567		if (0 == strcmp(name, reg->key.p))
2568			return reg->val;
2569
2570	return 0;
2571}
2572
2573static int
2574roff_getregn(const struct roff *r, const char *name, size_t len)
2575{
2576	struct roffreg	*reg;
2577	int		 val;
2578
2579	if ('.' == name[0] && 2 == len) {
2580		val = roff_getregro(r, name + 1);
2581		if (-1 != val)
2582			return val;
2583	}
2584
2585	for (reg = r->regtab; reg; reg = reg->next)
2586		if (len == reg->key.sz &&
2587		    0 == strncmp(name, reg->key.p, len))
2588			return reg->val;
2589
2590	return 0;
2591}
2592
2593static int
2594roff_hasregn(const struct roff *r, const char *name, size_t len)
2595{
2596	struct roffreg	*reg;
2597	int		 val;
2598
2599	if ('.' == name[0] && 2 == len) {
2600		val = roff_getregro(r, name + 1);
2601		if (-1 != val)
2602			return 1;
2603	}
2604
2605	for (reg = r->regtab; reg; reg = reg->next)
2606		if (len == reg->key.sz &&
2607		    0 == strncmp(name, reg->key.p, len))
2608			return 1;
2609
2610	return 0;
2611}
2612
2613static void
2614roff_freereg(struct roffreg *reg)
2615{
2616	struct roffreg	*old_reg;
2617
2618	while (NULL != reg) {
2619		free(reg->key.p);
2620		old_reg = reg;
2621		reg = reg->next;
2622		free(old_reg);
2623	}
2624}
2625
2626static enum rofferr
2627roff_nr(ROFF_ARGS)
2628{
2629	char		*key, *val;
2630	size_t		 keysz;
2631	int		 iv;
2632	char		 sign;
2633
2634	key = val = buf->buf + pos;
2635	if (*key == '\0')
2636		return ROFF_IGN;
2637
2638	keysz = roff_getname(r, &val, ln, pos);
2639	if (key[keysz] == '\\')
2640		return ROFF_IGN;
2641	key[keysz] = '\0';
2642
2643	sign = *val;
2644	if (sign == '+' || sign == '-')
2645		val++;
2646
2647	if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2648		roff_setreg(r, key, iv, sign);
2649
2650	return ROFF_IGN;
2651}
2652
2653static enum rofferr
2654roff_rr(ROFF_ARGS)
2655{
2656	struct roffreg	*reg, **prev;
2657	char		*name, *cp;
2658	size_t		 namesz;
2659
2660	name = cp = buf->buf + pos;
2661	if (*name == '\0')
2662		return ROFF_IGN;
2663	namesz = roff_getname(r, &cp, ln, pos);
2664	name[namesz] = '\0';
2665
2666	prev = &r->regtab;
2667	while (1) {
2668		reg = *prev;
2669		if (reg == NULL || !strcmp(name, reg->key.p))
2670			break;
2671		prev = &reg->next;
2672	}
2673	if (reg != NULL) {
2674		*prev = reg->next;
2675		free(reg->key.p);
2676		free(reg);
2677	}
2678	return ROFF_IGN;
2679}
2680
2681/* --- handler functions for roff requests -------------------------------- */
2682
2683static enum rofferr
2684roff_rm(ROFF_ARGS)
2685{
2686	const char	 *name;
2687	char		 *cp;
2688	size_t		  namesz;
2689
2690	cp = buf->buf + pos;
2691	while (*cp != '\0') {
2692		name = cp;
2693		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2694		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2695		if (name[namesz] == '\\')
2696			break;
2697	}
2698	return ROFF_IGN;
2699}
2700
2701static enum rofferr
2702roff_it(ROFF_ARGS)
2703{
2704	int		 iv;
2705
2706	/* Parse the number of lines. */
2707
2708	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2709		mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2710		    ln, ppos, buf->buf + 1);
2711		return ROFF_IGN;
2712	}
2713
2714	while (isspace((unsigned char)buf->buf[pos]))
2715		pos++;
2716
2717	/*
2718	 * Arm the input line trap.
2719	 * Special-casing "an-trap" is an ugly workaround to cope
2720	 * with DocBook stupidly fiddling with man(7) internals.
2721	 */
2722
2723	roffit_lines = iv;
2724	roffit_macro = mandoc_strdup(iv != 1 ||
2725	    strcmp(buf->buf + pos, "an-trap") ?
2726	    buf->buf + pos : "br");
2727	return ROFF_IGN;
2728}
2729
2730static enum rofferr
2731roff_Dd(ROFF_ARGS)
2732{
2733	const char *const	*cp;
2734
2735	if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2736		for (cp = __mdoc_reserved; *cp; cp++)
2737			roff_setstr(r, *cp, NULL, 0);
2738
2739	if (r->format == 0)
2740		r->format = MPARSE_MDOC;
2741
2742	return ROFF_CONT;
2743}
2744
2745static enum rofferr
2746roff_TH(ROFF_ARGS)
2747{
2748	const char *const	*cp;
2749
2750	if ((r->options & MPARSE_QUICK) == 0)
2751		for (cp = __man_reserved; *cp; cp++)
2752			roff_setstr(r, *cp, NULL, 0);
2753
2754	if (r->format == 0)
2755		r->format = MPARSE_MAN;
2756
2757	return ROFF_CONT;
2758}
2759
2760static enum rofferr
2761roff_TE(ROFF_ARGS)
2762{
2763
2764	if (NULL == r->tbl)
2765		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2766		    ln, ppos, "TE");
2767	else if ( ! tbl_end(&r->tbl)) {
2768		free(buf->buf);
2769		buf->buf = mandoc_strdup(".sp");
2770		buf->sz = 4;
2771		return ROFF_REPARSE;
2772	}
2773	return ROFF_IGN;
2774}
2775
2776static enum rofferr
2777roff_T_(ROFF_ARGS)
2778{
2779
2780	if (NULL == r->tbl)
2781		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2782		    ln, ppos, "T&");
2783	else
2784		tbl_restart(ppos, ln, r->tbl);
2785
2786	return ROFF_IGN;
2787}
2788
2789/*
2790 * Handle in-line equation delimiters.
2791 */
2792static enum rofferr
2793roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2794{
2795	char		*cp1, *cp2;
2796	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2797
2798	/*
2799	 * Outside equations, look for an opening delimiter.
2800	 * If we are inside an equation, we already know it is
2801	 * in-line, or this function wouldn't have been called;
2802	 * so look for a closing delimiter.
2803	 */
2804
2805	cp1 = buf->buf + pos;
2806	cp2 = strchr(cp1, r->eqn == NULL ?
2807	    r->last_eqn->odelim : r->last_eqn->cdelim);
2808	if (cp2 == NULL)
2809		return ROFF_CONT;
2810
2811	*cp2++ = '\0';
2812	bef_pr = bef_nl = aft_nl = aft_pr = "";
2813
2814	/* Handle preceding text, protecting whitespace. */
2815
2816	if (*buf->buf != '\0') {
2817		if (r->eqn == NULL)
2818			bef_pr = "\\&";
2819		bef_nl = "\n";
2820	}
2821
2822	/*
2823	 * Prepare replacing the delimiter with an equation macro
2824	 * and drop leading white space from the equation.
2825	 */
2826
2827	if (r->eqn == NULL) {
2828		while (*cp2 == ' ')
2829			cp2++;
2830		mac = ".EQ";
2831	} else
2832		mac = ".EN";
2833
2834	/* Handle following text, protecting whitespace. */
2835
2836	if (*cp2 != '\0') {
2837		aft_nl = "\n";
2838		if (r->eqn != NULL)
2839			aft_pr = "\\&";
2840	}
2841
2842	/* Do the actual replacement. */
2843
2844	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2845	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2846	free(buf->buf);
2847	buf->buf = cp1;
2848
2849	/* Toggle the in-line state of the eqn subsystem. */
2850
2851	r->eqn_inline = r->eqn == NULL;
2852	return ROFF_REPARSE;
2853}
2854
2855static enum rofferr
2856roff_EQ(ROFF_ARGS)
2857{
2858	struct eqn_node *e;
2859
2860	assert(r->eqn == NULL);
2861	e = eqn_alloc(ppos, ln, r->parse);
2862
2863	if (r->last_eqn) {
2864		r->last_eqn->next = e;
2865		e->delim = r->last_eqn->delim;
2866		e->odelim = r->last_eqn->odelim;
2867		e->cdelim = r->last_eqn->cdelim;
2868	} else
2869		r->first_eqn = r->last_eqn = e;
2870
2871	r->eqn = r->last_eqn = e;
2872
2873	if (buf->buf[pos] != '\0')
2874		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2875		    ".EQ %s", buf->buf + pos);
2876
2877	return ROFF_IGN;
2878}
2879
2880static enum rofferr
2881roff_EN(ROFF_ARGS)
2882{
2883
2884	mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2885	return ROFF_IGN;
2886}
2887
2888static enum rofferr
2889roff_TS(ROFF_ARGS)
2890{
2891	struct tbl_node	*tbl;
2892
2893	if (r->tbl) {
2894		mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2895		    ln, ppos, "TS breaks TS");
2896		tbl_end(&r->tbl);
2897	}
2898
2899	tbl = tbl_alloc(ppos, ln, r->parse);
2900
2901	if (r->last_tbl)
2902		r->last_tbl->next = tbl;
2903	else
2904		r->first_tbl = r->last_tbl = tbl;
2905
2906	r->tbl = r->last_tbl = tbl;
2907	return ROFF_IGN;
2908}
2909
2910static enum rofferr
2911roff_brp(ROFF_ARGS)
2912{
2913
2914	buf->buf[pos - 1] = '\0';
2915	return ROFF_CONT;
2916}
2917
2918static enum rofferr
2919roff_cc(ROFF_ARGS)
2920{
2921	const char	*p;
2922
2923	p = buf->buf + pos;
2924
2925	if (*p == '\0' || (r->control = *p++) == '.')
2926		r->control = 0;
2927
2928	if (*p != '\0')
2929		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2930		    ln, p - buf->buf, "cc ... %s", p);
2931
2932	return ROFF_IGN;
2933}
2934
2935static enum rofferr
2936roff_tr(ROFF_ARGS)
2937{
2938	const char	*p, *first, *second;
2939	size_t		 fsz, ssz;
2940	enum mandoc_esc	 esc;
2941
2942	p = buf->buf + pos;
2943
2944	if (*p == '\0') {
2945		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2946		return ROFF_IGN;
2947	}
2948
2949	while (*p != '\0') {
2950		fsz = ssz = 1;
2951
2952		first = p++;
2953		if (*first == '\\') {
2954			esc = mandoc_escape(&p, NULL, NULL);
2955			if (esc == ESCAPE_ERROR) {
2956				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2957				    ln, (int)(p - buf->buf), first);
2958				return ROFF_IGN;
2959			}
2960			fsz = (size_t)(p - first);
2961		}
2962
2963		second = p++;
2964		if (*second == '\\') {
2965			esc = mandoc_escape(&p, NULL, NULL);
2966			if (esc == ESCAPE_ERROR) {
2967				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2968				    ln, (int)(p - buf->buf), second);
2969				return ROFF_IGN;
2970			}
2971			ssz = (size_t)(p - second);
2972		} else if (*second == '\0') {
2973			mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2974			    ln, first - buf->buf, "tr %s", first);
2975			second = " ";
2976			p--;
2977		}
2978
2979		if (fsz > 1) {
2980			roff_setstrn(&r->xmbtab, first, fsz,
2981			    second, ssz, 0);
2982			continue;
2983		}
2984
2985		if (r->xtab == NULL)
2986			r->xtab = mandoc_calloc(128,
2987			    sizeof(struct roffstr));
2988
2989		free(r->xtab[(int)*first].p);
2990		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2991		r->xtab[(int)*first].sz = ssz;
2992	}
2993
2994	return ROFF_IGN;
2995}
2996
2997static enum rofferr
2998roff_so(ROFF_ARGS)
2999{
3000	char *name, *cp;
3001
3002	name = buf->buf + pos;
3003	mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3004
3005	/*
3006	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3007	 * opening anything that's not in our cwd or anything beneath
3008	 * it.  Thus, explicitly disallow traversing up the file-system
3009	 * or using absolute paths.
3010	 */
3011
3012	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3013		mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3014		    ".so %s", name);
3015		buf->sz = mandoc_asprintf(&cp,
3016		    ".sp\nSee the file %s.\n.sp", name) + 1;
3017		free(buf->buf);
3018		buf->buf = cp;
3019		*offs = 0;
3020		return ROFF_REPARSE;
3021	}
3022
3023	*offs = pos;
3024	return ROFF_SO;
3025}
3026
3027/* --- user defined strings and macros ------------------------------------ */
3028
3029static enum rofferr
3030roff_userdef(ROFF_ARGS)
3031{
3032	const char	 *arg[9], *ap;
3033	char		 *cp, *n1, *n2;
3034	int		  i, ib, ie;
3035	size_t		  asz, rsz;
3036
3037	/*
3038	 * Collect pointers to macro argument strings
3039	 * and NUL-terminate them.
3040	 */
3041
3042	r->argc = 0;
3043	cp = buf->buf + pos;
3044	for (i = 0; i < 9; i++) {
3045		if (*cp == '\0')
3046			arg[i] = "";
3047		else {
3048			arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
3049			r->argc = i + 1;
3050		}
3051	}
3052
3053	/*
3054	 * Expand macro arguments.
3055	 */
3056
3057	buf->sz = strlen(r->current_string) + 1;
3058	n1 = cp = mandoc_malloc(buf->sz);
3059	memcpy(n1, r->current_string, buf->sz);
3060	while (*cp != '\0') {
3061
3062		/* Scan ahead for the next argument invocation. */
3063
3064		if (*cp++ != '\\')
3065			continue;
3066		if (*cp++ != '$')
3067			continue;
3068		if (*cp == '*') {  /* \\$* inserts all arguments */
3069			ib = 0;
3070			ie = r->argc - 1;
3071		} else {  /* \\$1 .. \\$9 insert one argument */
3072			ib = ie = *cp - '1';
3073			if (ib < 0 || ib > 8)
3074				continue;
3075		}
3076		cp -= 2;
3077
3078		/*
3079		 * Determine the size of the expanded argument,
3080		 * taking escaping of quotes into account.
3081		 */
3082
3083		asz = ie > ib ? ie - ib : 0;  /* for blanks */
3084		for (i = ib; i <= ie; i++) {
3085			for (ap = arg[i]; *ap != '\0'; ap++) {
3086				asz++;
3087				if (*ap == '"')
3088					asz += 3;
3089			}
3090		}
3091		if (asz != 3) {
3092
3093			/*
3094			 * Determine the size of the rest of the
3095			 * unexpanded macro, including the NUL.
3096			 */
3097
3098			rsz = buf->sz - (cp - n1) - 3;
3099
3100			/*
3101			 * When shrinking, move before
3102			 * releasing the storage.
3103			 */
3104
3105			if (asz < 3)
3106				memmove(cp + asz, cp + 3, rsz);
3107
3108			/*
3109			 * Resize the storage for the macro
3110			 * and readjust the parse pointer.
3111			 */
3112
3113			buf->sz += asz - 3;
3114			n2 = mandoc_realloc(n1, buf->sz);
3115			cp = n2 + (cp - n1);
3116			n1 = n2;
3117
3118			/*
3119			 * When growing, make room
3120			 * for the expanded argument.
3121			 */
3122
3123			if (asz > 3)
3124				memmove(cp + asz, cp + 3, rsz);
3125		}
3126
3127		/* Copy the expanded argument, escaping quotes. */
3128
3129		n2 = cp;
3130		for (i = ib; i <= ie; i++) {
3131			for (ap = arg[i]; *ap != '\0'; ap++) {
3132				if (*ap == '"') {
3133					memcpy(n2, "\\(dq", 4);
3134					n2 += 4;
3135				} else
3136					*n2++ = *ap;
3137			}
3138			if (i < ie)
3139				*n2++ = ' ';
3140		}
3141	}
3142
3143	/*
3144	 * Replace the macro invocation
3145	 * by the expanded macro.
3146	 */
3147
3148	free(buf->buf);
3149	buf->buf = n1;
3150	*offs = 0;
3151
3152	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3153	   ROFF_REPARSE : ROFF_APPEND;
3154}
3155
3156static size_t
3157roff_getname(struct roff *r, char **cpp, int ln, int pos)
3158{
3159	char	 *name, *cp;
3160	size_t	  namesz;
3161
3162	name = *cpp;
3163	if ('\0' == *name)
3164		return 0;
3165
3166	/* Read until end of name and terminate it with NUL. */
3167	for (cp = name; 1; cp++) {
3168		if ('\0' == *cp || ' ' == *cp) {
3169			namesz = cp - name;
3170			break;
3171		}
3172		if ('\\' != *cp)
3173			continue;
3174		namesz = cp - name;
3175		if ('{' == cp[1] || '}' == cp[1])
3176			break;
3177		cp++;
3178		if ('\\' == *cp)
3179			continue;
3180		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
3181		    "%.*s", (int)(cp - name + 1), name);
3182		mandoc_escape((const char **)&cp, NULL, NULL);
3183		break;
3184	}
3185
3186	/* Read past spaces. */
3187	while (' ' == *cp)
3188		cp++;
3189
3190	*cpp = cp;
3191	return namesz;
3192}
3193
3194/*
3195 * Store *string into the user-defined string called *name.
3196 * To clear an existing entry, call with (*r, *name, NULL, 0).
3197 * append == 0: replace mode
3198 * append == 1: single-line append mode
3199 * append == 2: multiline append mode, append '\n' after each call
3200 */
3201static void
3202roff_setstr(struct roff *r, const char *name, const char *string,
3203	int append)
3204{
3205
3206	roff_setstrn(&r->strtab, name, strlen(name), string,
3207	    string ? strlen(string) : 0, append);
3208}
3209
3210static void
3211roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3212		const char *string, size_t stringsz, int append)
3213{
3214	struct roffkv	*n;
3215	char		*c;
3216	int		 i;
3217	size_t		 oldch, newch;
3218
3219	/* Search for an existing string with the same name. */
3220	n = *r;
3221
3222	while (n && (namesz != n->key.sz ||
3223			strncmp(n->key.p, name, namesz)))
3224		n = n->next;
3225
3226	if (NULL == n) {
3227		/* Create a new string table entry. */
3228		n = mandoc_malloc(sizeof(struct roffkv));
3229		n->key.p = mandoc_strndup(name, namesz);
3230		n->key.sz = namesz;
3231		n->val.p = NULL;
3232		n->val.sz = 0;
3233		n->next = *r;
3234		*r = n;
3235	} else if (0 == append) {
3236		free(n->val.p);
3237		n->val.p = NULL;
3238		n->val.sz = 0;
3239	}
3240
3241	if (NULL == string)
3242		return;
3243
3244	/*
3245	 * One additional byte for the '\n' in multiline mode,
3246	 * and one for the terminating '\0'.
3247	 */
3248	newch = stringsz + (1 < append ? 2u : 1u);
3249
3250	if (NULL == n->val.p) {
3251		n->val.p = mandoc_malloc(newch);
3252		*n->val.p = '\0';
3253		oldch = 0;
3254	} else {
3255		oldch = n->val.sz;
3256		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3257	}
3258
3259	/* Skip existing content in the destination buffer. */
3260	c = n->val.p + (int)oldch;
3261
3262	/* Append new content to the destination buffer. */
3263	i = 0;
3264	while (i < (int)stringsz) {
3265		/*
3266		 * Rudimentary roff copy mode:
3267		 * Handle escaped backslashes.
3268		 */
3269		if ('\\' == string[i] && '\\' == string[i + 1])
3270			i++;
3271		*c++ = string[i++];
3272	}
3273
3274	/* Append terminating bytes. */
3275	if (1 < append)
3276		*c++ = '\n';
3277
3278	*c = '\0';
3279	n->val.sz = (int)(c - n->val.p);
3280}
3281
3282static const char *
3283roff_getstrn(const struct roff *r, const char *name, size_t len)
3284{
3285	const struct roffkv *n;
3286	int i;
3287
3288	for (n = r->strtab; n; n = n->next)
3289		if (0 == strncmp(name, n->key.p, len) &&
3290		    '\0' == n->key.p[(int)len])
3291			return n->val.p;
3292
3293	for (i = 0; i < PREDEFS_MAX; i++)
3294		if (0 == strncmp(name, predefs[i].name, len) &&
3295				'\0' == predefs[i].name[(int)len])
3296			return predefs[i].str;
3297
3298	return NULL;
3299}
3300
3301static void
3302roff_freestr(struct roffkv *r)
3303{
3304	struct roffkv	 *n, *nn;
3305
3306	for (n = r; n; n = nn) {
3307		free(n->key.p);
3308		free(n->val.p);
3309		nn = n->next;
3310		free(n);
3311	}
3312}
3313
3314/* --- accessors and utility functions ------------------------------------ */
3315
3316const struct tbl_span *
3317roff_span(const struct roff *r)
3318{
3319
3320	return r->tbl ? tbl_span(r->tbl) : NULL;
3321}
3322
3323const struct eqn *
3324roff_eqn(const struct roff *r)
3325{
3326
3327	return r->last_eqn ? &r->last_eqn->eqn : NULL;
3328}
3329
3330/*
3331 * Duplicate an input string, making the appropriate character
3332 * conversations (as stipulated by `tr') along the way.
3333 * Returns a heap-allocated string with all the replacements made.
3334 */
3335char *
3336roff_strdup(const struct roff *r, const char *p)
3337{
3338	const struct roffkv *cp;
3339	char		*res;
3340	const char	*pp;
3341	size_t		 ssz, sz;
3342	enum mandoc_esc	 esc;
3343
3344	if (NULL == r->xmbtab && NULL == r->xtab)
3345		return mandoc_strdup(p);
3346	else if ('\0' == *p)
3347		return mandoc_strdup("");
3348
3349	/*
3350	 * Step through each character looking for term matches
3351	 * (remember that a `tr' can be invoked with an escape, which is
3352	 * a glyph but the escape is multi-character).
3353	 * We only do this if the character hash has been initialised
3354	 * and the string is >0 length.
3355	 */
3356
3357	res = NULL;
3358	ssz = 0;
3359
3360	while ('\0' != *p) {
3361		if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
3362			sz = r->xtab[(int)*p].sz;
3363			res = mandoc_realloc(res, ssz + sz + 1);
3364			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3365			ssz += sz;
3366			p++;
3367			continue;
3368		} else if ('\\' != *p) {
3369			res = mandoc_realloc(res, ssz + 2);
3370			res[ssz++] = *p++;
3371			continue;
3372		}
3373
3374		/* Search for term matches. */
3375		for (cp = r->xmbtab; cp; cp = cp->next)
3376			if (0 == strncmp(p, cp->key.p, cp->key.sz))
3377				break;
3378
3379		if (NULL != cp) {
3380			/*
3381			 * A match has been found.
3382			 * Append the match to the array and move
3383			 * forward by its keysize.
3384			 */
3385			res = mandoc_realloc(res,
3386			    ssz + cp->val.sz + 1);
3387			memcpy(res + ssz, cp->val.p, cp->val.sz);
3388			ssz += cp->val.sz;
3389			p += (int)cp->key.sz;
3390			continue;
3391		}
3392
3393		/*
3394		 * Handle escapes carefully: we need to copy
3395		 * over just the escape itself, or else we might
3396		 * do replacements within the escape itself.
3397		 * Make sure to pass along the bogus string.
3398		 */
3399		pp = p++;
3400		esc = mandoc_escape(&p, NULL, NULL);
3401		if (ESCAPE_ERROR == esc) {
3402			sz = strlen(pp);
3403			res = mandoc_realloc(res, ssz + sz + 1);
3404			memcpy(res + ssz, pp, sz);
3405			break;
3406		}
3407		/*
3408		 * We bail out on bad escapes.
3409		 * No need to warn: we already did so when
3410		 * roff_res() was called.
3411		 */
3412		sz = (int)(p - pp);
3413		res = mandoc_realloc(res, ssz + sz + 1);
3414		memcpy(res + ssz, pp, sz);
3415		ssz += sz;
3416	}
3417
3418	res[(int)ssz] = '\0';
3419	return res;
3420}
3421
3422int
3423roff_getformat(const struct roff *r)
3424{
3425
3426	return r->format;
3427}
3428
3429/*
3430 * Find out whether a line is a macro line or not.
3431 * If it is, adjust the current position and return one; if it isn't,
3432 * return zero and don't change the current position.
3433 * If the control character has been set with `.cc', then let that grain
3434 * precedence.
3435 * This is slighly contrary to groff, where using the non-breaking
3436 * control character when `cc' has been invoked will cause the
3437 * non-breaking macro contents to be printed verbatim.
3438 */
3439int
3440roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3441{
3442	int		pos;
3443
3444	pos = *ppos;
3445
3446	if (0 != r->control && cp[pos] == r->control)
3447		pos++;
3448	else if (0 != r->control)
3449		return 0;
3450	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3451		pos += 2;
3452	else if ('.' == cp[pos] || '\'' == cp[pos])
3453		pos++;
3454	else
3455		return 0;
3456
3457	while (' ' == cp[pos] || '\t' == cp[pos])
3458		pos++;
3459
3460	*ppos = pos;
3461	return 1;
3462}
3463