1241675Suqs/*	$Id: man_macro.c,v 1.71 2012/01/03 15:16:24 kristaps Exp $ */
2241675Suqs/*
3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4241675Suqs *
5241675Suqs * Permission to use, copy, modify, and distribute this software for any
6241675Suqs * purpose with or without fee is hereby granted, provided that the above
7241675Suqs * copyright notice and this permission notice appear in all copies.
8241675Suqs *
9241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16241675Suqs */
17241675Suqs#ifdef HAVE_CONFIG_H
18241675Suqs#include "config.h"
19241675Suqs#endif
20241675Suqs
21241675Suqs#include <assert.h>
22241675Suqs#include <ctype.h>
23241675Suqs#include <stdlib.h>
24241675Suqs#include <string.h>
25241675Suqs
26241675Suqs#include "man.h"
27241675Suqs#include "mandoc.h"
28241675Suqs#include "libmandoc.h"
29241675Suqs#include "libman.h"
30241675Suqs
/*
 * Verdict returned by the rewind helpers for each node examined while
 * ascending the parse tree.
 */
enum	rew {
	REW_REWIND,	/* stop here and rewind up to this node */
	REW_NOHALT,	/* keep ascending */
	REW_HALT	/* stop immediately, rewinding nothing */
};
36241675Suqs
/* Macro handlers referenced from the dispatch table. */
static	int		 blk_close(MACRO_PROT_ARGS);
static	int		 blk_exp(MACRO_PROT_ARGS);
static	int		 blk_imp(MACRO_PROT_ARGS);
static	int		 in_line_eoln(MACRO_PROT_ARGS);

/* Argument extraction shared by the handlers. */
static	int		 man_args(struct man *m, int line,
				int *pos, char *buf, char **v);

/* Scope-rewinding helpers. */
static	int		 rew_scope(enum man_type type,
				struct man *m, enum mant tok);
static	enum rew	 rew_dohalt(enum mant tok, enum man_type type,
				const struct man_node *n);
static	enum rew	 rew_block(enum mant ntok, enum man_type type,
				const struct man_node *n);
static	void		 rew_warn(struct man *m,
				struct man_node *n, enum mandocerr er);
52241675Suqs
53241675Suqsconst	struct man_macro __man_macros[MAN_MAX] = {
54241675Suqs	{ in_line_eoln, MAN_NSCOPED }, /* br */
55241675Suqs	{ in_line_eoln, MAN_BSCOPE }, /* TH */
56241675Suqs	{ blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SH */
57241675Suqs	{ blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SS */
58241675Suqs	{ blk_imp, MAN_BSCOPE | MAN_SCOPED | MAN_FSCOPED }, /* TP */
59241675Suqs	{ blk_imp, MAN_BSCOPE }, /* LP */
60241675Suqs	{ blk_imp, MAN_BSCOPE }, /* PP */
61241675Suqs	{ blk_imp, MAN_BSCOPE }, /* P */
62241675Suqs	{ blk_imp, MAN_BSCOPE }, /* IP */
63241675Suqs	{ blk_imp, MAN_BSCOPE }, /* HP */
64241675Suqs	{ in_line_eoln, MAN_SCOPED }, /* SM */
65241675Suqs	{ in_line_eoln, MAN_SCOPED }, /* SB */
66241675Suqs	{ in_line_eoln, 0 }, /* BI */
67241675Suqs	{ in_line_eoln, 0 }, /* IB */
68241675Suqs	{ in_line_eoln, 0 }, /* BR */
69241675Suqs	{ in_line_eoln, 0 }, /* RB */
70241675Suqs	{ in_line_eoln, MAN_SCOPED }, /* R */
71241675Suqs	{ in_line_eoln, MAN_SCOPED }, /* B */
72241675Suqs	{ in_line_eoln, MAN_SCOPED }, /* I */
73241675Suqs	{ in_line_eoln, 0 }, /* IR */
74241675Suqs	{ in_line_eoln, 0 }, /* RI */
75241675Suqs	{ in_line_eoln, MAN_NSCOPED }, /* na */
76241675Suqs	{ in_line_eoln, MAN_NSCOPED }, /* sp */
77241675Suqs	{ in_line_eoln, MAN_BSCOPE }, /* nf */
78241675Suqs	{ in_line_eoln, MAN_BSCOPE }, /* fi */
79241675Suqs	{ blk_close, 0 }, /* RE */
80241675Suqs	{ blk_exp, MAN_EXPLICIT }, /* RS */
81241675Suqs	{ in_line_eoln, 0 }, /* DT */
82241675Suqs	{ in_line_eoln, 0 }, /* UC */
83241675Suqs	{ in_line_eoln, 0 }, /* PD */
84241675Suqs	{ in_line_eoln, 0 }, /* AT */
85241675Suqs	{ in_line_eoln, 0 }, /* in */
86241675Suqs	{ in_line_eoln, 0 }, /* ft */
87241675Suqs	{ in_line_eoln, 0 }, /* OP */
88241675Suqs};
89241675Suqs
90241675Suqsconst	struct man_macro * const man_macros = __man_macros;
91241675Suqs
92241675Suqs
93241675Suqs/*
94241675Suqs * Warn when "n" is an explicit non-roff macro.
95241675Suqs */
96241675Suqsstatic void
97241675Suqsrew_warn(struct man *m, struct man_node *n, enum mandocerr er)
98241675Suqs{
99241675Suqs
100241675Suqs	if (er == MANDOCERR_MAX || MAN_BLOCK != n->type)
101241675Suqs		return;
102241675Suqs	if (MAN_VALID & n->flags)
103241675Suqs		return;
104241675Suqs	if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags))
105241675Suqs		return;
106241675Suqs
107241675Suqs	assert(er < MANDOCERR_FATAL);
108241675Suqs	man_nmsg(m, n, er);
109241675Suqs}
110241675Suqs
111241675Suqs
112241675Suqs/*
113241675Suqs * Rewind scope.  If a code "er" != MANDOCERR_MAX has been provided, it
114241675Suqs * will be used if an explicit block scope is being closed out.
115241675Suqs */
116241675Suqsint
117241675Suqsman_unscope(struct man *m, const struct man_node *to,
118241675Suqs		enum mandocerr er)
119241675Suqs{
120241675Suqs	struct man_node	*n;
121241675Suqs
122241675Suqs	assert(to);
123241675Suqs
124241675Suqs	m->next = MAN_NEXT_SIBLING;
125241675Suqs
126241675Suqs	/* LINTED */
127241675Suqs	while (m->last != to) {
128241675Suqs		/*
129241675Suqs		 * Save the parent here, because we may delete the
130241675Suqs		 * m->last node in the post-validation phase and reset
131241675Suqs		 * it to m->last->parent, causing a step in the closing
132241675Suqs		 * out to be lost.
133241675Suqs		 */
134241675Suqs		n = m->last->parent;
135241675Suqs		rew_warn(m, m->last, er);
136241675Suqs		if ( ! man_valid_post(m))
137241675Suqs			return(0);
138241675Suqs		m->last = n;
139241675Suqs		assert(m->last);
140241675Suqs	}
141241675Suqs
142241675Suqs	rew_warn(m, m->last, er);
143241675Suqs	if ( ! man_valid_post(m))
144241675Suqs		return(0);
145241675Suqs
146241675Suqs	return(1);
147241675Suqs}
148241675Suqs
149241675Suqs
150241675Suqsstatic enum rew
151241675Suqsrew_block(enum mant ntok, enum man_type type, const struct man_node *n)
152241675Suqs{
153241675Suqs
154241675Suqs	if (MAN_BLOCK == type && ntok == n->parent->tok &&
155241675Suqs			MAN_BODY == n->parent->type)
156241675Suqs		return(REW_REWIND);
157241675Suqs	return(ntok == n->tok ? REW_HALT : REW_NOHALT);
158241675Suqs}
159241675Suqs
160241675Suqs
161241675Suqs/*
162241675Suqs * There are three scope levels: scoped to the root (all), scoped to the
163241675Suqs * section (all less sections), and scoped to subsections (all less
164241675Suqs * sections and subsections).
165241675Suqs */
166241675Suqsstatic enum rew
167241675Suqsrew_dohalt(enum mant tok, enum man_type type, const struct man_node *n)
168241675Suqs{
169241675Suqs	enum rew	 c;
170241675Suqs
171241675Suqs	/* We cannot progress beyond the root ever. */
172241675Suqs	if (MAN_ROOT == n->type)
173241675Suqs		return(REW_HALT);
174241675Suqs
175241675Suqs	assert(n->parent);
176241675Suqs
177241675Suqs	/* Normal nodes shouldn't go to the level of the root. */
178241675Suqs	if (MAN_ROOT == n->parent->type)
179241675Suqs		return(REW_REWIND);
180241675Suqs
181241675Suqs	/* Already-validated nodes should be closed out. */
182241675Suqs	if (MAN_VALID & n->flags)
183241675Suqs		return(REW_NOHALT);
184241675Suqs
185241675Suqs	/* First: rewind to ourselves. */
186241675Suqs	if (type == n->type && tok == n->tok)
187241675Suqs		return(REW_REWIND);
188241675Suqs
189241675Suqs	/*
190241675Suqs	 * Next follow the implicit scope-smashings as defined by man.7:
191241675Suqs	 * section, sub-section, etc.
192241675Suqs	 */
193241675Suqs
194241675Suqs	switch (tok) {
195241675Suqs	case (MAN_SH):
196241675Suqs		break;
197241675Suqs	case (MAN_SS):
198241675Suqs		/* Rewind to a section, if a block. */
199241675Suqs		if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
200241675Suqs			return(c);
201241675Suqs		break;
202241675Suqs	case (MAN_RS):
203241675Suqs		/* Rewind to a subsection, if a block. */
204241675Suqs		if (REW_NOHALT != (c = rew_block(MAN_SS, type, n)))
205241675Suqs			return(c);
206241675Suqs		/* Rewind to a section, if a block. */
207241675Suqs		if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
208241675Suqs			return(c);
209241675Suqs		break;
210241675Suqs	default:
211241675Suqs		/* Rewind to an offsetter, if a block. */
212241675Suqs		if (REW_NOHALT != (c = rew_block(MAN_RS, type, n)))
213241675Suqs			return(c);
214241675Suqs		/* Rewind to a subsection, if a block. */
215241675Suqs		if (REW_NOHALT != (c = rew_block(MAN_SS, type, n)))
216241675Suqs			return(c);
217241675Suqs		/* Rewind to a section, if a block. */
218241675Suqs		if (REW_NOHALT != (c = rew_block(MAN_SH, type, n)))
219241675Suqs			return(c);
220241675Suqs		break;
221241675Suqs	}
222241675Suqs
223241675Suqs	return(REW_NOHALT);
224241675Suqs}
225241675Suqs
226241675Suqs
227241675Suqs/*
228241675Suqs * Rewinding entails ascending the parse tree until a coherent point,
229241675Suqs * for example, the `SH' macro will close out any intervening `SS'
230241675Suqs * scopes.  When a scope is closed, it must be validated and actioned.
231241675Suqs */
232241675Suqsstatic int
233241675Suqsrew_scope(enum man_type type, struct man *m, enum mant tok)
234241675Suqs{
235241675Suqs	struct man_node	*n;
236241675Suqs	enum rew	 c;
237241675Suqs
238241675Suqs	/* LINTED */
239241675Suqs	for (n = m->last; n; n = n->parent) {
240241675Suqs		/*
241241675Suqs		 * Whether we should stop immediately (REW_HALT), stop
242241675Suqs		 * and rewind until this point (REW_REWIND), or keep
243241675Suqs		 * rewinding (REW_NOHALT).
244241675Suqs		 */
245241675Suqs		c = rew_dohalt(tok, type, n);
246241675Suqs		if (REW_HALT == c)
247241675Suqs			return(1);
248241675Suqs		if (REW_REWIND == c)
249241675Suqs			break;
250241675Suqs	}
251241675Suqs
252241675Suqs	/*
253241675Suqs	 * Rewind until the current point.  Warn if we're a roff
254241675Suqs	 * instruction that's mowing over explicit scopes.
255241675Suqs	 */
256241675Suqs	assert(n);
257241675Suqs
258241675Suqs	return(man_unscope(m, n, MANDOCERR_MAX));
259241675Suqs}
260241675Suqs
261241675Suqs
262241675Suqs/*
263241675Suqs * Close out a generic explicit macro.
264241675Suqs */
265241675Suqs/* ARGSUSED */
266241675Suqsint
267241675Suqsblk_close(MACRO_PROT_ARGS)
268241675Suqs{
269241675Suqs	enum mant	 	 ntok;
270241675Suqs	const struct man_node	*nn;
271241675Suqs
272241675Suqs	switch (tok) {
273241675Suqs	case (MAN_RE):
274241675Suqs		ntok = MAN_RS;
275241675Suqs		break;
276241675Suqs	default:
277241675Suqs		abort();
278241675Suqs		/* NOTREACHED */
279241675Suqs	}
280241675Suqs
281241675Suqs	for (nn = m->last->parent; nn; nn = nn->parent)
282241675Suqs		if (ntok == nn->tok)
283241675Suqs			break;
284241675Suqs
285241675Suqs	if (NULL == nn)
286241675Suqs		man_pmsg(m, line, ppos, MANDOCERR_NOSCOPE);
287241675Suqs
288241675Suqs	if ( ! rew_scope(MAN_BODY, m, ntok))
289241675Suqs		return(0);
290241675Suqs	if ( ! rew_scope(MAN_BLOCK, m, ntok))
291241675Suqs		return(0);
292241675Suqs
293241675Suqs	return(1);
294241675Suqs}
295241675Suqs
296241675Suqs
297241675Suqs/* ARGSUSED */
298241675Suqsint
299241675Suqsblk_exp(MACRO_PROT_ARGS)
300241675Suqs{
301241675Suqs	int		 la;
302241675Suqs	char		*p;
303241675Suqs
304241675Suqs	/*
305241675Suqs	 * Close out prior scopes.  "Regular" explicit macros cannot be
306241675Suqs	 * nested, but we allow roff macros to be placed just about
307241675Suqs	 * anywhere.
308241675Suqs	 */
309241675Suqs
310241675Suqs	if ( ! man_block_alloc(m, line, ppos, tok))
311241675Suqs		return(0);
312241675Suqs	if ( ! man_head_alloc(m, line, ppos, tok))
313241675Suqs		return(0);
314241675Suqs
315241675Suqs	for (;;) {
316241675Suqs		la = *pos;
317241675Suqs		if ( ! man_args(m, line, pos, buf, &p))
318241675Suqs			break;
319241675Suqs		if ( ! man_word_alloc(m, line, la, p))
320241675Suqs			return(0);
321241675Suqs	}
322241675Suqs
323241675Suqs	assert(m);
324241675Suqs	assert(tok != MAN_MAX);
325241675Suqs
326241675Suqs	if ( ! rew_scope(MAN_HEAD, m, tok))
327241675Suqs		return(0);
328241675Suqs	return(man_body_alloc(m, line, ppos, tok));
329241675Suqs}
330241675Suqs
331241675Suqs
332241675Suqs
333241675Suqs/*
334241675Suqs * Parse an implicit-block macro.  These contain a MAN_HEAD and a
335241675Suqs * MAN_BODY contained within a MAN_BLOCK.  Rules for closing out other
336241675Suqs * scopes, such as `SH' closing out an `SS', are defined in the rew
337241675Suqs * routines.
338241675Suqs */
339241675Suqs/* ARGSUSED */
340241675Suqsint
341241675Suqsblk_imp(MACRO_PROT_ARGS)
342241675Suqs{
343241675Suqs	int		 la;
344241675Suqs	char		*p;
345241675Suqs	struct man_node	*n;
346241675Suqs
347241675Suqs	/* Close out prior scopes. */
348241675Suqs
349241675Suqs	if ( ! rew_scope(MAN_BODY, m, tok))
350241675Suqs		return(0);
351241675Suqs	if ( ! rew_scope(MAN_BLOCK, m, tok))
352241675Suqs		return(0);
353241675Suqs
354241675Suqs	/* Allocate new block & head scope. */
355241675Suqs
356241675Suqs	if ( ! man_block_alloc(m, line, ppos, tok))
357241675Suqs		return(0);
358241675Suqs	if ( ! man_head_alloc(m, line, ppos, tok))
359241675Suqs		return(0);
360241675Suqs
361241675Suqs	n = m->last;
362241675Suqs
363241675Suqs	/* Add line arguments. */
364241675Suqs
365241675Suqs	for (;;) {
366241675Suqs		la = *pos;
367241675Suqs		if ( ! man_args(m, line, pos, buf, &p))
368241675Suqs			break;
369241675Suqs		if ( ! man_word_alloc(m, line, la, p))
370241675Suqs			return(0);
371241675Suqs	}
372241675Suqs
373241675Suqs	/* Close out head and open body (unless MAN_SCOPE). */
374241675Suqs
375241675Suqs	if (MAN_SCOPED & man_macros[tok].flags) {
376241675Suqs		/* If we're forcing scope (`TP'), keep it open. */
377241675Suqs		if (MAN_FSCOPED & man_macros[tok].flags) {
378241675Suqs			m->flags |= MAN_BLINE;
379241675Suqs			return(1);
380241675Suqs		} else if (n == m->last) {
381241675Suqs			m->flags |= MAN_BLINE;
382241675Suqs			return(1);
383241675Suqs		}
384241675Suqs	}
385241675Suqs
386241675Suqs	if ( ! rew_scope(MAN_HEAD, m, tok))
387241675Suqs		return(0);
388241675Suqs	return(man_body_alloc(m, line, ppos, tok));
389241675Suqs}
390241675Suqs
391241675Suqs
392241675Suqs/* ARGSUSED */
393241675Suqsint
394241675Suqsin_line_eoln(MACRO_PROT_ARGS)
395241675Suqs{
396241675Suqs	int		 la;
397241675Suqs	char		*p;
398241675Suqs	struct man_node	*n;
399241675Suqs
400241675Suqs	if ( ! man_elem_alloc(m, line, ppos, tok))
401241675Suqs		return(0);
402241675Suqs
403241675Suqs	n = m->last;
404241675Suqs
405241675Suqs	for (;;) {
406241675Suqs		la = *pos;
407241675Suqs		if ( ! man_args(m, line, pos, buf, &p))
408241675Suqs			break;
409241675Suqs		if ( ! man_word_alloc(m, line, la, p))
410241675Suqs			return(0);
411241675Suqs	}
412241675Suqs
413241675Suqs	/*
414241675Suqs	 * If no arguments are specified and this is MAN_SCOPED (i.e.,
415241675Suqs	 * next-line scoped), then set our mode to indicate that we're
416241675Suqs	 * waiting for terms to load into our context.
417241675Suqs	 */
418241675Suqs
419241675Suqs	if (n == m->last && MAN_SCOPED & man_macros[tok].flags) {
420241675Suqs		assert( ! (MAN_NSCOPED & man_macros[tok].flags));
421241675Suqs		m->flags |= MAN_ELINE;
422241675Suqs		return(1);
423241675Suqs	}
424241675Suqs
425241675Suqs	/* Set ignorable context, if applicable. */
426241675Suqs
427241675Suqs	if (MAN_NSCOPED & man_macros[tok].flags) {
428241675Suqs		assert( ! (MAN_SCOPED & man_macros[tok].flags));
429241675Suqs		m->flags |= MAN_ILINE;
430241675Suqs	}
431241675Suqs
432241675Suqs	assert(MAN_ROOT != m->last->type);
433241675Suqs	m->next = MAN_NEXT_SIBLING;
434241675Suqs
435241675Suqs	/*
436241675Suqs	 * Rewind our element scope.  Note that when TH is pruned, we'll
437241675Suqs	 * be back at the root, so make sure that we don't clobber as
438241675Suqs	 * its sibling.
439241675Suqs	 */
440241675Suqs
441241675Suqs	for ( ; m->last; m->last = m->last->parent) {
442241675Suqs		if (m->last == n)
443241675Suqs			break;
444241675Suqs		if (m->last->type == MAN_ROOT)
445241675Suqs			break;
446241675Suqs		if ( ! man_valid_post(m))
447241675Suqs			return(0);
448241675Suqs	}
449241675Suqs
450241675Suqs	assert(m->last);
451241675Suqs
452241675Suqs	/*
453241675Suqs	 * Same here regarding whether we're back at the root.
454241675Suqs	 */
455241675Suqs
456241675Suqs	if (m->last->type != MAN_ROOT && ! man_valid_post(m))
457241675Suqs		return(0);
458241675Suqs
459241675Suqs	return(1);
460241675Suqs}
461241675Suqs
462241675Suqs
463241675Suqsint
464241675Suqsman_macroend(struct man *m)
465241675Suqs{
466241675Suqs
467241675Suqs	return(man_unscope(m, m->first, MANDOCERR_SCOPEEXIT));
468241675Suqs}
469241675Suqs
470241675Suqsstatic int
471241675Suqsman_args(struct man *m, int line, int *pos, char *buf, char **v)
472241675Suqs{
473241675Suqs	char	 *start;
474241675Suqs
475241675Suqs	assert(*pos);
476241675Suqs	*v = start = buf + *pos;
477241675Suqs	assert(' ' != *start);
478241675Suqs
479241675Suqs	if ('\0' == *start)
480241675Suqs		return(0);
481241675Suqs
482241675Suqs	*v = mandoc_getarg(m->parse, v, line, pos);
483241675Suqs	return(1);
484241675Suqs}
485