1241675Suqs/*	$Id: mdoc_argv.c,v 1.82 2012/03/23 05:50:24 kristaps Exp $ */
2241675Suqs/*
3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4241675Suqs *
5241675Suqs * Permission to use, copy, modify, and distribute this software for any
6241675Suqs * purpose with or without fee is hereby granted, provided that the above
7241675Suqs * copyright notice and this permission notice appear in all copies.
8241675Suqs *
9241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16241675Suqs */
17241675Suqs#ifdef HAVE_CONFIG_H
18241675Suqs#include "config.h"
19241675Suqs#endif
20241675Suqs
21241675Suqs#include <sys/types.h>
22241675Suqs
23241675Suqs#include <assert.h>
24241675Suqs#include <stdlib.h>
25241675Suqs#include <stdio.h>
26241675Suqs#include <string.h>
27241675Suqs
28241675Suqs#include "mdoc.h"
29241675Suqs#include "mandoc.h"
30241675Suqs#include "libmdoc.h"
31241675Suqs#include "libmandoc.h"
32241675Suqs
/* Growth step, in slots, for a multi-value flag's value vector. */
#define	MULTI_STEP	 5
/* Size of the scratch buffer holding one candidate delimiter token. */
#define	DELIMSZ	  	 6
35241675Suqs
/*
 * Selects how args() splits the text that follows a macro.
 */
enum	argsflag {
	/* no special handling */
	ARGSFL_NONE = 0,
	/* handle delimiters of [[::delim::][ ]+]+ */
	ARGSFL_DELIM,
	/* handle tab/`Ta' separated phrases */
	ARGSFL_TABSEP
};
41241675Suqs
/*
 * How many values a `-flag' takes; mdoc_argv() picks the matching
 * value parser from this.
 */
enum	argvflag {
	/* no args to flag (e.g., -split) */
	ARGV_NONE,
	/* one arg to flag (e.g., -file xxx) */
	ARGV_SINGLE,
	/* multiple args (e.g., -column xxx yyy) */
	ARGV_MULTI,
	/* optional arg (e.g., -offset [xxx]) */
	ARGV_OPT_SINGLE
};
48241675Suqs
49241675Suqsstruct	mdocarg {
50241675Suqs	enum argsflag	 flags;
51241675Suqs	const enum mdocargt *argvs;
52241675Suqs};
53241675Suqs
/* Free one argument of a set and close the gap it leaves. */
static	void		 argn_free(struct mdoc_arg *p, int iarg);
/* Extract the next word, quoted word or phrase from the line. */
static	enum margserr	 args(struct mdoc *m, int line, int *pos,
				char *buf, enum argsflag fl, char **v);
/* Test whether the rest of the line is only closing punctuation. */
static	int		 args_checkpunct(const char *buf, int i);
/* Value parsers for flags taking many, at most one, or one value. */
static	int		 argv_multi(struct mdoc *m, int line,
				struct mdoc_argv *v, int *pos, char *buf);
static	int		 argv_opt_single(struct mdoc *m, int line,
				struct mdoc_argv *v, int *pos, char *buf);
static	int		 argv_single(struct mdoc *m, int line,
				struct mdoc_argv *v, int *pos, char *buf);
64241675Suqs
65241675Suqsstatic	const enum argvflag argvflags[MDOC_ARG_MAX] = {
66241675Suqs	ARGV_NONE,	/* MDOC_Split */
67241675Suqs	ARGV_NONE,	/* MDOC_Nosplit */
68241675Suqs	ARGV_NONE,	/* MDOC_Ragged */
69241675Suqs	ARGV_NONE,	/* MDOC_Unfilled */
70241675Suqs	ARGV_NONE,	/* MDOC_Literal */
71241675Suqs	ARGV_SINGLE,	/* MDOC_File */
72241675Suqs	ARGV_OPT_SINGLE, /* MDOC_Offset */
73241675Suqs	ARGV_NONE,	/* MDOC_Bullet */
74241675Suqs	ARGV_NONE,	/* MDOC_Dash */
75241675Suqs	ARGV_NONE,	/* MDOC_Hyphen */
76241675Suqs	ARGV_NONE,	/* MDOC_Item */
77241675Suqs	ARGV_NONE,	/* MDOC_Enum */
78241675Suqs	ARGV_NONE,	/* MDOC_Tag */
79241675Suqs	ARGV_NONE,	/* MDOC_Diag */
80241675Suqs	ARGV_NONE,	/* MDOC_Hang */
81241675Suqs	ARGV_NONE,	/* MDOC_Ohang */
82241675Suqs	ARGV_NONE,	/* MDOC_Inset */
83241675Suqs	ARGV_MULTI,	/* MDOC_Column */
84241675Suqs	ARGV_OPT_SINGLE, /* MDOC_Width */
85241675Suqs	ARGV_NONE,	/* MDOC_Compact */
86241675Suqs	ARGV_NONE,	/* MDOC_Std */
87241675Suqs	ARGV_NONE,	/* MDOC_Filled */
88241675Suqs	ARGV_NONE,	/* MDOC_Words */
89241675Suqs	ARGV_NONE,	/* MDOC_Emphasis */
90241675Suqs	ARGV_NONE,	/* MDOC_Symbolic */
91241675Suqs	ARGV_NONE	/* MDOC_Symbolic */
92241675Suqs};
93241675Suqs
94241675Suqsstatic	const enum mdocargt args_Ex[] = {
95241675Suqs	MDOC_Std,
96241675Suqs	MDOC_ARG_MAX
97241675Suqs};
98241675Suqs
99241675Suqsstatic	const enum mdocargt args_An[] = {
100241675Suqs	MDOC_Split,
101241675Suqs	MDOC_Nosplit,
102241675Suqs	MDOC_ARG_MAX
103241675Suqs};
104241675Suqs
105241675Suqsstatic	const enum mdocargt args_Bd[] = {
106241675Suqs	MDOC_Ragged,
107241675Suqs	MDOC_Unfilled,
108241675Suqs	MDOC_Filled,
109241675Suqs	MDOC_Literal,
110241675Suqs	MDOC_File,
111241675Suqs	MDOC_Offset,
112241675Suqs	MDOC_Compact,
113241675Suqs	MDOC_Centred,
114241675Suqs	MDOC_ARG_MAX
115241675Suqs};
116241675Suqs
117241675Suqsstatic	const enum mdocargt args_Bf[] = {
118241675Suqs	MDOC_Emphasis,
119241675Suqs	MDOC_Literal,
120241675Suqs	MDOC_Symbolic,
121241675Suqs	MDOC_ARG_MAX
122241675Suqs};
123241675Suqs
124241675Suqsstatic	const enum mdocargt args_Bk[] = {
125241675Suqs	MDOC_Words,
126241675Suqs	MDOC_ARG_MAX
127241675Suqs};
128241675Suqs
129241675Suqsstatic	const enum mdocargt args_Bl[] = {
130241675Suqs	MDOC_Bullet,
131241675Suqs	MDOC_Dash,
132241675Suqs	MDOC_Hyphen,
133241675Suqs	MDOC_Item,
134241675Suqs	MDOC_Enum,
135241675Suqs	MDOC_Tag,
136241675Suqs	MDOC_Diag,
137241675Suqs	MDOC_Hang,
138241675Suqs	MDOC_Ohang,
139241675Suqs	MDOC_Inset,
140241675Suqs	MDOC_Column,
141241675Suqs	MDOC_Width,
142241675Suqs	MDOC_Offset,
143241675Suqs	MDOC_Compact,
144241675Suqs	MDOC_Nested,
145241675Suqs	MDOC_ARG_MAX
146241675Suqs};
147241675Suqs
148241675Suqsstatic	const struct mdocarg mdocargs[MDOC_MAX] = {
149241675Suqs	{ ARGSFL_NONE, NULL }, /* Ap */
150241675Suqs	{ ARGSFL_NONE, NULL }, /* Dd */
151241675Suqs	{ ARGSFL_NONE, NULL }, /* Dt */
152241675Suqs	{ ARGSFL_NONE, NULL }, /* Os */
153241675Suqs	{ ARGSFL_NONE, NULL }, /* Sh */
154241675Suqs	{ ARGSFL_NONE, NULL }, /* Ss */
155241675Suqs	{ ARGSFL_NONE, NULL }, /* Pp */
156241675Suqs	{ ARGSFL_DELIM, NULL }, /* D1 */
157241675Suqs	{ ARGSFL_DELIM, NULL }, /* Dl */
158241675Suqs	{ ARGSFL_NONE, args_Bd }, /* Bd */
159241675Suqs	{ ARGSFL_NONE, NULL }, /* Ed */
160241675Suqs	{ ARGSFL_NONE, args_Bl }, /* Bl */
161241675Suqs	{ ARGSFL_NONE, NULL }, /* El */
162241675Suqs	{ ARGSFL_NONE, NULL }, /* It */
163241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ad */
164241675Suqs	{ ARGSFL_DELIM, args_An }, /* An */
165241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ar */
166241675Suqs	{ ARGSFL_NONE, NULL }, /* Cd */
167241675Suqs	{ ARGSFL_DELIM, NULL }, /* Cm */
168241675Suqs	{ ARGSFL_DELIM, NULL }, /* Dv */
169241675Suqs	{ ARGSFL_DELIM, NULL }, /* Er */
170241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ev */
171241675Suqs	{ ARGSFL_NONE, args_Ex }, /* Ex */
172241675Suqs	{ ARGSFL_DELIM, NULL }, /* Fa */
173241675Suqs	{ ARGSFL_NONE, NULL }, /* Fd */
174241675Suqs	{ ARGSFL_DELIM, NULL }, /* Fl */
175241675Suqs	{ ARGSFL_DELIM, NULL }, /* Fn */
176241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ft */
177241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ic */
178241675Suqs	{ ARGSFL_NONE, NULL }, /* In */
179241675Suqs	{ ARGSFL_DELIM, NULL }, /* Li */
180241675Suqs	{ ARGSFL_NONE, NULL }, /* Nd */
181241675Suqs	{ ARGSFL_DELIM, NULL }, /* Nm */
182241675Suqs	{ ARGSFL_DELIM, NULL }, /* Op */
183241675Suqs	{ ARGSFL_NONE, NULL }, /* Ot */
184241675Suqs	{ ARGSFL_DELIM, NULL }, /* Pa */
185241675Suqs	{ ARGSFL_NONE, args_Ex }, /* Rv */
186241675Suqs	{ ARGSFL_DELIM, NULL }, /* St */
187241675Suqs	{ ARGSFL_DELIM, NULL }, /* Va */
188241675Suqs	{ ARGSFL_DELIM, NULL }, /* Vt */
189241675Suqs	{ ARGSFL_DELIM, NULL }, /* Xr */
190241675Suqs	{ ARGSFL_NONE, NULL }, /* %A */
191241675Suqs	{ ARGSFL_NONE, NULL }, /* %B */
192241675Suqs	{ ARGSFL_NONE, NULL }, /* %D */
193241675Suqs	{ ARGSFL_NONE, NULL }, /* %I */
194241675Suqs	{ ARGSFL_NONE, NULL }, /* %J */
195241675Suqs	{ ARGSFL_NONE, NULL }, /* %N */
196241675Suqs	{ ARGSFL_NONE, NULL }, /* %O */
197241675Suqs	{ ARGSFL_NONE, NULL }, /* %P */
198241675Suqs	{ ARGSFL_NONE, NULL }, /* %R */
199241675Suqs	{ ARGSFL_NONE, NULL }, /* %T */
200241675Suqs	{ ARGSFL_NONE, NULL }, /* %V */
201241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ac */
202241675Suqs	{ ARGSFL_NONE, NULL }, /* Ao */
203241675Suqs	{ ARGSFL_DELIM, NULL }, /* Aq */
204241675Suqs	{ ARGSFL_DELIM, NULL }, /* At */
205241675Suqs	{ ARGSFL_DELIM, NULL }, /* Bc */
206241675Suqs	{ ARGSFL_NONE, args_Bf }, /* Bf */
207241675Suqs	{ ARGSFL_NONE, NULL }, /* Bo */
208241675Suqs	{ ARGSFL_DELIM, NULL }, /* Bq */
209241675Suqs	{ ARGSFL_DELIM, NULL }, /* Bsx */
210241675Suqs	{ ARGSFL_DELIM, NULL }, /* Bx */
211241675Suqs	{ ARGSFL_NONE, NULL }, /* Db */
212241675Suqs	{ ARGSFL_DELIM, NULL }, /* Dc */
213241675Suqs	{ ARGSFL_NONE, NULL }, /* Do */
214241675Suqs	{ ARGSFL_DELIM, NULL }, /* Dq */
215241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ec */
216241675Suqs	{ ARGSFL_NONE, NULL }, /* Ef */
217241675Suqs	{ ARGSFL_DELIM, NULL }, /* Em */
218241675Suqs	{ ARGSFL_NONE, NULL }, /* Eo */
219241675Suqs	{ ARGSFL_DELIM, NULL }, /* Fx */
220241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ms */
221241675Suqs	{ ARGSFL_DELIM, NULL }, /* No */
222241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ns */
223241675Suqs	{ ARGSFL_DELIM, NULL }, /* Nx */
224241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ox */
225241675Suqs	{ ARGSFL_DELIM, NULL }, /* Pc */
226241675Suqs	{ ARGSFL_DELIM, NULL }, /* Pf */
227241675Suqs	{ ARGSFL_NONE, NULL }, /* Po */
228241675Suqs	{ ARGSFL_DELIM, NULL }, /* Pq */
229241675Suqs	{ ARGSFL_DELIM, NULL }, /* Qc */
230241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ql */
231241675Suqs	{ ARGSFL_NONE, NULL }, /* Qo */
232241675Suqs	{ ARGSFL_DELIM, NULL }, /* Qq */
233241675Suqs	{ ARGSFL_NONE, NULL }, /* Re */
234241675Suqs	{ ARGSFL_NONE, NULL }, /* Rs */
235241675Suqs	{ ARGSFL_DELIM, NULL }, /* Sc */
236241675Suqs	{ ARGSFL_NONE, NULL }, /* So */
237241675Suqs	{ ARGSFL_DELIM, NULL }, /* Sq */
238241675Suqs	{ ARGSFL_NONE, NULL }, /* Sm */
239241675Suqs	{ ARGSFL_DELIM, NULL }, /* Sx */
240241675Suqs	{ ARGSFL_DELIM, NULL }, /* Sy */
241241675Suqs	{ ARGSFL_DELIM, NULL }, /* Tn */
242241675Suqs	{ ARGSFL_DELIM, NULL }, /* Ux */
243241675Suqs	{ ARGSFL_DELIM, NULL }, /* Xc */
244241675Suqs	{ ARGSFL_NONE, NULL }, /* Xo */
245241675Suqs	{ ARGSFL_NONE, NULL }, /* Fo */
246241675Suqs	{ ARGSFL_NONE, NULL }, /* Fc */
247241675Suqs	{ ARGSFL_NONE, NULL }, /* Oo */
248241675Suqs	{ ARGSFL_DELIM, NULL }, /* Oc */
249241675Suqs	{ ARGSFL_NONE, args_Bk }, /* Bk */
250241675Suqs	{ ARGSFL_NONE, NULL }, /* Ek */
251241675Suqs	{ ARGSFL_NONE, NULL }, /* Bt */
252241675Suqs	{ ARGSFL_NONE, NULL }, /* Hf */
253241675Suqs	{ ARGSFL_NONE, NULL }, /* Fr */
254241675Suqs	{ ARGSFL_NONE, NULL }, /* Ud */
255241675Suqs	{ ARGSFL_NONE, NULL }, /* Lb */
256241675Suqs	{ ARGSFL_NONE, NULL }, /* Lp */
257241675Suqs	{ ARGSFL_DELIM, NULL }, /* Lk */
258241675Suqs	{ ARGSFL_DELIM, NULL }, /* Mt */
259241675Suqs	{ ARGSFL_DELIM, NULL }, /* Brq */
260241675Suqs	{ ARGSFL_NONE, NULL }, /* Bro */
261241675Suqs	{ ARGSFL_DELIM, NULL }, /* Brc */
262241675Suqs	{ ARGSFL_NONE, NULL }, /* %C */
263241675Suqs	{ ARGSFL_NONE, NULL }, /* Es */
264241675Suqs	{ ARGSFL_NONE, NULL }, /* En */
265241675Suqs	{ ARGSFL_NONE, NULL }, /* Dx */
266241675Suqs	{ ARGSFL_NONE, NULL }, /* %Q */
267241675Suqs	{ ARGSFL_NONE, NULL }, /* br */
268241675Suqs	{ ARGSFL_NONE, NULL }, /* sp */
269241675Suqs	{ ARGSFL_NONE, NULL }, /* %U */
270241675Suqs	{ ARGSFL_NONE, NULL }, /* Ta */
271241675Suqs};
272241675Suqs
273241675Suqs
274241675Suqs/*
275241675Suqs * Parse an argument from line text.  This comes in the form of -key
276241675Suqs * [value0...], which may either have a single mandatory value, at least
277241675Suqs * one mandatory value, an optional single value, or no value.
278241675Suqs */
279241675Suqsenum margverr
280241675Suqsmdoc_argv(struct mdoc *m, int line, enum mdoct tok,
281241675Suqs		struct mdoc_arg **v, int *pos, char *buf)
282241675Suqs{
283241675Suqs	char		 *p, sv;
284241675Suqs	struct mdoc_argv tmp;
285241675Suqs	struct mdoc_arg	 *arg;
286241675Suqs	const enum mdocargt *ap;
287241675Suqs
288241675Suqs	if ('\0' == buf[*pos])
289241675Suqs		return(ARGV_EOLN);
290241675Suqs	else if (NULL == (ap = mdocargs[tok].argvs))
291241675Suqs		return(ARGV_WORD);
292241675Suqs	else if ('-' != buf[*pos])
293241675Suqs		return(ARGV_WORD);
294241675Suqs
295241675Suqs	/* Seek to the first unescaped space. */
296241675Suqs
297241675Suqs	p = &buf[++(*pos)];
298241675Suqs
299241675Suqs	assert(*pos > 0);
300241675Suqs
301241675Suqs	for ( ; buf[*pos] ; (*pos)++)
302241675Suqs		if (' ' == buf[*pos] && '\\' != buf[*pos - 1])
303241675Suqs			break;
304241675Suqs
305241675Suqs	/*
306241675Suqs	 * We want to nil-terminate the word to look it up (it's easier
307241675Suqs	 * that way).  But we may not have a flag, in which case we need
308241675Suqs	 * to restore the line as-is.  So keep around the stray byte,
309241675Suqs	 * which we'll reset upon exiting (if necessary).
310241675Suqs	 */
311241675Suqs
312241675Suqs	if ('\0' != (sv = buf[*pos]))
313241675Suqs		buf[(*pos)++] = '\0';
314241675Suqs
315241675Suqs	/*
316241675Suqs	 * Now look up the word as a flag.  Use temporary storage that
317241675Suqs	 * we'll copy into the node's flags, if necessary.
318241675Suqs	 */
319241675Suqs
320241675Suqs	memset(&tmp, 0, sizeof(struct mdoc_argv));
321241675Suqs
322241675Suqs	tmp.line = line;
323241675Suqs	tmp.pos = *pos;
324241675Suqs	tmp.arg = MDOC_ARG_MAX;
325241675Suqs
326241675Suqs	while (MDOC_ARG_MAX != (tmp.arg = *ap++))
327241675Suqs		if (0 == strcmp(p, mdoc_argnames[tmp.arg]))
328241675Suqs			break;
329241675Suqs
330241675Suqs	if (MDOC_ARG_MAX == tmp.arg) {
331241675Suqs		/*
332241675Suqs		 * The flag was not found.
333241675Suqs		 * Restore saved zeroed byte and return as a word.
334241675Suqs		 */
335241675Suqs		if (sv)
336241675Suqs			buf[*pos - 1] = sv;
337241675Suqs		return(ARGV_WORD);
338241675Suqs	}
339241675Suqs
340241675Suqs	/* Read to the next word (the argument). */
341241675Suqs
342241675Suqs	while (buf[*pos] && ' ' == buf[*pos])
343241675Suqs		(*pos)++;
344241675Suqs
345241675Suqs	switch (argvflags[tmp.arg]) {
346241675Suqs	case (ARGV_SINGLE):
347241675Suqs		if ( ! argv_single(m, line, &tmp, pos, buf))
348241675Suqs			return(ARGV_ERROR);
349241675Suqs		break;
350241675Suqs	case (ARGV_MULTI):
351241675Suqs		if ( ! argv_multi(m, line, &tmp, pos, buf))
352241675Suqs			return(ARGV_ERROR);
353241675Suqs		break;
354241675Suqs	case (ARGV_OPT_SINGLE):
355241675Suqs		if ( ! argv_opt_single(m, line, &tmp, pos, buf))
356241675Suqs			return(ARGV_ERROR);
357241675Suqs		break;
358241675Suqs	case (ARGV_NONE):
359241675Suqs		break;
360241675Suqs	}
361241675Suqs
362241675Suqs	if (NULL == (arg = *v))
363241675Suqs		arg = *v = mandoc_calloc(1, sizeof(struct mdoc_arg));
364241675Suqs
365241675Suqs	arg->argc++;
366241675Suqs	arg->argv = mandoc_realloc
367241675Suqs		(arg->argv, arg->argc * sizeof(struct mdoc_argv));
368241675Suqs
369241675Suqs	memcpy(&arg->argv[(int)arg->argc - 1],
370241675Suqs			&tmp, sizeof(struct mdoc_argv));
371241675Suqs
372241675Suqs	return(ARGV_ARG);
373241675Suqs}
374241675Suqs
375241675Suqsvoid
376241675Suqsmdoc_argv_free(struct mdoc_arg *p)
377241675Suqs{
378241675Suqs	int		 i;
379241675Suqs
380241675Suqs	if (NULL == p)
381241675Suqs		return;
382241675Suqs
383241675Suqs	if (p->refcnt) {
384241675Suqs		--(p->refcnt);
385241675Suqs		if (p->refcnt)
386241675Suqs			return;
387241675Suqs	}
388241675Suqs	assert(p->argc);
389241675Suqs
390241675Suqs	for (i = (int)p->argc - 1; i >= 0; i--)
391241675Suqs		argn_free(p, i);
392241675Suqs
393241675Suqs	free(p->argv);
394241675Suqs	free(p);
395241675Suqs}
396241675Suqs
397241675Suqsstatic void
398241675Suqsargn_free(struct mdoc_arg *p, int iarg)
399241675Suqs{
400241675Suqs	struct mdoc_argv *arg;
401241675Suqs	int		  j;
402241675Suqs
403241675Suqs	arg = &p->argv[iarg];
404241675Suqs
405241675Suqs	if (arg->sz && arg->value) {
406241675Suqs		for (j = (int)arg->sz - 1; j >= 0; j--)
407241675Suqs			free(arg->value[j]);
408241675Suqs		free(arg->value);
409241675Suqs	}
410241675Suqs
411241675Suqs	for (--p->argc; iarg < (int)p->argc; iarg++)
412241675Suqs		p->argv[iarg] = p->argv[iarg+1];
413241675Suqs}
414241675Suqs
415241675Suqsenum margserr
416241675Suqsmdoc_zargs(struct mdoc *m, int line, int *pos, char *buf, char **v)
417241675Suqs{
418241675Suqs
419241675Suqs	return(args(m, line, pos, buf, ARGSFL_NONE, v));
420241675Suqs}
421241675Suqs
422241675Suqsenum margserr
423241675Suqsmdoc_args(struct mdoc *m, int line, int *pos,
424241675Suqs		char *buf, enum mdoct tok, char **v)
425241675Suqs{
426241675Suqs	enum argsflag	  fl;
427241675Suqs	struct mdoc_node *n;
428241675Suqs
429241675Suqs	fl = mdocargs[tok].flags;
430241675Suqs
431241675Suqs	if (MDOC_It != tok)
432241675Suqs		return(args(m, line, pos, buf, fl, v));
433241675Suqs
434241675Suqs	/*
435241675Suqs	 * We know that we're in an `It', so it's reasonable to expect
436241675Suqs	 * us to be sitting in a `Bl'.  Someday this may not be the case
437241675Suqs	 * (if we allow random `It's sitting out there), so provide a
438241675Suqs	 * safe fall-back into the default behaviour.
439241675Suqs	 */
440241675Suqs
441241675Suqs	for (n = m->last; n; n = n->parent)
442241675Suqs		if (MDOC_Bl == n->tok)
443241675Suqs			if (LIST_column == n->norm->Bl.type) {
444241675Suqs				fl = ARGSFL_TABSEP;
445241675Suqs				break;
446241675Suqs			}
447241675Suqs
448241675Suqs	return(args(m, line, pos, buf, fl, v));
449241675Suqs}
450241675Suqs
451241675Suqsstatic enum margserr
452241675Suqsargs(struct mdoc *m, int line, int *pos,
453241675Suqs		char *buf, enum argsflag fl, char **v)
454241675Suqs{
455241675Suqs	char		*p, *pp;
456241675Suqs	enum margserr	 rc;
457241675Suqs
458241675Suqs	if ('\0' == buf[*pos]) {
459241675Suqs		if (MDOC_PPHRASE & m->flags)
460241675Suqs			return(ARGS_EOLN);
461241675Suqs		/*
462241675Suqs		 * If we're not in a partial phrase and the flag for
463241675Suqs		 * being a phrase literal is still set, the punctuation
464241675Suqs		 * is unterminated.
465241675Suqs		 */
466241675Suqs		if (MDOC_PHRASELIT & m->flags)
467241675Suqs			mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE);
468241675Suqs
469241675Suqs		m->flags &= ~MDOC_PHRASELIT;
470241675Suqs		return(ARGS_EOLN);
471241675Suqs	}
472241675Suqs
473241675Suqs	*v = &buf[*pos];
474241675Suqs
475241675Suqs	if (ARGSFL_DELIM == fl)
476241675Suqs		if (args_checkpunct(buf, *pos))
477241675Suqs			return(ARGS_PUNCT);
478241675Suqs
479241675Suqs	/*
480241675Suqs	 * First handle TABSEP items, restricted to `Bl -column'.  This
481241675Suqs	 * ignores conventional token parsing and instead uses tabs or
482241675Suqs	 * `Ta' macros to separate phrases.  Phrases are parsed again
483241675Suqs	 * for arguments at a later phase.
484241675Suqs	 */
485241675Suqs
486241675Suqs	if (ARGSFL_TABSEP == fl) {
487241675Suqs		/* Scan ahead to tab (can't be escaped). */
488241675Suqs		p = strchr(*v, '\t');
489241675Suqs		pp = NULL;
490241675Suqs
491241675Suqs		/* Scan ahead to unescaped `Ta'. */
492241675Suqs		if ( ! (MDOC_PHRASELIT & m->flags))
493241675Suqs			for (pp = *v; ; pp++) {
494241675Suqs				if (NULL == (pp = strstr(pp, "Ta")))
495241675Suqs					break;
496241675Suqs				if (pp > *v && ' ' != *(pp - 1))
497241675Suqs					continue;
498241675Suqs				if (' ' == *(pp + 2) || '\0' == *(pp + 2))
499241675Suqs					break;
500241675Suqs			}
501241675Suqs
502241675Suqs		/* By default, assume a phrase. */
503241675Suqs		rc = ARGS_PHRASE;
504241675Suqs
505241675Suqs		/*
506241675Suqs		 * Adjust new-buffer position to be beyond delimiter
507241675Suqs		 * mark (e.g., Ta -> end + 2).
508241675Suqs		 */
509241675Suqs		if (p && pp) {
510241675Suqs			*pos += pp < p ? 2 : 1;
511241675Suqs			rc = pp < p ? ARGS_PHRASE : ARGS_PPHRASE;
512241675Suqs			p = pp < p ? pp : p;
513241675Suqs		} else if (p && ! pp) {
514241675Suqs			rc = ARGS_PPHRASE;
515241675Suqs			*pos += 1;
516241675Suqs		} else if (pp && ! p) {
517241675Suqs			p = pp;
518241675Suqs			*pos += 2;
519241675Suqs		} else {
520241675Suqs			rc = ARGS_PEND;
521241675Suqs			p = strchr(*v, 0);
522241675Suqs		}
523241675Suqs
524241675Suqs		/* Whitespace check for eoln case... */
525241675Suqs		if ('\0' == *p && ' ' == *(p - 1))
526241675Suqs			mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
527241675Suqs
528241675Suqs		*pos += (int)(p - *v);
529241675Suqs
530241675Suqs		/* Strip delimiter's preceding whitespace. */
531241675Suqs		pp = p - 1;
532241675Suqs		while (pp > *v && ' ' == *pp) {
533241675Suqs			if (pp > *v && '\\' == *(pp - 1))
534241675Suqs				break;
535241675Suqs			pp--;
536241675Suqs		}
537241675Suqs		*(pp + 1) = 0;
538241675Suqs
539241675Suqs		/* Strip delimiter's proceeding whitespace. */
540241675Suqs		for (pp = &buf[*pos]; ' ' == *pp; pp++, (*pos)++)
541241675Suqs			/* Skip ahead. */ ;
542241675Suqs
543241675Suqs		return(rc);
544241675Suqs	}
545241675Suqs
546241675Suqs	/*
547241675Suqs	 * Process a quoted literal.  A quote begins with a double-quote
548241675Suqs	 * and ends with a double-quote NOT preceded by a double-quote.
549241675Suqs	 * Whitespace is NOT involved in literal termination.
550241675Suqs	 */
551241675Suqs
552241675Suqs	if (MDOC_PHRASELIT & m->flags || '\"' == buf[*pos]) {
553241675Suqs		if ( ! (MDOC_PHRASELIT & m->flags))
554241675Suqs			*v = &buf[++(*pos)];
555241675Suqs
556241675Suqs		if (MDOC_PPHRASE & m->flags)
557241675Suqs			m->flags |= MDOC_PHRASELIT;
558241675Suqs
559241675Suqs		for ( ; buf[*pos]; (*pos)++) {
560241675Suqs			if ('\"' != buf[*pos])
561241675Suqs				continue;
562241675Suqs			if ('\"' != buf[*pos + 1])
563241675Suqs				break;
564241675Suqs			(*pos)++;
565241675Suqs		}
566241675Suqs
567241675Suqs		if ('\0' == buf[*pos]) {
568241675Suqs			if (MDOC_PPHRASE & m->flags)
569241675Suqs				return(ARGS_QWORD);
570241675Suqs			mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE);
571241675Suqs			return(ARGS_QWORD);
572241675Suqs		}
573241675Suqs
574241675Suqs		m->flags &= ~MDOC_PHRASELIT;
575241675Suqs		buf[(*pos)++] = '\0';
576241675Suqs
577241675Suqs		if ('\0' == buf[*pos])
578241675Suqs			return(ARGS_QWORD);
579241675Suqs
580241675Suqs		while (' ' == buf[*pos])
581241675Suqs			(*pos)++;
582241675Suqs
583241675Suqs		if ('\0' == buf[*pos])
584241675Suqs			mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE);
585241675Suqs
586241675Suqs		return(ARGS_QWORD);
587241675Suqs	}
588241675Suqs
589241675Suqs	p = &buf[*pos];
590241675Suqs	*v = mandoc_getarg(m->parse, &p, line, pos);
591241675Suqs
592241675Suqs	return(ARGS_WORD);
593241675Suqs}
594241675Suqs
595241675Suqs/*
596241675Suqs * Check if the string consists only of space-separated closing
597241675Suqs * delimiters.  This is a bit of a dance: the first must be a close
598241675Suqs * delimiter, but it may be followed by middle delimiters.  Arbitrary
599241675Suqs * whitespace may separate these tokens.
600241675Suqs */
601241675Suqsstatic int
602241675Suqsargs_checkpunct(const char *buf, int i)
603241675Suqs{
604241675Suqs	int		 j;
605241675Suqs	char		 dbuf[DELIMSZ];
606241675Suqs	enum mdelim	 d;
607241675Suqs
608241675Suqs	/* First token must be a close-delimiter. */
609241675Suqs
610241675Suqs	for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++)
611241675Suqs		dbuf[j] = buf[i];
612241675Suqs
613241675Suqs	if (DELIMSZ == j)
614241675Suqs		return(0);
615241675Suqs
616241675Suqs	dbuf[j] = '\0';
617241675Suqs	if (DELIM_CLOSE != mdoc_isdelim(dbuf))
618241675Suqs		return(0);
619241675Suqs
620241675Suqs	while (' ' == buf[i])
621241675Suqs		i++;
622241675Suqs
623241675Suqs	/* Remaining must NOT be open/none. */
624241675Suqs
625241675Suqs	while (buf[i]) {
626241675Suqs		j = 0;
627241675Suqs		while (buf[i] && ' ' != buf[i] && j < DELIMSZ)
628241675Suqs			dbuf[j++] = buf[i++];
629241675Suqs
630241675Suqs		if (DELIMSZ == j)
631241675Suqs			return(0);
632241675Suqs
633241675Suqs		dbuf[j] = '\0';
634241675Suqs		d = mdoc_isdelim(dbuf);
635241675Suqs		if (DELIM_NONE == d || DELIM_OPEN == d)
636241675Suqs			return(0);
637241675Suqs
638241675Suqs		while (' ' == buf[i])
639241675Suqs			i++;
640241675Suqs	}
641241675Suqs
642241675Suqs	return('\0' == buf[i]);
643241675Suqs}
644241675Suqs
645241675Suqsstatic int
646241675Suqsargv_multi(struct mdoc *m, int line,
647241675Suqs		struct mdoc_argv *v, int *pos, char *buf)
648241675Suqs{
649241675Suqs	enum margserr	 ac;
650241675Suqs	char		*p;
651241675Suqs
652241675Suqs	for (v->sz = 0; ; v->sz++) {
653241675Suqs		if ('-' == buf[*pos])
654241675Suqs			break;
655241675Suqs		ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
656241675Suqs		if (ARGS_ERROR == ac)
657241675Suqs			return(0);
658241675Suqs		else if (ARGS_EOLN == ac)
659241675Suqs			break;
660241675Suqs
661241675Suqs		if (0 == v->sz % MULTI_STEP)
662241675Suqs			v->value = mandoc_realloc(v->value,
663241675Suqs				(v->sz + MULTI_STEP) * sizeof(char *));
664241675Suqs
665241675Suqs		v->value[(int)v->sz] = mandoc_strdup(p);
666241675Suqs	}
667241675Suqs
668241675Suqs	return(1);
669241675Suqs}
670241675Suqs
671241675Suqsstatic int
672241675Suqsargv_opt_single(struct mdoc *m, int line,
673241675Suqs		struct mdoc_argv *v, int *pos, char *buf)
674241675Suqs{
675241675Suqs	enum margserr	 ac;
676241675Suqs	char		*p;
677241675Suqs
678241675Suqs	if ('-' == buf[*pos])
679241675Suqs		return(1);
680241675Suqs
681241675Suqs	ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
682241675Suqs	if (ARGS_ERROR == ac)
683241675Suqs		return(0);
684241675Suqs	if (ARGS_EOLN == ac)
685241675Suqs		return(1);
686241675Suqs
687241675Suqs	v->sz = 1;
688241675Suqs	v->value = mandoc_malloc(sizeof(char *));
689241675Suqs	v->value[0] = mandoc_strdup(p);
690241675Suqs
691241675Suqs	return(1);
692241675Suqs}
693241675Suqs
694241675Suqsstatic int
695241675Suqsargv_single(struct mdoc *m, int line,
696241675Suqs		struct mdoc_argv *v, int *pos, char *buf)
697241675Suqs{
698241675Suqs	int		 ppos;
699241675Suqs	enum margserr	 ac;
700241675Suqs	char		*p;
701241675Suqs
702241675Suqs	ppos = *pos;
703241675Suqs
704241675Suqs	ac = args(m, line, pos, buf, ARGSFL_NONE, &p);
705241675Suqs	if (ARGS_EOLN == ac) {
706241675Suqs		mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT);
707241675Suqs		return(0);
708241675Suqs	} else if (ARGS_ERROR == ac)
709241675Suqs		return(0);
710241675Suqs
711241675Suqs	v->sz = 1;
712241675Suqs	v->value = mandoc_malloc(sizeof(char *));
713241675Suqs	v->value[0] = mandoc_strdup(p);
714241675Suqs
715241675Suqs	return(1);
716241675Suqs}
717