1241675Suqs/*	$Id: tbl_layout.c,v 1.22 2011/09/18 14:14:15 schwarze Exp $ */
2241675Suqs/*
3241675Suqs * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4241675Suqs *
5241675Suqs * Permission to use, copy, modify, and distribute this software for any
6241675Suqs * purpose with or without fee is hereby granted, provided that the above
7241675Suqs * copyright notice and this permission notice appear in all copies.
8241675Suqs *
9241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16241675Suqs */
17241675Suqs#ifdef HAVE_CONFIG_H
18241675Suqs#include "config.h"
19241675Suqs#endif
20241675Suqs
21241675Suqs#include <assert.h>
22241675Suqs#include <ctype.h>
23241675Suqs#include <stdlib.h>
24241675Suqs#include <string.h>
25241675Suqs#include <time.h>
26241675Suqs
27241675Suqs#include "mandoc.h"
28241675Suqs#include "libmandoc.h"
29241675Suqs#include "libroff.h"
30241675Suqs
/*
 * Associates one layout key character with the cell type it selects.
 */
struct	tbl_phrase {
	char		 name;	/* key character; matched against lower-cased input */
	enum tbl_cellt	 key;	/* cell type assigned when `name' matches */
};
35241675Suqs
/*
 * FIXME: the parse could recover more gracefully.  When an error is
 * encountered in a layout key, skip ahead to the next key (i.e. to
 * the next whitespace) and continue from there instead of giving up
 * on the rest of the line.
 */
41241675Suqs
/* Number of entries in keys[]; must be kept in sync with the table. */
#define	KEYS_MAX	 11

/*
 * Lookup table from layout key character to cell type.
 * Alphabetic keys are stored in lower case.  Both `-' and `_' select
 * a horizontal line.  There is no entry for the double vertical
 * line: only a single `|' is listed here.
 */
static	const struct tbl_phrase keys[KEYS_MAX] = {
	{ 'c',		 TBL_CELL_CENTRE },
	{ 'r',		 TBL_CELL_RIGHT },
	{ 'l',		 TBL_CELL_LEFT },
	{ 'n',		 TBL_CELL_NUMBER },
	{ 's',		 TBL_CELL_SPAN },
	{ 'a',		 TBL_CELL_LONG },
	{ '^',		 TBL_CELL_DOWN },
	{ '-',		 TBL_CELL_HORIZ },
	{ '_',		 TBL_CELL_HORIZ },
	{ '=',		 TBL_CELL_DHORIZ },
	{ '|',		 TBL_CELL_VERT }
};
57241675Suqs
/* Forward declarations of the file-local layout parsing helpers. */

/* Parse the modifiers that follow a cell key. */
static	int		 mods(struct tbl_node *, struct tbl_cell *,
				int, const char *, int *);
/* Parse one cell key, allocate the cell, then parse its modifiers. */
static	int		 cell(struct tbl_node *, struct tbl_row *,
				int, const char *, int *);
/* Parse the rows and cells found on one layout input line. */
static	void		 row(struct tbl_node *, int, const char *, int *);
/* Allocate a cell, append it to a row and bind it to a column header. */
static	struct tbl_cell *cell_alloc(struct tbl_node *,
				struct tbl_row *, enum tbl_cellt);
/* Update a column header's kind from the cell that refers to it. */
static	void		 head_adjust(const struct tbl_cell *,
				struct tbl_head *);
67241675Suqs
68241675Suqsstatic int
69241675Suqsmods(struct tbl_node *tbl, struct tbl_cell *cp,
70241675Suqs		int ln, const char *p, int *pos)
71241675Suqs{
72241675Suqs	char		 buf[5];
73241675Suqs	int		 i;
74241675Suqs
75241675Suqs	/* Not all types accept modifiers. */
76241675Suqs
77241675Suqs	switch (cp->pos) {
78241675Suqs	case (TBL_CELL_DOWN):
79241675Suqs		/* FALLTHROUGH */
80241675Suqs	case (TBL_CELL_HORIZ):
81241675Suqs		/* FALLTHROUGH */
82241675Suqs	case (TBL_CELL_DHORIZ):
83241675Suqs		/* FALLTHROUGH */
84241675Suqs	case (TBL_CELL_VERT):
85241675Suqs		/* FALLTHROUGH */
86241675Suqs	case (TBL_CELL_DVERT):
87241675Suqs		return(1);
88241675Suqs	default:
89241675Suqs		break;
90241675Suqs	}
91241675Suqs
92241675Suqsmod:
93241675Suqs	/*
94241675Suqs	 * XXX: since, at least for now, modifiers are non-conflicting
95241675Suqs	 * (are separable by value, regardless of position), we let
96241675Suqs	 * modifiers come in any order.  The existing tbl doesn't let
97241675Suqs	 * this happen.
98241675Suqs	 */
99241675Suqs	switch (p[*pos]) {
100241675Suqs	case ('\0'):
101241675Suqs		/* FALLTHROUGH */
102241675Suqs	case (' '):
103241675Suqs		/* FALLTHROUGH */
104241675Suqs	case ('\t'):
105241675Suqs		/* FALLTHROUGH */
106241675Suqs	case (','):
107241675Suqs		/* FALLTHROUGH */
108241675Suqs	case ('.'):
109241675Suqs		return(1);
110241675Suqs	default:
111241675Suqs		break;
112241675Suqs	}
113241675Suqs
114241675Suqs	/* Throw away parenthesised expression. */
115241675Suqs
116241675Suqs	if ('(' == p[*pos]) {
117241675Suqs		(*pos)++;
118241675Suqs		while (p[*pos] && ')' != p[*pos])
119241675Suqs			(*pos)++;
120241675Suqs		if (')' == p[*pos]) {
121241675Suqs			(*pos)++;
122241675Suqs			goto mod;
123241675Suqs		}
124241675Suqs		mandoc_msg(MANDOCERR_TBLLAYOUT,
125241675Suqs				tbl->parse, ln, *pos, NULL);
126241675Suqs		return(0);
127241675Suqs	}
128241675Suqs
129241675Suqs	/* Parse numerical spacing from modifier string. */
130241675Suqs
131241675Suqs	if (isdigit((unsigned char)p[*pos])) {
132241675Suqs		for (i = 0; i < 4; i++) {
133241675Suqs			if ( ! isdigit((unsigned char)p[*pos + i]))
134241675Suqs				break;
135241675Suqs			buf[i] = p[*pos + i];
136241675Suqs		}
137241675Suqs		buf[i] = '\0';
138241675Suqs
139241675Suqs		/* No greater than 4 digits. */
140241675Suqs
141241675Suqs		if (4 == i) {
142241675Suqs			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
143241675Suqs					ln, *pos, NULL);
144241675Suqs			return(0);
145241675Suqs		}
146241675Suqs
147241675Suqs		*pos += i;
148241675Suqs		cp->spacing = (size_t)atoi(buf);
149241675Suqs
150241675Suqs		goto mod;
151241675Suqs		/* NOTREACHED */
152241675Suqs	}
153241675Suqs
154241675Suqs	/* TODO: GNU has many more extensions. */
155241675Suqs
156241675Suqs	switch (tolower((unsigned char)p[(*pos)++])) {
157241675Suqs	case ('z'):
158241675Suqs		cp->flags |= TBL_CELL_WIGN;
159241675Suqs		goto mod;
160241675Suqs	case ('u'):
161241675Suqs		cp->flags |= TBL_CELL_UP;
162241675Suqs		goto mod;
163241675Suqs	case ('e'):
164241675Suqs		cp->flags |= TBL_CELL_EQUAL;
165241675Suqs		goto mod;
166241675Suqs	case ('t'):
167241675Suqs		cp->flags |= TBL_CELL_TALIGN;
168241675Suqs		goto mod;
169241675Suqs	case ('d'):
170241675Suqs		cp->flags |= TBL_CELL_BALIGN;
171241675Suqs		goto mod;
172241675Suqs	case ('w'):  /* XXX for now, ignore minimal column width */
173241675Suqs		goto mod;
174241675Suqs	case ('f'):
175241675Suqs		break;
176241675Suqs	case ('r'):
177241675Suqs		/* FALLTHROUGH */
178241675Suqs	case ('b'):
179241675Suqs		/* FALLTHROUGH */
180241675Suqs	case ('i'):
181241675Suqs		(*pos)--;
182241675Suqs		break;
183241675Suqs	default:
184241675Suqs		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
185241675Suqs				ln, *pos - 1, NULL);
186241675Suqs		return(0);
187241675Suqs	}
188241675Suqs
189241675Suqs	switch (tolower((unsigned char)p[(*pos)++])) {
190241675Suqs	case ('3'):
191241675Suqs		/* FALLTHROUGH */
192241675Suqs	case ('b'):
193241675Suqs		cp->flags |= TBL_CELL_BOLD;
194241675Suqs		goto mod;
195241675Suqs	case ('2'):
196241675Suqs		/* FALLTHROUGH */
197241675Suqs	case ('i'):
198241675Suqs		cp->flags |= TBL_CELL_ITALIC;
199241675Suqs		goto mod;
200241675Suqs	case ('1'):
201241675Suqs		/* FALLTHROUGH */
202241675Suqs	case ('r'):
203241675Suqs		goto mod;
204241675Suqs	default:
205241675Suqs		break;
206241675Suqs	}
207241675Suqs
208241675Suqs	mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
209241675Suqs			ln, *pos - 1, NULL);
210241675Suqs	return(0);
211241675Suqs}
212241675Suqs
213241675Suqsstatic int
214241675Suqscell(struct tbl_node *tbl, struct tbl_row *rp,
215241675Suqs		int ln, const char *p, int *pos)
216241675Suqs{
217241675Suqs	int		 i;
218241675Suqs	enum tbl_cellt	 c;
219241675Suqs
220241675Suqs	/* Parse the column position (`r', `R', `|', ...). */
221241675Suqs
222241675Suqs	for (i = 0; i < KEYS_MAX; i++)
223241675Suqs		if (tolower((unsigned char)p[*pos]) == keys[i].name)
224241675Suqs			break;
225241675Suqs
226241675Suqs	if (KEYS_MAX == i) {
227241675Suqs		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
228241675Suqs				ln, *pos, NULL);
229241675Suqs		return(0);
230241675Suqs	}
231241675Suqs
232241675Suqs	c = keys[i].key;
233241675Suqs
234241675Suqs	/*
235241675Suqs	 * If a span cell is found first, raise a warning and abort the
236241675Suqs	 * parse.  If a span cell is found and the last layout element
237241675Suqs	 * isn't a "normal" layout, bail.
238241675Suqs	 *
239241675Suqs	 * FIXME: recover from this somehow?
240241675Suqs	 */
241241675Suqs
242241675Suqs	if (TBL_CELL_SPAN == c) {
243241675Suqs		if (NULL == rp->first) {
244241675Suqs			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
245241675Suqs					ln, *pos, NULL);
246241675Suqs			return(0);
247241675Suqs		} else if (rp->last)
248241675Suqs			switch (rp->last->pos) {
249241675Suqs			case (TBL_CELL_VERT):
250241675Suqs			case (TBL_CELL_DVERT):
251241675Suqs			case (TBL_CELL_HORIZ):
252241675Suqs			case (TBL_CELL_DHORIZ):
253241675Suqs				mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
254241675Suqs						ln, *pos, NULL);
255241675Suqs				return(0);
256241675Suqs			default:
257241675Suqs				break;
258241675Suqs			}
259241675Suqs	}
260241675Suqs
261241675Suqs	/*
262241675Suqs	 * If a vertical spanner is found, we may not be in the first
263241675Suqs	 * row.
264241675Suqs	 */
265241675Suqs
266241675Suqs	if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
267241675Suqs		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
268241675Suqs		return(0);
269241675Suqs	}
270241675Suqs
271241675Suqs	(*pos)++;
272241675Suqs
273241675Suqs	/* Extra check for the double-vertical. */
274241675Suqs
275241675Suqs	if (TBL_CELL_VERT == c && '|' == p[*pos]) {
276241675Suqs		(*pos)++;
277241675Suqs		c = TBL_CELL_DVERT;
278241675Suqs	}
279241675Suqs
280241675Suqs	/* Disallow adjacent spacers. */
281241675Suqs
282241675Suqs	if (rp->last && (TBL_CELL_VERT == c || TBL_CELL_DVERT == c) &&
283241675Suqs			(TBL_CELL_VERT == rp->last->pos ||
284241675Suqs			 TBL_CELL_DVERT == rp->last->pos)) {
285241675Suqs		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
286241675Suqs		return(0);
287241675Suqs	}
288241675Suqs
289241675Suqs	/* Allocate cell then parse its modifiers. */
290241675Suqs
291241675Suqs	return(mods(tbl, cell_alloc(tbl, rp, c), ln, p, pos));
292241675Suqs}
293241675Suqs
294241675Suqs
295241675Suqsstatic void
296241675Suqsrow(struct tbl_node *tbl, int ln, const char *p, int *pos)
297241675Suqs{
298241675Suqs	struct tbl_row	*rp;
299241675Suqs
300241675Suqsrow:	/*
301241675Suqs	 * EBNF describing this section:
302241675Suqs	 *
303241675Suqs	 * row		::= row_list [:space:]* [.]?[\n]
304241675Suqs	 * row_list	::= [:space:]* row_elem row_tail
305241675Suqs	 * row_tail	::= [:space:]*[,] row_list |
306241675Suqs	 *                  epsilon
307241675Suqs	 * row_elem	::= [\t\ ]*[:alpha:]+
308241675Suqs	 */
309241675Suqs
310241675Suqs	rp = mandoc_calloc(1, sizeof(struct tbl_row));
311241675Suqs	if (tbl->last_row) {
312241675Suqs		tbl->last_row->next = rp;
313241675Suqs		tbl->last_row = rp;
314241675Suqs	} else
315241675Suqs		tbl->last_row = tbl->first_row = rp;
316241675Suqs
317241675Suqscell:
318241675Suqs	while (isspace((unsigned char)p[*pos]))
319241675Suqs		(*pos)++;
320241675Suqs
321241675Suqs	/* Safely exit layout context. */
322241675Suqs
323241675Suqs	if ('.' == p[*pos]) {
324241675Suqs		tbl->part = TBL_PART_DATA;
325241675Suqs		if (NULL == tbl->first_row)
326241675Suqs			mandoc_msg(MANDOCERR_TBLNOLAYOUT, tbl->parse,
327241675Suqs					ln, *pos, NULL);
328241675Suqs		(*pos)++;
329241675Suqs		return;
330241675Suqs	}
331241675Suqs
332241675Suqs	/* End (and possibly restart) a row. */
333241675Suqs
334241675Suqs	if (',' == p[*pos]) {
335241675Suqs		(*pos)++;
336241675Suqs		goto row;
337241675Suqs	} else if ('\0' == p[*pos])
338241675Suqs		return;
339241675Suqs
340241675Suqs	if ( ! cell(tbl, rp, ln, p, pos))
341241675Suqs		return;
342241675Suqs
343241675Suqs	goto cell;
344241675Suqs	/* NOTREACHED */
345241675Suqs}
346241675Suqs
/*
 * Entry point for one line of table layout: parse p from its
 * beginning, using ln for error reporting.  Parse errors are reported
 * by the helpers, so this function always returns 1.
 */
int
tbl_layout(struct tbl_node *tbl, int ln, const char *p)
{
	int		 pos = 0;

	row(tbl, ln, p, &pos);
	return(1);
}
358241675Suqs
359241675Suqsstatic struct tbl_cell *
360241675Suqscell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos)
361241675Suqs{
362241675Suqs	struct tbl_cell	*p, *pp;
363241675Suqs	struct tbl_head	*h, *hp;
364241675Suqs
365241675Suqs	p = mandoc_calloc(1, sizeof(struct tbl_cell));
366241675Suqs
367241675Suqs	if (NULL != (pp = rp->last)) {
368241675Suqs		rp->last->next = p;
369241675Suqs		rp->last = p;
370241675Suqs	} else
371241675Suqs		rp->last = rp->first = p;
372241675Suqs
373241675Suqs	p->pos = pos;
374241675Suqs
375241675Suqs	/*
376241675Suqs	 * This is a little bit complicated.  Here we determine the
377241675Suqs	 * header the corresponds to a cell.  We add headers dynamically
378241675Suqs	 * when need be or re-use them, otherwise.  As an example, given
379241675Suqs	 * the following:
380241675Suqs	 *
381241675Suqs	 * 	1  c || l
382241675Suqs	 * 	2  | c | l
383241675Suqs	 * 	3  l l
384241675Suqs	 * 	3  || c | l |.
385241675Suqs	 *
386241675Suqs	 * We first add the new headers (as there are none) in (1); then
387241675Suqs	 * in (2) we insert the first spanner (as it doesn't match up
388241675Suqs	 * with the header); then we re-use the prior data headers,
389241675Suqs	 * skipping over the spanners; then we re-use everything and add
390241675Suqs	 * a last spanner.  Note that VERT headers are made into DVERT
391241675Suqs	 * ones.
392241675Suqs	 */
393241675Suqs
394241675Suqs	h = pp ? pp->head->next : tbl->first_head;
395241675Suqs
396241675Suqs	if (h) {
397241675Suqs		/* Re-use data header. */
398241675Suqs		if (TBL_HEAD_DATA == h->pos &&
399241675Suqs				(TBL_CELL_VERT != p->pos &&
400241675Suqs				 TBL_CELL_DVERT != p->pos)) {
401241675Suqs			p->head = h;
402241675Suqs			return(p);
403241675Suqs		}
404241675Suqs
405241675Suqs		/* Re-use spanner header. */
406241675Suqs		if (TBL_HEAD_DATA != h->pos &&
407241675Suqs				(TBL_CELL_VERT == p->pos ||
408241675Suqs				 TBL_CELL_DVERT == p->pos)) {
409241675Suqs			head_adjust(p, h);
410241675Suqs			p->head = h;
411241675Suqs			return(p);
412241675Suqs		}
413241675Suqs
414241675Suqs		/* Right-shift headers with a new spanner. */
415241675Suqs		if (TBL_HEAD_DATA == h->pos &&
416241675Suqs				(TBL_CELL_VERT == p->pos ||
417241675Suqs				 TBL_CELL_DVERT == p->pos)) {
418241675Suqs			hp = mandoc_calloc(1, sizeof(struct tbl_head));
419241675Suqs			hp->ident = tbl->opts.cols++;
420241675Suqs			hp->prev = h->prev;
421241675Suqs			if (h->prev)
422241675Suqs				h->prev->next = hp;
423241675Suqs			if (h == tbl->first_head)
424241675Suqs				tbl->first_head = hp;
425241675Suqs			h->prev = hp;
426241675Suqs			hp->next = h;
427241675Suqs			head_adjust(p, hp);
428241675Suqs			p->head = hp;
429241675Suqs			return(p);
430241675Suqs		}
431241675Suqs
432241675Suqs		if (NULL != (h = h->next)) {
433241675Suqs			head_adjust(p, h);
434241675Suqs			p->head = h;
435241675Suqs			return(p);
436241675Suqs		}
437241675Suqs
438241675Suqs		/* Fall through to default case... */
439241675Suqs	}
440241675Suqs
441241675Suqs	hp = mandoc_calloc(1, sizeof(struct tbl_head));
442241675Suqs	hp->ident = tbl->opts.cols++;
443241675Suqs
444241675Suqs	if (tbl->last_head) {
445241675Suqs		hp->prev = tbl->last_head;
446241675Suqs		tbl->last_head->next = hp;
447241675Suqs		tbl->last_head = hp;
448241675Suqs	} else
449241675Suqs		tbl->last_head = tbl->first_head = hp;
450241675Suqs
451241675Suqs	head_adjust(p, hp);
452241675Suqs	p->head = hp;
453241675Suqs	return(p);
454241675Suqs}
455241675Suqs
456241675Suqsstatic void
457241675Suqshead_adjust(const struct tbl_cell *cellp, struct tbl_head *head)
458241675Suqs{
459241675Suqs	if (TBL_CELL_VERT != cellp->pos &&
460241675Suqs			TBL_CELL_DVERT != cellp->pos) {
461241675Suqs		head->pos = TBL_HEAD_DATA;
462241675Suqs		return;
463241675Suqs	}
464241675Suqs
465241675Suqs	if (TBL_CELL_VERT == cellp->pos)
466241675Suqs		if (TBL_HEAD_DVERT != head->pos)
467241675Suqs			head->pos = TBL_HEAD_VERT;
468241675Suqs
469241675Suqs	if (TBL_CELL_DVERT == cellp->pos)
470241675Suqs		head->pos = TBL_HEAD_DVERT;
471241675Suqs}
472241675Suqs
473