1/*	$Id: tbl_layout.c,v 1.22 2011/09/18 14:14:15 schwarze Exp $ */
2/*
3 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#ifdef HAVE_CONFIG_H
18#include "config.h"
19#endif
20
21#include <assert.h>
22#include <ctype.h>
23#include <stdlib.h>
24#include <string.h>
25#include <time.h>
26
27#include "mandoc.h"
28#include "libmandoc.h"
29#include "libroff.h"
30
/*
 * Associates one layout key character with the cell type it selects.
 * Instances are initialised positionally (name first, then key).
 */
struct	tbl_phrase {
	char		 name; /* key character as written in the layout */
	enum tbl_cellt	 key; /* cell type selected by that character */
};
35
36/*
37 * FIXME: we can make this parse a lot nicer by, when an error is
38 * encountered in a layout key, bailing to the next key (i.e. to the
39 * next whitespace then continuing).
40 */
41
/* Number of entries in the keys[] table below. */
#define	KEYS_MAX	 11

/*
 * Table of recognised layout key characters.  cell() lower-cases the
 * input character before comparing, so letters are listed in lower
 * case only.  Both `-' and `_' select a horizontal line.  There is no
 * entry for the double vertical bar: cell() upgrades a `|' that is
 * immediately followed by another `|' to TBL_CELL_DVERT itself.
 */
static	const struct tbl_phrase keys[KEYS_MAX] = {
	{ 'c',		 TBL_CELL_CENTRE },
	{ 'r',		 TBL_CELL_RIGHT },
	{ 'l',		 TBL_CELL_LEFT },
	{ 'n',		 TBL_CELL_NUMBER },
	{ 's',		 TBL_CELL_SPAN },
	{ 'a',		 TBL_CELL_LONG },
	{ '^',		 TBL_CELL_DOWN },
	{ '-',		 TBL_CELL_HORIZ },
	{ '_',		 TBL_CELL_HORIZ },
	{ '=',		 TBL_CELL_DHORIZ },
	{ '|',		 TBL_CELL_VERT }
};
57
/*
 * Forward declarations of the file-local helpers.  tbl_layout() calls
 * row(), which calls cell() for each layout key; cell() allocates the
 * cell with cell_alloc() and hands it to mods() to parse modifiers.
 * head_adjust() is used by cell_alloc() to set a column head's type.
 */
static	int		 mods(struct tbl_node *, struct tbl_cell *,
				int, const char *, int *);
static	int		 cell(struct tbl_node *, struct tbl_row *,
				int, const char *, int *);
static	void		 row(struct tbl_node *, int, const char *, int *);
static	struct tbl_cell *cell_alloc(struct tbl_node *,
				struct tbl_row *, enum tbl_cellt);
static	void		 head_adjust(const struct tbl_cell *,
				struct tbl_head *);
67
68static int
69mods(struct tbl_node *tbl, struct tbl_cell *cp,
70		int ln, const char *p, int *pos)
71{
72	char		 buf[5];
73	int		 i;
74
75	/* Not all types accept modifiers. */
76
77	switch (cp->pos) {
78	case (TBL_CELL_DOWN):
79		/* FALLTHROUGH */
80	case (TBL_CELL_HORIZ):
81		/* FALLTHROUGH */
82	case (TBL_CELL_DHORIZ):
83		/* FALLTHROUGH */
84	case (TBL_CELL_VERT):
85		/* FALLTHROUGH */
86	case (TBL_CELL_DVERT):
87		return(1);
88	default:
89		break;
90	}
91
92mod:
93	/*
94	 * XXX: since, at least for now, modifiers are non-conflicting
95	 * (are separable by value, regardless of position), we let
96	 * modifiers come in any order.  The existing tbl doesn't let
97	 * this happen.
98	 */
99	switch (p[*pos]) {
100	case ('\0'):
101		/* FALLTHROUGH */
102	case (' '):
103		/* FALLTHROUGH */
104	case ('\t'):
105		/* FALLTHROUGH */
106	case (','):
107		/* FALLTHROUGH */
108	case ('.'):
109		return(1);
110	default:
111		break;
112	}
113
114	/* Throw away parenthesised expression. */
115
116	if ('(' == p[*pos]) {
117		(*pos)++;
118		while (p[*pos] && ')' != p[*pos])
119			(*pos)++;
120		if (')' == p[*pos]) {
121			(*pos)++;
122			goto mod;
123		}
124		mandoc_msg(MANDOCERR_TBLLAYOUT,
125				tbl->parse, ln, *pos, NULL);
126		return(0);
127	}
128
129	/* Parse numerical spacing from modifier string. */
130
131	if (isdigit((unsigned char)p[*pos])) {
132		for (i = 0; i < 4; i++) {
133			if ( ! isdigit((unsigned char)p[*pos + i]))
134				break;
135			buf[i] = p[*pos + i];
136		}
137		buf[i] = '\0';
138
139		/* No greater than 4 digits. */
140
141		if (4 == i) {
142			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
143					ln, *pos, NULL);
144			return(0);
145		}
146
147		*pos += i;
148		cp->spacing = (size_t)atoi(buf);
149
150		goto mod;
151		/* NOTREACHED */
152	}
153
154	/* TODO: GNU has many more extensions. */
155
156	switch (tolower((unsigned char)p[(*pos)++])) {
157	case ('z'):
158		cp->flags |= TBL_CELL_WIGN;
159		goto mod;
160	case ('u'):
161		cp->flags |= TBL_CELL_UP;
162		goto mod;
163	case ('e'):
164		cp->flags |= TBL_CELL_EQUAL;
165		goto mod;
166	case ('t'):
167		cp->flags |= TBL_CELL_TALIGN;
168		goto mod;
169	case ('d'):
170		cp->flags |= TBL_CELL_BALIGN;
171		goto mod;
172	case ('w'):  /* XXX for now, ignore minimal column width */
173		goto mod;
174	case ('f'):
175		break;
176	case ('r'):
177		/* FALLTHROUGH */
178	case ('b'):
179		/* FALLTHROUGH */
180	case ('i'):
181		(*pos)--;
182		break;
183	default:
184		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
185				ln, *pos - 1, NULL);
186		return(0);
187	}
188
189	switch (tolower((unsigned char)p[(*pos)++])) {
190	case ('3'):
191		/* FALLTHROUGH */
192	case ('b'):
193		cp->flags |= TBL_CELL_BOLD;
194		goto mod;
195	case ('2'):
196		/* FALLTHROUGH */
197	case ('i'):
198		cp->flags |= TBL_CELL_ITALIC;
199		goto mod;
200	case ('1'):
201		/* FALLTHROUGH */
202	case ('r'):
203		goto mod;
204	default:
205		break;
206	}
207
208	mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
209			ln, *pos - 1, NULL);
210	return(0);
211}
212
213static int
214cell(struct tbl_node *tbl, struct tbl_row *rp,
215		int ln, const char *p, int *pos)
216{
217	int		 i;
218	enum tbl_cellt	 c;
219
220	/* Parse the column position (`r', `R', `|', ...). */
221
222	for (i = 0; i < KEYS_MAX; i++)
223		if (tolower((unsigned char)p[*pos]) == keys[i].name)
224			break;
225
226	if (KEYS_MAX == i) {
227		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
228				ln, *pos, NULL);
229		return(0);
230	}
231
232	c = keys[i].key;
233
234	/*
235	 * If a span cell is found first, raise a warning and abort the
236	 * parse.  If a span cell is found and the last layout element
237	 * isn't a "normal" layout, bail.
238	 *
239	 * FIXME: recover from this somehow?
240	 */
241
242	if (TBL_CELL_SPAN == c) {
243		if (NULL == rp->first) {
244			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
245					ln, *pos, NULL);
246			return(0);
247		} else if (rp->last)
248			switch (rp->last->pos) {
249			case (TBL_CELL_VERT):
250			case (TBL_CELL_DVERT):
251			case (TBL_CELL_HORIZ):
252			case (TBL_CELL_DHORIZ):
253				mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
254						ln, *pos, NULL);
255				return(0);
256			default:
257				break;
258			}
259	}
260
261	/*
262	 * If a vertical spanner is found, we may not be in the first
263	 * row.
264	 */
265
266	if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
267		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
268		return(0);
269	}
270
271	(*pos)++;
272
273	/* Extra check for the double-vertical. */
274
275	if (TBL_CELL_VERT == c && '|' == p[*pos]) {
276		(*pos)++;
277		c = TBL_CELL_DVERT;
278	}
279
280	/* Disallow adjacent spacers. */
281
282	if (rp->last && (TBL_CELL_VERT == c || TBL_CELL_DVERT == c) &&
283			(TBL_CELL_VERT == rp->last->pos ||
284			 TBL_CELL_DVERT == rp->last->pos)) {
285		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
286		return(0);
287	}
288
289	/* Allocate cell then parse its modifiers. */
290
291	return(mods(tbl, cell_alloc(tbl, rp, c), ln, p, pos));
292}
293
294
295static void
296row(struct tbl_node *tbl, int ln, const char *p, int *pos)
297{
298	struct tbl_row	*rp;
299
300row:	/*
301	 * EBNF describing this section:
302	 *
303	 * row		::= row_list [:space:]* [.]?[\n]
304	 * row_list	::= [:space:]* row_elem row_tail
305	 * row_tail	::= [:space:]*[,] row_list |
306	 *                  epsilon
307	 * row_elem	::= [\t\ ]*[:alpha:]+
308	 */
309
310	rp = mandoc_calloc(1, sizeof(struct tbl_row));
311	if (tbl->last_row) {
312		tbl->last_row->next = rp;
313		tbl->last_row = rp;
314	} else
315		tbl->last_row = tbl->first_row = rp;
316
317cell:
318	while (isspace((unsigned char)p[*pos]))
319		(*pos)++;
320
321	/* Safely exit layout context. */
322
323	if ('.' == p[*pos]) {
324		tbl->part = TBL_PART_DATA;
325		if (NULL == tbl->first_row)
326			mandoc_msg(MANDOCERR_TBLNOLAYOUT, tbl->parse,
327					ln, *pos, NULL);
328		(*pos)++;
329		return;
330	}
331
332	/* End (and possibly restart) a row. */
333
334	if (',' == p[*pos]) {
335		(*pos)++;
336		goto row;
337	} else if ('\0' == p[*pos])
338		return;
339
340	if ( ! cell(tbl, rp, ln, p, pos))
341		return;
342
343	goto cell;
344	/* NOTREACHED */
345}
346
/*
 * Entry point for one line of table layout: parse p from its first
 * character.  Errors are reported by the helpers; the return value is
 * unconditionally 1.
 */
int
tbl_layout(struct tbl_node *tbl, int ln, const char *p)
{
	int		 offs = 0;

	row(tbl, ln, p, &offs);

	/* Layout problems are never passed on to the caller. */
	return(1);
}
358
359static struct tbl_cell *
360cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos)
361{
362	struct tbl_cell	*p, *pp;
363	struct tbl_head	*h, *hp;
364
365	p = mandoc_calloc(1, sizeof(struct tbl_cell));
366
367	if (NULL != (pp = rp->last)) {
368		rp->last->next = p;
369		rp->last = p;
370	} else
371		rp->last = rp->first = p;
372
373	p->pos = pos;
374
375	/*
376	 * This is a little bit complicated.  Here we determine the
377	 * header the corresponds to a cell.  We add headers dynamically
378	 * when need be or re-use them, otherwise.  As an example, given
379	 * the following:
380	 *
381	 * 	1  c || l
382	 * 	2  | c | l
383	 * 	3  l l
384	 * 	3  || c | l |.
385	 *
386	 * We first add the new headers (as there are none) in (1); then
387	 * in (2) we insert the first spanner (as it doesn't match up
388	 * with the header); then we re-use the prior data headers,
389	 * skipping over the spanners; then we re-use everything and add
390	 * a last spanner.  Note that VERT headers are made into DVERT
391	 * ones.
392	 */
393
394	h = pp ? pp->head->next : tbl->first_head;
395
396	if (h) {
397		/* Re-use data header. */
398		if (TBL_HEAD_DATA == h->pos &&
399				(TBL_CELL_VERT != p->pos &&
400				 TBL_CELL_DVERT != p->pos)) {
401			p->head = h;
402			return(p);
403		}
404
405		/* Re-use spanner header. */
406		if (TBL_HEAD_DATA != h->pos &&
407				(TBL_CELL_VERT == p->pos ||
408				 TBL_CELL_DVERT == p->pos)) {
409			head_adjust(p, h);
410			p->head = h;
411			return(p);
412		}
413
414		/* Right-shift headers with a new spanner. */
415		if (TBL_HEAD_DATA == h->pos &&
416				(TBL_CELL_VERT == p->pos ||
417				 TBL_CELL_DVERT == p->pos)) {
418			hp = mandoc_calloc(1, sizeof(struct tbl_head));
419			hp->ident = tbl->opts.cols++;
420			hp->prev = h->prev;
421			if (h->prev)
422				h->prev->next = hp;
423			if (h == tbl->first_head)
424				tbl->first_head = hp;
425			h->prev = hp;
426			hp->next = h;
427			head_adjust(p, hp);
428			p->head = hp;
429			return(p);
430		}
431
432		if (NULL != (h = h->next)) {
433			head_adjust(p, h);
434			p->head = h;
435			return(p);
436		}
437
438		/* Fall through to default case... */
439	}
440
441	hp = mandoc_calloc(1, sizeof(struct tbl_head));
442	hp->ident = tbl->opts.cols++;
443
444	if (tbl->last_head) {
445		hp->prev = tbl->last_head;
446		tbl->last_head->next = hp;
447		tbl->last_head = hp;
448	} else
449		tbl->last_head = tbl->first_head = hp;
450
451	head_adjust(p, hp);
452	p->head = hp;
453	return(p);
454}
455
456static void
457head_adjust(const struct tbl_cell *cellp, struct tbl_head *head)
458{
459	if (TBL_CELL_VERT != cellp->pos &&
460			TBL_CELL_DVERT != cellp->pos) {
461		head->pos = TBL_HEAD_DATA;
462		return;
463	}
464
465	if (TBL_CELL_VERT == cellp->pos)
466		if (TBL_HEAD_DVERT != head->pos)
467			head->pos = TBL_HEAD_VERT;
468
469	if (TBL_CELL_DVERT == cellp->pos)
470		head->pos = TBL_HEAD_DVERT;
471}
472
473