1/*	$Id: man_validate.c,v 1.80 2012/01/03 15:16:24 kristaps Exp $ */
2/*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#ifdef HAVE_CONFIG_H
19#include "config.h"
20#endif
21
22#include <sys/types.h>
23
24#include <assert.h>
25#include <ctype.h>
26#include <errno.h>
27#include <limits.h>
28#include <stdarg.h>
29#include <stdlib.h>
30#include <string.h>
31#include <time.h>
32
33#include "man.h"
34#include "mandoc.h"
35#include "libman.h"
36#include "libmandoc.h"
37
/* Parameter list shared by every validation callback in this file. */
#define	CHKARGS	  struct man *m, struct man_node *n

/*
 * A validation callback.  The dispatch loops in man_valid_pre() and
 * man_valid_post() stop and return 0 as soon as a callback returns 0.
 */
typedef	int	(*v_check)(CHKARGS);

/*
 * Per-macro validation entry.  Each member is either NULL (nothing to
 * run) or points to a NULL-terminated array of callbacks.
 */
struct	man_valid {
	v_check	 *pres;		/* run by man_valid_pre() */
	v_check	 *posts;	/* run by man_valid_post() */
};
46
/* Argument-count checks generated by INEQ_DEFINE(). */
static	int	  check_eq0(CHKARGS);
static	int	  check_eq2(CHKARGS);
static	int	  check_ge2(CHKARGS);
static	int	  check_le1(CHKARGS);
static	int	  check_le5(CHKARGS);

/* Structural checks on blocks, text and the document root. */
static	int	  check_par(CHKARGS);
static	int	  check_part(CHKARGS);
static	int	  check_root(CHKARGS);
static	void	  check_text(CHKARGS);

/* Macro-specific checks run before a node is processed. */
static	int	  pre_sec(CHKARGS);

/* Macro-specific checks run after a node has been closed. */
static	int	  post_AT(CHKARGS);
static	int	  post_TH(CHKARGS);
static	int	  post_UC(CHKARGS);
static	int	  post_fi(CHKARGS);
static	int	  post_ft(CHKARGS);
static	int	  post_nf(CHKARGS);
static	int	  post_sec(CHKARGS);
static	int	  post_vs(CHKARGS);
66
67static	v_check	  posts_at[] = { post_AT, NULL };
68static	v_check	  posts_br[] = { post_vs, check_eq0, NULL };
69static	v_check	  posts_eq0[] = { check_eq0, NULL };
70static	v_check	  posts_eq2[] = { check_eq2, NULL };
71static	v_check	  posts_fi[] = { check_eq0, post_fi, NULL };
72static	v_check	  posts_ft[] = { post_ft, NULL };
73static	v_check	  posts_nf[] = { check_eq0, post_nf, NULL };
74static	v_check	  posts_par[] = { check_par, NULL };
75static	v_check	  posts_part[] = { check_part, NULL };
76static	v_check	  posts_sec[] = { post_sec, NULL };
77static	v_check	  posts_sp[] = { post_vs, check_le1, NULL };
78static	v_check	  posts_th[] = { check_ge2, check_le5, post_TH, NULL };
79static	v_check	  posts_uc[] = { post_UC, NULL };
80static	v_check	  pres_sec[] = { pre_sec, NULL };
81
/*
 * Validation table, indexed by a node's macro token (n->tok) in
 * man_valid_pre() and man_valid_post().  Each entry gives the pre- and
 * post-validation check lists for one macro; NULL means no checks.
 * The entries are positional: their order has to match the macro token
 * numbering (declared outside this file -- presumably in man.h).
 */
static	const struct man_valid man_valids[MAN_MAX] = {
	{ NULL, posts_br }, /* br */
	{ NULL, posts_th }, /* TH */
	{ pres_sec, posts_sec }, /* SH */
	{ pres_sec, posts_sec }, /* SS */
	{ NULL, NULL }, /* TP */
	{ NULL, posts_par }, /* LP */
	{ NULL, posts_par }, /* PP */
	{ NULL, posts_par }, /* P */
	{ NULL, NULL }, /* IP */
	{ NULL, NULL }, /* HP */
	{ NULL, NULL }, /* SM */
	{ NULL, NULL }, /* SB */
	{ NULL, NULL }, /* BI */
	{ NULL, NULL }, /* IB */
	{ NULL, NULL }, /* BR */
	{ NULL, NULL }, /* RB */
	{ NULL, NULL }, /* R */
	{ NULL, NULL }, /* B */
	{ NULL, NULL }, /* I */
	{ NULL, NULL }, /* IR */
	{ NULL, NULL }, /* RI */
	{ NULL, posts_eq0 }, /* na */
	{ NULL, posts_sp }, /* sp */
	{ NULL, posts_nf }, /* nf */
	{ NULL, posts_fi }, /* fi */
	{ NULL, NULL }, /* RE */
	{ NULL, posts_part }, /* RS */
	{ NULL, NULL }, /* DT */
	{ NULL, posts_uc }, /* UC */
	{ NULL, NULL }, /* PD */
	{ NULL, posts_at }, /* AT */
	{ NULL, NULL }, /* in */
	{ NULL, posts_ft }, /* ft */
	{ NULL, posts_eq2 }, /* OP */
};
118
119
120int
121man_valid_pre(struct man *m, struct man_node *n)
122{
123	v_check		*cp;
124
125	switch (n->type) {
126	case (MAN_TEXT):
127		/* FALLTHROUGH */
128	case (MAN_ROOT):
129		/* FALLTHROUGH */
130	case (MAN_EQN):
131		/* FALLTHROUGH */
132	case (MAN_TBL):
133		return(1);
134	default:
135		break;
136	}
137
138	if (NULL == (cp = man_valids[n->tok].pres))
139		return(1);
140	for ( ; *cp; cp++)
141		if ( ! (*cp)(m, n))
142			return(0);
143	return(1);
144}
145
146
147int
148man_valid_post(struct man *m)
149{
150	v_check		*cp;
151
152	if (MAN_VALID & m->last->flags)
153		return(1);
154	m->last->flags |= MAN_VALID;
155
156	switch (m->last->type) {
157	case (MAN_TEXT):
158		check_text(m, m->last);
159		return(1);
160	case (MAN_ROOT):
161		return(check_root(m, m->last));
162	case (MAN_EQN):
163		/* FALLTHROUGH */
164	case (MAN_TBL):
165		return(1);
166	default:
167		break;
168	}
169
170	if (NULL == (cp = man_valids[m->last->tok].posts))
171		return(1);
172	for ( ; *cp; cp++)
173		if ( ! (*cp)(m, m->last))
174			return(0);
175
176	return(1);
177}
178
179
180static int
181check_root(CHKARGS)
182{
183
184	if (MAN_BLINE & m->flags)
185		man_nmsg(m, n, MANDOCERR_SCOPEEXIT);
186	else if (MAN_ELINE & m->flags)
187		man_nmsg(m, n, MANDOCERR_SCOPEEXIT);
188
189	m->flags &= ~MAN_BLINE;
190	m->flags &= ~MAN_ELINE;
191
192	if (NULL == m->first->child) {
193		man_nmsg(m, n, MANDOCERR_NODOCBODY);
194		return(0);
195	} else if (NULL == m->meta.title) {
196		man_nmsg(m, n, MANDOCERR_NOTITLE);
197
198		/*
199		 * If a title hasn't been set, do so now (by
200		 * implication, date and section also aren't set).
201		 */
202
203	        m->meta.title = mandoc_strdup("unknown");
204		m->meta.msec = mandoc_strdup("1");
205		m->meta.date = mandoc_normdate
206			(m->parse, NULL, n->line, n->pos);
207	}
208
209	return(1);
210}
211
212static void
213check_text(CHKARGS)
214{
215	char		*cp, *p;
216
217	if (MAN_LITERAL & m->flags)
218		return;
219
220	cp = n->string;
221	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
222		man_pmsg(m, n->line, (int)(p - cp), MANDOCERR_BADTAB);
223}
224
225#define	INEQ_DEFINE(x, ineq, name) \
226static int \
227check_##name(CHKARGS) \
228{ \
229	if (n->nchild ineq (x)) \
230		return(1); \
231	mandoc_vmsg(MANDOCERR_ARGCOUNT, m->parse, n->line, n->pos, \
232			"line arguments %s %d (have %d)", \
233			#ineq, (x), n->nchild); \
234	return(1); \
235}
236
237INEQ_DEFINE(0, ==, eq0)
238INEQ_DEFINE(2, ==, eq2)
239INEQ_DEFINE(1, <=, le1)
240INEQ_DEFINE(2, >=, ge2)
241INEQ_DEFINE(5, <=, le5)
242
243static int
244post_ft(CHKARGS)
245{
246	char	*cp;
247	int	 ok;
248
249	if (0 == n->nchild)
250		return(1);
251
252	ok = 0;
253	cp = n->child->string;
254	switch (*cp) {
255	case ('1'):
256		/* FALLTHROUGH */
257	case ('2'):
258		/* FALLTHROUGH */
259	case ('3'):
260		/* FALLTHROUGH */
261	case ('4'):
262		/* FALLTHROUGH */
263	case ('I'):
264		/* FALLTHROUGH */
265	case ('P'):
266		/* FALLTHROUGH */
267	case ('R'):
268		if ('\0' == cp[1])
269			ok = 1;
270		break;
271	case ('B'):
272		if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
273			ok = 1;
274		break;
275	case ('C'):
276		if ('W' == cp[1] && '\0' == cp[2])
277			ok = 1;
278		break;
279	default:
280		break;
281	}
282
283	if (0 == ok) {
284		mandoc_vmsg
285			(MANDOCERR_BADFONT, m->parse,
286			 n->line, n->pos, "%s", cp);
287		*cp = '\0';
288	}
289
290	if (1 < n->nchild)
291		mandoc_vmsg
292			(MANDOCERR_ARGCOUNT, m->parse, n->line,
293			 n->pos, "want one child (have %d)",
294			 n->nchild);
295
296	return(1);
297}
298
299static int
300pre_sec(CHKARGS)
301{
302
303	if (MAN_BLOCK == n->type)
304		m->flags &= ~MAN_LITERAL;
305	return(1);
306}
307
308static int
309post_sec(CHKARGS)
310{
311
312	if ( ! (MAN_HEAD == n->type && 0 == n->nchild))
313		return(1);
314
315	man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT);
316	return(0);
317}
318
319static int
320check_part(CHKARGS)
321{
322
323	if (MAN_BODY == n->type && 0 == n->nchild)
324		mandoc_msg(MANDOCERR_ARGCWARN, m->parse, n->line,
325				n->pos, "want children (have none)");
326
327	return(1);
328}
329
330
331static int
332check_par(CHKARGS)
333{
334
335	switch (n->type) {
336	case (MAN_BLOCK):
337		if (0 == n->body->nchild)
338			man_node_delete(m, n);
339		break;
340	case (MAN_BODY):
341		if (0 == n->nchild)
342			man_nmsg(m, n, MANDOCERR_IGNPAR);
343		break;
344	case (MAN_HEAD):
345		if (n->nchild)
346			man_nmsg(m, n, MANDOCERR_ARGSLOST);
347		break;
348	default:
349		break;
350	}
351
352	return(1);
353}
354
355
356static int
357post_TH(CHKARGS)
358{
359	const char	*p;
360	int		 line, pos;
361
362	if (m->meta.title)
363		free(m->meta.title);
364	if (m->meta.vol)
365		free(m->meta.vol);
366	if (m->meta.source)
367		free(m->meta.source);
368	if (m->meta.msec)
369		free(m->meta.msec);
370	if (m->meta.date)
371		free(m->meta.date);
372
373	line = n->line;
374	pos = n->pos;
375	m->meta.title = m->meta.vol = m->meta.date =
376		m->meta.msec = m->meta.source = NULL;
377
378	/* ->TITLE<- MSEC DATE SOURCE VOL */
379
380	n = n->child;
381	if (n && n->string) {
382		for (p = n->string; '\0' != *p; p++) {
383			/* Only warn about this once... */
384			if (isalpha((unsigned char)*p) &&
385					! isupper((unsigned char)*p)) {
386				man_nmsg(m, n, MANDOCERR_UPPERCASE);
387				break;
388			}
389		}
390		m->meta.title = mandoc_strdup(n->string);
391	} else
392		m->meta.title = mandoc_strdup("");
393
394	/* TITLE ->MSEC<- DATE SOURCE VOL */
395
396	if (n)
397		n = n->next;
398	if (n && n->string)
399		m->meta.msec = mandoc_strdup(n->string);
400	else
401		m->meta.msec = mandoc_strdup("");
402
403	/* TITLE MSEC ->DATE<- SOURCE VOL */
404
405	if (n)
406		n = n->next;
407	if (n && n->string && '\0' != n->string[0]) {
408		pos = n->pos;
409		m->meta.date = mandoc_normdate
410		    (m->parse, n->string, line, pos);
411	} else
412		m->meta.date = mandoc_strdup("");
413
414	/* TITLE MSEC DATE ->SOURCE<- VOL */
415
416	if (n && (n = n->next))
417		m->meta.source = mandoc_strdup(n->string);
418
419	/* TITLE MSEC DATE SOURCE ->VOL<- */
420	/* If missing, use the default VOL name for MSEC. */
421
422	if (n && (n = n->next))
423		m->meta.vol = mandoc_strdup(n->string);
424	else if ('\0' != m->meta.msec[0] &&
425	    (NULL != (p = mandoc_a2msec(m->meta.msec))))
426		m->meta.vol = mandoc_strdup(p);
427
428	/*
429	 * Remove the `TH' node after we've processed it for our
430	 * meta-data.
431	 */
432	man_node_delete(m, m->last);
433	return(1);
434}
435
436static int
437post_nf(CHKARGS)
438{
439
440	if (MAN_LITERAL & m->flags)
441		man_nmsg(m, n, MANDOCERR_SCOPEREP);
442
443	m->flags |= MAN_LITERAL;
444	return(1);
445}
446
447static int
448post_fi(CHKARGS)
449{
450
451	if ( ! (MAN_LITERAL & m->flags))
452		man_nmsg(m, n, MANDOCERR_WNOSCOPE);
453
454	m->flags &= ~MAN_LITERAL;
455	return(1);
456}
457
458static int
459post_UC(CHKARGS)
460{
461	static const char * const bsd_versions[] = {
462	    "3rd Berkeley Distribution",
463	    "4th Berkeley Distribution",
464	    "4.2 Berkeley Distribution",
465	    "4.3 Berkeley Distribution",
466	    "4.4 Berkeley Distribution",
467	};
468
469	const char	*p, *s;
470
471	n = n->child;
472
473	if (NULL == n || MAN_TEXT != n->type)
474		p = bsd_versions[0];
475	else {
476		s = n->string;
477		if (0 == strcmp(s, "3"))
478			p = bsd_versions[0];
479		else if (0 == strcmp(s, "4"))
480			p = bsd_versions[1];
481		else if (0 == strcmp(s, "5"))
482			p = bsd_versions[2];
483		else if (0 == strcmp(s, "6"))
484			p = bsd_versions[3];
485		else if (0 == strcmp(s, "7"))
486			p = bsd_versions[4];
487		else
488			p = bsd_versions[0];
489	}
490
491	if (m->meta.source)
492		free(m->meta.source);
493
494	m->meta.source = mandoc_strdup(p);
495	return(1);
496}
497
498static int
499post_AT(CHKARGS)
500{
501	static const char * const unix_versions[] = {
502	    "7th Edition",
503	    "System III",
504	    "System V",
505	    "System V Release 2",
506	};
507
508	const char	*p, *s;
509	struct man_node	*nn;
510
511	n = n->child;
512
513	if (NULL == n || MAN_TEXT != n->type)
514		p = unix_versions[0];
515	else {
516		s = n->string;
517		if (0 == strcmp(s, "3"))
518			p = unix_versions[0];
519		else if (0 == strcmp(s, "4"))
520			p = unix_versions[1];
521		else if (0 == strcmp(s, "5")) {
522			nn = n->next;
523			if (nn && MAN_TEXT == nn->type && nn->string[0])
524				p = unix_versions[3];
525			else
526				p = unix_versions[2];
527		} else
528			p = unix_versions[0];
529	}
530
531	if (m->meta.source)
532		free(m->meta.source);
533
534	m->meta.source = mandoc_strdup(p);
535	return(1);
536}
537
538static int
539post_vs(CHKARGS)
540{
541
542	/*
543	 * Don't warn about this because it occurs in pod2man and would
544	 * cause considerable (unfixable) warnage.
545	 */
546	if (NULL == n->prev && MAN_ROOT == n->parent->type)
547		man_node_delete(m, n);
548
549	return(1);
550}
551