1241675Suqs/* $Id: man_validate.c,v 1.80 2012/01/03 15:16:24 kristaps Exp $ */ 2241675Suqs/* 3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4241675Suqs * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> 5241675Suqs * 6241675Suqs * Permission to use, copy, modify, and distribute this software for any 7241675Suqs * purpose with or without fee is hereby granted, provided that the above 8241675Suqs * copyright notice and this permission notice appear in all copies. 9241675Suqs * 10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17241675Suqs */ 18241675Suqs#ifdef HAVE_CONFIG_H 19241675Suqs#include "config.h" 20241675Suqs#endif 21241675Suqs 22241675Suqs#include <sys/types.h> 23241675Suqs 24241675Suqs#include <assert.h> 25241675Suqs#include <ctype.h> 26241675Suqs#include <errno.h> 27241675Suqs#include <limits.h> 28241675Suqs#include <stdarg.h> 29241675Suqs#include <stdlib.h> 30241675Suqs#include <string.h> 31241675Suqs#include <time.h> 32241675Suqs 33241675Suqs#include "man.h" 34241675Suqs#include "mandoc.h" 35241675Suqs#include "libman.h" 36241675Suqs#include "libmandoc.h" 37241675Suqs 38241675Suqs#define CHKARGS struct man *m, struct man_node *n 39241675Suqs 40241675Suqstypedef int (*v_check)(CHKARGS); 41241675Suqs 42241675Suqsstruct man_valid { 43241675Suqs v_check *pres; 44241675Suqs v_check *posts; 45241675Suqs}; 46241675Suqs 47241675Suqsstatic int check_eq0(CHKARGS); 48241675Suqsstatic int check_eq2(CHKARGS); 49241675Suqsstatic int check_le1(CHKARGS); 50241675Suqsstatic int check_ge2(CHKARGS); 51241675Suqsstatic int check_le5(CHKARGS); 52241675Suqsstatic int check_par(CHKARGS); 53241675Suqsstatic int check_part(CHKARGS); 54241675Suqsstatic int check_root(CHKARGS); 55241675Suqsstatic void check_text(CHKARGS); 56241675Suqs 57241675Suqsstatic int post_AT(CHKARGS); 58241675Suqsstatic int post_vs(CHKARGS); 59241675Suqsstatic int post_fi(CHKARGS); 60241675Suqsstatic int post_ft(CHKARGS); 61241675Suqsstatic int post_nf(CHKARGS); 62241675Suqsstatic int post_sec(CHKARGS); 63241675Suqsstatic int post_TH(CHKARGS); 64241675Suqsstatic int post_UC(CHKARGS); 65241675Suqsstatic int pre_sec(CHKARGS); 66241675Suqs 67241675Suqsstatic v_check posts_at[] = { post_AT, NULL }; 68241675Suqsstatic v_check posts_br[] = { post_vs, check_eq0, NULL }; 69241675Suqsstatic v_check posts_eq0[] = { check_eq0, NULL }; 70241675Suqsstatic v_check posts_eq2[] = { check_eq2, NULL }; 71241675Suqsstatic v_check posts_fi[] = { check_eq0, post_fi, NULL }; 72241675Suqsstatic v_check posts_ft[] = { post_ft, NULL }; 73241675Suqsstatic v_check posts_nf[] = { check_eq0, post_nf, NULL }; 74241675Suqsstatic v_check posts_par[] = { check_par, NULL }; 75241675Suqsstatic v_check posts_part[] = { check_part, NULL }; 76241675Suqsstatic v_check posts_sec[] = { post_sec, NULL }; 77241675Suqsstatic v_check posts_sp[] = { post_vs, check_le1, NULL }; 78241675Suqsstatic v_check posts_th[] = { check_ge2, check_le5, post_TH, NULL }; 79241675Suqsstatic v_check posts_uc[] = { post_UC, NULL }; 80241675Suqsstatic v_check pres_sec[] = { pre_sec, NULL }; 81241675Suqs 82241675Suqsstatic const struct man_valid man_valids[MAN_MAX] = { 83241675Suqs { NULL, posts_br }, /* br */ 84241675Suqs { NULL, posts_th }, /* TH */ 85241675Suqs { pres_sec, posts_sec }, /* SH */ 86241675Suqs { pres_sec, posts_sec }, /* SS */ 87241675Suqs { NULL, NULL }, /* TP */ 88241675Suqs { NULL, posts_par }, /* LP */ 89241675Suqs { NULL, posts_par }, /* PP */ 90241675Suqs { NULL, posts_par }, /* P */ 91241675Suqs { NULL, NULL }, /* IP */ 92241675Suqs { NULL, NULL }, /* HP */ 93241675Suqs { NULL, NULL }, /* SM */ 94241675Suqs { NULL, NULL }, /* SB */ 95241675Suqs { NULL, NULL }, /* BI */ 96241675Suqs { NULL, NULL }, /* IB */ 97241675Suqs { NULL, NULL }, /* BR */ 98241675Suqs { NULL, NULL }, /* RB */ 99241675Suqs { NULL, NULL }, /* R */ 100241675Suqs { NULL, NULL }, /* B */ 101241675Suqs { NULL, NULL }, /* I */ 102241675Suqs { NULL, NULL }, /* IR */ 103241675Suqs { NULL, NULL }, /* RI */ 104241675Suqs { NULL, posts_eq0 }, /* na */ 105241675Suqs { NULL, posts_sp }, /* sp */ 106241675Suqs { NULL, posts_nf }, /* nf */ 107241675Suqs { NULL, posts_fi }, /* fi */ 108241675Suqs { NULL, NULL }, /* RE */ 109241675Suqs { NULL, posts_part }, /* RS */ 110241675Suqs { NULL, NULL }, /* DT */ 111241675Suqs { NULL, posts_uc }, /* UC */ 112241675Suqs { NULL, NULL }, /* PD */ 113241675Suqs { NULL, posts_at }, /* AT */ 114241675Suqs { NULL, NULL }, /* in */ 115241675Suqs { NULL, posts_ft }, /* ft */ 116241675Suqs { NULL, posts_eq2 }, /* OP */ 117241675Suqs}; 118241675Suqs 119241675Suqs 120241675Suqsint 121241675Suqsman_valid_pre(struct man *m, struct man_node *n) 122241675Suqs{ 123241675Suqs v_check *cp; 124241675Suqs 125241675Suqs switch (n->type) { 126241675Suqs case (MAN_TEXT): 127241675Suqs /* FALLTHROUGH */ 128241675Suqs case (MAN_ROOT): 129241675Suqs /* FALLTHROUGH */ 130241675Suqs case (MAN_EQN): 131241675Suqs /* FALLTHROUGH */ 132241675Suqs case (MAN_TBL): 133241675Suqs return(1); 134241675Suqs default: 135241675Suqs break; 136241675Suqs } 137241675Suqs 138241675Suqs if (NULL == (cp = man_valids[n->tok].pres)) 139241675Suqs return(1); 140241675Suqs for ( ; *cp; cp++) 141241675Suqs if ( ! (*cp)(m, n)) 142241675Suqs return(0); 143241675Suqs return(1); 144241675Suqs} 145241675Suqs 146241675Suqs 147241675Suqsint 148241675Suqsman_valid_post(struct man *m) 149241675Suqs{ 150241675Suqs v_check *cp; 151241675Suqs 152241675Suqs if (MAN_VALID & m->last->flags) 153241675Suqs return(1); 154241675Suqs m->last->flags |= MAN_VALID; 155241675Suqs 156241675Suqs switch (m->last->type) { 157241675Suqs case (MAN_TEXT): 158241675Suqs check_text(m, m->last); 159241675Suqs return(1); 160241675Suqs case (MAN_ROOT): 161241675Suqs return(check_root(m, m->last)); 162241675Suqs case (MAN_EQN): 163241675Suqs /* FALLTHROUGH */ 164241675Suqs case (MAN_TBL): 165241675Suqs return(1); 166241675Suqs default: 167241675Suqs break; 168241675Suqs } 169241675Suqs 170241675Suqs if (NULL == (cp = man_valids[m->last->tok].posts)) 171241675Suqs return(1); 172241675Suqs for ( ; *cp; cp++) 173241675Suqs if ( ! (*cp)(m, m->last)) 174241675Suqs return(0); 175241675Suqs 176241675Suqs return(1); 177241675Suqs} 178241675Suqs 179241675Suqs 180241675Suqsstatic int 181241675Suqscheck_root(CHKARGS) 182241675Suqs{ 183241675Suqs 184241675Suqs if (MAN_BLINE & m->flags) 185241675Suqs man_nmsg(m, n, MANDOCERR_SCOPEEXIT); 186241675Suqs else if (MAN_ELINE & m->flags) 187241675Suqs man_nmsg(m, n, MANDOCERR_SCOPEEXIT); 188241675Suqs 189241675Suqs m->flags &= ~MAN_BLINE; 190241675Suqs m->flags &= ~MAN_ELINE; 191241675Suqs 192241675Suqs if (NULL == m->first->child) { 193241675Suqs man_nmsg(m, n, MANDOCERR_NODOCBODY); 194241675Suqs return(0); 195241675Suqs } else if (NULL == m->meta.title) { 196241675Suqs man_nmsg(m, n, MANDOCERR_NOTITLE); 197241675Suqs 198241675Suqs /* 199241675Suqs * If a title hasn't been set, do so now (by 200241675Suqs * implication, date and section also aren't set). 201241675Suqs */ 202241675Suqs 203241675Suqs m->meta.title = mandoc_strdup("unknown"); 204241675Suqs m->meta.msec = mandoc_strdup("1"); 205241675Suqs m->meta.date = mandoc_normdate 206241675Suqs (m->parse, NULL, n->line, n->pos); 207241675Suqs } 208241675Suqs 209241675Suqs return(1); 210241675Suqs} 211241675Suqs 212241675Suqsstatic void 213241675Suqscheck_text(CHKARGS) 214241675Suqs{ 215241675Suqs char *cp, *p; 216241675Suqs 217241675Suqs if (MAN_LITERAL & m->flags) 218241675Suqs return; 219241675Suqs 220241675Suqs cp = n->string; 221241675Suqs for (p = cp; NULL != (p = strchr(p, '\t')); p++) 222241675Suqs man_pmsg(m, n->line, (int)(p - cp), MANDOCERR_BADTAB); 223241675Suqs} 224241675Suqs 225241675Suqs#define INEQ_DEFINE(x, ineq, name) \ 226241675Suqsstatic int \ 227241675Suqscheck_##name(CHKARGS) \ 228241675Suqs{ \ 229241675Suqs if (n->nchild ineq (x)) \ 230241675Suqs return(1); \ 231241675Suqs mandoc_vmsg(MANDOCERR_ARGCOUNT, m->parse, n->line, n->pos, \ 232241675Suqs "line arguments %s %d (have %d)", \ 233241675Suqs #ineq, (x), n->nchild); \ 234241675Suqs return(1); \ 235241675Suqs} 236241675Suqs 237241675SuqsINEQ_DEFINE(0, ==, eq0) 238241675SuqsINEQ_DEFINE(2, ==, eq2) 239241675SuqsINEQ_DEFINE(1, <=, le1) 240241675SuqsINEQ_DEFINE(2, >=, ge2) 241241675SuqsINEQ_DEFINE(5, <=, le5) 242241675Suqs 243241675Suqsstatic int 244241675Suqspost_ft(CHKARGS) 245241675Suqs{ 246241675Suqs char *cp; 247241675Suqs int ok; 248241675Suqs 249241675Suqs if (0 == n->nchild) 250241675Suqs return(1); 251241675Suqs 252241675Suqs ok = 0; 253241675Suqs cp = n->child->string; 254241675Suqs switch (*cp) { 255241675Suqs case ('1'): 256241675Suqs /* FALLTHROUGH */ 257241675Suqs case ('2'): 258241675Suqs /* FALLTHROUGH */ 259241675Suqs case ('3'): 260241675Suqs /* FALLTHROUGH */ 261241675Suqs case ('4'): 262241675Suqs /* FALLTHROUGH */ 263241675Suqs case ('I'): 264241675Suqs /* FALLTHROUGH */ 265241675Suqs case ('P'): 266241675Suqs /* FALLTHROUGH */ 267241675Suqs case ('R'): 268241675Suqs if ('\0' == cp[1]) 269241675Suqs ok = 1; 270241675Suqs break; 271241675Suqs case ('B'): 272241675Suqs if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2])) 273241675Suqs ok = 1; 274241675Suqs break; 275241675Suqs case ('C'): 276241675Suqs if ('W' == cp[1] && '\0' == cp[2]) 277241675Suqs ok = 1; 278241675Suqs break; 279241675Suqs default: 280241675Suqs break; 281241675Suqs } 282241675Suqs 283241675Suqs if (0 == ok) { 284241675Suqs mandoc_vmsg 285241675Suqs (MANDOCERR_BADFONT, m->parse, 286241675Suqs n->line, n->pos, "%s", cp); 287241675Suqs *cp = '\0'; 288241675Suqs } 289241675Suqs 290241675Suqs if (1 < n->nchild) 291241675Suqs mandoc_vmsg 292241675Suqs (MANDOCERR_ARGCOUNT, m->parse, n->line, 293241675Suqs n->pos, "want one child (have %d)", 294241675Suqs n->nchild); 295241675Suqs 296241675Suqs return(1); 297241675Suqs} 298241675Suqs 299241675Suqsstatic int 300241675Suqspre_sec(CHKARGS) 301241675Suqs{ 302241675Suqs 303241675Suqs if (MAN_BLOCK == n->type) 304241675Suqs m->flags &= ~MAN_LITERAL; 305241675Suqs return(1); 306241675Suqs} 307241675Suqs 308241675Suqsstatic int 309241675Suqspost_sec(CHKARGS) 310241675Suqs{ 311241675Suqs 312241675Suqs if ( ! (MAN_HEAD == n->type && 0 == n->nchild)) 313241675Suqs return(1); 314241675Suqs 315241675Suqs man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT); 316241675Suqs return(0); 317241675Suqs} 318241675Suqs 319241675Suqsstatic int 320241675Suqscheck_part(CHKARGS) 321241675Suqs{ 322241675Suqs 323241675Suqs if (MAN_BODY == n->type && 0 == n->nchild) 324241675Suqs mandoc_msg(MANDOCERR_ARGCWARN, m->parse, n->line, 325241675Suqs n->pos, "want children (have none)"); 326241675Suqs 327241675Suqs return(1); 328241675Suqs} 329241675Suqs 330241675Suqs 331241675Suqsstatic int 332241675Suqscheck_par(CHKARGS) 333241675Suqs{ 334241675Suqs 335241675Suqs switch (n->type) { 336241675Suqs case (MAN_BLOCK): 337241675Suqs if (0 == n->body->nchild) 338241675Suqs man_node_delete(m, n); 339241675Suqs break; 340241675Suqs case (MAN_BODY): 341241675Suqs if (0 == n->nchild) 342241675Suqs man_nmsg(m, n, MANDOCERR_IGNPAR); 343241675Suqs break; 344241675Suqs case (MAN_HEAD): 345241675Suqs if (n->nchild) 346241675Suqs man_nmsg(m, n, MANDOCERR_ARGSLOST); 347241675Suqs break; 348241675Suqs default: 349241675Suqs break; 350241675Suqs } 351241675Suqs 352241675Suqs return(1); 353241675Suqs} 354241675Suqs 355241675Suqs 356241675Suqsstatic int 357241675Suqspost_TH(CHKARGS) 358241675Suqs{ 359241675Suqs const char *p; 360241675Suqs int line, pos; 361241675Suqs 362241675Suqs if (m->meta.title) 363241675Suqs free(m->meta.title); 364241675Suqs if (m->meta.vol) 365241675Suqs free(m->meta.vol); 366241675Suqs if (m->meta.source) 367241675Suqs free(m->meta.source); 368241675Suqs if (m->meta.msec) 369241675Suqs free(m->meta.msec); 370241675Suqs if (m->meta.date) 371241675Suqs free(m->meta.date); 372241675Suqs 373241675Suqs line = n->line; 374241675Suqs pos = n->pos; 375241675Suqs m->meta.title = m->meta.vol = m->meta.date = 376241675Suqs m->meta.msec = m->meta.source = NULL; 377241675Suqs 378241675Suqs /* ->TITLE<- MSEC DATE SOURCE VOL */ 379241675Suqs 380241675Suqs n = n->child; 381241675Suqs if (n && n->string) { 382241675Suqs for (p = n->string; '\0' != *p; p++) { 383241675Suqs /* Only warn about this once... */ 384241675Suqs if (isalpha((unsigned char)*p) && 385241675Suqs ! isupper((unsigned char)*p)) { 386241675Suqs man_nmsg(m, n, MANDOCERR_UPPERCASE); 387241675Suqs break; 388241675Suqs } 389241675Suqs } 390241675Suqs m->meta.title = mandoc_strdup(n->string); 391241675Suqs } else 392241675Suqs m->meta.title = mandoc_strdup(""); 393241675Suqs 394241675Suqs /* TITLE ->MSEC<- DATE SOURCE VOL */ 395241675Suqs 396241675Suqs if (n) 397241675Suqs n = n->next; 398241675Suqs if (n && n->string) 399241675Suqs m->meta.msec = mandoc_strdup(n->string); 400241675Suqs else 401241675Suqs m->meta.msec = mandoc_strdup(""); 402241675Suqs 403241675Suqs /* TITLE MSEC ->DATE<- SOURCE VOL */ 404241675Suqs 405241675Suqs if (n) 406241675Suqs n = n->next; 407241675Suqs if (n && n->string && '\0' != n->string[0]) { 408241675Suqs pos = n->pos; 409241675Suqs m->meta.date = mandoc_normdate 410241675Suqs (m->parse, n->string, line, pos); 411241675Suqs } else 412241675Suqs m->meta.date = mandoc_strdup(""); 413241675Suqs 414241675Suqs /* TITLE MSEC DATE ->SOURCE<- VOL */ 415241675Suqs 416241675Suqs if (n && (n = n->next)) 417241675Suqs m->meta.source = mandoc_strdup(n->string); 418241675Suqs 419241675Suqs /* TITLE MSEC DATE SOURCE ->VOL<- */ 420241675Suqs /* If missing, use the default VOL name for MSEC. */ 421241675Suqs 422241675Suqs if (n && (n = n->next)) 423241675Suqs m->meta.vol = mandoc_strdup(n->string); 424241675Suqs else if ('\0' != m->meta.msec[0] && 425241675Suqs (NULL != (p = mandoc_a2msec(m->meta.msec)))) 426241675Suqs m->meta.vol = mandoc_strdup(p); 427241675Suqs 428241675Suqs /* 429241675Suqs * Remove the `TH' node after we've processed it for our 430241675Suqs * meta-data. 431241675Suqs */ 432241675Suqs man_node_delete(m, m->last); 433241675Suqs return(1); 434241675Suqs} 435241675Suqs 436241675Suqsstatic int 437241675Suqspost_nf(CHKARGS) 438241675Suqs{ 439241675Suqs 440241675Suqs if (MAN_LITERAL & m->flags) 441241675Suqs man_nmsg(m, n, MANDOCERR_SCOPEREP); 442241675Suqs 443241675Suqs m->flags |= MAN_LITERAL; 444241675Suqs return(1); 445241675Suqs} 446241675Suqs 447241675Suqsstatic int 448241675Suqspost_fi(CHKARGS) 449241675Suqs{ 450241675Suqs 451241675Suqs if ( ! (MAN_LITERAL & m->flags)) 452241675Suqs man_nmsg(m, n, MANDOCERR_WNOSCOPE); 453241675Suqs 454241675Suqs m->flags &= ~MAN_LITERAL; 455241675Suqs return(1); 456241675Suqs} 457241675Suqs 458241675Suqsstatic int 459241675Suqspost_UC(CHKARGS) 460241675Suqs{ 461241675Suqs static const char * const bsd_versions[] = { 462241675Suqs "3rd Berkeley Distribution", 463241675Suqs "4th Berkeley Distribution", 464241675Suqs "4.2 Berkeley Distribution", 465241675Suqs "4.3 Berkeley Distribution", 466241675Suqs "4.4 Berkeley Distribution", 467241675Suqs }; 468241675Suqs 469241675Suqs const char *p, *s; 470241675Suqs 471241675Suqs n = n->child; 472241675Suqs 473241675Suqs if (NULL == n || MAN_TEXT != n->type) 474241675Suqs p = bsd_versions[0]; 475241675Suqs else { 476241675Suqs s = n->string; 477241675Suqs if (0 == strcmp(s, "3")) 478241675Suqs p = bsd_versions[0]; 479241675Suqs else if (0 == strcmp(s, "4")) 480241675Suqs p = bsd_versions[1]; 481241675Suqs else if (0 == strcmp(s, "5")) 482241675Suqs p = bsd_versions[2]; 483241675Suqs else if (0 == strcmp(s, "6")) 484241675Suqs p = bsd_versions[3]; 485241675Suqs else if (0 == strcmp(s, "7")) 486241675Suqs p = bsd_versions[4]; 487241675Suqs else 488241675Suqs p = bsd_versions[0]; 489241675Suqs } 490241675Suqs 491241675Suqs if (m->meta.source) 492241675Suqs free(m->meta.source); 493241675Suqs 494241675Suqs m->meta.source = mandoc_strdup(p); 495241675Suqs return(1); 496241675Suqs} 497241675Suqs 498241675Suqsstatic int 499241675Suqspost_AT(CHKARGS) 500241675Suqs{ 501241675Suqs static const char * const unix_versions[] = { 502241675Suqs "7th Edition", 503241675Suqs "System III", 504241675Suqs "System V", 505241675Suqs "System V Release 2", 506241675Suqs }; 507241675Suqs 508241675Suqs const char *p, *s; 509241675Suqs struct man_node *nn; 510241675Suqs 511241675Suqs n = n->child; 512241675Suqs 513241675Suqs if (NULL == n || MAN_TEXT != n->type) 514241675Suqs p = unix_versions[0]; 515241675Suqs else { 516241675Suqs s = n->string; 517241675Suqs if (0 == strcmp(s, "3")) 518241675Suqs p = unix_versions[0]; 519241675Suqs else if (0 == strcmp(s, "4")) 520241675Suqs p = unix_versions[1]; 521241675Suqs else if (0 == strcmp(s, "5")) { 522241675Suqs nn = n->next; 523241675Suqs if (nn && MAN_TEXT == nn->type && nn->string[0]) 524241675Suqs p = unix_versions[3]; 525241675Suqs else 526241675Suqs p = unix_versions[2]; 527241675Suqs } else 528241675Suqs p = unix_versions[0]; 529241675Suqs } 530241675Suqs 531241675Suqs if (m->meta.source) 532241675Suqs free(m->meta.source); 533241675Suqs 534241675Suqs m->meta.source = mandoc_strdup(p); 535241675Suqs return(1); 536241675Suqs} 537241675Suqs 538241675Suqsstatic int 539241675Suqspost_vs(CHKARGS) 540241675Suqs{ 541241675Suqs 542241675Suqs /* 543241675Suqs * Don't warn about this because it occurs in pod2man and would 544241675Suqs * cause considerable (unfixable) warnage. 545241675Suqs */ 546241675Suqs if (NULL == n->prev && MAN_ROOT == n->parent->type) 547241675Suqs man_node_delete(m, n); 548241675Suqs 549241675Suqs return(1); 550241675Suqs} 551