1/* $Id: man_validate.c,v 1.156 2021/08/10 12:55:03 schwarze Exp $ */ 2/* 3 * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Validation module for man(7) syntax trees used by mandoc(1). 19 */ 20#include "config.h" 21 22#include <sys/types.h> 23 24#include <assert.h> 25#include <ctype.h> 26#include <errno.h> 27#include <limits.h> 28#include <stdarg.h> 29#include <stdio.h> 30#include <stdlib.h> 31#include <string.h> 32#include <time.h> 33 34#include "mandoc_aux.h" 35#include "mandoc.h" 36#include "roff.h" 37#include "man.h" 38#include "libmandoc.h" 39#include "roff_int.h" 40#include "libman.h" 41#include "tag.h" 42 43#define CHKARGS struct roff_man *man, struct roff_node *n 44 45typedef void (*v_check)(CHKARGS); 46 47static void check_abort(CHKARGS) __attribute__((__noreturn__)); 48static void check_par(CHKARGS); 49static void check_part(CHKARGS); 50static void check_root(CHKARGS); 51static void check_tag(struct roff_node *, struct roff_node *); 52static void check_text(CHKARGS); 53 54static void post_AT(CHKARGS); 55static void post_EE(CHKARGS); 56static void post_EX(CHKARGS); 57static void post_IP(CHKARGS); 58static void post_OP(CHKARGS); 59static void post_SH(CHKARGS); 60static void post_TH(CHKARGS); 61static void post_TP(CHKARGS); 62static void post_UC(CHKARGS); 63static void post_UR(CHKARGS); 64static void post_in(CHKARGS); 65 66static const v_check man_valids[MAN_MAX - MAN_TH] = { 67 post_TH, /* TH */ 68 post_SH, /* SH */ 69 post_SH, /* SS */ 70 post_TP, /* TP */ 71 post_TP, /* TQ */ 72 check_abort,/* LP */ 73 check_par, /* PP */ 74 check_abort,/* P */ 75 post_IP, /* IP */ 76 NULL, /* HP */ 77 NULL, /* SM */ 78 NULL, /* SB */ 79 NULL, /* BI */ 80 NULL, /* IB */ 81 NULL, /* BR */ 82 NULL, /* RB */ 83 NULL, /* R */ 84 NULL, /* B */ 85 NULL, /* I */ 86 NULL, /* IR */ 87 NULL, /* RI */ 88 NULL, /* RE */ 89 check_part, /* RS */ 90 NULL, /* DT */ 91 post_UC, /* UC */ 92 NULL, /* PD */ 93 post_AT, /* AT */ 94 post_in, /* in */ 95 NULL, /* SY */ 96 NULL, /* YS */ 97 post_OP, /* OP */ 98 post_EX, /* EX */ 99 post_EE, /* EE */ 100 post_UR, /* UR */ 101 NULL, /* UE */ 102 post_UR, /* MT */ 103 NULL, /* ME */ 104}; 105 106 107/* Validate the subtree rooted at man->last. */ 108void 109man_validate(struct roff_man *man) 110{ 111 struct roff_node *n; 112 const v_check *cp; 113 114 /* 115 * Translate obsolete macros such that later code 116 * does not need to look for them. 117 */ 118 119 n = man->last; 120 switch (n->tok) { 121 case MAN_LP: 122 case MAN_P: 123 n->tok = MAN_PP; 124 break; 125 default: 126 break; 127 } 128 129 /* 130 * Iterate over all children, recursing into each one 131 * in turn, depth-first. 132 */ 133 134 man->last = man->last->child; 135 while (man->last != NULL) { 136 man_validate(man); 137 if (man->last == n) 138 man->last = man->last->child; 139 else 140 man->last = man->last->next; 141 } 142 143 /* Finally validate the macro itself. */ 144 145 man->last = n; 146 man->next = ROFF_NEXT_SIBLING; 147 switch (n->type) { 148 case ROFFT_TEXT: 149 check_text(man, n); 150 break; 151 case ROFFT_ROOT: 152 check_root(man, n); 153 break; 154 case ROFFT_COMMENT: 155 case ROFFT_EQN: 156 case ROFFT_TBL: 157 break; 158 default: 159 if (n->tok < ROFF_MAX) { 160 roff_validate(man); 161 break; 162 } 163 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 164 cp = man_valids + (n->tok - MAN_TH); 165 if (*cp) 166 (*cp)(man, n); 167 if (man->last == n) 168 n->flags |= NODE_VALID; 169 break; 170 } 171} 172 173static void 174check_root(CHKARGS) 175{ 176 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 177 178 if (n->last == NULL || n->last->type == ROFFT_COMMENT) 179 mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL); 180 else 181 man->meta.hasbody = 1; 182 183 if (NULL == man->meta.title) { 184 mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL); 185 186 /* 187 * If a title hasn't been set, do so now (by 188 * implication, date and section also aren't set). 189 */ 190 191 man->meta.title = mandoc_strdup(""); 192 man->meta.msec = mandoc_strdup(""); 193 man->meta.date = mandoc_normdate(NULL, NULL); 194 } 195 196 if (man->meta.os_e && 197 (man->meta.rcsids & (1 << man->meta.os_e)) == 0) 198 mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, 199 man->meta.os_e == MANDOC_OS_OPENBSD ? 200 "(OpenBSD)" : "(NetBSD)"); 201} 202 203static void 204check_abort(CHKARGS) 205{ 206 abort(); 207} 208 209/* 210 * Skip leading whitespace, dashes, backslashes, and font escapes, 211 * then create a tag if the first following byte is a letter. 212 * Priority is high unless whitespace is present. 213 */ 214static void 215check_tag(struct roff_node *n, struct roff_node *nt) 216{ 217 const char *cp, *arg; 218 int prio, sz; 219 220 if (nt == NULL || nt->type != ROFFT_TEXT) 221 return; 222 223 cp = nt->string; 224 prio = TAG_STRONG; 225 for (;;) { 226 switch (*cp) { 227 case ' ': 228 case '\t': 229 prio = TAG_WEAK; 230 /* FALLTHROUGH */ 231 case '-': 232 cp++; 233 break; 234 case '\\': 235 cp++; 236 switch (mandoc_escape(&cp, &arg, &sz)) { 237 case ESCAPE_FONT: 238 case ESCAPE_FONTBOLD: 239 case ESCAPE_FONTITALIC: 240 case ESCAPE_FONTBI: 241 case ESCAPE_FONTROMAN: 242 case ESCAPE_FONTCR: 243 case ESCAPE_FONTCB: 244 case ESCAPE_FONTCI: 245 case ESCAPE_FONTPREV: 246 case ESCAPE_IGNORE: 247 break; 248 case ESCAPE_SPECIAL: 249 if (sz != 1) 250 return; 251 switch (*arg) { 252 case '-': 253 case 'e': 254 break; 255 default: 256 return; 257 } 258 break; 259 default: 260 return; 261 } 262 break; 263 default: 264 if (isalpha((unsigned char)*cp)) 265 tag_put(cp, prio, n); 266 return; 267 } 268 } 269} 270 271static void 272check_text(CHKARGS) 273{ 274 char *cp, *p; 275 276 if (n->flags & NODE_NOFILL) 277 return; 278 279 cp = n->string; 280 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 281 mandoc_msg(MANDOCERR_FI_TAB, 282 n->line, n->pos + (int)(p - cp), NULL); 283} 284 285static void 286post_EE(CHKARGS) 287{ 288 if ((n->flags & NODE_NOFILL) == 0) 289 mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE"); 290} 291 292static void 293post_EX(CHKARGS) 294{ 295 if (n->flags & NODE_NOFILL) 296 mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX"); 297} 298 299static void 300post_OP(CHKARGS) 301{ 302 303 if (n->child == NULL) 304 mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP"); 305 else if (n->child->next != NULL && n->child->next->next != NULL) { 306 n = n->child->next->next; 307 mandoc_msg(MANDOCERR_ARG_EXCESS, 308 n->line, n->pos, "OP ... %s", n->string); 309 } 310} 311 312static void 313post_SH(CHKARGS) 314{ 315 struct roff_node *nc; 316 char *cp, *tag; 317 318 nc = n->child; 319 switch (n->type) { 320 case ROFFT_HEAD: 321 tag = NULL; 322 deroff(&tag, n); 323 if (tag != NULL) { 324 for (cp = tag; *cp != '\0'; cp++) 325 if (*cp == ' ') 326 *cp = '_'; 327 if (nc != NULL && nc->type == ROFFT_TEXT && 328 strcmp(nc->string, tag) == 0) 329 tag_put(NULL, TAG_STRONG, n); 330 else 331 tag_put(tag, TAG_FALLBACK, n); 332 free(tag); 333 } 334 return; 335 case ROFFT_BODY: 336 if (nc != NULL) 337 break; 338 return; 339 default: 340 return; 341 } 342 343 if (nc->tok == MAN_PP && nc->body->child != NULL) { 344 while (nc->body->last != NULL) { 345 man->next = ROFF_NEXT_CHILD; 346 roff_node_relink(man, nc->body->last); 347 man->last = n; 348 } 349 } 350 351 if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) { 352 mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, 353 "%s after %s", roff_name[nc->tok], roff_name[n->tok]); 354 roff_node_delete(man, nc); 355 } 356 357 /* 358 * Trailing PP is empty, so it is deleted by check_par(). 359 * Trailing sp is significant. 360 */ 361 362 if ((nc = n->last) != NULL && nc->tok == ROFF_br) { 363 mandoc_msg(MANDOCERR_PAR_SKIP, 364 nc->line, nc->pos, "%s at the end of %s", 365 roff_name[nc->tok], roff_name[n->tok]); 366 roff_node_delete(man, nc); 367 } 368} 369 370static void 371post_UR(CHKARGS) 372{ 373 if (n->type == ROFFT_HEAD && n->child == NULL) 374 mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos, 375 "%s", roff_name[n->tok]); 376 check_part(man, n); 377} 378 379static void 380check_part(CHKARGS) 381{ 382 383 if (n->type == ROFFT_BODY && n->child == NULL) 384 mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, 385 "%s", roff_name[n->tok]); 386} 387 388static void 389check_par(CHKARGS) 390{ 391 392 switch (n->type) { 393 case ROFFT_BLOCK: 394 if (n->body->child == NULL) 395 roff_node_delete(man, n); 396 break; 397 case ROFFT_BODY: 398 if (n->child != NULL && 399 (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) { 400 mandoc_msg(MANDOCERR_PAR_SKIP, 401 n->child->line, n->child->pos, 402 "%s after %s", roff_name[n->child->tok], 403 roff_name[n->tok]); 404 roff_node_delete(man, n->child); 405 } 406 if (n->child == NULL) 407 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 408 "%s empty", roff_name[n->tok]); 409 break; 410 case ROFFT_HEAD: 411 if (n->child != NULL) 412 mandoc_msg(MANDOCERR_ARG_SKIP, 413 n->line, n->pos, "%s %s%s", 414 roff_name[n->tok], n->child->string, 415 n->child->next != NULL ? " ..." : ""); 416 break; 417 default: 418 break; 419 } 420} 421 422static void 423post_IP(CHKARGS) 424{ 425 switch (n->type) { 426 case ROFFT_BLOCK: 427 if (n->head->child == NULL && n->body->child == NULL) 428 roff_node_delete(man, n); 429 break; 430 case ROFFT_HEAD: 431 check_tag(n, n->child); 432 break; 433 case ROFFT_BODY: 434 if (n->parent->head->child == NULL && n->child == NULL) 435 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 436 "%s empty", roff_name[n->tok]); 437 break; 438 default: 439 break; 440 } 441} 442 443/* 444 * The first next-line element in the head is the tag. 445 * If that's a font macro, use its first child instead. 446 */ 447static void 448post_TP(CHKARGS) 449{ 450 struct roff_node *nt; 451 452 if (n->type != ROFFT_HEAD || (nt = n->child) == NULL) 453 return; 454 455 while ((nt->flags & NODE_LINE) == 0) 456 if ((nt = nt->next) == NULL) 457 return; 458 459 switch (nt->tok) { 460 case MAN_B: 461 case MAN_BI: 462 case MAN_BR: 463 case MAN_I: 464 case MAN_IB: 465 case MAN_IR: 466 nt = nt->child; 467 break; 468 default: 469 break; 470 } 471 check_tag(n, nt); 472} 473 474static void 475post_TH(CHKARGS) 476{ 477 struct roff_node *nb; 478 const char *p; 479 480 free(man->meta.title); 481 free(man->meta.vol); 482 free(man->meta.os); 483 free(man->meta.msec); 484 free(man->meta.date); 485 486 man->meta.title = man->meta.vol = man->meta.date = 487 man->meta.msec = man->meta.os = NULL; 488 489 nb = n; 490 491 /* ->TITLE<- MSEC DATE OS VOL */ 492 493 n = n->child; 494 if (n != NULL && n->string != NULL) { 495 for (p = n->string; *p != '\0'; p++) { 496 /* Only warn about this once... */ 497 if (isalpha((unsigned char)*p) && 498 ! isupper((unsigned char)*p)) { 499 mandoc_msg(MANDOCERR_TITLE_CASE, n->line, 500 n->pos + (int)(p - n->string), 501 "TH %s", n->string); 502 break; 503 } 504 } 505 man->meta.title = mandoc_strdup(n->string); 506 } else { 507 man->meta.title = mandoc_strdup(""); 508 mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH"); 509 } 510 511 /* TITLE ->MSEC<- DATE OS VOL */ 512 513 if (n != NULL) 514 n = n->next; 515 if (n != NULL && n->string != NULL) { 516 man->meta.msec = mandoc_strdup(n->string); 517 if (man->filesec != '\0' && 518 man->filesec != *n->string && 519 *n->string >= '1' && *n->string <= '9') 520 mandoc_msg(MANDOCERR_MSEC_FILE, n->line, n->pos, 521 "*.%c vs TH ... %c", man->filesec, *n->string); 522 } else { 523 man->meta.msec = mandoc_strdup(""); 524 mandoc_msg(MANDOCERR_MSEC_MISSING, 525 nb->line, nb->pos, "TH %s", man->meta.title); 526 } 527 528 /* TITLE MSEC ->DATE<- OS VOL */ 529 530 if (n != NULL) 531 n = n->next; 532 if (man->quick && n != NULL) 533 man->meta.date = mandoc_strdup(""); 534 else 535 man->meta.date = mandoc_normdate(n, nb); 536 537 /* TITLE MSEC DATE ->OS<- VOL */ 538 539 if (n && (n = n->next)) 540 man->meta.os = mandoc_strdup(n->string); 541 else if (man->os_s != NULL) 542 man->meta.os = mandoc_strdup(man->os_s); 543 if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) { 544 if (strstr(man->meta.os, "OpenBSD") != NULL) 545 man->meta.os_e = MANDOC_OS_OPENBSD; 546 else if (strstr(man->meta.os, "NetBSD") != NULL) 547 man->meta.os_e = MANDOC_OS_NETBSD; 548 } 549 550 /* TITLE MSEC DATE OS ->VOL<- */ 551 /* If missing, use the default VOL name for MSEC. */ 552 553 if (n && (n = n->next)) 554 man->meta.vol = mandoc_strdup(n->string); 555 else if ('\0' != man->meta.msec[0] && 556 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 557 man->meta.vol = mandoc_strdup(p); 558 559 if (n != NULL && (n = n->next) != NULL) 560 mandoc_msg(MANDOCERR_ARG_EXCESS, 561 n->line, n->pos, "TH ... %s", n->string); 562 563 /* 564 * Remove the `TH' node after we've processed it for our 565 * meta-data. 566 */ 567 roff_node_delete(man, man->last); 568} 569 570static void 571post_UC(CHKARGS) 572{ 573 static const char * const bsd_versions[] = { 574 "3rd Berkeley Distribution", 575 "4th Berkeley Distribution", 576 "4.2 Berkeley Distribution", 577 "4.3 Berkeley Distribution", 578 "4.4 Berkeley Distribution", 579 }; 580 581 const char *p, *s; 582 583 n = n->child; 584 585 if (n == NULL || n->type != ROFFT_TEXT) 586 p = bsd_versions[0]; 587 else { 588 s = n->string; 589 if (0 == strcmp(s, "3")) 590 p = bsd_versions[0]; 591 else if (0 == strcmp(s, "4")) 592 p = bsd_versions[1]; 593 else if (0 == strcmp(s, "5")) 594 p = bsd_versions[2]; 595 else if (0 == strcmp(s, "6")) 596 p = bsd_versions[3]; 597 else if (0 == strcmp(s, "7")) 598 p = bsd_versions[4]; 599 else 600 p = bsd_versions[0]; 601 } 602 603 free(man->meta.os); 604 man->meta.os = mandoc_strdup(p); 605} 606 607static void 608post_AT(CHKARGS) 609{ 610 static const char * const unix_versions[] = { 611 "7th Edition", 612 "System III", 613 "System V", 614 "System V Release 2", 615 }; 616 617 struct roff_node *nn; 618 const char *p, *s; 619 620 n = n->child; 621 622 if (n == NULL || n->type != ROFFT_TEXT) 623 p = unix_versions[0]; 624 else { 625 s = n->string; 626 if (0 == strcmp(s, "3")) 627 p = unix_versions[0]; 628 else if (0 == strcmp(s, "4")) 629 p = unix_versions[1]; 630 else if (0 == strcmp(s, "5")) { 631 nn = n->next; 632 if (nn != NULL && 633 nn->type == ROFFT_TEXT && 634 nn->string[0] != '\0') 635 p = unix_versions[3]; 636 else 637 p = unix_versions[2]; 638 } else 639 p = unix_versions[0]; 640 } 641 642 free(man->meta.os); 643 man->meta.os = mandoc_strdup(p); 644} 645 646static void 647post_in(CHKARGS) 648{ 649 char *s; 650 651 if (n->parent->tok != MAN_TP || 652 n->parent->type != ROFFT_HEAD || 653 n->child == NULL || 654 *n->child->string == '+' || 655 *n->child->string == '-') 656 return; 657 mandoc_asprintf(&s, "+%s", n->child->string); 658 free(n->child->string); 659 n->child->string = s; 660} 661