1/* Id: man_validate.c,v 1.146 2018/12/31 10:04:39 schwarze Exp */ 2/* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2018 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include "config.h" 19 20#include <sys/types.h> 21 22#include <assert.h> 23#include <ctype.h> 24#include <errno.h> 25#include <limits.h> 26#include <stdarg.h> 27#include <stdio.h> 28#include <stdlib.h> 29#include <string.h> 30#include <time.h> 31 32#include "mandoc_aux.h" 33#include "mandoc.h" 34#include "roff.h" 35#include "man.h" 36#include "libmandoc.h" 37#include "roff_int.h" 38#include "libman.h" 39 40#define CHKARGS struct roff_man *man, struct roff_node *n 41 42typedef void (*v_check)(CHKARGS); 43 44static void check_abort(CHKARGS) __dead; 45static void check_par(CHKARGS); 46static void check_part(CHKARGS); 47static void check_root(CHKARGS); 48static void check_text(CHKARGS); 49 50static void post_AT(CHKARGS); 51static void post_EE(CHKARGS); 52static void post_EX(CHKARGS); 53static void post_IP(CHKARGS); 54static void post_OP(CHKARGS); 55static void post_SH(CHKARGS); 56static void post_TH(CHKARGS); 57static void post_UC(CHKARGS); 58static void post_UR(CHKARGS); 59static void post_in(CHKARGS); 60 61static const v_check man_valids[MAN_MAX - MAN_TH] = { 62 post_TH, /* TH */ 63 post_SH, /* SH */ 64 post_SH, /* SS */ 65 NULL, /* TP */ 66 NULL, /* TQ */ 67 check_abort,/* LP */ 68 check_par, /* PP */ 69 check_abort,/* P */ 70 post_IP, /* IP */ 71 NULL, /* HP */ 72 NULL, /* SM */ 73 NULL, /* SB */ 74 NULL, /* BI */ 75 NULL, /* IB */ 76 NULL, /* BR */ 77 NULL, /* RB */ 78 NULL, /* R */ 79 NULL, /* B */ 80 NULL, /* I */ 81 NULL, /* IR */ 82 NULL, /* RI */ 83 NULL, /* RE */ 84 check_part, /* RS */ 85 NULL, /* DT */ 86 post_UC, /* UC */ 87 NULL, /* PD */ 88 post_AT, /* AT */ 89 post_in, /* in */ 90 NULL, /* SY */ 91 NULL, /* YS */ 92 post_OP, /* OP */ 93 post_EX, /* EX */ 94 post_EE, /* EE */ 95 post_UR, /* UR */ 96 NULL, /* UE */ 97 post_UR, /* MT */ 98 NULL, /* ME */ 99}; 100 101 102/* Validate the subtree rooted at man->last. */ 103void 104man_validate(struct roff_man *man) 105{ 106 struct roff_node *n; 107 const v_check *cp; 108 109 /* 110 * Translate obsolete macros such that later code 111 * does not need to look for them. 112 */ 113 114 n = man->last; 115 switch (n->tok) { 116 case MAN_LP: 117 case MAN_P: 118 n->tok = MAN_PP; 119 break; 120 default: 121 break; 122 } 123 124 /* 125 * Iterate over all children, recursing into each one 126 * in turn, depth-first. 127 */ 128 129 man->last = man->last->child; 130 while (man->last != NULL) { 131 man_validate(man); 132 if (man->last == n) 133 man->last = man->last->child; 134 else 135 man->last = man->last->next; 136 } 137 138 /* Finally validate the macro itself. */ 139 140 man->last = n; 141 man->next = ROFF_NEXT_SIBLING; 142 switch (n->type) { 143 case ROFFT_TEXT: 144 check_text(man, n); 145 break; 146 case ROFFT_ROOT: 147 check_root(man, n); 148 break; 149 case ROFFT_COMMENT: 150 case ROFFT_EQN: 151 case ROFFT_TBL: 152 break; 153 default: 154 if (n->tok < ROFF_MAX) { 155 roff_validate(man); 156 break; 157 } 158 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 159 cp = man_valids + (n->tok - MAN_TH); 160 if (*cp) 161 (*cp)(man, n); 162 if (man->last == n) 163 n->flags |= NODE_VALID; 164 break; 165 } 166} 167 168static void 169check_root(CHKARGS) 170{ 171 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 172 173 if (n->last == NULL || n->last->type == ROFFT_COMMENT) 174 mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL); 175 else 176 man->meta.hasbody = 1; 177 178 if (NULL == man->meta.title) { 179 mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL); 180 181 /* 182 * If a title hasn't been set, do so now (by 183 * implication, date and section also aren't set). 184 */ 185 186 man->meta.title = mandoc_strdup(""); 187 man->meta.msec = mandoc_strdup(""); 188 man->meta.date = man->quick ? mandoc_strdup("") : 189 mandoc_normdate(man, NULL, n->line, n->pos); 190 } 191 192 if (man->meta.os_e && 193 (man->meta.rcsids & (1 << man->meta.os_e)) == 0) 194 mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, 195 man->meta.os_e == MANDOC_OS_OPENBSD ? 196 "(OpenBSD)" : "(NetBSD)"); 197} 198 199static void 200check_abort(CHKARGS) 201{ 202 abort(); 203} 204 205static void 206check_text(CHKARGS) 207{ 208 char *cp, *p; 209 210 if (n->flags & NODE_NOFILL) 211 return; 212 213 cp = n->string; 214 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 215 mandoc_msg(MANDOCERR_FI_TAB, 216 n->line, n->pos + (int)(p - cp), NULL); 217} 218 219static void 220post_EE(CHKARGS) 221{ 222 if ((n->flags & NODE_NOFILL) == 0) 223 mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE"); 224} 225 226static void 227post_EX(CHKARGS) 228{ 229 if (n->flags & NODE_NOFILL) 230 mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX"); 231} 232 233static void 234post_OP(CHKARGS) 235{ 236 237 if (n->child == NULL) 238 mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP"); 239 else if (n->child->next != NULL && n->child->next->next != NULL) { 240 n = n->child->next->next; 241 mandoc_msg(MANDOCERR_ARG_EXCESS, 242 n->line, n->pos, "OP ... %s", n->string); 243 } 244} 245 246static void 247post_SH(CHKARGS) 248{ 249 struct roff_node *nc; 250 251 if (n->type != ROFFT_BODY || (nc = n->child) == NULL) 252 return; 253 254 if (nc->tok == MAN_PP && nc->body->child != NULL) { 255 while (nc->body->last != NULL) { 256 man->next = ROFF_NEXT_CHILD; 257 roff_node_relink(man, nc->body->last); 258 man->last = n; 259 } 260 } 261 262 if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) { 263 mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, 264 "%s after %s", roff_name[nc->tok], roff_name[n->tok]); 265 roff_node_delete(man, nc); 266 } 267 268 /* 269 * Trailing PP is empty, so it is deleted by check_par(). 270 * Trailing sp is significant. 271 */ 272 273 if ((nc = n->last) != NULL && nc->tok == ROFF_br) { 274 mandoc_msg(MANDOCERR_PAR_SKIP, 275 nc->line, nc->pos, "%s at the end of %s", 276 roff_name[nc->tok], roff_name[n->tok]); 277 roff_node_delete(man, nc); 278 } 279} 280 281static void 282post_UR(CHKARGS) 283{ 284 if (n->type == ROFFT_HEAD && n->child == NULL) 285 mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos, 286 "%s", roff_name[n->tok]); 287 check_part(man, n); 288} 289 290static void 291check_part(CHKARGS) 292{ 293 294 if (n->type == ROFFT_BODY && n->child == NULL) 295 mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, 296 "%s", roff_name[n->tok]); 297} 298 299static void 300check_par(CHKARGS) 301{ 302 303 switch (n->type) { 304 case ROFFT_BLOCK: 305 if (n->body->child == NULL) 306 roff_node_delete(man, n); 307 break; 308 case ROFFT_BODY: 309 if (n->child != NULL && 310 (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) { 311 mandoc_msg(MANDOCERR_PAR_SKIP, 312 n->child->line, n->child->pos, 313 "%s after %s", roff_name[n->child->tok], 314 roff_name[n->tok]); 315 roff_node_delete(man, n->child); 316 } 317 if (n->child == NULL) 318 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 319 "%s empty", roff_name[n->tok]); 320 break; 321 case ROFFT_HEAD: 322 if (n->child != NULL) 323 mandoc_msg(MANDOCERR_ARG_SKIP, 324 n->line, n->pos, "%s %s%s", 325 roff_name[n->tok], n->child->string, 326 n->child->next != NULL ? " ..." : ""); 327 break; 328 default: 329 break; 330 } 331} 332 333static void 334post_IP(CHKARGS) 335{ 336 337 switch (n->type) { 338 case ROFFT_BLOCK: 339 if (n->head->child == NULL && n->body->child == NULL) 340 roff_node_delete(man, n); 341 break; 342 case ROFFT_BODY: 343 if (n->parent->head->child == NULL && n->child == NULL) 344 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 345 "%s empty", roff_name[n->tok]); 346 break; 347 default: 348 break; 349 } 350} 351 352static void 353post_TH(CHKARGS) 354{ 355 struct roff_node *nb; 356 const char *p; 357 358 free(man->meta.title); 359 free(man->meta.vol); 360 free(man->meta.os); 361 free(man->meta.msec); 362 free(man->meta.date); 363 364 man->meta.title = man->meta.vol = man->meta.date = 365 man->meta.msec = man->meta.os = NULL; 366 367 nb = n; 368 369 /* ->TITLE<- MSEC DATE OS VOL */ 370 371 n = n->child; 372 if (n && n->string) { 373 for (p = n->string; '\0' != *p; p++) { 374 /* Only warn about this once... */ 375 if (isalpha((unsigned char)*p) && 376 ! isupper((unsigned char)*p)) { 377 mandoc_msg(MANDOCERR_TITLE_CASE, n->line, 378 n->pos + (int)(p - n->string), 379 "TH %s", n->string); 380 break; 381 } 382 } 383 man->meta.title = mandoc_strdup(n->string); 384 } else { 385 man->meta.title = mandoc_strdup(""); 386 mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH"); 387 } 388 389 /* TITLE ->MSEC<- DATE OS VOL */ 390 391 if (n) 392 n = n->next; 393 if (n && n->string) 394 man->meta.msec = mandoc_strdup(n->string); 395 else { 396 man->meta.msec = mandoc_strdup(""); 397 mandoc_msg(MANDOCERR_MSEC_MISSING, 398 nb->line, nb->pos, "TH %s", man->meta.title); 399 } 400 401 /* TITLE MSEC ->DATE<- OS VOL */ 402 403 if (n) 404 n = n->next; 405 if (n && n->string && '\0' != n->string[0]) { 406 man->meta.date = man->quick ? 407 mandoc_strdup(n->string) : 408 mandoc_normdate(man, n->string, n->line, n->pos); 409 } else { 410 man->meta.date = mandoc_strdup(""); 411 mandoc_msg(MANDOCERR_DATE_MISSING, 412 n ? n->line : nb->line, 413 n ? n->pos : nb->pos, "TH"); 414 } 415 416 /* TITLE MSEC DATE ->OS<- VOL */ 417 418 if (n && (n = n->next)) 419 man->meta.os = mandoc_strdup(n->string); 420 else if (man->os_s != NULL) 421 man->meta.os = mandoc_strdup(man->os_s); 422 if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) { 423 if (strstr(man->meta.os, "OpenBSD") != NULL) 424 man->meta.os_e = MANDOC_OS_OPENBSD; 425 else if (strstr(man->meta.os, "NetBSD") != NULL) 426 man->meta.os_e = MANDOC_OS_NETBSD; 427 } 428 429 /* TITLE MSEC DATE OS ->VOL<- */ 430 /* If missing, use the default VOL name for MSEC. */ 431 432 if (n && (n = n->next)) 433 man->meta.vol = mandoc_strdup(n->string); 434 else if ('\0' != man->meta.msec[0] && 435 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 436 man->meta.vol = mandoc_strdup(p); 437 438 if (n != NULL && (n = n->next) != NULL) 439 mandoc_msg(MANDOCERR_ARG_EXCESS, 440 n->line, n->pos, "TH ... %s", n->string); 441 442 /* 443 * Remove the `TH' node after we've processed it for our 444 * meta-data. 445 */ 446 roff_node_delete(man, man->last); 447} 448 449static void 450post_UC(CHKARGS) 451{ 452 static const char * const bsd_versions[] = { 453 "3rd Berkeley Distribution", 454 "4th Berkeley Distribution", 455 "4.2 Berkeley Distribution", 456 "4.3 Berkeley Distribution", 457 "4.4 Berkeley Distribution", 458 }; 459 460 const char *p, *s; 461 462 n = n->child; 463 464 if (n == NULL || n->type != ROFFT_TEXT) 465 p = bsd_versions[0]; 466 else { 467 s = n->string; 468 if (0 == strcmp(s, "3")) 469 p = bsd_versions[0]; 470 else if (0 == strcmp(s, "4")) 471 p = bsd_versions[1]; 472 else if (0 == strcmp(s, "5")) 473 p = bsd_versions[2]; 474 else if (0 == strcmp(s, "6")) 475 p = bsd_versions[3]; 476 else if (0 == strcmp(s, "7")) 477 p = bsd_versions[4]; 478 else 479 p = bsd_versions[0]; 480 } 481 482 free(man->meta.os); 483 man->meta.os = mandoc_strdup(p); 484} 485 486static void 487post_AT(CHKARGS) 488{ 489 static const char * const unix_versions[] = { 490 "7th Edition", 491 "System III", 492 "System V", 493 "System V Release 2", 494 }; 495 496 struct roff_node *nn; 497 const char *p, *s; 498 499 n = n->child; 500 501 if (n == NULL || n->type != ROFFT_TEXT) 502 p = unix_versions[0]; 503 else { 504 s = n->string; 505 if (0 == strcmp(s, "3")) 506 p = unix_versions[0]; 507 else if (0 == strcmp(s, "4")) 508 p = unix_versions[1]; 509 else if (0 == strcmp(s, "5")) { 510 nn = n->next; 511 if (nn != NULL && 512 nn->type == ROFFT_TEXT && 513 nn->string[0] != '\0') 514 p = unix_versions[3]; 515 else 516 p = unix_versions[2]; 517 } else 518 p = unix_versions[0]; 519 } 520 521 free(man->meta.os); 522 man->meta.os = mandoc_strdup(p); 523} 524 525static void 526post_in(CHKARGS) 527{ 528 char *s; 529 530 if (n->parent->tok != MAN_TP || 531 n->parent->type != ROFFT_HEAD || 532 n->child == NULL || 533 *n->child->string == '+' || 534 *n->child->string == '-') 535 return; 536 mandoc_asprintf(&s, "+%s", n->child->string); 537 free(n->child->string); 538 n->child->string = s; 539} 540