mandocdb.c revision 307795
1/* $Id: mandocdb.c,v 1.218 2016/07/12 05:18:38 kristaps Exp $ */ 2/* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011-2016 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include "config.h" 19 20#include <sys/types.h> 21#include <sys/stat.h> 22#include <sys/wait.h> 23 24#include <assert.h> 25#include <ctype.h> 26#if HAVE_ERR 27#include <err.h> 28#endif 29#include <errno.h> 30#include <fcntl.h> 31#if HAVE_FTS 32#include <fts.h> 33#else 34#include "compat_fts.h" 35#endif 36#include <limits.h> 37#if HAVE_SANDBOX_INIT 38#include <sandbox.h> 39#endif 40#include <stddef.h> 41#include <stdio.h> 42#include <stdint.h> 43#include <stdlib.h> 44#include <string.h> 45#include <unistd.h> 46 47#include <sqlite3.h> 48 49#include "mandoc_aux.h" 50#include "mandoc_ohash.h" 51#include "mandoc.h" 52#include "roff.h" 53#include "mdoc.h" 54#include "man.h" 55#include "manconf.h" 56#include "mansearch.h" 57 58extern int mansearch_keymax; 59extern const char *const mansearch_keynames[]; 60 61#define SQL_EXEC(_v) \ 62 if (SQLITE_OK != sqlite3_exec(db, (_v), NULL, NULL, NULL)) \ 63 say("", "%s: %s", (_v), sqlite3_errmsg(db)) 64#define SQL_BIND_TEXT(_s, _i, _v) \ 65 if (SQLITE_OK != sqlite3_bind_text \ 66 ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \ 67 say(mlink->file, "%s", sqlite3_errmsg(db)) 68#define SQL_BIND_INT(_s, _i, _v) \ 69 if (SQLITE_OK != sqlite3_bind_int \ 70 ((_s), (_i)++, (_v))) \ 71 say(mlink->file, "%s", sqlite3_errmsg(db)) 72#define SQL_BIND_INT64(_s, _i, _v) \ 73 if (SQLITE_OK != sqlite3_bind_int64 \ 74 ((_s), (_i)++, (_v))) \ 75 say(mlink->file, "%s", sqlite3_errmsg(db)) 76#define SQL_STEP(_s) \ 77 if (SQLITE_DONE != sqlite3_step((_s))) \ 78 say(mlink->file, "%s", sqlite3_errmsg(db)) 79 80enum op { 81 OP_DEFAULT = 0, /* new dbs from dir list or default config */ 82 OP_CONFFILE, /* new databases from custom config file */ 83 OP_UPDATE, /* delete/add entries in existing database */ 84 OP_DELETE, /* delete entries from existing database */ 85 OP_TEST /* change no databases, report potential problems */ 86}; 87 88struct str { 89 const struct mpage *mpage; /* if set, the owning parse */ 90 uint64_t mask; /* bitmask in sequence */ 91 char key[]; /* rendered text */ 92}; 93 94struct inodev { 95 ino_t st_ino; 96 dev_t st_dev; 97}; 98 99struct mpage { 100 struct inodev inodev; /* used for hashing routine */ 101 int64_t pageid; /* pageid in mpages SQL table */ 102 char *sec; /* section from file content */ 103 char *arch; /* architecture from file content */ 104 char *title; /* title from file content */ 105 char *desc; /* description from file content */ 106 struct mlink *mlinks; /* singly linked list */ 107 int form; /* format from file content */ 108 int name_head_done; 109}; 110 111struct mlink { 112 char file[PATH_MAX]; /* filename rel. to manpath */ 113 char *dsec; /* section from directory */ 114 char *arch; /* architecture from directory */ 115 char *name; /* name from file name (not empty) */ 116 char *fsec; /* section from file name suffix */ 117 struct mlink *next; /* singly linked list */ 118 struct mpage *mpage; /* parent */ 119 int dform; /* format from directory */ 120 int fform; /* format from file name suffix */ 121 int gzip; /* filename has a .gz suffix */ 122}; 123 124enum stmt { 125 STMT_DELETE_PAGE = 0, /* delete mpage */ 126 STMT_INSERT_PAGE, /* insert mpage */ 127 STMT_INSERT_LINK, /* insert mlink */ 128 STMT_INSERT_NAME, /* insert name */ 129 STMT_SELECT_NAME, /* retrieve existing name flags */ 130 STMT_INSERT_KEY, /* insert parsed key */ 131 STMT__MAX 132}; 133 134typedef int (*mdoc_fp)(struct mpage *, const struct roff_meta *, 135 const struct roff_node *); 136 137struct mdoc_handler { 138 mdoc_fp fp; /* optional handler */ 139 uint64_t mask; /* set unless handler returns 0 */ 140}; 141 142static void dbclose(int); 143static void dbadd(struct mpage *); 144static void dbadd_mlink(const struct mlink *mlink); 145static void dbadd_mlink_name(const struct mlink *mlink); 146static int dbopen(int); 147static void dbprune(void); 148static void filescan(const char *); 149static void mlink_add(struct mlink *, const struct stat *); 150static void mlink_check(struct mpage *, struct mlink *); 151static void mlink_free(struct mlink *); 152static void mlinks_undupe(struct mpage *); 153static void mpages_free(void); 154static void mpages_merge(struct mparse *); 155static void names_check(void); 156static void parse_cat(struct mpage *, int); 157static void parse_man(struct mpage *, const struct roff_meta *, 158 const struct roff_node *); 159static void parse_mdoc(struct mpage *, const struct roff_meta *, 160 const struct roff_node *); 161static int parse_mdoc_head(struct mpage *, const struct roff_meta *, 162 const struct roff_node *); 163static int parse_mdoc_Fd(struct mpage *, const struct roff_meta *, 164 const struct roff_node *); 165static void parse_mdoc_fname(struct mpage *, const struct roff_node *); 166static int parse_mdoc_Fn(struct mpage *, const struct roff_meta *, 167 const struct roff_node *); 168static int parse_mdoc_Fo(struct mpage *, const struct roff_meta *, 169 const struct roff_node *); 170static int parse_mdoc_Nd(struct mpage *, const struct roff_meta *, 171 const struct roff_node *); 172static int parse_mdoc_Nm(struct mpage *, const struct roff_meta *, 173 const struct roff_node *); 174static int parse_mdoc_Sh(struct mpage *, const struct roff_meta *, 175 const struct roff_node *); 176static int parse_mdoc_Va(struct mpage *, const struct roff_meta *, 177 const struct roff_node *); 178static int parse_mdoc_Xr(struct mpage *, const struct roff_meta *, 179 const struct roff_node *); 180static void putkey(const struct mpage *, char *, uint64_t); 181static void putkeys(const struct mpage *, char *, size_t, uint64_t); 182static void putmdockey(const struct mpage *, 183 const struct roff_node *, uint64_t); 184static int render_string(char **, size_t *); 185static void say(const char *, const char *, ...); 186static int set_basedir(const char *, int); 187static int treescan(void); 188static size_t utf8(unsigned int, char [7]); 189 190static char tempfilename[32]; 191static int nodb; /* no database changes */ 192static int mparse_options; /* abort the parse early */ 193static int use_all; /* use all found files */ 194static int debug; /* print what we're doing */ 195static int warnings; /* warn about crap */ 196static int write_utf8; /* write UTF-8 output; else ASCII */ 197static int exitcode; /* to be returned by main */ 198static enum op op; /* operational mode */ 199static char basedir[PATH_MAX]; /* current base directory */ 200static struct ohash mpages; /* table of distinct manual pages */ 201static struct ohash mlinks; /* table of directory entries */ 202static struct ohash names; /* table of all names */ 203static struct ohash strings; /* table of all strings */ 204static sqlite3 *db = NULL; /* current database */ 205static sqlite3_stmt *stmts[STMT__MAX]; /* current statements */ 206static uint64_t name_mask; 207 208static const struct mdoc_handler mdocs[MDOC_MAX] = { 209 { NULL, 0 }, /* Ap */ 210 { NULL, 0 }, /* Dd */ 211 { NULL, 0 }, /* Dt */ 212 { NULL, 0 }, /* Os */ 213 { parse_mdoc_Sh, TYPE_Sh }, /* Sh */ 214 { parse_mdoc_head, TYPE_Ss }, /* Ss */ 215 { NULL, 0 }, /* Pp */ 216 { NULL, 0 }, /* D1 */ 217 { NULL, 0 }, /* Dl */ 218 { NULL, 0 }, /* Bd */ 219 { NULL, 0 }, /* Ed */ 220 { NULL, 0 }, /* Bl */ 221 { NULL, 0 }, /* El */ 222 { NULL, 0 }, /* It */ 223 { NULL, 0 }, /* Ad */ 224 { NULL, TYPE_An }, /* An */ 225 { NULL, TYPE_Ar }, /* Ar */ 226 { NULL, TYPE_Cd }, /* Cd */ 227 { NULL, TYPE_Cm }, /* Cm */ 228 { NULL, TYPE_Dv }, /* Dv */ 229 { NULL, TYPE_Er }, /* Er */ 230 { NULL, TYPE_Ev }, /* Ev */ 231 { NULL, 0 }, /* Ex */ 232 { NULL, TYPE_Fa }, /* Fa */ 233 { parse_mdoc_Fd, 0 }, /* Fd */ 234 { NULL, TYPE_Fl }, /* Fl */ 235 { parse_mdoc_Fn, 0 }, /* Fn */ 236 { NULL, TYPE_Ft }, /* Ft */ 237 { NULL, TYPE_Ic }, /* Ic */ 238 { NULL, TYPE_In }, /* In */ 239 { NULL, TYPE_Li }, /* Li */ 240 { parse_mdoc_Nd, 0 }, /* Nd */ 241 { parse_mdoc_Nm, 0 }, /* Nm */ 242 { NULL, 0 }, /* Op */ 243 { NULL, 0 }, /* Ot */ 244 { NULL, TYPE_Pa }, /* Pa */ 245 { NULL, 0 }, /* Rv */ 246 { NULL, TYPE_St }, /* St */ 247 { parse_mdoc_Va, TYPE_Va }, /* Va */ 248 { parse_mdoc_Va, TYPE_Vt }, /* Vt */ 249 { parse_mdoc_Xr, 0 }, /* Xr */ 250 { NULL, 0 }, /* %A */ 251 { NULL, 0 }, /* %B */ 252 { NULL, 0 }, /* %D */ 253 { NULL, 0 }, /* %I */ 254 { NULL, 0 }, /* %J */ 255 { NULL, 0 }, /* %N */ 256 { NULL, 0 }, /* %O */ 257 { NULL, 0 }, /* %P */ 258 { NULL, 0 }, /* %R */ 259 { NULL, 0 }, /* %T */ 260 { NULL, 0 }, /* %V */ 261 { NULL, 0 }, /* Ac */ 262 { NULL, 0 }, /* Ao */ 263 { NULL, 0 }, /* Aq */ 264 { NULL, TYPE_At }, /* At */ 265 { NULL, 0 }, /* Bc */ 266 { NULL, 0 }, /* Bf */ 267 { NULL, 0 }, /* Bo */ 268 { NULL, 0 }, /* Bq */ 269 { NULL, TYPE_Bsx }, /* Bsx */ 270 { NULL, TYPE_Bx }, /* Bx */ 271 { NULL, 0 }, /* Db */ 272 { NULL, 0 }, /* Dc */ 273 { NULL, 0 }, /* Do */ 274 { NULL, 0 }, /* Dq */ 275 { NULL, 0 }, /* Ec */ 276 { NULL, 0 }, /* Ef */ 277 { NULL, TYPE_Em }, /* Em */ 278 { NULL, 0 }, /* Eo */ 279 { NULL, TYPE_Fx }, /* Fx */ 280 { NULL, TYPE_Ms }, /* Ms */ 281 { NULL, 0 }, /* No */ 282 { NULL, 0 }, /* Ns */ 283 { NULL, TYPE_Nx }, /* Nx */ 284 { NULL, TYPE_Ox }, /* Ox */ 285 { NULL, 0 }, /* Pc */ 286 { NULL, 0 }, /* Pf */ 287 { NULL, 0 }, /* Po */ 288 { NULL, 0 }, /* Pq */ 289 { NULL, 0 }, /* Qc */ 290 { NULL, 0 }, /* Ql */ 291 { NULL, 0 }, /* Qo */ 292 { NULL, 0 }, /* Qq */ 293 { NULL, 0 }, /* Re */ 294 { NULL, 0 }, /* Rs */ 295 { NULL, 0 }, /* Sc */ 296 { NULL, 0 }, /* So */ 297 { NULL, 0 }, /* Sq */ 298 { NULL, 0 }, /* Sm */ 299 { NULL, 0 }, /* Sx */ 300 { NULL, TYPE_Sy }, /* Sy */ 301 { NULL, TYPE_Tn }, /* Tn */ 302 { NULL, 0 }, /* Ux */ 303 { NULL, 0 }, /* Xc */ 304 { NULL, 0 }, /* Xo */ 305 { parse_mdoc_Fo, 0 }, /* Fo */ 306 { NULL, 0 }, /* Fc */ 307 { NULL, 0 }, /* Oo */ 308 { NULL, 0 }, /* Oc */ 309 { NULL, 0 }, /* Bk */ 310 { NULL, 0 }, /* Ek */ 311 { NULL, 0 }, /* Bt */ 312 { NULL, 0 }, /* Hf */ 313 { NULL, 0 }, /* Fr */ 314 { NULL, 0 }, /* Ud */ 315 { NULL, TYPE_Lb }, /* Lb */ 316 { NULL, 0 }, /* Lp */ 317 { NULL, TYPE_Lk }, /* Lk */ 318 { NULL, TYPE_Mt }, /* Mt */ 319 { NULL, 0 }, /* Brq */ 320 { NULL, 0 }, /* Bro */ 321 { NULL, 0 }, /* Brc */ 322 { NULL, 0 }, /* %C */ 323 { NULL, 0 }, /* Es */ 324 { NULL, 0 }, /* En */ 325 { NULL, TYPE_Dx }, /* Dx */ 326 { NULL, 0 }, /* %Q */ 327 { NULL, 0 }, /* br */ 328 { NULL, 0 }, /* sp */ 329 { NULL, 0 }, /* %U */ 330 { NULL, 0 }, /* Ta */ 331 { NULL, 0 }, /* ll */ 332}; 333 334 335int 336mandocdb(int argc, char *argv[]) 337{ 338 struct manconf conf; 339 struct mparse *mp; 340 const char *path_arg, *progname; 341 size_t j, sz; 342 int ch, i; 343 344#if HAVE_PLEDGE 345 if (pledge("stdio rpath wpath cpath fattr flock proc exec", NULL) == -1) { 346 warn("pledge"); 347 return (int)MANDOCLEVEL_SYSERR; 348 } 349#endif 350 351#if HAVE_SANDBOX_INIT 352 if (sandbox_init(kSBXProfileNoInternet, SANDBOX_NAMED, NULL) == -1) { 353 warnx("sandbox_init"); 354 return (int)MANDOCLEVEL_SYSERR; 355 } 356#endif 357 358 memset(&conf, 0, sizeof(conf)); 359 memset(stmts, 0, STMT__MAX * sizeof(sqlite3_stmt *)); 360 361 /* 362 * We accept a few different invocations. 363 * The CHECKOP macro makes sure that invocation styles don't 364 * clobber each other. 365 */ 366#define CHECKOP(_op, _ch) do \ 367 if (OP_DEFAULT != (_op)) { \ 368 warnx("-%c: Conflicting option", (_ch)); \ 369 goto usage; \ 370 } while (/*CONSTCOND*/0) 371 372 path_arg = NULL; 373 op = OP_DEFAULT; 374 375 while (-1 != (ch = getopt(argc, argv, "aC:Dd:npQT:tu:v"))) 376 switch (ch) { 377 case 'a': 378 use_all = 1; 379 break; 380 case 'C': 381 CHECKOP(op, ch); 382 path_arg = optarg; 383 op = OP_CONFFILE; 384 break; 385 case 'D': 386 debug++; 387 break; 388 case 'd': 389 CHECKOP(op, ch); 390 path_arg = optarg; 391 op = OP_UPDATE; 392 break; 393 case 'n': 394 nodb = 1; 395 break; 396 case 'p': 397 warnings = 1; 398 break; 399 case 'Q': 400 mparse_options |= MPARSE_QUICK; 401 break; 402 case 'T': 403 if (strcmp(optarg, "utf8")) { 404 warnx("-T%s: Unsupported output format", 405 optarg); 406 goto usage; 407 } 408 write_utf8 = 1; 409 break; 410 case 't': 411 CHECKOP(op, ch); 412 dup2(STDOUT_FILENO, STDERR_FILENO); 413 op = OP_TEST; 414 nodb = warnings = 1; 415 break; 416 case 'u': 417 CHECKOP(op, ch); 418 path_arg = optarg; 419 op = OP_DELETE; 420 break; 421 case 'v': 422 /* Compatibility with espie@'s makewhatis. */ 423 break; 424 default: 425 goto usage; 426 } 427 428 argc -= optind; 429 argv += optind; 430 431#if HAVE_PLEDGE 432 if (nodb) { 433 if (pledge("stdio rpath", NULL) == -1) { 434 warn("pledge"); 435 return (int)MANDOCLEVEL_SYSERR; 436 } 437 } 438#endif 439 440 if (OP_CONFFILE == op && argc > 0) { 441 warnx("-C: Too many arguments"); 442 goto usage; 443 } 444 445 exitcode = (int)MANDOCLEVEL_OK; 446 mchars_alloc(); 447 mp = mparse_alloc(mparse_options, MANDOCLEVEL_BADARG, NULL, NULL); 448 mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev)); 449 mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file)); 450 451 if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) { 452 453 /* 454 * Most of these deal with a specific directory. 455 * Jump into that directory first. 456 */ 457 if (OP_TEST != op && 0 == set_basedir(path_arg, 1)) 458 goto out; 459 460 if (dbopen(1)) { 461 /* 462 * The existing database is usable. Process 463 * all files specified on the command-line. 464 */ 465#if HAVE_PLEDGE 466 if (!nodb) { 467 if (pledge("stdio rpath wpath cpath fattr flock", NULL) == -1) { 468 warn("pledge"); 469 exitcode = (int)MANDOCLEVEL_SYSERR; 470 goto out; 471 } 472 } 473#endif 474 use_all = 1; 475 for (i = 0; i < argc; i++) 476 filescan(argv[i]); 477 if (OP_TEST != op) 478 dbprune(); 479 } else { 480 /* 481 * Database missing or corrupt. 482 * Recreate from scratch. 483 */ 484 exitcode = (int)MANDOCLEVEL_OK; 485 op = OP_DEFAULT; 486 if (0 == treescan()) 487 goto out; 488 if (0 == dbopen(0)) 489 goto out; 490 } 491 if (OP_DELETE != op) 492 mpages_merge(mp); 493 dbclose(OP_DEFAULT == op ? 0 : 1); 494 } else { 495 /* 496 * If we have arguments, use them as our manpaths. 497 * If we don't, grok from manpath(1) or however else 498 * manconf_parse() wants to do it. 499 */ 500 if (argc > 0) { 501 conf.manpath.paths = mandoc_reallocarray(NULL, 502 argc, sizeof(char *)); 503 conf.manpath.sz = (size_t)argc; 504 for (i = 0; i < argc; i++) 505 conf.manpath.paths[i] = mandoc_strdup(argv[i]); 506 } else 507 manconf_parse(&conf, path_arg, NULL, NULL); 508 509 if (conf.manpath.sz == 0) { 510 exitcode = (int)MANDOCLEVEL_BADARG; 511 say("", "Empty manpath"); 512 } 513 514 /* 515 * First scan the tree rooted at a base directory, then 516 * build a new database and finally move it into place. 517 * Ignore zero-length directories and strip trailing 518 * slashes. 519 */ 520 for (j = 0; j < conf.manpath.sz; j++) { 521 sz = strlen(conf.manpath.paths[j]); 522 if (sz && conf.manpath.paths[j][sz - 1] == '/') 523 conf.manpath.paths[j][--sz] = '\0'; 524 if (0 == sz) 525 continue; 526 527 if (j) { 528 mandoc_ohash_init(&mpages, 6, 529 offsetof(struct mpage, inodev)); 530 mandoc_ohash_init(&mlinks, 6, 531 offsetof(struct mlink, file)); 532 } 533 534 if ( ! set_basedir(conf.manpath.paths[j], argc > 0)) 535 continue; 536 if (0 == treescan()) 537 continue; 538 if (0 == dbopen(0)) 539 continue; 540 541 mpages_merge(mp); 542 if (warnings && !nodb && 543 ! (MPARSE_QUICK & mparse_options)) 544 names_check(); 545 dbclose(0); 546 547 if (j + 1 < conf.manpath.sz) { 548 mpages_free(); 549 ohash_delete(&mpages); 550 ohash_delete(&mlinks); 551 } 552 } 553 } 554out: 555 manconf_free(&conf); 556 mparse_free(mp); 557 mchars_free(); 558 mpages_free(); 559 ohash_delete(&mpages); 560 ohash_delete(&mlinks); 561 return exitcode; 562usage: 563 progname = getprogname(); 564 fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n" 565 " %s [-aDnpQ] [-Tutf8] dir ...\n" 566 " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n" 567 " %s [-Dnp] -u dir [file ...]\n" 568 " %s [-Q] -t file ...\n", 569 progname, progname, progname, progname, progname); 570 571 return (int)MANDOCLEVEL_BADARG; 572} 573 574/* 575 * Scan a directory tree rooted at "basedir" for manpages. 576 * We use fts(), scanning directory parts along the way for clues to our 577 * section and architecture. 578 * 579 * If use_all has been specified, grok all files. 580 * If not, sanitise paths to the following: 581 * 582 * [./]man*[/<arch>]/<name>.<section> 583 * or 584 * [./]cat<section>[/<arch>]/<name>.0 585 * 586 * TODO: accommodate for multi-language directories. 587 */ 588static int 589treescan(void) 590{ 591 char buf[PATH_MAX]; 592 FTS *f; 593 FTSENT *ff; 594 struct mlink *mlink; 595 int dform, gzip; 596 char *dsec, *arch, *fsec, *cp; 597 const char *path; 598 const char *argv[2]; 599 600 argv[0] = "."; 601 argv[1] = (char *)NULL; 602 603 f = fts_open((char * const *)argv, 604 FTS_PHYSICAL | FTS_NOCHDIR, NULL); 605 if (f == NULL) { 606 exitcode = (int)MANDOCLEVEL_SYSERR; 607 say("", "&fts_open"); 608 return 0; 609 } 610 611 dsec = arch = NULL; 612 dform = FORM_NONE; 613 614 while ((ff = fts_read(f)) != NULL) { 615 path = ff->fts_path + 2; 616 switch (ff->fts_info) { 617 618 /* 619 * Symbolic links require various sanity checks, 620 * then get handled just like regular files. 621 */ 622 case FTS_SL: 623 if (realpath(path, buf) == NULL) { 624 if (warnings) 625 say(path, "&realpath"); 626 continue; 627 } 628 if (strstr(buf, basedir) != buf 629#ifdef HOMEBREWDIR 630 && strstr(buf, HOMEBREWDIR) != buf 631#endif 632 ) { 633 if (warnings) say("", 634 "%s: outside base directory", buf); 635 continue; 636 } 637 /* Use logical inode to avoid mpages dupe. */ 638 if (stat(path, ff->fts_statp) == -1) { 639 if (warnings) 640 say(path, "&stat"); 641 continue; 642 } 643 /* FALLTHROUGH */ 644 645 /* 646 * If we're a regular file, add an mlink by using the 647 * stored directory data and handling the filename. 648 */ 649 case FTS_F: 650 if ( ! strcmp(path, MANDOC_DB)) 651 continue; 652 if ( ! use_all && ff->fts_level < 2) { 653 if (warnings) 654 say(path, "Extraneous file"); 655 continue; 656 } 657 gzip = 0; 658 fsec = NULL; 659 while (fsec == NULL) { 660 fsec = strrchr(ff->fts_name, '.'); 661 if (fsec == NULL || strcmp(fsec+1, "gz")) 662 break; 663 gzip = 1; 664 *fsec = '\0'; 665 fsec = NULL; 666 } 667 if (fsec == NULL) { 668 if ( ! use_all) { 669 if (warnings) 670 say(path, 671 "No filename suffix"); 672 continue; 673 } 674 } else if ( ! strcmp(++fsec, "html")) { 675 if (warnings) 676 say(path, "Skip html"); 677 continue; 678 } else if ( ! strcmp(fsec, "ps")) { 679 if (warnings) 680 say(path, "Skip ps"); 681 continue; 682 } else if ( ! strcmp(fsec, "pdf")) { 683 if (warnings) 684 say(path, "Skip pdf"); 685 continue; 686 } else if ( ! use_all && 687 ((dform == FORM_SRC && 688 strncmp(fsec, dsec, strlen(dsec))) || 689 (dform == FORM_CAT && strcmp(fsec, "0")))) { 690 if (warnings) 691 say(path, "Wrong filename suffix"); 692 continue; 693 } else 694 fsec[-1] = '\0'; 695 696 mlink = mandoc_calloc(1, sizeof(struct mlink)); 697 if (strlcpy(mlink->file, path, 698 sizeof(mlink->file)) >= 699 sizeof(mlink->file)) { 700 say(path, "Filename too long"); 701 free(mlink); 702 continue; 703 } 704 mlink->dform = dform; 705 mlink->dsec = dsec; 706 mlink->arch = arch; 707 mlink->name = ff->fts_name; 708 mlink->fsec = fsec; 709 mlink->gzip = gzip; 710 mlink_add(mlink, ff->fts_statp); 711 continue; 712 713 case FTS_D: 714 case FTS_DP: 715 break; 716 717 default: 718 if (warnings) 719 say(path, "Not a regular file"); 720 continue; 721 } 722 723 switch (ff->fts_level) { 724 case 0: 725 /* Ignore the root directory. */ 726 break; 727 case 1: 728 /* 729 * This might contain manX/ or catX/. 730 * Try to infer this from the name. 731 * If we're not in use_all, enforce it. 732 */ 733 cp = ff->fts_name; 734 if (ff->fts_info == FTS_DP) { 735 dform = FORM_NONE; 736 dsec = NULL; 737 break; 738 } 739 740 if ( ! strncmp(cp, "man", 3)) { 741 dform = FORM_SRC; 742 dsec = cp + 3; 743 } else if ( ! strncmp(cp, "cat", 3)) { 744 dform = FORM_CAT; 745 dsec = cp + 3; 746 } else { 747 dform = FORM_NONE; 748 dsec = NULL; 749 } 750 751 if (dsec != NULL || use_all) 752 break; 753 754 if (warnings) 755 say(path, "Unknown directory part"); 756 fts_set(f, ff, FTS_SKIP); 757 break; 758 case 2: 759 /* 760 * Possibly our architecture. 761 * If we're descending, keep tabs on it. 762 */ 763 if (ff->fts_info != FTS_DP && dsec != NULL) 764 arch = ff->fts_name; 765 else 766 arch = NULL; 767 break; 768 default: 769 if (ff->fts_info == FTS_DP || use_all) 770 break; 771 if (warnings) 772 say(path, "Extraneous directory part"); 773 fts_set(f, ff, FTS_SKIP); 774 break; 775 } 776 } 777 778 fts_close(f); 779 return 1; 780} 781 782/* 783 * Add a file to the mlinks table. 784 * Do not verify that it's a "valid" looking manpage (we'll do that 785 * later). 786 * 787 * Try to infer the manual section, architecture, and page name from the 788 * path, assuming it looks like 789 * 790 * [./]man*[/<arch>]/<name>.<section> 791 * or 792 * [./]cat<section>[/<arch>]/<name>.0 793 * 794 * See treescan() for the fts(3) version of this. 795 */ 796static void 797filescan(const char *file) 798{ 799 char buf[PATH_MAX]; 800 struct stat st; 801 struct mlink *mlink; 802 char *p, *start; 803 804 assert(use_all); 805 806 if (0 == strncmp(file, "./", 2)) 807 file += 2; 808 809 /* 810 * We have to do lstat(2) before realpath(3) loses 811 * the information whether this is a symbolic link. 812 * We need to know that because for symbolic links, 813 * we want to use the orginal file name, while for 814 * regular files, we want to use the real path. 815 */ 816 if (-1 == lstat(file, &st)) { 817 exitcode = (int)MANDOCLEVEL_BADARG; 818 say(file, "&lstat"); 819 return; 820 } else if (0 == ((S_IFREG | S_IFLNK) & st.st_mode)) { 821 exitcode = (int)MANDOCLEVEL_BADARG; 822 say(file, "Not a regular file"); 823 return; 824 } 825 826 /* 827 * We have to resolve the file name to the real path 828 * in any case for the base directory check. 829 */ 830 if (NULL == realpath(file, buf)) { 831 exitcode = (int)MANDOCLEVEL_BADARG; 832 say(file, "&realpath"); 833 return; 834 } 835 836 if (OP_TEST == op) 837 start = buf; 838 else if (strstr(buf, basedir) == buf) 839 start = buf + strlen(basedir); 840#ifdef HOMEBREWDIR 841 else if (strstr(buf, HOMEBREWDIR) == buf) 842 start = buf; 843#endif 844 else { 845 exitcode = (int)MANDOCLEVEL_BADARG; 846 say("", "%s: outside base directory", buf); 847 return; 848 } 849 850 /* 851 * Now we are sure the file is inside our tree. 852 * If it is a symbolic link, ignore the real path 853 * and use the original name. 854 * This implies passing stuff like "cat1/../man1/foo.1" 855 * on the command line won't work. So don't do that. 856 * Note the stat(2) can still fail if the link target 857 * doesn't exist. 858 */ 859 if (S_IFLNK & st.st_mode) { 860 if (-1 == stat(buf, &st)) { 861 exitcode = (int)MANDOCLEVEL_BADARG; 862 say(file, "&stat"); 863 return; 864 } 865 if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) { 866 say(file, "Filename too long"); 867 return; 868 } 869 start = buf; 870 if (OP_TEST != op && strstr(buf, basedir) == buf) 871 start += strlen(basedir); 872 } 873 874 mlink = mandoc_calloc(1, sizeof(struct mlink)); 875 mlink->dform = FORM_NONE; 876 if (strlcpy(mlink->file, start, sizeof(mlink->file)) >= 877 sizeof(mlink->file)) { 878 say(start, "Filename too long"); 879 free(mlink); 880 return; 881 } 882 883 /* 884 * First try to guess our directory structure. 885 * If we find a separator, try to look for man* or cat*. 886 * If we find one of these and what's underneath is a directory, 887 * assume it's an architecture. 888 */ 889 if (NULL != (p = strchr(start, '/'))) { 890 *p++ = '\0'; 891 if (0 == strncmp(start, "man", 3)) { 892 mlink->dform = FORM_SRC; 893 mlink->dsec = start + 3; 894 } else if (0 == strncmp(start, "cat", 3)) { 895 mlink->dform = FORM_CAT; 896 mlink->dsec = start + 3; 897 } 898 899 start = p; 900 if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) { 901 *p++ = '\0'; 902 mlink->arch = start; 903 start = p; 904 } 905 } 906 907 /* 908 * Now check the file suffix. 909 * Suffix of `.0' indicates a catpage, `.1-9' is a manpage. 910 */ 911 p = strrchr(start, '\0'); 912 while (p-- > start && '/' != *p && '.' != *p) 913 /* Loop. */ ; 914 915 if ('.' == *p) { 916 *p++ = '\0'; 917 mlink->fsec = p; 918 } 919 920 /* 921 * Now try to parse the name. 922 * Use the filename portion of the path. 923 */ 924 mlink->name = start; 925 if (NULL != (p = strrchr(start, '/'))) { 926 mlink->name = p + 1; 927 *p = '\0'; 928 } 929 mlink_add(mlink, &st); 930} 931 932static void 933mlink_add(struct mlink *mlink, const struct stat *st) 934{ 935 struct inodev inodev; 936 struct mpage *mpage; 937 unsigned int slot; 938 939 assert(NULL != mlink->file); 940 941 mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : ""); 942 mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : ""); 943 mlink->name = mandoc_strdup(mlink->name ? mlink->name : ""); 944 mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : ""); 945 946 if ('0' == *mlink->fsec) { 947 free(mlink->fsec); 948 mlink->fsec = mandoc_strdup(mlink->dsec); 949 mlink->fform = FORM_CAT; 950 } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec) 951 mlink->fform = FORM_SRC; 952 else 953 mlink->fform = FORM_NONE; 954 955 slot = ohash_qlookup(&mlinks, mlink->file); 956 assert(NULL == ohash_find(&mlinks, slot)); 957 ohash_insert(&mlinks, slot, mlink); 958 959 memset(&inodev, 0, sizeof(inodev)); /* Clear padding. */ 960 inodev.st_ino = st->st_ino; 961 inodev.st_dev = st->st_dev; 962 slot = ohash_lookup_memory(&mpages, (char *)&inodev, 963 sizeof(struct inodev), inodev.st_ino); 964 mpage = ohash_find(&mpages, slot); 965 if (NULL == mpage) { 966 mpage = mandoc_calloc(1, sizeof(struct mpage)); 967 mpage->inodev.st_ino = inodev.st_ino; 968 mpage->inodev.st_dev = inodev.st_dev; 969 ohash_insert(&mpages, slot, mpage); 970 } else 971 mlink->next = mpage->mlinks; 972 mpage->mlinks = mlink; 973 mlink->mpage = mpage; 974} 975 976static void 977mlink_free(struct mlink *mlink) 978{ 979 980 free(mlink->dsec); 981 free(mlink->arch); 982 free(mlink->name); 983 free(mlink->fsec); 984 free(mlink); 985} 986 987static void 988mpages_free(void) 989{ 990 struct mpage *mpage; 991 struct mlink *mlink; 992 unsigned int slot; 993 994 mpage = ohash_first(&mpages, &slot); 995 while (NULL != mpage) { 996 while (NULL != (mlink = mpage->mlinks)) { 997 mpage->mlinks = mlink->next; 998 mlink_free(mlink); 999 } 1000 free(mpage->sec); 1001 free(mpage->arch); 1002 free(mpage->title); 1003 free(mpage->desc); 1004 free(mpage); 1005 mpage = ohash_next(&mpages, &slot); 1006 } 1007} 1008 1009/* 1010 * For each mlink to the mpage, check whether the path looks like 1011 * it is formatted, and if it does, check whether a source manual 1012 * exists by the same name, ignoring the suffix. 1013 * If both conditions hold, drop the mlink. 1014 */ 1015static void 1016mlinks_undupe(struct mpage *mpage) 1017{ 1018 char buf[PATH_MAX]; 1019 struct mlink **prev; 1020 struct mlink *mlink; 1021 char *bufp; 1022 1023 mpage->form = FORM_CAT; 1024 prev = &mpage->mlinks; 1025 while (NULL != (mlink = *prev)) { 1026 if (FORM_CAT != mlink->dform) { 1027 mpage->form = FORM_NONE; 1028 goto nextlink; 1029 } 1030 (void)strlcpy(buf, mlink->file, sizeof(buf)); 1031 bufp = strstr(buf, "cat"); 1032 assert(NULL != bufp); 1033 memcpy(bufp, "man", 3); 1034 if (NULL != (bufp = strrchr(buf, '.'))) 1035 *++bufp = '\0'; 1036 (void)strlcat(buf, mlink->dsec, sizeof(buf)); 1037 if (NULL == ohash_find(&mlinks, 1038 ohash_qlookup(&mlinks, buf))) 1039 goto nextlink; 1040 if (warnings) 1041 say(mlink->file, "Man source exists: %s", buf); 1042 if (use_all) 1043 goto nextlink; 1044 *prev = mlink->next; 1045 mlink_free(mlink); 1046 continue; 1047nextlink: 1048 prev = &(*prev)->next; 1049 } 1050} 1051 1052static void 1053mlink_check(struct mpage *mpage, struct mlink *mlink) 1054{ 1055 struct str *str; 1056 unsigned int slot; 1057 1058 /* 1059 * Check whether the manual section given in a file 1060 * agrees with the directory where the file is located. 1061 * Some manuals have suffixes like (3p) on their 1062 * section number either inside the file or in the 1063 * directory name, some are linked into more than one 1064 * section, like encrypt(1) = makekey(8). 1065 */ 1066 1067 if (FORM_SRC == mpage->form && 1068 strcasecmp(mpage->sec, mlink->dsec)) 1069 say(mlink->file, "Section \"%s\" manual in %s directory", 1070 mpage->sec, mlink->dsec); 1071 1072 /* 1073 * Manual page directories exist for each kernel 1074 * architecture as returned by machine(1). 1075 * However, many manuals only depend on the 1076 * application architecture as returned by arch(1). 1077 * For example, some (2/ARM) manuals are shared 1078 * across the "armish" and "zaurus" kernel 1079 * architectures. 1080 * A few manuals are even shared across completely 1081 * different architectures, for example fdformat(1) 1082 * on amd64, i386, sparc, and sparc64. 1083 */ 1084 1085 if (strcasecmp(mpage->arch, mlink->arch)) 1086 say(mlink->file, "Architecture \"%s\" manual in " 1087 "\"%s\" directory", mpage->arch, mlink->arch); 1088 1089 /* 1090 * XXX 1091 * parse_cat() doesn't set NAME_TITLE yet. 1092 */ 1093 1094 if (FORM_CAT == mpage->form) 1095 return; 1096 1097 /* 1098 * Check whether this mlink 1099 * appears as a name in the NAME section. 1100 */ 1101 1102 slot = ohash_qlookup(&names, mlink->name); 1103 str = ohash_find(&names, slot); 1104 assert(NULL != str); 1105 if ( ! (NAME_TITLE & str->mask)) 1106 say(mlink->file, "Name missing in NAME section"); 1107} 1108 1109/* 1110 * Run through the files in the global vector "mpages" 1111 * and add them to the database specified in "basedir". 1112 * 1113 * This handles the parsing scheme itself, using the cues of directory 1114 * and filename to determine whether the file is parsable or not. 1115 */ 1116static void 1117mpages_merge(struct mparse *mp) 1118{ 1119 char any[] = "any"; 1120 struct mpage *mpage, *mpage_dest; 1121 struct mlink *mlink, *mlink_dest; 1122 struct roff_man *man; 1123 char *sodest; 1124 char *cp; 1125 int fd; 1126 unsigned int pslot; 1127 1128 if ( ! nodb) 1129 SQL_EXEC("BEGIN TRANSACTION"); 1130 1131 mpage = ohash_first(&mpages, &pslot); 1132 while (mpage != NULL) { 1133 mlinks_undupe(mpage); 1134 if ((mlink = mpage->mlinks) == NULL) { 1135 mpage = ohash_next(&mpages, &pslot); 1136 continue; 1137 } 1138 1139 name_mask = NAME_MASK; 1140 mandoc_ohash_init(&names, 4, offsetof(struct str, key)); 1141 mandoc_ohash_init(&strings, 6, offsetof(struct str, key)); 1142 mparse_reset(mp); 1143 man = NULL; 1144 sodest = NULL; 1145 1146 if ((fd = mparse_open(mp, mlink->file)) == -1) { 1147 say(mlink->file, "&open"); 1148 goto nextpage; 1149 } 1150 1151 /* 1152 * Interpret the file as mdoc(7) or man(7) source 1153 * code, unless it is known to be formatted. 1154 */ 1155 if (mlink->dform != FORM_CAT || mlink->fform != FORM_CAT) { 1156 mparse_readfd(mp, fd, mlink->file); 1157 close(fd); 1158 mparse_result(mp, &man, &sodest); 1159 } 1160 1161 if (sodest != NULL) { 1162 mlink_dest = ohash_find(&mlinks, 1163 ohash_qlookup(&mlinks, sodest)); 1164 if (mlink_dest == NULL) { 1165 mandoc_asprintf(&cp, "%s.gz", sodest); 1166 mlink_dest = ohash_find(&mlinks, 1167 ohash_qlookup(&mlinks, cp)); 1168 free(cp); 1169 } 1170 if (mlink_dest != NULL) { 1171 1172 /* The .so target exists. */ 1173 1174 mpage_dest = mlink_dest->mpage; 1175 while (1) { 1176 mlink->mpage = mpage_dest; 1177 1178 /* 1179 * If the target was already 1180 * processed, add the links 1181 * to the database now. 1182 * Otherwise, this will 1183 * happen when we come 1184 * to the target. 1185 */ 1186 1187 if (mpage_dest->pageid) 1188 dbadd_mlink_name(mlink); 1189 1190 if (mlink->next == NULL) 1191 break; 1192 mlink = mlink->next; 1193 } 1194 1195 /* Move all links to the target. */ 1196 1197 mlink->next = mlink_dest->next; 1198 mlink_dest->next = mpage->mlinks; 1199 mpage->mlinks = NULL; 1200 } 1201 goto nextpage; 1202 } else if (man != NULL && man->macroset == MACROSET_MDOC) { 1203 mdoc_validate(man); 1204 mpage->form = FORM_SRC; 1205 mpage->sec = man->meta.msec; 1206 mpage->sec = mandoc_strdup( 1207 mpage->sec == NULL ? "" : mpage->sec); 1208 mpage->arch = man->meta.arch; 1209 mpage->arch = mandoc_strdup( 1210 mpage->arch == NULL ? "" : mpage->arch); 1211 mpage->title = mandoc_strdup(man->meta.title); 1212 } else if (man != NULL && man->macroset == MACROSET_MAN) { 1213 man_validate(man); 1214 mpage->form = FORM_SRC; 1215 mpage->sec = mandoc_strdup(man->meta.msec); 1216 mpage->arch = mandoc_strdup(mlink->arch); 1217 mpage->title = mandoc_strdup(man->meta.title); 1218 } else { 1219 mpage->form = FORM_CAT; 1220 mpage->sec = mandoc_strdup(mlink->dsec); 1221 mpage->arch = mandoc_strdup(mlink->arch); 1222 mpage->title = mandoc_strdup(mlink->name); 1223 } 1224 putkey(mpage, mpage->sec, TYPE_sec); 1225 if (*mpage->arch != '\0') 1226 putkey(mpage, mpage->arch, TYPE_arch); 1227 1228 for ( ; mlink != NULL; mlink = mlink->next) { 1229 if ('\0' != *mlink->dsec) 1230 putkey(mpage, mlink->dsec, TYPE_sec); 1231 if ('\0' != *mlink->fsec) 1232 putkey(mpage, mlink->fsec, TYPE_sec); 1233 putkey(mpage, '\0' == *mlink->arch ? 1234 any : mlink->arch, TYPE_arch); 1235 putkey(mpage, mlink->name, NAME_FILE); 1236 } 1237 1238 assert(mpage->desc == NULL); 1239 if (man != NULL && man->macroset == MACROSET_MDOC) 1240 parse_mdoc(mpage, &man->meta, man->first); 1241 else if (man != NULL) 1242 parse_man(mpage, &man->meta, man->first); 1243 else 1244 parse_cat(mpage, fd); 1245 if (mpage->desc == NULL) 1246 mpage->desc = mandoc_strdup(mpage->mlinks->name); 1247 1248 if (warnings && !use_all) 1249 for (mlink = mpage->mlinks; mlink; 1250 mlink = mlink->next) 1251 mlink_check(mpage, mlink); 1252 1253 dbadd(mpage); 1254 mlink = mpage->mlinks; 1255 1256nextpage: 1257 ohash_delete(&strings); 1258 ohash_delete(&names); 1259 mpage = ohash_next(&mpages, &pslot); 1260 } 1261 1262 if (0 == nodb) 1263 SQL_EXEC("END TRANSACTION"); 1264} 1265 1266static void 1267names_check(void) 1268{ 1269 sqlite3_stmt *stmt; 1270 const char *name, *sec, *arch, *key; 1271 1272 sqlite3_prepare_v2(db, 1273 "SELECT name, sec, arch, key FROM (" 1274 "SELECT name AS key, pageid FROM names " 1275 "WHERE bits & ? AND NOT EXISTS (" 1276 "SELECT pageid FROM mlinks " 1277 "WHERE mlinks.pageid == names.pageid " 1278 "AND mlinks.name == names.name" 1279 ")" 1280 ") JOIN (" 1281 "SELECT sec, arch, name, pageid FROM mlinks " 1282 "GROUP BY pageid" 1283 ") USING (pageid);", 1284 -1, &stmt, NULL); 1285 1286 if (sqlite3_bind_int64(stmt, 1, NAME_TITLE) != SQLITE_OK) 1287 say("", "%s", sqlite3_errmsg(db)); 1288 1289 while (sqlite3_step(stmt) == SQLITE_ROW) { 1290 name = (const char *)sqlite3_column_text(stmt, 0); 1291 sec = (const char *)sqlite3_column_text(stmt, 1); 1292 arch = (const char *)sqlite3_column_text(stmt, 2); 1293 key = (const char *)sqlite3_column_text(stmt, 3); 1294 say("", "%s(%s%s%s) lacks mlink \"%s\"", name, sec, 1295 '\0' == *arch ? "" : "/", 1296 '\0' == *arch ? "" : arch, key); 1297 } 1298 sqlite3_finalize(stmt); 1299} 1300 1301static void 1302parse_cat(struct mpage *mpage, int fd) 1303{ 1304 FILE *stream; 1305 char *line, *p, *title; 1306 size_t linesz, plen, titlesz; 1307 ssize_t len; 1308 int offs; 1309 1310 stream = (-1 == fd) ? 1311 fopen(mpage->mlinks->file, "r") : 1312 fdopen(fd, "r"); 1313 if (NULL == stream) { 1314 if (-1 != fd) 1315 close(fd); 1316 if (warnings) 1317 say(mpage->mlinks->file, "&fopen"); 1318 return; 1319 } 1320 1321 line = NULL; 1322 linesz = 0; 1323 1324 /* Skip to first blank line. */ 1325 1326 while (getline(&line, &linesz, stream) != -1) 1327 if (*line == '\n') 1328 break; 1329 1330 /* 1331 * Assume the first line that is not indented 1332 * is the first section header. Skip to it. 1333 */ 1334 1335 while (getline(&line, &linesz, stream) != -1) 1336 if (*line != '\n' && *line != ' ') 1337 break; 1338 1339 /* 1340 * Read up until the next section into a buffer. 1341 * Strip the leading and trailing newline from each read line, 1342 * appending a trailing space. 1343 * Ignore empty (whitespace-only) lines. 1344 */ 1345 1346 titlesz = 0; 1347 title = NULL; 1348 1349 while ((len = getline(&line, &linesz, stream)) != -1) { 1350 if (*line != ' ') 1351 break; 1352 offs = 0; 1353 while (isspace((unsigned char)line[offs])) 1354 offs++; 1355 if (line[offs] == '\0') 1356 continue; 1357 title = mandoc_realloc(title, titlesz + len - offs); 1358 memcpy(title + titlesz, line + offs, len - offs); 1359 titlesz += len - offs; 1360 title[titlesz - 1] = ' '; 1361 } 1362 free(line); 1363 1364 /* 1365 * If no page content can be found, or the input line 1366 * is already the next section header, or there is no 1367 * trailing newline, reuse the page title as the page 1368 * description. 1369 */ 1370 1371 if (NULL == title || '\0' == *title) { 1372 if (warnings) 1373 say(mpage->mlinks->file, 1374 "Cannot find NAME section"); 1375 fclose(stream); 1376 free(title); 1377 return; 1378 } 1379 1380 title[titlesz - 1] = '\0'; 1381 1382 /* 1383 * Skip to the first dash. 1384 * Use the remaining line as the description (no more than 70 1385 * bytes). 1386 */ 1387 1388 if (NULL != (p = strstr(title, "- "))) { 1389 for (p += 2; ' ' == *p || '\b' == *p; p++) 1390 /* Skip to next word. */ ; 1391 } else { 1392 if (warnings) 1393 say(mpage->mlinks->file, 1394 "No dash in title line"); 1395 p = title; 1396 } 1397 1398 plen = strlen(p); 1399 1400 /* Strip backspace-encoding from line. */ 1401 1402 while (NULL != (line = memchr(p, '\b', plen))) { 1403 len = line - p; 1404 if (0 == len) { 1405 memmove(line, line + 1, plen--); 1406 continue; 1407 } 1408 memmove(line - 1, line + 1, plen - len); 1409 plen -= 2; 1410 } 1411 1412 mpage->desc = mandoc_strdup(p); 1413 fclose(stream); 1414 free(title); 1415} 1416 1417/* 1418 * Put a type/word pair into the word database for this particular file. 1419 */ 1420static void 1421putkey(const struct mpage *mpage, char *value, uint64_t type) 1422{ 1423 char *cp; 1424 1425 assert(NULL != value); 1426 if (TYPE_arch == type) 1427 for (cp = value; *cp; cp++) 1428 if (isupper((unsigned char)*cp)) 1429 *cp = _tolower((unsigned char)*cp); 1430 putkeys(mpage, value, strlen(value), type); 1431} 1432 1433/* 1434 * Grok all nodes at or below a certain mdoc node into putkey(). 1435 */ 1436static void 1437putmdockey(const struct mpage *mpage, 1438 const struct roff_node *n, uint64_t m) 1439{ 1440 1441 for ( ; NULL != n; n = n->next) { 1442 if (NULL != n->child) 1443 putmdockey(mpage, n->child, m); 1444 if (n->type == ROFFT_TEXT) 1445 putkey(mpage, n->string, m); 1446 } 1447} 1448 1449static void 1450parse_man(struct mpage *mpage, const struct roff_meta *meta, 1451 const struct roff_node *n) 1452{ 1453 const struct roff_node *head, *body; 1454 char *start, *title; 1455 char byte; 1456 size_t sz; 1457 1458 if (n == NULL) 1459 return; 1460 1461 /* 1462 * We're only searching for one thing: the first text child in 1463 * the BODY of a NAME section. Since we don't keep track of 1464 * sections in -man, run some hoops to find out whether we're in 1465 * the correct section or not. 1466 */ 1467 1468 if (n->type == ROFFT_BODY && n->tok == MAN_SH) { 1469 body = n; 1470 if ((head = body->parent->head) != NULL && 1471 (head = head->child) != NULL && 1472 head->next == NULL && 1473 head->type == ROFFT_TEXT && 1474 strcmp(head->string, "NAME") == 0 && 1475 body->child != NULL) { 1476 1477 /* 1478 * Suck the entire NAME section into memory. 1479 * Yes, we might run away. 1480 * But too many manuals have big, spread-out 1481 * NAME sections over many lines. 1482 */ 1483 1484 title = NULL; 1485 deroff(&title, body); 1486 if (NULL == title) 1487 return; 1488 1489 /* 1490 * Go through a special heuristic dance here. 1491 * Conventionally, one or more manual names are 1492 * comma-specified prior to a whitespace, then a 1493 * dash, then a description. Try to puzzle out 1494 * the name parts here. 1495 */ 1496 1497 start = title; 1498 for ( ;; ) { 1499 sz = strcspn(start, " ,"); 1500 if ('\0' == start[sz]) 1501 break; 1502 1503 byte = start[sz]; 1504 start[sz] = '\0'; 1505 1506 /* 1507 * Assume a stray trailing comma in the 1508 * name list if a name begins with a dash. 1509 */ 1510 1511 if ('-' == start[0] || 1512 ('\\' == start[0] && '-' == start[1])) 1513 break; 1514 1515 putkey(mpage, start, NAME_TITLE); 1516 if ( ! (mpage->name_head_done || 1517 strcasecmp(start, meta->title))) { 1518 putkey(mpage, start, NAME_HEAD); 1519 mpage->name_head_done = 1; 1520 } 1521 1522 if (' ' == byte) { 1523 start += sz + 1; 1524 break; 1525 } 1526 1527 assert(',' == byte); 1528 start += sz + 1; 1529 while (' ' == *start) 1530 start++; 1531 } 1532 1533 if (start == title) { 1534 putkey(mpage, start, NAME_TITLE); 1535 if ( ! (mpage->name_head_done || 1536 strcasecmp(start, meta->title))) { 1537 putkey(mpage, start, NAME_HEAD); 1538 mpage->name_head_done = 1; 1539 } 1540 free(title); 1541 return; 1542 } 1543 1544 while (isspace((unsigned char)*start)) 1545 start++; 1546 1547 if (0 == strncmp(start, "-", 1)) 1548 start += 1; 1549 else if (0 == strncmp(start, "\\-\\-", 4)) 1550 start += 4; 1551 else if (0 == strncmp(start, "\\-", 2)) 1552 start += 2; 1553 else if (0 == strncmp(start, "\\(en", 4)) 1554 start += 4; 1555 else if (0 == strncmp(start, "\\(em", 4)) 1556 start += 4; 1557 1558 while (' ' == *start) 1559 start++; 1560 1561 mpage->desc = mandoc_strdup(start); 1562 free(title); 1563 return; 1564 } 1565 } 1566 1567 for (n = n->child; n; n = n->next) { 1568 if (NULL != mpage->desc) 1569 break; 1570 parse_man(mpage, meta, n); 1571 } 1572} 1573 1574static void 1575parse_mdoc(struct mpage *mpage, const struct roff_meta *meta, 1576 const struct roff_node *n) 1577{ 1578 1579 assert(NULL != n); 1580 for (n = n->child; NULL != n; n = n->next) { 1581 switch (n->type) { 1582 case ROFFT_ELEM: 1583 case ROFFT_BLOCK: 1584 case ROFFT_HEAD: 1585 case ROFFT_BODY: 1586 case ROFFT_TAIL: 1587 if (NULL != mdocs[n->tok].fp) 1588 if (0 == (*mdocs[n->tok].fp)(mpage, meta, n)) 1589 break; 1590 if (mdocs[n->tok].mask) 1591 putmdockey(mpage, n->child, 1592 mdocs[n->tok].mask); 1593 break; 1594 default: 1595 assert(n->type != ROFFT_ROOT); 1596 continue; 1597 } 1598 if (NULL != n->child) 1599 parse_mdoc(mpage, meta, n); 1600 } 1601} 1602 1603static int 1604parse_mdoc_Fd(struct mpage *mpage, const struct roff_meta *meta, 1605 const struct roff_node *n) 1606{ 1607 char *start, *end; 1608 size_t sz; 1609 1610 if (SEC_SYNOPSIS != n->sec || 1611 NULL == (n = n->child) || 1612 n->type != ROFFT_TEXT) 1613 return 0; 1614 1615 /* 1616 * Only consider those `Fd' macro fields that begin with an 1617 * "inclusion" token (versus, e.g., #define). 1618 */ 1619 1620 if (strcmp("#include", n->string)) 1621 return 0; 1622 1623 if ((n = n->next) == NULL || n->type != ROFFT_TEXT) 1624 return 0; 1625 1626 /* 1627 * Strip away the enclosing angle brackets and make sure we're 1628 * not zero-length. 1629 */ 1630 1631 start = n->string; 1632 if ('<' == *start || '"' == *start) 1633 start++; 1634 1635 if (0 == (sz = strlen(start))) 1636 return 0; 1637 1638 end = &start[(int)sz - 1]; 1639 if ('>' == *end || '"' == *end) 1640 end--; 1641 1642 if (end > start) 1643 putkeys(mpage, start, end - start + 1, TYPE_In); 1644 return 0; 1645} 1646 1647static void 1648parse_mdoc_fname(struct mpage *mpage, const struct roff_node *n) 1649{ 1650 char *cp; 1651 size_t sz; 1652 1653 if (n->type != ROFFT_TEXT) 1654 return; 1655 1656 /* Skip function pointer punctuation. */ 1657 1658 cp = n->string; 1659 while (*cp == '(' || *cp == '*') 1660 cp++; 1661 sz = strcspn(cp, "()"); 1662 1663 putkeys(mpage, cp, sz, TYPE_Fn); 1664 if (n->sec == SEC_SYNOPSIS) 1665 putkeys(mpage, cp, sz, NAME_SYN); 1666} 1667 1668static int 1669parse_mdoc_Fn(struct mpage *mpage, const struct roff_meta *meta, 1670 const struct roff_node *n) 1671{ 1672 1673 if (n->child == NULL) 1674 return 0; 1675 1676 parse_mdoc_fname(mpage, n->child); 1677 1678 for (n = n->child->next; n != NULL; n = n->next) 1679 if (n->type == ROFFT_TEXT) 1680 putkey(mpage, n->string, TYPE_Fa); 1681 1682 return 0; 1683} 1684 1685static int 1686parse_mdoc_Fo(struct mpage *mpage, const struct roff_meta *meta, 1687 const struct roff_node *n) 1688{ 1689 1690 if (n->type != ROFFT_HEAD) 1691 return 1; 1692 1693 if (n->child != NULL) 1694 parse_mdoc_fname(mpage, n->child); 1695 1696 return 0; 1697} 1698 1699static int 1700parse_mdoc_Va(struct mpage *mpage, const struct roff_meta *meta, 1701 const struct roff_node *n) 1702{ 1703 char *cp; 1704 1705 if (n->type != ROFFT_ELEM && n->type != ROFFT_BODY) 1706 return 0; 1707 1708 if (n->child != NULL && 1709 n->child->next == NULL && 1710 n->child->type == ROFFT_TEXT) 1711 return 1; 1712 1713 cp = NULL; 1714 deroff(&cp, n); 1715 if (cp != NULL) { 1716 putkey(mpage, cp, TYPE_Vt | (n->tok == MDOC_Va || 1717 n->type == ROFFT_BODY ? TYPE_Va : 0)); 1718 free(cp); 1719 } 1720 1721 return 0; 1722} 1723 1724static int 1725parse_mdoc_Xr(struct mpage *mpage, const struct roff_meta *meta, 1726 const struct roff_node *n) 1727{ 1728 char *cp; 1729 1730 if (NULL == (n = n->child)) 1731 return 0; 1732 1733 if (NULL == n->next) { 1734 putkey(mpage, n->string, TYPE_Xr); 1735 return 0; 1736 } 1737 1738 mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string); 1739 putkey(mpage, cp, TYPE_Xr); 1740 free(cp); 1741 return 0; 1742} 1743 1744static int 1745parse_mdoc_Nd(struct mpage *mpage, const struct roff_meta *meta, 1746 const struct roff_node *n) 1747{ 1748 1749 if (n->type == ROFFT_BODY) 1750 deroff(&mpage->desc, n); 1751 return 0; 1752} 1753 1754static int 1755parse_mdoc_Nm(struct mpage *mpage, const struct roff_meta *meta, 1756 const struct roff_node *n) 1757{ 1758 1759 if (SEC_NAME == n->sec) 1760 putmdockey(mpage, n->child, NAME_TITLE); 1761 else if (n->sec == SEC_SYNOPSIS && n->type == ROFFT_HEAD) { 1762 if (n->child == NULL) 1763 putkey(mpage, meta->name, NAME_SYN); 1764 else 1765 putmdockey(mpage, n->child, NAME_SYN); 1766 } 1767 if ( ! (mpage->name_head_done || 1768 n->child == NULL || n->child->string == NULL || 1769 strcasecmp(n->child->string, meta->title))) { 1770 putkey(mpage, n->child->string, ROFFT_HEAD); 1771 mpage->name_head_done = 1; 1772 } 1773 return 0; 1774} 1775 1776static int 1777parse_mdoc_Sh(struct mpage *mpage, const struct roff_meta *meta, 1778 const struct roff_node *n) 1779{ 1780 1781 return n->sec == SEC_CUSTOM && n->type == ROFFT_HEAD; 1782} 1783 1784static int 1785parse_mdoc_head(struct mpage *mpage, const struct roff_meta *meta, 1786 const struct roff_node *n) 1787{ 1788 1789 return n->type == ROFFT_HEAD; 1790} 1791 1792/* 1793 * Add a string to the hash table for the current manual. 1794 * Each string has a bitmask telling which macros it belongs to. 1795 * When we finish the manual, we'll dump the table. 1796 */ 1797static void 1798putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v) 1799{ 1800 struct ohash *htab; 1801 struct str *s; 1802 const char *end; 1803 unsigned int slot; 1804 int i, mustfree; 1805 1806 if (0 == sz) 1807 return; 1808 1809 mustfree = render_string(&cp, &sz); 1810 1811 if (TYPE_Nm & v) { 1812 htab = &names; 1813 v &= name_mask; 1814 if (v & NAME_FIRST) 1815 name_mask &= ~NAME_FIRST; 1816 if (debug > 1) 1817 say(mpage->mlinks->file, 1818 "Adding name %*s, bits=%d", sz, cp, v); 1819 } else { 1820 htab = &strings; 1821 if (debug > 1) 1822 for (i = 0; i < mansearch_keymax; i++) 1823 if ((uint64_t)1 << i & v) 1824 say(mpage->mlinks->file, 1825 "Adding key %s=%*s", 1826 mansearch_keynames[i], sz, cp); 1827 } 1828 1829 end = cp + sz; 1830 slot = ohash_qlookupi(htab, cp, &end); 1831 s = ohash_find(htab, slot); 1832 1833 if (NULL != s && mpage == s->mpage) { 1834 s->mask |= v; 1835 return; 1836 } else if (NULL == s) { 1837 s = mandoc_calloc(1, sizeof(struct str) + sz + 1); 1838 memcpy(s->key, cp, sz); 1839 ohash_insert(htab, slot, s); 1840 } 1841 s->mpage = mpage; 1842 s->mask = v; 1843 1844 if (mustfree) 1845 free(cp); 1846} 1847 1848/* 1849 * Take a Unicode codepoint and produce its UTF-8 encoding. 1850 * This isn't the best way to do this, but it works. 1851 * The magic numbers are from the UTF-8 packaging. 1852 * They're not as scary as they seem: read the UTF-8 spec for details. 1853 */ 1854static size_t 1855utf8(unsigned int cp, char out[7]) 1856{ 1857 size_t rc; 1858 1859 rc = 0; 1860 if (cp <= 0x0000007F) { 1861 rc = 1; 1862 out[0] = (char)cp; 1863 } else if (cp <= 0x000007FF) { 1864 rc = 2; 1865 out[0] = (cp >> 6 & 31) | 192; 1866 out[1] = (cp & 63) | 128; 1867 } else if (cp <= 0x0000FFFF) { 1868 rc = 3; 1869 out[0] = (cp >> 12 & 15) | 224; 1870 out[1] = (cp >> 6 & 63) | 128; 1871 out[2] = (cp & 63) | 128; 1872 } else if (cp <= 0x001FFFFF) { 1873 rc = 4; 1874 out[0] = (cp >> 18 & 7) | 240; 1875 out[1] = (cp >> 12 & 63) | 128; 1876 out[2] = (cp >> 6 & 63) | 128; 1877 out[3] = (cp & 63) | 128; 1878 } else if (cp <= 0x03FFFFFF) { 1879 rc = 5; 1880 out[0] = (cp >> 24 & 3) | 248; 1881 out[1] = (cp >> 18 & 63) | 128; 1882 out[2] = (cp >> 12 & 63) | 128; 1883 out[3] = (cp >> 6 & 63) | 128; 1884 out[4] = (cp & 63) | 128; 1885 } else if (cp <= 0x7FFFFFFF) { 1886 rc = 6; 1887 out[0] = (cp >> 30 & 1) | 252; 1888 out[1] = (cp >> 24 & 63) | 128; 1889 out[2] = (cp >> 18 & 63) | 128; 1890 out[3] = (cp >> 12 & 63) | 128; 1891 out[4] = (cp >> 6 & 63) | 128; 1892 out[5] = (cp & 63) | 128; 1893 } else 1894 return 0; 1895 1896 out[rc] = '\0'; 1897 return rc; 1898} 1899 1900/* 1901 * If the string contains escape sequences, 1902 * replace it with an allocated rendering and return 1, 1903 * such that the caller can free it after use. 1904 * Otherwise, do nothing and return 0. 1905 */ 1906static int 1907render_string(char **public, size_t *psz) 1908{ 1909 const char *src, *scp, *addcp, *seq; 1910 char *dst; 1911 size_t ssz, dsz, addsz; 1912 char utfbuf[7], res[6]; 1913 int seqlen, unicode; 1914 1915 res[0] = '\\'; 1916 res[1] = '\t'; 1917 res[2] = ASCII_NBRSP; 1918 res[3] = ASCII_HYPH; 1919 res[4] = ASCII_BREAK; 1920 res[5] = '\0'; 1921 1922 src = scp = *public; 1923 ssz = *psz; 1924 dst = NULL; 1925 dsz = 0; 1926 1927 while (scp < src + *psz) { 1928 1929 /* Leave normal characters unchanged. */ 1930 1931 if (strchr(res, *scp) == NULL) { 1932 if (dst != NULL) 1933 dst[dsz++] = *scp; 1934 scp++; 1935 continue; 1936 } 1937 1938 /* 1939 * Found something that requires replacing, 1940 * make sure we have a destination buffer. 1941 */ 1942 1943 if (dst == NULL) { 1944 dst = mandoc_malloc(ssz + 1); 1945 dsz = scp - src; 1946 memcpy(dst, src, dsz); 1947 } 1948 1949 /* Handle single-char special characters. */ 1950 1951 switch (*scp) { 1952 case '\\': 1953 break; 1954 case '\t': 1955 case ASCII_NBRSP: 1956 dst[dsz++] = ' '; 1957 scp++; 1958 continue; 1959 case ASCII_HYPH: 1960 dst[dsz++] = '-'; 1961 /* FALLTHROUGH */ 1962 case ASCII_BREAK: 1963 scp++; 1964 continue; 1965 default: 1966 abort(); 1967 } 1968 1969 /* 1970 * Found an escape sequence. 1971 * Read past the slash, then parse it. 1972 * Ignore everything except characters. 1973 */ 1974 1975 scp++; 1976 if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL) 1977 continue; 1978 1979 /* 1980 * Render the special character 1981 * as either UTF-8 or ASCII. 1982 */ 1983 1984 if (write_utf8) { 1985 unicode = mchars_spec2cp(seq, seqlen); 1986 if (unicode <= 0) 1987 continue; 1988 addsz = utf8(unicode, utfbuf); 1989 if (addsz == 0) 1990 continue; 1991 addcp = utfbuf; 1992 } else { 1993 addcp = mchars_spec2str(seq, seqlen, &addsz); 1994 if (addcp == NULL) 1995 continue; 1996 if (*addcp == ASCII_NBRSP) { 1997 addcp = " "; 1998 addsz = 1; 1999 } 2000 } 2001 2002 /* Copy the rendered glyph into the stream. */ 2003 2004 ssz += addsz; 2005 dst = mandoc_realloc(dst, ssz + 1); 2006 memcpy(dst + dsz, addcp, addsz); 2007 dsz += addsz; 2008 } 2009 if (dst != NULL) { 2010 *public = dst; 2011 *psz = dsz; 2012 } 2013 2014 /* Trim trailing whitespace and NUL-terminate. */ 2015 2016 while (*psz > 0 && (*public)[*psz - 1] == ' ') 2017 --*psz; 2018 if (dst != NULL) { 2019 (*public)[*psz] = '\0'; 2020 return 1; 2021 } else 2022 return 0; 2023} 2024 2025static void 2026dbadd_mlink(const struct mlink *mlink) 2027{ 2028 size_t i; 2029 2030 i = 1; 2031 SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->dsec); 2032 SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->arch); 2033 SQL_BIND_TEXT(stmts[STMT_INSERT_LINK], i, mlink->name); 2034 SQL_BIND_INT64(stmts[STMT_INSERT_LINK], i, mlink->mpage->pageid); 2035 SQL_STEP(stmts[STMT_INSERT_LINK]); 2036 sqlite3_reset(stmts[STMT_INSERT_LINK]); 2037} 2038 2039static void 2040dbadd_mlink_name(const struct mlink *mlink) 2041{ 2042 uint64_t bits; 2043 size_t i; 2044 2045 dbadd_mlink(mlink); 2046 2047 i = 1; 2048 SQL_BIND_INT64(stmts[STMT_SELECT_NAME], i, mlink->mpage->pageid); 2049 bits = NAME_FILE & NAME_MASK; 2050 if (sqlite3_step(stmts[STMT_SELECT_NAME]) == SQLITE_ROW) { 2051 bits |= sqlite3_column_int64(stmts[STMT_SELECT_NAME], 0); 2052 sqlite3_reset(stmts[STMT_SELECT_NAME]); 2053 } 2054 2055 i = 1; 2056 SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, bits); 2057 SQL_BIND_TEXT(stmts[STMT_INSERT_NAME], i, mlink->name); 2058 SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, mlink->mpage->pageid); 2059 SQL_STEP(stmts[STMT_INSERT_NAME]); 2060 sqlite3_reset(stmts[STMT_INSERT_NAME]); 2061} 2062 2063/* 2064 * Flush the current page's terms (and their bits) into the database. 2065 * Wrap the entire set of additions in a transaction to make sqlite be a 2066 * little faster. 2067 * Also, handle escape sequences at the last possible moment. 2068 */ 2069static void 2070dbadd(struct mpage *mpage) 2071{ 2072 struct mlink *mlink; 2073 struct str *key; 2074 char *cp; 2075 size_t i; 2076 unsigned int slot; 2077 int mustfree; 2078 2079 mlink = mpage->mlinks; 2080 2081 if (nodb) { 2082 for (key = ohash_first(&names, &slot); NULL != key; 2083 key = ohash_next(&names, &slot)) 2084 free(key); 2085 for (key = ohash_first(&strings, &slot); NULL != key; 2086 key = ohash_next(&strings, &slot)) 2087 free(key); 2088 if (0 == debug) 2089 return; 2090 while (NULL != mlink) { 2091 fputs(mlink->name, stdout); 2092 if (NULL == mlink->next || 2093 strcmp(mlink->dsec, mlink->next->dsec) || 2094 strcmp(mlink->fsec, mlink->next->fsec) || 2095 strcmp(mlink->arch, mlink->next->arch)) { 2096 putchar('('); 2097 if ('\0' == *mlink->dsec) 2098 fputs(mlink->fsec, stdout); 2099 else 2100 fputs(mlink->dsec, stdout); 2101 if ('\0' != *mlink->arch) 2102 printf("/%s", mlink->arch); 2103 putchar(')'); 2104 } 2105 mlink = mlink->next; 2106 if (NULL != mlink) 2107 fputs(", ", stdout); 2108 } 2109 printf(" - %s\n", mpage->desc); 2110 return; 2111 } 2112 2113 if (debug) 2114 say(mlink->file, "Adding to database"); 2115 2116 cp = mpage->desc; 2117 i = strlen(cp); 2118 mustfree = render_string(&cp, &i); 2119 i = 1; 2120 SQL_BIND_TEXT(stmts[STMT_INSERT_PAGE], i, cp); 2121 SQL_BIND_INT(stmts[STMT_INSERT_PAGE], i, mpage->form); 2122 SQL_STEP(stmts[STMT_INSERT_PAGE]); 2123 mpage->pageid = sqlite3_last_insert_rowid(db); 2124 sqlite3_reset(stmts[STMT_INSERT_PAGE]); 2125 if (mustfree) 2126 free(cp); 2127 2128 while (NULL != mlink) { 2129 dbadd_mlink(mlink); 2130 mlink = mlink->next; 2131 } 2132 mlink = mpage->mlinks; 2133 2134 for (key = ohash_first(&names, &slot); NULL != key; 2135 key = ohash_next(&names, &slot)) { 2136 assert(key->mpage == mpage); 2137 i = 1; 2138 SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, key->mask); 2139 SQL_BIND_TEXT(stmts[STMT_INSERT_NAME], i, key->key); 2140 SQL_BIND_INT64(stmts[STMT_INSERT_NAME], i, mpage->pageid); 2141 SQL_STEP(stmts[STMT_INSERT_NAME]); 2142 sqlite3_reset(stmts[STMT_INSERT_NAME]); 2143 free(key); 2144 } 2145 for (key = ohash_first(&strings, &slot); NULL != key; 2146 key = ohash_next(&strings, &slot)) { 2147 assert(key->mpage == mpage); 2148 i = 1; 2149 SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, key->mask); 2150 SQL_BIND_TEXT(stmts[STMT_INSERT_KEY], i, key->key); 2151 SQL_BIND_INT64(stmts[STMT_INSERT_KEY], i, mpage->pageid); 2152 SQL_STEP(stmts[STMT_INSERT_KEY]); 2153 sqlite3_reset(stmts[STMT_INSERT_KEY]); 2154 free(key); 2155 } 2156} 2157 2158static void 2159dbprune(void) 2160{ 2161 struct mpage *mpage; 2162 struct mlink *mlink; 2163 size_t i; 2164 unsigned int slot; 2165 2166 if (0 == nodb) 2167 SQL_EXEC("BEGIN TRANSACTION"); 2168 2169 for (mpage = ohash_first(&mpages, &slot); NULL != mpage; 2170 mpage = ohash_next(&mpages, &slot)) { 2171 mlink = mpage->mlinks; 2172 if (debug) 2173 say(mlink->file, "Deleting from database"); 2174 if (nodb) 2175 continue; 2176 for ( ; NULL != mlink; mlink = mlink->next) { 2177 i = 1; 2178 SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], 2179 i, mlink->dsec); 2180 SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], 2181 i, mlink->arch); 2182 SQL_BIND_TEXT(stmts[STMT_DELETE_PAGE], 2183 i, mlink->name); 2184 SQL_STEP(stmts[STMT_DELETE_PAGE]); 2185 sqlite3_reset(stmts[STMT_DELETE_PAGE]); 2186 } 2187 } 2188 2189 if (0 == nodb) 2190 SQL_EXEC("END TRANSACTION"); 2191} 2192 2193/* 2194 * Close an existing database and its prepared statements. 2195 * If "real" is not set, rename the temporary file into the real one. 2196 */ 2197static void 2198dbclose(int real) 2199{ 2200 size_t i; 2201 int status; 2202 pid_t child; 2203 2204 if (nodb) 2205 return; 2206 2207 for (i = 0; i < STMT__MAX; i++) { 2208 sqlite3_finalize(stmts[i]); 2209 stmts[i] = NULL; 2210 } 2211 2212 sqlite3_close(db); 2213 db = NULL; 2214 2215 if (real) 2216 return; 2217 2218 if ('\0' == *tempfilename) { 2219 if (-1 == rename(MANDOC_DB "~", MANDOC_DB)) { 2220 exitcode = (int)MANDOCLEVEL_SYSERR; 2221 say(MANDOC_DB, "&rename"); 2222 } 2223 return; 2224 } 2225 2226 switch (child = fork()) { 2227 case -1: 2228 exitcode = (int)MANDOCLEVEL_SYSERR; 2229 say("", "&fork cmp"); 2230 return; 2231 case 0: 2232 execlp("cmp", "cmp", "-s", 2233 tempfilename, MANDOC_DB, (char *)NULL); 2234 say("", "&exec cmp"); 2235 exit(0); 2236 default: 2237 break; 2238 } 2239 if (-1 == waitpid(child, &status, 0)) { 2240 exitcode = (int)MANDOCLEVEL_SYSERR; 2241 say("", "&wait cmp"); 2242 } else if (WIFSIGNALED(status)) { 2243 exitcode = (int)MANDOCLEVEL_SYSERR; 2244 say("", "cmp died from signal %d", WTERMSIG(status)); 2245 } else if (WEXITSTATUS(status)) { 2246 exitcode = (int)MANDOCLEVEL_SYSERR; 2247 say(MANDOC_DB, 2248 "Data changed, but cannot replace database"); 2249 } 2250 2251 *strrchr(tempfilename, '/') = '\0'; 2252 switch (child = fork()) { 2253 case -1: 2254 exitcode = (int)MANDOCLEVEL_SYSERR; 2255 say("", "&fork rm"); 2256 return; 2257 case 0: 2258 execlp("rm", "rm", "-rf", tempfilename, (char *)NULL); 2259 say("", "&exec rm"); 2260 exit((int)MANDOCLEVEL_SYSERR); 2261 default: 2262 break; 2263 } 2264 if (-1 == waitpid(child, &status, 0)) { 2265 exitcode = (int)MANDOCLEVEL_SYSERR; 2266 say("", "&wait rm"); 2267 } else if (WIFSIGNALED(status) || WEXITSTATUS(status)) { 2268 exitcode = (int)MANDOCLEVEL_SYSERR; 2269 say("", "%s: Cannot remove temporary directory", 2270 tempfilename); 2271 } 2272} 2273 2274/* 2275 * This is straightforward stuff. 2276 * Open a database connection to a "temporary" database, then open a set 2277 * of prepared statements we'll use over and over again. 2278 * If "real" is set, we use the existing database; if not, we truncate a 2279 * temporary one. 2280 * Must be matched by dbclose(). 2281 */ 2282static int 2283dbopen(int real) 2284{ 2285 const char *sql; 2286 int rc, ofl; 2287 2288 if (nodb) 2289 return 1; 2290 2291 *tempfilename = '\0'; 2292 ofl = SQLITE_OPEN_READWRITE; 2293 2294 if (real) { 2295 rc = sqlite3_open_v2(MANDOC_DB, &db, ofl, NULL); 2296 if (SQLITE_OK != rc) { 2297 exitcode = (int)MANDOCLEVEL_SYSERR; 2298 if (SQLITE_CANTOPEN != rc) 2299 say(MANDOC_DB, "%s", sqlite3_errstr(rc)); 2300 return 0; 2301 } 2302 goto prepare_statements; 2303 } 2304 2305 ofl |= SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE; 2306 2307 remove(MANDOC_DB "~"); 2308 rc = sqlite3_open_v2(MANDOC_DB "~", &db, ofl, NULL); 2309 if (SQLITE_OK == rc) 2310 goto create_tables; 2311 if (MPARSE_QUICK & mparse_options) { 2312 exitcode = (int)MANDOCLEVEL_SYSERR; 2313 say(MANDOC_DB "~", "%s", sqlite3_errstr(rc)); 2314 return 0; 2315 } 2316 2317 (void)strlcpy(tempfilename, "/tmp/mandocdb.XXXXXX", 2318 sizeof(tempfilename)); 2319 if (NULL == mkdtemp(tempfilename)) { 2320 exitcode = (int)MANDOCLEVEL_SYSERR; 2321 say("", "&%s", tempfilename); 2322 return 0; 2323 } 2324 (void)strlcat(tempfilename, "/" MANDOC_DB, 2325 sizeof(tempfilename)); 2326 rc = sqlite3_open_v2(tempfilename, &db, ofl, NULL); 2327 if (SQLITE_OK != rc) { 2328 exitcode = (int)MANDOCLEVEL_SYSERR; 2329 say("", "%s: %s", tempfilename, sqlite3_errstr(rc)); 2330 return 0; 2331 } 2332 2333create_tables: 2334 sql = "CREATE TABLE \"mpages\" (\n" 2335 " \"desc\" TEXT NOT NULL,\n" 2336 " \"form\" INTEGER NOT NULL,\n" 2337 " \"pageid\" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL\n" 2338 ");\n" 2339 "\n" 2340 "CREATE TABLE \"mlinks\" (\n" 2341 " \"sec\" TEXT NOT NULL,\n" 2342 " \"arch\" TEXT NOT NULL,\n" 2343 " \"name\" TEXT NOT NULL,\n" 2344 " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) " 2345 "ON DELETE CASCADE\n" 2346 ");\n" 2347 "CREATE INDEX mlinks_pageid_idx ON mlinks (pageid);\n" 2348 "\n" 2349 "CREATE TABLE \"names\" (\n" 2350 " \"bits\" INTEGER NOT NULL,\n" 2351 " \"name\" TEXT NOT NULL,\n" 2352 " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) " 2353 "ON DELETE CASCADE,\n" 2354 " UNIQUE (\"name\", \"pageid\") ON CONFLICT REPLACE\n" 2355 ");\n" 2356 "\n" 2357 "CREATE TABLE \"keys\" (\n" 2358 " \"bits\" INTEGER NOT NULL,\n" 2359 " \"key\" TEXT NOT NULL,\n" 2360 " \"pageid\" INTEGER NOT NULL REFERENCES mpages(pageid) " 2361 "ON DELETE CASCADE\n" 2362 ");\n" 2363 "CREATE INDEX keys_pageid_idx ON keys (pageid);\n"; 2364 2365 if (SQLITE_OK != sqlite3_exec(db, sql, NULL, NULL, NULL)) { 2366 exitcode = (int)MANDOCLEVEL_SYSERR; 2367 say(MANDOC_DB, "%s", sqlite3_errmsg(db)); 2368 sqlite3_close(db); 2369 return 0; 2370 } 2371 2372prepare_statements: 2373 if (SQLITE_OK != sqlite3_exec(db, 2374 "PRAGMA foreign_keys = ON", NULL, NULL, NULL)) { 2375 exitcode = (int)MANDOCLEVEL_SYSERR; 2376 say(MANDOC_DB, "PRAGMA foreign_keys: %s", 2377 sqlite3_errmsg(db)); 2378 sqlite3_close(db); 2379 return 0; 2380 } 2381 2382 sql = "DELETE FROM mpages WHERE pageid IN " 2383 "(SELECT pageid FROM mlinks WHERE " 2384 "sec=? AND arch=? AND name=?)"; 2385 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_DELETE_PAGE], NULL); 2386 sql = "INSERT INTO mpages " 2387 "(desc,form) VALUES (?,?)"; 2388 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_PAGE], NULL); 2389 sql = "INSERT INTO mlinks " 2390 "(sec,arch,name,pageid) VALUES (?,?,?,?)"; 2391 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_LINK], NULL); 2392 sql = "SELECT bits FROM names where pageid = ?"; 2393 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_SELECT_NAME], NULL); 2394 sql = "INSERT INTO names " 2395 "(bits,name,pageid) VALUES (?,?,?)"; 2396 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_NAME], NULL); 2397 sql = "INSERT INTO keys " 2398 "(bits,key,pageid) VALUES (?,?,?)"; 2399 sqlite3_prepare_v2(db, sql, -1, &stmts[STMT_INSERT_KEY], NULL); 2400 2401#ifndef __APPLE__ 2402 /* 2403 * When opening a new database, we can turn off 2404 * synchronous mode for much better performance. 2405 */ 2406 2407 if (real && SQLITE_OK != sqlite3_exec(db, 2408 "PRAGMA synchronous = OFF", NULL, NULL, NULL)) { 2409 exitcode = (int)MANDOCLEVEL_SYSERR; 2410 say(MANDOC_DB, "PRAGMA synchronous: %s", 2411 sqlite3_errmsg(db)); 2412 sqlite3_close(db); 2413 return 0; 2414 } 2415#endif 2416 2417 return 1; 2418} 2419 2420static int 2421set_basedir(const char *targetdir, int report_baddir) 2422{ 2423 static char startdir[PATH_MAX]; 2424 static int getcwd_status; /* 1 = ok, 2 = failure */ 2425 static int chdir_status; /* 1 = changed directory */ 2426 char *cp; 2427 2428 /* 2429 * Remember the original working directory, if possible. 2430 * This will be needed if the second or a later directory 2431 * on the command line is given as a relative path. 2432 * Do not error out if the current directory is not 2433 * searchable: Maybe it won't be needed after all. 2434 */ 2435 if (0 == getcwd_status) { 2436 if (NULL == getcwd(startdir, sizeof(startdir))) { 2437 getcwd_status = 2; 2438 (void)strlcpy(startdir, strerror(errno), 2439 sizeof(startdir)); 2440 } else 2441 getcwd_status = 1; 2442 } 2443 2444 /* 2445 * We are leaving the old base directory. 2446 * Do not use it any longer, not even for messages. 2447 */ 2448 *basedir = '\0'; 2449 2450 /* 2451 * If and only if the directory was changed earlier and 2452 * the next directory to process is given as a relative path, 2453 * first go back, or bail out if that is impossible. 2454 */ 2455 if (chdir_status && '/' != *targetdir) { 2456 if (2 == getcwd_status) { 2457 exitcode = (int)MANDOCLEVEL_SYSERR; 2458 say("", "getcwd: %s", startdir); 2459 return 0; 2460 } 2461 if (-1 == chdir(startdir)) { 2462 exitcode = (int)MANDOCLEVEL_SYSERR; 2463 say("", "&chdir %s", startdir); 2464 return 0; 2465 } 2466 } 2467 2468 /* 2469 * Always resolve basedir to the canonicalized absolute 2470 * pathname and append a trailing slash, such that 2471 * we can reliably check whether files are inside. 2472 */ 2473 if (NULL == realpath(targetdir, basedir)) { 2474 if (report_baddir || errno != ENOENT) { 2475 exitcode = (int)MANDOCLEVEL_BADARG; 2476 say("", "&%s: realpath", targetdir); 2477 } 2478 return 0; 2479 } else if (-1 == chdir(basedir)) { 2480 if (report_baddir || errno != ENOENT) { 2481 exitcode = (int)MANDOCLEVEL_BADARG; 2482 say("", "&chdir"); 2483 } 2484 return 0; 2485 } 2486 chdir_status = 1; 2487 cp = strchr(basedir, '\0'); 2488 if ('/' != cp[-1]) { 2489 if (cp - basedir >= PATH_MAX - 1) { 2490 exitcode = (int)MANDOCLEVEL_SYSERR; 2491 say("", "Filename too long"); 2492 return 0; 2493 } 2494 *cp++ = '/'; 2495 *cp = '\0'; 2496 } 2497 return 1; 2498} 2499 2500static void 2501say(const char *file, const char *format, ...) 2502{ 2503 va_list ap; 2504 int use_errno; 2505 2506 if ('\0' != *basedir) 2507 fprintf(stderr, "%s", basedir); 2508 if ('\0' != *basedir && '\0' != *file) 2509 fputc('/', stderr); 2510 if ('\0' != *file) 2511 fprintf(stderr, "%s", file); 2512 2513 use_errno = 1; 2514 if (NULL != format) { 2515 switch (*format) { 2516 case '&': 2517 format++; 2518 break; 2519 case '\0': 2520 format = NULL; 2521 break; 2522 default: 2523 use_errno = 0; 2524 break; 2525 } 2526 } 2527 if (NULL != format) { 2528 if ('\0' != *basedir || '\0' != *file) 2529 fputs(": ", stderr); 2530 va_start(ap, format); 2531 vfprintf(stderr, format, ap); 2532 va_end(ap); 2533 } 2534 if (use_errno) { 2535 if ('\0' != *basedir || '\0' != *file || NULL != format) 2536 fputs(": ", stderr); 2537 perror(NULL); 2538 } else 2539 fputc('\n', stderr); 2540} 2541