/* cgi.c revision 316420 */
1/* $Id: cgi.c,v 1.144 2017/01/21 01:20:31 schwarze Exp $ */ 2/* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include "config.h" 19 20#include <sys/types.h> 21#include <sys/time.h> 22 23#include <ctype.h> 24#include <err.h> 25#include <errno.h> 26#include <fcntl.h> 27#include <limits.h> 28#include <stdint.h> 29#include <stdio.h> 30#include <stdlib.h> 31#include <string.h> 32#include <unistd.h> 33 34#include "mandoc_aux.h" 35#include "mandoc.h" 36#include "roff.h" 37#include "mdoc.h" 38#include "man.h" 39#include "main.h" 40#include "manconf.h" 41#include "mansearch.h" 42#include "cgi.h" 43 44/* 45 * A query as passed to the search function. 
46 */ 47struct query { 48 char *manpath; /* desired manual directory */ 49 char *arch; /* architecture */ 50 char *sec; /* manual section */ 51 char *query; /* unparsed query expression */ 52 int equal; /* match whole names, not substrings */ 53}; 54 55struct req { 56 struct query q; 57 char **p; /* array of available manpaths */ 58 size_t psz; /* number of available manpaths */ 59 int isquery; /* QUERY_STRING used, not PATH_INFO */ 60}; 61 62enum focus { 63 FOCUS_NONE = 0, 64 FOCUS_QUERY 65}; 66 67static void html_print(const char *); 68static void html_putchar(char); 69static int http_decode(char *); 70static void parse_manpath_conf(struct req *); 71static void parse_path_info(struct req *req, const char *path); 72static void parse_query_string(struct req *, const char *); 73static void pg_error_badrequest(const char *); 74static void pg_error_internal(void); 75static void pg_index(const struct req *); 76static void pg_noresult(const struct req *, const char *); 77static void pg_search(const struct req *); 78static void pg_searchres(const struct req *, 79 struct manpage *, size_t); 80static void pg_show(struct req *, const char *); 81static void resp_begin_html(int, const char *); 82static void resp_begin_http(int, const char *); 83static void resp_catman(const struct req *, const char *); 84static void resp_copy(const char *); 85static void resp_end_html(void); 86static void resp_format(const struct req *, const char *); 87static void resp_searchform(const struct req *, enum focus); 88static void resp_show(const struct req *, const char *); 89static void set_query_attr(char **, char **); 90static int validate_filename(const char *); 91static int validate_manpath(const struct req *, const char *); 92static int validate_urifrag(const char *); 93 94static const char *scriptname = SCRIPT_NAME; 95 96static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 97static const char *const sec_numbers[] = { 98 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 99}; 
100static const char *const sec_names[] = { 101 "All Sections", 102 "1 - General Commands", 103 "2 - System Calls", 104 "3 - Library Functions", 105 "3p - Perl Library", 106 "4 - Device Drivers", 107 "5 - File Formats", 108 "6 - Games", 109 "7 - Miscellaneous Information", 110 "8 - System Manager\'s Manual", 111 "9 - Kernel Developer\'s Manual" 112}; 113static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 114 115static const char *const arch_names[] = { 116 "amd64", "alpha", "armv7", 117 "hppa", "i386", "landisk", 118 "loongson", "luna88k", "macppc", "mips64", 119 "octeon", "sgi", "socppc", "sparc64", 120 "amiga", "arc", "armish", "arm32", 121 "atari", "aviion", "beagle", "cats", 122 "hppa64", "hp300", 123 "ia64", "mac68k", "mvme68k", "mvme88k", 124 "mvmeppc", "palm", "pc532", "pegasos", 125 "pmax", "powerpc", "solbourne", "sparc", 126 "sun3", "vax", "wgrisc", "x68k", 127 "zaurus" 128}; 129static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 130 131/* 132 * Print a character, escaping HTML along the way. 133 * This will pass non-ASCII straight to output: be warned! 134 */ 135static void 136html_putchar(char c) 137{ 138 139 switch (c) { 140 case ('"'): 141 printf("""); 142 break; 143 case ('&'): 144 printf("&"); 145 break; 146 case ('>'): 147 printf(">"); 148 break; 149 case ('<'): 150 printf("<"); 151 break; 152 default: 153 putchar((unsigned char)c); 154 break; 155 } 156} 157 158/* 159 * Call through to html_putchar(). 160 * Accepts NULL strings. 161 */ 162static void 163html_print(const char *p) 164{ 165 166 if (NULL == p) 167 return; 168 while ('\0' != *p) 169 html_putchar(*p++); 170} 171 172/* 173 * Transfer the responsibility for the allocated string *val 174 * to the query structure. 
175 */ 176static void 177set_query_attr(char **attr, char **val) 178{ 179 180 free(*attr); 181 if (**val == '\0') { 182 *attr = NULL; 183 free(*val); 184 } else 185 *attr = *val; 186 *val = NULL; 187} 188 189/* 190 * Parse the QUERY_STRING for key-value pairs 191 * and store the values into the query structure. 192 */ 193static void 194parse_query_string(struct req *req, const char *qs) 195{ 196 char *key, *val; 197 size_t keysz, valsz; 198 199 req->isquery = 1; 200 req->q.manpath = NULL; 201 req->q.arch = NULL; 202 req->q.sec = NULL; 203 req->q.query = NULL; 204 req->q.equal = 1; 205 206 key = val = NULL; 207 while (*qs != '\0') { 208 209 /* Parse one key. */ 210 211 keysz = strcspn(qs, "=;&"); 212 key = mandoc_strndup(qs, keysz); 213 qs += keysz; 214 if (*qs != '=') 215 goto next; 216 217 /* Parse one value. */ 218 219 valsz = strcspn(++qs, ";&"); 220 val = mandoc_strndup(qs, valsz); 221 qs += valsz; 222 223 /* Decode and catch encoding errors. */ 224 225 if ( ! (http_decode(key) && http_decode(val))) 226 goto next; 227 228 /* Handle key-value pairs. */ 229 230 if ( ! strcmp(key, "query")) 231 set_query_attr(&req->q.query, &val); 232 233 else if ( ! strcmp(key, "apropos")) 234 req->q.equal = !strcmp(val, "0"); 235 236 else if ( ! strcmp(key, "manpath")) { 237#ifdef COMPAT_OLDURI 238 if ( ! strncmp(val, "OpenBSD ", 8)) { 239 val[7] = '-'; 240 if ('C' == val[8]) 241 val[8] = 'c'; 242 } 243#endif 244 set_query_attr(&req->q.manpath, &val); 245 } 246 247 else if ( ! (strcmp(key, "sec") 248#ifdef COMPAT_OLDURI 249 && strcmp(key, "sektion") 250#endif 251 )) { 252 if ( ! strcmp(val, "0")) 253 *val = '\0'; 254 set_query_attr(&req->q.sec, &val); 255 } 256 257 else if ( ! strcmp(key, "arch")) { 258 if ( ! strcmp(val, "default")) 259 *val = '\0'; 260 set_query_attr(&req->q.arch, &val); 261 } 262 263 /* 264 * The key must be freed in any case. 265 * The val may have been handed over to the query 266 * structure, in which case it is now NULL. 
267 */ 268next: 269 free(key); 270 key = NULL; 271 free(val); 272 val = NULL; 273 274 if (*qs != '\0') 275 qs++; 276 } 277} 278 279/* 280 * HTTP-decode a string. The standard explanation is that this turns 281 * "%4e+foo" into "n foo" in the regular way. This is done in-place 282 * over the allocated string. 283 */ 284static int 285http_decode(char *p) 286{ 287 char hex[3]; 288 char *q; 289 int c; 290 291 hex[2] = '\0'; 292 293 q = p; 294 for ( ; '\0' != *p; p++, q++) { 295 if ('%' == *p) { 296 if ('\0' == (hex[0] = *(p + 1))) 297 return 0; 298 if ('\0' == (hex[1] = *(p + 2))) 299 return 0; 300 if (1 != sscanf(hex, "%x", &c)) 301 return 0; 302 if ('\0' == c) 303 return 0; 304 305 *q = (char)c; 306 p += 2; 307 } else 308 *q = '+' == *p ? ' ' : *p; 309 } 310 311 *q = '\0'; 312 return 1; 313} 314 315static void 316resp_begin_http(int code, const char *msg) 317{ 318 319 if (200 != code) 320 printf("Status: %d %s\r\n", code, msg); 321 322 printf("Content-Type: text/html; charset=utf-8\r\n" 323 "Cache-Control: no-cache\r\n" 324 "Pragma: no-cache\r\n" 325 "\r\n"); 326 327 fflush(stdout); 328} 329 330static void 331resp_copy(const char *filename) 332{ 333 char buf[4096]; 334 ssize_t sz; 335 int fd; 336 337 if ((fd = open(filename, O_RDONLY)) != -1) { 338 fflush(stdout); 339 while ((sz = read(fd, buf, sizeof(buf))) > 0) 340 write(STDOUT_FILENO, buf, sz); 341 close(fd); 342 } 343} 344 345static void 346resp_begin_html(int code, const char *msg) 347{ 348 349 resp_begin_http(code, msg); 350 351 printf("<!DOCTYPE html>\n" 352 "<html>\n" 353 "<head>\n" 354 " <meta charset=\"UTF-8\"/>\n" 355 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 356 " type=\"text/css\" media=\"all\">\n" 357 " <title>%s</title>\n" 358 "</head>\n" 359 "<body>\n", 360 CSS_DIR, CUSTOMIZE_TITLE); 361 362 resp_copy(MAN_DIR "/header.html"); 363} 364 365static void 366resp_end_html(void) 367{ 368 369 resp_copy(MAN_DIR "/footer.html"); 370 371 puts("</body>\n" 372 "</html>"); 373} 374 375static void 
376resp_searchform(const struct req *req, enum focus focus) 377{ 378 int i; 379 380 printf("<form action=\"/%s\" method=\"get\">\n" 381 " <fieldset>\n" 382 " <legend>Manual Page Search Parameters</legend>\n", 383 scriptname); 384 385 /* Write query input box. */ 386 387 printf(" <input type=\"text\" name=\"query\" value=\""); 388 if (req->q.query != NULL) 389 html_print(req->q.query); 390 printf( "\" size=\"40\""); 391 if (focus == FOCUS_QUERY) 392 printf(" autofocus"); 393 puts(">"); 394 395 /* Write submission buttons. */ 396 397 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 398 "man</button>\n" 399 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 400 "apropos</button>\n" 401 " <br/>\n"); 402 403 /* Write section selector. */ 404 405 puts(" <select name=\"sec\">"); 406 for (i = 0; i < sec_MAX; i++) { 407 printf(" <option value=\"%s\"", sec_numbers[i]); 408 if (NULL != req->q.sec && 409 0 == strcmp(sec_numbers[i], req->q.sec)) 410 printf(" selected=\"selected\""); 411 printf(">%s</option>\n", sec_names[i]); 412 } 413 puts(" </select>"); 414 415 /* Write architecture selector. */ 416 417 printf( " <select name=\"arch\">\n" 418 " <option value=\"default\""); 419 if (NULL == req->q.arch) 420 printf(" selected=\"selected\""); 421 puts(">All Architectures</option>"); 422 for (i = 0; i < arch_MAX; i++) { 423 printf(" <option value=\"%s\"", arch_names[i]); 424 if (NULL != req->q.arch && 425 0 == strcmp(arch_names[i], req->q.arch)) 426 printf(" selected=\"selected\""); 427 printf(">%s</option>\n", arch_names[i]); 428 } 429 puts(" </select>"); 430 431 /* Write manpath selector. 
*/ 432 433 if (req->psz > 1) { 434 puts(" <select name=\"manpath\">"); 435 for (i = 0; i < (int)req->psz; i++) { 436 printf(" <option "); 437 if (strcmp(req->q.manpath, req->p[i]) == 0) 438 printf("selected=\"selected\" "); 439 printf("value=\""); 440 html_print(req->p[i]); 441 printf("\">"); 442 html_print(req->p[i]); 443 puts("</option>"); 444 } 445 puts(" </select>"); 446 } 447 448 puts(" </fieldset>\n" 449 "</form>"); 450} 451 452static int 453validate_urifrag(const char *frag) 454{ 455 456 while ('\0' != *frag) { 457 if ( ! (isalnum((unsigned char)*frag) || 458 '-' == *frag || '.' == *frag || 459 '/' == *frag || '_' == *frag)) 460 return 0; 461 frag++; 462 } 463 return 1; 464} 465 466static int 467validate_manpath(const struct req *req, const char* manpath) 468{ 469 size_t i; 470 471 for (i = 0; i < req->psz; i++) 472 if ( ! strcmp(manpath, req->p[i])) 473 return 1; 474 475 return 0; 476} 477 478static int 479validate_filename(const char *file) 480{ 481 482 if ('.' == file[0] && '/' == file[1]) 483 file += 2; 484 485 return ! (strstr(file, "../") || strstr(file, "/..") || 486 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 487} 488 489static void 490pg_index(const struct req *req) 491{ 492 493 resp_begin_html(200, NULL); 494 resp_searchform(req, FOCUS_QUERY); 495 printf("<p>\n" 496 "This web interface is documented in the\n" 497 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 498 "manual, and the\n" 499 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 500 "manual explains the query syntax.\n" 501 "</p>\n", 502 scriptname, *scriptname == '\0' ? "" : "/", 503 scriptname, *scriptname == '\0' ? 
"" : "/"); 504 resp_end_html(); 505} 506 507static void 508pg_noresult(const struct req *req, const char *msg) 509{ 510 resp_begin_html(200, NULL); 511 resp_searchform(req, FOCUS_QUERY); 512 puts("<p>"); 513 puts(msg); 514 puts("</p>"); 515 resp_end_html(); 516} 517 518static void 519pg_error_badrequest(const char *msg) 520{ 521 522 resp_begin_html(400, "Bad Request"); 523 puts("<h1>Bad Request</h1>\n" 524 "<p>\n"); 525 puts(msg); 526 printf("Try again from the\n" 527 "<a href=\"/%s\">main page</a>.\n" 528 "</p>", scriptname); 529 resp_end_html(); 530} 531 532static void 533pg_error_internal(void) 534{ 535 resp_begin_html(500, "Internal Server Error"); 536 puts("<p>Internal Server Error</p>"); 537 resp_end_html(); 538} 539 540static void 541pg_searchres(const struct req *req, struct manpage *r, size_t sz) 542{ 543 char *arch, *archend; 544 const char *sec; 545 size_t i, iuse; 546 int archprio, archpriouse; 547 int prio, priouse; 548 549 for (i = 0; i < sz; i++) { 550 if (validate_filename(r[i].file)) 551 continue; 552 warnx("invalid filename %s in %s database", 553 r[i].file, req->q.manpath); 554 pg_error_internal(); 555 return; 556 } 557 558 if (req->isquery && sz == 1) { 559 /* 560 * If we have just one result, then jump there now 561 * without any delay. 562 */ 563 printf("Status: 303 See Other\r\n"); 564 printf("Location: http://%s/%s%s%s/%s", 565 HTTP_HOST, scriptname, 566 *scriptname == '\0' ? "" : "/", 567 req->q.manpath, r[0].file); 568 printf("\r\n" 569 "Content-Type: text/html; charset=utf-8\r\n" 570 "\r\n"); 571 return; 572 } 573 574 resp_begin_html(200, NULL); 575 resp_searchform(req, 576 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 577 578 if (sz > 1) { 579 puts("<table class=\"results\">"); 580 for (i = 0; i < sz; i++) { 581 printf(" <tr>\n" 582 " <td>" 583 "<a class=\"Xr\" href=\"/%s%s%s/%s\">", 584 scriptname, *scriptname == '\0' ? 
"" : "/", 585 req->q.manpath, r[i].file); 586 html_print(r[i].names); 587 printf("</a></td>\n" 588 " <td><span class=\"Nd\">"); 589 html_print(r[i].output); 590 puts("</span></td>\n" 591 " </tr>"); 592 } 593 puts("</table>"); 594 } 595 596 /* 597 * In man(1) mode, show one of the pages 598 * even if more than one is found. 599 */ 600 601 if (req->q.equal || sz == 1) { 602 puts("<hr>"); 603 iuse = 0; 604 priouse = 20; 605 archpriouse = 3; 606 for (i = 0; i < sz; i++) { 607 sec = r[i].file; 608 sec += strcspn(sec, "123456789"); 609 if (sec[0] == '\0') 610 continue; 611 prio = sec_prios[sec[0] - '1']; 612 if (sec[1] != '/') 613 prio += 10; 614 if (req->q.arch == NULL) { 615 archprio = 616 ((arch = strchr(sec + 1, '/')) 617 == NULL) ? 3 : 618 ((archend = strchr(arch + 1, '/')) 619 == NULL) ? 0 : 620 strncmp(arch, "amd64/", 621 archend - arch) ? 2 : 1; 622 if (archprio < archpriouse) { 623 archpriouse = archprio; 624 priouse = prio; 625 iuse = i; 626 continue; 627 } 628 if (archprio > archpriouse) 629 continue; 630 } 631 if (prio >= priouse) 632 continue; 633 priouse = prio; 634 iuse = i; 635 } 636 resp_show(req, r[iuse].file); 637 } 638 639 resp_end_html(); 640} 641 642static void 643resp_catman(const struct req *req, const char *file) 644{ 645 FILE *f; 646 char *p; 647 size_t sz; 648 ssize_t len; 649 int i; 650 int italic, bold; 651 652 if ((f = fopen(file, "r")) == NULL) { 653 puts("<p>You specified an invalid manual file.</p>"); 654 return; 655 } 656 657 puts("<div class=\"catman\">\n" 658 "<pre>"); 659 660 p = NULL; 661 sz = 0; 662 663 while ((len = getline(&p, &sz, f)) != -1) { 664 bold = italic = 0; 665 for (i = 0; i < len - 1; i++) { 666 /* 667 * This means that the catpage is out of state. 668 * Ignore it and keep going (although the 669 * catpage is bogus). 670 */ 671 672 if ('\b' == p[i] || '\n' == p[i]) 673 continue; 674 675 /* 676 * Print a regular character. 677 * Close out any bold/italic scopes. 
678 * If we're in back-space mode, make sure we'll 679 * have something to enter when we backspace. 680 */ 681 682 if ('\b' != p[i + 1]) { 683 if (italic) 684 printf("</i>"); 685 if (bold) 686 printf("</b>"); 687 italic = bold = 0; 688 html_putchar(p[i]); 689 continue; 690 } else if (i + 2 >= len) 691 continue; 692 693 /* Italic mode. */ 694 695 if ('_' == p[i]) { 696 if (bold) 697 printf("</b>"); 698 if ( ! italic) 699 printf("<i>"); 700 bold = 0; 701 italic = 1; 702 i += 2; 703 html_putchar(p[i]); 704 continue; 705 } 706 707 /* 708 * Handle funny behaviour troff-isms. 709 * These grok'd from the original man2html.c. 710 */ 711 712 if (('+' == p[i] && 'o' == p[i + 2]) || 713 ('o' == p[i] && '+' == p[i + 2]) || 714 ('|' == p[i] && '=' == p[i + 2]) || 715 ('=' == p[i] && '|' == p[i + 2]) || 716 ('*' == p[i] && '=' == p[i + 2]) || 717 ('=' == p[i] && '*' == p[i + 2]) || 718 ('*' == p[i] && '|' == p[i + 2]) || 719 ('|' == p[i] && '*' == p[i + 2])) { 720 if (italic) 721 printf("</i>"); 722 if (bold) 723 printf("</b>"); 724 italic = bold = 0; 725 putchar('*'); 726 i += 2; 727 continue; 728 } else if (('|' == p[i] && '-' == p[i + 2]) || 729 ('-' == p[i] && '|' == p[i + 1]) || 730 ('+' == p[i] && '-' == p[i + 1]) || 731 ('-' == p[i] && '+' == p[i + 1]) || 732 ('+' == p[i] && '|' == p[i + 1]) || 733 ('|' == p[i] && '+' == p[i + 1])) { 734 if (italic) 735 printf("</i>"); 736 if (bold) 737 printf("</b>"); 738 italic = bold = 0; 739 putchar('+'); 740 i += 2; 741 continue; 742 } 743 744 /* Bold mode. */ 745 746 if (italic) 747 printf("</i>"); 748 if ( ! bold) 749 printf("<b>"); 750 bold = 1; 751 italic = 0; 752 i += 2; 753 html_putchar(p[i]); 754 } 755 756 /* 757 * Clean up the last character. 758 * We can get to a newline; don't print that. 
759 */ 760 761 if (italic) 762 printf("</i>"); 763 if (bold) 764 printf("</b>"); 765 766 if (i == len - 1 && p[i] != '\n') 767 html_putchar(p[i]); 768 769 putchar('\n'); 770 } 771 free(p); 772 773 puts("</pre>\n" 774 "</div>"); 775 776 fclose(f); 777} 778 779static void 780resp_format(const struct req *req, const char *file) 781{ 782 struct manoutput conf; 783 struct mparse *mp; 784 struct roff_man *man; 785 void *vp; 786 int fd; 787 int usepath; 788 789 if (-1 == (fd = open(file, O_RDONLY, 0))) { 790 puts("<p>You specified an invalid manual file.</p>"); 791 return; 792 } 793 794 mchars_alloc(); 795 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1, 796 MANDOCLEVEL_BADARG, NULL, req->q.manpath); 797 mparse_readfd(mp, fd, file); 798 close(fd); 799 800 memset(&conf, 0, sizeof(conf)); 801 conf.fragment = 1; 802 usepath = strcmp(req->q.manpath, req->p[0]); 803 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S", 804 usepath ? req->q.manpath : "", usepath ? "/" : ""); 805 806 mparse_result(mp, &man, NULL); 807 if (man == NULL) { 808 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 809 pg_error_internal(); 810 mparse_free(mp); 811 mchars_free(); 812 return; 813 } 814 815 vp = html_alloc(&conf); 816 817 if (man->macroset == MACROSET_MDOC) { 818 mdoc_validate(man); 819 html_mdoc(vp, man); 820 } else { 821 man_validate(man); 822 html_man(vp, man); 823 } 824 825 html_free(vp); 826 mparse_free(mp); 827 mchars_free(); 828 free(conf.man); 829} 830 831static void 832resp_show(const struct req *req, const char *file) 833{ 834 835 if ('.' 
== file[0] && '/' == file[1]) 836 file += 2; 837 838 if ('c' == *file) 839 resp_catman(req, file); 840 else 841 resp_format(req, file); 842} 843 844static void 845pg_show(struct req *req, const char *fullpath) 846{ 847 char *manpath; 848 const char *file; 849 850 if ((file = strchr(fullpath, '/')) == NULL) { 851 pg_error_badrequest( 852 "You did not specify a page to show."); 853 return; 854 } 855 manpath = mandoc_strndup(fullpath, file - fullpath); 856 file++; 857 858 if ( ! validate_manpath(req, manpath)) { 859 pg_error_badrequest( 860 "You specified an invalid manpath."); 861 free(manpath); 862 return; 863 } 864 865 /* 866 * Begin by chdir()ing into the manpath. 867 * This way we can pick up the database files, which are 868 * relative to the manpath root. 869 */ 870 871 if (chdir(manpath) == -1) { 872 warn("chdir %s", manpath); 873 pg_error_internal(); 874 free(manpath); 875 return; 876 } 877 free(manpath); 878 879 if ( ! validate_filename(file)) { 880 pg_error_badrequest( 881 "You specified an invalid manual file."); 882 return; 883 } 884 885 resp_begin_html(200, NULL); 886 resp_searchform(req, FOCUS_NONE); 887 resp_show(req, file); 888 resp_end_html(); 889} 890 891static void 892pg_search(const struct req *req) 893{ 894 struct mansearch search; 895 struct manpaths paths; 896 struct manpage *res; 897 char **argv; 898 char *query, *rp, *wp; 899 size_t ressz; 900 int argc; 901 902 /* 903 * Begin by chdir()ing into the root of the manpath. 904 * This way we can pick up the database files, which are 905 * relative to the manpath root. 906 */ 907 908 if (chdir(req->q.manpath) == -1) { 909 warn("chdir %s", req->q.manpath); 910 pg_error_internal(); 911 return; 912 } 913 914 search.arch = req->q.arch; 915 search.sec = req->q.sec; 916 search.outkey = "Nd"; 917 search.argmode = req->q.equal ? 
ARG_NAME : ARG_EXPR; 918 search.firstmatch = 1; 919 920 paths.sz = 1; 921 paths.paths = mandoc_malloc(sizeof(char *)); 922 paths.paths[0] = mandoc_strdup("."); 923 924 /* 925 * Break apart at spaces with backslash-escaping. 926 */ 927 928 argc = 0; 929 argv = NULL; 930 rp = query = mandoc_strdup(req->q.query); 931 for (;;) { 932 while (isspace((unsigned char)*rp)) 933 rp++; 934 if (*rp == '\0') 935 break; 936 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 937 argv[argc++] = wp = rp; 938 for (;;) { 939 if (isspace((unsigned char)*rp)) { 940 *wp = '\0'; 941 rp++; 942 break; 943 } 944 if (rp[0] == '\\' && rp[1] != '\0') 945 rp++; 946 if (wp != rp) 947 *wp = *rp; 948 if (*rp == '\0') 949 break; 950 wp++; 951 rp++; 952 } 953 } 954 955 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz)) 956 pg_noresult(req, "You entered an invalid query."); 957 else if (0 == ressz) 958 pg_noresult(req, "No results found."); 959 else 960 pg_searchres(req, res, ressz); 961 962 free(query); 963 mansearch_free(res, ressz); 964 free(paths.paths[0]); 965 free(paths.paths); 966} 967 968int 969main(void) 970{ 971 struct req req; 972 struct itimerval itimer; 973 const char *path; 974 const char *querystring; 975 int i; 976 977 /* Poor man's ReDoS mitigation. */ 978 979 itimer.it_value.tv_sec = 2; 980 itimer.it_value.tv_usec = 0; 981 itimer.it_interval.tv_sec = 2; 982 itimer.it_interval.tv_usec = 0; 983 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 984 warn("setitimer"); 985 pg_error_internal(); 986 return EXIT_FAILURE; 987 } 988 989 /* 990 * First we change directory into the MAN_DIR so that 991 * subsequent scanning for manpath directories is rooted 992 * relative to the same position. 
993 */ 994 995 if (chdir(MAN_DIR) == -1) { 996 warn("MAN_DIR: %s", MAN_DIR); 997 pg_error_internal(); 998 return EXIT_FAILURE; 999 } 1000 1001 memset(&req, 0, sizeof(struct req)); 1002 req.q.equal = 1; 1003 parse_manpath_conf(&req); 1004 1005 /* Parse the path info and the query string. */ 1006 1007 if ((path = getenv("PATH_INFO")) == NULL) 1008 path = ""; 1009 else if (*path == '/') 1010 path++; 1011 1012 if (*path != '\0') { 1013 parse_path_info(&req, path); 1014 if (req.q.manpath == NULL || access(path, F_OK) == -1) 1015 path = ""; 1016 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1017 parse_query_string(&req, querystring); 1018 1019 /* Validate parsed data and add defaults. */ 1020 1021 if (req.q.manpath == NULL) 1022 req.q.manpath = mandoc_strdup(req.p[0]); 1023 else if ( ! validate_manpath(&req, req.q.manpath)) { 1024 pg_error_badrequest( 1025 "You specified an invalid manpath."); 1026 return EXIT_FAILURE; 1027 } 1028 1029 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { 1030 pg_error_badrequest( 1031 "You specified an invalid architecture."); 1032 return EXIT_FAILURE; 1033 } 1034 1035 /* Dispatch to the three different pages. */ 1036 1037 if ('\0' != *path) 1038 pg_show(&req, path); 1039 else if (NULL != req.q.query) 1040 pg_search(&req); 1041 else 1042 pg_index(&req); 1043 1044 free(req.q.manpath); 1045 free(req.q.arch); 1046 free(req.q.sec); 1047 free(req.q.query); 1048 for (i = 0; i < (int)req.psz; i++) 1049 free(req.p[i]); 1050 free(req.p); 1051 return EXIT_SUCCESS; 1052} 1053 1054/* 1055 * If PATH_INFO is not a file name, translate it to a query. 1056 */ 1057static void 1058parse_path_info(struct req *req, const char *path) 1059{ 1060 char *dir[4]; 1061 int i; 1062 1063 req->isquery = 0; 1064 req->q.equal = 1; 1065 req->q.manpath = mandoc_strdup(path); 1066 req->q.arch = NULL; 1067 1068 /* Mandatory manual page name. 
*/ 1069 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) { 1070 req->q.query = req->q.manpath; 1071 req->q.manpath = NULL; 1072 } else 1073 *req->q.query++ = '\0'; 1074 1075 /* Optional trailing section. */ 1076 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) { 1077 if(isdigit((unsigned char)req->q.sec[1])) { 1078 *req->q.sec++ = '\0'; 1079 req->q.sec = mandoc_strdup(req->q.sec); 1080 } else 1081 req->q.sec = NULL; 1082 } 1083 1084 /* Handle the case of name[.section] only. */ 1085 if (req->q.manpath == NULL) 1086 return; 1087 req->q.query = mandoc_strdup(req->q.query); 1088 1089 /* Split directory components. */ 1090 dir[i = 0] = req->q.manpath; 1091 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) { 1092 if (++i == 3) { 1093 pg_error_badrequest( 1094 "You specified too many directory components."); 1095 exit(EXIT_FAILURE); 1096 } 1097 *dir[i]++ = '\0'; 1098 } 1099 1100 /* Optional manpath. */ 1101 if ((i = validate_manpath(req, req->q.manpath)) == 0) 1102 req->q.manpath = NULL; 1103 else if (dir[1] == NULL) 1104 return; 1105 1106 /* Optional section. */ 1107 if (strncmp(dir[i], "man", 3) == 0) { 1108 free(req->q.sec); 1109 req->q.sec = mandoc_strdup(dir[i++] + 3); 1110 } 1111 if (dir[i] == NULL) { 1112 if (req->q.manpath == NULL) 1113 free(dir[0]); 1114 return; 1115 } 1116 if (dir[i + 1] != NULL) { 1117 pg_error_badrequest( 1118 "You specified an invalid directory component."); 1119 exit(EXIT_FAILURE); 1120 } 1121 1122 /* Optional architecture. */ 1123 if (i) { 1124 req->q.arch = mandoc_strdup(dir[i]); 1125 if (req->q.manpath == NULL) 1126 free(dir[0]); 1127 } else 1128 req->q.arch = dir[0]; 1129} 1130 1131/* 1132 * Scan for indexable paths. 
1133 */ 1134static void 1135parse_manpath_conf(struct req *req) 1136{ 1137 FILE *fp; 1138 char *dp; 1139 size_t dpsz; 1140 ssize_t len; 1141 1142 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1143 warn("%s/manpath.conf", MAN_DIR); 1144 pg_error_internal(); 1145 exit(EXIT_FAILURE); 1146 } 1147 1148 dp = NULL; 1149 dpsz = 0; 1150 1151 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1152 if (dp[len - 1] == '\n') 1153 dp[--len] = '\0'; 1154 req->p = mandoc_realloc(req->p, 1155 (req->psz + 1) * sizeof(char *)); 1156 if ( ! validate_urifrag(dp)) { 1157 warnx("%s/manpath.conf contains " 1158 "unsafe path \"%s\"", MAN_DIR, dp); 1159 pg_error_internal(); 1160 exit(EXIT_FAILURE); 1161 } 1162 if (strchr(dp, '/') != NULL) { 1163 warnx("%s/manpath.conf contains " 1164 "path with slash \"%s\"", MAN_DIR, dp); 1165 pg_error_internal(); 1166 exit(EXIT_FAILURE); 1167 } 1168 req->p[req->psz++] = dp; 1169 dp = NULL; 1170 dpsz = 0; 1171 } 1172 free(dp); 1173 1174 if (req->p == NULL) { 1175 warnx("%s/manpath.conf is empty", MAN_DIR); 1176 pg_error_internal(); 1177 exit(EXIT_FAILURE); 1178 } 1179} 1180