/* cgi.c revision 307795 */
1/* $Id: cgi.c,v 1.135 2016/07/11 22:48:37 schwarze Exp $ */ 2/* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include "config.h" 19 20#include <sys/types.h> 21#include <sys/time.h> 22 23#include <ctype.h> 24#include <err.h> 25#include <errno.h> 26#include <fcntl.h> 27#include <limits.h> 28#include <stdint.h> 29#include <stdio.h> 30#include <stdlib.h> 31#include <string.h> 32#include <unistd.h> 33 34#include "mandoc_aux.h" 35#include "mandoc.h" 36#include "roff.h" 37#include "mdoc.h" 38#include "man.h" 39#include "main.h" 40#include "manconf.h" 41#include "mansearch.h" 42#include "cgi.h" 43 44/* 45 * A query as passed to the search function. 
46 */ 47struct query { 48 char *manpath; /* desired manual directory */ 49 char *arch; /* architecture */ 50 char *sec; /* manual section */ 51 char *query; /* unparsed query expression */ 52 int equal; /* match whole names, not substrings */ 53}; 54 55struct req { 56 struct query q; 57 char **p; /* array of available manpaths */ 58 size_t psz; /* number of available manpaths */ 59 int isquery; /* QUERY_STRING used, not PATH_INFO */ 60}; 61 62enum focus { 63 FOCUS_NONE = 0, 64 FOCUS_QUERY 65}; 66 67static void html_print(const char *); 68static void html_putchar(char); 69static int http_decode(char *); 70static void parse_manpath_conf(struct req *); 71static void parse_path_info(struct req *req, const char *path); 72static void parse_query_string(struct req *, const char *); 73static void pg_error_badrequest(const char *); 74static void pg_error_internal(void); 75static void pg_index(const struct req *); 76static void pg_noresult(const struct req *, const char *); 77static void pg_search(const struct req *); 78static void pg_searchres(const struct req *, 79 struct manpage *, size_t); 80static void pg_show(struct req *, const char *); 81static void resp_begin_html(int, const char *); 82static void resp_begin_http(int, const char *); 83static void resp_catman(const struct req *, const char *); 84static void resp_copy(const char *); 85static void resp_end_html(void); 86static void resp_format(const struct req *, const char *); 87static void resp_searchform(const struct req *, enum focus); 88static void resp_show(const struct req *, const char *); 89static void set_query_attr(char **, char **); 90static int validate_filename(const char *); 91static int validate_manpath(const struct req *, const char *); 92static int validate_urifrag(const char *); 93 94static const char *scriptname = SCRIPT_NAME; 95 96static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 97static const char *const sec_numbers[] = { 98 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 99}; 
100static const char *const sec_names[] = { 101 "All Sections", 102 "1 - General Commands", 103 "2 - System Calls", 104 "3 - Library Functions", 105 "3p - Perl Library", 106 "4 - Device Drivers", 107 "5 - File Formats", 108 "6 - Games", 109 "7 - Miscellaneous Information", 110 "8 - System Manager\'s Manual", 111 "9 - Kernel Developer\'s Manual" 112}; 113static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 114 115static const char *const arch_names[] = { 116 "amd64", "alpha", "armish", "armv7", 117 "hppa", "hppa64", "i386", "landisk", 118 "loongson", "luna88k", "macppc", "mips64", 119 "octeon", "sgi", "socppc", "sparc", 120 "sparc64", "zaurus", 121 "amiga", "arc", "arm32", "atari", 122 "aviion", "beagle", "cats", "hp300", 123 "ia64", "mac68k", "mvme68k", "mvme88k", 124 "mvmeppc", "palm", "pc532", "pegasos", 125 "pmax", "powerpc", "solbourne", "sun3", 126 "vax", "wgrisc", "x68k" 127}; 128static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 129 130/* 131 * Print a character, escaping HTML along the way. 132 * This will pass non-ASCII straight to output: be warned! 133 */ 134static void 135html_putchar(char c) 136{ 137 138 switch (c) { 139 case ('"'): 140 printf(""e;"); 141 break; 142 case ('&'): 143 printf("&"); 144 break; 145 case ('>'): 146 printf(">"); 147 break; 148 case ('<'): 149 printf("<"); 150 break; 151 default: 152 putchar((unsigned char)c); 153 break; 154 } 155} 156 157/* 158 * Call through to html_putchar(). 159 * Accepts NULL strings. 160 */ 161static void 162html_print(const char *p) 163{ 164 165 if (NULL == p) 166 return; 167 while ('\0' != *p) 168 html_putchar(*p++); 169} 170 171/* 172 * Transfer the responsibility for the allocated string *val 173 * to the query structure. 
174 */ 175static void 176set_query_attr(char **attr, char **val) 177{ 178 179 free(*attr); 180 if (**val == '\0') { 181 *attr = NULL; 182 free(*val); 183 } else 184 *attr = *val; 185 *val = NULL; 186} 187 188/* 189 * Parse the QUERY_STRING for key-value pairs 190 * and store the values into the query structure. 191 */ 192static void 193parse_query_string(struct req *req, const char *qs) 194{ 195 char *key, *val; 196 size_t keysz, valsz; 197 198 req->isquery = 1; 199 req->q.manpath = NULL; 200 req->q.arch = NULL; 201 req->q.sec = NULL; 202 req->q.query = NULL; 203 req->q.equal = 1; 204 205 key = val = NULL; 206 while (*qs != '\0') { 207 208 /* Parse one key. */ 209 210 keysz = strcspn(qs, "=;&"); 211 key = mandoc_strndup(qs, keysz); 212 qs += keysz; 213 if (*qs != '=') 214 goto next; 215 216 /* Parse one value. */ 217 218 valsz = strcspn(++qs, ";&"); 219 val = mandoc_strndup(qs, valsz); 220 qs += valsz; 221 222 /* Decode and catch encoding errors. */ 223 224 if ( ! (http_decode(key) && http_decode(val))) 225 goto next; 226 227 /* Handle key-value pairs. */ 228 229 if ( ! strcmp(key, "query")) 230 set_query_attr(&req->q.query, &val); 231 232 else if ( ! strcmp(key, "apropos")) 233 req->q.equal = !strcmp(val, "0"); 234 235 else if ( ! strcmp(key, "manpath")) { 236#ifdef COMPAT_OLDURI 237 if ( ! strncmp(val, "OpenBSD ", 8)) { 238 val[7] = '-'; 239 if ('C' == val[8]) 240 val[8] = 'c'; 241 } 242#endif 243 set_query_attr(&req->q.manpath, &val); 244 } 245 246 else if ( ! (strcmp(key, "sec") 247#ifdef COMPAT_OLDURI 248 && strcmp(key, "sektion") 249#endif 250 )) { 251 if ( ! strcmp(val, "0")) 252 *val = '\0'; 253 set_query_attr(&req->q.sec, &val); 254 } 255 256 else if ( ! strcmp(key, "arch")) { 257 if ( ! strcmp(val, "default")) 258 *val = '\0'; 259 set_query_attr(&req->q.arch, &val); 260 } 261 262 /* 263 * The key must be freed in any case. 264 * The val may have been handed over to the query 265 * structure, in which case it is now NULL. 
266 */ 267next: 268 free(key); 269 key = NULL; 270 free(val); 271 val = NULL; 272 273 if (*qs != '\0') 274 qs++; 275 } 276} 277 278/* 279 * HTTP-decode a string. The standard explanation is that this turns 280 * "%4e+foo" into "n foo" in the regular way. This is done in-place 281 * over the allocated string. 282 */ 283static int 284http_decode(char *p) 285{ 286 char hex[3]; 287 char *q; 288 int c; 289 290 hex[2] = '\0'; 291 292 q = p; 293 for ( ; '\0' != *p; p++, q++) { 294 if ('%' == *p) { 295 if ('\0' == (hex[0] = *(p + 1))) 296 return 0; 297 if ('\0' == (hex[1] = *(p + 2))) 298 return 0; 299 if (1 != sscanf(hex, "%x", &c)) 300 return 0; 301 if ('\0' == c) 302 return 0; 303 304 *q = (char)c; 305 p += 2; 306 } else 307 *q = '+' == *p ? ' ' : *p; 308 } 309 310 *q = '\0'; 311 return 1; 312} 313 314static void 315resp_begin_http(int code, const char *msg) 316{ 317 318 if (200 != code) 319 printf("Status: %d %s\r\n", code, msg); 320 321 printf("Content-Type: text/html; charset=utf-8\r\n" 322 "Cache-Control: no-cache\r\n" 323 "Pragma: no-cache\r\n" 324 "\r\n"); 325 326 fflush(stdout); 327} 328 329static void 330resp_copy(const char *filename) 331{ 332 char buf[4096]; 333 ssize_t sz; 334 int fd; 335 336 if ((fd = open(filename, O_RDONLY)) != -1) { 337 fflush(stdout); 338 while ((sz = read(fd, buf, sizeof(buf))) > 0) 339 write(STDOUT_FILENO, buf, sz); 340 } 341} 342 343static void 344resp_begin_html(int code, const char *msg) 345{ 346 347 resp_begin_http(code, msg); 348 349 printf("<!DOCTYPE html>\n" 350 "<html>\n" 351 "<head>\n" 352 "<meta charset=\"UTF-8\"/>\n" 353 "<link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 354 " type=\"text/css\" media=\"all\">\n" 355 "<title>%s</title>\n" 356 "</head>\n" 357 "<body>\n" 358 "<!-- Begin page content. 
//-->\n", 359 CSS_DIR, CUSTOMIZE_TITLE); 360 361 resp_copy(MAN_DIR "/header.html"); 362} 363 364static void 365resp_end_html(void) 366{ 367 368 resp_copy(MAN_DIR "/footer.html"); 369 370 puts("</body>\n" 371 "</html>"); 372} 373 374static void 375resp_searchform(const struct req *req, enum focus focus) 376{ 377 int i; 378 379 puts("<!-- Begin search form. //-->"); 380 printf("<div id=\"mancgi\">\n" 381 "<form action=\"/%s\" method=\"get\">\n" 382 "<fieldset>\n" 383 "<legend>Manual Page Search Parameters</legend>\n", 384 scriptname); 385 386 /* Write query input box. */ 387 388 printf("<input type=\"text\" name=\"query\" value=\""); 389 if (req->q.query != NULL) 390 html_print(req->q.query); 391 printf( "\" size=\"40\""); 392 if (focus == FOCUS_QUERY) 393 printf(" autofocus"); 394 puts(">"); 395 396 /* Write submission buttons. */ 397 398 printf( "<button type=\"submit\" name=\"apropos\" value=\"0\">" 399 "man</button>\n" 400 "<button type=\"submit\" name=\"apropos\" value=\"1\">" 401 "apropos</button>\n<br/>\n"); 402 403 /* Write section selector. */ 404 405 puts("<select name=\"sec\">"); 406 for (i = 0; i < sec_MAX; i++) { 407 printf("<option value=\"%s\"", sec_numbers[i]); 408 if (NULL != req->q.sec && 409 0 == strcmp(sec_numbers[i], req->q.sec)) 410 printf(" selected=\"selected\""); 411 printf(">%s</option>\n", sec_names[i]); 412 } 413 puts("</select>"); 414 415 /* Write architecture selector. */ 416 417 printf( "<select name=\"arch\">\n" 418 "<option value=\"default\""); 419 if (NULL == req->q.arch) 420 printf(" selected=\"selected\""); 421 puts(">All Architectures</option>"); 422 for (i = 0; i < arch_MAX; i++) { 423 printf("<option value=\"%s\"", arch_names[i]); 424 if (NULL != req->q.arch && 425 0 == strcmp(arch_names[i], req->q.arch)) 426 printf(" selected=\"selected\""); 427 printf(">%s</option>\n", arch_names[i]); 428 } 429 puts("</select>"); 430 431 /* Write manpath selector. 
*/ 432 433 if (req->psz > 1) { 434 puts("<select name=\"manpath\">"); 435 for (i = 0; i < (int)req->psz; i++) { 436 printf("<option "); 437 if (strcmp(req->q.manpath, req->p[i]) == 0) 438 printf("selected=\"selected\" "); 439 printf("value=\""); 440 html_print(req->p[i]); 441 printf("\">"); 442 html_print(req->p[i]); 443 puts("</option>"); 444 } 445 puts("</select>"); 446 } 447 448 puts("</fieldset>\n" 449 "</form>\n" 450 "</div>"); 451 puts("<!-- End search form. //-->"); 452} 453 454static int 455validate_urifrag(const char *frag) 456{ 457 458 while ('\0' != *frag) { 459 if ( ! (isalnum((unsigned char)*frag) || 460 '-' == *frag || '.' == *frag || 461 '/' == *frag || '_' == *frag)) 462 return 0; 463 frag++; 464 } 465 return 1; 466} 467 468static int 469validate_manpath(const struct req *req, const char* manpath) 470{ 471 size_t i; 472 473 for (i = 0; i < req->psz; i++) 474 if ( ! strcmp(manpath, req->p[i])) 475 return 1; 476 477 return 0; 478} 479 480static int 481validate_filename(const char *file) 482{ 483 484 if ('.' == file[0] && '/' == file[1]) 485 file += 2; 486 487 return ! (strstr(file, "../") || strstr(file, "/..") || 488 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 489} 490 491static void 492pg_index(const struct req *req) 493{ 494 495 resp_begin_html(200, NULL); 496 resp_searchform(req, FOCUS_QUERY); 497 printf("<p>\n" 498 "This web interface is documented in the\n" 499 "<a href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 500 "manual, and the\n" 501 "<a href=\"/%s%sapropos.1\">apropos(1)</a>\n" 502 "manual explains the query syntax.\n" 503 "</p>\n", 504 scriptname, *scriptname == '\0' ? "" : "/", 505 scriptname, *scriptname == '\0' ? 
"" : "/"); 506 resp_end_html(); 507} 508 509static void 510pg_noresult(const struct req *req, const char *msg) 511{ 512 resp_begin_html(200, NULL); 513 resp_searchform(req, FOCUS_QUERY); 514 puts("<p>"); 515 puts(msg); 516 puts("</p>"); 517 resp_end_html(); 518} 519 520static void 521pg_error_badrequest(const char *msg) 522{ 523 524 resp_begin_html(400, "Bad Request"); 525 puts("<h1>Bad Request</h1>\n" 526 "<p>\n"); 527 puts(msg); 528 printf("Try again from the\n" 529 "<a href=\"/%s\">main page</a>.\n" 530 "</p>", scriptname); 531 resp_end_html(); 532} 533 534static void 535pg_error_internal(void) 536{ 537 resp_begin_html(500, "Internal Server Error"); 538 puts("<p>Internal Server Error</p>"); 539 resp_end_html(); 540} 541 542static void 543pg_searchres(const struct req *req, struct manpage *r, size_t sz) 544{ 545 char *arch, *archend; 546 const char *sec; 547 size_t i, iuse; 548 int archprio, archpriouse; 549 int prio, priouse; 550 551 for (i = 0; i < sz; i++) { 552 if (validate_filename(r[i].file)) 553 continue; 554 warnx("invalid filename %s in %s database", 555 r[i].file, req->q.manpath); 556 pg_error_internal(); 557 return; 558 } 559 560 if (req->isquery && sz == 1) { 561 /* 562 * If we have just one result, then jump there now 563 * without any delay. 564 */ 565 printf("Status: 303 See Other\r\n"); 566 printf("Location: http://%s/%s%s%s/%s", 567 HTTP_HOST, scriptname, 568 *scriptname == '\0' ? "" : "/", 569 req->q.manpath, r[0].file); 570 printf("\r\n" 571 "Content-Type: text/html; charset=utf-8\r\n" 572 "\r\n"); 573 return; 574 } 575 576 resp_begin_html(200, NULL); 577 resp_searchform(req, 578 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 579 580 if (sz > 1) { 581 puts("<div class=\"results\">"); 582 puts("<table>"); 583 584 for (i = 0; i < sz; i++) { 585 printf("<tr>\n" 586 "<td class=\"title\">\n" 587 "<a href=\"/%s%s%s/%s", 588 scriptname, *scriptname == '\0' ? 
"" : "/", 589 req->q.manpath, r[i].file); 590 printf("\">"); 591 html_print(r[i].names); 592 printf("</a>\n" 593 "</td>\n" 594 "<td class=\"desc\">"); 595 html_print(r[i].output); 596 puts("</td>\n" 597 "</tr>"); 598 } 599 600 puts("</table>\n" 601 "</div>"); 602 } 603 604 /* 605 * In man(1) mode, show one of the pages 606 * even if more than one is found. 607 */ 608 609 if (req->q.equal || sz == 1) { 610 puts("<hr>"); 611 iuse = 0; 612 priouse = 20; 613 archpriouse = 3; 614 for (i = 0; i < sz; i++) { 615 sec = r[i].file; 616 sec += strcspn(sec, "123456789"); 617 if (sec[0] == '\0') 618 continue; 619 prio = sec_prios[sec[0] - '1']; 620 if (sec[1] != '/') 621 prio += 10; 622 if (req->q.arch == NULL) { 623 archprio = 624 ((arch = strchr(sec + 1, '/')) 625 == NULL) ? 3 : 626 ((archend = strchr(arch + 1, '/')) 627 == NULL) ? 0 : 628 strncmp(arch, "amd64/", 629 archend - arch) ? 2 : 1; 630 if (archprio < archpriouse) { 631 archpriouse = archprio; 632 priouse = prio; 633 iuse = i; 634 continue; 635 } 636 if (archprio > archpriouse) 637 continue; 638 } 639 if (prio >= priouse) 640 continue; 641 priouse = prio; 642 iuse = i; 643 } 644 resp_show(req, r[iuse].file); 645 } 646 647 resp_end_html(); 648} 649 650static void 651resp_catman(const struct req *req, const char *file) 652{ 653 FILE *f; 654 char *p; 655 size_t sz; 656 ssize_t len; 657 int i; 658 int italic, bold; 659 660 if ((f = fopen(file, "r")) == NULL) { 661 puts("<p>You specified an invalid manual file.</p>"); 662 return; 663 } 664 665 puts("<div class=\"catman\">\n" 666 "<pre>"); 667 668 p = NULL; 669 sz = 0; 670 671 while ((len = getline(&p, &sz, f)) != -1) { 672 bold = italic = 0; 673 for (i = 0; i < len - 1; i++) { 674 /* 675 * This means that the catpage is out of state. 676 * Ignore it and keep going (although the 677 * catpage is bogus). 678 */ 679 680 if ('\b' == p[i] || '\n' == p[i]) 681 continue; 682 683 /* 684 * Print a regular character. 685 * Close out any bold/italic scopes. 
686 * If we're in back-space mode, make sure we'll 687 * have something to enter when we backspace. 688 */ 689 690 if ('\b' != p[i + 1]) { 691 if (italic) 692 printf("</i>"); 693 if (bold) 694 printf("</b>"); 695 italic = bold = 0; 696 html_putchar(p[i]); 697 continue; 698 } else if (i + 2 >= len) 699 continue; 700 701 /* Italic mode. */ 702 703 if ('_' == p[i]) { 704 if (bold) 705 printf("</b>"); 706 if ( ! italic) 707 printf("<i>"); 708 bold = 0; 709 italic = 1; 710 i += 2; 711 html_putchar(p[i]); 712 continue; 713 } 714 715 /* 716 * Handle funny behaviour troff-isms. 717 * These grok'd from the original man2html.c. 718 */ 719 720 if (('+' == p[i] && 'o' == p[i + 2]) || 721 ('o' == p[i] && '+' == p[i + 2]) || 722 ('|' == p[i] && '=' == p[i + 2]) || 723 ('=' == p[i] && '|' == p[i + 2]) || 724 ('*' == p[i] && '=' == p[i + 2]) || 725 ('=' == p[i] && '*' == p[i + 2]) || 726 ('*' == p[i] && '|' == p[i + 2]) || 727 ('|' == p[i] && '*' == p[i + 2])) { 728 if (italic) 729 printf("</i>"); 730 if (bold) 731 printf("</b>"); 732 italic = bold = 0; 733 putchar('*'); 734 i += 2; 735 continue; 736 } else if (('|' == p[i] && '-' == p[i + 2]) || 737 ('-' == p[i] && '|' == p[i + 1]) || 738 ('+' == p[i] && '-' == p[i + 1]) || 739 ('-' == p[i] && '+' == p[i + 1]) || 740 ('+' == p[i] && '|' == p[i + 1]) || 741 ('|' == p[i] && '+' == p[i + 1])) { 742 if (italic) 743 printf("</i>"); 744 if (bold) 745 printf("</b>"); 746 italic = bold = 0; 747 putchar('+'); 748 i += 2; 749 continue; 750 } 751 752 /* Bold mode. */ 753 754 if (italic) 755 printf("</i>"); 756 if ( ! bold) 757 printf("<b>"); 758 bold = 1; 759 italic = 0; 760 i += 2; 761 html_putchar(p[i]); 762 } 763 764 /* 765 * Clean up the last character. 766 * We can get to a newline; don't print that. 
767 */ 768 769 if (italic) 770 printf("</i>"); 771 if (bold) 772 printf("</b>"); 773 774 if (i == len - 1 && p[i] != '\n') 775 html_putchar(p[i]); 776 777 putchar('\n'); 778 } 779 free(p); 780 781 puts("</pre>\n" 782 "</div>"); 783 784 fclose(f); 785} 786 787static void 788resp_format(const struct req *req, const char *file) 789{ 790 struct manoutput conf; 791 struct mparse *mp; 792 struct roff_man *man; 793 void *vp; 794 int fd; 795 int usepath; 796 797 if (-1 == (fd = open(file, O_RDONLY, 0))) { 798 puts("<p>You specified an invalid manual file.</p>"); 799 return; 800 } 801 802 mchars_alloc(); 803 mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath); 804 mparse_readfd(mp, fd, file); 805 close(fd); 806 807 memset(&conf, 0, sizeof(conf)); 808 conf.fragment = 1; 809 usepath = strcmp(req->q.manpath, req->p[0]); 810 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S", 811 usepath ? req->q.manpath : "", usepath ? "/" : ""); 812 813 mparse_result(mp, &man, NULL); 814 if (man == NULL) { 815 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 816 pg_error_internal(); 817 mparse_free(mp); 818 mchars_free(); 819 return; 820 } 821 822 vp = html_alloc(&conf); 823 824 if (man->macroset == MACROSET_MDOC) { 825 mdoc_validate(man); 826 html_mdoc(vp, man); 827 } else { 828 man_validate(man); 829 html_man(vp, man); 830 } 831 832 html_free(vp); 833 mparse_free(mp); 834 mchars_free(); 835 free(conf.man); 836} 837 838static void 839resp_show(const struct req *req, const char *file) 840{ 841 842 if ('.' 
== file[0] && '/' == file[1]) 843 file += 2; 844 845 if ('c' == *file) 846 resp_catman(req, file); 847 else 848 resp_format(req, file); 849} 850 851static void 852pg_show(struct req *req, const char *fullpath) 853{ 854 char *manpath; 855 const char *file; 856 857 if ((file = strchr(fullpath, '/')) == NULL) { 858 pg_error_badrequest( 859 "You did not specify a page to show."); 860 return; 861 } 862 manpath = mandoc_strndup(fullpath, file - fullpath); 863 file++; 864 865 if ( ! validate_manpath(req, manpath)) { 866 pg_error_badrequest( 867 "You specified an invalid manpath."); 868 free(manpath); 869 return; 870 } 871 872 /* 873 * Begin by chdir()ing into the manpath. 874 * This way we can pick up the database files, which are 875 * relative to the manpath root. 876 */ 877 878 if (chdir(manpath) == -1) { 879 warn("chdir %s", manpath); 880 pg_error_internal(); 881 free(manpath); 882 return; 883 } 884 free(manpath); 885 886 if ( ! validate_filename(file)) { 887 pg_error_badrequest( 888 "You specified an invalid manual file."); 889 return; 890 } 891 892 resp_begin_html(200, NULL); 893 resp_searchform(req, FOCUS_NONE); 894 resp_show(req, file); 895 resp_end_html(); 896} 897 898static void 899pg_search(const struct req *req) 900{ 901 struct mansearch search; 902 struct manpaths paths; 903 struct manpage *res; 904 char **argv; 905 char *query, *rp, *wp; 906 size_t ressz; 907 int argc; 908 909 /* 910 * Begin by chdir()ing into the root of the manpath. 911 * This way we can pick up the database files, which are 912 * relative to the manpath root. 913 */ 914 915 if (chdir(req->q.manpath) == -1) { 916 warn("chdir %s", req->q.manpath); 917 pg_error_internal(); 918 return; 919 } 920 921 search.arch = req->q.arch; 922 search.sec = req->q.sec; 923 search.outkey = "Nd"; 924 search.argmode = req->q.equal ? 
ARG_NAME : ARG_EXPR; 925 search.firstmatch = 1; 926 927 paths.sz = 1; 928 paths.paths = mandoc_malloc(sizeof(char *)); 929 paths.paths[0] = mandoc_strdup("."); 930 931 /* 932 * Break apart at spaces with backslash-escaping. 933 */ 934 935 argc = 0; 936 argv = NULL; 937 rp = query = mandoc_strdup(req->q.query); 938 for (;;) { 939 while (isspace((unsigned char)*rp)) 940 rp++; 941 if (*rp == '\0') 942 break; 943 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 944 argv[argc++] = wp = rp; 945 for (;;) { 946 if (isspace((unsigned char)*rp)) { 947 *wp = '\0'; 948 rp++; 949 break; 950 } 951 if (rp[0] == '\\' && rp[1] != '\0') 952 rp++; 953 if (wp != rp) 954 *wp = *rp; 955 if (*rp == '\0') 956 break; 957 wp++; 958 rp++; 959 } 960 } 961 962 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz)) 963 pg_noresult(req, "You entered an invalid query."); 964 else if (0 == ressz) 965 pg_noresult(req, "No results found."); 966 else 967 pg_searchres(req, res, ressz); 968 969 free(query); 970 mansearch_free(res, ressz); 971 free(paths.paths[0]); 972 free(paths.paths); 973} 974 975int 976main(void) 977{ 978 struct req req; 979 struct itimerval itimer; 980 const char *path; 981 const char *querystring; 982 int i; 983 984 /* Poor man's ReDoS mitigation. */ 985 986 itimer.it_value.tv_sec = 2; 987 itimer.it_value.tv_usec = 0; 988 itimer.it_interval.tv_sec = 2; 989 itimer.it_interval.tv_usec = 0; 990 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 991 warn("setitimer"); 992 pg_error_internal(); 993 return EXIT_FAILURE; 994 } 995 996 /* 997 * First we change directory into the MAN_DIR so that 998 * subsequent scanning for manpath directories is rooted 999 * relative to the same position. 
1000 */ 1001 1002 if (chdir(MAN_DIR) == -1) { 1003 warn("MAN_DIR: %s", MAN_DIR); 1004 pg_error_internal(); 1005 return EXIT_FAILURE; 1006 } 1007 1008 memset(&req, 0, sizeof(struct req)); 1009 req.q.equal = 1; 1010 parse_manpath_conf(&req); 1011 1012 /* Parse the path info and the query string. */ 1013 1014 if ((path = getenv("PATH_INFO")) == NULL) 1015 path = ""; 1016 else if (*path == '/') 1017 path++; 1018 1019 if (*path != '\0') { 1020 parse_path_info(&req, path); 1021 if (req.q.manpath == NULL || access(path, F_OK) == -1) 1022 path = ""; 1023 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1024 parse_query_string(&req, querystring); 1025 1026 /* Validate parsed data and add defaults. */ 1027 1028 if (req.q.manpath == NULL) 1029 req.q.manpath = mandoc_strdup(req.p[0]); 1030 else if ( ! validate_manpath(&req, req.q.manpath)) { 1031 pg_error_badrequest( 1032 "You specified an invalid manpath."); 1033 return EXIT_FAILURE; 1034 } 1035 1036 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { 1037 pg_error_badrequest( 1038 "You specified an invalid architecture."); 1039 return EXIT_FAILURE; 1040 } 1041 1042 /* Dispatch to the three different pages. */ 1043 1044 if ('\0' != *path) 1045 pg_show(&req, path); 1046 else if (NULL != req.q.query) 1047 pg_search(&req); 1048 else 1049 pg_index(&req); 1050 1051 free(req.q.manpath); 1052 free(req.q.arch); 1053 free(req.q.sec); 1054 free(req.q.query); 1055 for (i = 0; i < (int)req.psz; i++) 1056 free(req.p[i]); 1057 free(req.p); 1058 return EXIT_SUCCESS; 1059} 1060 1061/* 1062 * If PATH_INFO is not a file name, translate it to a query. 1063 */ 1064static void 1065parse_path_info(struct req *req, const char *path) 1066{ 1067 char *dir[4]; 1068 int i; 1069 1070 req->isquery = 0; 1071 req->q.equal = 1; 1072 req->q.manpath = mandoc_strdup(path); 1073 req->q.arch = NULL; 1074 1075 /* Mandatory manual page name. 
*/ 1076 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) { 1077 req->q.query = req->q.manpath; 1078 req->q.manpath = NULL; 1079 } else 1080 *req->q.query++ = '\0'; 1081 1082 /* Optional trailing section. */ 1083 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) { 1084 if(isdigit((unsigned char)req->q.sec[1])) { 1085 *req->q.sec++ = '\0'; 1086 req->q.sec = mandoc_strdup(req->q.sec); 1087 } else 1088 req->q.sec = NULL; 1089 } 1090 1091 /* Handle the case of name[.section] only. */ 1092 if (req->q.manpath == NULL) 1093 return; 1094 req->q.query = mandoc_strdup(req->q.query); 1095 1096 /* Split directory components. */ 1097 dir[i = 0] = req->q.manpath; 1098 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) { 1099 if (++i == 3) { 1100 pg_error_badrequest( 1101 "You specified too many directory components."); 1102 exit(EXIT_FAILURE); 1103 } 1104 *dir[i]++ = '\0'; 1105 } 1106 1107 /* Optional manpath. */ 1108 if ((i = validate_manpath(req, req->q.manpath)) == 0) 1109 req->q.manpath = NULL; 1110 else if (dir[1] == NULL) 1111 return; 1112 1113 /* Optional section. */ 1114 if (strncmp(dir[i], "man", 3) == 0) { 1115 free(req->q.sec); 1116 req->q.sec = mandoc_strdup(dir[i++] + 3); 1117 } 1118 if (dir[i] == NULL) { 1119 if (req->q.manpath == NULL) 1120 free(dir[0]); 1121 return; 1122 } 1123 if (dir[i + 1] != NULL) { 1124 pg_error_badrequest( 1125 "You specified an invalid directory component."); 1126 exit(EXIT_FAILURE); 1127 } 1128 1129 /* Optional architecture. */ 1130 if (i) { 1131 req->q.arch = mandoc_strdup(dir[i]); 1132 if (req->q.manpath == NULL) 1133 free(dir[0]); 1134 } else 1135 req->q.arch = dir[0]; 1136} 1137 1138/* 1139 * Scan for indexable paths. 
1140 */ 1141static void 1142parse_manpath_conf(struct req *req) 1143{ 1144 FILE *fp; 1145 char *dp; 1146 size_t dpsz; 1147 ssize_t len; 1148 1149 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1150 warn("%s/manpath.conf", MAN_DIR); 1151 pg_error_internal(); 1152 exit(EXIT_FAILURE); 1153 } 1154 1155 dp = NULL; 1156 dpsz = 0; 1157 1158 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1159 if (dp[len - 1] == '\n') 1160 dp[--len] = '\0'; 1161 req->p = mandoc_realloc(req->p, 1162 (req->psz + 1) * sizeof(char *)); 1163 if ( ! validate_urifrag(dp)) { 1164 warnx("%s/manpath.conf contains " 1165 "unsafe path \"%s\"", MAN_DIR, dp); 1166 pg_error_internal(); 1167 exit(EXIT_FAILURE); 1168 } 1169 if (strchr(dp, '/') != NULL) { 1170 warnx("%s/manpath.conf contains " 1171 "path with slash \"%s\"", MAN_DIR, dp); 1172 pg_error_internal(); 1173 exit(EXIT_FAILURE); 1174 } 1175 req->p[req->psz++] = dp; 1176 dp = NULL; 1177 dpsz = 0; 1178 } 1179 free(dp); 1180 1181 if (req->p == NULL) { 1182 warnx("%s/manpath.conf is empty", MAN_DIR); 1183 pg_error_internal(); 1184 exit(EXIT_FAILURE); 1185 } 1186} 1187