1/* $Id: cgi.c,v 1.175 2021/08/19 15:23:36 schwarze Exp $ */
2/*
3 * Copyright (c) 2014-2019, 2021 Ingo Schwarze <schwarze@usta.de>
4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the man.cgi(8) program.
19 */
20#include "config.h"
21
22#include <sys/types.h>
23#include <sys/time.h>
24
25#include <ctype.h>
26#if HAVE_ERR
27#include <err.h>
28#endif
29#include <errno.h>
30#include <fcntl.h>
31#include <limits.h>
32#include <stdint.h>
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#include <unistd.h>
37
38#include "mandoc_aux.h"
39#include "mandoc.h"
40#include "roff.h"
41#include "mdoc.h"
42#include "man.h"
43#include "mandoc_parse.h"
44#include "main.h"
45#include "manconf.h"
46#include "mansearch.h"
47#include "cgi.h"
48
/*
 * Search parameters extracted from one request.
 * The arch and sec members are NULL when the search
 * is not restricted to an architecture or section.
 */
struct	query {
	char	*manpath;	/* name of the manual tree to use */
	char	*arch;		/* machine architecture, or NULL */
	char	*sec;		/* manual section, or NULL */
	char	*query;		/* search expression, still unparsed */
	int	 equal;		/* non-zero: ARG_NAME lookup, else ARG_EXPR */
};
59
60struct	req {
61	struct query	  q;
62	char		**p; /* array of available manpaths */
63	size_t		  psz; /* number of available manpaths */
64	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
65};
66
/* Tells resp_searchform() whether the query box gets "autofocus". */
enum	focus {
	FOCUS_NONE = 0,		/* no element requests the focus */
	FOCUS_QUERY		/* the query input requests the focus */
};
71
72static	void		 html_print(const char *);
73static	void		 html_putchar(char);
74static	int		 http_decode(char *);
75static	void		 http_encode(const char *);
76static	void		 parse_manpath_conf(struct req *);
77static	void		 parse_path_info(struct req *, const char *);
78static	void		 parse_query_string(struct req *, const char *);
79static	void		 pg_error_badrequest(const char *);
80static	void		 pg_error_internal(void);
81static	void		 pg_index(const struct req *);
82static	void		 pg_noresult(const struct req *, int, const char *,
83				const char *);
84static	void		 pg_redirect(const struct req *, const char *);
85static	void		 pg_search(const struct req *);
86static	void		 pg_searchres(const struct req *,
87				struct manpage *, size_t);
88static	void		 pg_show(struct req *, const char *);
89static	void		 resp_begin_html(int, const char *, const char *);
90static	void		 resp_begin_http(int, const char *);
91static	void		 resp_catman(const struct req *, const char *);
92static	void		 resp_copy(const char *);
93static	void		 resp_end_html(void);
94static	void		 resp_format(const struct req *, const char *);
95static	void		 resp_searchform(const struct req *, enum focus);
96static	void		 resp_show(const struct req *, const char *);
97static	void		 set_query_attr(char **, char **);
98static	int		 validate_arch(const char *);
99static	int		 validate_filename(const char *);
100static	int		 validate_manpath(const struct req *, const char *);
101static	int		 validate_urifrag(const char *);
102
103static	const char	 *scriptname = SCRIPT_NAME;
104
/*
 * Preference rank of the sections 1 to 9, indexed by section digit
 * minus one.  pg_searchres() prefers the page with the lowest rank.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Option values of the section selector in the search form. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector, parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
123
/*
 * Machine architecture names accepted by validate_arch() and
 * offered, in this order, by the selector in the search form.
 */
static	const char *const arch_names[] = {
	"amd64", "alpha", "armv7", "arm64", "hppa",
	"i386", "landisk", "loongson", "luna88k", "macppc",
	"mips64", "octeon", "powerpc64", "riscv64", "sparc64",

	"amiga", "arc", "armish", "arm32", "atari",
	"aviion", "beagle", "cats", "hppa64", "hp300",
	"ia64", "mac68k", "mvme68k", "mvme88k", "mvmeppc",
	"palm", "pc532", "pegasos", "pmax", "powerpc",
	"sgi", "socppc", "solbourne", "sparc", "sun3",
	"vax", "wgrisc", "x68k", "zaurus"
};

/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
141
/*
 * Write one character to standard output, replacing the four
 * characters ", &, > and < by their HTML entities.
 * Every other byte, non-ASCII included, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
168
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
182
/*
 * Hand the allocated string *val over to the query member *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In either case, *val is NULL on return because the caller
 * no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;
	*val = NULL;

	free(*attr);
	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
199
200/*
201 * Parse the QUERY_STRING for key-value pairs
202 * and store the values into the query structure.
203 */
204static void
205parse_query_string(struct req *req, const char *qs)
206{
207	char		*key, *val;
208	size_t		 keysz, valsz;
209
210	req->isquery	= 1;
211	req->q.manpath	= NULL;
212	req->q.arch	= NULL;
213	req->q.sec	= NULL;
214	req->q.query	= NULL;
215	req->q.equal	= 1;
216
217	key = val = NULL;
218	while (*qs != '\0') {
219
220		/* Parse one key. */
221
222		keysz = strcspn(qs, "=;&");
223		key = mandoc_strndup(qs, keysz);
224		qs += keysz;
225		if (*qs != '=')
226			goto next;
227
228		/* Parse one value. */
229
230		valsz = strcspn(++qs, ";&");
231		val = mandoc_strndup(qs, valsz);
232		qs += valsz;
233
234		/* Decode and catch encoding errors. */
235
236		if ( ! (http_decode(key) && http_decode(val)))
237			goto next;
238
239		/* Handle key-value pairs. */
240
241		if ( ! strcmp(key, "query"))
242			set_query_attr(&req->q.query, &val);
243
244		else if ( ! strcmp(key, "apropos"))
245			req->q.equal = !strcmp(val, "0");
246
247		else if ( ! strcmp(key, "manpath")) {
248#ifdef COMPAT_OLDURI
249			if ( ! strncmp(val, "OpenBSD ", 8)) {
250				val[7] = '-';
251				if ('C' == val[8])
252					val[8] = 'c';
253			}
254#endif
255			set_query_attr(&req->q.manpath, &val);
256		}
257
258		else if ( ! (strcmp(key, "sec")
259#ifdef COMPAT_OLDURI
260		    && strcmp(key, "sektion")
261#endif
262		    )) {
263			if ( ! strcmp(val, "0"))
264				*val = '\0';
265			set_query_attr(&req->q.sec, &val);
266		}
267
268		else if ( ! strcmp(key, "arch")) {
269			if ( ! strcmp(val, "default"))
270				*val = '\0';
271			set_query_attr(&req->q.arch, &val);
272		}
273
274		/*
275		 * The key must be freed in any case.
276		 * The val may have been handed over to the query
277		 * structure, in which case it is now NULL.
278		 */
279next:
280		free(key);
281		key = NULL;
282		free(val);
283		val = NULL;
284
285		if (*qs != '\0')
286			qs++;
287	}
288}
289
290/*
291 * HTTP-decode a string.  The standard explanation is that this turns
292 * "%4e+foo" into "n foo" in the regular way.  This is done in-place
293 * over the allocated string.
294 */
295static int
296http_decode(char *p)
297{
298	char             hex[3];
299	char		*q;
300	int              c;
301
302	hex[2] = '\0';
303
304	q = p;
305	for ( ; '\0' != *p; p++, q++) {
306		if ('%' == *p) {
307			if ('\0' == (hex[0] = *(p + 1)))
308				return 0;
309			if ('\0' == (hex[1] = *(p + 2)))
310				return 0;
311			if (1 != sscanf(hex, "%x", &c))
312				return 0;
313			if ('\0' == c)
314				return 0;
315
316			*q = (char)c;
317			p += 2;
318		} else
319			*q = '+' == *p ? ' ' : *p;
320	}
321
322	*q = '\0';
323	return 1;
324}
325
/*
 * Write a string to standard output with percent-encoding.
 * Alphanumeric characters and "-._~" are written as they are,
 * every other byte as '%' followed by two uppercase hex digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
337
/*
 * Write the HTTP response header and flush it.
 * A "Status:" line is only written for codes other than 200;
 * msg is the reason phrase that goes with the code.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char fields[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);
	fputs(fields, stdout);
	fflush(stdout);
}
354
/*
 * Copy the content of a file to standard output.
 * This is best effort: nothing is written if the file cannot
 * be opened, and copying stops at the first read or write error.
 * Short writes are completed and interrupted writes are retried,
 * such that output is not silently truncated in the middle.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nread, nwritten, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The copy bypasses stdio, so earlier buffered output
	 * has to go out first to keep the response in order.
	 */

	fflush(stdout);
	while ((nread = read(fd, buf, sizeof(buf))) > 0) {
		off = 0;
		while (off < nread) {
			nwritten = write(STDOUT_FILENO, buf + off,
			    (size_t)(nread - off));
			if (nwritten == -1) {
				if (errno == EINTR)
					continue;
				goto done;
			}
			off += nwritten;
		}
	}
done:
	close(fd);
}
369
370static void
371resp_begin_html(int code, const char *msg, const char *file)
372{
373	const char	*name, *sec, *cp;
374	int		 namesz, secsz;
375
376	resp_begin_http(code, msg);
377
378	printf("<!DOCTYPE html>\n"
379	       "<html>\n"
380	       "<head>\n"
381	       "  <meta charset=\"UTF-8\"/>\n"
382	       "  <meta name=\"viewport\""
383		      " content=\"width=device-width, initial-scale=1.0\">\n"
384	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
385	       " type=\"text/css\" media=\"all\">\n"
386	       "  <title>",
387	       CSS_DIR);
388	if (file != NULL) {
389		cp = strrchr(file, '/');
390		name = cp == NULL ? file : cp + 1;
391		cp = strrchr(name, '.');
392		namesz = cp == NULL ? strlen(name) : cp - name;
393		sec = NULL;
394		if (cp != NULL && cp[1] != '0') {
395			sec = cp + 1;
396			secsz = strlen(sec);
397		} else if (name - file > 1) {
398			for (cp = name - 2; cp >= file; cp--) {
399				if (*cp < '1' || *cp > '9')
400					continue;
401				sec = cp;
402				secsz = name - cp - 1;
403				break;
404			}
405		}
406		printf("%.*s", namesz, name);
407		if (sec != NULL)
408			printf("(%.*s)", secsz, sec);
409		fputs(" - ", stdout);
410	}
411	printf("%s</title>\n"
412	       "</head>\n"
413	       "<body>\n",
414	       CUSTOMIZE_TITLE);
415
416	resp_copy(MAN_DIR "/header.html");
417}
418
419static void
420resp_end_html(void)
421{
422
423	resp_copy(MAN_DIR "/footer.html");
424
425	puts("</body>\n"
426	     "</html>");
427}
428
429static void
430resp_searchform(const struct req *req, enum focus focus)
431{
432	int		 i;
433
434	printf("<form action=\"/%s\" method=\"get\" "
435	       "autocomplete=\"off\" autocapitalize=\"none\">\n"
436	       "  <fieldset>\n"
437	       "    <legend>Manual Page Search Parameters</legend>\n",
438	       scriptname);
439
440	/* Write query input box. */
441
442	printf("    <input type=\"search\" name=\"query\" value=\"");
443	if (req->q.query != NULL)
444		html_print(req->q.query);
445	printf( "\" size=\"40\"");
446	if (focus == FOCUS_QUERY)
447		printf(" autofocus");
448	puts(">");
449
450	/* Write submission buttons. */
451
452	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
453		"man</button>\n"
454		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
455		"apropos</button>\n"
456		"    <br/>\n");
457
458	/* Write section selector. */
459
460	puts("    <select name=\"sec\">");
461	for (i = 0; i < sec_MAX; i++) {
462		printf("      <option value=\"%s\"", sec_numbers[i]);
463		if (NULL != req->q.sec &&
464		    0 == strcmp(sec_numbers[i], req->q.sec))
465			printf(" selected=\"selected\"");
466		printf(">%s</option>\n", sec_names[i]);
467	}
468	puts("    </select>");
469
470	/* Write architecture selector. */
471
472	printf(	"    <select name=\"arch\">\n"
473		"      <option value=\"default\"");
474	if (NULL == req->q.arch)
475		printf(" selected=\"selected\"");
476	puts(">All Architectures</option>");
477	for (i = 0; i < arch_MAX; i++) {
478		printf("      <option");
479		if (NULL != req->q.arch &&
480		    0 == strcmp(arch_names[i], req->q.arch))
481			printf(" selected=\"selected\"");
482		printf(">%s</option>\n", arch_names[i]);
483	}
484	puts("    </select>");
485
486	/* Write manpath selector. */
487
488	if (req->psz > 1) {
489		puts("    <select name=\"manpath\">");
490		for (i = 0; i < (int)req->psz; i++) {
491			printf("      <option");
492			if (strcmp(req->q.manpath, req->p[i]) == 0)
493				printf(" selected=\"selected\"");
494			printf(">");
495			html_print(req->p[i]);
496			puts("</option>");
497		}
498		puts("    </select>");
499	}
500
501	puts("  </fieldset>\n"
502	     "</form>");
503}
504
/*
 * Check that a string is safe to embed into a URI path.
 * Returns 1 if it consists of alphanumeric characters and
 * the characters '-', '.', '/', and '_' only, or 0 otherwise.
 * The empty string is considered valid.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
518
519static int
520validate_manpath(const struct req *req, const char* manpath)
521{
522	size_t	 i;
523
524	for (i = 0; i < req->psz; i++)
525		if ( ! strcmp(manpath, req->p[i]))
526			return 1;
527
528	return 0;
529}
530
531static int
532validate_arch(const char *arch)
533{
534	int	 i;
535
536	for (i = 0; i < arch_MAX; i++)
537		if (strcmp(arch, arch_names[i]) == 0)
538			return 1;
539
540	return 0;
541}
542
/*
 * Check that a file name relative to a manpath is acceptable.
 * After skipping one optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* No climbing out of the manpath. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	/* Only the manN and catN subtrees are served. */
	if (strncmp(file, "man", 3) != 0 && strncmp(file, "cat", 3) != 0)
		return 0;

	return 1;
}
553
554static void
555pg_index(const struct req *req)
556{
557
558	resp_begin_html(200, NULL, NULL);
559	resp_searchform(req, FOCUS_QUERY);
560	printf("<p>\n"
561	       "This web interface is documented in the\n"
562	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
563	       "manual, and the\n"
564	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
565	       "manual explains the query syntax.\n"
566	       "</p>\n",
567	       scriptname, *scriptname == '\0' ? "" : "/",
568	       scriptname, *scriptname == '\0' ? "" : "/");
569	resp_end_html();
570}
571
572static void
573pg_noresult(const struct req *req, int code, const char *http_msg,
574    const char *user_msg)
575{
576	resp_begin_html(code, http_msg, NULL);
577	resp_searchform(req, FOCUS_QUERY);
578	puts("<p>");
579	puts(user_msg);
580	puts("</p>");
581	resp_end_html();
582}
583
584static void
585pg_error_badrequest(const char *msg)
586{
587
588	resp_begin_html(400, "Bad Request", NULL);
589	puts("<h1>Bad Request</h1>\n"
590	     "<p>\n");
591	puts(msg);
592	printf("Try again from the\n"
593	       "<a href=\"/%s\">main page</a>.\n"
594	       "</p>", scriptname);
595	resp_end_html();
596}
597
/*
 * Write a complete "500 Internal Server Error" page.
 * No details are given to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
605
606static void
607pg_redirect(const struct req *req, const char *name)
608{
609	printf("Status: 303 See Other\r\n"
610	    "Location: /");
611	if (*scriptname != '\0')
612		printf("%s/", scriptname);
613	if (strcmp(req->q.manpath, req->p[0]))
614		printf("%s/", req->q.manpath);
615	if (req->q.arch != NULL)
616		printf("%s/", req->q.arch);
617	http_encode(name);
618	if (req->q.sec != NULL) {
619		putchar('.');
620		http_encode(req->q.sec);
621	}
622	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
623}
624
625static void
626pg_searchres(const struct req *req, struct manpage *r, size_t sz)
627{
628	char		*arch, *archend;
629	const char	*sec;
630	size_t		 i, iuse;
631	int		 archprio, archpriouse;
632	int		 prio, priouse;
633
634	for (i = 0; i < sz; i++) {
635		if (validate_filename(r[i].file))
636			continue;
637		warnx("invalid filename %s in %s database",
638		    r[i].file, req->q.manpath);
639		pg_error_internal();
640		return;
641	}
642
643	if (req->isquery && sz == 1) {
644		/*
645		 * If we have just one result, then jump there now
646		 * without any delay.
647		 */
648		printf("Status: 303 See Other\r\n"
649		    "Location: /");
650		if (*scriptname != '\0')
651			printf("%s/", scriptname);
652		if (strcmp(req->q.manpath, req->p[0]))
653			printf("%s/", req->q.manpath);
654		printf("%s\r\n"
655		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
656		    r[0].file);
657		return;
658	}
659
660	/*
661	 * In man(1) mode, show one of the pages
662	 * even if more than one is found.
663	 */
664
665	iuse = 0;
666	if (req->q.equal || sz == 1) {
667		priouse = 20;
668		archpriouse = 3;
669		for (i = 0; i < sz; i++) {
670			sec = r[i].file;
671			sec += strcspn(sec, "123456789");
672			if (sec[0] == '\0')
673				continue;
674			prio = sec_prios[sec[0] - '1'];
675			if (sec[1] != '/')
676				prio += 10;
677			if (req->q.arch == NULL) {
678				archprio =
679				    ((arch = strchr(sec + 1, '/'))
680					== NULL) ? 3 :
681				    ((archend = strchr(arch + 1, '/'))
682					== NULL) ? 0 :
683				    strncmp(arch, "amd64/",
684					archend - arch) ? 2 : 1;
685				if (archprio < archpriouse) {
686					archpriouse = archprio;
687					priouse = prio;
688					iuse = i;
689					continue;
690				}
691				if (archprio > archpriouse)
692					continue;
693			}
694			if (prio >= priouse)
695				continue;
696			priouse = prio;
697			iuse = i;
698		}
699		resp_begin_html(200, NULL, r[iuse].file);
700	} else
701		resp_begin_html(200, NULL, NULL);
702
703	resp_searchform(req,
704	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
705
706	if (sz > 1) {
707		puts("<table class=\"results\">");
708		for (i = 0; i < sz; i++) {
709			printf("  <tr>\n"
710			       "    <td>"
711			       "<a class=\"Xr\" href=\"/");
712			if (*scriptname != '\0')
713				printf("%s/", scriptname);
714			if (strcmp(req->q.manpath, req->p[0]))
715				printf("%s/", req->q.manpath);
716			printf("%s\">", r[i].file);
717			html_print(r[i].names);
718			printf("</a></td>\n"
719			       "    <td><span class=\"Nd\">");
720			html_print(r[i].output);
721			puts("</span></td>\n"
722			     "  </tr>");
723		}
724		puts("</table>");
725	}
726
727	if (req->q.equal || sz == 1) {
728		puts("<hr>");
729		resp_show(req, r[iuse].file);
730	}
731
732	resp_end_html();
733}
734
/*
 * Write a preformatted manual page (cat page) as HTML.
 *
 * The file is read line by line and backspace overstriking
 * is translated: "_\bX" becomes italic X, the pairs listed in
 * the two special cases below become '*' or '+', and any other
 * "Y\bX" becomes bold X.  All text is HTML-escaped.
 * If the file cannot be opened, an error paragraph is written
 * instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;

		/* The final byte of the line is handled after the loop. */

		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair are compared,
			 * that is, p[i] and p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
871
872static void
873resp_format(const struct req *req, const char *file)
874{
875	struct manoutput conf;
876	struct mparse	*mp;
877	struct roff_meta *meta;
878	void		*vp;
879	int		 fd;
880	int		 usepath;
881
882	if (-1 == (fd = open(file, O_RDONLY, 0))) {
883		puts("<p>You specified an invalid manual file.</p>");
884		return;
885	}
886
887	mchars_alloc();
888	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
889	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
890	mparse_readfd(mp, fd, file);
891	close(fd);
892	meta = mparse_result(mp);
893
894	memset(&conf, 0, sizeof(conf));
895	conf.fragment = 1;
896	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
897	usepath = strcmp(req->q.manpath, req->p[0]);
898	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
899	    scriptname, *scriptname == '\0' ? "" : "/",
900	    usepath ? req->q.manpath : "", usepath ? "/" : "");
901
902	vp = html_alloc(&conf);
903	if (meta->macroset == MACROSET_MDOC)
904		html_mdoc(vp, meta);
905	else
906		html_man(vp, meta);
907
908	html_free(vp);
909	mparse_free(mp);
910	mchars_free();
911	free(conf.man);
912	free(conf.style);
913}
914
/*
 * Write the content of one manual page.
 * After skipping one optional leading "./", a file name that
 * starts with 'c' is treated as a preformatted page and
 * anything else as manual page source code.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel == 'c') {
		resp_catman(req, rel);
		return;
	}
	resp_format(req, rel);
}
927
928static void
929pg_show(struct req *req, const char *fullpath)
930{
931	char		*manpath;
932	const char	*file;
933
934	if ((file = strchr(fullpath, '/')) == NULL) {
935		pg_error_badrequest(
936		    "You did not specify a page to show.");
937		return;
938	}
939	manpath = mandoc_strndup(fullpath, file - fullpath);
940	file++;
941
942	if ( ! validate_manpath(req, manpath)) {
943		pg_error_badrequest(
944		    "You specified an invalid manpath.");
945		free(manpath);
946		return;
947	}
948
949	/*
950	 * Begin by chdir()ing into the manpath.
951	 * This way we can pick up the database files, which are
952	 * relative to the manpath root.
953	 */
954
955	if (chdir(manpath) == -1) {
956		warn("chdir %s", manpath);
957		pg_error_internal();
958		free(manpath);
959		return;
960	}
961	free(manpath);
962
963	if ( ! validate_filename(file)) {
964		pg_error_badrequest(
965		    "You specified an invalid manual file.");
966		return;
967	}
968
969	resp_begin_html(200, NULL, file);
970	resp_searchform(req, FOCUS_NONE);
971	resp_show(req, file);
972	resp_end_html();
973}
974
975static void
976pg_search(const struct req *req)
977{
978	struct mansearch	  search;
979	struct manpaths		  paths;
980	struct manpage		 *res;
981	char			**argv;
982	char			 *query, *rp, *wp;
983	size_t			  ressz;
984	int			  argc;
985
986	/*
987	 * Begin by chdir()ing into the root of the manpath.
988	 * This way we can pick up the database files, which are
989	 * relative to the manpath root.
990	 */
991
992	if (chdir(req->q.manpath) == -1) {
993		warn("chdir %s", req->q.manpath);
994		pg_error_internal();
995		return;
996	}
997
998	search.arch = req->q.arch;
999	search.sec = req->q.sec;
1000	search.outkey = "Nd";
1001	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1002	search.firstmatch = 1;
1003
1004	paths.sz = 1;
1005	paths.paths = mandoc_malloc(sizeof(char *));
1006	paths.paths[0] = mandoc_strdup(".");
1007
1008	/*
1009	 * Break apart at spaces with backslash-escaping.
1010	 */
1011
1012	argc = 0;
1013	argv = NULL;
1014	rp = query = mandoc_strdup(req->q.query);
1015	for (;;) {
1016		while (isspace((unsigned char)*rp))
1017			rp++;
1018		if (*rp == '\0')
1019			break;
1020		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1021		argv[argc++] = wp = rp;
1022		for (;;) {
1023			if (isspace((unsigned char)*rp)) {
1024				*wp = '\0';
1025				rp++;
1026				break;
1027			}
1028			if (rp[0] == '\\' && rp[1] != '\0')
1029				rp++;
1030			if (wp != rp)
1031				*wp = *rp;
1032			if (*rp == '\0')
1033				break;
1034			wp++;
1035			rp++;
1036		}
1037	}
1038
1039	res = NULL;
1040	ressz = 0;
1041	if (req->isquery && req->q.equal && argc == 1)
1042		pg_redirect(req, argv[0]);
1043	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1044		pg_noresult(req, 400, "Bad Request",
1045		    "You entered an invalid query.");
1046	else if (ressz == 0)
1047		pg_noresult(req, 404, "Not Found", "No results found.");
1048	else
1049		pg_searchres(req, res, ressz);
1050
1051	free(query);
1052	mansearch_free(res, ressz);
1053	free(paths.paths[0]);
1054	free(paths.paths);
1055}
1056
1057int
1058main(void)
1059{
1060	struct req	 req;
1061	struct itimerval itimer;
1062	const char	*path;
1063	const char	*querystring;
1064	int		 i;
1065
1066#if HAVE_PLEDGE
1067	/*
1068	 * The "rpath" pledge could be revoked after mparse_readfd()
1069	 * if the file desciptor to "/footer.html" would be opened
1070	 * up front, but it's probably not worth the complication
1071	 * of the code it would cause: it would require scattering
1072	 * pledge() calls in multiple low-level resp_*() functions.
1073	 */
1074
1075	if (pledge("stdio rpath", NULL) == -1) {
1076		warn("pledge");
1077		pg_error_internal();
1078		return EXIT_FAILURE;
1079	}
1080#endif
1081
1082	/* Poor man's ReDoS mitigation. */
1083
1084	itimer.it_value.tv_sec = 2;
1085	itimer.it_value.tv_usec = 0;
1086	itimer.it_interval.tv_sec = 2;
1087	itimer.it_interval.tv_usec = 0;
1088	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1089		warn("setitimer");
1090		pg_error_internal();
1091		return EXIT_FAILURE;
1092	}
1093
1094	/*
1095	 * First we change directory into the MAN_DIR so that
1096	 * subsequent scanning for manpath directories is rooted
1097	 * relative to the same position.
1098	 */
1099
1100	if (chdir(MAN_DIR) == -1) {
1101		warn("MAN_DIR: %s", MAN_DIR);
1102		pg_error_internal();
1103		return EXIT_FAILURE;
1104	}
1105
1106	memset(&req, 0, sizeof(struct req));
1107	req.q.equal = 1;
1108	parse_manpath_conf(&req);
1109
1110	/* Parse the path info and the query string. */
1111
1112	if ((path = getenv("PATH_INFO")) == NULL)
1113		path = "";
1114	else if (*path == '/')
1115		path++;
1116
1117	if (*path != '\0') {
1118		parse_path_info(&req, path);
1119		if (req.q.manpath == NULL || req.q.sec == NULL ||
1120		    *req.q.query == '\0' || access(path, F_OK) == -1)
1121			path = "";
1122	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1123		parse_query_string(&req, querystring);
1124
1125	/* Validate parsed data and add defaults. */
1126
1127	if (req.q.manpath == NULL)
1128		req.q.manpath = mandoc_strdup(req.p[0]);
1129	else if ( ! validate_manpath(&req, req.q.manpath)) {
1130		pg_error_badrequest(
1131		    "You specified an invalid manpath.");
1132		return EXIT_FAILURE;
1133	}
1134
1135	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1136		pg_error_badrequest(
1137		    "You specified an invalid architecture.");
1138		return EXIT_FAILURE;
1139	}
1140
1141	/* Dispatch to the three different pages. */
1142
1143	if ('\0' != *path)
1144		pg_show(&req, path);
1145	else if (NULL != req.q.query)
1146		pg_search(&req);
1147	else
1148		pg_index(&req);
1149
1150	free(req.q.manpath);
1151	free(req.q.arch);
1152	free(req.q.sec);
1153	free(req.q.query);
1154	for (i = 0; i < (int)req.psz; i++)
1155		free(req.p[i]);
1156	free(req.p);
1157	return EXIT_SUCCESS;
1158}
1159
1160/*
1161 * Translate PATH_INFO to a query.
1162 */
1163static void
1164parse_path_info(struct req *req, const char *path)
1165{
1166	const char	*name, *sec, *end;
1167
1168	req->isquery = 0;
1169	req->q.equal = 1;
1170	req->q.manpath = NULL;
1171	req->q.arch = NULL;
1172
1173	/* Mandatory manual page name. */
1174	if ((name = strrchr(path, '/')) == NULL)
1175		name = path;
1176	else
1177		name++;
1178
1179	/* Optional trailing section. */
1180	sec = strrchr(name, '.');
1181	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1182		req->q.query = mandoc_strndup(name, sec - name - 1);
1183		req->q.sec = mandoc_strdup(sec);
1184	} else {
1185		req->q.query = mandoc_strdup(name);
1186		req->q.sec = NULL;
1187	}
1188
1189	/* Handle the case of name[.section] only. */
1190	if (name == path)
1191		return;
1192
1193	/* Optional manpath. */
1194	end = strchr(path, '/');
1195	req->q.manpath = mandoc_strndup(path, end - path);
1196	if (validate_manpath(req, req->q.manpath)) {
1197		path = end + 1;
1198		if (name == path)
1199			return;
1200	} else {
1201		free(req->q.manpath);
1202		req->q.manpath = NULL;
1203	}
1204
1205	/* Optional section. */
1206	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1207		path += 3;
1208		end = strchr(path, '/');
1209		free(req->q.sec);
1210		req->q.sec = mandoc_strndup(path, end - path);
1211		path = end + 1;
1212		if (name == path)
1213			return;
1214	}
1215
1216	/* Optional architecture. */
1217	end = strchr(path, '/');
1218	if (end + 1 != name) {
1219		pg_error_badrequest(
1220		    "You specified too many directory components.");
1221		exit(EXIT_FAILURE);
1222	}
1223	req->q.arch = mandoc_strndup(path, end - path);
1224	if (validate_arch(req->q.arch) == 0) {
1225		pg_error_badrequest(
1226		    "You specified an invalid directory component.");
1227		exit(EXIT_FAILURE);
1228	}
1229}
1230
1231/*
1232 * Scan for indexable paths.
1233 */
1234static void
1235parse_manpath_conf(struct req *req)
1236{
1237	FILE	*fp;
1238	char	*dp;
1239	size_t	 dpsz;
1240	ssize_t	 len;
1241
1242	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1243		warn("%s/manpath.conf", MAN_DIR);
1244		pg_error_internal();
1245		exit(EXIT_FAILURE);
1246	}
1247
1248	dp = NULL;
1249	dpsz = 0;
1250
1251	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1252		if (dp[len - 1] == '\n')
1253			dp[--len] = '\0';
1254		req->p = mandoc_realloc(req->p,
1255		    (req->psz + 1) * sizeof(char *));
1256		if ( ! validate_urifrag(dp)) {
1257			warnx("%s/manpath.conf contains "
1258			    "unsafe path \"%s\"", MAN_DIR, dp);
1259			pg_error_internal();
1260			exit(EXIT_FAILURE);
1261		}
1262		if (strchr(dp, '/') != NULL) {
1263			warnx("%s/manpath.conf contains "
1264			    "path with slash \"%s\"", MAN_DIR, dp);
1265			pg_error_internal();
1266			exit(EXIT_FAILURE);
1267		}
1268		req->p[req->psz++] = dp;
1269		dp = NULL;
1270		dpsz = 0;
1271	}
1272	free(dp);
1273
1274	if (req->p == NULL) {
1275		warnx("%s/manpath.conf is empty", MAN_DIR);
1276		pg_error_internal();
1277		exit(EXIT_FAILURE);
1278	}
1279}
1280