cgi.c revision 307795
1/*	$Id: cgi.c,v 1.135 2016/07/11 22:48:37 schwarze Exp $ */
2/*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21#include <sys/time.h>
22
23#include <ctype.h>
24#include <err.h>
25#include <errno.h>
26#include <fcntl.h>
27#include <limits.h>
28#include <stdint.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <unistd.h>
33
34#include "mandoc_aux.h"
35#include "mandoc.h"
36#include "roff.h"
37#include "mdoc.h"
38#include "man.h"
39#include "main.h"
40#include "manconf.h"
41#include "mansearch.h"
42#include "cgi.h"
43
/*
 * The search parameters of one request, as handed to the search
 * function.  The string members are NULL when not specified.
 */
struct	query {
	/* desired manual directory */
	char		*manpath;
	/* architecture */
	char		*arch;
	/* manual section */
	char		*sec;
	/* unparsed query expression */
	char		*query;
	/* match whole names, not substrings */
	int		 equal;
};
54
55struct	req {
56	struct query	  q;
57	char		**p; /* array of available manpaths */
58	size_t		  psz; /* number of available manpaths */
59	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
60};
61
/*
 * Selects which element of the search form, if any,
 * is rendered with the autofocus attribute.
 */
enum	focus {
	FOCUS_NONE = 0,	/* no element requests the focus */
	FOCUS_QUERY	/* the query text box requests the focus */
};
66
/* Forward declarations of all file-local functions. */
static	void		 html_print(const char *p);
static	void		 html_putchar(char c);
static	int		 http_decode(char *p);
static	void		 parse_manpath_conf(struct req *req);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *req, const char *qs);
static	void		 pg_error_badrequest(const char *msg);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *req);
static	void		 pg_noresult(const struct req *req, const char *msg);
static	void		 pg_search(const struct req *req);
static	void		 pg_searchres(const struct req *req,
				struct manpage *r, size_t sz);
static	void		 pg_show(struct req *req, const char *fullpath);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_catman(const struct req *req, const char *file);
static	void		 resp_copy(const char *filename);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *req, const char *file);
static	void		 resp_searchform(const struct req *req,
				enum focus focus);
static	void		 resp_show(const struct req *req, const char *file);
static	void		 set_query_attr(char **attr, char **val);
static	int		 validate_filename(const char *file);
static	int		 validate_manpath(const struct req *req,
				const char *manpath);
static	int		 validate_urifrag(const char *frag);
93
94static	const char	 *scriptname = SCRIPT_NAME;
95
/*
 * Preference of the manual sections 1 to 9, indexed by the
 * section digit minus one; a smaller value is preferred.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values of the section selector; "0" stands for all sections. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector, parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager's Manual",
	"9 - Kernel Developer's Manual"
};

/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
114
/* Architectures offered by the architecture selector, in display order. */
static	const char *const arch_names[] = {
	"amd64", "alpha", "armish",
	"armv7", "hppa", "hppa64",
	"i386", "landisk", "loongson",
	"luna88k", "macppc", "mips64",
	"octeon", "sgi", "socppc",
	"sparc", "sparc64", "zaurus",
	"amiga", "arc", "arm32",
	"atari", "aviion", "beagle",
	"cats", "hp300", "ia64",
	"mac68k", "mvme68k", "mvme88k",
	"mvmeppc", "palm", "pc532",
	"pegasos", "pmax", "powerpc",
	"solbourne", "sun3", "vax",
	"wgrisc", "x68k"
};

/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
129
/*
 * Write one character to standard output, replacing the
 * characters '"', '&', '<', and '>' by HTML character references.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The HTML entity is "&quot;"; "&quote;" does not exist. */
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
156
/*
 * Print a string with HTML escaping by passing each of its
 * characters to html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
170
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The old attribute value is freed.  An empty string is not stored
 * but freed, leaving the attribute NULL.  In either case, *val is
 * NULL on return because the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (incoming[0] == '\0') {
		free(incoming);
		incoming = NULL;
	}
	*attr = incoming;
}
187
188/*
189 * Parse the QUERY_STRING for key-value pairs
190 * and store the values into the query structure.
191 */
192static void
193parse_query_string(struct req *req, const char *qs)
194{
195	char		*key, *val;
196	size_t		 keysz, valsz;
197
198	req->isquery	= 1;
199	req->q.manpath	= NULL;
200	req->q.arch	= NULL;
201	req->q.sec	= NULL;
202	req->q.query	= NULL;
203	req->q.equal	= 1;
204
205	key = val = NULL;
206	while (*qs != '\0') {
207
208		/* Parse one key. */
209
210		keysz = strcspn(qs, "=;&");
211		key = mandoc_strndup(qs, keysz);
212		qs += keysz;
213		if (*qs != '=')
214			goto next;
215
216		/* Parse one value. */
217
218		valsz = strcspn(++qs, ";&");
219		val = mandoc_strndup(qs, valsz);
220		qs += valsz;
221
222		/* Decode and catch encoding errors. */
223
224		if ( ! (http_decode(key) && http_decode(val)))
225			goto next;
226
227		/* Handle key-value pairs. */
228
229		if ( ! strcmp(key, "query"))
230			set_query_attr(&req->q.query, &val);
231
232		else if ( ! strcmp(key, "apropos"))
233			req->q.equal = !strcmp(val, "0");
234
235		else if ( ! strcmp(key, "manpath")) {
236#ifdef COMPAT_OLDURI
237			if ( ! strncmp(val, "OpenBSD ", 8)) {
238				val[7] = '-';
239				if ('C' == val[8])
240					val[8] = 'c';
241			}
242#endif
243			set_query_attr(&req->q.manpath, &val);
244		}
245
246		else if ( ! (strcmp(key, "sec")
247#ifdef COMPAT_OLDURI
248		    && strcmp(key, "sektion")
249#endif
250		    )) {
251			if ( ! strcmp(val, "0"))
252				*val = '\0';
253			set_query_attr(&req->q.sec, &val);
254		}
255
256		else if ( ! strcmp(key, "arch")) {
257			if ( ! strcmp(val, "default"))
258				*val = '\0';
259			set_query_attr(&req->q.arch, &val);
260		}
261
262		/*
263		 * The key must be freed in any case.
264		 * The val may have been handed over to the query
265		 * structure, in which case it is now NULL.
266		 */
267next:
268		free(key);
269		key = NULL;
270		free(val);
271		val = NULL;
272
273		if (*qs != '\0')
274			qs++;
275	}
276}
277
278/*
279 * HTTP-decode a string.  The standard explanation is that this turns
280 * "%4e+foo" into "n foo" in the regular way.  This is done in-place
281 * over the allocated string.
282 */
283static int
284http_decode(char *p)
285{
286	char             hex[3];
287	char		*q;
288	int              c;
289
290	hex[2] = '\0';
291
292	q = p;
293	for ( ; '\0' != *p; p++, q++) {
294		if ('%' == *p) {
295			if ('\0' == (hex[0] = *(p + 1)))
296				return 0;
297			if ('\0' == (hex[1] = *(p + 2)))
298				return 0;
299			if (1 != sscanf(hex, "%x", &c))
300				return 0;
301			if ('\0' == c)
302				return 0;
303
304			*q = (char)c;
305			p += 2;
306		} else
307			*q = '+' == *p ? ' ' : *p;
308	}
309
310	*q = '\0';
311	return 1;
312}
313
/*
 * Print the HTTP response header and flush standard output.
 * A Status line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
328
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 sz, off, wsz;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data is written directly to the file descriptor,
	 * so everything buffered by stdio has to go out first.
	 */

	fflush(stdout);

	while ((sz = read(fd, buf, sizeof(buf))) > 0) {
		/* A single write(2) may transfer less than requested. */
		for (off = 0; off < sz; off += wsz) {
			wsz = write(STDOUT_FILENO, buf + off, sz - off);
			if (wsz <= 0)
				break;
		}
		if (off < sz)
			break;
	}

	/* Do not leak the descriptor. */
	close(fd);
}
342
343static void
344resp_begin_html(int code, const char *msg)
345{
346
347	resp_begin_http(code, msg);
348
349	printf("<!DOCTYPE html>\n"
350	       "<html>\n"
351	       "<head>\n"
352	       "<meta charset=\"UTF-8\"/>\n"
353	       "<link rel=\"stylesheet\" href=\"%s/mandoc.css\""
354	       " type=\"text/css\" media=\"all\">\n"
355	       "<title>%s</title>\n"
356	       "</head>\n"
357	       "<body>\n"
358	       "<!-- Begin page content. //-->\n",
359	       CSS_DIR, CUSTOMIZE_TITLE);
360
361	resp_copy(MAN_DIR "/header.html");
362}
363
364static void
365resp_end_html(void)
366{
367
368	resp_copy(MAN_DIR "/footer.html");
369
370	puts("</body>\n"
371	     "</html>");
372}
373
374static void
375resp_searchform(const struct req *req, enum focus focus)
376{
377	int		 i;
378
379	puts("<!-- Begin search form. //-->");
380	printf("<div id=\"mancgi\">\n"
381	       "<form action=\"/%s\" method=\"get\">\n"
382	       "<fieldset>\n"
383	       "<legend>Manual Page Search Parameters</legend>\n",
384	       scriptname);
385
386	/* Write query input box. */
387
388	printf("<input type=\"text\" name=\"query\" value=\"");
389	if (req->q.query != NULL)
390		html_print(req->q.query);
391	printf( "\" size=\"40\"");
392	if (focus == FOCUS_QUERY)
393		printf(" autofocus");
394	puts(">");
395
396	/* Write submission buttons. */
397
398	printf(	"<button type=\"submit\" name=\"apropos\" value=\"0\">"
399		"man</button>\n"
400		"<button type=\"submit\" name=\"apropos\" value=\"1\">"
401		"apropos</button>\n<br/>\n");
402
403	/* Write section selector. */
404
405	puts("<select name=\"sec\">");
406	for (i = 0; i < sec_MAX; i++) {
407		printf("<option value=\"%s\"", sec_numbers[i]);
408		if (NULL != req->q.sec &&
409		    0 == strcmp(sec_numbers[i], req->q.sec))
410			printf(" selected=\"selected\"");
411		printf(">%s</option>\n", sec_names[i]);
412	}
413	puts("</select>");
414
415	/* Write architecture selector. */
416
417	printf(	"<select name=\"arch\">\n"
418		"<option value=\"default\"");
419	if (NULL == req->q.arch)
420		printf(" selected=\"selected\"");
421	puts(">All Architectures</option>");
422	for (i = 0; i < arch_MAX; i++) {
423		printf("<option value=\"%s\"", arch_names[i]);
424		if (NULL != req->q.arch &&
425		    0 == strcmp(arch_names[i], req->q.arch))
426			printf(" selected=\"selected\"");
427		printf(">%s</option>\n", arch_names[i]);
428	}
429	puts("</select>");
430
431	/* Write manpath selector. */
432
433	if (req->psz > 1) {
434		puts("<select name=\"manpath\">");
435		for (i = 0; i < (int)req->psz; i++) {
436			printf("<option ");
437			if (strcmp(req->q.manpath, req->p[i]) == 0)
438				printf("selected=\"selected\" ");
439			printf("value=\"");
440			html_print(req->p[i]);
441			printf("\">");
442			html_print(req->p[i]);
443			puts("</option>");
444		}
445		puts("</select>");
446	}
447
448	puts("</fieldset>\n"
449	     "</form>\n"
450	     "</div>");
451	puts("<!-- End search form. //-->");
452}
453
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/', and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
467
468static int
469validate_manpath(const struct req *req, const char* manpath)
470{
471	size_t	 i;
472
473	for (i = 0; i < req->psz; i++)
474		if ( ! strcmp(manpath, req->p[i]))
475			return 1;
476
477	return 0;
478}
479
/*
 * Check that a file name, after skipping an optional leading "./",
 * starts with "man" or "cat" and contains neither "../" nor "/..".
 * Returns 1 for an acceptable name, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (strstr(name, "../") != NULL)
		return 0;
	if (strstr(name, "/..") != NULL)
		return 0;
	if (strncmp(name, "man", 3) != 0 && strncmp(name, "cat", 3) != 0)
		return 0;

	return 1;
}
490
491static void
492pg_index(const struct req *req)
493{
494
495	resp_begin_html(200, NULL);
496	resp_searchform(req, FOCUS_QUERY);
497	printf("<p>\n"
498	       "This web interface is documented in the\n"
499	       "<a href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
500	       "manual, and the\n"
501	       "<a href=\"/%s%sapropos.1\">apropos(1)</a>\n"
502	       "manual explains the query syntax.\n"
503	       "</p>\n",
504	       scriptname, *scriptname == '\0' ? "" : "/",
505	       scriptname, *scriptname == '\0' ? "" : "/");
506	resp_end_html();
507}
508
509static void
510pg_noresult(const struct req *req, const char *msg)
511{
512	resp_begin_html(200, NULL);
513	resp_searchform(req, FOCUS_QUERY);
514	puts("<p>");
515	puts(msg);
516	puts("</p>");
517	resp_end_html();
518}
519
520static void
521pg_error_badrequest(const char *msg)
522{
523
524	resp_begin_html(400, "Bad Request");
525	puts("<h1>Bad Request</h1>\n"
526	     "<p>\n");
527	puts(msg);
528	printf("Try again from the\n"
529	       "<a href=\"/%s\">main page</a>.\n"
530	       "</p>", scriptname);
531	resp_end_html();
532}
533
/*
 * Print a complete "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
541
542static void
543pg_searchres(const struct req *req, struct manpage *r, size_t sz)
544{
545	char		*arch, *archend;
546	const char	*sec;
547	size_t		 i, iuse;
548	int		 archprio, archpriouse;
549	int		 prio, priouse;
550
551	for (i = 0; i < sz; i++) {
552		if (validate_filename(r[i].file))
553			continue;
554		warnx("invalid filename %s in %s database",
555		    r[i].file, req->q.manpath);
556		pg_error_internal();
557		return;
558	}
559
560	if (req->isquery && sz == 1) {
561		/*
562		 * If we have just one result, then jump there now
563		 * without any delay.
564		 */
565		printf("Status: 303 See Other\r\n");
566		printf("Location: http://%s/%s%s%s/%s",
567		    HTTP_HOST, scriptname,
568		    *scriptname == '\0' ? "" : "/",
569		    req->q.manpath, r[0].file);
570		printf("\r\n"
571		     "Content-Type: text/html; charset=utf-8\r\n"
572		     "\r\n");
573		return;
574	}
575
576	resp_begin_html(200, NULL);
577	resp_searchform(req,
578	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
579
580	if (sz > 1) {
581		puts("<div class=\"results\">");
582		puts("<table>");
583
584		for (i = 0; i < sz; i++) {
585			printf("<tr>\n"
586			       "<td class=\"title\">\n"
587			       "<a href=\"/%s%s%s/%s",
588			    scriptname, *scriptname == '\0' ? "" : "/",
589			    req->q.manpath, r[i].file);
590			printf("\">");
591			html_print(r[i].names);
592			printf("</a>\n"
593			       "</td>\n"
594			       "<td class=\"desc\">");
595			html_print(r[i].output);
596			puts("</td>\n"
597			     "</tr>");
598		}
599
600		puts("</table>\n"
601		     "</div>");
602	}
603
604	/*
605	 * In man(1) mode, show one of the pages
606	 * even if more than one is found.
607	 */
608
609	if (req->q.equal || sz == 1) {
610		puts("<hr>");
611		iuse = 0;
612		priouse = 20;
613		archpriouse = 3;
614		for (i = 0; i < sz; i++) {
615			sec = r[i].file;
616			sec += strcspn(sec, "123456789");
617			if (sec[0] == '\0')
618				continue;
619			prio = sec_prios[sec[0] - '1'];
620			if (sec[1] != '/')
621				prio += 10;
622			if (req->q.arch == NULL) {
623				archprio =
624				    ((arch = strchr(sec + 1, '/'))
625					== NULL) ? 3 :
626				    ((archend = strchr(arch + 1, '/'))
627					== NULL) ? 0 :
628				    strncmp(arch, "amd64/",
629					archend - arch) ? 2 : 1;
630				if (archprio < archpriouse) {
631					archpriouse = archprio;
632					priouse = prio;
633					iuse = i;
634					continue;
635				}
636				if (archprio > archpriouse)
637					continue;
638			}
639			if (prio >= priouse)
640				continue;
641			priouse = prio;
642			iuse = i;
643		}
644		resp_show(req, r[iuse].file);
645	}
646
647	resp_end_html();
648}
649
/*
 * Render a preformatted manual page (catpage) as HTML.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * "_\bX" are rendered in italics, a few special overstrike pairs
 * are replaced by '*' or '+', and all other "X\bY" sequences are
 * rendered in bold.  Everything else is printed with HTML escaping
 * inside a <pre> element.  If the file cannot be opened, an error
 * paragraph is printed instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;
	char		 c1, c2;

	(void)req;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if (p[i] == '\b' || p[i] == '\n')
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if (p[i + 1] != '\b') {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i] is overstruck by p[i + 2],
			 * with the backspace in between at p[i + 1].
			 */

			c1 = p[i];
			c2 = p[i + 2];

			/* Italic mode. */

			if (c1 == '_') {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of the pair must be tested:
			 * the character in between is always the
			 * backspace and can never match.
			 */

			if ((c1 == '+' && c2 == 'o') ||
			    (c1 == 'o' && c2 == '+') ||
			    (c1 == '|' && c2 == '=') ||
			    (c1 == '=' && c2 == '|') ||
			    (c1 == '*' && c2 == '=') ||
			    (c1 == '=' && c2 == '*') ||
			    (c1 == '*' && c2 == '|') ||
			    (c1 == '|' && c2 == '*')) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if ((c1 == '|' && c2 == '-') ||
			    (c1 == '-' && c2 == '|') ||
			    (c1 == '+' && c2 == '-') ||
			    (c1 == '-' && c2 == '+') ||
			    (c1 == '+' && c2 == '|') ||
			    (c1 == '|' && c2 == '+')) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
786
787static void
788resp_format(const struct req *req, const char *file)
789{
790	struct manoutput conf;
791	struct mparse	*mp;
792	struct roff_man	*man;
793	void		*vp;
794	int		 fd;
795	int		 usepath;
796
797	if (-1 == (fd = open(file, O_RDONLY, 0))) {
798		puts("<p>You specified an invalid manual file.</p>");
799		return;
800	}
801
802	mchars_alloc();
803	mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
804	mparse_readfd(mp, fd, file);
805	close(fd);
806
807	memset(&conf, 0, sizeof(conf));
808	conf.fragment = 1;
809	usepath = strcmp(req->q.manpath, req->p[0]);
810	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
811	    usepath ? req->q.manpath : "", usepath ? "/" : "");
812
813	mparse_result(mp, &man, NULL);
814	if (man == NULL) {
815		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
816		pg_error_internal();
817		mparse_free(mp);
818		mchars_free();
819		return;
820	}
821
822	vp = html_alloc(&conf);
823
824	if (man->macroset == MACROSET_MDOC) {
825		mdoc_validate(man);
826		html_mdoc(vp, man);
827	} else {
828		man_validate(man);
829		html_man(vp, man);
830	}
831
832	html_free(vp);
833	mparse_free(mp);
834	mchars_free();
835	free(conf.man);
836}
837
/*
 * Print one manual page.  After skipping an optional leading "./",
 * file names starting with 'c' are handled as preformatted pages
 * by resp_catman(), all others as manual sources by resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name == 'c') {
		resp_catman(req, name);
		return;
	}
	resp_format(req, name);
}
850
851static void
852pg_show(struct req *req, const char *fullpath)
853{
854	char		*manpath;
855	const char	*file;
856
857	if ((file = strchr(fullpath, '/')) == NULL) {
858		pg_error_badrequest(
859		    "You did not specify a page to show.");
860		return;
861	}
862	manpath = mandoc_strndup(fullpath, file - fullpath);
863	file++;
864
865	if ( ! validate_manpath(req, manpath)) {
866		pg_error_badrequest(
867		    "You specified an invalid manpath.");
868		free(manpath);
869		return;
870	}
871
872	/*
873	 * Begin by chdir()ing into the manpath.
874	 * This way we can pick up the database files, which are
875	 * relative to the manpath root.
876	 */
877
878	if (chdir(manpath) == -1) {
879		warn("chdir %s", manpath);
880		pg_error_internal();
881		free(manpath);
882		return;
883	}
884	free(manpath);
885
886	if ( ! validate_filename(file)) {
887		pg_error_badrequest(
888		    "You specified an invalid manual file.");
889		return;
890	}
891
892	resp_begin_html(200, NULL);
893	resp_searchform(req, FOCUS_NONE);
894	resp_show(req, file);
895	resp_end_html();
896}
897
898static void
899pg_search(const struct req *req)
900{
901	struct mansearch	  search;
902	struct manpaths		  paths;
903	struct manpage		 *res;
904	char			**argv;
905	char			 *query, *rp, *wp;
906	size_t			  ressz;
907	int			  argc;
908
909	/*
910	 * Begin by chdir()ing into the root of the manpath.
911	 * This way we can pick up the database files, which are
912	 * relative to the manpath root.
913	 */
914
915	if (chdir(req->q.manpath) == -1) {
916		warn("chdir %s", req->q.manpath);
917		pg_error_internal();
918		return;
919	}
920
921	search.arch = req->q.arch;
922	search.sec = req->q.sec;
923	search.outkey = "Nd";
924	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
925	search.firstmatch = 1;
926
927	paths.sz = 1;
928	paths.paths = mandoc_malloc(sizeof(char *));
929	paths.paths[0] = mandoc_strdup(".");
930
931	/*
932	 * Break apart at spaces with backslash-escaping.
933	 */
934
935	argc = 0;
936	argv = NULL;
937	rp = query = mandoc_strdup(req->q.query);
938	for (;;) {
939		while (isspace((unsigned char)*rp))
940			rp++;
941		if (*rp == '\0')
942			break;
943		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
944		argv[argc++] = wp = rp;
945		for (;;) {
946			if (isspace((unsigned char)*rp)) {
947				*wp = '\0';
948				rp++;
949				break;
950			}
951			if (rp[0] == '\\' && rp[1] != '\0')
952				rp++;
953			if (wp != rp)
954				*wp = *rp;
955			if (*rp == '\0')
956				break;
957			wp++;
958			rp++;
959		}
960	}
961
962	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
963		pg_noresult(req, "You entered an invalid query.");
964	else if (0 == ressz)
965		pg_noresult(req, "No results found.");
966	else
967		pg_searchres(req, res, ressz);
968
969	free(query);
970	mansearch_free(res, ressz);
971	free(paths.paths[0]);
972	free(paths.paths);
973}
974
975int
976main(void)
977{
978	struct req	 req;
979	struct itimerval itimer;
980	const char	*path;
981	const char	*querystring;
982	int		 i;
983
984	/* Poor man's ReDoS mitigation. */
985
986	itimer.it_value.tv_sec = 2;
987	itimer.it_value.tv_usec = 0;
988	itimer.it_interval.tv_sec = 2;
989	itimer.it_interval.tv_usec = 0;
990	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
991		warn("setitimer");
992		pg_error_internal();
993		return EXIT_FAILURE;
994	}
995
996	/*
997	 * First we change directory into the MAN_DIR so that
998	 * subsequent scanning for manpath directories is rooted
999	 * relative to the same position.
1000	 */
1001
1002	if (chdir(MAN_DIR) == -1) {
1003		warn("MAN_DIR: %s", MAN_DIR);
1004		pg_error_internal();
1005		return EXIT_FAILURE;
1006	}
1007
1008	memset(&req, 0, sizeof(struct req));
1009	req.q.equal = 1;
1010	parse_manpath_conf(&req);
1011
1012	/* Parse the path info and the query string. */
1013
1014	if ((path = getenv("PATH_INFO")) == NULL)
1015		path = "";
1016	else if (*path == '/')
1017		path++;
1018
1019	if (*path != '\0') {
1020		parse_path_info(&req, path);
1021		if (req.q.manpath == NULL || access(path, F_OK) == -1)
1022			path = "";
1023	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1024		parse_query_string(&req, querystring);
1025
1026	/* Validate parsed data and add defaults. */
1027
1028	if (req.q.manpath == NULL)
1029		req.q.manpath = mandoc_strdup(req.p[0]);
1030	else if ( ! validate_manpath(&req, req.q.manpath)) {
1031		pg_error_badrequest(
1032		    "You specified an invalid manpath.");
1033		return EXIT_FAILURE;
1034	}
1035
1036	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1037		pg_error_badrequest(
1038		    "You specified an invalid architecture.");
1039		return EXIT_FAILURE;
1040	}
1041
1042	/* Dispatch to the three different pages. */
1043
1044	if ('\0' != *path)
1045		pg_show(&req, path);
1046	else if (NULL != req.q.query)
1047		pg_search(&req);
1048	else
1049		pg_index(&req);
1050
1051	free(req.q.manpath);
1052	free(req.q.arch);
1053	free(req.q.sec);
1054	free(req.q.query);
1055	for (i = 0; i < (int)req.psz; i++)
1056		free(req.p[i]);
1057	free(req.p);
1058	return EXIT_SUCCESS;
1059}
1060
1061/*
1062 * If PATH_INFO is not a file name, translate it to a query.
1063 */
1064static void
1065parse_path_info(struct req *req, const char *path)
1066{
1067	char	*dir[4];
1068	int	 i;
1069
1070	req->isquery = 0;
1071	req->q.equal = 1;
1072	req->q.manpath = mandoc_strdup(path);
1073	req->q.arch = NULL;
1074
1075	/* Mandatory manual page name. */
1076	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1077		req->q.query = req->q.manpath;
1078		req->q.manpath = NULL;
1079	} else
1080		*req->q.query++ = '\0';
1081
1082	/* Optional trailing section. */
1083	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1084		if(isdigit((unsigned char)req->q.sec[1])) {
1085			*req->q.sec++ = '\0';
1086			req->q.sec = mandoc_strdup(req->q.sec);
1087		} else
1088			req->q.sec = NULL;
1089	}
1090
1091	/* Handle the case of name[.section] only. */
1092	if (req->q.manpath == NULL)
1093		return;
1094	req->q.query = mandoc_strdup(req->q.query);
1095
1096	/* Split directory components. */
1097	dir[i = 0] = req->q.manpath;
1098	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1099		if (++i == 3) {
1100			pg_error_badrequest(
1101			    "You specified too many directory components.");
1102			exit(EXIT_FAILURE);
1103		}
1104		*dir[i]++ = '\0';
1105	}
1106
1107	/* Optional manpath. */
1108	if ((i = validate_manpath(req, req->q.manpath)) == 0)
1109		req->q.manpath = NULL;
1110	else if (dir[1] == NULL)
1111		return;
1112
1113	/* Optional section. */
1114	if (strncmp(dir[i], "man", 3) == 0) {
1115		free(req->q.sec);
1116		req->q.sec = mandoc_strdup(dir[i++] + 3);
1117	}
1118	if (dir[i] == NULL) {
1119		if (req->q.manpath == NULL)
1120			free(dir[0]);
1121		return;
1122	}
1123	if (dir[i + 1] != NULL) {
1124		pg_error_badrequest(
1125		    "You specified an invalid directory component.");
1126		exit(EXIT_FAILURE);
1127	}
1128
1129	/* Optional architecture. */
1130	if (i) {
1131		req->q.arch = mandoc_strdup(dir[i]);
1132		if (req->q.manpath == NULL)
1133			free(dir[0]);
1134	} else
1135		req->q.arch = dir[0];
1136}
1137
1138/*
1139 * Scan for indexable paths.
1140 */
1141static void
1142parse_manpath_conf(struct req *req)
1143{
1144	FILE	*fp;
1145	char	*dp;
1146	size_t	 dpsz;
1147	ssize_t	 len;
1148
1149	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1150		warn("%s/manpath.conf", MAN_DIR);
1151		pg_error_internal();
1152		exit(EXIT_FAILURE);
1153	}
1154
1155	dp = NULL;
1156	dpsz = 0;
1157
1158	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1159		if (dp[len - 1] == '\n')
1160			dp[--len] = '\0';
1161		req->p = mandoc_realloc(req->p,
1162		    (req->psz + 1) * sizeof(char *));
1163		if ( ! validate_urifrag(dp)) {
1164			warnx("%s/manpath.conf contains "
1165			    "unsafe path \"%s\"", MAN_DIR, dp);
1166			pg_error_internal();
1167			exit(EXIT_FAILURE);
1168		}
1169		if (strchr(dp, '/') != NULL) {
1170			warnx("%s/manpath.conf contains "
1171			    "path with slash \"%s\"", MAN_DIR, dp);
1172			pg_error_internal();
1173			exit(EXIT_FAILURE);
1174		}
1175		req->p[req->psz++] = dp;
1176		dp = NULL;
1177		dpsz = 0;
1178	}
1179	free(dp);
1180
1181	if (req->p == NULL) {
1182		warnx("%s/manpath.conf is empty", MAN_DIR);
1183		pg_error_internal();
1184		exit(EXIT_FAILURE);
1185	}
1186}
1187