cgi.c revision 316420
1/*	$Id: cgi.c,v 1.144 2017/01/21 01:20:31 schwarze Exp $ */
2/*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21#include <sys/time.h>
22
23#include <ctype.h>
24#include <err.h>
25#include <errno.h>
26#include <fcntl.h>
27#include <limits.h>
28#include <stdint.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <unistd.h>
33
34#include "mandoc_aux.h"
35#include "mandoc.h"
36#include "roff.h"
37#include "mdoc.h"
38#include "man.h"
39#include "main.h"
40#include "manconf.h"
41#include "mansearch.h"
42#include "cgi.h"
43
/*
 * A query as passed to the search function.
 * The string members are either NULL or point to allocated storage
 * that main() releases with free() before exiting.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
54
/*
 * Everything known about the current request: the parsed query
 * plus the list of manpaths read from manpath.conf.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
61
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,	/* no form element is focused */
	FOCUS_QUERY	/* focus the query text input */
};
66
/*
 * Forward declarations of the file-local functions:
 * html_*() escape output, parse_*() read the request and the
 * configuration, pg_*() produce complete pages, resp_*() produce
 * parts of a response, and validate_*() check untrusted strings.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
93
/* First path component of every link this program generates. */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Preference rank of the sections 1 to 9, indexed by the section
 * digit minus one.  When pg_searchres() has to pick one page out of
 * several, the page with the smallest rank wins.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Values and labels of the section selector in the search form.
 * Both arrays are walked in parallel up to sec_MAX, so they
 * must have the same number of entries.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names[]. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
114
/*
 * Architectures offered by the architecture selector in the
 * search form, in the order in which they are displayed.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",
    "hppa",        "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc64",
    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
130
/*
 * Write one character to standard output, substituting the entity
 * reference for each of the four characters '"', '&', '>' and '<'.
 * All other bytes, non-ASCII ones included, are written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
157
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time via html_putchar().
 * A NULL pointer prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
171
/*
 * Move ownership of the allocated string *val into *attr.
 * The previous content of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In either case, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	newval = *val;
	*val = NULL;

	free(*attr);
	if (*newval != '\0') {
		*attr = newval;
		return;
	}
	free(newval);
	*attr = NULL;
}
188
189/*
190 * Parse the QUERY_STRING for key-value pairs
191 * and store the values into the query structure.
192 */
193static void
194parse_query_string(struct req *req, const char *qs)
195{
196	char		*key, *val;
197	size_t		 keysz, valsz;
198
199	req->isquery	= 1;
200	req->q.manpath	= NULL;
201	req->q.arch	= NULL;
202	req->q.sec	= NULL;
203	req->q.query	= NULL;
204	req->q.equal	= 1;
205
206	key = val = NULL;
207	while (*qs != '\0') {
208
209		/* Parse one key. */
210
211		keysz = strcspn(qs, "=;&");
212		key = mandoc_strndup(qs, keysz);
213		qs += keysz;
214		if (*qs != '=')
215			goto next;
216
217		/* Parse one value. */
218
219		valsz = strcspn(++qs, ";&");
220		val = mandoc_strndup(qs, valsz);
221		qs += valsz;
222
223		/* Decode and catch encoding errors. */
224
225		if ( ! (http_decode(key) && http_decode(val)))
226			goto next;
227
228		/* Handle key-value pairs. */
229
230		if ( ! strcmp(key, "query"))
231			set_query_attr(&req->q.query, &val);
232
233		else if ( ! strcmp(key, "apropos"))
234			req->q.equal = !strcmp(val, "0");
235
236		else if ( ! strcmp(key, "manpath")) {
237#ifdef COMPAT_OLDURI
238			if ( ! strncmp(val, "OpenBSD ", 8)) {
239				val[7] = '-';
240				if ('C' == val[8])
241					val[8] = 'c';
242			}
243#endif
244			set_query_attr(&req->q.manpath, &val);
245		}
246
247		else if ( ! (strcmp(key, "sec")
248#ifdef COMPAT_OLDURI
249		    && strcmp(key, "sektion")
250#endif
251		    )) {
252			if ( ! strcmp(val, "0"))
253				*val = '\0';
254			set_query_attr(&req->q.sec, &val);
255		}
256
257		else if ( ! strcmp(key, "arch")) {
258			if ( ! strcmp(val, "default"))
259				*val = '\0';
260			set_query_attr(&req->q.arch, &val);
261		}
262
263		/*
264		 * The key must be freed in any case.
265		 * The val may have been handed over to the query
266		 * structure, in which case it is now NULL.
267		 */
268next:
269		free(key);
270		key = NULL;
271		free(val);
272		val = NULL;
273
274		if (*qs != '\0')
275			qs++;
276	}
277}
278
279/*
280 * HTTP-decode a string.  The standard explanation is that this turns
281 * "%4e+foo" into "n foo" in the regular way.  This is done in-place
282 * over the allocated string.
283 */
284static int
285http_decode(char *p)
286{
287	char             hex[3];
288	char		*q;
289	int              c;
290
291	hex[2] = '\0';
292
293	q = p;
294	for ( ; '\0' != *p; p++, q++) {
295		if ('%' == *p) {
296			if ('\0' == (hex[0] = *(p + 1)))
297				return 0;
298			if ('\0' == (hex[1] = *(p + 2)))
299				return 0;
300			if (1 != sscanf(hex, "%x", &c))
301				return 0;
302			if ('\0' == c)
303				return 0;
304
305			*q = (char)c;
306			p += 2;
307		} else
308			*q = '+' == *p ? ' ' : *p;
309	}
310
311	*q = '\0';
312	return 1;
313}
314
/*
 * Print the CGI response header and flush it.
 * A "Status:" line is only sent for codes other than 200;
 * msg is not used when code is 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);

	/* An empty line terminates the header. */
	fputs("\r\n", stdout);

	fflush(stdout);
}
329
/*
 * Copy the content of a file verbatim to standard output.
 * This is a best-effort operation: if the file cannot be opened,
 * nothing is printed, and a read or write error ends the copy
 * silently.  Short writes are continued and interrupted system
 * calls are restarted, so the file is not truncated as long as
 * standard output keeps accepting data.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 rsz, wsz, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data below bypasses stdio, so whatever stdio still
	 * holds in its buffer has to go out first.
	 */
	fflush(stdout);

	while ((rsz = read(fd, buf, sizeof(buf))) != 0) {
		if (rsz == -1) {
			if (errno == EINTR)
				continue;
			break;
		}

		/* write(2) may accept less than it was given. */

		off = 0;
		while (off < rsz) {
			wsz = write(STDOUT_FILENO, buf + off, rsz - off);
			if (wsz == -1) {
				if (errno == EINTR)
					continue;
				goto done;
			}
			off += wsz;
		}
	}
done:
	close(fd);
}
344
345static void
346resp_begin_html(int code, const char *msg)
347{
348
349	resp_begin_http(code, msg);
350
351	printf("<!DOCTYPE html>\n"
352	       "<html>\n"
353	       "<head>\n"
354	       "  <meta charset=\"UTF-8\"/>\n"
355	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
356	       " type=\"text/css\" media=\"all\">\n"
357	       "  <title>%s</title>\n"
358	       "</head>\n"
359	       "<body>\n",
360	       CSS_DIR, CUSTOMIZE_TITLE);
361
362	resp_copy(MAN_DIR "/header.html");
363}
364
365static void
366resp_end_html(void)
367{
368
369	resp_copy(MAN_DIR "/footer.html");
370
371	puts("</body>\n"
372	     "</html>");
373}
374
375static void
376resp_searchform(const struct req *req, enum focus focus)
377{
378	int		 i;
379
380	printf("<form action=\"/%s\" method=\"get\">\n"
381	       "  <fieldset>\n"
382	       "    <legend>Manual Page Search Parameters</legend>\n",
383	       scriptname);
384
385	/* Write query input box. */
386
387	printf("    <input type=\"text\" name=\"query\" value=\"");
388	if (req->q.query != NULL)
389		html_print(req->q.query);
390	printf( "\" size=\"40\"");
391	if (focus == FOCUS_QUERY)
392		printf(" autofocus");
393	puts(">");
394
395	/* Write submission buttons. */
396
397	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
398		"man</button>\n"
399		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
400		"apropos</button>\n"
401		"    <br/>\n");
402
403	/* Write section selector. */
404
405	puts("    <select name=\"sec\">");
406	for (i = 0; i < sec_MAX; i++) {
407		printf("      <option value=\"%s\"", sec_numbers[i]);
408		if (NULL != req->q.sec &&
409		    0 == strcmp(sec_numbers[i], req->q.sec))
410			printf(" selected=\"selected\"");
411		printf(">%s</option>\n", sec_names[i]);
412	}
413	puts("    </select>");
414
415	/* Write architecture selector. */
416
417	printf(	"    <select name=\"arch\">\n"
418		"      <option value=\"default\"");
419	if (NULL == req->q.arch)
420		printf(" selected=\"selected\"");
421	puts(">All Architectures</option>");
422	for (i = 0; i < arch_MAX; i++) {
423		printf("      <option value=\"%s\"", arch_names[i]);
424		if (NULL != req->q.arch &&
425		    0 == strcmp(arch_names[i], req->q.arch))
426			printf(" selected=\"selected\"");
427		printf(">%s</option>\n", arch_names[i]);
428	}
429	puts("    </select>");
430
431	/* Write manpath selector. */
432
433	if (req->psz > 1) {
434		puts("    <select name=\"manpath\">");
435		for (i = 0; i < (int)req->psz; i++) {
436			printf("      <option ");
437			if (strcmp(req->q.manpath, req->p[i]) == 0)
438				printf("selected=\"selected\" ");
439			printf("value=\"");
440			html_print(req->p[i]);
441			printf("\">");
442			html_print(req->p[i]);
443			puts("</option>");
444		}
445		puts("    </select>");
446	}
447
448	puts("  </fieldset>\n"
449	     "</form>");
450}
451
/*
 * Check that a string consists only of alphanumeric characters
 * and the four characters '-', '.', '/' and '_'.
 * Returns 1 if so (the empty string passes), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
465
466static int
467validate_manpath(const struct req *req, const char* manpath)
468{
469	size_t	 i;
470
471	for (i = 0; i < req->psz; i++)
472		if ( ! strcmp(manpath, req->p[i]))
473			return 1;
474
475	return 0;
476}
477
/*
 * Check a file name relative to a manpath.  After skipping an
 * optional leading "./", the name must start with "man" or "cat"
 * and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* No climbing out of the manpath. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	/* Only the man and cat trees are served. */
	if (strncmp(file, "man", 3) != 0 && strncmp(file, "cat", 3) != 0)
		return 0;

	return 1;
}
488
489static void
490pg_index(const struct req *req)
491{
492
493	resp_begin_html(200, NULL);
494	resp_searchform(req, FOCUS_QUERY);
495	printf("<p>\n"
496	       "This web interface is documented in the\n"
497	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
498	       "manual, and the\n"
499	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
500	       "manual explains the query syntax.\n"
501	       "</p>\n",
502	       scriptname, *scriptname == '\0' ? "" : "/",
503	       scriptname, *scriptname == '\0' ? "" : "/");
504	resp_end_html();
505}
506
507static void
508pg_noresult(const struct req *req, const char *msg)
509{
510	resp_begin_html(200, NULL);
511	resp_searchform(req, FOCUS_QUERY);
512	puts("<p>");
513	puts(msg);
514	puts("</p>");
515	resp_end_html();
516}
517
518static void
519pg_error_badrequest(const char *msg)
520{
521
522	resp_begin_html(400, "Bad Request");
523	puts("<h1>Bad Request</h1>\n"
524	     "<p>\n");
525	puts(msg);
526	printf("Try again from the\n"
527	       "<a href=\"/%s\">main page</a>.\n"
528	       "</p>", scriptname);
529	resp_end_html();
530}
531
/*
 * Complete "500 Internal Server Error" page.  Details are not
 * shown to the client; callers report them with warn(3) instead.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
539
540static void
541pg_searchres(const struct req *req, struct manpage *r, size_t sz)
542{
543	char		*arch, *archend;
544	const char	*sec;
545	size_t		 i, iuse;
546	int		 archprio, archpriouse;
547	int		 prio, priouse;
548
549	for (i = 0; i < sz; i++) {
550		if (validate_filename(r[i].file))
551			continue;
552		warnx("invalid filename %s in %s database",
553		    r[i].file, req->q.manpath);
554		pg_error_internal();
555		return;
556	}
557
558	if (req->isquery && sz == 1) {
559		/*
560		 * If we have just one result, then jump there now
561		 * without any delay.
562		 */
563		printf("Status: 303 See Other\r\n");
564		printf("Location: http://%s/%s%s%s/%s",
565		    HTTP_HOST, scriptname,
566		    *scriptname == '\0' ? "" : "/",
567		    req->q.manpath, r[0].file);
568		printf("\r\n"
569		     "Content-Type: text/html; charset=utf-8\r\n"
570		     "\r\n");
571		return;
572	}
573
574	resp_begin_html(200, NULL);
575	resp_searchform(req,
576	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
577
578	if (sz > 1) {
579		puts("<table class=\"results\">");
580		for (i = 0; i < sz; i++) {
581			printf("  <tr>\n"
582			       "    <td>"
583			       "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
584			    scriptname, *scriptname == '\0' ? "" : "/",
585			    req->q.manpath, r[i].file);
586			html_print(r[i].names);
587			printf("</a></td>\n"
588			       "    <td><span class=\"Nd\">");
589			html_print(r[i].output);
590			puts("</span></td>\n"
591			     "  </tr>");
592		}
593		puts("</table>");
594	}
595
596	/*
597	 * In man(1) mode, show one of the pages
598	 * even if more than one is found.
599	 */
600
601	if (req->q.equal || sz == 1) {
602		puts("<hr>");
603		iuse = 0;
604		priouse = 20;
605		archpriouse = 3;
606		for (i = 0; i < sz; i++) {
607			sec = r[i].file;
608			sec += strcspn(sec, "123456789");
609			if (sec[0] == '\0')
610				continue;
611			prio = sec_prios[sec[0] - '1'];
612			if (sec[1] != '/')
613				prio += 10;
614			if (req->q.arch == NULL) {
615				archprio =
616				    ((arch = strchr(sec + 1, '/'))
617					== NULL) ? 3 :
618				    ((archend = strchr(arch + 1, '/'))
619					== NULL) ? 0 :
620				    strncmp(arch, "amd64/",
621					archend - arch) ? 2 : 1;
622				if (archprio < archpriouse) {
623					archpriouse = archprio;
624					priouse = prio;
625					iuse = i;
626					continue;
627				}
628				if (archprio > archpriouse)
629					continue;
630			}
631			if (prio >= priouse)
632				continue;
633			priouse = prio;
634			iuse = i;
635		}
636		resp_show(req, r[iuse].file);
637	}
638
639	resp_end_html();
640}
641
/*
 * Print a preformatted manual page inside <div class="catman"><pre>,
 * translating backspace overstrike sequences into HTML:
 * "_\bX" becomes italic X, a few special combinations become
 * '*' or '+', and any other "X\bY" becomes bold Y.
 * If the file cannot be opened, an error paragraph is printed
 * instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;

		/*
		 * The last character of the line, usually the
		 * newline, is handled after the loop.
		 */

		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top
			 * of p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both operands of each pair are the two
			 * overstruck characters, p[i] and p[i + 2];
			 * testing p[i + 1] would be pointless because
			 * it is known to be the backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 * If an overstrike sequence consumed the end of the
		 * line, i is already beyond len - 1 and nothing is
		 * left to print.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
778
779static void
780resp_format(const struct req *req, const char *file)
781{
782	struct manoutput conf;
783	struct mparse	*mp;
784	struct roff_man	*man;
785	void		*vp;
786	int		 fd;
787	int		 usepath;
788
789	if (-1 == (fd = open(file, O_RDONLY, 0))) {
790		puts("<p>You specified an invalid manual file.</p>");
791		return;
792	}
793
794	mchars_alloc();
795	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
796	    MANDOCLEVEL_BADARG, NULL, req->q.manpath);
797	mparse_readfd(mp, fd, file);
798	close(fd);
799
800	memset(&conf, 0, sizeof(conf));
801	conf.fragment = 1;
802	usepath = strcmp(req->q.manpath, req->p[0]);
803	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
804	    usepath ? req->q.manpath : "", usepath ? "/" : "");
805
806	mparse_result(mp, &man, NULL);
807	if (man == NULL) {
808		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
809		pg_error_internal();
810		mparse_free(mp);
811		mchars_free();
812		return;
813	}
814
815	vp = html_alloc(&conf);
816
817	if (man->macroset == MACROSET_MDOC) {
818		mdoc_validate(man);
819		html_mdoc(vp, man);
820	} else {
821		man_validate(man);
822		html_man(vp, man);
823	}
824
825	html_free(vp);
826	mparse_free(mp);
827	mchars_free();
828	free(conf.man);
829}
830
/*
 * Print one manual page.  After skipping an optional leading
 * "./", a file name starting with 'c' is treated as a
 * preformatted page, anything else as manual source code.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = (file[0] == '.' && file[1] == '/') ? file + 2 : file;

	if (*rel == 'c') {
		resp_catman(req, rel);
		return;
	}
	resp_format(req, rel);
}
843
844static void
845pg_show(struct req *req, const char *fullpath)
846{
847	char		*manpath;
848	const char	*file;
849
850	if ((file = strchr(fullpath, '/')) == NULL) {
851		pg_error_badrequest(
852		    "You did not specify a page to show.");
853		return;
854	}
855	manpath = mandoc_strndup(fullpath, file - fullpath);
856	file++;
857
858	if ( ! validate_manpath(req, manpath)) {
859		pg_error_badrequest(
860		    "You specified an invalid manpath.");
861		free(manpath);
862		return;
863	}
864
865	/*
866	 * Begin by chdir()ing into the manpath.
867	 * This way we can pick up the database files, which are
868	 * relative to the manpath root.
869	 */
870
871	if (chdir(manpath) == -1) {
872		warn("chdir %s", manpath);
873		pg_error_internal();
874		free(manpath);
875		return;
876	}
877	free(manpath);
878
879	if ( ! validate_filename(file)) {
880		pg_error_badrequest(
881		    "You specified an invalid manual file.");
882		return;
883	}
884
885	resp_begin_html(200, NULL);
886	resp_searchform(req, FOCUS_NONE);
887	resp_show(req, file);
888	resp_end_html();
889}
890
891static void
892pg_search(const struct req *req)
893{
894	struct mansearch	  search;
895	struct manpaths		  paths;
896	struct manpage		 *res;
897	char			**argv;
898	char			 *query, *rp, *wp;
899	size_t			  ressz;
900	int			  argc;
901
902	/*
903	 * Begin by chdir()ing into the root of the manpath.
904	 * This way we can pick up the database files, which are
905	 * relative to the manpath root.
906	 */
907
908	if (chdir(req->q.manpath) == -1) {
909		warn("chdir %s", req->q.manpath);
910		pg_error_internal();
911		return;
912	}
913
914	search.arch = req->q.arch;
915	search.sec = req->q.sec;
916	search.outkey = "Nd";
917	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
918	search.firstmatch = 1;
919
920	paths.sz = 1;
921	paths.paths = mandoc_malloc(sizeof(char *));
922	paths.paths[0] = mandoc_strdup(".");
923
924	/*
925	 * Break apart at spaces with backslash-escaping.
926	 */
927
928	argc = 0;
929	argv = NULL;
930	rp = query = mandoc_strdup(req->q.query);
931	for (;;) {
932		while (isspace((unsigned char)*rp))
933			rp++;
934		if (*rp == '\0')
935			break;
936		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
937		argv[argc++] = wp = rp;
938		for (;;) {
939			if (isspace((unsigned char)*rp)) {
940				*wp = '\0';
941				rp++;
942				break;
943			}
944			if (rp[0] == '\\' && rp[1] != '\0')
945				rp++;
946			if (wp != rp)
947				*wp = *rp;
948			if (*rp == '\0')
949				break;
950			wp++;
951			rp++;
952		}
953	}
954
955	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
956		pg_noresult(req, "You entered an invalid query.");
957	else if (0 == ressz)
958		pg_noresult(req, "No results found.");
959	else
960		pg_searchres(req, res, ressz);
961
962	free(query);
963	mansearch_free(res, ressz);
964	free(paths.paths[0]);
965	free(paths.paths);
966}
967
968int
969main(void)
970{
971	struct req	 req;
972	struct itimerval itimer;
973	const char	*path;
974	const char	*querystring;
975	int		 i;
976
977	/* Poor man's ReDoS mitigation. */
978
979	itimer.it_value.tv_sec = 2;
980	itimer.it_value.tv_usec = 0;
981	itimer.it_interval.tv_sec = 2;
982	itimer.it_interval.tv_usec = 0;
983	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
984		warn("setitimer");
985		pg_error_internal();
986		return EXIT_FAILURE;
987	}
988
989	/*
990	 * First we change directory into the MAN_DIR so that
991	 * subsequent scanning for manpath directories is rooted
992	 * relative to the same position.
993	 */
994
995	if (chdir(MAN_DIR) == -1) {
996		warn("MAN_DIR: %s", MAN_DIR);
997		pg_error_internal();
998		return EXIT_FAILURE;
999	}
1000
1001	memset(&req, 0, sizeof(struct req));
1002	req.q.equal = 1;
1003	parse_manpath_conf(&req);
1004
1005	/* Parse the path info and the query string. */
1006
1007	if ((path = getenv("PATH_INFO")) == NULL)
1008		path = "";
1009	else if (*path == '/')
1010		path++;
1011
1012	if (*path != '\0') {
1013		parse_path_info(&req, path);
1014		if (req.q.manpath == NULL || access(path, F_OK) == -1)
1015			path = "";
1016	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1017		parse_query_string(&req, querystring);
1018
1019	/* Validate parsed data and add defaults. */
1020
1021	if (req.q.manpath == NULL)
1022		req.q.manpath = mandoc_strdup(req.p[0]);
1023	else if ( ! validate_manpath(&req, req.q.manpath)) {
1024		pg_error_badrequest(
1025		    "You specified an invalid manpath.");
1026		return EXIT_FAILURE;
1027	}
1028
1029	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1030		pg_error_badrequest(
1031		    "You specified an invalid architecture.");
1032		return EXIT_FAILURE;
1033	}
1034
1035	/* Dispatch to the three different pages. */
1036
1037	if ('\0' != *path)
1038		pg_show(&req, path);
1039	else if (NULL != req.q.query)
1040		pg_search(&req);
1041	else
1042		pg_index(&req);
1043
1044	free(req.q.manpath);
1045	free(req.q.arch);
1046	free(req.q.sec);
1047	free(req.q.query);
1048	for (i = 0; i < (int)req.psz; i++)
1049		free(req.p[i]);
1050	free(req.p);
1051	return EXIT_SUCCESS;
1052}
1053
/*
 * If PATH_INFO is not a file name, translate it to a query.
 * The path has the form [manpath/][manN/][arch/]name[.sec].
 * On return, each non-NULL string in req->q owns its own
 * allocation.  Too many or misplaced directory components
 * result in a bad request page and termination of the program.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	char	*dir[4];
	int	 i;

	req->isquery = 0;
	req->q.equal = 1;
	/* Working copy; it is cut into pieces in place below. */
	req->q.manpath = mandoc_strdup(path);
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
		/* No directory at all: the copy becomes the query. */
		req->q.query = req->q.manpath;
		req->q.manpath = NULL;
	} else
		*req->q.query++ = '\0';

	/* Optional trailing section. */
	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
		/* Only a suffix starting with a digit is a section. */
		if(isdigit((unsigned char)req->q.sec[1])) {
			*req->q.sec++ = '\0';
			req->q.sec = mandoc_strdup(req->q.sec);
		} else
			req->q.sec = NULL;
	}

	/* Handle the case of name[.section] only. */
	if (req->q.manpath == NULL)
		return;
	/* The query still points into the copy; give it its own. */
	req->q.query = mandoc_strdup(req->q.query);

	/*
	 * Split directory components.
	 * At most three are accepted, and the entry following
	 * the last component is set to NULL.
	 */
	dir[i = 0] = req->q.manpath;
	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
		if (++i == 3) {
			pg_error_badrequest(
			    "You specified too many directory components.");
			exit(EXIT_FAILURE);
		}
		*dir[i]++ = '\0';
	}

	/*
	 * Optional manpath.
	 * From here on, i is the index of the next component to
	 * look at: 1 if dir[0] is a valid manpath, 0 otherwise.
	 * In the latter case, dir[0] remains the only pointer
	 * to the working copy.
	 */
	if ((i = validate_manpath(req, req->q.manpath)) == 0)
		req->q.manpath = NULL;
	else if (dir[1] == NULL)
		return;

	/* Optional section; it overrides a trailing section. */
	if (strncmp(dir[i], "man", 3) == 0) {
		free(req->q.sec);
		req->q.sec = mandoc_strdup(dir[i++] + 3);
	}
	if (dir[i] == NULL) {
		/* Nothing is left; drop the copy unless it is owned. */
		if (req->q.manpath == NULL)
			free(dir[0]);
		return;
	}
	if (dir[i + 1] != NULL) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}

	/*
	 * Optional architecture.
	 * If it is the first component, the working copy itself
	 * becomes the architecture string; otherwise, it is
	 * duplicated, and the copy is dropped unless the manpath
	 * owns it.
	 */
	if (i) {
		req->q.arch = mandoc_strdup(dir[i]);
		if (req->q.manpath == NULL)
			free(dir[0]);
	} else
		req->q.arch = dir[0];
}
1130
1131/*
1132 * Scan for indexable paths.
1133 */
1134static void
1135parse_manpath_conf(struct req *req)
1136{
1137	FILE	*fp;
1138	char	*dp;
1139	size_t	 dpsz;
1140	ssize_t	 len;
1141
1142	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1143		warn("%s/manpath.conf", MAN_DIR);
1144		pg_error_internal();
1145		exit(EXIT_FAILURE);
1146	}
1147
1148	dp = NULL;
1149	dpsz = 0;
1150
1151	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1152		if (dp[len - 1] == '\n')
1153			dp[--len] = '\0';
1154		req->p = mandoc_realloc(req->p,
1155		    (req->psz + 1) * sizeof(char *));
1156		if ( ! validate_urifrag(dp)) {
1157			warnx("%s/manpath.conf contains "
1158			    "unsafe path \"%s\"", MAN_DIR, dp);
1159			pg_error_internal();
1160			exit(EXIT_FAILURE);
1161		}
1162		if (strchr(dp, '/') != NULL) {
1163			warnx("%s/manpath.conf contains "
1164			    "path with slash \"%s\"", MAN_DIR, dp);
1165			pg_error_internal();
1166			exit(EXIT_FAILURE);
1167		}
1168		req->p[req->psz++] = dp;
1169		dp = NULL;
1170		dpsz = 0;
1171	}
1172	free(dp);
1173
1174	if (req->p == NULL) {
1175		warnx("%s/manpath.conf is empty", MAN_DIR);
1176		pg_error_internal();
1177		exit(EXIT_FAILURE);
1178	}
1179}
1180