read.c revision 307795
1/*	$Id: read.c,v 1.149 2016/07/10 13:34:30 schwarze Exp $ */
2/*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19#include "config.h"
20
21#include <sys/types.h>
22#if HAVE_MMAP
23#include <sys/mman.h>
24#include <sys/stat.h>
25#endif
26
27#include <assert.h>
28#include <ctype.h>
29#if HAVE_ERR
30#include <err.h>
31#endif
32#include <errno.h>
33#include <fcntl.h>
34#include <stdarg.h>
35#include <stdint.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <unistd.h>
40#include <zlib.h>
41
42#include "mandoc_aux.h"
43#include "mandoc.h"
44#include "roff.h"
45#include "mdoc.h"
46#include "man.h"
47#include "libmandoc.h"
48#include "roff_int.h"
49
50#define	REPARSE_LIMIT	1000
51
52struct	mparse {
53	struct roff_man	 *man; /* man parser */
54	struct roff	 *roff; /* roff parser (!NULL) */
55	char		 *sodest; /* filename pointed to by .so */
56	const char	 *file; /* filename of current input file */
57	struct buf	 *primary; /* buffer currently being parsed */
58	struct buf	 *secondary; /* preprocessed copy of input */
59	const char	 *defos; /* default operating system */
60	mandocmsg	  mmsg; /* warning/error message handler */
61	enum mandoclevel  file_status; /* status of current parse */
62	enum mandoclevel  wlevel; /* ignore messages below this */
63	int		  options; /* parser options */
64	int		  gzip; /* current input file is gzipped */
65	int		  filenc; /* encoding of the current file */
66	int		  reparse_count; /* finite interp. stack */
67	int		  line; /* line number in the file */
68};
69
/* Forward declarations of the file-local helpers. */
static	void	  choose_parser(struct mparse *);
static	void	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
static	int	  read_whole_file(struct mparse *, const char *, int,
			struct buf *, int *);
static	void	  resize_buf(struct buf *, size_t);
78
79static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
80	MANDOCERR_OK,
81	MANDOCERR_WARNING,
82	MANDOCERR_WARNING,
83	MANDOCERR_ERROR,
84	MANDOCERR_UNSUPP,
85	MANDOCERR_MAX,
86	MANDOCERR_MAX
87};
88
89static	const char * const	mandocerrs[MANDOCERR_MAX] = {
90	"ok",
91
92	"generic warning",
93
94	/* related to the prologue */
95	"missing manual title, using UNTITLED",
96	"missing manual title, using \"\"",
97	"lower case character in document title",
98	"missing manual section, using \"\"",
99	"unknown manual section",
100	"missing date, using today's date",
101	"cannot parse date, using it verbatim",
102	"missing Os macro, using \"\"",
103	"duplicate prologue macro",
104	"late prologue macro",
105	"skipping late title macro",
106	"prologue macros out of order",
107
108	/* related to document structure */
109	".so is fragile, better use ln(1)",
110	"no document body",
111	"content before first section header",
112	"first section is not \"NAME\"",
113	"NAME section without name",
114	"NAME section without description",
115	"description not at the end of NAME",
116	"bad NAME section content",
117	"missing description line, using \"\"",
118	"sections out of conventional order",
119	"duplicate section title",
120	"unexpected section",
121	"unusual Xr order",
122	"unusual Xr punctuation",
123	"AUTHORS section without An macro",
124
125	/* related to macros and nesting */
126	"obsolete macro",
127	"macro neither callable nor escaped",
128	"skipping paragraph macro",
129	"moving paragraph macro out of list",
130	"skipping no-space macro",
131	"blocks badly nested",
132	"nested displays are not portable",
133	"moving content out of list",
134	"fill mode already enabled, skipping",
135	"fill mode already disabled, skipping",
136	"line scope broken",
137
138	/* related to missing macro arguments */
139	"skipping empty request",
140	"conditional request controls empty scope",
141	"skipping empty macro",
142	"empty block",
143	"empty argument, using 0n",
144	"missing display type, using -ragged",
145	"list type is not the first argument",
146	"missing -width in -tag list, using 8n",
147	"missing utility name, using \"\"",
148	"missing function name, using \"\"",
149	"empty head in list item",
150	"empty list item",
151	"missing font type, using \\fR",
152	"unknown font type, using \\fR",
153	"nothing follows prefix",
154	"empty reference block",
155	"missing -std argument, adding it",
156	"missing option string, using \"\"",
157	"missing resource identifier, using \"\"",
158	"missing eqn box, using \"\"",
159
160	/* related to bad macro arguments */
161	"unterminated quoted argument",
162	"duplicate argument",
163	"skipping duplicate argument",
164	"skipping duplicate display type",
165	"skipping duplicate list type",
166	"skipping -width argument",
167	"wrong number of cells",
168	"unknown AT&T UNIX version",
169	"comma in function argument",
170	"parenthesis in function name",
171	"invalid content in Rs block",
172	"invalid Boolean argument",
173	"unknown font, skipping request",
174	"odd number of characters in request",
175
176	/* related to plain text */
177	"blank line in fill mode, using .sp",
178	"tab in filled text",
179	"whitespace at end of input line",
180	"bad comment style",
181	"invalid escape sequence",
182	"undefined string, using \"\"",
183
184	/* related to tables */
185	"tbl line starts with span",
186	"tbl column starts with span",
187	"skipping vertical bar in tbl layout",
188
189	"generic error",
190
191	/* related to tables */
192	"non-alphabetic character in tbl options",
193	"skipping unknown tbl option",
194	"missing tbl option argument",
195	"wrong tbl option argument size",
196	"empty tbl layout",
197	"invalid character in tbl layout",
198	"unmatched parenthesis in tbl layout",
199	"tbl without any data cells",
200	"ignoring data in spanned tbl cell",
201	"ignoring extra tbl data cells",
202	"data block open at end of tbl",
203
204	/* related to document structure and macros */
205	NULL,
206	"input stack limit exceeded, infinite loop?",
207	"skipping bad character",
208	"skipping unknown macro",
209	"skipping insecure request",
210	"skipping item outside list",
211	"skipping column outside column list",
212	"skipping end of block that is not open",
213	"fewer RS blocks open, skipping",
214	"inserting missing end of block",
215	"appending missing end of block",
216
217	/* related to request and macro arguments */
218	"escaped character not allowed in a name",
219	"NOT IMPLEMENTED: Bd -file",
220	"skipping display without arguments",
221	"missing list type, using -item",
222	"missing manual name, using \"\"",
223	"uname(3) system call failed, using UNKNOWN",
224	"unknown standard specifier",
225	"skipping request without numeric argument",
226	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
227	".so request failed",
228	"skipping all arguments",
229	"skipping excess arguments",
230	"divide by zero",
231
232	"unsupported feature",
233	"input too large",
234	"unsupported control character",
235	"unsupported roff request",
236	"eqn delim option in tbl",
237	"unsupported tbl layout modifier",
238	"ignoring macro in table",
239};
240
241static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
242	"SUCCESS",
243	"RESERVED",
244	"WARNING",
245	"ERROR",
246	"UNSUPP",
247	"BADARG",
248	"SYSERR"
249};
250
251
252static void
253resize_buf(struct buf *buf, size_t initial)
254{
255
256	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
257	buf->buf = mandoc_realloc(buf->buf, buf->sz);
258}
259
260static void
261choose_parser(struct mparse *curp)
262{
263	char		*cp, *ep;
264	int		 format;
265
266	/*
267	 * If neither command line arguments -mdoc or -man select
268	 * a parser nor the roff parser found a .Dd or .TH macro
269	 * yet, look ahead in the main input buffer.
270	 */
271
272	if ((format = roff_getformat(curp->roff)) == 0) {
273		cp = curp->primary->buf;
274		ep = cp + curp->primary->sz;
275		while (cp < ep) {
276			if (*cp == '.' || *cp == '\'') {
277				cp++;
278				if (cp[0] == 'D' && cp[1] == 'd') {
279					format = MPARSE_MDOC;
280					break;
281				}
282				if (cp[0] == 'T' && cp[1] == 'H') {
283					format = MPARSE_MAN;
284					break;
285				}
286			}
287			cp = memchr(cp, '\n', ep - cp);
288			if (cp == NULL)
289				break;
290			cp++;
291		}
292	}
293
294	if (curp->man == NULL) {
295		curp->man = roff_man_alloc(curp->roff, curp, curp->defos,
296		    curp->options & MPARSE_QUICK ? 1 : 0);
297		curp->man->macroset = MACROSET_MAN;
298		curp->man->first->tok = TOKEN_NONE;
299	}
300
301	if (format == MPARSE_MDOC) {
302		mdoc_hash_init();
303		curp->man->macroset = MACROSET_MDOC;
304		curp->man->first->tok = TOKEN_NONE;
305	} else {
306		man_hash_init();
307		curp->man->macroset = MACROSET_MAN;
308		curp->man->first->tok = TOKEN_NONE;
309	}
310}
311
312/*
313 * Main parse routine for a buffer.
314 * It assumes encoding and line numbering are already set up.
315 * It can recurse directly (for invocations of user-defined
316 * macros, inline equations, and input line traps)
317 * and indirectly (for .so file inclusion).
318 */
319static void
320mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
321{
322	const struct tbl_span	*span;
323	struct buf	 ln;
324	const char	*save_file;
325	char		*cp;
326	size_t		 pos; /* byte number in the ln buffer */
327	enum rofferr	 rr;
328	int		 of;
329	int		 lnn; /* line number in the real file */
330	int		 fd;
331	unsigned char	 c;
332
333	memset(&ln, 0, sizeof(ln));
334
335	lnn = curp->line;
336	pos = 0;
337
338	while (i < blk.sz) {
339		if (0 == pos && '\0' == blk.buf[i])
340			break;
341
342		if (start) {
343			curp->line = lnn;
344			curp->reparse_count = 0;
345
346			if (lnn < 3 &&
347			    curp->filenc & MPARSE_UTF8 &&
348			    curp->filenc & MPARSE_LATIN1)
349				curp->filenc = preconv_cue(&blk, i);
350		}
351
352		while (i < blk.sz && (start || blk.buf[i] != '\0')) {
353
354			/*
355			 * When finding an unescaped newline character,
356			 * leave the character loop to process the line.
357			 * Skip a preceding carriage return, if any.
358			 */
359
360			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
361			    '\n' == blk.buf[i + 1])
362				++i;
363			if ('\n' == blk.buf[i]) {
364				++i;
365				++lnn;
366				break;
367			}
368
369			/*
370			 * Make sure we have space for the worst
371			 * case of 11 bytes: "\\[u10ffff]\0"
372			 */
373
374			if (pos + 11 > ln.sz)
375				resize_buf(&ln, 256);
376
377			/*
378			 * Encode 8-bit input.
379			 */
380
381			c = blk.buf[i];
382			if (c & 0x80) {
383				if ( ! (curp->filenc && preconv_encode(
384				    &blk, &i, &ln, &pos, &curp->filenc))) {
385					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
386					    curp->line, pos, "0x%x", c);
387					ln.buf[pos++] = '?';
388					i++;
389				}
390				continue;
391			}
392
393			/*
394			 * Exclude control characters.
395			 */
396
397			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
398				mandoc_vmsg(c == 0x00 || c == 0x04 ||
399				    c > 0x0a ? MANDOCERR_CHAR_BAD :
400				    MANDOCERR_CHAR_UNSUPP,
401				    curp, curp->line, pos, "0x%x", c);
402				i++;
403				if (c != '\r')
404					ln.buf[pos++] = '?';
405				continue;
406			}
407
408			/* Trailing backslash = a plain char. */
409
410			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
411				ln.buf[pos++] = blk.buf[i++];
412				continue;
413			}
414
415			/*
416			 * Found escape and at least one other character.
417			 * When it's a newline character, skip it.
418			 * When there is a carriage return in between,
419			 * skip that one as well.
420			 */
421
422			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
423			    '\n' == blk.buf[i + 2])
424				++i;
425			if ('\n' == blk.buf[i + 1]) {
426				i += 2;
427				++lnn;
428				continue;
429			}
430
431			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
432				i += 2;
433				/* Comment, skip to end of line */
434				for (; i < blk.sz; ++i) {
435					if ('\n' == blk.buf[i]) {
436						++i;
437						++lnn;
438						break;
439					}
440				}
441
442				/* Backout trailing whitespaces */
443				for (; pos > 0; --pos) {
444					if (ln.buf[pos - 1] != ' ')
445						break;
446					if (pos > 2 && ln.buf[pos - 2] == '\\')
447						break;
448				}
449				break;
450			}
451
452			/* Catch escaped bogus characters. */
453
454			c = (unsigned char) blk.buf[i+1];
455
456			if ( ! (isascii(c) &&
457			    (isgraph(c) || isblank(c)))) {
458				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
459				    curp->line, pos, "0x%x", c);
460				i += 2;
461				ln.buf[pos++] = '?';
462				continue;
463			}
464
465			/* Some other escape sequence, copy & cont. */
466
467			ln.buf[pos++] = blk.buf[i++];
468			ln.buf[pos++] = blk.buf[i++];
469		}
470
471		if (pos >= ln.sz)
472			resize_buf(&ln, 256);
473
474		ln.buf[pos] = '\0';
475
476		/*
477		 * A significant amount of complexity is contained by
478		 * the roff preprocessor.  It's line-oriented but can be
479		 * expressed on one line, so we need at times to
480		 * readjust our starting point and re-run it.  The roff
481		 * preprocessor can also readjust the buffers with new
482		 * data, so we pass them in wholesale.
483		 */
484
485		of = 0;
486
487		/*
488		 * Maintain a lookaside buffer of all parsed lines.  We
489		 * only do this if mparse_keep() has been invoked (the
490		 * buffer may be accessed with mparse_getkeep()).
491		 */
492
493		if (curp->secondary) {
494			curp->secondary->buf = mandoc_realloc(
495			    curp->secondary->buf,
496			    curp->secondary->sz + pos + 2);
497			memcpy(curp->secondary->buf +
498			    curp->secondary->sz,
499			    ln.buf, pos);
500			curp->secondary->sz += pos;
501			curp->secondary->buf
502				[curp->secondary->sz] = '\n';
503			curp->secondary->sz++;
504			curp->secondary->buf
505				[curp->secondary->sz] = '\0';
506		}
507rerun:
508		rr = roff_parseln(curp->roff, curp->line, &ln, &of);
509
510		switch (rr) {
511		case ROFF_REPARSE:
512			if (REPARSE_LIMIT >= ++curp->reparse_count)
513				mparse_buf_r(curp, ln, of, 0);
514			else
515				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
516				    curp->line, pos, NULL);
517			pos = 0;
518			continue;
519		case ROFF_APPEND:
520			pos = strlen(ln.buf);
521			continue;
522		case ROFF_RERUN:
523			goto rerun;
524		case ROFF_IGN:
525			pos = 0;
526			continue;
527		case ROFF_SO:
528			if ( ! (curp->options & MPARSE_SO) &&
529			    (i >= blk.sz || blk.buf[i] == '\0')) {
530				curp->sodest = mandoc_strdup(ln.buf + of);
531				free(ln.buf);
532				return;
533			}
534			/*
535			 * We remove `so' clauses from our lookaside
536			 * buffer because we're going to descend into
537			 * the file recursively.
538			 */
539			if (curp->secondary)
540				curp->secondary->sz -= pos + 1;
541			save_file = curp->file;
542			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
543				mparse_readfd(curp, fd, ln.buf + of);
544				close(fd);
545				curp->file = save_file;
546			} else {
547				curp->file = save_file;
548				mandoc_vmsg(MANDOCERR_SO_FAIL,
549				    curp, curp->line, pos,
550				    ".so %s", ln.buf + of);
551				ln.sz = mandoc_asprintf(&cp,
552				    ".sp\nSee the file %s.\n.sp",
553				    ln.buf + of);
554				free(ln.buf);
555				ln.buf = cp;
556				of = 0;
557				mparse_buf_r(curp, ln, of, 0);
558			}
559			pos = 0;
560			continue;
561		default:
562			break;
563		}
564
565		/*
566		 * If input parsers have not been allocated, do so now.
567		 * We keep these instanced between parsers, but set them
568		 * locally per parse routine since we can use different
569		 * parsers with each one.
570		 */
571
572		if (curp->man == NULL ||
573		    curp->man->macroset == MACROSET_NONE)
574			choose_parser(curp);
575
576		/*
577		 * Lastly, push down into the parsers themselves.
578		 * If libroff returns ROFF_TBL, then add it to the
579		 * currently open parse.  Since we only get here if
580		 * there does exist data (see tbl_data.c), we're
581		 * guaranteed that something's been allocated.
582		 * Do the same for ROFF_EQN.
583		 */
584
585		if (rr == ROFF_TBL)
586			while ((span = roff_span(curp->roff)) != NULL)
587				roff_addtbl(curp->man, span);
588		else if (rr == ROFF_EQN)
589			roff_addeqn(curp->man, roff_eqn(curp->roff));
590		else if ((curp->man->macroset == MACROSET_MDOC ?
591		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
592		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
593				break;
594
595		/* Temporary buffers typically are not full. */
596
597		if (0 == start && '\0' == blk.buf[i])
598			break;
599
600		/* Start the next input line. */
601
602		pos = 0;
603	}
604
605	free(ln.buf);
606}
607
608static int
609read_whole_file(struct mparse *curp, const char *file, int fd,
610		struct buf *fb, int *with_mmap)
611{
612	gzFile		 gz;
613	size_t		 off;
614	ssize_t		 ssz;
615
616#if HAVE_MMAP
617	struct stat	 st;
618
619	if (fstat(fd, &st) == -1)
620		err((int)MANDOCLEVEL_SYSERR, "%s", file);
621
622	/*
623	 * If we're a regular file, try just reading in the whole entry
624	 * via mmap().  This is faster than reading it into blocks, and
625	 * since each file is only a few bytes to begin with, I'm not
626	 * concerned that this is going to tank any machines.
627	 */
628
629	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
630		if (st.st_size > 0x7fffffff) {
631			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
632			return 0;
633		}
634		*with_mmap = 1;
635		fb->sz = (size_t)st.st_size;
636		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
637		if (fb->buf != MAP_FAILED)
638			return 1;
639	}
640#endif
641
642	if (curp->gzip) {
643		if ((gz = gzdopen(fd, "rb")) == NULL)
644			err((int)MANDOCLEVEL_SYSERR, "%s", file);
645	} else
646		gz = NULL;
647
648	/*
649	 * If this isn't a regular file (like, say, stdin), then we must
650	 * go the old way and just read things in bit by bit.
651	 */
652
653	*with_mmap = 0;
654	off = 0;
655	fb->sz = 0;
656	fb->buf = NULL;
657	for (;;) {
658		if (off == fb->sz) {
659			if (fb->sz == (1U << 31)) {
660				mandoc_msg(MANDOCERR_TOOLARGE, curp,
661				    0, 0, NULL);
662				break;
663			}
664			resize_buf(fb, 65536);
665		}
666		ssz = curp->gzip ?
667		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
668		    read(fd, fb->buf + (int)off, fb->sz - off);
669		if (ssz == 0) {
670			fb->sz = off;
671			return 1;
672		}
673		if (ssz == -1)
674			err((int)MANDOCLEVEL_SYSERR, "%s", file);
675		off += (size_t)ssz;
676	}
677
678	free(fb->buf);
679	fb->buf = NULL;
680	return 0;
681}
682
683static void
684mparse_end(struct mparse *curp)
685{
686
687	if (curp->man == NULL && curp->sodest == NULL)
688		curp->man = roff_man_alloc(curp->roff, curp, curp->defos,
689		    curp->options & MPARSE_QUICK ? 1 : 0);
690	if (curp->man->macroset == MACROSET_NONE)
691		curp->man->macroset = MACROSET_MAN;
692	if (curp->man->macroset == MACROSET_MDOC)
693		mdoc_endparse(curp->man);
694	else
695		man_endparse(curp->man);
696	roff_endparse(curp->roff);
697}
698
699static void
700mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
701{
702	struct buf	*svprimary;
703	const char	*svfile;
704	size_t		 offset;
705	static int	 recursion_depth;
706
707	if (64 < recursion_depth) {
708		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
709		return;
710	}
711
712	/* Line number is per-file. */
713	svfile = curp->file;
714	curp->file = file;
715	svprimary = curp->primary;
716	curp->primary = &blk;
717	curp->line = 1;
718	recursion_depth++;
719
720	/* Skip an UTF-8 byte order mark. */
721	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
722	    (unsigned char)blk.buf[0] == 0xef &&
723	    (unsigned char)blk.buf[1] == 0xbb &&
724	    (unsigned char)blk.buf[2] == 0xbf) {
725		offset = 3;
726		curp->filenc &= ~MPARSE_LATIN1;
727	} else
728		offset = 0;
729
730	mparse_buf_r(curp, blk, offset, 1);
731
732	if (--recursion_depth == 0)
733		mparse_end(curp);
734
735	curp->primary = svprimary;
736	curp->file = svfile;
737}
738
739enum mandoclevel
740mparse_readmem(struct mparse *curp, void *buf, size_t len,
741		const char *file)
742{
743	struct buf blk;
744
745	blk.buf = buf;
746	blk.sz = len;
747
748	mparse_parse_buffer(curp, blk, file);
749	return curp->file_status;
750}
751
752/*
753 * Read the whole file into memory and call the parsers.
754 * Called recursively when an .so request is encountered.
755 */
756enum mandoclevel
757mparse_readfd(struct mparse *curp, int fd, const char *file)
758{
759	struct buf	 blk;
760	int		 with_mmap;
761	int		 save_filenc;
762
763	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
764		save_filenc = curp->filenc;
765		curp->filenc = curp->options &
766		    (MPARSE_UTF8 | MPARSE_LATIN1);
767		mparse_parse_buffer(curp, blk, file);
768		curp->filenc = save_filenc;
769#if HAVE_MMAP
770		if (with_mmap)
771			munmap(blk.buf, blk.sz);
772		else
773#endif
774			free(blk.buf);
775	}
776	return curp->file_status;
777}
778
779int
780mparse_open(struct mparse *curp, const char *file)
781{
782	char		 *cp;
783	int		  fd;
784
785	curp->file = file;
786	cp = strrchr(file, '.');
787	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
788
789	/* First try to use the filename as it is. */
790
791	if ((fd = open(file, O_RDONLY)) != -1)
792		return fd;
793
794	/*
795	 * If that doesn't work and the filename doesn't
796	 * already  end in .gz, try appending .gz.
797	 */
798
799	if ( ! curp->gzip) {
800		mandoc_asprintf(&cp, "%s.gz", file);
801		fd = open(cp, O_RDONLY);
802		free(cp);
803		if (fd != -1) {
804			curp->gzip = 1;
805			return fd;
806		}
807	}
808
809	/* Neither worked, give up. */
810
811	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
812	return -1;
813}
814
815struct mparse *
816mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
817    const char *defos)
818{
819	struct mparse	*curp;
820
821	curp = mandoc_calloc(1, sizeof(struct mparse));
822
823	curp->options = options;
824	curp->wlevel = wlevel;
825	curp->mmsg = mmsg;
826	curp->defos = defos;
827
828	curp->roff = roff_alloc(curp, options);
829	curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
830		curp->options & MPARSE_QUICK ? 1 : 0);
831	if (curp->options & MPARSE_MDOC) {
832		mdoc_hash_init();
833		curp->man->macroset = MACROSET_MDOC;
834	} else if (curp->options & MPARSE_MAN) {
835		man_hash_init();
836		curp->man->macroset = MACROSET_MAN;
837	}
838	curp->man->first->tok = TOKEN_NONE;
839	return curp;
840}
841
842void
843mparse_reset(struct mparse *curp)
844{
845
846	roff_reset(curp->roff);
847
848	if (curp->man != NULL)
849		roff_man_reset(curp->man);
850	if (curp->secondary)
851		curp->secondary->sz = 0;
852
853	curp->file_status = MANDOCLEVEL_OK;
854
855	free(curp->sodest);
856	curp->sodest = NULL;
857}
858
859void
860mparse_free(struct mparse *curp)
861{
862
863	roff_man_free(curp->man);
864	if (curp->roff)
865		roff_free(curp->roff);
866	if (curp->secondary)
867		free(curp->secondary->buf);
868
869	free(curp->secondary);
870	free(curp->sodest);
871	free(curp);
872}
873
874void
875mparse_result(struct mparse *curp, struct roff_man **man,
876	char **sodest)
877{
878
879	if (sodest && NULL != (*sodest = curp->sodest)) {
880		*man = NULL;
881		return;
882	}
883	if (man)
884		*man = curp->man;
885}
886
887void
888mandoc_vmsg(enum mandocerr t, struct mparse *m,
889		int ln, int pos, const char *fmt, ...)
890{
891	char		 buf[256];
892	va_list		 ap;
893
894	va_start(ap, fmt);
895	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
896	va_end(ap);
897
898	mandoc_msg(t, m, ln, pos, buf);
899}
900
901void
902mandoc_msg(enum mandocerr er, struct mparse *m,
903		int ln, int col, const char *msg)
904{
905	enum mandoclevel level;
906
907	level = MANDOCLEVEL_UNSUPP;
908	while (er < mandoclimits[level])
909		level--;
910
911	if (level < m->wlevel && er != MANDOCERR_FILE)
912		return;
913
914	if (m->mmsg)
915		(*m->mmsg)(er, level, m->file, ln, col, msg);
916
917	if (m->file_status < level)
918		m->file_status = level;
919}
920
921const char *
922mparse_strerror(enum mandocerr er)
923{
924
925	return mandocerrs[er];
926}
927
928const char *
929mparse_strlevel(enum mandoclevel lvl)
930{
931	return mandoclevels[lvl];
932}
933
934void
935mparse_keep(struct mparse *p)
936{
937
938	assert(NULL == p->secondary);
939	p->secondary = mandoc_calloc(1, sizeof(struct buf));
940}
941
942const char *
943mparse_getkeep(const struct mparse *p)
944{
945
946	assert(p->secondary);
947	return p->secondary->sz ? p->secondary->buf : NULL;
948}
949