1241675Suqs/*	$Id: read.c,v 1.28 2012/02/16 20:51:31 joerg Exp $ */
2241675Suqs/*
3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4241675Suqs * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
5241675Suqs *
6241675Suqs * Permission to use, copy, modify, and distribute this software for any
7241675Suqs * purpose with or without fee is hereby granted, provided that the above
8241675Suqs * copyright notice and this permission notice appear in all copies.
9241675Suqs *
10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17241675Suqs */
18241675Suqs#ifdef HAVE_CONFIG_H
19241675Suqs#include "config.h"
20241675Suqs#endif
21241675Suqs
22241675Suqs#ifdef HAVE_MMAP
23241675Suqs# include <sys/stat.h>
24241675Suqs# include <sys/mman.h>
25241675Suqs#endif
26241675Suqs
27241675Suqs#include <assert.h>
28241675Suqs#include <ctype.h>
29241675Suqs#include <fcntl.h>
30241675Suqs#include <stdarg.h>
31241675Suqs#include <stdint.h>
32241675Suqs#include <stdio.h>
33241675Suqs#include <stdlib.h>
34241675Suqs#include <string.h>
35241675Suqs#include <unistd.h>
36241675Suqs
37241675Suqs#include "mandoc.h"
38241675Suqs#include "libmandoc.h"
39241675Suqs#include "mdoc.h"
40241675Suqs#include "man.h"
41241675Suqs#include "main.h"
42241675Suqs
43241675Suqs#ifndef MAP_FILE
44241675Suqs#define	MAP_FILE	0
45241675Suqs#endif
46241675Suqs
47241675Suqs#define	REPARSE_LIMIT	1000
48241675Suqs
/*
 * A sized run of bytes.  Used for whole input files (heap-allocated or
 * mmap()ed by read_whole_file()) and for the per-line scratch buffer
 * that resize_buf() grows on demand.
 */
struct	buf {
	char	 	 *buf; /* binary input buffer */
	size_t		  sz; /* size of binary buffer, in bytes */
};
53241675Suqs
/*
 * State of one parse session.  Allocated by mparse_alloc(), recycled
 * between files with mparse_reset() and released by mparse_free().
 */
struct	mparse {
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  line; /* line number in the file */
	enum mparset	  inttype; /* which parser to use */
	struct man	 *pman; /* persistent man parser */
	struct mdoc	 *pmdoc; /* persistent mdoc parser */
	struct man	 *man; /* man parser */
	struct mdoc	 *mdoc; /* mdoc parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	int		  reparse_count; /* finite interp. stack */
	mandocmsg	  mmsg; /* warning/error message handler */
	void		 *arg; /* argument to mmsg */
	const char	 *file; /* name of the file being parsed (for messages) */
	struct buf	 *secondary; /* copy of parsed lines; NULL unless mparse_keep() */
};
70241675Suqs
/*
 * Forward declarations of the file-local helpers.  mparse_buf_r() and
 * mparse_readfd_r() call each other indirectly (for `so' inclusion),
 * so they have to be declared before either is defined.
 */
static	void	  resize_buf(struct buf *, size_t);
static	void	  mparse_buf_r(struct mparse *, struct buf, int);
static	void	  mparse_readfd_r(struct mparse *, int, const char *, int);
static	void	  pset(const char *, int, struct mparse *);
static	int	  read_whole_file(const char *, int, struct buf *, int *);
static	void	  mparse_end(struct mparse *);
77241675Suqs
/*
 * Indexed by enum mandoclevel: the smallest enum mandocerr value that
 * belongs to each level.  mandoc_msg() walks this table downwards from
 * MANDOCLEVEL_FATAL to find the level of a given error code.
 * The per-entry level names below follow the order of mandoclevels[].
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,		/* SUCCESS */
	MANDOCERR_WARNING,	/* RESERVED */
	MANDOCERR_WARNING,	/* WARNING */
	MANDOCERR_ERROR,	/* ERROR */
	MANDOCERR_FATAL,	/* FATAL */
	MANDOCERR_MAX,		/* BADARG */
	MANDOCERR_MAX		/* SYSERR */
};
87241675Suqs
/*
 * Human-readable text for every enum mandocerr value, indexed by the
 * error code itself (see mparse_strerror()).  The entries must stay in
 * exactly the order of the enum declaration, which is not part of this
 * file (presumably mandoc.h -- verify when adding or moving entries).
 * The "generic ..." entries presumably line up with the level
 * boundaries MANDOCERR_WARNING, MANDOCERR_ERROR and MANDOCERR_FATAL
 * used in mandoclimits[].
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"no title in document",
	"document title should be all caps",
	"unknown manual section",
	"date missing, using today's date",
	"cannot parse date, using it verbatim",
	"prologue macros out of order",
	"duplicate prologue macro",
	"macro not allowed in prologue",
	"macro not allowed in body",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"NAME section must come first",
	"bad NAME section contents",
	"manual name not yet set",
	"sections out of conventional order",
	"duplicate section name",
	"section not in conventional manual section",

	/* related to macros and nesting */
	"skipping obsolete macro",
	"skipping paragraph macro",
	"skipping no-space macro",
	"blocks badly nested",
	"child violates parent syntax",
	"nested displays are not portable",
	"already in literal mode",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty macro",
	"argument count wrong",
	"missing display type",
	"list type must come first",
	"tag lists require a width argument",
	"missing font type",
	"skipping end of block that is not open",

	/* related to bad macro arguments */
	"skipping argument",
	"duplicate argument",
	"duplicate display type",
	"duplicate list type",
	"unknown AT&T UNIX version",
	"bad Boolean value",
	"unknown font",
	"unknown standard specifier",
	"bad width argument",

	/* related to plain text */
	"blank line in non-literal context",
	"tab in non-literal context",
	"end of line whitespace",
	"bad comment style",
	"bad escape sequence",
	"unterminated quoted string",

	/* related to equations */
	"unexpected literal in equation",

	"generic error",

	/* related to equations */
	"unexpected equation scope closure",
	"equation scope open on exit",
	"overlapping equation scopes",
	"unexpected end of equation",
	"equation syntax error",

	/* related to tables */
	"bad table syntax",
	"bad table option",
	"bad table layout",
	"no table layout cells specified",
	"no table data cells specified",
	"ignore data in cell",
	"data block still open",
	"ignoring extra data cells",

	/* related to roff requests, macros and block structure */
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"escaped character not allowed in a name",
	"skipping text before the first section header",
	"skipping unknown macro",
	"NOT IMPLEMENTED, please use groff: skipping request",
	"argument count wrong",
	"skipping end of block that is not open",
	"missing end of block",
	"scope open on exit",
	"uname(3) system call failed",
	"macro requires line argument(s)",
	"macro requires body argument(s)",
	"macro requires argument(s)",
	"missing list type",
	"line argument(s) will be lost",
	"body argument(s) will be lost",

	"generic fatal error",

	/* conditions that stop the parse */
	"not a manual",
	"column syntax is inconsistent",
	"NOT IMPLEMENTED: .Bd -file",
	"argument count wrong, violates syntax",
	"child violates parent syntax",
	"argument count wrong, violates syntax",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	"no document body",
	"no document prologue",
	"static buffer exhausted",
};
204241675Suqs
/*
 * Printable name of each enum mandoclevel, indexed by the level value
 * and handed out by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"FATAL",
	"BADARG",
	"SYSERR"
};
214241675Suqs
215241675Suqsstatic void
216241675Suqsresize_buf(struct buf *buf, size_t initial)
217241675Suqs{
218241675Suqs
219241675Suqs	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
220241675Suqs	buf->buf = mandoc_realloc(buf->buf, buf->sz);
221241675Suqs}
222241675Suqs
223241675Suqsstatic void
224241675Suqspset(const char *buf, int pos, struct mparse *curp)
225241675Suqs{
226241675Suqs	int		 i;
227241675Suqs
228241675Suqs	/*
229241675Suqs	 * Try to intuit which kind of manual parser should be used.  If
230241675Suqs	 * passed in by command-line (-man, -mdoc), then use that
231241675Suqs	 * explicitly.  If passed as -mandoc, then try to guess from the
232241675Suqs	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
233241675Suqs	 * default to -man, which is more lenient.
234241675Suqs	 *
235241675Suqs	 * Separate out pmdoc/pman from mdoc/man: the first persists
236241675Suqs	 * through all parsers, while the latter is used per-parse.
237241675Suqs	 */
238241675Suqs
239241675Suqs	if ('.' == buf[0] || '\'' == buf[0]) {
240241675Suqs		for (i = 1; buf[i]; i++)
241241675Suqs			if (' ' != buf[i] && '\t' != buf[i])
242241675Suqs				break;
243241675Suqs		if ('\0' == buf[i])
244241675Suqs			return;
245241675Suqs	}
246241675Suqs
247241675Suqs	switch (curp->inttype) {
248241675Suqs	case (MPARSE_MDOC):
249241675Suqs		if (NULL == curp->pmdoc)
250241675Suqs			curp->pmdoc = mdoc_alloc(curp->roff, curp);
251241675Suqs		assert(curp->pmdoc);
252241675Suqs		curp->mdoc = curp->pmdoc;
253241675Suqs		return;
254241675Suqs	case (MPARSE_MAN):
255241675Suqs		if (NULL == curp->pman)
256241675Suqs			curp->pman = man_alloc(curp->roff, curp);
257241675Suqs		assert(curp->pman);
258241675Suqs		curp->man = curp->pman;
259241675Suqs		return;
260241675Suqs	default:
261241675Suqs		break;
262241675Suqs	}
263241675Suqs
264241675Suqs	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
265241675Suqs		if (NULL == curp->pmdoc)
266241675Suqs			curp->pmdoc = mdoc_alloc(curp->roff, curp);
267241675Suqs		assert(curp->pmdoc);
268241675Suqs		curp->mdoc = curp->pmdoc;
269241675Suqs		return;
270241675Suqs	}
271241675Suqs
272241675Suqs	if (NULL == curp->pman)
273241675Suqs		curp->pman = man_alloc(curp->roff, curp);
274241675Suqs	assert(curp->pman);
275241675Suqs	curp->man = curp->pman;
276241675Suqs}
277241675Suqs
/*
 * Main parse routine for one in-memory buffer.  It is run once for each
 * opened file (start != 0) and recursively on a single expanded line
 * when the roff preprocessor asks for a reparse (start == 0); files
 * pulled in with `so' re-enter it through mparse_readfd_r().
 *
 * The buffer is cut into logical input lines (joining backslash-newline
 * continuations, dropping comments, replacing bad bytes by '?').  Each
 * line is handed to roff_parseln() and, depending on its verdict, on to
 * the man or mdoc parser.
 *
 *   curp   parse state; line, reparse_count, file_status, the parser
 *          pointers and the lookaside buffer are updated here
 *   blk    bytes to parse; passed by value and only read
 *   start  non-zero for a real file buffer: curp->line follows the
 *          input and a NUL byte ends the input only at the start of a
 *          line; zero for a temporary buffer, which is treated as
 *          ending at its first NUL byte
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln; /* the logical line being assembled */
	enum rofferr	 rr;
	int		 i, of, rc; /* i: read position in blk; of: offset into ln */
	int		 pos; /* byte number in the ln buffer */
	int		 lnn; /* line number in the real file */
	unsigned char	 c;

	memset(&ln, 0, sizeof(struct buf));

	lnn = curp->line;
	pos = 0;

	/* One iteration per logical line; i is advanced inside the body. */

	for (i = 0; i < (int)blk.sz; ) {
		/* A NUL byte at the start of a line ends the input. */
		if (0 == pos && '\0' == blk.buf[i])
			break;

		/*
		 * Only a real file advances the reported line number
		 * and restarts the reparse counter for each line.
		 */
		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;
		}

		/* Copy bytes from blk to ln until the line is complete. */

		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Warn about bogus characters.  If you're using
			 * non-ASCII encoding, you're screwing your
			 * readers.  Since I'd rather this not happen,
			 * I'll be helpful and replace these characters
			 * with "?", so we don't display gibberish.
			 * Note to manual writers: use special characters.
			 */

			c = (unsigned char) blk.buf[i];

			if ( ! (isascii(c) &&
					(isgraph(c) || isblank(c)))) {
				mandoc_msg(MANDOCERR_BADCHAR, curp,
						curp->line, pos, NULL);
				i++;
				if (pos >= (int)ln.sz)
					resize_buf(&ln, 256);
				ln.buf[pos++] = '?';
				continue;
			}

			/* Trailing backslash = a plain char. */

			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
				if (pos >= (int)ln.sz)
					resize_buf(&ln, 256);
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				/* Line continuation: join with the next line. */
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				i += 2;
				/*
				 * Comment, skip to end of line.
				 * NOTE(review): this scan is bounded by
				 * blk.sz only and does not stop at a NUL
				 * byte; confirm that for start == 0 it
				 * cannot run past the terminator of a
				 * temporary buffer.
				 */
				for (; i < (int)blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}

				/*
				 * Backout trailing whitespaces, but keep
				 * a space that follows a backslash.
				 */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Some other escape sequence, copy & cont. */

			if (pos + 1 >= (int)ln.sz)
				resize_buf(&ln, 256);

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		/* NUL-terminate the assembled line. */

 		if (pos >= (int)ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 * Each line is stored followed by a newline, and the
		 * whole buffer is kept NUL-terminated (the terminator
		 * is not counted in sz).
		 */

		if (curp->secondary) {
			curp->secondary->buf =
				mandoc_realloc
				(curp->secondary->buf,
				 curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
					curp->secondary->sz,
					ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		/* The preprocessor may replace ln.buf/ln.sz and move `of'. */
		rr = roff_parseln
			(curp->roff, curp->line,
			 &ln.buf, &ln.sz, of, &of);

		/*
		 * Note that `continue' below restarts the outer for
		 * loop (next logical line), while `break' only leaves
		 * the switch.
		 */

		switch (rr) {
		case (ROFF_REPARSE):
			/* Parse the rewritten line as a temporary buffer. */
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
					curp->line, pos, NULL);
			pos = 0;
			continue;
		case (ROFF_APPEND):
			/* Keep ln and append the next input line to it. */
			pos = (int)strlen(ln.buf);
			continue;
		case (ROFF_RERUN):
			/* Run the preprocessor again from the new offset. */
			goto rerun;
		case (ROFF_IGN):
			/* Line fully consumed by the preprocessor. */
			pos = 0;
			continue;
		case (ROFF_ERR):
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		case (ROFF_SO):
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			mparse_readfd_r(curp, -1, ln.buf + of, 1);
			if (MANDOCLEVEL_FATAL <= curp->file_status)
				break;
			pos = 0;
			continue;
		default:
			break;
		}

		/*
		 * If we encounter errors in the recursive parse, make
		 * sure we don't continue parsing.
		 */

		if (MANDOCLEVEL_FATAL <= curp->file_status)
			break;

		/*
		 * If input parsers have not been allocated, do so now.
		 * We keep these instanced between parsers, but set them
		 * locally per parse routine since we can use different
		 * parsers with each one.
		 */

		if ( ! (curp->man || curp->mdoc))
			pset(ln.buf + of, pos - of, curp);

		/*
		 * Lastly, push down into the parsers themselves.  One
		 * of these will have already been set in the pset()
		 * routine.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 */

		/* Stays -1 when no parser was called; 0 means failure. */
		rc = -1;

		if (ROFF_TBL == rr)
			while (NULL != (span = roff_span(curp->roff))) {
				rc = curp->man ?
					man_addspan(curp->man, span) :
					mdoc_addspan(curp->mdoc, span);
				if (0 == rc)
					break;
			}
		else if (ROFF_EQN == rr)
			rc = curp->mdoc ?
				mdoc_addeqn(curp->mdoc,
					roff_eqn(curp->roff)) :
				man_addeqn(curp->man,
					roff_eqn(curp->roff));
		else if (curp->man || curp->mdoc)
			rc = curp->man ?
				man_parseln(curp->man,
					curp->line, ln.buf, of) :
				mdoc_parseln(curp->mdoc,
					curp->line, ln.buf, of);

		if (0 == rc) {
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		}

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}
550241675Suqs
551241675Suqsstatic int
552241675Suqsread_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
553241675Suqs{
554241675Suqs	size_t		 off;
555241675Suqs	ssize_t		 ssz;
556241675Suqs
557241675Suqs#ifdef	HAVE_MMAP
558241675Suqs	struct stat	 st;
559241675Suqs	if (-1 == fstat(fd, &st)) {
560241675Suqs		perror(file);
561241675Suqs		return(0);
562241675Suqs	}
563241675Suqs
564241675Suqs	/*
565241675Suqs	 * If we're a regular file, try just reading in the whole entry
566241675Suqs	 * via mmap().  This is faster than reading it into blocks, and
567241675Suqs	 * since each file is only a few bytes to begin with, I'm not
568241675Suqs	 * concerned that this is going to tank any machines.
569241675Suqs	 */
570241675Suqs
571241675Suqs	if (S_ISREG(st.st_mode)) {
572241675Suqs		if (st.st_size >= (1U << 31)) {
573241675Suqs			fprintf(stderr, "%s: input too large\n", file);
574241675Suqs			return(0);
575241675Suqs		}
576241675Suqs		*with_mmap = 1;
577241675Suqs		fb->sz = (size_t)st.st_size;
578241675Suqs		fb->buf = mmap(NULL, fb->sz, PROT_READ,
579241675Suqs				MAP_FILE|MAP_SHARED, fd, 0);
580241675Suqs		if (fb->buf != MAP_FAILED)
581241675Suqs			return(1);
582241675Suqs	}
583241675Suqs#endif
584241675Suqs
585241675Suqs	/*
586241675Suqs	 * If this isn't a regular file (like, say, stdin), then we must
587241675Suqs	 * go the old way and just read things in bit by bit.
588241675Suqs	 */
589241675Suqs
590241675Suqs	*with_mmap = 0;
591241675Suqs	off = 0;
592241675Suqs	fb->sz = 0;
593241675Suqs	fb->buf = NULL;
594241675Suqs	for (;;) {
595241675Suqs		if (off == fb->sz) {
596241675Suqs			if (fb->sz == (1U << 31)) {
597241675Suqs				fprintf(stderr, "%s: input too large\n", file);
598241675Suqs				break;
599241675Suqs			}
600241675Suqs			resize_buf(fb, 65536);
601241675Suqs		}
602241675Suqs		ssz = read(fd, fb->buf + (int)off, fb->sz - off);
603241675Suqs		if (ssz == 0) {
604241675Suqs			fb->sz = off;
605241675Suqs			return(1);
606241675Suqs		}
607241675Suqs		if (ssz == -1) {
608241675Suqs			perror(file);
609241675Suqs			break;
610241675Suqs		}
611241675Suqs		off += (size_t)ssz;
612241675Suqs	}
613241675Suqs
614241675Suqs	free(fb->buf);
615241675Suqs	fb->buf = NULL;
616241675Suqs	return(0);
617241675Suqs}
618241675Suqs
619241675Suqsstatic void
620241675Suqsmparse_end(struct mparse *curp)
621241675Suqs{
622241675Suqs
623241675Suqs	if (MANDOCLEVEL_FATAL <= curp->file_status)
624241675Suqs		return;
625241675Suqs
626241675Suqs	if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
627241675Suqs		assert(MANDOCLEVEL_FATAL <= curp->file_status);
628241675Suqs		return;
629241675Suqs	}
630241675Suqs
631241675Suqs	if (curp->man && ! man_endparse(curp->man)) {
632241675Suqs		assert(MANDOCLEVEL_FATAL <= curp->file_status);
633241675Suqs		return;
634241675Suqs	}
635241675Suqs
636241675Suqs	if ( ! (curp->man || curp->mdoc)) {
637241675Suqs		mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL);
638241675Suqs		curp->file_status = MANDOCLEVEL_FATAL;
639241675Suqs		return;
640241675Suqs	}
641241675Suqs
642241675Suqs	roff_endparse(curp->roff);
643241675Suqs}
644241675Suqs
645241675Suqsstatic void
646241675Suqsmparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file,
647241675Suqs		int re)
648241675Suqs{
649241675Suqs	const char	*svfile;
650241675Suqs
651241675Suqs	/* Line number is per-file. */
652241675Suqs	svfile = curp->file;
653241675Suqs	curp->file = file;
654241675Suqs	curp->line = 1;
655241675Suqs
656241675Suqs	mparse_buf_r(curp, blk, 1);
657241675Suqs
658241675Suqs	if (0 == re && MANDOCLEVEL_FATAL > curp->file_status)
659241675Suqs		mparse_end(curp);
660241675Suqs
661241675Suqs	curp->file = svfile;
662241675Suqs}
663241675Suqs
664241675Suqsenum mandoclevel
665241675Suqsmparse_readmem(struct mparse *curp, const void *buf, size_t len,
666241675Suqs		const char *file)
667241675Suqs{
668241675Suqs	struct buf blk;
669241675Suqs
670241675Suqs	blk.buf = UNCONST(buf);
671241675Suqs	blk.sz = len;
672241675Suqs
673241675Suqs	mparse_parse_buffer(curp, blk, file, 0);
674241675Suqs	return(curp->file_status);
675241675Suqs}
676241675Suqs
677241675Suqsstatic void
678241675Suqsmparse_readfd_r(struct mparse *curp, int fd, const char *file, int re)
679241675Suqs{
680241675Suqs	struct buf	 blk;
681241675Suqs	int		 with_mmap;
682241675Suqs
683241675Suqs	if (-1 == fd)
684241675Suqs		if (-1 == (fd = open(file, O_RDONLY, 0))) {
685241675Suqs			perror(file);
686241675Suqs			curp->file_status = MANDOCLEVEL_SYSERR;
687241675Suqs			return;
688241675Suqs		}
689241675Suqs	/*
690241675Suqs	 * Run for each opened file; may be called more than once for
691241675Suqs	 * each full parse sequence if the opened file is nested (i.e.,
692241675Suqs	 * from `so').  Simply sucks in the whole file and moves into
693241675Suqs	 * the parse phase for the file.
694241675Suqs	 */
695241675Suqs
696241675Suqs	if ( ! read_whole_file(file, fd, &blk, &with_mmap)) {
697241675Suqs		curp->file_status = MANDOCLEVEL_SYSERR;
698241675Suqs		return;
699241675Suqs	}
700241675Suqs
701241675Suqs	mparse_parse_buffer(curp, blk, file, re);
702241675Suqs
703241675Suqs#ifdef	HAVE_MMAP
704241675Suqs	if (with_mmap)
705241675Suqs		munmap(blk.buf, blk.sz);
706241675Suqs	else
707241675Suqs#endif
708241675Suqs		free(blk.buf);
709241675Suqs
710241675Suqs	if (STDIN_FILENO != fd && -1 == close(fd))
711241675Suqs		perror(file);
712241675Suqs}
713241675Suqs
714241675Suqsenum mandoclevel
715241675Suqsmparse_readfd(struct mparse *curp, int fd, const char *file)
716241675Suqs{
717241675Suqs
718241675Suqs	mparse_readfd_r(curp, fd, file, 0);
719241675Suqs	return(curp->file_status);
720241675Suqs}
721241675Suqs
722241675Suqsstruct mparse *
723241675Suqsmparse_alloc(enum mparset inttype, enum mandoclevel wlevel, mandocmsg mmsg, void *arg)
724241675Suqs{
725241675Suqs	struct mparse	*curp;
726241675Suqs
727241675Suqs	assert(wlevel <= MANDOCLEVEL_FATAL);
728241675Suqs
729241675Suqs	curp = mandoc_calloc(1, sizeof(struct mparse));
730241675Suqs
731241675Suqs	curp->wlevel = wlevel;
732241675Suqs	curp->mmsg = mmsg;
733241675Suqs	curp->arg = arg;
734241675Suqs	curp->inttype = inttype;
735241675Suqs
736241675Suqs	curp->roff = roff_alloc(curp);
737241675Suqs	return(curp);
738241675Suqs}
739241675Suqs
740241675Suqsvoid
741241675Suqsmparse_reset(struct mparse *curp)
742241675Suqs{
743241675Suqs
744241675Suqs	roff_reset(curp->roff);
745241675Suqs
746241675Suqs	if (curp->mdoc)
747241675Suqs		mdoc_reset(curp->mdoc);
748241675Suqs	if (curp->man)
749241675Suqs		man_reset(curp->man);
750241675Suqs	if (curp->secondary)
751241675Suqs		curp->secondary->sz = 0;
752241675Suqs
753241675Suqs	curp->file_status = MANDOCLEVEL_OK;
754241675Suqs	curp->mdoc = NULL;
755241675Suqs	curp->man = NULL;
756241675Suqs}
757241675Suqs
758241675Suqsvoid
759241675Suqsmparse_free(struct mparse *curp)
760241675Suqs{
761241675Suqs
762241675Suqs	if (curp->pmdoc)
763241675Suqs		mdoc_free(curp->pmdoc);
764241675Suqs	if (curp->pman)
765241675Suqs		man_free(curp->pman);
766241675Suqs	if (curp->roff)
767241675Suqs		roff_free(curp->roff);
768241675Suqs	if (curp->secondary)
769241675Suqs		free(curp->secondary->buf);
770241675Suqs
771241675Suqs	free(curp->secondary);
772241675Suqs	free(curp);
773241675Suqs}
774241675Suqs
775241675Suqsvoid
776241675Suqsmparse_result(struct mparse *curp, struct mdoc **mdoc, struct man **man)
777241675Suqs{
778241675Suqs
779241675Suqs	if (mdoc)
780241675Suqs		*mdoc = curp->mdoc;
781241675Suqs	if (man)
782241675Suqs		*man = curp->man;
783241675Suqs}
784241675Suqs
785241675Suqsvoid
786241675Suqsmandoc_vmsg(enum mandocerr t, struct mparse *m,
787241675Suqs		int ln, int pos, const char *fmt, ...)
788241675Suqs{
789241675Suqs	char		 buf[256];
790241675Suqs	va_list		 ap;
791241675Suqs
792241675Suqs	va_start(ap, fmt);
793241675Suqs	vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
794241675Suqs	va_end(ap);
795241675Suqs
796241675Suqs	mandoc_msg(t, m, ln, pos, buf);
797241675Suqs}
798241675Suqs
799241675Suqsvoid
800241675Suqsmandoc_msg(enum mandocerr er, struct mparse *m,
801241675Suqs		int ln, int col, const char *msg)
802241675Suqs{
803241675Suqs	enum mandoclevel level;
804241675Suqs
805241675Suqs	level = MANDOCLEVEL_FATAL;
806241675Suqs	while (er < mandoclimits[level])
807241675Suqs		level--;
808241675Suqs
809241675Suqs	if (level < m->wlevel)
810241675Suqs		return;
811241675Suqs
812241675Suqs	if (m->mmsg)
813241675Suqs		(*m->mmsg)(er, level, m->file, ln, col, msg);
814241675Suqs
815241675Suqs	if (m->file_status < level)
816241675Suqs		m->file_status = level;
817241675Suqs}
818241675Suqs
819241675Suqsconst char *
820241675Suqsmparse_strerror(enum mandocerr er)
821241675Suqs{
822241675Suqs
823241675Suqs	return(mandocerrs[er]);
824241675Suqs}
825241675Suqs
826241675Suqsconst char *
827241675Suqsmparse_strlevel(enum mandoclevel lvl)
828241675Suqs{
829241675Suqs	return(mandoclevels[lvl]);
830241675Suqs}
831241675Suqs
832241675Suqsvoid
833241675Suqsmparse_keep(struct mparse *p)
834241675Suqs{
835241675Suqs
836241675Suqs	assert(NULL == p->secondary);
837241675Suqs	p->secondary = mandoc_calloc(1, sizeof(struct buf));
838241675Suqs}
839241675Suqs
840241675Suqsconst char *
841241675Suqsmparse_getkeep(const struct mparse *p)
842241675Suqs{
843241675Suqs
844241675Suqs	assert(p->secondary);
845241675Suqs	return(p->secondary->sz ? p->secondary->buf : NULL);
846241675Suqs}
847