1241675Suqs/* $Id: read.c,v 1.28 2012/02/16 20:51:31 joerg Exp $ */ 2241675Suqs/* 3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4241675Suqs * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> 5241675Suqs * 6241675Suqs * Permission to use, copy, modify, and distribute this software for any 7241675Suqs * purpose with or without fee is hereby granted, provided that the above 8241675Suqs * copyright notice and this permission notice appear in all copies. 9241675Suqs * 10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17241675Suqs */ 18241675Suqs#ifdef HAVE_CONFIG_H 19241675Suqs#include "config.h" 20241675Suqs#endif 21241675Suqs 22241675Suqs#ifdef HAVE_MMAP 23241675Suqs# include <sys/stat.h> 24241675Suqs# include <sys/mman.h> 25241675Suqs#endif 26241675Suqs 27241675Suqs#include <assert.h> 28241675Suqs#include <ctype.h> 29241675Suqs#include <fcntl.h> 30241675Suqs#include <stdarg.h> 31241675Suqs#include <stdint.h> 32241675Suqs#include <stdio.h> 33241675Suqs#include <stdlib.h> 34241675Suqs#include <string.h> 35241675Suqs#include <unistd.h> 36241675Suqs 37241675Suqs#include "mandoc.h" 38241675Suqs#include "libmandoc.h" 39241675Suqs#include "mdoc.h" 40241675Suqs#include "man.h" 41241675Suqs#include "main.h" 42241675Suqs 43241675Suqs#ifndef MAP_FILE 44241675Suqs#define MAP_FILE 0 45241675Suqs#endif 46241675Suqs 47241675Suqs#define REPARSE_LIMIT 1000 48241675Suqs 49241675Suqsstruct buf { 50241675Suqs char *buf; /* binary input buffer */ 51241675Suqs size_t sz; /* size of binary buffer */ 52241675Suqs}; 53241675Suqs 54241675Suqsstruct mparse { 55241675Suqs enum mandoclevel file_status; /* status of current parse */ 56241675Suqs enum mandoclevel wlevel; /* ignore messages below this */ 57241675Suqs int line; /* line number in the file */ 58241675Suqs enum mparset inttype; /* which parser to use */ 59241675Suqs struct man *pman; /* persistent man parser */ 60241675Suqs struct mdoc *pmdoc; /* persistent mdoc parser */ 61241675Suqs struct man *man; /* man parser */ 62241675Suqs struct mdoc *mdoc; /* mdoc parser */ 63241675Suqs struct roff *roff; /* roff parser (!NULL) */ 64241675Suqs int reparse_count; /* finite interp. stack */ 65241675Suqs mandocmsg mmsg; /* warning/error message handler */ 66241675Suqs void *arg; /* argument to mmsg */ 67241675Suqs const char *file; 68241675Suqs struct buf *secondary; 69241675Suqs}; 70241675Suqs 71241675Suqsstatic void resize_buf(struct buf *, size_t); 72241675Suqsstatic void mparse_buf_r(struct mparse *, struct buf, int); 73241675Suqsstatic void mparse_readfd_r(struct mparse *, int, const char *, int); 74241675Suqsstatic void pset(const char *, int, struct mparse *); 75241675Suqsstatic int read_whole_file(const char *, int, struct buf *, int *); 76241675Suqsstatic void mparse_end(struct mparse *); 77241675Suqs 78241675Suqsstatic const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { 79241675Suqs MANDOCERR_OK, 80241675Suqs MANDOCERR_WARNING, 81241675Suqs MANDOCERR_WARNING, 82241675Suqs MANDOCERR_ERROR, 83241675Suqs MANDOCERR_FATAL, 84241675Suqs MANDOCERR_MAX, 85241675Suqs MANDOCERR_MAX 86241675Suqs}; 87241675Suqs 88241675Suqsstatic const char * const mandocerrs[MANDOCERR_MAX] = { 89241675Suqs "ok", 90241675Suqs 91241675Suqs "generic warning", 92241675Suqs 93241675Suqs /* related to the prologue */ 94241675Suqs "no title in document", 95241675Suqs "document title should be all caps", 96241675Suqs "unknown manual section", 97241675Suqs "date missing, using today's date", 98241675Suqs "cannot parse date, using it verbatim", 99241675Suqs "prologue macros out of order", 100241675Suqs "duplicate prologue macro", 101241675Suqs "macro not allowed in prologue", 102241675Suqs "macro not allowed in body", 103241675Suqs 104241675Suqs /* related to document structure */ 105241675Suqs ".so is fragile, better use ln(1)", 106241675Suqs "NAME section must come first", 107241675Suqs "bad NAME section contents", 108241675Suqs "manual name not yet set", 109241675Suqs "sections out of conventional order", 110241675Suqs "duplicate section name", 111241675Suqs "section not in conventional manual section", 112241675Suqs 113241675Suqs /* related to macros and nesting */ 114241675Suqs "skipping obsolete macro", 115241675Suqs "skipping paragraph macro", 116241675Suqs "skipping no-space macro", 117241675Suqs "blocks badly nested", 118241675Suqs "child violates parent syntax", 119241675Suqs "nested displays are not portable", 120241675Suqs "already in literal mode", 121241675Suqs "line scope broken", 122241675Suqs 123241675Suqs /* related to missing macro arguments */ 124241675Suqs "skipping empty macro", 125241675Suqs "argument count wrong", 126241675Suqs "missing display type", 127241675Suqs "list type must come first", 128241675Suqs "tag lists require a width argument", 129241675Suqs "missing font type", 130241675Suqs "skipping end of block that is not open", 131241675Suqs 132241675Suqs /* related to bad macro arguments */ 133241675Suqs "skipping argument", 134241675Suqs "duplicate argument", 135241675Suqs "duplicate display type", 136241675Suqs "duplicate list type", 137241675Suqs "unknown AT&T UNIX version", 138241675Suqs "bad Boolean value", 139241675Suqs "unknown font", 140241675Suqs "unknown standard specifier", 141241675Suqs "bad width argument", 142241675Suqs 143241675Suqs /* related to plain text */ 144241675Suqs "blank line in non-literal context", 145241675Suqs "tab in non-literal context", 146241675Suqs "end of line whitespace", 147241675Suqs "bad comment style", 148241675Suqs "bad escape sequence", 149241675Suqs "unterminated quoted string", 150241675Suqs 151241675Suqs /* related to equations */ 152241675Suqs "unexpected literal in equation", 153241675Suqs 154241675Suqs "generic error", 155241675Suqs 156241675Suqs /* related to equations */ 157241675Suqs "unexpected equation scope closure", 158241675Suqs "equation scope open on exit", 159241675Suqs "overlapping equation scopes", 160241675Suqs "unexpected end of equation", 161241675Suqs "equation syntax error", 162241675Suqs 163241675Suqs /* related to tables */ 164241675Suqs "bad table syntax", 165241675Suqs "bad table option", 166241675Suqs "bad table layout", 167241675Suqs "no table layout cells specified", 168241675Suqs "no table data cells specified", 169241675Suqs "ignore data in cell", 170241675Suqs "data block still open", 171241675Suqs "ignoring extra data cells", 172241675Suqs 173241675Suqs "input stack limit exceeded, infinite loop?", 174241675Suqs "skipping bad character", 175241675Suqs "escaped character not allowed in a name", 176241675Suqs "skipping text before the first section header", 177241675Suqs "skipping unknown macro", 178241675Suqs "NOT IMPLEMENTED, please use groff: skipping request", 179241675Suqs "argument count wrong", 180241675Suqs "skipping end of block that is not open", 181241675Suqs "missing end of block", 182241675Suqs "scope open on exit", 183241675Suqs "uname(3) system call failed", 184241675Suqs "macro requires line argument(s)", 185241675Suqs "macro requires body argument(s)", 186241675Suqs "macro requires argument(s)", 187241675Suqs "missing list type", 188241675Suqs "line argument(s) will be lost", 189241675Suqs "body argument(s) will be lost", 190241675Suqs 191241675Suqs "generic fatal error", 192241675Suqs 193241675Suqs "not a manual", 194241675Suqs "column syntax is inconsistent", 195241675Suqs "NOT IMPLEMENTED: .Bd -file", 196241675Suqs "argument count wrong, violates syntax", 197241675Suqs "child violates parent syntax", 198241675Suqs "argument count wrong, violates syntax", 199241675Suqs "NOT IMPLEMENTED: .so with absolute path or \"..\"", 200241675Suqs "no document body", 201241675Suqs "no document prologue", 202241675Suqs "static buffer exhausted", 203241675Suqs}; 204241675Suqs 205241675Suqsstatic const char * const mandoclevels[MANDOCLEVEL_MAX] = { 206241675Suqs "SUCCESS", 207241675Suqs "RESERVED", 208241675Suqs "WARNING", 209241675Suqs "ERROR", 210241675Suqs "FATAL", 211241675Suqs "BADARG", 212241675Suqs "SYSERR" 213241675Suqs}; 214241675Suqs 215241675Suqsstatic void 216241675Suqsresize_buf(struct buf *buf, size_t initial) 217241675Suqs{ 218241675Suqs 219241675Suqs buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; 220241675Suqs buf->buf = mandoc_realloc(buf->buf, buf->sz); 221241675Suqs} 222241675Suqs 223241675Suqsstatic void 224241675Suqspset(const char *buf, int pos, struct mparse *curp) 225241675Suqs{ 226241675Suqs int i; 227241675Suqs 228241675Suqs /* 229241675Suqs * Try to intuit which kind of manual parser should be used. If 230241675Suqs * passed in by command-line (-man, -mdoc), then use that 231241675Suqs * explicitly. If passed as -mandoc, then try to guess from the 232241675Suqs * line: either skip dot-lines, use -mdoc when finding `.Dt', or 233241675Suqs * default to -man, which is more lenient. 234241675Suqs * 235241675Suqs * Separate out pmdoc/pman from mdoc/man: the first persists 236241675Suqs * through all parsers, while the latter is used per-parse. 237241675Suqs */ 238241675Suqs 239241675Suqs if ('.' == buf[0] || '\'' == buf[0]) { 240241675Suqs for (i = 1; buf[i]; i++) 241241675Suqs if (' ' != buf[i] && '\t' != buf[i]) 242241675Suqs break; 243241675Suqs if ('\0' == buf[i]) 244241675Suqs return; 245241675Suqs } 246241675Suqs 247241675Suqs switch (curp->inttype) { 248241675Suqs case (MPARSE_MDOC): 249241675Suqs if (NULL == curp->pmdoc) 250241675Suqs curp->pmdoc = mdoc_alloc(curp->roff, curp); 251241675Suqs assert(curp->pmdoc); 252241675Suqs curp->mdoc = curp->pmdoc; 253241675Suqs return; 254241675Suqs case (MPARSE_MAN): 255241675Suqs if (NULL == curp->pman) 256241675Suqs curp->pman = man_alloc(curp->roff, curp); 257241675Suqs assert(curp->pman); 258241675Suqs curp->man = curp->pman; 259241675Suqs return; 260241675Suqs default: 261241675Suqs break; 262241675Suqs } 263241675Suqs 264241675Suqs if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { 265241675Suqs if (NULL == curp->pmdoc) 266241675Suqs curp->pmdoc = mdoc_alloc(curp->roff, curp); 267241675Suqs assert(curp->pmdoc); 268241675Suqs curp->mdoc = curp->pmdoc; 269241675Suqs return; 270241675Suqs } 271241675Suqs 272241675Suqs if (NULL == curp->pman) 273241675Suqs curp->pman = man_alloc(curp->roff, curp); 274241675Suqs assert(curp->pman); 275241675Suqs curp->man = curp->pman; 276241675Suqs} 277241675Suqs 278241675Suqs/* 279241675Suqs * Main parse routine for an opened file. This is called for each 280241675Suqs * opened file and simply loops around the full input file, possibly 281241675Suqs * nesting (i.e., with `so'). 282241675Suqs */ 283241675Suqsstatic void 284241675Suqsmparse_buf_r(struct mparse *curp, struct buf blk, int start) 285241675Suqs{ 286241675Suqs const struct tbl_span *span; 287241675Suqs struct buf ln; 288241675Suqs enum rofferr rr; 289241675Suqs int i, of, rc; 290241675Suqs int pos; /* byte number in the ln buffer */ 291241675Suqs int lnn; /* line number in the real file */ 292241675Suqs unsigned char c; 293241675Suqs 294241675Suqs memset(&ln, 0, sizeof(struct buf)); 295241675Suqs 296241675Suqs lnn = curp->line; 297241675Suqs pos = 0; 298241675Suqs 299241675Suqs for (i = 0; i < (int)blk.sz; ) { 300241675Suqs if (0 == pos && '\0' == blk.buf[i]) 301241675Suqs break; 302241675Suqs 303241675Suqs if (start) { 304241675Suqs curp->line = lnn; 305241675Suqs curp->reparse_count = 0; 306241675Suqs } 307241675Suqs 308241675Suqs while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { 309241675Suqs 310241675Suqs /* 311241675Suqs * When finding an unescaped newline character, 312241675Suqs * leave the character loop to process the line. 313241675Suqs * Skip a preceding carriage return, if any. 314241675Suqs */ 315241675Suqs 316241675Suqs if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz && 317241675Suqs '\n' == blk.buf[i + 1]) 318241675Suqs ++i; 319241675Suqs if ('\n' == blk.buf[i]) { 320241675Suqs ++i; 321241675Suqs ++lnn; 322241675Suqs break; 323241675Suqs } 324241675Suqs 325241675Suqs /* 326241675Suqs * Warn about bogus characters. If you're using 327241675Suqs * non-ASCII encoding, you're screwing your 328241675Suqs * readers. Since I'd rather this not happen, 329241675Suqs * I'll be helpful and replace these characters 330241675Suqs * with "?", so we don't display gibberish. 331241675Suqs * Note to manual writers: use special characters. 332241675Suqs */ 333241675Suqs 334241675Suqs c = (unsigned char) blk.buf[i]; 335241675Suqs 336241675Suqs if ( ! (isascii(c) && 337241675Suqs (isgraph(c) || isblank(c)))) { 338241675Suqs mandoc_msg(MANDOCERR_BADCHAR, curp, 339241675Suqs curp->line, pos, NULL); 340241675Suqs i++; 341241675Suqs if (pos >= (int)ln.sz) 342241675Suqs resize_buf(&ln, 256); 343241675Suqs ln.buf[pos++] = '?'; 344241675Suqs continue; 345241675Suqs } 346241675Suqs 347241675Suqs /* Trailing backslash = a plain char. */ 348241675Suqs 349241675Suqs if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { 350241675Suqs if (pos >= (int)ln.sz) 351241675Suqs resize_buf(&ln, 256); 352241675Suqs ln.buf[pos++] = blk.buf[i++]; 353241675Suqs continue; 354241675Suqs } 355241675Suqs 356241675Suqs /* 357241675Suqs * Found escape and at least one other character. 358241675Suqs * When it's a newline character, skip it. 359241675Suqs * When there is a carriage return in between, 360241675Suqs * skip that one as well. 361241675Suqs */ 362241675Suqs 363241675Suqs if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz && 364241675Suqs '\n' == blk.buf[i + 2]) 365241675Suqs ++i; 366241675Suqs if ('\n' == blk.buf[i + 1]) { 367241675Suqs i += 2; 368241675Suqs ++lnn; 369241675Suqs continue; 370241675Suqs } 371241675Suqs 372241675Suqs if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { 373241675Suqs i += 2; 374241675Suqs /* Comment, skip to end of line */ 375241675Suqs for (; i < (int)blk.sz; ++i) { 376241675Suqs if ('\n' == blk.buf[i]) { 377241675Suqs ++i; 378241675Suqs ++lnn; 379241675Suqs break; 380241675Suqs } 381241675Suqs } 382241675Suqs 383241675Suqs /* Backout trailing whitespaces */ 384241675Suqs for (; pos > 0; --pos) { 385241675Suqs if (ln.buf[pos - 1] != ' ') 386241675Suqs break; 387241675Suqs if (pos > 2 && ln.buf[pos - 2] == '\\') 388241675Suqs break; 389241675Suqs } 390241675Suqs break; 391241675Suqs } 392241675Suqs 393241675Suqs /* Some other escape sequence, copy & cont. */ 394241675Suqs 395241675Suqs if (pos + 1 >= (int)ln.sz) 396241675Suqs resize_buf(&ln, 256); 397241675Suqs 398241675Suqs ln.buf[pos++] = blk.buf[i++]; 399241675Suqs ln.buf[pos++] = blk.buf[i++]; 400241675Suqs } 401241675Suqs 402241675Suqs if (pos >= (int)ln.sz) 403241675Suqs resize_buf(&ln, 256); 404241675Suqs 405241675Suqs ln.buf[pos] = '\0'; 406241675Suqs 407241675Suqs /* 408241675Suqs * A significant amount of complexity is contained by 409241675Suqs * the roff preprocessor. It's line-oriented but can be 410241675Suqs * expressed on one line, so we need at times to 411241675Suqs * readjust our starting point and re-run it. The roff 412241675Suqs * preprocessor can also readjust the buffers with new 413241675Suqs * data, so we pass them in wholesale. 414241675Suqs */ 415241675Suqs 416241675Suqs of = 0; 417241675Suqs 418241675Suqs /* 419241675Suqs * Maintain a lookaside buffer of all parsed lines. We 420241675Suqs * only do this if mparse_keep() has been invoked (the 421241675Suqs * buffer may be accessed with mparse_getkeep()). 422241675Suqs */ 423241675Suqs 424241675Suqs if (curp->secondary) { 425241675Suqs curp->secondary->buf = 426241675Suqs mandoc_realloc 427241675Suqs (curp->secondary->buf, 428241675Suqs curp->secondary->sz + pos + 2); 429241675Suqs memcpy(curp->secondary->buf + 430241675Suqs curp->secondary->sz, 431241675Suqs ln.buf, pos); 432241675Suqs curp->secondary->sz += pos; 433241675Suqs curp->secondary->buf 434241675Suqs [curp->secondary->sz] = '\n'; 435241675Suqs curp->secondary->sz++; 436241675Suqs curp->secondary->buf 437241675Suqs [curp->secondary->sz] = '\0'; 438241675Suqs } 439241675Suqsrerun: 440241675Suqs rr = roff_parseln 441241675Suqs (curp->roff, curp->line, 442241675Suqs &ln.buf, &ln.sz, of, &of); 443241675Suqs 444241675Suqs switch (rr) { 445241675Suqs case (ROFF_REPARSE): 446241675Suqs if (REPARSE_LIMIT >= ++curp->reparse_count) 447241675Suqs mparse_buf_r(curp, ln, 0); 448241675Suqs else 449241675Suqs mandoc_msg(MANDOCERR_ROFFLOOP, curp, 450241675Suqs curp->line, pos, NULL); 451241675Suqs pos = 0; 452241675Suqs continue; 453241675Suqs case (ROFF_APPEND): 454241675Suqs pos = (int)strlen(ln.buf); 455241675Suqs continue; 456241675Suqs case (ROFF_RERUN): 457241675Suqs goto rerun; 458241675Suqs case (ROFF_IGN): 459241675Suqs pos = 0; 460241675Suqs continue; 461241675Suqs case (ROFF_ERR): 462241675Suqs assert(MANDOCLEVEL_FATAL <= curp->file_status); 463241675Suqs break; 464241675Suqs case (ROFF_SO): 465241675Suqs /* 466241675Suqs * We remove `so' clauses from our lookaside 467241675Suqs * buffer because we're going to descend into 468241675Suqs * the file recursively. 469241675Suqs */ 470241675Suqs if (curp->secondary) 471241675Suqs curp->secondary->sz -= pos + 1; 472241675Suqs mparse_readfd_r(curp, -1, ln.buf + of, 1); 473241675Suqs if (MANDOCLEVEL_FATAL <= curp->file_status) 474241675Suqs break; 475241675Suqs pos = 0; 476241675Suqs continue; 477241675Suqs default: 478241675Suqs break; 479241675Suqs } 480241675Suqs 481241675Suqs /* 482241675Suqs * If we encounter errors in the recursive parse, make 483241675Suqs * sure we don't continue parsing. 484241675Suqs */ 485241675Suqs 486241675Suqs if (MANDOCLEVEL_FATAL <= curp->file_status) 487241675Suqs break; 488241675Suqs 489241675Suqs /* 490241675Suqs * If input parsers have not been allocated, do so now. 491241675Suqs * We keep these instanced between parsers, but set them 492241675Suqs * locally per parse routine since we can use different 493241675Suqs * parsers with each one. 494241675Suqs */ 495241675Suqs 496241675Suqs if ( ! (curp->man || curp->mdoc)) 497241675Suqs pset(ln.buf + of, pos - of, curp); 498241675Suqs 499241675Suqs /* 500241675Suqs * Lastly, push down into the parsers themselves. One 501241675Suqs * of these will have already been set in the pset() 502241675Suqs * routine. 503241675Suqs * If libroff returns ROFF_TBL, then add it to the 504241675Suqs * currently open parse. Since we only get here if 505241675Suqs * there does exist data (see tbl_data.c), we're 506241675Suqs * guaranteed that something's been allocated. 507241675Suqs * Do the same for ROFF_EQN. 508241675Suqs */ 509241675Suqs 510241675Suqs rc = -1; 511241675Suqs 512241675Suqs if (ROFF_TBL == rr) 513241675Suqs while (NULL != (span = roff_span(curp->roff))) { 514241675Suqs rc = curp->man ? 515241675Suqs man_addspan(curp->man, span) : 516241675Suqs mdoc_addspan(curp->mdoc, span); 517241675Suqs if (0 == rc) 518241675Suqs break; 519241675Suqs } 520241675Suqs else if (ROFF_EQN == rr) 521241675Suqs rc = curp->mdoc ? 522241675Suqs mdoc_addeqn(curp->mdoc, 523241675Suqs roff_eqn(curp->roff)) : 524241675Suqs man_addeqn(curp->man, 525241675Suqs roff_eqn(curp->roff)); 526241675Suqs else if (curp->man || curp->mdoc) 527241675Suqs rc = curp->man ? 528241675Suqs man_parseln(curp->man, 529241675Suqs curp->line, ln.buf, of) : 530241675Suqs mdoc_parseln(curp->mdoc, 531241675Suqs curp->line, ln.buf, of); 532241675Suqs 533241675Suqs if (0 == rc) { 534241675Suqs assert(MANDOCLEVEL_FATAL <= curp->file_status); 535241675Suqs break; 536241675Suqs } 537241675Suqs 538241675Suqs /* Temporary buffers typically are not full. */ 539241675Suqs 540241675Suqs if (0 == start && '\0' == blk.buf[i]) 541241675Suqs break; 542241675Suqs 543241675Suqs /* Start the next input line. */ 544241675Suqs 545241675Suqs pos = 0; 546241675Suqs } 547241675Suqs 548241675Suqs free(ln.buf); 549241675Suqs} 550241675Suqs 551241675Suqsstatic int 552241675Suqsread_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap) 553241675Suqs{ 554241675Suqs size_t off; 555241675Suqs ssize_t ssz; 556241675Suqs 557241675Suqs#ifdef HAVE_MMAP 558241675Suqs struct stat st; 559241675Suqs if (-1 == fstat(fd, &st)) { 560241675Suqs perror(file); 561241675Suqs return(0); 562241675Suqs } 563241675Suqs 564241675Suqs /* 565241675Suqs * If we're a regular file, try just reading in the whole entry 566241675Suqs * via mmap(). This is faster than reading it into blocks, and 567241675Suqs * since each file is only a few bytes to begin with, I'm not 568241675Suqs * concerned that this is going to tank any machines. 569241675Suqs */ 570241675Suqs 571241675Suqs if (S_ISREG(st.st_mode)) { 572241675Suqs if (st.st_size >= (1U << 31)) { 573241675Suqs fprintf(stderr, "%s: input too large\n", file); 574241675Suqs return(0); 575241675Suqs } 576241675Suqs *with_mmap = 1; 577241675Suqs fb->sz = (size_t)st.st_size; 578241675Suqs fb->buf = mmap(NULL, fb->sz, PROT_READ, 579241675Suqs MAP_FILE|MAP_SHARED, fd, 0); 580241675Suqs if (fb->buf != MAP_FAILED) 581241675Suqs return(1); 582241675Suqs } 583241675Suqs#endif 584241675Suqs 585241675Suqs /* 586241675Suqs * If this isn't a regular file (like, say, stdin), then we must 587241675Suqs * go the old way and just read things in bit by bit. 588241675Suqs */ 589241675Suqs 590241675Suqs *with_mmap = 0; 591241675Suqs off = 0; 592241675Suqs fb->sz = 0; 593241675Suqs fb->buf = NULL; 594241675Suqs for (;;) { 595241675Suqs if (off == fb->sz) { 596241675Suqs if (fb->sz == (1U << 31)) { 597241675Suqs fprintf(stderr, "%s: input too large\n", file); 598241675Suqs break; 599241675Suqs } 600241675Suqs resize_buf(fb, 65536); 601241675Suqs } 602241675Suqs ssz = read(fd, fb->buf + (int)off, fb->sz - off); 603241675Suqs if (ssz == 0) { 604241675Suqs fb->sz = off; 605241675Suqs return(1); 606241675Suqs } 607241675Suqs if (ssz == -1) { 608241675Suqs perror(file); 609241675Suqs break; 610241675Suqs } 611241675Suqs off += (size_t)ssz; 612241675Suqs } 613241675Suqs 614241675Suqs free(fb->buf); 615241675Suqs fb->buf = NULL; 616241675Suqs return(0); 617241675Suqs} 618241675Suqs 619241675Suqsstatic void 620241675Suqsmparse_end(struct mparse *curp) 621241675Suqs{ 622241675Suqs 623241675Suqs if (MANDOCLEVEL_FATAL <= curp->file_status) 624241675Suqs return; 625241675Suqs 626241675Suqs if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) { 627241675Suqs assert(MANDOCLEVEL_FATAL <= curp->file_status); 628241675Suqs return; 629241675Suqs } 630241675Suqs 631241675Suqs if (curp->man && ! man_endparse(curp->man)) { 632241675Suqs assert(MANDOCLEVEL_FATAL <= curp->file_status); 633241675Suqs return; 634241675Suqs } 635241675Suqs 636241675Suqs if ( ! (curp->man || curp->mdoc)) { 637241675Suqs mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL); 638241675Suqs curp->file_status = MANDOCLEVEL_FATAL; 639241675Suqs return; 640241675Suqs } 641241675Suqs 642241675Suqs roff_endparse(curp->roff); 643241675Suqs} 644241675Suqs 645241675Suqsstatic void 646241675Suqsmparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file, 647241675Suqs int re) 648241675Suqs{ 649241675Suqs const char *svfile; 650241675Suqs 651241675Suqs /* Line number is per-file. */ 652241675Suqs svfile = curp->file; 653241675Suqs curp->file = file; 654241675Suqs curp->line = 1; 655241675Suqs 656241675Suqs mparse_buf_r(curp, blk, 1); 657241675Suqs 658241675Suqs if (0 == re && MANDOCLEVEL_FATAL > curp->file_status) 659241675Suqs mparse_end(curp); 660241675Suqs 661241675Suqs curp->file = svfile; 662241675Suqs} 663241675Suqs 664241675Suqsenum mandoclevel 665241675Suqsmparse_readmem(struct mparse *curp, const void *buf, size_t len, 666241675Suqs const char *file) 667241675Suqs{ 668241675Suqs struct buf blk; 669241675Suqs 670241675Suqs blk.buf = UNCONST(buf); 671241675Suqs blk.sz = len; 672241675Suqs 673241675Suqs mparse_parse_buffer(curp, blk, file, 0); 674241675Suqs return(curp->file_status); 675241675Suqs} 676241675Suqs 677241675Suqsstatic void 678241675Suqsmparse_readfd_r(struct mparse *curp, int fd, const char *file, int re) 679241675Suqs{ 680241675Suqs struct buf blk; 681241675Suqs int with_mmap; 682241675Suqs 683241675Suqs if (-1 == fd) 684241675Suqs if (-1 == (fd = open(file, O_RDONLY, 0))) { 685241675Suqs perror(file); 686241675Suqs curp->file_status = MANDOCLEVEL_SYSERR; 687241675Suqs return; 688241675Suqs } 689241675Suqs /* 690241675Suqs * Run for each opened file; may be called more than once for 691241675Suqs * each full parse sequence if the opened file is nested (i.e., 692241675Suqs * from `so'). Simply sucks in the whole file and moves into 693241675Suqs * the parse phase for the file. 694241675Suqs */ 695241675Suqs 696241675Suqs if ( ! read_whole_file(file, fd, &blk, &with_mmap)) { 697241675Suqs curp->file_status = MANDOCLEVEL_SYSERR; 698241675Suqs return; 699241675Suqs } 700241675Suqs 701241675Suqs mparse_parse_buffer(curp, blk, file, re); 702241675Suqs 703241675Suqs#ifdef HAVE_MMAP 704241675Suqs if (with_mmap) 705241675Suqs munmap(blk.buf, blk.sz); 706241675Suqs else 707241675Suqs#endif 708241675Suqs free(blk.buf); 709241675Suqs 710241675Suqs if (STDIN_FILENO != fd && -1 == close(fd)) 711241675Suqs perror(file); 712241675Suqs} 713241675Suqs 714241675Suqsenum mandoclevel 715241675Suqsmparse_readfd(struct mparse *curp, int fd, const char *file) 716241675Suqs{ 717241675Suqs 718241675Suqs mparse_readfd_r(curp, fd, file, 0); 719241675Suqs return(curp->file_status); 720241675Suqs} 721241675Suqs 722241675Suqsstruct mparse * 723241675Suqsmparse_alloc(enum mparset inttype, enum mandoclevel wlevel, mandocmsg mmsg, void *arg) 724241675Suqs{ 725241675Suqs struct mparse *curp; 726241675Suqs 727241675Suqs assert(wlevel <= MANDOCLEVEL_FATAL); 728241675Suqs 729241675Suqs curp = mandoc_calloc(1, sizeof(struct mparse)); 730241675Suqs 731241675Suqs curp->wlevel = wlevel; 732241675Suqs curp->mmsg = mmsg; 733241675Suqs curp->arg = arg; 734241675Suqs curp->inttype = inttype; 735241675Suqs 736241675Suqs curp->roff = roff_alloc(curp); 737241675Suqs return(curp); 738241675Suqs} 739241675Suqs 740241675Suqsvoid 741241675Suqsmparse_reset(struct mparse *curp) 742241675Suqs{ 743241675Suqs 744241675Suqs roff_reset(curp->roff); 745241675Suqs 746241675Suqs if (curp->mdoc) 747241675Suqs mdoc_reset(curp->mdoc); 748241675Suqs if (curp->man) 749241675Suqs man_reset(curp->man); 750241675Suqs if (curp->secondary) 751241675Suqs curp->secondary->sz = 0; 752241675Suqs 753241675Suqs curp->file_status = MANDOCLEVEL_OK; 754241675Suqs curp->mdoc = NULL; 755241675Suqs curp->man = NULL; 756241675Suqs} 757241675Suqs 758241675Suqsvoid 759241675Suqsmparse_free(struct mparse *curp) 760241675Suqs{ 761241675Suqs 762241675Suqs if (curp->pmdoc) 763241675Suqs mdoc_free(curp->pmdoc); 764241675Suqs if (curp->pman) 765241675Suqs man_free(curp->pman); 766241675Suqs if (curp->roff) 767241675Suqs roff_free(curp->roff); 768241675Suqs if (curp->secondary) 769241675Suqs free(curp->secondary->buf); 770241675Suqs 771241675Suqs free(curp->secondary); 772241675Suqs free(curp); 773241675Suqs} 774241675Suqs 775241675Suqsvoid 776241675Suqsmparse_result(struct mparse *curp, struct mdoc **mdoc, struct man **man) 777241675Suqs{ 778241675Suqs 779241675Suqs if (mdoc) 780241675Suqs *mdoc = curp->mdoc; 781241675Suqs if (man) 782241675Suqs *man = curp->man; 783241675Suqs} 784241675Suqs 785241675Suqsvoid 786241675Suqsmandoc_vmsg(enum mandocerr t, struct mparse *m, 787241675Suqs int ln, int pos, const char *fmt, ...) 788241675Suqs{ 789241675Suqs char buf[256]; 790241675Suqs va_list ap; 791241675Suqs 792241675Suqs va_start(ap, fmt); 793241675Suqs vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 794241675Suqs va_end(ap); 795241675Suqs 796241675Suqs mandoc_msg(t, m, ln, pos, buf); 797241675Suqs} 798241675Suqs 799241675Suqsvoid 800241675Suqsmandoc_msg(enum mandocerr er, struct mparse *m, 801241675Suqs int ln, int col, const char *msg) 802241675Suqs{ 803241675Suqs enum mandoclevel level; 804241675Suqs 805241675Suqs level = MANDOCLEVEL_FATAL; 806241675Suqs while (er < mandoclimits[level]) 807241675Suqs level--; 808241675Suqs 809241675Suqs if (level < m->wlevel) 810241675Suqs return; 811241675Suqs 812241675Suqs if (m->mmsg) 813241675Suqs (*m->mmsg)(er, level, m->file, ln, col, msg); 814241675Suqs 815241675Suqs if (m->file_status < level) 816241675Suqs m->file_status = level; 817241675Suqs} 818241675Suqs 819241675Suqsconst char * 820241675Suqsmparse_strerror(enum mandocerr er) 821241675Suqs{ 822241675Suqs 823241675Suqs return(mandocerrs[er]); 824241675Suqs} 825241675Suqs 826241675Suqsconst char * 827241675Suqsmparse_strlevel(enum mandoclevel lvl) 828241675Suqs{ 829241675Suqs return(mandoclevels[lvl]); 830241675Suqs} 831241675Suqs 832241675Suqsvoid 833241675Suqsmparse_keep(struct mparse *p) 834241675Suqs{ 835241675Suqs 836241675Suqs assert(NULL == p->secondary); 837241675Suqs p->secondary = mandoc_calloc(1, sizeof(struct buf)); 838241675Suqs} 839241675Suqs 840241675Suqsconst char * 841241675Suqsmparse_getkeep(const struct mparse *p) 842241675Suqs{ 843241675Suqs 844241675Suqs assert(p->secondary); 845241675Suqs return(p->secondary->sz ? p->secondary->buf : NULL); 846241675Suqs} 847