1241675Suqs/* $Id: mdoc_argv.c,v 1.82 2012/03/23 05:50:24 kristaps Exp $ */ 2241675Suqs/* 3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4241675Suqs * 5241675Suqs * Permission to use, copy, modify, and distribute this software for any 6241675Suqs * purpose with or without fee is hereby granted, provided that the above 7241675Suqs * copyright notice and this permission notice appear in all copies. 8241675Suqs * 9241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16241675Suqs */ 17241675Suqs#ifdef HAVE_CONFIG_H 18241675Suqs#include "config.h" 19241675Suqs#endif 20241675Suqs 21241675Suqs#include <sys/types.h> 22241675Suqs 23241675Suqs#include <assert.h> 24241675Suqs#include <stdlib.h> 25241675Suqs#include <stdio.h> 26241675Suqs#include <string.h> 27241675Suqs 28241675Suqs#include "mdoc.h" 29241675Suqs#include "mandoc.h" 30241675Suqs#include "libmdoc.h" 31241675Suqs#include "libmandoc.h" 32241675Suqs 33241675Suqs#define MULTI_STEP 5 /* pre-allocate argument values */ 34241675Suqs#define DELIMSZ 6 /* max possible size of a delimiter */ 35241675Suqs 36241675Suqsenum argsflag { 37241675Suqs ARGSFL_NONE = 0, 38241675Suqs ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */ 39241675Suqs ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */ 40241675Suqs}; 41241675Suqs 42241675Suqsenum argvflag { 43241675Suqs ARGV_NONE, /* no args to flag (e.g., -split) */ 44241675Suqs ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */ 45241675Suqs ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */ 46241675Suqs ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */ 47241675Suqs}; 48241675Suqs 49241675Suqsstruct mdocarg { 50241675Suqs enum argsflag flags; 51241675Suqs const enum mdocargt *argvs; 52241675Suqs}; 53241675Suqs 54241675Suqsstatic void argn_free(struct mdoc_arg *, int); 55241675Suqsstatic enum margserr args(struct mdoc *, int, int *, 56241675Suqs char *, enum argsflag, char **); 57241675Suqsstatic int args_checkpunct(const char *, int); 58241675Suqsstatic int argv_multi(struct mdoc *, int, 59241675Suqs struct mdoc_argv *, int *, char *); 60241675Suqsstatic int argv_opt_single(struct mdoc *, int, 61241675Suqs struct mdoc_argv *, int *, char *); 62241675Suqsstatic int argv_single(struct mdoc *, int, 63241675Suqs struct mdoc_argv *, int *, char *); 64241675Suqs 65241675Suqsstatic const enum argvflag argvflags[MDOC_ARG_MAX] = { 66241675Suqs ARGV_NONE, /* MDOC_Split */ 67241675Suqs ARGV_NONE, /* MDOC_Nosplit */ 68241675Suqs ARGV_NONE, /* MDOC_Ragged */ 69241675Suqs ARGV_NONE, /* MDOC_Unfilled */ 70241675Suqs ARGV_NONE, /* MDOC_Literal */ 71241675Suqs ARGV_SINGLE, /* MDOC_File */ 72241675Suqs ARGV_OPT_SINGLE, /* MDOC_Offset */ 73241675Suqs ARGV_NONE, /* MDOC_Bullet */ 74241675Suqs ARGV_NONE, /* MDOC_Dash */ 75241675Suqs ARGV_NONE, /* MDOC_Hyphen */ 76241675Suqs ARGV_NONE, /* MDOC_Item */ 77241675Suqs ARGV_NONE, /* MDOC_Enum */ 78241675Suqs ARGV_NONE, /* MDOC_Tag */ 79241675Suqs ARGV_NONE, /* MDOC_Diag */ 80241675Suqs ARGV_NONE, /* MDOC_Hang */ 81241675Suqs ARGV_NONE, /* MDOC_Ohang */ 82241675Suqs ARGV_NONE, /* MDOC_Inset */ 83241675Suqs ARGV_MULTI, /* MDOC_Column */ 84241675Suqs ARGV_OPT_SINGLE, /* MDOC_Width */ 85241675Suqs ARGV_NONE, /* MDOC_Compact */ 86241675Suqs ARGV_NONE, /* MDOC_Std */ 87241675Suqs ARGV_NONE, /* MDOC_Filled */ 88241675Suqs ARGV_NONE, /* MDOC_Words */ 89241675Suqs ARGV_NONE, /* MDOC_Emphasis */ 90241675Suqs ARGV_NONE, /* MDOC_Symbolic */ 91241675Suqs ARGV_NONE /* MDOC_Symbolic */ 92241675Suqs}; 93241675Suqs 94241675Suqsstatic const enum mdocargt args_Ex[] = { 95241675Suqs MDOC_Std, 96241675Suqs MDOC_ARG_MAX 97241675Suqs}; 98241675Suqs 99241675Suqsstatic const enum mdocargt args_An[] = { 100241675Suqs MDOC_Split, 101241675Suqs MDOC_Nosplit, 102241675Suqs MDOC_ARG_MAX 103241675Suqs}; 104241675Suqs 105241675Suqsstatic const enum mdocargt args_Bd[] = { 106241675Suqs MDOC_Ragged, 107241675Suqs MDOC_Unfilled, 108241675Suqs MDOC_Filled, 109241675Suqs MDOC_Literal, 110241675Suqs MDOC_File, 111241675Suqs MDOC_Offset, 112241675Suqs MDOC_Compact, 113241675Suqs MDOC_Centred, 114241675Suqs MDOC_ARG_MAX 115241675Suqs}; 116241675Suqs 117241675Suqsstatic const enum mdocargt args_Bf[] = { 118241675Suqs MDOC_Emphasis, 119241675Suqs MDOC_Literal, 120241675Suqs MDOC_Symbolic, 121241675Suqs MDOC_ARG_MAX 122241675Suqs}; 123241675Suqs 124241675Suqsstatic const enum mdocargt args_Bk[] = { 125241675Suqs MDOC_Words, 126241675Suqs MDOC_ARG_MAX 127241675Suqs}; 128241675Suqs 129241675Suqsstatic const enum mdocargt args_Bl[] = { 130241675Suqs MDOC_Bullet, 131241675Suqs MDOC_Dash, 132241675Suqs MDOC_Hyphen, 133241675Suqs MDOC_Item, 134241675Suqs MDOC_Enum, 135241675Suqs MDOC_Tag, 136241675Suqs MDOC_Diag, 137241675Suqs MDOC_Hang, 138241675Suqs MDOC_Ohang, 139241675Suqs MDOC_Inset, 140241675Suqs MDOC_Column, 141241675Suqs MDOC_Width, 142241675Suqs MDOC_Offset, 143241675Suqs MDOC_Compact, 144241675Suqs MDOC_Nested, 145241675Suqs MDOC_ARG_MAX 146241675Suqs}; 147241675Suqs 148241675Suqsstatic const struct mdocarg mdocargs[MDOC_MAX] = { 149241675Suqs { ARGSFL_NONE, NULL }, /* Ap */ 150241675Suqs { ARGSFL_NONE, NULL }, /* Dd */ 151241675Suqs { ARGSFL_NONE, NULL }, /* Dt */ 152241675Suqs { ARGSFL_NONE, NULL }, /* Os */ 153241675Suqs { ARGSFL_NONE, NULL }, /* Sh */ 154241675Suqs { ARGSFL_NONE, NULL }, /* Ss */ 155241675Suqs { ARGSFL_NONE, NULL }, /* Pp */ 156241675Suqs { ARGSFL_DELIM, NULL }, /* D1 */ 157241675Suqs { ARGSFL_DELIM, NULL }, /* Dl */ 158241675Suqs { ARGSFL_NONE, args_Bd }, /* Bd */ 159241675Suqs { ARGSFL_NONE, NULL }, /* Ed */ 160241675Suqs { ARGSFL_NONE, args_Bl }, /* Bl */ 161241675Suqs { ARGSFL_NONE, NULL }, /* El */ 162241675Suqs { ARGSFL_NONE, NULL }, /* It */ 163241675Suqs { ARGSFL_DELIM, NULL }, /* Ad */ 164241675Suqs { ARGSFL_DELIM, args_An }, /* An */ 165241675Suqs { ARGSFL_DELIM, NULL }, /* Ar */ 166241675Suqs { ARGSFL_NONE, NULL }, /* Cd */ 167241675Suqs { ARGSFL_DELIM, NULL }, /* Cm */ 168241675Suqs { ARGSFL_DELIM, NULL }, /* Dv */ 169241675Suqs { ARGSFL_DELIM, NULL }, /* Er */ 170241675Suqs { ARGSFL_DELIM, NULL }, /* Ev */ 171241675Suqs { ARGSFL_NONE, args_Ex }, /* Ex */ 172241675Suqs { ARGSFL_DELIM, NULL }, /* Fa */ 173241675Suqs { ARGSFL_NONE, NULL }, /* Fd */ 174241675Suqs { ARGSFL_DELIM, NULL }, /* Fl */ 175241675Suqs { ARGSFL_DELIM, NULL }, /* Fn */ 176241675Suqs { ARGSFL_DELIM, NULL }, /* Ft */ 177241675Suqs { ARGSFL_DELIM, NULL }, /* Ic */ 178241675Suqs { ARGSFL_NONE, NULL }, /* In */ 179241675Suqs { ARGSFL_DELIM, NULL }, /* Li */ 180241675Suqs { ARGSFL_NONE, NULL }, /* Nd */ 181241675Suqs { ARGSFL_DELIM, NULL }, /* Nm */ 182241675Suqs { ARGSFL_DELIM, NULL }, /* Op */ 183241675Suqs { ARGSFL_NONE, NULL }, /* Ot */ 184241675Suqs { ARGSFL_DELIM, NULL }, /* Pa */ 185241675Suqs { ARGSFL_NONE, args_Ex }, /* Rv */ 186241675Suqs { ARGSFL_DELIM, NULL }, /* St */ 187241675Suqs { ARGSFL_DELIM, NULL }, /* Va */ 188241675Suqs { ARGSFL_DELIM, NULL }, /* Vt */ 189241675Suqs { ARGSFL_DELIM, NULL }, /* Xr */ 190241675Suqs { ARGSFL_NONE, NULL }, /* %A */ 191241675Suqs { ARGSFL_NONE, NULL }, /* %B */ 192241675Suqs { ARGSFL_NONE, NULL }, /* %D */ 193241675Suqs { ARGSFL_NONE, NULL }, /* %I */ 194241675Suqs { ARGSFL_NONE, NULL }, /* %J */ 195241675Suqs { ARGSFL_NONE, NULL }, /* %N */ 196241675Suqs { ARGSFL_NONE, NULL }, /* %O */ 197241675Suqs { ARGSFL_NONE, NULL }, /* %P */ 198241675Suqs { ARGSFL_NONE, NULL }, /* %R */ 199241675Suqs { ARGSFL_NONE, NULL }, /* %T */ 200241675Suqs { ARGSFL_NONE, NULL }, /* %V */ 201241675Suqs { ARGSFL_DELIM, NULL }, /* Ac */ 202241675Suqs { ARGSFL_NONE, NULL }, /* Ao */ 203241675Suqs { ARGSFL_DELIM, NULL }, /* Aq */ 204241675Suqs { ARGSFL_DELIM, NULL }, /* At */ 205241675Suqs { ARGSFL_DELIM, NULL }, /* Bc */ 206241675Suqs { ARGSFL_NONE, args_Bf }, /* Bf */ 207241675Suqs { ARGSFL_NONE, NULL }, /* Bo */ 208241675Suqs { ARGSFL_DELIM, NULL }, /* Bq */ 209241675Suqs { ARGSFL_DELIM, NULL }, /* Bsx */ 210241675Suqs { ARGSFL_DELIM, NULL }, /* Bx */ 211241675Suqs { ARGSFL_NONE, NULL }, /* Db */ 212241675Suqs { ARGSFL_DELIM, NULL }, /* Dc */ 213241675Suqs { ARGSFL_NONE, NULL }, /* Do */ 214241675Suqs { ARGSFL_DELIM, NULL }, /* Dq */ 215241675Suqs { ARGSFL_DELIM, NULL }, /* Ec */ 216241675Suqs { ARGSFL_NONE, NULL }, /* Ef */ 217241675Suqs { ARGSFL_DELIM, NULL }, /* Em */ 218241675Suqs { ARGSFL_NONE, NULL }, /* Eo */ 219241675Suqs { ARGSFL_DELIM, NULL }, /* Fx */ 220241675Suqs { ARGSFL_DELIM, NULL }, /* Ms */ 221241675Suqs { ARGSFL_DELIM, NULL }, /* No */ 222241675Suqs { ARGSFL_DELIM, NULL }, /* Ns */ 223241675Suqs { ARGSFL_DELIM, NULL }, /* Nx */ 224241675Suqs { ARGSFL_DELIM, NULL }, /* Ox */ 225241675Suqs { ARGSFL_DELIM, NULL }, /* Pc */ 226241675Suqs { ARGSFL_DELIM, NULL }, /* Pf */ 227241675Suqs { ARGSFL_NONE, NULL }, /* Po */ 228241675Suqs { ARGSFL_DELIM, NULL }, /* Pq */ 229241675Suqs { ARGSFL_DELIM, NULL }, /* Qc */ 230241675Suqs { ARGSFL_DELIM, NULL }, /* Ql */ 231241675Suqs { ARGSFL_NONE, NULL }, /* Qo */ 232241675Suqs { ARGSFL_DELIM, NULL }, /* Qq */ 233241675Suqs { ARGSFL_NONE, NULL }, /* Re */ 234241675Suqs { ARGSFL_NONE, NULL }, /* Rs */ 235241675Suqs { ARGSFL_DELIM, NULL }, /* Sc */ 236241675Suqs { ARGSFL_NONE, NULL }, /* So */ 237241675Suqs { ARGSFL_DELIM, NULL }, /* Sq */ 238241675Suqs { ARGSFL_NONE, NULL }, /* Sm */ 239241675Suqs { ARGSFL_DELIM, NULL }, /* Sx */ 240241675Suqs { ARGSFL_DELIM, NULL }, /* Sy */ 241241675Suqs { ARGSFL_DELIM, NULL }, /* Tn */ 242241675Suqs { ARGSFL_DELIM, NULL }, /* Ux */ 243241675Suqs { ARGSFL_DELIM, NULL }, /* Xc */ 244241675Suqs { ARGSFL_NONE, NULL }, /* Xo */ 245241675Suqs { ARGSFL_NONE, NULL }, /* Fo */ 246241675Suqs { ARGSFL_NONE, NULL }, /* Fc */ 247241675Suqs { ARGSFL_NONE, NULL }, /* Oo */ 248241675Suqs { ARGSFL_DELIM, NULL }, /* Oc */ 249241675Suqs { ARGSFL_NONE, args_Bk }, /* Bk */ 250241675Suqs { ARGSFL_NONE, NULL }, /* Ek */ 251241675Suqs { ARGSFL_NONE, NULL }, /* Bt */ 252241675Suqs { ARGSFL_NONE, NULL }, /* Hf */ 253241675Suqs { ARGSFL_NONE, NULL }, /* Fr */ 254241675Suqs { ARGSFL_NONE, NULL }, /* Ud */ 255241675Suqs { ARGSFL_NONE, NULL }, /* Lb */ 256241675Suqs { ARGSFL_NONE, NULL }, /* Lp */ 257241675Suqs { ARGSFL_DELIM, NULL }, /* Lk */ 258241675Suqs { ARGSFL_DELIM, NULL }, /* Mt */ 259241675Suqs { ARGSFL_DELIM, NULL }, /* Brq */ 260241675Suqs { ARGSFL_NONE, NULL }, /* Bro */ 261241675Suqs { ARGSFL_DELIM, NULL }, /* Brc */ 262241675Suqs { ARGSFL_NONE, NULL }, /* %C */ 263241675Suqs { ARGSFL_NONE, NULL }, /* Es */ 264241675Suqs { ARGSFL_NONE, NULL }, /* En */ 265241675Suqs { ARGSFL_NONE, NULL }, /* Dx */ 266241675Suqs { ARGSFL_NONE, NULL }, /* %Q */ 267241675Suqs { ARGSFL_NONE, NULL }, /* br */ 268241675Suqs { ARGSFL_NONE, NULL }, /* sp */ 269241675Suqs { ARGSFL_NONE, NULL }, /* %U */ 270241675Suqs { ARGSFL_NONE, NULL }, /* Ta */ 271241675Suqs}; 272241675Suqs 273241675Suqs 274241675Suqs/* 275241675Suqs * Parse an argument from line text. This comes in the form of -key 276241675Suqs * [value0...], which may either have a single mandatory value, at least 277241675Suqs * one mandatory value, an optional single value, or no value. 278241675Suqs */ 279241675Suqsenum margverr 280241675Suqsmdoc_argv(struct mdoc *m, int line, enum mdoct tok, 281241675Suqs struct mdoc_arg **v, int *pos, char *buf) 282241675Suqs{ 283241675Suqs char *p, sv; 284241675Suqs struct mdoc_argv tmp; 285241675Suqs struct mdoc_arg *arg; 286241675Suqs const enum mdocargt *ap; 287241675Suqs 288241675Suqs if ('\0' == buf[*pos]) 289241675Suqs return(ARGV_EOLN); 290241675Suqs else if (NULL == (ap = mdocargs[tok].argvs)) 291241675Suqs return(ARGV_WORD); 292241675Suqs else if ('-' != buf[*pos]) 293241675Suqs return(ARGV_WORD); 294241675Suqs 295241675Suqs /* Seek to the first unescaped space. */ 296241675Suqs 297241675Suqs p = &buf[++(*pos)]; 298241675Suqs 299241675Suqs assert(*pos > 0); 300241675Suqs 301241675Suqs for ( ; buf[*pos] ; (*pos)++) 302241675Suqs if (' ' == buf[*pos] && '\\' != buf[*pos - 1]) 303241675Suqs break; 304241675Suqs 305241675Suqs /* 306241675Suqs * We want to nil-terminate the word to look it up (it's easier 307241675Suqs * that way). But we may not have a flag, in which case we need 308241675Suqs * to restore the line as-is. So keep around the stray byte, 309241675Suqs * which we'll reset upon exiting (if necessary). 310241675Suqs */ 311241675Suqs 312241675Suqs if ('\0' != (sv = buf[*pos])) 313241675Suqs buf[(*pos)++] = '\0'; 314241675Suqs 315241675Suqs /* 316241675Suqs * Now look up the word as a flag. Use temporary storage that 317241675Suqs * we'll copy into the node's flags, if necessary. 318241675Suqs */ 319241675Suqs 320241675Suqs memset(&tmp, 0, sizeof(struct mdoc_argv)); 321241675Suqs 322241675Suqs tmp.line = line; 323241675Suqs tmp.pos = *pos; 324241675Suqs tmp.arg = MDOC_ARG_MAX; 325241675Suqs 326241675Suqs while (MDOC_ARG_MAX != (tmp.arg = *ap++)) 327241675Suqs if (0 == strcmp(p, mdoc_argnames[tmp.arg])) 328241675Suqs break; 329241675Suqs 330241675Suqs if (MDOC_ARG_MAX == tmp.arg) { 331241675Suqs /* 332241675Suqs * The flag was not found. 333241675Suqs * Restore saved zeroed byte and return as a word. 334241675Suqs */ 335241675Suqs if (sv) 336241675Suqs buf[*pos - 1] = sv; 337241675Suqs return(ARGV_WORD); 338241675Suqs } 339241675Suqs 340241675Suqs /* Read to the next word (the argument). */ 341241675Suqs 342241675Suqs while (buf[*pos] && ' ' == buf[*pos]) 343241675Suqs (*pos)++; 344241675Suqs 345241675Suqs switch (argvflags[tmp.arg]) { 346241675Suqs case (ARGV_SINGLE): 347241675Suqs if ( ! argv_single(m, line, &tmp, pos, buf)) 348241675Suqs return(ARGV_ERROR); 349241675Suqs break; 350241675Suqs case (ARGV_MULTI): 351241675Suqs if ( ! argv_multi(m, line, &tmp, pos, buf)) 352241675Suqs return(ARGV_ERROR); 353241675Suqs break; 354241675Suqs case (ARGV_OPT_SINGLE): 355241675Suqs if ( ! argv_opt_single(m, line, &tmp, pos, buf)) 356241675Suqs return(ARGV_ERROR); 357241675Suqs break; 358241675Suqs case (ARGV_NONE): 359241675Suqs break; 360241675Suqs } 361241675Suqs 362241675Suqs if (NULL == (arg = *v)) 363241675Suqs arg = *v = mandoc_calloc(1, sizeof(struct mdoc_arg)); 364241675Suqs 365241675Suqs arg->argc++; 366241675Suqs arg->argv = mandoc_realloc 367241675Suqs (arg->argv, arg->argc * sizeof(struct mdoc_argv)); 368241675Suqs 369241675Suqs memcpy(&arg->argv[(int)arg->argc - 1], 370241675Suqs &tmp, sizeof(struct mdoc_argv)); 371241675Suqs 372241675Suqs return(ARGV_ARG); 373241675Suqs} 374241675Suqs 375241675Suqsvoid 376241675Suqsmdoc_argv_free(struct mdoc_arg *p) 377241675Suqs{ 378241675Suqs int i; 379241675Suqs 380241675Suqs if (NULL == p) 381241675Suqs return; 382241675Suqs 383241675Suqs if (p->refcnt) { 384241675Suqs --(p->refcnt); 385241675Suqs if (p->refcnt) 386241675Suqs return; 387241675Suqs } 388241675Suqs assert(p->argc); 389241675Suqs 390241675Suqs for (i = (int)p->argc - 1; i >= 0; i--) 391241675Suqs argn_free(p, i); 392241675Suqs 393241675Suqs free(p->argv); 394241675Suqs free(p); 395241675Suqs} 396241675Suqs 397241675Suqsstatic void 398241675Suqsargn_free(struct mdoc_arg *p, int iarg) 399241675Suqs{ 400241675Suqs struct mdoc_argv *arg; 401241675Suqs int j; 402241675Suqs 403241675Suqs arg = &p->argv[iarg]; 404241675Suqs 405241675Suqs if (arg->sz && arg->value) { 406241675Suqs for (j = (int)arg->sz - 1; j >= 0; j--) 407241675Suqs free(arg->value[j]); 408241675Suqs free(arg->value); 409241675Suqs } 410241675Suqs 411241675Suqs for (--p->argc; iarg < (int)p->argc; iarg++) 412241675Suqs p->argv[iarg] = p->argv[iarg+1]; 413241675Suqs} 414241675Suqs 415241675Suqsenum margserr 416241675Suqsmdoc_zargs(struct mdoc *m, int line, int *pos, char *buf, char **v) 417241675Suqs{ 418241675Suqs 419241675Suqs return(args(m, line, pos, buf, ARGSFL_NONE, v)); 420241675Suqs} 421241675Suqs 422241675Suqsenum margserr 423241675Suqsmdoc_args(struct mdoc *m, int line, int *pos, 424241675Suqs char *buf, enum mdoct tok, char **v) 425241675Suqs{ 426241675Suqs enum argsflag fl; 427241675Suqs struct mdoc_node *n; 428241675Suqs 429241675Suqs fl = mdocargs[tok].flags; 430241675Suqs 431241675Suqs if (MDOC_It != tok) 432241675Suqs return(args(m, line, pos, buf, fl, v)); 433241675Suqs 434241675Suqs /* 435241675Suqs * We know that we're in an `It', so it's reasonable to expect 436241675Suqs * us to be sitting in a `Bl'. Someday this may not be the case 437241675Suqs * (if we allow random `It's sitting out there), so provide a 438241675Suqs * safe fall-back into the default behaviour. 439241675Suqs */ 440241675Suqs 441241675Suqs for (n = m->last; n; n = n->parent) 442241675Suqs if (MDOC_Bl == n->tok) 443241675Suqs if (LIST_column == n->norm->Bl.type) { 444241675Suqs fl = ARGSFL_TABSEP; 445241675Suqs break; 446241675Suqs } 447241675Suqs 448241675Suqs return(args(m, line, pos, buf, fl, v)); 449241675Suqs} 450241675Suqs 451241675Suqsstatic enum margserr 452241675Suqsargs(struct mdoc *m, int line, int *pos, 453241675Suqs char *buf, enum argsflag fl, char **v) 454241675Suqs{ 455241675Suqs char *p, *pp; 456241675Suqs enum margserr rc; 457241675Suqs 458241675Suqs if ('\0' == buf[*pos]) { 459241675Suqs if (MDOC_PPHRASE & m->flags) 460241675Suqs return(ARGS_EOLN); 461241675Suqs /* 462241675Suqs * If we're not in a partial phrase and the flag for 463241675Suqs * being a phrase literal is still set, the punctuation 464241675Suqs * is unterminated. 465241675Suqs */ 466241675Suqs if (MDOC_PHRASELIT & m->flags) 467241675Suqs mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE); 468241675Suqs 469241675Suqs m->flags &= ~MDOC_PHRASELIT; 470241675Suqs return(ARGS_EOLN); 471241675Suqs } 472241675Suqs 473241675Suqs *v = &buf[*pos]; 474241675Suqs 475241675Suqs if (ARGSFL_DELIM == fl) 476241675Suqs if (args_checkpunct(buf, *pos)) 477241675Suqs return(ARGS_PUNCT); 478241675Suqs 479241675Suqs /* 480241675Suqs * First handle TABSEP items, restricted to `Bl -column'. This 481241675Suqs * ignores conventional token parsing and instead uses tabs or 482241675Suqs * `Ta' macros to separate phrases. Phrases are parsed again 483241675Suqs * for arguments at a later phase. 484241675Suqs */ 485241675Suqs 486241675Suqs if (ARGSFL_TABSEP == fl) { 487241675Suqs /* Scan ahead to tab (can't be escaped). */ 488241675Suqs p = strchr(*v, '\t'); 489241675Suqs pp = NULL; 490241675Suqs 491241675Suqs /* Scan ahead to unescaped `Ta'. */ 492241675Suqs if ( ! (MDOC_PHRASELIT & m->flags)) 493241675Suqs for (pp = *v; ; pp++) { 494241675Suqs if (NULL == (pp = strstr(pp, "Ta"))) 495241675Suqs break; 496241675Suqs if (pp > *v && ' ' != *(pp - 1)) 497241675Suqs continue; 498241675Suqs if (' ' == *(pp + 2) || '\0' == *(pp + 2)) 499241675Suqs break; 500241675Suqs } 501241675Suqs 502241675Suqs /* By default, assume a phrase. */ 503241675Suqs rc = ARGS_PHRASE; 504241675Suqs 505241675Suqs /* 506241675Suqs * Adjust new-buffer position to be beyond delimiter 507241675Suqs * mark (e.g., Ta -> end + 2). 508241675Suqs */ 509241675Suqs if (p && pp) { 510241675Suqs *pos += pp < p ? 2 : 1; 511241675Suqs rc = pp < p ? ARGS_PHRASE : ARGS_PPHRASE; 512241675Suqs p = pp < p ? pp : p; 513241675Suqs } else if (p && ! pp) { 514241675Suqs rc = ARGS_PPHRASE; 515241675Suqs *pos += 1; 516241675Suqs } else if (pp && ! p) { 517241675Suqs p = pp; 518241675Suqs *pos += 2; 519241675Suqs } else { 520241675Suqs rc = ARGS_PEND; 521241675Suqs p = strchr(*v, 0); 522241675Suqs } 523241675Suqs 524241675Suqs /* Whitespace check for eoln case... */ 525241675Suqs if ('\0' == *p && ' ' == *(p - 1)) 526241675Suqs mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); 527241675Suqs 528241675Suqs *pos += (int)(p - *v); 529241675Suqs 530241675Suqs /* Strip delimiter's preceding whitespace. */ 531241675Suqs pp = p - 1; 532241675Suqs while (pp > *v && ' ' == *pp) { 533241675Suqs if (pp > *v && '\\' == *(pp - 1)) 534241675Suqs break; 535241675Suqs pp--; 536241675Suqs } 537241675Suqs *(pp + 1) = 0; 538241675Suqs 539241675Suqs /* Strip delimiter's proceeding whitespace. */ 540241675Suqs for (pp = &buf[*pos]; ' ' == *pp; pp++, (*pos)++) 541241675Suqs /* Skip ahead. */ ; 542241675Suqs 543241675Suqs return(rc); 544241675Suqs } 545241675Suqs 546241675Suqs /* 547241675Suqs * Process a quoted literal. A quote begins with a double-quote 548241675Suqs * and ends with a double-quote NOT preceded by a double-quote. 549241675Suqs * Whitespace is NOT involved in literal termination. 550241675Suqs */ 551241675Suqs 552241675Suqs if (MDOC_PHRASELIT & m->flags || '\"' == buf[*pos]) { 553241675Suqs if ( ! (MDOC_PHRASELIT & m->flags)) 554241675Suqs *v = &buf[++(*pos)]; 555241675Suqs 556241675Suqs if (MDOC_PPHRASE & m->flags) 557241675Suqs m->flags |= MDOC_PHRASELIT; 558241675Suqs 559241675Suqs for ( ; buf[*pos]; (*pos)++) { 560241675Suqs if ('\"' != buf[*pos]) 561241675Suqs continue; 562241675Suqs if ('\"' != buf[*pos + 1]) 563241675Suqs break; 564241675Suqs (*pos)++; 565241675Suqs } 566241675Suqs 567241675Suqs if ('\0' == buf[*pos]) { 568241675Suqs if (MDOC_PPHRASE & m->flags) 569241675Suqs return(ARGS_QWORD); 570241675Suqs mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE); 571241675Suqs return(ARGS_QWORD); 572241675Suqs } 573241675Suqs 574241675Suqs m->flags &= ~MDOC_PHRASELIT; 575241675Suqs buf[(*pos)++] = '\0'; 576241675Suqs 577241675Suqs if ('\0' == buf[*pos]) 578241675Suqs return(ARGS_QWORD); 579241675Suqs 580241675Suqs while (' ' == buf[*pos]) 581241675Suqs (*pos)++; 582241675Suqs 583241675Suqs if ('\0' == buf[*pos]) 584241675Suqs mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); 585241675Suqs 586241675Suqs return(ARGS_QWORD); 587241675Suqs } 588241675Suqs 589241675Suqs p = &buf[*pos]; 590241675Suqs *v = mandoc_getarg(m->parse, &p, line, pos); 591241675Suqs 592241675Suqs return(ARGS_WORD); 593241675Suqs} 594241675Suqs 595241675Suqs/* 596241675Suqs * Check if the string consists only of space-separated closing 597241675Suqs * delimiters. This is a bit of a dance: the first must be a close 598241675Suqs * delimiter, but it may be followed by middle delimiters. Arbitrary 599241675Suqs * whitespace may separate these tokens. 600241675Suqs */ 601241675Suqsstatic int 602241675Suqsargs_checkpunct(const char *buf, int i) 603241675Suqs{ 604241675Suqs int j; 605241675Suqs char dbuf[DELIMSZ]; 606241675Suqs enum mdelim d; 607241675Suqs 608241675Suqs /* First token must be a close-delimiter. */ 609241675Suqs 610241675Suqs for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++) 611241675Suqs dbuf[j] = buf[i]; 612241675Suqs 613241675Suqs if (DELIMSZ == j) 614241675Suqs return(0); 615241675Suqs 616241675Suqs dbuf[j] = '\0'; 617241675Suqs if (DELIM_CLOSE != mdoc_isdelim(dbuf)) 618241675Suqs return(0); 619241675Suqs 620241675Suqs while (' ' == buf[i]) 621241675Suqs i++; 622241675Suqs 623241675Suqs /* Remaining must NOT be open/none. */ 624241675Suqs 625241675Suqs while (buf[i]) { 626241675Suqs j = 0; 627241675Suqs while (buf[i] && ' ' != buf[i] && j < DELIMSZ) 628241675Suqs dbuf[j++] = buf[i++]; 629241675Suqs 630241675Suqs if (DELIMSZ == j) 631241675Suqs return(0); 632241675Suqs 633241675Suqs dbuf[j] = '\0'; 634241675Suqs d = mdoc_isdelim(dbuf); 635241675Suqs if (DELIM_NONE == d || DELIM_OPEN == d) 636241675Suqs return(0); 637241675Suqs 638241675Suqs while (' ' == buf[i]) 639241675Suqs i++; 640241675Suqs } 641241675Suqs 642241675Suqs return('\0' == buf[i]); 643241675Suqs} 644241675Suqs 645241675Suqsstatic int 646241675Suqsargv_multi(struct mdoc *m, int line, 647241675Suqs struct mdoc_argv *v, int *pos, char *buf) 648241675Suqs{ 649241675Suqs enum margserr ac; 650241675Suqs char *p; 651241675Suqs 652241675Suqs for (v->sz = 0; ; v->sz++) { 653241675Suqs if ('-' == buf[*pos]) 654241675Suqs break; 655241675Suqs ac = args(m, line, pos, buf, ARGSFL_NONE, &p); 656241675Suqs if (ARGS_ERROR == ac) 657241675Suqs return(0); 658241675Suqs else if (ARGS_EOLN == ac) 659241675Suqs break; 660241675Suqs 661241675Suqs if (0 == v->sz % MULTI_STEP) 662241675Suqs v->value = mandoc_realloc(v->value, 663241675Suqs (v->sz + MULTI_STEP) * sizeof(char *)); 664241675Suqs 665241675Suqs v->value[(int)v->sz] = mandoc_strdup(p); 666241675Suqs } 667241675Suqs 668241675Suqs return(1); 669241675Suqs} 670241675Suqs 671241675Suqsstatic int 672241675Suqsargv_opt_single(struct mdoc *m, int line, 673241675Suqs struct mdoc_argv *v, int *pos, char *buf) 674241675Suqs{ 675241675Suqs enum margserr ac; 676241675Suqs char *p; 677241675Suqs 678241675Suqs if ('-' == buf[*pos]) 679241675Suqs return(1); 680241675Suqs 681241675Suqs ac = args(m, line, pos, buf, ARGSFL_NONE, &p); 682241675Suqs if (ARGS_ERROR == ac) 683241675Suqs return(0); 684241675Suqs if (ARGS_EOLN == ac) 685241675Suqs return(1); 686241675Suqs 687241675Suqs v->sz = 1; 688241675Suqs v->value = mandoc_malloc(sizeof(char *)); 689241675Suqs v->value[0] = mandoc_strdup(p); 690241675Suqs 691241675Suqs return(1); 692241675Suqs} 693241675Suqs 694241675Suqsstatic int 695241675Suqsargv_single(struct mdoc *m, int line, 696241675Suqs struct mdoc_argv *v, int *pos, char *buf) 697241675Suqs{ 698241675Suqs int ppos; 699241675Suqs enum margserr ac; 700241675Suqs char *p; 701241675Suqs 702241675Suqs ppos = *pos; 703241675Suqs 704241675Suqs ac = args(m, line, pos, buf, ARGSFL_NONE, &p); 705241675Suqs if (ARGS_EOLN == ac) { 706241675Suqs mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT); 707241675Suqs return(0); 708241675Suqs } else if (ARGS_ERROR == ac) 709241675Suqs return(0); 710241675Suqs 711241675Suqs v->sz = 1; 712241675Suqs v->value = mandoc_malloc(sizeof(char *)); 713241675Suqs v->value[0] = mandoc_strdup(p); 714241675Suqs 715241675Suqs return(1); 716241675Suqs} 717