1229997Sken/* $Id: eqn.c,v 1.84 2020/01/08 12:16:24 schwarze Exp $ */ 2229997Sken/* 3229997Sken * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4229997Sken * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org> 5229997Sken * 6229997Sken * Permission to use, copy, modify, and distribute this software for any 7229997Sken * purpose with or without fee is hereby granted, provided that the above 8229997Sken * copyright notice and this permission notice appear in all copies. 9229997Sken * 10229997Sken * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11229997Sken * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12229997Sken * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13229997Sken * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14229997Sken * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15229997Sken * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16229997Sken * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17229997Sken */ 18229997Sken#include "config.h" 19229997Sken 20229997Sken#include <sys/types.h> 21229997Sken 22229997Sken#include <assert.h> 23229997Sken#include <ctype.h> 24229997Sken#include <limits.h> 25229997Sken#include <stdio.h> 26229997Sken#include <stdlib.h> 27229997Sken#include <string.h> 28229997Sken#include <time.h> 29229997Sken 30229997Sken#include "mandoc_aux.h" 31229997Sken#include "mandoc.h" 32229997Sken#include "roff.h" 33229997Sken#include "eqn.h" 34229997Sken#include "libmandoc.h" 35229997Sken#include "eqn_parse.h" 36229997Sken 37229997Sken#define EQN_NEST_MAX 128 /* maximum nesting of defines */ 38229997Sken#define STRNEQ(p1, sz1, p2, sz2) \ 39229997Sken ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 40229997Sken 41229997Skenenum eqn_tok { 42229997Sken EQN_TOK_DYAD = 0, 43229997Sken EQN_TOK_VEC, 44229997Sken EQN_TOK_UNDER, 45229997Sken EQN_TOK_BAR, 46229997Sken EQN_TOK_TILDE, 47229997Sken EQN_TOK_HAT, 48229997Sken EQN_TOK_DOT, 49229997Sken EQN_TOK_DOTDOT, 50229997Sken EQN_TOK_FWD, 51229997Sken EQN_TOK_BACK, 52229997Sken EQN_TOK_DOWN, 53229997Sken EQN_TOK_UP, 54229997Sken EQN_TOK_FAT, 55229997Sken EQN_TOK_ROMAN, 56229997Sken EQN_TOK_ITALIC, 57229997Sken EQN_TOK_BOLD, 58229997Sken EQN_TOK_SIZE, 59229997Sken EQN_TOK_SUB, 60229997Sken EQN_TOK_SUP, 61229997Sken EQN_TOK_SQRT, 62229997Sken EQN_TOK_OVER, 63229997Sken EQN_TOK_FROM, 64229997Sken EQN_TOK_TO, 65229997Sken EQN_TOK_BRACE_OPEN, 66229997Sken EQN_TOK_BRACE_CLOSE, 67229997Sken EQN_TOK_GSIZE, 68229997Sken EQN_TOK_GFONT, 69229997Sken EQN_TOK_MARK, 70229997Sken EQN_TOK_LINEUP, 71229997Sken EQN_TOK_LEFT, 72229997Sken EQN_TOK_RIGHT, 73229997Sken EQN_TOK_PILE, 74229997Sken EQN_TOK_LPILE, 75229997Sken EQN_TOK_RPILE, 76229997Sken EQN_TOK_CPILE, 77229997Sken EQN_TOK_MATRIX, 78229997Sken EQN_TOK_CCOL, 79229997Sken EQN_TOK_LCOL, 80229997Sken EQN_TOK_RCOL, 81229997Sken EQN_TOK_DELIM, 82229997Sken EQN_TOK_DEFINE, 83229997Sken EQN_TOK_TDEFINE, 84229997Sken EQN_TOK_NDEFINE, 85229997Sken EQN_TOK_UNDEF, 86229997Sken EQN_TOK_ABOVE, 87229997Sken EQN_TOK__MAX, 88229997Sken EQN_TOK_FUNC, 89229997Sken EQN_TOK_QUOTED, 90229997Sken EQN_TOK_SYM, 91229997Sken EQN_TOK_EOF 92229997Sken}; 93229997Sken 94229997Skenstatic const char *eqn_toks[EQN_TOK__MAX] = { 95229997Sken "dyad", /* EQN_TOK_DYAD */ 96229997Sken "vec", /* EQN_TOK_VEC */ 97229997Sken "under", /* EQN_TOK_UNDER */ 98229997Sken "bar", /* EQN_TOK_BAR */ 99229997Sken "tilde", /* EQN_TOK_TILDE */ 100229997Sken "hat", /* EQN_TOK_HAT */ 101229997Sken "dot", /* EQN_TOK_DOT */ 102229997Sken "dotdot", /* EQN_TOK_DOTDOT */ 103229997Sken "fwd", /* EQN_TOK_FWD * */ 104229997Sken "back", /* EQN_TOK_BACK */ 105229997Sken "down", /* EQN_TOK_DOWN */ 106229997Sken "up", /* EQN_TOK_UP */ 107229997Sken "fat", /* EQN_TOK_FAT */ 108229997Sken "roman", /* EQN_TOK_ROMAN */ 109229997Sken "italic", /* EQN_TOK_ITALIC */ 110229997Sken "bold", /* EQN_TOK_BOLD */ 111229997Sken "size", /* EQN_TOK_SIZE */ 112229997Sken "sub", /* EQN_TOK_SUB */ 113229997Sken "sup", /* EQN_TOK_SUP */ 114229997Sken "sqrt", /* EQN_TOK_SQRT */ 115229997Sken "over", /* EQN_TOK_OVER */ 116229997Sken "from", /* EQN_TOK_FROM */ 117229997Sken "to", /* EQN_TOK_TO */ 118229997Sken "{", /* EQN_TOK_BRACE_OPEN */ 119229997Sken "}", /* EQN_TOK_BRACE_CLOSE */ 120229997Sken "gsize", /* EQN_TOK_GSIZE */ 121229997Sken "gfont", /* EQN_TOK_GFONT */ 122229997Sken "mark", /* EQN_TOK_MARK */ 123229997Sken "lineup", /* EQN_TOK_LINEUP */ 124229997Sken "left", /* EQN_TOK_LEFT */ 125229997Sken "right", /* EQN_TOK_RIGHT */ 126229997Sken "pile", /* EQN_TOK_PILE */ 127229997Sken "lpile", /* EQN_TOK_LPILE */ 128229997Sken "rpile", /* EQN_TOK_RPILE */ 129229997Sken "cpile", /* EQN_TOK_CPILE */ 130229997Sken "matrix", /* EQN_TOK_MATRIX */ 131229997Sken "ccol", /* EQN_TOK_CCOL */ 132229997Sken "lcol", /* EQN_TOK_LCOL */ 133229997Sken "rcol", /* EQN_TOK_RCOL */ 134229997Sken "delim", /* EQN_TOK_DELIM */ 135229997Sken "define", /* EQN_TOK_DEFINE */ 136229997Sken "tdefine", /* EQN_TOK_TDEFINE */ 137229997Sken "ndefine", /* EQN_TOK_NDEFINE */ 138229997Sken "undef", /* EQN_TOK_UNDEF */ 139229997Sken "above", /* EQN_TOK_ABOVE */ 140229997Sken}; 141229997Sken 142229997Skenstatic const char *const eqn_func[] = { 143229997Sken "acos", "acsc", "and", "arc", "asec", "asin", "atan", 144229997Sken "cos", "cosh", "coth", "csc", "det", "exp", "for", 145229997Sken "if", "lim", "ln", "log", "max", "min", 146229997Sken "sec", "sin", "sinh", "tan", "tanh", "Im", "Re", 147229997Sken}; 148229997Sken 149229997Skenenum eqn_symt { 150229997Sken EQNSYM_alpha = 0, 151229997Sken EQNSYM_beta, 152229997Sken EQNSYM_chi, 153229997Sken EQNSYM_delta, 154229997Sken EQNSYM_epsilon, 155229997Sken EQNSYM_eta, 156229997Sken EQNSYM_gamma, 157229997Sken EQNSYM_iota, 158229997Sken EQNSYM_kappa, 159229997Sken EQNSYM_lambda, 160229997Sken EQNSYM_mu, 161229997Sken EQNSYM_nu, 162229997Sken EQNSYM_omega, 163229997Sken EQNSYM_omicron, 164229997Sken EQNSYM_phi, 165229997Sken EQNSYM_pi, 166229997Sken EQNSYM_ps, 167229997Sken EQNSYM_rho, 168229997Sken EQNSYM_sigma, 169229997Sken EQNSYM_tau, 170229997Sken EQNSYM_theta, 171229997Sken EQNSYM_upsilon, 172229997Sken EQNSYM_xi, 173229997Sken EQNSYM_zeta, 174229997Sken EQNSYM_DELTA, 175229997Sken EQNSYM_GAMMA, 176229997Sken EQNSYM_LAMBDA, 177229997Sken EQNSYM_OMEGA, 178229997Sken EQNSYM_PHI, 179229997Sken EQNSYM_PI, 180229997Sken EQNSYM_PSI, 181229997Sken EQNSYM_SIGMA, 182229997Sken EQNSYM_THETA, 183229997Sken EQNSYM_UPSILON, 184229997Sken EQNSYM_XI, 185229997Sken EQNSYM_inter, 186229997Sken EQNSYM_union, 187229997Sken EQNSYM_prod, 188229997Sken EQNSYM_int, 189229997Sken EQNSYM_sum, 190229997Sken EQNSYM_grad, 191229997Sken EQNSYM_del, 192229997Sken EQNSYM_times, 193229997Sken EQNSYM_cdot, 194229997Sken EQNSYM_nothing, 195229997Sken EQNSYM_approx, 196229997Sken EQNSYM_prime, 197229997Sken EQNSYM_half, 198229997Sken EQNSYM_partial, 199229997Sken EQNSYM_inf, 200229997Sken EQNSYM_muchgreat, 201229997Sken EQNSYM_muchless, 202229997Sken EQNSYM_larrow, 203229997Sken EQNSYM_rarrow, 204229997Sken EQNSYM_pm, 205229997Sken EQNSYM_nequal, 206229997Sken EQNSYM_equiv, 207229997Sken EQNSYM_lessequal, 208229997Sken EQNSYM_moreequal, 209229997Sken EQNSYM_minus, 210229997Sken EQNSYM__MAX 211229997Sken}; 212229997Sken 213229997Skenstruct eqnsym { 214229997Sken const char *str; 215229997Sken const char *sym; 216229997Sken}; 217229997Sken 218229997Skenstatic const struct eqnsym eqnsyms[EQNSYM__MAX] = { 219229997Sken { "alpha", "*a" }, /* EQNSYM_alpha */ 220229997Sken { "beta", "*b" }, /* EQNSYM_beta */ 221229997Sken { "chi", "*x" }, /* EQNSYM_chi */ 222229997Sken { "delta", "*d" }, /* EQNSYM_delta */ 223229997Sken { "epsilon", "*e" }, /* EQNSYM_epsilon */ 224229997Sken { "eta", "*y" }, /* EQNSYM_eta */ 225229997Sken { "gamma", "*g" }, /* EQNSYM_gamma */ 226229997Sken { "iota", "*i" }, /* EQNSYM_iota */ 227229997Sken { "kappa", "*k" }, /* EQNSYM_kappa */ 228229997Sken { "lambda", "*l" }, /* EQNSYM_lambda */ 229229997Sken { "mu", "*m" }, /* EQNSYM_mu */ 230229997Sken { "nu", "*n" }, /* EQNSYM_nu */ 231229997Sken { "omega", "*w" }, /* EQNSYM_omega */ 232229997Sken { "omicron", "*o" }, /* EQNSYM_omicron */ 233229997Sken { "phi", "*f" }, /* EQNSYM_phi */ 234229997Sken { "pi", "*p" }, /* EQNSYM_pi */ 235229997Sken { "psi", "*q" }, /* EQNSYM_psi */ 236229997Sken { "rho", "*r" }, /* EQNSYM_rho */ 237229997Sken { "sigma", "*s" }, /* EQNSYM_sigma */ 238229997Sken { "tau", "*t" }, /* EQNSYM_tau */ 239229997Sken { "theta", "*h" }, /* EQNSYM_theta */ 240229997Sken { "upsilon", "*u" }, /* EQNSYM_upsilon */ 241229997Sken { "xi", "*c" }, /* EQNSYM_xi */ 242229997Sken { "zeta", "*z" }, /* EQNSYM_zeta */ 243229997Sken { "DELTA", "*D" }, /* EQNSYM_DELTA */ 244229997Sken { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 245229997Sken { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 246229997Sken { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 247229997Sken { "PHI", "*F" }, /* EQNSYM_PHI */ 248229997Sken { "PI", "*P" }, /* EQNSYM_PI */ 249229997Sken { "PSI", "*Q" }, /* EQNSYM_PSI */ 250229997Sken { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 251229997Sken { "THETA", "*H" }, /* EQNSYM_THETA */ 252229997Sken { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 253229997Sken { "XI", "*C" }, /* EQNSYM_XI */ 254229997Sken { "inter", "ca" }, /* EQNSYM_inter */ 255229997Sken { "union", "cu" }, /* EQNSYM_union */ 256229997Sken { "prod", "product" }, /* EQNSYM_prod */ 257229997Sken { "int", "integral" }, /* EQNSYM_int */ 258229997Sken { "sum", "sum" }, /* EQNSYM_sum */ 259229997Sken { "grad", "gr" }, /* EQNSYM_grad */ 260229997Sken { "del", "gr" }, /* EQNSYM_del */ 261229997Sken { "times", "mu" }, /* EQNSYM_times */ 262229997Sken { "cdot", "pc" }, /* EQNSYM_cdot */ 263229997Sken { "nothing", "&" }, /* EQNSYM_nothing */ 264229997Sken { "approx", "~~" }, /* EQNSYM_approx */ 265229997Sken { "prime", "fm" }, /* EQNSYM_prime */ 266229997Sken { "half", "12" }, /* EQNSYM_half */ 267229997Sken { "partial", "pd" }, /* EQNSYM_partial */ 268229997Sken { "inf", "if" }, /* EQNSYM_inf */ 269229997Sken { ">>", ">>" }, /* EQNSYM_muchgreat */ 270229997Sken { "<<", "<<" }, /* EQNSYM_muchless */ 271229997Sken { "<-", "<-" }, /* EQNSYM_larrow */ 272229997Sken { "->", "->" }, /* EQNSYM_rarrow */ 273229997Sken { "+-", "+-" }, /* EQNSYM_pm */ 274229997Sken { "!=", "!=" }, /* EQNSYM_nequal */ 275229997Sken { "==", "==" }, /* EQNSYM_equiv */ 276229997Sken { "<=", "<=" }, /* EQNSYM_lessequal */ 277229997Sken { ">=", ">=" }, /* EQNSYM_moreequal */ 278229997Sken { "-", "mi" }, /* EQNSYM_minus */ 279229997Sken}; 280229997Sken 281229997Skenenum parse_mode { 282229997Sken MODE_QUOTED, 283229997Sken MODE_NOSUB, 284229997Sken MODE_SUB, 285229997Sken MODE_TOK 286229997Sken}; 287229997Sken 288229997Skenstruct eqn_def { 289229997Sken char *key; 290229997Sken size_t keysz; 291229997Sken char *val; 292229997Sken size_t valsz; 293229997Sken}; 294229997Sken 295229997Skenstatic struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 296229997Skenstatic struct eqn_box *eqn_box_makebinary(struct eqn_node *, 297229997Sken struct eqn_box *); 298229997Skenstatic void eqn_def(struct eqn_node *); 299229997Skenstatic struct eqn_def *eqn_def_find(struct eqn_node *); 300229997Skenstatic void eqn_delim(struct eqn_node *); 301229997Skenstatic enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode); 302229997Skenstatic void eqn_undef(struct eqn_node *); 303229997Sken 304229997Sken 305229997Skenstruct eqn_node * 306229997Skeneqn_alloc(void) 307229997Sken{ 308229997Sken struct eqn_node *ep; 309229997Sken 310229997Sken ep = mandoc_calloc(1, sizeof(*ep)); 311229997Sken ep->gsize = EQN_DEFSIZE; 312229997Sken return ep; 313229997Sken} 314229997Sken 315229997Skenvoid 316229997Skeneqn_reset(struct eqn_node *ep) 317229997Sken{ 318229997Sken free(ep->data); 319229997Sken ep->data = ep->start = ep->end = NULL; 320229997Sken ep->sz = ep->toksz = 0; 321229997Sken} 322229997Sken 323229997Skenvoid 324229997Skeneqn_read(struct eqn_node *ep, const char *p) 325229997Sken{ 326229997Sken char *cp; 327229997Sken 328229997Sken if (ep->data == NULL) { 329229997Sken ep->sz = strlen(p); 330229997Sken ep->data = mandoc_strdup(p); 331229997Sken } else { 332229997Sken ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p); 333229997Sken free(ep->data); 334229997Sken ep->data = cp; 335229997Sken } 336229997Sken ep->sz += 1; 337229997Sken} 338229997Sken 339229997Sken/* 340229997Sken * Find the key "key" of the give size within our eqn-defined values. 341229997Sken */ 342229997Skenstatic struct eqn_def * 343229997Skeneqn_def_find(struct eqn_node *ep) 344229997Sken{ 345229997Sken int i; 346229997Sken 347229997Sken for (i = 0; i < (int)ep->defsz; i++) 348229997Sken if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 349229997Sken ep->defs[i].keysz, ep->start, ep->toksz)) 350229997Sken return &ep->defs[i]; 351229997Sken 352229997Sken return NULL; 353229997Sken} 354229997Sken 355229997Sken/* 356229997Sken * Parse a token from the input text. The modes are: 357229997Sken * MODE_QUOTED: Use *ep->start as the delimiter; the token ends 358229997Sken * before its next occurence. Do not interpret the token in any 359229997Sken * way and return EQN_TOK_QUOTED. All other modes behave like 360229997Sken * MODE_QUOTED when *ep->start is '"'. 361229997Sken * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it; 362229997Sken * otherwise, it ends before the next whitespace or brace. 363229997Sken * Do not interpret the token and return EQN_TOK__MAX. 364229997Sken * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an 365229997Sken * alias created with define. If it is an alias, replace it with 366229997Sken * its string value and reparse. 367229997Sken * MODE_TOK: Like MODE_SUB, but also check the token against the list 368229997Sken * of tokens, and if there is a match, return that token. Otherwise, 369229997Sken * if the token matches a symbol, return EQN_TOK_SYM; if it matches 370229997Sken * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for 371229997Sken * a token match, *ep->start is set to an allocated string that the 372229997Sken * caller is expected to free. 373229997Sken * All modes skip whitespace following the end of the token. 374229997Sken */ 375229997Skenstatic enum eqn_tok 376229997Skeneqn_next(struct eqn_node *ep, enum parse_mode mode) 377229997Sken{ 378229997Sken static int last_len, lim; 379229997Sken 380229997Sken struct eqn_def *def; 381229997Sken size_t start; 382229997Sken int diff, i, quoted; 383229997Sken enum eqn_tok tok; 384229997Sken 385229997Sken /* 386229997Sken * Reset the recursion counter after advancing 387229997Sken * beyond the end of the previous substitution. 388229997Sken */ 389229997Sken if (ep->end - ep->data >= last_len) 390229997Sken lim = 0; 391229997Sken 392229997Sken ep->start = ep->end; 393229997Sken quoted = mode == MODE_QUOTED; 394229997Sken for (;;) { 395229997Sken switch (*ep->start) { 396229997Sken case '\0': 397229997Sken ep->toksz = 0; 398229997Sken return EQN_TOK_EOF; 399229997Sken case '"': 400229997Sken quoted = 1; 401229997Sken break; 402229997Sken case ' ': 403229997Sken case '\t': 404229997Sken case '~': 405229997Sken case '^': 406229997Sken if (quoted) 407229997Sken break; 408229997Sken ep->start++; 409229997Sken continue; 410229997Sken default: 411229997Sken break; 412229997Sken } 413229997Sken if (quoted) { 414229997Sken ep->end = strchr(ep->start + 1, *ep->start); 415229997Sken ep->start++; /* Skip opening quote. */ 416229997Sken if (ep->end == NULL) { 417229997Sken mandoc_msg(MANDOCERR_ARG_QUOTE, 418229997Sken ep->node->line, ep->node->pos, NULL); 419229997Sken ep->end = strchr(ep->start, '\0'); 420229997Sken } 421229997Sken } else { 422229997Sken ep->end = ep->start + 1; 423229997Sken if (*ep->start != '{' && *ep->start != '}') 424229997Sken ep->end += strcspn(ep->end, " ^~\"{}\t"); 425229997Sken } 426229997Sken ep->toksz = ep->end - ep->start; 427229997Sken if (quoted && *ep->end != '\0') 428229997Sken ep->end++; /* Skip closing quote. */ 429229997Sken while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL) 430229997Sken ep->end++; 431229997Sken if (quoted) /* Cannot return, may have to strndup. */ 432229997Sken break; 433229997Sken if (mode == MODE_NOSUB) 434229997Sken return EQN_TOK__MAX; 435229997Sken if ((def = eqn_def_find(ep)) == NULL) 436229997Sken break; 437229997Sken if (++lim > EQN_NEST_MAX) { 438229997Sken mandoc_msg(MANDOCERR_ROFFLOOP, 439229997Sken ep->node->line, ep->node->pos, NULL); 440229997Sken return EQN_TOK_EOF; 441229997Sken } 442229997Sken 443229997Sken /* Replace a defined name with its string value. */ 444229997Sken if ((diff = def->valsz - ep->toksz) > 0) { 445229997Sken start = ep->start - ep->data; 446229997Sken ep->sz += diff; 447229997Sken ep->data = mandoc_realloc(ep->data, ep->sz + 1); 448229997Sken ep->start = ep->data + start; 449229997Sken } 450229997Sken if (diff) 451229997Sken memmove(ep->start + def->valsz, ep->start + ep->toksz, 452229997Sken strlen(ep->start + ep->toksz) + 1); 453229997Sken memcpy(ep->start, def->val, def->valsz); 454229997Sken last_len = ep->start - ep->data + def->valsz; 455229997Sken } 456229997Sken if (mode != MODE_TOK) 457229997Sken return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX; 458229997Sken if (quoted) { 459229997Sken ep->start = mandoc_strndup(ep->start, ep->toksz); 460229997Sken return EQN_TOK_QUOTED; 461229997Sken } 462229997Sken for (tok = 0; tok < EQN_TOK__MAX; tok++) 463229997Sken if (STRNEQ(ep->start, ep->toksz, 464229997Sken eqn_toks[tok], strlen(eqn_toks[tok]))) 465229997Sken return tok; 466229997Sken 467229997Sken for (i = 0; i < EQNSYM__MAX; i++) { 468229997Sken if (STRNEQ(ep->start, ep->toksz, 469229997Sken eqnsyms[i].str, strlen(eqnsyms[i].str))) { 470229997Sken mandoc_asprintf(&ep->start, 471229997Sken "\\[%s]", eqnsyms[i].sym); 472229997Sken return EQN_TOK_SYM; 473229997Sken } 474229997Sken } 475 ep->start = mandoc_strndup(ep->start, ep->toksz); 476 for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++) 477 if (STRNEQ(ep->start, ep->toksz, 478 eqn_func[i], strlen(eqn_func[i]))) 479 return EQN_TOK_FUNC; 480 return EQN_TOK__MAX; 481} 482 483void 484eqn_box_free(struct eqn_box *bp) 485{ 486 if (bp == NULL) 487 return; 488 489 if (bp->first) 490 eqn_box_free(bp->first); 491 if (bp->next) 492 eqn_box_free(bp->next); 493 494 free(bp->text); 495 free(bp->left); 496 free(bp->right); 497 free(bp->top); 498 free(bp->bottom); 499 free(bp); 500} 501 502struct eqn_box * 503eqn_box_new(void) 504{ 505 struct eqn_box *bp; 506 507 bp = mandoc_calloc(1, sizeof(*bp)); 508 bp->expectargs = UINT_MAX; 509 return bp; 510} 511 512/* 513 * Allocate a box as the last child of the parent node. 514 */ 515static struct eqn_box * 516eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 517{ 518 struct eqn_box *bp; 519 520 bp = eqn_box_new(); 521 bp->parent = parent; 522 bp->parent->args++; 523 bp->font = bp->parent->font; 524 bp->size = ep->gsize; 525 526 if (NULL != parent->first) { 527 parent->last->next = bp; 528 bp->prev = parent->last; 529 } else 530 parent->first = bp; 531 532 parent->last = bp; 533 return bp; 534} 535 536/* 537 * Reparent the current last node (of the current parent) under a new 538 * EQN_SUBEXPR as the first element. 539 * Then return the new parent. 540 * The new EQN_SUBEXPR will have a two-child limit. 541 */ 542static struct eqn_box * 543eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent) 544{ 545 struct eqn_box *b, *newb; 546 547 assert(NULL != parent->last); 548 b = parent->last; 549 if (parent->last == parent->first) 550 parent->first = NULL; 551 parent->args--; 552 parent->last = b->prev; 553 b->prev = NULL; 554 newb = eqn_box_alloc(ep, parent); 555 newb->type = EQN_SUBEXPR; 556 newb->expectargs = 2; 557 newb->args = 1; 558 newb->first = newb->last = b; 559 newb->first->next = NULL; 560 b->parent = newb; 561 return newb; 562} 563 564/* 565 * Parse the "delim" control statement. 566 */ 567static void 568eqn_delim(struct eqn_node *ep) 569{ 570 if (ep->end[0] == '\0' || ep->end[1] == '\0') { 571 mandoc_msg(MANDOCERR_REQ_EMPTY, 572 ep->node->line, ep->node->pos, "delim"); 573 if (ep->end[0] != '\0') 574 ep->end++; 575 } else if (strncmp(ep->end, "off", 3) == 0) { 576 ep->delim = 0; 577 ep->end += 3; 578 } else if (strncmp(ep->end, "on", 2) == 0) { 579 if (ep->odelim && ep->cdelim) 580 ep->delim = 1; 581 ep->end += 2; 582 } else { 583 ep->odelim = *ep->end++; 584 ep->cdelim = *ep->end++; 585 ep->delim = 1; 586 } 587} 588 589/* 590 * Undefine a previously-defined string. 591 */ 592static void 593eqn_undef(struct eqn_node *ep) 594{ 595 struct eqn_def *def; 596 597 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 598 mandoc_msg(MANDOCERR_REQ_EMPTY, 599 ep->node->line, ep->node->pos, "undef"); 600 return; 601 } 602 if ((def = eqn_def_find(ep)) == NULL) 603 return; 604 free(def->key); 605 free(def->val); 606 def->key = def->val = NULL; 607 def->keysz = def->valsz = 0; 608} 609 610static void 611eqn_def(struct eqn_node *ep) 612{ 613 struct eqn_def *def; 614 int i; 615 616 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 617 mandoc_msg(MANDOCERR_REQ_EMPTY, 618 ep->node->line, ep->node->pos, "define"); 619 return; 620 } 621 622 /* 623 * Search for a key that already exists. 624 * Create a new key if none is found. 625 */ 626 if ((def = eqn_def_find(ep)) == NULL) { 627 /* Find holes in string array. */ 628 for (i = 0; i < (int)ep->defsz; i++) 629 if (0 == ep->defs[i].keysz) 630 break; 631 632 if (i == (int)ep->defsz) { 633 ep->defsz++; 634 ep->defs = mandoc_reallocarray(ep->defs, 635 ep->defsz, sizeof(struct eqn_def)); 636 ep->defs[i].key = ep->defs[i].val = NULL; 637 } 638 639 def = ep->defs + i; 640 free(def->key); 641 def->key = mandoc_strndup(ep->start, ep->toksz); 642 def->keysz = ep->toksz; 643 } 644 645 if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) { 646 mandoc_msg(MANDOCERR_REQ_EMPTY, 647 ep->node->line, ep->node->pos, "define %s", def->key); 648 free(def->key); 649 free(def->val); 650 def->key = def->val = NULL; 651 def->keysz = def->valsz = 0; 652 return; 653 } 654 free(def->val); 655 def->val = mandoc_strndup(ep->start, ep->toksz); 656 def->valsz = ep->toksz; 657} 658 659void 660eqn_parse(struct eqn_node *ep) 661{ 662 struct eqn_box *cur, *nbox, *parent, *split; 663 const char *cp, *cpn; 664 char *p; 665 enum eqn_tok tok; 666 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln; 667 int size; 668 669 parent = ep->node->eqn; 670 assert(parent != NULL); 671 672 /* 673 * Empty equation. 674 * Do not add it to the high-level syntax tree. 675 */ 676 677 if (ep->data == NULL) 678 return; 679 680 ep->start = ep->end = ep->data; 681 682next_tok: 683 tok = eqn_next(ep, MODE_TOK); 684 switch (tok) { 685 case EQN_TOK_UNDEF: 686 eqn_undef(ep); 687 break; 688 case EQN_TOK_NDEFINE: 689 case EQN_TOK_DEFINE: 690 eqn_def(ep); 691 break; 692 case EQN_TOK_TDEFINE: 693 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF || 694 eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) 695 mandoc_msg(MANDOCERR_REQ_EMPTY, 696 ep->node->line, ep->node->pos, "tdefine"); 697 break; 698 case EQN_TOK_DELIM: 699 eqn_delim(ep); 700 break; 701 case EQN_TOK_GFONT: 702 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 703 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 704 ep->node->pos, "%s", eqn_toks[tok]); 705 break; 706 case EQN_TOK_MARK: 707 case EQN_TOK_LINEUP: 708 /* Ignore these. */ 709 break; 710 case EQN_TOK_DYAD: 711 case EQN_TOK_VEC: 712 case EQN_TOK_UNDER: 713 case EQN_TOK_BAR: 714 case EQN_TOK_TILDE: 715 case EQN_TOK_HAT: 716 case EQN_TOK_DOT: 717 case EQN_TOK_DOTDOT: 718 if (parent->last == NULL) { 719 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 720 ep->node->pos, "%s", eqn_toks[tok]); 721 cur = eqn_box_alloc(ep, parent); 722 cur->type = EQN_TEXT; 723 cur->text = mandoc_strdup(""); 724 } 725 parent = eqn_box_makebinary(ep, parent); 726 parent->type = EQN_LIST; 727 parent->expectargs = 1; 728 parent->font = EQNFONT_ROMAN; 729 switch (tok) { 730 case EQN_TOK_DOTDOT: 731 parent->top = mandoc_strdup("\\[ad]"); 732 break; 733 case EQN_TOK_VEC: 734 parent->top = mandoc_strdup("\\[->]"); 735 break; 736 case EQN_TOK_DYAD: 737 parent->top = mandoc_strdup("\\[<>]"); 738 break; 739 case EQN_TOK_TILDE: 740 parent->top = mandoc_strdup("\\[a~]"); 741 break; 742 case EQN_TOK_UNDER: 743 parent->bottom = mandoc_strdup("\\[ul]"); 744 break; 745 case EQN_TOK_BAR: 746 parent->top = mandoc_strdup("\\[rn]"); 747 break; 748 case EQN_TOK_DOT: 749 parent->top = mandoc_strdup("\\[a.]"); 750 break; 751 case EQN_TOK_HAT: 752 parent->top = mandoc_strdup("\\[ha]"); 753 break; 754 default: 755 abort(); 756 } 757 parent = parent->parent; 758 break; 759 case EQN_TOK_FWD: 760 case EQN_TOK_BACK: 761 case EQN_TOK_DOWN: 762 case EQN_TOK_UP: 763 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 764 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 765 ep->node->pos, "%s", eqn_toks[tok]); 766 break; 767 case EQN_TOK_FAT: 768 case EQN_TOK_ROMAN: 769 case EQN_TOK_ITALIC: 770 case EQN_TOK_BOLD: 771 while (parent->args == parent->expectargs) 772 parent = parent->parent; 773 /* 774 * These values apply to the next word or sequence of 775 * words; thus, we mark that we'll have a child with 776 * exactly one of those. 777 */ 778 parent = eqn_box_alloc(ep, parent); 779 parent->type = EQN_LIST; 780 parent->expectargs = 1; 781 switch (tok) { 782 case EQN_TOK_FAT: 783 parent->font = EQNFONT_FAT; 784 break; 785 case EQN_TOK_ROMAN: 786 parent->font = EQNFONT_ROMAN; 787 break; 788 case EQN_TOK_ITALIC: 789 parent->font = EQNFONT_ITALIC; 790 break; 791 case EQN_TOK_BOLD: 792 parent->font = EQNFONT_BOLD; 793 break; 794 default: 795 abort(); 796 } 797 break; 798 case EQN_TOK_SIZE: 799 case EQN_TOK_GSIZE: 800 /* Accept two values: integral size and a single. */ 801 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 802 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 803 ep->node->pos, "%s", eqn_toks[tok]); 804 break; 805 } 806 size = mandoc_strntoi(ep->start, ep->toksz, 10); 807 if (-1 == size) { 808 mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line, 809 ep->node->pos, "%s", eqn_toks[tok]); 810 break; 811 } 812 if (EQN_TOK_GSIZE == tok) { 813 ep->gsize = size; 814 break; 815 } 816 while (parent->args == parent->expectargs) 817 parent = parent->parent; 818 parent = eqn_box_alloc(ep, parent); 819 parent->type = EQN_LIST; 820 parent->expectargs = 1; 821 parent->size = size; 822 break; 823 case EQN_TOK_FROM: 824 case EQN_TOK_TO: 825 case EQN_TOK_SUB: 826 case EQN_TOK_SUP: 827 /* 828 * We have a left-right-associative expression. 829 * Repivot under a positional node, open a child scope 830 * and keep on reading. 831 */ 832 if (parent->last == NULL) { 833 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 834 ep->node->pos, "%s", eqn_toks[tok]); 835 cur = eqn_box_alloc(ep, parent); 836 cur->type = EQN_TEXT; 837 cur->text = mandoc_strdup(""); 838 } 839 while (parent->expectargs == 1 && parent->args == 1) 840 parent = parent->parent; 841 if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) { 842 for (cur = parent; cur != NULL; cur = cur->parent) 843 if (cur->pos == EQNPOS_SUB || 844 cur->pos == EQNPOS_SUP || 845 cur->pos == EQNPOS_SUBSUP || 846 cur->pos == EQNPOS_SQRT || 847 cur->pos == EQNPOS_OVER) 848 break; 849 if (cur != NULL) 850 parent = cur->parent; 851 } 852 if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) { 853 parent->expectargs = 3; 854 parent->pos = EQNPOS_SUBSUP; 855 break; 856 } 857 if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) { 858 parent->expectargs = 3; 859 parent->pos = EQNPOS_FROMTO; 860 break; 861 } 862 parent = eqn_box_makebinary(ep, parent); 863 switch (tok) { 864 case EQN_TOK_FROM: 865 parent->pos = EQNPOS_FROM; 866 break; 867 case EQN_TOK_TO: 868 parent->pos = EQNPOS_TO; 869 break; 870 case EQN_TOK_SUP: 871 parent->pos = EQNPOS_SUP; 872 break; 873 case EQN_TOK_SUB: 874 parent->pos = EQNPOS_SUB; 875 break; 876 default: 877 abort(); 878 } 879 break; 880 case EQN_TOK_SQRT: 881 while (parent->args == parent->expectargs) 882 parent = parent->parent; 883 /* 884 * Accept a left-right-associative set of arguments just 885 * like sub and sup and friends but without rebalancing 886 * under a pivot. 887 */ 888 parent = eqn_box_alloc(ep, parent); 889 parent->type = EQN_SUBEXPR; 890 parent->pos = EQNPOS_SQRT; 891 parent->expectargs = 1; 892 break; 893 case EQN_TOK_OVER: 894 /* 895 * We have a right-left-associative fraction. 896 * Close out anything that's currently open, then 897 * rebalance and continue reading. 898 */ 899 if (parent->last == NULL) { 900 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 901 ep->node->pos, "%s", eqn_toks[tok]); 902 cur = eqn_box_alloc(ep, parent); 903 cur->type = EQN_TEXT; 904 cur->text = mandoc_strdup(""); 905 } 906 while (parent->args == parent->expectargs) 907 parent = parent->parent; 908 while (EQN_SUBEXPR == parent->type) 909 parent = parent->parent; 910 parent = eqn_box_makebinary(ep, parent); 911 parent->pos = EQNPOS_OVER; 912 break; 913 case EQN_TOK_RIGHT: 914 case EQN_TOK_BRACE_CLOSE: 915 /* 916 * Close out the existing brace. 917 * FIXME: this is a shitty sentinel: we should really 918 * have a native EQN_BRACE type or whatnot. 919 */ 920 for (cur = parent; cur != NULL; cur = cur->parent) 921 if (cur->type == EQN_LIST && 922 cur->expectargs > 1 && 923 (tok == EQN_TOK_BRACE_CLOSE || 924 cur->left != NULL)) 925 break; 926 if (cur == NULL) { 927 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line, 928 ep->node->pos, "%s", eqn_toks[tok]); 929 break; 930 } 931 parent = cur; 932 if (EQN_TOK_RIGHT == tok) { 933 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 934 mandoc_msg(MANDOCERR_REQ_EMPTY, 935 ep->node->line, ep->node->pos, 936 "%s", eqn_toks[tok]); 937 break; 938 } 939 /* Handling depends on right/left. */ 940 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 941 parent->right = mandoc_strdup("\\[rc]"); 942 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 943 parent->right = mandoc_strdup("\\[rf]"); 944 else 945 parent->right = 946 mandoc_strndup(ep->start, ep->toksz); 947 } 948 parent = parent->parent; 949 if (tok == EQN_TOK_BRACE_CLOSE && 950 (parent->type == EQN_PILE || 951 parent->type == EQN_MATRIX)) 952 parent = parent->parent; 953 /* Close out any "singleton" lists. */ 954 while (parent->type == EQN_LIST && 955 parent->expectargs == 1 && 956 parent->args == 1) 957 parent = parent->parent; 958 break; 959 case EQN_TOK_BRACE_OPEN: 960 case EQN_TOK_LEFT: 961 /* 962 * If we already have something in the stack and we're 963 * in an expression, then rewind til we're not any more 964 * (just like with the text node). 965 */ 966 while (parent->args == parent->expectargs) 967 parent = parent->parent; 968 if (EQN_TOK_LEFT == tok && 969 eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 970 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 971 ep->node->pos, "%s", eqn_toks[tok]); 972 break; 973 } 974 parent = eqn_box_alloc(ep, parent); 975 parent->type = EQN_LIST; 976 if (EQN_TOK_LEFT == tok) { 977 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 978 parent->left = mandoc_strdup("\\[lc]"); 979 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 980 parent->left = mandoc_strdup("\\[lf]"); 981 else 982 parent->left = 983 mandoc_strndup(ep->start, ep->toksz); 984 } 985 break; 986 case EQN_TOK_PILE: 987 case EQN_TOK_LPILE: 988 case EQN_TOK_RPILE: 989 case EQN_TOK_CPILE: 990 case EQN_TOK_CCOL: 991 case EQN_TOK_LCOL: 992 case EQN_TOK_RCOL: 993 while (parent->args == parent->expectargs) 994 parent = parent->parent; 995 parent = eqn_box_alloc(ep, parent); 996 parent->type = EQN_PILE; 997 parent->expectargs = 1; 998 break; 999 case EQN_TOK_ABOVE: 1000 for (cur = parent; cur != NULL; cur = cur->parent) 1001 if (cur->type == EQN_PILE) 1002 break; 1003 if (cur == NULL) { 1004 mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line, 1005 ep->node->pos, "%s", eqn_toks[tok]); 1006 break; 1007 } 1008 parent = eqn_box_alloc(ep, cur); 1009 parent->type = EQN_LIST; 1010 break; 1011 case EQN_TOK_MATRIX: 1012 while (parent->args == parent->expectargs) 1013 parent = parent->parent; 1014 parent = eqn_box_alloc(ep, parent); 1015 parent->type = EQN_MATRIX; 1016 parent->expectargs = 1; 1017 break; 1018 case EQN_TOK_EOF: 1019 return; 1020 case EQN_TOK__MAX: 1021 case EQN_TOK_FUNC: 1022 case EQN_TOK_QUOTED: 1023 case EQN_TOK_SYM: 1024 p = ep->start; 1025 assert(p != NULL); 1026 /* 1027 * If we already have something in the stack and we're 1028 * in an expression, then rewind til we're not any more. 1029 */ 1030 while (parent->args == parent->expectargs) 1031 parent = parent->parent; 1032 cur = eqn_box_alloc(ep, parent); 1033 cur->type = EQN_TEXT; 1034 cur->text = p; 1035 switch (tok) { 1036 case EQN_TOK_FUNC: 1037 cur->font = EQNFONT_ROMAN; 1038 break; 1039 case EQN_TOK_QUOTED: 1040 if (cur->font == EQNFONT_NONE) 1041 cur->font = EQNFONT_ITALIC; 1042 break; 1043 case EQN_TOK_SYM: 1044 break; 1045 default: 1046 if (cur->font != EQNFONT_NONE || *p == '\0') 1047 break; 1048 cpn = p - 1; 1049 ccln = CCL_LET; 1050 split = NULL; 1051 for (;;) { 1052 /* Advance to next character. */ 1053 cp = cpn++; 1054 ccl = ccln; 1055 ccln = isalpha((unsigned char)*cpn) ? CCL_LET : 1056 isdigit((unsigned char)*cpn) || 1057 (*cpn == '.' && (ccl == CCL_DIG || 1058 isdigit((unsigned char)cpn[1]))) ? 1059 CCL_DIG : CCL_PUN; 1060 /* No boundary before first character. */ 1061 if (cp < p) 1062 continue; 1063 cur->font = ccl == CCL_LET ? 1064 EQNFONT_ITALIC : EQNFONT_ROMAN; 1065 if (*cp == '\\') 1066 mandoc_escape(&cpn, NULL, NULL); 1067 /* No boundary after last character. */ 1068 if (*cpn == '\0') 1069 break; 1070 if (ccln == ccl && *cp != ',' && *cpn != ',') 1071 continue; 1072 /* Boundary found, split the text. */ 1073 if (parent->args == parent->expectargs) { 1074 /* Remove the text from the tree. */ 1075 if (cur->prev == NULL) 1076 parent->first = cur->next; 1077 else 1078 cur->prev->next = NULL; 1079 parent->last = cur->prev; 1080 parent->args--; 1081 /* Set up a list instead. */ 1082 split = eqn_box_alloc(ep, parent); 1083 split->type = EQN_LIST; 1084 /* Insert the word into the list. */ 1085 split->first = split->last = cur; 1086 cur->parent = split; 1087 cur->prev = NULL; 1088 parent = split; 1089 } 1090 /* Append a new text box. */ 1091 nbox = eqn_box_alloc(ep, parent); 1092 nbox->type = EQN_TEXT; 1093 nbox->text = mandoc_strdup(cpn); 1094 /* Truncate the old box. */ 1095 p = mandoc_strndup(cur->text, 1096 cpn - cur->text); 1097 free(cur->text); 1098 cur->text = p; 1099 /* Setup to process the new box. */ 1100 cur = nbox; 1101 p = nbox->text; 1102 cpn = p - 1; 1103 ccln = CCL_LET; 1104 } 1105 if (split != NULL) 1106 parent = split->parent; 1107 break; 1108 } 1109 break; 1110 default: 1111 abort(); 1112 } 1113 goto next_tok; 1114} 1115 1116void 1117eqn_free(struct eqn_node *p) 1118{ 1119 int i; 1120 1121 if (p == NULL) 1122 return; 1123 1124 for (i = 0; i < (int)p->defsz; i++) { 1125 free(p->defs[i].key); 1126 free(p->defs[i].val); 1127 } 1128 1129 free(p->data); 1130 free(p->defs); 1131 free(p); 1132} 1133