1229997Sken/*	$Id: eqn.c,v 1.84 2020/01/08 12:16:24 schwarze Exp $ */
2229997Sken/*
3229997Sken * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4229997Sken * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org>
5229997Sken *
6229997Sken * Permission to use, copy, modify, and distribute this software for any
7229997Sken * purpose with or without fee is hereby granted, provided that the above
8229997Sken * copyright notice and this permission notice appear in all copies.
9229997Sken *
10229997Sken * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11229997Sken * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12229997Sken * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13229997Sken * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14229997Sken * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15229997Sken * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16229997Sken * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17229997Sken */
18229997Sken#include "config.h"
19229997Sken
20229997Sken#include <sys/types.h>
21229997Sken
22229997Sken#include <assert.h>
23229997Sken#include <ctype.h>
24229997Sken#include <limits.h>
25229997Sken#include <stdio.h>
26229997Sken#include <stdlib.h>
27229997Sken#include <string.h>
28229997Sken#include <time.h>
29229997Sken
30229997Sken#include "mandoc_aux.h"
31229997Sken#include "mandoc.h"
32229997Sken#include "roff.h"
33229997Sken#include "eqn.h"
34229997Sken#include "libmandoc.h"
35229997Sken#include "eqn_parse.h"
36229997Sken
37229997Sken#define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
38229997Sken#define	STRNEQ(p1, sz1, p2, sz2) \
39229997Sken	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
40229997Sken
41229997Skenenum	eqn_tok {
42229997Sken	EQN_TOK_DYAD = 0,
43229997Sken	EQN_TOK_VEC,
44229997Sken	EQN_TOK_UNDER,
45229997Sken	EQN_TOK_BAR,
46229997Sken	EQN_TOK_TILDE,
47229997Sken	EQN_TOK_HAT,
48229997Sken	EQN_TOK_DOT,
49229997Sken	EQN_TOK_DOTDOT,
50229997Sken	EQN_TOK_FWD,
51229997Sken	EQN_TOK_BACK,
52229997Sken	EQN_TOK_DOWN,
53229997Sken	EQN_TOK_UP,
54229997Sken	EQN_TOK_FAT,
55229997Sken	EQN_TOK_ROMAN,
56229997Sken	EQN_TOK_ITALIC,
57229997Sken	EQN_TOK_BOLD,
58229997Sken	EQN_TOK_SIZE,
59229997Sken	EQN_TOK_SUB,
60229997Sken	EQN_TOK_SUP,
61229997Sken	EQN_TOK_SQRT,
62229997Sken	EQN_TOK_OVER,
63229997Sken	EQN_TOK_FROM,
64229997Sken	EQN_TOK_TO,
65229997Sken	EQN_TOK_BRACE_OPEN,
66229997Sken	EQN_TOK_BRACE_CLOSE,
67229997Sken	EQN_TOK_GSIZE,
68229997Sken	EQN_TOK_GFONT,
69229997Sken	EQN_TOK_MARK,
70229997Sken	EQN_TOK_LINEUP,
71229997Sken	EQN_TOK_LEFT,
72229997Sken	EQN_TOK_RIGHT,
73229997Sken	EQN_TOK_PILE,
74229997Sken	EQN_TOK_LPILE,
75229997Sken	EQN_TOK_RPILE,
76229997Sken	EQN_TOK_CPILE,
77229997Sken	EQN_TOK_MATRIX,
78229997Sken	EQN_TOK_CCOL,
79229997Sken	EQN_TOK_LCOL,
80229997Sken	EQN_TOK_RCOL,
81229997Sken	EQN_TOK_DELIM,
82229997Sken	EQN_TOK_DEFINE,
83229997Sken	EQN_TOK_TDEFINE,
84229997Sken	EQN_TOK_NDEFINE,
85229997Sken	EQN_TOK_UNDEF,
86229997Sken	EQN_TOK_ABOVE,
87229997Sken	EQN_TOK__MAX,
88229997Sken	EQN_TOK_FUNC,
89229997Sken	EQN_TOK_QUOTED,
90229997Sken	EQN_TOK_SYM,
91229997Sken	EQN_TOK_EOF
92229997Sken};
93229997Sken
94229997Skenstatic	const char *eqn_toks[EQN_TOK__MAX] = {
95229997Sken	"dyad", /* EQN_TOK_DYAD */
96229997Sken	"vec", /* EQN_TOK_VEC */
97229997Sken	"under", /* EQN_TOK_UNDER */
98229997Sken	"bar", /* EQN_TOK_BAR */
99229997Sken	"tilde", /* EQN_TOK_TILDE */
100229997Sken	"hat", /* EQN_TOK_HAT */
101229997Sken	"dot", /* EQN_TOK_DOT */
102229997Sken	"dotdot", /* EQN_TOK_DOTDOT */
103229997Sken	"fwd", /* EQN_TOK_FWD * */
104229997Sken	"back", /* EQN_TOK_BACK */
105229997Sken	"down", /* EQN_TOK_DOWN */
106229997Sken	"up", /* EQN_TOK_UP */
107229997Sken	"fat", /* EQN_TOK_FAT */
108229997Sken	"roman", /* EQN_TOK_ROMAN */
109229997Sken	"italic", /* EQN_TOK_ITALIC */
110229997Sken	"bold", /* EQN_TOK_BOLD */
111229997Sken	"size", /* EQN_TOK_SIZE */
112229997Sken	"sub", /* EQN_TOK_SUB */
113229997Sken	"sup", /* EQN_TOK_SUP */
114229997Sken	"sqrt", /* EQN_TOK_SQRT */
115229997Sken	"over", /* EQN_TOK_OVER */
116229997Sken	"from", /* EQN_TOK_FROM */
117229997Sken	"to", /* EQN_TOK_TO */
118229997Sken	"{", /* EQN_TOK_BRACE_OPEN */
119229997Sken	"}", /* EQN_TOK_BRACE_CLOSE */
120229997Sken	"gsize", /* EQN_TOK_GSIZE */
121229997Sken	"gfont", /* EQN_TOK_GFONT */
122229997Sken	"mark", /* EQN_TOK_MARK */
123229997Sken	"lineup", /* EQN_TOK_LINEUP */
124229997Sken	"left", /* EQN_TOK_LEFT */
125229997Sken	"right", /* EQN_TOK_RIGHT */
126229997Sken	"pile", /* EQN_TOK_PILE */
127229997Sken	"lpile", /* EQN_TOK_LPILE */
128229997Sken	"rpile", /* EQN_TOK_RPILE */
129229997Sken	"cpile", /* EQN_TOK_CPILE */
130229997Sken	"matrix", /* EQN_TOK_MATRIX */
131229997Sken	"ccol", /* EQN_TOK_CCOL */
132229997Sken	"lcol", /* EQN_TOK_LCOL */
133229997Sken	"rcol", /* EQN_TOK_RCOL */
134229997Sken	"delim", /* EQN_TOK_DELIM */
135229997Sken	"define", /* EQN_TOK_DEFINE */
136229997Sken	"tdefine", /* EQN_TOK_TDEFINE */
137229997Sken	"ndefine", /* EQN_TOK_NDEFINE */
138229997Sken	"undef", /* EQN_TOK_UNDEF */
139229997Sken	"above", /* EQN_TOK_ABOVE */
140229997Sken};
141229997Sken
142229997Skenstatic	const char *const eqn_func[] = {
143229997Sken	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
144229997Sken	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
145229997Sken	"if",	"lim",	"ln",	"log",	"max",	"min",
146229997Sken	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
147229997Sken};
148229997Sken
149229997Skenenum	eqn_symt {
150229997Sken	EQNSYM_alpha = 0,
151229997Sken	EQNSYM_beta,
152229997Sken	EQNSYM_chi,
153229997Sken	EQNSYM_delta,
154229997Sken	EQNSYM_epsilon,
155229997Sken	EQNSYM_eta,
156229997Sken	EQNSYM_gamma,
157229997Sken	EQNSYM_iota,
158229997Sken	EQNSYM_kappa,
159229997Sken	EQNSYM_lambda,
160229997Sken	EQNSYM_mu,
161229997Sken	EQNSYM_nu,
162229997Sken	EQNSYM_omega,
163229997Sken	EQNSYM_omicron,
164229997Sken	EQNSYM_phi,
165229997Sken	EQNSYM_pi,
166229997Sken	EQNSYM_ps,
167229997Sken	EQNSYM_rho,
168229997Sken	EQNSYM_sigma,
169229997Sken	EQNSYM_tau,
170229997Sken	EQNSYM_theta,
171229997Sken	EQNSYM_upsilon,
172229997Sken	EQNSYM_xi,
173229997Sken	EQNSYM_zeta,
174229997Sken	EQNSYM_DELTA,
175229997Sken	EQNSYM_GAMMA,
176229997Sken	EQNSYM_LAMBDA,
177229997Sken	EQNSYM_OMEGA,
178229997Sken	EQNSYM_PHI,
179229997Sken	EQNSYM_PI,
180229997Sken	EQNSYM_PSI,
181229997Sken	EQNSYM_SIGMA,
182229997Sken	EQNSYM_THETA,
183229997Sken	EQNSYM_UPSILON,
184229997Sken	EQNSYM_XI,
185229997Sken	EQNSYM_inter,
186229997Sken	EQNSYM_union,
187229997Sken	EQNSYM_prod,
188229997Sken	EQNSYM_int,
189229997Sken	EQNSYM_sum,
190229997Sken	EQNSYM_grad,
191229997Sken	EQNSYM_del,
192229997Sken	EQNSYM_times,
193229997Sken	EQNSYM_cdot,
194229997Sken	EQNSYM_nothing,
195229997Sken	EQNSYM_approx,
196229997Sken	EQNSYM_prime,
197229997Sken	EQNSYM_half,
198229997Sken	EQNSYM_partial,
199229997Sken	EQNSYM_inf,
200229997Sken	EQNSYM_muchgreat,
201229997Sken	EQNSYM_muchless,
202229997Sken	EQNSYM_larrow,
203229997Sken	EQNSYM_rarrow,
204229997Sken	EQNSYM_pm,
205229997Sken	EQNSYM_nequal,
206229997Sken	EQNSYM_equiv,
207229997Sken	EQNSYM_lessequal,
208229997Sken	EQNSYM_moreequal,
209229997Sken	EQNSYM_minus,
210229997Sken	EQNSYM__MAX
211229997Sken};
212229997Sken
213229997Skenstruct	eqnsym {
214229997Sken	const char	*str;
215229997Sken	const char	*sym;
216229997Sken};
217229997Sken
218229997Skenstatic	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
219229997Sken	{ "alpha", "*a" }, /* EQNSYM_alpha */
220229997Sken	{ "beta", "*b" }, /* EQNSYM_beta */
221229997Sken	{ "chi", "*x" }, /* EQNSYM_chi */
222229997Sken	{ "delta", "*d" }, /* EQNSYM_delta */
223229997Sken	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
224229997Sken	{ "eta", "*y" }, /* EQNSYM_eta */
225229997Sken	{ "gamma", "*g" }, /* EQNSYM_gamma */
226229997Sken	{ "iota", "*i" }, /* EQNSYM_iota */
227229997Sken	{ "kappa", "*k" }, /* EQNSYM_kappa */
228229997Sken	{ "lambda", "*l" }, /* EQNSYM_lambda */
229229997Sken	{ "mu", "*m" }, /* EQNSYM_mu */
230229997Sken	{ "nu", "*n" }, /* EQNSYM_nu */
231229997Sken	{ "omega", "*w" }, /* EQNSYM_omega */
232229997Sken	{ "omicron", "*o" }, /* EQNSYM_omicron */
233229997Sken	{ "phi", "*f" }, /* EQNSYM_phi */
234229997Sken	{ "pi", "*p" }, /* EQNSYM_pi */
235229997Sken	{ "psi", "*q" }, /* EQNSYM_psi */
236229997Sken	{ "rho", "*r" }, /* EQNSYM_rho */
237229997Sken	{ "sigma", "*s" }, /* EQNSYM_sigma */
238229997Sken	{ "tau", "*t" }, /* EQNSYM_tau */
239229997Sken	{ "theta", "*h" }, /* EQNSYM_theta */
240229997Sken	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
241229997Sken	{ "xi", "*c" }, /* EQNSYM_xi */
242229997Sken	{ "zeta", "*z" }, /* EQNSYM_zeta */
243229997Sken	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
244229997Sken	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
245229997Sken	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
246229997Sken	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
247229997Sken	{ "PHI", "*F" }, /* EQNSYM_PHI */
248229997Sken	{ "PI", "*P" }, /* EQNSYM_PI */
249229997Sken	{ "PSI", "*Q" }, /* EQNSYM_PSI */
250229997Sken	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
251229997Sken	{ "THETA", "*H" }, /* EQNSYM_THETA */
252229997Sken	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
253229997Sken	{ "XI", "*C" }, /* EQNSYM_XI */
254229997Sken	{ "inter", "ca" }, /* EQNSYM_inter */
255229997Sken	{ "union", "cu" }, /* EQNSYM_union */
256229997Sken	{ "prod", "product" }, /* EQNSYM_prod */
257229997Sken	{ "int", "integral" }, /* EQNSYM_int */
258229997Sken	{ "sum", "sum" }, /* EQNSYM_sum */
259229997Sken	{ "grad", "gr" }, /* EQNSYM_grad */
260229997Sken	{ "del", "gr" }, /* EQNSYM_del */
261229997Sken	{ "times", "mu" }, /* EQNSYM_times */
262229997Sken	{ "cdot", "pc" }, /* EQNSYM_cdot */
263229997Sken	{ "nothing", "&" }, /* EQNSYM_nothing */
264229997Sken	{ "approx", "~~" }, /* EQNSYM_approx */
265229997Sken	{ "prime", "fm" }, /* EQNSYM_prime */
266229997Sken	{ "half", "12" }, /* EQNSYM_half */
267229997Sken	{ "partial", "pd" }, /* EQNSYM_partial */
268229997Sken	{ "inf", "if" }, /* EQNSYM_inf */
269229997Sken	{ ">>", ">>" }, /* EQNSYM_muchgreat */
270229997Sken	{ "<<", "<<" }, /* EQNSYM_muchless */
271229997Sken	{ "<-", "<-" }, /* EQNSYM_larrow */
272229997Sken	{ "->", "->" }, /* EQNSYM_rarrow */
273229997Sken	{ "+-", "+-" }, /* EQNSYM_pm */
274229997Sken	{ "!=", "!=" }, /* EQNSYM_nequal */
275229997Sken	{ "==", "==" }, /* EQNSYM_equiv */
276229997Sken	{ "<=", "<=" }, /* EQNSYM_lessequal */
277229997Sken	{ ">=", ">=" }, /* EQNSYM_moreequal */
278229997Sken	{ "-", "mi" }, /* EQNSYM_minus */
279229997Sken};
280229997Sken
281229997Skenenum	parse_mode {
282229997Sken	MODE_QUOTED,
283229997Sken	MODE_NOSUB,
284229997Sken	MODE_SUB,
285229997Sken	MODE_TOK
286229997Sken};
287229997Sken
288229997Skenstruct	eqn_def {
289229997Sken	char		 *key;
290229997Sken	size_t		  keysz;
291229997Sken	char		 *val;
292229997Sken	size_t		  valsz;
293229997Sken};
294229997Sken
295229997Skenstatic	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
296229997Skenstatic	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
297229997Sken				struct eqn_box *);
298229997Skenstatic	void		 eqn_def(struct eqn_node *);
299229997Skenstatic	struct eqn_def	*eqn_def_find(struct eqn_node *);
300229997Skenstatic	void		 eqn_delim(struct eqn_node *);
301229997Skenstatic	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
302229997Skenstatic	void		 eqn_undef(struct eqn_node *);
303229997Sken
304229997Sken
305229997Skenstruct eqn_node *
306229997Skeneqn_alloc(void)
307229997Sken{
308229997Sken	struct eqn_node *ep;
309229997Sken
310229997Sken	ep = mandoc_calloc(1, sizeof(*ep));
311229997Sken	ep->gsize = EQN_DEFSIZE;
312229997Sken	return ep;
313229997Sken}
314229997Sken
315229997Skenvoid
316229997Skeneqn_reset(struct eqn_node *ep)
317229997Sken{
318229997Sken	free(ep->data);
319229997Sken	ep->data = ep->start = ep->end = NULL;
320229997Sken	ep->sz = ep->toksz = 0;
321229997Sken}
322229997Sken
323229997Skenvoid
324229997Skeneqn_read(struct eqn_node *ep, const char *p)
325229997Sken{
326229997Sken	char		*cp;
327229997Sken
328229997Sken	if (ep->data == NULL) {
329229997Sken		ep->sz = strlen(p);
330229997Sken		ep->data = mandoc_strdup(p);
331229997Sken	} else {
332229997Sken		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
333229997Sken		free(ep->data);
334229997Sken		ep->data = cp;
335229997Sken	}
336229997Sken	ep->sz += 1;
337229997Sken}
338229997Sken
339229997Sken/*
340229997Sken * Find the key "key" of the give size within our eqn-defined values.
341229997Sken */
342229997Skenstatic struct eqn_def *
343229997Skeneqn_def_find(struct eqn_node *ep)
344229997Sken{
345229997Sken	int		 i;
346229997Sken
347229997Sken	for (i = 0; i < (int)ep->defsz; i++)
348229997Sken		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
349229997Sken		    ep->defs[i].keysz, ep->start, ep->toksz))
350229997Sken			return &ep->defs[i];
351229997Sken
352229997Sken	return NULL;
353229997Sken}
354229997Sken
355229997Sken/*
356229997Sken * Parse a token from the input text.  The modes are:
357229997Sken * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
358229997Sken *   before its next occurence.  Do not interpret the token in any
359229997Sken *   way and return EQN_TOK_QUOTED.  All other modes behave like
360229997Sken *   MODE_QUOTED when *ep->start is '"'.
361229997Sken * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
362229997Sken *   otherwise, it ends before the next whitespace or brace.
363229997Sken *   Do not interpret the token and return EQN_TOK__MAX.
364229997Sken * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
365229997Sken *   alias created with define.  If it is an alias, replace it with
366229997Sken *   its string value and reparse.
367229997Sken * MODE_TOK: Like MODE_SUB, but also check the token against the list
368229997Sken *   of tokens, and if there is a match, return that token.  Otherwise,
369229997Sken *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
370229997Sken *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
371229997Sken *   a token match, *ep->start is set to an allocated string that the
372229997Sken *   caller is expected to free.
373229997Sken * All modes skip whitespace following the end of the token.
374229997Sken */
375229997Skenstatic enum eqn_tok
376229997Skeneqn_next(struct eqn_node *ep, enum parse_mode mode)
377229997Sken{
378229997Sken	static int	 last_len, lim;
379229997Sken
380229997Sken	struct eqn_def	*def;
381229997Sken	size_t		 start;
382229997Sken	int		 diff, i, quoted;
383229997Sken	enum eqn_tok	 tok;
384229997Sken
385229997Sken	/*
386229997Sken	 * Reset the recursion counter after advancing
387229997Sken	 * beyond the end of the previous substitution.
388229997Sken	 */
389229997Sken	if (ep->end - ep->data >= last_len)
390229997Sken		lim = 0;
391229997Sken
392229997Sken	ep->start = ep->end;
393229997Sken	quoted = mode == MODE_QUOTED;
394229997Sken	for (;;) {
395229997Sken		switch (*ep->start) {
396229997Sken		case '\0':
397229997Sken			ep->toksz = 0;
398229997Sken			return EQN_TOK_EOF;
399229997Sken		case '"':
400229997Sken			quoted = 1;
401229997Sken			break;
402229997Sken		case ' ':
403229997Sken		case '\t':
404229997Sken		case '~':
405229997Sken		case '^':
406229997Sken			if (quoted)
407229997Sken				break;
408229997Sken			ep->start++;
409229997Sken			continue;
410229997Sken		default:
411229997Sken			break;
412229997Sken		}
413229997Sken		if (quoted) {
414229997Sken			ep->end = strchr(ep->start + 1, *ep->start);
415229997Sken			ep->start++;  /* Skip opening quote. */
416229997Sken			if (ep->end == NULL) {
417229997Sken				mandoc_msg(MANDOCERR_ARG_QUOTE,
418229997Sken				    ep->node->line, ep->node->pos, NULL);
419229997Sken				ep->end = strchr(ep->start, '\0');
420229997Sken			}
421229997Sken		} else {
422229997Sken			ep->end = ep->start + 1;
423229997Sken			if (*ep->start != '{' && *ep->start != '}')
424229997Sken				ep->end += strcspn(ep->end, " ^~\"{}\t");
425229997Sken		}
426229997Sken		ep->toksz = ep->end - ep->start;
427229997Sken		if (quoted && *ep->end != '\0')
428229997Sken			ep->end++;  /* Skip closing quote. */
429229997Sken		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
430229997Sken			ep->end++;
431229997Sken		if (quoted)  /* Cannot return, may have to strndup. */
432229997Sken			break;
433229997Sken		if (mode == MODE_NOSUB)
434229997Sken			return EQN_TOK__MAX;
435229997Sken		if ((def = eqn_def_find(ep)) == NULL)
436229997Sken			break;
437229997Sken		if (++lim > EQN_NEST_MAX) {
438229997Sken			mandoc_msg(MANDOCERR_ROFFLOOP,
439229997Sken			    ep->node->line, ep->node->pos, NULL);
440229997Sken			return EQN_TOK_EOF;
441229997Sken		}
442229997Sken
443229997Sken		/* Replace a defined name with its string value. */
444229997Sken		if ((diff = def->valsz - ep->toksz) > 0) {
445229997Sken			start = ep->start - ep->data;
446229997Sken			ep->sz += diff;
447229997Sken			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
448229997Sken			ep->start = ep->data + start;
449229997Sken		}
450229997Sken		if (diff)
451229997Sken			memmove(ep->start + def->valsz, ep->start + ep->toksz,
452229997Sken			    strlen(ep->start + ep->toksz) + 1);
453229997Sken		memcpy(ep->start, def->val, def->valsz);
454229997Sken		last_len = ep->start - ep->data + def->valsz;
455229997Sken	}
456229997Sken	if (mode != MODE_TOK)
457229997Sken		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
458229997Sken	if (quoted) {
459229997Sken		ep->start = mandoc_strndup(ep->start, ep->toksz);
460229997Sken		return EQN_TOK_QUOTED;
461229997Sken	}
462229997Sken	for (tok = 0; tok < EQN_TOK__MAX; tok++)
463229997Sken		if (STRNEQ(ep->start, ep->toksz,
464229997Sken		    eqn_toks[tok], strlen(eqn_toks[tok])))
465229997Sken			return tok;
466229997Sken
467229997Sken	for (i = 0; i < EQNSYM__MAX; i++) {
468229997Sken		if (STRNEQ(ep->start, ep->toksz,
469229997Sken		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
470229997Sken			mandoc_asprintf(&ep->start,
471229997Sken			    "\\[%s]", eqnsyms[i].sym);
472229997Sken			return EQN_TOK_SYM;
473229997Sken		}
474229997Sken	}
475	ep->start = mandoc_strndup(ep->start, ep->toksz);
476	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
477		if (STRNEQ(ep->start, ep->toksz,
478		    eqn_func[i], strlen(eqn_func[i])))
479			return EQN_TOK_FUNC;
480	return EQN_TOK__MAX;
481}
482
483void
484eqn_box_free(struct eqn_box *bp)
485{
486	if (bp == NULL)
487		return;
488
489	if (bp->first)
490		eqn_box_free(bp->first);
491	if (bp->next)
492		eqn_box_free(bp->next);
493
494	free(bp->text);
495	free(bp->left);
496	free(bp->right);
497	free(bp->top);
498	free(bp->bottom);
499	free(bp);
500}
501
502struct eqn_box *
503eqn_box_new(void)
504{
505	struct eqn_box	*bp;
506
507	bp = mandoc_calloc(1, sizeof(*bp));
508	bp->expectargs = UINT_MAX;
509	return bp;
510}
511
512/*
513 * Allocate a box as the last child of the parent node.
514 */
515static struct eqn_box *
516eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
517{
518	struct eqn_box	*bp;
519
520	bp = eqn_box_new();
521	bp->parent = parent;
522	bp->parent->args++;
523	bp->font = bp->parent->font;
524	bp->size = ep->gsize;
525
526	if (NULL != parent->first) {
527		parent->last->next = bp;
528		bp->prev = parent->last;
529	} else
530		parent->first = bp;
531
532	parent->last = bp;
533	return bp;
534}
535
536/*
537 * Reparent the current last node (of the current parent) under a new
538 * EQN_SUBEXPR as the first element.
539 * Then return the new parent.
540 * The new EQN_SUBEXPR will have a two-child limit.
541 */
542static struct eqn_box *
543eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
544{
545	struct eqn_box	*b, *newb;
546
547	assert(NULL != parent->last);
548	b = parent->last;
549	if (parent->last == parent->first)
550		parent->first = NULL;
551	parent->args--;
552	parent->last = b->prev;
553	b->prev = NULL;
554	newb = eqn_box_alloc(ep, parent);
555	newb->type = EQN_SUBEXPR;
556	newb->expectargs = 2;
557	newb->args = 1;
558	newb->first = newb->last = b;
559	newb->first->next = NULL;
560	b->parent = newb;
561	return newb;
562}
563
564/*
565 * Parse the "delim" control statement.
566 */
567static void
568eqn_delim(struct eqn_node *ep)
569{
570	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
571		mandoc_msg(MANDOCERR_REQ_EMPTY,
572		    ep->node->line, ep->node->pos, "delim");
573		if (ep->end[0] != '\0')
574			ep->end++;
575	} else if (strncmp(ep->end, "off", 3) == 0) {
576		ep->delim = 0;
577		ep->end += 3;
578	} else if (strncmp(ep->end, "on", 2) == 0) {
579		if (ep->odelim && ep->cdelim)
580			ep->delim = 1;
581		ep->end += 2;
582	} else {
583		ep->odelim = *ep->end++;
584		ep->cdelim = *ep->end++;
585		ep->delim = 1;
586	}
587}
588
589/*
590 * Undefine a previously-defined string.
591 */
592static void
593eqn_undef(struct eqn_node *ep)
594{
595	struct eqn_def	*def;
596
597	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
598		mandoc_msg(MANDOCERR_REQ_EMPTY,
599		    ep->node->line, ep->node->pos, "undef");
600		return;
601	}
602	if ((def = eqn_def_find(ep)) == NULL)
603		return;
604	free(def->key);
605	free(def->val);
606	def->key = def->val = NULL;
607	def->keysz = def->valsz = 0;
608}
609
610static void
611eqn_def(struct eqn_node *ep)
612{
613	struct eqn_def	*def;
614	int		 i;
615
616	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
617		mandoc_msg(MANDOCERR_REQ_EMPTY,
618		    ep->node->line, ep->node->pos, "define");
619		return;
620	}
621
622	/*
623	 * Search for a key that already exists.
624	 * Create a new key if none is found.
625	 */
626	if ((def = eqn_def_find(ep)) == NULL) {
627		/* Find holes in string array. */
628		for (i = 0; i < (int)ep->defsz; i++)
629			if (0 == ep->defs[i].keysz)
630				break;
631
632		if (i == (int)ep->defsz) {
633			ep->defsz++;
634			ep->defs = mandoc_reallocarray(ep->defs,
635			    ep->defsz, sizeof(struct eqn_def));
636			ep->defs[i].key = ep->defs[i].val = NULL;
637		}
638
639		def = ep->defs + i;
640		free(def->key);
641		def->key = mandoc_strndup(ep->start, ep->toksz);
642		def->keysz = ep->toksz;
643	}
644
645	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
646		mandoc_msg(MANDOCERR_REQ_EMPTY,
647		    ep->node->line, ep->node->pos, "define %s", def->key);
648		free(def->key);
649		free(def->val);
650		def->key = def->val = NULL;
651		def->keysz = def->valsz = 0;
652		return;
653	}
654	free(def->val);
655	def->val = mandoc_strndup(ep->start, ep->toksz);
656	def->valsz = ep->toksz;
657}
658
659void
660eqn_parse(struct eqn_node *ep)
661{
662	struct eqn_box	*cur, *nbox, *parent, *split;
663	const char	*cp, *cpn;
664	char		*p;
665	enum eqn_tok	 tok;
666	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
667	int		 size;
668
669	parent = ep->node->eqn;
670	assert(parent != NULL);
671
672	/*
673	 * Empty equation.
674	 * Do not add it to the high-level syntax tree.
675	 */
676
677	if (ep->data == NULL)
678		return;
679
680	ep->start = ep->end = ep->data;
681
682next_tok:
683	tok = eqn_next(ep, MODE_TOK);
684	switch (tok) {
685	case EQN_TOK_UNDEF:
686		eqn_undef(ep);
687		break;
688	case EQN_TOK_NDEFINE:
689	case EQN_TOK_DEFINE:
690		eqn_def(ep);
691		break;
692	case EQN_TOK_TDEFINE:
693		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
694		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
695			mandoc_msg(MANDOCERR_REQ_EMPTY,
696			    ep->node->line, ep->node->pos, "tdefine");
697		break;
698	case EQN_TOK_DELIM:
699		eqn_delim(ep);
700		break;
701	case EQN_TOK_GFONT:
702		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
703			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
704			    ep->node->pos, "%s", eqn_toks[tok]);
705		break;
706	case EQN_TOK_MARK:
707	case EQN_TOK_LINEUP:
708		/* Ignore these. */
709		break;
710	case EQN_TOK_DYAD:
711	case EQN_TOK_VEC:
712	case EQN_TOK_UNDER:
713	case EQN_TOK_BAR:
714	case EQN_TOK_TILDE:
715	case EQN_TOK_HAT:
716	case EQN_TOK_DOT:
717	case EQN_TOK_DOTDOT:
718		if (parent->last == NULL) {
719			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
720			    ep->node->pos, "%s", eqn_toks[tok]);
721			cur = eqn_box_alloc(ep, parent);
722			cur->type = EQN_TEXT;
723			cur->text = mandoc_strdup("");
724		}
725		parent = eqn_box_makebinary(ep, parent);
726		parent->type = EQN_LIST;
727		parent->expectargs = 1;
728		parent->font = EQNFONT_ROMAN;
729		switch (tok) {
730		case EQN_TOK_DOTDOT:
731			parent->top = mandoc_strdup("\\[ad]");
732			break;
733		case EQN_TOK_VEC:
734			parent->top = mandoc_strdup("\\[->]");
735			break;
736		case EQN_TOK_DYAD:
737			parent->top = mandoc_strdup("\\[<>]");
738			break;
739		case EQN_TOK_TILDE:
740			parent->top = mandoc_strdup("\\[a~]");
741			break;
742		case EQN_TOK_UNDER:
743			parent->bottom = mandoc_strdup("\\[ul]");
744			break;
745		case EQN_TOK_BAR:
746			parent->top = mandoc_strdup("\\[rn]");
747			break;
748		case EQN_TOK_DOT:
749			parent->top = mandoc_strdup("\\[a.]");
750			break;
751		case EQN_TOK_HAT:
752			parent->top = mandoc_strdup("\\[ha]");
753			break;
754		default:
755			abort();
756		}
757		parent = parent->parent;
758		break;
759	case EQN_TOK_FWD:
760	case EQN_TOK_BACK:
761	case EQN_TOK_DOWN:
762	case EQN_TOK_UP:
763		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
764			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
765			    ep->node->pos, "%s", eqn_toks[tok]);
766		break;
767	case EQN_TOK_FAT:
768	case EQN_TOK_ROMAN:
769	case EQN_TOK_ITALIC:
770	case EQN_TOK_BOLD:
771		while (parent->args == parent->expectargs)
772			parent = parent->parent;
773		/*
774		 * These values apply to the next word or sequence of
775		 * words; thus, we mark that we'll have a child with
776		 * exactly one of those.
777		 */
778		parent = eqn_box_alloc(ep, parent);
779		parent->type = EQN_LIST;
780		parent->expectargs = 1;
781		switch (tok) {
782		case EQN_TOK_FAT:
783			parent->font = EQNFONT_FAT;
784			break;
785		case EQN_TOK_ROMAN:
786			parent->font = EQNFONT_ROMAN;
787			break;
788		case EQN_TOK_ITALIC:
789			parent->font = EQNFONT_ITALIC;
790			break;
791		case EQN_TOK_BOLD:
792			parent->font = EQNFONT_BOLD;
793			break;
794		default:
795			abort();
796		}
797		break;
798	case EQN_TOK_SIZE:
799	case EQN_TOK_GSIZE:
800		/* Accept two values: integral size and a single. */
801		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
802			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
803			    ep->node->pos, "%s", eqn_toks[tok]);
804			break;
805		}
806		size = mandoc_strntoi(ep->start, ep->toksz, 10);
807		if (-1 == size) {
808			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
809			    ep->node->pos, "%s", eqn_toks[tok]);
810			break;
811		}
812		if (EQN_TOK_GSIZE == tok) {
813			ep->gsize = size;
814			break;
815		}
816		while (parent->args == parent->expectargs)
817			parent = parent->parent;
818		parent = eqn_box_alloc(ep, parent);
819		parent->type = EQN_LIST;
820		parent->expectargs = 1;
821		parent->size = size;
822		break;
823	case EQN_TOK_FROM:
824	case EQN_TOK_TO:
825	case EQN_TOK_SUB:
826	case EQN_TOK_SUP:
827		/*
828		 * We have a left-right-associative expression.
829		 * Repivot under a positional node, open a child scope
830		 * and keep on reading.
831		 */
832		if (parent->last == NULL) {
833			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
834			    ep->node->pos, "%s", eqn_toks[tok]);
835			cur = eqn_box_alloc(ep, parent);
836			cur->type = EQN_TEXT;
837			cur->text = mandoc_strdup("");
838		}
839		while (parent->expectargs == 1 && parent->args == 1)
840			parent = parent->parent;
841		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
842			for (cur = parent; cur != NULL; cur = cur->parent)
843				if (cur->pos == EQNPOS_SUB ||
844				    cur->pos == EQNPOS_SUP ||
845				    cur->pos == EQNPOS_SUBSUP ||
846				    cur->pos == EQNPOS_SQRT ||
847				    cur->pos == EQNPOS_OVER)
848					break;
849			if (cur != NULL)
850				parent = cur->parent;
851		}
852		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
853			parent->expectargs = 3;
854			parent->pos = EQNPOS_SUBSUP;
855			break;
856		}
857		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
858			parent->expectargs = 3;
859			parent->pos = EQNPOS_FROMTO;
860			break;
861		}
862		parent = eqn_box_makebinary(ep, parent);
863		switch (tok) {
864		case EQN_TOK_FROM:
865			parent->pos = EQNPOS_FROM;
866			break;
867		case EQN_TOK_TO:
868			parent->pos = EQNPOS_TO;
869			break;
870		case EQN_TOK_SUP:
871			parent->pos = EQNPOS_SUP;
872			break;
873		case EQN_TOK_SUB:
874			parent->pos = EQNPOS_SUB;
875			break;
876		default:
877			abort();
878		}
879		break;
880	case EQN_TOK_SQRT:
881		while (parent->args == parent->expectargs)
882			parent = parent->parent;
883		/*
884		 * Accept a left-right-associative set of arguments just
885		 * like sub and sup and friends but without rebalancing
886		 * under a pivot.
887		 */
888		parent = eqn_box_alloc(ep, parent);
889		parent->type = EQN_SUBEXPR;
890		parent->pos = EQNPOS_SQRT;
891		parent->expectargs = 1;
892		break;
893	case EQN_TOK_OVER:
894		/*
895		 * We have a right-left-associative fraction.
896		 * Close out anything that's currently open, then
897		 * rebalance and continue reading.
898		 */
899		if (parent->last == NULL) {
900			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
901			    ep->node->pos, "%s", eqn_toks[tok]);
902			cur = eqn_box_alloc(ep, parent);
903			cur->type = EQN_TEXT;
904			cur->text = mandoc_strdup("");
905		}
906		while (parent->args == parent->expectargs)
907			parent = parent->parent;
908		while (EQN_SUBEXPR == parent->type)
909			parent = parent->parent;
910		parent = eqn_box_makebinary(ep, parent);
911		parent->pos = EQNPOS_OVER;
912		break;
913	case EQN_TOK_RIGHT:
914	case EQN_TOK_BRACE_CLOSE:
915		/*
916		 * Close out the existing brace.
917		 * FIXME: this is a shitty sentinel: we should really
918		 * have a native EQN_BRACE type or whatnot.
919		 */
920		for (cur = parent; cur != NULL; cur = cur->parent)
921			if (cur->type == EQN_LIST &&
922			    cur->expectargs > 1 &&
923			    (tok == EQN_TOK_BRACE_CLOSE ||
924			     cur->left != NULL))
925				break;
926		if (cur == NULL) {
927			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
928			    ep->node->pos, "%s", eqn_toks[tok]);
929			break;
930		}
931		parent = cur;
932		if (EQN_TOK_RIGHT == tok) {
933			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
934				mandoc_msg(MANDOCERR_REQ_EMPTY,
935				    ep->node->line, ep->node->pos,
936				    "%s", eqn_toks[tok]);
937				break;
938			}
939			/* Handling depends on right/left. */
940			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
941				parent->right = mandoc_strdup("\\[rc]");
942			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
943				parent->right = mandoc_strdup("\\[rf]");
944			else
945				parent->right =
946				    mandoc_strndup(ep->start, ep->toksz);
947		}
948		parent = parent->parent;
949		if (tok == EQN_TOK_BRACE_CLOSE &&
950		    (parent->type == EQN_PILE ||
951		     parent->type == EQN_MATRIX))
952			parent = parent->parent;
953		/* Close out any "singleton" lists. */
954		while (parent->type == EQN_LIST &&
955		    parent->expectargs == 1 &&
956		    parent->args == 1)
957			parent = parent->parent;
958		break;
959	case EQN_TOK_BRACE_OPEN:
960	case EQN_TOK_LEFT:
961		/*
962		 * If we already have something in the stack and we're
963		 * in an expression, then rewind til we're not any more
964		 * (just like with the text node).
965		 */
966		while (parent->args == parent->expectargs)
967			parent = parent->parent;
968		if (EQN_TOK_LEFT == tok &&
969		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
970			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
971			    ep->node->pos, "%s", eqn_toks[tok]);
972			break;
973		}
974		parent = eqn_box_alloc(ep, parent);
975		parent->type = EQN_LIST;
976		if (EQN_TOK_LEFT == tok) {
977			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
978				parent->left = mandoc_strdup("\\[lc]");
979			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
980				parent->left = mandoc_strdup("\\[lf]");
981			else
982				parent->left =
983				    mandoc_strndup(ep->start, ep->toksz);
984		}
985		break;
986	case EQN_TOK_PILE:
987	case EQN_TOK_LPILE:
988	case EQN_TOK_RPILE:
989	case EQN_TOK_CPILE:
990	case EQN_TOK_CCOL:
991	case EQN_TOK_LCOL:
992	case EQN_TOK_RCOL:
993		while (parent->args == parent->expectargs)
994			parent = parent->parent;
995		parent = eqn_box_alloc(ep, parent);
996		parent->type = EQN_PILE;
997		parent->expectargs = 1;
998		break;
999	case EQN_TOK_ABOVE:
1000		for (cur = parent; cur != NULL; cur = cur->parent)
1001			if (cur->type == EQN_PILE)
1002				break;
1003		if (cur == NULL) {
1004			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
1005			    ep->node->pos, "%s", eqn_toks[tok]);
1006			break;
1007		}
1008		parent = eqn_box_alloc(ep, cur);
1009		parent->type = EQN_LIST;
1010		break;
1011	case EQN_TOK_MATRIX:
1012		while (parent->args == parent->expectargs)
1013			parent = parent->parent;
1014		parent = eqn_box_alloc(ep, parent);
1015		parent->type = EQN_MATRIX;
1016		parent->expectargs = 1;
1017		break;
1018	case EQN_TOK_EOF:
1019		return;
1020	case EQN_TOK__MAX:
1021	case EQN_TOK_FUNC:
1022	case EQN_TOK_QUOTED:
1023	case EQN_TOK_SYM:
1024		p = ep->start;
1025		assert(p != NULL);
1026		/*
1027		 * If we already have something in the stack and we're
1028		 * in an expression, then rewind til we're not any more.
1029		 */
1030		while (parent->args == parent->expectargs)
1031			parent = parent->parent;
1032		cur = eqn_box_alloc(ep, parent);
1033		cur->type = EQN_TEXT;
1034		cur->text = p;
1035		switch (tok) {
1036		case EQN_TOK_FUNC:
1037			cur->font = EQNFONT_ROMAN;
1038			break;
1039		case EQN_TOK_QUOTED:
1040			if (cur->font == EQNFONT_NONE)
1041				cur->font = EQNFONT_ITALIC;
1042			break;
1043		case EQN_TOK_SYM:
1044			break;
1045		default:
1046			if (cur->font != EQNFONT_NONE || *p == '\0')
1047				break;
1048			cpn = p - 1;
1049			ccln = CCL_LET;
1050			split = NULL;
1051			for (;;) {
1052				/* Advance to next character. */
1053				cp = cpn++;
1054				ccl = ccln;
1055				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1056				    isdigit((unsigned char)*cpn) ||
1057				    (*cpn == '.' && (ccl == CCL_DIG ||
1058				     isdigit((unsigned char)cpn[1]))) ?
1059				    CCL_DIG : CCL_PUN;
1060				/* No boundary before first character. */
1061				if (cp < p)
1062					continue;
1063				cur->font = ccl == CCL_LET ?
1064				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1065				if (*cp == '\\')
1066					mandoc_escape(&cpn, NULL, NULL);
1067				/* No boundary after last character. */
1068				if (*cpn == '\0')
1069					break;
1070				if (ccln == ccl && *cp != ',' && *cpn != ',')
1071					continue;
1072				/* Boundary found, split the text. */
1073				if (parent->args == parent->expectargs) {
1074					/* Remove the text from the tree. */
1075					if (cur->prev == NULL)
1076						parent->first = cur->next;
1077					else
1078						cur->prev->next = NULL;
1079					parent->last = cur->prev;
1080					parent->args--;
1081					/* Set up a list instead. */
1082					split = eqn_box_alloc(ep, parent);
1083					split->type = EQN_LIST;
1084					/* Insert the word into the list. */
1085					split->first = split->last = cur;
1086					cur->parent = split;
1087					cur->prev = NULL;
1088					parent = split;
1089				}
1090				/* Append a new text box. */
1091				nbox = eqn_box_alloc(ep, parent);
1092				nbox->type = EQN_TEXT;
1093				nbox->text = mandoc_strdup(cpn);
1094				/* Truncate the old box. */
1095				p = mandoc_strndup(cur->text,
1096				    cpn - cur->text);
1097				free(cur->text);
1098				cur->text = p;
1099				/* Setup to process the new box. */
1100				cur = nbox;
1101				p = nbox->text;
1102				cpn = p - 1;
1103				ccln = CCL_LET;
1104			}
1105			if (split != NULL)
1106				parent = split->parent;
1107			break;
1108		}
1109		break;
1110	default:
1111		abort();
1112	}
1113	goto next_tok;
1114}
1115
1116void
1117eqn_free(struct eqn_node *p)
1118{
1119	int		 i;
1120
1121	if (p == NULL)
1122		return;
1123
1124	for (i = 0; i < (int)p->defsz; i++) {
1125		free(p->defs[i].key);
1126		free(p->defs[i].val);
1127	}
1128
1129	free(p->data);
1130	free(p->defs);
1131	free(p);
1132}
1133