1/*	$Id: eqn.c,v 1.84 2020/01/08 12:16:24 schwarze Exp $ */
2/*
3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21
22#include <assert.h>
23#include <ctype.h>
24#include <limits.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <time.h>
29
30#include "mandoc_aux.h"
31#include "mandoc.h"
32#include "roff.h"
33#include "eqn.h"
34#include "libmandoc.h"
35#include "eqn_parse.h"
36
37#define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
38#define	STRNEQ(p1, sz1, p2, sz2) \
39	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
40
41enum	eqn_tok {
42	EQN_TOK_DYAD = 0,
43	EQN_TOK_VEC,
44	EQN_TOK_UNDER,
45	EQN_TOK_BAR,
46	EQN_TOK_TILDE,
47	EQN_TOK_HAT,
48	EQN_TOK_DOT,
49	EQN_TOK_DOTDOT,
50	EQN_TOK_FWD,
51	EQN_TOK_BACK,
52	EQN_TOK_DOWN,
53	EQN_TOK_UP,
54	EQN_TOK_FAT,
55	EQN_TOK_ROMAN,
56	EQN_TOK_ITALIC,
57	EQN_TOK_BOLD,
58	EQN_TOK_SIZE,
59	EQN_TOK_SUB,
60	EQN_TOK_SUP,
61	EQN_TOK_SQRT,
62	EQN_TOK_OVER,
63	EQN_TOK_FROM,
64	EQN_TOK_TO,
65	EQN_TOK_BRACE_OPEN,
66	EQN_TOK_BRACE_CLOSE,
67	EQN_TOK_GSIZE,
68	EQN_TOK_GFONT,
69	EQN_TOK_MARK,
70	EQN_TOK_LINEUP,
71	EQN_TOK_LEFT,
72	EQN_TOK_RIGHT,
73	EQN_TOK_PILE,
74	EQN_TOK_LPILE,
75	EQN_TOK_RPILE,
76	EQN_TOK_CPILE,
77	EQN_TOK_MATRIX,
78	EQN_TOK_CCOL,
79	EQN_TOK_LCOL,
80	EQN_TOK_RCOL,
81	EQN_TOK_DELIM,
82	EQN_TOK_DEFINE,
83	EQN_TOK_TDEFINE,
84	EQN_TOK_NDEFINE,
85	EQN_TOK_UNDEF,
86	EQN_TOK_ABOVE,
87	EQN_TOK__MAX,
88	EQN_TOK_FUNC,
89	EQN_TOK_QUOTED,
90	EQN_TOK_SYM,
91	EQN_TOK_EOF
92};
93
94static	const char *eqn_toks[EQN_TOK__MAX] = {
95	"dyad", /* EQN_TOK_DYAD */
96	"vec", /* EQN_TOK_VEC */
97	"under", /* EQN_TOK_UNDER */
98	"bar", /* EQN_TOK_BAR */
99	"tilde", /* EQN_TOK_TILDE */
100	"hat", /* EQN_TOK_HAT */
101	"dot", /* EQN_TOK_DOT */
102	"dotdot", /* EQN_TOK_DOTDOT */
103	"fwd", /* EQN_TOK_FWD * */
104	"back", /* EQN_TOK_BACK */
105	"down", /* EQN_TOK_DOWN */
106	"up", /* EQN_TOK_UP */
107	"fat", /* EQN_TOK_FAT */
108	"roman", /* EQN_TOK_ROMAN */
109	"italic", /* EQN_TOK_ITALIC */
110	"bold", /* EQN_TOK_BOLD */
111	"size", /* EQN_TOK_SIZE */
112	"sub", /* EQN_TOK_SUB */
113	"sup", /* EQN_TOK_SUP */
114	"sqrt", /* EQN_TOK_SQRT */
115	"over", /* EQN_TOK_OVER */
116	"from", /* EQN_TOK_FROM */
117	"to", /* EQN_TOK_TO */
118	"{", /* EQN_TOK_BRACE_OPEN */
119	"}", /* EQN_TOK_BRACE_CLOSE */
120	"gsize", /* EQN_TOK_GSIZE */
121	"gfont", /* EQN_TOK_GFONT */
122	"mark", /* EQN_TOK_MARK */
123	"lineup", /* EQN_TOK_LINEUP */
124	"left", /* EQN_TOK_LEFT */
125	"right", /* EQN_TOK_RIGHT */
126	"pile", /* EQN_TOK_PILE */
127	"lpile", /* EQN_TOK_LPILE */
128	"rpile", /* EQN_TOK_RPILE */
129	"cpile", /* EQN_TOK_CPILE */
130	"matrix", /* EQN_TOK_MATRIX */
131	"ccol", /* EQN_TOK_CCOL */
132	"lcol", /* EQN_TOK_LCOL */
133	"rcol", /* EQN_TOK_RCOL */
134	"delim", /* EQN_TOK_DELIM */
135	"define", /* EQN_TOK_DEFINE */
136	"tdefine", /* EQN_TOK_TDEFINE */
137	"ndefine", /* EQN_TOK_NDEFINE */
138	"undef", /* EQN_TOK_UNDEF */
139	"above", /* EQN_TOK_ABOVE */
140};
141
142static	const char *const eqn_func[] = {
143	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
144	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
145	"if",	"lim",	"ln",	"log",	"max",	"min",
146	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
147};
148
149enum	eqn_symt {
150	EQNSYM_alpha = 0,
151	EQNSYM_beta,
152	EQNSYM_chi,
153	EQNSYM_delta,
154	EQNSYM_epsilon,
155	EQNSYM_eta,
156	EQNSYM_gamma,
157	EQNSYM_iota,
158	EQNSYM_kappa,
159	EQNSYM_lambda,
160	EQNSYM_mu,
161	EQNSYM_nu,
162	EQNSYM_omega,
163	EQNSYM_omicron,
164	EQNSYM_phi,
165	EQNSYM_pi,
166	EQNSYM_ps,
167	EQNSYM_rho,
168	EQNSYM_sigma,
169	EQNSYM_tau,
170	EQNSYM_theta,
171	EQNSYM_upsilon,
172	EQNSYM_xi,
173	EQNSYM_zeta,
174	EQNSYM_DELTA,
175	EQNSYM_GAMMA,
176	EQNSYM_LAMBDA,
177	EQNSYM_OMEGA,
178	EQNSYM_PHI,
179	EQNSYM_PI,
180	EQNSYM_PSI,
181	EQNSYM_SIGMA,
182	EQNSYM_THETA,
183	EQNSYM_UPSILON,
184	EQNSYM_XI,
185	EQNSYM_inter,
186	EQNSYM_union,
187	EQNSYM_prod,
188	EQNSYM_int,
189	EQNSYM_sum,
190	EQNSYM_grad,
191	EQNSYM_del,
192	EQNSYM_times,
193	EQNSYM_cdot,
194	EQNSYM_nothing,
195	EQNSYM_approx,
196	EQNSYM_prime,
197	EQNSYM_half,
198	EQNSYM_partial,
199	EQNSYM_inf,
200	EQNSYM_muchgreat,
201	EQNSYM_muchless,
202	EQNSYM_larrow,
203	EQNSYM_rarrow,
204	EQNSYM_pm,
205	EQNSYM_nequal,
206	EQNSYM_equiv,
207	EQNSYM_lessequal,
208	EQNSYM_moreequal,
209	EQNSYM_minus,
210	EQNSYM__MAX
211};
212
213struct	eqnsym {
214	const char	*str;
215	const char	*sym;
216};
217
218static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
219	{ "alpha", "*a" }, /* EQNSYM_alpha */
220	{ "beta", "*b" }, /* EQNSYM_beta */
221	{ "chi", "*x" }, /* EQNSYM_chi */
222	{ "delta", "*d" }, /* EQNSYM_delta */
223	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
224	{ "eta", "*y" }, /* EQNSYM_eta */
225	{ "gamma", "*g" }, /* EQNSYM_gamma */
226	{ "iota", "*i" }, /* EQNSYM_iota */
227	{ "kappa", "*k" }, /* EQNSYM_kappa */
228	{ "lambda", "*l" }, /* EQNSYM_lambda */
229	{ "mu", "*m" }, /* EQNSYM_mu */
230	{ "nu", "*n" }, /* EQNSYM_nu */
231	{ "omega", "*w" }, /* EQNSYM_omega */
232	{ "omicron", "*o" }, /* EQNSYM_omicron */
233	{ "phi", "*f" }, /* EQNSYM_phi */
234	{ "pi", "*p" }, /* EQNSYM_pi */
235	{ "psi", "*q" }, /* EQNSYM_psi */
236	{ "rho", "*r" }, /* EQNSYM_rho */
237	{ "sigma", "*s" }, /* EQNSYM_sigma */
238	{ "tau", "*t" }, /* EQNSYM_tau */
239	{ "theta", "*h" }, /* EQNSYM_theta */
240	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
241	{ "xi", "*c" }, /* EQNSYM_xi */
242	{ "zeta", "*z" }, /* EQNSYM_zeta */
243	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
244	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
245	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
246	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
247	{ "PHI", "*F" }, /* EQNSYM_PHI */
248	{ "PI", "*P" }, /* EQNSYM_PI */
249	{ "PSI", "*Q" }, /* EQNSYM_PSI */
250	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
251	{ "THETA", "*H" }, /* EQNSYM_THETA */
252	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
253	{ "XI", "*C" }, /* EQNSYM_XI */
254	{ "inter", "ca" }, /* EQNSYM_inter */
255	{ "union", "cu" }, /* EQNSYM_union */
256	{ "prod", "product" }, /* EQNSYM_prod */
257	{ "int", "integral" }, /* EQNSYM_int */
258	{ "sum", "sum" }, /* EQNSYM_sum */
259	{ "grad", "gr" }, /* EQNSYM_grad */
260	{ "del", "gr" }, /* EQNSYM_del */
261	{ "times", "mu" }, /* EQNSYM_times */
262	{ "cdot", "pc" }, /* EQNSYM_cdot */
263	{ "nothing", "&" }, /* EQNSYM_nothing */
264	{ "approx", "~~" }, /* EQNSYM_approx */
265	{ "prime", "fm" }, /* EQNSYM_prime */
266	{ "half", "12" }, /* EQNSYM_half */
267	{ "partial", "pd" }, /* EQNSYM_partial */
268	{ "inf", "if" }, /* EQNSYM_inf */
269	{ ">>", ">>" }, /* EQNSYM_muchgreat */
270	{ "<<", "<<" }, /* EQNSYM_muchless */
271	{ "<-", "<-" }, /* EQNSYM_larrow */
272	{ "->", "->" }, /* EQNSYM_rarrow */
273	{ "+-", "+-" }, /* EQNSYM_pm */
274	{ "!=", "!=" }, /* EQNSYM_nequal */
275	{ "==", "==" }, /* EQNSYM_equiv */
276	{ "<=", "<=" }, /* EQNSYM_lessequal */
277	{ ">=", ">=" }, /* EQNSYM_moreequal */
278	{ "-", "mi" }, /* EQNSYM_minus */
279};
280
281enum	parse_mode {
282	MODE_QUOTED,
283	MODE_NOSUB,
284	MODE_SUB,
285	MODE_TOK
286};
287
288struct	eqn_def {
289	char		 *key;
290	size_t		  keysz;
291	char		 *val;
292	size_t		  valsz;
293};
294
295static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
296static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
297				struct eqn_box *);
298static	void		 eqn_def(struct eqn_node *);
299static	struct eqn_def	*eqn_def_find(struct eqn_node *);
300static	void		 eqn_delim(struct eqn_node *);
301static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
302static	void		 eqn_undef(struct eqn_node *);
303
304
305struct eqn_node *
306eqn_alloc(void)
307{
308	struct eqn_node *ep;
309
310	ep = mandoc_calloc(1, sizeof(*ep));
311	ep->gsize = EQN_DEFSIZE;
312	return ep;
313}
314
315void
316eqn_reset(struct eqn_node *ep)
317{
318	free(ep->data);
319	ep->data = ep->start = ep->end = NULL;
320	ep->sz = ep->toksz = 0;
321}
322
323void
324eqn_read(struct eqn_node *ep, const char *p)
325{
326	char		*cp;
327
328	if (ep->data == NULL) {
329		ep->sz = strlen(p);
330		ep->data = mandoc_strdup(p);
331	} else {
332		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
333		free(ep->data);
334		ep->data = cp;
335	}
336	ep->sz += 1;
337}
338
339/*
340 * Find the key "key" of the give size within our eqn-defined values.
341 */
342static struct eqn_def *
343eqn_def_find(struct eqn_node *ep)
344{
345	int		 i;
346
347	for (i = 0; i < (int)ep->defsz; i++)
348		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
349		    ep->defs[i].keysz, ep->start, ep->toksz))
350			return &ep->defs[i];
351
352	return NULL;
353}
354
355/*
356 * Parse a token from the input text.  The modes are:
357 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
358 *   before its next occurence.  Do not interpret the token in any
359 *   way and return EQN_TOK_QUOTED.  All other modes behave like
360 *   MODE_QUOTED when *ep->start is '"'.
361 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
362 *   otherwise, it ends before the next whitespace or brace.
363 *   Do not interpret the token and return EQN_TOK__MAX.
364 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
365 *   alias created with define.  If it is an alias, replace it with
366 *   its string value and reparse.
367 * MODE_TOK: Like MODE_SUB, but also check the token against the list
368 *   of tokens, and if there is a match, return that token.  Otherwise,
369 *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
370 *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
371 *   a token match, *ep->start is set to an allocated string that the
372 *   caller is expected to free.
373 * All modes skip whitespace following the end of the token.
374 */
375static enum eqn_tok
376eqn_next(struct eqn_node *ep, enum parse_mode mode)
377{
378	static int	 last_len, lim;
379
380	struct eqn_def	*def;
381	size_t		 start;
382	int		 diff, i, quoted;
383	enum eqn_tok	 tok;
384
385	/*
386	 * Reset the recursion counter after advancing
387	 * beyond the end of the previous substitution.
388	 */
389	if (ep->end - ep->data >= last_len)
390		lim = 0;
391
392	ep->start = ep->end;
393	quoted = mode == MODE_QUOTED;
394	for (;;) {
395		switch (*ep->start) {
396		case '\0':
397			ep->toksz = 0;
398			return EQN_TOK_EOF;
399		case '"':
400			quoted = 1;
401			break;
402		case ' ':
403		case '\t':
404		case '~':
405		case '^':
406			if (quoted)
407				break;
408			ep->start++;
409			continue;
410		default:
411			break;
412		}
413		if (quoted) {
414			ep->end = strchr(ep->start + 1, *ep->start);
415			ep->start++;  /* Skip opening quote. */
416			if (ep->end == NULL) {
417				mandoc_msg(MANDOCERR_ARG_QUOTE,
418				    ep->node->line, ep->node->pos, NULL);
419				ep->end = strchr(ep->start, '\0');
420			}
421		} else {
422			ep->end = ep->start + 1;
423			if (*ep->start != '{' && *ep->start != '}')
424				ep->end += strcspn(ep->end, " ^~\"{}\t");
425		}
426		ep->toksz = ep->end - ep->start;
427		if (quoted && *ep->end != '\0')
428			ep->end++;  /* Skip closing quote. */
429		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
430			ep->end++;
431		if (quoted)  /* Cannot return, may have to strndup. */
432			break;
433		if (mode == MODE_NOSUB)
434			return EQN_TOK__MAX;
435		if ((def = eqn_def_find(ep)) == NULL)
436			break;
437		if (++lim > EQN_NEST_MAX) {
438			mandoc_msg(MANDOCERR_ROFFLOOP,
439			    ep->node->line, ep->node->pos, NULL);
440			return EQN_TOK_EOF;
441		}
442
443		/* Replace a defined name with its string value. */
444		if ((diff = def->valsz - ep->toksz) > 0) {
445			start = ep->start - ep->data;
446			ep->sz += diff;
447			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
448			ep->start = ep->data + start;
449		}
450		if (diff)
451			memmove(ep->start + def->valsz, ep->start + ep->toksz,
452			    strlen(ep->start + ep->toksz) + 1);
453		memcpy(ep->start, def->val, def->valsz);
454		last_len = ep->start - ep->data + def->valsz;
455	}
456	if (mode != MODE_TOK)
457		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
458	if (quoted) {
459		ep->start = mandoc_strndup(ep->start, ep->toksz);
460		return EQN_TOK_QUOTED;
461	}
462	for (tok = 0; tok < EQN_TOK__MAX; tok++)
463		if (STRNEQ(ep->start, ep->toksz,
464		    eqn_toks[tok], strlen(eqn_toks[tok])))
465			return tok;
466
467	for (i = 0; i < EQNSYM__MAX; i++) {
468		if (STRNEQ(ep->start, ep->toksz,
469		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
470			mandoc_asprintf(&ep->start,
471			    "\\[%s]", eqnsyms[i].sym);
472			return EQN_TOK_SYM;
473		}
474	}
475	ep->start = mandoc_strndup(ep->start, ep->toksz);
476	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
477		if (STRNEQ(ep->start, ep->toksz,
478		    eqn_func[i], strlen(eqn_func[i])))
479			return EQN_TOK_FUNC;
480	return EQN_TOK__MAX;
481}
482
483void
484eqn_box_free(struct eqn_box *bp)
485{
486	if (bp == NULL)
487		return;
488
489	if (bp->first)
490		eqn_box_free(bp->first);
491	if (bp->next)
492		eqn_box_free(bp->next);
493
494	free(bp->text);
495	free(bp->left);
496	free(bp->right);
497	free(bp->top);
498	free(bp->bottom);
499	free(bp);
500}
501
502struct eqn_box *
503eqn_box_new(void)
504{
505	struct eqn_box	*bp;
506
507	bp = mandoc_calloc(1, sizeof(*bp));
508	bp->expectargs = UINT_MAX;
509	return bp;
510}
511
512/*
513 * Allocate a box as the last child of the parent node.
514 */
515static struct eqn_box *
516eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
517{
518	struct eqn_box	*bp;
519
520	bp = eqn_box_new();
521	bp->parent = parent;
522	bp->parent->args++;
523	bp->font = bp->parent->font;
524	bp->size = ep->gsize;
525
526	if (NULL != parent->first) {
527		parent->last->next = bp;
528		bp->prev = parent->last;
529	} else
530		parent->first = bp;
531
532	parent->last = bp;
533	return bp;
534}
535
536/*
537 * Reparent the current last node (of the current parent) under a new
538 * EQN_SUBEXPR as the first element.
539 * Then return the new parent.
540 * The new EQN_SUBEXPR will have a two-child limit.
541 */
542static struct eqn_box *
543eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
544{
545	struct eqn_box	*b, *newb;
546
547	assert(NULL != parent->last);
548	b = parent->last;
549	if (parent->last == parent->first)
550		parent->first = NULL;
551	parent->args--;
552	parent->last = b->prev;
553	b->prev = NULL;
554	newb = eqn_box_alloc(ep, parent);
555	newb->type = EQN_SUBEXPR;
556	newb->expectargs = 2;
557	newb->args = 1;
558	newb->first = newb->last = b;
559	newb->first->next = NULL;
560	b->parent = newb;
561	return newb;
562}
563
564/*
565 * Parse the "delim" control statement.
566 */
567static void
568eqn_delim(struct eqn_node *ep)
569{
570	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
571		mandoc_msg(MANDOCERR_REQ_EMPTY,
572		    ep->node->line, ep->node->pos, "delim");
573		if (ep->end[0] != '\0')
574			ep->end++;
575	} else if (strncmp(ep->end, "off", 3) == 0) {
576		ep->delim = 0;
577		ep->end += 3;
578	} else if (strncmp(ep->end, "on", 2) == 0) {
579		if (ep->odelim && ep->cdelim)
580			ep->delim = 1;
581		ep->end += 2;
582	} else {
583		ep->odelim = *ep->end++;
584		ep->cdelim = *ep->end++;
585		ep->delim = 1;
586	}
587}
588
589/*
590 * Undefine a previously-defined string.
591 */
592static void
593eqn_undef(struct eqn_node *ep)
594{
595	struct eqn_def	*def;
596
597	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
598		mandoc_msg(MANDOCERR_REQ_EMPTY,
599		    ep->node->line, ep->node->pos, "undef");
600		return;
601	}
602	if ((def = eqn_def_find(ep)) == NULL)
603		return;
604	free(def->key);
605	free(def->val);
606	def->key = def->val = NULL;
607	def->keysz = def->valsz = 0;
608}
609
610static void
611eqn_def(struct eqn_node *ep)
612{
613	struct eqn_def	*def;
614	int		 i;
615
616	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
617		mandoc_msg(MANDOCERR_REQ_EMPTY,
618		    ep->node->line, ep->node->pos, "define");
619		return;
620	}
621
622	/*
623	 * Search for a key that already exists.
624	 * Create a new key if none is found.
625	 */
626	if ((def = eqn_def_find(ep)) == NULL) {
627		/* Find holes in string array. */
628		for (i = 0; i < (int)ep->defsz; i++)
629			if (0 == ep->defs[i].keysz)
630				break;
631
632		if (i == (int)ep->defsz) {
633			ep->defsz++;
634			ep->defs = mandoc_reallocarray(ep->defs,
635			    ep->defsz, sizeof(struct eqn_def));
636			ep->defs[i].key = ep->defs[i].val = NULL;
637		}
638
639		def = ep->defs + i;
640		free(def->key);
641		def->key = mandoc_strndup(ep->start, ep->toksz);
642		def->keysz = ep->toksz;
643	}
644
645	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
646		mandoc_msg(MANDOCERR_REQ_EMPTY,
647		    ep->node->line, ep->node->pos, "define %s", def->key);
648		free(def->key);
649		free(def->val);
650		def->key = def->val = NULL;
651		def->keysz = def->valsz = 0;
652		return;
653	}
654	free(def->val);
655	def->val = mandoc_strndup(ep->start, ep->toksz);
656	def->valsz = ep->toksz;
657}
658
659void
660eqn_parse(struct eqn_node *ep)
661{
662	struct eqn_box	*cur, *nbox, *parent, *split;
663	const char	*cp, *cpn;
664	char		*p;
665	enum eqn_tok	 tok;
666	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
667	int		 size;
668
669	parent = ep->node->eqn;
670	assert(parent != NULL);
671
672	/*
673	 * Empty equation.
674	 * Do not add it to the high-level syntax tree.
675	 */
676
677	if (ep->data == NULL)
678		return;
679
680	ep->start = ep->end = ep->data;
681
682next_tok:
683	tok = eqn_next(ep, MODE_TOK);
684	switch (tok) {
685	case EQN_TOK_UNDEF:
686		eqn_undef(ep);
687		break;
688	case EQN_TOK_NDEFINE:
689	case EQN_TOK_DEFINE:
690		eqn_def(ep);
691		break;
692	case EQN_TOK_TDEFINE:
693		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
694		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
695			mandoc_msg(MANDOCERR_REQ_EMPTY,
696			    ep->node->line, ep->node->pos, "tdefine");
697		break;
698	case EQN_TOK_DELIM:
699		eqn_delim(ep);
700		break;
701	case EQN_TOK_GFONT:
702		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
703			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
704			    ep->node->pos, "%s", eqn_toks[tok]);
705		break;
706	case EQN_TOK_MARK:
707	case EQN_TOK_LINEUP:
708		/* Ignore these. */
709		break;
710	case EQN_TOK_DYAD:
711	case EQN_TOK_VEC:
712	case EQN_TOK_UNDER:
713	case EQN_TOK_BAR:
714	case EQN_TOK_TILDE:
715	case EQN_TOK_HAT:
716	case EQN_TOK_DOT:
717	case EQN_TOK_DOTDOT:
718		if (parent->last == NULL) {
719			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
720			    ep->node->pos, "%s", eqn_toks[tok]);
721			cur = eqn_box_alloc(ep, parent);
722			cur->type = EQN_TEXT;
723			cur->text = mandoc_strdup("");
724		}
725		parent = eqn_box_makebinary(ep, parent);
726		parent->type = EQN_LIST;
727		parent->expectargs = 1;
728		parent->font = EQNFONT_ROMAN;
729		switch (tok) {
730		case EQN_TOK_DOTDOT:
731			parent->top = mandoc_strdup("\\[ad]");
732			break;
733		case EQN_TOK_VEC:
734			parent->top = mandoc_strdup("\\[->]");
735			break;
736		case EQN_TOK_DYAD:
737			parent->top = mandoc_strdup("\\[<>]");
738			break;
739		case EQN_TOK_TILDE:
740			parent->top = mandoc_strdup("\\[a~]");
741			break;
742		case EQN_TOK_UNDER:
743			parent->bottom = mandoc_strdup("\\[ul]");
744			break;
745		case EQN_TOK_BAR:
746			parent->top = mandoc_strdup("\\[rn]");
747			break;
748		case EQN_TOK_DOT:
749			parent->top = mandoc_strdup("\\[a.]");
750			break;
751		case EQN_TOK_HAT:
752			parent->top = mandoc_strdup("\\[ha]");
753			break;
754		default:
755			abort();
756		}
757		parent = parent->parent;
758		break;
759	case EQN_TOK_FWD:
760	case EQN_TOK_BACK:
761	case EQN_TOK_DOWN:
762	case EQN_TOK_UP:
763		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
764			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
765			    ep->node->pos, "%s", eqn_toks[tok]);
766		break;
767	case EQN_TOK_FAT:
768	case EQN_TOK_ROMAN:
769	case EQN_TOK_ITALIC:
770	case EQN_TOK_BOLD:
771		while (parent->args == parent->expectargs)
772			parent = parent->parent;
773		/*
774		 * These values apply to the next word or sequence of
775		 * words; thus, we mark that we'll have a child with
776		 * exactly one of those.
777		 */
778		parent = eqn_box_alloc(ep, parent);
779		parent->type = EQN_LIST;
780		parent->expectargs = 1;
781		switch (tok) {
782		case EQN_TOK_FAT:
783			parent->font = EQNFONT_FAT;
784			break;
785		case EQN_TOK_ROMAN:
786			parent->font = EQNFONT_ROMAN;
787			break;
788		case EQN_TOK_ITALIC:
789			parent->font = EQNFONT_ITALIC;
790			break;
791		case EQN_TOK_BOLD:
792			parent->font = EQNFONT_BOLD;
793			break;
794		default:
795			abort();
796		}
797		break;
798	case EQN_TOK_SIZE:
799	case EQN_TOK_GSIZE:
800		/* Accept two values: integral size and a single. */
801		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
802			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
803			    ep->node->pos, "%s", eqn_toks[tok]);
804			break;
805		}
806		size = mandoc_strntoi(ep->start, ep->toksz, 10);
807		if (-1 == size) {
808			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
809			    ep->node->pos, "%s", eqn_toks[tok]);
810			break;
811		}
812		if (EQN_TOK_GSIZE == tok) {
813			ep->gsize = size;
814			break;
815		}
816		while (parent->args == parent->expectargs)
817			parent = parent->parent;
818		parent = eqn_box_alloc(ep, parent);
819		parent->type = EQN_LIST;
820		parent->expectargs = 1;
821		parent->size = size;
822		break;
823	case EQN_TOK_FROM:
824	case EQN_TOK_TO:
825	case EQN_TOK_SUB:
826	case EQN_TOK_SUP:
827		/*
828		 * We have a left-right-associative expression.
829		 * Repivot under a positional node, open a child scope
830		 * and keep on reading.
831		 */
832		if (parent->last == NULL) {
833			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
834			    ep->node->pos, "%s", eqn_toks[tok]);
835			cur = eqn_box_alloc(ep, parent);
836			cur->type = EQN_TEXT;
837			cur->text = mandoc_strdup("");
838		}
839		while (parent->expectargs == 1 && parent->args == 1)
840			parent = parent->parent;
841		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
842			for (cur = parent; cur != NULL; cur = cur->parent)
843				if (cur->pos == EQNPOS_SUB ||
844				    cur->pos == EQNPOS_SUP ||
845				    cur->pos == EQNPOS_SUBSUP ||
846				    cur->pos == EQNPOS_SQRT ||
847				    cur->pos == EQNPOS_OVER)
848					break;
849			if (cur != NULL)
850				parent = cur->parent;
851		}
852		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
853			parent->expectargs = 3;
854			parent->pos = EQNPOS_SUBSUP;
855			break;
856		}
857		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
858			parent->expectargs = 3;
859			parent->pos = EQNPOS_FROMTO;
860			break;
861		}
862		parent = eqn_box_makebinary(ep, parent);
863		switch (tok) {
864		case EQN_TOK_FROM:
865			parent->pos = EQNPOS_FROM;
866			break;
867		case EQN_TOK_TO:
868			parent->pos = EQNPOS_TO;
869			break;
870		case EQN_TOK_SUP:
871			parent->pos = EQNPOS_SUP;
872			break;
873		case EQN_TOK_SUB:
874			parent->pos = EQNPOS_SUB;
875			break;
876		default:
877			abort();
878		}
879		break;
880	case EQN_TOK_SQRT:
881		while (parent->args == parent->expectargs)
882			parent = parent->parent;
883		/*
884		 * Accept a left-right-associative set of arguments just
885		 * like sub and sup and friends but without rebalancing
886		 * under a pivot.
887		 */
888		parent = eqn_box_alloc(ep, parent);
889		parent->type = EQN_SUBEXPR;
890		parent->pos = EQNPOS_SQRT;
891		parent->expectargs = 1;
892		break;
893	case EQN_TOK_OVER:
894		/*
895		 * We have a right-left-associative fraction.
896		 * Close out anything that's currently open, then
897		 * rebalance and continue reading.
898		 */
899		if (parent->last == NULL) {
900			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
901			    ep->node->pos, "%s", eqn_toks[tok]);
902			cur = eqn_box_alloc(ep, parent);
903			cur->type = EQN_TEXT;
904			cur->text = mandoc_strdup("");
905		}
906		while (parent->args == parent->expectargs)
907			parent = parent->parent;
908		while (EQN_SUBEXPR == parent->type)
909			parent = parent->parent;
910		parent = eqn_box_makebinary(ep, parent);
911		parent->pos = EQNPOS_OVER;
912		break;
913	case EQN_TOK_RIGHT:
914	case EQN_TOK_BRACE_CLOSE:
915		/*
916		 * Close out the existing brace.
917		 * FIXME: this is a shitty sentinel: we should really
918		 * have a native EQN_BRACE type or whatnot.
919		 */
920		for (cur = parent; cur != NULL; cur = cur->parent)
921			if (cur->type == EQN_LIST &&
922			    cur->expectargs > 1 &&
923			    (tok == EQN_TOK_BRACE_CLOSE ||
924			     cur->left != NULL))
925				break;
926		if (cur == NULL) {
927			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
928			    ep->node->pos, "%s", eqn_toks[tok]);
929			break;
930		}
931		parent = cur;
932		if (EQN_TOK_RIGHT == tok) {
933			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
934				mandoc_msg(MANDOCERR_REQ_EMPTY,
935				    ep->node->line, ep->node->pos,
936				    "%s", eqn_toks[tok]);
937				break;
938			}
939			/* Handling depends on right/left. */
940			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
941				parent->right = mandoc_strdup("\\[rc]");
942			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
943				parent->right = mandoc_strdup("\\[rf]");
944			else
945				parent->right =
946				    mandoc_strndup(ep->start, ep->toksz);
947		}
948		parent = parent->parent;
949		if (tok == EQN_TOK_BRACE_CLOSE &&
950		    (parent->type == EQN_PILE ||
951		     parent->type == EQN_MATRIX))
952			parent = parent->parent;
953		/* Close out any "singleton" lists. */
954		while (parent->type == EQN_LIST &&
955		    parent->expectargs == 1 &&
956		    parent->args == 1)
957			parent = parent->parent;
958		break;
959	case EQN_TOK_BRACE_OPEN:
960	case EQN_TOK_LEFT:
961		/*
962		 * If we already have something in the stack and we're
963		 * in an expression, then rewind til we're not any more
964		 * (just like with the text node).
965		 */
966		while (parent->args == parent->expectargs)
967			parent = parent->parent;
968		if (EQN_TOK_LEFT == tok &&
969		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
970			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
971			    ep->node->pos, "%s", eqn_toks[tok]);
972			break;
973		}
974		parent = eqn_box_alloc(ep, parent);
975		parent->type = EQN_LIST;
976		if (EQN_TOK_LEFT == tok) {
977			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
978				parent->left = mandoc_strdup("\\[lc]");
979			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
980				parent->left = mandoc_strdup("\\[lf]");
981			else
982				parent->left =
983				    mandoc_strndup(ep->start, ep->toksz);
984		}
985		break;
986	case EQN_TOK_PILE:
987	case EQN_TOK_LPILE:
988	case EQN_TOK_RPILE:
989	case EQN_TOK_CPILE:
990	case EQN_TOK_CCOL:
991	case EQN_TOK_LCOL:
992	case EQN_TOK_RCOL:
993		while (parent->args == parent->expectargs)
994			parent = parent->parent;
995		parent = eqn_box_alloc(ep, parent);
996		parent->type = EQN_PILE;
997		parent->expectargs = 1;
998		break;
999	case EQN_TOK_ABOVE:
1000		for (cur = parent; cur != NULL; cur = cur->parent)
1001			if (cur->type == EQN_PILE)
1002				break;
1003		if (cur == NULL) {
1004			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
1005			    ep->node->pos, "%s", eqn_toks[tok]);
1006			break;
1007		}
1008		parent = eqn_box_alloc(ep, cur);
1009		parent->type = EQN_LIST;
1010		break;
1011	case EQN_TOK_MATRIX:
1012		while (parent->args == parent->expectargs)
1013			parent = parent->parent;
1014		parent = eqn_box_alloc(ep, parent);
1015		parent->type = EQN_MATRIX;
1016		parent->expectargs = 1;
1017		break;
1018	case EQN_TOK_EOF:
1019		return;
1020	case EQN_TOK__MAX:
1021	case EQN_TOK_FUNC:
1022	case EQN_TOK_QUOTED:
1023	case EQN_TOK_SYM:
1024		p = ep->start;
1025		assert(p != NULL);
1026		/*
1027		 * If we already have something in the stack and we're
1028		 * in an expression, then rewind til we're not any more.
1029		 */
1030		while (parent->args == parent->expectargs)
1031			parent = parent->parent;
1032		cur = eqn_box_alloc(ep, parent);
1033		cur->type = EQN_TEXT;
1034		cur->text = p;
1035		switch (tok) {
1036		case EQN_TOK_FUNC:
1037			cur->font = EQNFONT_ROMAN;
1038			break;
1039		case EQN_TOK_QUOTED:
1040			if (cur->font == EQNFONT_NONE)
1041				cur->font = EQNFONT_ITALIC;
1042			break;
1043		case EQN_TOK_SYM:
1044			break;
1045		default:
1046			if (cur->font != EQNFONT_NONE || *p == '\0')
1047				break;
1048			cpn = p - 1;
1049			ccln = CCL_LET;
1050			split = NULL;
1051			for (;;) {
1052				/* Advance to next character. */
1053				cp = cpn++;
1054				ccl = ccln;
1055				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1056				    isdigit((unsigned char)*cpn) ||
1057				    (*cpn == '.' && (ccl == CCL_DIG ||
1058				     isdigit((unsigned char)cpn[1]))) ?
1059				    CCL_DIG : CCL_PUN;
1060				/* No boundary before first character. */
1061				if (cp < p)
1062					continue;
1063				cur->font = ccl == CCL_LET ?
1064				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1065				if (*cp == '\\')
1066					mandoc_escape(&cpn, NULL, NULL);
1067				/* No boundary after last character. */
1068				if (*cpn == '\0')
1069					break;
1070				if (ccln == ccl && *cp != ',' && *cpn != ',')
1071					continue;
1072				/* Boundary found, split the text. */
1073				if (parent->args == parent->expectargs) {
1074					/* Remove the text from the tree. */
1075					if (cur->prev == NULL)
1076						parent->first = cur->next;
1077					else
1078						cur->prev->next = NULL;
1079					parent->last = cur->prev;
1080					parent->args--;
1081					/* Set up a list instead. */
1082					split = eqn_box_alloc(ep, parent);
1083					split->type = EQN_LIST;
1084					/* Insert the word into the list. */
1085					split->first = split->last = cur;
1086					cur->parent = split;
1087					cur->prev = NULL;
1088					parent = split;
1089				}
1090				/* Append a new text box. */
1091				nbox = eqn_box_alloc(ep, parent);
1092				nbox->type = EQN_TEXT;
1093				nbox->text = mandoc_strdup(cpn);
1094				/* Truncate the old box. */
1095				p = mandoc_strndup(cur->text,
1096				    cpn - cur->text);
1097				free(cur->text);
1098				cur->text = p;
1099				/* Setup to process the new box. */
1100				cur = nbox;
1101				p = nbox->text;
1102				cpn = p - 1;
1103				ccln = CCL_LET;
1104			}
1105			if (split != NULL)
1106				parent = split->parent;
1107			break;
1108		}
1109		break;
1110	default:
1111		abort();
1112	}
1113	goto next_tok;
1114}
1115
1116void
1117eqn_free(struct eqn_node *p)
1118{
1119	int		 i;
1120
1121	if (p == NULL)
1122		return;
1123
1124	for (i = 0; i < (int)p->defsz; i++) {
1125		free(p->defs[i].key);
1126		free(p->defs[i].val);
1127	}
1128
1129	free(p->data);
1130	free(p->defs);
1131	free(p);
1132}
1133