1/*	$Id: eqn.c,v 1.78 2017/07/15 16:26:17 schwarze Exp $ */
2/*
3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21
22#include <assert.h>
23#include <ctype.h>
24#include <limits.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <time.h>
29
30#include "mandoc_aux.h"
31#include "mandoc.h"
32#include "roff.h"
33#include "libmandoc.h"
34#include "libroff.h"
35
36#define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
37#define	STRNEQ(p1, sz1, p2, sz2) \
38	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
39
40enum	eqn_tok {
41	EQN_TOK_DYAD = 0,
42	EQN_TOK_VEC,
43	EQN_TOK_UNDER,
44	EQN_TOK_BAR,
45	EQN_TOK_TILDE,
46	EQN_TOK_HAT,
47	EQN_TOK_DOT,
48	EQN_TOK_DOTDOT,
49	EQN_TOK_FWD,
50	EQN_TOK_BACK,
51	EQN_TOK_DOWN,
52	EQN_TOK_UP,
53	EQN_TOK_FAT,
54	EQN_TOK_ROMAN,
55	EQN_TOK_ITALIC,
56	EQN_TOK_BOLD,
57	EQN_TOK_SIZE,
58	EQN_TOK_SUB,
59	EQN_TOK_SUP,
60	EQN_TOK_SQRT,
61	EQN_TOK_OVER,
62	EQN_TOK_FROM,
63	EQN_TOK_TO,
64	EQN_TOK_BRACE_OPEN,
65	EQN_TOK_BRACE_CLOSE,
66	EQN_TOK_GSIZE,
67	EQN_TOK_GFONT,
68	EQN_TOK_MARK,
69	EQN_TOK_LINEUP,
70	EQN_TOK_LEFT,
71	EQN_TOK_RIGHT,
72	EQN_TOK_PILE,
73	EQN_TOK_LPILE,
74	EQN_TOK_RPILE,
75	EQN_TOK_CPILE,
76	EQN_TOK_MATRIX,
77	EQN_TOK_CCOL,
78	EQN_TOK_LCOL,
79	EQN_TOK_RCOL,
80	EQN_TOK_DELIM,
81	EQN_TOK_DEFINE,
82	EQN_TOK_TDEFINE,
83	EQN_TOK_NDEFINE,
84	EQN_TOK_UNDEF,
85	EQN_TOK_ABOVE,
86	EQN_TOK__MAX,
87	EQN_TOK_FUNC,
88	EQN_TOK_QUOTED,
89	EQN_TOK_SYM,
90	EQN_TOK_EOF
91};
92
93static	const char *eqn_toks[EQN_TOK__MAX] = {
94	"dyad", /* EQN_TOK_DYAD */
95	"vec", /* EQN_TOK_VEC */
96	"under", /* EQN_TOK_UNDER */
97	"bar", /* EQN_TOK_BAR */
98	"tilde", /* EQN_TOK_TILDE */
99	"hat", /* EQN_TOK_HAT */
100	"dot", /* EQN_TOK_DOT */
101	"dotdot", /* EQN_TOK_DOTDOT */
102	"fwd", /* EQN_TOK_FWD * */
103	"back", /* EQN_TOK_BACK */
104	"down", /* EQN_TOK_DOWN */
105	"up", /* EQN_TOK_UP */
106	"fat", /* EQN_TOK_FAT */
107	"roman", /* EQN_TOK_ROMAN */
108	"italic", /* EQN_TOK_ITALIC */
109	"bold", /* EQN_TOK_BOLD */
110	"size", /* EQN_TOK_SIZE */
111	"sub", /* EQN_TOK_SUB */
112	"sup", /* EQN_TOK_SUP */
113	"sqrt", /* EQN_TOK_SQRT */
114	"over", /* EQN_TOK_OVER */
115	"from", /* EQN_TOK_FROM */
116	"to", /* EQN_TOK_TO */
117	"{", /* EQN_TOK_BRACE_OPEN */
118	"}", /* EQN_TOK_BRACE_CLOSE */
119	"gsize", /* EQN_TOK_GSIZE */
120	"gfont", /* EQN_TOK_GFONT */
121	"mark", /* EQN_TOK_MARK */
122	"lineup", /* EQN_TOK_LINEUP */
123	"left", /* EQN_TOK_LEFT */
124	"right", /* EQN_TOK_RIGHT */
125	"pile", /* EQN_TOK_PILE */
126	"lpile", /* EQN_TOK_LPILE */
127	"rpile", /* EQN_TOK_RPILE */
128	"cpile", /* EQN_TOK_CPILE */
129	"matrix", /* EQN_TOK_MATRIX */
130	"ccol", /* EQN_TOK_CCOL */
131	"lcol", /* EQN_TOK_LCOL */
132	"rcol", /* EQN_TOK_RCOL */
133	"delim", /* EQN_TOK_DELIM */
134	"define", /* EQN_TOK_DEFINE */
135	"tdefine", /* EQN_TOK_TDEFINE */
136	"ndefine", /* EQN_TOK_NDEFINE */
137	"undef", /* EQN_TOK_UNDEF */
138	"above", /* EQN_TOK_ABOVE */
139};
140
141static	const char *const eqn_func[] = {
142	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
143	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
144	"if",	"lim",	"ln",	"log",	"max",	"min",
145	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
146};
147
148enum	eqn_symt {
149	EQNSYM_alpha = 0,
150	EQNSYM_beta,
151	EQNSYM_chi,
152	EQNSYM_delta,
153	EQNSYM_epsilon,
154	EQNSYM_eta,
155	EQNSYM_gamma,
156	EQNSYM_iota,
157	EQNSYM_kappa,
158	EQNSYM_lambda,
159	EQNSYM_mu,
160	EQNSYM_nu,
161	EQNSYM_omega,
162	EQNSYM_omicron,
163	EQNSYM_phi,
164	EQNSYM_pi,
165	EQNSYM_ps,
166	EQNSYM_rho,
167	EQNSYM_sigma,
168	EQNSYM_tau,
169	EQNSYM_theta,
170	EQNSYM_upsilon,
171	EQNSYM_xi,
172	EQNSYM_zeta,
173	EQNSYM_DELTA,
174	EQNSYM_GAMMA,
175	EQNSYM_LAMBDA,
176	EQNSYM_OMEGA,
177	EQNSYM_PHI,
178	EQNSYM_PI,
179	EQNSYM_PSI,
180	EQNSYM_SIGMA,
181	EQNSYM_THETA,
182	EQNSYM_UPSILON,
183	EQNSYM_XI,
184	EQNSYM_inter,
185	EQNSYM_union,
186	EQNSYM_prod,
187	EQNSYM_int,
188	EQNSYM_sum,
189	EQNSYM_grad,
190	EQNSYM_del,
191	EQNSYM_times,
192	EQNSYM_cdot,
193	EQNSYM_nothing,
194	EQNSYM_approx,
195	EQNSYM_prime,
196	EQNSYM_half,
197	EQNSYM_partial,
198	EQNSYM_inf,
199	EQNSYM_muchgreat,
200	EQNSYM_muchless,
201	EQNSYM_larrow,
202	EQNSYM_rarrow,
203	EQNSYM_pm,
204	EQNSYM_nequal,
205	EQNSYM_equiv,
206	EQNSYM_lessequal,
207	EQNSYM_moreequal,
208	EQNSYM_minus,
209	EQNSYM__MAX
210};
211
212struct	eqnsym {
213	const char	*str;
214	const char	*sym;
215};
216
217static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
218	{ "alpha", "*a" }, /* EQNSYM_alpha */
219	{ "beta", "*b" }, /* EQNSYM_beta */
220	{ "chi", "*x" }, /* EQNSYM_chi */
221	{ "delta", "*d" }, /* EQNSYM_delta */
222	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
223	{ "eta", "*y" }, /* EQNSYM_eta */
224	{ "gamma", "*g" }, /* EQNSYM_gamma */
225	{ "iota", "*i" }, /* EQNSYM_iota */
226	{ "kappa", "*k" }, /* EQNSYM_kappa */
227	{ "lambda", "*l" }, /* EQNSYM_lambda */
228	{ "mu", "*m" }, /* EQNSYM_mu */
229	{ "nu", "*n" }, /* EQNSYM_nu */
230	{ "omega", "*w" }, /* EQNSYM_omega */
231	{ "omicron", "*o" }, /* EQNSYM_omicron */
232	{ "phi", "*f" }, /* EQNSYM_phi */
233	{ "pi", "*p" }, /* EQNSYM_pi */
234	{ "psi", "*q" }, /* EQNSYM_psi */
235	{ "rho", "*r" }, /* EQNSYM_rho */
236	{ "sigma", "*s" }, /* EQNSYM_sigma */
237	{ "tau", "*t" }, /* EQNSYM_tau */
238	{ "theta", "*h" }, /* EQNSYM_theta */
239	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
240	{ "xi", "*c" }, /* EQNSYM_xi */
241	{ "zeta", "*z" }, /* EQNSYM_zeta */
242	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
243	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
244	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
245	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
246	{ "PHI", "*F" }, /* EQNSYM_PHI */
247	{ "PI", "*P" }, /* EQNSYM_PI */
248	{ "PSI", "*Q" }, /* EQNSYM_PSI */
249	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
250	{ "THETA", "*H" }, /* EQNSYM_THETA */
251	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
252	{ "XI", "*C" }, /* EQNSYM_XI */
253	{ "inter", "ca" }, /* EQNSYM_inter */
254	{ "union", "cu" }, /* EQNSYM_union */
255	{ "prod", "product" }, /* EQNSYM_prod */
256	{ "int", "integral" }, /* EQNSYM_int */
257	{ "sum", "sum" }, /* EQNSYM_sum */
258	{ "grad", "gr" }, /* EQNSYM_grad */
259	{ "del", "gr" }, /* EQNSYM_del */
260	{ "times", "mu" }, /* EQNSYM_times */
261	{ "cdot", "pc" }, /* EQNSYM_cdot */
262	{ "nothing", "&" }, /* EQNSYM_nothing */
263	{ "approx", "~~" }, /* EQNSYM_approx */
264	{ "prime", "fm" }, /* EQNSYM_prime */
265	{ "half", "12" }, /* EQNSYM_half */
266	{ "partial", "pd" }, /* EQNSYM_partial */
267	{ "inf", "if" }, /* EQNSYM_inf */
268	{ ">>", ">>" }, /* EQNSYM_muchgreat */
269	{ "<<", "<<" }, /* EQNSYM_muchless */
270	{ "<-", "<-" }, /* EQNSYM_larrow */
271	{ "->", "->" }, /* EQNSYM_rarrow */
272	{ "+-", "+-" }, /* EQNSYM_pm */
273	{ "!=", "!=" }, /* EQNSYM_nequal */
274	{ "==", "==" }, /* EQNSYM_equiv */
275	{ "<=", "<=" }, /* EQNSYM_lessequal */
276	{ ">=", ">=" }, /* EQNSYM_moreequal */
277	{ "-", "mi" }, /* EQNSYM_minus */
278};
279
280enum	parse_mode {
281	MODE_QUOTED,
282	MODE_NOSUB,
283	MODE_SUB,
284	MODE_TOK
285};
286
287static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
288static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
289				struct eqn_box *);
290static	void		 eqn_def(struct eqn_node *);
291static	struct eqn_def	*eqn_def_find(struct eqn_node *);
292static	void		 eqn_delim(struct eqn_node *);
293static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
294static	void		 eqn_undef(struct eqn_node *);
295
296
297struct eqn_node *
298eqn_alloc(struct mparse *parse)
299{
300	struct eqn_node *ep;
301
302	ep = mandoc_calloc(1, sizeof(*ep));
303	ep->parse = parse;
304	ep->gsize = EQN_DEFSIZE;
305	return ep;
306}
307
308void
309eqn_reset(struct eqn_node *ep)
310{
311	free(ep->data);
312	ep->data = ep->start = ep->end = NULL;
313	ep->sz = ep->toksz = 0;
314}
315
316void
317eqn_read(struct eqn_node *ep, const char *p)
318{
319	char		*cp;
320
321	if (ep->data == NULL) {
322		ep->sz = strlen(p);
323		ep->data = mandoc_strdup(p);
324	} else {
325		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
326		free(ep->data);
327		ep->data = cp;
328	}
329	ep->sz += 1;
330}
331
332/*
333 * Find the key "key" of the give size within our eqn-defined values.
334 */
335static struct eqn_def *
336eqn_def_find(struct eqn_node *ep)
337{
338	int		 i;
339
340	for (i = 0; i < (int)ep->defsz; i++)
341		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
342		    ep->defs[i].keysz, ep->start, ep->toksz))
343			return &ep->defs[i];
344
345	return NULL;
346}
347
348/*
349 * Parse a token from the input text.  The modes are:
350 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
351 *   before its next occurence.  Do not interpret the token in any
352 *   way and return EQN_TOK_QUOTED.  All other modes behave like
353 *   MODE_QUOTED when *ep->start is '"'.
354 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
355 *   otherwise, it ends before the next whitespace or brace.
356 *   Do not interpret the token and return EQN_TOK__MAX.
357 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
358 *   alias created with define.  If it is an alias, replace it with
359 *   its string value and reparse.
360 * MODE_TOK: Like MODE_SUB, but also check the token against the list
361 *   of tokens, and if there is a match, return that token.  Otherwise,
362 *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
363 *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
364 *   a token match, *ep->start is set to an allocated string that the
365 *   caller is expected to free.
366 * All modes skip whitespace following the end of the token.
367 */
368static enum eqn_tok
369eqn_next(struct eqn_node *ep, enum parse_mode mode)
370{
371	static int	 last_len, lim;
372
373	struct eqn_def	*def;
374	size_t		 start;
375	int		 diff, i, quoted;
376	enum eqn_tok	 tok;
377
378	/*
379	 * Reset the recursion counter after advancing
380	 * beyond the end of the previous substitution.
381	 */
382	if (ep->end - ep->data >= last_len)
383		lim = 0;
384
385	ep->start = ep->end;
386	quoted = mode == MODE_QUOTED;
387	for (;;) {
388		switch (*ep->start) {
389		case '\0':
390			ep->toksz = 0;
391			return EQN_TOK_EOF;
392		case '"':
393			quoted = 1;
394			break;
395		default:
396			break;
397		}
398		if (quoted) {
399			ep->end = strchr(ep->start + 1, *ep->start);
400			ep->start++;  /* Skip opening quote. */
401			if (ep->end == NULL) {
402				mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
403				    ep->node->line, ep->node->pos, NULL);
404				ep->end = strchr(ep->start, '\0');
405			}
406		} else {
407			ep->end = ep->start + 1;
408			if (*ep->start != '{' && *ep->start != '}')
409				ep->end += strcspn(ep->end, " ^~\"{}\t");
410		}
411		ep->toksz = ep->end - ep->start;
412		if (quoted && *ep->end != '\0')
413			ep->end++;  /* Skip closing quote. */
414		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
415			ep->end++;
416		if (quoted)  /* Cannot return, may have to strndup. */
417			break;
418		if (mode == MODE_NOSUB)
419			return EQN_TOK__MAX;
420		if ((def = eqn_def_find(ep)) == NULL)
421			break;
422		if (++lim > EQN_NEST_MAX) {
423			mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
424			    ep->node->line, ep->node->pos, NULL);
425			return EQN_TOK_EOF;
426		}
427
428		/* Replace a defined name with its string value. */
429		if ((diff = def->valsz - ep->toksz) > 0) {
430			start = ep->start - ep->data;
431			ep->sz += diff;
432			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
433			ep->start = ep->data + start;
434		}
435		if (diff)
436			memmove(ep->start + def->valsz, ep->start + ep->toksz,
437			    strlen(ep->start + ep->toksz) + 1);
438		memcpy(ep->start, def->val, def->valsz);
439		last_len = ep->start - ep->data + def->valsz;
440	}
441	if (mode != MODE_TOK)
442		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
443	if (quoted) {
444		ep->start = mandoc_strndup(ep->start, ep->toksz);
445		return EQN_TOK_QUOTED;
446	}
447	for (tok = 0; tok < EQN_TOK__MAX; tok++)
448		if (STRNEQ(ep->start, ep->toksz,
449		    eqn_toks[tok], strlen(eqn_toks[tok])))
450			return tok;
451
452	for (i = 0; i < EQNSYM__MAX; i++) {
453		if (STRNEQ(ep->start, ep->toksz,
454		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
455			mandoc_asprintf(&ep->start,
456			    "\\[%s]", eqnsyms[i].sym);
457			return EQN_TOK_SYM;
458		}
459	}
460	ep->start = mandoc_strndup(ep->start, ep->toksz);
461	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
462		if (STRNEQ(ep->start, ep->toksz,
463		    eqn_func[i], strlen(eqn_func[i])))
464			return EQN_TOK_FUNC;
465	return EQN_TOK__MAX;
466}
467
468void
469eqn_box_free(struct eqn_box *bp)
470{
471
472	if (bp->first)
473		eqn_box_free(bp->first);
474	if (bp->next)
475		eqn_box_free(bp->next);
476
477	free(bp->text);
478	free(bp->left);
479	free(bp->right);
480	free(bp->top);
481	free(bp->bottom);
482	free(bp);
483}
484
485/*
486 * Allocate a box as the last child of the parent node.
487 */
488static struct eqn_box *
489eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
490{
491	struct eqn_box	*bp;
492
493	bp = mandoc_calloc(1, sizeof(struct eqn_box));
494	bp->parent = parent;
495	bp->parent->args++;
496	bp->expectargs = UINT_MAX;
497	bp->font = bp->parent->font;
498	bp->size = ep->gsize;
499
500	if (NULL != parent->first) {
501		parent->last->next = bp;
502		bp->prev = parent->last;
503	} else
504		parent->first = bp;
505
506	parent->last = bp;
507	return bp;
508}
509
510/*
511 * Reparent the current last node (of the current parent) under a new
512 * EQN_SUBEXPR as the first element.
513 * Then return the new parent.
514 * The new EQN_SUBEXPR will have a two-child limit.
515 */
516static struct eqn_box *
517eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
518{
519	struct eqn_box	*b, *newb;
520
521	assert(NULL != parent->last);
522	b = parent->last;
523	if (parent->last == parent->first)
524		parent->first = NULL;
525	parent->args--;
526	parent->last = b->prev;
527	b->prev = NULL;
528	newb = eqn_box_alloc(ep, parent);
529	newb->type = EQN_SUBEXPR;
530	newb->expectargs = 2;
531	newb->args = 1;
532	newb->first = newb->last = b;
533	newb->first->next = NULL;
534	b->parent = newb;
535	return newb;
536}
537
538/*
539 * Parse the "delim" control statement.
540 */
541static void
542eqn_delim(struct eqn_node *ep)
543{
544	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
545		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
546		    ep->node->line, ep->node->pos, "delim");
547		if (ep->end[0] != '\0')
548			ep->end++;
549	} else if (strncmp(ep->end, "off", 3) == 0) {
550		ep->delim = 0;
551		ep->end += 3;
552	} else if (strncmp(ep->end, "on", 2) == 0) {
553		if (ep->odelim && ep->cdelim)
554			ep->delim = 1;
555		ep->end += 2;
556	} else {
557		ep->odelim = *ep->end++;
558		ep->cdelim = *ep->end++;
559		ep->delim = 1;
560	}
561}
562
563/*
564 * Undefine a previously-defined string.
565 */
566static void
567eqn_undef(struct eqn_node *ep)
568{
569	struct eqn_def	*def;
570
571	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
572		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
573		    ep->node->line, ep->node->pos, "undef");
574		return;
575	}
576	if ((def = eqn_def_find(ep)) == NULL)
577		return;
578	free(def->key);
579	free(def->val);
580	def->key = def->val = NULL;
581	def->keysz = def->valsz = 0;
582}
583
584static void
585eqn_def(struct eqn_node *ep)
586{
587	struct eqn_def	*def;
588	int		 i;
589
590	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
591		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
592		    ep->node->line, ep->node->pos, "define");
593		return;
594	}
595
596	/*
597	 * Search for a key that already exists.
598	 * Create a new key if none is found.
599	 */
600	if ((def = eqn_def_find(ep)) == NULL) {
601		/* Find holes in string array. */
602		for (i = 0; i < (int)ep->defsz; i++)
603			if (0 == ep->defs[i].keysz)
604				break;
605
606		if (i == (int)ep->defsz) {
607			ep->defsz++;
608			ep->defs = mandoc_reallocarray(ep->defs,
609			    ep->defsz, sizeof(struct eqn_def));
610			ep->defs[i].key = ep->defs[i].val = NULL;
611		}
612
613		def = ep->defs + i;
614		free(def->key);
615		def->key = mandoc_strndup(ep->start, ep->toksz);
616		def->keysz = ep->toksz;
617	}
618
619	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
620		mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
621		    ep->node->line, ep->node->pos, "define %s", def->key);
622		free(def->key);
623		free(def->val);
624		def->key = def->val = NULL;
625		def->keysz = def->valsz = 0;
626		return;
627	}
628	free(def->val);
629	def->val = mandoc_strndup(ep->start, ep->toksz);
630	def->valsz = ep->toksz;
631}
632
633void
634eqn_parse(struct eqn_node *ep)
635{
636	struct eqn_box	*cur, *nbox, *parent, *split;
637	const char	*cp, *cpn;
638	char		*p;
639	enum eqn_tok	 tok;
640	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
641	int		 size;
642
643	parent = ep->node->eqn;
644	assert(parent != NULL);
645
646	/*
647	 * Empty equation.
648	 * Do not add it to the high-level syntax tree.
649	 */
650
651	if (ep->data == NULL)
652		return;
653
654	ep->start = ep->end = ep->data + strspn(ep->data, " ^~");
655
656next_tok:
657	tok = eqn_next(ep, MODE_TOK);
658	switch (tok) {
659	case EQN_TOK_UNDEF:
660		eqn_undef(ep);
661		break;
662	case EQN_TOK_NDEFINE:
663	case EQN_TOK_DEFINE:
664		eqn_def(ep);
665		break;
666	case EQN_TOK_TDEFINE:
667		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
668		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
669			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
670			    ep->node->line, ep->node->pos, "tdefine");
671		break;
672	case EQN_TOK_DELIM:
673		eqn_delim(ep);
674		break;
675	case EQN_TOK_GFONT:
676		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
677			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
678			    ep->node->line, ep->node->pos, eqn_toks[tok]);
679		break;
680	case EQN_TOK_MARK:
681	case EQN_TOK_LINEUP:
682		/* Ignore these. */
683		break;
684	case EQN_TOK_DYAD:
685	case EQN_TOK_VEC:
686	case EQN_TOK_UNDER:
687	case EQN_TOK_BAR:
688	case EQN_TOK_TILDE:
689	case EQN_TOK_HAT:
690	case EQN_TOK_DOT:
691	case EQN_TOK_DOTDOT:
692		if (parent->last == NULL) {
693			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
694			    ep->node->line, ep->node->pos, eqn_toks[tok]);
695			cur = eqn_box_alloc(ep, parent);
696			cur->type = EQN_TEXT;
697			cur->text = mandoc_strdup("");
698		}
699		parent = eqn_box_makebinary(ep, parent);
700		parent->type = EQN_LIST;
701		parent->expectargs = 1;
702		parent->font = EQNFONT_ROMAN;
703		switch (tok) {
704		case EQN_TOK_DOTDOT:
705			parent->top = mandoc_strdup("\\[ad]");
706			break;
707		case EQN_TOK_VEC:
708			parent->top = mandoc_strdup("\\[->]");
709			break;
710		case EQN_TOK_DYAD:
711			parent->top = mandoc_strdup("\\[<>]");
712			break;
713		case EQN_TOK_TILDE:
714			parent->top = mandoc_strdup("\\[a~]");
715			break;
716		case EQN_TOK_UNDER:
717			parent->bottom = mandoc_strdup("\\[ul]");
718			break;
719		case EQN_TOK_BAR:
720			parent->top = mandoc_strdup("\\[rn]");
721			break;
722		case EQN_TOK_DOT:
723			parent->top = mandoc_strdup("\\[a.]");
724			break;
725		case EQN_TOK_HAT:
726			parent->top = mandoc_strdup("\\[ha]");
727			break;
728		default:
729			abort();
730		}
731		parent = parent->parent;
732		break;
733	case EQN_TOK_FWD:
734	case EQN_TOK_BACK:
735	case EQN_TOK_DOWN:
736	case EQN_TOK_UP:
737		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
738			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
739			    ep->node->line, ep->node->pos, eqn_toks[tok]);
740		break;
741	case EQN_TOK_FAT:
742	case EQN_TOK_ROMAN:
743	case EQN_TOK_ITALIC:
744	case EQN_TOK_BOLD:
745		while (parent->args == parent->expectargs)
746			parent = parent->parent;
747		/*
748		 * These values apply to the next word or sequence of
749		 * words; thus, we mark that we'll have a child with
750		 * exactly one of those.
751		 */
752		parent = eqn_box_alloc(ep, parent);
753		parent->type = EQN_LIST;
754		parent->expectargs = 1;
755		switch (tok) {
756		case EQN_TOK_FAT:
757			parent->font = EQNFONT_FAT;
758			break;
759		case EQN_TOK_ROMAN:
760			parent->font = EQNFONT_ROMAN;
761			break;
762		case EQN_TOK_ITALIC:
763			parent->font = EQNFONT_ITALIC;
764			break;
765		case EQN_TOK_BOLD:
766			parent->font = EQNFONT_BOLD;
767			break;
768		default:
769			abort();
770		}
771		break;
772	case EQN_TOK_SIZE:
773	case EQN_TOK_GSIZE:
774		/* Accept two values: integral size and a single. */
775		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
776			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
777			    ep->node->line, ep->node->pos, eqn_toks[tok]);
778			break;
779		}
780		size = mandoc_strntoi(ep->start, ep->toksz, 10);
781		if (-1 == size) {
782			mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
783			    ep->node->line, ep->node->pos, eqn_toks[tok]);
784			break;
785		}
786		if (EQN_TOK_GSIZE == tok) {
787			ep->gsize = size;
788			break;
789		}
790		while (parent->args == parent->expectargs)
791			parent = parent->parent;
792		parent = eqn_box_alloc(ep, parent);
793		parent->type = EQN_LIST;
794		parent->expectargs = 1;
795		parent->size = size;
796		break;
797	case EQN_TOK_FROM:
798	case EQN_TOK_TO:
799	case EQN_TOK_SUB:
800	case EQN_TOK_SUP:
801		/*
802		 * We have a left-right-associative expression.
803		 * Repivot under a positional node, open a child scope
804		 * and keep on reading.
805		 */
806		if (parent->last == NULL) {
807			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
808			    ep->node->line, ep->node->pos, eqn_toks[tok]);
809			cur = eqn_box_alloc(ep, parent);
810			cur->type = EQN_TEXT;
811			cur->text = mandoc_strdup("");
812		}
813		while (parent->expectargs == 1 && parent->args == 1)
814			parent = parent->parent;
815		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
816			for (cur = parent; cur != NULL; cur = cur->parent)
817				if (cur->pos == EQNPOS_SUB ||
818				    cur->pos == EQNPOS_SUP ||
819				    cur->pos == EQNPOS_SUBSUP ||
820				    cur->pos == EQNPOS_SQRT ||
821				    cur->pos == EQNPOS_OVER)
822					break;
823			if (cur != NULL)
824				parent = cur->parent;
825		}
826		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
827			parent->expectargs = 3;
828			parent->pos = EQNPOS_SUBSUP;
829			break;
830		}
831		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
832			parent->expectargs = 3;
833			parent->pos = EQNPOS_FROMTO;
834			break;
835		}
836		parent = eqn_box_makebinary(ep, parent);
837		switch (tok) {
838		case EQN_TOK_FROM:
839			parent->pos = EQNPOS_FROM;
840			break;
841		case EQN_TOK_TO:
842			parent->pos = EQNPOS_TO;
843			break;
844		case EQN_TOK_SUP:
845			parent->pos = EQNPOS_SUP;
846			break;
847		case EQN_TOK_SUB:
848			parent->pos = EQNPOS_SUB;
849			break;
850		default:
851			abort();
852		}
853		break;
854	case EQN_TOK_SQRT:
855		while (parent->args == parent->expectargs)
856			parent = parent->parent;
857		/*
858		 * Accept a left-right-associative set of arguments just
859		 * like sub and sup and friends but without rebalancing
860		 * under a pivot.
861		 */
862		parent = eqn_box_alloc(ep, parent);
863		parent->type = EQN_SUBEXPR;
864		parent->pos = EQNPOS_SQRT;
865		parent->expectargs = 1;
866		break;
867	case EQN_TOK_OVER:
868		/*
869		 * We have a right-left-associative fraction.
870		 * Close out anything that's currently open, then
871		 * rebalance and continue reading.
872		 */
873		if (parent->last == NULL) {
874			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
875			    ep->node->line, ep->node->pos, eqn_toks[tok]);
876			cur = eqn_box_alloc(ep, parent);
877			cur->type = EQN_TEXT;
878			cur->text = mandoc_strdup("");
879		}
880		while (parent->args == parent->expectargs)
881			parent = parent->parent;
882		while (EQN_SUBEXPR == parent->type)
883			parent = parent->parent;
884		parent = eqn_box_makebinary(ep, parent);
885		parent->pos = EQNPOS_OVER;
886		break;
887	case EQN_TOK_RIGHT:
888	case EQN_TOK_BRACE_CLOSE:
889		/*
890		 * Close out the existing brace.
891		 * FIXME: this is a shitty sentinel: we should really
892		 * have a native EQN_BRACE type or whatnot.
893		 */
894		for (cur = parent; cur != NULL; cur = cur->parent)
895			if (cur->type == EQN_LIST &&
896			    cur->expectargs > 1 &&
897			    (tok == EQN_TOK_BRACE_CLOSE ||
898			     cur->left != NULL))
899				break;
900		if (cur == NULL) {
901			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
902			    ep->node->line, ep->node->pos, eqn_toks[tok]);
903			break;
904		}
905		parent = cur;
906		if (EQN_TOK_RIGHT == tok) {
907			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
908				mandoc_msg(MANDOCERR_REQ_EMPTY,
909				    ep->parse, ep->node->line,
910				    ep->node->pos, eqn_toks[tok]);
911				break;
912			}
913			/* Handling depends on right/left. */
914			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
915				parent->right = mandoc_strdup("\\[rc]");
916			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
917				parent->right = mandoc_strdup("\\[rf]");
918			else
919				parent->right =
920				    mandoc_strndup(ep->start, ep->toksz);
921		}
922		parent = parent->parent;
923		if (tok == EQN_TOK_BRACE_CLOSE &&
924		    (parent->type == EQN_PILE ||
925		     parent->type == EQN_MATRIX))
926			parent = parent->parent;
927		/* Close out any "singleton" lists. */
928		while (parent->type == EQN_LIST &&
929		    parent->expectargs == 1 &&
930		    parent->args == 1)
931			parent = parent->parent;
932		break;
933	case EQN_TOK_BRACE_OPEN:
934	case EQN_TOK_LEFT:
935		/*
936		 * If we already have something in the stack and we're
937		 * in an expression, then rewind til we're not any more
938		 * (just like with the text node).
939		 */
940		while (parent->args == parent->expectargs)
941			parent = parent->parent;
942		if (EQN_TOK_LEFT == tok &&
943		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
944			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
945			    ep->node->line, ep->node->pos, eqn_toks[tok]);
946			break;
947		}
948		parent = eqn_box_alloc(ep, parent);
949		parent->type = EQN_LIST;
950		if (EQN_TOK_LEFT == tok) {
951			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
952				parent->left = mandoc_strdup("\\[lc]");
953			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
954				parent->left = mandoc_strdup("\\[lf]");
955			else
956				parent->left =
957				    mandoc_strndup(ep->start, ep->toksz);
958		}
959		break;
960	case EQN_TOK_PILE:
961	case EQN_TOK_LPILE:
962	case EQN_TOK_RPILE:
963	case EQN_TOK_CPILE:
964	case EQN_TOK_CCOL:
965	case EQN_TOK_LCOL:
966	case EQN_TOK_RCOL:
967		while (parent->args == parent->expectargs)
968			parent = parent->parent;
969		parent = eqn_box_alloc(ep, parent);
970		parent->type = EQN_PILE;
971		parent->expectargs = 1;
972		break;
973	case EQN_TOK_ABOVE:
974		for (cur = parent; cur != NULL; cur = cur->parent)
975			if (cur->type == EQN_PILE)
976				break;
977		if (cur == NULL) {
978			mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
979			    ep->node->line, ep->node->pos, eqn_toks[tok]);
980			break;
981		}
982		parent = eqn_box_alloc(ep, cur);
983		parent->type = EQN_LIST;
984		break;
985	case EQN_TOK_MATRIX:
986		while (parent->args == parent->expectargs)
987			parent = parent->parent;
988		parent = eqn_box_alloc(ep, parent);
989		parent->type = EQN_MATRIX;
990		parent->expectargs = 1;
991		break;
992	case EQN_TOK_EOF:
993		return;
994	case EQN_TOK__MAX:
995	case EQN_TOK_FUNC:
996	case EQN_TOK_QUOTED:
997	case EQN_TOK_SYM:
998		p = ep->start;
999		assert(p != NULL);
1000		/*
1001		 * If we already have something in the stack and we're
1002		 * in an expression, then rewind til we're not any more.
1003		 */
1004		while (parent->args == parent->expectargs)
1005			parent = parent->parent;
1006		cur = eqn_box_alloc(ep, parent);
1007		cur->type = EQN_TEXT;
1008		cur->text = p;
1009		switch (tok) {
1010		case EQN_TOK_FUNC:
1011			cur->font = EQNFONT_ROMAN;
1012			break;
1013		case EQN_TOK_QUOTED:
1014			if (cur->font == EQNFONT_NONE)
1015				cur->font = EQNFONT_ITALIC;
1016			break;
1017		case EQN_TOK_SYM:
1018			break;
1019		default:
1020			if (cur->font != EQNFONT_NONE || *p == '\0')
1021				break;
1022			cpn = p - 1;
1023			ccln = CCL_LET;
1024			split = NULL;
1025			for (;;) {
1026				/* Advance to next character. */
1027				cp = cpn++;
1028				ccl = ccln;
1029				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1030				    isdigit((unsigned char)*cpn) ||
1031				    (*cpn == '.' && (ccl == CCL_DIG ||
1032				     isdigit((unsigned char)cpn[1]))) ?
1033				    CCL_DIG : CCL_PUN;
1034				/* No boundary before first character. */
1035				if (cp < p)
1036					continue;
1037				cur->font = ccl == CCL_LET ?
1038				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1039				if (*cp == '\\')
1040					mandoc_escape(&cpn, NULL, NULL);
1041				/* No boundary after last character. */
1042				if (*cpn == '\0')
1043					break;
1044				if (ccln == ccl && *cp != ',' && *cpn != ',')
1045					continue;
1046				/* Boundary found, split the text. */
1047				if (parent->args == parent->expectargs) {
1048					/* Remove the text from the tree. */
1049					if (cur->prev == NULL)
1050						parent->first = cur->next;
1051					else
1052						cur->prev->next = NULL;
1053					parent->last = cur->prev;
1054					parent->args--;
1055					/* Set up a list instead. */
1056					split = eqn_box_alloc(ep, parent);
1057					split->type = EQN_LIST;
1058					/* Insert the word into the list. */
1059					split->first = split->last = cur;
1060					cur->parent = split;
1061					cur->prev = NULL;
1062					parent = split;
1063				}
1064				/* Append a new text box. */
1065				nbox = eqn_box_alloc(ep, parent);
1066				nbox->type = EQN_TEXT;
1067				nbox->text = mandoc_strdup(cpn);
1068				/* Truncate the old box. */
1069				p = mandoc_strndup(cur->text,
1070				    cpn - cur->text);
1071				free(cur->text);
1072				cur->text = p;
1073				/* Setup to process the new box. */
1074				cur = nbox;
1075				p = nbox->text;
1076				cpn = p - 1;
1077				ccln = CCL_LET;
1078			}
1079			if (split != NULL)
1080				parent = split->parent;
1081			break;
1082		}
1083		break;
1084	default:
1085		abort();
1086	}
1087	goto next_tok;
1088}
1089
1090void
1091eqn_free(struct eqn_node *p)
1092{
1093	int		 i;
1094
1095	for (i = 0; i < (int)p->defsz; i++) {
1096		free(p->defs[i].key);
1097		free(p->defs[i].val);
1098	}
1099
1100	free(p->data);
1101	free(p->defs);
1102	free(p);
1103}
1104