1/* $Id: tag.c,v 1.36 2020/04/19 16:36:16 schwarze Exp $ */
2/*
3 * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * Functions to tag syntax tree nodes.
18 * For internal use by mandoc(1) validation modules only.
19 */
20#include "config.h"
21
22#include <sys/types.h>
23
24#include <assert.h>
25#include <limits.h>
26#include <stddef.h>
27#include <stdint.h>
28#include <stdlib.h>
29#include <string.h>
30
31#include "mandoc_aux.h"
32#include "mandoc_ohash.h"
33#include "roff.h"
34#include "mdoc.h"
35#include "roff_int.h"
36#include "tag.h"
37
/*
 * One hash table entry per tag name.
 * It records all syntax tree nodes currently registered for that
 * name together with the priority they were registered at.
 */
struct tag_entry {
	struct roff_node **nodes;	/* Nodes registered for this tag. */
	size_t	 maxnodes;		/* Allocated slots in nodes[]. */
	size_t	 nnodes;		/* Slots of nodes[] in use. */
	int	 prio;			/* Priority; lower numbers win. */
	char	 s[];			/* NUL-terminated tag name, the key. */
};
45
/* Helpers of tag_postprocess(), defined further down in this file. */
static void		 tag_move_href(struct roff_man *,
				struct roff_node *, const char *);
static void		 tag_move_id(struct roff_node *);

/* The table of struct tag_entry objects, keyed by tag name. */
static struct ohash	 tag_data;
51
52
/*
 * Set up the ohash table to collect nodes
 * where various marked-up terms are documented.
 * The entries are struct tag_entry objects keyed by their s member.
 */
void
tag_alloc(void)
{
	/*
	 * NOTE(review): the 4 is presumably an initial size hint
	 * for the table; confirm against mandoc_ohash_init().
	 */
	mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
}
62
63void
64tag_free(void)
65{
66	struct tag_entry	*entry;
67	unsigned int		 slot;
68
69	if (tag_data.info.free == NULL)
70		return;
71	entry = ohash_first(&tag_data, &slot);
72	while (entry != NULL) {
73		free(entry->nodes);
74		free(entry);
75		entry = ohash_next(&tag_data, &slot);
76	}
77	ohash_delete(&tag_data);
78	tag_data.info.free = NULL;
79}
80
81/*
82 * Set a node where a term is defined,
83 * unless it is already defined at a lower priority.
84 */
85void
86tag_put(const char *s, int prio, struct roff_node *n)
87{
88	struct tag_entry	*entry;
89	struct roff_node	*nold;
90	const char		*se;
91	size_t			 len;
92	unsigned int		 slot;
93
94	assert(prio <= TAG_FALLBACK);
95
96	if (s == NULL) {
97		if (n->child == NULL || n->child->type != ROFFT_TEXT)
98			return;
99		s = n->child->string;
100		switch (s[0]) {
101		case '-':
102			s++;
103			break;
104		case '\\':
105			switch (s[1]) {
106			case '&':
107			case '-':
108			case 'e':
109				s += 2;
110				break;
111			default:
112				break;
113			}
114			break;
115		default:
116			break;
117		}
118	}
119
120	/*
121	 * Skip whitespace and escapes and whatever follows,
122	 * and if there is any, downgrade the priority.
123	 */
124
125	len = strcspn(s, " \t\\");
126	if (len == 0)
127		return;
128
129	se = s + len;
130	if (*se != '\0' && prio < TAG_WEAK)
131		prio = TAG_WEAK;
132
133	slot = ohash_qlookupi(&tag_data, s, &se);
134	entry = ohash_find(&tag_data, slot);
135
136	/* Build a new entry. */
137
138	if (entry == NULL) {
139		entry = mandoc_malloc(sizeof(*entry) + len + 1);
140		memcpy(entry->s, s, len);
141		entry->s[len] = '\0';
142		entry->nodes = NULL;
143		entry->maxnodes = entry->nnodes = 0;
144		ohash_insert(&tag_data, slot, entry);
145	}
146
147	/*
148	 * Lower priority numbers take precedence.
149	 * If a better entry is already present, ignore the new one.
150	 */
151
152	else if (entry->prio < prio)
153			return;
154
155	/*
156	 * If the existing entry is worse, clear it.
157	 * In addition, a tag with priority TAG_FALLBACK
158	 * is only used if the tag occurs exactly once.
159	 */
160
161	else if (entry->prio > prio || prio == TAG_FALLBACK) {
162		while (entry->nnodes > 0) {
163			nold = entry->nodes[--entry->nnodes];
164			nold->flags &= ~NODE_ID;
165			free(nold->tag);
166			nold->tag = NULL;
167		}
168		if (prio == TAG_FALLBACK) {
169			entry->prio = TAG_DELETE;
170			return;
171		}
172	}
173
174	/* Remember the new node. */
175
176	if (entry->maxnodes == entry->nnodes) {
177		entry->maxnodes += 4;
178		entry->nodes = mandoc_reallocarray(entry->nodes,
179		    entry->maxnodes, sizeof(*entry->nodes));
180	}
181	entry->nodes[entry->nnodes++] = n;
182	entry->prio = prio;
183	n->flags |= NODE_ID;
184	if (n->child == NULL || n->child->string != s || *se != '\0') {
185		assert(n->tag == NULL);
186		n->tag = mandoc_strndup(s, len);
187	}
188}
189
190int
191tag_exists(const char *tag)
192{
193	return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
194}
195
196/*
197 * For in-line elements, move the link target
198 * to the enclosing paragraph when appropriate.
199 */
200static void
201tag_move_id(struct roff_node *n)
202{
203	struct roff_node *np;
204
205	np = n;
206	for (;;) {
207		if (np->prev != NULL)
208			np = np->prev;
209		else if ((np = np->parent) == NULL)
210			return;
211		switch (np->tok) {
212		case MDOC_It:
213			switch (np->parent->parent->norm->Bl.type) {
214			case LIST_column:
215				/* Target the ROFFT_BLOCK = <tr>. */
216				np = np->parent;
217				break;
218			case LIST_diag:
219			case LIST_hang:
220			case LIST_inset:
221			case LIST_ohang:
222			case LIST_tag:
223				/* Target the ROFFT_HEAD = <dt>. */
224				np = np->parent->head;
225				break;
226			default:
227				/* Target the ROFF_BODY = <li>. */
228				break;
229			}
230			/* FALLTHROUGH */
231		case MDOC_Pp:	/* Target the ROFFT_ELEM = <p>. */
232			if (np->tag == NULL) {
233				np->tag = mandoc_strdup(n->tag == NULL ?
234				    n->child->string : n->tag);
235				np->flags |= NODE_ID;
236				n->flags &= ~NODE_ID;
237			}
238			return;
239		case MDOC_Sh:
240		case MDOC_Ss:
241		case MDOC_Bd:
242		case MDOC_Bl:
243		case MDOC_D1:
244		case MDOC_Dl:
245		case MDOC_Rs:
246			/* Do not move past major blocks. */
247			return;
248		default:
249			/*
250			 * Move past in-line content and partial
251			 * blocks, for example .It Xo or .It Bq Er.
252			 */
253			break;
254		}
255	}
256}
257
258/*
259 * When a paragraph is tagged and starts with text,
260 * move the permalink to the first few words.
261 */
262static void
263tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
264{
265	char	*cp;
266
267	if (n == NULL || n->type != ROFFT_TEXT ||
268	    *n->string == '\0' || *n->string == ' ')
269		return;
270
271	cp = n->string;
272	while (cp != NULL && cp - n->string < 5)
273		cp = strchr(cp + 1, ' ');
274
275	/* If the first text node is longer, split it. */
276
277	if (cp != NULL && cp[1] != '\0') {
278		man->last = n;
279		man->next = ROFF_NEXT_SIBLING;
280		roff_word_alloc(man, n->line,
281		    n->pos + (cp - n->string), cp + 1);
282		man->last->flags = n->flags & ~NODE_LINE;
283		*cp = '\0';
284	}
285
286	assert(n->tag == NULL);
287	n->tag = mandoc_strdup(tag);
288	n->flags |= NODE_HREF;
289}
290
291/*
292 * When all tags have been set, decide where to put
293 * the associated permalinks, and maybe move some tags
294 * to the beginning of the respective paragraphs.
295 */
296void
297tag_postprocess(struct roff_man *man, struct roff_node *n)
298{
299	if (n->flags & NODE_ID) {
300		switch (n->tok) {
301		case MDOC_Pp:
302			tag_move_href(man, n->next, n->tag);
303			break;
304		case MDOC_Bd:
305		case MDOC_D1:
306		case MDOC_Dl:
307			tag_move_href(man, n->child, n->tag);
308			break;
309		case MDOC_Bl:
310			/* XXX No permalink for now. */
311			break;
312		default:
313			if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
314				tag_move_id(n);
315			if (n->tok != MDOC_Tg)
316				n->flags |= NODE_HREF;
317			else if ((n->flags & NODE_ID) == 0) {
318				n->flags |= NODE_NOPRT;
319				free(n->tag);
320				n->tag = NULL;
321			}
322			break;
323		}
324	}
325	for (n = n->child; n != NULL; n = n->next)
326		tag_postprocess(man, n);
327}
328