1/* $Id: html.c,v 1.275 2021/09/09 14:47:24 schwarze Exp $ */
2/*
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Common functions for mandoc(1) HTML formatters.
19 * For use by individual formatters and by the main program.
20 */
21#include "config.h"
22
23#include <sys/types.h>
24#include <sys/stat.h>
25
26#include <assert.h>
27#include <ctype.h>
28#include <stdarg.h>
29#include <stddef.h>
30#include <stdio.h>
31#include <stdint.h>
32#include <stdlib.h>
33#include <string.h>
34#include <unistd.h>
35
36#include "mandoc_aux.h"
37#include "mandoc_ohash.h"
38#include "mandoc.h"
39#include "roff.h"
40#include "out.h"
41#include "html.h"
42#include "manconf.h"
43#include "main.h"
44
/*
 * Per-element formatting flags, combined with bitwise OR
 * in the "flags" member of struct htmldata.
 */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */

/* Frequently used combinations of the line break flags. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

/*
 * Static description of one HTML element type.
 */
struct	htmldata {
	const char	 *name;   /* Element name printed inside the tag. */
	int		  flags;  /* Bitwise OR of the HTML_* flags above. */
};
61
62static	const struct htmldata htmltags[TAG_MAX] = {
63	{"html",	HTML_NLALL},
64	{"head",	HTML_NLALL | HTML_INDENT},
65	{"meta",	HTML_NOSTACK | HTML_NLALL},
66	{"link",	HTML_NOSTACK | HTML_NLALL},
67	{"style",	HTML_NLALL | HTML_INDENT},
68	{"title",	HTML_NLAROUND},
69	{"body",	HTML_NLALL},
70	{"div",		HTML_NLAROUND},
71	{"section",	HTML_NLALL},
72	{"table",	HTML_NLALL | HTML_INDENT},
73	{"tr",		HTML_NLALL | HTML_INDENT},
74	{"td",		HTML_NLAROUND},
75	{"li",		HTML_NLAROUND | HTML_INDENT},
76	{"ul",		HTML_NLALL | HTML_INDENT},
77	{"ol",		HTML_NLALL | HTML_INDENT},
78	{"dl",		HTML_NLALL | HTML_INDENT},
79	{"dt",		HTML_NLAROUND},
80	{"dd",		HTML_NLAROUND | HTML_INDENT},
81	{"h1",		HTML_TOPHRASE | HTML_NLAROUND},
82	{"h2",		HTML_TOPHRASE | HTML_NLAROUND},
83	{"p",		HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
84	{"pre",		HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
85	{"a",		HTML_INPHRASE | HTML_TOPHRASE},
86	{"b",		HTML_INPHRASE | HTML_TOPHRASE},
87	{"cite",	HTML_INPHRASE | HTML_TOPHRASE},
88	{"code",	HTML_INPHRASE | HTML_TOPHRASE},
89	{"i",		HTML_INPHRASE | HTML_TOPHRASE},
90	{"small",	HTML_INPHRASE | HTML_TOPHRASE},
91	{"span",	HTML_INPHRASE | HTML_TOPHRASE},
92	{"var",		HTML_INPHRASE | HTML_TOPHRASE},
93	{"br",		HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
94	{"hr",		HTML_INPHRASE | HTML_NOSTACK},
95	{"mark",	HTML_INPHRASE },
96	{"math",	HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
97	{"mrow",	0},
98	{"mi",		0},
99	{"mn",		0},
100	{"mo",		0},
101	{"msup",	0},
102	{"msub",	0},
103	{"msubsup",	0},
104	{"mfrac",	0},
105	{"msqrt",	0},
106	{"mfenced",	0},
107	{"mtable",	0},
108	{"mtr",		0},
109	{"mtd",		0},
110	{"munderover",	0},
111	{"munder",	0},
112	{"mover",	0},
113};
114
115/* Avoid duplicate HTML id= attributes. */
116
117struct	id_entry {
118	int	 ord;	/* Ordinal number of the latest occurrence. */
119	char	 id[];	/* The id= attribute without any ordinal suffix. */
120};
121static	struct ohash	 id_unique;
122
/* Forward declarations of the file-local helper functions. */
static	void	 html_reset_internal(struct html *h);
static	void	 print_byte(struct html *h, char c);
static	void	 print_ctag(struct html *h, struct tag *tag);
static	int	 print_encode(struct html *h, const char *p,
			const char *pend, int norecurse);
static	void	 print_endword(struct html *h);
static	int	 print_escape(struct html *h, char c);
static	void	 print_href(struct html *h, const char *name,
			const char *sec, int man);
static	void	 print_indent(struct html *h);
static	void	 print_metaf(struct html *h);
static	void	 print_word(struct html *h, const char *cp);
134
135
136void *
137html_alloc(const struct manoutput *outopts)
138{
139	struct html	*h;
140
141	h = mandoc_calloc(1, sizeof(struct html));
142
143	h->tag = NULL;
144	h->metac = h->metal = ESCAPE_FONTROMAN;
145	h->style = outopts->style;
146	if ((h->base_man1 = outopts->man) == NULL)
147		h->base_man2 = NULL;
148	else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
149		*h->base_man2++ = '\0';
150	h->base_includes = outopts->includes;
151	if (outopts->fragment)
152		h->oflags |= HTML_FRAGMENT;
153	if (outopts->toc)
154		h->oflags |= HTML_TOC;
155
156	mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
157
158	return h;
159}
160
161static void
162html_reset_internal(struct html *h)
163{
164	struct tag	*tag;
165	struct id_entry	*entry;
166	unsigned int	 slot;
167
168	while ((tag = h->tag) != NULL) {
169		h->tag = tag->next;
170		free(tag);
171	}
172	entry = ohash_first(&id_unique, &slot);
173	while (entry != NULL) {
174		free(entry);
175		entry = ohash_next(&id_unique, &slot);
176	}
177	ohash_delete(&id_unique);
178}
179
180void
181html_reset(void *p)
182{
183	html_reset_internal(p);
184	mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
185}
186
/*
 * Destroy the formatter "p": release all per-document state,
 * then the state structure itself.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
193
194void
195print_gen_head(struct html *h)
196{
197	struct tag	*t;
198
199	print_otag(h, TAG_META, "?", "charset", "utf-8");
200	print_otag(h, TAG_META, "??", "name", "viewport",
201	    "content", "width=device-width, initial-scale=1.0");
202	if (h->style != NULL) {
203		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
204		    h->style, "type", "text/css", "media", "all");
205		return;
206	}
207
208	/*
209	 * Print a minimal embedded style sheet.
210	 */
211
212	t = print_otag(h, TAG_STYLE, "");
213	print_text(h, "table.head, table.foot { width: 100%; }");
214	print_endline(h);
215	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
216	print_endline(h);
217	print_text(h, "td.head-vol { text-align: center; }");
218	print_endline(h);
219	print_text(h, ".Nd, .Bf, .Op { display: inline; }");
220	print_endline(h);
221	print_text(h, ".Pa, .Ad { font-style: italic; }");
222	print_endline(h);
223	print_text(h, ".Ms { font-weight: bold; }");
224	print_endline(h);
225	print_text(h, ".Bl-diag ");
226	print_byte(h, '>');
227	print_text(h, " dt { font-weight: bold; }");
228	print_endline(h);
229	print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
230	    "{ font-weight: bold; font-family: inherit; }");
231	print_tagq(h, t);
232}
233
234int
235html_setfont(struct html *h, enum mandoc_esc font)
236{
237	switch (font) {
238	case ESCAPE_FONTPREV:
239		font = h->metal;
240		break;
241	case ESCAPE_FONTITALIC:
242	case ESCAPE_FONTBOLD:
243	case ESCAPE_FONTBI:
244	case ESCAPE_FONTROMAN:
245	case ESCAPE_FONTCR:
246	case ESCAPE_FONTCB:
247	case ESCAPE_FONTCI:
248		break;
249	case ESCAPE_FONT:
250		font = ESCAPE_FONTROMAN;
251		break;
252	default:
253		return 0;
254	}
255	h->metal = h->metac;
256	h->metac = font;
257	return 1;
258}
259
260static void
261print_metaf(struct html *h)
262{
263	if (h->metaf) {
264		print_tagq(h, h->metaf);
265		h->metaf = NULL;
266	}
267	switch (h->metac) {
268	case ESCAPE_FONTITALIC:
269		h->metaf = print_otag(h, TAG_I, "");
270		break;
271	case ESCAPE_FONTBOLD:
272		h->metaf = print_otag(h, TAG_B, "");
273		break;
274	case ESCAPE_FONTBI:
275		h->metaf = print_otag(h, TAG_B, "");
276		print_otag(h, TAG_I, "");
277		break;
278	case ESCAPE_FONTCR:
279		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
280		break;
281	case ESCAPE_FONTCB:
282		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
283		print_otag(h, TAG_B, "");
284		break;
285	case ESCAPE_FONTCI:
286		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
287		print_otag(h, TAG_I, "");
288		break;
289	default:
290		break;
291	}
292}
293
294void
295html_close_paragraph(struct html *h)
296{
297	struct tag	*this, *next;
298	int		 flags;
299
300	this = h->tag;
301	for (;;) {
302		next = this->next;
303		flags = htmltags[this->tag].flags;
304		if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
305			print_ctag(h, this);
306		if ((flags & HTML_INPHRASE) == 0)
307			break;
308		this = next;
309	}
310}
311
312/*
313 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
314 * TOKEN_NONE does not switch.  The old mode is returned.
315 */
316enum roff_tok
317html_fillmode(struct html *h, enum roff_tok want)
318{
319	struct tag	*t;
320	enum roff_tok	 had;
321
322	for (t = h->tag; t != NULL; t = t->next)
323		if (t->tag == TAG_PRE)
324			break;
325
326	had = t == NULL ? ROFF_fi : ROFF_nf;
327
328	if (want != had) {
329		switch (want) {
330		case ROFF_fi:
331			print_tagq(h, t);
332			break;
333		case ROFF_nf:
334			html_close_paragraph(h);
335			print_otag(h, TAG_PRE, "");
336			break;
337		case TOKEN_NONE:
338			break;
339		default:
340			abort();
341		}
342	}
343	return had;
344}
345
346/*
347 * Allocate a string to be used for the "id=" attribute of an HTML
348 * element and/or as a segment identifier for a URI in an <a> element.
349 * The function may fail and return NULL if the node lacks text data
350 * to create the attribute from.
351 * The caller is responsible for free(3)ing the returned string.
352 *
353 * If the "unique" argument is non-zero, the "id_unique" ohash table
354 * is used for de-duplication.  If the "unique" argument is 1,
355 * it is the first time the function is called for this tag and
356 * location, so if an ordinal suffix is needed, it is incremented.
357 * If the "unique" argument is 2, it is the second time the function
358 * is called for this tag and location, so the ordinal suffix
359 * remains unchanged.
360 */
361char *
362html_make_id(const struct roff_node *n, int unique)
363{
364	const struct roff_node	*nch;
365	struct id_entry		*entry;
366	char			*buf, *cp;
367	size_t			 len;
368	unsigned int		 slot;
369
370	if (n->tag != NULL)
371		buf = mandoc_strdup(n->tag);
372	else {
373		switch (n->tok) {
374		case MDOC_Sh:
375		case MDOC_Ss:
376		case MDOC_Sx:
377		case MAN_SH:
378		case MAN_SS:
379			for (nch = n->child; nch != NULL; nch = nch->next)
380				if (nch->type != ROFFT_TEXT)
381					return NULL;
382			buf = NULL;
383			deroff(&buf, n);
384			if (buf == NULL)
385				return NULL;
386			break;
387		default:
388			if (n->child == NULL || n->child->type != ROFFT_TEXT)
389				return NULL;
390			buf = mandoc_strdup(n->child->string);
391			break;
392		}
393	}
394
395	/*
396	 * In ID attributes, only use ASCII characters that are
397	 * permitted in URL-fragment strings according to the
398	 * explicit list at:
399	 * https://url.spec.whatwg.org/#url-fragment-string
400	 * In addition, reserve '~' for ordinal suffixes.
401	 */
402
403	for (cp = buf; *cp != '\0'; cp++)
404		if (isalnum((unsigned char)*cp) == 0 &&
405		    strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
406			*cp = '_';
407
408	if (unique == 0)
409		return buf;
410
411	/* Avoid duplicate HTML id= attributes. */
412
413	slot = ohash_qlookup(&id_unique, buf);
414	if ((entry = ohash_find(&id_unique, slot)) == NULL) {
415		len = strlen(buf) + 1;
416		entry = mandoc_malloc(sizeof(*entry) + len);
417		entry->ord = 1;
418		memcpy(entry->id, buf, len);
419		ohash_insert(&id_unique, slot, entry);
420	} else if (unique == 1)
421		entry->ord++;
422
423	if (entry->ord > 1) {
424		cp = buf;
425		mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
426		free(cp);
427	}
428	return buf;
429}
430
431static int
432print_escape(struct html *h, char c)
433{
434
435	switch (c) {
436	case '<':
437		print_word(h, "&lt;");
438		break;
439	case '>':
440		print_word(h, "&gt;");
441		break;
442	case '&':
443		print_word(h, "&amp;");
444		break;
445	case '"':
446		print_word(h, "&quot;");
447		break;
448	case ASCII_NBRSP:
449		print_word(h, "&nbsp;");
450		break;
451	case ASCII_HYPH:
452		print_byte(h, '-');
453		break;
454	case ASCII_BREAK:
455		break;
456	default:
457		return 0;
458	}
459	return 1;
460}
461
462static int
463print_encode(struct html *h, const char *p, const char *pend, int norecurse)
464{
465	char		 numbuf[16];
466	const char	*seq;
467	size_t		 sz;
468	int		 c, len, breakline, nospace;
469	enum mandoc_esc	 esc;
470	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
471		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
472
473	if (pend == NULL)
474		pend = strchr(p, '\0');
475
476	breakline = 0;
477	nospace = 0;
478
479	while (p < pend) {
480		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
481			h->flags &= ~HTML_SKIPCHAR;
482			p++;
483			continue;
484		}
485
486		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
487			print_byte(h, *p);
488
489		if (breakline &&
490		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
491			print_otag(h, TAG_BR, "");
492			breakline = 0;
493			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
494				p++;
495			continue;
496		}
497
498		if (p >= pend)
499			break;
500
501		if (*p == ' ') {
502			print_endword(h);
503			p++;
504			continue;
505		}
506
507		if (print_escape(h, *p++))
508			continue;
509
510		esc = mandoc_escape(&p, &seq, &len);
511		switch (esc) {
512		case ESCAPE_FONT:
513		case ESCAPE_FONTPREV:
514		case ESCAPE_FONTBOLD:
515		case ESCAPE_FONTITALIC:
516		case ESCAPE_FONTBI:
517		case ESCAPE_FONTROMAN:
518		case ESCAPE_FONTCR:
519		case ESCAPE_FONTCB:
520		case ESCAPE_FONTCI:
521			if (0 == norecurse) {
522				h->flags |= HTML_NOSPACE;
523				if (html_setfont(h, esc))
524					print_metaf(h);
525				h->flags &= ~HTML_NOSPACE;
526			}
527			continue;
528		case ESCAPE_SKIPCHAR:
529			h->flags |= HTML_SKIPCHAR;
530			continue;
531		case ESCAPE_ERROR:
532			continue;
533		default:
534			break;
535		}
536
537		if (h->flags & HTML_SKIPCHAR) {
538			h->flags &= ~HTML_SKIPCHAR;
539			continue;
540		}
541
542		switch (esc) {
543		case ESCAPE_UNICODE:
544			/* Skip past "u" header. */
545			c = mchars_num2uc(seq + 1, len - 1);
546			break;
547		case ESCAPE_NUMBERED:
548			c = mchars_num2char(seq, len);
549			if (c < 0)
550				continue;
551			break;
552		case ESCAPE_SPECIAL:
553			c = mchars_spec2cp(seq, len);
554			if (c <= 0)
555				continue;
556			break;
557		case ESCAPE_UNDEF:
558			c = *seq;
559			break;
560		case ESCAPE_DEVICE:
561			print_word(h, "html");
562			continue;
563		case ESCAPE_BREAK:
564			breakline = 1;
565			continue;
566		case ESCAPE_NOSPACE:
567			if ('\0' == *p)
568				nospace = 1;
569			continue;
570		case ESCAPE_OVERSTRIKE:
571			if (len == 0)
572				continue;
573			c = seq[len - 1];
574			break;
575		default:
576			continue;
577		}
578		if ((c < 0x20 && c != 0x09) ||
579		    (c > 0x7E && c < 0xA0))
580			c = 0xFFFD;
581		if (c > 0x7E) {
582			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
583			print_word(h, numbuf);
584		} else if (print_escape(h, c) == 0)
585			print_byte(h, c);
586	}
587
588	return nospace;
589}
590
591static void
592print_href(struct html *h, const char *name, const char *sec, int man)
593{
594	struct stat	 sb;
595	const char	*p, *pp;
596	char		*filename;
597
598	if (man) {
599		pp = h->base_man1;
600		if (h->base_man2 != NULL) {
601			mandoc_asprintf(&filename, "%s.%s", name, sec);
602			if (stat(filename, &sb) == -1)
603				pp = h->base_man2;
604			free(filename);
605		}
606	} else
607		pp = h->base_includes;
608
609	while ((p = strchr(pp, '%')) != NULL) {
610		print_encode(h, pp, p, 1);
611		if (man && p[1] == 'S') {
612			if (sec == NULL)
613				print_byte(h, '1');
614			else
615				print_encode(h, sec, NULL, 1);
616		} else if ((man && p[1] == 'N') ||
617		    (man == 0 && p[1] == 'I'))
618			print_encode(h, name, NULL, 1);
619		else
620			print_encode(h, p, p + 2, 1);
621		pp = p + 2;
622	}
623	if (*pp != '\0')
624		print_encode(h, pp, NULL, 1);
625}
626
627struct tag *
628print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
629{
630	va_list		 ap;
631	struct tag	*t;
632	const char	*attr;
633	char		*arg1, *arg2;
634	int		 style_written, tflags;
635
636	tflags = htmltags[tag].flags;
637
638	/* Flow content is not allowed in phrasing context. */
639
640	if ((tflags & HTML_INPHRASE) == 0) {
641		for (t = h->tag; t != NULL; t = t->next) {
642			if (t->closed)
643				continue;
644			assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
645			break;
646		}
647
648	/*
649	 * Always wrap phrasing elements in a paragraph
650	 * unless already contained in some flow container;
651	 * never put them directly into a section.
652	 */
653
654	} else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
655		print_otag(h, TAG_P, "c", "Pp");
656
657	/* Push this tag onto the stack of open scopes. */
658
659	if ((tflags & HTML_NOSTACK) == 0) {
660		t = mandoc_malloc(sizeof(struct tag));
661		t->tag = tag;
662		t->next = h->tag;
663		t->refcnt = 0;
664		t->closed = 0;
665		h->tag = t;
666	} else
667		t = NULL;
668
669	if (tflags & HTML_NLBEFORE)
670		print_endline(h);
671	if (h->col == 0)
672		print_indent(h);
673	else if ((h->flags & HTML_NOSPACE) == 0) {
674		if (h->flags & HTML_KEEP)
675			print_word(h, "&#x00A0;");
676		else {
677			if (h->flags & HTML_PREKEEP)
678				h->flags |= HTML_KEEP;
679			print_endword(h);
680		}
681	}
682
683	if ( ! (h->flags & HTML_NONOSPACE))
684		h->flags &= ~HTML_NOSPACE;
685	else
686		h->flags |= HTML_NOSPACE;
687
688	/* Print out the tag name and attributes. */
689
690	print_byte(h, '<');
691	print_word(h, htmltags[tag].name);
692
693	va_start(ap, fmt);
694
695	while (*fmt != '\0' && *fmt != 's') {
696
697		/* Parse attributes and arguments. */
698
699		arg1 = va_arg(ap, char *);
700		arg2 = NULL;
701		switch (*fmt++) {
702		case 'c':
703			attr = "class";
704			break;
705		case 'h':
706			attr = "href";
707			break;
708		case 'i':
709			attr = "id";
710			break;
711		case '?':
712			attr = arg1;
713			arg1 = va_arg(ap, char *);
714			break;
715		default:
716			abort();
717		}
718		if (*fmt == 'M')
719			arg2 = va_arg(ap, char *);
720		if (arg1 == NULL)
721			continue;
722
723		/* Print the attributes. */
724
725		print_byte(h, ' ');
726		print_word(h, attr);
727		print_byte(h, '=');
728		print_byte(h, '"');
729		switch (*fmt) {
730		case 'I':
731			print_href(h, arg1, NULL, 0);
732			fmt++;
733			break;
734		case 'M':
735			print_href(h, arg1, arg2, 1);
736			fmt++;
737			break;
738		case 'R':
739			print_byte(h, '#');
740			print_encode(h, arg1, NULL, 1);
741			fmt++;
742			break;
743		default:
744			print_encode(h, arg1, NULL, 1);
745			break;
746		}
747		print_byte(h, '"');
748	}
749
750	style_written = 0;
751	while (*fmt++ == 's') {
752		arg1 = va_arg(ap, char *);
753		arg2 = va_arg(ap, char *);
754		if (arg2 == NULL)
755			continue;
756		print_byte(h, ' ');
757		if (style_written == 0) {
758			print_word(h, "style=\"");
759			style_written = 1;
760		}
761		print_word(h, arg1);
762		print_byte(h, ':');
763		print_byte(h, ' ');
764		print_word(h, arg2);
765		print_byte(h, ';');
766	}
767	if (style_written)
768		print_byte(h, '"');
769
770	va_end(ap);
771
772	/* Accommodate for "well-formed" singleton escaping. */
773
774	if (htmltags[tag].flags & HTML_NOSTACK)
775		print_byte(h, '/');
776
777	print_byte(h, '>');
778
779	if (tflags & HTML_NLBEGIN)
780		print_endline(h);
781	else
782		h->flags |= HTML_NOSPACE;
783
784	if (tflags & HTML_INDENT)
785		h->indent++;
786	if (tflags & HTML_NOINDENT)
787		h->noindent++;
788
789	return t;
790}
791
792/*
793 * Print an element with an optional "id=" attribute.
794 * If the element has phrasing content and an "id=" attribute,
795 * also add a permalink: outside if it can be in phrasing context,
796 * inside otherwise.
797 */
798struct tag *
799print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
800    struct roff_node *n)
801{
802	struct roff_node *nch;
803	struct tag	*ret, *t;
804	char		*id, *href;
805
806	ret = NULL;
807	id = href = NULL;
808	if (n->flags & NODE_ID)
809		id = html_make_id(n, 1);
810	if (n->flags & NODE_HREF)
811		href = id == NULL ? html_make_id(n, 2) : id;
812	if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
813		ret = print_otag(h, TAG_A, "chR", "permalink", href);
814	t = print_otag(h, elemtype, "ci", cattr, id);
815	if (ret == NULL) {
816		ret = t;
817		if (href != NULL && (nch = n->child) != NULL) {
818			/* man(7) is safe, it tags phrasing content only. */
819			if (n->tok > MDOC_MAX ||
820			    htmltags[elemtype].flags & HTML_TOPHRASE)
821				nch = NULL;
822			else  /* For mdoc(7), beware of nested blocks. */
823				while (nch != NULL && nch->type == ROFFT_TEXT)
824					nch = nch->next;
825			if (nch == NULL)
826				print_otag(h, TAG_A, "chR", "permalink", href);
827		}
828	}
829	free(id);
830	if (id == NULL)
831		free(href);
832	return ret;
833}
834
835static void
836print_ctag(struct html *h, struct tag *tag)
837{
838	int	 tflags;
839
840	if (tag->closed == 0) {
841		tag->closed = 1;
842		if (tag == h->metaf)
843			h->metaf = NULL;
844		if (tag == h->tblt)
845			h->tblt = NULL;
846
847		tflags = htmltags[tag->tag].flags;
848		if (tflags & HTML_INDENT)
849			h->indent--;
850		if (tflags & HTML_NOINDENT)
851			h->noindent--;
852		if (tflags & HTML_NLEND)
853			print_endline(h);
854		print_indent(h);
855		print_byte(h, '<');
856		print_byte(h, '/');
857		print_word(h, htmltags[tag->tag].name);
858		print_byte(h, '>');
859		if (tflags & HTML_NLAFTER)
860			print_endline(h);
861	}
862	if (tag->refcnt == 0) {
863		h->tag = tag->next;
864		free(tag);
865	}
866}
867
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
874
875void
876print_gen_comment(struct html *h, struct roff_node *n)
877{
878	int	 wantblank;
879
880	print_word(h, "<!-- This is an automatically generated file."
881	    "  Do not edit.");
882	h->indent = 1;
883	wantblank = 0;
884	while (n != NULL && n->type == ROFFT_COMMENT) {
885		if (strstr(n->string, "-->") == NULL &&
886		    (wantblank || *n->string != '\0')) {
887			print_endline(h);
888			print_indent(h);
889			print_word(h, n->string);
890			wantblank = *n->string != '\0';
891		}
892		n = n->next;
893	}
894	if (wantblank)
895		print_endline(h);
896	print_word(h, " -->");
897	print_endline(h);
898	h->indent = 0;
899}
900
/*
 * Print a word of text without attaching a permalink to it.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*nolink = NULL;

	print_tagged_text(h, word, nolink);
}
906
907void
908print_tagged_text(struct html *h, const char *word, struct roff_node *n)
909{
910	struct tag	*t;
911	char		*href;
912
913	/*
914	 * Always wrap text in a paragraph unless already contained in
915	 * some flow container; never put it directly into a section.
916	 */
917
918	if (h->tag->tag == TAG_SECTION)
919		print_otag(h, TAG_P, "c", "Pp");
920
921	/* Output whitespace before this text? */
922
923	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
924		if ( ! (HTML_KEEP & h->flags)) {
925			if (HTML_PREKEEP & h->flags)
926				h->flags |= HTML_KEEP;
927			print_endword(h);
928		} else
929			print_word(h, "&#x00A0;");
930	}
931
932	/*
933	 * Optionally switch fonts, optionally write a permalink, then
934	 * print the text, optionally surrounded by HTML whitespace.
935	 */
936
937	assert(h->metaf == NULL);
938	print_metaf(h);
939	print_indent(h);
940
941	if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
942		t = print_otag(h, TAG_A, "chR", "permalink", href);
943		free(href);
944	} else
945		t = NULL;
946
947	if ( ! print_encode(h, word, NULL, 0)) {
948		if ( ! (h->flags & HTML_NONOSPACE))
949			h->flags &= ~HTML_NOSPACE;
950		h->flags &= ~HTML_NONEWLINE;
951	} else
952		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
953
954	if (h->metaf != NULL) {
955		print_tagq(h, h->metaf);
956		h->metaf = NULL;
957	} else if (t != NULL)
958		print_tagq(h, t);
959
960	h->flags &= ~HTML_IGNDELIM;
961}
962
963void
964print_tagq(struct html *h, const struct tag *until)
965{
966	struct tag	*this, *next;
967
968	for (this = h->tag; this != NULL; this = next) {
969		next = this == until ? NULL : this->next;
970		print_ctag(h, this);
971	}
972}
973
974/*
975 * Close out all open elements up to but excluding suntil.
976 * Note that a paragraph just inside stays open together with it
977 * because paragraphs include subsequent phrasing content.
978 */
979void
980print_stagq(struct html *h, const struct tag *suntil)
981{
982	struct tag	*this, *next;
983
984	for (this = h->tag; this != NULL; this = next) {
985		next = this->next;
986		if (this == suntil || (next == suntil &&
987		    (this->tag == TAG_P || this->tag == TAG_PRE)))
988			break;
989		print_ctag(h, this);
990	}
991}
992
993
994/***********************************************************************
995 * Low level output functions.
996 * They implement line breaking using a short static buffer.
997 ***********************************************************************/
998
999/*
1000 * Buffer one HTML output byte.
1001 * If the buffer is full, flush and deactivate it and start a new line.
1002 * If the buffer is inactive, print directly.
1003 */
1004static void
1005print_byte(struct html *h, char c)
1006{
1007	if ((h->flags & HTML_BUFFER) == 0) {
1008		putchar(c);
1009		h->col++;
1010		return;
1011	}
1012
1013	if (h->col + h->bufcol < sizeof(h->buf)) {
1014		h->buf[h->bufcol++] = c;
1015		return;
1016	}
1017
1018	putchar('\n');
1019	h->col = 0;
1020	print_indent(h);
1021	putchar(' ');
1022	putchar(' ');
1023	fwrite(h->buf, h->bufcol, 1, stdout);
1024	putchar(c);
1025	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1026	h->bufcol = 0;
1027	h->flags &= ~HTML_BUFFER;
1028}
1029
1030/*
1031 * If something was printed on the current output line, end it.
1032 * Not to be called right after print_indent().
1033 */
1034void
1035print_endline(struct html *h)
1036{
1037	if (h->col == 0)
1038		return;
1039
1040	if (h->bufcol) {
1041		putchar(' ');
1042		fwrite(h->buf, h->bufcol, 1, stdout);
1043		h->bufcol = 0;
1044	}
1045	putchar('\n');
1046	h->col = 0;
1047	h->flags |= HTML_NOSPACE;
1048	h->flags &= ~HTML_BUFFER;
1049}
1050
1051/*
1052 * Flush the HTML output buffer.
1053 * If it is inactive, activate it.
1054 */
1055static void
1056print_endword(struct html *h)
1057{
1058	if (h->noindent) {
1059		print_byte(h, ' ');
1060		return;
1061	}
1062
1063	if ((h->flags & HTML_BUFFER) == 0) {
1064		h->col++;
1065		h->flags |= HTML_BUFFER;
1066	} else if (h->bufcol) {
1067		putchar(' ');
1068		fwrite(h->buf, h->bufcol, 1, stdout);
1069		h->col += h->bufcol + 1;
1070	}
1071	h->bufcol = 0;
1072}
1073
1074/*
1075 * If at the beginning of a new output line,
1076 * perform indentation and mark the line as containing output.
1077 * Make sure to really produce some output right afterwards,
1078 * but do not use print_otag() for producing it.
1079 */
1080static void
1081print_indent(struct html *h)
1082{
1083	size_t	 i;
1084
1085	if (h->col || h->noindent)
1086		return;
1087
1088	h->col = h->indent * 2;
1089	for (i = 0; i < h->col; i++)
1090		putchar(' ');
1091}
1092
/*
 * Pass a string to print_byte() one byte at a time, such that
 * it is printed or buffered depending on the current state
 * of the HTML output buffer.  No encoding takes place.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
1103