1/* $Id: man_html.c,v 1.179 2020/10/16 17:22:43 schwarze Exp $ */
2/*
3 * Copyright (c) 2013-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * HTML formatter for man(7) used by mandoc(1).
19 */
20#include "config.h"
21
22#include <sys/types.h>
23
24#include <assert.h>
25#include <ctype.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include "mandoc_aux.h"
31#include "mandoc.h"
32#include "roff.h"
33#include "man.h"
34#include "out.h"
35#include "html.h"
36#include "main.h"
37
/*
 * Parameter list shared by print_man_nodelist(), print_man_node(),
 * and every man_*_pre() handler:
 *   man - metadata of the document being formatted
 *   n   - the syntax tree node to format
 *   h   - the HTML output state
 * Function bodies refer to these parameters by exactly these names.
 */
#define	MAN_ARGS \
	const struct roff_meta *man, struct roff_node *n, struct html *h
41
/*
 * Handler pair for one man(7) macro.
 * print_man_node() calls the pre handler, if any, before formatting
 * the children of a node; a return value of zero suppresses the
 * generic descent into the children.
 * No code in this file ever calls a post handler.
 */
struct man_html_act {
	int	(*pre)(MAN_ARGS);	/* may be NULL */
	int	(*post)(MAN_ARGS);	/* always NULL in man_html_acts[] */
};
46
/* Document frame: <head> content, page header, and page footer. */
static	void	 print_man_head(const struct roff_meta *man,
			struct html *h);
static	void	 man_root_pre(const struct roff_meta *man,
			struct html *h);
static	void	 man_root_post(const struct roff_meta *man,
			struct html *h);

/* Syntax tree traversal. */
static	void	 print_man_nodelist(MAN_ARGS);
static	void	 print_man_node(MAN_ARGS);
static	char	 list_continues(const struct roff_node *n1,
			const struct roff_node *n2);

/* Per-macro pre-handlers referenced from man_html_acts[]. */
static	int	 man_B_pre(MAN_ARGS);
static	int	 man_I_pre(MAN_ARGS);
static	int	 man_IP_pre(MAN_ARGS);
static	int	 man_OP_pre(MAN_ARGS);
static	int	 man_PP_pre(MAN_ARGS);
static	int	 man_RS_pre(MAN_ARGS);
static	int	 man_SH_pre(MAN_ARGS);
static	int	 man_SM_pre(MAN_ARGS);
static	int	 man_SY_pre(MAN_ARGS);
static	int	 man_UR_pre(MAN_ARGS);
static	int	 man_abort_pre(MAN_ARGS);
static	int	 man_alt_pre(MAN_ARGS);
static	int	 man_ign_pre(MAN_ARGS);
static	int	 man_in_pre(MAN_ARGS);
71
72static	const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = {
73	{ NULL, NULL }, /* TH */
74	{ man_SH_pre, NULL }, /* SH */
75	{ man_SH_pre, NULL }, /* SS */
76	{ man_IP_pre, NULL }, /* TP */
77	{ man_IP_pre, NULL }, /* TQ */
78	{ man_abort_pre, NULL }, /* LP */
79	{ man_PP_pre, NULL }, /* PP */
80	{ man_abort_pre, NULL }, /* P */
81	{ man_IP_pre, NULL }, /* IP */
82	{ man_PP_pre, NULL }, /* HP */
83	{ man_SM_pre, NULL }, /* SM */
84	{ man_SM_pre, NULL }, /* SB */
85	{ man_alt_pre, NULL }, /* BI */
86	{ man_alt_pre, NULL }, /* IB */
87	{ man_alt_pre, NULL }, /* BR */
88	{ man_alt_pre, NULL }, /* RB */
89	{ NULL, NULL }, /* R */
90	{ man_B_pre, NULL }, /* B */
91	{ man_I_pre, NULL }, /* I */
92	{ man_alt_pre, NULL }, /* IR */
93	{ man_alt_pre, NULL }, /* RI */
94	{ NULL, NULL }, /* RE */
95	{ man_RS_pre, NULL }, /* RS */
96	{ man_ign_pre, NULL }, /* DT */
97	{ man_ign_pre, NULL }, /* UC */
98	{ man_ign_pre, NULL }, /* PD */
99	{ man_ign_pre, NULL }, /* AT */
100	{ man_in_pre, NULL }, /* in */
101	{ man_SY_pre, NULL }, /* SY */
102	{ NULL, NULL }, /* YS */
103	{ man_OP_pre, NULL }, /* OP */
104	{ NULL, NULL }, /* EX */
105	{ NULL, NULL }, /* EE */
106	{ man_UR_pre, NULL }, /* UR */
107	{ NULL, NULL }, /* UE */
108	{ man_UR_pre, NULL }, /* MT */
109	{ NULL, NULL }, /* ME */
110};
111
112
113void
114html_man(void *arg, const struct roff_meta *man)
115{
116	struct html		*h;
117	struct roff_node	*n;
118	struct tag		*t;
119
120	h = (struct html *)arg;
121	n = man->first->child;
122
123	if ((h->oflags & HTML_FRAGMENT) == 0) {
124		print_gen_decls(h);
125		print_otag(h, TAG_HTML, "");
126		if (n != NULL && n->type == ROFFT_COMMENT)
127			print_gen_comment(h, n);
128		t = print_otag(h, TAG_HEAD, "");
129		print_man_head(man, h);
130		print_tagq(h, t);
131		print_otag(h, TAG_BODY, "");
132	}
133
134	man_root_pre(man, h);
135	t = print_otag(h, TAG_DIV, "c", "manual-text");
136	print_man_nodelist(man, n, h);
137	print_tagq(h, t);
138	man_root_post(man, h);
139	print_tagq(h, NULL);
140}
141
142static void
143print_man_head(const struct roff_meta *man, struct html *h)
144{
145	char	*cp;
146
147	print_gen_head(h);
148	mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec);
149	print_otag(h, TAG_TITLE, "");
150	print_text(h, cp);
151	free(cp);
152}
153
154static void
155print_man_nodelist(MAN_ARGS)
156{
157	while (n != NULL) {
158		print_man_node(man, n, h);
159		n = n->next;
160	}
161}
162
163static void
164print_man_node(MAN_ARGS)
165{
166	struct tag	*t;
167	int		 child;
168
169	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
170		return;
171
172	if ((n->flags & NODE_NOFILL) == 0)
173		html_fillmode(h, ROFF_fi);
174	else if (html_fillmode(h, ROFF_nf) == ROFF_nf &&
175	    n->tok != ROFF_fi && n->flags & NODE_LINE &&
176	    (n->prev == NULL || n->prev->tok != MAN_YS))
177		print_endline(h);
178
179	child = 1;
180	switch (n->type) {
181	case ROFFT_TEXT:
182		if (*n->string == '\0') {
183			print_endline(h);
184			return;
185		}
186		if (*n->string == ' ' && n->flags & NODE_LINE &&
187		    (h->flags & HTML_NONEWLINE) == 0)
188			print_otag(h, TAG_BR, "");
189		else if (n->flags & NODE_DELIMC)
190			h->flags |= HTML_NOSPACE;
191		t = h->tag;
192		t->refcnt++;
193		print_text(h, n->string);
194		break;
195	case ROFFT_EQN:
196		t = h->tag;
197		t->refcnt++;
198		print_eqn(h, n->eqn);
199		break;
200	case ROFFT_TBL:
201		/*
202		 * This will take care of initialising all of the table
203		 * state data for the first table, then tearing it down
204		 * for the last one.
205		 */
206		print_tbl(h, n->span);
207		return;
208	default:
209		/*
210		 * Close out scope of font prior to opening a macro
211		 * scope.
212		 */
213		if (h->metac != ESCAPE_FONTROMAN) {
214			h->metal = h->metac;
215			h->metac = ESCAPE_FONTROMAN;
216		}
217
218		/*
219		 * Close out the current table, if it's open, and unset
220		 * the "meta" table state.  This will be reopened on the
221		 * next table element.
222		 */
223		if (h->tblt != NULL)
224			print_tblclose(h);
225		t = h->tag;
226		t->refcnt++;
227		if (n->tok < ROFF_MAX) {
228			roff_html_pre(h, n);
229			t->refcnt--;
230			print_stagq(h, t);
231			return;
232		}
233		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
234		if (man_html_acts[n->tok - MAN_TH].pre != NULL)
235			child = (*man_html_acts[n->tok - MAN_TH].pre)(man,
236			    n, h);
237		break;
238	}
239
240	if (child && n->child != NULL)
241		print_man_nodelist(man, n->child, h);
242
243	/* This will automatically close out any font scope. */
244	t->refcnt--;
245	if (n->type == ROFFT_BLOCK &&
246	    (n->tok == MAN_IP || n->tok == MAN_TP || n->tok == MAN_TQ)) {
247		t = h->tag;
248		while (t->tag != TAG_DL && t->tag != TAG_UL)
249			t = t->next;
250		/*
251		 * Close the list if no further item of the same type
252		 * follows; otherwise, close the item only.
253		 */
254		if (list_continues(n, roff_node_next(n)) == '\0') {
255			print_tagq(h, t);
256			t = NULL;
257		}
258	}
259	if (t != NULL)
260		print_stagq(h, t);
261}
262
263static void
264man_root_pre(const struct roff_meta *man, struct html *h)
265{
266	struct tag	*t, *tt;
267	char		*title;
268
269	assert(man->title);
270	assert(man->msec);
271	mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
272
273	t = print_otag(h, TAG_TABLE, "c", "head");
274	tt = print_otag(h, TAG_TR, "");
275
276	print_otag(h, TAG_TD, "c", "head-ltitle");
277	print_text(h, title);
278	print_stagq(h, tt);
279
280	print_otag(h, TAG_TD, "c", "head-vol");
281	if (man->vol != NULL)
282		print_text(h, man->vol);
283	print_stagq(h, tt);
284
285	print_otag(h, TAG_TD, "c", "head-rtitle");
286	print_text(h, title);
287	print_tagq(h, t);
288	free(title);
289}
290
291static void
292man_root_post(const struct roff_meta *man, struct html *h)
293{
294	struct tag	*t, *tt;
295
296	t = print_otag(h, TAG_TABLE, "c", "foot");
297	tt = print_otag(h, TAG_TR, "");
298
299	print_otag(h, TAG_TD, "c", "foot-date");
300	print_text(h, man->date);
301	print_stagq(h, tt);
302
303	print_otag(h, TAG_TD, "c", "foot-os");
304	if (man->os != NULL)
305		print_text(h, man->os);
306	print_tagq(h, t);
307}
308
309static int
310man_SH_pre(MAN_ARGS)
311{
312	const char	*class;
313	enum htmltag	 tag;
314
315	if (n->tok == MAN_SH) {
316		tag = TAG_H1;
317		class = "Sh";
318	} else {
319		tag = TAG_H2;
320		class = "Ss";
321	}
322	switch (n->type) {
323	case ROFFT_BLOCK:
324		html_close_paragraph(h);
325		print_otag(h, TAG_SECTION, "c", class);
326		break;
327	case ROFFT_HEAD:
328		print_otag_id(h, tag, class, n);
329		break;
330	case ROFFT_BODY:
331		break;
332	default:
333		abort();
334	}
335	return 1;
336}
337
338static int
339man_alt_pre(MAN_ARGS)
340{
341	const struct roff_node	*nn;
342	struct tag	*t;
343	int		 i;
344	enum htmltag	 fp;
345
346	for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i++) {
347		switch (n->tok) {
348		case MAN_BI:
349			fp = i % 2 ? TAG_I : TAG_B;
350			break;
351		case MAN_IB:
352			fp = i % 2 ? TAG_B : TAG_I;
353			break;
354		case MAN_RI:
355			fp = i % 2 ? TAG_I : TAG_MAX;
356			break;
357		case MAN_IR:
358			fp = i % 2 ? TAG_MAX : TAG_I;
359			break;
360		case MAN_BR:
361			fp = i % 2 ? TAG_MAX : TAG_B;
362			break;
363		case MAN_RB:
364			fp = i % 2 ? TAG_B : TAG_MAX;
365			break;
366		default:
367			abort();
368		}
369
370		if (i)
371			h->flags |= HTML_NOSPACE;
372
373		if (fp != TAG_MAX)
374			t = print_otag(h, fp, "");
375
376		print_text(h, nn->string);
377
378		if (fp != TAG_MAX)
379			print_tagq(h, t);
380	}
381	return 0;
382}
383
384static int
385man_SM_pre(MAN_ARGS)
386{
387	print_otag(h, TAG_SMALL, "");
388	if (n->tok == MAN_SB)
389		print_otag(h, TAG_B, "");
390	return 1;
391}
392
393static int
394man_PP_pre(MAN_ARGS)
395{
396	switch (n->type) {
397	case ROFFT_BLOCK:
398		html_close_paragraph(h);
399		break;
400	case ROFFT_HEAD:
401		return 0;
402	case ROFFT_BODY:
403		if (n->child != NULL &&
404		    (n->child->flags & NODE_NOFILL) == 0)
405			print_otag(h, TAG_P, "c",
406			    n->tok == MAN_PP ? "Pp" : "Pp HP");
407		break;
408	default:
409		abort();
410	}
411	return 1;
412}
413
414static char
415list_continues(const struct roff_node *n1, const struct roff_node *n2)
416{
417	const char *s1, *s2;
418	char c1, c2;
419
420	if (n1 == NULL || n1->type != ROFFT_BLOCK ||
421	    n2 == NULL || n2->type != ROFFT_BLOCK)
422		return '\0';
423	if ((n1->tok == MAN_TP || n1->tok == MAN_TQ) &&
424	    (n2->tok == MAN_TP || n2->tok == MAN_TQ))
425		return ' ';
426	if (n1->tok != MAN_IP || n2->tok != MAN_IP)
427		return '\0';
428	n1 = n1->head->child;
429	n2 = n2->head->child;
430	s1 = n1 == NULL ? "" : n1->string;
431	s2 = n2 == NULL ? "" : n2->string;
432	c1 = strcmp(s1, "*") == 0 ? '*' :
433	     strcmp(s1, "\\-") == 0 ? '-' :
434	     strcmp(s1, "\\(bu") == 0 ? 'b' : ' ';
435	c2 = strcmp(s2, "*") == 0 ? '*' :
436	     strcmp(s2, "\\-") == 0 ? '-' :
437	     strcmp(s2, "\\(bu") == 0 ? 'b' : ' ';
438	return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1;
439}
440
441static int
442man_IP_pre(MAN_ARGS)
443{
444	struct roff_node	*nn;
445	const char		*list_class;
446	enum htmltag		 list_elem, body_elem;
447	char			 list_type;
448
449	nn = n->type == ROFFT_BLOCK ? n : n->parent;
450	list_type = list_continues(roff_node_prev(nn), nn);
451	if (list_type == '\0') {
452		/* Start a new list. */
453		list_type = list_continues(nn, roff_node_next(nn));
454		if (list_type == '\0')
455			list_type = ' ';
456		switch (list_type) {
457		case ' ':
458			list_class = "Bl-tag";
459			list_elem = TAG_DL;
460			break;
461		case '*':
462			list_class = "Bl-bullet";
463			list_elem = TAG_UL;
464			break;
465		case '-':
466			list_class = "Bl-dash";
467			list_elem = TAG_UL;
468			break;
469		default:
470			abort();
471		}
472	} else {
473		/* Continue a list that was started earlier. */
474		list_class = NULL;
475		list_elem = TAG_MAX;
476	}
477	body_elem = list_type == ' ' ? TAG_DD : TAG_LI;
478
479	switch (n->type) {
480	case ROFFT_BLOCK:
481		html_close_paragraph(h);
482		if (list_elem != TAG_MAX)
483			print_otag(h, list_elem, "c", list_class);
484		return 1;
485	case ROFFT_HEAD:
486		if (body_elem == TAG_LI)
487			return 0;
488		print_otag_id(h, TAG_DT, NULL, n);
489		break;
490	case ROFFT_BODY:
491		print_otag(h, body_elem, "");
492		return 1;
493	default:
494		abort();
495	}
496	switch(n->tok) {
497	case MAN_IP:  /* Only print the first header element. */
498		if (n->child != NULL)
499			print_man_node(man, n->child, h);
500		break;
501	case MAN_TP:  /* Only print next-line header elements. */
502	case MAN_TQ:
503		nn = n->child;
504		while (nn != NULL && (NODE_LINE & nn->flags) == 0)
505			nn = nn->next;
506		while (nn != NULL) {
507			print_man_node(man, nn, h);
508			nn = nn->next;
509		}
510		break;
511	default:
512		abort();
513	}
514	return 0;
515}
516
517static int
518man_OP_pre(MAN_ARGS)
519{
520	struct tag	*tt;
521
522	print_text(h, "[");
523	h->flags |= HTML_NOSPACE;
524	tt = print_otag(h, TAG_SPAN, "c", "Op");
525
526	if ((n = n->child) != NULL) {
527		print_otag(h, TAG_B, "");
528		print_text(h, n->string);
529	}
530
531	print_stagq(h, tt);
532
533	if (n != NULL && n->next != NULL) {
534		print_otag(h, TAG_I, "");
535		print_text(h, n->next->string);
536	}
537
538	print_stagq(h, tt);
539	h->flags |= HTML_NOSPACE;
540	print_text(h, "]");
541	return 0;
542}
543
544static int
545man_B_pre(MAN_ARGS)
546{
547	print_otag(h, TAG_B, "");
548	return 1;
549}
550
551static int
552man_I_pre(MAN_ARGS)
553{
554	print_otag(h, TAG_I, "");
555	return 1;
556}
557
558static int
559man_in_pre(MAN_ARGS)
560{
561	print_otag(h, TAG_BR, "");
562	return 0;
563}
564
565static int
566man_ign_pre(MAN_ARGS)
567{
568	return 0;
569}
570
571static int
572man_RS_pre(MAN_ARGS)
573{
574	switch (n->type) {
575	case ROFFT_BLOCK:
576		html_close_paragraph(h);
577		break;
578	case ROFFT_HEAD:
579		return 0;
580	case ROFFT_BODY:
581		print_otag(h, TAG_DIV, "c", "Bd-indent");
582		break;
583	default:
584		abort();
585	}
586	return 1;
587}
588
589static int
590man_SY_pre(MAN_ARGS)
591{
592	switch (n->type) {
593	case ROFFT_BLOCK:
594		html_close_paragraph(h);
595		print_otag(h, TAG_TABLE, "c", "Nm");
596		print_otag(h, TAG_TR, "");
597		break;
598	case ROFFT_HEAD:
599		print_otag(h, TAG_TD, "");
600		print_otag(h, TAG_CODE, "c", "Nm");
601		break;
602	case ROFFT_BODY:
603		print_otag(h, TAG_TD, "");
604		break;
605	default:
606		abort();
607	}
608	return 1;
609}
610
611static int
612man_UR_pre(MAN_ARGS)
613{
614	char *cp;
615
616	n = n->child;
617	assert(n->type == ROFFT_HEAD);
618	if (n->child != NULL) {
619		assert(n->child->type == ROFFT_TEXT);
620		if (n->tok == MAN_MT) {
621			mandoc_asprintf(&cp, "mailto:%s", n->child->string);
622			print_otag(h, TAG_A, "ch", "Mt", cp);
623			free(cp);
624		} else
625			print_otag(h, TAG_A, "ch", "Lk", n->child->string);
626	}
627
628	assert(n->next->type == ROFFT_BODY);
629	if (n->next->child != NULL)
630		n = n->next;
631
632	print_man_nodelist(man, n->child, h);
633	return 0;
634}
635
636static int
637man_abort_pre(MAN_ARGS)
638{
639	abort();
640}
641