1/*	$Id: term.c,v 1.201 2011/09/21 09:57:13 schwarze Exp $ */
2/*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#ifdef HAVE_CONFIG_H
19#include "config.h"
20#endif
21
22#include <sys/types.h>
23
24#include <assert.h>
25#include <ctype.h>
26#include <stdint.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30
31#include "mandoc.h"
32#include "out.h"
33#include "term.h"
34#include "main.h"
35
/*
 * File-local helpers that manage the per-line output buffer (p->buf).
 * They are defined further down in this file.
 */
/* Enlarge p->buf so that it has room for more than the given index. */
static	void		 adjbuf(struct termp *p, int);
/* Append one character to p->buf without any font decoration. */
static	void		 bufferc(struct termp *, char);
/* Append a run of characters, decorated per the current font. */
static	void		 encode(struct termp *, const char *, size_t);
/* Append a single character code, decorated per the current font. */
static	void		 encode1(struct termp *, int);
40
41void
42term_free(struct termp *p)
43{
44
45	if (p->buf)
46		free(p->buf);
47	if (p->symtab)
48		mchars_free(p->symtab);
49
50	free(p);
51}
52
53
54void
55term_begin(struct termp *p, term_margin head,
56		term_margin foot, const void *arg)
57{
58
59	p->headf = head;
60	p->footf = foot;
61	p->argf = arg;
62	(*p->begin)(p);
63}
64
65
66void
67term_end(struct termp *p)
68{
69
70	(*p->end)(p);
71}
72
73/*
74 * Flush a line of text.  A "line" is loosely defined as being something
75 * that should be followed by a newline, regardless of whether it's
76 * broken apart by newlines getting there.  A line can also be a
77 * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
78 * not have a trailing newline.
79 *
80 * The following flags may be specified:
81 *
82 *  - TERMP_NOBREAK: this is the most important and is used when making
83 *    columns.  In short: don't print a newline and instead expect the
84 *    next call to do the padding up to the start of the next column.
85 *
86 *  - TERMP_TWOSPACE: make sure there is room for at least two space
87 *    characters of padding.  Otherwise, rather break the line.
88 *
89 *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
90 *    the line is overrun, and don't pad-right if it's underrun.
91 *
92 *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
93 *    overrunning, instead save the position and continue at that point
94 *    when the next invocation.
95 *
96 *  In-line line breaking:
97 *
98 *  If TERMP_NOBREAK is specified and the line overruns the right
99 *  margin, it will break and pad-right to the right margin after
100 *  writing.  If maxrmargin is violated, it will break and continue
101 *  writing from the right-margin, which will lead to the above scenario
102 *  upon exit.  Otherwise, the line will break at the right margin.
103 */
104void
105term_flushln(struct termp *p)
106{
107	int		 i;     /* current input position in p->buf */
108	size_t		 vis;   /* current visual position on output */
109	size_t		 vbl;   /* number of blanks to prepend to output */
110	size_t		 vend;	/* end of word visual position on output */
111	size_t		 bp;    /* visual right border position */
112	size_t		 dv;    /* temporary for visual pos calculations */
113	int		 j;     /* temporary loop index for p->buf */
114	int		 jhy;	/* last hyph before overflow w/r/t j */
115	size_t		 maxvis; /* output position of visible boundary */
116	size_t		 mmax; /* used in calculating bp */
117
118	/*
119	 * First, establish the maximum columns of "visible" content.
120	 * This is usually the difference between the right-margin and
121	 * an indentation, but can be, for tagged lists or columns, a
122	 * small set of values.
123	 */
124	assert  (p->rmargin >= p->offset);
125	dv     = p->rmargin - p->offset;
126	maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
127	dv     = p->maxrmargin - p->offset;
128	mmax   = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
129
130	bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
131
132	/*
133	 * Calculate the required amount of padding.
134	 */
135	vbl = p->offset + p->overstep > p->viscol ?
136	      p->offset + p->overstep - p->viscol : 0;
137
138	vis = vend = 0;
139	i = 0;
140
141	while (i < p->col) {
142		/*
143		 * Handle literal tab characters: collapse all
144		 * subsequent tabs into a single huge set of spaces.
145		 */
146		while (i < p->col && '\t' == p->buf[i]) {
147			vend = (vis / p->tabwidth + 1) * p->tabwidth;
148			vbl += vend - vis;
149			vis = vend;
150			i++;
151		}
152
153		/*
154		 * Count up visible word characters.  Control sequences
155		 * (starting with the CSI) aren't counted.  A space
156		 * generates a non-printing word, which is valid (the
157		 * space is printed according to regular spacing rules).
158		 */
159
160		for (j = i, jhy = 0; j < p->col; j++) {
161			if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
162				break;
163
164			/* Back over the the last printed character. */
165			if (8 == p->buf[j]) {
166				assert(j);
167				vend -= (*p->width)(p, p->buf[j - 1]);
168				continue;
169			}
170
171			/* Regular word. */
172			/* Break at the hyphen point if we overrun. */
173			if (vend > vis && vend < bp &&
174					ASCII_HYPH == p->buf[j])
175				jhy = j;
176
177			vend += (*p->width)(p, p->buf[j]);
178		}
179
180		/*
181		 * Find out whether we would exceed the right margin.
182		 * If so, break to the next line.
183		 */
184		if (vend > bp && 0 == jhy && vis > 0) {
185			vend -= vis;
186			(*p->endline)(p);
187			p->viscol = 0;
188			if (TERMP_NOBREAK & p->flags) {
189				vbl = p->rmargin;
190				vend += p->rmargin - p->offset;
191			} else
192				vbl = p->offset;
193
194			/* Remove the p->overstep width. */
195
196			bp += (size_t)p->overstep;
197			p->overstep = 0;
198		}
199
200		/* Write out the [remaining] word. */
201		for ( ; i < p->col; i++) {
202			if (vend > bp && jhy > 0 && i > jhy)
203				break;
204			if ('\t' == p->buf[i])
205				break;
206			if (' ' == p->buf[i]) {
207				j = i;
208				while (' ' == p->buf[i])
209					i++;
210				dv = (size_t)(i - j) * (*p->width)(p, ' ');
211				vbl += dv;
212				vend += dv;
213				break;
214			}
215			if (ASCII_NBRSP == p->buf[i]) {
216				vbl += (*p->width)(p, ' ');
217				continue;
218			}
219
220			/*
221			 * Now we definitely know there will be
222			 * printable characters to output,
223			 * so write preceding white space now.
224			 */
225			if (vbl) {
226				(*p->advance)(p, vbl);
227				p->viscol += vbl;
228				vbl = 0;
229			}
230
231			if (ASCII_HYPH == p->buf[i]) {
232				(*p->letter)(p, '-');
233				p->viscol += (*p->width)(p, '-');
234				continue;
235			}
236
237			(*p->letter)(p, p->buf[i]);
238			if (8 == p->buf[i])
239				p->viscol -= (*p->width)(p, p->buf[i-1]);
240			else
241				p->viscol += (*p->width)(p, p->buf[i]);
242		}
243		vis = vend;
244	}
245
246	/*
247	 * If there was trailing white space, it was not printed;
248	 * so reset the cursor position accordingly.
249	 */
250	if (vis)
251		vis -= vbl;
252
253	p->col = 0;
254	p->overstep = 0;
255
256	if ( ! (TERMP_NOBREAK & p->flags)) {
257		p->viscol = 0;
258		(*p->endline)(p);
259		return;
260	}
261
262	if (TERMP_HANG & p->flags) {
263		/* We need one blank after the tag. */
264		p->overstep = (int)(vis - maxvis + (*p->width)(p, ' '));
265
266		/*
267		 * Behave exactly the same way as groff:
268		 * If we have overstepped the margin, temporarily move
269		 * it to the right and flag the rest of the line to be
270		 * shorter.
271		 * If we landed right at the margin, be happy.
272		 * If we are one step before the margin, temporarily
273		 * move it one step LEFT and flag the rest of the line
274		 * to be longer.
275		 */
276		if (p->overstep < -1)
277			p->overstep = 0;
278		return;
279
280	} else if (TERMP_DANGLE & p->flags)
281		return;
282
283	/* If the column was overrun, break the line. */
284	if (maxvis <= vis +
285	    ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) {
286		(*p->endline)(p);
287		p->viscol = 0;
288	}
289}
290
291
292/*
293 * A newline only breaks an existing line; it won't assert vertical
294 * space.  All data in the output buffer is flushed prior to the newline
295 * assertion.
296 */
297void
298term_newln(struct termp *p)
299{
300
301	p->flags |= TERMP_NOSPACE;
302	if (p->col || p->viscol)
303		term_flushln(p);
304}
305
306
307/*
308 * Asserts a vertical space (a full, empty line-break between lines).
309 * Note that if used twice, this will cause two blank spaces and so on.
310 * All data in the output buffer is flushed prior to the newline
311 * assertion.
312 */
313void
314term_vspace(struct termp *p)
315{
316
317	term_newln(p);
318	p->viscol = 0;
319	(*p->endline)(p);
320}
321
322void
323term_fontlast(struct termp *p)
324{
325	enum termfont	 f;
326
327	f = p->fontl;
328	p->fontl = p->fontq[p->fonti];
329	p->fontq[p->fonti] = f;
330}
331
332
333void
334term_fontrepl(struct termp *p, enum termfont f)
335{
336
337	p->fontl = p->fontq[p->fonti];
338	p->fontq[p->fonti] = f;
339}
340
341
342void
343term_fontpush(struct termp *p, enum termfont f)
344{
345
346	assert(p->fonti + 1 < 10);
347	p->fontl = p->fontq[p->fonti];
348	p->fontq[++p->fonti] = f;
349}
350
351
352const void *
353term_fontq(struct termp *p)
354{
355
356	return(&p->fontq[p->fonti]);
357}
358
359
360enum termfont
361term_fonttop(struct termp *p)
362{
363
364	return(p->fontq[p->fonti]);
365}
366
367
368void
369term_fontpopq(struct termp *p, const void *key)
370{
371
372	while (p->fonti >= 0 && key != &p->fontq[p->fonti])
373		p->fonti--;
374	assert(p->fonti >= 0);
375}
376
377
378void
379term_fontpop(struct termp *p)
380{
381
382	assert(p->fonti);
383	p->fonti--;
384}
385
386/*
387 * Handle pwords, partial words, which may be either a single word or a
388 * phrase that cannot be broken down (such as a literal string).  This
389 * handles word styling.
390 */
391void
392term_word(struct termp *p, const char *word)
393{
394	const char	*seq, *cp;
395	char		 c;
396	int		 sz, uc;
397	size_t		 ssz;
398	enum mandoc_esc	 esc;
399
400	if ( ! (TERMP_NOSPACE & p->flags)) {
401		if ( ! (TERMP_KEEP & p->flags)) {
402			if (TERMP_PREKEEP & p->flags)
403				p->flags |= TERMP_KEEP;
404			bufferc(p, ' ');
405			if (TERMP_SENTENCE & p->flags)
406				bufferc(p, ' ');
407		} else
408			bufferc(p, ASCII_NBRSP);
409	}
410
411	if ( ! (p->flags & TERMP_NONOSPACE))
412		p->flags &= ~TERMP_NOSPACE;
413	else
414		p->flags |= TERMP_NOSPACE;
415
416	p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
417
418	while ('\0' != *word) {
419		if ((ssz = strcspn(word, "\\")) > 0)
420			encode(p, word, ssz);
421
422		word += (int)ssz;
423		if ('\\' != *word)
424			continue;
425
426		word++;
427		esc = mandoc_escape(&word, &seq, &sz);
428		if (ESCAPE_ERROR == esc)
429			break;
430
431		if (TERMENC_ASCII != p->enc)
432			switch (esc) {
433			case (ESCAPE_UNICODE):
434				uc = mchars_num2uc(seq + 1, sz - 1);
435				if ('\0' == uc)
436					break;
437				encode1(p, uc);
438				continue;
439			case (ESCAPE_SPECIAL):
440				uc = mchars_spec2cp(p->symtab, seq, sz);
441				if (uc <= 0)
442					break;
443				encode1(p, uc);
444				continue;
445			default:
446				break;
447			}
448
449		switch (esc) {
450		case (ESCAPE_UNICODE):
451			encode1(p, '?');
452			break;
453		case (ESCAPE_NUMBERED):
454			c = mchars_num2char(seq, sz);
455			if ('\0' != c)
456				encode(p, &c, 1);
457			break;
458		case (ESCAPE_SPECIAL):
459			cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
460			if (NULL != cp)
461				encode(p, cp, ssz);
462			else if (1 == ssz)
463				encode(p, seq, sz);
464			break;
465		case (ESCAPE_FONTBOLD):
466			term_fontrepl(p, TERMFONT_BOLD);
467			break;
468		case (ESCAPE_FONTITALIC):
469			term_fontrepl(p, TERMFONT_UNDER);
470			break;
471		case (ESCAPE_FONT):
472			/* FALLTHROUGH */
473		case (ESCAPE_FONTROMAN):
474			term_fontrepl(p, TERMFONT_NONE);
475			break;
476		case (ESCAPE_FONTPREV):
477			term_fontlast(p);
478			break;
479		case (ESCAPE_NOSPACE):
480			if ('\0' == *word)
481				p->flags |= TERMP_NOSPACE;
482			break;
483		default:
484			break;
485		}
486	}
487}
488
489static void
490adjbuf(struct termp *p, int sz)
491{
492
493	if (0 == p->maxcols)
494		p->maxcols = 1024;
495	while (sz >= p->maxcols)
496		p->maxcols <<= 2;
497
498	p->buf = mandoc_realloc
499		(p->buf, sizeof(int) * (size_t)p->maxcols);
500}
501
502static void
503bufferc(struct termp *p, char c)
504{
505
506	if (p->col + 1 >= p->maxcols)
507		adjbuf(p, p->col + 1);
508
509	p->buf[p->col++] = c;
510}
511
512/*
513 * See encode().
514 * Do this for a single (probably unicode) value.
515 * Does not check for non-decorated glyphs.
516 */
517static void
518encode1(struct termp *p, int c)
519{
520	enum termfont	  f;
521
522	if (p->col + 4 >= p->maxcols)
523		adjbuf(p, p->col + 4);
524
525	f = term_fonttop(p);
526
527	if (TERMFONT_NONE == f) {
528		p->buf[p->col++] = c;
529		return;
530	} else if (TERMFONT_UNDER == f) {
531		p->buf[p->col++] = '_';
532	} else
533		p->buf[p->col++] = c;
534
535	p->buf[p->col++] = 8;
536	p->buf[p->col++] = c;
537}
538
539static void
540encode(struct termp *p, const char *word, size_t sz)
541{
542	enum termfont	  f;
543	int		  i, len;
544
545	/* LINTED */
546	len = sz;
547
548	/*
549	 * Encode and buffer a string of characters.  If the current
550	 * font mode is unset, buffer directly, else encode then buffer
551	 * character by character.
552	 */
553
554	if (TERMFONT_NONE == (f = term_fonttop(p))) {
555		if (p->col + len >= p->maxcols)
556			adjbuf(p, p->col + len);
557		for (i = 0; i < len; i++)
558			p->buf[p->col++] = word[i];
559		return;
560	}
561
562	/* Pre-buffer, assuming worst-case. */
563
564	if (p->col + 1 + (len * 3) >= p->maxcols)
565		adjbuf(p, p->col + 1 + (len * 3));
566
567	for (i = 0; i < len; i++) {
568		if (ASCII_HYPH != word[i] &&
569		    ! isgraph((unsigned char)word[i])) {
570			p->buf[p->col++] = word[i];
571			continue;
572		}
573
574		if (TERMFONT_UNDER == f)
575			p->buf[p->col++] = '_';
576		else if (ASCII_HYPH == word[i])
577			p->buf[p->col++] = '-';
578		else
579			p->buf[p->col++] = word[i];
580
581		p->buf[p->col++] = 8;
582		p->buf[p->col++] = word[i];
583	}
584}
585
586size_t
587term_len(const struct termp *p, size_t sz)
588{
589
590	return((*p->width)(p, ' ') * sz);
591}
592
593
594size_t
595term_strlen(const struct termp *p, const char *cp)
596{
597	size_t		 sz, rsz, i;
598	int		 ssz, c;
599	const char	*seq, *rhs;
600	enum mandoc_esc	 esc;
601	static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
602
603	/*
604	 * Account for escaped sequences within string length
605	 * calculations.  This follows the logic in term_word() as we
606	 * must calculate the width of produced strings.
607	 */
608
609	sz = 0;
610	while ('\0' != *cp) {
611		rsz = strcspn(cp, rej);
612		for (i = 0; i < rsz; i++)
613			sz += (*p->width)(p, *cp++);
614
615		c = 0;
616		switch (*cp) {
617		case ('\\'):
618			cp++;
619			esc = mandoc_escape(&cp, &seq, &ssz);
620			if (ESCAPE_ERROR == esc)
621				return(sz);
622
623			if (TERMENC_ASCII != p->enc)
624				switch (esc) {
625				case (ESCAPE_UNICODE):
626					c = mchars_num2uc
627						(seq + 1, ssz - 1);
628					if ('\0' == c)
629						break;
630					sz += (*p->width)(p, c);
631					continue;
632				case (ESCAPE_SPECIAL):
633					c = mchars_spec2cp
634						(p->symtab, seq, ssz);
635					if (c <= 0)
636						break;
637					sz += (*p->width)(p, c);
638					continue;
639				default:
640					break;
641				}
642
643			rhs = NULL;
644
645			switch (esc) {
646			case (ESCAPE_UNICODE):
647				sz += (*p->width)(p, '?');
648				break;
649			case (ESCAPE_NUMBERED):
650				c = mchars_num2char(seq, ssz);
651				if ('\0' != c)
652					sz += (*p->width)(p, c);
653				break;
654			case (ESCAPE_SPECIAL):
655				rhs = mchars_spec2str
656					(p->symtab, seq, ssz, &rsz);
657
658				if (ssz != 1 || rhs)
659					break;
660
661				rhs = seq;
662				rsz = ssz;
663				break;
664			default:
665				break;
666			}
667
668			if (NULL == rhs)
669				break;
670
671			for (i = 0; i < rsz; i++)
672				sz += (*p->width)(p, *rhs++);
673			break;
674		case (ASCII_NBRSP):
675			sz += (*p->width)(p, ' ');
676			cp++;
677			break;
678		case (ASCII_HYPH):
679			sz += (*p->width)(p, '-');
680			cp++;
681			break;
682		default:
683			break;
684		}
685	}
686
687	return(sz);
688}
689
690/* ARGSUSED */
691size_t
692term_vspan(const struct termp *p, const struct roffsu *su)
693{
694	double		 r;
695
696	switch (su->unit) {
697	case (SCALE_CM):
698		r = su->scale * 2;
699		break;
700	case (SCALE_IN):
701		r = su->scale * 6;
702		break;
703	case (SCALE_PC):
704		r = su->scale;
705		break;
706	case (SCALE_PT):
707		r = su->scale / 8;
708		break;
709	case (SCALE_MM):
710		r = su->scale / 1000;
711		break;
712	case (SCALE_VS):
713		r = su->scale;
714		break;
715	default:
716		r = su->scale - 1;
717		break;
718	}
719
720	if (r < 0.0)
721		r = 0.0;
722	return(/* LINTED */(size_t)
723			r);
724}
725
726size_t
727term_hspan(const struct termp *p, const struct roffsu *su)
728{
729	double		 v;
730
731	v = ((*p->hspan)(p, su));
732	if (v < 0.0)
733		v = 0.0;
734	return((size_t) /* LINTED */
735			v);
736}
737