1/*	$NetBSD: fmt.c,v 1.30 2008/04/13 03:46:30 dholland Exp $	*/
2
3/*
4 * Copyright (c) 1980, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33#ifndef lint
34__COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35 The Regents of the University of California.  All rights reserved.");
36#endif /* not lint */
37
38#ifndef lint
39#if 0
40static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
41#endif
42__RCSID("$NetBSD: fmt.c,v 1.30 2008/04/13 03:46:30 dholland Exp $");
43#endif /* not lint */
44
45#include <ctype.h>
46#include <locale.h>
47#include <stdio.h>
48#include <stdlib.h>
49#include <unistd.h>
50#include <errno.h>
51#include <err.h>
52#include <limits.h>
53#include <string.h>
54#include "buffer.h"
55
56/*
57 * fmt -- format the concatenation of input files or standard input
58 * onto standard output.  Designed for use with Mail ~|
59 *
 * Syntax : fmt [-Cr] [-g goal] [-m max] [ name ... ]
 *          fmt [-Cr] [ goal [ max ] ] [ name ... ]	(historical form)
61 * Authors: Kurt Shoens (UCB) 12/7/78;
62 *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
63 */
64
65/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
66#define GOAL_LENGTH 65
67#define MAX_LENGTH 75
68static size_t	goal_length;	/* Target or goal line length in output */
69static size_t	max_length;	/* Max line length in output */
70static size_t	pfx;		/* Current leading blank count */
71static int	raw;		/* Don't treat mail specially */
72static int	lineno;		/* Current input line */
73static int	mark;		/* Last place we saw a head line */
74static int	center;
75static struct buffer outbuf;
76
77static const char	*headnames[] = {"To", "Subject", "Cc", 0};
78
79static void	usage(void) __dead;
80static int 	getnum(const char *, const char *, size_t *, int);
81static void	fmt(FILE *);
82static int	ispref(const char *, const char *);
83static void	leadin(void);
84static void	oflush(void);
85static void	pack(const char *, size_t);
86static void	prefix(const struct buffer *, int);
87static void	split(const char *, int);
88static void	tabulate(struct buffer *);
89
90
91int		ishead(const char *);
92
93/*
94 * Drive the whole formatter by managing input files.  Also,
95 * cause initialization of the output stuff and flush it out
96 * at the end.
97 */
98
99int
100main(int argc, char **argv)
101{
102	FILE *fi;
103	int errs = 0;
104	int compat = 1;
105	int c;
106
107	goal_length = GOAL_LENGTH;
108	max_length = MAX_LENGTH;
109	buf_init(&outbuf);
110	lineno = 1;
111	mark = -10;
112
113	setprogname(*argv);
114	(void)setlocale(LC_ALL, "");
115
116	while ((c = getopt(argc, argv, "Cg:m:r")) != -1)
117		switch (c) {
118		case 'C':
119			center++;
120			break;
121		case 'g':
122			(void)getnum(optarg, "goal", &goal_length, 1);
123			compat = 0;
124			break;
125		case 'm':
126			(void)getnum(optarg, "max", &max_length, 1);
127			compat = 0;
128			break;
129		case 'r':
130			raw++;
131			break;
132		default:
133			usage();
134		}
135
136	argc -= optind;
137	argv += optind;
138
139	/*
140	 * compatibility with old usage.
141	 */
142	if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) {
143		argv++;
144		argc--;
145		if (argc > 0 && getnum(*argv, "max", &max_length, 0)) {
146			argv++;
147			argc--;
148		}
149	}
150
151	if (max_length <= goal_length) {
152		errx(1, "Max length (%zu) must be greater than goal "
153		    "length (%zu)", max_length, goal_length);
154	}
155	if (argc == 0) {
156		fmt(stdin);
157		oflush();
158		return 0;
159	}
160	for (;argc; argc--, argv++) {
161		if ((fi = fopen(*argv, "r")) == NULL) {
162			warn("Cannot open `%s'", *argv);
163			errs++;
164			continue;
165		}
166		fmt(fi);
167		(void)fclose(fi);
168	}
169	oflush();
170	buf_end(&outbuf);
171	return errs;
172}
173
/*
 * Print the two accepted command-line forms on standard error and
 * exit with status 1.  Does not return.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-Cr] [-g <goal>] [-m <max>] [<files>..]\n"
	    "\t %s [-Cr] [<goal>] [<max>] [<files>]\n",
	    prog, prog);
	exit(1);
}
183
/*
 * Parse `str' as an unsigned length (decimal, octal with a leading 0, or
 * hex with a leading 0x) and store it in `*res'.
 *
 *	str	text to convert; the whole string must be consumed
 *	what	name of the quantity, used only in error messages
 *	res	receives the value; left untouched when 0 is returned
 *	badnum	if nonzero, a malformed number is fatal (errx) instead of
 *		being reported through the return value
 *
 * Returns 1 if a number was stored, 0 if `str' is not a number and
 * `badnum' is zero.  A number too large for size_t is always fatal.
 */
static int
getnum(const char *str, const char *what, size_t *res, int badnum)
{
	unsigned long ul;
	char *ep;
	int ok;

	errno = 0;
	ul = strtoul(str, &ep, 0);

	/*
	 * strtoul() accepts "-N" and returns the negated value as a huge
	 * unsigned number.  Since the whole string must be consumed, any
	 * '-' in it can only be that sign, so reject it outright.
	 */
	ok = *str != '\0' && *ep == '\0' && strchr(str, '-') == NULL;

	if (ok) {
		/*
		 * The second test catches values that do not survive the
		 * narrowing to size_t on platforms where it is smaller
		 * than unsigned long.
		 */
		if ((errno == ERANGE && ul == ULONG_MAX) ||
		    (unsigned long)(size_t)ul != ul)
			errx(1, "%s number `%s' too big", what, str);
		*res = (size_t)ul;
		return 1;
	}
	if (badnum)
		errx(1, "Bad %s number `%s'", what, str);

	return 0;
}
202
203/*
204 * Read up characters from the passed input file, forming lines,
205 * doing ^H processing, expanding tabs, stripping trailing blanks,
206 * and sending each line down for analysis.
207 */
208static void
209fmt(FILE *fi)
210{
211	struct buffer lbuf, cbuf;
212	char *cp, *cp2;
213	int c, add_space;
214	size_t len, col, i;
215
216	if (center) {
217		for (;;) {
218			cp = fgetln(fi, &len);
219			if (!cp)
220				return;
221
222			/* skip over leading space */
223			while (len > 0) {
224				if (!isspace((unsigned char)*cp))
225					break;
226				cp++;
227				len--;
228			}
229
230			/* clear trailing space */
231			while (len > 0) {
232				if (!isspace((unsigned char)cp[len-1]))
233					break;
234				len--;
235			}
236
237			if (len == 0) {
238				/* blank line */
239				(void)putchar('\n');
240				continue;
241			}
242
243			if (goal_length > len) {
244				for (i = 0; i < (goal_length - len) / 2; i++) {
245					(void)putchar(' ');
246				}
247			}
248			for (i = 0; i < len; i++) {
249				(void)putchar(cp[i]);
250			}
251			(void)putchar('\n');
252		}
253	}
254
255	buf_init(&lbuf);
256	buf_init(&cbuf);
257	c = getc(fi);
258
259	while (c != EOF) {
260		/*
261		 * Collect a line, doing ^H processing.
262		 * Leave tabs for now.
263		 */
264		buf_reset(&lbuf);
265		while (c != '\n' && c != EOF) {
266			if (c == '\b') {
267				(void)buf_unputc(&lbuf);
268				c = getc(fi);
269				continue;
270			}
271			if(!(isprint(c) || c == '\t' || c >= 160)) {
272				c = getc(fi);
273				continue;
274			}
275			buf_putc(&lbuf, c);
276			c = getc(fi);
277		}
278		buf_putc(&lbuf, '\0');
279		(void)buf_unputc(&lbuf);
280		add_space = c != EOF;
281
282		/*
283		 * Expand tabs on the way.
284		 */
285		col = 0;
286		cp = lbuf.bptr;
287		buf_reset(&cbuf);
288		while ((c = *cp++) != '\0') {
289			if (c != '\t') {
290				col++;
291				buf_putc(&cbuf, c);
292				continue;
293			}
294			do {
295				buf_putc(&cbuf, ' ');
296				col++;
297			} while ((col & 07) != 0);
298		}
299
300		/*
301		 * Swipe trailing blanks from the line.
302		 */
303		for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
304			continue;
305		cbuf.ptr = cp2 + 1;
306		buf_putc(&cbuf, '\0');
307		(void)buf_unputc(&cbuf);
308		prefix(&cbuf, add_space);
309		if (c != EOF)
310			c = getc(fi);
311	}
312	buf_end(&cbuf);
313	buf_end(&lbuf);
314}
315
316/*
317 * Take a line devoid of tabs and other garbage and determine its
318 * blank prefix.  If the indent changes, call for a linebreak.
319 * If the input line is blank, echo the blank line on the output.
320 * Finally, if the line minus the prefix is a mail header, try to keep
321 * it on a line by itself.
322 */
323static void
324prefix(const struct buffer *buf, int add_space)
325{
326	const char *cp;
327	const char **hp;
328	size_t np;
329	int h;
330
331	if (buf->ptr == buf->bptr) {
332		oflush();
333		(void)putchar('\n');
334		return;
335	}
336	for (cp = buf->bptr; *cp == ' '; cp++)
337		continue;
338	np = cp - buf->bptr;
339
340	/*
341	 * The following horrible expression attempts to avoid linebreaks
342	 * when the indent changes due to a paragraph.
343	 */
344	if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
345		oflush();
346	if (!raw) {
347		if ((h = ishead(cp)) != 0) {
348			oflush();
349			mark = lineno;
350		}
351		if (lineno - mark < 3 && lineno - mark > 0)
352			for (hp = &headnames[0]; *hp != NULL; hp++)
353				if (ispref(*hp, cp)) {
354					h = 1;
355					oflush();
356					break;
357				}
358		if (!h && (h = (*cp == '.')))
359			oflush();
360	} else
361		h = 0;
362	pfx = np;
363	if (h) {
364		pack(cp, (size_t)(buf->ptr - cp));
365		oflush();
366	} else
367		split(cp, add_space);
368	lineno++;
369}
370
371/*
372 * Split up the passed line into output "words" which are
373 * maximal strings of non-blanks with the blank separation
374 * attached at the end.  Pass these words along to the output
375 * line packer.
376 */
377static void
378split(const char line[], int add_space)
379{
380	const char *cp;
381	struct buffer word;
382	size_t wlen;
383
384	buf_init(&word);
385	cp = line;
386	while (*cp) {
387		buf_reset(&word);
388		wlen = 0;
389
390		/*
391		 * Collect a 'word,' allowing it to contain escaped white
392		 * space.
393		 */
394		while (*cp && *cp != ' ') {
395			if (*cp == '\\' && isspace((unsigned char)cp[1]))
396				buf_putc(&word, *cp++);
397			buf_putc(&word, *cp++);
398			wlen++;
399		}
400
401		/*
402		 * Guarantee a space at end of line. Two spaces after end of
403		 * sentence punctuation.
404		 */
405		if (*cp == '\0' && add_space) {
406			buf_putc(&word, ' ');
407			if (strchr(".:!", cp[-1]))
408				buf_putc(&word, ' ');
409		}
410		while (*cp == ' ')
411			buf_putc(&word, *cp++);
412
413		buf_putc(&word, '\0');
414		(void)buf_unputc(&word);
415
416		pack(word.bptr, wlen);
417	}
418	buf_end(&word);
419}
420
421/*
422 * Output section.
423 * Build up line images from the words passed in.  Prefix
424 * each line with correct number of blanks.
425 *
426 * At the bottom of this whole mess, leading tabs are reinserted.
427 */
428
429/*
430 * Pack a word onto the output line.  If this is the beginning of
431 * the line, push on the appropriately-sized string of blanks first.
432 * If the word won't fit on the current line, flush and begin a new
433 * line.  If the word is too long to fit all by itself on a line,
434 * just give it its own and hope for the best.
435 *
436 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
437 *	goal length, take it.  If not, then check to see if the line
438 *	will be over the max length; if so put the word on the next
439 *	line.  If not, check to see if the line will be closer to the
440 *	goal length with or without the word and take it or put it on
441 *	the next line accordingly.
442 */
443
444static void
445pack(const char *word, size_t wlen)
446{
447	const char *cp;
448	size_t s, t;
449
450	if (outbuf.bptr == outbuf.ptr)
451		leadin();
452	/*
453	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
454	 * length of the line before the word is added; t is now the length
455	 * of the line after the word is added
456	 */
457	s = outbuf.ptr - outbuf.bptr;
458	t = wlen + s;
459	if ((t <= goal_length) || ((t <= max_length) &&
460	    (s <= goal_length) && (t - goal_length <= goal_length - s))) {
461		/*
462		 * In like flint!
463		 */
464		for (cp = word; *cp;)
465			buf_putc(&outbuf, *cp++);
466		return;
467	}
468	if (s > pfx) {
469		oflush();
470		leadin();
471	}
472	for (cp = word; *cp;)
473		buf_putc(&outbuf, *cp++);
474}
475
476/*
477 * If there is anything on the current output line, send it on
478 * its way.  Reset outbuf.
479 */
480static void
481oflush(void)
482{
483	if (outbuf.bptr == outbuf.ptr)
484		return;
485	buf_putc(&outbuf, '\0');
486	(void)buf_unputc(&outbuf);
487	tabulate(&outbuf);
488	buf_reset(&outbuf);
489}
490
491/*
492 * Take the passed line buffer, insert leading tabs where possible, and
493 * output on standard output (finally).
494 */
495static void
496tabulate(struct buffer *buf)
497{
498	char *cp;
499	size_t b, t;
500
501	/*
502	 * Toss trailing blanks in the output line.
503	 */
504	for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--)
505		continue;
506	*++cp = '\0';
507
508	/*
509	 * Count the leading blank space and tabulate.
510	 */
511	for (cp = buf->bptr; *cp == ' '; cp++)
512		continue;
513	b = cp - buf->bptr;
514	t = b / 8;
515	b = b % 8;
516	if (t > 0)
517		do
518			(void)putchar('\t');
519		while (--t);
520	if (b > 0)
521		do
522			(void)putchar(' ');
523		while (--b);
524	while (*cp)
525		(void)putchar(*cp++);
526	(void)putchar('\n');
527}
528
529/*
530 * Initialize the output line with the appropriate number of
531 * leading blanks.
532 */
533static void
534leadin(void)
535{
536	size_t b;
537
538	buf_reset(&outbuf);
539
540	for (b = 0; b < pfx; b++)
541		buf_putc(&outbuf, ' ');
542}
543
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 if every character of s1 matches the character at the same
 * position in s2 (so the empty string is a prefix of anything), and 0
 * otherwise.  Both pointers advance together; the comparison stops at
 * the first mismatch, which also covers s2 being shorter than s1 since
 * its terminating NUL cannot match a character of s1.
 */
static int
ispref(const char *s1, const char *s2)
{

	while (*s1 != '\0') {
		if (*s1++ != *s2++)
			return 0;
	}
	return 1;
}
555