1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1980, 1987, 1991, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#ifndef lint
33static const char copyright[] =
34"@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
35	The Regents of the University of California.  All rights reserved.\n";
36#endif /* not lint */
37
38#if 0
39#ifndef lint
40static char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
41#endif /* not lint */
42#endif
43
44#include <sys/cdefs.h>
45__FBSDID("$FreeBSD$");
46
47#include <sys/param.h>
48#include <sys/stat.h>
49
50#include <ctype.h>
51#include <err.h>
52#include <errno.h>
53#include <fcntl.h>
54#include <locale.h>
55#include <stdint.h>
56#include <stdio.h>
57#include <stdlib.h>
58#include <string.h>
59#include <unistd.h>
60#include <wchar.h>
61#include <wctype.h>
62#include <libxo/xo.h>
63
64static uintmax_t tlinect, twordct, tcharct, tlongline;
65static int doline, doword, dochar, domulti, dolongline;
66static volatile sig_atomic_t siginfo;
67static xo_handle_t *stderr_handle;
68
69static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
70		    uintmax_t charct, uintmax_t llct);
71static int	cnt(const char *);
72static void	usage(void);
73
74static void
75siginfo_handler(int sig __unused)
76{
77
78	siginfo = 1;
79}
80
81static void
82reset_siginfo(void)
83{
84
85	signal(SIGINFO, SIG_DFL);
86	siginfo = 0;
87}
88
89int
90main(int argc, char *argv[])
91{
92	int ch, errors, total;
93
94	(void) setlocale(LC_CTYPE, "");
95
96	argc = xo_parse_args(argc, argv);
97	if (argc < 0)
98		return (argc);
99
100	while ((ch = getopt(argc, argv, "clmwL")) != -1)
101		switch((char)ch) {
102		case 'l':
103			doline = 1;
104			break;
105		case 'w':
106			doword = 1;
107			break;
108		case 'c':
109			dochar = 1;
110			domulti = 0;
111			break;
112		case 'L':
113			dolongline = 1;
114			break;
115		case 'm':
116			domulti = 1;
117			dochar = 0;
118			break;
119		case '?':
120		default:
121			usage();
122		}
123	argv += optind;
124	argc -= optind;
125
126	(void)signal(SIGINFO, siginfo_handler);
127
128	/* Wc's flags are on by default. */
129	if (doline + doword + dochar + domulti + dolongline == 0)
130		doline = doword = dochar = 1;
131
132	stderr_handle = xo_create_to_file(stderr, XO_STYLE_TEXT, 0);
133	xo_open_container("wc");
134	xo_open_list("file");
135
136	errors = 0;
137	total = 0;
138	if (!*argv) {
139	 	xo_open_instance("file");
140		if (cnt((char *)NULL) != 0)
141			++errors;
142	 	xo_close_instance("file");
143	} else {
144		do {
145	 		xo_open_instance("file");
146			if (cnt(*argv) != 0)
147				++errors;
148	 		xo_close_instance("file");
149			++total;
150		} while(*++argv);
151	}
152
153	xo_close_list("file");
154
155	if (total > 1) {
156		xo_open_container("total");
157		show_cnt("total", tlinect, twordct, tcharct, tlongline);
158		xo_close_container("total");
159	}
160
161	xo_close_container("wc");
162	xo_finish();
163	exit(errors == 0 ? 0 : 1);
164}
165
166static void
167show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
168    uintmax_t charct, uintmax_t llct)
169{
170	xo_handle_t *xop;
171
172	if (!siginfo)
173		xop = NULL;
174	else {
175		xop = stderr_handle;
176		siginfo = 0;
177	}
178
179	if (doline)
180		xo_emit_h(xop, " {:lines/%7ju/%ju}", linect);
181	if (doword)
182		xo_emit_h(xop, " {:words/%7ju/%ju}", wordct);
183	if (dochar || domulti)
184		xo_emit_h(xop, " {:characters/%7ju/%ju}", charct);
185	if (dolongline)
186		xo_emit_h(xop, " {:long-lines/%7ju/%ju}", llct);
187	if (file != NULL)
188		xo_emit_h(xop, " {:filename/%s}\n", file);
189	else
190		xo_emit_h(xop, "\n");
191}
192
193static int
194cnt(const char *file)
195{
196	struct stat sb;
197	uintmax_t linect, wordct, charct, llct, tmpll;
198	int fd, len, warned;
199	size_t clen;
200	short gotsp;
201	u_char *p;
202	u_char buf[MAXBSIZE];
203	wchar_t wch;
204	mbstate_t mbs;
205
206	linect = wordct = charct = llct = tmpll = 0;
207	if (file == NULL)
208		fd = STDIN_FILENO;
209	else if ((fd = open(file, O_RDONLY, 0)) < 0) {
210		xo_warn("%s: open", file);
211		return (1);
212	}
213	if (doword || (domulti && MB_CUR_MAX != 1))
214		goto word;
215	/*
216	 * If all we need is the number of characters and it's a regular file,
217	 * just stat it.
218	 */
219	if (doline == 0 && dolongline == 0) {
220		if (fstat(fd, &sb)) {
221			xo_warn("%s: fstat", file != NULL ? file : "stdin");
222			(void)close(fd);
223			return (1);
224		}
225		if (S_ISREG(sb.st_mode)) {
226			reset_siginfo();
227			charct = sb.st_size;
228			show_cnt(file, linect, wordct, charct, llct);
229			tcharct += charct;
230			(void)close(fd);
231			return (0);
232		}
233	}
234	/*
235	 * For files we can't stat, or if we need line counting, slurp the
236	 * file.  Line counting is split out because it's a lot faster to get
237	 * lines than to get words, since the word count requires locale
238	 * handling.
239	 */
240	while ((len = read(fd, buf, MAXBSIZE))) {
241		if (len == -1) {
242			xo_warn("%s: read", file != NULL ? file : "stdin");
243			(void)close(fd);
244			return (1);
245		}
246		if (siginfo)
247			show_cnt(file, linect, wordct, charct, llct);
248		charct += len;
249		if (doline || dolongline) {
250			for (p = buf; len--; ++p)
251				if (*p == '\n') {
252					if (tmpll > llct)
253						llct = tmpll;
254					tmpll = 0;
255					++linect;
256				} else
257					tmpll++;
258		}
259	}
260	reset_siginfo();
261	if (doline)
262		tlinect += linect;
263	if (dochar)
264		tcharct += charct;
265	if (dolongline && llct > tlongline)
266		tlongline = llct;
267	show_cnt(file, linect, wordct, charct, llct);
268	(void)close(fd);
269	return (0);
270
271	/* Do it the hard way... */
272word:	gotsp = 1;
273	warned = 0;
274	memset(&mbs, 0, sizeof(mbs));
275	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
276		if (len == -1) {
277			xo_warn("%s: read", file != NULL ? file : "stdin");
278			(void)close(fd);
279			return (1);
280		}
281		p = buf;
282		while (len > 0) {
283			if (siginfo)
284				show_cnt(file, linect, wordct, charct, llct);
285			if (!domulti || MB_CUR_MAX == 1) {
286				clen = 1;
287				wch = (unsigned char)*p;
288			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
289			    (size_t)-1) {
290				if (!warned) {
291					errno = EILSEQ;
292					xo_warn("%s",
293					    file != NULL ? file : "stdin");
294					warned = 1;
295				}
296				memset(&mbs, 0, sizeof(mbs));
297				clen = 1;
298				wch = (unsigned char)*p;
299			} else if (clen == (size_t)-2)
300				break;
301			else if (clen == 0)
302				clen = 1;
303			charct++;
304			if (wch != L'\n')
305				tmpll++;
306			len -= clen;
307			p += clen;
308			if (wch == L'\n') {
309				if (tmpll > llct)
310					llct = tmpll;
311				tmpll = 0;
312				++linect;
313			}
314			if (iswspace(wch))
315				gotsp = 1;
316			else if (gotsp) {
317				gotsp = 0;
318				++wordct;
319			}
320		}
321	}
322	reset_siginfo();
323	if (domulti && MB_CUR_MAX > 1)
324		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
325			xo_warn("%s", file != NULL ? file : "stdin");
326	if (doline)
327		tlinect += linect;
328	if (doword)
329		twordct += wordct;
330	if (dochar || domulti)
331		tcharct += charct;
332	if (dolongline && llct > tlongline)
333		tlongline = llct;
334	show_cnt(file, linect, wordct, charct, llct);
335	(void)close(fd);
336	return (0);
337}
338
/*
 * Print the command synopsis through xo_error() and terminate with exit
 * status 1.  Does not return.
 */
static void
usage(void)
{
	xo_error("usage: wc [-Lclmw] [file ...]\n");
	exit(1);
}
345