11590Srgrimes/*
21590Srgrimes * Copyright (c) 1980, 1987, 1991, 1993
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * Redistribution and use in source and binary forms, with or without
61590Srgrimes * modification, are permitted provided that the following conditions
71590Srgrimes * are met:
81590Srgrimes * 1. Redistributions of source code must retain the above copyright
91590Srgrimes *    notice, this list of conditions and the following disclaimer.
101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer in the
121590Srgrimes *    documentation and/or other materials provided with the distribution.
131590Srgrimes * 4. Neither the name of the University nor the names of its contributors
141590Srgrimes *    may be used to endorse or promote products derived from this software
151590Srgrimes *    without specific prior written permission.
161590Srgrimes *
171590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271590Srgrimes * SUCH DAMAGE.
281590Srgrimes */
291590Srgrimes
301590Srgrimes#ifndef lint
3191792Smikestatic const char copyright[] =
321590Srgrimes"@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
331590Srgrimes	The Regents of the University of California.  All rights reserved.\n";
3491481Smike#endif /* not lint */
351590Srgrimes
3691792Smike#if 0
371590Srgrimes#ifndef lint
3891481Smikestatic char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
3991481Smike#endif /* not lint */
4015233Sbde#endif
411590Srgrimes
4291481Smike#include <sys/cdefs.h>
4391481Smike__FBSDID("$FreeBSD$");
4491481Smike
451590Srgrimes#include <sys/param.h>
461590Srgrimes#include <sys/stat.h>
4715233Sbde
4815233Sbde#include <ctype.h>
4915233Sbde#include <err.h>
5098165Stjr#include <errno.h>
511590Srgrimes#include <fcntl.h>
5213380Sache#include <locale.h>
5391481Smike#include <stdint.h>
541590Srgrimes#include <stdio.h>
551590Srgrimes#include <stdlib.h>
561590Srgrimes#include <string.h>
5715233Sbde#include <unistd.h>
58128049Stjr#include <wchar.h>
59101670Stjr#include <wctype.h>
601590Srgrimes
/* Running totals over every input processed; printed as the "total" line. */
static uintmax_t tlinect;
static uintmax_t twordct;
static uintmax_t tcharct;
static uintmax_t tlongline;

/* Which counts to report; set from the command-line options in main(). */
static int doline;
static int doword;
static int dochar;
static int domulti;
static int dolongline;

/* Set by the SIGINFO handler, cleared by show_cnt() once it has reported. */
static volatile sig_atomic_t siginfo;

static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
		    uintmax_t charct, uintmax_t llct);
static int	cnt(const char *file);
static void	usage(void);
691590Srgrimes
70208170Spjdstatic void
71208170Spjdsiginfo_handler(int sig __unused)
72208170Spjd{
73208170Spjd
74208170Spjd	siginfo = 1;
75208170Spjd}
76208170Spjd
771590Srgrimesint
78139364Sjosefmain(int argc, char *argv[])
791590Srgrimes{
8049685Ssheldonh	int ch, errors, total;
811590Srgrimes
8213380Sache	(void) setlocale(LC_CTYPE, "");
8313380Sache
84185714Skeramida	while ((ch = getopt(argc, argv, "clmwL")) != -1)
851590Srgrimes		switch((char)ch) {
861590Srgrimes		case 'l':
871590Srgrimes			doline = 1;
881590Srgrimes			break;
891590Srgrimes		case 'w':
901590Srgrimes			doword = 1;
911590Srgrimes			break;
921590Srgrimes		case 'c':
931590Srgrimes			dochar = 1;
9498165Stjr			domulti = 0;
951590Srgrimes			break;
96185714Skeramida		case 'L':
97185714Skeramida			dolongline = 1;
98185714Skeramida			break;
9998165Stjr		case 'm':
10098165Stjr			domulti = 1;
10198165Stjr			dochar = 0;
10298165Stjr			break;
1031590Srgrimes		case '?':
1041590Srgrimes		default:
1051590Srgrimes			usage();
1061590Srgrimes		}
1071590Srgrimes	argv += optind;
1081590Srgrimes	argc -= optind;
1091590Srgrimes
110208170Spjd	(void)signal(SIGINFO, siginfo_handler);
111208170Spjd
1121590Srgrimes	/* Wc's flags are on by default. */
113185714Skeramida	if (doline + doword + dochar + domulti + dolongline == 0)
1141590Srgrimes		doline = doword = dochar = 1;
1151590Srgrimes
11615233Sbde	errors = 0;
1171590Srgrimes	total = 0;
1181590Srgrimes	if (!*argv) {
11915233Sbde		if (cnt((char *)NULL) != 0)
12015233Sbde			++errors;
121208170Spjd	} else {
122208170Spjd		do {
123208170Spjd			if (cnt(*argv) != 0)
124208170Spjd				++errors;
125208170Spjd			++total;
126208170Spjd		} while(*++argv);
1271590Srgrimes	}
1281590Srgrimes
129208170Spjd	if (total > 1)
130208170Spjd		show_cnt("total", tlinect, twordct, tcharct, tlongline);
13115233Sbde	exit(errors == 0 ? 0 : 1);
1321590Srgrimes}
1331590Srgrimes
134208170Spjdstatic void
135208170Spjdshow_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
136208170Spjd    uintmax_t charct, uintmax_t llct)
137208170Spjd{
138208170Spjd	FILE *out;
139208170Spjd
140208170Spjd	if (!siginfo)
141208170Spjd		out = stdout;
142208170Spjd	else {
143208170Spjd		out = stderr;
144208170Spjd		siginfo = 0;
145208170Spjd	}
146208170Spjd
147208170Spjd	if (doline)
148208170Spjd		(void)fprintf(out, " %7ju", linect);
149208170Spjd	if (doword)
150208170Spjd		(void)fprintf(out, " %7ju", wordct);
151208170Spjd	if (dochar || domulti)
152208170Spjd		(void)fprintf(out, " %7ju", charct);
153208170Spjd	if (dolongline)
154208170Spjd		(void)fprintf(out, " %7ju", llct);
155208170Spjd	if (file != NULL)
156208170Spjd		(void)fprintf(out, " %s\n", file);
157208170Spjd	else
158208170Spjd		(void)fprintf(out, "\n");
159208170Spjd}
160208170Spjd
16191481Smikestatic int
162139364Sjosefcnt(const char *file)
1631590Srgrimes{
16449685Ssheldonh	struct stat sb;
165185714Skeramida	uintmax_t linect, wordct, charct, llct, tmpll;
166128049Stjr	int fd, len, warned;
167128049Stjr	size_t clen;
16849461Ssheldonh	short gotsp;
16949685Ssheldonh	u_char *p;
17098291Stjr	u_char buf[MAXBSIZE];
17198165Stjr	wchar_t wch;
172128049Stjr	mbstate_t mbs;
1731590Srgrimes
174185714Skeramida	linect = wordct = charct = llct = tmpll = 0;
175208170Spjd	if (file == NULL)
17615233Sbde		fd = STDIN_FILENO;
177208170Spjd	else {
17815179Swosch		if ((fd = open(file, O_RDONLY, 0)) < 0) {
17915233Sbde			warn("%s: open", file);
18015233Sbde			return (1);
18115179Swosch		}
18298165Stjr		if (doword || (domulti && MB_CUR_MAX != 1))
1831590Srgrimes			goto word;
1841590Srgrimes		/*
1851590Srgrimes		 * Line counting is split out because it's a lot faster to get
1861590Srgrimes		 * lines than to get words, since the word count requires some
1871590Srgrimes		 * logic.
1881590Srgrimes		 */
1891590Srgrimes		if (doline) {
19028696Scharnier			while ((len = read(fd, buf, MAXBSIZE))) {
19115233Sbde				if (len == -1) {
19215233Sbde					warn("%s: read", file);
19315233Sbde					(void)close(fd);
19415233Sbde					return (1);
19515233Sbde				}
196208170Spjd				if (siginfo) {
197208170Spjd					show_cnt(file, linect, wordct, charct,
198208170Spjd					    llct);
199208170Spjd				}
2001590Srgrimes				charct += len;
2011590Srgrimes				for (p = buf; len--; ++p)
202185714Skeramida					if (*p == '\n') {
203185714Skeramida						if (tmpll > llct)
204185714Skeramida							llct = tmpll;
205185714Skeramida						tmpll = 0;
2061590Srgrimes						++linect;
207185714Skeramida					} else
208185714Skeramida						tmpll++;
2091590Srgrimes			}
2101590Srgrimes			tlinect += linect;
211208170Spjd			if (dochar)
2121590Srgrimes				tcharct += charct;
213185714Skeramida			if (dolongline) {
214185714Skeramida				if (llct > tlongline)
215185714Skeramida					tlongline = llct;
216185714Skeramida			}
217208170Spjd			show_cnt(file, linect, wordct, charct, llct);
2181590Srgrimes			(void)close(fd);
21915233Sbde			return (0);
2201590Srgrimes		}
2211590Srgrimes		/*
2221590Srgrimes		 * If all we need is the number of characters and it's a
22398245Stjr		 * regular file, just stat the puppy.
2241590Srgrimes		 */
22598165Stjr		if (dochar || domulti) {
22615233Sbde			if (fstat(fd, &sb)) {
22715233Sbde				warn("%s: fstat", file);
22815233Sbde				(void)close(fd);
22915233Sbde				return (1);
23015233Sbde			}
23198245Stjr			if (S_ISREG(sb.st_mode)) {
232208170Spjd				charct = sb.st_size;
233208170Spjd				show_cnt(file, linect, wordct, charct, llct);
234208170Spjd				tcharct += charct;
2351590Srgrimes				(void)close(fd);
23615233Sbde				return (0);
2371590Srgrimes			}
2381590Srgrimes		}
2391590Srgrimes	}
2401590Srgrimes
2411590Srgrimes	/* Do it the hard way... */
24298165Stjrword:	gotsp = 1;
24398165Stjr	warned = 0;
244128049Stjr	memset(&mbs, 0, sizeof(mbs));
245128049Stjr	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
246128049Stjr		if (len == -1) {
247208170Spjd			warn("%s: read", file != NULL ? file : "stdin");
24815233Sbde			(void)close(fd);
24915233Sbde			return (1);
25015233Sbde		}
25198165Stjr		p = buf;
25298165Stjr		while (len > 0) {
253208170Spjd			if (siginfo)
254208170Spjd				show_cnt(file, linect, wordct, charct, llct);
25598165Stjr			if (!domulti || MB_CUR_MAX == 1) {
25698165Stjr				clen = 1;
25798165Stjr				wch = (unsigned char)*p;
258128049Stjr			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
259128049Stjr			    (size_t)-1) {
260128049Stjr				if (!warned) {
261128049Stjr					errno = EILSEQ;
262208170Spjd					warn("%s",
263208170Spjd					    file != NULL ? file : "stdin");
264128049Stjr					warned = 1;
26598165Stjr				}
266128049Stjr				memset(&mbs, 0, sizeof(mbs));
267128049Stjr				clen = 1;
268128049Stjr				wch = (unsigned char)*p;
269128049Stjr			} else if (clen == (size_t)-2)
270128049Stjr				break;
271128049Stjr			else if (clen == 0)
272128049Stjr				clen = 1;
27398165Stjr			charct++;
274185714Skeramida			if (wch != L'\n')
275185714Skeramida				tmpll++;
27698165Stjr			len -= clen;
27798165Stjr			p += clen;
278185714Skeramida			if (wch == L'\n') {
279185714Skeramida				if (tmpll > llct)
280185714Skeramida					llct = tmpll;
281185714Skeramida				tmpll = 0;
2821590Srgrimes				++linect;
283185714Skeramida			}
284101670Stjr			if (iswspace(wch))
2851590Srgrimes				gotsp = 1;
2861590Srgrimes			else if (gotsp) {
2871590Srgrimes				gotsp = 0;
2881590Srgrimes				++wordct;
2891590Srgrimes			}
2901590Srgrimes		}
2911590Srgrimes	}
292128049Stjr	if (domulti && MB_CUR_MAX > 1)
293128049Stjr		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
294208170Spjd			warn("%s", file != NULL ? file : "stdin");
295208170Spjd	if (doline)
2961590Srgrimes		tlinect += linect;
297208170Spjd	if (doword)
2981590Srgrimes		twordct += wordct;
299208170Spjd	if (dochar || domulti)
3001590Srgrimes		tcharct += charct;
301185714Skeramida	if (dolongline) {
302185714Skeramida		if (llct > tlongline)
303185714Skeramida			tlongline = llct;
304185714Skeramida	}
305208170Spjd	show_cnt(file, linect, wordct, charct, llct);
3061590Srgrimes	(void)close(fd);
30715233Sbde	return (0);
3081590Srgrimes}
3091590Srgrimes
/*
 * Print the command synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	(void)fputs("usage: wc [-Lclmw] [file ...]\n", stderr);
	exit(1);
}
316