1/*	$NetBSD: cut.c,v 1.28 2012/06/20 17:53:39 wiz Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#include <sys/cdefs.h>
36#ifndef lint
37__COPYRIGHT("@(#) Copyright (c) 1989, 1993\
38 The Regents of the University of California.  All rights reserved.");
39#endif /* not lint */
40
41#ifndef lint
42#if 0
43static char sccsid[] = "@(#)cut.c	8.3 (Berkeley) 5/4/95";
44#endif
45__RCSID("$NetBSD: cut.c,v 1.28 2012/06/20 17:53:39 wiz Exp $");
46#endif /* not lint */
47
48#include <ctype.h>
49#include <err.h>
50#include <errno.h>
51#include <limits.h>
52#include <locale.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56#include <unistd.h>
57#include <util.h>
58#include <wchar.h>
59#include <sys/param.h>
60
61static int bflag;
62static int	cflag;
63static char	dchar;
64static int	dflag;
65static int	fflag;
66static int	sflag;
67
68static void	b_cut(FILE *, const char *);
69static void	c_cut(FILE *, const char *);
70static void	f_cut(FILE *, const char *);
71static void	get_list(char *);
72static void	usage(void) __dead;
73
74int
75main(int argc, char *argv[])
76{
77	FILE *fp;
78	void (*fcn)(FILE *, const char *);
79	int ch, rval;
80
81	fcn = NULL;
82	(void)setlocale(LC_ALL, "");
83
84	dchar = '\t';			/* default delimiter is \t */
85
86	/* Since we don't support multi-byte characters, the -c and -b
87	   options are equivalent, and the -n option is meaningless. */
88	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
89		switch(ch) {
90		case 'b':
91			fcn = b_cut;
92			get_list(optarg);
93			bflag = 1;
94			break;
95		case 'c':
96			fcn = c_cut;
97			get_list(optarg);
98			cflag = 1;
99			break;
100		case 'd':
101			dchar = *optarg;
102			dflag = 1;
103			break;
104		case 'f':
105			get_list(optarg);
106			fcn = f_cut;
107			fflag = 1;
108			break;
109		case 's':
110			sflag = 1;
111			break;
112		case 'n':
113			break;
114		case '?':
115		default:
116			usage();
117		}
118	argc -= optind;
119	argv += optind;
120
121	if (fflag) {
122		if (cflag || bflag)
123			usage();
124	} else if ((!cflag && !bflag) || dflag || sflag)
125		usage();
126	else if (bflag && cflag)
127		usage();
128
129	rval = 0;
130	if (*argv)
131		for (; *argv; ++argv) {
132			if (strcmp(*argv, "-") == 0)
133				fcn(stdin, "stdin");
134			else {
135				if ((fp = fopen(*argv, "r"))) {
136					fcn(fp, *argv);
137					(void)fclose(fp);
138				} else {
139					rval = 1;
140					warn("%s", *argv);
141				}
142			}
143		}
144	else
145		fcn(stdin, "stdin");
146	return(rval);
147}
148
/*
 * Selection state built by get_list().
 *
 * positions is a byte map indexed by 1-based field/column number; a
 * nonzero byte means "selected".  numpositions is its allocated size.
 */
static char	*positions = NULL;
static size_t	 numpositions = 0;

/* Largest N seen in an item with a leading dash ("-N"); 0 if none. */
static size_t	 autostart;
/* Smallest end value of an item with a trailing dash ("N-"); 0 if none. */
static size_t	 autostop;
/* Highest position listed, lowered to autostop when that is smaller. */
static size_t	 maxval;

#define ALLOC_CHUNK	_POSIX2_LINE_MAX	/* malloc granularity */
154
155static void
156get_list(char *list)
157{
158	size_t setautostart, start, stop;
159	char *pos;
160	char *p;
161
162	if (positions == NULL) {
163		numpositions = ALLOC_CHUNK;
164		positions = ecalloc(numpositions, sizeof(*positions));
165	}
166
167	/*
168	 * set a byte in the positions array to indicate if a field or
169	 * column is to be selected; use +1, it's 1-based, not 0-based.
170	 * This parser is less restrictive than the Draft 9 POSIX spec.
171	 * POSIX doesn't allow lists that aren't in increasing order or
172	 * overlapping lists.  We also handle "-3-5" although there's no
173	 * real reason to.
174	 */
175	for (; (p = strtok(list, ", \t")) != NULL; list = NULL) {
176		setautostart = start = stop = 0;
177		if (*p == '-') {
178			++p;
179			setautostart = 1;
180		}
181		if (isdigit((unsigned char)*p)) {
182			start = stop = strtol(p, &p, 10);
183			if (setautostart && start > autostart)
184				autostart = start;
185		}
186		if (*p == '-') {
187			if (isdigit((unsigned char)p[1]))
188				stop = strtol(p + 1, &p, 10);
189			if (*p == '-') {
190				++p;
191				if (!autostop || autostop > stop)
192					autostop = stop;
193			}
194		}
195		if (*p)
196			errx(1, "[-bcf] list: illegal list value");
197		if (!stop || !start)
198			errx(1, "[-bcf] list: values may not include zero");
199		if (stop + 1 > numpositions) {
200			size_t newsize;
201			newsize = roundup(stop + 1, ALLOC_CHUNK);
202			positions = erealloc(positions, newsize);
203			(void)memset(positions + numpositions, 0,
204			    newsize - numpositions);
205			numpositions = newsize;
206		}
207		if (maxval < stop)
208			maxval = stop;
209		for (pos = positions + start; start++ <= stop; pos++)
210			*pos = 1;
211	}
212
213	/* overlapping ranges */
214	if (autostop && maxval > autostop)
215		maxval = autostop;
216
217	/* set autostart */
218	if (autostart)
219		(void)memset(positions + 1, '1', autostart);
220}
221
222static void
223/*ARGSUSED*/
224f_cut(FILE *fp, const char *fname __unused)
225{
226	int ch, field, isdelim;
227	char *pos, *p, sep;
228	int output;
229	size_t len;
230	char *lbuf, *tbuf;
231
232	for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len)) != NULL;) {
233		output = 0;
234		if (lbuf[len - 1] != '\n') {
235			/* no newline at the end of the last line so add one */
236			if ((tbuf = (char *)malloc(len + 1)) == NULL)
237				err(1, NULL);
238			(void)memcpy(tbuf, lbuf, len);
239			tbuf[len++] = '\n';
240			lbuf = tbuf;
241		}
242		for (isdelim = 0, p = lbuf;; ++p) {
243			ch = *p;
244			/* this should work if newline is delimiter */
245			if (ch == sep)
246				isdelim = 1;
247			if (ch == '\n') {
248				if (!isdelim && !sflag)
249					(void)fwrite(lbuf, len, 1, stdout);
250				break;
251			}
252		}
253		if (!isdelim)
254			continue;
255
256		pos = positions + 1;
257		for (field = maxval, p = lbuf; field; --field, ++pos) {
258			if (*pos) {
259				if (output++)
260					(void)putchar(sep);
261				while ((ch = *p++) != '\n' && ch != sep)
262					(void)putchar(ch);
263			} else {
264				while ((ch = *p++) != '\n' && ch != sep)
265					continue;
266			}
267			if (ch == '\n')
268				break;
269		}
270		if (ch != '\n') {
271			if (autostop) {
272				if (output)
273					(void)putchar(sep);
274				for (; (ch = *p) != '\n'; ++p)
275					(void)putchar(ch);
276			} else
277				for (; (ch = *p) != '\n'; ++p);
278		}
279		(void)putchar('\n');
280		if (tbuf) {
281			free(tbuf);
282			tbuf = NULL;
283		}
284	}
285	if (tbuf)
286		free(tbuf);
287}
288
/*
 * usage --
 *	Write the synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage:\tcut -b list [-n] [file ...]\n"
	    "\tcut -c list [file ...]\n"
	    "\tcut -f list [-d string] [-s] [file ...]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
297
/* make b_cut(): */
299#define CUT_BYTE 1
300#include "x_cut.c"
301#undef CUT_BYTE
302
/* make c_cut(): */
304#define CUT_BYTE 0
305#include "x_cut.c"
306#undef CUT_BYTE
307