1/*	$NetBSD: split.c,v 1.25 2009/04/13 11:17:06 lukem Exp $	*/
2
3/*
4 * Copyright (c) 1987, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33#ifndef lint
34__COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\
35 The Regents of the University of California.  All rights reserved.");
36#endif /* not lint */
37
38#ifndef lint
39#if 0
40static char sccsid[] = "@(#)split.c	8.3 (Berkeley) 4/25/94";
41#endif
42__RCSID("$NetBSD: split.c,v 1.25 2009/04/13 11:17:06 lukem Exp $");
43#endif /* not lint */
44
45#include <sys/param.h>
46
47#include <ctype.h>
48#include <err.h>
49#include <errno.h>
50#include <fcntl.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <string.h>
54#include <unistd.h>
55
#define DEFLINE	1000		/* Default number of lines per output file. */

/*
 * Nonzero while the current output file still expects data.  The
 * splitters clear it when a piece ends exactly at the end of a read
 * buffer, so that the next output file is only created once more input
 * has actually been read.
 */
static int file_open;
/*
 * Input descriptor (standard input unless main() opens a file operand)
 * and output descriptor (-1 until newfile() opens the first output file).
 */
static int ifd = STDIN_FILENO, ofd = -1;
/*
 * Output file name buffer: main() copies the prefix into it and
 * newfile() writes the sfxlen-character suffix after the prefix.
 */
static char *fname;
static size_t sfxlen = 2;		/* Suffix length (-a); defaults to 2. */

/* Forward declarations of the file-local functions. */
static void newfile(void);
static void split1(off_t, int) __dead;
static void split2(off_t) __dead;
static void split3(off_t) __dead;
static void usage(void) __dead;
static size_t bigwrite(int, void const *, size_t);
69
70int
71main(int argc, char *argv[])
72{
73	int ch;
74	char *ep, *p;
75	char const *base;
76	off_t bytecnt = 0;	/* Byte count to split on. */
77	off_t numlines = 0;	/* Line count to split on. */
78	off_t chunks = 0;	/* Number of chunks to split into. */
79
80	while ((ch = getopt(argc, argv, "0123456789b:l:a:n:")) != -1)
81		switch (ch) {
82		case '0': case '1': case '2': case '3': case '4':
83		case '5': case '6': case '7': case '8': case '9':
84			/*
85			 * Undocumented kludge: split was originally designed
86			 * to take a number after a dash.
87			 */
88			if (numlines == 0) {
89				p = argv[optind - 1];
90				if (p[0] == '-' && p[1] == ch && !p[2])
91					p++;
92				else
93					p = argv[optind] + 1;
94				numlines = strtoull(p, &ep, 10);
95				if (numlines == 0 || *ep != '\0')
96					errx(1, "%s: illegal line count.", p);
97			}
98			break;
99		case 'b':		/* Byte count. */
100			if (!isdigit((unsigned char)optarg[0]) ||
101			    (bytecnt = strtoull(optarg, &ep, 10)) == 0 ||
102			    (*ep != '\0' && *ep != 'k' && *ep != 'm'))
103				errx(1, "%s: illegal byte count.", optarg);
104			if (*ep == 'k')
105				bytecnt *= 1024;
106			else if (*ep == 'm')
107				bytecnt *= 1024 * 1024;
108			break;
109		case 'l':		/* Line count. */
110			if (numlines != 0)
111				usage();
112			if (!isdigit((unsigned char)optarg[0]) ||
113			    (numlines = strtoull(optarg, &ep, 10)) == 0 ||
114			    *ep != '\0')
115				errx(1, "%s: illegal line count.", optarg);
116			break;
117		case 'a':		/* Suffix length. */
118			if (!isdigit((unsigned char)optarg[0]) ||
119			    (sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
120			    *ep != '\0')
121				errx(1, "%s: illegal suffix length.", optarg);
122			break;
123		case 'n':		/* Chunks. */
124			if (!isdigit((unsigned char)optarg[0]) ||
125			    (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
126			    *ep != '\0')
127				errx(1, "%s: illegal number of chunks.", optarg);
128			break;
129		default:
130			usage();
131		}
132	argv += optind;
133	argc -= optind;
134
135	if (*argv != NULL) {
136		if (strcmp(*argv, "-") != 0 &&
137		    (ifd = open(*argv, O_RDONLY, 0)) < 0)
138			err(1, "%s", *argv);
139		++argv;
140	}
141
142
143	base = (*argv != NULL) ? *argv++ : "x";
144	if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL)
145		err(EXIT_FAILURE, NULL);
146	(void)strcpy(fname, base);		/* File name prefix. */
147
148	if (*argv != NULL)
149		usage();
150
151	if (numlines == 0)
152		numlines = DEFLINE;
153	else if (bytecnt || chunks)
154		usage();
155
156	if (bytecnt && chunks)
157		usage();
158
159	if (bytecnt)
160		split1(bytecnt, 0);
161	else if (chunks)
162		split3(chunks);
163	else
164		split2(numlines);
165
166	return 0;
167}
168
169/*
170 * split1 --
171 *	Split the input by bytes.
172 */
173static void
174split1(off_t bytecnt, int maxcnt)
175{
176	off_t bcnt;
177	ssize_t dist, len;
178	char *C;
179	char bfr[MAXBSIZE];
180	int nfiles;
181
182	nfiles = 0;
183
184	for (bcnt = 0;;)
185		switch (len = read(ifd, bfr, MAXBSIZE)) {
186		case 0:
187			exit(0);
188			/* NOTREACHED */
189		case -1:
190			err(1, "read");
191			/* NOTREACHED */
192		default:
193			if (!file_open) {
194				if (!maxcnt || (nfiles < maxcnt)) {
195					newfile();
196					nfiles++;
197					file_open = 1;
198				}
199			}
200			if (bcnt + len >= bytecnt) {
201				/* LINTED: bytecnt - bcnt <= len */
202				dist = bytecnt - bcnt;
203				if (bigwrite(ofd, bfr, dist) != (size_t)dist)
204					err(1, "write");
205				len -= dist;
206				for (C = bfr + dist; len >= bytecnt;
207				    /* LINTED: bytecnt <= len */
208				    len -= bytecnt, C += bytecnt) {
209					if (!maxcnt || (nfiles < maxcnt)) {
210						newfile();
211						nfiles++;
212					}
213					/* LINTED: as above */
214					if (bigwrite(ofd,
215					    C, bytecnt) != (size_t)bytecnt)
216						err(1, "write");
217				}
218				if (len) {
219					if (!maxcnt || (nfiles < maxcnt)) {
220						newfile();
221						nfiles++;
222					}
223					/* LINTED: len >= 0 */
224					if (bigwrite(ofd, C, len) != (size_t)len)
225						err(1, "write");
226				} else
227					file_open = 0;
228				bcnt = len;
229			} else {
230				bcnt += len;
231				/* LINTED: len >= 0 */
232				if (bigwrite(ofd, bfr, len) != (size_t)len)
233					err(1, "write");
234			}
235		}
236}
237
238/*
239 * split2 --
240 *	Split the input by lines.
241 */
242static void
243split2(off_t numlines)
244{
245	off_t lcnt;
246	size_t bcnt;
247	ssize_t len;
248	char *Ce, *Cs;
249	char bfr[MAXBSIZE];
250
251	for (lcnt = 0;;)
252		switch (len = read(ifd, bfr, MAXBSIZE)) {
253		case 0:
254			exit(0);
255			/* NOTREACHED */
256		case -1:
257			err(1, "read");
258			/* NOTREACHED */
259		default:
260			if (!file_open) {
261				newfile();
262				file_open = 1;
263			}
264			for (Cs = Ce = bfr; len--; Ce++)
265				if (*Ce == '\n' && ++lcnt == numlines) {
266					bcnt = Ce - Cs + 1;
267					if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt)
268						err(1, "write");
269					lcnt = 0;
270					Cs = Ce + 1;
271					if (len)
272						newfile();
273					else
274						file_open = 0;
275				}
276			if (Cs < Ce) {
277				bcnt = Ce - Cs;
278				if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt)
279					err(1, "write");
280			}
281		}
282}
283
284/*
285 * split3 --
286 *	Split the input into specified number of chunks
287 */
288static void
289split3(off_t chunks)
290{
291	struct stat sb;
292
293	if (fstat(ifd, &sb) == -1) {
294		err(1, "stat");
295		/* NOTREACHED */
296	}
297
298	if (chunks > sb.st_size) {
299		errx(1, "can't split into more than %d files",
300				(int)sb.st_size);
301		/* NOTREACHED */
302	}
303
304	split1(sb.st_size/chunks, chunks);
305}
306
307/*
308 * newfile --
309 *	Open a new output file.
310 */
311static void
312newfile(void)
313{
314	static int fnum;
315	static char *fpnt;
316	int quot, i;
317
318	if (ofd == -1) {
319		fpnt = fname + strlen(fname);
320		fpnt[sfxlen] = '\0';
321	} else if (close(ofd) != 0)
322		err(1, "%s", fname);
323
324	quot = fnum;
325	for (i = sfxlen - 1; i >= 0; i--) {
326		fpnt[i] = quot % 26 + 'a';
327		quot = quot / 26;
328	}
329	if (quot > 0)
330		errx(1, "too many files.");
331	++fnum;
332	if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0)
333		err(1, "%s", fname);
334}
335
/*
 * bigwrite --
 *	Write len bytes from buf to fd, reissuing write(2) after short
 *	writes and after interruption by a signal (EINTR).
 *
 *	Returns the number of bytes actually written: len on success,
 *	fewer if write(2) failed, in which case errno holds the cause.
 */
static size_t
bigwrite(int fd, const void *buf, size_t len)
{
	const char *ptr = buf;
	size_t sofar = 0;
	ssize_t w;

	while (sofar < len) {
		w = write(fd, ptr + sofar, len - sofar);
		if (w == -1) {
			/* An interrupted call wrote nothing; just retry. */
			if (errno == EINTR)
				continue;
			break;
		}
		sofar += (size_t)w;
	}
	return sofar;
}
352
353
/*
 * usage --
 *	Print the command synopsis to standard error and exit with
 *	status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "usage: %s [-b byte_count] [-l line_count] [-n chunk_count] "
	    "[-a suffix_length] [file [prefix]]\n", prog);
	exit(1);
}
362