11590Srgrimes/*
21590Srgrimes * Copyright (c) 1987, 1993, 1994
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * Redistribution and use in source and binary forms, with or without
61590Srgrimes * modification, are permitted provided that the following conditions
71590Srgrimes * are met:
81590Srgrimes * 1. Redistributions of source code must retain the above copyright
91590Srgrimes *    notice, this list of conditions and the following disclaimer.
101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer in the
121590Srgrimes *    documentation and/or other materials provided with the distribution.
131590Srgrimes * 4. Neither the name of the University nor the names of its contributors
141590Srgrimes *    may be used to endorse or promote products derived from this software
151590Srgrimes *    without specific prior written permission.
161590Srgrimes *
171590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271590Srgrimes * SUCH DAMAGE.
281590Srgrimes */
291590Srgrimes
3087765Smarkm#include <sys/cdefs.h>
3187765Smarkm__FBSDID("$FreeBSD$");
3287765Smarkm
331590Srgrimes#ifndef lint
3428071Scharnierstatic const char copyright[] =
351590Srgrimes"@(#) Copyright (c) 1987, 1993, 1994\n\
361590Srgrimes	The Regents of the University of California.  All rights reserved.\n";
3787765Smarkm#endif
381590Srgrimes
391590Srgrimes#ifndef lint
4087765Smarkmstatic const char sccsid[] = "@(#)split.c	8.2 (Berkeley) 4/16/94";
4128071Scharnier#endif
421590Srgrimes
431590Srgrimes#include <sys/param.h>
44177256Sdas#include <sys/types.h>
45177256Sdas#include <sys/stat.h>
461590Srgrimes
471590Srgrimes#include <ctype.h>
481590Srgrimes#include <err.h>
49100024Stjr#include <errno.h>
501590Srgrimes#include <fcntl.h>
51100024Stjr#include <inttypes.h>
52100024Stjr#include <limits.h>
53131978Stjr#include <locale.h>
54251711Seadler#include <stdbool.h>
55100024Stjr#include <stdint.h>
561590Srgrimes#include <stdio.h>
571590Srgrimes#include <stdlib.h>
581590Srgrimes#include <string.h>
591590Srgrimes#include <unistd.h>
6043513Sarchie#include <regex.h>
6143513Sarchie#include <sysexits.h>
621590Srgrimes
631590Srgrimes#define DEFLINE	1000			/* Default num lines per file. */
641590Srgrimes
65251711Seadlerstatic off_t	 bytecnt;		/* Byte count to split on. */
66251711Seadlerstatic off_t	 chunks = 0;		/* Chunks count to split into. */
67251711Seadlerstatic long	 numlines;		/* Line count to split on. */
68251711Seadlerstatic int	 file_open;		/* If a file open. */
69251711Seadlerstatic int	 ifd = -1, ofd = -1;	/* Input/output file descriptors. */
70251711Seadlerstatic char	 bfr[MAXBSIZE];		/* I/O buffer. */
71251711Seadlerstatic char	 fname[MAXPATHLEN];	/* File name prefix. */
72251711Seadlerstatic regex_t	 rgx;
73251711Seadlerstatic int	 pflag;
74251711Seadlerstatic bool	 dflag;
75251711Seadlerstatic long	 sufflen = 2;		/* File name suffix length. */
761590Srgrimes
77177256Sdasstatic void newfile(void);
78177256Sdasstatic void split1(void);
79177256Sdasstatic void split2(void);
80177256Sdasstatic void split3(void);
8192922Simpstatic void usage(void);
821590Srgrimes
831590Srgrimesint
8498253Sjmallettmain(int argc, char **argv)
851590Srgrimes{
86100024Stjr	intmax_t bytecnti;
87100024Stjr	long scale;
881590Srgrimes	int ch;
891590Srgrimes	char *ep, *p;
901590Srgrimes
91131978Stjr	setlocale(LC_ALL, "");
92131978Stjr
93251711Seadler	dflag = false;
94251711Seadler	while ((ch = getopt(argc, argv, "0123456789a:b:dl:n:p:")) != -1)
951590Srgrimes		switch (ch) {
961590Srgrimes		case '0': case '1': case '2': case '3': case '4':
971590Srgrimes		case '5': case '6': case '7': case '8': case '9':
981590Srgrimes			/*
991590Srgrimes			 * Undocumented kludge: split was originally designed
1001590Srgrimes			 * to take a number after a dash.
1011590Srgrimes			 */
1021590Srgrimes			if (numlines == 0) {
1031590Srgrimes				p = argv[optind - 1];
1041590Srgrimes				if (p[0] == '-' && p[1] == ch && !p[2])
1051590Srgrimes					numlines = strtol(++p, &ep, 10);
1061590Srgrimes				else
1071590Srgrimes					numlines =
1081590Srgrimes					    strtol(argv[optind] + 1, &ep, 10);
1091590Srgrimes				if (numlines <= 0 || *ep)
11043513Sarchie					errx(EX_USAGE,
11143513Sarchie					    "%s: illegal line count", optarg);
1121590Srgrimes			}
1131590Srgrimes			break;
11490048Smike		case 'a':		/* Suffix length */
11590048Smike			if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
11690048Smike				errx(EX_USAGE,
11790048Smike				    "%s: illegal suffix length", optarg);
11890048Smike			break;
1191590Srgrimes		case 'b':		/* Byte count. */
120100024Stjr			errno = 0;
121100024Stjr			if ((bytecnti = strtoimax(optarg, &ep, 10)) <= 0 ||
122161172Skeramida			    strchr("kKmMgG", *ep) == NULL || errno != 0)
12343513Sarchie				errx(EX_USAGE,
12443513Sarchie				    "%s: illegal byte count", optarg);
125161172Skeramida			if (*ep == 'k' || *ep == 'K')
126100024Stjr				scale = 1024;
127161172Skeramida			else if (*ep == 'm' || *ep == 'M')
128100024Stjr				scale = 1024 * 1024;
129161172Skeramida			else if (*ep == 'g' || *ep == 'G')
130161172Skeramida				scale = 1024 * 1024 * 1024;
131100024Stjr			else
132100024Stjr				scale = 1;
133100024Stjr			if (bytecnti > OFF_MAX / scale)
134100024Stjr				errx(EX_USAGE, "%s: offset too large", optarg);
135100024Stjr			bytecnt = (off_t)(bytecnti * scale);
1361590Srgrimes			break;
137251711Seadler		case 'd':		/* Decimal suffix */
138251711Seadler			dflag = true;
139251711Seadler			break;
1401590Srgrimes		case 'l':		/* Line count. */
1411590Srgrimes			if (numlines != 0)
1421590Srgrimes				usage();
1439427Srgrimes			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
14443513Sarchie				errx(EX_USAGE,
14543513Sarchie				    "%s: illegal line count", optarg);
1461590Srgrimes			break;
147177256Sdas		case 'n':		/* Chunks. */
148177256Sdas			if (!isdigit((unsigned char)optarg[0]) ||
149177256Sdas			    (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
150177256Sdas			    *ep != '\0') {
151177256Sdas				errx(EX_USAGE, "%s: illegal number of chunks",
152177256Sdas				     optarg);
153177256Sdas			}
154177256Sdas			break;
155177256Sdas
156161106Skeramida		case 'p':		/* pattern matching. */
157161106Skeramida			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
158161106Skeramida				errx(EX_USAGE, "%s: illegal regexp", optarg);
159161106Skeramida			pflag = 1;
160161106Skeramida			break;
1611590Srgrimes		default:
1621590Srgrimes			usage();
1631590Srgrimes		}
1641590Srgrimes	argv += optind;
1651590Srgrimes	argc -= optind;
1661590Srgrimes
167149616Stjr	if (*argv != NULL) {			/* Input file. */
168149616Stjr		if (strcmp(*argv, "-") == 0)
169149616Stjr			ifd = STDIN_FILENO;
170149616Stjr		else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
171149616Stjr			err(EX_NOINPUT, "%s", *argv);
172149616Stjr		++argv;
173149616Stjr	}
1741590Srgrimes	if (*argv != NULL)			/* File name prefix. */
17597332Stjr		if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname))
17697332Stjr			errx(EX_USAGE, "file name prefix is too long");
1771590Srgrimes	if (*argv != NULL)
1781590Srgrimes		usage();
1791590Srgrimes
18090048Smike	if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
18190048Smike		errx(EX_USAGE, "suffix is too long");
182177256Sdas	if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0))
18343513Sarchie		usage();
18443513Sarchie
1851590Srgrimes	if (numlines == 0)
1861590Srgrimes		numlines = DEFLINE;
187177256Sdas	else if (bytecnt != 0 || chunks != 0)
1881590Srgrimes		usage();
1891590Srgrimes
190177256Sdas	if (bytecnt && chunks)
191177256Sdas		usage();
192177256Sdas
1931590Srgrimes	if (ifd == -1)				/* Stdin by default. */
1941590Srgrimes		ifd = 0;
1951590Srgrimes
1961590Srgrimes	if (bytecnt) {
1971590Srgrimes		split1();
1981590Srgrimes		exit (0);
199177256Sdas	} else if (chunks) {
200177256Sdas		split3();
201177256Sdas		exit (0);
2021590Srgrimes	}
2031590Srgrimes	split2();
20443513Sarchie	if (pflag)
20543513Sarchie		regfree(&rgx);
2061590Srgrimes	exit(0);
2071590Srgrimes}
2081590Srgrimes
2091590Srgrimes/*
2101590Srgrimes * split1 --
2111590Srgrimes *	Split the input by bytes.
2121590Srgrimes */
213177256Sdasstatic void
21498253Sjmallettsplit1(void)
2151590Srgrimes{
216100024Stjr	off_t bcnt;
2171590Srgrimes	char *C;
218100024Stjr	ssize_t dist, len;
219177256Sdas	int nfiles;
2201590Srgrimes
221177256Sdas	nfiles = 0;
222177256Sdas
2231590Srgrimes	for (bcnt = 0;;)
22443513Sarchie		switch ((len = read(ifd, bfr, MAXBSIZE))) {
2251590Srgrimes		case 0:
2261590Srgrimes			exit(0);
2271590Srgrimes		case -1:
22843513Sarchie			err(EX_IOERR, "read");
2291590Srgrimes			/* NOTREACHED */
2301590Srgrimes		default:
231177256Sdas			if (!file_open) {
232177256Sdas				if (!chunks || (nfiles < chunks)) {
233177256Sdas					newfile();
234177256Sdas					nfiles++;
235177256Sdas				}
236177256Sdas			}
237100024Stjr			if (bcnt + len >= bytecnt) {
2381590Srgrimes				dist = bytecnt - bcnt;
2391590Srgrimes				if (write(ofd, bfr, dist) != dist)
24043513Sarchie					err(EX_IOERR, "write");
2411590Srgrimes				len -= dist;
2421590Srgrimes				for (C = bfr + dist; len >= bytecnt;
2431590Srgrimes				    len -= bytecnt, C += bytecnt) {
244177256Sdas					if (!chunks || (nfiles < chunks)) {
2451590Srgrimes					newfile();
246177256Sdas						nfiles++;
247177256Sdas					}
2481590Srgrimes					if (write(ofd,
24987765Smarkm					    C, bytecnt) != bytecnt)
25043513Sarchie						err(EX_IOERR, "write");
2511590Srgrimes				}
25243513Sarchie				if (len != 0) {
253177256Sdas					if (!chunks || (nfiles < chunks)) {
2541590Srgrimes					newfile();
255177256Sdas						nfiles++;
256177256Sdas					}
2571590Srgrimes					if (write(ofd, C, len) != len)
25843513Sarchie						err(EX_IOERR, "write");
2591590Srgrimes				} else
2601590Srgrimes					file_open = 0;
2611590Srgrimes				bcnt = len;
2621590Srgrimes			} else {
2631590Srgrimes				bcnt += len;
2641590Srgrimes				if (write(ofd, bfr, len) != len)
26543513Sarchie					err(EX_IOERR, "write");
2661590Srgrimes			}
2671590Srgrimes		}
2681590Srgrimes}
2691590Srgrimes
2701590Srgrimes/*
2711590Srgrimes * split2 --
2721590Srgrimes *	Split the input by lines.
2731590Srgrimes */
274177256Sdasstatic void
27598253Sjmallettsplit2(void)
2761590Srgrimes{
27743513Sarchie	long lcnt = 0;
27843513Sarchie	FILE *infp;
2791590Srgrimes
28043513Sarchie	/* Stick a stream on top of input file descriptor */
28143513Sarchie	if ((infp = fdopen(ifd, "r")) == NULL)
28243513Sarchie		err(EX_NOINPUT, "fdopen");
28343513Sarchie
28443513Sarchie	/* Process input one line at a time */
28543513Sarchie	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
28643513Sarchie		const int len = strlen(bfr);
28743513Sarchie
28843513Sarchie		/* If line is too long to deal with, just write it out */
28943513Sarchie		if (bfr[len - 1] != '\n')
29043513Sarchie			goto writeit;
29143513Sarchie
29243513Sarchie		/* Check if we need to start a new file */
29343513Sarchie		if (pflag) {
29443513Sarchie			regmatch_t pmatch;
29543513Sarchie
29643513Sarchie			pmatch.rm_so = 0;
29743513Sarchie			pmatch.rm_eo = len - 1;
29843513Sarchie			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
2991590Srgrimes				newfile();
30043513Sarchie		} else if (lcnt++ == numlines) {
30143513Sarchie			newfile();
30243513Sarchie			lcnt = 1;
3031590Srgrimes		}
30443513Sarchie
30543513Sarchiewriteit:
30643513Sarchie		/* Open output file if needed */
30743513Sarchie		if (!file_open)
30843513Sarchie			newfile();
30943513Sarchie
31043513Sarchie		/* Write out line */
31143513Sarchie		if (write(ofd, bfr, len) != len)
31243513Sarchie			err(EX_IOERR, "write");
31343513Sarchie	}
31443513Sarchie
31543513Sarchie	/* EOF or error? */
31643513Sarchie	if (ferror(infp))
31743513Sarchie		err(EX_IOERR, "read");
31843513Sarchie	else
31943513Sarchie		exit(0);
3201590Srgrimes}
3211590Srgrimes
3221590Srgrimes/*
323177256Sdas * split3 --
324177256Sdas *	Split the input into specified number of chunks
325177256Sdas */
326177256Sdasstatic void
327177256Sdassplit3(void)
328177256Sdas{
329177256Sdas	struct stat sb;
330177256Sdas
331177256Sdas	if (fstat(ifd, &sb) == -1) {
332177256Sdas		err(1, "stat");
333177256Sdas		/* NOTREACHED */
334177256Sdas	}
335177256Sdas
336177256Sdas	if (chunks > sb.st_size) {
337177256Sdas		errx(1, "can't split into more than %d files",
338177256Sdas		    (int)sb.st_size);
339177256Sdas		/* NOTREACHED */
340177256Sdas	}
341177256Sdas
342177256Sdas	bytecnt = sb.st_size / chunks;
343177256Sdas	split1();
344177256Sdas}
345177256Sdas
346177256Sdas
347177256Sdas/*
3481590Srgrimes * newfile --
3491590Srgrimes *	Open a new output file.
3501590Srgrimes */
351177256Sdasstatic void
35298253Sjmallettnewfile(void)
3531590Srgrimes{
35490048Smike	long i, maxfiles, tfnum;
3551590Srgrimes	static long fnum;
3561590Srgrimes	static char *fpnt;
357251711Seadler	char beg, end;
358251711Seadler	int pattlen;
3591590Srgrimes
3601590Srgrimes	if (ofd == -1) {
3611590Srgrimes		if (fname[0] == '\0') {
3621590Srgrimes			fname[0] = 'x';
3631590Srgrimes			fpnt = fname + 1;
3641590Srgrimes		} else {
3651590Srgrimes			fpnt = fname + strlen(fname);
3661590Srgrimes		}
3671590Srgrimes		ofd = fileno(stdout);
3681590Srgrimes	}
36990048Smike
370251711Seadler	if (dflag) {
371251711Seadler		beg = '0';
372251711Seadler		end = '9';
373251711Seadler	}
374251711Seadler	else {
375251711Seadler		beg = 'a';
376251711Seadler		end = 'z';
377251711Seadler	}
378251711Seadler	pattlen = end - beg + 1;
379251711Seadler
380251711Seadler	/* maxfiles = pattlen^sufflen, but don't use libm. */
38190048Smike	for (maxfiles = 1, i = 0; i < sufflen; i++)
382251711Seadler		if (LONG_MAX / pattlen < maxfiles)
38390048Smike			errx(EX_USAGE, "suffix is too long (max %ld)", i);
384251711Seadler		else
385251711Seadler			maxfiles *= pattlen;
38690048Smike
387149345Stjr	if (fnum == maxfiles)
388149345Stjr		errx(EX_DATAERR, "too many files");
38990048Smike
39090048Smike	/* Generate suffix of sufflen letters */
39190048Smike	tfnum = fnum;
39290048Smike	i = sufflen - 1;
39390048Smike	do {
394251711Seadler		fpnt[i] = tfnum % pattlen + beg;
395251711Seadler		tfnum /= pattlen;
39690048Smike	} while (i-- > 0);
39790048Smike	fpnt[sufflen] = '\0';
39890048Smike
3991590Srgrimes	++fnum;
4001590Srgrimes	if (!freopen(fname, "w", stdout))
40143513Sarchie		err(EX_IOERR, "%s", fname);
40243513Sarchie	file_open = 1;
4031590Srgrimes}
4041590Srgrimes
/*
 * usage --
 *	Print the command synopsis to stderr and exit with EX_USAGE.
 */
static void
usage(void)
{
	(void)fprintf(stderr,
"usage: split [-d] [-l line_count] [-a suffix_length] [file [prefix]]\n"
"       split [-d] -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
"       split [-d] -n chunk_count [-a suffix_length] [file [prefix]]\n"
"       split [-d] -p pattern [-a suffix_length] [file [prefix]]\n");
	exit(EX_USAGE);
}
415