/*
 * Copyright (c) 1987, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
281590Srgrimes */ 291590Srgrimes 3087765Smarkm#include <sys/cdefs.h> 3187765Smarkm__FBSDID("$FreeBSD$"); 3287765Smarkm 331590Srgrimes#ifndef lint 3428071Scharnierstatic const char copyright[] = 351590Srgrimes"@(#) Copyright (c) 1987, 1993, 1994\n\ 361590Srgrimes The Regents of the University of California. All rights reserved.\n"; 3787765Smarkm#endif 381590Srgrimes 391590Srgrimes#ifndef lint 4087765Smarkmstatic const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94"; 4128071Scharnier#endif 421590Srgrimes 431590Srgrimes#include <sys/param.h> 44177256Sdas#include <sys/types.h> 45177256Sdas#include <sys/stat.h> 461590Srgrimes 471590Srgrimes#include <ctype.h> 481590Srgrimes#include <err.h> 49100024Stjr#include <errno.h> 501590Srgrimes#include <fcntl.h> 51100024Stjr#include <inttypes.h> 52100024Stjr#include <limits.h> 53131978Stjr#include <locale.h> 54251711Seadler#include <stdbool.h> 55100024Stjr#include <stdint.h> 561590Srgrimes#include <stdio.h> 571590Srgrimes#include <stdlib.h> 581590Srgrimes#include <string.h> 591590Srgrimes#include <unistd.h> 6043513Sarchie#include <regex.h> 6143513Sarchie#include <sysexits.h> 621590Srgrimes 631590Srgrimes#define DEFLINE 1000 /* Default num lines per file. */ 641590Srgrimes 65251711Seadlerstatic off_t bytecnt; /* Byte count to split on. */ 66251711Seadlerstatic off_t chunks = 0; /* Chunks count to split into. */ 67251711Seadlerstatic long numlines; /* Line count to split on. */ 68251711Seadlerstatic int file_open; /* If a file open. */ 69251711Seadlerstatic int ifd = -1, ofd = -1; /* Input/output file descriptors. */ 70251711Seadlerstatic char bfr[MAXBSIZE]; /* I/O buffer. */ 71251711Seadlerstatic char fname[MAXPATHLEN]; /* File name prefix. */ 72251711Seadlerstatic regex_t rgx; 73251711Seadlerstatic int pflag; 74251711Seadlerstatic bool dflag; 75251711Seadlerstatic long sufflen = 2; /* File name suffix length. 
*/ 761590Srgrimes 77177256Sdasstatic void newfile(void); 78177256Sdasstatic void split1(void); 79177256Sdasstatic void split2(void); 80177256Sdasstatic void split3(void); 8192922Simpstatic void usage(void); 821590Srgrimes 831590Srgrimesint 8498253Sjmallettmain(int argc, char **argv) 851590Srgrimes{ 86100024Stjr intmax_t bytecnti; 87100024Stjr long scale; 881590Srgrimes int ch; 891590Srgrimes char *ep, *p; 901590Srgrimes 91131978Stjr setlocale(LC_ALL, ""); 92131978Stjr 93251711Seadler dflag = false; 94251711Seadler while ((ch = getopt(argc, argv, "0123456789a:b:dl:n:p:")) != -1) 951590Srgrimes switch (ch) { 961590Srgrimes case '0': case '1': case '2': case '3': case '4': 971590Srgrimes case '5': case '6': case '7': case '8': case '9': 981590Srgrimes /* 991590Srgrimes * Undocumented kludge: split was originally designed 1001590Srgrimes * to take a number after a dash. 1011590Srgrimes */ 1021590Srgrimes if (numlines == 0) { 1031590Srgrimes p = argv[optind - 1]; 1041590Srgrimes if (p[0] == '-' && p[1] == ch && !p[2]) 1051590Srgrimes numlines = strtol(++p, &ep, 10); 1061590Srgrimes else 1071590Srgrimes numlines = 1081590Srgrimes strtol(argv[optind] + 1, &ep, 10); 1091590Srgrimes if (numlines <= 0 || *ep) 11043513Sarchie errx(EX_USAGE, 11143513Sarchie "%s: illegal line count", optarg); 1121590Srgrimes } 1131590Srgrimes break; 11490048Smike case 'a': /* Suffix length */ 11590048Smike if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep) 11690048Smike errx(EX_USAGE, 11790048Smike "%s: illegal suffix length", optarg); 11890048Smike break; 1191590Srgrimes case 'b': /* Byte count. 
*/ 120100024Stjr errno = 0; 121100024Stjr if ((bytecnti = strtoimax(optarg, &ep, 10)) <= 0 || 122161172Skeramida strchr("kKmMgG", *ep) == NULL || errno != 0) 12343513Sarchie errx(EX_USAGE, 12443513Sarchie "%s: illegal byte count", optarg); 125161172Skeramida if (*ep == 'k' || *ep == 'K') 126100024Stjr scale = 1024; 127161172Skeramida else if (*ep == 'm' || *ep == 'M') 128100024Stjr scale = 1024 * 1024; 129161172Skeramida else if (*ep == 'g' || *ep == 'G') 130161172Skeramida scale = 1024 * 1024 * 1024; 131100024Stjr else 132100024Stjr scale = 1; 133100024Stjr if (bytecnti > OFF_MAX / scale) 134100024Stjr errx(EX_USAGE, "%s: offset too large", optarg); 135100024Stjr bytecnt = (off_t)(bytecnti * scale); 1361590Srgrimes break; 137251711Seadler case 'd': /* Decimal suffix */ 138251711Seadler dflag = true; 139251711Seadler break; 1401590Srgrimes case 'l': /* Line count. */ 1411590Srgrimes if (numlines != 0) 1421590Srgrimes usage(); 1439427Srgrimes if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) 14443513Sarchie errx(EX_USAGE, 14543513Sarchie "%s: illegal line count", optarg); 1461590Srgrimes break; 147177256Sdas case 'n': /* Chunks. */ 148177256Sdas if (!isdigit((unsigned char)optarg[0]) || 149177256Sdas (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 || 150177256Sdas *ep != '\0') { 151177256Sdas errx(EX_USAGE, "%s: illegal number of chunks", 152177256Sdas optarg); 153177256Sdas } 154177256Sdas break; 155177256Sdas 156161106Skeramida case 'p': /* pattern matching. */ 157161106Skeramida if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) 158161106Skeramida errx(EX_USAGE, "%s: illegal regexp", optarg); 159161106Skeramida pflag = 1; 160161106Skeramida break; 1611590Srgrimes default: 1621590Srgrimes usage(); 1631590Srgrimes } 1641590Srgrimes argv += optind; 1651590Srgrimes argc -= optind; 1661590Srgrimes 167149616Stjr if (*argv != NULL) { /* Input file. 
*/ 168149616Stjr if (strcmp(*argv, "-") == 0) 169149616Stjr ifd = STDIN_FILENO; 170149616Stjr else if ((ifd = open(*argv, O_RDONLY, 0)) < 0) 171149616Stjr err(EX_NOINPUT, "%s", *argv); 172149616Stjr ++argv; 173149616Stjr } 1741590Srgrimes if (*argv != NULL) /* File name prefix. */ 17597332Stjr if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname)) 17697332Stjr errx(EX_USAGE, "file name prefix is too long"); 1771590Srgrimes if (*argv != NULL) 1781590Srgrimes usage(); 1791590Srgrimes 18090048Smike if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname)) 18190048Smike errx(EX_USAGE, "suffix is too long"); 182177256Sdas if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0)) 18343513Sarchie usage(); 18443513Sarchie 1851590Srgrimes if (numlines == 0) 1861590Srgrimes numlines = DEFLINE; 187177256Sdas else if (bytecnt != 0 || chunks != 0) 1881590Srgrimes usage(); 1891590Srgrimes 190177256Sdas if (bytecnt && chunks) 191177256Sdas usage(); 192177256Sdas 1931590Srgrimes if (ifd == -1) /* Stdin by default. */ 1941590Srgrimes ifd = 0; 1951590Srgrimes 1961590Srgrimes if (bytecnt) { 1971590Srgrimes split1(); 1981590Srgrimes exit (0); 199177256Sdas } else if (chunks) { 200177256Sdas split3(); 201177256Sdas exit (0); 2021590Srgrimes } 2031590Srgrimes split2(); 20443513Sarchie if (pflag) 20543513Sarchie regfree(&rgx); 2061590Srgrimes exit(0); 2071590Srgrimes} 2081590Srgrimes 2091590Srgrimes/* 2101590Srgrimes * split1 -- 2111590Srgrimes * Split the input by bytes. 
2121590Srgrimes */ 213177256Sdasstatic void 21498253Sjmallettsplit1(void) 2151590Srgrimes{ 216100024Stjr off_t bcnt; 2171590Srgrimes char *C; 218100024Stjr ssize_t dist, len; 219177256Sdas int nfiles; 2201590Srgrimes 221177256Sdas nfiles = 0; 222177256Sdas 2231590Srgrimes for (bcnt = 0;;) 22443513Sarchie switch ((len = read(ifd, bfr, MAXBSIZE))) { 2251590Srgrimes case 0: 2261590Srgrimes exit(0); 2271590Srgrimes case -1: 22843513Sarchie err(EX_IOERR, "read"); 2291590Srgrimes /* NOTREACHED */ 2301590Srgrimes default: 231177256Sdas if (!file_open) { 232177256Sdas if (!chunks || (nfiles < chunks)) { 233177256Sdas newfile(); 234177256Sdas nfiles++; 235177256Sdas } 236177256Sdas } 237100024Stjr if (bcnt + len >= bytecnt) { 2381590Srgrimes dist = bytecnt - bcnt; 2391590Srgrimes if (write(ofd, bfr, dist) != dist) 24043513Sarchie err(EX_IOERR, "write"); 2411590Srgrimes len -= dist; 2421590Srgrimes for (C = bfr + dist; len >= bytecnt; 2431590Srgrimes len -= bytecnt, C += bytecnt) { 244177256Sdas if (!chunks || (nfiles < chunks)) { 2451590Srgrimes newfile(); 246177256Sdas nfiles++; 247177256Sdas } 2481590Srgrimes if (write(ofd, 24987765Smarkm C, bytecnt) != bytecnt) 25043513Sarchie err(EX_IOERR, "write"); 2511590Srgrimes } 25243513Sarchie if (len != 0) { 253177256Sdas if (!chunks || (nfiles < chunks)) { 2541590Srgrimes newfile(); 255177256Sdas nfiles++; 256177256Sdas } 2571590Srgrimes if (write(ofd, C, len) != len) 25843513Sarchie err(EX_IOERR, "write"); 2591590Srgrimes } else 2601590Srgrimes file_open = 0; 2611590Srgrimes bcnt = len; 2621590Srgrimes } else { 2631590Srgrimes bcnt += len; 2641590Srgrimes if (write(ofd, bfr, len) != len) 26543513Sarchie err(EX_IOERR, "write"); 2661590Srgrimes } 2671590Srgrimes } 2681590Srgrimes} 2691590Srgrimes 2701590Srgrimes/* 2711590Srgrimes * split2 -- 2721590Srgrimes * Split the input by lines. 
2731590Srgrimes */ 274177256Sdasstatic void 27598253Sjmallettsplit2(void) 2761590Srgrimes{ 27743513Sarchie long lcnt = 0; 27843513Sarchie FILE *infp; 2791590Srgrimes 28043513Sarchie /* Stick a stream on top of input file descriptor */ 28143513Sarchie if ((infp = fdopen(ifd, "r")) == NULL) 28243513Sarchie err(EX_NOINPUT, "fdopen"); 28343513Sarchie 28443513Sarchie /* Process input one line at a time */ 28543513Sarchie while (fgets(bfr, sizeof(bfr), infp) != NULL) { 28643513Sarchie const int len = strlen(bfr); 28743513Sarchie 28843513Sarchie /* If line is too long to deal with, just write it out */ 28943513Sarchie if (bfr[len - 1] != '\n') 29043513Sarchie goto writeit; 29143513Sarchie 29243513Sarchie /* Check if we need to start a new file */ 29343513Sarchie if (pflag) { 29443513Sarchie regmatch_t pmatch; 29543513Sarchie 29643513Sarchie pmatch.rm_so = 0; 29743513Sarchie pmatch.rm_eo = len - 1; 29843513Sarchie if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) 2991590Srgrimes newfile(); 30043513Sarchie } else if (lcnt++ == numlines) { 30143513Sarchie newfile(); 30243513Sarchie lcnt = 1; 3031590Srgrimes } 30443513Sarchie 30543513Sarchiewriteit: 30643513Sarchie /* Open output file if needed */ 30743513Sarchie if (!file_open) 30843513Sarchie newfile(); 30943513Sarchie 31043513Sarchie /* Write out line */ 31143513Sarchie if (write(ofd, bfr, len) != len) 31243513Sarchie err(EX_IOERR, "write"); 31343513Sarchie } 31443513Sarchie 31543513Sarchie /* EOF or error? 
*/ 31643513Sarchie if (ferror(infp)) 31743513Sarchie err(EX_IOERR, "read"); 31843513Sarchie else 31943513Sarchie exit(0); 3201590Srgrimes} 3211590Srgrimes 3221590Srgrimes/* 323177256Sdas * split3 -- 324177256Sdas * Split the input into specified number of chunks 325177256Sdas */ 326177256Sdasstatic void 327177256Sdassplit3(void) 328177256Sdas{ 329177256Sdas struct stat sb; 330177256Sdas 331177256Sdas if (fstat(ifd, &sb) == -1) { 332177256Sdas err(1, "stat"); 333177256Sdas /* NOTREACHED */ 334177256Sdas } 335177256Sdas 336177256Sdas if (chunks > sb.st_size) { 337177256Sdas errx(1, "can't split into more than %d files", 338177256Sdas (int)sb.st_size); 339177256Sdas /* NOTREACHED */ 340177256Sdas } 341177256Sdas 342177256Sdas bytecnt = sb.st_size / chunks; 343177256Sdas split1(); 344177256Sdas} 345177256Sdas 346177256Sdas 347177256Sdas/* 3481590Srgrimes * newfile -- 3491590Srgrimes * Open a new output file. 3501590Srgrimes */ 351177256Sdasstatic void 35298253Sjmallettnewfile(void) 3531590Srgrimes{ 35490048Smike long i, maxfiles, tfnum; 3551590Srgrimes static long fnum; 3561590Srgrimes static char *fpnt; 357251711Seadler char beg, end; 358251711Seadler int pattlen; 3591590Srgrimes 3601590Srgrimes if (ofd == -1) { 3611590Srgrimes if (fname[0] == '\0') { 3621590Srgrimes fname[0] = 'x'; 3631590Srgrimes fpnt = fname + 1; 3641590Srgrimes } else { 3651590Srgrimes fpnt = fname + strlen(fname); 3661590Srgrimes } 3671590Srgrimes ofd = fileno(stdout); 3681590Srgrimes } 36990048Smike 370251711Seadler if (dflag) { 371251711Seadler beg = '0'; 372251711Seadler end = '9'; 373251711Seadler } 374251711Seadler else { 375251711Seadler beg = 'a'; 376251711Seadler end = 'z'; 377251711Seadler } 378251711Seadler pattlen = end - beg + 1; 379251711Seadler 380251711Seadler /* maxfiles = pattlen^sufflen, but don't use libm. 
*/ 38190048Smike for (maxfiles = 1, i = 0; i < sufflen; i++) 382251711Seadler if (LONG_MAX / pattlen < maxfiles) 38390048Smike errx(EX_USAGE, "suffix is too long (max %ld)", i); 384251711Seadler else 385251711Seadler maxfiles *= pattlen; 38690048Smike 387149345Stjr if (fnum == maxfiles) 388149345Stjr errx(EX_DATAERR, "too many files"); 38990048Smike 39090048Smike /* Generate suffix of sufflen letters */ 39190048Smike tfnum = fnum; 39290048Smike i = sufflen - 1; 39390048Smike do { 394251711Seadler fpnt[i] = tfnum % pattlen + beg; 395251711Seadler tfnum /= pattlen; 39690048Smike } while (i-- > 0); 39790048Smike fpnt[sufflen] = '\0'; 39890048Smike 3991590Srgrimes ++fnum; 4001590Srgrimes if (!freopen(fname, "w", stdout)) 40143513Sarchie err(EX_IOERR, "%s", fname); 40243513Sarchie file_open = 1; 4031590Srgrimes} 4041590Srgrimes 40528071Scharnierstatic void 40698253Sjmallettusage(void) 4071590Srgrimes{ 40843625Sarchie (void)fprintf(stderr, 409161106Skeramida"usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n" 410161172Skeramida" split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n" 411177256Sdas" split -n chunk_count [-a suffix_length] [file [prefix]]\n" 412161106Skeramida" split -p pattern [-a suffix_length] [file [prefix]]\n"); 41343625Sarchie exit(EX_USAGE); 4141590Srgrimes} 415