1/* $NetBSD: split.c,v 1.25 2009/04/13 11:17:06 lukem Exp $ */ 2 3/* 4 * Copyright (c) 1987, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33#ifndef lint 34__COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\ 35 The Regents of the University of California. All rights reserved."); 36#endif /* not lint */ 37 38#ifndef lint 39#if 0 40static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94"; 41#endif 42__RCSID("$NetBSD: split.c,v 1.25 2009/04/13 11:17:06 lukem Exp $"); 43#endif /* not lint */ 44 45#include <sys/param.h> 46 47#include <ctype.h> 48#include <err.h> 49#include <errno.h> 50#include <fcntl.h> 51#include <stdio.h> 52#include <stdlib.h> 53#include <string.h> 54#include <unistd.h> 55 56#define DEFLINE 1000 /* Default num lines per file. */ 57 58static int file_open; /* If a file open. */ 59static int ifd = STDIN_FILENO, ofd = -1; /* Input/output file descriptors. */ 60static char *fname; /* File name prefix. */ 61static size_t sfxlen = 2; /* suffix length. */ 62 63static void newfile(void); 64static void split1(off_t, int) __dead; 65static void split2(off_t) __dead; 66static void split3(off_t) __dead; 67static void usage(void) __dead; 68static size_t bigwrite(int, void const *, size_t); 69 70int 71main(int argc, char *argv[]) 72{ 73 int ch; 74 char *ep, *p; 75 char const *base; 76 off_t bytecnt = 0; /* Byte count to split on. */ 77 off_t numlines = 0; /* Line count to split on. */ 78 off_t chunks = 0; /* Number of chunks to split into. */ 79 80 while ((ch = getopt(argc, argv, "0123456789b:l:a:n:")) != -1) 81 switch (ch) { 82 case '0': case '1': case '2': case '3': case '4': 83 case '5': case '6': case '7': case '8': case '9': 84 /* 85 * Undocumented kludge: split was originally designed 86 * to take a number after a dash. 87 */ 88 if (numlines == 0) { 89 p = argv[optind - 1]; 90 if (p[0] == '-' && p[1] == ch && !p[2]) 91 p++; 92 else 93 p = argv[optind] + 1; 94 numlines = strtoull(p, &ep, 10); 95 if (numlines == 0 || *ep != '\0') 96 errx(1, "%s: illegal line count.", p); 97 } 98 break; 99 case 'b': /* Byte count. */ 100 if (!isdigit((unsigned char)optarg[0]) || 101 (bytecnt = strtoull(optarg, &ep, 10)) == 0 || 102 (*ep != '\0' && *ep != 'k' && *ep != 'm')) 103 errx(1, "%s: illegal byte count.", optarg); 104 if (*ep == 'k') 105 bytecnt *= 1024; 106 else if (*ep == 'm') 107 bytecnt *= 1024 * 1024; 108 break; 109 case 'l': /* Line count. */ 110 if (numlines != 0) 111 usage(); 112 if (!isdigit((unsigned char)optarg[0]) || 113 (numlines = strtoull(optarg, &ep, 10)) == 0 || 114 *ep != '\0') 115 errx(1, "%s: illegal line count.", optarg); 116 break; 117 case 'a': /* Suffix length. */ 118 if (!isdigit((unsigned char)optarg[0]) || 119 (sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 || 120 *ep != '\0') 121 errx(1, "%s: illegal suffix length.", optarg); 122 break; 123 case 'n': /* Chunks. */ 124 if (!isdigit((unsigned char)optarg[0]) || 125 (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 || 126 *ep != '\0') 127 errx(1, "%s: illegal number of chunks.", optarg); 128 break; 129 default: 130 usage(); 131 } 132 argv += optind; 133 argc -= optind; 134 135 if (*argv != NULL) { 136 if (strcmp(*argv, "-") != 0 && 137 (ifd = open(*argv, O_RDONLY, 0)) < 0) 138 err(1, "%s", *argv); 139 ++argv; 140 } 141 142 143 base = (*argv != NULL) ? *argv++ : "x"; 144 if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL) 145 err(EXIT_FAILURE, NULL); 146 (void)strcpy(fname, base); /* File name prefix. */ 147 148 if (*argv != NULL) 149 usage(); 150 151 if (numlines == 0) 152 numlines = DEFLINE; 153 else if (bytecnt || chunks) 154 usage(); 155 156 if (bytecnt && chunks) 157 usage(); 158 159 if (bytecnt) 160 split1(bytecnt, 0); 161 else if (chunks) 162 split3(chunks); 163 else 164 split2(numlines); 165 166 return 0; 167} 168 169/* 170 * split1 -- 171 * Split the input by bytes. 172 */ 173static void 174split1(off_t bytecnt, int maxcnt) 175{ 176 off_t bcnt; 177 ssize_t dist, len; 178 char *C; 179 char bfr[MAXBSIZE]; 180 int nfiles; 181 182 nfiles = 0; 183 184 for (bcnt = 0;;) 185 switch (len = read(ifd, bfr, MAXBSIZE)) { 186 case 0: 187 exit(0); 188 /* NOTREACHED */ 189 case -1: 190 err(1, "read"); 191 /* NOTREACHED */ 192 default: 193 if (!file_open) { 194 if (!maxcnt || (nfiles < maxcnt)) { 195 newfile(); 196 nfiles++; 197 file_open = 1; 198 } 199 } 200 if (bcnt + len >= bytecnt) { 201 /* LINTED: bytecnt - bcnt <= len */ 202 dist = bytecnt - bcnt; 203 if (bigwrite(ofd, bfr, dist) != (size_t)dist) 204 err(1, "write"); 205 len -= dist; 206 for (C = bfr + dist; len >= bytecnt; 207 /* LINTED: bytecnt <= len */ 208 len -= bytecnt, C += bytecnt) { 209 if (!maxcnt || (nfiles < maxcnt)) { 210 newfile(); 211 nfiles++; 212 } 213 /* LINTED: as above */ 214 if (bigwrite(ofd, 215 C, bytecnt) != (size_t)bytecnt) 216 err(1, "write"); 217 } 218 if (len) { 219 if (!maxcnt || (nfiles < maxcnt)) { 220 newfile(); 221 nfiles++; 222 } 223 /* LINTED: len >= 0 */ 224 if (bigwrite(ofd, C, len) != (size_t)len) 225 err(1, "write"); 226 } else 227 file_open = 0; 228 bcnt = len; 229 } else { 230 bcnt += len; 231 /* LINTED: len >= 0 */ 232 if (bigwrite(ofd, bfr, len) != (size_t)len) 233 err(1, "write"); 234 } 235 } 236} 237 238/* 239 * split2 -- 240 * Split the input by lines. 241 */ 242static void 243split2(off_t numlines) 244{ 245 off_t lcnt; 246 size_t bcnt; 247 ssize_t len; 248 char *Ce, *Cs; 249 char bfr[MAXBSIZE]; 250 251 for (lcnt = 0;;) 252 switch (len = read(ifd, bfr, MAXBSIZE)) { 253 case 0: 254 exit(0); 255 /* NOTREACHED */ 256 case -1: 257 err(1, "read"); 258 /* NOTREACHED */ 259 default: 260 if (!file_open) { 261 newfile(); 262 file_open = 1; 263 } 264 for (Cs = Ce = bfr; len--; Ce++) 265 if (*Ce == '\n' && ++lcnt == numlines) { 266 bcnt = Ce - Cs + 1; 267 if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt) 268 err(1, "write"); 269 lcnt = 0; 270 Cs = Ce + 1; 271 if (len) 272 newfile(); 273 else 274 file_open = 0; 275 } 276 if (Cs < Ce) { 277 bcnt = Ce - Cs; 278 if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt) 279 err(1, "write"); 280 } 281 } 282} 283 284/* 285 * split3 -- 286 * Split the input into specified number of chunks 287 */ 288static void 289split3(off_t chunks) 290{ 291 struct stat sb; 292 293 if (fstat(ifd, &sb) == -1) { 294 err(1, "stat"); 295 /* NOTREACHED */ 296 } 297 298 if (chunks > sb.st_size) { 299 errx(1, "can't split into more than %d files", 300 (int)sb.st_size); 301 /* NOTREACHED */ 302 } 303 304 split1(sb.st_size/chunks, chunks); 305} 306 307/* 308 * newfile -- 309 * Open a new output file. 310 */ 311static void 312newfile(void) 313{ 314 static int fnum; 315 static char *fpnt; 316 int quot, i; 317 318 if (ofd == -1) { 319 fpnt = fname + strlen(fname); 320 fpnt[sfxlen] = '\0'; 321 } else if (close(ofd) != 0) 322 err(1, "%s", fname); 323 324 quot = fnum; 325 for (i = sfxlen - 1; i >= 0; i--) { 326 fpnt[i] = quot % 26 + 'a'; 327 quot = quot / 26; 328 } 329 if (quot > 0) 330 errx(1, "too many files."); 331 ++fnum; 332 if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0) 333 err(1, "%s", fname); 334} 335 336static size_t 337bigwrite(int fd, const void *buf, size_t len) 338{ 339 const char *ptr = buf; 340 size_t sofar = 0; 341 ssize_t w; 342 343 while (len != 0) { 344 if ((w = write(fd, ptr, len)) == -1) 345 return sofar; 346 len -= w; 347 ptr += w; 348 sofar += w; 349 } 350 return sofar; 351} 352 353 354static void 355usage(void) 356{ 357 (void)fprintf(stderr, 358"usage: %s [-b byte_count] [-l line_count] [-n chunk_count] [-a suffix_length] " 359"[file [prefix]]\n", getprogname()); 360 exit(1); 361} 362