1110723Sseanc/* 2110723Sseanc * Copyright (C) 2003 Sean Chittenden <seanc@FreeBSD.org> 3110723Sseanc * All rights reserved. 4110723Sseanc * 5110723Sseanc * Redistribution and use in source and binary forms, with or without 6110723Sseanc * modification, are permitted provided that the following conditions 7110723Sseanc * are met: 8110723Sseanc * 1. Redistributions of source code must retain the above copyright 9110723Sseanc * notice, this list of conditions and the following disclaimer. 10110723Sseanc * 2. Redistributions in binary form must reproduce the above copyright 11110723Sseanc * notice, this list of conditions and the following disclaimer in the 12110723Sseanc * documentation and/or other materials provided with the distribution. 13110723Sseanc * 14110723Sseanc * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 15110723Sseanc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16110723Sseanc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17110723Sseanc * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 18110723Sseanc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19110723Sseanc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20110723Sseanc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21110723Sseanc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22110723Sseanc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23110723Sseanc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24110723Sseanc * SUCH DAMAGE. 25110723Sseanc */ 26110723Sseanc 27110928Sseanc#include <sys/cdefs.h> 28110928Sseanc__FBSDID("$FreeBSD$"); 29110928Sseanc 30110928Sseanc#include <sys/types.h> 31110928Sseanc#include <sys/param.h> 32110928Sseanc 33110928Sseanc#include <ctype.h> 34110928Sseanc#include <err.h> 35181412Sache#include <errno.h> 36110928Sseanc#include <stdlib.h> 37110928Sseanc#include <stdio.h> 38110928Sseanc#include <string.h> 39110928Sseanc#include <unistd.h> 40110928Sseanc 41110723Sseanc#include "randomize_fd.h" 42110723Sseanc 43110928Sseancstatic struct rand_node *rand_root; 44110928Sseancstatic struct rand_node *rand_tail; 45110723Sseanc 46110928Sseancstatic struct rand_node * 47110928Sseancrand_node_allocate(void) 48110723Sseanc{ 49110723Sseanc struct rand_node *n; 50110723Sseanc 51181615Sache n = (struct rand_node *)malloc(sizeof(struct rand_node)); 52110723Sseanc if (n == NULL) 53181615Sache err(1, "malloc"); 54110723Sseanc 55181615Sache n->len = 0; 56181615Sache n->cp = NULL; 57181615Sache n->next = NULL; 58110723Sseanc return(n); 59110723Sseanc} 60110723Sseanc 61110928Sseancstatic void 62110928Sseancrand_node_free(struct rand_node *n) 63110723Sseanc{ 64110723Sseanc if (n != NULL) { 65110723Sseanc if (n->cp != NULL) 66110723Sseanc free(n->cp); 67110723Sseanc 68110723Sseanc free(n); 69110723Sseanc } 70110723Sseanc} 71110723Sseanc 72110928Sseancstatic void 73110928Sseancrand_node_free_rec(struct rand_node *n) 74110723Sseanc{ 75110723Sseanc if (n != NULL) { 76110723Sseanc if (n->next != NULL) 77110723Sseanc rand_node_free_rec(n->next); 78110723Sseanc 79110723Sseanc rand_node_free(n); 80110723Sseanc } 81110723Sseanc} 82110723Sseanc 83110928Sseancstatic void 84110928Sseancrand_node_append(struct rand_node *n) 85110723Sseanc{ 86110928Sseanc if (rand_root == NULL) 87110723Sseanc rand_root = rand_tail = n; 88110928Sseanc else { 89110723Sseanc rand_tail->next = n; 90110723Sseanc rand_tail = n; 91110723Sseanc } 92110723Sseanc} 93110723Sseanc 94110928Sseancint 95110928Sseancrandomize_fd(int fd, int type, int unique, double denom) 96110723Sseanc{ 97157758Sache u_char *buf; 98181412Sache u_int slen; 99181412Sache u_long i, j, numnode, selected; 100110723Sseanc struct rand_node *n, *prev; 101110928Sseanc int bufleft, eof, fndstr, ret; 102181412Sache size_t bufc, buflen; 103110928Sseanc ssize_t len; 104110723Sseanc 105110723Sseanc rand_root = rand_tail = NULL; 106110928Sseanc bufc = i = 0; 107209157Suqs bufleft = eof = fndstr = numnode = 0; 108110723Sseanc 109110723Sseanc if (type == RANDOM_TYPE_UNSET) 110110723Sseanc type = RANDOM_TYPE_LINES; 111110723Sseanc 112110723Sseanc buflen = sizeof(u_char) * MAXBSIZE; 113110723Sseanc buf = (u_char *)malloc(buflen); 114110723Sseanc if (buf == NULL) 115110723Sseanc err(1, "malloc"); 116110723Sseanc 117110723Sseanc while (!eof) { 118110723Sseanc /* Check to see if we have bits in the buffer */ 119110723Sseanc if (bufleft == 0) { 120110723Sseanc len = read(fd, buf, buflen); 121110723Sseanc if (len == -1) 122110723Sseanc err(1, "read"); 123110928Sseanc else if (len == 0) { 124110928Sseanc eof++; 125110723Sseanc break; 126110928Sseanc } else if ((size_t)len < buflen) 127110928Sseanc buflen = (size_t)len; 128110723Sseanc 129110928Sseanc bufleft = (int)len; 130110723Sseanc } 131110723Sseanc 132110723Sseanc /* Look for a newline */ 133110928Sseanc for (i = bufc; i <= buflen && bufleft >= 0; i++, bufleft--) { 134110723Sseanc if (i == buflen) { 135110723Sseanc if (fndstr) { 136110723Sseanc if (!eof) { 137110723Sseanc memmove(buf, &buf[bufc], i - bufc); 138110928Sseanc i -= bufc; 139110723Sseanc bufc = 0; 140110723Sseanc len = read(fd, &buf[i], buflen - i); 141110723Sseanc if (len == -1) 142110723Sseanc err(1, "read"); 143110723Sseanc else if (len == 0) { 144110723Sseanc eof++; 145110723Sseanc break; 146110928Sseanc } else if (len < (ssize_t)(buflen - i)) 147110928Sseanc buflen = i + (size_t)len; 148110723Sseanc 149110928Sseanc bufleft = (int)len; 150110723Sseanc fndstr = 0; 151110723Sseanc } 152110723Sseanc } else { 153157758Sache buflen *= 2; 154157758Sache buf = (u_char *)realloc(buf, buflen); 155157758Sache if (buf == NULL) 156110723Sseanc err(1, "realloc"); 157110723Sseanc 158110723Sseanc if (!eof) { 159157758Sache len = read(fd, &buf[i], buflen - i); 160110723Sseanc if (len == -1) 161110723Sseanc err(1, "read"); 162110723Sseanc else if (len == 0) { 163110723Sseanc eof++; 164110723Sseanc break; 165110928Sseanc } else if (len < (ssize_t)(buflen - i)) 166157758Sache buflen = i + (size_t)len; 167110723Sseanc 168110928Sseanc bufleft = (int)len; 169110723Sseanc } 170110723Sseanc 171110723Sseanc } 172110723Sseanc } 173110723Sseanc 174110723Sseanc if ((type == RANDOM_TYPE_LINES && buf[i] == '\n') || 175157758Sache (type == RANDOM_TYPE_WORDS && isspace(buf[i])) || 176110723Sseanc (eof && i == buflen - 1)) { 177299484Scemmake_token: 178181527Sache if (numnode == RANDOM_MAX_PLUS1) { 179181412Sache errno = EFBIG; 180181527Sache err(1, "too many delimiters"); 181181412Sache } 182181412Sache numnode++; 183110723Sseanc n = rand_node_allocate(); 184110928Sseanc if (-1 != (int)i) { 185110928Sseanc slen = i - (u_long)bufc; 186110928Sseanc n->len = slen + 2; 187110928Sseanc n->cp = (u_char *)malloc(slen + 2); 188110928Sseanc if (n->cp == NULL) 189110928Sseanc err(1, "malloc"); 190110723Sseanc 191110928Sseanc memmove(n->cp, &buf[bufc], slen); 192110928Sseanc n->cp[slen] = buf[i]; 193110928Sseanc n->cp[slen + 1] = '\0'; 194110928Sseanc bufc = i + 1; 195110928Sseanc } 196110928Sseanc rand_node_append(n); 197110723Sseanc fndstr = 1; 198110723Sseanc } 199110723Sseanc } 200110723Sseanc } 201110723Sseanc 202110928Sseanc /* Necessary evil to compensate for files that don't end with a newline */ 203110928Sseanc if (bufc != i) { 204110928Sseanc i--; 205110928Sseanc goto make_token; 206110928Sseanc } 207110928Sseanc 208301574Struckman (void)close(fd); 209301574Struckman 210241847Seadler free(buf); 211241847Seadler 212110723Sseanc for (i = numnode; i > 0; i--) { 213181412Sache selected = random() % numnode; 214110723Sseanc 215110723Sseanc for (j = 0, prev = n = rand_root; n != NULL; j++, prev = n, n = n->next) { 216110723Sseanc if (j == selected) { 217110928Sseanc if (n->cp == NULL) 218110928Sseanc break; 219110928Sseanc 220181615Sache if ((int)(denom * random() / 221181615Sache RANDOM_MAX_PLUS1) == 0) { 222181615Sache ret = printf("%.*s", 223181615Sache (int)n->len - 1, n->cp); 224181412Sache if (ret < 0) 225181412Sache err(1, "printf"); 226181412Sache } 227110723Sseanc if (unique) { 228110723Sseanc if (n == rand_root) 229110723Sseanc rand_root = n->next; 230110723Sseanc if (n == rand_tail) 231110723Sseanc rand_tail = prev; 232110723Sseanc 233110723Sseanc prev->next = n->next; 234110723Sseanc rand_node_free(n); 235110723Sseanc numnode--; 236110723Sseanc } 237181412Sache break; 238110723Sseanc } 239110723Sseanc } 240110723Sseanc } 241110723Sseanc 242110723Sseanc fflush(stdout); 243110723Sseanc 244110723Sseanc if (!unique) 245110723Sseanc rand_node_free_rec(rand_root); 246110723Sseanc 247110723Sseanc return(0); 248110723Sseanc} 249