1110723Sseanc/*
2110723Sseanc * Copyright (C) 2003 Sean Chittenden <seanc@FreeBSD.org>
3110723Sseanc * All rights reserved.
4110723Sseanc *
5110723Sseanc * Redistribution and use in source and binary forms, with or without
6110723Sseanc * modification, are permitted provided that the following conditions
7110723Sseanc * are met:
8110723Sseanc * 1. Redistributions of source code must retain the above copyright
9110723Sseanc *    notice, this list of conditions and the following disclaimer.
10110723Sseanc * 2. Redistributions in binary form must reproduce the above copyright
11110723Sseanc *    notice, this list of conditions and the following disclaimer in the
12110723Sseanc *    documentation and/or other materials provided with the distribution.
13110723Sseanc *
14110723Sseanc * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
15110723Sseanc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16110723Sseanc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17110723Sseanc * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
18110723Sseanc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19110723Sseanc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20110723Sseanc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21110723Sseanc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22110723Sseanc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23110723Sseanc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24110723Sseanc * SUCH DAMAGE.
25110723Sseanc */
26110723Sseanc
27110928Sseanc#include <sys/cdefs.h>
28110928Sseanc__FBSDID("$FreeBSD$");
29110928Sseanc
30110928Sseanc#include <sys/types.h>
31110928Sseanc#include <sys/param.h>
32110928Sseanc
33110928Sseanc#include <ctype.h>
34110928Sseanc#include <err.h>
35181412Sache#include <errno.h>
36110928Sseanc#include <stdlib.h>
37110928Sseanc#include <stdio.h>
38110928Sseanc#include <string.h>
39110928Sseanc#include <unistd.h>
40110928Sseanc
41110723Sseanc#include "randomize_fd.h"
42110723Sseanc
43110928Sseancstatic struct rand_node *rand_root;
44110928Sseancstatic struct rand_node *rand_tail;
45110723Sseanc
46110928Sseancstatic struct rand_node *
47110928Sseancrand_node_allocate(void)
48110723Sseanc{
49110723Sseanc	struct rand_node *n;
50110723Sseanc
51181615Sache	n = (struct rand_node *)malloc(sizeof(struct rand_node));
52110723Sseanc	if (n == NULL)
53181615Sache		err(1, "malloc");
54110723Sseanc
55181615Sache	n->len = 0;
56181615Sache	n->cp = NULL;
57181615Sache	n->next = NULL;
58110723Sseanc	return(n);
59110723Sseanc}
60110723Sseanc
61110928Sseancstatic void
62110928Sseancrand_node_free(struct rand_node *n)
63110723Sseanc{
64110723Sseanc	if (n != NULL) {
65110723Sseanc		if (n->cp != NULL)
66110723Sseanc			free(n->cp);
67110723Sseanc
68110723Sseanc		free(n);
69110723Sseanc	}
70110723Sseanc}
71110723Sseanc
72110928Sseancstatic void
73110928Sseancrand_node_free_rec(struct rand_node *n)
74110723Sseanc{
75110723Sseanc	if (n != NULL) {
76110723Sseanc		if (n->next != NULL)
77110723Sseanc			rand_node_free_rec(n->next);
78110723Sseanc
79110723Sseanc		rand_node_free(n);
80110723Sseanc	}
81110723Sseanc}
82110723Sseanc
83110928Sseancstatic void
84110928Sseancrand_node_append(struct rand_node *n)
85110723Sseanc{
86110928Sseanc	if (rand_root == NULL)
87110723Sseanc		rand_root = rand_tail = n;
88110928Sseanc	else {
89110723Sseanc		rand_tail->next = n;
90110723Sseanc		rand_tail = n;
91110723Sseanc	}
92110723Sseanc}
93110723Sseanc
94110928Sseancint
95110928Sseancrandomize_fd(int fd, int type, int unique, double denom)
96110723Sseanc{
97157758Sache	u_char *buf;
98181412Sache	u_int slen;
99181412Sache	u_long i, j, numnode, selected;
100110723Sseanc	struct rand_node *n, *prev;
101110928Sseanc	int bufleft, eof, fndstr, ret;
102181412Sache	size_t bufc, buflen;
103110928Sseanc	ssize_t len;
104110723Sseanc
105110723Sseanc	rand_root = rand_tail = NULL;
106110928Sseanc	bufc = i = 0;
107209157Suqs	bufleft = eof = fndstr = numnode = 0;
108110723Sseanc
109110723Sseanc	if (type == RANDOM_TYPE_UNSET)
110110723Sseanc		type = RANDOM_TYPE_LINES;
111110723Sseanc
112110723Sseanc	buflen = sizeof(u_char) * MAXBSIZE;
113110723Sseanc	buf = (u_char *)malloc(buflen);
114110723Sseanc	if (buf == NULL)
115110723Sseanc		err(1, "malloc");
116110723Sseanc
117110723Sseanc	while (!eof) {
118110723Sseanc		/* Check to see if we have bits in the buffer */
119110723Sseanc		if (bufleft == 0) {
120110723Sseanc			len = read(fd, buf, buflen);
121110723Sseanc			if (len == -1)
122110723Sseanc				err(1, "read");
123110928Sseanc			else if (len == 0) {
124110928Sseanc				eof++;
125110723Sseanc				break;
126110928Sseanc			} else if ((size_t)len < buflen)
127110928Sseanc				buflen = (size_t)len;
128110723Sseanc
129110928Sseanc			bufleft = (int)len;
130110723Sseanc		}
131110723Sseanc
132110723Sseanc		/* Look for a newline */
133110928Sseanc		for (i = bufc; i <= buflen && bufleft >= 0; i++, bufleft--) {
134110723Sseanc			if (i == buflen) {
135110723Sseanc				if (fndstr) {
136110723Sseanc					if (!eof) {
137110723Sseanc						memmove(buf, &buf[bufc], i - bufc);
138110928Sseanc						i -= bufc;
139110723Sseanc						bufc = 0;
140110723Sseanc						len = read(fd, &buf[i], buflen - i);
141110723Sseanc						if (len == -1)
142110723Sseanc							err(1, "read");
143110723Sseanc						else if (len == 0) {
144110723Sseanc							eof++;
145110723Sseanc							break;
146110928Sseanc						} else if (len < (ssize_t)(buflen - i))
147110928Sseanc							buflen = i + (size_t)len;
148110723Sseanc
149110928Sseanc						bufleft = (int)len;
150110723Sseanc						fndstr = 0;
151110723Sseanc					}
152110723Sseanc				} else {
153157758Sache					buflen *= 2;
154157758Sache					buf = (u_char *)realloc(buf, buflen);
155157758Sache					if (buf == NULL)
156110723Sseanc						err(1, "realloc");
157110723Sseanc
158110723Sseanc					if (!eof) {
159157758Sache						len = read(fd, &buf[i], buflen - i);
160110723Sseanc						if (len == -1)
161110723Sseanc							err(1, "read");
162110723Sseanc						else if (len == 0) {
163110723Sseanc							eof++;
164110723Sseanc							break;
165110928Sseanc						} else if (len < (ssize_t)(buflen - i))
166157758Sache							buflen = i + (size_t)len;
167110723Sseanc
168110928Sseanc						bufleft = (int)len;
169110723Sseanc					}
170110723Sseanc
171110723Sseanc				}
172110723Sseanc			}
173110723Sseanc
174110723Sseanc			if ((type == RANDOM_TYPE_LINES && buf[i] == '\n') ||
175157758Sache			    (type == RANDOM_TYPE_WORDS && isspace(buf[i])) ||
176110723Sseanc			    (eof && i == buflen - 1)) {
177299484Scemmake_token:
178181527Sache				if (numnode == RANDOM_MAX_PLUS1) {
179181412Sache					errno = EFBIG;
180181527Sache					err(1, "too many delimiters");
181181412Sache				}
182181412Sache				numnode++;
183110723Sseanc				n = rand_node_allocate();
184110928Sseanc				if (-1 != (int)i) {
185110928Sseanc					slen = i - (u_long)bufc;
186110928Sseanc					n->len = slen + 2;
187110928Sseanc					n->cp = (u_char *)malloc(slen + 2);
188110928Sseanc					if (n->cp == NULL)
189110928Sseanc						err(1, "malloc");
190110723Sseanc
191110928Sseanc					memmove(n->cp, &buf[bufc], slen);
192110928Sseanc					n->cp[slen] = buf[i];
193110928Sseanc					n->cp[slen + 1] = '\0';
194110928Sseanc					bufc = i + 1;
195110928Sseanc				}
196110928Sseanc				rand_node_append(n);
197110723Sseanc				fndstr = 1;
198110723Sseanc			}
199110723Sseanc		}
200110723Sseanc	}
201110723Sseanc
202110928Sseanc	/* Necessary evil to compensate for files that don't end with a newline */
203110928Sseanc	if (bufc != i) {
204110928Sseanc		i--;
205110928Sseanc		goto make_token;
206110928Sseanc	}
207110928Sseanc
208301574Struckman	(void)close(fd);
209301574Struckman
210241847Seadler	free(buf);
211241847Seadler
212110723Sseanc	for (i = numnode; i > 0; i--) {
213181412Sache		selected = random() % numnode;
214110723Sseanc
215110723Sseanc		for (j = 0, prev = n = rand_root; n != NULL; j++, prev = n, n = n->next) {
216110723Sseanc			if (j == selected) {
217110928Sseanc				if (n->cp == NULL)
218110928Sseanc					break;
219110928Sseanc
220181615Sache				if ((int)(denom * random() /
221181615Sache					RANDOM_MAX_PLUS1) == 0) {
222181615Sache					ret = printf("%.*s",
223181615Sache						(int)n->len - 1, n->cp);
224181412Sache					if (ret < 0)
225181412Sache						err(1, "printf");
226181412Sache				}
227110723Sseanc				if (unique) {
228110723Sseanc					if (n == rand_root)
229110723Sseanc						rand_root = n->next;
230110723Sseanc					if (n == rand_tail)
231110723Sseanc						rand_tail = prev;
232110723Sseanc
233110723Sseanc					prev->next = n->next;
234110723Sseanc					rand_node_free(n);
235110723Sseanc					numnode--;
236110723Sseanc				}
237181412Sache				break;
238110723Sseanc			}
239110723Sseanc		}
240110723Sseanc	}
241110723Sseanc
242110723Sseanc	fflush(stdout);
243110723Sseanc
244110723Sseanc	if (!unique)
245110723Sseanc		rand_node_free_rec(rand_root);
246110723Sseanc
247110723Sseanc	return(0);
248110723Sseanc}
249