1/*
2 * Copyright (C) 2003 Sean Chittenden <seanc@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/types.h>
28#include <sys/param.h>
29
30#include <ctype.h>
31#include <err.h>
32#include <errno.h>
33#include <stdbool.h>
34#include <stdlib.h>
35#include <stdint.h>
36#include <stdio.h>
37#include <string.h>
38#include <unistd.h>
39
40#include "randomize_fd.h"
41
/* Head of the singly linked token list that randomize_fd() builds. */
static struct rand_node *rand_root;
/* Last node of that list; rand_node_append() links new nodes after it. */
static struct rand_node *rand_tail;
44
45static struct rand_node *
46rand_node_allocate(void)
47{
48	struct rand_node *n;
49
50	n = (struct rand_node *)malloc(sizeof(struct rand_node));
51	if (n == NULL)
52		err(1, "malloc");
53
54	n->len = 0;
55	n->cp = NULL;
56	n->next = NULL;
57	return(n);
58}
59
60static void
61rand_node_free(struct rand_node *n)
62{
63	if (n != NULL) {
64		if (n->cp != NULL)
65			free(n->cp);
66
67		free(n);
68	}
69}
70
71static void
72rand_node_free_rec(struct rand_node *n)
73{
74	if (n != NULL) {
75		if (n->next != NULL)
76			rand_node_free_rec(n->next);
77
78		rand_node_free(n);
79	}
80}
81
82static void
83rand_node_append(struct rand_node *n)
84{
85	if (rand_root == NULL)
86		rand_root = rand_tail = n;
87	else {
88		rand_tail->next = n;
89		rand_tail = n;
90	}
91}
92
93int
94randomize_fd(int fd, int type, int unique, double denom)
95{
96	u_char *buf;
97	u_int slen;
98	u_long i, j, numnode, selected;
99	struct rand_node *n, *prev;
100	int bufleft, eof, fndstr, ret;
101	size_t bufc, buflen;
102	ssize_t len;
103
104	rand_root = rand_tail = NULL;
105	bufc = i = 0;
106	bufleft = eof = fndstr = numnode = 0;
107
108	if (type == RANDOM_TYPE_UNSET)
109		type = RANDOM_TYPE_LINES;
110
111	buflen = sizeof(u_char) * MAXBSIZE;
112	buf = (u_char *)malloc(buflen);
113	if (buf == NULL)
114		err(1, "malloc");
115
116	while (!eof) {
117		/* Check to see if we have bits in the buffer */
118		if (bufleft == 0) {
119			len = read(fd, buf, buflen);
120			if (len == -1)
121				err(1, "read");
122			else if (len == 0) {
123				eof++;
124				break;
125			} else if ((size_t)len < buflen)
126				buflen = (size_t)len;
127
128			bufleft = (int)len;
129		}
130
131		/* Look for a newline */
132		for (i = bufc; i <= buflen && bufleft >= 0; i++, bufleft--) {
133			if (i == buflen) {
134				if (fndstr) {
135					if (!eof) {
136						memmove(buf, &buf[bufc], i - bufc);
137						i -= bufc;
138						bufc = 0;
139						len = read(fd, &buf[i], buflen - i);
140						if (len == -1)
141							err(1, "read");
142						else if (len == 0) {
143							eof++;
144							break;
145						} else if (len < (ssize_t)(buflen - i))
146							buflen = i + (size_t)len;
147
148						bufleft = (int)len;
149						fndstr = 0;
150					}
151				} else {
152					buflen *= 2;
153					buf = (u_char *)realloc(buf, buflen);
154					if (buf == NULL)
155						err(1, "realloc");
156
157					if (!eof) {
158						len = read(fd, &buf[i], buflen - i);
159						if (len == -1)
160							err(1, "read");
161						else if (len == 0) {
162							eof++;
163							break;
164						} else if (len < (ssize_t)(buflen - i))
165							buflen = i + (size_t)len;
166
167						bufleft = (int)len;
168					}
169
170				}
171			}
172
173			if ((type == RANDOM_TYPE_LINES && buf[i] == '\n') ||
174			    (type == RANDOM_TYPE_WORDS && isspace(buf[i])) ||
175			    (eof && i == buflen - 1)) {
176make_token:
177				if (numnode == UINT32_MAX - 1) {
178					errno = EFBIG;
179					err(1, "too many delimiters");
180				}
181				numnode++;
182				n = rand_node_allocate();
183				if (-1 != (int)i) {
184					slen = i - (u_long)bufc;
185					n->len = slen + 2;
186					n->cp = (u_char *)malloc(slen + 2);
187					if (n->cp == NULL)
188						err(1, "malloc");
189
190					memmove(n->cp, &buf[bufc], slen);
191					n->cp[slen] = buf[i];
192					n->cp[slen + 1] = '\0';
193					bufc = i + 1;
194				}
195				rand_node_append(n);
196				fndstr = 1;
197			}
198		}
199	}
200
201	/* Necessary evil to compensate for files that don't end with a newline */
202	if (bufc != i) {
203		i--;
204		goto make_token;
205	}
206
207	(void)close(fd);
208
209	free(buf);
210
211	for (i = numnode; i > 0; i--) {
212		selected = arc4random_uniform(numnode);
213
214		for (j = 0, prev = n = rand_root; n != NULL; j++, prev = n, n = n->next) {
215			if (j == selected) {
216				if (n->cp == NULL)
217					break;
218
219				if (random_uniform_denom(denom)) {
220					ret = printf("%.*s",
221						(int)n->len - 1, n->cp);
222					if (ret < 0)
223						err(1, "printf");
224				}
225				if (unique) {
226					if (n == rand_root)
227						rand_root = n->next;
228					if (n == rand_tail)
229						rand_tail = prev;
230
231					prev->next = n->next;
232					rand_node_free(n);
233					numnode--;
234				}
235				break;
236			}
237		}
238	}
239
240	fflush(stdout);
241
242	if (!unique)
243		rand_node_free_rec(rand_root);
244
245	return(0);
246}
247