1/*-
2 * Copyright (c) 2008 Joerg Sonnenberger
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26/*-
27 * Copyright (c) 1985, 1986, 1992, 1993
28 *	The Regents of the University of California.  All rights reserved.
29 *
30 * This code is derived from software contributed to Berkeley by
31 * Diomidis Spinellis and James A. Woods, derived from original
32 * work by Spencer Thomas and Joseph Orost.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. Neither the name of the University nor the names of its contributors
43 *    may be used to endorse or promote products derived from this software
44 *    without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 */
58
59#include "archive_platform.h"
60
61__FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_compression_compress.c 201111 2009-12-28 03:33:05Z kientzle $");
62
63#ifdef HAVE_ERRNO_H
64#include <errno.h>
65#endif
66#ifdef HAVE_STDLIB_H
67#include <stdlib.h>
68#endif
69#ifdef HAVE_STRING_H
70#include <string.h>
71#endif
72
73#include "archive.h"
74#include "archive_private.h"
75#include "archive_write_private.h"
76
77#define	HSIZE		69001	/* 95% occupancy */
78#define	HSHIFT		8	/* 8 - trunc(log2(HSIZE / 65536)) */
79#define	CHECK_GAP 10000		/* Ratio check interval. */
80
81#define	MAXCODE(bits)	((1 << (bits)) - 1)
82
83/*
84 * the next two codes should not be changed lightly, as they must not
85 * lie within the contiguous general code space.
86 */
87#define	FIRST	257		/* First free entry. */
88#define	CLEAR	256		/* Table clear output code. */
89
90struct private_data {
91	int64_t in_count, out_count, checkpoint;
92
93	int code_len;			/* Number of bits/code. */
94	int cur_maxcode;		/* Maximum code, given n_bits. */
95	int max_maxcode;		/* Should NEVER generate this code. */
96	int hashtab [HSIZE];
97	unsigned short codetab [HSIZE];
98	int first_free;		/* First unused entry. */
99	int compress_ratio;
100
101	int cur_code, cur_fcode;
102
103	int bit_offset;
104	unsigned char bit_buf;
105
106	unsigned char	*compressed;
107	size_t		 compressed_buffer_size;
108	size_t		 compressed_offset;
109};
110
111static int archive_compressor_compress_open(struct archive_write_filter *);
112static int archive_compressor_compress_write(struct archive_write_filter *,
113		    const void *, size_t);
114static int archive_compressor_compress_close(struct archive_write_filter *);
115static int archive_compressor_compress_free(struct archive_write_filter *);
116
117#if ARCHIVE_VERSION_NUMBER < 4000000
118int
119archive_write_set_compression_compress(struct archive *a)
120{
121	__archive_write_filters_free(a);
122	return (archive_write_add_filter_compress(a));
123}
124#endif
125
126/*
127 * Add a compress filter to this write handle.
128 */
129int
130archive_write_add_filter_compress(struct archive *_a)
131{
132	struct archive_write *a = (struct archive_write *)_a;
133	struct archive_write_filter *f = __archive_write_allocate_filter(_a);
134
135	archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
136	    ARCHIVE_STATE_NEW, "archive_write_add_filter_compress");
137	f->open = &archive_compressor_compress_open;
138	f->code = ARCHIVE_FILTER_COMPRESS;
139	f->name = "compress";
140	return (ARCHIVE_OK);
141}
142
143/*
144 * Setup callback.
145 */
146static int
147archive_compressor_compress_open(struct archive_write_filter *f)
148{
149	int ret;
150	struct private_data *state;
151	size_t bs = 65536, bpb;
152
153	f->code = ARCHIVE_FILTER_COMPRESS;
154	f->name = "compress";
155
156	ret = __archive_write_open_filter(f->next_filter);
157	if (ret != ARCHIVE_OK)
158		return (ret);
159
160	state = (struct private_data *)calloc(1, sizeof(*state));
161	if (state == NULL) {
162		archive_set_error(f->archive, ENOMEM,
163		    "Can't allocate data for compression");
164		return (ARCHIVE_FATAL);
165	}
166
167	if (f->archive->magic == ARCHIVE_WRITE_MAGIC) {
168		/* Buffer size should be a multiple number of the of bytes
169		 * per block for performance. */
170		bpb = archive_write_get_bytes_per_block(f->archive);
171		if (bpb > bs)
172			bs = bpb;
173		else if (bpb != 0)
174			bs -= bs % bpb;
175	}
176	state->compressed_buffer_size = bs;
177	state->compressed = malloc(state->compressed_buffer_size);
178
179	if (state->compressed == NULL) {
180		archive_set_error(f->archive, ENOMEM,
181		    "Can't allocate data for compression buffer");
182		free(state);
183		return (ARCHIVE_FATAL);
184	}
185
186	f->write = archive_compressor_compress_write;
187	f->close = archive_compressor_compress_close;
188	f->free = archive_compressor_compress_free;
189
190	state->max_maxcode = 0x10000;	/* Should NEVER generate this code. */
191	state->in_count = 0;		/* Length of input. */
192	state->bit_buf = 0;
193	state->bit_offset = 0;
194	state->out_count = 3;		/* Includes 3-byte header mojo. */
195	state->compress_ratio = 0;
196	state->checkpoint = CHECK_GAP;
197	state->code_len = 9;
198	state->cur_maxcode = MAXCODE(state->code_len);
199	state->first_free = FIRST;
200
201	memset(state->hashtab, 0xff, sizeof(state->hashtab));
202
203	/* Prime output buffer with a gzip header. */
204	state->compressed[0] = 0x1f; /* Compress */
205	state->compressed[1] = 0x9d;
206	state->compressed[2] = 0x90; /* Block mode, 16bit max */
207	state->compressed_offset = 3;
208
209	f->data = state;
210	return (0);
211}
212
213/*-
214 * Output the given code.
215 * Inputs:
216 * 	code:	A n_bits-bit integer.  If == -1, then EOF.  This assumes
217 *		that n_bits <= (long)wordsize - 1.
218 * Outputs:
219 * 	Outputs code to the file.
220 * Assumptions:
221 *	Chars are 8 bits long.
222 * Algorithm:
223 * 	Maintain a BITS character long buffer (so that 8 codes will
224 * fit in it exactly).  Use the VAX insv instruction to insert each
225 * code in turn.  When the buffer fills up empty it and start over.
226 */
227
228static const unsigned char rmask[9] =
229	{0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
230
231static int
232output_byte(struct archive_write_filter *f, unsigned char c)
233{
234	struct private_data *state = f->data;
235
236	state->compressed[state->compressed_offset++] = c;
237	++state->out_count;
238
239	if (state->compressed_buffer_size == state->compressed_offset) {
240		int ret = __archive_write_filter(f->next_filter,
241		    state->compressed, state->compressed_buffer_size);
242		if (ret != ARCHIVE_OK)
243			return ARCHIVE_FATAL;
244		state->compressed_offset = 0;
245	}
246
247	return ARCHIVE_OK;
248}
249
250static int
251output_code(struct archive_write_filter *f, int ocode)
252{
253	struct private_data *state = f->data;
254	int bits, ret, clear_flg, bit_offset;
255
256	clear_flg = ocode == CLEAR;
257
258	/*
259	 * Since ocode is always >= 8 bits, only need to mask the first
260	 * hunk on the left.
261	 */
262	bit_offset = state->bit_offset % 8;
263	state->bit_buf |= (ocode << bit_offset) & 0xff;
264	output_byte(f, state->bit_buf);
265
266	bits = state->code_len - (8 - bit_offset);
267	ocode >>= 8 - bit_offset;
268	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
269	if (bits >= 8) {
270		output_byte(f, ocode & 0xff);
271		ocode >>= 8;
272		bits -= 8;
273	}
274	/* Last bits. */
275	state->bit_offset += state->code_len;
276	state->bit_buf = ocode & rmask[bits];
277	if (state->bit_offset == state->code_len * 8)
278		state->bit_offset = 0;
279
280	/*
281	 * If the next entry is going to be too big for the ocode size,
282	 * then increase it, if possible.
283	 */
284	if (clear_flg || state->first_free > state->cur_maxcode) {
285	       /*
286		* Write the whole buffer, because the input side won't
287		* discover the size increase until after it has read it.
288		*/
289		if (state->bit_offset > 0) {
290			while (state->bit_offset < state->code_len * 8) {
291				ret = output_byte(f, state->bit_buf);
292				if (ret != ARCHIVE_OK)
293					return ret;
294				state->bit_offset += 8;
295				state->bit_buf = 0;
296			}
297		}
298		state->bit_buf = 0;
299		state->bit_offset = 0;
300
301		if (clear_flg) {
302			state->code_len = 9;
303			state->cur_maxcode = MAXCODE(state->code_len);
304		} else {
305			state->code_len++;
306			if (state->code_len == 16)
307				state->cur_maxcode = state->max_maxcode;
308			else
309				state->cur_maxcode = MAXCODE(state->code_len);
310		}
311	}
312
313	return (ARCHIVE_OK);
314}
315
316static int
317output_flush(struct archive_write_filter *f)
318{
319	struct private_data *state = f->data;
320	int ret;
321
322	/* At EOF, write the rest of the buffer. */
323	if (state->bit_offset % 8) {
324		state->code_len = (state->bit_offset % 8 + 7) / 8;
325		ret = output_byte(f, state->bit_buf);
326		if (ret != ARCHIVE_OK)
327			return ret;
328	}
329
330	return (ARCHIVE_OK);
331}
332
333/*
334 * Write data to the compressed stream.
335 */
336static int
337archive_compressor_compress_write(struct archive_write_filter *f,
338    const void *buff, size_t length)
339{
340	struct private_data *state = (struct private_data *)f->data;
341	int i;
342	int ratio;
343	int c, disp, ret;
344	const unsigned char *bp;
345
346	if (length == 0)
347		return ARCHIVE_OK;
348
349	bp = buff;
350
351	if (state->in_count == 0) {
352		state->cur_code = *bp++;
353		++state->in_count;
354		--length;
355	}
356
357	while (length--) {
358		c = *bp++;
359		state->in_count++;
360		state->cur_fcode = (c << 16) + state->cur_code;
361		i = ((c << HSHIFT) ^ state->cur_code);	/* Xor hashing. */
362
363		if (state->hashtab[i] == state->cur_fcode) {
364			state->cur_code = state->codetab[i];
365			continue;
366		}
367		if (state->hashtab[i] < 0)	/* Empty slot. */
368			goto nomatch;
369		/* Secondary hash (after G. Knott). */
370		if (i == 0)
371			disp = 1;
372		else
373			disp = HSIZE - i;
374 probe:
375		if ((i -= disp) < 0)
376			i += HSIZE;
377
378		if (state->hashtab[i] == state->cur_fcode) {
379			state->cur_code = state->codetab[i];
380			continue;
381		}
382		if (state->hashtab[i] >= 0)
383			goto probe;
384 nomatch:
385		ret = output_code(f, state->cur_code);
386		if (ret != ARCHIVE_OK)
387			return ret;
388		state->cur_code = c;
389		if (state->first_free < state->max_maxcode) {
390			state->codetab[i] = state->first_free++;	/* code -> hashtable */
391			state->hashtab[i] = state->cur_fcode;
392			continue;
393		}
394		if (state->in_count < state->checkpoint)
395			continue;
396
397		state->checkpoint = state->in_count + CHECK_GAP;
398
399		if (state->in_count <= 0x007fffff && state->out_count != 0)
400			ratio = (int)(state->in_count * 256 / state->out_count);
401		else if ((ratio = (int)(state->out_count / 256)) == 0)
402			ratio = 0x7fffffff;
403		else
404			ratio = (int)(state->in_count / ratio);
405
406		if (ratio > state->compress_ratio)
407			state->compress_ratio = ratio;
408		else {
409			state->compress_ratio = 0;
410			memset(state->hashtab, 0xff, sizeof(state->hashtab));
411			state->first_free = FIRST;
412			ret = output_code(f, CLEAR);
413			if (ret != ARCHIVE_OK)
414				return ret;
415		}
416	}
417
418	return (ARCHIVE_OK);
419}
420
421
422/*
423 * Finish the compression...
424 */
425static int
426archive_compressor_compress_close(struct archive_write_filter *f)
427{
428	struct private_data *state = (struct private_data *)f->data;
429	int ret, ret2;
430
431	ret = output_code(f, state->cur_code);
432	if (ret != ARCHIVE_OK)
433		goto cleanup;
434	ret = output_flush(f);
435	if (ret != ARCHIVE_OK)
436		goto cleanup;
437
438	/* Write the last block */
439	ret = __archive_write_filter(f->next_filter,
440	    state->compressed, state->compressed_offset);
441cleanup:
442	ret2 = __archive_write_close_filter(f->next_filter);
443	if (ret > ret2)
444		ret = ret2;
445	free(state->compressed);
446	free(state);
447	return (ret);
448}
449
450static int
451archive_compressor_compress_free(struct archive_write_filter *f)
452{
453	(void)f; /* UNUSED */
454	return (ARCHIVE_OK);
455}
456