1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm#include "archive_platform.h"
27228753Smm
28231200Smm__FBSDID("$FreeBSD$");
29228753Smm
30228753Smm#ifdef HAVE_ERRNO_H
31228753Smm#include <errno.h>
32228753Smm#endif
33228753Smm#include <stdio.h>
34228753Smm#ifdef HAVE_STDLIB_H
35228753Smm#include <stdlib.h>
36228753Smm#endif
37228753Smm#ifdef HAVE_STRING_H
38228753Smm#include <string.h>
39228753Smm#endif
40228753Smm#ifdef HAVE_UNISTD_H
41228753Smm#include <unistd.h>
42228753Smm#endif
43228753Smm#ifdef HAVE_BZLIB_H
44228753Smm#include <bzlib.h>
45228753Smm#endif
46228753Smm
47228753Smm#include "archive.h"
48228753Smm#include "archive_private.h"
49228753Smm#include "archive_read_private.h"
50228753Smm
#if defined(HAVE_BZLIB_H) && defined(BZ_CONFIG_ERROR)
/*
 * Per-filter decompression state.  Only compiled when bzlib is
 * available.
 */
struct private_data {
	/* bzlib stream handed to the BZ2_bzDecompress*() calls. */
	bz_stream	 stream;
	/* Buffer that receives the decompressed output. */
	char		*out_block;
	/* Capacity of out_block, in bytes. */
	size_t		 out_block_size;
	/* True = decompressor is initialized */
	char		 valid;
	/* True = found end of compressed data. */
	char		 eof;
};

/* Bzip2 filter callbacks, installed by bzip2_reader_init(). */
static ssize_t	bzip2_filter_read(struct archive_read_filter *self,
		    const void **p);
static int	bzip2_filter_close(struct archive_read_filter *self);
#endif
64228753Smm
/*
 * Bidder callbacks.
 *
 * We can recognize bzip2 archives even when we cannot decompress them
 * ourselves, and recognizing them lets us give better error messages.
 * The bidding code is therefore compiled whether or not bzlib is
 * available.
 */
static int	bzip2_reader_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *filter);
static int	bzip2_reader_init(struct archive_read_filter *self);
static int	bzip2_reader_free(struct archive_read_filter_bidder *self);
74228753Smm
#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Legacy name for archive_read_support_filter_bzip2(); forwards the
 * call and returns its result unchanged.
 *
 * Deprecated; remove in libarchive 4.0
 */
int
archive_read_support_compression_bzip2(struct archive *a)
{
	int status;

	status = archive_read_support_filter_bzip2(a);
	return (status);
}
#endif
83231200Smm
84231200Smmint
85231200Smmarchive_read_support_filter_bzip2(struct archive *_a)
86231200Smm{
87228753Smm	struct archive_read *a = (struct archive_read *)_a;
88231200Smm	struct archive_read_filter_bidder *reader;
89228753Smm
90231200Smm	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
91231200Smm	    ARCHIVE_STATE_NEW, "archive_read_support_filter_bzip2");
92231200Smm
93231200Smm	if (__archive_read_get_bidder(a, &reader) != ARCHIVE_OK)
94228753Smm		return (ARCHIVE_FATAL);
95228753Smm
96228753Smm	reader->data = NULL;
97248616Smm	reader->name = "bzip2";
98228753Smm	reader->bid = bzip2_reader_bid;
99228753Smm	reader->init = bzip2_reader_init;
100228753Smm	reader->options = NULL;
101228753Smm	reader->free = bzip2_reader_free;
102228753Smm#if defined(HAVE_BZLIB_H) && defined(BZ_CONFIG_ERROR)
103228753Smm	return (ARCHIVE_OK);
104228753Smm#else
105228753Smm	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
106248616Smm	    "Using external bzip2 program");
107228753Smm	return (ARCHIVE_WARN);
108228753Smm#endif
109228753Smm}
110228753Smm
111228753Smmstatic int
112228753Smmbzip2_reader_free(struct archive_read_filter_bidder *self){
113228753Smm	(void)self; /* UNUSED */
114228753Smm	return (ARCHIVE_OK);
115228753Smm}
116228753Smm
/*
 * Test whether we can handle this data.
 *
 * Asks the upstream filter for 14 bytes of look-ahead (the size of a
 * minimal bzip2 archive) and checks the bzip2 signature in it.
 *
 * Returns 0 if the look-ahead request yields NULL or if any part of
 * the signature fails to match.  Otherwise returns the number of bits
 * that were verified (24 + 5 + 48).
 */
static int
bzip2_reader_bid(struct archive_read_filter_bidder *self, struct archive_read_filter *filter)
{
	/* Start-of-block marker, 0x314159265359. */
	static const unsigned char block_magic[6] = {
		0x31, 0x41, 0x59, 0x26, 0x53, 0x59
	};
	/* End-of-data marker, 0x177245385090. */
	static const unsigned char eos_magic[6] = {
		0x17, 0x72, 0x45, 0x38, 0x50, 0x90
	};
	const unsigned char *p;
	ssize_t avail;
	int score;

	(void)self; /* UNUSED */

	/* Minimal bzip2 archive is 14 bytes. */
	p = __archive_read_filter_ahead(filter, 14, &avail);
	if (p == NULL)
		return (0);

	/* First three bytes must be "BZh" */
	if (p[0] != 'B' || p[1] != 'Z' || p[2] != 'h')
		return (0);
	score = 24;

	/* Next follows a compression flag which must be an ASCII digit. */
	if (!(p[3] >= '1' && p[3] <= '9'))
		return (0);
	score += 5;

	/* After BZh[1-9], there must be either a data block or an
	 * end-of-data marker. */
	if (memcmp(p + 4, block_magic, sizeof(block_magic)) != 0 &&
	    memcmp(p + 4, eos_magic, sizeof(eos_magic)) != 0)
		return (0);
	score += 48;

	return (score);
}
161228753Smm
162228753Smm#if !defined(HAVE_BZLIB_H) || !defined(BZ_CONFIG_ERROR)
163228753Smm
164228753Smm/*
165228753Smm * If we don't have the library on this system, we can't actually do the
166228753Smm * decompression.  We can, however, still detect compressed archives
167228753Smm * and emit a useful message.
168228753Smm */
169228753Smmstatic int
170228753Smmbzip2_reader_init(struct archive_read_filter *self)
171228753Smm{
172228753Smm	int r;
173228753Smm
174248616Smm	r = __archive_read_program(self, "bzip2 -d");
175228753Smm	/* Note: We set the format here even if __archive_read_program()
176228753Smm	 * above fails.  We do, after all, know what the format is
177228753Smm	 * even if we weren't able to read it. */
178248616Smm	self->code = ARCHIVE_FILTER_BZIP2;
179228753Smm	self->name = "bzip2";
180228753Smm	return (r);
181228753Smm}
182228753Smm
183228753Smm
184228753Smm#else
185228753Smm
186228753Smm/*
187228753Smm * Setup the callbacks.
188228753Smm */
189228753Smmstatic int
190228753Smmbzip2_reader_init(struct archive_read_filter *self)
191228753Smm{
192228753Smm	static const size_t out_block_size = 64 * 1024;
193228753Smm	void *out_block;
194228753Smm	struct private_data *state;
195228753Smm
196248616Smm	self->code = ARCHIVE_FILTER_BZIP2;
197228753Smm	self->name = "bzip2";
198228753Smm
199228753Smm	state = (struct private_data *)calloc(sizeof(*state), 1);
200228753Smm	out_block = (unsigned char *)malloc(out_block_size);
201231200Smm	if (state == NULL || out_block == NULL) {
202228753Smm		archive_set_error(&self->archive->archive, ENOMEM,
203228753Smm		    "Can't allocate data for bzip2 decompression");
204228753Smm		free(out_block);
205228753Smm		free(state);
206228753Smm		return (ARCHIVE_FATAL);
207228753Smm	}
208228753Smm
209228753Smm	self->data = state;
210228753Smm	state->out_block_size = out_block_size;
211228753Smm	state->out_block = out_block;
212228753Smm	self->read = bzip2_filter_read;
213228753Smm	self->skip = NULL; /* not supported */
214228753Smm	self->close = bzip2_filter_close;
215228753Smm
216228753Smm	return (ARCHIVE_OK);
217228753Smm}
218228753Smm
219228753Smm/*
220228753Smm * Return the next block of decompressed data.
221228753Smm */
222228753Smmstatic ssize_t
223228753Smmbzip2_filter_read(struct archive_read_filter *self, const void **p)
224228753Smm{
225228753Smm	struct private_data *state;
226228753Smm	size_t decompressed;
227228753Smm	const char *read_buf;
228228753Smm	ssize_t ret;
229228753Smm
230228753Smm	state = (struct private_data *)self->data;
231228753Smm
232228753Smm	if (state->eof) {
233228753Smm		*p = NULL;
234228753Smm		return (0);
235228753Smm	}
236228753Smm
237228753Smm	/* Empty our output buffer. */
238228753Smm	state->stream.next_out = state->out_block;
239228753Smm	state->stream.avail_out = state->out_block_size;
240228753Smm
241228753Smm	/* Try to fill the output buffer. */
242228753Smm	for (;;) {
243228753Smm		if (!state->valid) {
244228753Smm			if (bzip2_reader_bid(self->bidder, self->upstream) == 0) {
245228753Smm				state->eof = 1;
246228753Smm				*p = state->out_block;
247228753Smm				decompressed = state->stream.next_out
248228753Smm				    - state->out_block;
249228753Smm				return (decompressed);
250228753Smm			}
251228753Smm			/* Initialize compression library. */
252228753Smm			ret = BZ2_bzDecompressInit(&(state->stream),
253228753Smm					   0 /* library verbosity */,
254228753Smm					   0 /* don't use low-mem algorithm */);
255228753Smm
256228753Smm			/* If init fails, try low-memory algorithm instead. */
257228753Smm			if (ret == BZ_MEM_ERROR)
258228753Smm				ret = BZ2_bzDecompressInit(&(state->stream),
259228753Smm					   0 /* library verbosity */,
260228753Smm					   1 /* do use low-mem algo */);
261228753Smm
262228753Smm			if (ret != BZ_OK) {
263228753Smm				const char *detail = NULL;
264228753Smm				int err = ARCHIVE_ERRNO_MISC;
265228753Smm				switch (ret) {
266228753Smm				case BZ_PARAM_ERROR:
267228753Smm					detail = "invalid setup parameter";
268228753Smm					break;
269228753Smm				case BZ_MEM_ERROR:
270228753Smm					err = ENOMEM;
271228753Smm					detail = "out of memory";
272228753Smm					break;
273228753Smm				case BZ_CONFIG_ERROR:
274228753Smm					detail = "mis-compiled library";
275228753Smm					break;
276228753Smm				}
277228753Smm				archive_set_error(&self->archive->archive, err,
278228753Smm				    "Internal error initializing decompressor%s%s",
279228753Smm				    detail == NULL ? "" : ": ",
280228753Smm				    detail);
281228753Smm				return (ARCHIVE_FATAL);
282228753Smm			}
283228753Smm			state->valid = 1;
284228753Smm		}
285228753Smm
286228753Smm		/* stream.next_in is really const, but bzlib
287228753Smm		 * doesn't declare it so. <sigh> */
288228753Smm		read_buf =
289228753Smm		    __archive_read_filter_ahead(self->upstream, 1, &ret);
290231200Smm		if (read_buf == NULL) {
291231200Smm			archive_set_error(&self->archive->archive,
292231200Smm			    ARCHIVE_ERRNO_MISC,
293231200Smm			    "truncated bzip2 input");
294228753Smm			return (ARCHIVE_FATAL);
295231200Smm		}
296228753Smm		state->stream.next_in = (char *)(uintptr_t)read_buf;
297228753Smm		state->stream.avail_in = ret;
298228753Smm		/* There is no more data, return whatever we have. */
299228753Smm		if (ret == 0) {
300228753Smm			state->eof = 1;
301228753Smm			*p = state->out_block;
302228753Smm			decompressed = state->stream.next_out
303228753Smm			    - state->out_block;
304228753Smm			return (decompressed);
305228753Smm		}
306228753Smm
307228753Smm		/* Decompress as much as we can in one pass. */
308228753Smm		ret = BZ2_bzDecompress(&(state->stream));
309228753Smm		__archive_read_filter_consume(self->upstream,
310228753Smm		    state->stream.next_in - read_buf);
311228753Smm
312228753Smm		switch (ret) {
313228753Smm		case BZ_STREAM_END: /* Found end of stream. */
314228753Smm			switch (BZ2_bzDecompressEnd(&(state->stream))) {
315228753Smm			case BZ_OK:
316228753Smm				break;
317228753Smm			default:
318228753Smm				archive_set_error(&(self->archive->archive),
319228753Smm					  ARCHIVE_ERRNO_MISC,
320228753Smm					  "Failed to clean up decompressor");
321228753Smm				return (ARCHIVE_FATAL);
322228753Smm			}
323228753Smm			state->valid = 0;
324228753Smm			/* FALLTHROUGH */
325228753Smm		case BZ_OK: /* Decompressor made some progress. */
326228753Smm			/* If we filled our buffer, update stats and return. */
327228753Smm			if (state->stream.avail_out == 0) {
328228753Smm				*p = state->out_block;
329228753Smm				decompressed = state->stream.next_out
330228753Smm				    - state->out_block;
331228753Smm				return (decompressed);
332228753Smm			}
333228753Smm			break;
334228753Smm		default: /* Return an error. */
335228753Smm			archive_set_error(&self->archive->archive,
336228753Smm			    ARCHIVE_ERRNO_MISC, "bzip decompression failed");
337228753Smm			return (ARCHIVE_FATAL);
338228753Smm		}
339228753Smm	}
340228753Smm}
341228753Smm
342228753Smm/*
343228753Smm * Clean up the decompressor.
344228753Smm */
345228753Smmstatic int
346228753Smmbzip2_filter_close(struct archive_read_filter *self)
347228753Smm{
348228753Smm	struct private_data *state;
349228753Smm	int ret = ARCHIVE_OK;
350228753Smm
351228753Smm	state = (struct private_data *)self->data;
352228753Smm
353228753Smm	if (state->valid) {
354228753Smm		switch (BZ2_bzDecompressEnd(&state->stream)) {
355228753Smm		case BZ_OK:
356228753Smm			break;
357228753Smm		default:
358228753Smm			archive_set_error(&self->archive->archive,
359228753Smm					  ARCHIVE_ERRNO_MISC,
360228753Smm					  "Failed to clean up decompressor");
361228753Smm			ret = ARCHIVE_FATAL;
362228753Smm		}
363231200Smm		state->valid = 0;
364228753Smm	}
365228753Smm
366228753Smm	free(state->out_block);
367228753Smm	free(state);
368228753Smm	return (ret);
369228753Smm}
370228753Smm
371228753Smm#endif /* HAVE_BZLIB_H && BZ_CONFIG_ERROR */
372