1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28#ifdef HAVE_ERRNO_H
29#include <errno.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
34#ifdef HAVE_STRING_H
35#include <string.h>
36#endif
37#ifdef HAVE_LIMITS_H
38#include <limits.h>
39#endif
40#ifdef HAVE_UNISTD_H
41#include <unistd.h>
42#endif
43#ifdef HAVE_ZLIB_H
44#include <zlib.h>
45#endif
46
47#include "archive.h"
48#include "archive_entry.h"
49#include "archive_endian.h"
50#include "archive_private.h"
51#include "archive_read_private.h"
52
53#ifdef HAVE_ZLIB_H
54struct private_data {
55	z_stream	 stream;
56	char		 in_stream;
57	unsigned char	*out_block;
58	size_t		 out_block_size;
59	int64_t		 total_out;
60	unsigned long	 crc;
61	uint32_t	 mtime;
62	char		*name;
63	char		 eof; /* True = found end of compressed data. */
64};
65
/* Gzip filter callbacks (defined further down in this file). */
static ssize_t	gzip_filter_read(struct archive_read_filter *self,
		    const void **p);
static int	gzip_filter_close(struct archive_read_filter *self);
69#endif
70
/*
 * Note that we can detect gzip archives even if we can't decompress
 * them.  (In fact, we like detecting them because we can give better
 * error messages.)  So the bid framework here gets compiled even
 * if zlib is unavailable.
 *
 * If zlib is unavailable, gzip_bidder_init() falls back to running an
 * external "gzip -d" program instead of decompressing in-process.
 */
static int	gzip_bidder_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *filter);
static int	gzip_bidder_init(struct archive_read_filter *self);
84
85#if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Deprecated spelling of archive_read_support_filter_gzip(); it simply
 * forwards the call and its result.  Remove in libarchive 4.0.
 */
int
archive_read_support_compression_gzip(struct archive *a)
{
	int r;

	r = archive_read_support_filter_gzip(a);
	return (r);
}
92#endif
93
94static const struct archive_read_filter_bidder_vtable
95gzip_bidder_vtable = {
96	.bid = gzip_bidder_bid,
97	.init = gzip_bidder_init,
98};
99
100int
101archive_read_support_filter_gzip(struct archive *_a)
102{
103	struct archive_read *a = (struct archive_read *)_a;
104
105	if (__archive_read_register_bidder(a, NULL, "gzip",
106				&gzip_bidder_vtable) != ARCHIVE_OK)
107		return (ARCHIVE_FATAL);
108
109	/* Signal the extent of gzip support with the return value here. */
110#if HAVE_ZLIB_H
111	return (ARCHIVE_OK);
112#else
113	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
114	    "Using external gzip program");
115	return (ARCHIVE_WARN);
116#endif
117}
118
/*
 * Longest NUL-terminated file name / comment (terminator included) that
 * peek_at_header() accepts.  Without a bound, input that starts with the
 * gzip magic but never supplies the terminating NUL makes the scans
 * below keep asking upstream for one more byte of read-ahead until the
 * input is exhausted -- and this runs from the bidder, i.e. on data that
 * has not even been confirmed to be gzip yet.
 */
enum {
	GZIP_MAX_FILENAME_LENGTH = 16384,
	GZIP_MAX_COMMENT_LENGTH = 16384
};

/*
 * Read and verify the header.
 *
 * Only looks ahead in `filter'; no input is consumed here.
 *
 * Returns zero if the header couldn't be validated, else returns
 * number of bytes in header.  If pbits is non-NULL, it receives a
 * count of bits verified, suitable for use by bidder (written only
 * when the header is accepted).
 *
 * In zlib builds a non-NULL `state' additionally receives the header's
 * modification time and a heap-allocated copy of the optional file
 * name.  The modification time is stored as soon as the fixed part of
 * the header has been checked, so it may be updated even if a later
 * optional field causes the header to be rejected.
 */
static ssize_t
peek_at_header(struct archive_read_filter *filter, int *pbits,
#ifdef HAVE_ZLIB_H
	       struct private_data *state
#else
	       void *state
#endif
	      )
{
	const unsigned char *p;
	ssize_t avail, len, field_start;
	int bits = 0;
	int header_flags;
#ifndef HAVE_ZLIB_H
	(void)state; /* UNUSED */
#endif

	/* Start by looking at the first ten bytes of the header, which
	 * is all fixed layout. */
	len = 10;
	p = __archive_read_filter_ahead(filter, len, &avail);
	if (p == NULL || avail == 0)
		return (0);
	/* We only support deflation- third byte must be 0x08. */
	if (memcmp(p, "\x1F\x8B\x08", 3) != 0)
		return (0);
	bits += 24;
	if ((p[3] & 0xE0) != 0)	/* No reserved flags set. */
		return (0);
	bits += 3;
	header_flags = p[3];
	/* Bytes 4-7 are mod time in little endian. */
#ifdef HAVE_ZLIB_H
	if (state)
		state->mtime = archive_le32dec(p + 4);
#endif
	/* Byte 8 is deflate flags. */
	/* XXXX TODO: return deflate flags back to consume_header for use
	   in initializing the decompressor. */
	/* Byte 9 is OS. */

	/* Optional extra data:  2 byte length plus variable body. */
	if (header_flags & 4) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		/* Little-endian 16-bit body length, then the 2 length bytes. */
		len += ((int)p[len + 1] << 8) | (int)p[len];
		len += 2;
	}

	/* Null-terminated optional filename. */
	if (header_flags & 8) {
		field_start = len;
		do {
			++len;
			/* Give up on names that never seem to end. */
			if (len - field_start > GZIP_MAX_FILENAME_LENGTH)
				return (0);
			/* Only ask for more read-ahead when we run out. */
			if (avail < len)
				p = __archive_read_filter_ahead(filter,
				    len, &avail);
			if (p == NULL)
				return (0);
		} while (p[len - 1] != 0);

#ifdef HAVE_ZLIB_H
		if (state) {
			/* Reset the name in case of repeat header reads. */
			free(state->name);
			/* A failed strdup() just leaves the name unset. */
			state->name = strdup((const char *)&p[field_start]);
		}
#endif
	}

	/* Null-terminated optional comment. */
	if (header_flags & 16) {
		field_start = len;
		do {
			++len;
			/* Same bound as for the file name above. */
			if (len - field_start > GZIP_MAX_COMMENT_LENGTH)
				return (0);
			if (avail < len)
				p = __archive_read_filter_ahead(filter,
				    len, &avail);
			if (p == NULL)
				return (0);
		} while (p[len - 1] != 0);
	}

	/* Optional header CRC */
	if ((header_flags & 2)) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		/*
		 * XXX TODO: Compute the header CRC and compare it with
		 * the little-endian 16-bit value stored at p[len]; on a
		 * match that would add 16 more verified bits.  For now
		 * the field is only required to be present.
		 */
		len += 2;
	}

	if (pbits != NULL)
		*pbits = bits;
	return (len);
}
230
231/*
232 * Bidder just verifies the header and returns the number of verified bits.
233 */
234static int
235gzip_bidder_bid(struct archive_read_filter_bidder *self,
236    struct archive_read_filter *filter)
237{
238	int bits_checked;
239
240	(void)self; /* UNUSED */
241
242	if (peek_at_header(filter, &bits_checked, NULL))
243		return (bits_checked);
244	return (0);
245}
246
247#ifndef HAVE_ZLIB_H
248
249/*
250 * If we don't have the library on this system, we can't do the
251 * decompression directly.  We can, however, try to run "gzip -d"
252 * in case that's available.
253 */
254static int
255gzip_bidder_init(struct archive_read_filter *self)
256{
257	int r;
258
259	r = __archive_read_program(self, "gzip -d");
260	/* Note: We set the format here even if __archive_read_program()
261	 * above fails.  We do, after all, know what the format is
262	 * even if we weren't able to read it. */
263	self->code = ARCHIVE_FILTER_GZIP;
264	self->name = "gzip";
265	return (r);
266}
267
268#else
269
270static int
271gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry)
272{
273	struct private_data *state;
274
275	state = (struct private_data *)self->data;
276
277	/* A mtime of 0 is considered invalid/missing. */
278	if (state->mtime != 0)
279		archive_entry_set_mtime(entry, state->mtime, 0);
280
281	/* If the name is available, extract it. */
282	if (state->name)
283		archive_entry_set_pathname(entry, state->name);
284
285	return (ARCHIVE_OK);
286}
287
288static const struct archive_read_filter_vtable
289gzip_reader_vtable = {
290	.read = gzip_filter_read,
291	.close = gzip_filter_close,
292#ifdef HAVE_ZLIB_H
293	.read_header = gzip_read_header,
294#endif
295};
296
297/*
298 * Initialize the filter object.
299 */
300static int
301gzip_bidder_init(struct archive_read_filter *self)
302{
303	struct private_data *state;
304	static const size_t out_block_size = 64 * 1024;
305	void *out_block;
306
307	self->code = ARCHIVE_FILTER_GZIP;
308	self->name = "gzip";
309
310	state = (struct private_data *)calloc(1, sizeof(*state));
311	out_block = (unsigned char *)malloc(out_block_size);
312	if (state == NULL || out_block == NULL) {
313		free(out_block);
314		free(state);
315		archive_set_error(&self->archive->archive, ENOMEM,
316		    "Can't allocate data for gzip decompression");
317		return (ARCHIVE_FATAL);
318	}
319
320	self->data = state;
321	state->out_block_size = out_block_size;
322	state->out_block = out_block;
323	self->vtable = &gzip_reader_vtable;
324
325	state->in_stream = 0; /* We're not actually within a stream yet. */
326
327	return (ARCHIVE_OK);
328}
329
330static int
331consume_header(struct archive_read_filter *self)
332{
333	struct private_data *state;
334	ssize_t avail;
335	size_t len;
336	int ret;
337
338	state = (struct private_data *)self->data;
339
340	/* If this is a real header, consume it. */
341	len = peek_at_header(self->upstream, NULL, state);
342	if (len == 0)
343		return (ARCHIVE_EOF);
344	__archive_read_filter_consume(self->upstream, len);
345
346	/* Initialize CRC accumulator. */
347	state->crc = crc32(0L, NULL, 0);
348
349	/* Initialize compression library. */
350	state->stream.next_in = (unsigned char *)(uintptr_t)
351	    __archive_read_filter_ahead(self->upstream, 1, &avail);
352	state->stream.avail_in = (uInt)avail;
353	ret = inflateInit2(&(state->stream),
354	    -15 /* Don't check for zlib header */);
355
356	/* Decipher the error code. */
357	switch (ret) {
358	case Z_OK:
359		state->in_stream = 1;
360		return (ARCHIVE_OK);
361	case Z_STREAM_ERROR:
362		archive_set_error(&self->archive->archive,
363		    ARCHIVE_ERRNO_MISC,
364		    "Internal error initializing compression library: "
365		    "invalid setup parameter");
366		break;
367	case Z_MEM_ERROR:
368		archive_set_error(&self->archive->archive, ENOMEM,
369		    "Internal error initializing compression library: "
370		    "out of memory");
371		break;
372	case Z_VERSION_ERROR:
373		archive_set_error(&self->archive->archive,
374		    ARCHIVE_ERRNO_MISC,
375		    "Internal error initializing compression library: "
376		    "invalid library version");
377		break;
378	default:
379		archive_set_error(&self->archive->archive,
380		    ARCHIVE_ERRNO_MISC,
381		    "Internal error initializing compression library: "
382		    " Zlib error %d", ret);
383		break;
384	}
385	return (ARCHIVE_FATAL);
386}
387
388static int
389consume_trailer(struct archive_read_filter *self)
390{
391	struct private_data *state;
392	const unsigned char *p;
393	ssize_t avail;
394
395	state = (struct private_data *)self->data;
396
397	state->in_stream = 0;
398	switch (inflateEnd(&(state->stream))) {
399	case Z_OK:
400		break;
401	default:
402		archive_set_error(&self->archive->archive,
403		    ARCHIVE_ERRNO_MISC,
404		    "Failed to clean up gzip decompressor");
405		return (ARCHIVE_FATAL);
406	}
407
408	/* GZip trailer is a fixed 8 byte structure. */
409	p = __archive_read_filter_ahead(self->upstream, 8, &avail);
410	if (p == NULL || avail == 0)
411		return (ARCHIVE_FATAL);
412
413	/* XXX TODO: Verify the length and CRC. */
414
415	/* We've verified the trailer, so consume it now. */
416	__archive_read_filter_consume(self->upstream, 8);
417
418	return (ARCHIVE_OK);
419}
420
421static ssize_t
422gzip_filter_read(struct archive_read_filter *self, const void **p)
423{
424	struct private_data *state;
425	size_t decompressed;
426	ssize_t avail_in, max_in;
427	int ret;
428
429	state = (struct private_data *)self->data;
430
431	/* Empty our output buffer. */
432	state->stream.next_out = state->out_block;
433	state->stream.avail_out = (uInt)state->out_block_size;
434
435	/* Try to fill the output buffer. */
436	while (state->stream.avail_out > 0 && !state->eof) {
437		/* If we're not in a stream, read a header
438		 * and initialize the decompression library. */
439		if (!state->in_stream) {
440			ret = consume_header(self);
441			if (ret == ARCHIVE_EOF) {
442				state->eof = 1;
443				break;
444			}
445			if (ret < ARCHIVE_OK)
446				return (ret);
447		}
448
449		/* Peek at the next available data. */
450		/* ZLib treats stream.next_in as const but doesn't declare
451		 * it so, hence this ugly cast. */
452		state->stream.next_in = (unsigned char *)(uintptr_t)
453		    __archive_read_filter_ahead(self->upstream, 1, &avail_in);
454		if (state->stream.next_in == NULL) {
455			archive_set_error(&self->archive->archive,
456			    ARCHIVE_ERRNO_MISC,
457			    "truncated gzip input");
458			return (ARCHIVE_FATAL);
459		}
460		if (UINT_MAX >= SSIZE_MAX)
461			max_in = SSIZE_MAX;
462		else
463			max_in = UINT_MAX;
464		if (avail_in > max_in)
465			avail_in = max_in;
466		state->stream.avail_in = (uInt)avail_in;
467
468		/* Decompress and consume some of that data. */
469		ret = inflate(&(state->stream), 0);
470		switch (ret) {
471		case Z_OK: /* Decompressor made some progress. */
472			__archive_read_filter_consume(self->upstream,
473			    avail_in - state->stream.avail_in);
474			break;
475		case Z_STREAM_END: /* Found end of stream. */
476			__archive_read_filter_consume(self->upstream,
477			    avail_in - state->stream.avail_in);
478			/* Consume the stream trailer; release the
479			 * decompression library. */
480			ret = consume_trailer(self);
481			if (ret < ARCHIVE_OK)
482				return (ret);
483			break;
484		default:
485			/* Return an error. */
486			archive_set_error(&self->archive->archive,
487			    ARCHIVE_ERRNO_MISC,
488			    "gzip decompression failed");
489			return (ARCHIVE_FATAL);
490		}
491	}
492
493	/* We've read as much as we can. */
494	decompressed = state->stream.next_out - state->out_block;
495	state->total_out += decompressed;
496	if (decompressed == 0)
497		*p = NULL;
498	else
499		*p = state->out_block;
500	return (decompressed);
501}
502
503/*
504 * Clean up the decompressor.
505 */
506static int
507gzip_filter_close(struct archive_read_filter *self)
508{
509	struct private_data *state;
510	int ret;
511
512	state = (struct private_data *)self->data;
513	ret = ARCHIVE_OK;
514
515	if (state->in_stream) {
516		switch (inflateEnd(&(state->stream))) {
517		case Z_OK:
518			break;
519		default:
520			archive_set_error(&(self->archive->archive),
521			    ARCHIVE_ERRNO_MISC,
522			    "Failed to clean up gzip compressor");
523			ret = ARCHIVE_FATAL;
524		}
525	}
526
527	free(state->name);
528	free(state->out_block);
529	free(state);
530	return (ret);
531}
532
533#endif /* HAVE_ZLIB_H */
534