1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm/*
27228753Smm * This code borrows heavily from "compress" source code, which is
28228753Smm * protected by the following copyright.  (Clause 3 dropped by request
29228753Smm * of the Regents.)
30228753Smm */
31228753Smm
32228753Smm/*-
33228753Smm * Copyright (c) 1985, 1986, 1992, 1993
34228753Smm *	The Regents of the University of California.  All rights reserved.
35228753Smm *
36228753Smm * This code is derived from software contributed to Berkeley by
37228753Smm * Diomidis Spinellis and James A. Woods, derived from original
38228753Smm * work by Spencer Thomas and Joseph Orost.
39228753Smm *
40228753Smm * Redistribution and use in source and binary forms, with or without
41228753Smm * modification, are permitted provided that the following conditions
42228753Smm * are met:
43228753Smm * 1. Redistributions of source code must retain the above copyright
44228753Smm *    notice, this list of conditions and the following disclaimer.
45228753Smm * 2. Redistributions in binary form must reproduce the above copyright
46228753Smm *    notice, this list of conditions and the following disclaimer in the
47228753Smm *    documentation and/or other materials provided with the distribution.
48228753Smm * 4. Neither the name of the University nor the names of its contributors
49228753Smm *    may be used to endorse or promote products derived from this software
50228753Smm *    without specific prior written permission.
51228753Smm *
52228753Smm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53228753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54228753Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55228753Smm * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56228753Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57228753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58228753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59228753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60228753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61228753Smm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62228753Smm * SUCH DAMAGE.
63228753Smm */
64228753Smm
65228753Smm
66228753Smm#include "archive_platform.h"
67231200Smm__FBSDID("$FreeBSD$");
68228753Smm
69228753Smm#ifdef HAVE_ERRNO_H
70228753Smm#include <errno.h>
71228753Smm#endif
72228753Smm#ifdef HAVE_STDLIB_H
73228753Smm#include <stdlib.h>
74228753Smm#endif
75228753Smm#ifdef HAVE_STRING_H
76228753Smm#include <string.h>
77228753Smm#endif
78228753Smm#ifdef HAVE_UNISTD_H
79228753Smm#include <unistd.h>
80228753Smm#endif
81228753Smm
82228753Smm#include "archive.h"
83228753Smm#include "archive_private.h"
84228753Smm#include "archive_read_private.h"
85228753Smm
86228753Smm/*
87228753Smm * Because LZW decompression is pretty simple, I've just implemented
88228753Smm * the whole decompressor here (cribbing from "compress" source code,
89228753Smm * of course), rather than relying on an external library.  I have
90228753Smm * made an effort to clarify and simplify the algorithm, so the
91228753Smm * names and structure here don't exactly match those used by compress.
92228753Smm */
93228753Smm
94228753Smmstruct private_data {
95228753Smm	/* Input variables. */
96228753Smm	const unsigned char	*next_in;
97228753Smm	size_t			 avail_in;
98231200Smm	size_t			 consume_unnotified;
99228753Smm	int			 bit_buffer;
100228753Smm	int			 bits_avail;
101228753Smm	size_t			 bytes_in_section;
102228753Smm
103228753Smm	/* Output variables. */
104228753Smm	size_t			 out_block_size;
105228753Smm	void			*out_block;
106228753Smm
107228753Smm	/* Decompression status variables. */
108228753Smm	int			 use_reset_code;
109228753Smm	int			 end_of_stream;	/* EOF status. */
110228753Smm	int			 maxcode;	/* Largest code. */
111228753Smm	int			 maxcode_bits;	/* Length of largest code. */
112228753Smm	int			 section_end_code; /* When to increase bits. */
113228753Smm	int			 bits;		/* Current code length. */
114228753Smm	int			 oldcode;	/* Previous code. */
115228753Smm	int			 finbyte;	/* Last byte of prev code. */
116228753Smm
117228753Smm	/* Dictionary. */
118228753Smm	int			 free_ent;       /* Next dictionary entry. */
119228753Smm	unsigned char		 suffix[65536];
120228753Smm	uint16_t		 prefix[65536];
121228753Smm
122228753Smm	/*
123228753Smm	 * Scratch area for expanding dictionary entries.  Note:
124228753Smm	 * "worst" case here comes from compressing /dev/zero: the
125228753Smm	 * last code in the dictionary will code a sequence of
126228753Smm	 * 65536-256 zero bytes.  Thus, we need stack space to expand
127228753Smm	 * a 65280-byte dictionary entry.  (Of course, 32640:1
128228753Smm	 * compression could also be considered the "best" case. ;-)
129228753Smm	 */
130228753Smm	unsigned char		*stackp;
131228753Smm	unsigned char		 stack[65300];
132228753Smm};
133228753Smm
134228753Smmstatic int	compress_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *);
135228753Smmstatic int	compress_bidder_init(struct archive_read_filter *);
136228753Smmstatic int	compress_bidder_free(struct archive_read_filter_bidder *);
137228753Smm
138228753Smmstatic ssize_t	compress_filter_read(struct archive_read_filter *, const void **);
139228753Smmstatic int	compress_filter_close(struct archive_read_filter *);
140228753Smm
141228753Smmstatic int	getbits(struct archive_read_filter *, int n);
142228753Smmstatic int	next_code(struct archive_read_filter *);
143228753Smm
144231200Smm#if ARCHIVE_VERSION_NUMBER < 4000000
145231200Smm/* Deprecated; remove in libarchive 4.0 */
146228753Smmint
147231200Smmarchive_read_support_compression_compress(struct archive *a)
148228753Smm{
149231200Smm	return archive_read_support_filter_compress(a);
150231200Smm}
151231200Smm#endif
152231200Smm
153231200Smmint
154231200Smmarchive_read_support_filter_compress(struct archive *_a)
155231200Smm{
156228753Smm	struct archive_read *a = (struct archive_read *)_a;
157231200Smm	struct archive_read_filter_bidder *bidder;
158228753Smm
159231200Smm	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
160231200Smm	    ARCHIVE_STATE_NEW, "archive_read_support_filter_compress");
161231200Smm
162231200Smm	if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
163228753Smm		return (ARCHIVE_FATAL);
164228753Smm
165228753Smm	bidder->data = NULL;
166248616Smm	bidder->name = "compress (.Z)";
167228753Smm	bidder->bid = compress_bidder_bid;
168228753Smm	bidder->init = compress_bidder_init;
169228753Smm	bidder->options = NULL;
170228753Smm	bidder->free = compress_bidder_free;
171228753Smm	return (ARCHIVE_OK);
172228753Smm}
173228753Smm
174228753Smm/*
175228753Smm * Test whether we can handle this data.
176231200Smm * This logic returns zero if any part of the signature fails.
177228753Smm */
178228753Smmstatic int
179228753Smmcompress_bidder_bid(struct archive_read_filter_bidder *self,
180228753Smm    struct archive_read_filter *filter)
181228753Smm{
182228753Smm	const unsigned char *buffer;
183228753Smm	ssize_t avail;
184228753Smm	int bits_checked;
185228753Smm
186228753Smm	(void)self; /* UNUSED */
187228753Smm
188228753Smm	buffer = __archive_read_filter_ahead(filter, 2, &avail);
189228753Smm
190228753Smm	if (buffer == NULL)
191228753Smm		return (0);
192228753Smm
193228753Smm	bits_checked = 0;
194231200Smm	if (buffer[0] != 0x1F || buffer[1] != 0x9D)
195228753Smm		return (0);
196231200Smm	bits_checked += 16;
197228753Smm
198228753Smm	/*
199228753Smm	 * TODO: Verify more.
200228753Smm	 */
201228753Smm
202228753Smm	return (bits_checked);
203228753Smm}
204228753Smm
205228753Smm/*
206228753Smm * Setup the callbacks.
207228753Smm */
208228753Smmstatic int
209228753Smmcompress_bidder_init(struct archive_read_filter *self)
210228753Smm{
211228753Smm	struct private_data *state;
212228753Smm	static const size_t out_block_size = 64 * 1024;
213228753Smm	void *out_block;
214228753Smm	int code;
215228753Smm
216248616Smm	self->code = ARCHIVE_FILTER_COMPRESS;
217228753Smm	self->name = "compress (.Z)";
218228753Smm
219228753Smm	state = (struct private_data *)calloc(sizeof(*state), 1);
220228753Smm	out_block = malloc(out_block_size);
221228753Smm	if (state == NULL || out_block == NULL) {
222228753Smm		free(out_block);
223228753Smm		free(state);
224228753Smm		archive_set_error(&self->archive->archive, ENOMEM,
225228753Smm		    "Can't allocate data for %s decompression",
226228753Smm		    self->name);
227228753Smm		return (ARCHIVE_FATAL);
228228753Smm	}
229228753Smm
230228753Smm	self->data = state;
231228753Smm	state->out_block_size = out_block_size;
232228753Smm	state->out_block = out_block;
233228753Smm	self->read = compress_filter_read;
234228753Smm	self->skip = NULL; /* not supported */
235228753Smm	self->close = compress_filter_close;
236228753Smm
237228753Smm	/* XXX MOVE THE FOLLOWING OUT OF INIT() XXX */
238228753Smm
239228753Smm	(void)getbits(self, 8); /* Skip first signature byte. */
240228753Smm	(void)getbits(self, 8); /* Skip second signature byte. */
241228753Smm
242228753Smm	code = getbits(self, 8);
243228753Smm	state->maxcode_bits = code & 0x1f;
244228753Smm	state->maxcode = (1 << state->maxcode_bits);
245228753Smm	state->use_reset_code = code & 0x80;
246228753Smm
247228753Smm	/* Initialize decompressor. */
248228753Smm	state->free_ent = 256;
249228753Smm	state->stackp = state->stack;
250228753Smm	if (state->use_reset_code)
251228753Smm		state->free_ent++;
252228753Smm	state->bits = 9;
253228753Smm	state->section_end_code = (1<<state->bits) - 1;
254228753Smm	state->oldcode = -1;
255228753Smm	for (code = 255; code >= 0; code--) {
256228753Smm		state->prefix[code] = 0;
257228753Smm		state->suffix[code] = code;
258228753Smm	}
259228753Smm	next_code(self);
260228753Smm
261228753Smm	return (ARCHIVE_OK);
262228753Smm}
263228753Smm
264228753Smm/*
265228753Smm * Return a block of data from the decompression buffer.  Decompress more
266228753Smm * as necessary.
267228753Smm */
268228753Smmstatic ssize_t
269228753Smmcompress_filter_read(struct archive_read_filter *self, const void **pblock)
270228753Smm{
271228753Smm	struct private_data *state;
272228753Smm	unsigned char *p, *start, *end;
273228753Smm	int ret;
274228753Smm
275228753Smm	state = (struct private_data *)self->data;
276228753Smm	if (state->end_of_stream) {
277228753Smm		*pblock = NULL;
278228753Smm		return (0);
279228753Smm	}
280228753Smm	p = start = (unsigned char *)state->out_block;
281228753Smm	end = start + state->out_block_size;
282228753Smm
283228753Smm	while (p < end && !state->end_of_stream) {
284228753Smm		if (state->stackp > state->stack) {
285228753Smm			*p++ = *--state->stackp;
286228753Smm		} else {
287228753Smm			ret = next_code(self);
288228753Smm			if (ret == -1)
289228753Smm				state->end_of_stream = ret;
290228753Smm			else if (ret != ARCHIVE_OK)
291228753Smm				return (ret);
292228753Smm		}
293228753Smm	}
294228753Smm
295228753Smm	*pblock = start;
296228753Smm	return (p - start);
297228753Smm}
298228753Smm
299228753Smm/*
300228753Smm * Clean up the reader.
301228753Smm */
302228753Smmstatic int
303228753Smmcompress_bidder_free(struct archive_read_filter_bidder *self)
304228753Smm{
305228753Smm	self->data = NULL;
306228753Smm	return (ARCHIVE_OK);
307228753Smm}
308228753Smm
309228753Smm/*
310228753Smm * Close and release the filter.
311228753Smm */
312228753Smmstatic int
313228753Smmcompress_filter_close(struct archive_read_filter *self)
314228753Smm{
315228753Smm	struct private_data *state = (struct private_data *)self->data;
316228753Smm
317228753Smm	free(state->out_block);
318228753Smm	free(state);
319228753Smm	return (ARCHIVE_OK);
320228753Smm}
321228753Smm
322228753Smm/*
323228753Smm * Process the next code and fill the stack with the expansion
324228753Smm * of the code.  Returns ARCHIVE_FATAL if there is a fatal I/O or
325228753Smm * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
326228753Smm */
327228753Smmstatic int
328228753Smmnext_code(struct archive_read_filter *self)
329228753Smm{
330228753Smm	struct private_data *state = (struct private_data *)self->data;
331228753Smm	int code, newcode;
332228753Smm
333228753Smm	static int debug_buff[1024];
334228753Smm	static unsigned debug_index;
335228753Smm
336228753Smm	code = newcode = getbits(self, state->bits);
337228753Smm	if (code < 0)
338228753Smm		return (code);
339228753Smm
340228753Smm	debug_buff[debug_index++] = code;
341228753Smm	if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0]))
342228753Smm		debug_index = 0;
343228753Smm
344228753Smm	/* If it's a reset code, reset the dictionary. */
345228753Smm	if ((code == 256) && state->use_reset_code) {
346228753Smm		/*
347228753Smm		 * The original 'compress' implementation blocked its
348228753Smm		 * I/O in a manner that resulted in junk bytes being
349228753Smm		 * inserted after every reset.  The next section skips
350228753Smm		 * this junk.  (Yes, the number of *bytes* to skip is
351228753Smm		 * a function of the current *bit* length.)
352228753Smm		 */
353228753Smm		int skip_bytes =  state->bits -
354228753Smm		    (state->bytes_in_section % state->bits);
355228753Smm		skip_bytes %= state->bits;
356228753Smm		state->bits_avail = 0; /* Discard rest of this byte. */
357228753Smm		while (skip_bytes-- > 0) {
358228753Smm			code = getbits(self, 8);
359228753Smm			if (code < 0)
360228753Smm				return (code);
361228753Smm		}
362228753Smm		/* Now, actually do the reset. */
363228753Smm		state->bytes_in_section = 0;
364228753Smm		state->bits = 9;
365228753Smm		state->section_end_code = (1 << state->bits) - 1;
366228753Smm		state->free_ent = 257;
367228753Smm		state->oldcode = -1;
368228753Smm		return (next_code(self));
369228753Smm	}
370228753Smm
371228753Smm	if (code > state->free_ent) {
372228753Smm		/* An invalid code is a fatal error. */
373228753Smm		archive_set_error(&(self->archive->archive), -1,
374228753Smm		    "Invalid compressed data");
375228753Smm		return (ARCHIVE_FATAL);
376228753Smm	}
377228753Smm
378228753Smm	/* Special case for KwKwK string. */
379228753Smm	if (code >= state->free_ent) {
380228753Smm		*state->stackp++ = state->finbyte;
381228753Smm		code = state->oldcode;
382228753Smm	}
383228753Smm
384228753Smm	/* Generate output characters in reverse order. */
385228753Smm	while (code >= 256) {
386228753Smm		*state->stackp++ = state->suffix[code];
387228753Smm		code = state->prefix[code];
388228753Smm	}
389228753Smm	*state->stackp++ = state->finbyte = code;
390228753Smm
391228753Smm	/* Generate the new entry. */
392228753Smm	code = state->free_ent;
393228753Smm	if (code < state->maxcode && state->oldcode >= 0) {
394228753Smm		state->prefix[code] = state->oldcode;
395228753Smm		state->suffix[code] = state->finbyte;
396228753Smm		++state->free_ent;
397228753Smm	}
398228753Smm	if (state->free_ent > state->section_end_code) {
399228753Smm		state->bits++;
400228753Smm		state->bytes_in_section = 0;
401228753Smm		if (state->bits == state->maxcode_bits)
402228753Smm			state->section_end_code = state->maxcode;
403228753Smm		else
404228753Smm			state->section_end_code = (1 << state->bits) - 1;
405228753Smm	}
406228753Smm
407228753Smm	/* Remember previous code. */
408228753Smm	state->oldcode = newcode;
409228753Smm	return (ARCHIVE_OK);
410228753Smm}
411228753Smm
412228753Smm/*
413228753Smm * Return next 'n' bits from stream.
414228753Smm *
415228753Smm * -1 indicates end of available data.
416228753Smm */
417228753Smmstatic int
418228753Smmgetbits(struct archive_read_filter *self, int n)
419228753Smm{
420228753Smm	struct private_data *state = (struct private_data *)self->data;
421228753Smm	int code;
422228753Smm	ssize_t ret;
423228753Smm	static const int mask[] = {
424228753Smm		0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
425228753Smm		0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
426228753Smm	};
427228753Smm
428228753Smm	while (state->bits_avail < n) {
429228753Smm		if (state->avail_in <= 0) {
430231200Smm			if (state->consume_unnotified) {
431231200Smm				__archive_read_filter_consume(self->upstream,
432231200Smm					state->consume_unnotified);
433231200Smm				state->consume_unnotified = 0;
434231200Smm			}
435228753Smm			state->next_in
436228753Smm			    = __archive_read_filter_ahead(self->upstream,
437228753Smm				1, &ret);
438228753Smm			if (ret == 0)
439228753Smm				return (-1);
440228753Smm			if (ret < 0 || state->next_in == NULL)
441228753Smm				return (ARCHIVE_FATAL);
442231200Smm			state->consume_unnotified = state->avail_in = ret;
443228753Smm		}
444228753Smm		state->bit_buffer |= *state->next_in++ << state->bits_avail;
445228753Smm		state->avail_in--;
446228753Smm		state->bits_avail += 8;
447228753Smm		state->bytes_in_section++;
448228753Smm	}
449228753Smm
450228753Smm	code = state->bit_buffer;
451228753Smm	state->bit_buffer >>= n;
452228753Smm	state->bits_avail -= n;
453228753Smm
454228753Smm	return (code & mask[n]);
455228753Smm}
456