1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm 26228753Smm/* 27228753Smm * This code borrows heavily from "compress" source code, which is 28228753Smm * protected by the following copyright. (Clause 3 dropped by request 29228753Smm * of the Regents.) 30228753Smm */ 31228753Smm 32228753Smm/*- 33228753Smm * Copyright (c) 1985, 1986, 1992, 1993 34228753Smm * The Regents of the University of California. All rights reserved. 35228753Smm * 36228753Smm * This code is derived from software contributed to Berkeley by 37228753Smm * Diomidis Spinellis and James A. Woods, derived from original 38228753Smm * work by Spencer Thomas and Joseph Orost. 39228753Smm * 40228753Smm * Redistribution and use in source and binary forms, with or without 41228753Smm * modification, are permitted provided that the following conditions 42228753Smm * are met: 43228753Smm * 1. Redistributions of source code must retain the above copyright 44228753Smm * notice, this list of conditions and the following disclaimer. 45228753Smm * 2. Redistributions in binary form must reproduce the above copyright 46228753Smm * notice, this list of conditions and the following disclaimer in the 47228753Smm * documentation and/or other materials provided with the distribution. 48228753Smm * 4. Neither the name of the University nor the names of its contributors 49228753Smm * may be used to endorse or promote products derived from this software 50228753Smm * without specific prior written permission. 51228753Smm * 52228753Smm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53228753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54228753Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55228753Smm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56228753Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57228753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58228753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59228753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60228753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61228753Smm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62228753Smm * SUCH DAMAGE. 63228753Smm */ 64228753Smm 65228753Smm 66228753Smm#include "archive_platform.h" 67231200Smm__FBSDID("$FreeBSD$"); 68228753Smm 69228753Smm#ifdef HAVE_ERRNO_H 70228753Smm#include <errno.h> 71228753Smm#endif 72228753Smm#ifdef HAVE_STDLIB_H 73228753Smm#include <stdlib.h> 74228753Smm#endif 75228753Smm#ifdef HAVE_STRING_H 76228753Smm#include <string.h> 77228753Smm#endif 78228753Smm#ifdef HAVE_UNISTD_H 79228753Smm#include <unistd.h> 80228753Smm#endif 81228753Smm 82228753Smm#include "archive.h" 83228753Smm#include "archive_private.h" 84228753Smm#include "archive_read_private.h" 85228753Smm 86228753Smm/* 87228753Smm * Because LZW decompression is pretty simple, I've just implemented 88228753Smm * the whole decompressor here (cribbing from "compress" source code, 89228753Smm * of course), rather than relying on an external library. I have 90228753Smm * made an effort to clarify and simplify the algorithm, so the 91228753Smm * names and structure here don't exactly match those used by compress. 92228753Smm */ 93228753Smm 94228753Smmstruct private_data { 95228753Smm /* Input variables. */ 96228753Smm const unsigned char *next_in; 97228753Smm size_t avail_in; 98231200Smm size_t consume_unnotified; 99228753Smm int bit_buffer; 100228753Smm int bits_avail; 101228753Smm size_t bytes_in_section; 102228753Smm 103228753Smm /* Output variables. */ 104228753Smm size_t out_block_size; 105228753Smm void *out_block; 106228753Smm 107228753Smm /* Decompression status variables. */ 108228753Smm int use_reset_code; 109228753Smm int end_of_stream; /* EOF status. */ 110228753Smm int maxcode; /* Largest code. */ 111228753Smm int maxcode_bits; /* Length of largest code. */ 112228753Smm int section_end_code; /* When to increase bits. */ 113228753Smm int bits; /* Current code length. */ 114228753Smm int oldcode; /* Previous code. */ 115228753Smm int finbyte; /* Last byte of prev code. */ 116228753Smm 117228753Smm /* Dictionary. */ 118228753Smm int free_ent; /* Next dictionary entry. */ 119228753Smm unsigned char suffix[65536]; 120228753Smm uint16_t prefix[65536]; 121228753Smm 122228753Smm /* 123228753Smm * Scratch area for expanding dictionary entries. Note: 124228753Smm * "worst" case here comes from compressing /dev/zero: the 125228753Smm * last code in the dictionary will code a sequence of 126228753Smm * 65536-256 zero bytes. Thus, we need stack space to expand 127228753Smm * a 65280-byte dictionary entry. (Of course, 32640:1 128228753Smm * compression could also be considered the "best" case. ;-) 129228753Smm */ 130228753Smm unsigned char *stackp; 131228753Smm unsigned char stack[65300]; 132228753Smm}; 133228753Smm 134228753Smmstatic int compress_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *); 135228753Smmstatic int compress_bidder_init(struct archive_read_filter *); 136228753Smmstatic int compress_bidder_free(struct archive_read_filter_bidder *); 137228753Smm 138228753Smmstatic ssize_t compress_filter_read(struct archive_read_filter *, const void **); 139228753Smmstatic int compress_filter_close(struct archive_read_filter *); 140228753Smm 141228753Smmstatic int getbits(struct archive_read_filter *, int n); 142228753Smmstatic int next_code(struct archive_read_filter *); 143228753Smm 144231200Smm#if ARCHIVE_VERSION_NUMBER < 4000000 145231200Smm/* Deprecated; remove in libarchive 4.0 */ 146228753Smmint 147231200Smmarchive_read_support_compression_compress(struct archive *a) 148228753Smm{ 149231200Smm return archive_read_support_filter_compress(a); 150231200Smm} 151231200Smm#endif 152231200Smm 153231200Smmint 154231200Smmarchive_read_support_filter_compress(struct archive *_a) 155231200Smm{ 156228753Smm struct archive_read *a = (struct archive_read *)_a; 157231200Smm struct archive_read_filter_bidder *bidder; 158228753Smm 159231200Smm archive_check_magic(_a, ARCHIVE_READ_MAGIC, 160231200Smm ARCHIVE_STATE_NEW, "archive_read_support_filter_compress"); 161231200Smm 162231200Smm if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK) 163228753Smm return (ARCHIVE_FATAL); 164228753Smm 165228753Smm bidder->data = NULL; 166248616Smm bidder->name = "compress (.Z)"; 167228753Smm bidder->bid = compress_bidder_bid; 168228753Smm bidder->init = compress_bidder_init; 169228753Smm bidder->options = NULL; 170228753Smm bidder->free = compress_bidder_free; 171228753Smm return (ARCHIVE_OK); 172228753Smm} 173228753Smm 174228753Smm/* 175228753Smm * Test whether we can handle this data. 176231200Smm * This logic returns zero if any part of the signature fails. 177228753Smm */ 178228753Smmstatic int 179228753Smmcompress_bidder_bid(struct archive_read_filter_bidder *self, 180228753Smm struct archive_read_filter *filter) 181228753Smm{ 182228753Smm const unsigned char *buffer; 183228753Smm ssize_t avail; 184228753Smm int bits_checked; 185228753Smm 186228753Smm (void)self; /* UNUSED */ 187228753Smm 188228753Smm buffer = __archive_read_filter_ahead(filter, 2, &avail); 189228753Smm 190228753Smm if (buffer == NULL) 191228753Smm return (0); 192228753Smm 193228753Smm bits_checked = 0; 194231200Smm if (buffer[0] != 0x1F || buffer[1] != 0x9D) 195228753Smm return (0); 196231200Smm bits_checked += 16; 197228753Smm 198228753Smm /* 199228753Smm * TODO: Verify more. 200228753Smm */ 201228753Smm 202228753Smm return (bits_checked); 203228753Smm} 204228753Smm 205228753Smm/* 206228753Smm * Setup the callbacks. 207228753Smm */ 208228753Smmstatic int 209228753Smmcompress_bidder_init(struct archive_read_filter *self) 210228753Smm{ 211228753Smm struct private_data *state; 212228753Smm static const size_t out_block_size = 64 * 1024; 213228753Smm void *out_block; 214228753Smm int code; 215228753Smm 216248616Smm self->code = ARCHIVE_FILTER_COMPRESS; 217228753Smm self->name = "compress (.Z)"; 218228753Smm 219228753Smm state = (struct private_data *)calloc(sizeof(*state), 1); 220228753Smm out_block = malloc(out_block_size); 221228753Smm if (state == NULL || out_block == NULL) { 222228753Smm free(out_block); 223228753Smm free(state); 224228753Smm archive_set_error(&self->archive->archive, ENOMEM, 225228753Smm "Can't allocate data for %s decompression", 226228753Smm self->name); 227228753Smm return (ARCHIVE_FATAL); 228228753Smm } 229228753Smm 230228753Smm self->data = state; 231228753Smm state->out_block_size = out_block_size; 232228753Smm state->out_block = out_block; 233228753Smm self->read = compress_filter_read; 234228753Smm self->skip = NULL; /* not supported */ 235228753Smm self->close = compress_filter_close; 236228753Smm 237228753Smm /* XXX MOVE THE FOLLOWING OUT OF INIT() XXX */ 238228753Smm 239228753Smm (void)getbits(self, 8); /* Skip first signature byte. */ 240228753Smm (void)getbits(self, 8); /* Skip second signature byte. */ 241228753Smm 242228753Smm code = getbits(self, 8); 243228753Smm state->maxcode_bits = code & 0x1f; 244228753Smm state->maxcode = (1 << state->maxcode_bits); 245228753Smm state->use_reset_code = code & 0x80; 246228753Smm 247228753Smm /* Initialize decompressor. */ 248228753Smm state->free_ent = 256; 249228753Smm state->stackp = state->stack; 250228753Smm if (state->use_reset_code) 251228753Smm state->free_ent++; 252228753Smm state->bits = 9; 253228753Smm state->section_end_code = (1<<state->bits) - 1; 254228753Smm state->oldcode = -1; 255228753Smm for (code = 255; code >= 0; code--) { 256228753Smm state->prefix[code] = 0; 257228753Smm state->suffix[code] = code; 258228753Smm } 259228753Smm next_code(self); 260228753Smm 261228753Smm return (ARCHIVE_OK); 262228753Smm} 263228753Smm 264228753Smm/* 265228753Smm * Return a block of data from the decompression buffer. Decompress more 266228753Smm * as necessary. 267228753Smm */ 268228753Smmstatic ssize_t 269228753Smmcompress_filter_read(struct archive_read_filter *self, const void **pblock) 270228753Smm{ 271228753Smm struct private_data *state; 272228753Smm unsigned char *p, *start, *end; 273228753Smm int ret; 274228753Smm 275228753Smm state = (struct private_data *)self->data; 276228753Smm if (state->end_of_stream) { 277228753Smm *pblock = NULL; 278228753Smm return (0); 279228753Smm } 280228753Smm p = start = (unsigned char *)state->out_block; 281228753Smm end = start + state->out_block_size; 282228753Smm 283228753Smm while (p < end && !state->end_of_stream) { 284228753Smm if (state->stackp > state->stack) { 285228753Smm *p++ = *--state->stackp; 286228753Smm } else { 287228753Smm ret = next_code(self); 288228753Smm if (ret == -1) 289228753Smm state->end_of_stream = ret; 290228753Smm else if (ret != ARCHIVE_OK) 291228753Smm return (ret); 292228753Smm } 293228753Smm } 294228753Smm 295228753Smm *pblock = start; 296228753Smm return (p - start); 297228753Smm} 298228753Smm 299228753Smm/* 300228753Smm * Clean up the reader. 301228753Smm */ 302228753Smmstatic int 303228753Smmcompress_bidder_free(struct archive_read_filter_bidder *self) 304228753Smm{ 305228753Smm self->data = NULL; 306228753Smm return (ARCHIVE_OK); 307228753Smm} 308228753Smm 309228753Smm/* 310228753Smm * Close and release the filter. 311228753Smm */ 312228753Smmstatic int 313228753Smmcompress_filter_close(struct archive_read_filter *self) 314228753Smm{ 315228753Smm struct private_data *state = (struct private_data *)self->data; 316228753Smm 317228753Smm free(state->out_block); 318228753Smm free(state); 319228753Smm return (ARCHIVE_OK); 320228753Smm} 321228753Smm 322228753Smm/* 323228753Smm * Process the next code and fill the stack with the expansion 324228753Smm * of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or 325228753Smm * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise. 326228753Smm */ 327228753Smmstatic int 328228753Smmnext_code(struct archive_read_filter *self) 329228753Smm{ 330228753Smm struct private_data *state = (struct private_data *)self->data; 331228753Smm int code, newcode; 332228753Smm 333228753Smm static int debug_buff[1024]; 334228753Smm static unsigned debug_index; 335228753Smm 336228753Smm code = newcode = getbits(self, state->bits); 337228753Smm if (code < 0) 338228753Smm return (code); 339228753Smm 340228753Smm debug_buff[debug_index++] = code; 341228753Smm if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0])) 342228753Smm debug_index = 0; 343228753Smm 344228753Smm /* If it's a reset code, reset the dictionary. */ 345228753Smm if ((code == 256) && state->use_reset_code) { 346228753Smm /* 347228753Smm * The original 'compress' implementation blocked its 348228753Smm * I/O in a manner that resulted in junk bytes being 349228753Smm * inserted after every reset. The next section skips 350228753Smm * this junk. (Yes, the number of *bytes* to skip is 351228753Smm * a function of the current *bit* length.) 352228753Smm */ 353228753Smm int skip_bytes = state->bits - 354228753Smm (state->bytes_in_section % state->bits); 355228753Smm skip_bytes %= state->bits; 356228753Smm state->bits_avail = 0; /* Discard rest of this byte. */ 357228753Smm while (skip_bytes-- > 0) { 358228753Smm code = getbits(self, 8); 359228753Smm if (code < 0) 360228753Smm return (code); 361228753Smm } 362228753Smm /* Now, actually do the reset. */ 363228753Smm state->bytes_in_section = 0; 364228753Smm state->bits = 9; 365228753Smm state->section_end_code = (1 << state->bits) - 1; 366228753Smm state->free_ent = 257; 367228753Smm state->oldcode = -1; 368228753Smm return (next_code(self)); 369228753Smm } 370228753Smm 371228753Smm if (code > state->free_ent) { 372228753Smm /* An invalid code is a fatal error. */ 373228753Smm archive_set_error(&(self->archive->archive), -1, 374228753Smm "Invalid compressed data"); 375228753Smm return (ARCHIVE_FATAL); 376228753Smm } 377228753Smm 378228753Smm /* Special case for KwKwK string. */ 379228753Smm if (code >= state->free_ent) { 380228753Smm *state->stackp++ = state->finbyte; 381228753Smm code = state->oldcode; 382228753Smm } 383228753Smm 384228753Smm /* Generate output characters in reverse order. */ 385228753Smm while (code >= 256) { 386228753Smm *state->stackp++ = state->suffix[code]; 387228753Smm code = state->prefix[code]; 388228753Smm } 389228753Smm *state->stackp++ = state->finbyte = code; 390228753Smm 391228753Smm /* Generate the new entry. */ 392228753Smm code = state->free_ent; 393228753Smm if (code < state->maxcode && state->oldcode >= 0) { 394228753Smm state->prefix[code] = state->oldcode; 395228753Smm state->suffix[code] = state->finbyte; 396228753Smm ++state->free_ent; 397228753Smm } 398228753Smm if (state->free_ent > state->section_end_code) { 399228753Smm state->bits++; 400228753Smm state->bytes_in_section = 0; 401228753Smm if (state->bits == state->maxcode_bits) 402228753Smm state->section_end_code = state->maxcode; 403228753Smm else 404228753Smm state->section_end_code = (1 << state->bits) - 1; 405228753Smm } 406228753Smm 407228753Smm /* Remember previous code. */ 408228753Smm state->oldcode = newcode; 409228753Smm return (ARCHIVE_OK); 410228753Smm} 411228753Smm 412228753Smm/* 413228753Smm * Return next 'n' bits from stream. 414228753Smm * 415228753Smm * -1 indicates end of available data. 416228753Smm */ 417228753Smmstatic int 418228753Smmgetbits(struct archive_read_filter *self, int n) 419228753Smm{ 420228753Smm struct private_data *state = (struct private_data *)self->data; 421228753Smm int code; 422228753Smm ssize_t ret; 423228753Smm static const int mask[] = { 424228753Smm 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff, 425228753Smm 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff 426228753Smm }; 427228753Smm 428228753Smm while (state->bits_avail < n) { 429228753Smm if (state->avail_in <= 0) { 430231200Smm if (state->consume_unnotified) { 431231200Smm __archive_read_filter_consume(self->upstream, 432231200Smm state->consume_unnotified); 433231200Smm state->consume_unnotified = 0; 434231200Smm } 435228753Smm state->next_in 436228753Smm = __archive_read_filter_ahead(self->upstream, 437228753Smm 1, &ret); 438228753Smm if (ret == 0) 439228753Smm return (-1); 440228753Smm if (ret < 0 || state->next_in == NULL) 441228753Smm return (ARCHIVE_FATAL); 442231200Smm state->consume_unnotified = state->avail_in = ret; 443228753Smm } 444228753Smm state->bit_buffer |= *state->next_in++ << state->bits_avail; 445228753Smm state->avail_in--; 446228753Smm state->bits_avail += 8; 447228753Smm state->bytes_in_section++; 448228753Smm } 449228753Smm 450228753Smm code = state->bit_buffer; 451228753Smm state->bit_buffer >>= n; 452228753Smm state->bits_avail -= n; 453228753Smm 454228753Smm return (code & mask[n]); 455228753Smm} 456