archive_read_support_filter_compress.c revision 302001
1296341Sdelphij/*- 296593Smarkm * Copyright (c) 2003-2007 Tim Kientzle 396593Smarkm * All rights reserved. 4142429Snectar * 596593Smarkm * Redistribution and use in source and binary forms, with or without 696593Smarkm * modification, are permitted provided that the following conditions 796593Smarkm * are met: 896593Smarkm * 1. Redistributions of source code must retain the above copyright 996593Smarkm * notice, this list of conditions and the following disclaimer. 1096593Smarkm * 2. Redistributions in binary form must reproduce the above copyright 1196593Smarkm * notice, this list of conditions and the following disclaimer in the 1296593Smarkm * documentation and/or other materials provided with the distribution. 1396593Smarkm * 1496593Smarkm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 1596593Smarkm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1696593Smarkm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1796593Smarkm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 1896593Smarkm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 1996593Smarkm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20215698Ssimon * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21215698Ssimon * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22215698Ssimon * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23215698Ssimon * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24215698Ssimon */ 2596593Smarkm 2696593Smarkm/* 2796593Smarkm * This code borrows heavily from "compress" source code, which is 2896593Smarkm * protected by the following copyright. (Clause 3 dropped by request 2996593Smarkm * of the Regents.) 3096593Smarkm */ 3196593Smarkm 3296593Smarkm/*- 3396593Smarkm * Copyright (c) 1985, 1986, 1992, 1993 3496593Smarkm * The Regents of the University of California. All rights reserved. 3596593Smarkm * 3696593Smarkm * This code is derived from software contributed to Berkeley by 3796593Smarkm * Diomidis Spinellis and James A. Woods, derived from original 3896593Smarkm * work by Spencer Thomas and Joseph Orost. 3996593Smarkm * 4096593Smarkm * Redistribution and use in source and binary forms, with or without 41279264Sdelphij * modification, are permitted provided that the following conditions 42279264Sdelphij * are met: 4396593Smarkm * 1. Redistributions of source code must retain the above copyright 4496593Smarkm * notice, this list of conditions and the following disclaimer. 45215698Ssimon * 2. Redistributions in binary form must reproduce the above copyright 46215698Ssimon * notice, this list of conditions and the following disclaimer in the 47215698Ssimon * documentation and/or other materials provided with the distribution. 48215698Ssimon * 4. Neither the name of the University nor the names of its contributors 49142429Snectar * may be used to endorse or promote products derived from this software 50215698Ssimon * without specific prior written permission. 51142429Snectar * 52142429Snectar * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53279264Sdelphij * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54279264Sdelphij * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55279264Sdelphij * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 5696593Smarkm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57279264Sdelphij * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58279264Sdelphij * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59279264Sdelphij * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60279264Sdelphij * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61279264Sdelphij * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62279264Sdelphij * SUCH DAMAGE. 63215698Ssimon */ 64279264Sdelphij 65279264Sdelphij 66279264Sdelphij#include "archive_platform.h" 67279264Sdelphij__FBSDID("$FreeBSD$"); 68279264Sdelphij 69215698Ssimon#ifdef HAVE_ERRNO_H 70279264Sdelphij#include <errno.h> 7196593Smarkm#endif 7296593Smarkm#ifdef HAVE_STDLIB_H 7396593Smarkm#include <stdlib.h> 7496593Smarkm#endif 7596593Smarkm#ifdef HAVE_STRING_H 7696593Smarkm#include <string.h> 7796593Smarkm#endif 7896593Smarkm#ifdef HAVE_UNISTD_H 7996593Smarkm#include <unistd.h> 8096593Smarkm#endif 8196593Smarkm 8296593Smarkm#include "archive.h" 8396593Smarkm#include "archive_private.h" 8496593Smarkm#include "archive_read_private.h" 8596593Smarkm 8696593Smarkm/* 8796593Smarkm * Because LZW decompression is pretty simple, I've just implemented 8896593Smarkm * the whole decompressor here (cribbing from "compress" source code, 8996593Smarkm * of course), rather than relying on an external library. I have 9096593Smarkm * made an effort to clarify and simplify the algorithm, so the 9196593Smarkm * names and structure here don't exactly match those used by compress. 9296593Smarkm */ 9396593Smarkm 9496593Smarkmstruct private_data { 9596593Smarkm /* Input variables. */ 9696593Smarkm const unsigned char *next_in; 9796593Smarkm size_t avail_in; 9896593Smarkm size_t consume_unnotified; 9996593Smarkm int bit_buffer; 10096593Smarkm int bits_avail; 10196593Smarkm size_t bytes_in_section; 10296593Smarkm 10396593Smarkm /* Output variables. */ 10496593Smarkm size_t out_block_size; 10596593Smarkm void *out_block; 10696593Smarkm 10796593Smarkm /* Decompression status variables. */ 10896593Smarkm int use_reset_code; 10996593Smarkm int end_of_stream; /* EOF status. */ 11096593Smarkm int maxcode; /* Largest code. */ 11196593Smarkm int maxcode_bits; /* Length of largest code. */ 11296593Smarkm int section_end_code; /* When to increase bits. */ 11396593Smarkm int bits; /* Current code length. */ 11496593Smarkm int oldcode; /* Previous code. */ 11596593Smarkm int finbyte; /* Last byte of prev code. */ 11696593Smarkm 11796593Smarkm /* Dictionary. */ 11896593Smarkm int free_ent; /* Next dictionary entry. */ 11996593Smarkm unsigned char suffix[65536]; 12096593Smarkm uint16_t prefix[65536]; 12196593Smarkm 12296593Smarkm /* 12396593Smarkm * Scratch area for expanding dictionary entries. Note: 12496593Smarkm * "worst" case here comes from compressing /dev/zero: the 12596593Smarkm * last code in the dictionary will code a sequence of 12696593Smarkm * 65536-256 zero bytes. Thus, we need stack space to expand 12796593Smarkm * a 65280-byte dictionary entry. (Of course, 32640:1 12896593Smarkm * compression could also be considered the "best" case. ;-) 12996593Smarkm */ 13096593Smarkm unsigned char *stackp; 13196593Smarkm unsigned char stack[65300]; 13296593Smarkm}; 133142429Snectar 13496593Smarkmstatic int compress_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *); 135100946Snectarstatic int compress_bidder_init(struct archive_read_filter *); 136296341Sdelphijstatic int compress_bidder_free(struct archive_read_filter_bidder *); 137215698Ssimon 138215698Ssimonstatic ssize_t compress_filter_read(struct archive_read_filter *, const void **); 139215698Ssimonstatic int compress_filter_close(struct archive_read_filter *); 140215698Ssimon 14196593Smarkmstatic int getbits(struct archive_read_filter *, int n); 14296593Smarkmstatic int next_code(struct archive_read_filter *); 14396593Smarkm 14496593Smarkm#if ARCHIVE_VERSION_NUMBER < 4000000 145142429Snectar/* Deprecated; remove in libarchive 4.0 */ 14696593Smarkmint 14796593Smarkmarchive_read_support_compression_compress(struct archive *a) 14896593Smarkm{ 14996593Smarkm return archive_read_support_filter_compress(a); 150215698Ssimon} 15196593Smarkm#endif 15296593Smarkm 15396593Smarkmint 15496593Smarkmarchive_read_support_filter_compress(struct archive *_a) 155215698Ssimon{ 15696593Smarkm struct archive_read *a = (struct archive_read *)_a; 15796593Smarkm struct archive_read_filter_bidder *bidder; 15896593Smarkm 15996593Smarkm archive_check_magic(_a, ARCHIVE_READ_MAGIC, 16096593Smarkm ARCHIVE_STATE_NEW, "archive_read_support_filter_compress"); 16196593Smarkm 16296593Smarkm if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK) 16396593Smarkm return (ARCHIVE_FATAL); 16496593Smarkm 16596593Smarkm bidder->data = NULL; 16696593Smarkm bidder->name = "compress (.Z)"; 167215698Ssimon bidder->bid = compress_bidder_bid; 16896593Smarkm bidder->init = compress_bidder_init; 16996593Smarkm bidder->options = NULL; 170215698Ssimon bidder->free = compress_bidder_free; 17196593Smarkm return (ARCHIVE_OK); 17296593Smarkm} 17396593Smarkm 17496593Smarkm/* 17596593Smarkm * Test whether we can handle this data. 176279264Sdelphij * This logic returns zero if any part of the signature fails. 17796593Smarkm */ 17896593Smarkmstatic int 17996593Smarkmcompress_bidder_bid(struct archive_read_filter_bidder *self, 18096593Smarkm struct archive_read_filter *filter) 18196593Smarkm{ 18296593Smarkm const unsigned char *buffer; 18396593Smarkm ssize_t avail; 18496593Smarkm int bits_checked; 18596593Smarkm 18696593Smarkm (void)self; /* UNUSED */ 18796593Smarkm 18896593Smarkm /* Shortest valid compress file is 3 bytes. */ 18996593Smarkm buffer = __archive_read_filter_ahead(filter, 3, &avail); 190279264Sdelphij 19196593Smarkm if (buffer == NULL) 19296593Smarkm return (0); 19396593Smarkm 19496593Smarkm bits_checked = 0; 195279264Sdelphij /* First two bytes are the magic value */ 196279264Sdelphij if (buffer[0] != 0x1F || buffer[1] != 0x9D) 19796593Smarkm return (0); 198279264Sdelphij /* Third byte holds compression parameters. */ 199279264Sdelphij if (buffer[2] & 0x20) /* Reserved bit, must be zero. */ 20096593Smarkm return (0); 20196593Smarkm if (buffer[2] & 0x40) /* Reserved bit, must be zero. */ 20296593Smarkm return (0); 20396593Smarkm bits_checked += 18; 20496593Smarkm 20596593Smarkm return (bits_checked); 20696593Smarkm} 20796593Smarkm 20896593Smarkm/* 20996593Smarkm * Setup the callbacks. 21096593Smarkm */ 21196593Smarkmstatic int 21296593Smarkmcompress_bidder_init(struct archive_read_filter *self) 21396593Smarkm{ 21496593Smarkm struct private_data *state; 21596593Smarkm static const size_t out_block_size = 64 * 1024; 21696593Smarkm void *out_block; 21796593Smarkm int code; 21896593Smarkm 21996593Smarkm self->code = ARCHIVE_FILTER_COMPRESS; 22096593Smarkm self->name = "compress (.Z)"; 22196593Smarkm 22296593Smarkm state = (struct private_data *)calloc(sizeof(*state), 1); 22396593Smarkm out_block = malloc(out_block_size); 22496593Smarkm if (state == NULL || out_block == NULL) { 225279264Sdelphij free(out_block); 22696593Smarkm free(state); 22796593Smarkm archive_set_error(&self->archive->archive, ENOMEM, 22896593Smarkm "Can't allocate data for %s decompression", 22996593Smarkm self->name); 23096593Smarkm return (ARCHIVE_FATAL); 23196593Smarkm } 23296593Smarkm 23396593Smarkm self->data = state; 23496593Smarkm state->out_block_size = out_block_size; 23596593Smarkm state->out_block = out_block; 23696593Smarkm self->read = compress_filter_read; 23796593Smarkm self->skip = NULL; /* not supported */ 23896593Smarkm self->close = compress_filter_close; 23996593Smarkm 24096593Smarkm /* XXX MOVE THE FOLLOWING OUT OF INIT() XXX */ 24196593Smarkm 24296593Smarkm (void)getbits(self, 8); /* Skip first signature byte. */ 24396593Smarkm (void)getbits(self, 8); /* Skip second signature byte. */ 24496593Smarkm 24596593Smarkm /* Get compression parameters. */ 246279264Sdelphij code = getbits(self, 8); 24796593Smarkm if ((code & 0x1f) > 16) { 24896593Smarkm archive_set_error(&self->archive->archive, -1, 249279264Sdelphij "Invalid compressed data"); 25096593Smarkm return (ARCHIVE_FATAL); 25196593Smarkm } 25296593Smarkm state->maxcode_bits = code & 0x1f; 25396593Smarkm state->maxcode = (1 << state->maxcode_bits); 25496593Smarkm state->use_reset_code = code & 0x80; 25596593Smarkm 25696593Smarkm /* Initialize decompressor. */ 25796593Smarkm state->free_ent = 256; 258215698Ssimon state->stackp = state->stack; 25996593Smarkm if (state->use_reset_code) 26096593Smarkm state->free_ent++; 26196593Smarkm state->bits = 9; 262 state->section_end_code = (1<<state->bits) - 1; 263 state->oldcode = -1; 264 for (code = 255; code >= 0; code--) { 265 state->prefix[code] = 0; 266 state->suffix[code] = code; 267 } 268 next_code(self); 269 270 return (ARCHIVE_OK); 271} 272 273/* 274 * Return a block of data from the decompression buffer. Decompress more 275 * as necessary. 276 */ 277static ssize_t 278compress_filter_read(struct archive_read_filter *self, const void **pblock) 279{ 280 struct private_data *state; 281 unsigned char *p, *start, *end; 282 int ret; 283 284 state = (struct private_data *)self->data; 285 if (state->end_of_stream) { 286 *pblock = NULL; 287 return (0); 288 } 289 p = start = (unsigned char *)state->out_block; 290 end = start + state->out_block_size; 291 292 while (p < end && !state->end_of_stream) { 293 if (state->stackp > state->stack) { 294 *p++ = *--state->stackp; 295 } else { 296 ret = next_code(self); 297 if (ret == -1) 298 state->end_of_stream = ret; 299 else if (ret != ARCHIVE_OK) 300 return (ret); 301 } 302 } 303 304 *pblock = start; 305 return (p - start); 306} 307 308/* 309 * Clean up the reader. 310 */ 311static int 312compress_bidder_free(struct archive_read_filter_bidder *self) 313{ 314 self->data = NULL; 315 return (ARCHIVE_OK); 316} 317 318/* 319 * Close and release the filter. 320 */ 321static int 322compress_filter_close(struct archive_read_filter *self) 323{ 324 struct private_data *state = (struct private_data *)self->data; 325 326 free(state->out_block); 327 free(state); 328 return (ARCHIVE_OK); 329} 330 331/* 332 * Process the next code and fill the stack with the expansion 333 * of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or 334 * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise. 335 */ 336static int 337next_code(struct archive_read_filter *self) 338{ 339 struct private_data *state = (struct private_data *)self->data; 340 int code, newcode; 341 342 static int debug_buff[1024]; 343 static unsigned debug_index; 344 345 code = newcode = getbits(self, state->bits); 346 if (code < 0) 347 return (code); 348 349 debug_buff[debug_index++] = code; 350 if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0])) 351 debug_index = 0; 352 353 /* If it's a reset code, reset the dictionary. */ 354 if ((code == 256) && state->use_reset_code) { 355 /* 356 * The original 'compress' implementation blocked its 357 * I/O in a manner that resulted in junk bytes being 358 * inserted after every reset. The next section skips 359 * this junk. (Yes, the number of *bytes* to skip is 360 * a function of the current *bit* length.) 361 */ 362 int skip_bytes = state->bits - 363 (state->bytes_in_section % state->bits); 364 skip_bytes %= state->bits; 365 state->bits_avail = 0; /* Discard rest of this byte. */ 366 while (skip_bytes-- > 0) { 367 code = getbits(self, 8); 368 if (code < 0) 369 return (code); 370 } 371 /* Now, actually do the reset. */ 372 state->bytes_in_section = 0; 373 state->bits = 9; 374 state->section_end_code = (1 << state->bits) - 1; 375 state->free_ent = 257; 376 state->oldcode = -1; 377 return (next_code(self)); 378 } 379 380 if (code > state->free_ent 381 || (code == state->free_ent && state->oldcode < 0)) { 382 /* An invalid code is a fatal error. */ 383 archive_set_error(&(self->archive->archive), -1, 384 "Invalid compressed data"); 385 return (ARCHIVE_FATAL); 386 } 387 388 /* Special case for KwKwK string. */ 389 if (code >= state->free_ent) { 390 *state->stackp++ = state->finbyte; 391 code = state->oldcode; 392 } 393 394 /* Generate output characters in reverse order. */ 395 while (code >= 256) { 396 *state->stackp++ = state->suffix[code]; 397 code = state->prefix[code]; 398 } 399 *state->stackp++ = state->finbyte = code; 400 401 /* Generate the new entry. */ 402 code = state->free_ent; 403 if (code < state->maxcode && state->oldcode >= 0) { 404 state->prefix[code] = state->oldcode; 405 state->suffix[code] = state->finbyte; 406 ++state->free_ent; 407 } 408 if (state->free_ent > state->section_end_code) { 409 state->bits++; 410 state->bytes_in_section = 0; 411 if (state->bits == state->maxcode_bits) 412 state->section_end_code = state->maxcode; 413 else 414 state->section_end_code = (1 << state->bits) - 1; 415 } 416 417 /* Remember previous code. */ 418 state->oldcode = newcode; 419 return (ARCHIVE_OK); 420} 421 422/* 423 * Return next 'n' bits from stream. 424 * 425 * -1 indicates end of available data. 426 */ 427static int 428getbits(struct archive_read_filter *self, int n) 429{ 430 struct private_data *state = (struct private_data *)self->data; 431 int code; 432 ssize_t ret; 433 static const int mask[] = { 434 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff, 435 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff 436 }; 437 438 while (state->bits_avail < n) { 439 if (state->avail_in <= 0) { 440 if (state->consume_unnotified) { 441 __archive_read_filter_consume(self->upstream, 442 state->consume_unnotified); 443 state->consume_unnotified = 0; 444 } 445 state->next_in 446 = __archive_read_filter_ahead(self->upstream, 447 1, &ret); 448 if (ret == 0) 449 return (-1); 450 if (ret < 0 || state->next_in == NULL) 451 return (ARCHIVE_FATAL); 452 state->consume_unnotified = state->avail_in = ret; 453 } 454 state->bit_buffer |= *state->next_in++ << state->bits_avail; 455 state->avail_in--; 456 state->bits_avail += 8; 457 state->bytes_in_section++; 458 } 459 460 code = state->bit_buffer; 461 state->bit_buffer >>= n; 462 state->bits_avail -= n; 463 464 return (code & mask[n]); 465} 466