/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */ 25228753Smm 26228753Smm#include "archive_platform.h" 27228753Smm 28231200Smm__FBSDID("$FreeBSD$"); 29228753Smm 30228753Smm#ifdef HAVE_ERRNO_H 31228753Smm#include <errno.h> 32228753Smm#endif 33228753Smm#include <stdio.h> 34228753Smm#ifdef HAVE_STDLIB_H 35228753Smm#include <stdlib.h> 36228753Smm#endif 37228753Smm#ifdef HAVE_STRING_H 38228753Smm#include <string.h> 39228753Smm#endif 40228753Smm#ifdef HAVE_UNISTD_H 41228753Smm#include <unistd.h> 42228753Smm#endif 43228753Smm#ifdef HAVE_BZLIB_H 44228753Smm#include <bzlib.h> 45228753Smm#endif 46228753Smm 47228753Smm#include "archive.h" 48228753Smm#include "archive_private.h" 49228753Smm#include "archive_read_private.h" 50228753Smm 51228753Smm#if defined(HAVE_BZLIB_H) && defined(BZ_CONFIG_ERROR) 52228753Smmstruct private_data { 53228753Smm bz_stream stream; 54228753Smm char *out_block; 55228753Smm size_t out_block_size; 56228753Smm char valid; /* True = decompressor is initialized */ 57228753Smm char eof; /* True = found end of compressed data. */ 58228753Smm}; 59228753Smm 60228753Smm/* Bzip2 filter */ 61228753Smmstatic ssize_t bzip2_filter_read(struct archive_read_filter *, const void **); 62228753Smmstatic int bzip2_filter_close(struct archive_read_filter *); 63228753Smm#endif 64228753Smm 65228753Smm/* 66228753Smm * Note that we can detect bzip2 archives even if we can't decompress 67228753Smm * them. (In fact, we like detecting them because we can give better 68228753Smm * error messages.) So the bid framework here gets compiled even 69228753Smm * if bzlib is unavailable. 
70228753Smm */ 71228753Smmstatic int bzip2_reader_bid(struct archive_read_filter_bidder *, struct archive_read_filter *); 72228753Smmstatic int bzip2_reader_init(struct archive_read_filter *); 73228753Smmstatic int bzip2_reader_free(struct archive_read_filter_bidder *); 74228753Smm 75231200Smm#if ARCHIVE_VERSION_NUMBER < 4000000 76231200Smm/* Deprecated; remove in libarchive 4.0 */ 77228753Smmint 78231200Smmarchive_read_support_compression_bzip2(struct archive *a) 79228753Smm{ 80231200Smm return archive_read_support_filter_bzip2(a); 81231200Smm} 82231200Smm#endif 83231200Smm 84231200Smmint 85231200Smmarchive_read_support_filter_bzip2(struct archive *_a) 86231200Smm{ 87228753Smm struct archive_read *a = (struct archive_read *)_a; 88231200Smm struct archive_read_filter_bidder *reader; 89228753Smm 90231200Smm archive_check_magic(_a, ARCHIVE_READ_MAGIC, 91231200Smm ARCHIVE_STATE_NEW, "archive_read_support_filter_bzip2"); 92231200Smm 93231200Smm if (__archive_read_get_bidder(a, &reader) != ARCHIVE_OK) 94228753Smm return (ARCHIVE_FATAL); 95228753Smm 96228753Smm reader->data = NULL; 97248616Smm reader->name = "bzip2"; 98228753Smm reader->bid = bzip2_reader_bid; 99228753Smm reader->init = bzip2_reader_init; 100228753Smm reader->options = NULL; 101228753Smm reader->free = bzip2_reader_free; 102228753Smm#if defined(HAVE_BZLIB_H) && defined(BZ_CONFIG_ERROR) 103228753Smm return (ARCHIVE_OK); 104228753Smm#else 105228753Smm archive_set_error(_a, ARCHIVE_ERRNO_MISC, 106248616Smm "Using external bzip2 program"); 107228753Smm return (ARCHIVE_WARN); 108228753Smm#endif 109228753Smm} 110228753Smm 111228753Smmstatic int 112228753Smmbzip2_reader_free(struct archive_read_filter_bidder *self){ 113228753Smm (void)self; /* UNUSED */ 114228753Smm return (ARCHIVE_OK); 115228753Smm} 116228753Smm 117228753Smm/* 118228753Smm * Test whether we can handle this data. 119228753Smm * 120228753Smm * This logic returns zero if any part of the signature fails. 
It 121228753Smm * also tries to Do The Right Thing if a very short buffer prevents us 122228753Smm * from verifying as much as we would like. 123228753Smm */ 124228753Smmstatic int 125228753Smmbzip2_reader_bid(struct archive_read_filter_bidder *self, struct archive_read_filter *filter) 126228753Smm{ 127228753Smm const unsigned char *buffer; 128228753Smm ssize_t avail; 129228753Smm int bits_checked; 130228753Smm 131228753Smm (void)self; /* UNUSED */ 132228753Smm 133228753Smm /* Minimal bzip2 archive is 14 bytes. */ 134228753Smm buffer = __archive_read_filter_ahead(filter, 14, &avail); 135228753Smm if (buffer == NULL) 136228753Smm return (0); 137228753Smm 138228753Smm /* First three bytes must be "BZh" */ 139228753Smm bits_checked = 0; 140231200Smm if (memcmp(buffer, "BZh", 3) != 0) 141228753Smm return (0); 142228753Smm bits_checked += 24; 143228753Smm 144228753Smm /* Next follows a compression flag which must be an ASCII digit. */ 145228753Smm if (buffer[3] < '1' || buffer[3] > '9') 146228753Smm return (0); 147228753Smm bits_checked += 5; 148228753Smm 149228753Smm /* After BZh[1-9], there must be either a data block 150228753Smm * which begins with 0x314159265359 or an end-of-data 151228753Smm * marker of 0x177245385090. */ 152228753Smm if (memcmp(buffer + 4, "\x31\x41\x59\x26\x53\x59", 6) == 0) 153228753Smm bits_checked += 48; 154228753Smm else if (memcmp(buffer + 4, "\x17\x72\x45\x38\x50\x90", 6) == 0) 155228753Smm bits_checked += 48; 156228753Smm else 157228753Smm return (0); 158228753Smm 159228753Smm return (bits_checked); 160228753Smm} 161228753Smm 162228753Smm#if !defined(HAVE_BZLIB_H) || !defined(BZ_CONFIG_ERROR) 163228753Smm 164228753Smm/* 165228753Smm * If we don't have the library on this system, we can't actually do the 166228753Smm * decompression. We can, however, still detect compressed archives 167228753Smm * and emit a useful message. 
168228753Smm */ 169228753Smmstatic int 170228753Smmbzip2_reader_init(struct archive_read_filter *self) 171228753Smm{ 172228753Smm int r; 173228753Smm 174248616Smm r = __archive_read_program(self, "bzip2 -d"); 175228753Smm /* Note: We set the format here even if __archive_read_program() 176228753Smm * above fails. We do, after all, know what the format is 177228753Smm * even if we weren't able to read it. */ 178248616Smm self->code = ARCHIVE_FILTER_BZIP2; 179228753Smm self->name = "bzip2"; 180228753Smm return (r); 181228753Smm} 182228753Smm 183228753Smm 184228753Smm#else 185228753Smm 186228753Smm/* 187228753Smm * Setup the callbacks. 188228753Smm */ 189228753Smmstatic int 190228753Smmbzip2_reader_init(struct archive_read_filter *self) 191228753Smm{ 192228753Smm static const size_t out_block_size = 64 * 1024; 193228753Smm void *out_block; 194228753Smm struct private_data *state; 195228753Smm 196248616Smm self->code = ARCHIVE_FILTER_BZIP2; 197228753Smm self->name = "bzip2"; 198228753Smm 199228753Smm state = (struct private_data *)calloc(sizeof(*state), 1); 200228753Smm out_block = (unsigned char *)malloc(out_block_size); 201231200Smm if (state == NULL || out_block == NULL) { 202228753Smm archive_set_error(&self->archive->archive, ENOMEM, 203228753Smm "Can't allocate data for bzip2 decompression"); 204228753Smm free(out_block); 205228753Smm free(state); 206228753Smm return (ARCHIVE_FATAL); 207228753Smm } 208228753Smm 209228753Smm self->data = state; 210228753Smm state->out_block_size = out_block_size; 211228753Smm state->out_block = out_block; 212228753Smm self->read = bzip2_filter_read; 213228753Smm self->skip = NULL; /* not supported */ 214228753Smm self->close = bzip2_filter_close; 215228753Smm 216228753Smm return (ARCHIVE_OK); 217228753Smm} 218228753Smm 219228753Smm/* 220228753Smm * Return the next block of decompressed data. 
221228753Smm */ 222228753Smmstatic ssize_t 223228753Smmbzip2_filter_read(struct archive_read_filter *self, const void **p) 224228753Smm{ 225228753Smm struct private_data *state; 226228753Smm size_t decompressed; 227228753Smm const char *read_buf; 228228753Smm ssize_t ret; 229228753Smm 230228753Smm state = (struct private_data *)self->data; 231228753Smm 232228753Smm if (state->eof) { 233228753Smm *p = NULL; 234228753Smm return (0); 235228753Smm } 236228753Smm 237228753Smm /* Empty our output buffer. */ 238228753Smm state->stream.next_out = state->out_block; 239228753Smm state->stream.avail_out = state->out_block_size; 240228753Smm 241228753Smm /* Try to fill the output buffer. */ 242228753Smm for (;;) { 243228753Smm if (!state->valid) { 244228753Smm if (bzip2_reader_bid(self->bidder, self->upstream) == 0) { 245228753Smm state->eof = 1; 246228753Smm *p = state->out_block; 247228753Smm decompressed = state->stream.next_out 248228753Smm - state->out_block; 249228753Smm return (decompressed); 250228753Smm } 251228753Smm /* Initialize compression library. */ 252228753Smm ret = BZ2_bzDecompressInit(&(state->stream), 253228753Smm 0 /* library verbosity */, 254228753Smm 0 /* don't use low-mem algorithm */); 255228753Smm 256228753Smm /* If init fails, try low-memory algorithm instead. 
*/ 257228753Smm if (ret == BZ_MEM_ERROR) 258228753Smm ret = BZ2_bzDecompressInit(&(state->stream), 259228753Smm 0 /* library verbosity */, 260228753Smm 1 /* do use low-mem algo */); 261228753Smm 262228753Smm if (ret != BZ_OK) { 263228753Smm const char *detail = NULL; 264228753Smm int err = ARCHIVE_ERRNO_MISC; 265228753Smm switch (ret) { 266228753Smm case BZ_PARAM_ERROR: 267228753Smm detail = "invalid setup parameter"; 268228753Smm break; 269228753Smm case BZ_MEM_ERROR: 270228753Smm err = ENOMEM; 271228753Smm detail = "out of memory"; 272228753Smm break; 273228753Smm case BZ_CONFIG_ERROR: 274228753Smm detail = "mis-compiled library"; 275228753Smm break; 276228753Smm } 277228753Smm archive_set_error(&self->archive->archive, err, 278228753Smm "Internal error initializing decompressor%s%s", 279228753Smm detail == NULL ? "" : ": ", 280228753Smm detail); 281228753Smm return (ARCHIVE_FATAL); 282228753Smm } 283228753Smm state->valid = 1; 284228753Smm } 285228753Smm 286228753Smm /* stream.next_in is really const, but bzlib 287228753Smm * doesn't declare it so. <sigh> */ 288228753Smm read_buf = 289228753Smm __archive_read_filter_ahead(self->upstream, 1, &ret); 290231200Smm if (read_buf == NULL) { 291231200Smm archive_set_error(&self->archive->archive, 292231200Smm ARCHIVE_ERRNO_MISC, 293231200Smm "truncated bzip2 input"); 294228753Smm return (ARCHIVE_FATAL); 295231200Smm } 296228753Smm state->stream.next_in = (char *)(uintptr_t)read_buf; 297228753Smm state->stream.avail_in = ret; 298228753Smm /* There is no more data, return whatever we have. */ 299228753Smm if (ret == 0) { 300228753Smm state->eof = 1; 301228753Smm *p = state->out_block; 302228753Smm decompressed = state->stream.next_out 303228753Smm - state->out_block; 304228753Smm return (decompressed); 305228753Smm } 306228753Smm 307228753Smm /* Decompress as much as we can in one pass. 
*/ 308228753Smm ret = BZ2_bzDecompress(&(state->stream)); 309228753Smm __archive_read_filter_consume(self->upstream, 310228753Smm state->stream.next_in - read_buf); 311228753Smm 312228753Smm switch (ret) { 313228753Smm case BZ_STREAM_END: /* Found end of stream. */ 314228753Smm switch (BZ2_bzDecompressEnd(&(state->stream))) { 315228753Smm case BZ_OK: 316228753Smm break; 317228753Smm default: 318228753Smm archive_set_error(&(self->archive->archive), 319228753Smm ARCHIVE_ERRNO_MISC, 320228753Smm "Failed to clean up decompressor"); 321228753Smm return (ARCHIVE_FATAL); 322228753Smm } 323228753Smm state->valid = 0; 324228753Smm /* FALLTHROUGH */ 325228753Smm case BZ_OK: /* Decompressor made some progress. */ 326228753Smm /* If we filled our buffer, update stats and return. */ 327228753Smm if (state->stream.avail_out == 0) { 328228753Smm *p = state->out_block; 329228753Smm decompressed = state->stream.next_out 330228753Smm - state->out_block; 331228753Smm return (decompressed); 332228753Smm } 333228753Smm break; 334228753Smm default: /* Return an error. */ 335228753Smm archive_set_error(&self->archive->archive, 336228753Smm ARCHIVE_ERRNO_MISC, "bzip decompression failed"); 337228753Smm return (ARCHIVE_FATAL); 338228753Smm } 339228753Smm } 340228753Smm} 341228753Smm 342228753Smm/* 343228753Smm * Clean up the decompressor. 
344228753Smm */ 345228753Smmstatic int 346228753Smmbzip2_filter_close(struct archive_read_filter *self) 347228753Smm{ 348228753Smm struct private_data *state; 349228753Smm int ret = ARCHIVE_OK; 350228753Smm 351228753Smm state = (struct private_data *)self->data; 352228753Smm 353228753Smm if (state->valid) { 354228753Smm switch (BZ2_bzDecompressEnd(&state->stream)) { 355228753Smm case BZ_OK: 356228753Smm break; 357228753Smm default: 358228753Smm archive_set_error(&self->archive->archive, 359228753Smm ARCHIVE_ERRNO_MISC, 360228753Smm "Failed to clean up decompressor"); 361228753Smm ret = ARCHIVE_FATAL; 362228753Smm } 363231200Smm state->valid = 0; 364228753Smm } 365228753Smm 366228753Smm free(state->out_block); 367228753Smm free(state); 368228753Smm return (ret); 369228753Smm} 370228753Smm 371228753Smm#endif /* HAVE_BZLIB_H && BZ_CONFIG_ERROR */ 372