archive_read_support_format_lha.c revision 337352
1/*- 2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "archive_platform.h" 27 28#ifdef HAVE_ERRNO_H 29#include <errno.h> 30#endif 31#ifdef HAVE_LIMITS_H 32#include <limits.h> 33#endif 34#ifdef HAVE_STDLIB_H 35#include <stdlib.h> 36#endif 37#ifdef HAVE_STRING_H 38#include <string.h> 39#endif 40 41#include "archive.h" 42#include "archive_entry.h" 43#include "archive_entry_locale.h" 44#include "archive_private.h" 45#include "archive_read_private.h" 46#include "archive_endian.h" 47 48 49#define MAXMATCH 256 /* Maximum match length. */ 50#define MINMATCH 3 /* Minimum match length. */ 51/* 52 * Literal table format: 53 * +0 +256 +510 54 * +---------------+-------------------------+ 55 * | literal code | match length | 56 * | 0 ... 255 | MINMATCH ... MAXMATCH | 57 * +---------------+-------------------------+ 58 * <--- LT_BITLEN_SIZE ---> 59 */ 60/* Literal table size. */ 61#define LT_BITLEN_SIZE (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1) 62/* Position table size. 63 * Note: this used for both position table and pre literal table.*/ 64#define PT_BITLEN_SIZE (3 + 16) 65 66struct lzh_dec { 67 /* Decoding status. */ 68 int state; 69 70 /* 71 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded 72 * data. 73 */ 74 int w_size; 75 int w_mask; 76 /* Window buffer, which is a loop buffer. */ 77 unsigned char *w_buff; 78 /* The insert position to the window. */ 79 int w_pos; 80 /* The position where we can copy decoded code from the window. */ 81 int copy_pos; 82 /* The length how many bytes we can copy decoded code from 83 * the window. */ 84 int copy_len; 85 86 /* 87 * Bit stream reader. 88 */ 89 struct lzh_br { 90#define CACHE_TYPE uint64_t 91#define CACHE_BITS (8 * sizeof(CACHE_TYPE)) 92 /* Cache buffer. */ 93 CACHE_TYPE cache_buffer; 94 /* Indicates how many bits avail in cache_buffer. */ 95 int cache_avail; 96 } br; 97 98 /* 99 * Huffman coding. 100 */ 101 struct huffman { 102 int len_size; 103 int len_avail; 104 int len_bits; 105 int freq[17]; 106 unsigned char *bitlen; 107 108 /* 109 * Use a index table. It's faster than searching a huffman 110 * coding tree, which is a binary tree. But a use of a large 111 * index table causes L1 cache read miss many times. 112 */ 113#define HTBL_BITS 10 114 int max_bits; 115 int shift_bits; 116 int tbl_bits; 117 int tree_used; 118 int tree_avail; 119 /* Direct access table. */ 120 uint16_t *tbl; 121 /* Binary tree table for extra bits over the direct access. */ 122 struct htree_t { 123 uint16_t left; 124 uint16_t right; 125 } *tree; 126 } lt, pt; 127 128 int blocks_avail; 129 int pos_pt_len_size; 130 int pos_pt_len_bits; 131 int literal_pt_len_size; 132 int literal_pt_len_bits; 133 int reading_position; 134 int loop; 135 int error; 136}; 137 138struct lzh_stream { 139 const unsigned char *next_in; 140 int avail_in; 141 int64_t total_in; 142 const unsigned char *ref_ptr; 143 int avail_out; 144 int64_t total_out; 145 struct lzh_dec *ds; 146}; 147 148struct lha { 149 /* entry_bytes_remaining is the number of bytes we expect. */ 150 int64_t entry_offset; 151 int64_t entry_bytes_remaining; 152 int64_t entry_unconsumed; 153 uint16_t entry_crc_calculated; 154 155 size_t header_size; /* header size */ 156 unsigned char level; /* header level */ 157 char method[3]; /* compress type */ 158 int64_t compsize; /* compressed data size */ 159 int64_t origsize; /* original file size */ 160 int setflag; 161#define BIRTHTIME_IS_SET 1 162#define ATIME_IS_SET 2 163#define UNIX_MODE_IS_SET 4 164#define CRC_IS_SET 8 165 time_t birthtime; 166 long birthtime_tv_nsec; 167 time_t mtime; 168 long mtime_tv_nsec; 169 time_t atime; 170 long atime_tv_nsec; 171 mode_t mode; 172 int64_t uid; 173 int64_t gid; 174 struct archive_string uname; 175 struct archive_string gname; 176 uint16_t header_crc; 177 uint16_t crc; 178 struct archive_string_conv *sconv; 179 struct archive_string_conv *opt_sconv; 180 181 struct archive_string dirname; 182 struct archive_string filename; 183 struct archive_wstring ws; 184 185 unsigned char dos_attr; 186 187 /* Flag to mark progress that an archive was read their first header.*/ 188 char found_first_header; 189 /* Flag to mark that indicates an empty directory. */ 190 char directory; 191 192 /* Flags to mark progress of decompression. */ 193 char decompress_init; 194 char end_of_entry; 195 char end_of_entry_cleanup; 196 char entry_is_compressed; 197 198 char format_name[64]; 199 200 struct lzh_stream strm; 201}; 202 203/* 204 * LHA header common member offset. 205 */ 206#define H_METHOD_OFFSET 2 /* Compress type. */ 207#define H_ATTR_OFFSET 19 /* DOS attribute. */ 208#define H_LEVEL_OFFSET 20 /* Header Level. */ 209#define H_SIZE 22 /* Minimum header size. */ 210 211static int archive_read_format_lha_bid(struct archive_read *, int); 212static int archive_read_format_lha_options(struct archive_read *, 213 const char *, const char *); 214static int archive_read_format_lha_read_header(struct archive_read *, 215 struct archive_entry *); 216static int archive_read_format_lha_read_data(struct archive_read *, 217 const void **, size_t *, int64_t *); 218static int archive_read_format_lha_read_data_skip(struct archive_read *); 219static int archive_read_format_lha_cleanup(struct archive_read *); 220 221static void lha_replace_path_separator(struct lha *, 222 struct archive_entry *); 223static int lha_read_file_header_0(struct archive_read *, struct lha *); 224static int lha_read_file_header_1(struct archive_read *, struct lha *); 225static int lha_read_file_header_2(struct archive_read *, struct lha *); 226static int lha_read_file_header_3(struct archive_read *, struct lha *); 227static int lha_read_file_extended_header(struct archive_read *, 228 struct lha *, uint16_t *, int, size_t, size_t *); 229static size_t lha_check_header_format(const void *); 230static int lha_skip_sfx(struct archive_read *); 231static time_t lha_dos_time(const unsigned char *); 232static time_t lha_win_time(uint64_t, long *); 233static unsigned char lha_calcsum(unsigned char, const void *, 234 int, size_t); 235static int lha_parse_linkname(struct archive_string *, 236 struct archive_string *); 237static int lha_read_data_none(struct archive_read *, const void **, 238 size_t *, int64_t *); 239static int lha_read_data_lzh(struct archive_read *, const void **, 240 size_t *, int64_t *); 241static void lha_crc16_init(void); 242static uint16_t lha_crc16(uint16_t, const void *, size_t); 243static int lzh_decode_init(struct lzh_stream *, const char *); 244static void lzh_decode_free(struct lzh_stream *); 245static int lzh_decode(struct lzh_stream *, int); 246static int lzh_br_fillup(struct lzh_stream *, struct lzh_br *); 247static int lzh_huffman_init(struct huffman *, size_t, int); 248static void lzh_huffman_free(struct huffman *); 249static int lzh_read_pt_bitlen(struct lzh_stream *, int start, int end); 250static int lzh_make_fake_table(struct huffman *, uint16_t); 251static int lzh_make_huffman_table(struct huffman *); 252static inline int lzh_decode_huffman(struct huffman *, unsigned); 253static int lzh_decode_huffman_tree(struct huffman *, unsigned, int); 254 255 256int 257archive_read_support_format_lha(struct archive *_a) 258{ 259 struct archive_read *a = (struct archive_read *)_a; 260 struct lha *lha; 261 int r; 262 263 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 264 ARCHIVE_STATE_NEW, "archive_read_support_format_lha"); 265 266 lha = (struct lha *)calloc(1, sizeof(*lha)); 267 if (lha == NULL) { 268 archive_set_error(&a->archive, ENOMEM, 269 "Can't allocate lha data"); 270 return (ARCHIVE_FATAL); 271 } 272 archive_string_init(&lha->ws); 273 274 r = __archive_read_register_format(a, 275 lha, 276 "lha", 277 archive_read_format_lha_bid, 278 archive_read_format_lha_options, 279 archive_read_format_lha_read_header, 280 archive_read_format_lha_read_data, 281 archive_read_format_lha_read_data_skip, 282 NULL, 283 archive_read_format_lha_cleanup, 284 NULL, 285 NULL); 286 287 if (r != ARCHIVE_OK) 288 free(lha); 289 return (ARCHIVE_OK); 290} 291 292static size_t 293lha_check_header_format(const void *h) 294{ 295 const unsigned char *p = h; 296 size_t next_skip_bytes; 297 298 switch (p[H_METHOD_OFFSET+3]) { 299 /* 300 * "-lh0-" ... "-lh7-" "-lhd-" 301 * "-lzs-" "-lz5-" 302 */ 303 case '0': case '1': case '2': case '3': 304 case '4': case '5': case '6': case '7': 305 case 'd': 306 case 's': 307 next_skip_bytes = 4; 308 309 /* b0 == 0 means the end of an LHa archive file. */ 310 if (p[0] == 0) 311 break; 312 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l' 313 || p[H_METHOD_OFFSET+4] != '-') 314 break; 315 316 if (p[H_METHOD_OFFSET+2] == 'h') { 317 /* "-lh?-" */ 318 if (p[H_METHOD_OFFSET+3] == 's') 319 break; 320 if (p[H_LEVEL_OFFSET] == 0) 321 return (0); 322 if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20) 323 return (0); 324 } 325 if (p[H_METHOD_OFFSET+2] == 'z') { 326 /* LArc extensions: -lzs-,-lz4- and -lz5- */ 327 if (p[H_LEVEL_OFFSET] != 0) 328 break; 329 if (p[H_METHOD_OFFSET+3] == 's' 330 || p[H_METHOD_OFFSET+3] == '4' 331 || p[H_METHOD_OFFSET+3] == '5') 332 return (0); 333 } 334 break; 335 case 'h': next_skip_bytes = 1; break; 336 case 'z': next_skip_bytes = 1; break; 337 case 'l': next_skip_bytes = 2; break; 338 case '-': next_skip_bytes = 3; break; 339 default : next_skip_bytes = 4; break; 340 } 341 342 return (next_skip_bytes); 343} 344 345static int 346archive_read_format_lha_bid(struct archive_read *a, int best_bid) 347{ 348 const char *p; 349 const void *buff; 350 ssize_t bytes_avail, offset, window; 351 size_t next; 352 353 /* If there's already a better bid than we can ever 354 make, don't bother testing. */ 355 if (best_bid > 30) 356 return (-1); 357 358 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) 359 return (-1); 360 361 if (lha_check_header_format(p) == 0) 362 return (30); 363 364 if (p[0] == 'M' && p[1] == 'Z') { 365 /* PE file */ 366 offset = 0; 367 window = 4096; 368 while (offset < (1024 * 20)) { 369 buff = __archive_read_ahead(a, offset + window, 370 &bytes_avail); 371 if (buff == NULL) { 372 /* Remaining bytes are less than window. */ 373 window >>= 1; 374 if (window < (H_SIZE + 3)) 375 return (0); 376 continue; 377 } 378 p = (const char *)buff + offset; 379 while (p + H_SIZE < (const char *)buff + bytes_avail) { 380 if ((next = lha_check_header_format(p)) == 0) 381 return (30); 382 p += next; 383 } 384 offset = p - (const char *)buff; 385 } 386 } 387 return (0); 388} 389 390static int 391archive_read_format_lha_options(struct archive_read *a, 392 const char *key, const char *val) 393{ 394 struct lha *lha; 395 int ret = ARCHIVE_FAILED; 396 397 lha = (struct lha *)(a->format->data); 398 if (strcmp(key, "hdrcharset") == 0) { 399 if (val == NULL || val[0] == 0) 400 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 401 "lha: hdrcharset option needs a character-set name"); 402 else { 403 lha->opt_sconv = 404 archive_string_conversion_from_charset( 405 &a->archive, val, 0); 406 if (lha->opt_sconv != NULL) 407 ret = ARCHIVE_OK; 408 else 409 ret = ARCHIVE_FATAL; 410 } 411 return (ret); 412 } 413 414 /* Note: The "warn" return is just to inform the options 415 * supervisor that we didn't handle it. It will generate 416 * a suitable error if no one used this option. */ 417 return (ARCHIVE_WARN); 418} 419 420static int 421lha_skip_sfx(struct archive_read *a) 422{ 423 const void *h; 424 const char *p, *q; 425 size_t next, skip; 426 ssize_t bytes, window; 427 428 window = 4096; 429 for (;;) { 430 h = __archive_read_ahead(a, window, &bytes); 431 if (h == NULL) { 432 /* Remaining bytes are less than window. */ 433 window >>= 1; 434 if (window < (H_SIZE + 3)) 435 goto fatal; 436 continue; 437 } 438 if (bytes < H_SIZE) 439 goto fatal; 440 p = h; 441 q = p + bytes; 442 443 /* 444 * Scan ahead until we find something that looks 445 * like the lha header. 446 */ 447 while (p + H_SIZE < q) { 448 if ((next = lha_check_header_format(p)) == 0) { 449 skip = p - (const char *)h; 450 __archive_read_consume(a, skip); 451 return (ARCHIVE_OK); 452 } 453 p += next; 454 } 455 skip = p - (const char *)h; 456 __archive_read_consume(a, skip); 457 } 458fatal: 459 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 460 "Couldn't find out LHa header"); 461 return (ARCHIVE_FATAL); 462} 463 464static int 465truncated_error(struct archive_read *a) 466{ 467 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 468 "Truncated LHa header"); 469 return (ARCHIVE_FATAL); 470} 471 472static int 473archive_read_format_lha_read_header(struct archive_read *a, 474 struct archive_entry *entry) 475{ 476 struct archive_string linkname; 477 struct archive_string pathname; 478 struct lha *lha; 479 const unsigned char *p; 480 const char *signature; 481 int err; 482 483 lha_crc16_init(); 484 485 a->archive.archive_format = ARCHIVE_FORMAT_LHA; 486 if (a->archive.archive_format_name == NULL) 487 a->archive.archive_format_name = "lha"; 488 489 lha = (struct lha *)(a->format->data); 490 lha->decompress_init = 0; 491 lha->end_of_entry = 0; 492 lha->end_of_entry_cleanup = 0; 493 lha->entry_unconsumed = 0; 494 495 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) { 496 /* 497 * LHa archiver added 0 to the tail of its archive file as 498 * the mark of the end of the archive. 499 */ 500 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL); 501 if (signature == NULL || signature[0] == 0) 502 return (ARCHIVE_EOF); 503 return (truncated_error(a)); 504 } 505 506 signature = (const char *)p; 507 if (lha->found_first_header == 0 && 508 signature[0] == 'M' && signature[1] == 'Z') { 509 /* This is an executable? Must be self-extracting... */ 510 err = lha_skip_sfx(a); 511 if (err < ARCHIVE_WARN) 512 return (err); 513 514 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL) 515 return (truncated_error(a)); 516 signature = (const char *)p; 517 } 518 /* signature[0] == 0 means the end of an LHa archive file. */ 519 if (signature[0] == 0) 520 return (ARCHIVE_EOF); 521 522 /* 523 * Check the header format and method type. 524 */ 525 if (lha_check_header_format(p) != 0) { 526 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 527 "Bad LHa file"); 528 return (ARCHIVE_FATAL); 529 } 530 531 /* We've found the first header. */ 532 lha->found_first_header = 1; 533 /* Set a default value and common data */ 534 lha->header_size = 0; 535 lha->level = p[H_LEVEL_OFFSET]; 536 lha->method[0] = p[H_METHOD_OFFSET+1]; 537 lha->method[1] = p[H_METHOD_OFFSET+2]; 538 lha->method[2] = p[H_METHOD_OFFSET+3]; 539 if (memcmp(lha->method, "lhd", 3) == 0) 540 lha->directory = 1; 541 else 542 lha->directory = 0; 543 if (memcmp(lha->method, "lh0", 3) == 0 || 544 memcmp(lha->method, "lz4", 3) == 0) 545 lha->entry_is_compressed = 0; 546 else 547 lha->entry_is_compressed = 1; 548 549 lha->compsize = 0; 550 lha->origsize = 0; 551 lha->setflag = 0; 552 lha->birthtime = 0; 553 lha->birthtime_tv_nsec = 0; 554 lha->mtime = 0; 555 lha->mtime_tv_nsec = 0; 556 lha->atime = 0; 557 lha->atime_tv_nsec = 0; 558 lha->mode = (lha->directory)? 0777 : 0666; 559 lha->uid = 0; 560 lha->gid = 0; 561 archive_string_empty(&lha->dirname); 562 archive_string_empty(&lha->filename); 563 lha->dos_attr = 0; 564 if (lha->opt_sconv != NULL) 565 lha->sconv = lha->opt_sconv; 566 else 567 lha->sconv = NULL; 568 569 switch (p[H_LEVEL_OFFSET]) { 570 case 0: 571 err = lha_read_file_header_0(a, lha); 572 break; 573 case 1: 574 err = lha_read_file_header_1(a, lha); 575 break; 576 case 2: 577 err = lha_read_file_header_2(a, lha); 578 break; 579 case 3: 580 err = lha_read_file_header_3(a, lha); 581 break; 582 default: 583 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 584 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]); 585 err = ARCHIVE_FATAL; 586 break; 587 } 588 if (err < ARCHIVE_WARN) 589 return (err); 590 591 592 if (!lha->directory && archive_strlen(&lha->filename) == 0) 593 /* The filename has not been set */ 594 return (truncated_error(a)); 595 596 /* 597 * Make a pathname from a dirname and a filename. 598 */ 599 archive_string_concat(&lha->dirname, &lha->filename); 600 archive_string_init(&pathname); 601 archive_string_init(&linkname); 602 archive_string_copy(&pathname, &lha->dirname); 603 604 if ((lha->mode & AE_IFMT) == AE_IFLNK) { 605 /* 606 * Extract the symlink-name if it's included in the pathname. 607 */ 608 if (!lha_parse_linkname(&linkname, &pathname)) { 609 /* We couldn't get the symlink-name. */ 610 archive_set_error(&a->archive, 611 ARCHIVE_ERRNO_FILE_FORMAT, 612 "Unknown symlink-name"); 613 archive_string_free(&pathname); 614 archive_string_free(&linkname); 615 return (ARCHIVE_FAILED); 616 } 617 } else { 618 /* 619 * Make sure a file-type is set. 620 * The mode has been overridden if it is in the extended data. 621 */ 622 lha->mode = (lha->mode & ~AE_IFMT) | 623 ((lha->directory)? AE_IFDIR: AE_IFREG); 624 } 625 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 && 626 (lha->dos_attr & 1) != 0) 627 lha->mode &= ~(0222);/* read only. */ 628 629 /* 630 * Set basic file parameters. 631 */ 632 if (archive_entry_copy_pathname_l(entry, pathname.s, 633 pathname.length, lha->sconv) != 0) { 634 if (errno == ENOMEM) { 635 archive_set_error(&a->archive, ENOMEM, 636 "Can't allocate memory for Pathname"); 637 return (ARCHIVE_FATAL); 638 } 639 archive_set_error(&a->archive, 640 ARCHIVE_ERRNO_FILE_FORMAT, 641 "Pathname cannot be converted " 642 "from %s to current locale.", 643 archive_string_conversion_charset_name(lha->sconv)); 644 err = ARCHIVE_WARN; 645 } 646 archive_string_free(&pathname); 647 if (archive_strlen(&linkname) > 0) { 648 if (archive_entry_copy_symlink_l(entry, linkname.s, 649 linkname.length, lha->sconv) != 0) { 650 if (errno == ENOMEM) { 651 archive_set_error(&a->archive, ENOMEM, 652 "Can't allocate memory for Linkname"); 653 return (ARCHIVE_FATAL); 654 } 655 archive_set_error(&a->archive, 656 ARCHIVE_ERRNO_FILE_FORMAT, 657 "Linkname cannot be converted " 658 "from %s to current locale.", 659 archive_string_conversion_charset_name(lha->sconv)); 660 err = ARCHIVE_WARN; 661 } 662 } else 663 archive_entry_set_symlink(entry, NULL); 664 archive_string_free(&linkname); 665 /* 666 * When a header level is 0, there is a possibility that 667 * a pathname and a symlink has '\' character, a directory 668 * separator in DOS/Windows. So we should convert it to '/'. 669 */ 670 if (p[H_LEVEL_OFFSET] == 0) 671 lha_replace_path_separator(lha, entry); 672 673 archive_entry_set_mode(entry, lha->mode); 674 archive_entry_set_uid(entry, lha->uid); 675 archive_entry_set_gid(entry, lha->gid); 676 if (archive_strlen(&lha->uname) > 0) 677 archive_entry_set_uname(entry, lha->uname.s); 678 if (archive_strlen(&lha->gname) > 0) 679 archive_entry_set_gname(entry, lha->gname.s); 680 if (lha->setflag & BIRTHTIME_IS_SET) { 681 archive_entry_set_birthtime(entry, lha->birthtime, 682 lha->birthtime_tv_nsec); 683 archive_entry_set_ctime(entry, lha->birthtime, 684 lha->birthtime_tv_nsec); 685 } else { 686 archive_entry_unset_birthtime(entry); 687 archive_entry_unset_ctime(entry); 688 } 689 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec); 690 if (lha->setflag & ATIME_IS_SET) 691 archive_entry_set_atime(entry, lha->atime, 692 lha->atime_tv_nsec); 693 else 694 archive_entry_unset_atime(entry); 695 if (lha->directory || archive_entry_symlink(entry) != NULL) 696 archive_entry_unset_size(entry); 697 else 698 archive_entry_set_size(entry, lha->origsize); 699 700 /* 701 * Prepare variables used to read a file content. 702 */ 703 lha->entry_bytes_remaining = lha->compsize; 704 if (lha->entry_bytes_remaining < 0) { 705 archive_set_error(&a->archive, 706 ARCHIVE_ERRNO_FILE_FORMAT, 707 "Invalid LHa entry size"); 708 return (ARCHIVE_FATAL); 709 } 710 lha->entry_offset = 0; 711 lha->entry_crc_calculated = 0; 712 713 /* 714 * This file does not have a content. 715 */ 716 if (lha->directory || lha->compsize == 0) 717 lha->end_of_entry = 1; 718 719 sprintf(lha->format_name, "lha -%c%c%c-", 720 lha->method[0], lha->method[1], lha->method[2]); 721 a->archive.archive_format_name = lha->format_name; 722 723 return (err); 724} 725 726/* 727 * Replace a DOS path separator '\' by a character '/'. 728 * Some multi-byte character set have a character '\' in its second byte. 729 */ 730static void 731lha_replace_path_separator(struct lha *lha, struct archive_entry *entry) 732{ 733 const wchar_t *wp; 734 size_t i; 735 736 if ((wp = archive_entry_pathname_w(entry)) != NULL) { 737 archive_wstrcpy(&(lha->ws), wp); 738 for (i = 0; i < archive_strlen(&(lha->ws)); i++) { 739 if (lha->ws.s[i] == L'\\') 740 lha->ws.s[i] = L'/'; 741 } 742 archive_entry_copy_pathname_w(entry, lha->ws.s); 743 } 744 745 if ((wp = archive_entry_symlink_w(entry)) != NULL) { 746 archive_wstrcpy(&(lha->ws), wp); 747 for (i = 0; i < archive_strlen(&(lha->ws)); i++) { 748 if (lha->ws.s[i] == L'\\') 749 lha->ws.s[i] = L'/'; 750 } 751 archive_entry_copy_symlink_w(entry, lha->ws.s); 752 } 753} 754 755/* 756 * Header 0 format 757 * 758 * +0 +1 +2 +7 +11 759 * +---------------+----------+----------------+-------------------+ 760 * |header size(*1)|header sum|compression type|compressed size(*2)| 761 * +---------------+----------+----------------+-------------------+ 762 * <---------------------(*1)----------* 763 * 764 * +11 +15 +17 +19 +20 +21 765 * +-----------------+---------+---------+--------------+----------------+ 766 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)| 767 * +-----------------+---------+---------+--------------+----------------+ 768 * *--------------------------------(*1)---------------------------------* 769 * 770 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4) 771 * +---------------+---------+----------+----------------+------------------+ 772 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data | 773 * +---------------+---------+----------+----------------+------------------+ 774 * <--(*3)-> <------(*2)------> 775 * *----------------------(*1)--------------------------> 776 * 777 */ 778#define H0_HEADER_SIZE_OFFSET 0 779#define H0_HEADER_SUM_OFFSET 1 780#define H0_COMP_SIZE_OFFSET 7 781#define H0_ORIG_SIZE_OFFSET 11 782#define H0_DOS_TIME_OFFSET 15 783#define H0_NAME_LEN_OFFSET 21 784#define H0_FILE_NAME_OFFSET 22 785#define H0_FIXED_SIZE 24 786static int 787lha_read_file_header_0(struct archive_read *a, struct lha *lha) 788{ 789 const unsigned char *p; 790 int extdsize, namelen; 791 unsigned char headersum, sum_calculated; 792 793 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL) 794 return (truncated_error(a)); 795 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2; 796 headersum = p[H0_HEADER_SUM_OFFSET]; 797 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET); 798 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET); 799 lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET); 800 namelen = p[H0_NAME_LEN_OFFSET]; 801 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen; 802 if ((namelen > 221 || extdsize < 0) && extdsize != -2) { 803 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 804 "Invalid LHa header"); 805 return (ARCHIVE_FATAL); 806 } 807 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL) 808 return (truncated_error(a)); 809 810 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen); 811 /* When extdsize == -2, A CRC16 value is not present in the header. */ 812 if (extdsize >= 0) { 813 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen); 814 lha->setflag |= CRC_IS_SET; 815 } 816 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2); 817 818 /* Read an extended header */ 819 if (extdsize > 0) { 820 /* This extended data is set by 'LHa for UNIX' only. 821 * Maybe fixed size. 822 */ 823 p += H0_FILE_NAME_OFFSET + namelen + 2; 824 if (p[0] == 'U' && extdsize == 12) { 825 /* p[1] is a minor version. */ 826 lha->mtime = archive_le32dec(&p[2]); 827 lha->mode = archive_le16dec(&p[6]); 828 lha->uid = archive_le16dec(&p[8]); 829 lha->gid = archive_le16dec(&p[10]); 830 lha->setflag |= UNIX_MODE_IS_SET; 831 } 832 } 833 __archive_read_consume(a, lha->header_size); 834 835 if (sum_calculated != headersum) { 836 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 837 "LHa header sum error"); 838 return (ARCHIVE_FATAL); 839 } 840 841 return (ARCHIVE_OK); 842} 843 844/* 845 * Header 1 format 846 * 847 * +0 +1 +2 +7 +11 848 * +---------------+----------+----------------+-------------+ 849 * |header size(*1)|header sum|compression type|skip size(*2)| 850 * +---------------+----------+----------------+-------------+ 851 * <---------------(*1)----------* 852 * 853 * +11 +15 +17 +19 +20 +21 854 * +-----------------+---------+---------+--------------+----------------+ 855 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)| 856 * +-----------------+---------+---------+--------------+----------------+ 857 * *-------------------------------(*1)----------------------------------* 858 * 859 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4) 860 * +---------------+---------+----------+-----------+-----------+ 861 * |name length(*3)|file name|file CRC16| creator |padding(*4)| 862 * +---------------+---------+----------+-----------+-----------+ 863 * <--(*3)-> 864 * *----------------------------(*1)----------------------------* 865 * 866 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5) 867 * +----------------+---------------------+------------------------+ 868 * |next header size| extended header(*5) | compressed data | 869 * +----------------+---------------------+------------------------+ 870 * *------(*1)-----> <--------------------(*2)--------------------> 871 */ 872#define H1_HEADER_SIZE_OFFSET 0 873#define H1_HEADER_SUM_OFFSET 1 874#define H1_COMP_SIZE_OFFSET 7 875#define H1_ORIG_SIZE_OFFSET 11 876#define H1_DOS_TIME_OFFSET 15 877#define H1_NAME_LEN_OFFSET 21 878#define H1_FILE_NAME_OFFSET 22 879#define H1_FIXED_SIZE 27 880static int 881lha_read_file_header_1(struct archive_read *a, struct lha *lha) 882{ 883 const unsigned char *p; 884 size_t extdsize; 885 int i, err, err2; 886 int namelen, padding; 887 unsigned char headersum, sum_calculated; 888 889 err = ARCHIVE_OK; 890 891 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL) 892 return (truncated_error(a)); 893 894 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2; 895 headersum = p[H1_HEADER_SUM_OFFSET]; 896 /* Note: An extended header size is included in a compsize. */ 897 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET); 898 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET); 899 lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET); 900 namelen = p[H1_NAME_LEN_OFFSET]; 901 /* Calculate a padding size. The result will be normally 0 only(?) */ 902 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen; 903 904 if (namelen > 230 || padding < 0) 905 goto invalid; 906 907 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL) 908 return (truncated_error(a)); 909 910 for (i = 0; i < namelen; i++) { 911 if (p[i + H1_FILE_NAME_OFFSET] == 0xff) 912 goto invalid;/* Invalid filename. */ 913 } 914 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen); 915 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen); 916 lha->setflag |= CRC_IS_SET; 917 918 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2); 919 /* Consume used bytes but not include `next header size' data 920 * since it will be consumed in lha_read_file_extended_header(). */ 921 __archive_read_consume(a, lha->header_size - 2); 922 923 /* Read extended headers */ 924 err2 = lha_read_file_extended_header(a, lha, NULL, 2, 925 (size_t)(lha->compsize + 2), &extdsize); 926 if (err2 < ARCHIVE_WARN) 927 return (err2); 928 if (err2 < err) 929 err = err2; 930 /* Get a real compressed file size. */ 931 lha->compsize -= extdsize - 2; 932 933 if (lha->compsize < 0) 934 goto invalid; /* Invalid compressed file size */ 935 936 if (sum_calculated != headersum) { 937 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 938 "LHa header sum error"); 939 return (ARCHIVE_FATAL); 940 } 941 return (err); 942invalid: 943 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 944 "Invalid LHa header"); 945 return (ARCHIVE_FATAL); 946} 947 948/* 949 * Header 2 format 950 * 951 * +0 +2 +7 +11 +15 952 * +---------------+----------------+-------------------+-----------------+ 953 * |header size(*1)|compression type|compressed size(*2)|uncompressed size| 954 * +---------------+----------------+-------------------+-----------------+ 955 * <--------------------------------(*1)---------------------------------* 956 * 957 * +15 +19 +20 +21 +23 +24 958 * +-----------------+------------+----------------+----------+-----------+ 959 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16| creator | 960 * +-----------------+------------+----------------+----------+-----------+ 961 * *---------------------------------(*1)---------------------------------* 962 * 963 * +24 +26 +26+(*3) +26+(*3)+(*4) 964 * +----------------+-------------------+-------------+-------------------+ 965 * |next header size|extended header(*3)| padding(*4) | compressed data | 966 * +----------------+-------------------+-------------+-------------------+ 967 * *--------------------------(*1)-------------------> <------(*2)-------> 968 * 969 */ 970#define H2_HEADER_SIZE_OFFSET 0 971#define H2_COMP_SIZE_OFFSET 7 972#define H2_ORIG_SIZE_OFFSET 11 973#define H2_TIME_OFFSET 15 974#define H2_CRC_OFFSET 21 975#define H2_FIXED_SIZE 24 976static int 977lha_read_file_header_2(struct archive_read *a, struct lha *lha) 978{ 979 const unsigned char *p; 980 size_t extdsize; 981 int err, padding; 982 uint16_t header_crc; 983 984 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL) 985 return (truncated_error(a)); 986 987 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET); 988 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET); 989 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET); 990 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET); 991 lha->crc = archive_le16dec(p + H2_CRC_OFFSET); 992 lha->setflag |= CRC_IS_SET; 993 994 if (lha->header_size < H2_FIXED_SIZE) { 995 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 996 "Invalid LHa header size"); 997 return (ARCHIVE_FATAL); 998 } 999 1000 header_crc = lha_crc16(0, p, H2_FIXED_SIZE); 1001 __archive_read_consume(a, H2_FIXED_SIZE); 1002 1003 /* Read extended headers */ 1004 err = lha_read_file_extended_header(a, lha, &header_crc, 2, 1005 lha->header_size - H2_FIXED_SIZE, &extdsize); 1006 if (err < ARCHIVE_WARN) 1007 return (err); 1008 1009 /* Calculate a padding size. The result will be normally 0 or 1. */ 1010 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize); 1011 if (padding > 0) { 1012 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL) 1013 return (truncated_error(a)); 1014 header_crc = lha_crc16(header_crc, p, padding); 1015 __archive_read_consume(a, padding); 1016 } 1017 1018 if (header_crc != lha->header_crc) { 1019 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1020 "LHa header CRC error"); 1021 return (ARCHIVE_FATAL); 1022 } 1023 return (err); 1024} 1025 1026/* 1027 * Header 3 format 1028 * 1029 * +0 +2 +7 +11 +15 1030 * +------------+----------------+-------------------+-----------------+ 1031 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size| 1032 * +------------+----------------+-------------------+-----------------+ 1033 * <-------------------------------(*1)-------------------------------* 1034 * 1035 * +15 +19 +20 +21 +23 +24 1036 * +-----------------+------------+----------------+----------+-----------+ 1037 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator | 1038 * +-----------------+------------+----------------+----------+-----------+ 1039 * *--------------------------------(*1)----------------------------------* 1040 * 1041 * +24 +28 +32 +32+(*3) 1042 * +---------------+----------------+-------------------+-----------------+ 1043 * |header size(*1)|next header size|extended header(*3)| compressed data | 1044 * +---------------+----------------+-------------------+-----------------+ 1045 * *------------------------(*1)-----------------------> <------(*2)-----> 1046 * 1047 */ 1048#define H3_FIELD_LEN_OFFSET 0 1049#define H3_COMP_SIZE_OFFSET 7 1050#define H3_ORIG_SIZE_OFFSET 11 1051#define H3_TIME_OFFSET 15 1052#define H3_CRC_OFFSET 21 1053#define H3_HEADER_SIZE_OFFSET 24 1054#define H3_FIXED_SIZE 28 1055static int 1056lha_read_file_header_3(struct archive_read *a, struct lha *lha) 1057{ 1058 const unsigned char *p; 1059 size_t extdsize; 1060 int err; 1061 uint16_t header_crc; 1062 1063 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL) 1064 return (truncated_error(a)); 1065 1066 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4) 1067 goto invalid; 1068 lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET); 1069 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET); 1070 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET); 1071 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET); 1072 lha->crc = archive_le16dec(p + H3_CRC_OFFSET); 1073 lha->setflag |= CRC_IS_SET; 1074 1075 if (lha->header_size < H3_FIXED_SIZE + 4) 1076 goto invalid; 1077 header_crc = lha_crc16(0, p, H3_FIXED_SIZE); 1078 __archive_read_consume(a, H3_FIXED_SIZE); 1079 1080 /* Read extended headers */ 1081 err = lha_read_file_extended_header(a, lha, &header_crc, 4, 1082 lha->header_size - H3_FIXED_SIZE, &extdsize); 1083 if (err < ARCHIVE_WARN) 1084 return (err); 1085 1086 if (header_crc != lha->header_crc) { 1087 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1088 "LHa header CRC error"); 1089 return (ARCHIVE_FATAL); 1090 } 1091 return (err); 1092invalid: 1093 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1094 "Invalid LHa header"); 1095 return (ARCHIVE_FATAL); 1096} 1097 1098/* 1099 * Extended header format 1100 * 1101 * +0 +2 +3 -- used in header 1 and 2 1102 * +0 +4 +5 -- used in header 3 1103 * +--------------+---------+-------------------+--------------+-- 1104 * |ex-header size|header id| data |ex-header size| ....... 1105 * +--------------+---------+-------------------+--------------+-- 1106 * <-------------( ex-header size)------------> <-- next extended header --* 1107 * 1108 * If the ex-header size is zero, it is the make of the end of extended 1109 * headers. 1110 * 1111 */ 1112static int 1113lha_read_file_extended_header(struct archive_read *a, struct lha *lha, 1114 uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size) 1115{ 1116 const void *h; 1117 const unsigned char *extdheader; 1118 size_t extdsize; 1119 size_t datasize; 1120 unsigned int i; 1121 unsigned char extdtype; 1122 1123#define EXT_HEADER_CRC 0x00 /* Header CRC and information*/ 1124#define EXT_FILENAME 0x01 /* Filename */ 1125#define EXT_DIRECTORY 0x02 /* Directory name */ 1126#define EXT_DOS_ATTR 0x40 /* MS-DOS attribute */ 1127#define EXT_TIMESTAMP 0x41 /* Windows time stamp */ 1128#define EXT_FILESIZE 0x42 /* Large file size */ 1129#define EXT_TIMEZONE 0x43 /* Time zone */ 1130#define EXT_UTF16_FILENAME 0x44 /* UTF-16 filename */ 1131#define EXT_UTF16_DIRECTORY 0x45 /* UTF-16 directory name */ 1132#define EXT_CODEPAGE 0x46 /* Codepage */ 1133#define EXT_UNIX_MODE 0x50 /* File permission */ 1134#define EXT_UNIX_GID_UID 0x51 /* gid,uid */ 1135#define EXT_UNIX_GNAME 0x52 /* Group name */ 1136#define EXT_UNIX_UNAME 0x53 /* User name */ 1137#define EXT_UNIX_MTIME 0x54 /* Modified time */ 1138#define EXT_OS2_NEW_ATTR 0x7f /* new attribute(OS/2 only) */ 1139#define EXT_NEW_ATTR 0xff /* new attribute */ 1140 1141 *total_size = sizefield_length; 1142 1143 for (;;) { 1144 /* Read an extended header size. */ 1145 if ((h = 1146 __archive_read_ahead(a, sizefield_length, NULL)) == NULL) 1147 return (truncated_error(a)); 1148 /* Check if the size is the zero indicates the end of the 1149 * extended header. */ 1150 if (sizefield_length == sizeof(uint16_t)) 1151 extdsize = archive_le16dec(h); 1152 else 1153 extdsize = archive_le32dec(h); 1154 if (extdsize == 0) { 1155 /* End of extended header */ 1156 if (crc != NULL) 1157 *crc = lha_crc16(*crc, h, sizefield_length); 1158 __archive_read_consume(a, sizefield_length); 1159 return (ARCHIVE_OK); 1160 } 1161 1162 /* Sanity check to the extended header size. */ 1163 if (((uint64_t)*total_size + extdsize) > 1164 (uint64_t)limitsize || 1165 extdsize <= (size_t)sizefield_length) 1166 goto invalid; 1167 1168 /* Read the extended header. */ 1169 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL) 1170 return (truncated_error(a)); 1171 *total_size += extdsize; 1172 1173 extdheader = (const unsigned char *)h; 1174 /* Get the extended header type. */ 1175 extdtype = extdheader[sizefield_length]; 1176 /* Calculate an extended data size. */ 1177 datasize = extdsize - (1 + sizefield_length); 1178 /* Skip an extended header size field and type field. */ 1179 extdheader += sizefield_length + 1; 1180 1181 if (crc != NULL && extdtype != EXT_HEADER_CRC) 1182 *crc = lha_crc16(*crc, h, extdsize); 1183 switch (extdtype) { 1184 case EXT_HEADER_CRC: 1185 /* We only use a header CRC. Following data will not 1186 * be used. */ 1187 if (datasize >= 2) { 1188 lha->header_crc = archive_le16dec(extdheader); 1189 if (crc != NULL) { 1190 static const char zeros[2] = {0, 0}; 1191 *crc = lha_crc16(*crc, h, 1192 extdsize - datasize); 1193 /* CRC value itself as zero */ 1194 *crc = lha_crc16(*crc, zeros, 2); 1195 *crc = lha_crc16(*crc, 1196 extdheader+2, datasize - 2); 1197 } 1198 } 1199 break; 1200 case EXT_FILENAME: 1201 if (datasize == 0) { 1202 /* maybe directory header */ 1203 archive_string_empty(&lha->filename); 1204 break; 1205 } 1206 if (extdheader[0] == '\0') 1207 goto invalid; 1208 archive_strncpy(&lha->filename, 1209 (const char *)extdheader, datasize); 1210 break; 1211 case EXT_DIRECTORY: 1212 if (datasize == 0 || extdheader[0] == '\0') 1213 /* no directory name data. exit this case. */ 1214 goto invalid; 1215 1216 archive_strncpy(&lha->dirname, 1217 (const char *)extdheader, datasize); 1218 /* 1219 * Convert directory delimiter from 0xFF 1220 * to '/' for local system. 1221 */ 1222 for (i = 0; i < lha->dirname.length; i++) { 1223 if ((unsigned char)lha->dirname.s[i] == 0xFF) 1224 lha->dirname.s[i] = '/'; 1225 } 1226 /* Is last character directory separator? */ 1227 if (lha->dirname.s[lha->dirname.length-1] != '/') 1228 /* invalid directory data */ 1229 goto invalid; 1230 break; 1231 case EXT_DOS_ATTR: 1232 if (datasize == 2) 1233 lha->dos_attr = (unsigned char) 1234 (archive_le16dec(extdheader) & 0xff); 1235 break; 1236 case EXT_TIMESTAMP: 1237 if (datasize == (sizeof(uint64_t) * 3)) { 1238 lha->birthtime = lha_win_time( 1239 archive_le64dec(extdheader), 1240 &lha->birthtime_tv_nsec); 1241 extdheader += sizeof(uint64_t); 1242 lha->mtime = lha_win_time( 1243 archive_le64dec(extdheader), 1244 &lha->mtime_tv_nsec); 1245 extdheader += sizeof(uint64_t); 1246 lha->atime = lha_win_time( 1247 archive_le64dec(extdheader), 1248 &lha->atime_tv_nsec); 1249 lha->setflag |= BIRTHTIME_IS_SET | 1250 ATIME_IS_SET; 1251 } 1252 break; 1253 case EXT_FILESIZE: 1254 if (datasize == sizeof(uint64_t) * 2) { 1255 lha->compsize = archive_le64dec(extdheader); 1256 extdheader += sizeof(uint64_t); 1257 lha->origsize = archive_le64dec(extdheader); 1258 } 1259 break; 1260 case EXT_CODEPAGE: 1261 /* Get an archived filename charset from codepage. 1262 * This overwrites the charset specified by 1263 * hdrcharset option. */ 1264 if (datasize == sizeof(uint32_t)) { 1265 struct archive_string cp; 1266 const char *charset; 1267 1268 archive_string_init(&cp); 1269 switch (archive_le32dec(extdheader)) { 1270 case 65001: /* UTF-8 */ 1271 charset = "UTF-8"; 1272 break; 1273 default: 1274 archive_string_sprintf(&cp, "CP%d", 1275 (int)archive_le32dec(extdheader)); 1276 charset = cp.s; 1277 break; 1278 } 1279 lha->sconv = 1280 archive_string_conversion_from_charset( 1281 &(a->archive), charset, 1); 1282 archive_string_free(&cp); 1283 if (lha->sconv == NULL) 1284 return (ARCHIVE_FATAL); 1285 } 1286 break; 1287 case EXT_UNIX_MODE: 1288 if (datasize == sizeof(uint16_t)) { 1289 lha->mode = archive_le16dec(extdheader); 1290 lha->setflag |= UNIX_MODE_IS_SET; 1291 } 1292 break; 1293 case EXT_UNIX_GID_UID: 1294 if (datasize == (sizeof(uint16_t) * 2)) { 1295 lha->gid = archive_le16dec(extdheader); 1296 lha->uid = archive_le16dec(extdheader+2); 1297 } 1298 break; 1299 case EXT_UNIX_GNAME: 1300 if (datasize > 0) 1301 archive_strncpy(&lha->gname, 1302 (const char *)extdheader, datasize); 1303 break; 1304 case EXT_UNIX_UNAME: 1305 if (datasize > 0) 1306 archive_strncpy(&lha->uname, 1307 (const char *)extdheader, datasize); 1308 break; 1309 case EXT_UNIX_MTIME: 1310 if (datasize == sizeof(uint32_t)) 1311 lha->mtime = archive_le32dec(extdheader); 1312 break; 1313 case EXT_OS2_NEW_ATTR: 1314 /* This extended header is OS/2 depend. */ 1315 if (datasize == 16) { 1316 lha->dos_attr = (unsigned char) 1317 (archive_le16dec(extdheader) & 0xff); 1318 lha->mode = archive_le16dec(extdheader+2); 1319 lha->gid = archive_le16dec(extdheader+4); 1320 lha->uid = archive_le16dec(extdheader+6); 1321 lha->birthtime = archive_le32dec(extdheader+8); 1322 lha->atime = archive_le32dec(extdheader+12); 1323 lha->setflag |= UNIX_MODE_IS_SET 1324 | BIRTHTIME_IS_SET | ATIME_IS_SET; 1325 } 1326 break; 1327 case EXT_NEW_ATTR: 1328 if (datasize == 20) { 1329 lha->mode = (mode_t)archive_le32dec(extdheader); 1330 lha->gid = archive_le32dec(extdheader+4); 1331 lha->uid = archive_le32dec(extdheader+8); 1332 lha->birthtime = archive_le32dec(extdheader+12); 1333 lha->atime = archive_le32dec(extdheader+16); 1334 lha->setflag |= UNIX_MODE_IS_SET 1335 | BIRTHTIME_IS_SET | ATIME_IS_SET; 1336 } 1337 break; 1338 case EXT_TIMEZONE: /* Not supported */ 1339 case EXT_UTF16_FILENAME: /* Not supported */ 1340 case EXT_UTF16_DIRECTORY: /* Not supported */ 1341 default: 1342 break; 1343 } 1344 1345 __archive_read_consume(a, extdsize); 1346 } 1347invalid: 1348 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1349 "Invalid extended LHa header"); 1350 return (ARCHIVE_FATAL); 1351} 1352 1353static int 1354lha_end_of_entry(struct archive_read *a) 1355{ 1356 struct lha *lha = (struct lha *)(a->format->data); 1357 int r = ARCHIVE_EOF; 1358 1359 if (!lha->end_of_entry_cleanup) { 1360 if ((lha->setflag & CRC_IS_SET) && 1361 lha->crc != lha->entry_crc_calculated) { 1362 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1363 "LHa data CRC error"); 1364 r = ARCHIVE_WARN; 1365 } 1366 1367 /* End-of-entry cleanup done. */ 1368 lha->end_of_entry_cleanup = 1; 1369 } 1370 return (r); 1371} 1372 1373static int 1374archive_read_format_lha_read_data(struct archive_read *a, 1375 const void **buff, size_t *size, int64_t *offset) 1376{ 1377 struct lha *lha = (struct lha *)(a->format->data); 1378 int r; 1379 1380 if (lha->entry_unconsumed) { 1381 /* Consume as much as the decompressor actually used. */ 1382 __archive_read_consume(a, lha->entry_unconsumed); 1383 lha->entry_unconsumed = 0; 1384 } 1385 if (lha->end_of_entry) { 1386 *offset = lha->entry_offset; 1387 *size = 0; 1388 *buff = NULL; 1389 return (lha_end_of_entry(a)); 1390 } 1391 1392 if (lha->entry_is_compressed) 1393 r = lha_read_data_lzh(a, buff, size, offset); 1394 else 1395 /* No compression. */ 1396 r = lha_read_data_none(a, buff, size, offset); 1397 return (r); 1398} 1399 1400/* 1401 * Read a file content in no compression. 1402 * 1403 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets 1404 * lha->end_of_entry if it consumes all of the data. 1405 */ 1406static int 1407lha_read_data_none(struct archive_read *a, const void **buff, 1408 size_t *size, int64_t *offset) 1409{ 1410 struct lha *lha = (struct lha *)(a->format->data); 1411 ssize_t bytes_avail; 1412 1413 if (lha->entry_bytes_remaining == 0) { 1414 *buff = NULL; 1415 *size = 0; 1416 *offset = lha->entry_offset; 1417 lha->end_of_entry = 1; 1418 return (ARCHIVE_OK); 1419 } 1420 /* 1421 * Note: '1' here is a performance optimization. 1422 * Recall that the decompression layer returns a count of 1423 * available bytes; asking for more than that forces the 1424 * decompressor to combine reads by copying data. 1425 */ 1426 *buff = __archive_read_ahead(a, 1, &bytes_avail); 1427 if (bytes_avail <= 0) { 1428 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1429 "Truncated LHa file data"); 1430 return (ARCHIVE_FATAL); 1431 } 1432 if (bytes_avail > lha->entry_bytes_remaining) 1433 bytes_avail = (ssize_t)lha->entry_bytes_remaining; 1434 lha->entry_crc_calculated = 1435 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail); 1436 *size = bytes_avail; 1437 *offset = lha->entry_offset; 1438 lha->entry_offset += bytes_avail; 1439 lha->entry_bytes_remaining -= bytes_avail; 1440 if (lha->entry_bytes_remaining == 0) 1441 lha->end_of_entry = 1; 1442 lha->entry_unconsumed = bytes_avail; 1443 return (ARCHIVE_OK); 1444} 1445 1446/* 1447 * Read a file content in LZHUFF encoding. 1448 * 1449 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is 1450 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes 1451 * all of the data. 1452 */ 1453static int 1454lha_read_data_lzh(struct archive_read *a, const void **buff, 1455 size_t *size, int64_t *offset) 1456{ 1457 struct lha *lha = (struct lha *)(a->format->data); 1458 ssize_t bytes_avail; 1459 int r; 1460 1461 /* If we haven't yet read any data, initialize the decompressor. */ 1462 if (!lha->decompress_init) { 1463 r = lzh_decode_init(&(lha->strm), lha->method); 1464 switch (r) { 1465 case ARCHIVE_OK: 1466 break; 1467 case ARCHIVE_FAILED: 1468 /* Unsupported compression. */ 1469 *buff = NULL; 1470 *size = 0; 1471 *offset = 0; 1472 archive_set_error(&a->archive, 1473 ARCHIVE_ERRNO_FILE_FORMAT, 1474 "Unsupported lzh compression method -%c%c%c-", 1475 lha->method[0], lha->method[1], lha->method[2]); 1476 /* We know compressed size; just skip it. */ 1477 archive_read_format_lha_read_data_skip(a); 1478 return (ARCHIVE_WARN); 1479 default: 1480 archive_set_error(&a->archive, ENOMEM, 1481 "Couldn't allocate memory " 1482 "for lzh decompression"); 1483 return (ARCHIVE_FATAL); 1484 } 1485 /* We've initialized decompression for this stream. */ 1486 lha->decompress_init = 1; 1487 lha->strm.avail_out = 0; 1488 lha->strm.total_out = 0; 1489 } 1490 1491 /* 1492 * Note: '1' here is a performance optimization. 1493 * Recall that the decompression layer returns a count of 1494 * available bytes; asking for more than that forces the 1495 * decompressor to combine reads by copying data. 1496 */ 1497 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail); 1498 if (bytes_avail <= 0) { 1499 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1500 "Truncated LHa file body"); 1501 return (ARCHIVE_FATAL); 1502 } 1503 if (bytes_avail > lha->entry_bytes_remaining) 1504 bytes_avail = (ssize_t)lha->entry_bytes_remaining; 1505 1506 lha->strm.avail_in = (int)bytes_avail; 1507 lha->strm.total_in = 0; 1508 lha->strm.avail_out = 0; 1509 1510 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining); 1511 switch (r) { 1512 case ARCHIVE_OK: 1513 break; 1514 case ARCHIVE_EOF: 1515 lha->end_of_entry = 1; 1516 break; 1517 default: 1518 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1519 "Bad lzh data"); 1520 return (ARCHIVE_FAILED); 1521 } 1522 lha->entry_unconsumed = lha->strm.total_in; 1523 lha->entry_bytes_remaining -= lha->strm.total_in; 1524 1525 if (lha->strm.avail_out) { 1526 *offset = lha->entry_offset; 1527 *size = lha->strm.avail_out; 1528 *buff = lha->strm.ref_ptr; 1529 lha->entry_crc_calculated = 1530 lha_crc16(lha->entry_crc_calculated, *buff, *size); 1531 lha->entry_offset += *size; 1532 } else { 1533 *offset = lha->entry_offset; 1534 *size = 0; 1535 *buff = NULL; 1536 if (lha->end_of_entry) 1537 return (lha_end_of_entry(a)); 1538 } 1539 return (ARCHIVE_OK); 1540} 1541 1542/* 1543 * Skip a file content. 1544 */ 1545static int 1546archive_read_format_lha_read_data_skip(struct archive_read *a) 1547{ 1548 struct lha *lha; 1549 int64_t bytes_skipped; 1550 1551 lha = (struct lha *)(a->format->data); 1552 1553 if (lha->entry_unconsumed) { 1554 /* Consume as much as the decompressor actually used. */ 1555 __archive_read_consume(a, lha->entry_unconsumed); 1556 lha->entry_unconsumed = 0; 1557 } 1558 1559 /* if we've already read to end of data, we're done. */ 1560 if (lha->end_of_entry_cleanup) 1561 return (ARCHIVE_OK); 1562 1563 /* 1564 * If the length is at the beginning, we can skip the 1565 * compressed data much more quickly. 1566 */ 1567 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining); 1568 if (bytes_skipped < 0) 1569 return (ARCHIVE_FATAL); 1570 1571 /* This entry is finished and done. */ 1572 lha->end_of_entry_cleanup = lha->end_of_entry = 1; 1573 return (ARCHIVE_OK); 1574} 1575 1576static int 1577archive_read_format_lha_cleanup(struct archive_read *a) 1578{ 1579 struct lha *lha = (struct lha *)(a->format->data); 1580 1581 lzh_decode_free(&(lha->strm)); 1582 archive_string_free(&(lha->dirname)); 1583 archive_string_free(&(lha->filename)); 1584 archive_string_free(&(lha->uname)); 1585 archive_string_free(&(lha->gname)); 1586 archive_wstring_free(&(lha->ws)); 1587 free(lha); 1588 (a->format->data) = NULL; 1589 return (ARCHIVE_OK); 1590} 1591 1592/* 1593 * 'LHa for UNIX' utility has archived a symbolic-link name after 1594 * a pathname with '|' character. 1595 * This function extracts the symbolic-link name from the pathname. 1596 * 1597 * example. 1598 * 1. a symbolic-name is 'aaa/bb/cc' 1599 * 2. a filename is 'xxx/bbb' 1600 * then a archived pathname is 'xxx/bbb|aaa/bb/cc' 1601 */ 1602static int 1603lha_parse_linkname(struct archive_string *linkname, 1604 struct archive_string *pathname) 1605{ 1606 char * linkptr; 1607 size_t symlen; 1608 1609 linkptr = strchr(pathname->s, '|'); 1610 if (linkptr != NULL) { 1611 symlen = strlen(linkptr + 1); 1612 archive_strncpy(linkname, linkptr+1, symlen); 1613 1614 *linkptr = 0; 1615 pathname->length = strlen(pathname->s); 1616 1617 return (1); 1618 } 1619 return (0); 1620} 1621 1622/* Convert an MSDOS-style date/time into Unix-style time. */ 1623static time_t 1624lha_dos_time(const unsigned char *p) 1625{ 1626 int msTime, msDate; 1627 struct tm ts; 1628 1629 msTime = archive_le16dec(p); 1630 msDate = archive_le16dec(p+2); 1631 1632 memset(&ts, 0, sizeof(ts)); 1633 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ 1634 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ 1635 ts.tm_mday = msDate & 0x1f; /* Day of month. */ 1636 ts.tm_hour = (msTime >> 11) & 0x1f; 1637 ts.tm_min = (msTime >> 5) & 0x3f; 1638 ts.tm_sec = (msTime << 1) & 0x3e; 1639 ts.tm_isdst = -1; 1640 return (mktime(&ts)); 1641} 1642 1643/* Convert an MS-Windows-style date/time into Unix-style time. */ 1644static time_t 1645lha_win_time(uint64_t wintime, long *ns) 1646{ 1647#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000) 1648 1649 if (wintime >= EPOC_TIME) { 1650 wintime -= EPOC_TIME; /* 1970-01-01 00:00:00 (UTC) */ 1651 if (ns != NULL) 1652 *ns = (long)(wintime % 10000000) * 100; 1653 return (wintime / 10000000); 1654 } else { 1655 if (ns != NULL) 1656 *ns = 0; 1657 return (0); 1658 } 1659} 1660 1661static unsigned char 1662lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size) 1663{ 1664 unsigned char const *p = (unsigned char const *)pp; 1665 1666 p += offset; 1667 for (;size > 0; --size) 1668 sum += *p++; 1669 return (sum); 1670} 1671 1672static uint16_t crc16tbl[2][256]; 1673static void 1674lha_crc16_init(void) 1675{ 1676 unsigned int i; 1677 static int crc16init = 0; 1678 1679 if (crc16init) 1680 return; 1681 crc16init = 1; 1682 1683 for (i = 0; i < 256; i++) { 1684 unsigned int j; 1685 uint16_t crc = (uint16_t)i; 1686 for (j = 8; j; j--) 1687 crc = (crc >> 1) ^ ((crc & 1) * 0xA001); 1688 crc16tbl[0][i] = crc; 1689 } 1690 1691 for (i = 0; i < 256; i++) { 1692 crc16tbl[1][i] = (crc16tbl[0][i] >> 8) 1693 ^ crc16tbl[0][crc16tbl[0][i] & 0xff]; 1694 } 1695} 1696 1697static uint16_t 1698lha_crc16(uint16_t crc, const void *pp, size_t len) 1699{ 1700 const unsigned char *p = (const unsigned char *)pp; 1701 const uint16_t *buff; 1702 const union { 1703 uint32_t i; 1704 char c[4]; 1705 } u = { 0x01020304 }; 1706 1707 if (len == 0) 1708 return crc; 1709 1710 /* Process unaligned address. */ 1711 if (((uintptr_t)p) & (uintptr_t)0x1) { 1712 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff]; 1713 len--; 1714 } 1715 buff = (const uint16_t *)p; 1716 /* 1717 * Modern C compiler such as GCC does not unroll automatically yet 1718 * without unrolling pragma, and Clang is so. So we should 1719 * unroll this loop for its performance. 1720 */ 1721 for (;len >= 8; len -= 8) { 1722 /* This if statement expects compiler optimization will 1723 * remove the statement which will not be executed. */ 1724#undef bswap16 1725#if defined(_MSC_VER) && _MSC_VER >= 1400 /* Visual Studio */ 1726# define bswap16(x) _byteswap_ushort(x) 1727#elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) 1728/* GCC 4.8 and later has __builtin_bswap16() */ 1729# define bswap16(x) __builtin_bswap16(x) 1730#elif defined(__clang__) 1731/* All clang versions have __builtin_bswap16() */ 1732# define bswap16(x) __builtin_bswap16(x) 1733#else 1734# define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8)) 1735#endif 1736#define CRC16W do { \ 1737 if(u.c[0] == 1) { /* Big endian */ \ 1738 crc ^= bswap16(*buff); buff++; \ 1739 } else \ 1740 crc ^= *buff++; \ 1741 crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\ 1742} while (0) 1743 CRC16W; 1744 CRC16W; 1745 CRC16W; 1746 CRC16W; 1747#undef CRC16W 1748#undef bswap16 1749 } 1750 1751 p = (const unsigned char *)buff; 1752 for (;len; len--) { 1753 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff]; 1754 } 1755 return crc; 1756} 1757 1758/* 1759 * Initialize LZHUF decoder. 1760 * 1761 * Returns ARCHIVE_OK if initialization was successful. 1762 * Returns ARCHIVE_FAILED if method is unsupported. 1763 * Returns ARCHIVE_FATAL if initialization failed; memory allocation 1764 * error occurred. 1765 */ 1766static int 1767lzh_decode_init(struct lzh_stream *strm, const char *method) 1768{ 1769 struct lzh_dec *ds; 1770 int w_bits, w_size; 1771 1772 if (strm->ds == NULL) { 1773 strm->ds = calloc(1, sizeof(*strm->ds)); 1774 if (strm->ds == NULL) 1775 return (ARCHIVE_FATAL); 1776 } 1777 ds = strm->ds; 1778 ds->error = ARCHIVE_FAILED; 1779 if (method == NULL || method[0] != 'l' || method[1] != 'h') 1780 return (ARCHIVE_FAILED); 1781 switch (method[2]) { 1782 case '5': 1783 w_bits = 13;/* 8KiB for window */ 1784 break; 1785 case '6': 1786 w_bits = 15;/* 32KiB for window */ 1787 break; 1788 case '7': 1789 w_bits = 16;/* 64KiB for window */ 1790 break; 1791 default: 1792 return (ARCHIVE_FAILED);/* Not supported. */ 1793 } 1794 ds->error = ARCHIVE_FATAL; 1795 /* Expand a window size up to 128 KiB for decompressing process 1796 * performance whatever its original window size is. */ 1797 ds->w_size = 1U << 17; 1798 ds->w_mask = ds->w_size -1; 1799 if (ds->w_buff == NULL) { 1800 ds->w_buff = malloc(ds->w_size); 1801 if (ds->w_buff == NULL) 1802 return (ARCHIVE_FATAL); 1803 } 1804 w_size = 1U << w_bits; 1805 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size); 1806 ds->w_pos = 0; 1807 ds->state = 0; 1808 ds->pos_pt_len_size = w_bits + 1; 1809 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4; 1810 ds->literal_pt_len_size = PT_BITLEN_SIZE; 1811 ds->literal_pt_len_bits = 5; 1812 ds->br.cache_buffer = 0; 1813 ds->br.cache_avail = 0; 1814 1815 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16) 1816 != ARCHIVE_OK) 1817 return (ARCHIVE_FATAL); 1818 ds->lt.len_bits = 9; 1819 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16) 1820 != ARCHIVE_OK) 1821 return (ARCHIVE_FATAL); 1822 ds->error = 0; 1823 1824 return (ARCHIVE_OK); 1825} 1826 1827/* 1828 * Release LZHUF decoder. 1829 */ 1830static void 1831lzh_decode_free(struct lzh_stream *strm) 1832{ 1833 1834 if (strm->ds == NULL) 1835 return; 1836 free(strm->ds->w_buff); 1837 lzh_huffman_free(&(strm->ds->lt)); 1838 lzh_huffman_free(&(strm->ds->pt)); 1839 free(strm->ds); 1840 strm->ds = NULL; 1841} 1842 1843/* 1844 * Bit stream reader. 1845 */ 1846/* Check that the cache buffer has enough bits. */ 1847#define lzh_br_has(br, n) ((br)->cache_avail >= n) 1848/* Get compressed data by bit. */ 1849#define lzh_br_bits(br, n) \ 1850 (((uint16_t)((br)->cache_buffer >> \ 1851 ((br)->cache_avail - (n)))) & cache_masks[n]) 1852#define lzh_br_bits_forced(br, n) \ 1853 (((uint16_t)((br)->cache_buffer << \ 1854 ((n) - (br)->cache_avail))) & cache_masks[n]) 1855/* Read ahead to make sure the cache buffer has enough compressed data we 1856 * will use. 1857 * True : completed, there is enough data in the cache buffer. 1858 * False : we met that strm->next_in is empty, we have to get following 1859 * bytes. */ 1860#define lzh_br_read_ahead_0(strm, br, n) \ 1861 (lzh_br_has(br, (n)) || lzh_br_fillup(strm, br)) 1862/* True : the cache buffer has some bits as much as we need. 1863 * False : there are no enough bits in the cache buffer to be used, 1864 * we have to get following bytes if we could. */ 1865#define lzh_br_read_ahead(strm, br, n) \ 1866 (lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n))) 1867 1868/* Notify how many bits we consumed. */ 1869#define lzh_br_consume(br, n) ((br)->cache_avail -= (n)) 1870#define lzh_br_unconsume(br, n) ((br)->cache_avail += (n)) 1871 1872static const uint16_t cache_masks[] = { 1873 0x0000, 0x0001, 0x0003, 0x0007, 1874 0x000F, 0x001F, 0x003F, 0x007F, 1875 0x00FF, 0x01FF, 0x03FF, 0x07FF, 1876 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 1877 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF 1878}; 1879 1880/* 1881 * Shift away used bits in the cache data and fill it up with following bits. 1882 * Call this when cache buffer does not have enough bits you need. 1883 * 1884 * Returns 1 if the cache buffer is full. 1885 * Returns 0 if the cache buffer is not full; input buffer is empty. 1886 */ 1887static int 1888lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br) 1889{ 1890 int n = CACHE_BITS - br->cache_avail; 1891 1892 for (;;) { 1893 const int x = n >> 3; 1894 if (strm->avail_in >= x) { 1895 switch (x) { 1896 case 8: 1897 br->cache_buffer = 1898 ((uint64_t)strm->next_in[0]) << 56 | 1899 ((uint64_t)strm->next_in[1]) << 48 | 1900 ((uint64_t)strm->next_in[2]) << 40 | 1901 ((uint64_t)strm->next_in[3]) << 32 | 1902 ((uint32_t)strm->next_in[4]) << 24 | 1903 ((uint32_t)strm->next_in[5]) << 16 | 1904 ((uint32_t)strm->next_in[6]) << 8 | 1905 (uint32_t)strm->next_in[7]; 1906 strm->next_in += 8; 1907 strm->avail_in -= 8; 1908 br->cache_avail += 8 * 8; 1909 return (1); 1910 case 7: 1911 br->cache_buffer = 1912 (br->cache_buffer << 56) | 1913 ((uint64_t)strm->next_in[0]) << 48 | 1914 ((uint64_t)strm->next_in[1]) << 40 | 1915 ((uint64_t)strm->next_in[2]) << 32 | 1916 ((uint32_t)strm->next_in[3]) << 24 | 1917 ((uint32_t)strm->next_in[4]) << 16 | 1918 ((uint32_t)strm->next_in[5]) << 8 | 1919 (uint32_t)strm->next_in[6]; 1920 strm->next_in += 7; 1921 strm->avail_in -= 7; 1922 br->cache_avail += 7 * 8; 1923 return (1); 1924 case 6: 1925 br->cache_buffer = 1926 (br->cache_buffer << 48) | 1927 ((uint64_t)strm->next_in[0]) << 40 | 1928 ((uint64_t)strm->next_in[1]) << 32 | 1929 ((uint32_t)strm->next_in[2]) << 24 | 1930 ((uint32_t)strm->next_in[3]) << 16 | 1931 ((uint32_t)strm->next_in[4]) << 8 | 1932 (uint32_t)strm->next_in[5]; 1933 strm->next_in += 6; 1934 strm->avail_in -= 6; 1935 br->cache_avail += 6 * 8; 1936 return (1); 1937 case 0: 1938 /* We have enough compressed data in 1939 * the cache buffer.*/ 1940 return (1); 1941 default: 1942 break; 1943 } 1944 } 1945 if (strm->avail_in == 0) { 1946 /* There is not enough compressed data to fill up the 1947 * cache buffer. */ 1948 return (0); 1949 } 1950 br->cache_buffer = 1951 (br->cache_buffer << 8) | *strm->next_in++; 1952 strm->avail_in--; 1953 br->cache_avail += 8; 1954 n -= 8; 1955 } 1956} 1957 1958/* 1959 * Decode LZHUF. 1960 * 1961 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty. 1962 * Please set available buffer and call this function again. 1963 * 2. Returns ARCHIVE_EOF if decompression has been completed. 1964 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data 1965 * is broken or you do not set 'last' flag properly. 1966 * 4. 'last' flag is very important, you must set 1 to the flag if there 1967 * is no input data. The lha compressed data format does not provide how 1968 * to know the compressed data is really finished. 1969 * Note: lha command utility check if the total size of output bytes is 1970 * reached the uncompressed size recorded in its header. it does not mind 1971 * that the decoding process is properly finished. 1972 * GNU ZIP can decompress another compressed file made by SCO LZH compress. 1973 * it handles EOF as null to fill read buffer with zero until the decoding 1974 * process meet 2 bytes of zeros at reading a size of a next chunk, so the 1975 * zeros are treated as the mark of the end of the data although the zeros 1976 * is dummy, not the file data. 1977 */ 1978static int lzh_read_blocks(struct lzh_stream *, int); 1979static int lzh_decode_blocks(struct lzh_stream *, int); 1980#define ST_RD_BLOCK 0 1981#define ST_RD_PT_1 1 1982#define ST_RD_PT_2 2 1983#define ST_RD_PT_3 3 1984#define ST_RD_PT_4 4 1985#define ST_RD_LITERAL_1 5 1986#define ST_RD_LITERAL_2 6 1987#define ST_RD_LITERAL_3 7 1988#define ST_RD_POS_DATA_1 8 1989#define ST_GET_LITERAL 9 1990#define ST_GET_POS_1 10 1991#define ST_GET_POS_2 11 1992#define ST_COPY_DATA 12 1993 1994static int 1995lzh_decode(struct lzh_stream *strm, int last) 1996{ 1997 struct lzh_dec *ds = strm->ds; 1998 int avail_in; 1999 int r; 2000 2001 if (ds->error) 2002 return (ds->error); 2003 2004 avail_in = strm->avail_in; 2005 do { 2006 if (ds->state < ST_GET_LITERAL) 2007 r = lzh_read_blocks(strm, last); 2008 else 2009 r = lzh_decode_blocks(strm, last); 2010 } while (r == 100); 2011 strm->total_in += avail_in - strm->avail_in; 2012 return (r); 2013} 2014 2015static void 2016lzh_emit_window(struct lzh_stream *strm, size_t s) 2017{ 2018 strm->ref_ptr = strm->ds->w_buff; 2019 strm->avail_out = (int)s; 2020 strm->total_out += s; 2021} 2022 2023static int 2024lzh_read_blocks(struct lzh_stream *strm, int last) 2025{ 2026 struct lzh_dec *ds = strm->ds; 2027 struct lzh_br *br = &(ds->br); 2028 int c = 0, i; 2029 unsigned rbits; 2030 2031 for (;;) { 2032 switch (ds->state) { 2033 case ST_RD_BLOCK: 2034 /* 2035 * Read a block number indicates how many blocks 2036 * we will handle. The block is composed of a 2037 * literal and a match, sometimes a literal only 2038 * in particular, there are no reference data at 2039 * the beginning of the decompression. 2040 */ 2041 if (!lzh_br_read_ahead_0(strm, br, 16)) { 2042 if (!last) 2043 /* We need following data. */ 2044 return (ARCHIVE_OK); 2045 if (lzh_br_has(br, 8)) { 2046 /* 2047 * It seems there are extra bits. 2048 * 1. Compressed data is broken. 2049 * 2. `last' flag does not properly 2050 * set. 2051 */ 2052 goto failed; 2053 } 2054 if (ds->w_pos > 0) { 2055 lzh_emit_window(strm, ds->w_pos); 2056 ds->w_pos = 0; 2057 return (ARCHIVE_OK); 2058 } 2059 /* End of compressed data; we have completely 2060 * handled all compressed data. */ 2061 return (ARCHIVE_EOF); 2062 } 2063 ds->blocks_avail = lzh_br_bits(br, 16); 2064 if (ds->blocks_avail == 0) 2065 goto failed; 2066 lzh_br_consume(br, 16); 2067 /* 2068 * Read a literal table compressed in huffman 2069 * coding. 2070 */ 2071 ds->pt.len_size = ds->literal_pt_len_size; 2072 ds->pt.len_bits = ds->literal_pt_len_bits; 2073 ds->reading_position = 0; 2074 /* FALL THROUGH */ 2075 case ST_RD_PT_1: 2076 /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are 2077 * used in reading both a literal table and a 2078 * position table. */ 2079 if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) { 2080 if (last) 2081 goto failed;/* Truncated data. */ 2082 ds->state = ST_RD_PT_1; 2083 return (ARCHIVE_OK); 2084 } 2085 ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits); 2086 lzh_br_consume(br, ds->pt.len_bits); 2087 /* FALL THROUGH */ 2088 case ST_RD_PT_2: 2089 if (ds->pt.len_avail == 0) { 2090 /* There is no bitlen. */ 2091 if (!lzh_br_read_ahead(strm, br, 2092 ds->pt.len_bits)) { 2093 if (last) 2094 goto failed;/* Truncated data.*/ 2095 ds->state = ST_RD_PT_2; 2096 return (ARCHIVE_OK); 2097 } 2098 if (!lzh_make_fake_table(&(ds->pt), 2099 lzh_br_bits(br, ds->pt.len_bits))) 2100 goto failed;/* Invalid data. */ 2101 lzh_br_consume(br, ds->pt.len_bits); 2102 if (ds->reading_position) 2103 ds->state = ST_GET_LITERAL; 2104 else 2105 ds->state = ST_RD_LITERAL_1; 2106 break; 2107 } else if (ds->pt.len_avail > ds->pt.len_size) 2108 goto failed;/* Invalid data. */ 2109 ds->loop = 0; 2110 memset(ds->pt.freq, 0, sizeof(ds->pt.freq)); 2111 if (ds->pt.len_avail < 3 || 2112 ds->pt.len_size == ds->pos_pt_len_size) { 2113 ds->state = ST_RD_PT_4; 2114 break; 2115 } 2116 /* FALL THROUGH */ 2117 case ST_RD_PT_3: 2118 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3); 2119 if (ds->loop < 3) { 2120 if (ds->loop < 0 || last) 2121 goto failed;/* Invalid data. */ 2122 /* Not completed, get following data. */ 2123 ds->state = ST_RD_PT_3; 2124 return (ARCHIVE_OK); 2125 } 2126 /* There are some null in bitlen of the literal. */ 2127 if (!lzh_br_read_ahead(strm, br, 2)) { 2128 if (last) 2129 goto failed;/* Truncated data. */ 2130 ds->state = ST_RD_PT_3; 2131 return (ARCHIVE_OK); 2132 } 2133 c = lzh_br_bits(br, 2); 2134 lzh_br_consume(br, 2); 2135 if (c > ds->pt.len_avail - 3) 2136 goto failed;/* Invalid data. */ 2137 for (i = 3; c-- > 0 ;) 2138 ds->pt.bitlen[i++] = 0; 2139 ds->loop = i; 2140 /* FALL THROUGH */ 2141 case ST_RD_PT_4: 2142 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 2143 ds->pt.len_avail); 2144 if (ds->loop < ds->pt.len_avail) { 2145 if (ds->loop < 0 || last) 2146 goto failed;/* Invalid data. */ 2147 /* Not completed, get following data. */ 2148 ds->state = ST_RD_PT_4; 2149 return (ARCHIVE_OK); 2150 } 2151 if (!lzh_make_huffman_table(&(ds->pt))) 2152 goto failed;/* Invalid data */ 2153 if (ds->reading_position) { 2154 ds->state = ST_GET_LITERAL; 2155 break; 2156 } 2157 /* FALL THROUGH */ 2158 case ST_RD_LITERAL_1: 2159 if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) { 2160 if (last) 2161 goto failed;/* Truncated data. */ 2162 ds->state = ST_RD_LITERAL_1; 2163 return (ARCHIVE_OK); 2164 } 2165 ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits); 2166 lzh_br_consume(br, ds->lt.len_bits); 2167 /* FALL THROUGH */ 2168 case ST_RD_LITERAL_2: 2169 if (ds->lt.len_avail == 0) { 2170 /* There is no bitlen. */ 2171 if (!lzh_br_read_ahead(strm, br, 2172 ds->lt.len_bits)) { 2173 if (last) 2174 goto failed;/* Truncated data.*/ 2175 ds->state = ST_RD_LITERAL_2; 2176 return (ARCHIVE_OK); 2177 } 2178 if (!lzh_make_fake_table(&(ds->lt), 2179 lzh_br_bits(br, ds->lt.len_bits))) 2180 goto failed;/* Invalid data */ 2181 lzh_br_consume(br, ds->lt.len_bits); 2182 ds->state = ST_RD_POS_DATA_1; 2183 break; 2184 } else if (ds->lt.len_avail > ds->lt.len_size) 2185 goto failed;/* Invalid data */ 2186 ds->loop = 0; 2187 memset(ds->lt.freq, 0, sizeof(ds->lt.freq)); 2188 /* FALL THROUGH */ 2189 case ST_RD_LITERAL_3: 2190 i = ds->loop; 2191 while (i < ds->lt.len_avail) { 2192 if (!lzh_br_read_ahead(strm, br, 2193 ds->pt.max_bits)) { 2194 if (last) 2195 goto failed;/* Truncated data.*/ 2196 ds->loop = i; 2197 ds->state = ST_RD_LITERAL_3; 2198 return (ARCHIVE_OK); 2199 } 2200 rbits = lzh_br_bits(br, ds->pt.max_bits); 2201 c = lzh_decode_huffman(&(ds->pt), rbits); 2202 if (c > 2) { 2203 /* Note: 'c' will never be more than 2204 * eighteen since it's limited by 2205 * PT_BITLEN_SIZE, which is being set 2206 * to ds->pt.len_size through 2207 * ds->literal_pt_len_size. */ 2208 lzh_br_consume(br, ds->pt.bitlen[c]); 2209 c -= 2; 2210 ds->lt.freq[c]++; 2211 ds->lt.bitlen[i++] = c; 2212 } else if (c == 0) { 2213 lzh_br_consume(br, ds->pt.bitlen[c]); 2214 ds->lt.bitlen[i++] = 0; 2215 } else { 2216 /* c == 1 or c == 2 */ 2217 int n = (c == 1)?4:9; 2218 if (!lzh_br_read_ahead(strm, br, 2219 ds->pt.bitlen[c] + n)) { 2220 if (last) /* Truncated data. */ 2221 goto failed; 2222 ds->loop = i; 2223 ds->state = ST_RD_LITERAL_3; 2224 return (ARCHIVE_OK); 2225 } 2226 lzh_br_consume(br, ds->pt.bitlen[c]); 2227 c = lzh_br_bits(br, n); 2228 lzh_br_consume(br, n); 2229 c += (n == 4)?3:20; 2230 if (i + c > ds->lt.len_avail) 2231 goto failed;/* Invalid data */ 2232 memset(&(ds->lt.bitlen[i]), 0, c); 2233 i += c; 2234 } 2235 } 2236 if (i > ds->lt.len_avail || 2237 !lzh_make_huffman_table(&(ds->lt))) 2238 goto failed;/* Invalid data */ 2239 /* FALL THROUGH */ 2240 case ST_RD_POS_DATA_1: 2241 /* 2242 * Read a position table compressed in huffman 2243 * coding. 2244 */ 2245 ds->pt.len_size = ds->pos_pt_len_size; 2246 ds->pt.len_bits = ds->pos_pt_len_bits; 2247 ds->reading_position = 1; 2248 ds->state = ST_RD_PT_1; 2249 break; 2250 case ST_GET_LITERAL: 2251 return (100); 2252 } 2253 } 2254failed: 2255 return (ds->error = ARCHIVE_FAILED); 2256} 2257 2258static int 2259lzh_decode_blocks(struct lzh_stream *strm, int last) 2260{ 2261 struct lzh_dec *ds = strm->ds; 2262 struct lzh_br bre = ds->br; 2263 struct huffman *lt = &(ds->lt); 2264 struct huffman *pt = &(ds->pt); 2265 unsigned char *w_buff = ds->w_buff; 2266 unsigned char *lt_bitlen = lt->bitlen; 2267 unsigned char *pt_bitlen = pt->bitlen; 2268 int blocks_avail = ds->blocks_avail, c = 0; 2269 int copy_len = ds->copy_len, copy_pos = ds->copy_pos; 2270 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size; 2271 int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits; 2272 int state = ds->state; 2273 2274 for (;;) { 2275 switch (state) { 2276 case ST_GET_LITERAL: 2277 for (;;) { 2278 if (blocks_avail == 0) { 2279 /* We have decoded all blocks. 2280 * Let's handle next blocks. */ 2281 ds->state = ST_RD_BLOCK; 2282 ds->br = bre; 2283 ds->blocks_avail = 0; 2284 ds->w_pos = w_pos; 2285 ds->copy_pos = 0; 2286 return (100); 2287 } 2288 2289 /* lzh_br_read_ahead() always try to fill the 2290 * cache buffer up. In specific situation we 2291 * are close to the end of the data, the cache 2292 * buffer will not be full and thus we have to 2293 * determine if the cache buffer has some bits 2294 * as much as we need after lzh_br_read_ahead() 2295 * failed. */ 2296 if (!lzh_br_read_ahead(strm, &bre, 2297 lt_max_bits)) { 2298 if (!last) 2299 goto next_data; 2300 /* Remaining bits are less than 2301 * maximum bits(lt.max_bits) but maybe 2302 * it still remains as much as we need, 2303 * so we should try to use it with 2304 * dummy bits. */ 2305 c = lzh_decode_huffman(lt, 2306 lzh_br_bits_forced(&bre, 2307 lt_max_bits)); 2308 lzh_br_consume(&bre, lt_bitlen[c]); 2309 if (!lzh_br_has(&bre, 0)) 2310 goto failed;/* Over read. */ 2311 } else { 2312 c = lzh_decode_huffman(lt, 2313 lzh_br_bits(&bre, lt_max_bits)); 2314 lzh_br_consume(&bre, lt_bitlen[c]); 2315 } 2316 blocks_avail--; 2317 if (c > UCHAR_MAX) 2318 /* Current block is a match data. */ 2319 break; 2320 /* 2321 * 'c' is exactly a literal code. 2322 */ 2323 /* Save a decoded code to reference it 2324 * afterward. */ 2325 w_buff[w_pos] = c; 2326 if (++w_pos >= w_size) { 2327 w_pos = 0; 2328 lzh_emit_window(strm, w_size); 2329 goto next_data; 2330 } 2331 } 2332 /* 'c' is the length of a match pattern we have 2333 * already extracted, which has be stored in 2334 * window(ds->w_buff). */ 2335 copy_len = c - (UCHAR_MAX + 1) + MINMATCH; 2336 /* FALL THROUGH */ 2337 case ST_GET_POS_1: 2338 /* 2339 * Get a reference position. 2340 */ 2341 if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) { 2342 if (!last) { 2343 state = ST_GET_POS_1; 2344 ds->copy_len = copy_len; 2345 goto next_data; 2346 } 2347 copy_pos = lzh_decode_huffman(pt, 2348 lzh_br_bits_forced(&bre, pt_max_bits)); 2349 lzh_br_consume(&bre, pt_bitlen[copy_pos]); 2350 if (!lzh_br_has(&bre, 0)) 2351 goto failed;/* Over read. */ 2352 } else { 2353 copy_pos = lzh_decode_huffman(pt, 2354 lzh_br_bits(&bre, pt_max_bits)); 2355 lzh_br_consume(&bre, pt_bitlen[copy_pos]); 2356 } 2357 /* FALL THROUGH */ 2358 case ST_GET_POS_2: 2359 if (copy_pos > 1) { 2360 /* We need an additional adjustment number to 2361 * the position. */ 2362 int p = copy_pos - 1; 2363 if (!lzh_br_read_ahead(strm, &bre, p)) { 2364 if (last) 2365 goto failed;/* Truncated data.*/ 2366 state = ST_GET_POS_2; 2367 ds->copy_len = copy_len; 2368 ds->copy_pos = copy_pos; 2369 goto next_data; 2370 } 2371 copy_pos = (1 << p) + lzh_br_bits(&bre, p); 2372 lzh_br_consume(&bre, p); 2373 } 2374 /* The position is actually a distance from the last 2375 * code we had extracted and thus we have to convert 2376 * it to a position of the window. */ 2377 copy_pos = (w_pos - copy_pos - 1) & w_mask; 2378 /* FALL THROUGH */ 2379 case ST_COPY_DATA: 2380 /* 2381 * Copy `copy_len' bytes as extracted data from 2382 * the window into the output buffer. 2383 */ 2384 for (;;) { 2385 int l; 2386 2387 l = copy_len; 2388 if (copy_pos > w_pos) { 2389 if (l > w_size - copy_pos) 2390 l = w_size - copy_pos; 2391 } else { 2392 if (l > w_size - w_pos) 2393 l = w_size - w_pos; 2394 } 2395 if ((copy_pos + l < w_pos) 2396 || (w_pos + l < copy_pos)) { 2397 /* No overlap. */ 2398 memcpy(w_buff + w_pos, 2399 w_buff + copy_pos, l); 2400 } else { 2401 const unsigned char *s; 2402 unsigned char *d; 2403 int li; 2404 2405 d = w_buff + w_pos; 2406 s = w_buff + copy_pos; 2407 for (li = 0; li < l-1;) { 2408 d[li] = s[li];li++; 2409 d[li] = s[li];li++; 2410 } 2411 if (li < l) 2412 d[li] = s[li]; 2413 } 2414 w_pos += l; 2415 if (w_pos == w_size) { 2416 w_pos = 0; 2417 lzh_emit_window(strm, w_size); 2418 if (copy_len <= l) 2419 state = ST_GET_LITERAL; 2420 else { 2421 state = ST_COPY_DATA; 2422 ds->copy_len = copy_len - l; 2423 ds->copy_pos = 2424 (copy_pos + l) & w_mask; 2425 } 2426 goto next_data; 2427 } 2428 if (copy_len <= l) 2429 /* A copy of current pattern ended. */ 2430 break; 2431 copy_len -= l; 2432 copy_pos = (copy_pos + l) & w_mask; 2433 } 2434 state = ST_GET_LITERAL; 2435 break; 2436 } 2437 } 2438failed: 2439 return (ds->error = ARCHIVE_FAILED); 2440next_data: 2441 ds->br = bre; 2442 ds->blocks_avail = blocks_avail; 2443 ds->state = state; 2444 ds->w_pos = w_pos; 2445 return (ARCHIVE_OK); 2446} 2447 2448static int 2449lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits) 2450{ 2451 int bits; 2452 2453 if (hf->bitlen == NULL) { 2454 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0])); 2455 if (hf->bitlen == NULL) 2456 return (ARCHIVE_FATAL); 2457 } 2458 if (hf->tbl == NULL) { 2459 if (tbl_bits < HTBL_BITS) 2460 bits = tbl_bits; 2461 else 2462 bits = HTBL_BITS; 2463 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0])); 2464 if (hf->tbl == NULL) 2465 return (ARCHIVE_FATAL); 2466 } 2467 if (hf->tree == NULL && tbl_bits > HTBL_BITS) { 2468 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4); 2469 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0])); 2470 if (hf->tree == NULL) 2471 return (ARCHIVE_FATAL); 2472 } 2473 hf->len_size = (int)len_size; 2474 hf->tbl_bits = tbl_bits; 2475 return (ARCHIVE_OK); 2476} 2477 2478static void 2479lzh_huffman_free(struct huffman *hf) 2480{ 2481 free(hf->bitlen); 2482 free(hf->tbl); 2483 free(hf->tree); 2484} 2485 2486static const char bitlen_tbl[0x400] = { 2487 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2488 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2489 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2490 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2491 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2492 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2493 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2494 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2495 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2496 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2497 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2498 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2499 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2500 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2501 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2502 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2503 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2504 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2505 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2506 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2507 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2508 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2509 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2510 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2511 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2512 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2513 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2514 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2515 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2516 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2517 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2518 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2519 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2520 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2521 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2522 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2523 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2524 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2525 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2526 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2527 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2528 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2529 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2530 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2531 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2532 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2533 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2534 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2535 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2536 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2537 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2538 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2539 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2540 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2541 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2542 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2543 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2544 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2545 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2546 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2547 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2548 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2549 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2550 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 0 2551}; 2552static int 2553lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end) 2554{ 2555 struct lzh_dec *ds = strm->ds; 2556 struct lzh_br *br = &(ds->br); 2557 int c, i; 2558 2559 for (i = start; i < end; ) { 2560 /* 2561 * bit pattern the number we need 2562 * 000 -> 0 2563 * 001 -> 1 2564 * 010 -> 2 2565 * ... 2566 * 110 -> 6 2567 * 1110 -> 7 2568 * 11110 -> 8 2569 * ... 2570 * 1111111111110 -> 16 2571 */ 2572 if (!lzh_br_read_ahead(strm, br, 3)) 2573 return (i); 2574 if ((c = lzh_br_bits(br, 3)) == 7) { 2575 if (!lzh_br_read_ahead(strm, br, 13)) 2576 return (i); 2577 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF]; 2578 if (c) 2579 lzh_br_consume(br, c - 3); 2580 else 2581 return (-1);/* Invalid data. */ 2582 } else 2583 lzh_br_consume(br, 3); 2584 ds->pt.bitlen[i++] = c; 2585 ds->pt.freq[c]++; 2586 } 2587 return (i); 2588} 2589 2590static int 2591lzh_make_fake_table(struct huffman *hf, uint16_t c) 2592{ 2593 if (c >= hf->len_size) 2594 return (0); 2595 hf->tbl[0] = c; 2596 hf->max_bits = 0; 2597 hf->shift_bits = 0; 2598 hf->bitlen[hf->tbl[0]] = 0; 2599 return (1); 2600} 2601 2602/* 2603 * Make a huffman coding table. 2604 */ 2605static int 2606lzh_make_huffman_table(struct huffman *hf) 2607{ 2608 uint16_t *tbl; 2609 const unsigned char *bitlen; 2610 int bitptn[17], weight[17]; 2611 int i, maxbits = 0, ptn, tbl_size, w; 2612 int diffbits, len_avail; 2613 2614 /* 2615 * Initialize bit patterns. 2616 */ 2617 ptn = 0; 2618 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) { 2619 bitptn[i] = ptn; 2620 weight[i] = w; 2621 if (hf->freq[i]) { 2622 ptn += hf->freq[i] * w; 2623 maxbits = i; 2624 } 2625 } 2626 if (ptn != 0x10000 || maxbits > hf->tbl_bits) 2627 return (0);/* Invalid */ 2628 2629 hf->max_bits = maxbits; 2630 2631 /* 2632 * Cut out extra bits which we won't house in the table. 2633 * This preparation reduces the same calculation in the for-loop 2634 * making the table. 2635 */ 2636 if (maxbits < 16) { 2637 int ebits = 16 - maxbits; 2638 for (i = 1; i <= maxbits; i++) { 2639 bitptn[i] >>= ebits; 2640 weight[i] >>= ebits; 2641 } 2642 } 2643 if (maxbits > HTBL_BITS) { 2644 unsigned htbl_max; 2645 uint16_t *p; 2646 2647 diffbits = maxbits - HTBL_BITS; 2648 for (i = 1; i <= HTBL_BITS; i++) { 2649 bitptn[i] >>= diffbits; 2650 weight[i] >>= diffbits; 2651 } 2652 htbl_max = bitptn[HTBL_BITS] + 2653 weight[HTBL_BITS] * hf->freq[HTBL_BITS]; 2654 p = &(hf->tbl[htbl_max]); 2655 while (p < &hf->tbl[1U<<HTBL_BITS]) 2656 *p++ = 0; 2657 } else 2658 diffbits = 0; 2659 hf->shift_bits = diffbits; 2660 2661 /* 2662 * Make the table. 2663 */ 2664 tbl_size = 1 << HTBL_BITS; 2665 tbl = hf->tbl; 2666 bitlen = hf->bitlen; 2667 len_avail = hf->len_avail; 2668 hf->tree_used = 0; 2669 for (i = 0; i < len_avail; i++) { 2670 uint16_t *p; 2671 int len, cnt; 2672 uint16_t bit; 2673 int extlen; 2674 struct htree_t *ht; 2675 2676 if (bitlen[i] == 0) 2677 continue; 2678 /* Get a bit pattern */ 2679 len = bitlen[i]; 2680 ptn = bitptn[len]; 2681 cnt = weight[len]; 2682 if (len <= HTBL_BITS) { 2683 /* Calculate next bit pattern */ 2684 if ((bitptn[len] = ptn + cnt) > tbl_size) 2685 return (0);/* Invalid */ 2686 /* Update the table */ 2687 p = &(tbl[ptn]); 2688 if (cnt > 7) { 2689 uint16_t *pc; 2690 2691 cnt -= 8; 2692 pc = &p[cnt]; 2693 pc[0] = (uint16_t)i; 2694 pc[1] = (uint16_t)i; 2695 pc[2] = (uint16_t)i; 2696 pc[3] = (uint16_t)i; 2697 pc[4] = (uint16_t)i; 2698 pc[5] = (uint16_t)i; 2699 pc[6] = (uint16_t)i; 2700 pc[7] = (uint16_t)i; 2701 if (cnt > 7) { 2702 cnt -= 8; 2703 memcpy(&p[cnt], pc, 2704 8 * sizeof(uint16_t)); 2705 pc = &p[cnt]; 2706 while (cnt > 15) { 2707 cnt -= 16; 2708 memcpy(&p[cnt], pc, 2709 16 * sizeof(uint16_t)); 2710 } 2711 } 2712 if (cnt) 2713 memcpy(p, pc, cnt * sizeof(uint16_t)); 2714 } else { 2715 while (cnt > 1) { 2716 p[--cnt] = (uint16_t)i; 2717 p[--cnt] = (uint16_t)i; 2718 } 2719 if (cnt) 2720 p[--cnt] = (uint16_t)i; 2721 } 2722 continue; 2723 } 2724 2725 /* 2726 * A bit length is too big to be housed to a direct table, 2727 * so we use a tree model for its extra bits. 2728 */ 2729 bitptn[len] = ptn + cnt; 2730 bit = 1U << (diffbits -1); 2731 extlen = len - HTBL_BITS; 2732 2733 p = &(tbl[ptn >> diffbits]); 2734 if (*p == 0) { 2735 *p = len_avail + hf->tree_used; 2736 ht = &(hf->tree[hf->tree_used++]); 2737 if (hf->tree_used > hf->tree_avail) 2738 return (0);/* Invalid */ 2739 ht->left = 0; 2740 ht->right = 0; 2741 } else { 2742 if (*p < len_avail || 2743 *p >= (len_avail + hf->tree_used)) 2744 return (0);/* Invalid */ 2745 ht = &(hf->tree[*p - len_avail]); 2746 } 2747 while (--extlen > 0) { 2748 if (ptn & bit) { 2749 if (ht->left < len_avail) { 2750 ht->left = len_avail + hf->tree_used; 2751 ht = &(hf->tree[hf->tree_used++]); 2752 if (hf->tree_used > hf->tree_avail) 2753 return (0);/* Invalid */ 2754 ht->left = 0; 2755 ht->right = 0; 2756 } else { 2757 ht = &(hf->tree[ht->left - len_avail]); 2758 } 2759 } else { 2760 if (ht->right < len_avail) { 2761 ht->right = len_avail + hf->tree_used; 2762 ht = &(hf->tree[hf->tree_used++]); 2763 if (hf->tree_used > hf->tree_avail) 2764 return (0);/* Invalid */ 2765 ht->left = 0; 2766 ht->right = 0; 2767 } else { 2768 ht = &(hf->tree[ht->right - len_avail]); 2769 } 2770 } 2771 bit >>= 1; 2772 } 2773 if (ptn & bit) { 2774 if (ht->left != 0) 2775 return (0);/* Invalid */ 2776 ht->left = (uint16_t)i; 2777 } else { 2778 if (ht->right != 0) 2779 return (0);/* Invalid */ 2780 ht->right = (uint16_t)i; 2781 } 2782 } 2783 return (1); 2784} 2785 2786static int 2787lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c) 2788{ 2789 struct htree_t *ht; 2790 int extlen; 2791 2792 ht = hf->tree; 2793 extlen = hf->shift_bits; 2794 while (c >= hf->len_avail) { 2795 c -= hf->len_avail; 2796 if (extlen-- <= 0 || c >= hf->tree_used) 2797 return (0); 2798 if (rbits & (1U << extlen)) 2799 c = ht[c].left; 2800 else 2801 c = ht[c].right; 2802 } 2803 return (c); 2804} 2805 2806static inline int 2807lzh_decode_huffman(struct huffman *hf, unsigned rbits) 2808{ 2809 int c; 2810 /* 2811 * At first search an index table for a bit pattern. 2812 * If it fails, search a huffman tree for. 2813 */ 2814 c = hf->tbl[rbits >> hf->shift_bits]; 2815 if (c < hf->len_avail || hf->len_avail == 0) 2816 return (c); 2817 /* This bit pattern needs to be found out at a huffman tree. */ 2818 return (lzh_decode_huffman_tree(hf, rbits, c)); 2819} 2820 2821