bzip2.c revision 146293
1 2/*-----------------------------------------------------------*/ 3/*--- A block-sorting, lossless compressor bzip2.c ---*/ 4/*-----------------------------------------------------------*/ 5 6/*-- 7 This file is a part of bzip2 and/or libbzip2, a program and 8 library for lossless, block-sorting data compression. 9 10 Copyright (C) 1996-2005 Julian R Seward. All rights reserved. 11 12 Redistribution and use in source and binary forms, with or without 13 modification, are permitted provided that the following conditions 14 are met: 15 16 1. Redistributions of source code must retain the above copyright 17 notice, this list of conditions and the following disclaimer. 18 19 2. The origin of this software must not be misrepresented; you must 20 not claim that you wrote the original software. If you use this 21 software in a product, an acknowledgment in the product 22 documentation would be appreciated but is not required. 23 24 3. Altered source versions must be plainly marked as such, and must 25 not be misrepresented as being the original software. 26 27 4. The name of the author may not be used to endorse or promote 28 products derived from this software without specific prior written 29 permission. 30 31 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 32 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 33 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 35 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 37 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 39 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 40 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 43 Julian Seward, Cambridge, UK. 
44 jseward@bzip.org 45 bzip2/libbzip2 version 1.0 of 21 March 2000 46 47 This program is based on (at least) the work of: 48 Mike Burrows 49 David Wheeler 50 Peter Fenwick 51 Alistair Moffat 52 Radford Neal 53 Ian H. Witten 54 Robert Sedgewick 55 Jon L. Bentley 56 57 For more information on these sources, see the manual. 58--*/ 59 60 61/*----------------------------------------------------*/ 62/*--- IMPORTANT ---*/ 63/*----------------------------------------------------*/ 64 65/*-- 66 WARNING: 67 This program and library (attempts to) compress data by 68 performing several non-trivial transformations on it. 69 Unless you are 100% familiar with *all* the algorithms 70 contained herein, and with the consequences of modifying them, 71 you should NOT meddle with the compression or decompression 72 machinery. Incorrect changes can and very likely *will* 73 lead to disastrous loss of data. 74 75 DISCLAIMER: 76 I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE 77 USE OF THIS PROGRAM, HOWSOEVER CAUSED. 78 79 Every compression of a file implies an assumption that the 80 compressed file can be decompressed to reproduce the original. 81 Great efforts in design, coding and testing have been made to 82 ensure that this program works correctly. However, the 83 complexity of the algorithms, and, in particular, the presence 84 of various special cases in the code which occur with very low 85 but non-zero probability make it impossible to rule out the 86 possibility of bugs remaining in the program. DO NOT COMPRESS 87 ANY DATA WITH THIS PROGRAM AND/OR LIBRARY UNLESS YOU ARE PREPARED 88 TO ACCEPT THE POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL 89 NOT BE RECOVERABLE. 90 91 That is not to say this program is inherently unreliable. 92 Indeed, I very much hope the opposite is true. bzip2/libbzip2 93 has been carefully constructed and extensively tested. 94 95 PATENTS: 96 To the best of my knowledge, bzip2/libbzip2 does not use any 97 patented algorithms. 
However, I do not have the resources 98 available to carry out a full patent search. Therefore I cannot 99 give any guarantee of the above statement. 100--*/ 101 102 103 104/*----------------------------------------------------*/ 105/*--- and now for something much more pleasant :-) ---*/ 106/*----------------------------------------------------*/ 107 108/*---------------------------------------------*/ 109/*-- 110 Place a 1 beside your platform, and 0 elsewhere. 111--*/ 112 113/*-- 114 Generic 32-bit Unix. 115 Also works on 64-bit Unix boxes. 116 This is the default. 117--*/ 118#define BZ_UNIX 1 119 120/*-- 121 Win32, as seen by Jacob Navia's excellent 122 port of (Chris Fraser & David Hanson)'s excellent 123 lcc compiler. Or with MS Visual C. 124 This is selected automatically if compiled by a compiler which 125 defines _WIN32, not including the Cygwin GCC. 126--*/ 127#define BZ_LCCWIN32 0 128 129#if defined(_WIN32) && !defined(__CYGWIN__) 130#undef BZ_LCCWIN32 131#define BZ_LCCWIN32 1 132#undef BZ_UNIX 133#define BZ_UNIX 0 134#endif 135 136 137/*---------------------------------------------*/ 138/*-- 139 Some stuff for all platforms. 140--*/ 141 142#include <stdio.h> 143#include <stdlib.h> 144#include <string.h> 145#include <signal.h> 146#include <math.h> 147#include <errno.h> 148#include <ctype.h> 149#include "bzlib.h" 150 151#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); } 152#define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); } 153#define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); } 154 155 156/*---------------------------------------------*/ 157/*-- 158 Platform-specific stuff. 
159--*/ 160 161#if BZ_UNIX 162# include <fcntl.h> 163# include <sys/types.h> 164# include <utime.h> 165# include <unistd.h> 166# include <sys/stat.h> 167# include <sys/times.h> 168 169# define PATH_SEP '/' 170# define MY_LSTAT lstat 171# define MY_STAT stat 172# define MY_S_ISREG S_ISREG 173# define MY_S_ISDIR S_ISDIR 174 175# define APPEND_FILESPEC(root, name) \ 176 root=snocString((root), (name)) 177 178# define APPEND_FLAG(root, name) \ 179 root=snocString((root), (name)) 180 181# define SET_BINARY_MODE(fd) /**/ 182 183# ifdef __GNUC__ 184# define NORETURN __attribute__ ((noreturn)) 185# else 186# define NORETURN /**/ 187# endif 188 189# ifdef __DJGPP__ 190# include <io.h> 191# include <fcntl.h> 192# undef MY_LSTAT 193# undef MY_STAT 194# define MY_LSTAT stat 195# define MY_STAT stat 196# undef SET_BINARY_MODE 197# define SET_BINARY_MODE(fd) \ 198 do { \ 199 int retVal = setmode ( fileno ( fd ), \ 200 O_BINARY ); \ 201 ERROR_IF_MINUS_ONE ( retVal ); \ 202 } while ( 0 ) 203# endif 204 205# ifdef __CYGWIN__ 206# include <io.h> 207# include <fcntl.h> 208# undef SET_BINARY_MODE 209# define SET_BINARY_MODE(fd) \ 210 do { \ 211 int retVal = setmode ( fileno ( fd ), \ 212 O_BINARY ); \ 213 ERROR_IF_MINUS_ONE ( retVal ); \ 214 } while ( 0 ) 215# endif 216#endif /* BZ_UNIX */ 217 218 219 220#if BZ_LCCWIN32 221# include <io.h> 222# include <fcntl.h> 223# include <sys\stat.h> 224 225# define NORETURN /**/ 226# define PATH_SEP '\\' 227# define MY_LSTAT _stat 228# define MY_STAT _stat 229# define MY_S_ISREG(x) ((x) & _S_IFREG) 230# define MY_S_ISDIR(x) ((x) & _S_IFDIR) 231 232# define APPEND_FLAG(root, name) \ 233 root=snocString((root), (name)) 234 235# define APPEND_FILESPEC(root, name) \ 236 root = snocString ((root), (name)) 237 238# define SET_BINARY_MODE(fd) \ 239 do { \ 240 int retVal = setmode ( fileno ( fd ), \ 241 O_BINARY ); \ 242 ERROR_IF_MINUS_ONE ( retVal ); \ 243 } while ( 0 ) 244 245#endif /* BZ_LCCWIN32 */ 246 247 
248/*---------------------------------------------*/ 249/*-- 250 Some more stuff for all platforms :-) Basic type aliases used throughout this file; configError() reports that Int32, Int16 and Char must be 4, 2 and 1 bytes wide. Bool is an unsigned char that holds True or False. 251--*/ 252 253typedef char Char; 254typedef unsigned char Bool; 255typedef unsigned char UChar; 256typedef int Int32; 257typedef unsigned int UInt32; 258typedef short Int16; 259typedef unsigned short UInt16; 260 261#define True ((Bool)1) 262#define False ((Bool)0) 263 264/*-- 265 IntNative is your platform's `native' int size. 266 Only here to avoid probs with 64-bit platforms. 267--*/ 268typedef int IntNative; 269 270 271/*---------------------------------------------------*/ 272/*--- Misc (file handling) data decls ---*/ 273/*---------------------------------------------------*/ 274 275Int32 verbosity; 276Bool keepInputFiles, smallMode, deleteOutputOnInterrupt; 277Bool forceOverwrite, testFailsExist, unzFailsExist, noisy; 278Int32 numFileNames, numFilesProcessed, blockSize100k; 279Int32 exitValue; 280 281/*-- source modes (the values held in srcMode); F==file, I==stdin, O==stdout --*/ 282#define SM_I2O 1 283#define SM_F2O 2 284#define SM_F2F 3 285 286/*-- operation modes (the values held in opMode): OM_Z is compression; OM_UNZ and OM_TEST are presumably decompression and integrity testing --*/ 287#define OM_Z 1 288#define OM_UNZ 2 289#define OM_TEST 3 290 291Int32 opMode; 292Int32 srcMode; 293 294#define FILE_NAME_LEN 1034 295 296Int32 longestFileName; 297Char inName [FILE_NAME_LEN]; 298Char outName[FILE_NAME_LEN]; 299Char tmpName[FILE_NAME_LEN]; 300Char *progName; 301Char progNameReally[FILE_NAME_LEN]; 302FILE *outputHandleJustInCase; 303Int32 workFactor; 304 /*-- exitValue is only ever raised, never lowered (see setExit). outputHandleJustInCase, when not NULL, is the stream cleanUpAndFail() closes before it removes outName. The NORETURN functions declared below all finish by calling exit(), directly or through cleanUpAndFail(). --*/ 305static void panic ( Char* ) NORETURN; 306static void ioError ( void ) NORETURN; 307static void outOfMemory ( void ) NORETURN; 308static void configError ( void ) NORETURN; 309static void crcError ( void ) NORETURN; 310static void cleanUpAndFail ( Int32 ) NORETURN; 311static void compressedStreamEOF ( void ) NORETURN; 312 313static void copyFileName ( Char*, Char* ); 314static void* myMalloc ( Int32 ); 315 316 317 318/*---------------------------------------------------*/ 319/*--- An implementation of 64-bit ints. Sigh. 
---*/ 320/*--- Roll on widespread deployment of ANSI C9X ! ---*/ 321/*---------------------------------------------------*/ 322 323typedef 324 struct { UChar b[8]; } 325 UInt64; 326 327 328static 329void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 ) 330{ 331 n->b[7] = (UChar)((hi32 >> 24) & 0xFF); 332 n->b[6] = (UChar)((hi32 >> 16) & 0xFF); 333 n->b[5] = (UChar)((hi32 >> 8) & 0xFF); 334 n->b[4] = (UChar) (hi32 & 0xFF); 335 n->b[3] = (UChar)((lo32 >> 24) & 0xFF); 336 n->b[2] = (UChar)((lo32 >> 16) & 0xFF); 337 n->b[1] = (UChar)((lo32 >> 8) & 0xFF); 338 n->b[0] = (UChar) (lo32 & 0xFF); 339} 340 341 342static 343double uInt64_to_double ( UInt64* n ) 344{ 345 Int32 i; 346 double base = 1.0; 347 double sum = 0.0; 348 for (i = 0; i < 8; i++) { 349 sum += base * (double)(n->b[i]); 350 base *= 256.0; 351 } 352 return sum; 353} 354 355 356static 357Bool uInt64_isZero ( UInt64* n ) 358{ 359 Int32 i; 360 for (i = 0; i < 8; i++) 361 if (n->b[i] != 0) return 0; 362 return 1; 363} 364 365 366/* Divide *n by 10, and return the remainder. */ 367static 368Int32 uInt64_qrm10 ( UInt64* n ) 369{ 370 UInt32 rem, tmp; 371 Int32 i; 372 rem = 0; 373 for (i = 7; i >= 0; i--) { 374 tmp = rem * 256 + n->b[i]; 375 n->b[i] = tmp / 10; 376 rem = tmp % 10; 377 } 378 return rem; 379} 380 381 382/* ... and the Whole Entire Point of all this UInt64 stuff is 383 so that we can supply the following function. 
384*/ 385static 386void uInt64_toAscii ( char* outbuf, UInt64* n ) 387{ 388 Int32 i, q; 389 UChar buf[32]; 390 Int32 nBuf = 0; 391 UInt64 n_copy = *n; 392 do { 393 q = uInt64_qrm10 ( &n_copy ); 394 buf[nBuf] = q + '0'; 395 nBuf++; 396 } while (!uInt64_isZero(&n_copy)); 397 outbuf[nBuf] = 0; 398 for (i = 0; i < nBuf; i++) 399 outbuf[i] = buf[nBuf-i-1]; 400} 401 402 403/*---------------------------------------------------*/ 404/*--- Processing of complete files and streams ---*/ 405/*---------------------------------------------------*/ 406 407/*---------------------------------------------*/ 408static 409Bool myfeof ( FILE* f ) 410{ 411 Int32 c = fgetc ( f ); 412 if (c == EOF) return True; 413 ungetc ( c, f ); 414 return False; 415} 416 417 418/*---------------------------------------------*/ 419static 420void compressStream ( FILE *stream, FILE *zStream ) 421{ 422 BZFILE* bzf = NULL; 423 UChar ibuf[5000]; 424 Int32 nIbuf; 425 UInt32 nbytes_in_lo32, nbytes_in_hi32; 426 UInt32 nbytes_out_lo32, nbytes_out_hi32; 427 Int32 bzerr, bzerr_dummy, ret; 428 429 SET_BINARY_MODE(stream); 430 SET_BINARY_MODE(zStream); 431 432 if (ferror(stream)) goto errhandler_io; 433 if (ferror(zStream)) goto errhandler_io; 434 435 bzf = BZ2_bzWriteOpen ( &bzerr, zStream, 436 blockSize100k, verbosity, workFactor ); 437 if (bzerr != BZ_OK) goto errhandler; 438 439 if (verbosity >= 2) fprintf ( stderr, "\n" ); 440 441 while (True) { 442 443 if (myfeof(stream)) break; 444 nIbuf = fread ( ibuf, sizeof(UChar), 5000, stream ); 445 if (ferror(stream)) goto errhandler_io; 446 if (nIbuf > 0) BZ2_bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf ); 447 if (bzerr != BZ_OK) goto errhandler; 448 449 } 450 451 BZ2_bzWriteClose64 ( &bzerr, bzf, 0, 452 &nbytes_in_lo32, &nbytes_in_hi32, 453 &nbytes_out_lo32, &nbytes_out_hi32 ); 454 if (bzerr != BZ_OK) goto errhandler; 455 456 if (ferror(zStream)) goto errhandler_io; 457 ret = fflush ( zStream ); 458 if (ret == EOF) goto errhandler_io; 459 if (zStream != stdout) { 460 
ret = fclose ( zStream ); 461 outputHandleJustInCase = NULL; 462 if (ret == EOF) goto errhandler_io; 463 } 464 outputHandleJustInCase = NULL; 465 if (ferror(stream)) goto errhandler_io; 466 ret = fclose ( stream ); 467 if (ret == EOF) goto errhandler_io; 468 469 if (verbosity >= 1) { 470 if (nbytes_in_lo32 == 0 && nbytes_in_hi32 == 0) { 471 fprintf ( stderr, " no data compressed.\n"); 472 } else { 473 Char buf_nin[32], buf_nout[32]; 474 UInt64 nbytes_in, nbytes_out; 475 double nbytes_in_d, nbytes_out_d; 476 uInt64_from_UInt32s ( &nbytes_in, 477 nbytes_in_lo32, nbytes_in_hi32 ); 478 uInt64_from_UInt32s ( &nbytes_out, 479 nbytes_out_lo32, nbytes_out_hi32 ); 480 nbytes_in_d = uInt64_to_double ( &nbytes_in ); 481 nbytes_out_d = uInt64_to_double ( &nbytes_out ); 482 uInt64_toAscii ( buf_nin, &nbytes_in ); 483 uInt64_toAscii ( buf_nout, &nbytes_out ); 484 fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, " 485 "%5.2f%% saved, %s in, %s out.\n", 486 nbytes_in_d / nbytes_out_d, 487 (8.0 * nbytes_out_d) / nbytes_in_d, 488 100.0 * (1.0 - nbytes_out_d / nbytes_in_d), 489 buf_nin, 490 buf_nout 491 ); 492 } 493 } 494 495 return; 496 497 errhandler: 498 BZ2_bzWriteClose64 ( &bzerr_dummy, bzf, 1, 499 &nbytes_in_lo32, &nbytes_in_hi32, 500 &nbytes_out_lo32, &nbytes_out_hi32 ); 501 switch (bzerr) { 502 case BZ_CONFIG_ERROR: 503 configError(); break; 504 case BZ_MEM_ERROR: 505 outOfMemory (); break; 506 case BZ_IO_ERROR: 507 errhandler_io: 508 ioError(); break; 509 default: 510 panic ( "compress:unexpected error" ); 511 } 512 513 panic ( "compress:end" ); 514 /*notreached*/ 515} 516 517 518 519/*---------------------------------------------*/ 520static 521Bool uncompressStream ( FILE *zStream, FILE *stream ) 522{ 523 BZFILE* bzf = NULL; 524 Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i; 525 UChar obuf[5000]; 526 UChar unused[BZ_MAX_UNUSED]; 527 Int32 nUnused; 528 void* unusedTmpV; 529 UChar* unusedTmp; 530 531 nUnused = 0; 532 streamNo = 0; 533 534 SET_BINARY_MODE(stream); 535 
SET_BINARY_MODE(zStream); 536 537 if (ferror(stream)) goto errhandler_io; 538 if (ferror(zStream)) goto errhandler_io; 539 540 while (True) { 541 542 bzf = BZ2_bzReadOpen ( 543 &bzerr, zStream, verbosity, 544 (int)smallMode, unused, nUnused 545 ); 546 if (bzf == NULL || bzerr != BZ_OK) goto errhandler; 547 streamNo++; 548 549 while (bzerr == BZ_OK) { 550 nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 ); 551 if (bzerr == BZ_DATA_ERROR_MAGIC) goto trycat; 552 if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0) 553 fwrite ( obuf, sizeof(UChar), nread, stream ); 554 if (ferror(stream)) goto errhandler_io; 555 } 556 if (bzerr != BZ_STREAM_END) goto errhandler; 557 558 BZ2_bzReadGetUnused ( &bzerr, bzf, &unusedTmpV, &nUnused ); 559 if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); 560 561 unusedTmp = (UChar*)unusedTmpV; 562 for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; 563 564 BZ2_bzReadClose ( &bzerr, bzf ); 565 if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); 566 567 if (nUnused == 0 && myfeof(zStream)) break; 568 } 569 570 closeok: 571 if (ferror(zStream)) goto errhandler_io; 572 ret = fclose ( zStream ); 573 if (ret == EOF) goto errhandler_io; 574 575 if (ferror(stream)) goto errhandler_io; 576 ret = fflush ( stream ); 577 if (ret != 0) goto errhandler_io; 578 if (stream != stdout) { 579 ret = fclose ( stream ); 580 outputHandleJustInCase = NULL; 581 if (ret == EOF) goto errhandler_io; 582 } 583 outputHandleJustInCase = NULL; 584 if (verbosity >= 2) fprintf ( stderr, "\n " ); 585 return True; 586 587 trycat: 588 if (forceOverwrite) { 589 rewind(zStream); 590 while (True) { 591 if (myfeof(zStream)) break; 592 nread = fread ( obuf, sizeof(UChar), 5000, zStream ); 593 if (ferror(zStream)) goto errhandler_io; 594 if (nread > 0) fwrite ( obuf, sizeof(UChar), nread, stream ); 595 if (ferror(stream)) goto errhandler_io; 596 } 597 goto closeok; 598 } 599 600 errhandler: 601 BZ2_bzReadClose ( &bzerr_dummy, bzf ); 602 switch (bzerr) { 603 
case BZ_CONFIG_ERROR: 604 configError(); break; 605 case BZ_IO_ERROR: 606 errhandler_io: 607 ioError(); break; 608 case BZ_DATA_ERROR: 609 crcError(); 610 case BZ_MEM_ERROR: 611 outOfMemory(); 612 case BZ_UNEXPECTED_EOF: 613 compressedStreamEOF(); 614 case BZ_DATA_ERROR_MAGIC: 615 if (zStream != stdin) fclose(zStream); 616 if (stream != stdout) fclose(stream); 617 if (streamNo == 1) { 618 return False; 619 } else { 620 if (noisy) 621 fprintf ( stderr, 622 "\n%s: %s: trailing garbage after EOF ignored\n", 623 progName, inName ); 624 return True; 625 } 626 default: 627 panic ( "decompress:unexpected error" ); 628 } 629 630 panic ( "decompress:end" ); 631 return True; /*notreached*/ 632} 633 634 635/*---------------------------------------------*/ 636static 637Bool testStream ( FILE *zStream ) 638{ 639 BZFILE* bzf = NULL; 640 Int32 bzerr, bzerr_dummy, ret, nread, streamNo, i; 641 UChar obuf[5000]; 642 UChar unused[BZ_MAX_UNUSED]; 643 Int32 nUnused; 644 void* unusedTmpV; 645 UChar* unusedTmp; 646 647 nUnused = 0; 648 streamNo = 0; 649 650 SET_BINARY_MODE(zStream); 651 if (ferror(zStream)) goto errhandler_io; 652 653 while (True) { 654 655 bzf = BZ2_bzReadOpen ( 656 &bzerr, zStream, verbosity, 657 (int)smallMode, unused, nUnused 658 ); 659 if (bzf == NULL || bzerr != BZ_OK) goto errhandler; 660 streamNo++; 661 662 while (bzerr == BZ_OK) { 663 nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 ); 664 if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler; 665 } 666 if (bzerr != BZ_STREAM_END) goto errhandler; 667 668 BZ2_bzReadGetUnused ( &bzerr, bzf, &unusedTmpV, &nUnused ); 669 if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); 670 671 unusedTmp = (UChar*)unusedTmpV; 672 for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; 673 674 BZ2_bzReadClose ( &bzerr, bzf ); 675 if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); 676 if (nUnused == 0 && myfeof(zStream)) break; 677 678 } 679 680 if (ferror(zStream)) goto errhandler_io; 681 ret = fclose ( zStream ); 682 if (ret == EOF) 
goto errhandler_io; 683 684 if (verbosity >= 2) fprintf ( stderr, "\n " ); 685 return True; 686 687 errhandler: 688 BZ2_bzReadClose ( &bzerr_dummy, bzf ); 689 if (verbosity == 0) 690 fprintf ( stderr, "%s: %s: ", progName, inName ); 691 switch (bzerr) { 692 case BZ_CONFIG_ERROR: 693 configError(); break; 694 case BZ_IO_ERROR: 695 errhandler_io: 696 ioError(); break; 697 case BZ_DATA_ERROR: 698 fprintf ( stderr, 699 "data integrity (CRC) error in data\n" ); 700 return False; 701 case BZ_MEM_ERROR: 702 outOfMemory(); 703 case BZ_UNEXPECTED_EOF: 704 fprintf ( stderr, 705 "file ends unexpectedly\n" ); 706 return False; 707 case BZ_DATA_ERROR_MAGIC: 708 if (zStream != stdin) fclose(zStream); 709 if (streamNo == 1) { 710 fprintf ( stderr, 711 "bad magic number (file not created by bzip2)\n" ); 712 return False; 713 } else { 714 if (noisy) 715 fprintf ( stderr, 716 "trailing garbage after EOF ignored\n" ); 717 return True; 718 } 719 default: 720 panic ( "test:unexpected error" ); 721 } 722 723 panic ( "test:end" ); 724 return True; /*notreached*/ 725} 726 727 728/*---------------------------------------------------*/ 729/*--- Error [non-] handling grunge ---*/ 730/*---------------------------------------------------*/ 731 732/*---------------------------------------------*/ 733static 734void setExit ( Int32 v ) 735{ 736 if (v > exitValue) exitValue = v; 737} 738 739 740/*---------------------------------------------*/ 741static 742void cadvise ( void ) 743{ 744 if (noisy) 745 fprintf ( 746 stderr, 747 "\nIt is possible that the compressed file(s) have become corrupted.\n" 748 "You can use the -tvv option to test integrity of such files.\n\n" 749 "You can use the `bzip2recover' program to attempt to recover\n" 750 "data from undamaged sections of corrupted files.\n\n" 751 ); 752} 753 754 755/*---------------------------------------------*/ 756static 757void showFileNames ( void ) 758{ 759 if (noisy) 760 fprintf ( 761 stderr, 762 "\tInput file = %s, output file = %s\n", 763 
inName, outName 764 ); 765} 766 767 768/*---------------------------------------------*/ 769static 770void cleanUpAndFail ( Int32 ec ) 771{ 772 IntNative retVal; 773 struct MY_STAT statBuf; 774 775 if ( srcMode == SM_F2F 776 && opMode != OM_TEST 777 && deleteOutputOnInterrupt ) { 778 779 /* Check whether input file still exists. Delete output file 780 only if input exists to avoid loss of data. Joerg Prante, 5 781 January 2002. (JRS 06-Jan-2002: other changes in 1.0.2 mean 782 this is less likely to happen. But to be ultra-paranoid, we 783 do the check anyway.) */ 784 retVal = MY_STAT ( inName, &statBuf ); 785 if (retVal == 0) { 786 if (noisy) 787 fprintf ( stderr, 788 "%s: Deleting output file %s, if it exists.\n", 789 progName, outName ); 790 if (outputHandleJustInCase != NULL) 791 fclose ( outputHandleJustInCase ); 792 retVal = remove ( outName ); 793 if (retVal != 0) 794 fprintf ( stderr, 795 "%s: WARNING: deletion of output file " 796 "(apparently) failed.\n", 797 progName ); 798 } else { 799 fprintf ( stderr, 800 "%s: WARNING: deletion of output file suppressed\n", 801 progName ); 802 fprintf ( stderr, 803 "%s: since input file no longer exists. Output file\n", 804 progName ); 805 fprintf ( stderr, 806 "%s: `%s' may be incomplete.\n", 807 progName, outName ); 808 fprintf ( stderr, 809 "%s: I suggest doing an integrity test (bzip2 -tv)" 810 " of it.\n", 811 progName ); 812 } 813 } 814 815 if (noisy && numFileNames > 0 && numFilesProcessed < numFileNames) { 816 fprintf ( stderr, 817 "%s: WARNING: some files have not been processed:\n" 818 "%s: %d specified on command line, %d not processed yet.\n\n", 819 progName, progName, 820 numFileNames, numFileNames - numFilesProcessed ); 821 } 822 setExit(ec); 823 exit(exitValue); 824} 825 826 827/*---------------------------------------------*/ 828static 829void panic ( Char* s ) 830{ 831 fprintf ( stderr, 832 "\n%s: PANIC -- internal consistency error:\n" 833 "\t%s\n" 834 "\tThis is a BUG. 
Please report it to me at:\n" 835 "\tjseward@bzip.org\n", 836 progName, s ); 837 showFileNames(); 838 cleanUpAndFail( 3 ); 839} 840 841 842/*---------------------------------------------*/ 843static 844void crcError ( void ) 845{ 846 fprintf ( stderr, 847 "\n%s: Data integrity error when decompressing.\n", 848 progName ); 849 showFileNames(); 850 cadvise(); 851 cleanUpAndFail( 2 ); 852} 853 854 855/*---------------------------------------------*/ 856static 857void compressedStreamEOF ( void ) 858{ 859 if (noisy) { 860 fprintf ( stderr, 861 "\n%s: Compressed file ends unexpectedly;\n\t" 862 "perhaps it is corrupted? *Possible* reason follows.\n", 863 progName ); 864 perror ( progName ); 865 showFileNames(); 866 cadvise(); 867 } 868 cleanUpAndFail( 2 ); 869} 870 871 872/*---------------------------------------------*/ 873static 874void ioError ( void ) 875{ 876 fprintf ( stderr, 877 "\n%s: I/O or other error, bailing out. " 878 "Possible reason follows.\n", 879 progName ); 880 perror ( progName ); 881 showFileNames(); 882 cleanUpAndFail( 1 ); 883} 884 885 886/*---------------------------------------------*/ 887static 888void mySignalCatcher ( IntNative n ) 889{ 890 fprintf ( stderr, 891 "\n%s: Control-C or similar caught, quitting.\n", 892 progName ); 893 cleanUpAndFail(1); 894} 895 896 897/*---------------------------------------------*/ 898static 899void mySIGSEGVorSIGBUScatcher ( IntNative n ) 900{ 901 if (opMode == OM_Z) 902 fprintf ( 903 stderr, 904 "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing.\n" 905 "\n" 906 " Possible causes are (most likely first):\n" 907 " (1) This computer has unreliable memory or cache hardware\n" 908 " (a surprisingly common problem; try a different machine.)\n" 909 " (2) A bug in the compiler used to create this executable\n" 910 " (unlikely, if you didn't compile bzip2 yourself.)\n" 911 " (3) A real bug in bzip2 -- I hope this should never be the case.\n" 912 " The user's manual, Section 4.3, has more info on (1) and (2).\n" 
913 " \n" 914 " If you suspect this is a bug in bzip2, or are unsure about (1)\n" 915 " or (2), feel free to report it to me at: jseward@bzip.org.\n" 916 " Section 4.3 of the user's manual describes the info a useful\n" 917 " bug report should have. If the manual is available on your\n" 918 " system, please try and read it before mailing me. If you don't\n" 919 " have the manual or can't be bothered to read it, mail me anyway.\n" 920 "\n", 921 progName ); 922 else 923 fprintf ( 924 stderr, 925 "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing.\n" 926 "\n" 927 " Possible causes are (most likely first):\n" 928 " (1) The compressed data is corrupted, and bzip2's usual checks\n" 929 " failed to detect this. Try bzip2 -tvv my_file.bz2.\n" 930 " (2) This computer has unreliable memory or cache hardware\n" 931 " (a surprisingly common problem; try a different machine.)\n" 932 " (3) A bug in the compiler used to create this executable\n" 933 " (unlikely, if you didn't compile bzip2 yourself.)\n" 934 " (4) A real bug in bzip2 -- I hope this should never be the case.\n" 935 " The user's manual, Section 4.3, has more info on (2) and (3).\n" 936 " \n" 937 " If you suspect this is a bug in bzip2, or are unsure about (2)\n" 938 " or (3), feel free to report it to me at: jseward@bzip.org.\n" 939 " Section 4.3 of the user's manual describes the info a useful\n" 940 " bug report should have. If the manual is available on your\n" 941 " system, please try and read it before mailing me. 
If you don't\n" 942 " have the manual or can't be bothered to read it, mail me anyway.\n" 943 "\n", 944 progName ); 945 946 showFileNames(); 947 if (opMode == OM_Z) 948 cleanUpAndFail( 3 ); else 949 { cadvise(); cleanUpAndFail( 2 ); } 950} 951 952 953/*---------------------------------------------*/ 954static 955void outOfMemory ( void ) 956{ 957 fprintf ( stderr, 958 "\n%s: couldn't allocate enough memory\n", 959 progName ); 960 showFileNames(); 961 cleanUpAndFail(1); 962} 963 964 965/*---------------------------------------------*/ 966static 967void configError ( void ) 968{ 969 fprintf ( stderr, 970 "bzip2: I'm not configured correctly for this platform!\n" 971 "\tI require Int32, Int16 and Char to have sizes\n" 972 "\tof 4, 2 and 1 bytes to run properly, and they don't.\n" 973 "\tProbably you can fix this by defining them correctly,\n" 974 "\tand recompiling. Bye!\n" ); 975 setExit(3); 976 exit(exitValue); 977} 978 979 980/*---------------------------------------------------*/ 981/*--- The main driver machinery ---*/ 982/*---------------------------------------------------*/ 983 984/* All rather crufty. The main problem is that input files 985 are stat()d multiple times before use. This should be 986 cleaned up. 987*/ 988 989/*---------------------------------------------*/ 990static 991void pad ( Char *s ) 992{ 993 Int32 i; 994 if ( (Int32)strlen(s) >= longestFileName ) return; 995 for (i = 1; i <= longestFileName - (Int32)strlen(s); i++) 996 fprintf ( stderr, " " ); 997} 998 999 1000/*---------------------------------------------*/ 1001static 1002void copyFileName ( Char* to, Char* from ) 1003{ 1004 if ( strlen(from) > FILE_NAME_LEN-10 ) { 1005 fprintf ( 1006 stderr, 1007 "bzip2: file name\n`%s'\n" 1008 "is suspiciously (more than %d chars) long.\n" 1009 "Try using a reasonable file name instead. Sorry! 
:-)\n", 1010 from, FILE_NAME_LEN-10 1011 ); 1012 setExit(1); 1013 exit(exitValue); 1014 } 1015 1016 strncpy(to,from,FILE_NAME_LEN-10); 1017 to[FILE_NAME_LEN-10]='\0'; 1018} 1019 1020 1021/*---------------------------------------------*/ 1022static 1023Bool fileExists ( Char* name ) 1024{ 1025 FILE *tmp = fopen ( name, "rb" ); 1026 Bool exists = (tmp != NULL); 1027 if (tmp != NULL) fclose ( tmp ); 1028 return exists; 1029} 1030 1031 1032/*---------------------------------------------*/ 1033/* Open an output file safely with O_EXCL and good permissions. 1034 This avoids a race condition in versions < 1.0.2, in which 1035 the file was first opened and then had its interim permissions 1036 set safely. We instead use open() to create the file with 1037 the interim permissions required. (--- --- rw-). 1038 1039 For non-Unix platforms, if we are not worrying about 1040 security issues, simple this simply behaves like fopen. 1041*/ 1042FILE* fopen_output_safely ( Char* name, const char* mode ) 1043{ 1044# if BZ_UNIX 1045 FILE* fp; 1046 IntNative fh; 1047 fh = open(name, O_WRONLY|O_CREAT|O_EXCL, S_IWUSR|S_IRUSR); 1048 if (fh == -1) return NULL; 1049 fp = fdopen(fh, mode); 1050 if (fp == NULL) close(fh); 1051 return fp; 1052# else 1053 return fopen(name, mode); 1054# endif 1055} 1056 1057 1058/*---------------------------------------------*/ 1059/*-- 1060 if in doubt, return True 1061--*/ 1062static 1063Bool notAStandardFile ( Char* name ) 1064{ 1065 IntNative i; 1066 struct MY_STAT statBuf; 1067 1068 i = MY_LSTAT ( name, &statBuf ); 1069 if (i != 0) return True; 1070 if (MY_S_ISREG(statBuf.st_mode)) return False; 1071 return True; 1072} 1073 1074 1075/*---------------------------------------------*/ 1076/*-- 1077 rac 11/21/98 see if file has hard links to it 1078--*/ 1079static 1080Int32 countHardLinks ( Char* name ) 1081{ 1082 IntNative i; 1083 struct MY_STAT statBuf; 1084 1085 i = MY_LSTAT ( name, &statBuf ); 1086 if (i != 0) return 0; 1087 return (statBuf.st_nlink - 1); 
1088} 1089 1090 1091/*---------------------------------------------*/ 1092/* Copy modification date, access date, permissions and owner from the 1093 source to destination file. We have to copy this meta-info off 1094 into fileMetaInfo before starting to compress / decompress it, 1095 because doing it afterwards means we get the wrong access time. 1096 1097 To complicate matters, in compress() and decompress() below, the 1098 sequence of tests preceding the call to saveInputFileMetaInfo() 1099 involves calling fileExists(), which in turn establishes its result 1100 by attempting to fopen() the file, and if successful, immediately 1101 fclose()ing it again. So we have to assume that the fopen() call 1102 does not cause the access time field to be updated. 1103 1104 Reading of the man page for stat() (man 2 stat) on RedHat 7.2 seems 1105 to imply that merely doing open() will not affect the access time. 1106 Therefore we merely need to hope that the C library only does 1107 open() as a result of fopen(), and not any kind of read()-ahead 1108 cleverness. 1109 1110 It sounds pretty fragile to me. Whether this carries across 1111 robustly to arbitrary Unix-like platforms (or even works robustly 1112 on this one, RedHat 7.2) is unknown to me. Nevertheless ... 1113*/ 1114#if BZ_UNIX 1115static 1116struct MY_STAT fileMetaInfo; 1117#endif 1118 1119static 1120void saveInputFileMetaInfo ( Char *srcName ) 1121{ 1122# if BZ_UNIX 1123 IntNative retVal; 1124 /* Note use of stat here, not lstat. 
*/ 1125 retVal = MY_STAT( srcName, &fileMetaInfo ); 1126 ERROR_IF_NOT_ZERO ( retVal ); 1127# endif 1128} 1129 1130 1131static 1132void applySavedMetaInfoToOutputFile ( Char *dstName ) 1133{ 1134# if BZ_UNIX 1135 IntNative retVal; 1136 struct utimbuf uTimBuf; 1137 1138 uTimBuf.actime = fileMetaInfo.st_atime; 1139 uTimBuf.modtime = fileMetaInfo.st_mtime; 1140 1141 retVal = chmod ( dstName, fileMetaInfo.st_mode ); 1142 ERROR_IF_NOT_ZERO ( retVal ); 1143 1144 retVal = utime ( dstName, &uTimBuf ); 1145 ERROR_IF_NOT_ZERO ( retVal ); 1146 1147 retVal = chown ( dstName, fileMetaInfo.st_uid, fileMetaInfo.st_gid ); 1148 /* chown() will in many cases return with EPERM, which can 1149 be safely ignored. 1150 */ 1151# endif 1152} 1153 1154 1155/*---------------------------------------------*/ 1156static 1157Bool containsDubiousChars ( Char* name ) 1158{ 1159# if BZ_UNIX 1160 /* On unix, files can contain any characters and the file expansion 1161 * is performed by the shell. 1162 */ 1163 return False; 1164# else /* ! BZ_UNIX */ 1165 /* On non-unix (Win* platforms), wildcard characters are not allowed in 1166 * filenames. 1167 */ 1168 for (; *name != '\0'; name++) 1169 if (*name == '?' 
|| *name == '*') return True; 1170 return False; 1171# endif /* BZ_UNIX */ 1172} 1173 1174 1175/*---------------------------------------------*/ 1176#define BZ_N_SUFFIX_PAIRS 4 1177 1178Char* zSuffix[BZ_N_SUFFIX_PAIRS] 1179 = { ".bz2", ".bz", ".tbz2", ".tbz" }; 1180Char* unzSuffix[BZ_N_SUFFIX_PAIRS] 1181 = { "", "", ".tar", ".tar" }; 1182 1183static 1184Bool hasSuffix ( Char* s, Char* suffix ) 1185{ 1186 Int32 ns = strlen(s); 1187 Int32 nx = strlen(suffix); 1188 if (ns < nx) return False; 1189 if (strcmp(s + ns - nx, suffix) == 0) return True; 1190 return False; 1191} 1192 1193static 1194Bool mapSuffix ( Char* name, 1195 Char* oldSuffix, Char* newSuffix ) 1196{ 1197 if (!hasSuffix(name,oldSuffix)) return False; 1198 name[strlen(name)-strlen(oldSuffix)] = 0; 1199 strcat ( name, newSuffix ); 1200 return True; 1201} 1202 1203 1204/*---------------------------------------------*/ 1205static 1206void compress ( Char *name ) 1207{ 1208 FILE *inStr; 1209 FILE *outStr; 1210 Int32 n, i; 1211 struct MY_STAT statBuf; 1212 1213 deleteOutputOnInterrupt = False; 1214 1215 if (name == NULL && srcMode != SM_I2O) 1216 panic ( "compress: bad modes\n" ); 1217 1218 switch (srcMode) { 1219 case SM_I2O: 1220 copyFileName ( inName, "(stdin)" ); 1221 copyFileName ( outName, "(stdout)" ); 1222 break; 1223 case SM_F2F: 1224 copyFileName ( inName, name ); 1225 copyFileName ( outName, name ); 1226 strcat ( outName, ".bz2" ); 1227 break; 1228 case SM_F2O: 1229 copyFileName ( inName, name ); 1230 copyFileName ( outName, "(stdout)" ); 1231 break; 1232 } 1233 1234 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { 1235 if (noisy) 1236 fprintf ( stderr, "%s: There are no files matching `%s'.\n", 1237 progName, inName ); 1238 setExit(1); 1239 return; 1240 } 1241 if ( srcMode != SM_I2O && !fileExists ( inName ) ) { 1242 fprintf ( stderr, "%s: Can't open input file %s: %s.\n", 1243 progName, inName, strerror(errno) ); 1244 setExit(1); 1245 return; 1246 } 1247 for (i = 0; i < 
BZ_N_SUFFIX_PAIRS; i++) { 1248 if (hasSuffix(inName, zSuffix[i])) { 1249 if (noisy) 1250 fprintf ( stderr, 1251 "%s: Input file %s already has %s suffix.\n", 1252 progName, inName, zSuffix[i] ); 1253 setExit(1); 1254 return; 1255 } 1256 } 1257 if ( srcMode == SM_F2F || srcMode == SM_F2O ) { 1258 MY_STAT(inName, &statBuf); 1259 if ( MY_S_ISDIR(statBuf.st_mode) ) { 1260 fprintf( stderr, 1261 "%s: Input file %s is a directory.\n", 1262 progName,inName); 1263 setExit(1); 1264 return; 1265 } 1266 } 1267 if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { 1268 if (noisy) 1269 fprintf ( stderr, "%s: Input file %s is not a normal file.\n", 1270 progName, inName ); 1271 setExit(1); 1272 return; 1273 } 1274 if ( srcMode == SM_F2F && fileExists ( outName ) ) { 1275 if (forceOverwrite) { 1276 remove(outName); 1277 } else { 1278 fprintf ( stderr, "%s: Output file %s already exists.\n", 1279 progName, outName ); 1280 setExit(1); 1281 return; 1282 } 1283 } 1284 if ( srcMode == SM_F2F && !forceOverwrite && 1285 (n=countHardLinks ( inName )) > 0) { 1286 fprintf ( stderr, "%s: Input file %s has %d other link%s.\n", 1287 progName, inName, n, n > 1 ? "s" : "" ); 1288 setExit(1); 1289 return; 1290 } 1291 1292 if ( srcMode == SM_F2F ) { 1293 /* Save the file's meta-info before we open it. Doing it later 1294 means we mess up the access times. 
*/ 1295 saveInputFileMetaInfo ( inName ); 1296 } 1297 1298 switch ( srcMode ) { 1299 1300 case SM_I2O: 1301 inStr = stdin; 1302 outStr = stdout; 1303 if ( isatty ( fileno ( stdout ) ) ) { 1304 fprintf ( stderr, 1305 "%s: I won't write compressed data to a terminal.\n", 1306 progName ); 1307 fprintf ( stderr, "%s: For help, type: `%s --help'.\n", 1308 progName, progName ); 1309 setExit(1); 1310 return; 1311 }; 1312 break; 1313 1314 case SM_F2O: 1315 inStr = fopen ( inName, "rb" ); 1316 outStr = stdout; 1317 if ( isatty ( fileno ( stdout ) ) ) { 1318 fprintf ( stderr, 1319 "%s: I won't write compressed data to a terminal.\n", 1320 progName ); 1321 fprintf ( stderr, "%s: For help, type: `%s --help'.\n", 1322 progName, progName ); 1323 if ( inStr != NULL ) fclose ( inStr ); 1324 setExit(1); 1325 return; 1326 }; 1327 if ( inStr == NULL ) { 1328 fprintf ( stderr, "%s: Can't open input file %s: %s.\n", 1329 progName, inName, strerror(errno) ); 1330 setExit(1); 1331 return; 1332 }; 1333 break; 1334 1335 case SM_F2F: 1336 inStr = fopen ( inName, "rb" ); 1337 outStr = fopen_output_safely ( outName, "wb" ); 1338 if ( outStr == NULL) { 1339 fprintf ( stderr, "%s: Can't create output file %s: %s.\n", 1340 progName, outName, strerror(errno) ); 1341 if ( inStr != NULL ) fclose ( inStr ); 1342 setExit(1); 1343 return; 1344 } 1345 if ( inStr == NULL ) { 1346 fprintf ( stderr, "%s: Can't open input file %s: %s.\n", 1347 progName, inName, strerror(errno) ); 1348 if ( outStr != NULL ) fclose ( outStr ); 1349 setExit(1); 1350 return; 1351 }; 1352 break; 1353 1354 default: 1355 panic ( "compress: bad srcMode" ); 1356 break; 1357 } 1358 1359 if (verbosity >= 1) { 1360 fprintf ( stderr, " %s: ", inName ); 1361 pad ( inName ); 1362 fflush ( stderr ); 1363 } 1364 1365 /*--- Now the input and output handles are sane. Do the Biz. 
---*/ 1366 outputHandleJustInCase = outStr; 1367 deleteOutputOnInterrupt = True; 1368 compressStream ( inStr, outStr ); 1369 outputHandleJustInCase = NULL; 1370 1371 /*--- If there was an I/O error, we won't get here. ---*/ 1372 if ( srcMode == SM_F2F ) { 1373 applySavedMetaInfoToOutputFile ( outName ); 1374 deleteOutputOnInterrupt = False; 1375 if ( !keepInputFiles ) { 1376 IntNative retVal = remove ( inName ); 1377 ERROR_IF_NOT_ZERO ( retVal ); 1378 } 1379 } 1380 1381 deleteOutputOnInterrupt = False; 1382} 1383 1384 1385/*---------------------------------------------*/ 1386static 1387void uncompress ( Char *name ) 1388{ 1389 FILE *inStr; 1390 FILE *outStr; 1391 Int32 n, i; 1392 Bool magicNumberOK; 1393 Bool cantGuess; 1394 struct MY_STAT statBuf; 1395 1396 deleteOutputOnInterrupt = False; 1397 1398 if (name == NULL && srcMode != SM_I2O) 1399 panic ( "uncompress: bad modes\n" ); 1400 1401 cantGuess = False; 1402 switch (srcMode) { 1403 case SM_I2O: 1404 copyFileName ( inName, "(stdin)" ); 1405 copyFileName ( outName, "(stdout)" ); 1406 break; 1407 case SM_F2F: 1408 copyFileName ( inName, name ); 1409 copyFileName ( outName, name ); 1410 for (i = 0; i < BZ_N_SUFFIX_PAIRS; i++) 1411 if (mapSuffix(outName,zSuffix[i],unzSuffix[i])) 1412 goto zzz; 1413 cantGuess = True; 1414 strcat ( outName, ".out" ); 1415 break; 1416 case SM_F2O: 1417 copyFileName ( inName, name ); 1418 copyFileName ( outName, "(stdout)" ); 1419 break; 1420 } 1421 1422 zzz: 1423 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { 1424 if (noisy) 1425 fprintf ( stderr, "%s: There are no files matching `%s'.\n", 1426 progName, inName ); 1427 setExit(1); 1428 return; 1429 } 1430 if ( srcMode != SM_I2O && !fileExists ( inName ) ) { 1431 fprintf ( stderr, "%s: Can't open input file %s: %s.\n", 1432 progName, inName, strerror(errno) ); 1433 setExit(1); 1434 return; 1435 } 1436 if ( srcMode == SM_F2F || srcMode == SM_F2O ) { 1437 MY_STAT(inName, &statBuf); 1438 if ( MY_S_ISDIR(statBuf.st_mode) ) { 
1439 fprintf( stderr, 1440 "%s: Input file %s is a directory.\n", 1441 progName,inName); 1442 setExit(1); 1443 return; 1444 } 1445 } 1446 if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { 1447 if (noisy) 1448 fprintf ( stderr, "%s: Input file %s is not a normal file.\n", 1449 progName, inName ); 1450 setExit(1); 1451 return; 1452 } 1453 if ( /* srcMode == SM_F2F implied && */ cantGuess ) { 1454 if (noisy) 1455 fprintf ( stderr, 1456 "%s: Can't guess original name for %s -- using %s\n", 1457 progName, inName, outName ); 1458 /* just a warning, no return */ 1459 } 1460 if ( srcMode == SM_F2F && fileExists ( outName ) ) { 1461 if (forceOverwrite) { 1462 remove(outName); 1463 } else { 1464 fprintf ( stderr, "%s: Output file %s already exists.\n", 1465 progName, outName ); 1466 setExit(1); 1467 return; 1468 } 1469 } 1470 if ( srcMode == SM_F2F && !forceOverwrite && 1471 (n=countHardLinks ( inName ) ) > 0) { 1472 fprintf ( stderr, "%s: Input file %s has %d other link%s.\n", 1473 progName, inName, n, n > 1 ? "s" : "" ); 1474 setExit(1); 1475 return; 1476 } 1477 1478 if ( srcMode == SM_F2F ) { 1479 /* Save the file's meta-info before we open it. Doing it later 1480 means we mess up the access times. 
*/ 1481 saveInputFileMetaInfo ( inName ); 1482 } 1483 1484 switch ( srcMode ) { 1485 1486 case SM_I2O: 1487 inStr = stdin; 1488 outStr = stdout; 1489 if ( isatty ( fileno ( stdin ) ) ) { 1490 fprintf ( stderr, 1491 "%s: I won't read compressed data from a terminal.\n", 1492 progName ); 1493 fprintf ( stderr, "%s: For help, type: `%s --help'.\n", 1494 progName, progName ); 1495 setExit(1); 1496 return; 1497 }; 1498 break; 1499 1500 case SM_F2O: 1501 inStr = fopen ( inName, "rb" ); 1502 outStr = stdout; 1503 if ( inStr == NULL ) { 1504 fprintf ( stderr, "%s: Can't open input file %s:%s.\n", 1505 progName, inName, strerror(errno) ); 1506 if ( inStr != NULL ) fclose ( inStr ); 1507 setExit(1); 1508 return; 1509 }; 1510 break; 1511 1512 case SM_F2F: 1513 inStr = fopen ( inName, "rb" ); 1514 outStr = fopen_output_safely ( outName, "wb" ); 1515 if ( outStr == NULL) { 1516 fprintf ( stderr, "%s: Can't create output file %s: %s.\n", 1517 progName, outName, strerror(errno) ); 1518 if ( inStr != NULL ) fclose ( inStr ); 1519 setExit(1); 1520 return; 1521 } 1522 if ( inStr == NULL ) { 1523 fprintf ( stderr, "%s: Can't open input file %s: %s.\n", 1524 progName, inName, strerror(errno) ); 1525 if ( outStr != NULL ) fclose ( outStr ); 1526 setExit(1); 1527 return; 1528 }; 1529 break; 1530 1531 default: 1532 panic ( "uncompress: bad srcMode" ); 1533 break; 1534 } 1535 1536 if (verbosity >= 1) { 1537 fprintf ( stderr, " %s: ", inName ); 1538 pad ( inName ); 1539 fflush ( stderr ); 1540 } 1541 1542 /*--- Now the input and output handles are sane. Do the Biz. ---*/ 1543 outputHandleJustInCase = outStr; 1544 deleteOutputOnInterrupt = True; 1545 magicNumberOK = uncompressStream ( inStr, outStr ); 1546 outputHandleJustInCase = NULL; 1547 1548 /*--- If there was an I/O error, we won't get here. 
---*/ 1549 if ( magicNumberOK ) { 1550 if ( srcMode == SM_F2F ) { 1551 applySavedMetaInfoToOutputFile ( outName ); 1552 deleteOutputOnInterrupt = False; 1553 if ( !keepInputFiles ) { 1554 IntNative retVal = remove ( inName ); 1555 ERROR_IF_NOT_ZERO ( retVal ); 1556 } 1557 } 1558 } else { 1559 unzFailsExist = True; 1560 deleteOutputOnInterrupt = False; 1561 if ( srcMode == SM_F2F ) { 1562 IntNative retVal = remove ( outName ); 1563 ERROR_IF_NOT_ZERO ( retVal ); 1564 } 1565 } 1566 deleteOutputOnInterrupt = False; 1567 1568 if ( magicNumberOK ) { 1569 if (verbosity >= 1) 1570 fprintf ( stderr, "done\n" ); 1571 } else { 1572 setExit(2); 1573 if (verbosity >= 1) 1574 fprintf ( stderr, "not a bzip2 file.\n" ); else 1575 fprintf ( stderr, 1576 "%s: %s is not a bzip2 file.\n", 1577 progName, inName ); 1578 } 1579 1580} 1581 1582 1583/*---------------------------------------------*/ 1584static 1585void testf ( Char *name ) 1586{ 1587 FILE *inStr; 1588 Bool allOK; 1589 struct MY_STAT statBuf; 1590 1591 deleteOutputOnInterrupt = False; 1592 1593 if (name == NULL && srcMode != SM_I2O) 1594 panic ( "testf: bad modes\n" ); 1595 1596 copyFileName ( outName, "(none)" ); 1597 switch (srcMode) { 1598 case SM_I2O: copyFileName ( inName, "(stdin)" ); break; 1599 case SM_F2F: copyFileName ( inName, name ); break; 1600 case SM_F2O: copyFileName ( inName, name ); break; 1601 } 1602 1603 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { 1604 if (noisy) 1605 fprintf ( stderr, "%s: There are no files matching `%s'.\n", 1606 progName, inName ); 1607 setExit(1); 1608 return; 1609 } 1610 if ( srcMode != SM_I2O && !fileExists ( inName ) ) { 1611 fprintf ( stderr, "%s: Can't open input %s: %s.\n", 1612 progName, inName, strerror(errno) ); 1613 setExit(1); 1614 return; 1615 } 1616 if ( srcMode != SM_I2O ) { 1617 MY_STAT(inName, &statBuf); 1618 if ( MY_S_ISDIR(statBuf.st_mode) ) { 1619 fprintf( stderr, 1620 "%s: Input file %s is a directory.\n", 1621 progName,inName); 1622 setExit(1); 
1623 return; 1624 } 1625 } 1626 1627 switch ( srcMode ) { 1628 1629 case SM_I2O: 1630 if ( isatty ( fileno ( stdin ) ) ) { 1631 fprintf ( stderr, 1632 "%s: I won't read compressed data from a terminal.\n", 1633 progName ); 1634 fprintf ( stderr, "%s: For help, type: `%s --help'.\n", 1635 progName, progName ); 1636 setExit(1); 1637 return; 1638 }; 1639 inStr = stdin; 1640 break; 1641 1642 case SM_F2O: case SM_F2F: 1643 inStr = fopen ( inName, "rb" ); 1644 if ( inStr == NULL ) { 1645 fprintf ( stderr, "%s: Can't open input file %s:%s.\n", 1646 progName, inName, strerror(errno) ); 1647 setExit(1); 1648 return; 1649 }; 1650 break; 1651 1652 default: 1653 panic ( "testf: bad srcMode" ); 1654 break; 1655 } 1656 1657 if (verbosity >= 1) { 1658 fprintf ( stderr, " %s: ", inName ); 1659 pad ( inName ); 1660 fflush ( stderr ); 1661 } 1662 1663 /*--- Now the input handle is sane. Do the Biz. ---*/ 1664 outputHandleJustInCase = NULL; 1665 allOK = testStream ( inStr ); 1666 1667 if (allOK && verbosity >= 1) fprintf ( stderr, "ok\n" ); 1668 if (!allOK) testFailsExist = True; 1669} 1670 1671 1672/*---------------------------------------------*/ 1673static 1674void license ( void ) 1675{ 1676 fprintf ( stderr, 1677 1678 "bzip2, a block-sorting file compressor. " 1679 "Version %s.\n" 1680 " \n" 1681 " Copyright (C) 1996-2005 by Julian Seward.\n" 1682 " \n" 1683 " This program is free software; you can redistribute it and/or modify\n" 1684 " it under the terms set out in the LICENSE file, which is included\n" 1685 " in the bzip2-1.0 source distribution.\n" 1686 " \n" 1687 " This program is distributed in the hope that it will be useful,\n" 1688 " but WITHOUT ANY WARRANTY; without even the implied warranty of\n" 1689 " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the\n" 1690 " LICENSE file for more details.\n" 1691 " \n", 1692 BZ2_bzlibVersion() 1693 ); 1694} 1695 1696 1697/*---------------------------------------------*/ 1698static 1699void usage ( Char *fullProgName ) 1700{ 1701 fprintf ( 1702 stderr, 1703 "bzip2, a block-sorting file compressor. " 1704 "Version %s.\n" 1705 "\n usage: %s [flags and input files in any order]\n" 1706 "\n" 1707 " -h --help print this message\n" 1708 " -d --decompress force decompression\n" 1709 " -z --compress force compression\n" 1710 " -k --keep keep (don't delete) input files\n" 1711 " -f --force overwrite existing output files\n" 1712 " -t --test test compressed file integrity\n" 1713 " -c --stdout output to standard out\n" 1714 " -q --quiet suppress noncritical error messages\n" 1715 " -v --verbose be verbose (a 2nd -v gives more)\n" 1716 " -L --license display software version & license\n" 1717 " -V --version display software version & license\n" 1718 " -s --small use less memory (at most 2500k)\n" 1719 " -1 .. -9 set block size to 100k .. 900k\n" 1720 " --fast alias for -1\n" 1721 " --best alias for -9\n" 1722 "\n" 1723 " If invoked as `bzip2', default action is to compress.\n" 1724 " as `bunzip2', default action is to decompress.\n" 1725 " as `bzcat', default action is to decompress to stdout.\n" 1726 "\n" 1727 " If no file names are given, bzip2 compresses or decompresses\n" 1728 " from standard input to standard output. 
You can combine\n" 1729 " short flags, so `-v -4' means the same as -v4 or -4v, &c.\n" 1730# if BZ_UNIX 1731 "\n" 1732# endif 1733 , 1734 1735 BZ2_bzlibVersion(), 1736 fullProgName 1737 ); 1738} 1739 1740 1741/*---------------------------------------------*/ 1742static 1743void redundant ( Char* flag ) 1744{ 1745 fprintf ( 1746 stderr, 1747 "%s: %s is redundant in versions 0.9.5 and above\n", 1748 progName, flag ); 1749} 1750 1751 1752/*---------------------------------------------*/ 1753/*-- 1754 All the garbage from here to main() is purely to 1755 implement a linked list of command-line arguments, 1756 into which main() copies argv[1 .. argc-1]. 1757 1758 The purpose of this exercise is to facilitate 1759 the expansion of wildcard characters * and ? in 1760 filenames for OSs which don't know how to do it 1761 themselves, like MSDOS, Windows 95 and NT. 1762 1763 The actual Dirty Work is done by the platform- 1764 specific macro APPEND_FILESPEC. 1765--*/ 1766 1767typedef 1768 struct zzzz { 1769 Char *name; 1770 struct zzzz *link; 1771 } 1772 Cell; 1773 1774 1775/*---------------------------------------------*/ 1776static 1777void *myMalloc ( Int32 n ) 1778{ 1779 void* p; 1780 1781 p = malloc ( (size_t)n ); 1782 if (p == NULL) outOfMemory (); 1783 return p; 1784} 1785 1786 1787/*---------------------------------------------*/ 1788static 1789Cell *mkCell ( void ) 1790{ 1791 Cell *c; 1792 1793 c = (Cell*) myMalloc ( sizeof ( Cell ) ); 1794 c->name = NULL; 1795 c->link = NULL; 1796 return c; 1797} 1798 1799 1800/*---------------------------------------------*/ 1801static 1802Cell *snocString ( Cell *root, Char *name ) 1803{ 1804 if (root == NULL) { 1805 Cell *tmp = mkCell(); 1806 tmp->name = (Char*) myMalloc ( 5 + strlen(name) ); 1807 strcpy ( tmp->name, name ); 1808 return tmp; 1809 } else { 1810 Cell *tmp = root; 1811 while (tmp->link != NULL) tmp = tmp->link; 1812 tmp->link = snocString ( tmp->link, name ); 1813 return root; 1814 } 1815} 1816 1817 
1818/*---------------------------------------------*/ 1819static 1820void addFlagsFromEnvVar ( Cell** argList, Char* varName ) 1821{ 1822 Int32 i, j, k; 1823 Char *envbase, *p; 1824 1825 envbase = getenv(varName); 1826 if (envbase != NULL) { 1827 p = envbase; 1828 i = 0; 1829 while (True) { 1830 if (p[i] == 0) break; 1831 p += i; 1832 i = 0; 1833 while (isspace((Int32)(p[0]))) p++; 1834 while (p[i] != 0 && !isspace((Int32)(p[i]))) i++; 1835 if (i > 0) { 1836 k = i; if (k > FILE_NAME_LEN-10) k = FILE_NAME_LEN-10; 1837 for (j = 0; j < k; j++) tmpName[j] = p[j]; 1838 tmpName[k] = 0; 1839 APPEND_FLAG(*argList, tmpName); 1840 } 1841 } 1842 } 1843} 1844 1845 1846/*---------------------------------------------*/ 1847#define ISFLAG(s) (strcmp(aa->name, (s))==0) 1848 1849IntNative main ( IntNative argc, Char *argv[] ) 1850{ 1851 Int32 i, j; 1852 Char *tmp; 1853 Cell *argList; 1854 Cell *aa; 1855 Bool decode; 1856 1857 /*-- Be really really really paranoid :-) --*/ 1858 if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 || 1859 sizeof(Int16) != 2 || sizeof(UInt16) != 2 || 1860 sizeof(Char) != 1 || sizeof(UChar) != 1) 1861 configError(); 1862 1863 /*-- Initialise --*/ 1864 outputHandleJustInCase = NULL; 1865 smallMode = False; 1866 keepInputFiles = False; 1867 forceOverwrite = False; 1868 noisy = True; 1869 verbosity = 0; 1870 blockSize100k = 9; 1871 testFailsExist = False; 1872 unzFailsExist = False; 1873 numFileNames = 0; 1874 numFilesProcessed = 0; 1875 workFactor = 30; 1876 deleteOutputOnInterrupt = False; 1877 exitValue = 0; 1878 i = j = 0; /* avoid bogus warning from egcs-1.1.X */ 1879 1880 /*-- Set up signal handlers for mem access errors --*/ 1881 signal (SIGSEGV, mySIGSEGVorSIGBUScatcher); 1882# if BZ_UNIX 1883# ifndef __DJGPP__ 1884 signal (SIGBUS, mySIGSEGVorSIGBUScatcher); 1885# endif 1886# endif 1887 1888 copyFileName ( inName, "(none)" ); 1889 copyFileName ( outName, "(none)" ); 1890 1891 copyFileName ( progNameReally, argv[0] ); 1892 progName = &progNameReally[0]; 
1893 for (tmp = &progNameReally[0]; *tmp != '\0'; tmp++) 1894 if (*tmp == PATH_SEP) progName = tmp + 1; 1895 1896 1897 /*-- Copy flags from env var BZIP2, and 1898 expand filename wildcards in arg list. 1899 --*/ 1900 argList = NULL; 1901 addFlagsFromEnvVar ( &argList, "BZIP2" ); 1902 addFlagsFromEnvVar ( &argList, "BZIP" ); 1903 for (i = 1; i <= argc-1; i++) 1904 APPEND_FILESPEC(argList, argv[i]); 1905 1906 1907 /*-- Find the length of the longest filename --*/ 1908 longestFileName = 7; 1909 numFileNames = 0; 1910 decode = True; 1911 for (aa = argList; aa != NULL; aa = aa->link) { 1912 if (ISFLAG("--")) { decode = False; continue; } 1913 if (aa->name[0] == '-' && decode) continue; 1914 numFileNames++; 1915 if (longestFileName < (Int32)strlen(aa->name) ) 1916 longestFileName = (Int32)strlen(aa->name); 1917 } 1918 1919 1920 /*-- Determine source modes; flag handling may change this too. --*/ 1921 if (numFileNames == 0) 1922 srcMode = SM_I2O; else srcMode = SM_F2F; 1923 1924 1925 /*-- Determine what to do (compress/uncompress/test/cat). --*/ 1926 /*-- Note that subsequent flag handling may change this. --*/ 1927 opMode = OM_Z; 1928 1929 if ( (strstr ( progName, "unzip" ) != 0) || 1930 (strstr ( progName, "UNZIP" ) != 0) ) 1931 opMode = OM_UNZ; 1932 1933 if ( (strstr ( progName, "z2cat" ) != 0) || 1934 (strstr ( progName, "Z2CAT" ) != 0) || 1935 (strstr ( progName, "zcat" ) != 0) || 1936 (strstr ( progName, "ZCAT" ) != 0) ) { 1937 opMode = OM_UNZ; 1938 srcMode = (numFileNames == 0) ? SM_I2O : SM_F2O; 1939 } 1940 1941 1942 /*-- Look at the flags. 
--*/ 1943 for (aa = argList; aa != NULL; aa = aa->link) { 1944 if (ISFLAG("--")) break; 1945 if (aa->name[0] == '-' && aa->name[1] != '-') { 1946 for (j = 1; aa->name[j] != '\0'; j++) { 1947 switch (aa->name[j]) { 1948 case 'c': srcMode = SM_F2O; break; 1949 case 'd': opMode = OM_UNZ; break; 1950 case 'z': opMode = OM_Z; break; 1951 case 'f': forceOverwrite = True; break; 1952 case 't': opMode = OM_TEST; break; 1953 case 'k': keepInputFiles = True; break; 1954 case 's': smallMode = True; break; 1955 case 'q': noisy = False; break; 1956 case '1': blockSize100k = 1; break; 1957 case '2': blockSize100k = 2; break; 1958 case '3': blockSize100k = 3; break; 1959 case '4': blockSize100k = 4; break; 1960 case '5': blockSize100k = 5; break; 1961 case '6': blockSize100k = 6; break; 1962 case '7': blockSize100k = 7; break; 1963 case '8': blockSize100k = 8; break; 1964 case '9': blockSize100k = 9; break; 1965 case 'V': 1966 case 'L': license(); break; 1967 case 'v': verbosity++; break; 1968 case 'h': usage ( progName ); 1969 exit ( 0 ); 1970 break; 1971 default: fprintf ( stderr, "%s: Bad flag `%s'\n", 1972 progName, aa->name ); 1973 usage ( progName ); 1974 exit ( 1 ); 1975 break; 1976 } 1977 } 1978 } 1979 } 1980 1981 /*-- And again ... 
--*/ 1982 for (aa = argList; aa != NULL; aa = aa->link) { 1983 if (ISFLAG("--")) break; 1984 if (ISFLAG("--stdout")) srcMode = SM_F2O; else 1985 if (ISFLAG("--decompress")) opMode = OM_UNZ; else 1986 if (ISFLAG("--compress")) opMode = OM_Z; else 1987 if (ISFLAG("--force")) forceOverwrite = True; else 1988 if (ISFLAG("--test")) opMode = OM_TEST; else 1989 if (ISFLAG("--keep")) keepInputFiles = True; else 1990 if (ISFLAG("--small")) smallMode = True; else 1991 if (ISFLAG("--quiet")) noisy = False; else 1992 if (ISFLAG("--version")) license(); else 1993 if (ISFLAG("--license")) license(); else 1994 if (ISFLAG("--exponential")) workFactor = 1; else 1995 if (ISFLAG("--repetitive-best")) redundant(aa->name); else 1996 if (ISFLAG("--repetitive-fast")) redundant(aa->name); else 1997 if (ISFLAG("--fast")) blockSize100k = 1; else 1998 if (ISFLAG("--best")) blockSize100k = 9; else 1999 if (ISFLAG("--verbose")) verbosity++; else 2000 if (ISFLAG("--help")) { usage ( progName ); exit ( 0 ); } 2001 else 2002 if (strncmp ( aa->name, "--", 2) == 0) { 2003 fprintf ( stderr, "%s: Bad flag `%s'\n", progName, aa->name ); 2004 usage ( progName ); 2005 exit ( 1 ); 2006 } 2007 } 2008 2009 if (verbosity > 4) verbosity = 4; 2010 if (opMode == OM_Z && smallMode && blockSize100k > 2) 2011 blockSize100k = 2; 2012 2013 if (opMode == OM_TEST && srcMode == SM_F2O) { 2014 fprintf ( stderr, "%s: -c and -t cannot be used together.\n", 2015 progName ); 2016 exit ( 1 ); 2017 } 2018 2019 if (srcMode == SM_F2O && numFileNames == 0) 2020 srcMode = SM_I2O; 2021 2022 if (opMode != OM_Z) blockSize100k = 0; 2023 2024 if (srcMode == SM_F2F) { 2025 signal (SIGINT, mySignalCatcher); 2026 signal (SIGTERM, mySignalCatcher); 2027# if BZ_UNIX 2028 signal (SIGHUP, mySignalCatcher); 2029# endif 2030 } 2031 2032 if (opMode == OM_Z) { 2033 if (srcMode == SM_I2O) { 2034 compress ( NULL ); 2035 } else { 2036 decode = True; 2037 for (aa = argList; aa != NULL; aa = aa->link) { 2038 if (ISFLAG("--")) { decode = False; 
continue; } 2039 if (aa->name[0] == '-' && decode) continue; 2040 numFilesProcessed++; 2041 compress ( aa->name ); 2042 } 2043 } 2044 } 2045 else 2046 2047 if (opMode == OM_UNZ) { 2048 unzFailsExist = False; 2049 if (srcMode == SM_I2O) { 2050 uncompress ( NULL ); 2051 } else { 2052 decode = True; 2053 for (aa = argList; aa != NULL; aa = aa->link) { 2054 if (ISFLAG("--")) { decode = False; continue; } 2055 if (aa->name[0] == '-' && decode) continue; 2056 numFilesProcessed++; 2057 uncompress ( aa->name ); 2058 } 2059 } 2060 if (unzFailsExist) { 2061 setExit(2); 2062 exit(exitValue); 2063 } 2064 } 2065 2066 else { 2067 testFailsExist = False; 2068 if (srcMode == SM_I2O) { 2069 testf ( NULL ); 2070 } else { 2071 decode = True; 2072 for (aa = argList; aa != NULL; aa = aa->link) { 2073 if (ISFLAG("--")) { decode = False; continue; } 2074 if (aa->name[0] == '-' && decode) continue; 2075 numFilesProcessed++; 2076 testf ( aa->name ); 2077 } 2078 } 2079 if (testFailsExist && noisy) { 2080 fprintf ( stderr, 2081 "\n" 2082 "You can use the `bzip2recover' program to attempt to recover\n" 2083 "data from undamaged sections of corrupted files.\n\n" 2084 ); 2085 setExit(2); 2086 exit(exitValue); 2087 } 2088 } 2089 2090 /* Free the argument list memory to mollify leak detectors 2091 (eg) Purify, Checker. Serves no other useful purpose. 2092 */ 2093 aa = argList; 2094 while (aa != NULL) { 2095 Cell* aa2 = aa->link; 2096 if (aa->name != NULL) free(aa->name); 2097 free(aa); 2098 aa = aa2; 2099 } 2100 2101 return exitValue; 2102} 2103 2104 2105/*-----------------------------------------------------------*/ 2106/*--- end bzip2.c ---*/ 2107/*-----------------------------------------------------------*/ 2108