/* bzip2recover.c, repository revision 78556 */

/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/*--
  This program is bzip2recover, a program to attempt data
  salvage from damaged files created by the accompanying
  bzip2-1.0 program.

  Copyright (C) 1996-2000 Julian R Seward.  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

  2. The origin of this software must not be misrepresented; you must
     not claim that you wrote the original software.  If you use this
     software in a product, an acknowledgment in the product
     documentation would be appreciated but is not required.

  3. Altered source versions must be plainly marked as such, and must
     not be misrepresented as being the original software.

  4. The name of the author may not be used to endorse or promote
     products derived from this software without specific prior written
     permission.

  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

  Julian Seward, Cambridge, UK.
  jseward@acm.org
  bzip2/libbzip2 version 1.0 of 21 March 2000
--*/

/*--
  This program is a complete hack and should be rewritten
  properly.  It isn't very complicated.
--*/

/*--
  NOTE: this is an ALTERED version of the original source (see
  condition 3 above).  Changes relative to the original:
    - file names are length-checked before being copied into the
      fixed-size name buffers;
    - the block tables are bounds-checked;
    - write errors are reported as write errors, naming the output file;
    - read errors are detected with ferror() rather than errno;
    - recovered files are created next to the input file, with the
      "recNNNN" prefix applied to the base name only;
    - the second pass stops after the last recovered block and closes
      its input stream.
--*/

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>

typedef unsigned int  UInt32;
typedef int           Int32;
typedef unsigned char UChar;
typedef char          Char;
typedef unsigned char Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)

/*-- Size of the file-name buffers below. --*/
#define BZ_MAX_FILENAME 2000

/*--
  Room kept free in outFileName for what is added to the input
  name: "rec" + block number (at most 10 digits) + ".bz2" + NUL.
--*/
#define BZ_NAME_RESERVE 20

/*-- Directory separator used to locate the base name of the input. --*/
#if defined(_WIN32)
#define BZ_SPLIT_SYM '\\'
#else
#define BZ_SPLIT_SYM '/'
#endif

Char inFileName[BZ_MAX_FILENAME];
Char outFileName[BZ_MAX_FILENAME];
Char progName[BZ_MAX_FILENAME];

UInt32 bytesOut = 0;
UInt32 bytesIn  = 0;


/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/

/*---------------------------------------------*/
/*--
  Reports a failure while reading inFileName (perror() adds the
  system's reason) and terminates the program with status 1.
--*/
void readError ( void )
{
   fprintf ( stderr,
             "%s: I/O error reading `%s', possible reason follows.\n",
             progName, inFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
  Reports a failure while writing outFileName and terminates the
  program with status 1.  (The original text said "reading" and named
  the input file, which pointed the user at the wrong file.)
--*/
void writeError ( void )
{
   fprintf ( stderr,
             "%s: I/O error writing `%s', possible reason follows.\n",
             progName, outFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
  Reports a failed allocation of n bytes and terminates the program
  with status 1.
--*/
void mallocFail ( Int32 n )
{
   fprintf ( stderr,
             "%s: malloc failed on request for %d bytes.\n",
             progName, n );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
  Reports that the input holds more block boundaries than the
  fixed-size block tables can record, and terminates the program
  with status 1.
--*/
static void tooManyBlocks ( Int32 maxHandledBlocks )
{
   fprintf ( stderr,
             "%s: `%s' appears to contain more than %d blocks\n",
             progName, inFileName, maxHandledBlocks );
   fprintf ( stderr,
             "%s: and cannot be handled.  To fix, increase\n",
             progName );
   fprintf ( stderr,
             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/

/*--
  A FILE wrapped for bit-at-a-time access.
    handle   : the underlying stdio stream
    buffer   : the byte currently being assembled (writing) or
               consumed (reading)
    buffLive : number of valid bits held in buffer
    mode     : 'r' for a read stream, 'w' for a write stream
--*/
typedef
   struct {
      FILE*  handle;
      Int32  buffer;
      Int32  buffLive;
      Char   mode;
   }
   BitStream;


/*---------------------------------------------*/
/*--
  Allocates an empty BitStream over stream with the given mode.
  Does not return if the allocation fails.
--*/
static BitStream* bsOpenStream ( FILE* stream, Char mode )
{
   BitStream *bs = malloc ( sizeof(BitStream) );
   if (bs == NULL) mallocFail ( (Int32) sizeof(BitStream) );
   bs->handle   = stream;
   bs->buffer   = 0;
   bs->buffLive = 0;
   bs->mode     = mode;
   return bs;
}


/*---------------------------------------------*/
/*--
  Wraps stream for bit-wise reading.  The result must be released
  with bsClose, which also closes stream.
--*/
BitStream* bsOpenReadStream ( FILE* stream )
{
   return bsOpenStream ( stream, 'r' );
}


/*---------------------------------------------*/
/*--
  Wraps stream for bit-wise writing.  The result must be released
  with bsClose, which also flushes and closes stream.
--*/
BitStream* bsOpenWriteStream ( FILE* stream )
{
   return bsOpenStream ( stream, 'w' );
}


/*---------------------------------------------*/
/*--
  Appends the low bit of `bit' to the stream.  Bits are packed most
  significant first; a full byte is only written out when the next
  bit arrives (or by bsClose).
--*/
void bsPutBit ( BitStream* bs, Int32 bit )
{
   if (bs->buffLive == 8) {
      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
      if (retVal == EOF) writeError();
      bytesOut++;
      bs->buffLive = 1;
      bs->buffer = bit & 0x1;
   } else {
      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
      bs->buffLive++;
   }
}


/*---------------------------------------------*/
/*--
   Returns 0 or 1, or 2 to indicate EOF.
   Bits are delivered most significant first.  A genuine read error
   (as opposed to end of file) does not return: it ends in readError.
--*/
Int32 bsGetBit ( BitStream* bs )
{
   if (bs->buffLive > 0) {
      bs->buffLive --;
      return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
   } else {
      Int32 retVal = getc ( bs->handle );
      if ( retVal == EOF ) {
         /* Ask the stream itself: errno may be stale from some
            earlier, unrelated call. */
         if (ferror ( bs->handle )) readError();
         return 2;
      }
      bs->buffLive = 7;
      bs->buffer = retVal;
      return ( ((bs->buffer) >> 7) & 0x1 );
   }
}


/*---------------------------------------------*/
/*--
  Closes the underlying FILE and frees bs.  For a write stream the
  pending byte is first padded with zero bits on the right and
  written out.  Any failure ends in writeError / readError.
--*/
void bsClose ( BitStream* bs )
{
   Int32 retVal;

   if ( bs->mode == 'w' ) {
      while ( bs->buffLive < 8 ) {
         bs->buffLive++;
         bs->buffer <<= 1;
      }
      retVal = putc ( (UChar) (bs->buffer), bs->handle );
      if (retVal == EOF) writeError();
      bytesOut++;
      retVal = fflush ( bs->handle );
      if (retVal == EOF) writeError();
   }
   retVal = fclose ( bs->handle );
   if (retVal == EOF) {
      if (bs->mode == 'w') writeError(); else readError();
   }
   free ( bs );
}


/*---------------------------------------------*/
/*-- Writes the 8 bits of c, most significant bit first. --*/
void bsPutUChar ( BitStream* bs, UChar c )
{
   Int32 i;
   for (i = 7; i >= 0; i--)
      bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
}


/*---------------------------------------------*/
/*-- Writes the 32 bits of c, most significant bit first. --*/
void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   Int32 i;

   for (i = 31; i >= 0; i--)
      bsPutBit ( bs, (c >> i) & 0x1 );
}


/*---------------------------------------------*/
/*--
  Returns True iff name is longer than 4 characters and ends
  in ".bz2".
--*/
Bool endsInBz2 ( Char* name )
{
   Int32 n = strlen ( name );
   if (n <= 4) return False;
   return
      (name[n-4] == '.' &&
       name[n-3] == 'b' &&
       name[n-2] == 'z' &&
       name[n-1] == '2');
}


/*---------------------------------------------------*/
/*---                                             ---*/
/*---------------------------------------------------*/

/*-- 48-bit marker that precedes every compressed block. --*/
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

/*-- 48-bit marker that follows the last block of a stream. --*/
#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL

/*-- Capacity of the block tables below. --*/
#define BZ_MAX_HANDLED_BLOCKS 20000

/*--
  Bit offsets in the input.  bStart/bEnd hold every candidate span
  between two markers; rbStart/rbEnd hold the spans accepted for
  recovery.  Offsets are UInt32, so where unsigned int is 32 bits
  wide an input of 512 MiB or more makes the bit counter wrap.
--*/
UInt32 bStart[BZ_MAX_HANDLED_BLOCKS];
UInt32 bEnd[BZ_MAX_HANDLED_BLOCKS];
UInt32 rbStart[BZ_MAX_HANDLED_BLOCKS];
UInt32 rbEnd[BZ_MAX_HANDLED_BLOCKS];


/*---------------------------------------------*/
/*--
  Builds in outFileName the name of the file receiving recovered
  block number blockNo (1-based): the input name with "rec" plus the
  zero-padded block number put in front of its base name, and ".bz2"
  appended unless already present.  Any directory part of the input
  name is kept, so the output lands beside the input.

  Relies on main having checked that inFileName leaves
  BZ_NAME_RESERVE bytes of headroom.
--*/
static void makeOutFileName ( Int32 blockNo )
{
   Char*  split;
   Char*  p;
   size_t ofs;

   strcpy ( outFileName, inFileName );
   split = strrchr ( outFileName, BZ_SPLIT_SYM );
   if (split == NULL) split = outFileName; else split++;
   /* split now points at the start of the base name */
   ofs = (size_t) (split - outFileName);

   sprintf ( split, "rec%4d", blockNo );
   for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
   strcat ( outFileName, inFileName + ofs );
   if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
}


/*---------------------------------------------*/
/*--
  Usage: bzip2recover damaged_file_name

  Pass 1 scans the input bit by bit for block header / end-of-stream
  markers and records the spans between them.  Pass 2 re-reads the
  input and copies every recorded span into its own file, wrapped in
  a fresh stream header and a trailer carrying that block's CRC.

  Returns 0 on success; every failure path exits with status 1.
--*/
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       currBlock, b, wrBlock;
   UInt32      bitsRead;
   Int32       rbCtr;

   UInt32      buffHi, buffLo, blockCRC;

   /* argv[0] is not under our control: copy it with a bound. */
   if (argc > 0 && argv[0] != NULL) {
      strncpy ( progName, argv[0], BZ_MAX_FILENAME-1 );
      progName[BZ_MAX_FILENAME-1] = '\0';
   } else {
      strcpy ( progName, "bzip2recover" );
   }
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      exit(1);
   }

   /* Refuse names that would not fit, leaving room for the
      additions made by makeOutFileName. */
   if (strlen ( argv[1] ) >= BZ_MAX_FILENAME - BZ_NAME_RESERVE) {
      fprintf ( stderr,
                "%s: supplied file name is too long (limit is %d characters).\n",
                progName, BZ_MAX_FILENAME - BZ_NAME_RESERVE - 1 );
      exit(1);
   }
   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   /*-- pass 1: locate the block boundaries --*/

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         if (bitsRead >= bStart[currBlock] &&
             (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, "   block %d runs from %u to %u (incomplete)\n",
                         currBlock,  bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }
      /* buffHi:buffLo is a 64-bit shift register holding the most
         recently read bits; its low 48 bits are compared with the
         markers. */
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         /* The span that just finished ends right before the 48-bit
            marker. */
         if (bitsRead > 49)
            bEnd[currBlock] = bitsRead-49; else
            bEnd[currBlock] = 0;
         if (currBlock > 0 &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, "   block %d runs from %u to %u\n",
                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         currBlock++;
         if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
            tooManyBlocks ( BZ_MAX_HANDLED_BLOCKS );

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks run from 1 to rbCtr inclusive. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   }

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   /*-- pass 2: copy each block into its own file --*/

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = NULL;

   bitsRead = 0;
   buffHi = buffLo = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      /* 48 bits into the block, the first 32 of them (the value
         re-emitted in the trailer as the stream CRC) sit in bits
         47..16 of the shift register. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         if (outFile != NULL) {
            /* end-of-stream marker followed by the stream CRC */
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            /* bsClose freed bsWr and closed outFile: drop both so
               neither can be used again by accident. */
            bsWr = NULL;
            outFile = NULL;
         }
         /* Stop once the last recorded block is out; nothing after
            it is recoverable. */
         if (wrBlock + 1 >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         makeOutFileName ( wrBlock+1 );

         fprintf ( stderr, "   writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         bsWr = bsOpenWriteStream ( outFile );
         /* stream header "BZh9" followed by the block header marker */
         bsPutUChar ( bsWr, 'B' ); bsPutUChar ( bsWr, 'Z' );
         bsPutUChar ( bsWr, 'h' ); bsPutUChar ( bsWr, '9' );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   bsClose ( bsIn );

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}



/*-----------------------------------------------------------*/
/*--- end                                  bzip2recover.c ---*/
/*-----------------------------------------------------------*/