bzip2recover.c revision 78556
178556Sobrien
278556Sobrien/*-----------------------------------------------------------*/
378556Sobrien/*--- Block recoverer program for bzip2                   ---*/
478556Sobrien/*---                                      bzip2recover.c ---*/
578556Sobrien/*-----------------------------------------------------------*/
678556Sobrien
778556Sobrien/*--
878556Sobrien  This program is bzip2recover, a program to attempt data
978556Sobrien  salvage from damaged files created by the accompanying
1078556Sobrien  bzip2-1.0 program.
1178556Sobrien
1278556Sobrien  Copyright (C) 1996-2000 Julian R Seward.  All rights reserved.
1378556Sobrien
1478556Sobrien  Redistribution and use in source and binary forms, with or without
1578556Sobrien  modification, are permitted provided that the following conditions
1678556Sobrien  are met:
1778556Sobrien
1878556Sobrien  1. Redistributions of source code must retain the above copyright
1978556Sobrien     notice, this list of conditions and the following disclaimer.
2078556Sobrien
2178556Sobrien  2. The origin of this software must not be misrepresented; you must
2278556Sobrien     not claim that you wrote the original software.  If you use this
2378556Sobrien     software in a product, an acknowledgment in the product
2478556Sobrien     documentation would be appreciated but is not required.
2578556Sobrien
2678556Sobrien  3. Altered source versions must be plainly marked as such, and must
2778556Sobrien     not be misrepresented as being the original software.
2878556Sobrien
2978556Sobrien  4. The name of the author may not be used to endorse or promote
3078556Sobrien     products derived from this software without specific prior written
3178556Sobrien     permission.
3278556Sobrien
3378556Sobrien  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
3478556Sobrien  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
3578556Sobrien  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3678556Sobrien  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
3778556Sobrien  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3878556Sobrien  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
3978556Sobrien  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
4078556Sobrien  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
4178556Sobrien  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
4278556Sobrien  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
4378556Sobrien  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4478556Sobrien
4578556Sobrien  Julian Seward, Cambridge, UK.
4678556Sobrien  jseward@acm.org
4778556Sobrien  bzip2/libbzip2 version 1.0 of 21 March 2000
4878556Sobrien--*/
4978556Sobrien
5078556Sobrien/*--
5178556Sobrien  This program is a complete hack and should be rewritten
5278556Sobrien  properly.  It isn't very complicated.
5378556Sobrien--*/
5478556Sobrien
5578556Sobrien#include <stdio.h>
5678556Sobrien#include <errno.h>
5778556Sobrien#include <stdlib.h>
5878556Sobrien#include <string.h>
5978556Sobrien
/*-- Project-local aliases for the basic integer and character types. --*/
typedef unsigned int  UInt32;
typedef int           Int32;
typedef unsigned char UChar;
typedef char          Char;

/*-- Boolean type (stored as an unsigned char) and its two values. --*/
typedef unsigned char Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
6778556Sobrien
6878556Sobrien
6978556SobrienChar inFileName[2000];
7078556SobrienChar outFileName[2000];
7178556SobrienChar progName[2000];
7278556Sobrien
7378556SobrienUInt32 bytesOut = 0;
7478556SobrienUInt32 bytesIn  = 0;
7578556Sobrien
7678556Sobrien
7778556Sobrien/*---------------------------------------------------*/
7878556Sobrien/*--- I/O errors                                  ---*/
7978556Sobrien/*---------------------------------------------------*/
8078556Sobrien
8178556Sobrien/*---------------------------------------------*/
8278556Sobrienvoid readError ( void )
8378556Sobrien{
8478556Sobrien   fprintf ( stderr,
8578556Sobrien             "%s: I/O error reading `%s', possible reason follows.\n",
8678556Sobrien            progName, inFileName );
8778556Sobrien   perror ( progName );
8878556Sobrien   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
8978556Sobrien             progName );
9078556Sobrien   exit ( 1 );
9178556Sobrien}
9278556Sobrien
9378556Sobrien
9478556Sobrien/*---------------------------------------------*/
9578556Sobrienvoid writeError ( void )
9678556Sobrien{
9778556Sobrien   fprintf ( stderr,
9878556Sobrien             "%s: I/O error reading `%s', possible reason follows.\n",
9978556Sobrien            progName, inFileName );
10078556Sobrien   perror ( progName );
10178556Sobrien   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
10278556Sobrien             progName );
10378556Sobrien   exit ( 1 );
10478556Sobrien}
10578556Sobrien
10678556Sobrien
10778556Sobrien/*---------------------------------------------*/
10878556Sobrienvoid mallocFail ( Int32 n )
10978556Sobrien{
11078556Sobrien   fprintf ( stderr,
11178556Sobrien             "%s: malloc failed on request for %d bytes.\n",
11278556Sobrien            progName, n );
11378556Sobrien   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
11478556Sobrien             progName );
11578556Sobrien   exit ( 1 );
11678556Sobrien}
11778556Sobrien
11878556Sobrien
11978556Sobrien/*---------------------------------------------------*/
12078556Sobrien/*--- Bit stream I/O                              ---*/
12178556Sobrien/*---------------------------------------------------*/
12278556Sobrien
12378556Sobrientypedef
12478556Sobrien   struct {
12578556Sobrien      FILE*  handle;
12678556Sobrien      Int32  buffer;
12778556Sobrien      Int32  buffLive;
12878556Sobrien      Char   mode;
12978556Sobrien   }
13078556Sobrien   BitStream;
13178556Sobrien
13278556Sobrien
13378556Sobrien/*---------------------------------------------*/
13478556SobrienBitStream* bsOpenReadStream ( FILE* stream )
13578556Sobrien{
13678556Sobrien   BitStream *bs = malloc ( sizeof(BitStream) );
13778556Sobrien   if (bs == NULL) mallocFail ( sizeof(BitStream) );
13878556Sobrien   bs->handle = stream;
13978556Sobrien   bs->buffer = 0;
14078556Sobrien   bs->buffLive = 0;
14178556Sobrien   bs->mode = 'r';
14278556Sobrien   return bs;
14378556Sobrien}
14478556Sobrien
14578556Sobrien
14678556Sobrien/*---------------------------------------------*/
14778556SobrienBitStream* bsOpenWriteStream ( FILE* stream )
14878556Sobrien{
14978556Sobrien   BitStream *bs = malloc ( sizeof(BitStream) );
15078556Sobrien   if (bs == NULL) mallocFail ( sizeof(BitStream) );
15178556Sobrien   bs->handle = stream;
15278556Sobrien   bs->buffer = 0;
15378556Sobrien   bs->buffLive = 0;
15478556Sobrien   bs->mode = 'w';
15578556Sobrien   return bs;
15678556Sobrien}
15778556Sobrien
15878556Sobrien
15978556Sobrien/*---------------------------------------------*/
16078556Sobrienvoid bsPutBit ( BitStream* bs, Int32 bit )
16178556Sobrien{
16278556Sobrien   if (bs->buffLive == 8) {
16378556Sobrien      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
16478556Sobrien      if (retVal == EOF) writeError();
16578556Sobrien      bytesOut++;
16678556Sobrien      bs->buffLive = 1;
16778556Sobrien      bs->buffer = bit & 0x1;
16878556Sobrien   } else {
16978556Sobrien      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
17078556Sobrien      bs->buffLive++;
17178556Sobrien   };
17278556Sobrien}
17378556Sobrien
17478556Sobrien
17578556Sobrien/*---------------------------------------------*/
17678556Sobrien/*--
17778556Sobrien   Returns 0 or 1, or 2 to indicate EOF.
17878556Sobrien--*/
17978556SobrienInt32 bsGetBit ( BitStream* bs )
18078556Sobrien{
18178556Sobrien   if (bs->buffLive > 0) {
18278556Sobrien      bs->buffLive --;
18378556Sobrien      return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
18478556Sobrien   } else {
18578556Sobrien      Int32 retVal = getc ( bs->handle );
18678556Sobrien      if ( retVal == EOF ) {
18778556Sobrien         if (errno != 0) readError();
18878556Sobrien         return 2;
18978556Sobrien      }
19078556Sobrien      bs->buffLive = 7;
19178556Sobrien      bs->buffer = retVal;
19278556Sobrien      return ( ((bs->buffer) >> 7) & 0x1 );
19378556Sobrien   }
19478556Sobrien}
19578556Sobrien
19678556Sobrien
19778556Sobrien/*---------------------------------------------*/
19878556Sobrienvoid bsClose ( BitStream* bs )
19978556Sobrien{
20078556Sobrien   Int32 retVal;
20178556Sobrien
20278556Sobrien   if ( bs->mode == 'w' ) {
20378556Sobrien      while ( bs->buffLive < 8 ) {
20478556Sobrien         bs->buffLive++;
20578556Sobrien         bs->buffer <<= 1;
20678556Sobrien      };
20778556Sobrien      retVal = putc ( (UChar) (bs->buffer), bs->handle );
20878556Sobrien      if (retVal == EOF) writeError();
20978556Sobrien      bytesOut++;
21078556Sobrien      retVal = fflush ( bs->handle );
21178556Sobrien      if (retVal == EOF) writeError();
21278556Sobrien   }
21378556Sobrien   retVal = fclose ( bs->handle );
21478556Sobrien   if (retVal == EOF) {
21578556Sobrien      if (bs->mode == 'w') writeError(); else readError();
21678556Sobrien   }
21778556Sobrien   free ( bs );
21878556Sobrien}
21978556Sobrien
22078556Sobrien
22178556Sobrien/*---------------------------------------------*/
22278556Sobrienvoid bsPutUChar ( BitStream* bs, UChar c )
22378556Sobrien{
22478556Sobrien   Int32 i;
22578556Sobrien   for (i = 7; i >= 0; i--)
22678556Sobrien      bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
22778556Sobrien}
22878556Sobrien
22978556Sobrien
23078556Sobrien/*---------------------------------------------*/
23178556Sobrienvoid bsPutUInt32 ( BitStream* bs, UInt32 c )
23278556Sobrien{
23378556Sobrien   Int32 i;
23478556Sobrien
23578556Sobrien   for (i = 31; i >= 0; i--)
23678556Sobrien      bsPutBit ( bs, (c >> i) & 0x1 );
23778556Sobrien}
23878556Sobrien
23978556Sobrien
24078556Sobrien/*---------------------------------------------*/
24178556SobrienBool endsInBz2 ( Char* name )
24278556Sobrien{
24378556Sobrien   Int32 n = strlen ( name );
24478556Sobrien   if (n <= 4) return False;
24578556Sobrien   return
24678556Sobrien      (name[n-4] == '.' &&
24778556Sobrien       name[n-3] == 'b' &&
24878556Sobrien       name[n-2] == 'z' &&
24978556Sobrien       name[n-1] == '2');
25078556Sobrien}
25178556Sobrien
25278556Sobrien
25378556Sobrien/*---------------------------------------------------*/
25478556Sobrien/*---                                             ---*/
25578556Sobrien/*---------------------------------------------------*/
25678556Sobrien
25778556Sobrien#define BLOCK_HEADER_HI  0x00003141UL
25878556Sobrien#define BLOCK_HEADER_LO  0x59265359UL
25978556Sobrien
26078556Sobrien#define BLOCK_ENDMARK_HI 0x00001772UL
26178556Sobrien#define BLOCK_ENDMARK_LO 0x45385090UL
26278556Sobrien
26378556Sobrien
26478556SobrienUInt32 bStart[20000];
26578556SobrienUInt32 bEnd[20000];
26678556SobrienUInt32 rbStart[20000];
26778556SobrienUInt32 rbEnd[20000];
26878556Sobrien
26978556SobrienInt32 main ( Int32 argc, Char** argv )
27078556Sobrien{
27178556Sobrien   FILE*       inFile;
27278556Sobrien   FILE*       outFile;
27378556Sobrien   BitStream*  bsIn, *bsWr;
27478556Sobrien   Int32       currBlock, b, wrBlock;
27578556Sobrien   UInt32      bitsRead;
27678556Sobrien   Int32       rbCtr;
27778556Sobrien
27878556Sobrien
27978556Sobrien   UInt32      buffHi, buffLo, blockCRC;
28078556Sobrien   Char*       p;
28178556Sobrien
28278556Sobrien   strcpy ( progName, argv[0] );
28378556Sobrien   inFileName[0] = outFileName[0] = 0;
28478556Sobrien
28578556Sobrien   fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" );
28678556Sobrien
28778556Sobrien   if (argc != 2) {
28878556Sobrien      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
28978556Sobrien                        progName, progName );
29078556Sobrien      exit(1);
29178556Sobrien   }
29278556Sobrien
29378556Sobrien   strcpy ( inFileName, argv[1] );
29478556Sobrien
29578556Sobrien   inFile = fopen ( inFileName, "rb" );
29678556Sobrien   if (inFile == NULL) {
29778556Sobrien      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
29878556Sobrien      exit(1);
29978556Sobrien   }
30078556Sobrien
30178556Sobrien   bsIn = bsOpenReadStream ( inFile );
30278556Sobrien   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
30378556Sobrien
30478556Sobrien   bitsRead = 0;
30578556Sobrien   buffHi = buffLo = 0;
30678556Sobrien   currBlock = 0;
30778556Sobrien   bStart[currBlock] = 0;
30878556Sobrien
30978556Sobrien   rbCtr = 0;
31078556Sobrien
31178556Sobrien   while (True) {
31278556Sobrien      b = bsGetBit ( bsIn );
31378556Sobrien      bitsRead++;
31478556Sobrien      if (b == 2) {
31578556Sobrien         if (bitsRead >= bStart[currBlock] &&
31678556Sobrien            (bitsRead - bStart[currBlock]) >= 40) {
31778556Sobrien            bEnd[currBlock] = bitsRead-1;
31878556Sobrien            if (currBlock > 0)
31978556Sobrien               fprintf ( stderr, "   block %d runs from %d to %d (incomplete)\n",
32078556Sobrien                         currBlock,  bStart[currBlock], bEnd[currBlock] );
32178556Sobrien         } else
32278556Sobrien            currBlock--;
32378556Sobrien         break;
32478556Sobrien      }
32578556Sobrien      buffHi = (buffHi << 1) | (buffLo >> 31);
32678556Sobrien      buffLo = (buffLo << 1) | (b & 1);
32778556Sobrien      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
32878556Sobrien             && buffLo == BLOCK_HEADER_LO)
32978556Sobrien           ||
33078556Sobrien           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
33178556Sobrien             && buffLo == BLOCK_ENDMARK_LO)
33278556Sobrien         ) {
33378556Sobrien         if (bitsRead > 49)
33478556Sobrien            bEnd[currBlock] = bitsRead-49; else
33578556Sobrien            bEnd[currBlock] = 0;
33678556Sobrien         if (currBlock > 0 &&
33778556Sobrien	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
33878556Sobrien            fprintf ( stderr, "   block %d runs from %d to %d\n",
33978556Sobrien                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
34078556Sobrien            rbStart[rbCtr] = bStart[currBlock];
34178556Sobrien            rbEnd[rbCtr] = bEnd[currBlock];
34278556Sobrien            rbCtr++;
34378556Sobrien         }
34478556Sobrien         currBlock++;
34578556Sobrien
34678556Sobrien         bStart[currBlock] = bitsRead;
34778556Sobrien      }
34878556Sobrien   }
34978556Sobrien
35078556Sobrien   bsClose ( bsIn );
35178556Sobrien
35278556Sobrien   /*-- identified blocks run from 1 to rbCtr inclusive. --*/
35378556Sobrien
35478556Sobrien   if (rbCtr < 1) {
35578556Sobrien      fprintf ( stderr,
35678556Sobrien                "%s: sorry, I couldn't find any block boundaries.\n",
35778556Sobrien                progName );
35878556Sobrien      exit(1);
35978556Sobrien   };
36078556Sobrien
36178556Sobrien   fprintf ( stderr, "%s: splitting into blocks\n", progName );
36278556Sobrien
36378556Sobrien   inFile = fopen ( inFileName, "rb" );
36478556Sobrien   if (inFile == NULL) {
36578556Sobrien      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
36678556Sobrien      exit(1);
36778556Sobrien   }
36878556Sobrien   bsIn = bsOpenReadStream ( inFile );
36978556Sobrien
37078556Sobrien   /*-- placate gcc's dataflow analyser --*/
37178556Sobrien   blockCRC = 0; bsWr = 0;
37278556Sobrien
37378556Sobrien   bitsRead = 0;
37478556Sobrien   outFile = NULL;
37578556Sobrien   wrBlock = 0;
37678556Sobrien   while (True) {
37778556Sobrien      b = bsGetBit(bsIn);
37878556Sobrien      if (b == 2) break;
37978556Sobrien      buffHi = (buffHi << 1) | (buffLo >> 31);
38078556Sobrien      buffLo = (buffLo << 1) | (b & 1);
38178556Sobrien      if (bitsRead == 47+rbStart[wrBlock])
38278556Sobrien         blockCRC = (buffHi << 16) | (buffLo >> 16);
38378556Sobrien
38478556Sobrien      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
38578556Sobrien                          && bitsRead <= rbEnd[wrBlock]) {
38678556Sobrien         bsPutBit ( bsWr, b );
38778556Sobrien      }
38878556Sobrien
38978556Sobrien      bitsRead++;
39078556Sobrien
39178556Sobrien      if (bitsRead == rbEnd[wrBlock]+1) {
39278556Sobrien         if (outFile != NULL) {
39378556Sobrien            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
39478556Sobrien            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
39578556Sobrien            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
39678556Sobrien            bsPutUInt32 ( bsWr, blockCRC );
39778556Sobrien            bsClose ( bsWr );
39878556Sobrien         }
39978556Sobrien         if (wrBlock >= rbCtr) break;
40078556Sobrien         wrBlock++;
40178556Sobrien      } else
40278556Sobrien      if (bitsRead == rbStart[wrBlock]) {
40378556Sobrien         outFileName[0] = 0;
40478556Sobrien         sprintf ( outFileName, "rec%4d", wrBlock+1 );
40578556Sobrien         for (p = outFileName; *p != 0; p++) if (*p == ' ') *p = '0';
40678556Sobrien         strcat ( outFileName, inFileName );
40778556Sobrien         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
40878556Sobrien
40978556Sobrien         fprintf ( stderr, "   writing block %d to `%s' ...\n",
41078556Sobrien                           wrBlock+1, outFileName );
41178556Sobrien
41278556Sobrien         outFile = fopen ( outFileName, "wb" );
41378556Sobrien         if (outFile == NULL) {
41478556Sobrien            fprintf ( stderr, "%s: can't write `%s'\n",
41578556Sobrien                      progName, outFileName );
41678556Sobrien            exit(1);
41778556Sobrien         }
41878556Sobrien         bsWr = bsOpenWriteStream ( outFile );
41978556Sobrien         bsPutUChar ( bsWr, 'B' ); bsPutUChar ( bsWr, 'Z' );
42078556Sobrien         bsPutUChar ( bsWr, 'h' ); bsPutUChar ( bsWr, '9' );
42178556Sobrien         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
42278556Sobrien         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
42378556Sobrien         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
42478556Sobrien      }
42578556Sobrien   }
42678556Sobrien
42778556Sobrien   fprintf ( stderr, "%s: finished\n", progName );
42878556Sobrien   return 0;
42978556Sobrien}
43078556Sobrien
43178556Sobrien
43278556Sobrien
43378556Sobrien/*-----------------------------------------------------------*/
43478556Sobrien/*--- end                                  bzip2recover.c ---*/
43578556Sobrien/*-----------------------------------------------------------*/
436