/* bzip2recover.c revision 146293 */

/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/
6
/*--
  This program is bzip2recover, a program to attempt data
  salvage from damaged files created by the accompanying
  bzip2-1.0.3 program.

  Copyright (C) 1996-2005 Julian R Seward.  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

  2. The origin of this software must not be misrepresented; you must
     not claim that you wrote the original software.  If you use this
     software in a product, an acknowledgment in the product
     documentation would be appreciated but is not required.

  3. Altered source versions must be plainly marked as such, and must
     not be misrepresented as being the original software.

  4. The name of the author may not be used to endorse or promote
     products derived from this software without specific prior written
     permission.

  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

  Julian Seward, Cambridge, UK.
  jseward@bzip.org
  bzip2/libbzip2 version 1.0.3 of 15 February 2005
--*/
49
/*--
  This program is a complete hack and should be rewritten
  properly.  It isn't very complicated.
--*/
54
55#include <stdio.h>
56#include <errno.h>
57#include <stdlib.h>
58#include <string.h>
59
60
/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.

   MaybeUInt64_FMT is the printf conversion that matches MaybeUInt64.
   For unsigned long long the length modifier is "ll"; the "L"
   modifier is only defined for floating-point conversions, so
   "%llu" is used here rather than "%Lu".
*/
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif
82
/* Short aliases for the basic scalar types used throughout this file. */
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  int            Int32;
typedef  unsigned int   UInt32;

/* Bool is a byte-sized truth value; False and True are its two values. */
typedef  unsigned char  Bool;
#define False   ((Bool)0)
#define True    ((Bool)1)
90
91
92#define BZ_MAX_FILENAME 2000
93
94Char inFileName[BZ_MAX_FILENAME];
95Char outFileName[BZ_MAX_FILENAME];
96Char progName[BZ_MAX_FILENAME];
97
98MaybeUInt64 bytesOut = 0;
99MaybeUInt64 bytesIn  = 0;
100
101
/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

/* The four bytes main() writes at the start of every output file.
   BZ_HDR_0 is the digit '0'; main() adds 9 to it before writing. */
enum {
   BZ_HDR_B = 0x42,                           /* 'B' */
   BZ_HDR_Z = 0x5a,                           /* 'Z' */
   BZ_HDR_h = 0x68,                           /* 'h' */
   BZ_HDR_0 = 0x30                            /* '0' */
};
110
111
/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/
115
116/*---------------------------------------------*/
117void readError ( void )
118{
119   fprintf ( stderr,
120             "%s: I/O error reading `%s', possible reason follows.\n",
121            progName, inFileName );
122   perror ( progName );
123   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
124             progName );
125   exit ( 1 );
126}
127
128
129/*---------------------------------------------*/
130void writeError ( void )
131{
132   fprintf ( stderr,
133             "%s: I/O error reading `%s', possible reason follows.\n",
134            progName, inFileName );
135   perror ( progName );
136   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
137             progName );
138   exit ( 1 );
139}
140
141
142/*---------------------------------------------*/
143void mallocFail ( Int32 n )
144{
145   fprintf ( stderr,
146             "%s: malloc failed on request for %d bytes.\n",
147            progName, n );
148   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
149             progName );
150   exit ( 1 );
151}
152
153
154/*---------------------------------------------*/
155void tooManyBlocks ( Int32 max_handled_blocks )
156{
157   fprintf ( stderr,
158             "%s: `%s' appears to contain more than %d blocks\n",
159            progName, inFileName, max_handled_blocks );
160   fprintf ( stderr,
161             "%s: and cannot be handled.  To fix, increase\n",
162             progName );
163   fprintf ( stderr,
164             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
165             progName );
166   exit ( 1 );
167}
168
169
170
/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/
174
175typedef
176   struct {
177      FILE*  handle;
178      Int32  buffer;
179      Int32  buffLive;
180      Char   mode;
181   }
182   BitStream;
183
184
185/*---------------------------------------------*/
186BitStream* bsOpenReadStream ( FILE* stream )
187{
188   BitStream *bs = malloc ( sizeof(BitStream) );
189   if (bs == NULL) mallocFail ( sizeof(BitStream) );
190   bs->handle = stream;
191   bs->buffer = 0;
192   bs->buffLive = 0;
193   bs->mode = 'r';
194   return bs;
195}
196
197
198/*---------------------------------------------*/
199BitStream* bsOpenWriteStream ( FILE* stream )
200{
201   BitStream *bs = malloc ( sizeof(BitStream) );
202   if (bs == NULL) mallocFail ( sizeof(BitStream) );
203   bs->handle = stream;
204   bs->buffer = 0;
205   bs->buffLive = 0;
206   bs->mode = 'w';
207   return bs;
208}
209
210
211/*---------------------------------------------*/
212void bsPutBit ( BitStream* bs, Int32 bit )
213{
214   if (bs->buffLive == 8) {
215      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
216      if (retVal == EOF) writeError();
217      bytesOut++;
218      bs->buffLive = 1;
219      bs->buffer = bit & 0x1;
220   } else {
221      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
222      bs->buffLive++;
223   };
224}
225
226
227/*---------------------------------------------*/
228/*--
229   Returns 0 or 1, or 2 to indicate EOF.
230--*/
231Int32 bsGetBit ( BitStream* bs )
232{
233   if (bs->buffLive > 0) {
234      bs->buffLive --;
235      return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
236   } else {
237      Int32 retVal = getc ( bs->handle );
238      if ( retVal == EOF ) {
239         if (errno != 0) readError();
240         return 2;
241      }
242      bs->buffLive = 7;
243      bs->buffer = retVal;
244      return ( ((bs->buffer) >> 7) & 0x1 );
245   }
246}
247
248
249/*---------------------------------------------*/
250void bsClose ( BitStream* bs )
251{
252   Int32 retVal;
253
254   if ( bs->mode == 'w' ) {
255      while ( bs->buffLive < 8 ) {
256         bs->buffLive++;
257         bs->buffer <<= 1;
258      };
259      retVal = putc ( (UChar) (bs->buffer), bs->handle );
260      if (retVal == EOF) writeError();
261      bytesOut++;
262      retVal = fflush ( bs->handle );
263      if (retVal == EOF) writeError();
264   }
265   retVal = fclose ( bs->handle );
266   if (retVal == EOF) {
267      if (bs->mode == 'w') writeError(); else readError();
268   }
269   free ( bs );
270}
271
272
273/*---------------------------------------------*/
274void bsPutUChar ( BitStream* bs, UChar c )
275{
276   Int32 i;
277   for (i = 7; i >= 0; i--)
278      bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
279}
280
281
282/*---------------------------------------------*/
283void bsPutUInt32 ( BitStream* bs, UInt32 c )
284{
285   Int32 i;
286
287   for (i = 31; i >= 0; i--)
288      bsPutBit ( bs, (c >> i) & 0x1 );
289}
290
291
292/*---------------------------------------------*/
293Bool endsInBz2 ( Char* name )
294{
295   Int32 n = strlen ( name );
296   if (n <= 4) return False;
297   return
298      (name[n-4] == '.' &&
299       name[n-3] == 'b' &&
300       name[n-2] == 'z' &&
301       name[n-1] == '2');
302}
303
304
/*---------------------------------------------------*/
/*---                                             ---*/
/*---------------------------------------------------*/
308
/* BZ_SPLIT_SYM is the character that separates directories in a path.
   This logic isn't really right when it comes to Cygwin. */
#ifndef _WIN32
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#else
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#endif
315
/* The two 48-bit markers main() searches for, each split into its
   upper 16 bits (_HI) and lower 32 bits (_LO).  main() writes the same
   byte sequences: the header bytes at the start of a recovered block
   and the end-mark bytes after it. */
#define BLOCK_HEADER_HI   0x3141UL
#define BLOCK_HEADER_LO   0x59265359UL

#define BLOCK_ENDMARK_HI  0x1772UL
#define BLOCK_ENDMARK_LO  0x45385090UL
321
322/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
323   would have an uncompressed size of at least 40GB, so the chances
324   are low you'll need to up this.
325*/
326#define BZ_MAX_HANDLED_BLOCKS 50000
327
328MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
329MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
330MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
331MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
332
333Int32 main ( Int32 argc, Char** argv )
334{
335   FILE*       inFile;
336   FILE*       outFile;
337   BitStream*  bsIn, *bsWr;
338   Int32       b, wrBlock, currBlock, rbCtr;
339   MaybeUInt64 bitsRead;
340
341   UInt32      buffHi, buffLo, blockCRC;
342   Char*       p;
343
344   strcpy ( progName, argv[0] );
345   inFileName[0] = outFileName[0] = 0;
346
347   fprintf ( stderr,
348             "bzip2recover 1.0.3: extracts blocks from damaged .bz2 files.\n" );
349
350   if (argc != 2) {
351      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
352                        progName, progName );
353      switch (sizeof(MaybeUInt64)) {
354         case 8:
355            fprintf(stderr,
356                    "\trestrictions on size of recovered file: None\n");
357            break;
358         case 4:
359            fprintf(stderr,
360                    "\trestrictions on size of recovered file: 512 MB\n");
361            fprintf(stderr,
362                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
363                    "\tunsigned 64-bit int.\n");
364            break;
365         default:
366            fprintf(stderr,
367                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
368                    "configuration error.\n");
369            break;
370      }
371      exit(1);
372   }
373
374   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
375      fprintf ( stderr,
376                "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
377                progName, (int)strlen(argv[1]) );
378      exit(1);
379   }
380
381   strcpy ( inFileName, argv[1] );
382
383   inFile = fopen ( inFileName, "rb" );
384   if (inFile == NULL) {
385      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
386      exit(1);
387   }
388
389   bsIn = bsOpenReadStream ( inFile );
390   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
391
392   bitsRead = 0;
393   buffHi = buffLo = 0;
394   currBlock = 0;
395   bStart[currBlock] = 0;
396
397   rbCtr = 0;
398
399   while (True) {
400      b = bsGetBit ( bsIn );
401      bitsRead++;
402      if (b == 2) {
403         if (bitsRead >= bStart[currBlock] &&
404            (bitsRead - bStart[currBlock]) >= 40) {
405            bEnd[currBlock] = bitsRead-1;
406            if (currBlock > 0)
407               fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
408                                 " to " MaybeUInt64_FMT " (incomplete)\n",
409                         currBlock,  bStart[currBlock], bEnd[currBlock] );
410         } else
411            currBlock--;
412         break;
413      }
414      buffHi = (buffHi << 1) | (buffLo >> 31);
415      buffLo = (buffLo << 1) | (b & 1);
416      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
417             && buffLo == BLOCK_HEADER_LO)
418           ||
419           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
420             && buffLo == BLOCK_ENDMARK_LO)
421         ) {
422         if (bitsRead > 49) {
423            bEnd[currBlock] = bitsRead-49;
424         } else {
425            bEnd[currBlock] = 0;
426         }
427         if (currBlock > 0 &&
428	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
429            fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
430                              " to " MaybeUInt64_FMT "\n",
431                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
432            rbStart[rbCtr] = bStart[currBlock];
433            rbEnd[rbCtr] = bEnd[currBlock];
434            rbCtr++;
435         }
436         if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
437            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
438         currBlock++;
439
440         bStart[currBlock] = bitsRead;
441      }
442   }
443
444   bsClose ( bsIn );
445
446   /*-- identified blocks run from 1 to rbCtr inclusive. --*/
447
448   if (rbCtr < 1) {
449      fprintf ( stderr,
450                "%s: sorry, I couldn't find any block boundaries.\n",
451                progName );
452      exit(1);
453   };
454
455   fprintf ( stderr, "%s: splitting into blocks\n", progName );
456
457   inFile = fopen ( inFileName, "rb" );
458   if (inFile == NULL) {
459      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
460      exit(1);
461   }
462   bsIn = bsOpenReadStream ( inFile );
463
464   /*-- placate gcc's dataflow analyser --*/
465   blockCRC = 0; bsWr = 0;
466
467   bitsRead = 0;
468   outFile = NULL;
469   wrBlock = 0;
470   while (True) {
471      b = bsGetBit(bsIn);
472      if (b == 2) break;
473      buffHi = (buffHi << 1) | (buffLo >> 31);
474      buffLo = (buffLo << 1) | (b & 1);
475      if (bitsRead == 47+rbStart[wrBlock])
476         blockCRC = (buffHi << 16) | (buffLo >> 16);
477
478      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
479                          && bitsRead <= rbEnd[wrBlock]) {
480         bsPutBit ( bsWr, b );
481      }
482
483      bitsRead++;
484
485      if (bitsRead == rbEnd[wrBlock]+1) {
486         if (outFile != NULL) {
487            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
488            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
489            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
490            bsPutUInt32 ( bsWr, blockCRC );
491            bsClose ( bsWr );
492         }
493         if (wrBlock >= rbCtr) break;
494         wrBlock++;
495      } else
496      if (bitsRead == rbStart[wrBlock]) {
497         /* Create the output file name, correctly handling leading paths.
498            (31.10.2001 by Sergey E. Kusikov) */
499         Char* split;
500         Int32 ofs, k;
501         for (k = 0; k < BZ_MAX_FILENAME; k++)
502            outFileName[k] = 0;
503         strcpy (outFileName, inFileName);
504         split = strrchr (outFileName, BZ_SPLIT_SYM);
505         if (split == NULL) {
506            split = outFileName;
507         } else {
508            ++split;
509	 }
510	 /* Now split points to the start of the basename. */
511         ofs  = split - outFileName;
512         sprintf (split, "rec%5d", wrBlock+1);
513         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
514         strcat (outFileName, inFileName + ofs);
515
516         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
517
518         fprintf ( stderr, "   writing block %d to `%s' ...\n",
519                           wrBlock+1, outFileName );
520
521         outFile = fopen ( outFileName, "wb" );
522         if (outFile == NULL) {
523            fprintf ( stderr, "%s: can't write `%s'\n",
524                      progName, outFileName );
525            exit(1);
526         }
527         bsWr = bsOpenWriteStream ( outFile );
528         bsPutUChar ( bsWr, BZ_HDR_B );
529         bsPutUChar ( bsWr, BZ_HDR_Z );
530         bsPutUChar ( bsWr, BZ_HDR_h );
531         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
532         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
533         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
534         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
535      }
536   }
537
538   fprintf ( stderr, "%s: finished\n", progName );
539   return 0;
540}
541
542
543
/*-----------------------------------------------------------*/
/*--- end                                  bzip2recover.c ---*/
/*-----------------------------------------------------------*/
547