1/*-----------------------------------------------------------*/
2/*--- Block recoverer program for bzip2                   ---*/
3/*---                                      bzip2recover.c ---*/
4/*-----------------------------------------------------------*/
5
6/* ------------------------------------------------------------------
7   This file is part of bzip2/libbzip2, a program and library for
8   lossless, block-sorting data compression.
9
10   bzip2/libbzip2 version 1.0.8 of 13 July 2019
11   Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
12
13   Please read the WARNING, DISCLAIMER and PATENTS sections in the
14   README file.
15
16   This program is released under the terms of the license contained
17   in the file LICENSE.
18   ------------------------------------------------------------------ */
19
20/* This program is a complete hack and should be rewritten properly.
21	 It isn't very complicated. */
22
23#include <stdio.h>
24#include <errno.h>
25#include <stdlib.h>
26#include <string.h>
27
28
29/* This program records bit locations in the file to be recovered.
30   That means that if 64-bit ints are not supported, we will not
31   be able to recover .bz2 files over 512MB (2^32 bits) long.
32   On GNU supported platforms, we take advantage of the 64-bit
33   int support to circumvent this problem.  Ditto MSVC.
34
35   This change occurred in version 1.0.2; all prior versions have
36   the 512MB limitation.
37*/
38#ifdef __GNUC__
39   typedef  unsigned long long int  MaybeUInt64;
40#  define MaybeUInt64_FMT "%llu"
41#else
42#ifdef _MSC_VER
43   typedef  unsigned __int64  MaybeUInt64;
44#  define MaybeUInt64_FMT "%I64u"
45#else
46   typedef  unsigned int   MaybeUInt64;
47#  define MaybeUInt64_FMT "%u"
48#endif
49#endif
50
51typedef  unsigned int   UInt32;
52typedef  int            Int32;
53typedef  unsigned char  UChar;
54typedef  char           Char;
55typedef  unsigned char  Bool;
56#define True    ((Bool)1)
57#define False   ((Bool)0)
58
59
60#define BZ_MAX_FILENAME 2000
61
62Char inFileName[BZ_MAX_FILENAME];
63Char outFileName[BZ_MAX_FILENAME];
64Char progName[BZ_MAX_FILENAME];
65
66MaybeUInt64 bytesOut = 0;
67MaybeUInt64 bytesIn  = 0;
68
69
70/*---------------------------------------------------*/
71/*--- Header bytes                                ---*/
72/*---------------------------------------------------*/
73
74#define BZ_HDR_B 0x42                         /* 'B' */
75#define BZ_HDR_Z 0x5a                         /* 'Z' */
76#define BZ_HDR_h 0x68                         /* 'h' */
77#define BZ_HDR_0 0x30                         /* '0' */
78
79
80/*---------------------------------------------------*/
81/*--- I/O errors                                  ---*/
82/*---------------------------------------------------*/
83
84/*---------------------------------------------*/
85static void readError ( void )
86{
87   fprintf ( stderr,
88             "%s: I/O error reading `%s', possible reason follows.\n",
89            progName, inFileName );
90   perror ( progName );
91   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
92             progName );
93   exit ( 1 );
94}
95
96
97/*---------------------------------------------*/
98static void writeError ( void )
99{
100   fprintf ( stderr,
101             "%s: I/O error reading `%s', possible reason follows.\n",
102            progName, inFileName );
103   perror ( progName );
104   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
105             progName );
106   exit ( 1 );
107}
108
109
110/*---------------------------------------------*/
111static void mallocFail ( Int32 n )
112{
113   fprintf ( stderr,
114             "%s: malloc failed on request for %d bytes.\n",
115            progName, n );
116   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
117             progName );
118   exit ( 1 );
119}
120
121
122/*---------------------------------------------*/
123static void tooManyBlocks ( Int32 max_handled_blocks )
124{
125   fprintf ( stderr,
126             "%s: `%s' appears to contain more than %d blocks\n",
127            progName, inFileName, max_handled_blocks );
128   fprintf ( stderr,
129             "%s: and cannot be handled.  To fix, increase\n",
130             progName );
131   fprintf ( stderr,
132             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
133             progName );
134   exit ( 1 );
135}
136
137
138
139/*---------------------------------------------------*/
140/*--- Bit stream I/O                              ---*/
141/*---------------------------------------------------*/
142
143typedef
144   struct {
145      FILE*  handle;
146      Int32  buffer;
147      Int32  buffLive;
148      Char   mode;
149   }
150   BitStream;
151
152
153/*---------------------------------------------*/
154static BitStream* bsOpenReadStream ( FILE* stream )
155{
156   BitStream *bs = malloc ( sizeof(BitStream) );
157   if (bs == NULL) mallocFail ( sizeof(BitStream) );
158   bs->handle = stream;
159   bs->buffer = 0;
160   bs->buffLive = 0;
161   bs->mode = 'r';
162   return bs;
163}
164
165
166/*---------------------------------------------*/
167static BitStream* bsOpenWriteStream ( FILE* stream )
168{
169   BitStream *bs = malloc ( sizeof(BitStream) );
170   if (bs == NULL) mallocFail ( sizeof(BitStream) );
171   bs->handle = stream;
172   bs->buffer = 0;
173   bs->buffLive = 0;
174   bs->mode = 'w';
175   return bs;
176}
177
178
179/*---------------------------------------------*/
180static void bsPutBit ( BitStream* bs, Int32 bit )
181{
182   if (bs->buffLive == 8) {
183      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
184      if (retVal == EOF) writeError();
185      bytesOut++;
186      bs->buffLive = 1;
187      bs->buffer = bit & 0x1;
188   } else {
189      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
190      bs->buffLive++;
191   };
192}
193
194
195/*---------------------------------------------*/
196/*--
197   Returns 0 or 1, or 2 to indicate EOF.
198--*/
199static Int32 bsGetBit ( BitStream* bs )
200{
201   if (bs->buffLive > 0) {
202      bs->buffLive --;
203      return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
204   } else {
205      Int32 retVal = getc ( bs->handle );
206      if ( retVal == EOF ) {
207         if (errno != 0) readError();
208         return 2;
209      }
210      bs->buffLive = 7;
211      bs->buffer = retVal;
212      return ( ((bs->buffer) >> 7) & 0x1 );
213   }
214}
215
216
217/*---------------------------------------------*/
218static void bsClose ( BitStream* bs )
219{
220   Int32 retVal;
221
222   if ( bs->mode == 'w' ) {
223      while ( bs->buffLive < 8 ) {
224         bs->buffLive++;
225         bs->buffer <<= 1;
226      };
227      retVal = putc ( (UChar) (bs->buffer), bs->handle );
228      if (retVal == EOF) writeError();
229      bytesOut++;
230      retVal = fflush ( bs->handle );
231      if (retVal == EOF) writeError();
232   }
233   retVal = fclose ( bs->handle );
234   if (retVal == EOF) {
235      if (bs->mode == 'w') writeError(); else readError();
236   }
237   free ( bs );
238}
239
240
241/*---------------------------------------------*/
242static void bsPutUChar ( BitStream* bs, UChar c )
243{
244   Int32 i;
245   for (i = 7; i >= 0; i--)
246      bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
247}
248
249
250/*---------------------------------------------*/
251static void bsPutUInt32 ( BitStream* bs, UInt32 c )
252{
253   Int32 i;
254
255   for (i = 31; i >= 0; i--)
256      bsPutBit ( bs, (c >> i) & 0x1 );
257}
258
259
260/*---------------------------------------------*/
261static Bool endsInBz2 ( Char* name )
262{
263   Int32 n = strlen ( name );
264   if (n <= 4) return False;
265   return
266      (name[n-4] == '.' &&
267       name[n-3] == 'b' &&
268       name[n-2] == 'z' &&
269       name[n-1] == '2');
270}
271
272
273/*---------------------------------------------------*/
274/*---                                             ---*/
275/*---------------------------------------------------*/
276
277/* This logic isn't really right when it comes to Cygwin. */
278#ifdef _WIN32
279#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
280#else
281#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
282#endif
283
284#define BLOCK_HEADER_HI  0x00003141UL
285#define BLOCK_HEADER_LO  0x59265359UL
286
287#define BLOCK_ENDMARK_HI 0x00001772UL
288#define BLOCK_ENDMARK_LO 0x45385090UL
289
290/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
291   would have an uncompressed size of at least 40GB, so the chances
292   are low you'll need to up this.
293*/
294#define BZ_MAX_HANDLED_BLOCKS 50000
295
296MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
297MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
298MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
299MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
300
301Int32 main ( Int32 argc, Char** argv )
302{
303   FILE*       inFile;
304   FILE*       outFile;
305   BitStream*  bsIn, *bsWr;
306   Int32       b, wrBlock, currBlock, rbCtr;
307   MaybeUInt64 bitsRead;
308
309   UInt32      buffHi, buffLo, blockCRC;
310   Char*       p;
311
312   strncpy ( progName, argv[0], BZ_MAX_FILENAME-1);
313   progName[BZ_MAX_FILENAME-1]='\0';
314   inFileName[0] = outFileName[0] = 0;
315
316   fprintf ( stderr,
317             "bzip2recover 1.0.8: extracts blocks from damaged .bz2 files.\n" );
318
319   if (argc != 2) {
320      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
321                        progName, progName );
322      switch (sizeof(MaybeUInt64)) {
323         case 8:
324            fprintf(stderr,
325                    "\trestrictions on size of recovered file: None\n");
326            break;
327         case 4:
328            fprintf(stderr,
329                    "\trestrictions on size of recovered file: 512 MB\n");
330            fprintf(stderr,
331                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
332                    "\tunsigned 64-bit int.\n");
333            break;
334         default:
335            fprintf(stderr,
336                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
337                    "configuration error.\n");
338            break;
339      }
340      exit(1);
341   }
342
343   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
344      fprintf ( stderr,
345                "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
346                progName, (int)strlen(argv[1]) );
347      exit(1);
348   }
349
350   strcpy ( inFileName, argv[1] );
351
352   inFile = fopen ( inFileName, "rb" );
353   if (inFile == NULL) {
354      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
355      exit(1);
356   }
357
358   bsIn = bsOpenReadStream ( inFile );
359   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
360
361   bitsRead = 0;
362   buffHi = buffLo = 0;
363   currBlock = 0;
364   bStart[currBlock] = 0;
365
366   rbCtr = 0;
367
368   while (True) {
369      b = bsGetBit ( bsIn );
370      bitsRead++;
371      if (b == 2) {
372         if (bitsRead >= bStart[currBlock] &&
373            (bitsRead - bStart[currBlock]) >= 40) {
374            bEnd[currBlock] = bitsRead-1;
375            if (currBlock > 0)
376               fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
377                                 " to " MaybeUInt64_FMT " (incomplete)\n",
378                         currBlock,  bStart[currBlock], bEnd[currBlock] );
379         } else
380            currBlock--;
381         break;
382      }
383      buffHi = (buffHi << 1) | (buffLo >> 31);
384      buffLo = (buffLo << 1) | (b & 1);
385      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
386             && buffLo == BLOCK_HEADER_LO)
387           ||
388           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
389             && buffLo == BLOCK_ENDMARK_LO)
390         ) {
391         if (bitsRead > 49) {
392            bEnd[currBlock] = bitsRead-49;
393         } else {
394            bEnd[currBlock] = 0;
395         }
396         if (currBlock > 0 &&
397	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
398            fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
399                              " to " MaybeUInt64_FMT "\n",
400                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
401            rbStart[rbCtr] = bStart[currBlock];
402            rbEnd[rbCtr] = bEnd[currBlock];
403            rbCtr++;
404         }
405         if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
406            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
407         currBlock++;
408
409         bStart[currBlock] = bitsRead;
410      }
411   }
412
413   bsClose ( bsIn );
414
415   /*-- identified blocks run from 1 to rbCtr inclusive. --*/
416
417   if (rbCtr < 1) {
418      fprintf ( stderr,
419                "%s: sorry, I couldn't find any block boundaries.\n",
420                progName );
421      exit(1);
422   };
423
424   fprintf ( stderr, "%s: splitting into blocks\n", progName );
425
426   inFile = fopen ( inFileName, "rb" );
427   if (inFile == NULL) {
428      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
429      exit(1);
430   }
431   bsIn = bsOpenReadStream ( inFile );
432
433   /*-- placate gcc's dataflow analyser --*/
434   blockCRC = 0; bsWr = 0;
435
436   bitsRead = 0;
437   outFile = NULL;
438   wrBlock = 0;
439   while (True) {
440      b = bsGetBit(bsIn);
441      if (b == 2) break;
442      buffHi = (buffHi << 1) | (buffLo >> 31);
443      buffLo = (buffLo << 1) | (b & 1);
444      if (bitsRead == 47+rbStart[wrBlock])
445         blockCRC = (buffHi << 16) | (buffLo >> 16);
446
447      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
448                          && bitsRead <= rbEnd[wrBlock]) {
449         bsPutBit ( bsWr, b );
450      }
451
452      bitsRead++;
453
454      if (bitsRead == rbEnd[wrBlock]+1) {
455         if (outFile != NULL) {
456            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
457            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
458            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
459            bsPutUInt32 ( bsWr, blockCRC );
460            bsClose ( bsWr );
461            outFile = NULL;
462         }
463         if (wrBlock >= rbCtr) break;
464         wrBlock++;
465      } else
466      if (bitsRead == rbStart[wrBlock]) {
467         /* Create the output file name, correctly handling leading paths.
468            (31.10.2001 by Sergey E. Kusikov) */
469         Char* split;
470         Int32 ofs, k;
471         for (k = 0; k < BZ_MAX_FILENAME; k++)
472            outFileName[k] = 0;
473         strcpy (outFileName, inFileName);
474         split = strrchr (outFileName, BZ_SPLIT_SYM);
475         if (split == NULL) {
476            split = outFileName;
477         } else {
478            ++split;
479	 }
480	 /* Now split points to the start of the basename. */
481         ofs  = split - outFileName;
482         sprintf (split, "rec%5d", wrBlock+1);
483         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
484         strcat (outFileName, inFileName + ofs);
485
486         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
487
488         fprintf ( stderr, "   writing block %d to `%s' ...\n",
489                           wrBlock+1, outFileName );
490
491         outFile = fopen ( outFileName, "wb" );
492         if (outFile == NULL) {
493            fprintf ( stderr, "%s: can't write `%s'\n",
494                      progName, outFileName );
495            exit(1);
496         }
497         bsWr = bsOpenWriteStream ( outFile );
498         bsPutUChar ( bsWr, BZ_HDR_B );
499         bsPutUChar ( bsWr, BZ_HDR_Z );
500         bsPutUChar ( bsWr, BZ_HDR_h );
501         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
502         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
503         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
504         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
505      }
506   }
507
508   fprintf ( stderr, "%s: finished\n", progName );
509   return 0;
510}
511
512
513
514/*-----------------------------------------------------------*/
515/*--- end                                  bzip2recover.c ---*/
516/*-----------------------------------------------------------*/
517