/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/* ------------------------------------------------------------------
   This file is part of bzip2/libbzip2, a program and library for
   lossless, block-sorting data compression.

   bzip2/libbzip2 version 1.0.5 of 10 December 2007
   Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>

   Please read the WARNING, DISCLAIMER and PATENTS sections in the
   README file.

   This program is released under the terms of the license contained
   in the file LICENSE.
   ------------------------------------------------------------------ */

/* This program is a complete hack and should be rewritten properly.
   It isn't very complicated. */

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>


/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.
*/
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%Lu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif

typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)


#define BZ_MAX_FILENAME 2000

/* Names used in diagnostics: the damaged input file, the output file
   currently being written, and the name this program was invoked as. */
Char inFileName[BZ_MAX_FILENAME];
Char outFileName[BZ_MAX_FILENAME];
Char progName[BZ_MAX_FILENAME];

MaybeUInt64 bytesOut = 0;
MaybeUInt64 bytesIn  = 0;


/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

#define BZ_HDR_B 0x42                         /* 'B' */
#define BZ_HDR_Z 0x5a                         /* 'Z' */
#define BZ_HDR_h 0x68                         /* 'h' */
#define BZ_HDR_0 0x30                         /* '0' */


/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/

/*---------------------------------------------*/
/* Report a failure while reading inFileName (perror prints the
   reason) and terminate with exit status 1. */
static void readError ( void )
{
   fprintf ( stderr,
             "%s: I/O error reading `%s', possible reason follows.\n",
             progName, inFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/* Report a failure while writing outFileName (perror prints the
   reason) and terminate with exit status 1. */
static void writeError ( void )
{
   /* Name the output file and say "writing": this is only called
      for failures on the stream being written. */
   fprintf ( stderr,
             "%s: I/O error writing `%s', possible reason follows.\n",
             progName, outFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/* Report that an allocation of n bytes failed and terminate with
   exit status 1. */
static void mallocFail ( Int32 n )
{
   fprintf ( stderr,
             "%s: malloc failed on request for %d bytes.\n",
             progName, n );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/* Report that the input holds more block markers than the fixed-size
   tables can record, and terminate with exit status 1. */
static void tooManyBlocks ( Int32 max_handled_blocks )
{
   fprintf ( stderr,
             "%s: `%s' appears to contain more than %d blocks\n",
             progName, inFileName, max_handled_blocks );
   fprintf ( stderr,
             "%s: and cannot be handled. To fix, increase\n",
             progName );
   fprintf ( stderr,
             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
             progName );
   exit ( 1 );
}



/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/

/* A FILE wrapped for bit-at-a-time access.
     handle   -- the underlying stdio stream
     buffer   -- the byte currently being assembled / consumed
     buffLive -- number of valid bits held in buffer
     mode     -- 'r' for a read stream, 'w' for a write stream */
typedef
   struct {
      FILE*  handle;
      Int32  buffer;
      Int32  buffLive;
      Char   mode;
   }
   BitStream;


/*---------------------------------------------*/
/* Allocate a BitStream that reads bits from stream.  Exits via
   mallocFail if the allocation fails. */
static BitStream* bsOpenReadStream ( FILE* stream )
{
   BitStream *bs = malloc ( sizeof *bs );
   if (bs == NULL) mallocFail ( (Int32) sizeof *bs );
   bs->handle   = stream;
   bs->buffer   = 0;
   bs->buffLive = 0;
   bs->mode     = 'r';
   return bs;
}


/*---------------------------------------------*/
/* Allocate a BitStream that writes bits to stream.  Exits via
   mallocFail if the allocation fails. */
static BitStream* bsOpenWriteStream ( FILE* stream )
{
   BitStream *bs = malloc ( sizeof *bs );
   if (bs == NULL) mallocFail ( (Int32) sizeof *bs );
   bs->handle   = stream;
   bs->buffer   = 0;
   bs->buffLive = 0;
   bs->mode     = 'w';
   return bs;
}


/*---------------------------------------------*/
/* Append the low bit of `bit' to a write stream.  A completed byte
   is only flushed when the NEXT bit arrives, so the final byte is
   left for bsClose to pad and emit. */
static void bsPutBit ( BitStream* bs, Int32 bit )
{
   if (bs->buffLive == 8) {
      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
      if (retVal == EOF) writeError();
      bytesOut++;
      bs->buffLive = 1;
      bs->buffer = bit & 0x1;
   } else {
      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
      bs->buffLive++;
   }
}


/*---------------------------------------------*/
/*--
   Returns 0 or 1, or 2 to indicate EOF.
   Bits are delivered most-significant first.  A read error (as
   opposed to end of file) exits via readError.
--*/
static Int32 bsGetBit ( BitStream* bs )
{
   if (bs->buffLive > 0) {
      bs->buffLive --;
      return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
   } else {
      Int32 retVal = getc ( bs->handle );
      if ( retVal == EOF ) {
         /* Ask the stream itself whether this is an error; errno may
            hold a stale value left by an unrelated earlier call. */
         if (ferror ( bs->handle )) readError();
         return 2;
      }
      bs->buffLive = 7;
      bs->buffer = retVal;
      return ( ((bs->buffer) >> 7) & 0x1 );
   }
}


/*---------------------------------------------*/
/* Close the underlying FILE and free bs.  For a write stream the
   pending byte is first padded with zero bits, written and flushed.
   Any failure exits via writeError / readError according to mode.
   bs must not be used after this call. */
static void bsClose ( BitStream* bs )
{
   Int32 retVal;

   if ( bs->mode == 'w' ) {
      while ( bs->buffLive < 8 ) {
         bs->buffLive++;
         bs->buffer <<= 1;
      }
      retVal = putc ( (UChar) (bs->buffer), bs->handle );
      if (retVal == EOF) writeError();
      bytesOut++;
      retVal = fflush ( bs->handle );
      if (retVal == EOF) writeError();
   }
   retVal = fclose ( bs->handle );
   if (retVal == EOF) {
      if (bs->mode == 'w') writeError(); else readError();
   }
   free ( bs );
}


/*---------------------------------------------*/
/* Write the 8 bits of c, most significant bit first. */
static void bsPutUChar ( BitStream* bs, UChar c )
{
   Int32 i;
   for (i = 7; i >= 0; i--)
      bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
}


/*---------------------------------------------*/
/* Write the 32 bits of c, most significant bit first. */
static void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   Int32 i;

   for (i = 31; i >= 0; i--)
      bsPutBit ( bs, (c >> i) & 0x1 );
}


/*---------------------------------------------*/
/* True iff name is longer than 4 characters and ends in ".bz2". */
static Bool endsInBz2 ( Char* name )
{
   Int32 n = strlen ( name );
   if (n <= 4) return False;
   return
      (name[n-4] == '.' &&
       name[n-3] == 'b' &&
       name[n-2] == 'z' &&
       name[n-1] == '2');
}


/*---------------------------------------------------*/
/*---                                             ---*/
/*---------------------------------------------------*/

/* This logic isn't really right when it comes to Cygwin. */
#ifdef _WIN32
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* The two 48-bit markers searched for in the input, each split into
   a 16-bit high part and a 32-bit low part. */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL

/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
   would have an uncompressed size of at least 40GB, so the chances
   are low you'll need to up this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000

/* bStart/bEnd: bit positions delimiting every span found between
   markers.  rbStart/rbEnd: the subset accepted for recovery. */
MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];

/* Entry point.  Expects exactly one argument, the damaged file.
   Pass 1 scans the file bit by bit for block-header / end-of-stream
   markers and records the spans between them.  Pass 2 re-reads the
   file and copies each accepted span into its own output file named
   rec<NNNNN><original basename> (".bz2" appended if missing), wrapped
   in a fresh stream header and trailer.
   Returns 0 on success; every failure path calls exit(1). */
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       b, wrBlock, currBlock, rbCtr;
   MaybeUInt64 bitsRead;

   UInt32      buffHi, buffLo, blockCRC;
   Char*       p;

   /* Bounded copy: argv[0] is caller-controlled and may be longer
      than progName (or absent altogether). */
   if (argc > 0 && argv[0] != NULL) {
      strncpy ( progName, argv[0], BZ_MAX_FILENAME-1 );
      progName[BZ_MAX_FILENAME-1] = '\0';
   } else {
      strcpy ( progName, "bzip2recover" );
   }
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr,
             "bzip2recover 1.0.5: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      switch (sizeof(MaybeUInt64)) {
         case 8:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: None\n");
            break;
         case 4:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: 512 MB\n");
            fprintf(stderr,
                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
                    "\tunsigned 64-bit int.\n");
            break;
         default:
            fprintf(stderr,
                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
                    "configuration error.\n");
            break;
      }
      exit(1);
   }

   /* Leave 20 bytes of headroom for the "rec%5d" prefix and ".bz2"
      suffix added to the output file name below. */
   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
      fprintf ( stderr,
                "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
                progName, (int)strlen(argv[1]) );
      exit(1);
   }

   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   /*-- Pass 1: locate the markers. --*/
   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         /* EOF: keep the trailing span only if it holds >= 40 bits. */
         if (bitsRead >= bStart[currBlock] &&
            (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
                                 " to " MaybeUInt64_FMT " (incomplete)\n",
                         currBlock,  bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }

      /* buffHi:buffLo is a 64-bit shift register of the latest bits. */
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         /* The 48-bit marker just completed occupies bits
            bitsRead-48 .. bitsRead-1, so the previous span ends
            at bitsRead-49. */
         if (bitsRead > 49) {
            bEnd[currBlock] = bitsRead-49;
         } else {
            bEnd[currBlock] = 0;
         }
         if (currBlock > 0 &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
                              " to " MaybeUInt64_FMT "\n",
                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         /* Check BEFORE advancing: the index about to be used must
            stay inside the BZ_MAX_HANDLED_BLOCKS-element tables. */
         if (currBlock >= BZ_MAX_HANDLED_BLOCKS-1)
            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
         currBlock++;

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks are numbered 1 to rbCtr inclusive
        (table indices 0 to rbCtr-1). --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   }

   /*-- Pass 2: copy each accepted span to its own file. --*/
   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = NULL;

   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      /* 48 bits into the span, bits 16..47 of the shift register
         hold the span's first 32 bits; save them for the trailer. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         if (outFile != NULL) {
            /* Trailer: end-of-stream marker, then the saved CRC. */
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            /* bsClose freed bsWr and closed outFile.  Forget both so
               a span whose recorded end precedes its start cannot
               cause writes through the dangling pointers. */
            bsWr = NULL;
            outFile = NULL;
         }
         /* Stop once the last recorded span (index rbCtr-1) is done;
            nothing after it is copied. */
         if (wrBlock+1 >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /* Create the output file name, correctly handling leading paths.
            (31.10.2001 by Sergey E. Kusikov) */
         Char* split;
         Int32 ofs, k;
         for (k = 0; k < BZ_MAX_FILENAME; k++)
            outFileName[k] = 0;
         strcpy (outFileName, inFileName);
         split = strrchr (outFileName, BZ_SPLIT_SYM);
         if (split == NULL) {
            split = outFileName;
         } else {
            ++split;
         }
         /* Now split points to the start of the basename. */
         ofs  = split - outFileName;
         sprintf (split, "rec%5d", wrBlock+1);
         /* %5d pads with spaces; turn them into leading zeros. */
         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
         strcat (outFileName, inFileName + ofs);

         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, " writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         bsWr = bsOpenWriteStream ( outFile );
         /* Stream header "BZh9", then the 48-bit block header. */
         bsPutUChar ( bsWr, BZ_HDR_B );
         bsPutUChar ( bsWr, BZ_HDR_Z );
         bsPutUChar ( bsWr, BZ_HDR_h );
         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   /* Release the input stream opened for the second pass. */
   bsClose ( bsIn );

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}



/*-----------------------------------------------------------*/
/*--- end                                  bzip2recover.c ---*/
/*-----------------------------------------------------------*/