178556Sobrien 278556Sobrien/*-------------------------------------------------------------*/ 378556Sobrien/*--- Compression machinery (not incl block sorting) ---*/ 478556Sobrien/*--- compress.c ---*/ 578556Sobrien/*-------------------------------------------------------------*/ 678556Sobrien 7167974Sdelphij/* ------------------------------------------------------------------ 8167974Sdelphij This file is part of bzip2/libbzip2, a program and library for 9167974Sdelphij lossless, block-sorting data compression. 1078556Sobrien 11215041Sobrien bzip2/libbzip2 version 1.0.6 of 6 September 2010 12215041Sobrien Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org> 1378556Sobrien 14167974Sdelphij Please read the WARNING, DISCLAIMER and PATENTS sections in the 15167974Sdelphij README file. 1678556Sobrien 17167974Sdelphij This program is released under the terms of the license contained 18167974Sdelphij in the file LICENSE. 19167974Sdelphij ------------------------------------------------------------------ */ 2078556Sobrien 2178556Sobrien 22167974Sdelphij/* CHANGES 23167974Sdelphij 0.9.0 -- original version. 24167974Sdelphij 0.9.0a/b -- no changes in this file. 25167974Sdelphij 0.9.0c -- changed setting of nGroups in sendMTFValues() 26167974Sdelphij so as to do a bit better on small files 27167974Sdelphij*/ 2878556Sobrien 2978556Sobrien#include "bzlib_private.h" 3078556Sobrien 3178556Sobrien 3278556Sobrien/*---------------------------------------------------*/ 3378556Sobrien/*--- Bit stream I/O ---*/ 3478556Sobrien/*---------------------------------------------------*/ 3578556Sobrien 3678556Sobrien/*---------------------------------------------------*/ 3778556Sobrienvoid BZ2_bsInitWrite ( EState* s ) 3878556Sobrien{ 3978556Sobrien s->bsLive = 0; 4078556Sobrien s->bsBuff = 0; 4178556Sobrien} 4278556Sobrien 4378556Sobrien 4478556Sobrien/*---------------------------------------------------*/ 4578556Sobrienstatic 4678556Sobrienvoid bsFinishWrite ( EState* s ) 4778556Sobrien{ 4878556Sobrien while (s->bsLive > 0) { 4978556Sobrien s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); 5078556Sobrien s->numZ++; 5178556Sobrien s->bsBuff <<= 8; 5278556Sobrien s->bsLive -= 8; 5378556Sobrien } 5478556Sobrien} 5578556Sobrien 5678556Sobrien 5778556Sobrien/*---------------------------------------------------*/ 5878556Sobrien#define bsNEEDW(nz) \ 5978556Sobrien{ \ 6078556Sobrien while (s->bsLive >= 8) { \ 6178556Sobrien s->zbits[s->numZ] \ 6278556Sobrien = (UChar)(s->bsBuff >> 24); \ 6378556Sobrien s->numZ++; \ 6478556Sobrien s->bsBuff <<= 8; \ 6578556Sobrien s->bsLive -= 8; \ 6678556Sobrien } \ 6778556Sobrien} 6878556Sobrien 6978556Sobrien 7078556Sobrien/*---------------------------------------------------*/ 7178556Sobrienstatic 7278556Sobrien__inline__ 7378556Sobrienvoid bsW ( EState* s, Int32 n, UInt32 v ) 7478556Sobrien{ 7578556Sobrien bsNEEDW ( n ); 7678556Sobrien s->bsBuff |= (v << (32 - s->bsLive - n)); 7778556Sobrien s->bsLive += n; 7878556Sobrien} 7978556Sobrien 8078556Sobrien 8178556Sobrien/*---------------------------------------------------*/ 8278556Sobrienstatic 8378556Sobrienvoid bsPutUInt32 ( EState* s, UInt32 u ) 8478556Sobrien{ 8578556Sobrien bsW ( s, 8, (u >> 24) & 0xffL ); 8678556Sobrien bsW ( s, 8, (u >> 16) & 0xffL ); 8778556Sobrien bsW ( s, 8, (u >> 8) & 0xffL ); 8878556Sobrien bsW ( s, 8, u & 0xffL ); 8978556Sobrien} 9078556Sobrien 9178556Sobrien 9278556Sobrien/*---------------------------------------------------*/ 9378556Sobrienstatic 9478556Sobrienvoid bsPutUChar ( EState* s, UChar c ) 9578556Sobrien{ 9678556Sobrien bsW( s, 8, (UInt32)c ); 9778556Sobrien} 9878556Sobrien 9978556Sobrien 10078556Sobrien/*---------------------------------------------------*/ 10178556Sobrien/*--- The back end proper ---*/ 10278556Sobrien/*---------------------------------------------------*/ 10378556Sobrien 10478556Sobrien/*---------------------------------------------------*/ 10578556Sobrienstatic 10678556Sobrienvoid makeMaps_e ( EState* s ) 10778556Sobrien{ 10878556Sobrien Int32 i; 10978556Sobrien s->nInUse = 0; 11078556Sobrien for (i = 0; i < 256; i++) 11178556Sobrien if (s->inUse[i]) { 11278556Sobrien s->unseqToSeq[i] = s->nInUse; 11378556Sobrien s->nInUse++; 11478556Sobrien } 11578556Sobrien} 11678556Sobrien 11778556Sobrien 11878556Sobrien/*---------------------------------------------------*/ 11978556Sobrienstatic 12078556Sobrienvoid generateMTFValues ( EState* s ) 12178556Sobrien{ 12278556Sobrien UChar yy[256]; 12378556Sobrien Int32 i, j; 12478556Sobrien Int32 zPend; 12578556Sobrien Int32 wr; 12678556Sobrien Int32 EOB; 12778556Sobrien 12878556Sobrien /* 12978556Sobrien After sorting (eg, here), 13078556Sobrien s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, 13178556Sobrien and 13278556Sobrien ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] 13378556Sobrien holds the original block data. 13478556Sobrien 13578556Sobrien The first thing to do is generate the MTF values, 13678556Sobrien and put them in 13778556Sobrien ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. 13878556Sobrien Because there are strictly fewer or equal MTF values 13978556Sobrien than block values, ptr values in this area are overwritten 14078556Sobrien with MTF values only when they are no longer needed. 14178556Sobrien 14278556Sobrien The final compressed bitstream is generated into the 14378556Sobrien area starting at 14478556Sobrien (UChar*) (&((UChar*)s->arr2)[s->nblock]) 14578556Sobrien 14678556Sobrien These storage aliases are set up in bzCompressInit(), 14778556Sobrien except for the last one, which is arranged in 14878556Sobrien compressBlock(). 14978556Sobrien */ 15078556Sobrien UInt32* ptr = s->ptr; 15178556Sobrien UChar* block = s->block; 15278556Sobrien UInt16* mtfv = s->mtfv; 15378556Sobrien 15478556Sobrien makeMaps_e ( s ); 15578556Sobrien EOB = s->nInUse+1; 15678556Sobrien 15778556Sobrien for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; 15878556Sobrien 15978556Sobrien wr = 0; 16078556Sobrien zPend = 0; 16178556Sobrien for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; 16278556Sobrien 16378556Sobrien for (i = 0; i < s->nblock; i++) { 16478556Sobrien UChar ll_i; 16578556Sobrien AssertD ( wr <= i, "generateMTFValues(1)" ); 16678556Sobrien j = ptr[i]-1; if (j < 0) j += s->nblock; 16778556Sobrien ll_i = s->unseqToSeq[block[j]]; 16878556Sobrien AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); 16978556Sobrien 17078556Sobrien if (yy[0] == ll_i) { 17178556Sobrien zPend++; 17278556Sobrien } else { 17378556Sobrien 17478556Sobrien if (zPend > 0) { 17578556Sobrien zPend--; 17678556Sobrien while (True) { 17778556Sobrien if (zPend & 1) { 17878556Sobrien mtfv[wr] = BZ_RUNB; wr++; 17978556Sobrien s->mtfFreq[BZ_RUNB]++; 18078556Sobrien } else { 18178556Sobrien mtfv[wr] = BZ_RUNA; wr++; 18278556Sobrien s->mtfFreq[BZ_RUNA]++; 18378556Sobrien } 18478556Sobrien if (zPend < 2) break; 18578556Sobrien zPend = (zPend - 2) / 2; 18678556Sobrien }; 18778556Sobrien zPend = 0; 18878556Sobrien } 18978556Sobrien { 19078556Sobrien register UChar rtmp; 19178556Sobrien register UChar* ryy_j; 19278556Sobrien register UChar rll_i; 19378556Sobrien rtmp = yy[1]; 19478556Sobrien yy[1] = yy[0]; 19578556Sobrien ryy_j = &(yy[1]); 19678556Sobrien rll_i = ll_i; 19778556Sobrien while ( rll_i != rtmp ) { 19878556Sobrien register UChar rtmp2; 19978556Sobrien ryy_j++; 20078556Sobrien rtmp2 = rtmp; 20178556Sobrien rtmp = *ryy_j; 20278556Sobrien *ryy_j = rtmp2; 20378556Sobrien }; 20478556Sobrien yy[0] = rtmp; 20578556Sobrien j = ryy_j - &(yy[0]); 20678556Sobrien mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; 20778556Sobrien } 20878556Sobrien 20978556Sobrien } 21078556Sobrien } 21178556Sobrien 21278556Sobrien if (zPend > 0) { 21378556Sobrien zPend--; 21478556Sobrien while (True) { 21578556Sobrien if (zPend & 1) { 21678556Sobrien mtfv[wr] = BZ_RUNB; wr++; 21778556Sobrien s->mtfFreq[BZ_RUNB]++; 21878556Sobrien } else { 21978556Sobrien mtfv[wr] = BZ_RUNA; wr++; 22078556Sobrien s->mtfFreq[BZ_RUNA]++; 22178556Sobrien } 22278556Sobrien if (zPend < 2) break; 22378556Sobrien zPend = (zPend - 2) / 2; 22478556Sobrien }; 22578556Sobrien zPend = 0; 22678556Sobrien } 22778556Sobrien 22878556Sobrien mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; 22978556Sobrien 23078556Sobrien s->nMTF = wr; 23178556Sobrien} 23278556Sobrien 23378556Sobrien 23478556Sobrien/*---------------------------------------------------*/ 23578556Sobrien#define BZ_LESSER_ICOST 0 23678556Sobrien#define BZ_GREATER_ICOST 15 23778556Sobrien 23878556Sobrienstatic 23978556Sobrienvoid sendMTFValues ( EState* s ) 24078556Sobrien{ 24178556Sobrien Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; 24278556Sobrien Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; 24378556Sobrien Int32 nGroups, nBytes; 24478556Sobrien 24578556Sobrien /*-- 24678556Sobrien UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 24778556Sobrien is a global since the decoder also needs it. 24878556Sobrien 24978556Sobrien Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 25078556Sobrien Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 25178556Sobrien are also globals only used in this proc. 25278556Sobrien Made global to keep stack frame size small. 25378556Sobrien --*/ 25478556Sobrien 25578556Sobrien 25678556Sobrien UInt16 cost[BZ_N_GROUPS]; 25778556Sobrien Int32 fave[BZ_N_GROUPS]; 25878556Sobrien 25978556Sobrien UInt16* mtfv = s->mtfv; 26078556Sobrien 26178556Sobrien if (s->verbosity >= 3) 26278556Sobrien VPrintf3( " %d in block, %d after MTF & 1-2 coding, " 26378556Sobrien "%d+2 syms in use\n", 26478556Sobrien s->nblock, s->nMTF, s->nInUse ); 26578556Sobrien 26678556Sobrien alphaSize = s->nInUse+2; 26778556Sobrien for (t = 0; t < BZ_N_GROUPS; t++) 26878556Sobrien for (v = 0; v < alphaSize; v++) 26978556Sobrien s->len[t][v] = BZ_GREATER_ICOST; 27078556Sobrien 27178556Sobrien /*--- Decide how many coding tables to use ---*/ 27278556Sobrien AssertH ( s->nMTF > 0, 3001 ); 27378556Sobrien if (s->nMTF < 200) nGroups = 2; else 27478556Sobrien if (s->nMTF < 600) nGroups = 3; else 27578556Sobrien if (s->nMTF < 1200) nGroups = 4; else 27678556Sobrien if (s->nMTF < 2400) nGroups = 5; else 27778556Sobrien nGroups = 6; 27878556Sobrien 27978556Sobrien /*--- Generate an initial set of coding tables ---*/ 28078556Sobrien { 28178556Sobrien Int32 nPart, remF, tFreq, aFreq; 28278556Sobrien 28378556Sobrien nPart = nGroups; 28478556Sobrien remF = s->nMTF; 28578556Sobrien gs = 0; 28678556Sobrien while (nPart > 0) { 28778556Sobrien tFreq = remF / nPart; 28878556Sobrien ge = gs-1; 28978556Sobrien aFreq = 0; 29078556Sobrien while (aFreq < tFreq && ge < alphaSize-1) { 29178556Sobrien ge++; 29278556Sobrien aFreq += s->mtfFreq[ge]; 29378556Sobrien } 29478556Sobrien 29578556Sobrien if (ge > gs 29678556Sobrien && nPart != nGroups && nPart != 1 29778556Sobrien && ((nGroups-nPart) % 2 == 1)) { 29878556Sobrien aFreq -= s->mtfFreq[ge]; 29978556Sobrien ge--; 30078556Sobrien } 30178556Sobrien 30278556Sobrien if (s->verbosity >= 3) 30378556Sobrien VPrintf5( " initial group %d, [%d .. %d], " 30478556Sobrien "has %d syms (%4.1f%%)\n", 30578556Sobrien nPart, gs, ge, aFreq, 30678556Sobrien (100.0 * (float)aFreq) / (float)(s->nMTF) ); 30778556Sobrien 30878556Sobrien for (v = 0; v < alphaSize; v++) 30978556Sobrien if (v >= gs && v <= ge) 31078556Sobrien s->len[nPart-1][v] = BZ_LESSER_ICOST; else 31178556Sobrien s->len[nPart-1][v] = BZ_GREATER_ICOST; 31278556Sobrien 31378556Sobrien nPart--; 31478556Sobrien gs = ge+1; 31578556Sobrien remF -= aFreq; 31678556Sobrien } 31778556Sobrien } 31878556Sobrien 31978556Sobrien /*--- 32078556Sobrien Iterate up to BZ_N_ITERS times to improve the tables. 32178556Sobrien ---*/ 32278556Sobrien for (iter = 0; iter < BZ_N_ITERS; iter++) { 32378556Sobrien 32478556Sobrien for (t = 0; t < nGroups; t++) fave[t] = 0; 32578556Sobrien 32678556Sobrien for (t = 0; t < nGroups; t++) 32778556Sobrien for (v = 0; v < alphaSize; v++) 32878556Sobrien s->rfreq[t][v] = 0; 32978556Sobrien 33078556Sobrien /*--- 33178556Sobrien Set up an auxiliary length table which is used to fast-track 33278556Sobrien the common case (nGroups == 6). 33378556Sobrien ---*/ 33478556Sobrien if (nGroups == 6) { 33578556Sobrien for (v = 0; v < alphaSize; v++) { 33678556Sobrien s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; 33778556Sobrien s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; 33878556Sobrien s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; 33978556Sobrien } 34078556Sobrien } 34178556Sobrien 34278556Sobrien nSelectors = 0; 34378556Sobrien totc = 0; 34478556Sobrien gs = 0; 34578556Sobrien while (True) { 34678556Sobrien 34778556Sobrien /*--- Set group start & end marks. --*/ 34878556Sobrien if (gs >= s->nMTF) break; 34978556Sobrien ge = gs + BZ_G_SIZE - 1; 35078556Sobrien if (ge >= s->nMTF) ge = s->nMTF-1; 35178556Sobrien 35278556Sobrien /*-- 35378556Sobrien Calculate the cost of this group as coded 35478556Sobrien by each of the coding tables. 35578556Sobrien --*/ 35678556Sobrien for (t = 0; t < nGroups; t++) cost[t] = 0; 35778556Sobrien 35878556Sobrien if (nGroups == 6 && 50 == ge-gs+1) { 35978556Sobrien /*--- fast track the common case ---*/ 36078556Sobrien register UInt32 cost01, cost23, cost45; 36178556Sobrien register UInt16 icv; 36278556Sobrien cost01 = cost23 = cost45 = 0; 36378556Sobrien 36478556Sobrien# define BZ_ITER(nn) \ 36578556Sobrien icv = mtfv[gs+(nn)]; \ 36678556Sobrien cost01 += s->len_pack[icv][0]; \ 36778556Sobrien cost23 += s->len_pack[icv][1]; \ 36878556Sobrien cost45 += s->len_pack[icv][2]; \ 36978556Sobrien 37078556Sobrien BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); 37178556Sobrien BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); 37278556Sobrien BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); 37378556Sobrien BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); 37478556Sobrien BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); 37578556Sobrien BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); 37678556Sobrien BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); 37778556Sobrien BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); 37878556Sobrien BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); 37978556Sobrien BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); 38078556Sobrien 38178556Sobrien# undef BZ_ITER 38278556Sobrien 38378556Sobrien cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; 38478556Sobrien cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; 38578556Sobrien cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; 38678556Sobrien 38778556Sobrien } else { 38878556Sobrien /*--- slow version which correctly handles all situations ---*/ 38978556Sobrien for (i = gs; i <= ge; i++) { 39078556Sobrien UInt16 icv = mtfv[i]; 39178556Sobrien for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; 39278556Sobrien } 39378556Sobrien } 39478556Sobrien 39578556Sobrien /*-- 39678556Sobrien Find the coding table which is best for this group, 39778556Sobrien and record its identity in the selector table. 39878556Sobrien --*/ 39978556Sobrien bc = 999999999; bt = -1; 40078556Sobrien for (t = 0; t < nGroups; t++) 40178556Sobrien if (cost[t] < bc) { bc = cost[t]; bt = t; }; 40278556Sobrien totc += bc; 40378556Sobrien fave[bt]++; 40478556Sobrien s->selector[nSelectors] = bt; 40578556Sobrien nSelectors++; 40678556Sobrien 40778556Sobrien /*-- 40878556Sobrien Increment the symbol frequencies for the selected table. 40978556Sobrien --*/ 41078556Sobrien if (nGroups == 6 && 50 == ge-gs+1) { 41178556Sobrien /*--- fast track the common case ---*/ 41278556Sobrien 41378556Sobrien# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ 41478556Sobrien 41578556Sobrien BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); 41678556Sobrien BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); 41778556Sobrien BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); 41878556Sobrien BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); 41978556Sobrien BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); 42078556Sobrien BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); 42178556Sobrien BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); 42278556Sobrien BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); 42378556Sobrien BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); 42478556Sobrien BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); 42578556Sobrien 42678556Sobrien# undef BZ_ITUR 42778556Sobrien 42878556Sobrien } else { 42978556Sobrien /*--- slow version which correctly handles all situations ---*/ 43078556Sobrien for (i = gs; i <= ge; i++) 43178556Sobrien s->rfreq[bt][ mtfv[i] ]++; 43278556Sobrien } 43378556Sobrien 43478556Sobrien gs = ge+1; 43578556Sobrien } 43678556Sobrien if (s->verbosity >= 3) { 43778556Sobrien VPrintf2 ( " pass %d: size is %d, grp uses are ", 43878556Sobrien iter+1, totc/8 ); 43978556Sobrien for (t = 0; t < nGroups; t++) 44078556Sobrien VPrintf1 ( "%d ", fave[t] ); 44178556Sobrien VPrintf0 ( "\n" ); 44278556Sobrien } 44378556Sobrien 44478556Sobrien /*-- 44578556Sobrien Recompute the tables based on the accumulated frequencies. 44678556Sobrien --*/ 447146293Sobrien /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See 448146293Sobrien comment in huffman.c for details. */ 44978556Sobrien for (t = 0; t < nGroups; t++) 45078556Sobrien BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), 451146293Sobrien alphaSize, 17 /*20*/ ); 45278556Sobrien } 45378556Sobrien 45478556Sobrien 45578556Sobrien AssertH( nGroups < 8, 3002 ); 45678556Sobrien AssertH( nSelectors < 32768 && 45778556Sobrien nSelectors <= (2 + (900000 / BZ_G_SIZE)), 45878556Sobrien 3003 ); 45978556Sobrien 46078556Sobrien 46178556Sobrien /*--- Compute MTF values for the selectors. ---*/ 46278556Sobrien { 46378556Sobrien UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; 46478556Sobrien for (i = 0; i < nGroups; i++) pos[i] = i; 46578556Sobrien for (i = 0; i < nSelectors; i++) { 46678556Sobrien ll_i = s->selector[i]; 46778556Sobrien j = 0; 46878556Sobrien tmp = pos[j]; 46978556Sobrien while ( ll_i != tmp ) { 47078556Sobrien j++; 47178556Sobrien tmp2 = tmp; 47278556Sobrien tmp = pos[j]; 47378556Sobrien pos[j] = tmp2; 47478556Sobrien }; 47578556Sobrien pos[0] = tmp; 47678556Sobrien s->selectorMtf[i] = j; 47778556Sobrien } 47878556Sobrien }; 47978556Sobrien 48078556Sobrien /*--- Assign actual codes for the tables. --*/ 48178556Sobrien for (t = 0; t < nGroups; t++) { 48278556Sobrien minLen = 32; 48378556Sobrien maxLen = 0; 48478556Sobrien for (i = 0; i < alphaSize; i++) { 48578556Sobrien if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; 48678556Sobrien if (s->len[t][i] < minLen) minLen = s->len[t][i]; 48778556Sobrien } 488146293Sobrien AssertH ( !(maxLen > 17 /*20*/ ), 3004 ); 48978556Sobrien AssertH ( !(minLen < 1), 3005 ); 49078556Sobrien BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), 49178556Sobrien minLen, maxLen, alphaSize ); 49278556Sobrien } 49378556Sobrien 49478556Sobrien /*--- Transmit the mapping table. ---*/ 49578556Sobrien { 49678556Sobrien Bool inUse16[16]; 49778556Sobrien for (i = 0; i < 16; i++) { 49878556Sobrien inUse16[i] = False; 49978556Sobrien for (j = 0; j < 16; j++) 50078556Sobrien if (s->inUse[i * 16 + j]) inUse16[i] = True; 50178556Sobrien } 50278556Sobrien 50378556Sobrien nBytes = s->numZ; 50478556Sobrien for (i = 0; i < 16; i++) 50578556Sobrien if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); 50678556Sobrien 50778556Sobrien for (i = 0; i < 16; i++) 50878556Sobrien if (inUse16[i]) 50978556Sobrien for (j = 0; j < 16; j++) { 51078556Sobrien if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); 51178556Sobrien } 51278556Sobrien 51378556Sobrien if (s->verbosity >= 3) 51478556Sobrien VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); 51578556Sobrien } 51678556Sobrien 51778556Sobrien /*--- Now the selectors. ---*/ 51878556Sobrien nBytes = s->numZ; 51978556Sobrien bsW ( s, 3, nGroups ); 52078556Sobrien bsW ( s, 15, nSelectors ); 52178556Sobrien for (i = 0; i < nSelectors; i++) { 52278556Sobrien for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); 52378556Sobrien bsW(s,1,0); 52478556Sobrien } 52578556Sobrien if (s->verbosity >= 3) 52678556Sobrien VPrintf1( "selectors %d, ", s->numZ-nBytes ); 52778556Sobrien 52878556Sobrien /*--- Now the coding tables. ---*/ 52978556Sobrien nBytes = s->numZ; 53078556Sobrien 53178556Sobrien for (t = 0; t < nGroups; t++) { 53278556Sobrien Int32 curr = s->len[t][0]; 53378556Sobrien bsW ( s, 5, curr ); 53478556Sobrien for (i = 0; i < alphaSize; i++) { 53578556Sobrien while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; 53678556Sobrien while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; 53778556Sobrien bsW ( s, 1, 0 ); 53878556Sobrien } 53978556Sobrien } 54078556Sobrien 54178556Sobrien if (s->verbosity >= 3) 54278556Sobrien VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); 54378556Sobrien 54478556Sobrien /*--- And finally, the block data proper ---*/ 54578556Sobrien nBytes = s->numZ; 54678556Sobrien selCtr = 0; 54778556Sobrien gs = 0; 54878556Sobrien while (True) { 54978556Sobrien if (gs >= s->nMTF) break; 55078556Sobrien ge = gs + BZ_G_SIZE - 1; 55178556Sobrien if (ge >= s->nMTF) ge = s->nMTF-1; 55278556Sobrien AssertH ( s->selector[selCtr] < nGroups, 3006 ); 55378556Sobrien 55478556Sobrien if (nGroups == 6 && 50 == ge-gs+1) { 55578556Sobrien /*--- fast track the common case ---*/ 55678556Sobrien UInt16 mtfv_i; 55778556Sobrien UChar* s_len_sel_selCtr 55878556Sobrien = &(s->len[s->selector[selCtr]][0]); 55978556Sobrien Int32* s_code_sel_selCtr 56078556Sobrien = &(s->code[s->selector[selCtr]][0]); 56178556Sobrien 56278556Sobrien# define BZ_ITAH(nn) \ 56378556Sobrien mtfv_i = mtfv[gs+(nn)]; \ 56478556Sobrien bsW ( s, \ 56578556Sobrien s_len_sel_selCtr[mtfv_i], \ 56678556Sobrien s_code_sel_selCtr[mtfv_i] ) 56778556Sobrien 56878556Sobrien BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); 56978556Sobrien BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); 57078556Sobrien BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); 57178556Sobrien BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); 57278556Sobrien BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); 57378556Sobrien BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); 57478556Sobrien BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); 57578556Sobrien BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); 57678556Sobrien BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); 57778556Sobrien BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); 57878556Sobrien 57978556Sobrien# undef BZ_ITAH 58078556Sobrien 58178556Sobrien } else { 58278556Sobrien /*--- slow version which correctly handles all situations ---*/ 58378556Sobrien for (i = gs; i <= ge; i++) { 58478556Sobrien bsW ( s, 58578556Sobrien s->len [s->selector[selCtr]] [mtfv[i]], 58678556Sobrien s->code [s->selector[selCtr]] [mtfv[i]] ); 58778556Sobrien } 58878556Sobrien } 58978556Sobrien 59078556Sobrien 59178556Sobrien gs = ge+1; 59278556Sobrien selCtr++; 59378556Sobrien } 59478556Sobrien AssertH( selCtr == nSelectors, 3007 ); 59578556Sobrien 59678556Sobrien if (s->verbosity >= 3) 59778556Sobrien VPrintf1( "codes %d\n", s->numZ-nBytes ); 59878556Sobrien} 59978556Sobrien 60078556Sobrien 60178556Sobrien/*---------------------------------------------------*/ 60278556Sobrienvoid BZ2_compressBlock ( EState* s, Bool is_last_block ) 60378556Sobrien{ 60478556Sobrien if (s->nblock > 0) { 60578556Sobrien 60678556Sobrien BZ_FINALISE_CRC ( s->blockCRC ); 60778556Sobrien s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); 60878556Sobrien s->combinedCRC ^= s->blockCRC; 60978556Sobrien if (s->blockNo > 1) s->numZ = 0; 61078556Sobrien 61178556Sobrien if (s->verbosity >= 2) 612146293Sobrien VPrintf4( " block %d: crc = 0x%08x, " 613146293Sobrien "combined CRC = 0x%08x, size = %d\n", 61478556Sobrien s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); 61578556Sobrien 61678556Sobrien BZ2_blockSort ( s ); 61778556Sobrien } 61878556Sobrien 61978556Sobrien s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); 62078556Sobrien 62178556Sobrien /*-- If this is the first block, create the stream header. --*/ 62278556Sobrien if (s->blockNo == 1) { 62378556Sobrien BZ2_bsInitWrite ( s ); 62490067Ssobomax bsPutUChar ( s, BZ_HDR_B ); 62590067Ssobomax bsPutUChar ( s, BZ_HDR_Z ); 62690067Ssobomax bsPutUChar ( s, BZ_HDR_h ); 62790067Ssobomax bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); 62878556Sobrien } 62978556Sobrien 63078556Sobrien if (s->nblock > 0) { 63178556Sobrien 63278556Sobrien bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); 63378556Sobrien bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); 63478556Sobrien bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); 63578556Sobrien 63678556Sobrien /*-- Now the block's CRC, so it is in a known place. --*/ 63778556Sobrien bsPutUInt32 ( s, s->blockCRC ); 63878556Sobrien 63978556Sobrien /*-- 64078556Sobrien Now a single bit indicating (non-)randomisation. 64178556Sobrien As of version 0.9.5, we use a better sorting algorithm 64278556Sobrien which makes randomisation unnecessary. So always set 64378556Sobrien the randomised bit to 'no'. Of course, the decoder 64478556Sobrien still needs to be able to handle randomised blocks 64578556Sobrien so as to maintain backwards compatibility with 64678556Sobrien older versions of bzip2. 64778556Sobrien --*/ 64878556Sobrien bsW(s,1,0); 64978556Sobrien 65078556Sobrien bsW ( s, 24, s->origPtr ); 65178556Sobrien generateMTFValues ( s ); 65278556Sobrien sendMTFValues ( s ); 65378556Sobrien } 65478556Sobrien 65578556Sobrien 65678556Sobrien /*-- If this is the last block, add the stream trailer. --*/ 65778556Sobrien if (is_last_block) { 65878556Sobrien 65978556Sobrien bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); 66078556Sobrien bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); 66178556Sobrien bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); 66278556Sobrien bsPutUInt32 ( s, s->combinedCRC ); 66378556Sobrien if (s->verbosity >= 2) 664146293Sobrien VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); 66578556Sobrien bsFinishWrite ( s ); 66678556Sobrien } 66778556Sobrien} 66878556Sobrien 66978556Sobrien 67078556Sobrien/*-------------------------------------------------------------*/ 67178556Sobrien/*--- end compress.c ---*/ 67278556Sobrien/*-------------------------------------------------------------*/ 673