178556Sobrien
278556Sobrien/*-------------------------------------------------------------*/
378556Sobrien/*--- Compression machinery (not incl block sorting)        ---*/
478556Sobrien/*---                                            compress.c ---*/
578556Sobrien/*-------------------------------------------------------------*/
678556Sobrien
7167974Sdelphij/* ------------------------------------------------------------------
8167974Sdelphij   This file is part of bzip2/libbzip2, a program and library for
9167974Sdelphij   lossless, block-sorting data compression.
1078556Sobrien
11215041Sobrien   bzip2/libbzip2 version 1.0.6 of 6 September 2010
12215041Sobrien   Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
1378556Sobrien
14167974Sdelphij   Please read the WARNING, DISCLAIMER and PATENTS sections in the
15167974Sdelphij   README file.
1678556Sobrien
17167974Sdelphij   This program is released under the terms of the license contained
18167974Sdelphij   in the file LICENSE.
19167974Sdelphij   ------------------------------------------------------------------ */
2078556Sobrien
2178556Sobrien
22167974Sdelphij/* CHANGES
23167974Sdelphij    0.9.0    -- original version.
24167974Sdelphij    0.9.0a/b -- no changes in this file.
25167974Sdelphij    0.9.0c   -- changed setting of nGroups in sendMTFValues()
26167974Sdelphij                so as to do a bit better on small files
27167974Sdelphij*/
2878556Sobrien
2978556Sobrien#include "bzlib_private.h"
3078556Sobrien
3178556Sobrien
3278556Sobrien/*---------------------------------------------------*/
3378556Sobrien/*--- Bit stream I/O                              ---*/
3478556Sobrien/*---------------------------------------------------*/
3578556Sobrien
3678556Sobrien/*---------------------------------------------------*/
3778556Sobrienvoid BZ2_bsInitWrite ( EState* s )
3878556Sobrien{
3978556Sobrien   s->bsLive = 0;
4078556Sobrien   s->bsBuff = 0;
4178556Sobrien}
4278556Sobrien
4378556Sobrien
4478556Sobrien/*---------------------------------------------------*/
4578556Sobrienstatic
4678556Sobrienvoid bsFinishWrite ( EState* s )
4778556Sobrien{
4878556Sobrien   while (s->bsLive > 0) {
4978556Sobrien      s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
5078556Sobrien      s->numZ++;
5178556Sobrien      s->bsBuff <<= 8;
5278556Sobrien      s->bsLive -= 8;
5378556Sobrien   }
5478556Sobrien}
5578556Sobrien
5678556Sobrien
5778556Sobrien/*---------------------------------------------------*/
5878556Sobrien#define bsNEEDW(nz)                           \
5978556Sobrien{                                             \
6078556Sobrien   while (s->bsLive >= 8) {                   \
6178556Sobrien      s->zbits[s->numZ]                       \
6278556Sobrien         = (UChar)(s->bsBuff >> 24);          \
6378556Sobrien      s->numZ++;                              \
6478556Sobrien      s->bsBuff <<= 8;                        \
6578556Sobrien      s->bsLive -= 8;                         \
6678556Sobrien   }                                          \
6778556Sobrien}
6878556Sobrien
6978556Sobrien
7078556Sobrien/*---------------------------------------------------*/
7178556Sobrienstatic
7278556Sobrien__inline__
7378556Sobrienvoid bsW ( EState* s, Int32 n, UInt32 v )
7478556Sobrien{
7578556Sobrien   bsNEEDW ( n );
7678556Sobrien   s->bsBuff |= (v << (32 - s->bsLive - n));
7778556Sobrien   s->bsLive += n;
7878556Sobrien}
7978556Sobrien
8078556Sobrien
8178556Sobrien/*---------------------------------------------------*/
8278556Sobrienstatic
8378556Sobrienvoid bsPutUInt32 ( EState* s, UInt32 u )
8478556Sobrien{
8578556Sobrien   bsW ( s, 8, (u >> 24) & 0xffL );
8678556Sobrien   bsW ( s, 8, (u >> 16) & 0xffL );
8778556Sobrien   bsW ( s, 8, (u >>  8) & 0xffL );
8878556Sobrien   bsW ( s, 8,  u        & 0xffL );
8978556Sobrien}
9078556Sobrien
9178556Sobrien
9278556Sobrien/*---------------------------------------------------*/
9378556Sobrienstatic
9478556Sobrienvoid bsPutUChar ( EState* s, UChar c )
9578556Sobrien{
9678556Sobrien   bsW( s, 8, (UInt32)c );
9778556Sobrien}
9878556Sobrien
9978556Sobrien
10078556Sobrien/*---------------------------------------------------*/
10178556Sobrien/*--- The back end proper                         ---*/
10278556Sobrien/*---------------------------------------------------*/
10378556Sobrien
10478556Sobrien/*---------------------------------------------------*/
10578556Sobrienstatic
10678556Sobrienvoid makeMaps_e ( EState* s )
10778556Sobrien{
10878556Sobrien   Int32 i;
10978556Sobrien   s->nInUse = 0;
11078556Sobrien   for (i = 0; i < 256; i++)
11178556Sobrien      if (s->inUse[i]) {
11278556Sobrien         s->unseqToSeq[i] = s->nInUse;
11378556Sobrien         s->nInUse++;
11478556Sobrien      }
11578556Sobrien}
11678556Sobrien
11778556Sobrien
11878556Sobrien/*---------------------------------------------------*/
11978556Sobrienstatic
12078556Sobrienvoid generateMTFValues ( EState* s )
12178556Sobrien{
12278556Sobrien   UChar   yy[256];
12378556Sobrien   Int32   i, j;
12478556Sobrien   Int32   zPend;
12578556Sobrien   Int32   wr;
12678556Sobrien   Int32   EOB;
12778556Sobrien
12878556Sobrien   /*
12978556Sobrien      After sorting (eg, here),
13078556Sobrien         s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
13178556Sobrien         and
13278556Sobrien         ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
13378556Sobrien         holds the original block data.
13478556Sobrien
13578556Sobrien      The first thing to do is generate the MTF values,
13678556Sobrien      and put them in
13778556Sobrien         ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
13878556Sobrien      Because there are strictly fewer or equal MTF values
13978556Sobrien      than block values, ptr values in this area are overwritten
14078556Sobrien      with MTF values only when they are no longer needed.
14178556Sobrien
14278556Sobrien      The final compressed bitstream is generated into the
14378556Sobrien      area starting at
14478556Sobrien         (UChar*) (&((UChar*)s->arr2)[s->nblock])
14578556Sobrien
14678556Sobrien      These storage aliases are set up in bzCompressInit(),
14778556Sobrien      except for the last one, which is arranged in
14878556Sobrien      compressBlock().
14978556Sobrien   */
15078556Sobrien   UInt32* ptr   = s->ptr;
15178556Sobrien   UChar* block  = s->block;
15278556Sobrien   UInt16* mtfv  = s->mtfv;
15378556Sobrien
15478556Sobrien   makeMaps_e ( s );
15578556Sobrien   EOB = s->nInUse+1;
15678556Sobrien
15778556Sobrien   for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
15878556Sobrien
15978556Sobrien   wr = 0;
16078556Sobrien   zPend = 0;
16178556Sobrien   for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
16278556Sobrien
16378556Sobrien   for (i = 0; i < s->nblock; i++) {
16478556Sobrien      UChar ll_i;
16578556Sobrien      AssertD ( wr <= i, "generateMTFValues(1)" );
16678556Sobrien      j = ptr[i]-1; if (j < 0) j += s->nblock;
16778556Sobrien      ll_i = s->unseqToSeq[block[j]];
16878556Sobrien      AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
16978556Sobrien
17078556Sobrien      if (yy[0] == ll_i) {
17178556Sobrien         zPend++;
17278556Sobrien      } else {
17378556Sobrien
17478556Sobrien         if (zPend > 0) {
17578556Sobrien            zPend--;
17678556Sobrien            while (True) {
17778556Sobrien               if (zPend & 1) {
17878556Sobrien                  mtfv[wr] = BZ_RUNB; wr++;
17978556Sobrien                  s->mtfFreq[BZ_RUNB]++;
18078556Sobrien               } else {
18178556Sobrien                  mtfv[wr] = BZ_RUNA; wr++;
18278556Sobrien                  s->mtfFreq[BZ_RUNA]++;
18378556Sobrien               }
18478556Sobrien               if (zPend < 2) break;
18578556Sobrien               zPend = (zPend - 2) / 2;
18678556Sobrien            };
18778556Sobrien            zPend = 0;
18878556Sobrien         }
18978556Sobrien         {
19078556Sobrien            register UChar  rtmp;
19178556Sobrien            register UChar* ryy_j;
19278556Sobrien            register UChar  rll_i;
19378556Sobrien            rtmp  = yy[1];
19478556Sobrien            yy[1] = yy[0];
19578556Sobrien            ryy_j = &(yy[1]);
19678556Sobrien            rll_i = ll_i;
19778556Sobrien            while ( rll_i != rtmp ) {
19878556Sobrien               register UChar rtmp2;
19978556Sobrien               ryy_j++;
20078556Sobrien               rtmp2  = rtmp;
20178556Sobrien               rtmp   = *ryy_j;
20278556Sobrien               *ryy_j = rtmp2;
20378556Sobrien            };
20478556Sobrien            yy[0] = rtmp;
20578556Sobrien            j = ryy_j - &(yy[0]);
20678556Sobrien            mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
20778556Sobrien         }
20878556Sobrien
20978556Sobrien      }
21078556Sobrien   }
21178556Sobrien
21278556Sobrien   if (zPend > 0) {
21378556Sobrien      zPend--;
21478556Sobrien      while (True) {
21578556Sobrien         if (zPend & 1) {
21678556Sobrien            mtfv[wr] = BZ_RUNB; wr++;
21778556Sobrien            s->mtfFreq[BZ_RUNB]++;
21878556Sobrien         } else {
21978556Sobrien            mtfv[wr] = BZ_RUNA; wr++;
22078556Sobrien            s->mtfFreq[BZ_RUNA]++;
22178556Sobrien         }
22278556Sobrien         if (zPend < 2) break;
22378556Sobrien         zPend = (zPend - 2) / 2;
22478556Sobrien      };
22578556Sobrien      zPend = 0;
22678556Sobrien   }
22778556Sobrien
22878556Sobrien   mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
22978556Sobrien
23078556Sobrien   s->nMTF = wr;
23178556Sobrien}
23278556Sobrien
23378556Sobrien
23478556Sobrien/*---------------------------------------------------*/
23578556Sobrien#define BZ_LESSER_ICOST  0
23678556Sobrien#define BZ_GREATER_ICOST 15
23778556Sobrien
23878556Sobrienstatic
23978556Sobrienvoid sendMTFValues ( EState* s )
24078556Sobrien{
24178556Sobrien   Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
24278556Sobrien   Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
24378556Sobrien   Int32 nGroups, nBytes;
24478556Sobrien
24578556Sobrien   /*--
24678556Sobrien   UChar  len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
24778556Sobrien   is a global since the decoder also needs it.
24878556Sobrien
24978556Sobrien   Int32  code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
25078556Sobrien   Int32  rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
25178556Sobrien   are also globals only used in this proc.
25278556Sobrien   Made global to keep stack frame size small.
25378556Sobrien   --*/
25478556Sobrien
25578556Sobrien
25678556Sobrien   UInt16 cost[BZ_N_GROUPS];
25778556Sobrien   Int32  fave[BZ_N_GROUPS];
25878556Sobrien
25978556Sobrien   UInt16* mtfv = s->mtfv;
26078556Sobrien
26178556Sobrien   if (s->verbosity >= 3)
26278556Sobrien      VPrintf3( "      %d in block, %d after MTF & 1-2 coding, "
26378556Sobrien                "%d+2 syms in use\n",
26478556Sobrien                s->nblock, s->nMTF, s->nInUse );
26578556Sobrien
26678556Sobrien   alphaSize = s->nInUse+2;
26778556Sobrien   for (t = 0; t < BZ_N_GROUPS; t++)
26878556Sobrien      for (v = 0; v < alphaSize; v++)
26978556Sobrien         s->len[t][v] = BZ_GREATER_ICOST;
27078556Sobrien
27178556Sobrien   /*--- Decide how many coding tables to use ---*/
27278556Sobrien   AssertH ( s->nMTF > 0, 3001 );
27378556Sobrien   if (s->nMTF < 200)  nGroups = 2; else
27478556Sobrien   if (s->nMTF < 600)  nGroups = 3; else
27578556Sobrien   if (s->nMTF < 1200) nGroups = 4; else
27678556Sobrien   if (s->nMTF < 2400) nGroups = 5; else
27778556Sobrien                       nGroups = 6;
27878556Sobrien
27978556Sobrien   /*--- Generate an initial set of coding tables ---*/
28078556Sobrien   {
28178556Sobrien      Int32 nPart, remF, tFreq, aFreq;
28278556Sobrien
28378556Sobrien      nPart = nGroups;
28478556Sobrien      remF  = s->nMTF;
28578556Sobrien      gs = 0;
28678556Sobrien      while (nPart > 0) {
28778556Sobrien         tFreq = remF / nPart;
28878556Sobrien         ge = gs-1;
28978556Sobrien         aFreq = 0;
29078556Sobrien         while (aFreq < tFreq && ge < alphaSize-1) {
29178556Sobrien            ge++;
29278556Sobrien            aFreq += s->mtfFreq[ge];
29378556Sobrien         }
29478556Sobrien
29578556Sobrien         if (ge > gs
29678556Sobrien             && nPart != nGroups && nPart != 1
29778556Sobrien             && ((nGroups-nPart) % 2 == 1)) {
29878556Sobrien            aFreq -= s->mtfFreq[ge];
29978556Sobrien            ge--;
30078556Sobrien         }
30178556Sobrien
30278556Sobrien         if (s->verbosity >= 3)
30378556Sobrien            VPrintf5( "      initial group %d, [%d .. %d], "
30478556Sobrien                      "has %d syms (%4.1f%%)\n",
30578556Sobrien                      nPart, gs, ge, aFreq,
30678556Sobrien                      (100.0 * (float)aFreq) / (float)(s->nMTF) );
30778556Sobrien
30878556Sobrien         for (v = 0; v < alphaSize; v++)
30978556Sobrien            if (v >= gs && v <= ge)
31078556Sobrien               s->len[nPart-1][v] = BZ_LESSER_ICOST; else
31178556Sobrien               s->len[nPart-1][v] = BZ_GREATER_ICOST;
31278556Sobrien
31378556Sobrien         nPart--;
31478556Sobrien         gs = ge+1;
31578556Sobrien         remF -= aFreq;
31678556Sobrien      }
31778556Sobrien   }
31878556Sobrien
31978556Sobrien   /*---
32078556Sobrien      Iterate up to BZ_N_ITERS times to improve the tables.
32178556Sobrien   ---*/
32278556Sobrien   for (iter = 0; iter < BZ_N_ITERS; iter++) {
32378556Sobrien
32478556Sobrien      for (t = 0; t < nGroups; t++) fave[t] = 0;
32578556Sobrien
32678556Sobrien      for (t = 0; t < nGroups; t++)
32778556Sobrien         for (v = 0; v < alphaSize; v++)
32878556Sobrien            s->rfreq[t][v] = 0;
32978556Sobrien
33078556Sobrien      /*---
33178556Sobrien        Set up an auxiliary length table which is used to fast-track
33278556Sobrien	the common case (nGroups == 6).
33378556Sobrien      ---*/
33478556Sobrien      if (nGroups == 6) {
33578556Sobrien         for (v = 0; v < alphaSize; v++) {
33678556Sobrien            s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
33778556Sobrien            s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
33878556Sobrien            s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
33978556Sobrien	 }
34078556Sobrien      }
34178556Sobrien
34278556Sobrien      nSelectors = 0;
34378556Sobrien      totc = 0;
34478556Sobrien      gs = 0;
34578556Sobrien      while (True) {
34678556Sobrien
34778556Sobrien         /*--- Set group start & end marks. --*/
34878556Sobrien         if (gs >= s->nMTF) break;
34978556Sobrien         ge = gs + BZ_G_SIZE - 1;
35078556Sobrien         if (ge >= s->nMTF) ge = s->nMTF-1;
35178556Sobrien
35278556Sobrien         /*--
35378556Sobrien            Calculate the cost of this group as coded
35478556Sobrien            by each of the coding tables.
35578556Sobrien         --*/
35678556Sobrien         for (t = 0; t < nGroups; t++) cost[t] = 0;
35778556Sobrien
35878556Sobrien         if (nGroups == 6 && 50 == ge-gs+1) {
35978556Sobrien            /*--- fast track the common case ---*/
36078556Sobrien            register UInt32 cost01, cost23, cost45;
36178556Sobrien            register UInt16 icv;
36278556Sobrien            cost01 = cost23 = cost45 = 0;
36378556Sobrien
36478556Sobrien#           define BZ_ITER(nn)                \
36578556Sobrien               icv = mtfv[gs+(nn)];           \
36678556Sobrien               cost01 += s->len_pack[icv][0]; \
36778556Sobrien               cost23 += s->len_pack[icv][1]; \
36878556Sobrien               cost45 += s->len_pack[icv][2]; \
36978556Sobrien
37078556Sobrien            BZ_ITER(0);  BZ_ITER(1);  BZ_ITER(2);  BZ_ITER(3);  BZ_ITER(4);
37178556Sobrien            BZ_ITER(5);  BZ_ITER(6);  BZ_ITER(7);  BZ_ITER(8);  BZ_ITER(9);
37278556Sobrien            BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
37378556Sobrien            BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
37478556Sobrien            BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
37578556Sobrien            BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
37678556Sobrien            BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
37778556Sobrien            BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
37878556Sobrien            BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
37978556Sobrien            BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
38078556Sobrien
38178556Sobrien#           undef BZ_ITER
38278556Sobrien
38378556Sobrien            cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
38478556Sobrien            cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
38578556Sobrien            cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
38678556Sobrien
38778556Sobrien         } else {
38878556Sobrien	    /*--- slow version which correctly handles all situations ---*/
38978556Sobrien            for (i = gs; i <= ge; i++) {
39078556Sobrien               UInt16 icv = mtfv[i];
39178556Sobrien               for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
39278556Sobrien            }
39378556Sobrien         }
39478556Sobrien
39578556Sobrien         /*--
39678556Sobrien            Find the coding table which is best for this group,
39778556Sobrien            and record its identity in the selector table.
39878556Sobrien         --*/
39978556Sobrien         bc = 999999999; bt = -1;
40078556Sobrien         for (t = 0; t < nGroups; t++)
40178556Sobrien            if (cost[t] < bc) { bc = cost[t]; bt = t; };
40278556Sobrien         totc += bc;
40378556Sobrien         fave[bt]++;
40478556Sobrien         s->selector[nSelectors] = bt;
40578556Sobrien         nSelectors++;
40678556Sobrien
40778556Sobrien         /*--
40878556Sobrien            Increment the symbol frequencies for the selected table.
40978556Sobrien          --*/
41078556Sobrien         if (nGroups == 6 && 50 == ge-gs+1) {
41178556Sobrien            /*--- fast track the common case ---*/
41278556Sobrien
41378556Sobrien#           define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
41478556Sobrien
41578556Sobrien            BZ_ITUR(0);  BZ_ITUR(1);  BZ_ITUR(2);  BZ_ITUR(3);  BZ_ITUR(4);
41678556Sobrien            BZ_ITUR(5);  BZ_ITUR(6);  BZ_ITUR(7);  BZ_ITUR(8);  BZ_ITUR(9);
41778556Sobrien            BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
41878556Sobrien            BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
41978556Sobrien            BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
42078556Sobrien            BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
42178556Sobrien            BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
42278556Sobrien            BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
42378556Sobrien            BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
42478556Sobrien            BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
42578556Sobrien
42678556Sobrien#           undef BZ_ITUR
42778556Sobrien
42878556Sobrien         } else {
42978556Sobrien	    /*--- slow version which correctly handles all situations ---*/
43078556Sobrien            for (i = gs; i <= ge; i++)
43178556Sobrien               s->rfreq[bt][ mtfv[i] ]++;
43278556Sobrien         }
43378556Sobrien
43478556Sobrien         gs = ge+1;
43578556Sobrien      }
43678556Sobrien      if (s->verbosity >= 3) {
43778556Sobrien         VPrintf2 ( "      pass %d: size is %d, grp uses are ",
43878556Sobrien                   iter+1, totc/8 );
43978556Sobrien         for (t = 0; t < nGroups; t++)
44078556Sobrien            VPrintf1 ( "%d ", fave[t] );
44178556Sobrien         VPrintf0 ( "\n" );
44278556Sobrien      }
44378556Sobrien
44478556Sobrien      /*--
44578556Sobrien        Recompute the tables based on the accumulated frequencies.
44678556Sobrien      --*/
447146293Sobrien      /* maxLen was changed from 20 to 17 in bzip2-1.0.3.  See
448146293Sobrien         comment in huffman.c for details. */
44978556Sobrien      for (t = 0; t < nGroups; t++)
45078556Sobrien         BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
451146293Sobrien                                 alphaSize, 17 /*20*/ );
45278556Sobrien   }
45378556Sobrien
45478556Sobrien
45578556Sobrien   AssertH( nGroups < 8, 3002 );
45678556Sobrien   AssertH( nSelectors < 32768 &&
45778556Sobrien            nSelectors <= (2 + (900000 / BZ_G_SIZE)),
45878556Sobrien            3003 );
45978556Sobrien
46078556Sobrien
46178556Sobrien   /*--- Compute MTF values for the selectors. ---*/
46278556Sobrien   {
46378556Sobrien      UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
46478556Sobrien      for (i = 0; i < nGroups; i++) pos[i] = i;
46578556Sobrien      for (i = 0; i < nSelectors; i++) {
46678556Sobrien         ll_i = s->selector[i];
46778556Sobrien         j = 0;
46878556Sobrien         tmp = pos[j];
46978556Sobrien         while ( ll_i != tmp ) {
47078556Sobrien            j++;
47178556Sobrien            tmp2 = tmp;
47278556Sobrien            tmp = pos[j];
47378556Sobrien            pos[j] = tmp2;
47478556Sobrien         };
47578556Sobrien         pos[0] = tmp;
47678556Sobrien         s->selectorMtf[i] = j;
47778556Sobrien      }
47878556Sobrien   };
47978556Sobrien
48078556Sobrien   /*--- Assign actual codes for the tables. --*/
48178556Sobrien   for (t = 0; t < nGroups; t++) {
48278556Sobrien      minLen = 32;
48378556Sobrien      maxLen = 0;
48478556Sobrien      for (i = 0; i < alphaSize; i++) {
48578556Sobrien         if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
48678556Sobrien         if (s->len[t][i] < minLen) minLen = s->len[t][i];
48778556Sobrien      }
488146293Sobrien      AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
48978556Sobrien      AssertH ( !(minLen < 1),  3005 );
49078556Sobrien      BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
49178556Sobrien                          minLen, maxLen, alphaSize );
49278556Sobrien   }
49378556Sobrien
49478556Sobrien   /*--- Transmit the mapping table. ---*/
49578556Sobrien   {
49678556Sobrien      Bool inUse16[16];
49778556Sobrien      for (i = 0; i < 16; i++) {
49878556Sobrien          inUse16[i] = False;
49978556Sobrien          for (j = 0; j < 16; j++)
50078556Sobrien             if (s->inUse[i * 16 + j]) inUse16[i] = True;
50178556Sobrien      }
50278556Sobrien
50378556Sobrien      nBytes = s->numZ;
50478556Sobrien      for (i = 0; i < 16; i++)
50578556Sobrien         if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
50678556Sobrien
50778556Sobrien      for (i = 0; i < 16; i++)
50878556Sobrien         if (inUse16[i])
50978556Sobrien            for (j = 0; j < 16; j++) {
51078556Sobrien               if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
51178556Sobrien            }
51278556Sobrien
51378556Sobrien      if (s->verbosity >= 3)
51478556Sobrien         VPrintf1( "      bytes: mapping %d, ", s->numZ-nBytes );
51578556Sobrien   }
51678556Sobrien
51778556Sobrien   /*--- Now the selectors. ---*/
51878556Sobrien   nBytes = s->numZ;
51978556Sobrien   bsW ( s, 3, nGroups );
52078556Sobrien   bsW ( s, 15, nSelectors );
52178556Sobrien   for (i = 0; i < nSelectors; i++) {
52278556Sobrien      for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
52378556Sobrien      bsW(s,1,0);
52478556Sobrien   }
52578556Sobrien   if (s->verbosity >= 3)
52678556Sobrien      VPrintf1( "selectors %d, ", s->numZ-nBytes );
52778556Sobrien
52878556Sobrien   /*--- Now the coding tables. ---*/
52978556Sobrien   nBytes = s->numZ;
53078556Sobrien
53178556Sobrien   for (t = 0; t < nGroups; t++) {
53278556Sobrien      Int32 curr = s->len[t][0];
53378556Sobrien      bsW ( s, 5, curr );
53478556Sobrien      for (i = 0; i < alphaSize; i++) {
53578556Sobrien         while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
53678556Sobrien         while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
53778556Sobrien         bsW ( s, 1, 0 );
53878556Sobrien      }
53978556Sobrien   }
54078556Sobrien
54178556Sobrien   if (s->verbosity >= 3)
54278556Sobrien      VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
54378556Sobrien
54478556Sobrien   /*--- And finally, the block data proper ---*/
54578556Sobrien   nBytes = s->numZ;
54678556Sobrien   selCtr = 0;
54778556Sobrien   gs = 0;
54878556Sobrien   while (True) {
54978556Sobrien      if (gs >= s->nMTF) break;
55078556Sobrien      ge = gs + BZ_G_SIZE - 1;
55178556Sobrien      if (ge >= s->nMTF) ge = s->nMTF-1;
55278556Sobrien      AssertH ( s->selector[selCtr] < nGroups, 3006 );
55378556Sobrien
55478556Sobrien      if (nGroups == 6 && 50 == ge-gs+1) {
55578556Sobrien            /*--- fast track the common case ---*/
55678556Sobrien            UInt16 mtfv_i;
55778556Sobrien            UChar* s_len_sel_selCtr
55878556Sobrien               = &(s->len[s->selector[selCtr]][0]);
55978556Sobrien            Int32* s_code_sel_selCtr
56078556Sobrien               = &(s->code[s->selector[selCtr]][0]);
56178556Sobrien
56278556Sobrien#           define BZ_ITAH(nn)                      \
56378556Sobrien               mtfv_i = mtfv[gs+(nn)];              \
56478556Sobrien               bsW ( s,                             \
56578556Sobrien                     s_len_sel_selCtr[mtfv_i],      \
56678556Sobrien                     s_code_sel_selCtr[mtfv_i] )
56778556Sobrien
56878556Sobrien            BZ_ITAH(0);  BZ_ITAH(1);  BZ_ITAH(2);  BZ_ITAH(3);  BZ_ITAH(4);
56978556Sobrien            BZ_ITAH(5);  BZ_ITAH(6);  BZ_ITAH(7);  BZ_ITAH(8);  BZ_ITAH(9);
57078556Sobrien            BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
57178556Sobrien            BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
57278556Sobrien            BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
57378556Sobrien            BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
57478556Sobrien            BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
57578556Sobrien            BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
57678556Sobrien            BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
57778556Sobrien            BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
57878556Sobrien
57978556Sobrien#           undef BZ_ITAH
58078556Sobrien
58178556Sobrien      } else {
58278556Sobrien	 /*--- slow version which correctly handles all situations ---*/
58378556Sobrien         for (i = gs; i <= ge; i++) {
58478556Sobrien            bsW ( s,
58578556Sobrien                  s->len  [s->selector[selCtr]] [mtfv[i]],
58678556Sobrien                  s->code [s->selector[selCtr]] [mtfv[i]] );
58778556Sobrien         }
58878556Sobrien      }
58978556Sobrien
59078556Sobrien
59178556Sobrien      gs = ge+1;
59278556Sobrien      selCtr++;
59378556Sobrien   }
59478556Sobrien   AssertH( selCtr == nSelectors, 3007 );
59578556Sobrien
59678556Sobrien   if (s->verbosity >= 3)
59778556Sobrien      VPrintf1( "codes %d\n", s->numZ-nBytes );
59878556Sobrien}
59978556Sobrien
60078556Sobrien
60178556Sobrien/*---------------------------------------------------*/
60278556Sobrienvoid BZ2_compressBlock ( EState* s, Bool is_last_block )
60378556Sobrien{
60478556Sobrien   if (s->nblock > 0) {
60578556Sobrien
60678556Sobrien      BZ_FINALISE_CRC ( s->blockCRC );
60778556Sobrien      s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
60878556Sobrien      s->combinedCRC ^= s->blockCRC;
60978556Sobrien      if (s->blockNo > 1) s->numZ = 0;
61078556Sobrien
61178556Sobrien      if (s->verbosity >= 2)
612146293Sobrien         VPrintf4( "    block %d: crc = 0x%08x, "
613146293Sobrien                   "combined CRC = 0x%08x, size = %d\n",
61478556Sobrien                   s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
61578556Sobrien
61678556Sobrien      BZ2_blockSort ( s );
61778556Sobrien   }
61878556Sobrien
61978556Sobrien   s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
62078556Sobrien
62178556Sobrien   /*-- If this is the first block, create the stream header. --*/
62278556Sobrien   if (s->blockNo == 1) {
62378556Sobrien      BZ2_bsInitWrite ( s );
62490067Ssobomax      bsPutUChar ( s, BZ_HDR_B );
62590067Ssobomax      bsPutUChar ( s, BZ_HDR_Z );
62690067Ssobomax      bsPutUChar ( s, BZ_HDR_h );
62790067Ssobomax      bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
62878556Sobrien   }
62978556Sobrien
63078556Sobrien   if (s->nblock > 0) {
63178556Sobrien
63278556Sobrien      bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
63378556Sobrien      bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
63478556Sobrien      bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
63578556Sobrien
63678556Sobrien      /*-- Now the block's CRC, so it is in a known place. --*/
63778556Sobrien      bsPutUInt32 ( s, s->blockCRC );
63878556Sobrien
63978556Sobrien      /*--
64078556Sobrien         Now a single bit indicating (non-)randomisation.
64178556Sobrien         As of version 0.9.5, we use a better sorting algorithm
64278556Sobrien         which makes randomisation unnecessary.  So always set
64378556Sobrien         the randomised bit to 'no'.  Of course, the decoder
64478556Sobrien         still needs to be able to handle randomised blocks
64578556Sobrien         so as to maintain backwards compatibility with
64678556Sobrien         older versions of bzip2.
64778556Sobrien      --*/
64878556Sobrien      bsW(s,1,0);
64978556Sobrien
65078556Sobrien      bsW ( s, 24, s->origPtr );
65178556Sobrien      generateMTFValues ( s );
65278556Sobrien      sendMTFValues ( s );
65378556Sobrien   }
65478556Sobrien
65578556Sobrien
65678556Sobrien   /*-- If this is the last block, add the stream trailer. --*/
65778556Sobrien   if (is_last_block) {
65878556Sobrien
65978556Sobrien      bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
66078556Sobrien      bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
66178556Sobrien      bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
66278556Sobrien      bsPutUInt32 ( s, s->combinedCRC );
66378556Sobrien      if (s->verbosity >= 2)
664146293Sobrien         VPrintf1( "    final combined CRC = 0x%08x\n   ", s->combinedCRC );
66578556Sobrien      bsFinishWrite ( s );
66678556Sobrien   }
66778556Sobrien}
66878556Sobrien
66978556Sobrien
67078556Sobrien/*-------------------------------------------------------------*/
67178556Sobrien/*--- end                                        compress.c ---*/
67278556Sobrien/*-------------------------------------------------------------*/
673