178556Sobrien
278556Sobrien/*-------------------------------------------------------------*/
378556Sobrien/*--- Huffman coding low-level stuff                        ---*/
478556Sobrien/*---                                             huffman.c ---*/
578556Sobrien/*-------------------------------------------------------------*/
678556Sobrien
7167974Sdelphij/* ------------------------------------------------------------------
8167974Sdelphij   This file is part of bzip2/libbzip2, a program and library for
9167974Sdelphij   lossless, block-sorting data compression.
1078556Sobrien
11215041Sobrien   bzip2/libbzip2 version 1.0.6 of 6 September 2010
12215041Sobrien   Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
1378556Sobrien
14167974Sdelphij   Please read the WARNING, DISCLAIMER and PATENTS sections in the
15167974Sdelphij   README file.
1678556Sobrien
17167974Sdelphij   This program is released under the terms of the license contained
18167974Sdelphij   in the file LICENSE.
19167974Sdelphij   ------------------------------------------------------------------ */
2078556Sobrien
2178556Sobrien
2278556Sobrien#include "bzlib_private.h"
2378556Sobrien
2478556Sobrien/*---------------------------------------------------*/
2578556Sobrien#define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
2678556Sobrien#define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
2778556Sobrien#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
2878556Sobrien
2978556Sobrien#define ADDWEIGHTS(zw1,zw2)                           \
3078556Sobrien   (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
3178556Sobrien   (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
3278556Sobrien
3378556Sobrien#define UPHEAP(z)                                     \
3478556Sobrien{                                                     \
3578556Sobrien   Int32 zz, tmp;                                     \
3678556Sobrien   zz = z; tmp = heap[zz];                            \
3778556Sobrien   while (weight[tmp] < weight[heap[zz >> 1]]) {      \
3878556Sobrien      heap[zz] = heap[zz >> 1];                       \
3978556Sobrien      zz >>= 1;                                       \
4078556Sobrien   }                                                  \
4178556Sobrien   heap[zz] = tmp;                                    \
4278556Sobrien}
4378556Sobrien
4478556Sobrien#define DOWNHEAP(z)                                   \
4578556Sobrien{                                                     \
4678556Sobrien   Int32 zz, yy, tmp;                                 \
4778556Sobrien   zz = z; tmp = heap[zz];                            \
4878556Sobrien   while (True) {                                     \
4978556Sobrien      yy = zz << 1;                                   \
5078556Sobrien      if (yy > nHeap) break;                          \
5178556Sobrien      if (yy < nHeap &&                               \
5278556Sobrien          weight[heap[yy+1]] < weight[heap[yy]])      \
5378556Sobrien         yy++;                                        \
5478556Sobrien      if (weight[tmp] < weight[heap[yy]]) break;      \
5578556Sobrien      heap[zz] = heap[yy];                            \
5678556Sobrien      zz = yy;                                        \
5778556Sobrien   }                                                  \
5878556Sobrien   heap[zz] = tmp;                                    \
5978556Sobrien}
6078556Sobrien
6178556Sobrien
6278556Sobrien/*---------------------------------------------------*/
6378556Sobrienvoid BZ2_hbMakeCodeLengths ( UChar *len,
6478556Sobrien                             Int32 *freq,
6578556Sobrien                             Int32 alphaSize,
6678556Sobrien                             Int32 maxLen )
6778556Sobrien{
6878556Sobrien   /*--
6978556Sobrien      Nodes and heap entries run from 1.  Entry 0
7078556Sobrien      for both the heap and nodes is a sentinel.
7178556Sobrien   --*/
7278556Sobrien   Int32 nNodes, nHeap, n1, n2, i, j, k;
7378556Sobrien   Bool  tooLong;
7478556Sobrien
7578556Sobrien   Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
7678556Sobrien   Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
7778556Sobrien   Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
7878556Sobrien
7978556Sobrien   for (i = 0; i < alphaSize; i++)
8078556Sobrien      weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
8178556Sobrien
8278556Sobrien   while (True) {
8378556Sobrien
8478556Sobrien      nNodes = alphaSize;
8578556Sobrien      nHeap = 0;
8678556Sobrien
8778556Sobrien      heap[0] = 0;
8878556Sobrien      weight[0] = 0;
8978556Sobrien      parent[0] = -2;
9078556Sobrien
9178556Sobrien      for (i = 1; i <= alphaSize; i++) {
9278556Sobrien         parent[i] = -1;
9378556Sobrien         nHeap++;
9478556Sobrien         heap[nHeap] = i;
9578556Sobrien         UPHEAP(nHeap);
9678556Sobrien      }
9778556Sobrien
9878556Sobrien      AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
9978556Sobrien
10078556Sobrien      while (nHeap > 1) {
10178556Sobrien         n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
10278556Sobrien         n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
10378556Sobrien         nNodes++;
10478556Sobrien         parent[n1] = parent[n2] = nNodes;
10578556Sobrien         weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
10678556Sobrien         parent[nNodes] = -1;
10778556Sobrien         nHeap++;
10878556Sobrien         heap[nHeap] = nNodes;
10978556Sobrien         UPHEAP(nHeap);
11078556Sobrien      }
11178556Sobrien
11278556Sobrien      AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
11378556Sobrien
11478556Sobrien      tooLong = False;
11578556Sobrien      for (i = 1; i <= alphaSize; i++) {
11678556Sobrien         j = 0;
11778556Sobrien         k = i;
11878556Sobrien         while (parent[k] >= 0) { k = parent[k]; j++; }
11978556Sobrien         len[i-1] = j;
12078556Sobrien         if (j > maxLen) tooLong = True;
12178556Sobrien      }
12278556Sobrien
12378556Sobrien      if (! tooLong) break;
12478556Sobrien
125146293Sobrien      /* 17 Oct 04: keep-going condition for the following loop used
126146293Sobrien         to be 'i < alphaSize', which missed the last element,
127146293Sobrien         theoretically leading to the possibility of the compressor
128146293Sobrien         looping.  However, this count-scaling step is only needed if
129146293Sobrien         one of the generated Huffman code words is longer than
130146293Sobrien         maxLen, which up to and including version 1.0.2 was 20 bits,
131146293Sobrien         which is extremely unlikely.  In version 1.0.3 maxLen was
132146293Sobrien         changed to 17 bits, which has minimal effect on compression
133146293Sobrien         ratio, but does mean this scaling step is used from time to
134146293Sobrien         time, enough to verify that it works.
135146293Sobrien
136146293Sobrien         This means that bzip2-1.0.3 and later will only produce
137146293Sobrien         Huffman codes with a maximum length of 17 bits.  However, in
138146293Sobrien         order to preserve backwards compatibility with bitstreams
139146293Sobrien         produced by versions pre-1.0.3, the decompressor must still
140146293Sobrien         handle lengths of up to 20. */
141146293Sobrien
142146293Sobrien      for (i = 1; i <= alphaSize; i++) {
14378556Sobrien         j = weight[i] >> 8;
14478556Sobrien         j = 1 + (j / 2);
14578556Sobrien         weight[i] = j << 8;
14678556Sobrien      }
14778556Sobrien   }
14878556Sobrien}
14978556Sobrien
15078556Sobrien
15178556Sobrien/*---------------------------------------------------*/
15278556Sobrienvoid BZ2_hbAssignCodes ( Int32 *code,
15378556Sobrien                         UChar *length,
15478556Sobrien                         Int32 minLen,
15578556Sobrien                         Int32 maxLen,
15678556Sobrien                         Int32 alphaSize )
15778556Sobrien{
15878556Sobrien   Int32 n, vec, i;
15978556Sobrien
16078556Sobrien   vec = 0;
16178556Sobrien   for (n = minLen; n <= maxLen; n++) {
16278556Sobrien      for (i = 0; i < alphaSize; i++)
16378556Sobrien         if (length[i] == n) { code[i] = vec; vec++; };
16478556Sobrien      vec <<= 1;
16578556Sobrien   }
16678556Sobrien}
16778556Sobrien
16878556Sobrien
16978556Sobrien/*---------------------------------------------------*/
17078556Sobrienvoid BZ2_hbCreateDecodeTables ( Int32 *limit,
17178556Sobrien                                Int32 *base,
17278556Sobrien                                Int32 *perm,
17378556Sobrien                                UChar *length,
17478556Sobrien                                Int32 minLen,
17578556Sobrien                                Int32 maxLen,
17678556Sobrien                                Int32 alphaSize )
17778556Sobrien{
17878556Sobrien   Int32 pp, i, j, vec;
17978556Sobrien
18078556Sobrien   pp = 0;
18178556Sobrien   for (i = minLen; i <= maxLen; i++)
18278556Sobrien      for (j = 0; j < alphaSize; j++)
18378556Sobrien         if (length[j] == i) { perm[pp] = j; pp++; };
18478556Sobrien
18578556Sobrien   for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
18678556Sobrien   for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
18778556Sobrien
18878556Sobrien   for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
18978556Sobrien
19078556Sobrien   for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
19178556Sobrien   vec = 0;
19278556Sobrien
19378556Sobrien   for (i = minLen; i <= maxLen; i++) {
19478556Sobrien      vec += (base[i+1] - base[i]);
19578556Sobrien      limit[i] = vec-1;
19678556Sobrien      vec <<= 1;
19778556Sobrien   }
19878556Sobrien   for (i = minLen + 1; i <= maxLen; i++)
19978556Sobrien      base[i] = ((limit[i-1] + 1) << 1) - base[i];
20078556Sobrien}
20178556Sobrien
20278556Sobrien
20378556Sobrien/*-------------------------------------------------------------*/
20478556Sobrien/*--- end                                         huffman.c ---*/
20578556Sobrien/*-------------------------------------------------------------*/
206