178556Sobrien 278556Sobrien/*-------------------------------------------------------------*/ 378556Sobrien/*--- Huffman coding low-level stuff ---*/ 478556Sobrien/*--- huffman.c ---*/ 578556Sobrien/*-------------------------------------------------------------*/ 678556Sobrien 7167974Sdelphij/* ------------------------------------------------------------------ 8167974Sdelphij This file is part of bzip2/libbzip2, a program and library for 9167974Sdelphij lossless, block-sorting data compression. 1078556Sobrien 11215041Sobrien bzip2/libbzip2 version 1.0.6 of 6 September 2010 12215041Sobrien Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org> 1378556Sobrien 14167974Sdelphij Please read the WARNING, DISCLAIMER and PATENTS sections in the 15167974Sdelphij README file. 1678556Sobrien 17167974Sdelphij This program is released under the terms of the license contained 18167974Sdelphij in the file LICENSE. 19167974Sdelphij ------------------------------------------------------------------ */ 2078556Sobrien 2178556Sobrien 2278556Sobrien#include "bzlib_private.h" 2378556Sobrien 2478556Sobrien/*---------------------------------------------------*/ 2578556Sobrien#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) 2678556Sobrien#define DEPTHOF(zz1) ((zz1) & 0x000000ff) 2778556Sobrien#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) 2878556Sobrien 2978556Sobrien#define ADDWEIGHTS(zw1,zw2) \ 3078556Sobrien (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ 3178556Sobrien (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) 3278556Sobrien 3378556Sobrien#define UPHEAP(z) \ 3478556Sobrien{ \ 3578556Sobrien Int32 zz, tmp; \ 3678556Sobrien zz = z; tmp = heap[zz]; \ 3778556Sobrien while (weight[tmp] < weight[heap[zz >> 1]]) { \ 3878556Sobrien heap[zz] = heap[zz >> 1]; \ 3978556Sobrien zz >>= 1; \ 4078556Sobrien } \ 4178556Sobrien heap[zz] = tmp; \ 4278556Sobrien} 4378556Sobrien 4478556Sobrien#define DOWNHEAP(z) \ 4578556Sobrien{ \ 4678556Sobrien Int32 zz, yy, tmp; \ 4778556Sobrien zz = z; tmp = heap[zz]; \ 4878556Sobrien while (True) { \ 4978556Sobrien yy = zz << 1; \ 5078556Sobrien if (yy > nHeap) break; \ 5178556Sobrien if (yy < nHeap && \ 5278556Sobrien weight[heap[yy+1]] < weight[heap[yy]]) \ 5378556Sobrien yy++; \ 5478556Sobrien if (weight[tmp] < weight[heap[yy]]) break; \ 5578556Sobrien heap[zz] = heap[yy]; \ 5678556Sobrien zz = yy; \ 5778556Sobrien } \ 5878556Sobrien heap[zz] = tmp; \ 5978556Sobrien} 6078556Sobrien 6178556Sobrien 6278556Sobrien/*---------------------------------------------------*/ 6378556Sobrienvoid BZ2_hbMakeCodeLengths ( UChar *len, 6478556Sobrien Int32 *freq, 6578556Sobrien Int32 alphaSize, 6678556Sobrien Int32 maxLen ) 6778556Sobrien{ 6878556Sobrien /*-- 6978556Sobrien Nodes and heap entries run from 1. Entry 0 7078556Sobrien for both the heap and nodes is a sentinel. 7178556Sobrien --*/ 7278556Sobrien Int32 nNodes, nHeap, n1, n2, i, j, k; 7378556Sobrien Bool tooLong; 7478556Sobrien 7578556Sobrien Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; 7678556Sobrien Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; 7778556Sobrien Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 7878556Sobrien 7978556Sobrien for (i = 0; i < alphaSize; i++) 8078556Sobrien weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 8178556Sobrien 8278556Sobrien while (True) { 8378556Sobrien 8478556Sobrien nNodes = alphaSize; 8578556Sobrien nHeap = 0; 8678556Sobrien 8778556Sobrien heap[0] = 0; 8878556Sobrien weight[0] = 0; 8978556Sobrien parent[0] = -2; 9078556Sobrien 9178556Sobrien for (i = 1; i <= alphaSize; i++) { 9278556Sobrien parent[i] = -1; 9378556Sobrien nHeap++; 9478556Sobrien heap[nHeap] = i; 9578556Sobrien UPHEAP(nHeap); 9678556Sobrien } 9778556Sobrien 9878556Sobrien AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); 9978556Sobrien 10078556Sobrien while (nHeap > 1) { 10178556Sobrien n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 10278556Sobrien n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 10378556Sobrien nNodes++; 10478556Sobrien parent[n1] = parent[n2] = nNodes; 10578556Sobrien weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); 10678556Sobrien parent[nNodes] = -1; 10778556Sobrien nHeap++; 10878556Sobrien heap[nHeap] = nNodes; 10978556Sobrien UPHEAP(nHeap); 11078556Sobrien } 11178556Sobrien 11278556Sobrien AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); 11378556Sobrien 11478556Sobrien tooLong = False; 11578556Sobrien for (i = 1; i <= alphaSize; i++) { 11678556Sobrien j = 0; 11778556Sobrien k = i; 11878556Sobrien while (parent[k] >= 0) { k = parent[k]; j++; } 11978556Sobrien len[i-1] = j; 12078556Sobrien if (j > maxLen) tooLong = True; 12178556Sobrien } 12278556Sobrien 12378556Sobrien if (! tooLong) break; 12478556Sobrien 125146293Sobrien /* 17 Oct 04: keep-going condition for the following loop used 126146293Sobrien to be 'i < alphaSize', which missed the last element, 127146293Sobrien theoretically leading to the possibility of the compressor 128146293Sobrien looping. However, this count-scaling step is only needed if 129146293Sobrien one of the generated Huffman code words is longer than 130146293Sobrien maxLen, which up to and including version 1.0.2 was 20 bits, 131146293Sobrien which is extremely unlikely. In version 1.0.3 maxLen was 132146293Sobrien changed to 17 bits, which has minimal effect on compression 133146293Sobrien ratio, but does mean this scaling step is used from time to 134146293Sobrien time, enough to verify that it works. 135146293Sobrien 136146293Sobrien This means that bzip2-1.0.3 and later will only produce 137146293Sobrien Huffman codes with a maximum length of 17 bits. However, in 138146293Sobrien order to preserve backwards compatibility with bitstreams 139146293Sobrien produced by versions pre-1.0.3, the decompressor must still 140146293Sobrien handle lengths of up to 20. */ 141146293Sobrien 142146293Sobrien for (i = 1; i <= alphaSize; i++) { 14378556Sobrien j = weight[i] >> 8; 14478556Sobrien j = 1 + (j / 2); 14578556Sobrien weight[i] = j << 8; 14678556Sobrien } 14778556Sobrien } 14878556Sobrien} 14978556Sobrien 15078556Sobrien 15178556Sobrien/*---------------------------------------------------*/ 15278556Sobrienvoid BZ2_hbAssignCodes ( Int32 *code, 15378556Sobrien UChar *length, 15478556Sobrien Int32 minLen, 15578556Sobrien Int32 maxLen, 15678556Sobrien Int32 alphaSize ) 15778556Sobrien{ 15878556Sobrien Int32 n, vec, i; 15978556Sobrien 16078556Sobrien vec = 0; 16178556Sobrien for (n = minLen; n <= maxLen; n++) { 16278556Sobrien for (i = 0; i < alphaSize; i++) 16378556Sobrien if (length[i] == n) { code[i] = vec; vec++; }; 16478556Sobrien vec <<= 1; 16578556Sobrien } 16678556Sobrien} 16778556Sobrien 16878556Sobrien 16978556Sobrien/*---------------------------------------------------*/ 17078556Sobrienvoid BZ2_hbCreateDecodeTables ( Int32 *limit, 17178556Sobrien Int32 *base, 17278556Sobrien Int32 *perm, 17378556Sobrien UChar *length, 17478556Sobrien Int32 minLen, 17578556Sobrien Int32 maxLen, 17678556Sobrien Int32 alphaSize ) 17778556Sobrien{ 17878556Sobrien Int32 pp, i, j, vec; 17978556Sobrien 18078556Sobrien pp = 0; 18178556Sobrien for (i = minLen; i <= maxLen; i++) 18278556Sobrien for (j = 0; j < alphaSize; j++) 18378556Sobrien if (length[j] == i) { perm[pp] = j; pp++; }; 18478556Sobrien 18578556Sobrien for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; 18678556Sobrien for (i = 0; i < alphaSize; i++) base[length[i]+1]++; 18778556Sobrien 18878556Sobrien for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; 18978556Sobrien 19078556Sobrien for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; 19178556Sobrien vec = 0; 19278556Sobrien 19378556Sobrien for (i = minLen; i <= maxLen; i++) { 19478556Sobrien vec += (base[i+1] - base[i]); 19578556Sobrien limit[i] = vec-1; 19678556Sobrien vec <<= 1; 19778556Sobrien } 19878556Sobrien for (i = minLen + 1; i <= maxLen; i++) 19978556Sobrien base[i] = ((limit[i-1] + 1) << 1) - base[i]; 20078556Sobrien} 20178556Sobrien 20278556Sobrien 20378556Sobrien/*-------------------------------------------------------------*/ 20478556Sobrien/*--- end huffman.c ---*/ 20578556Sobrien/*-------------------------------------------------------------*/ 206