huffman.c revision 78556
178556Sobrien 278556Sobrien/*-------------------------------------------------------------*/ 378556Sobrien/*--- Huffman coding low-level stuff ---*/ 478556Sobrien/*--- huffman.c ---*/ 578556Sobrien/*-------------------------------------------------------------*/ 678556Sobrien 778556Sobrien/*-- 878556Sobrien This file is a part of bzip2 and/or libbzip2, a program and 978556Sobrien library for lossless, block-sorting data compression. 1078556Sobrien 1178556Sobrien Copyright (C) 1996-2000 Julian R Seward. All rights reserved. 1278556Sobrien 1378556Sobrien Redistribution and use in source and binary forms, with or without 1478556Sobrien modification, are permitted provided that the following conditions 1578556Sobrien are met: 1678556Sobrien 1778556Sobrien 1. Redistributions of source code must retain the above copyright 1878556Sobrien notice, this list of conditions and the following disclaimer. 1978556Sobrien 2078556Sobrien 2. The origin of this software must not be misrepresented; you must 2178556Sobrien not claim that you wrote the original software. If you use this 2278556Sobrien software in a product, an acknowledgment in the product 2378556Sobrien documentation would be appreciated but is not required. 2478556Sobrien 2578556Sobrien 3. Altered source versions must be plainly marked as such, and must 2678556Sobrien not be misrepresented as being the original software. 2778556Sobrien 2878556Sobrien 4. The name of the author may not be used to endorse or promote 2978556Sobrien products derived from this software without specific prior written 3078556Sobrien permission. 3178556Sobrien 3278556Sobrien THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 3378556Sobrien OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 3478556Sobrien WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3578556Sobrien ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 3678556Sobrien DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3778556Sobrien DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 3878556Sobrien GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 3978556Sobrien INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 4078556Sobrien WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 4178556Sobrien NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 4278556Sobrien SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 4378556Sobrien 4478556Sobrien Julian Seward, Cambridge, UK. 4578556Sobrien jseward@acm.org 4678556Sobrien bzip2/libbzip2 version 1.0 of 21 March 2000 4778556Sobrien 4878556Sobrien This program is based on (at least) the work of: 4978556Sobrien Mike Burrows 5078556Sobrien David Wheeler 5178556Sobrien Peter Fenwick 5278556Sobrien Alistair Moffat 5378556Sobrien Radford Neal 5478556Sobrien Ian H. Witten 5578556Sobrien Robert Sedgewick 5678556Sobrien Jon L. Bentley 5778556Sobrien 5878556Sobrien For more information on these sources, see the manual. 5978556Sobrien--*/ 6078556Sobrien 6178556Sobrien 6278556Sobrien#include "bzlib_private.h" 6378556Sobrien 6478556Sobrien/*---------------------------------------------------*/ 6578556Sobrien#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) 6678556Sobrien#define DEPTHOF(zz1) ((zz1) & 0x000000ff) 6778556Sobrien#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) 6878556Sobrien 6978556Sobrien#define ADDWEIGHTS(zw1,zw2) \ 7078556Sobrien (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ 7178556Sobrien (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) 7278556Sobrien 7378556Sobrien#define UPHEAP(z) \ 7478556Sobrien{ \ 7578556Sobrien Int32 zz, tmp; \ 7678556Sobrien zz = z; tmp = heap[zz]; \ 7778556Sobrien while (weight[tmp] < weight[heap[zz >> 1]]) { \ 7878556Sobrien heap[zz] = heap[zz >> 1]; \ 7978556Sobrien zz >>= 1; \ 8078556Sobrien } \ 8178556Sobrien heap[zz] = tmp; \ 8278556Sobrien} 8378556Sobrien 8478556Sobrien#define DOWNHEAP(z) \ 8578556Sobrien{ \ 8678556Sobrien Int32 zz, yy, tmp; \ 8778556Sobrien zz = z; tmp = heap[zz]; \ 8878556Sobrien while (True) { \ 8978556Sobrien yy = zz << 1; \ 9078556Sobrien if (yy > nHeap) break; \ 9178556Sobrien if (yy < nHeap && \ 9278556Sobrien weight[heap[yy+1]] < weight[heap[yy]]) \ 9378556Sobrien yy++; \ 9478556Sobrien if (weight[tmp] < weight[heap[yy]]) break; \ 9578556Sobrien heap[zz] = heap[yy]; \ 9678556Sobrien zz = yy; \ 9778556Sobrien } \ 9878556Sobrien heap[zz] = tmp; \ 9978556Sobrien} 10078556Sobrien 10178556Sobrien 10278556Sobrien/*---------------------------------------------------*/ 10378556Sobrienvoid BZ2_hbMakeCodeLengths ( UChar *len, 10478556Sobrien Int32 *freq, 10578556Sobrien Int32 alphaSize, 10678556Sobrien Int32 maxLen ) 10778556Sobrien{ 10878556Sobrien /*-- 10978556Sobrien Nodes and heap entries run from 1. Entry 0 11078556Sobrien for both the heap and nodes is a sentinel. 11178556Sobrien --*/ 11278556Sobrien Int32 nNodes, nHeap, n1, n2, i, j, k; 11378556Sobrien Bool tooLong; 11478556Sobrien 11578556Sobrien Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; 11678556Sobrien Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; 11778556Sobrien Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 11878556Sobrien 11978556Sobrien for (i = 0; i < alphaSize; i++) 12078556Sobrien weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 12178556Sobrien 12278556Sobrien while (True) { 12378556Sobrien 12478556Sobrien nNodes = alphaSize; 12578556Sobrien nHeap = 0; 12678556Sobrien 12778556Sobrien heap[0] = 0; 12878556Sobrien weight[0] = 0; 12978556Sobrien parent[0] = -2; 13078556Sobrien 13178556Sobrien for (i = 1; i <= alphaSize; i++) { 13278556Sobrien parent[i] = -1; 13378556Sobrien nHeap++; 13478556Sobrien heap[nHeap] = i; 13578556Sobrien UPHEAP(nHeap); 13678556Sobrien } 13778556Sobrien 13878556Sobrien AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); 13978556Sobrien 14078556Sobrien while (nHeap > 1) { 14178556Sobrien n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 14278556Sobrien n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 14378556Sobrien nNodes++; 14478556Sobrien parent[n1] = parent[n2] = nNodes; 14578556Sobrien weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); 14678556Sobrien parent[nNodes] = -1; 14778556Sobrien nHeap++; 14878556Sobrien heap[nHeap] = nNodes; 14978556Sobrien UPHEAP(nHeap); 15078556Sobrien } 15178556Sobrien 15278556Sobrien AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); 15378556Sobrien 15478556Sobrien tooLong = False; 15578556Sobrien for (i = 1; i <= alphaSize; i++) { 15678556Sobrien j = 0; 15778556Sobrien k = i; 15878556Sobrien while (parent[k] >= 0) { k = parent[k]; j++; } 15978556Sobrien len[i-1] = j; 16078556Sobrien if (j > maxLen) tooLong = True; 16178556Sobrien } 16278556Sobrien 16378556Sobrien if (! tooLong) break; 16478556Sobrien 16578556Sobrien for (i = 1; i < alphaSize; i++) { 16678556Sobrien j = weight[i] >> 8; 16778556Sobrien j = 1 + (j / 2); 16878556Sobrien weight[i] = j << 8; 16978556Sobrien } 17078556Sobrien } 17178556Sobrien} 17278556Sobrien 17378556Sobrien 17478556Sobrien/*---------------------------------------------------*/ 17578556Sobrienvoid BZ2_hbAssignCodes ( Int32 *code, 17678556Sobrien UChar *length, 17778556Sobrien Int32 minLen, 17878556Sobrien Int32 maxLen, 17978556Sobrien Int32 alphaSize ) 18078556Sobrien{ 18178556Sobrien Int32 n, vec, i; 18278556Sobrien 18378556Sobrien vec = 0; 18478556Sobrien for (n = minLen; n <= maxLen; n++) { 18578556Sobrien for (i = 0; i < alphaSize; i++) 18678556Sobrien if (length[i] == n) { code[i] = vec; vec++; }; 18778556Sobrien vec <<= 1; 18878556Sobrien } 18978556Sobrien} 19078556Sobrien 19178556Sobrien 19278556Sobrien/*---------------------------------------------------*/ 19378556Sobrienvoid BZ2_hbCreateDecodeTables ( Int32 *limit, 19478556Sobrien Int32 *base, 19578556Sobrien Int32 *perm, 19678556Sobrien UChar *length, 19778556Sobrien Int32 minLen, 19878556Sobrien Int32 maxLen, 19978556Sobrien Int32 alphaSize ) 20078556Sobrien{ 20178556Sobrien Int32 pp, i, j, vec; 20278556Sobrien 20378556Sobrien pp = 0; 20478556Sobrien for (i = minLen; i <= maxLen; i++) 20578556Sobrien for (j = 0; j < alphaSize; j++) 20678556Sobrien if (length[j] == i) { perm[pp] = j; pp++; }; 20778556Sobrien 20878556Sobrien for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; 20978556Sobrien for (i = 0; i < alphaSize; i++) base[length[i]+1]++; 21078556Sobrien 21178556Sobrien for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; 21278556Sobrien 21378556Sobrien for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; 21478556Sobrien vec = 0; 21578556Sobrien 21678556Sobrien for (i = minLen; i <= maxLen; i++) { 21778556Sobrien vec += (base[i+1] - base[i]); 21878556Sobrien limit[i] = vec-1; 21978556Sobrien vec <<= 1; 22078556Sobrien } 22178556Sobrien for (i = minLen + 1; i <= maxLen; i++) 22278556Sobrien base[i] = ((limit[i-1] + 1) << 1) - base[i]; 22378556Sobrien} 22478556Sobrien 22578556Sobrien 22678556Sobrien/*-------------------------------------------------------------*/ 22778556Sobrien/*--- end huffman.c ---*/ 22878556Sobrien/*-------------------------------------------------------------*/ 229