117651Speter/* inftrees.c -- generate Huffman trees for efficient decoding 2250261Sdelphij * Copyright (C) 1995-2013 Mark Adler 3131380Stjr * For conditions of distribution and use, see copyright notice in zlib.h 417651Speter */ 517651Speter 617651Speter#include "zutil.h" 717651Speter#include "inftrees.h" 817651Speter 9131380Stjr#define MAXBITS 15 1042468Speter 1133904Ssteveconst char inflate_copyright[] = 12250261Sdelphij " inflate 1.2.8 Copyright 1995-2013 Mark Adler "; 1317651Speter/* 1417651Speter If you use the zlib library in a product, an acknowledgment is welcome 1517651Speter in the documentation of your product. If for some reason you cannot 1617651Speter include such an acknowledgment, I would appreciate that you keep this 1717651Speter copyright string in the executable of your product. 1817651Speter */ 1917651Speter 20131380Stjr/* 21131380Stjr Build a set of tables to decode the provided canonical Huffman code. 22131380Stjr The code lengths are lens[0..codes-1]. The result starts at *table, 23131380Stjr whose indices are 0..2^bits-1. work is a writable array of at least 24131380Stjr lens shorts, which is used as a work area. type is the type of code 25131380Stjr to be generated, CODES, LENS, or DISTS. On return, zero is success, 26131380Stjr -1 is an invalid code, and +1 means that ENOUGH isn't enough. table 27131380Stjr on return points to the next available entry's address. bits is the 28131380Stjr requested root table index bits, and on return it is the actual root 29131380Stjr table index bits. It will differ if the request is greater than the 30131380Stjr longest code or if it is less than the shortest code. 31131380Stjr */ 32206924Sdelphijint ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) 33131380Stjrcodetype type; 34131380Stjrunsigned short FAR *lens; 35131380Stjrunsigned codes; 36131380Stjrcode FAR * FAR *table; 37131380Stjrunsigned FAR *bits; 38131380Stjrunsigned short FAR *work; 39131380Stjr{ 40131380Stjr unsigned len; /* a code's length in bits */ 41131380Stjr unsigned sym; /* index of code symbols */ 42131380Stjr unsigned min, max; /* minimum and maximum code lengths */ 43131380Stjr unsigned root; /* number of index bits for root table */ 44131380Stjr unsigned curr; /* number of index bits for current table */ 45131380Stjr unsigned drop; /* code bits to drop for sub-table */ 46131380Stjr int left; /* number of prefix codes available */ 47131380Stjr unsigned used; /* code entries in table used */ 48131380Stjr unsigned huff; /* Huffman code */ 49131380Stjr unsigned incr; /* for incrementing code, index */ 50131380Stjr unsigned fill; /* index for replicating entries */ 51131380Stjr unsigned low; /* low bits for current root entry */ 52131380Stjr unsigned mask; /* mask for low root bits */ 53205471Sdelphij code here; /* table entry for duplication */ 54131380Stjr code FAR *next; /* next available space in table */ 55131380Stjr const unsigned short FAR *base; /* base value table to use */ 56131380Stjr const unsigned short FAR *extra; /* extra bits table to use */ 57131380Stjr int end; /* use base and extra for symbol > end */ 58131380Stjr unsigned short count[MAXBITS+1]; /* number of codes of each length */ 59131380Stjr unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ 60131380Stjr static const unsigned short lbase[31] = { /* Length codes 257..285 base */ 6117651Speter 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 6217651Speter 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; 63131380Stjr static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 64131380Stjr 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 65250261Sdelphij 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 72, 78}; 66131380Stjr static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ 6717651Speter 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 6817651Speter 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 69131380Stjr 8193, 12289, 16385, 24577, 0, 0}; 70131380Stjr static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ 71131380Stjr 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 72131380Stjr 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 73131380Stjr 28, 28, 29, 29, 64, 64}; 7417651Speter 75131380Stjr /* 76131380Stjr Process a set of code lengths to create a canonical Huffman code. The 77131380Stjr code lengths are lens[0..codes-1]. Each length corresponds to the 78131380Stjr symbols 0..codes-1. The Huffman code is generated by first sorting the 79131380Stjr symbols by length from short to long, and retaining the symbol order 80131380Stjr for codes with equal lengths. Then the code starts with all zero bits 81131380Stjr for the first code of the shortest length, and the codes are integer 82131380Stjr increments for the same length, and zeros are appended as the length 83131380Stjr increases. For the deflate format, these bits are stored backwards 84131380Stjr from their more natural integer increment ordering, and so when the 85131380Stjr decoding tables are built in the large loop below, the integer codes 86131380Stjr are incremented backwards. 8717651Speter 88131380Stjr This routine assumes, but does not check, that all of the entries in 89131380Stjr lens[] are in the range 0..MAXBITS. The caller must assure this. 90131380Stjr 1..MAXBITS is interpreted as that code length. zero means that that 91131380Stjr symbol does not occur in this code. 9217651Speter 93131380Stjr The codes are sorted by computing a count of codes for each length, 94131380Stjr creating from that a table of starting indices for each length in the 95131380Stjr sorted table, and then entering the symbols in order in the sorted 96131380Stjr table. The sorted table is work[], with that space being provided by 97131380Stjr the caller. 9817651Speter 99131380Stjr The length counts are used for other purposes as well, i.e. finding 100131380Stjr the minimum and maximum length codes, determining if there are any 101131380Stjr codes at all, checking for a valid set of lengths, and looking ahead 102131380Stjr at length counts to determine sub-table sizes when building the 103131380Stjr decoding tables. 104131380Stjr */ 10517651Speter 106131380Stjr /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ 107131380Stjr for (len = 0; len <= MAXBITS; len++) 108131380Stjr count[len] = 0; 109131380Stjr for (sym = 0; sym < codes; sym++) 110131380Stjr count[lens[sym]]++; 11117651Speter 112131380Stjr /* bound code lengths, force root to be within code lengths */ 113131380Stjr root = *bits; 114131380Stjr for (max = MAXBITS; max >= 1; max--) 115131380Stjr if (count[max] != 0) break; 116131380Stjr if (root > max) root = max; 117146081Skientzle if (max == 0) { /* no symbols to code at all */ 118205471Sdelphij here.op = (unsigned char)64; /* invalid code marker */ 119205471Sdelphij here.bits = (unsigned char)1; 120205471Sdelphij here.val = (unsigned short)0; 121205471Sdelphij *(*table)++ = here; /* make a table to force an error */ 122205471Sdelphij *(*table)++ = here; 123146081Skientzle *bits = 1; 124146081Skientzle return 0; /* no symbols, but wait for decoding to report error */ 125146081Skientzle } 126205471Sdelphij for (min = 1; min < max; min++) 127131380Stjr if (count[min] != 0) break; 128131380Stjr if (root < min) root = min; 12917651Speter 130131380Stjr /* check for an over-subscribed or incomplete set of lengths */ 131131380Stjr left = 1; 132131380Stjr for (len = 1; len <= MAXBITS; len++) { 133131380Stjr left <<= 1; 134131380Stjr left -= count[len]; 135131380Stjr if (left < 0) return -1; /* over-subscribed */ 136131380Stjr } 137147790Scperciva if (left > 0 && (type == CODES || max != 1)) 138131380Stjr return -1; /* incomplete set */ 13917651Speter 140131380Stjr /* generate offsets into symbol table for each length for sorting */ 141131380Stjr offs[1] = 0; 142131380Stjr for (len = 1; len < MAXBITS; len++) 143131380Stjr offs[len + 1] = offs[len] + count[len]; 14417651Speter 145131380Stjr /* sort symbols by length, by symbol order within each length */ 146131380Stjr for (sym = 0; sym < codes; sym++) 147131380Stjr if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; 14817651Speter 149131380Stjr /* 150131380Stjr Create and fill in decoding tables. In this loop, the table being 151131380Stjr filled is at next and has curr index bits. The code being used is huff 152131380Stjr with length len. That code is converted to an index by dropping drop 153131380Stjr bits off of the bottom. For codes where len is less than drop + curr, 154131380Stjr those top drop + curr - len bits are incremented through all values to 155131380Stjr fill the table with replicated entries. 15617651Speter 157131380Stjr root is the number of index bits for the root table. When len exceeds 158131380Stjr root, sub-tables are created pointed to by the root entry with an index 159131380Stjr of the low root bits of huff. This is saved in low to check for when a 160131380Stjr new sub-table should be started. drop is zero when the root table is 161131380Stjr being filled, and drop is root when sub-tables are being filled. 16217651Speter 163131380Stjr When a new sub-table is needed, it is necessary to look ahead in the 164131380Stjr code lengths to determine what size sub-table is needed. The length 165131380Stjr counts are used for this, and so count[] is decremented as codes are 166131380Stjr entered in the tables. 16717651Speter 168131380Stjr used keeps track of how many table entries have been allocated from the 169205471Sdelphij provided *table space. It is checked for LENS and DIST tables against 170205471Sdelphij the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in 171205471Sdelphij the initial root table size constants. See the comments in inftrees.h 172205471Sdelphij for more information. 17317651Speter 174131380Stjr sym increments through all symbols, and the loop terminates when 175131380Stjr all codes of length max, i.e. all codes, have been processed. This 176131380Stjr routine permits incomplete codes, so another loop after this one fills 177131380Stjr in the rest of the decoding tables with invalid code markers. 178131380Stjr */ 17917651Speter 180131380Stjr /* set up for code type */ 181131380Stjr switch (type) { 182131380Stjr case CODES: 183131380Stjr base = extra = work; /* dummy value--not used */ 184131380Stjr end = 19; 185131380Stjr break; 186131380Stjr case LENS: 187131380Stjr base = lbase; 188131380Stjr base -= 257; 189131380Stjr extra = lext; 190131380Stjr extra -= 257; 191131380Stjr end = 256; 192131380Stjr break; 193131380Stjr default: /* DISTS */ 194131380Stjr base = dbase; 195131380Stjr extra = dext; 196131380Stjr end = -1; 197131380Stjr } 19817651Speter 199131380Stjr /* initialize state for loop */ 200131380Stjr huff = 0; /* starting code */ 201131380Stjr sym = 0; /* starting code symbol */ 202131380Stjr len = min; /* starting code length */ 203131380Stjr next = *table; /* current table to fill in */ 204131380Stjr curr = root; /* current table index bits */ 205131380Stjr drop = 0; /* current bits to drop from code for index */ 206131380Stjr low = (unsigned)(-1); /* trigger new sub-table when len > root */ 207131380Stjr used = 1U << root; /* use root table entries */ 208131380Stjr mask = used - 1; /* mask for comparing low */ 20917651Speter 210131380Stjr /* check available table space */ 211250261Sdelphij if ((type == LENS && used > ENOUGH_LENS) || 212250261Sdelphij (type == DISTS && used > ENOUGH_DISTS)) 213131380Stjr return 1; 21417651Speter 215131380Stjr /* process all codes and make table entries */ 216131380Stjr for (;;) { 217131380Stjr /* create table entry */ 218205471Sdelphij here.bits = (unsigned char)(len - drop); 219131380Stjr if ((int)(work[sym]) < end) { 220205471Sdelphij here.op = (unsigned char)0; 221205471Sdelphij here.val = work[sym]; 22217651Speter } 223131380Stjr else if ((int)(work[sym]) > end) { 224205471Sdelphij here.op = (unsigned char)(extra[work[sym]]); 225205471Sdelphij here.val = base[work[sym]]; 226131380Stjr } 227131380Stjr else { 228205471Sdelphij here.op = (unsigned char)(32 + 64); /* end of block */ 229205471Sdelphij here.val = 0; 230131380Stjr } 23117651Speter 232131380Stjr /* replicate for those indices with low len bits equal to huff */ 233131380Stjr incr = 1U << (len - drop); 234131380Stjr fill = 1U << curr; 235157046Sdes min = fill; /* save offset to next table */ 236131380Stjr do { 237131380Stjr fill -= incr; 238205471Sdelphij next[(huff >> drop) + fill] = here; 239131380Stjr } while (fill != 0); 24017651Speter 241131380Stjr /* backwards increment the len-bit code huff */ 242131380Stjr incr = 1U << (len - 1); 243131380Stjr while (huff & incr) 244131380Stjr incr >>= 1; 245131380Stjr if (incr != 0) { 246131380Stjr huff &= incr - 1; 247131380Stjr huff += incr; 24817651Speter } 24942468Speter else 250131380Stjr huff = 0; 25117651Speter 252131380Stjr /* go to next symbol, update count, len */ 253131380Stjr sym++; 254131380Stjr if (--(count[len]) == 0) { 255131380Stjr if (len == max) break; 256131380Stjr len = lens[work[sym]]; 257131380Stjr } 25817651Speter 259131380Stjr /* create new sub-table if needed */ 260131380Stjr if (len > root && (huff & mask) != low) { 261131380Stjr /* if first time, transition to sub-tables */ 262131380Stjr if (drop == 0) 263131380Stjr drop = root; 26417651Speter 265131380Stjr /* increment past last table */ 266157046Sdes next += min; /* here min is 1 << curr */ 26717651Speter 268131380Stjr /* determine length of next table */ 269131380Stjr curr = len - drop; 270131380Stjr left = (int)(1 << curr); 271131380Stjr while (curr + drop < max) { 272131380Stjr left -= count[curr + drop]; 273131380Stjr if (left <= 0) break; 274131380Stjr curr++; 275131380Stjr left <<= 1; 276131380Stjr } 27717651Speter 278131380Stjr /* check for enough space */ 279131380Stjr used += 1U << curr; 280250261Sdelphij if ((type == LENS && used > ENOUGH_LENS) || 281250261Sdelphij (type == DISTS && used > ENOUGH_DISTS)) 282131380Stjr return 1; 28317651Speter 284131380Stjr /* point entry in root table to sub-table */ 285131380Stjr low = huff & mask; 286131380Stjr (*table)[low].op = (unsigned char)curr; 287131380Stjr (*table)[low].bits = (unsigned char)root; 288131380Stjr (*table)[low].val = (unsigned short)(next - *table); 289131380Stjr } 29017651Speter } 29117651Speter 292237410Sdelphij /* fill in remaining table entry if code is incomplete (guaranteed to have 293237410Sdelphij at most one remaining entry, since if the code is incomplete, the 294237410Sdelphij maximum code length that was allowed to get this far is one bit) */ 295237410Sdelphij if (huff != 0) { 296237410Sdelphij here.op = (unsigned char)64; /* invalid code marker */ 297237410Sdelphij here.bits = (unsigned char)(len - drop); 298237410Sdelphij here.val = (unsigned short)0; 299237410Sdelphij next[huff] = here; 30033904Ssteve } 30117651Speter 302131380Stjr /* set return parameters */ 303131380Stjr *table += used; 304131380Stjr *bits = root; 305131380Stjr return 0; 30617651Speter} 307