deflate.h revision 146081
173006Sjulian/* deflate.h -- internal compression state 273006Sjulian * Copyright (C) 1995-2002 Jean-loup Gailly 373006Sjulian * For conditions of distribution and use, see copyright notice in zlib.h 473006Sjulian */ 573006Sjulian 673006Sjulian/* WARNING: this file should *not* be used by applications. It is 773006Sjulian part of the implementation of the compression library and is 873006Sjulian subject to change. Applications should only use zlib.h. 973006Sjulian */ 1073006Sjulian 1173006Sjulian/* @(#) $Id$ */ 1273006Sjulian 1373006Sjulian#ifndef DEFLATE_H 1473006Sjulian#define DEFLATE_H 1573006Sjulian 1673006Sjulian#include "zutil.h" 1773006Sjulian 1873006Sjulian/* define NO_GZIP when compiling if you want to disable gzip header and 1973006Sjulian trailer creation by deflate(). NO_GZIP would be used to avoid linking in 2073006Sjulian the crc code when it is not needed. For shared libraries, gzip encoding 2173006Sjulian should be left enabled. */ 2273006Sjulian#ifndef NO_GZIP 2373006Sjulian# define GZIP 2473006Sjulian#endif 2573006Sjulian 2673006Sjulian/* =========================================================================== 2773006Sjulian * Internal compression state. 2873006Sjulian */ 2973006Sjulian 3073006Sjulian#define LENGTH_CODES 29 3173006Sjulian/* number of length codes, not counting the special END_BLOCK code */ 3273006Sjulian 3373006Sjulian#define LITERALS 256 3473006Sjulian/* number of literal bytes 0..255 */ 3573006Sjulian 3673006Sjulian#define L_CODES (LITERALS+1+LENGTH_CODES) 3773006Sjulian/* number of Literal or Length codes, including the END_BLOCK code */ 3873006Sjulian 3973006Sjulian#define D_CODES 30 4073006Sjulian/* number of distance codes */ 4173006Sjulian 4273006Sjulian#define BL_CODES 19 4373006Sjulian/* number of codes used to transfer the bit lengths */ 4473006Sjulian 4573006Sjulian#define HEAP_SIZE (2*L_CODES+1) 4673006Sjulian/* maximum heap size */ 4773006Sjulian 4873006Sjulian#define MAX_BITS 15 4973006Sjulian/* All codes must not exceed MAX_BITS bits */ 5073006Sjulian 5173006Sjulian#define INIT_STATE 42 5273006Sjulian#define BUSY_STATE 113 5373006Sjulian#define FINISH_STATE 666 5473006Sjulian/* Stream status */ 5573006Sjulian 5673006Sjulian 5773006Sjulian/* Data structure describing a single value and its code string. */ 5873006Sjuliantypedef struct ct_data_s { 5973006Sjulian union { 6073006Sjulian ush freq; /* frequency count */ 6173006Sjulian ush code; /* bit string */ 6273006Sjulian } fc; 6373006Sjulian union { 6473006Sjulian ush dad; /* father node in Huffman tree */ 6573006Sjulian ush len; /* length of bit string */ 6673006Sjulian } dl; 6773006Sjulian} FAR ct_data; 6873006Sjulian 6973006Sjulian#define Freq fc.freq 7073006Sjulian#define Code fc.code 7173006Sjulian#define Dad dl.dad 7273006Sjulian#define Len dl.len 7373006Sjulian 7473006Sjuliantypedef struct static_tree_desc_s static_tree_desc; 7573006Sjulian 7673006Sjuliantypedef struct tree_desc_s { 7773006Sjulian ct_data *dyn_tree; /* the dynamic tree */ 7873006Sjulian int max_code; /* largest code with non zero frequency */ 7973006Sjulian static_tree_desc *stat_desc; /* the corresponding static tree */ 8073006Sjulian} FAR tree_desc; 8173006Sjulian 8273006Sjuliantypedef ush Pos; 8373006Sjuliantypedef Pos FAR Posf; 8473006Sjuliantypedef unsigned IPos; 8573006Sjulian 8673006Sjulian/* A Pos is an index in the character window. We use short instead of int to 8773006Sjulian * save space in the various tables. IPos is used only for parameter passing. 8873006Sjulian */ 8973006Sjulian 9073006Sjuliantypedef struct internal_state { 9173006Sjulian z_streamp strm; /* pointer back to this zlib stream */ 9273006Sjulian int status; /* as the name implies */ 9373006Sjulian Bytef *pending_buf; /* output still pending */ 9473006Sjulian ulg pending_buf_size; /* size of pending_buf */ 9573006Sjulian Bytef *pending_out; /* next pending byte to output to the stream */ 9673006Sjulian int pending; /* nb of bytes in the pending buffer */ 9773006Sjulian int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ 9873006Sjulian Byte method; /* STORED (for zip only) or DEFLATED */ 9973006Sjulian int last_flush; /* value of flush param for previous deflate call */ 10073006Sjulian 10173006Sjulian /* used by deflate.c: */ 10273006Sjulian 10373006Sjulian uInt w_size; /* LZ77 window size (32K by default) */ 10473006Sjulian uInt w_bits; /* log2(w_size) (8..16) */ 10573006Sjulian uInt w_mask; /* w_size - 1 */ 10673006Sjulian 10773006Sjulian Bytef *window; 10873006Sjulian /* Sliding window. Input bytes are read into the second half of the window, 10973006Sjulian * and move to the first half later to keep a dictionary of at least wSize 11073006Sjulian * bytes. With this organization, matches are limited to a distance of 11173006Sjulian * wSize-MAX_MATCH bytes, but this ensures that IO is always 11273006Sjulian * performed with a length multiple of the block size. Also, it limits 11373006Sjulian * the window size to 64K, which is quite useful on MSDOS. 11473006Sjulian * To do: use the user input buffer as sliding window. 11573006Sjulian */ 11673006Sjulian 11773006Sjulian ulg window_size; 11873006Sjulian /* Actual size of window: 2*wSize, except when the user input buffer 11973006Sjulian * is directly used as sliding window. 12073006Sjulian */ 12173006Sjulian 12273006Sjulian Posf *prev; 12373006Sjulian /* Link to older string with same hash index. To limit the size of this 12473006Sjulian * array to 64K, this link is maintained only for the last 32K strings. 12573006Sjulian * An index in this array is thus a window index modulo 32K. 12673006Sjulian */ 12773006Sjulian 12873006Sjulian Posf *head; /* Heads of the hash chains or NIL. */ 12973006Sjulian 13073006Sjulian uInt ins_h; /* hash index of string to be inserted */ 13173006Sjulian uInt hash_size; /* number of elements in hash table */ 13273006Sjulian uInt hash_bits; /* log2(hash_size) */ 13373006Sjulian uInt hash_mask; /* hash_size-1 */ 13473006Sjulian 13573006Sjulian uInt hash_shift; 13673006Sjulian /* Number of bits by which ins_h must be shifted at each input 13773006Sjulian * step. It must be such that after MIN_MATCH steps, the oldest 13873006Sjulian * byte no longer takes part in the hash key, that is: 13973006Sjulian * hash_shift * MIN_MATCH >= hash_bits 14073006Sjulian */ 14173006Sjulian 14273006Sjulian long block_start; 14373006Sjulian /* Window position at the beginning of the current output block. Gets 14473006Sjulian * negative when the window is moved backwards. 14573006Sjulian */ 14673006Sjulian 14773006Sjulian uInt match_length; /* length of best match */ 14873006Sjulian IPos prev_match; /* previous match */ 14973006Sjulian int match_available; /* set if previous match exists */ 15073006Sjulian uInt strstart; /* start of string to insert */ 15173006Sjulian uInt match_start; /* start of matching string */ 15273006Sjulian uInt lookahead; /* number of valid bytes ahead in window */ 15373006Sjulian 15473006Sjulian uInt prev_length; 15573006Sjulian /* Length of the best match at previous step. Matches not greater than this 15673006Sjulian * are discarded. This is used in the lazy match evaluation. 15773006Sjulian */ 15873006Sjulian 15973006Sjulian uInt max_chain_length; 16073006Sjulian /* To speed up deflation, hash chains are never searched beyond this 16173006Sjulian * length. A higher limit improves compression ratio but degrades the 16273006Sjulian * speed. 16373006Sjulian */ 16473006Sjulian 16573006Sjulian uInt max_lazy_match; 16673006Sjulian /* Attempt to find a better match only when the current match is strictly 16773006Sjulian * smaller than this value. This mechanism is used only for compression 16873006Sjulian * levels >= 4. 16973006Sjulian */ 17073006Sjulian# define max_insert_length max_lazy_match 17173006Sjulian /* Insert new strings in the hash table only if the match length is not 17273006Sjulian * greater than this length. This saves time but degrades compression. 17373006Sjulian * max_insert_length is used only for compression levels <= 3. 17473006Sjulian */ 17573006Sjulian 17673006Sjulian int level; /* compression level (1..9) */ 17773006Sjulian int strategy; /* favor or force Huffman coding*/ 17873006Sjulian 17973006Sjulian uInt good_match; 18073006Sjulian /* Use a faster search when the previous match is longer than this */ 18173006Sjulian 18273006Sjulian int nice_match; /* Stop searching when current match exceeds this */ 18373006Sjulian 18473006Sjulian /* used by trees.c: */ 18573006Sjulian /* Didn't use ct_data typedef below to supress compiler warning */ 18673006Sjulian struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ 18773006Sjulian struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ 18873006Sjulian struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ 18973006Sjulian 19073006Sjulian struct tree_desc_s l_desc; /* desc. for literal tree */ 19173006Sjulian struct tree_desc_s d_desc; /* desc. for distance tree */ 19273006Sjulian struct tree_desc_s bl_desc; /* desc. for bit length tree */ 19373006Sjulian 19473006Sjulian ush bl_count[MAX_BITS+1]; 19573006Sjulian /* number of codes at each bit length for an optimal tree */ 19673006Sjulian 19773006Sjulian int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ 19873006Sjulian int heap_len; /* number of elements in the heap */ 19973006Sjulian int heap_max; /* element of largest frequency */ 20073006Sjulian /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. 20173006Sjulian * The same heap array is used to build all trees. 20273006Sjulian */ 20373006Sjulian 20473006Sjulian uch depth[2*L_CODES+1]; 20573006Sjulian /* Depth of each subtree used as tie breaker for trees of equal frequency 20673006Sjulian */ 20773006Sjulian 20873006Sjulian uchf *l_buf; /* buffer for literals or lengths */ 20973006Sjulian 21073006Sjulian uInt lit_bufsize; 21173006Sjulian /* Size of match buffer for literals/lengths. There are 4 reasons for 21273006Sjulian * limiting lit_bufsize to 64K: 21373006Sjulian * - frequencies can be kept in 16 bit counters 21473006Sjulian * - if compression is not successful for the first block, all input 21573006Sjulian * data is still in the window so we can still emit a stored block even 21673006Sjulian * when input comes from standard input. (This can also be done for 21773006Sjulian * all blocks if lit_bufsize is not greater than 32K.) 21873006Sjulian * - if compression is not successful for a file smaller than 64K, we can 21973006Sjulian * even emit a stored file instead of a stored block (saving 5 bytes). 22073006Sjulian * This is applicable only for zip (not gzip or zlib). 22173006Sjulian * - creating new Huffman trees less frequently may not provide fast 22273006Sjulian * adaptation to changes in the input data statistics. (Take for 22373006Sjulian * example a binary file with poorly compressible code followed by 22473006Sjulian * a highly compressible string table.) Smaller buffer sizes give 22573006Sjulian * fast adaptation but have of course the overhead of transmitting 22673006Sjulian * trees more frequently. 22773006Sjulian * - I can't count above 4 22873006Sjulian */ 22973006Sjulian 23073006Sjulian uInt last_lit; /* running index in l_buf */ 23173006Sjulian 23273006Sjulian ushf *d_buf; 23373006Sjulian /* Buffer for distances. To simplify the code, d_buf and l_buf have 23473006Sjulian * the same number of elements. To use different lengths, an extra flag 23573006Sjulian * array would be necessary. 23673006Sjulian */ 23773006Sjulian 23873006Sjulian ulg opt_len; /* bit length of current block with optimal trees */ 23973006Sjulian ulg static_len; /* bit length of current block with static trees */ 24073006Sjulian uInt matches; /* number of string matches in current block */ 24173006Sjulian int last_eob_len; /* bit length of EOB code for last block */ 24273006Sjulian 24373006Sjulian#ifdef DEBUG 24473006Sjulian ulg compressed_len; /* total bit length of compressed file mod 2^32 */ 24573006Sjulian ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ 24673006Sjulian#endif 24773006Sjulian 24873006Sjulian ush bi_buf; 24973006Sjulian /* Output buffer. bits are inserted starting at the bottom (least 25073006Sjulian * significant bits). 25173006Sjulian */ 25273006Sjulian int bi_valid; 25373006Sjulian /* Number of valid bits in bi_buf. All bits above the last valid bit 25473006Sjulian * are always zero. 25573006Sjulian */ 25673006Sjulian 25773006Sjulian} FAR deflate_state; 25873006Sjulian 25973006Sjulian/* Output a byte on the stream. 26073006Sjulian * IN assertion: there is enough room in pending_buf. 26173006Sjulian */ 26273006Sjulian#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} 26373006Sjulian 26473006Sjulian 26573006Sjulian#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) 26673006Sjulian/* Minimum amount of lookahead, except at the end of the input file. 26773006Sjulian * See deflate.c for comments about the MIN_MATCH+1. 26873006Sjulian */ 26973006Sjulian 27073006Sjulian#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) 27173006Sjulian/* In order to simplify the code, particularly on 16 bit machines, match 27273006Sjulian * distances are limited to MAX_DIST instead of WSIZE. 27373006Sjulian */ 27473006Sjulian 27573006Sjulian /* in trees.c */ 27673006Sjulianvoid _tr_init OF((deflate_state *s)); 27773006Sjulianint _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); 27873006Sjulianvoid _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, 27973006Sjulian int eof)); 28073006Sjulianvoid _tr_align OF((deflate_state *s)); 28173006Sjulianvoid _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, 28273006Sjulian int eof)); 28373006Sjulian 28473006Sjulian#define d_code(dist) \ 28573006Sjulian ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) 28673006Sjulian/* Mapping from a distance to a distance code. dist is the distance - 1 and 28773006Sjulian * must not have side effects. _dist_code[256] and _dist_code[257] are never 28873006Sjulian * used. 28973006Sjulian */ 29073006Sjulian 29173006Sjulian#ifndef DEBUG 29273006Sjulian/* Inline versions of _tr_tally for speed: */ 29373006Sjulian 29473006Sjulian#if defined(GEN_TREES_H) || !defined(STDC) 29573006Sjulian extern uch _length_code[]; 29673006Sjulian extern uch _dist_code[]; 29773006Sjulian#else 29873006Sjulian extern const uch _length_code[]; 29973006Sjulian extern const uch _dist_code[]; 30073006Sjulian#endif 30173006Sjulian 30273006Sjulian# define _tr_tally_lit(s, c, flush) \ 30373006Sjulian { uch cc = (c); \ 30473006Sjulian s->d_buf[s->last_lit] = 0; \ 30573006Sjulian s->l_buf[s->last_lit++] = cc; \ 30673006Sjulian s->dyn_ltree[cc].Freq++; \ 30773006Sjulian flush = (s->last_lit == s->lit_bufsize-1); \ 30873006Sjulian } 30973006Sjulian# define _tr_tally_dist(s, distance, length, flush) \ 31073006Sjulian { uch len = (length); \ 31173006Sjulian ush dist = (distance); \ 31273006Sjulian s->d_buf[s->last_lit] = dist; \ 31373006Sjulian s->l_buf[s->last_lit++] = len; \ 31473006Sjulian dist--; \ 31573006Sjulian s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ 31673006Sjulian s->dyn_dtree[d_code(dist)].Freq++; \ 31773006Sjulian flush = (s->last_lit == s->lit_bufsize-1); \ 31873006Sjulian } 31973006Sjulian#else 32073006Sjulian# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) 32173006Sjulian# define _tr_tally_dist(s, distance, length, flush) \ 32273006Sjulian flush = _tr_tally(s, distance, length) 32373006Sjulian#endif 32473006Sjulian 32573006Sjulian#endif /* DEFLATE_H */ 32673006Sjulian