inffast.c revision 157046
1131380Stjr/* inffast.c -- fast decoding
2146081Skientzle * Copyright (C) 1995-2004 Mark Adler
3131380Stjr * For conditions of distribution and use, see copyright notice in zlib.h
417651Speter */
517651Speter
617651Speter#include "zutil.h"
717651Speter#include "inftrees.h"
8131380Stjr#include "inflate.h"
917651Speter#include "inffast.h"
1017651Speter
11131380Stjr#ifndef ASMINF
1217651Speter
13131380Stjr/* Allow machine dependent optimization for post-increment or pre-increment.
14131380Stjr   Based on testing to date,
15131380Stjr   Pre-increment preferred for:
16131380Stjr   - PowerPC G3 (Adler)
17131380Stjr   - MIPS R5000 (Randers-Pehrson)
18131380Stjr   Post-increment preferred for:
19131380Stjr   - none
20131380Stjr   No measurable difference:
21131380Stjr   - Pentium III (Anderson)
22146081Skientzle   - M68060 (Nikl)
23131380Stjr */
24131380Stjr#ifdef POSTINC
25131380Stjr#  define OFF 0
26131380Stjr#  define PUP(a) *(a)++
27131380Stjr#else
28131380Stjr#  define OFF 1
29131380Stjr#  define PUP(a) *++(a)
30131380Stjr#endif
3117651Speter
32131380Stjr/*
33131380Stjr   Decode literal, length, and distance codes and write out the resulting
34131380Stjr   literal and match bytes until either not enough input or output is
35131380Stjr   available, an end-of-block is encountered, or a data error is encountered.
36131380Stjr   When large enough input and output buffers are supplied to inflate(), for
37131380Stjr   example, a 16K input buffer and a 64K output buffer, more than 95% of the
38131380Stjr   inflate execution time is spent in this routine.
3917651Speter
40131380Stjr   Entry assumptions:
4117651Speter
42131380Stjr        state->mode == LEN
43131380Stjr        strm->avail_in >= 6
44131380Stjr        strm->avail_out >= 258
45131380Stjr        start >= strm->avail_out
46131380Stjr        state->bits < 8
4717651Speter
48131380Stjr   On return, state->mode is one of:
4917651Speter
50131380Stjr        LEN -- ran out of enough output space or enough available input
51131380Stjr        TYPE -- reached end of block code, inflate() to interpret next block
52131380Stjr        BAD -- error in block data
5317651Speter
54131380Stjr   Notes:
5517651Speter
56131380Stjr    - The maximum input bits used by a length/distance pair is 15 bits for the
57131380Stjr      length code, 5 bits for the length extra, 15 bits for the distance code,
58131380Stjr      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
59131380Stjr      Therefore if strm->avail_in >= 6, then there is enough input to avoid
60131380Stjr      checking for available input while decoding.
6117651Speter
62131380Stjr    - The maximum bytes that a single length/distance pair can output is 258
63131380Stjr      bytes, which is the maximum length that can be coded.  inflate_fast()
64131380Stjr      requires strm->avail_out >= 258 for each loop to avoid checking for
65131380Stjr      output space.
66131380Stjr */
67131380Stjrvoid inflate_fast(strm, start)
68131380Stjrz_streamp strm;
69131380Stjrunsigned start;         /* inflate()'s starting value for strm->avail_out */
70131380Stjr{
71131380Stjr    struct inflate_state FAR *state;
72131380Stjr    unsigned char FAR *in;      /* local strm->next_in */
73131380Stjr    unsigned char FAR *last;    /* while in < last, enough input available */
74131380Stjr    unsigned char FAR *out;     /* local strm->next_out */
75131380Stjr    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
76131380Stjr    unsigned char FAR *end;     /* while out < end, enough space available */
77157046Sdes#ifdef INFLATE_STRICT
78157046Sdes    unsigned dmax;              /* maximum distance from zlib header */
79157046Sdes#endif
80131380Stjr    unsigned wsize;             /* window size or zero if not using window */
81131380Stjr    unsigned whave;             /* valid bytes in the window */
82131380Stjr    unsigned write;             /* window write index */
83131380Stjr    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
84131380Stjr    unsigned long hold;         /* local strm->hold */
85131380Stjr    unsigned bits;              /* local strm->bits */
86131380Stjr    code const FAR *lcode;      /* local strm->lencode */
87131380Stjr    code const FAR *dcode;      /* local strm->distcode */
88131380Stjr    unsigned lmask;             /* mask for first level of length codes */
89131380Stjr    unsigned dmask;             /* mask for first level of distance codes */
90131380Stjr    code this;                  /* retrieved table entry */
91131380Stjr    unsigned op;                /* code bits, operation, extra bits, or */
92131380Stjr                                /*  window position, window bytes to copy */
93131380Stjr    unsigned len;               /* match length, unused bytes */
94131380Stjr    unsigned dist;              /* match distance */
95131380Stjr    unsigned char FAR *from;    /* where to copy match from */
96131380Stjr
97131380Stjr    /* copy state to local variables */
98131380Stjr    state = (struct inflate_state FAR *)strm->state;
99131380Stjr    in = strm->next_in - OFF;
100131380Stjr    last = in + (strm->avail_in - 5);
101131380Stjr    out = strm->next_out - OFF;
102131380Stjr    beg = out - (start - strm->avail_out);
103131380Stjr    end = out + (strm->avail_out - 257);
104157046Sdes#ifdef INFLATE_STRICT
105157046Sdes    dmax = state->dmax;
106157046Sdes#endif
107131380Stjr    wsize = state->wsize;
108131380Stjr    whave = state->whave;
109131380Stjr    write = state->write;
110131380Stjr    window = state->window;
111131380Stjr    hold = state->hold;
112131380Stjr    bits = state->bits;
113131380Stjr    lcode = state->lencode;
114131380Stjr    dcode = state->distcode;
115131380Stjr    lmask = (1U << state->lenbits) - 1;
116131380Stjr    dmask = (1U << state->distbits) - 1;
117131380Stjr
118131380Stjr    /* decode literals and length/distances until end-of-block or not enough
119131380Stjr       input data or output space */
120131380Stjr    do {
121131380Stjr        if (bits < 15) {
122131380Stjr            hold += (unsigned long)(PUP(in)) << bits;
123131380Stjr            bits += 8;
124131380Stjr            hold += (unsigned long)(PUP(in)) << bits;
125131380Stjr            bits += 8;
126131380Stjr        }
127131380Stjr        this = lcode[hold & lmask];
128131380Stjr      dolen:
129131380Stjr        op = (unsigned)(this.bits);
130131380Stjr        hold >>= op;
131131380Stjr        bits -= op;
132131380Stjr        op = (unsigned)(this.op);
133131380Stjr        if (op == 0) {                          /* literal */
134131380Stjr            Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
135131380Stjr                    "inflate:         literal '%c'\n" :
136131380Stjr                    "inflate:         literal 0x%02x\n", this.val));
137131380Stjr            PUP(out) = (unsigned char)(this.val);
138131380Stjr        }
139131380Stjr        else if (op & 16) {                     /* length base */
140131380Stjr            len = (unsigned)(this.val);
141131380Stjr            op &= 15;                           /* number of extra bits */
142131380Stjr            if (op) {
143131380Stjr                if (bits < op) {
144131380Stjr                    hold += (unsigned long)(PUP(in)) << bits;
145131380Stjr                    bits += 8;
146131380Stjr                }
147131380Stjr                len += (unsigned)hold & ((1U << op) - 1);
148131380Stjr                hold >>= op;
149131380Stjr                bits -= op;
15017651Speter            }
151131380Stjr            Tracevv((stderr, "inflate:         length %u\n", len));
152131380Stjr            if (bits < 15) {
153131380Stjr                hold += (unsigned long)(PUP(in)) << bits;
154131380Stjr                bits += 8;
155131380Stjr                hold += (unsigned long)(PUP(in)) << bits;
156131380Stjr                bits += 8;
15792114Sgreen            }
158131380Stjr            this = dcode[hold & dmask];
159131380Stjr          dodist:
160131380Stjr            op = (unsigned)(this.bits);
161131380Stjr            hold >>= op;
162131380Stjr            bits -= op;
163131380Stjr            op = (unsigned)(this.op);
164131380Stjr            if (op & 16) {                      /* distance base */
165131380Stjr                dist = (unsigned)(this.val);
166131380Stjr                op &= 15;                       /* number of extra bits */
167131380Stjr                if (bits < op) {
168131380Stjr                    hold += (unsigned long)(PUP(in)) << bits;
169131380Stjr                    bits += 8;
170131380Stjr                    if (bits < op) {
171131380Stjr                        hold += (unsigned long)(PUP(in)) << bits;
172131380Stjr                        bits += 8;
173131380Stjr                    }
174131380Stjr                }
175131380Stjr                dist += (unsigned)hold & ((1U << op) - 1);
176157046Sdes#ifdef INFLATE_STRICT
177157046Sdes                if (dist > dmax) {
178157046Sdes                    strm->msg = (char *)"invalid distance too far back";
179157046Sdes                    state->mode = BAD;
180157046Sdes                    break;
181157046Sdes                }
182157046Sdes#endif
183131380Stjr                hold >>= op;
184131380Stjr                bits -= op;
185131380Stjr                Tracevv((stderr, "inflate:         distance %u\n", dist));
186131380Stjr                op = (unsigned)(out - beg);     /* max distance in output */
187131380Stjr                if (dist > op) {                /* see if copy from window */
188131380Stjr                    op = dist - op;             /* distance back in window */
189131380Stjr                    if (op > whave) {
190131380Stjr                        strm->msg = (char *)"invalid distance too far back";
191131380Stjr                        state->mode = BAD;
192131380Stjr                        break;
193131380Stjr                    }
194131380Stjr                    from = window - OFF;
195131380Stjr                    if (write == 0) {           /* very common case */
196131380Stjr                        from += wsize - op;
197131380Stjr                        if (op < len) {         /* some from window */
198131380Stjr                            len -= op;
199131380Stjr                            do {
200131380Stjr                                PUP(out) = PUP(from);
201131380Stjr                            } while (--op);
202131380Stjr                            from = out - dist;  /* rest from output */
203131380Stjr                        }
204131380Stjr                    }
205131380Stjr                    else if (write < op) {      /* wrap around window */
206131380Stjr                        from += wsize + write - op;
207131380Stjr                        op -= write;
208131380Stjr                        if (op < len) {         /* some from end of window */
209131380Stjr                            len -= op;
210131380Stjr                            do {
211131380Stjr                                PUP(out) = PUP(from);
212131380Stjr                            } while (--op);
213131380Stjr                            from = window - OFF;
214131380Stjr                            if (write < len) {  /* some from start of window */
215131380Stjr                                op = write;
216131380Stjr                                len -= op;
217131380Stjr                                do {
218131380Stjr                                    PUP(out) = PUP(from);
219131380Stjr                                } while (--op);
220131380Stjr                                from = out - dist;      /* rest from output */
221131380Stjr                            }
222131380Stjr                        }
223131380Stjr                    }
224131380Stjr                    else {                      /* contiguous in window */
225131380Stjr                        from += write - op;
226131380Stjr                        if (op < len) {         /* some from window */
227131380Stjr                            len -= op;
228131380Stjr                            do {
229131380Stjr                                PUP(out) = PUP(from);
230131380Stjr                            } while (--op);
231131380Stjr                            from = out - dist;  /* rest from output */
232131380Stjr                        }
233131380Stjr                    }
234131380Stjr                    while (len > 2) {
235131380Stjr                        PUP(out) = PUP(from);
236131380Stjr                        PUP(out) = PUP(from);
237131380Stjr                        PUP(out) = PUP(from);
238131380Stjr                        len -= 3;
239131380Stjr                    }
240131380Stjr                    if (len) {
241131380Stjr                        PUP(out) = PUP(from);
242131380Stjr                        if (len > 1)
243131380Stjr                            PUP(out) = PUP(from);
244131380Stjr                    }
245131380Stjr                }
246131380Stjr                else {
247131380Stjr                    from = out - dist;          /* copy direct from output */
248131380Stjr                    do {                        /* minimum length is three */
249131380Stjr                        PUP(out) = PUP(from);
250131380Stjr                        PUP(out) = PUP(from);
251131380Stjr                        PUP(out) = PUP(from);
252131380Stjr                        len -= 3;
253131380Stjr                    } while (len > 2);
254131380Stjr                    if (len) {
255131380Stjr                        PUP(out) = PUP(from);
256131380Stjr                        if (len > 1)
257131380Stjr                            PUP(out) = PUP(from);
258131380Stjr                    }
259131380Stjr                }
260131380Stjr            }
261131380Stjr            else if ((op & 64) == 0) {          /* 2nd level distance code */
262131380Stjr                this = dcode[this.val + (hold & ((1U << op) - 1))];
263131380Stjr                goto dodist;
264131380Stjr            }
265131380Stjr            else {
266131380Stjr                strm->msg = (char *)"invalid distance code";
267131380Stjr                state->mode = BAD;
268131380Stjr                break;
269131380Stjr            }
270131380Stjr        }
271131380Stjr        else if ((op & 64) == 0) {              /* 2nd level length code */
272131380Stjr            this = lcode[this.val + (hold & ((1U << op) - 1))];
273131380Stjr            goto dolen;
274131380Stjr        }
275131380Stjr        else if (op & 32) {                     /* end-of-block */
276131380Stjr            Tracevv((stderr, "inflate:         end of block\n"));
277131380Stjr            state->mode = TYPE;
27817651Speter            break;
27917651Speter        }
280131380Stjr        else {
281131380Stjr            strm->msg = (char *)"invalid literal/length code";
282131380Stjr            state->mode = BAD;
283131380Stjr            break;
284131380Stjr        }
285131380Stjr    } while (in < last && out < end);
28617651Speter
287131380Stjr    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
288131380Stjr    len = bits >> 3;
289131380Stjr    in -= len;
290131380Stjr    bits -= len << 3;
291131380Stjr    hold &= (1U << bits) - 1;
292131380Stjr
293131380Stjr    /* update state and return */
294131380Stjr    strm->next_in = in + OFF;
295131380Stjr    strm->next_out = out + OFF;
296131380Stjr    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
297131380Stjr    strm->avail_out = (unsigned)(out < end ?
298131380Stjr                                 257 + (end - out) : 257 - (out - end));
299131380Stjr    state->hold = hold;
300131380Stjr    state->bits = bits;
301131380Stjr    return;
30217651Speter}
303131380Stjr
304131380Stjr/*
305131380Stjr   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
306131380Stjr   - Using bit fields for code structure
307131380Stjr   - Different op definition to avoid & for extra bits (do & for table bits)
308131380Stjr   - Three separate decoding do-loops for direct, window, and write == 0
309131380Stjr   - Special case for distance > 1 copies to do overlapped load and store copy
310131380Stjr   - Explicit branch predictions (based on measured branch probabilities)
311131380Stjr   - Deferring match copy and interspersed it with decoding subsequent codes
312131380Stjr   - Swapping literal/length else
313131380Stjr   - Swapping window/direct else
314131380Stjr   - Larger unrolled copy loops (three is about right)
315131380Stjr   - Moving len -= 3 statement into middle of loop
316131380Stjr */
317131380Stjr
318131380Stjr#endif /* !ASMINF */
319