inffast.c revision 146081
1193323Sed/* inffast.c -- fast decoding
2193323Sed * Copyright (C) 1995-2004 Mark Adler
3193323Sed * For conditions of distribution and use, see copyright notice in zlib.h
4193323Sed */
5193323Sed
6193323Sed#include "zutil.h"
7193323Sed#include "inftrees.h"
8193323Sed#include "inflate.h"
9193323Sed#include "inffast.h"
10193323Sed
11193323Sed#ifndef ASMINF
12193323Sed
13193323Sed/* Allow machine dependent optimization for post-increment or pre-increment.
14193323Sed   Based on testing to date,
15193323Sed   Pre-increment preferred for:
16193323Sed   - PowerPC G3 (Adler)
17193323Sed   - MIPS R5000 (Randers-Pehrson)
18193323Sed   Post-increment preferred for:
19193323Sed   - none
20193323Sed   No measurable difference:
21193323Sed   - Pentium III (Anderson)
22193323Sed   - M68060 (Nikl)
23193323Sed */
24198090Srdivacky#ifdef POSTINC
25251662Sdim#  define OFF 0
26249423Sdim#  define PUP(a) *(a)++
27249423Sdim#else
28193323Sed#  define OFF 1
29193323Sed#  define PUP(a) *++(a)
30193323Sed#endif
31193323Sed
32193323Sed/*
33193323Sed   Decode literal, length, and distance codes and write out the resulting
34193323Sed   literal and match bytes until either not enough input or output is
35212904Sdim   available, an end-of-block is encountered, or a data error is encountered.
36193323Sed   When large enough input and output buffers are supplied to inflate(), for
37193323Sed   example, a 16K input buffer and a 64K output buffer, more than 95% of the
38193323Sed   inflate execution time is spent in this routine.
39193323Sed
40193323Sed   Entry assumptions:
41193323Sed
42193323Sed        state->mode == LEN
43193323Sed        strm->avail_in >= 6
44234353Sdim        strm->avail_out >= 258
45243830Sdim        start >= strm->avail_out
46193323Sed        state->bits < 8
47193323Sed
48193323Sed   On return, state->mode is one of:
49193323Sed
50193323Sed        LEN -- ran out of enough output space or enough available input
51193323Sed        TYPE -- reached end of block code, inflate() to interpret next block
52243830Sdim        BAD -- error in block data
53193323Sed
54193323Sed   Notes:
55193323Sed
56193323Sed    - The maximum input bits used by a length/distance pair is 15 bits for the
57193323Sed      length code, 5 bits for the length extra, 15 bits for the distance code,
58193323Sed      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
59193323Sed      Therefore if strm->avail_in >= 6, then there is enough input to avoid
60193323Sed      checking for available input while decoding.
61226633Sdim
62226633Sdim    - The maximum bytes that a single length/distance pair can output is 258
63226633Sdim      bytes, which is the maximum length that can be coded.  inflate_fast()
64226633Sdim      requires strm->avail_out >= 258 for each loop to avoid checking for
65226633Sdim      output space.
66226633Sdim */
67226633Sdimvoid inflate_fast(strm, start)
68226633Sdimz_streamp strm;
69193323Sedunsigned start;         /* inflate()'s starting value for strm->avail_out */
70226633Sdim{
71221345Sdim    struct inflate_state FAR *state;
72221345Sdim    unsigned char FAR *in;      /* local strm->next_in */
73221345Sdim    unsigned char FAR *last;    /* while in < last, enough input available */
74221345Sdim    unsigned char FAR *out;     /* local strm->next_out */
75221345Sdim    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
76221345Sdim    unsigned char FAR *end;     /* while out < end, enough space available */
77193323Sed    unsigned wsize;             /* window size or zero if not using window */
78193323Sed    unsigned whave;             /* valid bytes in the window */
79193323Sed    unsigned write;             /* window write index */
80193323Sed    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
81193323Sed    unsigned long hold;         /* local strm->hold */
82193323Sed    unsigned bits;              /* local strm->bits */
83193323Sed    code const FAR *lcode;      /* local strm->lencode */
84198090Srdivacky    code const FAR *dcode;      /* local strm->distcode */
85234353Sdim    unsigned lmask;             /* mask for first level of length codes */
86234353Sdim    unsigned dmask;             /* mask for first level of distance codes */
87234353Sdim    code this;                  /* retrieved table entry */
88234353Sdim    unsigned op;                /* code bits, operation, extra bits, or */
89193323Sed                                /*  window position, window bytes to copy */
90193323Sed    unsigned len;               /* match length, unused bytes */
91193323Sed    unsigned dist;              /* match distance */
92193323Sed    unsigned char FAR *from;    /* where to copy match from */
93193323Sed
94193323Sed    /* copy state to local variables */
95193323Sed    state = (struct inflate_state FAR *)strm->state;
96193323Sed    in = strm->next_in - OFF;
97234353Sdim    last = in + (strm->avail_in - 5);
98193323Sed    out = strm->next_out - OFF;
99193323Sed    beg = out - (start - strm->avail_out);
100193323Sed    end = out + (strm->avail_out - 257);
101234353Sdim    wsize = state->wsize;
102234353Sdim    whave = state->whave;
103234353Sdim    write = state->write;
104234353Sdim    window = state->window;
105193323Sed    hold = state->hold;
106193323Sed    bits = state->bits;
107193323Sed    lcode = state->lencode;
108234353Sdim    dcode = state->distcode;
109234353Sdim    lmask = (1U << state->lenbits) - 1;
110234353Sdim    dmask = (1U << state->distbits) - 1;
111193323Sed
112193323Sed    /* decode literals and length/distances until end-of-block or not enough
113193323Sed       input data or output space */
114193323Sed    do {
115193323Sed        if (bits < 15) {
116193323Sed            hold += (unsigned long)(PUP(in)) << bits;
117193323Sed            bits += 8;
118193323Sed            hold += (unsigned long)(PUP(in)) << bits;
119193323Sed            bits += 8;
120193323Sed        }
121193323Sed        this = lcode[hold & lmask];
122193323Sed      dolen:
123193323Sed        op = (unsigned)(this.bits);
124193323Sed        hold >>= op;
125193323Sed        bits -= op;
126193323Sed        op = (unsigned)(this.op);
127193323Sed        if (op == 0) {                          /* literal */
128193323Sed            Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
129234353Sdim                    "inflate:         literal '%c'\n" :
130234353Sdim                    "inflate:         literal 0x%02x\n", this.val));
131234353Sdim            PUP(out) = (unsigned char)(this.val);
132234353Sdim        }
133193323Sed        else if (op & 16) {                     /* length base */
134193323Sed            len = (unsigned)(this.val);
135193323Sed            op &= 15;                           /* number of extra bits */
136234353Sdim            if (op) {
137234353Sdim                if (bits < op) {
138234353Sdim                    hold += (unsigned long)(PUP(in)) << bits;
139193323Sed                    bits += 8;
140193323Sed                }
141251662Sdim                len += (unsigned)hold & ((1U << op) - 1);
142251662Sdim                hold >>= op;
143251662Sdim                bits -= op;
144251662Sdim            }
145251662Sdim            Tracevv((stderr, "inflate:         length %u\n", len));
146251662Sdim            if (bits < 15) {
147251662Sdim                hold += (unsigned long)(PUP(in)) << bits;
148251662Sdim                bits += 8;
149251662Sdim                hold += (unsigned long)(PUP(in)) << bits;
150251662Sdim                bits += 8;
151251662Sdim            }
152251662Sdim            this = dcode[hold & dmask];
153251662Sdim          dodist:
154251662Sdim            op = (unsigned)(this.bits);
155193323Sed            hold >>= op;
156251662Sdim            bits -= op;
157251662Sdim            op = (unsigned)(this.op);
158193323Sed            if (op & 16) {                      /* distance base */
159193323Sed                dist = (unsigned)(this.val);
160193323Sed                op &= 15;                       /* number of extra bits */
161193323Sed                if (bits < op) {
162193323Sed                    hold += (unsigned long)(PUP(in)) << bits;
163193323Sed                    bits += 8;
164193323Sed                    if (bits < op) {
165193323Sed                        hold += (unsigned long)(PUP(in)) << bits;
166193323Sed                        bits += 8;
167193323Sed                    }
168193323Sed                }
169193323Sed                dist += (unsigned)hold & ((1U << op) - 1);
170198090Srdivacky                hold >>= op;
171198090Srdivacky                bits -= op;
172193323Sed                Tracevv((stderr, "inflate:         distance %u\n", dist));
173193323Sed                op = (unsigned)(out - beg);     /* max distance in output */
174193323Sed                if (dist > op) {                /* see if copy from window */
175193323Sed                    op = dist - op;             /* distance back in window */
176193323Sed                    if (op > whave) {
177193323Sed                        strm->msg = (char *)"invalid distance too far back";
178193323Sed                        state->mode = BAD;
179193323Sed                        break;
180193323Sed                    }
181234353Sdim                    from = window - OFF;
182234353Sdim                    if (write == 0) {           /* very common case */
183234353Sdim                        from += wsize - op;
184234353Sdim                        if (op < len) {         /* some from window */
185234353Sdim                            len -= op;
186234353Sdim                            do {
187234353Sdim                                PUP(out) = PUP(from);
188234353Sdim                            } while (--op);
189234353Sdim                            from = out - dist;  /* rest from output */
190193323Sed                        }
191193323Sed                    }
192193323Sed                    else if (write < op) {      /* wrap around window */
193251662Sdim                        from += wsize + write - op;
194193323Sed                        op -= write;
195251662Sdim                        if (op < len) {         /* some from end of window */
196251662Sdim                            len -= op;
197251662Sdim                            do {
198251662Sdim                                PUP(out) = PUP(from);
199193323Sed                            } while (--op);
200198090Srdivacky                            from = window - OFF;
201234353Sdim                            if (write < len) {  /* some from start of window */
202193323Sed                                op = write;
203193323Sed                                len -= op;
204234353Sdim                                do {
205193323Sed                                    PUP(out) = PUP(from);
206193323Sed                                } while (--op);
207193323Sed                                from = out - dist;      /* rest from output */
208234353Sdim                            }
209193323Sed                        }
210193323Sed                    }
211234353Sdim                    else {                      /* contiguous in window */
212193323Sed                        from += write - op;
213193323Sed                        if (op < len) {         /* some from window */
214234353Sdim                            len -= op;
215193323Sed                            do {
216193323Sed                                PUP(out) = PUP(from);
217193323Sed                            } while (--op);
218193323Sed                            from = out - dist;  /* rest from output */
219193323Sed                        }
220193323Sed                    }
221193323Sed                    while (len > 2) {
222193323Sed                        PUP(out) = PUP(from);
223193323Sed                        PUP(out) = PUP(from);
224193323Sed                        PUP(out) = PUP(from);
225193323Sed                        len -= 3;
226193323Sed                    }
227193323Sed                    if (len) {
228198090Srdivacky                        PUP(out) = PUP(from);
229234353Sdim                        if (len > 1)
230193323Sed                            PUP(out) = PUP(from);
231234353Sdim                    }
232234353Sdim                }
233234353Sdim                else {
234234353Sdim                    from = out - dist;          /* copy direct from output */
235193323Sed                    do {                        /* minimum length is three */
236251662Sdim                        PUP(out) = PUP(from);
237193323Sed                        PUP(out) = PUP(from);
238239462Sdim                        PUP(out) = PUP(from);
239234353Sdim                        len -= 3;
240239462Sdim                    } while (len > 2);
241239462Sdim                    if (len) {
242239462Sdim                        PUP(out) = PUP(from);
243251662Sdim                        if (len > 1)
244193323Sed                            PUP(out) = PUP(from);
245193323Sed                    }
246198090Srdivacky                }
247193323Sed            }
248193323Sed            else if ((op & 64) == 0) {          /* 2nd level distance code */
249193323Sed                this = dcode[this.val + (hold & ((1U << op) - 1))];
250193323Sed                goto dodist;
251193323Sed            }
252193323Sed            else {
253193323Sed                strm->msg = (char *)"invalid distance code";
254193323Sed                state->mode = BAD;
255193323Sed                break;
256193323Sed            }
257193323Sed        }
258193323Sed        else if ((op & 64) == 0) {              /* 2nd level length code */
259193323Sed            this = lcode[this.val + (hold & ((1U << op) - 1))];
260193323Sed            goto dolen;
261193323Sed        }
262221345Sdim        else if (op & 32) {                     /* end-of-block */
263221345Sdim            Tracevv((stderr, "inflate:         end of block\n"));
264198090Srdivacky            state->mode = TYPE;
265193323Sed            break;
266198090Srdivacky        }
267193323Sed        else {
268198090Srdivacky            strm->msg = (char *)"invalid literal/length code";
269198090Srdivacky            state->mode = BAD;
270193323Sed            break;
271193323Sed        }
272198090Srdivacky    } while (in < last && out < end);
273193323Sed
274193323Sed    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
275193323Sed    len = bits >> 3;
276193323Sed    in -= len;
277193323Sed    bits -= len << 3;
278193323Sed    hold &= (1U << bits) - 1;
279193323Sed
280193323Sed    /* update state and return */
281193323Sed    strm->next_in = in + OFF;
282193323Sed    strm->next_out = out + OFF;
283193323Sed    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
284193323Sed    strm->avail_out = (unsigned)(out < end ?
285204642Srdivacky                                 257 + (end - out) : 257 - (out - end));
286193323Sed    state->hold = hold;
287193323Sed    state->bits = bits;
288193323Sed    return;
289193323Sed}
290193323Sed
291193323Sed/*
292204642Srdivacky   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
293193323Sed   - Using bit fields for code structure
294193323Sed   - Different op definition to avoid & for extra bits (do & for table bits)
295193323Sed   - Three separate decoding do-loops for direct, window, and write == 0
296193323Sed   - Special case for distance > 1 copies to do overlapped load and store copy
297193323Sed   - Explicit branch predictions (based on measured branch probabilities)
298193323Sed   - Deferring match copy and interspersed it with decoding subsequent codes
299193323Sed   - Swapping literal/length else
300193323Sed   - Swapping window/direct else
301193323Sed   - Larger unrolled copy loops (three is about right)
302193323Sed   - Moving len -= 3 statement into middle of loop
303193323Sed */
304193323Sed
305193323Sed#endif /* !ASMINF */
306193323Sed