1131380Stjr/* inffast.c -- fast decoding
2250261Sdelphij * Copyright (C) 1995-2008, 2010, 2013 Mark Adler
3131380Stjr * For conditions of distribution and use, see copyright notice in zlib.h
417651Speter */
517651Speter
617651Speter#include "zutil.h"
717651Speter#include "inftrees.h"
8131380Stjr#include "inflate.h"
917651Speter#include "inffast.h"
1017651Speter
11131380Stjr#ifndef ASMINF
1217651Speter
13131380Stjr/* Allow machine dependent optimization for post-increment or pre-increment.
14131380Stjr   Based on testing to date,
15131380Stjr   Pre-increment preferred for:
16131380Stjr   - PowerPC G3 (Adler)
17131380Stjr   - MIPS R5000 (Randers-Pehrson)
18131380Stjr   Post-increment preferred for:
19131380Stjr   - none
20131380Stjr   No measurable difference:
21131380Stjr   - Pentium III (Anderson)
22146081Skientzle   - M68060 (Nikl)
23131380Stjr */
24131380Stjr#ifdef POSTINC
25131380Stjr#  define OFF 0
26131380Stjr#  define PUP(a) *(a)++
27131380Stjr#else
28131380Stjr#  define OFF 1
29131380Stjr#  define PUP(a) *++(a)
30131380Stjr#endif
3117651Speter
32131380Stjr/*
33131380Stjr   Decode literal, length, and distance codes and write out the resulting
34131380Stjr   literal and match bytes until either not enough input or output is
35131380Stjr   available, an end-of-block is encountered, or a data error is encountered.
36131380Stjr   When large enough input and output buffers are supplied to inflate(), for
37131380Stjr   example, a 16K input buffer and a 64K output buffer, more than 95% of the
38131380Stjr   inflate execution time is spent in this routine.
3917651Speter
40131380Stjr   Entry assumptions:
4117651Speter
42131380Stjr        state->mode == LEN
43131380Stjr        strm->avail_in >= 6
44131380Stjr        strm->avail_out >= 258
45131380Stjr        start >= strm->avail_out
46131380Stjr        state->bits < 8
4717651Speter
48131380Stjr   On return, state->mode is one of:
4917651Speter
50131380Stjr        LEN -- ran out of enough output space or enough available input
51131380Stjr        TYPE -- reached end of block code, inflate() to interpret next block
52131380Stjr        BAD -- error in block data
5317651Speter
54131380Stjr   Notes:
5517651Speter
56131380Stjr    - The maximum input bits used by a length/distance pair is 15 bits for the
57131380Stjr      length code, 5 bits for the length extra, 15 bits for the distance code,
58131380Stjr      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
59131380Stjr      Therefore if strm->avail_in >= 6, then there is enough input to avoid
60131380Stjr      checking for available input while decoding.
6117651Speter
62131380Stjr    - The maximum bytes that a single length/distance pair can output is 258
63131380Stjr      bytes, which is the maximum length that can be coded.  inflate_fast()
64131380Stjr      requires strm->avail_out >= 258 for each loop to avoid checking for
65131380Stjr      output space.
66131380Stjr */
67206924Sdelphijvoid ZLIB_INTERNAL inflate_fast(strm, start)
68131380Stjrz_streamp strm;
69131380Stjrunsigned start;         /* inflate()'s starting value for strm->avail_out */
70131380Stjr{
71131380Stjr    struct inflate_state FAR *state;
72250261Sdelphij    z_const unsigned char FAR *in;      /* local strm->next_in */
73250261Sdelphij    z_const unsigned char FAR *last;    /* have enough input while in < last */
74131380Stjr    unsigned char FAR *out;     /* local strm->next_out */
75131380Stjr    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
76131380Stjr    unsigned char FAR *end;     /* while out < end, enough space available */
77157046Sdes#ifdef INFLATE_STRICT
78157046Sdes    unsigned dmax;              /* maximum distance from zlib header */
79157046Sdes#endif
80131380Stjr    unsigned wsize;             /* window size or zero if not using window */
81131380Stjr    unsigned whave;             /* valid bytes in the window */
82205471Sdelphij    unsigned wnext;             /* window write index */
83131380Stjr    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
84131380Stjr    unsigned long hold;         /* local strm->hold */
85131380Stjr    unsigned bits;              /* local strm->bits */
86131380Stjr    code const FAR *lcode;      /* local strm->lencode */
87131380Stjr    code const FAR *dcode;      /* local strm->distcode */
88131380Stjr    unsigned lmask;             /* mask for first level of length codes */
89131380Stjr    unsigned dmask;             /* mask for first level of distance codes */
90205471Sdelphij    code here;                  /* retrieved table entry */
91131380Stjr    unsigned op;                /* code bits, operation, extra bits, or */
92131380Stjr                                /*  window position, window bytes to copy */
93131380Stjr    unsigned len;               /* match length, unused bytes */
94131380Stjr    unsigned dist;              /* match distance */
95131380Stjr    unsigned char FAR *from;    /* where to copy match from */
96131380Stjr
97131380Stjr    /* copy state to local variables */
98131380Stjr    state = (struct inflate_state FAR *)strm->state;
99131380Stjr    in = strm->next_in - OFF;
100131380Stjr    last = in + (strm->avail_in - 5);
101131380Stjr    out = strm->next_out - OFF;
102131380Stjr    beg = out - (start - strm->avail_out);
103131380Stjr    end = out + (strm->avail_out - 257);
104157046Sdes#ifdef INFLATE_STRICT
105157046Sdes    dmax = state->dmax;
106157046Sdes#endif
107131380Stjr    wsize = state->wsize;
108131380Stjr    whave = state->whave;
109205471Sdelphij    wnext = state->wnext;
110131380Stjr    window = state->window;
111131380Stjr    hold = state->hold;
112131380Stjr    bits = state->bits;
113131380Stjr    lcode = state->lencode;
114131380Stjr    dcode = state->distcode;
115131380Stjr    lmask = (1U << state->lenbits) - 1;
116131380Stjr    dmask = (1U << state->distbits) - 1;
117131380Stjr
118131380Stjr    /* decode literals and length/distances until end-of-block or not enough
119131380Stjr       input data or output space */
120131380Stjr    do {
121131380Stjr        if (bits < 15) {
122131380Stjr            hold += (unsigned long)(PUP(in)) << bits;
123131380Stjr            bits += 8;
124131380Stjr            hold += (unsigned long)(PUP(in)) << bits;
125131380Stjr            bits += 8;
126131380Stjr        }
127205471Sdelphij        here = lcode[hold & lmask];
128131380Stjr      dolen:
129205471Sdelphij        op = (unsigned)(here.bits);
130131380Stjr        hold >>= op;
131131380Stjr        bits -= op;
132205471Sdelphij        op = (unsigned)(here.op);
133131380Stjr        if (op == 0) {                          /* literal */
134205471Sdelphij            Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
135131380Stjr                    "inflate:         literal '%c'\n" :
136205471Sdelphij                    "inflate:         literal 0x%02x\n", here.val));
137205471Sdelphij            PUP(out) = (unsigned char)(here.val);
138131380Stjr        }
139131380Stjr        else if (op & 16) {                     /* length base */
140205471Sdelphij            len = (unsigned)(here.val);
141131380Stjr            op &= 15;                           /* number of extra bits */
142131380Stjr            if (op) {
143131380Stjr                if (bits < op) {
144131380Stjr                    hold += (unsigned long)(PUP(in)) << bits;
145131380Stjr                    bits += 8;
146131380Stjr                }
147131380Stjr                len += (unsigned)hold & ((1U << op) - 1);
148131380Stjr                hold >>= op;
149131380Stjr                bits -= op;
15017651Speter            }
151131380Stjr            Tracevv((stderr, "inflate:         length %u\n", len));
152131380Stjr            if (bits < 15) {
153131380Stjr                hold += (unsigned long)(PUP(in)) << bits;
154131380Stjr                bits += 8;
155131380Stjr                hold += (unsigned long)(PUP(in)) << bits;
156131380Stjr                bits += 8;
15792114Sgreen            }
158205471Sdelphij            here = dcode[hold & dmask];
159131380Stjr          dodist:
160205471Sdelphij            op = (unsigned)(here.bits);
161131380Stjr            hold >>= op;
162131380Stjr            bits -= op;
163205471Sdelphij            op = (unsigned)(here.op);
164131380Stjr            if (op & 16) {                      /* distance base */
165205471Sdelphij                dist = (unsigned)(here.val);
166131380Stjr                op &= 15;                       /* number of extra bits */
167131380Stjr                if (bits < op) {
168131380Stjr                    hold += (unsigned long)(PUP(in)) << bits;
169131380Stjr                    bits += 8;
170131380Stjr                    if (bits < op) {
171131380Stjr                        hold += (unsigned long)(PUP(in)) << bits;
172131380Stjr                        bits += 8;
173131380Stjr                    }
174131380Stjr                }
175131380Stjr                dist += (unsigned)hold & ((1U << op) - 1);
176157046Sdes#ifdef INFLATE_STRICT
177157046Sdes                if (dist > dmax) {
178157046Sdes                    strm->msg = (char *)"invalid distance too far back";
179157046Sdes                    state->mode = BAD;
180157046Sdes                    break;
181157046Sdes                }
182157046Sdes#endif
183131380Stjr                hold >>= op;
184131380Stjr                bits -= op;
185131380Stjr                Tracevv((stderr, "inflate:         distance %u\n", dist));
186131380Stjr                op = (unsigned)(out - beg);     /* max distance in output */
187131380Stjr                if (dist > op) {                /* see if copy from window */
188131380Stjr                    op = dist - op;             /* distance back in window */
189131380Stjr                    if (op > whave) {
190205471Sdelphij                        if (state->sane) {
191205471Sdelphij                            strm->msg =
192205471Sdelphij                                (char *)"invalid distance too far back";
193205471Sdelphij                            state->mode = BAD;
194205471Sdelphij                            break;
195205471Sdelphij                        }
196205471Sdelphij#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
197205471Sdelphij                        if (len <= op - whave) {
198205471Sdelphij                            do {
199205471Sdelphij                                PUP(out) = 0;
200205471Sdelphij                            } while (--len);
201205471Sdelphij                            continue;
202205471Sdelphij                        }
203205471Sdelphij                        len -= op - whave;
204205471Sdelphij                        do {
205205471Sdelphij                            PUP(out) = 0;
206205471Sdelphij                        } while (--op > whave);
207205471Sdelphij                        if (op == 0) {
208205471Sdelphij                            from = out - dist;
209205471Sdelphij                            do {
210205471Sdelphij                                PUP(out) = PUP(from);
211205471Sdelphij                            } while (--len);
212205471Sdelphij                            continue;
213205471Sdelphij                        }
214205471Sdelphij#endif
215131380Stjr                    }
216131380Stjr                    from = window - OFF;
217205471Sdelphij                    if (wnext == 0) {           /* very common case */
218131380Stjr                        from += wsize - op;
219131380Stjr                        if (op < len) {         /* some from window */
220131380Stjr                            len -= op;
221131380Stjr                            do {
222131380Stjr                                PUP(out) = PUP(from);
223131380Stjr                            } while (--op);
224131380Stjr                            from = out - dist;  /* rest from output */
225131380Stjr                        }
226131380Stjr                    }
227205471Sdelphij                    else if (wnext < op) {      /* wrap around window */
228205471Sdelphij                        from += wsize + wnext - op;
229205471Sdelphij                        op -= wnext;
230131380Stjr                        if (op < len) {         /* some from end of window */
231131380Stjr                            len -= op;
232131380Stjr                            do {
233131380Stjr                                PUP(out) = PUP(from);
234131380Stjr                            } while (--op);
235131380Stjr                            from = window - OFF;
236205471Sdelphij                            if (wnext < len) {  /* some from start of window */
237205471Sdelphij                                op = wnext;
238131380Stjr                                len -= op;
239131380Stjr                                do {
240131380Stjr                                    PUP(out) = PUP(from);
241131380Stjr                                } while (--op);
242131380Stjr                                from = out - dist;      /* rest from output */
243131380Stjr                            }
244131380Stjr                        }
245131380Stjr                    }
246131380Stjr                    else {                      /* contiguous in window */
247205471Sdelphij                        from += wnext - op;
248131380Stjr                        if (op < len) {         /* some from window */
249131380Stjr                            len -= op;
250131380Stjr                            do {
251131380Stjr                                PUP(out) = PUP(from);
252131380Stjr                            } while (--op);
253131380Stjr                            from = out - dist;  /* rest from output */
254131380Stjr                        }
255131380Stjr                    }
256131380Stjr                    while (len > 2) {
257131380Stjr                        PUP(out) = PUP(from);
258131380Stjr                        PUP(out) = PUP(from);
259131380Stjr                        PUP(out) = PUP(from);
260131380Stjr                        len -= 3;
261131380Stjr                    }
262131380Stjr                    if (len) {
263131380Stjr                        PUP(out) = PUP(from);
264131380Stjr                        if (len > 1)
265131380Stjr                            PUP(out) = PUP(from);
266131380Stjr                    }
267131380Stjr                }
268131380Stjr                else {
269131380Stjr                    from = out - dist;          /* copy direct from output */
270131380Stjr                    do {                        /* minimum length is three */
271131380Stjr                        PUP(out) = PUP(from);
272131380Stjr                        PUP(out) = PUP(from);
273131380Stjr                        PUP(out) = PUP(from);
274131380Stjr                        len -= 3;
275131380Stjr                    } while (len > 2);
276131380Stjr                    if (len) {
277131380Stjr                        PUP(out) = PUP(from);
278131380Stjr                        if (len > 1)
279131380Stjr                            PUP(out) = PUP(from);
280131380Stjr                    }
281131380Stjr                }
282131380Stjr            }
283131380Stjr            else if ((op & 64) == 0) {          /* 2nd level distance code */
284205471Sdelphij                here = dcode[here.val + (hold & ((1U << op) - 1))];
285131380Stjr                goto dodist;
286131380Stjr            }
287131380Stjr            else {
288131380Stjr                strm->msg = (char *)"invalid distance code";
289131380Stjr                state->mode = BAD;
290131380Stjr                break;
291131380Stjr            }
292131380Stjr        }
293131380Stjr        else if ((op & 64) == 0) {              /* 2nd level length code */
294205471Sdelphij            here = lcode[here.val + (hold & ((1U << op) - 1))];
295131380Stjr            goto dolen;
296131380Stjr        }
297131380Stjr        else if (op & 32) {                     /* end-of-block */
298131380Stjr            Tracevv((stderr, "inflate:         end of block\n"));
299131380Stjr            state->mode = TYPE;
30017651Speter            break;
30117651Speter        }
302131380Stjr        else {
303131380Stjr            strm->msg = (char *)"invalid literal/length code";
304131380Stjr            state->mode = BAD;
305131380Stjr            break;
306131380Stjr        }
307131380Stjr    } while (in < last && out < end);
30817651Speter
309131380Stjr    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
310131380Stjr    len = bits >> 3;
311131380Stjr    in -= len;
312131380Stjr    bits -= len << 3;
313131380Stjr    hold &= (1U << bits) - 1;
314131380Stjr
315131380Stjr    /* update state and return */
316131380Stjr    strm->next_in = in + OFF;
317131380Stjr    strm->next_out = out + OFF;
318131380Stjr    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
319131380Stjr    strm->avail_out = (unsigned)(out < end ?
320131380Stjr                                 257 + (end - out) : 257 - (out - end));
321131380Stjr    state->hold = hold;
322131380Stjr    state->bits = bits;
323131380Stjr    return;
32417651Speter}
325131380Stjr
326131380Stjr/*
327131380Stjr   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
328131380Stjr   - Using bit fields for code structure
329131380Stjr   - Different op definition to avoid & for extra bits (do & for table bits)
330205471Sdelphij   - Three separate decoding do-loops for direct, window, and wnext == 0
331131380Stjr   - Special case for distance > 1 copies to do overlapped load and store copy
332131380Stjr   - Explicit branch predictions (based on measured branch probabilities)
333131380Stjr   - Deferring match copy and interspersed it with decoding subsequent codes
334131380Stjr   - Swapping literal/length else
335131380Stjr   - Swapping window/direct else
336131380Stjr   - Larger unrolled copy loops (three is about right)
337131380Stjr   - Moving len -= 3 statement into middle of loop
338131380Stjr */
339131380Stjr
340131380Stjr#endif /* !ASMINF */
341