1168404Spjd/* inffast.c -- fast decoding
2168404Spjd * Copyright (C) 1995-2004 Mark Adler
3168404Spjd * For conditions of distribution and use, see copyright notice in zlib.h
4168404Spjd */
5168404Spjd
6168404Spjd#pragma ident	"%Z%%M%	%I%	%E% SMI"
7168404Spjd
8168404Spjd#include "zutil.h"
9168404Spjd#include "inftrees.h"
10168404Spjd#include "inflate.h"
11168404Spjd#include "inffast.h"
12168404Spjd
13168404Spjd#ifndef ASMINF
14168404Spjd
15168404Spjd/* Allow machine dependent optimization for post-increment or pre-increment.
16168404Spjd   Based on testing to date,
17168404Spjd   Pre-increment preferred for:
18168404Spjd   - PowerPC G3 (Adler)
19168404Spjd   - MIPS R5000 (Randers-Pehrson)
20168404Spjd   Post-increment preferred for:
21168404Spjd   - none
22168404Spjd   No measurable difference:
23168404Spjd   - Pentium III (Anderson)
24168404Spjd   - M68060 (Nikl)
25168404Spjd */
26168404Spjd#ifdef POSTINC
27168404Spjd#  define OFF 0
28168404Spjd#  define PUP(a) *(a)++
29168404Spjd#else
30168404Spjd#  define OFF 1
31168404Spjd#  define PUP(a) *++(a)
32168404Spjd#endif
33168404Spjd
34168404Spjd/*
35168404Spjd   Decode literal, length, and distance codes and write out the resulting
36168404Spjd   literal and match bytes until either not enough input or output is
37168404Spjd   available, an end-of-block is encountered, or a data error is encountered.
38168404Spjd   When large enough input and output buffers are supplied to inflate(), for
39168404Spjd   example, a 16K input buffer and a 64K output buffer, more than 95% of the
40168404Spjd   inflate execution time is spent in this routine.
41168404Spjd
42168404Spjd   Entry assumptions:
43168404Spjd
44168404Spjd        state->mode == LEN
45168404Spjd        strm->avail_in >= 6
46168404Spjd        strm->avail_out >= 258
47168404Spjd        start >= strm->avail_out
48168404Spjd        state->bits < 8
49168404Spjd
50168404Spjd   On return, state->mode is one of:
51168404Spjd
52168404Spjd        LEN -- ran out of enough output space or enough available input
53168404Spjd        TYPE -- reached end of block code, inflate() to interpret next block
54168404Spjd        BAD -- error in block data
55168404Spjd
56168404Spjd   Notes:
57168404Spjd
58168404Spjd    - The maximum input bits used by a length/distance pair is 15 bits for the
59168404Spjd      length code, 5 bits for the length extra, 15 bits for the distance code,
60168404Spjd      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
61168404Spjd      Therefore if strm->avail_in >= 6, then there is enough input to avoid
62168404Spjd      checking for available input while decoding.
63168404Spjd
64168404Spjd    - The maximum bytes that a single length/distance pair can output is 258
65168404Spjd      bytes, which is the maximum length that can be coded.  inflate_fast()
66168404Spjd      requires strm->avail_out >= 258 for each loop to avoid checking for
67168404Spjd      output space.
68168404Spjd */
69168404Spjdvoid inflate_fast(strm, start)
70168404Spjdz_streamp strm;
71168404Spjdunsigned start;         /* inflate()'s starting value for strm->avail_out */
72168404Spjd{
73168404Spjd    struct inflate_state FAR *state;
74168404Spjd    unsigned char FAR *in;      /* local strm->next_in */
75168404Spjd    unsigned char FAR *last;    /* while in < last, enough input available */
76168404Spjd    unsigned char FAR *out;     /* local strm->next_out */
77168404Spjd    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
78168404Spjd    unsigned char FAR *end;     /* while out < end, enough space available */
79168404Spjd#ifdef INFLATE_STRICT
80168404Spjd    unsigned dmax;              /* maximum distance from zlib header */
81168404Spjd#endif
82168404Spjd    unsigned wsize;             /* window size or zero if not using window */
83168404Spjd    unsigned whave;             /* valid bytes in the window */
84168404Spjd    unsigned write;             /* window write index */
85168404Spjd    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
86168404Spjd    unsigned long hold;         /* local strm->hold */
87168404Spjd    unsigned bits;              /* local strm->bits */
88168404Spjd    code const FAR *lcode;      /* local strm->lencode */
89168404Spjd    code const FAR *dcode;      /* local strm->distcode */
90168404Spjd    unsigned lmask;             /* mask for first level of length codes */
91168404Spjd    unsigned dmask;             /* mask for first level of distance codes */
92168404Spjd    code this;                  /* retrieved table entry */
93168404Spjd    unsigned op;                /* code bits, operation, extra bits, or */
94168404Spjd                                /*  window position, window bytes to copy */
95168404Spjd    unsigned len;               /* match length, unused bytes */
96168404Spjd    unsigned dist;              /* match distance */
97168404Spjd    unsigned char FAR *from;    /* where to copy match from */
98168404Spjd
99168404Spjd    /* copy state to local variables */
100168404Spjd    state = (struct inflate_state FAR *)strm->state;
101168404Spjd    in = strm->next_in - OFF;
102168404Spjd    last = in + (strm->avail_in - 5);
103168404Spjd    out = strm->next_out - OFF;
104168404Spjd    beg = out - (start - strm->avail_out);
105168404Spjd    end = out + (strm->avail_out - 257);
106168404Spjd#ifdef INFLATE_STRICT
107168404Spjd    dmax = state->dmax;
108168404Spjd#endif
109168404Spjd    wsize = state->wsize;
110168404Spjd    whave = state->whave;
111168404Spjd    write = state->write;
112168404Spjd    window = state->window;
113168404Spjd    hold = state->hold;
114168404Spjd    bits = state->bits;
115168404Spjd    lcode = state->lencode;
116168404Spjd    dcode = state->distcode;
117168404Spjd    lmask = (1U << state->lenbits) - 1;
118168404Spjd    dmask = (1U << state->distbits) - 1;
119168404Spjd
120168404Spjd    /* decode literals and length/distances until end-of-block or not enough
121168404Spjd       input data or output space */
122168404Spjd    do {
123168404Spjd        if (bits < 15) {
124168404Spjd            hold += (unsigned long)(PUP(in)) << bits;
125168404Spjd            bits += 8;
126168404Spjd            hold += (unsigned long)(PUP(in)) << bits;
127168404Spjd            bits += 8;
128168404Spjd        }
129168404Spjd        this = lcode[hold & lmask];
130168404Spjd      dolen:
131168404Spjd        op = (unsigned)(this.bits);
132168404Spjd        hold >>= op;
133168404Spjd        bits -= op;
134168404Spjd        op = (unsigned)(this.op);
135168404Spjd        if (op == 0) {                          /* literal */
136168404Spjd            Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
137168404Spjd                    "inflate:         literal '%c'\n" :
138168404Spjd                    "inflate:         literal 0x%02x\n", this.val));
139168404Spjd            PUP(out) = (unsigned char)(this.val);
140168404Spjd        }
141168404Spjd        else if (op & 16) {                     /* length base */
142168404Spjd            len = (unsigned)(this.val);
143168404Spjd            op &= 15;                           /* number of extra bits */
144168404Spjd            if (op) {
145168404Spjd                if (bits < op) {
146168404Spjd                    hold += (unsigned long)(PUP(in)) << bits;
147168404Spjd                    bits += 8;
148168404Spjd                }
149168404Spjd                len += (unsigned)hold & ((1U << op) - 1);
150168404Spjd                hold >>= op;
151168404Spjd                bits -= op;
152168404Spjd            }
153168404Spjd            Tracevv((stderr, "inflate:         length %u\n", len));
154168404Spjd            if (bits < 15) {
155168404Spjd                hold += (unsigned long)(PUP(in)) << bits;
156168404Spjd                bits += 8;
157168404Spjd                hold += (unsigned long)(PUP(in)) << bits;
158168404Spjd                bits += 8;
159168404Spjd            }
160168404Spjd            this = dcode[hold & dmask];
161168404Spjd          dodist:
162168404Spjd            op = (unsigned)(this.bits);
163168404Spjd            hold >>= op;
164168404Spjd            bits -= op;
165168404Spjd            op = (unsigned)(this.op);
166168404Spjd            if (op & 16) {                      /* distance base */
167168404Spjd                dist = (unsigned)(this.val);
168168404Spjd                op &= 15;                       /* number of extra bits */
169168404Spjd                if (bits < op) {
170168404Spjd                    hold += (unsigned long)(PUP(in)) << bits;
171168404Spjd                    bits += 8;
172168404Spjd                    if (bits < op) {
173168404Spjd                        hold += (unsigned long)(PUP(in)) << bits;
174168404Spjd                        bits += 8;
175168404Spjd                    }
176168404Spjd                }
177168404Spjd                dist += (unsigned)hold & ((1U << op) - 1);
178168404Spjd#ifdef INFLATE_STRICT
179168404Spjd                if (dist > dmax) {
180168404Spjd                    strm->msg = (char *)"invalid distance too far back";
181168404Spjd                    state->mode = BAD;
182168404Spjd                    break;
183168404Spjd                }
184168404Spjd#endif
185168404Spjd                hold >>= op;
186168404Spjd                bits -= op;
187168404Spjd                Tracevv((stderr, "inflate:         distance %u\n", dist));
188168404Spjd                op = (unsigned)(out - beg);     /* max distance in output */
189168404Spjd                if (dist > op) {                /* see if copy from window */
190168404Spjd                    op = dist - op;             /* distance back in window */
191168404Spjd                    if (op > whave) {
192168404Spjd                        strm->msg = (char *)"invalid distance too far back";
193168404Spjd                        state->mode = BAD;
194168404Spjd                        break;
195168404Spjd                    }
196168404Spjd                    from = window - OFF;
197168404Spjd                    if (write == 0) {           /* very common case */
198168404Spjd                        from += wsize - op;
199168404Spjd                        if (op < len) {         /* some from window */
200168404Spjd                            len -= op;
201168404Spjd                            do {
202168404Spjd                                PUP(out) = PUP(from);
203168404Spjd                            } while (--op);
204168404Spjd                            from = out - dist;  /* rest from output */
205168404Spjd                        }
206168404Spjd                    }
207168404Spjd                    else if (write < op) {      /* wrap around window */
208168404Spjd                        from += wsize + write - op;
209168404Spjd                        op -= write;
210168404Spjd                        if (op < len) {         /* some from end of window */
211168404Spjd                            len -= op;
212168404Spjd                            do {
213168404Spjd                                PUP(out) = PUP(from);
214168404Spjd                            } while (--op);
215168404Spjd                            from = window - OFF;
216168404Spjd                            if (write < len) {  /* some from start of window */
217168404Spjd                                op = write;
218168404Spjd                                len -= op;
219168404Spjd                                do {
220168404Spjd                                    PUP(out) = PUP(from);
221168404Spjd                                } while (--op);
222168404Spjd                                from = out - dist;      /* rest from output */
223168404Spjd                            }
224168404Spjd                        }
225168404Spjd                    }
226168404Spjd                    else {                      /* contiguous in window */
227168404Spjd                        from += write - op;
228168404Spjd                        if (op < len) {         /* some from window */
229168404Spjd                            len -= op;
230168404Spjd                            do {
231168404Spjd                                PUP(out) = PUP(from);
232168404Spjd                            } while (--op);
233168404Spjd                            from = out - dist;  /* rest from output */
234168404Spjd                        }
235168404Spjd                    }
236168404Spjd                    while (len > 2) {
237168404Spjd                        PUP(out) = PUP(from);
238168404Spjd                        PUP(out) = PUP(from);
239168404Spjd                        PUP(out) = PUP(from);
240168404Spjd                        len -= 3;
241168404Spjd                    }
242168404Spjd                    if (len) {
243168404Spjd                        PUP(out) = PUP(from);
244168404Spjd                        if (len > 1)
245168404Spjd                            PUP(out) = PUP(from);
246168404Spjd                    }
247168404Spjd                }
248168404Spjd                else {
249168404Spjd                    from = out - dist;          /* copy direct from output */
250168404Spjd                    do {                        /* minimum length is three */
251168404Spjd                        PUP(out) = PUP(from);
252168404Spjd                        PUP(out) = PUP(from);
253168404Spjd                        PUP(out) = PUP(from);
254168404Spjd                        len -= 3;
255168404Spjd                    } while (len > 2);
256168404Spjd                    if (len) {
257168404Spjd                        PUP(out) = PUP(from);
258168404Spjd                        if (len > 1)
259168404Spjd                            PUP(out) = PUP(from);
260168404Spjd                    }
261168404Spjd                }
262168404Spjd            }
263168404Spjd            else if ((op & 64) == 0) {          /* 2nd level distance code */
264168404Spjd                this = dcode[this.val + (hold & ((1U << op) - 1))];
265168404Spjd                goto dodist;
266168404Spjd            }
267168404Spjd            else {
268168404Spjd                strm->msg = (char *)"invalid distance code";
269168404Spjd                state->mode = BAD;
270168404Spjd                break;
271168404Spjd            }
272168404Spjd        }
273168404Spjd        else if ((op & 64) == 0) {              /* 2nd level length code */
274168404Spjd            this = lcode[this.val + (hold & ((1U << op) - 1))];
275168404Spjd            goto dolen;
276168404Spjd        }
277168404Spjd        else if (op & 32) {                     /* end-of-block */
278168404Spjd            Tracevv((stderr, "inflate:         end of block\n"));
279168404Spjd            state->mode = TYPE;
280168404Spjd            break;
281168404Spjd        }
282168404Spjd        else {
283168404Spjd            strm->msg = (char *)"invalid literal/length code";
284168404Spjd            state->mode = BAD;
285168404Spjd            break;
286168404Spjd        }
287168404Spjd    } while (in < last && out < end);
288168404Spjd
289168404Spjd    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
290168404Spjd    len = bits >> 3;
291168404Spjd    in -= len;
292168404Spjd    bits -= len << 3;
293168404Spjd    hold &= (1U << bits) - 1;
294168404Spjd
295168404Spjd    /* update state and return */
296168404Spjd    strm->next_in = in + OFF;
297168404Spjd    strm->next_out = out + OFF;
298168404Spjd    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
299168404Spjd    strm->avail_out = (unsigned)(out < end ?
300168404Spjd                                 257 + (end - out) : 257 - (out - end));
301168404Spjd    state->hold = hold;
302168404Spjd    state->bits = bits;
303168404Spjd    return;
304168404Spjd}
305168404Spjd
306168404Spjd/*
307168404Spjd   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
308168404Spjd   - Using bit fields for code structure
309168404Spjd   - Different op definition to avoid & for extra bits (do & for table bits)
310168404Spjd   - Three separate decoding do-loops for direct, window, and write == 0
311168404Spjd   - Special case for distance > 1 copies to do overlapped load and store copy
312168404Spjd   - Explicit branch predictions (based on measured branch probabilities)
313168404Spjd   - Deferring match copy and interspersed it with decoding subsequent codes
314168404Spjd   - Swapping literal/length else
315168404Spjd   - Swapping window/direct else
316168404Spjd   - Larger unrolled copy loops (three is about right)
317168404Spjd   - Moving len -= 3 statement into middle of loop
318168404Spjd */
319168404Spjd
320168404Spjd#endif /* !ASMINF */
321