inffast.c revision 131380
1131380Stjr/* inffast.c -- fast decoding
2131380Stjr * Copyright (C) 1995-2003 Mark Adler
3131380Stjr * For conditions of distribution and use, see copyright notice in zlib.h
417651Speter */
517651Speter
684228Sdillon#include <sys/cdefs.h>
784228Sdillon__FBSDID("$FreeBSD: head/lib/libz/inffast.c 131380 2004-06-30 23:54:46Z tjr $");
884228Sdillon
917651Speter#include "zutil.h"
1017651Speter#include "inftrees.h"
11131380Stjr#include "inflate.h"
1217651Speter#include "inffast.h"
1317651Speter
14131380Stjr#ifndef ASMINF
1517651Speter
16131380Stjr/* Allow machine dependent optimization for post-increment or pre-increment.
17131380Stjr   Based on testing to date,
18131380Stjr   Pre-increment preferred for:
19131380Stjr   - PowerPC G3 (Adler)
20131380Stjr   - MIPS R5000 (Randers-Pehrson)
21131380Stjr   Post-increment preferred for:
22131380Stjr   - none
23131380Stjr   No measurable difference:
24131380Stjr   - Pentium III (Anderson)
25131380Stjr   - 68060 (Nikl)
26131380Stjr */
27131380Stjr#ifdef POSTINC
28131380Stjr#  define OFF 0
29131380Stjr#  define PUP(a) *(a)++
30131380Stjr#else
31131380Stjr#  define OFF 1
32131380Stjr#  define PUP(a) *++(a)
33131380Stjr#endif
3417651Speter
35131380Stjr/*
36131380Stjr   Decode literal, length, and distance codes and write out the resulting
37131380Stjr   literal and match bytes until either not enough input or output is
38131380Stjr   available, an end-of-block is encountered, or a data error is encountered.
39131380Stjr   When large enough input and output buffers are supplied to inflate(), for
40131380Stjr   example, a 16K input buffer and a 64K output buffer, more than 95% of the
41131380Stjr   inflate execution time is spent in this routine.
4217651Speter
43131380Stjr   Entry assumptions:
4417651Speter
45131380Stjr        state->mode == LEN
46131380Stjr        strm->avail_in >= 6
47131380Stjr        strm->avail_out >= 258
48131380Stjr        start >= strm->avail_out
49131380Stjr        state->bits < 8
5017651Speter
51131380Stjr   On return, state->mode is one of:
5217651Speter
53131380Stjr        LEN -- ran out of enough output space or enough available input
54131380Stjr        TYPE -- reached end of block code, inflate() to interpret next block
55131380Stjr        BAD -- error in block data
5617651Speter
57131380Stjr   Notes:
5817651Speter
59131380Stjr    - The maximum input bits used by a length/distance pair is 15 bits for the
60131380Stjr      length code, 5 bits for the length extra, 15 bits for the distance code,
61131380Stjr      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
62131380Stjr      Therefore if strm->avail_in >= 6, then there is enough input to avoid
63131380Stjr      checking for available input while decoding.
6417651Speter
65131380Stjr    - The maximum bytes that a single length/distance pair can output is 258
66131380Stjr      bytes, which is the maximum length that can be coded.  inflate_fast()
67131380Stjr      requires strm->avail_out >= 258 for each loop to avoid checking for
68131380Stjr      output space.
69131380Stjr */
70131380Stjrvoid inflate_fast(strm, start)
71131380Stjrz_streamp strm;
72131380Stjrunsigned start;         /* inflate()'s starting value for strm->avail_out */
73131380Stjr{
74131380Stjr    struct inflate_state FAR *state;
75131380Stjr    unsigned char FAR *in;      /* local strm->next_in */
76131380Stjr    unsigned char FAR *last;    /* while in < last, enough input available */
77131380Stjr    unsigned char FAR *out;     /* local strm->next_out */
78131380Stjr    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
79131380Stjr    unsigned char FAR *end;     /* while out < end, enough space available */
80131380Stjr    unsigned wsize;             /* window size or zero if not using window */
81131380Stjr    unsigned whave;             /* valid bytes in the window */
82131380Stjr    unsigned write;             /* window write index */
83131380Stjr    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
84131380Stjr    unsigned long hold;         /* local strm->hold */
85131380Stjr    unsigned bits;              /* local strm->bits */
86131380Stjr    code const FAR *lcode;      /* local strm->lencode */
87131380Stjr    code const FAR *dcode;      /* local strm->distcode */
88131380Stjr    unsigned lmask;             /* mask for first level of length codes */
89131380Stjr    unsigned dmask;             /* mask for first level of distance codes */
90131380Stjr    code this;                  /* retrieved table entry */
91131380Stjr    unsigned op;                /* code bits, operation, extra bits, or */
92131380Stjr                                /*  window position, window bytes to copy */
93131380Stjr    unsigned len;               /* match length, unused bytes */
94131380Stjr    unsigned dist;              /* match distance */
95131380Stjr    unsigned char FAR *from;    /* where to copy match from */
96131380Stjr
97131380Stjr    /* copy state to local variables */
98131380Stjr    state = (struct inflate_state FAR *)strm->state;
99131380Stjr    in = strm->next_in - OFF;
100131380Stjr    last = in + (strm->avail_in - 5);
101131380Stjr    out = strm->next_out - OFF;
102131380Stjr    beg = out - (start - strm->avail_out);
103131380Stjr    end = out + (strm->avail_out - 257);
104131380Stjr    wsize = state->wsize;
105131380Stjr    whave = state->whave;
106131380Stjr    write = state->write;
107131380Stjr    window = state->window;
108131380Stjr    hold = state->hold;
109131380Stjr    bits = state->bits;
110131380Stjr    lcode = state->lencode;
111131380Stjr    dcode = state->distcode;
112131380Stjr    lmask = (1U << state->lenbits) - 1;
113131380Stjr    dmask = (1U << state->distbits) - 1;
114131380Stjr
115131380Stjr    /* decode literals and length/distances until end-of-block or not enough
116131380Stjr       input data or output space */
117131380Stjr    do {
118131380Stjr        if (bits < 15) {
119131380Stjr            hold += (unsigned long)(PUP(in)) << bits;
120131380Stjr            bits += 8;
121131380Stjr            hold += (unsigned long)(PUP(in)) << bits;
122131380Stjr            bits += 8;
123131380Stjr        }
124131380Stjr        this = lcode[hold & lmask];
125131380Stjr      dolen:
126131380Stjr        op = (unsigned)(this.bits);
127131380Stjr        hold >>= op;
128131380Stjr        bits -= op;
129131380Stjr        op = (unsigned)(this.op);
130131380Stjr        if (op == 0) {                          /* literal */
131131380Stjr            Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
132131380Stjr                    "inflate:         literal '%c'\n" :
133131380Stjr                    "inflate:         literal 0x%02x\n", this.val));
134131380Stjr            PUP(out) = (unsigned char)(this.val);
135131380Stjr        }
136131380Stjr        else if (op & 16) {                     /* length base */
137131380Stjr            len = (unsigned)(this.val);
138131380Stjr            op &= 15;                           /* number of extra bits */
139131380Stjr            if (op) {
140131380Stjr                if (bits < op) {
141131380Stjr                    hold += (unsigned long)(PUP(in)) << bits;
142131380Stjr                    bits += 8;
143131380Stjr                }
144131380Stjr                len += (unsigned)hold & ((1U << op) - 1);
145131380Stjr                hold >>= op;
146131380Stjr                bits -= op;
14717651Speter            }
148131380Stjr            Tracevv((stderr, "inflate:         length %u\n", len));
149131380Stjr            if (bits < 15) {
150131380Stjr                hold += (unsigned long)(PUP(in)) << bits;
151131380Stjr                bits += 8;
152131380Stjr                hold += (unsigned long)(PUP(in)) << bits;
153131380Stjr                bits += 8;
15492114Sgreen            }
155131380Stjr            this = dcode[hold & dmask];
156131380Stjr          dodist:
157131380Stjr            op = (unsigned)(this.bits);
158131380Stjr            hold >>= op;
159131380Stjr            bits -= op;
160131380Stjr            op = (unsigned)(this.op);
161131380Stjr            if (op & 16) {                      /* distance base */
162131380Stjr                dist = (unsigned)(this.val);
163131380Stjr                op &= 15;                       /* number of extra bits */
164131380Stjr                if (bits < op) {
165131380Stjr                    hold += (unsigned long)(PUP(in)) << bits;
166131380Stjr                    bits += 8;
167131380Stjr                    if (bits < op) {
168131380Stjr                        hold += (unsigned long)(PUP(in)) << bits;
169131380Stjr                        bits += 8;
170131380Stjr                    }
171131380Stjr                }
172131380Stjr                dist += (unsigned)hold & ((1U << op) - 1);
173131380Stjr                hold >>= op;
174131380Stjr                bits -= op;
175131380Stjr                Tracevv((stderr, "inflate:         distance %u\n", dist));
176131380Stjr                op = (unsigned)(out - beg);     /* max distance in output */
177131380Stjr                if (dist > op) {                /* see if copy from window */
178131380Stjr                    op = dist - op;             /* distance back in window */
179131380Stjr                    if (op > whave) {
180131380Stjr                        strm->msg = (char *)"invalid distance too far back";
181131380Stjr                        state->mode = BAD;
182131380Stjr                        break;
183131380Stjr                    }
184131380Stjr                    from = window - OFF;
185131380Stjr                    if (write == 0) {           /* very common case */
186131380Stjr                        from += wsize - op;
187131380Stjr                        if (op < len) {         /* some from window */
188131380Stjr                            len -= op;
189131380Stjr                            do {
190131380Stjr                                PUP(out) = PUP(from);
191131380Stjr                            } while (--op);
192131380Stjr                            from = out - dist;  /* rest from output */
193131380Stjr                        }
194131380Stjr                    }
195131380Stjr                    else if (write < op) {      /* wrap around window */
196131380Stjr                        from += wsize + write - op;
197131380Stjr                        op -= write;
198131380Stjr                        if (op < len) {         /* some from end of window */
199131380Stjr                            len -= op;
200131380Stjr                            do {
201131380Stjr                                PUP(out) = PUP(from);
202131380Stjr                            } while (--op);
203131380Stjr                            from = window - OFF;
204131380Stjr                            if (write < len) {  /* some from start of window */
205131380Stjr                                op = write;
206131380Stjr                                len -= op;
207131380Stjr                                do {
208131380Stjr                                    PUP(out) = PUP(from);
209131380Stjr                                } while (--op);
210131380Stjr                                from = out - dist;      /* rest from output */
211131380Stjr                            }
212131380Stjr                        }
213131380Stjr                    }
214131380Stjr                    else {                      /* contiguous in window */
215131380Stjr                        from += write - op;
216131380Stjr                        if (op < len) {         /* some from window */
217131380Stjr                            len -= op;
218131380Stjr                            do {
219131380Stjr                                PUP(out) = PUP(from);
220131380Stjr                            } while (--op);
221131380Stjr                            from = out - dist;  /* rest from output */
222131380Stjr                        }
223131380Stjr                    }
224131380Stjr                    while (len > 2) {
225131380Stjr                        PUP(out) = PUP(from);
226131380Stjr                        PUP(out) = PUP(from);
227131380Stjr                        PUP(out) = PUP(from);
228131380Stjr                        len -= 3;
229131380Stjr                    }
230131380Stjr                    if (len) {
231131380Stjr                        PUP(out) = PUP(from);
232131380Stjr                        if (len > 1)
233131380Stjr                            PUP(out) = PUP(from);
234131380Stjr                    }
235131380Stjr                }
236131380Stjr                else {
237131380Stjr                    from = out - dist;          /* copy direct from output */
238131380Stjr                    do {                        /* minimum length is three */
239131380Stjr                        PUP(out) = PUP(from);
240131380Stjr                        PUP(out) = PUP(from);
241131380Stjr                        PUP(out) = PUP(from);
242131380Stjr                        len -= 3;
243131380Stjr                    } while (len > 2);
244131380Stjr                    if (len) {
245131380Stjr                        PUP(out) = PUP(from);
246131380Stjr                        if (len > 1)
247131380Stjr                            PUP(out) = PUP(from);
248131380Stjr                    }
249131380Stjr                }
250131380Stjr            }
251131380Stjr            else if ((op & 64) == 0) {          /* 2nd level distance code */
252131380Stjr                this = dcode[this.val + (hold & ((1U << op) - 1))];
253131380Stjr                goto dodist;
254131380Stjr            }
255131380Stjr            else {
256131380Stjr                strm->msg = (char *)"invalid distance code";
257131380Stjr                state->mode = BAD;
258131380Stjr                break;
259131380Stjr            }
260131380Stjr        }
261131380Stjr        else if ((op & 64) == 0) {              /* 2nd level length code */
262131380Stjr            this = lcode[this.val + (hold & ((1U << op) - 1))];
263131380Stjr            goto dolen;
264131380Stjr        }
265131380Stjr        else if (op & 32) {                     /* end-of-block */
266131380Stjr            Tracevv((stderr, "inflate:         end of block\n"));
267131380Stjr            state->mode = TYPE;
26817651Speter            break;
26917651Speter        }
270131380Stjr        else {
271131380Stjr            strm->msg = (char *)"invalid literal/length code";
272131380Stjr            state->mode = BAD;
273131380Stjr            break;
274131380Stjr        }
275131380Stjr    } while (in < last && out < end);
27617651Speter
277131380Stjr    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
278131380Stjr    len = bits >> 3;
279131380Stjr    in -= len;
280131380Stjr    bits -= len << 3;
281131380Stjr    hold &= (1U << bits) - 1;
282131380Stjr
283131380Stjr    /* update state and return */
284131380Stjr    strm->next_in = in + OFF;
285131380Stjr    strm->next_out = out + OFF;
286131380Stjr    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
287131380Stjr    strm->avail_out = (unsigned)(out < end ?
288131380Stjr                                 257 + (end - out) : 257 - (out - end));
289131380Stjr    state->hold = hold;
290131380Stjr    state->bits = bits;
291131380Stjr    return;
29217651Speter}
293131380Stjr
294131380Stjr/*
295131380Stjr   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
296131380Stjr   - Using bit fields for code structure
297131380Stjr   - Different op definition to avoid & for extra bits (do & for table bits)
298131380Stjr   - Three separate decoding do-loops for direct, window, and write == 0
299131380Stjr   - Special case for distance > 1 copies to do overlapped load and store copy
300131380Stjr   - Explicit branch predictions (based on measured branch probabilities)
301131380Stjr   - Deferring match copy and interspersed it with decoding subsequent codes
302131380Stjr   - Swapping literal/length else
303131380Stjr   - Swapping window/direct else
304131380Stjr   - Larger unrolled copy loops (three is about right)
305131380Stjr   - Moving len -= 3 statement into middle of loop
306131380Stjr */
307131380Stjr
308131380Stjr#endif /* !ASMINF */
309