inffast.c revision 131380
1/* inffast.c -- fast decoding
2 * Copyright (C) 1995-2003 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6#include <sys/cdefs.h>
7__FBSDID("$FreeBSD: head/lib/libz/inffast.c 131380 2004-06-30 23:54:46Z tjr $");
8
9#include "zutil.h"
10#include "inftrees.h"
11#include "inflate.h"
12#include "inffast.h"
13
14#ifndef ASMINF
15
16/* Allow machine dependent optimization for post-increment or pre-increment.
17   Based on testing to date,
18   Pre-increment preferred for:
19   - PowerPC G3 (Adler)
20   - MIPS R5000 (Randers-Pehrson)
21   Post-increment preferred for:
22   - none
23   No measurable difference:
24   - Pentium III (Anderson)
25   - 68060 (Nikl)
26 */
27#ifdef POSTINC
28#  define OFF 0
29#  define PUP(a) *(a)++
30#else
31#  define OFF 1
32#  define PUP(a) *++(a)
33#endif
34
35/*
36   Decode literal, length, and distance codes and write out the resulting
37   literal and match bytes until either not enough input or output is
38   available, an end-of-block is encountered, or a data error is encountered.
39   When large enough input and output buffers are supplied to inflate(), for
40   example, a 16K input buffer and a 64K output buffer, more than 95% of the
41   inflate execution time is spent in this routine.
42
43   Entry assumptions:
44
45        state->mode == LEN
46        strm->avail_in >= 6
47        strm->avail_out >= 258
48        start >= strm->avail_out
49        state->bits < 8
50
51   On return, state->mode is one of:
52
53        LEN -- ran out of enough output space or enough available input
54        TYPE -- reached end of block code, inflate() to interpret next block
55        BAD -- error in block data
56
57   Notes:
58
59    - The maximum input bits used by a length/distance pair is 15 bits for the
60      length code, 5 bits for the length extra, 15 bits for the distance code,
61      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
62      Therefore if strm->avail_in >= 6, then there is enough input to avoid
63      checking for available input while decoding.
64
65    - The maximum bytes that a single length/distance pair can output is 258
66      bytes, which is the maximum length that can be coded.  inflate_fast()
67      requires strm->avail_out >= 258 for each loop to avoid checking for
68      output space.
69 */
70void inflate_fast(strm, start)
71z_streamp strm;
72unsigned start;         /* inflate()'s starting value for strm->avail_out */
73{
74    struct inflate_state FAR *state;
75    unsigned char FAR *in;      /* local strm->next_in */
76    unsigned char FAR *last;    /* while in < last, enough input available */
77    unsigned char FAR *out;     /* local strm->next_out */
78    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
79    unsigned char FAR *end;     /* while out < end, enough space available */
80    unsigned wsize;             /* window size or zero if not using window */
81    unsigned whave;             /* valid bytes in the window */
82    unsigned write;             /* window write index */
83    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
84    unsigned long hold;         /* local strm->hold */
85    unsigned bits;              /* local strm->bits */
86    code const FAR *lcode;      /* local strm->lencode */
87    code const FAR *dcode;      /* local strm->distcode */
88    unsigned lmask;             /* mask for first level of length codes */
89    unsigned dmask;             /* mask for first level of distance codes */
90    code this;                  /* retrieved table entry */
91    unsigned op;                /* code bits, operation, extra bits, or */
92                                /*  window position, window bytes to copy */
93    unsigned len;               /* match length, unused bytes */
94    unsigned dist;              /* match distance */
95    unsigned char FAR *from;    /* where to copy match from */
96
97    /* copy state to local variables */
98    state = (struct inflate_state FAR *)strm->state;
99    in = strm->next_in - OFF;
100    last = in + (strm->avail_in - 5);
101    out = strm->next_out - OFF;
102    beg = out - (start - strm->avail_out);
103    end = out + (strm->avail_out - 257);
104    wsize = state->wsize;
105    whave = state->whave;
106    write = state->write;
107    window = state->window;
108    hold = state->hold;
109    bits = state->bits;
110    lcode = state->lencode;
111    dcode = state->distcode;
112    lmask = (1U << state->lenbits) - 1;
113    dmask = (1U << state->distbits) - 1;
114
115    /* decode literals and length/distances until end-of-block or not enough
116       input data or output space */
117    do {
118        if (bits < 15) {
119            hold += (unsigned long)(PUP(in)) << bits;
120            bits += 8;
121            hold += (unsigned long)(PUP(in)) << bits;
122            bits += 8;
123        }
124        this = lcode[hold & lmask];
125      dolen:
126        op = (unsigned)(this.bits);
127        hold >>= op;
128        bits -= op;
129        op = (unsigned)(this.op);
130        if (op == 0) {                          /* literal */
131            Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
132                    "inflate:         literal '%c'\n" :
133                    "inflate:         literal 0x%02x\n", this.val));
134            PUP(out) = (unsigned char)(this.val);
135        }
136        else if (op & 16) {                     /* length base */
137            len = (unsigned)(this.val);
138            op &= 15;                           /* number of extra bits */
139            if (op) {
140                if (bits < op) {
141                    hold += (unsigned long)(PUP(in)) << bits;
142                    bits += 8;
143                }
144                len += (unsigned)hold & ((1U << op) - 1);
145                hold >>= op;
146                bits -= op;
147            }
148            Tracevv((stderr, "inflate:         length %u\n", len));
149            if (bits < 15) {
150                hold += (unsigned long)(PUP(in)) << bits;
151                bits += 8;
152                hold += (unsigned long)(PUP(in)) << bits;
153                bits += 8;
154            }
155            this = dcode[hold & dmask];
156          dodist:
157            op = (unsigned)(this.bits);
158            hold >>= op;
159            bits -= op;
160            op = (unsigned)(this.op);
161            if (op & 16) {                      /* distance base */
162                dist = (unsigned)(this.val);
163                op &= 15;                       /* number of extra bits */
164                if (bits < op) {
165                    hold += (unsigned long)(PUP(in)) << bits;
166                    bits += 8;
167                    if (bits < op) {
168                        hold += (unsigned long)(PUP(in)) << bits;
169                        bits += 8;
170                    }
171                }
172                dist += (unsigned)hold & ((1U << op) - 1);
173                hold >>= op;
174                bits -= op;
175                Tracevv((stderr, "inflate:         distance %u\n", dist));
176                op = (unsigned)(out - beg);     /* max distance in output */
177                if (dist > op) {                /* see if copy from window */
178                    op = dist - op;             /* distance back in window */
179                    if (op > whave) {
180                        strm->msg = (char *)"invalid distance too far back";
181                        state->mode = BAD;
182                        break;
183                    }
184                    from = window - OFF;
185                    if (write == 0) {           /* very common case */
186                        from += wsize - op;
187                        if (op < len) {         /* some from window */
188                            len -= op;
189                            do {
190                                PUP(out) = PUP(from);
191                            } while (--op);
192                            from = out - dist;  /* rest from output */
193                        }
194                    }
195                    else if (write < op) {      /* wrap around window */
196                        from += wsize + write - op;
197                        op -= write;
198                        if (op < len) {         /* some from end of window */
199                            len -= op;
200                            do {
201                                PUP(out) = PUP(from);
202                            } while (--op);
203                            from = window - OFF;
204                            if (write < len) {  /* some from start of window */
205                                op = write;
206                                len -= op;
207                                do {
208                                    PUP(out) = PUP(from);
209                                } while (--op);
210                                from = out - dist;      /* rest from output */
211                            }
212                        }
213                    }
214                    else {                      /* contiguous in window */
215                        from += write - op;
216                        if (op < len) {         /* some from window */
217                            len -= op;
218                            do {
219                                PUP(out) = PUP(from);
220                            } while (--op);
221                            from = out - dist;  /* rest from output */
222                        }
223                    }
224                    while (len > 2) {
225                        PUP(out) = PUP(from);
226                        PUP(out) = PUP(from);
227                        PUP(out) = PUP(from);
228                        len -= 3;
229                    }
230                    if (len) {
231                        PUP(out) = PUP(from);
232                        if (len > 1)
233                            PUP(out) = PUP(from);
234                    }
235                }
236                else {
237                    from = out - dist;          /* copy direct from output */
238                    do {                        /* minimum length is three */
239                        PUP(out) = PUP(from);
240                        PUP(out) = PUP(from);
241                        PUP(out) = PUP(from);
242                        len -= 3;
243                    } while (len > 2);
244                    if (len) {
245                        PUP(out) = PUP(from);
246                        if (len > 1)
247                            PUP(out) = PUP(from);
248                    }
249                }
250            }
251            else if ((op & 64) == 0) {          /* 2nd level distance code */
252                this = dcode[this.val + (hold & ((1U << op) - 1))];
253                goto dodist;
254            }
255            else {
256                strm->msg = (char *)"invalid distance code";
257                state->mode = BAD;
258                break;
259            }
260        }
261        else if ((op & 64) == 0) {              /* 2nd level length code */
262            this = lcode[this.val + (hold & ((1U << op) - 1))];
263            goto dolen;
264        }
265        else if (op & 32) {                     /* end-of-block */
266            Tracevv((stderr, "inflate:         end of block\n"));
267            state->mode = TYPE;
268            break;
269        }
270        else {
271            strm->msg = (char *)"invalid literal/length code";
272            state->mode = BAD;
273            break;
274        }
275    } while (in < last && out < end);
276
277    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
278    len = bits >> 3;
279    in -= len;
280    bits -= len << 3;
281    hold &= (1U << bits) - 1;
282
283    /* update state and return */
284    strm->next_in = in + OFF;
285    strm->next_out = out + OFF;
286    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
287    strm->avail_out = (unsigned)(out < end ?
288                                 257 + (end - out) : 257 - (out - end));
289    state->hold = hold;
290    state->bits = bits;
291    return;
292}
293
294/*
295   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
296   - Using bit fields for code structure
297   - Different op definition to avoid & for extra bits (do & for table bits)
298   - Three separate decoding do-loops for direct, window, and write == 0
299   - Special case for distance > 1 copies to do overlapped load and store copy
300   - Explicit branch predictions (based on measured branch probabilities)
301   - Deferring match copy and interspersed it with decoding subsequent codes
302   - Swapping literal/length else
303   - Swapping window/direct else
304   - Larger unrolled copy loops (three is about right)
305   - Moving len -= 3 statement into middle of loop
306 */
307
308#endif /* !ASMINF */
309