1168404Spjd/* inffast.c -- fast decoding 2168404Spjd * Copyright (C) 1995-2004 Mark Adler 3168404Spjd * For conditions of distribution and use, see copyright notice in zlib.h 4168404Spjd */ 5168404Spjd 6168404Spjd#pragma ident "%Z%%M% %I% %E% SMI" 7168404Spjd 8168404Spjd#include "zutil.h" 9168404Spjd#include "inftrees.h" 10168404Spjd#include "inflate.h" 11168404Spjd#include "inffast.h" 12168404Spjd 13168404Spjd#ifndef ASMINF 14168404Spjd 15168404Spjd/* Allow machine dependent optimization for post-increment or pre-increment. 16168404Spjd Based on testing to date, 17168404Spjd Pre-increment preferred for: 18168404Spjd - PowerPC G3 (Adler) 19168404Spjd - MIPS R5000 (Randers-Pehrson) 20168404Spjd Post-increment preferred for: 21168404Spjd - none 22168404Spjd No measurable difference: 23168404Spjd - Pentium III (Anderson) 24168404Spjd - M68060 (Nikl) 25168404Spjd */ 26168404Spjd#ifdef POSTINC 27168404Spjd# define OFF 0 28168404Spjd# define PUP(a) *(a)++ 29168404Spjd#else 30168404Spjd# define OFF 1 31168404Spjd# define PUP(a) *++(a) 32168404Spjd#endif 33168404Spjd 34168404Spjd/* 35168404Spjd Decode literal, length, and distance codes and write out the resulting 36168404Spjd literal and match bytes until either not enough input or output is 37168404Spjd available, an end-of-block is encountered, or a data error is encountered. 38168404Spjd When large enough input and output buffers are supplied to inflate(), for 39168404Spjd example, a 16K input buffer and a 64K output buffer, more than 95% of the 40168404Spjd inflate execution time is spent in this routine. 41168404Spjd 42168404Spjd Entry assumptions: 43168404Spjd 44168404Spjd state->mode == LEN 45168404Spjd strm->avail_in >= 6 46168404Spjd strm->avail_out >= 258 47168404Spjd start >= strm->avail_out 48168404Spjd state->bits < 8 49168404Spjd 50168404Spjd On return, state->mode is one of: 51168404Spjd 52168404Spjd LEN -- ran out of enough output space or enough available input 53168404Spjd TYPE -- reached end of block code, inflate() to interpret next block 54168404Spjd BAD -- error in block data 55168404Spjd 56168404Spjd Notes: 57168404Spjd 58168404Spjd - The maximum input bits used by a length/distance pair is 15 bits for the 59168404Spjd length code, 5 bits for the length extra, 15 bits for the distance code, 60168404Spjd and 13 bits for the distance extra. This totals 48 bits, or six bytes. 61168404Spjd Therefore if strm->avail_in >= 6, then there is enough input to avoid 62168404Spjd checking for available input while decoding. 63168404Spjd 64168404Spjd - The maximum bytes that a single length/distance pair can output is 258 65168404Spjd bytes, which is the maximum length that can be coded. inflate_fast() 66168404Spjd requires strm->avail_out >= 258 for each loop to avoid checking for 67168404Spjd output space. 68168404Spjd */ 69168404Spjdvoid inflate_fast(strm, start) 70168404Spjdz_streamp strm; 71168404Spjdunsigned start; /* inflate()'s starting value for strm->avail_out */ 72168404Spjd{ 73168404Spjd struct inflate_state FAR *state; 74168404Spjd unsigned char FAR *in; /* local strm->next_in */ 75168404Spjd unsigned char FAR *last; /* while in < last, enough input available */ 76168404Spjd unsigned char FAR *out; /* local strm->next_out */ 77168404Spjd unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ 78168404Spjd unsigned char FAR *end; /* while out < end, enough space available */ 79168404Spjd#ifdef INFLATE_STRICT 80168404Spjd unsigned dmax; /* maximum distance from zlib header */ 81168404Spjd#endif 82168404Spjd unsigned wsize; /* window size or zero if not using window */ 83168404Spjd unsigned whave; /* valid bytes in the window */ 84168404Spjd unsigned write; /* window write index */ 85168404Spjd unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ 86168404Spjd unsigned long hold; /* local strm->hold */ 87168404Spjd unsigned bits; /* local strm->bits */ 88168404Spjd code const FAR *lcode; /* local strm->lencode */ 89168404Spjd code const FAR *dcode; /* local strm->distcode */ 90168404Spjd unsigned lmask; /* mask for first level of length codes */ 91168404Spjd unsigned dmask; /* mask for first level of distance codes */ 92168404Spjd code this; /* retrieved table entry */ 93168404Spjd unsigned op; /* code bits, operation, extra bits, or */ 94168404Spjd /* window position, window bytes to copy */ 95168404Spjd unsigned len; /* match length, unused bytes */ 96168404Spjd unsigned dist; /* match distance */ 97168404Spjd unsigned char FAR *from; /* where to copy match from */ 98168404Spjd 99168404Spjd /* copy state to local variables */ 100168404Spjd state = (struct inflate_state FAR *)strm->state; 101168404Spjd in = strm->next_in - OFF; 102168404Spjd last = in + (strm->avail_in - 5); 103168404Spjd out = strm->next_out - OFF; 104168404Spjd beg = out - (start - strm->avail_out); 105168404Spjd end = out + (strm->avail_out - 257); 106168404Spjd#ifdef INFLATE_STRICT 107168404Spjd dmax = state->dmax; 108168404Spjd#endif 109168404Spjd wsize = state->wsize; 110168404Spjd whave = state->whave; 111168404Spjd write = state->write; 112168404Spjd window = state->window; 113168404Spjd hold = state->hold; 114168404Spjd bits = state->bits; 115168404Spjd lcode = state->lencode; 116168404Spjd dcode = state->distcode; 117168404Spjd lmask = (1U << state->lenbits) - 1; 118168404Spjd dmask = (1U << state->distbits) - 1; 119168404Spjd 120168404Spjd /* decode literals and length/distances until end-of-block or not enough 121168404Spjd input data or output space */ 122168404Spjd do { 123168404Spjd if (bits < 15) { 124168404Spjd hold += (unsigned long)(PUP(in)) << bits; 125168404Spjd bits += 8; 126168404Spjd hold += (unsigned long)(PUP(in)) << bits; 127168404Spjd bits += 8; 128168404Spjd } 129168404Spjd this = lcode[hold & lmask]; 130168404Spjd dolen: 131168404Spjd op = (unsigned)(this.bits); 132168404Spjd hold >>= op; 133168404Spjd bits -= op; 134168404Spjd op = (unsigned)(this.op); 135168404Spjd if (op == 0) { /* literal */ 136168404Spjd Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? 137168404Spjd "inflate: literal '%c'\n" : 138168404Spjd "inflate: literal 0x%02x\n", this.val)); 139168404Spjd PUP(out) = (unsigned char)(this.val); 140168404Spjd } 141168404Spjd else if (op & 16) { /* length base */ 142168404Spjd len = (unsigned)(this.val); 143168404Spjd op &= 15; /* number of extra bits */ 144168404Spjd if (op) { 145168404Spjd if (bits < op) { 146168404Spjd hold += (unsigned long)(PUP(in)) << bits; 147168404Spjd bits += 8; 148168404Spjd } 149168404Spjd len += (unsigned)hold & ((1U << op) - 1); 150168404Spjd hold >>= op; 151168404Spjd bits -= op; 152168404Spjd } 153168404Spjd Tracevv((stderr, "inflate: length %u\n", len)); 154168404Spjd if (bits < 15) { 155168404Spjd hold += (unsigned long)(PUP(in)) << bits; 156168404Spjd bits += 8; 157168404Spjd hold += (unsigned long)(PUP(in)) << bits; 158168404Spjd bits += 8; 159168404Spjd } 160168404Spjd this = dcode[hold & dmask]; 161168404Spjd dodist: 162168404Spjd op = (unsigned)(this.bits); 163168404Spjd hold >>= op; 164168404Spjd bits -= op; 165168404Spjd op = (unsigned)(this.op); 166168404Spjd if (op & 16) { /* distance base */ 167168404Spjd dist = (unsigned)(this.val); 168168404Spjd op &= 15; /* number of extra bits */ 169168404Spjd if (bits < op) { 170168404Spjd hold += (unsigned long)(PUP(in)) << bits; 171168404Spjd bits += 8; 172168404Spjd if (bits < op) { 173168404Spjd hold += (unsigned long)(PUP(in)) << bits; 174168404Spjd bits += 8; 175168404Spjd } 176168404Spjd } 177168404Spjd dist += (unsigned)hold & ((1U << op) - 1); 178168404Spjd#ifdef INFLATE_STRICT 179168404Spjd if (dist > dmax) { 180168404Spjd strm->msg = (char *)"invalid distance too far back"; 181168404Spjd state->mode = BAD; 182168404Spjd break; 183168404Spjd } 184168404Spjd#endif 185168404Spjd hold >>= op; 186168404Spjd bits -= op; 187168404Spjd Tracevv((stderr, "inflate: distance %u\n", dist)); 188168404Spjd op = (unsigned)(out - beg); /* max distance in output */ 189168404Spjd if (dist > op) { /* see if copy from window */ 190168404Spjd op = dist - op; /* distance back in window */ 191168404Spjd if (op > whave) { 192168404Spjd strm->msg = (char *)"invalid distance too far back"; 193168404Spjd state->mode = BAD; 194168404Spjd break; 195168404Spjd } 196168404Spjd from = window - OFF; 197168404Spjd if (write == 0) { /* very common case */ 198168404Spjd from += wsize - op; 199168404Spjd if (op < len) { /* some from window */ 200168404Spjd len -= op; 201168404Spjd do { 202168404Spjd PUP(out) = PUP(from); 203168404Spjd } while (--op); 204168404Spjd from = out - dist; /* rest from output */ 205168404Spjd } 206168404Spjd } 207168404Spjd else if (write < op) { /* wrap around window */ 208168404Spjd from += wsize + write - op; 209168404Spjd op -= write; 210168404Spjd if (op < len) { /* some from end of window */ 211168404Spjd len -= op; 212168404Spjd do { 213168404Spjd PUP(out) = PUP(from); 214168404Spjd } while (--op); 215168404Spjd from = window - OFF; 216168404Spjd if (write < len) { /* some from start of window */ 217168404Spjd op = write; 218168404Spjd len -= op; 219168404Spjd do { 220168404Spjd PUP(out) = PUP(from); 221168404Spjd } while (--op); 222168404Spjd from = out - dist; /* rest from output */ 223168404Spjd } 224168404Spjd } 225168404Spjd } 226168404Spjd else { /* contiguous in window */ 227168404Spjd from += write - op; 228168404Spjd if (op < len) { /* some from window */ 229168404Spjd len -= op; 230168404Spjd do { 231168404Spjd PUP(out) = PUP(from); 232168404Spjd } while (--op); 233168404Spjd from = out - dist; /* rest from output */ 234168404Spjd } 235168404Spjd } 236168404Spjd while (len > 2) { 237168404Spjd PUP(out) = PUP(from); 238168404Spjd PUP(out) = PUP(from); 239168404Spjd PUP(out) = PUP(from); 240168404Spjd len -= 3; 241168404Spjd } 242168404Spjd if (len) { 243168404Spjd PUP(out) = PUP(from); 244168404Spjd if (len > 1) 245168404Spjd PUP(out) = PUP(from); 246168404Spjd } 247168404Spjd } 248168404Spjd else { 249168404Spjd from = out - dist; /* copy direct from output */ 250168404Spjd do { /* minimum length is three */ 251168404Spjd PUP(out) = PUP(from); 252168404Spjd PUP(out) = PUP(from); 253168404Spjd PUP(out) = PUP(from); 254168404Spjd len -= 3; 255168404Spjd } while (len > 2); 256168404Spjd if (len) { 257168404Spjd PUP(out) = PUP(from); 258168404Spjd if (len > 1) 259168404Spjd PUP(out) = PUP(from); 260168404Spjd } 261168404Spjd } 262168404Spjd } 263168404Spjd else if ((op & 64) == 0) { /* 2nd level distance code */ 264168404Spjd this = dcode[this.val + (hold & ((1U << op) - 1))]; 265168404Spjd goto dodist; 266168404Spjd } 267168404Spjd else { 268168404Spjd strm->msg = (char *)"invalid distance code"; 269168404Spjd state->mode = BAD; 270168404Spjd break; 271168404Spjd } 272168404Spjd } 273168404Spjd else if ((op & 64) == 0) { /* 2nd level length code */ 274168404Spjd this = lcode[this.val + (hold & ((1U << op) - 1))]; 275168404Spjd goto dolen; 276168404Spjd } 277168404Spjd else if (op & 32) { /* end-of-block */ 278168404Spjd Tracevv((stderr, "inflate: end of block\n")); 279168404Spjd state->mode = TYPE; 280168404Spjd break; 281168404Spjd } 282168404Spjd else { 283168404Spjd strm->msg = (char *)"invalid literal/length code"; 284168404Spjd state->mode = BAD; 285168404Spjd break; 286168404Spjd } 287168404Spjd } while (in < last && out < end); 288168404Spjd 289168404Spjd /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 290168404Spjd len = bits >> 3; 291168404Spjd in -= len; 292168404Spjd bits -= len << 3; 293168404Spjd hold &= (1U << bits) - 1; 294168404Spjd 295168404Spjd /* update state and return */ 296168404Spjd strm->next_in = in + OFF; 297168404Spjd strm->next_out = out + OFF; 298168404Spjd strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 299168404Spjd strm->avail_out = (unsigned)(out < end ? 300168404Spjd 257 + (end - out) : 257 - (out - end)); 301168404Spjd state->hold = hold; 302168404Spjd state->bits = bits; 303168404Spjd return; 304168404Spjd} 305168404Spjd 306168404Spjd/* 307168404Spjd inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 308168404Spjd - Using bit fields for code structure 309168404Spjd - Different op definition to avoid & for extra bits (do & for table bits) 310168404Spjd - Three separate decoding do-loops for direct, window, and write == 0 311168404Spjd - Special case for distance > 1 copies to do overlapped load and store copy 312168404Spjd - Explicit branch predictions (based on measured branch probabilities) 313168404Spjd - Deferring match copy and interspersed it with decoding subsequent codes 314168404Spjd - Swapping literal/length else 315168404Spjd - Swapping window/direct else 316168404Spjd - Larger unrolled copy loops (three is about right) 317168404Spjd - Moving len -= 3 statement into middle of loop 318168404Spjd */ 319168404Spjd 320168404Spjd#endif /* !ASMINF */ 321