gzread.c revision 250261
/* gzread.c -- zlib functions for reading gzip files
 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
5
6/* $FreeBSD: head/lib/libz/gzread.c 250261 2013-05-05 06:20:49Z delphij $ */
7
8#include "gzguts.h"
9#include <unistd.h>
10
11/* Local functions */
12local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
13local int gz_avail OF((gz_statep));
14local int gz_look OF((gz_statep));
15local int gz_decomp OF((gz_statep));
16local int gz_fetch OF((gz_statep));
17local int gz_skip OF((gz_statep, z_off64_t));
18
19/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
20   state->fd, and update state->eof, state->err, and state->msg as appropriate.
21   This function needs to loop on read(), since read() is not guaranteed to
22   read the number of bytes requested, depending on the type of descriptor. */
23local int gz_load(state, buf, len, have)
24    gz_statep state;
25    unsigned char *buf;
26    unsigned len;
27    unsigned *have;
28{
29    int ret;
30
31    *have = 0;
32    do {
33        ret = read(state->fd, buf + *have, len - *have);
34        if (ret <= 0)
35            break;
36        *have += ret;
37    } while (*have < len);
38    if (ret < 0) {
39        gz_error(state, Z_ERRNO, zstrerror());
40        return -1;
41    }
42    if (ret == 0)
43        state->eof = 1;
44    return 0;
45}
46
47/* Load up input buffer and set eof flag if last data loaded -- return -1 on
48   error, 0 otherwise.  Note that the eof flag is set when the end of the input
49   file is reached, even though there may be unused data in the buffer.  Once
50   that data has been used, no more attempts will be made to read the file.
51   If strm->avail_in != 0, then the current data is moved to the beginning of
52   the input buffer, and then the remainder of the buffer is loaded with the
53   available data from the input file. */
54local int gz_avail(state)
55    gz_statep state;
56{
57    unsigned got;
58    z_streamp strm = &(state->strm);
59
60    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
61        return -1;
62    if (state->eof == 0) {
63        if (strm->avail_in) {       /* copy what's there to the start */
64            unsigned char *p = state->in;
65            unsigned const char *q = strm->next_in;
66            unsigned n = strm->avail_in;
67            do {
68                *p++ = *q++;
69            } while (--n);
70        }
71        if (gz_load(state, state->in + strm->avail_in,
72                    state->size - strm->avail_in, &got) == -1)
73            return -1;
74        strm->avail_in += got;
75        strm->next_in = state->in;
76    }
77    return 0;
78}
79
80/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
81   If this is the first time in, allocate required memory.  state->how will be
82   left unchanged if there is no more input data available, will be set to COPY
83   if there is no gzip header and direct copying will be performed, or it will
84   be set to GZIP for decompression.  If direct copying, then leftover input
85   data from the input buffer will be copied to the output buffer.  In that
86   case, all further file reads will be directly to either the output buffer or
87   a user buffer.  If decompressing, the inflate state will be initialized.
88   gz_look() will return 0 on success or -1 on failure. */
89local int gz_look(state)
90    gz_statep state;
91{
92    z_streamp strm = &(state->strm);
93
94    /* allocate read buffers and inflate memory */
95    if (state->size == 0) {
96        /* allocate buffers */
97        state->in = (unsigned char *)malloc(state->want);
98        state->out = (unsigned char *)malloc(state->want << 1);
99        if (state->in == NULL || state->out == NULL) {
100            if (state->out != NULL)
101                free(state->out);
102            if (state->in != NULL)
103                free(state->in);
104            gz_error(state, Z_MEM_ERROR, "out of memory");
105            return -1;
106        }
107        state->size = state->want;
108
109        /* allocate inflate memory */
110        state->strm.zalloc = Z_NULL;
111        state->strm.zfree = Z_NULL;
112        state->strm.opaque = Z_NULL;
113        state->strm.avail_in = 0;
114        state->strm.next_in = Z_NULL;
115        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
116            free(state->out);
117            free(state->in);
118            state->size = 0;
119            gz_error(state, Z_MEM_ERROR, "out of memory");
120            return -1;
121        }
122    }
123
124    /* get at least the magic bytes in the input buffer */
125    if (strm->avail_in < 2) {
126        if (gz_avail(state) == -1)
127            return -1;
128        if (strm->avail_in == 0)
129            return 0;
130    }
131
132    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
133       a logical dilemma here when considering the case of a partially written
134       gzip file, to wit, if a single 31 byte is written, then we cannot tell
135       whether this is a single-byte file, or just a partially written gzip
136       file -- for here we assume that if a gzip file is being written, then
137       the header will be written in a single operation, so that reading a
138       single byte is sufficient indication that it is not a gzip file) */
139    if (strm->avail_in > 1 &&
140            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
141        inflateReset(strm);
142        state->how = GZIP;
143        state->direct = 0;
144        return 0;
145    }
146
147    /* no gzip header -- if we were decoding gzip before, then this is trailing
148       garbage.  Ignore the trailing garbage and finish. */
149    if (state->direct == 0) {
150        strm->avail_in = 0;
151        state->eof = 1;
152        state->x.have = 0;
153        return 0;
154    }
155
156    /* doing raw i/o, copy any leftover input to output -- this assumes that
157       the output buffer is larger than the input buffer, which also assures
158       space for gzungetc() */
159    state->x.next = state->out;
160    if (strm->avail_in) {
161        memcpy(state->x.next, strm->next_in, strm->avail_in);
162        state->x.have = strm->avail_in;
163        strm->avail_in = 0;
164    }
165    state->how = COPY;
166    state->direct = 1;
167    return 0;
168}
169
170/* Decompress from input to the provided next_out and avail_out in the state.
171   On return, state->x.have and state->x.next point to the just decompressed
172   data.  If the gzip stream completes, state->how is reset to LOOK to look for
173   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
174   on success, -1 on failure. */
175local int gz_decomp(state)
176    gz_statep state;
177{
178    int ret = Z_OK;
179    unsigned had;
180    z_streamp strm = &(state->strm);
181
182    /* fill output buffer up to end of deflate stream */
183    had = strm->avail_out;
184    do {
185        /* get more input for inflate() */
186        if (strm->avail_in == 0 && gz_avail(state) == -1)
187            return -1;
188        if (strm->avail_in == 0) {
189            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
190            break;
191        }
192
193        /* decompress and handle errors */
194        ret = inflate(strm, Z_NO_FLUSH);
195        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
196            gz_error(state, Z_STREAM_ERROR,
197                     "internal error: inflate stream corrupt");
198            return -1;
199        }
200        if (ret == Z_MEM_ERROR) {
201            gz_error(state, Z_MEM_ERROR, "out of memory");
202            return -1;
203        }
204        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
205            gz_error(state, Z_DATA_ERROR,
206                     strm->msg == NULL ? "compressed data error" : strm->msg);
207            return -1;
208        }
209    } while (strm->avail_out && ret != Z_STREAM_END);
210
211    /* update available output */
212    state->x.have = had - strm->avail_out;
213    state->x.next = strm->next_out - state->x.have;
214
215    /* if the gzip stream completed successfully, look for another */
216    if (ret == Z_STREAM_END)
217        state->how = LOOK;
218
219    /* good decompression */
220    return 0;
221}
222
223/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
224   Data is either copied from the input file or decompressed from the input
225   file depending on state->how.  If state->how is LOOK, then a gzip header is
226   looked for to determine whether to copy or decompress.  Returns -1 on error,
227   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
228   end of the input file has been reached and all data has been processed.  */
229local int gz_fetch(state)
230    gz_statep state;
231{
232    z_streamp strm = &(state->strm);
233
234    do {
235        switch(state->how) {
236        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
237            if (gz_look(state) == -1)
238                return -1;
239            if (state->how == LOOK)
240                return 0;
241            break;
242        case COPY:      /* -> COPY */
243            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
244                    == -1)
245                return -1;
246            state->x.next = state->out;
247            return 0;
248        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
249            strm->avail_out = state->size << 1;
250            strm->next_out = state->out;
251            if (gz_decomp(state) == -1)
252                return -1;
253        }
254    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
255    return 0;
256}
257
258/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
259local int gz_skip(state, len)
260    gz_statep state;
261    z_off64_t len;
262{
263    unsigned n;
264
265    /* skip over len bytes or reach end-of-file, whichever comes first */
266    while (len)
267        /* skip over whatever is in output buffer */
268        if (state->x.have) {
269            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
270                (unsigned)len : state->x.have;
271            state->x.have -= n;
272            state->x.next += n;
273            state->x.pos += n;
274            len -= n;
275        }
276
277        /* output buffer empty -- return if we're at the end of the input */
278        else if (state->eof && state->strm.avail_in == 0)
279            break;
280
281        /* need more data to skip -- load up output buffer */
282        else {
283            /* get more output, looking for header if required */
284            if (gz_fetch(state) == -1)
285                return -1;
286        }
287    return 0;
288}
289
290/* -- see zlib.h -- */
291int ZEXPORT gzread(file, buf, len)
292    gzFile file;
293    voidp buf;
294    unsigned len;
295{
296    unsigned got, n;
297    gz_statep state;
298    z_streamp strm;
299
300    /* get internal structure */
301    if (file == NULL)
302        return -1;
303    state = (gz_statep)file;
304    strm = &(state->strm);
305
306    /* check that we're reading and that there's no (serious) error */
307    if (state->mode != GZ_READ ||
308            (state->err != Z_OK && state->err != Z_BUF_ERROR))
309        return -1;
310
311    /* since an int is returned, make sure len fits in one, otherwise return
312       with an error (this avoids the flaw in the interface) */
313    if ((int)len < 0) {
314        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
315        return -1;
316    }
317
318    /* if len is zero, avoid unnecessary operations */
319    if (len == 0)
320        return 0;
321
322    /* process a skip request */
323    if (state->seek) {
324        state->seek = 0;
325        if (gz_skip(state, state->skip) == -1)
326            return -1;
327    }
328
329    /* get len bytes to buf, or less than len if at the end */
330    got = 0;
331    do {
332        /* first just try copying data from the output buffer */
333        if (state->x.have) {
334            n = state->x.have > len ? len : state->x.have;
335            memcpy(buf, state->x.next, n);
336            state->x.next += n;
337            state->x.have -= n;
338        }
339
340        /* output buffer empty -- return if we're at the end of the input */
341        else if (state->eof && strm->avail_in == 0) {
342            state->past = 1;        /* tried to read past end */
343            break;
344        }
345
346        /* need output data -- for small len or new stream load up our output
347           buffer */
348        else if (state->how == LOOK || len < (state->size << 1)) {
349            /* get more output, looking for header if required */
350            if (gz_fetch(state) == -1)
351                return -1;
352            continue;       /* no progress yet -- go back to copy above */
353            /* the copy above assures that we will leave with space in the
354               output buffer, allowing at least one gzungetc() to succeed */
355        }
356
357        /* large len -- read directly into user buffer */
358        else if (state->how == COPY) {      /* read directly */
359            if (gz_load(state, (unsigned char *)buf, len, &n) == -1)
360                return -1;
361        }
362
363        /* large len -- decompress directly into user buffer */
364        else {  /* state->how == GZIP */
365            strm->avail_out = len;
366            strm->next_out = (unsigned char *)buf;
367            if (gz_decomp(state) == -1)
368                return -1;
369            n = state->x.have;
370            state->x.have = 0;
371        }
372
373        /* update progress */
374        len -= n;
375        buf = (char *)buf + n;
376        got += n;
377        state->x.pos += n;
378    } while (len);
379
380    /* return number of bytes read into user buffer (will fit in int) */
381    return (int)got;
382}
383
384/* -- see zlib.h -- */
385#ifdef Z_PREFIX_SET
386#  undef z_gzgetc
387#else
388#  undef gzgetc
389#endif
390int ZEXPORT gzgetc(file)
391    gzFile file;
392{
393    int ret;
394    unsigned char buf[1];
395    gz_statep state;
396
397    /* get internal structure */
398    if (file == NULL)
399        return -1;
400    state = (gz_statep)file;
401
402    /* check that we're reading and that there's no (serious) error */
403    if (state->mode != GZ_READ ||
404        (state->err != Z_OK && state->err != Z_BUF_ERROR))
405        return -1;
406
407    /* try output buffer (no need to check for skip request) */
408    if (state->x.have) {
409        state->x.have--;
410        state->x.pos++;
411        return *(state->x.next)++;
412    }
413
414    /* nothing there -- try gzread() */
415    ret = gzread(file, buf, 1);
416    return ret < 1 ? -1 : buf[0];
417}
418
419int ZEXPORT gzgetc_(file)
420gzFile file;
421{
422    return gzgetc(file);
423}
424
425/* -- see zlib.h -- */
426int ZEXPORT gzungetc(c, file)
427    int c;
428    gzFile file;
429{
430    gz_statep state;
431
432    /* get internal structure */
433    if (file == NULL)
434        return -1;
435    state = (gz_statep)file;
436
437    /* check that we're reading and that there's no (serious) error */
438    if (state->mode != GZ_READ ||
439        (state->err != Z_OK && state->err != Z_BUF_ERROR))
440        return -1;
441
442    /* process a skip request */
443    if (state->seek) {
444        state->seek = 0;
445        if (gz_skip(state, state->skip) == -1)
446            return -1;
447    }
448
449    /* can't push EOF */
450    if (c < 0)
451        return -1;
452
453    /* if output buffer empty, put byte at end (allows more pushing) */
454    if (state->x.have == 0) {
455        state->x.have = 1;
456        state->x.next = state->out + (state->size << 1) - 1;
457        state->x.next[0] = c;
458        state->x.pos--;
459        state->past = 0;
460        return c;
461    }
462
463    /* if no room, give up (must have already done a gzungetc()) */
464    if (state->x.have == (state->size << 1)) {
465        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
466        return -1;
467    }
468
469    /* slide output data if needed and insert byte before existing data */
470    if (state->x.next == state->out) {
471        unsigned char *src = state->out + state->x.have;
472        unsigned char *dest = state->out + (state->size << 1);
473        while (src > state->out)
474            *--dest = *--src;
475        state->x.next = dest;
476    }
477    state->x.have++;
478    state->x.next--;
479    state->x.next[0] = c;
480    state->x.pos--;
481    state->past = 0;
482    return c;
483}
484
485/* -- see zlib.h -- */
486char * ZEXPORT gzgets(file, buf, len)
487    gzFile file;
488    char *buf;
489    int len;
490{
491    unsigned left, n;
492    char *str;
493    unsigned char *eol;
494    gz_statep state;
495
496    /* check parameters and get internal structure */
497    if (file == NULL || buf == NULL || len < 1)
498        return NULL;
499    state = (gz_statep)file;
500
501    /* check that we're reading and that there's no (serious) error */
502    if (state->mode != GZ_READ ||
503        (state->err != Z_OK && state->err != Z_BUF_ERROR))
504        return NULL;
505
506    /* process a skip request */
507    if (state->seek) {
508        state->seek = 0;
509        if (gz_skip(state, state->skip) == -1)
510            return NULL;
511    }
512
513    /* copy output bytes up to new line or len - 1, whichever comes first --
514       append a terminating zero to the string (we don't check for a zero in
515       the contents, let the user worry about that) */
516    str = buf;
517    left = (unsigned)len - 1;
518    if (left) do {
519        /* assure that something is in the output buffer */
520        if (state->x.have == 0 && gz_fetch(state) == -1)
521            return NULL;                /* error */
522        if (state->x.have == 0) {       /* end of file */
523            state->past = 1;            /* read past end */
524            break;                      /* return what we have */
525        }
526
527        /* look for end-of-line in current output buffer */
528        n = state->x.have > left ? left : state->x.have;
529        eol = (unsigned char *)memchr(state->x.next, '\n', n);
530        if (eol != NULL)
531            n = (unsigned)(eol - state->x.next) + 1;
532
533        /* copy through end-of-line, or remainder if not found */
534        memcpy(buf, state->x.next, n);
535        state->x.have -= n;
536        state->x.next += n;
537        state->x.pos += n;
538        left -= n;
539        buf += n;
540    } while (left && eol == NULL);
541
542    /* return terminated string, or if nothing, end of file */
543    if (buf == str)
544        return NULL;
545    buf[0] = 0;
546    return str;
547}
548
549/* -- see zlib.h -- */
550int ZEXPORT gzdirect(file)
551    gzFile file;
552{
553    gz_statep state;
554
555    /* get internal structure */
556    if (file == NULL)
557        return 0;
558    state = (gz_statep)file;
559
560    /* if the state is not known, but we can find out, then do so (this is
561       mainly for right after a gzopen() or gzdopen()) */
562    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
563        (void)gz_look(state);
564
565    /* return 1 if transparent, 0 if processing a gzip stream */
566    return state->direct;
567}
568
569/* -- see zlib.h -- */
570int ZEXPORT gzclose_r(file)
571    gzFile file;
572{
573    int ret, err;
574    gz_statep state;
575
576    /* get internal structure */
577    if (file == NULL)
578        return Z_STREAM_ERROR;
579    state = (gz_statep)file;
580
581    /* check that we're reading */
582    if (state->mode != GZ_READ)
583        return Z_STREAM_ERROR;
584
585    /* free memory and close file */
586    if (state->size) {
587        inflateEnd(&(state->strm));
588        free(state->out);
589        free(state->in);
590    }
591    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
592    gz_error(state, Z_OK, NULL);
593    free(state->path);
594    ret = close(state->fd);
595    free(state);
596    return ret ? Z_ERRNO : err;
597}
598