1/* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004-2017 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6#include "gzguts.h"
7#include <unistd.h>
8
9/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
10   state->fd, and update state->eof, state->err, and state->msg as appropriate.
11   This function needs to loop on read(), since read() is not guaranteed to
12   read the number of bytes requested, depending on the type of descriptor. */
13local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
14                  unsigned *have) {
15    int ret;
16    unsigned get, max = ((unsigned)-1 >> 2) + 1;
17
18    *have = 0;
19    do {
20        get = len - *have;
21        if (get > max)
22            get = max;
23        ret = read(state->fd, buf + *have, get);
24        if (ret <= 0)
25            break;
26        *have += (unsigned)ret;
27    } while (*have < len);
28    if (ret < 0) {
29        gz_error(state, Z_ERRNO, zstrerror());
30        return -1;
31    }
32    if (ret == 0)
33        state->eof = 1;
34    return 0;
35}
36
37/* Load up input buffer and set eof flag if last data loaded -- return -1 on
38   error, 0 otherwise.  Note that the eof flag is set when the end of the input
39   file is reached, even though there may be unused data in the buffer.  Once
40   that data has been used, no more attempts will be made to read the file.
41   If strm->avail_in != 0, then the current data is moved to the beginning of
42   the input buffer, and then the remainder of the buffer is loaded with the
43   available data from the input file. */
44local int gz_avail(gz_statep state) {
45    unsigned got;
46    z_streamp strm = &(state->strm);
47
48    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
49        return -1;
50    if (state->eof == 0) {
51        if (strm->avail_in) {       /* copy what's there to the start */
52            unsigned char *p = state->in;
53            unsigned const char *q = strm->next_in;
54            unsigned n = strm->avail_in;
55            do {
56                *p++ = *q++;
57            } while (--n);
58        }
59        if (gz_load(state, state->in + strm->avail_in,
60                    state->size - strm->avail_in, &got) == -1)
61            return -1;
62        strm->avail_in += got;
63        strm->next_in = state->in;
64    }
65    return 0;
66}
67
68/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
69   If this is the first time in, allocate required memory.  state->how will be
70   left unchanged if there is no more input data available, will be set to COPY
71   if there is no gzip header and direct copying will be performed, or it will
72   be set to GZIP for decompression.  If direct copying, then leftover input
73   data from the input buffer will be copied to the output buffer.  In that
74   case, all further file reads will be directly to either the output buffer or
75   a user buffer.  If decompressing, the inflate state will be initialized.
76   gz_look() will return 0 on success or -1 on failure. */
77local int gz_look(gz_statep state) {
78    z_streamp strm = &(state->strm);
79
80    /* allocate read buffers and inflate memory */
81    if (state->size == 0) {
82        /* allocate buffers */
83        state->in = (unsigned char *)malloc(state->want);
84        state->out = (unsigned char *)malloc(state->want << 1);
85        if (state->in == NULL || state->out == NULL) {
86            free(state->out);
87            free(state->in);
88            gz_error(state, Z_MEM_ERROR, "out of memory");
89            return -1;
90        }
91        state->size = state->want;
92
93        /* allocate inflate memory */
94        state->strm.zalloc = Z_NULL;
95        state->strm.zfree = Z_NULL;
96        state->strm.opaque = Z_NULL;
97        state->strm.avail_in = 0;
98        state->strm.next_in = Z_NULL;
99        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
100            free(state->out);
101            free(state->in);
102            state->size = 0;
103            gz_error(state, Z_MEM_ERROR, "out of memory");
104            return -1;
105        }
106    }
107
108    /* get at least the magic bytes in the input buffer */
109    if (strm->avail_in < 2) {
110        if (gz_avail(state) == -1)
111            return -1;
112        if (strm->avail_in == 0)
113            return 0;
114    }
115
116    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
117       a logical dilemma here when considering the case of a partially written
118       gzip file, to wit, if a single 31 byte is written, then we cannot tell
119       whether this is a single-byte file, or just a partially written gzip
120       file -- for here we assume that if a gzip file is being written, then
121       the header will be written in a single operation, so that reading a
122       single byte is sufficient indication that it is not a gzip file) */
123    if (strm->avail_in > 1 &&
124            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
125        inflateReset(strm);
126        state->how = GZIP;
127        state->direct = 0;
128        return 0;
129    }
130
131    /* no gzip header -- if we were decoding gzip before, then this is trailing
132       garbage.  Ignore the trailing garbage and finish. */
133    if (state->direct == 0) {
134        strm->avail_in = 0;
135        state->eof = 1;
136        state->x.have = 0;
137        return 0;
138    }
139
140    /* doing raw i/o, copy any leftover input to output -- this assumes that
141       the output buffer is larger than the input buffer, which also assures
142       space for gzungetc() */
143    state->x.next = state->out;
144    memcpy(state->x.next, strm->next_in, strm->avail_in);
145    state->x.have = strm->avail_in;
146    strm->avail_in = 0;
147    state->how = COPY;
148    state->direct = 1;
149    return 0;
150}
151
152/* Decompress from input to the provided next_out and avail_out in the state.
153   On return, state->x.have and state->x.next point to the just decompressed
154   data.  If the gzip stream completes, state->how is reset to LOOK to look for
155   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
156   on success, -1 on failure. */
157local int gz_decomp(gz_statep state) {
158    int ret = Z_OK;
159    unsigned had;
160    z_streamp strm = &(state->strm);
161
162    /* fill output buffer up to end of deflate stream */
163    had = strm->avail_out;
164    do {
165        /* get more input for inflate() */
166        if (strm->avail_in == 0 && gz_avail(state) == -1)
167            return -1;
168        if (strm->avail_in == 0) {
169            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
170            break;
171        }
172
173        /* decompress and handle errors */
174        ret = inflate(strm, Z_NO_FLUSH);
175        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
176            gz_error(state, Z_STREAM_ERROR,
177                     "internal error: inflate stream corrupt");
178            return -1;
179        }
180        if (ret == Z_MEM_ERROR) {
181            gz_error(state, Z_MEM_ERROR, "out of memory");
182            return -1;
183        }
184        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
185            gz_error(state, Z_DATA_ERROR,
186                     strm->msg == NULL ? "compressed data error" : strm->msg);
187            return -1;
188        }
189    } while (strm->avail_out && ret != Z_STREAM_END);
190
191    /* update available output */
192    state->x.have = had - strm->avail_out;
193    state->x.next = strm->next_out - state->x.have;
194
195    /* if the gzip stream completed successfully, look for another */
196    if (ret == Z_STREAM_END)
197        state->how = LOOK;
198
199    /* good decompression */
200    return 0;
201}
202
203/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
204   Data is either copied from the input file or decompressed from the input
205   file depending on state->how.  If state->how is LOOK, then a gzip header is
206   looked for to determine whether to copy or decompress.  Returns -1 on error,
207   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
208   end of the input file has been reached and all data has been processed.  */
209local int gz_fetch(gz_statep state) {
210    z_streamp strm = &(state->strm);
211
212    do {
213        switch(state->how) {
214        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
215            if (gz_look(state) == -1)
216                return -1;
217            if (state->how == LOOK)
218                return 0;
219            break;
220        case COPY:      /* -> COPY */
221            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
222                    == -1)
223                return -1;
224            state->x.next = state->out;
225            return 0;
226        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
227            strm->avail_out = state->size << 1;
228            strm->next_out = state->out;
229            if (gz_decomp(state) == -1)
230                return -1;
231        }
232    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
233    return 0;
234}
235
236/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
237local int gz_skip(gz_statep state, z_off64_t len) {
238    unsigned n;
239
240    /* skip over len bytes or reach end-of-file, whichever comes first */
241    while (len)
242        /* skip over whatever is in output buffer */
243        if (state->x.have) {
244            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
245                (unsigned)len : state->x.have;
246            state->x.have -= n;
247            state->x.next += n;
248            state->x.pos += n;
249            len -= n;
250        }
251
252        /* output buffer empty -- return if we're at the end of the input */
253        else if (state->eof && state->strm.avail_in == 0)
254            break;
255
256        /* need more data to skip -- load up output buffer */
257        else {
258            /* get more output, looking for header if required */
259            if (gz_fetch(state) == -1)
260                return -1;
261        }
262    return 0;
263}
264
265/* Read len bytes into buf from file, or less than len up to the end of the
266   input.  Return the number of bytes read.  If zero is returned, either the
267   end of file was reached, or there was an error.  state->err must be
268   consulted in that case to determine which. */
269local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
270    z_size_t got;
271    unsigned n;
272
273    /* if len is zero, avoid unnecessary operations */
274    if (len == 0)
275        return 0;
276
277    /* process a skip request */
278    if (state->seek) {
279        state->seek = 0;
280        if (gz_skip(state, state->skip) == -1)
281            return 0;
282    }
283
284    /* get len bytes to buf, or less than len if at the end */
285    got = 0;
286    do {
287        /* set n to the maximum amount of len that fits in an unsigned int */
288        n = (unsigned)-1;
289        if (n > len)
290            n = (unsigned)len;
291
292        /* first just try copying data from the output buffer */
293        if (state->x.have) {
294            if (state->x.have < n)
295                n = state->x.have;
296            memcpy(buf, state->x.next, n);
297            state->x.next += n;
298            state->x.have -= n;
299        }
300
301        /* output buffer empty -- return if we're at the end of the input */
302        else if (state->eof && state->strm.avail_in == 0) {
303            state->past = 1;        /* tried to read past end */
304            break;
305        }
306
307        /* need output data -- for small len or new stream load up our output
308           buffer */
309        else if (state->how == LOOK || n < (state->size << 1)) {
310            /* get more output, looking for header if required */
311            if (gz_fetch(state) == -1)
312                return 0;
313            continue;       /* no progress yet -- go back to copy above */
314            /* the copy above assures that we will leave with space in the
315               output buffer, allowing at least one gzungetc() to succeed */
316        }
317
318        /* large len -- read directly into user buffer */
319        else if (state->how == COPY) {      /* read directly */
320            if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
321                return 0;
322        }
323
324        /* large len -- decompress directly into user buffer */
325        else {  /* state->how == GZIP */
326            state->strm.avail_out = n;
327            state->strm.next_out = (unsigned char *)buf;
328            if (gz_decomp(state) == -1)
329                return 0;
330            n = state->x.have;
331            state->x.have = 0;
332        }
333
334        /* update progress */
335        len -= n;
336        buf = (char *)buf + n;
337        got += n;
338        state->x.pos += n;
339    } while (len);
340
341    /* return number of bytes read into user buffer */
342    return got;
343}
344
345/* -- see zlib.h -- */
346int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
347    gz_statep state;
348
349    /* get internal structure */
350    if (file == NULL)
351        return -1;
352    state = (gz_statep)file;
353
354    /* check that we're reading and that there's no (serious) error */
355    if (state->mode != GZ_READ ||
356            (state->err != Z_OK && state->err != Z_BUF_ERROR))
357        return -1;
358
359    /* since an int is returned, make sure len fits in one, otherwise return
360       with an error (this avoids a flaw in the interface) */
361    if ((int)len < 0) {
362        gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
363        return -1;
364    }
365
366    /* read len or fewer bytes to buf */
367    len = (unsigned)gz_read(state, buf, len);
368
369    /* check for an error */
370    if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
371        return -1;
372
373    /* return the number of bytes read (this is assured to fit in an int) */
374    return (int)len;
375}
376
377/* -- see zlib.h -- */
378z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, gzFile file) {
379    z_size_t len;
380    gz_statep state;
381
382    /* get internal structure */
383    if (file == NULL)
384        return 0;
385    state = (gz_statep)file;
386
387    /* check that we're reading and that there's no (serious) error */
388    if (state->mode != GZ_READ ||
389            (state->err != Z_OK && state->err != Z_BUF_ERROR))
390        return 0;
391
392    /* compute bytes to read -- error on overflow */
393    len = nitems * size;
394    if (size && len / size != nitems) {
395        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
396        return 0;
397    }
398
399    /* read len or fewer bytes to buf, return the number of full items read */
400    return len ? gz_read(state, buf, len) / size : 0;
401}
402
403/* -- see zlib.h -- */
404#ifdef Z_PREFIX_SET
405#  undef z_gzgetc
406#else
407#  undef gzgetc
408#endif
409int ZEXPORT gzgetc(gzFile file) {
410    unsigned char buf[1];
411    gz_statep state;
412
413    /* get internal structure */
414    if (file == NULL)
415        return -1;
416    state = (gz_statep)file;
417
418    /* check that we're reading and that there's no (serious) error */
419    if (state->mode != GZ_READ ||
420        (state->err != Z_OK && state->err != Z_BUF_ERROR))
421        return -1;
422
423    /* try output buffer (no need to check for skip request) */
424    if (state->x.have) {
425        state->x.have--;
426        state->x.pos++;
427        return *(state->x.next)++;
428    }
429
430    /* nothing there -- try gz_read() */
431    return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
432}
433
434int ZEXPORT gzgetc_(gzFile file) {
435    return gzgetc(file);
436}
437
438/* -- see zlib.h -- */
439int ZEXPORT gzungetc(int c, gzFile file) {
440    gz_statep state;
441
442    /* get internal structure */
443    if (file == NULL)
444        return -1;
445    state = (gz_statep)file;
446
447    /* in case this was just opened, set up the input buffer */
448    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
449        (void)gz_look(state);
450
451    /* check that we're reading and that there's no (serious) error */
452    if (state->mode != GZ_READ ||
453        (state->err != Z_OK && state->err != Z_BUF_ERROR))
454        return -1;
455
456    /* process a skip request */
457    if (state->seek) {
458        state->seek = 0;
459        if (gz_skip(state, state->skip) == -1)
460            return -1;
461    }
462
463    /* can't push EOF */
464    if (c < 0)
465        return -1;
466
467    /* if output buffer empty, put byte at end (allows more pushing) */
468    if (state->x.have == 0) {
469        state->x.have = 1;
470        state->x.next = state->out + (state->size << 1) - 1;
471        state->x.next[0] = (unsigned char)c;
472        state->x.pos--;
473        state->past = 0;
474        return c;
475    }
476
477    /* if no room, give up (must have already done a gzungetc()) */
478    if (state->x.have == (state->size << 1)) {
479        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
480        return -1;
481    }
482
483    /* slide output data if needed and insert byte before existing data */
484    if (state->x.next == state->out) {
485        unsigned char *src = state->out + state->x.have;
486        unsigned char *dest = state->out + (state->size << 1);
487        while (src > state->out)
488            *--dest = *--src;
489        state->x.next = dest;
490    }
491    state->x.have++;
492    state->x.next--;
493    state->x.next[0] = (unsigned char)c;
494    state->x.pos--;
495    state->past = 0;
496    return c;
497}
498
499/* -- see zlib.h -- */
500char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
501    unsigned left, n;
502    char *str;
503    unsigned char *eol;
504    gz_statep state;
505
506    /* check parameters and get internal structure */
507    if (file == NULL || buf == NULL || len < 1)
508        return NULL;
509    state = (gz_statep)file;
510
511    /* check that we're reading and that there's no (serious) error */
512    if (state->mode != GZ_READ ||
513        (state->err != Z_OK && state->err != Z_BUF_ERROR))
514        return NULL;
515
516    /* process a skip request */
517    if (state->seek) {
518        state->seek = 0;
519        if (gz_skip(state, state->skip) == -1)
520            return NULL;
521    }
522
523    /* copy output bytes up to new line or len - 1, whichever comes first --
524       append a terminating zero to the string (we don't check for a zero in
525       the contents, let the user worry about that) */
526    str = buf;
527    left = (unsigned)len - 1;
528    if (left) do {
529        /* assure that something is in the output buffer */
530        if (state->x.have == 0 && gz_fetch(state) == -1)
531            return NULL;                /* error */
532        if (state->x.have == 0) {       /* end of file */
533            state->past = 1;            /* read past end */
534            break;                      /* return what we have */
535        }
536
537        /* look for end-of-line in current output buffer */
538        n = state->x.have > left ? left : state->x.have;
539        eol = (unsigned char *)memchr(state->x.next, '\n', n);
540        if (eol != NULL)
541            n = (unsigned)(eol - state->x.next) + 1;
542
543        /* copy through end-of-line, or remainder if not found */
544        memcpy(buf, state->x.next, n);
545        state->x.have -= n;
546        state->x.next += n;
547        state->x.pos += n;
548        left -= n;
549        buf += n;
550    } while (left && eol == NULL);
551
552    /* return terminated string, or if nothing, end of file */
553    if (buf == str)
554        return NULL;
555    buf[0] = 0;
556    return str;
557}
558
559/* -- see zlib.h -- */
560int ZEXPORT gzdirect(gzFile file) {
561    gz_statep state;
562
563    /* get internal structure */
564    if (file == NULL)
565        return 0;
566    state = (gz_statep)file;
567
568    /* if the state is not known, but we can find out, then do so (this is
569       mainly for right after a gzopen() or gzdopen()) */
570    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
571        (void)gz_look(state);
572
573    /* return 1 if transparent, 0 if processing a gzip stream */
574    return state->direct;
575}
576
577/* -- see zlib.h -- */
578int ZEXPORT gzclose_r(gzFile file) {
579    int ret, err;
580    gz_statep state;
581
582    /* get internal structure */
583    if (file == NULL)
584        return Z_STREAM_ERROR;
585    state = (gz_statep)file;
586
587    /* check that we're reading */
588    if (state->mode != GZ_READ)
589        return Z_STREAM_ERROR;
590
591    /* free memory and close file */
592    if (state->size) {
593        inflateEnd(&(state->strm));
594        free(state->out);
595        free(state->in);
596    }
597    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
598    gz_error(state, Z_OK, NULL);
599    free(state->path);
600    ret = close(state->fd);
601    free(state);
602    return ret ? Z_ERRNO : err;
603}
604