1/* gzread.c contains minimal changes required to be compiled with zlibWrapper:
2 * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */
3
4 /* gzread.c -- zlib functions for reading gzip files
5 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
6 * For conditions of distribution and use, see http://www.zlib.net/zlib_license.html
7 */
8
9#include "gzguts.h"
10
11/* fix for Visual Studio, which doesn't support ssize_t type.
12 * see https://github.com/facebook/zstd/issues/1800#issuecomment-545945050 */
13#if defined(_MSC_VER) && !defined(ssize_t)
14#  include <BaseTsd.h>
15   typedef SSIZE_T ssize_t;
16#endif
17
18
/* Local functions -- forward declarations of the helpers defined below */

/* read raw bytes from the file descriptor into a buffer */
local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
/* refill the inflate input buffer from the file */
local int gz_avail OF((gz_statep));
/* allocate buffers if needed, then choose between decompressing and copying */
local int gz_look OF((gz_statep));
/* inflate input into the current next_out/avail_out of the stream */
local int gz_decomp OF((gz_statep));
/* produce more output according to state.state->how */
local int gz_fetch OF((gz_statep));
/* discard the given number of uncompressed bytes */
local int gz_skip OF((gz_statep, z_off64_t));
/* read up to the given number of uncompressed bytes into a caller buffer */
local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
27
28/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
29   state.state->fd, and update state.state->eof, state.state->err, and state.state->msg as appropriate.
30   This function needs to loop on read(), since read() is not guaranteed to
31   read the number of bytes requested, depending on the type of descriptor. */
32local int gz_load(state, buf, len, have)
33    gz_statep state;
34    unsigned char *buf;
35    unsigned len;
36    unsigned *have;
37{
38    ssize_t ret;
39    unsigned get, max = ((unsigned)-1 >> 2) + 1;
40
41    *have = 0;
42    do {
43        get = len - *have;
44        if (get > max)
45            get = max;
46        ret = read(state.state->fd, buf + *have, get);
47        if (ret <= 0)
48            break;
49        *have += (unsigned)ret;
50    } while (*have < len);
51    if (ret < 0) {
52        gz_error(state, Z_ERRNO, zstrerror());
53        return -1;
54    }
55    if (ret == 0)
56        state.state->eof = 1;
57    return 0;
58}
59
60/* Load up input buffer and set eof flag if last data loaded -- return -1 on
61   error, 0 otherwise.  Note that the eof flag is set when the end of the input
62   file is reached, even though there may be unused data in the buffer.  Once
63   that data has been used, no more attempts will be made to read the file.
64   If strm->avail_in != 0, then the current data is moved to the beginning of
65   the input buffer, and then the remainder of the buffer is loaded with the
66   available data from the input file. */
67local int gz_avail(state)
68    gz_statep state;
69{
70    unsigned got;
71    z_streamp strm = &(state.state->strm);
72
73    if (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR)
74        return -1;
75    if (state.state->eof == 0) {
76        if (strm->avail_in) {       /* copy what's there to the start */
77            unsigned char *p = state.state->in;
78            unsigned const char *q = strm->next_in;
79            unsigned n = strm->avail_in;
80            do {
81                *p++ = *q++;
82            } while (--n);
83        }
84        if (gz_load(state, state.state->in + strm->avail_in,
85                    state.state->size - strm->avail_in, &got) == -1)
86            return -1;
87        strm->avail_in += got;
88        strm->next_in = state.state->in;
89    }
90    return 0;
91}
92
93/* Look for gzip header, set up for inflate or copy.  state.state->x.have must be 0.
94   If this is the first time in, allocate required memory.  state.state->how will be
95   left unchanged if there is no more input data available, will be set to COPY
96   if there is no gzip header and direct copying will be performed, or it will
97   be set to GZIP for decompression.  If direct copying, then leftover input
98   data from the input buffer will be copied to the output buffer.  In that
99   case, all further file reads will be directly to either the output buffer or
100   a user buffer.  If decompressing, the inflate state will be initialized.
101   gz_look() will return 0 on success or -1 on failure. */
102local int gz_look(state)
103    gz_statep state;
104{
105    z_streamp strm = &(state.state->strm);
106
107    /* allocate read buffers and inflate memory */
108    if (state.state->size == 0) {
109        /* allocate buffers */
110        state.state->in = (unsigned char *)malloc(state.state->want);
111        state.state->out = (unsigned char *)malloc(state.state->want << 1);
112        if (state.state->in == NULL || state.state->out == NULL) {
113            free(state.state->out);
114            free(state.state->in);
115            gz_error(state, Z_MEM_ERROR, "out of memory");
116            return -1;
117        }
118        state.state->size = state.state->want;
119
120        /* allocate inflate memory */
121        state.state->strm.zalloc = Z_NULL;
122        state.state->strm.zfree = Z_NULL;
123        state.state->strm.opaque = Z_NULL;
124        state.state->strm.avail_in = 0;
125        state.state->strm.next_in = Z_NULL;
126        if (inflateInit2(&(state.state->strm), 15 + 16) != Z_OK) {    /* gunzip */
127            free(state.state->out);
128            free(state.state->in);
129            state.state->size = 0;
130            gz_error(state, Z_MEM_ERROR, "out of memory");
131            return -1;
132        }
133    }
134
135    /* get at least the magic bytes in the input buffer */
136    if (strm->avail_in < 2) {
137        if (gz_avail(state) == -1)
138            return -1;
139        if (strm->avail_in == 0)
140            return 0;
141    }
142
143    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
144       a logical dilemma here when considering the case of a partially written
145       gzip file, to wit, if a single 31 byte is written, then we cannot tell
146       whether this is a single-byte file, or just a partially written gzip
147       file -- for here we assume that if a gzip file is being written, then
148       the header will be written in a single operation, so that reading a
149       single byte is sufficient indication that it is not a gzip file) */
150    if (strm->avail_in > 1 &&
151            ((strm->next_in[0] == 31 && strm->next_in[1] == 139) /* gz header */
152            || (strm->next_in[0] == 40 && strm->next_in[1] == 181))) { /* zstd header */
153        inflateReset(strm);
154        state.state->how = GZIP;
155        state.state->direct = 0;
156        return 0;
157    }
158
159    /* no gzip header -- if we were decoding gzip before, then this is trailing
160       garbage.  Ignore the trailing garbage and finish. */
161    if (state.state->direct == 0) {
162        strm->avail_in = 0;
163        state.state->eof = 1;
164        state.state->x.have = 0;
165        return 0;
166    }
167
168    /* doing raw i/o, copy any leftover input to output -- this assumes that
169       the output buffer is larger than the input buffer, which also assures
170       space for gzungetc() */
171    state.state->x.next = state.state->out;
172    if (strm->avail_in) {
173        memcpy(state.state->x.next, strm->next_in, strm->avail_in);
174        state.state->x.have = strm->avail_in;
175        strm->avail_in = 0;
176    }
177    state.state->how = COPY;
178    state.state->direct = 1;
179    return 0;
180}
181
182/* Decompress from input to the provided next_out and avail_out in the state.
183   On return, state.state->x.have and state.state->x.next point to the just decompressed
184   data.  If the gzip stream completes, state.state->how is reset to LOOK to look for
185   the next gzip stream or raw data, once state.state->x.have is depleted.  Returns 0
186   on success, -1 on failure. */
187local int gz_decomp(state)
188    gz_statep state;
189{
190    int ret = Z_OK;
191    unsigned had;
192    z_streamp strm = &(state.state->strm);
193
194    /* fill output buffer up to end of deflate stream */
195    had = strm->avail_out;
196    do {
197        /* get more input for inflate() */
198        if (strm->avail_in == 0 && gz_avail(state) == -1)
199            return -1;
200        if (strm->avail_in == 0) {
201            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
202            break;
203        }
204
205        /* decompress and handle errors */
206        ret = inflate(strm, Z_NO_FLUSH);
207        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
208            gz_error(state, Z_STREAM_ERROR,
209                     "internal error: inflate stream corrupt");
210            return -1;
211        }
212        if (ret == Z_MEM_ERROR) {
213            gz_error(state, Z_MEM_ERROR, "out of memory");
214            return -1;
215        }
216        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
217            gz_error(state, Z_DATA_ERROR,
218                     strm->msg == NULL ? "compressed data error" : strm->msg);
219            return -1;
220        }
221    } while (strm->avail_out && ret != Z_STREAM_END);
222
223    /* update available output */
224    state.state->x.have = had - strm->avail_out;
225    state.state->x.next = strm->next_out - state.state->x.have;
226
227    /* if the gzip stream completed successfully, look for another */
228    if (ret == Z_STREAM_END)
229        state.state->how = LOOK;
230
231    /* good decompression */
232    return 0;
233}
234
235/* Fetch data and put it in the output buffer.  Assumes state.state->x.have is 0.
236   Data is either copied from the input file or decompressed from the input
237   file depending on state.state->how.  If state.state->how is LOOK, then a gzip header is
238   looked for to determine whether to copy or decompress.  Returns -1 on error,
239   otherwise 0.  gz_fetch() will leave state.state->how as COPY or GZIP unless the
240   end of the input file has been reached and all data has been processed.  */
241local int gz_fetch(state)
242    gz_statep state;
243{
244    z_streamp strm = &(state.state->strm);
245
246    do {
247        switch(state.state->how) {
248        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
249            if (gz_look(state) == -1)
250                return -1;
251            if (state.state->how == LOOK)
252                return 0;
253            break;
254        case COPY:      /* -> COPY */
255            if (gz_load(state, state.state->out, state.state->size << 1, &(state.state->x.have))
256                    == -1)
257                return -1;
258            state.state->x.next = state.state->out;
259            return 0;
260        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
261            strm->avail_out = state.state->size << 1;
262            strm->next_out = state.state->out;
263            if (gz_decomp(state) == -1)
264                return -1;
265        }
266    } while (state.state->x.have == 0 && (!state.state->eof || strm->avail_in));
267    return 0;
268}
269
270/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
271local int gz_skip(state, len)
272    gz_statep state;
273    z_off64_t len;
274{
275    unsigned n;
276
277    /* skip over len bytes or reach end-of-file, whichever comes first */
278    while (len)
279        /* skip over whatever is in output buffer */
280        if (state.state->x.have) {
281            n = GT_OFF(state.state->x.have) || (z_off64_t)state.state->x.have > len ?
282                (unsigned)len : state.state->x.have;
283            state.state->x.have -= n;
284            state.state->x.next += n;
285            state.state->x.pos += n;
286            len -= n;
287        }
288
289        /* output buffer empty -- return if we're at the end of the input */
290        else if (state.state->eof && state.state->strm.avail_in == 0)
291            break;
292
293        /* need more data to skip -- load up output buffer */
294        else {
295            /* get more output, looking for header if required */
296            if (gz_fetch(state) == -1)
297                return -1;
298        }
299    return 0;
300}
301
302/* Read len bytes into buf from file, or less than len up to the end of the
303   input.  Return the number of bytes read.  If zero is returned, either the
304   end of file was reached, or there was an error.  state.state->err must be
305   consulted in that case to determine which. */
306local z_size_t gz_read(state, buf, len)
307    gz_statep state;
308    voidp buf;
309    z_size_t len;
310{
311    z_size_t got;
312    unsigned n;
313
314    /* if len is zero, avoid unnecessary operations */
315    if (len == 0)
316        return 0;
317
318    /* process a skip request */
319    if (state.state->seek) {
320        state.state->seek = 0;
321        if (gz_skip(state, state.state->skip) == -1)
322            return 0;
323    }
324
325    /* get len bytes to buf, or less than len if at the end */
326    got = 0;
327    do {
328        /* set n to the maximum amount of len that fits in an unsigned int */
329        n = -1;
330        if (n > len)
331            n = (unsigned)len;
332
333        /* first just try copying data from the output buffer */
334        if (state.state->x.have) {
335            if (state.state->x.have < n)
336                n = state.state->x.have;
337            memcpy(buf, state.state->x.next, n);
338            state.state->x.next += n;
339            state.state->x.have -= n;
340        }
341
342        /* output buffer empty -- return if we're at the end of the input */
343        else if (state.state->eof && state.state->strm.avail_in == 0) {
344            state.state->past = 1;        /* tried to read past end */
345            break;
346        }
347
348        /* need output data -- for small len or new stream load up our output
349           buffer */
350        else if (state.state->how == LOOK || n < (state.state->size << 1)) {
351            /* get more output, looking for header if required */
352            if (gz_fetch(state) == -1)
353                return 0;
354            continue;       /* no progress yet -- go back to copy above */
355            /* the copy above assures that we will leave with space in the
356               output buffer, allowing at least one gzungetc() to succeed */
357        }
358
359        /* large len -- read directly into user buffer */
360        else if (state.state->how == COPY) {      /* read directly */
361            if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
362                return 0;
363        }
364
365        /* large len -- decompress directly into user buffer */
366        else {  /* state.state->how == GZIP */
367            state.state->strm.avail_out = n;
368            state.state->strm.next_out = (unsigned char *)buf;
369            if (gz_decomp(state) == -1)
370                return 0;
371            n = state.state->x.have;
372            state.state->x.have = 0;
373        }
374
375        /* update progress */
376        len -= n;
377        buf = (char *)buf + n;
378        got += n;
379        state.state->x.pos += n;
380    } while (len);
381
382    /* return number of bytes read into user buffer */
383    return got;
384}
385
386/* -- see zlib.h -- */
387int ZEXPORT gzread(file, buf, len)
388    gzFile file;
389    voidp buf;
390    unsigned len;
391{
392    gz_statep state;
393
394    /* get internal structure */
395    if (file == NULL)
396        return -1;
397    state.file = file;
398
399    /* check that we're reading and that there's no (serious) error */
400    if (state.state->mode != GZ_READ ||
401            (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
402        return -1;
403
404    /* since an int is returned, make sure len fits in one, otherwise return
405       with an error (this avoids a flaw in the interface) */
406    if ((int)len < 0) {
407        gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
408        return -1;
409    }
410
411    /* read len or fewer bytes to buf */
412    len = (unsigned)gz_read(state, buf, len);
413
414    /* check for an error */
415    if (len == 0 && state.state->err != Z_OK && state.state->err != Z_BUF_ERROR)
416        return -1;
417
418    /* return the number of bytes read (this is assured to fit in an int) */
419    return (int)len;
420}
421
422/* -- see zlib.h -- */
423z_size_t ZEXPORT gzfread(buf, size, nitems, file)
424    voidp buf;
425    z_size_t size;
426    z_size_t nitems;
427    gzFile file;
428{
429    z_size_t len;
430    gz_statep state;
431
432    /* get internal structure */
433    if (file == NULL)
434        return 0;
435    state.file = file;
436
437    /* check that we're reading and that there's no (serious) error */
438    if (state.state->mode != GZ_READ ||
439            (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
440        return 0;
441
442    /* compute bytes to read -- error on overflow */
443    len = nitems * size;
444    if (size && len / size != nitems) {
445        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
446        return 0;
447    }
448
449    /* read len or fewer bytes to buf, return the number of full items read */
450    return len ? gz_read(state, buf, len) / size : 0;
451}
452
453/* -- see zlib.h -- */
454#if ZLIB_VERNUM >= 0x1261
455#ifdef Z_PREFIX_SET
456#  undef z_gzgetc
457#else
458#  undef gzgetc
459#endif
460#endif
461
462#if ZLIB_VERNUM == 0x1260
463#  undef gzgetc
464#endif
465
466#if ZLIB_VERNUM <= 0x1250
467ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
468ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));
469#endif
470
471int ZEXPORT gzgetc(file)
472    gzFile file;
473{
474    int ret;
475    unsigned char buf[1];
476    gz_statep state;
477
478    /* get internal structure */
479    if (file == NULL)
480        return -1;
481    state.file = file;
482
483    /* check that we're reading and that there's no (serious) error */
484    if (state.state->mode != GZ_READ ||
485        (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
486        return -1;
487
488    /* try output buffer (no need to check for skip request) */
489    if (state.state->x.have) {
490        state.state->x.have--;
491        state.state->x.pos++;
492        return *(state.state->x.next)++;
493    }
494
495    /* nothing there -- try gz_read() */
496    ret = (int)gz_read(state, buf, 1);
497    return ret < 1 ? -1 : buf[0];
498}
499
500int ZEXPORT gzgetc_(file)
501gzFile file;
502{
503    return gzgetc(file);
504}
505
506/* -- see zlib.h -- */
507int ZEXPORT gzungetc(c, file)
508    int c;
509    gzFile file;
510{
511    gz_statep state;
512
513    /* get internal structure */
514    if (file == NULL)
515        return -1;
516    state.file = file;
517
518    /* check that we're reading and that there's no (serious) error */
519    if (state.state->mode != GZ_READ ||
520        (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
521        return -1;
522
523    /* process a skip request */
524    if (state.state->seek) {
525        state.state->seek = 0;
526        if (gz_skip(state, state.state->skip) == -1)
527            return -1;
528    }
529
530    /* can't push EOF */
531    if (c < 0)
532        return -1;
533
534    /* if output buffer empty, put byte at end (allows more pushing) */
535    if (state.state->x.have == 0) {
536        state.state->x.have = 1;
537        state.state->x.next = state.state->out + (state.state->size << 1) - 1;
538        state.state->x.next[0] = (unsigned char)c;
539        state.state->x.pos--;
540        state.state->past = 0;
541        return c;
542    }
543
544    /* if no room, give up (must have already done a gzungetc()) */
545    if (state.state->x.have == (state.state->size << 1)) {
546        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
547        return -1;
548    }
549
550    /* slide output data if needed and insert byte before existing data */
551    if (state.state->x.next == state.state->out) {
552        unsigned char *src = state.state->out + state.state->x.have;
553        unsigned char *dest = state.state->out + (state.state->size << 1);
554        while (src > state.state->out)
555            *--dest = *--src;
556        state.state->x.next = dest;
557    }
558    state.state->x.have++;
559    state.state->x.next--;
560    state.state->x.next[0] = (unsigned char)c;
561    state.state->x.pos--;
562    state.state->past = 0;
563    return c;
564}
565
566/* -- see zlib.h -- */
567char * ZEXPORT gzgets(file, buf, len)
568    gzFile file;
569    char *buf;
570    int len;
571{
572    unsigned left, n;
573    char *str;
574    unsigned char *eol;
575    gz_statep state;
576
577    /* check parameters and get internal structure */
578    if (file == NULL || buf == NULL || len < 1)
579        return NULL;
580    state.file = file;
581
582    /* check that we're reading and that there's no (serious) error */
583    if (state.state->mode != GZ_READ ||
584        (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
585        return NULL;
586
587    /* process a skip request */
588    if (state.state->seek) {
589        state.state->seek = 0;
590        if (gz_skip(state, state.state->skip) == -1)
591            return NULL;
592    }
593
594    /* copy output bytes up to new line or len - 1, whichever comes first --
595       append a terminating zero to the string (we don't check for a zero in
596       the contents, let the user worry about that) */
597    str = buf;
598    left = (unsigned)len - 1;
599    if (left) do {
600        /* assure that something is in the output buffer */
601        if (state.state->x.have == 0 && gz_fetch(state) == -1)
602            return NULL;                /* error */
603        if (state.state->x.have == 0) {       /* end of file */
604            state.state->past = 1;            /* read past end */
605            break;                      /* return what we have */
606        }
607
608        /* look for end-of-line in current output buffer */
609        n = state.state->x.have > left ? left : state.state->x.have;
610        eol = (unsigned char *)memchr(state.state->x.next, '\n', n);
611        if (eol != NULL)
612            n = (unsigned)(eol - state.state->x.next) + 1;
613
614        /* copy through end-of-line, or remainder if not found */
615        memcpy(buf, state.state->x.next, n);
616        state.state->x.have -= n;
617        state.state->x.next += n;
618        state.state->x.pos += n;
619        left -= n;
620        buf += n;
621    } while (left && eol == NULL);
622
623    /* return terminated string, or if nothing, end of file */
624    if (buf == str)
625        return NULL;
626    buf[0] = 0;
627    return str;
628}
629
630/* -- see zlib.h -- */
631int ZEXPORT gzdirect(file)
632    gzFile file;
633{
634    gz_statep state;
635
636    /* get internal structure */
637    if (file == NULL)
638        return 0;
639    state.file = file;
640
641    /* if the state is not known, but we can find out, then do so (this is
642       mainly for right after a gzopen() or gzdopen()) */
643    if (state.state->mode == GZ_READ && state.state->how == LOOK && state.state->x.have == 0)
644        (void)gz_look(state);
645
646    /* return 1 if transparent, 0 if processing a gzip stream */
647    return state.state->direct;
648}
649
650/* -- see zlib.h -- */
651int ZEXPORT gzclose_r(file)
652    gzFile file;
653{
654    int ret, err;
655    gz_statep state;
656
657    /* get internal structure */
658    if (file == NULL)
659        return Z_STREAM_ERROR;
660    state.file = file;
661
662    /* check that we're reading */
663    if (state.state->mode != GZ_READ)
664        return Z_STREAM_ERROR;
665
666    /* free memory and close file */
667    if (state.state->size) {
668        inflateEnd(&(state.state->strm));
669        free(state.state->out);
670        free(state.state->in);
671    }
672    err = state.state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
673    gz_error(state, Z_OK, NULL);
674    free(state.state->path);
675    ret = close(state.state->fd);
676    free(state.state);
677    return ret ? Z_ERRNO : err;
678}
679