1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *		   information if recognized
 *	uncompressbuf(fd, bytes_max, method, nofork, old, newch, n) -
 *		   uncompress old into *newch using method; returns OKDATA,
 *		   NODATA or ERRDATA and stores the new size in *n
 */
35#include "file.h"
36
37#ifndef lint
38FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
39#endif
40
41#include "magic.h"
42#include <stdlib.h>
43#ifdef HAVE_UNISTD_H
44#include <unistd.h>
45#endif
46#ifdef HAVE_SPAWN_H
47#include <spawn.h>
48#endif
49#include <string.h>
50#include <errno.h>
51#include <ctype.h>
52#include <stdarg.h>
53#include <signal.h>
54#ifndef HAVE_SIG_T
55typedef void (*sig_t)(int);
56#endif /* HAVE_SIG_T */
57#ifdef HAVE_SYS_IOCTL_H
58#include <sys/ioctl.h>
59#endif
60#ifdef HAVE_SYS_WAIT_H
61#include <sys/wait.h>
62#endif
63#if defined(HAVE_SYS_TIME_H)
64#include <sys/time.h>
65#endif
66
67#if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
68#define BUILTIN_DECOMPRESS
69#include <zlib.h>
70#endif
71
72#if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
73#define BUILTIN_BZLIB
74#include <bzlib.h>
75#endif
76
77#if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
78#define BUILTIN_XZLIB
79#include <lzma.h>
80#endif
81
82#if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
83#define BUILTIN_ZSTDLIB
84#include <zstd.h>
85#include <zstd_errors.h>
86#endif
87
88#if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
89#define BUILTIN_LZLIB
90#include <lzlib.h>
91#endif
92
#ifdef DEBUG
/*
 * Debug tracing: DPRINTF() formats its arguments onto /dev/tty.  The
 * terminal is opened on first use and its descriptor cached in tty; if it
 * cannot be opened the process aborts.
 */
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1) \
		tty = open("/dev/tty", O_RDWR); \
	if (tty == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
/* Without DEBUG, DPRINTF() expands to nothing. */
#define DPRINTF(...)
#endif
105
106#ifdef ZLIBSUPPORT
107/*
108 * The following python code is not really used because ZLIBSUPPORT is only
109 * defined if we have a built-in zlib, and the built-in zlib handles that.
110 * That is not true for android where we have zlib.h and not -lz.
111 */
/* One-line Python program that inflates stdin to stdout with zlib. */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

/* argv that runs zlibcode through the "python" interpreter. */
static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
116
/*
 * Return 1 if buf starts with a plausible zlib stream header, 0 otherwise.
 * Exactly buf[0] and buf[1] are read, so the caller must supply at least
 * two bytes.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int header;

	/* low nibble must be 8 and the top bit of the first byte clear */
	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;
	/*
	 * The header check requires buf[0] * 256 + buf[1] to be a multiple
	 * of 31.  That is plain arithmetic on the two stream bytes, so it
	 * must not depend on the byte order of the host.
	 */
	header = buf[0] * 256u + buf[1];
	return header % 31 == 0;
}
133#endif
134
/*
 * Return 1 when buf looks like the header matched by the "lzma" table
 * entry: byte 0 is 0x5d, bytes 1 and 2 are zero, and byte 12 is either
 * 0x00 or 0xff.  Byte 12 is only inspected once the first three bytes
 * match; the caller must supply at least 13 bytes.
 */
static int
lzmacmp(const unsigned char *buf)
{
	return buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0 &&
	    (buf[12] == 0 || buf[12] == 0xff);
}
144
/* Command-line flags shared by the gzip and lzip helper commands. */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argument vectors for the external decompressors.
 * Element 0 is the program name.
 */
static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", "-qdf", "-", NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };
172
173#define	do_zlib		NULL
174#define	do_bzlib	NULL
175
176file_private const struct {
177	union {
178		const char *magic;
179		int (*func)(const unsigned char *);
180	} u;
181	int maglen;
182	const char **argv;
183	void *unused;
184} compr[] = {
185#define METH_FROZEN	2
186#define METH_BZIP	7
187#define METH_XZ		9
188#define METH_LZIP	8
189#define METH_ZSTD	12
190#define METH_LZMA	13
191#define METH_ZLIB	14
192    { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
193    /* Uncompress can get stuck; so use gzip first if we have it
194     * Idea from Damien Clark, thanks! */
195    { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
196    { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
197    { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
198    { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
199    /* the standard pack utilities do not accept standard input */
200    { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
201    { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
202    /* ...only first file examined */
203    { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
204    { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
205    { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
206    { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
207    { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
208    { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
209    { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
210#ifdef ZLIBSUPPORT
211    { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
212#endif
213};
214
/*
 * Result codes returned by the decompression helpers in this file:
 *   OKDATA  - the output buffer holds decompressed data
 *   NODATA  - nothing usable was produced (makeerror() returns this when
 *             it cannot format a message); file_zmagic() ignores the method
 *   ERRDATA - the output buffer holds an error message instead of data
 */
#define OKDATA 	0
#define NODATA	1
#define ERRDATA	2
218
/* Forward declarations for helpers defined later in this file. */
file_private ssize_t swrite(int, const void *, size_t);
#if HAVE_FORK
/* Number of entries in compr[]. */
file_private size_t ncompr = __arraycount(compr);
file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
    unsigned char **, size_t *);
/*
 * Built-in decoders; each is only declared when its library support macro
 * is defined.  They all share one signature so getdecompressor() can
 * return any of them.
 */
#ifdef BUILTIN_DECOMPRESS
file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_BZLIB
file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_XZLIB
file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_ZSTDLIB
file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_LZLIB
file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif

/* printf-style error formatter; arguments are checked against the format. */
static int makeerror(unsigned char **, size_t *, const char *, ...)
    __attribute__((__format__(__printf__, 3, 4)));
file_private const char *methodname(size_t);
250
251file_private int
252format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
253{
254	unsigned char *p;
255	int mime = ms->flags & MAGIC_MIME;
256
257	if (!mime)
258		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
259
260	for (p = buf; *p; p++)
261		if (!isalnum(*p))
262			*p = '-';
263
264	return file_printf(ms, "application/x-decompression-error-%s-%s",
265	    methodname(i), buf);
266}
267
268file_protected int
269file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
270{
271	unsigned char *newbuf = NULL;
272	size_t i, nsz;
273	char *rbuf;
274	file_pushbuf_t *pb;
275	int urv, prv, rv = 0;
276	int mime = ms->flags & MAGIC_MIME;
277	int fd = b->fd;
278	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
279	size_t nbytes = b->flen;
280	int sa_saved = 0;
281	struct sigaction sig_act;
282
283	if ((ms->flags & MAGIC_COMPRESS) == 0)
284		return 0;
285
286	for (i = 0; i < ncompr; i++) {
287		int zm;
288		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
289			continue;
290		if (compr[i].maglen < 0) {
291			zm = (*compr[i].u.func)(buf);
292		} else {
293			zm = memcmp(buf, compr[i].u.magic,
294			    CAST(size_t, compr[i].maglen)) == 0;
295		}
296
297		if (!zm)
298			continue;
299
300		/* Prevent SIGPIPE death if child dies unexpectedly */
301		if (!sa_saved) {
302			//We can use sig_act for both new and old, but
303			struct sigaction new_act;
304			memset(&new_act, 0, sizeof(new_act));
305			new_act.sa_handler = SIG_IGN;
306			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
307		}
308
309		nsz = nbytes;
310		free(newbuf);
311		urv = uncompressbuf(fd, ms->bytes_max, i,
312		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
313		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
314		    (char *)newbuf, nsz);
315		switch (urv) {
316		case OKDATA:
317		case ERRDATA:
318			ms->flags &= ~MAGIC_COMPRESS;
319			if (urv == ERRDATA)
320				prv = format_decompression_error(ms, i, newbuf);
321			else
322				prv = file_buffer(ms, -1, NULL, name, newbuf,
323				    nsz);
324			if (prv == -1)
325				goto error;
326			rv = 1;
327			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
328				goto out;
329			if (mime != MAGIC_MIME && mime != 0)
330				goto out;
331			if ((file_printf(ms,
332			    mime ? " compressed-encoding=" : " (")) == -1)
333				goto error;
334			if ((pb = file_push_buffer(ms)) == NULL)
335				goto error;
336			/*
337			 * XXX: If file_buffer fails here, we overwrite
338			 * the compressed text. FIXME.
339			 */
340			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
341			{
342				if (file_pop_buffer(ms, pb) != NULL)
343					abort();
344				goto error;
345			}
346			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
347				if (file_printf(ms, "%s", rbuf) == -1) {
348					free(rbuf);
349					goto error;
350				}
351				free(rbuf);
352			}
353			if (!mime && file_printf(ms, ")") == -1)
354				goto error;
355			/*FALLTHROUGH*/
356		case NODATA:
357			break;
358		default:
359			abort();
360			/*NOTREACHED*/
361		error:
362			rv = -1;
363			break;
364		}
365	}
366out:
367	DPRINTF("rv = %d\n", rv);
368
369	if (sa_saved && sig_act.sa_handler != SIG_IGN)
370		(void)sigaction(SIGPIPE, &sig_act, NULL);
371
372	free(newbuf);
373	ms->flags |= MAGIC_COMPRESS;
374	DPRINTF("Zmagic returns %d\n", rv);
375	return rv;
376}
377#endif
378/*
379 * `safe' write for sockets and pipes.
380 */
381file_private ssize_t
382swrite(int fd, const void *buf, size_t n)
383{
384	ssize_t rv;
385	size_t rn = n;
386
387	do
388		switch (rv = write(fd, buf, n)) {
389		case -1:
390			if (errno == EINTR)
391				continue;
392			return -1;
393		default:
394			n -= rv;
395			buf = CAST(const char *, buf) + rv;
396			break;
397		}
398	while (n > 0);
399	return rn;
400}
401
402
403/*
404 * `safe' read for sockets and pipes.
405 */
406file_protected ssize_t
407sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
408{
409	ssize_t rv;
410#if defined(FIONREAD) && !defined(__MINGW32__)
411	int t = 0;
412#endif
413	size_t rn = n;
414
415	if (fd == STDIN_FILENO)
416		goto nocheck;
417
418#if defined(FIONREAD) && !defined(__MINGW32__)
419	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
420#ifdef FD_ZERO
421		ssize_t cnt;
422		for (cnt = 0;; cnt++) {
423			fd_set check;
424			struct timeval tout = {0, 100 * 1000};
425			int selrv;
426
427			FD_ZERO(&check);
428			FD_SET(fd, &check);
429
430			/*
431			 * Avoid soft deadlock: do not read if there
432			 * is nothing to read from sockets and pipes.
433			 */
434			selrv = select(fd + 1, &check, NULL, NULL, &tout);
435			if (selrv == -1) {
436				if (errno == EINTR || errno == EAGAIN)
437					continue;
438			} else if (selrv == 0 && cnt >= 5) {
439				return 0;
440			} else
441				break;
442		}
443#endif
444		(void)ioctl(fd, FIONREAD, &t);
445	}
446
447	if (t > 0 && CAST(size_t, t) < n) {
448		n = t;
449		rn = n;
450	}
451#endif
452
453nocheck:
454	do
455		switch ((rv = read(fd, buf, n))) {
456		case -1:
457			if (errno == EINTR)
458				continue;
459			return -1;
460		case 0:
461			return rn - n;
462		default:
463			n -= rv;
464			buf = CAST(char *, CCAST(void *, buf)) + rv;
465			break;
466		}
467	while (n > 0);
468	return rn;
469}
470
471file_protected int
472file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
473    size_t nbytes)
474{
475	char buf[4096];
476	ssize_t r;
477	int tfd;
478
479#ifdef WIN32
480	const char *t;
481	buf[0] = '\0';
482	if ((t = getenv("TEMP")) != NULL)
483		(void)strlcpy(buf, t, sizeof(buf));
484	else if ((t = getenv("TMP")) != NULL)
485		(void)strlcpy(buf, t, sizeof(buf));
486	else if ((t = getenv("TMPDIR")) != NULL)
487		(void)strlcpy(buf, t, sizeof(buf));
488	if (buf[0] != '\0')
489		(void)strlcat(buf, "/", sizeof(buf));
490	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
491#else
492	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
493#endif
494#ifndef HAVE_MKSTEMP
495	{
496		char *ptr = mktemp(buf);
497		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
498		r = errno;
499		(void)unlink(ptr);
500		errno = r;
501	}
502#else
503	{
504		int te;
505		mode_t ou = umask(0);
506		tfd = mkstemp(buf);
507		(void)umask(ou);
508		te = errno;
509		(void)unlink(buf);
510		errno = te;
511	}
512#endif
513	if (tfd == -1) {
514		file_error(ms, errno,
515		    "cannot create temporary file for pipe copy");
516		return -1;
517	}
518
519	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
520		r = 1;
521	else {
522		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
523			if (swrite(tfd, buf, CAST(size_t, r)) != r)
524				break;
525	}
526
527	switch (r) {
528	case -1:
529		file_error(ms, errno, "error copying from pipe to temp file");
530		return -1;
531	case 0:
532		break;
533	default:
534		file_error(ms, errno, "error while writing to temp file");
535		return -1;
536	}
537
538	/*
539	 * We duplicate the file descriptor, because fclose on a
540	 * tmpfile will delete the file, but any open descriptors
541	 * can still access the phantom inode.
542	 */
543	if ((fd = dup2(tfd, fd)) == -1) {
544		file_error(ms, errno, "could not dup descriptor for temp file");
545		return -1;
546	}
547	(void)close(tfd);
548	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
549		file_badseek(ms);
550		return -1;
551	}
552	return fd;
553}
554#if HAVE_FORK
555#ifdef BUILTIN_DECOMPRESS
556
/*
 * Bits of the flag byte at offset 3 of the input, as tested by
 * uncompressgzipped() to find where the compressed data starts.
 */
#define FHCRC		(1 << 1)
#define FEXTRA		(1 << 2)
#define FNAME		(1 << 3)
#define FCOMMENT	(1 << 4)
561
562
563file_private int
564uncompressgzipped(const unsigned char *old, unsigned char **newch,
565    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
566{
567	unsigned char flg;
568	size_t data_start = 10;
569
570	if (*n < 4) {
571		goto err;
572	}
573
574	flg = old[3];
575
576	if (flg & FEXTRA) {
577		if (data_start + 1 >= *n)
578			goto err;
579		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
580	}
581	if (flg & FNAME) {
582		while(data_start < *n && old[data_start])
583			data_start++;
584		data_start++;
585	}
586	if (flg & FCOMMENT) {
587		while(data_start < *n && old[data_start])
588			data_start++;
589		data_start++;
590	}
591	if (flg & FHCRC)
592		data_start += 2;
593
594	if (data_start >= *n)
595		goto err;
596
597	*n -= data_start;
598	old += data_start;
599	return uncompresszlib(old, newch, bytes_max, n, 0);
600err:
601	return makeerror(newch, n, "File too short");
602}
603
604file_private int
605uncompresszlib(const unsigned char *old, unsigned char **newch,
606    size_t bytes_max, size_t *n, int zlib)
607{
608	int rc;
609	z_stream z;
610
611	DPRINTF("builtin zlib decompression\n");
612	z.next_in = CCAST(Bytef *, old);
613	z.avail_in = CAST(uint32_t, *n);
614	z.next_out = *newch;
615	z.avail_out = CAST(unsigned int, bytes_max);
616	z.zalloc = Z_NULL;
617	z.zfree = Z_NULL;
618	z.opaque = Z_NULL;
619
620	/* LINTED bug in header macro */
621	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
622	if (rc != Z_OK)
623		goto err;
624
625	rc = inflate(&z, Z_SYNC_FLUSH);
626	if (rc != Z_OK && rc != Z_STREAM_END) {
627		inflateEnd(&z);
628		goto err;
629	}
630
631	*n = CAST(size_t, z.total_out);
632	rc = inflateEnd(&z);
633	if (rc != Z_OK)
634		goto err;
635
636	/* let's keep the nul-terminate tradition */
637	(*newch)[*n] = '\0';
638
639	return OKDATA;
640err:
641	return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
642}
643#endif
644
645#ifdef BUILTIN_BZLIB
646file_private int
647uncompressbzlib(const unsigned char *old, unsigned char **newch,
648    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
649{
650	int rc;
651	bz_stream bz;
652
653	DPRINTF("builtin bzlib decompression\n");
654	memset(&bz, 0, sizeof(bz));
655	rc = BZ2_bzDecompressInit(&bz, 0, 0);
656	if (rc != BZ_OK)
657		goto err;
658
659	bz.next_in = CCAST(char *, RCAST(const char *, old));
660	bz.avail_in = CAST(uint32_t, *n);
661	bz.next_out = RCAST(char *, *newch);
662	bz.avail_out = CAST(unsigned int, bytes_max);
663
664	rc = BZ2_bzDecompress(&bz);
665	if (rc != BZ_OK && rc != BZ_STREAM_END) {
666		BZ2_bzDecompressEnd(&bz);
667		goto err;
668	}
669
670	/* Assume byte_max is within 32bit */
671	/* assert(bz.total_out_hi32 == 0); */
672	*n = CAST(size_t, bz.total_out_lo32);
673	rc = BZ2_bzDecompressEnd(&bz);
674	if (rc != BZ_OK)
675		goto err;
676
677	/* let's keep the nul-terminate tradition */
678	(*newch)[*n] = '\0';
679
680	return OKDATA;
681err:
682	return makeerror(newch, n, "bunzip error %d", rc);
683}
684#endif
685
686#ifdef BUILTIN_XZLIB
687file_private int
688uncompressxzlib(const unsigned char *old, unsigned char **newch,
689    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
690{
691	int rc;
692	lzma_stream xz;
693
694	DPRINTF("builtin xzlib decompression\n");
695	memset(&xz, 0, sizeof(xz));
696	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
697	if (rc != LZMA_OK)
698		goto err;
699
700	xz.next_in = CCAST(const uint8_t *, old);
701	xz.avail_in = CAST(uint32_t, *n);
702	xz.next_out = RCAST(uint8_t *, *newch);
703	xz.avail_out = CAST(unsigned int, bytes_max);
704
705	rc = lzma_code(&xz, LZMA_RUN);
706	if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
707		lzma_end(&xz);
708		goto err;
709	}
710
711	*n = CAST(size_t, xz.total_out);
712
713	lzma_end(&xz);
714
715	/* let's keep the nul-terminate tradition */
716	(*newch)[*n] = '\0';
717
718	return OKDATA;
719err:
720	return makeerror(newch, n, "unxz error %d", rc);
721}
722#endif
723
724#ifdef BUILTIN_ZSTDLIB
725file_private int
726uncompresszstd(const unsigned char *old, unsigned char **newch,
727    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
728{
729	size_t rc;
730	ZSTD_DStream *zstd;
731	ZSTD_inBuffer in;
732	ZSTD_outBuffer out;
733
734	DPRINTF("builtin zstd decompression\n");
735	if ((zstd = ZSTD_createDStream()) == NULL) {
736		return makeerror(newch, n, "No ZSTD decompression stream, %s",
737		    strerror(errno));
738	}
739
740	rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
741	if (ZSTD_isError(rc))
742		goto err;
743
744	in.src = CCAST(const void *, old);
745	in.size = *n;
746	in.pos = 0;
747	out.dst = RCAST(void *, *newch);
748	out.size = bytes_max;
749	out.pos = 0;
750
751	rc = ZSTD_decompressStream(zstd, &out, &in);
752	if (ZSTD_isError(rc))
753		goto err;
754
755	*n = out.pos;
756
757	ZSTD_freeDStream(zstd);
758
759	/* let's keep the nul-terminate tradition */
760	(*newch)[*n] = '\0';
761
762	return OKDATA;
763err:
764	ZSTD_freeDStream(zstd);
765	return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
766}
767#endif
768
769#ifdef BUILTIN_LZLIB
770file_private int
771uncompresslzlib(const unsigned char *old, unsigned char **newch,
772    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
773{
774	enum LZ_Errno err;
775	size_t old_remaining = *n;
776	size_t new_remaining = bytes_max;
777	size_t total_read = 0;
778	unsigned char *bufp;
779	struct LZ_Decoder *dec;
780
781	bufp = *newch;
782
783	DPRINTF("builtin lzlib decompression\n");
784	dec = LZ_decompress_open();
785	if (!dec) {
786		return makeerror(newch, n, "unable to allocate LZ_Decoder");
787	}
788	if (LZ_decompress_errno(dec) != LZ_ok)
789		goto err;
790
791	for (;;) {
792		// LZ_decompress_read() stops at member boundaries, so we may
793		// have more than one successful read after writing all data
794		// we have.
795		if (old_remaining > 0) {
796			int wr = LZ_decompress_write(dec, old, old_remaining);
797			if (wr < 0)
798				goto err;
799			old_remaining -= wr;
800			old += wr;
801		}
802
803		int rd = LZ_decompress_read(dec, bufp, new_remaining);
804		if (rd > 0) {
805			new_remaining -= rd;
806			bufp += rd;
807			total_read += rd;
808		}
809
810		if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
811			goto err;
812		if (new_remaining == 0)
813			break;
814		if (old_remaining == 0 && rd == 0)
815			break;
816	}
817
818	LZ_decompress_close(dec);
819	*n = total_read;
820
821	/* let's keep the nul-terminate tradition */
822	*bufp = '\0';
823
824	return OKDATA;
825err:
826	err = LZ_decompress_errno(dec);
827	LZ_decompress_close(dec);
828	return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
829}
830#endif
831
832
833static int
834makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
835{
836	char *msg;
837	va_list ap;
838	int rv;
839
840	DPRINTF("Makeerror %s\n", fmt);
841	free(*buf);
842	va_start(ap, fmt);
843	rv = vasprintf(&msg, fmt, ap);
844	va_end(ap);
845	if (rv < 0) {
846		DPRINTF("Makeerror failed");
847		*buf = NULL;
848		*len = 0;
849		return NODATA;
850	}
851	*buf = RCAST(unsigned char *, msg);
852	*len = strlen(msg);
853	return ERRDATA;
854}
855
/*
 * Close descriptor fd[i] if it is open and mark the slot as closed (-1).
 * A slot that already holds -1 is left alone.
 */
static void
closefd(int *fd, size_t i)
{
	int *slot = &fd[i];

	if (*slot != -1) {
		(void) close(*slot);
		*slot = -1;
	}
}
864
/* Close both ends of the pipe stored in fd[0] and fd[1]. */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
872
/*
 * Arrange for descriptor fd to become descriptor i and for fd itself to
 * be closed.  Nothing happens when fd already equals i.
 *
 * With posix_spawnp support, v is the posix_spawn_file_actions_t to which
 * the dup2 and close are appended; they take effect in the spawned child.
 * Otherwise the dup2/close are performed immediately in the calling
 * process, and a failing dup2 terminates it.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		/*
		 * This branch runs in the child created by vfork(), so use
		 * _exit(): exit() would run the parent's exit handlers and
		 * flush its stdio buffers from the child.
		 */
		_exit(EXIT_FAILURE);
	}
	close(v ? fd : fd);	/* the conditional only references v */
#endif
}
890
/*
 * Close fd on behalf of the decompressor child.  With posix_spawnp
 * support the close is appended to the file actions object v; otherwise
 * fd is closed immediately in the calling process.
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;	/* only meaningful in the posix_spawnp build */
	close(fd);
#endif
}
901
902static void
903handledesc(void *v, int fd, int fdp[3][2])
904{
905	if (fd != -1) {
906		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
907		movedesc(v, STDIN_FILENO, fd);
908	} else {
909		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
910		if (fdp[STDIN_FILENO][1] > 2)
911		    closedesc(v, fdp[STDIN_FILENO][1]);
912	}
913
914	file_clear_closexec(STDIN_FILENO);
915
916///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
917	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
918	if (fdp[STDOUT_FILENO][0] > 2)
919		closedesc(v, fdp[STDOUT_FILENO][0]);
920
921	file_clear_closexec(STDOUT_FILENO);
922
923	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
924	if (fdp[STDERR_FILENO][0] > 2)
925		closedesc(v, fdp[STDERR_FILENO][0]);
926
927	file_clear_closexec(STDERR_FILENO);
928}
929
930static pid_t
931writechild(int fd, const void *old, size_t n)
932{
933	pid_t pid;
934
935	/*
936	 * fork again, to avoid blocking because both
937	 * pipes filled
938	 */
939	pid = fork();
940	if (pid == -1) {
941		DPRINTF("Fork failed (%s)\n", strerror(errno));
942		return -1;
943	}
944	if (pid == 0) {
945		/* child */
946		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
947			DPRINTF("Write failed (%s)\n", strerror(errno));
948			exit(EXIT_FAILURE);
949		}
950		exit(EXIT_SUCCESS);
951	}
952	/* parent */
953	return pid;
954}
955
956static ssize_t
957filter_error(unsigned char *ubuf, ssize_t n)
958{
959	char *p;
960	char *buf;
961
962	ubuf[n] = '\0';
963	buf = RCAST(char *, ubuf);
964	while (isspace(CAST(unsigned char, *buf)))
965		buf++;
966	DPRINTF("Filter error[[[%s]]]\n", buf);
967	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
968		*p = '\0';
969	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
970		*p = '\0';
971	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
972		++p;
973		while (isspace(CAST(unsigned char, *p)))
974			p++;
975		n = strlen(p);
976		memmove(ubuf, p, CAST(size_t, n + 1));
977	}
978	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
979	if (islower(*ubuf))
980		*ubuf = toupper(*ubuf);
981	return n;
982}
983
984file_private const char *
985methodname(size_t method)
986{
987	switch (method) {
988#ifdef BUILTIN_DECOMPRESS
989	case METH_FROZEN:
990	case METH_ZLIB:
991		return "zlib";
992#endif
993#ifdef BUILTIN_BZLIB
994	case METH_BZIP:
995		return "bzlib";
996#endif
997#ifdef BUILTIN_XZLIB
998	case METH_XZ:
999	case METH_LZMA:
1000		return "xzlib";
1001#endif
1002#ifdef BUILTIN_ZSTDLIB
1003	case METH_ZSTD:
1004		return "zstd";
1005#endif
1006#ifdef BUILTIN_LZLIB
1007	case METH_LZIP:
1008		return "lzlib";
1009#endif
1010	default:
1011		return compr[method].argv[0];
1012	}
1013}
1014
1015file_private int (*
1016getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
1017    size_t *, int)
1018{
1019	switch (method) {
1020#ifdef BUILTIN_DECOMPRESS
1021	case METH_FROZEN:
1022		return uncompressgzipped;
1023	case METH_ZLIB:
1024		return uncompresszlib;
1025#endif
1026#ifdef BUILTIN_BZLIB
1027	case METH_BZIP:
1028		return uncompressbzlib;
1029#endif
1030#ifdef BUILTIN_XZLIB
1031	case METH_XZ:
1032	case METH_LZMA:
1033		return uncompressxzlib;
1034#endif
1035#ifdef BUILTIN_ZSTDLIB
1036	case METH_ZSTD:
1037		return uncompresszstd;
1038#endif
1039#ifdef BUILTIN_LZLIB
1040	case METH_LZIP:
1041		return uncompresslzlib;
1042#endif
1043	default:
1044		return NULL;
1045	}
1046}
1047
1048file_private int
1049uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1050    const unsigned char *old, unsigned char **newch, size_t* n)
1051{
1052	int fdp[3][2];
1053	int status, rv, w;
1054	pid_t pid;
1055	pid_t writepid = -1;
1056	size_t i;
1057	ssize_t r, re;
1058	char *const *args;
1059#ifdef HAVE_POSIX_SPAWNP
1060	posix_spawn_file_actions_t fa;
1061#endif
1062	int (*decompress)(const unsigned char *, unsigned char **,
1063	    size_t, size_t *, int) = getdecompressor(method);
1064
1065	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
1066	if (*newch == NULL)
1067		return makeerror(newch, n, "No buffer, %s", strerror(errno));
1068
1069	if (decompress) {
1070		if (nofork) {
1071			return makeerror(newch, n,
1072			    "Fork is required to uncompress, but disabled");
1073		}
1074		return (*decompress)(old, newch, bytes_max, n, 1);
1075	}
1076
1077	(void)fflush(stdout);
1078	(void)fflush(stderr);
1079
1080	for (i = 0; i < __arraycount(fdp); i++)
1081		fdp[i][0] = fdp[i][1] = -1;
1082
1083	/*
1084	 * There are multithreaded users who run magic_file()
1085	 * from dozens of threads. If two parallel magic_file() calls
1086	 * analyze two large compressed files, both will spawn
1087	 * an uncompressing child here, which writes out uncompressed data.
1088	 * We read some portion, then close the pipe, then waitpid() the child.
1089	 * If uncompressed data is larger, child should get EPIPE and exit.
1090	 * However, with *parallel* calls OTHER child may unintentionally
1091	 * inherit pipe fds, thus keeping pipe open and making writes in
1092	 * our child block instead of failing with EPIPE!
1093	 * (For the bug to occur, two threads must mutually inherit their pipes,
1094	 * and both must have large outputs. Thus it happens not that often).
1095	 * To avoid this, be sure to create pipes with O_CLOEXEC.
1096	 */
1097	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1098	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1099	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1100		closep(fdp[STDIN_FILENO]);
1101		closep(fdp[STDOUT_FILENO]);
1102		return makeerror(newch, n, "Cannot create pipe, %s",
1103		    strerror(errno));
1104	}
1105
1106	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1107#ifdef HAVE_POSIX_SPAWNP
1108	posix_spawn_file_actions_init(&fa);
1109
1110	handledesc(&fa, fd, fdp);
1111
1112	DPRINTF("Executing %s\n", compr[method].argv[0]);
1113	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1114	    args, NULL);
1115
1116	posix_spawn_file_actions_destroy(&fa);
1117
1118	if (status == -1) {
1119		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1120		    compr[method].argv[0], strerror(errno));
1121	}
1122#else
1123	/* For processes with large mapped virtual sizes, vfork
1124	 * may be _much_ faster (10-100 times) than fork.
1125	 */
1126	pid = vfork();
1127	if (pid == -1) {
1128		return makeerror(newch, n, "Cannot vfork, %s",
1129		    strerror(errno));
1130	}
1131	if (pid == 0) {
1132		/* child */
1133		/* Note: we are after vfork, do not modify memory
1134		 * in a way which confuses parent. In particular,
1135		 * do not modify fdp[i][j].
1136		 */
1137		handledesc(NULL, fd, fdp);
1138		DPRINTF("Executing %s\n", compr[method].argv[0]);
1139
1140		(void)execvp(compr[method].argv[0], args);
1141		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1142		    compr[method].argv[0], strerror(errno));
1143		_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1144	}
1145#endif
1146	/* parent */
1147	/* Close write sides of child stdout/err pipes */
1148	for (i = 1; i < __arraycount(fdp); i++)
1149		closefd(fdp[i], 1);
1150	/* Write the buffer data to child stdin, if we don't have fd */
1151	if (fd == -1) {
1152		closefd(fdp[STDIN_FILENO], 0);
1153		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1154		if (writepid == (pid_t)-1) {
1155			rv = makeerror(newch, n, "Write to child failed, %s",
1156			    strerror(errno));
1157			DPRINTF("Write to child failed\n");
1158			goto err;
1159		}
1160		closefd(fdp[STDIN_FILENO], 1);
1161	}
1162
1163	rv = OKDATA;
1164	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1165	DPRINTF("read got %zd\n", r);
1166	if (r < 0) {
1167		rv = ERRDATA;
1168		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1169		        strerror(errno));
1170		goto err;
1171	}
1172	if (CAST(size_t, r) == bytes_max) {
1173		/*
1174		 * close fd so that the child exits with sigpipe and ignore
1175		 * errors, otherwise we risk the child blocking and never
1176		 * exiting.
1177		 */
1178		DPRINTF("Closing stdout for bytes_max\n");
1179		closefd(fdp[STDOUT_FILENO], 0);
1180		goto ok;
1181	}
1182	if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1183		DPRINTF("Got stuff from stderr %s\n", *newch);
1184		rv = ERRDATA;
1185		r = filter_error(*newch, r);
1186		goto ok;
1187	}
1188	if  (re == 0)
1189		goto ok;
1190	rv = makeerror(newch, n, "Read stderr failed, %s",
1191	    strerror(errno));
1192	goto err;
1193ok:
1194	*n = r;
1195	/* NUL terminate, as every buffer is handled here. */
1196	(*newch)[*n] = '\0';
1197err:
1198	closefd(fdp[STDIN_FILENO], 1);
1199	closefd(fdp[STDOUT_FILENO], 0);
1200	closefd(fdp[STDERR_FILENO], 0);
1201
1202	w = waitpid(pid, &status, 0);
1203wait_err:
1204	if (w == -1) {
1205		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1206		DPRINTF("Child wait return %#x\n", status);
1207	} else if (!WIFEXITED(status)) {
1208		DPRINTF("Child not exited (%#x)\n", status);
1209	} else if (WEXITSTATUS(status) != 0) {
1210		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1211	}
1212	if (writepid > 0) {
1213		/* _After_ we know decompressor has exited, our input writer
1214		 * definitely will exit now (at worst, writing fails in it,
1215		 * since output fd is closed now on the reading size).
1216		 */
1217		w = waitpid(writepid, &status, 0);
1218		writepid = -1;
1219		goto wait_err;
1220	}
1221
1222	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1223	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1224
1225	return rv;
1226}
1227#endif
1228