1228753Smm/*-
2228753Smm * Copyright (c) 2007 Joerg Sonnenberger
3248616Smm * Copyright (c) 2012 Michihiro NAKAJIMA
4228753Smm * All rights reserved.
5228753Smm *
6228753Smm * Redistribution and use in source and binary forms, with or without
7228753Smm * modification, are permitted provided that the following conditions
8228753Smm * are met:
9228753Smm * 1. Redistributions of source code must retain the above copyright
10228753Smm *    notice, this list of conditions and the following disclaimer.
11228753Smm * 2. Redistributions in binary form must reproduce the above copyright
12228753Smm *    notice, this list of conditions and the following disclaimer in the
13228753Smm *    documentation and/or other materials provided with the distribution.
14228753Smm *
15228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25228753Smm */
26228753Smm
27228753Smm#include "archive_platform.h"
28231200Smm__FBSDID("$FreeBSD$");
29228753Smm
30228753Smm#ifdef HAVE_SYS_WAIT_H
31228753Smm#  include <sys/wait.h>
32228753Smm#endif
33228753Smm#ifdef HAVE_ERRNO_H
34228753Smm#  include <errno.h>
35228753Smm#endif
36228753Smm#ifdef HAVE_FCNTL_H
37228753Smm#  include <fcntl.h>
38228753Smm#endif
39228753Smm#ifdef HAVE_LIMITS_H
40228753Smm#  include <limits.h>
41228753Smm#endif
42228753Smm#ifdef HAVE_SIGNAL_H
43228753Smm#  include <signal.h>
44228753Smm#endif
45228753Smm#ifdef HAVE_STDLIB_H
46228753Smm#  include <stdlib.h>
47228753Smm#endif
48228753Smm#ifdef HAVE_STRING_H
49228753Smm#  include <string.h>
50228753Smm#endif
51228753Smm#ifdef HAVE_UNISTD_H
52228753Smm#  include <unistd.h>
53228753Smm#endif
54228753Smm
55228753Smm#include "archive.h"
56228753Smm#include "archive_private.h"
57248616Smm#include "archive_string.h"
58228753Smm#include "archive_read_private.h"
59248616Smm#include "filter_fork.h"
60228753Smm
61231200Smm
62231200Smm#if ARCHIVE_VERSION_NUMBER < 4000000
63231200Smm/* Deprecated; remove in libarchive 4.0 */
/*
 * Legacy name for archive_read_support_filter_program(); forwards its
 * arguments unchanged and hands back that function's result.
 */
int
archive_read_support_compression_program(struct archive *a, const char *cmd)
{
	int r;

	r = archive_read_support_filter_program(a, cmd);
	return (r);
}
69228753Smm
/*
 * Legacy name for archive_read_support_filter_program_signature();
 * forwards its arguments unchanged and hands back that function's result.
 */
int
archive_read_support_compression_program_signature(struct archive *a,
    const char *cmd, const void *signature, size_t signature_len)
{
	int r;

	r = archive_read_support_filter_program_signature(a, cmd,
	    signature, signature_len);
	return (r);
}
77231200Smm#endif
78228753Smm
/*
 * Register 'cmd' as a program filter without a signature: this is the
 * signature variant called with a NULL signature of length 0.
 */
int
archive_read_support_filter_program(struct archive *a, const char *cmd)
{
	const void *no_signature = NULL;

	return (archive_read_support_filter_program_signature(a, cmd,
	    no_signature, 0));
}
84231200Smm
/*
 * Per-bidder state: the command to run and the optional signature to
 * look for at the start of the input.  'inhibit' is set once a bidder
 * without a signature has bid, so that it never bids twice in the same
 * pipeline.
 */
struct program_bidder {
	char	*description;	/* Not set or read by the functions visible
				 * in this file. */
	char	*cmd;		/* Private copy of the command string. */
	void	*signature;	/* Private copy of the signature bytes, or
				 * NULL when no signature was supplied. */
	size_t	 signature_len;	/* Byte length of 'signature'; 0 means
				 * "no signature". */
	int	 inhibit;	/* Nonzero after a signature-less bid. */
};
97228753Smm
/* Callbacks installed on the bidder object by set_bidder_signature(). */
static int	program_bidder_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *upstream);
static int	program_bidder_init(struct archive_read_filter *self);
static int	program_bidder_free(struct archive_read_filter_bidder *self);
102228753Smm
103228753Smm/*
104228753Smm * The actual filter needs to track input and output data.
105228753Smm */
106228753Smmstruct program_filter {
107248616Smm	struct archive_string description;
108248616Smm#if defined(_WIN32) && !defined(__CYGWIN__)
109248616Smm	HANDLE		 child;
110248616Smm#else
111228753Smm	pid_t		 child;
112248616Smm#endif
113228753Smm	int		 exit_status;
114228753Smm	int		 waitpid_return;
115228753Smm	int		 child_stdin, child_stdout;
116228753Smm
117228753Smm	char		*out_buf;
118228753Smm	size_t		 out_buf_len;
119228753Smm};
120228753Smm
/* Filter callbacks and the bidder-state destructor defined further down. */
static ssize_t	program_filter_read(struct archive_read_filter *self,
		    const void **buff);
static int	program_filter_close(struct archive_read_filter *self);
static void	free_state(struct program_bidder *state);
125228753Smm
126248616Smmstatic int
127248616Smmset_bidder_signature(struct archive_read_filter_bidder *bidder,
128248616Smm    struct program_bidder *state, const void *signature, size_t signature_len)
129248616Smm{
130248616Smm
131248616Smm	if (signature != NULL && signature_len > 0) {
132248616Smm		state->signature_len = signature_len;
133248616Smm		state->signature = malloc(signature_len);
134248616Smm		memcpy(state->signature, signature, signature_len);
135248616Smm	}
136248616Smm
137248616Smm	/*
138248616Smm	 * Fill in the bidder object.
139248616Smm	 */
140248616Smm	bidder->data = state;
141248616Smm	bidder->bid = program_bidder_bid;
142248616Smm	bidder->init = program_bidder_init;
143248616Smm	bidder->options = NULL;
144248616Smm	bidder->free = program_bidder_free;
145248616Smm	return (ARCHIVE_OK);
146248616Smm}
147248616Smm
148228753Smmint
149231200Smmarchive_read_support_filter_program_signature(struct archive *_a,
150228753Smm    const char *cmd, const void *signature, size_t signature_len)
151228753Smm{
152228753Smm	struct archive_read *a = (struct archive_read *)_a;
153228753Smm	struct archive_read_filter_bidder *bidder;
154228753Smm	struct program_bidder *state;
155228753Smm
156228753Smm	/*
157228753Smm	 * Get a bidder object from the read core.
158228753Smm	 */
159231200Smm	if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
160228753Smm		return (ARCHIVE_FATAL);
161228753Smm
162228753Smm	/*
163228753Smm	 * Allocate our private state.
164228753Smm	 */
165248616Smm	state = (struct program_bidder *)calloc(1, sizeof (*state));
166228753Smm	if (state == NULL)
167248616Smm		goto memerr;
168228753Smm	state->cmd = strdup(cmd);
169248616Smm	if (state->cmd == NULL)
170248616Smm		goto memerr;
171228753Smm
172248616Smm	return set_bidder_signature(bidder, state, signature, signature_len);
173248616Smmmemerr:
174248616Smm	free_state(state);
175248616Smm	archive_set_error(_a, ENOMEM, "Can't allocate memory");
176248616Smm	return (ARCHIVE_FATAL);
177228753Smm}
178228753Smm
179228753Smmstatic int
180228753Smmprogram_bidder_free(struct archive_read_filter_bidder *self)
181228753Smm{
182228753Smm	struct program_bidder *state = (struct program_bidder *)self->data;
183248616Smm
184248616Smm	free_state(state);
185228753Smm	return (ARCHIVE_OK);
186228753Smm}
187228753Smm
188248616Smmstatic void
189248616Smmfree_state(struct program_bidder *state)
190248616Smm{
191248616Smm
192248616Smm	if (state) {
193248616Smm		free(state->cmd);
194248616Smm		free(state->signature);
195248616Smm		free(state);
196248616Smm	}
197248616Smm}
198248616Smm
199228753Smm/*
200228753Smm * If we do have a signature, bid only if that matches.
201228753Smm *
202228753Smm * If there's no signature, we bid INT_MAX the first time
203228753Smm * we're called, then never bid again.
204228753Smm */
205228753Smmstatic int
206228753Smmprogram_bidder_bid(struct archive_read_filter_bidder *self,
207228753Smm    struct archive_read_filter *upstream)
208228753Smm{
209228753Smm	struct program_bidder *state = self->data;
210228753Smm	const char *p;
211228753Smm
212228753Smm	/* If we have a signature, use that to match. */
213228753Smm	if (state->signature_len > 0) {
214228753Smm		p = __archive_read_filter_ahead(upstream,
215228753Smm		    state->signature_len, NULL);
216228753Smm		if (p == NULL)
217228753Smm			return (0);
218228753Smm		/* No match, so don't bid. */
219228753Smm		if (memcmp(p, state->signature, state->signature_len) != 0)
220228753Smm			return (0);
221228753Smm		return ((int)state->signature_len * 8);
222228753Smm	}
223228753Smm
224228753Smm	/* Otherwise, bid once and then never bid again. */
225228753Smm	if (state->inhibit)
226228753Smm		return (0);
227228753Smm	state->inhibit = 1;
228228753Smm	return (INT_MAX);
229228753Smm}
230228753Smm
231228753Smm/*
232228753Smm * Shut down the child, return ARCHIVE_OK if it exited normally.
233228753Smm *
234228753Smm * Note that the return value is sticky; if we're called again,
235228753Smm * we won't reap the child again, but we will return the same status
236228753Smm * (including error message if the child came to a bad end).
237228753Smm */
238228753Smmstatic int
239228753Smmchild_stop(struct archive_read_filter *self, struct program_filter *state)
240228753Smm{
241228753Smm	/* Close our side of the I/O with the child. */
242228753Smm	if (state->child_stdin != -1) {
243228753Smm		close(state->child_stdin);
244228753Smm		state->child_stdin = -1;
245228753Smm	}
246228753Smm	if (state->child_stdout != -1) {
247228753Smm		close(state->child_stdout);
248228753Smm		state->child_stdout = -1;
249228753Smm	}
250228753Smm
251228753Smm	if (state->child != 0) {
252228753Smm		/* Reap the child. */
253228753Smm		do {
254228753Smm			state->waitpid_return
255228753Smm			    = waitpid(state->child, &state->exit_status, 0);
256228753Smm		} while (state->waitpid_return == -1 && errno == EINTR);
257248616Smm#if defined(_WIN32) && !defined(__CYGWIN__)
258248616Smm		CloseHandle(state->child);
259248616Smm#endif
260228753Smm		state->child = 0;
261228753Smm	}
262228753Smm
263228753Smm	if (state->waitpid_return < 0) {
264228753Smm		/* waitpid() failed?  This is ugly. */
265228753Smm		archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
266228753Smm		    "Child process exited badly");
267228753Smm		return (ARCHIVE_WARN);
268228753Smm	}
269228753Smm
270228753Smm#if !defined(_WIN32) || defined(__CYGWIN__)
271228753Smm	if (WIFSIGNALED(state->exit_status)) {
272228753Smm#ifdef SIGPIPE
273228753Smm		/* If the child died because we stopped reading before
274228753Smm		 * it was done, that's okay.  Some archive formats
275228753Smm		 * have padding at the end that we routinely ignore. */
276228753Smm		/* The alternative to this would be to add a step
277228753Smm		 * before close(child_stdout) above to read from the
278228753Smm		 * child until the child has no more to write. */
279228753Smm		if (WTERMSIG(state->exit_status) == SIGPIPE)
280228753Smm			return (ARCHIVE_OK);
281228753Smm#endif
282228753Smm		archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
283228753Smm		    "Child process exited with signal %d",
284228753Smm		    WTERMSIG(state->exit_status));
285228753Smm		return (ARCHIVE_WARN);
286228753Smm	}
287228753Smm#endif /* !_WIN32 || __CYGWIN__ */
288228753Smm
289228753Smm	if (WIFEXITED(state->exit_status)) {
290228753Smm		if (WEXITSTATUS(state->exit_status) == 0)
291228753Smm			return (ARCHIVE_OK);
292228753Smm
293228753Smm		archive_set_error(&self->archive->archive,
294228753Smm		    ARCHIVE_ERRNO_MISC,
295228753Smm		    "Child process exited with status %d",
296228753Smm		    WEXITSTATUS(state->exit_status));
297228753Smm		return (ARCHIVE_WARN);
298228753Smm	}
299228753Smm
300228753Smm	return (ARCHIVE_WARN);
301228753Smm}
302228753Smm
303228753Smm/*
304228753Smm * Use select() to decide whether the child is ready for read or write.
305228753Smm */
306228753Smmstatic ssize_t
307228753Smmchild_read(struct archive_read_filter *self, char *buf, size_t buf_len)
308228753Smm{
309228753Smm	struct program_filter *state = self->data;
310228753Smm	ssize_t ret, requested, avail;
311228753Smm	const char *p;
312248616Smm#if defined(_WIN32) && !defined(__CYGWIN__)
313248616Smm	HANDLE handle = (HANDLE)_get_osfhandle(state->child_stdout);
314248616Smm#endif
315228753Smm
316228753Smm	requested = buf_len > SSIZE_MAX ? SSIZE_MAX : buf_len;
317228753Smm
318228753Smm	for (;;) {
319228753Smm		do {
320248616Smm#if defined(_WIN32) && !defined(__CYGWIN__)
321248616Smm			/* Avoid infinity wait.
322248616Smm			 * Note: If there is no data in the pipe, ReadFile()
323248616Smm			 * called in read() never returns and so we won't
324248616Smm			 * write remaining encoded data to the pipe.
325248616Smm			 * Note: This way may cause performance problem.
326248616Smm			 * we are looking forward to great code to resolve
327248616Smm			 * this.  */
328248616Smm			DWORD pipe_avail = -1;
329248616Smm			int cnt = 2;
330248616Smm
331248616Smm			while (PeekNamedPipe(handle, NULL, 0, NULL,
332248616Smm			    &pipe_avail, NULL) != 0 && pipe_avail == 0 &&
333248616Smm			    cnt--)
334248616Smm				Sleep(5);
335248616Smm			if (pipe_avail == 0) {
336248616Smm				ret = -1;
337248616Smm				errno = EAGAIN;
338248616Smm				break;
339248616Smm			}
340248616Smm#endif
341228753Smm			ret = read(state->child_stdout, buf, requested);
342228753Smm		} while (ret == -1 && errno == EINTR);
343228753Smm
344228753Smm		if (ret > 0)
345228753Smm			return (ret);
346228753Smm		if (ret == 0 || (ret == -1 && errno == EPIPE))
347228753Smm			/* Child has closed its output; reap the child
348228753Smm			 * and return the status. */
349228753Smm			return (child_stop(self, state));
350228753Smm		if (ret == -1 && errno != EAGAIN)
351228753Smm			return (-1);
352228753Smm
353228753Smm		if (state->child_stdin == -1) {
354228753Smm			/* Block until child has some I/O ready. */
355228753Smm			__archive_check_child(state->child_stdin,
356228753Smm			    state->child_stdout);
357228753Smm			continue;
358228753Smm		}
359228753Smm
360228753Smm		/* Get some more data from upstream. */
361228753Smm		p = __archive_read_filter_ahead(self->upstream, 1, &avail);
362228753Smm		if (p == NULL) {
363228753Smm			close(state->child_stdin);
364228753Smm			state->child_stdin = -1;
365228753Smm			fcntl(state->child_stdout, F_SETFL, 0);
366228753Smm			if (avail < 0)
367228753Smm				return (avail);
368228753Smm			continue;
369228753Smm		}
370228753Smm
371228753Smm		do {
372228753Smm			ret = write(state->child_stdin, p, avail);
373228753Smm		} while (ret == -1 && errno == EINTR);
374228753Smm
375228753Smm		if (ret > 0) {
376228753Smm			/* Consume whatever we managed to write. */
377228753Smm			__archive_read_filter_consume(self->upstream, ret);
378228753Smm		} else if (ret == -1 && errno == EAGAIN) {
379228753Smm			/* Block until child has some I/O ready. */
380228753Smm			__archive_check_child(state->child_stdin,
381228753Smm			    state->child_stdout);
382228753Smm		} else {
383228753Smm			/* Write failed. */
384228753Smm			close(state->child_stdin);
385228753Smm			state->child_stdin = -1;
386228753Smm			fcntl(state->child_stdout, F_SETFL, 0);
387228753Smm			/* If it was a bad error, we're done; otherwise
388228753Smm			 * it was EPIPE or EOF, and we can still read
389228753Smm			 * from the child. */
390228753Smm			if (ret == -1 && errno != EPIPE)
391228753Smm				return (-1);
392228753Smm		}
393228753Smm	}
394228753Smm}
395228753Smm
396228753Smmint
397228753Smm__archive_read_program(struct archive_read_filter *self, const char *cmd)
398228753Smm{
399228753Smm	struct program_filter	*state;
400228753Smm	static const size_t out_buf_len = 65536;
401228753Smm	char *out_buf;
402228753Smm	const char *prefix = "Program: ";
403248616Smm	pid_t child;
404248616Smm	size_t l;
405228753Smm
406248616Smm	l = strlen(prefix) + strlen(cmd) + 1;
407228753Smm	state = (struct program_filter *)calloc(1, sizeof(*state));
408228753Smm	out_buf = (char *)malloc(out_buf_len);
409248616Smm	if (state == NULL || out_buf == NULL ||
410248616Smm	    archive_string_ensure(&state->description, l) == NULL) {
411228753Smm		archive_set_error(&self->archive->archive, ENOMEM,
412228753Smm		    "Can't allocate input data");
413248616Smm		if (state != NULL) {
414248616Smm			archive_string_free(&state->description);
415248616Smm			free(state);
416248616Smm		}
417228753Smm		free(out_buf);
418228753Smm		return (ARCHIVE_FATAL);
419228753Smm	}
420248616Smm	archive_strcpy(&state->description, prefix);
421248616Smm	archive_strcat(&state->description, cmd);
422228753Smm
423248616Smm	self->code = ARCHIVE_FILTER_PROGRAM;
424248616Smm	self->name = state->description.s;
425228753Smm
426228753Smm	state->out_buf = out_buf;
427228753Smm	state->out_buf_len = out_buf_len;
428228753Smm
429248616Smm	child = __archive_create_child(cmd, &state->child_stdin,
430248616Smm	    &state->child_stdout);
431248616Smm	if (child == -1) {
432228753Smm		free(state->out_buf);
433228753Smm		free(state);
434228753Smm		archive_set_error(&self->archive->archive, EINVAL,
435248616Smm		    "Can't initialize filter; unable to run program \"%s\"",
436248616Smm		    cmd);
437228753Smm		return (ARCHIVE_FATAL);
438228753Smm	}
439248616Smm#if defined(_WIN32) && !defined(__CYGWIN__)
440248616Smm	state->child = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, child);
441248616Smm	if (state->child == NULL) {
442248616Smm		child_stop(self, state);
443248616Smm		free(state->out_buf);
444248616Smm		free(state);
445248616Smm		archive_set_error(&self->archive->archive, EINVAL,
446248616Smm		    "Can't initialize filter; unable to run program \"%s\"",
447248616Smm		    cmd);
448248616Smm		return (ARCHIVE_FATAL);
449248616Smm	}
450248616Smm#else
451248616Smm	state->child = child;
452248616Smm#endif
453228753Smm
454228753Smm	self->data = state;
455228753Smm	self->read = program_filter_read;
456228753Smm	self->skip = NULL;
457228753Smm	self->close = program_filter_close;
458228753Smm
459228753Smm	/* XXX Check that we can read at least one byte? */
460228753Smm	return (ARCHIVE_OK);
461228753Smm}
462228753Smm
463228753Smmstatic int
464228753Smmprogram_bidder_init(struct archive_read_filter *self)
465228753Smm{
466228753Smm	struct program_bidder   *bidder_state;
467228753Smm
468228753Smm	bidder_state = (struct program_bidder *)self->bidder->data;
469228753Smm	return (__archive_read_program(self, bidder_state->cmd));
470228753Smm}
471228753Smm
472228753Smmstatic ssize_t
473228753Smmprogram_filter_read(struct archive_read_filter *self, const void **buff)
474228753Smm{
475228753Smm	struct program_filter *state;
476228753Smm	ssize_t bytes;
477228753Smm	size_t total;
478228753Smm	char *p;
479228753Smm
480228753Smm	state = (struct program_filter *)self->data;
481228753Smm
482228753Smm	total = 0;
483228753Smm	p = state->out_buf;
484228753Smm	while (state->child_stdout != -1 && total < state->out_buf_len) {
485228753Smm		bytes = child_read(self, p, state->out_buf_len - total);
486228753Smm		if (bytes < 0)
487228753Smm			/* No recovery is possible if we can no longer
488228753Smm			 * read from the child. */
489228753Smm			return (ARCHIVE_FATAL);
490228753Smm		if (bytes == 0)
491228753Smm			/* We got EOF from the child. */
492228753Smm			break;
493228753Smm		total += bytes;
494228753Smm		p += bytes;
495228753Smm	}
496228753Smm
497228753Smm	*buff = state->out_buf;
498228753Smm	return (total);
499228753Smm}
500228753Smm
501228753Smmstatic int
502228753Smmprogram_filter_close(struct archive_read_filter *self)
503228753Smm{
504228753Smm	struct program_filter	*state;
505228753Smm	int e;
506228753Smm
507228753Smm	state = (struct program_filter *)self->data;
508228753Smm	e = child_stop(self, state);
509228753Smm
510228753Smm	/* Release our private data. */
511228753Smm	free(state->out_buf);
512248616Smm	archive_string_free(&state->description);
513228753Smm	free(state);
514228753Smm
515228753Smm	return (e);
516228753Smm}
517