1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3232153Smm * Copyright (c) 2011-2012 Michihiro NAKAJIMA
4313571Smm * Copyright (c) 2016 Martin Matuska
5228753Smm * All rights reserved.
6228753Smm *
7228753Smm * Redistribution and use in source and binary forms, with or without
8228753Smm * modification, are permitted provided that the following conditions
9228753Smm * are met:
10228753Smm * 1. Redistributions of source code must retain the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer.
12228753Smm * 2. Redistributions in binary form must reproduce the above copyright
13228753Smm *    notice, this list of conditions and the following disclaimer in the
14228753Smm *    documentation and/or other materials provided with the distribution.
15228753Smm *
16228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26228753Smm */
27228753Smm
28228753Smm#include "archive_platform.h"
29228763Smm__FBSDID("$FreeBSD: stable/10/contrib/libarchive/libarchive/archive_read_support_format_tar.c 362134 2020-06-12 23:02:34Z mm $");
30228753Smm
31228753Smm#ifdef HAVE_ERRNO_H
32228753Smm#include <errno.h>
33228753Smm#endif
34228753Smm#include <stddef.h>
35228753Smm#ifdef HAVE_STDLIB_H
36228753Smm#include <stdlib.h>
37228753Smm#endif
38228753Smm#ifdef HAVE_STRING_H
39228753Smm#include <string.h>
40228753Smm#endif
41228753Smm
42228753Smm#include "archive.h"
43232153Smm#include "archive_acl_private.h" /* For ACL parsing routines. */
44228753Smm#include "archive_entry.h"
45232153Smm#include "archive_entry_locale.h"
46228753Smm#include "archive_private.h"
47228753Smm#include "archive_read_private.h"
48228753Smm
/* Smaller of two values.  Function-like macro: each argument may be
 * evaluated twice, so do not pass expressions with side effects. */
#define tar_min(a,b) (((a) < (b)) ? (a) : (b))
50228753Smm
/*
 * On-disk layout of a POSIX 'ustar' tar header.
 *
 * Every member is a plain char array, so the structure has no padding
 * and the byte offsets noted below follow directly from the sizes.
 * The members add up to 500 bytes.
 */
struct archive_entry_header_ustar {
	char	name[100];	/* offset   0 */
	char	mode[8];	/* offset 100 */
	char	uid[8];		/* offset 108 */
	char	gid[8];		/* offset 116 */
	char	size[12];	/* offset 124 */
	char	mtime[12];	/* offset 136 */
	char	checksum[8];	/* offset 148 */
	char	typeflag[1];	/* offset 156 */
	char	linkname[100];	/* offset 157; "old format" header ends here */
	char	magic[6];	/* offset 257; for POSIX: "ustar\0" */
	char	version[2];	/* offset 263; for POSIX: "00" */
	char	uname[32];	/* offset 265 */
	char	gname[32];	/* offset 297 */
	char	rdevmajor[8];	/* offset 329 */
	char	rdevminor[8];	/* offset 337 */
	char	prefix[155];	/* offset 345 */
};
72228753Smm
/*
 * Structure of GNU tar header
 */

/* One (offset, length) pair of the sparse map stored in a GNU header. */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};

/*
 * The GNU header matches the ustar layout up to and including 'linkname'.
 * After that it uses a single 8-byte magic (no separate version field)
 * and, following the rdev fields, carries GNU-specific members instead
 * of the POSIX 'prefix'.  All members are char arrays (or structs of
 * char arrays), so there is no padding; the members add up to 495 bytes.
 */
struct archive_entry_header_gnutar {
	char	name[100];	/* offset   0 */
	char	mode[8];	/* offset 100 */
	char	uid[8];		/* offset 108 */
	char	gid[8];		/* offset 116 */
	char	size[12];	/* offset 124 */
	char	mtime[12];	/* offset 136 */
	char	checksum[8];	/* offset 148 */
	char	typeflag[1];	/* offset 156 */
	char	linkname[100];	/* offset 157 */
	char	magic[8];	/* offset 257; "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];	/* offset 265 */
	char	gname[32];	/* offset 297 */
	char	rdevmajor[8];	/* offset 329 */
	char	rdevminor[8];	/* offset 337 */
	char	atime[12];	/* offset 345 */
	char	ctime[12];	/* offset 357 */
	char	offset[12];	/* offset 369 */
	char	longnames[4];	/* offset 381 */
	char	unused[1];	/* offset 385 */
	struct gnu_sparse sparse[4];	/* offset 386; four 24-byte entries */
	char	isextended[1];	/* offset 482 */
	char	realsize[12];	/* offset 483 */
	/*
	 * Old GNU format doesn't use POSIX 'prefix' field; they use
	 * the 'L' (longname) entry instead.
	 */
};
109228753Smm
/*
 * Data specific to this format.
 */

/*
 * One node of the singly linked list describing the current entry's
 * body.  The read-data callback walks the list head-first, advancing
 * 'offset' and shrinking 'remaining' as bytes are handed out, and
 * frees a node once its 'remaining' reaches zero.
 */
struct sparse_block {
	struct sparse_block	*next;		/* Following block, or NULL. */
	int64_t			 offset;	/* Offset reported to the client. */
	int64_t			 remaining;	/* Bytes left in this block. */
	int			 hole;		/* Non-zero: block is a hole. */
};
119228753Smm
120228753Smmstruct tar {
121228753Smm	struct archive_string	 acl_text;
122228753Smm	struct archive_string	 entry_pathname;
123228753Smm	/* For "GNU.sparse.name" and other similar path extensions. */
124228753Smm	struct archive_string	 entry_pathname_override;
125228753Smm	struct archive_string	 entry_linkpath;
126228753Smm	struct archive_string	 entry_uname;
127228753Smm	struct archive_string	 entry_gname;
128228753Smm	struct archive_string	 longlink;
129228753Smm	struct archive_string	 longname;
130228753Smm	struct archive_string	 pax_header;
131228753Smm	struct archive_string	 pax_global;
132228753Smm	struct archive_string	 line;
133228753Smm	int			 pax_hdrcharset_binary;
134228753Smm	int			 header_recursion_depth;
135228753Smm	int64_t			 entry_bytes_remaining;
136228753Smm	int64_t			 entry_offset;
137228753Smm	int64_t			 entry_padding;
138232153Smm	int64_t 		 entry_bytes_unconsumed;
139228753Smm	int64_t			 realsize;
140306322Smm	int			 sparse_allowed;
141228753Smm	struct sparse_block	*sparse_list;
142228753Smm	struct sparse_block	*sparse_last;
143228753Smm	int64_t			 sparse_offset;
144228753Smm	int64_t			 sparse_numbytes;
145228753Smm	int			 sparse_gnu_major;
146228753Smm	int			 sparse_gnu_minor;
147228753Smm	char			 sparse_gnu_pending;
148232153Smm
149232153Smm	struct archive_string	 localname;
150232153Smm	struct archive_string_conv *opt_sconv;
151232153Smm	struct archive_string_conv *sconv;
152232153Smm	struct archive_string_conv *sconv_acl;
153232153Smm	struct archive_string_conv *sconv_default;
154232153Smm	int			 init_default_conversion;
155232153Smm	int			 compat_2x;
156302001Smm	int			 process_mac_extensions;
157302001Smm	int			 read_concatenated_archives;
158316338Smm	int			 realsize_override;
159228753Smm};
160228753Smm
/*
 * Forward declarations for the file-local functions, grouped by name.
 */

/* Callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_tar_bid(struct archive_read *, int);
static int	archive_read_format_tar_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_cleanup(struct archive_read *);

/* Header readers. */
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *, size_t *);
static int	header_common(struct archive_read *,  struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_pax_extensions(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_Solaris_ACL(struct archive_read *,  struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	read_mac_metadata_blob(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h, size_t *);

/* pax_* functions. */
static int 	pax_header(struct archive_read *, struct tar *,
		    struct archive_entry *, struct archive_string *);
static int 	pax_attribute(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *key, const char *value,
		    size_t value_length);
static int	pax_attribute_acl(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *, int);
static int	pax_attribute_xattr(struct archive_entry *, const char *,
		    const char *);
static void	pax_time(const char *, int64_t *sec, long *nanos);

/* gnu_* and solaris_* sparse functions. */
static int	gnu_add_sparse_entry(struct archive_read *, struct tar *,
		    int64_t offset, int64_t remaining);
static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header, size_t *);
static int	gnu_sparse_old_parse(struct archive_read *, struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
		    const char *);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *,
		    size_t *);
static int	solaris_sparse_parse(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *);

/* Remaining helpers. */
static int	archive_block_is_null(const char *p);
static int	checksum(struct archive_read *, const void *);
static void	tar_flush_unconsumed(struct archive_read *, size_t *);
static ssize_t	readline(struct archive_read *, struct tar *, const char **,
		    ssize_t limit, size_t *);
static int64_t	tar_atol(const char *, size_t);
static int64_t	tar_atol8(const char *, size_t);
static int64_t	tar_atol10(const char *, size_t);
static int64_t	tar_atol256(const char *, size_t);
static char	*base64_decode(const char *, size_t, size_t *);
static char	*url_decode(const char *);
static int	tohex(int c);

232228753Smm
233232153Smm
234228753Smmint
235228753Smmarchive_read_support_format_gnutar(struct archive *a)
236228753Smm{
237232153Smm	archive_check_magic(a, ARCHIVE_READ_MAGIC,
238232153Smm	    ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar");
239228753Smm	return (archive_read_support_format_tar(a));
240228753Smm}
241228753Smm
242228753Smm
243228753Smmint
244228753Smmarchive_read_support_format_tar(struct archive *_a)
245228753Smm{
246228753Smm	struct archive_read *a = (struct archive_read *)_a;
247228753Smm	struct tar *tar;
248228753Smm	int r;
249228753Smm
250232153Smm	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
251232153Smm	    ARCHIVE_STATE_NEW, "archive_read_support_format_tar");
252232153Smm
253232153Smm	tar = (struct tar *)calloc(1, sizeof(*tar));
254228753Smm	if (tar == NULL) {
255228753Smm		archive_set_error(&a->archive, ENOMEM,
256228753Smm		    "Can't allocate tar data");
257228753Smm		return (ARCHIVE_FATAL);
258228753Smm	}
259328828Smm#ifdef HAVE_COPYFILE_H
260328828Smm	/* Set this by default on Mac OS. */
261328828Smm	tar->process_mac_extensions = 1;
262328828Smm#endif
263228753Smm
264228753Smm	r = __archive_read_register_format(a, tar, "tar",
265228753Smm	    archive_read_format_tar_bid,
266232153Smm	    archive_read_format_tar_options,
267228753Smm	    archive_read_format_tar_read_header,
268228753Smm	    archive_read_format_tar_read_data,
269228753Smm	    archive_read_format_tar_skip,
270248616Smm	    NULL,
271302001Smm	    archive_read_format_tar_cleanup,
272302001Smm	    NULL,
273302001Smm	    NULL);
274228753Smm
275228753Smm	if (r != ARCHIVE_OK)
276228753Smm		free(tar);
277228753Smm	return (ARCHIVE_OK);
278228753Smm}
279228753Smm
280228753Smmstatic int
281228753Smmarchive_read_format_tar_cleanup(struct archive_read *a)
282228753Smm{
283228753Smm	struct tar *tar;
284228753Smm
285228753Smm	tar = (struct tar *)(a->format->data);
286228753Smm	gnu_clear_sparse_list(tar);
287228753Smm	archive_string_free(&tar->acl_text);
288228753Smm	archive_string_free(&tar->entry_pathname);
289228753Smm	archive_string_free(&tar->entry_pathname_override);
290228753Smm	archive_string_free(&tar->entry_linkpath);
291228753Smm	archive_string_free(&tar->entry_uname);
292228753Smm	archive_string_free(&tar->entry_gname);
293228753Smm	archive_string_free(&tar->line);
294228753Smm	archive_string_free(&tar->pax_global);
295228753Smm	archive_string_free(&tar->pax_header);
296228753Smm	archive_string_free(&tar->longname);
297228753Smm	archive_string_free(&tar->longlink);
298232153Smm	archive_string_free(&tar->localname);
299228753Smm	free(tar);
300228753Smm	(a->format->data) = NULL;
301228753Smm	return (ARCHIVE_OK);
302228753Smm}
303228753Smm
/*
 * Validate number field
 *
 * The check is deliberately lenient, because tar writers disagree on
 * how numeric fields are written:
 *  = POSIX (IEEE Std 1003.1-1988) ustar requires octal values with leading
 *    zeros and allows fields to be terminated with space or null characters
 *  = Many writers use different termination (in particular, libarchive
 *    omits terminator bytes to squeeze one or two more digits)
 *  = Many writers pad with space and omit leading zeros
 *  = GNU tar and star write base-256 values if numbers are too
 *    big to be represented in octal
 *
 *  Examples of specific tar headers that we should support:
 *  = Perl Archive::Tar terminates uid, gid, devminor and devmajor with two
 *    null bytes, pads size with spaces and other numeric fields with zeroes
 *  = plexus-archiver prior to 2.6.3 (before switching to commons-compress)
 *    may have uid and gid fields filled with spaces without any octal digits
 *    at all and pads all numeric fields with spaces
 *
 * Accepted shape: optional leading spaces, then optional octal digits,
 * then nothing but spaces and NULs up to the end of the field.  A field
 * whose first byte is 0x80, 0xff or NUL is accepted without looking at
 * the rest.  Anything else -- for instance garbage following the NUL
 * that terminates the digits -- is rejected.
 *
 * Returns 1 if the field is acceptable, 0 otherwise.  The first byte
 * of p_field is always read, whatever i_size is.
 */
static int
validate_number_field(const char* p_field, size_t i_size)
{
	const unsigned char lead = (unsigned char)p_field[0];
	size_t pos;

	/* Base-256 marker (or leading NUL): there's nothing we can check. */
	if (lead == 0 || lead == 128 || lead == 255)
		return 1;

	/* Must be octal: step over the leading space padding ... */
	for (pos = 0; pos < i_size && p_field[pos] == ' '; ++pos)
		;
	/* ... then over the run of octal digits ... */
	for (; pos < i_size && p_field[pos] >= '0' && p_field[pos] <= '7';
	    ++pos)
		;
	/* ... and require the tail to be pure space/NUL padding. */
	for (; pos < i_size; ++pos) {
		if (p_field[pos] != ' ' && p_field[pos] != 0)
			return 0;
	}
	return 1;
}
355228753Smm
356228753Smmstatic int
357232153Smmarchive_read_format_tar_bid(struct archive_read *a, int best_bid)
358228753Smm{
359228753Smm	int bid;
360232153Smm	const char *h;
361228753Smm	const struct archive_entry_header_ustar *header;
362228753Smm
363232153Smm	(void)best_bid; /* UNUSED */
364232153Smm
365228753Smm	bid = 0;
366228753Smm
367228753Smm	/* Now let's look at the actual header and see if it matches. */
368228753Smm	h = __archive_read_ahead(a, 512, NULL);
369228753Smm	if (h == NULL)
370228753Smm		return (-1);
371228753Smm
372228753Smm	/* If it's an end-of-archive mark, we can handle it. */
373232153Smm	if (h[0] == 0 && archive_block_is_null(h)) {
374228753Smm		/*
375228753Smm		 * Usually, I bid the number of bits verified, but
376228753Smm		 * in this case, 4096 seems excessive so I picked 10 as
377228753Smm		 * an arbitrary but reasonable-seeming value.
378228753Smm		 */
379228753Smm		return (10);
380228753Smm	}
381228753Smm
382228753Smm	/* If it's not an end-of-archive mark, it must have a valid checksum.*/
383228753Smm	if (!checksum(a, h))
384228753Smm		return (0);
385228753Smm	bid += 48;  /* Checksum is usually 6 octal digits. */
386228753Smm
387228753Smm	header = (const struct archive_entry_header_ustar *)h;
388228753Smm
389228753Smm	/* Recognize POSIX formats. */
390228753Smm	if ((memcmp(header->magic, "ustar\0", 6) == 0)
391232153Smm	    && (memcmp(header->version, "00", 2) == 0))
392228753Smm		bid += 56;
393228753Smm
394228753Smm	/* Recognize GNU tar format. */
395228753Smm	if ((memcmp(header->magic, "ustar ", 6) == 0)
396232153Smm	    && (memcmp(header->version, " \0", 2) == 0))
397228753Smm		bid += 56;
398228753Smm
399228753Smm	/* Type flag must be null, digit or A-Z, a-z. */
400228753Smm	if (header->typeflag[0] != 0 &&
401228753Smm	    !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
402228753Smm	    !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
403228753Smm	    !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
404228753Smm		return (0);
405228753Smm	bid += 2;  /* 6 bits of variation in an 8-bit field leaves 2 bits. */
406228753Smm
407311042Smm	/*
408311042Smm	 * Check format of mode/uid/gid/mtime/size/rdevmajor/rdevminor fields.
409311042Smm	 */
410311042Smm	if (bid > 0 && (
411311042Smm	    validate_number_field(header->mode, sizeof(header->mode)) == 0
412311042Smm	    || validate_number_field(header->uid, sizeof(header->uid)) == 0
413311042Smm	    || validate_number_field(header->gid, sizeof(header->gid)) == 0
414311042Smm	    || validate_number_field(header->mtime, sizeof(header->mtime)) == 0
415311042Smm	    || validate_number_field(header->size, sizeof(header->size)) == 0
416311042Smm	    || validate_number_field(header->rdevmajor, sizeof(header->rdevmajor)) == 0
417311042Smm	    || validate_number_field(header->rdevminor, sizeof(header->rdevminor)) == 0)) {
418311042Smm		bid = 0;
419228753Smm	}
420228753Smm
421228753Smm	return (bid);
422228753Smm}
423228753Smm
424232153Smmstatic int
425232153Smmarchive_read_format_tar_options(struct archive_read *a,
426232153Smm    const char *key, const char *val)
427232153Smm{
428232153Smm	struct tar *tar;
429232153Smm	int ret = ARCHIVE_FAILED;
430232153Smm
431232153Smm	tar = (struct tar *)(a->format->data);
432232153Smm	if (strcmp(key, "compat-2x")  == 0) {
433311042Smm		/* Handle UTF-8 filenames as libarchive 2.x */
434302001Smm		tar->compat_2x = (val != NULL && val[0] != 0);
435232153Smm		tar->init_default_conversion = tar->compat_2x;
436232153Smm		return (ARCHIVE_OK);
437232153Smm	} else if (strcmp(key, "hdrcharset")  == 0) {
438232153Smm		if (val == NULL || val[0] == 0)
439232153Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
440232153Smm			    "tar: hdrcharset option needs a character-set name");
441232153Smm		else {
442232153Smm			tar->opt_sconv =
443232153Smm			    archive_string_conversion_from_charset(
444232153Smm				&a->archive, val, 0);
445232153Smm			if (tar->opt_sconv != NULL)
446232153Smm				ret = ARCHIVE_OK;
447232153Smm			else
448232153Smm				ret = ARCHIVE_FATAL;
449232153Smm		}
450232153Smm		return (ret);
451302001Smm	} else if (strcmp(key, "mac-ext") == 0) {
452302001Smm		tar->process_mac_extensions = (val != NULL && val[0] != 0);
453302001Smm		return (ARCHIVE_OK);
454302001Smm	} else if (strcmp(key, "read_concatenated_archives") == 0) {
455302001Smm		tar->read_concatenated_archives = (val != NULL && val[0] != 0);
456302001Smm		return (ARCHIVE_OK);
457232153Smm	}
458232153Smm
459232153Smm	/* Note: The "warn" return is just to inform the options
460232153Smm	 * supervisor that we didn't handle it.  It will generate
461232153Smm	 * a suitable error if no one used this option. */
462232153Smm	return (ARCHIVE_WARN);
463232153Smm}
464232153Smm
/*
 * Utility function: this exists to centralize the logic of tracking
 * how much unconsumed data we have floating around, and to consume
 * anything outstanding since we're going to do read_aheads.
 *
 * If *unconsumed is non-zero, that many bytes are consumed from the
 * input and the counter is reset to zero; otherwise nothing happens.
 *
 * Debugging aid (not compiled in): before consuming, fetch the pending
 * region with __archive_read_ahead(a, *unconsumed, NULL) and, if that
 * returns non-NULL, memset() it to 0xff.  This poisons the claimed
 * unconsumed space so that anything still using it breaks.  Currently
 * it WILL break things, so enable it only for debugging this issue.
 */
static void
tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
{
	if (*unconsumed == 0)
		return;

	__archive_read_consume(a, *unconsumed);
	*unconsumed = 0;
}
486232153Smm
487232153Smm/*
488232153Smm * The function invoked by archive_read_next_header().  This
489228753Smm * just sets up a few things and then calls the internal
490228753Smm * tar_read_header() function below.
491228753Smm */
492228753Smmstatic int
493228753Smmarchive_read_format_tar_read_header(struct archive_read *a,
494228753Smm    struct archive_entry *entry)
495228753Smm{
496228753Smm	/*
497228753Smm	 * When converting tar archives to cpio archives, it is
498228753Smm	 * essential that each distinct file have a distinct inode
499228753Smm	 * number.  To simplify this, we keep a static count here to
500228753Smm	 * assign fake dev/inode numbers to each tar entry.  Note that
501228753Smm	 * pax format archives may overwrite this with something more
502228753Smm	 * useful.
503228753Smm	 *
504228753Smm	 * Ideally, we would track every file read from the archive so
505228753Smm	 * that we could assign the same dev/ino pair to hardlinks,
506228753Smm	 * but the memory required to store a complete lookup table is
507228753Smm	 * probably not worthwhile just to support the relatively
508228753Smm	 * obscure tar->cpio conversion case.
509228753Smm	 */
510228753Smm	static int default_inode;
511228753Smm	static int default_dev;
512228753Smm	struct tar *tar;
513228753Smm	const char *p;
514302001Smm	const wchar_t *wp;
515228753Smm	int r;
516232153Smm	size_t l, unconsumed = 0;
517228753Smm
518228753Smm	/* Assign default device/inode values. */
519228753Smm	archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
520228753Smm	archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
521228753Smm	/* Limit generated st_ino number to 16 bits. */
522228753Smm	if (default_inode >= 0xffff) {
523228753Smm		++default_dev;
524228753Smm		default_inode = 0;
525228753Smm	}
526228753Smm
527228753Smm	tar = (struct tar *)(a->format->data);
528228753Smm	tar->entry_offset = 0;
529232153Smm	gnu_clear_sparse_list(tar);
530228753Smm	tar->realsize = -1; /* Mark this as "unset" */
531316338Smm	tar->realsize_override = 0;
532228753Smm
533232153Smm	/* Setup default string conversion. */
534232153Smm	tar->sconv = tar->opt_sconv;
535232153Smm	if (tar->sconv == NULL) {
536232153Smm		if (!tar->init_default_conversion) {
537232153Smm			tar->sconv_default =
538232153Smm			    archive_string_default_conversion_for_read(&(a->archive));
539232153Smm			tar->init_default_conversion = 1;
540232153Smm		}
541232153Smm		tar->sconv = tar->sconv_default;
542232153Smm	}
543228753Smm
544232153Smm	r = tar_read_header(a, tar, entry, &unconsumed);
545232153Smm
546232153Smm	tar_flush_unconsumed(a, &unconsumed);
547232153Smm
548228753Smm	/*
549228753Smm	 * "non-sparse" files are really just sparse files with
550228753Smm	 * a single block.
551228753Smm	 */
552232153Smm	if (tar->sparse_list == NULL) {
553232153Smm		if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining)
554232153Smm		    != ARCHIVE_OK)
555232153Smm			return (ARCHIVE_FATAL);
556232153Smm	} else {
557232153Smm		struct sparse_block *sb;
558228753Smm
559232153Smm		for (sb = tar->sparse_list; sb != NULL; sb = sb->next) {
560232153Smm			if (!sb->hole)
561232153Smm				archive_entry_sparse_add_entry(entry,
562232153Smm				    sb->offset, sb->remaining);
563232153Smm		}
564232153Smm	}
565232153Smm
566302001Smm	if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) {
567228753Smm		/*
568228753Smm		 * "Regular" entry with trailing '/' is really
569228753Smm		 * directory: This is needed for certain old tar
570228753Smm		 * variants and even for some broken newer ones.
571228753Smm		 */
572302001Smm		if ((wp = archive_entry_pathname_w(entry)) != NULL) {
573232153Smm			l = wcslen(wp);
574302001Smm			if (l > 0 && wp[l - 1] == L'/') {
575232153Smm				archive_entry_set_filetype(entry, AE_IFDIR);
576302001Smm			}
577302001Smm		} else if ((p = archive_entry_pathname(entry)) != NULL) {
578232153Smm			l = strlen(p);
579302001Smm			if (l > 0 && p[l - 1] == '/') {
580232153Smm				archive_entry_set_filetype(entry, AE_IFDIR);
581302001Smm			}
582232153Smm		}
583228753Smm	}
584228753Smm	return (r);
585228753Smm}
586228753Smm
587228753Smmstatic int
588228753Smmarchive_read_format_tar_read_data(struct archive_read *a,
589232153Smm    const void **buff, size_t *size, int64_t *offset)
590228753Smm{
591228753Smm	ssize_t bytes_read;
592228753Smm	struct tar *tar;
593228753Smm	struct sparse_block *p;
594228753Smm
595228753Smm	tar = (struct tar *)(a->format->data);
596228753Smm
597238856Smm	for (;;) {
598238856Smm		/* Remove exhausted entries from sparse list. */
599238856Smm		while (tar->sparse_list != NULL &&
600238856Smm		    tar->sparse_list->remaining == 0) {
601238856Smm			p = tar->sparse_list;
602238856Smm			tar->sparse_list = p->next;
603238856Smm			free(p);
604238856Smm		}
605228753Smm
606238856Smm		if (tar->entry_bytes_unconsumed) {
607238856Smm			__archive_read_consume(a, tar->entry_bytes_unconsumed);
608238856Smm			tar->entry_bytes_unconsumed = 0;
609238856Smm		}
610232153Smm
611238856Smm		/* If we're at end of file, return EOF. */
612238856Smm		if (tar->sparse_list == NULL ||
613238856Smm		    tar->entry_bytes_remaining == 0) {
614238856Smm			if (__archive_read_consume(a, tar->entry_padding) < 0)
615238856Smm				return (ARCHIVE_FATAL);
616238856Smm			tar->entry_padding = 0;
617238856Smm			*buff = NULL;
618238856Smm			*size = 0;
619238856Smm			*offset = tar->realsize;
620238856Smm			return (ARCHIVE_EOF);
621238856Smm		}
622238856Smm
623238856Smm		*buff = __archive_read_ahead(a, 1, &bytes_read);
624238856Smm		if (bytes_read < 0)
625228753Smm			return (ARCHIVE_FATAL);
626238856Smm		if (*buff == NULL) {
627238856Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
628238856Smm			    "Truncated tar archive");
629238856Smm			return (ARCHIVE_FATAL);
630238856Smm		}
631238856Smm		if (bytes_read > tar->entry_bytes_remaining)
632238856Smm			bytes_read = (ssize_t)tar->entry_bytes_remaining;
633238856Smm		/* Don't read more than is available in the
634238856Smm		 * current sparse block. */
635238856Smm		if (tar->sparse_list->remaining < bytes_read)
636238856Smm			bytes_read = (ssize_t)tar->sparse_list->remaining;
637238856Smm		*size = bytes_read;
638238856Smm		*offset = tar->sparse_list->offset;
639238856Smm		tar->sparse_list->remaining -= bytes_read;
640238856Smm		tar->sparse_list->offset += bytes_read;
641238856Smm		tar->entry_bytes_remaining -= bytes_read;
642238856Smm		tar->entry_bytes_unconsumed = bytes_read;
643228753Smm
644238856Smm		if (!tar->sparse_list->hole)
645238856Smm			return (ARCHIVE_OK);
646238856Smm		/* Current is hole data and skip this. */
647228753Smm	}
648228753Smm}
649228753Smm
650228753Smmstatic int
651228753Smmarchive_read_format_tar_skip(struct archive_read *a)
652228753Smm{
653228753Smm	int64_t bytes_skipped;
654286082Sbdrewery	int64_t request;
655286082Sbdrewery	struct sparse_block *p;
656228753Smm	struct tar* tar;
657228753Smm
658228753Smm	tar = (struct tar *)(a->format->data);
659228753Smm
660286082Sbdrewery	/* Do not consume the hole of a sparse file. */
661286082Sbdrewery	request = 0;
662286082Sbdrewery	for (p = tar->sparse_list; p != NULL; p = p->next) {
663302001Smm		if (!p->hole) {
664302001Smm			if (p->remaining >= INT64_MAX - request) {
665302001Smm				return ARCHIVE_FATAL;
666302001Smm			}
667286082Sbdrewery			request += p->remaining;
668302001Smm		}
669286082Sbdrewery	}
670286082Sbdrewery	if (request > tar->entry_bytes_remaining)
671286082Sbdrewery		request = tar->entry_bytes_remaining;
672286082Sbdrewery	request += tar->entry_padding + tar->entry_bytes_unconsumed;
673286082Sbdrewery
674286082Sbdrewery	bytes_skipped = __archive_read_consume(a, request);
675228753Smm	if (bytes_skipped < 0)
676228753Smm		return (ARCHIVE_FATAL);
677228753Smm
678228753Smm	tar->entry_bytes_remaining = 0;
679232153Smm	tar->entry_bytes_unconsumed = 0;
680228753Smm	tar->entry_padding = 0;
681228753Smm
682228753Smm	/* Free the sparse list. */
683228753Smm	gnu_clear_sparse_list(tar);
684228753Smm
685228753Smm	return (ARCHIVE_OK);
686228753Smm}
687228753Smm
688228753Smm/*
689228753Smm * This function recursively interprets all of the headers associated
690228753Smm * with a single entry.
691228753Smm */
692228753Smmstatic int
693228753Smmtar_read_header(struct archive_read *a, struct tar *tar,
694232153Smm    struct archive_entry *entry, size_t *unconsumed)
695228753Smm{
696228753Smm	ssize_t bytes;
697348608Smm	int err, eof_vol_header;
698232153Smm	const char *h;
699228753Smm	const struct archive_entry_header_ustar *header;
700248616Smm	const struct archive_entry_header_gnutar *gnuheader;
701228753Smm
702348608Smm	eof_vol_header = 0;
703348608Smm
704302001Smm	/* Loop until we find a workable header record. */
705302001Smm	for (;;) {
706302001Smm		tar_flush_unconsumed(a, unconsumed);
707232153Smm
708302001Smm		/* Read 512-byte header record */
709302001Smm		h = __archive_read_ahead(a, 512, &bytes);
710302001Smm		if (bytes < 0)
711302001Smm			return ((int)bytes);
712302001Smm		if (bytes == 0) { /* EOF at a block boundary. */
713302001Smm			/* Some writers do omit the block of nulls. <sigh> */
714302001Smm			return (ARCHIVE_EOF);
715302001Smm		}
716302001Smm		if (bytes < 512) {  /* Short block at EOF; this is bad. */
717302001Smm			archive_set_error(&a->archive,
718302001Smm			    ARCHIVE_ERRNO_FILE_FORMAT,
719302001Smm			    "Truncated tar archive");
720302001Smm			return (ARCHIVE_FATAL);
721302001Smm		}
722302001Smm		*unconsumed = 512;
723228753Smm
724302001Smm		/* Header is workable if it's not an end-of-archive mark. */
725302001Smm		if (h[0] != 0 || !archive_block_is_null(h))
726302001Smm			break;
727302001Smm
728302001Smm		/* Ensure format is set for archives with only null blocks. */
729228753Smm		if (a->archive.archive_format_name == NULL) {
730228753Smm			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
731228753Smm			a->archive.archive_format_name = "tar";
732228753Smm		}
733302001Smm
734302001Smm		if (!tar->read_concatenated_archives) {
735302001Smm			/* Try to consume a second all-null record, as well. */
736302001Smm			tar_flush_unconsumed(a, unconsumed);
737302001Smm			h = __archive_read_ahead(a, 512, NULL);
738302001Smm			if (h != NULL && h[0] == 0 && archive_block_is_null(h))
739302001Smm				__archive_read_consume(a, 512);
740302001Smm			archive_clear_error(&a->archive);
741302001Smm			return (ARCHIVE_EOF);
742302001Smm		}
743302001Smm
744302001Smm		/*
745302001Smm		 * We're reading concatenated archives, ignore this block and
746302001Smm		 * loop to get the next.
747302001Smm		 */
748228753Smm	}
749228753Smm
750228753Smm	/*
751228753Smm	 * Note: If the checksum fails and we return ARCHIVE_RETRY,
752228753Smm	 * then the client is likely to just retry.  This is a very
753228753Smm	 * crude way to search for the next valid header!
754228753Smm	 *
755228753Smm	 * TODO: Improve this by implementing a real header scan.
756228753Smm	 */
757228753Smm	if (!checksum(a, h)) {
758232153Smm		tar_flush_unconsumed(a, unconsumed);
759228753Smm		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
760228753Smm		return (ARCHIVE_RETRY); /* Retryable: Invalid header */
761228753Smm	}
762228753Smm
763228753Smm	if (++tar->header_recursion_depth > 32) {
764232153Smm		tar_flush_unconsumed(a, unconsumed);
765228753Smm		archive_set_error(&a->archive, EINVAL, "Too many special headers");
766228753Smm		return (ARCHIVE_WARN);
767228753Smm	}
768228753Smm
769228753Smm	/* Determine the format variant. */
770228753Smm	header = (const struct archive_entry_header_ustar *)h;
771232153Smm
772228753Smm	switch(header->typeflag[0]) {
773228753Smm	case 'A': /* Solaris tar ACL */
774228753Smm		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
775228753Smm		a->archive.archive_format_name = "Solaris tar";
776232153Smm		err = header_Solaris_ACL(a, tar, entry, h, unconsumed);
777228753Smm		break;
778228753Smm	case 'g': /* POSIX-standard 'g' header. */
779228753Smm		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
780228753Smm		a->archive.archive_format_name = "POSIX pax interchange format";
781232153Smm		err = header_pax_global(a, tar, entry, h, unconsumed);
782302001Smm		if (err == ARCHIVE_EOF)
783302001Smm			return (err);
784228753Smm		break;
785228753Smm	case 'K': /* Long link name (GNU tar, others) */
786232153Smm		err = header_longlink(a, tar, entry, h, unconsumed);
787228753Smm		break;
788228753Smm	case 'L': /* Long filename (GNU tar, others) */
789232153Smm		err = header_longname(a, tar, entry, h, unconsumed);
790228753Smm		break;
791228753Smm	case 'V': /* GNU volume header */
792232153Smm		err = header_volume(a, tar, entry, h, unconsumed);
793348608Smm		if (err == ARCHIVE_EOF)
794348608Smm			eof_vol_header = 1;
795228753Smm		break;
796228753Smm	case 'X': /* Used by SUN tar; same as 'x'. */
797228753Smm		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
798228753Smm		a->archive.archive_format_name =
799228753Smm		    "POSIX pax interchange format (Sun variant)";
800232153Smm		err = header_pax_extensions(a, tar, entry, h, unconsumed);
801228753Smm		break;
802228753Smm	case 'x': /* POSIX-standard 'x' header. */
803228753Smm		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
804228753Smm		a->archive.archive_format_name = "POSIX pax interchange format";
805232153Smm		err = header_pax_extensions(a, tar, entry, h, unconsumed);
806228753Smm		break;
807228753Smm	default:
808248616Smm		gnuheader = (const struct archive_entry_header_gnutar *)h;
809248616Smm		if (memcmp(gnuheader->magic, "ustar  \0", 8) == 0) {
810228753Smm			a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
811228753Smm			a->archive.archive_format_name = "GNU tar format";
812232153Smm			err = header_gnutar(a, tar, entry, h, unconsumed);
813228753Smm		} else if (memcmp(header->magic, "ustar", 5) == 0) {
814228753Smm			if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
815228753Smm				a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
816228753Smm				a->archive.archive_format_name = "POSIX ustar format";
817228753Smm			}
818228753Smm			err = header_ustar(a, tar, entry, h);
819228753Smm		} else {
820228753Smm			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
821228753Smm			a->archive.archive_format_name = "tar (non-POSIX)";
822228753Smm			err = header_old_tar(a, tar, entry, h);
823228753Smm		}
824228753Smm	}
825232153Smm	if (err == ARCHIVE_FATAL)
826232153Smm		return (err);
827232153Smm
828232153Smm	tar_flush_unconsumed(a, unconsumed);
829232153Smm
830232153Smm	h = NULL;
831232153Smm	header = NULL;
832232153Smm
833228753Smm	--tar->header_recursion_depth;
834232153Smm	/* Yuck.  Apple's design here ends up storing long pathname
835232153Smm	 * extensions for both the AppleDouble extension entry and the
836232153Smm	 * regular entry.
837232153Smm	 */
838232153Smm	if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) &&
839302001Smm	    tar->header_recursion_depth == 0 &&
840302001Smm	    tar->process_mac_extensions) {
841232153Smm		int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed);
842232153Smm		if (err2 < err)
843232153Smm			err = err2;
844232153Smm	}
845232153Smm
846228753Smm	/* We return warnings or success as-is.  Anything else is fatal. */
847232153Smm	if (err == ARCHIVE_WARN || err == ARCHIVE_OK) {
848232153Smm		if (tar->sparse_gnu_pending) {
849232153Smm			if (tar->sparse_gnu_major == 1 &&
850232153Smm			    tar->sparse_gnu_minor == 0) {
851232153Smm				ssize_t bytes_read;
852232153Smm
853232153Smm				tar->sparse_gnu_pending = 0;
854232153Smm				/* Read initial sparse map. */
855232153Smm				bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
856232153Smm				if (bytes_read < 0)
857248616Smm					return ((int)bytes_read);
858315433Smm				tar->entry_bytes_remaining -= bytes_read;
859232153Smm			} else {
860232153Smm				archive_set_error(&a->archive,
861232153Smm				    ARCHIVE_ERRNO_MISC,
862232153Smm				    "Unrecognized GNU sparse file format");
863232153Smm				return (ARCHIVE_WARN);
864232153Smm			}
865232153Smm			tar->sparse_gnu_pending = 0;
866232153Smm		}
867228753Smm		return (err);
868232153Smm	}
869348608Smm	if (err == ARCHIVE_EOF) {
870348608Smm		if (!eof_vol_header) {
871348608Smm			/* EOF when recursively reading a header is bad. */
872348608Smm			archive_set_error(&a->archive, EINVAL,
873348608Smm			    "Damaged tar archive");
874348608Smm		} else {
875348608Smm			/* If we encounter just a GNU volume header treat
876348608Smm			 * this situation as an empty archive */
877348608Smm			return (ARCHIVE_EOF);
878348608Smm		}
879348608Smm	}
880228753Smm	return (ARCHIVE_FATAL);
881228753Smm}
882228753Smm
883228753Smm/*
884228753Smm * Return true if block checksum is correct.
885228753Smm */
886228753Smmstatic int
887228753Smmchecksum(struct archive_read *a, const void *h)
888228753Smm{
889228753Smm	const unsigned char *bytes;
890228753Smm	const struct archive_entry_header_ustar	*header;
891302001Smm	int check, sum;
892302001Smm	size_t i;
893228753Smm
894228753Smm	(void)a; /* UNUSED */
895228753Smm	bytes = (const unsigned char *)h;
896228753Smm	header = (const struct archive_entry_header_ustar *)h;
897228753Smm
898302001Smm	/* Checksum field must hold an octal number */
899302001Smm	for (i = 0; i < sizeof(header->checksum); ++i) {
900302001Smm		char c = header->checksum[i];
901302001Smm		if (c != ' ' && c != '\0' && (c < '0' || c > '7'))
902302001Smm			return 0;
903302001Smm	}
904302001Smm
905228753Smm	/*
906228753Smm	 * Test the checksum.  Note that POSIX specifies _unsigned_
907228753Smm	 * bytes for this calculation.
908228753Smm	 */
909238856Smm	sum = (int)tar_atol(header->checksum, sizeof(header->checksum));
910228753Smm	check = 0;
911228753Smm	for (i = 0; i < 148; i++)
912228753Smm		check += (unsigned char)bytes[i];
913228753Smm	for (; i < 156; i++)
914228753Smm		check += 32;
915228753Smm	for (; i < 512; i++)
916228753Smm		check += (unsigned char)bytes[i];
917228753Smm	if (sum == check)
918228753Smm		return (1);
919228753Smm
920228753Smm	/*
921228753Smm	 * Repeat test with _signed_ bytes, just in case this archive
922228753Smm	 * was created by an old BSD, Solaris, or HP-UX tar with a
923228753Smm	 * broken checksum calculation.
924228753Smm	 */
925228753Smm	check = 0;
926228753Smm	for (i = 0; i < 148; i++)
927228753Smm		check += (signed char)bytes[i];
928228753Smm	for (; i < 156; i++)
929228753Smm		check += 32;
930228753Smm	for (; i < 512; i++)
931228753Smm		check += (signed char)bytes[i];
932228753Smm	if (sum == check)
933228753Smm		return (1);
934228753Smm
935228753Smm	return (0);
936228753Smm}
937228753Smm
/*
 * Report whether a 512-byte block consists solely of NUL bytes.
 * Returns 1 if every byte is zero, 0 as soon as a non-zero byte is seen.
 */
static int
archive_block_is_null(const char *p)
{
	const char *const end = p + 512;

	while (p < end) {
		if (*p != '\0')
			return (0);
		p++;
	}
	return (1);
}
951228753Smm
952228753Smm/*
953228753Smm * Interpret 'A' Solaris ACL header
954228753Smm */
955228753Smmstatic int
956228753Smmheader_Solaris_ACL(struct archive_read *a, struct tar *tar,
957232153Smm    struct archive_entry *entry, const void *h, size_t *unconsumed)
958228753Smm{
959228753Smm	const struct archive_entry_header_ustar *header;
960228753Smm	size_t size;
961313571Smm	int err, acl_type;
962228753Smm	int64_t type;
963228753Smm	char *acl, *p;
964228753Smm
965228753Smm	/*
966228753Smm	 * read_body_to_string adds a NUL terminator, but we need a little
967228753Smm	 * more to make sure that we don't overrun acl_text later.
968228753Smm	 */
969228753Smm	header = (const struct archive_entry_header_ustar *)h;
970238856Smm	size = (size_t)tar_atol(header->size, sizeof(header->size));
971232153Smm	err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed);
972228753Smm	if (err != ARCHIVE_OK)
973228753Smm		return (err);
974232153Smm
975228753Smm	/* Recursively read next header */
976232153Smm	err = tar_read_header(a, tar, entry, unconsumed);
977228753Smm	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
978228753Smm		return (err);
979228753Smm
980228753Smm	/* TODO: Examine the first characters to see if this
981228753Smm	 * is an AIX ACL descriptor.  We'll likely never support
982228753Smm	 * them, but it would be polite to recognize and warn when
983228753Smm	 * we do see them. */
984228753Smm
985228753Smm	/* Leading octal number indicates ACL type and number of entries. */
986228753Smm	p = acl = tar->acl_text.s;
987228753Smm	type = 0;
988228753Smm	while (*p != '\0' && p < acl + size) {
989228753Smm		if (*p < '0' || *p > '7') {
990228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
991228753Smm			    "Malformed Solaris ACL attribute (invalid digit)");
992228753Smm			return(ARCHIVE_WARN);
993228753Smm		}
994228753Smm		type <<= 3;
995228753Smm		type += *p - '0';
996228753Smm		if (type > 077777777) {
997228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
998228753Smm			    "Malformed Solaris ACL attribute (count too large)");
999228753Smm			return (ARCHIVE_WARN);
1000228753Smm		}
1001228753Smm		p++;
1002228753Smm	}
1003228753Smm	switch ((int)type & ~0777777) {
1004228753Smm	case 01000000:
1005228753Smm		/* POSIX.1e ACL */
1006313571Smm		acl_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS;
1007228753Smm		break;
1008228753Smm	case 03000000:
1009313571Smm		/* NFSv4 ACL */
1010313571Smm		acl_type = ARCHIVE_ENTRY_ACL_TYPE_NFS4;
1011313571Smm		break;
1012228753Smm	default:
1013228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1014228753Smm		    "Malformed Solaris ACL attribute (unsupported type %o)",
1015228753Smm		    (int)type);
1016228753Smm		return (ARCHIVE_WARN);
1017228753Smm	}
1018228753Smm	p++;
1019228753Smm
1020228753Smm	if (p >= acl + size) {
1021228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1022228753Smm		    "Malformed Solaris ACL attribute (body overflow)");
1023228753Smm		return(ARCHIVE_WARN);
1024228753Smm	}
1025228753Smm
1026228753Smm	/* ACL text is null-terminated; find the end. */
1027228753Smm	size -= (p - acl);
1028228753Smm	acl = p;
1029228753Smm
1030228753Smm	while (*p != '\0' && p < acl + size)
1031228753Smm		p++;
1032228753Smm
1033232153Smm	if (tar->sconv_acl == NULL) {
1034232153Smm		tar->sconv_acl = archive_string_conversion_from_charset(
1035232153Smm		    &(a->archive), "UTF-8", 1);
1036232153Smm		if (tar->sconv_acl == NULL)
1037232153Smm			return (ARCHIVE_FATAL);
1038232153Smm	}
1039232153Smm	archive_strncpy(&(tar->localname), acl, p - acl);
1040313571Smm	err = archive_acl_from_text_l(archive_entry_acl(entry),
1041313571Smm	    tar->localname.s, acl_type, tar->sconv_acl);
1042232153Smm	if (err != ARCHIVE_OK) {
1043232153Smm		if (errno == ENOMEM) {
1044232153Smm			archive_set_error(&a->archive, ENOMEM,
1045232153Smm			    "Can't allocate memory for ACL");
1046232153Smm		} else
1047232153Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1048232153Smm			    "Malformed Solaris ACL attribute (unparsable)");
1049232153Smm	}
1050228753Smm	return (err);
1051228753Smm}
1052228753Smm
1053228753Smm/*
1054228753Smm * Interpret 'K' long linkname header.
1055228753Smm */
1056228753Smmstatic int
1057228753Smmheader_longlink(struct archive_read *a, struct tar *tar,
1058232153Smm    struct archive_entry *entry, const void *h, size_t *unconsumed)
1059228753Smm{
1060228753Smm	int err;
1061228753Smm
1062232153Smm	err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed);
1063228753Smm	if (err != ARCHIVE_OK)
1064228753Smm		return (err);
1065232153Smm	err = tar_read_header(a, tar, entry, unconsumed);
1066228753Smm	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
1067228753Smm		return (err);
1068228753Smm	/* Set symlink if symlink already set, else hardlink. */
1069228753Smm	archive_entry_copy_link(entry, tar->longlink.s);
1070228753Smm	return (ARCHIVE_OK);
1071228753Smm}
1072228753Smm
1073232153Smmstatic int
1074232153Smmset_conversion_failed_error(struct archive_read *a,
1075232153Smm    struct archive_string_conv *sconv, const char *name)
1076232153Smm{
1077232153Smm	if (errno == ENOMEM) {
1078232153Smm		archive_set_error(&a->archive, ENOMEM,
1079232153Smm		    "Can't allocate memory for %s", name);
1080232153Smm		return (ARCHIVE_FATAL);
1081232153Smm	}
1082232153Smm	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1083232153Smm	    "%s can't be converted from %s to current locale.",
1084232153Smm	    name, archive_string_conversion_charset_name(sconv));
1085232153Smm	return (ARCHIVE_WARN);
1086232153Smm}
1087232153Smm
1088228753Smm/*
1089228753Smm * Interpret 'L' long filename header.
1090228753Smm */
1091228753Smmstatic int
1092228753Smmheader_longname(struct archive_read *a, struct tar *tar,
1093232153Smm    struct archive_entry *entry, const void *h, size_t *unconsumed)
1094228753Smm{
1095228753Smm	int err;
1096228753Smm
1097232153Smm	err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed);
1098228753Smm	if (err != ARCHIVE_OK)
1099228753Smm		return (err);
1100228753Smm	/* Read and parse "real" header, then override name. */
1101232153Smm	err = tar_read_header(a, tar, entry, unconsumed);
1102228753Smm	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
1103228753Smm		return (err);
1104232153Smm	if (archive_entry_copy_pathname_l(entry, tar->longname.s,
1105232153Smm	    archive_strlen(&(tar->longname)), tar->sconv) != 0)
1106232153Smm		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1107232153Smm	return (err);
1108228753Smm}
1109228753Smm
1110228753Smm
/*
 * Interpret 'V' GNU tar volume header.
 *
 * Nothing in the volume header itself is used; the entry is skipped
 * and the result of reading the next header is returned.
 */
static int
header_volume(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h, size_t *unconsumed)
{
	int r;

	(void)h; /* UNUSED */

	r = tar_read_header(a, tar, entry, unconsumed);
	return (r);
}
1123228753Smm
1124228753Smm/*
1125228753Smm * Read body of an archive entry into an archive_string object.
1126228753Smm */
1127228753Smmstatic int
1128228753Smmread_body_to_string(struct archive_read *a, struct tar *tar,
1129232153Smm    struct archive_string *as, const void *h, size_t *unconsumed)
1130228753Smm{
1131232153Smm	int64_t size;
1132228753Smm	const struct archive_entry_header_ustar *header;
1133228753Smm	const void *src;
1134228753Smm
1135228753Smm	(void)tar; /* UNUSED */
1136228753Smm	header = (const struct archive_entry_header_ustar *)h;
1137228753Smm	size  = tar_atol(header->size, sizeof(header->size));
1138228753Smm	if ((size > 1048576) || (size < 0)) {
1139228753Smm		archive_set_error(&a->archive, EINVAL,
1140228753Smm		    "Special header too large");
1141228753Smm		return (ARCHIVE_FATAL);
1142228753Smm	}
1143228753Smm
1144228753Smm	/* Fail if we can't make our buffer big enough. */
1145238856Smm	if (archive_string_ensure(as, (size_t)size+1) == NULL) {
1146228753Smm		archive_set_error(&a->archive, ENOMEM,
1147228753Smm		    "No memory");
1148228753Smm		return (ARCHIVE_FATAL);
1149228753Smm	}
1150228753Smm
1151232153Smm	tar_flush_unconsumed(a, unconsumed);
1152232153Smm
1153232153Smm	/* Read the body into the string. */
1154238856Smm	*unconsumed = (size_t)((size + 511) & ~ 511);
1155232153Smm	src = __archive_read_ahead(a, *unconsumed, NULL);
1156232153Smm	if (src == NULL) {
1157232153Smm		*unconsumed = 0;
1158228753Smm		return (ARCHIVE_FATAL);
1159232153Smm	}
1160238856Smm	memcpy(as->s, src, (size_t)size);
1161228753Smm	as->s[size] = '\0';
1162238856Smm	as->length = (size_t)size;
1163228753Smm	return (ARCHIVE_OK);
1164228753Smm}
1165228753Smm
1166228753Smm/*
1167228753Smm * Parse out common header elements.
1168228753Smm *
1169228753Smm * This would be the same as header_old_tar, except that the
1170228753Smm * filename is handled slightly differently for old and POSIX
1171228753Smm * entries  (POSIX entries support a 'prefix').  This factoring
1172228753Smm * allows header_old_tar and header_ustar
1173228753Smm * to handle filenames differently, while still putting most of the
1174228753Smm * common parsing into one place.
1175228753Smm */
1176228753Smmstatic int
1177228753Smmheader_common(struct archive_read *a, struct tar *tar,
1178228753Smm    struct archive_entry *entry, const void *h)
1179228753Smm{
1180228753Smm	const struct archive_entry_header_ustar	*header;
1181228753Smm	char	tartype;
1182232153Smm	int     err = ARCHIVE_OK;
1183228753Smm
1184228753Smm	header = (const struct archive_entry_header_ustar *)h;
1185228753Smm	if (header->linkname[0])
1186232153Smm		archive_strncpy(&(tar->entry_linkpath),
1187232153Smm		    header->linkname, sizeof(header->linkname));
1188228753Smm	else
1189228753Smm		archive_string_empty(&(tar->entry_linkpath));
1190228753Smm
1191228753Smm	/* Parse out the numeric fields (all are octal) */
1192238856Smm	archive_entry_set_mode(entry,
1193238856Smm		(mode_t)tar_atol(header->mode, sizeof(header->mode)));
1194228753Smm	archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
1195228753Smm	archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
1196228753Smm	tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
1197232153Smm	if (tar->entry_bytes_remaining < 0) {
1198232153Smm		tar->entry_bytes_remaining = 0;
1199232153Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1200305192Smm		    "Tar entry has negative size");
1201305192Smm		return (ARCHIVE_FATAL);
1202232153Smm	}
1203305192Smm	if (tar->entry_bytes_remaining == INT64_MAX) {
1204305192Smm		/* Note: tar_atol returns INT64_MAX on overflow */
1205305192Smm		tar->entry_bytes_remaining = 0;
1206305192Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1207305192Smm		    "Tar entry size overflow");
1208305192Smm		return (ARCHIVE_FATAL);
1209305192Smm	}
1210228753Smm	tar->realsize = tar->entry_bytes_remaining;
1211228753Smm	archive_entry_set_size(entry, tar->entry_bytes_remaining);
1212228753Smm	archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
1213228753Smm
1214228753Smm	/* Handle the tar type flag appropriately. */
1215228753Smm	tartype = header->typeflag[0];
1216228753Smm
1217228753Smm	switch (tartype) {
1218228753Smm	case '1': /* Hard link */
1219232153Smm		if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s,
1220232153Smm		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
1221232153Smm			err = set_conversion_failed_error(a, tar->sconv,
1222232153Smm			    "Linkname");
1223232153Smm			if (err == ARCHIVE_FATAL)
1224232153Smm				return (err);
1225232153Smm		}
1226228753Smm		/*
1227228753Smm		 * The following may seem odd, but: Technically, tar
1228228753Smm		 * does not store the file type for a "hard link"
1229228753Smm		 * entry, only the fact that it is a hard link.  So, I
1230228753Smm		 * leave the type zero normally.  But, pax interchange
1231228753Smm		 * format allows hard links to have data, which
1232228753Smm		 * implies that the underlying entry is a regular
1233228753Smm		 * file.
1234228753Smm		 */
1235228753Smm		if (archive_entry_size(entry) > 0)
1236228753Smm			archive_entry_set_filetype(entry, AE_IFREG);
1237228753Smm
1238228753Smm		/*
1239228753Smm		 * A tricky point: Traditionally, tar readers have
1240228753Smm		 * ignored the size field when reading hardlink
1241228753Smm		 * entries, and some writers put non-zero sizes even
1242228753Smm		 * though the body is empty.  POSIX blessed this
1243228753Smm		 * convention in the 1988 standard, but broke with
1244228753Smm		 * this tradition in 2001 by permitting hardlink
1245228753Smm		 * entries to store valid bodies in pax interchange
1246228753Smm		 * format, but not in ustar format.  Since there is no
1247228753Smm		 * hard and fast way to distinguish pax interchange
1248228753Smm		 * from earlier archives (the 'x' and 'g' entries are
1249228753Smm		 * optional, after all), we need a heuristic.
1250228753Smm		 */
1251228753Smm		if (archive_entry_size(entry) == 0) {
1252228753Smm			/* If the size is already zero, we're done. */
1253228753Smm		}  else if (a->archive.archive_format
1254228753Smm		    == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
1255228753Smm			/* Definitely pax extended; must obey hardlink size. */
1256228753Smm		} else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
1257228753Smm		    || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
1258228753Smm		{
1259228753Smm			/* Old-style or GNU tar: we must ignore the size. */
1260228753Smm			archive_entry_set_size(entry, 0);
1261228753Smm			tar->entry_bytes_remaining = 0;
1262232153Smm		} else if (archive_read_format_tar_bid(a, 50) > 50) {
1263228753Smm			/*
1264228753Smm			 * We don't know if it's pax: If the bid
1265228753Smm			 * function sees a valid ustar header
1266228753Smm			 * immediately following, then let's ignore
1267228753Smm			 * the hardlink size.
1268228753Smm			 */
1269228753Smm			archive_entry_set_size(entry, 0);
1270228753Smm			tar->entry_bytes_remaining = 0;
1271228753Smm		}
1272228753Smm		/*
1273228753Smm		 * TODO: There are still two cases I'd like to handle:
1274228753Smm		 *   = a ustar non-pax archive with a hardlink entry at
1275228753Smm		 *     end-of-archive.  (Look for block of nulls following?)
1276228753Smm		 *   = a pax archive that has not seen any pax headers
1277228753Smm		 *     and has an entry which is a hardlink entry storing
1278228753Smm		 *     a body containing an uncompressed tar archive.
1279228753Smm		 * The first is worth addressing; I don't see any reliable
1280228753Smm		 * way to deal with the second possibility.
1281228753Smm		 */
1282228753Smm		break;
1283228753Smm	case '2': /* Symlink */
1284228753Smm		archive_entry_set_filetype(entry, AE_IFLNK);
1285228753Smm		archive_entry_set_size(entry, 0);
1286228753Smm		tar->entry_bytes_remaining = 0;
1287232153Smm		if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s,
1288232153Smm		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
1289232153Smm			err = set_conversion_failed_error(a, tar->sconv,
1290232153Smm			    "Linkname");
1291232153Smm			if (err == ARCHIVE_FATAL)
1292232153Smm				return (err);
1293232153Smm		}
1294228753Smm		break;
1295228753Smm	case '3': /* Character device */
1296228753Smm		archive_entry_set_filetype(entry, AE_IFCHR);
1297228753Smm		archive_entry_set_size(entry, 0);
1298228753Smm		tar->entry_bytes_remaining = 0;
1299228753Smm		break;
1300228753Smm	case '4': /* Block device */
1301228753Smm		archive_entry_set_filetype(entry, AE_IFBLK);
1302228753Smm		archive_entry_set_size(entry, 0);
1303228753Smm		tar->entry_bytes_remaining = 0;
1304228753Smm		break;
1305228753Smm	case '5': /* Dir */
1306228753Smm		archive_entry_set_filetype(entry, AE_IFDIR);
1307228753Smm		archive_entry_set_size(entry, 0);
1308228753Smm		tar->entry_bytes_remaining = 0;
1309228753Smm		break;
1310228753Smm	case '6': /* FIFO device */
1311228753Smm		archive_entry_set_filetype(entry, AE_IFIFO);
1312228753Smm		archive_entry_set_size(entry, 0);
1313228753Smm		tar->entry_bytes_remaining = 0;
1314228753Smm		break;
1315228753Smm	case 'D': /* GNU incremental directory type */
1316228753Smm		/*
1317228753Smm		 * No special handling is actually required here.
1318228753Smm		 * It might be nice someday to preprocess the file list and
1319228753Smm		 * provide it to the client, though.
1320228753Smm		 */
1321228753Smm		archive_entry_set_filetype(entry, AE_IFDIR);
1322228753Smm		break;
1323228753Smm	case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
1324228753Smm		/*
1325228753Smm		 * As far as I can tell, this is just like a regular file
1326228753Smm		 * entry, except that the contents should be _appended_ to
1327228753Smm		 * the indicated file at the indicated offset.  This may
1328228753Smm		 * require some API work to fully support.
1329228753Smm		 */
1330228753Smm		break;
1331228753Smm	case 'N': /* Old GNU "long filename" entry. */
1332228753Smm		/* The body of this entry is a script for renaming
1333228753Smm		 * previously-extracted entries.  Ugh.  It will never
1334228753Smm		 * be supported by libarchive. */
1335228753Smm		archive_entry_set_filetype(entry, AE_IFREG);
1336228753Smm		break;
1337228753Smm	case 'S': /* GNU sparse files */
1338228753Smm		/*
1339228753Smm		 * Sparse files are really just regular files with
1340228753Smm		 * sparse information in the extended area.
1341228753Smm		 */
1342228753Smm		/* FALLTHROUGH */
1343306322Smm	case '0':
1344306322Smm		/*
1345306322Smm		 * Enable sparse file "read" support only for regular
1346306322Smm		 * files and explicit GNU sparse files.  However, we
1347306322Smm		 * don't allow non-standard file types to be sparse.
1348306322Smm		 */
1349306322Smm		tar->sparse_allowed = 1;
1350306322Smm		/* FALLTHROUGH */
1351228753Smm	default: /* Regular file  and non-standard types */
1352228753Smm		/*
1353228753Smm		 * Per POSIX: non-recognized types should always be
1354228753Smm		 * treated as regular files.
1355228753Smm		 */
1356228753Smm		archive_entry_set_filetype(entry, AE_IFREG);
1357228753Smm		break;
1358228753Smm	}
1359232153Smm	return (err);
1360228753Smm}
1361228753Smm
1362228753Smm/*
1363228753Smm * Parse out header elements for "old-style" tar archives.
1364228753Smm */
1365228753Smmstatic int
1366228753Smmheader_old_tar(struct archive_read *a, struct tar *tar,
1367228753Smm    struct archive_entry *entry, const void *h)
1368228753Smm{
1369228753Smm	const struct archive_entry_header_ustar	*header;
1370232153Smm	int err = ARCHIVE_OK, err2;
1371228753Smm
1372228753Smm	/* Copy filename over (to ensure null termination). */
1373228753Smm	header = (const struct archive_entry_header_ustar *)h;
1374232153Smm	if (archive_entry_copy_pathname_l(entry,
1375232153Smm	    header->name, sizeof(header->name), tar->sconv) != 0) {
1376232153Smm		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1377232153Smm		if (err == ARCHIVE_FATAL)
1378232153Smm			return (err);
1379232153Smm	}
1380228753Smm
1381228753Smm	/* Grab rest of common fields */
1382232153Smm	err2 = header_common(a, tar, entry, h);
1383232153Smm	if (err > err2)
1384232153Smm		err = err2;
1385228753Smm
1386228753Smm	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1387232153Smm	return (err);
1388228753Smm}
1389228753Smm
1390228753Smm/*
1391232153Smm * Read a Mac AppleDouble-encoded blob of file metadata,
1392232153Smm * if there is one.
1393232153Smm */
1394232153Smmstatic int
1395232153Smmread_mac_metadata_blob(struct archive_read *a, struct tar *tar,
1396232153Smm    struct archive_entry *entry, const void *h, size_t *unconsumed)
1397232153Smm{
1398232153Smm	int64_t size;
1399232153Smm	const void *data;
1400232153Smm	const char *p, *name;
1401232153Smm	const wchar_t *wp, *wname;
1402232153Smm
1403232153Smm	(void)h; /* UNUSED */
1404232153Smm
1405232153Smm	wname = wp = archive_entry_pathname_w(entry);
1406232153Smm	if (wp != NULL) {
1407232153Smm		/* Find the last path element. */
1408232153Smm		for (; *wp != L'\0'; ++wp) {
1409232153Smm			if (wp[0] == '/' && wp[1] != L'\0')
1410232153Smm				wname = wp + 1;
1411232153Smm		}
1412302001Smm		/*
1413232153Smm		 * If last path element starts with "._", then
1414232153Smm		 * this is a Mac extension.
1415232153Smm		 */
1416232153Smm		if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0')
1417232153Smm			return ARCHIVE_OK;
1418232153Smm	} else {
1419232153Smm		/* Find the last path element. */
1420232153Smm		name = p = archive_entry_pathname(entry);
1421232153Smm		if (p == NULL)
1422232153Smm			return (ARCHIVE_FAILED);
1423232153Smm		for (; *p != '\0'; ++p) {
1424232153Smm			if (p[0] == '/' && p[1] != '\0')
1425232153Smm				name = p + 1;
1426232153Smm		}
1427302001Smm		/*
1428232153Smm		 * If last path element starts with "._", then
1429232153Smm		 * this is a Mac extension.
1430232153Smm		 */
1431232153Smm		if (name[0] != '.' || name[1] != '_' || name[2] == '\0')
1432232153Smm			return ARCHIVE_OK;
1433232153Smm	}
1434232153Smm
1435232153Smm 	/* Read the body as a Mac OS metadata blob. */
1436232153Smm	size = archive_entry_size(entry);
1437232153Smm
1438232153Smm	/*
1439232153Smm	 * TODO: Look beyond the body here to peek at the next header.
1440232153Smm	 * If it's a regular header (not an extension header)
1441232153Smm	 * that has the wrong name, just return the current
1442232153Smm	 * entry as-is, without consuming the body here.
1443232153Smm	 * That would reduce the risk of us mis-identifying
1444232153Smm	 * an ordinary file that just happened to have
1445232153Smm	 * a name starting with "._".
1446232153Smm	 *
1447232153Smm	 * Q: Is the above idea really possible?  Even
1448232153Smm	 * when there are GNU or pax extension entries?
1449232153Smm	 */
1450238856Smm	data = __archive_read_ahead(a, (size_t)size, NULL);
1451232153Smm	if (data == NULL) {
1452232153Smm		*unconsumed = 0;
1453232153Smm		return (ARCHIVE_FATAL);
1454232153Smm	}
1455238856Smm	archive_entry_copy_mac_metadata(entry, data, (size_t)size);
1456238856Smm	*unconsumed = (size_t)((size + 511) & ~ 511);
1457232153Smm	tar_flush_unconsumed(a, unconsumed);
1458232153Smm	return (tar_read_header(a, tar, entry, unconsumed));
1459232153Smm}
1460232153Smm
1461232153Smm/*
1462228753Smm * Parse a file header for a pax extended archive entry.
1463228753Smm */
1464228753Smmstatic int
1465228753Smmheader_pax_global(struct archive_read *a, struct tar *tar,
1466232153Smm    struct archive_entry *entry, const void *h, size_t *unconsumed)
1467228753Smm{
1468228753Smm	int err;
1469228753Smm
1470232153Smm	err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed);
1471228753Smm	if (err != ARCHIVE_OK)
1472228753Smm		return (err);
1473232153Smm	err = tar_read_header(a, tar, entry, unconsumed);
1474228753Smm	return (err);
1475228753Smm}
1476228753Smm
1477228753Smmstatic int
1478228753Smmheader_pax_extensions(struct archive_read *a, struct tar *tar,
1479232153Smm    struct archive_entry *entry, const void *h, size_t *unconsumed)
1480228753Smm{
1481228753Smm	int err, err2;
1482228753Smm
1483232153Smm	err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed);
1484228753Smm	if (err != ARCHIVE_OK)
1485228753Smm		return (err);
1486228753Smm
1487228753Smm	/* Parse the next header. */
1488232153Smm	err = tar_read_header(a, tar, entry, unconsumed);
1489228753Smm	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
1490228753Smm		return (err);
1491228753Smm
1492228753Smm	/*
1493228753Smm	 * TODO: Parse global/default options into 'entry' struct here
1494228753Smm	 * before handling file-specific options.
1495228753Smm	 *
1496228753Smm	 * This design (parse standard header, then overwrite with pax
1497228753Smm	 * extended attribute data) usually works well, but isn't ideal;
1498228753Smm	 * it would be better to parse the pax extended attributes first
1499228753Smm	 * and then skip any fields in the standard header that were
1500228753Smm	 * defined in the pax header.
1501228753Smm	 */
1502313571Smm	err2 = pax_header(a, tar, entry, &tar->pax_header);
1503228753Smm	err =  err_combine(err, err2);
1504228753Smm	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1505228753Smm	return (err);
1506228753Smm}
1507228753Smm
1508228753Smm
1509228753Smm/*
1510228753Smm * Parse a file header for a Posix "ustar" archive entry.  This also
1511228753Smm * handles "pax" or "extended ustar" entries.
1512228753Smm */
1513228753Smmstatic int
1514228753Smmheader_ustar(struct archive_read *a, struct tar *tar,
1515228753Smm    struct archive_entry *entry, const void *h)
1516228753Smm{
1517228753Smm	const struct archive_entry_header_ustar	*header;
1518228753Smm	struct archive_string *as;
1519232153Smm	int err = ARCHIVE_OK, r;
1520228753Smm
1521228753Smm	header = (const struct archive_entry_header_ustar *)h;
1522228753Smm
1523228753Smm	/* Copy name into an internal buffer to ensure null-termination. */
1524228753Smm	as = &(tar->entry_pathname);
1525228753Smm	if (header->prefix[0]) {
1526228753Smm		archive_strncpy(as, header->prefix, sizeof(header->prefix));
1527228753Smm		if (as->s[archive_strlen(as) - 1] != '/')
1528228753Smm			archive_strappend_char(as, '/');
1529228753Smm		archive_strncat(as, header->name, sizeof(header->name));
1530232153Smm	} else {
1531228753Smm		archive_strncpy(as, header->name, sizeof(header->name));
1532232153Smm	}
1533232153Smm	if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as),
1534232153Smm	    tar->sconv) != 0) {
1535232153Smm		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1536232153Smm		if (err == ARCHIVE_FATAL)
1537232153Smm			return (err);
1538232153Smm	}
1539228753Smm
1540228753Smm	/* Handle rest of common fields. */
1541232153Smm	r = header_common(a, tar, entry, h);
1542232153Smm	if (r == ARCHIVE_FATAL)
1543232153Smm		return (r);
1544232153Smm	if (r < err)
1545232153Smm		err = r;
1546228753Smm
1547228753Smm	/* Handle POSIX ustar fields. */
1548232153Smm	if (archive_entry_copy_uname_l(entry,
1549232153Smm	    header->uname, sizeof(header->uname), tar->sconv) != 0) {
1550232153Smm		err = set_conversion_failed_error(a, tar->sconv, "Uname");
1551232153Smm		if (err == ARCHIVE_FATAL)
1552232153Smm			return (err);
1553232153Smm	}
1554228753Smm
1555232153Smm	if (archive_entry_copy_gname_l(entry,
1556232153Smm	    header->gname, sizeof(header->gname), tar->sconv) != 0) {
1557232153Smm		err = set_conversion_failed_error(a, tar->sconv, "Gname");
1558232153Smm		if (err == ARCHIVE_FATAL)
1559232153Smm			return (err);
1560232153Smm	}
1561228753Smm
1562228753Smm	/* Parse out device numbers only for char and block specials. */
1563228753Smm	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1564238856Smm		archive_entry_set_rdevmajor(entry, (dev_t)
1565228753Smm		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1566238856Smm		archive_entry_set_rdevminor(entry, (dev_t)
1567228753Smm		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1568228753Smm	}
1569228753Smm
1570228753Smm	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1571228753Smm
1572232153Smm	return (err);
1573228753Smm}
1574228753Smm
1575228753Smm
1576228753Smm/*
1577228753Smm * Parse the pax extended attributes record.
1578228753Smm *
1579228753Smm * Returns non-zero if there's an error in the data.
1580228753Smm */
1581228753Smmstatic int
1582228753Smmpax_header(struct archive_read *a, struct tar *tar,
1583313571Smm    struct archive_entry *entry, struct archive_string *in_as)
1584228753Smm{
1585313571Smm	size_t attr_length, l, line_length, value_length;
1586228753Smm	char *p;
1587228753Smm	char *key, *value;
1588232153Smm	struct archive_string *as;
1589232153Smm	struct archive_string_conv *sconv;
1590228753Smm	int err, err2;
1591313571Smm	char *attr = in_as->s;
1592228753Smm
1593313571Smm	attr_length = in_as->length;
1594228753Smm	tar->pax_hdrcharset_binary = 0;
1595228753Smm	archive_string_empty(&(tar->entry_gname));
1596228753Smm	archive_string_empty(&(tar->entry_linkpath));
1597228753Smm	archive_string_empty(&(tar->entry_pathname));
1598228753Smm	archive_string_empty(&(tar->entry_pathname_override));
1599228753Smm	archive_string_empty(&(tar->entry_uname));
1600228753Smm	err = ARCHIVE_OK;
1601228753Smm	while (attr_length > 0) {
1602228753Smm		/* Parse decimal length field at start of line. */
1603228753Smm		line_length = 0;
1604228753Smm		l = attr_length;
1605228753Smm		p = attr; /* Record start of line. */
1606228753Smm		while (l>0) {
1607228753Smm			if (*p == ' ') {
1608228753Smm				p++;
1609228753Smm				l--;
1610228753Smm				break;
1611228753Smm			}
1612228753Smm			if (*p < '0' || *p > '9') {
1613228753Smm				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1614228753Smm				    "Ignoring malformed pax extended attributes");
1615228753Smm				return (ARCHIVE_WARN);
1616228753Smm			}
1617228753Smm			line_length *= 10;
1618228753Smm			line_length += *p - '0';
1619228753Smm			if (line_length > 999999) {
1620228753Smm				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1621228753Smm				    "Rejecting pax extended attribute > 1MB");
1622228753Smm				return (ARCHIVE_WARN);
1623228753Smm			}
1624228753Smm			p++;
1625228753Smm			l--;
1626228753Smm		}
1627228753Smm
1628228753Smm		/*
1629228753Smm		 * Parsed length must be no bigger than available data,
1630228753Smm		 * at least 1, and the last character of the line must
1631228753Smm		 * be '\n'.
1632228753Smm		 */
1633228753Smm		if (line_length > attr_length
1634228753Smm		    || line_length < 1
1635228753Smm		    || attr[line_length - 1] != '\n')
1636228753Smm		{
1637228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1638228753Smm			    "Ignoring malformed pax extended attribute");
1639228753Smm			return (ARCHIVE_WARN);
1640228753Smm		}
1641228753Smm
1642228753Smm		/* Null-terminate the line. */
1643228753Smm		attr[line_length - 1] = '\0';
1644228753Smm
1645228753Smm		/* Find end of key and null terminate it. */
1646228753Smm		key = p;
1647228753Smm		if (key[0] == '=')
1648228753Smm			return (-1);
1649228753Smm		while (*p && *p != '=')
1650228753Smm			++p;
1651228753Smm		if (*p == '\0') {
1652228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1653228753Smm			    "Invalid pax extended attributes");
1654228753Smm			return (ARCHIVE_WARN);
1655228753Smm		}
1656228753Smm		*p = '\0';
1657228753Smm
1658228753Smm		value = p + 1;
1659228753Smm
1660313571Smm		/* Some values may be binary data */
1661313571Smm		value_length = attr + line_length - 1 - value;
1662313571Smm
1663228753Smm		/* Identify this attribute and set it in the entry. */
1664313571Smm		err2 = pax_attribute(a, tar, entry, key, value, value_length);
1665232153Smm		if (err2 == ARCHIVE_FATAL)
1666232153Smm			return (err2);
1667228753Smm		err = err_combine(err, err2);
1668228753Smm
1669228753Smm		/* Skip to next line */
1670228753Smm		attr += line_length;
1671228753Smm		attr_length -= line_length;
1672228753Smm	}
1673232153Smm
1674232153Smm	/*
1675232153Smm	 * PAX format uses UTF-8 as default charset for its metadata
1676232153Smm	 * unless hdrcharset=BINARY is present in its header.
1677232153Smm	 * We apply the charset specified by the hdrcharset option only
1678232153Smm	 * when the hdrcharset attribute(in PAX header) is BINARY because
1679232153Smm	 * we respect the charset described in PAX header and BINARY also
1680232153Smm	 * means that metadata(filename,uname and gname) character-set
1681232153Smm	 * is unknown.
1682232153Smm	 */
1683232153Smm	if (tar->pax_hdrcharset_binary)
1684232153Smm		sconv = tar->opt_sconv;
1685232153Smm	else {
1686232153Smm		sconv = archive_string_conversion_from_charset(
1687232153Smm		    &(a->archive), "UTF-8", 1);
1688232153Smm		if (sconv == NULL)
1689232153Smm			return (ARCHIVE_FATAL);
1690232153Smm		if (tar->compat_2x)
1691232153Smm			archive_string_conversion_set_opt(sconv,
1692232153Smm			    SCONV_SET_OPT_UTF8_LIBARCHIVE2X);
1693232153Smm	}
1694232153Smm
1695228753Smm	if (archive_strlen(&(tar->entry_gname)) > 0) {
1696232153Smm		if (archive_entry_copy_gname_l(entry, tar->entry_gname.s,
1697232153Smm		    archive_strlen(&(tar->entry_gname)), sconv) != 0) {
1698232153Smm			err = set_conversion_failed_error(a, sconv, "Gname");
1699232153Smm			if (err == ARCHIVE_FATAL)
1700232153Smm				return (err);
1701232153Smm			/* Use a converted an original name. */
1702232153Smm			archive_entry_copy_gname(entry, tar->entry_gname.s);
1703228753Smm		}
1704228753Smm	}
1705228753Smm	if (archive_strlen(&(tar->entry_linkpath)) > 0) {
1706232153Smm		if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s,
1707232153Smm		    archive_strlen(&(tar->entry_linkpath)), sconv) != 0) {
1708232153Smm			err = set_conversion_failed_error(a, sconv, "Linkname");
1709232153Smm			if (err == ARCHIVE_FATAL)
1710232153Smm				return (err);
1711232153Smm			/* Use a converted an original name. */
1712232153Smm			archive_entry_copy_link(entry, tar->entry_linkpath.s);
1713228753Smm		}
1714228753Smm	}
1715228753Smm	/*
1716228753Smm	 * Some extensions (such as the GNU sparse file extensions)
1717228753Smm	 * deliberately store a synthetic name under the regular 'path'
1718228753Smm	 * attribute and the real file name under a different attribute.
1719228753Smm	 * Since we're supposed to not care about the order, we
1720228753Smm	 * have no choice but to store all of the various filenames
1721228753Smm	 * we find and figure it all out afterwards.  This is the
1722228753Smm	 * figuring out part.
1723228753Smm	 */
1724232153Smm	as = NULL;
1725228753Smm	if (archive_strlen(&(tar->entry_pathname_override)) > 0)
1726232153Smm		as = &(tar->entry_pathname_override);
1727228753Smm	else if (archive_strlen(&(tar->entry_pathname)) > 0)
1728232153Smm		as = &(tar->entry_pathname);
1729232153Smm	if (as != NULL) {
1730232153Smm		if (archive_entry_copy_pathname_l(entry, as->s,
1731232153Smm		    archive_strlen(as), sconv) != 0) {
1732232153Smm			err = set_conversion_failed_error(a, sconv, "Pathname");
1733232153Smm			if (err == ARCHIVE_FATAL)
1734232153Smm				return (err);
1735232153Smm			/* Use a converted an original name. */
1736232153Smm			archive_entry_copy_pathname(entry, as->s);
1737228753Smm		}
1738228753Smm	}
1739228753Smm	if (archive_strlen(&(tar->entry_uname)) > 0) {
1740232153Smm		if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
1741232153Smm		    archive_strlen(&(tar->entry_uname)), sconv) != 0) {
1742232153Smm			err = set_conversion_failed_error(a, sconv, "Uname");
1743232153Smm			if (err == ARCHIVE_FATAL)
1744232153Smm				return (err);
1745232153Smm			/* Use a converted an original name. */
1746232153Smm			archive_entry_copy_uname(entry, tar->entry_uname.s);
1747228753Smm		}
1748228753Smm	}
1749228753Smm	return (err);
1750228753Smm}
1751228753Smm
/*
 * Store a "LIBARCHIVE.xattr.<name>" pax attribute as an extended
 * attribute on 'entry'.  The part of the key after the prefix is
 * URL-decoded and the value is base64-decoded.
 *
 * Returns 0 on success, 3 if 'name' does not carry the prefix followed
 * by at least one character, 2 if the name cannot be decoded and 1 if
 * the value cannot be decoded.
 */
static int
pax_attribute_xattr(struct archive_entry *entry,
	const char *name, const char *value)
{
	static const char prefix[] = "LIBARCHIVE.xattr.";
	const size_t prefix_len = sizeof(prefix) - 1;
	char *xattr_name;
	void *xattr_value;
	size_t xattr_size;
	int rc;

	/* Need the full prefix plus a non-empty attribute name. */
	if (strlen(name) <= prefix_len ||
	    memcmp(name, prefix, prefix_len) != 0)
		return 3;

	xattr_name = url_decode(name + prefix_len);
	if (xattr_name == NULL)
		return 2;

	xattr_value = base64_decode(value, strlen(value), &xattr_size);
	if (xattr_value != NULL) {
		archive_entry_xattr_add_entry(entry, xattr_name,
		    xattr_value, xattr_size);
		free(xattr_value);
		rc = 0;
	} else {
		rc = 1;
	}

	free(xattr_name);
	return rc;
}
1784228753Smm
/*
 * Store a "SCHILY.xattr.<name>" pax attribute as an extended attribute
 * on 'entry'.  Name and value are passed through unchanged; the value
 * is taken as 'value_length' raw bytes.
 *
 * Returns 0 on success, 1 if 'name' does not carry the prefix followed
 * by at least one character.
 */
static int
pax_attribute_schily_xattr(struct archive_entry *entry,
	const char *name, const char *value, size_t value_length)
{
	static const char prefix[] = "SCHILY.xattr.";
	const size_t prefix_len = sizeof(prefix) - 1;

	if (strlen(name) > prefix_len &&
	    memcmp(name, prefix, prefix_len) == 0) {
		archive_entry_xattr_add_entry(entry, name + prefix_len,
		    value, value_length);
		return 0;
	}

	return 1;
}
1798313571Smm
/*
 * Record 'value' (value_length bytes) on 'entry' as the extended
 * attribute "security.selinux".  Always returns 0.
 */
static int
pax_attribute_rht_security_selinux(struct archive_entry *entry,
	const char *value, size_t value_length)
{
	static const char xattr_name[] = "security.selinux";

	archive_entry_xattr_add_entry(entry, xattr_name, value,
	    value_length);
	return 0;
}
1808362134Smm
1809362134Smmstatic int
1810313571Smmpax_attribute_acl(struct archive_read *a, struct tar *tar,
1811313571Smm    struct archive_entry *entry, const char *value, int type)
1812313571Smm{
1813313571Smm	int r;
1814313571Smm	const char* errstr;
1815313571Smm
1816313571Smm	switch (type) {
1817313571Smm	case ARCHIVE_ENTRY_ACL_TYPE_ACCESS:
1818313571Smm		errstr = "SCHILY.acl.access";
1819313571Smm		break;
1820313571Smm	case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT:
1821313571Smm		errstr = "SCHILY.acl.default";
1822313571Smm		break;
1823313571Smm	case ARCHIVE_ENTRY_ACL_TYPE_NFS4:
1824313571Smm		errstr = "SCHILY.acl.ace";
1825313571Smm		break;
1826313571Smm	default:
1827313571Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1828313571Smm		    "Unknown ACL type: %d", type);
1829313571Smm		return(ARCHIVE_FATAL);
1830313571Smm	}
1831313571Smm
1832313571Smm	if (tar->sconv_acl == NULL) {
1833313571Smm		tar->sconv_acl =
1834313571Smm		    archive_string_conversion_from_charset(
1835313571Smm			&(a->archive), "UTF-8", 1);
1836313571Smm		if (tar->sconv_acl == NULL)
1837313571Smm			return (ARCHIVE_FATAL);
1838313571Smm	}
1839313571Smm
1840313571Smm	r = archive_acl_from_text_l(archive_entry_acl(entry), value, type,
1841313571Smm	    tar->sconv_acl);
1842313571Smm	if (r != ARCHIVE_OK) {
1843313571Smm		if (r == ARCHIVE_FATAL) {
1844313571Smm			archive_set_error(&a->archive, ENOMEM,
1845313571Smm			    "%s %s", "Can't allocate memory for ",
1846313571Smm			    errstr);
1847313571Smm			return (r);
1848313571Smm		}
1849313571Smm		archive_set_error(&a->archive,
1850313571Smm		    ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", errstr);
1851313571Smm	}
1852313571Smm	return (r);
1853313571Smm}
1854313571Smm
1855228753Smm/*
1856228753Smm * Parse a single key=value attribute.  key/value pointers are
1857228753Smm * assumed to point into reasonably long-lived storage.
1858228753Smm *
1859228753Smm * Note that POSIX reserves all-lowercase keywords.  Vendor-specific
1860228753Smm * extensions should always have keywords of the form "VENDOR.attribute"
1861228753Smm * In particular, it's quite feasible to support many different
1862228753Smm * vendor extensions here.  I'm using "LIBARCHIVE" for extensions
1863228753Smm * unique to this library.
1864228753Smm *
1865228753Smm * Investigate other vendor-specific extensions and see if
1866228753Smm * any of them look useful.
1867228753Smm */
1868228753Smmstatic int
1869232153Smmpax_attribute(struct archive_read *a, struct tar *tar,
1870313571Smm    struct archive_entry *entry, const char *key, const char *value, size_t value_length)
1871228753Smm{
1872228753Smm	int64_t s;
1873228753Smm	long n;
1874232153Smm	int err = ARCHIVE_OK, r;
1875228753Smm
1876238856Smm#ifndef __FreeBSD__
1877238856Smm	if (value == NULL)
1878238856Smm		value = "";	/* Disable compiler warning; do not pass
1879238856Smm				 * NULL pointer to strlen().  */
1880238856Smm#endif
1881228753Smm	switch (key[0]) {
1882228753Smm	case 'G':
1883306322Smm		/* Reject GNU.sparse.* headers on non-regular files. */
1884306322Smm		if (strncmp(key, "GNU.sparse", 10) == 0 &&
1885306322Smm		    !tar->sparse_allowed) {
1886306322Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1887306322Smm			    "Non-regular file cannot be sparse");
1888306322Smm			return (ARCHIVE_FATAL);
1889306322Smm		}
1890306322Smm
1891228753Smm		/* GNU "0.0" sparse pax format. */
1892228753Smm		if (strcmp(key, "GNU.sparse.numblocks") == 0) {
1893228753Smm			tar->sparse_offset = -1;
1894228753Smm			tar->sparse_numbytes = -1;
1895228753Smm			tar->sparse_gnu_major = 0;
1896228753Smm			tar->sparse_gnu_minor = 0;
1897228753Smm		}
1898228753Smm		if (strcmp(key, "GNU.sparse.offset") == 0) {
1899228753Smm			tar->sparse_offset = tar_atol10(value, strlen(value));
1900228753Smm			if (tar->sparse_numbytes != -1) {
1901232153Smm				if (gnu_add_sparse_entry(a, tar,
1902232153Smm				    tar->sparse_offset, tar->sparse_numbytes)
1903232153Smm				    != ARCHIVE_OK)
1904232153Smm					return (ARCHIVE_FATAL);
1905228753Smm				tar->sparse_offset = -1;
1906228753Smm				tar->sparse_numbytes = -1;
1907228753Smm			}
1908228753Smm		}
1909228753Smm		if (strcmp(key, "GNU.sparse.numbytes") == 0) {
1910228753Smm			tar->sparse_numbytes = tar_atol10(value, strlen(value));
1911228753Smm			if (tar->sparse_numbytes != -1) {
1912232153Smm				if (gnu_add_sparse_entry(a, tar,
1913232153Smm				    tar->sparse_offset, tar->sparse_numbytes)
1914232153Smm				    != ARCHIVE_OK)
1915232153Smm					return (ARCHIVE_FATAL);
1916228753Smm				tar->sparse_offset = -1;
1917228753Smm				tar->sparse_numbytes = -1;
1918228753Smm			}
1919228753Smm		}
1920228753Smm		if (strcmp(key, "GNU.sparse.size") == 0) {
1921228753Smm			tar->realsize = tar_atol10(value, strlen(value));
1922228753Smm			archive_entry_set_size(entry, tar->realsize);
1923316338Smm			tar->realsize_override = 1;
1924228753Smm		}
1925228753Smm
1926228753Smm		/* GNU "0.1" sparse pax format. */
1927228753Smm		if (strcmp(key, "GNU.sparse.map") == 0) {
1928228753Smm			tar->sparse_gnu_major = 0;
1929228753Smm			tar->sparse_gnu_minor = 1;
1930232153Smm			if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK)
1931228753Smm				return (ARCHIVE_WARN);
1932228753Smm		}
1933228753Smm
1934228753Smm		/* GNU "1.0" sparse pax format */
1935228753Smm		if (strcmp(key, "GNU.sparse.major") == 0) {
1936238856Smm			tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value));
1937228753Smm			tar->sparse_gnu_pending = 1;
1938228753Smm		}
1939228753Smm		if (strcmp(key, "GNU.sparse.minor") == 0) {
1940238856Smm			tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value));
1941228753Smm			tar->sparse_gnu_pending = 1;
1942228753Smm		}
1943228753Smm		if (strcmp(key, "GNU.sparse.name") == 0) {
1944228753Smm			/*
1945228753Smm			 * The real filename; when storing sparse
1946228753Smm			 * files, GNU tar puts a synthesized name into
1947228753Smm			 * the regular 'path' attribute in an attempt
1948228753Smm			 * to limit confusion. ;-)
1949228753Smm			 */
1950228753Smm			archive_strcpy(&(tar->entry_pathname_override), value);
1951228753Smm		}
1952228753Smm		if (strcmp(key, "GNU.sparse.realsize") == 0) {
1953228753Smm			tar->realsize = tar_atol10(value, strlen(value));
1954228753Smm			archive_entry_set_size(entry, tar->realsize);
1955316338Smm			tar->realsize_override = 1;
1956228753Smm		}
1957228753Smm		break;
1958228753Smm	case 'L':
1959228753Smm		/* Our extensions */
1960228753Smm/* TODO: Handle arbitrary extended attributes... */
1961228753Smm/*
1962232153Smm		if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
1963228753Smm			archive_entry_set_xxxxxx(entry, value);
1964228753Smm*/
1965232153Smm		if (strcmp(key, "LIBARCHIVE.creationtime") == 0) {
1966228753Smm			pax_time(value, &s, &n);
1967228753Smm			archive_entry_set_birthtime(entry, s, n);
1968228753Smm		}
1969348608Smm		if (strcmp(key, "LIBARCHIVE.symlinktype") == 0) {
1970348608Smm			if (strcmp(value, "file") == 0) {
1971348608Smm				archive_entry_set_symlink_type(entry,
1972348608Smm				    AE_SYMLINK_TYPE_FILE);
1973348608Smm			} else if (strcmp(value, "dir") == 0) {
1974348608Smm				archive_entry_set_symlink_type(entry,
1975348608Smm				    AE_SYMLINK_TYPE_DIRECTORY);
1976348608Smm			}
1977348608Smm		}
1978232153Smm		if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0)
1979228753Smm			pax_attribute_xattr(entry, key, value);
1980228753Smm		break;
1981362134Smm	case 'R':
1982362134Smm		/* GNU tar uses RHT.security header to store SELinux xattrs
1983362134Smm		 * SCHILY.xattr.security.selinux == RHT.security.selinux */
1984362134Smm		if (strcmp(key, "RHT.security.selinux") == 0) {
1985362134Smm			pax_attribute_rht_security_selinux(entry, value,
1986362134Smm			    value_length);
1987362134Smm			}
1988362134Smm		break;
1989228753Smm	case 'S':
1990228753Smm		/* We support some keys used by the "star" archiver */
1991232153Smm		if (strcmp(key, "SCHILY.acl.access") == 0) {
1992313571Smm			r = pax_attribute_acl(a, tar, entry, value,
1993313571Smm			    ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
1994313571Smm			if (r == ARCHIVE_FATAL)
1995313571Smm				return (r);
1996232153Smm		} else if (strcmp(key, "SCHILY.acl.default") == 0) {
1997313571Smm			r = pax_attribute_acl(a, tar, entry, value,
1998313571Smm			    ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
1999313571Smm			if (r == ARCHIVE_FATAL)
2000313571Smm				return (r);
2001313571Smm		} else if (strcmp(key, "SCHILY.acl.ace") == 0) {
2002313571Smm			r = pax_attribute_acl(a, tar, entry, value,
2003313571Smm			    ARCHIVE_ENTRY_ACL_TYPE_NFS4);
2004313571Smm			if (r == ARCHIVE_FATAL)
2005313571Smm				return (r);
2006232153Smm		} else if (strcmp(key, "SCHILY.devmajor") == 0) {
2007228753Smm			archive_entry_set_rdevmajor(entry,
2008238856Smm			    (dev_t)tar_atol10(value, strlen(value)));
2009232153Smm		} else if (strcmp(key, "SCHILY.devminor") == 0) {
2010228753Smm			archive_entry_set_rdevminor(entry,
2011238856Smm			    (dev_t)tar_atol10(value, strlen(value)));
2012232153Smm		} else if (strcmp(key, "SCHILY.fflags") == 0) {
2013228753Smm			archive_entry_copy_fflags_text(entry, value);
2014232153Smm		} else if (strcmp(key, "SCHILY.dev") == 0) {
2015228753Smm			archive_entry_set_dev(entry,
2016238856Smm			    (dev_t)tar_atol10(value, strlen(value)));
2017232153Smm		} else if (strcmp(key, "SCHILY.ino") == 0) {
2018228753Smm			archive_entry_set_ino(entry,
2019228753Smm			    tar_atol10(value, strlen(value)));
2020232153Smm		} else if (strcmp(key, "SCHILY.nlink") == 0) {
2021238856Smm			archive_entry_set_nlink(entry, (unsigned)
2022228753Smm			    tar_atol10(value, strlen(value)));
2023232153Smm		} else if (strcmp(key, "SCHILY.realsize") == 0) {
2024228753Smm			tar->realsize = tar_atol10(value, strlen(value));
2025316338Smm			tar->realsize_override = 1;
2026228753Smm			archive_entry_set_size(entry, tar->realsize);
2027313571Smm		} else if (strncmp(key, "SCHILY.xattr.", 13) == 0) {
2028313571Smm			pax_attribute_schily_xattr(entry, key, value,
2029313571Smm			    value_length);
2030232153Smm		} else if (strcmp(key, "SUN.holesdata") == 0) {
2031232153Smm			/* A Solaris extension for sparse. */
2032232153Smm			r = solaris_sparse_parse(a, tar, entry, value);
2033232153Smm			if (r < err) {
2034232153Smm				if (r == ARCHIVE_FATAL)
2035232153Smm					return (r);
2036232153Smm				err = r;
2037232153Smm				archive_set_error(&a->archive,
2038232153Smm				    ARCHIVE_ERRNO_MISC,
2039232153Smm				    "Parse error: SUN.holesdata");
2040232153Smm			}
2041228753Smm		}
2042228753Smm		break;
2043228753Smm	case 'a':
2044232153Smm		if (strcmp(key, "atime") == 0) {
2045228753Smm			pax_time(value, &s, &n);
2046228753Smm			archive_entry_set_atime(entry, s, n);
2047228753Smm		}
2048228753Smm		break;
2049228753Smm	case 'c':
2050232153Smm		if (strcmp(key, "ctime") == 0) {
2051228753Smm			pax_time(value, &s, &n);
2052228753Smm			archive_entry_set_ctime(entry, s, n);
2053232153Smm		} else if (strcmp(key, "charset") == 0) {
2054228753Smm			/* TODO: Publish charset information in entry. */
2055232153Smm		} else if (strcmp(key, "comment") == 0) {
2056228753Smm			/* TODO: Publish comment in entry. */
2057228753Smm		}
2058228753Smm		break;
2059228753Smm	case 'g':
2060232153Smm		if (strcmp(key, "gid") == 0) {
2061228753Smm			archive_entry_set_gid(entry,
2062228753Smm			    tar_atol10(value, strlen(value)));
2063232153Smm		} else if (strcmp(key, "gname") == 0) {
2064228753Smm			archive_strcpy(&(tar->entry_gname), value);
2065228753Smm		}
2066228753Smm		break;
2067228753Smm	case 'h':
2068228753Smm		if (strcmp(key, "hdrcharset") == 0) {
2069228753Smm			if (strcmp(value, "BINARY") == 0)
2070232153Smm				/* Binary  mode. */
2071228753Smm				tar->pax_hdrcharset_binary = 1;
2072228753Smm			else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0)
2073228753Smm				tar->pax_hdrcharset_binary = 0;
2074228753Smm		}
2075228753Smm		break;
2076228753Smm	case 'l':
2077228753Smm		/* pax interchange doesn't distinguish hardlink vs. symlink. */
2078232153Smm		if (strcmp(key, "linkpath") == 0) {
2079228753Smm			archive_strcpy(&(tar->entry_linkpath), value);
2080228753Smm		}
2081228753Smm		break;
2082228753Smm	case 'm':
2083232153Smm		if (strcmp(key, "mtime") == 0) {
2084228753Smm			pax_time(value, &s, &n);
2085228753Smm			archive_entry_set_mtime(entry, s, n);
2086228753Smm		}
2087228753Smm		break;
2088228753Smm	case 'p':
2089232153Smm		if (strcmp(key, "path") == 0) {
2090228753Smm			archive_strcpy(&(tar->entry_pathname), value);
2091228753Smm		}
2092228753Smm		break;
2093228753Smm	case 'r':
2094228753Smm		/* POSIX has reserved 'realtime.*' */
2095228753Smm		break;
2096228753Smm	case 's':
2097228753Smm		/* POSIX has reserved 'security.*' */
2098232153Smm		/* Someday: if (strcmp(key, "security.acl") == 0) { ... } */
2099232153Smm		if (strcmp(key, "size") == 0) {
2100228753Smm			/* "size" is the size of the data in the entry. */
2101228753Smm			tar->entry_bytes_remaining
2102228753Smm			    = tar_atol10(value, strlen(value));
2103228753Smm			/*
2104316338Smm			 * The "size" pax header keyword always overrides the
2105316338Smm			 * "size" field in the tar header.
2106316338Smm			 * GNU.sparse.realsize, GNU.sparse.size and
2107316338Smm			 * SCHILY.realsize override this value.
2108228753Smm			 */
2109316338Smm			if (!tar->realsize_override) {
2110228753Smm				archive_entry_set_size(entry,
2111228753Smm				    tar->entry_bytes_remaining);
2112228753Smm				tar->realsize
2113228753Smm				    = tar->entry_bytes_remaining;
2114228753Smm			}
2115228753Smm		}
2116228753Smm		break;
2117228753Smm	case 'u':
2118232153Smm		if (strcmp(key, "uid") == 0) {
2119228753Smm			archive_entry_set_uid(entry,
2120228753Smm			    tar_atol10(value, strlen(value)));
2121232153Smm		} else if (strcmp(key, "uname") == 0) {
2122228753Smm			archive_strcpy(&(tar->entry_uname), value);
2123228753Smm		}
2124228753Smm		break;
2125228753Smm	}
2126232153Smm	return (err);
2127228753Smm}
2128228753Smm
2129228753Smm
2130228753Smm
/*
 * Parse a decimal time value, which may include a fractional portion.
 *
 * Accepts an optional leading '-', a run of digits for the seconds and
 * an optional '.' followed by fractional digits.  The seconds are
 * stored in *ps (saturating at INT64_MAX in magnitude) and up to nine
 * fractional digits are stored in *pn as nanoseconds.  The sign is
 * applied to the seconds only.  If the seconds saturate, the fraction
 * is not parsed and *pn is 0.
 */
static void
pax_time(const char *p, int64_t *ps, long *pn)
{
	const int64_t max_prefix = INT64_MAX / 10;
	const int64_t max_last_digit = INT64_MAX % 10;
	int64_t seconds = 0;
	long nanos = 0;
	unsigned long scale;
	int negative = 0;
	char digit;

	if (*p == '-') {
		negative = 1;
		p++;
	}

	/* Whole seconds; stop on the digit that would overflow. */
	for (; *p >= '0' && *p <= '9'; ++p) {
		digit = *p - '0';
		if (seconds > max_prefix ||
		    (seconds == max_prefix && digit > max_last_digit)) {
			seconds = INT64_MAX;
			break;
		}
		seconds = (seconds * 10) + digit;
	}
	*ps = negative ? -seconds : seconds;

	/* Fraction: at most nine digits, most significant first. */
	if (*p == '.') {
		for (scale = 100000000UL; scale != 0; scale /= 10) {
			++p;
			if (*p < '0' || *p > '9')
				break;
			nanos += (*p - '0') * scale;
		}
	}
	*pn = nanos;
}
2180228753Smm
2181228753Smm/*
2182228753Smm * Parse GNU tar header
2183228753Smm */
2184228753Smmstatic int
2185228753Smmheader_gnutar(struct archive_read *a, struct tar *tar,
2186232153Smm    struct archive_entry *entry, const void *h, size_t *unconsumed)
2187228753Smm{
2188228753Smm	const struct archive_entry_header_gnutar *header;
2189232153Smm	int64_t t;
2190232153Smm	int err = ARCHIVE_OK;
2191228753Smm
2192228753Smm	/*
2193228753Smm	 * GNU header is like POSIX ustar, except 'prefix' is
2194228753Smm	 * replaced with some other fields. This also means the
2195228753Smm	 * filename is stored as in old-style archives.
2196228753Smm	 */
2197228753Smm
2198228753Smm	/* Grab fields common to all tar variants. */
2199232153Smm	err = header_common(a, tar, entry, h);
2200232153Smm	if (err == ARCHIVE_FATAL)
2201232153Smm		return (err);
2202228753Smm
2203228753Smm	/* Copy filename over (to ensure null termination). */
2204228753Smm	header = (const struct archive_entry_header_gnutar *)h;
2205232153Smm	if (archive_entry_copy_pathname_l(entry,
2206232153Smm	    header->name, sizeof(header->name), tar->sconv) != 0) {
2207232153Smm		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
2208232153Smm		if (err == ARCHIVE_FATAL)
2209232153Smm			return (err);
2210232153Smm	}
2211228753Smm
2212228753Smm	/* Fields common to ustar and GNU */
2213228753Smm	/* XXX Can the following be factored out since it's common
2214228753Smm	 * to ustar and gnu tar?  Is it okay to move it down into
2215228753Smm	 * header_common, perhaps?  */
2216232153Smm	if (archive_entry_copy_uname_l(entry,
2217232153Smm	    header->uname, sizeof(header->uname), tar->sconv) != 0) {
2218232153Smm		err = set_conversion_failed_error(a, tar->sconv, "Uname");
2219232153Smm		if (err == ARCHIVE_FATAL)
2220232153Smm			return (err);
2221232153Smm	}
2222228753Smm
2223232153Smm	if (archive_entry_copy_gname_l(entry,
2224232153Smm	    header->gname, sizeof(header->gname), tar->sconv) != 0) {
2225232153Smm		err = set_conversion_failed_error(a, tar->sconv, "Gname");
2226232153Smm		if (err == ARCHIVE_FATAL)
2227232153Smm			return (err);
2228232153Smm	}
2229228753Smm
2230228753Smm	/* Parse out device numbers only for char and block specials */
2231228753Smm	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
2232238856Smm		archive_entry_set_rdevmajor(entry, (dev_t)
2233228753Smm		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
2234238856Smm		archive_entry_set_rdevminor(entry, (dev_t)
2235228753Smm		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
2236228753Smm	} else
2237228753Smm		archive_entry_set_rdev(entry, 0);
2238228753Smm
2239228753Smm	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
2240228753Smm
2241228753Smm	/* Grab GNU-specific fields. */
2242232153Smm	t = tar_atol(header->atime, sizeof(header->atime));
2243232153Smm	if (t > 0)
2244232153Smm		archive_entry_set_atime(entry, t, 0);
2245232153Smm	t = tar_atol(header->ctime, sizeof(header->ctime));
2246232153Smm	if (t > 0)
2247232153Smm		archive_entry_set_ctime(entry, t, 0);
2248232153Smm
2249228753Smm	if (header->realsize[0] != 0) {
2250228753Smm		tar->realsize
2251228753Smm		    = tar_atol(header->realsize, sizeof(header->realsize));
2252228753Smm		archive_entry_set_size(entry, tar->realsize);
2253316338Smm		tar->realsize_override = 1;
2254228753Smm	}
2255228753Smm
2256228753Smm	if (header->sparse[0].offset[0] != 0) {
2257232153Smm		if (gnu_sparse_old_read(a, tar, header, unconsumed)
2258232153Smm		    != ARCHIVE_OK)
2259232153Smm			return (ARCHIVE_FATAL);
2260228753Smm	} else {
2261228753Smm		if (header->isextended[0] != 0) {
2262228753Smm			/* XXX WTF? XXX */
2263228753Smm		}
2264228753Smm	}
2265228753Smm
2266232153Smm	return (err);
2267228753Smm}
2268228753Smm
2269232153Smmstatic int
2270232153Smmgnu_add_sparse_entry(struct archive_read *a, struct tar *tar,
2271232153Smm    int64_t offset, int64_t remaining)
2272228753Smm{
2273228753Smm	struct sparse_block *p;
2274228753Smm
2275311042Smm	p = (struct sparse_block *)calloc(1, sizeof(*p));
2276232153Smm	if (p == NULL) {
2277232153Smm		archive_set_error(&a->archive, ENOMEM, "Out of memory");
2278232153Smm		return (ARCHIVE_FATAL);
2279232153Smm	}
2280228753Smm	if (tar->sparse_last != NULL)
2281228753Smm		tar->sparse_last->next = p;
2282228753Smm	else
2283228753Smm		tar->sparse_list = p;
2284228753Smm	tar->sparse_last = p;
2285324418Smm	if (remaining < 0 || offset < 0 || offset > INT64_MAX - remaining) {
2286302001Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data");
2287302001Smm		return (ARCHIVE_FATAL);
2288302001Smm	}
2289228753Smm	p->offset = offset;
2290228753Smm	p->remaining = remaining;
2291232153Smm	return (ARCHIVE_OK);
2292228753Smm}
2293228753Smm
2294228753Smmstatic void
2295228753Smmgnu_clear_sparse_list(struct tar *tar)
2296228753Smm{
2297228753Smm	struct sparse_block *p;
2298228753Smm
2299228753Smm	while (tar->sparse_list != NULL) {
2300228753Smm		p = tar->sparse_list;
2301228753Smm		tar->sparse_list = p->next;
2302228753Smm		free(p);
2303228753Smm	}
2304228753Smm	tar->sparse_last = NULL;
2305228753Smm}
2306228753Smm
2307228753Smm/*
2308228753Smm * GNU tar old-format sparse data.
2309228753Smm *
2310228753Smm * GNU old-format sparse data is stored in a fixed-field
2311228753Smm * format.  Offset/size values are 11-byte octal fields (same
2312228753Smm * format as 'size' field in ustar header).  These are
2313228753Smm * stored in the header, allocating subsequent header blocks
2314228753Smm * as needed.  Extending the header in this way is a pretty
2315228753Smm * severe POSIX violation; this design has earned GNU tar a
2316228753Smm * lot of criticism.
2317228753Smm */
2318228753Smm
2319228753Smmstatic int
2320228753Smmgnu_sparse_old_read(struct archive_read *a, struct tar *tar,
2321232153Smm    const struct archive_entry_header_gnutar *header, size_t *unconsumed)
2322228753Smm{
2323228753Smm	ssize_t bytes_read;
2324228753Smm	const void *data;
2325228753Smm	struct extended {
2326228753Smm		struct gnu_sparse sparse[21];
2327228753Smm		char	isextended[1];
2328228753Smm		char	padding[7];
2329228753Smm	};
2330228753Smm	const struct extended *ext;
2331228753Smm
2332232153Smm	if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK)
2333232153Smm		return (ARCHIVE_FATAL);
2334228753Smm	if (header->isextended[0] == 0)
2335228753Smm		return (ARCHIVE_OK);
2336228753Smm
2337228753Smm	do {
2338232153Smm		tar_flush_unconsumed(a, unconsumed);
2339228753Smm		data = __archive_read_ahead(a, 512, &bytes_read);
2340228753Smm		if (bytes_read < 0)
2341228753Smm			return (ARCHIVE_FATAL);
2342228753Smm		if (bytes_read < 512) {
2343228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2344228753Smm			    "Truncated tar archive "
2345228753Smm			    "detected while reading sparse file data");
2346228753Smm			return (ARCHIVE_FATAL);
2347228753Smm		}
2348232153Smm		*unconsumed = 512;
2349228753Smm		ext = (const struct extended *)data;
2350232153Smm		if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK)
2351232153Smm			return (ARCHIVE_FATAL);
2352228753Smm	} while (ext->isextended[0] != 0);
2353228753Smm	if (tar->sparse_list != NULL)
2354228753Smm		tar->entry_offset = tar->sparse_list->offset;
2355228753Smm	return (ARCHIVE_OK);
2356228753Smm}
2357228753Smm
2358232153Smmstatic int
2359232153Smmgnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
2360228753Smm    const struct gnu_sparse *sparse, int length)
2361228753Smm{
2362228753Smm	while (length > 0 && sparse->offset[0] != 0) {
2363232153Smm		if (gnu_add_sparse_entry(a, tar,
2364228753Smm		    tar_atol(sparse->offset, sizeof(sparse->offset)),
2365232153Smm		    tar_atol(sparse->numbytes, sizeof(sparse->numbytes)))
2366232153Smm		    != ARCHIVE_OK)
2367232153Smm			return (ARCHIVE_FATAL);
2368228753Smm		sparse++;
2369228753Smm		length--;
2370228753Smm	}
2371232153Smm	return (ARCHIVE_OK);
2372228753Smm}
2373228753Smm
2374228753Smm/*
2375228753Smm * GNU tar sparse format 0.0
2376228753Smm *
2377228753Smm * Beginning with GNU tar 1.15, sparse files are stored using
2378228753Smm * information in the pax extended header.  The GNU tar maintainers
2379228753Smm * have gone through a number of variations in the process of working
2380232153Smm * out this scheme; fortunately, they're all numbered.
2381228753Smm *
2382228753Smm * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
2383228753Smm * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
2384228753Smm * store offset/size for each block.  The repeated instances of these
2385228753Smm * latter fields violate the pax specification (which frowns on
2386228753Smm * duplicate keys), so this format was quickly replaced.
2387228753Smm */
2388228753Smm
2389228753Smm/*
2390228753Smm * GNU tar sparse format 0.1
2391228753Smm *
2392228753Smm * This version replaced the offset/numbytes attributes with
2393228753Smm * a single "map" attribute that stored a list of integers.  This
2394228753Smm * format had two problems: First, the "map" attribute could be very
2395228753Smm * long, which caused problems for some implementations.  More
2396228753Smm * importantly, the sparse data was lost when extracted by archivers
2397228753Smm * that didn't recognize this extension.
2398228753Smm */
2399228753Smm
2400228753Smmstatic int
2401232153Smmgnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p)
2402228753Smm{
2403228753Smm	const char *e;
2404232153Smm	int64_t offset = -1, size = -1;
2405228753Smm
2406228753Smm	for (;;) {
2407228753Smm		e = p;
2408228753Smm		while (*e != '\0' && *e != ',') {
2409228753Smm			if (*e < '0' || *e > '9')
2410228753Smm				return (ARCHIVE_WARN);
2411228753Smm			e++;
2412228753Smm		}
2413228753Smm		if (offset < 0) {
2414228753Smm			offset = tar_atol10(p, e - p);
2415228753Smm			if (offset < 0)
2416228753Smm				return (ARCHIVE_WARN);
2417228753Smm		} else {
2418228753Smm			size = tar_atol10(p, e - p);
2419228753Smm			if (size < 0)
2420228753Smm				return (ARCHIVE_WARN);
2421232153Smm			if (gnu_add_sparse_entry(a, tar, offset, size)
2422232153Smm			    != ARCHIVE_OK)
2423232153Smm				return (ARCHIVE_FATAL);
2424228753Smm			offset = -1;
2425228753Smm		}
2426228753Smm		if (*e == '\0')
2427228753Smm			return (ARCHIVE_OK);
2428228753Smm		p = e + 1;
2429228753Smm	}
2430228753Smm}
2431228753Smm
2432228753Smm/*
2433228753Smm * GNU tar sparse format 1.0
2434228753Smm *
2435228753Smm * The idea: The offset/size data is stored as a series of base-10
2436228753Smm * ASCII numbers prepended to the file data, so that dearchivers that
2437228753Smm * don't support this format will extract the block map along with the
2438228753Smm * data and a separate post-process can restore the sparseness.
2439228753Smm *
2440228753Smm * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
2441228753Smm * padding to the body of the file when using this format.  GNU tar
2442228753Smm * 1.17 corrected this bug without bumping the version number, so
2443228753Smm * it's not possible to support both variants.  This code supports
2444228753Smm * the later variant at the expense of not supporting the former.
2445228753Smm *
2446228753Smm * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
2447228753Smm * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
2448228753Smm */
2449228753Smm
2450228753Smm/*
2451228753Smm * Read the next line from the input, and parse it as a decimal
2452228753Smm * integer followed by '\n'.  Returns positive integer value or
2453228753Smm * negative on error.
2454228753Smm */
2455228753Smmstatic int64_t
2456228753Smmgnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
2457232153Smm    int64_t *remaining, size_t *unconsumed)
2458228753Smm{
2459228753Smm	int64_t l, limit, last_digit_limit;
2460228753Smm	const char *p;
2461228753Smm	ssize_t bytes_read;
2462228753Smm	int base, digit;
2463228753Smm
2464228753Smm	base = 10;
2465228753Smm	limit = INT64_MAX / base;
2466228753Smm	last_digit_limit = INT64_MAX % base;
2467228753Smm
2468228753Smm	/*
2469228753Smm	 * Skip any lines starting with '#'; GNU tar specs
2470228753Smm	 * don't require this, but they should.
2471228753Smm	 */
2472228753Smm	do {
2473238856Smm		bytes_read = readline(a, tar, &p,
2474238856Smm			(ssize_t)tar_min(*remaining, 100), unconsumed);
2475228753Smm		if (bytes_read <= 0)
2476228753Smm			return (ARCHIVE_FATAL);
2477228753Smm		*remaining -= bytes_read;
2478228753Smm	} while (p[0] == '#');
2479228753Smm
2480228753Smm	l = 0;
2481228753Smm	while (bytes_read > 0) {
2482228753Smm		if (*p == '\n')
2483228753Smm			return (l);
2484228753Smm		if (*p < '0' || *p >= '0' + base)
2485228753Smm			return (ARCHIVE_WARN);
2486228753Smm		digit = *p - '0';
2487228753Smm		if (l > limit || (l == limit && digit > last_digit_limit))
2488228753Smm			l = INT64_MAX; /* Truncate on overflow. */
2489228753Smm		else
2490228753Smm			l = (l * base) + digit;
2491228753Smm		p++;
2492228753Smm		bytes_read--;
2493228753Smm	}
2494228753Smm	/* TODO: Error message. */
2495228753Smm	return (ARCHIVE_WARN);
2496228753Smm}
2497228753Smm
2498228753Smm/*
2499228753Smm * Returns length (in bytes) of the sparse data description
2500228753Smm * that was read.
2501228753Smm */
2502228753Smmstatic ssize_t
2503232153Smmgnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed)
2504228753Smm{
2505232153Smm	ssize_t bytes_read;
2506228753Smm	int entries;
2507232153Smm	int64_t offset, size, to_skip, remaining;
2508228753Smm
2509228753Smm	/* Clear out the existing sparse list. */
2510228753Smm	gnu_clear_sparse_list(tar);
2511228753Smm
2512228753Smm	remaining = tar->entry_bytes_remaining;
2513228753Smm
2514228753Smm	/* Parse entries. */
2515238856Smm	entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2516228753Smm	if (entries < 0)
2517228753Smm		return (ARCHIVE_FATAL);
2518228753Smm	/* Parse the individual entries. */
2519228753Smm	while (entries-- > 0) {
2520228753Smm		/* Parse offset/size */
2521232153Smm		offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2522228753Smm		if (offset < 0)
2523228753Smm			return (ARCHIVE_FATAL);
2524232153Smm		size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2525228753Smm		if (size < 0)
2526228753Smm			return (ARCHIVE_FATAL);
2527228753Smm		/* Add a new sparse entry. */
2528232153Smm		if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK)
2529232153Smm			return (ARCHIVE_FATAL);
2530228753Smm	}
2531228753Smm	/* Skip rest of block... */
2532232153Smm	tar_flush_unconsumed(a, unconsumed);
2533238856Smm	bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining);
2534228753Smm	to_skip = 0x1ff & -bytes_read;
2535315433Smm	/* Fail if tar->entry_bytes_remaing would get negative */
2536315433Smm	if (to_skip > remaining)
2537315433Smm		return (ARCHIVE_FATAL);
2538232153Smm	if (to_skip != __archive_read_consume(a, to_skip))
2539228753Smm		return (ARCHIVE_FATAL);
2540238856Smm	return ((ssize_t)(bytes_read + to_skip));
2541228753Smm}
2542228753Smm
2543232153Smm/*
2544232153Smm * Solaris pax extension for a sparse file. This is recorded with the
2545232153Smm * data and hole pairs. The way recording sparse information by Solaris'
2546232153Smm * pax simply indicates where data and sparse are, so the stored contents
2547232153Smm * consist of both data and hole.
2548232153Smm */
2549232153Smmstatic int
2550232153Smmsolaris_sparse_parse(struct archive_read *a, struct tar *tar,
2551232153Smm    struct archive_entry *entry, const char *p)
2552232153Smm{
2553232153Smm	const char *e;
2554232153Smm	int64_t start, end;
2555232153Smm	int hole = 1;
2556232153Smm
2557232153Smm	(void)entry; /* UNUSED */
2558232153Smm
2559232153Smm	end = 0;
2560232153Smm	if (*p == ' ')
2561232153Smm		p++;
2562232153Smm	else
2563232153Smm		return (ARCHIVE_WARN);
2564232153Smm	for (;;) {
2565232153Smm		e = p;
2566232153Smm		while (*e != '\0' && *e != ' ') {
2567232153Smm			if (*e < '0' || *e > '9')
2568232153Smm				return (ARCHIVE_WARN);
2569232153Smm			e++;
2570232153Smm		}
2571232153Smm		start = end;
2572232153Smm		end = tar_atol10(p, e - p);
2573232153Smm		if (end < 0)
2574232153Smm			return (ARCHIVE_WARN);
2575232153Smm		if (start < end) {
2576232153Smm			if (gnu_add_sparse_entry(a, tar, start,
2577232153Smm			    end - start) != ARCHIVE_OK)
2578232153Smm				return (ARCHIVE_FATAL);
2579232153Smm			tar->sparse_last->hole = hole;
2580232153Smm		}
2581232153Smm		if (*e == '\0')
2582232153Smm			return (ARCHIVE_OK);
2583232153Smm		p = e + 1;
2584232153Smm		hole = hole == 0;
2585232153Smm	}
2586232153Smm}
2587232153Smm
/*-
 * Convert text->integer.
 *
 * Traditional tar formats (including POSIX) specify base-8 for
 * all of the standard numeric fields.  This is a significant limitation
 * in practice:
 *   = file size is limited to 8GB
 *   = rdevmajor and rdevminor are limited to 21 bits
 *   = uid/gid are limited to 21 bits
 *
 * There are two workarounds for this:
 *   = pax extended headers, which use variable-length string fields
 *   = GNU tar and STAR both allow either base-8 or base-256 in
 *      most fields.  The high bit is set to indicate base-256.
 *
 * On read, this implementation supports both extensions.
 *
 * 'p' points at the field and 'char_cnt' is its length in bytes.  A
 * zero-length field is handed to the octal parser without reading
 * any byte of it.
 */
static int64_t
tar_atol(const char *p, size_t char_cnt)
{
	/*
	 * Technically, GNU tar considers a field to be in base-256
	 * only if the first byte is 0xff or 0x80.
	 *
	 * Only inspect the first byte when the field actually has one.
	 */
	if (char_cnt != 0 && (*p & 0x80))
		return (tar_atol256(p, char_cnt));
	return (tar_atol8(p, char_cnt));
}
2616228753Smm
/*
 * Parse a signed integer in the given base from a fixed-length field.
 *
 * Leading spaces and tabs are skipped, an optional '-' is accepted,
 * and digits are consumed until a non-digit or the end of the field.
 * No byte at or beyond p[char_cnt] is ever read.  Values that do not
 * fit are clamped to INT64_MAX (or INT64_MIN for negative input).  A
 * field with no digits yields 0.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol_base_n(const char *p, size_t char_cnt, int base)
{
	int64_t	l, maxval, limit, last_digit_limit;
	int digit, sign;

	/*
	 * 'l' accumulates the magnitude.  Appending 'digit' to 'l' would
	 * reach or exceed |maxval| exactly when l > limit, or l == limit
	 * and digit >= last_digit_limit; in both cases maxval is the
	 * correctly clamped result.
	 */
	maxval = INT64_MAX;
	limit = INT64_MAX / base;
	last_digit_limit = INT64_MAX % base;

	/* the pointer will not be dereferenced if char_cnt is zero
	 * due to the way the && operator is evaluated.
	 */
	while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	sign = 1;
	if (char_cnt != 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;

		maxval = INT64_MIN;
		limit = -(INT64_MIN / base);
		/* INT64_MIN % base is <= 0; negate to get the digit. */
		last_digit_limit = -(INT64_MIN % base);
	}

	l = 0;
	while (char_cnt != 0) {
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		if (l > limit || (l == limit && digit >= last_digit_limit))
			return (maxval); /* Clamp on overflow. */
		l = (l * base) + digit;
		/* Advance only; the next byte is read after the bound check. */
		p++;
		char_cnt--;
	}
	return (sign < 0) ? -l : l;
}
2665228753Smm
/*
 * Parse a fixed-length field as an octal (base-8) integer.
 */
static int64_t
tar_atol8(const char *p, size_t char_cnt)
{
	const int radix = 8;

	return (tar_atol_base_n(p, char_cnt, radix));
}
2671228753Smm
/*
 * Parse a fixed-length field as a decimal (base-10) integer.
 */
static int64_t
tar_atol10(const char *p, size_t char_cnt)
{
	const int radix = 10;

	return (tar_atol_base_n(p, char_cnt, radix));
}
2677228753Smm
/*
 * Parse a base-256 integer.  This is just a variable-length
 * twos-complement signed binary value in big-endian order, except
 * that the high-order bit is ignored.  The values here can be up to
 * 12 bytes, so we need to be careful about overflowing 64-bit
 * (8-byte) integers.
 *
 * Values that do not fit in an int64_t are clamped to INT64_MAX or
 * INT64_MIN according to their sign.  A zero-length field yields 0
 * without reading any byte.
 *
 * This code unashamedly assumes that the local machine uses 8-bit
 * bytes and twos-complement arithmetic.
 */
static int64_t
tar_atol256(const char *_p, size_t char_cnt)
{
	uint64_t l;
	const unsigned char *p = (const unsigned char *)_p;
	unsigned char c, neg;

	/*
	 * Nothing to parse.  Without this guard the --char_cnt below
	 * would wrap around and the loop would run far past the field.
	 */
	if (char_cnt == 0)
		return (0);

	/* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */
	c = *p;
	if (c & 0x40) {
		neg = 0xff;
		c |= 0x80;
		l = ~(uint64_t)0;
	} else {
		neg = 0;
		c &= 0x7f;
		l = 0;
	}

	/* If more than 8 bytes, check that we can ignore
	 * high-order bits without overflow. */
	while (char_cnt > sizeof(int64_t)) {
		--char_cnt;
		/* Every discarded byte must be pure sign extension. */
		if (c != neg)
			return (neg ? INT64_MIN : INT64_MAX);
		c = *++p;
	}

	/* c is first byte that fits; if sign mismatch, return overflow */
	if ((c ^ neg) & 0x80)
		return (neg ? INT64_MIN : INT64_MAX);

	/* Accumulate remaining bytes. */
	while (--char_cnt > 0) {
		l = (l << 8) | c;
		c = *++p;
	}
	l = (l << 8) | c;
	/* Return signed twos-complement value. */
	return ((int64_t)l);
}
2730228753Smm
2731228753Smm/*
2732228753Smm * Returns length of line (including trailing newline)
2733228753Smm * or negative on error.  'start' argument is updated to
2734228753Smm * point to first character of line.  This avoids copying
2735228753Smm * when possible.
2736228753Smm */
2737228753Smmstatic ssize_t
2738228753Smmreadline(struct archive_read *a, struct tar *tar, const char **start,
2739232153Smm    ssize_t limit, size_t *unconsumed)
2740228753Smm{
2741228753Smm	ssize_t bytes_read;
2742228753Smm	ssize_t total_size = 0;
2743228753Smm	const void *t;
2744228753Smm	const char *s;
2745228753Smm	void *p;
2746228753Smm
2747232153Smm	tar_flush_unconsumed(a, unconsumed);
2748232153Smm
2749228753Smm	t = __archive_read_ahead(a, 1, &bytes_read);
2750228753Smm	if (bytes_read <= 0)
2751228753Smm		return (ARCHIVE_FATAL);
2752228753Smm	s = t;  /* Start of line? */
2753228753Smm	p = memchr(t, '\n', bytes_read);
2754228753Smm	/* If we found '\n' in the read buffer, return pointer to that. */
2755228753Smm	if (p != NULL) {
2756228753Smm		bytes_read = 1 + ((const char *)p) - s;
2757228753Smm		if (bytes_read > limit) {
2758228753Smm			archive_set_error(&a->archive,
2759228753Smm			    ARCHIVE_ERRNO_FILE_FORMAT,
2760228753Smm			    "Line too long");
2761228753Smm			return (ARCHIVE_FATAL);
2762228753Smm		}
2763232153Smm		*unconsumed = bytes_read;
2764228753Smm		*start = s;
2765228753Smm		return (bytes_read);
2766228753Smm	}
2767232153Smm	*unconsumed = bytes_read;
2768228753Smm	/* Otherwise, we need to accumulate in a line buffer. */
2769228753Smm	for (;;) {
2770228753Smm		if (total_size + bytes_read > limit) {
2771228753Smm			archive_set_error(&a->archive,
2772228753Smm			    ARCHIVE_ERRNO_FILE_FORMAT,
2773228753Smm			    "Line too long");
2774228753Smm			return (ARCHIVE_FATAL);
2775228753Smm		}
2776228753Smm		if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
2777228753Smm			archive_set_error(&a->archive, ENOMEM,
2778228753Smm			    "Can't allocate working buffer");
2779228753Smm			return (ARCHIVE_FATAL);
2780228753Smm		}
2781228753Smm		memcpy(tar->line.s + total_size, t, bytes_read);
2782232153Smm		tar_flush_unconsumed(a, unconsumed);
2783228753Smm		total_size += bytes_read;
2784228753Smm		/* If we found '\n', clean up and return. */
2785228753Smm		if (p != NULL) {
2786228753Smm			*start = tar->line.s;
2787228753Smm			return (total_size);
2788228753Smm		}
2789228753Smm		/* Read some more. */
2790228753Smm		t = __archive_read_ahead(a, 1, &bytes_read);
2791228753Smm		if (bytes_read <= 0)
2792228753Smm			return (ARCHIVE_FATAL);
2793228753Smm		s = t;  /* Start of line? */
2794228753Smm		p = memchr(t, '\n', bytes_read);
2795228753Smm		/* If we found '\n', trim the read. */
2796228753Smm		if (p != NULL) {
2797228753Smm			bytes_read = 1 + ((const char *)p) - s;
2798228753Smm		}
2799232153Smm		*unconsumed = bytes_read;
2800228753Smm	}
2801228753Smm}
2802228753Smm
/*
 * base64_decode - Base64 decode
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 *
 * 's' and 'len' give the encoded text.  Characters outside the base-64
 * alphabet are skipped, and decoding stops at the first '=' or '_'.
 *
 * Returns a buffer obtained from malloc() holding the decoded bytes
 * (it is not NUL-terminated) with the byte count stored in *out_len,
 * or NULL with *out_len set to 0 if the allocation fails.
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const unsigned char digits[64] = {
		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
		'4','5','6','7','8','9','+','/' };
	unsigned char decode_table[128];
	char *out, *d;
	const unsigned char *src = (const unsigned char *)s;
	unsigned i;

	/*
	 * Build the reverse lookup table on the stack for each call.
	 * A lazily-filled static table is shared mutable state: a
	 * concurrent caller could observe it half initialized and
	 * decode incorrectly.  0xff marks "not a base-64 digit".
	 */
	memset(decode_table, 0xff, sizeof(decode_table));
	for (i = 0; i < sizeof(digits); i++)
		decode_table[digits[i]] = (unsigned char)i;

	/* Allocate enough space to hold the entire output. */
	/* Note that we may not use all of this... */
	out = (char *)malloc(len - len / 4 + 1);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}
	d = out;

	while (len > 0) {
		/* Collect the next group of (up to) four characters. */
		int v = 0;
		int group_size = 0;
		while (group_size < 4 && len > 0) {
			/* '=' or '_' padding indicates final group. */
			if (*src == '=' || *src == '_') {
				len = 0;
				break;
			}
			/* Skip illegal characters (including line breaks) */
			if (*src > 127 || *src < 32
			    || decode_table[*src] == 0xff) {
				len--;
				src++;
				continue;
			}
			v <<= 6;
			v |= decode_table[*src++];
			len--;
			group_size++;
		}
		/* Align a short group properly. */
		v <<= 6 * (4 - group_size);
		/* Unpack the group we just collected. */
		switch (group_size) {
		case 4: d[2] = (char)(v & 0xff);
			/* FALLTHROUGH */
		case 3: d[1] = (char)((v >> 8) & 0xff);
			/* FALLTHROUGH */
		case 2: d[0] = (char)((v >> 16) & 0xff);
			break;
		case 1: /* this is invalid! */
			break;
		default: /* empty group: nothing to emit */
			break;
		}
		/* 4, 3, 2 input digits yield 3, 2, 1 output bytes. */
		d += group_size * 3 / 4;
	}

	*out_len = d - out;
	return (out);
}
2883228753Smm
/*
 * Return a newly malloc()ed, NUL-terminated copy of 'in' with every
 * well-formed "%XX" escape (two hex digits) replaced by the byte it
 * encodes.  A '%' that is not followed by two hex digits is copied
 * through unchanged.  Returns NULL if the allocation fails.
 */
static char *
url_decode(const char *in)
{
	const char *rd;
	char *result, *wr;
	int hi, lo;

	/* Decoding never lengthens the text, so strlen(in) + 1 suffices. */
	result = (char *)malloc(strlen(in) + 1);
	if (result == NULL)
		return (NULL);

	rd = in;
	wr = result;
	while (*rd != '\0') {
		if (rd[0] == '%' && rd[1] != '\0' && rd[2] != '\0') {
			/* Candidate escape: both digits must be hex. */
			hi = tohex(rd[1]);
			lo = tohex(rd[2]);
			if (hi >= 0 && lo >= 0) {
				*wr++ = ((hi << 4) | lo);
				rd += 3;
				continue;
			}
			/* Not an escape after all; copy '%' literally. */
		}
		*wr++ = *rd++;
	}
	*wr = '\0';
	return (result);
}
2912228753Smm
/*
 * Return the numeric value (0-15) of the hexadecimal digit 'c', in
 * either letter case, or -1 if 'c' is not a hexadecimal digit.
 */
static int
tohex(int c)
{
	if ('0' <= c && c <= '9')
		return (c - '0');
	if ('A' <= c && c <= 'F')
		return (10 + (c - 'A'));
	if ('a' <= c && c <= 'f')
		return (10 + (c - 'a'));
	return (-1);
}
2925