archive_read_support_format_tar.c revision 306941
1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "archive_platform.h"
28__FBSDID("$FreeBSD: releng/10.1/contrib/libarchive/libarchive/archive_read_support_format_tar.c 306941 2016-10-10 07:18:54Z delphij $");
29
30#ifdef HAVE_ERRNO_H
31#include <errno.h>
32#endif
33#include <stddef.h>
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41#include "archive.h"
42#include "archive_acl_private.h" /* For ACL parsing routines. */
43#include "archive_entry.h"
44#include "archive_entry_locale.h"
45#include "archive_private.h"
46#include "archive_read_private.h"
47
/*
 * Smaller of two values.  This is a plain macro, so each argument may be
 * evaluated more than once; do not pass expressions with side effects.
 */
#define tar_min(a,b) ((b) > (a) ? (a) : (b))
49
/*
 * On-disk layout of a POSIX 'ustar' tar header.
 *
 * Every member is a fixed-width character field, so the struct can be
 * overlaid directly on a header record.  The members add up to 500
 * bytes; header records are read 512 bytes at a time.
 */
struct archive_entry_header_ustar {
	char	name[100];	/* offset   0 */
	char	mode[8];	/* offset 100 */
	char	uid[8];		/* offset 108 */
	char	gid[8];		/* offset 116 */
	char	size[12];	/* offset 124 */
	char	mtime[12];	/* offset 136 */
	char	checksum[8];	/* offset 148 */
	char	typeflag[1];	/* offset 156 */
	char	linkname[100];	/* offset 157; "old format" header ends here */
	char	magic[6];	/* offset 257; for POSIX: "ustar\0" */
	char	version[2];	/* offset 263; for POSIX: "00" */
	char	uname[32];	/* offset 265 */
	char	gname[32];	/* offset 297 */
	char	rdevmajor[8];	/* offset 329 */
	char	rdevminor[8];	/* offset 337 */
	char	prefix[155];	/* offset 345 */
};
71
/*
 * One entry of the sparse map embedded in a GNU tar header: a pair of
 * fixed-width character fields.
 */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};

/*
 * On-disk layout of a GNU tar header.
 *
 * The leading fields (name .. linkname) line up with the POSIX 'ustar'
 * layout.  Old GNU format doesn't use the POSIX 'prefix' field; it uses
 * the 'L' (longname) entry instead.
 */
struct archive_entry_header_gnutar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];
	char	magic[8];	/* "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	atime[12];
	char	ctime[12];
	char	offset[12];
	char	longnames[4];
	char	unused[1];
	struct gnu_sparse sparse[4];	/* in-header sparse map entries */
	char	isextended[1];
	char	realsize[12];
};
108
109/*
110 * Data specific to this format.
111 */
/*
 * One node of the singly linked list describing the current entry.
 * The data reader hands out 'remaining' bytes starting at logical
 * position 'offset'; nodes flagged as a hole are skipped instead of
 * being returned to the caller.
 */
struct sparse_block {
	struct sparse_block	*next;		/* following node, or NULL */
	int64_t	offset;				/* logical offset of this block */
	int64_t	remaining;			/* bytes still to process */
	int hole;				/* nonzero: block is a hole */
};
118
119struct tar {
120	struct archive_string	 acl_text;
121	struct archive_string	 entry_pathname;
122	/* For "GNU.sparse.name" and other similar path extensions. */
123	struct archive_string	 entry_pathname_override;
124	struct archive_string	 entry_linkpath;
125	struct archive_string	 entry_uname;
126	struct archive_string	 entry_gname;
127	struct archive_string	 longlink;
128	struct archive_string	 longname;
129	struct archive_string	 pax_header;
130	struct archive_string	 pax_global;
131	struct archive_string	 line;
132	int			 pax_hdrcharset_binary;
133	int			 header_recursion_depth;
134	int64_t			 entry_bytes_remaining;
135	int64_t			 entry_offset;
136	int64_t			 entry_padding;
137	int64_t 		 entry_bytes_unconsumed;
138	int64_t			 realsize;
139	int			 sparse_allowed;
140	struct sparse_block	*sparse_list;
141	struct sparse_block	*sparse_last;
142	int64_t			 sparse_offset;
143	int64_t			 sparse_numbytes;
144	int			 sparse_gnu_major;
145	int			 sparse_gnu_minor;
146	char			 sparse_gnu_pending;
147
148	struct archive_string	 localname;
149	struct archive_string_conv *opt_sconv;
150	struct archive_string_conv *sconv;
151	struct archive_string_conv *sconv_acl;
152	struct archive_string_conv *sconv_default;
153	int			 init_default_conversion;
154	int			 compat_2x;
155};
156
/*
 * Forward declarations of the file-local helpers, grouped by role.
 */

/* Callbacks registered with the read core. */
static int	archive_read_format_tar_bid(struct archive_read *a,
		    int best_bid);
static int	archive_read_format_tar_options(struct archive_read *a,
		    const char *key, const char *val);
static int	archive_read_format_tar_read_header(struct archive_read *a,
		    struct archive_entry *entry);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_cleanup(struct archive_read *a);

/* Header record reading and per-typeflag handlers. */
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *, size_t *);
static void	tar_flush_unconsumed(struct archive_read *, size_t *);
static int	archive_block_is_null(const char *p);
static int	checksum(struct archive_read *, const void *);
static int	header_common(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_Solaris_ACL(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_pax_extensions(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	read_mac_metadata_blob(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h, size_t *);

/* pax attribute handling. */
static int 	pax_header(struct archive_read *, struct tar *,
		    struct archive_entry *, char *attr);
static int 	pax_attribute(struct archive_read *, struct tar *,
		    struct archive_entry *, char *key, char *value);
static void	pax_time(const char *, int64_t *sec, long *nanos);

/* Sparse file support. */
static int	gnu_add_sparse_entry(struct archive_read *, struct tar *,
		    int64_t offset, int64_t remaining);
static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header, size_t *);
static int	gnu_sparse_old_parse(struct archive_read *, struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
		    const char *);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *,
		    size_t *);
static int	solaris_sparse_parse(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *);

/* Numeric field parsing and text decoding. */
static int64_t	tar_atol(const char *, size_t);
static int64_t	tar_atol8(const char *, size_t);
static int64_t	tar_atol10(const char *, size_t);
static int64_t	tar_atol256(const char *, size_t);
static ssize_t	readline(struct archive_read *, struct tar *, const char **,
		    ssize_t limit, size_t *);
static char	*base64_decode(const char *, size_t, size_t *);
static char	*url_decode(const char *);
static int	tohex(int c);
223
224
225int
226archive_read_support_format_gnutar(struct archive *a)
227{
228	archive_check_magic(a, ARCHIVE_READ_MAGIC,
229	    ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar");
230	return (archive_read_support_format_tar(a));
231}
232
233
234int
235archive_read_support_format_tar(struct archive *_a)
236{
237	struct archive_read *a = (struct archive_read *)_a;
238	struct tar *tar;
239	int r;
240
241	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
242	    ARCHIVE_STATE_NEW, "archive_read_support_format_tar");
243
244	tar = (struct tar *)calloc(1, sizeof(*tar));
245	if (tar == NULL) {
246		archive_set_error(&a->archive, ENOMEM,
247		    "Can't allocate tar data");
248		return (ARCHIVE_FATAL);
249	}
250
251	r = __archive_read_register_format(a, tar, "tar",
252	    archive_read_format_tar_bid,
253	    archive_read_format_tar_options,
254	    archive_read_format_tar_read_header,
255	    archive_read_format_tar_read_data,
256	    archive_read_format_tar_skip,
257	    NULL,
258	    archive_read_format_tar_cleanup);
259
260	if (r != ARCHIVE_OK)
261		free(tar);
262	return (ARCHIVE_OK);
263}
264
265static int
266archive_read_format_tar_cleanup(struct archive_read *a)
267{
268	struct tar *tar;
269
270	tar = (struct tar *)(a->format->data);
271	gnu_clear_sparse_list(tar);
272	archive_string_free(&tar->acl_text);
273	archive_string_free(&tar->entry_pathname);
274	archive_string_free(&tar->entry_pathname_override);
275	archive_string_free(&tar->entry_linkpath);
276	archive_string_free(&tar->entry_uname);
277	archive_string_free(&tar->entry_gname);
278	archive_string_free(&tar->line);
279	archive_string_free(&tar->pax_global);
280	archive_string_free(&tar->pax_header);
281	archive_string_free(&tar->longname);
282	archive_string_free(&tar->longlink);
283	archive_string_free(&tar->localname);
284	free(tar);
285	(a->format->data) = NULL;
286	return (ARCHIVE_OK);
287}
288
289
290static int
291archive_read_format_tar_bid(struct archive_read *a, int best_bid)
292{
293	int bid;
294	const char *h;
295	const struct archive_entry_header_ustar *header;
296
297	(void)best_bid; /* UNUSED */
298
299	bid = 0;
300
301	/* Now let's look at the actual header and see if it matches. */
302	h = __archive_read_ahead(a, 512, NULL);
303	if (h == NULL)
304		return (-1);
305
306	/* If it's an end-of-archive mark, we can handle it. */
307	if (h[0] == 0 && archive_block_is_null(h)) {
308		/*
309		 * Usually, I bid the number of bits verified, but
310		 * in this case, 4096 seems excessive so I picked 10 as
311		 * an arbitrary but reasonable-seeming value.
312		 */
313		return (10);
314	}
315
316	/* If it's not an end-of-archive mark, it must have a valid checksum.*/
317	if (!checksum(a, h))
318		return (0);
319	bid += 48;  /* Checksum is usually 6 octal digits. */
320
321	header = (const struct archive_entry_header_ustar *)h;
322
323	/* Recognize POSIX formats. */
324	if ((memcmp(header->magic, "ustar\0", 6) == 0)
325	    && (memcmp(header->version, "00", 2) == 0))
326		bid += 56;
327
328	/* Recognize GNU tar format. */
329	if ((memcmp(header->magic, "ustar ", 6) == 0)
330	    && (memcmp(header->version, " \0", 2) == 0))
331		bid += 56;
332
333	/* Type flag must be null, digit or A-Z, a-z. */
334	if (header->typeflag[0] != 0 &&
335	    !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
336	    !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
337	    !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
338		return (0);
339	bid += 2;  /* 6 bits of variation in an 8-bit field leaves 2 bits. */
340
341	/* Sanity check: Look at first byte of mode field. */
342	switch (255 & (unsigned)header->mode[0]) {
343	case 0: case 255:
344		/* Base-256 value: No further verification possible! */
345		break;
346	case ' ': /* Not recommended, but not illegal, either. */
347		break;
348	case '0': case '1': case '2': case '3':
349	case '4': case '5': case '6': case '7':
350		/* Octal Value. */
351		/* TODO: Check format of remainder of this field. */
352		break;
353	default:
354		/* Not a valid mode; bail out here. */
355		return (0);
356	}
357	/* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */
358
359	return (bid);
360}
361
362static int
363archive_read_format_tar_options(struct archive_read *a,
364    const char *key, const char *val)
365{
366	struct tar *tar;
367	int ret = ARCHIVE_FAILED;
368
369	tar = (struct tar *)(a->format->data);
370	if (strcmp(key, "compat-2x")  == 0) {
371		/* Handle UTF-8 filnames as libarchive 2.x */
372		tar->compat_2x = (val != NULL)?1:0;
373		tar->init_default_conversion = tar->compat_2x;
374		return (ARCHIVE_OK);
375	} else if (strcmp(key, "hdrcharset")  == 0) {
376		if (val == NULL || val[0] == 0)
377			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
378			    "tar: hdrcharset option needs a character-set name");
379		else {
380			tar->opt_sconv =
381			    archive_string_conversion_from_charset(
382				&a->archive, val, 0);
383			if (tar->opt_sconv != NULL)
384				ret = ARCHIVE_OK;
385			else
386				ret = ARCHIVE_FATAL;
387		}
388		return (ret);
389	}
390
391	/* Note: The "warn" return is just to inform the options
392	 * supervisor that we didn't handle it.  It will generate
393	 * a suitable error if no one used this option. */
394	return (ARCHIVE_WARN);
395}
396
/*
 * Centralizes the bookkeeping for bytes that have been read ahead but
 * not yet consumed: if *unconsumed is nonzero, consume that many bytes
 * and reset the counter.  Call this before any further read-ahead.
 *
 * Debugging hint: poisoning the claimed-unconsumed region (for example
 * filling it with 0xff before it is consumed) exposes code that still
 * uses it.  According to the original author that currently breaks
 * things, so it should only be tried while chasing such an issue.
 */
static void
tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
{
	if (*unconsumed == 0)
		return;

	__archive_read_consume(a, *unconsumed);
	*unconsumed = 0;
}
418
419/*
420 * The function invoked by archive_read_next_header().  This
421 * just sets up a few things and then calls the internal
422 * tar_read_header() function below.
423 */
424static int
425archive_read_format_tar_read_header(struct archive_read *a,
426    struct archive_entry *entry)
427{
428	/*
429	 * When converting tar archives to cpio archives, it is
430	 * essential that each distinct file have a distinct inode
431	 * number.  To simplify this, we keep a static count here to
432	 * assign fake dev/inode numbers to each tar entry.  Note that
433	 * pax format archives may overwrite this with something more
434	 * useful.
435	 *
436	 * Ideally, we would track every file read from the archive so
437	 * that we could assign the same dev/ino pair to hardlinks,
438	 * but the memory required to store a complete lookup table is
439	 * probably not worthwhile just to support the relatively
440	 * obscure tar->cpio conversion case.
441	 */
442	static int default_inode;
443	static int default_dev;
444	struct tar *tar;
445	const char *p;
446	int r;
447	size_t l, unconsumed = 0;
448
449	/* Assign default device/inode values. */
450	archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
451	archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
452	/* Limit generated st_ino number to 16 bits. */
453	if (default_inode >= 0xffff) {
454		++default_dev;
455		default_inode = 0;
456	}
457
458	tar = (struct tar *)(a->format->data);
459	tar->entry_offset = 0;
460	gnu_clear_sparse_list(tar);
461	tar->realsize = -1; /* Mark this as "unset" */
462
463	/* Setup default string conversion. */
464	tar->sconv = tar->opt_sconv;
465	if (tar->sconv == NULL) {
466		if (!tar->init_default_conversion) {
467			tar->sconv_default =
468			    archive_string_default_conversion_for_read(&(a->archive));
469			tar->init_default_conversion = 1;
470		}
471		tar->sconv = tar->sconv_default;
472	}
473
474	r = tar_read_header(a, tar, entry, &unconsumed);
475
476	tar_flush_unconsumed(a, &unconsumed);
477
478	/*
479	 * "non-sparse" files are really just sparse files with
480	 * a single block.
481	 */
482	if (tar->sparse_list == NULL) {
483		if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining)
484		    != ARCHIVE_OK)
485			return (ARCHIVE_FATAL);
486	} else {
487		struct sparse_block *sb;
488
489		for (sb = tar->sparse_list; sb != NULL; sb = sb->next) {
490			if (!sb->hole)
491				archive_entry_sparse_add_entry(entry,
492				    sb->offset, sb->remaining);
493		}
494	}
495
496	if (r == ARCHIVE_OK) {
497		/*
498		 * "Regular" entry with trailing '/' is really
499		 * directory: This is needed for certain old tar
500		 * variants and even for some broken newer ones.
501		 */
502		const wchar_t *wp;
503		wp = archive_entry_pathname_w(entry);
504		if (wp != NULL) {
505			l = wcslen(wp);
506			if (archive_entry_filetype(entry) == AE_IFREG
507			    && wp[l-1] == L'/')
508				archive_entry_set_filetype(entry, AE_IFDIR);
509		} else {
510			p = archive_entry_pathname(entry);
511			if (p == NULL)
512				return (ARCHIVE_FAILED);
513			l = strlen(p);
514			if (archive_entry_filetype(entry) == AE_IFREG
515			    && p[l-1] == '/')
516				archive_entry_set_filetype(entry, AE_IFDIR);
517		}
518	}
519	return (r);
520}
521
522static int
523archive_read_format_tar_read_data(struct archive_read *a,
524    const void **buff, size_t *size, int64_t *offset)
525{
526	ssize_t bytes_read;
527	struct tar *tar;
528	struct sparse_block *p;
529
530	tar = (struct tar *)(a->format->data);
531
532	for (;;) {
533		/* Remove exhausted entries from sparse list. */
534		while (tar->sparse_list != NULL &&
535		    tar->sparse_list->remaining == 0) {
536			p = tar->sparse_list;
537			tar->sparse_list = p->next;
538			free(p);
539		}
540
541		if (tar->entry_bytes_unconsumed) {
542			__archive_read_consume(a, tar->entry_bytes_unconsumed);
543			tar->entry_bytes_unconsumed = 0;
544		}
545
546		/* If we're at end of file, return EOF. */
547		if (tar->sparse_list == NULL ||
548		    tar->entry_bytes_remaining == 0) {
549			if (__archive_read_consume(a, tar->entry_padding) < 0)
550				return (ARCHIVE_FATAL);
551			tar->entry_padding = 0;
552			*buff = NULL;
553			*size = 0;
554			*offset = tar->realsize;
555			return (ARCHIVE_EOF);
556		}
557
558		*buff = __archive_read_ahead(a, 1, &bytes_read);
559		if (bytes_read < 0)
560			return (ARCHIVE_FATAL);
561		if (*buff == NULL) {
562			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
563			    "Truncated tar archive");
564			return (ARCHIVE_FATAL);
565		}
566		if (bytes_read > tar->entry_bytes_remaining)
567			bytes_read = (ssize_t)tar->entry_bytes_remaining;
568		/* Don't read more than is available in the
569		 * current sparse block. */
570		if (tar->sparse_list->remaining < bytes_read)
571			bytes_read = (ssize_t)tar->sparse_list->remaining;
572		*size = bytes_read;
573		*offset = tar->sparse_list->offset;
574		tar->sparse_list->remaining -= bytes_read;
575		tar->sparse_list->offset += bytes_read;
576		tar->entry_bytes_remaining -= bytes_read;
577		tar->entry_bytes_unconsumed = bytes_read;
578
579		if (!tar->sparse_list->hole)
580			return (ARCHIVE_OK);
581		/* Current is hole data and skip this. */
582	}
583}
584
585static int
586archive_read_format_tar_skip(struct archive_read *a)
587{
588	int64_t bytes_skipped;
589	struct tar* tar;
590
591	tar = (struct tar *)(a->format->data);
592
593	bytes_skipped = __archive_read_consume(a,
594	    tar->entry_bytes_remaining + tar->entry_padding +
595	    tar->entry_bytes_unconsumed);
596	if (bytes_skipped < 0)
597		return (ARCHIVE_FATAL);
598
599	tar->entry_bytes_remaining = 0;
600	tar->entry_bytes_unconsumed = 0;
601	tar->entry_padding = 0;
602
603	/* Free the sparse list. */
604	gnu_clear_sparse_list(tar);
605
606	return (ARCHIVE_OK);
607}
608
609/*
610 * This function recursively interprets all of the headers associated
611 * with a single entry.
612 */
613static int
614tar_read_header(struct archive_read *a, struct tar *tar,
615    struct archive_entry *entry, size_t *unconsumed)
616{
617	ssize_t bytes;
618	int err;
619	const char *h;
620	const struct archive_entry_header_ustar *header;
621	const struct archive_entry_header_gnutar *gnuheader;
622
623	tar_flush_unconsumed(a, unconsumed);
624
625	/* Read 512-byte header record */
626	h = __archive_read_ahead(a, 512, &bytes);
627	if (bytes < 0)
628		return ((int)bytes);
629	if (bytes == 0) { /* EOF at a block boundary. */
630		/* Some writers do omit the block of nulls. <sigh> */
631		return (ARCHIVE_EOF);
632	}
633	if (bytes < 512) {  /* Short block at EOF; this is bad. */
634		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
635		    "Truncated tar archive");
636		return (ARCHIVE_FATAL);
637	}
638	*unconsumed = 512;
639
640	/* Check for end-of-archive mark. */
641	if (h[0] == 0 && archive_block_is_null(h)) {
642		/* Try to consume a second all-null record, as well. */
643		tar_flush_unconsumed(a, unconsumed);
644		h = __archive_read_ahead(a, 512, NULL);
645		if (h != NULL)
646			__archive_read_consume(a, 512);
647		archive_clear_error(&a->archive);
648		if (a->archive.archive_format_name == NULL) {
649			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
650			a->archive.archive_format_name = "tar";
651		}
652		return (ARCHIVE_EOF);
653	}
654
655	/*
656	 * Note: If the checksum fails and we return ARCHIVE_RETRY,
657	 * then the client is likely to just retry.  This is a very
658	 * crude way to search for the next valid header!
659	 *
660	 * TODO: Improve this by implementing a real header scan.
661	 */
662	if (!checksum(a, h)) {
663		tar_flush_unconsumed(a, unconsumed);
664		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
665		return (ARCHIVE_RETRY); /* Retryable: Invalid header */
666	}
667
668	if (++tar->header_recursion_depth > 32) {
669		tar_flush_unconsumed(a, unconsumed);
670		archive_set_error(&a->archive, EINVAL, "Too many special headers");
671		return (ARCHIVE_WARN);
672	}
673
674	/* Determine the format variant. */
675	header = (const struct archive_entry_header_ustar *)h;
676
677	switch(header->typeflag[0]) {
678	case 'A': /* Solaris tar ACL */
679		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
680		a->archive.archive_format_name = "Solaris tar";
681		err = header_Solaris_ACL(a, tar, entry, h, unconsumed);
682		break;
683	case 'g': /* POSIX-standard 'g' header. */
684		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
685		a->archive.archive_format_name = "POSIX pax interchange format";
686		err = header_pax_global(a, tar, entry, h, unconsumed);
687		break;
688	case 'K': /* Long link name (GNU tar, others) */
689		err = header_longlink(a, tar, entry, h, unconsumed);
690		break;
691	case 'L': /* Long filename (GNU tar, others) */
692		err = header_longname(a, tar, entry, h, unconsumed);
693		break;
694	case 'V': /* GNU volume header */
695		err = header_volume(a, tar, entry, h, unconsumed);
696		break;
697	case 'X': /* Used by SUN tar; same as 'x'. */
698		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
699		a->archive.archive_format_name =
700		    "POSIX pax interchange format (Sun variant)";
701		err = header_pax_extensions(a, tar, entry, h, unconsumed);
702		break;
703	case 'x': /* POSIX-standard 'x' header. */
704		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
705		a->archive.archive_format_name = "POSIX pax interchange format";
706		err = header_pax_extensions(a, tar, entry, h, unconsumed);
707		break;
708	default:
709		gnuheader = (const struct archive_entry_header_gnutar *)h;
710		if (memcmp(gnuheader->magic, "ustar  \0", 8) == 0) {
711			a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
712			a->archive.archive_format_name = "GNU tar format";
713			err = header_gnutar(a, tar, entry, h, unconsumed);
714		} else if (memcmp(header->magic, "ustar", 5) == 0) {
715			if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
716				a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
717				a->archive.archive_format_name = "POSIX ustar format";
718			}
719			err = header_ustar(a, tar, entry, h);
720		} else {
721			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
722			a->archive.archive_format_name = "tar (non-POSIX)";
723			err = header_old_tar(a, tar, entry, h);
724		}
725	}
726	if (err == ARCHIVE_FATAL)
727		return (err);
728
729	tar_flush_unconsumed(a, unconsumed);
730
731	h = NULL;
732	header = NULL;
733
734	--tar->header_recursion_depth;
735	/* Yuck.  Apple's design here ends up storing long pathname
736	 * extensions for both the AppleDouble extension entry and the
737	 * regular entry.
738	 */
739	/* TODO: Should this be disabled on non-Mac platforms? */
740	if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) &&
741	    tar->header_recursion_depth == 0) {
742		int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed);
743		if (err2 < err)
744			err = err2;
745	}
746
747	/* We return warnings or success as-is.  Anything else is fatal. */
748	if (err == ARCHIVE_WARN || err == ARCHIVE_OK) {
749		if (tar->sparse_gnu_pending) {
750			if (tar->sparse_gnu_major == 1 &&
751			    tar->sparse_gnu_minor == 0) {
752				ssize_t bytes_read;
753
754				tar->sparse_gnu_pending = 0;
755				/* Read initial sparse map. */
756				bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
757				tar->entry_bytes_remaining -= bytes_read;
758				if (bytes_read < 0)
759					return ((int)bytes_read);
760			} else {
761				archive_set_error(&a->archive,
762				    ARCHIVE_ERRNO_MISC,
763				    "Unrecognized GNU sparse file format");
764				return (ARCHIVE_WARN);
765			}
766			tar->sparse_gnu_pending = 0;
767		}
768		return (err);
769	}
770	if (err == ARCHIVE_EOF)
771		/* EOF when recursively reading a header is bad. */
772		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
773	return (ARCHIVE_FATAL);
774}
775
776/*
777 * Return true if block checksum is correct.
778 */
779static int
780checksum(struct archive_read *a, const void *h)
781{
782	const unsigned char *bytes;
783	const struct archive_entry_header_ustar	*header;
784	int check, i, sum;
785
786	(void)a; /* UNUSED */
787	bytes = (const unsigned char *)h;
788	header = (const struct archive_entry_header_ustar *)h;
789
790	/*
791	 * Test the checksum.  Note that POSIX specifies _unsigned_
792	 * bytes for this calculation.
793	 */
794	sum = (int)tar_atol(header->checksum, sizeof(header->checksum));
795	check = 0;
796	for (i = 0; i < 148; i++)
797		check += (unsigned char)bytes[i];
798	for (; i < 156; i++)
799		check += 32;
800	for (; i < 512; i++)
801		check += (unsigned char)bytes[i];
802	if (sum == check)
803		return (1);
804
805	/*
806	 * Repeat test with _signed_ bytes, just in case this archive
807	 * was created by an old BSD, Solaris, or HP-UX tar with a
808	 * broken checksum calculation.
809	 */
810	check = 0;
811	for (i = 0; i < 148; i++)
812		check += (signed char)bytes[i];
813	for (; i < 156; i++)
814		check += 32;
815	for (; i < 512; i++)
816		check += (signed char)bytes[i];
817	if (sum == check)
818		return (1);
819
820	return (0);
821}
822
/*
 * Return true if this block contains only nulls.
 *
 * 'p' must point to at least 512 readable bytes; returns 1 when all
 * of them are zero and 0 as soon as a nonzero byte is found.
 */
static int
archive_block_is_null(const char *p)
{
	const char *end = p + 512;

	while (p < end) {
		if (*p != 0)
			return (0);
		++p;
	}
	return (1);
}
836
837/*
838 * Interpret 'A' Solaris ACL header
839 */
840static int
841header_Solaris_ACL(struct archive_read *a, struct tar *tar,
842    struct archive_entry *entry, const void *h, size_t *unconsumed)
843{
844	const struct archive_entry_header_ustar *header;
845	size_t size;
846	int err;
847	int64_t type;
848	char *acl, *p;
849
850	/*
851	 * read_body_to_string adds a NUL terminator, but we need a little
852	 * more to make sure that we don't overrun acl_text later.
853	 */
854	header = (const struct archive_entry_header_ustar *)h;
855	size = (size_t)tar_atol(header->size, sizeof(header->size));
856	err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed);
857	if (err != ARCHIVE_OK)
858		return (err);
859
860	/* Recursively read next header */
861	err = tar_read_header(a, tar, entry, unconsumed);
862	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
863		return (err);
864
865	/* TODO: Examine the first characters to see if this
866	 * is an AIX ACL descriptor.  We'll likely never support
867	 * them, but it would be polite to recognize and warn when
868	 * we do see them. */
869
870	/* Leading octal number indicates ACL type and number of entries. */
871	p = acl = tar->acl_text.s;
872	type = 0;
873	while (*p != '\0' && p < acl + size) {
874		if (*p < '0' || *p > '7') {
875			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
876			    "Malformed Solaris ACL attribute (invalid digit)");
877			return(ARCHIVE_WARN);
878		}
879		type <<= 3;
880		type += *p - '0';
881		if (type > 077777777) {
882			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
883			    "Malformed Solaris ACL attribute (count too large)");
884			return (ARCHIVE_WARN);
885		}
886		p++;
887	}
888	switch ((int)type & ~0777777) {
889	case 01000000:
890		/* POSIX.1e ACL */
891		break;
892	case 03000000:
893		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
894		    "Solaris NFSv4 ACLs not supported");
895		return (ARCHIVE_WARN);
896	default:
897		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
898		    "Malformed Solaris ACL attribute (unsupported type %o)",
899		    (int)type);
900		return (ARCHIVE_WARN);
901	}
902	p++;
903
904	if (p >= acl + size) {
905		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
906		    "Malformed Solaris ACL attribute (body overflow)");
907		return(ARCHIVE_WARN);
908	}
909
910	/* ACL text is null-terminated; find the end. */
911	size -= (p - acl);
912	acl = p;
913
914	while (*p != '\0' && p < acl + size)
915		p++;
916
917	if (tar->sconv_acl == NULL) {
918		tar->sconv_acl = archive_string_conversion_from_charset(
919		    &(a->archive), "UTF-8", 1);
920		if (tar->sconv_acl == NULL)
921			return (ARCHIVE_FATAL);
922	}
923	archive_strncpy(&(tar->localname), acl, p - acl);
924	err = archive_acl_parse_l(archive_entry_acl(entry),
925	    tar->localname.s, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, tar->sconv_acl);
926	if (err != ARCHIVE_OK) {
927		if (errno == ENOMEM) {
928			archive_set_error(&a->archive, ENOMEM,
929			    "Can't allocate memory for ACL");
930		} else
931			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
932			    "Malformed Solaris ACL attribute (unparsable)");
933	}
934	return (err);
935}
936
937/*
938 * Interpret 'K' long linkname header.
939 */
940static int
941header_longlink(struct archive_read *a, struct tar *tar,
942    struct archive_entry *entry, const void *h, size_t *unconsumed)
943{
944	int err;
945
946	err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed);
947	if (err != ARCHIVE_OK)
948		return (err);
949	err = tar_read_header(a, tar, entry, unconsumed);
950	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
951		return (err);
952	/* Set symlink if symlink already set, else hardlink. */
953	archive_entry_copy_link(entry, tar->longlink.s);
954	return (ARCHIVE_OK);
955}
956
957static int
958set_conversion_failed_error(struct archive_read *a,
959    struct archive_string_conv *sconv, const char *name)
960{
961	if (errno == ENOMEM) {
962		archive_set_error(&a->archive, ENOMEM,
963		    "Can't allocate memory for %s", name);
964		return (ARCHIVE_FATAL);
965	}
966	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
967	    "%s can't be converted from %s to current locale.",
968	    name, archive_string_conversion_charset_name(sconv));
969	return (ARCHIVE_WARN);
970}
971
972/*
973 * Interpret 'L' long filename header.
974 */
975static int
976header_longname(struct archive_read *a, struct tar *tar,
977    struct archive_entry *entry, const void *h, size_t *unconsumed)
978{
979	int err;
980
981	err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed);
982	if (err != ARCHIVE_OK)
983		return (err);
984	/* Read and parse "real" header, then override name. */
985	err = tar_read_header(a, tar, entry, unconsumed);
986	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
987		return (err);
988	if (archive_entry_copy_pathname_l(entry, tar->longname.s,
989	    archive_strlen(&(tar->longname)), tar->sconv) != 0)
990		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
991	return (err);
992}
993
994
/*
 * Interpret 'V' GNU tar volume header.
 *
 * The volume header itself is ignored; the result is whatever parsing
 * the next header yields.
 */
static int
header_volume(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h, size_t *unconsumed)
{
	int status;

	(void)h; /* UNUSED */

	status = tar_read_header(a, tar, entry, unconsumed);
	return (status);
}
1007
1008/*
1009 * Read body of an archive entry into an archive_string object.
1010 */
1011static int
1012read_body_to_string(struct archive_read *a, struct tar *tar,
1013    struct archive_string *as, const void *h, size_t *unconsumed)
1014{
1015	int64_t size;
1016	const struct archive_entry_header_ustar *header;
1017	const void *src;
1018
1019	(void)tar; /* UNUSED */
1020	header = (const struct archive_entry_header_ustar *)h;
1021	size  = tar_atol(header->size, sizeof(header->size));
1022	if ((size > 1048576) || (size < 0)) {
1023		archive_set_error(&a->archive, EINVAL,
1024		    "Special header too large");
1025		return (ARCHIVE_FATAL);
1026	}
1027
1028	/* Fail if we can't make our buffer big enough. */
1029	if (archive_string_ensure(as, (size_t)size+1) == NULL) {
1030		archive_set_error(&a->archive, ENOMEM,
1031		    "No memory");
1032		return (ARCHIVE_FATAL);
1033	}
1034
1035	tar_flush_unconsumed(a, unconsumed);
1036
1037	/* Read the body into the string. */
1038	*unconsumed = (size_t)((size + 511) & ~ 511);
1039	src = __archive_read_ahead(a, *unconsumed, NULL);
1040	if (src == NULL) {
1041		*unconsumed = 0;
1042		return (ARCHIVE_FATAL);
1043	}
1044	memcpy(as->s, src, (size_t)size);
1045	as->s[size] = '\0';
1046	as->length = (size_t)size;
1047	return (ARCHIVE_OK);
1048}
1049
1050/*
1051 * Parse out common header elements.
1052 *
1053 * This would be the same as header_old_tar, except that the
1054 * filename is handled slightly differently for old and POSIX
1055 * entries  (POSIX entries support a 'prefix').  This factoring
1056 * allows header_old_tar and header_ustar
1057 * to handle filenames differently, while still putting most of the
1058 * common parsing into one place.
1059 */
1060static int
1061header_common(struct archive_read *a, struct tar *tar,
1062    struct archive_entry *entry, const void *h)
1063{
1064	const struct archive_entry_header_ustar	*header;
1065	char	tartype;
1066	int     err = ARCHIVE_OK;
1067
1068	header = (const struct archive_entry_header_ustar *)h;
1069	if (header->linkname[0])
1070		archive_strncpy(&(tar->entry_linkpath),
1071		    header->linkname, sizeof(header->linkname));
1072	else
1073		archive_string_empty(&(tar->entry_linkpath));
1074
1075	/* Parse out the numeric fields (all are octal) */
1076	archive_entry_set_mode(entry,
1077		(mode_t)tar_atol(header->mode, sizeof(header->mode)));
1078	archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
1079	archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
1080	tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
1081	if (tar->entry_bytes_remaining < 0) {
1082		tar->entry_bytes_remaining = 0;
1083		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1084		    "Tar entry has negative size?");
1085		err = ARCHIVE_WARN;
1086	}
1087	tar->realsize = tar->entry_bytes_remaining;
1088	archive_entry_set_size(entry, tar->entry_bytes_remaining);
1089	archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
1090
1091	/* Handle the tar type flag appropriately. */
1092	tartype = header->typeflag[0];
1093
1094	switch (tartype) {
1095	case '1': /* Hard link */
1096		if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s,
1097		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
1098			err = set_conversion_failed_error(a, tar->sconv,
1099			    "Linkname");
1100			if (err == ARCHIVE_FATAL)
1101				return (err);
1102		}
1103		/*
1104		 * The following may seem odd, but: Technically, tar
1105		 * does not store the file type for a "hard link"
1106		 * entry, only the fact that it is a hard link.  So, I
1107		 * leave the type zero normally.  But, pax interchange
1108		 * format allows hard links to have data, which
1109		 * implies that the underlying entry is a regular
1110		 * file.
1111		 */
1112		if (archive_entry_size(entry) > 0)
1113			archive_entry_set_filetype(entry, AE_IFREG);
1114
1115		/*
1116		 * A tricky point: Traditionally, tar readers have
1117		 * ignored the size field when reading hardlink
1118		 * entries, and some writers put non-zero sizes even
1119		 * though the body is empty.  POSIX blessed this
1120		 * convention in the 1988 standard, but broke with
1121		 * this tradition in 2001 by permitting hardlink
1122		 * entries to store valid bodies in pax interchange
1123		 * format, but not in ustar format.  Since there is no
1124		 * hard and fast way to distinguish pax interchange
1125		 * from earlier archives (the 'x' and 'g' entries are
1126		 * optional, after all), we need a heuristic.
1127		 */
1128		if (archive_entry_size(entry) == 0) {
1129			/* If the size is already zero, we're done. */
1130		}  else if (a->archive.archive_format
1131		    == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
1132			/* Definitely pax extended; must obey hardlink size. */
1133		} else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
1134		    || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
1135		{
1136			/* Old-style or GNU tar: we must ignore the size. */
1137			archive_entry_set_size(entry, 0);
1138			tar->entry_bytes_remaining = 0;
1139		} else if (archive_read_format_tar_bid(a, 50) > 50) {
1140			/*
1141			 * We don't know if it's pax: If the bid
1142			 * function sees a valid ustar header
1143			 * immediately following, then let's ignore
1144			 * the hardlink size.
1145			 */
1146			archive_entry_set_size(entry, 0);
1147			tar->entry_bytes_remaining = 0;
1148		}
1149		/*
1150		 * TODO: There are still two cases I'd like to handle:
1151		 *   = a ustar non-pax archive with a hardlink entry at
1152		 *     end-of-archive.  (Look for block of nulls following?)
1153		 *   = a pax archive that has not seen any pax headers
1154		 *     and has an entry which is a hardlink entry storing
1155		 *     a body containing an uncompressed tar archive.
1156		 * The first is worth addressing; I don't see any reliable
1157		 * way to deal with the second possibility.
1158		 */
1159		break;
1160	case '2': /* Symlink */
1161		archive_entry_set_filetype(entry, AE_IFLNK);
1162		archive_entry_set_size(entry, 0);
1163		tar->entry_bytes_remaining = 0;
1164		if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s,
1165		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
1166			err = set_conversion_failed_error(a, tar->sconv,
1167			    "Linkname");
1168			if (err == ARCHIVE_FATAL)
1169				return (err);
1170		}
1171		break;
1172	case '3': /* Character device */
1173		archive_entry_set_filetype(entry, AE_IFCHR);
1174		archive_entry_set_size(entry, 0);
1175		tar->entry_bytes_remaining = 0;
1176		break;
1177	case '4': /* Block device */
1178		archive_entry_set_filetype(entry, AE_IFBLK);
1179		archive_entry_set_size(entry, 0);
1180		tar->entry_bytes_remaining = 0;
1181		break;
1182	case '5': /* Dir */
1183		archive_entry_set_filetype(entry, AE_IFDIR);
1184		archive_entry_set_size(entry, 0);
1185		tar->entry_bytes_remaining = 0;
1186		break;
1187	case '6': /* FIFO device */
1188		archive_entry_set_filetype(entry, AE_IFIFO);
1189		archive_entry_set_size(entry, 0);
1190		tar->entry_bytes_remaining = 0;
1191		break;
1192	case 'D': /* GNU incremental directory type */
1193		/*
1194		 * No special handling is actually required here.
1195		 * It might be nice someday to preprocess the file list and
1196		 * provide it to the client, though.
1197		 */
1198		archive_entry_set_filetype(entry, AE_IFDIR);
1199		break;
1200	case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
1201		/*
1202		 * As far as I can tell, this is just like a regular file
1203		 * entry, except that the contents should be _appended_ to
1204		 * the indicated file at the indicated offset.  This may
1205		 * require some API work to fully support.
1206		 */
1207		break;
1208	case 'N': /* Old GNU "long filename" entry. */
1209		/* The body of this entry is a script for renaming
1210		 * previously-extracted entries.  Ugh.  It will never
1211		 * be supported by libarchive. */
1212		archive_entry_set_filetype(entry, AE_IFREG);
1213		break;
1214	case 'S': /* GNU sparse files */
1215		/*
1216		 * Sparse files are really just regular files with
1217		 * sparse information in the extended area.
1218		 */
1219		/* FALLTHROUGH */
1220	case '0':
1221		/*
1222		 * Enable sparse file "read" support only for regular
1223		 * files and explicit GNU sparse files.  However, we
1224		 * don't allow non-standard file types to be sparse.
1225		 */
1226		tar->sparse_allowed = 1;
1227		/* FALLTHROUGH */
1228	default: /* Regular file  and non-standard types */
1229		/*
1230		 * Per POSIX: non-recognized types should always be
1231		 * treated as regular files.
1232		 */
1233		archive_entry_set_filetype(entry, AE_IFREG);
1234		break;
1235	}
1236	return (err);
1237}
1238
1239/*
1240 * Parse out header elements for "old-style" tar archives.
1241 */
1242static int
1243header_old_tar(struct archive_read *a, struct tar *tar,
1244    struct archive_entry *entry, const void *h)
1245{
1246	const struct archive_entry_header_ustar	*header;
1247	int err = ARCHIVE_OK, err2;
1248
1249	/* Copy filename over (to ensure null termination). */
1250	header = (const struct archive_entry_header_ustar *)h;
1251	if (archive_entry_copy_pathname_l(entry,
1252	    header->name, sizeof(header->name), tar->sconv) != 0) {
1253		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1254		if (err == ARCHIVE_FATAL)
1255			return (err);
1256	}
1257
1258	/* Grab rest of common fields */
1259	err2 = header_common(a, tar, entry, h);
1260	if (err > err2)
1261		err = err2;
1262
1263	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1264	return (err);
1265}
1266
1267/*
1268 * Read a Mac AppleDouble-encoded blob of file metadata,
1269 * if there is one.
1270 */
1271static int
1272read_mac_metadata_blob(struct archive_read *a, struct tar *tar,
1273    struct archive_entry *entry, const void *h, size_t *unconsumed)
1274{
1275	int64_t size;
1276	const void *data;
1277	const char *p, *name;
1278	const wchar_t *wp, *wname;
1279
1280	(void)h; /* UNUSED */
1281
1282	wname = wp = archive_entry_pathname_w(entry);
1283	if (wp != NULL) {
1284		/* Find the last path element. */
1285		for (; *wp != L'\0'; ++wp) {
1286			if (wp[0] == '/' && wp[1] != L'\0')
1287				wname = wp + 1;
1288		}
1289		/*
1290		 * If last path element starts with "._", then
1291		 * this is a Mac extension.
1292		 */
1293		if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0')
1294			return ARCHIVE_OK;
1295	} else {
1296		/* Find the last path element. */
1297		name = p = archive_entry_pathname(entry);
1298		if (p == NULL)
1299			return (ARCHIVE_FAILED);
1300		for (; *p != '\0'; ++p) {
1301			if (p[0] == '/' && p[1] != '\0')
1302				name = p + 1;
1303		}
1304		/*
1305		 * If last path element starts with "._", then
1306		 * this is a Mac extension.
1307		 */
1308		if (name[0] != '.' || name[1] != '_' || name[2] == '\0')
1309			return ARCHIVE_OK;
1310	}
1311
1312 	/* Read the body as a Mac OS metadata blob. */
1313	size = archive_entry_size(entry);
1314
1315	/*
1316	 * TODO: Look beyond the body here to peek at the next header.
1317	 * If it's a regular header (not an extension header)
1318	 * that has the wrong name, just return the current
1319	 * entry as-is, without consuming the body here.
1320	 * That would reduce the risk of us mis-identifying
1321	 * an ordinary file that just happened to have
1322	 * a name starting with "._".
1323	 *
1324	 * Q: Is the above idea really possible?  Even
1325	 * when there are GNU or pax extension entries?
1326	 */
1327	data = __archive_read_ahead(a, (size_t)size, NULL);
1328	if (data == NULL) {
1329		*unconsumed = 0;
1330		return (ARCHIVE_FATAL);
1331	}
1332	archive_entry_copy_mac_metadata(entry, data, (size_t)size);
1333	*unconsumed = (size_t)((size + 511) & ~ 511);
1334	tar_flush_unconsumed(a, unconsumed);
1335	return (tar_read_header(a, tar, entry, unconsumed));
1336}
1337
1338/*
1339 * Parse a file header for a pax extended archive entry.
1340 */
1341static int
1342header_pax_global(struct archive_read *a, struct tar *tar,
1343    struct archive_entry *entry, const void *h, size_t *unconsumed)
1344{
1345	int err;
1346
1347	err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed);
1348	if (err != ARCHIVE_OK)
1349		return (err);
1350	err = tar_read_header(a, tar, entry, unconsumed);
1351	return (err);
1352}
1353
1354static int
1355header_pax_extensions(struct archive_read *a, struct tar *tar,
1356    struct archive_entry *entry, const void *h, size_t *unconsumed)
1357{
1358	int err, err2;
1359
1360	err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed);
1361	if (err != ARCHIVE_OK)
1362		return (err);
1363
1364	/* Parse the next header. */
1365	err = tar_read_header(a, tar, entry, unconsumed);
1366	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
1367		return (err);
1368
1369	/*
1370	 * TODO: Parse global/default options into 'entry' struct here
1371	 * before handling file-specific options.
1372	 *
1373	 * This design (parse standard header, then overwrite with pax
1374	 * extended attribute data) usually works well, but isn't ideal;
1375	 * it would be better to parse the pax extended attributes first
1376	 * and then skip any fields in the standard header that were
1377	 * defined in the pax header.
1378	 */
1379	err2 = pax_header(a, tar, entry, tar->pax_header.s);
1380	err =  err_combine(err, err2);
1381	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1382	return (err);
1383}
1384
1385
1386/*
1387 * Parse a file header for a Posix "ustar" archive entry.  This also
1388 * handles "pax" or "extended ustar" entries.
1389 */
1390static int
1391header_ustar(struct archive_read *a, struct tar *tar,
1392    struct archive_entry *entry, const void *h)
1393{
1394	const struct archive_entry_header_ustar	*header;
1395	struct archive_string *as;
1396	int err = ARCHIVE_OK, r;
1397
1398	header = (const struct archive_entry_header_ustar *)h;
1399
1400	/* Copy name into an internal buffer to ensure null-termination. */
1401	as = &(tar->entry_pathname);
1402	if (header->prefix[0]) {
1403		archive_strncpy(as, header->prefix, sizeof(header->prefix));
1404		if (as->s[archive_strlen(as) - 1] != '/')
1405			archive_strappend_char(as, '/');
1406		archive_strncat(as, header->name, sizeof(header->name));
1407	} else {
1408		archive_strncpy(as, header->name, sizeof(header->name));
1409	}
1410	if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as),
1411	    tar->sconv) != 0) {
1412		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1413		if (err == ARCHIVE_FATAL)
1414			return (err);
1415	}
1416
1417	/* Handle rest of common fields. */
1418	r = header_common(a, tar, entry, h);
1419	if (r == ARCHIVE_FATAL)
1420		return (r);
1421	if (r < err)
1422		err = r;
1423
1424	/* Handle POSIX ustar fields. */
1425	if (archive_entry_copy_uname_l(entry,
1426	    header->uname, sizeof(header->uname), tar->sconv) != 0) {
1427		err = set_conversion_failed_error(a, tar->sconv, "Uname");
1428		if (err == ARCHIVE_FATAL)
1429			return (err);
1430	}
1431
1432	if (archive_entry_copy_gname_l(entry,
1433	    header->gname, sizeof(header->gname), tar->sconv) != 0) {
1434		err = set_conversion_failed_error(a, tar->sconv, "Gname");
1435		if (err == ARCHIVE_FATAL)
1436			return (err);
1437	}
1438
1439	/* Parse out device numbers only for char and block specials. */
1440	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1441		archive_entry_set_rdevmajor(entry, (dev_t)
1442		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1443		archive_entry_set_rdevminor(entry, (dev_t)
1444		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1445	}
1446
1447	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1448
1449	return (err);
1450}
1451
1452
1453/*
1454 * Parse the pax extended attributes record.
1455 *
1456 * Returns non-zero if there's an error in the data.
1457 */
1458static int
1459pax_header(struct archive_read *a, struct tar *tar,
1460    struct archive_entry *entry, char *attr)
1461{
1462	size_t attr_length, l, line_length;
1463	char *p;
1464	char *key, *value;
1465	struct archive_string *as;
1466	struct archive_string_conv *sconv;
1467	int err, err2;
1468
1469	attr_length = strlen(attr);
1470	tar->pax_hdrcharset_binary = 0;
1471	archive_string_empty(&(tar->entry_gname));
1472	archive_string_empty(&(tar->entry_linkpath));
1473	archive_string_empty(&(tar->entry_pathname));
1474	archive_string_empty(&(tar->entry_pathname_override));
1475	archive_string_empty(&(tar->entry_uname));
1476	err = ARCHIVE_OK;
1477	while (attr_length > 0) {
1478		/* Parse decimal length field at start of line. */
1479		line_length = 0;
1480		l = attr_length;
1481		p = attr; /* Record start of line. */
1482		while (l>0) {
1483			if (*p == ' ') {
1484				p++;
1485				l--;
1486				break;
1487			}
1488			if (*p < '0' || *p > '9') {
1489				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1490				    "Ignoring malformed pax extended attributes");
1491				return (ARCHIVE_WARN);
1492			}
1493			line_length *= 10;
1494			line_length += *p - '0';
1495			if (line_length > 999999) {
1496				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1497				    "Rejecting pax extended attribute > 1MB");
1498				return (ARCHIVE_WARN);
1499			}
1500			p++;
1501			l--;
1502		}
1503
1504		/*
1505		 * Parsed length must be no bigger than available data,
1506		 * at least 1, and the last character of the line must
1507		 * be '\n'.
1508		 */
1509		if (line_length > attr_length
1510		    || line_length < 1
1511		    || attr[line_length - 1] != '\n')
1512		{
1513			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1514			    "Ignoring malformed pax extended attribute");
1515			return (ARCHIVE_WARN);
1516		}
1517
1518		/* Null-terminate the line. */
1519		attr[line_length - 1] = '\0';
1520
1521		/* Find end of key and null terminate it. */
1522		key = p;
1523		if (key[0] == '=')
1524			return (-1);
1525		while (*p && *p != '=')
1526			++p;
1527		if (*p == '\0') {
1528			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1529			    "Invalid pax extended attributes");
1530			return (ARCHIVE_WARN);
1531		}
1532		*p = '\0';
1533
1534		/* Identify null-terminated 'value' portion. */
1535		value = p + 1;
1536
1537		/* Identify this attribute and set it in the entry. */
1538		err2 = pax_attribute(a, tar, entry, key, value);
1539		if (err2 == ARCHIVE_FATAL)
1540			return (err2);
1541		err = err_combine(err, err2);
1542
1543		/* Skip to next line */
1544		attr += line_length;
1545		attr_length -= line_length;
1546	}
1547
1548	/*
1549	 * PAX format uses UTF-8 as default charset for its metadata
1550	 * unless hdrcharset=BINARY is present in its header.
1551	 * We apply the charset specified by the hdrcharset option only
1552	 * when the hdrcharset attribute(in PAX header) is BINARY because
1553	 * we respect the charset described in PAX header and BINARY also
1554	 * means that metadata(filename,uname and gname) character-set
1555	 * is unknown.
1556	 */
1557	if (tar->pax_hdrcharset_binary)
1558		sconv = tar->opt_sconv;
1559	else {
1560		sconv = archive_string_conversion_from_charset(
1561		    &(a->archive), "UTF-8", 1);
1562		if (sconv == NULL)
1563			return (ARCHIVE_FATAL);
1564		if (tar->compat_2x)
1565			archive_string_conversion_set_opt(sconv,
1566			    SCONV_SET_OPT_UTF8_LIBARCHIVE2X);
1567	}
1568
1569	if (archive_strlen(&(tar->entry_gname)) > 0) {
1570		if (archive_entry_copy_gname_l(entry, tar->entry_gname.s,
1571		    archive_strlen(&(tar->entry_gname)), sconv) != 0) {
1572			err = set_conversion_failed_error(a, sconv, "Gname");
1573			if (err == ARCHIVE_FATAL)
1574				return (err);
1575			/* Use a converted an original name. */
1576			archive_entry_copy_gname(entry, tar->entry_gname.s);
1577		}
1578	}
1579	if (archive_strlen(&(tar->entry_linkpath)) > 0) {
1580		if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s,
1581		    archive_strlen(&(tar->entry_linkpath)), sconv) != 0) {
1582			err = set_conversion_failed_error(a, sconv, "Linkname");
1583			if (err == ARCHIVE_FATAL)
1584				return (err);
1585			/* Use a converted an original name. */
1586			archive_entry_copy_link(entry, tar->entry_linkpath.s);
1587		}
1588	}
1589	/*
1590	 * Some extensions (such as the GNU sparse file extensions)
1591	 * deliberately store a synthetic name under the regular 'path'
1592	 * attribute and the real file name under a different attribute.
1593	 * Since we're supposed to not care about the order, we
1594	 * have no choice but to store all of the various filenames
1595	 * we find and figure it all out afterwards.  This is the
1596	 * figuring out part.
1597	 */
1598	as = NULL;
1599	if (archive_strlen(&(tar->entry_pathname_override)) > 0)
1600		as = &(tar->entry_pathname_override);
1601	else if (archive_strlen(&(tar->entry_pathname)) > 0)
1602		as = &(tar->entry_pathname);
1603	if (as != NULL) {
1604		if (archive_entry_copy_pathname_l(entry, as->s,
1605		    archive_strlen(as), sconv) != 0) {
1606			err = set_conversion_failed_error(a, sconv, "Pathname");
1607			if (err == ARCHIVE_FATAL)
1608				return (err);
1609			/* Use a converted an original name. */
1610			archive_entry_copy_pathname(entry, as->s);
1611		}
1612	}
1613	if (archive_strlen(&(tar->entry_uname)) > 0) {
1614		if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
1615		    archive_strlen(&(tar->entry_uname)), sconv) != 0) {
1616			err = set_conversion_failed_error(a, sconv, "Uname");
1617			if (err == ARCHIVE_FATAL)
1618				return (err);
1619			/* Use a converted an original name. */
1620			archive_entry_copy_uname(entry, tar->entry_uname.s);
1621		}
1622	}
1623	return (err);
1624}
1625
/*
 * Decode one "LIBARCHIVE.xattr.<name>" pax attribute and add it to
 * 'entry' as an extended attribute.  The name part is URL-encoded and
 * the value is base64-encoded.
 *
 * Returns 0 on success, 3 if 'name' lacks the prefix or has nothing
 * after it, 2 if the name cannot be URL-decoded, 1 if the value
 * cannot be base64-decoded.
 */
static int
pax_attribute_xattr(struct archive_entry *entry,
	char *name, char *value)
{
	char *xattr_name;
	void *xattr_value;
	size_t xattr_len;

	/* Require the 17-character prefix plus at least one more byte. */
	if (strlen(name) < 18)
		return 3;
	if (memcmp(name, "LIBARCHIVE.xattr.", 17) != 0)
		return 3;

	/* URL-decode name */
	xattr_name = url_decode(name + 17);
	if (xattr_name == NULL)
		return 2;

	/* Base-64 decode value */
	xattr_value = base64_decode(value, strlen(value), &xattr_len);
	if (xattr_value == NULL) {
		free(xattr_name);
		return 1;
	}

	archive_entry_xattr_add_entry(entry, xattr_name,
		xattr_value, xattr_len);

	/* Both decoded buffers are released here after the add. */
	free(xattr_name);
	free(xattr_value);
	return 0;
}
1658
1659/*
1660 * Parse a single key=value attribute.  key/value pointers are
1661 * assumed to point into reasonably long-lived storage.
1662 *
1663 * Note that POSIX reserves all-lowercase keywords.  Vendor-specific
1664 * extensions should always have keywords of the form "VENDOR.attribute"
1665 * In particular, it's quite feasible to support many different
1666 * vendor extensions here.  I'm using "LIBARCHIVE" for extensions
1667 * unique to this library.
1668 *
1669 * Investigate other vendor-specific extensions and see if
1670 * any of them look useful.
1671 */
1672static int
1673pax_attribute(struct archive_read *a, struct tar *tar,
1674    struct archive_entry *entry, char *key, char *value)
1675{
1676	int64_t s;
1677	long n;
1678	int err = ARCHIVE_OK, r;
1679
1680#ifndef __FreeBSD__
1681	if (value == NULL)
1682		value = "";	/* Disable compiler warning; do not pass
1683				 * NULL pointer to strlen().  */
1684#endif
1685	switch (key[0]) {
1686	case 'G':
1687		/* Reject GNU.sparse.* headers on non-regular files. */
1688		if (strncmp(key, "GNU.sparse", 10) == 0 &&
1689		    !tar->sparse_allowed) {
1690			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1691			    "Non-regular file cannot be sparse");
1692			return (ARCHIVE_FATAL);
1693		}
1694
1695		/* GNU "0.0" sparse pax format. */
1696		if (strcmp(key, "GNU.sparse.numblocks") == 0) {
1697			tar->sparse_offset = -1;
1698			tar->sparse_numbytes = -1;
1699			tar->sparse_gnu_major = 0;
1700			tar->sparse_gnu_minor = 0;
1701		}
1702		if (strcmp(key, "GNU.sparse.offset") == 0) {
1703			tar->sparse_offset = tar_atol10(value, strlen(value));
1704			if (tar->sparse_numbytes != -1) {
1705				if (gnu_add_sparse_entry(a, tar,
1706				    tar->sparse_offset, tar->sparse_numbytes)
1707				    != ARCHIVE_OK)
1708					return (ARCHIVE_FATAL);
1709				tar->sparse_offset = -1;
1710				tar->sparse_numbytes = -1;
1711			}
1712		}
1713		if (strcmp(key, "GNU.sparse.numbytes") == 0) {
1714			tar->sparse_numbytes = tar_atol10(value, strlen(value));
1715			if (tar->sparse_numbytes != -1) {
1716				if (gnu_add_sparse_entry(a, tar,
1717				    tar->sparse_offset, tar->sparse_numbytes)
1718				    != ARCHIVE_OK)
1719					return (ARCHIVE_FATAL);
1720				tar->sparse_offset = -1;
1721				tar->sparse_numbytes = -1;
1722			}
1723		}
1724		if (strcmp(key, "GNU.sparse.size") == 0) {
1725			tar->realsize = tar_atol10(value, strlen(value));
1726			archive_entry_set_size(entry, tar->realsize);
1727		}
1728
1729		/* GNU "0.1" sparse pax format. */
1730		if (strcmp(key, "GNU.sparse.map") == 0) {
1731			tar->sparse_gnu_major = 0;
1732			tar->sparse_gnu_minor = 1;
1733			if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK)
1734				return (ARCHIVE_WARN);
1735		}
1736
1737		/* GNU "1.0" sparse pax format */
1738		if (strcmp(key, "GNU.sparse.major") == 0) {
1739			tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value));
1740			tar->sparse_gnu_pending = 1;
1741		}
1742		if (strcmp(key, "GNU.sparse.minor") == 0) {
1743			tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value));
1744			tar->sparse_gnu_pending = 1;
1745		}
1746		if (strcmp(key, "GNU.sparse.name") == 0) {
1747			/*
1748			 * The real filename; when storing sparse
1749			 * files, GNU tar puts a synthesized name into
1750			 * the regular 'path' attribute in an attempt
1751			 * to limit confusion. ;-)
1752			 */
1753			archive_strcpy(&(tar->entry_pathname_override), value);
1754		}
1755		if (strcmp(key, "GNU.sparse.realsize") == 0) {
1756			tar->realsize = tar_atol10(value, strlen(value));
1757			archive_entry_set_size(entry, tar->realsize);
1758		}
1759		break;
1760	case 'L':
1761		/* Our extensions */
1762/* TODO: Handle arbitrary extended attributes... */
1763/*
1764		if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
1765			archive_entry_set_xxxxxx(entry, value);
1766*/
1767		if (strcmp(key, "LIBARCHIVE.creationtime") == 0) {
1768			pax_time(value, &s, &n);
1769			archive_entry_set_birthtime(entry, s, n);
1770		}
1771		if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0)
1772			pax_attribute_xattr(entry, key, value);
1773		break;
1774	case 'S':
1775		/* We support some keys used by the "star" archiver */
1776		if (strcmp(key, "SCHILY.acl.access") == 0) {
1777			if (tar->sconv_acl == NULL) {
1778				tar->sconv_acl =
1779				    archive_string_conversion_from_charset(
1780					&(a->archive), "UTF-8", 1);
1781				if (tar->sconv_acl == NULL)
1782					return (ARCHIVE_FATAL);
1783			}
1784
1785			r = archive_acl_parse_l(archive_entry_acl(entry),
1786			    value, ARCHIVE_ENTRY_ACL_TYPE_ACCESS,
1787			    tar->sconv_acl);
1788			if (r != ARCHIVE_OK) {
1789				err = r;
1790				if (err == ARCHIVE_FATAL) {
1791					archive_set_error(&a->archive, ENOMEM,
1792					    "Can't allocate memory for "
1793					    "SCHILY.acl.access");
1794					return (err);
1795				}
1796				archive_set_error(&a->archive,
1797				    ARCHIVE_ERRNO_MISC,
1798				    "Parse error: SCHILY.acl.access");
1799			}
1800		} else if (strcmp(key, "SCHILY.acl.default") == 0) {
1801			if (tar->sconv_acl == NULL) {
1802				tar->sconv_acl =
1803				    archive_string_conversion_from_charset(
1804					&(a->archive), "UTF-8", 1);
1805				if (tar->sconv_acl == NULL)
1806					return (ARCHIVE_FATAL);
1807			}
1808
1809			r = archive_acl_parse_l(archive_entry_acl(entry),
1810			    value, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT,
1811			    tar->sconv_acl);
1812			if (r != ARCHIVE_OK) {
1813				err = r;
1814				if (err == ARCHIVE_FATAL) {
1815					archive_set_error(&a->archive, ENOMEM,
1816					    "Can't allocate memory for "
1817					    "SCHILY.acl.default");
1818					return (err);
1819				}
1820				archive_set_error(&a->archive,
1821				    ARCHIVE_ERRNO_MISC,
1822				    "Parse error: SCHILY.acl.default");
1823			}
1824		} else if (strcmp(key, "SCHILY.devmajor") == 0) {
1825			archive_entry_set_rdevmajor(entry,
1826			    (dev_t)tar_atol10(value, strlen(value)));
1827		} else if (strcmp(key, "SCHILY.devminor") == 0) {
1828			archive_entry_set_rdevminor(entry,
1829			    (dev_t)tar_atol10(value, strlen(value)));
1830		} else if (strcmp(key, "SCHILY.fflags") == 0) {
1831			archive_entry_copy_fflags_text(entry, value);
1832		} else if (strcmp(key, "SCHILY.dev") == 0) {
1833			archive_entry_set_dev(entry,
1834			    (dev_t)tar_atol10(value, strlen(value)));
1835		} else if (strcmp(key, "SCHILY.ino") == 0) {
1836			archive_entry_set_ino(entry,
1837			    tar_atol10(value, strlen(value)));
1838		} else if (strcmp(key, "SCHILY.nlink") == 0) {
1839			archive_entry_set_nlink(entry, (unsigned)
1840			    tar_atol10(value, strlen(value)));
1841		} else if (strcmp(key, "SCHILY.realsize") == 0) {
1842			tar->realsize = tar_atol10(value, strlen(value));
1843			archive_entry_set_size(entry, tar->realsize);
1844		} else if (strcmp(key, "SUN.holesdata") == 0) {
1845			/* A Solaris extension for sparse. */
1846			r = solaris_sparse_parse(a, tar, entry, value);
1847			if (r < err) {
1848				if (r == ARCHIVE_FATAL)
1849					return (r);
1850				err = r;
1851				archive_set_error(&a->archive,
1852				    ARCHIVE_ERRNO_MISC,
1853				    "Parse error: SUN.holesdata");
1854			}
1855		}
1856		break;
1857	case 'a':
1858		if (strcmp(key, "atime") == 0) {
1859			pax_time(value, &s, &n);
1860			archive_entry_set_atime(entry, s, n);
1861		}
1862		break;
1863	case 'c':
1864		if (strcmp(key, "ctime") == 0) {
1865			pax_time(value, &s, &n);
1866			archive_entry_set_ctime(entry, s, n);
1867		} else if (strcmp(key, "charset") == 0) {
1868			/* TODO: Publish charset information in entry. */
1869		} else if (strcmp(key, "comment") == 0) {
1870			/* TODO: Publish comment in entry. */
1871		}
1872		break;
1873	case 'g':
1874		if (strcmp(key, "gid") == 0) {
1875			archive_entry_set_gid(entry,
1876			    tar_atol10(value, strlen(value)));
1877		} else if (strcmp(key, "gname") == 0) {
1878			archive_strcpy(&(tar->entry_gname), value);
1879		}
1880		break;
1881	case 'h':
1882		if (strcmp(key, "hdrcharset") == 0) {
1883			if (strcmp(value, "BINARY") == 0)
1884				/* Binary  mode. */
1885				tar->pax_hdrcharset_binary = 1;
1886			else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0)
1887				tar->pax_hdrcharset_binary = 0;
1888		}
1889		break;
1890	case 'l':
1891		/* pax interchange doesn't distinguish hardlink vs. symlink. */
1892		if (strcmp(key, "linkpath") == 0) {
1893			archive_strcpy(&(tar->entry_linkpath), value);
1894		}
1895		break;
1896	case 'm':
1897		if (strcmp(key, "mtime") == 0) {
1898			pax_time(value, &s, &n);
1899			archive_entry_set_mtime(entry, s, n);
1900		}
1901		break;
1902	case 'p':
1903		if (strcmp(key, "path") == 0) {
1904			archive_strcpy(&(tar->entry_pathname), value);
1905		}
1906		break;
1907	case 'r':
1908		/* POSIX has reserved 'realtime.*' */
1909		break;
1910	case 's':
1911		/* POSIX has reserved 'security.*' */
1912		/* Someday: if (strcmp(key, "security.acl") == 0) { ... } */
1913		if (strcmp(key, "size") == 0) {
1914			/* "size" is the size of the data in the entry. */
1915			tar->entry_bytes_remaining
1916			    = tar_atol10(value, strlen(value));
1917			/*
1918			 * But, "size" is not necessarily the size of
1919			 * the file on disk; if this is a sparse file,
1920			 * the disk size may have already been set from
1921			 * GNU.sparse.realsize or GNU.sparse.size or
1922			 * an old GNU header field or SCHILY.realsize
1923			 * or ....
1924			 */
1925			if (tar->realsize < 0) {
1926				archive_entry_set_size(entry,
1927				    tar->entry_bytes_remaining);
1928				tar->realsize
1929				    = tar->entry_bytes_remaining;
1930			}
1931		}
1932		break;
1933	case 'u':
1934		if (strcmp(key, "uid") == 0) {
1935			archive_entry_set_uid(entry,
1936			    tar_atol10(value, strlen(value)));
1937		} else if (strcmp(key, "uname") == 0) {
1938			archive_strcpy(&(tar->entry_uname), value);
1939		}
1940		break;
1941	}
1942	return (err);
1943}
1944
1945
1946
/*
 * Parse a decimal time value of the form [-]digits[.digits].
 *
 * On return, *ps holds the integer part with the sign applied; if the
 * digits do not fit in an int64_t the magnitude saturates at INT64_MAX.
 * *pn holds the fractional part scaled to nanoseconds.  At most nine
 * fractional digits are used, and *pn is never negative, even when the
 * value as a whole is.  Parsing simply stops at the first character
 * that does not fit the pattern; no error is reported.
 */
static void
pax_time(const char *p, int64_t *ps, long *pn)
{
	const int64_t mul_limit = INT64_MAX / 10;
	const int64_t last_digit_max = INT64_MAX % 10;
	int64_t secs = 0;
	unsigned long scale;
	int negative = 0;
	int d;

	if (*p == '-') {
		negative = 1;
		p++;
	}

	for (; *p >= '0' && *p <= '9'; ++p) {
		d = *p - '0';
		if (secs > mul_limit ||
		    (secs == mul_limit && d > last_digit_max)) {
			/* Saturate; p is left on the offending digit. */
			secs = INT64_MAX;
			break;
		}
		secs = (secs * 10) + d;
	}
	*ps = negative ? -secs : secs;

	/* Without a '.' at the current position there is no fraction. */
	*pn = 0;
	if (*p != '.')
		return;

	/* The first fractional digit is worth 10^8 ns, the next 10^7, ... */
	for (scale = 100000000UL; scale != 0; scale /= 10) {
		++p;
		if (*p < '0' || *p > '9')
			break;
		*pn += (*p - '0') * scale;
	}
}
1996
1997/*
1998 * Parse GNU tar header
1999 */
2000static int
2001header_gnutar(struct archive_read *a, struct tar *tar,
2002    struct archive_entry *entry, const void *h, size_t *unconsumed)
2003{
2004	const struct archive_entry_header_gnutar *header;
2005	int64_t t;
2006	int err = ARCHIVE_OK;
2007
2008	/*
2009	 * GNU header is like POSIX ustar, except 'prefix' is
2010	 * replaced with some other fields. This also means the
2011	 * filename is stored as in old-style archives.
2012	 */
2013
2014	/* Grab fields common to all tar variants. */
2015	err = header_common(a, tar, entry, h);
2016	if (err == ARCHIVE_FATAL)
2017		return (err);
2018
2019	/* Copy filename over (to ensure null termination). */
2020	header = (const struct archive_entry_header_gnutar *)h;
2021	if (archive_entry_copy_pathname_l(entry,
2022	    header->name, sizeof(header->name), tar->sconv) != 0) {
2023		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
2024		if (err == ARCHIVE_FATAL)
2025			return (err);
2026	}
2027
2028	/* Fields common to ustar and GNU */
2029	/* XXX Can the following be factored out since it's common
2030	 * to ustar and gnu tar?  Is it okay to move it down into
2031	 * header_common, perhaps?  */
2032	if (archive_entry_copy_uname_l(entry,
2033	    header->uname, sizeof(header->uname), tar->sconv) != 0) {
2034		err = set_conversion_failed_error(a, tar->sconv, "Uname");
2035		if (err == ARCHIVE_FATAL)
2036			return (err);
2037	}
2038
2039	if (archive_entry_copy_gname_l(entry,
2040	    header->gname, sizeof(header->gname), tar->sconv) != 0) {
2041		err = set_conversion_failed_error(a, tar->sconv, "Gname");
2042		if (err == ARCHIVE_FATAL)
2043			return (err);
2044	}
2045
2046	/* Parse out device numbers only for char and block specials */
2047	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
2048		archive_entry_set_rdevmajor(entry, (dev_t)
2049		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
2050		archive_entry_set_rdevminor(entry, (dev_t)
2051		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
2052	} else
2053		archive_entry_set_rdev(entry, 0);
2054
2055	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
2056
2057	/* Grab GNU-specific fields. */
2058	t = tar_atol(header->atime, sizeof(header->atime));
2059	if (t > 0)
2060		archive_entry_set_atime(entry, t, 0);
2061	t = tar_atol(header->ctime, sizeof(header->ctime));
2062	if (t > 0)
2063		archive_entry_set_ctime(entry, t, 0);
2064
2065	if (header->realsize[0] != 0) {
2066		tar->realsize
2067		    = tar_atol(header->realsize, sizeof(header->realsize));
2068		archive_entry_set_size(entry, tar->realsize);
2069	}
2070
2071	if (header->sparse[0].offset[0] != 0) {
2072		if (gnu_sparse_old_read(a, tar, header, unconsumed)
2073		    != ARCHIVE_OK)
2074			return (ARCHIVE_FATAL);
2075	} else {
2076		if (header->isextended[0] != 0) {
2077			/* XXX WTF? XXX */
2078		}
2079	}
2080
2081	return (err);
2082}
2083
2084static int
2085gnu_add_sparse_entry(struct archive_read *a, struct tar *tar,
2086    int64_t offset, int64_t remaining)
2087{
2088	struct sparse_block *p;
2089
2090	p = (struct sparse_block *)malloc(sizeof(*p));
2091	if (p == NULL) {
2092		archive_set_error(&a->archive, ENOMEM, "Out of memory");
2093		return (ARCHIVE_FATAL);
2094	}
2095	memset(p, 0, sizeof(*p));
2096	if (tar->sparse_last != NULL)
2097		tar->sparse_last->next = p;
2098	else
2099		tar->sparse_list = p;
2100	tar->sparse_last = p;
2101	p->offset = offset;
2102	p->remaining = remaining;
2103	return (ARCHIVE_OK);
2104}
2105
2106static void
2107gnu_clear_sparse_list(struct tar *tar)
2108{
2109	struct sparse_block *p;
2110
2111	while (tar->sparse_list != NULL) {
2112		p = tar->sparse_list;
2113		tar->sparse_list = p->next;
2114		free(p);
2115	}
2116	tar->sparse_last = NULL;
2117}
2118
2119/*
2120 * GNU tar old-format sparse data.
2121 *
2122 * GNU old-format sparse data is stored in a fixed-field
2123 * format.  Offset/size values are 11-byte octal fields (same
2124 * format as 'size' field in ustart header).  These are
2125 * stored in the header, allocating subsequent header blocks
2126 * as needed.  Extending the header in this way is a pretty
2127 * severe POSIX violation; this design has earned GNU tar a
2128 * lot of criticism.
2129 */
2130
2131static int
2132gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
2133    const struct archive_entry_header_gnutar *header, size_t *unconsumed)
2134{
2135	ssize_t bytes_read;
2136	const void *data;
2137	struct extended {
2138		struct gnu_sparse sparse[21];
2139		char	isextended[1];
2140		char	padding[7];
2141	};
2142	const struct extended *ext;
2143
2144	if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK)
2145		return (ARCHIVE_FATAL);
2146	if (header->isextended[0] == 0)
2147		return (ARCHIVE_OK);
2148
2149	do {
2150		tar_flush_unconsumed(a, unconsumed);
2151		data = __archive_read_ahead(a, 512, &bytes_read);
2152		if (bytes_read < 0)
2153			return (ARCHIVE_FATAL);
2154		if (bytes_read < 512) {
2155			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2156			    "Truncated tar archive "
2157			    "detected while reading sparse file data");
2158			return (ARCHIVE_FATAL);
2159		}
2160		*unconsumed = 512;
2161		ext = (const struct extended *)data;
2162		if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK)
2163			return (ARCHIVE_FATAL);
2164	} while (ext->isextended[0] != 0);
2165	if (tar->sparse_list != NULL)
2166		tar->entry_offset = tar->sparse_list->offset;
2167	return (ARCHIVE_OK);
2168}
2169
2170static int
2171gnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
2172    const struct gnu_sparse *sparse, int length)
2173{
2174	while (length > 0 && sparse->offset[0] != 0) {
2175		if (gnu_add_sparse_entry(a, tar,
2176		    tar_atol(sparse->offset, sizeof(sparse->offset)),
2177		    tar_atol(sparse->numbytes, sizeof(sparse->numbytes)))
2178		    != ARCHIVE_OK)
2179			return (ARCHIVE_FATAL);
2180		sparse++;
2181		length--;
2182	}
2183	return (ARCHIVE_OK);
2184}
2185
2186/*
2187 * GNU tar sparse format 0.0
2188 *
2189 * Beginning with GNU tar 1.15, sparse files are stored using
2190 * information in the pax extended header.  The GNU tar maintainers
2191 * have gone through a number of variations in the process of working
2192 * out this scheme; fortunately, they're all numbered.
2193 *
2194 * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
2195 * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
2196 * store offset/size for each block.  The repeated instances of these
2197 * latter fields violate the pax specification (which frowns on
2198 * duplicate keys), so this format was quickly replaced.
2199 */
2200
2201/*
2202 * GNU tar sparse format 0.1
2203 *
2204 * This version replaced the offset/numbytes attributes with
2205 * a single "map" attribute that stored a list of integers.  This
2206 * format had two problems: First, the "map" attribute could be very
2207 * long, which caused problems for some implementations.  More
2208 * importantly, the sparse data was lost when extracted by archivers
2209 * that didn't recognize this extension.
2210 */
2211
2212static int
2213gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p)
2214{
2215	const char *e;
2216	int64_t offset = -1, size = -1;
2217
2218	for (;;) {
2219		e = p;
2220		while (*e != '\0' && *e != ',') {
2221			if (*e < '0' || *e > '9')
2222				return (ARCHIVE_WARN);
2223			e++;
2224		}
2225		if (offset < 0) {
2226			offset = tar_atol10(p, e - p);
2227			if (offset < 0)
2228				return (ARCHIVE_WARN);
2229		} else {
2230			size = tar_atol10(p, e - p);
2231			if (size < 0)
2232				return (ARCHIVE_WARN);
2233			if (gnu_add_sparse_entry(a, tar, offset, size)
2234			    != ARCHIVE_OK)
2235				return (ARCHIVE_FATAL);
2236			offset = -1;
2237		}
2238		if (*e == '\0')
2239			return (ARCHIVE_OK);
2240		p = e + 1;
2241	}
2242}
2243
2244/*
2245 * GNU tar sparse format 1.0
2246 *
2247 * The idea: The offset/size data is stored as a series of base-10
2248 * ASCII numbers prepended to the file data, so that dearchivers that
2249 * don't support this format will extract the block map along with the
2250 * data and a separate post-process can restore the sparseness.
2251 *
2252 * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
2253 * padding to the body of the file when using this format.  GNU tar
2254 * 1.17 corrected this bug without bumping the version number, so
2255 * it's not possible to support both variants.  This code supports
2256 * the later variant at the expense of not supporting the former.
2257 *
2258 * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
2259 * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
2260 */
2261
2262/*
2263 * Read the next line from the input, and parse it as a decimal
2264 * integer followed by '\n'.  Returns positive integer value or
2265 * negative on error.
2266 */
2267static int64_t
2268gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
2269    int64_t *remaining, size_t *unconsumed)
2270{
2271	int64_t l, limit, last_digit_limit;
2272	const char *p;
2273	ssize_t bytes_read;
2274	int base, digit;
2275
2276	base = 10;
2277	limit = INT64_MAX / base;
2278	last_digit_limit = INT64_MAX % base;
2279
2280	/*
2281	 * Skip any lines starting with '#'; GNU tar specs
2282	 * don't require this, but they should.
2283	 */
2284	do {
2285		bytes_read = readline(a, tar, &p,
2286			(ssize_t)tar_min(*remaining, 100), unconsumed);
2287		if (bytes_read <= 0)
2288			return (ARCHIVE_FATAL);
2289		*remaining -= bytes_read;
2290	} while (p[0] == '#');
2291
2292	l = 0;
2293	while (bytes_read > 0) {
2294		if (*p == '\n')
2295			return (l);
2296		if (*p < '0' || *p >= '0' + base)
2297			return (ARCHIVE_WARN);
2298		digit = *p - '0';
2299		if (l > limit || (l == limit && digit > last_digit_limit))
2300			l = INT64_MAX; /* Truncate on overflow. */
2301		else
2302			l = (l * base) + digit;
2303		p++;
2304		bytes_read--;
2305	}
2306	/* TODO: Error message. */
2307	return (ARCHIVE_WARN);
2308}
2309
2310/*
2311 * Returns length (in bytes) of the sparse data description
2312 * that was read.
2313 */
2314static ssize_t
2315gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed)
2316{
2317	ssize_t bytes_read;
2318	int entries;
2319	int64_t offset, size, to_skip, remaining;
2320
2321	/* Clear out the existing sparse list. */
2322	gnu_clear_sparse_list(tar);
2323
2324	remaining = tar->entry_bytes_remaining;
2325
2326	/* Parse entries. */
2327	entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2328	if (entries < 0)
2329		return (ARCHIVE_FATAL);
2330	/* Parse the individual entries. */
2331	while (entries-- > 0) {
2332		/* Parse offset/size */
2333		offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2334		if (offset < 0)
2335			return (ARCHIVE_FATAL);
2336		size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2337		if (size < 0)
2338			return (ARCHIVE_FATAL);
2339		/* Add a new sparse entry. */
2340		if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK)
2341			return (ARCHIVE_FATAL);
2342	}
2343	/* Skip rest of block... */
2344	tar_flush_unconsumed(a, unconsumed);
2345	bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining);
2346	to_skip = 0x1ff & -bytes_read;
2347	if (to_skip != __archive_read_consume(a, to_skip))
2348		return (ARCHIVE_FATAL);
2349	return ((ssize_t)(bytes_read + to_skip));
2350}
2351
2352/*
2353 * Solaris pax extension for a sparse file. This is recorded with the
2354 * data and hole pairs. The way recording sparse information by Solaris'
2355 * pax simply indicates where data and sparse are, so the stored contents
2356 * consist of both data and hole.
2357 */
2358static int
2359solaris_sparse_parse(struct archive_read *a, struct tar *tar,
2360    struct archive_entry *entry, const char *p)
2361{
2362	const char *e;
2363	int64_t start, end;
2364	int hole = 1;
2365
2366	(void)entry; /* UNUSED */
2367
2368	end = 0;
2369	if (*p == ' ')
2370		p++;
2371	else
2372		return (ARCHIVE_WARN);
2373	for (;;) {
2374		e = p;
2375		while (*e != '\0' && *e != ' ') {
2376			if (*e < '0' || *e > '9')
2377				return (ARCHIVE_WARN);
2378			e++;
2379		}
2380		start = end;
2381		end = tar_atol10(p, e - p);
2382		if (end < 0)
2383			return (ARCHIVE_WARN);
2384		if (start < end) {
2385			if (gnu_add_sparse_entry(a, tar, start,
2386			    end - start) != ARCHIVE_OK)
2387				return (ARCHIVE_FATAL);
2388			tar->sparse_last->hole = hole;
2389		}
2390		if (*e == '\0')
2391			return (ARCHIVE_OK);
2392		p = e + 1;
2393		hole = hole == 0;
2394	}
2395}
2396
2397/*-
2398 * Convert text->integer.
2399 *
2400 * Traditional tar formats (including POSIX) specify base-8 for
2401 * all of the standard numeric fields.  This is a significant limitation
2402 * in practice:
2403 *   = file size is limited to 8GB
2404 *   = rdevmajor and rdevminor are limited to 21 bits
2405 *   = uid/gid are limited to 21 bits
2406 *
2407 * There are two workarounds for this:
2408 *   = pax extended headers, which use variable-length string fields
2409 *   = GNU tar and STAR both allow either base-8 or base-256 in
2410 *      most fields.  The high bit is set to indicate base-256.
2411 *
2412 * On read, this implementation supports both extensions.
2413 */
static int64_t
tar_atol(const char *p, size_t char_cnt)
{
	/*
	 * A set high bit in the first byte selects base-256; anything
	 * else is parsed as base-8.  (Technically, GNU tar considers a
	 * field to be in base-256 only if the first byte is 0xff or
	 * 0x80.)
	 */
	const int is_base256 = (*p & 0x80) != 0;

	return (is_base256 ?
	    tar_atol256(p, char_cnt) : tar_atol8(p, char_cnt));
}
2425
/*
 * Parse a signed integer in the given base from a field of at most
 * char_cnt bytes.  The field need not be NUL-terminated.
 *
 * Leading spaces and tabs are skipped, an optional '-' is accepted,
 * and parsing stops at the first byte that is not a digit of the
 * base.  A magnitude too large for int64_t saturates at INT64_MAX
 * (so the most negative result is -INT64_MAX).  An empty or all-blank
 * field yields 0.
 *
 * No byte at or beyond p + char_cnt is ever read.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol_base_n(const char *p, size_t char_cnt, int base)
{
	int64_t	l, limit, last_digit_limit;
	int digit, sign;

	limit = INT64_MAX / base;
	last_digit_limit = INT64_MAX % base;

	/* the pointer will not be dereferenced if char_cnt is zero
	 * due to the way the && operator is evaluated.
	 */
	while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	sign = 1;
	if (char_cnt != 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;
	}

	l = 0;
	/*
	 * Test the remaining count before every dereference, so that a
	 * field consisting entirely of digits does not cause a read of
	 * the byte just past its end.
	 */
	while (char_cnt != 0) {
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = INT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (sign < 0) ? -l : l;
}
2470
/* Parse an octal field; see tar_atol_base_n() for the parsing rules. */
static int64_t
tar_atol8(const char *p, size_t char_cnt)
{
	const int octal = 8;

	return (tar_atol_base_n(p, char_cnt, octal));
}
2476
/* Parse a decimal field; see tar_atol_base_n() for the parsing rules. */
static int64_t
tar_atol10(const char *p, size_t char_cnt)
{
	const int decimal = 10;

	return (tar_atol_base_n(p, char_cnt, decimal));
}
2482
/*
 * Parse a base-256 integer.  This is just a straight signed binary
 * value in big-endian order, except that the high-order bit of the
 * first byte is ignored: bit 0x40 of that byte is the sign and its
 * low six bits are the most significant value bits.
 *
 * A value that does not fit in int64_t saturates at INT64_MAX or
 * INT64_MIN.  A zero-length field yields 0 without reading any byte.
 *
 * The accumulation uses multiplication and addition rather than
 * left shifts, because left-shifting a negative value is undefined
 * behavior in C.
 */
static int64_t
tar_atol256(const char *_p, size_t char_cnt)
{
	int64_t	l, upper_limit, lower_limit;
	const unsigned char *p = (const unsigned char *)_p;

	/* Nothing to read; this also keeps --char_cnt from wrapping. */
	if (char_cnt == 0)
		return (0);

	upper_limit = INT64_MAX / 256;
	lower_limit = INT64_MIN / 256;

	/*
	 * Low six bits of the first byte, sign-extended from bit 0x40:
	 * subtracting 64 equals padding the upper bits with ones.
	 */
	l = 0x3f & *p;
	if ((0x40 & *p) == 0x40)
		l -= 64;
	p++;

	while (--char_cnt > 0) {
		if (l > upper_limit) {
			l = INT64_MAX; /* Truncate on overflow */
			break;
		} else if (l < lower_limit) {
			l = INT64_MIN;
			break;
		}
		/* Within the limits above, l * 256 + byte cannot overflow. */
		l = (l * 256) + *p++;
	}
	return (l);
}
2515
2516/*
2517 * Returns length of line (including trailing newline)
2518 * or negative on error.  'start' argument is updated to
2519 * point to first character of line.  This avoids copying
2520 * when possible.
2521 */
2522static ssize_t
2523readline(struct archive_read *a, struct tar *tar, const char **start,
2524    ssize_t limit, size_t *unconsumed)
2525{
2526	ssize_t bytes_read;
2527	ssize_t total_size = 0;
2528	const void *t;
2529	const char *s;
2530	void *p;
2531
2532	tar_flush_unconsumed(a, unconsumed);
2533
2534	t = __archive_read_ahead(a, 1, &bytes_read);
2535	if (bytes_read <= 0)
2536		return (ARCHIVE_FATAL);
2537	s = t;  /* Start of line? */
2538	p = memchr(t, '\n', bytes_read);
2539	/* If we found '\n' in the read buffer, return pointer to that. */
2540	if (p != NULL) {
2541		bytes_read = 1 + ((const char *)p) - s;
2542		if (bytes_read > limit) {
2543			archive_set_error(&a->archive,
2544			    ARCHIVE_ERRNO_FILE_FORMAT,
2545			    "Line too long");
2546			return (ARCHIVE_FATAL);
2547		}
2548		*unconsumed = bytes_read;
2549		*start = s;
2550		return (bytes_read);
2551	}
2552	*unconsumed = bytes_read;
2553	/* Otherwise, we need to accumulate in a line buffer. */
2554	for (;;) {
2555		if (total_size + bytes_read > limit) {
2556			archive_set_error(&a->archive,
2557			    ARCHIVE_ERRNO_FILE_FORMAT,
2558			    "Line too long");
2559			return (ARCHIVE_FATAL);
2560		}
2561		if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
2562			archive_set_error(&a->archive, ENOMEM,
2563			    "Can't allocate working buffer");
2564			return (ARCHIVE_FATAL);
2565		}
2566		memcpy(tar->line.s + total_size, t, bytes_read);
2567		tar_flush_unconsumed(a, unconsumed);
2568		total_size += bytes_read;
2569		/* If we found '\n', clean up and return. */
2570		if (p != NULL) {
2571			*start = tar->line.s;
2572			return (total_size);
2573		}
2574		/* Read some more. */
2575		t = __archive_read_ahead(a, 1, &bytes_read);
2576		if (bytes_read <= 0)
2577			return (ARCHIVE_FATAL);
2578		s = t;  /* Start of line? */
2579		p = memchr(t, '\n', bytes_read);
2580		/* If we found '\n', trim the read. */
2581		if (p != NULL) {
2582			bytes_read = 1 + ((const char *)p) - s;
2583		}
2584		*unconsumed = bytes_read;
2585	}
2586}
2587
/*
 * base64_decode - Base64 decode
 *
 * Decodes 'len' bytes starting at 's' into a newly malloc'ed buffer
 * and stores the number of decoded bytes in *out_len.  Returns NULL
 * (with *out_len set to 0) if the allocation fails.  The result is
 * not NUL-terminated; the caller owns the returned buffer.
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 *
 * Characters outside the base-64 alphabet are skipped, and decoding
 * stops at the first '=' or '_'.
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const unsigned char digits[64] = {
		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
		'4','5','6','7','8','9','+','/' };
	static unsigned char decode_table[128];
	const unsigned char *src = (const unsigned char *)s;
	char *out, *d;
	unsigned char c;
	unsigned idx;
	int v, group_size;

	/* Build the reverse lookup table on first use. */
	if (decode_table[digits[1]] != 1) {
		memset(decode_table, 0xff, sizeof(decode_table));
		for (idx = 0; idx < sizeof(digits); idx++)
			decode_table[digits[idx]] = idx;
	}

	/* Three output bytes per four input characters is the most */
	/* we can produce; part of this may go unused. */
	out = (char *)malloc(len - len / 4 + 1);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}
	d = out;

	while (len > 0) {
		/* Gather the next group of (up to) four digits. */
		v = 0;
		group_size = 0;
		while (group_size < 4 && len > 0) {
			c = *src;
			/* '=' or '_' padding indicates final group. */
			if (c == '=' || c == '_') {
				len = 0;
				break;
			}
			src++;
			len--;
			/* Skip illegal characters (including line breaks) */
			if (c > 127 || c < 32 || decode_table[c] == 0xff)
				continue;
			v = (v << 6) | decode_table[c];
			group_size++;
		}
		/* Left-align a short group within the 24-bit value. */
		v <<= 6 * (4 - group_size);
		/* Emit as many bytes as the group carries; 1 digit is invalid. */
		if (group_size >= 2)
			d[0] = (v >> 16) & 0xff;
		if (group_size >= 3)
			d[1] = (v >> 8) & 0xff;
		if (group_size == 4)
			d[2] = v & 0xff;
		d += group_size * 3 / 4;
	}

	*out_len = d - out;
	return (out);
}
2668
/*
 * Return a newly malloc'ed, NUL-terminated copy of 'in' with every
 * "%XX" escape (XX being two hex digits) replaced by the byte it
 * encodes.  A '%' that is not followed by two hex digits is copied
 * through unchanged.  Returns NULL if the allocation fails; the
 * caller owns the returned buffer.
 */
static char *
url_decode(const char *in)
{
	const char *s = in;
	char *out, *d;
	int hi, lo;

	/* Decoding never lengthens the string. */
	out = (char *)malloc(strlen(in) + 1);
	if (out == NULL)
		return (NULL);
	d = out;

	while (*s != '\0') {
		hi = -1;
		lo = -1;
		/* Only look at s[1] and s[2] when they exist. */
		if (s[0] == '%' && s[1] != '\0' && s[2] != '\0') {
			hi = tohex(s[1]);
			lo = tohex(s[2]);
		}
		if (hi >= 0 && lo >= 0) {
			/* Valid escape: three input chars, one output byte. */
			*d++ = (char)((hi << 4) | lo);
			s += 3;
		} else {
			/* Ordinary character, or a '%' that is not an escape. */
			*d++ = *s++;
		}
	}
	*d = '\0';
	return (out);
}
2697
/*
 * Return the value (0-15) of the hexadecimal digit 'c', accepting
 * both upper and lower case, or -1 if 'c' is not a hex digit.
 */
static int
tohex(int c)
{
	if ('0' <= c && c <= '9')
		return (c - '0');
	if ('A' <= c && c <= 'F')
		return (10 + (c - 'A'));
	if ('a' <= c && c <= 'f')
		return (10 + (c - 'a'));
	return (-1);
}
2710