archive_read_support_format_tar.c revision 302001
118316Swollman/*- 218316Swollman * Copyright (c) 2003-2007 Tim Kientzle 318316Swollman * Copyright (c) 2011-2012 Michihiro NAKAJIMA 418316Swollman * All rights reserved. 518316Swollman * 618316Swollman * Redistribution and use in source and binary forms, with or without 718316Swollman * modification, are permitted provided that the following conditions 818316Swollman * are met: 918316Swollman * 1. Redistributions of source code must retain the above copyright 1018316Swollman * notice, this list of conditions and the following disclaimer. 1118316Swollman * 2. Redistributions in binary form must reproduce the above copyright 1218316Swollman * notice, this list of conditions and the following disclaimer in the 1318316Swollman * documentation and/or other materials provided with the distribution. 1418316Swollman * 1518316Swollman * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 1618316Swollman * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1718316Swollman * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1818316Swollman * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 1918316Swollman * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2018316Swollman * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2118316Swollman * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2218316Swollman * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2318316Swollman * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2418316Swollman * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
2518316Swollman */ 2618316Swollman 2718316Swollman#include "archive_platform.h" 2846303Smarkm__FBSDID("$FreeBSD: stable/10/contrib/libarchive/libarchive/archive_read_support_format_tar.c 302001 2016-06-17 22:40:10Z mm $"); 2950476Speter 3018316Swollman#ifdef HAVE_ERRNO_H 3118316Swollman#include <errno.h> 3246303Smarkm#endif 3346303Smarkm#include <stddef.h> 34126250Sbms#ifdef HAVE_STDLIB_H 3546303Smarkm#include <stdlib.h> 36126250Sbms#endif 37126250Sbms#ifdef HAVE_STRING_H 38126250Sbms#include <string.h> 39126250Sbms#endif 40126250Sbms 4118316Swollman#include "archive.h" 4218316Swollman#include "archive_acl_private.h" /* For ACL parsing routines. */ 4319880Swollman#include "archive_entry.h" 4419880Swollman#include "archive_entry_locale.h" 4546303Smarkm#include "archive_private.h" 4619880Swollman#include "archive_read_private.h" 4719880Swollman 4818316Swollman#define tar_min(a,b) ((a) < (b) ? (a) : (b)) 4918316Swollman 5018316Swollman/* 5118316Swollman * Layout of POSIX 'ustar' tar header. 
5218316Swollman */ 5318316Swollmanstruct archive_entry_header_ustar { 5419880Swollman char name[100]; 5518316Swollman char mode[8]; 5618316Swollman char uid[8]; 5719880Swollman char gid[8]; 58148726Sstefanf char size[12]; 59148726Sstefanf char mtime[12]; 6020339Swollman char checksum[8]; 6120339Swollman char typeflag[1]; 6219880Swollman char linkname[100]; /* "old format" header ends here */ 6319880Swollman char magic[6]; /* For POSIX: "ustar\0" */ 6419880Swollman char version[2]; /* For POSIX: "00" */ 6519880Swollman char uname[32]; 6620339Swollman char gname[32]; 6720339Swollman char rdevmajor[8]; 6820339Swollman char rdevminor[8]; 6920339Swollman char prefix[155]; 7020339Swollman}; 7118316Swollman 7218316Swollman/* 7318316Swollman * Structure of GNU tar header 7418316Swollman */ 7518316Swollmanstruct gnu_sparse { 7618316Swollman char offset[12]; 7718316Swollman char numbytes[12]; 7818316Swollman}; 7918316Swollman 8018316Swollmanstruct archive_entry_header_gnutar { 8118316Swollman char name[100]; 8218316Swollman char mode[8]; 8318316Swollman char uid[8]; 84148726Sstefanf char gid[8]; 8518316Swollman char size[12]; 8619880Swollman char mtime[12]; 8719880Swollman char checksum[8]; 8819880Swollman char typeflag[1]; 8919880Swollman char linkname[100]; 9019880Swollman char magic[8]; /* "ustar \0" (note blank/blank/null at end) */ 9119880Swollman char uname[32]; 9219880Swollman char gname[32]; 9319880Swollman char rdevmajor[8]; 9419880Swollman char rdevminor[8]; 9519880Swollman char atime[12]; 9619880Swollman char ctime[12]; 9719880Swollman char offset[12]; 9819880Swollman char longnames[4]; 99190713Sphk char unused[1]; 10019880Swollman struct gnu_sparse sparse[4]; 10119880Swollman char isextended[1]; 10219880Swollman char realsize[12]; 10319880Swollman /* 10419880Swollman * Old GNU format doesn't use POSIX 'prefix' field; they use 10519880Swollman * the 'L' (longname) entry instead. 
10620606Swollman */ 10720606Swollman}; 10820606Swollman 10920606Swollman/* 11020606Swollman * Data specific to this format. 11120606Swollman */ 11220606Swollmanstruct sparse_block { 11320606Swollman struct sparse_block *next; 11420606Swollman int64_t offset; 11520606Swollman int64_t remaining; 11620606Swollman int hole; 11720606Swollman}; 11820606Swollman 11919880Swollmanstruct tar { 12019880Swollman struct archive_string acl_text; 12119880Swollman struct archive_string entry_pathname; 12219880Swollman /* For "GNU.sparse.name" and other similar path extensions. */ 12319880Swollman struct archive_string entry_pathname_override; 12419880Swollman struct archive_string entry_linkpath; 12519880Swollman struct archive_string entry_uname; 12619880Swollman struct archive_string entry_gname; 12719880Swollman struct archive_string longlink; 12818316Swollman struct archive_string longname; 12918316Swollman struct archive_string pax_header; 13018316Swollman struct archive_string pax_global; 13118316Swollman struct archive_string line; 13218316Swollman int pax_hdrcharset_binary; 13318316Swollman int header_recursion_depth; 13418316Swollman int64_t entry_bytes_remaining; 13518316Swollman int64_t entry_offset; 13619880Swollman int64_t entry_padding; 13719880Swollman int64_t entry_bytes_unconsumed; 13818316Swollman int64_t realsize; 13919880Swollman struct sparse_block *sparse_list; 14018316Swollman struct sparse_block *sparse_last; 14118316Swollman int64_t sparse_offset; 14219880Swollman int64_t sparse_numbytes; 14346303Smarkm int sparse_gnu_major; 14418316Swollman int sparse_gnu_minor; 14518316Swollman char sparse_gnu_pending; 14646303Smarkm 14718316Swollman struct archive_string localname; 14818316Swollman struct archive_string_conv *opt_sconv; 14946303Smarkm struct archive_string_conv *sconv; 15020339Swollman struct archive_string_conv *sconv_acl; 15146303Smarkm struct archive_string_conv *sconv_default; 15246303Smarkm int init_default_conversion; 15346303Smarkm int compat_2x; 
15418316Swollman int process_mac_extensions; 15519880Swollman int read_concatenated_archives; 15619880Swollman}; 15719880Swollman 15819880Swollmanstatic int archive_block_is_null(const char *p); 15919880Swollmanstatic char *base64_decode(const char *, size_t, size_t *); 16018316Swollmanstatic int gnu_add_sparse_entry(struct archive_read *, struct tar *, 16119880Swollman int64_t offset, int64_t remaining); 16218316Swollman 163286348Sdelphijstatic void gnu_clear_sparse_list(struct tar *); 164286348Sdelphijstatic int gnu_sparse_old_read(struct archive_read *, struct tar *, 165286348Sdelphij const struct archive_entry_header_gnutar *header, size_t *); 166286348Sdelphijstatic int gnu_sparse_old_parse(struct archive_read *, struct tar *, 167286348Sdelphij const struct gnu_sparse *sparse, int length); 168286348Sdelphijstatic int gnu_sparse_01_parse(struct archive_read *, struct tar *, 16918316Swollman const char *); 17019880Swollmanstatic ssize_t gnu_sparse_10_read(struct archive_read *, struct tar *, 17119880Swollman size_t *); 17219880Swollmanstatic int header_Solaris_ACL(struct archive_read *, struct tar *, 17318316Swollman struct archive_entry *, const void *, size_t *); 17418316Swollmanstatic int header_common(struct archive_read *, struct tar *, 17518316Swollman struct archive_entry *, const void *); 17618316Swollmanstatic int header_old_tar(struct archive_read *, struct tar *, 17746303Smarkm struct archive_entry *, const void *); 17819880Swollmanstatic int header_pax_extensions(struct archive_read *, struct tar *, 17919880Swollman struct archive_entry *, const void *, size_t *); 18019880Swollmanstatic int header_pax_global(struct archive_read *, struct tar *, 18118316Swollman struct archive_entry *, const void *h, size_t *); 18218316Swollmanstatic int header_longlink(struct archive_read *, struct tar *, 18318316Swollman struct archive_entry *, const void *h, size_t *); 18418316Swollmanstatic int header_longname(struct archive_read *, struct tar *, 18519880Swollman 
struct archive_entry *, const void *h, size_t *); 18618316Swollmanstatic int read_mac_metadata_blob(struct archive_read *, struct tar *, 18718316Swollman struct archive_entry *, const void *h, size_t *); 18818316Swollmanstatic int header_volume(struct archive_read *, struct tar *, 18918316Swollman struct archive_entry *, const void *h, size_t *); 19018316Swollmanstatic int header_ustar(struct archive_read *, struct tar *, 19119880Swollman struct archive_entry *, const void *h); 19218316Swollmanstatic int header_gnutar(struct archive_read *, struct tar *, 19337908Scharnier struct archive_entry *, const void *h, size_t *); 19418316Swollmanstatic int archive_read_format_tar_bid(struct archive_read *, int); 19518316Swollmanstatic int archive_read_format_tar_options(struct archive_read *, 19618316Swollman const char *, const char *); 19718316Swollmanstatic int archive_read_format_tar_cleanup(struct archive_read *); 19818316Swollmanstatic int archive_read_format_tar_read_data(struct archive_read *a, 19919880Swollman const void **buff, size_t *size, int64_t *offset); 20019880Swollmanstatic int archive_read_format_tar_skip(struct archive_read *a); 20119880Swollmanstatic int archive_read_format_tar_read_header(struct archive_read *, 20218316Swollman struct archive_entry *); 20318316Swollmanstatic int checksum(struct archive_read *, const void *); 20418316Swollmanstatic int pax_attribute(struct archive_read *, struct tar *, 20518316Swollman struct archive_entry *, const char *key, const char *value); 20618316Swollmanstatic int pax_header(struct archive_read *, struct tar *, 20719880Swollman struct archive_entry *, char *attr); 20818316Swollmanstatic void pax_time(const char *, int64_t *sec, long *nanos); 20919880Swollmanstatic ssize_t readline(struct archive_read *, struct tar *, const char **, 21019880Swollman ssize_t limit, size_t *); 21119880Swollmanstatic int read_body_to_string(struct archive_read *, struct tar *, 21219880Swollman struct archive_string *, const void *h, 
size_t *); 21319880Swollmanstatic int solaris_sparse_parse(struct archive_read *, struct tar *, 21419880Swollman struct archive_entry *, const char *); 21519880Swollmanstatic int64_t tar_atol(const char *, size_t); 21619880Swollmanstatic int64_t tar_atol10(const char *, size_t); 21719880Swollmanstatic int64_t tar_atol256(const char *, size_t); 21819880Swollmanstatic int64_t tar_atol8(const char *, size_t); 21919880Swollmanstatic int tar_read_header(struct archive_read *, struct tar *, 22019880Swollman struct archive_entry *, size_t *); 22119880Swollmanstatic int tohex(int c); 22219880Swollmanstatic char *url_decode(const char *); 22319880Swollmanstatic void tar_flush_unconsumed(struct archive_read *, size_t *); 22419880Swollman 22518316Swollman 22618316Swollmanint 22719880Swollmanarchive_read_support_format_gnutar(struct archive *a) 22818316Swollman{ 22919880Swollman archive_check_magic(a, ARCHIVE_READ_MAGIC, 23019880Swollman ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar"); 23119880Swollman return (archive_read_support_format_tar(a)); 23219880Swollman} 23319880Swollman 23418316Swollman 23519880Swollmanint 23619880Swollmanarchive_read_support_format_tar(struct archive *_a) 23719880Swollman{ 23819880Swollman struct archive_read *a = (struct archive_read *)_a; 23918316Swollman struct tar *tar; 24018316Swollman int r; 24118316Swollman 24218316Swollman archive_check_magic(_a, ARCHIVE_READ_MAGIC, 24337908Scharnier ARCHIVE_STATE_NEW, "archive_read_support_format_tar"); 24418316Swollman 24519880Swollman tar = (struct tar *)calloc(1, sizeof(*tar)); 24619880Swollman#ifdef HAVE_COPYFILE_H 24719880Swollman /* Set this by default on Mac OS. 
*/ 24819880Swollman tar->process_mac_extensions = 1; 24919880Swollman#endif 25018316Swollman if (tar == NULL) { 25118316Swollman archive_set_error(&a->archive, ENOMEM, 25218316Swollman "Can't allocate tar data"); 25318316Swollman return (ARCHIVE_FATAL); 25419880Swollman } 25519880Swollman 25619880Swollman r = __archive_read_register_format(a, tar, "tar", 25719880Swollman archive_read_format_tar_bid, 25818316Swollman archive_read_format_tar_options, 25919880Swollman archive_read_format_tar_read_header, 26019880Swollman archive_read_format_tar_read_data, 26119880Swollman archive_read_format_tar_skip, 26219880Swollman NULL, 26319880Swollman archive_read_format_tar_cleanup, 26418316Swollman NULL, 26519880Swollman NULL); 26619880Swollman 26719880Swollman if (r != ARCHIVE_OK) 26819880Swollman free(tar); 26919880Swollman return (ARCHIVE_OK); 27019880Swollman} 27119880Swollman 27220339Swollmanstatic int 27320735Sachearchive_read_format_tar_cleanup(struct archive_read *a) 27420339Swollman{ 27519880Swollman struct tar *tar; 27619880Swollman 27719880Swollman tar = (struct tar *)(a->format->data); 27819880Swollman gnu_clear_sparse_list(tar); 27919880Swollman archive_string_free(&tar->acl_text); 28019880Swollman archive_string_free(&tar->entry_pathname); 28120339Swollman archive_string_free(&tar->entry_pathname_override); 28219880Swollman archive_string_free(&tar->entry_linkpath); 28319880Swollman archive_string_free(&tar->entry_uname); 28490868Smike archive_string_free(&tar->entry_gname); 28519880Swollman archive_string_free(&tar->line); 28618316Swollman archive_string_free(&tar->pax_global); 28718316Swollman archive_string_free(&tar->pax_header); 28818316Swollman archive_string_free(&tar->longname); 28918316Swollman archive_string_free(&tar->longlink); 29018316Swollman archive_string_free(&tar->localname); 29118316Swollman free(tar); 29218316Swollman (a->format->data) = NULL; 29319880Swollman return (ARCHIVE_OK); 29446303Smarkm} 29546303Smarkm 29646303Smarkm 
29718316Swollmanstatic int 298272872Shrsarchive_read_format_tar_bid(struct archive_read *a, int best_bid) 299272872Shrs{ 300272872Shrs int bid; 301272872Shrs const char *h; 302272872Shrs const struct archive_entry_header_ustar *header; 303272872Shrs 304272872Shrs (void)best_bid; /* UNUSED */ 305272872Shrs 306272872Shrs bid = 0; 307272872Shrs 308272872Shrs /* Now let's look at the actual header and see if it matches. */ 309272872Shrs h = __archive_read_ahead(a, 512, NULL); 310272872Shrs if (h == NULL) 31118316Swollman return (-1); 31218316Swollman 31319880Swollman /* If it's an end-of-archive mark, we can handle it. */ 31418316Swollman if (h[0] == 0 && archive_block_is_null(h)) { 31518316Swollman /* 31618316Swollman * Usually, I bid the number of bits verified, but 31718316Swollman * in this case, 4096 seems excessive so I picked 10 as 31818316Swollman * an arbitrary but reasonable-seeming value. 31918316Swollman */ 32046303Smarkm return (10); 32118316Swollman } 32219880Swollman 32319880Swollman /* If it's not an end-of-archive mark, it must have a valid checksum.*/ 32419880Swollman if (!checksum(a, h)) 32519880Swollman return (0); 32618316Swollman bid += 48; /* Checksum is usually 6 octal digits. */ 32719880Swollman 32819880Swollman header = (const struct archive_entry_header_ustar *)h; 32918316Swollman 33019880Swollman /* Recognize POSIX formats. */ 33118316Swollman if ((memcmp(header->magic, "ustar\0", 6) == 0) 33246303Smarkm && (memcmp(header->version, "00", 2) == 0)) 33346303Smarkm bid += 56; 33446303Smarkm 33546303Smarkm /* Recognize GNU tar format. */ 33646303Smarkm if ((memcmp(header->magic, "ustar ", 6) == 0) 33746303Smarkm && (memcmp(header->version, " \0", 2) == 0)) 33846303Smarkm bid += 56; 33946303Smarkm 34046303Smarkm /* Type flag must be null, digit or A-Z, a-z. 
*/ 34146303Smarkm if (header->typeflag[0] != 0 && 34246303Smarkm !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') && 34346303Smarkm !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') && 34446303Smarkm !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') ) 34546303Smarkm return (0); 346126250Sbms bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */ 347126250Sbms 348126250Sbms /* Sanity check: Look at first byte of mode field. */ 349126250Sbms switch (255 & (unsigned)header->mode[0]) { 350126250Sbms case 0: case 255: 351126250Sbms /* Base-256 value: No further verification possible! */ 352126250Sbms break; 353126250Sbms case ' ': /* Not recommended, but not illegal, either. */ 35446303Smarkm break; 35546303Smarkm case '0': case '1': case '2': case '3': 35646303Smarkm case '4': case '5': case '6': case '7': 35746303Smarkm /* Octal Value. */ 35846303Smarkm /* TODO: Check format of remainder of this field. */ 35946303Smarkm break; 36046303Smarkm default: 36146303Smarkm /* Not a valid mode; bail out here. */ 36246303Smarkm return (0); 36346303Smarkm } 36446303Smarkm /* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. 
*/ 36518316Swollman 36646303Smarkm return (bid); 36719880Swollman} 36819880Swollman 36918316Swollmanstatic int 37018316Swollmanarchive_read_format_tar_options(struct archive_read *a, 37118316Swollman const char *key, const char *val) 37219880Swollman{ 37319880Swollman struct tar *tar; 37419880Swollman int ret = ARCHIVE_FAILED; 37519880Swollman 37618316Swollman tar = (struct tar *)(a->format->data); 37719880Swollman if (strcmp(key, "compat-2x") == 0) { 37846303Smarkm /* Handle UTF-8 filnames as libarchive 2.x */ 37946303Smarkm tar->compat_2x = (val != NULL && val[0] != 0); 38019880Swollman tar->init_default_conversion = tar->compat_2x; 38119880Swollman return (ARCHIVE_OK); 38219880Swollman } else if (strcmp(key, "hdrcharset") == 0) { 38318316Swollman if (val == NULL || val[0] == 0) 38418316Swollman archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 38518316Swollman "tar: hdrcharset option needs a character-set name"); 38619880Swollman else { 38719880Swollman tar->opt_sconv = 38818316Swollman archive_string_conversion_from_charset( 38918316Swollman &a->archive, val, 0); 39019880Swollman if (tar->opt_sconv != NULL) 39119880Swollman ret = ARCHIVE_OK; 39219880Swollman else 39319880Swollman ret = ARCHIVE_FATAL; 39418316Swollman } 39518316Swollman return (ret); 39618316Swollman } else if (strcmp(key, "mac-ext") == 0) { 39719880Swollman tar->process_mac_extensions = (val != NULL && val[0] != 0); 39818316Swollman return (ARCHIVE_OK); 39918316Swollman } else if (strcmp(key, "read_concatenated_archives") == 0) { 40018316Swollman tar->read_concatenated_archives = (val != NULL && val[0] != 0); 40119880Swollman return (ARCHIVE_OK); 40218316Swollman } 40319880Swollman 40418316Swollman /* Note: The "warn" return is just to inform the options 40518316Swollman * supervisor that we didn't handle it. It will generate 40618316Swollman * a suitable error if no one used this option. 
*/ 40718316Swollman return (ARCHIVE_WARN); 40819880Swollman} 40919880Swollman 41018316Swollman/* utility function- this exists to centralize the logic of tracking 41119880Swollman * how much unconsumed data we have floating around, and to consume 41219880Swollman * anything outstanding since we're going to do read_aheads 41318316Swollman */ 41419880Swollmanstatic void 41519880Swollmantar_flush_unconsumed(struct archive_read *a, size_t *unconsumed) 41619880Swollman{ 41719880Swollman if (*unconsumed) { 41819880Swollman/* 419126250Sbms void *data = (void *)__archive_read_ahead(a, *unconsumed, NULL); 420126250Sbms * this block of code is to poison claimed unconsumed space, ensuring 421126250Sbms * things break if it is in use still. 422126250Sbms * currently it WILL break things, so enable it only for debugging this issue 423126250Sbms if (data) { 424126250Sbms memset(data, 0xff, *unconsumed); 425126250Sbms } 426126250Sbms*/ 427126250Sbms __archive_read_consume(a, *unconsumed); 42819880Swollman *unconsumed = 0; 42919880Swollman } 43019880Swollman} 43119880Swollman 43219880Swollman/* 43318316Swollman * The function invoked by archive_read_next_header(). This 43419880Swollman * just sets up a few things and then calls the internal 43519880Swollman * tar_read_header() function below. 43619880Swollman */ 43719880Swollmanstatic int 43818316Swollmanarchive_read_format_tar_read_header(struct archive_read *a, 43918316Swollman struct archive_entry *entry) 44090868Smike{ 44119880Swollman /* 44219880Swollman * When converting tar archives to cpio archives, it is 44319880Swollman * essential that each distinct file have a distinct inode 44419880Swollman * number. To simplify this, we keep a static count here to 44519880Swollman * assign fake dev/inode numbers to each tar entry. Note that 44619880Swollman * pax format archives may overwrite this with something more 44719880Swollman * useful. 
44819880Swollman * 44918316Swollman * Ideally, we would track every file read from the archive so 45020339Swollman * that we could assign the same dev/ino pair to hardlinks, 45119880Swollman * but the memory required to store a complete lookup table is 45219880Swollman * probably not worthwhile just to support the relatively 45318316Swollman * obscure tar->cpio conversion case. 45418316Swollman */ 45519880Swollman static int default_inode; 45619880Swollman static int default_dev; 45719880Swollman struct tar *tar; 45818316Swollman const char *p; 45919880Swollman const wchar_t *wp; 46019880Swollman int r; 46119880Swollman size_t l, unconsumed = 0; 46219880Swollman 46319880Swollman /* Assign default device/inode values. */ 46419880Swollman archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */ 46519880Swollman archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */ 46619880Swollman /* Limit generated st_ino number to 16 bits. */ 46719880Swollman if (default_inode >= 0xffff) { 46818316Swollman ++default_dev; 46918316Swollman default_inode = 0; 47018316Swollman } 47118316Swollman 47218316Swollman tar = (struct tar *)(a->format->data); 47346303Smarkm tar->entry_offset = 0; 47446303Smarkm gnu_clear_sparse_list(tar); 47546303Smarkm tar->realsize = -1; /* Mark this as "unset" */ 47646303Smarkm 47746303Smarkm /* Setup default string conversion. 
*/ 47846303Smarkm tar->sconv = tar->opt_sconv; 47946303Smarkm if (tar->sconv == NULL) { 48018316Swollman if (!tar->init_default_conversion) { 48118316Swollman tar->sconv_default = 48218316Swollman archive_string_default_conversion_for_read(&(a->archive)); 48318316Swollman tar->init_default_conversion = 1; 48418316Swollman } 48518316Swollman tar->sconv = tar->sconv_default; 48618316Swollman } 48718316Swollman 48818316Swollman r = tar_read_header(a, tar, entry, &unconsumed); 48918316Swollman 49018316Swollman tar_flush_unconsumed(a, &unconsumed); 49119880Swollman 49220339Swollman /* 49320339Swollman * "non-sparse" files are really just sparse files with 49418316Swollman * a single block. 49546303Smarkm */ 49618316Swollman if (tar->sparse_list == NULL) { 49718316Swollman if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining) 49818316Swollman != ARCHIVE_OK) 49918316Swollman return (ARCHIVE_FATAL); 50018316Swollman } else { 50119880Swollman struct sparse_block *sb; 50219880Swollman 50319880Swollman for (sb = tar->sparse_list; sb != NULL; sb = sb->next) { 50419880Swollman if (!sb->hole) 50518316Swollman archive_entry_sparse_add_entry(entry, 50618316Swollman sb->offset, sb->remaining); 50718316Swollman } 50818316Swollman } 50919880Swollman 51019880Swollman if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) { 511126250Sbms /* 512126250Sbms * "Regular" entry with trailing '/' is really 51318316Swollman * directory: This is needed for certain old tar 51418316Swollman * variants and even for some broken newer ones. 
51518316Swollman */ 51618316Swollman if ((wp = archive_entry_pathname_w(entry)) != NULL) { 51719880Swollman l = wcslen(wp); 51818316Swollman if (l > 0 && wp[l - 1] == L'/') { 51918316Swollman archive_entry_set_filetype(entry, AE_IFDIR); 52018316Swollman } 52118316Swollman } else if ((p = archive_entry_pathname(entry)) != NULL) { 52218316Swollman l = strlen(p); 52318316Swollman if (l > 0 && p[l - 1] == '/') { 52418316Swollman archive_entry_set_filetype(entry, AE_IFDIR); 52518316Swollman } 52619880Swollman } 52719880Swollman } 52819880Swollman return (r); 52919880Swollman} 53019880Swollman 53118316Swollmanstatic int 53218316Swollmanarchive_read_format_tar_read_data(struct archive_read *a, 53319880Swollman const void **buff, size_t *size, int64_t *offset) 53419880Swollman{ 53518316Swollman ssize_t bytes_read; 53618316Swollman struct tar *tar; 53718316Swollman struct sparse_block *p; 53819880Swollman 53919880Swollman tar = (struct tar *)(a->format->data); 54018316Swollman 54118316Swollman for (;;) { 54219880Swollman /* Remove exhausted entries from sparse list. */ 54319880Swollman while (tar->sparse_list != NULL && 54419880Swollman tar->sparse_list->remaining == 0) { 54519880Swollman p = tar->sparse_list; 54619880Swollman tar->sparse_list = p->next; 54718316Swollman free(p); 54818316Swollman } 54919880Swollman 55019880Swollman if (tar->entry_bytes_unconsumed) { 55119880Swollman __archive_read_consume(a, tar->entry_bytes_unconsumed); 55219880Swollman tar->entry_bytes_unconsumed = 0; 55318316Swollman } 55418316Swollman 55518316Swollman /* If we're at end of file, return EOF. 
*/ 55619880Swollman if (tar->sparse_list == NULL || 55719880Swollman tar->entry_bytes_remaining == 0) { 55819880Swollman if (__archive_read_consume(a, tar->entry_padding) < 0) 55919880Swollman return (ARCHIVE_FATAL); 56019880Swollman tar->entry_padding = 0; 56119880Swollman *buff = NULL; 56218316Swollman *size = 0; 56318316Swollman *offset = tar->realsize; 56418316Swollman return (ARCHIVE_EOF); 56518316Swollman } 56619880Swollman 56718316Swollman *buff = __archive_read_ahead(a, 1, &bytes_read); 56818316Swollman if (bytes_read < 0) 56918316Swollman return (ARCHIVE_FATAL); 57018316Swollman if (*buff == NULL) { 57118316Swollman archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 57218316Swollman "Truncated tar archive"); 57318316Swollman return (ARCHIVE_FATAL); 57446303Smarkm } 57518316Swollman if (bytes_read > tar->entry_bytes_remaining) 57618316Swollman bytes_read = (ssize_t)tar->entry_bytes_remaining; 57718316Swollman /* Don't read more than is available in the 57818316Swollman * current sparse block. */ 57919880Swollman if (tar->sparse_list->remaining < bytes_read) 58020339Swollman bytes_read = (ssize_t)tar->sparse_list->remaining; 58120339Swollman *size = bytes_read; 58219880Swollman *offset = tar->sparse_list->offset; 58319880Swollman tar->sparse_list->remaining -= bytes_read; 58446303Smarkm tar->sparse_list->offset += bytes_read; 58519880Swollman tar->entry_bytes_remaining -= bytes_read; 58619880Swollman tar->entry_bytes_unconsumed = bytes_read; 58719880Swollman 58819880Swollman if (!tar->sparse_list->hole) 58919880Swollman return (ARCHIVE_OK); 59019880Swollman /* Current is hole data and skip this. 
*/ 59119880Swollman } 59219880Swollman} 59319880Swollman 59419880Swollmanstatic int 59519880Swollmanarchive_read_format_tar_skip(struct archive_read *a) 59618316Swollman{ 59720339Swollman int64_t bytes_skipped; 59820339Swollman int64_t request; 59918316Swollman struct sparse_block *p; 60020339Swollman struct tar* tar; 60120339Swollman 60220339Swollman tar = (struct tar *)(a->format->data); 60320339Swollman 60418316Swollman /* Do not consume the hole of a sparse file. */ 60519880Swollman request = 0; 60618316Swollman for (p = tar->sparse_list; p != NULL; p = p->next) { 60718316Swollman if (!p->hole) { 60818316Swollman if (p->remaining >= INT64_MAX - request) { 60990868Smike return ARCHIVE_FATAL; 61018316Swollman } 61118316Swollman request += p->remaining; 61218316Swollman } 61318316Swollman } 61419880Swollman if (request > tar->entry_bytes_remaining) 61519880Swollman request = tar->entry_bytes_remaining; 61619880Swollman request += tar->entry_padding + tar->entry_bytes_unconsumed; 61719880Swollman 61819880Swollman bytes_skipped = __archive_read_consume(a, request); 61919880Swollman if (bytes_skipped < 0) 62018316Swollman return (ARCHIVE_FATAL); 62118316Swollman 62218316Swollman tar->entry_bytes_remaining = 0; 62319880Swollman tar->entry_bytes_unconsumed = 0; 62419880Swollman tar->entry_padding = 0; 62519880Swollman 62619880Swollman /* Free the sparse list. */ 62718316Swollman gnu_clear_sparse_list(tar); 62818316Swollman 62918316Swollman return (ARCHIVE_OK); 63018316Swollman} 63119880Swollman 63219880Swollman/* 63319880Swollman * This function recursively interprets all of the headers associated 63419880Swollman * with a single entry. 
63519880Swollman */ 63619880Swollmanstatic int 63718316Swollmantar_read_header(struct archive_read *a, struct tar *tar, 63818316Swollman struct archive_entry *entry, size_t *unconsumed) 63918316Swollman{ 64018316Swollman ssize_t bytes; 64118316Swollman int err; 64219880Swollman const char *h; 64318316Swollman const struct archive_entry_header_ustar *header; 64446303Smarkm const struct archive_entry_header_gnutar *gnuheader; 64518316Swollman 64618316Swollman /* Loop until we find a workable header record. */ 64718316Swollman for (;;) { 64818316Swollman tar_flush_unconsumed(a, unconsumed); 64918316Swollman 65018316Swollman /* Read 512-byte header record */ 65118316Swollman h = __archive_read_ahead(a, 512, &bytes); 65218316Swollman if (bytes < 0) 65319880Swollman return ((int)bytes); 65419880Swollman if (bytes == 0) { /* EOF at a block boundary. */ 65519880Swollman /* Some writers do omit the block of nulls. <sigh> */ 65619880Swollman return (ARCHIVE_EOF); 65719880Swollman } 65819880Swollman if (bytes < 512) { /* Short block at EOF; this is bad. */ 65919880Swollman archive_set_error(&a->archive, 66018316Swollman ARCHIVE_ERRNO_FILE_FORMAT, 66118316Swollman "Truncated tar archive"); 66218316Swollman return (ARCHIVE_FATAL); 66318316Swollman } 66418316Swollman *unconsumed = 512; 66518316Swollman 66618316Swollman /* Header is workable if it's not an end-of-archive mark. */ 66718316Swollman if (h[0] != 0 || !archive_block_is_null(h)) 66819880Swollman break; 66919880Swollman 67046303Smarkm /* Ensure format is set for archives with only null blocks. */ 67119880Swollman if (a->archive.archive_format_name == NULL) { 67246303Smarkm a->archive.archive_format = ARCHIVE_FORMAT_TAR; 67319880Swollman a->archive.archive_format_name = "tar"; 67418316Swollman } 67518316Swollman 67618316Swollman if (!tar->read_concatenated_archives) { 67718316Swollman /* Try to consume a second all-null record, as well. 
*/ 67818316Swollman tar_flush_unconsumed(a, unconsumed); 67918316Swollman h = __archive_read_ahead(a, 512, NULL); 68018316Swollman if (h != NULL && h[0] == 0 && archive_block_is_null(h)) 681126250Sbms __archive_read_consume(a, 512); 682126250Sbms archive_clear_error(&a->archive); 68318316Swollman return (ARCHIVE_EOF); 68418316Swollman } 68518316Swollman 68646303Smarkm /* 68746303Smarkm * We're reading concatenated archives, ignore this block and 68846303Smarkm * loop to get the next. 68946303Smarkm */ 69046303Smarkm } 69146303Smarkm 69246303Smarkm /* 69346303Smarkm * Note: If the checksum fails and we return ARCHIVE_RETRY, 69446303Smarkm * then the client is likely to just retry. This is a very 69546303Smarkm * crude way to search for the next valid header! 69646303Smarkm * 69746303Smarkm * TODO: Improve this by implementing a real header scan. 69846303Smarkm */ 69946303Smarkm if (!checksum(a, h)) { 70018316Swollman tar_flush_unconsumed(a, unconsumed); 70118316Swollman archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); 70218316Swollman return (ARCHIVE_RETRY); /* Retryable: Invalid header */ 70318316Swollman } 70418316Swollman 70518316Swollman if (++tar->header_recursion_depth > 32) { 70618316Swollman tar_flush_unconsumed(a, unconsumed); 70718316Swollman archive_set_error(&a->archive, EINVAL, "Too many special headers"); 70846303Smarkm return (ARCHIVE_WARN); 70918316Swollman } 71018316Swollman 71118316Swollman /* Determine the format variant. */ 71218316Swollman header = (const struct archive_entry_header_ustar *)h; 71318316Swollman 71418316Swollman switch(header->typeflag[0]) { 71518316Swollman case 'A': /* Solaris tar ACL */ 71618316Swollman a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 71718316Swollman a->archive.archive_format_name = "Solaris tar"; 71818316Swollman err = header_Solaris_ACL(a, tar, entry, h, unconsumed); 71918316Swollman break; 72018316Swollman case 'g': /* POSIX-standard 'g' header. 
*/ 72118316Swollman a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 72218316Swollman a->archive.archive_format_name = "POSIX pax interchange format"; 72318316Swollman err = header_pax_global(a, tar, entry, h, unconsumed); 72418316Swollman if (err == ARCHIVE_EOF) 72518316Swollman return (err); 72618316Swollman break; 72718316Swollman case 'K': /* Long link name (GNU tar, others) */ 72818316Swollman err = header_longlink(a, tar, entry, h, unconsumed); 72918316Swollman break; 73018316Swollman case 'L': /* Long filename (GNU tar, others) */ 73118316Swollman err = header_longname(a, tar, entry, h, unconsumed); 73218316Swollman break; 73319880Swollman case 'V': /* GNU volume header */ 73419880Swollman err = header_volume(a, tar, entry, h, unconsumed); 73518316Swollman break; 73618316Swollman case 'X': /* Used by SUN tar; same as 'x'. */ 73718316Swollman a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 73818316Swollman a->archive.archive_format_name = 73918316Swollman "POSIX pax interchange format (Sun variant)"; 74018316Swollman err = header_pax_extensions(a, tar, entry, h, unconsumed); 74146303Smarkm break; 74246303Smarkm case 'x': /* POSIX-standard 'x' header. 
*/ 74346303Smarkm a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 74446303Smarkm a->archive.archive_format_name = "POSIX pax interchange format"; 74546303Smarkm err = header_pax_extensions(a, tar, entry, h, unconsumed); 74646303Smarkm break; 74718316Swollman default: 74818316Swollman gnuheader = (const struct archive_entry_header_gnutar *)h; 74918316Swollman if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) { 75018316Swollman a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; 75118316Swollman a->archive.archive_format_name = "GNU tar format"; 75218316Swollman err = header_gnutar(a, tar, entry, h, unconsumed); 75318316Swollman } else if (memcmp(header->magic, "ustar", 5) == 0) { 75418316Swollman if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { 75546303Smarkm a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; 75646303Smarkm a->archive.archive_format_name = "POSIX ustar format"; 75746303Smarkm } 75846303Smarkm err = header_ustar(a, tar, entry, h); 75946303Smarkm } else { 76046303Smarkm a->archive.archive_format = ARCHIVE_FORMAT_TAR; 76146303Smarkm a->archive.archive_format_name = "tar (non-POSIX)"; 76246303Smarkm err = header_old_tar(a, tar, entry, h); 76318316Swollman } 76446303Smarkm } 76546303Smarkm if (err == ARCHIVE_FATAL) 76618316Swollman return (err); 76718316Swollman 76818316Swollman tar_flush_unconsumed(a, unconsumed); 76919880Swollman 77018316Swollman h = NULL; 77118316Swollman header = NULL; 77219880Swollman 77318316Swollman --tar->header_recursion_depth; 77418316Swollman /* Yuck. Apple's design here ends up storing long pathname 77518316Swollman * extensions for both the AppleDouble extension entry and the 77618316Swollman * regular entry. 
77718316Swollman */ 77818316Swollman if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) && 77946303Smarkm tar->header_recursion_depth == 0 && 78018316Swollman tar->process_mac_extensions) { 78146303Smarkm int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed); 78218316Swollman if (err2 < err) 78318316Swollman err = err2; 78418316Swollman } 78518316Swollman 78618316Swollman /* We return warnings or success as-is. Anything else is fatal. */ 78718316Swollman if (err == ARCHIVE_WARN || err == ARCHIVE_OK) { 78818316Swollman if (tar->sparse_gnu_pending) { 78918316Swollman if (tar->sparse_gnu_major == 1 && 79018316Swollman tar->sparse_gnu_minor == 0) { 79118316Swollman ssize_t bytes_read; 79218316Swollman 79318316Swollman tar->sparse_gnu_pending = 0; 79418316Swollman /* Read initial sparse map. */ 79518316Swollman bytes_read = gnu_sparse_10_read(a, tar, unconsumed); 79618316Swollman tar->entry_bytes_remaining -= bytes_read; 79718316Swollman if (bytes_read < 0) 79819880Swollman return ((int)bytes_read); 79919880Swollman } else { 80018316Swollman archive_set_error(&a->archive, 80118316Swollman ARCHIVE_ERRNO_MISC, 80218316Swollman "Unrecognized GNU sparse file format"); 80318316Swollman return (ARCHIVE_WARN); 80418316Swollman } 80518316Swollman tar->sparse_gnu_pending = 0; 80618316Swollman } 80718316Swollman return (err); 80818316Swollman } 80918316Swollman if (err == ARCHIVE_EOF) 81018316Swollman /* EOF when recursively reading a header is bad. */ 81146303Smarkm archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); 81218316Swollman return (ARCHIVE_FATAL); 81318316Swollman} 81418316Swollman 81518316Swollman/* 81618316Swollman * Return true if block checksum is correct. 
81718316Swollman */ 81818316Swollmanstatic int 81918316Swollmanchecksum(struct archive_read *a, const void *h) 82018316Swollman{ 82118316Swollman const unsigned char *bytes; 82218316Swollman const struct archive_entry_header_ustar *header; 82346303Smarkm int check, sum; 82418316Swollman size_t i; 82518316Swollman 82618316Swollman (void)a; /* UNUSED */ 82718316Swollman bytes = (const unsigned char *)h; 82818316Swollman header = (const struct archive_entry_header_ustar *)h; 82918316Swollman 83018316Swollman /* Checksum field must hold an octal number */ 83118316Swollman for (i = 0; i < sizeof(header->checksum); ++i) { 83218316Swollman char c = header->checksum[i]; 83318316Swollman if (c != ' ' && c != '\0' && (c < '0' || c > '7')) 83418316Swollman return 0; 83518316Swollman } 83618316Swollman 83718316Swollman /* 83818316Swollman * Test the checksum. Note that POSIX specifies _unsigned_ 83918316Swollman * bytes for this calculation. 84018316Swollman */ 84118316Swollman sum = (int)tar_atol(header->checksum, sizeof(header->checksum)); 84218316Swollman check = 0; 84318316Swollman for (i = 0; i < 148; i++) 84418316Swollman check += (unsigned char)bytes[i]; 84518316Swollman for (; i < 156; i++) 84618316Swollman check += 32; 84718316Swollman for (; i < 512; i++) 84846303Smarkm check += (unsigned char)bytes[i]; 84918316Swollman if (sum == check) 85018316Swollman return (1); 85118316Swollman 85218316Swollman /* 85318316Swollman * Repeat test with _signed_ bytes, just in case this archive 85418316Swollman * was created by an old BSD, Solaris, or HP-UX tar with a 85518316Swollman * broken checksum calculation. 
85618316Swollman */ 85718316Swollman check = 0; 85846303Smarkm for (i = 0; i < 148; i++) 85918316Swollman check += (signed char)bytes[i]; 86018316Swollman for (; i < 156; i++) 86146303Smarkm check += 32; 86246303Smarkm for (; i < 512; i++) 86346303Smarkm check += (signed char)bytes[i]; 86446303Smarkm if (sum == check) 86546303Smarkm return (1); 86646303Smarkm 86746303Smarkm return (0); 86846303Smarkm} 86946303Smarkm 87046303Smarkm/* 87118316Swollman * Return true if this block contains only nulls. 87246303Smarkm */ 87318316Swollmanstatic int 87446303Smarkmarchive_block_is_null(const char *p) 87546303Smarkm{ 87646303Smarkm unsigned i; 87718316Swollman 87818316Swollman for (i = 0; i < 512; i++) 87918316Swollman if (*p++) 88018316Swollman return (0); 88118316Swollman return (1); 88246303Smarkm} 88318316Swollman 88418316Swollman/* 88546303Smarkm * Interpret 'A' Solaris ACL header 88618316Swollman */ 88718316Swollmanstatic int 88818316Swollmanheader_Solaris_ACL(struct archive_read *a, struct tar *tar, 88918316Swollman struct archive_entry *entry, const void *h, size_t *unconsumed) 89018316Swollman{ 89118316Swollman const struct archive_entry_header_ustar *header; 89218316Swollman size_t size; 89346303Smarkm int err; 89446303Smarkm int64_t type; 89546303Smarkm char *acl, *p; 89646303Smarkm 89746303Smarkm /* 89818316Swollman * read_body_to_string adds a NUL terminator, but we need a little 89918316Swollman * more to make sure that we don't overrun acl_text later. 
90046303Smarkm */ 90146303Smarkm header = (const struct archive_entry_header_ustar *)h; 90246303Smarkm size = (size_t)tar_atol(header->size, sizeof(header->size)); 90346303Smarkm err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed); 90446303Smarkm if (err != ARCHIVE_OK) 90546303Smarkm return (err); 90618316Swollman 90718316Swollman /* Recursively read next header */ 90818316Swollman err = tar_read_header(a, tar, entry, unconsumed); 90918316Swollman if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 91018316Swollman return (err); 91118316Swollman 91218316Swollman /* TODO: Examine the first characters to see if this 91318316Swollman * is an AIX ACL descriptor. We'll likely never support 91418316Swollman * them, but it would be polite to recognize and warn when 91518316Swollman * we do see them. */ 91618316Swollman 91746303Smarkm /* Leading octal number indicates ACL type and number of entries. */ 91818316Swollman p = acl = tar->acl_text.s; 91918316Swollman type = 0; 92018316Swollman while (*p != '\0' && p < acl + size) { 92118316Swollman if (*p < '0' || *p > '7') { 92218316Swollman archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 92318316Swollman "Malformed Solaris ACL attribute (invalid digit)"); 92418316Swollman return(ARCHIVE_WARN); 92546303Smarkm } 92618316Swollman type <<= 3; 92718316Swollman type += *p - '0'; 92818316Swollman if (type > 077777777) { 92946303Smarkm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 93046303Smarkm "Malformed Solaris ACL attribute (count too large)"); 93118316Swollman return (ARCHIVE_WARN); 93218316Swollman } 93318316Swollman p++; 93418316Swollman } 93519880Swollman switch ((int)type & ~0777777) { 93619880Swollman case 01000000: 93719880Swollman /* POSIX.1e ACL */ 93819880Swollman break; 93919880Swollman case 03000000: 94019880Swollman archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 94119880Swollman "Solaris NFSv4 ACLs not supported"); 94219880Swollman return (ARCHIVE_WARN); 94319880Swollman default: 
94419880Swollman archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 94519880Swollman "Malformed Solaris ACL attribute (unsupported type %o)", 94620339Swollman (int)type); 94719880Swollman return (ARCHIVE_WARN); 94819880Swollman } 94946303Smarkm p++; 95019880Swollman 951190718Sphk if (p >= acl + size) { 95219880Swollman archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 95319880Swollman "Malformed Solaris ACL attribute (body overflow)"); 95419880Swollman return(ARCHIVE_WARN); 95519880Swollman } 95619880Swollman 95719880Swollman /* ACL text is null-terminated; find the end. */ 95820339Swollman size -= (p - acl); 95920339Swollman acl = p; 96020339Swollman 96120339Swollman while (*p != '\0' && p < acl + size) 96220339Swollman p++; 96320339Swollman 96420339Swollman if (tar->sconv_acl == NULL) { 96519880Swollman tar->sconv_acl = archive_string_conversion_from_charset( 96620339Swollman &(a->archive), "UTF-8", 1); 96746303Smarkm if (tar->sconv_acl == NULL) 96819880Swollman return (ARCHIVE_FATAL); 96919880Swollman } 97020339Swollman archive_strncpy(&(tar->localname), acl, p - acl); 97120339Swollman err = archive_acl_parse_l(archive_entry_acl(entry), 97220339Swollman tar->localname.s, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, tar->sconv_acl); 97320339Swollman if (err != ARCHIVE_OK) { 97420339Swollman if (errno == ENOMEM) { 97519880Swollman archive_set_error(&a->archive, ENOMEM, 97646303Smarkm "Can't allocate memory for ACL"); 97746303Smarkm } else 97846303Smarkm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 97919880Swollman "Malformed Solaris ACL attribute (unparsable)"); 98046303Smarkm } 98146303Smarkm return (err); 98246303Smarkm} 98346303Smarkm 98419880Swollman/* 98519880Swollman * Interpret 'K' long linkname header. 
98619880Swollman */ 98746303Smarkmstatic int 98846303Smarkmheader_longlink(struct archive_read *a, struct tar *tar, 98946303Smarkm struct archive_entry *entry, const void *h, size_t *unconsumed) 99046303Smarkm{ 99146303Smarkm int err; 99246303Smarkm 99346303Smarkm err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed); 99446303Smarkm if (err != ARCHIVE_OK) 995126250Sbms return (err); 99646303Smarkm err = tar_read_header(a, tar, entry, unconsumed); 99746303Smarkm if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 99846303Smarkm return (err); 99946303Smarkm /* Set symlink if symlink already set, else hardlink. */ 1000190745Sphk archive_entry_copy_link(entry, tar->longlink.s); 100146303Smarkm return (ARCHIVE_OK); 100246303Smarkm} 100346303Smarkm 100446303Smarkmstatic int 100546303Smarkmset_conversion_failed_error(struct archive_read *a, 100646303Smarkm struct archive_string_conv *sconv, const char *name) 100746303Smarkm{ 100846303Smarkm if (errno == ENOMEM) { 100946303Smarkm archive_set_error(&a->archive, ENOMEM, 101046303Smarkm "Can't allocate memory for %s", name); 101146303Smarkm return (ARCHIVE_FATAL); 101246303Smarkm } 101346303Smarkm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 101446303Smarkm "%s can't be converted from %s to current locale.", 101546303Smarkm name, archive_string_conversion_charset_name(sconv)); 101619880Swollman return (ARCHIVE_WARN); 1017126250Sbms} 1018126250Sbms 1019126250Sbms/* 102019880Swollman * Interpret 'L' long filename header. 102146303Smarkm */ 102246303Smarkmstatic int 102319880Swollmanheader_longname(struct archive_read *a, struct tar *tar, 102419880Swollman struct archive_entry *entry, const void *h, size_t *unconsumed) 102519880Swollman{ 102619880Swollman int err; 102719880Swollman 102819880Swollman err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed); 102919880Swollman if (err != ARCHIVE_OK) 103019880Swollman return (err); 1031 /* Read and parse "real" header, then override name. 
*/ 1032 err = tar_read_header(a, tar, entry, unconsumed); 1033 if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 1034 return (err); 1035 if (archive_entry_copy_pathname_l(entry, tar->longname.s, 1036 archive_strlen(&(tar->longname)), tar->sconv) != 0) 1037 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1038 return (err); 1039} 1040 1041 1042/* 1043 * Interpret 'V' GNU tar volume header. 1044 */ 1045static int 1046header_volume(struct archive_read *a, struct tar *tar, 1047 struct archive_entry *entry, const void *h, size_t *unconsumed) 1048{ 1049 (void)h; 1050 1051 /* Just skip this and read the next header. */ 1052 return (tar_read_header(a, tar, entry, unconsumed)); 1053} 1054 1055/* 1056 * Read body of an archive entry into an archive_string object. 1057 */ 1058static int 1059read_body_to_string(struct archive_read *a, struct tar *tar, 1060 struct archive_string *as, const void *h, size_t *unconsumed) 1061{ 1062 int64_t size; 1063 const struct archive_entry_header_ustar *header; 1064 const void *src; 1065 1066 (void)tar; /* UNUSED */ 1067 header = (const struct archive_entry_header_ustar *)h; 1068 size = tar_atol(header->size, sizeof(header->size)); 1069 if ((size > 1048576) || (size < 0)) { 1070 archive_set_error(&a->archive, EINVAL, 1071 "Special header too large"); 1072 return (ARCHIVE_FATAL); 1073 } 1074 1075 /* Fail if we can't make our buffer big enough. */ 1076 if (archive_string_ensure(as, (size_t)size+1) == NULL) { 1077 archive_set_error(&a->archive, ENOMEM, 1078 "No memory"); 1079 return (ARCHIVE_FATAL); 1080 } 1081 1082 tar_flush_unconsumed(a, unconsumed); 1083 1084 /* Read the body into the string. 
*/ 1085 *unconsumed = (size_t)((size + 511) & ~ 511); 1086 src = __archive_read_ahead(a, *unconsumed, NULL); 1087 if (src == NULL) { 1088 *unconsumed = 0; 1089 return (ARCHIVE_FATAL); 1090 } 1091 memcpy(as->s, src, (size_t)size); 1092 as->s[size] = '\0'; 1093 as->length = (size_t)size; 1094 return (ARCHIVE_OK); 1095} 1096 1097/* 1098 * Parse out common header elements. 1099 * 1100 * This would be the same as header_old_tar, except that the 1101 * filename is handled slightly differently for old and POSIX 1102 * entries (POSIX entries support a 'prefix'). This factoring 1103 * allows header_old_tar and header_ustar 1104 * to handle filenames differently, while still putting most of the 1105 * common parsing into one place. 1106 */ 1107static int 1108header_common(struct archive_read *a, struct tar *tar, 1109 struct archive_entry *entry, const void *h) 1110{ 1111 const struct archive_entry_header_ustar *header; 1112 char tartype; 1113 int err = ARCHIVE_OK; 1114 1115 header = (const struct archive_entry_header_ustar *)h; 1116 if (header->linkname[0]) 1117 archive_strncpy(&(tar->entry_linkpath), 1118 header->linkname, sizeof(header->linkname)); 1119 else 1120 archive_string_empty(&(tar->entry_linkpath)); 1121 1122 /* Parse out the numeric fields (all are octal) */ 1123 archive_entry_set_mode(entry, 1124 (mode_t)tar_atol(header->mode, sizeof(header->mode))); 1125 archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid))); 1126 archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid))); 1127 tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size)); 1128 if (tar->entry_bytes_remaining < 0) { 1129 tar->entry_bytes_remaining = 0; 1130 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1131 "Tar entry has negative size?"); 1132 err = ARCHIVE_WARN; 1133 } 1134 tar->realsize = tar->entry_bytes_remaining; 1135 archive_entry_set_size(entry, tar->entry_bytes_remaining); 1136 archive_entry_set_mtime(entry, tar_atol(header->mtime, 
sizeof(header->mtime)), 0); 1137 1138 /* Handle the tar type flag appropriately. */ 1139 tartype = header->typeflag[0]; 1140 1141 switch (tartype) { 1142 case '1': /* Hard link */ 1143 if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s, 1144 archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { 1145 err = set_conversion_failed_error(a, tar->sconv, 1146 "Linkname"); 1147 if (err == ARCHIVE_FATAL) 1148 return (err); 1149 } 1150 /* 1151 * The following may seem odd, but: Technically, tar 1152 * does not store the file type for a "hard link" 1153 * entry, only the fact that it is a hard link. So, I 1154 * leave the type zero normally. But, pax interchange 1155 * format allows hard links to have data, which 1156 * implies that the underlying entry is a regular 1157 * file. 1158 */ 1159 if (archive_entry_size(entry) > 0) 1160 archive_entry_set_filetype(entry, AE_IFREG); 1161 1162 /* 1163 * A tricky point: Traditionally, tar readers have 1164 * ignored the size field when reading hardlink 1165 * entries, and some writers put non-zero sizes even 1166 * though the body is empty. POSIX blessed this 1167 * convention in the 1988 standard, but broke with 1168 * this tradition in 2001 by permitting hardlink 1169 * entries to store valid bodies in pax interchange 1170 * format, but not in ustar format. Since there is no 1171 * hard and fast way to distinguish pax interchange 1172 * from earlier archives (the 'x' and 'g' entries are 1173 * optional, after all), we need a heuristic. 1174 */ 1175 if (archive_entry_size(entry) == 0) { 1176 /* If the size is already zero, we're done. */ 1177 } else if (a->archive.archive_format 1178 == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { 1179 /* Definitely pax extended; must obey hardlink size. */ 1180 } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR 1181 || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR) 1182 { 1183 /* Old-style or GNU tar: we must ignore the size. 
*/ 1184 archive_entry_set_size(entry, 0); 1185 tar->entry_bytes_remaining = 0; 1186 } else if (archive_read_format_tar_bid(a, 50) > 50) { 1187 /* 1188 * We don't know if it's pax: If the bid 1189 * function sees a valid ustar header 1190 * immediately following, then let's ignore 1191 * the hardlink size. 1192 */ 1193 archive_entry_set_size(entry, 0); 1194 tar->entry_bytes_remaining = 0; 1195 } 1196 /* 1197 * TODO: There are still two cases I'd like to handle: 1198 * = a ustar non-pax archive with a hardlink entry at 1199 * end-of-archive. (Look for block of nulls following?) 1200 * = a pax archive that has not seen any pax headers 1201 * and has an entry which is a hardlink entry storing 1202 * a body containing an uncompressed tar archive. 1203 * The first is worth addressing; I don't see any reliable 1204 * way to deal with the second possibility. 1205 */ 1206 break; 1207 case '2': /* Symlink */ 1208 archive_entry_set_filetype(entry, AE_IFLNK); 1209 archive_entry_set_size(entry, 0); 1210 tar->entry_bytes_remaining = 0; 1211 if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s, 1212 archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { 1213 err = set_conversion_failed_error(a, tar->sconv, 1214 "Linkname"); 1215 if (err == ARCHIVE_FATAL) 1216 return (err); 1217 } 1218 break; 1219 case '3': /* Character device */ 1220 archive_entry_set_filetype(entry, AE_IFCHR); 1221 archive_entry_set_size(entry, 0); 1222 tar->entry_bytes_remaining = 0; 1223 break; 1224 case '4': /* Block device */ 1225 archive_entry_set_filetype(entry, AE_IFBLK); 1226 archive_entry_set_size(entry, 0); 1227 tar->entry_bytes_remaining = 0; 1228 break; 1229 case '5': /* Dir */ 1230 archive_entry_set_filetype(entry, AE_IFDIR); 1231 archive_entry_set_size(entry, 0); 1232 tar->entry_bytes_remaining = 0; 1233 break; 1234 case '6': /* FIFO device */ 1235 archive_entry_set_filetype(entry, AE_IFIFO); 1236 archive_entry_set_size(entry, 0); 1237 tar->entry_bytes_remaining = 0; 1238 break; 1239 
case 'D': /* GNU incremental directory type */ 1240 /* 1241 * No special handling is actually required here. 1242 * It might be nice someday to preprocess the file list and 1243 * provide it to the client, though. 1244 */ 1245 archive_entry_set_filetype(entry, AE_IFDIR); 1246 break; 1247 case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/ 1248 /* 1249 * As far as I can tell, this is just like a regular file 1250 * entry, except that the contents should be _appended_ to 1251 * the indicated file at the indicated offset. This may 1252 * require some API work to fully support. 1253 */ 1254 break; 1255 case 'N': /* Old GNU "long filename" entry. */ 1256 /* The body of this entry is a script for renaming 1257 * previously-extracted entries. Ugh. It will never 1258 * be supported by libarchive. */ 1259 archive_entry_set_filetype(entry, AE_IFREG); 1260 break; 1261 case 'S': /* GNU sparse files */ 1262 /* 1263 * Sparse files are really just regular files with 1264 * sparse information in the extended area. 1265 */ 1266 /* FALLTHROUGH */ 1267 default: /* Regular file and non-standard types */ 1268 /* 1269 * Per POSIX: non-recognized types should always be 1270 * treated as regular files. 1271 */ 1272 archive_entry_set_filetype(entry, AE_IFREG); 1273 break; 1274 } 1275 return (err); 1276} 1277 1278/* 1279 * Parse out header elements for "old-style" tar archives. 1280 */ 1281static int 1282header_old_tar(struct archive_read *a, struct tar *tar, 1283 struct archive_entry *entry, const void *h) 1284{ 1285 const struct archive_entry_header_ustar *header; 1286 int err = ARCHIVE_OK, err2; 1287 1288 /* Copy filename over (to ensure null termination). 
*/ 1289 header = (const struct archive_entry_header_ustar *)h; 1290 if (archive_entry_copy_pathname_l(entry, 1291 header->name, sizeof(header->name), tar->sconv) != 0) { 1292 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1293 if (err == ARCHIVE_FATAL) 1294 return (err); 1295 } 1296 1297 /* Grab rest of common fields */ 1298 err2 = header_common(a, tar, entry, h); 1299 if (err > err2) 1300 err = err2; 1301 1302 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1303 return (err); 1304} 1305 1306/* 1307 * Read a Mac AppleDouble-encoded blob of file metadata, 1308 * if there is one. 1309 */ 1310static int 1311read_mac_metadata_blob(struct archive_read *a, struct tar *tar, 1312 struct archive_entry *entry, const void *h, size_t *unconsumed) 1313{ 1314 int64_t size; 1315 const void *data; 1316 const char *p, *name; 1317 const wchar_t *wp, *wname; 1318 1319 (void)h; /* UNUSED */ 1320 1321 wname = wp = archive_entry_pathname_w(entry); 1322 if (wp != NULL) { 1323 /* Find the last path element. */ 1324 for (; *wp != L'\0'; ++wp) { 1325 if (wp[0] == '/' && wp[1] != L'\0') 1326 wname = wp + 1; 1327 } 1328 /* 1329 * If last path element starts with "._", then 1330 * this is a Mac extension. 1331 */ 1332 if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0') 1333 return ARCHIVE_OK; 1334 } else { 1335 /* Find the last path element. */ 1336 name = p = archive_entry_pathname(entry); 1337 if (p == NULL) 1338 return (ARCHIVE_FAILED); 1339 for (; *p != '\0'; ++p) { 1340 if (p[0] == '/' && p[1] != '\0') 1341 name = p + 1; 1342 } 1343 /* 1344 * If last path element starts with "._", then 1345 * this is a Mac extension. 1346 */ 1347 if (name[0] != '.' || name[1] != '_' || name[2] == '\0') 1348 return ARCHIVE_OK; 1349 } 1350 1351 /* Read the body as a Mac OS metadata blob. */ 1352 size = archive_entry_size(entry); 1353 1354 /* 1355 * TODO: Look beyond the body here to peek at the next header. 
1356 * If it's a regular header (not an extension header) 1357 * that has the wrong name, just return the current 1358 * entry as-is, without consuming the body here. 1359 * That would reduce the risk of us mis-identifying 1360 * an ordinary file that just happened to have 1361 * a name starting with "._". 1362 * 1363 * Q: Is the above idea really possible? Even 1364 * when there are GNU or pax extension entries? 1365 */ 1366 data = __archive_read_ahead(a, (size_t)size, NULL); 1367 if (data == NULL) { 1368 *unconsumed = 0; 1369 return (ARCHIVE_FATAL); 1370 } 1371 archive_entry_copy_mac_metadata(entry, data, (size_t)size); 1372 *unconsumed = (size_t)((size + 511) & ~ 511); 1373 tar_flush_unconsumed(a, unconsumed); 1374 return (tar_read_header(a, tar, entry, unconsumed)); 1375} 1376 1377/* 1378 * Parse a file header for a pax extended archive entry. 1379 */ 1380static int 1381header_pax_global(struct archive_read *a, struct tar *tar, 1382 struct archive_entry *entry, const void *h, size_t *unconsumed) 1383{ 1384 int err; 1385 1386 err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed); 1387 if (err != ARCHIVE_OK) 1388 return (err); 1389 err = tar_read_header(a, tar, entry, unconsumed); 1390 return (err); 1391} 1392 1393static int 1394header_pax_extensions(struct archive_read *a, struct tar *tar, 1395 struct archive_entry *entry, const void *h, size_t *unconsumed) 1396{ 1397 int err, err2; 1398 1399 err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed); 1400 if (err != ARCHIVE_OK) 1401 return (err); 1402 1403 /* Parse the next header. */ 1404 err = tar_read_header(a, tar, entry, unconsumed); 1405 if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 1406 return (err); 1407 1408 /* 1409 * TODO: Parse global/default options into 'entry' struct here 1410 * before handling file-specific options. 
1411 * 1412 * This design (parse standard header, then overwrite with pax 1413 * extended attribute data) usually works well, but isn't ideal; 1414 * it would be better to parse the pax extended attributes first 1415 * and then skip any fields in the standard header that were 1416 * defined in the pax header. 1417 */ 1418 err2 = pax_header(a, tar, entry, tar->pax_header.s); 1419 err = err_combine(err, err2); 1420 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1421 return (err); 1422} 1423 1424 1425/* 1426 * Parse a file header for a Posix "ustar" archive entry. This also 1427 * handles "pax" or "extended ustar" entries. 1428 */ 1429static int 1430header_ustar(struct archive_read *a, struct tar *tar, 1431 struct archive_entry *entry, const void *h) 1432{ 1433 const struct archive_entry_header_ustar *header; 1434 struct archive_string *as; 1435 int err = ARCHIVE_OK, r; 1436 1437 header = (const struct archive_entry_header_ustar *)h; 1438 1439 /* Copy name into an internal buffer to ensure null-termination. */ 1440 as = &(tar->entry_pathname); 1441 if (header->prefix[0]) { 1442 archive_strncpy(as, header->prefix, sizeof(header->prefix)); 1443 if (as->s[archive_strlen(as) - 1] != '/') 1444 archive_strappend_char(as, '/'); 1445 archive_strncat(as, header->name, sizeof(header->name)); 1446 } else { 1447 archive_strncpy(as, header->name, sizeof(header->name)); 1448 } 1449 if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as), 1450 tar->sconv) != 0) { 1451 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1452 if (err == ARCHIVE_FATAL) 1453 return (err); 1454 } 1455 1456 /* Handle rest of common fields. */ 1457 r = header_common(a, tar, entry, h); 1458 if (r == ARCHIVE_FATAL) 1459 return (r); 1460 if (r < err) 1461 err = r; 1462 1463 /* Handle POSIX ustar fields. 
*/ 1464 if (archive_entry_copy_uname_l(entry, 1465 header->uname, sizeof(header->uname), tar->sconv) != 0) { 1466 err = set_conversion_failed_error(a, tar->sconv, "Uname"); 1467 if (err == ARCHIVE_FATAL) 1468 return (err); 1469 } 1470 1471 if (archive_entry_copy_gname_l(entry, 1472 header->gname, sizeof(header->gname), tar->sconv) != 0) { 1473 err = set_conversion_failed_error(a, tar->sconv, "Gname"); 1474 if (err == ARCHIVE_FATAL) 1475 return (err); 1476 } 1477 1478 /* Parse out device numbers only for char and block specials. */ 1479 if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { 1480 archive_entry_set_rdevmajor(entry, (dev_t) 1481 tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); 1482 archive_entry_set_rdevminor(entry, (dev_t) 1483 tar_atol(header->rdevminor, sizeof(header->rdevminor))); 1484 } 1485 1486 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1487 1488 return (err); 1489} 1490 1491 1492/* 1493 * Parse the pax extended attributes record. 1494 * 1495 * Returns non-zero if there's an error in the data. 1496 */ 1497static int 1498pax_header(struct archive_read *a, struct tar *tar, 1499 struct archive_entry *entry, char *attr) 1500{ 1501 size_t attr_length, l, line_length; 1502 char *p; 1503 char *key, *value; 1504 struct archive_string *as; 1505 struct archive_string_conv *sconv; 1506 int err, err2; 1507 1508 attr_length = strlen(attr); 1509 tar->pax_hdrcharset_binary = 0; 1510 archive_string_empty(&(tar->entry_gname)); 1511 archive_string_empty(&(tar->entry_linkpath)); 1512 archive_string_empty(&(tar->entry_pathname)); 1513 archive_string_empty(&(tar->entry_pathname_override)); 1514 archive_string_empty(&(tar->entry_uname)); 1515 err = ARCHIVE_OK; 1516 while (attr_length > 0) { 1517 /* Parse decimal length field at start of line. */ 1518 line_length = 0; 1519 l = attr_length; 1520 p = attr; /* Record start of line. 
*/ 1521 while (l>0) { 1522 if (*p == ' ') { 1523 p++; 1524 l--; 1525 break; 1526 } 1527 if (*p < '0' || *p > '9') { 1528 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1529 "Ignoring malformed pax extended attributes"); 1530 return (ARCHIVE_WARN); 1531 } 1532 line_length *= 10; 1533 line_length += *p - '0'; 1534 if (line_length > 999999) { 1535 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1536 "Rejecting pax extended attribute > 1MB"); 1537 return (ARCHIVE_WARN); 1538 } 1539 p++; 1540 l--; 1541 } 1542 1543 /* 1544 * Parsed length must be no bigger than available data, 1545 * at least 1, and the last character of the line must 1546 * be '\n'. 1547 */ 1548 if (line_length > attr_length 1549 || line_length < 1 1550 || attr[line_length - 1] != '\n') 1551 { 1552 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1553 "Ignoring malformed pax extended attribute"); 1554 return (ARCHIVE_WARN); 1555 } 1556 1557 /* Null-terminate the line. */ 1558 attr[line_length - 1] = '\0'; 1559 1560 /* Find end of key and null terminate it. */ 1561 key = p; 1562 if (key[0] == '=') 1563 return (-1); 1564 while (*p && *p != '=') 1565 ++p; 1566 if (*p == '\0') { 1567 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1568 "Invalid pax extended attributes"); 1569 return (ARCHIVE_WARN); 1570 } 1571 *p = '\0'; 1572 1573 /* Identify null-terminated 'value' portion. */ 1574 value = p + 1; 1575 1576 /* Identify this attribute and set it in the entry. */ 1577 err2 = pax_attribute(a, tar, entry, key, value); 1578 if (err2 == ARCHIVE_FATAL) 1579 return (err2); 1580 err = err_combine(err, err2); 1581 1582 /* Skip to next line */ 1583 attr += line_length; 1584 attr_length -= line_length; 1585 } 1586 1587 /* 1588 * PAX format uses UTF-8 as default charset for its metadata 1589 * unless hdrcharset=BINARY is present in its header. 
1590 * We apply the charset specified by the hdrcharset option only 1591 * when the hdrcharset attribute(in PAX header) is BINARY because 1592 * we respect the charset described in PAX header and BINARY also 1593 * means that metadata(filename,uname and gname) character-set 1594 * is unknown. 1595 */ 1596 if (tar->pax_hdrcharset_binary) 1597 sconv = tar->opt_sconv; 1598 else { 1599 sconv = archive_string_conversion_from_charset( 1600 &(a->archive), "UTF-8", 1); 1601 if (sconv == NULL) 1602 return (ARCHIVE_FATAL); 1603 if (tar->compat_2x) 1604 archive_string_conversion_set_opt(sconv, 1605 SCONV_SET_OPT_UTF8_LIBARCHIVE2X); 1606 } 1607 1608 if (archive_strlen(&(tar->entry_gname)) > 0) { 1609 if (archive_entry_copy_gname_l(entry, tar->entry_gname.s, 1610 archive_strlen(&(tar->entry_gname)), sconv) != 0) { 1611 err = set_conversion_failed_error(a, sconv, "Gname"); 1612 if (err == ARCHIVE_FATAL) 1613 return (err); 1614 /* Use a converted an original name. */ 1615 archive_entry_copy_gname(entry, tar->entry_gname.s); 1616 } 1617 } 1618 if (archive_strlen(&(tar->entry_linkpath)) > 0) { 1619 if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s, 1620 archive_strlen(&(tar->entry_linkpath)), sconv) != 0) { 1621 err = set_conversion_failed_error(a, sconv, "Linkname"); 1622 if (err == ARCHIVE_FATAL) 1623 return (err); 1624 /* Use a converted an original name. */ 1625 archive_entry_copy_link(entry, tar->entry_linkpath.s); 1626 } 1627 } 1628 /* 1629 * Some extensions (such as the GNU sparse file extensions) 1630 * deliberately store a synthetic name under the regular 'path' 1631 * attribute and the real file name under a different attribute. 1632 * Since we're supposed to not care about the order, we 1633 * have no choice but to store all of the various filenames 1634 * we find and figure it all out afterwards. This is the 1635 * figuring out part. 
/*
 * Store one "LIBARCHIVE.xattr.<url-encoded-name>" attribute, whose value
 * is base-64 text, as an extended attribute of 'entry'.
 *
 * Returns 0 on success, 3 if 'name' is not a non-empty name behind the
 * "LIBARCHIVE.xattr." prefix, 2 if the name could not be decoded and
 * 1 if the value could not be decoded.
 */
static int
pax_attribute_xattr(struct archive_entry *entry,
	const char *name, const char *value)
{
	static const char prefix[] = "LIBARCHIVE.xattr.";
	const size_t prefix_len = sizeof(prefix) - 1;
	char *decoded_name;
	void *decoded_value;
	size_t decoded_len;
	int status;

	/* Need the prefix plus at least one character of attribute name. */
	if (strlen(name) <= prefix_len ||
	    memcmp(name, prefix, prefix_len) != 0)
		return 3;

	/* The attribute name proper is URL-encoded. */
	decoded_name = url_decode(name + prefix_len);
	if (decoded_name == NULL)
		return 2;

	/* The attribute value is base-64 encoded. */
	decoded_value = base64_decode(value, strlen(value), &decoded_len);
	if (decoded_value != NULL) {
		archive_entry_xattr_add_entry(entry, decoded_name,
			decoded_value, decoded_len);
		status = 0;
	} else {
		status = 1;
	}

	/* Both decoded buffers are released here on every path. */
	free(decoded_value);
	free(decoded_name);
	return status;
}
Vendor-specific 1703 * extensions should always have keywords of the form "VENDOR.attribute" 1704 * In particular, it's quite feasible to support many different 1705 * vendor extensions here. I'm using "LIBARCHIVE" for extensions 1706 * unique to this library. 1707 * 1708 * Investigate other vendor-specific extensions and see if 1709 * any of them look useful. 1710 */ 1711static int 1712pax_attribute(struct archive_read *a, struct tar *tar, 1713 struct archive_entry *entry, const char *key, const char *value) 1714{ 1715 int64_t s; 1716 long n; 1717 int err = ARCHIVE_OK, r; 1718 1719#ifndef __FreeBSD__ 1720 if (value == NULL) 1721 value = ""; /* Disable compiler warning; do not pass 1722 * NULL pointer to strlen(). */ 1723#endif 1724 switch (key[0]) { 1725 case 'G': 1726 /* GNU "0.0" sparse pax format. */ 1727 if (strcmp(key, "GNU.sparse.numblocks") == 0) { 1728 tar->sparse_offset = -1; 1729 tar->sparse_numbytes = -1; 1730 tar->sparse_gnu_major = 0; 1731 tar->sparse_gnu_minor = 0; 1732 } 1733 if (strcmp(key, "GNU.sparse.offset") == 0) { 1734 tar->sparse_offset = tar_atol10(value, strlen(value)); 1735 if (tar->sparse_numbytes != -1) { 1736 if (gnu_add_sparse_entry(a, tar, 1737 tar->sparse_offset, tar->sparse_numbytes) 1738 != ARCHIVE_OK) 1739 return (ARCHIVE_FATAL); 1740 tar->sparse_offset = -1; 1741 tar->sparse_numbytes = -1; 1742 } 1743 } 1744 if (strcmp(key, "GNU.sparse.numbytes") == 0) { 1745 tar->sparse_numbytes = tar_atol10(value, strlen(value)); 1746 if (tar->sparse_numbytes != -1) { 1747 if (gnu_add_sparse_entry(a, tar, 1748 tar->sparse_offset, tar->sparse_numbytes) 1749 != ARCHIVE_OK) 1750 return (ARCHIVE_FATAL); 1751 tar->sparse_offset = -1; 1752 tar->sparse_numbytes = -1; 1753 } 1754 } 1755 if (strcmp(key, "GNU.sparse.size") == 0) { 1756 tar->realsize = tar_atol10(value, strlen(value)); 1757 archive_entry_set_size(entry, tar->realsize); 1758 } 1759 1760 /* GNU "0.1" sparse pax format. 
*/ 1761 if (strcmp(key, "GNU.sparse.map") == 0) { 1762 tar->sparse_gnu_major = 0; 1763 tar->sparse_gnu_minor = 1; 1764 if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK) 1765 return (ARCHIVE_WARN); 1766 } 1767 1768 /* GNU "1.0" sparse pax format */ 1769 if (strcmp(key, "GNU.sparse.major") == 0) { 1770 tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value)); 1771 tar->sparse_gnu_pending = 1; 1772 } 1773 if (strcmp(key, "GNU.sparse.minor") == 0) { 1774 tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value)); 1775 tar->sparse_gnu_pending = 1; 1776 } 1777 if (strcmp(key, "GNU.sparse.name") == 0) { 1778 /* 1779 * The real filename; when storing sparse 1780 * files, GNU tar puts a synthesized name into 1781 * the regular 'path' attribute in an attempt 1782 * to limit confusion. ;-) 1783 */ 1784 archive_strcpy(&(tar->entry_pathname_override), value); 1785 } 1786 if (strcmp(key, "GNU.sparse.realsize") == 0) { 1787 tar->realsize = tar_atol10(value, strlen(value)); 1788 archive_entry_set_size(entry, tar->realsize); 1789 } 1790 break; 1791 case 'L': 1792 /* Our extensions */ 1793/* TODO: Handle arbitrary extended attributes... 
*/ 1794/* 1795 if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0) 1796 archive_entry_set_xxxxxx(entry, value); 1797*/ 1798 if (strcmp(key, "LIBARCHIVE.creationtime") == 0) { 1799 pax_time(value, &s, &n); 1800 archive_entry_set_birthtime(entry, s, n); 1801 } 1802 if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0) 1803 pax_attribute_xattr(entry, key, value); 1804 break; 1805 case 'S': 1806 /* We support some keys used by the "star" archiver */ 1807 if (strcmp(key, "SCHILY.acl.access") == 0) { 1808 if (tar->sconv_acl == NULL) { 1809 tar->sconv_acl = 1810 archive_string_conversion_from_charset( 1811 &(a->archive), "UTF-8", 1); 1812 if (tar->sconv_acl == NULL) 1813 return (ARCHIVE_FATAL); 1814 } 1815 1816 r = archive_acl_parse_l(archive_entry_acl(entry), 1817 value, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, 1818 tar->sconv_acl); 1819 if (r != ARCHIVE_OK) { 1820 err = r; 1821 if (err == ARCHIVE_FATAL) { 1822 archive_set_error(&a->archive, ENOMEM, 1823 "Can't allocate memory for " 1824 "SCHILY.acl.access"); 1825 return (err); 1826 } 1827 archive_set_error(&a->archive, 1828 ARCHIVE_ERRNO_MISC, 1829 "Parse error: SCHILY.acl.access"); 1830 } 1831 } else if (strcmp(key, "SCHILY.acl.default") == 0) { 1832 if (tar->sconv_acl == NULL) { 1833 tar->sconv_acl = 1834 archive_string_conversion_from_charset( 1835 &(a->archive), "UTF-8", 1); 1836 if (tar->sconv_acl == NULL) 1837 return (ARCHIVE_FATAL); 1838 } 1839 1840 r = archive_acl_parse_l(archive_entry_acl(entry), 1841 value, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT, 1842 tar->sconv_acl); 1843 if (r != ARCHIVE_OK) { 1844 err = r; 1845 if (err == ARCHIVE_FATAL) { 1846 archive_set_error(&a->archive, ENOMEM, 1847 "Can't allocate memory for " 1848 "SCHILY.acl.default"); 1849 return (err); 1850 } 1851 archive_set_error(&a->archive, 1852 ARCHIVE_ERRNO_MISC, 1853 "Parse error: SCHILY.acl.default"); 1854 } 1855 } else if (strcmp(key, "SCHILY.devmajor") == 0) { 1856 archive_entry_set_rdevmajor(entry, 1857 (dev_t)tar_atol10(value, strlen(value))); 1858 } else if 
(strcmp(key, "SCHILY.devminor") == 0) { 1859 archive_entry_set_rdevminor(entry, 1860 (dev_t)tar_atol10(value, strlen(value))); 1861 } else if (strcmp(key, "SCHILY.fflags") == 0) { 1862 archive_entry_copy_fflags_text(entry, value); 1863 } else if (strcmp(key, "SCHILY.dev") == 0) { 1864 archive_entry_set_dev(entry, 1865 (dev_t)tar_atol10(value, strlen(value))); 1866 } else if (strcmp(key, "SCHILY.ino") == 0) { 1867 archive_entry_set_ino(entry, 1868 tar_atol10(value, strlen(value))); 1869 } else if (strcmp(key, "SCHILY.nlink") == 0) { 1870 archive_entry_set_nlink(entry, (unsigned) 1871 tar_atol10(value, strlen(value))); 1872 } else if (strcmp(key, "SCHILY.realsize") == 0) { 1873 tar->realsize = tar_atol10(value, strlen(value)); 1874 archive_entry_set_size(entry, tar->realsize); 1875 } else if (strcmp(key, "SUN.holesdata") == 0) { 1876 /* A Solaris extension for sparse. */ 1877 r = solaris_sparse_parse(a, tar, entry, value); 1878 if (r < err) { 1879 if (r == ARCHIVE_FATAL) 1880 return (r); 1881 err = r; 1882 archive_set_error(&a->archive, 1883 ARCHIVE_ERRNO_MISC, 1884 "Parse error: SUN.holesdata"); 1885 } 1886 } 1887 break; 1888 case 'a': 1889 if (strcmp(key, "atime") == 0) { 1890 pax_time(value, &s, &n); 1891 archive_entry_set_atime(entry, s, n); 1892 } 1893 break; 1894 case 'c': 1895 if (strcmp(key, "ctime") == 0) { 1896 pax_time(value, &s, &n); 1897 archive_entry_set_ctime(entry, s, n); 1898 } else if (strcmp(key, "charset") == 0) { 1899 /* TODO: Publish charset information in entry. */ 1900 } else if (strcmp(key, "comment") == 0) { 1901 /* TODO: Publish comment in entry. */ 1902 } 1903 break; 1904 case 'g': 1905 if (strcmp(key, "gid") == 0) { 1906 archive_entry_set_gid(entry, 1907 tar_atol10(value, strlen(value))); 1908 } else if (strcmp(key, "gname") == 0) { 1909 archive_strcpy(&(tar->entry_gname), value); 1910 } 1911 break; 1912 case 'h': 1913 if (strcmp(key, "hdrcharset") == 0) { 1914 if (strcmp(value, "BINARY") == 0) 1915 /* Binary mode. 
*/ 1916 tar->pax_hdrcharset_binary = 1; 1917 else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0) 1918 tar->pax_hdrcharset_binary = 0; 1919 } 1920 break; 1921 case 'l': 1922 /* pax interchange doesn't distinguish hardlink vs. symlink. */ 1923 if (strcmp(key, "linkpath") == 0) { 1924 archive_strcpy(&(tar->entry_linkpath), value); 1925 } 1926 break; 1927 case 'm': 1928 if (strcmp(key, "mtime") == 0) { 1929 pax_time(value, &s, &n); 1930 archive_entry_set_mtime(entry, s, n); 1931 } 1932 break; 1933 case 'p': 1934 if (strcmp(key, "path") == 0) { 1935 archive_strcpy(&(tar->entry_pathname), value); 1936 } 1937 break; 1938 case 'r': 1939 /* POSIX has reserved 'realtime.*' */ 1940 break; 1941 case 's': 1942 /* POSIX has reserved 'security.*' */ 1943 /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */ 1944 if (strcmp(key, "size") == 0) { 1945 /* "size" is the size of the data in the entry. */ 1946 tar->entry_bytes_remaining 1947 = tar_atol10(value, strlen(value)); 1948 /* 1949 * But, "size" is not necessarily the size of 1950 * the file on disk; if this is a sparse file, 1951 * the disk size may have already been set from 1952 * GNU.sparse.realsize or GNU.sparse.size or 1953 * an old GNU header field or SCHILY.realsize 1954 * or .... 
/*
 * Parse a decimal time value, which may include a fractional portion.
 *
 * Accepts an optional leading '-', a run of digits and, optionally, a
 * '.' followed by up to nine significant fraction digits.  The whole
 * seconds (with the sign applied) are stored in *ps and the fraction,
 * scaled to nanoseconds, in *pn.  A seconds value too large for int64_t
 * saturates at INT64_MAX, in which case the fraction is reported as 0.
 */
static void
pax_time(const char *p, int64_t *ps, long *pn)
{
	const int64_t safe_prefix = INT64_MAX / 10;
	const int64_t safe_last = INT64_MAX % 10;
	int64_t seconds = 0;
	unsigned long scale;
	int negative = 0;

	if (*p == '-') {
		negative = 1;
		p++;
	}

	for (; *p >= '0' && *p <= '9'; ++p) {
		int d = *p - '0';

		if (seconds > safe_prefix ||
		    (seconds == safe_prefix && d > safe_last)) {
			/* Saturate; p stays on the offending digit. */
			seconds = INT64_MAX;
			break;
		}
		seconds = seconds * 10 + d;
	}

	*ps = negative ? -seconds : seconds;

	/* Nanoseconds default to zero when there is no fraction. */
	*pn = 0;
	if (*p != '.')
		return;

	/* First fraction digit is worth 10^8 ns, the ninth 1 ns. */
	for (scale = 100000000UL; scale != 0; scale /= 10) {
		++p;
		if (*p < '0' || *p > '9')
			break;
		*pn += (*p - '0') * scale;
	}
}
*/ 2046 err = header_common(a, tar, entry, h); 2047 if (err == ARCHIVE_FATAL) 2048 return (err); 2049 2050 /* Copy filename over (to ensure null termination). */ 2051 header = (const struct archive_entry_header_gnutar *)h; 2052 if (archive_entry_copy_pathname_l(entry, 2053 header->name, sizeof(header->name), tar->sconv) != 0) { 2054 err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 2055 if (err == ARCHIVE_FATAL) 2056 return (err); 2057 } 2058 2059 /* Fields common to ustar and GNU */ 2060 /* XXX Can the following be factored out since it's common 2061 * to ustar and gnu tar? Is it okay to move it down into 2062 * header_common, perhaps? */ 2063 if (archive_entry_copy_uname_l(entry, 2064 header->uname, sizeof(header->uname), tar->sconv) != 0) { 2065 err = set_conversion_failed_error(a, tar->sconv, "Uname"); 2066 if (err == ARCHIVE_FATAL) 2067 return (err); 2068 } 2069 2070 if (archive_entry_copy_gname_l(entry, 2071 header->gname, sizeof(header->gname), tar->sconv) != 0) { 2072 err = set_conversion_failed_error(a, tar->sconv, "Gname"); 2073 if (err == ARCHIVE_FATAL) 2074 return (err); 2075 } 2076 2077 /* Parse out device numbers only for char and block specials */ 2078 if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { 2079 archive_entry_set_rdevmajor(entry, (dev_t) 2080 tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); 2081 archive_entry_set_rdevminor(entry, (dev_t) 2082 tar_atol(header->rdevminor, sizeof(header->rdevminor))); 2083 } else 2084 archive_entry_set_rdev(entry, 0); 2085 2086 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 2087 2088 /* Grab GNU-specific fields. 
*/ 2089 t = tar_atol(header->atime, sizeof(header->atime)); 2090 if (t > 0) 2091 archive_entry_set_atime(entry, t, 0); 2092 t = tar_atol(header->ctime, sizeof(header->ctime)); 2093 if (t > 0) 2094 archive_entry_set_ctime(entry, t, 0); 2095 2096 if (header->realsize[0] != 0) { 2097 tar->realsize 2098 = tar_atol(header->realsize, sizeof(header->realsize)); 2099 archive_entry_set_size(entry, tar->realsize); 2100 } 2101 2102 if (header->sparse[0].offset[0] != 0) { 2103 if (gnu_sparse_old_read(a, tar, header, unconsumed) 2104 != ARCHIVE_OK) 2105 return (ARCHIVE_FATAL); 2106 } else { 2107 if (header->isextended[0] != 0) { 2108 /* XXX WTF? XXX */ 2109 } 2110 } 2111 2112 return (err); 2113} 2114 2115static int 2116gnu_add_sparse_entry(struct archive_read *a, struct tar *tar, 2117 int64_t offset, int64_t remaining) 2118{ 2119 struct sparse_block *p; 2120 2121 p = (struct sparse_block *)malloc(sizeof(*p)); 2122 if (p == NULL) { 2123 archive_set_error(&a->archive, ENOMEM, "Out of memory"); 2124 return (ARCHIVE_FATAL); 2125 } 2126 memset(p, 0, sizeof(*p)); 2127 if (tar->sparse_last != NULL) 2128 tar->sparse_last->next = p; 2129 else 2130 tar->sparse_list = p; 2131 tar->sparse_last = p; 2132 if (remaining < 0 || offset < 0) { 2133 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data"); 2134 return (ARCHIVE_FATAL); 2135 } 2136 p->offset = offset; 2137 p->remaining = remaining; 2138 return (ARCHIVE_OK); 2139} 2140 2141static void 2142gnu_clear_sparse_list(struct tar *tar) 2143{ 2144 struct sparse_block *p; 2145 2146 while (tar->sparse_list != NULL) { 2147 p = tar->sparse_list; 2148 tar->sparse_list = p->next; 2149 free(p); 2150 } 2151 tar->sparse_last = NULL; 2152} 2153 2154/* 2155 * GNU tar old-format sparse data. 2156 * 2157 * GNU old-format sparse data is stored in a fixed-field 2158 * format. Offset/size values are 11-byte octal fields (same 2159 * format as 'size' field in ustart header). 
These are 2160 * stored in the header, allocating subsequent header blocks 2161 * as needed. Extending the header in this way is a pretty 2162 * severe POSIX violation; this design has earned GNU tar a 2163 * lot of criticism. 2164 */ 2165 2166static int 2167gnu_sparse_old_read(struct archive_read *a, struct tar *tar, 2168 const struct archive_entry_header_gnutar *header, size_t *unconsumed) 2169{ 2170 ssize_t bytes_read; 2171 const void *data; 2172 struct extended { 2173 struct gnu_sparse sparse[21]; 2174 char isextended[1]; 2175 char padding[7]; 2176 }; 2177 const struct extended *ext; 2178 2179 if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK) 2180 return (ARCHIVE_FATAL); 2181 if (header->isextended[0] == 0) 2182 return (ARCHIVE_OK); 2183 2184 do { 2185 tar_flush_unconsumed(a, unconsumed); 2186 data = __archive_read_ahead(a, 512, &bytes_read); 2187 if (bytes_read < 0) 2188 return (ARCHIVE_FATAL); 2189 if (bytes_read < 512) { 2190 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2191 "Truncated tar archive " 2192 "detected while reading sparse file data"); 2193 return (ARCHIVE_FATAL); 2194 } 2195 *unconsumed = 512; 2196 ext = (const struct extended *)data; 2197 if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK) 2198 return (ARCHIVE_FATAL); 2199 } while (ext->isextended[0] != 0); 2200 if (tar->sparse_list != NULL) 2201 tar->entry_offset = tar->sparse_list->offset; 2202 return (ARCHIVE_OK); 2203} 2204 2205static int 2206gnu_sparse_old_parse(struct archive_read *a, struct tar *tar, 2207 const struct gnu_sparse *sparse, int length) 2208{ 2209 while (length > 0 && sparse->offset[0] != 0) { 2210 if (gnu_add_sparse_entry(a, tar, 2211 tar_atol(sparse->offset, sizeof(sparse->offset)), 2212 tar_atol(sparse->numbytes, sizeof(sparse->numbytes))) 2213 != ARCHIVE_OK) 2214 return (ARCHIVE_FATAL); 2215 sparse++; 2216 length--; 2217 } 2218 return (ARCHIVE_OK); 2219} 2220 2221/* 2222 * GNU tar sparse format 0.0 2223 * 2224 * Beginning with 
GNU tar 1.15, sparse files are stored using 2225 * information in the pax extended header. The GNU tar maintainers 2226 * have gone through a number of variations in the process of working 2227 * out this scheme; fortunately, they're all numbered. 2228 * 2229 * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the 2230 * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to 2231 * store offset/size for each block. The repeated instances of these 2232 * latter fields violate the pax specification (which frowns on 2233 * duplicate keys), so this format was quickly replaced. 2234 */ 2235 2236/* 2237 * GNU tar sparse format 0.1 2238 * 2239 * This version replaced the offset/numbytes attributes with 2240 * a single "map" attribute that stored a list of integers. This 2241 * format had two problems: First, the "map" attribute could be very 2242 * long, which caused problems for some implementations. More 2243 * importantly, the sparse data was lost when extracted by archivers 2244 * that didn't recognize this extension. 
2245 */ 2246 2247static int 2248gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p) 2249{ 2250 const char *e; 2251 int64_t offset = -1, size = -1; 2252 2253 for (;;) { 2254 e = p; 2255 while (*e != '\0' && *e != ',') { 2256 if (*e < '0' || *e > '9') 2257 return (ARCHIVE_WARN); 2258 e++; 2259 } 2260 if (offset < 0) { 2261 offset = tar_atol10(p, e - p); 2262 if (offset < 0) 2263 return (ARCHIVE_WARN); 2264 } else { 2265 size = tar_atol10(p, e - p); 2266 if (size < 0) 2267 return (ARCHIVE_WARN); 2268 if (gnu_add_sparse_entry(a, tar, offset, size) 2269 != ARCHIVE_OK) 2270 return (ARCHIVE_FATAL); 2271 offset = -1; 2272 } 2273 if (*e == '\0') 2274 return (ARCHIVE_OK); 2275 p = e + 1; 2276 } 2277} 2278 2279/* 2280 * GNU tar sparse format 1.0 2281 * 2282 * The idea: The offset/size data is stored as a series of base-10 2283 * ASCII numbers prepended to the file data, so that dearchivers that 2284 * don't support this format will extract the block map along with the 2285 * data and a separate post-process can restore the sparseness. 2286 * 2287 * Unfortunately, GNU tar 1.16 had a bug that added unnecessary 2288 * padding to the body of the file when using this format. GNU tar 2289 * 1.17 corrected this bug without bumping the version number, so 2290 * it's not possible to support both variants. This code supports 2291 * the later variant at the expense of not supporting the former. 2292 * 2293 * This variant also replaced GNU.sparse.size with GNU.sparse.realsize 2294 * and introduced the GNU.sparse.major/GNU.sparse.minor attributes. 2295 */ 2296 2297/* 2298 * Read the next line from the input, and parse it as a decimal 2299 * integer followed by '\n'. Returns positive integer value or 2300 * negative on error. 
2301 */ 2302static int64_t 2303gnu_sparse_10_atol(struct archive_read *a, struct tar *tar, 2304 int64_t *remaining, size_t *unconsumed) 2305{ 2306 int64_t l, limit, last_digit_limit; 2307 const char *p; 2308 ssize_t bytes_read; 2309 int base, digit; 2310 2311 base = 10; 2312 limit = INT64_MAX / base; 2313 last_digit_limit = INT64_MAX % base; 2314 2315 /* 2316 * Skip any lines starting with '#'; GNU tar specs 2317 * don't require this, but they should. 2318 */ 2319 do { 2320 bytes_read = readline(a, tar, &p, 2321 (ssize_t)tar_min(*remaining, 100), unconsumed); 2322 if (bytes_read <= 0) 2323 return (ARCHIVE_FATAL); 2324 *remaining -= bytes_read; 2325 } while (p[0] == '#'); 2326 2327 l = 0; 2328 while (bytes_read > 0) { 2329 if (*p == '\n') 2330 return (l); 2331 if (*p < '0' || *p >= '0' + base) 2332 return (ARCHIVE_WARN); 2333 digit = *p - '0'; 2334 if (l > limit || (l == limit && digit > last_digit_limit)) 2335 l = INT64_MAX; /* Truncate on overflow. */ 2336 else 2337 l = (l * base) + digit; 2338 p++; 2339 bytes_read--; 2340 } 2341 /* TODO: Error message. */ 2342 return (ARCHIVE_WARN); 2343} 2344 2345/* 2346 * Returns length (in bytes) of the sparse data description 2347 * that was read. 2348 */ 2349static ssize_t 2350gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed) 2351{ 2352 ssize_t bytes_read; 2353 int entries; 2354 int64_t offset, size, to_skip, remaining; 2355 2356 /* Clear out the existing sparse list. */ 2357 gnu_clear_sparse_list(tar); 2358 2359 remaining = tar->entry_bytes_remaining; 2360 2361 /* Parse entries. */ 2362 entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 2363 if (entries < 0) 2364 return (ARCHIVE_FATAL); 2365 /* Parse the individual entries. 
*/ 2366 while (entries-- > 0) { 2367 /* Parse offset/size */ 2368 offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 2369 if (offset < 0) 2370 return (ARCHIVE_FATAL); 2371 size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 2372 if (size < 0) 2373 return (ARCHIVE_FATAL); 2374 /* Add a new sparse entry. */ 2375 if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) 2376 return (ARCHIVE_FATAL); 2377 } 2378 /* Skip rest of block... */ 2379 tar_flush_unconsumed(a, unconsumed); 2380 bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining); 2381 to_skip = 0x1ff & -bytes_read; 2382 if (to_skip != __archive_read_consume(a, to_skip)) 2383 return (ARCHIVE_FATAL); 2384 return ((ssize_t)(bytes_read + to_skip)); 2385} 2386 2387/* 2388 * Solaris pax extension for a sparse file. This is recorded with the 2389 * data and hole pairs. The way recording sparse information by Solaris' 2390 * pax simply indicates where data and sparse are, so the stored contents 2391 * consist of both data and hole. 2392 */ 2393static int 2394solaris_sparse_parse(struct archive_read *a, struct tar *tar, 2395 struct archive_entry *entry, const char *p) 2396{ 2397 const char *e; 2398 int64_t start, end; 2399 int hole = 1; 2400 2401 (void)entry; /* UNUSED */ 2402 2403 end = 0; 2404 if (*p == ' ') 2405 p++; 2406 else 2407 return (ARCHIVE_WARN); 2408 for (;;) { 2409 e = p; 2410 while (*e != '\0' && *e != ' ') { 2411 if (*e < '0' || *e > '9') 2412 return (ARCHIVE_WARN); 2413 e++; 2414 } 2415 start = end; 2416 end = tar_atol10(p, e - p); 2417 if (end < 0) 2418 return (ARCHIVE_WARN); 2419 if (start < end) { 2420 if (gnu_add_sparse_entry(a, tar, start, 2421 end - start) != ARCHIVE_OK) 2422 return (ARCHIVE_FATAL); 2423 tar->sparse_last->hole = hole; 2424 } 2425 if (*e == '\0') 2426 return (ARCHIVE_OK); 2427 p = e + 1; 2428 hole = hole == 0; 2429 } 2430} 2431 2432/*- 2433 * Convert text->integer. 
/*-
 * Convert text->integer.
 *
 * Traditional tar formats (including POSIX) specify base-8 for
 * all of the standard numeric fields.  This is a significant limitation
 * in practice:
 *   = file size is limited to 8GB
 *   = rdevmajor and rdevminor are limited to 21 bits
 *   = uid/gid are limited to 21 bits
 *
 * There are two workarounds for this:
 *   = pax extended headers, which use variable-length string fields
 *   = GNU tar and STAR both allow either base-8 or base-256 in
 *      most fields.  The high bit is set to indicate base-256.
 *
 * On read, this implementation supports both extensions.
 *
 * 'p' is the start of the field and 'char_cnt' its width in bytes; the
 * first byte is examined unconditionally.
 */
static int64_t
tar_atol(const char *p, size_t char_cnt)
{
	/*
	 * Technically, GNU tar considers a field to be in base-256
	 * only if the first byte is 0xff or 0x80.
	 */
	const int is_base256 = (*p & 0x80) != 0;

	return (is_base256 ? tar_atol256(p, char_cnt) : tar_atol8(p, char_cnt));
}
/*
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 *
 * Parses at most 'char_cnt' bytes at 'p': leading spaces/tabs, an
 * optional '-', then digits of the given base (2..10).  Parsing stops
 * at the first byte that is not such a digit; no byte beyond the field
 * is read.  Values out of range saturate at INT64_MAX or INT64_MIN.
 */
static int64_t
tar_atol_base_n(const char *p, size_t char_cnt, int base)
{
	uint64_t magnitude = 0, max_magnitude;
	int negative = 0;

	/* The pointer is never dereferenced once char_cnt reaches zero. */
	while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	if (char_cnt != 0 && *p == '-') {
		negative = 1;
		p++;
		char_cnt--;
	}

	/*
	 * Accumulate the absolute value as unsigned so that the most
	 * negative value needs no special casing: |INT64_MIN| is one
	 * larger than INT64_MAX.
	 */
	max_magnitude = (uint64_t)INT64_MAX + (negative ? 1 : 0);

	for (; char_cnt != 0; p++, char_cnt--) {
		int digit = *p - '0';

		if (digit < 0 || digit >= base)
			break;
		/* Would magnitude * base + digit exceed the maximum? */
		if (magnitude >
		    (max_magnitude - (uint64_t)digit) / (uint64_t)base)
			return (negative ? INT64_MIN : INT64_MAX);
		magnitude = magnitude * (uint64_t)base + (uint64_t)digit;
	}

	if (!negative)
		return ((int64_t)magnitude);
	/* -(int64_t)magnitude would overflow for exactly |INT64_MIN|. */
	if (magnitude == max_magnitude)
		return (INT64_MIN);
	return (-(int64_t)magnitude);
}

/* Parse an octal field; see tar_atol_base_n(). */
static int64_t
tar_atol8(const char *p, size_t char_cnt)
{
	return tar_atol_base_n(p, char_cnt, 8);
}

/* Parse a decimal field; see tar_atol_base_n(). */
static int64_t
tar_atol10(const char *p, size_t char_cnt)
{
	return tar_atol_base_n(p, char_cnt, 10);
}
/*
 * Parse a base-256 integer.  This is just a variable-length
 * twos-complement signed binary value in big-endian order, except
 * that the high-order bit is ignored.  The values here can be up to
 * 12 bytes, so we need to be careful about overflowing 64-bit
 * (8-byte) integers.
 *
 * This code unashamedly assumes that the local machine uses 8-bit
 * bytes and twos-complement arithmetic.
 *
 * Values that do not fit saturate at INT64_MAX or INT64_MIN.  An empty
 * field (char_cnt == 0) yields 0 without touching the buffer.
 */
static int64_t
tar_atol256(const char *_p, size_t char_cnt)
{
	uint64_t l;
	const unsigned char *p = (const unsigned char *)_p;
	unsigned char c, neg;

	/*
	 * Without this guard a zero-length field would be dereferenced
	 * and the "--char_cnt" below would wrap around.
	 */
	if (char_cnt == 0)
		return (0);

	/* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */
	c = *p;
	if (c & 0x40) {
		neg = 0xff;
		c |= 0x80;
		l = ~(uint64_t)0;
	} else {
		neg = 0;
		c &= 0x7f;
		l = 0;
	}

	/* If more than 8 bytes, check that we can ignore
	 * high-order bits without overflow. */
	while (char_cnt > sizeof(int64_t)) {
		--char_cnt;
		if (c != neg)
			return neg ? INT64_MIN : INT64_MAX;
		c = *++p;
	}

	/* c is first byte that fits; if sign mismatch, return overflow */
	if ((c ^ neg) & 0x80) {
		return neg ? INT64_MIN : INT64_MAX;
	}

	/* Accumulate remaining bytes. */
	while (--char_cnt > 0) {
		l = (l << 8) | c;
		c = *++p;
	}
	l = (l << 8) | c;
	/* Return signed twos-complement value. */
	return (int64_t)(l);
}
/*
 * Returns length of line (including trailing newline)
 * or negative on error.  'start' argument is updated to
 * point to first character of line.  This avoids copying
 * when possible.
 *
 * 'limit' is the longest line accepted; longer lines, read errors and
 * end of input all yield ARCHIVE_FATAL.  If the whole line is already
 * in the read-ahead buffer, *start points into that buffer and the
 * line is left recorded in *unconsumed; otherwise the line is
 * assembled in tar->line and *start points there.
 */
static ssize_t
readline(struct archive_read *a, struct tar *tar, const char **start,
    ssize_t limit, size_t *unconsumed)
{
	ssize_t bytes_read;
	ssize_t total_size = 0;
	const void *t;
	const char *s;
	void *p;

	/* Release whatever an earlier call left pending. */
	tar_flush_unconsumed(a, unconsumed);

	t = __archive_read_ahead(a, 1, &bytes_read);
	if (bytes_read <= 0)
		return (ARCHIVE_FATAL);
	s = t;  /* Start of line? */
	p = memchr(t, '\n', bytes_read);
	/* If we found '\n' in the read buffer, return pointer to that. */
	if (p != NULL) {
		bytes_read = 1 + ((const char *)p) - s;
		if (bytes_read > limit) {
			archive_set_error(&a->archive,
			    ARCHIVE_ERRNO_FILE_FORMAT,
			    "Line too long");
			return (ARCHIVE_FATAL);
		}
		*unconsumed = bytes_read;
		*start = s;
		return (bytes_read);
	}
	/* No newline yet: the whole buffer belongs to this line. */
	*unconsumed = bytes_read;
	/* Otherwise, we need to accumulate in a line buffer. */
	for (;;) {
		if (total_size + bytes_read > limit) {
			archive_set_error(&a->archive,
			    ARCHIVE_ERRNO_FILE_FORMAT,
			    "Line too long");
			return (ARCHIVE_FATAL);
		}
		if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
			archive_set_error(&a->archive, ENOMEM,
			    "Can't allocate working buffer");
			return (ARCHIVE_FATAL);
		}
		/* Copy this piece out before the buffer is released. */
		memcpy(tar->line.s + total_size, t, bytes_read);
		tar_flush_unconsumed(a, unconsumed);
		total_size += bytes_read;
		/* If we found '\n', clean up and return. */
		if (p != NULL) {
			*start = tar->line.s;
			return (total_size);
		}
		/* Read some more. */
		t = __archive_read_ahead(a, 1, &bytes_read);
		if (bytes_read <= 0)
			return (ARCHIVE_FATAL);
		s = t;  /* Start of line? */
		p = memchr(t, '\n', bytes_read);
		/* If we found '\n', trim the read. */
		if (p != NULL) {
			bytes_read = 1 + ((const char *)p) - s;
		}
		*unconsumed = bytes_read;
	}
}
// (end of the base64_decode overview that starts on the preceding lines) */
/*
 * Decodes the 'len' bytes at 's' and returns the result in a newly
 * malloc()ed buffer, storing the number of decoded bytes in '*out_len'.
 * The caller owns the buffer and must free() it.  The result is raw
 * data; no NUL terminator is written.  If the buffer cannot be
 * allocated, NULL is returned and '*out_len' is set to 0.
 *
 * Bytes that are not base-64 digits are skipped rather than rejected,
 * and the first '=' or '_' ends decoding.  A trailing group of a single
 * digit carries fewer than 8 bits and produces no output.
 *
 * The reverse lookup table is rebuilt on the stack for every call.  A
 * lazily filled static table would be shared by every thread, and a
 * caller racing with the initialization could see it half filled and
 * silently drop valid digits.
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const unsigned char digits[64] = {
		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
		'4','5','6','7','8','9','+','/' };
	unsigned char decode_table[128];
	const unsigned char *src = (const unsigned char *)s;
	char *out, *d;
	unsigned i;

	/* Map each digit to its 6-bit value; 0xff marks "not a digit". */
	memset(decode_table, 0xff, sizeof(decode_table));
	for (i = 0; i < sizeof(digits); i++)
		decode_table[digits[i]] = (unsigned char)i;

	/*
	 * Four digits yield three bytes, so len - len / 4 bytes always
	 * suffice.  We may not use all of this.
	 */
	out = (char *)malloc(len - len / 4 + 1);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}
	d = out;

	while (len > 0) {
		/* Collect the next group of (up to) four digits. */
		unsigned v = 0;
		int group_size = 0;

		while (group_size < 4 && len > 0) {
			/* '=' or '_' padding indicates final group. */
			if (*src == '=' || *src == '_') {
				len = 0;
				break;
			}
			/* Skip anything else that is not a digit. */
			if (*src > 127 || decode_table[*src] == 0xff) {
				src++;
				len--;
				continue;
			}
			v = (v << 6) | decode_table[*src++];
			len--;
			group_size++;
		}

		/* Left-align a short group within the 24-bit value. */
		v <<= 6 * (4 - group_size);

		/* Two digits give one byte, three give two, four give three. */
		if (group_size >= 2)
			d[0] = (char)((v >> 16) & 0xff);
		if (group_size >= 3)
			d[1] = (char)((v >> 8) & 0xff);
		if (group_size == 4)
			d[2] = (char)(v & 0xff);
		d += group_size * 3 / 4;
	}

	*out_len = (size_t)(d - out);
	return (out);
}

/*
 * Returns the value (0..15) of the hexadecimal digit 'c', accepting
 * either letter case, or -1 if 'c' is not a hexadecimal digit.
 */
static int
tohex(int c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'A' && c <= 'F')
		return (c - 'A' + 10);
	if (c >= 'a' && c <= 'f')
		return (c - 'a' + 10);
	return (-1);
}

/*
 * Returns a malloc()ed, NUL-terminated copy of 'in' in which every
 * "%XX" sequence with two hexadecimal digits has been replaced by the
 * byte it encodes.  A '%' that is not followed by two hexadecimal
 * digits is copied through unchanged.  Returns NULL if the copy cannot
 * be allocated; otherwise the caller must free() the result.
 */
static char *
url_decode(const char *in)
{
	const char *s = in;
	char *out, *d;

	/* Decoding never lengthens the text, so strlen(in) + 1 is enough. */
	out = (char *)malloc(strlen(in) + 1);
	if (out == NULL)
		return (NULL);
	d = out;

	while (*s != '\0') {
		/* Checking s[1] before s[2] keeps us inside the string. */
		if (s[0] == '%' && s[1] != '\0' && s[2] != '\0') {
			int hi = tohex(s[1]);
			int lo = tohex(s[2]);

			if (hi >= 0 && lo >= 0) {
				/* Valid escape: emit one byte, consume three. */
				*d++ = (char)((hi << 4) | lo);
				s += 3;
				continue;
			}
			/* Otherwise treat the '%' as an ordinary character. */
		}
		*d++ = *s++;
	}
	*d = '\0';
	return (out);
}