/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
 * Copyright (c) 2016 Martin Matuska
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26228753Smm */ 27228753Smm 28228753Smm#include "archive_platform.h" 29228763Smm__FBSDID("$FreeBSD: stable/10/contrib/libarchive/libarchive/archive_read_support_format_tar.c 362134 2020-06-12 23:02:34Z mm $"); 30228753Smm 31228753Smm#ifdef HAVE_ERRNO_H 32228753Smm#include <errno.h> 33228753Smm#endif 34228753Smm#include <stddef.h> 35228753Smm#ifdef HAVE_STDLIB_H 36228753Smm#include <stdlib.h> 37228753Smm#endif 38228753Smm#ifdef HAVE_STRING_H 39228753Smm#include <string.h> 40228753Smm#endif 41228753Smm 42228753Smm#include "archive.h" 43232153Smm#include "archive_acl_private.h" /* For ACL parsing routines. */ 44228753Smm#include "archive_entry.h" 45232153Smm#include "archive_entry_locale.h" 46228753Smm#include "archive_private.h" 47228753Smm#include "archive_read_private.h" 48228753Smm 49228753Smm#define tar_min(a,b) ((a) < (b) ? (a) : (b)) 50228753Smm 51228753Smm/* 52228753Smm * Layout of POSIX 'ustar' tar header. 53228753Smm */ 54228753Smmstruct archive_entry_header_ustar { 55228753Smm char name[100]; 56228753Smm char mode[8]; 57228753Smm char uid[8]; 58228753Smm char gid[8]; 59228753Smm char size[12]; 60228753Smm char mtime[12]; 61228753Smm char checksum[8]; 62228753Smm char typeflag[1]; 63228753Smm char linkname[100]; /* "old format" header ends here */ 64228753Smm char magic[6]; /* For POSIX: "ustar\0" */ 65228753Smm char version[2]; /* For POSIX: "00" */ 66228753Smm char uname[32]; 67228753Smm char gname[32]; 68228753Smm char rdevmajor[8]; 69228753Smm char rdevminor[8]; 70228753Smm char prefix[155]; 71228753Smm}; 72228753Smm 73228753Smm/* 74228753Smm * Structure of GNU tar header 75228753Smm */ 76228753Smmstruct gnu_sparse { 77228753Smm char offset[12]; 78228753Smm char numbytes[12]; 79228753Smm}; 80228753Smm 81228753Smmstruct archive_entry_header_gnutar { 82228753Smm char name[100]; 83228753Smm char mode[8]; 84228753Smm char uid[8]; 85228753Smm char gid[8]; 86228753Smm char size[12]; 87228753Smm char mtime[12]; 88228753Smm char checksum[8]; 89228753Smm char 
typeflag[1]; 90228753Smm char linkname[100]; 91228753Smm char magic[8]; /* "ustar \0" (note blank/blank/null at end) */ 92228753Smm char uname[32]; 93228753Smm char gname[32]; 94228753Smm char rdevmajor[8]; 95228753Smm char rdevminor[8]; 96228753Smm char atime[12]; 97228753Smm char ctime[12]; 98228753Smm char offset[12]; 99228753Smm char longnames[4]; 100228753Smm char unused[1]; 101228753Smm struct gnu_sparse sparse[4]; 102228753Smm char isextended[1]; 103228753Smm char realsize[12]; 104228753Smm /* 105228753Smm * Old GNU format doesn't use POSIX 'prefix' field; they use 106228753Smm * the 'L' (longname) entry instead. 107228753Smm */ 108228753Smm}; 109228753Smm 110228753Smm/* 111228753Smm * Data specific to this format. 112228753Smm */ 113228753Smmstruct sparse_block { 114228753Smm struct sparse_block *next; 115232153Smm int64_t offset; 116232153Smm int64_t remaining; 117232153Smm int hole; 118228753Smm}; 119228753Smm 120228753Smmstruct tar { 121228753Smm struct archive_string acl_text; 122228753Smm struct archive_string entry_pathname; 123228753Smm /* For "GNU.sparse.name" and other similar path extensions. 
*/ 124228753Smm struct archive_string entry_pathname_override; 125228753Smm struct archive_string entry_linkpath; 126228753Smm struct archive_string entry_uname; 127228753Smm struct archive_string entry_gname; 128228753Smm struct archive_string longlink; 129228753Smm struct archive_string longname; 130228753Smm struct archive_string pax_header; 131228753Smm struct archive_string pax_global; 132228753Smm struct archive_string line; 133228753Smm int pax_hdrcharset_binary; 134228753Smm int header_recursion_depth; 135228753Smm int64_t entry_bytes_remaining; 136228753Smm int64_t entry_offset; 137228753Smm int64_t entry_padding; 138232153Smm int64_t entry_bytes_unconsumed; 139228753Smm int64_t realsize; 140306322Smm int sparse_allowed; 141228753Smm struct sparse_block *sparse_list; 142228753Smm struct sparse_block *sparse_last; 143228753Smm int64_t sparse_offset; 144228753Smm int64_t sparse_numbytes; 145228753Smm int sparse_gnu_major; 146228753Smm int sparse_gnu_minor; 147228753Smm char sparse_gnu_pending; 148232153Smm 149232153Smm struct archive_string localname; 150232153Smm struct archive_string_conv *opt_sconv; 151232153Smm struct archive_string_conv *sconv; 152232153Smm struct archive_string_conv *sconv_acl; 153232153Smm struct archive_string_conv *sconv_default; 154232153Smm int init_default_conversion; 155232153Smm int compat_2x; 156302001Smm int process_mac_extensions; 157302001Smm int read_concatenated_archives; 158316338Smm int realsize_override; 159228753Smm}; 160228753Smm 161232153Smmstatic int archive_block_is_null(const char *p); 162228753Smmstatic char *base64_decode(const char *, size_t, size_t *); 163232153Smmstatic int gnu_add_sparse_entry(struct archive_read *, struct tar *, 164232153Smm int64_t offset, int64_t remaining); 165232153Smm 166228753Smmstatic void gnu_clear_sparse_list(struct tar *); 167228753Smmstatic int gnu_sparse_old_read(struct archive_read *, struct tar *, 168232153Smm const struct archive_entry_header_gnutar *header, size_t *); 
169232153Smmstatic int gnu_sparse_old_parse(struct archive_read *, struct tar *, 170228753Smm const struct gnu_sparse *sparse, int length); 171232153Smmstatic int gnu_sparse_01_parse(struct archive_read *, struct tar *, 172232153Smm const char *); 173232153Smmstatic ssize_t gnu_sparse_10_read(struct archive_read *, struct tar *, 174232153Smm size_t *); 175228753Smmstatic int header_Solaris_ACL(struct archive_read *, struct tar *, 176232153Smm struct archive_entry *, const void *, size_t *); 177228753Smmstatic int header_common(struct archive_read *, struct tar *, 178228753Smm struct archive_entry *, const void *); 179228753Smmstatic int header_old_tar(struct archive_read *, struct tar *, 180228753Smm struct archive_entry *, const void *); 181228753Smmstatic int header_pax_extensions(struct archive_read *, struct tar *, 182232153Smm struct archive_entry *, const void *, size_t *); 183228753Smmstatic int header_pax_global(struct archive_read *, struct tar *, 184232153Smm struct archive_entry *, const void *h, size_t *); 185228753Smmstatic int header_longlink(struct archive_read *, struct tar *, 186232153Smm struct archive_entry *, const void *h, size_t *); 187228753Smmstatic int header_longname(struct archive_read *, struct tar *, 188232153Smm struct archive_entry *, const void *h, size_t *); 189232153Smmstatic int read_mac_metadata_blob(struct archive_read *, struct tar *, 190232153Smm struct archive_entry *, const void *h, size_t *); 191228753Smmstatic int header_volume(struct archive_read *, struct tar *, 192232153Smm struct archive_entry *, const void *h, size_t *); 193228753Smmstatic int header_ustar(struct archive_read *, struct tar *, 194228753Smm struct archive_entry *, const void *h); 195228753Smmstatic int header_gnutar(struct archive_read *, struct tar *, 196232153Smm struct archive_entry *, const void *h, size_t *); 197232153Smmstatic int archive_read_format_tar_bid(struct archive_read *, int); 198232153Smmstatic int archive_read_format_tar_options(struct 
archive_read *, 199232153Smm const char *, const char *); 200228753Smmstatic int archive_read_format_tar_cleanup(struct archive_read *); 201228753Smmstatic int archive_read_format_tar_read_data(struct archive_read *a, 202232153Smm const void **buff, size_t *size, int64_t *offset); 203228753Smmstatic int archive_read_format_tar_skip(struct archive_read *a); 204228753Smmstatic int archive_read_format_tar_read_header(struct archive_read *, 205228753Smm struct archive_entry *); 206228753Smmstatic int checksum(struct archive_read *, const void *); 207232153Smmstatic int pax_attribute(struct archive_read *, struct tar *, 208313571Smm struct archive_entry *, const char *key, const char *value, 209313571Smm size_t value_length); 210313571Smmstatic int pax_attribute_acl(struct archive_read *, struct tar *, 211313571Smm struct archive_entry *, const char *, int); 212313571Smmstatic int pax_attribute_xattr(struct archive_entry *, const char *, 213313571Smm const char *); 214228753Smmstatic int pax_header(struct archive_read *, struct tar *, 215313571Smm struct archive_entry *, struct archive_string *); 216228753Smmstatic void pax_time(const char *, int64_t *sec, long *nanos); 217228753Smmstatic ssize_t readline(struct archive_read *, struct tar *, const char **, 218232153Smm ssize_t limit, size_t *); 219228753Smmstatic int read_body_to_string(struct archive_read *, struct tar *, 220232153Smm struct archive_string *, const void *h, size_t *); 221232153Smmstatic int solaris_sparse_parse(struct archive_read *, struct tar *, 222232153Smm struct archive_entry *, const char *); 223248616Smmstatic int64_t tar_atol(const char *, size_t); 224248616Smmstatic int64_t tar_atol10(const char *, size_t); 225248616Smmstatic int64_t tar_atol256(const char *, size_t); 226248616Smmstatic int64_t tar_atol8(const char *, size_t); 227228753Smmstatic int tar_read_header(struct archive_read *, struct tar *, 228232153Smm struct archive_entry *, size_t *); 229228753Smmstatic int tohex(int c); 
230228753Smmstatic char *url_decode(const char *); 231232153Smmstatic void tar_flush_unconsumed(struct archive_read *, size_t *); 232228753Smm 233232153Smm 234228753Smmint 235228753Smmarchive_read_support_format_gnutar(struct archive *a) 236228753Smm{ 237232153Smm archive_check_magic(a, ARCHIVE_READ_MAGIC, 238232153Smm ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar"); 239228753Smm return (archive_read_support_format_tar(a)); 240228753Smm} 241228753Smm 242228753Smm 243228753Smmint 244228753Smmarchive_read_support_format_tar(struct archive *_a) 245228753Smm{ 246228753Smm struct archive_read *a = (struct archive_read *)_a; 247228753Smm struct tar *tar; 248228753Smm int r; 249228753Smm 250232153Smm archive_check_magic(_a, ARCHIVE_READ_MAGIC, 251232153Smm ARCHIVE_STATE_NEW, "archive_read_support_format_tar"); 252232153Smm 253232153Smm tar = (struct tar *)calloc(1, sizeof(*tar)); 254228753Smm if (tar == NULL) { 255228753Smm archive_set_error(&a->archive, ENOMEM, 256228753Smm "Can't allocate tar data"); 257228753Smm return (ARCHIVE_FATAL); 258228753Smm } 259328828Smm#ifdef HAVE_COPYFILE_H 260328828Smm /* Set this by default on Mac OS. 
*/ 261328828Smm tar->process_mac_extensions = 1; 262328828Smm#endif 263228753Smm 264228753Smm r = __archive_read_register_format(a, tar, "tar", 265228753Smm archive_read_format_tar_bid, 266232153Smm archive_read_format_tar_options, 267228753Smm archive_read_format_tar_read_header, 268228753Smm archive_read_format_tar_read_data, 269228753Smm archive_read_format_tar_skip, 270248616Smm NULL, 271302001Smm archive_read_format_tar_cleanup, 272302001Smm NULL, 273302001Smm NULL); 274228753Smm 275228753Smm if (r != ARCHIVE_OK) 276228753Smm free(tar); 277228753Smm return (ARCHIVE_OK); 278228753Smm} 279228753Smm 280228753Smmstatic int 281228753Smmarchive_read_format_tar_cleanup(struct archive_read *a) 282228753Smm{ 283228753Smm struct tar *tar; 284228753Smm 285228753Smm tar = (struct tar *)(a->format->data); 286228753Smm gnu_clear_sparse_list(tar); 287228753Smm archive_string_free(&tar->acl_text); 288228753Smm archive_string_free(&tar->entry_pathname); 289228753Smm archive_string_free(&tar->entry_pathname_override); 290228753Smm archive_string_free(&tar->entry_linkpath); 291228753Smm archive_string_free(&tar->entry_uname); 292228753Smm archive_string_free(&tar->entry_gname); 293228753Smm archive_string_free(&tar->line); 294228753Smm archive_string_free(&tar->pax_global); 295228753Smm archive_string_free(&tar->pax_header); 296228753Smm archive_string_free(&tar->longname); 297228753Smm archive_string_free(&tar->longlink); 298232153Smm archive_string_free(&tar->localname); 299228753Smm free(tar); 300228753Smm (a->format->data) = NULL; 301228753Smm return (ARCHIVE_OK); 302228753Smm} 303228753Smm 304311042Smm/* 305311042Smm * Validate number field 306311042Smm * 307311042Smm * This has to be pretty lenient in order to accommodate the enormous 308311042Smm * variety of tar writers in the world: 309311042Smm * = POSIX (IEEE Std 1003.1-1988) ustar requires octal values with leading 310311042Smm * zeros and allows fields to be terminated with space or null characters 311311042Smm * = 
Many writers use different termination (in particular, libarchive 312311042Smm * omits terminator bytes to squeeze one or two more digits) 313311042Smm * = Many writers pad with space and omit leading zeros 314311042Smm * = GNU tar and star write base-256 values if numbers are too 315311042Smm * big to be represented in octal 316311042Smm * 317311042Smm * Examples of specific tar headers that we should support: 318311042Smm * = Perl Archive::Tar terminates uid, gid, devminor and devmajor with two 319311042Smm * null bytes, pads size with spaces and other numeric fields with zeroes 320311042Smm * = plexus-archiver prior to 2.6.3 (before switching to commons-compress) 321311042Smm * may have uid and gid fields filled with spaces without any octal digits 322311042Smm * at all and pads all numeric fields with spaces 323311042Smm * 324311042Smm * This should tolerate all variants in use. It will reject a field 325311042Smm * where the writer just left garbage after a trailing NUL. 326311042Smm */ 327311042Smmstatic int 328311042Smmvalidate_number_field(const char* p_field, size_t i_size) 329311042Smm{ 330311042Smm unsigned char marker = (unsigned char)p_field[0]; 331311042Smm if (marker == 128 || marker == 255 || marker == 0) { 332311042Smm /* Base-256 marker, there's nothing we can check. */ 333311042Smm return 1; 334311042Smm } else { 335311042Smm /* Must be octal */ 336311042Smm size_t i = 0; 337311042Smm /* Skip any leading spaces */ 338311042Smm while (i < i_size && p_field[i] == ' ') { 339311042Smm ++i; 340311042Smm } 341311042Smm /* Skip octal digits. */ 342311042Smm while (i < i_size && p_field[i] >= '0' && p_field[i] <= '7') { 343311042Smm ++i; 344311042Smm } 345311042Smm /* Any remaining characters must be space or NUL padding. 
*/ 346311042Smm while (i < i_size) { 347311042Smm if (p_field[i] != ' ' && p_field[i] != 0) { 348311042Smm return 0; 349311042Smm } 350311042Smm ++i; 351311042Smm } 352311042Smm return 1; 353311042Smm } 354311042Smm} 355228753Smm 356228753Smmstatic int 357232153Smmarchive_read_format_tar_bid(struct archive_read *a, int best_bid) 358228753Smm{ 359228753Smm int bid; 360232153Smm const char *h; 361228753Smm const struct archive_entry_header_ustar *header; 362228753Smm 363232153Smm (void)best_bid; /* UNUSED */ 364232153Smm 365228753Smm bid = 0; 366228753Smm 367228753Smm /* Now let's look at the actual header and see if it matches. */ 368228753Smm h = __archive_read_ahead(a, 512, NULL); 369228753Smm if (h == NULL) 370228753Smm return (-1); 371228753Smm 372228753Smm /* If it's an end-of-archive mark, we can handle it. */ 373232153Smm if (h[0] == 0 && archive_block_is_null(h)) { 374228753Smm /* 375228753Smm * Usually, I bid the number of bits verified, but 376228753Smm * in this case, 4096 seems excessive so I picked 10 as 377228753Smm * an arbitrary but reasonable-seeming value. 378228753Smm */ 379228753Smm return (10); 380228753Smm } 381228753Smm 382228753Smm /* If it's not an end-of-archive mark, it must have a valid checksum.*/ 383228753Smm if (!checksum(a, h)) 384228753Smm return (0); 385228753Smm bid += 48; /* Checksum is usually 6 octal digits. */ 386228753Smm 387228753Smm header = (const struct archive_entry_header_ustar *)h; 388228753Smm 389228753Smm /* Recognize POSIX formats. */ 390228753Smm if ((memcmp(header->magic, "ustar\0", 6) == 0) 391232153Smm && (memcmp(header->version, "00", 2) == 0)) 392228753Smm bid += 56; 393228753Smm 394228753Smm /* Recognize GNU tar format. */ 395228753Smm if ((memcmp(header->magic, "ustar ", 6) == 0) 396232153Smm && (memcmp(header->version, " \0", 2) == 0)) 397228753Smm bid += 56; 398228753Smm 399228753Smm /* Type flag must be null, digit or A-Z, a-z. 
*/ 400228753Smm if (header->typeflag[0] != 0 && 401228753Smm !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') && 402228753Smm !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') && 403228753Smm !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') ) 404228753Smm return (0); 405228753Smm bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */ 406228753Smm 407311042Smm /* 408311042Smm * Check format of mode/uid/gid/mtime/size/rdevmajor/rdevminor fields. 409311042Smm */ 410311042Smm if (bid > 0 && ( 411311042Smm validate_number_field(header->mode, sizeof(header->mode)) == 0 412311042Smm || validate_number_field(header->uid, sizeof(header->uid)) == 0 413311042Smm || validate_number_field(header->gid, sizeof(header->gid)) == 0 414311042Smm || validate_number_field(header->mtime, sizeof(header->mtime)) == 0 415311042Smm || validate_number_field(header->size, sizeof(header->size)) == 0 416311042Smm || validate_number_field(header->rdevmajor, sizeof(header->rdevmajor)) == 0 417311042Smm || validate_number_field(header->rdevminor, sizeof(header->rdevminor)) == 0)) { 418311042Smm bid = 0; 419228753Smm } 420228753Smm 421228753Smm return (bid); 422228753Smm} 423228753Smm 424232153Smmstatic int 425232153Smmarchive_read_format_tar_options(struct archive_read *a, 426232153Smm const char *key, const char *val) 427232153Smm{ 428232153Smm struct tar *tar; 429232153Smm int ret = ARCHIVE_FAILED; 430232153Smm 431232153Smm tar = (struct tar *)(a->format->data); 432232153Smm if (strcmp(key, "compat-2x") == 0) { 433311042Smm /* Handle UTF-8 filenames as libarchive 2.x */ 434302001Smm tar->compat_2x = (val != NULL && val[0] != 0); 435232153Smm tar->init_default_conversion = tar->compat_2x; 436232153Smm return (ARCHIVE_OK); 437232153Smm } else if (strcmp(key, "hdrcharset") == 0) { 438232153Smm if (val == NULL || val[0] == 0) 439232153Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 440232153Smm "tar: hdrcharset option needs a character-set 
name"); 441232153Smm else { 442232153Smm tar->opt_sconv = 443232153Smm archive_string_conversion_from_charset( 444232153Smm &a->archive, val, 0); 445232153Smm if (tar->opt_sconv != NULL) 446232153Smm ret = ARCHIVE_OK; 447232153Smm else 448232153Smm ret = ARCHIVE_FATAL; 449232153Smm } 450232153Smm return (ret); 451302001Smm } else if (strcmp(key, "mac-ext") == 0) { 452302001Smm tar->process_mac_extensions = (val != NULL && val[0] != 0); 453302001Smm return (ARCHIVE_OK); 454302001Smm } else if (strcmp(key, "read_concatenated_archives") == 0) { 455302001Smm tar->read_concatenated_archives = (val != NULL && val[0] != 0); 456302001Smm return (ARCHIVE_OK); 457232153Smm } 458232153Smm 459232153Smm /* Note: The "warn" return is just to inform the options 460232153Smm * supervisor that we didn't handle it. It will generate 461232153Smm * a suitable error if no one used this option. */ 462232153Smm return (ARCHIVE_WARN); 463232153Smm} 464232153Smm 465232153Smm/* utility function- this exists to centralize the logic of tracking 466232153Smm * how much unconsumed data we have floating around, and to consume 467232153Smm * anything outstanding since we're going to do read_aheads 468232153Smm */ 469302001Smmstatic void 470232153Smmtar_flush_unconsumed(struct archive_read *a, size_t *unconsumed) 471232153Smm{ 472232153Smm if (*unconsumed) { 473228753Smm/* 474232153Smm void *data = (void *)__archive_read_ahead(a, *unconsumed, NULL); 475232153Smm * this block of code is to poison claimed unconsumed space, ensuring 476232153Smm * things break if it is in use still. 477232153Smm * currently it WILL break things, so enable it only for debugging this issue 478232153Smm if (data) { 479232153Smm memset(data, 0xff, *unconsumed); 480232153Smm } 481232153Smm*/ 482232153Smm __archive_read_consume(a, *unconsumed); 483232153Smm *unconsumed = 0; 484232153Smm } 485232153Smm} 486232153Smm 487232153Smm/* 488232153Smm * The function invoked by archive_read_next_header(). 
This 489228753Smm * just sets up a few things and then calls the internal 490228753Smm * tar_read_header() function below. 491228753Smm */ 492228753Smmstatic int 493228753Smmarchive_read_format_tar_read_header(struct archive_read *a, 494228753Smm struct archive_entry *entry) 495228753Smm{ 496228753Smm /* 497228753Smm * When converting tar archives to cpio archives, it is 498228753Smm * essential that each distinct file have a distinct inode 499228753Smm * number. To simplify this, we keep a static count here to 500228753Smm * assign fake dev/inode numbers to each tar entry. Note that 501228753Smm * pax format archives may overwrite this with something more 502228753Smm * useful. 503228753Smm * 504228753Smm * Ideally, we would track every file read from the archive so 505228753Smm * that we could assign the same dev/ino pair to hardlinks, 506228753Smm * but the memory required to store a complete lookup table is 507228753Smm * probably not worthwhile just to support the relatively 508228753Smm * obscure tar->cpio conversion case. 509228753Smm */ 510228753Smm static int default_inode; 511228753Smm static int default_dev; 512228753Smm struct tar *tar; 513228753Smm const char *p; 514302001Smm const wchar_t *wp; 515228753Smm int r; 516232153Smm size_t l, unconsumed = 0; 517228753Smm 518228753Smm /* Assign default device/inode values. */ 519228753Smm archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */ 520228753Smm archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */ 521228753Smm /* Limit generated st_ino number to 16 bits. 
*/ 522228753Smm if (default_inode >= 0xffff) { 523228753Smm ++default_dev; 524228753Smm default_inode = 0; 525228753Smm } 526228753Smm 527228753Smm tar = (struct tar *)(a->format->data); 528228753Smm tar->entry_offset = 0; 529232153Smm gnu_clear_sparse_list(tar); 530228753Smm tar->realsize = -1; /* Mark this as "unset" */ 531316338Smm tar->realsize_override = 0; 532228753Smm 533232153Smm /* Setup default string conversion. */ 534232153Smm tar->sconv = tar->opt_sconv; 535232153Smm if (tar->sconv == NULL) { 536232153Smm if (!tar->init_default_conversion) { 537232153Smm tar->sconv_default = 538232153Smm archive_string_default_conversion_for_read(&(a->archive)); 539232153Smm tar->init_default_conversion = 1; 540232153Smm } 541232153Smm tar->sconv = tar->sconv_default; 542232153Smm } 543228753Smm 544232153Smm r = tar_read_header(a, tar, entry, &unconsumed); 545232153Smm 546232153Smm tar_flush_unconsumed(a, &unconsumed); 547232153Smm 548228753Smm /* 549228753Smm * "non-sparse" files are really just sparse files with 550228753Smm * a single block. 551228753Smm */ 552232153Smm if (tar->sparse_list == NULL) { 553232153Smm if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining) 554232153Smm != ARCHIVE_OK) 555232153Smm return (ARCHIVE_FATAL); 556232153Smm } else { 557232153Smm struct sparse_block *sb; 558228753Smm 559232153Smm for (sb = tar->sparse_list; sb != NULL; sb = sb->next) { 560232153Smm if (!sb->hole) 561232153Smm archive_entry_sparse_add_entry(entry, 562232153Smm sb->offset, sb->remaining); 563232153Smm } 564232153Smm } 565232153Smm 566302001Smm if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) { 567228753Smm /* 568228753Smm * "Regular" entry with trailing '/' is really 569228753Smm * directory: This is needed for certain old tar 570228753Smm * variants and even for some broken newer ones. 
571228753Smm */ 572302001Smm if ((wp = archive_entry_pathname_w(entry)) != NULL) { 573232153Smm l = wcslen(wp); 574302001Smm if (l > 0 && wp[l - 1] == L'/') { 575232153Smm archive_entry_set_filetype(entry, AE_IFDIR); 576302001Smm } 577302001Smm } else if ((p = archive_entry_pathname(entry)) != NULL) { 578232153Smm l = strlen(p); 579302001Smm if (l > 0 && p[l - 1] == '/') { 580232153Smm archive_entry_set_filetype(entry, AE_IFDIR); 581302001Smm } 582232153Smm } 583228753Smm } 584228753Smm return (r); 585228753Smm} 586228753Smm 587228753Smmstatic int 588228753Smmarchive_read_format_tar_read_data(struct archive_read *a, 589232153Smm const void **buff, size_t *size, int64_t *offset) 590228753Smm{ 591228753Smm ssize_t bytes_read; 592228753Smm struct tar *tar; 593228753Smm struct sparse_block *p; 594228753Smm 595228753Smm tar = (struct tar *)(a->format->data); 596228753Smm 597238856Smm for (;;) { 598238856Smm /* Remove exhausted entries from sparse list. */ 599238856Smm while (tar->sparse_list != NULL && 600238856Smm tar->sparse_list->remaining == 0) { 601238856Smm p = tar->sparse_list; 602238856Smm tar->sparse_list = p->next; 603238856Smm free(p); 604238856Smm } 605228753Smm 606238856Smm if (tar->entry_bytes_unconsumed) { 607238856Smm __archive_read_consume(a, tar->entry_bytes_unconsumed); 608238856Smm tar->entry_bytes_unconsumed = 0; 609238856Smm } 610232153Smm 611238856Smm /* If we're at end of file, return EOF. 
*/ 612238856Smm if (tar->sparse_list == NULL || 613238856Smm tar->entry_bytes_remaining == 0) { 614238856Smm if (__archive_read_consume(a, tar->entry_padding) < 0) 615238856Smm return (ARCHIVE_FATAL); 616238856Smm tar->entry_padding = 0; 617238856Smm *buff = NULL; 618238856Smm *size = 0; 619238856Smm *offset = tar->realsize; 620238856Smm return (ARCHIVE_EOF); 621238856Smm } 622238856Smm 623238856Smm *buff = __archive_read_ahead(a, 1, &bytes_read); 624238856Smm if (bytes_read < 0) 625228753Smm return (ARCHIVE_FATAL); 626238856Smm if (*buff == NULL) { 627238856Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 628238856Smm "Truncated tar archive"); 629238856Smm return (ARCHIVE_FATAL); 630238856Smm } 631238856Smm if (bytes_read > tar->entry_bytes_remaining) 632238856Smm bytes_read = (ssize_t)tar->entry_bytes_remaining; 633238856Smm /* Don't read more than is available in the 634238856Smm * current sparse block. */ 635238856Smm if (tar->sparse_list->remaining < bytes_read) 636238856Smm bytes_read = (ssize_t)tar->sparse_list->remaining; 637238856Smm *size = bytes_read; 638238856Smm *offset = tar->sparse_list->offset; 639238856Smm tar->sparse_list->remaining -= bytes_read; 640238856Smm tar->sparse_list->offset += bytes_read; 641238856Smm tar->entry_bytes_remaining -= bytes_read; 642238856Smm tar->entry_bytes_unconsumed = bytes_read; 643228753Smm 644238856Smm if (!tar->sparse_list->hole) 645238856Smm return (ARCHIVE_OK); 646238856Smm /* Current is hole data and skip this. */ 647228753Smm } 648228753Smm} 649228753Smm 650228753Smmstatic int 651228753Smmarchive_read_format_tar_skip(struct archive_read *a) 652228753Smm{ 653228753Smm int64_t bytes_skipped; 654286082Sbdrewery int64_t request; 655286082Sbdrewery struct sparse_block *p; 656228753Smm struct tar* tar; 657228753Smm 658228753Smm tar = (struct tar *)(a->format->data); 659228753Smm 660286082Sbdrewery /* Do not consume the hole of a sparse file. 
*/ 661286082Sbdrewery request = 0; 662286082Sbdrewery for (p = tar->sparse_list; p != NULL; p = p->next) { 663302001Smm if (!p->hole) { 664302001Smm if (p->remaining >= INT64_MAX - request) { 665302001Smm return ARCHIVE_FATAL; 666302001Smm } 667286082Sbdrewery request += p->remaining; 668302001Smm } 669286082Sbdrewery } 670286082Sbdrewery if (request > tar->entry_bytes_remaining) 671286082Sbdrewery request = tar->entry_bytes_remaining; 672286082Sbdrewery request += tar->entry_padding + tar->entry_bytes_unconsumed; 673286082Sbdrewery 674286082Sbdrewery bytes_skipped = __archive_read_consume(a, request); 675228753Smm if (bytes_skipped < 0) 676228753Smm return (ARCHIVE_FATAL); 677228753Smm 678228753Smm tar->entry_bytes_remaining = 0; 679232153Smm tar->entry_bytes_unconsumed = 0; 680228753Smm tar->entry_padding = 0; 681228753Smm 682228753Smm /* Free the sparse list. */ 683228753Smm gnu_clear_sparse_list(tar); 684228753Smm 685228753Smm return (ARCHIVE_OK); 686228753Smm} 687228753Smm 688228753Smm/* 689228753Smm * This function recursively interprets all of the headers associated 690228753Smm * with a single entry. 691228753Smm */ 692228753Smmstatic int 693228753Smmtar_read_header(struct archive_read *a, struct tar *tar, 694232153Smm struct archive_entry *entry, size_t *unconsumed) 695228753Smm{ 696228753Smm ssize_t bytes; 697348608Smm int err, eof_vol_header; 698232153Smm const char *h; 699228753Smm const struct archive_entry_header_ustar *header; 700248616Smm const struct archive_entry_header_gnutar *gnuheader; 701228753Smm 702348608Smm eof_vol_header = 0; 703348608Smm 704302001Smm /* Loop until we find a workable header record. */ 705302001Smm for (;;) { 706302001Smm tar_flush_unconsumed(a, unconsumed); 707232153Smm 708302001Smm /* Read 512-byte header record */ 709302001Smm h = __archive_read_ahead(a, 512, &bytes); 710302001Smm if (bytes < 0) 711302001Smm return ((int)bytes); 712302001Smm if (bytes == 0) { /* EOF at a block boundary. 
*/ 713302001Smm /* Some writers do omit the block of nulls. <sigh> */ 714302001Smm return (ARCHIVE_EOF); 715302001Smm } 716302001Smm if (bytes < 512) { /* Short block at EOF; this is bad. */ 717302001Smm archive_set_error(&a->archive, 718302001Smm ARCHIVE_ERRNO_FILE_FORMAT, 719302001Smm "Truncated tar archive"); 720302001Smm return (ARCHIVE_FATAL); 721302001Smm } 722302001Smm *unconsumed = 512; 723228753Smm 724302001Smm /* Header is workable if it's not an end-of-archive mark. */ 725302001Smm if (h[0] != 0 || !archive_block_is_null(h)) 726302001Smm break; 727302001Smm 728302001Smm /* Ensure format is set for archives with only null blocks. */ 729228753Smm if (a->archive.archive_format_name == NULL) { 730228753Smm a->archive.archive_format = ARCHIVE_FORMAT_TAR; 731228753Smm a->archive.archive_format_name = "tar"; 732228753Smm } 733302001Smm 734302001Smm if (!tar->read_concatenated_archives) { 735302001Smm /* Try to consume a second all-null record, as well. */ 736302001Smm tar_flush_unconsumed(a, unconsumed); 737302001Smm h = __archive_read_ahead(a, 512, NULL); 738302001Smm if (h != NULL && h[0] == 0 && archive_block_is_null(h)) 739302001Smm __archive_read_consume(a, 512); 740302001Smm archive_clear_error(&a->archive); 741302001Smm return (ARCHIVE_EOF); 742302001Smm } 743302001Smm 744302001Smm /* 745302001Smm * We're reading concatenated archives, ignore this block and 746302001Smm * loop to get the next. 747302001Smm */ 748228753Smm } 749228753Smm 750228753Smm /* 751228753Smm * Note: If the checksum fails and we return ARCHIVE_RETRY, 752228753Smm * then the client is likely to just retry. This is a very 753228753Smm * crude way to search for the next valid header! 754228753Smm * 755228753Smm * TODO: Improve this by implementing a real header scan. 
756228753Smm */ 757228753Smm if (!checksum(a, h)) { 758232153Smm tar_flush_unconsumed(a, unconsumed); 759228753Smm archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); 760228753Smm return (ARCHIVE_RETRY); /* Retryable: Invalid header */ 761228753Smm } 762228753Smm 763228753Smm if (++tar->header_recursion_depth > 32) { 764232153Smm tar_flush_unconsumed(a, unconsumed); 765228753Smm archive_set_error(&a->archive, EINVAL, "Too many special headers"); 766228753Smm return (ARCHIVE_WARN); 767228753Smm } 768228753Smm 769228753Smm /* Determine the format variant. */ 770228753Smm header = (const struct archive_entry_header_ustar *)h; 771232153Smm 772228753Smm switch(header->typeflag[0]) { 773228753Smm case 'A': /* Solaris tar ACL */ 774228753Smm a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 775228753Smm a->archive.archive_format_name = "Solaris tar"; 776232153Smm err = header_Solaris_ACL(a, tar, entry, h, unconsumed); 777228753Smm break; 778228753Smm case 'g': /* POSIX-standard 'g' header. */ 779228753Smm a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 780228753Smm a->archive.archive_format_name = "POSIX pax interchange format"; 781232153Smm err = header_pax_global(a, tar, entry, h, unconsumed); 782302001Smm if (err == ARCHIVE_EOF) 783302001Smm return (err); 784228753Smm break; 785228753Smm case 'K': /* Long link name (GNU tar, others) */ 786232153Smm err = header_longlink(a, tar, entry, h, unconsumed); 787228753Smm break; 788228753Smm case 'L': /* Long filename (GNU tar, others) */ 789232153Smm err = header_longname(a, tar, entry, h, unconsumed); 790228753Smm break; 791228753Smm case 'V': /* GNU volume header */ 792232153Smm err = header_volume(a, tar, entry, h, unconsumed); 793348608Smm if (err == ARCHIVE_EOF) 794348608Smm eof_vol_header = 1; 795228753Smm break; 796228753Smm case 'X': /* Used by SUN tar; same as 'x'. 
*/ 797228753Smm a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 798228753Smm a->archive.archive_format_name = 799228753Smm "POSIX pax interchange format (Sun variant)"; 800232153Smm err = header_pax_extensions(a, tar, entry, h, unconsumed); 801228753Smm break; 802228753Smm case 'x': /* POSIX-standard 'x' header. */ 803228753Smm a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 804228753Smm a->archive.archive_format_name = "POSIX pax interchange format"; 805232153Smm err = header_pax_extensions(a, tar, entry, h, unconsumed); 806228753Smm break; 807228753Smm default: 808248616Smm gnuheader = (const struct archive_entry_header_gnutar *)h; 809248616Smm if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) { 810228753Smm a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; 811228753Smm a->archive.archive_format_name = "GNU tar format"; 812232153Smm err = header_gnutar(a, tar, entry, h, unconsumed); 813228753Smm } else if (memcmp(header->magic, "ustar", 5) == 0) { 814228753Smm if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { 815228753Smm a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; 816228753Smm a->archive.archive_format_name = "POSIX ustar format"; 817228753Smm } 818228753Smm err = header_ustar(a, tar, entry, h); 819228753Smm } else { 820228753Smm a->archive.archive_format = ARCHIVE_FORMAT_TAR; 821228753Smm a->archive.archive_format_name = "tar (non-POSIX)"; 822228753Smm err = header_old_tar(a, tar, entry, h); 823228753Smm } 824228753Smm } 825232153Smm if (err == ARCHIVE_FATAL) 826232153Smm return (err); 827232153Smm 828232153Smm tar_flush_unconsumed(a, unconsumed); 829232153Smm 830232153Smm h = NULL; 831232153Smm header = NULL; 832232153Smm 833228753Smm --tar->header_recursion_depth; 834232153Smm /* Yuck. Apple's design here ends up storing long pathname 835232153Smm * extensions for both the AppleDouble extension entry and the 836232153Smm * regular entry. 
837232153Smm */ 838232153Smm if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) && 839302001Smm tar->header_recursion_depth == 0 && 840302001Smm tar->process_mac_extensions) { 841232153Smm int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed); 842232153Smm if (err2 < err) 843232153Smm err = err2; 844232153Smm } 845232153Smm 846228753Smm /* We return warnings or success as-is. Anything else is fatal. */ 847232153Smm if (err == ARCHIVE_WARN || err == ARCHIVE_OK) { 848232153Smm if (tar->sparse_gnu_pending) { 849232153Smm if (tar->sparse_gnu_major == 1 && 850232153Smm tar->sparse_gnu_minor == 0) { 851232153Smm ssize_t bytes_read; 852232153Smm 853232153Smm tar->sparse_gnu_pending = 0; 854232153Smm /* Read initial sparse map. */ 855232153Smm bytes_read = gnu_sparse_10_read(a, tar, unconsumed); 856232153Smm if (bytes_read < 0) 857248616Smm return ((int)bytes_read); 858315433Smm tar->entry_bytes_remaining -= bytes_read; 859232153Smm } else { 860232153Smm archive_set_error(&a->archive, 861232153Smm ARCHIVE_ERRNO_MISC, 862232153Smm "Unrecognized GNU sparse file format"); 863232153Smm return (ARCHIVE_WARN); 864232153Smm } 865232153Smm tar->sparse_gnu_pending = 0; 866232153Smm } 867228753Smm return (err); 868232153Smm } 869348608Smm if (err == ARCHIVE_EOF) { 870348608Smm if (!eof_vol_header) { 871348608Smm /* EOF when recursively reading a header is bad. */ 872348608Smm archive_set_error(&a->archive, EINVAL, 873348608Smm "Damaged tar archive"); 874348608Smm } else { 875348608Smm /* If we encounter just a GNU volume header treat 876348608Smm * this situation as an empty archive */ 877348608Smm return (ARCHIVE_EOF); 878348608Smm } 879348608Smm } 880228753Smm return (ARCHIVE_FATAL); 881228753Smm} 882228753Smm 883228753Smm/* 884228753Smm * Return true if block checksum is correct. 
885228753Smm */ 886228753Smmstatic int 887228753Smmchecksum(struct archive_read *a, const void *h) 888228753Smm{ 889228753Smm const unsigned char *bytes; 890228753Smm const struct archive_entry_header_ustar *header; 891302001Smm int check, sum; 892302001Smm size_t i; 893228753Smm 894228753Smm (void)a; /* UNUSED */ 895228753Smm bytes = (const unsigned char *)h; 896228753Smm header = (const struct archive_entry_header_ustar *)h; 897228753Smm 898302001Smm /* Checksum field must hold an octal number */ 899302001Smm for (i = 0; i < sizeof(header->checksum); ++i) { 900302001Smm char c = header->checksum[i]; 901302001Smm if (c != ' ' && c != '\0' && (c < '0' || c > '7')) 902302001Smm return 0; 903302001Smm } 904302001Smm 905228753Smm /* 906228753Smm * Test the checksum. Note that POSIX specifies _unsigned_ 907228753Smm * bytes for this calculation. 908228753Smm */ 909238856Smm sum = (int)tar_atol(header->checksum, sizeof(header->checksum)); 910228753Smm check = 0; 911228753Smm for (i = 0; i < 148; i++) 912228753Smm check += (unsigned char)bytes[i]; 913228753Smm for (; i < 156; i++) 914228753Smm check += 32; 915228753Smm for (; i < 512; i++) 916228753Smm check += (unsigned char)bytes[i]; 917228753Smm if (sum == check) 918228753Smm return (1); 919228753Smm 920228753Smm /* 921228753Smm * Repeat test with _signed_ bytes, just in case this archive 922228753Smm * was created by an old BSD, Solaris, or HP-UX tar with a 923228753Smm * broken checksum calculation. 924228753Smm */ 925228753Smm check = 0; 926228753Smm for (i = 0; i < 148; i++) 927228753Smm check += (signed char)bytes[i]; 928228753Smm for (; i < 156; i++) 929228753Smm check += 32; 930228753Smm for (; i < 512; i++) 931228753Smm check += (signed char)bytes[i]; 932228753Smm if (sum == check) 933228753Smm return (1); 934228753Smm 935228753Smm return (0); 936228753Smm} 937228753Smm 938228753Smm/* 939228753Smm * Return true if this block contains only nulls. 
940228753Smm */ 941228753Smmstatic int 942232153Smmarchive_block_is_null(const char *p) 943228753Smm{ 944228753Smm unsigned i; 945228753Smm 946228753Smm for (i = 0; i < 512; i++) 947228753Smm if (*p++) 948228753Smm return (0); 949228753Smm return (1); 950228753Smm} 951228753Smm 952228753Smm/* 953228753Smm * Interpret 'A' Solaris ACL header 954228753Smm */ 955228753Smmstatic int 956228753Smmheader_Solaris_ACL(struct archive_read *a, struct tar *tar, 957232153Smm struct archive_entry *entry, const void *h, size_t *unconsumed) 958228753Smm{ 959228753Smm const struct archive_entry_header_ustar *header; 960228753Smm size_t size; 961313571Smm int err, acl_type; 962228753Smm int64_t type; 963228753Smm char *acl, *p; 964228753Smm 965228753Smm /* 966228753Smm * read_body_to_string adds a NUL terminator, but we need a little 967228753Smm * more to make sure that we don't overrun acl_text later. 968228753Smm */ 969228753Smm header = (const struct archive_entry_header_ustar *)h; 970238856Smm size = (size_t)tar_atol(header->size, sizeof(header->size)); 971232153Smm err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed); 972228753Smm if (err != ARCHIVE_OK) 973228753Smm return (err); 974232153Smm 975228753Smm /* Recursively read next header */ 976232153Smm err = tar_read_header(a, tar, entry, unconsumed); 977228753Smm if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 978228753Smm return (err); 979228753Smm 980228753Smm /* TODO: Examine the first characters to see if this 981228753Smm * is an AIX ACL descriptor. We'll likely never support 982228753Smm * them, but it would be polite to recognize and warn when 983228753Smm * we do see them. */ 984228753Smm 985228753Smm /* Leading octal number indicates ACL type and number of entries. 
*/ 986228753Smm p = acl = tar->acl_text.s; 987228753Smm type = 0; 988228753Smm while (*p != '\0' && p < acl + size) { 989228753Smm if (*p < '0' || *p > '7') { 990228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 991228753Smm "Malformed Solaris ACL attribute (invalid digit)"); 992228753Smm return(ARCHIVE_WARN); 993228753Smm } 994228753Smm type <<= 3; 995228753Smm type += *p - '0'; 996228753Smm if (type > 077777777) { 997228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 998228753Smm "Malformed Solaris ACL attribute (count too large)"); 999228753Smm return (ARCHIVE_WARN); 1000228753Smm } 1001228753Smm p++; 1002228753Smm } 1003228753Smm switch ((int)type & ~0777777) { 1004228753Smm case 01000000: 1005228753Smm /* POSIX.1e ACL */ 1006313571Smm acl_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; 1007228753Smm break; 1008228753Smm case 03000000: 1009313571Smm /* NFSv4 ACL */ 1010313571Smm acl_type = ARCHIVE_ENTRY_ACL_TYPE_NFS4; 1011313571Smm break; 1012228753Smm default: 1013228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1014228753Smm "Malformed Solaris ACL attribute (unsupported type %o)", 1015228753Smm (int)type); 1016228753Smm return (ARCHIVE_WARN); 1017228753Smm } 1018228753Smm p++; 1019228753Smm 1020228753Smm if (p >= acl + size) { 1021228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1022228753Smm "Malformed Solaris ACL attribute (body overflow)"); 1023228753Smm return(ARCHIVE_WARN); 1024228753Smm } 1025228753Smm 1026228753Smm /* ACL text is null-terminated; find the end. 
*/ 1027228753Smm size -= (p - acl); 1028228753Smm acl = p; 1029228753Smm 1030228753Smm while (*p != '\0' && p < acl + size) 1031228753Smm p++; 1032228753Smm 1033232153Smm if (tar->sconv_acl == NULL) { 1034232153Smm tar->sconv_acl = archive_string_conversion_from_charset( 1035232153Smm &(a->archive), "UTF-8", 1); 1036232153Smm if (tar->sconv_acl == NULL) 1037232153Smm return (ARCHIVE_FATAL); 1038232153Smm } 1039232153Smm archive_strncpy(&(tar->localname), acl, p - acl); 1040313571Smm err = archive_acl_from_text_l(archive_entry_acl(entry), 1041313571Smm tar->localname.s, acl_type, tar->sconv_acl); 1042232153Smm if (err != ARCHIVE_OK) { 1043232153Smm if (errno == ENOMEM) { 1044232153Smm archive_set_error(&a->archive, ENOMEM, 1045232153Smm "Can't allocate memory for ACL"); 1046232153Smm } else 1047232153Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1048232153Smm "Malformed Solaris ACL attribute (unparsable)"); 1049232153Smm } 1050228753Smm return (err); 1051228753Smm} 1052228753Smm 1053228753Smm/* 1054228753Smm * Interpret 'K' long linkname header. 1055228753Smm */ 1056228753Smmstatic int 1057228753Smmheader_longlink(struct archive_read *a, struct tar *tar, 1058232153Smm struct archive_entry *entry, const void *h, size_t *unconsumed) 1059228753Smm{ 1060228753Smm int err; 1061228753Smm 1062232153Smm err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed); 1063228753Smm if (err != ARCHIVE_OK) 1064228753Smm return (err); 1065232153Smm err = tar_read_header(a, tar, entry, unconsumed); 1066228753Smm if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 1067228753Smm return (err); 1068228753Smm /* Set symlink if symlink already set, else hardlink. 
*/ 1069228753Smm archive_entry_copy_link(entry, tar->longlink.s); 1070228753Smm return (ARCHIVE_OK); 1071228753Smm} 1072228753Smm 1073232153Smmstatic int 1074232153Smmset_conversion_failed_error(struct archive_read *a, 1075232153Smm struct archive_string_conv *sconv, const char *name) 1076232153Smm{ 1077232153Smm if (errno == ENOMEM) { 1078232153Smm archive_set_error(&a->archive, ENOMEM, 1079232153Smm "Can't allocate memory for %s", name); 1080232153Smm return (ARCHIVE_FATAL); 1081232153Smm } 1082232153Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1083232153Smm "%s can't be converted from %s to current locale.", 1084232153Smm name, archive_string_conversion_charset_name(sconv)); 1085232153Smm return (ARCHIVE_WARN); 1086232153Smm} 1087232153Smm 1088228753Smm/* 1089228753Smm * Interpret 'L' long filename header. 1090228753Smm */ 1091228753Smmstatic int 1092228753Smmheader_longname(struct archive_read *a, struct tar *tar, 1093232153Smm struct archive_entry *entry, const void *h, size_t *unconsumed) 1094228753Smm{ 1095228753Smm int err; 1096228753Smm 1097232153Smm err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed); 1098228753Smm if (err != ARCHIVE_OK) 1099228753Smm return (err); 1100228753Smm /* Read and parse "real" header, then override name. */ 1101232153Smm err = tar_read_header(a, tar, entry, unconsumed); 1102228753Smm if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 1103228753Smm return (err); 1104232153Smm if (archive_entry_copy_pathname_l(entry, tar->longname.s, 1105232153Smm archive_strlen(&(tar->longname)), tar->sconv) != 0) 1106232153Smm err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1107232153Smm return (err); 1108228753Smm} 1109228753Smm 1110228753Smm 1111228753Smm/* 1112228753Smm * Interpret 'V' GNU tar volume header. 
1113228753Smm */ 1114228753Smmstatic int 1115228753Smmheader_volume(struct archive_read *a, struct tar *tar, 1116232153Smm struct archive_entry *entry, const void *h, size_t *unconsumed) 1117228753Smm{ 1118228753Smm (void)h; 1119228753Smm 1120228753Smm /* Just skip this and read the next header. */ 1121232153Smm return (tar_read_header(a, tar, entry, unconsumed)); 1122228753Smm} 1123228753Smm 1124228753Smm/* 1125228753Smm * Read body of an archive entry into an archive_string object. 1126228753Smm */ 1127228753Smmstatic int 1128228753Smmread_body_to_string(struct archive_read *a, struct tar *tar, 1129232153Smm struct archive_string *as, const void *h, size_t *unconsumed) 1130228753Smm{ 1131232153Smm int64_t size; 1132228753Smm const struct archive_entry_header_ustar *header; 1133228753Smm const void *src; 1134228753Smm 1135228753Smm (void)tar; /* UNUSED */ 1136228753Smm header = (const struct archive_entry_header_ustar *)h; 1137228753Smm size = tar_atol(header->size, sizeof(header->size)); 1138228753Smm if ((size > 1048576) || (size < 0)) { 1139228753Smm archive_set_error(&a->archive, EINVAL, 1140228753Smm "Special header too large"); 1141228753Smm return (ARCHIVE_FATAL); 1142228753Smm } 1143228753Smm 1144228753Smm /* Fail if we can't make our buffer big enough. */ 1145238856Smm if (archive_string_ensure(as, (size_t)size+1) == NULL) { 1146228753Smm archive_set_error(&a->archive, ENOMEM, 1147228753Smm "No memory"); 1148228753Smm return (ARCHIVE_FATAL); 1149228753Smm } 1150228753Smm 1151232153Smm tar_flush_unconsumed(a, unconsumed); 1152232153Smm 1153232153Smm /* Read the body into the string. 
*/ 1154238856Smm *unconsumed = (size_t)((size + 511) & ~ 511); 1155232153Smm src = __archive_read_ahead(a, *unconsumed, NULL); 1156232153Smm if (src == NULL) { 1157232153Smm *unconsumed = 0; 1158228753Smm return (ARCHIVE_FATAL); 1159232153Smm } 1160238856Smm memcpy(as->s, src, (size_t)size); 1161228753Smm as->s[size] = '\0'; 1162238856Smm as->length = (size_t)size; 1163228753Smm return (ARCHIVE_OK); 1164228753Smm} 1165228753Smm 1166228753Smm/* 1167228753Smm * Parse out common header elements. 1168228753Smm * 1169228753Smm * This would be the same as header_old_tar, except that the 1170228753Smm * filename is handled slightly differently for old and POSIX 1171228753Smm * entries (POSIX entries support a 'prefix'). This factoring 1172228753Smm * allows header_old_tar and header_ustar 1173228753Smm * to handle filenames differently, while still putting most of the 1174228753Smm * common parsing into one place. 1175228753Smm */ 1176228753Smmstatic int 1177228753Smmheader_common(struct archive_read *a, struct tar *tar, 1178228753Smm struct archive_entry *entry, const void *h) 1179228753Smm{ 1180228753Smm const struct archive_entry_header_ustar *header; 1181228753Smm char tartype; 1182232153Smm int err = ARCHIVE_OK; 1183228753Smm 1184228753Smm header = (const struct archive_entry_header_ustar *)h; 1185228753Smm if (header->linkname[0]) 1186232153Smm archive_strncpy(&(tar->entry_linkpath), 1187232153Smm header->linkname, sizeof(header->linkname)); 1188228753Smm else 1189228753Smm archive_string_empty(&(tar->entry_linkpath)); 1190228753Smm 1191228753Smm /* Parse out the numeric fields (all are octal) */ 1192238856Smm archive_entry_set_mode(entry, 1193238856Smm (mode_t)tar_atol(header->mode, sizeof(header->mode))); 1194228753Smm archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid))); 1195228753Smm archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid))); 1196228753Smm tar->entry_bytes_remaining = tar_atol(header->size, 
sizeof(header->size)); 1197232153Smm if (tar->entry_bytes_remaining < 0) { 1198232153Smm tar->entry_bytes_remaining = 0; 1199232153Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1200305192Smm "Tar entry has negative size"); 1201305192Smm return (ARCHIVE_FATAL); 1202232153Smm } 1203305192Smm if (tar->entry_bytes_remaining == INT64_MAX) { 1204305192Smm /* Note: tar_atol returns INT64_MAX on overflow */ 1205305192Smm tar->entry_bytes_remaining = 0; 1206305192Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1207305192Smm "Tar entry size overflow"); 1208305192Smm return (ARCHIVE_FATAL); 1209305192Smm } 1210228753Smm tar->realsize = tar->entry_bytes_remaining; 1211228753Smm archive_entry_set_size(entry, tar->entry_bytes_remaining); 1212228753Smm archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0); 1213228753Smm 1214228753Smm /* Handle the tar type flag appropriately. */ 1215228753Smm tartype = header->typeflag[0]; 1216228753Smm 1217228753Smm switch (tartype) { 1218228753Smm case '1': /* Hard link */ 1219232153Smm if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s, 1220232153Smm archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { 1221232153Smm err = set_conversion_failed_error(a, tar->sconv, 1222232153Smm "Linkname"); 1223232153Smm if (err == ARCHIVE_FATAL) 1224232153Smm return (err); 1225232153Smm } 1226228753Smm /* 1227228753Smm * The following may seem odd, but: Technically, tar 1228228753Smm * does not store the file type for a "hard link" 1229228753Smm * entry, only the fact that it is a hard link. So, I 1230228753Smm * leave the type zero normally. But, pax interchange 1231228753Smm * format allows hard links to have data, which 1232228753Smm * implies that the underlying entry is a regular 1233228753Smm * file. 
1234228753Smm */ 1235228753Smm if (archive_entry_size(entry) > 0) 1236228753Smm archive_entry_set_filetype(entry, AE_IFREG); 1237228753Smm 1238228753Smm /* 1239228753Smm * A tricky point: Traditionally, tar readers have 1240228753Smm * ignored the size field when reading hardlink 1241228753Smm * entries, and some writers put non-zero sizes even 1242228753Smm * though the body is empty. POSIX blessed this 1243228753Smm * convention in the 1988 standard, but broke with 1244228753Smm * this tradition in 2001 by permitting hardlink 1245228753Smm * entries to store valid bodies in pax interchange 1246228753Smm * format, but not in ustar format. Since there is no 1247228753Smm * hard and fast way to distinguish pax interchange 1248228753Smm * from earlier archives (the 'x' and 'g' entries are 1249228753Smm * optional, after all), we need a heuristic. 1250228753Smm */ 1251228753Smm if (archive_entry_size(entry) == 0) { 1252228753Smm /* If the size is already zero, we're done. */ 1253228753Smm } else if (a->archive.archive_format 1254228753Smm == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { 1255228753Smm /* Definitely pax extended; must obey hardlink size. */ 1256228753Smm } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR 1257228753Smm || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR) 1258228753Smm { 1259228753Smm /* Old-style or GNU tar: we must ignore the size. */ 1260228753Smm archive_entry_set_size(entry, 0); 1261228753Smm tar->entry_bytes_remaining = 0; 1262232153Smm } else if (archive_read_format_tar_bid(a, 50) > 50) { 1263228753Smm /* 1264228753Smm * We don't know if it's pax: If the bid 1265228753Smm * function sees a valid ustar header 1266228753Smm * immediately following, then let's ignore 1267228753Smm * the hardlink size. 
1268228753Smm */ 1269228753Smm archive_entry_set_size(entry, 0); 1270228753Smm tar->entry_bytes_remaining = 0; 1271228753Smm } 1272228753Smm /* 1273228753Smm * TODO: There are still two cases I'd like to handle: 1274228753Smm * = a ustar non-pax archive with a hardlink entry at 1275228753Smm * end-of-archive. (Look for block of nulls following?) 1276228753Smm * = a pax archive that has not seen any pax headers 1277228753Smm * and has an entry which is a hardlink entry storing 1278228753Smm * a body containing an uncompressed tar archive. 1279228753Smm * The first is worth addressing; I don't see any reliable 1280228753Smm * way to deal with the second possibility. 1281228753Smm */ 1282228753Smm break; 1283228753Smm case '2': /* Symlink */ 1284228753Smm archive_entry_set_filetype(entry, AE_IFLNK); 1285228753Smm archive_entry_set_size(entry, 0); 1286228753Smm tar->entry_bytes_remaining = 0; 1287232153Smm if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s, 1288232153Smm archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { 1289232153Smm err = set_conversion_failed_error(a, tar->sconv, 1290232153Smm "Linkname"); 1291232153Smm if (err == ARCHIVE_FATAL) 1292232153Smm return (err); 1293232153Smm } 1294228753Smm break; 1295228753Smm case '3': /* Character device */ 1296228753Smm archive_entry_set_filetype(entry, AE_IFCHR); 1297228753Smm archive_entry_set_size(entry, 0); 1298228753Smm tar->entry_bytes_remaining = 0; 1299228753Smm break; 1300228753Smm case '4': /* Block device */ 1301228753Smm archive_entry_set_filetype(entry, AE_IFBLK); 1302228753Smm archive_entry_set_size(entry, 0); 1303228753Smm tar->entry_bytes_remaining = 0; 1304228753Smm break; 1305228753Smm case '5': /* Dir */ 1306228753Smm archive_entry_set_filetype(entry, AE_IFDIR); 1307228753Smm archive_entry_set_size(entry, 0); 1308228753Smm tar->entry_bytes_remaining = 0; 1309228753Smm break; 1310228753Smm case '6': /* FIFO device */ 1311228753Smm archive_entry_set_filetype(entry, AE_IFIFO); 
1312228753Smm archive_entry_set_size(entry, 0); 1313228753Smm tar->entry_bytes_remaining = 0; 1314228753Smm break; 1315228753Smm case 'D': /* GNU incremental directory type */ 1316228753Smm /* 1317228753Smm * No special handling is actually required here. 1318228753Smm * It might be nice someday to preprocess the file list and 1319228753Smm * provide it to the client, though. 1320228753Smm */ 1321228753Smm archive_entry_set_filetype(entry, AE_IFDIR); 1322228753Smm break; 1323228753Smm case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/ 1324228753Smm /* 1325228753Smm * As far as I can tell, this is just like a regular file 1326228753Smm * entry, except that the contents should be _appended_ to 1327228753Smm * the indicated file at the indicated offset. This may 1328228753Smm * require some API work to fully support. 1329228753Smm */ 1330228753Smm break; 1331228753Smm case 'N': /* Old GNU "long filename" entry. */ 1332228753Smm /* The body of this entry is a script for renaming 1333228753Smm * previously-extracted entries. Ugh. It will never 1334228753Smm * be supported by libarchive. */ 1335228753Smm archive_entry_set_filetype(entry, AE_IFREG); 1336228753Smm break; 1337228753Smm case 'S': /* GNU sparse files */ 1338228753Smm /* 1339228753Smm * Sparse files are really just regular files with 1340228753Smm * sparse information in the extended area. 1341228753Smm */ 1342228753Smm /* FALLTHROUGH */ 1343306322Smm case '0': 1344306322Smm /* 1345306322Smm * Enable sparse file "read" support only for regular 1346306322Smm * files and explicit GNU sparse files. However, we 1347306322Smm * don't allow non-standard file types to be sparse. 1348306322Smm */ 1349306322Smm tar->sparse_allowed = 1; 1350306322Smm /* FALLTHROUGH */ 1351228753Smm default: /* Regular file and non-standard types */ 1352228753Smm /* 1353228753Smm * Per POSIX: non-recognized types should always be 1354228753Smm * treated as regular files. 
1355228753Smm */ 1356228753Smm archive_entry_set_filetype(entry, AE_IFREG); 1357228753Smm break; 1358228753Smm } 1359232153Smm return (err); 1360228753Smm} 1361228753Smm 1362228753Smm/* 1363228753Smm * Parse out header elements for "old-style" tar archives. 1364228753Smm */ 1365228753Smmstatic int 1366228753Smmheader_old_tar(struct archive_read *a, struct tar *tar, 1367228753Smm struct archive_entry *entry, const void *h) 1368228753Smm{ 1369228753Smm const struct archive_entry_header_ustar *header; 1370232153Smm int err = ARCHIVE_OK, err2; 1371228753Smm 1372228753Smm /* Copy filename over (to ensure null termination). */ 1373228753Smm header = (const struct archive_entry_header_ustar *)h; 1374232153Smm if (archive_entry_copy_pathname_l(entry, 1375232153Smm header->name, sizeof(header->name), tar->sconv) != 0) { 1376232153Smm err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1377232153Smm if (err == ARCHIVE_FATAL) 1378232153Smm return (err); 1379232153Smm } 1380228753Smm 1381228753Smm /* Grab rest of common fields */ 1382232153Smm err2 = header_common(a, tar, entry, h); 1383232153Smm if (err > err2) 1384232153Smm err = err2; 1385228753Smm 1386228753Smm tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1387232153Smm return (err); 1388228753Smm} 1389228753Smm 1390228753Smm/* 1391232153Smm * Read a Mac AppleDouble-encoded blob of file metadata, 1392232153Smm * if there is one. 1393232153Smm */ 1394232153Smmstatic int 1395232153Smmread_mac_metadata_blob(struct archive_read *a, struct tar *tar, 1396232153Smm struct archive_entry *entry, const void *h, size_t *unconsumed) 1397232153Smm{ 1398232153Smm int64_t size; 1399232153Smm const void *data; 1400232153Smm const char *p, *name; 1401232153Smm const wchar_t *wp, *wname; 1402232153Smm 1403232153Smm (void)h; /* UNUSED */ 1404232153Smm 1405232153Smm wname = wp = archive_entry_pathname_w(entry); 1406232153Smm if (wp != NULL) { 1407232153Smm /* Find the last path element. 
*/ 1408232153Smm for (; *wp != L'\0'; ++wp) { 1409232153Smm if (wp[0] == '/' && wp[1] != L'\0') 1410232153Smm wname = wp + 1; 1411232153Smm } 1412302001Smm /* 1413232153Smm * If last path element starts with "._", then 1414232153Smm * this is a Mac extension. 1415232153Smm */ 1416232153Smm if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0') 1417232153Smm return ARCHIVE_OK; 1418232153Smm } else { 1419232153Smm /* Find the last path element. */ 1420232153Smm name = p = archive_entry_pathname(entry); 1421232153Smm if (p == NULL) 1422232153Smm return (ARCHIVE_FAILED); 1423232153Smm for (; *p != '\0'; ++p) { 1424232153Smm if (p[0] == '/' && p[1] != '\0') 1425232153Smm name = p + 1; 1426232153Smm } 1427302001Smm /* 1428232153Smm * If last path element starts with "._", then 1429232153Smm * this is a Mac extension. 1430232153Smm */ 1431232153Smm if (name[0] != '.' || name[1] != '_' || name[2] == '\0') 1432232153Smm return ARCHIVE_OK; 1433232153Smm } 1434232153Smm 1435232153Smm /* Read the body as a Mac OS metadata blob. */ 1436232153Smm size = archive_entry_size(entry); 1437232153Smm 1438232153Smm /* 1439232153Smm * TODO: Look beyond the body here to peek at the next header. 1440232153Smm * If it's a regular header (not an extension header) 1441232153Smm * that has the wrong name, just return the current 1442232153Smm * entry as-is, without consuming the body here. 1443232153Smm * That would reduce the risk of us mis-identifying 1444232153Smm * an ordinary file that just happened to have 1445232153Smm * a name starting with "._". 1446232153Smm * 1447232153Smm * Q: Is the above idea really possible? Even 1448232153Smm * when there are GNU or pax extension entries? 
1449232153Smm */ 1450238856Smm data = __archive_read_ahead(a, (size_t)size, NULL); 1451232153Smm if (data == NULL) { 1452232153Smm *unconsumed = 0; 1453232153Smm return (ARCHIVE_FATAL); 1454232153Smm } 1455238856Smm archive_entry_copy_mac_metadata(entry, data, (size_t)size); 1456238856Smm *unconsumed = (size_t)((size + 511) & ~ 511); 1457232153Smm tar_flush_unconsumed(a, unconsumed); 1458232153Smm return (tar_read_header(a, tar, entry, unconsumed)); 1459232153Smm} 1460232153Smm 1461232153Smm/* 1462228753Smm * Parse a file header for a pax extended archive entry. 1463228753Smm */ 1464228753Smmstatic int 1465228753Smmheader_pax_global(struct archive_read *a, struct tar *tar, 1466232153Smm struct archive_entry *entry, const void *h, size_t *unconsumed) 1467228753Smm{ 1468228753Smm int err; 1469228753Smm 1470232153Smm err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed); 1471228753Smm if (err != ARCHIVE_OK) 1472228753Smm return (err); 1473232153Smm err = tar_read_header(a, tar, entry, unconsumed); 1474228753Smm return (err); 1475228753Smm} 1476228753Smm 1477228753Smmstatic int 1478228753Smmheader_pax_extensions(struct archive_read *a, struct tar *tar, 1479232153Smm struct archive_entry *entry, const void *h, size_t *unconsumed) 1480228753Smm{ 1481228753Smm int err, err2; 1482228753Smm 1483232153Smm err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed); 1484228753Smm if (err != ARCHIVE_OK) 1485228753Smm return (err); 1486228753Smm 1487228753Smm /* Parse the next header. */ 1488232153Smm err = tar_read_header(a, tar, entry, unconsumed); 1489228753Smm if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) 1490228753Smm return (err); 1491228753Smm 1492228753Smm /* 1493228753Smm * TODO: Parse global/default options into 'entry' struct here 1494228753Smm * before handling file-specific options. 
1495228753Smm * 1496228753Smm * This design (parse standard header, then overwrite with pax 1497228753Smm * extended attribute data) usually works well, but isn't ideal; 1498228753Smm * it would be better to parse the pax extended attributes first 1499228753Smm * and then skip any fields in the standard header that were 1500228753Smm * defined in the pax header. 1501228753Smm */ 1502313571Smm err2 = pax_header(a, tar, entry, &tar->pax_header); 1503228753Smm err = err_combine(err, err2); 1504228753Smm tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1505228753Smm return (err); 1506228753Smm} 1507228753Smm 1508228753Smm 1509228753Smm/* 1510228753Smm * Parse a file header for a Posix "ustar" archive entry. This also 1511228753Smm * handles "pax" or "extended ustar" entries. 1512228753Smm */ 1513228753Smmstatic int 1514228753Smmheader_ustar(struct archive_read *a, struct tar *tar, 1515228753Smm struct archive_entry *entry, const void *h) 1516228753Smm{ 1517228753Smm const struct archive_entry_header_ustar *header; 1518228753Smm struct archive_string *as; 1519232153Smm int err = ARCHIVE_OK, r; 1520228753Smm 1521228753Smm header = (const struct archive_entry_header_ustar *)h; 1522228753Smm 1523228753Smm /* Copy name into an internal buffer to ensure null-termination. 
*/ 1524228753Smm as = &(tar->entry_pathname); 1525228753Smm if (header->prefix[0]) { 1526228753Smm archive_strncpy(as, header->prefix, sizeof(header->prefix)); 1527228753Smm if (as->s[archive_strlen(as) - 1] != '/') 1528228753Smm archive_strappend_char(as, '/'); 1529228753Smm archive_strncat(as, header->name, sizeof(header->name)); 1530232153Smm } else { 1531228753Smm archive_strncpy(as, header->name, sizeof(header->name)); 1532232153Smm } 1533232153Smm if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as), 1534232153Smm tar->sconv) != 0) { 1535232153Smm err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 1536232153Smm if (err == ARCHIVE_FATAL) 1537232153Smm return (err); 1538232153Smm } 1539228753Smm 1540228753Smm /* Handle rest of common fields. */ 1541232153Smm r = header_common(a, tar, entry, h); 1542232153Smm if (r == ARCHIVE_FATAL) 1543232153Smm return (r); 1544232153Smm if (r < err) 1545232153Smm err = r; 1546228753Smm 1547228753Smm /* Handle POSIX ustar fields. */ 1548232153Smm if (archive_entry_copy_uname_l(entry, 1549232153Smm header->uname, sizeof(header->uname), tar->sconv) != 0) { 1550232153Smm err = set_conversion_failed_error(a, tar->sconv, "Uname"); 1551232153Smm if (err == ARCHIVE_FATAL) 1552232153Smm return (err); 1553232153Smm } 1554228753Smm 1555232153Smm if (archive_entry_copy_gname_l(entry, 1556232153Smm header->gname, sizeof(header->gname), tar->sconv) != 0) { 1557232153Smm err = set_conversion_failed_error(a, tar->sconv, "Gname"); 1558232153Smm if (err == ARCHIVE_FATAL) 1559232153Smm return (err); 1560232153Smm } 1561228753Smm 1562228753Smm /* Parse out device numbers only for char and block specials. 
*/ 1563228753Smm if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { 1564238856Smm archive_entry_set_rdevmajor(entry, (dev_t) 1565228753Smm tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); 1566238856Smm archive_entry_set_rdevminor(entry, (dev_t) 1567228753Smm tar_atol(header->rdevminor, sizeof(header->rdevminor))); 1568228753Smm } 1569228753Smm 1570228753Smm tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 1571228753Smm 1572232153Smm return (err); 1573228753Smm} 1574228753Smm 1575228753Smm 1576228753Smm/* 1577228753Smm * Parse the pax extended attributes record. 1578228753Smm * 1579228753Smm * Returns non-zero if there's an error in the data. 1580228753Smm */ 1581228753Smmstatic int 1582228753Smmpax_header(struct archive_read *a, struct tar *tar, 1583313571Smm struct archive_entry *entry, struct archive_string *in_as) 1584228753Smm{ 1585313571Smm size_t attr_length, l, line_length, value_length; 1586228753Smm char *p; 1587228753Smm char *key, *value; 1588232153Smm struct archive_string *as; 1589232153Smm struct archive_string_conv *sconv; 1590228753Smm int err, err2; 1591313571Smm char *attr = in_as->s; 1592228753Smm 1593313571Smm attr_length = in_as->length; 1594228753Smm tar->pax_hdrcharset_binary = 0; 1595228753Smm archive_string_empty(&(tar->entry_gname)); 1596228753Smm archive_string_empty(&(tar->entry_linkpath)); 1597228753Smm archive_string_empty(&(tar->entry_pathname)); 1598228753Smm archive_string_empty(&(tar->entry_pathname_override)); 1599228753Smm archive_string_empty(&(tar->entry_uname)); 1600228753Smm err = ARCHIVE_OK; 1601228753Smm while (attr_length > 0) { 1602228753Smm /* Parse decimal length field at start of line. */ 1603228753Smm line_length = 0; 1604228753Smm l = attr_length; 1605228753Smm p = attr; /* Record start of line. 
*/ 1606228753Smm while (l>0) { 1607228753Smm if (*p == ' ') { 1608228753Smm p++; 1609228753Smm l--; 1610228753Smm break; 1611228753Smm } 1612228753Smm if (*p < '0' || *p > '9') { 1613228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1614228753Smm "Ignoring malformed pax extended attributes"); 1615228753Smm return (ARCHIVE_WARN); 1616228753Smm } 1617228753Smm line_length *= 10; 1618228753Smm line_length += *p - '0'; 1619228753Smm if (line_length > 999999) { 1620228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1621228753Smm "Rejecting pax extended attribute > 1MB"); 1622228753Smm return (ARCHIVE_WARN); 1623228753Smm } 1624228753Smm p++; 1625228753Smm l--; 1626228753Smm } 1627228753Smm 1628228753Smm /* 1629228753Smm * Parsed length must be no bigger than available data, 1630228753Smm * at least 1, and the last character of the line must 1631228753Smm * be '\n'. 1632228753Smm */ 1633228753Smm if (line_length > attr_length 1634228753Smm || line_length < 1 1635228753Smm || attr[line_length - 1] != '\n') 1636228753Smm { 1637228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1638228753Smm "Ignoring malformed pax extended attribute"); 1639228753Smm return (ARCHIVE_WARN); 1640228753Smm } 1641228753Smm 1642228753Smm /* Null-terminate the line. */ 1643228753Smm attr[line_length - 1] = '\0'; 1644228753Smm 1645228753Smm /* Find end of key and null terminate it. 
*/ 1646228753Smm key = p; 1647228753Smm if (key[0] == '=') 1648228753Smm return (-1); 1649228753Smm while (*p && *p != '=') 1650228753Smm ++p; 1651228753Smm if (*p == '\0') { 1652228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1653228753Smm "Invalid pax extended attributes"); 1654228753Smm return (ARCHIVE_WARN); 1655228753Smm } 1656228753Smm *p = '\0'; 1657228753Smm 1658228753Smm value = p + 1; 1659228753Smm 1660313571Smm /* Some values may be binary data */ 1661313571Smm value_length = attr + line_length - 1 - value; 1662313571Smm 1663228753Smm /* Identify this attribute and set it in the entry. */ 1664313571Smm err2 = pax_attribute(a, tar, entry, key, value, value_length); 1665232153Smm if (err2 == ARCHIVE_FATAL) 1666232153Smm return (err2); 1667228753Smm err = err_combine(err, err2); 1668228753Smm 1669228753Smm /* Skip to next line */ 1670228753Smm attr += line_length; 1671228753Smm attr_length -= line_length; 1672228753Smm } 1673232153Smm 1674232153Smm /* 1675232153Smm * PAX format uses UTF-8 as default charset for its metadata 1676232153Smm * unless hdrcharset=BINARY is present in its header. 1677232153Smm * We apply the charset specified by the hdrcharset option only 1678232153Smm * when the hdrcharset attribute(in PAX header) is BINARY because 1679232153Smm * we respect the charset described in PAX header and BINARY also 1680232153Smm * means that metadata(filename,uname and gname) character-set 1681232153Smm * is unknown. 
1682232153Smm */ 1683232153Smm if (tar->pax_hdrcharset_binary) 1684232153Smm sconv = tar->opt_sconv; 1685232153Smm else { 1686232153Smm sconv = archive_string_conversion_from_charset( 1687232153Smm &(a->archive), "UTF-8", 1); 1688232153Smm if (sconv == NULL) 1689232153Smm return (ARCHIVE_FATAL); 1690232153Smm if (tar->compat_2x) 1691232153Smm archive_string_conversion_set_opt(sconv, 1692232153Smm SCONV_SET_OPT_UTF8_LIBARCHIVE2X); 1693232153Smm } 1694232153Smm 1695228753Smm if (archive_strlen(&(tar->entry_gname)) > 0) { 1696232153Smm if (archive_entry_copy_gname_l(entry, tar->entry_gname.s, 1697232153Smm archive_strlen(&(tar->entry_gname)), sconv) != 0) { 1698232153Smm err = set_conversion_failed_error(a, sconv, "Gname"); 1699232153Smm if (err == ARCHIVE_FATAL) 1700232153Smm return (err); 1701232153Smm /* Use a converted an original name. */ 1702232153Smm archive_entry_copy_gname(entry, tar->entry_gname.s); 1703228753Smm } 1704228753Smm } 1705228753Smm if (archive_strlen(&(tar->entry_linkpath)) > 0) { 1706232153Smm if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s, 1707232153Smm archive_strlen(&(tar->entry_linkpath)), sconv) != 0) { 1708232153Smm err = set_conversion_failed_error(a, sconv, "Linkname"); 1709232153Smm if (err == ARCHIVE_FATAL) 1710232153Smm return (err); 1711232153Smm /* Use a converted an original name. */ 1712232153Smm archive_entry_copy_link(entry, tar->entry_linkpath.s); 1713228753Smm } 1714228753Smm } 1715228753Smm /* 1716228753Smm * Some extensions (such as the GNU sparse file extensions) 1717228753Smm * deliberately store a synthetic name under the regular 'path' 1718228753Smm * attribute and the real file name under a different attribute. 1719228753Smm * Since we're supposed to not care about the order, we 1720228753Smm * have no choice but to store all of the various filenames 1721228753Smm * we find and figure it all out afterwards. This is the 1722228753Smm * figuring out part. 
1723228753Smm */ 1724232153Smm as = NULL; 1725228753Smm if (archive_strlen(&(tar->entry_pathname_override)) > 0) 1726232153Smm as = &(tar->entry_pathname_override); 1727228753Smm else if (archive_strlen(&(tar->entry_pathname)) > 0) 1728232153Smm as = &(tar->entry_pathname); 1729232153Smm if (as != NULL) { 1730232153Smm if (archive_entry_copy_pathname_l(entry, as->s, 1731232153Smm archive_strlen(as), sconv) != 0) { 1732232153Smm err = set_conversion_failed_error(a, sconv, "Pathname"); 1733232153Smm if (err == ARCHIVE_FATAL) 1734232153Smm return (err); 1735232153Smm /* Use a converted an original name. */ 1736232153Smm archive_entry_copy_pathname(entry, as->s); 1737228753Smm } 1738228753Smm } 1739228753Smm if (archive_strlen(&(tar->entry_uname)) > 0) { 1740232153Smm if (archive_entry_copy_uname_l(entry, tar->entry_uname.s, 1741232153Smm archive_strlen(&(tar->entry_uname)), sconv) != 0) { 1742232153Smm err = set_conversion_failed_error(a, sconv, "Uname"); 1743232153Smm if (err == ARCHIVE_FATAL) 1744232153Smm return (err); 1745232153Smm /* Use a converted an original name. 
/*
 * Store a "LIBARCHIVE.xattr.<url-encoded name>" attribute whose value
 * is base64 text as an extended attribute on 'entry'.
 *
 * Returns 0 on success, 1 if the value cannot be base64-decoded,
 * 2 if the name cannot be URL-decoded, and 3 if 'name' does not have
 * the expected prefix followed by at least one character.
 */
static int
pax_attribute_xattr(struct archive_entry *entry,
	const char *name, const char *value)
{
	static const char prefix[] = "LIBARCHIVE.xattr.";
	const size_t prefix_len = sizeof(prefix) - 1;
	char *decoded_name;
	void *decoded_value;
	size_t decoded_len;

	if (strlen(name) <= prefix_len ||
	    memcmp(name, prefix, prefix_len) != 0)
		return 3;

	decoded_name = url_decode(name + prefix_len);
	if (decoded_name == NULL)
		return 2;

	decoded_value = base64_decode(value, strlen(value), &decoded_len);
	if (decoded_value == NULL) {
		free(decoded_name);
		return 1;
	}

	archive_entry_xattr_add_entry(entry, decoded_name,
	    decoded_value, decoded_len);

	free(decoded_value);
	free(decoded_name);
	return 0;
}

/*
 * Store a "SCHILY.xattr.<name>" attribute as an extended attribute on
 * 'entry'.  The name and the 'value_length' bytes of 'value' are
 * passed through without decoding.
 *
 * Returns 0 on success, 1 if 'name' does not have the expected prefix
 * followed by at least one character.
 */
static int
pax_attribute_schily_xattr(struct archive_entry *entry,
	const char *name, const char *value, size_t value_length)
{
	static const char prefix[] = "SCHILY.xattr.";
	const size_t prefix_len = sizeof(prefix) - 1;

	if (strlen(name) <= prefix_len ||
	    memcmp(name, prefix, prefix_len) != 0)
		return 1;

	archive_entry_xattr_add_entry(entry, name + prefix_len,
	    value, value_length);
	return 0;
}

/*
 * Store the given value as the "security.selinux" extended attribute
 * on 'entry'.  Always returns 0.
 */
static int
pax_attribute_rht_security_selinux(struct archive_entry *entry,
	const char *value, size_t value_length)
{
	static const char xattr_name[] = "security.selinux";

	archive_entry_xattr_add_entry(entry, xattr_name, value, value_length);
	return 0;
}
value_length); 1805362134Smm 1806362134Smm return 0; 1807362134Smm} 1808362134Smm 1809362134Smmstatic int 1810313571Smmpax_attribute_acl(struct archive_read *a, struct tar *tar, 1811313571Smm struct archive_entry *entry, const char *value, int type) 1812313571Smm{ 1813313571Smm int r; 1814313571Smm const char* errstr; 1815313571Smm 1816313571Smm switch (type) { 1817313571Smm case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: 1818313571Smm errstr = "SCHILY.acl.access"; 1819313571Smm break; 1820313571Smm case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: 1821313571Smm errstr = "SCHILY.acl.default"; 1822313571Smm break; 1823313571Smm case ARCHIVE_ENTRY_ACL_TYPE_NFS4: 1824313571Smm errstr = "SCHILY.acl.ace"; 1825313571Smm break; 1826313571Smm default: 1827313571Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1828313571Smm "Unknown ACL type: %d", type); 1829313571Smm return(ARCHIVE_FATAL); 1830313571Smm } 1831313571Smm 1832313571Smm if (tar->sconv_acl == NULL) { 1833313571Smm tar->sconv_acl = 1834313571Smm archive_string_conversion_from_charset( 1835313571Smm &(a->archive), "UTF-8", 1); 1836313571Smm if (tar->sconv_acl == NULL) 1837313571Smm return (ARCHIVE_FATAL); 1838313571Smm } 1839313571Smm 1840313571Smm r = archive_acl_from_text_l(archive_entry_acl(entry), value, type, 1841313571Smm tar->sconv_acl); 1842313571Smm if (r != ARCHIVE_OK) { 1843313571Smm if (r == ARCHIVE_FATAL) { 1844313571Smm archive_set_error(&a->archive, ENOMEM, 1845313571Smm "%s %s", "Can't allocate memory for ", 1846313571Smm errstr); 1847313571Smm return (r); 1848313571Smm } 1849313571Smm archive_set_error(&a->archive, 1850313571Smm ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", errstr); 1851313571Smm } 1852313571Smm return (r); 1853313571Smm} 1854313571Smm 1855228753Smm/* 1856228753Smm * Parse a single key=value attribute. key/value pointers are 1857228753Smm * assumed to point into reasonably long-lived storage. 1858228753Smm * 1859228753Smm * Note that POSIX reserves all-lowercase keywords. 
Vendor-specific 1860228753Smm * extensions should always have keywords of the form "VENDOR.attribute" 1861228753Smm * In particular, it's quite feasible to support many different 1862228753Smm * vendor extensions here. I'm using "LIBARCHIVE" for extensions 1863228753Smm * unique to this library. 1864228753Smm * 1865228753Smm * Investigate other vendor-specific extensions and see if 1866228753Smm * any of them look useful. 1867228753Smm */ 1868228753Smmstatic int 1869232153Smmpax_attribute(struct archive_read *a, struct tar *tar, 1870313571Smm struct archive_entry *entry, const char *key, const char *value, size_t value_length) 1871228753Smm{ 1872228753Smm int64_t s; 1873228753Smm long n; 1874232153Smm int err = ARCHIVE_OK, r; 1875228753Smm 1876238856Smm#ifndef __FreeBSD__ 1877238856Smm if (value == NULL) 1878238856Smm value = ""; /* Disable compiler warning; do not pass 1879238856Smm * NULL pointer to strlen(). */ 1880238856Smm#endif 1881228753Smm switch (key[0]) { 1882228753Smm case 'G': 1883306322Smm /* Reject GNU.sparse.* headers on non-regular files. */ 1884306322Smm if (strncmp(key, "GNU.sparse", 10) == 0 && 1885306322Smm !tar->sparse_allowed) { 1886306322Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1887306322Smm "Non-regular file cannot be sparse"); 1888306322Smm return (ARCHIVE_FATAL); 1889306322Smm } 1890306322Smm 1891228753Smm /* GNU "0.0" sparse pax format. 
*/ 1892228753Smm if (strcmp(key, "GNU.sparse.numblocks") == 0) { 1893228753Smm tar->sparse_offset = -1; 1894228753Smm tar->sparse_numbytes = -1; 1895228753Smm tar->sparse_gnu_major = 0; 1896228753Smm tar->sparse_gnu_minor = 0; 1897228753Smm } 1898228753Smm if (strcmp(key, "GNU.sparse.offset") == 0) { 1899228753Smm tar->sparse_offset = tar_atol10(value, strlen(value)); 1900228753Smm if (tar->sparse_numbytes != -1) { 1901232153Smm if (gnu_add_sparse_entry(a, tar, 1902232153Smm tar->sparse_offset, tar->sparse_numbytes) 1903232153Smm != ARCHIVE_OK) 1904232153Smm return (ARCHIVE_FATAL); 1905228753Smm tar->sparse_offset = -1; 1906228753Smm tar->sparse_numbytes = -1; 1907228753Smm } 1908228753Smm } 1909228753Smm if (strcmp(key, "GNU.sparse.numbytes") == 0) { 1910228753Smm tar->sparse_numbytes = tar_atol10(value, strlen(value)); 1911228753Smm if (tar->sparse_numbytes != -1) { 1912232153Smm if (gnu_add_sparse_entry(a, tar, 1913232153Smm tar->sparse_offset, tar->sparse_numbytes) 1914232153Smm != ARCHIVE_OK) 1915232153Smm return (ARCHIVE_FATAL); 1916228753Smm tar->sparse_offset = -1; 1917228753Smm tar->sparse_numbytes = -1; 1918228753Smm } 1919228753Smm } 1920228753Smm if (strcmp(key, "GNU.sparse.size") == 0) { 1921228753Smm tar->realsize = tar_atol10(value, strlen(value)); 1922228753Smm archive_entry_set_size(entry, tar->realsize); 1923316338Smm tar->realsize_override = 1; 1924228753Smm } 1925228753Smm 1926228753Smm /* GNU "0.1" sparse pax format. 
*/ 1927228753Smm if (strcmp(key, "GNU.sparse.map") == 0) { 1928228753Smm tar->sparse_gnu_major = 0; 1929228753Smm tar->sparse_gnu_minor = 1; 1930232153Smm if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK) 1931228753Smm return (ARCHIVE_WARN); 1932228753Smm } 1933228753Smm 1934228753Smm /* GNU "1.0" sparse pax format */ 1935228753Smm if (strcmp(key, "GNU.sparse.major") == 0) { 1936238856Smm tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value)); 1937228753Smm tar->sparse_gnu_pending = 1; 1938228753Smm } 1939228753Smm if (strcmp(key, "GNU.sparse.minor") == 0) { 1940238856Smm tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value)); 1941228753Smm tar->sparse_gnu_pending = 1; 1942228753Smm } 1943228753Smm if (strcmp(key, "GNU.sparse.name") == 0) { 1944228753Smm /* 1945228753Smm * The real filename; when storing sparse 1946228753Smm * files, GNU tar puts a synthesized name into 1947228753Smm * the regular 'path' attribute in an attempt 1948228753Smm * to limit confusion. ;-) 1949228753Smm */ 1950228753Smm archive_strcpy(&(tar->entry_pathname_override), value); 1951228753Smm } 1952228753Smm if (strcmp(key, "GNU.sparse.realsize") == 0) { 1953228753Smm tar->realsize = tar_atol10(value, strlen(value)); 1954228753Smm archive_entry_set_size(entry, tar->realsize); 1955316338Smm tar->realsize_override = 1; 1956228753Smm } 1957228753Smm break; 1958228753Smm case 'L': 1959228753Smm /* Our extensions */ 1960228753Smm/* TODO: Handle arbitrary extended attributes... 
*/ 1961228753Smm/* 1962232153Smm if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0) 1963228753Smm archive_entry_set_xxxxxx(entry, value); 1964228753Smm*/ 1965232153Smm if (strcmp(key, "LIBARCHIVE.creationtime") == 0) { 1966228753Smm pax_time(value, &s, &n); 1967228753Smm archive_entry_set_birthtime(entry, s, n); 1968228753Smm } 1969348608Smm if (strcmp(key, "LIBARCHIVE.symlinktype") == 0) { 1970348608Smm if (strcmp(value, "file") == 0) { 1971348608Smm archive_entry_set_symlink_type(entry, 1972348608Smm AE_SYMLINK_TYPE_FILE); 1973348608Smm } else if (strcmp(value, "dir") == 0) { 1974348608Smm archive_entry_set_symlink_type(entry, 1975348608Smm AE_SYMLINK_TYPE_DIRECTORY); 1976348608Smm } 1977348608Smm } 1978232153Smm if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0) 1979228753Smm pax_attribute_xattr(entry, key, value); 1980228753Smm break; 1981362134Smm case 'R': 1982362134Smm /* GNU tar uses RHT.security header to store SELinux xattrs 1983362134Smm * SCHILY.xattr.security.selinux == RHT.security.selinux */ 1984362134Smm if (strcmp(key, "RHT.security.selinux") == 0) { 1985362134Smm pax_attribute_rht_security_selinux(entry, value, 1986362134Smm value_length); 1987362134Smm } 1988362134Smm break; 1989228753Smm case 'S': 1990228753Smm /* We support some keys used by the "star" archiver */ 1991232153Smm if (strcmp(key, "SCHILY.acl.access") == 0) { 1992313571Smm r = pax_attribute_acl(a, tar, entry, value, 1993313571Smm ARCHIVE_ENTRY_ACL_TYPE_ACCESS); 1994313571Smm if (r == ARCHIVE_FATAL) 1995313571Smm return (r); 1996232153Smm } else if (strcmp(key, "SCHILY.acl.default") == 0) { 1997313571Smm r = pax_attribute_acl(a, tar, entry, value, 1998313571Smm ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); 1999313571Smm if (r == ARCHIVE_FATAL) 2000313571Smm return (r); 2001313571Smm } else if (strcmp(key, "SCHILY.acl.ace") == 0) { 2002313571Smm r = pax_attribute_acl(a, tar, entry, value, 2003313571Smm ARCHIVE_ENTRY_ACL_TYPE_NFS4); 2004313571Smm if (r == ARCHIVE_FATAL) 2005313571Smm return (r); 
2006232153Smm } else if (strcmp(key, "SCHILY.devmajor") == 0) { 2007228753Smm archive_entry_set_rdevmajor(entry, 2008238856Smm (dev_t)tar_atol10(value, strlen(value))); 2009232153Smm } else if (strcmp(key, "SCHILY.devminor") == 0) { 2010228753Smm archive_entry_set_rdevminor(entry, 2011238856Smm (dev_t)tar_atol10(value, strlen(value))); 2012232153Smm } else if (strcmp(key, "SCHILY.fflags") == 0) { 2013228753Smm archive_entry_copy_fflags_text(entry, value); 2014232153Smm } else if (strcmp(key, "SCHILY.dev") == 0) { 2015228753Smm archive_entry_set_dev(entry, 2016238856Smm (dev_t)tar_atol10(value, strlen(value))); 2017232153Smm } else if (strcmp(key, "SCHILY.ino") == 0) { 2018228753Smm archive_entry_set_ino(entry, 2019228753Smm tar_atol10(value, strlen(value))); 2020232153Smm } else if (strcmp(key, "SCHILY.nlink") == 0) { 2021238856Smm archive_entry_set_nlink(entry, (unsigned) 2022228753Smm tar_atol10(value, strlen(value))); 2023232153Smm } else if (strcmp(key, "SCHILY.realsize") == 0) { 2024228753Smm tar->realsize = tar_atol10(value, strlen(value)); 2025316338Smm tar->realsize_override = 1; 2026228753Smm archive_entry_set_size(entry, tar->realsize); 2027313571Smm } else if (strncmp(key, "SCHILY.xattr.", 13) == 0) { 2028313571Smm pax_attribute_schily_xattr(entry, key, value, 2029313571Smm value_length); 2030232153Smm } else if (strcmp(key, "SUN.holesdata") == 0) { 2031232153Smm /* A Solaris extension for sparse. 
*/ 2032232153Smm r = solaris_sparse_parse(a, tar, entry, value); 2033232153Smm if (r < err) { 2034232153Smm if (r == ARCHIVE_FATAL) 2035232153Smm return (r); 2036232153Smm err = r; 2037232153Smm archive_set_error(&a->archive, 2038232153Smm ARCHIVE_ERRNO_MISC, 2039232153Smm "Parse error: SUN.holesdata"); 2040232153Smm } 2041228753Smm } 2042228753Smm break; 2043228753Smm case 'a': 2044232153Smm if (strcmp(key, "atime") == 0) { 2045228753Smm pax_time(value, &s, &n); 2046228753Smm archive_entry_set_atime(entry, s, n); 2047228753Smm } 2048228753Smm break; 2049228753Smm case 'c': 2050232153Smm if (strcmp(key, "ctime") == 0) { 2051228753Smm pax_time(value, &s, &n); 2052228753Smm archive_entry_set_ctime(entry, s, n); 2053232153Smm } else if (strcmp(key, "charset") == 0) { 2054228753Smm /* TODO: Publish charset information in entry. */ 2055232153Smm } else if (strcmp(key, "comment") == 0) { 2056228753Smm /* TODO: Publish comment in entry. */ 2057228753Smm } 2058228753Smm break; 2059228753Smm case 'g': 2060232153Smm if (strcmp(key, "gid") == 0) { 2061228753Smm archive_entry_set_gid(entry, 2062228753Smm tar_atol10(value, strlen(value))); 2063232153Smm } else if (strcmp(key, "gname") == 0) { 2064228753Smm archive_strcpy(&(tar->entry_gname), value); 2065228753Smm } 2066228753Smm break; 2067228753Smm case 'h': 2068228753Smm if (strcmp(key, "hdrcharset") == 0) { 2069228753Smm if (strcmp(value, "BINARY") == 0) 2070232153Smm /* Binary mode. */ 2071228753Smm tar->pax_hdrcharset_binary = 1; 2072228753Smm else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0) 2073228753Smm tar->pax_hdrcharset_binary = 0; 2074228753Smm } 2075228753Smm break; 2076228753Smm case 'l': 2077228753Smm /* pax interchange doesn't distinguish hardlink vs. symlink. 
*/ 2078232153Smm if (strcmp(key, "linkpath") == 0) { 2079228753Smm archive_strcpy(&(tar->entry_linkpath), value); 2080228753Smm } 2081228753Smm break; 2082228753Smm case 'm': 2083232153Smm if (strcmp(key, "mtime") == 0) { 2084228753Smm pax_time(value, &s, &n); 2085228753Smm archive_entry_set_mtime(entry, s, n); 2086228753Smm } 2087228753Smm break; 2088228753Smm case 'p': 2089232153Smm if (strcmp(key, "path") == 0) { 2090228753Smm archive_strcpy(&(tar->entry_pathname), value); 2091228753Smm } 2092228753Smm break; 2093228753Smm case 'r': 2094228753Smm /* POSIX has reserved 'realtime.*' */ 2095228753Smm break; 2096228753Smm case 's': 2097228753Smm /* POSIX has reserved 'security.*' */ 2098232153Smm /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */ 2099232153Smm if (strcmp(key, "size") == 0) { 2100228753Smm /* "size" is the size of the data in the entry. */ 2101228753Smm tar->entry_bytes_remaining 2102228753Smm = tar_atol10(value, strlen(value)); 2103228753Smm /* 2104316338Smm * The "size" pax header keyword always overrides the 2105316338Smm * "size" field in the tar header. 2106316338Smm * GNU.sparse.realsize, GNU.sparse.size and 2107316338Smm * SCHILY.realsize override this value. 
/*
 * Parse a decimal time value, which may include a fractional portion.
 *
 * Accepts an optional leading '-', then decimal digits for the whole
 * seconds, then optionally '.' followed by up to nine fractional
 * digits.  The (signed) seconds are stored in *ps and the fraction,
 * scaled to nanoseconds, in *pn.  If the seconds would overflow, the
 * magnitude is clamped to INT64_MAX and parsing stops there, so *pn
 * is left at 0.
 */
static void
pax_time(const char *p, int64_t *ps, long *pn)
{
	const int64_t max_before_mul = INT64_MAX / 10;
	const int64_t max_last_digit = INT64_MAX % 10;
	int64_t seconds = 0;
	unsigned long scale;
	int negative = 0;

	if (*p == '-') {
		negative = 1;
		p++;
	}

	/* Whole seconds, saturating instead of overflowing. */
	for (; *p >= '0' && *p <= '9'; ++p) {
		int d = *p - '0';

		if (seconds > max_before_mul ||
		    (seconds == max_before_mul && d > max_last_digit)) {
			seconds = INT64_MAX;
			break;
		}
		seconds = seconds * 10 + d;
	}
	*ps = negative ? -seconds : seconds;

	/* Nanoseconds: only present when a '.' follows the seconds. */
	*pn = 0;
	if (*p != '.')
		return;

	/* First fractional digit is worth 10^8 ns, the ninth 1 ns. */
	for (scale = 100000000UL; scale != 0; scale /= 10) {
		++p;
		if (*p < '0' || *p > '9')
			break;
		*pn += (*p - '0') * scale;
	}
}
*/ 2166228753Smm *pn = 0; 2167228753Smm 2168228753Smm if (*p != '.') 2169228753Smm return; 2170228753Smm 2171228753Smm l = 100000000UL; 2172228753Smm do { 2173228753Smm ++p; 2174228753Smm if (*p >= '0' && *p <= '9') 2175228753Smm *pn += (*p - '0') * l; 2176228753Smm else 2177228753Smm break; 2178228753Smm } while (l /= 10); 2179228753Smm} 2180228753Smm 2181228753Smm/* 2182228753Smm * Parse GNU tar header 2183228753Smm */ 2184228753Smmstatic int 2185228753Smmheader_gnutar(struct archive_read *a, struct tar *tar, 2186232153Smm struct archive_entry *entry, const void *h, size_t *unconsumed) 2187228753Smm{ 2188228753Smm const struct archive_entry_header_gnutar *header; 2189232153Smm int64_t t; 2190232153Smm int err = ARCHIVE_OK; 2191228753Smm 2192228753Smm /* 2193228753Smm * GNU header is like POSIX ustar, except 'prefix' is 2194228753Smm * replaced with some other fields. This also means the 2195228753Smm * filename is stored as in old-style archives. 2196228753Smm */ 2197228753Smm 2198228753Smm /* Grab fields common to all tar variants. */ 2199232153Smm err = header_common(a, tar, entry, h); 2200232153Smm if (err == ARCHIVE_FATAL) 2201232153Smm return (err); 2202228753Smm 2203228753Smm /* Copy filename over (to ensure null termination). */ 2204228753Smm header = (const struct archive_entry_header_gnutar *)h; 2205232153Smm if (archive_entry_copy_pathname_l(entry, 2206232153Smm header->name, sizeof(header->name), tar->sconv) != 0) { 2207232153Smm err = set_conversion_failed_error(a, tar->sconv, "Pathname"); 2208232153Smm if (err == ARCHIVE_FATAL) 2209232153Smm return (err); 2210232153Smm } 2211228753Smm 2212228753Smm /* Fields common to ustar and GNU */ 2213228753Smm /* XXX Can the following be factored out since it's common 2214228753Smm * to ustar and gnu tar? Is it okay to move it down into 2215228753Smm * header_common, perhaps? 
*/ 2216232153Smm if (archive_entry_copy_uname_l(entry, 2217232153Smm header->uname, sizeof(header->uname), tar->sconv) != 0) { 2218232153Smm err = set_conversion_failed_error(a, tar->sconv, "Uname"); 2219232153Smm if (err == ARCHIVE_FATAL) 2220232153Smm return (err); 2221232153Smm } 2222228753Smm 2223232153Smm if (archive_entry_copy_gname_l(entry, 2224232153Smm header->gname, sizeof(header->gname), tar->sconv) != 0) { 2225232153Smm err = set_conversion_failed_error(a, tar->sconv, "Gname"); 2226232153Smm if (err == ARCHIVE_FATAL) 2227232153Smm return (err); 2228232153Smm } 2229228753Smm 2230228753Smm /* Parse out device numbers only for char and block specials */ 2231228753Smm if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { 2232238856Smm archive_entry_set_rdevmajor(entry, (dev_t) 2233228753Smm tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); 2234238856Smm archive_entry_set_rdevminor(entry, (dev_t) 2235228753Smm tar_atol(header->rdevminor, sizeof(header->rdevminor))); 2236228753Smm } else 2237228753Smm archive_entry_set_rdev(entry, 0); 2238228753Smm 2239228753Smm tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); 2240228753Smm 2241228753Smm /* Grab GNU-specific fields. 
*/ 2242232153Smm t = tar_atol(header->atime, sizeof(header->atime)); 2243232153Smm if (t > 0) 2244232153Smm archive_entry_set_atime(entry, t, 0); 2245232153Smm t = tar_atol(header->ctime, sizeof(header->ctime)); 2246232153Smm if (t > 0) 2247232153Smm archive_entry_set_ctime(entry, t, 0); 2248232153Smm 2249228753Smm if (header->realsize[0] != 0) { 2250228753Smm tar->realsize 2251228753Smm = tar_atol(header->realsize, sizeof(header->realsize)); 2252228753Smm archive_entry_set_size(entry, tar->realsize); 2253316338Smm tar->realsize_override = 1; 2254228753Smm } 2255228753Smm 2256228753Smm if (header->sparse[0].offset[0] != 0) { 2257232153Smm if (gnu_sparse_old_read(a, tar, header, unconsumed) 2258232153Smm != ARCHIVE_OK) 2259232153Smm return (ARCHIVE_FATAL); 2260228753Smm } else { 2261228753Smm if (header->isextended[0] != 0) { 2262228753Smm /* XXX WTF? XXX */ 2263228753Smm } 2264228753Smm } 2265228753Smm 2266232153Smm return (err); 2267228753Smm} 2268228753Smm 2269232153Smmstatic int 2270232153Smmgnu_add_sparse_entry(struct archive_read *a, struct tar *tar, 2271232153Smm int64_t offset, int64_t remaining) 2272228753Smm{ 2273228753Smm struct sparse_block *p; 2274228753Smm 2275311042Smm p = (struct sparse_block *)calloc(1, sizeof(*p)); 2276232153Smm if (p == NULL) { 2277232153Smm archive_set_error(&a->archive, ENOMEM, "Out of memory"); 2278232153Smm return (ARCHIVE_FATAL); 2279232153Smm } 2280228753Smm if (tar->sparse_last != NULL) 2281228753Smm tar->sparse_last->next = p; 2282228753Smm else 2283228753Smm tar->sparse_list = p; 2284228753Smm tar->sparse_last = p; 2285324418Smm if (remaining < 0 || offset < 0 || offset > INT64_MAX - remaining) { 2286302001Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data"); 2287302001Smm return (ARCHIVE_FATAL); 2288302001Smm } 2289228753Smm p->offset = offset; 2290228753Smm p->remaining = remaining; 2291232153Smm return (ARCHIVE_OK); 2292228753Smm} 2293228753Smm 2294228753Smmstatic void 
2295228753Smmgnu_clear_sparse_list(struct tar *tar) 2296228753Smm{ 2297228753Smm struct sparse_block *p; 2298228753Smm 2299228753Smm while (tar->sparse_list != NULL) { 2300228753Smm p = tar->sparse_list; 2301228753Smm tar->sparse_list = p->next; 2302228753Smm free(p); 2303228753Smm } 2304228753Smm tar->sparse_last = NULL; 2305228753Smm} 2306228753Smm 2307228753Smm/* 2308228753Smm * GNU tar old-format sparse data. 2309228753Smm * 2310228753Smm * GNU old-format sparse data is stored in a fixed-field 2311228753Smm * format. Offset/size values are 11-byte octal fields (same 2312228753Smm * format as 'size' field in ustart header). These are 2313228753Smm * stored in the header, allocating subsequent header blocks 2314228753Smm * as needed. Extending the header in this way is a pretty 2315228753Smm * severe POSIX violation; this design has earned GNU tar a 2316228753Smm * lot of criticism. 2317228753Smm */ 2318228753Smm 2319228753Smmstatic int 2320228753Smmgnu_sparse_old_read(struct archive_read *a, struct tar *tar, 2321232153Smm const struct archive_entry_header_gnutar *header, size_t *unconsumed) 2322228753Smm{ 2323228753Smm ssize_t bytes_read; 2324228753Smm const void *data; 2325228753Smm struct extended { 2326228753Smm struct gnu_sparse sparse[21]; 2327228753Smm char isextended[1]; 2328228753Smm char padding[7]; 2329228753Smm }; 2330228753Smm const struct extended *ext; 2331228753Smm 2332232153Smm if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK) 2333232153Smm return (ARCHIVE_FATAL); 2334228753Smm if (header->isextended[0] == 0) 2335228753Smm return (ARCHIVE_OK); 2336228753Smm 2337228753Smm do { 2338232153Smm tar_flush_unconsumed(a, unconsumed); 2339228753Smm data = __archive_read_ahead(a, 512, &bytes_read); 2340228753Smm if (bytes_read < 0) 2341228753Smm return (ARCHIVE_FATAL); 2342228753Smm if (bytes_read < 512) { 2343228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2344228753Smm "Truncated tar archive " 2345228753Smm "detected 
while reading sparse file data"); 2346228753Smm return (ARCHIVE_FATAL); 2347228753Smm } 2348232153Smm *unconsumed = 512; 2349228753Smm ext = (const struct extended *)data; 2350232153Smm if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK) 2351232153Smm return (ARCHIVE_FATAL); 2352228753Smm } while (ext->isextended[0] != 0); 2353228753Smm if (tar->sparse_list != NULL) 2354228753Smm tar->entry_offset = tar->sparse_list->offset; 2355228753Smm return (ARCHIVE_OK); 2356228753Smm} 2357228753Smm 2358232153Smmstatic int 2359232153Smmgnu_sparse_old_parse(struct archive_read *a, struct tar *tar, 2360228753Smm const struct gnu_sparse *sparse, int length) 2361228753Smm{ 2362228753Smm while (length > 0 && sparse->offset[0] != 0) { 2363232153Smm if (gnu_add_sparse_entry(a, tar, 2364228753Smm tar_atol(sparse->offset, sizeof(sparse->offset)), 2365232153Smm tar_atol(sparse->numbytes, sizeof(sparse->numbytes))) 2366232153Smm != ARCHIVE_OK) 2367232153Smm return (ARCHIVE_FATAL); 2368228753Smm sparse++; 2369228753Smm length--; 2370228753Smm } 2371232153Smm return (ARCHIVE_OK); 2372228753Smm} 2373228753Smm 2374228753Smm/* 2375228753Smm * GNU tar sparse format 0.0 2376228753Smm * 2377228753Smm * Beginning with GNU tar 1.15, sparse files are stored using 2378228753Smm * information in the pax extended header. The GNU tar maintainers 2379228753Smm * have gone through a number of variations in the process of working 2380232153Smm * out this scheme; fortunately, they're all numbered. 2381228753Smm * 2382228753Smm * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the 2383228753Smm * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to 2384228753Smm * store offset/size for each block. The repeated instances of these 2385228753Smm * latter fields violate the pax specification (which frowns on 2386228753Smm * duplicate keys), so this format was quickly replaced. 
2387228753Smm */ 2388228753Smm 2389228753Smm/* 2390228753Smm * GNU tar sparse format 0.1 2391228753Smm * 2392228753Smm * This version replaced the offset/numbytes attributes with 2393228753Smm * a single "map" attribute that stored a list of integers. This 2394228753Smm * format had two problems: First, the "map" attribute could be very 2395228753Smm * long, which caused problems for some implementations. More 2396228753Smm * importantly, the sparse data was lost when extracted by archivers 2397228753Smm * that didn't recognize this extension. 2398228753Smm */ 2399228753Smm 2400228753Smmstatic int 2401232153Smmgnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p) 2402228753Smm{ 2403228753Smm const char *e; 2404232153Smm int64_t offset = -1, size = -1; 2405228753Smm 2406228753Smm for (;;) { 2407228753Smm e = p; 2408228753Smm while (*e != '\0' && *e != ',') { 2409228753Smm if (*e < '0' || *e > '9') 2410228753Smm return (ARCHIVE_WARN); 2411228753Smm e++; 2412228753Smm } 2413228753Smm if (offset < 0) { 2414228753Smm offset = tar_atol10(p, e - p); 2415228753Smm if (offset < 0) 2416228753Smm return (ARCHIVE_WARN); 2417228753Smm } else { 2418228753Smm size = tar_atol10(p, e - p); 2419228753Smm if (size < 0) 2420228753Smm return (ARCHIVE_WARN); 2421232153Smm if (gnu_add_sparse_entry(a, tar, offset, size) 2422232153Smm != ARCHIVE_OK) 2423232153Smm return (ARCHIVE_FATAL); 2424228753Smm offset = -1; 2425228753Smm } 2426228753Smm if (*e == '\0') 2427228753Smm return (ARCHIVE_OK); 2428228753Smm p = e + 1; 2429228753Smm } 2430228753Smm} 2431228753Smm 2432228753Smm/* 2433228753Smm * GNU tar sparse format 1.0 2434228753Smm * 2435228753Smm * The idea: The offset/size data is stored as a series of base-10 2436228753Smm * ASCII numbers prepended to the file data, so that dearchivers that 2437228753Smm * don't support this format will extract the block map along with the 2438228753Smm * data and a separate post-process can restore the sparseness. 
2439228753Smm * 2440228753Smm * Unfortunately, GNU tar 1.16 had a bug that added unnecessary 2441228753Smm * padding to the body of the file when using this format. GNU tar 2442228753Smm * 1.17 corrected this bug without bumping the version number, so 2443228753Smm * it's not possible to support both variants. This code supports 2444228753Smm * the later variant at the expense of not supporting the former. 2445228753Smm * 2446228753Smm * This variant also replaced GNU.sparse.size with GNU.sparse.realsize 2447228753Smm * and introduced the GNU.sparse.major/GNU.sparse.minor attributes. 2448228753Smm */ 2449228753Smm 2450228753Smm/* 2451228753Smm * Read the next line from the input, and parse it as a decimal 2452228753Smm * integer followed by '\n'. Returns positive integer value or 2453228753Smm * negative on error. 2454228753Smm */ 2455228753Smmstatic int64_t 2456228753Smmgnu_sparse_10_atol(struct archive_read *a, struct tar *tar, 2457232153Smm int64_t *remaining, size_t *unconsumed) 2458228753Smm{ 2459228753Smm int64_t l, limit, last_digit_limit; 2460228753Smm const char *p; 2461228753Smm ssize_t bytes_read; 2462228753Smm int base, digit; 2463228753Smm 2464228753Smm base = 10; 2465228753Smm limit = INT64_MAX / base; 2466228753Smm last_digit_limit = INT64_MAX % base; 2467228753Smm 2468228753Smm /* 2469228753Smm * Skip any lines starting with '#'; GNU tar specs 2470228753Smm * don't require this, but they should. 
2471228753Smm */ 2472228753Smm do { 2473238856Smm bytes_read = readline(a, tar, &p, 2474238856Smm (ssize_t)tar_min(*remaining, 100), unconsumed); 2475228753Smm if (bytes_read <= 0) 2476228753Smm return (ARCHIVE_FATAL); 2477228753Smm *remaining -= bytes_read; 2478228753Smm } while (p[0] == '#'); 2479228753Smm 2480228753Smm l = 0; 2481228753Smm while (bytes_read > 0) { 2482228753Smm if (*p == '\n') 2483228753Smm return (l); 2484228753Smm if (*p < '0' || *p >= '0' + base) 2485228753Smm return (ARCHIVE_WARN); 2486228753Smm digit = *p - '0'; 2487228753Smm if (l > limit || (l == limit && digit > last_digit_limit)) 2488228753Smm l = INT64_MAX; /* Truncate on overflow. */ 2489228753Smm else 2490228753Smm l = (l * base) + digit; 2491228753Smm p++; 2492228753Smm bytes_read--; 2493228753Smm } 2494228753Smm /* TODO: Error message. */ 2495228753Smm return (ARCHIVE_WARN); 2496228753Smm} 2497228753Smm 2498228753Smm/* 2499228753Smm * Returns length (in bytes) of the sparse data description 2500228753Smm * that was read. 2501228753Smm */ 2502228753Smmstatic ssize_t 2503232153Smmgnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed) 2504228753Smm{ 2505232153Smm ssize_t bytes_read; 2506228753Smm int entries; 2507232153Smm int64_t offset, size, to_skip, remaining; 2508228753Smm 2509228753Smm /* Clear out the existing sparse list. */ 2510228753Smm gnu_clear_sparse_list(tar); 2511228753Smm 2512228753Smm remaining = tar->entry_bytes_remaining; 2513228753Smm 2514228753Smm /* Parse entries. */ 2515238856Smm entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 2516228753Smm if (entries < 0) 2517228753Smm return (ARCHIVE_FATAL); 2518228753Smm /* Parse the individual entries. 
*/ 2519228753Smm while (entries-- > 0) { 2520228753Smm /* Parse offset/size */ 2521232153Smm offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 2522228753Smm if (offset < 0) 2523228753Smm return (ARCHIVE_FATAL); 2524232153Smm size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); 2525228753Smm if (size < 0) 2526228753Smm return (ARCHIVE_FATAL); 2527228753Smm /* Add a new sparse entry. */ 2528232153Smm if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) 2529232153Smm return (ARCHIVE_FATAL); 2530228753Smm } 2531228753Smm /* Skip rest of block... */ 2532232153Smm tar_flush_unconsumed(a, unconsumed); 2533238856Smm bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining); 2534228753Smm to_skip = 0x1ff & -bytes_read; 2535315433Smm /* Fail if tar->entry_bytes_remaing would get negative */ 2536315433Smm if (to_skip > remaining) 2537315433Smm return (ARCHIVE_FATAL); 2538232153Smm if (to_skip != __archive_read_consume(a, to_skip)) 2539228753Smm return (ARCHIVE_FATAL); 2540238856Smm return ((ssize_t)(bytes_read + to_skip)); 2541228753Smm} 2542228753Smm 2543232153Smm/* 2544232153Smm * Solaris pax extension for a sparse file. This is recorded with the 2545232153Smm * data and hole pairs. The way recording sparse information by Solaris' 2546232153Smm * pax simply indicates where data and sparse are, so the stored contents 2547232153Smm * consist of both data and hole. 
2548232153Smm */ 2549232153Smmstatic int 2550232153Smmsolaris_sparse_parse(struct archive_read *a, struct tar *tar, 2551232153Smm struct archive_entry *entry, const char *p) 2552232153Smm{ 2553232153Smm const char *e; 2554232153Smm int64_t start, end; 2555232153Smm int hole = 1; 2556232153Smm 2557232153Smm (void)entry; /* UNUSED */ 2558232153Smm 2559232153Smm end = 0; 2560232153Smm if (*p == ' ') 2561232153Smm p++; 2562232153Smm else 2563232153Smm return (ARCHIVE_WARN); 2564232153Smm for (;;) { 2565232153Smm e = p; 2566232153Smm while (*e != '\0' && *e != ' ') { 2567232153Smm if (*e < '0' || *e > '9') 2568232153Smm return (ARCHIVE_WARN); 2569232153Smm e++; 2570232153Smm } 2571232153Smm start = end; 2572232153Smm end = tar_atol10(p, e - p); 2573232153Smm if (end < 0) 2574232153Smm return (ARCHIVE_WARN); 2575232153Smm if (start < end) { 2576232153Smm if (gnu_add_sparse_entry(a, tar, start, 2577232153Smm end - start) != ARCHIVE_OK) 2578232153Smm return (ARCHIVE_FATAL); 2579232153Smm tar->sparse_last->hole = hole; 2580232153Smm } 2581232153Smm if (*e == '\0') 2582232153Smm return (ARCHIVE_OK); 2583232153Smm p = e + 1; 2584232153Smm hole = hole == 0; 2585232153Smm } 2586232153Smm} 2587232153Smm 2588228753Smm/*- 2589228753Smm * Convert text->integer. 2590228753Smm * 2591228753Smm * Traditional tar formats (including POSIX) specify base-8 for 2592228753Smm * all of the standard numeric fields. This is a significant limitation 2593228753Smm * in practice: 2594228753Smm * = file size is limited to 8GB 2595228753Smm * = rdevmajor and rdevminor are limited to 21 bits 2596228753Smm * = uid/gid are limited to 21 bits 2597228753Smm * 2598228753Smm * There are two workarounds for this: 2599228753Smm * = pax extended headers, which use variable-length string fields 2600228753Smm * = GNU tar and STAR both allow either base-8 or base-256 in 2601228753Smm * most fields. The high bit is set to indicate base-256. 
2602228753Smm * 2603228753Smm * On read, this implementation supports both extensions. 2604228753Smm */ 2605228753Smmstatic int64_t 2606248616Smmtar_atol(const char *p, size_t char_cnt) 2607228753Smm{ 2608228753Smm /* 2609228753Smm * Technically, GNU tar considers a field to be in base-256 2610228753Smm * only if the first byte is 0xff or 0x80. 2611228753Smm */ 2612228753Smm if (*p & 0x80) 2613228753Smm return (tar_atol256(p, char_cnt)); 2614228753Smm return (tar_atol8(p, char_cnt)); 2615228753Smm} 2616228753Smm 2617228753Smm/* 2618228753Smm * Note that this implementation does not (and should not!) obey 2619228753Smm * locale settings; you cannot simply substitute strtol here, since 2620228753Smm * it does obey locale. 2621228753Smm */ 2622228753Smmstatic int64_t 2623248616Smmtar_atol_base_n(const char *p, size_t char_cnt, int base) 2624228753Smm{ 2625302001Smm int64_t l, maxval, limit, last_digit_limit; 2626248616Smm int digit, sign; 2627228753Smm 2628302001Smm maxval = INT64_MAX; 2629228753Smm limit = INT64_MAX / base; 2630228753Smm last_digit_limit = INT64_MAX % base; 2631228753Smm 2632248616Smm /* the pointer will not be dereferenced if char_cnt is zero 2633311042Smm * due to the way the && operator is evaluated. 2634248616Smm */ 2635248616Smm while (char_cnt != 0 && (*p == ' ' || *p == '\t')) { 2636228753Smm p++; 2637248616Smm char_cnt--; 2638248616Smm } 2639248616Smm 2640248616Smm sign = 1; 2641248616Smm if (char_cnt != 0 && *p == '-') { 2642228753Smm sign = -1; 2643228753Smm p++; 2644248616Smm char_cnt--; 2645302001Smm 2646302001Smm maxval = INT64_MIN; 2647302001Smm limit = -(INT64_MIN / base); 2648302001Smm last_digit_limit = INT64_MIN % base; 2649248616Smm } 2650228753Smm 2651228753Smm l = 0; 2652248616Smm if (char_cnt != 0) { 2653248616Smm digit = *p - '0'; 2654248616Smm while (digit >= 0 && digit < base && char_cnt != 0) { 2655248616Smm if (l>limit || (l == limit && digit > last_digit_limit)) { 2656302001Smm return maxval; /* Truncate on overflow. 
*/ 2657248616Smm } 2658248616Smm l = (l * base) + digit; 2659248616Smm digit = *++p - '0'; 2660248616Smm char_cnt--; 2661228753Smm } 2662228753Smm } 2663228753Smm return (sign < 0) ? -l : l; 2664228753Smm} 2665228753Smm 2666228753Smmstatic int64_t 2667248616Smmtar_atol8(const char *p, size_t char_cnt) 2668228753Smm{ 2669248616Smm return tar_atol_base_n(p, char_cnt, 8); 2670248616Smm} 2671228753Smm 2672248616Smmstatic int64_t 2673248616Smmtar_atol10(const char *p, size_t char_cnt) 2674248616Smm{ 2675248616Smm return tar_atol_base_n(p, char_cnt, 10); 2676228753Smm} 2677228753Smm 2678228753Smm/* 2679302001Smm * Parse a base-256 integer. This is just a variable-length 2680302001Smm * twos-complement signed binary value in big-endian order, except 2681302001Smm * that the high-order bit is ignored. The values here can be up to 2682302001Smm * 12 bytes, so we need to be careful about overflowing 64-bit 2683302001Smm * (8-byte) integers. 2684302001Smm * 2685302001Smm * This code unashamedly assumes that the local machine uses 8-bit 2686302001Smm * bytes and twos-complement arithmetic. 2687228753Smm */ 2688228753Smmstatic int64_t 2689248616Smmtar_atol256(const char *_p, size_t char_cnt) 2690228753Smm{ 2691302001Smm uint64_t l; 2692228753Smm const unsigned char *p = (const unsigned char *)_p; 2693302001Smm unsigned char c, neg; 2694228753Smm 2695302001Smm /* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */ 2696302001Smm c = *p; 2697302001Smm if (c & 0x40) { 2698302001Smm neg = 0xff; 2699302001Smm c |= 0x80; 2700302001Smm l = ~ARCHIVE_LITERAL_ULL(0); 2701302001Smm } else { 2702302001Smm neg = 0; 2703302001Smm c &= 0x7f; 2704302001Smm l = 0; 2705302001Smm } 2706228753Smm 2707302001Smm /* If more than 8 bytes, check that we can ignore 2708302001Smm * high-order bits without overflow. */ 2709302001Smm while (char_cnt > sizeof(int64_t)) { 2710302001Smm --char_cnt; 2711302001Smm if (c != neg) 2712302001Smm return neg ? 
INT64_MIN : INT64_MAX; 2713302001Smm c = *++p; 2714302001Smm } 2715302001Smm 2716302001Smm /* c is first byte that fits; if sign mismatch, return overflow */ 2717302001Smm if ((c ^ neg) & 0x80) { 2718302001Smm return neg ? INT64_MIN : INT64_MAX; 2719302001Smm } 2720302001Smm 2721302001Smm /* Accumulate remaining bytes. */ 2722228753Smm while (--char_cnt > 0) { 2723302001Smm l = (l << 8) | c; 2724302001Smm c = *++p; 2725228753Smm } 2726302001Smm l = (l << 8) | c; 2727302001Smm /* Return signed twos-complement value. */ 2728302001Smm return (int64_t)(l); 2729228753Smm} 2730228753Smm 2731228753Smm/* 2732228753Smm * Returns length of line (including trailing newline) 2733228753Smm * or negative on error. 'start' argument is updated to 2734228753Smm * point to first character of line. This avoids copying 2735228753Smm * when possible. 2736228753Smm */ 2737228753Smmstatic ssize_t 2738228753Smmreadline(struct archive_read *a, struct tar *tar, const char **start, 2739232153Smm ssize_t limit, size_t *unconsumed) 2740228753Smm{ 2741228753Smm ssize_t bytes_read; 2742228753Smm ssize_t total_size = 0; 2743228753Smm const void *t; 2744228753Smm const char *s; 2745228753Smm void *p; 2746228753Smm 2747232153Smm tar_flush_unconsumed(a, unconsumed); 2748232153Smm 2749228753Smm t = __archive_read_ahead(a, 1, &bytes_read); 2750228753Smm if (bytes_read <= 0) 2751228753Smm return (ARCHIVE_FATAL); 2752228753Smm s = t; /* Start of line? */ 2753228753Smm p = memchr(t, '\n', bytes_read); 2754228753Smm /* If we found '\n' in the read buffer, return pointer to that. 
*/ 2755228753Smm if (p != NULL) { 2756228753Smm bytes_read = 1 + ((const char *)p) - s; 2757228753Smm if (bytes_read > limit) { 2758228753Smm archive_set_error(&a->archive, 2759228753Smm ARCHIVE_ERRNO_FILE_FORMAT, 2760228753Smm "Line too long"); 2761228753Smm return (ARCHIVE_FATAL); 2762228753Smm } 2763232153Smm *unconsumed = bytes_read; 2764228753Smm *start = s; 2765228753Smm return (bytes_read); 2766228753Smm } 2767232153Smm *unconsumed = bytes_read; 2768228753Smm /* Otherwise, we need to accumulate in a line buffer. */ 2769228753Smm for (;;) { 2770228753Smm if (total_size + bytes_read > limit) { 2771228753Smm archive_set_error(&a->archive, 2772228753Smm ARCHIVE_ERRNO_FILE_FORMAT, 2773228753Smm "Line too long"); 2774228753Smm return (ARCHIVE_FATAL); 2775228753Smm } 2776228753Smm if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) { 2777228753Smm archive_set_error(&a->archive, ENOMEM, 2778228753Smm "Can't allocate working buffer"); 2779228753Smm return (ARCHIVE_FATAL); 2780228753Smm } 2781228753Smm memcpy(tar->line.s + total_size, t, bytes_read); 2782232153Smm tar_flush_unconsumed(a, unconsumed); 2783228753Smm total_size += bytes_read; 2784228753Smm /* If we found '\n', clean up and return. */ 2785228753Smm if (p != NULL) { 2786228753Smm *start = tar->line.s; 2787228753Smm return (total_size); 2788228753Smm } 2789228753Smm /* Read some more. */ 2790228753Smm t = __archive_read_ahead(a, 1, &bytes_read); 2791228753Smm if (bytes_read <= 0) 2792228753Smm return (ARCHIVE_FATAL); 2793228753Smm s = t; /* Start of line? */ 2794228753Smm p = memchr(t, '\n', bytes_read); 2795228753Smm /* If we found '\n', trim the read. 
*/ 2796228753Smm if (p != NULL) { 2797228753Smm bytes_read = 1 + ((const char *)p) - s; 2798228753Smm } 2799232153Smm *unconsumed = bytes_read; 2800228753Smm } 2801228753Smm} 2802228753Smm 2803228753Smm/* 2804228753Smm * base64_decode - Base64 decode 2805228753Smm * 2806228753Smm * This accepts most variations of base-64 encoding, including: 2807228753Smm * * with or without line breaks 2808228753Smm * * with or without the final group padded with '=' or '_' characters 2809228753Smm * (The most economical Base-64 variant does not pad the last group and 2810228753Smm * omits line breaks; RFC1341 used for MIME requires both.) 2811228753Smm */ 2812228753Smmstatic char * 2813228753Smmbase64_decode(const char *s, size_t len, size_t *out_len) 2814228753Smm{ 2815228753Smm static const unsigned char digits[64] = { 2816228753Smm 'A','B','C','D','E','F','G','H','I','J','K','L','M','N', 2817228753Smm 'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b', 2818228753Smm 'c','d','e','f','g','h','i','j','k','l','m','n','o','p', 2819228753Smm 'q','r','s','t','u','v','w','x','y','z','0','1','2','3', 2820228753Smm '4','5','6','7','8','9','+','/' }; 2821228753Smm static unsigned char decode_table[128]; 2822228753Smm char *out, *d; 2823228753Smm const unsigned char *src = (const unsigned char *)s; 2824228753Smm 2825228753Smm /* If the decode table is not yet initialized, prepare it. */ 2826228753Smm if (decode_table[digits[1]] != 1) { 2827228753Smm unsigned i; 2828228753Smm memset(decode_table, 0xff, sizeof(decode_table)); 2829228753Smm for (i = 0; i < sizeof(digits); i++) 2830228753Smm decode_table[digits[i]] = i; 2831228753Smm } 2832228753Smm 2833228753Smm /* Allocate enough space to hold the entire output. */ 2834228753Smm /* Note that we may not use all of this... 
*/ 2835228753Smm out = (char *)malloc(len - len / 4 + 1); 2836228753Smm if (out == NULL) { 2837228753Smm *out_len = 0; 2838228753Smm return (NULL); 2839228753Smm } 2840228753Smm d = out; 2841228753Smm 2842228753Smm while (len > 0) { 2843228753Smm /* Collect the next group of (up to) four characters. */ 2844228753Smm int v = 0; 2845228753Smm int group_size = 0; 2846228753Smm while (group_size < 4 && len > 0) { 2847228753Smm /* '=' or '_' padding indicates final group. */ 2848228753Smm if (*src == '=' || *src == '_') { 2849228753Smm len = 0; 2850228753Smm break; 2851228753Smm } 2852228753Smm /* Skip illegal characters (including line breaks) */ 2853228753Smm if (*src > 127 || *src < 32 2854228753Smm || decode_table[*src] == 0xff) { 2855228753Smm len--; 2856228753Smm src++; 2857228753Smm continue; 2858228753Smm } 2859228753Smm v <<= 6; 2860228753Smm v |= decode_table[*src++]; 2861228753Smm len --; 2862228753Smm group_size++; 2863228753Smm } 2864228753Smm /* Align a short group properly. */ 2865228753Smm v <<= 6 * (4 - group_size); 2866228753Smm /* Unpack the group we just collected. */ 2867228753Smm switch (group_size) { 2868228753Smm case 4: d[2] = v & 0xff; 2869228753Smm /* FALLTHROUGH */ 2870228753Smm case 3: d[1] = (v >> 8) & 0xff; 2871228753Smm /* FALLTHROUGH */ 2872228753Smm case 2: d[0] = (v >> 16) & 0xff; 2873228753Smm break; 2874228753Smm case 1: /* this is invalid! 
*/ 2875228753Smm break; 2876228753Smm } 2877228753Smm d += group_size * 3 / 4; 2878228753Smm } 2879228753Smm 2880228753Smm *out_len = d - out; 2881228753Smm return (out); 2882228753Smm} 2883228753Smm 2884228753Smmstatic char * 2885228753Smmurl_decode(const char *in) 2886228753Smm{ 2887228753Smm char *out, *d; 2888228753Smm const char *s; 2889228753Smm 2890228753Smm out = (char *)malloc(strlen(in) + 1); 2891228753Smm if (out == NULL) 2892228753Smm return (NULL); 2893228753Smm for (s = in, d = out; *s != '\0'; ) { 2894228753Smm if (s[0] == '%' && s[1] != '\0' && s[2] != '\0') { 2895228753Smm /* Try to convert % escape */ 2896228753Smm int digit1 = tohex(s[1]); 2897228753Smm int digit2 = tohex(s[2]); 2898228753Smm if (digit1 >= 0 && digit2 >= 0) { 2899228753Smm /* Looks good, consume three chars */ 2900228753Smm s += 3; 2901228753Smm /* Convert output */ 2902228753Smm *d++ = ((digit1 << 4) | digit2); 2903228753Smm continue; 2904228753Smm } 2905228753Smm /* Else fall through and treat '%' as normal char */ 2906228753Smm } 2907228753Smm *d++ = *s++; 2908228753Smm } 2909228753Smm *d = '\0'; 2910228753Smm return (out); 2911228753Smm} 2912228753Smm 2913228753Smmstatic int 2914228753Smmtohex(int c) 2915228753Smm{ 2916228753Smm if (c >= '0' && c <= '9') 2917228753Smm return (c - '0'); 2918228753Smm else if (c >= 'A' && c <= 'F') 2919228753Smm return (c - 'A' + 10); 2920228753Smm else if (c >= 'a' && c <= 'f') 2921228753Smm return (c - 'a' + 10); 2922228753Smm else 2923228753Smm return (-1); 2924228753Smm} 2925