1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24228753Smm */ 25228753Smm 26228753Smm#include "archive_platform.h" 27229592Smm__FBSDID("$FreeBSD$"); 28228753Smm 29228753Smm#ifdef HAVE_ERRNO_H 30228753Smm#include <errno.h> 31228753Smm#endif 32228753Smm#ifdef HAVE_STDLIB_H 33228753Smm#include <stdlib.h> 34228753Smm#endif 35228753Smm#ifdef HAVE_STRING_H 36228753Smm#include <string.h> 37228753Smm#endif 38228753Smm 39228753Smm#include "archive.h" 40228753Smm#include "archive_entry.h" 41228753Smm#include "archive_private.h" 42228753Smm#include "archive_write_private.h" 43228753Smm 44228753Smmstruct pax { 45228753Smm uint64_t entry_bytes_remaining; 46228753Smm uint64_t entry_padding; 47228753Smm struct archive_string pax_header; 48228753Smm}; 49228753Smm 50228753Smmstatic void add_pax_attr(struct archive_string *, const char *key, 51228753Smm const char *value); 52228753Smmstatic void add_pax_attr_int(struct archive_string *, 53228753Smm const char *key, int64_t value); 54228753Smmstatic void add_pax_attr_time(struct archive_string *, 55228753Smm const char *key, int64_t sec, 56228753Smm unsigned long nanos); 57228753Smmstatic void add_pax_attr_w(struct archive_string *, 58228753Smm const char *key, const wchar_t *wvalue); 59228753Smmstatic ssize_t archive_write_pax_data(struct archive_write *, 60228753Smm const void *, size_t); 61228753Smmstatic int archive_write_pax_finish(struct archive_write *); 62228753Smmstatic int archive_write_pax_destroy(struct archive_write *); 63228753Smmstatic int archive_write_pax_finish_entry(struct archive_write *); 64228753Smmstatic int archive_write_pax_header(struct archive_write *, 65228753Smm struct archive_entry *); 66228753Smmstatic char *base64_encode(const char *src, size_t len); 67228753Smmstatic char *build_pax_attribute_name(char *dest, const char *src); 68228753Smmstatic char *build_ustar_entry_name(char *dest, const char *src, 69228753Smm size_t src_length, const char *insert); 70228753Smmstatic char *format_int(char *dest, int64_t); 71228753Smmstatic int 
has_non_ASCII(const wchar_t *); 72228753Smmstatic char *url_encode(const char *in); 73228753Smmstatic int write_nulls(struct archive_write *, size_t); 74228753Smm 75228753Smm/* 76228753Smm * Set output format to 'restricted pax' format. 77228753Smm * 78228753Smm * This is the same as normal 'pax', but tries to suppress 79228753Smm * the pax header whenever possible. This is the default for 80228753Smm * bsdtar, for instance. 81228753Smm */ 82228753Smmint 83228753Smmarchive_write_set_format_pax_restricted(struct archive *_a) 84228753Smm{ 85228753Smm struct archive_write *a = (struct archive_write *)_a; 86228753Smm int r; 87228753Smm r = archive_write_set_format_pax(&a->archive); 88228753Smm a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_RESTRICTED; 89228753Smm a->archive.archive_format_name = "restricted POSIX pax interchange"; 90228753Smm return (r); 91228753Smm} 92228753Smm 93228753Smm/* 94228753Smm * Set output format to 'pax' format. 95228753Smm */ 96228753Smmint 97228753Smmarchive_write_set_format_pax(struct archive *_a) 98228753Smm{ 99228753Smm struct archive_write *a = (struct archive_write *)_a; 100228753Smm struct pax *pax; 101228753Smm 102228753Smm if (a->format_destroy != NULL) 103228753Smm (a->format_destroy)(a); 104228753Smm 105228753Smm pax = (struct pax *)malloc(sizeof(*pax)); 106228753Smm if (pax == NULL) { 107228753Smm archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); 108228753Smm return (ARCHIVE_FATAL); 109228753Smm } 110228753Smm memset(pax, 0, sizeof(*pax)); 111228753Smm a->format_data = pax; 112228753Smm 113228753Smm a->pad_uncompressed = 1; 114228753Smm a->format_name = "pax"; 115228753Smm a->format_write_header = archive_write_pax_header; 116228753Smm a->format_write_data = archive_write_pax_data; 117228753Smm a->format_finish = archive_write_pax_finish; 118228753Smm a->format_destroy = archive_write_pax_destroy; 119228753Smm a->format_finish_entry = archive_write_pax_finish_entry; 120228753Smm a->archive.archive_format = 
ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 121228753Smm a->archive.archive_format_name = "POSIX pax interchange"; 122228753Smm return (ARCHIVE_OK); 123228753Smm} 124228753Smm 125228753Smm/* 126228753Smm * Note: This code assumes that 'nanos' has the same sign as 'sec', 127228753Smm * which implies that sec=-1, nanos=200000000 represents -1.2 seconds 128228753Smm * and not -0.8 seconds. This is a pretty pedantic point, as we're 129228753Smm * unlikely to encounter many real files created before Jan 1, 1970, 130228753Smm * much less ones with timestamps recorded to sub-second resolution. 131228753Smm */ 132228753Smmstatic void 133228753Smmadd_pax_attr_time(struct archive_string *as, const char *key, 134228753Smm int64_t sec, unsigned long nanos) 135228753Smm{ 136228753Smm int digit, i; 137228753Smm char *t; 138228753Smm /* 139228753Smm * Note that each byte contributes fewer than 3 base-10 140228753Smm * digits, so this will always be big enough. 141228753Smm */ 142228753Smm char tmp[1 + 3*sizeof(sec) + 1 + 3*sizeof(nanos)]; 143228753Smm 144228753Smm tmp[sizeof(tmp) - 1] = 0; 145228753Smm t = tmp + sizeof(tmp) - 1; 146228753Smm 147228753Smm /* Skip trailing zeros in the fractional part. */ 148228753Smm for (digit = 0, i = 10; i > 0 && digit == 0; i--) { 149228753Smm digit = nanos % 10; 150228753Smm nanos /= 10; 151228753Smm } 152228753Smm 153228753Smm /* Only format the fraction if it's non-zero. 
*/ 154228753Smm if (i > 0) { 155228753Smm while (i > 0) { 156228753Smm *--t = "0123456789"[digit]; 157228753Smm digit = nanos % 10; 158228753Smm nanos /= 10; 159228753Smm i--; 160228753Smm } 161228753Smm *--t = '.'; 162228753Smm } 163228753Smm t = format_int(t, sec); 164228753Smm 165228753Smm add_pax_attr(as, key, t); 166228753Smm} 167228753Smm 168228753Smmstatic char * 169228753Smmformat_int(char *t, int64_t i) 170228753Smm{ 171228753Smm int sign; 172228753Smm 173228753Smm if (i < 0) { 174228753Smm sign = -1; 175228753Smm i = -i; 176228753Smm } else 177228753Smm sign = 1; 178228753Smm 179228753Smm do { 180228753Smm *--t = "0123456789"[i % 10]; 181228753Smm } while (i /= 10); 182228753Smm if (sign < 0) 183228753Smm *--t = '-'; 184228753Smm return (t); 185228753Smm} 186228753Smm 187228753Smmstatic void 188228753Smmadd_pax_attr_int(struct archive_string *as, const char *key, int64_t value) 189228753Smm{ 190228753Smm char tmp[1 + 3 * sizeof(value)]; 191228753Smm 192228753Smm tmp[sizeof(tmp) - 1] = 0; 193228753Smm add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value)); 194228753Smm} 195228753Smm 196228753Smmstatic char * 197228753Smmutf8_encode(const wchar_t *wval) 198228753Smm{ 199228753Smm int utf8len; 200228753Smm const wchar_t *wp; 201228753Smm unsigned long wc; 202228753Smm char *utf8_value, *p; 203228753Smm 204228753Smm utf8len = 0; 205228753Smm for (wp = wval; *wp != L'\0'; ) { 206228753Smm wc = *wp++; 207228753Smm 208228753Smm if (wc >= 0xd800 && wc <= 0xdbff 209228753Smm && *wp >= 0xdc00 && *wp <= 0xdfff) { 210228753Smm /* This is a surrogate pair. Combine into a 211228753Smm * full Unicode value before encoding into 212228753Smm * UTF-8. 
*/ 213228753Smm wc = (wc - 0xd800) << 10; /* High 10 bits */ 214228753Smm wc += (*wp++ - 0xdc00); /* Low 10 bits */ 215228753Smm wc += 0x10000; /* Skip BMP */ 216228753Smm } 217228753Smm if (wc <= 0x7f) 218228753Smm utf8len++; 219228753Smm else if (wc <= 0x7ff) 220228753Smm utf8len += 2; 221228753Smm else if (wc <= 0xffff) 222228753Smm utf8len += 3; 223228753Smm else if (wc <= 0x1fffff) 224228753Smm utf8len += 4; 225228753Smm else if (wc <= 0x3ffffff) 226228753Smm utf8len += 5; 227228753Smm else if (wc <= 0x7fffffff) 228228753Smm utf8len += 6; 229228753Smm /* Ignore larger values; UTF-8 can't encode them. */ 230228753Smm } 231228753Smm 232228753Smm utf8_value = (char *)malloc(utf8len + 1); 233228753Smm if (utf8_value == NULL) { 234228753Smm __archive_errx(1, "Not enough memory for attributes"); 235228753Smm return (NULL); 236228753Smm } 237228753Smm 238228753Smm for (wp = wval, p = utf8_value; *wp != L'\0'; ) { 239228753Smm wc = *wp++; 240228753Smm if (wc >= 0xd800 && wc <= 0xdbff 241228753Smm && *wp >= 0xdc00 && *wp <= 0xdfff) { 242228753Smm /* Combine surrogate pair. 
*/ 243228753Smm wc = (wc - 0xd800) << 10; 244228753Smm wc += *wp++ - 0xdc00 + 0x10000; 245228753Smm } 246228753Smm if (wc <= 0x7f) { 247228753Smm *p++ = (char)wc; 248228753Smm } else if (wc <= 0x7ff) { 249228753Smm p[0] = 0xc0 | ((wc >> 6) & 0x1f); 250228753Smm p[1] = 0x80 | (wc & 0x3f); 251228753Smm p += 2; 252228753Smm } else if (wc <= 0xffff) { 253228753Smm p[0] = 0xe0 | ((wc >> 12) & 0x0f); 254228753Smm p[1] = 0x80 | ((wc >> 6) & 0x3f); 255228753Smm p[2] = 0x80 | (wc & 0x3f); 256228753Smm p += 3; 257228753Smm } else if (wc <= 0x1fffff) { 258228753Smm p[0] = 0xf0 | ((wc >> 18) & 0x07); 259228753Smm p[1] = 0x80 | ((wc >> 12) & 0x3f); 260228753Smm p[2] = 0x80 | ((wc >> 6) & 0x3f); 261228753Smm p[3] = 0x80 | (wc & 0x3f); 262228753Smm p += 4; 263228753Smm } else if (wc <= 0x3ffffff) { 264228753Smm p[0] = 0xf8 | ((wc >> 24) & 0x03); 265228753Smm p[1] = 0x80 | ((wc >> 18) & 0x3f); 266228753Smm p[2] = 0x80 | ((wc >> 12) & 0x3f); 267228753Smm p[3] = 0x80 | ((wc >> 6) & 0x3f); 268228753Smm p[4] = 0x80 | (wc & 0x3f); 269228753Smm p += 5; 270228753Smm } else if (wc <= 0x7fffffff) { 271228753Smm p[0] = 0xfc | ((wc >> 30) & 0x01); 272228753Smm p[1] = 0x80 | ((wc >> 24) & 0x3f); 273228753Smm p[1] = 0x80 | ((wc >> 18) & 0x3f); 274228753Smm p[2] = 0x80 | ((wc >> 12) & 0x3f); 275228753Smm p[3] = 0x80 | ((wc >> 6) & 0x3f); 276228753Smm p[4] = 0x80 | (wc & 0x3f); 277228753Smm p += 6; 278228753Smm } 279228753Smm /* Ignore larger values; UTF-8 can't encode them. 
*/ 280228753Smm } 281228753Smm *p = '\0'; 282228753Smm 283228753Smm return (utf8_value); 284228753Smm} 285228753Smm 286228753Smmstatic void 287228753Smmadd_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval) 288228753Smm{ 289228753Smm char *utf8_value = utf8_encode(wval); 290228753Smm if (utf8_value == NULL) 291228753Smm return; 292228753Smm add_pax_attr(as, key, utf8_value); 293228753Smm free(utf8_value); 294228753Smm} 295228753Smm 296228753Smm/* 297228753Smm * Add a key/value attribute to the pax header. This function handles 298228753Smm * the length field and various other syntactic requirements. 299228753Smm */ 300228753Smmstatic void 301228753Smmadd_pax_attr(struct archive_string *as, const char *key, const char *value) 302228753Smm{ 303228753Smm int digits, i, len, next_ten; 304228753Smm char tmp[1 + 3 * sizeof(int)]; /* < 3 base-10 digits per byte */ 305228753Smm 306228753Smm /*- 307228753Smm * PAX attributes have the following layout: 308228753Smm * <len> <space> <key> <=> <value> <nl> 309228753Smm */ 310228753Smm len = 1 + (int)strlen(key) + 1 + (int)strlen(value) + 1; 311228753Smm 312228753Smm /* 313228753Smm * The <len> field includes the length of the <len> field, so 314228753Smm * computing the correct length is tricky. I start by 315228753Smm * counting the number of base-10 digits in 'len' and 316228753Smm * computing the next higher power of 10. 317228753Smm */ 318228753Smm next_ten = 1; 319228753Smm digits = 0; 320228753Smm i = len; 321228753Smm while (i > 0) { 322228753Smm i = i / 10; 323228753Smm digits++; 324228753Smm next_ten = next_ten * 10; 325228753Smm } 326228753Smm /* 327228753Smm * For example, if string without the length field is 99 328228753Smm * chars, then adding the 2 digit length "99" will force the 329228753Smm * total length past 100, requiring an extra digit. The next 330228753Smm * statement adjusts for this effect. 
331228753Smm */ 332228753Smm if (len + digits >= next_ten) 333228753Smm digits++; 334228753Smm 335228753Smm /* Now, we have the right length so we can build the line. */ 336228753Smm tmp[sizeof(tmp) - 1] = 0; /* Null-terminate the work area. */ 337228753Smm archive_strcat(as, format_int(tmp + sizeof(tmp) - 1, len + digits)); 338228753Smm archive_strappend_char(as, ' '); 339228753Smm archive_strcat(as, key); 340228753Smm archive_strappend_char(as, '='); 341228753Smm archive_strcat(as, value); 342228753Smm archive_strappend_char(as, '\n'); 343228753Smm} 344228753Smm 345228753Smmstatic void 346228753Smmarchive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry) 347228753Smm{ 348228753Smm struct archive_string s; 349228753Smm int i = archive_entry_xattr_reset(entry); 350228753Smm 351228753Smm while (i--) { 352228753Smm const char *name; 353228753Smm const void *value; 354228753Smm char *encoded_value; 355228753Smm char *url_encoded_name = NULL, *encoded_name = NULL; 356228753Smm wchar_t *wcs_name = NULL; 357228753Smm size_t size; 358228753Smm 359228753Smm archive_entry_xattr_next(entry, &name, &value, &size); 360228753Smm /* Name is URL-encoded, then converted to wchar_t, 361228753Smm * then UTF-8 encoded. */ 362228753Smm url_encoded_name = url_encode(name); 363228753Smm if (url_encoded_name != NULL) { 364228753Smm /* Convert narrow-character to wide-character. */ 365228753Smm size_t wcs_length = strlen(url_encoded_name); 366228753Smm wcs_name = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t)); 367228753Smm if (wcs_name == NULL) 368228753Smm __archive_errx(1, "No memory for xattr conversion"); 369228753Smm mbstowcs(wcs_name, url_encoded_name, wcs_length); 370228753Smm wcs_name[wcs_length] = 0; 371228753Smm free(url_encoded_name); /* Done with this. */ 372228753Smm } 373228753Smm if (wcs_name != NULL) { 374228753Smm encoded_name = utf8_encode(wcs_name); 375228753Smm free(wcs_name); /* Done with wchar_t name. 
*/ 376228753Smm } 377228753Smm 378228753Smm encoded_value = base64_encode((const char *)value, size); 379228753Smm 380228753Smm if (encoded_name != NULL && encoded_value != NULL) { 381228753Smm archive_string_init(&s); 382228753Smm archive_strcpy(&s, "LIBARCHIVE.xattr."); 383228753Smm archive_strcat(&s, encoded_name); 384228753Smm add_pax_attr(&(pax->pax_header), s.s, encoded_value); 385228753Smm archive_string_free(&s); 386228753Smm } 387228753Smm free(encoded_name); 388228753Smm free(encoded_value); 389228753Smm } 390228753Smm} 391228753Smm 392228753Smm/* 393228753Smm * TODO: Consider adding 'comment' and 'charset' fields to 394228753Smm * archive_entry so that clients can specify them. Also, consider 395228753Smm * adding generic key/value tags so clients can add arbitrary 396228753Smm * key/value data. 397228753Smm */ 398228753Smmstatic int 399228753Smmarchive_write_pax_header(struct archive_write *a, 400228753Smm struct archive_entry *entry_original) 401228753Smm{ 402228753Smm struct archive_entry *entry_main; 403228753Smm const char *p; 404228753Smm char *t; 405228753Smm const wchar_t *wp; 406228753Smm const char *suffix; 407228753Smm int need_extension, r, ret; 408228753Smm struct pax *pax; 409228753Smm const char *hdrcharset = NULL; 410228753Smm const char *hardlink; 411228753Smm const char *path = NULL, *linkpath = NULL; 412228753Smm const char *uname = NULL, *gname = NULL; 413228753Smm const wchar_t *path_w = NULL, *linkpath_w = NULL; 414228753Smm const wchar_t *uname_w = NULL, *gname_w = NULL; 415228753Smm 416228753Smm char paxbuff[512]; 417228753Smm char ustarbuff[512]; 418228753Smm char ustar_entry_name[256]; 419228753Smm char pax_entry_name[256]; 420228753Smm 421228753Smm ret = ARCHIVE_OK; 422228753Smm need_extension = 0; 423228753Smm pax = (struct pax *)a->format_data; 424228753Smm 425228753Smm hardlink = archive_entry_hardlink(entry_original); 426228753Smm 427228753Smm /* Make sure this is a type of entry that we can handle here */ 428228753Smm if 
(hardlink == NULL) { 429228753Smm switch (archive_entry_filetype(entry_original)) { 430228753Smm case AE_IFBLK: 431228753Smm case AE_IFCHR: 432228753Smm case AE_IFIFO: 433228753Smm case AE_IFLNK: 434228753Smm case AE_IFREG: 435228753Smm break; 436228753Smm case AE_IFDIR: 437228753Smm /* 438228753Smm * Ensure a trailing '/'. Modify the original 439228753Smm * entry so the client sees the change. 440228753Smm */ 441228753Smm p = archive_entry_pathname(entry_original); 442228753Smm if (p[strlen(p) - 1] != '/') { 443228753Smm t = (char *)malloc(strlen(p) + 2); 444228753Smm if (t == NULL) { 445228753Smm archive_set_error(&a->archive, ENOMEM, 446228753Smm "Can't allocate pax data"); 447228753Smm return(ARCHIVE_FATAL); 448228753Smm } 449228753Smm strcpy(t, p); 450228753Smm strcat(t, "/"); 451228753Smm archive_entry_copy_pathname(entry_original, t); 452228753Smm free(t); 453228753Smm } 454228753Smm break; 455228753Smm case AE_IFSOCK: 456228753Smm archive_set_error(&a->archive, 457228753Smm ARCHIVE_ERRNO_FILE_FORMAT, 458228753Smm "tar format cannot archive socket"); 459228753Smm return (ARCHIVE_WARN); 460228753Smm default: 461228753Smm archive_set_error(&a->archive, 462228753Smm ARCHIVE_ERRNO_FILE_FORMAT, 463228753Smm "tar format cannot archive this (type=0%lo)", 464228753Smm (unsigned long)archive_entry_filetype(entry_original)); 465228753Smm return (ARCHIVE_WARN); 466228753Smm } 467228753Smm } 468228753Smm 469228753Smm /* Copy entry so we can modify it as needed. */ 470228753Smm entry_main = archive_entry_clone(entry_original); 471228753Smm archive_string_empty(&(pax->pax_header)); /* Blank our work area. */ 472228753Smm 473228753Smm /* 474228753Smm * First, check the name fields and see if any of them 475228753Smm * require binary coding. If any of them does, then all of 476228753Smm * them do. 
477228753Smm */ 478228753Smm hdrcharset = NULL; 479228753Smm path = archive_entry_pathname(entry_main); 480228753Smm path_w = archive_entry_pathname_w(entry_main); 481228753Smm if (path != NULL && path_w == NULL) { 482228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 483228753Smm "Can't translate pathname '%s' to UTF-8", path); 484228753Smm ret = ARCHIVE_WARN; 485228753Smm hdrcharset = "BINARY"; 486228753Smm } 487228753Smm uname = archive_entry_uname(entry_main); 488228753Smm uname_w = archive_entry_uname_w(entry_main); 489228753Smm if (uname != NULL && uname_w == NULL) { 490228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 491228753Smm "Can't translate uname '%s' to UTF-8", uname); 492228753Smm ret = ARCHIVE_WARN; 493228753Smm hdrcharset = "BINARY"; 494228753Smm } 495228753Smm gname = archive_entry_gname(entry_main); 496228753Smm gname_w = archive_entry_gname_w(entry_main); 497228753Smm if (gname != NULL && gname_w == NULL) { 498228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 499228753Smm "Can't translate gname '%s' to UTF-8", gname); 500228753Smm ret = ARCHIVE_WARN; 501228753Smm hdrcharset = "BINARY"; 502228753Smm } 503228753Smm linkpath = hardlink; 504228753Smm if (linkpath != NULL) { 505228753Smm linkpath_w = archive_entry_hardlink_w(entry_main); 506228753Smm } else { 507228753Smm linkpath = archive_entry_symlink(entry_main); 508228753Smm if (linkpath != NULL) 509228753Smm linkpath_w = archive_entry_symlink_w(entry_main); 510228753Smm } 511228753Smm if (linkpath != NULL && linkpath_w == NULL) { 512228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 513228753Smm "Can't translate linkpath '%s' to UTF-8", linkpath); 514228753Smm ret = ARCHIVE_WARN; 515228753Smm hdrcharset = "BINARY"; 516228753Smm } 517228753Smm 518228753Smm /* Store the header encoding first, to be nice to readers. 
*/ 519228753Smm if (hdrcharset != NULL) 520228753Smm add_pax_attr(&(pax->pax_header), "hdrcharset", hdrcharset); 521228753Smm 522228753Smm 523228753Smm /* 524228753Smm * If name is too long, or has non-ASCII characters, add 525228753Smm * 'path' to pax extended attrs. (Note that an unconvertible 526228753Smm * name must have non-ASCII characters.) 527228753Smm */ 528228753Smm if (path == NULL) { 529228753Smm /* We don't have a narrow version, so we have to store 530228753Smm * the wide version. */ 531228753Smm add_pax_attr_w(&(pax->pax_header), "path", path_w); 532228753Smm archive_entry_set_pathname(entry_main, "@WidePath"); 533228753Smm need_extension = 1; 534228753Smm } else if (has_non_ASCII(path_w)) { 535228753Smm /* We have non-ASCII characters. */ 536228753Smm if (path_w == NULL || hdrcharset != NULL) { 537228753Smm /* Can't do UTF-8, so store it raw. */ 538228753Smm add_pax_attr(&(pax->pax_header), "path", path); 539228753Smm } else { 540228753Smm /* Store UTF-8 */ 541228753Smm add_pax_attr_w(&(pax->pax_header), 542228753Smm "path", path_w); 543228753Smm } 544228753Smm archive_entry_set_pathname(entry_main, 545228753Smm build_ustar_entry_name(ustar_entry_name, 546228753Smm path, strlen(path), NULL)); 547228753Smm need_extension = 1; 548228753Smm } else { 549228753Smm /* We have an all-ASCII path; we'd like to just store 550228753Smm * it in the ustar header if it will fit. Yes, this 551228753Smm * duplicates some of the logic in 552228753Smm * write_set_format_ustar.c 553228753Smm */ 554228753Smm if (strlen(path) <= 100) { 555228753Smm /* Fits in the old 100-char tar name field. */ 556228753Smm } else { 557228753Smm /* Find largest suffix that will fit. */ 558228753Smm /* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */ 559228753Smm suffix = strchr(path + strlen(path) - 100 - 1, '/'); 560228753Smm /* Don't attempt an empty prefix. 
*/ 561228753Smm if (suffix == path) 562228753Smm suffix = strchr(suffix + 1, '/'); 563228753Smm /* We can put it in the ustar header if it's 564228753Smm * all ASCII and it's either <= 100 characters 565228753Smm * or can be split at a '/' into a prefix <= 566228753Smm * 155 chars and a suffix <= 100 chars. (Note 567228753Smm * the strchr() above will return NULL exactly 568228753Smm * when the path can't be split.) 569228753Smm */ 570228753Smm if (suffix == NULL /* Suffix > 100 chars. */ 571228753Smm || suffix[1] == '\0' /* empty suffix */ 572228753Smm || suffix - path > 155) /* Prefix > 155 chars */ 573228753Smm { 574228753Smm if (path_w == NULL || hdrcharset != NULL) { 575228753Smm /* Can't do UTF-8, so store it raw. */ 576228753Smm add_pax_attr(&(pax->pax_header), 577228753Smm "path", path); 578228753Smm } else { 579228753Smm /* Store UTF-8 */ 580228753Smm add_pax_attr_w(&(pax->pax_header), 581228753Smm "path", path_w); 582228753Smm } 583228753Smm archive_entry_set_pathname(entry_main, 584228753Smm build_ustar_entry_name(ustar_entry_name, 585228753Smm path, strlen(path), NULL)); 586228753Smm need_extension = 1; 587228753Smm } 588228753Smm } 589228753Smm } 590228753Smm 591228753Smm if (linkpath != NULL) { 592228753Smm /* If link name is too long or has non-ASCII characters, add 593228753Smm * 'linkpath' to pax extended attrs. */ 594228753Smm if (strlen(linkpath) > 100 || linkpath_w == NULL 595228753Smm || linkpath_w == NULL || has_non_ASCII(linkpath_w)) { 596228753Smm if (linkpath_w == NULL || hdrcharset != NULL) 597228753Smm /* If the linkpath is not convertible 598228753Smm * to wide, or we're encoding in 599228753Smm * binary anyway, store it raw. */ 600228753Smm add_pax_attr(&(pax->pax_header), 601228753Smm "linkpath", linkpath); 602228753Smm else 603228753Smm /* If the link is long or has a 604228753Smm * non-ASCII character, store it as a 605228753Smm * pax extended attribute. 
*/ 606228753Smm add_pax_attr_w(&(pax->pax_header), 607228753Smm "linkpath", linkpath_w); 608228753Smm if (strlen(linkpath) > 100) { 609228753Smm if (hardlink != NULL) 610228753Smm archive_entry_set_hardlink(entry_main, 611228753Smm "././@LongHardLink"); 612228753Smm else 613228753Smm archive_entry_set_symlink(entry_main, 614228753Smm "././@LongSymLink"); 615228753Smm } 616228753Smm need_extension = 1; 617228753Smm } 618228753Smm } 619228753Smm 620228753Smm /* If file size is too large, add 'size' to pax extended attrs. */ 621228753Smm if (archive_entry_size(entry_main) >= (((int64_t)1) << 33)) { 622228753Smm add_pax_attr_int(&(pax->pax_header), "size", 623228753Smm archive_entry_size(entry_main)); 624228753Smm need_extension = 1; 625228753Smm } 626228753Smm 627228753Smm /* If numeric GID is too large, add 'gid' to pax extended attrs. */ 628228753Smm if ((unsigned int)archive_entry_gid(entry_main) >= (1 << 18)) { 629228753Smm add_pax_attr_int(&(pax->pax_header), "gid", 630228753Smm archive_entry_gid(entry_main)); 631228753Smm need_extension = 1; 632228753Smm } 633228753Smm 634228753Smm /* If group name is too large or has non-ASCII characters, add 635228753Smm * 'gname' to pax extended attrs. */ 636228753Smm if (gname != NULL) { 637228753Smm if (strlen(gname) > 31 638228753Smm || gname_w == NULL 639228753Smm || has_non_ASCII(gname_w)) 640228753Smm { 641228753Smm if (gname_w == NULL || hdrcharset != NULL) { 642228753Smm add_pax_attr(&(pax->pax_header), 643228753Smm "gname", gname); 644228753Smm } else { 645228753Smm add_pax_attr_w(&(pax->pax_header), 646228753Smm "gname", gname_w); 647228753Smm } 648228753Smm need_extension = 1; 649228753Smm } 650228753Smm } 651228753Smm 652228753Smm /* If numeric UID is too large, add 'uid' to pax extended attrs. 
*/ 653228753Smm if ((unsigned int)archive_entry_uid(entry_main) >= (1 << 18)) { 654228753Smm add_pax_attr_int(&(pax->pax_header), "uid", 655228753Smm archive_entry_uid(entry_main)); 656228753Smm need_extension = 1; 657228753Smm } 658228753Smm 659228753Smm /* Add 'uname' to pax extended attrs if necessary. */ 660228753Smm if (uname != NULL) { 661228753Smm if (strlen(uname) > 31 662228753Smm || uname_w == NULL 663228753Smm || has_non_ASCII(uname_w)) 664228753Smm { 665228753Smm if (uname_w == NULL || hdrcharset != NULL) { 666228753Smm add_pax_attr(&(pax->pax_header), 667228753Smm "uname", uname); 668228753Smm } else { 669228753Smm add_pax_attr_w(&(pax->pax_header), 670228753Smm "uname", uname_w); 671228753Smm } 672228753Smm need_extension = 1; 673228753Smm } 674228753Smm } 675228753Smm 676228753Smm /* 677228753Smm * POSIX/SUSv3 doesn't provide a standard key for large device 678228753Smm * numbers. I use the same keys here that Joerg Schilling 679228753Smm * used for 'star.' (Which, somewhat confusingly, are called 680228753Smm * "devXXX" even though they code "rdev" values.) No doubt, 681228753Smm * other implementations use other keys. Note that there's no 682228753Smm * reason we can't write the same information into a number of 683228753Smm * different keys. 684228753Smm * 685228753Smm * Of course, this is only needed for block or char device entries. 686228753Smm */ 687228753Smm if (archive_entry_filetype(entry_main) == AE_IFBLK 688228753Smm || archive_entry_filetype(entry_main) == AE_IFCHR) { 689228753Smm /* 690228753Smm * If rdevmajor is too large, add 'SCHILY.devmajor' to 691228753Smm * extended attributes. 
692228753Smm */ 693228753Smm dev_t rdevmajor, rdevminor; 694228753Smm rdevmajor = archive_entry_rdevmajor(entry_main); 695228753Smm rdevminor = archive_entry_rdevminor(entry_main); 696228753Smm if (rdevmajor >= (1 << 18)) { 697228753Smm add_pax_attr_int(&(pax->pax_header), "SCHILY.devmajor", 698228753Smm rdevmajor); 699228753Smm /* 700228753Smm * Non-strict formatting below means we don't 701228753Smm * have to truncate here. Not truncating improves 702228753Smm * the chance that some more modern tar archivers 703228753Smm * (such as GNU tar 1.13) can restore the full 704228753Smm * value even if they don't understand the pax 705228753Smm * extended attributes. See my rant below about 706228753Smm * file size fields for additional details. 707228753Smm */ 708228753Smm /* archive_entry_set_rdevmajor(entry_main, 709228753Smm rdevmajor & ((1 << 18) - 1)); */ 710228753Smm need_extension = 1; 711228753Smm } 712228753Smm 713228753Smm /* 714228753Smm * If devminor is too large, add 'SCHILY.devminor' to 715228753Smm * extended attributes. 716228753Smm */ 717228753Smm if (rdevminor >= (1 << 18)) { 718228753Smm add_pax_attr_int(&(pax->pax_header), "SCHILY.devminor", 719228753Smm rdevminor); 720228753Smm /* Truncation is not necessary here, either. */ 721228753Smm /* archive_entry_set_rdevminor(entry_main, 722228753Smm rdevminor & ((1 << 18) - 1)); */ 723228753Smm need_extension = 1; 724228753Smm } 725228753Smm } 726228753Smm 727228753Smm /* 728228753Smm * Technically, the mtime field in the ustar header can 729228753Smm * support 33 bits, but many platforms use signed 32-bit time 730228753Smm * values. The cutoff of 0x7fffffff here is a compromise. 731228753Smm * Yes, this check is duplicated just below; this helps to 732228753Smm * avoid writing an mtime attribute just to handle a 733228753Smm * high-resolution timestamp in "restricted pax" mode. 
734228753Smm */ 735228753Smm if (!need_extension && 736228753Smm ((archive_entry_mtime(entry_main) < 0) 737228753Smm || (archive_entry_mtime(entry_main) >= 0x7fffffff))) 738228753Smm need_extension = 1; 739228753Smm 740228753Smm /* I use a star-compatible file flag attribute. */ 741228753Smm p = archive_entry_fflags_text(entry_main); 742228753Smm if (!need_extension && p != NULL && *p != '\0') 743228753Smm need_extension = 1; 744228753Smm 745228753Smm /* If there are non-trivial ACL entries, we need an extension. */ 746228753Smm if (!need_extension && archive_entry_acl_count(entry_original, 747228753Smm ARCHIVE_ENTRY_ACL_TYPE_ACCESS) > 0) 748228753Smm need_extension = 1; 749228753Smm 750228753Smm /* If there are non-trivial ACL entries, we need an extension. */ 751228753Smm if (!need_extension && archive_entry_acl_count(entry_original, 752228753Smm ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) > 0) 753228753Smm need_extension = 1; 754228753Smm 755228753Smm /* If there are extended attributes, we need an extension */ 756228753Smm if (!need_extension && archive_entry_xattr_count(entry_original) > 0) 757228753Smm need_extension = 1; 758228753Smm 759228753Smm /* 760228753Smm * The following items are handled differently in "pax 761228753Smm * restricted" format. In particular, in "pax restricted" 762228753Smm * format they won't be added unless need_extension is 763228753Smm * already set (we're already generating an extended header, so 764228753Smm * may as well include these). 
765228753Smm */ 766228753Smm if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED || 767228753Smm need_extension) { 768228753Smm 769228753Smm if (archive_entry_mtime(entry_main) < 0 || 770228753Smm archive_entry_mtime(entry_main) >= 0x7fffffff || 771228753Smm archive_entry_mtime_nsec(entry_main) != 0) 772228753Smm add_pax_attr_time(&(pax->pax_header), "mtime", 773228753Smm archive_entry_mtime(entry_main), 774228753Smm archive_entry_mtime_nsec(entry_main)); 775228753Smm 776228753Smm if (archive_entry_ctime(entry_main) != 0 || 777228753Smm archive_entry_ctime_nsec(entry_main) != 0) 778228753Smm add_pax_attr_time(&(pax->pax_header), "ctime", 779228753Smm archive_entry_ctime(entry_main), 780228753Smm archive_entry_ctime_nsec(entry_main)); 781228753Smm 782228753Smm if (archive_entry_atime(entry_main) != 0 || 783228753Smm archive_entry_atime_nsec(entry_main) != 0) 784228753Smm add_pax_attr_time(&(pax->pax_header), "atime", 785228753Smm archive_entry_atime(entry_main), 786228753Smm archive_entry_atime_nsec(entry_main)); 787228753Smm 788228753Smm /* Store birth/creationtime only if it's earlier than mtime */ 789228753Smm if (archive_entry_birthtime_is_set(entry_main) && 790228753Smm archive_entry_birthtime(entry_main) 791228753Smm < archive_entry_mtime(entry_main)) 792228753Smm add_pax_attr_time(&(pax->pax_header), 793228753Smm "LIBARCHIVE.creationtime", 794228753Smm archive_entry_birthtime(entry_main), 795228753Smm archive_entry_birthtime_nsec(entry_main)); 796228753Smm 797228753Smm /* I use a star-compatible file flag attribute. */ 798228753Smm p = archive_entry_fflags_text(entry_main); 799228753Smm if (p != NULL && *p != '\0') 800228753Smm add_pax_attr(&(pax->pax_header), "SCHILY.fflags", p); 801228753Smm 802228753Smm /* I use star-compatible ACL attributes. 
*/ 803228753Smm wp = archive_entry_acl_text_w(entry_original, 804228753Smm ARCHIVE_ENTRY_ACL_TYPE_ACCESS | 805228753Smm ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); 806228753Smm if (wp != NULL && *wp != L'\0') 807228753Smm add_pax_attr_w(&(pax->pax_header), 808228753Smm "SCHILY.acl.access", wp); 809228753Smm wp = archive_entry_acl_text_w(entry_original, 810228753Smm ARCHIVE_ENTRY_ACL_TYPE_DEFAULT | 811228753Smm ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); 812228753Smm if (wp != NULL && *wp != L'\0') 813228753Smm add_pax_attr_w(&(pax->pax_header), 814228753Smm "SCHILY.acl.default", wp); 815228753Smm 816228753Smm /* Include star-compatible metadata info. */ 817228753Smm /* Note: "SCHILY.dev{major,minor}" are NOT the 818228753Smm * major/minor portions of "SCHILY.dev". */ 819228753Smm add_pax_attr_int(&(pax->pax_header), "SCHILY.dev", 820228753Smm archive_entry_dev(entry_main)); 821228753Smm add_pax_attr_int(&(pax->pax_header), "SCHILY.ino", 822228753Smm archive_entry_ino64(entry_main)); 823228753Smm add_pax_attr_int(&(pax->pax_header), "SCHILY.nlink", 824228753Smm archive_entry_nlink(entry_main)); 825228753Smm 826228753Smm /* Store extended attributes */ 827228753Smm archive_write_pax_header_xattrs(pax, entry_original); 828228753Smm } 829228753Smm 830228753Smm /* Only regular files have data. */ 831228753Smm if (archive_entry_filetype(entry_main) != AE_IFREG) 832228753Smm archive_entry_set_size(entry_main, 0); 833228753Smm 834228753Smm /* 835228753Smm * Pax-restricted does not store data for hardlinks, in order 836228753Smm * to improve compatibility with ustar. 837228753Smm */ 838228753Smm if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE && 839228753Smm hardlink != NULL) 840228753Smm archive_entry_set_size(entry_main, 0); 841228753Smm 842228753Smm /* 843228753Smm * XXX Full pax interchange format does permit a hardlink 844228753Smm * entry to have data associated with it. 
I'm not supporting 845228753Smm * that here because the client expects me to tell them whether 846228753Smm * or not this format expects data for hardlinks. If I 847228753Smm * don't check here, then every pax archive will end up with 848228753Smm * duplicated data for hardlinks. Someday, there may be 849228753Smm * need to select this behavior, in which case the following 850228753Smm * will need to be revisited. XXX 851228753Smm */ 852228753Smm if (hardlink != NULL) 853228753Smm archive_entry_set_size(entry_main, 0); 854228753Smm 855228753Smm /* Format 'ustar' header for main entry. 856228753Smm * 857228753Smm * The trouble with file size: If the reader can't understand 858228753Smm * the file size, they may not be able to locate the next 859228753Smm * entry and the rest of the archive is toast. Pax-compliant 860228753Smm * readers are supposed to ignore the file size in the main 861228753Smm * header, so the question becomes how to maximize portability 862228753Smm * for readers that don't support pax attribute extensions. 863228753Smm * For maximum compatibility, I permit numeric extensions in 864228753Smm * the main header so that the file size stored will always be 865228753Smm * correct, even if it's in a format that only some 866228753Smm * implementations understand. The technique used here is: 867228753Smm * 868228753Smm * a) If possible, follow the standard exactly. This handles 869228753Smm * files up to 8 gigabytes minus 1. 870228753Smm * 871228753Smm * b) If that fails, try octal but omit the field terminator. 872228753Smm * That handles files up to 64 gigabytes minus 1. 873228753Smm * 874228753Smm * c) Otherwise, use base-256 extensions. That handles files 875228753Smm * up to 2^63 in this implementation, with the potential to 876228753Smm * go up to 2^94. That should hold us for a while. ;-) 877228753Smm * 878228753Smm * The non-strict formatter uses similar logic for other 879228753Smm * numeric fields, though they're less critical. 
880228753Smm */ 881228753Smm __archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0); 882228753Smm 883228753Smm /* If we built any extended attributes, write that entry first. */ 884228753Smm if (archive_strlen(&(pax->pax_header)) > 0) { 885228753Smm struct archive_entry *pax_attr_entry; 886228753Smm time_t s; 887228753Smm uid_t uid; 888228753Smm gid_t gid; 889228753Smm mode_t mode; 890228753Smm 891228753Smm pax_attr_entry = archive_entry_new(); 892228753Smm p = archive_entry_pathname(entry_main); 893228753Smm archive_entry_set_pathname(pax_attr_entry, 894228753Smm build_pax_attribute_name(pax_entry_name, p)); 895228753Smm archive_entry_set_size(pax_attr_entry, 896228753Smm archive_strlen(&(pax->pax_header))); 897228753Smm /* Copy uid/gid (but clip to ustar limits). */ 898228753Smm uid = archive_entry_uid(entry_main); 899228753Smm if ((unsigned int)uid >= 1 << 18) 900228753Smm uid = (uid_t)(1 << 18) - 1; 901228753Smm archive_entry_set_uid(pax_attr_entry, uid); 902228753Smm gid = archive_entry_gid(entry_main); 903228753Smm if ((unsigned int)gid >= 1 << 18) 904228753Smm gid = (gid_t)(1 << 18) - 1; 905228753Smm archive_entry_set_gid(pax_attr_entry, gid); 906228753Smm /* Copy mode over (but not setuid/setgid bits) */ 907228753Smm mode = archive_entry_mode(entry_main); 908228753Smm#ifdef S_ISUID 909228753Smm mode &= ~S_ISUID; 910228753Smm#endif 911228753Smm#ifdef S_ISGID 912228753Smm mode &= ~S_ISGID; 913228753Smm#endif 914228753Smm#ifdef S_ISVTX 915228753Smm mode &= ~S_ISVTX; 916228753Smm#endif 917228753Smm archive_entry_set_mode(pax_attr_entry, mode); 918228753Smm 919228753Smm /* Copy uname/gname. */ 920228753Smm archive_entry_set_uname(pax_attr_entry, 921228753Smm archive_entry_uname(entry_main)); 922228753Smm archive_entry_set_gname(pax_attr_entry, 923228753Smm archive_entry_gname(entry_main)); 924228753Smm 925228753Smm /* Copy mtime, but clip to ustar limits. 
*/ 926228753Smm s = archive_entry_mtime(entry_main); 927228753Smm if (s < 0) { s = 0; } 928228753Smm if (s >= 0x7fffffff) { s = 0x7fffffff; } 929228753Smm archive_entry_set_mtime(pax_attr_entry, s, 0); 930228753Smm 931228753Smm /* Standard ustar doesn't support atime. */ 932228753Smm archive_entry_set_atime(pax_attr_entry, 0, 0); 933228753Smm 934228753Smm /* Standard ustar doesn't support ctime. */ 935228753Smm archive_entry_set_ctime(pax_attr_entry, 0, 0); 936228753Smm 937228753Smm r = __archive_write_format_header_ustar(a, paxbuff, 938228753Smm pax_attr_entry, 'x', 1); 939228753Smm 940228753Smm archive_entry_free(pax_attr_entry); 941228753Smm 942228753Smm /* Note that the 'x' header shouldn't ever fail to format */ 943228753Smm if (r != 0) { 944228753Smm const char *msg = "archive_write_pax_header: " 945228753Smm "'x' header failed?! This can't happen.\n"; 946228753Smm size_t u = write(2, msg, strlen(msg)); 947228753Smm (void)u; /* UNUSED */ 948228753Smm exit(1); 949228753Smm } 950228753Smm r = (a->compressor.write)(a, paxbuff, 512); 951228753Smm if (r != ARCHIVE_OK) { 952228753Smm pax->entry_bytes_remaining = 0; 953228753Smm pax->entry_padding = 0; 954228753Smm return (ARCHIVE_FATAL); 955228753Smm } 956228753Smm 957228753Smm pax->entry_bytes_remaining = archive_strlen(&(pax->pax_header)); 958228753Smm pax->entry_padding = 0x1ff & (-(int64_t)pax->entry_bytes_remaining); 959228753Smm 960228753Smm r = (a->compressor.write)(a, pax->pax_header.s, 961228753Smm archive_strlen(&(pax->pax_header))); 962228753Smm if (r != ARCHIVE_OK) { 963228753Smm /* If a write fails, we're pretty much toast. */ 964228753Smm return (ARCHIVE_FATAL); 965228753Smm } 966228753Smm /* Pad out the end of the entry. */ 967228753Smm r = write_nulls(a, pax->entry_padding); 968228753Smm if (r != ARCHIVE_OK) { 969228753Smm /* If a write fails, we're pretty much toast. 
*/ 970228753Smm return (ARCHIVE_FATAL); 971228753Smm } 972228753Smm pax->entry_bytes_remaining = pax->entry_padding = 0; 973228753Smm } 974228753Smm 975228753Smm /* Write the header for main entry. */ 976228753Smm r = (a->compressor.write)(a, ustarbuff, 512); 977228753Smm if (r != ARCHIVE_OK) 978228753Smm return (r); 979228753Smm 980228753Smm /* 981228753Smm * Inform the client of the on-disk size we're using, so 982228753Smm * they can avoid unnecessarily writing a body for something 983228753Smm * that we're just going to ignore. 984228753Smm */ 985228753Smm archive_entry_set_size(entry_original, archive_entry_size(entry_main)); 986228753Smm pax->entry_bytes_remaining = archive_entry_size(entry_main); 987228753Smm pax->entry_padding = 0x1ff & (-(int64_t)pax->entry_bytes_remaining); 988228753Smm archive_entry_free(entry_main); 989228753Smm 990228753Smm return (ret); 991228753Smm} 992228753Smm 993228753Smm/* 994228753Smm * We need a valid name for the regular 'ustar' entry. This routine 995228753Smm * tries to hack something more-or-less reasonable. 996228753Smm * 997228753Smm * The approach here tries to preserve leading dir names. We do so by 998228753Smm * working with four sections: 999228753Smm * 1) "prefix" directory names, 1000228753Smm * 2) "suffix" directory names, 1001228753Smm * 3) inserted dir name (optional), 1002228753Smm * 4) filename. 1003228753Smm * 1004228753Smm * These sections must satisfy the following requirements: 1005228753Smm * * Parts 1 & 2 together form an initial portion of the dir name. 1006228753Smm * * Part 3 is specified by the caller. (It should not contain a leading 1007228753Smm * or trailing '/'.) 1008228753Smm * * Part 4 forms an initial portion of the base filename. 1009228753Smm * * The filename must be <= 99 chars to fit the ustar 'name' field. 1010228753Smm * * Parts 2, 3, 4 together must be <= 99 chars to fit the ustar 'name' fld. 1011228753Smm * * Part 1 must be <= 155 chars to fit the ustar 'prefix' field. 
/*
 * build_ustar_entry_name: squeeze an arbitrary pathname into something
 * that fits the fixed-size name fields of a ustar header.
 *
 * Arguments:
 *   dest        Caller-supplied output buffer.  Its capacity is not known
 *               to this function and is not checked here.
 *   src         Original pathname.  Only the first src_length bytes are
 *               examined; src need not be NUL-terminated at that point.
 *   src_length  Number of bytes of src to use.
 *   insert      Optional extra directory element to place directly in
 *               front of the final path element, or NULL.  It must not
 *               begin or end with '/'.
 *
 * Returns dest, which always holds a NUL-terminated string.
 *
 * The result is assembled from four consecutive pieces:
 *   1) "prefix" directory names, cut at a '/' found within about the
 *      first 155 bytes of src (the ustar 'prefix' field),
 *   2) "suffix" directory names,
 *   3) insert followed by '/' (only when insert is not NULL),
 *   4) the final path element, truncated if necessary.
 * Pieces 2, 3 and 4 share a budget of 99 bytes (the ustar 'name' field).
 * Directory names that fit in neither piece 1 nor piece 2 are dropped.
 *
 * Special cases:
 *   - If src_length < 100 and insert is NULL the name is copied verbatim.
 *   - Trailing '/' characters and trailing "/." pairs are stripped; if
 *     anything was stripped the result ends in a single '/'.
 *   - If nothing at all is left after stripping (src consisted only of
 *     '/' and "/." sequences, i.e. it names the root directory) the
 *     result is "/" followed by insert and '/' when insert is given.
 *     An empty src with an insert yields just insert and '/'.
 *   - If insert alone is too long for the 99-byte budget, the final path
 *     element is dropped entirely instead of letting the unsigned length
 *     arithmetic wrap around.
 */
static char *
build_ustar_entry_name(char *dest, const char *src, size_t src_length,
    const char *insert)
{
	const char *prefix, *prefix_end;
	const char *suffix, *suffix_end;
	const char *filename, *filename_end;
	char *p;
	int need_slash = 0; /* Was there a trailing slash? */
	size_t suffix_length = 99;
	size_t insert_length;

	/* Length of additional dir element to be added. */
	if (insert == NULL)
		insert_length = 0;
	else
		/* +2 here allows for '/' before and after the insert. */
		insert_length = strlen(insert) + 2;

	/* Step 0: Quick bailout in a common case. */
	if (src_length < 100 && insert == NULL) {
		strncpy(dest, src, src_length);
		dest[src_length] = '\0';
		return (dest);
	}

	/* Step 1: Locate filename and enforce the length restriction. */
	filename_end = src + src_length;
	/* Remove trailing '/' chars and '/.' pairs. */
	for (;;) {
		if (filename_end > src && filename_end[-1] == '/') {
			filename_end--;
			need_slash = 1; /* Remember to restore trailing '/'. */
			continue;
		}
		if (filename_end > src + 1 && filename_end[-1] == '.'
		    && filename_end[-2] == '/') {
			filename_end -= 2;
			need_slash = 1; /* "foo/." will become "foo/" */
			continue;
		}
		break;
	}

	/*
	 * Nothing left: the name was empty or made only of '/' and "/."
	 * sequences.  There is no final path element to search for, and
	 * stepping back from filename_end would read the byte in front
	 * of src, so build the result directly.
	 */
	if (filename_end == src) {
		p = dest;
		if (need_slash)
			*p++ = '/';
		if (insert != NULL) {
			strcpy(p, insert);
			p += strlen(insert);
			*p++ = '/';
		}
		*p = '\0';
		return (dest);
	}

	if (need_slash)
		suffix_length--;
	/* Find start of filename. */
	filename = filename_end - 1;
	while ((filename > src) && (*filename != '/'))
		filename--;
	if ((*filename == '/') && (filename < filename_end - 1))
		filename++;
	/* Reserve room for the insert; never let the budget wrap around. */
	if (insert_length >= suffix_length)
		suffix_length = 0;
	else
		suffix_length -= insert_length;
	/* Adjust filename_end so that filename + insert fits in 99 chars. */
	if ((size_t)(filename_end - filename) > suffix_length)
		filename_end = filename + suffix_length;
	/* Whatever budget remains is available to the "suffix" dirs. */
	suffix_length -= (size_t)(filename_end - filename);

	/* Step 2: Locate the "prefix" section of the dirname, including
	 * trailing '/'.  Lengths are compared (rather than pointers) so
	 * that no pointer beyond the end of src is ever formed. */
	prefix = src;
	if (filename - prefix > 155)
		prefix_end = prefix + 155;
	else
		prefix_end = filename;
	while (prefix_end > prefix && *prefix_end != '/')
		prefix_end--;
	if ((prefix_end < filename) && (*prefix_end == '/'))
		prefix_end++;

	/* Step 3: Locate the "suffix" section of the dirname,
	 * including trailing '/'. */
	suffix = prefix_end;
	if ((size_t)(filename - suffix) > suffix_length)
		suffix_end = suffix + suffix_length; /* Enforce limit. */
	else
		suffix_end = filename;
	while (suffix_end > suffix && *suffix_end != '/')
		suffix_end--;
	if ((suffix_end < filename) && (*suffix_end == '/'))
		suffix_end++;

	/* Step 4: Build the new name.  Every copy below has an exact,
	 * already-validated length, and the terminator is written last. */
	p = dest;
	if (prefix_end > prefix) {
		strncpy(p, prefix, prefix_end - prefix);
		p += prefix_end - prefix;
	}
	if (suffix_end > suffix) {
		strncpy(p, suffix, suffix_end - suffix);
		p += suffix_end - suffix;
	}
	if (insert != NULL) {
		/* Note: assume insert does not have leading or trailing '/' */
		strcpy(p, insert);
		p += strlen(insert);
		*p++ = '/';
	}
	strncpy(p, filename, filename_end - filename);
	p += filename_end - filename;
	if (need_slash)
		*p++ = '/';
	*p = '\0';

	return (dest);
}
This is a 1142228753Smm * tempting argument, in part because it's simpler than the SUSv3 1143228753Smm * recommendation, but I'm not entirely convinced. I'm also 1144228753Smm * uncomfortable with the fact that "/tmp" is a Unix-ism. 1145228753Smm * 1146228753Smm * The following routine leverages build_ustar_entry_name() above and 1147228753Smm * so is simpler than you might think. It just needs to provide the 1148228753Smm * additional path element and handle a few pathological cases). 1149228753Smm */ 1150228753Smmstatic char * 1151228753Smmbuild_pax_attribute_name(char *dest, const char *src) 1152228753Smm{ 1153228753Smm char buff[64]; 1154228753Smm const char *p; 1155228753Smm 1156228753Smm /* Handle the null filename case. */ 1157228753Smm if (src == NULL || *src == '\0') { 1158228753Smm strcpy(dest, "PaxHeader/blank"); 1159228753Smm return (dest); 1160228753Smm } 1161228753Smm 1162228753Smm /* Prune final '/' and other unwanted final elements. */ 1163228753Smm p = src + strlen(src); 1164228753Smm for (;;) { 1165228753Smm /* Ends in "/", remove the '/' */ 1166228753Smm if (p > src && p[-1] == '/') { 1167228753Smm --p; 1168228753Smm continue; 1169228753Smm } 1170228753Smm /* Ends in "/.", remove the '.' */ 1171228753Smm if (p > src + 1 && p[-1] == '.' 1172228753Smm && p[-2] == '/') { 1173228753Smm --p; 1174228753Smm continue; 1175228753Smm } 1176228753Smm break; 1177228753Smm } 1178228753Smm 1179228753Smm /* Pathological case: After above, there was nothing left. 1180228753Smm * This includes "/." "/./." "/.//./." etc. */ 1181228753Smm if (p == src) { 1182228753Smm strcpy(dest, "/PaxHeader/rootdir"); 1183228753Smm return (dest); 1184228753Smm } 1185228753Smm 1186228753Smm /* Convert unadorned "." into a suitable filename. */ 1187228753Smm if (*src == '.' 
&& p == src + 1) { 1188228753Smm strcpy(dest, "PaxHeader/currentdir"); 1189228753Smm return (dest); 1190228753Smm } 1191228753Smm 1192228753Smm /* 1193228753Smm * TODO: Push this string into the 'pax' structure to avoid 1194228753Smm * recomputing it every time. That will also open the door 1195228753Smm * to having clients override it. 1196228753Smm */ 1197228753Smm#if HAVE_GETPID && 0 /* Disable this for now; see above comment. */ 1198228753Smm sprintf(buff, "PaxHeader.%d", getpid()); 1199228753Smm#else 1200228753Smm /* If the platform can't fetch the pid, don't include it. */ 1201228753Smm strcpy(buff, "PaxHeader"); 1202228753Smm#endif 1203228753Smm /* General case: build a ustar-compatible name adding "/PaxHeader/". */ 1204228753Smm build_ustar_entry_name(dest, src, p - src, buff); 1205228753Smm 1206228753Smm return (dest); 1207228753Smm} 1208228753Smm 1209228753Smm/* Write two null blocks for the end of archive */ 1210228753Smmstatic int 1211228753Smmarchive_write_pax_finish(struct archive_write *a) 1212228753Smm{ 1213228753Smm int r; 1214228753Smm 1215228753Smm if (a->compressor.write == NULL) 1216228753Smm return (ARCHIVE_OK); 1217228753Smm 1218228753Smm r = write_nulls(a, 512 * 2); 1219228753Smm return (r); 1220228753Smm} 1221228753Smm 1222228753Smmstatic int 1223228753Smmarchive_write_pax_destroy(struct archive_write *a) 1224228753Smm{ 1225228753Smm struct pax *pax; 1226228753Smm 1227228753Smm pax = (struct pax *)a->format_data; 1228228753Smm if (pax == NULL) 1229228753Smm return (ARCHIVE_OK); 1230228753Smm 1231228753Smm archive_string_free(&pax->pax_header); 1232228753Smm free(pax); 1233228753Smm a->format_data = NULL; 1234228753Smm return (ARCHIVE_OK); 1235228753Smm} 1236228753Smm 1237228753Smmstatic int 1238228753Smmarchive_write_pax_finish_entry(struct archive_write *a) 1239228753Smm{ 1240228753Smm struct pax *pax; 1241228753Smm int ret; 1242228753Smm 1243228753Smm pax = (struct pax *)a->format_data; 1244228753Smm ret = write_nulls(a, 
pax->entry_bytes_remaining + pax->entry_padding); 1245228753Smm pax->entry_bytes_remaining = pax->entry_padding = 0; 1246228753Smm return (ret); 1247228753Smm} 1248228753Smm 1249228753Smmstatic int 1250228753Smmwrite_nulls(struct archive_write *a, size_t padding) 1251228753Smm{ 1252228753Smm int ret; 1253228753Smm size_t to_write; 1254228753Smm 1255228753Smm while (padding > 0) { 1256228753Smm to_write = padding < a->null_length ? padding : a->null_length; 1257228753Smm ret = (a->compressor.write)(a, a->nulls, to_write); 1258228753Smm if (ret != ARCHIVE_OK) 1259228753Smm return (ret); 1260228753Smm padding -= to_write; 1261228753Smm } 1262228753Smm return (ARCHIVE_OK); 1263228753Smm} 1264228753Smm 1265228753Smmstatic ssize_t 1266228753Smmarchive_write_pax_data(struct archive_write *a, const void *buff, size_t s) 1267228753Smm{ 1268228753Smm struct pax *pax; 1269228753Smm int ret; 1270228753Smm 1271228753Smm pax = (struct pax *)a->format_data; 1272228753Smm if (s > pax->entry_bytes_remaining) 1273228753Smm s = pax->entry_bytes_remaining; 1274228753Smm 1275228753Smm ret = (a->compressor.write)(a, buff, s); 1276228753Smm pax->entry_bytes_remaining -= s; 1277228753Smm if (ret == ARCHIVE_OK) 1278228753Smm return (s); 1279228753Smm else 1280228753Smm return (ret); 1281228753Smm} 1282228753Smm 1283228753Smmstatic int 1284228753Smmhas_non_ASCII(const wchar_t *wp) 1285228753Smm{ 1286228753Smm if (wp == NULL) 1287228753Smm return (1); 1288228753Smm while (*wp != L'\0' && *wp < 128) 1289228753Smm wp++; 1290228753Smm return (*wp != L'\0'); 1291228753Smm} 1292228753Smm 1293228753Smm/* 1294228753Smm * Used by extended attribute support; encodes the name 1295228753Smm * so that there will be no '=' characters in the result. 
/*
 * Decide whether url_encode() must escape a byte: anything outside the
 * printable, non-space ASCII range 33..126, plus '%' (the escape
 * character itself) and '='.
 */
static int
url_encode_must_escape(unsigned char c)
{
	return (c < 33 || c > 126 || c == '%' || c == '=');
}

/*
 * url_encode: percent-encode a string so that the result contains no
 * '=' characters (used for extended attribute names).
 *
 * Every byte that is not printable non-space ASCII, and every '%' and
 * '=', is replaced by '%' followed by two upper-case hex digits; all
 * other bytes are copied unchanged.
 *
 * Bytes are handled as unsigned char so that values of 0x80 and above
 * produce the right hex digits whether plain char is signed or not.
 *
 * Returns a NUL-terminated string allocated with malloc(), which the
 * caller must free, or NULL if the allocation fails.
 */
static char *
url_encode(const char *in)
{
	static const char hex[] = "0123456789ABCDEF";
	const unsigned char *s;
	char *d;
	size_t out_len = 0;
	char *out;

	/* First pass: size the output. */
	for (s = (const unsigned char *)in; *s != '\0'; s++) {
		if (url_encode_must_escape(*s))
			out_len += 3;
		else
			out_len++;
	}

	out = (char *)malloc(out_len + 1);
	if (out == NULL)
		return (NULL);

	/* Second pass: produce the encoded text. */
	for (s = (const unsigned char *)in, d = out; *s != '\0'; s++) {
		if (url_encode_must_escape(*s)) {
			/* URL encoding is '%' followed by two hex digits */
			*d++ = '%';
			*d++ = hex[*s >> 4];
			*d++ = hex[*s & 0x0f];
		} else {
			*d++ = (char)*s;
		}
	}
	*d = '\0';
	return (out);
}
/*
 * base64_encode: encode a sequence of bytes into a C string using the
 * standard base-64 alphabet (A-Z, a-z, 0-9, '+', '/').
 *
 * Arguments:
 *   s    Bytes to encode; may contain NUL bytes.
 *   len  Number of bytes at s.
 *
 * Each full group of 3 input bytes yields 4 output characters.  A final
 * group of 1 byte yields 2 characters and a final group of 2 bytes
 * yields 3 characters; no '=' padding is appended.
 *
 * Input bytes are read as unsigned char and combined in an unsigned
 * type, so bytes of 0x80 and above never cause a negative value to be
 * shifted.
 *
 * Returns a NUL-terminated string allocated with malloc(); the caller
 * is responsible for freeing it.  Returns NULL if allocation fails.
 */
static char *
base64_encode(const char *s, size_t len)
{
	static const char digits[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *in = (const unsigned char *)s;
	unsigned long v;
	char *d, *out;

	/* 4 chars per full group, plus up to 3 chars and a NUL for the
	 * tail.  Dividing first keeps the size computation from
	 * overflowing. */
	out = (char *)malloc((len / 3) * 4 + 4);
	if (out == NULL)
		return (NULL);
	d = out;

	/* Convert each group of 3 bytes into 4 characters. */
	while (len >= 3) {
		v = ((unsigned long)in[0] << 16)
		    | ((unsigned long)in[1] << 8)
		    | (unsigned long)in[2];
		in += 3;
		len -= 3;
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		*d++ = digits[(v >> 6) & 0x3f];
		*d++ = digits[(v) & 0x3f];
	}
	/* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */
	switch (len) {
	case 1:
		v = (unsigned long)in[0] << 16;
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		break;
	case 2:
		v = ((unsigned long)in[0] << 16)
		    | ((unsigned long)in[1] << 8);
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		*d++ = digits[(v >> 6) & 0x3f];
		break;
	default:
		/* len == 0: nothing left over. */
		break;
	}
	/* Add trailing NUL character so output is a valid C string. */
	*d = '\0';
	return (out);
}