1/*- 2 * Copyright (c) 2007 Kai Wang 3 * Copyright (c) 2007 Tim Kientzle 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer 11 * in this position and unchanged. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28#include "archive_platform.h" 29 30#ifdef HAVE_SYS_STAT_H 31#include <sys/stat.h> 32#endif 33#ifdef HAVE_ERRNO_H 34#include <errno.h> 35#endif 36#ifdef HAVE_STDLIB_H 37#include <stdlib.h> 38#endif 39#ifdef HAVE_STRING_H 40#include <string.h> 41#endif 42#ifdef HAVE_LIMITS_H 43#include <limits.h> 44#endif 45 46#include "archive.h" 47#include "archive_entry.h" 48#include "archive_private.h" 49#include "archive_read_private.h" 50 51struct ar { 52 int64_t entry_bytes_remaining; 53 /* unconsumed is purely to track data we've gotten from readahead, 54 * but haven't yet marked as consumed. Must be paired with 55 * entry_bytes_remaining usage/modification. 56 */ 57 size_t entry_bytes_unconsumed; 58 int64_t entry_offset; 59 int64_t entry_padding; 60 char *strtab; 61 size_t strtab_size; 62 char read_global_header; 63}; 64 65/* 66 * Define structure of the "ar" header. 67 */ 68#define AR_name_offset 0 69#define AR_name_size 16 70#define AR_date_offset 16 71#define AR_date_size 12 72#define AR_uid_offset 28 73#define AR_uid_size 6 74#define AR_gid_offset 34 75#define AR_gid_size 6 76#define AR_mode_offset 40 77#define AR_mode_size 8 78#define AR_size_offset 48 79#define AR_size_size 10 80#define AR_fmag_offset 58 81#define AR_fmag_size 2 82 83static int archive_read_format_ar_bid(struct archive_read *a, int); 84static int archive_read_format_ar_cleanup(struct archive_read *a); 85static int archive_read_format_ar_read_data(struct archive_read *a, 86 const void **buff, size_t *size, int64_t *offset); 87static int archive_read_format_ar_skip(struct archive_read *a); 88static int archive_read_format_ar_read_header(struct archive_read *a, 89 struct archive_entry *e); 90static uint64_t ar_atol8(const char *p, unsigned char_cnt); 91static uint64_t ar_atol10(const char *p, unsigned char_cnt); 92static int ar_parse_gnu_filename_table(struct archive_read *a); 93static int ar_parse_common_header(struct ar *ar, struct archive_entry *, 94 const char *h); 95 96int 97archive_read_support_format_ar(struct archive *_a) 98{ 99 struct archive_read *a = (struct archive_read *)_a; 100 struct ar *ar; 101 int r; 102 103 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 104 ARCHIVE_STATE_NEW, "archive_read_support_format_ar"); 105 106 ar = (struct ar *)calloc(1, sizeof(*ar)); 107 if (ar == NULL) { 108 archive_set_error(&a->archive, ENOMEM, 109 "Can't allocate ar data"); 110 return (ARCHIVE_FATAL); 111 } 112 ar->strtab = NULL; 113 114 r = __archive_read_register_format(a, 115 ar, 116 "ar", 117 archive_read_format_ar_bid, 118 NULL, 119 archive_read_format_ar_read_header, 120 archive_read_format_ar_read_data, 121 archive_read_format_ar_skip, 122 NULL, 123 archive_read_format_ar_cleanup, 124 NULL, 125 NULL); 126 127 if (r != ARCHIVE_OK) { 128 free(ar); 129 return (r); 130 } 131 return (ARCHIVE_OK); 132} 133 134static int 135archive_read_format_ar_cleanup(struct archive_read *a) 136{ 137 struct ar *ar; 138 139 ar = (struct ar *)(a->format->data); 140 free(ar->strtab); 141 free(ar); 142 (a->format->data) = NULL; 143 return (ARCHIVE_OK); 144} 145 146static int 147archive_read_format_ar_bid(struct archive_read *a, int best_bid) 148{ 149 const void *h; 150 151 (void)best_bid; /* UNUSED */ 152 153 /* 154 * Verify the 8-byte file signature. 155 * TODO: Do we need to check more than this? 156 */ 157 if ((h = __archive_read_ahead(a, 8, NULL)) == NULL) 158 return (-1); 159 if (memcmp(h, "!<arch>\n", 8) == 0) { 160 return (64); 161 } 162 return (-1); 163} 164 165static int 166_ar_read_header(struct archive_read *a, struct archive_entry *entry, 167 struct ar *ar, const char *h, size_t *unconsumed) 168{ 169 char filename[AR_name_size + 1]; 170 uint64_t number; /* Used to hold parsed numbers before validation. */ 171 size_t bsd_name_length, entry_size; 172 char *p, *st; 173 const void *b; 174 int r; 175 176 /* Verify the magic signature on the file header. */ 177 if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) { 178 archive_set_error(&a->archive, EINVAL, 179 "Incorrect file header signature"); 180 return (ARCHIVE_FATAL); 181 } 182 183 /* Copy filename into work buffer. */ 184 strncpy(filename, h + AR_name_offset, AR_name_size); 185 filename[AR_name_size] = '\0'; 186 187 /* 188 * Guess the format variant based on the filename. 189 */ 190 if (a->archive.archive_format == ARCHIVE_FORMAT_AR) { 191 /* We don't already know the variant, so let's guess. */ 192 /* 193 * Biggest clue is presence of '/': GNU starts special 194 * filenames with '/', appends '/' as terminator to 195 * non-special names, so anything with '/' should be 196 * GNU except for BSD long filenames. 197 */ 198 if (strncmp(filename, "#1/", 3) == 0) 199 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD; 200 else if (strchr(filename, '/') != NULL) 201 a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU; 202 else if (strncmp(filename, "__.SYMDEF", 9) == 0) 203 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD; 204 /* 205 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/' 206 * if name exactly fills 16-byte field? If so, we 207 * can't assume entries without '/' are BSD. XXX 208 */ 209 } 210 211 /* Update format name from the code. */ 212 if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU) 213 a->archive.archive_format_name = "ar (GNU/SVR4)"; 214 else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD) 215 a->archive.archive_format_name = "ar (BSD)"; 216 else 217 a->archive.archive_format_name = "ar"; 218 219 /* 220 * Remove trailing spaces from the filename. GNU and BSD 221 * variants both pad filename area out with spaces. 222 * This will only be wrong if GNU/SVR4 'ar' implementations 223 * omit trailing '/' for 16-char filenames and we have 224 * a 16-char filename that ends in ' '. 225 */ 226 p = filename + AR_name_size - 1; 227 while (p >= filename && *p == ' ') { 228 *p = '\0'; 229 p--; 230 } 231 232 /* 233 * Remove trailing slash unless first character is '/'. 234 * (BSD entries never end in '/', so this will only trim 235 * GNU-format entries. GNU special entries start with '/' 236 * and are not terminated in '/', so we don't trim anything 237 * that starts with '/'.) 238 */ 239 if (filename[0] != '/' && p > filename && *p == '/') { 240 *p = '\0'; 241 } 242 243 if (p < filename) { 244 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 245 "Found entry with empty filename"); 246 return (ARCHIVE_FATAL); 247 } 248 249 /* 250 * '//' is the GNU filename table. 251 * Later entries can refer to names in this table. 252 */ 253 if (strcmp(filename, "//") == 0) { 254 /* This must come before any call to _read_ahead. */ 255 ar_parse_common_header(ar, entry, h); 256 archive_entry_copy_pathname(entry, filename); 257 archive_entry_set_filetype(entry, AE_IFREG); 258 /* Get the size of the filename table. */ 259 number = ar_atol10(h + AR_size_offset, AR_size_size); 260 if (number > SIZE_MAX || number > 1024 * 1024 * 1024) { 261 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 262 "Filename table too large"); 263 return (ARCHIVE_FATAL); 264 } 265 entry_size = (size_t)number; 266 if (entry_size == 0) { 267 archive_set_error(&a->archive, EINVAL, 268 "Invalid string table"); 269 return (ARCHIVE_FATAL); 270 } 271 if (ar->strtab != NULL) { 272 archive_set_error(&a->archive, EINVAL, 273 "More than one string table exists"); 274 return (ARCHIVE_FATAL); 275 } 276 277 /* Read the filename table into memory. */ 278 st = malloc(entry_size); 279 if (st == NULL) { 280 archive_set_error(&a->archive, ENOMEM, 281 "Can't allocate filename table buffer"); 282 return (ARCHIVE_FATAL); 283 } 284 ar->strtab = st; 285 ar->strtab_size = entry_size; 286 287 if (*unconsumed) { 288 __archive_read_consume(a, *unconsumed); 289 *unconsumed = 0; 290 } 291 292 if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL) 293 return (ARCHIVE_FATAL); 294 memcpy(st, b, entry_size); 295 __archive_read_consume(a, entry_size); 296 /* All contents are consumed. */ 297 ar->entry_bytes_remaining = 0; 298 archive_entry_set_size(entry, ar->entry_bytes_remaining); 299 300 /* Parse the filename table. */ 301 return (ar_parse_gnu_filename_table(a)); 302 } 303 304 /* 305 * GNU variant handles long filenames by storing /<number> 306 * to indicate a name stored in the filename table. 307 * XXX TODO: Verify that it's all digits... Don't be fooled 308 * by "/9xyz" XXX 309 */ 310 if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') { 311 number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1); 312 /* 313 * If we can't look up the real name, warn and return 314 * the entry with the wrong name. 315 */ 316 if (ar->strtab == NULL || number >= ar->strtab_size) { 317 archive_set_error(&a->archive, EINVAL, 318 "Can't find long filename for GNU/SVR4 archive entry"); 319 archive_entry_copy_pathname(entry, filename); 320 /* Parse the time, owner, mode, size fields. */ 321 ar_parse_common_header(ar, entry, h); 322 return (ARCHIVE_FATAL); 323 } 324 325 archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]); 326 /* Parse the time, owner, mode, size fields. */ 327 return (ar_parse_common_header(ar, entry, h)); 328 } 329 330 /* 331 * BSD handles long filenames by storing "#1/" followed by the 332 * length of filename as a decimal number, then prepends the 333 * the filename to the file contents. 334 */ 335 if (strncmp(filename, "#1/", 3) == 0) { 336 /* Parse the time, owner, mode, size fields. */ 337 /* This must occur before _read_ahead is called again. */ 338 ar_parse_common_header(ar, entry, h); 339 340 /* Parse the size of the name, adjust the file size. */ 341 number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3); 342 /* Sanity check the filename length: 343 * = Must be <= SIZE_MAX - 1 344 * = Must be <= 1MB 345 * = Cannot be bigger than the entire entry 346 */ 347 if (number > SIZE_MAX - 1 348 || number > 1024 * 1024 349 || (int64_t)number > ar->entry_bytes_remaining) { 350 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 351 "Bad input file size"); 352 return (ARCHIVE_FATAL); 353 } 354 bsd_name_length = (size_t)number; 355 ar->entry_bytes_remaining -= bsd_name_length; 356 /* Adjust file size reported to client. */ 357 archive_entry_set_size(entry, ar->entry_bytes_remaining); 358 359 if (*unconsumed) { 360 __archive_read_consume(a, *unconsumed); 361 *unconsumed = 0; 362 } 363 364 /* Read the long name into memory. */ 365 if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) { 366 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 367 "Truncated input file"); 368 return (ARCHIVE_FATAL); 369 } 370 /* Store it in the entry. */ 371 p = (char *)malloc(bsd_name_length + 1); 372 if (p == NULL) { 373 archive_set_error(&a->archive, ENOMEM, 374 "Can't allocate fname buffer"); 375 return (ARCHIVE_FATAL); 376 } 377 strncpy(p, b, bsd_name_length); 378 p[bsd_name_length] = '\0'; 379 380 __archive_read_consume(a, bsd_name_length); 381 382 archive_entry_copy_pathname(entry, p); 383 free(p); 384 return (ARCHIVE_OK); 385 } 386 387 /* 388 * "/" is the SVR4/GNU archive symbol table. 389 * "/SYM64/" is the SVR4/GNU 64-bit variant archive symbol table. 390 */ 391 if (strcmp(filename, "/") == 0 || strcmp(filename, "/SYM64/") == 0) { 392 archive_entry_copy_pathname(entry, filename); 393 /* Parse the time, owner, mode, size fields. */ 394 r = ar_parse_common_header(ar, entry, h); 395 /* Force the file type to a regular file. */ 396 archive_entry_set_filetype(entry, AE_IFREG); 397 return (r); 398 } 399 400 /* 401 * "__.SYMDEF" is a BSD archive symbol table. 402 */ 403 if (strcmp(filename, "__.SYMDEF") == 0) { 404 archive_entry_copy_pathname(entry, filename); 405 /* Parse the time, owner, mode, size fields. */ 406 return (ar_parse_common_header(ar, entry, h)); 407 } 408 409 /* 410 * Otherwise, this is a standard entry. The filename 411 * has already been trimmed as much as possible, based 412 * on our current knowledge of the format. 413 */ 414 archive_entry_copy_pathname(entry, filename); 415 return (ar_parse_common_header(ar, entry, h)); 416} 417 418static int 419archive_read_format_ar_read_header(struct archive_read *a, 420 struct archive_entry *entry) 421{ 422 struct ar *ar = (struct ar*)(a->format->data); 423 size_t unconsumed; 424 const void *header_data; 425 int ret; 426 427 if (!ar->read_global_header) { 428 /* 429 * We are now at the beginning of the archive, 430 * so we need first consume the ar global header. 431 */ 432 __archive_read_consume(a, 8); 433 ar->read_global_header = 1; 434 /* Set a default format code for now. */ 435 a->archive.archive_format = ARCHIVE_FORMAT_AR; 436 } 437 438 /* Read the header for the next file entry. */ 439 if ((header_data = __archive_read_ahead(a, 60, NULL)) == NULL) 440 /* Broken header. */ 441 return (ARCHIVE_EOF); 442 443 unconsumed = 60; 444 445 ret = _ar_read_header(a, entry, ar, (const char *)header_data, &unconsumed); 446 447 if (unconsumed) 448 __archive_read_consume(a, unconsumed); 449 450 return ret; 451} 452 453 454static int 455ar_parse_common_header(struct ar *ar, struct archive_entry *entry, 456 const char *h) 457{ 458 uint64_t n; 459 460 /* Copy remaining header */ 461 archive_entry_set_filetype(entry, AE_IFREG); 462 archive_entry_set_mtime(entry, 463 (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L); 464 archive_entry_set_uid(entry, 465 (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size)); 466 archive_entry_set_gid(entry, 467 (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size)); 468 archive_entry_set_mode(entry, 469 (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size)); 470 n = ar_atol10(h + AR_size_offset, AR_size_size); 471 472 ar->entry_offset = 0; 473 ar->entry_padding = n % 2; 474 archive_entry_set_size(entry, n); 475 ar->entry_bytes_remaining = n; 476 return (ARCHIVE_OK); 477} 478 479static int 480archive_read_format_ar_read_data(struct archive_read *a, 481 const void **buff, size_t *size, int64_t *offset) 482{ 483 ssize_t bytes_read; 484 struct ar *ar; 485 486 ar = (struct ar *)(a->format->data); 487 488 if (ar->entry_bytes_unconsumed) { 489 __archive_read_consume(a, ar->entry_bytes_unconsumed); 490 ar->entry_bytes_unconsumed = 0; 491 } 492 493 if (ar->entry_bytes_remaining > 0) { 494 *buff = __archive_read_ahead(a, 1, &bytes_read); 495 if (bytes_read == 0) { 496 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 497 "Truncated ar archive"); 498 return (ARCHIVE_FATAL); 499 } 500 if (bytes_read < 0) 501 return (ARCHIVE_FATAL); 502 if (bytes_read > ar->entry_bytes_remaining) 503 bytes_read = (ssize_t)ar->entry_bytes_remaining; 504 *size = bytes_read; 505 ar->entry_bytes_unconsumed = bytes_read; 506 *offset = ar->entry_offset; 507 ar->entry_offset += bytes_read; 508 ar->entry_bytes_remaining -= bytes_read; 509 return (ARCHIVE_OK); 510 } else { 511 int64_t skipped = __archive_read_consume(a, ar->entry_padding); 512 if (skipped >= 0) { 513 ar->entry_padding -= skipped; 514 } 515 if (ar->entry_padding) { 516 if (skipped >= 0) { 517 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 518 "Truncated ar archive - failed consuming padding"); 519 } 520 return (ARCHIVE_FATAL); 521 } 522 *buff = NULL; 523 *size = 0; 524 *offset = ar->entry_offset; 525 return (ARCHIVE_EOF); 526 } 527} 528 529static int 530archive_read_format_ar_skip(struct archive_read *a) 531{ 532 int64_t bytes_skipped; 533 struct ar* ar; 534 535 ar = (struct ar *)(a->format->data); 536 537 bytes_skipped = __archive_read_consume(a, 538 ar->entry_bytes_remaining + ar->entry_padding 539 + ar->entry_bytes_unconsumed); 540 if (bytes_skipped < 0) 541 return (ARCHIVE_FATAL); 542 543 ar->entry_bytes_remaining = 0; 544 ar->entry_bytes_unconsumed = 0; 545 ar->entry_padding = 0; 546 547 return (ARCHIVE_OK); 548} 549 550static int 551ar_parse_gnu_filename_table(struct archive_read *a) 552{ 553 struct ar *ar; 554 char *p; 555 size_t size; 556 557 ar = (struct ar*)(a->format->data); 558 size = ar->strtab_size; 559 560 for (p = ar->strtab; p < ar->strtab + size - 1; ++p) { 561 if (*p == '/') { 562 *p++ = '\0'; 563 if (*p != '\n') 564 goto bad_string_table; 565 *p = '\0'; 566 } 567 } 568 /* 569 * GNU ar always pads the table to an even size. 570 * The pad character is either '\n' or '`'. 571 */ 572 if (p != ar->strtab + size && *p != '\n' && *p != '`') 573 goto bad_string_table; 574 575 /* Enforce zero termination. */ 576 ar->strtab[size - 1] = '\0'; 577 578 return (ARCHIVE_OK); 579 580bad_string_table: 581 archive_set_error(&a->archive, EINVAL, 582 "Invalid string table"); 583 free(ar->strtab); 584 ar->strtab = NULL; 585 return (ARCHIVE_FATAL); 586} 587 588static uint64_t 589ar_atol8(const char *p, unsigned char_cnt) 590{ 591 uint64_t l, limit, last_digit_limit; 592 unsigned int digit, base; 593 594 base = 8; 595 limit = UINT64_MAX / base; 596 last_digit_limit = UINT64_MAX % base; 597 598 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0) 599 p++; 600 601 l = 0; 602 digit = *p - '0'; 603 while (*p >= '0' && digit < base && char_cnt-- > 0) { 604 if (l>limit || (l == limit && digit > last_digit_limit)) { 605 l = UINT64_MAX; /* Truncate on overflow. */ 606 break; 607 } 608 l = (l * base) + digit; 609 digit = *++p - '0'; 610 } 611 return (l); 612} 613 614static uint64_t 615ar_atol10(const char *p, unsigned char_cnt) 616{ 617 uint64_t l, limit, last_digit_limit; 618 unsigned int base, digit; 619 620 base = 10; 621 limit = UINT64_MAX / base; 622 last_digit_limit = UINT64_MAX % base; 623 624 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0) 625 p++; 626 l = 0; 627 digit = *p - '0'; 628 while (*p >= '0' && digit < base && char_cnt-- > 0) { 629 if (l > limit || (l == limit && digit > last_digit_limit)) { 630 l = UINT64_MAX; /* Truncate on overflow. */ 631 break; 632 } 633 l = (l * base) + digit; 634 digit = *++p - '0'; 635 } 636 return (l); 637} 638