1228753Smm/*- 2228753Smm * Copyright (c) 2004 Tim Kientzle 3232153Smm * Copyright (c) 2011-2012 Michihiro NAKAJIMA 4228753Smm * All rights reserved. 5228753Smm * 6228753Smm * Redistribution and use in source and binary forms, with or without 7228753Smm * modification, are permitted provided that the following conditions 8228753Smm * are met: 9228753Smm * 1. Redistributions of source code must retain the above copyright 10228753Smm * notice, this list of conditions and the following disclaimer. 11228753Smm * 2. Redistributions in binary form must reproduce the above copyright 12228753Smm * notice, this list of conditions and the following disclaimer in the 13228753Smm * documentation and/or other materials provided with the distribution. 14228753Smm * 15228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 16228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 19228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25228753Smm */ 26228753Smm 27228753Smm#include "archive_platform.h" 28228763Smm__FBSDID("$FreeBSD$"); 29228753Smm 30228753Smm#ifdef HAVE_ERRNO_H 31228753Smm#include <errno.h> 32228753Smm#endif 33228753Smm#ifdef HAVE_STDLIB_H 34228753Smm#include <stdlib.h> 35228753Smm#endif 36228753Smm#ifdef HAVE_ZLIB_H 37228753Smm#include <zlib.h> 38228753Smm#endif 39228753Smm 40228753Smm#include "archive.h" 41238856Smm#include "archive_endian.h" 42228753Smm#include "archive_entry.h" 43232153Smm#include "archive_entry_locale.h" 44228753Smm#include "archive_private.h" 45238856Smm#include "archive_rb.h" 46228753Smm#include "archive_read_private.h" 47228753Smm 48228753Smm#ifndef HAVE_ZLIB_H 49228753Smm#include "archive_crc32.h" 50228753Smm#endif 51228753Smm 52232153Smmstruct zip_entry { 53238856Smm struct archive_rb_node node; 54232153Smm int64_t local_header_offset; 55232153Smm int64_t compressed_size; 56232153Smm int64_t uncompressed_size; 57232153Smm int64_t gid; 58232153Smm int64_t uid; 59232153Smm struct archive_entry *entry; 60248616Smm struct archive_string rsrcname; 61232153Smm time_t mtime; 62232153Smm time_t atime; 63232153Smm time_t ctime; 64232153Smm uint32_t crc32; 65232153Smm uint16_t mode; 66232153Smm uint16_t flags; 67232153Smm char compression; 68232153Smm char system; 69232153Smm}; 70232153Smm 71228753Smmstruct zip { 72232153Smm /* Structural information about the archive. */ 73248616Smm int64_t end_of_central_directory_offset; 74232153Smm int64_t central_directory_offset; 75232153Smm size_t central_directory_size; 76232153Smm size_t central_directory_entries; 77232153Smm char have_central_directory; 78238856Smm int64_t offset; 79232153Smm 80232153Smm /* List of entries (seekable Zip only) */ 81232153Smm size_t entries_remaining; 82232153Smm struct zip_entry *zip_entries; 83232153Smm struct zip_entry *entry; 84238856Smm struct archive_rb_tree tree; 85248616Smm struct archive_rb_tree tree_rsrc; 86232153Smm 87232153Smm size_t unconsumed; 88232153Smm 89228753Smm /* entry_bytes_remaining is the number of bytes we expect. */ 90228753Smm int64_t entry_bytes_remaining; 91228753Smm 92228753Smm /* These count the number of bytes actually read for the entry. */ 93228753Smm int64_t entry_compressed_bytes_read; 94228753Smm int64_t entry_uncompressed_bytes_read; 95228753Smm 96228753Smm /* Running CRC32 of the decompressed data */ 97228753Smm unsigned long entry_crc32; 98228753Smm 99228753Smm /* Flags to mark progress of decompression. */ 100228753Smm char decompress_init; 101228753Smm char end_of_entry; 102228753Smm 103228753Smm ssize_t filename_length; 104228753Smm ssize_t extra_length; 105228753Smm 106228753Smm unsigned char *uncompressed_buffer; 107228753Smm size_t uncompressed_buffer_size; 108228753Smm#ifdef HAVE_ZLIB_H 109228753Smm z_stream stream; 110228753Smm char stream_valid; 111228753Smm#endif 112228753Smm 113228753Smm struct archive_string extra; 114232153Smm struct archive_string_conv *sconv; 115232153Smm struct archive_string_conv *sconv_default; 116232153Smm struct archive_string_conv *sconv_utf8; 117232153Smm int init_default_conversion; 118228753Smm char format_name[64]; 119228753Smm}; 120228753Smm 121228753Smm#define ZIP_LENGTH_AT_END 8 122232153Smm#define ZIP_ENCRYPTED (1<<0) 123232153Smm#define ZIP_STRONG_ENCRYPTED (1<<6) 124232153Smm#define ZIP_UTF8_NAME (1<<11) 125228753Smm 126248616Smmstatic int archive_read_format_zip_streamable_bid(struct archive_read *, 127248616Smm int); 128248616Smmstatic int archive_read_format_zip_seekable_bid(struct archive_read *, 129248616Smm int); 130232153Smmstatic int archive_read_format_zip_options(struct archive_read *, 131232153Smm const char *, const char *); 132228753Smmstatic int archive_read_format_zip_cleanup(struct archive_read *); 133228753Smmstatic int archive_read_format_zip_read_data(struct archive_read *, 134232153Smm const void **, size_t *, int64_t *); 135228753Smmstatic int archive_read_format_zip_read_data_skip(struct archive_read *a); 136248616Smmstatic int archive_read_format_zip_seekable_read_header( 137248616Smm struct archive_read *, struct archive_entry *); 138248616Smmstatic int archive_read_format_zip_streamable_read_header( 139248616Smm struct archive_read *, struct archive_entry *); 140248616Smmstatic ssize_t zip_get_local_file_header_size(struct archive_read *, size_t); 141232153Smm#ifdef HAVE_ZLIB_H 142248616Smmstatic int zip_deflate_init(struct archive_read *, struct zip *); 143228753Smmstatic int zip_read_data_deflate(struct archive_read *a, const void **buff, 144232153Smm size_t *size, int64_t *offset); 145232153Smm#endif 146228753Smmstatic int zip_read_data_none(struct archive_read *a, const void **buff, 147232153Smm size_t *size, int64_t *offset); 148232153Smmstatic int zip_read_local_file_header(struct archive_read *a, 149248616Smm struct archive_entry *entry, struct zip *); 150228753Smmstatic time_t zip_time(const char *); 151232153Smmstatic const char *compression_name(int compression); 152248616Smmstatic void process_extra(const char *, size_t, struct zip_entry *); 153228753Smm 154232153Smmint archive_read_support_format_zip_streamable(struct archive *); 155232153Smmint archive_read_support_format_zip_seekable(struct archive *); 156232153Smm 157228753Smmint 158232153Smmarchive_read_support_format_zip_streamable(struct archive *_a) 159228753Smm{ 160228753Smm struct archive_read *a = (struct archive_read *)_a; 161228753Smm struct zip *zip; 162228753Smm int r; 163228753Smm 164232153Smm archive_check_magic(_a, ARCHIVE_READ_MAGIC, 165232153Smm ARCHIVE_STATE_NEW, "archive_read_support_format_zip"); 166232153Smm 167228753Smm zip = (struct zip *)malloc(sizeof(*zip)); 168228753Smm if (zip == NULL) { 169232153Smm archive_set_error(&a->archive, ENOMEM, 170232153Smm "Can't allocate zip data"); 171228753Smm return (ARCHIVE_FATAL); 172228753Smm } 173228753Smm memset(zip, 0, sizeof(*zip)); 174228753Smm 175228753Smm r = __archive_read_register_format(a, 176228753Smm zip, 177228753Smm "zip", 178232153Smm archive_read_format_zip_streamable_bid, 179232153Smm archive_read_format_zip_options, 180232153Smm archive_read_format_zip_streamable_read_header, 181228753Smm archive_read_format_zip_read_data, 182228753Smm archive_read_format_zip_read_data_skip, 183248616Smm NULL, 184228753Smm archive_read_format_zip_cleanup); 185228753Smm 186228753Smm if (r != ARCHIVE_OK) 187228753Smm free(zip); 188228753Smm return (ARCHIVE_OK); 189228753Smm} 190228753Smm 191232153Smmint 192232153Smmarchive_read_support_format_zip_seekable(struct archive *_a) 193232153Smm{ 194232153Smm struct archive_read *a = (struct archive_read *)_a; 195232153Smm struct zip *zip; 196232153Smm int r; 197228753Smm 198232153Smm archive_check_magic(_a, ARCHIVE_READ_MAGIC, 199232153Smm ARCHIVE_STATE_NEW, "archive_read_support_format_zip_seekable"); 200232153Smm 201232153Smm zip = (struct zip *)malloc(sizeof(*zip)); 202232153Smm if (zip == NULL) { 203232153Smm archive_set_error(&a->archive, ENOMEM, 204232153Smm "Can't allocate zip data"); 205232153Smm return (ARCHIVE_FATAL); 206232153Smm } 207232153Smm memset(zip, 0, sizeof(*zip)); 208232153Smm 209232153Smm r = __archive_read_register_format(a, 210232153Smm zip, 211232153Smm "zip", 212232153Smm archive_read_format_zip_seekable_bid, 213232153Smm archive_read_format_zip_options, 214232153Smm archive_read_format_zip_seekable_read_header, 215232153Smm archive_read_format_zip_read_data, 216232153Smm archive_read_format_zip_read_data_skip, 217248616Smm NULL, 218232153Smm archive_read_format_zip_cleanup); 219232153Smm 220232153Smm if (r != ARCHIVE_OK) 221232153Smm free(zip); 222232153Smm return (ARCHIVE_OK); 223232153Smm} 224232153Smm 225232153Smmint 226232153Smmarchive_read_support_format_zip(struct archive *a) 227232153Smm{ 228232153Smm int r; 229232153Smm r = archive_read_support_format_zip_streamable(a); 230232153Smm if (r != ARCHIVE_OK) 231232153Smm return r; 232232153Smm return (archive_read_support_format_zip_seekable(a)); 233232153Smm} 234232153Smm 235232153Smm/* 236232153Smm * TODO: This is a performance sink because it forces the read core to 237232153Smm * drop buffered data from the start of file, which will then have to 238232153Smm * be re-read again if this bidder loses. 239232153Smm * 240232153Smm * We workaround this a little by passing in the best bid so far so 241232153Smm * that later bidders can do nothing if they know they'll never 242232153Smm * outbid. But we can certainly do better... 243232153Smm */ 244228753Smmstatic int 245232153Smmarchive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid) 246228753Smm{ 247232153Smm struct zip *zip = (struct zip *)a->format->data; 248232153Smm int64_t filesize; 249228753Smm const char *p; 250228753Smm 251232153Smm /* If someone has already bid more than 32, then avoid 252232153Smm trashing the look-ahead buffers with a seek. */ 253232153Smm if (best_bid > 32) 254232153Smm return (-1); 255232153Smm 256232153Smm filesize = __archive_read_seek(a, -22, SEEK_END); 257232153Smm /* If we can't seek, then we can't bid. */ 258232153Smm if (filesize <= 0) 259232153Smm return 0; 260232153Smm 261232153Smm /* TODO: More robust search for end of central directory record. */ 262232153Smm if ((p = __archive_read_ahead(a, 22, NULL)) == NULL) 263232153Smm return 0; 264232153Smm /* First four bytes are signature for end of central directory 265232153Smm record. Four zero bytes ensure this isn't a multi-volume 266232153Smm Zip file (which we don't yet support). */ 267248616Smm if (memcmp(p, "PK\005\006\000\000\000\000", 8) != 0) { 268248616Smm int64_t i, tail; 269248616Smm int found; 270232153Smm 271248616Smm /* 272248616Smm * If there is a comment in end of central directory 273248616Smm * record, 22 bytes are too short. we have to read more 274248616Smm * to properly detect the record. Hopefully, a length 275248616Smm * of the comment is not longer than 16362 bytes(16K-22). 276248616Smm */ 277248616Smm if (filesize + 22 > 1024 * 16) { 278248616Smm tail = 1024 * 16; 279248616Smm filesize = __archive_read_seek(a, tail * -1, SEEK_END); 280248616Smm } else { 281248616Smm tail = filesize + 22; 282248616Smm filesize = __archive_read_seek(a, 0, SEEK_SET); 283248616Smm } 284248616Smm if (filesize < 0) 285248616Smm return 0; 286248616Smm if ((p = __archive_read_ahead(a, (size_t)tail, NULL)) == NULL) 287248616Smm return 0; 288248616Smm for (found = 0, i = 0;!found && i < tail - 22;) { 289248616Smm switch (p[i]) { 290248616Smm case 'P': 291248616Smm if (memcmp(p+i, 292248616Smm "PK\005\006\000\000\000\000", 8) == 0) { 293248616Smm p += i; 294248616Smm filesize += tail - 295248616Smm (22 + archive_le16dec(p+20)); 296248616Smm found = 1; 297248616Smm } else 298248616Smm i += 8; 299248616Smm break; 300248616Smm case 'K': i += 7; break; 301248616Smm case 005: i += 6; break; 302248616Smm case 006: i += 5; break; 303248616Smm default: i += 1; break; 304248616Smm } 305248616Smm } 306248616Smm if (!found) 307248616Smm return 0; 308248616Smm } 309248616Smm 310232153Smm /* Since we've already done the hard work of finding the 311232153Smm end of central directory record, let's save the important 312232153Smm information. */ 313232153Smm zip->central_directory_entries = archive_le16dec(p + 10); 314232153Smm zip->central_directory_size = archive_le32dec(p + 12); 315232153Smm zip->central_directory_offset = archive_le32dec(p + 16); 316248616Smm zip->end_of_central_directory_offset = filesize; 317232153Smm 318232153Smm /* Just one volume, so central dir must all be on this volume. */ 319232153Smm if (zip->central_directory_entries != archive_le16dec(p + 8)) 320232153Smm return 0; 321232153Smm /* Central directory can't extend beyond end of this file. */ 322248616Smm if (zip->central_directory_offset + 323248616Smm (int64_t)zip->central_directory_size > filesize) 324232153Smm return 0; 325232153Smm 326232153Smm /* This is just a tiny bit higher than the maximum returned by 327232153Smm the streaming Zip bidder. This ensures that the more accurate 328232153Smm seeking Zip parser wins whenever seek is available. */ 329232153Smm return 32; 330232153Smm} 331232153Smm 332232153Smmstatic int 333238856Smmcmp_node(const struct archive_rb_node *n1, const struct archive_rb_node *n2) 334238856Smm{ 335238856Smm const struct zip_entry *e1 = (const struct zip_entry *)n1; 336238856Smm const struct zip_entry *e2 = (const struct zip_entry *)n2; 337238856Smm 338238856Smm return ((int)(e2->local_header_offset - e1->local_header_offset)); 339238856Smm} 340238856Smm 341238856Smmstatic int 342238856Smmcmp_key(const struct archive_rb_node *n, const void *key) 343238856Smm{ 344238856Smm /* This function won't be called */ 345238856Smm (void)n; /* UNUSED */ 346238856Smm (void)key; /* UNUSED */ 347238856Smm return 1; 348238856Smm} 349238856Smm 350238856Smmstatic int 351248616Smmrsrc_cmp_node(const struct archive_rb_node *n1, 352248616Smm const struct archive_rb_node *n2) 353248616Smm{ 354248616Smm const struct zip_entry *e1 = (const struct zip_entry *)n1; 355248616Smm const struct zip_entry *e2 = (const struct zip_entry *)n2; 356248616Smm 357248616Smm return (strcmp(e2->rsrcname.s, e1->rsrcname.s)); 358248616Smm} 359248616Smm 360248616Smmstatic int 361248616Smmrsrc_cmp_key(const struct archive_rb_node *n, const void *key) 362248616Smm{ 363248616Smm const struct zip_entry *e = (const struct zip_entry *)n; 364248616Smm return (strcmp((const char *)key, e->rsrcname.s)); 365248616Smm} 366248616Smm 367248616Smmstatic const char * 368248616Smmrsrc_basename(const char *name, size_t name_length) 369248616Smm{ 370248616Smm const char *s, *r; 371248616Smm 372248616Smm r = s = name; 373248616Smm for (;;) { 374248616Smm s = memchr(s, '/', name_length - (s - name)); 375248616Smm if (s == NULL) 376248616Smm break; 377248616Smm r = ++s; 378248616Smm } 379248616Smm return (r); 380248616Smm} 381248616Smm 382248616Smmstatic void 383248616Smmexpose_parent_dirs(struct zip *zip, const char *name, size_t name_length) 384248616Smm{ 385248616Smm struct archive_string str; 386248616Smm struct zip_entry *dir; 387248616Smm char *s; 388248616Smm 389248616Smm archive_string_init(&str); 390248616Smm archive_strncpy(&str, name, name_length); 391248616Smm for (;;) { 392248616Smm s = strrchr(str.s, '/'); 393248616Smm if (s == NULL) 394248616Smm break; 395248616Smm *s = '\0'; 396248616Smm /* Transfer the parent directory from zip->tree_rsrc RB 397248616Smm * tree to zip->tree RB tree to expose. */ 398248616Smm dir = (struct zip_entry *) 399248616Smm __archive_rb_tree_find_node(&zip->tree_rsrc, str.s); 400248616Smm if (dir == NULL) 401248616Smm break; 402248616Smm __archive_rb_tree_remove_node(&zip->tree_rsrc, &dir->node); 403248616Smm archive_string_free(&dir->rsrcname); 404248616Smm __archive_rb_tree_insert_node(&zip->tree, &dir->node); 405248616Smm } 406248616Smm archive_string_free(&str); 407248616Smm} 408248616Smm 409248616Smmstatic int 410232153Smmslurp_central_directory(struct archive_read *a, struct zip *zip) 411232153Smm{ 412232153Smm unsigned i; 413248616Smm int64_t correction; 414238856Smm static const struct archive_rb_tree_ops rb_ops = { 415238856Smm &cmp_node, &cmp_key 416238856Smm }; 417248616Smm static const struct archive_rb_tree_ops rb_rsrc_ops = { 418248616Smm &rsrc_cmp_node, &rsrc_cmp_key 419248616Smm }; 420232153Smm 421248616Smm /* 422248616Smm * Consider the archive file we are reading may be SFX. 423248616Smm * So we have to calculate a SFX header size to revise 424248616Smm * ZIP header offsets. 425248616Smm */ 426248616Smm correction = zip->end_of_central_directory_offset - 427248616Smm (zip->central_directory_offset + zip->central_directory_size); 428248616Smm /* The central directory offset is relative value, and so 429248616Smm * we revise this offset for SFX. */ 430248616Smm zip->central_directory_offset += correction; 431248616Smm 432232153Smm __archive_read_seek(a, zip->central_directory_offset, SEEK_SET); 433238856Smm zip->offset = zip->central_directory_offset; 434238856Smm __archive_rb_tree_init(&zip->tree, &rb_ops); 435248616Smm __archive_rb_tree_init(&zip->tree_rsrc, &rb_rsrc_ops); 436232153Smm 437238856Smm zip->zip_entries = calloc(zip->central_directory_entries, 438238856Smm sizeof(struct zip_entry)); 439232153Smm for (i = 0; i < zip->central_directory_entries; ++i) { 440232153Smm struct zip_entry *zip_entry = &zip->zip_entries[i]; 441232153Smm size_t filename_length, extra_length, comment_length; 442232153Smm uint32_t external_attributes; 443248616Smm const char *name, *p, *r; 444232153Smm 445232153Smm if ((p = __archive_read_ahead(a, 46, NULL)) == NULL) 446232153Smm return ARCHIVE_FATAL; 447232153Smm if (memcmp(p, "PK\001\002", 4) != 0) { 448232153Smm archive_set_error(&a->archive, 449232153Smm -1, "Invalid central directory signature"); 450232153Smm return ARCHIVE_FATAL; 451232153Smm } 452232153Smm zip->have_central_directory = 1; 453232153Smm /* version = p[4]; */ 454232153Smm zip_entry->system = p[5]; 455232153Smm /* version_required = archive_le16dec(p + 6); */ 456232153Smm zip_entry->flags = archive_le16dec(p + 8); 457238856Smm zip_entry->compression = (char)archive_le16dec(p + 10); 458232153Smm zip_entry->mtime = zip_time(p + 12); 459232153Smm zip_entry->crc32 = archive_le32dec(p + 16); 460232153Smm zip_entry->compressed_size = archive_le32dec(p + 20); 461232153Smm zip_entry->uncompressed_size = archive_le32dec(p + 24); 462232153Smm filename_length = archive_le16dec(p + 28); 463232153Smm extra_length = archive_le16dec(p + 30); 464232153Smm comment_length = archive_le16dec(p + 32); 465232153Smm /* disk_start = archive_le16dec(p + 34); */ /* Better be zero. */ 466232153Smm /* internal_attributes = archive_le16dec(p + 36); */ /* text bit */ 467232153Smm external_attributes = archive_le32dec(p + 38); 468248616Smm zip_entry->local_header_offset = 469248616Smm archive_le32dec(p + 42) + correction; 470232153Smm 471232153Smm /* If we can't guess the mode, leave it zero here; 472232153Smm when we read the local file header we might get 473232153Smm more information. */ 474232153Smm zip_entry->mode = 0; 475232153Smm if (zip_entry->system == 3) { 476232153Smm zip_entry->mode = external_attributes >> 16; 477232153Smm } 478232153Smm 479248616Smm /* 480248616Smm * Mac resource fork files are stored under the 481248616Smm * "__MACOSX/" directory, so we should check if 482248616Smm * it is. 483248616Smm */ 484248616Smm /* Make sure we have the file name. */ 485248616Smm if ((p = __archive_read_ahead(a, 46 + filename_length, NULL)) 486248616Smm == NULL) 487248616Smm return ARCHIVE_FATAL; 488248616Smm name = p + 46; 489248616Smm r = rsrc_basename(name, filename_length); 490248616Smm if (filename_length >= 9 && 491248616Smm strncmp("__MACOSX/", name, 9) == 0) { 492248616Smm /* If this file is not a resource fork nor 493248616Smm * a directory. We should treat it as a non 494248616Smm * resource fork file to expose it. */ 495248616Smm if (name[filename_length-1] != '/' && 496248616Smm (r - name < 3 || r[0] != '.' || r[1] != '_')) { 497248616Smm __archive_rb_tree_insert_node(&zip->tree, 498248616Smm &zip_entry->node); 499248616Smm /* Expose its parent directories. */ 500248616Smm expose_parent_dirs(zip, name, filename_length); 501248616Smm } else { 502248616Smm /* This file is a resource fork file or 503248616Smm * a directory. */ 504248616Smm archive_strncpy(&(zip_entry->rsrcname), name, 505248616Smm filename_length); 506248616Smm __archive_rb_tree_insert_node(&zip->tree_rsrc, 507248616Smm &zip_entry->node); 508248616Smm } 509248616Smm } else { 510248616Smm /* Generate resource fork name to find its resource 511248616Smm * file at zip->tree_rsrc. */ 512248616Smm archive_strcpy(&(zip_entry->rsrcname), "__MACOSX/"); 513248616Smm archive_strncat(&(zip_entry->rsrcname), name, r - name); 514248616Smm archive_strcat(&(zip_entry->rsrcname), "._"); 515248616Smm archive_strncat(&(zip_entry->rsrcname), 516248616Smm name + (r - name), filename_length - (r - name)); 517248616Smm /* Register an entry to RB tree to sort it by 518248616Smm * file offset. */ 519248616Smm __archive_rb_tree_insert_node(&zip->tree, 520248616Smm &zip_entry->node); 521248616Smm } 522248616Smm 523232153Smm /* We don't read the filename until we get to the 524232153Smm local file header. Reading it here would speed up 525232153Smm table-of-contents operations (removing the need to 526232153Smm find and read local file header to get the 527232153Smm filename) at the cost of requiring a lot of extra 528232153Smm space. */ 529232153Smm /* We don't read the extra block here. We assume it 530232153Smm will be duplicated at the local file header. */ 531232153Smm __archive_read_consume(a, 532232153Smm 46 + filename_length + extra_length + comment_length); 533232153Smm } 534232153Smm 535232153Smm return ARCHIVE_OK; 536232153Smm} 537232153Smm 538238856Smmstatic int64_t 539238856Smmzip_read_consume(struct archive_read *a, int64_t bytes) 540238856Smm{ 541238856Smm struct zip *zip = (struct zip *)a->format->data; 542238856Smm int64_t skip; 543238856Smm 544238856Smm skip = __archive_read_consume(a, bytes); 545238856Smm if (skip > 0) 546238856Smm zip->offset += skip; 547238856Smm return (skip); 548238856Smm} 549238856Smm 550232153Smmstatic int 551248616Smmzip_read_mac_metadata(struct archive_read *a, struct archive_entry *entry, 552248616Smm struct zip_entry *rsrc) 553248616Smm{ 554248616Smm struct zip *zip = (struct zip *)a->format->data; 555248616Smm unsigned char *metadata, *mp; 556248616Smm int64_t offset = zip->offset; 557248616Smm size_t remaining_bytes, metadata_bytes; 558248616Smm ssize_t hsize; 559248616Smm int ret = ARCHIVE_OK, eof; 560248616Smm 561248616Smm switch(rsrc->compression) { 562248616Smm case 0: /* No compression. */ 563248616Smm#ifdef HAVE_ZLIB_H 564248616Smm case 8: /* Deflate compression. */ 565248616Smm#endif 566248616Smm break; 567248616Smm default: /* Unsupported compression. */ 568248616Smm /* Return a warning. */ 569248616Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 570248616Smm "Unsupported ZIP compression method (%s)", 571248616Smm compression_name(rsrc->compression)); 572248616Smm /* We can't decompress this entry, but we will 573248616Smm * be able to skip() it and try the next entry. */ 574248616Smm return (ARCHIVE_WARN); 575248616Smm } 576248616Smm 577248616Smm if (rsrc->uncompressed_size > (128 * 1024)) { 578248616Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 579248616Smm "Mac metadata is too large: %jd > 128K bytes", 580248616Smm (intmax_t)rsrc->uncompressed_size); 581248616Smm return (ARCHIVE_WARN); 582248616Smm } 583248616Smm 584248616Smm metadata = malloc((size_t)rsrc->uncompressed_size); 585248616Smm if (metadata == NULL) { 586248616Smm archive_set_error(&a->archive, ENOMEM, 587248616Smm "Can't allocate memory for Mac metadata"); 588248616Smm return (ARCHIVE_FATAL); 589248616Smm } 590248616Smm 591248616Smm if (zip->offset < rsrc->local_header_offset) 592248616Smm zip_read_consume(a, rsrc->local_header_offset - zip->offset); 593248616Smm else if (zip->offset != rsrc->local_header_offset) { 594248616Smm __archive_read_seek(a, rsrc->local_header_offset, SEEK_SET); 595248616Smm zip->offset = zip->entry->local_header_offset; 596248616Smm } 597248616Smm 598248616Smm hsize = zip_get_local_file_header_size(a, 0); 599248616Smm zip_read_consume(a, hsize); 600248616Smm 601248616Smm remaining_bytes = (size_t)rsrc->compressed_size; 602248616Smm metadata_bytes = (size_t)rsrc->uncompressed_size; 603248616Smm mp = metadata; 604248616Smm eof = 0; 605248616Smm while (!eof && remaining_bytes) { 606248616Smm const unsigned char *p; 607248616Smm ssize_t bytes_avail; 608248616Smm size_t bytes_used; 609248616Smm 610248616Smm p = __archive_read_ahead(a, 1, &bytes_avail); 611248616Smm if (p == NULL) { 612248616Smm archive_set_error(&a->archive, 613248616Smm ARCHIVE_ERRNO_FILE_FORMAT, 614248616Smm "Truncated ZIP file header"); 615248616Smm ret = ARCHIVE_WARN; 616248616Smm goto exit_mac_metadata; 617248616Smm } 618248616Smm if ((size_t)bytes_avail > remaining_bytes) 619248616Smm bytes_avail = remaining_bytes; 620248616Smm switch(rsrc->compression) { 621248616Smm case 0: /* No compression. */ 622248616Smm memcpy(mp, p, bytes_avail); 623248616Smm bytes_used = (size_t)bytes_avail; 624248616Smm metadata_bytes -= bytes_used; 625248616Smm mp += bytes_used; 626248616Smm if (metadata_bytes == 0) 627248616Smm eof = 1; 628248616Smm break; 629248616Smm#ifdef HAVE_ZLIB_H 630248616Smm case 8: /* Deflate compression. */ 631248616Smm { 632248616Smm int r; 633248616Smm 634248616Smm ret = zip_deflate_init(a, zip); 635248616Smm if (ret != ARCHIVE_OK) 636248616Smm goto exit_mac_metadata; 637248616Smm zip->stream.next_in = 638248616Smm (Bytef *)(uintptr_t)(const void *)p; 639248616Smm zip->stream.avail_in = (uInt)bytes_avail; 640248616Smm zip->stream.total_in = 0; 641248616Smm zip->stream.next_out = mp; 642248616Smm zip->stream.avail_out = (uInt)metadata_bytes; 643248616Smm zip->stream.total_out = 0; 644248616Smm 645248616Smm r = inflate(&zip->stream, 0); 646248616Smm switch (r) { 647248616Smm case Z_OK: 648248616Smm break; 649248616Smm case Z_STREAM_END: 650248616Smm eof = 1; 651248616Smm break; 652248616Smm case Z_MEM_ERROR: 653248616Smm archive_set_error(&a->archive, ENOMEM, 654248616Smm "Out of memory for ZIP decompression"); 655248616Smm ret = ARCHIVE_FATAL; 656248616Smm goto exit_mac_metadata; 657248616Smm default: 658248616Smm archive_set_error(&a->archive, 659248616Smm ARCHIVE_ERRNO_MISC, 660248616Smm "ZIP decompression failed (%d)", r); 661248616Smm ret = ARCHIVE_FATAL; 662248616Smm goto exit_mac_metadata; 663248616Smm } 664248616Smm bytes_used = zip->stream.total_in; 665248616Smm metadata_bytes -= zip->stream.total_out; 666248616Smm mp += zip->stream.total_out; 667248616Smm break; 668248616Smm } 669248616Smm#endif 670248616Smm default: 671248616Smm bytes_used = 0; 672248616Smm break; 673248616Smm } 674248616Smm zip_read_consume(a, bytes_used); 675248616Smm remaining_bytes -= bytes_used; 676248616Smm } 677248616Smm archive_entry_copy_mac_metadata(entry, metadata, 678248616Smm (size_t)rsrc->uncompressed_size - metadata_bytes); 679248616Smm 680248616Smm __archive_read_seek(a, offset, SEEK_SET); 681248616Smm zip->offset = offset; 682248616Smmexit_mac_metadata: 683248616Smm zip->decompress_init = 0; 684248616Smm free(metadata); 685248616Smm return (ret); 686248616Smm} 687248616Smm 688248616Smmstatic int 689232153Smmarchive_read_format_zip_seekable_read_header(struct archive_read *a, 690232153Smm struct archive_entry *entry) 691232153Smm{ 692232153Smm struct zip *zip = (struct zip *)a->format->data; 693248616Smm struct zip_entry *rsrc; 694238856Smm int r, ret = ARCHIVE_OK; 695232153Smm 696232153Smm a->archive.archive_format = ARCHIVE_FORMAT_ZIP; 697232153Smm if (a->archive.archive_format_name == NULL) 698232153Smm a->archive.archive_format_name = "ZIP"; 699232153Smm 700232153Smm if (zip->zip_entries == NULL) { 701232153Smm r = slurp_central_directory(a, zip); 702232153Smm zip->entries_remaining = zip->central_directory_entries; 703232153Smm if (r != ARCHIVE_OK) 704232153Smm return r; 705238856Smm /* Get first entry whose local header offset is lower than 706238856Smm * other entries in the archive file. */ 707238856Smm zip->entry = 708238856Smm (struct zip_entry *)ARCHIVE_RB_TREE_MIN(&zip->tree); 709238856Smm } else if (zip->entry != NULL) { 710238856Smm /* Get next entry in local header offset order. */ 711238856Smm zip->entry = (struct zip_entry *)__archive_rb_tree_iterate( 712238856Smm &zip->tree, &zip->entry->node, ARCHIVE_RB_DIR_RIGHT); 713232153Smm } 714232153Smm 715238856Smm if (zip->entries_remaining <= 0 || zip->entry == NULL) 716232153Smm return ARCHIVE_EOF; 717232153Smm --zip->entries_remaining; 718232153Smm 719248616Smm if (zip->entry->rsrcname.s) 720248616Smm rsrc = (struct zip_entry *)__archive_rb_tree_find_node( 721248616Smm &zip->tree_rsrc, zip->entry->rsrcname.s); 722248616Smm else 723248616Smm rsrc = NULL; 724248616Smm 725248616Smm /* File entries are sorted by the header offset, we should mostly 726248616Smm * use zip_read_consume to advance a read point to avoid redundant 727248616Smm * data reading. */ 728248616Smm if (zip->offset < zip->entry->local_header_offset) 729248616Smm zip_read_consume(a, 730248616Smm zip->entry->local_header_offset - zip->offset); 731248616Smm else if (zip->offset != zip->entry->local_header_offset) { 732238856Smm __archive_read_seek(a, zip->entry->local_header_offset, 733238856Smm SEEK_SET); 734238856Smm zip->offset = zip->entry->local_header_offset; 735238856Smm } 736232153Smm zip->unconsumed = 0; 737232153Smm r = zip_read_local_file_header(a, entry, zip); 738232153Smm if (r != ARCHIVE_OK) 739232153Smm return r; 740232153Smm if ((zip->entry->mode & AE_IFMT) == AE_IFLNK) { 741232153Smm const void *p; 742238856Smm struct archive_string_conv *sconv; 743238856Smm size_t linkname_length = (size_t)archive_entry_size(entry); 744232153Smm 745232153Smm archive_entry_set_size(entry, 0); 746232153Smm p = __archive_read_ahead(a, linkname_length, NULL); 747232153Smm if (p == NULL) { 748232153Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 749232153Smm "Truncated Zip file"); 750232153Smm return ARCHIVE_FATAL; 751232153Smm } 752232153Smm 753238856Smm sconv = zip->sconv; 754238856Smm if (sconv == NULL && (zip->entry->flags & ZIP_UTF8_NAME)) 755238856Smm sconv = zip->sconv_utf8; 756238856Smm if (sconv == NULL) 757238856Smm sconv = zip->sconv_default; 758232153Smm if (archive_entry_copy_symlink_l(entry, p, linkname_length, 759238856Smm sconv) != 0) { 760238856Smm if (errno != ENOMEM && sconv == zip->sconv_utf8 && 761238856Smm (zip->entry->flags & ZIP_UTF8_NAME)) 762238856Smm archive_entry_copy_symlink_l(entry, p, 763238856Smm linkname_length, NULL); 764238856Smm if (errno == ENOMEM) { 765238856Smm archive_set_error(&a->archive, ENOMEM, 766238856Smm "Can't allocate memory for Symlink"); 767238856Smm return (ARCHIVE_FATAL); 768238856Smm } 769238856Smm /* 770238856Smm * Since there is no character-set regulation for 771238856Smm * symlink name, do not report the conversion error 772238856Smm * in an automatic conversion. 773238856Smm */ 774238856Smm if (sconv != zip->sconv_utf8 || 775238856Smm (zip->entry->flags & ZIP_UTF8_NAME) == 0) { 776238856Smm archive_set_error(&a->archive, 777238856Smm ARCHIVE_ERRNO_FILE_FORMAT, 778238856Smm "Symlink cannot be converted " 779238856Smm "from %s to current locale.", 780238856Smm archive_string_conversion_charset_name( 781238856Smm sconv)); 782238856Smm ret = ARCHIVE_WARN; 783238856Smm } 784232153Smm } 785232153Smm } 786248616Smm if (rsrc) { 787248616Smm int ret2 = zip_read_mac_metadata(a, entry, rsrc); 788248616Smm if (ret2 < ret) 789248616Smm ret = ret2; 790248616Smm } 791238856Smm return (ret); 792232153Smm} 793232153Smm 794232153Smmstatic int 795232153Smmarchive_read_format_zip_streamable_bid(struct archive_read *a, int best_bid) 796232153Smm{ 797232153Smm const char *p; 798232153Smm 799232153Smm (void)best_bid; /* UNUSED */ 800232153Smm 801228753Smm if ((p = __archive_read_ahead(a, 4, NULL)) == NULL) 802228753Smm return (-1); 803228753Smm 804228753Smm /* 805228753Smm * Bid of 30 here is: 16 bits for "PK", 806228753Smm * next 16-bit field has four options (-2 bits). 807228753Smm * 16 + 16-2 = 30. 808228753Smm */ 809228753Smm if (p[0] == 'P' && p[1] == 'K') { 810228753Smm if ((p[2] == '\001' && p[3] == '\002') 811228753Smm || (p[2] == '\003' && p[3] == '\004') 812228753Smm || (p[2] == '\005' && p[3] == '\006') 813228753Smm || (p[2] == '\007' && p[3] == '\010') 814228753Smm || (p[2] == '0' && p[3] == '0')) 815228753Smm return (30); 816228753Smm } 817228753Smm 818232153Smm /* TODO: It's worth looking ahead a little bit for a valid 819232153Smm * PK signature. In particular, that would make it possible 820232153Smm * to read some UUEncoded SFX files or SFX files coming from 821232153Smm * a network socket. */ 822228753Smm 823228753Smm return (0); 824228753Smm} 825228753Smm 826228753Smmstatic int 827232153Smmarchive_read_format_zip_options(struct archive_read *a, 828232153Smm const char *key, const char *val) 829228753Smm{ 830232153Smm struct zip *zip; 831232153Smm int ret = ARCHIVE_FAILED; 832228753Smm 833232153Smm zip = (struct zip *)(a->format->data); 834232153Smm if (strcmp(key, "compat-2x") == 0) { 835232153Smm /* Handle filnames as libarchive 2.x */ 836232153Smm zip->init_default_conversion = (val != NULL) ? 1 : 0; 837232153Smm return (ARCHIVE_OK); 838232153Smm } else if (strcmp(key, "hdrcharset") == 0) { 839232153Smm if (val == NULL || val[0] == 0) 840232153Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 841248616Smm "zip: hdrcharset option needs a character-set name" 842248616Smm ); 843232153Smm else { 844232153Smm zip->sconv = archive_string_conversion_from_charset( 845232153Smm &a->archive, val, 0); 846232153Smm if (zip->sconv != NULL) { 847232153Smm if (strcmp(val, "UTF-8") == 0) 848232153Smm zip->sconv_utf8 = zip->sconv; 849232153Smm ret = ARCHIVE_OK; 850232153Smm } else 851232153Smm ret = ARCHIVE_FATAL; 852228753Smm } 853232153Smm return (ret); 854228753Smm } 855232153Smm 856232153Smm /* Note: The "warn" return is just to inform the options 857232153Smm * supervisor that we didn't handle it. It will generate 858232153Smm * a suitable error if no one used this option. */ 859232153Smm return (ARCHIVE_WARN); 860228753Smm} 861228753Smm 862228753Smmstatic int 863232153Smmarchive_read_format_zip_streamable_read_header(struct archive_read *a, 864228753Smm struct archive_entry *entry) 865228753Smm{ 866228753Smm struct zip *zip; 867228753Smm 868228753Smm a->archive.archive_format = ARCHIVE_FORMAT_ZIP; 869228753Smm if (a->archive.archive_format_name == NULL) 870228753Smm a->archive.archive_format_name = "ZIP"; 871228753Smm 872228753Smm zip = (struct zip *)(a->format->data); 873228753Smm 874232153Smm /* Make sure we have a zip_entry structure to use. */ 875232153Smm if (zip->zip_entries == NULL) { 876232153Smm zip->zip_entries = malloc(sizeof(struct zip_entry)); 877232153Smm if (zip->zip_entries == NULL) { 878248616Smm archive_set_error(&a->archive, ENOMEM, 879248616Smm "Out of memory"); 880232153Smm return ARCHIVE_FATAL; 881228773Smm } 882228753Smm } 883232153Smm zip->entry = zip->zip_entries; 884232153Smm memset(zip->entry, 0, sizeof(struct zip_entry)); 885228753Smm 886232153Smm /* Search ahead for the next local file header. */ 887238856Smm zip_read_consume(a, zip->unconsumed); 888232153Smm zip->unconsumed = 0; 889232153Smm for (;;) { 890232153Smm int64_t skipped = 0; 891232153Smm const char *p, *end; 892232153Smm ssize_t bytes; 893228753Smm 894232153Smm p = __archive_read_ahead(a, 4, &bytes); 895232153Smm if (p == NULL) 896228773Smm return (ARCHIVE_FATAL); 897232153Smm end = p + bytes; 898228773Smm 899232153Smm while (p + 4 <= end) { 900228773Smm if (p[0] == 'P' && p[1] == 'K') { 901232153Smm if (p[2] == '\001' && p[3] == '\002') 902232153Smm /* Beginning of central directory. */ 903232153Smm return (ARCHIVE_EOF); 904232153Smm 905232153Smm if (p[2] == '\003' && p[3] == '\004') { 906232153Smm /* Regular file entry. */ 907238856Smm zip_read_consume(a, skipped); 908248616Smm return zip_read_local_file_header(a, 909248616Smm entry, zip); 910228773Smm } 911232153Smm 912232153Smm if (p[2] == '\005' && p[3] == '\006') 913232153Smm /* End of central directory. */ 914232153Smm return (ARCHIVE_EOF); 915228773Smm } 916228773Smm ++p; 917232153Smm ++skipped; 918228773Smm } 919238856Smm zip_read_consume(a, skipped); 920228773Smm } 921228773Smm} 922228773Smm 923248616Smmstatic ssize_t 924248616Smmzip_get_local_file_header_size(struct archive_read *a, size_t extra) 925248616Smm{ 926248616Smm const char *p; 927248616Smm ssize_t filename_length, extra_length; 928248616Smm 929248616Smm if ((p = __archive_read_ahead(a, extra + 30, NULL)) == NULL) { 930248616Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 931248616Smm "Truncated ZIP file header"); 932248616Smm return (ARCHIVE_WARN); 933248616Smm } 934248616Smm p += extra; 935248616Smm 936248616Smm if (memcmp(p, "PK\003\004", 4) != 0) { 937248616Smm archive_set_error(&a->archive, -1, "Damaged Zip archive"); 938248616Smm return ARCHIVE_WARN; 939248616Smm } 940248616Smm filename_length = archive_le16dec(p + 26); 941248616Smm extra_length = archive_le16dec(p + 28); 942248616Smm 943248616Smm return (30 + filename_length + extra_length); 944248616Smm} 945248616Smm 946232153Smm/* 947232153Smm * Assumes file pointer is at beginning of local file header. 948232153Smm */ 949228773Smmstatic int 950232153Smmzip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, 951228753Smm struct zip *zip) 952228753Smm{ 953232153Smm const char *p; 954228753Smm const void *h; 955232153Smm const wchar_t *wp; 956232153Smm const char *cp; 957232153Smm size_t len, filename_length, extra_length; 958232153Smm struct archive_string_conv *sconv; 959232153Smm struct zip_entry *zip_entry = zip->entry; 960232153Smm uint32_t local_crc32; 961232153Smm int64_t compressed_size, uncompressed_size; 962232153Smm int ret = ARCHIVE_OK; 963232153Smm char version; 964228753Smm 965232153Smm zip->decompress_init = 0; 966232153Smm zip->end_of_entry = 0; 967232153Smm zip->entry_uncompressed_bytes_read = 0; 968232153Smm zip->entry_compressed_bytes_read = 0; 969232153Smm zip->entry_crc32 = crc32(0, NULL, 0); 970232153Smm 971232153Smm /* Setup default conversion. */ 972232153Smm if (zip->sconv == NULL && !zip->init_default_conversion) { 973232153Smm zip->sconv_default = 974232153Smm archive_string_default_conversion_for_read(&(a->archive)); 975232153Smm zip->init_default_conversion = 1; 976232153Smm } 977232153Smm 978232153Smm if ((p = __archive_read_ahead(a, 30, NULL)) == NULL) { 979228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 980228753Smm "Truncated ZIP file header"); 981228753Smm return (ARCHIVE_FATAL); 982228753Smm } 983228753Smm 984232153Smm if (memcmp(p, "PK\003\004", 4) != 0) { 985232153Smm archive_set_error(&a->archive, -1, "Damaged Zip archive"); 986232153Smm return ARCHIVE_FATAL; 987232153Smm } 988232153Smm version = p[4]; 989232153Smm zip_entry->system = p[5]; 990232153Smm zip_entry->flags = archive_le16dec(p + 6); 991238856Smm zip_entry->compression = (char)archive_le16dec(p + 8); 992232153Smm zip_entry->mtime = zip_time(p + 10); 993232153Smm local_crc32 = archive_le32dec(p + 14); 994232153Smm compressed_size = archive_le32dec(p + 18); 995232153Smm uncompressed_size = archive_le32dec(p + 22); 996232153Smm filename_length = archive_le16dec(p + 26); 997232153Smm extra_length = archive_le16dec(p + 28); 998228753Smm 999238856Smm zip_read_consume(a, 30); 1000228753Smm 1001232153Smm if (zip->have_central_directory) { 1002248616Smm /* If we read the central dir entry, we must have size 1003248616Smm * information as well, so ignore the length-at-end flag. */ 1004232153Smm zip_entry->flags &= ~ZIP_LENGTH_AT_END; 1005232153Smm /* If we have values from both the local file header 1006232153Smm and the central directory, warn about mismatches 1007232153Smm which might indicate a damaged file. But some 1008232153Smm writers always put zero in the local header; don't 1009232153Smm bother warning about that. */ 1010232153Smm if (local_crc32 != 0 && local_crc32 != zip_entry->crc32) { 1011248616Smm archive_set_error(&a->archive, 1012248616Smm ARCHIVE_ERRNO_FILE_FORMAT, 1013232153Smm "Inconsistent CRC32 values"); 1014232153Smm ret = ARCHIVE_WARN; 1015232153Smm } 1016232153Smm if (compressed_size != 0 1017232153Smm && compressed_size != zip_entry->compressed_size) { 1018248616Smm archive_set_error(&a->archive, 1019248616Smm ARCHIVE_ERRNO_FILE_FORMAT, 1020232153Smm "Inconsistent compressed size"); 1021232153Smm ret = ARCHIVE_WARN; 1022232153Smm } 1023232153Smm if (uncompressed_size != 0 1024232153Smm && uncompressed_size != zip_entry->uncompressed_size) { 1025248616Smm archive_set_error(&a->archive, 1026248616Smm ARCHIVE_ERRNO_FILE_FORMAT, 1027232153Smm "Inconsistent uncompressed size"); 1028232153Smm ret = ARCHIVE_WARN; 1029232153Smm } 1030232153Smm } else { 1031232153Smm /* If we don't have the CD info, use whatever we do have. */ 1032232153Smm zip_entry->crc32 = local_crc32; 1033232153Smm zip_entry->compressed_size = compressed_size; 1034232153Smm zip_entry->uncompressed_size = uncompressed_size; 1035232153Smm } 1036228753Smm 1037228753Smm /* Read the filename. */ 1038232153Smm if ((h = __archive_read_ahead(a, filename_length, NULL)) == NULL) { 1039228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1040228753Smm "Truncated ZIP file header"); 1041228753Smm return (ARCHIVE_FATAL); 1042228753Smm } 1043232153Smm if (zip_entry->flags & ZIP_UTF8_NAME) { 1044232153Smm /* The filename is stored to be UTF-8. */ 1045232153Smm if (zip->sconv_utf8 == NULL) { 1046232153Smm zip->sconv_utf8 = 1047232153Smm archive_string_conversion_from_charset( 1048232153Smm &a->archive, "UTF-8", 1); 1049232153Smm if (zip->sconv_utf8 == NULL) 1050232153Smm return (ARCHIVE_FATAL); 1051232153Smm } 1052232153Smm sconv = zip->sconv_utf8; 1053232153Smm } else if (zip->sconv != NULL) 1054232153Smm sconv = zip->sconv; 1055228753Smm else 1056232153Smm sconv = zip->sconv_default; 1057228753Smm 1058232153Smm if (archive_entry_copy_pathname_l(entry, 1059232153Smm h, filename_length, sconv) != 0) { 1060232153Smm if (errno == ENOMEM) { 1061232153Smm archive_set_error(&a->archive, ENOMEM, 1062232153Smm "Can't allocate memory for Pathname"); 1063232153Smm return (ARCHIVE_FATAL); 1064232153Smm } 1065232153Smm archive_set_error(&a->archive, 1066232153Smm ARCHIVE_ERRNO_FILE_FORMAT, 1067232153Smm "Pathname cannot be converted " 1068232153Smm "from %s to current locale.", 1069232153Smm archive_string_conversion_charset_name(sconv)); 1070232153Smm ret = ARCHIVE_WARN; 1071232153Smm } 1072238856Smm zip_read_consume(a, filename_length); 1073232153Smm 1074232153Smm if (zip_entry->mode == 0) { 1075232153Smm /* Especially in streaming mode, we can end up 1076232153Smm here without having seen any mode information. 1077232153Smm Guess from the filename. */ 1078232153Smm wp = archive_entry_pathname_w(entry); 1079232153Smm if (wp != NULL) { 1080232153Smm len = wcslen(wp); 1081232153Smm if (len > 0 && wp[len - 1] == L'/') 1082232153Smm zip_entry->mode = AE_IFDIR | 0777; 1083232153Smm else 1084238856Smm zip_entry->mode = AE_IFREG | 0666; 1085232153Smm } else { 1086232153Smm cp = archive_entry_pathname(entry); 1087232153Smm len = (cp != NULL)?strlen(cp):0; 1088232153Smm if (len > 0 && cp[len - 1] == '/') 1089232153Smm zip_entry->mode = AE_IFDIR | 0777; 1090232153Smm else 1091238856Smm zip_entry->mode = AE_IFREG | 0666; 1092232153Smm } 1093232153Smm } 1094232153Smm 1095228753Smm /* Read the extra data. */ 1096232153Smm if ((h = __archive_read_ahead(a, extra_length, NULL)) == NULL) { 1097228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1098228753Smm "Truncated ZIP file header"); 1099228753Smm return (ARCHIVE_FATAL); 1100228753Smm } 1101232153Smm process_extra(h, extra_length, zip_entry); 1102238856Smm zip_read_consume(a, extra_length); 1103228753Smm 1104228753Smm /* Populate some additional entry fields: */ 1105232153Smm archive_entry_set_mode(entry, zip_entry->mode); 1106232153Smm archive_entry_set_uid(entry, zip_entry->uid); 1107232153Smm archive_entry_set_gid(entry, zip_entry->gid); 1108232153Smm archive_entry_set_mtime(entry, zip_entry->mtime, 0); 1109232153Smm archive_entry_set_ctime(entry, zip_entry->ctime, 0); 1110232153Smm archive_entry_set_atime(entry, zip_entry->atime, 0); 1111228753Smm /* Set the size only if it's meaningful. */ 1112232153Smm if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END)) 1113232153Smm archive_entry_set_size(entry, zip_entry->uncompressed_size); 1114228753Smm 1115232153Smm zip->entry_bytes_remaining = zip_entry->compressed_size; 1116228753Smm 1117228753Smm /* If there's no body, force read_data() to return EOF immediately. */ 1118232153Smm if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END) 1119228753Smm && zip->entry_bytes_remaining < 1) 1120228753Smm zip->end_of_entry = 1; 1121228753Smm 1122228753Smm /* Set up a more descriptive format name. */ 1123228753Smm sprintf(zip->format_name, "ZIP %d.%d (%s)", 1124232153Smm version / 10, version % 10, 1125232153Smm compression_name(zip->entry->compression)); 1126228753Smm a->archive.archive_format_name = zip->format_name; 1127228753Smm 1128232153Smm return (ret); 1129228753Smm} 1130228753Smm 1131232153Smmstatic const char * 1132232153Smmcompression_name(int compression) 1133232153Smm{ 1134232153Smm static const char *compression_names[] = { 1135232153Smm "uncompressed", 1136232153Smm "shrinking", 1137232153Smm "reduced-1", 1138232153Smm "reduced-2", 1139232153Smm "reduced-3", 1140232153Smm "reduced-4", 1141232153Smm "imploded", 1142232153Smm "reserved", 1143232153Smm "deflation" 1144232153Smm }; 1145232153Smm 1146232153Smm if (0 <= compression && compression < 1147232153Smm (int)(sizeof(compression_names)/sizeof(compression_names[0]))) 1148232153Smm return compression_names[compression]; 1149232153Smm else 1150232153Smm return "??"; 1151232153Smm} 1152232153Smm 1153228753Smm/* Convert an MSDOS-style date/time into Unix-style time. */ 1154228753Smmstatic time_t 1155228753Smmzip_time(const char *p) 1156228753Smm{ 1157228753Smm int msTime, msDate; 1158228753Smm struct tm ts; 1159228753Smm 1160228753Smm msTime = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]); 1161228753Smm msDate = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]); 1162228753Smm 1163228753Smm memset(&ts, 0, sizeof(ts)); 1164228753Smm ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ 1165228753Smm ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ 1166228753Smm ts.tm_mday = msDate & 0x1f; /* Day of month. */ 1167228753Smm ts.tm_hour = (msTime >> 11) & 0x1f; 1168228753Smm ts.tm_min = (msTime >> 5) & 0x3f; 1169228753Smm ts.tm_sec = (msTime << 1) & 0x3e; 1170228753Smm ts.tm_isdst = -1; 1171228753Smm return mktime(&ts); 1172228753Smm} 1173228753Smm 1174228753Smmstatic int 1175228753Smmarchive_read_format_zip_read_data(struct archive_read *a, 1176232153Smm const void **buff, size_t *size, int64_t *offset) 1177228753Smm{ 1178228753Smm int r; 1179232153Smm struct zip *zip = (struct zip *)(a->format->data); 1180228753Smm 1181232153Smm *offset = zip->entry_uncompressed_bytes_read; 1182232153Smm *size = 0; 1183232153Smm *buff = NULL; 1184228753Smm 1185232153Smm /* If we hit end-of-entry last time, return ARCHIVE_EOF. */ 1186232153Smm if (zip->end_of_entry) 1187228753Smm return (ARCHIVE_EOF); 1188232153Smm 1189232153Smm /* Return EOF immediately if this is a non-regular file. */ 1190232153Smm if (AE_IFREG != (zip->entry->mode & AE_IFMT)) 1191232153Smm return (ARCHIVE_EOF); 1192232153Smm 1193232153Smm if (zip->entry->flags & (ZIP_ENCRYPTED | ZIP_STRONG_ENCRYPTED)) { 1194232153Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1195232153Smm "Encrypted file is unsupported"); 1196232153Smm return (ARCHIVE_FAILED); 1197228753Smm } 1198228753Smm 1199238856Smm zip_read_consume(a, zip->unconsumed); 1200232153Smm zip->unconsumed = 0; 1201232153Smm 1202232153Smm switch(zip->entry->compression) { 1203228753Smm case 0: /* No compression. */ 1204228753Smm r = zip_read_data_none(a, buff, size, offset); 1205228753Smm break; 1206232153Smm#ifdef HAVE_ZLIB_H 1207228753Smm case 8: /* Deflate compression. */ 1208228753Smm r = zip_read_data_deflate(a, buff, size, offset); 1209228753Smm break; 1210232153Smm#endif 1211228753Smm default: /* Unsupported compression. */ 1212228753Smm /* Return a warning. */ 1213228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1214228753Smm "Unsupported ZIP compression method (%s)", 1215232153Smm compression_name(zip->entry->compression)); 1216232153Smm /* We can't decompress this entry, but we will 1217232153Smm * be able to skip() it and try the next entry. */ 1218232153Smm return (ARCHIVE_FAILED); 1219228753Smm break; 1220228753Smm } 1221228753Smm if (r != ARCHIVE_OK) 1222228753Smm return (r); 1223228753Smm /* Update checksum */ 1224228753Smm if (*size) 1225248616Smm zip->entry_crc32 = crc32(zip->entry_crc32, *buff, 1226248616Smm (unsigned)*size); 1227228753Smm /* If we hit the end, swallow any end-of-data marker. */ 1228228753Smm if (zip->end_of_entry) { 1229228753Smm /* Check file size, CRC against these values. */ 1230248616Smm if (zip->entry->compressed_size != 1231248616Smm zip->entry_compressed_bytes_read) { 1232228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1233248616Smm "ZIP compressed data is wrong size " 1234248616Smm "(read %jd, expected %jd)", 1235232153Smm (intmax_t)zip->entry_compressed_bytes_read, 1236232153Smm (intmax_t)zip->entry->compressed_size); 1237228753Smm return (ARCHIVE_WARN); 1238228753Smm } 1239232153Smm /* Size field only stores the lower 32 bits of the actual 1240232153Smm * size. */ 1241232153Smm if ((zip->entry->uncompressed_size & UINT32_MAX) 1242228753Smm != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) { 1243228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1244248616Smm "ZIP uncompressed data is wrong size " 1245248616Smm "(read %jd, expected %jd)", 1246232153Smm (intmax_t)zip->entry_uncompressed_bytes_read, 1247232153Smm (intmax_t)zip->entry->uncompressed_size); 1248228753Smm return (ARCHIVE_WARN); 1249228753Smm } 1250228753Smm /* Check computed CRC against header */ 1251232153Smm if (zip->entry->crc32 != zip->entry_crc32) { 1252228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1253228753Smm "ZIP bad CRC: 0x%lx should be 0x%lx", 1254232153Smm (unsigned long)zip->entry_crc32, 1255232153Smm (unsigned long)zip->entry->crc32); 1256228753Smm return (ARCHIVE_WARN); 1257228753Smm } 1258228753Smm } 1259228753Smm 1260228753Smm return (ARCHIVE_OK); 1261228753Smm} 1262228753Smm 1263228753Smm/* 1264232153Smm * Read "uncompressed" data. There are three cases: 1265232153Smm * 1) We know the size of the data. This is always true for the 1266232153Smm * seeking reader (we've examined the Central Directory already). 1267232153Smm * 2) ZIP_LENGTH_AT_END was set, but only the CRC was deferred. 1268232153Smm * Info-ZIP seems to do this; we know the size but have to grab 1269232153Smm * the CRC from the data descriptor afterwards. 1270232153Smm * 3) We're streaming and ZIP_LENGTH_AT_END was specified and 1271232153Smm * we have no size information. In this case, we can do pretty 1272232153Smm * well by watching for the data descriptor record. The data 1273232153Smm * descriptor is 16 bytes and includes a computed CRC that should 1274232153Smm * provide a strong check. 1275228753Smm * 1276232153Smm * TODO: Technically, the PK\007\010 signature is optional. 1277232153Smm * In the original spec, the data descriptor contained CRC 1278232153Smm * and size fields but had no leading signature. In practice, 1279232153Smm * newer writers seem to provide the signature pretty consistently, 1280232153Smm * but we might need to do something more complex here if 1281232153Smm * we want to handle older archives that lack that signature. 1282232153Smm * 1283228753Smm * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets 1284228753Smm * zip->end_of_entry if it consumes all of the data. 1285228753Smm */ 1286228753Smmstatic int 1287232153Smmzip_read_data_none(struct archive_read *a, const void **_buff, 1288232153Smm size_t *size, int64_t *offset) 1289228753Smm{ 1290228753Smm struct zip *zip; 1291232153Smm const char *buff; 1292228753Smm ssize_t bytes_avail; 1293228753Smm 1294232153Smm (void)offset; /* UNUSED */ 1295232153Smm 1296228753Smm zip = (struct zip *)(a->format->data); 1297228753Smm 1298232153Smm if (zip->entry->flags & ZIP_LENGTH_AT_END) { 1299232153Smm const char *p; 1300232153Smm 1301232153Smm /* Grab at least 16 bytes. */ 1302232153Smm buff = __archive_read_ahead(a, 16, &bytes_avail); 1303232153Smm if (bytes_avail < 16) { 1304232153Smm /* Zip archives have end-of-archive markers 1305232153Smm that are longer than this, so a failure to get at 1306232153Smm least 16 bytes really does indicate a truncated 1307232153Smm file. */ 1308248616Smm archive_set_error(&a->archive, 1309248616Smm ARCHIVE_ERRNO_FILE_FORMAT, 1310232153Smm "Truncated ZIP file data"); 1311232153Smm return (ARCHIVE_FATAL); 1312232153Smm } 1313232153Smm /* Check for a complete PK\007\010 signature. */ 1314232153Smm p = buff; 1315232153Smm if (p[0] == 'P' && p[1] == 'K' 1316232153Smm && p[2] == '\007' && p[3] == '\010' 1317232153Smm && archive_le32dec(p + 4) == zip->entry_crc32 1318248616Smm && archive_le32dec(p + 8) == 1319248616Smm zip->entry_compressed_bytes_read 1320248616Smm && archive_le32dec(p + 12) == 1321248616Smm zip->entry_uncompressed_bytes_read) { 1322232153Smm zip->entry->crc32 = archive_le32dec(p + 4); 1323232153Smm zip->entry->compressed_size = archive_le32dec(p + 8); 1324232153Smm zip->entry->uncompressed_size = archive_le32dec(p + 12); 1325232153Smm zip->end_of_entry = 1; 1326232153Smm zip->unconsumed = 16; 1327232153Smm return (ARCHIVE_OK); 1328232153Smm } 1329232153Smm /* If not at EOF, ensure we consume at least one byte. */ 1330232153Smm ++p; 1331232153Smm 1332248616Smm /* Scan forward until we see where a PK\007\010 signature 1333248616Smm * might be. */ 1334248616Smm /* Return bytes up until that point. On the next call, 1335248616Smm * the code above will verify the data descriptor. */ 1336232153Smm while (p < buff + bytes_avail - 4) { 1337232153Smm if (p[3] == 'P') { p += 3; } 1338232153Smm else if (p[3] == 'K') { p += 2; } 1339232153Smm else if (p[3] == '\007') { p += 1; } 1340232153Smm else if (p[3] == '\010' && p[2] == '\007' 1341232153Smm && p[1] == 'K' && p[0] == 'P') { 1342232153Smm break; 1343232153Smm } else { p += 4; } 1344232153Smm } 1345232153Smm bytes_avail = p - buff; 1346232153Smm } else { 1347232153Smm if (zip->entry_bytes_remaining == 0) { 1348232153Smm zip->end_of_entry = 1; 1349232153Smm return (ARCHIVE_OK); 1350232153Smm } 1351232153Smm /* Grab a bunch of bytes. */ 1352232153Smm buff = __archive_read_ahead(a, 1, &bytes_avail); 1353232153Smm if (bytes_avail <= 0) { 1354248616Smm archive_set_error(&a->archive, 1355248616Smm ARCHIVE_ERRNO_FILE_FORMAT, 1356232153Smm "Truncated ZIP file data"); 1357232153Smm return (ARCHIVE_FATAL); 1358232153Smm } 1359232153Smm if (bytes_avail > zip->entry_bytes_remaining) 1360238856Smm bytes_avail = (ssize_t)zip->entry_bytes_remaining; 1361228753Smm } 1362228753Smm *size = bytes_avail; 1363232153Smm zip->entry_bytes_remaining -= bytes_avail; 1364232153Smm zip->entry_uncompressed_bytes_read += bytes_avail; 1365232153Smm zip->entry_compressed_bytes_read += bytes_avail; 1366232153Smm zip->unconsumed += bytes_avail; 1367232153Smm *_buff = buff; 1368228753Smm return (ARCHIVE_OK); 1369228753Smm} 1370228753Smm 1371228753Smm#ifdef HAVE_ZLIB_H 1372228753Smmstatic int 1373248616Smmzip_deflate_init(struct archive_read *a, struct zip *zip) 1374248616Smm{ 1375248616Smm int r; 1376248616Smm 1377248616Smm /* If we haven't yet read any data, initialize the decompressor. */ 1378248616Smm if (!zip->decompress_init) { 1379248616Smm if (zip->stream_valid) 1380248616Smm r = inflateReset(&zip->stream); 1381248616Smm else 1382248616Smm r = inflateInit2(&zip->stream, 1383248616Smm -15 /* Don't check for zlib header */); 1384248616Smm if (r != Z_OK) { 1385248616Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1386248616Smm "Can't initialize ZIP decompression."); 1387248616Smm return (ARCHIVE_FATAL); 1388248616Smm } 1389248616Smm /* Stream structure has been set up. */ 1390248616Smm zip->stream_valid = 1; 1391248616Smm /* We've initialized decompression for this stream. */ 1392248616Smm zip->decompress_init = 1; 1393248616Smm } 1394248616Smm return (ARCHIVE_OK); 1395248616Smm} 1396248616Smm 1397248616Smmstatic int 1398228753Smmzip_read_data_deflate(struct archive_read *a, const void **buff, 1399232153Smm size_t *size, int64_t *offset) 1400228753Smm{ 1401228753Smm struct zip *zip; 1402228753Smm ssize_t bytes_avail; 1403228753Smm const void *compressed_buff; 1404228753Smm int r; 1405228753Smm 1406232153Smm (void)offset; /* UNUSED */ 1407232153Smm 1408228753Smm zip = (struct zip *)(a->format->data); 1409228753Smm 1410228753Smm /* If the buffer hasn't been allocated, allocate it now. */ 1411228753Smm if (zip->uncompressed_buffer == NULL) { 1412232153Smm zip->uncompressed_buffer_size = 256 * 1024; 1413228753Smm zip->uncompressed_buffer 1414228753Smm = (unsigned char *)malloc(zip->uncompressed_buffer_size); 1415228753Smm if (zip->uncompressed_buffer == NULL) { 1416228753Smm archive_set_error(&a->archive, ENOMEM, 1417228753Smm "No memory for ZIP decompression"); 1418228753Smm return (ARCHIVE_FATAL); 1419228753Smm } 1420228753Smm } 1421228753Smm 1422248616Smm r = zip_deflate_init(a, zip); 1423248616Smm if (r != ARCHIVE_OK) 1424248616Smm return (r); 1425228753Smm 1426228753Smm /* 1427228753Smm * Note: '1' here is a performance optimization. 1428228753Smm * Recall that the decompression layer returns a count of 1429228753Smm * available bytes; asking for more than that forces the 1430228753Smm * decompressor to combine reads by copying data. 1431228753Smm */ 1432228753Smm compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); 1433232153Smm if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END) 1434232153Smm && bytes_avail > zip->entry_bytes_remaining) { 1435238856Smm bytes_avail = (ssize_t)zip->entry_bytes_remaining; 1436232153Smm } 1437228753Smm if (bytes_avail <= 0) { 1438228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1439228753Smm "Truncated ZIP file body"); 1440228753Smm return (ARCHIVE_FATAL); 1441228753Smm } 1442228753Smm 1443228753Smm /* 1444228753Smm * A bug in zlib.h: stream.next_in should be marked 'const' 1445228753Smm * but isn't (the library never alters data through the 1446228753Smm * next_in pointer, only reads it). The result: this ugly 1447228753Smm * cast to remove 'const'. 1448228753Smm */ 1449228753Smm zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff; 1450248616Smm zip->stream.avail_in = (uInt)bytes_avail; 1451228753Smm zip->stream.total_in = 0; 1452228753Smm zip->stream.next_out = zip->uncompressed_buffer; 1453248616Smm zip->stream.avail_out = (uInt)zip->uncompressed_buffer_size; 1454228753Smm zip->stream.total_out = 0; 1455228753Smm 1456228753Smm r = inflate(&zip->stream, 0); 1457228753Smm switch (r) { 1458228753Smm case Z_OK: 1459228753Smm break; 1460228753Smm case Z_STREAM_END: 1461228753Smm zip->end_of_entry = 1; 1462228753Smm break; 1463228753Smm case Z_MEM_ERROR: 1464228753Smm archive_set_error(&a->archive, ENOMEM, 1465228753Smm "Out of memory for ZIP decompression"); 1466228753Smm return (ARCHIVE_FATAL); 1467228753Smm default: 1468228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1469228753Smm "ZIP decompression failed (%d)", r); 1470228753Smm return (ARCHIVE_FATAL); 1471228753Smm } 1472228753Smm 1473228753Smm /* Consume as much as the compressor actually used. */ 1474228753Smm bytes_avail = zip->stream.total_in; 1475238856Smm zip_read_consume(a, bytes_avail); 1476228753Smm zip->entry_bytes_remaining -= bytes_avail; 1477228753Smm zip->entry_compressed_bytes_read += bytes_avail; 1478228753Smm 1479228753Smm *size = zip->stream.total_out; 1480232153Smm zip->entry_uncompressed_bytes_read += zip->stream.total_out; 1481228753Smm *buff = zip->uncompressed_buffer; 1482232153Smm 1483232153Smm if (zip->end_of_entry && (zip->entry->flags & ZIP_LENGTH_AT_END)) { 1484232153Smm const char *p; 1485232153Smm 1486232153Smm if (NULL == (p = __archive_read_ahead(a, 16, NULL))) { 1487232153Smm archive_set_error(&a->archive, 1488232153Smm ARCHIVE_ERRNO_FILE_FORMAT, 1489232153Smm "Truncated ZIP end-of-file record"); 1490232153Smm return (ARCHIVE_FATAL); 1491232153Smm } 1492232153Smm /* Consume the optional PK\007\010 marker. */ 1493248616Smm if (p[0] == 'P' && p[1] == 'K' && 1494248616Smm p[2] == '\007' && p[3] == '\010') { 1495232153Smm zip->entry->crc32 = archive_le32dec(p + 4); 1496232153Smm zip->entry->compressed_size = archive_le32dec(p + 8); 1497232153Smm zip->entry->uncompressed_size = archive_le32dec(p + 12); 1498232153Smm zip->unconsumed = 16; 1499232153Smm } 1500232153Smm } 1501232153Smm 1502228753Smm return (ARCHIVE_OK); 1503228753Smm} 1504228753Smm#endif 1505228753Smm 1506228753Smmstatic int 1507228753Smmarchive_read_format_zip_read_data_skip(struct archive_read *a) 1508228753Smm{ 1509228753Smm struct zip *zip; 1510228753Smm 1511228753Smm zip = (struct zip *)(a->format->data); 1512228753Smm 1513228753Smm /* If we've already read to end of data, we're done. */ 1514228753Smm if (zip->end_of_entry) 1515228753Smm return (ARCHIVE_OK); 1516228753Smm 1517232153Smm /* So we know we're streaming... */ 1518232153Smm if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)) { 1519232153Smm /* We know the compressed length, so we can just skip. */ 1520238856Smm int64_t bytes_skipped = zip_read_consume(a, 1521232153Smm zip->entry_bytes_remaining + zip->unconsumed); 1522232153Smm if (bytes_skipped < 0) 1523232153Smm return (ARCHIVE_FATAL); 1524232153Smm zip->unconsumed = 0; 1525232153Smm return (ARCHIVE_OK); 1526228753Smm } 1527228753Smm 1528232153Smm /* We're streaming and we don't know the length. */ 1529232153Smm /* If the body is compressed and we know the format, we can 1530232153Smm * find an exact end-of-entry by decompressing it. */ 1531232153Smm switch (zip->entry->compression) { 1532232153Smm#ifdef HAVE_ZLIB_H 1533232153Smm case 8: /* Deflate compression. */ 1534232153Smm while (!zip->end_of_entry) { 1535232153Smm int64_t offset = 0; 1536232153Smm const void *buff = NULL; 1537232153Smm size_t size = 0; 1538232153Smm int r; 1539232153Smm r = zip_read_data_deflate(a, &buff, &size, &offset); 1540232153Smm if (r != ARCHIVE_OK) 1541232153Smm return (r); 1542232153Smm } 1543238856Smm return ARCHIVE_OK; 1544232153Smm#endif 1545232153Smm default: /* Uncompressed or unknown. */ 1546232153Smm /* Scan for a PK\007\010 signature. */ 1547238856Smm zip_read_consume(a, zip->unconsumed); 1548232153Smm zip->unconsumed = 0; 1549232153Smm for (;;) { 1550232153Smm const char *p, *buff; 1551232153Smm ssize_t bytes_avail; 1552232153Smm buff = __archive_read_ahead(a, 16, &bytes_avail); 1553232153Smm if (bytes_avail < 16) { 1554248616Smm archive_set_error(&a->archive, 1555248616Smm ARCHIVE_ERRNO_FILE_FORMAT, 1556232153Smm "Truncated ZIP file data"); 1557232153Smm return (ARCHIVE_FATAL); 1558232153Smm } 1559232153Smm p = buff; 1560232153Smm while (p <= buff + bytes_avail - 16) { 1561232153Smm if (p[3] == 'P') { p += 3; } 1562232153Smm else if (p[3] == 'K') { p += 2; } 1563232153Smm else if (p[3] == '\007') { p += 1; } 1564232153Smm else if (p[3] == '\010' && p[2] == '\007' 1565232153Smm && p[1] == 'K' && p[0] == 'P') { 1566238856Smm zip_read_consume(a, p - buff + 16); 1567232153Smm return ARCHIVE_OK; 1568232153Smm } else { p += 4; } 1569232153Smm } 1570238856Smm zip_read_consume(a, p - buff); 1571232153Smm } 1572232153Smm } 1573228753Smm} 1574228753Smm 1575228753Smmstatic int 1576228753Smmarchive_read_format_zip_cleanup(struct archive_read *a) 1577228753Smm{ 1578228753Smm struct zip *zip; 1579228753Smm 1580228753Smm zip = (struct zip *)(a->format->data); 1581228753Smm#ifdef HAVE_ZLIB_H 1582228753Smm if (zip->stream_valid) 1583228753Smm inflateEnd(&zip->stream); 1584228753Smm#endif 1585248616Smm if (zip->zip_entries && zip->central_directory_entries) { 1586248616Smm unsigned i; 1587248616Smm for (i = 0; i < zip->central_directory_entries; i++) 1588248616Smm archive_string_free(&(zip->zip_entries[i].rsrcname)); 1589248616Smm } 1590232153Smm free(zip->zip_entries); 1591228753Smm free(zip->uncompressed_buffer); 1592228753Smm archive_string_free(&(zip->extra)); 1593228753Smm free(zip); 1594228753Smm (a->format->data) = NULL; 1595228753Smm return (ARCHIVE_OK); 1596228753Smm} 1597228753Smm 1598228753Smm/* 1599228753Smm * The extra data is stored as a list of 1600228753Smm * id1+size1+data1 + id2+size2+data2 ... 1601228753Smm * triplets. id and size are 2 bytes each. 1602228753Smm */ 1603228753Smmstatic void 1604232153Smmprocess_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry) 1605228753Smm{ 1606232153Smm unsigned offset = 0; 1607232153Smm 1608232153Smm while (offset < extra_length - 4) 1609228753Smm { 1610228753Smm unsigned short headerid = archive_le16dec(p + offset); 1611228753Smm unsigned short datasize = archive_le16dec(p + offset + 2); 1612228753Smm offset += 4; 1613232153Smm if (offset + datasize > extra_length) 1614228753Smm break; 1615228753Smm#ifdef DEBUG 1616232153Smm fprintf(stderr, "Header id 0x%x, length %d\n", 1617228753Smm headerid, datasize); 1618228753Smm#endif 1619228753Smm switch (headerid) { 1620228753Smm case 0x0001: 1621228753Smm /* Zip64 extended information extra field. */ 1622228753Smm if (datasize >= 8) 1623232153Smm zip_entry->uncompressed_size = 1624232153Smm archive_le64dec(p + offset); 1625228753Smm if (datasize >= 16) 1626232153Smm zip_entry->compressed_size = 1627232153Smm archive_le64dec(p + offset + 8); 1628228753Smm break; 1629228753Smm case 0x5455: 1630228753Smm { 1631228753Smm /* Extended time field "UT". */ 1632228753Smm int flags = p[offset]; 1633228753Smm offset++; 1634228753Smm datasize--; 1635228753Smm /* Flag bits indicate which dates are present. */ 1636228753Smm if (flags & 0x01) 1637228753Smm { 1638228753Smm#ifdef DEBUG 1639228753Smm fprintf(stderr, "mtime: %lld -> %d\n", 1640232153Smm (long long)zip_entry->mtime, 1641228753Smm archive_le32dec(p + offset)); 1642228753Smm#endif 1643228753Smm if (datasize < 4) 1644228753Smm break; 1645232153Smm zip_entry->mtime = archive_le32dec(p + offset); 1646228753Smm offset += 4; 1647228753Smm datasize -= 4; 1648228753Smm } 1649228753Smm if (flags & 0x02) 1650228753Smm { 1651228753Smm if (datasize < 4) 1652228753Smm break; 1653232153Smm zip_entry->atime = archive_le32dec(p + offset); 1654228753Smm offset += 4; 1655228753Smm datasize -= 4; 1656228753Smm } 1657228753Smm if (flags & 0x04) 1658228753Smm { 1659228753Smm if (datasize < 4) 1660228753Smm break; 1661232153Smm zip_entry->ctime = archive_le32dec(p + offset); 1662228753Smm offset += 4; 1663228753Smm datasize -= 4; 1664228753Smm } 1665228753Smm break; 1666228753Smm } 1667232153Smm case 0x5855: 1668232153Smm { 1669232153Smm /* Info-ZIP Unix Extra Field (old version) "UX". */ 1670232153Smm if (datasize >= 8) { 1671232153Smm zip_entry->atime = archive_le32dec(p + offset); 1672248616Smm zip_entry->mtime = 1673248616Smm archive_le32dec(p + offset + 4); 1674232153Smm } 1675232153Smm if (datasize >= 12) { 1676248616Smm zip_entry->uid = 1677248616Smm archive_le16dec(p + offset + 8); 1678248616Smm zip_entry->gid = 1679248616Smm archive_le16dec(p + offset + 10); 1680232153Smm } 1681232153Smm break; 1682232153Smm } 1683228753Smm case 0x7855: 1684228753Smm /* Info-ZIP Unix Extra Field (type 2) "Ux". */ 1685228753Smm#ifdef DEBUG 1686228753Smm fprintf(stderr, "uid %d gid %d\n", 1687228753Smm archive_le16dec(p + offset), 1688228753Smm archive_le16dec(p + offset + 2)); 1689228753Smm#endif 1690228753Smm if (datasize >= 2) 1691232153Smm zip_entry->uid = archive_le16dec(p + offset); 1692228753Smm if (datasize >= 4) 1693248616Smm zip_entry->gid = 1694248616Smm archive_le16dec(p + offset + 2); 1695228753Smm break; 1696228773Smm case 0x7875: 1697232153Smm { 1698228773Smm /* Info-Zip Unix Extra Field (type 3) "ux". */ 1699232153Smm int uidsize = 0, gidsize = 0; 1700232153Smm 1701232153Smm if (datasize >= 1 && p[offset] == 1) {/* version=1 */ 1702232153Smm if (datasize >= 4) { 1703232153Smm /* get a uid size. */ 1704232153Smm uidsize = p[offset+1]; 1705232153Smm if (uidsize == 2) 1706248616Smm zip_entry->uid = 1707248616Smm archive_le16dec( 1708248616Smm p + offset + 2); 1709232153Smm else if (uidsize == 4 && datasize >= 6) 1710248616Smm zip_entry->uid = 1711248616Smm archive_le32dec( 1712248616Smm p + offset + 2); 1713232153Smm } 1714232153Smm if (datasize >= (2 + uidsize + 3)) { 1715232153Smm /* get a gid size. */ 1716232153Smm gidsize = p[offset+2+uidsize]; 1717232153Smm if (gidsize == 2) 1718248616Smm zip_entry->gid = 1719248616Smm archive_le16dec( 1720248616Smm p+offset+2+uidsize+1); 1721232153Smm else if (gidsize == 4 && 1722232153Smm datasize >= (2 + uidsize + 5)) 1723248616Smm zip_entry->gid = 1724248616Smm archive_le32dec( 1725248616Smm p+offset+2+uidsize+1); 1726232153Smm } 1727232153Smm } 1728228773Smm break; 1729232153Smm } 1730228753Smm default: 1731228753Smm break; 1732228753Smm } 1733228753Smm offset += datasize; 1734228753Smm } 1735228753Smm#ifdef DEBUG 1736232153Smm if (offset != extra_length) 1737228753Smm { 1738228753Smm fprintf(stderr, 1739232153Smm "Extra data field contents do not match reported size!\n"); 1740228753Smm } 1741228753Smm#endif 1742228753Smm} 1743