1/*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "archive_platform.h" 27 28#ifdef HAVE_SYS_STAT_H 29#include <sys/stat.h> 30#endif 31#ifdef HAVE_ERRNO_H 32#include <errno.h> 33#endif 34#include <stdio.h> 35#ifdef HAVE_STDLIB_H 36#include <stdlib.h> 37#endif 38#ifdef HAVE_STRING_H 39#include <string.h> 40#endif 41 42#include "archive.h" 43#include "archive_entry.h" 44 45/* 46 * This is mostly a pretty straightforward hash table implementation. 47 * The only interesting bit is the different strategies used to 48 * match up links. These strategies match those used by various 49 * archiving formats: 50 * tar - content stored with first link, remainder refer back to it. 51 * This requires us to match each subsequent link up with the 52 * first appearance. 53 * cpio - Old cpio just stored body with each link, match-ups were 54 * implicit. This is trivial. 55 * new cpio - New cpio only stores body with last link, match-ups 56 * are implicit. This is actually quite tricky; see the notes 57 * below. 58 */ 59 60/* Users pass us a format code, we translate that into a strategy here. */ 61#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 62#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1 63#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 64#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 65 66/* Initial size of link cache. */ 67#define links_cache_initial_size 1024 68 69struct links_entry { 70 struct links_entry *next; 71 struct links_entry *previous; 72 struct archive_entry *canonical; 73 struct archive_entry *entry; 74 size_t hash; 75 unsigned int links; /* # links not yet seen */ 76}; 77 78struct archive_entry_linkresolver { 79 struct links_entry **buckets; 80 struct links_entry *spare; 81 unsigned long number_entries; 82 size_t number_buckets; 83 int strategy; 84}; 85 86#define NEXT_ENTRY_DEFERRED 1 87#define NEXT_ENTRY_PARTIAL 2 88#define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL) 89 90static struct links_entry *find_entry(struct archive_entry_linkresolver *, 91 struct archive_entry *); 92static void grow_hash(struct archive_entry_linkresolver *); 93static struct links_entry *insert_entry(struct archive_entry_linkresolver *, 94 struct archive_entry *); 95static struct links_entry *next_entry(struct archive_entry_linkresolver *, 96 int); 97 98struct archive_entry_linkresolver * 99archive_entry_linkresolver_new(void) 100{ 101 struct archive_entry_linkresolver *res; 102 103 /* Check for positive power-of-two */ 104 if (links_cache_initial_size == 0 || 105 (links_cache_initial_size & (links_cache_initial_size - 1)) != 0) 106 return (NULL); 107 108 res = calloc(1, sizeof(struct archive_entry_linkresolver)); 109 if (res == NULL) 110 return (NULL); 111 res->number_buckets = links_cache_initial_size; 112 res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0])); 113 if (res->buckets == NULL) { 114 free(res); 115 return (NULL); 116 } 117 return (res); 118} 119 120void 121archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, 122 int fmt) 123{ 124 int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; 125 126 switch (fmtbase) { 127 case ARCHIVE_FORMAT_7ZIP: 128 case ARCHIVE_FORMAT_AR: 129 case ARCHIVE_FORMAT_ZIP: 130 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 131 break; 132 case ARCHIVE_FORMAT_CPIO: 133 switch (fmt) { 134 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: 135 case ARCHIVE_FORMAT_CPIO_SVR4_CRC: 136 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; 137 break; 138 default: 139 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 140 break; 141 } 142 break; 143 case ARCHIVE_FORMAT_MTREE: 144 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE; 145 break; 146 case ARCHIVE_FORMAT_ISO9660: 147 case ARCHIVE_FORMAT_SHAR: 148 case ARCHIVE_FORMAT_TAR: 149 case ARCHIVE_FORMAT_XAR: 150 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; 151 break; 152 default: 153 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 154 break; 155 } 156} 157 158void 159archive_entry_linkresolver_free(struct archive_entry_linkresolver *res) 160{ 161 struct links_entry *le; 162 163 if (res == NULL) 164 return; 165 166 while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL) 167 archive_entry_free(le->entry); 168 free(res->buckets); 169 free(res); 170} 171 172void 173archive_entry_linkify(struct archive_entry_linkresolver *res, 174 struct archive_entry **e, struct archive_entry **f) 175{ 176 struct links_entry *le; 177 struct archive_entry *t; 178 179 *f = NULL; /* Default: Don't return a second entry. */ 180 181 if (*e == NULL) { 182 le = next_entry(res, NEXT_ENTRY_DEFERRED); 183 if (le != NULL) { 184 *e = le->entry; 185 le->entry = NULL; 186 } 187 return; 188 } 189 190 /* If it has only one link, then we're done. */ 191 if (archive_entry_nlink(*e) == 1) 192 return; 193 /* Directories, devices never have hardlinks. */ 194 if (archive_entry_filetype(*e) == AE_IFDIR 195 || archive_entry_filetype(*e) == AE_IFBLK 196 || archive_entry_filetype(*e) == AE_IFCHR) 197 return; 198 199 switch (res->strategy) { 200 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: 201 le = find_entry(res, *e); 202 if (le != NULL) { 203 archive_entry_unset_size(*e); 204 archive_entry_copy_hardlink(*e, 205 archive_entry_pathname(le->canonical)); 206 } else 207 insert_entry(res, *e); 208 return; 209 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: 210 le = find_entry(res, *e); 211 if (le != NULL) { 212 archive_entry_copy_hardlink(*e, 213 archive_entry_pathname(le->canonical)); 214 } else 215 insert_entry(res, *e); 216 return; 217 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: 218 /* This one is trivial. */ 219 return; 220 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: 221 le = find_entry(res, *e); 222 if (le != NULL) { 223 /* 224 * Put the new entry in le, return the 225 * old entry from le. 226 */ 227 t = *e; 228 *e = le->entry; 229 le->entry = t; 230 /* Make the old entry into a hardlink. */ 231 archive_entry_unset_size(*e); 232 archive_entry_copy_hardlink(*e, 233 archive_entry_pathname(le->canonical)); 234 /* If we ran out of links, return the 235 * final entry as well. */ 236 if (le->links == 0) { 237 *f = le->entry; 238 le->entry = NULL; 239 } 240 } else { 241 /* 242 * If we haven't seen it, tuck it away 243 * for future use. 244 */ 245 le = insert_entry(res, *e); 246 if (le == NULL) 247 /* XXX We should return an error code XXX */ 248 return; 249 le->entry = *e; 250 *e = NULL; 251 } 252 return; 253 default: 254 break; 255 } 256 return; 257} 258 259static struct links_entry * 260find_entry(struct archive_entry_linkresolver *res, 261 struct archive_entry *entry) 262{ 263 struct links_entry *le; 264 size_t hash, bucket; 265 dev_t dev; 266 int64_t ino; 267 268 /* Free a held entry. */ 269 if (res->spare != NULL) { 270 archive_entry_free(res->spare->canonical); 271 archive_entry_free(res->spare->entry); 272 free(res->spare); 273 res->spare = NULL; 274 } 275 276 dev = archive_entry_dev(entry); 277 ino = archive_entry_ino64(entry); 278 hash = (size_t)(dev ^ ino); 279 280 /* Try to locate this entry in the links cache. */ 281 bucket = hash & (res->number_buckets - 1); 282 for (le = res->buckets[bucket]; le != NULL; le = le->next) { 283 if (le->hash == hash 284 && dev == archive_entry_dev(le->canonical) 285 && ino == archive_entry_ino64(le->canonical)) { 286 /* 287 * Decrement link count each time and release 288 * the entry if it hits zero. This saves 289 * memory and is necessary for detecting 290 * missed links. 291 */ 292 --le->links; 293 if (le->links > 0) 294 return (le); 295 /* Remove it from this hash bucket. */ 296 if (le->previous != NULL) 297 le->previous->next = le->next; 298 if (le->next != NULL) 299 le->next->previous = le->previous; 300 if (res->buckets[bucket] == le) 301 res->buckets[bucket] = le->next; 302 res->number_entries--; 303 /* Defer freeing this entry. */ 304 res->spare = le; 305 return (le); 306 } 307 } 308 return (NULL); 309} 310 311static struct links_entry * 312next_entry(struct archive_entry_linkresolver *res, int mode) 313{ 314 struct links_entry *le; 315 size_t bucket; 316 317 /* Free a held entry. */ 318 if (res->spare != NULL) { 319 archive_entry_free(res->spare->canonical); 320 archive_entry_free(res->spare->entry); 321 free(res->spare); 322 res->spare = NULL; 323 } 324 325 /* Look for next non-empty bucket in the links cache. */ 326 for (bucket = 0; bucket < res->number_buckets; bucket++) { 327 for (le = res->buckets[bucket]; le != NULL; le = le->next) { 328 if (le->entry != NULL && 329 (mode & NEXT_ENTRY_DEFERRED) == 0) 330 continue; 331 if (le->entry == NULL && 332 (mode & NEXT_ENTRY_PARTIAL) == 0) 333 continue; 334 /* Remove it from this hash bucket. */ 335 if (le->next != NULL) 336 le->next->previous = le->previous; 337 if (le->previous != NULL) 338 le->previous->next = le->next; 339 else 340 res->buckets[bucket] = le->next; 341 res->number_entries--; 342 /* Defer freeing this entry. */ 343 res->spare = le; 344 return (le); 345 } 346 } 347 return (NULL); 348} 349 350static struct links_entry * 351insert_entry(struct archive_entry_linkresolver *res, 352 struct archive_entry *entry) 353{ 354 struct links_entry *le; 355 size_t hash, bucket; 356 357 /* Add this entry to the links cache. */ 358 le = calloc(1, sizeof(struct links_entry)); 359 if (le == NULL) 360 return (NULL); 361 le->canonical = archive_entry_clone(entry); 362 363 /* If the links cache is getting too full, enlarge the hash table. */ 364 if (res->number_entries > res->number_buckets * 2) 365 grow_hash(res); 366 367 hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry)); 368 bucket = hash & (res->number_buckets - 1); 369 370 /* If we could allocate the entry, record it. */ 371 if (res->buckets[bucket] != NULL) 372 res->buckets[bucket]->previous = le; 373 res->number_entries++; 374 le->next = res->buckets[bucket]; 375 le->previous = NULL; 376 res->buckets[bucket] = le; 377 le->hash = hash; 378 le->links = archive_entry_nlink(entry) - 1; 379 return (le); 380} 381 382static void 383grow_hash(struct archive_entry_linkresolver *res) 384{ 385 struct links_entry *le, **new_buckets; 386 size_t new_size; 387 size_t i, bucket; 388 389 /* Try to enlarge the bucket list. */ 390 new_size = res->number_buckets * 2; 391 if (new_size < res->number_buckets) 392 return; 393 new_buckets = calloc(new_size, sizeof(struct links_entry *)); 394 395 if (new_buckets == NULL) 396 return; 397 398 for (i = 0; i < res->number_buckets; i++) { 399 while (res->buckets[i] != NULL) { 400 /* Remove entry from old bucket. */ 401 le = res->buckets[i]; 402 res->buckets[i] = le->next; 403 404 /* Add entry to new bucket. */ 405 bucket = le->hash & (new_size - 1); 406 407 if (new_buckets[bucket] != NULL) 408 new_buckets[bucket]->previous = le; 409 le->next = new_buckets[bucket]; 410 le->previous = NULL; 411 new_buckets[bucket] = le; 412 } 413 } 414 free(res->buckets); 415 res->buckets = new_buckets; 416 res->number_buckets = new_size; 417} 418 419struct archive_entry * 420archive_entry_partial_links(struct archive_entry_linkresolver *res, 421 unsigned int *links) 422{ 423 struct archive_entry *e; 424 struct links_entry *le; 425 426 /* Free a held entry. */ 427 if (res->spare != NULL) { 428 archive_entry_free(res->spare->canonical); 429 archive_entry_free(res->spare->entry); 430 free(res->spare); 431 res->spare = NULL; 432 } 433 434 le = next_entry(res, NEXT_ENTRY_PARTIAL); 435 if (le != NULL) { 436 e = le->canonical; 437 if (links != NULL) 438 *links = le->links; 439 le->canonical = NULL; 440 } else { 441 e = NULL; 442 if (links != NULL) 443 *links = 0; 444 } 445 return (e); 446} 447