1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28#ifdef HAVE_SYS_STAT_H
29#include <sys/stat.h>
30#endif
31#ifdef HAVE_ERRNO_H
32#include <errno.h>
33#endif
34#include <stdio.h>
35#ifdef HAVE_STDLIB_H
36#include <stdlib.h>
37#endif
38#ifdef HAVE_STRING_H
39#include <string.h>
40#endif
41
42#include "archive.h"
43#include "archive_entry.h"
44
45/*
46 * This is mostly a pretty straightforward hash table implementation.
47 * The only interesting bit is the different strategies used to
48 * match up links.  These strategies match those used by various
49 * archiving formats:
50 *   tar - content stored with first link, remainder refer back to it.
51 *       This requires us to match each subsequent link up with the
52 *       first appearance.
53 *   cpio - Old cpio just stored body with each link, match-ups were
54 *       implicit.  This is trivial.
55 *   new cpio - New cpio only stores body with last link, match-ups
56 *       are implicit.  This is actually quite tricky; see the notes
57 *       below.
58 */
59
60/* Users pass us a format code, we translate that into a strategy here. */
61#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR	0
62#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
63#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
64#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
65
66/* Initial size of link cache. */
67#define	links_cache_initial_size 1024
68
69struct links_entry {
70	struct links_entry	*next;
71	struct links_entry	*previous;
72	struct archive_entry	*canonical;
73	struct archive_entry	*entry;
74	size_t			 hash;
75	unsigned int		 links; /* # links not yet seen */
76};
77
78struct archive_entry_linkresolver {
79	struct links_entry	**buckets;
80	struct links_entry	 *spare;
81	unsigned long		  number_entries;
82	size_t			  number_buckets;
83	int			  strategy;
84};
85
86#define	NEXT_ENTRY_DEFERRED	1
87#define	NEXT_ENTRY_PARTIAL	2
88#define	NEXT_ENTRY_ALL		(NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)
89
90static struct links_entry *find_entry(struct archive_entry_linkresolver *,
91		    struct archive_entry *);
92static void grow_hash(struct archive_entry_linkresolver *);
93static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
94		    struct archive_entry *);
95static struct links_entry *next_entry(struct archive_entry_linkresolver *,
96    int);
97
98struct archive_entry_linkresolver *
99archive_entry_linkresolver_new(void)
100{
101	struct archive_entry_linkresolver *res;
102
103	/* Check for positive power-of-two */
104	if (links_cache_initial_size == 0 ||
105	    (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
106		return (NULL);
107
108	res = calloc(1, sizeof(struct archive_entry_linkresolver));
109	if (res == NULL)
110		return (NULL);
111	res->number_buckets = links_cache_initial_size;
112	res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
113	if (res->buckets == NULL) {
114		free(res);
115		return (NULL);
116	}
117	return (res);
118}
119
120void
121archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
122    int fmt)
123{
124	int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
125
126	switch (fmtbase) {
127	case ARCHIVE_FORMAT_7ZIP:
128	case ARCHIVE_FORMAT_AR:
129	case ARCHIVE_FORMAT_ZIP:
130		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
131		break;
132	case ARCHIVE_FORMAT_CPIO:
133		switch (fmt) {
134		case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
135		case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
136			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
137			break;
138		default:
139			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
140			break;
141		}
142		break;
143	case ARCHIVE_FORMAT_MTREE:
144		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
145		break;
146	case ARCHIVE_FORMAT_ISO9660:
147	case ARCHIVE_FORMAT_SHAR:
148	case ARCHIVE_FORMAT_TAR:
149	case ARCHIVE_FORMAT_XAR:
150		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
151		break;
152	default:
153		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
154		break;
155	}
156}
157
158void
159archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
160{
161	struct links_entry *le;
162
163	if (res == NULL)
164		return;
165
166	while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
167		archive_entry_free(le->entry);
168	free(res->buckets);
169	free(res);
170}
171
172void
173archive_entry_linkify(struct archive_entry_linkresolver *res,
174    struct archive_entry **e, struct archive_entry **f)
175{
176	struct links_entry *le;
177	struct archive_entry *t;
178
179	*f = NULL; /* Default: Don't return a second entry. */
180
181	if (*e == NULL) {
182		le = next_entry(res, NEXT_ENTRY_DEFERRED);
183		if (le != NULL) {
184			*e = le->entry;
185			le->entry = NULL;
186		}
187		return;
188	}
189
190	/* If it has only one link, then we're done. */
191	if (archive_entry_nlink(*e) == 1)
192		return;
193	/* Directories, devices never have hardlinks. */
194	if (archive_entry_filetype(*e) == AE_IFDIR
195	    || archive_entry_filetype(*e) == AE_IFBLK
196	    || archive_entry_filetype(*e) == AE_IFCHR)
197		return;
198
199	switch (res->strategy) {
200	case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
201		le = find_entry(res, *e);
202		if (le != NULL) {
203			archive_entry_unset_size(*e);
204			archive_entry_copy_hardlink(*e,
205			    archive_entry_pathname(le->canonical));
206		} else
207			insert_entry(res, *e);
208		return;
209	case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
210		le = find_entry(res, *e);
211		if (le != NULL) {
212			archive_entry_copy_hardlink(*e,
213			    archive_entry_pathname(le->canonical));
214		} else
215			insert_entry(res, *e);
216		return;
217	case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
218		/* This one is trivial. */
219		return;
220	case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
221		le = find_entry(res, *e);
222		if (le != NULL) {
223			/*
224			 * Put the new entry in le, return the
225			 * old entry from le.
226			 */
227			t = *e;
228			*e = le->entry;
229			le->entry = t;
230			/* Make the old entry into a hardlink. */
231			archive_entry_unset_size(*e);
232			archive_entry_copy_hardlink(*e,
233			    archive_entry_pathname(le->canonical));
234			/* If we ran out of links, return the
235			 * final entry as well. */
236			if (le->links == 0) {
237				*f = le->entry;
238				le->entry = NULL;
239			}
240		} else {
241			/*
242			 * If we haven't seen it, tuck it away
243			 * for future use.
244			 */
245			le = insert_entry(res, *e);
246			if (le == NULL)
247				/* XXX We should return an error code XXX */
248				return;
249			le->entry = *e;
250			*e = NULL;
251		}
252		return;
253	default:
254		break;
255	}
256	return;
257}
258
259static struct links_entry *
260find_entry(struct archive_entry_linkresolver *res,
261    struct archive_entry *entry)
262{
263	struct links_entry	*le;
264	size_t			 hash, bucket;
265	dev_t			 dev;
266	int64_t			 ino;
267
268	/* Free a held entry. */
269	if (res->spare != NULL) {
270		archive_entry_free(res->spare->canonical);
271		archive_entry_free(res->spare->entry);
272		free(res->spare);
273		res->spare = NULL;
274	}
275
276	dev = archive_entry_dev(entry);
277	ino = archive_entry_ino64(entry);
278	hash = (size_t)(dev ^ ino);
279
280	/* Try to locate this entry in the links cache. */
281	bucket = hash & (res->number_buckets - 1);
282	for (le = res->buckets[bucket]; le != NULL; le = le->next) {
283		if (le->hash == hash
284		    && dev == archive_entry_dev(le->canonical)
285		    && ino == archive_entry_ino64(le->canonical)) {
286			/*
287			 * Decrement link count each time and release
288			 * the entry if it hits zero.  This saves
289			 * memory and is necessary for detecting
290			 * missed links.
291			 */
292			--le->links;
293			if (le->links > 0)
294				return (le);
295			/* Remove it from this hash bucket. */
296			if (le->previous != NULL)
297				le->previous->next = le->next;
298			if (le->next != NULL)
299				le->next->previous = le->previous;
300			if (res->buckets[bucket] == le)
301				res->buckets[bucket] = le->next;
302			res->number_entries--;
303			/* Defer freeing this entry. */
304			res->spare = le;
305			return (le);
306		}
307	}
308	return (NULL);
309}
310
311static struct links_entry *
312next_entry(struct archive_entry_linkresolver *res, int mode)
313{
314	struct links_entry	*le;
315	size_t			 bucket;
316
317	/* Free a held entry. */
318	if (res->spare != NULL) {
319		archive_entry_free(res->spare->canonical);
320		archive_entry_free(res->spare->entry);
321		free(res->spare);
322		res->spare = NULL;
323	}
324
325	/* Look for next non-empty bucket in the links cache. */
326	for (bucket = 0; bucket < res->number_buckets; bucket++) {
327		for (le = res->buckets[bucket]; le != NULL; le = le->next) {
328			if (le->entry != NULL &&
329			    (mode & NEXT_ENTRY_DEFERRED) == 0)
330				continue;
331			if (le->entry == NULL &&
332			    (mode & NEXT_ENTRY_PARTIAL) == 0)
333				continue;
334			/* Remove it from this hash bucket. */
335			if (le->next != NULL)
336				le->next->previous = le->previous;
337			if (le->previous != NULL)
338				le->previous->next = le->next;
339			else
340				res->buckets[bucket] = le->next;
341			res->number_entries--;
342			/* Defer freeing this entry. */
343			res->spare = le;
344			return (le);
345		}
346	}
347	return (NULL);
348}
349
350static struct links_entry *
351insert_entry(struct archive_entry_linkresolver *res,
352    struct archive_entry *entry)
353{
354	struct links_entry *le;
355	size_t hash, bucket;
356
357	/* Add this entry to the links cache. */
358	le = calloc(1, sizeof(struct links_entry));
359	if (le == NULL)
360		return (NULL);
361	le->canonical = archive_entry_clone(entry);
362
363	/* If the links cache is getting too full, enlarge the hash table. */
364	if (res->number_entries > res->number_buckets * 2)
365		grow_hash(res);
366
367	hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry));
368	bucket = hash & (res->number_buckets - 1);
369
370	/* If we could allocate the entry, record it. */
371	if (res->buckets[bucket] != NULL)
372		res->buckets[bucket]->previous = le;
373	res->number_entries++;
374	le->next = res->buckets[bucket];
375	le->previous = NULL;
376	res->buckets[bucket] = le;
377	le->hash = hash;
378	le->links = archive_entry_nlink(entry) - 1;
379	return (le);
380}
381
382static void
383grow_hash(struct archive_entry_linkresolver *res)
384{
385	struct links_entry *le, **new_buckets;
386	size_t new_size;
387	size_t i, bucket;
388
389	/* Try to enlarge the bucket list. */
390	new_size = res->number_buckets * 2;
391	if (new_size < res->number_buckets)
392		return;
393	new_buckets = calloc(new_size, sizeof(struct links_entry *));
394
395	if (new_buckets == NULL)
396		return;
397
398	for (i = 0; i < res->number_buckets; i++) {
399		while (res->buckets[i] != NULL) {
400			/* Remove entry from old bucket. */
401			le = res->buckets[i];
402			res->buckets[i] = le->next;
403
404			/* Add entry to new bucket. */
405			bucket = le->hash & (new_size - 1);
406
407			if (new_buckets[bucket] != NULL)
408				new_buckets[bucket]->previous = le;
409			le->next = new_buckets[bucket];
410			le->previous = NULL;
411			new_buckets[bucket] = le;
412		}
413	}
414	free(res->buckets);
415	res->buckets = new_buckets;
416	res->number_buckets = new_size;
417}
418
419struct archive_entry *
420archive_entry_partial_links(struct archive_entry_linkresolver *res,
421    unsigned int *links)
422{
423	struct archive_entry	*e;
424	struct links_entry	*le;
425
426	/* Free a held entry. */
427	if (res->spare != NULL) {
428		archive_entry_free(res->spare->canonical);
429		archive_entry_free(res->spare->entry);
430		free(res->spare);
431		res->spare = NULL;
432	}
433
434	le = next_entry(res, NEXT_ENTRY_PARTIAL);
435	if (le != NULL) {
436		e = le->canonical;
437		if (links != NULL)
438			*links = le->links;
439		le->canonical = NULL;
440	} else {
441		e = NULL;
442		if (links != NULL)
443			*links = 0;
444	}
445	return (e);
446}
447