1/*-
2 * Copyright (c) 2007 Kai Wang
3 * Copyright (c) 2007 Tim Kientzle
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "archive_platform.h"
29
30#ifdef HAVE_SYS_STAT_H
31#include <sys/stat.h>
32#endif
33#ifdef HAVE_ERRNO_H
34#include <errno.h>
35#endif
36#ifdef HAVE_STDLIB_H
37#include <stdlib.h>
38#endif
39#ifdef HAVE_STRING_H
40#include <string.h>
41#endif
42#ifdef HAVE_LIMITS_H
43#include <limits.h>
44#endif
45
46#include "archive.h"
47#include "archive_entry.h"
48#include "archive_private.h"
49#include "archive_read_private.h"
50
/*
 * Per-archive state kept by the "ar" reader between callbacks.
 */
struct ar {
	/* Body bytes of the current entry not yet handed to the client. */
	int64_t	 entry_bytes_remaining;
	/* unconsumed is purely to track data we've gotten from readahead,
	 * but haven't yet marked as consumed.  Must be paired with
	 * entry_bytes_remaining usage/modification.
	 */
	size_t   entry_bytes_unconsumed;
	/* Offset within the current entry reported to the client by
	 * read_data; reset to 0 whenever a header is parsed. */
	int64_t	 entry_offset;
	/* Pad bytes following the entry body (entry size modulo 2). */
	int64_t	 entry_padding;
	/* Buffer holding the GNU/SVR4 filename table read from the "//"
	 * entry; NULL until one has been read. */
	char	*strtab;
	/* Size in bytes of the strtab buffer. */
	size_t	 strtab_size;
	/* Nonzero once the 8-byte global archive header was consumed. */
	char	 read_global_header;
};
64
/*
 * Define structure of the "ar" header.
 *
 * Every value is a byte offset or byte length within one entry
 * header.  The fields are laid out back to back (name, date, uid,
 * gid, mode, size, trailer) and together span 60 bytes, which is
 * the amount the header reader requests for each entry.
 */
#define AR_name_offset 0
#define AR_name_size 16
#define AR_date_offset 16
#define AR_date_size 12
#define AR_uid_offset 28
#define AR_uid_size 6
#define AR_gid_offset 34
#define AR_gid_size 6
#define AR_mode_offset 40
#define AR_mode_size 8
#define AR_size_offset 48
#define AR_size_size 10
#define AR_fmag_offset 58
#define AR_fmag_size 2
82
/*
 * Forward declarations: the callbacks registered with the read core
 * (bid, cleanup, read_data, skip, read_header) followed by the local
 * helpers for numeric field parsing and header/filename-table decoding.
 */
static int	archive_read_format_ar_bid(struct archive_read *a, int);
static int	archive_read_format_ar_cleanup(struct archive_read *a);
static int	archive_read_format_ar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_ar_skip(struct archive_read *a);
static int	archive_read_format_ar_read_header(struct archive_read *a,
		    struct archive_entry *e);
static uint64_t	ar_atol8(const char *p, unsigned char_cnt);
static uint64_t	ar_atol10(const char *p, unsigned char_cnt);
static int	ar_parse_gnu_filename_table(struct archive_read *a);
static int	ar_parse_common_header(struct ar *ar, struct archive_entry *,
		    const char *h);
95
96int
97archive_read_support_format_ar(struct archive *_a)
98{
99	struct archive_read *a = (struct archive_read *)_a;
100	struct ar *ar;
101	int r;
102
103	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
104	    ARCHIVE_STATE_NEW, "archive_read_support_format_ar");
105
106	ar = (struct ar *)calloc(1, sizeof(*ar));
107	if (ar == NULL) {
108		archive_set_error(&a->archive, ENOMEM,
109		    "Can't allocate ar data");
110		return (ARCHIVE_FATAL);
111	}
112	ar->strtab = NULL;
113
114	r = __archive_read_register_format(a,
115	    ar,
116	    "ar",
117	    archive_read_format_ar_bid,
118	    NULL,
119	    archive_read_format_ar_read_header,
120	    archive_read_format_ar_read_data,
121	    archive_read_format_ar_skip,
122	    NULL,
123	    archive_read_format_ar_cleanup,
124	    NULL,
125	    NULL);
126
127	if (r != ARCHIVE_OK) {
128		free(ar);
129		return (r);
130	}
131	return (ARCHIVE_OK);
132}
133
134static int
135archive_read_format_ar_cleanup(struct archive_read *a)
136{
137	struct ar *ar;
138
139	ar = (struct ar *)(a->format->data);
140	free(ar->strtab);
141	free(ar);
142	(a->format->data) = NULL;
143	return (ARCHIVE_OK);
144}
145
/*
 * Bid callback: claim the stream when its first 8 bytes are the
 * global ar signature "!<arch>\n".
 *
 * Returns 64 on a match and -1 when the signature is absent or
 * 8 bytes cannot be read ahead.  best_bid is ignored.
 *
 * TODO: Do we need to check more than the signature?
 */
static int
archive_read_format_ar_bid(struct archive_read *a, int best_bid)
{
	const void *sig;

	(void)best_bid; /* UNUSED */

	sig = __archive_read_ahead(a, 8, NULL);
	if (sig == NULL || memcmp(sig, "!<arch>\n", 8) != 0)
		return (-1);
	return (64);
}
164
165static int
166_ar_read_header(struct archive_read *a, struct archive_entry *entry,
167	struct ar *ar, const char *h, size_t *unconsumed)
168{
169	char filename[AR_name_size + 1];
170	uint64_t number; /* Used to hold parsed numbers before validation. */
171	size_t bsd_name_length, entry_size;
172	char *p, *st;
173	const void *b;
174	int r;
175
176	/* Verify the magic signature on the file header. */
177	if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
178		archive_set_error(&a->archive, EINVAL,
179		    "Incorrect file header signature");
180		return (ARCHIVE_FATAL);
181	}
182
183	/* Copy filename into work buffer. */
184	strncpy(filename, h + AR_name_offset, AR_name_size);
185	filename[AR_name_size] = '\0';
186
187	/*
188	 * Guess the format variant based on the filename.
189	 */
190	if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
191		/* We don't already know the variant, so let's guess. */
192		/*
193		 * Biggest clue is presence of '/': GNU starts special
194		 * filenames with '/', appends '/' as terminator to
195		 * non-special names, so anything with '/' should be
196		 * GNU except for BSD long filenames.
197		 */
198		if (strncmp(filename, "#1/", 3) == 0)
199			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
200		else if (strchr(filename, '/') != NULL)
201			a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
202		else if (strncmp(filename, "__.SYMDEF", 9) == 0)
203			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
204		/*
205		 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
206		 * if name exactly fills 16-byte field?  If so, we
207		 * can't assume entries without '/' are BSD. XXX
208		 */
209	}
210
211	/* Update format name from the code. */
212	if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
213		a->archive.archive_format_name = "ar (GNU/SVR4)";
214	else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
215		a->archive.archive_format_name = "ar (BSD)";
216	else
217		a->archive.archive_format_name = "ar";
218
219	/*
220	 * Remove trailing spaces from the filename.  GNU and BSD
221	 * variants both pad filename area out with spaces.
222	 * This will only be wrong if GNU/SVR4 'ar' implementations
223	 * omit trailing '/' for 16-char filenames and we have
224	 * a 16-char filename that ends in ' '.
225	 */
226	p = filename + AR_name_size - 1;
227	while (p >= filename && *p == ' ') {
228		*p = '\0';
229		p--;
230	}
231
232	/*
233	 * Remove trailing slash unless first character is '/'.
234	 * (BSD entries never end in '/', so this will only trim
235	 * GNU-format entries.  GNU special entries start with '/'
236	 * and are not terminated in '/', so we don't trim anything
237	 * that starts with '/'.)
238	 */
239	if (filename[0] != '/' && p > filename && *p == '/') {
240		*p = '\0';
241	}
242
243	if (p < filename) {
244		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
245		    "Found entry with empty filename");
246		return (ARCHIVE_FATAL);
247	}
248
249	/*
250	 * '//' is the GNU filename table.
251	 * Later entries can refer to names in this table.
252	 */
253	if (strcmp(filename, "//") == 0) {
254		/* This must come before any call to _read_ahead. */
255		ar_parse_common_header(ar, entry, h);
256		archive_entry_copy_pathname(entry, filename);
257		archive_entry_set_filetype(entry, AE_IFREG);
258		/* Get the size of the filename table. */
259		number = ar_atol10(h + AR_size_offset, AR_size_size);
260		if (number > SIZE_MAX || number > 1024 * 1024 * 1024) {
261			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
262			    "Filename table too large");
263			return (ARCHIVE_FATAL);
264		}
265		entry_size = (size_t)number;
266		if (entry_size == 0) {
267			archive_set_error(&a->archive, EINVAL,
268			    "Invalid string table");
269			return (ARCHIVE_FATAL);
270		}
271		if (ar->strtab != NULL) {
272			archive_set_error(&a->archive, EINVAL,
273			    "More than one string table exists");
274			return (ARCHIVE_FATAL);
275		}
276
277		/* Read the filename table into memory. */
278		st = malloc(entry_size);
279		if (st == NULL) {
280			archive_set_error(&a->archive, ENOMEM,
281			    "Can't allocate filename table buffer");
282			return (ARCHIVE_FATAL);
283		}
284		ar->strtab = st;
285		ar->strtab_size = entry_size;
286
287		if (*unconsumed) {
288			__archive_read_consume(a, *unconsumed);
289			*unconsumed = 0;
290		}
291
292		if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
293			return (ARCHIVE_FATAL);
294		memcpy(st, b, entry_size);
295		__archive_read_consume(a, entry_size);
296		/* All contents are consumed. */
297		ar->entry_bytes_remaining = 0;
298		archive_entry_set_size(entry, ar->entry_bytes_remaining);
299
300		/* Parse the filename table. */
301		return (ar_parse_gnu_filename_table(a));
302	}
303
304	/*
305	 * GNU variant handles long filenames by storing /<number>
306	 * to indicate a name stored in the filename table.
307	 * XXX TODO: Verify that it's all digits... Don't be fooled
308	 * by "/9xyz" XXX
309	 */
310	if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
311		number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
312		/*
313		 * If we can't look up the real name, warn and return
314		 * the entry with the wrong name.
315		 */
316		if (ar->strtab == NULL || number >= ar->strtab_size) {
317			archive_set_error(&a->archive, EINVAL,
318			    "Can't find long filename for GNU/SVR4 archive entry");
319			archive_entry_copy_pathname(entry, filename);
320			/* Parse the time, owner, mode, size fields. */
321			ar_parse_common_header(ar, entry, h);
322			return (ARCHIVE_FATAL);
323		}
324
325		archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
326		/* Parse the time, owner, mode, size fields. */
327		return (ar_parse_common_header(ar, entry, h));
328	}
329
330	/*
331	 * BSD handles long filenames by storing "#1/" followed by the
332	 * length of filename as a decimal number, then prepends the
333	 * the filename to the file contents.
334	 */
335	if (strncmp(filename, "#1/", 3) == 0) {
336		/* Parse the time, owner, mode, size fields. */
337		/* This must occur before _read_ahead is called again. */
338		ar_parse_common_header(ar, entry, h);
339
340		/* Parse the size of the name, adjust the file size. */
341		number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
342		/* Sanity check the filename length:
343		 *   = Must be <= SIZE_MAX - 1
344		 *   = Must be <= 1MB
345		 *   = Cannot be bigger than the entire entry
346		 */
347		if (number > SIZE_MAX - 1
348		    || number > 1024 * 1024
349		    || (int64_t)number > ar->entry_bytes_remaining) {
350			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
351			    "Bad input file size");
352			return (ARCHIVE_FATAL);
353		}
354		bsd_name_length = (size_t)number;
355		ar->entry_bytes_remaining -= bsd_name_length;
356		/* Adjust file size reported to client. */
357		archive_entry_set_size(entry, ar->entry_bytes_remaining);
358
359		if (*unconsumed) {
360			__archive_read_consume(a, *unconsumed);
361			*unconsumed = 0;
362		}
363
364		/* Read the long name into memory. */
365		if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
366			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
367			    "Truncated input file");
368			return (ARCHIVE_FATAL);
369		}
370		/* Store it in the entry. */
371		p = (char *)malloc(bsd_name_length + 1);
372		if (p == NULL) {
373			archive_set_error(&a->archive, ENOMEM,
374			    "Can't allocate fname buffer");
375			return (ARCHIVE_FATAL);
376		}
377		strncpy(p, b, bsd_name_length);
378		p[bsd_name_length] = '\0';
379
380		__archive_read_consume(a, bsd_name_length);
381
382		archive_entry_copy_pathname(entry, p);
383		free(p);
384		return (ARCHIVE_OK);
385	}
386
387	/*
388	 * "/" is the SVR4/GNU archive symbol table.
389	 * "/SYM64/" is the SVR4/GNU 64-bit variant archive symbol table.
390	 */
391	if (strcmp(filename, "/") == 0 || strcmp(filename, "/SYM64/") == 0) {
392		archive_entry_copy_pathname(entry, filename);
393		/* Parse the time, owner, mode, size fields. */
394		r = ar_parse_common_header(ar, entry, h);
395		/* Force the file type to a regular file. */
396		archive_entry_set_filetype(entry, AE_IFREG);
397		return (r);
398	}
399
400	/*
401	 * "__.SYMDEF" is a BSD archive symbol table.
402	 */
403	if (strcmp(filename, "__.SYMDEF") == 0) {
404		archive_entry_copy_pathname(entry, filename);
405		/* Parse the time, owner, mode, size fields. */
406		return (ar_parse_common_header(ar, entry, h));
407	}
408
409	/*
410	 * Otherwise, this is a standard entry.  The filename
411	 * has already been trimmed as much as possible, based
412	 * on our current knowledge of the format.
413	 */
414	archive_entry_copy_pathname(entry, filename);
415	return (ar_parse_common_header(ar, entry, h));
416}
417
418static int
419archive_read_format_ar_read_header(struct archive_read *a,
420    struct archive_entry *entry)
421{
422	struct ar *ar = (struct ar*)(a->format->data);
423	size_t unconsumed;
424	const void *header_data;
425	int ret;
426
427	if (!ar->read_global_header) {
428		/*
429		 * We are now at the beginning of the archive,
430		 * so we need first consume the ar global header.
431		 */
432		__archive_read_consume(a, 8);
433		ar->read_global_header = 1;
434		/* Set a default format code for now. */
435		a->archive.archive_format = ARCHIVE_FORMAT_AR;
436	}
437
438	/* Read the header for the next file entry. */
439	if ((header_data = __archive_read_ahead(a, 60, NULL)) == NULL)
440		/* Broken header. */
441		return (ARCHIVE_EOF);
442
443	unconsumed = 60;
444
445	ret = _ar_read_header(a, entry, ar, (const char *)header_data, &unconsumed);
446
447	if (unconsumed)
448		__archive_read_consume(a, unconsumed);
449
450	return ret;
451}
452
453
454static int
455ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
456    const char *h)
457{
458	uint64_t n;
459
460	/* Copy remaining header */
461	archive_entry_set_filetype(entry, AE_IFREG);
462	archive_entry_set_mtime(entry,
463	    (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
464	archive_entry_set_uid(entry,
465	    (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
466	archive_entry_set_gid(entry,
467	    (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
468	archive_entry_set_mode(entry,
469	    (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
470	n = ar_atol10(h + AR_size_offset, AR_size_size);
471
472	ar->entry_offset = 0;
473	ar->entry_padding = n % 2;
474	archive_entry_set_size(entry, n);
475	ar->entry_bytes_remaining = n;
476	return (ARCHIVE_OK);
477}
478
479static int
480archive_read_format_ar_read_data(struct archive_read *a,
481    const void **buff, size_t *size, int64_t *offset)
482{
483	ssize_t bytes_read;
484	struct ar *ar;
485
486	ar = (struct ar *)(a->format->data);
487
488	if (ar->entry_bytes_unconsumed) {
489		__archive_read_consume(a, ar->entry_bytes_unconsumed);
490		ar->entry_bytes_unconsumed = 0;
491	}
492
493	if (ar->entry_bytes_remaining > 0) {
494		*buff = __archive_read_ahead(a, 1, &bytes_read);
495		if (bytes_read == 0) {
496			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
497			    "Truncated ar archive");
498			return (ARCHIVE_FATAL);
499		}
500		if (bytes_read < 0)
501			return (ARCHIVE_FATAL);
502		if (bytes_read > ar->entry_bytes_remaining)
503			bytes_read = (ssize_t)ar->entry_bytes_remaining;
504		*size = bytes_read;
505		ar->entry_bytes_unconsumed = bytes_read;
506		*offset = ar->entry_offset;
507		ar->entry_offset += bytes_read;
508		ar->entry_bytes_remaining -= bytes_read;
509		return (ARCHIVE_OK);
510	} else {
511		int64_t skipped = __archive_read_consume(a, ar->entry_padding);
512		if (skipped >= 0) {
513			ar->entry_padding -= skipped;
514		}
515		if (ar->entry_padding) {
516			if (skipped >= 0) {
517				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
518					"Truncated ar archive - failed consuming padding");
519			}
520			return (ARCHIVE_FATAL);
521		}
522		*buff = NULL;
523		*size = 0;
524		*offset = ar->entry_offset;
525		return (ARCHIVE_EOF);
526	}
527}
528
529static int
530archive_read_format_ar_skip(struct archive_read *a)
531{
532	int64_t bytes_skipped;
533	struct ar* ar;
534
535	ar = (struct ar *)(a->format->data);
536
537	bytes_skipped = __archive_read_consume(a,
538	    ar->entry_bytes_remaining + ar->entry_padding
539	    + ar->entry_bytes_unconsumed);
540	if (bytes_skipped < 0)
541		return (ARCHIVE_FATAL);
542
543	ar->entry_bytes_remaining = 0;
544	ar->entry_bytes_unconsumed = 0;
545	ar->entry_padding = 0;
546
547	return (ARCHIVE_OK);
548}
549
550static int
551ar_parse_gnu_filename_table(struct archive_read *a)
552{
553	struct ar *ar;
554	char *p;
555	size_t size;
556
557	ar = (struct ar*)(a->format->data);
558	size = ar->strtab_size;
559
560	for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
561		if (*p == '/') {
562			*p++ = '\0';
563			if (*p != '\n')
564				goto bad_string_table;
565			*p = '\0';
566		}
567	}
568	/*
569	 * GNU ar always pads the table to an even size.
570	 * The pad character is either '\n' or '`'.
571	 */
572	if (p != ar->strtab + size && *p != '\n' && *p != '`')
573		goto bad_string_table;
574
575	/* Enforce zero termination. */
576	ar->strtab[size - 1] = '\0';
577
578	return (ARCHIVE_OK);
579
580bad_string_table:
581	archive_set_error(&a->archive, EINVAL,
582	    "Invalid string table");
583	free(ar->strtab);
584	ar->strtab = NULL;
585	return (ARCHIVE_FATAL);
586}
587
/*
 * Parse an unsigned octal number from a fixed-width text field.
 *
 *   p         start of the field (need not be NUL-terminated).
 *   char_cnt  width of the field in bytes.
 *
 * Leading spaces and tabs are skipped; parsing stops at the first
 * byte that is not an octal digit or at the end of the field,
 * whichever comes first.  Returns the parsed value, 0 if no digits
 * were found, or UINT64_MAX if the value does not fit in 64 bits.
 *
 * The remaining count is always tested before a byte is examined,
 * so no byte outside [p, p + char_cnt) is ever read.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	const unsigned int base = 8;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;

	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	while (char_cnt > 0 && *p >= '0' && *p <= '7') {
		unsigned int digit = (unsigned int)(*p - '0');

		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
613
/*
 * Parse an unsigned decimal number from a fixed-width text field.
 *
 *   p         start of the field (need not be NUL-terminated).
 *   char_cnt  width of the field in bytes.
 *
 * Leading spaces and tabs are skipped; parsing stops at the first
 * byte that is not a decimal digit or at the end of the field,
 * whichever comes first.  Returns the parsed value, 0 if no digits
 * were found, or UINT64_MAX if the value does not fit in 64 bits.
 *
 * The remaining count is always tested before a byte is examined,
 * so no byte outside [p, p + char_cnt) is ever read.
 */
static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	const unsigned int base = 10;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;

	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	while (char_cnt > 0 && *p >= '0' && *p <= '9') {
		unsigned int digit = (unsigned int)(*p - '0');

		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
638