1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2013 Juniper Networks, Inc.
5 * Copyright (c) 2022-2024 Klara, Inc.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include "opt_tarfs.h"
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/buf.h>
34#include <sys/conf.h>
35#include <sys/fcntl.h>
36#include <sys/libkern.h>
37#include <sys/limits.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/mount.h>
41#include <sys/mutex.h>
42#include <sys/namei.h>
43#include <sys/priv.h>
44#include <sys/proc.h>
45#include <sys/queue.h>
46#include <sys/sbuf.h>
47#include <sys/stat.h>
48#include <sys/uio.h>
49#include <sys/vnode.h>
50
51#include <vm/vm_param.h>
52
53#include <geom/geom.h>
54#include <geom/geom_vfs.h>
55
56#include <fs/tarfs/tarfs.h>
57#include <fs/tarfs/tarfs_dbg.h>
58
59CTASSERT(ZERO_REGION_SIZE >= TARFS_BLOCKSIZE);
60
/*
 * Layout of one tar header block.  Every field is a fixed-width character
 * array; the numeric ones are decoded with tarfs_str2int64().
 */
struct ustar_header {
	/* Fields shared with the pre-POSIX ("old") tar format */
	char	name[100];		/* file name */
	char	mode[8];		/* mode flags */
	char	uid[8];			/* user id */
	char	gid[8];			/* group id */
	char	size[12];		/* size */
	char	mtime[12];		/* modification time */
	char	checksum[8];		/* header checksum */
	char	typeflag[1];		/* entry type, one of TAR_TYPE_* */
	char	linkname[100];		/* link target */

	/* POSIX ustar additions */
	char	magic[6];		/* "ustar\0" indicator */
	char	version[2];		/* ustar version, "00" */
	char	uname[32];		/* user name */
	char	gname[32];		/* group name */
	char	major[8];		/* device major number */
	char	minor[8];		/* device minor number */
	char	prefix[155];		/* path prefix */
	char	_pad[12];		/* pads the header to a full block */
};
80
81CTASSERT(sizeof(struct ustar_header) == TARFS_BLOCKSIZE);
82
/* Block number stored by tarfs_alloc_one() once the archive end is reached */
#define	TAR_EOF			((size_t)-1)

/* Values of the header typeflag field */
#define	TAR_TYPE_FILE		'0'
#define	TAR_TYPE_HARDLINK	'1'
#define	TAR_TYPE_SYMLINK	'2'
#define	TAR_TYPE_CHAR		'3'
#define	TAR_TYPE_BLOCK		'4'
#define	TAR_TYPE_DIRECTORY	'5'
#define	TAR_TYPE_FIFO		'6'
#define	TAR_TYPE_CONTIG		'7'
#define	TAR_TYPE_GLOBAL_EXTHDR	'g'
#define	TAR_TYPE_EXTHDR		'x'
#define	TAR_TYPE_GNU_SPARSE	'S'

/*
 * Expected contents of the magic and version header fields.  These are
 * array compound literals so that sizeof() gives the number of bytes to
 * compare.
 */
#define	USTAR_MAGIC		(uint8_t []){ 'u', 's', 't', 'a', 'r', '\0' }
#define	USTAR_VERSION		(uint8_t []){ '0', '0' }
#define	GNUTAR_MAGIC		(uint8_t []){ 'u', 's', 't', 'a', 'r', ' ' }
#define	GNUTAR_VERSION		(uint8_t []){ ' ', '\0' }

/* Mode of directories that tarfs_lookup_path() creates on its own */
#define	DEFDIRMODE	\
	(S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)
103
104MALLOC_DEFINE(M_TARFSMNT, "tarfs mount", "tarfs mount structures");
105MALLOC_DEFINE(M_TARFSNODE, "tarfs node", "tarfs node structures");
106
107static vfs_mount_t	tarfs_mount;
108static vfs_unmount_t	tarfs_unmount;
109static vfs_root_t	tarfs_root;
110static vfs_statfs_t	tarfs_statfs;
111static vfs_fhtovp_t	tarfs_fhtovp;
112
/*
 * Names of the mount options that tarfs_mount() lets through
 * vfs_filteropt().  The list is NULL-terminated.
 */
static const char *tarfs_opts[] = {
	"as",
	"from",
	"gid",
	"mode",
	"uid",
	"verify",
	NULL
};
117
/*
 * Reads a signed octal number from the len-byte field strp.  An optional
 * leading '-' is accepted; parsing stops at the end of the field or at
 * the first NUL or space, whichever comes first.  On success, stores the
 * value in num and returns 0.  Returns EINVAL if a non-octal character is
 * encountered and ERANGE if the value does not fit in an int64_t; num is
 * not modified on error.
 */
static int
tarfs_str2octal(const char *strp, size_t len, int64_t *num)
{
	int64_t val;
	size_t idx;
	int sign;

	idx = 0;
	sign = 1;
	if (idx < len && strp[idx] == '-') {
		sign = -1;
		idx++;
	}

	val = 0;
	for (; idx < len && strp[idx] != '\0' && strp[idx] != ' '; idx++) {
		if (strp[idx] < '0' || strp[idx] > '7')
			return (EINVAL);
		/*
		 * Check before accumulating: as long as val does not
		 * exceed INT64_MAX / 8, val * 8 + 7 cannot overflow, so
		 * every value up to INT64_MAX is accepted and nothing
		 * larger is ever computed.
		 */
		if (val > INT64_MAX / 8)
			return (ERANGE);
		val = val * 8 + (strp[idx] - '0');
	}

	*num = val * sign;
	return (0);
}
150
/*
 * Reads a len-byte extended numeric value from strp.  The first byte has
 * bit 7 set to indicate the format; its remaining 7 bits and the
 * (len - 1) bytes that follow form a big-endian signed two's complement
 * binary number.  On success, stores the value in num and returns 0.
 * Returns ERANGE if the value does not fit in an int64_t; num is not
 * modified on error.
 */
static int
tarfs_str2base256(const char *strp, size_t len, int64_t *num)
{
	int64_t val;
	size_t idx;

	KASSERT(strp[0] & 0x80, ("not an extended numeric value"));

	/*
	 * Bit 6 of the first byte is the sign bit and bits 0-5 are the
	 * most significant value bits.  Sign-extend arithmetically rather
	 * than by shifting a negative number.
	 */
	val = strp[0] & 0x3f;
	if ((strp[0] & 0x40) != 0)
		val -= 0x40;

	/* Read subsequent bytes */
	for (idx = 1; idx < len; idx++) {
		/*
		 * Check before accumulating: within these bounds,
		 * val * 256 + 255 cannot overflow, so the whole int64_t
		 * range is accepted.
		 */
		if (val > INT64_MAX / 256 || val < INT64_MIN / 256)
			return (ERANGE);
		val = val * 256 + (unsigned char)strp[idx];
	}

	*num = val;
	return (0);
}
184
/*
 * Reads a len-byte numeric field from strp.  If bit 7 of the first byte
 * is set, the field is decoded as an extended numeric value (signed two's
 * complement); otherwise it is decoded as a signed octal value.  Returns
 * EINVAL for an empty field, otherwise whatever the selected decoder
 * returns.
 */
static int
tarfs_str2int64(const char *strp, size_t len, int64_t *num)
{
	int extended;

	if (len < 1)
		return (EINVAL);

	extended = (strp[0] & 0x80) != 0;
	return (extended ? tarfs_str2base256(strp, len, num) :
	    tarfs_str2octal(strp, len, num));
}
199
200/*
201 * Verifies the checksum of a header.  Returns true if the checksum is
202 * valid, false otherwise.
203 */
204static boolean_t
205tarfs_checksum(struct ustar_header *hdrp)
206{
207	const unsigned char *ptr;
208	int64_t checksum, hdrsum;
209
210	if (tarfs_str2int64(hdrp->checksum, sizeof(hdrp->checksum), &hdrsum) != 0) {
211		TARFS_DPF(CHECKSUM, "%s: invalid header checksum \"%.*s\"\n",
212		    __func__, (int)sizeof(hdrp->checksum), hdrp->checksum);
213		return (false);
214	}
215	TARFS_DPF(CHECKSUM, "%s: header checksum \"%.*s\" = %#lo\n", __func__,
216	    (int)sizeof(hdrp->checksum), hdrp->checksum, hdrsum);
217
218	checksum = 0;
219	for (ptr = (const unsigned char *)hdrp;
220	     ptr < (const unsigned char *)hdrp->checksum; ptr++)
221		checksum += *ptr;
222	for (;
223	     ptr < (const unsigned char *)hdrp->typeflag; ptr++)
224		checksum += 0x20;
225	for (;
226	     ptr < (const unsigned char *)(hdrp + 1); ptr++)
227		checksum += *ptr;
228	TARFS_DPF(CHECKSUM, "%s: calc unsigned checksum %#lo\n", __func__,
229	    checksum);
230	if (hdrsum == checksum)
231		return (true);
232
233	/*
234	 * Repeat test with signed bytes, some older formats use a broken
235	 * form of the calculation
236	 */
237	checksum = 0;
238	for (ptr = (const unsigned char *)hdrp;
239	     ptr < (const unsigned char *)&hdrp->checksum; ptr++)
240		checksum += *((const signed char *)ptr);
241	for (;
242	     ptr < (const unsigned char *)&hdrp->typeflag; ptr++)
243		checksum += 0x20;
244	for (;
245	     ptr < (const unsigned char *)(hdrp + 1); ptr++)
246		checksum += *((const signed char *)ptr);
247	TARFS_DPF(CHECKSUM, "%s: calc signed checksum %#lo\n", __func__,
248	    checksum);
249	if (hdrsum == checksum)
250		return (true);
251
252	return (false);
253}
254
255
256/*
257 * Looks up a path in the tarfs node tree.
258 *
259 * - If the path exists, stores a pointer to the corresponding tarfs_node
260 *   in retnode and a pointer to its parent in retparent.
261 *
262 * - If the path does not exist, but create_dirs is true, creates ancestor
263 *   directories and returns NULL in retnode and the parent in retparent.
264 *
265 * - If the path does not exist and create_dirs is false, stops at the
266 *   first missing path name component.
267 *
268 * - In all cases, on return, endp and sepp point to the beginning and
269 *   end, respectively, of the last-processed path name component.
270 *
271 * - Returns 0 if the node was found, ENOENT if it was not, and some other
272 *   positive errno value on failure.
273 */
274static int
275tarfs_lookup_path(struct tarfs_mount *tmp, char *name, size_t namelen,
276    char **endp, char **sepp, struct tarfs_node **retparent,
277    struct tarfs_node **retnode, boolean_t create_dirs)
278{
279	struct componentname cn = { };
280	struct tarfs_node *parent, *tnp;
281	char *sep;
282	size_t len;
283	int error;
284	boolean_t do_lookup;
285
286	MPASS(name != NULL && namelen != 0);
287
288	do_lookup = true;
289	error = 0;
290	parent = tnp = tmp->root;
291	if (tnp == NULL)
292		panic("%s: root node not yet created", __func__);
293
294	TARFS_DPF(LOOKUP, "%s: full path: %.*s\n", __func__,
295	    (int)namelen, name);
296
297	sep = NULL;
298	for (;;) {
299		/* skip leading slash(es) */
300		while (name[0] == '/' && namelen > 0)
301			name++, namelen--;
302
303		/* did we reach the end? */
304		if (namelen == 0 || name[0] == '\0') {
305			name = do_lookup ? NULL : cn.cn_nameptr;
306			namelen = do_lookup ? 0 : cn.cn_namelen;
307			break;
308		}
309
310		/* we're not at the end, so we must be in a directory */
311		if (tnp != NULL && tnp->type != VDIR) {
312			TARFS_DPF(LOOKUP, "%s: %.*s is not a directory\n", __func__,
313			    (int)tnp->namelen, tnp->name);
314			error = ENOTDIR;
315			break;
316		}
317
318		/* locate the next separator */
319		for (sep = name, len = 0;
320		     *sep != '\0' && *sep != '/' && len < namelen;
321		     sep++, len++)
322			/* nothing */ ;
323
324		/* check for . and .. */
325		if (name[0] == '.' && len == 1) {
326			name += len;
327			namelen -= len;
328			continue;
329		}
330		if (name[0] == '.' && name[1] == '.' && len == 2) {
331			if (tnp == tmp->root) {
332				error = EINVAL;
333				break;
334			}
335			tnp = parent;
336			parent = tnp->parent;
337			cn.cn_nameptr = tnp->name;
338			cn.cn_namelen = tnp->namelen;
339			do_lookup = true;
340			TARFS_DPF(LOOKUP, "%s: back to %.*s/\n", __func__,
341			    (int)tnp->namelen, tnp->name);
342			name += len;
343			namelen -= len;
344			continue;
345		}
346
347		/* create parent if necessary */
348		if (!do_lookup) {
349			TARFS_DPF(ALLOC, "%s: creating %.*s\n", __func__,
350			    (int)cn.cn_namelen, cn.cn_nameptr);
351			error = tarfs_alloc_node(tmp, cn.cn_nameptr,
352			    cn.cn_namelen, VDIR, -1, 0, tmp->mtime, 0, 0,
353			    DEFDIRMODE, 0, NULL, NODEV, parent, &tnp);
354			if (error != 0)
355				break;
356		}
357
358		parent = tnp;
359		tnp = NULL;
360		cn.cn_nameptr = name;
361		cn.cn_namelen = len;
362		TARFS_DPF(LOOKUP, "%s: looking up %.*s in %.*s/\n", __func__,
363		    (int)cn.cn_namelen, cn.cn_nameptr,
364		    (int)parent->namelen, parent->name);
365		if (do_lookup) {
366			tnp = tarfs_lookup_node(parent, NULL, &cn);
367			if (tnp == NULL) {
368				do_lookup = false;
369				if (!create_dirs) {
370					error = ENOENT;
371					break;
372				}
373			}
374		}
375		name += cn.cn_namelen;
376		namelen -= cn.cn_namelen;
377	}
378
379	TARFS_DPF(LOOKUP, "%s: parent %p node %p\n", __func__, parent, tnp);
380
381	if (retparent)
382		*retparent = parent;
383	if (retnode)
384		*retnode = tnp;
385	if (endp) {
386		if (namelen > 0)
387			*endp = name;
388		else
389			*endp = NULL;
390	}
391	if (sepp)
392		*sepp = sep;
393	return (error);
394}
395
396/*
397 * Frees a tarfs_mount structure and everything it references.
398 */
399static void
400tarfs_free_mount(struct tarfs_mount *tmp)
401{
402	struct mount *mp;
403	struct tarfs_node *tnp, *tnp_next;
404
405	MPASS(tmp != NULL);
406
407	TARFS_DPF(ALLOC, "%s: Freeing mount structure %p\n", __func__, tmp);
408
409	TARFS_DPF(ALLOC, "%s: freeing tarfs_node structures\n", __func__);
410	TAILQ_FOREACH_SAFE(tnp, &tmp->allnodes, entries, tnp_next) {
411		tarfs_free_node(tnp);
412	}
413
414	(void)tarfs_io_fini(tmp);
415
416	TARFS_DPF(ALLOC, "%s: deleting unr header\n", __func__);
417	delete_unrhdr(tmp->ino_unr);
418	mp = tmp->vfs;
419	mp->mnt_data = NULL;
420
421	TARFS_DPF(ALLOC, "%s: freeing structure\n", __func__);
422	free(tmp, M_TARFSMNT);
423}
424
425/*
426 * Processes the tar file header at block offset blknump and allocates and
427 * populates a tarfs_node structure for the file it describes.  Updated
428 * blknump to point to the next unread tar file block, or TAR_EOF if EOF
429 * is reached.  Returns 0 on success or EOF and a positive errno value on
430 * failure.
431 */
432static int
433tarfs_alloc_one(struct tarfs_mount *tmp, size_t *blknump)
434{
435	char block[TARFS_BLOCKSIZE];
436	struct ustar_header *hdrp = (struct ustar_header *)block;
437	struct sbuf *namebuf = NULL;
438	char *exthdr = NULL, *name = NULL, *link = NULL;
439	size_t blknum = *blknump;
440	int64_t num;
441	int endmarker = 0;
442	char *namep, *sep;
443	struct tarfs_node *parent, *tnp, *other;
444	size_t namelen = 0, linklen = 0, realsize = 0, sz;
445	ssize_t res;
446	dev_t rdev;
447	gid_t gid;
448	mode_t mode;
449	time_t mtime;
450	uid_t uid;
451	long major = -1, minor = -1;
452	unsigned int flags = 0;
453	int error;
454	boolean_t sparse = false;
455
456again:
457	/* read next header */
458	res = tarfs_io_read_buf(tmp, false, block,
459	    TARFS_BLOCKSIZE * blknum, TARFS_BLOCKSIZE);
460	if (res < 0) {
461		error = -res;
462		goto bad;
463	} else if (res < TARFS_BLOCKSIZE) {
464		goto eof;
465	}
466	blknum++;
467
468	/* check for end marker */
469	if (memcmp(block, zero_region, TARFS_BLOCKSIZE) == 0) {
470		if (endmarker++) {
471			if (exthdr != NULL) {
472				TARFS_DPF(IO, "%s: orphaned extended header at %zu\n",
473				    __func__, TARFS_BLOCKSIZE * (blknum - 1));
474				free(exthdr, M_TEMP);
475			}
476			TARFS_DPF(IO, "%s: end of archive at %zu\n", __func__,
477			    TARFS_BLOCKSIZE * blknum);
478			tmp->nblocks = blknum;
479			*blknump = TAR_EOF;
480			return (0);
481		}
482		goto again;
483	}
484
485	/* verify magic */
486	if (memcmp(hdrp->magic, USTAR_MAGIC, sizeof(USTAR_MAGIC)) == 0 &&
487	    memcmp(hdrp->version, USTAR_VERSION, sizeof(USTAR_VERSION)) == 0) {
488		/* POSIX */
489	} else if (memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0 &&
490	    memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0) {
491		TARFS_DPF(ALLOC, "%s: GNU tar format at %zu\n", __func__,
492		    TARFS_BLOCKSIZE * (blknum - 1));
493		error = EFTYPE;
494		goto bad;
495	} else {
496		TARFS_DPF(ALLOC, "%s: unsupported TAR format at %zu\n",
497		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
498		error = EINVAL;
499		goto bad;
500	}
501
502	/* verify checksum */
503	if (!tarfs_checksum(hdrp)) {
504		TARFS_DPF(ALLOC, "%s: header checksum failed at %zu\n",
505		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
506		error = EINVAL;
507		goto bad;
508	}
509
510	/* get standard attributes */
511	if (tarfs_str2int64(hdrp->mode, sizeof(hdrp->mode), &num) != 0 ||
512	    num < 0 || num > (S_IFMT|ALLPERMS)) {
513		TARFS_DPF(ALLOC, "%s: invalid file mode at %zu\n",
514		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
515		mode = S_IRUSR;
516	} else {
517		mode = num & ALLPERMS;
518	}
519	if (tarfs_str2int64(hdrp->uid, sizeof(hdrp->uid), &num) != 0 ||
520	    num < 0 || num > UID_MAX) {
521		TARFS_DPF(ALLOC, "%s: invalid UID at %zu\n",
522		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
523		uid = tmp->root->uid;
524		mode &= ~S_ISUID;
525	} else {
526		uid = num;
527	}
528	if (tarfs_str2int64(hdrp->gid, sizeof(hdrp->gid), &num) != 0 ||
529	    num < 0 || num > GID_MAX) {
530		TARFS_DPF(ALLOC, "%s: invalid GID at %zu\n",
531		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
532		gid = tmp->root->gid;
533		mode &= ~S_ISGID;
534	} else {
535		gid = num;
536	}
537	if (tarfs_str2int64(hdrp->size, sizeof(hdrp->size), &num) != 0 ||
538	    num < 0) {
539		TARFS_DPF(ALLOC, "%s: invalid size at %zu\n",
540		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
541		error = EINVAL;
542		goto bad;
543	}
544	sz = num;
545	if (tarfs_str2int64(hdrp->mtime, sizeof(hdrp->mtime), &num) != 0) {
546		TARFS_DPF(ALLOC, "%s: invalid modification time at %zu\n",
547		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
548		error = EINVAL;
549		goto bad;
550	}
551	mtime = num;
552	rdev = NODEV;
553	TARFS_DPF(ALLOC, "%s: [%c] %zu @%jd %o %d:%d\n", __func__,
554	    hdrp->typeflag[0], sz, (intmax_t)mtime, mode, uid, gid);
555
556	/* global extended header? */
557	if (hdrp->typeflag[0] == TAR_TYPE_GLOBAL_EXTHDR) {
558		TARFS_DPF(ALLOC, "%s: %zu-byte global extended header at %zu\n",
559		    __func__, sz, TARFS_BLOCKSIZE * (blknum - 1));
560		goto skip;
561	}
562
563	/* extended header? */
564	if (hdrp->typeflag[0] == TAR_TYPE_EXTHDR) {
565		if (exthdr != NULL) {
566			TARFS_DPF(IO, "%s: multiple extended headers at %zu\n",
567			    __func__, TARFS_BLOCKSIZE * (blknum - 1));
568			error = EFTYPE;
569			goto bad;
570		}
571		/* read the contents of the exthdr */
572		TARFS_DPF(ALLOC, "%s: %zu-byte extended header at %zu\n",
573		    __func__, sz, TARFS_BLOCKSIZE * (blknum - 1));
574		exthdr = malloc(sz, M_TEMP, M_WAITOK);
575		res = tarfs_io_read_buf(tmp, false, exthdr,
576		    TARFS_BLOCKSIZE * blknum, sz);
577		if (res < 0) {
578			error = -res;
579			goto bad;
580		}
581		if (res < sz) {
582			goto eof;
583		}
584		blknum += TARFS_SZ2BLKS(res);
585		/* XXX TODO: refactor this parser */
586		char *line = exthdr;
587		while (line < exthdr + sz) {
588			char *eol, *key, *value, *sep;
589			size_t len = strtoul(line, &sep, 10);
590			if (len == 0 || sep == line || *sep != ' ') {
591				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
592				    __func__);
593				error = EINVAL;
594				goto bad;
595			}
596			if ((uintptr_t)line + len < (uintptr_t)line ||
597			    line + len > exthdr + sz) {
598				TARFS_DPF(ALLOC, "%s: exthdr overflow\n",
599				    __func__);
600				error = EINVAL;
601				goto bad;
602			}
603			eol = line + len - 1;
604			*eol = '\0';
605			line += len;
606			key = sep + 1;
607			sep = strchr(key, '=');
608			if (sep == NULL) {
609				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
610				    __func__);
611				error = EINVAL;
612				goto bad;
613			}
614			*sep = '\0';
615			value = sep + 1;
616			TARFS_DPF(ALLOC, "%s: exthdr %s=%s\n", __func__,
617			    key, value);
618			if (strcmp(key, "path") == 0) {
619				name = value;
620				namelen = eol - value;
621			} else if (strcmp(key, "linkpath") == 0) {
622				link = value;
623				linklen = eol - value;
624			} else if (strcmp(key, "GNU.sparse.major") == 0) {
625				sparse = true;
626				major = strtol(value, &sep, 10);
627				if (sep != eol) {
628					printf("exthdr syntax error\n");
629					error = EINVAL;
630					goto bad;
631				}
632			} else if (strcmp(key, "GNU.sparse.minor") == 0) {
633				sparse = true;
634				minor = strtol(value, &sep, 10);
635				if (sep != eol) {
636					printf("exthdr syntax error\n");
637					error = EINVAL;
638					goto bad;
639				}
640			} else if (strcmp(key, "GNU.sparse.name") == 0) {
641				sparse = true;
642				name = value;
643				namelen = eol - value;
644				if (namelen == 0) {
645					printf("exthdr syntax error\n");
646					error = EINVAL;
647					goto bad;
648				}
649			} else if (strcmp(key, "GNU.sparse.realsize") == 0) {
650				sparse = true;
651				realsize = strtoul(value, &sep, 10);
652				if (sep != eol) {
653					printf("exthdr syntax error\n");
654					error = EINVAL;
655					goto bad;
656				}
657			} else if (strcmp(key, "SCHILY.fflags") == 0) {
658				flags |= tarfs_strtofflags(value, &sep);
659				if (sep != eol) {
660					printf("exthdr syntax error\n");
661					error = EINVAL;
662					goto bad;
663				}
664			}
665		}
666		goto again;
667	}
668
669	/* sparse file consistency checks */
670	if (sparse) {
671		TARFS_DPF(ALLOC, "%s: %s: sparse %ld.%ld (%zu bytes)\n", __func__,
672		    name, major, minor, realsize);
673		if (major != 1 || minor != 0 || name == NULL || realsize == 0 ||
674		    hdrp->typeflag[0] != TAR_TYPE_FILE) {
675			TARFS_DPF(ALLOC, "%s: invalid sparse format\n", __func__);
676			error = EINVAL;
677			goto bad;
678		}
679	}
680
681	/* file name */
682	if (name == NULL) {
683		if (hdrp->prefix[0] != '\0') {
684			namebuf = sbuf_new_auto();
685			sbuf_printf(namebuf, "%.*s/%.*s",
686			    (int)sizeof(hdrp->prefix), hdrp->prefix,
687			    (int)sizeof(hdrp->name), hdrp->name);
688			sbuf_finish(namebuf);
689			name = sbuf_data(namebuf);
690			namelen = sbuf_len(namebuf);
691		} else {
692			name = hdrp->name;
693			namelen = strnlen(hdrp->name, sizeof(hdrp->name));
694		}
695	}
696
697	error = tarfs_lookup_path(tmp, name, namelen, &namep,
698	    &sep, &parent, &tnp, true);
699	if (error != 0) {
700		TARFS_DPF(ALLOC, "%s: failed to look up %.*s\n", __func__,
701		    (int)namelen, name);
702		error = EINVAL;
703		goto bad;
704	}
705	if (tnp != NULL) {
706		if (hdrp->typeflag[0] == TAR_TYPE_DIRECTORY) {
707			/* XXX set attributes? */
708			goto skip;
709		}
710		TARFS_DPF(ALLOC, "%s: duplicate file %.*s\n", __func__,
711		    (int)namelen, name);
712		error = EINVAL;
713		goto bad;
714	}
715	switch (hdrp->typeflag[0]) {
716	case TAR_TYPE_DIRECTORY:
717		error = tarfs_alloc_node(tmp, namep, sep - namep, VDIR,
718		    0, 0, mtime, uid, gid, mode, flags, NULL, 0,
719		    parent, &tnp);
720		break;
721	case TAR_TYPE_FILE:
722		error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
723		    blknum * TARFS_BLOCKSIZE, sz, mtime, uid, gid, mode,
724		    flags, NULL, 0, parent, &tnp);
725		if (error == 0 && sparse) {
726			error = tarfs_load_blockmap(tnp, realsize);
727		}
728		break;
729	case TAR_TYPE_HARDLINK:
730		if (link == NULL) {
731			link = hdrp->linkname;
732			linklen = strnlen(link, sizeof(hdrp->linkname));
733		}
734		if (linklen == 0) {
735			TARFS_DPF(ALLOC, "%s: %.*s: link without target\n",
736			    __func__, (int)namelen, name);
737			error = EINVAL;
738			goto bad;
739		}
740		error = tarfs_lookup_path(tmp, link, linklen, NULL,
741		    NULL, NULL, &other, false);
742		if (error != 0 || other == NULL ||
743		    other->type != VREG || other->other != NULL) {
744			TARFS_DPF(ALLOC, "%s: %.*s: invalid link to %.*s\n",
745			    __func__, (int)namelen, name, (int)linklen, link);
746			error = EINVAL;
747			goto bad;
748		}
749		error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
750		    0, 0, 0, 0, 0, 0, 0, NULL, 0, parent, &tnp);
751		if (error == 0) {
752			tnp->other = other;
753			tnp->other->nlink++;
754		}
755		break;
756	case TAR_TYPE_SYMLINK:
757		if (link == NULL) {
758			link = hdrp->linkname;
759			linklen = strnlen(link, sizeof(hdrp->linkname));
760		}
761		if (linklen == 0) {
762			TARFS_DPF(ALLOC, "%s: %.*s: link without target\n",
763			    __func__, (int)namelen, name);
764			error = EINVAL;
765			goto bad;
766		}
767		error = tarfs_alloc_node(tmp, namep, sep - namep, VLNK,
768		    0, linklen, mtime, uid, gid, mode, flags, link, 0,
769		    parent, &tnp);
770		break;
771	case TAR_TYPE_BLOCK:
772		if (tarfs_str2int64(hdrp->major, sizeof(hdrp->major), &num) != 0 ||
773		    num < 0 || num > INT_MAX) {
774			TARFS_DPF(ALLOC, "%s: %.*s: invalid device major\n",
775			    __func__, (int)namelen, name);
776			error = EINVAL;
777			goto bad;
778		}
779		major = num;
780		if (tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor), &num) != 0 ||
781		    num < 0 || num > INT_MAX) {
782			TARFS_DPF(ALLOC, "%s: %.*s: invalid device minor\n",
783			    __func__, (int)namelen, name);
784			error = EINVAL;
785			goto bad;
786		}
787		minor = num;
788		rdev = makedev(major, minor);
789		error = tarfs_alloc_node(tmp, namep, sep - namep, VBLK,
790		    0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
791		    parent, &tnp);
792		break;
793	case TAR_TYPE_CHAR:
794		if (tarfs_str2int64(hdrp->major, sizeof(hdrp->major), &num) != 0 ||
795		    num < 0 || num > INT_MAX) {
796			TARFS_DPF(ALLOC, "%s: %.*s: invalid device major\n",
797			    __func__, (int)namelen, name);
798			error = EINVAL;
799			goto bad;
800		}
801		major = num;
802		if (tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor), &num) != 0 ||
803		    num < 0 || num > INT_MAX) {
804			TARFS_DPF(ALLOC, "%s: %.*s: invalid device minor\n",
805			    __func__, (int)namelen, name);
806			error = EINVAL;
807			goto bad;
808		}
809		minor = num;
810		rdev = makedev(major, minor);
811		error = tarfs_alloc_node(tmp, namep, sep - namep, VCHR,
812		    0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
813		    parent, &tnp);
814		break;
815	default:
816		TARFS_DPF(ALLOC, "%s: unsupported type %c for %.*s\n",
817		    __func__, hdrp->typeflag[0], (int)namelen, name);
818		error = EINVAL;
819		break;
820	}
821	if (error != 0)
822		goto bad;
823
824skip:
825	blknum += TARFS_SZ2BLKS(sz);
826	tmp->nblocks = blknum;
827	*blknump = blknum;
828	if (exthdr != NULL) {
829		free(exthdr, M_TEMP);
830	}
831	if (namebuf != NULL) {
832		sbuf_delete(namebuf);
833	}
834	return (0);
835eof:
836	TARFS_DPF(IO, "%s: premature end of file\n", __func__);
837	error = EIO;
838	goto bad;
839bad:
840	if (exthdr != NULL) {
841		free(exthdr, M_TEMP);
842	}
843	if (namebuf != NULL) {
844		sbuf_delete(namebuf);
845	}
846	return (error);
847}
848
849/*
850 * Allocates and populates the metadata structures for the tar file
851 * referenced by vp.  On success, a pointer to the tarfs_mount structure
852 * is stored in tmpp.  Returns 0 on success or a positive errno value on
853 * failure.
854 */
855static int
856tarfs_alloc_mount(struct mount *mp, struct vnode *vp,
857    uid_t root_uid, gid_t root_gid, mode_t root_mode,
858    struct tarfs_mount **tmpp)
859{
860	struct vattr va;
861	struct thread *td = curthread;
862	struct tarfs_mount *tmp;
863	struct tarfs_node *root;
864	size_t blknum;
865	time_t mtime;
866	int error;
867
868	KASSERT(tmpp != NULL, ("tarfs mount return is NULL"));
869	ASSERT_VOP_LOCKED(vp, __func__);
870
871	tmp = NULL;
872
873	TARFS_DPF(ALLOC, "%s: Allocating tarfs mount structure for vp %p\n",
874	    __func__, vp);
875
876	/* Get source metadata */
877	error = VOP_GETATTR(vp, &va, td->td_ucred);
878	if (error != 0) {
879		return (error);
880	}
881	VOP_UNLOCK(vp);
882	mtime = va.va_mtime.tv_sec;
883
884	mp->mnt_iosize_max = vp->v_mount->mnt_iosize_max;
885
886	/* Allocate and initialize tarfs mount structure */
887	tmp = malloc(sizeof(*tmp), M_TARFSMNT, M_WAITOK | M_ZERO);
888	TARFS_DPF(ALLOC, "%s: Allocated mount structure\n", __func__);
889	mp->mnt_data = tmp;
890
891	mtx_init(&tmp->allnode_lock, "tarfs allnode lock", NULL,
892	    MTX_DEF);
893	TAILQ_INIT(&tmp->allnodes);
894	tmp->ino_unr = new_unrhdr(TARFS_MININO, INT_MAX, &tmp->allnode_lock);
895	tmp->vp = vp;
896	tmp->vfs = mp;
897	tmp->mtime = mtime;
898
899	/* Initialize I/O layer */
900	tmp->iosize = 1U << tarfs_ioshift;
901	error = tarfs_io_init(tmp);
902	if (error != 0)
903		goto bad;
904
905	error = tarfs_alloc_node(tmp, NULL, 0, VDIR, 0, 0, mtime, root_uid,
906	    root_gid, root_mode & ALLPERMS, 0, NULL, NODEV, NULL, &root);
907	if (error != 0 || root == NULL)
908		goto bad;
909	tmp->root = root;
910
911	blknum = 0;
912	do {
913		if ((error = tarfs_alloc_one(tmp, &blknum)) != 0) {
914			printf("unsupported or corrupt tar file at %zu\n",
915			    TARFS_BLOCKSIZE * blknum);
916			goto bad;
917		}
918	} while (blknum != TAR_EOF);
919
920	*tmpp = tmp;
921
922	TARFS_DPF(ALLOC, "%s: pfsmnt_root %p\n", __func__, tmp->root);
923	return (0);
924
925bad:
926	tarfs_free_mount(tmp);
927	return (error);
928}
929
930/*
931 * VFS Operations.
932 */
933
934static int
935tarfs_mount(struct mount *mp)
936{
937	struct nameidata nd;
938	struct vattr va;
939	struct tarfs_mount *tmp = NULL;
940	struct thread *td = curthread;
941	struct vnode *vp;
942	char *as, *from;
943	uid_t root_uid;
944	gid_t root_gid;
945	mode_t root_mode;
946	int error, flags, aslen, len;
947
948	if (mp->mnt_flag & MNT_UPDATE)
949		return (EOPNOTSUPP);
950
951	if (vfs_filteropt(mp->mnt_optnew, tarfs_opts))
952		return (EINVAL);
953
954	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
955	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
956	VOP_UNLOCK(mp->mnt_vnodecovered);
957	if (error)
958		return (error);
959
960	if (mp->mnt_cred->cr_ruid != 0 ||
961	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
962		root_gid = va.va_gid;
963	if (mp->mnt_cred->cr_ruid != 0 ||
964	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
965		root_uid = va.va_uid;
966	if (mp->mnt_cred->cr_ruid != 0 ||
967	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
968		root_mode = va.va_mode;
969
970	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
971	if (error != 0 || from[len - 1] != '\0')
972		return (EINVAL);
973	error = vfs_getopt(mp->mnt_optnew, "as", (void **)&as, &aslen);
974	if (error != 0 || as[aslen - 1] != '\0')
975		as = from;
976
977	/* Find the source tarball */
978	TARFS_DPF(FS, "%s(%s%s%s, uid=%u, gid=%u, mode=%o)\n", __func__,
979	    from, (as != from) ? " as " : "", (as != from) ? as : "",
980	    root_uid, root_gid, root_mode);
981	flags = FREAD;
982	if (vfs_flagopt(mp->mnt_optnew, "verify", NULL, 0)) {
983	    flags |= O_VERIFY;
984	}
985	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF, UIO_SYSSPACE, from);
986	error = namei(&nd);
987	if (error != 0)
988		return (error);
989	NDFREE_PNBUF(&nd);
990	vp = nd.ni_vp;
991	TARFS_DPF(FS, "%s: N: hold %u use %u lock 0x%x\n", __func__,
992	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
993	/* vp is now held and locked */
994
995	/* Open the source tarball */
996	error = vn_open_vnode(vp, flags, td->td_ucred, td, NULL);
997	if (error != 0) {
998		TARFS_DPF(FS, "%s: failed to open %s: %d\n", __func__,
999		    from, error);
1000		vput(vp);
1001		goto bad;
1002	}
1003	TARFS_DPF(FS, "%s: O: hold %u use %u lock 0x%x\n", __func__,
1004	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1005	if (vp->v_type != VREG) {
1006		TARFS_DPF(FS, "%s: not a regular file\n", __func__);
1007		error = EOPNOTSUPP;
1008		goto bad_open_locked;
1009	}
1010	error = priv_check(td, PRIV_VFS_MOUNT_PERM);
1011	if (error != 0) {
1012		TARFS_DPF(FS, "%s: not permitted to mount\n", __func__);
1013		goto bad_open_locked;
1014	}
1015	if (flags & O_VERIFY) {
1016		mp->mnt_flag |= MNT_VERIFIED;
1017	}
1018
1019	/* Allocate the tarfs mount */
1020	error = tarfs_alloc_mount(mp, vp, root_uid, root_gid, root_mode, &tmp);
1021	/* vp is now held but unlocked */
1022	if (error != 0) {
1023		TARFS_DPF(FS, "%s: failed to mount %s: %d\n", __func__,
1024		    from, error);
1025		goto bad_open_unlocked;
1026	}
1027	TARFS_DPF(FS, "%s: M: hold %u use %u lock 0x%x\n", __func__,
1028	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1029
1030	/* Unconditionally mount as read-only */
1031	MNT_ILOCK(mp);
1032	mp->mnt_flag |= (MNT_LOCAL | MNT_RDONLY);
1033	MNT_IUNLOCK(mp);
1034
1035	vfs_getnewfsid(mp);
1036	vfs_mountedfrom(mp, as);
1037	TARFS_DPF(FS, "%s: success\n", __func__);
1038
1039	return (0);
1040
1041bad_open_locked:
1042	/* vp must be held and locked */
1043	TARFS_DPF(FS, "%s: L: hold %u use %u lock 0x%x\n", __func__,
1044	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1045	VOP_UNLOCK(vp);
1046bad_open_unlocked:
1047	/* vp must be held and unlocked */
1048	TARFS_DPF(FS, "%s: E: hold %u use %u lock 0x%x\n", __func__,
1049	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1050	(void)vn_close(vp, flags, td->td_ucred, td);
1051bad:
1052	/* vp must be released and unlocked */
1053	TARFS_DPF(FS, "%s: X: hold %u use %u lock 0x%x\n", __func__,
1054	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1055	return (error);
1056}
1057
1058/*
1059 * Unmounts a tarfs filesystem.
1060 */
1061static int
1062tarfs_unmount(struct mount *mp, int mntflags)
1063{
1064	struct thread *td = curthread;
1065	struct tarfs_mount *tmp;
1066	struct vnode *vp;
1067	int error;
1068	int flags = 0;
1069
1070	TARFS_DPF(FS, "%s: Unmounting %p\n", __func__, mp);
1071
1072	/* Handle forced unmounts */
1073	if (mntflags & MNT_FORCE)
1074		flags |= FORCECLOSE;
1075
1076	/* Finalize all pending I/O */
1077	error = vflush(mp, 0, flags, curthread);
1078	if (error != 0)
1079		return (error);
1080	tmp = MP_TO_TARFS_MOUNT(mp);
1081	vp = tmp->vp;
1082
1083	MPASS(vp != NULL);
1084	TARFS_DPF(FS, "%s: U: hold %u use %u lock 0x%x\n", __func__,
1085	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1086	vn_close(vp, FREAD, td->td_ucred, td);
1087	TARFS_DPF(FS, "%s: C: hold %u use %u lock 0x%x\n", __func__,
1088	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1089	tarfs_free_mount(tmp);
1090
1091	return (0);
1092}
1093
1094/*
1095 * Gets the root of a tarfs filesystem.  Returns 0 on success or a
1096 * positive errno value on failure.
1097 */
1098static int
1099tarfs_root(struct mount *mp, int flags, struct vnode **vpp)
1100{
1101	struct vnode *nvp;
1102	int error;
1103
1104	TARFS_DPF(FS, "%s: Getting root vnode\n", __func__);
1105
1106	error = VFS_VGET(mp, TARFS_ROOTINO, LK_EXCLUSIVE, &nvp);
1107	if (error != 0)
1108		return (error);
1109
1110	nvp->v_vflag |= VV_ROOT;
1111	*vpp = nvp;
1112	return (0);
1113}
1114
1115/*
1116 * Gets statistics for a tarfs filesystem.  Returns 0.
1117 */
1118static int
1119tarfs_statfs(struct mount *mp, struct statfs *sbp)
1120{
1121	struct tarfs_mount *tmp;
1122
1123	tmp = MP_TO_TARFS_MOUNT(mp);
1124
1125	sbp->f_bsize = TARFS_BLOCKSIZE;
1126	sbp->f_iosize = tmp->iosize;
1127	sbp->f_blocks = tmp->nblocks;
1128	sbp->f_bfree = 0;
1129	sbp->f_bavail = 0;
1130	sbp->f_files = tmp->nfiles;
1131	sbp->f_ffree = 0;
1132
1133	return (0);
1134}
1135
1136/*
1137 * Gets a vnode for the given inode.  On success, a pointer to the vnode
1138 * is stored in vpp.  Returns 0 on success or a positive errno value on
1139 * failure.
1140 */
1141static int
1142tarfs_vget(struct mount *mp, ino_t ino, int lkflags, struct vnode **vpp)
1143{
1144	struct tarfs_mount *tmp;
1145	struct tarfs_node *tnp;
1146	struct thread *td;
1147	struct vnode *vp;
1148	int error;
1149
1150	TARFS_DPF(FS, "%s: mp %p, ino %lu, lkflags %d\n", __func__, mp, ino,
1151	    lkflags);
1152
1153	td = curthread;
1154	error = vfs_hash_get(mp, ino, lkflags, td, vpp, NULL, NULL);
1155	if (error != 0)
1156		return (error);
1157
1158	if (*vpp != NULL) {
1159		TARFS_DPF(FS, "%s: found hashed vnode %p\n", __func__, *vpp);
1160		return (error);
1161	}
1162
1163	TARFS_DPF(FS, "%s: no hashed vnode for inode %lu\n", __func__, ino);
1164
1165	tmp = MP_TO_TARFS_MOUNT(mp);
1166
1167	if (ino == TARFS_ZIOINO) {
1168		error = vget(tmp->znode, lkflags);
1169		if (error != 0)
1170			return (error);
1171		*vpp = tmp->znode;
1172		return (0);
1173	}
1174
1175	/* XXX Should use hash instead? */
1176	TAILQ_FOREACH(tnp, &tmp->allnodes, entries) {
1177		if (tnp->ino == ino)
1178			break;
1179	}
1180	TARFS_DPF(FS, "%s: search of all nodes found %p\n", __func__, tnp);
1181	if (tnp == NULL)
1182		return (ENOENT);
1183
1184	(void)getnewvnode("tarfs", mp, &tarfs_vnodeops, &vp);
1185	TARFS_DPF(FS, "%s: allocated vnode\n", __func__);
1186	vp->v_data = tnp;
1187	vp->v_type = tnp->type;
1188	tnp->vnode = vp;
1189
1190	lockmgr(vp->v_vnlock, lkflags, NULL);
1191	error = insmntque(vp, mp);
1192	if (error != 0)
1193		goto bad;
1194	TARFS_DPF(FS, "%s: inserting entry into VFS hash\n", __func__);
1195	error = vfs_hash_insert(vp, ino, lkflags, td, vpp, NULL, NULL);
1196	if (error != 0 || *vpp != NULL)
1197		return (error);
1198
1199	vn_set_state(vp, VSTATE_CONSTRUCTED);
1200	*vpp = vp;
1201	return (0);
1202
1203bad:
1204	*vpp = NULLVP;
1205	return (error);
1206}
1207
1208static int
1209tarfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
1210{
1211	struct tarfs_node *tnp;
1212	struct tarfs_fid *tfp;
1213	struct vnode *nvp;
1214	int error;
1215
1216	tfp = (struct tarfs_fid *)fhp;
1217	MP_TO_TARFS_MOUNT(mp);
1218	if (tfp->ino < TARFS_ROOTINO || tfp->ino > INT_MAX)
1219		return (ESTALE);
1220
1221	error = VFS_VGET(mp, tfp->ino, LK_EXCLUSIVE, &nvp);
1222	if (error != 0) {
1223		*vpp = NULLVP;
1224		return (error);
1225	}
1226	tnp = VP_TO_TARFS_NODE(nvp);
1227	if (tnp->mode == 0 ||
1228	    tnp->gen != tfp->gen ||
1229	    tnp->nlink <= 0) {
1230		vput(nvp);
1231		*vpp = NULLVP;
1232		return (ESTALE);
1233	}
1234	*vpp = nvp;
1235	return (0);
1236}
1237
1238static struct vfsops tarfs_vfsops = {
1239	.vfs_fhtovp =	tarfs_fhtovp,
1240	.vfs_mount =	tarfs_mount,
1241	.vfs_root =	tarfs_root,
1242	.vfs_statfs =	tarfs_statfs,
1243	.vfs_unmount =	tarfs_unmount,
1244	.vfs_vget =	tarfs_vget,
1245};
1246VFS_SET(tarfs_vfsops, tarfs, VFCF_READONLY);
1247MODULE_VERSION(tarfs, 1);
1248MODULE_DEPEND(tarfs, xz, 1, 1, 1);
1249