vfs_mount.c revision 138461
1/*
2 * Copyright (c) 1999-2004 Poul-Henning Kamp
3 * Copyright (c) 1999 Michael Smith
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/kern/vfs_mount.c 138461 2004-12-06 16:39:05Z phk $");
39
40#include <sys/param.h>
41#include <sys/conf.h>
42#include <sys/cons.h>
43#include <sys/jail.h>
44#include <sys/kernel.h>
45#include <sys/mac.h>
46#include <sys/malloc.h>
47#include <sys/mount.h>
48#include <sys/mutex.h>
49#include <sys/namei.h>
50#include <sys/proc.h>
51#include <sys/filedesc.h>
52#include <sys/reboot.h>
53#include <sys/sysproto.h>
54#include <sys/sx.h>
55#include <sys/sysctl.h>
56#include <sys/sysent.h>
57#include <sys/systm.h>
58#include <sys/vnode.h>
59
60#include <geom/geom.h>
61
62#include <machine/stdarg.h>
63
64#include "opt_rootdevname.h"
65#include "opt_ddb.h"
66#include "opt_mac.h"
67
68#ifdef DDB
69#include <ddb/ddb.h>
70#endif
71
72#define	ROOTNAME		"root_device"
73#define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)
74
75static void	checkdirs(struct vnode *olddp, struct vnode *newdp);
76static struct cdev *getdiskbyname(char *_name);
77static void	gets(char *cp);
78static int	vfs_domount(struct thread *td, const char *fstype,
79		    char *fspath, int fsflags, void *fsdata, int compat);
80static int	vfs_mount_alloc(struct vnode *dvp, struct vfsconf *vfsp,
81		    const char *fspath, struct thread *td, struct mount **mpp);
82static int	vfs_mountroot_ask(void);
83static int	vfs_mountroot_try(const char *mountfrom);
84static int	vfs_donmount(struct thread *td, int fsflags,
85		    struct uio *fsoptions);
86
87static int	usermount = 0;
88SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
89    "Unprivileged users may mount and unmount file systems");
90
91MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
92
93/* List of mounted filesystems. */
94struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
95
96/* For any iteration/modification of mountlist */
97struct mtx mountlist_mtx;
98
/*
 * A list of mount options and the element type stored on it.  Each
 * option is a name plus an optional value of `len' bytes.
 */
TAILQ_HEAD(vfsoptlist, vfsopt);
struct vfsopt {
	TAILQ_ENTRY(vfsopt) link;	/* linkage on the owning vfsoptlist */
	char	*name;			/* option name, allocated from M_MOUNT */
	void	*value;			/* option value, NULL when len == 0 */
	int	len;			/* size of value in bytes */
};
106
107/*
108 * The vnode of the system's root (/ in the filesystem, without chroot
109 * active.)
110 */
111struct vnode	*rootvnode;
112
113/*
114 * The root filesystem is detailed in the kernel environment variable
115 * vfs.root.mountfrom, which is expected to be in the general format
116 *
117 * <vfsname>:[<path>]
118 * vfsname   := the name of a VFS known to the kernel and capable
119 *              of being mounted as root
120 * path      := disk device name or other data used by the filesystem
121 *              to locate its physical store
122 */
123
124/*
125 * The root specifiers we will try if RB_CDROM is specified.
126 */
127static char *cdrom_rootdevnames[] = {
128	"cd9660:cd0",
129	"cd9660:acd0",
130	NULL
131};
132
133/* legacy find-root code */
134char		*rootdevnames[2] = {NULL, NULL};
135struct cdev *rootdev = NULL;
136#ifdef ROOTDEVNAME
137const char	*ctrootdevname = ROOTDEVNAME;
138#else
139const char	*ctrootdevname = NULL;
140#endif
141
142/* Remove one mount option. */
143static void
144vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
145{
146
147	TAILQ_REMOVE(opts, opt, link);
148	free(opt->name, M_MOUNT);
149	if (opt->value != NULL)
150		free(opt->value, M_MOUNT);
151#ifdef INVARIANTS
152	else if (opt->len != 0)
153		panic("%s: mount option with NULL value but length != 0",
154		    __func__);
155#endif
156	free(opt, M_MOUNT);
157}
158
159/* Release all resources related to the mount options. */
160static void
161vfs_freeopts(struct vfsoptlist *opts)
162{
163	struct vfsopt *opt;
164
165	while (!TAILQ_EMPTY(opts)) {
166		opt = TAILQ_FIRST(opts);
167		vfs_freeopt(opts, opt);
168	}
169	free(opts, M_MOUNT);
170}
171
/*
 * Report whether two option names refer to the same option, treating a
 * leading "no" on either name as a negation of the other.
 *
 * Returns 1 when the names are identical, or when stripping a "no"
 * prefix from exactly one of them yields the other; 0 otherwise.
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{
	const char *stem1, *stem2;

	/* Identical spellings: "opt"/"opt" or "noopt"/"noopt". */
	if (strcmp(opt1, opt2) == 0)
		return (1);

	/* Remainder of each name after a "no" prefix, if it has one. */
	stem1 = (strncmp(opt1, "no", 2) == 0) ? opt1 + 2 : NULL;
	stem2 = (strncmp(opt2, "no", 2) == 0) ? opt2 + 2 : NULL;

	/* "noopt" against "opt". */
	if (stem1 != NULL && strcmp(stem1, opt2) == 0)
		return (1);
	/* "opt" against "noopt". */
	if (stem2 != NULL && strcmp(opt1, stem2) == 0)
		return (1);
	return (0);
}
190
191/*
192 * If a mount option is specified several times,
193 * (with or without the "no" prefix) only keep
194 * the last occurence of it.
195 */
196static void
197vfs_sanitizeopts(struct vfsoptlist *opts)
198{
199	struct vfsopt *opt, *opt2, *tmp;
200
201	TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
202		opt2 = TAILQ_PREV(opt, vfsoptlist, link);
203		while (opt2 != NULL) {
204			if (vfs_equalopts(opt->name, opt2->name)) {
205				tmp = TAILQ_PREV(opt2, vfsoptlist, link);
206				vfs_freeopt(opts, opt2);
207				opt2 = tmp;
208			} else {
209				opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
210			}
211		}
212	}
213}
214
215/*
216 * Build a linked list of mount options from a struct uio.
217 */
218static int
219vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
220{
221	struct vfsoptlist *opts;
222	struct vfsopt *opt;
223	size_t memused;
224	unsigned int i, iovcnt;
225	int error, namelen, optlen;
226
227	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
228	TAILQ_INIT(opts);
229	memused = 0;
230	iovcnt = auio->uio_iovcnt;
231	for (i = 0; i < iovcnt; i += 2) {
232		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
233		namelen = auio->uio_iov[i].iov_len;
234		optlen = auio->uio_iov[i + 1].iov_len;
235		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
236		opt->value = NULL;
237		opt->len = 0;
238
239		/*
240		 * Do this early, so jumps to "bad" will free the current
241		 * option.
242		 */
243		TAILQ_INSERT_TAIL(opts, opt, link);
244		memused += sizeof(struct vfsopt) + optlen + namelen;
245
246		/*
247		 * Avoid consuming too much memory, and attempts to overflow
248		 * memused.
249		 */
250		if (memused > VFS_MOUNTARG_SIZE_MAX ||
251		    optlen > VFS_MOUNTARG_SIZE_MAX ||
252		    namelen > VFS_MOUNTARG_SIZE_MAX) {
253			error = EINVAL;
254			goto bad;
255		}
256
257		if (auio->uio_segflg == UIO_SYSSPACE) {
258			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
259		} else {
260			error = copyin(auio->uio_iov[i].iov_base, opt->name,
261			    namelen);
262			if (error)
263				goto bad;
264		}
265		/* Ensure names are null-terminated strings. */
266		if (opt->name[namelen - 1] != '\0') {
267			error = EINVAL;
268			goto bad;
269		}
270		if (optlen != 0) {
271			opt->len = optlen;
272			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
273			if (auio->uio_segflg == UIO_SYSSPACE) {
274				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
275				    optlen);
276			} else {
277				error = copyin(auio->uio_iov[i + 1].iov_base,
278				    opt->value, optlen);
279				if (error)
280					goto bad;
281			}
282		}
283	}
284	vfs_sanitizeopts(opts);
285	*options = opts;
286	return (0);
287bad:
288	vfs_freeopts(opts);
289	return (error);
290}
291
292/*
293 * Merge the old mount options with the new ones passed
294 * in the MNT_UPDATE case.
295 */
296static void
297vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
298{
299	struct vfsopt *opt, *opt2, *new;
300
301	TAILQ_FOREACH(opt, opts, link) {
302		/*
303		 * Check that this option hasn't been redefined
304		 * nor cancelled with a "no" mount option.
305		 */
306		opt2 = TAILQ_FIRST(toopts);
307		while (opt2 != NULL) {
308			if (strcmp(opt2->name, opt->name) == 0)
309				goto next;
310			if (strncmp(opt2->name, "no", 2) == 0 &&
311			    strcmp(opt2->name + 2, opt->name) == 0) {
312				vfs_freeopt(toopts, opt2);
313				goto next;
314			}
315			opt2 = TAILQ_NEXT(opt2, link);
316		}
317		/* We want this option, duplicate it. */
318		new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
319		new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
320		strcpy(new->name, opt->name);
321		if (opt->len != 0) {
322			new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
323			bcopy(opt->value, new->value, opt->len);
324		} else {
325			new->value = NULL;
326		}
327		new->len = opt->len;
328		TAILQ_INSERT_TAIL(toopts, new, link);
329next:
330		continue;
331	}
332}
333
334/*
335 * New mount API.
336 */
337int
338nmount(td, uap)
339	struct thread *td;
340	struct nmount_args /* {
341		struct iovec *iovp;
342		unsigned int iovcnt;
343		int flags;
344	} */ *uap;
345{
346	struct uio *auio;
347	struct iovec *iov;
348	unsigned int i;
349	int error;
350	u_int iovcnt;
351
352	/* Kick out MNT_ROOTFS early as it is legal internally */
353	if (uap->flags & MNT_ROOTFS)
354		return (EINVAL);
355
356	iovcnt = uap->iovcnt;
357	/*
358	 * Check that we have an even number of iovec's
359	 * and that we have at least two options.
360	 */
361	if ((iovcnt & 1) || (iovcnt < 4))
362		return (EINVAL);
363
364	error = copyinuio(uap->iovp, iovcnt, &auio);
365	if (error)
366		return (error);
367	iov = auio->uio_iov;
368	for (i = 0; i < iovcnt; i++) {
369		if (iov->iov_len > MMAXOPTIONLEN) {
370			free(auio, M_IOV);
371			return (EINVAL);
372		}
373		iov++;
374	}
375	error = vfs_donmount(td, uap->flags, auio);
376	free(auio, M_IOV);
377	return (error);
378}
379
380/*
381 * Allocate and initialize the mount point struct.
382 */
383static int
384vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
385    const char *fspath, struct thread *td, struct mount **mpp)
386{
387	struct mount *mp;
388
389	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
390	TAILQ_INIT(&mp->mnt_nvnodelist);
391	mp->mnt_nvnodelistsize = 0;
392	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
393	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
394	vfs_busy(mp, LK_NOWAIT, 0, td);
395	mp->mnt_op = vfsp->vfc_vfsops;
396	mp->mnt_vfc = vfsp;
397	vfsp->vfc_refcount++;
398	mp->mnt_stat.f_type = vfsp->vfc_typenum;
399	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
400	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
401	mp->mnt_vnodecovered = vp;
402	mp->mnt_cred = crdup(td->td_ucred);
403	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
404	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
405	mp->mnt_iosize_max = DFLTPHYS;
406#ifdef MAC
407	mac_init_mount(mp);
408	mac_create_mount(td->td_ucred, mp);
409#endif
410	*mpp = mp;
411	return (0);
412}
413
414/*
415 * Destroy the mount struct previously allocated by vfs_mount_alloc().
416 */
417void
418vfs_mount_destroy(struct mount *mp, struct thread *td)
419{
420
421	mp->mnt_vfc->vfc_refcount--;
422	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
423		panic("unmount: dangling vnode");
424	vfs_unbusy(mp,td);
425	lockdestroy(&mp->mnt_lock);
426	mtx_destroy(&mp->mnt_mtx);
427	if (mp->mnt_kern_flag & MNTK_MWAIT)
428		wakeup(mp);
429#ifdef MAC
430	mac_destroy_mount(mp);
431#endif
432	if (mp->mnt_opt != NULL)
433		vfs_freeopts(mp->mnt_opt);
434	crfree(mp->mnt_cred);
435	free(mp, M_MOUNT);
436}
437
438static int
439vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
440{
441	struct vfsoptlist *optlist;
442	char *fstype, *fspath;
443	int error, fstypelen, fspathlen;
444
445	error = vfs_buildopts(fsoptions, &optlist);
446	if (error)
447		return (error);
448
449	/*
450	 * We need these two options before the others,
451	 * and they are mandatory for any filesystem.
452	 * Ensure they are NUL terminated as well.
453	 */
454	fstypelen = 0;
455	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
456	if (error || fstype[fstypelen - 1] != '\0') {
457		error = EINVAL;
458		goto bail;
459	}
460	fspathlen = 0;
461	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
462	if (error || fspath[fspathlen - 1] != '\0') {
463		error = EINVAL;
464		goto bail;
465	}
466
467	/*
468	 * Be ultra-paranoid about making sure the type and fspath
469	 * variables will fit in our mp buffers, including the
470	 * terminating NUL.
471	 */
472	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
473		error = ENAMETOOLONG;
474		goto bail;
475	}
476
477	mtx_lock(&Giant);
478	error = vfs_domount(td, fstype, fspath, fsflags, optlist, 0);
479	mtx_unlock(&Giant);
480bail:
481	if (error)
482		vfs_freeopts(optlist);
483	return (error);
484}
485
486/*
487 * Old mount API.
488 */
489#ifndef _SYS_SYSPROTO_H_
490struct mount_args {
491	char	*type;
492	char	*path;
493	int	flags;
494	caddr_t	data;
495};
496#endif
497/* ARGSUSED */
498int
499mount(td, uap)
500	struct thread *td;
501	struct mount_args /* {
502		char *type;
503		char *path;
504		int flags;
505		caddr_t data;
506	} */ *uap;
507{
508	char *fstype;
509	char *fspath;
510	struct vfsconf *vfsp;
511	struct mntarg *ma = NULL;
512	int error;
513
514	/* Kick out MNT_ROOTFS early as it is legal internally */
515	if (uap->flags & MNT_ROOTFS)
516		return (EINVAL);
517
518	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
519
520	/*
521	 * vfs_mount() actually takes a kernel string for `type' and
522	 * `path' now, so extract them.
523	 */
524	error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
525	mtx_lock(&Giant); /* XXX ? */
526	vfsp = vfs_byname_kld(fstype, td, &error);
527	mtx_unlock(&Giant); /* XXX ? */
528	if (vfsp == NULL) {
529		free(fstype, M_TEMP);
530		return (ENOENT);
531	}
532	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
533	error = copyinstr(uap->path, fspath, MNAMELEN, NULL);
534	if (error == 0 && vfsp->vfc_vfsops->vfs_cmount != NULL) {
535		ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
536		ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
537		ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
538		ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
539		ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
540		error = vfsp->vfc_vfsops->vfs_cmount(
541		    ma, uap->data, uap->flags, td);
542	} else if (error == 0) {
543		mtx_lock(&Giant);
544		error = vfs_domount(td, fstype, fspath,
545		    uap->flags, uap->data, 1);
546		mtx_unlock(&Giant);
547	}
548	free(fstype, M_TEMP);
549	free(fspath, M_TEMP);
550	return (error);
551}
552
553/*
554 * vfs_domount(): actually attempt a filesystem mount.
555 */
556static int
557vfs_domount(
558	struct thread *td,	/* Flags common to all filesystems. */
559	const char *fstype,	/* Filesystem type. */
560	char *fspath,		/* Mount path. */
561	int fsflags,		/* Flags common to all filesystems. */
562	void *fsdata,		/* Options local to the filesystem. */
563	int compat		/* Invocation from compat syscall. */
564	)
565{
566	struct vnode *vp;
567	struct mount *mp;
568	struct vfsconf *vfsp;
569	int error, flag = 0, kern_flag = 0;
570	struct vattr va;
571	struct nameidata nd;
572
573	mtx_assert(&Giant, MA_OWNED);
574
575	/*
576	 * Be ultra-paranoid about making sure the type and fspath
577	 * variables will fit in our mp buffers, including the
578	 * terminating NUL.
579	 */
580	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
581		return (ENAMETOOLONG);
582
583	if (jailed(td->td_ucred))
584		return (EPERM);
585	if (usermount == 0) {
586		if ((error = suser(td)) != 0)
587			return (error);
588	}
589
590	/*
591	 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
592	 */
593	if (fsflags & (MNT_EXPORTED | MNT_SUIDDIR)) {
594		if ((error = suser(td)) != 0)
595			return (error);
596	}
597	/*
598	 * Silently enforce MNT_NOSUID and MNT_USER for
599	 * unprivileged users.
600	 */
601	if (suser(td) != 0)
602		fsflags |= MNT_NOSUID | MNT_USER;
603	/*
604	 * Get vnode to be covered
605	 */
606	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
607	if ((error = namei(&nd)) != 0)
608		return (error);
609	NDFREE(&nd, NDF_ONLY_PNBUF);
610	vp = nd.ni_vp;
611	if (fsflags & MNT_UPDATE) {
612		if ((vp->v_vflag & VV_ROOT) == 0) {
613			vput(vp);
614			return (EINVAL);
615		}
616		mp = vp->v_mount;
617		flag = mp->mnt_flag;
618		kern_flag = mp->mnt_kern_flag;
619		/*
620		 * We only allow the filesystem to be reloaded if it
621		 * is currently mounted read-only.
622		 */
623		if ((fsflags & MNT_RELOAD) &&
624		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
625			vput(vp);
626			return (EOPNOTSUPP);	/* Needs translation */
627		}
628		/*
629		 * Only privileged root, or (if MNT_USER is set) the user that
630		 * did the original mount is permitted to update it.
631		 */
632		error = vfs_suser(mp, td);
633		if (error) {
634			vput(vp);
635			return (error);
636		}
637		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
638			vput(vp);
639			return (EBUSY);
640		}
641		VI_LOCK(vp);
642		if ((vp->v_iflag & VI_MOUNT) != 0 ||
643		    vp->v_mountedhere != NULL) {
644			VI_UNLOCK(vp);
645			vfs_unbusy(mp, td);
646			vput(vp);
647			return (EBUSY);
648		}
649		vp->v_iflag |= VI_MOUNT;
650		VI_UNLOCK(vp);
651		mp->mnt_flag |= fsflags &
652		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
653		VOP_UNLOCK(vp, 0, td);
654		if (compat == 0) {
655			mp->mnt_optnew = fsdata;
656			vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
657		}
658	} else {
659		/*
660		 * If the user is not root, ensure that they own the directory
661		 * onto which we are attempting to mount.
662		 */
663		error = VOP_GETATTR(vp, &va, td->td_ucred, td);
664		if (error) {
665			vput(vp);
666			return (error);
667		}
668		if (va.va_uid != td->td_ucred->cr_uid) {
669			if ((error = suser(td)) != 0) {
670				vput(vp);
671				return (error);
672			}
673		}
674		if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
675			vput(vp);
676			return (error);
677		}
678		if (vp->v_type != VDIR) {
679			vput(vp);
680			return (ENOTDIR);
681		}
682		vfsp = vfs_byname_kld(fstype, td, &error);
683		if (vfsp == NULL) {
684			vput(vp);
685			return (error);
686		}
687		VI_LOCK(vp);
688		if ((vp->v_iflag & VI_MOUNT) != 0 ||
689		    vp->v_mountedhere != NULL) {
690			VI_UNLOCK(vp);
691			vput(vp);
692			return (EBUSY);
693		}
694		vp->v_iflag |= VI_MOUNT;
695		VI_UNLOCK(vp);
696
697		/*
698		 * Allocate and initialize the filesystem.
699		 */
700		error = vfs_mount_alloc(vp, vfsp, fspath, td, &mp);
701		if (error) {
702			vput(vp);
703			return (error);
704		}
705		VOP_UNLOCK(vp, 0, td);
706
707		/* XXXMAC: pass to vfs_mount_alloc? */
708		if (compat == 0)
709			mp->mnt_optnew = fsdata;
710	}
711	/*
712	 * Check if the fs implements the type VFS_[O]MOUNT()
713	 * function we are looking for.
714	 */
715	if ((compat && (mp->mnt_op->vfs_omount == NULL)) ||
716	    (!compat && (mp->mnt_op->vfs_mount == NULL))) {
717		printf("%s doesn't support the %s mount syscall\n",
718		    mp->mnt_vfc->vfc_name, compat ? "old" : "new");
719		VI_LOCK(vp);
720		vp->v_iflag &= ~VI_MOUNT;
721		VI_UNLOCK(vp);
722		if (mp->mnt_flag & MNT_UPDATE)
723			vfs_unbusy(mp, td);
724		else
725			vfs_mount_destroy(mp, td);
726		vrele(vp);
727		return (EOPNOTSUPP);
728	}
729
730	/*
731	 * Set the mount level flags.
732	 */
733	if (fsflags & MNT_RDONLY)
734		mp->mnt_flag |= MNT_RDONLY;
735	else if (mp->mnt_flag & MNT_RDONLY)
736		mp->mnt_kern_flag |= MNTK_WANTRDWR;
737	mp->mnt_flag &=~ MNT_UPDATEMASK;
738	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
739	/*
740	 * Mount the filesystem.
741	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
742	 * get.  No freeing of cn_pnbuf.
743	 */
744	if (compat)
745	    error = VFS_OMOUNT(mp, fspath, fsdata, td);
746	else
747	    error = VFS_MOUNT(mp, td);
748	if (!error) {
749		if (mp->mnt_opt != NULL)
750			vfs_freeopts(mp->mnt_opt);
751		mp->mnt_opt = mp->mnt_optnew;
752	}
753	/*
754	 * Prevent external consumers of mount options from reading
755	 * mnt_optnew.
756	*/
757	mp->mnt_optnew = NULL;
758	if (mp->mnt_flag & MNT_UPDATE) {
759		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
760			mp->mnt_flag &= ~MNT_RDONLY;
761		mp->mnt_flag &=
762		    ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
763		mp->mnt_kern_flag &= ~MNTK_WANTRDWR;
764		if (error) {
765			mp->mnt_flag = flag;
766			mp->mnt_kern_flag = kern_flag;
767		}
768		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
769			if (mp->mnt_syncer == NULL)
770				error = vfs_allocate_syncvnode(mp);
771		} else {
772			if (mp->mnt_syncer != NULL)
773				vrele(mp->mnt_syncer);
774			mp->mnt_syncer = NULL;
775		}
776		vfs_unbusy(mp, td);
777		VI_LOCK(vp);
778		vp->v_iflag &= ~VI_MOUNT;
779		VI_UNLOCK(vp);
780		vrele(vp);
781		return (error);
782	}
783	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
784	/*
785	 * Put the new filesystem on the mount list after root.
786	 */
787	cache_purge(vp);
788	if (!error) {
789		struct vnode *newdp;
790
791		VI_LOCK(vp);
792		vp->v_iflag &= ~VI_MOUNT;
793		VI_UNLOCK(vp);
794		vp->v_mountedhere = mp;
795		mtx_lock(&mountlist_mtx);
796		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
797		mtx_unlock(&mountlist_mtx);
798		vfs_event_signal(NULL, VQ_MOUNT, 0);
799		if (VFS_ROOT(mp, &newdp, td))
800			panic("mount: lost mount");
801		checkdirs(vp, newdp);
802		vput(newdp);
803		VOP_UNLOCK(vp, 0, td);
804		if ((mp->mnt_flag & MNT_RDONLY) == 0)
805			error = vfs_allocate_syncvnode(mp);
806		vfs_unbusy(mp, td);
807		if (error || (error = VFS_START(mp, 0, td)) != 0)
808			vrele(vp);
809	} else {
810		VI_LOCK(vp);
811		vp->v_iflag &= ~VI_MOUNT;
812		VI_UNLOCK(vp);
813		vfs_mount_destroy(mp, td);
814		vput(vp);
815	}
816	return (error);
817}
818
819/*
820 * Scan all active processes to see if any of them have a current
821 * or root directory of `olddp'. If so, replace them with the new
822 * mount point.
823 */
824static void
825checkdirs(olddp, newdp)
826	struct vnode *olddp, *newdp;
827{
828	struct filedesc *fdp;
829	struct proc *p;
830	int nrele;
831
832	if (vrefcnt(olddp) == 1)
833		return;
834	sx_slock(&allproc_lock);
835	LIST_FOREACH(p, &allproc, p_list) {
836		mtx_lock(&fdesc_mtx);
837		fdp = p->p_fd;
838		if (fdp == NULL) {
839			mtx_unlock(&fdesc_mtx);
840			continue;
841		}
842		nrele = 0;
843		FILEDESC_LOCK_FAST(fdp);
844		if (fdp->fd_cdir == olddp) {
845			vref(newdp);
846			fdp->fd_cdir = newdp;
847			nrele++;
848		}
849		if (fdp->fd_rdir == olddp) {
850			vref(newdp);
851			fdp->fd_rdir = newdp;
852			nrele++;
853		}
854		FILEDESC_UNLOCK_FAST(fdp);
855		mtx_unlock(&fdesc_mtx);
856		while (nrele--)
857			vrele(olddp);
858	}
859	sx_sunlock(&allproc_lock);
860	if (rootvnode == olddp) {
861		vrele(rootvnode);
862		vref(newdp);
863		rootvnode = newdp;
864	}
865}
866
867/*
868 * Unmount a filesystem.
869 *
870 * Note: unmount takes a path to the vnode mounted on as argument,
871 * not special file (as before).
872 */
873#ifndef _SYS_SYSPROTO_H_
874struct unmount_args {
875	char	*path;
876	int	flags;
877};
878#endif
879/* ARGSUSED */
880int
881unmount(td, uap)
882	struct thread *td;
883	register struct unmount_args /* {
884		char *path;
885		int flags;
886	} */ *uap;
887{
888	struct mount *mp;
889	char *pathbuf;
890	int error, id0, id1;
891
892	if (jailed(td->td_ucred))
893		return (EPERM);
894	if (usermount == 0) {
895		if ((error = suser(td)) != 0)
896			return (error);
897	}
898
899	pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
900	error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
901	if (error) {
902		free(pathbuf, M_TEMP);
903		return (error);
904	}
905	if (uap->flags & MNT_BYFSID) {
906		/* Decode the filesystem ID. */
907		if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
908			free(pathbuf, M_TEMP);
909			return (EINVAL);
910		}
911
912		mtx_lock(&mountlist_mtx);
913		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
914			if (mp->mnt_stat.f_fsid.val[0] == id0 &&
915			    mp->mnt_stat.f_fsid.val[1] == id1)
916				break;
917		}
918		mtx_unlock(&mountlist_mtx);
919	} else {
920		mtx_lock(&mountlist_mtx);
921		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
922			if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
923				break;
924		}
925		mtx_unlock(&mountlist_mtx);
926	}
927	free(pathbuf, M_TEMP);
928	if (mp == NULL) {
929		/*
930		 * Previously we returned ENOENT for a nonexistent path and
931		 * EINVAL for a non-mountpoint.  We cannot tell these apart
932		 * now, so in the !MNT_BYFSID case return the more likely
933		 * EINVAL for compatibility.
934		 */
935		return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
936	}
937
938	/*
939	 * Only privileged root, or (if MNT_USER is set) the user that did the
940	 * original mount is permitted to unmount this filesystem.
941	 */
942	error = vfs_suser(mp, td);
943	if (error)
944		return (error);
945
946	/*
947	 * Don't allow unmounting the root filesystem.
948	 */
949	if (mp->mnt_flag & MNT_ROOTFS)
950		return (EINVAL);
951	mtx_lock(&Giant);
952	error = dounmount(mp, uap->flags, td);
953	mtx_unlock(&Giant);
954	return (error);
955}
956
957/*
958 * Do the actual filesystem unmount.
959 */
960int
961dounmount(mp, flags, td)
962	struct mount *mp;
963	int flags;
964	struct thread *td;
965{
966	struct vnode *coveredvp, *fsrootvp;
967	int error;
968	int async_flag;
969
970	mtx_assert(&Giant, MA_OWNED);
971
972	mtx_lock(&mountlist_mtx);
973	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
974		mtx_unlock(&mountlist_mtx);
975		return (EBUSY);
976	}
977	mp->mnt_kern_flag |= MNTK_UNMOUNT;
978	/* Allow filesystems to detect that a forced unmount is in progress. */
979	if (flags & MNT_FORCE)
980		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
981	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
982	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
983	if (error) {
984		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
985		if (mp->mnt_kern_flag & MNTK_MWAIT)
986			wakeup(mp);
987		return (error);
988	}
989	vn_start_write(NULL, &mp, V_WAIT);
990
991	if (mp->mnt_flag & MNT_EXPUBLIC)
992		vfs_setpublicfs(NULL, NULL, NULL);
993
994	vfs_msync(mp, MNT_WAIT);
995	async_flag = mp->mnt_flag & MNT_ASYNC;
996	mp->mnt_flag &= ~MNT_ASYNC;
997	cache_purgevfs(mp);	/* remove cache entries for this file sys */
998	if (mp->mnt_syncer != NULL)
999		vrele(mp->mnt_syncer);
1000	/*
1001	 * For forced unmounts, move process cdir/rdir refs on the fs root
1002	 * vnode to the covered vnode.  For non-forced unmounts we want
1003	 * such references to cause an EBUSY error.
1004	 */
1005	if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) {
1006		if (mp->mnt_vnodecovered != NULL)
1007			checkdirs(fsrootvp, mp->mnt_vnodecovered);
1008		if (fsrootvp == rootvnode) {
1009			vrele(rootvnode);
1010			rootvnode = NULL;
1011		}
1012		vput(fsrootvp);
1013	}
1014	if (((mp->mnt_flag & MNT_RDONLY) ||
1015	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
1016	    (flags & MNT_FORCE)) {
1017		error = VFS_UNMOUNT(mp, flags, td);
1018	}
1019	vn_finished_write(mp);
1020	if (error) {
1021		/* Undo cdir/rdir and rootvnode changes made above. */
1022		if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) {
1023			if (mp->mnt_vnodecovered != NULL)
1024				checkdirs(mp->mnt_vnodecovered, fsrootvp);
1025			if (rootvnode == NULL) {
1026				rootvnode = fsrootvp;
1027				vref(rootvnode);
1028			}
1029			vput(fsrootvp);
1030		}
1031		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1032			(void) vfs_allocate_syncvnode(mp);
1033		mtx_lock(&mountlist_mtx);
1034		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1035		mp->mnt_flag |= async_flag;
1036		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
1037		    &mountlist_mtx, td);
1038		if (mp->mnt_kern_flag & MNTK_MWAIT)
1039			wakeup(mp);
1040		return (error);
1041	}
1042	mtx_lock(&mountlist_mtx);
1043	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1044	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1045		coveredvp->v_mountedhere = NULL;
1046	mtx_unlock(&mountlist_mtx);
1047	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
1048	vfs_mount_destroy(mp, td);
1049	if (coveredvp != NULL)
1050		vrele(coveredvp);
1051	return (0);
1052}
1053
1054/*
1055 * Find and mount the root filesystem
1056 */
1057void
1058vfs_mountroot(void)
1059{
1060	char *cp;
1061	int error, i, asked = 0;
1062
1063
1064	/*
1065	 * Wait for GEOM to settle down
1066	 */
1067	DROP_GIANT();
1068	g_waitidle();
1069	PICKUP_GIANT();
1070
1071	/*
1072	 * We are booted with instructions to prompt for the root filesystem.
1073	 */
1074	if (boothowto & RB_ASKNAME) {
1075		if (!vfs_mountroot_ask())
1076			return;
1077		asked = 1;
1078	}
1079
1080	/*
1081	 * The root filesystem information is compiled in, and we are
1082	 * booted with instructions to use it.
1083	 */
1084	if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
1085		if (!vfs_mountroot_try(ctrootdevname))
1086			return;
1087		ctrootdevname = NULL;
1088	}
1089
1090	/*
1091	 * We've been given the generic "use CDROM as root" flag.  This is
1092	 * necessary because one media may be used in many different
1093	 * devices, so we need to search for them.
1094	 */
1095	if (boothowto & RB_CDROM) {
1096		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1097			if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1098				return;
1099		}
1100	}
1101
1102	/*
1103	 * Try to use the value read by the loader from /etc/fstab, or
1104	 * supplied via some other means.  This is the preferred
1105	 * mechanism.
1106	 */
1107	cp = getenv("vfs.root.mountfrom");
1108	if (cp != NULL) {
1109		error = vfs_mountroot_try(cp);
1110		freeenv(cp);
1111		if (!error)
1112			return;
1113	}
1114
1115	/*
1116	 * Try values that may have been computed by code during boot
1117	 */
1118	if (!vfs_mountroot_try(rootdevnames[0]))
1119		return;
1120	if (!vfs_mountroot_try(rootdevnames[1]))
1121		return;
1122
1123	/*
1124	 * If we (still) have a compiled-in default, try it.
1125	 */
1126	if (ctrootdevname != NULL)
1127		if (!vfs_mountroot_try(ctrootdevname))
1128			return;
1129
1130	/*
1131	 * Everything so far has failed, prompt on the console if we haven't
1132	 * already tried that.
1133	 */
1134	if (!asked)
1135		if (!vfs_mountroot_ask())
1136			return;
1137	panic("Root mount failed, startup aborted.");
1138}
1139
1140/*
1141 * Mount (mountfrom) as the root filesystem.
1142 */
1143static int
1144vfs_mountroot_try(const char *mountfrom)
1145{
1146        struct mount	*mp;
1147	struct thread	*td = curthread;
1148	struct vfsconf	*vfsp;
1149	char		*vfsname, *path;
1150	int		error;
1151	char		patt[32];
1152	int		s;
1153
1154	vfsname = NULL;
1155	path    = NULL;
1156	mp      = NULL;
1157	error   = EINVAL;
1158
1159	if (mountfrom == NULL)
1160		return (error);		/* don't complain */
1161
1162	s = splcam();			/* Overkill, but annoying without it */
1163	printf("Trying to mount root from %s\n", mountfrom);
1164	splx(s);
1165
1166	/* parse vfs name and path */
1167	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1168	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1169	vfsname[0] = path[0] = 0;
1170	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1171	if (sscanf(mountfrom, patt, vfsname, path) < 1)
1172		goto done;
1173
1174	if (path[0] == '\0')
1175		strcpy(path, ROOTNAME);
1176
1177	vfsp = vfs_byname(vfsname);
1178	if (vfsp == NULL) {
1179		printf("Can't find filesystem \"%s\"\n", vfsname);
1180		goto done;
1181	}
1182	error = vfs_mount_alloc(NULLVP, vfsp, "/", td, &mp);
1183	if (error) {
1184		printf("Could not alloc mountpoint\n");
1185		goto done;
1186	}
1187
1188	mp->mnt_flag |= MNT_RDONLY | MNT_ROOTFS;
1189
1190	strlcpy(mp->mnt_stat.f_mntfromname, path, MNAMELEN);
1191
1192	/*
1193	 * do our best to set rootdev
1194	 * XXX: This does not belong here!
1195	 */
1196	if (path[0] != '\0') {
1197		struct cdev *diskdev;
1198		diskdev = getdiskbyname(path);
1199		if (diskdev != NULL)
1200			rootdev = diskdev;
1201		else
1202			printf("setrootbyname failed\n");
1203	}
1204
1205	error = VFS_OMOUNT(mp, path, NULL, curthread);
1206
1207done:
1208	if (vfsname != NULL)
1209		free(vfsname, M_MOUNT);
1210	if (path != NULL)
1211		free(path, M_MOUNT);
1212	if (error != 0) {
1213		if (mp != NULL)
1214			vfs_mount_destroy(mp, curthread);
1215		printf("Root mount failed: %d\n", error);
1216	} else {
1217
1218		/* register with list of mounted filesystems */
1219		mtx_lock(&mountlist_mtx);
1220		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1221		mtx_unlock(&mountlist_mtx);
1222
1223		/* sanity check system clock against root fs timestamp */
1224		inittodr(mp->mnt_time);
1225		vfs_unbusy(mp, curthread);
1226		error = VFS_START(mp, 0, curthread);
1227	}
1228	return (error);
1229}
1230
/*
 * Prompt on the console until a usable root filesystem is named.
 *
 * Returns 0 as soon as a specification typed by the operator mounts
 * successfully, or 1 if an empty line is entered.  A line beginning
 * with '?' prints the GEOM managed disk devices and prompts again.
 */
static int
vfs_mountroot_ask(void)
{
	char spec[128];

	while (1) {
		printf("\nManual root filesystem specification:\n");
		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
#if defined(__i386__) || defined(__ia64__)
		printf("                       eg. ufs:da0s1a\n");
#else
		printf("                       eg. ufs:/dev/da0a\n");
#endif
		printf("  ?                  List valid disk boot devices\n");
		printf("  <empty line>       Abort manual input\n");
		printf("\nmountroot> ");
		gets(spec);

		switch (spec[0]) {
		case '\0':
			/* Operator gave up. */
			return (1);
		case '?':
			printf("\nList of GEOM managed disk devices:\n  ");
			g_dev_print();
			break;
		default:
			if (vfs_mountroot_try(spec) == 0)
				return (0);
			break;
		}
	}
}
1262
/*
 * Local helper function for vfs_mountroot_ask.
 *
 * Read one line from the console into cp, echoing each character read.
 * Input ends at newline or carriage return, or when cngetc() returns -1;
 * the stored string is NUL terminated and does not include the
 * terminator character.
 *
 * Line editing: backspace and DEL erase the previous character, '#'
 * also drops the previous character, and '@' or ^U discard the whole
 * line.
 *
 * NOTE(review): no buffer size is passed in, so nothing here bounds the
 * number of characters stored; the caller must supply a buffer large
 * enough for whatever is typed.
 */
static void
gets(char *cp)
{
	char *lp;
	int c;

	lp = cp;
	for (;;) {
		c = cngetc();
		/*
		 * Test for -1 before masking to 7 bits: once masked, -1
		 * becomes 0177 and would be handled as DEL instead of
		 * ending the line.
		 */
		if (c == -1) {
			*lp = '\0';
			return;
		}
		c &= 0177;
		printf("%c", c);
		switch (c) {
		case '\n':
		case '\r':
			*lp = '\0';
			return;
		case '\b':
		case '\177':
			if (lp > cp) {
				/* Blank out the erased character. */
				printf(" \b");
				lp--;
			}
			continue;
		case '#':
			/* Erase one character, never moving before the start. */
			lp--;
			if (lp < cp)
				lp = cp;
			continue;
		case '@':
		case 'u' & 037:
			/* Kill the line and start over on a fresh one. */
			lp = cp;
			printf("%c", '\n');
			continue;
		default:
			*lp++ = c;
		}
	}
}
1303
1304/*
1305 * Convert a given name to the cdev pointer of the device, which is probably
1306 * but not by definition, a disk.  Mount a DEVFS (on nothing), look the name
1307 * up, extract the cdev from the vnode and unmount it again.  Unfortunately
1308 * we cannot use the vnode directly (because we unmount the DEVFS again)
1309 * so the filesystems still have to do the bdevvp() stunt.
1310 */
1311static struct cdev *
1312getdiskbyname(char *name)
1313{
1314	char *cp = name;
1315	struct cdev *dev = NULL;
1316	struct thread *td = curthread;
1317	struct vfsconf *vfsp;
1318	struct mount *mp = NULL;
1319	struct vnode *vroot = NULL;
1320	struct nameidata nid;
1321	int error;
1322
1323	if (!bcmp(cp, "/dev/", 5))
1324		cp += 5;
1325
1326	do {
1327		vfsp = vfs_byname("devfs");
1328		if (vfsp == NULL)
1329			break;
1330		error = vfs_mount_alloc(NULLVP, vfsp, "/dev", td, &mp);
1331		if (error)
1332			break;
1333		mp->mnt_flag |= MNT_RDONLY;
1334
1335		error = VFS_MOUNT(mp, curthread);
1336		if (error)
1337			break;
1338		VFS_START(mp, 0, td);
1339		VFS_ROOT(mp, &vroot, td);
1340		VOP_UNLOCK(vroot, 0, td);
1341
1342		NDINIT(&nid, LOOKUP, NOCACHE|FOLLOW,
1343		    UIO_SYSSPACE, cp, curthread);
1344		nid.ni_startdir = vroot;
1345		nid.ni_pathlen = strlen(cp);
1346		nid.ni_cnd.cn_cred = curthread->td_ucred;
1347		nid.ni_cnd.cn_nameptr = cp;
1348
1349		error = lookup(&nid);
1350		if (error)
1351			break;
1352		if (nid.ni_vp->v_type != VCHR)
1353			dev = NULL;
1354		else
1355			dev = nid.ni_vp->v_rdev;
1356		NDFREE(&nid, 0);
1357	} while (0);
1358
1359	if (vroot != NULL)
1360		VFS_UNMOUNT(mp, 0, td);
1361	if (mp != NULL)
1362		vfs_mount_destroy(mp, td);
1363  	return (dev);
1364}
1365
/*
 * DDB "show disk/<devicename>" command: print the struct cdev pointer
 * that getdiskbyname() finds for the device named in the modifier.
 */
#ifdef DDB
DB_SHOW_COMMAND(disk, db_getdiskbyname)
{
	struct cdev *diskdev;

	/* The device name is passed as the command modifier. */
	if (modif[0] == '\0') {
		db_error("usage: show disk/devicename");
		return;
	}

	diskdev = getdiskbyname(modif);
	if (diskdev == NULL) {
		db_printf("No disk device matched.\n");
		return;
	}
	db_printf("struct cdev *= %p\n", diskdev);
}
#endif
1383
1384/*
1385 * Get a mount option by its name.
1386 *
1387 * Return 0 if the option was found, ENOENT otherwise.
1388 * If len is non-NULL it will be filled with the length
1389 * of the option. If buf is non-NULL, it will be filled
1390 * with the address of the option.
1391 */
1392int
1393vfs_getopt(opts, name, buf, len)
1394	struct vfsoptlist *opts;
1395	const char *name;
1396	void **buf;
1397	int *len;
1398{
1399	struct vfsopt *opt;
1400
1401	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1402
1403	TAILQ_FOREACH(opt, opts, link) {
1404		if (strcmp(name, opt->name) == 0) {
1405			if (len != NULL)
1406				*len = opt->len;
1407			if (buf != NULL)
1408				*buf = opt->value;
1409			return (0);
1410		}
1411	}
1412	return (ENOENT);
1413}
1414
1415/*
1416 * Find and copy a mount option.
1417 *
1418 * The size of the buffer has to be specified
1419 * in len, if it is not the same length as the
1420 * mount option, EINVAL is returned.
1421 * Returns ENOENT if the option is not found.
1422 */
1423int
1424vfs_copyopt(opts, name, dest, len)
1425	struct vfsoptlist *opts;
1426	const char *name;
1427	void *dest;
1428	int len;
1429{
1430	struct vfsopt *opt;
1431
1432	KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
1433
1434	TAILQ_FOREACH(opt, opts, link) {
1435		if (strcmp(name, opt->name) == 0) {
1436			if (len != opt->len)
1437				return (EINVAL);
1438			bcopy(opt->value, dest, opt->len);
1439			return (0);
1440		}
1441	}
1442	return (ENOENT);
1443}
1444
1445
1446/*
1447 * This is a helper function for filesystems to traverse their
1448 * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
1449 */
1450
1451struct vnode *
1452__mnt_vnode_next(struct vnode **nvp, struct mount *mp)
1453{
1454	struct vnode *vp;
1455
1456	mtx_assert(&mp->mnt_mtx, MA_OWNED);
1457
1458	vp = *nvp;
1459	/* Check if we are done */
1460	if (vp == NULL)
1461		return (NULL);
1462	/* If our next vnode is no longer ours, start over */
1463	if (vp->v_mount != mp)
1464		vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
1465	/* Save pointer to next vnode in list */
1466	if (vp != NULL)
1467		*nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1468	else
1469		*nvp = NULL;
1470	return (vp);
1471}
1472
1473int
1474__vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
1475{
1476	int error;
1477
1478	error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
1479	if (sbp != &mp->mnt_stat)
1480		memcpy(sbp, &mp->mnt_stat, sizeof sbp);
1481	return (error);
1482}
1483
1484/*
1485 * ---------------------------------------------------------------------
1486 * This is the api for building mount args and mounting filesystems from
1487 * inside the kernel.
1488 *
1489 * The API works by accumulation of individual args.  First error is
1490 * latched.
1491 *
1492 * XXX: should be documented in new manpage kernel_mount(9)
1493 */
1494
/*
 * Header of one auxiliary allocation owned by a struct mntarg.  Each
 * such allocation is linked onto the owning mntarg's list so that
 * free_mntarg() can release it when the argument set is torn down.
 */
struct mntaarg {
	SLIST_ENTRY(mntaarg)	next;	/* linkage on the mntarg list */
};
1499
/*
 * Accumulator for a set of mount arguments being built inside the
 * kernel.  Arguments are stored as consecutive name/value iovec pairs.
 */
struct mntarg {
	/* Vector of name/value iovecs, grown as arguments are added. */
	struct iovec *v;
	/* Number of iovecs currently in v (two per argument). */
	int len;
	/* First error encountered while building; 0 if none. */
	int error;
	/* Auxiliary allocations to release together with this header. */
	SLIST_HEAD(, mntaarg)	list;
};
1507
/*
 * Add a boolean argument.
 *
 * name is the negated spelling of the option and must start with "no".
 * When flag is true the option is added without the "no" prefix; when
 * flag is false it is added under the full name.  The option carries
 * no value.
 */
struct mntarg *
mount_argb(struct mntarg *ma, int flag, const char *name)
{
	const char *optname;

	KASSERT(name[0] == 'n' && name[1] == 'o',
	    ("mount_argb(...,%s): name must start with 'no'", name));

	/* A true flag selects the positive form: skip the "no" prefix. */
	optname = name;
	if (flag)
		optname += 2;
	return (mount_arg(ma, optname, NULL, 0));
}
1523
1524/*
1525 * Add an argument printf style
1526 */
1527struct mntarg *
1528mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
1529{
1530	va_list ap;
1531	struct mntaarg *maa;
1532	struct sbuf *sb;
1533	int len;
1534
1535	if (ma == NULL) {
1536		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
1537		SLIST_INIT(&ma->list);
1538	}
1539	if (ma->error)
1540		return (ma);
1541
1542	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
1543	    M_MOUNT, M_WAITOK);
1544	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
1545	ma->v[ma->len].iov_len = strlen(name) + 1;
1546	ma->len++;
1547
1548	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
1549	va_start(ap, fmt);
1550	sbuf_vprintf(sb, fmt, ap);
1551	va_end(ap);
1552	sbuf_finish(sb);
1553	len = sbuf_len(sb) + 1;
1554	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
1555	SLIST_INSERT_HEAD(&ma->list, maa, next);
1556	bcopy(sbuf_data(sb), maa + 1, len);
1557	sbuf_delete(sb);
1558
1559	ma->v[ma->len].iov_base = maa + 1;
1560	ma->v[ma->len].iov_len = len;
1561	ma->len++;
1562
1563	return (ma);
1564}
1565
1566/*
1567 * Add an argument which is a userland string.
1568 */
1569struct mntarg *
1570mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
1571{
1572	struct mntaarg *maa;
1573	char *tbuf;
1574
1575	if (val == NULL)
1576		return (ma);
1577	if (ma == NULL) {
1578		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
1579		SLIST_INIT(&ma->list);
1580	}
1581	if (ma->error)
1582		return (ma);
1583	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
1584	SLIST_INSERT_HEAD(&ma->list, maa, next);
1585	tbuf = (void *)(maa + 1);
1586	ma->error = copyinstr(val, tbuf, len, NULL);
1587	return (mount_arg(ma, name, tbuf, -1));
1588}
1589
1590/*
1591 * Plain argument.
1592 *
1593 * If length is -1, use printf.
1594 */
1595struct mntarg *
1596mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
1597{
1598
1599	if (ma == NULL) {
1600		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
1601		SLIST_INIT(&ma->list);
1602	}
1603	if (ma->error)
1604		return (ma);
1605
1606	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
1607	    M_MOUNT, M_WAITOK);
1608	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
1609	ma->v[ma->len].iov_len = strlen(name) + 1;
1610	ma->len++;
1611
1612	ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
1613	if (len < 0)
1614		ma->v[ma->len].iov_len = strlen(val) + 1;
1615	else
1616		ma->v[ma->len].iov_len = len;
1617	ma->len++;
1618	return (ma);
1619}
1620
1621/*
1622 * Free a mntarg structure
1623 */
1624void
1625free_mntarg(struct mntarg *ma)
1626{
1627	struct mntaarg *maa;
1628
1629	while (!SLIST_EMPTY(&ma->list)) {
1630		maa = SLIST_FIRST(&ma->list);
1631		SLIST_REMOVE_HEAD(&ma->list, next);
1632		free(maa, M_MOUNT);
1633	}
1634	free(ma->v, M_MOUNT);
1635	free(ma, M_MOUNT);
1636}
1637
1638/*
1639 * Mount a filesystem
1640 */
1641int
1642kernel_mount(struct mntarg *ma, int flags)
1643{
1644	struct uio auio;
1645	int error;
1646
1647	KASSERT(ma != NULL, ("kernel_mount NULL ma"));
1648	KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
1649	KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
1650
1651	auio.uio_iov = ma->v;
1652	auio.uio_iovcnt = ma->len;
1653	auio.uio_segflg = UIO_SYSSPACE;
1654
1655	error = ma->error;
1656	if (!error)
1657		error = vfs_donmount(curthread, flags, &auio);
1658	free_mntarg(ma);
1659	return (error);
1660}
1661
/*
 * Mount a filesystem from a variable argument list.
 *
 * After flags, the arguments are (const char *name, const void *value)
 * pairs terminated by a NULL name.  Every value is added with length
 * -1, i.e. as a NUL terminated string.  Returns the result of
 * kernel_mount().
 */
int
kernel_vmount(int flags, ...)
{
	struct mntarg *args;
	const char *optname;
	const void *optval;
	va_list ap;

	args = NULL;
	va_start(ap, flags);
	while ((optname = va_arg(ap, const char *)) != NULL) {
		optval = va_arg(ap, const void *);
		args = mount_arg(args, optname, optval, -1);
	}
	va_end(ap);

	return (kernel_mount(args, flags));
}
1687