vfs_mount.c revision 134827
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * Copyright (c) 1999 Michael Smith
35 * All rights reserved.
36 * Copyright (c) 1999 Poul-Henning Kamp
37 * All rights reserved.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 *    notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 *    notice, this list of conditions and the following disclaimer in the
46 *    documentation and/or other materials provided with the distribution.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61#include <sys/cdefs.h>
62__FBSDID("$FreeBSD: head/sys/kern/vfs_mount.c 134827 2004-09-05 22:24:28Z alfred $");
63
64#include <sys/param.h>
65#include <sys/conf.h>
66#include <sys/cons.h>
67#include <sys/jail.h>
68#include <sys/kernel.h>
69#include <sys/linker.h>
70#include <sys/mac.h>
71#include <sys/malloc.h>
72#include <sys/mount.h>
73#include <sys/mutex.h>
74#include <sys/namei.h>
75#include <sys/proc.h>
76#include <sys/filedesc.h>
77#include <sys/reboot.h>
78#include <sys/sysproto.h>
79#include <sys/sx.h>
80#include <sys/sysctl.h>
81#include <sys/sysent.h>
82#include <sys/systm.h>
83#include <sys/vnode.h>
84
85#include <geom/geom.h>
86
87#include <machine/stdarg.h>
88
89#include "opt_rootdevname.h"
90#include "opt_ddb.h"
91#include "opt_mac.h"
92
93#ifdef DDB
94#include <ddb/ddb.h>
95#endif
96
/*
 * NOTE(review): ROOTNAME is not referenced in the visible part of this
 * file; presumably it names the placeholder root device -- confirm.
 */
#define	ROOTNAME		"root_device"
/*
 * Upper bound, in bytes, applied by vfs_buildopts() to each option name,
 * each option value, and the running total accounted for one request.
 */
#define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)

/* Forward declarations of file-local helpers. */
static void	checkdirs(struct vnode *olddp, struct vnode *newdp);
static void	gets(char *cp);
static int	vfs_domount(struct thread *td, const char *fstype,
		    char *fspath, int fsflags, void *fsdata, int compat);
static int	vfs_mount_alloc(struct vnode *dvp, struct vfsconf *vfsp,
		    const char *fspath, struct thread *td, struct mount **mpp);
static int	vfs_mountroot_ask(void);
static int	vfs_mountroot_try(const char *mountfrom);
static int	vfs_donmount(struct thread *td, int fsflags,
		    struct uio *fsoptions);

/*
 * vfs.usermount: while zero, vfs_domount() and unmount() require suser()
 * to succeed before doing anything else.
 */
static int	usermount = 0;
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Unprivileged users may mount and unmount file systems");

/* Malloc type used for mount structures and mount option lists. */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");

/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx mountlist_mtx;

/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)
 */
struct vnode	*rootvnode;

/*
 * The root filesystem is detailed in the kernel environment variable
 * vfs.root.mountfrom, which is expected to be in the general format
 *
 * <vfsname>:[<path>]
 * vfsname   := the name of a VFS known to the kernel and capable
 *              of being mounted as root
 * path      := disk device name or other data used by the filesystem
 *              to locate its physical store
 */

/*
 * The root specifiers we will try if RB_CDROM is specified.
 * The list is NULL-terminated and uses the <vfsname>:<path> format
 * described above.
 */
static char *cdrom_rootdevnames[] = {
	"cd9660:cd0",
	"cd9660:acd0",
	NULL
};

/* legacy find-root code */
char		*rootdevnames[2] = {NULL, NULL};
struct cdev *rootdev = NULL;
/* Compile-time root device name, taken from ROOTDEVNAME when defined. */
#ifdef ROOTDEVNAME
const char	*ctrootdevname = ROOTDEVNAME;
#else
const char	*ctrootdevname = NULL;
#endif
157
158/*
159 * Has to be dynamic as the value of rootdev can change; however, it can't
160 * change after the root is mounted, so a user process can't access this
161 * sysctl until after the value is unchangeable.
162 */
163static int
164sysctl_rootdev(SYSCTL_HANDLER_ARGS)
165{
166	int error;
167
168	/* _RD prevents this from happening. */
169	KASSERT(req->newptr == NULL, ("Attempt to change root device name"));
170
171	if (rootdev != NULL)
172		error = sysctl_handle_string(oidp, rootdev->si_name, 0, req);
173	else
174		error = sysctl_handle_string(oidp, "", 0, req);
175
176	return (error);
177}
178
179SYSCTL_PROC(_kern, OID_AUTO, rootdev, CTLTYPE_STRING | CTLFLAG_RD,
180    0, 0, sysctl_rootdev, "A", "Root file system device");
181
182/* Remove one mount option. */
183static void
184vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
185{
186
187	TAILQ_REMOVE(opts, opt, link);
188	free(opt->name, M_MOUNT);
189	if (opt->value != NULL)
190		free(opt->value, M_MOUNT);
191#ifdef INVARIANTS
192	else if (opt->len != 0)
193		panic("%s: mount option with NULL value but length != 0",
194		    __func__);
195#endif
196	free(opt, M_MOUNT);
197}
198
199/* Release all resources related to the mount options. */
200static void
201vfs_freeopts(struct vfsoptlist *opts)
202{
203	struct vfsopt *opt;
204
205	while (!TAILQ_EMPTY(opts)) {
206		opt = TAILQ_FIRST(opts);
207		vfs_freeopt(opts, opt);
208	}
209	free(opts, M_MOUNT);
210}
211
/*
 * Check if options are equal (with or without the "no" prefix).
 *
 * Returns 1 when the two names are identical, or when one of them is the
 * other with "no" prepended; returns 0 otherwise.
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{
	const char *bare1, *bare2;

	/* "opt" vs. "opt" or "noopt" vs. "noopt" */
	if (strcmp(opt1, opt2) == 0)
		return (1);

	/* Name with a leading "no" stripped, or NULL if there is none. */
	bare1 = (strncmp(opt1, "no", 2) == 0) ? opt1 + 2 : NULL;
	bare2 = (strncmp(opt2, "no", 2) == 0) ? opt2 + 2 : NULL;

	/* "noopt" vs. "opt" */
	if (bare1 != NULL && strcmp(bare1, opt2) == 0)
		return (1);
	/* "opt" vs. "noopt" */
	if (bare2 != NULL && strcmp(opt1, bare2) == 0)
		return (1);
	return (0);
}
230
231/*
232 * If a mount option is specified several times,
233 * (with or without the "no" prefix) only keep
234 * the last occurence of it.
235 */
236static void
237vfs_sanitizeopts(struct vfsoptlist *opts)
238{
239	struct vfsopt *opt, *opt2, *tmp;
240
241	TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
242		opt2 = TAILQ_PREV(opt, vfsoptlist, link);
243		while (opt2 != NULL) {
244			if (vfs_equalopts(opt->name, opt2->name)) {
245				tmp = TAILQ_PREV(opt2, vfsoptlist, link);
246				vfs_freeopt(opts, opt2);
247				opt2 = tmp;
248			} else {
249				opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
250			}
251		}
252	}
253}
254
255/*
256 * Build a linked list of mount options from a struct uio.
257 */
258static int
259vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
260{
261	struct vfsoptlist *opts;
262	struct vfsopt *opt;
263	size_t memused;
264	unsigned int i, iovcnt;
265	int error, namelen, optlen;
266
267	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
268	TAILQ_INIT(opts);
269	memused = 0;
270	iovcnt = auio->uio_iovcnt;
271	for (i = 0; i < iovcnt; i += 2) {
272		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
273		namelen = auio->uio_iov[i].iov_len;
274		optlen = auio->uio_iov[i + 1].iov_len;
275		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
276		opt->value = NULL;
277		opt->len = 0;
278
279		/*
280		 * Do this early, so jumps to "bad" will free the current
281		 * option.
282		 */
283		TAILQ_INSERT_TAIL(opts, opt, link);
284		memused += sizeof(struct vfsopt) + optlen + namelen;
285
286		/*
287		 * Avoid consuming too much memory, and attempts to overflow
288		 * memused.
289		 */
290		if (memused > VFS_MOUNTARG_SIZE_MAX ||
291		    optlen > VFS_MOUNTARG_SIZE_MAX ||
292		    namelen > VFS_MOUNTARG_SIZE_MAX) {
293			error = EINVAL;
294			goto bad;
295		}
296
297		if (auio->uio_segflg == UIO_SYSSPACE) {
298			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
299		} else {
300			error = copyin(auio->uio_iov[i].iov_base, opt->name,
301			    namelen);
302			if (error)
303				goto bad;
304		}
305		/* Ensure names are null-terminated strings. */
306		if (opt->name[namelen - 1] != '\0') {
307			error = EINVAL;
308			goto bad;
309		}
310		if (optlen != 0) {
311			opt->len = optlen;
312			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
313			if (auio->uio_segflg == UIO_SYSSPACE) {
314				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
315				    optlen);
316			} else {
317				error = copyin(auio->uio_iov[i + 1].iov_base,
318				    opt->value, optlen);
319				if (error)
320					goto bad;
321			}
322		}
323	}
324	vfs_sanitizeopts(opts);
325	*options = opts;
326	return (0);
327bad:
328	vfs_freeopts(opts);
329	return (error);
330}
331
332/*
333 * Merge the old mount options with the new ones passed
334 * in the MNT_UPDATE case.
335 */
336static void
337vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
338{
339	struct vfsopt *opt, *opt2, *new;
340
341	TAILQ_FOREACH(opt, opts, link) {
342		/*
343		 * Check that this option hasn't been redefined
344		 * nor cancelled with a "no" mount option.
345		 */
346		opt2 = TAILQ_FIRST(toopts);
347		while (opt2 != NULL) {
348			if (strcmp(opt2->name, opt->name) == 0)
349				goto next;
350			if (strncmp(opt2->name, "no", 2) == 0 &&
351			    strcmp(opt2->name + 2, opt->name) == 0) {
352				vfs_freeopt(toopts, opt2);
353				goto next;
354			}
355			opt2 = TAILQ_NEXT(opt2, link);
356		}
357		/* We want this option, duplicate it. */
358		new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
359		new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
360		strcpy(new->name, opt->name);
361		if (opt->len != 0) {
362			new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
363			bcopy(opt->value, new->value, opt->len);
364		} else {
365			new->value = NULL;
366		}
367		new->len = opt->len;
368		TAILQ_INSERT_TAIL(toopts, new, link);
369next:
370		continue;
371	}
372}
373
374/*
375 * New mount API.
376 */
377int
378nmount(td, uap)
379	struct thread *td;
380	struct nmount_args /* {
381		struct iovec *iovp;
382		unsigned int iovcnt;
383		int flags;
384	} */ *uap;
385{
386	struct uio *auio;
387	struct iovec *iov;
388	unsigned int i;
389	int error;
390	u_int iovcnt;
391
392	iovcnt = uap->iovcnt;
393	/*
394	 * Check that we have an even number of iovec's
395	 * and that we have at least two options.
396	 */
397	if ((iovcnt & 1) || (iovcnt < 4))
398		return (EINVAL);
399	error = copyinuio(uap->iovp, iovcnt, &auio);
400	if (error)
401		return (error);
402	iov = auio->uio_iov;
403	for (i = 0; i < iovcnt; i++) {
404		if (iov->iov_len > MMAXOPTIONLEN) {
405			free(auio, M_IOV);
406			return (EINVAL);
407		}
408		iov++;
409	}
410	error = vfs_donmount(td, uap->flags, auio);
411	free(auio, M_IOV);
412	return (error);
413}
414
415int
416kernel_mount(struct iovec *iovp, u_int iovcnt, int flags)
417{
418	struct uio auio;
419	int error;
420
421	/*
422	 * Check that we have an even number of iovec's
423	 * and that we have at least two options.
424	 */
425	if ((iovcnt & 1) || (iovcnt < 4))
426		return (EINVAL);
427
428	auio.uio_iov = iovp;
429	auio.uio_iovcnt = iovcnt;
430	auio.uio_segflg = UIO_SYSSPACE;
431
432	error = vfs_donmount(curthread, flags, &auio);
433	return (error);
434}
435
436int
437kernel_vmount(int flags, ...)
438{
439	struct iovec *iovp;
440	struct uio auio;
441	va_list ap;
442	u_int iovcnt, iovlen, len;
443	const char *cp;
444	char *buf, *pos;
445	size_t n;
446	int error, i;
447
448	len = 0;
449	va_start(ap, flags);
450	for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
451		len += strlen(cp) + 1;
452	va_end(ap);
453
454	if (iovcnt < 4 || iovcnt & 1)
455		return (EINVAL);
456
457	iovlen = iovcnt * sizeof (struct iovec);
458	MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
459	MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
460	pos = buf;
461	va_start(ap, flags);
462	for (i = 0; i < iovcnt; i++) {
463		cp = va_arg(ap, const char *);
464		copystr(cp, pos, len - (pos - buf), &n);
465		iovp[i].iov_base = pos;
466		iovp[i].iov_len = n;
467		pos += n;
468	}
469	va_end(ap);
470
471	auio.uio_iov = iovp;
472	auio.uio_iovcnt = iovcnt;
473	auio.uio_segflg = UIO_SYSSPACE;
474
475	error = vfs_donmount(curthread, flags, &auio);
476	FREE(iovp, M_MOUNT);
477	FREE(buf, M_MOUNT);
478	return (error);
479}
480
481/*
482 * Allocate and initialize the mount point struct.
483 */
484static int
485vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
486    const char *fspath, struct thread *td, struct mount **mpp)
487{
488	struct mount *mp;
489
490	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
491	TAILQ_INIT(&mp->mnt_nvnodelist);
492	mp->mnt_nvnodelistsize = 0;
493	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
494	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
495	vfs_busy(mp, LK_NOWAIT, 0, td);
496	mp->mnt_op = vfsp->vfc_vfsops;
497	mp->mnt_vfc = vfsp;
498	vfsp->vfc_refcount++;
499	mp->mnt_stat.f_type = vfsp->vfc_typenum;
500	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
501	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
502	mp->mnt_vnodecovered = vp;
503	mp->mnt_cred = crdup(td->td_ucred);
504	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
505	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
506	mp->mnt_iosize_max = DFLTPHYS;
507#ifdef MAC
508	mac_init_mount(mp);
509	mac_create_mount(td->td_ucred, mp);
510#endif
511	*mpp = mp;
512	return (0);
513}
514
515/*
516 * Destroy the mount struct previously allocated by vfs_mount_alloc().
517 */
518void
519vfs_mount_destroy(struct mount *mp, struct thread *td)
520{
521
522	mp->mnt_vfc->vfc_refcount--;
523	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
524		panic("unmount: dangling vnode");
525	vfs_unbusy(mp,td);
526	lockdestroy(&mp->mnt_lock);
527	mtx_destroy(&mp->mnt_mtx);
528	if (mp->mnt_kern_flag & MNTK_MWAIT)
529		wakeup(mp);
530#ifdef MAC
531	mac_destroy_mount(mp);
532#endif
533	if (mp->mnt_opt != NULL)
534		vfs_freeopts(mp->mnt_opt);
535	crfree(mp->mnt_cred);
536	free(mp, M_MOUNT);
537}
538
539static int
540vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
541{
542	struct vfsoptlist *optlist;
543	char *fstype, *fspath;
544	int error, fstypelen, fspathlen;
545
546	error = vfs_buildopts(fsoptions, &optlist);
547	if (error)
548		return (error);
549
550	/*
551	 * We need these two options before the others,
552	 * and they are mandatory for any filesystem.
553	 * Ensure they are NUL terminated as well.
554	 */
555	fstypelen = 0;
556	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
557	if (error || fstype[fstypelen - 1] != '\0') {
558		error = EINVAL;
559		goto bail;
560	}
561	fspathlen = 0;
562	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
563	if (error || fspath[fspathlen - 1] != '\0') {
564		error = EINVAL;
565		goto bail;
566	}
567
568	/*
569	 * Be ultra-paranoid about making sure the type and fspath
570	 * variables will fit in our mp buffers, including the
571	 * terminating NUL.
572	 */
573	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
574		error = ENAMETOOLONG;
575		goto bail;
576	}
577
578	error = vfs_domount(td, fstype, fspath, fsflags, optlist, 0);
579bail:
580	if (error)
581		vfs_freeopts(optlist);
582	return (error);
583}
584
585/*
586 * Old mount API.
587 */
588#ifndef _SYS_SYSPROTO_H_
589struct mount_args {
590	char	*type;
591	char	*path;
592	int	flags;
593	caddr_t	data;
594};
595#endif
596/* ARGSUSED */
597int
598mount(td, uap)
599	struct thread *td;
600	struct mount_args /* {
601		char *type;
602		char *path;
603		int flags;
604		caddr_t data;
605	} */ *uap;
606{
607	char *fstype;
608	char *fspath;
609	int error;
610
611	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
612	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
613
614	/*
615	 * vfs_mount() actually takes a kernel string for `type' and
616	 * `path' now, so extract them.
617	 */
618	error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
619	if (error == 0)
620		error = copyinstr(uap->path, fspath, MNAMELEN, NULL);
621	if (error == 0)
622		error = vfs_domount(td, fstype, fspath, uap->flags,
623		    uap->data, 1);
624	free(fstype, M_TEMP);
625	free(fspath, M_TEMP);
626	return (error);
627}
628
/*
 * vfs_mount(): actually attempt a filesystem mount.
 *
 * This routine is designed to be a "generic" entry point for routines
 * that wish to mount a filesystem. All parameters except `fsdata' are
 * pointers into kernel space. `fsdata' is currently still a pointer
 * into userspace.
 *
 * The request is forwarded to vfs_domount() in compat mode and its
 * result is returned unchanged.
 */
int
vfs_mount(td, fstype, fspath, fsflags, fsdata)
	struct thread *td;
	const char *fstype;
	char *fspath;
	int fsflags;
	void *fsdata;
{
	int error;

	error = vfs_domount(td, fstype, fspath, fsflags, fsdata, 1);
	return (error);
}
648
649/*
650 * vfs_domount(): actually attempt a filesystem mount.
651 */
652static int
653vfs_domount(
654	struct thread *td,	/* Flags common to all filesystems. */
655	const char *fstype,	/* Filesystem type. */
656	char *fspath,		/* Mount path. */
657	int fsflags,		/* Flags common to all filesystems. */
658	void *fsdata,		/* Options local to the filesystem. */
659	int compat		/* Invocation from compat syscall. */
660	)
661{
662	linker_file_t lf;
663	struct vnode *vp;
664	struct mount *mp;
665	struct vfsconf *vfsp;
666	int error, flag = 0, kern_flag = 0;
667	struct vattr va;
668	struct nameidata nd;
669
670	/*
671	 * Be ultra-paranoid about making sure the type and fspath
672	 * variables will fit in our mp buffers, including the
673	 * terminating NUL.
674	 */
675	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
676		return (ENAMETOOLONG);
677
678	if (jailed(td->td_ucred))
679		return (EPERM);
680	if (usermount == 0) {
681		if ((error = suser(td)) != 0)
682			return (error);
683	}
684
685	/*
686	 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
687	 */
688	if (fsflags & (MNT_EXPORTED | MNT_SUIDDIR)) {
689		if ((error = suser(td)) != 0)
690			return (error);
691	}
692	/*
693	 * Silently enforce MNT_NODEV, MNT_NOSUID and MNT_USER for
694	 * unprivileged users.
695	 */
696	if (suser(td) != 0)
697		fsflags |= MNT_NODEV | MNT_NOSUID | MNT_USER;
698	/*
699	 * Get vnode to be covered
700	 */
701	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
702	if ((error = namei(&nd)) != 0)
703		return (error);
704	NDFREE(&nd, NDF_ONLY_PNBUF);
705	vp = nd.ni_vp;
706	if (fsflags & MNT_UPDATE) {
707		if ((vp->v_vflag & VV_ROOT) == 0) {
708			vput(vp);
709			return (EINVAL);
710		}
711		mp = vp->v_mount;
712		flag = mp->mnt_flag;
713		kern_flag = mp->mnt_kern_flag;
714		/*
715		 * We only allow the filesystem to be reloaded if it
716		 * is currently mounted read-only.
717		 */
718		if ((fsflags & MNT_RELOAD) &&
719		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
720			vput(vp);
721			return (EOPNOTSUPP);	/* Needs translation */
722		}
723		/*
724		 * Only privileged root, or (if MNT_USER is set) the user that
725		 * did the original mount is permitted to update it.
726		 */
727		error = vfs_suser(mp, td);
728		if (error) {
729			vput(vp);
730			return (error);
731		}
732		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
733			vput(vp);
734			return (EBUSY);
735		}
736		VI_LOCK(vp);
737		if ((vp->v_iflag & VI_MOUNT) != 0 ||
738		    vp->v_mountedhere != NULL) {
739			VI_UNLOCK(vp);
740			vfs_unbusy(mp, td);
741			vput(vp);
742			return (EBUSY);
743		}
744		vp->v_iflag |= VI_MOUNT;
745		VI_UNLOCK(vp);
746		mp->mnt_flag |= fsflags &
747		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
748		VOP_UNLOCK(vp, 0, td);
749		if (compat == 0) {
750			mp->mnt_optnew = fsdata;
751			vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
752		}
753	} else {
754		/*
755		 * If the user is not root, ensure that they own the directory
756		 * onto which we are attempting to mount.
757		 */
758		error = VOP_GETATTR(vp, &va, td->td_ucred, td);
759		if (error) {
760			vput(vp);
761			return (error);
762		}
763		if (va.va_uid != td->td_ucred->cr_uid) {
764			if ((error = suser(td)) != 0) {
765				vput(vp);
766				return (error);
767			}
768		}
769		if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
770			vput(vp);
771			return (error);
772		}
773		if (vp->v_type != VDIR) {
774			vput(vp);
775			return (ENOTDIR);
776		}
777		vfsp = vfs_byname(fstype);
778		if (vfsp == NULL) {
779			/* Only load modules for root (very important!). */
780			if ((error = suser(td)) != 0) {
781				vput(vp);
782				return (error);
783			}
784			error = securelevel_gt(td->td_ucred, 0);
785			if (error) {
786				vput(vp);
787				return (error);
788			}
789			error = linker_load_module(NULL, fstype, NULL, NULL, &lf);
790			if (error || lf == NULL) {
791				vput(vp);
792				if (lf == NULL)
793					error = ENODEV;
794				return (error);
795			}
796			lf->userrefs++;
797			/* Look up again to see if the VFS was loaded. */
798			vfsp = vfs_byname(fstype);
799			if (vfsp == NULL) {
800				lf->userrefs--;
801				linker_file_unload(lf, LINKER_UNLOAD_FORCE);
802				vput(vp);
803				return (ENODEV);
804			}
805		}
806		VI_LOCK(vp);
807		if ((vp->v_iflag & VI_MOUNT) != 0 ||
808		    vp->v_mountedhere != NULL) {
809			VI_UNLOCK(vp);
810			vput(vp);
811			return (EBUSY);
812		}
813		vp->v_iflag |= VI_MOUNT;
814		VI_UNLOCK(vp);
815
816		/*
817		 * Allocate and initialize the filesystem.
818		 */
819		error = vfs_mount_alloc(vp, vfsp, fspath, td, &mp);
820		if (error) {
821			vput(vp);
822			return (error);
823		}
824		VOP_UNLOCK(vp, 0, td);
825
826		/* XXXMAC: pass to vfs_mount_alloc? */
827		if (compat == 0)
828			mp->mnt_optnew = fsdata;
829	}
830	/*
831	 * Check if the fs implements the type VFS_[O]MOUNT()
832	 * function we are looking for.
833	 */
834	if ((compat == 0) == (mp->mnt_op->vfs_omount != NULL)) {
835		printf("%s doesn't support the %s mount syscall\n",
836		    mp->mnt_vfc->vfc_name, compat ? "old" : "new");
837		VI_LOCK(vp);
838		vp->v_iflag &= ~VI_MOUNT;
839		VI_UNLOCK(vp);
840		if (mp->mnt_flag & MNT_UPDATE)
841			vfs_unbusy(mp, td);
842		else
843			vfs_mount_destroy(mp, td);
844		vrele(vp);
845		return (EOPNOTSUPP);
846	}
847
848	/*
849	 * Set the mount level flags.
850	 */
851	if (fsflags & MNT_RDONLY)
852		mp->mnt_flag |= MNT_RDONLY;
853	else if (mp->mnt_flag & MNT_RDONLY)
854		mp->mnt_kern_flag |= MNTK_WANTRDWR;
855	mp->mnt_flag &=~ MNT_UPDATEMASK;
856	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
857	/*
858	 * Mount the filesystem.
859	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
860	 * get.  No freeing of cn_pnbuf.
861	 */
862	if (compat)
863	    error = VFS_OMOUNT(mp, fspath, fsdata, td);
864	else
865	    error = VFS_MOUNT(mp, td);
866	if (!error) {
867		if (mp->mnt_opt != NULL)
868			vfs_freeopts(mp->mnt_opt);
869		mp->mnt_opt = mp->mnt_optnew;
870	}
871	/*
872	 * Prevent external consumers of mount options from reading
873	 * mnt_optnew.
874	*/
875	mp->mnt_optnew = NULL;
876	if (mp->mnt_flag & MNT_UPDATE) {
877		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
878			mp->mnt_flag &= ~MNT_RDONLY;
879		mp->mnt_flag &=
880		    ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
881		mp->mnt_kern_flag &= ~MNTK_WANTRDWR;
882		if (error) {
883			mp->mnt_flag = flag;
884			mp->mnt_kern_flag = kern_flag;
885		}
886		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
887			if (mp->mnt_syncer == NULL)
888				error = vfs_allocate_syncvnode(mp);
889		} else {
890			if (mp->mnt_syncer != NULL)
891				vrele(mp->mnt_syncer);
892			mp->mnt_syncer = NULL;
893		}
894		vfs_unbusy(mp, td);
895		VI_LOCK(vp);
896		vp->v_iflag &= ~VI_MOUNT;
897		VI_UNLOCK(vp);
898		vrele(vp);
899		return (error);
900	}
901	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
902	/*
903	 * Put the new filesystem on the mount list after root.
904	 */
905	cache_purge(vp);
906	if (!error) {
907		struct vnode *newdp;
908
909		VI_LOCK(vp);
910		vp->v_iflag &= ~VI_MOUNT;
911		VI_UNLOCK(vp);
912		vp->v_mountedhere = mp;
913		mtx_lock(&mountlist_mtx);
914		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
915		mtx_unlock(&mountlist_mtx);
916		vfs_event_signal(NULL, VQ_MOUNT, 0);
917		if (VFS_ROOT(mp, &newdp, td))
918			panic("mount: lost mount");
919		checkdirs(vp, newdp);
920		vput(newdp);
921		VOP_UNLOCK(vp, 0, td);
922		if ((mp->mnt_flag & MNT_RDONLY) == 0)
923			error = vfs_allocate_syncvnode(mp);
924		vfs_unbusy(mp, td);
925		if (error || (error = VFS_START(mp, 0, td)) != 0)
926			vrele(vp);
927	} else {
928		VI_LOCK(vp);
929		vp->v_iflag &= ~VI_MOUNT;
930		VI_UNLOCK(vp);
931		vfs_mount_destroy(mp, td);
932		vput(vp);
933	}
934	return (error);
935}
936
937/*
938 * Scan all active processes to see if any of them have a current
939 * or root directory of `olddp'. If so, replace them with the new
940 * mount point.
941 */
942static void
943checkdirs(olddp, newdp)
944	struct vnode *olddp, *newdp;
945{
946	struct filedesc *fdp;
947	struct proc *p;
948	int nrele;
949
950	if (vrefcnt(olddp) == 1)
951		return;
952	sx_slock(&allproc_lock);
953	LIST_FOREACH(p, &allproc, p_list) {
954		mtx_lock(&fdesc_mtx);
955		fdp = p->p_fd;
956		if (fdp == NULL) {
957			mtx_unlock(&fdesc_mtx);
958			continue;
959		}
960		nrele = 0;
961		FILEDESC_LOCK(fdp);
962		if (fdp->fd_cdir == olddp) {
963			VREF(newdp);
964			fdp->fd_cdir = newdp;
965			nrele++;
966		}
967		if (fdp->fd_rdir == olddp) {
968			VREF(newdp);
969			fdp->fd_rdir = newdp;
970			nrele++;
971		}
972		FILEDESC_UNLOCK(fdp);
973		mtx_unlock(&fdesc_mtx);
974		while (nrele--)
975			vrele(olddp);
976	}
977	sx_sunlock(&allproc_lock);
978	if (rootvnode == olddp) {
979		vrele(rootvnode);
980		VREF(newdp);
981		rootvnode = newdp;
982	}
983}
984
985/*
986 * Unmount a filesystem.
987 *
988 * Note: unmount takes a path to the vnode mounted on as argument,
989 * not special file (as before).
990 */
991#ifndef _SYS_SYSPROTO_H_
992struct unmount_args {
993	char	*path;
994	int	flags;
995};
996#endif
997/* ARGSUSED */
998int
999unmount(td, uap)
1000	struct thread *td;
1001	register struct unmount_args /* {
1002		char *path;
1003		int flags;
1004	} */ *uap;
1005{
1006	struct mount *mp;
1007	char *pathbuf;
1008	int error, id0, id1;
1009
1010	if (jailed(td->td_ucred))
1011		return (EPERM);
1012	if (usermount == 0) {
1013		if ((error = suser(td)) != 0)
1014			return (error);
1015	}
1016
1017	pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1018	error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
1019	if (error) {
1020		free(pathbuf, M_TEMP);
1021		return (error);
1022	}
1023	if (uap->flags & MNT_BYFSID) {
1024		/* Decode the filesystem ID. */
1025		if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
1026			free(pathbuf, M_TEMP);
1027			return (EINVAL);
1028		}
1029
1030		mtx_lock(&mountlist_mtx);
1031		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
1032			if (mp->mnt_stat.f_fsid.val[0] == id0 &&
1033			    mp->mnt_stat.f_fsid.val[1] == id1)
1034				break;
1035		}
1036		mtx_unlock(&mountlist_mtx);
1037	} else {
1038		mtx_lock(&mountlist_mtx);
1039		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
1040			if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
1041				break;
1042		}
1043		mtx_unlock(&mountlist_mtx);
1044	}
1045	free(pathbuf, M_TEMP);
1046	if (mp == NULL) {
1047		/*
1048		 * Previously we returned ENOENT for a nonexistent path and
1049		 * EINVAL for a non-mountpoint.  We cannot tell these apart
1050		 * now, so in the !MNT_BYFSID case return the more likely
1051		 * EINVAL for compatibility.
1052		 */
1053		return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
1054	}
1055
1056	/*
1057	 * Only privileged root, or (if MNT_USER is set) the user that did the
1058	 * original mount is permitted to unmount this filesystem.
1059	 */
1060	error = vfs_suser(mp, td);
1061	if (error)
1062		return (error);
1063
1064	/*
1065	 * Don't allow unmounting the root filesystem.
1066	 */
1067	if (mp->mnt_flag & MNT_ROOTFS)
1068		return (EINVAL);
1069	return (dounmount(mp, uap->flags, td));
1070}
1071
/*
 * Do the actual filesystem unmount.
 *
 * `mp' is the mount to take down, `flags' the unmount flags (MNT_FORCE is
 * the only one examined here; all of them are passed to VFS_UNMOUNT())
 * and `td' the calling thread.
 *
 * Returns 0 after the mount has been removed from the mount list and
 * destroyed.  Returns EBUSY if an unmount of `mp' is already in progress,
 * or the error from lockmgr(), VFS_SYNC() or VFS_UNMOUNT(); in those
 * cases the mount is left in place.
 */
int
dounmount(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct vnode *coveredvp, *fsrootvp;
	int error;
	int async_flag;

	/* MNTK_UNMOUNT marks an unmount in progress; allow only one. */
	mtx_lock(&mountlist_mtx);
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		mtx_unlock(&mountlist_mtx);
		return (EBUSY);
	}
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	/* Allow filesystems to detect that a forced unmount is in progress. */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/*
	 * Drain the mount lock, with mountlist_mtx passed as the interlock
	 * (it is not unlocked explicitly on this path).  LK_NOWAIT is added
	 * unless the unmount is forced.
	 */
	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
	if (error) {
		/* Could not get the lock: clear the marks and wake waiters. */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}
	vn_start_write(NULL, &mp, V_WAIT);

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	/* Remember MNT_ASYNC so it can be put back if the unmount fails. */
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	/*
	 * NOTE(review): mnt_syncer is released but not cleared here, while
	 * the error path below tests it against NULL -- presumably releasing
	 * the syncer vnode resets the field elsewhere; confirm.
	 */
	if (mp->mnt_syncer != NULL)
		vrele(mp->mnt_syncer);
	/*
	 * For forced unmounts, move process cdir/rdir refs on the fs root
	 * vnode to the covered vnode.  For non-forced unmounts we want
	 * such references to cause an EBUSY error.
	 */
	if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) {
		if (mp->mnt_vnodecovered != NULL)
			checkdirs(fsrootvp, mp->mnt_vnodecovered);
		if (fsrootvp == rootvnode) {
			vrele(rootvnode);
			rootvnode = NULL;
		}
		vput(fsrootvp);
	}
	/*
	 * Sync unless the filesystem is read-only.  VFS_UNMOUNT() runs when
	 * that succeeded (or was skipped), and regardless of a sync failure
	 * when the unmount is forced.
	 */
	if (((mp->mnt_flag & MNT_RDONLY) ||
	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
	    (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags, td);
	}
	vn_finished_write(mp);
	if (error) {
		/* Undo cdir/rdir and rootvnode changes made above. */
		if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) {
			if (mp->mnt_vnodecovered != NULL)
				checkdirs(mp->mnt_vnodecovered, fsrootvp);
			if (rootvnode == NULL) {
				rootvnode = fsrootvp;
				vref(rootvnode);
			}
			vput(fsrootvp);
		}
		/* A writable filesystem needs its syncer vnode back. */
		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
			(void) vfs_allocate_syncvnode(mp);
		mtx_lock(&mountlist_mtx);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		/* Release the drained lock, again with the list mutex as interlock. */
		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
		    &mountlist_mtx, td);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}
	/* Success: unlink the mount and uncover the vnode it was mounted on. */
	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
		coveredvp->v_mountedhere = NULL;
	mtx_unlock(&mountlist_mtx);
	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
	vfs_mount_destroy(mp, td);
	/* mp is gone; coveredvp was read from it before the destroy. */
	if (coveredvp != NULL)
		vrele(coveredvp);
	return (0);
}
1166
1167/*
1168 * Lookup a filesystem type, and if found allocate and initialize
1169 * a mount structure for it.
1170 *
1171 * Devname is usually updated by mount(8) after booting.
1172 */
1173int
1174vfs_rootmountalloc(fstypename, devname, mpp)
1175	char *fstypename;
1176	char *devname;
1177	struct mount **mpp;
1178{
1179	struct thread *td = curthread;	/* XXX */
1180	struct vfsconf *vfsp;
1181	struct mount *mp;
1182	int error;
1183
1184	if (fstypename == NULL)
1185		return (ENODEV);
1186	vfsp = vfs_byname(fstypename);
1187	if (vfsp == NULL)
1188		return (ENODEV);
1189	error = vfs_mount_alloc(NULLVP, vfsp, "/", td, &mp);
1190	if (error)
1191		return (error);
1192	mp->mnt_flag |= MNT_RDONLY | MNT_ROOTFS;
1193	strlcpy(mp->mnt_stat.f_mntfromname, devname, MNAMELEN);
1194	*mpp = mp;
1195	return (0);
1196}
1197
1198/*
1199 * Find and mount the root filesystem
1200 */
1201void
1202vfs_mountroot(void)
1203{
1204	char *cp;
1205	int error, i, asked = 0;
1206
1207
1208	/*
1209	 * Wait for GEOM to settle down
1210	 */
1211	g_waitidle();
1212
1213	/*
1214	 * We are booted with instructions to prompt for the root filesystem.
1215	 */
1216	if (boothowto & RB_ASKNAME) {
1217		if (!vfs_mountroot_ask())
1218			return;
1219		asked = 1;
1220	}
1221
1222	/*
1223	 * The root filesystem information is compiled in, and we are
1224	 * booted with instructions to use it.
1225	 */
1226	if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
1227		if (!vfs_mountroot_try(ctrootdevname))
1228			return;
1229		ctrootdevname = NULL;
1230	}
1231
1232	/*
1233	 * We've been given the generic "use CDROM as root" flag.  This is
1234	 * necessary because one media may be used in many different
1235	 * devices, so we need to search for them.
1236	 */
1237	if (boothowto & RB_CDROM) {
1238		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1239			if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1240				return;
1241		}
1242	}
1243
1244	/*
1245	 * Try to use the value read by the loader from /etc/fstab, or
1246	 * supplied via some other means.  This is the preferred
1247	 * mechanism.
1248	 */
1249	cp = getenv("vfs.root.mountfrom");
1250	if (cp != NULL) {
1251		error = vfs_mountroot_try(cp);
1252		freeenv(cp);
1253		if (!error)
1254			return;
1255	}
1256
1257	/*
1258	 * Try values that may have been computed by code during boot
1259	 */
1260	if (!vfs_mountroot_try(rootdevnames[0]))
1261		return;
1262	if (!vfs_mountroot_try(rootdevnames[1]))
1263		return;
1264
1265	/*
1266	 * If we (still) have a compiled-in default, try it.
1267	 */
1268	if (ctrootdevname != NULL)
1269		if (!vfs_mountroot_try(ctrootdevname))
1270			return;
1271
1272	/*
1273	 * Everything so far has failed, prompt on the console if we haven't
1274	 * already tried that.
1275	 */
1276	if (!asked)
1277		if (!vfs_mountroot_ask())
1278			return;
1279	panic("Root mount failed, startup aborted.");
1280}
1281
1282/*
1283 * Mount (mountfrom) as the root filesystem.
1284 */
1285static int
1286vfs_mountroot_try(const char *mountfrom)
1287{
1288        struct mount	*mp;
1289	char		*vfsname, *path;
1290	const char	*devname;
1291	int		error;
1292	char		patt[32];
1293	int		s;
1294
1295	vfsname = NULL;
1296	path    = NULL;
1297	mp      = NULL;
1298	error   = EINVAL;
1299
1300	if (mountfrom == NULL)
1301		return (error);		/* don't complain */
1302
1303	s = splcam();			/* Overkill, but annoying without it */
1304	printf("Mounting root from %s\n", mountfrom);
1305	splx(s);
1306
1307	/* parse vfs name and path */
1308	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1309	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1310	vfsname[0] = path[0] = 0;
1311	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1312	if (sscanf(mountfrom, patt, vfsname, path) < 1)
1313		goto done;
1314
1315	/* allocate a root mount */
1316	error = vfs_rootmountalloc(vfsname, path[0] != 0 ? path : ROOTNAME,
1317	    &mp);
1318	if (error != 0) {
1319		printf("Can't allocate root mount for filesystem '%s': %d\n",
1320		       vfsname, error);
1321		goto done;
1322	}
1323
1324	/*
1325	 * do our best to set rootdev
1326	 * XXX: This does not belong here!
1327	 */
1328	if (path[0] != '\0') {
1329		struct cdev *diskdev;
1330		diskdev = getdiskbyname(path);
1331		if (diskdev != NULL)
1332			rootdev = diskdev;
1333		else
1334			printf("setrootbyname failed\n");
1335	}
1336
1337	/* If the root device is a type "memory disk", mount RW */
1338	if (rootdev != NULL && devsw(rootdev) != NULL) {
1339		devname = devtoname(rootdev);
1340		if (devname[0] == 'm' && devname[1] == 'd')
1341			mp->mnt_flag &= ~MNT_RDONLY;
1342	}
1343
1344	error = VFS_OMOUNT(mp, NULL, NULL, curthread);
1345
1346done:
1347	if (vfsname != NULL)
1348		free(vfsname, M_MOUNT);
1349	if (path != NULL)
1350		free(path, M_MOUNT);
1351	if (error != 0) {
1352		if (mp != NULL)
1353			vfs_mount_destroy(mp, curthread);
1354		printf("Root mount failed: %d\n", error);
1355	} else {
1356
1357		/* register with list of mounted filesystems */
1358		mtx_lock(&mountlist_mtx);
1359		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1360		mtx_unlock(&mountlist_mtx);
1361
1362		/* sanity check system clock against root fs timestamp */
1363		inittodr(mp->mnt_time);
1364		vfs_unbusy(mp, curthread);
1365		error = VFS_START(mp, 0, curthread);
1366	}
1367	return (error);
1368}
1369
/*
 * Prompt on the console, repeatedly, for a root filesystem to mount.
 *
 * Returns 0 once an entered specification has been mounted, or 1 when
 * the operator gives up by entering an empty line.
 */
static int
vfs_mountroot_ask(void)
{
	char line[128];

	for (;;) {
		printf("\nManual root filesystem specification:\n");
		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
#if defined(__i386__) || defined(__ia64__)
		printf("                       eg. ufs:da0s1a\n");
#else
		printf("                       eg. ufs:/dev/da0a\n");
#endif
		printf("  ?                  List valid disk boot devices\n");
		printf("  <empty line>       Abort manual input\n");
		printf("\nmountroot> ");
		gets(line);

		switch (line[0]) {
		case '\0':
			/* Empty line: abort manual input. */
			return (1);
		case '?':
			printf("\nList of GEOM managed disk devices:\n  ");
			g_dev_print();
			continue;
		default:
			break;
		}

		/* Anything else is taken as a specification to mount. */
		if (vfs_mountroot_try(line) == 0)
			return (0);
	}
}
1401
/*
 * Local helper function for vfs_mountroot_ask.
 *
 * Reads one line from the console into cp, echoing each character, and
 * NUL-terminates it when a newline or carriage return is seen.
 * Backspace, DEL and '#' erase the previous character; '@' and ^U
 * discard the whole line.
 *
 * At most GETS_BUFLEN - 1 characters are stored; further input is
 * echoed but discarded.  The limit matches the 128-byte buffer that
 * vfs_mountroot_ask() passes in.
 */
static void
gets(char *cp)
{
	enum { GETS_BUFLEN = 128 };
	char *lp, *limit;
	int c;

	lp = cp;
	limit = cp + GETS_BUFLEN - 1;	/* last slot is kept for the NUL */
	for (;;) {
		c = cngetc();
		/*
		 * Test for -1 before masking: once masked with 0177 it
		 * would look like DEL and this loop would never end.
		 */
		if (c == -1) {
			*lp = '\0';
			return;
		}
		c &= 0177;
		printf("%c", c);
		switch (c) {
		case '\n':
		case '\r':
			*lp = '\0';
			return;
		case '\b':
		case '\177':
			if (lp > cp) {
				printf(" \b");
				lp--;
			}
			continue;
		case '#':
			if (lp > cp)
				lp--;
			continue;
		case '@':
		case 'u' & 037:
			lp = cp;
			printf("%c", '\n');
			continue;
		default:
			/* Never write past the end of the caller's buffer. */
			if (lp < limit)
				*lp++ = c;
		}
	}
}
1442
1443/*
1444 * Convert a given name to the cdev pointer of the device, which is probably
1445 * but not by definition, a disk.  Mount a DEVFS (on nothing), look the name
1446 * up, extract the cdev from the vnode and unmount it again.  Unfortunately
1447 * we cannot use the vnode directly (because we unmount the DEVFS again)
1448 * so the filesystems still have to do the bdevvp() stunt.
1449 */
1450struct cdev *
1451getdiskbyname(char *name)
1452{
1453	char *cp = name;
1454	struct cdev *dev = NULL;
1455	struct thread *td = curthread;
1456	struct vfsconf *vfsp;
1457	struct mount *mp = NULL;
1458	struct vnode *vroot = NULL;
1459	struct nameidata nid;
1460	int error;
1461
1462	if (!bcmp(cp, "/dev/", 5))
1463		cp += 5;
1464
1465	do {
1466		vfsp = vfs_byname("devfs");
1467		if (vfsp == NULL)
1468			break;
1469		error = vfs_mount_alloc(NULLVP, vfsp, "/dev", td, &mp);
1470		if (error)
1471			break;
1472		mp->mnt_flag |= MNT_RDONLY;
1473
1474		error = VFS_MOUNT(mp, curthread);
1475		if (error)
1476			break;
1477		VFS_START(mp, 0, td);
1478		VFS_ROOT(mp, &vroot, td);
1479		VOP_UNLOCK(vroot, 0, td);
1480
1481		NDINIT(&nid, LOOKUP, NOCACHE|FOLLOW,
1482		    UIO_SYSSPACE, cp, curthread);
1483		nid.ni_startdir = vroot;
1484		nid.ni_pathlen = strlen(cp);
1485		nid.ni_cnd.cn_cred = curthread->td_ucred;
1486		nid.ni_cnd.cn_nameptr = cp;
1487
1488		error = lookup(&nid);
1489		if (error)
1490			break;
1491		dev = vn_todev (nid.ni_vp);
1492		NDFREE(&nid, 0);
1493	} while (0);
1494
1495	if (vroot != NULL)
1496		VFS_UNMOUNT(mp, 0, td);
1497	if (mp != NULL)
1498		vfs_mount_destroy(mp, td);
1499  	return (dev);
1500}
1501
/*
 * DDB command "show disk/<devicename>": print the struct cdev * that
 * getdiskbyname() finds for the device named in the command modifier.
 */
#ifdef DDB
DB_SHOW_COMMAND(disk, db_getdiskbyname)
{
	struct cdev *found;

	/* The device name is required. */
	if (modif[0] == '\0') {
		db_error("usage: show disk/devicename");
		return;
	}

	found = getdiskbyname(modif);
	if (found == NULL) {
		db_printf("No disk device matched.\n");
		return;
	}
	db_printf("struct cdev *= %p\n", found);
}
#endif
1519
1520/*
1521 * Get a mount option by its name.
1522 *
1523 * Return 0 if the option was found, ENOENT otherwise.
1524 * If len is non-NULL it will be filled with the length
1525 * of the option. If buf is non-NULL, it will be filled
1526 * with the address of the option.
1527 */
1528int
1529vfs_getopt(opts, name, buf, len)
1530	struct vfsoptlist *opts;
1531	const char *name;
1532	void **buf;
1533	int *len;
1534{
1535	struct vfsopt *opt;
1536
1537	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1538
1539	TAILQ_FOREACH(opt, opts, link) {
1540		if (strcmp(name, opt->name) == 0) {
1541			if (len != NULL)
1542				*len = opt->len;
1543			if (buf != NULL)
1544				*buf = opt->value;
1545			return (0);
1546		}
1547	}
1548	return (ENOENT);
1549}
1550
1551/*
1552 * Find and copy a mount option.
1553 *
1554 * The size of the buffer has to be specified
1555 * in len, if it is not the same length as the
1556 * mount option, EINVAL is returned.
1557 * Returns ENOENT if the option is not found.
1558 */
1559int
1560vfs_copyopt(opts, name, dest, len)
1561	struct vfsoptlist *opts;
1562	const char *name;
1563	void *dest;
1564	int len;
1565{
1566	struct vfsopt *opt;
1567
1568	KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
1569
1570	TAILQ_FOREACH(opt, opts, link) {
1571		if (strcmp(name, opt->name) == 0) {
1572			if (len != opt->len)
1573				return (EINVAL);
1574			bcopy(opt->value, dest, opt->len);
1575			return (0);
1576		}
1577	}
1578	return (ENOENT);
1579}
1580
1581
1582/*
1583 * This is a helper function for filesystems to traverse their
1584 * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
1585 */
1586
1587struct vnode *
1588__mnt_vnode_next(struct vnode **nvp, struct mount *mp)
1589{
1590	struct vnode *vp;
1591
1592	mtx_assert(&mp->mnt_mtx, MA_OWNED);
1593	vp = *nvp;
1594	/* Check if we are done */
1595	if (vp == NULL)
1596		return (NULL);
1597	/* If our next vnode is no longer ours, start over */
1598	if (vp->v_mount != mp)
1599		vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
1600	/* Save pointer to next vnode in list */
1601	if (vp != NULL)
1602		*nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1603	else
1604		*nvp = NULL;
1605	return (vp);
1606}
1607