vfs_mount.c revision 99602
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * Copyright (c) 1999 Michael Smith
39 * All rights reserved.
40 * Copyright (c) 1999 Poul-Henning Kamp
41 * All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * $FreeBSD: head/sys/kern/vfs_mount.c 99602 2002-07-08 19:10:15Z mux $
65 */
66
67#include <sys/param.h>
68#include <sys/conf.h>
69#include <sys/cons.h>
70#include <sys/kernel.h>
71#include <sys/linker.h>
72#include <sys/malloc.h>
73#include <sys/mount.h>
74#include <sys/mutex.h>
75#include <sys/namei.h>
76#include <sys/proc.h>
77#include <sys/reboot.h>
78#include <sys/sysproto.h>
79#include <sys/sx.h>
80#include <sys/sysctl.h>
81#include <sys/sysent.h>
82#include <sys/systm.h>
83#include <sys/vnode.h>
84
85#include <machine/stdarg.h>
86
87#include "opt_rootdevname.h"
88#include "opt_ddb.h"
89
90#ifdef DDB
91#include <ddb/ddb.h>
92#endif
93
94#define ROOTNAME	"root_device"
95
96static void	checkdirs(struct vnode *olddp, struct vnode *newdp);
97static int	vfs_nmount(struct thread *td, int, struct uio *);
98static int	vfs_mountroot_try(char *mountfrom);
99static int	vfs_mountroot_ask(void);
100static void	gets(char *cp);
101
102static int	usermount = 0;	/* if 1, non-root can mount fs. */
103SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
104
105MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
106
107/* List of mounted filesystems. */
108struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
109
110/* For any iteration/modification of mountlist */
111struct mtx mountlist_mtx;
112
113/* For any iteration/modification of mnt_vnodelist */
114struct mtx mntvnode_mtx;
115
116/*
117 * The vnode of the system's root (/ in the filesystem, without chroot
118 * active.)
119 */
120struct vnode	*rootvnode;
121
122/*
123 * The root filesystem is detailed in the kernel environment variable
124 * vfs.root.mountfrom, which is expected to be in the general format
125 *
126 * <vfsname>:[<path>]
127 * vfsname   := the name of a VFS known to the kernel and capable
128 *              of being mounted as root
129 * path      := disk device name or other data used by the filesystem
130 *              to locate its physical store
131 */
132
133/*
134 * The root specifiers we will try if RB_CDROM is specified.
135 */
136static char *cdrom_rootdevnames[] = {
137	"cd9660:cd0a",
138	"cd9660:acd0a",
139	"cd9660:wcd0a",
140	NULL
141};
142
143/* legacy find-root code */
144char		*rootdevnames[2] = {NULL, NULL};
145static int	setrootbyname(char *name);
146dev_t		rootdev = NODEV;
147
148/*
149 * Release all resources related to the
150 * mount options.
151 */
152static void
153vfs_freeopts(struct vfsoptlist *opts)
154{
155	struct vfsopt *opt;
156
157	while (!TAILQ_EMPTY(opts)) {
158		opt = TAILQ_FIRST(opts);
159		TAILQ_REMOVE(opts, opt, link);
160		free(opt->name, M_MOUNT);
161		free(opt->value, M_MOUNT);
162		free(opt, M_MOUNT);
163	}
164	free(opts, M_MOUNT);
165}
166
167/*
168 * Build a linked list of mount options from a struct uio.
169 */
170static int
171vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
172{
173	struct vfsoptlist *opts;
174	struct vfsopt *opt;
175	unsigned int i, iovcnt;
176	int error, namelen, optlen;
177
178	iovcnt = auio->uio_iovcnt;
179	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
180	TAILQ_INIT(opts);
181	for (i = 0; i < iovcnt; i += 2) {
182		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
183		namelen = auio->uio_iov[i].iov_len;
184		optlen = auio->uio_iov[i + 1].iov_len;
185		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
186		opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
187		opt->len = optlen;
188		if (auio->uio_segflg == UIO_SYSSPACE) {
189			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
190			bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
191			    optlen);
192		} else {
193			error = copyin(auio->uio_iov[i].iov_base, opt->name,
194			    namelen);
195			if (!error)
196				error = copyin(auio->uio_iov[i + 1].iov_base,
197				    opt->value, optlen);
198			if (error)
199				goto bad;
200		}
201		TAILQ_INSERT_TAIL(opts, opt, link);
202	}
203	*options = opts;
204	return (0);
205bad:
206	vfs_freeopts(opts);
207	return (error);
208}
209
210/*
211 * New mount API.
212 */
213int
214nmount(td, uap)
215	struct thread *td;
216	struct nmount_args /* {
217		syscallarg(struct iovec *) iovp;
218		syscallarg(unsigned int) iovcnt;
219		syscallarg(int) flags;
220	} */ *uap;
221{
222	struct uio auio;
223	struct iovec *iov, *needfree;
224	struct iovec aiov[UIO_SMALLIOV];
225	unsigned int i;
226	int error;
227	u_int iovlen, iovcnt;
228
229	iovcnt = SCARG(uap, iovcnt);
230	iovlen = iovcnt * sizeof (struct iovec);
231	/*
232	 * Check that we have an even number of iovec's
233	 * and that we have at least two options.
234	 */
235	if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
236		return (EINVAL);
237
238	if (iovcnt > UIO_SMALLIOV) {
239		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
240		needfree = iov;
241	} else {
242		iov = aiov;
243		needfree = NULL;
244	}
245	auio.uio_iov = iov;
246	auio.uio_iovcnt = iovcnt;
247	auio.uio_segflg = UIO_USERSPACE;
248	if ((error = copyin(uap->iovp, iov, iovlen)))
249		goto finish;
250
251	for (i = 0; i < iovcnt; i++) {
252		if (iov->iov_len > MMAXOPTIONLEN) {
253			error = EINVAL;
254			goto finish;
255		}
256		iov++;
257	}
258	error = vfs_nmount(td, SCARG(uap, flags), &auio);
259finish:
260	if (needfree != NULL)
261		free(needfree, M_TEMP);
262	return (error);
263}
264
265int
266kernel_mount(iovp, iovcnt, flags)
267	struct iovec *iovp;
268	unsigned int iovcnt;
269	int flags;
270{
271	struct uio auio;
272	int error;
273
274	/*
275	 * Check that we have an even number of iovec's
276	 * and that we have at least two options.
277	 */
278	if ((iovcnt & 1) || (iovcnt < 4))
279		return (EINVAL);
280
281	auio.uio_iov = iovp;
282	auio.uio_iovcnt = iovcnt;
283	auio.uio_segflg = UIO_SYSSPACE;
284
285	error = vfs_nmount(curthread, flags, &auio);
286	return (error);
287}
288
289int
290kernel_vmount(int flags, ...)
291{
292	struct iovec *iovp;
293	struct uio auio;
294	va_list ap;
295	unsigned int iovcnt, iovlen, len;
296	const char *cp;
297	char *buf, *pos;
298	size_t n;
299	int error, i;
300
301	len = 0;
302	va_start(ap, flags);
303	for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
304		len += strlen(cp) + 1;
305	va_end(ap);
306
307	if (iovcnt < 4 || iovcnt & 1)
308		return (EINVAL);
309
310	iovlen = iovcnt * sizeof (struct iovec);
311	MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
312	MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
313	pos = buf;
314	va_start(ap, flags);
315	for (i = 0; i < iovcnt; i++) {
316		cp = va_arg(ap, const char *);
317		copystr(cp, pos, len - (pos - buf), &n);
318		iovp[i].iov_base = pos;
319		iovp[i].iov_len = n;
320		pos += n;
321	}
322	va_end(ap);
323
324	auio.uio_iov = iovp;
325	auio.uio_iovcnt = iovcnt;
326	auio.uio_segflg = UIO_SYSSPACE;
327
328	error = vfs_nmount(curthread, flags, &auio);
329	FREE(iovp, M_MOUNT);
330	FREE(buf, M_MOUNT);
331	return (error);
332}
333
334/*
335 * vfs_nmount(): actually attempt a filesystem mount.
336 */
337static int
338vfs_nmount(td, fsflags, fsoptions)
339	struct thread *td;
340	int fsflags;		/* Flags common to all filesystems. */
341	struct uio *fsoptions;	/* Options local to the filesystem. */
342{
343	linker_file_t lf;
344	struct vnode *vp;
345	struct mount *mp;
346	struct vfsconf *vfsp;
347	struct vfsoptlist *optlist;
348	char *fstype, *fspath;
349	int error, flag = 0, kern_flag = 0;
350	int fstypelen, fspathlen;
351	struct vattr va;
352	struct nameidata nd;
353
354	error = vfs_buildopts(fsoptions, &optlist);
355	if (error)
356		return (error);
357
358	/*
359	 * We need these two options before the others,
360	 * and they are mandatory for any filesystem.
361	 * Ensure they are NUL terminated as well.
362	 */
363	fstypelen = 0;
364	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
365	if (error || fstype[fstypelen - 1] != '\0') {
366		error = EINVAL;
367		goto bad;
368	}
369	fspathlen = 0;
370	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
371	if (error || fspath[fspathlen - 1] != '\0') {
372		error = EINVAL;
373		goto bad;
374	}
375
376	/*
377	 * Be ultra-paranoid about making sure the type and fspath
378	 * variables will fit in our mp buffers, including the
379	 * terminating NUL.
380	 */
381	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
382		error = ENAMETOOLONG;
383		goto bad;
384	}
385
386	if (usermount == 0) {
387	       	error = suser(td);
388		if (error)
389			goto bad;
390	}
391	/*
392	 * Do not allow NFS export by non-root users.
393	 */
394	if (fsflags & MNT_EXPORTED) {
395		error = suser(td);
396		if (error)
397			goto bad;
398	}
399	/*
400	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
401	 */
402	if (suser(td))
403		fsflags |= MNT_NOSUID | MNT_NODEV;
404	/*
405	 * Get vnode to be covered
406	 */
407	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
408	if ((error = namei(&nd)) != 0)
409		goto bad;
410	NDFREE(&nd, NDF_ONLY_PNBUF);
411	vp = nd.ni_vp;
412	if (fsflags & MNT_UPDATE) {
413		if ((vp->v_flag & VROOT) == 0) {
414			vput(vp);
415			error = EINVAL;
416			goto bad;
417		}
418		mp = vp->v_mount;
419		flag = mp->mnt_flag;
420		kern_flag = mp->mnt_kern_flag;
421		/*
422		 * We only allow the filesystem to be reloaded if it
423		 * is currently mounted read-only.
424		 */
425		if ((fsflags & MNT_RELOAD) &&
426		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
427			vput(vp);
428			error = EOPNOTSUPP;	/* Needs translation */
429			goto bad;
430		}
431		/*
432		 * Only root, or the user that did the original mount is
433		 * permitted to update it.
434		 */
435		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
436			error = suser(td);
437			if (error) {
438				vput(vp);
439				goto bad;
440			}
441		}
442		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
443			vput(vp);
444			error = EBUSY;
445			goto bad;
446		}
447		mtx_lock(&vp->v_interlock);
448		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
449			mtx_unlock(&vp->v_interlock);
450			vfs_unbusy(mp, td);
451			vput(vp);
452			error = EBUSY;
453			goto bad;
454		}
455		vp->v_flag |= VMOUNT;
456		mtx_unlock(&vp->v_interlock);
457		mp->mnt_flag |= fsflags &
458		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
459		VOP_UNLOCK(vp, 0, td);
460		goto update;
461	}
462	/*
463	 * If the user is not root, ensure that they own the directory
464	 * onto which we are attempting to mount.
465	 */
466	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
467	if (error) {
468		vput(vp);
469		goto bad;
470	}
471	if (va.va_uid != td->td_ucred->cr_uid) {
472		error = suser(td);
473		if (error) {
474			vput(vp);
475			goto bad;
476		}
477	}
478	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
479		vput(vp);
480		goto bad;
481	}
482	if (vp->v_type != VDIR) {
483		vput(vp);
484		error = ENOTDIR;
485		goto bad;
486	}
487	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
488		if (!strcmp(vfsp->vfc_name, fstype))
489			break;
490	if (vfsp == NULL) {
491		/* Only load modules for root (very important!). */
492		error = suser(td);
493		if (error) {
494			vput(vp);
495			goto bad;
496		}
497		error = securelevel_gt(td->td_ucred, 0);
498		if (error) {
499			vput(vp);
500			goto bad;
501		}
502		error = linker_load_file(fstype, &lf);
503		if (error || lf == NULL) {
504			vput(vp);
505			if (lf == NULL)
506				error = ENODEV;
507			goto bad;
508		}
509		lf->userrefs++;
510		/* Look up again to see if the VFS was loaded. */
511		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
512			if (!strcmp(vfsp->vfc_name, fstype))
513				break;
514		if (vfsp == NULL) {
515			lf->userrefs--;
516			linker_file_unload(lf);
517			vput(vp);
518			error = ENODEV;
519			goto bad;
520		}
521	}
522	mtx_lock(&vp->v_interlock);
523	if ((vp->v_flag & VMOUNT) != 0 ||
524	    vp->v_mountedhere != NULL) {
525		mtx_unlock(&vp->v_interlock);
526		vput(vp);
527		error = EBUSY;
528		goto bad;
529	}
530	vp->v_flag |= VMOUNT;
531	mtx_unlock(&vp->v_interlock);
532
533	/*
534	 * Allocate and initialize the filesystem.
535	 */
536	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
537	TAILQ_INIT(&mp->mnt_nvnodelist);
538	TAILQ_INIT(&mp->mnt_reservedvnlist);
539	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
540	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
541	mp->mnt_op = vfsp->vfc_vfsops;
542	mp->mnt_vfc = vfsp;
543	vfsp->vfc_refcount++;
544	mp->mnt_stat.f_type = vfsp->vfc_typenum;
545	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
546	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
547	mp->mnt_vnodecovered = vp;
548	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
549	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
550	mp->mnt_iosize_max = DFLTPHYS;
551	VOP_UNLOCK(vp, 0, td);
552
553update:
554	mp->mnt_optnew = optlist;
555	/*
556	 * Check if the fs implements the new VFS_NMOUNT()
557	 * function, since the new system call was used.
558	 */
559	if (mp->mnt_op->vfs_mount != NULL) {
560		printf("%s doesn't support the new mount syscall\n",
561		    mp->mnt_vfc->vfc_name);
562		mtx_lock(&vp->v_interlock);
563		vp->v_flag &= ~VMOUNT;
564		mtx_unlock(&vp->v_interlock);
565		if (mp->mnt_flag & MNT_UPDATE)
566			vfs_unbusy(mp, td);
567		else {
568			mp->mnt_vfc->vfc_refcount--;
569			vfs_unbusy(mp, td);
570			free(mp, M_MOUNT);
571		}
572		vrele(vp);
573		error = EOPNOTSUPP;
574		goto bad;
575	}
576
577	/*
578	 * Set the mount level flags.
579	 */
580	if (fsflags & MNT_RDONLY)
581		mp->mnt_flag |= MNT_RDONLY;
582	else if (mp->mnt_flag & MNT_RDONLY)
583		mp->mnt_kern_flag |= MNTK_WANTRDWR;
584	mp->mnt_flag &=~ MNT_UPDATEMASK;
585	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
586	/*
587	 * Mount the filesystem.
588	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
589	 * get.  No freeing of cn_pnbuf.
590	 */
591	error = VFS_NMOUNT(mp, &nd, td);
592	if (!error) {
593		if (mp->mnt_opt != NULL)
594			vfs_freeopts(mp->mnt_opt);
595		mp->mnt_opt = mp->mnt_optnew;
596	}
597	/*
598	 * Prevent external consumers of mount
599	 * options to read mnt_optnew.
600	 */
601	mp->mnt_optnew = NULL;
602	if (mp->mnt_flag & MNT_UPDATE) {
603		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
604			mp->mnt_flag &= ~MNT_RDONLY;
605		mp->mnt_flag &=~
606		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
607		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
608		if (error) {
609			mp->mnt_flag = flag;
610			mp->mnt_kern_flag = kern_flag;
611		}
612		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
613			if (mp->mnt_syncer == NULL)
614				error = vfs_allocate_syncvnode(mp);
615		} else {
616			if (mp->mnt_syncer != NULL)
617				vrele(mp->mnt_syncer);
618			mp->mnt_syncer = NULL;
619		}
620		vfs_unbusy(mp, td);
621		mtx_lock(&vp->v_interlock);
622		vp->v_flag &= ~VMOUNT;
623		mtx_unlock(&vp->v_interlock);
624		vrele(vp);
625		return (error);
626	}
627	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
628	/*
629	 * Put the new filesystem on the mount list after root.
630	 */
631	cache_purge(vp);
632	if (!error) {
633		struct vnode *newdp;
634
635		mtx_lock(&vp->v_interlock);
636		vp->v_flag &= ~VMOUNT;
637		vp->v_mountedhere = mp;
638		mtx_unlock(&vp->v_interlock);
639		mtx_lock(&mountlist_mtx);
640		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
641		mtx_unlock(&mountlist_mtx);
642		if (VFS_ROOT(mp, &newdp))
643			panic("mount: lost mount");
644		checkdirs(vp, newdp);
645		vput(newdp);
646		VOP_UNLOCK(vp, 0, td);
647		if ((mp->mnt_flag & MNT_RDONLY) == 0)
648			error = vfs_allocate_syncvnode(mp);
649		vfs_unbusy(mp, td);
650		if ((error = VFS_START(mp, 0, td)) != 0) {
651			vrele(vp);
652			goto bad;
653		}
654	} else {
655		mtx_lock(&vp->v_interlock);
656		vp->v_flag &= ~VMOUNT;
657		mtx_unlock(&vp->v_interlock);
658		mp->mnt_vfc->vfc_refcount--;
659		vfs_unbusy(mp, td);
660		free(mp, M_MOUNT);
661		vput(vp);
662		goto bad;
663	}
664	return (0);
665bad:
666	vfs_freeopts(optlist);
667	return (error);
668}
669
670/*
671 * Old mount API.
672 */
673#ifndef _SYS_SYSPROTO_H_
674struct mount_args {
675	char	*type;
676	char	*path;
677	int	flags;
678	caddr_t	data;
679};
680#endif
681/* ARGSUSED */
682int
683mount(td, uap)
684	struct thread *td;
685	struct mount_args /* {
686		syscallarg(char *) type;
687		syscallarg(char *) path;
688		syscallarg(int) flags;
689		syscallarg(caddr_t) data;
690	} */ *uap;
691{
692	char *fstype;
693	char *fspath;
694	int error;
695
696	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
697	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
698
699	/*
700	 * vfs_mount() actually takes a kernel string for `type' and
701	 * `path' now, so extract them.
702	 */
703	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
704	if (error)
705		goto finish;
706	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
707	if (error)
708		goto finish;
709	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
710	    SCARG(uap, data));
711finish:
712	free(fstype, M_TEMP);
713	free(fspath, M_TEMP);
714	return (error);
715}
716
717/*
718 * vfs_mount(): actually attempt a filesystem mount.
719 *
720 * This routine is designed to be a "generic" entry point for routines
721 * that wish to mount a filesystem. All parameters except `fsdata' are
722 * pointers into kernel space. `fsdata' is currently still a pointer
723 * into userspace.
724 */
725int
726vfs_mount(td, fstype, fspath, fsflags, fsdata)
727	struct thread *td;
728	const char *fstype;
729	char *fspath;
730	int fsflags;
731	void *fsdata;
732{
733	linker_file_t lf;
734	struct vnode *vp;
735	struct mount *mp;
736	struct vfsconf *vfsp;
737	int error, flag = 0, kern_flag = 0;
738	struct vattr va;
739	struct nameidata nd;
740
741	/*
742	 * Be ultra-paranoid about making sure the type and fspath
743	 * variables will fit in our mp buffers, including the
744	 * terminating NUL.
745	 */
746	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
747		return (ENAMETOOLONG);
748
749	if (usermount == 0) {
750		error = suser(td);
751		if (error)
752			return (error);
753	}
754	/*
755	 * Do not allow NFS export by non-root users.
756	 */
757	if (fsflags & MNT_EXPORTED) {
758		error = suser(td);
759		if (error)
760			return (error);
761	}
762	/*
763	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
764	 */
765	if (suser(td))
766		fsflags |= MNT_NOSUID | MNT_NODEV;
767	/*
768	 * Get vnode to be covered
769	 */
770	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
771	if ((error = namei(&nd)) != 0)
772		return (error);
773	NDFREE(&nd, NDF_ONLY_PNBUF);
774	vp = nd.ni_vp;
775	if (fsflags & MNT_UPDATE) {
776		if ((vp->v_flag & VROOT) == 0) {
777			vput(vp);
778			return (EINVAL);
779		}
780		mp = vp->v_mount;
781		flag = mp->mnt_flag;
782		kern_flag = mp->mnt_kern_flag;
783		/*
784		 * We only allow the filesystem to be reloaded if it
785		 * is currently mounted read-only.
786		 */
787		if ((fsflags & MNT_RELOAD) &&
788		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
789			vput(vp);
790			return (EOPNOTSUPP);	/* Needs translation */
791		}
792		/*
793		 * Only root, or the user that did the original mount is
794		 * permitted to update it.
795		 */
796		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
797			error = suser(td);
798			if (error) {
799				vput(vp);
800				return (error);
801			}
802		}
803		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
804			vput(vp);
805			return (EBUSY);
806		}
807		mtx_lock(&vp->v_interlock);
808		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
809			mtx_unlock(&vp->v_interlock);
810			vfs_unbusy(mp, td);
811			vput(vp);
812			return (EBUSY);
813		}
814		vp->v_flag |= VMOUNT;
815		mtx_unlock(&vp->v_interlock);
816		mp->mnt_flag |= fsflags &
817		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
818		VOP_UNLOCK(vp, 0, td);
819		goto update;
820	}
821	/*
822	 * If the user is not root, ensure that they own the directory
823	 * onto which we are attempting to mount.
824	 */
825	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
826	if (error) {
827		vput(vp);
828		return (error);
829	}
830	if (va.va_uid != td->td_ucred->cr_uid) {
831		error = suser(td);
832		if (error) {
833			vput(vp);
834			return (error);
835		}
836	}
837	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
838		vput(vp);
839		return (error);
840	}
841	if (vp->v_type != VDIR) {
842		vput(vp);
843		return (ENOTDIR);
844	}
845	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
846		if (!strcmp(vfsp->vfc_name, fstype))
847			break;
848	if (vfsp == NULL) {
849		/* Only load modules for root (very important!). */
850		error = suser(td);
851		if (error) {
852			vput(vp);
853			return (error);
854		}
855		error = securelevel_gt(td->td_ucred, 0);
856		if (error) {
857			vput(vp);
858			return (error);
859		}
860		error = linker_load_file(fstype, &lf);
861		if (error || lf == NULL) {
862			vput(vp);
863			if (lf == NULL)
864				error = ENODEV;
865			return (error);
866		}
867		lf->userrefs++;
868		/* Look up again to see if the VFS was loaded. */
869		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
870			if (!strcmp(vfsp->vfc_name, fstype))
871				break;
872		if (vfsp == NULL) {
873			lf->userrefs--;
874			linker_file_unload(lf);
875			vput(vp);
876			return (ENODEV);
877		}
878	}
879	mtx_lock(&vp->v_interlock);
880	if ((vp->v_flag & VMOUNT) != 0 ||
881	    vp->v_mountedhere != NULL) {
882		mtx_unlock(&vp->v_interlock);
883		vput(vp);
884		return (EBUSY);
885	}
886	vp->v_flag |= VMOUNT;
887	mtx_unlock(&vp->v_interlock);
888
889	/*
890	 * Allocate and initialize the filesystem.
891	 */
892	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
893	TAILQ_INIT(&mp->mnt_nvnodelist);
894	TAILQ_INIT(&mp->mnt_reservedvnlist);
895	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
896	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
897	mp->mnt_op = vfsp->vfc_vfsops;
898	mp->mnt_vfc = vfsp;
899	vfsp->vfc_refcount++;
900	mp->mnt_stat.f_type = vfsp->vfc_typenum;
901	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
902	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
903	mp->mnt_vnodecovered = vp;
904	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
905	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
906	mp->mnt_iosize_max = DFLTPHYS;
907	VOP_UNLOCK(vp, 0, td);
908update:
909	/*
910	 * Check if the fs implements the old VFS_MOUNT()
911	 * function, since the old system call was used.
912	 */
913	if (mp->mnt_op->vfs_mount == NULL) {
914		printf("%s doesn't support the old mount syscall\n",
915		    mp->mnt_vfc->vfc_name);
916		mtx_lock(&vp->v_interlock);
917		vp->v_flag &= ~VMOUNT;
918		mtx_unlock(&vp->v_interlock);
919		if (mp->mnt_flag & MNT_UPDATE)
920			vfs_unbusy(mp, td);
921		else {
922			mp->mnt_vfc->vfc_refcount--;
923			vfs_unbusy(mp, td);
924			free(mp, M_MOUNT);
925		}
926		vrele(vp);
927		return (EOPNOTSUPP);
928	}
929
930	/*
931	 * Set the mount level flags.
932	 */
933	if (fsflags & MNT_RDONLY)
934		mp->mnt_flag |= MNT_RDONLY;
935	else if (mp->mnt_flag & MNT_RDONLY)
936		mp->mnt_kern_flag |= MNTK_WANTRDWR;
937	mp->mnt_flag &=~ MNT_UPDATEMASK;
938	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
939	/*
940	 * Mount the filesystem.
941	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
942	 * get.  No freeing of cn_pnbuf.
943	 */
944	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
945	if (mp->mnt_flag & MNT_UPDATE) {
946		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
947			mp->mnt_flag &= ~MNT_RDONLY;
948		mp->mnt_flag &=~
949		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
950		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
951		if (error) {
952			mp->mnt_flag = flag;
953			mp->mnt_kern_flag = kern_flag;
954		}
955		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
956			if (mp->mnt_syncer == NULL)
957				error = vfs_allocate_syncvnode(mp);
958		} else {
959			if (mp->mnt_syncer != NULL)
960				vrele(mp->mnt_syncer);
961			mp->mnt_syncer = NULL;
962		}
963		vfs_unbusy(mp, td);
964		mtx_lock(&vp->v_interlock);
965		vp->v_flag &= ~VMOUNT;
966		mtx_unlock(&vp->v_interlock);
967		vrele(vp);
968		return (error);
969	}
970	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
971	/*
972	 * Put the new filesystem on the mount list after root.
973	 */
974	cache_purge(vp);
975	if (!error) {
976		struct vnode *newdp;
977
978		mtx_lock(&vp->v_interlock);
979		vp->v_flag &= ~VMOUNT;
980		vp->v_mountedhere = mp;
981		mtx_unlock(&vp->v_interlock);
982		mtx_lock(&mountlist_mtx);
983		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
984		mtx_unlock(&mountlist_mtx);
985		if (VFS_ROOT(mp, &newdp))
986			panic("mount: lost mount");
987		checkdirs(vp, newdp);
988		vput(newdp);
989		VOP_UNLOCK(vp, 0, td);
990		if ((mp->mnt_flag & MNT_RDONLY) == 0)
991			error = vfs_allocate_syncvnode(mp);
992		vfs_unbusy(mp, td);
993		if ((error = VFS_START(mp, 0, td)) != 0)
994			vrele(vp);
995	} else {
996		mtx_lock(&vp->v_interlock);
997		vp->v_flag &= ~VMOUNT;
998		mtx_unlock(&vp->v_interlock);
999		mp->mnt_vfc->vfc_refcount--;
1000		vfs_unbusy(mp, td);
1001		free(mp, M_MOUNT);
1002		vput(vp);
1003	}
1004	return (error);
1005}
1006
1007/*
1008 * Scan all active processes to see if any of them have a current
1009 * or root directory of `olddp'. If so, replace them with the new
1010 * mount point.
1011 */
1012static void
1013checkdirs(olddp, newdp)
1014	struct vnode *olddp, *newdp;
1015{
1016	struct filedesc *fdp;
1017	struct proc *p;
1018	int nrele;
1019
1020	if (olddp->v_usecount == 1)
1021		return;
1022	sx_slock(&allproc_lock);
1023	LIST_FOREACH(p, &allproc, p_list) {
1024		PROC_LOCK(p);
1025		fdp = p->p_fd;
1026		if (fdp == NULL) {
1027			PROC_UNLOCK(p);
1028			continue;
1029		}
1030		nrele = 0;
1031		FILEDESC_LOCK(fdp);
1032		if (fdp->fd_cdir == olddp) {
1033			VREF(newdp);
1034			fdp->fd_cdir = newdp;
1035			nrele++;
1036		}
1037		if (fdp->fd_rdir == olddp) {
1038			VREF(newdp);
1039			fdp->fd_rdir = newdp;
1040			nrele++;
1041		}
1042		FILEDESC_UNLOCK(fdp);
1043		PROC_UNLOCK(p);
1044		while (nrele--)
1045			vrele(olddp);
1046	}
1047	sx_sunlock(&allproc_lock);
1048	if (rootvnode == olddp) {
1049		vrele(rootvnode);
1050		VREF(newdp);
1051		rootvnode = newdp;
1052	}
1053}
1054
1055/*
1056 * Unmount a filesystem.
1057 *
1058 * Note: unmount takes a path to the vnode mounted on as argument,
1059 * not special file (as before).
1060 */
1061#ifndef _SYS_SYSPROTO_H_
1062struct unmount_args {
1063	char	*path;
1064	int	flags;
1065};
1066#endif
1067/* ARGSUSED */
1068int
1069unmount(td, uap)
1070	struct thread *td;
1071	register struct unmount_args /* {
1072		syscallarg(char *) path;
1073		syscallarg(int) flags;
1074	} */ *uap;
1075{
1076	register struct vnode *vp;
1077	struct mount *mp;
1078	int error;
1079	struct nameidata nd;
1080
1081	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1082	    SCARG(uap, path), td);
1083	if ((error = namei(&nd)) != 0)
1084		return (error);
1085	vp = nd.ni_vp;
1086	NDFREE(&nd, NDF_ONLY_PNBUF);
1087	mp = vp->v_mount;
1088
1089	/*
1090	 * Only root, or the user that did the original mount is
1091	 * permitted to unmount this filesystem.
1092	 */
1093	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
1094		error = suser(td);
1095		if (error) {
1096			vput(vp);
1097			return (error);
1098		}
1099	}
1100
1101	/*
1102	 * Don't allow unmounting the root filesystem.
1103	 */
1104	if (mp->mnt_flag & MNT_ROOTFS) {
1105		vput(vp);
1106		return (EINVAL);
1107	}
1108
1109	/*
1110	 * Must be the root of the filesystem
1111	 */
1112	if ((vp->v_flag & VROOT) == 0) {
1113		vput(vp);
1114		return (EINVAL);
1115	}
1116	vput(vp);
1117	return (dounmount(mp, SCARG(uap, flags), td));
1118}
1119
1120/*
1121 * Do the actual filesystem unmount.
1122 */
1123int
1124dounmount(mp, flags, td)
1125	struct mount *mp;
1126	int flags;
1127	struct thread *td;
1128{
1129	struct vnode *coveredvp, *fsrootvp;
1130	int error;
1131	int async_flag;
1132
1133	mtx_lock(&mountlist_mtx);
1134	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1135		mtx_unlock(&mountlist_mtx);
1136		return (EBUSY);
1137	}
1138	mp->mnt_kern_flag |= MNTK_UNMOUNT;
1139	/* Allow filesystems to detect that a forced unmount is in progress. */
1140	if (flags & MNT_FORCE)
1141		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1142	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1143	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
1144	if (error) {
1145		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1146		if (mp->mnt_kern_flag & MNTK_MWAIT)
1147			wakeup(mp);
1148		return (error);
1149	}
1150	vn_start_write(NULL, &mp, V_WAIT);
1151
1152	if (mp->mnt_flag & MNT_EXPUBLIC)
1153		vfs_setpublicfs(NULL, NULL, NULL);
1154
1155	vfs_msync(mp, MNT_WAIT);
1156	async_flag = mp->mnt_flag & MNT_ASYNC;
1157	mp->mnt_flag &=~ MNT_ASYNC;
1158	cache_purgevfs(mp);	/* remove cache entries for this file sys */
1159	if (mp->mnt_syncer != NULL)
1160		vrele(mp->mnt_syncer);
1161	/* Move process cdir/rdir refs on fs root to underlying vnode. */
1162	if (VFS_ROOT(mp, &fsrootvp) == 0) {
1163		if (mp->mnt_vnodecovered != NULL)
1164			checkdirs(fsrootvp, mp->mnt_vnodecovered);
1165		if (fsrootvp == rootvnode) {
1166			vrele(rootvnode);
1167			rootvnode = NULL;
1168		}
1169		vput(fsrootvp);
1170	}
1171	if (((mp->mnt_flag & MNT_RDONLY) ||
1172	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
1173	    (flags & MNT_FORCE)) {
1174		error = VFS_UNMOUNT(mp, flags, td);
1175	}
1176	vn_finished_write(mp);
1177	if (error) {
1178		/* Undo cdir/rdir and rootvnode changes made above. */
1179		if (VFS_ROOT(mp, &fsrootvp) == 0) {
1180			if (mp->mnt_vnodecovered != NULL)
1181				checkdirs(mp->mnt_vnodecovered, fsrootvp);
1182			if (rootvnode == NULL) {
1183				rootvnode = fsrootvp;
1184				vref(rootvnode);
1185			}
1186			vput(fsrootvp);
1187		}
1188		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1189			(void) vfs_allocate_syncvnode(mp);
1190		mtx_lock(&mountlist_mtx);
1191		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1192		mp->mnt_flag |= async_flag;
1193		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
1194		    &mountlist_mtx, td);
1195		if (mp->mnt_kern_flag & MNTK_MWAIT)
1196			wakeup(mp);
1197		return (error);
1198	}
1199	mtx_lock(&mountlist_mtx);
1200	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1201	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1202		coveredvp->v_mountedhere = NULL;
1203	mp->mnt_vfc->vfc_refcount--;
1204	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
1205		panic("unmount: dangling vnode");
1206	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
1207	lockdestroy(&mp->mnt_lock);
1208	if (coveredvp != NULL)
1209		vrele(coveredvp);
1210	if (mp->mnt_kern_flag & MNTK_MWAIT)
1211		wakeup(mp);
1212	if (mp->mnt_op->vfs_mount == NULL)
1213		vfs_freeopts(mp->mnt_opt);
1214	free(mp, M_MOUNT);
1215	return (0);
1216}
1217
1218/*
1219 * Lookup a filesystem type, and if found allocate and initialize
1220 * a mount structure for it.
1221 *
1222 * Devname is usually updated by mount(8) after booting.
1223 */
1224int
1225vfs_rootmountalloc(fstypename, devname, mpp)
1226	char *fstypename;
1227	char *devname;
1228	struct mount **mpp;
1229{
1230	struct thread *td = curthread;	/* XXX */
1231	struct vfsconf *vfsp;
1232	struct mount *mp;
1233
1234	if (fstypename == NULL)
1235		return (ENODEV);
1236	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1237		if (!strcmp(vfsp->vfc_name, fstypename))
1238			break;
1239	if (vfsp == NULL)
1240		return (ENODEV);
1241	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
1242	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
1243	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
1244	TAILQ_INIT(&mp->mnt_nvnodelist);
1245	TAILQ_INIT(&mp->mnt_reservedvnlist);
1246	mp->mnt_vfc = vfsp;
1247	mp->mnt_op = vfsp->vfc_vfsops;
1248	mp->mnt_flag = MNT_RDONLY;
1249	mp->mnt_vnodecovered = NULLVP;
1250	vfsp->vfc_refcount++;
1251	mp->mnt_iosize_max = DFLTPHYS;
1252	mp->mnt_stat.f_type = vfsp->vfc_typenum;
1253	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1254	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
1255	mp->mnt_stat.f_mntonname[0] = '/';
1256	mp->mnt_stat.f_mntonname[1] = 0;
1257	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
1258	*mpp = mp;
1259	return (0);
1260}
1261
1262/*
1263 * Find and mount the root filesystem
1264 */
1265void
1266vfs_mountroot(void)
1267{
1268	char		*cp;
1269	int		i, error;
1270
1271	/*
1272	 * The root filesystem information is compiled in, and we are
1273	 * booted with instructions to use it.
1274	 */
1275#ifdef ROOTDEVNAME
1276	if ((boothowto & RB_DFLTROOT) &&
1277	    !vfs_mountroot_try(ROOTDEVNAME))
1278		return;
1279#endif
1280	/*
1281	 * We are booted with instructions to prompt for the root filesystem,
1282	 * or to use the compiled-in default when it doesn't exist.
1283	 */
1284	if (boothowto & (RB_DFLTROOT | RB_ASKNAME)) {
1285		if (!vfs_mountroot_ask())
1286			return;
1287	}
1288
1289	/*
1290	 * We've been given the generic "use CDROM as root" flag.  This is
1291	 * necessary because one media may be used in many different
1292	 * devices, so we need to search for them.
1293	 */
1294	if (boothowto & RB_CDROM) {
1295		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1296			if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1297				return;
1298		}
1299	}
1300
1301	/*
1302	 * Try to use the value read by the loader from /etc/fstab, or
1303	 * supplied via some other means.  This is the preferred
1304	 * mechanism.
1305	 */
1306	if ((cp = getenv("vfs.root.mountfrom")) != NULL) {
1307		error = vfs_mountroot_try(cp);
1308		freeenv(cp);
1309		if (!error)
1310			return;
1311	}
1312
1313	/*
1314	 * Try values that may have been computed by the machine-dependant
1315	 * legacy code.
1316	 */
1317	if (!vfs_mountroot_try(rootdevnames[0]))
1318		return;
1319	if (!vfs_mountroot_try(rootdevnames[1]))
1320		return;
1321
1322	/*
1323	 * If we have a compiled-in default, and haven't already tried it, try
1324	 * it now.
1325	 */
1326#ifdef ROOTDEVNAME
1327	if (!(boothowto & RB_DFLTROOT))
1328		if (!vfs_mountroot_try(ROOTDEVNAME))
1329			return;
1330#endif
1331
1332	/*
1333	 * Everything so far has failed, prompt on the console if we haven't
1334	 * already tried that.
1335	 */
1336	if (!(boothowto & (RB_DFLTROOT | RB_ASKNAME)) && !vfs_mountroot_ask())
1337		return;
1338	panic("Root mount failed, startup aborted.");
1339}
1340
1341/*
1342 * Mount (mountfrom) as the root filesystem.
1343 */
1344static int
1345vfs_mountroot_try(char *mountfrom)
1346{
1347        struct mount	*mp;
1348	char		*vfsname, *path;
1349	int		error;
1350	char		patt[32];
1351	int		s;
1352
1353	vfsname = NULL;
1354	path    = NULL;
1355	mp      = NULL;
1356	error   = EINVAL;
1357
1358	if (mountfrom == NULL)
1359		return(error);		/* don't complain */
1360
1361	s = splcam();			/* Overkill, but annoying without it */
1362	printf("Mounting root from %s\n", mountfrom);
1363	splx(s);
1364
1365	/* parse vfs name and path */
1366	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1367	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1368	vfsname[0] = path[0] = 0;
1369	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1370	if (sscanf(mountfrom, patt, vfsname, path) < 1)
1371		goto done;
1372
1373	/* allocate a root mount */
1374	error = vfs_rootmountalloc(vfsname, path[0] != 0 ? path : ROOTNAME,
1375				   &mp);
1376	if (error != 0) {
1377		printf("Can't allocate root mount for filesystem '%s': %d\n",
1378		       vfsname, error);
1379		goto done;
1380	}
1381	mp->mnt_flag |= MNT_ROOTFS;
1382
1383	/* do our best to set rootdev */
1384	if ((path[0] != 0) && setrootbyname(path))
1385		printf("setrootbyname failed\n");
1386
1387	/* If the root device is a type "memory disk", mount RW */
1388	if (rootdev != NODEV && devsw(rootdev) &&
1389	    (devsw(rootdev)->d_flags & D_MEMDISK))
1390		mp->mnt_flag &= ~MNT_RDONLY;
1391
1392	/*
1393	 * Set the mount path to be something useful, because the
1394	 * filesystem code isn't responsible now for initialising
1395	 * f_mntonname unless they want to override the default
1396	 * (which is `path'.)
1397	 */
1398	strncpy(mp->mnt_stat.f_mntonname, "/", MNAMELEN);
1399
1400	error = VFS_MOUNT(mp, NULL, NULL, NULL, curthread);
1401
1402done:
1403	if (vfsname != NULL)
1404		free(vfsname, M_MOUNT);
1405	if (path != NULL)
1406		free(path, M_MOUNT);
1407	if (error != 0) {
1408		if (mp != NULL) {
1409			vfs_unbusy(mp, curthread);
1410			free(mp, M_MOUNT);
1411		}
1412		printf("Root mount failed: %d\n", error);
1413	} else {
1414
1415		/* register with list of mounted filesystems */
1416		mtx_lock(&mountlist_mtx);
1417		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1418		mtx_unlock(&mountlist_mtx);
1419
1420		/* sanity check system clock against root fs timestamp */
1421		inittodr(mp->mnt_time);
1422		vfs_unbusy(mp, curthread);
1423		error = VFS_START(mp, 0, curthread);
1424	}
1425	return(error);
1426}
1427
1428/*
1429 * Spin prompting on the console for a suitable root filesystem
1430 */
1431static int
1432vfs_mountroot_ask(void)
1433{
1434	char name[128];
1435	int i;
1436	dev_t dev;
1437
1438	for(;;) {
1439		printf("\nManual root filesystem specification:\n");
1440		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
1441#if defined(__i386__) || defined(__ia64__)
1442		printf("                       eg. ufs:da0s1a\n");
1443#else
1444		printf("                       eg. ufs:da0a\n");
1445#endif
1446		printf("  ?                  List valid disk boot devices\n");
1447		printf("  <empty line>       Abort manual input\n");
1448		printf("\nmountroot> ");
1449		gets(name);
1450		if (name[0] == 0)
1451			return(1);
1452		if (name[0] == '?') {
1453			printf("Possibly valid devices for 'ufs' root:\n");
1454			for (i = 0; i < NUMCDEVSW; i++) {
1455				dev = makedev(i, 0);
1456				if (devsw(dev) != NULL)
1457					printf(" \"%s\"", devsw(dev)->d_name);
1458			}
1459			printf("\n");
1460			continue;
1461		}
1462		if (!vfs_mountroot_try(name))
1463			return(0);
1464	}
1465}
1466
1467/*
1468 * Local helper function for vfs_mountroot_ask.
1469 */
1470static void
1471gets(char *cp)
1472{
1473	char *lp;
1474	int c;
1475
1476	lp = cp;
1477	for (;;) {
1478		printf("%c", c = cngetc() & 0177);
1479		switch (c) {
1480		case -1:
1481		case '\n':
1482		case '\r':
1483			*lp++ = '\0';
1484			return;
1485		case '\b':
1486		case '\177':
1487			if (lp > cp) {
1488				printf(" \b");
1489				lp--;
1490			}
1491			continue;
1492		case '#':
1493			lp--;
1494			if (lp < cp)
1495				lp = cp;
1496			continue;
1497		case '@':
1498		case 'u' & 037:
1499			lp = cp;
1500			printf("%c", '\n');
1501			continue;
1502		default:
1503			*lp++ = c;
1504		}
1505	}
1506}
1507
1508/*
1509 * Convert a given name to the dev_t of the disk-like device
1510 * it refers to.
1511 */
1512dev_t
1513getdiskbyname(char *name) {
1514	char *cp;
1515	dev_t dev;
1516
1517	cp = name;
1518	if (!bcmp(cp, "/dev/", 5))
1519		cp += 5;
1520
1521	dev = NODEV;
1522	EVENTHANDLER_INVOKE(dev_clone, cp, strlen(cp), &dev);
1523	return (dev);
1524}
1525
1526/*
1527 * Set rootdev to match (name), given that we expect it to
1528 * refer to a disk-like device.
1529 */
1530static int
1531setrootbyname(char *name)
1532{
1533	dev_t diskdev;
1534
1535	diskdev = getdiskbyname(name);
1536	if (diskdev != NODEV) {
1537		rootdev = diskdev;
1538		return (0);
1539	}
1540
1541	return (1);
1542}
1543
/*
 * DDB command "show disk/<devicename>": print the dev_t that
 * getdiskbyname() returns for the name given as the command modifier.
 */
#ifdef DDB
DB_SHOW_COMMAND(disk, db_getdiskbyname)
{
	dev_t resolved;

	/* The device name is mandatory. */
	if (modif[0] == '\0') {
		db_error("usage: show disk/devicename");
		return;
	}

	resolved = getdiskbyname(modif);
	if (resolved == NODEV) {
		db_printf("No disk device matched.\n");
		return;
	}
	db_printf("dev_t = %p\n", resolved);
}
#endif
1561
1562/*
1563 * Get a mount option by its name.
1564 *
1565 * Return 0 if the option was found, ENOENT otherwise.
1566 * If len is non-NULL it will be filled with the length
1567 * of the option. If buf is non-NULL, it will be filled
1568 * with the address of the option.
1569 */
1570int
1571vfs_getopt(opts, name, buf, len)
1572	struct vfsoptlist *opts;
1573	const char *name;
1574	void **buf;
1575	int *len;
1576{
1577	struct vfsopt *opt;
1578
1579	TAILQ_FOREACH(opt, opts, link) {
1580		if (strcmp(name, opt->name) == 0) {
1581			if (len != NULL)
1582				*len = opt->len;
1583			if (buf != NULL)
1584				*buf = opt->value;
1585			return (0);
1586		}
1587	}
1588	return (ENOENT);
1589}
1590
1591/*
1592 * Find and copy a mount option.
1593 *
1594 * The size of the buffer has to be specified
1595 * in len, if it is not the same length as the
1596 * mount option, EINVAL is returned.
1597 * Returns ENOENT if the option is not found.
1598 */
1599int
1600vfs_copyopt(opts, name, dest, len)
1601	struct vfsoptlist *opts;
1602	const char *name;
1603	void *dest;
1604	int len;
1605{
1606	struct vfsopt *opt;
1607
1608	TAILQ_FOREACH(opt, opts, link) {
1609		if (strcmp(name, opt->name) == 0) {
1610			if (len != opt->len)
1611				return (EINVAL);
1612			bcopy(opt->value, dest, opt->len);
1613			return (0);
1614		}
1615	}
1616	return (ENOENT);
1617}
1618