vfs_mount.c revision 99423
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1999 Michael Smith
 * All rights reserved.
 * Copyright (c) 1999 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/vfs_mount.c 99423 2002-07-05 05:15:30Z jeff $
 */

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/sysproto.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/vnode.h>

#include <machine/stdarg.h>

#include "opt_rootdevname.h"
#include "opt_ddb.h"

#ifdef DDB
#include <ddb/ddb.h>
#endif

9479968Sobrien#define ROOTNAME	"root_device"
9579968Sobrien
9679968Sobrienstatic void	checkdirs(struct vnode *olddp, struct vnode *newdp);
9779968Sobrienstatic int	vfs_nmount(struct thread *td, int, struct uio *);
98108746Sobrienstatic int	vfs_mountroot_try(char *mountfrom);
9979968Sobrienstatic int	vfs_mountroot_ask(void);
10079968Sobrienstatic void	gets(char *cp);
10179968Sobrien
10279968Sobrienstatic int	usermount = 0;	/* if 1, non-root can mount fs. */
10379968SobrienSYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
10479968Sobrien
10579968SobrienMALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
10679968Sobrien
10779968Sobrien/* List of mounted filesystems. */
10879968Sobrienstruct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
10979968Sobrien
11079968Sobrien/* For any iteration/modification of mountlist */
11179968Sobrienstruct mtx mountlist_mtx;
11279968Sobrien
11379968Sobrien/* For any iteration/modification of mnt_vnodelist */
11479968Sobrienstruct mtx mntvnode_mtx;
11579968Sobrien
11679968Sobrien/*
11779968Sobrien * The vnode of the system's root (/ in the filesystem, without chroot
11879968Sobrien * active.)
11979968Sobrien */
12079968Sobrienstruct vnode	*rootvnode;
12179968Sobrien
12279968Sobrien/*
123133936Sobrien * The root filesystem is detailed in the kernel environment variable
12479968Sobrien * vfs.root.mountfrom, which is expected to be in the general format
12579968Sobrien *
12679968Sobrien * <vfsname>:[<path>]
12779968Sobrien * vfsname   := the name of a VFS known to the kernel and capable
12879968Sobrien *              of being mounted as root
12979968Sobrien * path      := disk device name or other data used by the filesystem
13079968Sobrien *              to locate its physical store
13179968Sobrien */
13279968Sobrien
13379968Sobrien/*
13479968Sobrien * The root specifiers we will try if RB_CDROM is specified.
13579968Sobrien */
13679968Sobrienstatic char *cdrom_rootdevnames[] = {
13779968Sobrien	"cd9660:cd0a",
13879968Sobrien	"cd9660:acd0a",
13979968Sobrien	"cd9660:wcd0a",
14079968Sobrien	NULL
14179968Sobrien};
14279968Sobrien
14379968Sobrien/* legacy find-root code */
14479968Sobrienchar		*rootdevnames[2] = {NULL, NULL};
14579968Sobrienstatic int	setrootbyname(char *name);
14679968Sobriendev_t		rootdev = NODEV;
147133936Sobrien
148133936Sobrien/*
149133936Sobrien * Release all resources related to the
150133936Sobrien * mount options.
151133936Sobrien */
152133936Sobrienstatic void
153133936Sobrienvfs_freeopts(struct vfsoptlist *opts)
154133936Sobrien{
155133936Sobrien	struct vfsopt *opt;
156133936Sobrien
157161764Sobrien	while (!TAILQ_EMPTY(opts)) {
158161764Sobrien		opt = TAILQ_FIRST(opts);
159161764Sobrien		TAILQ_REMOVE(opts, opt, link);
160161764Sobrien		free(opt->name, M_MOUNT);
161161764Sobrien		free(opt->value, M_MOUNT);
162161764Sobrien		free(opt, M_MOUNT);
163133936Sobrien	}
164	free(opts, M_MOUNT);
165}
166
167/*
168 * Build a linked list of mount options from a struct uio.
169 */
170static int
171vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
172{
173	struct vfsoptlist *opts;
174	struct vfsopt *opt;
175	unsigned int i, iovcnt;
176	int error, namelen, optlen;
177
178	iovcnt = auio->uio_iovcnt;
179	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
180	TAILQ_INIT(opts);
181	for (i = 0; i < iovcnt; i += 2) {
182		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
183		namelen = auio->uio_iov[i].iov_len;
184		optlen = auio->uio_iov[i + 1].iov_len;
185		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
186		opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
187		opt->len = optlen;
188		if (auio->uio_segflg == UIO_SYSSPACE) {
189			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
190			bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
191			    optlen);
192		} else {
193			error = copyin(auio->uio_iov[i].iov_base, opt->name,
194			    namelen);
195			if (!error)
196				error = copyin(auio->uio_iov[i + 1].iov_base,
197				    opt->value, optlen);
198			if (error)
199				goto bad;
200		}
201		TAILQ_INSERT_TAIL(opts, opt, link);
202	}
203	*options = opts;
204	return (0);
205bad:
206	vfs_freeopts(opts);
207	return (error);
208}
209
210/*
211 * New mount API.
212 */
213int
214nmount(td, uap)
215	struct thread *td;
216	struct nmount_args /* {
217		syscallarg(struct iovec *) iovp;
218		syscallarg(unsigned int) iovcnt;
219		syscallarg(int) flags;
220	} */ *uap;
221{
222	struct uio auio;
223	struct iovec *iov, *needfree;
224	struct iovec aiov[UIO_SMALLIOV];
225	unsigned int i;
226	int error;
227	u_int iovlen, iovcnt;
228
229	iovcnt = SCARG(uap, iovcnt);
230	iovlen = iovcnt * sizeof (struct iovec);
231	/*
232	 * Check that we have an even number of iovec's
233	 * and that we have at least two options.
234	 */
235	if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
236		return (EINVAL);
237
238	if (iovcnt > UIO_SMALLIOV) {
239		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
240		needfree = iov;
241	} else {
242		iov = aiov;
243		needfree = NULL;
244	}
245	auio.uio_iov = iov;
246	auio.uio_iovcnt = iovcnt;
247	auio.uio_segflg = UIO_USERSPACE;
248	if ((error = copyin(uap->iovp, iov, iovlen)))
249		goto finish;
250
251	for (i = 0; i < iovcnt; i++) {
252		if (iov->iov_len > MMAXOPTIONLEN) {
253			error = EINVAL;
254			goto finish;
255		}
256		iov++;
257	}
258	error = vfs_nmount(td, SCARG(uap, flags), &auio);
259finish:
260	if (needfree != NULL)
261		free(needfree, M_TEMP);
262	return (error);
263}
264
265int
266kernel_mount(iovp, iovcnt, flags)
267	struct iovec *iovp;
268	unsigned int iovcnt;
269	int flags;
270{
271	struct uio auio;
272	int error;
273
274	/*
275	 * Check that we have an even number of iovec's
276	 * and that we have at least two options.
277	 */
278	if ((iovcnt & 1) || (iovcnt < 4))
279		return (EINVAL);
280
281	auio.uio_iov = iovp;
282	auio.uio_iovcnt = iovcnt;
283	auio.uio_segflg = UIO_SYSSPACE;
284
285	error = vfs_nmount(curthread, flags, &auio);
286	return (error);
287}
288
289int
290kernel_vmount(int flags, ...)
291{
292	struct iovec *iovp;
293	struct uio auio;
294	va_list ap;
295	unsigned int iovcnt, iovlen, len;
296	const char *cp;
297	char *buf, *pos;
298	size_t n;
299	int error, i;
300
301	len = 0;
302	va_start(ap, flags);
303	for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
304		len += strlen(cp) + 1;
305	va_end(ap);
306
307	if (iovcnt < 4 || iovcnt & 1)
308		return (EINVAL);
309
310	iovlen = iovcnt * sizeof (struct iovec);
311	MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
312	MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
313	pos = buf;
314	va_start(ap, flags);
315	for (i = 0; i < iovcnt; i++) {
316		cp = va_arg(ap, const char *);
317		copystr(cp, pos, len - (pos - buf), &n);
318		iovp[i].iov_base = pos;
319		iovp[i].iov_len = n;
320		pos += n;
321	}
322	va_end(ap);
323
324	auio.uio_iov = iovp;
325	auio.uio_iovcnt = iovcnt;
326	auio.uio_segflg = UIO_SYSSPACE;
327
328	error = vfs_nmount(curthread, flags, &auio);
329	FREE(iovp, M_MOUNT);
330	FREE(buf, M_MOUNT);
331	return (error);
332}
333
334/*
335 * vfs_nmount(): actually attempt a filesystem mount.
336 */
337static int
338vfs_nmount(td, fsflags, fsoptions)
339	struct thread *td;
340	int fsflags;		/* Flags common to all filesystems. */
341	struct uio *fsoptions;	/* Options local to the filesystem. */
342{
343	linker_file_t lf;
344	struct vnode *vp;
345	struct mount *mp;
346	struct vfsconf *vfsp;
347	struct vfsoptlist *optlist;
348	char *fstype, *fspath;
349	int error, flag = 0, kern_flag = 0;
350	int fstypelen, fspathlen;
351	struct vattr va;
352	struct nameidata nd;
353
354	error = vfs_buildopts(fsoptions, &optlist);
355	if (error)
356		return (error);
357
358	/*
359	 * We need these two options before the others,
360	 * and they are mandatory for any filesystem.
361	 * Ensure they are NUL terminated as well.
362	 */
363	fstypelen = 0;
364	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
365	if (error || fstype[fstypelen - 1] != '\0') {
366		error = EINVAL;
367		goto bad;
368	}
369	fspathlen = 0;
370	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
371	if (error || fspath[fspathlen - 1] != '\0') {
372		error = EINVAL;
373		goto bad;
374	}
375
376	/*
377	 * Be ultra-paranoid about making sure the type and fspath
378	 * variables will fit in our mp buffers, including the
379	 * terminating NUL.
380	 */
381	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
382		error = ENAMETOOLONG;
383		goto bad;
384	}
385
386	if (usermount == 0) {
387	       	error = suser(td);
388		if (error)
389			goto bad;
390	}
391	/*
392	 * Do not allow NFS export by non-root users.
393	 */
394	if (fsflags & MNT_EXPORTED) {
395		error = suser(td);
396		if (error)
397			goto bad;
398	}
399	/*
400	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
401	 */
402	if (suser(td))
403		fsflags |= MNT_NOSUID | MNT_NODEV;
404	/*
405	 * Get vnode to be covered
406	 */
407	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
408	if ((error = namei(&nd)) != 0)
409		goto bad;
410	NDFREE(&nd, NDF_ONLY_PNBUF);
411	vp = nd.ni_vp;
412	if (fsflags & MNT_UPDATE) {
413		if ((vp->v_flag & VROOT) == 0) {
414			vput(vp);
415			error = EINVAL;
416			goto bad;
417		}
418		mp = vp->v_mount;
419		flag = mp->mnt_flag;
420		kern_flag = mp->mnt_kern_flag;
421		/*
422		 * We only allow the filesystem to be reloaded if it
423		 * is currently mounted read-only.
424		 */
425		if ((fsflags & MNT_RELOAD) &&
426		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
427			vput(vp);
428			error = EOPNOTSUPP;	/* Needs translation */
429			goto bad;
430		}
431		/*
432		 * Only root, or the user that did the original mount is
433		 * permitted to update it.
434		 */
435		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
436			error = suser(td);
437			if (error) {
438				vput(vp);
439				goto bad;
440			}
441		}
442		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
443			vput(vp);
444			error = EBUSY;
445			goto bad;
446		}
447		mtx_lock(&vp->v_interlock);
448		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
449			mtx_unlock(&vp->v_interlock);
450			vfs_unbusy(mp, td);
451			vput(vp);
452			error = EBUSY;
453			goto bad;
454		}
455		vp->v_flag |= VMOUNT;
456		mtx_unlock(&vp->v_interlock);
457		mp->mnt_flag |= fsflags &
458		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
459		VOP_UNLOCK(vp, 0, td);
460		goto update;
461	}
462	/*
463	 * If the user is not root, ensure that they own the directory
464	 * onto which we are attempting to mount.
465	 */
466	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
467	if (error) {
468		vput(vp);
469		goto bad;
470	}
471	if (va.va_uid != td->td_ucred->cr_uid) {
472		error = suser(td);
473		if (error) {
474			vput(vp);
475			goto bad;
476		}
477	}
478	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
479		vput(vp);
480		goto bad;
481	}
482	if (vp->v_type != VDIR) {
483		vput(vp);
484		error = ENOTDIR;
485		goto bad;
486	}
487	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
488		if (!strcmp(vfsp->vfc_name, fstype))
489			break;
490	if (vfsp == NULL) {
491		/* Only load modules for root (very important!). */
492		error = suser(td);
493		if (error) {
494			vput(vp);
495			goto bad;
496		}
497		error = securelevel_gt(td->td_ucred, 0);
498		if (error) {
499			vput(vp);
500			goto bad;
501		}
502		error = linker_load_file(fstype, &lf);
503		if (error || lf == NULL) {
504			vput(vp);
505			if (lf == NULL)
506				error = ENODEV;
507			goto bad;
508		}
509		lf->userrefs++;
510		/* Look up again to see if the VFS was loaded. */
511		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
512			if (!strcmp(vfsp->vfc_name, fstype))
513				break;
514		if (vfsp == NULL) {
515			lf->userrefs--;
516			linker_file_unload(lf);
517			vput(vp);
518			error = ENODEV;
519			goto bad;
520		}
521	}
522	mtx_lock(&vp->v_interlock);
523	if ((vp->v_flag & VMOUNT) != 0 ||
524	    vp->v_mountedhere != NULL) {
525		mtx_unlock(&vp->v_interlock);
526		vput(vp);
527		error = EBUSY;
528		goto bad;
529	}
530	vp->v_flag |= VMOUNT;
531	mtx_unlock(&vp->v_interlock);
532
533	/*
534	 * Allocate and initialize the filesystem.
535	 */
536	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
537	TAILQ_INIT(&mp->mnt_nvnodelist);
538	TAILQ_INIT(&mp->mnt_reservedvnlist);
539	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
540	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
541	mp->mnt_op = vfsp->vfc_vfsops;
542	mp->mnt_vfc = vfsp;
543	vfsp->vfc_refcount++;
544	mp->mnt_stat.f_type = vfsp->vfc_typenum;
545	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
546	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
547	mp->mnt_vnodecovered = vp;
548	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
549	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
550	mp->mnt_iosize_max = DFLTPHYS;
551	VOP_UNLOCK(vp, 0, td);
552
553update:
554	mp->mnt_optnew = optlist;
555	/*
556	 * Check if the fs implements the new VFS_NMOUNT()
557	 * function, since the new system call was used.
558	 */
559	if (mp->mnt_op->vfs_mount != NULL) {
560		printf("%s doesn't support the new mount syscall\n",
561		    mp->mnt_vfc->vfc_name);
562		mtx_lock(&vp->v_interlock);
563		vp->v_flag &= ~VMOUNT;
564		mtx_unlock(&vp->v_interlock);
565		if (mp->mnt_flag & MNT_UPDATE)
566			vfs_unbusy(mp, td);
567		else {
568			mp->mnt_vfc->vfc_refcount--;
569			vfs_unbusy(mp, td);
570			free(mp, M_MOUNT);
571		}
572		vrele(vp);
573		error = EOPNOTSUPP;
574		goto bad;
575	}
576
577	/*
578	 * Set the mount level flags.
579	 */
580	if (fsflags & MNT_RDONLY)
581		mp->mnt_flag |= MNT_RDONLY;
582	else if (mp->mnt_flag & MNT_RDONLY)
583		mp->mnt_kern_flag |= MNTK_WANTRDWR;
584	mp->mnt_flag &=~ MNT_UPDATEMASK;
585	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
586	/*
587	 * Mount the filesystem.
588	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
589	 * get.  No freeing of cn_pnbuf.
590	 */
591	error = VFS_NMOUNT(mp, &nd, td);
592	if (!error) {
593		if (mp->mnt_opt != NULL)
594			vfs_freeopts(mp->mnt_opt);
595		mp->mnt_opt = mp->mnt_optnew;
596	}
597	/*
598	 * Prevent external consumers of mount
599	 * options to read mnt_optnew.
600	 */
601	mp->mnt_optnew = NULL;
602	if (mp->mnt_flag & MNT_UPDATE) {
603		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
604			mp->mnt_flag &= ~MNT_RDONLY;
605		mp->mnt_flag &=~
606		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
607		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
608		if (error) {
609			mp->mnt_flag = flag;
610			mp->mnt_kern_flag = kern_flag;
611		}
612		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
613			if (mp->mnt_syncer == NULL)
614				error = vfs_allocate_syncvnode(mp);
615		} else {
616			if (mp->mnt_syncer != NULL)
617				vrele(mp->mnt_syncer);
618			mp->mnt_syncer = NULL;
619		}
620		vfs_unbusy(mp, td);
621		mtx_lock(&vp->v_interlock);
622		vp->v_flag &= ~VMOUNT;
623		mtx_unlock(&vp->v_interlock);
624		vrele(vp);
625		return (error);
626	}
627	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
628	/*
629	 * Put the new filesystem on the mount list after root.
630	 */
631	cache_purge(vp);
632	if (!error) {
633		struct vnode *newdp;
634
635		mtx_lock(&vp->v_interlock);
636		vp->v_flag &= ~VMOUNT;
637		vp->v_mountedhere = mp;
638		mtx_unlock(&vp->v_interlock);
639		mtx_lock(&mountlist_mtx);
640		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
641		mtx_unlock(&mountlist_mtx);
642		if (VFS_ROOT(mp, &newdp))
643			panic("mount: lost mount");
644		checkdirs(vp, newdp);
645		vput(newdp);
646		VOP_UNLOCK(vp, 0, td);
647		if ((mp->mnt_flag & MNT_RDONLY) == 0)
648			error = vfs_allocate_syncvnode(mp);
649		vfs_unbusy(mp, td);
650		if ((error = VFS_START(mp, 0, td)) != 0) {
651			vrele(vp);
652			goto bad;
653		}
654	} else {
655		mtx_lock(&vp->v_interlock);
656		vp->v_flag &= ~VMOUNT;
657		mtx_unlock(&vp->v_interlock);
658		mp->mnt_vfc->vfc_refcount--;
659		vfs_unbusy(mp, td);
660		free(mp, M_MOUNT);
661		vput(vp);
662		goto bad;
663	}
664	return (0);
665bad:
666	vfs_freeopts(optlist);
667	return (error);
668}
669
670/*
671 * Old mount API.
672 */
673#ifndef _SYS_SYSPROTO_H_
674struct mount_args {
675	char	*type;
676	char	*path;
677	int	flags;
678	caddr_t	data;
679};
680#endif
681/* ARGSUSED */
682int
683mount(td, uap)
684	struct thread *td;
685	struct mount_args /* {
686		syscallarg(char *) type;
687		syscallarg(char *) path;
688		syscallarg(int) flags;
689		syscallarg(caddr_t) data;
690	} */ *uap;
691{
692	char *fstype;
693	char *fspath;
694	int error;
695
696	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
697	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
698
699	/*
700	 * vfs_mount() actually takes a kernel string for `type' and
701	 * `path' now, so extract them.
702	 */
703	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
704	if (error)
705		goto finish;
706	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
707	if (error)
708		goto finish;
709	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
710	    SCARG(uap, data));
711finish:
712	free(fstype, M_TEMP);
713	free(fspath, M_TEMP);
714	return (error);
715}
716
717/*
718 * vfs_mount(): actually attempt a filesystem mount.
719 *
720 * This routine is designed to be a "generic" entry point for routines
721 * that wish to mount a filesystem. All parameters except `fsdata' are
722 * pointers into kernel space. `fsdata' is currently still a pointer
723 * into userspace.
724 */
725int
726vfs_mount(td, fstype, fspath, fsflags, fsdata)
727	struct thread *td;
728	const char *fstype;
729	char *fspath;
730	int fsflags;
731	void *fsdata;
732{
733	linker_file_t lf;
734	struct vnode *vp;
735	struct mount *mp;
736	struct vfsconf *vfsp;
737	int error, flag = 0, kern_flag = 0;
738	struct vattr va;
739	struct nameidata nd;
740
741	/*
742	 * Be ultra-paranoid about making sure the type and fspath
743	 * variables will fit in our mp buffers, including the
744	 * terminating NUL.
745	 */
746	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
747		return (ENAMETOOLONG);
748
749	if (usermount == 0) {
750		error = suser(td);
751		if (error)
752			return (error);
753	}
754	/*
755	 * Do not allow NFS export by non-root users.
756	 */
757	if (fsflags & MNT_EXPORTED) {
758		error = suser(td);
759		if (error)
760			return (error);
761	}
762	/*
763	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
764	 */
765	if (suser(td))
766		fsflags |= MNT_NOSUID | MNT_NODEV;
767	/*
768	 * Get vnode to be covered
769	 */
770	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
771	if ((error = namei(&nd)) != 0)
772		return (error);
773	NDFREE(&nd, NDF_ONLY_PNBUF);
774	vp = nd.ni_vp;
775	if (fsflags & MNT_UPDATE) {
776		if ((vp->v_flag & VROOT) == 0) {
777			vput(vp);
778			return (EINVAL);
779		}
780		mp = vp->v_mount;
781		flag = mp->mnt_flag;
782		kern_flag = mp->mnt_kern_flag;
783		/*
784		 * We only allow the filesystem to be reloaded if it
785		 * is currently mounted read-only.
786		 */
787		if ((fsflags & MNT_RELOAD) &&
788		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
789			vput(vp);
790			return (EOPNOTSUPP);	/* Needs translation */
791		}
792		/*
793		 * Only root, or the user that did the original mount is
794		 * permitted to update it.
795		 */
796		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
797			error = suser(td);
798			if (error) {
799				vput(vp);
800				return (error);
801			}
802		}
803		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
804			vput(vp);
805			return (EBUSY);
806		}
807		mtx_lock(&vp->v_interlock);
808		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
809			mtx_unlock(&vp->v_interlock);
810			vfs_unbusy(mp, td);
811			vput(vp);
812			return (EBUSY);
813		}
814		vp->v_flag |= VMOUNT;
815		mtx_unlock(&vp->v_interlock);
816		mp->mnt_flag |= fsflags &
817		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
818		VOP_UNLOCK(vp, 0, td);
819		goto update;
820	}
821	/*
822	 * If the user is not root, ensure that they own the directory
823	 * onto which we are attempting to mount.
824	 */
825	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
826	if (error) {
827		vput(vp);
828		return (error);
829	}
830	if (va.va_uid != td->td_ucred->cr_uid) {
831		error = suser(td);
832		if (error) {
833			vput(vp);
834			return (error);
835		}
836	}
837	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
838		vput(vp);
839		return (error);
840	}
841	if (vp->v_type != VDIR) {
842		vput(vp);
843		return (ENOTDIR);
844	}
845	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
846		if (!strcmp(vfsp->vfc_name, fstype))
847			break;
848	if (vfsp == NULL) {
849		/* Only load modules for root (very important!). */
850		error = suser(td);
851		if (error) {
852			vput(vp);
853			return (error);
854		}
855		error = securelevel_gt(td->td_ucred, 0);
856		if (error) {
857			vput(vp);
858			return (error);
859		}
860		error = linker_load_file(fstype, &lf);
861		if (error || lf == NULL) {
862			vput(vp);
863			if (lf == NULL)
864				error = ENODEV;
865			return (error);
866		}
867		lf->userrefs++;
868		/* Look up again to see if the VFS was loaded. */
869		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
870			if (!strcmp(vfsp->vfc_name, fstype))
871				break;
872		if (vfsp == NULL) {
873			lf->userrefs--;
874			linker_file_unload(lf);
875			vput(vp);
876			return (ENODEV);
877		}
878	}
879	mtx_lock(&vp->v_interlock);
880	if ((vp->v_flag & VMOUNT) != 0 ||
881	    vp->v_mountedhere != NULL) {
882		mtx_unlock(&vp->v_interlock);
883		vput(vp);
884		return (EBUSY);
885	}
886	vp->v_flag |= VMOUNT;
887	mtx_unlock(&vp->v_interlock);
888
889	/*
890	 * Allocate and initialize the filesystem.
891	 */
892	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
893	TAILQ_INIT(&mp->mnt_nvnodelist);
894	TAILQ_INIT(&mp->mnt_reservedvnlist);
895	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
896	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
897	mp->mnt_op = vfsp->vfc_vfsops;
898	mp->mnt_vfc = vfsp;
899	vfsp->vfc_refcount++;
900	mp->mnt_stat.f_type = vfsp->vfc_typenum;
901	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
902	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
903	mp->mnt_vnodecovered = vp;
904	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
905	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
906	mp->mnt_iosize_max = DFLTPHYS;
907	VOP_UNLOCK(vp, 0, td);
908update:
909	/*
910	 * Check if the fs implements the old VFS_MOUNT()
911	 * function, since the old system call was used.
912	 */
913	if (mp->mnt_op->vfs_mount == NULL) {
914		printf("%s doesn't support the old mount syscall\n",
915		    mp->mnt_vfc->vfc_name);
916		mtx_lock(&vp->v_interlock);
917		vp->v_flag &= ~VMOUNT;
918		mtx_unlock(&vp->v_interlock);
919		if (mp->mnt_flag & MNT_UPDATE)
920			vfs_unbusy(mp, td);
921		else {
922			mp->mnt_vfc->vfc_refcount--;
923			vfs_unbusy(mp, td);
924			free(mp, M_MOUNT);
925		}
926		vrele(vp);
927		return (EOPNOTSUPP);
928	}
929
930	/*
931	 * Set the mount level flags.
932	 */
933	if (fsflags & MNT_RDONLY)
934		mp->mnt_flag |= MNT_RDONLY;
935	else if (mp->mnt_flag & MNT_RDONLY)
936		mp->mnt_kern_flag |= MNTK_WANTRDWR;
937	mp->mnt_flag &=~ MNT_UPDATEMASK;
938	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
939	/*
940	 * Mount the filesystem.
941	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
942	 * get.  No freeing of cn_pnbuf.
943	 */
944	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
945	if (mp->mnt_flag & MNT_UPDATE) {
946		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
947			mp->mnt_flag &= ~MNT_RDONLY;
948		mp->mnt_flag &=~
949		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
950		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
951		if (error) {
952			mp->mnt_flag = flag;
953			mp->mnt_kern_flag = kern_flag;
954		}
955		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
956			if (mp->mnt_syncer == NULL)
957				error = vfs_allocate_syncvnode(mp);
958		} else {
959			if (mp->mnt_syncer != NULL)
960				vrele(mp->mnt_syncer);
961			mp->mnt_syncer = NULL;
962		}
963		vfs_unbusy(mp, td);
964		mtx_lock(&vp->v_interlock);
965		vp->v_flag &= ~VMOUNT;
966		mtx_unlock(&vp->v_interlock);
967		vrele(vp);
968		return (error);
969	}
970	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
971	/*
972	 * Put the new filesystem on the mount list after root.
973	 */
974	cache_purge(vp);
975	if (!error) {
976		struct vnode *newdp;
977
978		mtx_lock(&vp->v_interlock);
979		vp->v_flag &= ~VMOUNT;
980		vp->v_mountedhere = mp;
981		mtx_unlock(&vp->v_interlock);
982		mtx_lock(&mountlist_mtx);
983		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
984		mtx_unlock(&mountlist_mtx);
985		if (VFS_ROOT(mp, &newdp))
986			panic("mount: lost mount");
987		checkdirs(vp, newdp);
988		vput(newdp);
989		VOP_UNLOCK(vp, 0, td);
990		if ((mp->mnt_flag & MNT_RDONLY) == 0)
991			error = vfs_allocate_syncvnode(mp);
992		vfs_unbusy(mp, td);
993		if ((error = VFS_START(mp, 0, td)) != 0)
994			vrele(vp);
995	} else {
996		mtx_lock(&vp->v_interlock);
997		vp->v_flag &= ~VMOUNT;
998		mtx_unlock(&vp->v_interlock);
999		mp->mnt_vfc->vfc_refcount--;
1000		vfs_unbusy(mp, td);
1001		free(mp, M_MOUNT);
1002		vput(vp);
1003	}
1004	return (error);
1005}
1006
1007/*
1008 * Scan all active processes to see if any of them have a current
1009 * or root directory of `olddp'. If so, replace them with the new
1010 * mount point.
1011 */
1012static void
1013checkdirs(olddp, newdp)
1014	struct vnode *olddp, *newdp;
1015{
1016	struct filedesc *fdp;
1017	struct proc *p;
1018	int nrele;
1019
1020	if (olddp->v_usecount == 1)
1021		return;
1022	sx_slock(&allproc_lock);
1023	LIST_FOREACH(p, &allproc, p_list) {
1024		PROC_LOCK(p);
1025		fdp = p->p_fd;
1026		if (fdp == NULL) {
1027			PROC_UNLOCK(p);
1028			continue;
1029		}
1030		nrele = 0;
1031		FILEDESC_LOCK(fdp);
1032		if (fdp->fd_cdir == olddp) {
1033			VREF(newdp);
1034			fdp->fd_cdir = newdp;
1035			nrele++;
1036		}
1037		if (fdp->fd_rdir == olddp) {
1038			VREF(newdp);
1039			fdp->fd_rdir = newdp;
1040			nrele++;
1041		}
1042		FILEDESC_UNLOCK(fdp);
1043		PROC_UNLOCK(p);
1044		while (nrele--)
1045			vrele(olddp);
1046	}
1047	sx_sunlock(&allproc_lock);
1048	if (rootvnode == olddp) {
1049		vrele(rootvnode);
1050		VREF(newdp);
1051		rootvnode = newdp;
1052	}
1053}
1054
1055/*
1056 * Unmount a filesystem.
1057 *
1058 * Note: unmount takes a path to the vnode mounted on as argument,
1059 * not special file (as before).
1060 */
1061#ifndef _SYS_SYSPROTO_H_
1062struct unmount_args {
1063	char	*path;
1064	int	flags;
1065};
1066#endif
1067/* ARGSUSED */
1068int
1069unmount(td, uap)
1070	struct thread *td;
1071	register struct unmount_args /* {
1072		syscallarg(char *) path;
1073		syscallarg(int) flags;
1074	} */ *uap;
1075{
1076	register struct vnode *vp;
1077	struct mount *mp;
1078	int error;
1079	struct nameidata nd;
1080
1081	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1082	    SCARG(uap, path), td);
1083	if ((error = namei(&nd)) != 0)
1084		return (error);
1085	vp = nd.ni_vp;
1086	NDFREE(&nd, NDF_ONLY_PNBUF);
1087	mp = vp->v_mount;
1088
1089	/*
1090	 * Only root, or the user that did the original mount is
1091	 * permitted to unmount this filesystem.
1092	 */
1093	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
1094		error = suser(td);
1095		if (error) {
1096			vput(vp);
1097			return (error);
1098		}
1099	}
1100
1101	/*
1102	 * Don't allow unmounting the root filesystem.
1103	 */
1104	if (mp->mnt_flag & MNT_ROOTFS) {
1105		vput(vp);
1106		return (EINVAL);
1107	}
1108
1109	/*
1110	 * Must be the root of the filesystem
1111	 */
1112	if ((vp->v_flag & VROOT) == 0) {
1113		vput(vp);
1114		return (EINVAL);
1115	}
1116	vput(vp);
1117	return (dounmount(mp, SCARG(uap, flags), td));
1118}
1119
1120/*
1121 * Do the actual filesystem unmount.
1122 */
1123int
1124dounmount(mp, flags, td)
1125	struct mount *mp;
1126	int flags;
1127	struct thread *td;
1128{
1129	struct vnode *coveredvp, *fsrootvp;
1130	int error;
1131	int async_flag;
1132
1133	mtx_lock(&mountlist_mtx);
1134	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1135		mtx_unlock(&mountlist_mtx);
1136		return (EBUSY);
1137	}
1138	mp->mnt_kern_flag |= MNTK_UNMOUNT;
1139	/* Allow filesystems to detect that a forced unmount is in progress. */
1140	if (flags & MNT_FORCE)
1141		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1142	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1143	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
1144	if (error) {
1145		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1146		if (mp->mnt_kern_flag & MNTK_MWAIT)
1147			wakeup(mp);
1148		return (error);
1149	}
1150	vn_start_write(NULL, &mp, V_WAIT);
1151
1152	if (mp->mnt_flag & MNT_EXPUBLIC)
1153		vfs_setpublicfs(NULL, NULL, NULL);
1154
1155	vfs_msync(mp, MNT_WAIT);
1156	async_flag = mp->mnt_flag & MNT_ASYNC;
1157	mp->mnt_flag &=~ MNT_ASYNC;
1158	cache_purgevfs(mp);	/* remove cache entries for this file sys */
1159	if (mp->mnt_syncer != NULL)
1160		vrele(mp->mnt_syncer);
1161	/* Move process cdir/rdir refs on fs root to underlying vnode. */
1162	if (VFS_ROOT(mp, &fsrootvp) == 0) {
1163		if (mp->mnt_vnodecovered != NULL)
1164			checkdirs(fsrootvp, mp->mnt_vnodecovered);
1165		if (fsrootvp == rootvnode) {
1166			vrele(rootvnode);
1167			rootvnode = NULL;
1168		}
1169		vput(fsrootvp);
1170	}
1171	if (((mp->mnt_flag & MNT_RDONLY) ||
1172	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
1173	    (flags & MNT_FORCE)) {
1174		error = VFS_UNMOUNT(mp, flags, td);
1175	}
1176	vn_finished_write(mp);
1177	if (error) {
1178		/* Undo cdir/rdir and rootvnode changes made above. */
1179		if (VFS_ROOT(mp, &fsrootvp) == 0) {
1180			if (mp->mnt_vnodecovered != NULL)
1181				checkdirs(mp->mnt_vnodecovered, fsrootvp);
1182			if (rootvnode == NULL) {
1183				rootvnode = fsrootvp;
1184				vref(rootvnode);
1185			}
1186			vput(fsrootvp);
1187		}
1188		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1189			(void) vfs_allocate_syncvnode(mp);
1190		mtx_lock(&mountlist_mtx);
1191		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1192		mp->mnt_flag |= async_flag;
1193		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
1194		    &mountlist_mtx, td);
1195		if (mp->mnt_kern_flag & MNTK_MWAIT)
1196			wakeup(mp);
1197		return (error);
1198	}
1199	mtx_lock(&mountlist_mtx);
1200	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1201	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1202		coveredvp->v_mountedhere = NULL;
1203	mp->mnt_vfc->vfc_refcount--;
1204	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
1205		panic("unmount: dangling vnode");
1206	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
1207	lockdestroy(&mp->mnt_lock);
1208	if (coveredvp != NULL)
1209		vrele(coveredvp);
1210	if (mp->mnt_kern_flag & MNTK_MWAIT)
1211		wakeup(mp);
1212	if (mp->mnt_op->vfs_mount == NULL)
1213		vfs_freeopts(mp->mnt_opt);
1214	free(mp, M_MOUNT);
1215	return (0);
1216}
1217
1218/*
1219 * Lookup a filesystem type, and if found allocate and initialize
1220 * a mount structure for it.
1221 *
1222 * Devname is usually updated by mount(8) after booting.
1223 */
1224int
1225vfs_rootmountalloc(fstypename, devname, mpp)
1226	char *fstypename;
1227	char *devname;
1228	struct mount **mpp;
1229{
1230	struct thread *td = curthread;	/* XXX */
1231	struct vfsconf *vfsp;
1232	struct mount *mp;
1233
1234	if (fstypename == NULL)
1235		return (ENODEV);
1236	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1237		if (!strcmp(vfsp->vfc_name, fstypename))
1238			break;
1239	if (vfsp == NULL)
1240		return (ENODEV);
1241	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
1242	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
1243	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
1244	TAILQ_INIT(&mp->mnt_nvnodelist);
1245	TAILQ_INIT(&mp->mnt_reservedvnlist);
1246	mp->mnt_vfc = vfsp;
1247	mp->mnt_op = vfsp->vfc_vfsops;
1248	mp->mnt_flag = MNT_RDONLY;
1249	mp->mnt_vnodecovered = NULLVP;
1250	vfsp->vfc_refcount++;
1251	mp->mnt_iosize_max = DFLTPHYS;
1252	mp->mnt_stat.f_type = vfsp->vfc_typenum;
1253	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1254	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
1255	mp->mnt_stat.f_mntonname[0] = '/';
1256	mp->mnt_stat.f_mntonname[1] = 0;
1257	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
1258	*mpp = mp;
1259	return (0);
1260}
1261
1262/*
1263 * Find and mount the root filesystem
1264 */
1265void
1266vfs_mountroot(void)
1267{
1268	char		*cp;
1269	int		i, error;
1270
1271	/*
1272	 * The root filesystem information is compiled in, and we are
1273	 * booted with instructions to use it.
1274	 */
1275#ifdef ROOTDEVNAME
1276	if ((boothowto & RB_DFLTROOT) &&
1277	    !vfs_mountroot_try(ROOTDEVNAME))
1278		return;
1279#endif
1280	/*
1281	 * We are booted with instructions to prompt for the root filesystem,
1282	 * or to use the compiled-in default when it doesn't exist.
1283	 */
1284	if (boothowto & (RB_DFLTROOT | RB_ASKNAME)) {
1285		if (!vfs_mountroot_ask())
1286			return;
1287	}
1288
1289	/*
1290	 * We've been given the generic "use CDROM as root" flag.  This is
1291	 * necessary because one media may be used in many different
1292	 * devices, so we need to search for them.
1293	 */
1294	if (boothowto & RB_CDROM) {
1295		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1296			if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1297				return;
1298		}
1299	}
1300
1301	/*
1302	 * Try to use the value read by the loader from /etc/fstab, or
1303	 * supplied via some other means.  This is the preferred
1304	 * mechanism.
1305	 */
1306	if ((cp = getenv("vfs.root.mountfrom")) != NULL) {
1307		error = vfs_mountroot_try(cp);
1308		freeenv(cp);
1309		if (!error)
1310			return;
1311	}
1312
1313	/*
1314	 * Try values that may have been computed by the machine-dependant
1315	 * legacy code.
1316	 */
1317	if (!vfs_mountroot_try(rootdevnames[0]))
1318		return;
1319	if (!vfs_mountroot_try(rootdevnames[1]))
1320		return;
1321
1322	/*
1323	 * If we have a compiled-in default, and haven't already tried it, try
1324	 * it now.
1325	 */
1326#ifdef ROOTDEVNAME
1327	if (!(boothowto & RB_DFLTROOT))
1328		if (!vfs_mountroot_try(ROOTDEVNAME))
1329			return;
1330#endif
1331
1332	/*
1333	 * Everything so far has failed, prompt on the console if we haven't
1334	 * already tried that.
1335	 */
1336	if (!(boothowto & (RB_DFLTROOT | RB_ASKNAME)) && !vfs_mountroot_ask())
1337		return;
1338	panic("Root mount failed, startup aborted.");
1339}
1340
1341/*
1342 * Mount (mountfrom) as the root filesystem.
1343 */
1344static int
1345vfs_mountroot_try(char *mountfrom)
1346{
1347        struct mount	*mp;
1348	char		*vfsname, *path;
1349	int		error;
1350	char		patt[32];
1351	int		s;
1352
1353	vfsname = NULL;
1354	path    = NULL;
1355	mp      = NULL;
1356	error   = EINVAL;
1357
1358	if (mountfrom == NULL)
1359		return(error);		/* don't complain */
1360
1361	s = splcam();			/* Overkill, but annoying without it */
1362	printf("Mounting root from %s\n", mountfrom);
1363	splx(s);
1364
1365	/* parse vfs name and path */
1366	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1367	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1368	vfsname[0] = path[0] = 0;
1369	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1370	if (sscanf(mountfrom, patt, vfsname, path) < 1)
1371		goto done;
1372
1373	/* allocate a root mount */
1374	error = vfs_rootmountalloc(vfsname, path[0] != 0 ? path : ROOTNAME,
1375				   &mp);
1376	if (error != 0) {
1377		printf("Can't allocate root mount for filesystem '%s': %d\n",
1378		       vfsname, error);
1379		goto done;
1380	}
1381	mp->mnt_flag |= MNT_ROOTFS;
1382
1383	/* do our best to set rootdev */
1384	if ((path[0] != 0) && setrootbyname(path))
1385		printf("setrootbyname failed\n");
1386
1387	/* If the root device is a type "memory disk", mount RW */
1388	if (rootdev != NODEV && devsw(rootdev) &&
1389	    (devsw(rootdev)->d_flags & D_MEMDISK))
1390		mp->mnt_flag &= ~MNT_RDONLY;
1391
1392	/*
1393	 * Set the mount path to be something useful, because the
1394	 * filesystem code isn't responsible now for initialising
1395	 * f_mntonname unless they want to override the default
1396	 * (which is `path'.)
1397	 */
1398	strncpy(mp->mnt_stat.f_mntonname, "/", MNAMELEN);
1399
1400	error = VFS_MOUNT(mp, NULL, NULL, NULL, curthread);
1401
1402done:
1403	if (vfsname != NULL)
1404		free(vfsname, M_MOUNT);
1405	if (path != NULL)
1406		free(path, M_MOUNT);
1407	if (error != 0) {
1408		if (mp != NULL) {
1409			vfs_unbusy(mp, curthread);
1410			free(mp, M_MOUNT);
1411		}
1412		printf("Root mount failed: %d\n", error);
1413	} else {
1414
1415		/* register with list of mounted filesystems */
1416		mtx_lock(&mountlist_mtx);
1417		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1418		mtx_unlock(&mountlist_mtx);
1419
1420		/* sanity check system clock against root filesystem timestamp */
1421		inittodr(mp->mnt_time);
1422		vfs_unbusy(mp, curthread);
1423	}
1424	return(error);
1425}
1426
1427/*
1428 * Spin prompting on the console for a suitable root filesystem
1429 */
1430static int
1431vfs_mountroot_ask(void)
1432{
1433	char name[128];
1434	int i;
1435	dev_t dev;
1436
1437	for(;;) {
1438		printf("\nManual root filesystem specification:\n");
1439		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
1440#if defined(__i386__) || defined(__ia64__)
1441		printf("                       eg. ufs:da0s1a\n");
1442#else
1443		printf("                       eg. ufs:da0a\n");
1444#endif
1445		printf("  ?                  List valid disk boot devices\n");
1446		printf("  <empty line>       Abort manual input\n");
1447		printf("\nmountroot> ");
1448		gets(name);
1449		if (name[0] == 0)
1450			return(1);
1451		if (name[0] == '?') {
1452			printf("Possibly valid devices for 'ufs' root:\n");
1453			for (i = 0; i < NUMCDEVSW; i++) {
1454				dev = makedev(i, 0);
1455				if (devsw(dev) != NULL)
1456					printf(" \"%s\"", devsw(dev)->d_name);
1457			}
1458			printf("\n");
1459			continue;
1460		}
1461		if (!vfs_mountroot_try(name))
1462			return(0);
1463	}
1464}
1465
1466/*
1467 * Local helper function for vfs_mountroot_ask.
1468 */
1469static void
1470gets(char *cp)
1471{
1472	char *lp;
1473	int c;
1474
1475	lp = cp;
1476	for (;;) {
1477		printf("%c", c = cngetc() & 0177);
1478		switch (c) {
1479		case -1:
1480		case '\n':
1481		case '\r':
1482			*lp++ = '\0';
1483			return;
1484		case '\b':
1485		case '\177':
1486			if (lp > cp) {
1487				printf(" \b");
1488				lp--;
1489			}
1490			continue;
1491		case '#':
1492			lp--;
1493			if (lp < cp)
1494				lp = cp;
1495			continue;
1496		case '@':
1497		case 'u' & 037:
1498			lp = cp;
1499			printf("%c", '\n');
1500			continue;
1501		default:
1502			*lp++ = c;
1503		}
1504	}
1505}
1506
1507/*
1508 * Convert a given name to the dev_t of the disk-like device
1509 * it refers to.
1510 */
1511dev_t
1512getdiskbyname(char *name) {
1513	char *cp;
1514	dev_t dev;
1515
1516	cp = name;
1517	if (!bcmp(cp, "/dev/", 5))
1518		cp += 5;
1519
1520	dev = NODEV;
1521	EVENTHANDLER_INVOKE(dev_clone, cp, strlen(cp), &dev);
1522	return (dev);
1523}
1524
1525/*
1526 * Set rootdev to match (name), given that we expect it to
1527 * refer to a disk-like device.
1528 */
1529static int
1530setrootbyname(char *name)
1531{
1532	dev_t diskdev;
1533
1534	diskdev = getdiskbyname(name);
1535	if (diskdev != NODEV) {
1536		rootdev = diskdev;
1537		return (0);
1538	}
1539
1540	return (1);
1541}
1542
/*
 * DDB "show disk/<devicename>": print the dev_t that getdiskbyname()
 * returns for the device named in the command modifier.
 */
#ifdef DDB
DB_SHOW_COMMAND(disk, db_getdiskbyname)
{
	dev_t dev;

	/* The device name is taken from the modifier; it must be present. */
	if (modif[0] == '\0') {
		db_error("usage: show disk/devicename");
		return;
	}

	dev = getdiskbyname(modif);
	if (dev == NODEV) {
		db_printf("No disk device matched.\n");
		return;
	}
	db_printf("dev_t = %p\n", dev);
}
#endif
1560
1561/*
1562 * Get a mount option by its name.
1563 *
1564 * Return 0 if the option was found, ENOENT otherwise.
1565 * If len is non-NULL it will be filled with the length
1566 * of the option. If buf is non-NULL, it will be filled
1567 * with the address of the option.
1568 */
1569int
1570vfs_getopt(opts, name, buf, len)
1571	struct vfsoptlist *opts;
1572	const char *name;
1573	void **buf;
1574	int *len;
1575{
1576	struct vfsopt *opt;
1577
1578	TAILQ_FOREACH(opt, opts, link) {
1579		if (strcmp(name, opt->name) == 0) {
1580			if (len != NULL)
1581				*len = opt->len;
1582			if (buf != NULL)
1583				*buf = opt->value;
1584			return (0);
1585		}
1586	}
1587	return (ENOENT);
1588}
1589
1590/*
1591 * Find and copy a mount option.
1592 *
1593 * The size of the buffer has to be specified
1594 * in len, if it is not the same length as the
1595 * mount option, EINVAL is returned.
1596 * Returns ENOENT if the option is not found.
1597 */
1598int
1599vfs_copyopt(opts, name, dest, len)
1600	struct vfsoptlist *opts;
1601	const char *name;
1602	void *dest;
1603	int len;
1604{
1605	struct vfsopt *opt;
1606
1607	TAILQ_FOREACH(opt, opts, link) {
1608		if (strcmp(name, opt->name) == 0) {
1609			if (len != opt->len)
1610				return (EINVAL);
1611			bcopy(opt->value, dest, opt->len);
1612			return (0);
1613		}
1614	}
1615	return (ENOENT);
1616}
1617