vfs_mount.c revision 153541
/*-
 * Copyright (c) 1999-2004 Poul-Henning Kamp
 * Copyright (c) 1999 Michael Smith
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28219820Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29219820Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30219820Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31219820Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32219820Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33219820Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34219820Sjeff * SUCH DAMAGE. 35219820Sjeff */ 36219820Sjeff 37219820Sjeff#include <sys/cdefs.h> 38219820Sjeff__FBSDID("$FreeBSD: head/sys/kern/vfs_mount.c 153541 2005-12-19 16:27:13Z pjd $"); 39219820Sjeff 40219820Sjeff#include <sys/param.h> 41219820Sjeff#include <sys/conf.h> 42219820Sjeff#include <sys/jail.h> 43219820Sjeff#include <sys/kernel.h> 44219820Sjeff#include <sys/libkern.h> 45219820Sjeff#include <sys/mac.h> 46253785Sjhb#include <sys/malloc.h> 47219820Sjeff#include <sys/mount.h> 48219820Sjeff#include <sys/mutex.h> 49219820Sjeff#include <sys/namei.h> 50219820Sjeff#include <sys/proc.h> 51219820Sjeff#include <sys/filedesc.h> 52219820Sjeff#include <sys/reboot.h> 53219820Sjeff#include <sys/syscallsubr.h> 54219820Sjeff#include <sys/sysproto.h> 55258280Salfred#include <sys/sx.h> 56258280Salfred#include <sys/sysctl.h> 57258280Salfred#include <sys/sysent.h> 58219820Sjeff#include <sys/systm.h> 59219820Sjeff#include <sys/vnode.h> 60219820Sjeff 61219893Sjeff#include <geom/geom.h> 62219893Sjeff 63219820Sjeff#include <machine/stdarg.h> 64219820Sjeff 65219820Sjeff#include "opt_rootdevname.h" 66219820Sjeff#include "opt_ddb.h" 67219820Sjeff#include "opt_mac.h" 68219820Sjeff 69219820Sjeff#ifdef DDB 70219820Sjeff#include <ddb/ddb.h> 71219820Sjeff#endif 72219820Sjeff 73219820Sjeff#define ROOTNAME "root_device" 74219820Sjeff#define VFS_MOUNTARG_SIZE_MAX (1024 * 64) 75219820Sjeff 76219820Sjeffstatic 
int vfs_domount(struct thread *td, const char *fstype, 77219820Sjeff char *fspath, int fsflags, void *fsdata); 78219820Sjeffstatic int vfs_mount_alloc(struct vnode *dvp, struct vfsconf *vfsp, 79258280Salfred const char *fspath, struct thread *td, struct mount **mpp); 80258280Salfredstatic int vfs_mountroot_ask(void); 81258280Salfredstatic int vfs_mountroot_try(const char *mountfrom); 82219820Sjeffstatic int vfs_donmount(struct thread *td, int fsflags, 83219820Sjeff struct uio *fsoptions); 84219820Sjeffstatic void free_mntarg(struct mntarg *ma); 85219893Sjeffstatic void vfs_mount_destroy(struct mount *, struct thread *); 86219893Sjeffstatic int vfs_getopt_pos(struct vfsoptlist *opts, const char *name); 87219820Sjeff 88219820Sjeffstatic int usermount = 0; 89219820SjeffSYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 90219820Sjeff "Unprivileged users may mount and unmount file systems"); 91219820Sjeff 92219820SjeffMALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure"); 93219820Sjeff 94219820Sjeff/* List of mounted filesystems. */ 95219820Sjeffstruct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); 96219820Sjeff 97219820Sjeff/* For any iteration/modification of mountlist */ 98219820Sjeffstruct mtx mountlist_mtx; 99219820SjeffMTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF); 100219820Sjeff 101219820SjeffTAILQ_HEAD(vfsoptlist, vfsopt); 102219820Sjeffstruct vfsopt { 103219820Sjeff TAILQ_ENTRY(vfsopt) link; 104219820Sjeff char *name; 105219820Sjeff void *value; 106219820Sjeff int len; 107219820Sjeff}; 108219820Sjeff 109219820Sjeff/* 110219820Sjeff * The vnode of the system's root (/ in the filesystem, without chroot 111241844Seadler * active.) 
112219820Sjeff */ 113219820Sjeffstruct vnode *rootvnode; 114219820Sjeff 115219820Sjeff/* 116219820Sjeff * The root filesystem is detailed in the kernel environment variable 117219820Sjeff * vfs.root.mountfrom, which is expected to be in the general format 118219820Sjeff * 119219820Sjeff * <vfsname>:[<path>] 120219820Sjeff * vfsname := the name of a VFS known to the kernel and capable 121219820Sjeff * of being mounted as root 122219820Sjeff * path := disk device name or other data used by the filesystem 123219820Sjeff * to locate its physical store 124219820Sjeff */ 125219820Sjeff 126219820Sjeff/* 127219820Sjeff * Global opts, taken by all filesystems 128219820Sjeff */ 129219820Sjeffstatic const char *global_opts[] = { 130219820Sjeff "errmsg", 131219820Sjeff "fstype", 132219820Sjeff "fspath", 133219820Sjeff "rdonly", 134219820Sjeff "ro", 135219820Sjeff "rw", 136219820Sjeff "suid", 137219820Sjeff "exec", 138219820Sjeff NULL 139219820Sjeff}; 140219820Sjeff 141219820Sjeff/* 142219820Sjeff * The root specifiers we will try if RB_CDROM is specified. 143219820Sjeff */ 144219820Sjeffstatic char *cdrom_rootdevnames[] = { 145219820Sjeff "cd9660:cd0", 146219820Sjeff "cd9660:acd0", 147219820Sjeff NULL 148219820Sjeff}; 149219820Sjeff 150219820Sjeff/* legacy find-root code */ 151219820Sjeffchar *rootdevnames[2] = {NULL, NULL}; 152219820Sjeff#ifndef ROOTDEVNAME 153219820Sjeff# define ROOTDEVNAME NULL 154219820Sjeff#endif 155219820Sjeffstatic const char *ctrootdevname = ROOTDEVNAME; 156259616Salfred 157259616Salfred/* 158259616Salfred * --------------------------------------------------------------------- 159259616Salfred * Functions for building and sanitizing the mount options 160259616Salfred */ 161259616Salfred 162219820Sjeff/* Remove one mount option. 
*/ 163259616Salfredstatic void 164259616Salfredvfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt) 165259616Salfred{ 166259616Salfred 167259616Salfred TAILQ_REMOVE(opts, opt, link); 168259616Salfred free(opt->name, M_MOUNT); 169259616Salfred if (opt->value != NULL) 170219820Sjeff free(opt->value, M_MOUNT); 171219820Sjeff#ifdef INVARIANTS 172219820Sjeff else if (opt->len != 0) 173219820Sjeff panic("%s: mount option with NULL value but length != 0", 174219820Sjeff __func__); 175219820Sjeff#endif 176219820Sjeff free(opt, M_MOUNT); 177219820Sjeff} 178219820Sjeff 179219820Sjeff/* Release all resources related to the mount options. */ 180219820Sjeffstatic void 181219820Sjeffvfs_freeopts(struct vfsoptlist *opts) 182219820Sjeff{ 183219820Sjeff struct vfsopt *opt; 184219820Sjeff 185219820Sjeff while (!TAILQ_EMPTY(opts)) { 186219820Sjeff opt = TAILQ_FIRST(opts); 187219820Sjeff vfs_freeopt(opts, opt); 188219820Sjeff } 189219820Sjeff free(opts, M_MOUNT); 190219820Sjeff} 191219820Sjeff 192219820Sjeff/* 193219820Sjeff * Check if options are equal (with or without the "no" prefix). 194219820Sjeff */ 195219820Sjeffstatic int 196219820Sjeffvfs_equalopts(const char *opt1, const char *opt2) 197219820Sjeff{ 198219820Sjeff 199219820Sjeff /* "opt" vs. "opt" or "noopt" vs. "noopt" */ 200219820Sjeff if (strcmp(opt1, opt2) == 0) 201219820Sjeff return (1); 202219820Sjeff /* "noopt" vs. "opt" */ 203219820Sjeff if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) 204219820Sjeff return (1); 205219820Sjeff /* "opt" vs. "noopt" */ 206219820Sjeff if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) 207219820Sjeff return (1); 208219820Sjeff return (0); 209219820Sjeff} 210219820Sjeff 211219820Sjeff/* 212219820Sjeff * If a mount option is specified several times, 213219820Sjeff * (with or without the "no" prefix) only keep 214219820Sjeff * the last occurence of it. 
215219820Sjeff */ 216219820Sjeffstatic void 217219820Sjeffvfs_sanitizeopts(struct vfsoptlist *opts) 218219820Sjeff{ 219219820Sjeff struct vfsopt *opt, *opt2, *tmp; 220219820Sjeff 221219820Sjeff TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) { 222219820Sjeff opt2 = TAILQ_PREV(opt, vfsoptlist, link); 223219820Sjeff while (opt2 != NULL) { 224219820Sjeff if (vfs_equalopts(opt->name, opt2->name)) { 225219820Sjeff tmp = TAILQ_PREV(opt2, vfsoptlist, link); 226219820Sjeff vfs_freeopt(opts, opt2); 227219820Sjeff opt2 = tmp; 228219820Sjeff } else { 229219820Sjeff opt2 = TAILQ_PREV(opt2, vfsoptlist, link); 230219820Sjeff } 231219820Sjeff } 232219820Sjeff } 233219820Sjeff} 234219820Sjeff 235219820Sjeff/* 236219820Sjeff * Build a linked list of mount options from a struct uio. 237219820Sjeff */ 238219820Sjeffstatic int 239219820Sjeffvfs_buildopts(struct uio *auio, struct vfsoptlist **options) 240219820Sjeff{ 241219820Sjeff struct vfsoptlist *opts; 242219820Sjeff struct vfsopt *opt; 243219820Sjeff size_t memused; 244219820Sjeff unsigned int i, iovcnt; 245219820Sjeff int error, namelen, optlen; 246219820Sjeff 247219820Sjeff opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 248219820Sjeff TAILQ_INIT(opts); 249219820Sjeff memused = 0; 250219820Sjeff iovcnt = auio->uio_iovcnt; 251219820Sjeff for (i = 0; i < iovcnt; i += 2) { 252219820Sjeff opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 253219820Sjeff namelen = auio->uio_iov[i].iov_len; 254219820Sjeff optlen = auio->uio_iov[i + 1].iov_len; 255219820Sjeff opt->name = malloc(namelen, M_MOUNT, M_WAITOK); 256219820Sjeff opt->value = NULL; 257219820Sjeff opt->len = 0; 258219820Sjeff 259219820Sjeff /* 260219820Sjeff * Do this early, so jumps to "bad" will free the current 261219820Sjeff * option. 
262219820Sjeff */ 263219820Sjeff TAILQ_INSERT_TAIL(opts, opt, link); 264219820Sjeff memused += sizeof(struct vfsopt) + optlen + namelen; 265219820Sjeff 266219820Sjeff /* 267219820Sjeff * Avoid consuming too much memory, and attempts to overflow 268219820Sjeff * memused. 269219820Sjeff */ 270219820Sjeff if (memused > VFS_MOUNTARG_SIZE_MAX || 271219820Sjeff optlen > VFS_MOUNTARG_SIZE_MAX || 272219820Sjeff namelen > VFS_MOUNTARG_SIZE_MAX) { 273219820Sjeff error = EINVAL; 274219820Sjeff goto bad; 275219820Sjeff } 276219820Sjeff 277219820Sjeff if (auio->uio_segflg == UIO_SYSSPACE) { 278219820Sjeff bcopy(auio->uio_iov[i].iov_base, opt->name, namelen); 279219820Sjeff } else { 280219820Sjeff error = copyin(auio->uio_iov[i].iov_base, opt->name, 281219820Sjeff namelen); 282219820Sjeff if (error) 283219820Sjeff goto bad; 284219820Sjeff } 285219820Sjeff /* Ensure names are null-terminated strings. */ 286219820Sjeff if (opt->name[namelen - 1] != '\0') { 287219820Sjeff error = EINVAL; 288219820Sjeff goto bad; 289219820Sjeff } 290219820Sjeff if (optlen != 0) { 291219820Sjeff opt->len = optlen; 292219820Sjeff opt->value = malloc(optlen, M_MOUNT, M_WAITOK); 293219820Sjeff if (auio->uio_segflg == UIO_SYSSPACE) { 294219820Sjeff bcopy(auio->uio_iov[i + 1].iov_base, opt->value, 295219820Sjeff optlen); 296219820Sjeff } else { 297219820Sjeff error = copyin(auio->uio_iov[i + 1].iov_base, 298219859Sjeff opt->value, optlen); 299219820Sjeff if (error) 300219820Sjeff goto bad; 301219820Sjeff } 302219820Sjeff } 303219820Sjeff } 304219820Sjeff vfs_sanitizeopts(opts); 305219820Sjeff *options = opts; 306219820Sjeff return (0); 307219820Sjeffbad: 308219820Sjeff vfs_freeopts(opts); 309219820Sjeff return (error); 310219820Sjeff} 311219820Sjeff 312219820Sjeff/* 313219820Sjeff * Merge the old mount options with the new ones passed 314219820Sjeff * in the MNT_UPDATE case. 
315219820Sjeff */ 316219820Sjeffstatic void 317219820Sjeffvfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts) 318219820Sjeff{ 319219820Sjeff struct vfsopt *opt, *opt2, *new; 320219820Sjeff 321219820Sjeff TAILQ_FOREACH(opt, opts, link) { 322219820Sjeff /* 323219820Sjeff * Check that this option hasn't been redefined 324219820Sjeff * nor cancelled with a "no" mount option. 325219820Sjeff */ 326219820Sjeff opt2 = TAILQ_FIRST(toopts); 327219820Sjeff while (opt2 != NULL) { 328219820Sjeff if (strcmp(opt2->name, opt->name) == 0) 329219820Sjeff goto next; 330219820Sjeff if (strncmp(opt2->name, "no", 2) == 0 && 331219820Sjeff strcmp(opt2->name + 2, opt->name) == 0) { 332219820Sjeff vfs_freeopt(toopts, opt2); 333219820Sjeff goto next; 334219820Sjeff } 335219820Sjeff opt2 = TAILQ_NEXT(opt2, link); 336219820Sjeff } 337219820Sjeff /* We want this option, duplicate it. */ 338219820Sjeff new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 339219820Sjeff new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK); 340257867Salfred strcpy(new->name, opt->name); 341257867Salfred if (opt->len != 0) { 342257867Salfred new->value = malloc(opt->len, M_MOUNT, M_WAITOK); 343219820Sjeff bcopy(opt->value, new->value, opt->len); 344219820Sjeff } else { 345219820Sjeff new->value = NULL; 346219820Sjeff } 347219820Sjeff new->len = opt->len; 348219820Sjeff TAILQ_INSERT_TAIL(toopts, new, link); 349219820Sjeffnext: 350219820Sjeff continue; 351219820Sjeff } 352219820Sjeff} 353219820Sjeff 354219820Sjeff/* 355219820Sjeff * --------------------------------------------------------------------- 356219820Sjeff * Mount a filesystem 357219820Sjeff */ 358219820Sjeffint 359219820Sjeffnmount(td, uap) 360219820Sjeff struct thread *td; 361219820Sjeff struct nmount_args /* { 362219820Sjeff struct iovec *iovp; 363219820Sjeff unsigned int iovcnt; 364219820Sjeff int flags; 365219820Sjeff } */ *uap; 366219820Sjeff{ 367219820Sjeff struct uio *auio; 368219820Sjeff struct iovec *iov; 369219820Sjeff 
unsigned int i; 370219820Sjeff int error; 371219820Sjeff u_int iovcnt; 372219820Sjeff 373257867Salfred /* Kick out MNT_ROOTFS early as it is legal internally */ 374219820Sjeff if (uap->flags & MNT_ROOTFS) 375219820Sjeff return (EINVAL); 376219820Sjeff 377257867Salfred iovcnt = uap->iovcnt; 378257867Salfred /* 379257867Salfred * Check that we have an even number of iovec's 380257867Salfred * and that we have at least two options. 381257867Salfred */ 382219820Sjeff if ((iovcnt & 1) || (iovcnt < 4)) 383257867Salfred return (EINVAL); 384257867Salfred 385257867Salfred error = copyinuio(uap->iovp, iovcnt, &auio); 386257867Salfred if (error) 387257867Salfred return (error); 388257867Salfred iov = auio->uio_iov; 389219820Sjeff for (i = 0; i < iovcnt; i++) { 390257867Salfred if (iov->iov_len > MMAXOPTIONLEN) { 391257867Salfred free(auio, M_IOV); 392257867Salfred return (EINVAL); 393257867Salfred } 394219820Sjeff iov++; 395219820Sjeff } 396219820Sjeff error = vfs_donmount(td, uap->flags, auio); 397219820Sjeff 398219820Sjeff free(auio, M_IOV); 399219820Sjeff return (error); 400219820Sjeff} 401219820Sjeff 402219820Sjeff/* 403219820Sjeff * --------------------------------------------------------------------- 404257867Salfred * Various utility functions 405257867Salfred */ 406219820Sjeff 407219820Sjeff/* 408257867Salfred * Allocate and initialize the mount point struct. 
409257867Salfred */ 410257867Salfredstatic int 411219820Sjeffvfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, 412219820Sjeff const char *fspath, struct thread *td, struct mount **mpp) 413257867Salfred{ 414257867Salfred struct mount *mp; 415257867Salfred 416219820Sjeff mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO); 417257867Salfred TAILQ_INIT(&mp->mnt_nvnodelist); 418257867Salfred mp->mnt_nvnodelistsize = 0; 419219820Sjeff mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); 420219820Sjeff lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 421219820Sjeff vfs_busy(mp, LK_NOWAIT, 0, td); 422219820Sjeff mp->mnt_op = vfsp->vfc_vfsops; 423219820Sjeff mp->mnt_vfc = vfsp; 424219820Sjeff vfsp->vfc_refcount++; 425219820Sjeff mp->mnt_stat.f_type = vfsp->vfc_typenum; 426219820Sjeff mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 427219820Sjeff strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 428219820Sjeff mp->mnt_vnodecovered = vp; 429219820Sjeff mp->mnt_cred = crdup(td->td_ucred); 430219820Sjeff mp->mnt_stat.f_owner = td->td_ucred->cr_uid; 431219820Sjeff strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); 432219820Sjeff mp->mnt_iosize_max = DFLTPHYS; 433219820Sjeff#ifdef MAC 434219820Sjeff mac_init_mount(mp); 435219820Sjeff mac_create_mount(td->td_ucred, mp); 436219820Sjeff#endif 437219820Sjeff arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0); 438219820Sjeff *mpp = mp; 439219820Sjeff return (0); 440219820Sjeff} 441219820Sjeff 442219820Sjeff/* 443219820Sjeff * Destroy the mount struct previously allocated by vfs_mount_alloc(). 
444219820Sjeff */ 445219820Sjeffstatic void 446219820Sjeffvfs_mount_destroy(struct mount *mp, struct thread *td) 447219820Sjeff{ 448219820Sjeff 449219820Sjeff mp->mnt_vfc->vfc_refcount--; 450219820Sjeff if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 451219820Sjeff panic("unmount: dangling vnode"); 452219820Sjeff vfs_unbusy(mp,td); 453219820Sjeff lockdestroy(&mp->mnt_lock); 454219820Sjeff MNT_ILOCK(mp); 455219820Sjeff if (mp->mnt_kern_flag & MNTK_MWAIT) 456219820Sjeff wakeup(mp); 457219820Sjeff MNT_IUNLOCK(mp); 458219820Sjeff mtx_destroy(&mp->mnt_mtx); 459219820Sjeff#ifdef MAC 460219820Sjeff mac_destroy_mount(mp); 461219820Sjeff#endif 462219820Sjeff if (mp->mnt_opt != NULL) 463219820Sjeff vfs_freeopts(mp->mnt_opt); 464219820Sjeff crfree(mp->mnt_cred); 465219820Sjeff free(mp, M_MOUNT); 466219820Sjeff} 467219820Sjeff 468219820Sjeffstatic int 469219820Sjeffvfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions) 470219820Sjeff{ 471219820Sjeff struct vfsoptlist *optlist; 472219820Sjeff char *fstype, *fspath, *errmsg; 473219820Sjeff int error, fstypelen, fspathlen, errmsg_len, errmsg_pos; 474219820Sjeff 475219820Sjeff errmsg_len = 0; 476219820Sjeff errmsg_pos = -1; 477219820Sjeff 478219820Sjeff error = vfs_buildopts(fsoptions, &optlist); 479219820Sjeff if (error) 480219820Sjeff return (error); 481219820Sjeff 482219820Sjeff if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0) 483219820Sjeff errmsg_pos = vfs_getopt_pos(optlist, "errmsg"); 484219820Sjeff else 485219820Sjeff errmsg_len = 0; 486219820Sjeff 487219820Sjeff /* 488219820Sjeff * We need these two options before the others, 489259616Salfred * and they are mandatory for any filesystem. 490219820Sjeff * Ensure they are NUL terminated as well. 
491219820Sjeff */ 492219820Sjeff fstypelen = 0; 493219820Sjeff error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); 494219820Sjeff if (error || fstype[fstypelen - 1] != '\0') { 495219820Sjeff error = EINVAL; 496219820Sjeff if (errmsg != NULL) 497219820Sjeff strncpy(errmsg, "Invalid fstype", errmsg_len); 498219820Sjeff goto bail; 499219820Sjeff } 500219820Sjeff fspathlen = 0; 501219820Sjeff error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); 502219820Sjeff if (error || fspath[fspathlen - 1] != '\0') { 503219820Sjeff error = EINVAL; 504219820Sjeff if (errmsg != NULL) 505219820Sjeff strncpy(errmsg, "Invalid fspath", errmsg_len); 506219820Sjeff goto bail; 507219820Sjeff } 508219820Sjeff 509219820Sjeff /* 510219820Sjeff * We need to see if we have the "update" option 511219820Sjeff * before we call vfs_domount(), since vfs_domount() has special 512219820Sjeff * logic based on MNT_UPDATE. This is very important 513219820Sjeff * when we want to update the root filesystem. 
514219820Sjeff */ 515259616Salfred if (vfs_getopt(optlist, "update", NULL, NULL) == 0) 516259616Salfred fsflags |= MNT_UPDATE; 517259616Salfred 518259616Salfred if (vfs_getopt(optlist, "async", NULL, NULL) == 0) 519259616Salfred fsflags |= MNT_ASYNC; 520259616Salfred 521259616Salfred if (vfs_getopt(optlist, "force", NULL, NULL) == 0) 522259616Salfred fsflags |= MNT_FORCE; 523259616Salfred 524259616Salfred if (vfs_getopt(optlist, "multilabel", NULL, NULL) == 0) 525259616Salfred fsflags |= MNT_MULTILABEL; 526219820Sjeff 527259616Salfred if (vfs_getopt(optlist, "noasync", NULL, NULL) == 0) 528219820Sjeff fsflags &= ~MNT_ASYNC; 529259616Salfred 530259616Salfred if (vfs_getopt(optlist, "noatime", NULL, NULL) == 0) 531259616Salfred fsflags |= MNT_NOATIME; 532259616Salfred 533259616Salfred if (vfs_getopt(optlist, "noclusterr", NULL, NULL) == 0) 534259616Salfred fsflags |= MNT_NOCLUSTERR; 535259616Salfred 536259616Salfred if (vfs_getopt(optlist, "noclusterw", NULL, NULL) == 0) 537259616Salfred fsflags |= MNT_NOCLUSTERW; 538259616Salfred 539259616Salfred if (vfs_getopt(optlist, "noexec", NULL, NULL) == 0) 540259616Salfred fsflags |= MNT_NOEXEC; 541219820Sjeff 542219820Sjeff if (vfs_getopt(optlist, "nosuid", NULL, NULL) == 0) 543219820Sjeff fsflags |= MNT_NOSUID; 544219820Sjeff 545220016Sjeff if (vfs_getopt(optlist, "nosymfollow", NULL, NULL) == 0) 546219820Sjeff fsflags |= MNT_NOSYMFOLLOW; 547219820Sjeff 548219820Sjeff if (vfs_getopt(optlist, "noro", NULL, NULL) == 0) 549219820Sjeff fsflags &= ~MNT_RDONLY; 550219820Sjeff 551219820Sjeff if (vfs_getopt(optlist, "ro", NULL, NULL) == 0) 552219820Sjeff fsflags |= MNT_RDONLY; 553219820Sjeff 554219820Sjeff if (vfs_getopt(optlist, "rdonly", NULL, NULL) == 0) 555219820Sjeff fsflags |= MNT_RDONLY; 556219820Sjeff 557219820Sjeff if (vfs_getopt(optlist, "rw", NULL, NULL) == 0) 558219820Sjeff fsflags &= ~MNT_RDONLY; 559219820Sjeff 560219820Sjeff if (vfs_getopt(optlist, "snapshot", NULL, NULL) == 0) 561219820Sjeff fsflags |= MNT_SNAPSHOT; 
562219820Sjeff 563219820Sjeff if (vfs_getopt(optlist, "suiddir", NULL, NULL) == 0) 564219820Sjeff fsflags |= MNT_SUIDDIR; 565219820Sjeff 566219820Sjeff if (vfs_getopt(optlist, "sync", NULL, NULL) == 0) 567219820Sjeff fsflags |= MNT_SYNCHRONOUS; 568219820Sjeff 569219820Sjeff if (vfs_getopt(optlist, "union", NULL, NULL) == 0) 570219820Sjeff fsflags |= MNT_UNION; 571219820Sjeff 572219820Sjeff /* 573219820Sjeff * Be ultra-paranoid about making sure the type and fspath 574219820Sjeff * variables will fit in our mp buffers, including the 575219820Sjeff * terminating NUL. 576219820Sjeff */ 577219820Sjeff if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) { 578219820Sjeff error = ENAMETOOLONG; 579219820Sjeff goto bail; 580219820Sjeff } 581219820Sjeff 582219820Sjeff mtx_lock(&Giant); 583219820Sjeff error = vfs_domount(td, fstype, fspath, fsflags, optlist); 584219820Sjeff mtx_unlock(&Giant); 585219820Sjeffbail: 586219820Sjeff /* copyout the errmsg */ 587219820Sjeff if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt) 588219820Sjeff && errmsg_len > 0 && errmsg != NULL) { 589219820Sjeff if (fsoptions->uio_segflg == UIO_SYSSPACE) { 590219820Sjeff strncpy(fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 591219820Sjeff errmsg, 592219820Sjeff fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); 593219820Sjeff } else { 594219820Sjeff copystr(errmsg, 595219820Sjeff fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 596219820Sjeff fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len, 597219820Sjeff NULL); 598219820Sjeff } 599219820Sjeff } 600219820Sjeff 601219820Sjeff if (error != 0) 602219820Sjeff vfs_freeopts(optlist); 603219820Sjeff return (error); 604219820Sjeff} 605219820Sjeff 606219820Sjeff/* 607219820Sjeff * --------------------------------------------------------------------- 608219820Sjeff * Old mount API. 
609219820Sjeff */ 610219820Sjeff#ifndef _SYS_SYSPROTO_H_ 611219820Sjeffstruct mount_args { 612219820Sjeff char *type; 613219820Sjeff char *path; 614219820Sjeff int flags; 615219820Sjeff caddr_t data; 616219820Sjeff}; 617219820Sjeff#endif 618219820Sjeff/* ARGSUSED */ 619219820Sjeffint 620219820Sjeffmount(td, uap) 621219820Sjeff struct thread *td; 622219820Sjeff struct mount_args /* { 623219820Sjeff char *type; 624219820Sjeff char *path; 625219820Sjeff int flags; 626219820Sjeff caddr_t data; 627219820Sjeff } */ *uap; 628219820Sjeff{ 629219820Sjeff char *fstype; 630219820Sjeff struct vfsconf *vfsp = NULL; 631219820Sjeff struct mntarg *ma = NULL; 632219820Sjeff int error; 633219820Sjeff 634219820Sjeff /* Kick out MNT_ROOTFS early as it is legal internally */ 635219820Sjeff uap->flags &= ~MNT_ROOTFS; 636219820Sjeff 637219820Sjeff if (uap->data == NULL) 638219820Sjeff return (EINVAL); 639219820Sjeff 640219820Sjeff fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); 641219820Sjeff error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL); 642219820Sjeff if (!error) { 643219820Sjeff mtx_lock(&Giant); /* XXX ? 
*/ 644219820Sjeff vfsp = vfs_byname_kld(fstype, td, &error); 645219820Sjeff mtx_unlock(&Giant); 646219820Sjeff } 647219820Sjeff free(fstype, M_TEMP); 648219820Sjeff if (error) 649255932Salfred return (error); 650258242Salfred if (vfsp == NULL) 651258242Salfred return (ENOENT); 652219820Sjeff if (vfsp->vfc_vfsops->vfs_cmount == NULL) 653219820Sjeff return (EOPNOTSUPP); 654219820Sjeff 655219820Sjeff ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN); 656219820Sjeff ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN); 657219820Sjeff ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro"); 658219820Sjeff ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid"); 659219820Sjeff ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec"); 660219820Sjeff 661219820Sjeff error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td); 662219820Sjeff return (error); 663219820Sjeff} 664219820Sjeff 665219820Sjeff 666219820Sjeff/* 667219820Sjeff * vfs_domount(): actually attempt a filesystem mount. 668219820Sjeff */ 669219820Sjeffstatic int 670219820Sjeffvfs_domount( 671219820Sjeff struct thread *td, /* Flags common to all filesystems. */ 672219820Sjeff const char *fstype, /* Filesystem type. */ 673219820Sjeff char *fspath, /* Mount path. */ 674219820Sjeff int fsflags, /* Flags common to all filesystems. */ 675220016Sjeff void *fsdata /* Options local to the filesystem. */ 676220016Sjeff ) 677220016Sjeff{ 678220016Sjeff struct vnode *vp; 679220016Sjeff struct mount *mp; 680220016Sjeff struct vfsconf *vfsp; 681220016Sjeff int error, flag = 0, kern_flag = 0; 682220016Sjeff struct vattr va; 683220016Sjeff struct nameidata nd; 684220016Sjeff 685220016Sjeff mtx_assert(&Giant, MA_OWNED); 686220016Sjeff 687220016Sjeff /* 688220016Sjeff * Be ultra-paranoid about making sure the type and fspath 689220016Sjeff * variables will fit in our mp buffers, including the 690220016Sjeff * terminating NUL. 
691220016Sjeff */ 692220016Sjeff if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) 693220016Sjeff return (ENAMETOOLONG); 694219820Sjeff 695219820Sjeff if (jailed(td->td_ucred)) 696219820Sjeff return (EPERM); 697219820Sjeff if (usermount == 0) { 698219820Sjeff if ((error = suser(td)) != 0) 699219820Sjeff return (error); 700253774Sjhb } 701253774Sjhb 702219820Sjeff /* 703219820Sjeff * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. 704219820Sjeff */ 705219820Sjeff if (fsflags & (MNT_EXPORTED | MNT_SUIDDIR)) { 706219820Sjeff if ((error = suser(td)) != 0) 707219820Sjeff return (error); 708220016Sjeff } 709220016Sjeff /* 710220016Sjeff * Silently enforce MNT_NOSUID and MNT_USER for 711220016Sjeff * unprivileged users. 712219820Sjeff */ 713255932Salfred if (suser(td) != 0) 714219820Sjeff fsflags |= MNT_NOSUID | MNT_USER; 715219820Sjeff /* 716219820Sjeff * Get vnode to be covered 717219820Sjeff */ 718219820Sjeff NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); 719219820Sjeff if ((error = namei(&nd)) != 0) 720219820Sjeff return (error); 721219820Sjeff NDFREE(&nd, NDF_ONLY_PNBUF); 722219820Sjeff vp = nd.ni_vp; 723219820Sjeff if (fsflags & MNT_UPDATE) { 724219820Sjeff if ((vp->v_vflag & VV_ROOT) == 0) { 725219820Sjeff vput(vp); 726219820Sjeff return (EINVAL); 727219820Sjeff } 728219820Sjeff mp = vp->v_mount; 729219820Sjeff flag = mp->mnt_flag; 730219820Sjeff kern_flag = mp->mnt_kern_flag; 731259616Salfred /* 732219820Sjeff * We only allow the filesystem to be reloaded if it 733219820Sjeff * is currently mounted read-only. 734219820Sjeff */ 735219820Sjeff if ((fsflags & MNT_RELOAD) && 736219820Sjeff ((mp->mnt_flag & MNT_RDONLY) == 0)) { 737219820Sjeff vput(vp); 738219820Sjeff return (EOPNOTSUPP); /* Needs translation */ 739219820Sjeff } 740219820Sjeff /* 741219820Sjeff * Only privileged root, or (if MNT_USER is set) the user that 742219820Sjeff * did the original mount is permitted to update it. 
743219820Sjeff */ 744219820Sjeff error = vfs_suser(mp, td); 745219820Sjeff if (error) { 746255932Salfred vput(vp); 747219820Sjeff return (error); 748219820Sjeff } 749219820Sjeff if (vfs_busy(mp, LK_NOWAIT, 0, td)) { 750219820Sjeff vput(vp); 751219820Sjeff return (EBUSY); 752219820Sjeff } 753219820Sjeff VI_LOCK(vp); 754219820Sjeff if ((vp->v_iflag & VI_MOUNT) != 0 || 755219820Sjeff vp->v_mountedhere != NULL) { 756219820Sjeff VI_UNLOCK(vp); 757219820Sjeff vfs_unbusy(mp, td); 758219820Sjeff vput(vp); 759219820Sjeff return (EBUSY); 760219820Sjeff } 761219820Sjeff vp->v_iflag |= VI_MOUNT; 762219820Sjeff VI_UNLOCK(vp); 763219820Sjeff mp->mnt_flag |= fsflags & 764219820Sjeff (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS); 765219820Sjeff VOP_UNLOCK(vp, 0, td); 766219820Sjeff mp->mnt_optnew = fsdata; 767219820Sjeff vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); 768219820Sjeff } else { 769219820Sjeff /* 770219820Sjeff * If the user is not root, ensure that they own the directory 771219820Sjeff * onto which we are attempting to mount. 
772219820Sjeff */ 773253774Sjhb error = VOP_GETATTR(vp, &va, td->td_ucred, td); 774219820Sjeff if (error) { 775219820Sjeff vput(vp); 776219820Sjeff return (error); 777219820Sjeff } 778219820Sjeff if (va.va_uid != td->td_ucred->cr_uid) { 779219820Sjeff if ((error = suser(td)) != 0) { 780219820Sjeff vput(vp); 781219820Sjeff return (error); 782219820Sjeff } 783219820Sjeff } 784219820Sjeff error = vinvalbuf(vp, V_SAVE, td, 0, 0); 785219820Sjeff if (error != 0) { 786219820Sjeff vput(vp); 787219820Sjeff return (error); 788219820Sjeff } 789219820Sjeff if (vp->v_type != VDIR) { 790219820Sjeff vput(vp); 791219820Sjeff return (ENOTDIR); 792219820Sjeff } 793219820Sjeff vfsp = vfs_byname_kld(fstype, td, &error); 794219820Sjeff if (vfsp == NULL) { 795219820Sjeff vput(vp); 796219820Sjeff return (error); 797219820Sjeff } 798219820Sjeff VI_LOCK(vp); 799219820Sjeff if ((vp->v_iflag & VI_MOUNT) != 0 || 800219820Sjeff vp->v_mountedhere != NULL) { 801259616Salfred VI_UNLOCK(vp); 802259616Salfred vput(vp); 803219820Sjeff return (EBUSY); 804219820Sjeff } 805219820Sjeff vp->v_iflag |= VI_MOUNT; 806219820Sjeff VI_UNLOCK(vp); 807219820Sjeff 808219820Sjeff /* 809219820Sjeff * Allocate and initialize the filesystem. 810219820Sjeff */ 811219820Sjeff error = vfs_mount_alloc(vp, vfsp, fspath, td, &mp); 812219820Sjeff if (error) { 813219820Sjeff vput(vp); 814253774Sjhb return (error); 815253774Sjhb } 816253774Sjhb VOP_UNLOCK(vp, 0, td); 817253774Sjhb 818253774Sjhb /* XXXMAC: pass to vfs_mount_alloc? */ 819253774Sjhb mp->mnt_optnew = fsdata; 820253774Sjhb } 821253774Sjhb 822253774Sjhb /* 823253774Sjhb * Set the mount level flags. 824253774Sjhb */ 825253774Sjhb if (fsflags & MNT_RDONLY) 826253774Sjhb mp->mnt_flag |= MNT_RDONLY; 827219820Sjeff mp->mnt_flag &=~ MNT_UPDATEMASK; 828219820Sjeff mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS); 829219820Sjeff /* 830219820Sjeff * Mount the filesystem. 
831219820Sjeff * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 832219820Sjeff * get. No freeing of cn_pnbuf. 833259616Salfred */ 834219820Sjeff error = VFS_MOUNT(mp, td); 835219820Sjeff if (!error) { 836219820Sjeff if (mp->mnt_opt != NULL) 837253774Sjhb vfs_freeopts(mp->mnt_opt); 838219820Sjeff mp->mnt_opt = mp->mnt_optnew; 839219820Sjeff VFS_STATFS(mp, &mp->mnt_stat, td); 840219820Sjeff } 841219820Sjeff /* 842219820Sjeff * Prevent external consumers of mount options from reading 843219820Sjeff * mnt_optnew. 844219820Sjeff */ 845219820Sjeff mp->mnt_optnew = NULL; 846219820Sjeff if (mp->mnt_flag & MNT_UPDATE) { 847219820Sjeff mp->mnt_flag &= 848219820Sjeff ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); 849219820Sjeff if (error) { 850219820Sjeff mp->mnt_flag = flag; 851219820Sjeff mp->mnt_kern_flag = kern_flag; 852219820Sjeff } 853219820Sjeff if ((mp->mnt_flag & MNT_RDONLY) == 0) { 854219820Sjeff if (mp->mnt_syncer == NULL) 855219820Sjeff error = vfs_allocate_syncvnode(mp); 856259616Salfred } else { 857219820Sjeff if (mp->mnt_syncer != NULL) 858219820Sjeff vrele(mp->mnt_syncer); 859219820Sjeff mp->mnt_syncer = NULL; 860219820Sjeff } 861219820Sjeff vfs_unbusy(mp, td); 862219820Sjeff VI_LOCK(vp); 863219820Sjeff vp->v_iflag &= ~VI_MOUNT; 864219820Sjeff VI_UNLOCK(vp); 865219820Sjeff vrele(vp); 866219820Sjeff return (error); 867219820Sjeff } 868219820Sjeff vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 869219820Sjeff /* 870219820Sjeff * Put the new filesystem on the mount list after root. 
871219820Sjeff */ 872219820Sjeff cache_purge(vp); 873219820Sjeff if (!error) { 874219820Sjeff struct vnode *newdp; 875219820Sjeff 876219820Sjeff VI_LOCK(vp); 877219820Sjeff vp->v_iflag &= ~VI_MOUNT; 878219820Sjeff VI_UNLOCK(vp); 879219820Sjeff vp->v_mountedhere = mp; 880219820Sjeff mtx_lock(&mountlist_mtx); 881219820Sjeff TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 882219820Sjeff mtx_unlock(&mountlist_mtx); 883219820Sjeff vfs_event_signal(NULL, VQ_MOUNT, 0); 884219820Sjeff if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td)) 885219820Sjeff panic("mount: lost mount"); 886219820Sjeff mountcheckdirs(vp, newdp); 887219820Sjeff vput(newdp); 888219820Sjeff VOP_UNLOCK(vp, 0, td); 889219820Sjeff if ((mp->mnt_flag & MNT_RDONLY) == 0) 890219820Sjeff error = vfs_allocate_syncvnode(mp); 891219820Sjeff vfs_unbusy(mp, td); 892219820Sjeff if (error) 893219820Sjeff vrele(vp); 894219820Sjeff } else { 895219820Sjeff VI_LOCK(vp); 896219820Sjeff vp->v_iflag &= ~VI_MOUNT; 897219820Sjeff VI_UNLOCK(vp); 898219820Sjeff vfs_mount_destroy(mp, td); 899219820Sjeff vput(vp); 900219820Sjeff } 901219820Sjeff return (error); 902219820Sjeff} 903219820Sjeff 904219820Sjeff/* 905219820Sjeff * --------------------------------------------------------------------- 906219820Sjeff * Unmount a filesystem. 907219820Sjeff * 908219820Sjeff * Note: unmount takes a path to the vnode mounted on as argument, 909219820Sjeff * not special file (as before). 
910219820Sjeff */ 911219820Sjeff#ifndef _SYS_SYSPROTO_H_ 912219820Sjeffstruct unmount_args { 913219820Sjeff char *path; 914219820Sjeff int flags; 915219859Sjeff}; 916219820Sjeff#endif 917219820Sjeff/* ARGSUSED */ 918219820Sjeffint 919219820Sjeffunmount(td, uap) 920219820Sjeff struct thread *td; 921219820Sjeff register struct unmount_args /* { 922219820Sjeff char *path; 923219820Sjeff int flags; 924219820Sjeff } */ *uap; 925219820Sjeff{ 926219820Sjeff struct mount *mp; 927219820Sjeff char *pathbuf; 928219820Sjeff int error, id0, id1; 929219820Sjeff 930219820Sjeff if (jailed(td->td_ucred)) 931219820Sjeff return (EPERM); 932219820Sjeff if (usermount == 0) { 933219820Sjeff if ((error = suser(td)) != 0) 934219820Sjeff return (error); 935219820Sjeff } 936219820Sjeff 937219820Sjeff pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK); 938219820Sjeff error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL); 939219820Sjeff if (error) { 940219820Sjeff free(pathbuf, M_TEMP); 941219820Sjeff return (error); 942219820Sjeff } 943219820Sjeff if (uap->flags & MNT_BYFSID) { 944253774Sjhb /* Decode the filesystem ID. 
*/ 945259616Salfred if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) { 946253774Sjhb free(pathbuf, M_TEMP); 947253774Sjhb return (EINVAL); 948219820Sjeff } 949219820Sjeff 950219820Sjeff mtx_lock(&mountlist_mtx); 951253774Sjhb TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 952219820Sjeff if (mp->mnt_stat.f_fsid.val[0] == id0 && 953219820Sjeff mp->mnt_stat.f_fsid.val[1] == id1) 954219820Sjeff break; 955219820Sjeff } 956219820Sjeff mtx_unlock(&mountlist_mtx); 957219820Sjeff } else { 958219820Sjeff mtx_lock(&mountlist_mtx); 959255932Salfred TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 960256810Salfred if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) 961256810Salfred break; 962256810Salfred } 963219820Sjeff mtx_unlock(&mountlist_mtx); 964219820Sjeff } 965219820Sjeff free(pathbuf, M_TEMP); 966219820Sjeff if (mp == NULL) { 967219820Sjeff /* 968219820Sjeff * Previously we returned ENOENT for a nonexistent path and 969219820Sjeff * EINVAL for a non-mountpoint. We cannot tell these apart 970219820Sjeff * now, so in the !MNT_BYFSID case return the more likely 971219820Sjeff * EINVAL for compatibility. 972219820Sjeff */ 973219820Sjeff return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL); 974219820Sjeff } 975219820Sjeff 976219820Sjeff /* 977219820Sjeff * Only privileged root, or (if MNT_USER is set) the user that did the 978219820Sjeff * original mount is permitted to unmount this filesystem. 979219820Sjeff */ 980219820Sjeff error = vfs_suser(mp, td); 981219820Sjeff if (error) 982219820Sjeff return (error); 983219820Sjeff 984219820Sjeff /* 985219820Sjeff * Don't allow unmounting the root filesystem. 986219820Sjeff */ 987219820Sjeff if (mp->mnt_flag & MNT_ROOTFS) 988219820Sjeff return (EINVAL); 989219820Sjeff mtx_lock(&Giant); 990259616Salfred error = dounmount(mp, uap->flags, td); 991219820Sjeff mtx_unlock(&Giant); 992259616Salfred return (error); 993219820Sjeff} 994219820Sjeff 995219820Sjeff/* 996219820Sjeff * Do the actual filesystem unmount. 
997219820Sjeff */ 998219820Sjeffint 999219820Sjeffdounmount(mp, flags, td) 1000219820Sjeff struct mount *mp; 1001219820Sjeff int flags; 1002219820Sjeff struct thread *td; 1003219820Sjeff{ 1004219820Sjeff struct vnode *coveredvp, *fsrootvp; 1005219820Sjeff int error; 1006219820Sjeff int async_flag; 1007219820Sjeff 1008219820Sjeff mtx_assert(&Giant, MA_OWNED); 1009219820Sjeff 1010219820Sjeff MNT_ILOCK(mp); 1011219820Sjeff if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 1012259616Salfred MNT_IUNLOCK(mp); 1013259616Salfred return (EBUSY); 1014259616Salfred } 1015259616Salfred mp->mnt_kern_flag |= MNTK_UNMOUNT; 1016259616Salfred /* Allow filesystems to detect that a forced unmount is in progress. */ 1017259616Salfred if (flags & MNT_FORCE) 1018259616Salfred mp->mnt_kern_flag |= MNTK_UNMOUNTF; 1019219820Sjeff error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK | 1020219820Sjeff ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td); 1021219820Sjeff if (error) { 1022234099Sjhb MNT_ILOCK(mp); 1023234099Sjhb mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 1024219820Sjeff if (mp->mnt_kern_flag & MNTK_MWAIT) 1025234099Sjhb wakeup(mp); 1026234099Sjhb MNT_IUNLOCK(mp); 1027234099Sjhb return (error); 1028234099Sjhb } 1029234099Sjhb vn_start_write(NULL, &mp, V_WAIT); 1030234099Sjhb 1031234099Sjhb if (mp->mnt_flag & MNT_EXPUBLIC) 1032234099Sjhb vfs_setpublicfs(NULL, NULL, NULL); 1033234099Sjhb 1034234099Sjhb vfs_msync(mp, MNT_WAIT); 1035234099Sjhb async_flag = mp->mnt_flag & MNT_ASYNC; 1036219820Sjeff mp->mnt_flag &= ~MNT_ASYNC; 1037219820Sjeff cache_purgevfs(mp); /* remove cache entries for this file sys */ 1038219820Sjeff if (mp->mnt_syncer != NULL) 1039219820Sjeff vrele(mp->mnt_syncer); 1040219820Sjeff /* 1041219820Sjeff * For forced unmounts, move process cdir/rdir refs on the fs root 1042219820Sjeff * vnode to the covered vnode. For non-forced unmounts we want 1043219820Sjeff * such references to cause an EBUSY error. 
1044219820Sjeff */ 1045219820Sjeff if ((flags & MNT_FORCE) && 1046219820Sjeff VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) { 1047219820Sjeff if (mp->mnt_vnodecovered != NULL) 1048219820Sjeff mountcheckdirs(fsrootvp, mp->mnt_vnodecovered); 1049219820Sjeff if (fsrootvp == rootvnode) { 1050219820Sjeff vrele(rootvnode); 1051219820Sjeff rootvnode = NULL; 1052219820Sjeff } 1053219820Sjeff vput(fsrootvp); 1054219820Sjeff } 1055219820Sjeff if (((mp->mnt_flag & MNT_RDONLY) || 1056219820Sjeff (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) || 1057219820Sjeff (flags & MNT_FORCE)) { 1058219820Sjeff error = VFS_UNMOUNT(mp, flags, td); 1059219820Sjeff } 1060219820Sjeff vn_finished_write(mp); 1061219820Sjeff if (error) { 1062219820Sjeff /* Undo cdir/rdir and rootvnode changes made above. */ 1063219820Sjeff if ((flags & MNT_FORCE) && 1064219820Sjeff VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) { 1065219820Sjeff if (mp->mnt_vnodecovered != NULL) 1066219820Sjeff mountcheckdirs(mp->mnt_vnodecovered, fsrootvp); 1067219820Sjeff if (rootvnode == NULL) { 1068219820Sjeff rootvnode = fsrootvp; 1069219820Sjeff vref(rootvnode); 1070219820Sjeff } 1071219820Sjeff vput(fsrootvp); 1072219820Sjeff } 1073219820Sjeff if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) 1074219820Sjeff (void) vfs_allocate_syncvnode(mp); 1075219820Sjeff MNT_ILOCK(mp); 1076219820Sjeff mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 1077219820Sjeff mp->mnt_flag |= async_flag; 1078219820Sjeff lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td); 1079219820Sjeff if (mp->mnt_kern_flag & MNTK_MWAIT) 1080219820Sjeff wakeup(mp); 1081219820Sjeff MNT_IUNLOCK(mp); 1082219820Sjeff return (error); 1083219820Sjeff } 1084219820Sjeff mtx_lock(&mountlist_mtx); 1085219820Sjeff TAILQ_REMOVE(&mountlist, mp, mnt_list); 1086219820Sjeff if ((coveredvp = mp->mnt_vnodecovered) != NULL) 1087219820Sjeff coveredvp->v_mountedhere = NULL; 1088219820Sjeff mtx_unlock(&mountlist_mtx); 1089219820Sjeff vfs_event_signal(NULL, VQ_UNMOUNT, 0); 
1090219820Sjeff vfs_mount_destroy(mp, td); 1091219820Sjeff if (coveredvp != NULL) 1092219820Sjeff vrele(coveredvp); 1093219820Sjeff return (0); 1094219820Sjeff} 1095219820Sjeff 1096219820Sjeff/* 1097219820Sjeff * --------------------------------------------------------------------- 1098219820Sjeff * Mounting of root filesystem 1099219820Sjeff * 1100219820Sjeff */ 1101219820Sjeff 1102219820Sjeffstruct root_hold_token { 1103219820Sjeff const char *who; 1104219820Sjeff LIST_ENTRY(root_hold_token) list; 1105219820Sjeff}; 1106219820Sjeff 1107219820Sjeffstatic LIST_HEAD(, root_hold_token) root_holds = 1108219820Sjeff LIST_HEAD_INITIALIZER(&root_holds); 1109219820Sjeff 1110219820Sjeffstruct root_hold_token * 1111219820Sjeffroot_mount_hold(const char *identifier) 1112219820Sjeff{ 1113219820Sjeff struct root_hold_token *h; 1114219820Sjeff 1115219820Sjeff h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK); 1116219820Sjeff h->who = identifier; 1117219820Sjeff mtx_lock(&mountlist_mtx); 1118219820Sjeff LIST_INSERT_HEAD(&root_holds, h, list); 1119219820Sjeff mtx_unlock(&mountlist_mtx); 1120219820Sjeff return (h); 1121219820Sjeff} 1122219820Sjeff 1123253774Sjhbvoid 1124219820Sjeffroot_mount_rel(struct root_hold_token *h) 1125253774Sjhb{ 1126219820Sjeff 1127219820Sjeff mtx_lock(&mountlist_mtx); 1128219820Sjeff LIST_REMOVE(h, list); 1129219820Sjeff wakeup(&root_holds); 1130259616Salfred mtx_unlock(&mountlist_mtx); 1131259616Salfred free(h, M_DEVBUF); 1132259616Salfred} 1133259616Salfred 1134259616Salfredstatic void 1135259616Salfredroot_mount_wait(void) 1136259616Salfred{ 1137259616Salfred struct root_hold_token *h; 1138259616Salfred 1139259616Salfred for (;;) { 1140259616Salfred DROP_GIANT(); 1141259616Salfred g_waitidle(); 1142219820Sjeff PICKUP_GIANT(); 1143219820Sjeff mtx_lock(&mountlist_mtx); 1144219820Sjeff if (LIST_EMPTY(&root_holds)) { 1145219820Sjeff mtx_unlock(&mountlist_mtx); 1146219820Sjeff break; 1147219820Sjeff } 1148219820Sjeff printf("Root mount waiting for:"); 
1149219820Sjeff LIST_FOREACH(h, &root_holds, list) 1150219820Sjeff printf(" %s", h->who); 1151219820Sjeff printf("\n"); 1152219820Sjeff msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold", 1153219820Sjeff hz); 1154253774Sjhb } 1155219820Sjeff} 1156219820Sjeff 1157219820Sjeffstatic void 1158219820Sjeffset_rootvnode(struct thread *td) 1159219820Sjeff{ 1160219820Sjeff struct proc *p; 1161219820Sjeff 1162219820Sjeff if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td)) 1163219820Sjeff panic("Cannot find root vnode"); 1164219820Sjeff 1165219820Sjeff p = td->td_proc; 1166220016Sjeff FILEDESC_LOCK(p->p_fd); 1167220016Sjeff 1168219820Sjeff if (p->p_fd->fd_cdir != NULL) 1169253774Sjhb vrele(p->p_fd->fd_cdir); 1170253774Sjhb p->p_fd->fd_cdir = rootvnode; 1171219820Sjeff VREF(rootvnode); 1172219820Sjeff 1173219820Sjeff if (p->p_fd->fd_rdir != NULL) 1174219820Sjeff vrele(p->p_fd->fd_rdir); 1175219820Sjeff p->p_fd->fd_rdir = rootvnode; 1176219820Sjeff VREF(rootvnode); 1177219820Sjeff 1178219820Sjeff FILEDESC_UNLOCK(p->p_fd); 1179219820Sjeff 1180219820Sjeff VOP_UNLOCK(rootvnode, 0, td); 1181219820Sjeff} 1182219820Sjeff 1183219820Sjeff/* 1184219820Sjeff * Mount /devfs as our root filesystem, but do not put it on the mountlist 1185219820Sjeff * yet. Create a /dev -> / symlink so that absolute pathnames will lookup. 
1186219820Sjeff */ 1187219820Sjeff 1188219820Sjeffstatic void 1189219820Sjeffdevfs_first(void) 1190219820Sjeff{ 1191219820Sjeff struct thread *td = curthread; 1192219820Sjeff struct vfsoptlist *opts; 1193219820Sjeff struct vfsconf *vfsp; 1194219820Sjeff struct mount *mp = NULL; 1195219820Sjeff int error; 1196219820Sjeff 1197219820Sjeff vfsp = vfs_byname("devfs"); 1198219820Sjeff KASSERT(vfsp != NULL, ("Could not find devfs by name")); 1199219820Sjeff if (vfsp == NULL) 1200219820Sjeff return; 1201219820Sjeff 1202219820Sjeff error = vfs_mount_alloc(NULLVP, vfsp, "/dev", td, &mp); 1203219820Sjeff KASSERT(error == 0, ("vfs_mount_alloc failed %d", error)); 1204259616Salfred if (error) 1205219820Sjeff return; 1206219820Sjeff 1207219820Sjeff error = VFS_MOUNT(mp, td); 1208219820Sjeff KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error)); 1209219820Sjeff if (error) 1210219820Sjeff return; 1211219820Sjeff 1212219820Sjeff opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 1213219820Sjeff TAILQ_INIT(opts); 1214219820Sjeff mp->mnt_opt = opts; 1215259616Salfred 1216219820Sjeff mtx_lock(&mountlist_mtx); 1217219820Sjeff TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 1218219820Sjeff mtx_unlock(&mountlist_mtx); 1219219820Sjeff 1220219820Sjeff set_rootvnode(td); 1221219820Sjeff 1222219820Sjeff error = kern_symlink(td, "/", "dev", UIO_SYSSPACE); 1223219820Sjeff if (error) 1224219820Sjeff printf("kern_symlink /dev -> / returns %d\n", error); 1225219820Sjeff} 1226219820Sjeff 1227219820Sjeff/* 1228219820Sjeff * Surgically move our devfs to be mounted on /dev. 
1229219820Sjeff */ 1230219820Sjeff 1231219820Sjeffstatic void 1232219820Sjeffdevfs_fixup(struct thread *td) 1233219820Sjeff{ 1234219820Sjeff struct nameidata nd; 1235219820Sjeff int error; 1236219820Sjeff struct vnode *vp, *dvp; 1237219820Sjeff struct mount *mp; 1238219820Sjeff 1239219820Sjeff /* Remove our devfs mount from the mountlist and purge the cache */ 1240219820Sjeff mtx_lock(&mountlist_mtx); 1241219820Sjeff mp = TAILQ_FIRST(&mountlist); 1242219820Sjeff TAILQ_REMOVE(&mountlist, mp, mnt_list); 1243219820Sjeff mtx_unlock(&mountlist_mtx); 1244219820Sjeff cache_purgevfs(mp); 1245219820Sjeff 1246219820Sjeff VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td); 1247219820Sjeff VI_LOCK(dvp); 1248219820Sjeff dvp->v_iflag &= ~VI_MOUNT; 1249219820Sjeff dvp->v_mountedhere = NULL; 1250219820Sjeff VI_UNLOCK(dvp); 1251219820Sjeff 1252219820Sjeff /* Set up the real rootvnode, and purge the cache */ 1253219820Sjeff TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL; 1254219820Sjeff set_rootvnode(td); 1255219820Sjeff cache_purgevfs(rootvnode->v_mount); 1256219820Sjeff 1257219820Sjeff NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td); 1258219859Sjeff error = namei(&nd); 1259219859Sjeff if (error) { 1260219859Sjeff printf("Lookup of /dev for devfs, error: %d\n", error); 1261219859Sjeff return; 1262219859Sjeff } 1263219859Sjeff NDFREE(&nd, NDF_ONLY_PNBUF); 1264219859Sjeff vp = nd.ni_vp; 1265219859Sjeff if (vp->v_type != VDIR) { 1266219859Sjeff vput(vp); 1267219859Sjeff } 1268219859Sjeff error = vinvalbuf(vp, V_SAVE, td, 0, 0); 1269219859Sjeff if (error) { 1270219859Sjeff vput(vp); 1271219859Sjeff } 1272219859Sjeff cache_purge(vp); 1273219859Sjeff mp->mnt_vnodecovered = vp; 1274219859Sjeff vp->v_mountedhere = mp; 1275219859Sjeff mtx_lock(&mountlist_mtx); 1276219859Sjeff TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 1277219859Sjeff mtx_unlock(&mountlist_mtx); 1278219859Sjeff VOP_UNLOCK(vp, 0, td); 1279219859Sjeff vfs_unbusy(mp, td); 1280219859Sjeff vput(dvp); 1281219859Sjeff 
1282219859Sjeff /* Unlink the no longer needed /dev/dev -> / symlink */ 1283219859Sjeff kern_unlink(td, "/dev/dev", UIO_SYSSPACE); 1284219859Sjeff} 1285219859Sjeff 1286219859Sjeff/* 1287219859Sjeff * Report errors during filesystem mounting. 1288219859Sjeff */ 1289219859Sjeffvoid 1290219859Sjeffvfs_mount_error(struct mount *mp, const char *fmt, ...) 1291219859Sjeff{ 1292219859Sjeff struct vfsoptlist *moptlist = mp->mnt_optnew; 1293219859Sjeff va_list ap; 1294219859Sjeff int error, len; 1295219859Sjeff char *errmsg; 1296219859Sjeff 1297219859Sjeff error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len); 1298219859Sjeff if (error || errmsg == NULL || len <= 0) 1299219859Sjeff return; 1300219859Sjeff 1301219859Sjeff va_start(ap, fmt); 1302219859Sjeff vsnprintf(errmsg, (size_t)len, fmt, ap); 1303219859Sjeff va_end(ap); 1304219859Sjeff} 1305219859Sjeff 1306219859Sjeff/* 1307259616Salfred * Find and mount the root filesystem 1308219859Sjeff */ 1309219859Sjeffvoid 1310219859Sjeffvfs_mountroot(void) 1311219859Sjeff{ 1312219859Sjeff char *cp; 1313219859Sjeff int error, i, asked = 0; 1314219859Sjeff 1315219859Sjeff root_mount_wait(); 1316259616Salfred 1317219859Sjeff devfs_first(); 1318219859Sjeff 1319219859Sjeff /* 1320219859Sjeff * We are booted with instructions to prompt for the root filesystem. 1321219859Sjeff */ 1322219859Sjeff if (boothowto & RB_ASKNAME) { 1323219859Sjeff if (!vfs_mountroot_ask()) 1324219859Sjeff return; 1325219859Sjeff asked = 1; 1326219859Sjeff } 1327219859Sjeff 1328219859Sjeff /* 1329219859Sjeff * The root filesystem information is compiled in, and we are 1330219859Sjeff * booted with instructions to use it. 1331219859Sjeff */ 1332219859Sjeff if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) { 1333219859Sjeff if (!vfs_mountroot_try(ctrootdevname)) 1334219859Sjeff return; 1335219820Sjeff ctrootdevname = NULL; 1336219820Sjeff } 1337219820Sjeff 1338219820Sjeff /* 1339219820Sjeff * We've been given the generic "use CDROM as root" flag. 
This is 1340219820Sjeff * necessary because one media may be used in many different 1341219820Sjeff * devices, so we need to search for them. 1342219820Sjeff */ 1343219820Sjeff if (boothowto & RB_CDROM) { 1344219820Sjeff for (i = 0; cdrom_rootdevnames[i] != NULL; i++) { 1345219820Sjeff if (!vfs_mountroot_try(cdrom_rootdevnames[i])) 1346219820Sjeff return; 1347219820Sjeff } 1348219820Sjeff } 1349219820Sjeff 1350219820Sjeff /* 1351219820Sjeff * Try to use the value read by the loader from /etc/fstab, or 1352219820Sjeff * supplied via some other means. This is the preferred 1353219820Sjeff * mechanism. 1354219820Sjeff */ 1355219820Sjeff cp = getenv("vfs.root.mountfrom"); 1356219820Sjeff if (cp != NULL) { 1357219820Sjeff error = vfs_mountroot_try(cp); 1358219820Sjeff freeenv(cp); 1359219820Sjeff if (!error) 1360219820Sjeff return; 1361219820Sjeff } 1362219820Sjeff 1363219820Sjeff /* 1364219859Sjeff * Try values that may have been computed by code during boot 1365219859Sjeff */ 1366219820Sjeff if (!vfs_mountroot_try(rootdevnames[0])) 1367219859Sjeff return; 1368219859Sjeff if (!vfs_mountroot_try(rootdevnames[1])) 1369219820Sjeff return; 1370219859Sjeff 1371219820Sjeff /* 1372219859Sjeff * If we (still) have a compiled-in default, try it. 1373219859Sjeff */ 1374219859Sjeff if (ctrootdevname != NULL) 1375219859Sjeff if (!vfs_mountroot_try(ctrootdevname)) 1376219859Sjeff return; 1377219859Sjeff /* 1378219820Sjeff * Everything so far has failed, prompt on the console if we haven't 1379219820Sjeff * already tried that. 1380219820Sjeff */ 1381219820Sjeff if (!asked) 1382219820Sjeff if (!vfs_mountroot_ask()) 1383219820Sjeff return; 1384219820Sjeff 1385219820Sjeff panic("Root mount failed, startup aborted."); 1386219820Sjeff} 1387219820Sjeff 1388219820Sjeff/* 1389219820Sjeff * Mount (mountfrom) as the root filesystem. 
1390219820Sjeff */ 1391219820Sjeffstatic int 1392219820Sjeffvfs_mountroot_try(const char *mountfrom) 1393219820Sjeff{ 1394219820Sjeff struct mount *mp; 1395219820Sjeff char *vfsname, *path; 1396219820Sjeff time_t timebase; 1397219820Sjeff int error; 1398219820Sjeff char patt[32]; 1399219820Sjeff 1400219820Sjeff vfsname = NULL; 1401219820Sjeff path = NULL; 1402219820Sjeff mp = NULL; 1403219820Sjeff error = EINVAL; 1404219820Sjeff 1405219820Sjeff if (mountfrom == NULL) 1406219820Sjeff return (error); /* don't complain */ 1407219820Sjeff printf("Trying to mount root from %s\n", mountfrom); 1408219820Sjeff 1409219820Sjeff /* parse vfs name and path */ 1410219820Sjeff vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK); 1411219820Sjeff path = malloc(MNAMELEN, M_MOUNT, M_WAITOK); 1412219820Sjeff vfsname[0] = path[0] = 0; 1413219820Sjeff sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN); 1414219820Sjeff if (sscanf(mountfrom, patt, vfsname, path) < 1) 1415219820Sjeff goto out; 1416219820Sjeff 1417219820Sjeff if (path[0] == '\0') 1418219820Sjeff strcpy(path, ROOTNAME); 1419219820Sjeff 1420219820Sjeff error = kernel_vmount( 1421219820Sjeff MNT_RDONLY | MNT_ROOTFS, 1422219820Sjeff "fstype", vfsname, 1423219820Sjeff "fspath", "/", 1424219820Sjeff "from", path, 1425219820Sjeff NULL); 1426219820Sjeff if (error == 0) { 1427219820Sjeff /* 1428219820Sjeff * We mount devfs prior to mounting the / FS, so the first 1429219820Sjeff * entry will typically be devfs. 1430219820Sjeff */ 1431219820Sjeff mp = TAILQ_FIRST(&mountlist); 1432219820Sjeff KASSERT(mp != NULL, ("%s: mountlist is empty", __func__)); 1433219820Sjeff 1434219820Sjeff /* 1435219820Sjeff * Iterate over all currently mounted file systems and use 1436219820Sjeff * the time stamp found to check and/or initialize the RTC. 1437219820Sjeff * Typically devfs has no time stamp and the only other FS 1438219820Sjeff * is the actual / FS. 
1439219820Sjeff * Call inittodr() only once and pass it the largest of the 1440219820Sjeff * timestamps we encounter. 1441219820Sjeff */ 1442219820Sjeff timebase = 0; 1443219820Sjeff do { 1444219820Sjeff if (mp->mnt_time > timebase) 1445219820Sjeff timebase = mp->mnt_time; 1446219820Sjeff mp = TAILQ_NEXT(mp, mnt_list); 1447219820Sjeff } while (mp != NULL); 1448219820Sjeff inittodr(timebase); 1449219820Sjeff 1450219820Sjeff devfs_fixup(curthread); 1451219820Sjeff } 1452219820Sjeffout: 1453219820Sjeff free(path, M_MOUNT); 1454219820Sjeff free(vfsname, M_MOUNT); 1455219820Sjeff return (error); 1456219820Sjeff} 1457219820Sjeff 1458219820Sjeff/* 1459219820Sjeff * --------------------------------------------------------------------- 1460219820Sjeff * Interactive root filesystem selection code. 1461219820Sjeff */ 1462219820Sjeff 1463219820Sjeffstatic int 1464219820Sjeffvfs_mountroot_ask(void) 1465219820Sjeff{ 1466219820Sjeff char name[128]; 1467219820Sjeff 1468219820Sjeff for(;;) { 1469219820Sjeff printf("\nManual root filesystem specification:\n"); 1470219820Sjeff printf(" <fstype>:<device> Mount <device> using filesystem <fstype>\n"); 1471219820Sjeff#if defined(__i386__) || defined(__ia64__) 1472219820Sjeff printf(" eg. ufs:da0s1a\n"); 1473219820Sjeff#else 1474219820Sjeff printf(" eg. ufs:/dev/da0a\n"); 1475219820Sjeff#endif 1476219820Sjeff printf(" ? 
List valid disk boot devices\n"); 1477219820Sjeff printf(" <empty line> Abort manual input\n"); 1478219820Sjeff printf("\nmountroot> "); 1479219820Sjeff gets(name, sizeof(name), 1); 1480219820Sjeff if (name[0] == '\0') 1481219820Sjeff return (1); 1482219820Sjeff if (name[0] == '?') { 1483219820Sjeff printf("\nList of GEOM managed disk devices:\n "); 1484219820Sjeff g_dev_print(); 1485219820Sjeff continue; 1486219820Sjeff } 1487219820Sjeff if (!vfs_mountroot_try(name)) 1488219820Sjeff return (0); 1489219820Sjeff } 1490219820Sjeff} 1491219820Sjeff 1492219820Sjeff/* 1493219820Sjeff * --------------------------------------------------------------------- 1494219820Sjeff * Functions for querying mount options/arguments from filesystems. 1495219820Sjeff */ 1496219820Sjeff 1497219820Sjeff/* 1498219820Sjeff * Check that no unknown options are given 1499219820Sjeff */ 1500219820Sjeffint 1501219820Sjeffvfs_filteropt(struct vfsoptlist *opts, const char **legal) 1502219820Sjeff{ 1503219820Sjeff struct vfsopt *opt; 1504219820Sjeff const char **t, *p; 1505219820Sjeff 1506219820Sjeff 1507219820Sjeff TAILQ_FOREACH(opt, opts, link) { 1508219820Sjeff p = opt->name; 1509219820Sjeff if (p[0] == 'n' && p[1] == 'o') 1510219820Sjeff p += 2; 1511219820Sjeff for(t = global_opts; *t != NULL; t++) 1512219820Sjeff if (!strcmp(*t, p)) 1513219820Sjeff break; 1514219820Sjeff if (*t != NULL) 1515219820Sjeff continue; 1516219820Sjeff for(t = legal; *t != NULL; t++) 1517219820Sjeff if (!strcmp(*t, p)) 1518219820Sjeff break; 1519219820Sjeff if (*t != NULL) 1520219820Sjeff continue; 1521219820Sjeff printf("mount option <%s> is unknown\n", p); 1522219820Sjeff return (EINVAL); 1523219820Sjeff } 1524219820Sjeff return (0); 1525219820Sjeff} 1526219820Sjeff 1527219820Sjeff/* 1528219820Sjeff * Get a mount option by its name. 1529219820Sjeff * 1530219820Sjeff * Return 0 if the option was found, ENOENT otherwise. 
1531219820Sjeff * If len is non-NULL it will be filled with the length 1532219820Sjeff * of the option. If buf is non-NULL, it will be filled 1533219820Sjeff * with the address of the option. 1534219820Sjeff */ 1535219820Sjeffint 1536219820Sjeffvfs_getopt(opts, name, buf, len) 1537219820Sjeff struct vfsoptlist *opts; 1538219820Sjeff const char *name; 1539219820Sjeff void **buf; 1540219820Sjeff int *len; 1541219820Sjeff{ 1542219820Sjeff struct vfsopt *opt; 1543219820Sjeff 1544219820Sjeff KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1545219820Sjeff 1546219820Sjeff TAILQ_FOREACH(opt, opts, link) { 1547219820Sjeff if (strcmp(name, opt->name) == 0) { 1548219820Sjeff if (len != NULL) 1549219820Sjeff *len = opt->len; 1550219820Sjeff if (buf != NULL) 1551219820Sjeff *buf = opt->value; 1552219820Sjeff return (0); 1553219820Sjeff } 1554219820Sjeff } 1555219820Sjeff return (ENOENT); 1556219820Sjeff} 1557219820Sjeff 1558219820Sjeffstatic int 1559219820Sjeffvfs_getopt_pos(struct vfsoptlist *opts, const char *name) 1560219820Sjeff{ 1561219820Sjeff struct vfsopt *opt; 1562219820Sjeff int i; 1563219820Sjeff 1564219820Sjeff if (opts == NULL) 1565219820Sjeff return (-1); 1566219859Sjeff 1567219820Sjeff i = 0; 1568219820Sjeff TAILQ_FOREACH(opt, opts, link) { 1569259616Salfred if (strcmp(name, opt->name) == 0) 1570259616Salfred return (i); 1571219820Sjeff ++i; 1572219820Sjeff } 1573219820Sjeff return (-1); 1574219820Sjeff} 1575219820Sjeff 1576219820Sjeffchar * 1577219820Sjeffvfs_getopts(struct vfsoptlist *opts, const char *name, int *error) 1578219820Sjeff{ 1579219820Sjeff struct vfsopt *opt; 1580219820Sjeff 1581219820Sjeff *error = 0; 1582219820Sjeff TAILQ_FOREACH(opt, opts, link) { 1583219820Sjeff if (strcmp(name, opt->name) != 0) 1584219820Sjeff continue; 1585219820Sjeff if (((char *)opt->value)[opt->len - 1] != '\0') { 1586219820Sjeff *error = EINVAL; 1587219820Sjeff return (NULL); 1588219820Sjeff } 1589219820Sjeff return (opt->value); 1590219820Sjeff } 
1591219820Sjeff return (NULL); 1592219820Sjeff} 1593219820Sjeff 1594219820Sjeffint 1595219820Sjeffvfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val) 1596219820Sjeff{ 1597219820Sjeff struct vfsopt *opt; 1598219820Sjeff 1599219820Sjeff TAILQ_FOREACH(opt, opts, link) { 1600219820Sjeff if (strcmp(name, opt->name) == 0) { 1601219820Sjeff if (w != NULL) 1602219820Sjeff *w |= val; 1603219820Sjeff return (1); 1604219820Sjeff } 1605219820Sjeff } 1606219820Sjeff if (w != NULL) 1607219820Sjeff *w &= ~val; 1608219820Sjeff return (0); 1609219820Sjeff} 1610219820Sjeff 1611234618Sbzint 1612219820Sjeffvfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...) 1613219820Sjeff{ 1614219893Sjeff va_list ap; 1615220016Sjeff struct vfsopt *opt; 1616220016Sjeff int ret; 1617220016Sjeff 1618255932Salfred KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1619220016Sjeff 1620220016Sjeff TAILQ_FOREACH(opt, opts, link) { 1621220016Sjeff if (strcmp(name, opt->name) != 0) 1622220016Sjeff continue; 1623220016Sjeff if (((char *)opt->value)[opt->len - 1] != '\0') 1624220016Sjeff return (0); 1625220016Sjeff va_start(ap, fmt); 1626220016Sjeff ret = vsscanf(opt->value, fmt, ap); 1627220016Sjeff va_end(ap); 1628255932Salfred return (ret); 1629219820Sjeff } 1630219820Sjeff return (0); 1631219820Sjeff} 1632219820Sjeff 1633219820Sjeff/* 1634219820Sjeff * Find and copy a mount option. 1635219820Sjeff * 1636219820Sjeff * The size of the buffer has to be specified 1637219820Sjeff * in len, if it is not the same length as the 1638219820Sjeff * mount option, EINVAL is returned. 1639219820Sjeff * Returns ENOENT if the option is not found. 
1640219820Sjeff */ 1641219820Sjeffint 1642219820Sjeffvfs_copyopt(opts, name, dest, len) 1643219820Sjeff struct vfsoptlist *opts; 1644219820Sjeff const char *name; 1645219820Sjeff void *dest; 1646219820Sjeff int len; 1647219820Sjeff{ 1648219820Sjeff struct vfsopt *opt; 1649219820Sjeff 1650219820Sjeff KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL")); 1651219820Sjeff 1652219820Sjeff TAILQ_FOREACH(opt, opts, link) { 1653219820Sjeff if (strcmp(name, opt->name) == 0) { 1654219820Sjeff if (len != opt->len) 1655219820Sjeff return (EINVAL); 1656219820Sjeff bcopy(opt->value, dest, opt->len); 1657219820Sjeff return (0); 1658219820Sjeff } 1659219820Sjeff } 1660219820Sjeff return (ENOENT); 1661219820Sjeff} 1662219820Sjeff 1663219820Sjeff/* 1664219820Sjeff * This is a helper function for filesystems to traverse their 1665219820Sjeff * vnodes. See MNT_VNODE_FOREACH() in sys/mount.h 1666219820Sjeff */ 1667 1668struct vnode * 1669__mnt_vnode_next(struct vnode **nvp, struct mount *mp) 1670{ 1671 struct vnode *vp; 1672 1673 mtx_assert(&mp->mnt_mtx, MA_OWNED); 1674 1675 vp = *nvp; 1676 /* Check if we are done */ 1677 if (vp == NULL) 1678 return (NULL); 1679 /* If our next vnode is no longer ours, start over */ 1680 if (vp->v_mount != mp) 1681 vp = TAILQ_FIRST(&mp->mnt_nvnodelist); 1682 /* Save pointer to next vnode in list */ 1683 if (vp != NULL) 1684 *nvp = TAILQ_NEXT(vp, v_nmntvnodes); 1685 else 1686 *nvp = NULL; 1687 return (vp); 1688} 1689 1690int 1691__vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) 1692{ 1693 int error; 1694 1695 error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td); 1696 if (sbp != &mp->mnt_stat) 1697 *sbp = mp->mnt_stat; 1698 return (error); 1699} 1700 1701void 1702vfs_mountedfrom(struct mount *mp, const char *from) 1703{ 1704 1705 bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname); 1706 strlcpy(mp->mnt_stat.f_mntfromname, from, 1707 sizeof mp->mnt_stat.f_mntfromname); 1708} 1709 1710/* 1711 * 
--------------------------------------------------------------------- 1712 * This is the api for building mount args and mounting filesystems from 1713 * inside the kernel. 1714 * 1715 * The API works by accumulation of individual args. First error is 1716 * latched. 1717 * 1718 * XXX: should be documented in new manpage kernel_mount(9) 1719 */ 1720 1721/* A memory allocation which must be freed when we are done */ 1722struct mntaarg { 1723 SLIST_ENTRY(mntaarg) next; 1724}; 1725 1726/* The header for the mount arguments */ 1727struct mntarg { 1728 struct iovec *v; 1729 int len; 1730 int error; 1731 SLIST_HEAD(, mntaarg) list; 1732}; 1733 1734/* 1735 * Add a boolean argument. 1736 * 1737 * flag is the boolean value. 1738 * name must start with "no". 1739 */ 1740struct mntarg * 1741mount_argb(struct mntarg *ma, int flag, const char *name) 1742{ 1743 1744 KASSERT(name[0] == 'n' && name[1] == 'o', 1745 ("mount_argb(...,%s): name must start with 'no'", name)); 1746 1747 return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0)); 1748} 1749 1750/* 1751 * Add an argument printf style 1752 */ 1753struct mntarg * 1754mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...) 
1755{ 1756 va_list ap; 1757 struct mntaarg *maa; 1758 struct sbuf *sb; 1759 int len; 1760 1761 if (ma == NULL) { 1762 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 1763 SLIST_INIT(&ma->list); 1764 } 1765 if (ma->error) 1766 return (ma); 1767 1768 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 1769 M_MOUNT, M_WAITOK); 1770 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 1771 ma->v[ma->len].iov_len = strlen(name) + 1; 1772 ma->len++; 1773 1774 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 1775 va_start(ap, fmt); 1776 sbuf_vprintf(sb, fmt, ap); 1777 va_end(ap); 1778 sbuf_finish(sb); 1779 len = sbuf_len(sb) + 1; 1780 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 1781 SLIST_INSERT_HEAD(&ma->list, maa, next); 1782 bcopy(sbuf_data(sb), maa + 1, len); 1783 sbuf_delete(sb); 1784 1785 ma->v[ma->len].iov_base = maa + 1; 1786 ma->v[ma->len].iov_len = len; 1787 ma->len++; 1788 1789 return (ma); 1790} 1791 1792/* 1793 * Add an argument which is a userland string. 1794 */ 1795struct mntarg * 1796mount_argsu(struct mntarg *ma, const char *name, const void *val, int len) 1797{ 1798 struct mntaarg *maa; 1799 char *tbuf; 1800 1801 if (val == NULL) 1802 return (ma); 1803 if (ma == NULL) { 1804 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 1805 SLIST_INIT(&ma->list); 1806 } 1807 if (ma->error) 1808 return (ma); 1809 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 1810 SLIST_INSERT_HEAD(&ma->list, maa, next); 1811 tbuf = (void *)(maa + 1); 1812 ma->error = copyinstr(val, tbuf, len, NULL); 1813 return (mount_arg(ma, name, tbuf, -1)); 1814} 1815 1816/* 1817 * Plain argument. 1818 * 1819 * If length is -1, use printf. 
1820 */ 1821struct mntarg * 1822mount_arg(struct mntarg *ma, const char *name, const void *val, int len) 1823{ 1824 1825 if (ma == NULL) { 1826 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 1827 SLIST_INIT(&ma->list); 1828 } 1829 if (ma->error) 1830 return (ma); 1831 1832 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 1833 M_MOUNT, M_WAITOK); 1834 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 1835 ma->v[ma->len].iov_len = strlen(name) + 1; 1836 ma->len++; 1837 1838 ma->v[ma->len].iov_base = (void *)(uintptr_t)val; 1839 if (len < 0) 1840 ma->v[ma->len].iov_len = strlen(val) + 1; 1841 else 1842 ma->v[ma->len].iov_len = len; 1843 ma->len++; 1844 return (ma); 1845} 1846 1847/* 1848 * Free a mntarg structure 1849 */ 1850static void 1851free_mntarg(struct mntarg *ma) 1852{ 1853 struct mntaarg *maa; 1854 1855 while (!SLIST_EMPTY(&ma->list)) { 1856 maa = SLIST_FIRST(&ma->list); 1857 SLIST_REMOVE_HEAD(&ma->list, next); 1858 free(maa, M_MOUNT); 1859 } 1860 free(ma->v, M_MOUNT); 1861 free(ma, M_MOUNT); 1862} 1863 1864/* 1865 * Mount a filesystem 1866 */ 1867int 1868kernel_mount(struct mntarg *ma, int flags) 1869{ 1870 struct uio auio; 1871 int error; 1872 1873 KASSERT(ma != NULL, ("kernel_mount NULL ma")); 1874 KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v")); 1875 KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len)); 1876 1877 auio.uio_iov = ma->v; 1878 auio.uio_iovcnt = ma->len; 1879 auio.uio_segflg = UIO_SYSSPACE; 1880 1881 error = ma->error; 1882 if (!error) 1883 error = vfs_donmount(curthread, flags, &auio); 1884 free_mntarg(ma); 1885 return (error); 1886} 1887 1888/* 1889 * A printflike function to mount a filesystem. 1890 */ 1891int 1892kernel_vmount(int flags, ...) 
1893{ 1894 struct mntarg *ma = NULL; 1895 va_list ap; 1896 const char *cp; 1897 const void *vp; 1898 int error; 1899 1900 va_start(ap, flags); 1901 for (;;) { 1902 cp = va_arg(ap, const char *); 1903 if (cp == NULL) 1904 break; 1905 vp = va_arg(ap, const void *); 1906 ma = mount_arg(ma, cp, vp, -1); 1907 } 1908 va_end(ap); 1909 1910 error = kernel_mount(ma, flags); 1911 return (error); 1912} 1913