vfs_mount.c revision 190540
1/*- 2 * Copyright (c) 1999-2004 Poul-Henning Kamp 3 * Copyright (c) 1999 Michael Smith 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/sys/kern/vfs_mount.c 190540 2009-03-30 05:57:55Z thompsa $"); 39 40#include <sys/param.h> 41#include <sys/conf.h> 42#include <sys/fcntl.h> 43#include <sys/jail.h> 44#include <sys/kernel.h> 45#include <sys/libkern.h> 46#include <sys/malloc.h> 47#include <sys/mount.h> 48#include <sys/mutex.h> 49#include <sys/namei.h> 50#include <sys/priv.h> 51#include <sys/proc.h> 52#include <sys/filedesc.h> 53#include <sys/reboot.h> 54#include <sys/syscallsubr.h> 55#include <sys/sysproto.h> 56#include <sys/sx.h> 57#include <sys/sysctl.h> 58#include <sys/sysent.h> 59#include <sys/systm.h> 60#include <sys/vnode.h> 61#include <vm/uma.h> 62 63#include <geom/geom.h> 64 65#include <machine/stdarg.h> 66 67#include <security/audit/audit.h> 68#include <security/mac/mac_framework.h> 69 70#include "opt_rootdevname.h" 71#include "opt_mac.h" 72 73#define ROOTNAME "root_device" 74#define VFS_MOUNTARG_SIZE_MAX (1024 * 64) 75 76static int vfs_domount(struct thread *td, const char *fstype, 77 char *fspath, int fsflags, void *fsdata); 78static int vfs_mountroot_ask(void); 79static int vfs_mountroot_try(const char *mountfrom); 80static void free_mntarg(struct mntarg *ma); 81 82static int usermount = 0; 83SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 84 "Unprivileged users may mount and unmount file systems"); 85 86MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure"); 87MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker"); 88static uma_zone_t mount_zone; 89 90/* List of mounted filesystems. */ 91struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); 92 93/* For any iteration/modification of mountlist */ 94struct mtx mountlist_mtx; 95MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF); 96 97/* 98 * The vnode of the system's root (/ in the filesystem, without chroot 99 * active.) 100 */ 101struct vnode *rootvnode; 102 103/* 104 * The root filesystem is detailed in the kernel environment variable 105 * vfs.root.mountfrom, which is expected to be in the general format 106 * 107 * <vfsname>:[<path>] 108 * vfsname := the name of a VFS known to the kernel and capable 109 * of being mounted as root 110 * path := disk device name or other data used by the filesystem 111 * to locate its physical store 112 */ 113 114/* 115 * Global opts, taken by all filesystems 116 */ 117static const char *global_opts[] = { 118 "errmsg", 119 "fstype", 120 "fspath", 121 "ro", 122 "rw", 123 "nosuid", 124 "noexec", 125 NULL 126}; 127 128/* 129 * The root specifiers we will try if RB_CDROM is specified. 130 */ 131static char *cdrom_rootdevnames[] = { 132 "cd9660:cd0", 133 "cd9660:acd0", 134 NULL 135}; 136 137/* legacy find-root code */ 138char *rootdevnames[2] = {NULL, NULL}; 139#ifndef ROOTDEVNAME 140# define ROOTDEVNAME NULL 141#endif 142static const char *ctrootdevname = ROOTDEVNAME; 143 144/* 145 * --------------------------------------------------------------------- 146 * Functions for building and sanitizing the mount options 147 */ 148 149/* Remove one mount option. */ 150static void 151vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt) 152{ 153 154 TAILQ_REMOVE(opts, opt, link); 155 free(opt->name, M_MOUNT); 156 if (opt->value != NULL) 157 free(opt->value, M_MOUNT); 158 free(opt, M_MOUNT); 159} 160 161/* Release all resources related to the mount options. */ 162void 163vfs_freeopts(struct vfsoptlist *opts) 164{ 165 struct vfsopt *opt; 166 167 while (!TAILQ_EMPTY(opts)) { 168 opt = TAILQ_FIRST(opts); 169 vfs_freeopt(opts, opt); 170 } 171 free(opts, M_MOUNT); 172} 173 174void 175vfs_deleteopt(struct vfsoptlist *opts, const char *name) 176{ 177 struct vfsopt *opt, *temp; 178 179 if (opts == NULL) 180 return; 181 TAILQ_FOREACH_SAFE(opt, opts, link, temp) { 182 if (strcmp(opt->name, name) == 0) 183 vfs_freeopt(opts, opt); 184 } 185} 186 187/* 188 * Check if options are equal (with or without the "no" prefix). 189 */ 190static int 191vfs_equalopts(const char *opt1, const char *opt2) 192{ 193 char *p; 194 195 /* "opt" vs. "opt" or "noopt" vs. "noopt" */ 196 if (strcmp(opt1, opt2) == 0) 197 return (1); 198 /* "noopt" vs. "opt" */ 199 if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) 200 return (1); 201 /* "opt" vs. "noopt" */ 202 if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) 203 return (1); 204 while ((p = strchr(opt1, '.')) != NULL && 205 !strncmp(opt1, opt2, ++p - opt1)) { 206 opt2 += p - opt1; 207 opt1 = p; 208 /* "foo.noopt" vs. "foo.opt" */ 209 if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) 210 return (1); 211 /* "foo.opt" vs. "foo.noopt" */ 212 if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) 213 return (1); 214 } 215 return (0); 216} 217 218/* 219 * If a mount option is specified several times, 220 * (with or without the "no" prefix) only keep 221 * the last occurence of it. 222 */ 223static void 224vfs_sanitizeopts(struct vfsoptlist *opts) 225{ 226 struct vfsopt *opt, *opt2, *tmp; 227 228 TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) { 229 opt2 = TAILQ_PREV(opt, vfsoptlist, link); 230 while (opt2 != NULL) { 231 if (vfs_equalopts(opt->name, opt2->name)) { 232 tmp = TAILQ_PREV(opt2, vfsoptlist, link); 233 vfs_freeopt(opts, opt2); 234 opt2 = tmp; 235 } else { 236 opt2 = TAILQ_PREV(opt2, vfsoptlist, link); 237 } 238 } 239 } 240} 241 242/* 243 * Build a linked list of mount options from a struct uio. 244 */ 245int 246vfs_buildopts(struct uio *auio, struct vfsoptlist **options) 247{ 248 struct vfsoptlist *opts; 249 struct vfsopt *opt; 250 size_t memused, namelen, optlen; 251 unsigned int i, iovcnt; 252 int error; 253 254 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 255 TAILQ_INIT(opts); 256 memused = 0; 257 iovcnt = auio->uio_iovcnt; 258 for (i = 0; i < iovcnt; i += 2) { 259 namelen = auio->uio_iov[i].iov_len; 260 optlen = auio->uio_iov[i + 1].iov_len; 261 memused += sizeof(struct vfsopt) + optlen + namelen; 262 /* 263 * Avoid consuming too much memory, and attempts to overflow 264 * memused. 265 */ 266 if (memused > VFS_MOUNTARG_SIZE_MAX || 267 optlen > VFS_MOUNTARG_SIZE_MAX || 268 namelen > VFS_MOUNTARG_SIZE_MAX) { 269 error = EINVAL; 270 goto bad; 271 } 272 273 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 274 opt->name = malloc(namelen, M_MOUNT, M_WAITOK); 275 opt->value = NULL; 276 opt->len = 0; 277 opt->pos = i / 2; 278 opt->seen = 0; 279 280 /* 281 * Do this early, so jumps to "bad" will free the current 282 * option. 283 */ 284 TAILQ_INSERT_TAIL(opts, opt, link); 285 286 if (auio->uio_segflg == UIO_SYSSPACE) { 287 bcopy(auio->uio_iov[i].iov_base, opt->name, namelen); 288 } else { 289 error = copyin(auio->uio_iov[i].iov_base, opt->name, 290 namelen); 291 if (error) 292 goto bad; 293 } 294 /* Ensure names are null-terminated strings. */ 295 if (namelen == 0 || opt->name[namelen - 1] != '\0') { 296 error = EINVAL; 297 goto bad; 298 } 299 if (optlen != 0) { 300 opt->len = optlen; 301 opt->value = malloc(optlen, M_MOUNT, M_WAITOK); 302 if (auio->uio_segflg == UIO_SYSSPACE) { 303 bcopy(auio->uio_iov[i + 1].iov_base, opt->value, 304 optlen); 305 } else { 306 error = copyin(auio->uio_iov[i + 1].iov_base, 307 opt->value, optlen); 308 if (error) 309 goto bad; 310 } 311 } 312 } 313 vfs_sanitizeopts(opts); 314 *options = opts; 315 return (0); 316bad: 317 vfs_freeopts(opts); 318 return (error); 319} 320 321/* 322 * Merge the old mount options with the new ones passed 323 * in the MNT_UPDATE case. 324 * 325 * XXX This function will keep a "nofoo" option in the 326 * new options if there is no matching "foo" option 327 * to be cancelled in the old options. This is a bug 328 * if the option's canonical name is "foo". E.g., "noro" 329 * shouldn't end up in the mount point's active options, 330 * but it can. 331 */ 332static void 333vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts) 334{ 335 struct vfsopt *opt, *opt2, *new; 336 337 TAILQ_FOREACH(opt, opts, link) { 338 /* 339 * Check that this option hasn't been redefined 340 * nor cancelled with a "no" mount option. 341 */ 342 opt2 = TAILQ_FIRST(toopts); 343 while (opt2 != NULL) { 344 if (strcmp(opt2->name, opt->name) == 0) 345 goto next; 346 if (strncmp(opt2->name, "no", 2) == 0 && 347 strcmp(opt2->name + 2, opt->name) == 0) { 348 vfs_freeopt(toopts, opt2); 349 goto next; 350 } 351 opt2 = TAILQ_NEXT(opt2, link); 352 } 353 /* We want this option, duplicate it. */ 354 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 355 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK); 356 strcpy(new->name, opt->name); 357 if (opt->len != 0) { 358 new->value = malloc(opt->len, M_MOUNT, M_WAITOK); 359 bcopy(opt->value, new->value, opt->len); 360 } else { 361 new->value = NULL; 362 } 363 new->len = opt->len; 364 new->seen = opt->seen; 365 TAILQ_INSERT_TAIL(toopts, new, link); 366next: 367 continue; 368 } 369} 370 371/* 372 * Mount a filesystem. 373 */ 374int 375nmount(td, uap) 376 struct thread *td; 377 struct nmount_args /* { 378 struct iovec *iovp; 379 unsigned int iovcnt; 380 int flags; 381 } */ *uap; 382{ 383 struct uio *auio; 384 int error; 385 u_int iovcnt; 386 387 AUDIT_ARG(fflags, uap->flags); 388 CTR4(KTR_VFS, "%s: iovp %p with iovcnt %d and flags %d", __func__, 389 uap->iovp, uap->iovcnt, uap->flags); 390 391 /* 392 * Filter out MNT_ROOTFS. We do not want clients of nmount() in 393 * userspace to set this flag, but we must filter it out if we want 394 * MNT_UPDATE on the root file system to work. 395 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try(). 396 */ 397 uap->flags &= ~MNT_ROOTFS; 398 399 iovcnt = uap->iovcnt; 400 /* 401 * Check that we have an even number of iovec's 402 * and that we have at least two options. 403 */ 404 if ((iovcnt & 1) || (iovcnt < 4)) { 405 CTR2(KTR_VFS, "%s: failed for invalid iovcnt %d", __func__, 406 uap->iovcnt); 407 return (EINVAL); 408 } 409 410 error = copyinuio(uap->iovp, iovcnt, &auio); 411 if (error) { 412 CTR2(KTR_VFS, "%s: failed for invalid uio op with %d errno", 413 __func__, error); 414 return (error); 415 } 416 error = vfs_donmount(td, uap->flags, auio); 417 418 free(auio, M_IOV); 419 return (error); 420} 421 422/* 423 * --------------------------------------------------------------------- 424 * Various utility functions 425 */ 426 427void 428vfs_ref(struct mount *mp) 429{ 430 431 CTR2(KTR_VFS, "%s: mp %p", __func__, mp); 432 MNT_ILOCK(mp); 433 MNT_REF(mp); 434 MNT_IUNLOCK(mp); 435} 436 437void 438vfs_rel(struct mount *mp) 439{ 440 441 CTR2(KTR_VFS, "%s: mp %p", __func__, mp); 442 MNT_ILOCK(mp); 443 MNT_REL(mp); 444 MNT_IUNLOCK(mp); 445} 446 447static int 448mount_init(void *mem, int size, int flags) 449{ 450 struct mount *mp; 451 452 mp = (struct mount *)mem; 453 mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); 454 lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0); 455 return (0); 456} 457 458static void 459mount_fini(void *mem, int size) 460{ 461 struct mount *mp; 462 463 mp = (struct mount *)mem; 464 lockdestroy(&mp->mnt_explock); 465 mtx_destroy(&mp->mnt_mtx); 466} 467 468/* 469 * Allocate and initialize the mount point struct. 470 */ 471struct mount * 472vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath, 473 struct ucred *cred) 474{ 475 struct mount *mp; 476 477 mp = uma_zalloc(mount_zone, M_WAITOK); 478 bzero(&mp->mnt_startzero, 479 __rangeof(struct mount, mnt_startzero, mnt_endzero)); 480 TAILQ_INIT(&mp->mnt_nvnodelist); 481 mp->mnt_nvnodelistsize = 0; 482 mp->mnt_ref = 0; 483 (void) vfs_busy(mp, MBF_NOWAIT); 484 mp->mnt_op = vfsp->vfc_vfsops; 485 mp->mnt_vfc = vfsp; 486 vfsp->vfc_refcount++; /* XXX Unlocked */ 487 mp->mnt_stat.f_type = vfsp->vfc_typenum; 488 mp->mnt_gen++; 489 strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 490 mp->mnt_vnodecovered = vp; 491 mp->mnt_cred = crdup(cred); 492 mp->mnt_stat.f_owner = cred->cr_uid; 493 strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); 494 mp->mnt_iosize_max = DFLTPHYS; 495#ifdef MAC 496 mac_mount_init(mp); 497 mac_mount_create(cred, mp); 498#endif 499 arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0); 500 return (mp); 501} 502 503/* 504 * Destroy the mount struct previously allocated by vfs_mount_alloc(). 505 */ 506void 507vfs_mount_destroy(struct mount *mp) 508{ 509 510 MNT_ILOCK(mp); 511 mp->mnt_kern_flag |= MNTK_REFEXPIRE; 512 if (mp->mnt_kern_flag & MNTK_MWAIT) { 513 mp->mnt_kern_flag &= ~MNTK_MWAIT; 514 wakeup(mp); 515 } 516 while (mp->mnt_ref) 517 msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0); 518 KASSERT(mp->mnt_ref == 0, 519 ("%s: invalid refcount in the drain path @ %s:%d", __func__, 520 __FILE__, __LINE__)); 521 if (mp->mnt_writeopcount != 0) 522 panic("vfs_mount_destroy: nonzero writeopcount"); 523 if (mp->mnt_secondary_writes != 0) 524 panic("vfs_mount_destroy: nonzero secondary_writes"); 525 mp->mnt_vfc->vfc_refcount--; 526 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) { 527 struct vnode *vp; 528 529 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) 530 vprint("", vp); 531 panic("unmount: dangling vnode"); 532 } 533 if (mp->mnt_nvnodelistsize != 0) 534 panic("vfs_mount_destroy: nonzero nvnodelistsize"); 535 if (mp->mnt_lockref != 0) 536 panic("vfs_mount_destroy: nonzero lock refcount"); 537 MNT_IUNLOCK(mp); 538#ifdef MAC 539 mac_mount_destroy(mp); 540#endif 541 if (mp->mnt_opt != NULL) 542 vfs_freeopts(mp->mnt_opt); 543 crfree(mp->mnt_cred); 544 uma_zfree(mount_zone, mp); 545} 546 547int 548vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions) 549{ 550 struct vfsoptlist *optlist; 551 struct vfsopt *opt, *noro_opt, *tmp_opt; 552 char *fstype, *fspath, *errmsg; 553 int error, fstypelen, fspathlen, errmsg_len, errmsg_pos; 554 int has_rw, has_noro; 555 556 errmsg = fspath = NULL; 557 errmsg_len = has_noro = has_rw = fspathlen = 0; 558 errmsg_pos = -1; 559 560 error = vfs_buildopts(fsoptions, &optlist); 561 if (error) 562 return (error); 563 564 if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0) 565 errmsg_pos = vfs_getopt_pos(optlist, "errmsg"); 566 567 /* 568 * We need these two options before the others, 569 * and they are mandatory for any filesystem. 570 * Ensure they are NUL terminated as well. 571 */ 572 fstypelen = 0; 573 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); 574 if (error || fstype[fstypelen - 1] != '\0') { 575 error = EINVAL; 576 if (errmsg != NULL) 577 strncpy(errmsg, "Invalid fstype", errmsg_len); 578 goto bail; 579 } 580 fspathlen = 0; 581 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); 582 if (error || fspath[fspathlen - 1] != '\0') { 583 error = EINVAL; 584 if (errmsg != NULL) 585 strncpy(errmsg, "Invalid fspath", errmsg_len); 586 goto bail; 587 } 588 589 /* 590 * We need to see if we have the "update" option 591 * before we call vfs_domount(), since vfs_domount() has special 592 * logic based on MNT_UPDATE. This is very important 593 * when we want to update the root filesystem. 594 */ 595 TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) { 596 if (strcmp(opt->name, "update") == 0) { 597 fsflags |= MNT_UPDATE; 598 vfs_freeopt(optlist, opt); 599 } 600 else if (strcmp(opt->name, "async") == 0) 601 fsflags |= MNT_ASYNC; 602 else if (strcmp(opt->name, "force") == 0) { 603 fsflags |= MNT_FORCE; 604 vfs_freeopt(optlist, opt); 605 } 606 else if (strcmp(opt->name, "reload") == 0) { 607 fsflags |= MNT_RELOAD; 608 vfs_freeopt(optlist, opt); 609 } 610 else if (strcmp(opt->name, "multilabel") == 0) 611 fsflags |= MNT_MULTILABEL; 612 else if (strcmp(opt->name, "noasync") == 0) 613 fsflags &= ~MNT_ASYNC; 614 else if (strcmp(opt->name, "noatime") == 0) 615 fsflags |= MNT_NOATIME; 616 else if (strcmp(opt->name, "atime") == 0) { 617 free(opt->name, M_MOUNT); 618 opt->name = strdup("nonoatime", M_MOUNT); 619 } 620 else if (strcmp(opt->name, "noclusterr") == 0) 621 fsflags |= MNT_NOCLUSTERR; 622 else if (strcmp(opt->name, "clusterr") == 0) { 623 free(opt->name, M_MOUNT); 624 opt->name = strdup("nonoclusterr", M_MOUNT); 625 } 626 else if (strcmp(opt->name, "noclusterw") == 0) 627 fsflags |= MNT_NOCLUSTERW; 628 else if (strcmp(opt->name, "clusterw") == 0) { 629 free(opt->name, M_MOUNT); 630 opt->name = strdup("nonoclusterw", M_MOUNT); 631 } 632 else if (strcmp(opt->name, "noexec") == 0) 633 fsflags |= MNT_NOEXEC; 634 else if (strcmp(opt->name, "exec") == 0) { 635 free(opt->name, M_MOUNT); 636 opt->name = strdup("nonoexec", M_MOUNT); 637 } 638 else if (strcmp(opt->name, "nosuid") == 0) 639 fsflags |= MNT_NOSUID; 640 else if (strcmp(opt->name, "suid") == 0) { 641 free(opt->name, M_MOUNT); 642 opt->name = strdup("nonosuid", M_MOUNT); 643 } 644 else if (strcmp(opt->name, "nosymfollow") == 0) 645 fsflags |= MNT_NOSYMFOLLOW; 646 else if (strcmp(opt->name, "symfollow") == 0) { 647 free(opt->name, M_MOUNT); 648 opt->name = strdup("nonosymfollow", M_MOUNT); 649 } 650 else if (strcmp(opt->name, "noro") == 0) { 651 fsflags &= ~MNT_RDONLY; 652 has_noro = 1; 653 } 654 else if (strcmp(opt->name, "rw") == 0) { 655 fsflags &= ~MNT_RDONLY; 656 has_rw = 1; 657 } 658 else if (strcmp(opt->name, "ro") == 0) 659 fsflags |= MNT_RDONLY; 660 else if (strcmp(opt->name, "rdonly") == 0) { 661 free(opt->name, M_MOUNT); 662 opt->name = strdup("ro", M_MOUNT); 663 fsflags |= MNT_RDONLY; 664 } 665 else if (strcmp(opt->name, "suiddir") == 0) 666 fsflags |= MNT_SUIDDIR; 667 else if (strcmp(opt->name, "sync") == 0) 668 fsflags |= MNT_SYNCHRONOUS; 669 else if (strcmp(opt->name, "union") == 0) 670 fsflags |= MNT_UNION; 671 } 672 673 /* 674 * If "rw" was specified as a mount option, and we 675 * are trying to update a mount-point from "ro" to "rw", 676 * we need a mount option "noro", since in vfs_mergeopts(), 677 * "noro" will cancel "ro", but "rw" will not do anything. 678 */ 679 if (has_rw && !has_noro) { 680 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 681 noro_opt->name = strdup("noro", M_MOUNT); 682 noro_opt->value = NULL; 683 noro_opt->len = 0; 684 noro_opt->pos = -1; 685 noro_opt->seen = 1; 686 TAILQ_INSERT_TAIL(optlist, noro_opt, link); 687 } 688 689 /* 690 * Be ultra-paranoid about making sure the type and fspath 691 * variables will fit in our mp buffers, including the 692 * terminating NUL. 693 */ 694 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) { 695 error = ENAMETOOLONG; 696 goto bail; 697 } 698 699 mtx_lock(&Giant); 700 error = vfs_domount(td, fstype, fspath, fsflags, optlist); 701 mtx_unlock(&Giant); 702bail: 703 /* copyout the errmsg */ 704 if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt) 705 && errmsg_len > 0 && errmsg != NULL) { 706 if (fsoptions->uio_segflg == UIO_SYSSPACE) { 707 bcopy(errmsg, 708 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 709 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); 710 } else { 711 copyout(errmsg, 712 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 713 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); 714 } 715 } 716 717 if (error != 0) 718 vfs_freeopts(optlist); 719 return (error); 720} 721 722/* 723 * Old mount API. 724 */ 725#ifndef _SYS_SYSPROTO_H_ 726struct mount_args { 727 char *type; 728 char *path; 729 int flags; 730 caddr_t data; 731}; 732#endif 733/* ARGSUSED */ 734int 735mount(td, uap) 736 struct thread *td; 737 struct mount_args /* { 738 char *type; 739 char *path; 740 int flags; 741 caddr_t data; 742 } */ *uap; 743{ 744 char *fstype; 745 struct vfsconf *vfsp = NULL; 746 struct mntarg *ma = NULL; 747 int error; 748 749 AUDIT_ARG(fflags, uap->flags); 750 751 /* 752 * Filter out MNT_ROOTFS. We do not want clients of mount() in 753 * userspace to set this flag, but we must filter it out if we want 754 * MNT_UPDATE on the root file system to work. 755 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try(). 756 */ 757 uap->flags &= ~MNT_ROOTFS; 758 759 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); 760 error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL); 761 if (error) { 762 free(fstype, M_TEMP); 763 return (error); 764 } 765 766 AUDIT_ARG(text, fstype); 767 mtx_lock(&Giant); 768 vfsp = vfs_byname_kld(fstype, td, &error); 769 free(fstype, M_TEMP); 770 if (vfsp == NULL) { 771 mtx_unlock(&Giant); 772 return (ENOENT); 773 } 774 if (vfsp->vfc_vfsops->vfs_cmount == NULL) { 775 mtx_unlock(&Giant); 776 return (EOPNOTSUPP); 777 } 778 779 ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN); 780 ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN); 781 ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro"); 782 ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid"); 783 ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec"); 784 785 error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td); 786 mtx_unlock(&Giant); 787 return (error); 788} 789 790 791/* 792 * vfs_domount(): actually attempt a filesystem mount. 793 */ 794static int 795vfs_domount( 796 struct thread *td, /* Calling thread. */ 797 const char *fstype, /* Filesystem type. */ 798 char *fspath, /* Mount path. */ 799 int fsflags, /* Flags common to all filesystems. */ 800 void *fsdata /* Options local to the filesystem. */ 801 ) 802{ 803 struct vnode *vp; 804 struct mount *mp; 805 struct vfsconf *vfsp; 806 struct oexport_args oexport; 807 struct export_args export; 808 int error, flag = 0; 809 struct vattr va; 810 struct nameidata nd; 811 812 mtx_assert(&Giant, MA_OWNED); 813 /* 814 * Be ultra-paranoid about making sure the type and fspath 815 * variables will fit in our mp buffers, including the 816 * terminating NUL. 817 */ 818 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) 819 return (ENAMETOOLONG); 820 821 if (jailed(td->td_ucred) || usermount == 0) { 822 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0) 823 return (error); 824 } 825 826 /* 827 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. 828 */ 829 if (fsflags & MNT_EXPORTED) { 830 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED); 831 if (error) 832 return (error); 833 } 834 if (fsflags & MNT_SUIDDIR) { 835 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR); 836 if (error) 837 return (error); 838 } 839 /* 840 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users. 841 */ 842 if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) { 843 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0) 844 fsflags |= MNT_NOSUID | MNT_USER; 845 } 846 847 /* Load KLDs before we lock the covered vnode to avoid reversals. */ 848 vfsp = NULL; 849 if ((fsflags & MNT_UPDATE) == 0) { 850 /* Don't try to load KLDs if we're mounting the root. */ 851 if (fsflags & MNT_ROOTFS) 852 vfsp = vfs_byname(fstype); 853 else 854 vfsp = vfs_byname_kld(fstype, td, &error); 855 if (vfsp == NULL) 856 return (ENODEV); 857 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL)) 858 return (EPERM); 859 } 860 /* 861 * Get vnode to be covered 862 */ 863 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, 864 fspath, td); 865 if ((error = namei(&nd)) != 0) 866 return (error); 867 NDFREE(&nd, NDF_ONLY_PNBUF); 868 vp = nd.ni_vp; 869 if (fsflags & MNT_UPDATE) { 870 if ((vp->v_vflag & VV_ROOT) == 0) { 871 vput(vp); 872 return (EINVAL); 873 } 874 mp = vp->v_mount; 875 MNT_ILOCK(mp); 876 flag = mp->mnt_flag; 877 /* 878 * We only allow the filesystem to be reloaded if it 879 * is currently mounted read-only. 880 */ 881 if ((fsflags & MNT_RELOAD) && 882 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 883 MNT_IUNLOCK(mp); 884 vput(vp); 885 return (EOPNOTSUPP); /* Needs translation */ 886 } 887 MNT_IUNLOCK(mp); 888 /* 889 * Only privileged root, or (if MNT_USER is set) the user that 890 * did the original mount is permitted to update it. 891 */ 892 error = vfs_suser(mp, td); 893 if (error) { 894 vput(vp); 895 return (error); 896 } 897 if (vfs_busy(mp, MBF_NOWAIT)) { 898 vput(vp); 899 return (EBUSY); 900 } 901 VI_LOCK(vp); 902 if ((vp->v_iflag & VI_MOUNT) != 0 || 903 vp->v_mountedhere != NULL) { 904 VI_UNLOCK(vp); 905 vfs_unbusy(mp); 906 vput(vp); 907 return (EBUSY); 908 } 909 vp->v_iflag |= VI_MOUNT; 910 VI_UNLOCK(vp); 911 MNT_ILOCK(mp); 912 mp->mnt_flag |= fsflags & 913 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS); 914 MNT_IUNLOCK(mp); 915 VOP_UNLOCK(vp, 0); 916 mp->mnt_optnew = fsdata; 917 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); 918 } else { 919 /* 920 * If the user is not root, ensure that they own the directory 921 * onto which we are attempting to mount. 922 */ 923 error = VOP_GETATTR(vp, &va, td->td_ucred); 924 if (error) { 925 vput(vp); 926 return (error); 927 } 928 if (va.va_uid != td->td_ucred->cr_uid) { 929 error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN, 930 0); 931 if (error) { 932 vput(vp); 933 return (error); 934 } 935 } 936 error = vinvalbuf(vp, V_SAVE, 0, 0); 937 if (error != 0) { 938 vput(vp); 939 return (error); 940 } 941 if (vp->v_type != VDIR) { 942 vput(vp); 943 return (ENOTDIR); 944 } 945 VI_LOCK(vp); 946 if ((vp->v_iflag & VI_MOUNT) != 0 || 947 vp->v_mountedhere != NULL) { 948 VI_UNLOCK(vp); 949 vput(vp); 950 return (EBUSY); 951 } 952 vp->v_iflag |= VI_MOUNT; 953 VI_UNLOCK(vp); 954 955 /* 956 * Allocate and initialize the filesystem. 957 */ 958 mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred); 959 VOP_UNLOCK(vp, 0); 960 961 /* XXXMAC: pass to vfs_mount_alloc? */ 962 mp->mnt_optnew = fsdata; 963 } 964 965 /* 966 * Set the mount level flags. 967 */ 968 MNT_ILOCK(mp); 969 mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) | 970 (fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS | 971 MNT_RDONLY)); 972 if ((mp->mnt_flag & MNT_ASYNC) == 0) 973 mp->mnt_kern_flag &= ~MNTK_ASYNC; 974 MNT_IUNLOCK(mp); 975 /* 976 * Mount the filesystem. 977 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 978 * get. No freeing of cn_pnbuf. 979 */ 980 error = VFS_MOUNT(mp, td); 981 982 /* 983 * Process the export option only if we are 984 * updating mount options. 985 */ 986 if (!error && (fsflags & MNT_UPDATE)) { 987 if (vfs_copyopt(mp->mnt_optnew, "export", &export, 988 sizeof(export)) == 0) 989 error = vfs_export(mp, &export); 990 else if (vfs_copyopt(mp->mnt_optnew, "export", &oexport, 991 sizeof(oexport)) == 0) { 992 export.ex_flags = oexport.ex_flags; 993 export.ex_root = oexport.ex_root; 994 export.ex_anon = oexport.ex_anon; 995 export.ex_addr = oexport.ex_addr; 996 export.ex_addrlen = oexport.ex_addrlen; 997 export.ex_mask = oexport.ex_mask; 998 export.ex_masklen = oexport.ex_masklen; 999 export.ex_indexfile = oexport.ex_indexfile; 1000 export.ex_numsecflavors = 0; 1001 error = vfs_export(mp, &export); 1002 } 1003 } 1004 1005 if (!error) { 1006 if (mp->mnt_opt != NULL) 1007 vfs_freeopts(mp->mnt_opt); 1008 mp->mnt_opt = mp->mnt_optnew; 1009 (void)VFS_STATFS(mp, &mp->mnt_stat, td); 1010 } 1011 /* 1012 * Prevent external consumers of mount options from reading 1013 * mnt_optnew. 1014 */ 1015 mp->mnt_optnew = NULL; 1016 if (mp->mnt_flag & MNT_UPDATE) { 1017 MNT_ILOCK(mp); 1018 if (error) 1019 mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | 1020 (flag & ~MNT_QUOTA); 1021 else 1022 mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | 1023 MNT_FORCE | MNT_SNAPSHOT); 1024 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 1025 mp->mnt_kern_flag |= MNTK_ASYNC; 1026 else 1027 mp->mnt_kern_flag &= ~MNTK_ASYNC; 1028 MNT_IUNLOCK(mp); 1029 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1030 if (mp->mnt_syncer == NULL) 1031 error = vfs_allocate_syncvnode(mp); 1032 } else { 1033 if (mp->mnt_syncer != NULL) 1034 vrele(mp->mnt_syncer); 1035 mp->mnt_syncer = NULL; 1036 } 1037 vfs_unbusy(mp); 1038 VI_LOCK(vp); 1039 vp->v_iflag &= ~VI_MOUNT; 1040 VI_UNLOCK(vp); 1041 vrele(vp); 1042 return (error); 1043 } 1044 MNT_ILOCK(mp); 1045 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 1046 mp->mnt_kern_flag |= MNTK_ASYNC; 1047 else 1048 mp->mnt_kern_flag &= ~MNTK_ASYNC; 1049 MNT_IUNLOCK(mp); 1050 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1051 /* 1052 * Put the new filesystem on the mount list after root. 1053 */ 1054 cache_purge(vp); 1055 if (!error) { 1056 struct vnode *newdp; 1057 1058 VI_LOCK(vp); 1059 vp->v_iflag &= ~VI_MOUNT; 1060 VI_UNLOCK(vp); 1061 vp->v_mountedhere = mp; 1062 mtx_lock(&mountlist_mtx); 1063 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 1064 mtx_unlock(&mountlist_mtx); 1065 vfs_event_signal(NULL, VQ_MOUNT, 0); 1066 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td)) 1067 panic("mount: lost mount"); 1068 mountcheckdirs(vp, newdp); 1069 vput(newdp); 1070 VOP_UNLOCK(vp, 0); 1071 if ((mp->mnt_flag & MNT_RDONLY) == 0) 1072 error = vfs_allocate_syncvnode(mp); 1073 vfs_unbusy(mp); 1074 if (error) 1075 vrele(vp); 1076 } else { 1077 VI_LOCK(vp); 1078 vp->v_iflag &= ~VI_MOUNT; 1079 VI_UNLOCK(vp); 1080 vfs_unbusy(mp); 1081 vfs_mount_destroy(mp); 1082 vput(vp); 1083 } 1084 return (error); 1085} 1086 1087/* 1088 * Unmount a filesystem. 1089 * 1090 * Note: unmount takes a path to the vnode mounted on as argument, not 1091 * special file (as before). 1092 */ 1093#ifndef _SYS_SYSPROTO_H_ 1094struct unmount_args { 1095 char *path; 1096 int flags; 1097}; 1098#endif 1099/* ARGSUSED */ 1100int 1101unmount(td, uap) 1102 struct thread *td; 1103 register struct unmount_args /* { 1104 char *path; 1105 int flags; 1106 } */ *uap; 1107{ 1108 struct mount *mp; 1109 char *pathbuf; 1110 int error, id0, id1; 1111 1112 if (jailed(td->td_ucred) || usermount == 0) { 1113 error = priv_check(td, PRIV_VFS_UNMOUNT); 1114 if (error) 1115 return (error); 1116 } 1117 1118 pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK); 1119 error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL); 1120 if (error) { 1121 free(pathbuf, M_TEMP); 1122 return (error); 1123 } 1124 AUDIT_ARG(upath, td, pathbuf, ARG_UPATH1); 1125 mtx_lock(&Giant); 1126 if (uap->flags & MNT_BYFSID) { 1127 /* Decode the filesystem ID. */ 1128 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) { 1129 mtx_unlock(&Giant); 1130 free(pathbuf, M_TEMP); 1131 return (EINVAL); 1132 } 1133 1134 mtx_lock(&mountlist_mtx); 1135 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 1136 if (mp->mnt_stat.f_fsid.val[0] == id0 && 1137 mp->mnt_stat.f_fsid.val[1] == id1) 1138 break; 1139 } 1140 mtx_unlock(&mountlist_mtx); 1141 } else { 1142 mtx_lock(&mountlist_mtx); 1143 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 1144 if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) 1145 break; 1146 } 1147 mtx_unlock(&mountlist_mtx); 1148 } 1149 free(pathbuf, M_TEMP); 1150 if (mp == NULL) { 1151 /* 1152 * Previously we returned ENOENT for a nonexistent path and 1153 * EINVAL for a non-mountpoint. We cannot tell these apart 1154 * now, so in the !MNT_BYFSID case return the more likely 1155 * EINVAL for compatibility. 1156 */ 1157 mtx_unlock(&Giant); 1158 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL); 1159 } 1160 1161 /* 1162 * Don't allow unmounting the root filesystem. 1163 */ 1164 if (mp->mnt_flag & MNT_ROOTFS) { 1165 mtx_unlock(&Giant); 1166 return (EINVAL); 1167 } 1168 error = dounmount(mp, uap->flags, td); 1169 mtx_unlock(&Giant); 1170 return (error); 1171} 1172 1173/* 1174 * Do the actual filesystem unmount. 1175 */ 1176int 1177dounmount(mp, flags, td) 1178 struct mount *mp; 1179 int flags; 1180 struct thread *td; 1181{ 1182 struct vnode *coveredvp, *fsrootvp; 1183 int error; 1184 int async_flag; 1185 int mnt_gen_r; 1186 1187 mtx_assert(&Giant, MA_OWNED); 1188 1189 if ((coveredvp = mp->mnt_vnodecovered) != NULL) { 1190 mnt_gen_r = mp->mnt_gen; 1191 VI_LOCK(coveredvp); 1192 vholdl(coveredvp); 1193 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY); 1194 vdrop(coveredvp); 1195 /* 1196 * Check for mp being unmounted while waiting for the 1197 * covered vnode lock. 1198 */ 1199 if (coveredvp->v_mountedhere != mp || 1200 coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) { 1201 VOP_UNLOCK(coveredvp, 0); 1202 return (EBUSY); 1203 } 1204 } 1205 /* 1206 * Only privileged root, or (if MNT_USER is set) the user that did the 1207 * original mount is permitted to unmount this filesystem. 1208 */ 1209 error = vfs_suser(mp, td); 1210 if (error) { 1211 if (coveredvp) 1212 VOP_UNLOCK(coveredvp, 0); 1213 return (error); 1214 } 1215 1216 MNT_ILOCK(mp); 1217 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 1218 MNT_IUNLOCK(mp); 1219 if (coveredvp) 1220 VOP_UNLOCK(coveredvp, 0); 1221 return (EBUSY); 1222 } 1223 mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ; 1224 /* Allow filesystems to detect that a forced unmount is in progress. */ 1225 if (flags & MNT_FORCE) 1226 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 1227 error = 0; 1228 if (mp->mnt_lockref) { 1229 if ((flags & MNT_FORCE) == 0) { 1230 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ | 1231 MNTK_UNMOUNTF); 1232 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1233 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1234 wakeup(mp); 1235 } 1236 MNT_IUNLOCK(mp); 1237 if (coveredvp) 1238 VOP_UNLOCK(coveredvp, 0); 1239 return (EBUSY); 1240 } 1241 mp->mnt_kern_flag |= MNTK_DRAINING; 1242 error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS, 1243 "mount drain", 0); 1244 } 1245 MNT_IUNLOCK(mp); 1246 KASSERT(mp->mnt_lockref == 0, 1247 ("%s: invalid lock refcount in the drain path @ %s:%d", 1248 __func__, __FILE__, __LINE__)); 1249 KASSERT(error == 0, 1250 ("%s: invalid return value for msleep in the drain path @ %s:%d", 1251 __func__, __FILE__, __LINE__)); 1252 vn_start_write(NULL, &mp, V_WAIT); 1253 1254 if (mp->mnt_flag & MNT_EXPUBLIC) 1255 vfs_setpublicfs(NULL, NULL, NULL); 1256 1257 vfs_msync(mp, MNT_WAIT); 1258 MNT_ILOCK(mp); 1259 async_flag = mp->mnt_flag & MNT_ASYNC; 1260 mp->mnt_flag &= ~MNT_ASYNC; 1261 mp->mnt_kern_flag &= ~MNTK_ASYNC; 1262 MNT_IUNLOCK(mp); 1263 cache_purgevfs(mp); /* remove cache entries for this file sys */ 1264 if (mp->mnt_syncer != NULL) 1265 vrele(mp->mnt_syncer); 1266 /* 1267 * For forced unmounts, move process cdir/rdir refs on the fs root 1268 * vnode to the covered vnode. For non-forced unmounts we want 1269 * such references to cause an EBUSY error. 1270 */ 1271 if ((flags & MNT_FORCE) && 1272 VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) { 1273 if (mp->mnt_vnodecovered != NULL) 1274 mountcheckdirs(fsrootvp, mp->mnt_vnodecovered); 1275 if (fsrootvp == rootvnode) { 1276 vrele(rootvnode); 1277 rootvnode = NULL; 1278 } 1279 vput(fsrootvp); 1280 } 1281 if (((mp->mnt_flag & MNT_RDONLY) || 1282 (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) || 1283 (flags & MNT_FORCE)) { 1284 error = VFS_UNMOUNT(mp, flags, td); 1285 } 1286 vn_finished_write(mp); 1287 /* 1288 * If we failed to flush the dirty blocks for this mount point, 1289 * undo all the cdir/rdir and rootvnode changes we made above. 1290 * Unless we failed to do so because the device is reporting that 1291 * it doesn't exist anymore. 1292 */ 1293 if (error && error != ENXIO) { 1294 if ((flags & MNT_FORCE) && 1295 VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) { 1296 if (mp->mnt_vnodecovered != NULL) 1297 mountcheckdirs(mp->mnt_vnodecovered, fsrootvp); 1298 if (rootvnode == NULL) { 1299 rootvnode = fsrootvp; 1300 vref(rootvnode); 1301 } 1302 vput(fsrootvp); 1303 } 1304 MNT_ILOCK(mp); 1305 mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ; 1306 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) { 1307 MNT_IUNLOCK(mp); 1308 (void) vfs_allocate_syncvnode(mp); 1309 MNT_ILOCK(mp); 1310 } 1311 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 1312 mp->mnt_flag |= async_flag; 1313 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 1314 mp->mnt_kern_flag |= MNTK_ASYNC; 1315 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1316 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1317 wakeup(mp); 1318 } 1319 MNT_IUNLOCK(mp); 1320 if (coveredvp) 1321 VOP_UNLOCK(coveredvp, 0); 1322 return (error); 1323 } 1324 mtx_lock(&mountlist_mtx); 1325 TAILQ_REMOVE(&mountlist, mp, mnt_list); 1326 mtx_unlock(&mountlist_mtx); 1327 if (coveredvp != NULL) { 1328 coveredvp->v_mountedhere = NULL; 1329 vput(coveredvp); 1330 } 1331 vfs_event_signal(NULL, VQ_UNMOUNT, 0); 1332 vfs_mount_destroy(mp); 1333 return (0); 1334} 1335 1336/* 1337 * --------------------------------------------------------------------- 1338 * Mounting of root filesystem 1339 * 1340 */ 1341 1342struct root_hold_token { 1343 const char *who; 1344 LIST_ENTRY(root_hold_token) list; 1345}; 1346 1347static LIST_HEAD(, root_hold_token) root_holds = 1348 LIST_HEAD_INITIALIZER(&root_holds); 1349 1350static int root_mount_complete; 1351 1352/* 1353 * Hold root mount. 1354 */ 1355struct root_hold_token * 1356root_mount_hold(const char *identifier) 1357{ 1358 struct root_hold_token *h; 1359 1360 if (root_mounted()) 1361 return (NULL); 1362 1363 h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK); 1364 h->who = identifier; 1365 mtx_lock(&mountlist_mtx); 1366 LIST_INSERT_HEAD(&root_holds, h, list); 1367 mtx_unlock(&mountlist_mtx); 1368 return (h); 1369} 1370 1371/* 1372 * Release root mount. 1373 */ 1374void 1375root_mount_rel(struct root_hold_token *h) 1376{ 1377 1378 if (h == NULL) 1379 return; 1380 mtx_lock(&mountlist_mtx); 1381 LIST_REMOVE(h, list); 1382 wakeup(&root_holds); 1383 mtx_unlock(&mountlist_mtx); 1384 free(h, M_DEVBUF); 1385} 1386 1387/* 1388 * Wait for all subsystems to release root mount. 1389 */ 1390static void 1391root_mount_prepare(void) 1392{ 1393 struct root_hold_token *h; 1394 struct timeval lastfail; 1395 int curfail = 0; 1396 1397 for (;;) { 1398 DROP_GIANT(); 1399 g_waitidle(); 1400 PICKUP_GIANT(); 1401 mtx_lock(&mountlist_mtx); 1402 if (LIST_EMPTY(&root_holds)) { 1403 mtx_unlock(&mountlist_mtx); 1404 break; 1405 } 1406 if (ppsratecheck(&lastfail, &curfail, 1)) { 1407 printf("Root mount waiting for:"); 1408 LIST_FOREACH(h, &root_holds, list) 1409 printf(" %s", h->who); 1410 printf("\n"); 1411 } 1412 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold", 1413 hz); 1414 } 1415} 1416 1417/* 1418 * Root was mounted, share the good news. 1419 */ 1420static void 1421root_mount_done(void) 1422{ 1423 1424 /* 1425 * Use a mutex to prevent the wakeup being missed and waiting for 1426 * an extra 1 second sleep. 1427 */ 1428 mtx_lock(&mountlist_mtx); 1429 root_mount_complete = 1; 1430 wakeup(&root_mount_complete); 1431 mtx_unlock(&mountlist_mtx); 1432} 1433 1434/* 1435 * Return true if root is already mounted. 1436 */ 1437int 1438root_mounted(void) 1439{ 1440 1441 /* No mutex is acquired here because int stores are atomic. */ 1442 return (root_mount_complete); 1443} 1444 1445/* 1446 * Wait until root is mounted. 1447 */ 1448void 1449root_mount_wait(void) 1450{ 1451 1452 /* 1453 * Panic on an obvious deadlock - the function can't be called from 1454 * a thread which is doing the whole SYSINIT stuff. 1455 */ 1456 KASSERT(curthread->td_proc->p_pid != 0, 1457 ("root_mount_wait: cannot be called from the swapper thread")); 1458 mtx_lock(&mountlist_mtx); 1459 while (!root_mount_complete) { 1460 msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait", 1461 hz); 1462 } 1463 mtx_unlock(&mountlist_mtx); 1464} 1465 1466static void 1467set_rootvnode(struct thread *td) 1468{ 1469 struct proc *p; 1470 1471 if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td)) 1472 panic("Cannot find root vnode"); 1473 1474 p = td->td_proc; 1475 FILEDESC_XLOCK(p->p_fd); 1476 1477 if (p->p_fd->fd_cdir != NULL) 1478 vrele(p->p_fd->fd_cdir); 1479 p->p_fd->fd_cdir = rootvnode; 1480 VREF(rootvnode); 1481 1482 if (p->p_fd->fd_rdir != NULL) 1483 vrele(p->p_fd->fd_rdir); 1484 p->p_fd->fd_rdir = rootvnode; 1485 VREF(rootvnode); 1486 1487 FILEDESC_XUNLOCK(p->p_fd); 1488 1489 VOP_UNLOCK(rootvnode, 0); 1490 1491 EVENTHANDLER_INVOKE(mountroot); 1492} 1493 1494/* 1495 * Mount /devfs as our root filesystem, but do not put it on the mountlist 1496 * yet. Create a /dev -> / symlink so that absolute pathnames will lookup. 1497 */ 1498 1499static void 1500devfs_first(void) 1501{ 1502 struct thread *td = curthread; 1503 struct vfsoptlist *opts; 1504 struct vfsconf *vfsp; 1505 struct mount *mp = NULL; 1506 int error; 1507 1508 vfsp = vfs_byname("devfs"); 1509 KASSERT(vfsp != NULL, ("Could not find devfs by name")); 1510 if (vfsp == NULL) 1511 return; 1512 1513 mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred); 1514 1515 error = VFS_MOUNT(mp, td); 1516 KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error)); 1517 if (error) 1518 return; 1519 1520 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 1521 TAILQ_INIT(opts); 1522 mp->mnt_opt = opts; 1523 1524 mtx_lock(&mountlist_mtx); 1525 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 1526 mtx_unlock(&mountlist_mtx); 1527 1528 set_rootvnode(td); 1529 1530 error = kern_symlink(td, "/", "dev", UIO_SYSSPACE); 1531 if (error) 1532 printf("kern_symlink /dev -> / returns %d\n", error); 1533} 1534 1535/* 1536 * Surgically move our devfs to be mounted on /dev. 1537 */ 1538 1539static void 1540devfs_fixup(struct thread *td) 1541{ 1542 struct nameidata nd; 1543 int error; 1544 struct vnode *vp, *dvp; 1545 struct mount *mp; 1546 1547 /* Remove our devfs mount from the mountlist and purge the cache */ 1548 mtx_lock(&mountlist_mtx); 1549 mp = TAILQ_FIRST(&mountlist); 1550 TAILQ_REMOVE(&mountlist, mp, mnt_list); 1551 mtx_unlock(&mountlist_mtx); 1552 cache_purgevfs(mp); 1553 1554 VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td); 1555 VI_LOCK(dvp); 1556 dvp->v_iflag &= ~VI_MOUNT; 1557 VI_UNLOCK(dvp); 1558 dvp->v_mountedhere = NULL; 1559 1560 /* Set up the real rootvnode, and purge the cache */ 1561 TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL; 1562 set_rootvnode(td); 1563 cache_purgevfs(rootvnode->v_mount); 1564 1565 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td); 1566 error = namei(&nd); 1567 if (error) { 1568 printf("Lookup of /dev for devfs, error: %d\n", error); 1569 return; 1570 } 1571 NDFREE(&nd, NDF_ONLY_PNBUF); 1572 vp = nd.ni_vp; 1573 if (vp->v_type != VDIR) { 1574 vput(vp); 1575 } 1576 error = vinvalbuf(vp, V_SAVE, 0, 0); 1577 if (error) { 1578 vput(vp); 1579 } 1580 cache_purge(vp); 1581 mp->mnt_vnodecovered = vp; 1582 vp->v_mountedhere = mp; 1583 mtx_lock(&mountlist_mtx); 1584 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 1585 mtx_unlock(&mountlist_mtx); 1586 VOP_UNLOCK(vp, 0); 1587 vput(dvp); 1588 vfs_unbusy(mp); 1589 1590 /* Unlink the no longer needed /dev/dev -> / symlink */ 1591 kern_unlink(td, "/dev/dev", UIO_SYSSPACE); 1592} 1593 1594/* 1595 * Report errors during filesystem mounting. 1596 */ 1597void 1598vfs_mount_error(struct mount *mp, const char *fmt, ...) 1599{ 1600 struct vfsoptlist *moptlist = mp->mnt_optnew; 1601 va_list ap; 1602 int error, len; 1603 char *errmsg; 1604 1605 error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len); 1606 if (error || errmsg == NULL || len <= 0) 1607 return; 1608 1609 va_start(ap, fmt); 1610 vsnprintf(errmsg, (size_t)len, fmt, ap); 1611 va_end(ap); 1612} 1613 1614void 1615vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...) 1616{ 1617 va_list ap; 1618 int error, len; 1619 char *errmsg; 1620 1621 error = vfs_getopt(opts, "errmsg", (void **)&errmsg, &len); 1622 if (error || errmsg == NULL || len <= 0) 1623 return; 1624 1625 va_start(ap, fmt); 1626 vsnprintf(errmsg, (size_t)len, fmt, ap); 1627 va_end(ap); 1628} 1629 1630/* 1631 * Find and mount the root filesystem 1632 */ 1633void 1634vfs_mountroot(void) 1635{ 1636 char *cp; 1637 int error, i, asked = 0; 1638 1639 root_mount_prepare(); 1640 1641 mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), 1642 NULL, NULL, mount_init, mount_fini, 1643 UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1644 devfs_first(); 1645 1646 /* 1647 * We are booted with instructions to prompt for the root filesystem. 1648 */ 1649 if (boothowto & RB_ASKNAME) { 1650 if (!vfs_mountroot_ask()) 1651 goto mounted; 1652 asked = 1; 1653 } 1654 1655 /* 1656 * The root filesystem information is compiled in, and we are 1657 * booted with instructions to use it. 1658 */ 1659 if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) { 1660 if (!vfs_mountroot_try(ctrootdevname)) 1661 goto mounted; 1662 ctrootdevname = NULL; 1663 } 1664 1665 /* 1666 * We've been given the generic "use CDROM as root" flag. This is 1667 * necessary because one media may be used in many different 1668 * devices, so we need to search for them. 1669 */ 1670 if (boothowto & RB_CDROM) { 1671 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) { 1672 if (!vfs_mountroot_try(cdrom_rootdevnames[i])) 1673 goto mounted; 1674 } 1675 } 1676 1677 /* 1678 * Try to use the value read by the loader from /etc/fstab, or 1679 * supplied via some other means. This is the preferred 1680 * mechanism. 1681 */ 1682 cp = getenv("vfs.root.mountfrom"); 1683 if (cp != NULL) { 1684 error = vfs_mountroot_try(cp); 1685 freeenv(cp); 1686 if (!error) 1687 goto mounted; 1688 } 1689 1690 /* 1691 * Try values that may have been computed by code during boot 1692 */ 1693 if (!vfs_mountroot_try(rootdevnames[0])) 1694 goto mounted; 1695 if (!vfs_mountroot_try(rootdevnames[1])) 1696 goto mounted; 1697 1698 /* 1699 * If we (still) have a compiled-in default, try it. 1700 */ 1701 if (ctrootdevname != NULL) 1702 if (!vfs_mountroot_try(ctrootdevname)) 1703 goto mounted; 1704 /* 1705 * Everything so far has failed, prompt on the console if we haven't 1706 * already tried that. 1707 */ 1708 if (!asked) 1709 if (!vfs_mountroot_ask()) 1710 goto mounted; 1711 1712 panic("Root mount failed, startup aborted."); 1713 1714mounted: 1715 root_mount_done(); 1716} 1717 1718/* 1719 * Mount (mountfrom) as the root filesystem. 1720 */ 1721static int 1722vfs_mountroot_try(const char *mountfrom) 1723{ 1724 struct mount *mp; 1725 char *vfsname, *path; 1726 time_t timebase; 1727 int error; 1728 char patt[32]; 1729 1730 vfsname = NULL; 1731 path = NULL; 1732 mp = NULL; 1733 error = EINVAL; 1734 1735 if (mountfrom == NULL) 1736 return (error); /* don't complain */ 1737 printf("Trying to mount root from %s\n", mountfrom); 1738 1739 /* parse vfs name and path */ 1740 vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK); 1741 path = malloc(MNAMELEN, M_MOUNT, M_WAITOK); 1742 vfsname[0] = path[0] = 0; 1743 sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN); 1744 if (sscanf(mountfrom, patt, vfsname, path) < 1) 1745 goto out; 1746 1747 if (path[0] == '\0') 1748 strcpy(path, ROOTNAME); 1749 1750 error = kernel_vmount( 1751 MNT_RDONLY | MNT_ROOTFS, 1752 "fstype", vfsname, 1753 "fspath", "/", 1754 "from", path, 1755 NULL); 1756 if (error == 0) { 1757 /* 1758 * We mount devfs prior to mounting the / FS, so the first 1759 * entry will typically be devfs. 1760 */ 1761 mp = TAILQ_FIRST(&mountlist); 1762 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__)); 1763 1764 /* 1765 * Iterate over all currently mounted file systems and use 1766 * the time stamp found to check and/or initialize the RTC. 1767 * Typically devfs has no time stamp and the only other FS 1768 * is the actual / FS. 1769 * Call inittodr() only once and pass it the largest of the 1770 * timestamps we encounter. 1771 */ 1772 timebase = 0; 1773 do { 1774 if (mp->mnt_time > timebase) 1775 timebase = mp->mnt_time; 1776 mp = TAILQ_NEXT(mp, mnt_list); 1777 } while (mp != NULL); 1778 inittodr(timebase); 1779 1780 devfs_fixup(curthread); 1781 } 1782out: 1783 free(path, M_MOUNT); 1784 free(vfsname, M_MOUNT); 1785 return (error); 1786} 1787 1788/* 1789 * --------------------------------------------------------------------- 1790 * Interactive root filesystem selection code. 1791 */ 1792 1793static int 1794vfs_mountroot_ask(void) 1795{ 1796 char name[128]; 1797 1798 for(;;) { 1799 printf("\nManual root filesystem specification:\n"); 1800 printf(" <fstype>:<device> Mount <device> using filesystem <fstype>\n"); 1801#if defined(__amd64__) || defined(__i386__) || defined(__ia64__) 1802 printf(" eg. ufs:da0s1a\n"); 1803#else 1804 printf(" eg. ufs:/dev/da0a\n"); 1805#endif 1806 printf(" ? List valid disk boot devices\n"); 1807 printf(" <empty line> Abort manual input\n"); 1808 printf("\nmountroot> "); 1809 gets(name, sizeof(name), 1); 1810 if (name[0] == '\0') 1811 return (1); 1812 if (name[0] == '?') { 1813 printf("\nList of GEOM managed disk devices:\n "); 1814 g_dev_print(); 1815 continue; 1816 } 1817 if (!vfs_mountroot_try(name)) 1818 return (0); 1819 } 1820} 1821 1822/* 1823 * --------------------------------------------------------------------- 1824 * Functions for querying mount options/arguments from filesystems. 1825 */ 1826 1827/* 1828 * Check that no unknown options are given 1829 */ 1830int 1831vfs_filteropt(struct vfsoptlist *opts, const char **legal) 1832{ 1833 struct vfsopt *opt; 1834 char errmsg[255]; 1835 const char **t, *p, *q; 1836 int ret = 0; 1837 1838 TAILQ_FOREACH(opt, opts, link) { 1839 p = opt->name; 1840 q = NULL; 1841 if (p[0] == 'n' && p[1] == 'o') 1842 q = p + 2; 1843 for(t = global_opts; *t != NULL; t++) { 1844 if (strcmp(*t, p) == 0) 1845 break; 1846 if (q != NULL) { 1847 if (strcmp(*t, q) == 0) 1848 break; 1849 } 1850 } 1851 if (*t != NULL) 1852 continue; 1853 for(t = legal; *t != NULL; t++) { 1854 if (strcmp(*t, p) == 0) 1855 break; 1856 if (q != NULL) { 1857 if (strcmp(*t, q) == 0) 1858 break; 1859 } 1860 } 1861 if (*t != NULL) 1862 continue; 1863 snprintf(errmsg, sizeof(errmsg), 1864 "mount option <%s> is unknown", p); 1865 printf("%s\n", errmsg); 1866 ret = EINVAL; 1867 } 1868 if (ret != 0) { 1869 TAILQ_FOREACH(opt, opts, link) { 1870 if (strcmp(opt->name, "errmsg") == 0) { 1871 strncpy((char *)opt->value, errmsg, opt->len); 1872 } 1873 } 1874 } 1875 return (ret); 1876} 1877 1878/* 1879 * Get a mount option by its name. 1880 * 1881 * Return 0 if the option was found, ENOENT otherwise. 1882 * If len is non-NULL it will be filled with the length 1883 * of the option. If buf is non-NULL, it will be filled 1884 * with the address of the option. 1885 */ 1886int 1887vfs_getopt(opts, name, buf, len) 1888 struct vfsoptlist *opts; 1889 const char *name; 1890 void **buf; 1891 int *len; 1892{ 1893 struct vfsopt *opt; 1894 1895 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1896 1897 TAILQ_FOREACH(opt, opts, link) { 1898 if (strcmp(name, opt->name) == 0) { 1899 opt->seen = 1; 1900 if (len != NULL) 1901 *len = opt->len; 1902 if (buf != NULL) 1903 *buf = opt->value; 1904 return (0); 1905 } 1906 } 1907 return (ENOENT); 1908} 1909 1910int 1911vfs_getopt_pos(struct vfsoptlist *opts, const char *name) 1912{ 1913 struct vfsopt *opt; 1914 1915 if (opts == NULL) 1916 return (-1); 1917 1918 TAILQ_FOREACH(opt, opts, link) { 1919 if (strcmp(name, opt->name) == 0) { 1920 opt->seen = 1; 1921 return (opt->pos); 1922 } 1923 } 1924 return (-1); 1925} 1926 1927char * 1928vfs_getopts(struct vfsoptlist *opts, const char *name, int *error) 1929{ 1930 struct vfsopt *opt; 1931 1932 *error = 0; 1933 TAILQ_FOREACH(opt, opts, link) { 1934 if (strcmp(name, opt->name) != 0) 1935 continue; 1936 opt->seen = 1; 1937 if (opt->len == 0 || 1938 ((char *)opt->value)[opt->len - 1] != '\0') { 1939 *error = EINVAL; 1940 return (NULL); 1941 } 1942 return (opt->value); 1943 } 1944 *error = ENOENT; 1945 return (NULL); 1946} 1947 1948int 1949vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val) 1950{ 1951 struct vfsopt *opt; 1952 1953 TAILQ_FOREACH(opt, opts, link) { 1954 if (strcmp(name, opt->name) == 0) { 1955 opt->seen = 1; 1956 if (w != NULL) 1957 *w |= val; 1958 return (1); 1959 } 1960 } 1961 if (w != NULL) 1962 *w &= ~val; 1963 return (0); 1964} 1965 1966int 1967vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...) 1968{ 1969 va_list ap; 1970 struct vfsopt *opt; 1971 int ret; 1972 1973 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1974 1975 TAILQ_FOREACH(opt, opts, link) { 1976 if (strcmp(name, opt->name) != 0) 1977 continue; 1978 opt->seen = 1; 1979 if (opt->len == 0 || opt->value == NULL) 1980 return (0); 1981 if (((char *)opt->value)[opt->len - 1] != '\0') 1982 return (0); 1983 va_start(ap, fmt); 1984 ret = vsscanf(opt->value, fmt, ap); 1985 va_end(ap); 1986 return (ret); 1987 } 1988 return (0); 1989} 1990 1991int 1992vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len) 1993{ 1994 struct vfsopt *opt; 1995 1996 TAILQ_FOREACH(opt, opts, link) { 1997 if (strcmp(name, opt->name) != 0) 1998 continue; 1999 opt->seen = 1; 2000 if (opt->value == NULL) 2001 opt->len = len; 2002 else { 2003 if (opt->len != len) 2004 return (EINVAL); 2005 bcopy(value, opt->value, len); 2006 } 2007 return (0); 2008 } 2009 return (ENOENT); 2010} 2011 2012int 2013vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len) 2014{ 2015 struct vfsopt *opt; 2016 2017 TAILQ_FOREACH(opt, opts, link) { 2018 if (strcmp(name, opt->name) != 0) 2019 continue; 2020 opt->seen = 1; 2021 if (opt->value == NULL) 2022 opt->len = len; 2023 else { 2024 if (opt->len < len) 2025 return (EINVAL); 2026 opt->len = len; 2027 bcopy(value, opt->value, len); 2028 } 2029 return (0); 2030 } 2031 return (ENOENT); 2032} 2033 2034int 2035vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value) 2036{ 2037 struct vfsopt *opt; 2038 2039 TAILQ_FOREACH(opt, opts, link) { 2040 if (strcmp(name, opt->name) != 0) 2041 continue; 2042 opt->seen = 1; 2043 if (opt->value == NULL) 2044 opt->len = strlen(value) + 1; 2045 else if (strlcpy(opt->value, value, opt->len) >= opt->len) 2046 return (EINVAL); 2047 return (0); 2048 } 2049 return (ENOENT); 2050} 2051 2052/* 2053 * Find and copy a mount option. 2054 * 2055 * The size of the buffer has to be specified 2056 * in len, if it is not the same length as the 2057 * mount option, EINVAL is returned. 2058 * Returns ENOENT if the option is not found. 2059 */ 2060int 2061vfs_copyopt(opts, name, dest, len) 2062 struct vfsoptlist *opts; 2063 const char *name; 2064 void *dest; 2065 int len; 2066{ 2067 struct vfsopt *opt; 2068 2069 KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL")); 2070 2071 TAILQ_FOREACH(opt, opts, link) { 2072 if (strcmp(name, opt->name) == 0) { 2073 opt->seen = 1; 2074 if (len != opt->len) 2075 return (EINVAL); 2076 bcopy(opt->value, dest, opt->len); 2077 return (0); 2078 } 2079 } 2080 return (ENOENT); 2081} 2082 2083/* 2084 * This is a helper function for filesystems to traverse their 2085 * vnodes. See MNT_VNODE_FOREACH() in sys/mount.h 2086 */ 2087 2088struct vnode * 2089__mnt_vnode_next(struct vnode **mvp, struct mount *mp) 2090{ 2091 struct vnode *vp; 2092 2093 mtx_assert(MNT_MTX(mp), MA_OWNED); 2094 2095 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); 2096 if ((*mvp)->v_yield++ == 500) { 2097 MNT_IUNLOCK(mp); 2098 (*mvp)->v_yield = 0; 2099 uio_yield(); 2100 MNT_ILOCK(mp); 2101 } 2102 vp = TAILQ_NEXT(*mvp, v_nmntvnodes); 2103 while (vp != NULL && vp->v_type == VMARKER) 2104 vp = TAILQ_NEXT(vp, v_nmntvnodes); 2105 2106 /* Check if we are done */ 2107 if (vp == NULL) { 2108 __mnt_vnode_markerfree(mvp, mp); 2109 return (NULL); 2110 } 2111 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); 2112 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); 2113 return (vp); 2114} 2115 2116struct vnode * 2117__mnt_vnode_first(struct vnode **mvp, struct mount *mp) 2118{ 2119 struct vnode *vp; 2120 2121 mtx_assert(MNT_MTX(mp), MA_OWNED); 2122 2123 vp = TAILQ_FIRST(&mp->mnt_nvnodelist); 2124 while (vp != NULL && vp->v_type == VMARKER) 2125 vp = TAILQ_NEXT(vp, v_nmntvnodes); 2126 2127 /* Check if we are done */ 2128 if (vp == NULL) { 2129 *mvp = NULL; 2130 return (NULL); 2131 } 2132 MNT_REF(mp); 2133 MNT_IUNLOCK(mp); 2134 *mvp = (struct vnode *) malloc(sizeof(struct vnode), 2135 M_VNODE_MARKER, 2136 M_WAITOK | M_ZERO); 2137 MNT_ILOCK(mp); 2138 (*mvp)->v_type = VMARKER; 2139 2140 vp = TAILQ_FIRST(&mp->mnt_nvnodelist); 2141 while (vp != NULL && vp->v_type == VMARKER) 2142 vp = TAILQ_NEXT(vp, v_nmntvnodes); 2143 2144 /* Check if we are done */ 2145 if (vp == NULL) { 2146 MNT_IUNLOCK(mp); 2147 free(*mvp, M_VNODE_MARKER); 2148 MNT_ILOCK(mp); 2149 *mvp = NULL; 2150 MNT_REL(mp); 2151 return (NULL); 2152 } 2153 (*mvp)->v_mount = mp; 2154 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); 2155 return (vp); 2156} 2157 2158 2159void 2160__mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp) 2161{ 2162 2163 if (*mvp == NULL) 2164 return; 2165 2166 mtx_assert(MNT_MTX(mp), MA_OWNED); 2167 2168 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); 2169 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); 2170 MNT_IUNLOCK(mp); 2171 free(*mvp, M_VNODE_MARKER); 2172 MNT_ILOCK(mp); 2173 *mvp = NULL; 2174 MNT_REL(mp); 2175} 2176 2177 2178int 2179__vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) 2180{ 2181 int error; 2182 2183 error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td); 2184 if (sbp != &mp->mnt_stat) 2185 *sbp = mp->mnt_stat; 2186 return (error); 2187} 2188 2189void 2190vfs_mountedfrom(struct mount *mp, const char *from) 2191{ 2192 2193 bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname); 2194 strlcpy(mp->mnt_stat.f_mntfromname, from, 2195 sizeof mp->mnt_stat.f_mntfromname); 2196} 2197 2198/* 2199 * --------------------------------------------------------------------- 2200 * This is the api for building mount args and mounting filesystems from 2201 * inside the kernel. 2202 * 2203 * The API works by accumulation of individual args. First error is 2204 * latched. 2205 * 2206 * XXX: should be documented in new manpage kernel_mount(9) 2207 */ 2208 2209/* A memory allocation which must be freed when we are done */ 2210struct mntaarg { 2211 SLIST_ENTRY(mntaarg) next; 2212}; 2213 2214/* The header for the mount arguments */ 2215struct mntarg { 2216 struct iovec *v; 2217 int len; 2218 int error; 2219 SLIST_HEAD(, mntaarg) list; 2220}; 2221 2222/* 2223 * Add a boolean argument. 2224 * 2225 * flag is the boolean value. 2226 * name must start with "no". 2227 */ 2228struct mntarg * 2229mount_argb(struct mntarg *ma, int flag, const char *name) 2230{ 2231 2232 KASSERT(name[0] == 'n' && name[1] == 'o', 2233 ("mount_argb(...,%s): name must start with 'no'", name)); 2234 2235 return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0)); 2236} 2237 2238/* 2239 * Add an argument printf style 2240 */ 2241struct mntarg * 2242mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...) 2243{ 2244 va_list ap; 2245 struct mntaarg *maa; 2246 struct sbuf *sb; 2247 int len; 2248 2249 if (ma == NULL) { 2250 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 2251 SLIST_INIT(&ma->list); 2252 } 2253 if (ma->error) 2254 return (ma); 2255 2256 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 2257 M_MOUNT, M_WAITOK); 2258 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 2259 ma->v[ma->len].iov_len = strlen(name) + 1; 2260 ma->len++; 2261 2262 sb = sbuf_new_auto(); 2263 va_start(ap, fmt); 2264 sbuf_vprintf(sb, fmt, ap); 2265 va_end(ap); 2266 sbuf_finish(sb); 2267 len = sbuf_len(sb) + 1; 2268 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 2269 SLIST_INSERT_HEAD(&ma->list, maa, next); 2270 bcopy(sbuf_data(sb), maa + 1, len); 2271 sbuf_delete(sb); 2272 2273 ma->v[ma->len].iov_base = maa + 1; 2274 ma->v[ma->len].iov_len = len; 2275 ma->len++; 2276 2277 return (ma); 2278} 2279 2280/* 2281 * Add an argument which is a userland string. 2282 */ 2283struct mntarg * 2284mount_argsu(struct mntarg *ma, const char *name, const void *val, int len) 2285{ 2286 struct mntaarg *maa; 2287 char *tbuf; 2288 2289 if (val == NULL) 2290 return (ma); 2291 if (ma == NULL) { 2292 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 2293 SLIST_INIT(&ma->list); 2294 } 2295 if (ma->error) 2296 return (ma); 2297 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 2298 SLIST_INSERT_HEAD(&ma->list, maa, next); 2299 tbuf = (void *)(maa + 1); 2300 ma->error = copyinstr(val, tbuf, len, NULL); 2301 return (mount_arg(ma, name, tbuf, -1)); 2302} 2303 2304/* 2305 * Plain argument. 2306 * 2307 * If length is -1, treat value as a C string. 2308 */ 2309struct mntarg * 2310mount_arg(struct mntarg *ma, const char *name, const void *val, int len) 2311{ 2312 2313 if (ma == NULL) { 2314 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 2315 SLIST_INIT(&ma->list); 2316 } 2317 if (ma->error) 2318 return (ma); 2319 2320 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 2321 M_MOUNT, M_WAITOK); 2322 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 2323 ma->v[ma->len].iov_len = strlen(name) + 1; 2324 ma->len++; 2325 2326 ma->v[ma->len].iov_base = (void *)(uintptr_t)val; 2327 if (len < 0) 2328 ma->v[ma->len].iov_len = strlen(val) + 1; 2329 else 2330 ma->v[ma->len].iov_len = len; 2331 ma->len++; 2332 return (ma); 2333} 2334 2335/* 2336 * Free a mntarg structure 2337 */ 2338static void 2339free_mntarg(struct mntarg *ma) 2340{ 2341 struct mntaarg *maa; 2342 2343 while (!SLIST_EMPTY(&ma->list)) { 2344 maa = SLIST_FIRST(&ma->list); 2345 SLIST_REMOVE_HEAD(&ma->list, next); 2346 free(maa, M_MOUNT); 2347 } 2348 free(ma->v, M_MOUNT); 2349 free(ma, M_MOUNT); 2350} 2351 2352/* 2353 * Mount a filesystem 2354 */ 2355int 2356kernel_mount(struct mntarg *ma, int flags) 2357{ 2358 struct uio auio; 2359 int error; 2360 2361 KASSERT(ma != NULL, ("kernel_mount NULL ma")); 2362 KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v")); 2363 KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len)); 2364 2365 auio.uio_iov = ma->v; 2366 auio.uio_iovcnt = ma->len; 2367 auio.uio_segflg = UIO_SYSSPACE; 2368 2369 error = ma->error; 2370 if (!error) 2371 error = vfs_donmount(curthread, flags, &auio); 2372 free_mntarg(ma); 2373 return (error); 2374} 2375 2376/* 2377 * A printflike function to mount a filesystem. 2378 */ 2379int 2380kernel_vmount(int flags, ...) 2381{ 2382 struct mntarg *ma = NULL; 2383 va_list ap; 2384 const char *cp; 2385 const void *vp; 2386 int error; 2387 2388 va_start(ap, flags); 2389 for (;;) { 2390 cp = va_arg(ap, const char *); 2391 if (cp == NULL) 2392 break; 2393 vp = va_arg(ap, const void *); 2394 ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0)); 2395 } 2396 va_end(ap); 2397 2398 error = kernel_mount(ma, flags); 2399 return (error); 2400} 2401