vfs_mount.c revision 153051
1/*- 2 * Copyright (c) 1999-2004 Poul-Henning Kamp 3 * Copyright (c) 1999 Michael Smith 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/sys/kern/vfs_mount.c 153051 2005-12-03 12:04:20Z rodrigc $"); 39 40#include <sys/param.h> 41#include <sys/conf.h> 42#include <sys/jail.h> 43#include <sys/kernel.h> 44#include <sys/libkern.h> 45#include <sys/mac.h> 46#include <sys/malloc.h> 47#include <sys/mount.h> 48#include <sys/mutex.h> 49#include <sys/namei.h> 50#include <sys/proc.h> 51#include <sys/filedesc.h> 52#include <sys/reboot.h> 53#include <sys/syscallsubr.h> 54#include <sys/sysproto.h> 55#include <sys/sx.h> 56#include <sys/sysctl.h> 57#include <sys/sysent.h> 58#include <sys/systm.h> 59#include <sys/vnode.h> 60 61#include <geom/geom.h> 62 63#include <machine/stdarg.h> 64 65#include "opt_rootdevname.h" 66#include "opt_ddb.h" 67#include "opt_mac.h" 68 69#ifdef DDB 70#include <ddb/ddb.h> 71#endif 72 73#define ROOTNAME "root_device" 74#define VFS_MOUNTARG_SIZE_MAX (1024 * 64) 75 76static int vfs_domount(struct thread *td, const char *fstype, 77 char *fspath, int fsflags, void *fsdata); 78static int vfs_mount_alloc(struct vnode *dvp, struct vfsconf *vfsp, 79 const char *fspath, struct thread *td, struct mount **mpp); 80static int vfs_mountroot_ask(void); 81static int vfs_mountroot_try(const char *mountfrom); 82static int vfs_donmount(struct thread *td, int fsflags, 83 struct uio *fsoptions); 84static void free_mntarg(struct mntarg *ma); 85static void vfs_mount_destroy(struct mount *, struct thread *); 86static int 
vfs_getopt_pos(struct vfsoptlist *opts, const char *name); 87 88static int usermount = 0; 89SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 90 "Unprivileged users may mount and unmount file systems"); 91 92MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure"); 93 94/* List of mounted filesystems. */ 95struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); 96 97/* For any iteration/modification of mountlist */ 98struct mtx mountlist_mtx; 99MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF); 100 101TAILQ_HEAD(vfsoptlist, vfsopt); 102struct vfsopt { 103 TAILQ_ENTRY(vfsopt) link; 104 char *name; 105 void *value; 106 int len; 107}; 108 109/* 110 * The vnode of the system's root (/ in the filesystem, without chroot 111 * active.) 112 */ 113struct vnode *rootvnode; 114 115/* 116 * The root filesystem is detailed in the kernel environment variable 117 * vfs.root.mountfrom, which is expected to be in the general format 118 * 119 * <vfsname>:[<path>] 120 * vfsname := the name of a VFS known to the kernel and capable 121 * of being mounted as root 122 * path := disk device name or other data used by the filesystem 123 * to locate its physical store 124 */ 125 126/* 127 * Global opts, taken by all filesystems 128 */ 129static const char *global_opts[] = { 130 "fstype", 131 "fspath", 132 "rdonly", 133 "ro", 134 "rw", 135 "suid", 136 "exec", 137 NULL 138}; 139 140/* 141 * The root specifiers we will try if RB_CDROM is specified. 142 */ 143static char *cdrom_rootdevnames[] = { 144 "cd9660:cd0", 145 "cd9660:acd0", 146 NULL 147}; 148 149/* legacy find-root code */ 150char *rootdevnames[2] = {NULL, NULL}; 151#ifndef ROOTDEVNAME 152# define ROOTDEVNAME NULL 153#endif 154static const char *ctrootdevname = ROOTDEVNAME; 155 156/* 157 * --------------------------------------------------------------------- 158 * Functions for building and sanitizing the mount options 159 */ 160 161/* Remove one mount option. 
*/ 162static void 163vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt) 164{ 165 166 TAILQ_REMOVE(opts, opt, link); 167 free(opt->name, M_MOUNT); 168 if (opt->value != NULL) 169 free(opt->value, M_MOUNT); 170#ifdef INVARIANTS 171 else if (opt->len != 0) 172 panic("%s: mount option with NULL value but length != 0", 173 __func__); 174#endif 175 free(opt, M_MOUNT); 176} 177 178/* Release all resources related to the mount options. */ 179static void 180vfs_freeopts(struct vfsoptlist *opts) 181{ 182 struct vfsopt *opt; 183 184 while (!TAILQ_EMPTY(opts)) { 185 opt = TAILQ_FIRST(opts); 186 vfs_freeopt(opts, opt); 187 } 188 free(opts, M_MOUNT); 189} 190 191/* 192 * Check if options are equal (with or without the "no" prefix). 193 */ 194static int 195vfs_equalopts(const char *opt1, const char *opt2) 196{ 197 198 /* "opt" vs. "opt" or "noopt" vs. "noopt" */ 199 if (strcmp(opt1, opt2) == 0) 200 return (1); 201 /* "noopt" vs. "opt" */ 202 if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) 203 return (1); 204 /* "opt" vs. "noopt" */ 205 if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) 206 return (1); 207 return (0); 208} 209 210/* 211 * If a mount option is specified several times, 212 * (with or without the "no" prefix) only keep 213 * the last occurence of it. 214 */ 215static void 216vfs_sanitizeopts(struct vfsoptlist *opts) 217{ 218 struct vfsopt *opt, *opt2, *tmp; 219 220 TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) { 221 opt2 = TAILQ_PREV(opt, vfsoptlist, link); 222 while (opt2 != NULL) { 223 if (vfs_equalopts(opt->name, opt2->name)) { 224 tmp = TAILQ_PREV(opt2, vfsoptlist, link); 225 vfs_freeopt(opts, opt2); 226 opt2 = tmp; 227 } else { 228 opt2 = TAILQ_PREV(opt2, vfsoptlist, link); 229 } 230 } 231 } 232} 233 234/* 235 * Build a linked list of mount options from a struct uio. 
236 */ 237static int 238vfs_buildopts(struct uio *auio, struct vfsoptlist **options) 239{ 240 struct vfsoptlist *opts; 241 struct vfsopt *opt; 242 size_t memused; 243 unsigned int i, iovcnt; 244 int error, namelen, optlen; 245 246 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 247 TAILQ_INIT(opts); 248 memused = 0; 249 iovcnt = auio->uio_iovcnt; 250 for (i = 0; i < iovcnt; i += 2) { 251 opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 252 namelen = auio->uio_iov[i].iov_len; 253 optlen = auio->uio_iov[i + 1].iov_len; 254 opt->name = malloc(namelen, M_MOUNT, M_WAITOK); 255 opt->value = NULL; 256 opt->len = 0; 257 258 /* 259 * Do this early, so jumps to "bad" will free the current 260 * option. 261 */ 262 TAILQ_INSERT_TAIL(opts, opt, link); 263 memused += sizeof(struct vfsopt) + optlen + namelen; 264 265 /* 266 * Avoid consuming too much memory, and attempts to overflow 267 * memused. 268 */ 269 if (memused > VFS_MOUNTARG_SIZE_MAX || 270 optlen > VFS_MOUNTARG_SIZE_MAX || 271 namelen > VFS_MOUNTARG_SIZE_MAX) { 272 error = EINVAL; 273 goto bad; 274 } 275 276 if (auio->uio_segflg == UIO_SYSSPACE) { 277 bcopy(auio->uio_iov[i].iov_base, opt->name, namelen); 278 } else { 279 error = copyin(auio->uio_iov[i].iov_base, opt->name, 280 namelen); 281 if (error) 282 goto bad; 283 } 284 /* Ensure names are null-terminated strings. 
*/ 285 if (opt->name[namelen - 1] != '\0') { 286 error = EINVAL; 287 goto bad; 288 } 289 if (optlen != 0) { 290 opt->len = optlen; 291 opt->value = malloc(optlen, M_MOUNT, M_WAITOK); 292 if (auio->uio_segflg == UIO_SYSSPACE) { 293 bcopy(auio->uio_iov[i + 1].iov_base, opt->value, 294 optlen); 295 } else { 296 error = copyin(auio->uio_iov[i + 1].iov_base, 297 opt->value, optlen); 298 if (error) 299 goto bad; 300 } 301 } 302 } 303 vfs_sanitizeopts(opts); 304 *options = opts; 305 return (0); 306bad: 307 vfs_freeopts(opts); 308 return (error); 309} 310 311/* 312 * Merge the old mount options with the new ones passed 313 * in the MNT_UPDATE case. 314 */ 315static void 316vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts) 317{ 318 struct vfsopt *opt, *opt2, *new; 319 320 TAILQ_FOREACH(opt, opts, link) { 321 /* 322 * Check that this option hasn't been redefined 323 * nor cancelled with a "no" mount option. 324 */ 325 opt2 = TAILQ_FIRST(toopts); 326 while (opt2 != NULL) { 327 if (strcmp(opt2->name, opt->name) == 0) 328 goto next; 329 if (strncmp(opt2->name, "no", 2) == 0 && 330 strcmp(opt2->name + 2, opt->name) == 0) { 331 vfs_freeopt(toopts, opt2); 332 goto next; 333 } 334 opt2 = TAILQ_NEXT(opt2, link); 335 } 336 /* We want this option, duplicate it. 
*/ 337 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 338 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK); 339 strcpy(new->name, opt->name); 340 if (opt->len != 0) { 341 new->value = malloc(opt->len, M_MOUNT, M_WAITOK); 342 bcopy(opt->value, new->value, opt->len); 343 } else { 344 new->value = NULL; 345 } 346 new->len = opt->len; 347 TAILQ_INSERT_TAIL(toopts, new, link); 348next: 349 continue; 350 } 351} 352 353/* 354 * --------------------------------------------------------------------- 355 * Mount a filesystem 356 */ 357int 358nmount(td, uap) 359 struct thread *td; 360 struct nmount_args /* { 361 struct iovec *iovp; 362 unsigned int iovcnt; 363 int flags; 364 } */ *uap; 365{ 366 struct uio *auio; 367 struct iovec *iov; 368 unsigned int i; 369 int error; 370 u_int iovcnt; 371 372 /* Kick out MNT_ROOTFS early as it is legal internally */ 373 if (uap->flags & MNT_ROOTFS) 374 return (EINVAL); 375 376 iovcnt = uap->iovcnt; 377 /* 378 * Check that we have an even number of iovec's 379 * and that we have at least two options. 380 */ 381 if ((iovcnt & 1) || (iovcnt < 4)) 382 return (EINVAL); 383 384 error = copyinuio(uap->iovp, iovcnt, &auio); 385 if (error) 386 return (error); 387 iov = auio->uio_iov; 388 for (i = 0; i < iovcnt; i++) { 389 if (iov->iov_len > MMAXOPTIONLEN) { 390 free(auio, M_IOV); 391 return (EINVAL); 392 } 393 iov++; 394 } 395 error = vfs_donmount(td, uap->flags, auio); 396 397 free(auio, M_IOV); 398 return (error); 399} 400 401/* 402 * --------------------------------------------------------------------- 403 * Various utility functions 404 */ 405 406/* 407 * Allocate and initialize the mount point struct. 
408 */ 409static int 410vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, 411 const char *fspath, struct thread *td, struct mount **mpp) 412{ 413 struct mount *mp; 414 415 mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO); 416 TAILQ_INIT(&mp->mnt_nvnodelist); 417 mp->mnt_nvnodelistsize = 0; 418 mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); 419 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 420 vfs_busy(mp, LK_NOWAIT, 0, td); 421 mp->mnt_op = vfsp->vfc_vfsops; 422 mp->mnt_vfc = vfsp; 423 vfsp->vfc_refcount++; 424 mp->mnt_stat.f_type = vfsp->vfc_typenum; 425 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 426 strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 427 mp->mnt_vnodecovered = vp; 428 mp->mnt_cred = crdup(td->td_ucred); 429 mp->mnt_stat.f_owner = td->td_ucred->cr_uid; 430 strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); 431 mp->mnt_iosize_max = DFLTPHYS; 432#ifdef MAC 433 mac_init_mount(mp); 434 mac_create_mount(td->td_ucred, mp); 435#endif 436 arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0); 437 *mpp = mp; 438 return (0); 439} 440 441/* 442 * Destroy the mount struct previously allocated by vfs_mount_alloc(). 
443 */ 444static void 445vfs_mount_destroy(struct mount *mp, struct thread *td) 446{ 447 448 mp->mnt_vfc->vfc_refcount--; 449 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 450 panic("unmount: dangling vnode"); 451 vfs_unbusy(mp,td); 452 lockdestroy(&mp->mnt_lock); 453 MNT_ILOCK(mp); 454 if (mp->mnt_kern_flag & MNTK_MWAIT) 455 wakeup(mp); 456 MNT_IUNLOCK(mp); 457 mtx_destroy(&mp->mnt_mtx); 458#ifdef MAC 459 mac_destroy_mount(mp); 460#endif 461 if (mp->mnt_opt != NULL) 462 vfs_freeopts(mp->mnt_opt); 463 crfree(mp->mnt_cred); 464 free(mp, M_MOUNT); 465} 466 467static int 468vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions) 469{ 470 struct vfsoptlist *optlist; 471 char *fstype, *fspath, *errmsg; 472 int error, fstypelen, fspathlen, errmsg_len, errmsg_pos; 473 474 errmsg_len = 0; 475 errmsg_pos = -1; 476 477 error = vfs_buildopts(fsoptions, &optlist); 478 if (error) 479 return (error); 480 481 if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0) 482 errmsg_pos = vfs_getopt_pos(optlist, "errmsg"); 483 else 484 errmsg_len = 0; 485 486 /* 487 * We need these two options before the others, 488 * and they are mandatory for any filesystem. 489 * Ensure they are NUL terminated as well. 490 */ 491 fstypelen = 0; 492 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); 493 if (error || fstype[fstypelen - 1] != '\0') { 494 error = EINVAL; 495 if (errmsg != NULL) 496 strncpy(errmsg, "Invalid fstype", errmsg_len); 497 goto bail; 498 } 499 fspathlen = 0; 500 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); 501 if (error || fspath[fspathlen - 1] != '\0') { 502 error = EINVAL; 503 if (errmsg != NULL) 504 strncpy(errmsg, "Invalid fspath", errmsg_len); 505 goto bail; 506 } 507 508 /* 509 * We need to see if we have the "update" option 510 * before we call vfs_domount(), since vfs_domount() has special 511 * logic based on MNT_UPDATE. This is very important 512 * when we want to update the root filesystem. 
513 */ 514 if (vfs_getopt(optlist, "update", NULL, NULL) == 0) 515 fsflags |= MNT_UPDATE; 516 517 if (vfs_getopt(optlist, "async", NULL, NULL) == 0) 518 fsflags |= MNT_ASYNC; 519 520 if (vfs_getopt(optlist, "force", NULL, NULL) == 0) 521 fsflags |= MNT_FORCE; 522 523 if (vfs_getopt(optlist, "multilabel", NULL, NULL) == 0) 524 fsflags |= MNT_MULTILABEL; 525 526 if (vfs_getopt(optlist, "noasync", NULL, NULL) == 0) 527 fsflags &= ~MNT_ASYNC; 528 529 if (vfs_getopt(optlist, "noatime", NULL, NULL) == 0) 530 fsflags |= MNT_NOATIME; 531 532 if (vfs_getopt(optlist, "noclusterr", NULL, NULL) == 0) 533 fsflags |= MNT_NOCLUSTERR; 534 535 if (vfs_getopt(optlist, "noclusterw", NULL, NULL) == 0) 536 fsflags |= MNT_NOCLUSTERW; 537 538 if (vfs_getopt(optlist, "noexec", NULL, NULL) == 0) 539 fsflags |= MNT_NOEXEC; 540 541 if (vfs_getopt(optlist, "nosuid", NULL, NULL) == 0) 542 fsflags |= MNT_NOSUID; 543 544 if (vfs_getopt(optlist, "nosymfollow", NULL, NULL) == 0) 545 fsflags |= MNT_NOSYMFOLLOW; 546 547 if (vfs_getopt(optlist, "noro", NULL, NULL) == 0) 548 fsflags &= ~MNT_RDONLY; 549 550 if (vfs_getopt(optlist, "ro", NULL, NULL) == 0) 551 fsflags |= MNT_RDONLY; 552 553 if (vfs_getopt(optlist, "rdonly", NULL, NULL) == 0) 554 fsflags |= MNT_RDONLY; 555 556 if (vfs_getopt(optlist, "rw", NULL, NULL) == 0) 557 fsflags &= ~MNT_RDONLY; 558 559 if (vfs_getopt(optlist, "snapshot", NULL, NULL) == 0) 560 fsflags |= MNT_SNAPSHOT; 561 562 if (vfs_getopt(optlist, "suiddir", NULL, NULL) == 0) 563 fsflags |= MNT_SUIDDIR; 564 565 if (vfs_getopt(optlist, "sync", NULL, NULL) == 0) 566 fsflags |= MNT_SYNCHRONOUS; 567 568 if (vfs_getopt(optlist, "union", NULL, NULL) == 0) 569 fsflags |= MNT_UNION; 570 571 /* 572 * Be ultra-paranoid about making sure the type and fspath 573 * variables will fit in our mp buffers, including the 574 * terminating NUL. 
575 */ 576 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) { 577 error = ENAMETOOLONG; 578 goto bail; 579 } 580 581 mtx_lock(&Giant); 582 error = vfs_domount(td, fstype, fspath, fsflags, optlist); 583 mtx_unlock(&Giant); 584bail: 585 /* copyout the errmsg */ 586 if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt) 587 && errmsg_len > 0 && errmsg != NULL) { 588 if (fsoptions->uio_segflg == UIO_SYSSPACE) { 589 strncpy(fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 590 errmsg, 591 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); 592 } else { 593 copystr(errmsg, 594 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 595 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len, 596 NULL); 597 } 598 } 599 600 if (error != 0) 601 vfs_freeopts(optlist); 602 return (error); 603} 604 605/* 606 * --------------------------------------------------------------------- 607 * Old mount API. 608 */ 609#ifndef _SYS_SYSPROTO_H_ 610struct mount_args { 611 char *type; 612 char *path; 613 int flags; 614 caddr_t data; 615}; 616#endif 617/* ARGSUSED */ 618int 619mount(td, uap) 620 struct thread *td; 621 struct mount_args /* { 622 char *type; 623 char *path; 624 int flags; 625 caddr_t data; 626 } */ *uap; 627{ 628 char *fstype; 629 struct vfsconf *vfsp = NULL; 630 struct mntarg *ma = NULL; 631 int error; 632 633 /* Kick out MNT_ROOTFS early as it is legal internally */ 634 uap->flags &= ~MNT_ROOTFS; 635 636 if (uap->data == NULL) 637 return (EINVAL); 638 639 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); 640 error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL); 641 if (!error) { 642 mtx_lock(&Giant); /* XXX ? 
*/ 643 vfsp = vfs_byname_kld(fstype, td, &error); 644 mtx_unlock(&Giant); 645 } 646 free(fstype, M_TEMP); 647 if (error) 648 return (error); 649 if (vfsp == NULL) 650 return (ENOENT); 651 if (vfsp->vfc_vfsops->vfs_cmount == NULL) 652 return (EOPNOTSUPP); 653 654 ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN); 655 ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN); 656 ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro"); 657 ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid"); 658 ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec"); 659 660 error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td); 661 return (error); 662} 663 664 665/* 666 * vfs_domount(): actually attempt a filesystem mount. 667 */ 668static int 669vfs_domount( 670 struct thread *td, /* Flags common to all filesystems. */ 671 const char *fstype, /* Filesystem type. */ 672 char *fspath, /* Mount path. */ 673 int fsflags, /* Flags common to all filesystems. */ 674 void *fsdata /* Options local to the filesystem. */ 675 ) 676{ 677 struct vnode *vp; 678 struct mount *mp; 679 struct vfsconf *vfsp; 680 int error, flag = 0, kern_flag = 0; 681 struct vattr va; 682 struct nameidata nd; 683 684 mtx_assert(&Giant, MA_OWNED); 685 686 /* 687 * Be ultra-paranoid about making sure the type and fspath 688 * variables will fit in our mp buffers, including the 689 * terminating NUL. 690 */ 691 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) 692 return (ENAMETOOLONG); 693 694 if (jailed(td->td_ucred)) 695 return (EPERM); 696 if (usermount == 0) { 697 if ((error = suser(td)) != 0) 698 return (error); 699 } 700 701 /* 702 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. 703 */ 704 if (fsflags & (MNT_EXPORTED | MNT_SUIDDIR)) { 705 if ((error = suser(td)) != 0) 706 return (error); 707 } 708 /* 709 * Silently enforce MNT_NOSUID and MNT_USER for 710 * unprivileged users. 
711 */ 712 if (suser(td) != 0) 713 fsflags |= MNT_NOSUID | MNT_USER; 714 /* 715 * Get vnode to be covered 716 */ 717 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); 718 if ((error = namei(&nd)) != 0) 719 return (error); 720 NDFREE(&nd, NDF_ONLY_PNBUF); 721 vp = nd.ni_vp; 722 if (fsflags & MNT_UPDATE) { 723 if ((vp->v_vflag & VV_ROOT) == 0) { 724 vput(vp); 725 return (EINVAL); 726 } 727 mp = vp->v_mount; 728 flag = mp->mnt_flag; 729 kern_flag = mp->mnt_kern_flag; 730 /* 731 * We only allow the filesystem to be reloaded if it 732 * is currently mounted read-only. 733 */ 734 if ((fsflags & MNT_RELOAD) && 735 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 736 vput(vp); 737 return (EOPNOTSUPP); /* Needs translation */ 738 } 739 /* 740 * Only privileged root, or (if MNT_USER is set) the user that 741 * did the original mount is permitted to update it. 742 */ 743 error = vfs_suser(mp, td); 744 if (error) { 745 vput(vp); 746 return (error); 747 } 748 if (vfs_busy(mp, LK_NOWAIT, 0, td)) { 749 vput(vp); 750 return (EBUSY); 751 } 752 VI_LOCK(vp); 753 if ((vp->v_iflag & VI_MOUNT) != 0 || 754 vp->v_mountedhere != NULL) { 755 VI_UNLOCK(vp); 756 vfs_unbusy(mp, td); 757 vput(vp); 758 return (EBUSY); 759 } 760 vp->v_iflag |= VI_MOUNT; 761 VI_UNLOCK(vp); 762 mp->mnt_flag |= fsflags & 763 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS); 764 VOP_UNLOCK(vp, 0, td); 765 mp->mnt_optnew = fsdata; 766 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); 767 } else { 768 /* 769 * If the user is not root, ensure that they own the directory 770 * onto which we are attempting to mount. 
771 */ 772 error = VOP_GETATTR(vp, &va, td->td_ucred, td); 773 if (error) { 774 vput(vp); 775 return (error); 776 } 777 if (va.va_uid != td->td_ucred->cr_uid) { 778 if ((error = suser(td)) != 0) { 779 vput(vp); 780 return (error); 781 } 782 } 783 error = vinvalbuf(vp, V_SAVE, td, 0, 0); 784 if (error != 0) { 785 vput(vp); 786 return (error); 787 } 788 if (vp->v_type != VDIR) { 789 vput(vp); 790 return (ENOTDIR); 791 } 792 vfsp = vfs_byname_kld(fstype, td, &error); 793 if (vfsp == NULL) { 794 vput(vp); 795 return (error); 796 } 797 VI_LOCK(vp); 798 if ((vp->v_iflag & VI_MOUNT) != 0 || 799 vp->v_mountedhere != NULL) { 800 VI_UNLOCK(vp); 801 vput(vp); 802 return (EBUSY); 803 } 804 vp->v_iflag |= VI_MOUNT; 805 VI_UNLOCK(vp); 806 807 /* 808 * Allocate and initialize the filesystem. 809 */ 810 error = vfs_mount_alloc(vp, vfsp, fspath, td, &mp); 811 if (error) { 812 vput(vp); 813 return (error); 814 } 815 VOP_UNLOCK(vp, 0, td); 816 817 /* XXXMAC: pass to vfs_mount_alloc? */ 818 mp->mnt_optnew = fsdata; 819 } 820 821 /* 822 * Set the mount level flags. 823 */ 824 if (fsflags & MNT_RDONLY) 825 mp->mnt_flag |= MNT_RDONLY; 826 mp->mnt_flag &=~ MNT_UPDATEMASK; 827 mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS); 828 /* 829 * Mount the filesystem. 830 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 831 * get. No freeing of cn_pnbuf. 832 */ 833 error = VFS_MOUNT(mp, td); 834 if (!error) { 835 if (mp->mnt_opt != NULL) 836 vfs_freeopts(mp->mnt_opt); 837 mp->mnt_opt = mp->mnt_optnew; 838 VFS_STATFS(mp, &mp->mnt_stat, td); 839 } 840 /* 841 * Prevent external consumers of mount options from reading 842 * mnt_optnew. 
843 */ 844 mp->mnt_optnew = NULL; 845 if (mp->mnt_flag & MNT_UPDATE) { 846 mp->mnt_flag &= 847 ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); 848 if (error) { 849 mp->mnt_flag = flag; 850 mp->mnt_kern_flag = kern_flag; 851 } 852 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 853 if (mp->mnt_syncer == NULL) 854 error = vfs_allocate_syncvnode(mp); 855 } else { 856 if (mp->mnt_syncer != NULL) 857 vrele(mp->mnt_syncer); 858 mp->mnt_syncer = NULL; 859 } 860 vfs_unbusy(mp, td); 861 VI_LOCK(vp); 862 vp->v_iflag &= ~VI_MOUNT; 863 VI_UNLOCK(vp); 864 vrele(vp); 865 return (error); 866 } 867 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 868 /* 869 * Put the new filesystem on the mount list after root. 870 */ 871 cache_purge(vp); 872 if (!error) { 873 struct vnode *newdp; 874 875 VI_LOCK(vp); 876 vp->v_iflag &= ~VI_MOUNT; 877 VI_UNLOCK(vp); 878 vp->v_mountedhere = mp; 879 mtx_lock(&mountlist_mtx); 880 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 881 mtx_unlock(&mountlist_mtx); 882 vfs_event_signal(NULL, VQ_MOUNT, 0); 883 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td)) 884 panic("mount: lost mount"); 885 mountcheckdirs(vp, newdp); 886 vput(newdp); 887 VOP_UNLOCK(vp, 0, td); 888 if ((mp->mnt_flag & MNT_RDONLY) == 0) 889 error = vfs_allocate_syncvnode(mp); 890 vfs_unbusy(mp, td); 891 if (error) 892 vrele(vp); 893 } else { 894 VI_LOCK(vp); 895 vp->v_iflag &= ~VI_MOUNT; 896 VI_UNLOCK(vp); 897 vfs_mount_destroy(mp, td); 898 vput(vp); 899 } 900 return (error); 901} 902 903/* 904 * --------------------------------------------------------------------- 905 * Unmount a filesystem. 906 * 907 * Note: unmount takes a path to the vnode mounted on as argument, 908 * not special file (as before). 
909 */ 910#ifndef _SYS_SYSPROTO_H_ 911struct unmount_args { 912 char *path; 913 int flags; 914}; 915#endif 916/* ARGSUSED */ 917int 918unmount(td, uap) 919 struct thread *td; 920 register struct unmount_args /* { 921 char *path; 922 int flags; 923 } */ *uap; 924{ 925 struct mount *mp; 926 char *pathbuf; 927 int error, id0, id1; 928 929 if (jailed(td->td_ucred)) 930 return (EPERM); 931 if (usermount == 0) { 932 if ((error = suser(td)) != 0) 933 return (error); 934 } 935 936 pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK); 937 error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL); 938 if (error) { 939 free(pathbuf, M_TEMP); 940 return (error); 941 } 942 if (uap->flags & MNT_BYFSID) { 943 /* Decode the filesystem ID. */ 944 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) { 945 free(pathbuf, M_TEMP); 946 return (EINVAL); 947 } 948 949 mtx_lock(&mountlist_mtx); 950 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 951 if (mp->mnt_stat.f_fsid.val[0] == id0 && 952 mp->mnt_stat.f_fsid.val[1] == id1) 953 break; 954 } 955 mtx_unlock(&mountlist_mtx); 956 } else { 957 mtx_lock(&mountlist_mtx); 958 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 959 if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) 960 break; 961 } 962 mtx_unlock(&mountlist_mtx); 963 } 964 free(pathbuf, M_TEMP); 965 if (mp == NULL) { 966 /* 967 * Previously we returned ENOENT for a nonexistent path and 968 * EINVAL for a non-mountpoint. We cannot tell these apart 969 * now, so in the !MNT_BYFSID case return the more likely 970 * EINVAL for compatibility. 971 */ 972 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL); 973 } 974 975 /* 976 * Only privileged root, or (if MNT_USER is set) the user that did the 977 * original mount is permitted to unmount this filesystem. 978 */ 979 error = vfs_suser(mp, td); 980 if (error) 981 return (error); 982 983 /* 984 * Don't allow unmounting the root filesystem. 
985 */ 986 if (mp->mnt_flag & MNT_ROOTFS) 987 return (EINVAL); 988 mtx_lock(&Giant); 989 error = dounmount(mp, uap->flags, td); 990 mtx_unlock(&Giant); 991 return (error); 992} 993 994/* 995 * Do the actual filesystem unmount. 996 */ 997int 998dounmount(mp, flags, td) 999 struct mount *mp; 1000 int flags; 1001 struct thread *td; 1002{ 1003 struct vnode *coveredvp, *fsrootvp; 1004 int error; 1005 int async_flag; 1006 1007 mtx_assert(&Giant, MA_OWNED); 1008 1009 MNT_ILOCK(mp); 1010 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 1011 MNT_IUNLOCK(mp); 1012 return (EBUSY); 1013 } 1014 mp->mnt_kern_flag |= MNTK_UNMOUNT; 1015 /* Allow filesystems to detect that a forced unmount is in progress. */ 1016 if (flags & MNT_FORCE) 1017 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 1018 error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK | 1019 ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td); 1020 if (error) { 1021 MNT_ILOCK(mp); 1022 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 1023 if (mp->mnt_kern_flag & MNTK_MWAIT) 1024 wakeup(mp); 1025 MNT_IUNLOCK(mp); 1026 return (error); 1027 } 1028 vn_start_write(NULL, &mp, V_WAIT); 1029 1030 if (mp->mnt_flag & MNT_EXPUBLIC) 1031 vfs_setpublicfs(NULL, NULL, NULL); 1032 1033 vfs_msync(mp, MNT_WAIT); 1034 async_flag = mp->mnt_flag & MNT_ASYNC; 1035 mp->mnt_flag &= ~MNT_ASYNC; 1036 cache_purgevfs(mp); /* remove cache entries for this file sys */ 1037 if (mp->mnt_syncer != NULL) 1038 vrele(mp->mnt_syncer); 1039 /* 1040 * For forced unmounts, move process cdir/rdir refs on the fs root 1041 * vnode to the covered vnode. For non-forced unmounts we want 1042 * such references to cause an EBUSY error. 
	 */
	if ((flags & MNT_FORCE) &&
	    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
		if (mp->mnt_vnodecovered != NULL)
			mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
		if (fsrootvp == rootvnode) {
			vrele(rootvnode);
			rootvnode = NULL;
		}
		vput(fsrootvp);
	}
	/*
	 * Call the filesystem's unmount routine if the mount is read-only,
	 * if the sync succeeded, or if the unmount is forced.
	 */
	if (((mp->mnt_flag & MNT_RDONLY) ||
	     (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
	    (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags, td);
	}
	vn_finished_write(mp);
	if (error) {
		/* Undo cdir/rdir and rootvnode changes made above. */
		if ((flags & MNT_FORCE) &&
		    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
			if (mp->mnt_vnodecovered != NULL)
				mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
			if (rootvnode == NULL) {
				rootvnode = fsrootvp;
				vref(rootvnode);
			}
			vput(fsrootvp);
		}
		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
			(void) vfs_allocate_syncvnode(mp);
		/* Clear the unmount-in-progress flags and wake any waiter. */
		MNT_ILOCK(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		MNT_IUNLOCK(mp);
		return (error);
	}
	/* Success: unlink the mount from the mountlist and destroy it. */
	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
		coveredvp->v_mountedhere = NULL;
	mtx_unlock(&mountlist_mtx);
	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
	vfs_mount_destroy(mp, td);
	if (coveredvp != NULL)
		vrele(coveredvp);
	return (0);
}

/*
 * ---------------------------------------------------------------------
 * Mounting of root filesystem
 *
 */

/*
 * One outstanding hold on the root mount.  "who" is the label printed by
 * root_mount_wait() while the hold is still on the list.
 */
struct root_hold_token {
	const char *who;
	LIST_ENTRY(root_hold_token) list;
};

/* All outstanding holds; modified with mountlist_mtx held. */
static LIST_HEAD(, root_hold_token) root_holds =
    LIST_HEAD_INITIALIZER(&root_holds);

/*
 * Register a hold on the root mount.
 *
 * Allocates a token labelled with identifier (the pointer is stored, not
 * copied) and puts it on root_holds.  Returns the token, which is later
 * handed to root_mount_rel().
 */
struct root_hold_token *
root_mount_hold(const char *identifier)
{
	struct root_hold_token *h;

	h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
	h->who = identifier;
	mtx_lock(&mountlist_mtx);
	LIST_INSERT_HEAD(&root_holds, h, list);
	mtx_unlock(&mountlist_mtx);
	return (h);
}

/*
 * Release a hold obtained from root_mount_hold(): take the token off
 * root_holds, wake anything sleeping on the list and free the token.
 */
void
root_mount_rel(struct root_hold_token *h)
{

	mtx_lock(&mountlist_mtx);
	LIST_REMOVE(h, list);
	wakeup(&root_holds);
	mtx_unlock(&mountlist_mtx);
	free(h, M_DEVBUF);
}

/*
 * Block until root_holds is empty.
 *
 * Each pass waits for GEOM to go idle (with Giant dropped), then checks
 * the list.  While holds remain, their labels are printed and the thread
 * sleeps on root_holds for up to hz ticks before trying again.
 */
static void
root_mount_wait(void)
{
	struct root_hold_token *h;

	for (;;) {
		DROP_GIANT();
		g_waitidle();
		PICKUP_GIANT();
		mtx_lock(&mountlist_mtx);
		if (LIST_EMPTY(&root_holds)) {
			mtx_unlock(&mountlist_mtx);
			break;
		}
		printf("Root mount waiting for:");
		LIST_FOREACH(h, &root_holds, list)
			printf(" %s", h->who);
		printf("\n");
		/* PDROP: mountlist_mtx is not re-acquired on wakeup. */
		msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
		    hz);
	}
}

/*
 * Make the root of the first filesystem on the mountlist the global
 * rootvnode, and point the current and root directories of td's process
 * at it.  Panics if the root vnode cannot be obtained.
 */
static void
set_rootvnode(struct thread *td)
{
	struct proc *p;

	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
		panic("Cannot find root vnode");

	p = td->td_proc;
	FILEDESC_LOCK(p->p_fd);

	/* Replace the working directory, dropping any previous reference. */
	if (p->p_fd->fd_cdir != NULL)
		vrele(p->p_fd->fd_cdir);
	p->p_fd->fd_cdir = rootvnode;
	VREF(rootvnode);

	/* Likewise for the process root directory. */
	if (p->p_fd->fd_rdir != NULL)
		vrele(p->p_fd->fd_rdir);
	p->p_fd->fd_rdir = rootvnode;
	VREF(rootvnode);

	FILEDESC_UNLOCK(p->p_fd);

	/* VFS_ROOT() was asked for LK_EXCLUSIVE; drop that lock now. */
	VOP_UNLOCK(rootvnode, 0, td);
}

/*
 * Mount /devfs as our root filesystem, but do not put it on the mountlist
 * yet. Create a /dev -> / symlink so that absolute pathnames will lookup.
1185 */ 1186 1187static void 1188devfs_first(void) 1189{ 1190 struct thread *td = curthread; 1191 struct vfsconf *vfsp; 1192 struct mount *mp = NULL; 1193 int error; 1194 1195 vfsp = vfs_byname("devfs"); 1196 KASSERT(vfsp != NULL, ("Could not find devfs by name")); 1197 if (vfsp == NULL) 1198 return; 1199 1200 error = vfs_mount_alloc(NULLVP, vfsp, "/dev", td, &mp); 1201 KASSERT(error == 0, ("vfs_mount_alloc failed %d", error)); 1202 if (error) 1203 return; 1204 1205 error = VFS_MOUNT(mp, curthread); 1206 KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error)); 1207 if (error) 1208 return; 1209 1210 mtx_lock(&mountlist_mtx); 1211 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 1212 mtx_unlock(&mountlist_mtx); 1213 1214 set_rootvnode(td); 1215 1216 error = kern_symlink(td, "/", "dev", UIO_SYSSPACE); 1217 if (error) 1218 printf("kern_symlink /dev -> / returns %d\n", error); 1219} 1220 1221/* 1222 * Surgically move our devfs to be mounted on /dev. 1223 */ 1224 1225static void 1226devfs_fixup(struct thread *td) 1227{ 1228 struct nameidata nd; 1229 int error; 1230 struct vnode *vp, *dvp; 1231 struct mount *mp; 1232 1233 /* Remove our devfs mount from the mountlist and purge the cache */ 1234 mtx_lock(&mountlist_mtx); 1235 mp = TAILQ_FIRST(&mountlist); 1236 TAILQ_REMOVE(&mountlist, mp, mnt_list); 1237 mtx_unlock(&mountlist_mtx); 1238 cache_purgevfs(mp); 1239 1240 VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td); 1241 VI_LOCK(dvp); 1242 dvp->v_iflag &= ~VI_MOUNT; 1243 dvp->v_mountedhere = NULL; 1244 VI_UNLOCK(dvp); 1245 1246 /* Set up the real rootvnode, and purge the cache */ 1247 TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL; 1248 set_rootvnode(td); 1249 cache_purgevfs(rootvnode->v_mount); 1250 1251 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td); 1252 error = namei(&nd); 1253 if (error) { 1254 printf("Lookup of /dev for devfs, error: %d\n", error); 1255 return; 1256 } 1257 NDFREE(&nd, NDF_ONLY_PNBUF); 1258 vp = nd.ni_vp; 1259 if (vp->v_type != VDIR) { 1260 
vput(vp); 1261 } 1262 error = vinvalbuf(vp, V_SAVE, td, 0, 0); 1263 if (error) { 1264 vput(vp); 1265 } 1266 cache_purge(vp); 1267 mp->mnt_vnodecovered = vp; 1268 vp->v_mountedhere = mp; 1269 mtx_lock(&mountlist_mtx); 1270 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 1271 mtx_unlock(&mountlist_mtx); 1272 VOP_UNLOCK(vp, 0, td); 1273 vfs_unbusy(mp, td); 1274 vput(dvp); 1275 1276 /* Unlink the no longer needed /dev/dev -> / symlink */ 1277 kern_unlink(td, "/dev/dev", UIO_SYSSPACE); 1278} 1279 1280/* 1281 * Report errors during filesystem mounting. 1282 */ 1283void 1284vfs_mount_error(struct mount *mp, const char *fmt, ...) 1285{ 1286 struct vfsoptlist *moptlist = mp->mnt_optnew; 1287 va_list ap; 1288 int error, len; 1289 char *errmsg; 1290 1291 error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len); 1292 if (error || errmsg == NULL || len <= 0) 1293 return; 1294 1295 va_start(ap, fmt); 1296 vsnprintf(errmsg, (size_t)len, fmt, ap); 1297 va_end(ap); 1298} 1299 1300/* 1301 * Find and mount the root filesystem 1302 */ 1303void 1304vfs_mountroot(void) 1305{ 1306 char *cp; 1307 int error, i, asked = 0; 1308 1309 root_mount_wait(); 1310 1311 devfs_first(); 1312 1313 /* 1314 * We are booted with instructions to prompt for the root filesystem. 1315 */ 1316 if (boothowto & RB_ASKNAME) { 1317 if (!vfs_mountroot_ask()) 1318 return; 1319 asked = 1; 1320 } 1321 1322 /* 1323 * The root filesystem information is compiled in, and we are 1324 * booted with instructions to use it. 1325 */ 1326 if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) { 1327 if (!vfs_mountroot_try(ctrootdevname)) 1328 return; 1329 ctrootdevname = NULL; 1330 } 1331 1332 /* 1333 * We've been given the generic "use CDROM as root" flag. This is 1334 * necessary because one media may be used in many different 1335 * devices, so we need to search for them. 
1336 */ 1337 if (boothowto & RB_CDROM) { 1338 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) { 1339 if (!vfs_mountroot_try(cdrom_rootdevnames[i])) 1340 return; 1341 } 1342 } 1343 1344 /* 1345 * Try to use the value read by the loader from /etc/fstab, or 1346 * supplied via some other means. This is the preferred 1347 * mechanism. 1348 */ 1349 cp = getenv("vfs.root.mountfrom"); 1350 if (cp != NULL) { 1351 error = vfs_mountroot_try(cp); 1352 freeenv(cp); 1353 if (!error) 1354 return; 1355 } 1356 1357 /* 1358 * Try values that may have been computed by code during boot 1359 */ 1360 if (!vfs_mountroot_try(rootdevnames[0])) 1361 return; 1362 if (!vfs_mountroot_try(rootdevnames[1])) 1363 return; 1364 1365 /* 1366 * If we (still) have a compiled-in default, try it. 1367 */ 1368 if (ctrootdevname != NULL) 1369 if (!vfs_mountroot_try(ctrootdevname)) 1370 return; 1371 /* 1372 * Everything so far has failed, prompt on the console if we haven't 1373 * already tried that. 1374 */ 1375 if (!asked) 1376 if (!vfs_mountroot_ask()) 1377 return; 1378 1379 panic("Root mount failed, startup aborted."); 1380} 1381 1382/* 1383 * Mount (mountfrom) as the root filesystem. 
1384 */ 1385static int 1386vfs_mountroot_try(const char *mountfrom) 1387{ 1388 struct mount *mp; 1389 char *vfsname, *path; 1390 time_t timebase; 1391 int error; 1392 char patt[32]; 1393 1394 vfsname = NULL; 1395 path = NULL; 1396 mp = NULL; 1397 error = EINVAL; 1398 1399 if (mountfrom == NULL) 1400 return (error); /* don't complain */ 1401 printf("Trying to mount root from %s\n", mountfrom); 1402 1403 /* parse vfs name and path */ 1404 vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK); 1405 path = malloc(MNAMELEN, M_MOUNT, M_WAITOK); 1406 vfsname[0] = path[0] = 0; 1407 sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN); 1408 if (sscanf(mountfrom, patt, vfsname, path) < 1) 1409 goto out; 1410 1411 if (path[0] == '\0') 1412 strcpy(path, ROOTNAME); 1413 1414 error = kernel_vmount( 1415 MNT_RDONLY | MNT_ROOTFS, 1416 "fstype", vfsname, 1417 "fspath", "/", 1418 "from", path, 1419 NULL); 1420 if (error == 0) { 1421 /* 1422 * We mount devfs prior to mounting the / FS, so the first 1423 * entry will typically be devfs. 1424 */ 1425 mp = TAILQ_FIRST(&mountlist); 1426 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__)); 1427 1428 /* 1429 * Iterate over all currently mounted file systems and use 1430 * the time stamp found to check and/or initialize the RTC. 1431 * Typically devfs has no time stamp and the only other FS 1432 * is the actual / FS. 1433 * Call inittodr() only once and pass it the largest of the 1434 * timestamps we encounter. 1435 */ 1436 timebase = 0; 1437 do { 1438 if (mp->mnt_time > timebase) 1439 timebase = mp->mnt_time; 1440 mp = TAILQ_NEXT(mp, mnt_list); 1441 } while (mp != NULL); 1442 inittodr(timebase); 1443 1444 devfs_fixup(curthread); 1445 } 1446out: 1447 free(path, M_MOUNT); 1448 free(vfsname, M_MOUNT); 1449 return (error); 1450} 1451 1452/* 1453 * --------------------------------------------------------------------- 1454 * Interactive root filesystem selection code. 
1455 */ 1456 1457static int 1458vfs_mountroot_ask(void) 1459{ 1460 char name[128]; 1461 1462 for(;;) { 1463 printf("\nManual root filesystem specification:\n"); 1464 printf(" <fstype>:<device> Mount <device> using filesystem <fstype>\n"); 1465#if defined(__i386__) || defined(__ia64__) 1466 printf(" eg. ufs:da0s1a\n"); 1467#else 1468 printf(" eg. ufs:/dev/da0a\n"); 1469#endif 1470 printf(" ? List valid disk boot devices\n"); 1471 printf(" <empty line> Abort manual input\n"); 1472 printf("\nmountroot> "); 1473 gets(name, sizeof(name), 1); 1474 if (name[0] == '\0') 1475 return (1); 1476 if (name[0] == '?') { 1477 printf("\nList of GEOM managed disk devices:\n "); 1478 g_dev_print(); 1479 continue; 1480 } 1481 if (!vfs_mountroot_try(name)) 1482 return (0); 1483 } 1484} 1485 1486/* 1487 * --------------------------------------------------------------------- 1488 * Functions for querying mount options/arguments from filesystems. 1489 */ 1490 1491/* 1492 * Check that no unknown options are given 1493 */ 1494int 1495vfs_filteropt(struct vfsoptlist *opts, const char **legal) 1496{ 1497 struct vfsopt *opt; 1498 const char **t, *p; 1499 1500 1501 TAILQ_FOREACH(opt, opts, link) { 1502 p = opt->name; 1503 if (p[0] == 'n' && p[1] == 'o') 1504 p += 2; 1505 for(t = global_opts; *t != NULL; t++) 1506 if (!strcmp(*t, p)) 1507 break; 1508 if (*t != NULL) 1509 continue; 1510 for(t = legal; *t != NULL; t++) 1511 if (!strcmp(*t, p)) 1512 break; 1513 if (*t != NULL) 1514 continue; 1515 printf("mount option <%s> is unknown\n", p); 1516 return (EINVAL); 1517 } 1518 return (0); 1519} 1520 1521/* 1522 * Get a mount option by its name. 1523 * 1524 * Return 0 if the option was found, ENOENT otherwise. 1525 * If len is non-NULL it will be filled with the length 1526 * of the option. If buf is non-NULL, it will be filled 1527 * with the address of the option. 
1528 */ 1529int 1530vfs_getopt(opts, name, buf, len) 1531 struct vfsoptlist *opts; 1532 const char *name; 1533 void **buf; 1534 int *len; 1535{ 1536 struct vfsopt *opt; 1537 1538 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1539 1540 TAILQ_FOREACH(opt, opts, link) { 1541 if (strcmp(name, opt->name) == 0) { 1542 if (len != NULL) 1543 *len = opt->len; 1544 if (buf != NULL) 1545 *buf = opt->value; 1546 return (0); 1547 } 1548 } 1549 return (ENOENT); 1550} 1551 1552static int 1553vfs_getopt_pos(struct vfsoptlist *opts, const char *name) 1554{ 1555 struct vfsopt *opt; 1556 int i; 1557 1558 if (opts == NULL) 1559 return (-1); 1560 1561 i = 0; 1562 TAILQ_FOREACH(opt, opts, link) { 1563 if (strcmp(name, opt->name) == 0) 1564 return (i); 1565 ++i; 1566 } 1567 return (-1); 1568} 1569 1570char * 1571vfs_getopts(struct vfsoptlist *opts, const char *name, int *error) 1572{ 1573 struct vfsopt *opt; 1574 1575 *error = 0; 1576 TAILQ_FOREACH(opt, opts, link) { 1577 if (strcmp(name, opt->name) != 0) 1578 continue; 1579 if (((char *)opt->value)[opt->len - 1] != '\0') { 1580 *error = EINVAL; 1581 return (NULL); 1582 } 1583 return (opt->value); 1584 } 1585 return (NULL); 1586} 1587 1588int 1589vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val) 1590{ 1591 struct vfsopt *opt; 1592 1593 TAILQ_FOREACH(opt, opts, link) { 1594 if (strcmp(name, opt->name) == 0) { 1595 if (w != NULL) 1596 *w |= val; 1597 return (1); 1598 } 1599 } 1600 if (w != NULL) 1601 *w &= ~val; 1602 return (0); 1603} 1604 1605int 1606vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...) 
1607{ 1608 va_list ap; 1609 struct vfsopt *opt; 1610 int ret; 1611 1612 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1613 1614 TAILQ_FOREACH(opt, opts, link) { 1615 if (strcmp(name, opt->name) != 0) 1616 continue; 1617 if (((char *)opt->value)[opt->len - 1] != '\0') 1618 return (0); 1619 va_start(ap, fmt); 1620 ret = vsscanf(opt->value, fmt, ap); 1621 va_end(ap); 1622 return (ret); 1623 } 1624 return (0); 1625} 1626 1627/* 1628 * Find and copy a mount option. 1629 * 1630 * The size of the buffer has to be specified 1631 * in len, if it is not the same length as the 1632 * mount option, EINVAL is returned. 1633 * Returns ENOENT if the option is not found. 1634 */ 1635int 1636vfs_copyopt(opts, name, dest, len) 1637 struct vfsoptlist *opts; 1638 const char *name; 1639 void *dest; 1640 int len; 1641{ 1642 struct vfsopt *opt; 1643 1644 KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL")); 1645 1646 TAILQ_FOREACH(opt, opts, link) { 1647 if (strcmp(name, opt->name) == 0) { 1648 if (len != opt->len) 1649 return (EINVAL); 1650 bcopy(opt->value, dest, opt->len); 1651 return (0); 1652 } 1653 } 1654 return (ENOENT); 1655} 1656 1657/* 1658 * This is a helper function for filesystems to traverse their 1659 * vnodes. 
See MNT_VNODE_FOREACH() in sys/mount.h 1660 */ 1661 1662struct vnode * 1663__mnt_vnode_next(struct vnode **nvp, struct mount *mp) 1664{ 1665 struct vnode *vp; 1666 1667 mtx_assert(&mp->mnt_mtx, MA_OWNED); 1668 1669 vp = *nvp; 1670 /* Check if we are done */ 1671 if (vp == NULL) 1672 return (NULL); 1673 /* If our next vnode is no longer ours, start over */ 1674 if (vp->v_mount != mp) 1675 vp = TAILQ_FIRST(&mp->mnt_nvnodelist); 1676 /* Save pointer to next vnode in list */ 1677 if (vp != NULL) 1678 *nvp = TAILQ_NEXT(vp, v_nmntvnodes); 1679 else 1680 *nvp = NULL; 1681 return (vp); 1682} 1683 1684int 1685__vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) 1686{ 1687 int error; 1688 1689 error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td); 1690 if (sbp != &mp->mnt_stat) 1691 *sbp = mp->mnt_stat; 1692 return (error); 1693} 1694 1695void 1696vfs_mountedfrom(struct mount *mp, const char *from) 1697{ 1698 1699 bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname); 1700 strlcpy(mp->mnt_stat.f_mntfromname, from, 1701 sizeof mp->mnt_stat.f_mntfromname); 1702} 1703 1704/* 1705 * --------------------------------------------------------------------- 1706 * This is the api for building mount args and mounting filesystems from 1707 * inside the kernel. 1708 * 1709 * The API works by accumulation of individual args. First error is 1710 * latched. 1711 * 1712 * XXX: should be documented in new manpage kernel_mount(9) 1713 */ 1714 1715/* A memory allocation which must be freed when we are done */ 1716struct mntaarg { 1717 SLIST_ENTRY(mntaarg) next; 1718}; 1719 1720/* The header for the mount arguments */ 1721struct mntarg { 1722 struct iovec *v; 1723 int len; 1724 int error; 1725 SLIST_HEAD(, mntaarg) list; 1726}; 1727 1728/* 1729 * Add a boolean argument. 1730 * 1731 * flag is the boolean value. 1732 * name must start with "no". 
1733 */ 1734struct mntarg * 1735mount_argb(struct mntarg *ma, int flag, const char *name) 1736{ 1737 1738 KASSERT(name[0] == 'n' && name[1] == 'o', 1739 ("mount_argb(...,%s): name must start with 'no'", name)); 1740 1741 return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0)); 1742} 1743 1744/* 1745 * Add an argument printf style 1746 */ 1747struct mntarg * 1748mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...) 1749{ 1750 va_list ap; 1751 struct mntaarg *maa; 1752 struct sbuf *sb; 1753 int len; 1754 1755 if (ma == NULL) { 1756 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 1757 SLIST_INIT(&ma->list); 1758 } 1759 if (ma->error) 1760 return (ma); 1761 1762 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 1763 M_MOUNT, M_WAITOK); 1764 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 1765 ma->v[ma->len].iov_len = strlen(name) + 1; 1766 ma->len++; 1767 1768 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 1769 va_start(ap, fmt); 1770 sbuf_vprintf(sb, fmt, ap); 1771 va_end(ap); 1772 sbuf_finish(sb); 1773 len = sbuf_len(sb) + 1; 1774 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 1775 SLIST_INSERT_HEAD(&ma->list, maa, next); 1776 bcopy(sbuf_data(sb), maa + 1, len); 1777 sbuf_delete(sb); 1778 1779 ma->v[ma->len].iov_base = maa + 1; 1780 ma->v[ma->len].iov_len = len; 1781 ma->len++; 1782 1783 return (ma); 1784} 1785 1786/* 1787 * Add an argument which is a userland string. 
1788 */ 1789struct mntarg * 1790mount_argsu(struct mntarg *ma, const char *name, const void *val, int len) 1791{ 1792 struct mntaarg *maa; 1793 char *tbuf; 1794 1795 if (val == NULL) 1796 return (ma); 1797 if (ma == NULL) { 1798 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 1799 SLIST_INIT(&ma->list); 1800 } 1801 if (ma->error) 1802 return (ma); 1803 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 1804 SLIST_INSERT_HEAD(&ma->list, maa, next); 1805 tbuf = (void *)(maa + 1); 1806 ma->error = copyinstr(val, tbuf, len, NULL); 1807 return (mount_arg(ma, name, tbuf, -1)); 1808} 1809 1810/* 1811 * Plain argument. 1812 * 1813 * If length is -1, use printf. 1814 */ 1815struct mntarg * 1816mount_arg(struct mntarg *ma, const char *name, const void *val, int len) 1817{ 1818 1819 if (ma == NULL) { 1820 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 1821 SLIST_INIT(&ma->list); 1822 } 1823 if (ma->error) 1824 return (ma); 1825 1826 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 1827 M_MOUNT, M_WAITOK); 1828 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 1829 ma->v[ma->len].iov_len = strlen(name) + 1; 1830 ma->len++; 1831 1832 ma->v[ma->len].iov_base = (void *)(uintptr_t)val; 1833 if (len < 0) 1834 ma->v[ma->len].iov_len = strlen(val) + 1; 1835 else 1836 ma->v[ma->len].iov_len = len; 1837 ma->len++; 1838 return (ma); 1839} 1840 1841/* 1842 * Free a mntarg structure 1843 */ 1844static void 1845free_mntarg(struct mntarg *ma) 1846{ 1847 struct mntaarg *maa; 1848 1849 while (!SLIST_EMPTY(&ma->list)) { 1850 maa = SLIST_FIRST(&ma->list); 1851 SLIST_REMOVE_HEAD(&ma->list, next); 1852 free(maa, M_MOUNT); 1853 } 1854 free(ma->v, M_MOUNT); 1855 free(ma, M_MOUNT); 1856} 1857 1858/* 1859 * Mount a filesystem 1860 */ 1861int 1862kernel_mount(struct mntarg *ma, int flags) 1863{ 1864 struct uio auio; 1865 int error; 1866 1867 KASSERT(ma != NULL, ("kernel_mount NULL ma")); 1868 KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v")); 1869 KASSERT(!(ma->len & 1), 
("kernel_mount odd ma->len (%d)", ma->len)); 1870 1871 auio.uio_iov = ma->v; 1872 auio.uio_iovcnt = ma->len; 1873 auio.uio_segflg = UIO_SYSSPACE; 1874 1875 error = ma->error; 1876 if (!error) 1877 error = vfs_donmount(curthread, flags, &auio); 1878 free_mntarg(ma); 1879 return (error); 1880} 1881 1882/* 1883 * A printflike function to mount a filesystem. 1884 */ 1885int 1886kernel_vmount(int flags, ...) 1887{ 1888 struct mntarg *ma = NULL; 1889 va_list ap; 1890 const char *cp; 1891 const void *vp; 1892 int error; 1893 1894 va_start(ap, flags); 1895 for (;;) { 1896 cp = va_arg(ap, const char *); 1897 if (cp == NULL) 1898 break; 1899 vp = va_arg(ap, const void *); 1900 ma = mount_arg(ma, cp, vp, -1); 1901 } 1902 va_end(ap); 1903 1904 error = kernel_mount(ma, flags); 1905 return (error); 1906} 1907