/*-
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
31 * 32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD$"); 37 38 39#include "opt_bootp.h" 40#include "opt_nfsroot.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/kernel.h> 45#include <sys/bio.h> 46#include <sys/buf.h> 47#include <sys/jail.h> 48#include <sys/limits.h> 49#include <sys/lock.h> 50#include <sys/malloc.h> 51#include <sys/mbuf.h> 52#include <sys/module.h> 53#include <sys/mount.h> 54#include <sys/proc.h> 55#include <sys/socket.h> 56#include <sys/socketvar.h> 57#include <sys/sockio.h> 58#include <sys/sysctl.h> 59#include <sys/syslog.h> 60#include <sys/vnode.h> 61#include <sys/signalvar.h> 62 63#include <vm/vm.h> 64#include <vm/vm_extern.h> 65#include <vm/uma.h> 66 67#include <net/if.h> 68#include <net/route.h> 69#include <net/vnet.h> 70 71#include <netinet/in.h> 72 73#include <rpc/rpc.h> 74 75#include <nfs/nfsproto.h> 76#include <nfsclient/nfs.h> 77#include <nfsclient/nfsnode.h> 78#include <nfsclient/nfsmount.h> 79#include <nfs/xdr_subs.h> 80#include <nfsclient/nfsm_subs.h> 81#include <nfs/nfsdiskless.h> 82 83FEATURE(nfsclient, "NFS client"); 84 85MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header"); 86MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle"); 87MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data"); 88MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables"); 89MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state"); 90 91uma_zone_t nfsmount_zone; 92 93struct nfsstats nfsstats; 94 95SYSCTL_NODE(_vfs, OID_AUTO, oldnfs, CTLFLAG_RW, 0, "Old NFS filesystem"); 96SYSCTL_STRUCT(_vfs_oldnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW, 97 &nfsstats, nfsstats, "S,nfsstats"); 98static int nfs_ip_paranoia = 1; 99SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, 100 &nfs_ip_paranoia, 0, 101 "Disallow accepting replies from IPs which differ from those sent"); 102#ifdef NFS_DEBUG 103int 
nfs_debug; 104SYSCTL_INT(_vfs_oldnfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, 105 "Toggle debug flag"); 106#endif 107static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; 108SYSCTL_INT(_vfs_oldnfs, NFS_TPRINTF_INITIAL_DELAY, 109 downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, 110 "Delay before printing \"nfs server not responding\" messages"); 111/* how long between console messages "nfs server foo not responding" */ 112static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; 113SYSCTL_INT(_vfs_oldnfs, NFS_TPRINTF_DELAY, 114 downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, 115 "Delay between printing \"nfs server not responding\" messages"); 116 117static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, 118 struct nfs_args *argp, const char *hostname); 119static int mountnfs(struct nfs_args *, struct mount *, 120 struct sockaddr *, char *, struct vnode **, 121 struct ucred *cred, int, int); 122static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, 123 struct sockaddr_storage *, int *, off_t *, 124 struct timeval *); 125static vfs_mount_t nfs_mount; 126static vfs_cmount_t nfs_cmount; 127static vfs_unmount_t nfs_unmount; 128static vfs_root_t nfs_root; 129static vfs_statfs_t nfs_statfs; 130static vfs_sync_t nfs_sync; 131static vfs_sysctl_t nfs_sysctl; 132 133static int fake_wchan; 134 135/* 136 * nfs vfs operations. 137 */ 138static struct vfsops nfs_vfsops = { 139 .vfs_init = nfs_init, 140 .vfs_mount = nfs_mount, 141 .vfs_cmount = nfs_cmount, 142 .vfs_root = nfs_root, 143 .vfs_statfs = nfs_statfs, 144 .vfs_sync = nfs_sync, 145 .vfs_uninit = nfs_uninit, 146 .vfs_unmount = nfs_unmount, 147 .vfs_sysctl = nfs_sysctl, 148}; 149VFS_SET(nfs_vfsops, oldnfs, VFCF_NETWORK | VFCF_SBDRY); 150 151/* So that loader and kldload(2) can find us, wherever we are.. 
*/ 152MODULE_VERSION(oldnfs, 1); 153MODULE_DEPEND(oldnfs, krpc, 1, 1, 1); 154#ifdef KGSSAPI 155MODULE_DEPEND(oldnfs, kgssapi, 1, 1, 1); 156#endif 157MODULE_DEPEND(oldnfs, nfs_common, 1, 1, 1); 158MODULE_DEPEND(oldnfs, nfslock, 1, 1, 1); 159 160static struct nfs_rpcops nfs_rpcops = { 161 nfs_readrpc, 162 nfs_writerpc, 163 nfs_writebp, 164 nfs_readlinkrpc, 165 nfs_invaldir, 166 nfs_commit, 167}; 168 169/* 170 * This structure is now defined in sys/nfs/nfs_diskless.c so that it 171 * can be shared by both NFS clients. It is declared here so that it 172 * will be defined for kernels built without NFS_ROOT, although it 173 * isn't used in that case. 174 */ 175#ifndef NFS_ROOT 176struct nfs_diskless nfs_diskless = { { { 0 } } }; 177struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; 178int nfs_diskless_valid = 0; 179#endif 180 181SYSCTL_INT(_vfs_oldnfs, OID_AUTO, diskless_valid, CTLFLAG_RD, 182 &nfs_diskless_valid, 0, 183 "Has the diskless struct been filled correctly"); 184 185SYSCTL_STRING(_vfs_oldnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, 186 nfsv3_diskless.root_hostnam, 0, "Path to nfs root"); 187 188SYSCTL_OPAQUE(_vfs_oldnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, 189 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr, 190 "%Ssockaddr_in", "Diskless root nfs address"); 191 192 193void nfsargs_ntoh(struct nfs_args *); 194static int nfs_mountdiskless(char *, 195 struct sockaddr_in *, struct nfs_args *, 196 struct thread *, struct vnode **, struct mount *); 197static void nfs_convert_diskless(void); 198static void nfs_convert_oargs(struct nfs_args *args, 199 struct onfs_args *oargs); 200 201int 202nfs_iosize(struct nfsmount *nmp) 203{ 204 int iosize; 205 206 /* 207 * Calculate the size used for io buffers. Use the larger 208 * of the two sizes to minimise nfs requests but make sure 209 * that it is at least one VM page to avoid wasting buffer 210 * space. 
211 */ 212 iosize = imax(nmp->nm_rsize, nmp->nm_wsize); 213 iosize = imax(iosize, PAGE_SIZE); 214 return (iosize); 215} 216 217static void 218nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) 219{ 220 221 args->version = NFS_ARGSVERSION; 222 args->addr = oargs->addr; 223 args->addrlen = oargs->addrlen; 224 args->sotype = oargs->sotype; 225 args->proto = oargs->proto; 226 args->fh = oargs->fh; 227 args->fhsize = oargs->fhsize; 228 args->flags = oargs->flags; 229 args->wsize = oargs->wsize; 230 args->rsize = oargs->rsize; 231 args->readdirsize = oargs->readdirsize; 232 args->timeo = oargs->timeo; 233 args->retrans = oargs->retrans; 234 args->maxgrouplist = oargs->maxgrouplist; 235 args->readahead = oargs->readahead; 236 args->deadthresh = oargs->deadthresh; 237 args->hostname = oargs->hostname; 238} 239 240static void 241nfs_convert_diskless(void) 242{ 243 244 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, 245 sizeof(struct ifaliasreq)); 246 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, 247 sizeof(struct sockaddr_in)); 248 nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); 249 if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { 250 nfsv3_diskless.root_fhsize = NFSX_V3FH; 251 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH); 252 } else { 253 nfsv3_diskless.root_fhsize = NFSX_V2FH; 254 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); 255 } 256 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, 257 sizeof(struct sockaddr_in)); 258 bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN); 259 nfsv3_diskless.root_time = nfs_diskless.root_time; 260 bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam, 261 MAXHOSTNAMELEN); 262 nfs_diskless_valid = 3; 263} 264 265/* 266 * nfs statfs call 267 */ 268static int 269nfs_statfs(struct mount *mp, struct statfs *sbp) 270{ 271 struct vnode *vp; 272 struct thread *td; 273 struct nfs_statfs *sfp; 274 caddr_t bpos, dpos; 275 struct 
nfsmount *nmp = VFSTONFS(mp); 276 int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr; 277 struct mbuf *mreq, *mrep, *md, *mb; 278 struct nfsnode *np; 279 u_quad_t tquad; 280 281 td = curthread; 282#ifndef nolint 283 sfp = NULL; 284#endif 285 error = vfs_busy(mp, MBF_NOWAIT); 286 if (error) 287 return (error); 288 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); 289 if (error) { 290 vfs_unbusy(mp); 291 return (error); 292 } 293 vp = NFSTOV(np); 294 mtx_lock(&nmp->nm_mtx); 295 if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { 296 mtx_unlock(&nmp->nm_mtx); 297 (void)nfs_fsinfo(nmp, vp, td->td_ucred, td); 298 } else 299 mtx_unlock(&nmp->nm_mtx); 300 nfsstats.rpccnt[NFSPROC_FSSTAT]++; 301 mreq = m_get2(NFSX_FH(v3), M_WAITOK, MT_DATA, 0); 302 mb = mreq; 303 bpos = mtod(mb, caddr_t); 304 nfsm_fhtom(vp, v3); 305 nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred); 306 if (v3) 307 nfsm_postop_attr(vp, retattr); 308 if (error) { 309 if (mrep != NULL) 310 m_freem(mrep); 311 goto nfsmout; 312 } 313 sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3)); 314 mtx_lock(&nmp->nm_mtx); 315 sbp->f_iosize = nfs_iosize(nmp); 316 mtx_unlock(&nmp->nm_mtx); 317 if (v3) { 318 sbp->f_bsize = NFS_FABLKSIZE; 319 tquad = fxdr_hyper(&sfp->sf_tbytes); 320 sbp->f_blocks = tquad / NFS_FABLKSIZE; 321 tquad = fxdr_hyper(&sfp->sf_fbytes); 322 sbp->f_bfree = tquad / NFS_FABLKSIZE; 323 tquad = fxdr_hyper(&sfp->sf_abytes); 324 sbp->f_bavail = tquad / NFS_FABLKSIZE; 325 sbp->f_files = (fxdr_unsigned(int32_t, 326 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff); 327 sbp->f_ffree = (fxdr_unsigned(int32_t, 328 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff); 329 } else { 330 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize); 331 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks); 332 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree); 333 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail); 334 sbp->f_files = 0; 335 sbp->f_ffree = 0; 336 } 337 m_freem(mrep); 
338nfsmout: 339 vput(vp); 340 vfs_unbusy(mp); 341 return (error); 342} 343 344/* 345 * nfs version 3 fsinfo rpc call 346 */ 347int 348nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, 349 struct thread *td) 350{ 351 struct nfsv3_fsinfo *fsp; 352 u_int32_t pref, max; 353 caddr_t bpos, dpos; 354 int error = 0, retattr; 355 struct mbuf *mreq, *mrep, *md, *mb; 356 u_int64_t maxfsize; 357 358 nfsstats.rpccnt[NFSPROC_FSINFO]++; 359 mreq = m_get2(NFSX_FH(1), M_WAITOK, MT_DATA, 0); 360 mb = mreq; 361 bpos = mtod(mb, caddr_t); 362 nfsm_fhtom(vp, 1); 363 nfsm_request(vp, NFSPROC_FSINFO, td, cred); 364 nfsm_postop_attr(vp, retattr); 365 if (!error) { 366 fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO); 367 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref); 368 mtx_lock(&nmp->nm_mtx); 369 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE) 370 nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) & 371 ~(NFS_FABLKSIZE - 1); 372 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax); 373 if (max < nmp->nm_wsize && max > 0) { 374 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); 375 if (nmp->nm_wsize == 0) 376 nmp->nm_wsize = max; 377 } 378 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref); 379 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE) 380 nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) & 381 ~(NFS_FABLKSIZE - 1); 382 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax); 383 if (max < nmp->nm_rsize && max > 0) { 384 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); 385 if (nmp->nm_rsize == 0) 386 nmp->nm_rsize = max; 387 } 388 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref); 389 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ) 390 nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) & 391 ~(NFS_DIRBLKSIZ - 1); 392 if (max < nmp->nm_readdirsize && max > 0) { 393 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1); 394 if (nmp->nm_readdirsize == 0) 395 nmp->nm_readdirsize = max; 396 } 397 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize); 398 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize) 
399 nmp->nm_maxfilesize = maxfsize; 400 nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp); 401 nmp->nm_state |= NFSSTA_GOTFSINFO; 402 mtx_unlock(&nmp->nm_mtx); 403 } 404 m_freem(mrep); 405nfsmout: 406 return (error); 407} 408 409/* 410 * Mount a remote root fs via. nfs. This depends on the info in the 411 * nfs_diskless structure that has been filled in properly by some primary 412 * bootstrap. 413 * It goes something like this: 414 * - do enough of "ifconfig" by calling ifioctl() so that the system 415 * can talk to the server 416 * - If nfs_diskless.mygateway is filled in, use that address as 417 * a default gateway. 418 * - build the rootfs mount point and call mountnfs() to do the rest. 419 * 420 * It is assumed to be safe to read, modify, and write the nfsv3_diskless 421 * structure, as well as other global NFS client variables here, as 422 * nfs_mountroot() will be called once in the boot before any other NFS 423 * client activity occurs. 424 */ 425int 426nfs_mountroot(struct mount *mp) 427{ 428 struct thread *td = curthread; 429 struct nfsv3_diskless *nd = &nfsv3_diskless; 430 struct socket *so; 431 struct vnode *vp; 432 struct ifreq ir; 433 int error; 434 u_long l; 435 char buf[128]; 436 char *cp; 437 438 439#if defined(BOOTP_NFSROOT) && defined(BOOTP) 440 bootpc_init(); /* use bootp to get nfs_diskless filled in */ 441#elif defined(NFS_ROOT) 442 nfs_setup_diskless(); 443#endif 444 445 if (nfs_diskless_valid == 0) { 446 return (-1); 447 } 448 if (nfs_diskless_valid == 1) 449 nfs_convert_diskless(); 450 451 /* 452 * XXX splnet, so networks will receive... 453 */ 454 splnet(); 455 456 /* 457 * Do enough of ifconfig(8) so that the critical net interface can 458 * talk to the server. 
459 */ 460 error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, 461 td->td_ucred, td); 462 if (error) 463 panic("nfs_mountroot: socreate(%04x): %d", 464 nd->myif.ifra_addr.sa_family, error); 465 466#if 0 /* XXX Bad idea */ 467 /* 468 * We might not have been told the right interface, so we pass 469 * over the first ten interfaces of the same kind, until we get 470 * one of them configured. 471 */ 472 473 for (i = strlen(nd->myif.ifra_name) - 1; 474 nd->myif.ifra_name[i] >= '0' && 475 nd->myif.ifra_name[i] <= '9'; 476 nd->myif.ifra_name[i] ++) { 477 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); 478 if(!error) 479 break; 480 } 481#endif 482 483 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); 484 if (error) 485 panic("nfs_mountroot: SIOCAIFADDR: %d", error); 486 487 if ((cp = getenv("boot.netif.mtu")) != NULL) { 488 ir.ifr_mtu = strtol(cp, NULL, 10); 489 bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); 490 freeenv(cp); 491 error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); 492 if (error) 493 printf("nfs_mountroot: SIOCSIFMTU: %d", error); 494 } 495 soclose(so); 496 497 /* 498 * If the gateway field is filled in, set it as the default route. 499 * Note that pxeboot will set a default route of 0 if the route 500 * is not set by the DHCP server. Check also for a value of 0 501 * to avoid panicking inappropriately in that situation. 
502 */ 503 if (nd->mygateway.sin_len != 0 && 504 nd->mygateway.sin_addr.s_addr != 0) { 505 struct sockaddr_in mask, sin; 506 507 bzero((caddr_t)&mask, sizeof(mask)); 508 sin = mask; 509 sin.sin_family = AF_INET; 510 sin.sin_len = sizeof(sin); 511 /* XXX MRT use table 0 for this sort of thing */ 512 CURVNET_SET(TD_TO_VNET(td)); 513 error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, 514 (struct sockaddr *)&nd->mygateway, 515 (struct sockaddr *)&mask, 516 RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); 517 CURVNET_RESTORE(); 518 if (error) 519 panic("nfs_mountroot: RTM_ADD: %d", error); 520 } 521 522 /* 523 * Create the rootfs mount point. 524 */ 525 nd->root_args.fh = nd->root_fh; 526 nd->root_args.fhsize = nd->root_fhsize; 527 l = ntohl(nd->root_saddr.sin_addr.s_addr); 528 snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", 529 (l >> 24) & 0xff, (l >> 16) & 0xff, 530 (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); 531 printf("NFS ROOT: %s\n", buf); 532 nd->root_args.hostname = buf; 533 if ((error = nfs_mountdiskless(buf, 534 &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { 535 return (error); 536 } 537 538 /* 539 * This is not really an nfs issue, but it is much easier to 540 * set hostname here and then let the "/etc/rc.xxx" files 541 * mount the right /var based upon its preset value. 542 */ 543 mtx_lock(&prison0.pr_mtx); 544 strlcpy(prison0.pr_hostname, nd->my_hostnam, 545 sizeof (prison0.pr_hostname)); 546 mtx_unlock(&prison0.pr_mtx); 547 inittodr(ntohl(nd->root_time)); 548 return (0); 549} 550 551/* 552 * Internal version of mount system call for diskless setup. 
553 */ 554static int 555nfs_mountdiskless(char *path, 556 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, 557 struct vnode **vpp, struct mount *mp) 558{ 559 struct sockaddr *nam; 560 int error; 561 562 nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); 563 if ((error = mountnfs(args, mp, nam, path, vpp, td->td_ucred, 564 NFS_DEFAULT_NAMETIMEO, NFS_DEFAULT_NEGNAMETIMEO)) != 0) { 565 printf("nfs_mountroot: mount %s on /: %d\n", path, error); 566 return (error); 567 } 568 return (0); 569} 570 571static int 572nfs_sec_name_to_num(char *sec) 573{ 574 if (!strcmp(sec, "krb5")) 575 return (RPCSEC_GSS_KRB5); 576 if (!strcmp(sec, "krb5i")) 577 return (RPCSEC_GSS_KRB5I); 578 if (!strcmp(sec, "krb5p")) 579 return (RPCSEC_GSS_KRB5P); 580 if (!strcmp(sec, "sys")) 581 return (AUTH_SYS); 582 /* 583 * Userland should validate the string but we will try and 584 * cope with unexpected values. 585 */ 586 return (AUTH_SYS); 587} 588 589static void 590nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, 591 const char *hostname) 592{ 593 int s; 594 int adjsock; 595 int maxio; 596 char *p; 597 char *secname; 598 char *principal; 599 600 s = splnet(); 601 602 /* 603 * Set read-only flag if requested; otherwise, clear it if this is 604 * an update. If this is not an update, then either the read-only 605 * flag is already clear, or this is a root mount and it was set 606 * intentionally at some previous point. 607 */ 608 if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { 609 MNT_ILOCK(mp); 610 mp->mnt_flag |= MNT_RDONLY; 611 MNT_IUNLOCK(mp); 612 } else if (mp->mnt_flag & MNT_UPDATE) { 613 MNT_ILOCK(mp); 614 mp->mnt_flag &= ~MNT_RDONLY; 615 MNT_IUNLOCK(mp); 616 } 617 618 /* 619 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes 620 * no sense in that context. Also, set up appropriate retransmit 621 * and soft timeout behavior. 
622 */ 623 if (argp->sotype == SOCK_STREAM) { 624 nmp->nm_flag &= ~NFSMNT_NOCONN; 625 nmp->nm_flag |= NFSMNT_DUMBTIMR; 626 nmp->nm_timeo = NFS_MAXTIMEO; 627 nmp->nm_retry = NFS_RETRANS_TCP; 628 } 629 630 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */ 631 if ((argp->flags & NFSMNT_NFSV3) == 0) 632 nmp->nm_flag &= ~NFSMNT_RDIRPLUS; 633 634 /* Re-bind if rsrvd port requested and wasn't on one */ 635 adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) 636 && (argp->flags & NFSMNT_RESVPORT); 637 /* Also re-bind if we're switching to/from a connected UDP socket */ 638 adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != 639 (argp->flags & NFSMNT_NOCONN)); 640 641 /* Update flags atomically. Don't change the lock bits. */ 642 nmp->nm_flag = argp->flags | nmp->nm_flag; 643 splx(s); 644 645 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { 646 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; 647 if (nmp->nm_timeo < NFS_MINTIMEO) 648 nmp->nm_timeo = NFS_MINTIMEO; 649 else if (nmp->nm_timeo > NFS_MAXTIMEO) 650 nmp->nm_timeo = NFS_MAXTIMEO; 651 } 652 653 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { 654 nmp->nm_retry = argp->retrans; 655 if (nmp->nm_retry > NFS_MAXREXMIT) 656 nmp->nm_retry = NFS_MAXREXMIT; 657 } 658 659 if (argp->flags & NFSMNT_NFSV3) { 660 if (argp->sotype == SOCK_DGRAM) 661 maxio = NFS_MAXDGRAMDATA; 662 else 663 maxio = NFS_MAXDATA; 664 } else 665 maxio = NFS_V2MAXDATA; 666 667 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { 668 nmp->nm_wsize = argp->wsize; 669 /* Round down to multiple of blocksize */ 670 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); 671 if (nmp->nm_wsize <= 0) 672 nmp->nm_wsize = NFS_FABLKSIZE; 673 } 674 if (nmp->nm_wsize > maxio) 675 nmp->nm_wsize = maxio; 676 if (nmp->nm_wsize > MAXBSIZE) 677 nmp->nm_wsize = MAXBSIZE; 678 679 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { 680 nmp->nm_rsize = argp->rsize; 681 /* Round down to multiple of blocksize */ 682 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); 683 if 
(nmp->nm_rsize <= 0) 684 nmp->nm_rsize = NFS_FABLKSIZE; 685 } 686 if (nmp->nm_rsize > maxio) 687 nmp->nm_rsize = maxio; 688 if (nmp->nm_rsize > MAXBSIZE) 689 nmp->nm_rsize = MAXBSIZE; 690 691 if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { 692 nmp->nm_readdirsize = argp->readdirsize; 693 } 694 if (nmp->nm_readdirsize > maxio) 695 nmp->nm_readdirsize = maxio; 696 if (nmp->nm_readdirsize > nmp->nm_rsize) 697 nmp->nm_readdirsize = nmp->nm_rsize; 698 699 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) 700 nmp->nm_acregmin = argp->acregmin; 701 else 702 nmp->nm_acregmin = NFS_MINATTRTIMO; 703 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) 704 nmp->nm_acregmax = argp->acregmax; 705 else 706 nmp->nm_acregmax = NFS_MAXATTRTIMO; 707 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) 708 nmp->nm_acdirmin = argp->acdirmin; 709 else 710 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; 711 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) 712 nmp->nm_acdirmax = argp->acdirmax; 713 else 714 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; 715 if (nmp->nm_acdirmin > nmp->nm_acdirmax) 716 nmp->nm_acdirmin = nmp->nm_acdirmax; 717 if (nmp->nm_acregmin > nmp->nm_acregmax) 718 nmp->nm_acregmin = nmp->nm_acregmax; 719 720 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) { 721 if (argp->maxgrouplist <= NFS_MAXGRPS) 722 nmp->nm_numgrps = argp->maxgrouplist; 723 else 724 nmp->nm_numgrps = NFS_MAXGRPS; 725 } 726 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { 727 if (argp->readahead <= NFS_MAXRAHEAD) 728 nmp->nm_readahead = argp->readahead; 729 else 730 nmp->nm_readahead = NFS_MAXRAHEAD; 731 } 732 if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { 733 if (argp->wcommitsize < nmp->nm_wsize) 734 nmp->nm_wcommitsize = nmp->nm_wsize; 735 else 736 nmp->nm_wcommitsize = argp->wcommitsize; 737 } 738 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) { 739 if (argp->deadthresh <= 
NFS_MAXDEADTHRESH) 740 nmp->nm_deadthresh = argp->deadthresh; 741 else 742 nmp->nm_deadthresh = NFS_MAXDEADTHRESH; 743 } 744 745 adjsock |= ((nmp->nm_sotype != argp->sotype) || 746 (nmp->nm_soproto != argp->proto)); 747 nmp->nm_sotype = argp->sotype; 748 nmp->nm_soproto = argp->proto; 749 750 if (nmp->nm_client && adjsock) { 751 nfs_safedisconnect(nmp); 752 if (nmp->nm_sotype == SOCK_DGRAM) 753 while (nfs_connect(nmp)) { 754 printf("nfs_args: retrying connect\n"); 755 (void) tsleep(&fake_wchan, PSOCK, "nfscon", hz); 756 } 757 } 758 759 if (hostname) { 760 strlcpy(nmp->nm_hostname, hostname, 761 sizeof(nmp->nm_hostname)); 762 p = strchr(nmp->nm_hostname, ':'); 763 if (p) 764 *p = '\0'; 765 } 766 767 if (vfs_getopt(mp->mnt_optnew, "sec", 768 (void **) &secname, NULL) == 0) { 769 nmp->nm_secflavor = nfs_sec_name_to_num(secname); 770 } else { 771 nmp->nm_secflavor = AUTH_SYS; 772 } 773 774 if (vfs_getopt(mp->mnt_optnew, "principal", 775 (void **) &principal, NULL) == 0) { 776 strlcpy(nmp->nm_principal, principal, 777 sizeof(nmp->nm_principal)); 778 } else { 779 snprintf(nmp->nm_principal, sizeof(nmp->nm_principal), 780 "nfs@%s", nmp->nm_hostname); 781 } 782} 783 784static const char *nfs_opts[] = { "from", "nfs_args", 785 "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", 786 "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", 787 "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport", 788 "readahead", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", 789 "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin", 790 "acdirmax", "deadthresh", "hostname", "timeout", "addr", "fh", "nfsv3", 791 "sec", "maxgroups", "principal", "negnametimeo", "nocto", "wcommitsize", 792 "nametimeo", 793 NULL }; 794 795/* 796 * VFS Operations. 
797 * 798 * mount system call 799 * It seems a bit dumb to copyinstr() the host and path here and then 800 * bcopy() them in mountnfs(), but I wanted to detect errors before 801 * doing the sockargs() call because sockargs() allocates an mbuf and 802 * an error after that means that I have to release the mbuf. 803 */ 804/* ARGSUSED */ 805static int 806nfs_mount(struct mount *mp) 807{ 808 struct nfs_args args = { 809 .version = NFS_ARGSVERSION, 810 .addr = NULL, 811 .addrlen = sizeof (struct sockaddr_in), 812 .sotype = SOCK_STREAM, 813 .proto = 0, 814 .fh = NULL, 815 .fhsize = 0, 816 .flags = NFSMNT_RESVPORT, 817 .wsize = NFS_WSIZE, 818 .rsize = NFS_RSIZE, 819 .readdirsize = NFS_READDIRSIZE, 820 .timeo = 10, 821 .retrans = NFS_RETRANS, 822 .maxgrouplist = NFS_MAXGRPS, 823 .readahead = NFS_DEFRAHEAD, 824 .wcommitsize = 0, /* was: NQ_DEFLEASE */ 825 .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */ 826 .hostname = NULL, 827 /* args version 4 */ 828 .acregmin = NFS_MINATTRTIMO, 829 .acregmax = NFS_MAXATTRTIMO, 830 .acdirmin = NFS_MINDIRATTRTIMO, 831 .acdirmax = NFS_MAXDIRATTRTIMO, 832 }; 833 int error, ret, has_nfs_args_opt; 834 int has_addr_opt, has_fh_opt, has_hostname_opt; 835 struct sockaddr *nam; 836 struct vnode *vp; 837 char hst[MNAMELEN]; 838 size_t len; 839 u_char nfh[NFSX_V3FHMAX]; 840 char *opt; 841 int nametimeo = NFS_DEFAULT_NAMETIMEO; 842 int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; 843 844 has_nfs_args_opt = 0; 845 has_addr_opt = 0; 846 has_fh_opt = 0; 847 has_hostname_opt = 0; 848 849 if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { 850 error = EINVAL; 851 goto out; 852 } 853 854 if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) { 855 error = nfs_mountroot(mp); 856 goto out; 857 } 858 859 /* 860 * The old mount_nfs program passed the struct nfs_args 861 * from userspace to kernel. The new mount_nfs program 862 * passes string options via nmount() from userspace to kernel 863 * and we populate the struct nfs_args in the kernel. 
864 */ 865 if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) { 866 error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, 867 sizeof args); 868 if (error) 869 goto out; 870 871 if (args.version != NFS_ARGSVERSION) { 872 error = EPROGMISMATCH; 873 goto out; 874 } 875 has_nfs_args_opt = 1; 876 } 877 878 if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0) 879 args.flags |= NFSMNT_DUMBTIMR; 880 if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0) 881 args.flags |= NFSMNT_NOCONN; 882 if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0) 883 args.flags |= NFSMNT_NOCONN; 884 if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0) 885 args.flags |= NFSMNT_NOLOCKD; 886 if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0) 887 args.flags &= ~NFSMNT_NOLOCKD; 888 if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0) 889 args.flags |= NFSMNT_INT; 890 if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0) 891 args.flags |= NFSMNT_RDIRPLUS; 892 if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0) 893 args.flags |= NFSMNT_RESVPORT; 894 if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0) 895 args.flags &= ~NFSMNT_RESVPORT; 896 if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0) 897 args.flags |= NFSMNT_SOFT; 898 if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0) 899 args.flags &= ~NFSMNT_SOFT; 900 if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0) 901 args.sotype = SOCK_DGRAM; 902 if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0) 903 args.sotype = SOCK_DGRAM; 904 if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0) 905 args.sotype = SOCK_STREAM; 906 if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0) 907 args.flags |= NFSMNT_NFSV3; 908 if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0) 909 args.flags |= NFSMNT_NOCTO; 910 if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) { 911 if (opt == NULL) { 912 vfs_mount_error(mp, "illegal readdirsize"); 913 error = 
EINVAL; 914 goto out; 915 } 916 ret = sscanf(opt, "%d", &args.readdirsize); 917 if (ret != 1 || args.readdirsize <= 0) { 918 vfs_mount_error(mp, "illegal readdirsize: %s", 919 opt); 920 error = EINVAL; 921 goto out; 922 } 923 args.flags |= NFSMNT_READDIRSIZE; 924 } 925 if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) { 926 if (opt == NULL) { 927 vfs_mount_error(mp, "illegal readahead"); 928 error = EINVAL; 929 goto out; 930 } 931 ret = sscanf(opt, "%d", &args.readahead); 932 if (ret != 1 || args.readahead <= 0) { 933 vfs_mount_error(mp, "illegal readahead: %s", 934 opt); 935 error = EINVAL; 936 goto out; 937 } 938 args.flags |= NFSMNT_READAHEAD; 939 } 940 if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) { 941 if (opt == NULL) { 942 vfs_mount_error(mp, "illegal wsize"); 943 error = EINVAL; 944 goto out; 945 } 946 ret = sscanf(opt, "%d", &args.wsize); 947 if (ret != 1 || args.wsize <= 0) { 948 vfs_mount_error(mp, "illegal wsize: %s", 949 opt); 950 error = EINVAL; 951 goto out; 952 } 953 args.flags |= NFSMNT_WSIZE; 954 } 955 if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) { 956 if (opt == NULL) { 957 vfs_mount_error(mp, "illegal rsize"); 958 error = EINVAL; 959 goto out; 960 } 961 ret = sscanf(opt, "%d", &args.rsize); 962 if (ret != 1 || args.rsize <= 0) { 963 vfs_mount_error(mp, "illegal wsize: %s", 964 opt); 965 error = EINVAL; 966 goto out; 967 } 968 args.flags |= NFSMNT_RSIZE; 969 } 970 if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) { 971 if (opt == NULL) { 972 vfs_mount_error(mp, "illegal retrans"); 973 error = EINVAL; 974 goto out; 975 } 976 ret = sscanf(opt, "%d", &args.retrans); 977 if (ret != 1 || args.retrans <= 0) { 978 vfs_mount_error(mp, "illegal retrans: %s", 979 opt); 980 error = EINVAL; 981 goto out; 982 } 983 args.flags |= NFSMNT_RETRANS; 984 } 985 if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) { 986 ret = sscanf(opt, "%d", &args.acregmin); 987 if 
(ret != 1 || args.acregmin < 0) { 988 vfs_mount_error(mp, "illegal acregmin: %s", 989 opt); 990 error = EINVAL; 991 goto out; 992 } 993 args.flags |= NFSMNT_ACREGMIN; 994 } 995 if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) { 996 ret = sscanf(opt, "%d", &args.acregmax); 997 if (ret != 1 || args.acregmax < 0) { 998 vfs_mount_error(mp, "illegal acregmax: %s", 999 opt); 1000 error = EINVAL; 1001 goto out; 1002 } 1003 args.flags |= NFSMNT_ACREGMAX; 1004 } 1005 if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) { 1006 ret = sscanf(opt, "%d", &args.acdirmin); 1007 if (ret != 1 || args.acdirmin < 0) { 1008 vfs_mount_error(mp, "illegal acdirmin: %s", 1009 opt); 1010 error = EINVAL; 1011 goto out; 1012 } 1013 args.flags |= NFSMNT_ACDIRMIN; 1014 } 1015 if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) { 1016 ret = sscanf(opt, "%d", &args.acdirmax); 1017 if (ret != 1 || args.acdirmax < 0) { 1018 vfs_mount_error(mp, "illegal acdirmax: %s", 1019 opt); 1020 error = EINVAL; 1021 goto out; 1022 } 1023 args.flags |= NFSMNT_ACDIRMAX; 1024 } 1025 if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) { 1026 ret = sscanf(opt, "%d", &args.wcommitsize); 1027 if (ret != 1 || args.wcommitsize < 0) { 1028 vfs_mount_error(mp, "illegal wcommitsize: %s", opt); 1029 error = EINVAL; 1030 goto out; 1031 } 1032 args.flags |= NFSMNT_WCOMMITSIZE; 1033 } 1034 if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) { 1035 ret = sscanf(opt, "%d", &args.deadthresh); 1036 if (ret != 1 || args.deadthresh <= 0) { 1037 vfs_mount_error(mp, "illegal deadthresh: %s", 1038 opt); 1039 error = EINVAL; 1040 goto out; 1041 } 1042 args.flags |= NFSMNT_DEADTHRESH; 1043 } 1044 if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) { 1045 ret = sscanf(opt, "%d", &args.timeo); 1046 if (ret != 1 || args.timeo <= 0) { 1047 vfs_mount_error(mp, "illegal timeout: %s", 1048 opt); 1049 error = EINVAL; 1050 goto out; 
1051 } 1052 args.flags |= NFSMNT_TIMEO; 1053 } 1054 if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) { 1055 ret = sscanf(opt, "%d", &args.maxgrouplist); 1056 if (ret != 1 || args.maxgrouplist <= 0) { 1057 vfs_mount_error(mp, "illegal maxgroups: %s", 1058 opt); 1059 error = EINVAL; 1060 goto out; 1061 } 1062 args.flags |= NFSMNT_MAXGRPS; 1063 } 1064 if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) { 1065 ret = sscanf(opt, "%d", &nametimeo); 1066 if (ret != 1 || nametimeo < 0) { 1067 vfs_mount_error(mp, "illegal nametimeo: %s", opt); 1068 error = EINVAL; 1069 goto out; 1070 } 1071 } 1072 if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL) 1073 == 0) { 1074 ret = sscanf(opt, "%d", &negnametimeo); 1075 if (ret != 1 || negnametimeo < 0) { 1076 vfs_mount_error(mp, "illegal negnametimeo: %s", 1077 opt); 1078 error = EINVAL; 1079 goto out; 1080 } 1081 } 1082 if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr, 1083 &args.addrlen) == 0) { 1084 has_addr_opt = 1; 1085 if (args.addrlen > SOCK_MAXADDRLEN) { 1086 error = ENAMETOOLONG; 1087 goto out; 1088 } 1089 nam = malloc(args.addrlen, M_SONAME, 1090 M_WAITOK); 1091 bcopy(args.addr, nam, args.addrlen); 1092 nam->sa_len = args.addrlen; 1093 } 1094 if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh, 1095 &args.fhsize) == 0) { 1096 has_fh_opt = 1; 1097 } 1098 if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname, 1099 NULL) == 0) { 1100 has_hostname_opt = 1; 1101 } 1102 if (args.hostname == NULL) { 1103 vfs_mount_error(mp, "Invalid hostname"); 1104 error = EINVAL; 1105 goto out; 1106 } 1107 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { 1108 vfs_mount_error(mp, "Bad file handle"); 1109 error = EINVAL; 1110 goto out; 1111 } 1112 1113 if (mp->mnt_flag & MNT_UPDATE) { 1114 struct nfsmount *nmp = VFSTONFS(mp); 1115 1116 if (nmp == NULL) { 1117 error = EIO; 1118 goto out; 1119 } 1120 1121 /* 1122 * If a change from TCP->UDP is done and there are 
thread(s) 1123 * that have I/O RPC(s) in progress with a tranfer size 1124 * greater than NFS_MAXDGRAMDATA, those thread(s) will be 1125 * hung, retrying the RPC(s) forever. Usually these threads 1126 * will be seen doing an uninterruptible sleep on wait channel 1127 * "newnfsreq" (truncated to "newnfsre" by procstat). 1128 */ 1129 if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM) 1130 tprintf(curthread->td_proc, LOG_WARNING, 1131 "Warning: mount -u that changes TCP->UDP can result in hung threads\n"); 1132 1133 /* 1134 * When doing an update, we can't change from or to 1135 * v3, switch lockd strategies or change cookie translation 1136 */ 1137 args.flags = (args.flags & 1138 ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | 1139 (nmp->nm_flag & 1140 (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); 1141 nfs_decode_args(mp, nmp, &args, NULL); 1142 goto out; 1143 } 1144 1145 /* 1146 * Make the nfs_ip_paranoia sysctl serve as the default connection 1147 * or no-connection mode for those protocols that support 1148 * no-connection mode (the flag will be cleared later for protocols 1149 * that do not support no-connection mode). This will allow a client 1150 * to receive replies from a different IP then the request was 1151 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), 1152 * not 0. 1153 */ 1154 if (nfs_ip_paranoia == 0) 1155 args.flags |= NFSMNT_NOCONN; 1156 1157 if (has_nfs_args_opt) { 1158 /* 1159 * In the 'nfs_args' case, the pointers in the args 1160 * structure are in userland - we copy them in here. 
1161 */ 1162 if (!has_fh_opt) { 1163 error = copyin((caddr_t)args.fh, (caddr_t)nfh, 1164 args.fhsize); 1165 if (error) { 1166 goto out; 1167 } 1168 args.fh = nfh; 1169 } 1170 if (!has_hostname_opt) { 1171 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); 1172 if (error) { 1173 goto out; 1174 } 1175 bzero(&hst[len], MNAMELEN - len); 1176 args.hostname = hst; 1177 } 1178 if (!has_addr_opt) { 1179 /* sockargs() call must be after above copyin() calls */ 1180 error = getsockaddr(&nam, (caddr_t)args.addr, 1181 args.addrlen); 1182 if (error) { 1183 goto out; 1184 } 1185 } 1186 } else if (has_addr_opt == 0) { 1187 vfs_mount_error(mp, "No server address"); 1188 error = EINVAL; 1189 goto out; 1190 } 1191 error = mountnfs(&args, mp, nam, args.hostname, &vp, 1192 curthread->td_ucred, nametimeo, negnametimeo); 1193out: 1194 if (!error) { 1195 MNT_ILOCK(mp); 1196 mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1197 MNT_IUNLOCK(mp); 1198 } 1199 return (error); 1200} 1201 1202 1203/* 1204 * VFS Operations. 1205 * 1206 * mount system call 1207 * It seems a bit dumb to copyinstr() the host and path here and then 1208 * bcopy() them in mountnfs(), but I wanted to detect errors before 1209 * doing the sockargs() call because sockargs() allocates an mbuf and 1210 * an error after that means that I have to release the mbuf. 
1211 */ 1212/* ARGSUSED */ 1213static int 1214nfs_cmount(struct mntarg *ma, void *data, uint64_t flags) 1215{ 1216 int error; 1217 struct nfs_args args; 1218 1219 error = copyin(data, &args, sizeof (struct nfs_args)); 1220 if (error) 1221 return error; 1222 1223 ma = mount_arg(ma, "nfs_args", &args, sizeof args); 1224 1225 error = kernel_mount(ma, flags); 1226 return (error); 1227} 1228 1229/* 1230 * Common code for mount and mountroot 1231 */ 1232static int 1233mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, 1234 char *hst, struct vnode **vpp, struct ucred *cred, int nametimeo, 1235 int negnametimeo) 1236{ 1237 struct nfsmount *nmp; 1238 struct nfsnode *np; 1239 int error; 1240 struct vattr attrs; 1241 1242 if (mp->mnt_flag & MNT_UPDATE) { 1243 nmp = VFSTONFS(mp); 1244 printf("%s: MNT_UPDATE is no longer handled here\n", __func__); 1245 free(nam, M_SONAME); 1246 return (0); 1247 } else { 1248 nmp = uma_zalloc(nfsmount_zone, M_WAITOK); 1249 bzero((caddr_t)nmp, sizeof (struct nfsmount)); 1250 TAILQ_INIT(&nmp->nm_bufq); 1251 mp->mnt_data = nmp; 1252 nmp->nm_getinfo = nfs_getnlminfo; 1253 nmp->nm_vinvalbuf = nfs_vinvalbuf; 1254 } 1255 vfs_getnewfsid(mp); 1256 nmp->nm_mountp = mp; 1257 mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF); 1258 1259 /* 1260 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too 1261 * high, depending on whether we end up with negative offsets in 1262 * the client or server somewhere. 2GB-1 may be safer. 1263 * 1264 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum 1265 * that we can handle until we find out otherwise. 
1266 */ 1267 if ((argp->flags & NFSMNT_NFSV3) == 0) 1268 nmp->nm_maxfilesize = 0xffffffffLL; 1269 else 1270 nmp->nm_maxfilesize = OFF_MAX; 1271 1272 nmp->nm_timeo = NFS_TIMEO; 1273 nmp->nm_retry = NFS_RETRANS; 1274 if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) { 1275 nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA; 1276 } else { 1277 nmp->nm_wsize = NFS_WSIZE; 1278 nmp->nm_rsize = NFS_RSIZE; 1279 } 1280 nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000); 1281 nmp->nm_readdirsize = NFS_READDIRSIZE; 1282 nmp->nm_numgrps = NFS_MAXGRPS; 1283 nmp->nm_readahead = NFS_DEFRAHEAD; 1284 nmp->nm_deadthresh = NFS_MAXDEADTHRESH; 1285 nmp->nm_nametimeo = nametimeo; 1286 nmp->nm_negnametimeo = negnametimeo; 1287 nmp->nm_tprintf_delay = nfs_tprintf_delay; 1288 if (nmp->nm_tprintf_delay < 0) 1289 nmp->nm_tprintf_delay = 0; 1290 nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; 1291 if (nmp->nm_tprintf_initial_delay < 0) 1292 nmp->nm_tprintf_initial_delay = 0; 1293 nmp->nm_fhsize = argp->fhsize; 1294 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); 1295 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); 1296 nmp->nm_nam = nam; 1297 /* Set up the sockets and per-host congestion */ 1298 nmp->nm_sotype = argp->sotype; 1299 nmp->nm_soproto = argp->proto; 1300 nmp->nm_rpcops = &nfs_rpcops; 1301 1302 nfs_decode_args(mp, nmp, argp, hst); 1303 1304 /* 1305 * For Connection based sockets (TCP,...) defer the connect until 1306 * the first request, in case the server is not responding. 1307 */ 1308 if (nmp->nm_sotype == SOCK_DGRAM && 1309 (error = nfs_connect(nmp))) 1310 goto bad; 1311 1312 /* 1313 * This is silly, but it has to be set so that vinifod() works. 1314 * We do not want to do an nfs_statfs() here since we can get 1315 * stuck on a dead server and we are holding a lock on the mount 1316 * point. 
1317 */ 1318 mtx_lock(&nmp->nm_mtx); 1319 mp->mnt_stat.f_iosize = nfs_iosize(nmp); 1320 mtx_unlock(&nmp->nm_mtx); 1321 /* 1322 * A reference count is needed on the nfsnode representing the 1323 * remote root. If this object is not persistent, then backward 1324 * traversals of the mount point (i.e. "..") will not work if 1325 * the nfsnode gets flushed out of the cache. Ufs does not have 1326 * this problem, because one can identify root inodes by their 1327 * number == ROOTINO (2). 1328 */ 1329 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); 1330 if (error) 1331 goto bad; 1332 *vpp = NFSTOV(np); 1333 1334 /* 1335 * Get file attributes and transfer parameters for the 1336 * mountpoint. This has the side effect of filling in 1337 * (*vpp)->v_type with the correct value. 1338 */ 1339 if (argp->flags & NFSMNT_NFSV3) 1340 nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread); 1341 else 1342 VOP_GETATTR(*vpp, &attrs, curthread->td_ucred); 1343 1344 /* 1345 * Lose the lock but keep the ref. 1346 */ 1347 VOP_UNLOCK(*vpp, 0); 1348 1349 return (0); 1350bad: 1351 nfs_disconnect(nmp); 1352 mtx_destroy(&nmp->nm_mtx); 1353 uma_zfree(nfsmount_zone, nmp); 1354 free(nam, M_SONAME); 1355 return (error); 1356} 1357 1358/* 1359 * unmount system call 1360 */ 1361static int 1362nfs_unmount(struct mount *mp, int mntflags) 1363{ 1364 struct nfsmount *nmp; 1365 int error, flags = 0, i; 1366 1367 if (mntflags & MNT_FORCE) 1368 flags |= FORCECLOSE; 1369 nmp = VFSTONFS(mp); 1370 /* 1371 * Goes something like this.. 1372 * - Call vflush() to clear out vnodes for this filesystem 1373 * - Close the socket 1374 * - Free up the data structures 1375 */ 1376 /* In the forced case, cancel any outstanding requests. */ 1377 if (flags & FORCECLOSE) { 1378 error = nfs_nmcancelreqs(nmp); 1379 if (error) 1380 goto out; 1381 } 1382 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). 
*/ 1383 error = vflush(mp, 1, flags, curthread); 1384 if (error) 1385 goto out; 1386 1387 /* 1388 * We are now committed to the unmount. 1389 */ 1390 /* Make sure no nfsiods are assigned to this mount. */ 1391 mtx_lock(&nfs_iod_mtx); 1392 for (i = 0; i < NFS_MAXASYNCDAEMON; i++) 1393 if (nfs_iodmount[i] == nmp) { 1394 nfs_iodwant[i] = NFSIOD_AVAILABLE; 1395 nfs_iodmount[i] = NULL; 1396 } 1397 mtx_unlock(&nfs_iod_mtx); 1398 nfs_disconnect(nmp); 1399 free(nmp->nm_nam, M_SONAME); 1400 1401 mtx_destroy(&nmp->nm_mtx); 1402 uma_zfree(nfsmount_zone, nmp); 1403out: 1404 return (error); 1405} 1406 1407/* 1408 * Return root of a filesystem 1409 */ 1410static int 1411nfs_root(struct mount *mp, int flags, struct vnode **vpp) 1412{ 1413 struct vnode *vp; 1414 struct nfsmount *nmp; 1415 struct nfsnode *np; 1416 int error; 1417 1418 nmp = VFSTONFS(mp); 1419 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags); 1420 if (error) 1421 return error; 1422 vp = NFSTOV(np); 1423 /* 1424 * Get transfer parameters and attributes for root vnode once. 1425 */ 1426 mtx_lock(&nmp->nm_mtx); 1427 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 && 1428 (nmp->nm_flag & NFSMNT_NFSV3)) { 1429 mtx_unlock(&nmp->nm_mtx); 1430 nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread); 1431 } else 1432 mtx_unlock(&nmp->nm_mtx); 1433 if (vp->v_type == VNON) 1434 vp->v_type = VDIR; 1435 vp->v_vflag |= VV_ROOT; 1436 *vpp = vp; 1437 return (0); 1438} 1439 1440/* 1441 * Flush out the buffer cache 1442 */ 1443/* ARGSUSED */ 1444static int 1445nfs_sync(struct mount *mp, int waitfor) 1446{ 1447 struct vnode *vp, *mvp; 1448 struct thread *td; 1449 int error, allerror = 0; 1450 1451 td = curthread; 1452 1453 MNT_ILOCK(mp); 1454 /* 1455 * If a forced dismount is in progress, return from here so that 1456 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before 1457 * calling VFS_UNMOUNT(). 
1458 */ 1459 if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { 1460 MNT_IUNLOCK(mp); 1461 return (EBADF); 1462 } 1463 MNT_IUNLOCK(mp); 1464 1465 /* 1466 * Force stale buffer cache information to be flushed. 1467 */ 1468loop: 1469 MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { 1470 /* XXX Racy bv_cnt check. */ 1471 if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 || 1472 waitfor == MNT_LAZY) { 1473 VI_UNLOCK(vp); 1474 continue; 1475 } 1476 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 1477 MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); 1478 goto loop; 1479 } 1480 error = VOP_FSYNC(vp, waitfor, td); 1481 if (error) 1482 allerror = error; 1483 VOP_UNLOCK(vp, 0); 1484 vrele(vp); 1485 } 1486 return (allerror); 1487} 1488 1489static int 1490nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req) 1491{ 1492 struct nfsmount *nmp = VFSTONFS(mp); 1493 struct vfsquery vq; 1494 int error; 1495 1496 bzero(&vq, sizeof(vq)); 1497 switch (op) { 1498#if 0 1499 case VFS_CTL_NOLOCKS: 1500 val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 
1 : 0; 1501 if (req->oldptr != NULL) { 1502 error = SYSCTL_OUT(req, &val, sizeof(val)); 1503 if (error) 1504 return (error); 1505 } 1506 if (req->newptr != NULL) { 1507 error = SYSCTL_IN(req, &val, sizeof(val)); 1508 if (error) 1509 return (error); 1510 if (val) 1511 nmp->nm_flag |= NFSMNT_NOLOCKS; 1512 else 1513 nmp->nm_flag &= ~NFSMNT_NOLOCKS; 1514 } 1515 break; 1516#endif 1517 case VFS_CTL_QUERY: 1518 mtx_lock(&nmp->nm_mtx); 1519 if (nmp->nm_state & NFSSTA_TIMEO) 1520 vq.vq_flags |= VQ_NOTRESP; 1521 mtx_unlock(&nmp->nm_mtx); 1522#if 0 1523 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && 1524 (nmp->nm_state & NFSSTA_LOCKTIMEO)) 1525 vq.vq_flags |= VQ_NOTRESPLOCK; 1526#endif 1527 error = SYSCTL_OUT(req, &vq, sizeof(vq)); 1528 break; 1529 case VFS_CTL_TIMEO: 1530 if (req->oldptr != NULL) { 1531 error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, 1532 sizeof(nmp->nm_tprintf_initial_delay)); 1533 if (error) 1534 return (error); 1535 } 1536 if (req->newptr != NULL) { 1537 error = vfs_suser(mp, req->td); 1538 if (error) 1539 return (error); 1540 error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, 1541 sizeof(nmp->nm_tprintf_initial_delay)); 1542 if (error) 1543 return (error); 1544 if (nmp->nm_tprintf_initial_delay < 0) 1545 nmp->nm_tprintf_initial_delay = 0; 1546 } 1547 break; 1548 default: 1549 return (ENOTSUP); 1550 } 1551 return (0); 1552} 1553 1554/* 1555 * Extract the information needed by the nlm from the nfs vnode. 
1556 */ 1557static void 1558nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp, 1559 struct sockaddr_storage *sp, int *is_v3p, off_t *sizep, 1560 struct timeval *timeop) 1561{ 1562 struct nfsmount *nmp; 1563 struct nfsnode *np = VTONFS(vp); 1564 1565 nmp = VFSTONFS(vp->v_mount); 1566 if (fhlenp != NULL) 1567 *fhlenp = (size_t)np->n_fhsize; 1568 if (fhp != NULL) 1569 bcopy(np->n_fhp, fhp, np->n_fhsize); 1570 if (sp != NULL) 1571 bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp))); 1572 if (is_v3p != NULL) 1573 *is_v3p = NFS_ISV3(vp); 1574 if (sizep != NULL) 1575 *sizep = np->n_size; 1576 if (timeop != NULL) { 1577 timeop->tv_sec = nmp->nm_timeo / NFS_HZ; 1578 timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ); 1579 } 1580} 1581 1582