1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD$");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/jail.h>
48#include <sys/limits.h>
49#include <sys/lock.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/mount.h>
54#include <sys/proc.h>
55#include <sys/socket.h>
56#include <sys/socketvar.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/syslog.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <net/vnet.h>
70
71#include <netinet/in.h>
72
73#include <rpc/rpc.h>
74
75#include <nfs/nfsproto.h>
76#include <nfsclient/nfs.h>
77#include <nfsclient/nfsnode.h>
78#include <nfsclient/nfsmount.h>
79#include <nfs/xdr_subs.h>
80#include <nfsclient/nfsm_subs.h>
81#include <nfs/nfsdiskless.h>
82
83FEATURE(nfsclient, "NFS client");
84
85MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header");
86MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle");
87MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data");
88MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables");
89MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state");
90
91uma_zone_t nfsmount_zone;
92
93struct nfsstats	nfsstats;
94
95SYSCTL_NODE(_vfs, OID_AUTO, oldnfs, CTLFLAG_RW, 0, "Old NFS filesystem");
96SYSCTL_STRUCT(_vfs_oldnfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW,
97	&nfsstats, nfsstats, "S,nfsstats");
98static int nfs_ip_paranoia = 1;
99SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
100    &nfs_ip_paranoia, 0,
101    "Disallow accepting replies from IPs which differ from those sent");
102#ifdef NFS_DEBUG
103int nfs_debug;
104SYSCTL_INT(_vfs_oldnfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
105    "Toggle debug flag");
106#endif
107static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
108SYSCTL_INT(_vfs_oldnfs, NFS_TPRINTF_INITIAL_DELAY,
109    downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0,
110    "Delay before printing \"nfs server not responding\" messages");
111/* how long between console messages "nfs server foo not responding" */
112static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
113SYSCTL_INT(_vfs_oldnfs, NFS_TPRINTF_DELAY,
114    downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0,
115    "Delay between printing \"nfs server not responding\" messages");
116
117static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
118		    struct nfs_args *argp, const char *hostname);
119static int	mountnfs(struct nfs_args *, struct mount *,
120		    struct sockaddr *, char *, struct vnode **,
121		    struct ucred *cred, int, int);
122static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
123		    struct sockaddr_storage *, int *, off_t *,
124		    struct timeval *);
125static vfs_mount_t nfs_mount;
126static vfs_cmount_t nfs_cmount;
127static vfs_unmount_t nfs_unmount;
128static vfs_root_t nfs_root;
129static vfs_statfs_t nfs_statfs;
130static vfs_sync_t nfs_sync;
131static vfs_sysctl_t nfs_sysctl;
132
133static int	fake_wchan;
134
135/*
136 * nfs vfs operations.
137 */
138static struct vfsops nfs_vfsops = {
139	.vfs_init =		nfs_init,
140	.vfs_mount =		nfs_mount,
141	.vfs_cmount =		nfs_cmount,
142	.vfs_root =		nfs_root,
143	.vfs_statfs =		nfs_statfs,
144	.vfs_sync =		nfs_sync,
145	.vfs_uninit =		nfs_uninit,
146	.vfs_unmount =		nfs_unmount,
147	.vfs_sysctl =		nfs_sysctl,
148};
149VFS_SET(nfs_vfsops, oldnfs, VFCF_NETWORK | VFCF_SBDRY);
150
151/* So that loader and kldload(2) can find us, wherever we are.. */
152MODULE_VERSION(oldnfs, 1);
153MODULE_DEPEND(oldnfs, krpc, 1, 1, 1);
154#ifdef KGSSAPI
155MODULE_DEPEND(oldnfs, kgssapi, 1, 1, 1);
156#endif
157MODULE_DEPEND(oldnfs, nfs_common, 1, 1, 1);
158MODULE_DEPEND(oldnfs, nfslock, 1, 1, 1);
159
160static struct nfs_rpcops nfs_rpcops = {
161	nfs_readrpc,
162	nfs_writerpc,
163	nfs_writebp,
164	nfs_readlinkrpc,
165	nfs_invaldir,
166	nfs_commit,
167};
168
169/*
170 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
171 * can be shared by both NFS clients. It is declared here so that it
172 * will be defined for kernels built without NFS_ROOT, although it
173 * isn't used in that case.
174 */
175#ifndef NFS_ROOT
176struct nfs_diskless	nfs_diskless = { { { 0 } } };
177struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
178int			nfs_diskless_valid = 0;
179#endif
180
181SYSCTL_INT(_vfs_oldnfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
182    &nfs_diskless_valid, 0,
183    "Has the diskless struct been filled correctly");
184
185SYSCTL_STRING(_vfs_oldnfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
186    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
187
188SYSCTL_OPAQUE(_vfs_oldnfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
189    &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
190    "%Ssockaddr_in", "Diskless root nfs address");
191
192
/* Prototypes for the diskless-boot helpers. */
void		nfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *, struct sockaddr_in *,
		    struct nfs_args *, struct thread *, struct vnode **,
		    struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
200
201int
202nfs_iosize(struct nfsmount *nmp)
203{
204	int iosize;
205
206	/*
207	 * Calculate the size used for io buffers.  Use the larger
208	 * of the two sizes to minimise nfs requests but make sure
209	 * that it is at least one VM page to avoid wasting buffer
210	 * space.
211	 */
212	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
213	iosize = imax(iosize, PAGE_SIZE);
214	return (iosize);
215}
216
217static void
218nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
219{
220
221	args->version = NFS_ARGSVERSION;
222	args->addr = oargs->addr;
223	args->addrlen = oargs->addrlen;
224	args->sotype = oargs->sotype;
225	args->proto = oargs->proto;
226	args->fh = oargs->fh;
227	args->fhsize = oargs->fhsize;
228	args->flags = oargs->flags;
229	args->wsize = oargs->wsize;
230	args->rsize = oargs->rsize;
231	args->readdirsize = oargs->readdirsize;
232	args->timeo = oargs->timeo;
233	args->retrans = oargs->retrans;
234	args->maxgrouplist = oargs->maxgrouplist;
235	args->readahead = oargs->readahead;
236	args->deadthresh = oargs->deadthresh;
237	args->hostname = oargs->hostname;
238}
239
240static void
241nfs_convert_diskless(void)
242{
243
244	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
245		sizeof(struct ifaliasreq));
246	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
247		sizeof(struct sockaddr_in));
248	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
249	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
250		nfsv3_diskless.root_fhsize = NFSX_V3FH;
251		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH);
252	} else {
253		nfsv3_diskless.root_fhsize = NFSX_V2FH;
254		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
255	}
256	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
257		sizeof(struct sockaddr_in));
258	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
259	nfsv3_diskless.root_time = nfs_diskless.root_time;
260	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
261		MAXHOSTNAMELEN);
262	nfs_diskless_valid = 3;
263}
264
265/*
266 * nfs statfs call
267 */
268static int
269nfs_statfs(struct mount *mp, struct statfs *sbp)
270{
271	struct vnode *vp;
272	struct thread *td;
273	struct nfs_statfs *sfp;
274	caddr_t bpos, dpos;
275	struct nfsmount *nmp = VFSTONFS(mp);
276	int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr;
277	struct mbuf *mreq, *mrep, *md, *mb;
278	struct nfsnode *np;
279	u_quad_t tquad;
280
281	td = curthread;
282#ifndef nolint
283	sfp = NULL;
284#endif
285	error = vfs_busy(mp, MBF_NOWAIT);
286	if (error)
287		return (error);
288	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
289	if (error) {
290		vfs_unbusy(mp);
291		return (error);
292	}
293	vp = NFSTOV(np);
294	mtx_lock(&nmp->nm_mtx);
295	if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
296		mtx_unlock(&nmp->nm_mtx);
297		(void)nfs_fsinfo(nmp, vp, td->td_ucred, td);
298	} else
299		mtx_unlock(&nmp->nm_mtx);
300	nfsstats.rpccnt[NFSPROC_FSSTAT]++;
301	mreq = m_get2(NFSX_FH(v3), M_WAITOK, MT_DATA, 0);
302	mb = mreq;
303	bpos = mtod(mb, caddr_t);
304	nfsm_fhtom(vp, v3);
305	nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred);
306	if (v3)
307		nfsm_postop_attr(vp, retattr);
308	if (error) {
309		if (mrep != NULL)
310			m_freem(mrep);
311		goto nfsmout;
312	}
313	sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3));
314	mtx_lock(&nmp->nm_mtx);
315	sbp->f_iosize = nfs_iosize(nmp);
316	mtx_unlock(&nmp->nm_mtx);
317	if (v3) {
318		sbp->f_bsize = NFS_FABLKSIZE;
319		tquad = fxdr_hyper(&sfp->sf_tbytes);
320		sbp->f_blocks = tquad / NFS_FABLKSIZE;
321		tquad = fxdr_hyper(&sfp->sf_fbytes);
322		sbp->f_bfree = tquad / NFS_FABLKSIZE;
323		tquad = fxdr_hyper(&sfp->sf_abytes);
324		sbp->f_bavail = tquad / NFS_FABLKSIZE;
325		sbp->f_files = (fxdr_unsigned(int32_t,
326		    sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
327		sbp->f_ffree = (fxdr_unsigned(int32_t,
328		    sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
329	} else {
330		sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
331		sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
332		sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
333		sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
334		sbp->f_files = 0;
335		sbp->f_ffree = 0;
336	}
337	m_freem(mrep);
338nfsmout:
339	vput(vp);
340	vfs_unbusy(mp);
341	return (error);
342}
343
344/*
345 * nfs version 3 fsinfo rpc call
346 */
347int
348nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
349    struct thread *td)
350{
351	struct nfsv3_fsinfo *fsp;
352	u_int32_t pref, max;
353	caddr_t bpos, dpos;
354	int error = 0, retattr;
355	struct mbuf *mreq, *mrep, *md, *mb;
356	u_int64_t maxfsize;
357
358	nfsstats.rpccnt[NFSPROC_FSINFO]++;
359	mreq = m_get2(NFSX_FH(1), M_WAITOK, MT_DATA, 0);
360	mb = mreq;
361	bpos = mtod(mb, caddr_t);
362	nfsm_fhtom(vp, 1);
363	nfsm_request(vp, NFSPROC_FSINFO, td, cred);
364	nfsm_postop_attr(vp, retattr);
365	if (!error) {
366		fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
367		pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
368		mtx_lock(&nmp->nm_mtx);
369		if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
370			nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) &
371				~(NFS_FABLKSIZE - 1);
372		max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
373		if (max < nmp->nm_wsize && max > 0) {
374			nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
375			if (nmp->nm_wsize == 0)
376				nmp->nm_wsize = max;
377		}
378		pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
379		if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
380			nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) &
381				~(NFS_FABLKSIZE - 1);
382		max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
383		if (max < nmp->nm_rsize && max > 0) {
384			nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
385			if (nmp->nm_rsize == 0)
386				nmp->nm_rsize = max;
387		}
388		pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
389		if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
390			nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) &
391				~(NFS_DIRBLKSIZ - 1);
392		if (max < nmp->nm_readdirsize && max > 0) {
393			nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
394			if (nmp->nm_readdirsize == 0)
395				nmp->nm_readdirsize = max;
396		}
397		maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
398		if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
399			nmp->nm_maxfilesize = maxfsize;
400		nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp);
401		nmp->nm_state |= NFSSTA_GOTFSINFO;
402		mtx_unlock(&nmp->nm_mtx);
403	}
404	m_freem(mrep);
405nfsmout:
406	return (error);
407}
408
409/*
410 * Mount a remote root fs via. nfs. This depends on the info in the
411 * nfs_diskless structure that has been filled in properly by some primary
412 * bootstrap.
413 * It goes something like this:
414 * - do enough of "ifconfig" by calling ifioctl() so that the system
415 *   can talk to the server
416 * - If nfs_diskless.mygateway is filled in, use that address as
417 *   a default gateway.
418 * - build the rootfs mount point and call mountnfs() to do the rest.
419 *
420 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
421 * structure, as well as other global NFS client variables here, as
422 * nfs_mountroot() will be called once in the boot before any other NFS
423 * client activity occurs.
424 */
425int
426nfs_mountroot(struct mount *mp)
427{
428	struct thread *td = curthread;
429	struct nfsv3_diskless *nd = &nfsv3_diskless;
430	struct socket *so;
431	struct vnode *vp;
432	struct ifreq ir;
433	int error;
434	u_long l;
435	char buf[128];
436	char *cp;
437
438
439#if defined(BOOTP_NFSROOT) && defined(BOOTP)
440	bootpc_init();		/* use bootp to get nfs_diskless filled in */
441#elif defined(NFS_ROOT)
442	nfs_setup_diskless();
443#endif
444
445	if (nfs_diskless_valid == 0) {
446		return (-1);
447	}
448	if (nfs_diskless_valid == 1)
449		nfs_convert_diskless();
450
451	/*
452	 * XXX splnet, so networks will receive...
453	 */
454	splnet();
455
456	/*
457	 * Do enough of ifconfig(8) so that the critical net interface can
458	 * talk to the server.
459	 */
460	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
461	    td->td_ucred, td);
462	if (error)
463		panic("nfs_mountroot: socreate(%04x): %d",
464			nd->myif.ifra_addr.sa_family, error);
465
466#if 0 /* XXX Bad idea */
467	/*
468	 * We might not have been told the right interface, so we pass
469	 * over the first ten interfaces of the same kind, until we get
470	 * one of them configured.
471	 */
472
473	for (i = strlen(nd->myif.ifra_name) - 1;
474		nd->myif.ifra_name[i] >= '0' &&
475		nd->myif.ifra_name[i] <= '9';
476		nd->myif.ifra_name[i] ++) {
477		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
478		if(!error)
479			break;
480	}
481#endif
482
483	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
484	if (error)
485		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
486
487	if ((cp = getenv("boot.netif.mtu")) != NULL) {
488		ir.ifr_mtu = strtol(cp, NULL, 10);
489		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
490		freeenv(cp);
491		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
492		if (error)
493			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
494	}
495	soclose(so);
496
497	/*
498	 * If the gateway field is filled in, set it as the default route.
499	 * Note that pxeboot will set a default route of 0 if the route
500	 * is not set by the DHCP server.  Check also for a value of 0
501	 * to avoid panicking inappropriately in that situation.
502	 */
503	if (nd->mygateway.sin_len != 0 &&
504	    nd->mygateway.sin_addr.s_addr != 0) {
505		struct sockaddr_in mask, sin;
506
507		bzero((caddr_t)&mask, sizeof(mask));
508		sin = mask;
509		sin.sin_family = AF_INET;
510		sin.sin_len = sizeof(sin);
511                /* XXX MRT use table 0 for this sort of thing */
512		CURVNET_SET(TD_TO_VNET(td));
513		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
514		    (struct sockaddr *)&nd->mygateway,
515		    (struct sockaddr *)&mask,
516		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
517		CURVNET_RESTORE();
518		if (error)
519			panic("nfs_mountroot: RTM_ADD: %d", error);
520	}
521
522	/*
523	 * Create the rootfs mount point.
524	 */
525	nd->root_args.fh = nd->root_fh;
526	nd->root_args.fhsize = nd->root_fhsize;
527	l = ntohl(nd->root_saddr.sin_addr.s_addr);
528	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
529		(l >> 24) & 0xff, (l >> 16) & 0xff,
530		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
531	printf("NFS ROOT: %s\n", buf);
532	nd->root_args.hostname = buf;
533	if ((error = nfs_mountdiskless(buf,
534	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
535		return (error);
536	}
537
538	/*
539	 * This is not really an nfs issue, but it is much easier to
540	 * set hostname here and then let the "/etc/rc.xxx" files
541	 * mount the right /var based upon its preset value.
542	 */
543	mtx_lock(&prison0.pr_mtx);
544	strlcpy(prison0.pr_hostname, nd->my_hostnam,
545	    sizeof (prison0.pr_hostname));
546	mtx_unlock(&prison0.pr_mtx);
547	inittodr(ntohl(nd->root_time));
548	return (0);
549}
550
551/*
552 * Internal version of mount system call for diskless setup.
553 */
554static int
555nfs_mountdiskless(char *path,
556    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
557    struct vnode **vpp, struct mount *mp)
558{
559	struct sockaddr *nam;
560	int error;
561
562	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
563	if ((error = mountnfs(args, mp, nam, path, vpp, td->td_ucred,
564	    NFS_DEFAULT_NAMETIMEO, NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
565		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
566		return (error);
567	}
568	return (0);
569}
570
571static int
572nfs_sec_name_to_num(char *sec)
573{
574	if (!strcmp(sec, "krb5"))
575		return (RPCSEC_GSS_KRB5);
576	if (!strcmp(sec, "krb5i"))
577		return (RPCSEC_GSS_KRB5I);
578	if (!strcmp(sec, "krb5p"))
579		return (RPCSEC_GSS_KRB5P);
580	if (!strcmp(sec, "sys"))
581		return (AUTH_SYS);
582	/*
583	 * Userland should validate the string but we will try and
584	 * cope with unexpected values.
585	 */
586	return (AUTH_SYS);
587}
588
589static void
590nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
591	const char *hostname)
592{
593	int s;
594	int adjsock;
595	int maxio;
596	char *p;
597	char *secname;
598	char *principal;
599
600	s = splnet();
601
602	/*
603	 * Set read-only flag if requested; otherwise, clear it if this is
604	 * an update.  If this is not an update, then either the read-only
605	 * flag is already clear, or this is a root mount and it was set
606	 * intentionally at some previous point.
607	 */
608	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
609		MNT_ILOCK(mp);
610		mp->mnt_flag |= MNT_RDONLY;
611		MNT_IUNLOCK(mp);
612	} else if (mp->mnt_flag & MNT_UPDATE) {
613		MNT_ILOCK(mp);
614		mp->mnt_flag &= ~MNT_RDONLY;
615		MNT_IUNLOCK(mp);
616	}
617
618	/*
619	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
620	 * no sense in that context.  Also, set up appropriate retransmit
621	 * and soft timeout behavior.
622	 */
623	if (argp->sotype == SOCK_STREAM) {
624		nmp->nm_flag &= ~NFSMNT_NOCONN;
625		nmp->nm_flag |= NFSMNT_DUMBTIMR;
626		nmp->nm_timeo = NFS_MAXTIMEO;
627		nmp->nm_retry = NFS_RETRANS_TCP;
628	}
629
630	/* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
631	if ((argp->flags & NFSMNT_NFSV3) == 0)
632		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
633
634	/* Re-bind if rsrvd port requested and wasn't on one */
635	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
636		  && (argp->flags & NFSMNT_RESVPORT);
637	/* Also re-bind if we're switching to/from a connected UDP socket */
638	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
639		    (argp->flags & NFSMNT_NOCONN));
640
641	/* Update flags atomically.  Don't change the lock bits. */
642	nmp->nm_flag = argp->flags | nmp->nm_flag;
643	splx(s);
644
645	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
646		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
647		if (nmp->nm_timeo < NFS_MINTIMEO)
648			nmp->nm_timeo = NFS_MINTIMEO;
649		else if (nmp->nm_timeo > NFS_MAXTIMEO)
650			nmp->nm_timeo = NFS_MAXTIMEO;
651	}
652
653	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
654		nmp->nm_retry = argp->retrans;
655		if (nmp->nm_retry > NFS_MAXREXMIT)
656			nmp->nm_retry = NFS_MAXREXMIT;
657	}
658
659	if (argp->flags & NFSMNT_NFSV3) {
660		if (argp->sotype == SOCK_DGRAM)
661			maxio = NFS_MAXDGRAMDATA;
662		else
663			maxio = NFS_MAXDATA;
664	} else
665		maxio = NFS_V2MAXDATA;
666
667	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
668		nmp->nm_wsize = argp->wsize;
669		/* Round down to multiple of blocksize */
670		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
671		if (nmp->nm_wsize <= 0)
672			nmp->nm_wsize = NFS_FABLKSIZE;
673	}
674	if (nmp->nm_wsize > maxio)
675		nmp->nm_wsize = maxio;
676	if (nmp->nm_wsize > MAXBSIZE)
677		nmp->nm_wsize = MAXBSIZE;
678
679	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
680		nmp->nm_rsize = argp->rsize;
681		/* Round down to multiple of blocksize */
682		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
683		if (nmp->nm_rsize <= 0)
684			nmp->nm_rsize = NFS_FABLKSIZE;
685	}
686	if (nmp->nm_rsize > maxio)
687		nmp->nm_rsize = maxio;
688	if (nmp->nm_rsize > MAXBSIZE)
689		nmp->nm_rsize = MAXBSIZE;
690
691	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
692		nmp->nm_readdirsize = argp->readdirsize;
693	}
694	if (nmp->nm_readdirsize > maxio)
695		nmp->nm_readdirsize = maxio;
696	if (nmp->nm_readdirsize > nmp->nm_rsize)
697		nmp->nm_readdirsize = nmp->nm_rsize;
698
699	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
700		nmp->nm_acregmin = argp->acregmin;
701	else
702		nmp->nm_acregmin = NFS_MINATTRTIMO;
703	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
704		nmp->nm_acregmax = argp->acregmax;
705	else
706		nmp->nm_acregmax = NFS_MAXATTRTIMO;
707	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
708		nmp->nm_acdirmin = argp->acdirmin;
709	else
710		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
711	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
712		nmp->nm_acdirmax = argp->acdirmax;
713	else
714		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
715	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
716		nmp->nm_acdirmin = nmp->nm_acdirmax;
717	if (nmp->nm_acregmin > nmp->nm_acregmax)
718		nmp->nm_acregmin = nmp->nm_acregmax;
719
720	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
721		if (argp->maxgrouplist <= NFS_MAXGRPS)
722			nmp->nm_numgrps = argp->maxgrouplist;
723		else
724			nmp->nm_numgrps = NFS_MAXGRPS;
725	}
726	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
727		if (argp->readahead <= NFS_MAXRAHEAD)
728			nmp->nm_readahead = argp->readahead;
729		else
730			nmp->nm_readahead = NFS_MAXRAHEAD;
731	}
732	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
733		if (argp->wcommitsize < nmp->nm_wsize)
734			nmp->nm_wcommitsize = nmp->nm_wsize;
735		else
736			nmp->nm_wcommitsize = argp->wcommitsize;
737	}
738	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
739		if (argp->deadthresh <= NFS_MAXDEADTHRESH)
740			nmp->nm_deadthresh = argp->deadthresh;
741		else
742			nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
743	}
744
745	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
746		    (nmp->nm_soproto != argp->proto));
747	nmp->nm_sotype = argp->sotype;
748	nmp->nm_soproto = argp->proto;
749
750	if (nmp->nm_client && adjsock) {
751		nfs_safedisconnect(nmp);
752		if (nmp->nm_sotype == SOCK_DGRAM)
753			while (nfs_connect(nmp)) {
754				printf("nfs_args: retrying connect\n");
755				(void) tsleep(&fake_wchan, PSOCK, "nfscon", hz);
756			}
757	}
758
759	if (hostname) {
760		strlcpy(nmp->nm_hostname, hostname,
761		    sizeof(nmp->nm_hostname));
762		p = strchr(nmp->nm_hostname, ':');
763		if (p)
764			*p = '\0';
765	}
766
767	if (vfs_getopt(mp->mnt_optnew, "sec",
768		(void **) &secname, NULL) == 0) {
769		nmp->nm_secflavor = nfs_sec_name_to_num(secname);
770	} else {
771		nmp->nm_secflavor = AUTH_SYS;
772	}
773
774	if (vfs_getopt(mp->mnt_optnew, "principal",
775		(void **) &principal, NULL) == 0) {
776		strlcpy(nmp->nm_principal, principal,
777		    sizeof(nmp->nm_principal));
778	} else {
779		snprintf(nmp->nm_principal, sizeof(nmp->nm_principal),
780		    "nfs@%s", nmp->nm_hostname);
781	}
782}
783
/*
 * NULL-terminated list of the mount option names that nfs_mount() hands
 * to vfs_filteropt(); a non-zero result from that call fails the mount
 * with EINVAL.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async",
	"dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport",
	"readahead", "readdirsize",
	"soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"deadthresh", "hostname", "timeout", "addr", "fh", "nfsv3",
	"sec", "maxgroups", "principal", "negnametimeo", "nocto",
	"wcommitsize", "nametimeo",
	NULL
};
794
795/*
796 * VFS Operations.
797 *
798 * mount system call
799 * It seems a bit dumb to copyinstr() the host and path here and then
800 * bcopy() them in mountnfs(), but I wanted to detect errors before
801 * doing the sockargs() call because sockargs() allocates an mbuf and
802 * an error after that means that I have to release the mbuf.
803 */
804/* ARGSUSED */
805static int
806nfs_mount(struct mount *mp)
807{
808	struct nfs_args args = {
809	    .version = NFS_ARGSVERSION,
810	    .addr = NULL,
811	    .addrlen = sizeof (struct sockaddr_in),
812	    .sotype = SOCK_STREAM,
813	    .proto = 0,
814	    .fh = NULL,
815	    .fhsize = 0,
816	    .flags = NFSMNT_RESVPORT,
817	    .wsize = NFS_WSIZE,
818	    .rsize = NFS_RSIZE,
819	    .readdirsize = NFS_READDIRSIZE,
820	    .timeo = 10,
821	    .retrans = NFS_RETRANS,
822	    .maxgrouplist = NFS_MAXGRPS,
823	    .readahead = NFS_DEFRAHEAD,
824	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
825	    .deadthresh = NFS_MAXDEADTHRESH,	/* was: NQ_DEADTHRESH */
826	    .hostname = NULL,
827	    /* args version 4 */
828	    .acregmin = NFS_MINATTRTIMO,
829	    .acregmax = NFS_MAXATTRTIMO,
830	    .acdirmin = NFS_MINDIRATTRTIMO,
831	    .acdirmax = NFS_MAXDIRATTRTIMO,
832	};
833	int error, ret, has_nfs_args_opt;
834	int has_addr_opt, has_fh_opt, has_hostname_opt;
835	struct sockaddr *nam;
836	struct vnode *vp;
837	char hst[MNAMELEN];
838	size_t len;
839	u_char nfh[NFSX_V3FHMAX];
840	char *opt;
841	int nametimeo = NFS_DEFAULT_NAMETIMEO;
842	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
843
844	has_nfs_args_opt = 0;
845	has_addr_opt = 0;
846	has_fh_opt = 0;
847	has_hostname_opt = 0;
848
849	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
850		error = EINVAL;
851		goto out;
852	}
853
854	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
855		error = nfs_mountroot(mp);
856		goto out;
857	}
858
859	/*
860	 * The old mount_nfs program passed the struct nfs_args
861	 * from userspace to kernel.  The new mount_nfs program
862	 * passes string options via nmount() from userspace to kernel
863	 * and we populate the struct nfs_args in the kernel.
864	 */
865	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
866		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
867		    sizeof args);
868		if (error)
869			goto out;
870
871		if (args.version != NFS_ARGSVERSION) {
872			error = EPROGMISMATCH;
873			goto out;
874		}
875		has_nfs_args_opt = 1;
876	}
877
878	if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
879		args.flags |= NFSMNT_DUMBTIMR;
880	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
881		args.flags |= NFSMNT_NOCONN;
882	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
883		args.flags |= NFSMNT_NOCONN;
884	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
885		args.flags |= NFSMNT_NOLOCKD;
886	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
887		args.flags &= ~NFSMNT_NOLOCKD;
888	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
889		args.flags |= NFSMNT_INT;
890	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
891		args.flags |= NFSMNT_RDIRPLUS;
892	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
893		args.flags |= NFSMNT_RESVPORT;
894	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
895		args.flags &= ~NFSMNT_RESVPORT;
896	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
897		args.flags |= NFSMNT_SOFT;
898	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
899		args.flags &= ~NFSMNT_SOFT;
900	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
901		args.sotype = SOCK_DGRAM;
902	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
903		args.sotype = SOCK_DGRAM;
904	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
905		args.sotype = SOCK_STREAM;
906	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
907		args.flags |= NFSMNT_NFSV3;
908	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
909		args.flags |= NFSMNT_NOCTO;
910	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
911		if (opt == NULL) {
912			vfs_mount_error(mp, "illegal readdirsize");
913			error = EINVAL;
914			goto out;
915		}
916		ret = sscanf(opt, "%d", &args.readdirsize);
917		if (ret != 1 || args.readdirsize <= 0) {
918			vfs_mount_error(mp, "illegal readdirsize: %s",
919			    opt);
920			error = EINVAL;
921			goto out;
922		}
923		args.flags |= NFSMNT_READDIRSIZE;
924	}
925	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
926		if (opt == NULL) {
927			vfs_mount_error(mp, "illegal readahead");
928			error = EINVAL;
929			goto out;
930		}
931		ret = sscanf(opt, "%d", &args.readahead);
932		if (ret != 1 || args.readahead <= 0) {
933			vfs_mount_error(mp, "illegal readahead: %s",
934			    opt);
935			error = EINVAL;
936			goto out;
937		}
938		args.flags |= NFSMNT_READAHEAD;
939	}
940	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
941		if (opt == NULL) {
942			vfs_mount_error(mp, "illegal wsize");
943			error = EINVAL;
944			goto out;
945		}
946		ret = sscanf(opt, "%d", &args.wsize);
947		if (ret != 1 || args.wsize <= 0) {
948			vfs_mount_error(mp, "illegal wsize: %s",
949			    opt);
950			error = EINVAL;
951			goto out;
952		}
953		args.flags |= NFSMNT_WSIZE;
954	}
955	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
956		if (opt == NULL) {
957			vfs_mount_error(mp, "illegal rsize");
958			error = EINVAL;
959			goto out;
960		}
961		ret = sscanf(opt, "%d", &args.rsize);
962		if (ret != 1 || args.rsize <= 0) {
963			vfs_mount_error(mp, "illegal wsize: %s",
964			    opt);
965			error = EINVAL;
966			goto out;
967		}
968		args.flags |= NFSMNT_RSIZE;
969	}
970	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
971		if (opt == NULL) {
972			vfs_mount_error(mp, "illegal retrans");
973			error = EINVAL;
974			goto out;
975		}
976		ret = sscanf(opt, "%d", &args.retrans);
977		if (ret != 1 || args.retrans <= 0) {
978			vfs_mount_error(mp, "illegal retrans: %s",
979			    opt);
980			error = EINVAL;
981			goto out;
982		}
983		args.flags |= NFSMNT_RETRANS;
984	}
985	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
986		ret = sscanf(opt, "%d", &args.acregmin);
987		if (ret != 1 || args.acregmin < 0) {
988			vfs_mount_error(mp, "illegal acregmin: %s",
989			    opt);
990			error = EINVAL;
991			goto out;
992		}
993		args.flags |= NFSMNT_ACREGMIN;
994	}
995	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
996		ret = sscanf(opt, "%d", &args.acregmax);
997		if (ret != 1 || args.acregmax < 0) {
998			vfs_mount_error(mp, "illegal acregmax: %s",
999			    opt);
1000			error = EINVAL;
1001			goto out;
1002		}
1003		args.flags |= NFSMNT_ACREGMAX;
1004	}
1005	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1006		ret = sscanf(opt, "%d", &args.acdirmin);
1007		if (ret != 1 || args.acdirmin < 0) {
1008			vfs_mount_error(mp, "illegal acdirmin: %s",
1009			    opt);
1010			error = EINVAL;
1011			goto out;
1012		}
1013		args.flags |= NFSMNT_ACDIRMIN;
1014	}
1015	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1016		ret = sscanf(opt, "%d", &args.acdirmax);
1017		if (ret != 1 || args.acdirmax < 0) {
1018			vfs_mount_error(mp, "illegal acdirmax: %s",
1019			    opt);
1020			error = EINVAL;
1021			goto out;
1022		}
1023		args.flags |= NFSMNT_ACDIRMAX;
1024	}
1025	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1026		ret = sscanf(opt, "%d", &args.wcommitsize);
1027		if (ret != 1 || args.wcommitsize < 0) {
1028			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1029			error = EINVAL;
1030			goto out;
1031		}
1032		args.flags |= NFSMNT_WCOMMITSIZE;
1033	}
1034	if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
1035		ret = sscanf(opt, "%d", &args.deadthresh);
1036		if (ret != 1 || args.deadthresh <= 0) {
1037			vfs_mount_error(mp, "illegal deadthresh: %s",
1038			    opt);
1039			error = EINVAL;
1040			goto out;
1041		}
1042		args.flags |= NFSMNT_DEADTHRESH;
1043	}
1044	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1045		ret = sscanf(opt, "%d", &args.timeo);
1046		if (ret != 1 || args.timeo <= 0) {
1047			vfs_mount_error(mp, "illegal timeout: %s",
1048			    opt);
1049			error = EINVAL;
1050			goto out;
1051		}
1052		args.flags |= NFSMNT_TIMEO;
1053	}
1054	if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
1055		ret = sscanf(opt, "%d", &args.maxgrouplist);
1056		if (ret != 1 || args.maxgrouplist <= 0) {
1057			vfs_mount_error(mp, "illegal maxgroups: %s",
1058			    opt);
1059			error = EINVAL;
1060			goto out;
1061		}
1062		args.flags |= NFSMNT_MAXGRPS;
1063	}
1064	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1065		ret = sscanf(opt, "%d", &nametimeo);
1066		if (ret != 1 || nametimeo < 0) {
1067			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1068			error = EINVAL;
1069			goto out;
1070		}
1071	}
1072	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1073	    == 0) {
1074		ret = sscanf(opt, "%d", &negnametimeo);
1075		if (ret != 1 || negnametimeo < 0) {
1076			vfs_mount_error(mp, "illegal negnametimeo: %s",
1077			    opt);
1078			error = EINVAL;
1079			goto out;
1080		}
1081	}
1082	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
1083		&args.addrlen) == 0) {
1084		has_addr_opt = 1;
1085		if (args.addrlen > SOCK_MAXADDRLEN) {
1086			error = ENAMETOOLONG;
1087			goto out;
1088		}
1089		nam = malloc(args.addrlen, M_SONAME,
1090		    M_WAITOK);
1091		bcopy(args.addr, nam, args.addrlen);
1092		nam->sa_len = args.addrlen;
1093	}
1094	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1095		&args.fhsize) == 0) {
1096		has_fh_opt = 1;
1097	}
1098	if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
1099		NULL) == 0) {
1100		has_hostname_opt = 1;
1101	}
1102	if (args.hostname == NULL) {
1103		vfs_mount_error(mp, "Invalid hostname");
1104		error = EINVAL;
1105		goto out;
1106	}
1107	if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1108		vfs_mount_error(mp, "Bad file handle");
1109		error = EINVAL;
1110		goto out;
1111	}
1112
1113	if (mp->mnt_flag & MNT_UPDATE) {
1114		struct nfsmount *nmp = VFSTONFS(mp);
1115
1116		if (nmp == NULL) {
1117			error = EIO;
1118			goto out;
1119		}
1120
1121		/*
1122		 * If a change from TCP->UDP is done and there are thread(s)
		 * that have I/O RPC(s) in progress with a transfer size
1124		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1125		 * hung, retrying the RPC(s) forever. Usually these threads
1126		 * will be seen doing an uninterruptible sleep on wait channel
1127		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1128		 */
1129		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1130			tprintf(curthread->td_proc, LOG_WARNING,
1131	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1132
1133		/*
1134		 * When doing an update, we can't change from or to
1135		 * v3, switch lockd strategies or change cookie translation
1136		 */
1137		args.flags = (args.flags &
1138		    ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1139		    (nmp->nm_flag &
1140			(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1141		nfs_decode_args(mp, nmp, &args, NULL);
1142		goto out;
1143	}
1144
1145	/*
1146	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1147	 * or no-connection mode for those protocols that support
1148	 * no-connection mode (the flag will be cleared later for protocols
1149	 * that do not support no-connection mode).  This will allow a client
	 * to receive replies from a different IP than the request was
1151	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1152	 * not 0.
1153	 */
1154	if (nfs_ip_paranoia == 0)
1155		args.flags |= NFSMNT_NOCONN;
1156
1157	if (has_nfs_args_opt) {
1158		/*
1159		 * In the 'nfs_args' case, the pointers in the args
1160		 * structure are in userland - we copy them in here.
1161		 */
1162		if (!has_fh_opt) {
1163			error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1164			    args.fhsize);
1165			if (error) {
1166				goto out;
1167			}
1168			args.fh = nfh;
1169		}
1170		if (!has_hostname_opt) {
1171			error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1172			if (error) {
1173				goto out;
1174			}
1175			bzero(&hst[len], MNAMELEN - len);
1176			args.hostname = hst;
1177		}
1178		if (!has_addr_opt) {
1179			/* sockargs() call must be after above copyin() calls */
1180			error = getsockaddr(&nam, (caddr_t)args.addr,
1181			    args.addrlen);
1182			if (error) {
1183				goto out;
1184			}
1185		}
1186	} else if (has_addr_opt == 0) {
1187		vfs_mount_error(mp, "No server address");
1188		error = EINVAL;
1189		goto out;
1190	}
1191	error = mountnfs(&args, mp, nam, args.hostname, &vp,
1192	    curthread->td_ucred, nametimeo, negnametimeo);
1193out:
1194	if (!error) {
1195		MNT_ILOCK(mp);
1196		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
1197		MNT_IUNLOCK(mp);
1198	}
1199	return (error);
1200}
1201
1202
1203/*
1204 * VFS Operations.
1205 *
1206 * mount system call
1207 * It seems a bit dumb to copyinstr() the host and path here and then
1208 * bcopy() them in mountnfs(), but I wanted to detect errors before
1209 * doing the sockargs() call because sockargs() allocates an mbuf and
1210 * an error after that means that I have to release the mbuf.
1211 */
1212/* ARGSUSED */
1213static int
1214nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1215{
1216	int error;
1217	struct nfs_args args;
1218
1219	error = copyin(data, &args, sizeof (struct nfs_args));
1220	if (error)
1221		return error;
1222
1223	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1224
1225	error = kernel_mount(ma, flags);
1226	return (error);
1227}
1228
1229/*
1230 * Common code for mount and mountroot
1231 */
1232static int
1233mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1234    char *hst, struct vnode **vpp, struct ucred *cred, int nametimeo,
1235    int negnametimeo)
1236{
1237	struct nfsmount *nmp;
1238	struct nfsnode *np;
1239	int error;
1240	struct vattr attrs;
1241
1242	if (mp->mnt_flag & MNT_UPDATE) {
1243		nmp = VFSTONFS(mp);
1244		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1245		free(nam, M_SONAME);
1246		return (0);
1247	} else {
1248		nmp = uma_zalloc(nfsmount_zone, M_WAITOK);
1249		bzero((caddr_t)nmp, sizeof (struct nfsmount));
1250		TAILQ_INIT(&nmp->nm_bufq);
1251		mp->mnt_data = nmp;
1252		nmp->nm_getinfo = nfs_getnlminfo;
1253		nmp->nm_vinvalbuf = nfs_vinvalbuf;
1254	}
1255	vfs_getnewfsid(mp);
1256	nmp->nm_mountp = mp;
1257	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF);
1258
1259	/*
1260	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1261	 * high, depending on whether we end up with negative offsets in
1262	 * the client or server somewhere.  2GB-1 may be safer.
1263	 *
1264	 * For V3, nfs_fsinfo will adjust this as necessary.  Assume maximum
1265	 * that we can handle until we find out otherwise.
1266	 */
1267	if ((argp->flags & NFSMNT_NFSV3) == 0)
1268		nmp->nm_maxfilesize = 0xffffffffLL;
1269	else
1270		nmp->nm_maxfilesize = OFF_MAX;
1271
1272	nmp->nm_timeo = NFS_TIMEO;
1273	nmp->nm_retry = NFS_RETRANS;
1274	if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) {
1275		nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA;
1276	} else {
1277		nmp->nm_wsize = NFS_WSIZE;
1278		nmp->nm_rsize = NFS_RSIZE;
1279	}
1280	nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1281	nmp->nm_readdirsize = NFS_READDIRSIZE;
1282	nmp->nm_numgrps = NFS_MAXGRPS;
1283	nmp->nm_readahead = NFS_DEFRAHEAD;
1284	nmp->nm_deadthresh = NFS_MAXDEADTHRESH;
1285	nmp->nm_nametimeo = nametimeo;
1286	nmp->nm_negnametimeo = negnametimeo;
1287	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1288	if (nmp->nm_tprintf_delay < 0)
1289		nmp->nm_tprintf_delay = 0;
1290	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1291	if (nmp->nm_tprintf_initial_delay < 0)
1292		nmp->nm_tprintf_initial_delay = 0;
1293	nmp->nm_fhsize = argp->fhsize;
1294	bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1295	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1296	nmp->nm_nam = nam;
1297	/* Set up the sockets and per-host congestion */
1298	nmp->nm_sotype = argp->sotype;
1299	nmp->nm_soproto = argp->proto;
1300	nmp->nm_rpcops = &nfs_rpcops;
1301
1302	nfs_decode_args(mp, nmp, argp, hst);
1303
1304	/*
1305	 * For Connection based sockets (TCP,...) defer the connect until
1306	 * the first request, in case the server is not responding.
1307	 */
1308	if (nmp->nm_sotype == SOCK_DGRAM &&
1309		(error = nfs_connect(nmp)))
1310		goto bad;
1311
1312	/*
1313	 * This is silly, but it has to be set so that vinifod() works.
1314	 * We do not want to do an nfs_statfs() here since we can get
1315	 * stuck on a dead server and we are holding a lock on the mount
1316	 * point.
1317	 */
1318	mtx_lock(&nmp->nm_mtx);
1319	mp->mnt_stat.f_iosize = nfs_iosize(nmp);
1320	mtx_unlock(&nmp->nm_mtx);
1321	/*
1322	 * A reference count is needed on the nfsnode representing the
1323	 * remote root.  If this object is not persistent, then backward
1324	 * traversals of the mount point (i.e. "..") will not work if
1325	 * the nfsnode gets flushed out of the cache. Ufs does not have
1326	 * this problem, because one can identify root inodes by their
1327	 * number == ROOTINO (2).
1328	 */
1329	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
1330	if (error)
1331		goto bad;
1332	*vpp = NFSTOV(np);
1333
1334	/*
1335	 * Get file attributes and transfer parameters for the
1336	 * mountpoint.  This has the side effect of filling in
1337	 * (*vpp)->v_type with the correct value.
1338	 */
1339	if (argp->flags & NFSMNT_NFSV3)
1340		nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread);
1341	else
1342		VOP_GETATTR(*vpp, &attrs, curthread->td_ucred);
1343
1344	/*
1345	 * Lose the lock but keep the ref.
1346	 */
1347	VOP_UNLOCK(*vpp, 0);
1348
1349	return (0);
1350bad:
1351	nfs_disconnect(nmp);
1352	mtx_destroy(&nmp->nm_mtx);
1353	uma_zfree(nfsmount_zone, nmp);
1354	free(nam, M_SONAME);
1355	return (error);
1356}
1357
1358/*
1359 * unmount system call
1360 */
1361static int
1362nfs_unmount(struct mount *mp, int mntflags)
1363{
1364	struct nfsmount *nmp;
1365	int error, flags = 0, i;
1366
1367	if (mntflags & MNT_FORCE)
1368		flags |= FORCECLOSE;
1369	nmp = VFSTONFS(mp);
1370	/*
1371	 * Goes something like this..
1372	 * - Call vflush() to clear out vnodes for this filesystem
1373	 * - Close the socket
1374	 * - Free up the data structures
1375	 */
1376	/* In the forced case, cancel any outstanding requests. */
1377	if (flags & FORCECLOSE) {
1378		error = nfs_nmcancelreqs(nmp);
1379		if (error)
1380			goto out;
1381	}
1382	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1383	error = vflush(mp, 1, flags, curthread);
1384	if (error)
1385		goto out;
1386
1387	/*
1388	 * We are now committed to the unmount.
1389	 */
1390	/* Make sure no nfsiods are assigned to this mount. */
1391	mtx_lock(&nfs_iod_mtx);
1392	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1393		if (nfs_iodmount[i] == nmp) {
1394			nfs_iodwant[i] = NFSIOD_AVAILABLE;
1395			nfs_iodmount[i] = NULL;
1396		}
1397	mtx_unlock(&nfs_iod_mtx);
1398	nfs_disconnect(nmp);
1399	free(nmp->nm_nam, M_SONAME);
1400
1401	mtx_destroy(&nmp->nm_mtx);
1402	uma_zfree(nfsmount_zone, nmp);
1403out:
1404	return (error);
1405}
1406
1407/*
1408 * Return root of a filesystem
1409 */
1410static int
1411nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1412{
1413	struct vnode *vp;
1414	struct nfsmount *nmp;
1415	struct nfsnode *np;
1416	int error;
1417
1418	nmp = VFSTONFS(mp);
1419	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1420	if (error)
1421		return error;
1422	vp = NFSTOV(np);
1423	/*
1424	 * Get transfer parameters and attributes for root vnode once.
1425	 */
1426	mtx_lock(&nmp->nm_mtx);
1427	if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 &&
1428	    (nmp->nm_flag & NFSMNT_NFSV3)) {
1429		mtx_unlock(&nmp->nm_mtx);
1430		nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1431	} else
1432		mtx_unlock(&nmp->nm_mtx);
1433	if (vp->v_type == VNON)
1434	    vp->v_type = VDIR;
1435	vp->v_vflag |= VV_ROOT;
1436	*vpp = vp;
1437	return (0);
1438}
1439
1440/*
1441 * Flush out the buffer cache
1442 */
1443/* ARGSUSED */
1444static int
1445nfs_sync(struct mount *mp, int waitfor)
1446{
1447	struct vnode *vp, *mvp;
1448	struct thread *td;
1449	int error, allerror = 0;
1450
1451	td = curthread;
1452
1453	MNT_ILOCK(mp);
1454	/*
1455	 * If a forced dismount is in progress, return from here so that
1456	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1457	 * calling VFS_UNMOUNT().
1458	 */
1459	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1460		MNT_IUNLOCK(mp);
1461		return (EBADF);
1462	}
1463	MNT_IUNLOCK(mp);
1464
1465	/*
1466	 * Force stale buffer cache information to be flushed.
1467	 */
1468loop:
1469	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1470		/* XXX Racy bv_cnt check. */
1471		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1472		    waitfor == MNT_LAZY) {
1473			VI_UNLOCK(vp);
1474			continue;
1475		}
1476		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1477			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1478			goto loop;
1479		}
1480		error = VOP_FSYNC(vp, waitfor, td);
1481		if (error)
1482			allerror = error;
1483		VOP_UNLOCK(vp, 0);
1484		vrele(vp);
1485	}
1486	return (allerror);
1487}
1488
1489static int
1490nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1491{
1492	struct nfsmount *nmp = VFSTONFS(mp);
1493	struct vfsquery vq;
1494	int error;
1495
1496	bzero(&vq, sizeof(vq));
1497	switch (op) {
1498#if 0
1499	case VFS_CTL_NOLOCKS:
1500		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1501 		if (req->oldptr != NULL) {
1502 			error = SYSCTL_OUT(req, &val, sizeof(val));
1503 			if (error)
1504 				return (error);
1505 		}
1506 		if (req->newptr != NULL) {
1507 			error = SYSCTL_IN(req, &val, sizeof(val));
1508 			if (error)
1509 				return (error);
1510			if (val)
1511				nmp->nm_flag |= NFSMNT_NOLOCKS;
1512			else
1513				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1514 		}
1515		break;
1516#endif
1517	case VFS_CTL_QUERY:
1518		mtx_lock(&nmp->nm_mtx);
1519		if (nmp->nm_state & NFSSTA_TIMEO)
1520			vq.vq_flags |= VQ_NOTRESP;
1521		mtx_unlock(&nmp->nm_mtx);
1522#if 0
1523		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1524		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1525			vq.vq_flags |= VQ_NOTRESPLOCK;
1526#endif
1527		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1528		break;
1529 	case VFS_CTL_TIMEO:
1530 		if (req->oldptr != NULL) {
1531 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1532 			    sizeof(nmp->nm_tprintf_initial_delay));
1533 			if (error)
1534 				return (error);
1535 		}
1536 		if (req->newptr != NULL) {
1537			error = vfs_suser(mp, req->td);
1538			if (error)
1539				return (error);
1540 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1541 			    sizeof(nmp->nm_tprintf_initial_delay));
1542 			if (error)
1543 				return (error);
1544 			if (nmp->nm_tprintf_initial_delay < 0)
1545 				nmp->nm_tprintf_initial_delay = 0;
1546 		}
1547		break;
1548	default:
1549		return (ENOTSUP);
1550	}
1551	return (0);
1552}
1553
1554/*
1555 * Extract the information needed by the nlm from the nfs vnode.
1556 */
1557static void
1558nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1559    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1560    struct timeval *timeop)
1561{
1562	struct nfsmount *nmp;
1563	struct nfsnode *np = VTONFS(vp);
1564
1565	nmp = VFSTONFS(vp->v_mount);
1566	if (fhlenp != NULL)
1567		*fhlenp = (size_t)np->n_fhsize;
1568	if (fhp != NULL)
1569		bcopy(np->n_fhp, fhp, np->n_fhsize);
1570	if (sp != NULL)
1571		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1572	if (is_v3p != NULL)
1573		*is_v3p = NFS_ISV3(vp);
1574	if (sizep != NULL)
1575		*sizep = np->n_size;
1576	if (timeop != NULL) {
1577		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1578		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1579	}
1580}
1581
1582