nfs_clvfsops.c revision 282933
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clvfsops.c 282933 2015-05-14 22:50:07Z rmacklem $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern struct nfsstats	newnfsstats;
82extern int nfsrv_useacl;
83extern int nfscl_debuglevel;
84extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86extern struct mtx ncl_iod_mutex;
87NFSCLSTATEMUTEX;
88
89MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
90MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
91
92SYSCTL_DECL(_vfs_nfs);
93static int nfs_ip_paranoia = 1;
94SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95    &nfs_ip_paranoia, 0, "");
96static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
97SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
98        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
99/* how long between console messages "nfs server foo not responding" */
100static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
101SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
102        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103
104static int	nfs_mountroot(struct mount *);
105static void	nfs_sec_name(char *, int *);
106static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
107		    struct nfs_args *argp, const char *, struct ucred *,
108		    struct thread *);
109static int	mountnfs(struct nfs_args *, struct mount *,
110		    struct sockaddr *, char *, u_char *, int, u_char *, int,
111		    u_char *, int, struct vnode **, struct ucred *,
112		    struct thread *, int, int, int);
113static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
114		    struct sockaddr_storage *, int *, off_t *,
115		    struct timeval *);
116static vfs_mount_t nfs_mount;
117static vfs_cmount_t nfs_cmount;
118static vfs_unmount_t nfs_unmount;
119static vfs_root_t nfs_root;
120static vfs_statfs_t nfs_statfs;
121static vfs_sync_t nfs_sync;
122static vfs_sysctl_t nfs_sysctl;
123static vfs_purge_t nfs_purge;
124
125/*
126 * nfs vfs operations.
127 */
128static struct vfsops nfs_vfsops = {
129	.vfs_init =		ncl_init,
130	.vfs_mount =		nfs_mount,
131	.vfs_cmount =		nfs_cmount,
132	.vfs_root =		nfs_root,
133	.vfs_statfs =		nfs_statfs,
134	.vfs_sync =		nfs_sync,
135	.vfs_uninit =		ncl_uninit,
136	.vfs_unmount =		nfs_unmount,
137	.vfs_sysctl =		nfs_sysctl,
138	.vfs_purge =		nfs_purge,
139};
140VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
141
142/* So that loader and kldload(2) can find us, wherever we are.. */
143MODULE_VERSION(nfs, 1);
144MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
145MODULE_DEPEND(nfs, krpc, 1, 1, 1);
146MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
147MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
148
149/*
150 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
151 * can be shared by both NFS clients. It is declared here so that it
152 * will be defined for kernels built without NFS_ROOT, although it
153 * isn't used in that case.
154 */
155#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
156struct nfs_diskless	nfs_diskless = { { { 0 } } };
157struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
158int			nfs_diskless_valid = 0;
159#endif
160
161SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
162    &nfs_diskless_valid, 0,
163    "Has the diskless struct been filled correctly");
164
165SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
166    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
167
168SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
169    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
170    "%Ssockaddr_in", "Diskless root nfs address");
171
172
173void		newnfsargs_ntoh(struct nfs_args *);
174static int	nfs_mountdiskless(char *,
175		    struct sockaddr_in *, struct nfs_args *,
176		    struct thread *, struct vnode **, struct mount *);
177static void	nfs_convert_diskless(void);
178static void	nfs_convert_oargs(struct nfs_args *args,
179		    struct onfs_args *oargs);
180
181int
182newnfs_iosize(struct nfsmount *nmp)
183{
184	int iosize, maxio;
185
186	/* First, set the upper limit for iosize */
187	if (nmp->nm_flag & NFSMNT_NFSV4) {
188		maxio = NFS_MAXBSIZE;
189	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
190		if (nmp->nm_sotype == SOCK_DGRAM)
191			maxio = NFS_MAXDGRAMDATA;
192		else
193			maxio = NFS_MAXBSIZE;
194	} else {
195		maxio = NFS_V2MAXDATA;
196	}
197	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198		nmp->nm_rsize = maxio;
199	if (nmp->nm_rsize > NFS_MAXBSIZE)
200		nmp->nm_rsize = NFS_MAXBSIZE;
201	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202		nmp->nm_readdirsize = maxio;
203	if (nmp->nm_readdirsize > nmp->nm_rsize)
204		nmp->nm_readdirsize = nmp->nm_rsize;
205	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206		nmp->nm_wsize = maxio;
207	if (nmp->nm_wsize > NFS_MAXBSIZE)
208		nmp->nm_wsize = NFS_MAXBSIZE;
209
210	/*
211	 * Calculate the size used for io buffers.  Use the larger
212	 * of the two sizes to minimise nfs requests but make sure
213	 * that it is at least one VM page to avoid wasting buffer
214	 * space.
215	 */
216	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
217	iosize = imax(iosize, PAGE_SIZE);
218	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
219	return (iosize);
220}
221
222static void
223nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
224{
225
226	args->version = NFS_ARGSVERSION;
227	args->addr = oargs->addr;
228	args->addrlen = oargs->addrlen;
229	args->sotype = oargs->sotype;
230	args->proto = oargs->proto;
231	args->fh = oargs->fh;
232	args->fhsize = oargs->fhsize;
233	args->flags = oargs->flags;
234	args->wsize = oargs->wsize;
235	args->rsize = oargs->rsize;
236	args->readdirsize = oargs->readdirsize;
237	args->timeo = oargs->timeo;
238	args->retrans = oargs->retrans;
239	args->readahead = oargs->readahead;
240	args->hostname = oargs->hostname;
241}
242
243static void
244nfs_convert_diskless(void)
245{
246
247	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
248		sizeof(struct ifaliasreq));
249	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
250		sizeof(struct sockaddr_in));
251	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
252	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
253		nfsv3_diskless.root_fhsize = NFSX_MYFH;
254		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
255	} else {
256		nfsv3_diskless.root_fhsize = NFSX_V2FH;
257		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
258	}
259	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
260		sizeof(struct sockaddr_in));
261	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
262	nfsv3_diskless.root_time = nfs_diskless.root_time;
263	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
264		MAXHOSTNAMELEN);
265	nfs_diskless_valid = 3;
266}
267
268/*
269 * nfs statfs call
270 */
271static int
272nfs_statfs(struct mount *mp, struct statfs *sbp)
273{
274	struct vnode *vp;
275	struct thread *td;
276	struct nfsmount *nmp = VFSTONFS(mp);
277	struct nfsvattr nfsva;
278	struct nfsfsinfo fs;
279	struct nfsstatfs sb;
280	int error = 0, attrflag, gotfsinfo = 0, ret;
281	struct nfsnode *np;
282
283	td = curthread;
284
285	error = vfs_busy(mp, MBF_NOWAIT);
286	if (error)
287		return (error);
288	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
289	if (error) {
290		vfs_unbusy(mp);
291		return (error);
292	}
293	vp = NFSTOV(np);
294	mtx_lock(&nmp->nm_mtx);
295	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
296		mtx_unlock(&nmp->nm_mtx);
297		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
298		    &attrflag, NULL);
299		if (!error)
300			gotfsinfo = 1;
301	} else
302		mtx_unlock(&nmp->nm_mtx);
303	if (!error)
304		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
305		    &attrflag, NULL);
306	if (error != 0)
307		NFSCL_DEBUG(2, "statfs=%d\n", error);
308	if (attrflag == 0) {
309		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
310		    td->td_ucred, td, &nfsva, NULL, NULL);
311		if (ret) {
312			/*
313			 * Just set default values to get things going.
314			 */
315			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
316			nfsva.na_vattr.va_type = VDIR;
317			nfsva.na_vattr.va_mode = 0777;
318			nfsva.na_vattr.va_nlink = 100;
319			nfsva.na_vattr.va_uid = (uid_t)0;
320			nfsva.na_vattr.va_gid = (gid_t)0;
321			nfsva.na_vattr.va_fileid = 2;
322			nfsva.na_vattr.va_gen = 1;
323			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
324			nfsva.na_vattr.va_size = 512 * 1024;
325		}
326	}
327	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
328	if (!error) {
329	    mtx_lock(&nmp->nm_mtx);
330	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
331		nfscl_loadfsinfo(nmp, &fs);
332	    nfscl_loadsbinfo(nmp, &sb, sbp);
333	    sbp->f_iosize = newnfs_iosize(nmp);
334	    mtx_unlock(&nmp->nm_mtx);
335	    if (sbp != &mp->mnt_stat) {
336		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
337		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
338	    }
339	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
340	} else if (NFS_ISV4(vp)) {
341		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
342	}
343	vput(vp);
344	vfs_unbusy(mp);
345	return (error);
346}
347
348/*
349 * nfs version 3 fsinfo rpc call
350 */
351int
352ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
353    struct thread *td)
354{
355	struct nfsfsinfo fs;
356	struct nfsvattr nfsva;
357	int error, attrflag;
358
359	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
360	if (!error) {
361		if (attrflag)
362			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
363			    1);
364		mtx_lock(&nmp->nm_mtx);
365		nfscl_loadfsinfo(nmp, &fs);
366		mtx_unlock(&nmp->nm_mtx);
367	}
368	return (error);
369}
370
371/*
372 * Mount a remote root fs via. nfs. This depends on the info in the
373 * nfs_diskless structure that has been filled in properly by some primary
374 * bootstrap.
375 * It goes something like this:
376 * - do enough of "ifconfig" by calling ifioctl() so that the system
377 *   can talk to the server
378 * - If nfs_diskless.mygateway is filled in, use that address as
379 *   a default gateway.
380 * - build the rootfs mount point and call mountnfs() to do the rest.
381 *
382 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
383 * structure, as well as other global NFS client variables here, as
384 * nfs_mountroot() will be called once in the boot before any other NFS
385 * client activity occurs.
386 */
387static int
388nfs_mountroot(struct mount *mp)
389{
390	struct thread *td = curthread;
391	struct nfsv3_diskless *nd = &nfsv3_diskless;
392	struct socket *so;
393	struct vnode *vp;
394	struct ifreq ir;
395	int error;
396	u_long l;
397	char buf[128];
398	char *cp;
399
400#if defined(BOOTP_NFSROOT) && defined(BOOTP)
401	bootpc_init();		/* use bootp to get nfs_diskless filled in */
402#elif defined(NFS_ROOT)
403	nfs_setup_diskless();
404#endif
405
406	if (nfs_diskless_valid == 0)
407		return (-1);
408	if (nfs_diskless_valid == 1)
409		nfs_convert_diskless();
410
411	/*
412	 * XXX splnet, so networks will receive...
413	 */
414	splnet();
415
416	/*
417	 * Do enough of ifconfig(8) so that the critical net interface can
418	 * talk to the server.
419	 */
420	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
421	    td->td_ucred, td);
422	if (error)
423		panic("nfs_mountroot: socreate(%04x): %d",
424			nd->myif.ifra_addr.sa_family, error);
425
426#if 0 /* XXX Bad idea */
427	/*
428	 * We might not have been told the right interface, so we pass
429	 * over the first ten interfaces of the same kind, until we get
430	 * one of them configured.
431	 */
432
433	for (i = strlen(nd->myif.ifra_name) - 1;
434		nd->myif.ifra_name[i] >= '0' &&
435		nd->myif.ifra_name[i] <= '9';
436		nd->myif.ifra_name[i] ++) {
437		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
438		if(!error)
439			break;
440	}
441#endif
442	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
443	if (error)
444		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
445	if ((cp = getenv("boot.netif.mtu")) != NULL) {
446		ir.ifr_mtu = strtol(cp, NULL, 10);
447		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
448		freeenv(cp);
449		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
450		if (error)
451			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
452	}
453	soclose(so);
454
455	/*
456	 * If the gateway field is filled in, set it as the default route.
457	 * Note that pxeboot will set a default route of 0 if the route
458	 * is not set by the DHCP server.  Check also for a value of 0
459	 * to avoid panicking inappropriately in that situation.
460	 */
461	if (nd->mygateway.sin_len != 0 &&
462	    nd->mygateway.sin_addr.s_addr != 0) {
463		struct sockaddr_in mask, sin;
464
465		bzero((caddr_t)&mask, sizeof(mask));
466		sin = mask;
467		sin.sin_family = AF_INET;
468		sin.sin_len = sizeof(sin);
469                /* XXX MRT use table 0 for this sort of thing */
470		CURVNET_SET(TD_TO_VNET(td));
471		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
472		    (struct sockaddr *)&nd->mygateway,
473		    (struct sockaddr *)&mask,
474		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
475		CURVNET_RESTORE();
476		if (error)
477			panic("nfs_mountroot: RTM_ADD: %d", error);
478	}
479
480	/*
481	 * Create the rootfs mount point.
482	 */
483	nd->root_args.fh = nd->root_fh;
484	nd->root_args.fhsize = nd->root_fhsize;
485	l = ntohl(nd->root_saddr.sin_addr.s_addr);
486	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
487		(l >> 24) & 0xff, (l >> 16) & 0xff,
488		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
489	printf("NFS ROOT: %s\n", buf);
490	nd->root_args.hostname = buf;
491	if ((error = nfs_mountdiskless(buf,
492	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
493		return (error);
494	}
495
496	/*
497	 * This is not really an nfs issue, but it is much easier to
498	 * set hostname here and then let the "/etc/rc.xxx" files
499	 * mount the right /var based upon its preset value.
500	 */
501	mtx_lock(&prison0.pr_mtx);
502	strlcpy(prison0.pr_hostname, nd->my_hostnam,
503	    sizeof(prison0.pr_hostname));
504	mtx_unlock(&prison0.pr_mtx);
505	inittodr(ntohl(nd->root_time));
506	return (0);
507}
508
509/*
510 * Internal version of mount system call for diskless setup.
511 */
512static int
513nfs_mountdiskless(char *path,
514    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
515    struct vnode **vpp, struct mount *mp)
516{
517	struct sockaddr *nam;
518	int dirlen, error;
519	char *dirpath;
520
521	/*
522	 * Find the directory path in "path", which also has the server's
523	 * name/ip address in it.
524	 */
525	dirpath = strchr(path, ':');
526	if (dirpath != NULL)
527		dirlen = strlen(++dirpath);
528	else
529		dirlen = 0;
530	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
531	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
532	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
533	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
534		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
535		return (error);
536	}
537	return (0);
538}
539
540static void
541nfs_sec_name(char *sec, int *flagsp)
542{
543	if (!strcmp(sec, "krb5"))
544		*flagsp |= NFSMNT_KERB;
545	else if (!strcmp(sec, "krb5i"))
546		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
547	else if (!strcmp(sec, "krb5p"))
548		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
549}
550
551static void
552nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
553    const char *hostname, struct ucred *cred, struct thread *td)
554{
555	int s;
556	int adjsock;
557	char *p;
558
559	s = splnet();
560
561	/*
562	 * Set read-only flag if requested; otherwise, clear it if this is
563	 * an update.  If this is not an update, then either the read-only
564	 * flag is already clear, or this is a root mount and it was set
565	 * intentionally at some previous point.
566	 */
567	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
568		MNT_ILOCK(mp);
569		mp->mnt_flag |= MNT_RDONLY;
570		MNT_IUNLOCK(mp);
571	} else if (mp->mnt_flag & MNT_UPDATE) {
572		MNT_ILOCK(mp);
573		mp->mnt_flag &= ~MNT_RDONLY;
574		MNT_IUNLOCK(mp);
575	}
576
577	/*
578	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
579	 * no sense in that context.  Also, set up appropriate retransmit
580	 * and soft timeout behavior.
581	 */
582	if (argp->sotype == SOCK_STREAM) {
583		nmp->nm_flag &= ~NFSMNT_NOCONN;
584		nmp->nm_timeo = NFS_MAXTIMEO;
585		if ((argp->flags & NFSMNT_NFSV4) != 0)
586			nmp->nm_retry = INT_MAX;
587		else
588			nmp->nm_retry = NFS_RETRANS_TCP;
589	}
590
591	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
592	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
593		argp->flags &= ~NFSMNT_RDIRPLUS;
594		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
595	}
596
597	/* Re-bind if rsrvd port requested and wasn't on one */
598	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
599		  && (argp->flags & NFSMNT_RESVPORT);
600	/* Also re-bind if we're switching to/from a connected UDP socket */
601	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
602		    (argp->flags & NFSMNT_NOCONN));
603
604	/* Update flags atomically.  Don't change the lock bits. */
605	nmp->nm_flag = argp->flags | nmp->nm_flag;
606	splx(s);
607
608	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
609		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
610		if (nmp->nm_timeo < NFS_MINTIMEO)
611			nmp->nm_timeo = NFS_MINTIMEO;
612		else if (nmp->nm_timeo > NFS_MAXTIMEO)
613			nmp->nm_timeo = NFS_MAXTIMEO;
614	}
615
616	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
617		nmp->nm_retry = argp->retrans;
618		if (nmp->nm_retry > NFS_MAXREXMIT)
619			nmp->nm_retry = NFS_MAXREXMIT;
620	}
621
622	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
623		nmp->nm_wsize = argp->wsize;
624		/*
625		 * Clip at the power of 2 below the size. There is an
626		 * issue (not isolated) that causes intermittent page
627		 * faults if this is not done.
628		 */
629		if (nmp->nm_wsize > NFS_FABLKSIZE)
630			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
631		else
632			nmp->nm_wsize = NFS_FABLKSIZE;
633	}
634
635	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
636		nmp->nm_rsize = argp->rsize;
637		/*
638		 * Clip at the power of 2 below the size. There is an
639		 * issue (not isolated) that causes intermittent page
640		 * faults if this is not done.
641		 */
642		if (nmp->nm_rsize > NFS_FABLKSIZE)
643			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
644		else
645			nmp->nm_rsize = NFS_FABLKSIZE;
646	}
647
648	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
649		nmp->nm_readdirsize = argp->readdirsize;
650	}
651
652	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
653		nmp->nm_acregmin = argp->acregmin;
654	else
655		nmp->nm_acregmin = NFS_MINATTRTIMO;
656	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
657		nmp->nm_acregmax = argp->acregmax;
658	else
659		nmp->nm_acregmax = NFS_MAXATTRTIMO;
660	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
661		nmp->nm_acdirmin = argp->acdirmin;
662	else
663		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
664	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
665		nmp->nm_acdirmax = argp->acdirmax;
666	else
667		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
668	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
669		nmp->nm_acdirmin = nmp->nm_acdirmax;
670	if (nmp->nm_acregmin > nmp->nm_acregmax)
671		nmp->nm_acregmin = nmp->nm_acregmax;
672
673	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
674		if (argp->readahead <= NFS_MAXRAHEAD)
675			nmp->nm_readahead = argp->readahead;
676		else
677			nmp->nm_readahead = NFS_MAXRAHEAD;
678	}
679	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
680		if (argp->wcommitsize < nmp->nm_wsize)
681			nmp->nm_wcommitsize = nmp->nm_wsize;
682		else
683			nmp->nm_wcommitsize = argp->wcommitsize;
684	}
685
686	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
687		    (nmp->nm_soproto != argp->proto));
688
689	if (nmp->nm_client != NULL && adjsock) {
690		int haslock = 0, error = 0;
691
692		if (nmp->nm_sotype == SOCK_STREAM) {
693			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
694			if (!error)
695				haslock = 1;
696		}
697		if (!error) {
698		    newnfs_disconnect(&nmp->nm_sockreq);
699		    if (haslock)
700			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
701		    nmp->nm_sotype = argp->sotype;
702		    nmp->nm_soproto = argp->proto;
703		    if (nmp->nm_sotype == SOCK_DGRAM)
704			while (newnfs_connect(nmp, &nmp->nm_sockreq,
705			    cred, td, 0)) {
706				printf("newnfs_args: retrying connect\n");
707				(void) nfs_catnap(PSOCK, 0, "newnfscon");
708			}
709		}
710	} else {
711		nmp->nm_sotype = argp->sotype;
712		nmp->nm_soproto = argp->proto;
713	}
714
715	if (hostname != NULL) {
716		strlcpy(nmp->nm_hostname, hostname,
717		    sizeof(nmp->nm_hostname));
718		p = strchr(nmp->nm_hostname, ':');
719		if (p != NULL)
720			*p = '\0';
721	}
722}
723
/*
 * Names of the mount options nfs_mount() accepts.  The NULL-terminated
 * list is handed to vfs_filteropt(); when that check fails the mount is
 * refused with EINVAL.
 */
static const char *nfs_opts[] = {
	"from",
	"nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	"noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize",
	"soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname",
	"timeo", "timeout",
	"addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "minorversion",
	"nametimeo", "negnametimeo",
	"nocto", "noncontigwr", "pnfs", "wcommitsize",
	NULL
};
735
736/*
737 * VFS Operations.
738 *
739 * mount system call
740 * It seems a bit dumb to copyinstr() the host and path here and then
741 * bcopy() them in mountnfs(), but I wanted to detect errors before
742 * doing the sockargs() call because sockargs() allocates an mbuf and
743 * an error after that means that I have to release the mbuf.
744 */
745/* ARGSUSED */
746static int
747nfs_mount(struct mount *mp)
748{
749	struct nfs_args args = {
750	    .version = NFS_ARGSVERSION,
751	    .addr = NULL,
752	    .addrlen = sizeof (struct sockaddr_in),
753	    .sotype = SOCK_STREAM,
754	    .proto = 0,
755	    .fh = NULL,
756	    .fhsize = 0,
757	    .flags = NFSMNT_RESVPORT,
758	    .wsize = NFS_WSIZE,
759	    .rsize = NFS_RSIZE,
760	    .readdirsize = NFS_READDIRSIZE,
761	    .timeo = 10,
762	    .retrans = NFS_RETRANS,
763	    .readahead = NFS_DEFRAHEAD,
764	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
765	    .hostname = NULL,
766	    .acregmin = NFS_MINATTRTIMO,
767	    .acregmax = NFS_MAXATTRTIMO,
768	    .acdirmin = NFS_MINDIRATTRTIMO,
769	    .acdirmax = NFS_MAXDIRATTRTIMO,
770	};
771	int error = 0, ret, len;
772	struct sockaddr *nam = NULL;
773	struct vnode *vp;
774	struct thread *td;
775	char hst[MNAMELEN];
776	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
777	char *opt, *name, *secname;
778	int nametimeo = NFS_DEFAULT_NAMETIMEO;
779	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
780	int minvers = 0;
781	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
782	size_t hstlen;
783
784	has_nfs_args_opt = 0;
785	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
786		error = EINVAL;
787		goto out;
788	}
789
790	td = curthread;
791	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
792		error = nfs_mountroot(mp);
793		goto out;
794	}
795
796	nfscl_init();
797
798	/*
799	 * The old mount_nfs program passed the struct nfs_args
800	 * from userspace to kernel.  The new mount_nfs program
801	 * passes string options via nmount() from userspace to kernel
802	 * and we populate the struct nfs_args in the kernel.
803	 */
804	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
805		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
806		    sizeof(args));
807		if (error != 0)
808			goto out;
809
810		if (args.version != NFS_ARGSVERSION) {
811			error = EPROGMISMATCH;
812			goto out;
813		}
814		has_nfs_args_opt = 1;
815	}
816
817	/* Handle the new style options. */
818	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
819		args.acdirmin = args.acdirmax =
820		    args.acregmin = args.acregmax = 0;
821		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
822		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
823	}
824	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
825		args.flags |= NFSMNT_NOCONN;
826	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
827		args.flags &= ~NFSMNT_NOCONN;
828	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
829		args.flags |= NFSMNT_NOLOCKD;
830	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
831		args.flags &= ~NFSMNT_NOLOCKD;
832	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
833		args.flags |= NFSMNT_INT;
834	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
835		args.flags |= NFSMNT_RDIRPLUS;
836	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
837		args.flags |= NFSMNT_RESVPORT;
838	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
839		args.flags &= ~NFSMNT_RESVPORT;
840	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
841		args.flags |= NFSMNT_SOFT;
842	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
843		args.flags &= ~NFSMNT_SOFT;
844	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
845		args.sotype = SOCK_DGRAM;
846	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
847		args.sotype = SOCK_DGRAM;
848	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
849		args.sotype = SOCK_STREAM;
850	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
851		args.flags |= NFSMNT_NFSV3;
852	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
853		args.flags |= NFSMNT_NFSV4;
854		args.sotype = SOCK_STREAM;
855	}
856	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
857		args.flags |= NFSMNT_ALLGSSNAME;
858	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
859		args.flags |= NFSMNT_NOCTO;
860	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
861		args.flags |= NFSMNT_NONCONTIGWR;
862	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
863		args.flags |= NFSMNT_PNFS;
864	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
865		if (opt == NULL) {
866			vfs_mount_error(mp, "illegal readdirsize");
867			error = EINVAL;
868			goto out;
869		}
870		ret = sscanf(opt, "%d", &args.readdirsize);
871		if (ret != 1 || args.readdirsize <= 0) {
872			vfs_mount_error(mp, "illegal readdirsize: %s",
873			    opt);
874			error = EINVAL;
875			goto out;
876		}
877		args.flags |= NFSMNT_READDIRSIZE;
878	}
879	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
880		if (opt == NULL) {
881			vfs_mount_error(mp, "illegal readahead");
882			error = EINVAL;
883			goto out;
884		}
885		ret = sscanf(opt, "%d", &args.readahead);
886		if (ret != 1 || args.readahead <= 0) {
887			vfs_mount_error(mp, "illegal readahead: %s",
888			    opt);
889			error = EINVAL;
890			goto out;
891		}
892		args.flags |= NFSMNT_READAHEAD;
893	}
894	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
895		if (opt == NULL) {
896			vfs_mount_error(mp, "illegal wsize");
897			error = EINVAL;
898			goto out;
899		}
900		ret = sscanf(opt, "%d", &args.wsize);
901		if (ret != 1 || args.wsize <= 0) {
902			vfs_mount_error(mp, "illegal wsize: %s",
903			    opt);
904			error = EINVAL;
905			goto out;
906		}
907		args.flags |= NFSMNT_WSIZE;
908	}
909	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
910		if (opt == NULL) {
911			vfs_mount_error(mp, "illegal rsize");
912			error = EINVAL;
913			goto out;
914		}
915		ret = sscanf(opt, "%d", &args.rsize);
916		if (ret != 1 || args.rsize <= 0) {
917			vfs_mount_error(mp, "illegal wsize: %s",
918			    opt);
919			error = EINVAL;
920			goto out;
921		}
922		args.flags |= NFSMNT_RSIZE;
923	}
924	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
925		if (opt == NULL) {
926			vfs_mount_error(mp, "illegal retrans");
927			error = EINVAL;
928			goto out;
929		}
930		ret = sscanf(opt, "%d", &args.retrans);
931		if (ret != 1 || args.retrans <= 0) {
932			vfs_mount_error(mp, "illegal retrans: %s",
933			    opt);
934			error = EINVAL;
935			goto out;
936		}
937		args.flags |= NFSMNT_RETRANS;
938	}
939	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
940		ret = sscanf(opt, "%d", &args.acregmin);
941		if (ret != 1 || args.acregmin < 0) {
942			vfs_mount_error(mp, "illegal actimeo: %s",
943			    opt);
944			error = EINVAL;
945			goto out;
946		}
947		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
948		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
949		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
950	}
951	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
952		ret = sscanf(opt, "%d", &args.acregmin);
953		if (ret != 1 || args.acregmin < 0) {
954			vfs_mount_error(mp, "illegal acregmin: %s",
955			    opt);
956			error = EINVAL;
957			goto out;
958		}
959		args.flags |= NFSMNT_ACREGMIN;
960	}
961	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
962		ret = sscanf(opt, "%d", &args.acregmax);
963		if (ret != 1 || args.acregmax < 0) {
964			vfs_mount_error(mp, "illegal acregmax: %s",
965			    opt);
966			error = EINVAL;
967			goto out;
968		}
969		args.flags |= NFSMNT_ACREGMAX;
970	}
971	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
972		ret = sscanf(opt, "%d", &args.acdirmin);
973		if (ret != 1 || args.acdirmin < 0) {
974			vfs_mount_error(mp, "illegal acdirmin: %s",
975			    opt);
976			error = EINVAL;
977			goto out;
978		}
979		args.flags |= NFSMNT_ACDIRMIN;
980	}
981	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
982		ret = sscanf(opt, "%d", &args.acdirmax);
983		if (ret != 1 || args.acdirmax < 0) {
984			vfs_mount_error(mp, "illegal acdirmax: %s",
985			    opt);
986			error = EINVAL;
987			goto out;
988		}
989		args.flags |= NFSMNT_ACDIRMAX;
990	}
991	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
992		ret = sscanf(opt, "%d", &args.wcommitsize);
993		if (ret != 1 || args.wcommitsize < 0) {
994			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
995			error = EINVAL;
996			goto out;
997		}
998		args.flags |= NFSMNT_WCOMMITSIZE;
999	}
1000	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1001		ret = sscanf(opt, "%d", &args.timeo);
1002		if (ret != 1 || args.timeo <= 0) {
1003			vfs_mount_error(mp, "illegal timeo: %s",
1004			    opt);
1005			error = EINVAL;
1006			goto out;
1007		}
1008		args.flags |= NFSMNT_TIMEO;
1009	}
1010	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1011		ret = sscanf(opt, "%d", &args.timeo);
1012		if (ret != 1 || args.timeo <= 0) {
1013			vfs_mount_error(mp, "illegal timeout: %s",
1014			    opt);
1015			error = EINVAL;
1016			goto out;
1017		}
1018		args.flags |= NFSMNT_TIMEO;
1019	}
1020	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1021		ret = sscanf(opt, "%d", &nametimeo);
1022		if (ret != 1 || nametimeo < 0) {
1023			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1024			error = EINVAL;
1025			goto out;
1026		}
1027	}
1028	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1029	    == 0) {
1030		ret = sscanf(opt, "%d", &negnametimeo);
1031		if (ret != 1 || negnametimeo < 0) {
1032			vfs_mount_error(mp, "illegal negnametimeo: %s",
1033			    opt);
1034			error = EINVAL;
1035			goto out;
1036		}
1037	}
1038	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1039	    0) {
1040		ret = sscanf(opt, "%d", &minvers);
1041		if (ret != 1 || minvers < 0 || minvers > 1 ||
1042		    (args.flags & NFSMNT_NFSV4) == 0) {
1043			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1044			error = EINVAL;
1045			goto out;
1046		}
1047	}
1048	if (vfs_getopt(mp->mnt_optnew, "sec",
1049		(void **) &secname, NULL) == 0)
1050		nfs_sec_name(secname, &args.flags);
1051
1052	if (mp->mnt_flag & MNT_UPDATE) {
1053		struct nfsmount *nmp = VFSTONFS(mp);
1054
1055		if (nmp == NULL) {
1056			error = EIO;
1057			goto out;
1058		}
1059
1060		/*
1061		 * If a change from TCP->UDP is done and there are thread(s)
1062		 * that have I/O RPC(s) in progress with a transfer size
1063		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1064		 * hung, retrying the RPC(s) forever. Usually these threads
1065		 * will be seen doing an uninterruptible sleep on wait channel
1066		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1067		 */
1068		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1069			tprintf(td->td_proc, LOG_WARNING,
1070	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1071
1072		/*
1073		 * When doing an update, we can't change version,
1074		 * security, switch lockd strategies or change cookie
1075		 * translation
1076		 */
1077		args.flags = (args.flags &
1078		    ~(NFSMNT_NFSV3 |
1079		      NFSMNT_NFSV4 |
1080		      NFSMNT_KERB |
1081		      NFSMNT_INTEGRITY |
1082		      NFSMNT_PRIVACY |
1083		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1084		    (nmp->nm_flag &
1085			(NFSMNT_NFSV3 |
1086			 NFSMNT_NFSV4 |
1087			 NFSMNT_KERB |
1088			 NFSMNT_INTEGRITY |
1089			 NFSMNT_PRIVACY |
1090			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1091		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1092		goto out;
1093	}
1094
1095	/*
1096	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1097	 * or no-connection mode for those protocols that support
1098	 * no-connection mode (the flag will be cleared later for protocols
1099	 * that do not support no-connection mode).  This will allow a client
1100	 * to receive replies from a different IP then the request was
1101	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1102	 * not 0.
1103	 */
1104	if (nfs_ip_paranoia == 0)
1105		args.flags |= NFSMNT_NOCONN;
1106
1107	if (has_nfs_args_opt != 0) {
1108		/*
1109		 * In the 'nfs_args' case, the pointers in the args
1110		 * structure are in userland - we copy them in here.
1111		 */
1112		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1113			vfs_mount_error(mp, "Bad file handle");
1114			error = EINVAL;
1115			goto out;
1116		}
1117		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1118		    args.fhsize);
1119		if (error != 0)
1120			goto out;
1121		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1122		if (error != 0)
1123			goto out;
1124		bzero(&hst[hstlen], MNAMELEN - hstlen);
1125		args.hostname = hst;
1126		/* sockargs() call must be after above copyin() calls */
1127		error = getsockaddr(&nam, (caddr_t)args.addr,
1128		    args.addrlen);
1129		if (error != 0)
1130			goto out;
1131	} else {
1132		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1133		    &args.fhsize) == 0) {
1134			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1135				vfs_mount_error(mp, "Bad file handle");
1136				error = EINVAL;
1137				goto out;
1138			}
1139			bcopy(args.fh, nfh, args.fhsize);
1140		} else {
1141			args.fhsize = 0;
1142		}
1143		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1144		    (void **)&args.hostname, &len);
1145		if (args.hostname == NULL) {
1146			vfs_mount_error(mp, "Invalid hostname");
1147			error = EINVAL;
1148			goto out;
1149		}
1150		bcopy(args.hostname, hst, MNAMELEN);
1151		hst[MNAMELEN - 1] = '\0';
1152	}
1153
1154	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1155		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1156	else
1157		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1158	srvkrbnamelen = strlen(srvkrbname);
1159
1160	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1161		strlcpy(krbname, name, sizeof (krbname));
1162	else
1163		krbname[0] = '\0';
1164	krbnamelen = strlen(krbname);
1165
1166	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1167		strlcpy(dirpath, name, sizeof (dirpath));
1168	else
1169		dirpath[0] = '\0';
1170	dirlen = strlen(dirpath);
1171
1172	if (has_nfs_args_opt == 0) {
1173		if (vfs_getopt(mp->mnt_optnew, "addr",
1174		    (void **)&args.addr, &args.addrlen) == 0) {
1175			if (args.addrlen > SOCK_MAXADDRLEN) {
1176				error = ENAMETOOLONG;
1177				goto out;
1178			}
1179			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1180			bcopy(args.addr, nam, args.addrlen);
1181			nam->sa_len = args.addrlen;
1182		} else {
1183			vfs_mount_error(mp, "No server address");
1184			error = EINVAL;
1185			goto out;
1186		}
1187	}
1188
1189	args.fh = nfh;
1190	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1191	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1192	    nametimeo, negnametimeo, minvers);
1193out:
1194	if (!error) {
1195		MNT_ILOCK(mp);
1196		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1197		    MNTK_USES_BCACHE;
1198		MNT_IUNLOCK(mp);
1199	}
1200	return (error);
1201}
1202
1203
1204/*
1205 * VFS Operations.
1206 *
1207 * mount system call
1208 * It seems a bit dumb to copyinstr() the host and path here and then
1209 * bcopy() them in mountnfs(), but I wanted to detect errors before
1210 * doing the sockargs() call because sockargs() allocates an mbuf and
1211 * an error after that means that I have to release the mbuf.
1212 */
1213/* ARGSUSED */
1214static int
1215nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1216{
1217	int error;
1218	struct nfs_args args;
1219
1220	error = copyin(data, &args, sizeof (struct nfs_args));
1221	if (error)
1222		return error;
1223
1224	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1225
1226	error = kernel_mount(ma, flags);
1227	return (error);
1228}
1229
1230/*
1231 * Common code for mount and mountroot
1232 */
1233static int
1234mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1235    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1236    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1237    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1238    int minvers)
1239{
1240	struct nfsmount *nmp;
1241	struct nfsnode *np;
1242	int error, trycnt, ret;
1243	struct nfsvattr nfsva;
1244	struct nfsclclient *clp;
1245	struct nfsclds *dsp, *tdsp;
1246	uint32_t lease;
1247	static u_int64_t clval = 0;
1248
1249	NFSCL_DEBUG(3, "in mnt\n");
1250	clp = NULL;
1251	if (mp->mnt_flag & MNT_UPDATE) {
1252		nmp = VFSTONFS(mp);
1253		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1254		FREE(nam, M_SONAME);
1255		return (0);
1256	} else {
1257		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1258		    krbnamelen + dirlen + srvkrbnamelen + 2,
1259		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1260		TAILQ_INIT(&nmp->nm_bufq);
1261		if (clval == 0)
1262			clval = (u_int64_t)nfsboottime.tv_sec;
1263		nmp->nm_clval = clval++;
1264		nmp->nm_krbnamelen = krbnamelen;
1265		nmp->nm_dirpathlen = dirlen;
1266		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1267		if (td->td_ucred->cr_uid != (uid_t)0) {
1268			/*
1269			 * nm_uid is used to get KerberosV credentials for
1270			 * the nfsv4 state handling operations if there is
1271			 * no host based principal set. Use the uid of
1272			 * this user if not root, since they are doing the
1273			 * mount. I don't think setting this for root will
1274			 * work, since root normally does not have user
1275			 * credentials in a credentials cache.
1276			 */
1277			nmp->nm_uid = td->td_ucred->cr_uid;
1278		} else {
1279			/*
1280			 * Just set to -1, so it won't be used.
1281			 */
1282			nmp->nm_uid = (uid_t)-1;
1283		}
1284
1285		/* Copy and null terminate all the names */
1286		if (nmp->nm_krbnamelen > 0) {
1287			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1288			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1289		}
1290		if (nmp->nm_dirpathlen > 0) {
1291			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1292			    nmp->nm_dirpathlen);
1293			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1294			    + 1] = '\0';
1295		}
1296		if (nmp->nm_srvkrbnamelen > 0) {
1297			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1298			    nmp->nm_srvkrbnamelen);
1299			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1300			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1301		}
1302		nmp->nm_sockreq.nr_cred = crhold(cred);
1303		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1304		mp->mnt_data = nmp;
1305		nmp->nm_getinfo = nfs_getnlminfo;
1306		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1307	}
1308	vfs_getnewfsid(mp);
1309	nmp->nm_mountp = mp;
1310	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1311
1312	/*
1313	 * Since nfs_decode_args() might optionally set them, these
1314	 * need to be set to defaults before the call, so that the
1315	 * optional settings aren't overwritten.
1316	 */
1317	nmp->nm_nametimeo = nametimeo;
1318	nmp->nm_negnametimeo = negnametimeo;
1319	nmp->nm_timeo = NFS_TIMEO;
1320	nmp->nm_retry = NFS_RETRANS;
1321	nmp->nm_readahead = NFS_DEFRAHEAD;
1322
1323	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1324	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1325	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1326		nmp->nm_wcommitsize *= 2;
1327	nmp->nm_wcommitsize *= 256;
1328
1329	if ((argp->flags & NFSMNT_NFSV4) != 0)
1330		nmp->nm_minorvers = minvers;
1331	else
1332		nmp->nm_minorvers = 0;
1333
1334	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1335
1336	/*
1337	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1338	 * high, depending on whether we end up with negative offsets in
1339	 * the client or server somewhere.  2GB-1 may be safer.
1340	 *
1341	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1342	 * that we can handle until we find out otherwise.
1343	 */
1344	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1345		nmp->nm_maxfilesize = 0xffffffffLL;
1346	else
1347		nmp->nm_maxfilesize = OFF_MAX;
1348
1349	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1350		nmp->nm_wsize = NFS_WSIZE;
1351		nmp->nm_rsize = NFS_RSIZE;
1352		nmp->nm_readdirsize = NFS_READDIRSIZE;
1353	}
1354	nmp->nm_numgrps = NFS_MAXGRPS;
1355	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1356	if (nmp->nm_tprintf_delay < 0)
1357		nmp->nm_tprintf_delay = 0;
1358	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1359	if (nmp->nm_tprintf_initial_delay < 0)
1360		nmp->nm_tprintf_initial_delay = 0;
1361	nmp->nm_fhsize = argp->fhsize;
1362	if (nmp->nm_fhsize > 0)
1363		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1364	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1365	nmp->nm_nam = nam;
1366	/* Set up the sockets and per-host congestion */
1367	nmp->nm_sotype = argp->sotype;
1368	nmp->nm_soproto = argp->proto;
1369	nmp->nm_sockreq.nr_prog = NFS_PROG;
1370	if ((argp->flags & NFSMNT_NFSV4))
1371		nmp->nm_sockreq.nr_vers = NFS_VER4;
1372	else if ((argp->flags & NFSMNT_NFSV3))
1373		nmp->nm_sockreq.nr_vers = NFS_VER3;
1374	else
1375		nmp->nm_sockreq.nr_vers = NFS_VER2;
1376
1377
1378	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1379		goto bad;
1380	/* For NFSv4.1, get the clientid now. */
1381	if (nmp->nm_minorvers > 0) {
1382		NFSCL_DEBUG(3, "at getcl\n");
1383		error = nfscl_getcl(mp, cred, td, 0, &clp);
1384		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1385		if (error != 0)
1386			goto bad;
1387	}
1388
1389	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1390	    nmp->nm_dirpathlen > 0) {
1391		NFSCL_DEBUG(3, "in dirp\n");
1392		/*
1393		 * If the fhsize on the mount point == 0 for V4, the mount
1394		 * path needs to be looked up.
1395		 */
1396		trycnt = 3;
1397		do {
1398			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1399			    cred, td);
1400			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1401			if (error)
1402				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1403		} while (error && --trycnt > 0);
1404		if (error) {
1405			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1406			goto bad;
1407		}
1408	}
1409
1410	/*
1411	 * A reference count is needed on the nfsnode representing the
1412	 * remote root.  If this object is not persistent, then backward
1413	 * traversals of the mount point (i.e. "..") will not work if
1414	 * the nfsnode gets flushed out of the cache. Ufs does not have
1415	 * this problem, because one can identify root inodes by their
1416	 * number == ROOTINO (2).
1417	 */
1418	if (nmp->nm_fhsize > 0) {
1419		/*
1420		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1421		 * non-zero for the root vnode. f_iosize will be set correctly
1422		 * by nfs_statfs() before any I/O occurs.
1423		 */
1424		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1425		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1426		    LK_EXCLUSIVE);
1427		if (error)
1428			goto bad;
1429		*vpp = NFSTOV(np);
1430
1431		/*
1432		 * Get file attributes and transfer parameters for the
1433		 * mountpoint.  This has the side effect of filling in
1434		 * (*vpp)->v_type with the correct value.
1435		 */
1436		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1437		    cred, td, &nfsva, NULL, &lease);
1438		if (ret) {
1439			/*
1440			 * Just set default values to get things going.
1441			 */
1442			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1443			nfsva.na_vattr.va_type = VDIR;
1444			nfsva.na_vattr.va_mode = 0777;
1445			nfsva.na_vattr.va_nlink = 100;
1446			nfsva.na_vattr.va_uid = (uid_t)0;
1447			nfsva.na_vattr.va_gid = (gid_t)0;
1448			nfsva.na_vattr.va_fileid = 2;
1449			nfsva.na_vattr.va_gen = 1;
1450			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1451			nfsva.na_vattr.va_size = 512 * 1024;
1452			lease = 60;
1453		}
1454		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1455		if (nmp->nm_minorvers > 0) {
1456			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1457			NFSLOCKCLSTATE();
1458			clp->nfsc_renew = NFSCL_RENEW(lease);
1459			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1460			clp->nfsc_clientidrev++;
1461			if (clp->nfsc_clientidrev == 0)
1462				clp->nfsc_clientidrev++;
1463			NFSUNLOCKCLSTATE();
1464			/*
1465			 * Mount will succeed, so the renew thread can be
1466			 * started now.
1467			 */
1468			nfscl_start_renewthread(clp);
1469			nfscl_clientrelease(clp);
1470		}
1471		if (argp->flags & NFSMNT_NFSV3)
1472			ncl_fsinfo(nmp, *vpp, cred, td);
1473
1474		/* Mark if the mount point supports NFSv4 ACLs. */
1475		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1476		    ret == 0 &&
1477		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1478			MNT_ILOCK(mp);
1479			mp->mnt_flag |= MNT_NFS4ACLS;
1480			MNT_IUNLOCK(mp);
1481		}
1482
1483		/*
1484		 * Lose the lock but keep the ref.
1485		 */
1486		NFSVOPUNLOCK(*vpp, 0);
1487		return (0);
1488	}
1489	error = EIO;
1490
1491bad:
1492	if (clp != NULL)
1493		nfscl_clientrelease(clp);
1494	newnfs_disconnect(&nmp->nm_sockreq);
1495	crfree(nmp->nm_sockreq.nr_cred);
1496	if (nmp->nm_sockreq.nr_auth != NULL)
1497		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1498	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1499	mtx_destroy(&nmp->nm_mtx);
1500	if (nmp->nm_clp != NULL) {
1501		NFSLOCKCLSTATE();
1502		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1503		NFSUNLOCKCLSTATE();
1504		free(nmp->nm_clp, M_NFSCLCLIENT);
1505	}
1506	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1507		nfscl_freenfsclds(dsp);
1508	FREE(nmp, M_NEWNFSMNT);
1509	FREE(nam, M_SONAME);
1510	return (error);
1511}
1512
1513/*
1514 * unmount system call
1515 */
1516static int
1517nfs_unmount(struct mount *mp, int mntflags)
1518{
1519	struct thread *td;
1520	struct nfsmount *nmp;
1521	int error, flags = 0, i, trycnt = 0;
1522	struct nfsclds *dsp, *tdsp;
1523
1524	td = curthread;
1525
1526	if (mntflags & MNT_FORCE)
1527		flags |= FORCECLOSE;
1528	nmp = VFSTONFS(mp);
1529	/*
1530	 * Goes something like this..
1531	 * - Call vflush() to clear out vnodes for this filesystem
1532	 * - Close the socket
1533	 * - Free up the data structures
1534	 */
1535	/* In the forced case, cancel any outstanding requests. */
1536	if (mntflags & MNT_FORCE) {
1537		error = newnfs_nmcancelreqs(nmp);
1538		if (error)
1539			goto out;
1540		/* For a forced close, get rid of the renew thread now */
1541		nfscl_umount(nmp, td);
1542	}
1543	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1544	do {
1545		error = vflush(mp, 1, flags, td);
1546		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1547			(void) nfs_catnap(PSOCK, error, "newndm");
1548	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1549	if (error)
1550		goto out;
1551
1552	/*
1553	 * We are now committed to the unmount.
1554	 */
1555	if ((mntflags & MNT_FORCE) == 0)
1556		nfscl_umount(nmp, td);
1557	/* Make sure no nfsiods are assigned to this mount. */
1558	mtx_lock(&ncl_iod_mutex);
1559	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1560		if (ncl_iodmount[i] == nmp) {
1561			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1562			ncl_iodmount[i] = NULL;
1563		}
1564	mtx_unlock(&ncl_iod_mutex);
1565	newnfs_disconnect(&nmp->nm_sockreq);
1566	crfree(nmp->nm_sockreq.nr_cred);
1567	FREE(nmp->nm_nam, M_SONAME);
1568	if (nmp->nm_sockreq.nr_auth != NULL)
1569		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1570	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1571	mtx_destroy(&nmp->nm_mtx);
1572	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1573		nfscl_freenfsclds(dsp);
1574	FREE(nmp, M_NEWNFSMNT);
1575out:
1576	return (error);
1577}
1578
1579/*
1580 * Return root of a filesystem
1581 */
1582static int
1583nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1584{
1585	struct vnode *vp;
1586	struct nfsmount *nmp;
1587	struct nfsnode *np;
1588	int error;
1589
1590	nmp = VFSTONFS(mp);
1591	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1592	if (error)
1593		return error;
1594	vp = NFSTOV(np);
1595	/*
1596	 * Get transfer parameters and attributes for root vnode once.
1597	 */
1598	mtx_lock(&nmp->nm_mtx);
1599	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1600		mtx_unlock(&nmp->nm_mtx);
1601		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1602	} else
1603		mtx_unlock(&nmp->nm_mtx);
1604	if (vp->v_type == VNON)
1605	    vp->v_type = VDIR;
1606	vp->v_vflag |= VV_ROOT;
1607	*vpp = vp;
1608	return (0);
1609}
1610
1611/*
1612 * Flush out the buffer cache
1613 */
1614/* ARGSUSED */
1615static int
1616nfs_sync(struct mount *mp, int waitfor)
1617{
1618	struct vnode *vp, *mvp;
1619	struct thread *td;
1620	int error, allerror = 0;
1621
1622	td = curthread;
1623
1624	MNT_ILOCK(mp);
1625	/*
1626	 * If a forced dismount is in progress, return from here so that
1627	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1628	 * calling VFS_UNMOUNT().
1629	 */
1630	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1631		MNT_IUNLOCK(mp);
1632		return (EBADF);
1633	}
1634	MNT_IUNLOCK(mp);
1635
1636	/*
1637	 * Force stale buffer cache information to be flushed.
1638	 */
1639loop:
1640	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1641		/* XXX Racy bv_cnt check. */
1642		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1643		    waitfor == MNT_LAZY) {
1644			VI_UNLOCK(vp);
1645			continue;
1646		}
1647		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1648			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1649			goto loop;
1650		}
1651		error = VOP_FSYNC(vp, waitfor, td);
1652		if (error)
1653			allerror = error;
1654		NFSVOPUNLOCK(vp, 0);
1655		vrele(vp);
1656	}
1657	return (allerror);
1658}
1659
1660static int
1661nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1662{
1663	struct nfsmount *nmp = VFSTONFS(mp);
1664	struct vfsquery vq;
1665	int error;
1666
1667	bzero(&vq, sizeof(vq));
1668	switch (op) {
1669#if 0
1670	case VFS_CTL_NOLOCKS:
1671		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1672 		if (req->oldptr != NULL) {
1673 			error = SYSCTL_OUT(req, &val, sizeof(val));
1674 			if (error)
1675 				return (error);
1676 		}
1677 		if (req->newptr != NULL) {
1678 			error = SYSCTL_IN(req, &val, sizeof(val));
1679 			if (error)
1680 				return (error);
1681			if (val)
1682				nmp->nm_flag |= NFSMNT_NOLOCKS;
1683			else
1684				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1685 		}
1686		break;
1687#endif
1688	case VFS_CTL_QUERY:
1689		mtx_lock(&nmp->nm_mtx);
1690		if (nmp->nm_state & NFSSTA_TIMEO)
1691			vq.vq_flags |= VQ_NOTRESP;
1692		mtx_unlock(&nmp->nm_mtx);
1693#if 0
1694		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1695		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1696			vq.vq_flags |= VQ_NOTRESPLOCK;
1697#endif
1698		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1699		break;
1700 	case VFS_CTL_TIMEO:
1701 		if (req->oldptr != NULL) {
1702 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1703 			    sizeof(nmp->nm_tprintf_initial_delay));
1704 			if (error)
1705 				return (error);
1706 		}
1707 		if (req->newptr != NULL) {
1708			error = vfs_suser(mp, req->td);
1709			if (error)
1710				return (error);
1711 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1712 			    sizeof(nmp->nm_tprintf_initial_delay));
1713 			if (error)
1714 				return (error);
1715 			if (nmp->nm_tprintf_initial_delay < 0)
1716 				nmp->nm_tprintf_initial_delay = 0;
1717 		}
1718		break;
1719	default:
1720		return (ENOTSUP);
1721	}
1722	return (0);
1723}
1724
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 * The result of newnfs_nmcancelreqs() is not reported to the caller.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1737
1738/*
1739 * Extract the information needed by the nlm from the nfs vnode.
1740 */
1741static void
1742nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1743    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1744    struct timeval *timeop)
1745{
1746	struct nfsmount *nmp;
1747	struct nfsnode *np = VTONFS(vp);
1748
1749	nmp = VFSTONFS(vp->v_mount);
1750	if (fhlenp != NULL)
1751		*fhlenp = (size_t)np->n_fhp->nfh_len;
1752	if (fhp != NULL)
1753		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1754	if (sp != NULL)
1755		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1756	if (is_v3p != NULL)
1757		*is_v3p = NFS_ISV3(vp);
1758	if (sizep != NULL)
1759		*sizep = np->n_size;
1760	if (timeop != NULL) {
1761		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1762		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1763	}
1764}
1765
/*
 * Append the option name "opt" at *buf when "testval" is non-zero and
 * the name plus its terminating NUL fits in the *blen bytes left.
 * *buf and *blen are advanced past the appended name; otherwise both
 * are left alone.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* Cannot happen given the room check above. */
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1783
/*
 * This function prints out an option's integer value.
 *
 * "opt=optval" is appended at *buf when it fits completely in the
 * *blen bytes left, and *buf and *blen are advanced past it.  When it
 * does not fit, nothing is added: the buffer is terminated again where
 * it ended before the call, so that a cut off name or a cut off (and
 * therefore wrong) number never shows up in the output, and *buf and
 * *blen stay as they were.  "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/* Cheap pre-check: room for the name, '=' and at least a NUL. */
	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/* Truncated: erase the partial "opt=val" again. */
			**buf = '\0';
		}
	}
}
1801
1802/*
1803 * Load the option flags and values into the buffer.
1804 */
1805void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1806{
1807	char *buf;
1808	size_t blen;
1809
1810	buf = buffer;
1811	blen = buflen;
1812	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1813	    &blen);
1814	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1815		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1816		    &blen);
1817		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1818		    &buf, &blen);
1819	}
1820	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1821	    &blen);
1822	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1823	    "nfsv2", &buf, &blen);
1824	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1825	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1826	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1827	    &buf, &blen);
1828	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1829	    &buf, &blen);
1830	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1831	    &blen);
1832	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1833	    &blen);
1834	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1835	    &blen);
1836	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1837	    &blen);
1838	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1839	    &blen);
1840	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1841	    ",noncontigwr", &buf, &blen);
1842	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1843	    0, ",lockd", &buf, &blen);
1844	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1845	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1846	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1847	    &buf, &blen);
1848	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1849	    &buf, &blen);
1850	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1851	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1852	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1853	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1854	    &buf, &blen);
1855	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1856	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1857	    &buf, &blen);
1858	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1859	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1860	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1861	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1862	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1863	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1864	    &blen);
1865	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1866	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1867	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1868	    &blen);
1869	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1870	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1871	    &blen);
1872	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1873	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1874}
1875
1876