nfs_clvfsops.c revision 275249
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clvfsops.c 275249 2014-11-29 15:41:55Z trasz $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern struct nfsstats	newnfsstats;
82extern int nfsrv_useacl;
83extern int nfscl_debuglevel;
84extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86extern struct mtx ncl_iod_mutex;
87NFSCLSTATEMUTEX;
88
89MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
90MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
91
92SYSCTL_DECL(_vfs_nfs);
93static int nfs_ip_paranoia = 1;
94SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95    &nfs_ip_paranoia, 0, "");
96static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
97SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
98        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
99/* how long between console messages "nfs server foo not responding" */
100static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
101SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
102        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103
104static int	nfs_mountroot(struct mount *);
105static void	nfs_sec_name(char *, int *);
106static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
107		    struct nfs_args *argp, const char *, struct ucred *,
108		    struct thread *);
109static int	mountnfs(struct nfs_args *, struct mount *,
110		    struct sockaddr *, char *, u_char *, int, u_char *, int,
111		    u_char *, int, struct vnode **, struct ucred *,
112		    struct thread *, int, int, int);
113static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
114		    struct sockaddr_storage *, int *, off_t *,
115		    struct timeval *);
116static vfs_mount_t nfs_mount;
117static vfs_cmount_t nfs_cmount;
118static vfs_unmount_t nfs_unmount;
119static vfs_root_t nfs_root;
120static vfs_statfs_t nfs_statfs;
121static vfs_sync_t nfs_sync;
122static vfs_sysctl_t nfs_sysctl;
123static vfs_purge_t nfs_purge;
124
125/*
126 * nfs vfs operations.
127 */
128static struct vfsops nfs_vfsops = {
129	.vfs_init =		ncl_init,
130	.vfs_mount =		nfs_mount,
131	.vfs_cmount =		nfs_cmount,
132	.vfs_root =		nfs_root,
133	.vfs_statfs =		nfs_statfs,
134	.vfs_sync =		nfs_sync,
135	.vfs_uninit =		ncl_uninit,
136	.vfs_unmount =		nfs_unmount,
137	.vfs_sysctl =		nfs_sysctl,
138	.vfs_purge =		nfs_purge,
139};
140VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
141
142/* So that loader and kldload(2) can find us, wherever we are.. */
143MODULE_VERSION(nfs, 1);
144MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
145MODULE_DEPEND(nfs, krpc, 1, 1, 1);
146MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
147MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
148
149/*
150 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
151 * can be shared by both NFS clients. It is declared here so that it
152 * will be defined for kernels built without NFS_ROOT, although it
153 * isn't used in that case.
154 */
155#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
156struct nfs_diskless	nfs_diskless = { { { 0 } } };
157struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
158int			nfs_diskless_valid = 0;
159#endif
160
161SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
162    &nfs_diskless_valid, 0,
163    "Has the diskless struct been filled correctly");
164
165SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
166    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
167
168SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
169    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
170    "%Ssockaddr_in", "Diskless root nfs address");
171
172
173void		newnfsargs_ntoh(struct nfs_args *);
174static int	nfs_mountdiskless(char *,
175		    struct sockaddr_in *, struct nfs_args *,
176		    struct thread *, struct vnode **, struct mount *);
177static void	nfs_convert_diskless(void);
178static void	nfs_convert_oargs(struct nfs_args *args,
179		    struct onfs_args *oargs);
180
181int
182newnfs_iosize(struct nfsmount *nmp)
183{
184	int iosize, maxio;
185
186	/* First, set the upper limit for iosize */
187	if (nmp->nm_flag & NFSMNT_NFSV4) {
188		maxio = NFS_MAXBSIZE;
189	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
190		if (nmp->nm_sotype == SOCK_DGRAM)
191			maxio = NFS_MAXDGRAMDATA;
192		else
193			maxio = NFS_MAXBSIZE;
194	} else {
195		maxio = NFS_V2MAXDATA;
196	}
197	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198		nmp->nm_rsize = maxio;
199	if (nmp->nm_rsize > MAXBSIZE)
200		nmp->nm_rsize = MAXBSIZE;
201	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202		nmp->nm_readdirsize = maxio;
203	if (nmp->nm_readdirsize > nmp->nm_rsize)
204		nmp->nm_readdirsize = nmp->nm_rsize;
205	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206		nmp->nm_wsize = maxio;
207	if (nmp->nm_wsize > MAXBSIZE)
208		nmp->nm_wsize = MAXBSIZE;
209
210	/*
211	 * Calculate the size used for io buffers.  Use the larger
212	 * of the two sizes to minimise nfs requests but make sure
213	 * that it is at least one VM page to avoid wasting buffer
214	 * space.
215	 */
216	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
217	iosize = imax(iosize, PAGE_SIZE);
218	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
219	return (iosize);
220}
221
222static void
223nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
224{
225
226	args->version = NFS_ARGSVERSION;
227	args->addr = oargs->addr;
228	args->addrlen = oargs->addrlen;
229	args->sotype = oargs->sotype;
230	args->proto = oargs->proto;
231	args->fh = oargs->fh;
232	args->fhsize = oargs->fhsize;
233	args->flags = oargs->flags;
234	args->wsize = oargs->wsize;
235	args->rsize = oargs->rsize;
236	args->readdirsize = oargs->readdirsize;
237	args->timeo = oargs->timeo;
238	args->retrans = oargs->retrans;
239	args->readahead = oargs->readahead;
240	args->hostname = oargs->hostname;
241}
242
243static void
244nfs_convert_diskless(void)
245{
246
247	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
248		sizeof(struct ifaliasreq));
249	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
250		sizeof(struct sockaddr_in));
251	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
252	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
253		nfsv3_diskless.root_fhsize = NFSX_MYFH;
254		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
255	} else {
256		nfsv3_diskless.root_fhsize = NFSX_V2FH;
257		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
258	}
259	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
260		sizeof(struct sockaddr_in));
261	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
262	nfsv3_diskless.root_time = nfs_diskless.root_time;
263	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
264		MAXHOSTNAMELEN);
265	nfs_diskless_valid = 3;
266}
267
268/*
269 * nfs statfs call
270 */
271static int
272nfs_statfs(struct mount *mp, struct statfs *sbp)
273{
274	struct vnode *vp;
275	struct thread *td;
276	struct nfsmount *nmp = VFSTONFS(mp);
277	struct nfsvattr nfsva;
278	struct nfsfsinfo fs;
279	struct nfsstatfs sb;
280	int error = 0, attrflag, gotfsinfo = 0, ret;
281	struct nfsnode *np;
282
283	td = curthread;
284
285	error = vfs_busy(mp, MBF_NOWAIT);
286	if (error)
287		return (error);
288	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
289	if (error) {
290		vfs_unbusy(mp);
291		return (error);
292	}
293	vp = NFSTOV(np);
294	mtx_lock(&nmp->nm_mtx);
295	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
296		mtx_unlock(&nmp->nm_mtx);
297		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
298		    &attrflag, NULL);
299		if (!error)
300			gotfsinfo = 1;
301	} else
302		mtx_unlock(&nmp->nm_mtx);
303	if (!error)
304		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
305		    &attrflag, NULL);
306	if (error != 0)
307		NFSCL_DEBUG(2, "statfs=%d\n", error);
308	if (attrflag == 0) {
309		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
310		    td->td_ucred, td, &nfsva, NULL, NULL);
311		if (ret) {
312			/*
313			 * Just set default values to get things going.
314			 */
315			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
316			nfsva.na_vattr.va_type = VDIR;
317			nfsva.na_vattr.va_mode = 0777;
318			nfsva.na_vattr.va_nlink = 100;
319			nfsva.na_vattr.va_uid = (uid_t)0;
320			nfsva.na_vattr.va_gid = (gid_t)0;
321			nfsva.na_vattr.va_fileid = 2;
322			nfsva.na_vattr.va_gen = 1;
323			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
324			nfsva.na_vattr.va_size = 512 * 1024;
325		}
326	}
327	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
328	if (!error) {
329	    mtx_lock(&nmp->nm_mtx);
330	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
331		nfscl_loadfsinfo(nmp, &fs);
332	    nfscl_loadsbinfo(nmp, &sb, sbp);
333	    sbp->f_iosize = newnfs_iosize(nmp);
334	    mtx_unlock(&nmp->nm_mtx);
335	    if (sbp != &mp->mnt_stat) {
336		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
337		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
338	    }
339	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
340	} else if (NFS_ISV4(vp)) {
341		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
342	}
343	vput(vp);
344	vfs_unbusy(mp);
345	return (error);
346}
347
348/*
349 * nfs version 3 fsinfo rpc call
350 */
351int
352ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
353    struct thread *td)
354{
355	struct nfsfsinfo fs;
356	struct nfsvattr nfsva;
357	int error, attrflag;
358
359	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
360	if (!error) {
361		if (attrflag)
362			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
363			    1);
364		mtx_lock(&nmp->nm_mtx);
365		nfscl_loadfsinfo(nmp, &fs);
366		mtx_unlock(&nmp->nm_mtx);
367	}
368	return (error);
369}
370
371/*
372 * Mount a remote root fs via. nfs. This depends on the info in the
373 * nfs_diskless structure that has been filled in properly by some primary
374 * bootstrap.
375 * It goes something like this:
376 * - do enough of "ifconfig" by calling ifioctl() so that the system
377 *   can talk to the server
378 * - If nfs_diskless.mygateway is filled in, use that address as
379 *   a default gateway.
380 * - build the rootfs mount point and call mountnfs() to do the rest.
381 *
382 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
383 * structure, as well as other global NFS client variables here, as
384 * nfs_mountroot() will be called once in the boot before any other NFS
385 * client activity occurs.
386 */
387static int
388nfs_mountroot(struct mount *mp)
389{
390	struct thread *td = curthread;
391	struct nfsv3_diskless *nd = &nfsv3_diskless;
392	struct socket *so;
393	struct vnode *vp;
394	struct ifreq ir;
395	int error;
396	u_long l;
397	char buf[128];
398	char *cp;
399
400#if defined(BOOTP_NFSROOT) && defined(BOOTP)
401	bootpc_init();		/* use bootp to get nfs_diskless filled in */
402#elif defined(NFS_ROOT)
403	nfs_setup_diskless();
404#endif
405
406	if (nfs_diskless_valid == 0)
407		return (-1);
408	if (nfs_diskless_valid == 1)
409		nfs_convert_diskless();
410
411	/*
412	 * XXX splnet, so networks will receive...
413	 */
414	splnet();
415
416	/*
417	 * Do enough of ifconfig(8) so that the critical net interface can
418	 * talk to the server.
419	 */
420	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
421	    td->td_ucred, td);
422	if (error)
423		panic("nfs_mountroot: socreate(%04x): %d",
424			nd->myif.ifra_addr.sa_family, error);
425
426#if 0 /* XXX Bad idea */
427	/*
428	 * We might not have been told the right interface, so we pass
429	 * over the first ten interfaces of the same kind, until we get
430	 * one of them configured.
431	 */
432
433	for (i = strlen(nd->myif.ifra_name) - 1;
434		nd->myif.ifra_name[i] >= '0' &&
435		nd->myif.ifra_name[i] <= '9';
436		nd->myif.ifra_name[i] ++) {
437		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
438		if(!error)
439			break;
440	}
441#endif
442	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
443	if (error)
444		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
445	if ((cp = getenv("boot.netif.mtu")) != NULL) {
446		ir.ifr_mtu = strtol(cp, NULL, 10);
447		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
448		freeenv(cp);
449		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
450		if (error)
451			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
452	}
453	soclose(so);
454
455	/*
456	 * If the gateway field is filled in, set it as the default route.
457	 * Note that pxeboot will set a default route of 0 if the route
458	 * is not set by the DHCP server.  Check also for a value of 0
459	 * to avoid panicking inappropriately in that situation.
460	 */
461	if (nd->mygateway.sin_len != 0 &&
462	    nd->mygateway.sin_addr.s_addr != 0) {
463		struct sockaddr_in mask, sin;
464
465		bzero((caddr_t)&mask, sizeof(mask));
466		sin = mask;
467		sin.sin_family = AF_INET;
468		sin.sin_len = sizeof(sin);
469                /* XXX MRT use table 0 for this sort of thing */
470		CURVNET_SET(TD_TO_VNET(td));
471		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
472		    (struct sockaddr *)&nd->mygateway,
473		    (struct sockaddr *)&mask,
474		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
475		CURVNET_RESTORE();
476		if (error)
477			panic("nfs_mountroot: RTM_ADD: %d", error);
478	}
479
480	/*
481	 * Create the rootfs mount point.
482	 */
483	nd->root_args.fh = nd->root_fh;
484	nd->root_args.fhsize = nd->root_fhsize;
485	l = ntohl(nd->root_saddr.sin_addr.s_addr);
486	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
487		(l >> 24) & 0xff, (l >> 16) & 0xff,
488		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
489	printf("NFS ROOT: %s\n", buf);
490	nd->root_args.hostname = buf;
491	if ((error = nfs_mountdiskless(buf,
492	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
493		return (error);
494	}
495
496	/*
497	 * This is not really an nfs issue, but it is much easier to
498	 * set hostname here and then let the "/etc/rc.xxx" files
499	 * mount the right /var based upon its preset value.
500	 */
501	mtx_lock(&prison0.pr_mtx);
502	strlcpy(prison0.pr_hostname, nd->my_hostnam,
503	    sizeof(prison0.pr_hostname));
504	mtx_unlock(&prison0.pr_mtx);
505	inittodr(ntohl(nd->root_time));
506	return (0);
507}
508
509/*
510 * Internal version of mount system call for diskless setup.
511 */
512static int
513nfs_mountdiskless(char *path,
514    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
515    struct vnode **vpp, struct mount *mp)
516{
517	struct sockaddr *nam;
518	int dirlen, error;
519	char *dirpath;
520
521	/*
522	 * Find the directory path in "path", which also has the server's
523	 * name/ip address in it.
524	 */
525	dirpath = strchr(path, ':');
526	if (dirpath != NULL)
527		dirlen = strlen(++dirpath);
528	else
529		dirlen = 0;
530	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
531	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
532	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
533	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
534		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
535		return (error);
536	}
537	return (0);
538}
539
540static void
541nfs_sec_name(char *sec, int *flagsp)
542{
543	if (!strcmp(sec, "krb5"))
544		*flagsp |= NFSMNT_KERB;
545	else if (!strcmp(sec, "krb5i"))
546		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
547	else if (!strcmp(sec, "krb5p"))
548		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
549}
550
551static void
552nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
553    const char *hostname, struct ucred *cred, struct thread *td)
554{
555	int s;
556	int adjsock;
557	char *p;
558
559	s = splnet();
560
561	/*
562	 * Set read-only flag if requested; otherwise, clear it if this is
563	 * an update.  If this is not an update, then either the read-only
564	 * flag is already clear, or this is a root mount and it was set
565	 * intentionally at some previous point.
566	 */
567	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
568		MNT_ILOCK(mp);
569		mp->mnt_flag |= MNT_RDONLY;
570		MNT_IUNLOCK(mp);
571	} else if (mp->mnt_flag & MNT_UPDATE) {
572		MNT_ILOCK(mp);
573		mp->mnt_flag &= ~MNT_RDONLY;
574		MNT_IUNLOCK(mp);
575	}
576
577	/*
578	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
579	 * no sense in that context.  Also, set up appropriate retransmit
580	 * and soft timeout behavior.
581	 */
582	if (argp->sotype == SOCK_STREAM) {
583		nmp->nm_flag &= ~NFSMNT_NOCONN;
584		nmp->nm_timeo = NFS_MAXTIMEO;
585		if ((argp->flags & NFSMNT_NFSV4) != 0)
586			nmp->nm_retry = INT_MAX;
587		else
588			nmp->nm_retry = NFS_RETRANS_TCP;
589	}
590
591	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
592	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
593		argp->flags &= ~NFSMNT_RDIRPLUS;
594		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
595	}
596
597	/* Re-bind if rsrvd port requested and wasn't on one */
598	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
599		  && (argp->flags & NFSMNT_RESVPORT);
600	/* Also re-bind if we're switching to/from a connected UDP socket */
601	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
602		    (argp->flags & NFSMNT_NOCONN));
603
604	/* Update flags atomically.  Don't change the lock bits. */
605	nmp->nm_flag = argp->flags | nmp->nm_flag;
606	splx(s);
607
608	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
609		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
610		if (nmp->nm_timeo < NFS_MINTIMEO)
611			nmp->nm_timeo = NFS_MINTIMEO;
612		else if (nmp->nm_timeo > NFS_MAXTIMEO)
613			nmp->nm_timeo = NFS_MAXTIMEO;
614	}
615
616	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
617		nmp->nm_retry = argp->retrans;
618		if (nmp->nm_retry > NFS_MAXREXMIT)
619			nmp->nm_retry = NFS_MAXREXMIT;
620	}
621
622	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
623		nmp->nm_wsize = argp->wsize;
624		/*
625		 * Clip at the power of 2 below the size. There is an
626		 * issue (not isolated) that causes intermittent page
627		 * faults if this is not done.
628		 */
629		if (nmp->nm_wsize > NFS_FABLKSIZE)
630			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
631		else
632			nmp->nm_wsize = NFS_FABLKSIZE;
633	}
634
635	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
636		nmp->nm_rsize = argp->rsize;
637		/*
638		 * Clip at the power of 2 below the size. There is an
639		 * issue (not isolated) that causes intermittent page
640		 * faults if this is not done.
641		 */
642		if (nmp->nm_rsize > NFS_FABLKSIZE)
643			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
644		else
645			nmp->nm_rsize = NFS_FABLKSIZE;
646	}
647
648	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
649		nmp->nm_readdirsize = argp->readdirsize;
650	}
651
652	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
653		nmp->nm_acregmin = argp->acregmin;
654	else
655		nmp->nm_acregmin = NFS_MINATTRTIMO;
656	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
657		nmp->nm_acregmax = argp->acregmax;
658	else
659		nmp->nm_acregmax = NFS_MAXATTRTIMO;
660	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
661		nmp->nm_acdirmin = argp->acdirmin;
662	else
663		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
664	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
665		nmp->nm_acdirmax = argp->acdirmax;
666	else
667		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
668	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
669		nmp->nm_acdirmin = nmp->nm_acdirmax;
670	if (nmp->nm_acregmin > nmp->nm_acregmax)
671		nmp->nm_acregmin = nmp->nm_acregmax;
672
673	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
674		if (argp->readahead <= NFS_MAXRAHEAD)
675			nmp->nm_readahead = argp->readahead;
676		else
677			nmp->nm_readahead = NFS_MAXRAHEAD;
678	}
679	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
680		if (argp->wcommitsize < nmp->nm_wsize)
681			nmp->nm_wcommitsize = nmp->nm_wsize;
682		else
683			nmp->nm_wcommitsize = argp->wcommitsize;
684	}
685
686	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
687		    (nmp->nm_soproto != argp->proto));
688
689	if (nmp->nm_client != NULL && adjsock) {
690		int haslock = 0, error = 0;
691
692		if (nmp->nm_sotype == SOCK_STREAM) {
693			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
694			if (!error)
695				haslock = 1;
696		}
697		if (!error) {
698		    newnfs_disconnect(&nmp->nm_sockreq);
699		    if (haslock)
700			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
701		    nmp->nm_sotype = argp->sotype;
702		    nmp->nm_soproto = argp->proto;
703		    if (nmp->nm_sotype == SOCK_DGRAM)
704			while (newnfs_connect(nmp, &nmp->nm_sockreq,
705			    cred, td, 0)) {
706				printf("newnfs_args: retrying connect\n");
707				(void) nfs_catnap(PSOCK, 0, "newnfscon");
708			}
709		}
710	} else {
711		nmp->nm_sotype = argp->sotype;
712		nmp->nm_soproto = argp->proto;
713	}
714
715	if (hostname != NULL) {
716		strlcpy(nmp->nm_hostname, hostname,
717		    sizeof(nmp->nm_hostname));
718		p = strchr(nmp->nm_hostname, ':');
719		if (p != NULL)
720			*p = '\0';
721	}
722}
723
/*
 * NULL-terminated list of mount option names; nfs_mount() passes it to
 * vfs_filteropt() to validate the options supplied for a mount.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize",
	"retrans", "actimeo",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout",
	"addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath",
	"minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
	"pnfs", "wcommitsize",
	NULL
};
735
736/*
737 * VFS Operations.
738 *
739 * mount system call
740 * It seems a bit dumb to copyinstr() the host and path here and then
741 * bcopy() them in mountnfs(), but I wanted to detect errors before
742 * doing the sockargs() call because sockargs() allocates an mbuf and
743 * an error after that means that I have to release the mbuf.
744 */
745/* ARGSUSED */
746static int
747nfs_mount(struct mount *mp)
748{
749	struct nfs_args args = {
750	    .version = NFS_ARGSVERSION,
751	    .addr = NULL,
752	    .addrlen = sizeof (struct sockaddr_in),
753	    .sotype = SOCK_STREAM,
754	    .proto = 0,
755	    .fh = NULL,
756	    .fhsize = 0,
757	    .flags = NFSMNT_RESVPORT,
758	    .wsize = NFS_WSIZE,
759	    .rsize = NFS_RSIZE,
760	    .readdirsize = NFS_READDIRSIZE,
761	    .timeo = 10,
762	    .retrans = NFS_RETRANS,
763	    .readahead = NFS_DEFRAHEAD,
764	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
765	    .hostname = NULL,
766	    .acregmin = NFS_MINATTRTIMO,
767	    .acregmax = NFS_MAXATTRTIMO,
768	    .acdirmin = NFS_MINDIRATTRTIMO,
769	    .acdirmax = NFS_MAXDIRATTRTIMO,
770	};
771	int error = 0, ret, len;
772	struct sockaddr *nam = NULL;
773	struct vnode *vp;
774	struct thread *td;
775	char hst[MNAMELEN];
776	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
777	char *opt, *name, *secname;
778	int nametimeo = NFS_DEFAULT_NAMETIMEO;
779	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
780	int minvers = 0;
781	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
782	size_t hstlen;
783
784	has_nfs_args_opt = 0;
785	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
786		error = EINVAL;
787		goto out;
788	}
789
790	td = curthread;
791	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
792		error = nfs_mountroot(mp);
793		goto out;
794	}
795
796	nfscl_init();
797
798	/*
799	 * The old mount_nfs program passed the struct nfs_args
800	 * from userspace to kernel.  The new mount_nfs program
801	 * passes string options via nmount() from userspace to kernel
802	 * and we populate the struct nfs_args in the kernel.
803	 */
804	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
805		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
806		    sizeof(args));
807		if (error != 0)
808			goto out;
809
810		if (args.version != NFS_ARGSVERSION) {
811			error = EPROGMISMATCH;
812			goto out;
813		}
814		has_nfs_args_opt = 1;
815	}
816
817	/* Handle the new style options. */
818	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
819		args.acdirmin = args.acdirmax =
820		    args.acregmin = args.acregmax = 0;
821		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
822		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
823	}
824	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
825		args.flags |= NFSMNT_NOCONN;
826	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
827		args.flags |= NFSMNT_NOCONN;
828	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
829		args.flags |= NFSMNT_NOLOCKD;
830	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
831		args.flags &= ~NFSMNT_NOLOCKD;
832	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
833		args.flags |= NFSMNT_INT;
834	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
835		args.flags |= NFSMNT_RDIRPLUS;
836	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
837		args.flags |= NFSMNT_RESVPORT;
838	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
839		args.flags &= ~NFSMNT_RESVPORT;
840	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
841		args.flags |= NFSMNT_SOFT;
842	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
843		args.flags &= ~NFSMNT_SOFT;
844	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
845		args.sotype = SOCK_DGRAM;
846	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
847		args.sotype = SOCK_DGRAM;
848	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
849		args.sotype = SOCK_STREAM;
850	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
851		args.flags |= NFSMNT_NFSV3;
852	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
853		args.flags |= NFSMNT_NFSV4;
854		args.sotype = SOCK_STREAM;
855	}
856	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
857		args.flags |= NFSMNT_ALLGSSNAME;
858	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
859		args.flags |= NFSMNT_NOCTO;
860	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
861		args.flags |= NFSMNT_NONCONTIGWR;
862	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
863		args.flags |= NFSMNT_PNFS;
864	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
865		if (opt == NULL) {
866			vfs_mount_error(mp, "illegal readdirsize");
867			error = EINVAL;
868			goto out;
869		}
870		ret = sscanf(opt, "%d", &args.readdirsize);
871		if (ret != 1 || args.readdirsize <= 0) {
872			vfs_mount_error(mp, "illegal readdirsize: %s",
873			    opt);
874			error = EINVAL;
875			goto out;
876		}
877		args.flags |= NFSMNT_READDIRSIZE;
878	}
879	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
880		if (opt == NULL) {
881			vfs_mount_error(mp, "illegal readahead");
882			error = EINVAL;
883			goto out;
884		}
885		ret = sscanf(opt, "%d", &args.readahead);
886		if (ret != 1 || args.readahead <= 0) {
887			vfs_mount_error(mp, "illegal readahead: %s",
888			    opt);
889			error = EINVAL;
890			goto out;
891		}
892		args.flags |= NFSMNT_READAHEAD;
893	}
894	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
895		if (opt == NULL) {
896			vfs_mount_error(mp, "illegal wsize");
897			error = EINVAL;
898			goto out;
899		}
900		ret = sscanf(opt, "%d", &args.wsize);
901		if (ret != 1 || args.wsize <= 0) {
902			vfs_mount_error(mp, "illegal wsize: %s",
903			    opt);
904			error = EINVAL;
905			goto out;
906		}
907		args.flags |= NFSMNT_WSIZE;
908	}
909	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
910		if (opt == NULL) {
911			vfs_mount_error(mp, "illegal rsize");
912			error = EINVAL;
913			goto out;
914		}
915		ret = sscanf(opt, "%d", &args.rsize);
916		if (ret != 1 || args.rsize <= 0) {
917			vfs_mount_error(mp, "illegal wsize: %s",
918			    opt);
919			error = EINVAL;
920			goto out;
921		}
922		args.flags |= NFSMNT_RSIZE;
923	}
924	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
925		if (opt == NULL) {
926			vfs_mount_error(mp, "illegal retrans");
927			error = EINVAL;
928			goto out;
929		}
930		ret = sscanf(opt, "%d", &args.retrans);
931		if (ret != 1 || args.retrans <= 0) {
932			vfs_mount_error(mp, "illegal retrans: %s",
933			    opt);
934			error = EINVAL;
935			goto out;
936		}
937		args.flags |= NFSMNT_RETRANS;
938	}
939	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
940		ret = sscanf(opt, "%d", &args.acregmin);
941		if (ret != 1 || args.acregmin < 0) {
942			vfs_mount_error(mp, "illegal actimeo: %s",
943			    opt);
944			error = EINVAL;
945			goto out;
946		}
947		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
948		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
949		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
950	}
951	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
952		ret = sscanf(opt, "%d", &args.acregmin);
953		if (ret != 1 || args.acregmin < 0) {
954			vfs_mount_error(mp, "illegal acregmin: %s",
955			    opt);
956			error = EINVAL;
957			goto out;
958		}
959		args.flags |= NFSMNT_ACREGMIN;
960	}
961	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
962		ret = sscanf(opt, "%d", &args.acregmax);
963		if (ret != 1 || args.acregmax < 0) {
964			vfs_mount_error(mp, "illegal acregmax: %s",
965			    opt);
966			error = EINVAL;
967			goto out;
968		}
969		args.flags |= NFSMNT_ACREGMAX;
970	}
971	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
972		ret = sscanf(opt, "%d", &args.acdirmin);
973		if (ret != 1 || args.acdirmin < 0) {
974			vfs_mount_error(mp, "illegal acdirmin: %s",
975			    opt);
976			error = EINVAL;
977			goto out;
978		}
979		args.flags |= NFSMNT_ACDIRMIN;
980	}
981	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
982		ret = sscanf(opt, "%d", &args.acdirmax);
983		if (ret != 1 || args.acdirmax < 0) {
984			vfs_mount_error(mp, "illegal acdirmax: %s",
985			    opt);
986			error = EINVAL;
987			goto out;
988		}
989		args.flags |= NFSMNT_ACDIRMAX;
990	}
991	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
992		ret = sscanf(opt, "%d", &args.wcommitsize);
993		if (ret != 1 || args.wcommitsize < 0) {
994			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
995			error = EINVAL;
996			goto out;
997		}
998		args.flags |= NFSMNT_WCOMMITSIZE;
999	}
1000	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1001		ret = sscanf(opt, "%d", &args.timeo);
1002		if (ret != 1 || args.timeo <= 0) {
1003			vfs_mount_error(mp, "illegal timeo: %s",
1004			    opt);
1005			error = EINVAL;
1006			goto out;
1007		}
1008		args.flags |= NFSMNT_TIMEO;
1009	}
1010	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1011		ret = sscanf(opt, "%d", &args.timeo);
1012		if (ret != 1 || args.timeo <= 0) {
1013			vfs_mount_error(mp, "illegal timeout: %s",
1014			    opt);
1015			error = EINVAL;
1016			goto out;
1017		}
1018		args.flags |= NFSMNT_TIMEO;
1019	}
1020	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1021		ret = sscanf(opt, "%d", &nametimeo);
1022		if (ret != 1 || nametimeo < 0) {
1023			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1024			error = EINVAL;
1025			goto out;
1026		}
1027	}
1028	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1029	    == 0) {
1030		ret = sscanf(opt, "%d", &negnametimeo);
1031		if (ret != 1 || negnametimeo < 0) {
1032			vfs_mount_error(mp, "illegal negnametimeo: %s",
1033			    opt);
1034			error = EINVAL;
1035			goto out;
1036		}
1037	}
1038	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1039	    0) {
1040		ret = sscanf(opt, "%d", &minvers);
1041		if (ret != 1 || minvers < 0 || minvers > 1 ||
1042		    (args.flags & NFSMNT_NFSV4) == 0) {
1043			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1044			error = EINVAL;
1045			goto out;
1046		}
1047	}
1048	if (vfs_getopt(mp->mnt_optnew, "sec",
1049		(void **) &secname, NULL) == 0)
1050		nfs_sec_name(secname, &args.flags);
1051
1052	if (mp->mnt_flag & MNT_UPDATE) {
1053		struct nfsmount *nmp = VFSTONFS(mp);
1054
1055		if (nmp == NULL) {
1056			error = EIO;
1057			goto out;
1058		}
1059
1060		/*
1061		 * If a change from TCP->UDP is done and there are thread(s)
1062		 * that have I/O RPC(s) in progress with a tranfer size
1063		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1064		 * hung, retrying the RPC(s) forever. Usually these threads
1065		 * will be seen doing an uninterruptible sleep on wait channel
1066		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1067		 */
1068		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1069			tprintf(td->td_proc, LOG_WARNING,
1070	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1071
1072		/*
1073		 * When doing an update, we can't change version,
1074		 * security, switch lockd strategies or change cookie
1075		 * translation
1076		 */
1077		args.flags = (args.flags &
1078		    ~(NFSMNT_NFSV3 |
1079		      NFSMNT_NFSV4 |
1080		      NFSMNT_KERB |
1081		      NFSMNT_INTEGRITY |
1082		      NFSMNT_PRIVACY |
1083		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1084		    (nmp->nm_flag &
1085			(NFSMNT_NFSV3 |
1086			 NFSMNT_NFSV4 |
1087			 NFSMNT_KERB |
1088			 NFSMNT_INTEGRITY |
1089			 NFSMNT_PRIVACY |
1090			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1091		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1092		goto out;
1093	}
1094
1095	/*
1096	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1097	 * or no-connection mode for those protocols that support
1098	 * no-connection mode (the flag will be cleared later for protocols
1099	 * that do not support no-connection mode).  This will allow a client
1100	 * to receive replies from a different IP then the request was
1101	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1102	 * not 0.
1103	 */
1104	if (nfs_ip_paranoia == 0)
1105		args.flags |= NFSMNT_NOCONN;
1106
1107	if (has_nfs_args_opt != 0) {
1108		/*
1109		 * In the 'nfs_args' case, the pointers in the args
1110		 * structure are in userland - we copy them in here.
1111		 */
1112		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1113			vfs_mount_error(mp, "Bad file handle");
1114			error = EINVAL;
1115			goto out;
1116		}
1117		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1118		    args.fhsize);
1119		if (error != 0)
1120			goto out;
1121		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1122		if (error != 0)
1123			goto out;
1124		bzero(&hst[hstlen], MNAMELEN - hstlen);
1125		args.hostname = hst;
1126		/* sockargs() call must be after above copyin() calls */
1127		error = getsockaddr(&nam, (caddr_t)args.addr,
1128		    args.addrlen);
1129		if (error != 0)
1130			goto out;
1131	} else {
1132		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1133		    &args.fhsize) == 0) {
1134			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1135				vfs_mount_error(mp, "Bad file handle");
1136				error = EINVAL;
1137				goto out;
1138			}
1139			bcopy(args.fh, nfh, args.fhsize);
1140		} else {
1141			args.fhsize = 0;
1142		}
1143		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1144		    (void **)&args.hostname, &len);
1145		if (args.hostname == NULL) {
1146			vfs_mount_error(mp, "Invalid hostname");
1147			error = EINVAL;
1148			goto out;
1149		}
1150		bcopy(args.hostname, hst, MNAMELEN);
1151		hst[MNAMELEN - 1] = '\0';
1152	}
1153
1154	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1155		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1156	else
1157		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1158	srvkrbnamelen = strlen(srvkrbname);
1159
1160	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1161		strlcpy(krbname, name, sizeof (krbname));
1162	else
1163		krbname[0] = '\0';
1164	krbnamelen = strlen(krbname);
1165
1166	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1167		strlcpy(dirpath, name, sizeof (dirpath));
1168	else
1169		dirpath[0] = '\0';
1170	dirlen = strlen(dirpath);
1171
1172	if (has_nfs_args_opt == 0) {
1173		if (vfs_getopt(mp->mnt_optnew, "addr",
1174		    (void **)&args.addr, &args.addrlen) == 0) {
1175			if (args.addrlen > SOCK_MAXADDRLEN) {
1176				error = ENAMETOOLONG;
1177				goto out;
1178			}
1179			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1180			bcopy(args.addr, nam, args.addrlen);
1181			nam->sa_len = args.addrlen;
1182		} else {
1183			vfs_mount_error(mp, "No server address");
1184			error = EINVAL;
1185			goto out;
1186		}
1187	}
1188
1189	args.fh = nfh;
1190	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1191	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1192	    nametimeo, negnametimeo, minvers);
1193out:
1194	if (!error) {
1195		MNT_ILOCK(mp);
1196		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF;
1197		MNT_IUNLOCK(mp);
1198	}
1199	return (error);
1200}
1201
1202
1203/*
1204 * VFS Operations.
1205 *
1206 * mount system call
1207 * It seems a bit dumb to copyinstr() the host and path here and then
1208 * bcopy() them in mountnfs(), but I wanted to detect errors before
1209 * doing the sockargs() call because sockargs() allocates an mbuf and
1210 * an error after that means that I have to release the mbuf.
1211 */
1212/* ARGSUSED */
1213static int
1214nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1215{
1216	int error;
1217	struct nfs_args args;
1218
1219	error = copyin(data, &args, sizeof (struct nfs_args));
1220	if (error)
1221		return error;
1222
1223	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1224
1225	error = kernel_mount(ma, flags);
1226	return (error);
1227}
1228
1229/*
1230 * Common code for mount and mountroot
1231 */
1232static int
1233mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1234    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1235    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1236    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1237    int minvers)
1238{
1239	struct nfsmount *nmp;
1240	struct nfsnode *np;
1241	int error, trycnt, ret;
1242	struct nfsvattr nfsva;
1243	struct nfsclclient *clp;
1244	struct nfsclds *dsp, *tdsp;
1245	uint32_t lease;
1246	static u_int64_t clval = 0;
1247
1248	NFSCL_DEBUG(3, "in mnt\n");
1249	clp = NULL;
1250	if (mp->mnt_flag & MNT_UPDATE) {
1251		nmp = VFSTONFS(mp);
1252		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1253		FREE(nam, M_SONAME);
1254		return (0);
1255	} else {
1256		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1257		    krbnamelen + dirlen + srvkrbnamelen + 2,
1258		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1259		TAILQ_INIT(&nmp->nm_bufq);
1260		if (clval == 0)
1261			clval = (u_int64_t)nfsboottime.tv_sec;
1262		nmp->nm_clval = clval++;
1263		nmp->nm_krbnamelen = krbnamelen;
1264		nmp->nm_dirpathlen = dirlen;
1265		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1266		if (td->td_ucred->cr_uid != (uid_t)0) {
1267			/*
1268			 * nm_uid is used to get KerberosV credentials for
1269			 * the nfsv4 state handling operations if there is
1270			 * no host based principal set. Use the uid of
1271			 * this user if not root, since they are doing the
1272			 * mount. I don't think setting this for root will
1273			 * work, since root normally does not have user
1274			 * credentials in a credentials cache.
1275			 */
1276			nmp->nm_uid = td->td_ucred->cr_uid;
1277		} else {
1278			/*
1279			 * Just set to -1, so it won't be used.
1280			 */
1281			nmp->nm_uid = (uid_t)-1;
1282		}
1283
1284		/* Copy and null terminate all the names */
1285		if (nmp->nm_krbnamelen > 0) {
1286			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1287			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1288		}
1289		if (nmp->nm_dirpathlen > 0) {
1290			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1291			    nmp->nm_dirpathlen);
1292			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1293			    + 1] = '\0';
1294		}
1295		if (nmp->nm_srvkrbnamelen > 0) {
1296			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1297			    nmp->nm_srvkrbnamelen);
1298			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1299			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1300		}
1301		nmp->nm_sockreq.nr_cred = crhold(cred);
1302		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1303		mp->mnt_data = nmp;
1304		nmp->nm_getinfo = nfs_getnlminfo;
1305		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1306	}
1307	vfs_getnewfsid(mp);
1308	nmp->nm_mountp = mp;
1309	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1310
1311	/*
1312	 * Since nfs_decode_args() might optionally set them, these
1313	 * need to be set to defaults before the call, so that the
1314	 * optional settings aren't overwritten.
1315	 */
1316	nmp->nm_nametimeo = nametimeo;
1317	nmp->nm_negnametimeo = negnametimeo;
1318	nmp->nm_timeo = NFS_TIMEO;
1319	nmp->nm_retry = NFS_RETRANS;
1320	nmp->nm_readahead = NFS_DEFRAHEAD;
1321	if (desiredvnodes >= 11000)
1322		nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
1323	else
1324		nmp->nm_wcommitsize = hibufspace / 10;
1325	if ((argp->flags & NFSMNT_NFSV4) != 0)
1326		nmp->nm_minorvers = minvers;
1327	else
1328		nmp->nm_minorvers = 0;
1329
1330	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1331
1332	/*
1333	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1334	 * high, depending on whether we end up with negative offsets in
1335	 * the client or server somewhere.  2GB-1 may be safer.
1336	 *
1337	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1338	 * that we can handle until we find out otherwise.
1339	 */
1340	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1341		nmp->nm_maxfilesize = 0xffffffffLL;
1342	else
1343		nmp->nm_maxfilesize = OFF_MAX;
1344
1345	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1346		nmp->nm_wsize = NFS_WSIZE;
1347		nmp->nm_rsize = NFS_RSIZE;
1348		nmp->nm_readdirsize = NFS_READDIRSIZE;
1349	}
1350	nmp->nm_numgrps = NFS_MAXGRPS;
1351	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1352	if (nmp->nm_tprintf_delay < 0)
1353		nmp->nm_tprintf_delay = 0;
1354	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1355	if (nmp->nm_tprintf_initial_delay < 0)
1356		nmp->nm_tprintf_initial_delay = 0;
1357	nmp->nm_fhsize = argp->fhsize;
1358	if (nmp->nm_fhsize > 0)
1359		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1360	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1361	nmp->nm_nam = nam;
1362	/* Set up the sockets and per-host congestion */
1363	nmp->nm_sotype = argp->sotype;
1364	nmp->nm_soproto = argp->proto;
1365	nmp->nm_sockreq.nr_prog = NFS_PROG;
1366	if ((argp->flags & NFSMNT_NFSV4))
1367		nmp->nm_sockreq.nr_vers = NFS_VER4;
1368	else if ((argp->flags & NFSMNT_NFSV3))
1369		nmp->nm_sockreq.nr_vers = NFS_VER3;
1370	else
1371		nmp->nm_sockreq.nr_vers = NFS_VER2;
1372
1373
1374	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1375		goto bad;
1376	/* For NFSv4.1, get the clientid now. */
1377	if (nmp->nm_minorvers > 0) {
1378		NFSCL_DEBUG(3, "at getcl\n");
1379		error = nfscl_getcl(mp, cred, td, 0, &clp);
1380		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1381		if (error != 0)
1382			goto bad;
1383	}
1384
1385	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1386	    nmp->nm_dirpathlen > 0) {
1387		NFSCL_DEBUG(3, "in dirp\n");
1388		/*
1389		 * If the fhsize on the mount point == 0 for V4, the mount
1390		 * path needs to be looked up.
1391		 */
1392		trycnt = 3;
1393		do {
1394			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1395			    cred, td);
1396			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1397			if (error)
1398				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1399		} while (error && --trycnt > 0);
1400		if (error) {
1401			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1402			goto bad;
1403		}
1404	}
1405
1406	/*
1407	 * A reference count is needed on the nfsnode representing the
1408	 * remote root.  If this object is not persistent, then backward
1409	 * traversals of the mount point (i.e. "..") will not work if
1410	 * the nfsnode gets flushed out of the cache. Ufs does not have
1411	 * this problem, because one can identify root inodes by their
1412	 * number == ROOTINO (2).
1413	 */
1414	if (nmp->nm_fhsize > 0) {
1415		/*
1416		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1417		 * non-zero for the root vnode. f_iosize will be set correctly
1418		 * by nfs_statfs() before any I/O occurs.
1419		 */
1420		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1421		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1422		    LK_EXCLUSIVE);
1423		if (error)
1424			goto bad;
1425		*vpp = NFSTOV(np);
1426
1427		/*
1428		 * Get file attributes and transfer parameters for the
1429		 * mountpoint.  This has the side effect of filling in
1430		 * (*vpp)->v_type with the correct value.
1431		 */
1432		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1433		    cred, td, &nfsva, NULL, &lease);
1434		if (ret) {
1435			/*
1436			 * Just set default values to get things going.
1437			 */
1438			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1439			nfsva.na_vattr.va_type = VDIR;
1440			nfsva.na_vattr.va_mode = 0777;
1441			nfsva.na_vattr.va_nlink = 100;
1442			nfsva.na_vattr.va_uid = (uid_t)0;
1443			nfsva.na_vattr.va_gid = (gid_t)0;
1444			nfsva.na_vattr.va_fileid = 2;
1445			nfsva.na_vattr.va_gen = 1;
1446			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1447			nfsva.na_vattr.va_size = 512 * 1024;
1448			lease = 60;
1449		}
1450		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1451		if (nmp->nm_minorvers > 0) {
1452			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1453			NFSLOCKCLSTATE();
1454			clp->nfsc_renew = NFSCL_RENEW(lease);
1455			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1456			clp->nfsc_clientidrev++;
1457			if (clp->nfsc_clientidrev == 0)
1458				clp->nfsc_clientidrev++;
1459			NFSUNLOCKCLSTATE();
1460			/*
1461			 * Mount will succeed, so the renew thread can be
1462			 * started now.
1463			 */
1464			nfscl_start_renewthread(clp);
1465			nfscl_clientrelease(clp);
1466		}
1467		if (argp->flags & NFSMNT_NFSV3)
1468			ncl_fsinfo(nmp, *vpp, cred, td);
1469
1470		/* Mark if the mount point supports NFSv4 ACLs. */
1471		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1472		    ret == 0 &&
1473		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1474			MNT_ILOCK(mp);
1475			mp->mnt_flag |= MNT_NFS4ACLS;
1476			MNT_IUNLOCK(mp);
1477		}
1478
1479		/*
1480		 * Lose the lock but keep the ref.
1481		 */
1482		NFSVOPUNLOCK(*vpp, 0);
1483		return (0);
1484	}
1485	error = EIO;
1486
1487bad:
1488	if (clp != NULL)
1489		nfscl_clientrelease(clp);
1490	newnfs_disconnect(&nmp->nm_sockreq);
1491	crfree(nmp->nm_sockreq.nr_cred);
1492	if (nmp->nm_sockreq.nr_auth != NULL)
1493		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1494	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1495	mtx_destroy(&nmp->nm_mtx);
1496	if (nmp->nm_clp != NULL) {
1497		NFSLOCKCLSTATE();
1498		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1499		NFSUNLOCKCLSTATE();
1500		free(nmp->nm_clp, M_NFSCLCLIENT);
1501	}
1502	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1503		nfscl_freenfsclds(dsp);
1504	FREE(nmp, M_NEWNFSMNT);
1505	FREE(nam, M_SONAME);
1506	return (error);
1507}
1508
1509/*
1510 * unmount system call
1511 */
1512static int
1513nfs_unmount(struct mount *mp, int mntflags)
1514{
1515	struct thread *td;
1516	struct nfsmount *nmp;
1517	int error, flags = 0, i, trycnt = 0;
1518	struct nfsclds *dsp, *tdsp;
1519
1520	td = curthread;
1521
1522	if (mntflags & MNT_FORCE)
1523		flags |= FORCECLOSE;
1524	nmp = VFSTONFS(mp);
1525	/*
1526	 * Goes something like this..
1527	 * - Call vflush() to clear out vnodes for this filesystem
1528	 * - Close the socket
1529	 * - Free up the data structures
1530	 */
1531	/* In the forced case, cancel any outstanding requests. */
1532	if (mntflags & MNT_FORCE) {
1533		error = newnfs_nmcancelreqs(nmp);
1534		if (error)
1535			goto out;
1536		/* For a forced close, get rid of the renew thread now */
1537		nfscl_umount(nmp, td);
1538	}
1539	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1540	do {
1541		error = vflush(mp, 1, flags, td);
1542		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1543			(void) nfs_catnap(PSOCK, error, "newndm");
1544	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1545	if (error)
1546		goto out;
1547
1548	/*
1549	 * We are now committed to the unmount.
1550	 */
1551	if ((mntflags & MNT_FORCE) == 0)
1552		nfscl_umount(nmp, td);
1553	/* Make sure no nfsiods are assigned to this mount. */
1554	mtx_lock(&ncl_iod_mutex);
1555	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1556		if (ncl_iodmount[i] == nmp) {
1557			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1558			ncl_iodmount[i] = NULL;
1559		}
1560	mtx_unlock(&ncl_iod_mutex);
1561	newnfs_disconnect(&nmp->nm_sockreq);
1562	crfree(nmp->nm_sockreq.nr_cred);
1563	FREE(nmp->nm_nam, M_SONAME);
1564	if (nmp->nm_sockreq.nr_auth != NULL)
1565		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1566	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1567	mtx_destroy(&nmp->nm_mtx);
1568	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1569		nfscl_freenfsclds(dsp);
1570	FREE(nmp, M_NEWNFSMNT);
1571out:
1572	return (error);
1573}
1574
1575/*
1576 * Return root of a filesystem
1577 */
1578static int
1579nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1580{
1581	struct vnode *vp;
1582	struct nfsmount *nmp;
1583	struct nfsnode *np;
1584	int error;
1585
1586	nmp = VFSTONFS(mp);
1587	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1588	if (error)
1589		return error;
1590	vp = NFSTOV(np);
1591	/*
1592	 * Get transfer parameters and attributes for root vnode once.
1593	 */
1594	mtx_lock(&nmp->nm_mtx);
1595	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1596		mtx_unlock(&nmp->nm_mtx);
1597		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1598	} else
1599		mtx_unlock(&nmp->nm_mtx);
1600	if (vp->v_type == VNON)
1601	    vp->v_type = VDIR;
1602	vp->v_vflag |= VV_ROOT;
1603	*vpp = vp;
1604	return (0);
1605}
1606
1607/*
1608 * Flush out the buffer cache
1609 */
1610/* ARGSUSED */
1611static int
1612nfs_sync(struct mount *mp, int waitfor)
1613{
1614	struct vnode *vp, *mvp;
1615	struct thread *td;
1616	int error, allerror = 0;
1617
1618	td = curthread;
1619
1620	MNT_ILOCK(mp);
1621	/*
1622	 * If a forced dismount is in progress, return from here so that
1623	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1624	 * calling VFS_UNMOUNT().
1625	 */
1626	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1627		MNT_IUNLOCK(mp);
1628		return (EBADF);
1629	}
1630	MNT_IUNLOCK(mp);
1631
1632	/*
1633	 * Force stale buffer cache information to be flushed.
1634	 */
1635loop:
1636	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1637		/* XXX Racy bv_cnt check. */
1638		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1639		    waitfor == MNT_LAZY) {
1640			VI_UNLOCK(vp);
1641			continue;
1642		}
1643		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1644			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1645			goto loop;
1646		}
1647		error = VOP_FSYNC(vp, waitfor, td);
1648		if (error)
1649			allerror = error;
1650		NFSVOPUNLOCK(vp, 0);
1651		vrele(vp);
1652	}
1653	return (allerror);
1654}
1655
1656static int
1657nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1658{
1659	struct nfsmount *nmp = VFSTONFS(mp);
1660	struct vfsquery vq;
1661	int error;
1662
1663	bzero(&vq, sizeof(vq));
1664	switch (op) {
1665#if 0
1666	case VFS_CTL_NOLOCKS:
1667		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1668 		if (req->oldptr != NULL) {
1669 			error = SYSCTL_OUT(req, &val, sizeof(val));
1670 			if (error)
1671 				return (error);
1672 		}
1673 		if (req->newptr != NULL) {
1674 			error = SYSCTL_IN(req, &val, sizeof(val));
1675 			if (error)
1676 				return (error);
1677			if (val)
1678				nmp->nm_flag |= NFSMNT_NOLOCKS;
1679			else
1680				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1681 		}
1682		break;
1683#endif
1684	case VFS_CTL_QUERY:
1685		mtx_lock(&nmp->nm_mtx);
1686		if (nmp->nm_state & NFSSTA_TIMEO)
1687			vq.vq_flags |= VQ_NOTRESP;
1688		mtx_unlock(&nmp->nm_mtx);
1689#if 0
1690		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1691		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1692			vq.vq_flags |= VQ_NOTRESPLOCK;
1693#endif
1694		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1695		break;
1696 	case VFS_CTL_TIMEO:
1697 		if (req->oldptr != NULL) {
1698 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1699 			    sizeof(nmp->nm_tprintf_initial_delay));
1700 			if (error)
1701 				return (error);
1702 		}
1703 		if (req->newptr != NULL) {
1704			error = vfs_suser(mp, req->td);
1705			if (error)
1706				return (error);
1707 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1708 			    sizeof(nmp->nm_tprintf_initial_delay));
1709 			if (error)
1710 				return (error);
1711 			if (nmp->nm_tprintf_initial_delay < 0)
1712 				nmp->nm_tprintf_initial_delay = 0;
1713 		}
1714		break;
1715	default:
1716		return (ENOTSUP);
1717	}
1718	return (0);
1719}
1720
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel request is not examined.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1733
1734/*
1735 * Extract the information needed by the nlm from the nfs vnode.
1736 */
1737static void
1738nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1739    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1740    struct timeval *timeop)
1741{
1742	struct nfsmount *nmp;
1743	struct nfsnode *np = VTONFS(vp);
1744
1745	nmp = VFSTONFS(vp->v_mount);
1746	if (fhlenp != NULL)
1747		*fhlenp = (size_t)np->n_fhp->nfh_len;
1748	if (fhp != NULL)
1749		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1750	if (sp != NULL)
1751		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1752	if (is_v3p != NULL)
1753		*is_v3p = NFS_ISV3(vp);
1754	if (sizep != NULL)
1755		*sizep = np->n_size;
1756	if (timeop != NULL) {
1757		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1758		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1759	}
1760}
1761
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the name, plus its terminating NUL, fits in the remaining
 * space.  *buf and *blen are advanced past the text that was written, so
 * the next option is appended right after it.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1779
/*
 * Append "opt=optval" to the output buffer.
 *
 * Nothing is attempted unless the remaining space exceeds the length of
 * the option name plus one.  When the complete text (including the number)
 * fits, *buf and *blen are advanced past it.  When it does not fit, the
 * partial text produced by snprintf() is discarded again, so that the
 * buffer never ends in a cut-off, misleading value; *buf and *blen are
 * left unchanged in that case.  "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/* Truncated: drop the partial option. */
			**buf = '\0';
		}
	}
}
1797
1798/*
1799 * Load the option flags and values into the buffer.
1800 */
1801void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1802{
1803	char *buf;
1804	size_t blen;
1805
1806	buf = buffer;
1807	blen = buflen;
1808	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1809	    &blen);
1810	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1811		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1812		    &blen);
1813		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1814		    &buf, &blen);
1815	}
1816	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1817	    &blen);
1818	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1819	    "nfsv2", &buf, &blen);
1820	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1821	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1822	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1823	    &buf, &blen);
1824	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1825	    &buf, &blen);
1826	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1827	    &blen);
1828	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1829	    &blen);
1830	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1831	    &blen);
1832	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1833	    &blen);
1834	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1835	    &blen);
1836	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1837	    ",noncontigwr", &buf, &blen);
1838	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1839	    0, ",lockd", &buf, &blen);
1840	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1841	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1842	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1843	    &buf, &blen);
1844	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1845	    &buf, &blen);
1846	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1847	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1848	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1849	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1850	    &buf, &blen);
1851	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1852	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1853	    &buf, &blen);
1854	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1855	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1856	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1857	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1858	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1859	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1860	    &blen);
1861	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1862	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1863	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1864	    &blen);
1865	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1866	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1867	    &blen);
1868	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1869	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1870}
1871
1872