nfs_clvfsops.c revision 314201
178064Sume/*-
262638Skris * Copyright (c) 1989, 1993, 1995
355505Sshin *	The Regents of the University of California.  All rights reserved.
455505Sshin *
555505Sshin * This code is derived from software contributed to Berkeley by
655505Sshin * Rick Macklem at The University of Guelph.
755505Sshin *
855505Sshin * Redistribution and use in source and binary forms, with or without
955505Sshin * modification, are permitted provided that the following conditions
1055505Sshin * are met:
1155505Sshin * 1. Redistributions of source code must retain the above copyright
1255505Sshin *    notice, this list of conditions and the following disclaimer.
1355505Sshin * 2. Redistributions in binary form must reproduce the above copyright
1455505Sshin *    notice, this list of conditions and the following disclaimer in the
1555505Sshin *    documentation and/or other materials provided with the distribution.
1655505Sshin * 4. Neither the name of the University nor the names of its contributors
1755505Sshin *    may be used to endorse or promote products derived from this software
1855505Sshin *    without specific prior written permission.
1955505Sshin *
2055505Sshin * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2155505Sshin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2255505Sshin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2355505Sshin * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2455505Sshin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2555505Sshin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2655505Sshin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2755505Sshin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2855505Sshin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2955505Sshin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3055505Sshin * SUCH DAMAGE.
3155505Sshin *
3255505Sshin *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
3355505Sshin */
3455505Sshin
3555505Sshin#include <sys/cdefs.h>
3655505Sshin__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clvfsops.c 314201 2017-02-24 11:30:28Z kib $");
3755505Sshin
3855505Sshin
3962638Skris#include "opt_bootp.h"
4062638Skris#include "opt_nfsroot.h"
4155505Sshin
4255505Sshin#include <sys/param.h>
4362638Skris#include <sys/systm.h>
4455505Sshin#include <sys/kernel.h>
4555505Sshin#include <sys/bio.h>
4662638Skris#include <sys/buf.h>
4762638Skris#include <sys/clock.h>
4862638Skris#include <sys/jail.h>
4962638Skris#include <sys/limits.h>
5062638Skris#include <sys/lock.h>
5155505Sshin#include <sys/malloc.h>
5262638Skris#include <sys/mbuf.h>
5355505Sshin#include <sys/module.h>
5455505Sshin#include <sys/mount.h>
5562638Skris#include <sys/proc.h>
5662638Skris#include <sys/socket.h>
5762638Skris#include <sys/socketvar.h>
5855505Sshin#include <sys/sockio.h>
5962638Skris#include <sys/sysctl.h>
6062638Skris#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern struct nfsstats	newnfsstats;
82extern int nfsrv_useacl;
83extern int nfscl_debuglevel;
84extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86extern struct mtx ncl_iod_mutex;
87NFSCLSTATEMUTEX;
88
89MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
90MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
91
92SYSCTL_DECL(_vfs_nfs);
93static int nfs_ip_paranoia = 1;
94SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95    &nfs_ip_paranoia, 0, "");
96static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
97SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
98        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
99/* how long between console messages "nfs server foo not responding" */
100static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
101SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
102        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103
104static int	nfs_mountroot(struct mount *);
105static void	nfs_sec_name(char *, int *);
106static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
107		    struct nfs_args *argp, const char *, struct ucred *,
108		    struct thread *);
109static int	mountnfs(struct nfs_args *, struct mount *,
110		    struct sockaddr *, char *, u_char *, int, u_char *, int,
111		    u_char *, int, struct vnode **, struct ucred *,
112		    struct thread *, int, int, int);
113static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
114		    struct sockaddr_storage *, int *, off_t *,
115		    struct timeval *);
116static vfs_mount_t nfs_mount;
117static vfs_cmount_t nfs_cmount;
118static vfs_unmount_t nfs_unmount;
119static vfs_root_t nfs_root;
120static vfs_statfs_t nfs_statfs;
121static vfs_sync_t nfs_sync;
122static vfs_sysctl_t nfs_sysctl;
123static vfs_purge_t nfs_purge;
124
125/*
126 * nfs vfs operations.
127 */
128static struct vfsops nfs_vfsops = {
129	.vfs_init =		ncl_init,
130	.vfs_mount =		nfs_mount,
131	.vfs_cmount =		nfs_cmount,
132	.vfs_root =		nfs_root,
133	.vfs_statfs =		nfs_statfs,
134	.vfs_sync =		nfs_sync,
135	.vfs_uninit =		ncl_uninit,
136	.vfs_unmount =		nfs_unmount,
137	.vfs_sysctl =		nfs_sysctl,
138	.vfs_purge =		nfs_purge,
139};
140VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
141
142/* So that loader and kldload(2) can find us, wherever we are.. */
143MODULE_VERSION(nfs, 1);
144MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
145MODULE_DEPEND(nfs, krpc, 1, 1, 1);
146MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
147MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
148
149/*
150 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
151 * can be shared by both NFS clients. It is declared here so that it
152 * will be defined for kernels built without NFS_ROOT, although it
153 * isn't used in that case.
154 */
155#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
156struct nfs_diskless	nfs_diskless = { { { 0 } } };
157struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
158int			nfs_diskless_valid = 0;
159#endif
160
161SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
162    &nfs_diskless_valid, 0,
163    "Has the diskless struct been filled correctly");
164
165SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
166    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
167
168SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
169    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
170    "%Ssockaddr_in", "Diskless root nfs address");
171
172
173void		newnfsargs_ntoh(struct nfs_args *);
174static int	nfs_mountdiskless(char *,
175		    struct sockaddr_in *, struct nfs_args *,
176		    struct thread *, struct vnode **, struct mount *);
177static void	nfs_convert_diskless(void);
178static void	nfs_convert_oargs(struct nfs_args *args,
179		    struct onfs_args *oargs);
180
181int
182newnfs_iosize(struct nfsmount *nmp)
183{
184	int iosize, maxio;
185
186	/* First, set the upper limit for iosize */
187	if (nmp->nm_flag & NFSMNT_NFSV4) {
188		maxio = NFS_MAXBSIZE;
189	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
190		if (nmp->nm_sotype == SOCK_DGRAM)
191			maxio = NFS_MAXDGRAMDATA;
192		else
193			maxio = NFS_MAXBSIZE;
194	} else {
195		maxio = NFS_V2MAXDATA;
196	}
197	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198		nmp->nm_rsize = maxio;
199	if (nmp->nm_rsize > NFS_MAXBSIZE)
200		nmp->nm_rsize = NFS_MAXBSIZE;
201	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202		nmp->nm_readdirsize = maxio;
203	if (nmp->nm_readdirsize > nmp->nm_rsize)
204		nmp->nm_readdirsize = nmp->nm_rsize;
205	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206		nmp->nm_wsize = maxio;
207	if (nmp->nm_wsize > NFS_MAXBSIZE)
208		nmp->nm_wsize = NFS_MAXBSIZE;
209
210	/*
211	 * Calculate the size used for io buffers.  Use the larger
212	 * of the two sizes to minimise nfs requests but make sure
213	 * that it is at least one VM page to avoid wasting buffer
214	 * space.  It must also be at least NFS_DIRBLKSIZ, since
215	 * that is the buffer size used for directories.
216	 */
217	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
218	iosize = imax(iosize, PAGE_SIZE);
219	iosize = imax(iosize, NFS_DIRBLKSIZ);
220	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
221	return (iosize);
222}
223
224static void
225nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
226{
227
228	args->version = NFS_ARGSVERSION;
229	args->addr = oargs->addr;
230	args->addrlen = oargs->addrlen;
231	args->sotype = oargs->sotype;
232	args->proto = oargs->proto;
233	args->fh = oargs->fh;
234	args->fhsize = oargs->fhsize;
235	args->flags = oargs->flags;
236	args->wsize = oargs->wsize;
237	args->rsize = oargs->rsize;
238	args->readdirsize = oargs->readdirsize;
239	args->timeo = oargs->timeo;
240	args->retrans = oargs->retrans;
241	args->readahead = oargs->readahead;
242	args->hostname = oargs->hostname;
243}
244
245static void
246nfs_convert_diskless(void)
247{
248
249	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
250		sizeof(struct ifaliasreq));
251	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
252		sizeof(struct sockaddr_in));
253	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
254	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
255		nfsv3_diskless.root_fhsize = NFSX_MYFH;
256		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
257	} else {
258		nfsv3_diskless.root_fhsize = NFSX_V2FH;
259		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
260	}
261	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
262		sizeof(struct sockaddr_in));
263	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
264	nfsv3_diskless.root_time = nfs_diskless.root_time;
265	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
266		MAXHOSTNAMELEN);
267	nfs_diskless_valid = 3;
268}
269
270/*
271 * nfs statfs call
272 */
273static int
274nfs_statfs(struct mount *mp, struct statfs *sbp)
275{
276	struct vnode *vp;
277	struct thread *td;
278	struct nfsmount *nmp = VFSTONFS(mp);
279	struct nfsvattr nfsva;
280	struct nfsfsinfo fs;
281	struct nfsstatfs sb;
282	int error = 0, attrflag, gotfsinfo = 0, ret;
283	struct nfsnode *np;
284
285	td = curthread;
286
287	error = vfs_busy(mp, MBF_NOWAIT);
288	if (error)
289		return (error);
290	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
291	if (error) {
292		vfs_unbusy(mp);
293		return (error);
294	}
295	vp = NFSTOV(np);
296	mtx_lock(&nmp->nm_mtx);
297	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
298		mtx_unlock(&nmp->nm_mtx);
299		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
300		    &attrflag, NULL);
301		if (!error)
302			gotfsinfo = 1;
303	} else
304		mtx_unlock(&nmp->nm_mtx);
305	if (!error)
306		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
307		    &attrflag, NULL);
308	if (error != 0)
309		NFSCL_DEBUG(2, "statfs=%d\n", error);
310	if (attrflag == 0) {
311		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
312		    td->td_ucred, td, &nfsva, NULL, NULL);
313		if (ret) {
314			/*
315			 * Just set default values to get things going.
316			 */
317			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
318			nfsva.na_vattr.va_type = VDIR;
319			nfsva.na_vattr.va_mode = 0777;
320			nfsva.na_vattr.va_nlink = 100;
321			nfsva.na_vattr.va_uid = (uid_t)0;
322			nfsva.na_vattr.va_gid = (gid_t)0;
323			nfsva.na_vattr.va_fileid = 2;
324			nfsva.na_vattr.va_gen = 1;
325			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
326			nfsva.na_vattr.va_size = 512 * 1024;
327		}
328	}
329	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
330	if (!error) {
331	    mtx_lock(&nmp->nm_mtx);
332	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
333		nfscl_loadfsinfo(nmp, &fs);
334	    nfscl_loadsbinfo(nmp, &sb, sbp);
335	    sbp->f_iosize = newnfs_iosize(nmp);
336	    mtx_unlock(&nmp->nm_mtx);
337	    if (sbp != &mp->mnt_stat) {
338		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
339		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
340	    }
341	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
342	} else if (NFS_ISV4(vp)) {
343		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
344	}
345	vput(vp);
346	vfs_unbusy(mp);
347	return (error);
348}
349
350/*
351 * nfs version 3 fsinfo rpc call
352 */
353int
354ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
355    struct thread *td)
356{
357	struct nfsfsinfo fs;
358	struct nfsvattr nfsva;
359	int error, attrflag;
360
361	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
362	if (!error) {
363		if (attrflag)
364			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
365			    1);
366		mtx_lock(&nmp->nm_mtx);
367		nfscl_loadfsinfo(nmp, &fs);
368		mtx_unlock(&nmp->nm_mtx);
369	}
370	return (error);
371}
372
373/*
374 * Mount a remote root fs via. nfs. This depends on the info in the
375 * nfs_diskless structure that has been filled in properly by some primary
376 * bootstrap.
377 * It goes something like this:
378 * - do enough of "ifconfig" by calling ifioctl() so that the system
379 *   can talk to the server
380 * - If nfs_diskless.mygateway is filled in, use that address as
381 *   a default gateway.
382 * - build the rootfs mount point and call mountnfs() to do the rest.
383 *
384 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
385 * structure, as well as other global NFS client variables here, as
386 * nfs_mountroot() will be called once in the boot before any other NFS
387 * client activity occurs.
388 */
389static int
390nfs_mountroot(struct mount *mp)
391{
392	struct thread *td = curthread;
393	struct nfsv3_diskless *nd = &nfsv3_diskless;
394	struct socket *so;
395	struct vnode *vp;
396	struct ifreq ir;
397	int error;
398	u_long l;
399	char buf[128];
400	char *cp;
401
402#if defined(BOOTP_NFSROOT) && defined(BOOTP)
403	bootpc_init();		/* use bootp to get nfs_diskless filled in */
404#elif defined(NFS_ROOT)
405	nfs_setup_diskless();
406#endif
407
408	if (nfs_diskless_valid == 0)
409		return (-1);
410	if (nfs_diskless_valid == 1)
411		nfs_convert_diskless();
412
413	/*
414	 * XXX splnet, so networks will receive...
415	 */
416	splnet();
417
418	/*
419	 * Do enough of ifconfig(8) so that the critical net interface can
420	 * talk to the server.
421	 */
422	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
423	    td->td_ucred, td);
424	if (error)
425		panic("nfs_mountroot: socreate(%04x): %d",
426			nd->myif.ifra_addr.sa_family, error);
427
428#if 0 /* XXX Bad idea */
429	/*
430	 * We might not have been told the right interface, so we pass
431	 * over the first ten interfaces of the same kind, until we get
432	 * one of them configured.
433	 */
434
435	for (i = strlen(nd->myif.ifra_name) - 1;
436		nd->myif.ifra_name[i] >= '0' &&
437		nd->myif.ifra_name[i] <= '9';
438		nd->myif.ifra_name[i] ++) {
439		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
440		if(!error)
441			break;
442	}
443#endif
444	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
445	if (error)
446		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
447	if ((cp = getenv("boot.netif.mtu")) != NULL) {
448		ir.ifr_mtu = strtol(cp, NULL, 10);
449		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
450		freeenv(cp);
451		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
452		if (error)
453			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
454	}
455	soclose(so);
456
457	/*
458	 * If the gateway field is filled in, set it as the default route.
459	 * Note that pxeboot will set a default route of 0 if the route
460	 * is not set by the DHCP server.  Check also for a value of 0
461	 * to avoid panicking inappropriately in that situation.
462	 */
463	if (nd->mygateway.sin_len != 0 &&
464	    nd->mygateway.sin_addr.s_addr != 0) {
465		struct sockaddr_in mask, sin;
466
467		bzero((caddr_t)&mask, sizeof(mask));
468		sin = mask;
469		sin.sin_family = AF_INET;
470		sin.sin_len = sizeof(sin);
471                /* XXX MRT use table 0 for this sort of thing */
472		CURVNET_SET(TD_TO_VNET(td));
473		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
474		    (struct sockaddr *)&nd->mygateway,
475		    (struct sockaddr *)&mask,
476		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
477		CURVNET_RESTORE();
478		if (error)
479			panic("nfs_mountroot: RTM_ADD: %d", error);
480	}
481
482	/*
483	 * Create the rootfs mount point.
484	 */
485	nd->root_args.fh = nd->root_fh;
486	nd->root_args.fhsize = nd->root_fhsize;
487	l = ntohl(nd->root_saddr.sin_addr.s_addr);
488	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
489		(l >> 24) & 0xff, (l >> 16) & 0xff,
490		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
491	printf("NFS ROOT: %s\n", buf);
492	nd->root_args.hostname = buf;
493	if ((error = nfs_mountdiskless(buf,
494	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
495		return (error);
496	}
497
498	/*
499	 * This is not really an nfs issue, but it is much easier to
500	 * set hostname here and then let the "/etc/rc.xxx" files
501	 * mount the right /var based upon its preset value.
502	 */
503	mtx_lock(&prison0.pr_mtx);
504	strlcpy(prison0.pr_hostname, nd->my_hostnam,
505	    sizeof(prison0.pr_hostname));
506	mtx_unlock(&prison0.pr_mtx);
507	inittodr(ntohl(nd->root_time));
508	return (0);
509}
510
511/*
512 * Internal version of mount system call for diskless setup.
513 */
514static int
515nfs_mountdiskless(char *path,
516    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
517    struct vnode **vpp, struct mount *mp)
518{
519	struct sockaddr *nam;
520	int dirlen, error;
521	char *dirpath;
522
523	/*
524	 * Find the directory path in "path", which also has the server's
525	 * name/ip address in it.
526	 */
527	dirpath = strchr(path, ':');
528	if (dirpath != NULL)
529		dirlen = strlen(++dirpath);
530	else
531		dirlen = 0;
532	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
533	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
534	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
535	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
536		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
537		return (error);
538	}
539	return (0);
540}
541
542static void
543nfs_sec_name(char *sec, int *flagsp)
544{
545	if (!strcmp(sec, "krb5"))
546		*flagsp |= NFSMNT_KERB;
547	else if (!strcmp(sec, "krb5i"))
548		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
549	else if (!strcmp(sec, "krb5p"))
550		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
551}
552
553static void
554nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
555    const char *hostname, struct ucred *cred, struct thread *td)
556{
557	int s;
558	int adjsock;
559	char *p;
560
561	s = splnet();
562
563	/*
564	 * Set read-only flag if requested; otherwise, clear it if this is
565	 * an update.  If this is not an update, then either the read-only
566	 * flag is already clear, or this is a root mount and it was set
567	 * intentionally at some previous point.
568	 */
569	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
570		MNT_ILOCK(mp);
571		mp->mnt_flag |= MNT_RDONLY;
572		MNT_IUNLOCK(mp);
573	} else if (mp->mnt_flag & MNT_UPDATE) {
574		MNT_ILOCK(mp);
575		mp->mnt_flag &= ~MNT_RDONLY;
576		MNT_IUNLOCK(mp);
577	}
578
579	/*
580	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
581	 * no sense in that context.  Also, set up appropriate retransmit
582	 * and soft timeout behavior.
583	 */
584	if (argp->sotype == SOCK_STREAM) {
585		nmp->nm_flag &= ~NFSMNT_NOCONN;
586		nmp->nm_timeo = NFS_MAXTIMEO;
587		if ((argp->flags & NFSMNT_NFSV4) != 0)
588			nmp->nm_retry = INT_MAX;
589		else
590			nmp->nm_retry = NFS_RETRANS_TCP;
591	}
592
593	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
594	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
595		argp->flags &= ~NFSMNT_RDIRPLUS;
596		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
597	}
598
599	/* Re-bind if rsrvd port requested and wasn't on one */
600	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
601		  && (argp->flags & NFSMNT_RESVPORT);
602	/* Also re-bind if we're switching to/from a connected UDP socket */
603	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
604		    (argp->flags & NFSMNT_NOCONN));
605
606	/* Update flags atomically.  Don't change the lock bits. */
607	nmp->nm_flag = argp->flags | nmp->nm_flag;
608	splx(s);
609
610	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
611		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
612		if (nmp->nm_timeo < NFS_MINTIMEO)
613			nmp->nm_timeo = NFS_MINTIMEO;
614		else if (nmp->nm_timeo > NFS_MAXTIMEO)
615			nmp->nm_timeo = NFS_MAXTIMEO;
616	}
617
618	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
619		nmp->nm_retry = argp->retrans;
620		if (nmp->nm_retry > NFS_MAXREXMIT)
621			nmp->nm_retry = NFS_MAXREXMIT;
622	}
623
624	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
625		nmp->nm_wsize = argp->wsize;
626		/*
627		 * Clip at the power of 2 below the size. There is an
628		 * issue (not isolated) that causes intermittent page
629		 * faults if this is not done.
630		 */
631		if (nmp->nm_wsize > NFS_FABLKSIZE)
632			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
633		else
634			nmp->nm_wsize = NFS_FABLKSIZE;
635	}
636
637	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
638		nmp->nm_rsize = argp->rsize;
639		/*
640		 * Clip at the power of 2 below the size. There is an
641		 * issue (not isolated) that causes intermittent page
642		 * faults if this is not done.
643		 */
644		if (nmp->nm_rsize > NFS_FABLKSIZE)
645			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
646		else
647			nmp->nm_rsize = NFS_FABLKSIZE;
648	}
649
650	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
651		nmp->nm_readdirsize = argp->readdirsize;
652	}
653
654	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
655		nmp->nm_acregmin = argp->acregmin;
656	else
657		nmp->nm_acregmin = NFS_MINATTRTIMO;
658	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
659		nmp->nm_acregmax = argp->acregmax;
660	else
661		nmp->nm_acregmax = NFS_MAXATTRTIMO;
662	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
663		nmp->nm_acdirmin = argp->acdirmin;
664	else
665		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
666	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
667		nmp->nm_acdirmax = argp->acdirmax;
668	else
669		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
670	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
671		nmp->nm_acdirmin = nmp->nm_acdirmax;
672	if (nmp->nm_acregmin > nmp->nm_acregmax)
673		nmp->nm_acregmin = nmp->nm_acregmax;
674
675	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
676		if (argp->readahead <= NFS_MAXRAHEAD)
677			nmp->nm_readahead = argp->readahead;
678		else
679			nmp->nm_readahead = NFS_MAXRAHEAD;
680	}
681	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
682		if (argp->wcommitsize < nmp->nm_wsize)
683			nmp->nm_wcommitsize = nmp->nm_wsize;
684		else
685			nmp->nm_wcommitsize = argp->wcommitsize;
686	}
687
688	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
689		    (nmp->nm_soproto != argp->proto));
690
691	if (nmp->nm_client != NULL && adjsock) {
692		int haslock = 0, error = 0;
693
694		if (nmp->nm_sotype == SOCK_STREAM) {
695			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
696			if (!error)
697				haslock = 1;
698		}
699		if (!error) {
700		    newnfs_disconnect(&nmp->nm_sockreq);
701		    if (haslock)
702			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
703		    nmp->nm_sotype = argp->sotype;
704		    nmp->nm_soproto = argp->proto;
705		    if (nmp->nm_sotype == SOCK_DGRAM)
706			while (newnfs_connect(nmp, &nmp->nm_sockreq,
707			    cred, td, 0)) {
708				printf("newnfs_args: retrying connect\n");
709				(void) nfs_catnap(PSOCK, 0, "newnfscon");
710			}
711		}
712	} else {
713		nmp->nm_sotype = argp->sotype;
714		nmp->nm_soproto = argp->proto;
715	}
716
717	if (hostname != NULL) {
718		strlcpy(nmp->nm_hostname, hostname,
719		    sizeof(nmp->nm_hostname));
720		p = strchr(nmp->nm_hostname, ':');
721		if (p != NULL)
722			*p = '\0';
723	}
724}
725
/*
 * NULL-terminated list of mount option names; nfs_mount() passes it to
 * vfs_filteropt() and fails with EINVAL if that call returns non-zero.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args", "noac", "noatime", "noexec", "suiddir",
	"nosuid", "nosymfollow", "union", "noclusterr", "noclusterw",
	"multilabel", "acls", "force", "update", "async", "noconn",
	"nolockd", "conn", "lockd", "intr", "rdirplus", "readdirsize",
	"soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
	"retrans", "actimeo", "acregmin", "acregmax", "acdirmin",
	"acdirmax", "resvport", "readahead", "hostname", "timeo",
	"timeout", "addr", "fh", "nfsv3", "sec", "principal", "nfsv4",
	"gssname", "allgssname", "dirpath", "minorversion", "nametimeo",
	"negnametimeo", "nocto", "noncontigwr", "pnfs", "wcommitsize",
	NULL
};
737
738/*
739 * VFS Operations.
740 *
741 * mount system call
742 * It seems a bit dumb to copyinstr() the host and path here and then
743 * bcopy() them in mountnfs(), but I wanted to detect errors before
744 * doing the sockargs() call because sockargs() allocates an mbuf and
745 * an error after that means that I have to release the mbuf.
746 */
747/* ARGSUSED */
748static int
749nfs_mount(struct mount *mp)
750{
751	struct nfs_args args = {
752	    .version = NFS_ARGSVERSION,
753	    .addr = NULL,
754	    .addrlen = sizeof (struct sockaddr_in),
755	    .sotype = SOCK_STREAM,
756	    .proto = 0,
757	    .fh = NULL,
758	    .fhsize = 0,
759	    .flags = NFSMNT_RESVPORT,
760	    .wsize = NFS_WSIZE,
761	    .rsize = NFS_RSIZE,
762	    .readdirsize = NFS_READDIRSIZE,
763	    .timeo = 10,
764	    .retrans = NFS_RETRANS,
765	    .readahead = NFS_DEFRAHEAD,
766	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
767	    .hostname = NULL,
768	    .acregmin = NFS_MINATTRTIMO,
769	    .acregmax = NFS_MAXATTRTIMO,
770	    .acdirmin = NFS_MINDIRATTRTIMO,
771	    .acdirmax = NFS_MAXDIRATTRTIMO,
772	};
773	int error = 0, ret, len;
774	struct sockaddr *nam = NULL;
775	struct vnode *vp;
776	struct thread *td;
777	char hst[MNAMELEN];
778	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
779	char *cp, *opt, *name, *secname;
780	int nametimeo = NFS_DEFAULT_NAMETIMEO;
781	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
782	int minvers = 0;
783	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
784	size_t hstlen;
785
786	has_nfs_args_opt = 0;
787	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
788		error = EINVAL;
789		goto out;
790	}
791
792	td = curthread;
793	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
794		error = nfs_mountroot(mp);
795		goto out;
796	}
797
798	nfscl_init();
799
800	/*
801	 * The old mount_nfs program passed the struct nfs_args
802	 * from userspace to kernel.  The new mount_nfs program
803	 * passes string options via nmount() from userspace to kernel
804	 * and we populate the struct nfs_args in the kernel.
805	 */
806	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
807		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
808		    sizeof(args));
809		if (error != 0)
810			goto out;
811
812		if (args.version != NFS_ARGSVERSION) {
813			error = EPROGMISMATCH;
814			goto out;
815		}
816		has_nfs_args_opt = 1;
817	}
818
819	/* Handle the new style options. */
820	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
821		args.acdirmin = args.acdirmax =
822		    args.acregmin = args.acregmax = 0;
823		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
824		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
825	}
826	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
827		args.flags |= NFSMNT_NOCONN;
828	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
829		args.flags &= ~NFSMNT_NOCONN;
830	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
831		args.flags |= NFSMNT_NOLOCKD;
832	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
833		args.flags &= ~NFSMNT_NOLOCKD;
834	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
835		args.flags |= NFSMNT_INT;
836	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
837		args.flags |= NFSMNT_RDIRPLUS;
838	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
839		args.flags |= NFSMNT_RESVPORT;
840	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
841		args.flags &= ~NFSMNT_RESVPORT;
842	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
843		args.flags |= NFSMNT_SOFT;
844	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
845		args.flags &= ~NFSMNT_SOFT;
846	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
847		args.sotype = SOCK_DGRAM;
848	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
849		args.sotype = SOCK_DGRAM;
850	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
851		args.sotype = SOCK_STREAM;
852	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
853		args.flags |= NFSMNT_NFSV3;
854	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
855		args.flags |= NFSMNT_NFSV4;
856		args.sotype = SOCK_STREAM;
857	}
858	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
859		args.flags |= NFSMNT_ALLGSSNAME;
860	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
861		args.flags |= NFSMNT_NOCTO;
862	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
863		args.flags |= NFSMNT_NONCONTIGWR;
864	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
865		args.flags |= NFSMNT_PNFS;
866	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
867		if (opt == NULL) {
868			vfs_mount_error(mp, "illegal readdirsize");
869			error = EINVAL;
870			goto out;
871		}
872		ret = sscanf(opt, "%d", &args.readdirsize);
873		if (ret != 1 || args.readdirsize <= 0) {
874			vfs_mount_error(mp, "illegal readdirsize: %s",
875			    opt);
876			error = EINVAL;
877			goto out;
878		}
879		args.flags |= NFSMNT_READDIRSIZE;
880	}
881	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
882		if (opt == NULL) {
883			vfs_mount_error(mp, "illegal readahead");
884			error = EINVAL;
885			goto out;
886		}
887		ret = sscanf(opt, "%d", &args.readahead);
888		if (ret != 1 || args.readahead <= 0) {
889			vfs_mount_error(mp, "illegal readahead: %s",
890			    opt);
891			error = EINVAL;
892			goto out;
893		}
894		args.flags |= NFSMNT_READAHEAD;
895	}
896	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
897		if (opt == NULL) {
898			vfs_mount_error(mp, "illegal wsize");
899			error = EINVAL;
900			goto out;
901		}
902		ret = sscanf(opt, "%d", &args.wsize);
903		if (ret != 1 || args.wsize <= 0) {
904			vfs_mount_error(mp, "illegal wsize: %s",
905			    opt);
906			error = EINVAL;
907			goto out;
908		}
909		args.flags |= NFSMNT_WSIZE;
910	}
911	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
912		if (opt == NULL) {
913			vfs_mount_error(mp, "illegal rsize");
914			error = EINVAL;
915			goto out;
916		}
917		ret = sscanf(opt, "%d", &args.rsize);
918		if (ret != 1 || args.rsize <= 0) {
919			vfs_mount_error(mp, "illegal wsize: %s",
920			    opt);
921			error = EINVAL;
922			goto out;
923		}
924		args.flags |= NFSMNT_RSIZE;
925	}
926	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
927		if (opt == NULL) {
928			vfs_mount_error(mp, "illegal retrans");
929			error = EINVAL;
930			goto out;
931		}
932		ret = sscanf(opt, "%d", &args.retrans);
933		if (ret != 1 || args.retrans <= 0) {
934			vfs_mount_error(mp, "illegal retrans: %s",
935			    opt);
936			error = EINVAL;
937			goto out;
938		}
939		args.flags |= NFSMNT_RETRANS;
940	}
941	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
942		ret = sscanf(opt, "%d", &args.acregmin);
943		if (ret != 1 || args.acregmin < 0) {
944			vfs_mount_error(mp, "illegal actimeo: %s",
945			    opt);
946			error = EINVAL;
947			goto out;
948		}
949		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
950		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
951		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
952	}
953	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
954		ret = sscanf(opt, "%d", &args.acregmin);
955		if (ret != 1 || args.acregmin < 0) {
956			vfs_mount_error(mp, "illegal acregmin: %s",
957			    opt);
958			error = EINVAL;
959			goto out;
960		}
961		args.flags |= NFSMNT_ACREGMIN;
962	}
963	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
964		ret = sscanf(opt, "%d", &args.acregmax);
965		if (ret != 1 || args.acregmax < 0) {
966			vfs_mount_error(mp, "illegal acregmax: %s",
967			    opt);
968			error = EINVAL;
969			goto out;
970		}
971		args.flags |= NFSMNT_ACREGMAX;
972	}
973	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
974		ret = sscanf(opt, "%d", &args.acdirmin);
975		if (ret != 1 || args.acdirmin < 0) {
976			vfs_mount_error(mp, "illegal acdirmin: %s",
977			    opt);
978			error = EINVAL;
979			goto out;
980		}
981		args.flags |= NFSMNT_ACDIRMIN;
982	}
983	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
984		ret = sscanf(opt, "%d", &args.acdirmax);
985		if (ret != 1 || args.acdirmax < 0) {
986			vfs_mount_error(mp, "illegal acdirmax: %s",
987			    opt);
988			error = EINVAL;
989			goto out;
990		}
991		args.flags |= NFSMNT_ACDIRMAX;
992	}
993	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
994		ret = sscanf(opt, "%d", &args.wcommitsize);
995		if (ret != 1 || args.wcommitsize < 0) {
996			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
997			error = EINVAL;
998			goto out;
999		}
1000		args.flags |= NFSMNT_WCOMMITSIZE;
1001	}
1002	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1003		ret = sscanf(opt, "%d", &args.timeo);
1004		if (ret != 1 || args.timeo <= 0) {
1005			vfs_mount_error(mp, "illegal timeo: %s",
1006			    opt);
1007			error = EINVAL;
1008			goto out;
1009		}
1010		args.flags |= NFSMNT_TIMEO;
1011	}
1012	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1013		ret = sscanf(opt, "%d", &args.timeo);
1014		if (ret != 1 || args.timeo <= 0) {
1015			vfs_mount_error(mp, "illegal timeout: %s",
1016			    opt);
1017			error = EINVAL;
1018			goto out;
1019		}
1020		args.flags |= NFSMNT_TIMEO;
1021	}
1022	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1023		ret = sscanf(opt, "%d", &nametimeo);
1024		if (ret != 1 || nametimeo < 0) {
1025			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1026			error = EINVAL;
1027			goto out;
1028		}
1029	}
1030	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1031	    == 0) {
1032		ret = sscanf(opt, "%d", &negnametimeo);
1033		if (ret != 1 || negnametimeo < 0) {
1034			vfs_mount_error(mp, "illegal negnametimeo: %s",
1035			    opt);
1036			error = EINVAL;
1037			goto out;
1038		}
1039	}
1040	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1041	    0) {
1042		ret = sscanf(opt, "%d", &minvers);
1043		if (ret != 1 || minvers < 0 || minvers > 1 ||
1044		    (args.flags & NFSMNT_NFSV4) == 0) {
1045			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1046			error = EINVAL;
1047			goto out;
1048		}
1049	}
1050	if (vfs_getopt(mp->mnt_optnew, "sec",
1051		(void **) &secname, NULL) == 0)
1052		nfs_sec_name(secname, &args.flags);
1053
1054	if (mp->mnt_flag & MNT_UPDATE) {
1055		struct nfsmount *nmp = VFSTONFS(mp);
1056
1057		if (nmp == NULL) {
1058			error = EIO;
1059			goto out;
1060		}
1061
1062		/*
1063		 * If a change from TCP->UDP is done and there are thread(s)
		 * that have I/O RPC(s) in progress with a transfer size
1065		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1066		 * hung, retrying the RPC(s) forever. Usually these threads
1067		 * will be seen doing an uninterruptible sleep on wait channel
1068		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1069		 */
1070		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1071			tprintf(td->td_proc, LOG_WARNING,
1072	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1073
1074		/*
1075		 * When doing an update, we can't change version,
1076		 * security, switch lockd strategies or change cookie
1077		 * translation
1078		 */
1079		args.flags = (args.flags &
1080		    ~(NFSMNT_NFSV3 |
1081		      NFSMNT_NFSV4 |
1082		      NFSMNT_KERB |
1083		      NFSMNT_INTEGRITY |
1084		      NFSMNT_PRIVACY |
1085		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1086		    (nmp->nm_flag &
1087			(NFSMNT_NFSV3 |
1088			 NFSMNT_NFSV4 |
1089			 NFSMNT_KERB |
1090			 NFSMNT_INTEGRITY |
1091			 NFSMNT_PRIVACY |
1092			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1093		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1094		goto out;
1095	}
1096
1097	/*
1098	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1099	 * or no-connection mode for those protocols that support
1100	 * no-connection mode (the flag will be cleared later for protocols
1101	 * that do not support no-connection mode).  This will allow a client
	 * to receive replies from a different IP than the request was
1103	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1104	 * not 0.
1105	 */
1106	if (nfs_ip_paranoia == 0)
1107		args.flags |= NFSMNT_NOCONN;
1108
1109	if (has_nfs_args_opt != 0) {
1110		/*
1111		 * In the 'nfs_args' case, the pointers in the args
1112		 * structure are in userland - we copy them in here.
1113		 */
1114		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1115			vfs_mount_error(mp, "Bad file handle");
1116			error = EINVAL;
1117			goto out;
1118		}
1119		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1120		    args.fhsize);
1121		if (error != 0)
1122			goto out;
1123		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1124		if (error != 0)
1125			goto out;
1126		bzero(&hst[hstlen], MNAMELEN - hstlen);
1127		args.hostname = hst;
1128		/* sockargs() call must be after above copyin() calls */
1129		error = getsockaddr(&nam, (caddr_t)args.addr,
1130		    args.addrlen);
1131		if (error != 0)
1132			goto out;
1133	} else {
1134		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1135		    &args.fhsize) == 0) {
1136			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1137				vfs_mount_error(mp, "Bad file handle");
1138				error = EINVAL;
1139				goto out;
1140			}
1141			bcopy(args.fh, nfh, args.fhsize);
1142		} else {
1143			args.fhsize = 0;
1144		}
1145		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1146		    (void **)&args.hostname, &len);
1147		if (args.hostname == NULL) {
1148			vfs_mount_error(mp, "Invalid hostname");
1149			error = EINVAL;
1150			goto out;
1151		}
1152		if (len >= MNAMELEN) {
1153			vfs_mount_error(mp, "Hostname too long");
1154			error = EINVAL;
1155			goto out;
1156		}
1157		bcopy(args.hostname, hst, len);
1158		hst[len] = '\0';
1159	}
1160
1161	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1162		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1163	else {
1164		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1165		cp = strchr(srvkrbname, ':');
1166		if (cp != NULL)
1167			*cp = '\0';
1168	}
1169	srvkrbnamelen = strlen(srvkrbname);
1170
1171	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1172		strlcpy(krbname, name, sizeof (krbname));
1173	else
1174		krbname[0] = '\0';
1175	krbnamelen = strlen(krbname);
1176
1177	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1178		strlcpy(dirpath, name, sizeof (dirpath));
1179	else
1180		dirpath[0] = '\0';
1181	dirlen = strlen(dirpath);
1182
1183	if (has_nfs_args_opt == 0) {
1184		if (vfs_getopt(mp->mnt_optnew, "addr",
1185		    (void **)&args.addr, &args.addrlen) == 0) {
1186			if (args.addrlen > SOCK_MAXADDRLEN) {
1187				error = ENAMETOOLONG;
1188				goto out;
1189			}
1190			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1191			bcopy(args.addr, nam, args.addrlen);
1192			nam->sa_len = args.addrlen;
1193		} else {
1194			vfs_mount_error(mp, "No server address");
1195			error = EINVAL;
1196			goto out;
1197		}
1198	}
1199
1200	args.fh = nfh;
1201	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1202	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1203	    nametimeo, negnametimeo, minvers);
1204out:
1205	if (!error) {
1206		MNT_ILOCK(mp);
1207		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1208		    MNTK_USES_BCACHE;
1209		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1210			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1211		MNT_IUNLOCK(mp);
1212	}
1213	return (error);
1214}
1215
1216
1217/*
1218 * VFS Operations.
1219 *
1220 * mount system call
1221 * It seems a bit dumb to copyinstr() the host and path here and then
1222 * bcopy() them in mountnfs(), but I wanted to detect errors before
1223 * doing the sockargs() call because sockargs() allocates an mbuf and
1224 * an error after that means that I have to release the mbuf.
1225 */
1226/* ARGSUSED */
1227static int
1228nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1229{
1230	int error;
1231	struct nfs_args args;
1232
1233	error = copyin(data, &args, sizeof (struct nfs_args));
1234	if (error)
1235		return error;
1236
1237	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1238
1239	error = kernel_mount(ma, flags);
1240	return (error);
1241}
1242
1243/*
1244 * Common code for mount and mountroot
1245 */
1246static int
1247mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1248    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1249    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1250    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1251    int minvers)
1252{
1253	struct nfsmount *nmp;
1254	struct nfsnode *np;
1255	int error, trycnt, ret;
1256	struct nfsvattr nfsva;
1257	struct nfsclclient *clp;
1258	struct nfsclds *dsp, *tdsp;
1259	uint32_t lease;
1260	static u_int64_t clval = 0;
1261
1262	NFSCL_DEBUG(3, "in mnt\n");
1263	clp = NULL;
1264	if (mp->mnt_flag & MNT_UPDATE) {
1265		nmp = VFSTONFS(mp);
1266		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1267		FREE(nam, M_SONAME);
1268		return (0);
1269	} else {
1270		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1271		    krbnamelen + dirlen + srvkrbnamelen + 2,
1272		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1273		TAILQ_INIT(&nmp->nm_bufq);
1274		if (clval == 0)
1275			clval = (u_int64_t)nfsboottime.tv_sec;
1276		nmp->nm_clval = clval++;
1277		nmp->nm_krbnamelen = krbnamelen;
1278		nmp->nm_dirpathlen = dirlen;
1279		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1280		if (td->td_ucred->cr_uid != (uid_t)0) {
1281			/*
1282			 * nm_uid is used to get KerberosV credentials for
1283			 * the nfsv4 state handling operations if there is
1284			 * no host based principal set. Use the uid of
1285			 * this user if not root, since they are doing the
1286			 * mount. I don't think setting this for root will
1287			 * work, since root normally does not have user
1288			 * credentials in a credentials cache.
1289			 */
1290			nmp->nm_uid = td->td_ucred->cr_uid;
1291		} else {
1292			/*
1293			 * Just set to -1, so it won't be used.
1294			 */
1295			nmp->nm_uid = (uid_t)-1;
1296		}
1297
1298		/* Copy and null terminate all the names */
1299		if (nmp->nm_krbnamelen > 0) {
1300			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1301			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1302		}
1303		if (nmp->nm_dirpathlen > 0) {
1304			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1305			    nmp->nm_dirpathlen);
1306			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1307			    + 1] = '\0';
1308		}
1309		if (nmp->nm_srvkrbnamelen > 0) {
1310			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1311			    nmp->nm_srvkrbnamelen);
1312			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1313			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1314		}
1315		nmp->nm_sockreq.nr_cred = crhold(cred);
1316		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1317		mp->mnt_data = nmp;
1318		nmp->nm_getinfo = nfs_getnlminfo;
1319		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1320	}
1321	vfs_getnewfsid(mp);
1322	nmp->nm_mountp = mp;
1323	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1324
1325	/*
1326	 * Since nfs_decode_args() might optionally set them, these
1327	 * need to be set to defaults before the call, so that the
1328	 * optional settings aren't overwritten.
1329	 */
1330	nmp->nm_nametimeo = nametimeo;
1331	nmp->nm_negnametimeo = negnametimeo;
1332	nmp->nm_timeo = NFS_TIMEO;
1333	nmp->nm_retry = NFS_RETRANS;
1334	nmp->nm_readahead = NFS_DEFRAHEAD;
1335
1336	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1337	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1338	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1339		nmp->nm_wcommitsize *= 2;
1340	nmp->nm_wcommitsize *= 256;
1341
1342	if ((argp->flags & NFSMNT_NFSV4) != 0)
1343		nmp->nm_minorvers = minvers;
1344	else
1345		nmp->nm_minorvers = 0;
1346
1347	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1348
1349	/*
1350	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1351	 * high, depending on whether we end up with negative offsets in
1352	 * the client or server somewhere.  2GB-1 may be safer.
1353	 *
1354	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1355	 * that we can handle until we find out otherwise.
1356	 */
1357	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1358		nmp->nm_maxfilesize = 0xffffffffLL;
1359	else
1360		nmp->nm_maxfilesize = OFF_MAX;
1361
1362	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1363		nmp->nm_wsize = NFS_WSIZE;
1364		nmp->nm_rsize = NFS_RSIZE;
1365		nmp->nm_readdirsize = NFS_READDIRSIZE;
1366	}
1367	nmp->nm_numgrps = NFS_MAXGRPS;
1368	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1369	if (nmp->nm_tprintf_delay < 0)
1370		nmp->nm_tprintf_delay = 0;
1371	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1372	if (nmp->nm_tprintf_initial_delay < 0)
1373		nmp->nm_tprintf_initial_delay = 0;
1374	nmp->nm_fhsize = argp->fhsize;
1375	if (nmp->nm_fhsize > 0)
1376		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1377	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1378	nmp->nm_nam = nam;
1379	/* Set up the sockets and per-host congestion */
1380	nmp->nm_sotype = argp->sotype;
1381	nmp->nm_soproto = argp->proto;
1382	nmp->nm_sockreq.nr_prog = NFS_PROG;
1383	if ((argp->flags & NFSMNT_NFSV4))
1384		nmp->nm_sockreq.nr_vers = NFS_VER4;
1385	else if ((argp->flags & NFSMNT_NFSV3))
1386		nmp->nm_sockreq.nr_vers = NFS_VER3;
1387	else
1388		nmp->nm_sockreq.nr_vers = NFS_VER2;
1389
1390
1391	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1392		goto bad;
1393	/* For NFSv4.1, get the clientid now. */
1394	if (nmp->nm_minorvers > 0) {
1395		NFSCL_DEBUG(3, "at getcl\n");
1396		error = nfscl_getcl(mp, cred, td, 0, &clp);
1397		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1398		if (error != 0)
1399			goto bad;
1400	}
1401
1402	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1403	    nmp->nm_dirpathlen > 0) {
1404		NFSCL_DEBUG(3, "in dirp\n");
1405		/*
1406		 * If the fhsize on the mount point == 0 for V4, the mount
1407		 * path needs to be looked up.
1408		 */
1409		trycnt = 3;
1410		do {
1411			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1412			    cred, td);
1413			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1414			if (error)
1415				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1416		} while (error && --trycnt > 0);
1417		if (error) {
1418			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1419			goto bad;
1420		}
1421	}
1422
1423	/*
1424	 * A reference count is needed on the nfsnode representing the
1425	 * remote root.  If this object is not persistent, then backward
1426	 * traversals of the mount point (i.e. "..") will not work if
1427	 * the nfsnode gets flushed out of the cache. Ufs does not have
1428	 * this problem, because one can identify root inodes by their
1429	 * number == ROOTINO (2).
1430	 */
1431	if (nmp->nm_fhsize > 0) {
1432		/*
1433		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1434		 * non-zero for the root vnode. f_iosize will be set correctly
1435		 * by nfs_statfs() before any I/O occurs.
1436		 */
1437		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1438		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1439		    LK_EXCLUSIVE);
1440		if (error)
1441			goto bad;
1442		*vpp = NFSTOV(np);
1443
1444		/*
1445		 * Get file attributes and transfer parameters for the
1446		 * mountpoint.  This has the side effect of filling in
1447		 * (*vpp)->v_type with the correct value.
1448		 */
1449		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1450		    cred, td, &nfsva, NULL, &lease);
1451		if (ret) {
1452			/*
1453			 * Just set default values to get things going.
1454			 */
1455			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1456			nfsva.na_vattr.va_type = VDIR;
1457			nfsva.na_vattr.va_mode = 0777;
1458			nfsva.na_vattr.va_nlink = 100;
1459			nfsva.na_vattr.va_uid = (uid_t)0;
1460			nfsva.na_vattr.va_gid = (gid_t)0;
1461			nfsva.na_vattr.va_fileid = 2;
1462			nfsva.na_vattr.va_gen = 1;
1463			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1464			nfsva.na_vattr.va_size = 512 * 1024;
1465			lease = 60;
1466		}
1467		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1468		if (nmp->nm_minorvers > 0) {
1469			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1470			NFSLOCKCLSTATE();
1471			clp->nfsc_renew = NFSCL_RENEW(lease);
1472			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1473			clp->nfsc_clientidrev++;
1474			if (clp->nfsc_clientidrev == 0)
1475				clp->nfsc_clientidrev++;
1476			NFSUNLOCKCLSTATE();
1477			/*
1478			 * Mount will succeed, so the renew thread can be
1479			 * started now.
1480			 */
1481			nfscl_start_renewthread(clp);
1482			nfscl_clientrelease(clp);
1483		}
1484		if (argp->flags & NFSMNT_NFSV3)
1485			ncl_fsinfo(nmp, *vpp, cred, td);
1486
1487		/* Mark if the mount point supports NFSv4 ACLs. */
1488		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1489		    ret == 0 &&
1490		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1491			MNT_ILOCK(mp);
1492			mp->mnt_flag |= MNT_NFS4ACLS;
1493			MNT_IUNLOCK(mp);
1494		}
1495
1496		/*
1497		 * Lose the lock but keep the ref.
1498		 */
1499		NFSVOPUNLOCK(*vpp, 0);
1500		return (0);
1501	}
1502	error = EIO;
1503
1504bad:
1505	if (clp != NULL)
1506		nfscl_clientrelease(clp);
1507	newnfs_disconnect(&nmp->nm_sockreq);
1508	crfree(nmp->nm_sockreq.nr_cred);
1509	if (nmp->nm_sockreq.nr_auth != NULL)
1510		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1511	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1512	mtx_destroy(&nmp->nm_mtx);
1513	if (nmp->nm_clp != NULL) {
1514		NFSLOCKCLSTATE();
1515		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1516		NFSUNLOCKCLSTATE();
1517		free(nmp->nm_clp, M_NFSCLCLIENT);
1518	}
1519	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1520		nfscl_freenfsclds(dsp);
1521	FREE(nmp, M_NEWNFSMNT);
1522	FREE(nam, M_SONAME);
1523	return (error);
1524}
1525
1526/*
1527 * unmount system call
1528 */
1529static int
1530nfs_unmount(struct mount *mp, int mntflags)
1531{
1532	struct thread *td;
1533	struct nfsmount *nmp;
1534	int error, flags = 0, i, trycnt = 0;
1535	struct nfsclds *dsp, *tdsp;
1536
1537	td = curthread;
1538
1539	if (mntflags & MNT_FORCE)
1540		flags |= FORCECLOSE;
1541	nmp = VFSTONFS(mp);
1542	/*
1543	 * Goes something like this..
1544	 * - Call vflush() to clear out vnodes for this filesystem
1545	 * - Close the socket
1546	 * - Free up the data structures
1547	 */
1548	/* In the forced case, cancel any outstanding requests. */
1549	if (mntflags & MNT_FORCE) {
1550		error = newnfs_nmcancelreqs(nmp);
1551		if (error)
1552			goto out;
1553		/* For a forced close, get rid of the renew thread now */
1554		nfscl_umount(nmp, td);
1555	}
1556	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1557	do {
1558		error = vflush(mp, 1, flags, td);
1559		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1560			(void) nfs_catnap(PSOCK, error, "newndm");
1561	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1562	if (error)
1563		goto out;
1564
1565	/*
1566	 * We are now committed to the unmount.
1567	 */
1568	if ((mntflags & MNT_FORCE) == 0)
1569		nfscl_umount(nmp, td);
1570	/* Make sure no nfsiods are assigned to this mount. */
1571	mtx_lock(&ncl_iod_mutex);
1572	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1573		if (ncl_iodmount[i] == nmp) {
1574			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1575			ncl_iodmount[i] = NULL;
1576		}
1577	mtx_unlock(&ncl_iod_mutex);
1578	newnfs_disconnect(&nmp->nm_sockreq);
1579	crfree(nmp->nm_sockreq.nr_cred);
1580	FREE(nmp->nm_nam, M_SONAME);
1581	if (nmp->nm_sockreq.nr_auth != NULL)
1582		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1583	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1584	mtx_destroy(&nmp->nm_mtx);
1585	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1586		nfscl_freenfsclds(dsp);
1587	FREE(nmp, M_NEWNFSMNT);
1588out:
1589	return (error);
1590}
1591
1592/*
1593 * Return root of a filesystem
1594 */
1595static int
1596nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1597{
1598	struct vnode *vp;
1599	struct nfsmount *nmp;
1600	struct nfsnode *np;
1601	int error;
1602
1603	nmp = VFSTONFS(mp);
1604	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1605	if (error)
1606		return error;
1607	vp = NFSTOV(np);
1608	/*
1609	 * Get transfer parameters and attributes for root vnode once.
1610	 */
1611	mtx_lock(&nmp->nm_mtx);
1612	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1613		mtx_unlock(&nmp->nm_mtx);
1614		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1615	} else
1616		mtx_unlock(&nmp->nm_mtx);
1617	if (vp->v_type == VNON)
1618	    vp->v_type = VDIR;
1619	vp->v_vflag |= VV_ROOT;
1620	*vpp = vp;
1621	return (0);
1622}
1623
1624/*
1625 * Flush out the buffer cache
1626 */
1627/* ARGSUSED */
1628static int
1629nfs_sync(struct mount *mp, int waitfor)
1630{
1631	struct vnode *vp, *mvp;
1632	struct thread *td;
1633	int error, allerror = 0;
1634
1635	td = curthread;
1636
1637	MNT_ILOCK(mp);
1638	/*
1639	 * If a forced dismount is in progress, return from here so that
1640	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1641	 * calling VFS_UNMOUNT().
1642	 */
1643	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1644		MNT_IUNLOCK(mp);
1645		return (EBADF);
1646	}
1647	MNT_IUNLOCK(mp);
1648
1649	/*
1650	 * Force stale buffer cache information to be flushed.
1651	 */
1652loop:
1653	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1654		/* XXX Racy bv_cnt check. */
1655		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1656		    waitfor == MNT_LAZY) {
1657			VI_UNLOCK(vp);
1658			continue;
1659		}
1660		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1661			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1662			goto loop;
1663		}
1664		error = VOP_FSYNC(vp, waitfor, td);
1665		if (error)
1666			allerror = error;
1667		NFSVOPUNLOCK(vp, 0);
1668		vrele(vp);
1669	}
1670	return (allerror);
1671}
1672
1673static int
1674nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1675{
1676	struct nfsmount *nmp = VFSTONFS(mp);
1677	struct vfsquery vq;
1678	int error;
1679
1680	bzero(&vq, sizeof(vq));
1681	switch (op) {
1682#if 0
1683	case VFS_CTL_NOLOCKS:
1684		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1685 		if (req->oldptr != NULL) {
1686 			error = SYSCTL_OUT(req, &val, sizeof(val));
1687 			if (error)
1688 				return (error);
1689 		}
1690 		if (req->newptr != NULL) {
1691 			error = SYSCTL_IN(req, &val, sizeof(val));
1692 			if (error)
1693 				return (error);
1694			if (val)
1695				nmp->nm_flag |= NFSMNT_NOLOCKS;
1696			else
1697				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1698 		}
1699		break;
1700#endif
1701	case VFS_CTL_QUERY:
1702		mtx_lock(&nmp->nm_mtx);
1703		if (nmp->nm_state & NFSSTA_TIMEO)
1704			vq.vq_flags |= VQ_NOTRESP;
1705		mtx_unlock(&nmp->nm_mtx);
1706#if 0
1707		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1708		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1709			vq.vq_flags |= VQ_NOTRESPLOCK;
1710#endif
1711		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1712		break;
1713 	case VFS_CTL_TIMEO:
1714 		if (req->oldptr != NULL) {
1715 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1716 			    sizeof(nmp->nm_tprintf_initial_delay));
1717 			if (error)
1718 				return (error);
1719 		}
1720 		if (req->newptr != NULL) {
1721			error = vfs_suser(mp, req->td);
1722			if (error)
1723				return (error);
1724 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1725 			    sizeof(nmp->nm_tprintf_initial_delay));
1726 			if (error)
1727 				return (error);
1728 			if (nmp->nm_tprintf_initial_delay < 0)
1729 				nmp->nm_tprintf_initial_delay = 0;
1730 		}
1731		break;
1732	default:
1733		return (ENOTSUP);
1734	}
1735	return (0);
1736}
1737
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The return value of newnfs_nmcancelreqs() is not looked at.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1750
1751/*
1752 * Extract the information needed by the nlm from the nfs vnode.
1753 */
1754static void
1755nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1756    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1757    struct timeval *timeop)
1758{
1759	struct nfsmount *nmp;
1760	struct nfsnode *np = VTONFS(vp);
1761
1762	nmp = VFSTONFS(vp->v_mount);
1763	if (fhlenp != NULL)
1764		*fhlenp = (size_t)np->n_fhp->nfh_len;
1765	if (fhp != NULL)
1766		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1767	if (sp != NULL)
1768		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1769	if (is_v3p != NULL)
1770		*is_v3p = NFS_ISV3(vp);
1771	if (sizep != NULL)
1772		*sizep = np->n_size;
1773	if (timeop != NULL) {
1774		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1775		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1776	}
1777}
1778
/*
 * Append the option name "opt" to the output buffer when "testval" is
 * non-zero and the name, plus its terminating NUL, fits in the *blen
 * bytes that remain at *buf.  On an append, *buf is advanced past the
 * name and *blen reduced by its length; otherwise nothing is touched.
 * "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t need;
	int wrote;

	if (testval == 0)
		return;
	need = strlen(opt);
	if (*blen <= need)
		return;

	wrote = snprintf(*buf, *blen, "%s", opt);
	/* Sanity check only: the name was verified to fit above. */
	if ((size_t)wrote != need)
		printf("EEK!!\n");
	*buf += wrote;
	*blen -= wrote;
}
1796
/*
 * Append "opt=optval" (optval printed in decimal) to the output buffer.
 *
 * Nothing is attempted unless more than strlen(opt) + 1 bytes remain.
 * When the formatted text, including its terminating NUL, fits in the
 * *blen bytes at *buf, *buf is advanced past it and *blen is reduced by
 * its length.  When it does not fit, the partially written text is
 * erased again, so that a cut-off (and therefore wrong) number is never
 * left in the buffer, and *buf / *blen are left unchanged.
 * "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/* Did not fit: put the terminator back. */
			**buf = '\0';
		}
	}
}
1814
1815/*
1816 * Load the option flags and values into the buffer.
1817 */
1818void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1819{
1820	char *buf;
1821	size_t blen;
1822
1823	buf = buffer;
1824	blen = buflen;
1825	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1826	    &blen);
1827	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1828		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1829		    &blen);
1830		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1831		    &buf, &blen);
1832	}
1833	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1834	    &blen);
1835	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1836	    "nfsv2", &buf, &blen);
1837	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1838	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1839	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1840	    &buf, &blen);
1841	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1842	    &buf, &blen);
1843	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1844	    &blen);
1845	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1846	    &blen);
1847	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1848	    &blen);
1849	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1850	    &blen);
1851	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1852	    &blen);
1853	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1854	    ",noncontigwr", &buf, &blen);
1855	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1856	    0, ",lockd", &buf, &blen);
1857	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1858	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1859	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1860	    &buf, &blen);
1861	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1862	    &buf, &blen);
1863	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1864	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1865	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1866	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1867	    &buf, &blen);
1868	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1869	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1870	    &buf, &blen);
1871	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1872	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1873	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1874	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1875	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1876	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1877	    &blen);
1878	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1879	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1880	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1881	    &blen);
1882	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1883	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1884	    &blen);
1885	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1886	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1887}
1888
1889