nfs_clvfsops.c revision 317975
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clvfsops.c 317975 2017-05-08 19:57:43Z rmacklem $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern struct nfsstats	newnfsstats;
82extern int nfsrv_useacl;
83extern int nfscl_debuglevel;
84extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86extern struct mtx ncl_iod_mutex;
87NFSCLSTATEMUTEX;
88
/* malloc(9) types defined by this file. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");

/*
 * Read-write integer tunables attached to the _vfs_nfs sysctl node.  Each
 * one is backed by a file-local static variable initialised to its default.
 */
SYSCTL_DECL(_vfs_nfs);
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * NOTE(review): for the two "downdelay" entries below the second
 * SYSCTL_INT() argument (the OID number, per sysctl(9)) is the default
 * delay constant rather than OID_AUTO as used elsewhere in this file.
 * This looks unintentional -- confirm before changing, since it alters
 * the numeric OIDs.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103
104static int	nfs_mountroot(struct mount *);
105static void	nfs_sec_name(char *, int *);
106static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
107		    struct nfs_args *argp, const char *, struct ucred *,
108		    struct thread *);
109static int	mountnfs(struct nfs_args *, struct mount *,
110		    struct sockaddr *, char *, u_char *, int, u_char *, int,
111		    u_char *, int, struct vnode **, struct ucred *,
112		    struct thread *, int, int, int);
113static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
114		    struct sockaddr_storage *, int *, off_t *,
115		    struct timeval *);
116static vfs_mount_t nfs_mount;
117static vfs_cmount_t nfs_cmount;
118static vfs_unmount_t nfs_unmount;
119static vfs_root_t nfs_root;
120static vfs_statfs_t nfs_statfs;
121static vfs_sync_t nfs_sync;
122static vfs_sysctl_t nfs_sysctl;
123static vfs_purge_t nfs_purge;
124
/*
 * nfs vfs operations.
 *
 * Operations vector for the "nfs" file system type.  ncl_init and
 * ncl_uninit are not defined in this file; every other entry is one of
 * the static functions forward-declared earlier in this file.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
/* Register the vector under the name "nfs" with the given VFCF_* flags. */
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfs, 1);
/* Modules that must be present (version 1 of each) before this one. */
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
148
/*
 * These objects are now defined in sys/nfs/nfs_diskless.c so that they
 * can be shared by both NFS clients.  They are defined here only when
 * neither NFS_ROOT nor NFSCLIENT is configured, so that the symbols still
 * exist in such kernels, although they aren't used in that case.
 */
#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/*
 * Read-only sysctls exposing the diskless-boot state: the validity
 * indicator, the root_hostnam string and the root server socket address
 * held in nfsv3_diskless.
 */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
171
172
173void		newnfsargs_ntoh(struct nfs_args *);
174static int	nfs_mountdiskless(char *,
175		    struct sockaddr_in *, struct nfs_args *,
176		    struct thread *, struct vnode **, struct mount *);
177static void	nfs_convert_diskless(void);
178static void	nfs_convert_oargs(struct nfs_args *args,
179		    struct onfs_args *oargs);
180
181int
182newnfs_iosize(struct nfsmount *nmp)
183{
184	int iosize, maxio;
185
186	/* First, set the upper limit for iosize */
187	if (nmp->nm_flag & NFSMNT_NFSV4) {
188		maxio = NFS_MAXBSIZE;
189	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
190		if (nmp->nm_sotype == SOCK_DGRAM)
191			maxio = NFS_MAXDGRAMDATA;
192		else
193			maxio = NFS_MAXBSIZE;
194	} else {
195		maxio = NFS_V2MAXDATA;
196	}
197	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198		nmp->nm_rsize = maxio;
199	if (nmp->nm_rsize > NFS_MAXBSIZE)
200		nmp->nm_rsize = NFS_MAXBSIZE;
201	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202		nmp->nm_readdirsize = maxio;
203	if (nmp->nm_readdirsize > nmp->nm_rsize)
204		nmp->nm_readdirsize = nmp->nm_rsize;
205	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206		nmp->nm_wsize = maxio;
207	if (nmp->nm_wsize > NFS_MAXBSIZE)
208		nmp->nm_wsize = NFS_MAXBSIZE;
209
210	/*
211	 * Calculate the size used for io buffers.  Use the larger
212	 * of the two sizes to minimise nfs requests but make sure
213	 * that it is at least one VM page to avoid wasting buffer
214	 * space.  It must also be at least NFS_DIRBLKSIZ, since
215	 * that is the buffer size used for directories.
216	 */
217	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
218	iosize = imax(iosize, PAGE_SIZE);
219	iosize = imax(iosize, NFS_DIRBLKSIZ);
220	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
221	return (iosize);
222}
223
224static void
225nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
226{
227
228	args->version = NFS_ARGSVERSION;
229	args->addr = oargs->addr;
230	args->addrlen = oargs->addrlen;
231	args->sotype = oargs->sotype;
232	args->proto = oargs->proto;
233	args->fh = oargs->fh;
234	args->fhsize = oargs->fhsize;
235	args->flags = oargs->flags;
236	args->wsize = oargs->wsize;
237	args->rsize = oargs->rsize;
238	args->readdirsize = oargs->readdirsize;
239	args->timeo = oargs->timeo;
240	args->retrans = oargs->retrans;
241	args->readahead = oargs->readahead;
242	args->hostname = oargs->hostname;
243}
244
245static void
246nfs_convert_diskless(void)
247{
248
249	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
250		sizeof(struct ifaliasreq));
251	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
252		sizeof(struct sockaddr_in));
253	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
254	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
255		nfsv3_diskless.root_fhsize = NFSX_MYFH;
256		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
257	} else {
258		nfsv3_diskless.root_fhsize = NFSX_V2FH;
259		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
260	}
261	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
262		sizeof(struct sockaddr_in));
263	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
264	nfsv3_diskless.root_time = nfs_diskless.root_time;
265	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
266		MAXHOSTNAMELEN);
267	nfs_diskless_valid = 3;
268}
269
270/*
271 * nfs statfs call
272 */
273static int
274nfs_statfs(struct mount *mp, struct statfs *sbp)
275{
276	struct vnode *vp;
277	struct thread *td;
278	struct nfsmount *nmp = VFSTONFS(mp);
279	struct nfsvattr nfsva;
280	struct nfsfsinfo fs;
281	struct nfsstatfs sb;
282	int error = 0, attrflag, gotfsinfo = 0, ret;
283	struct nfsnode *np;
284
285	td = curthread;
286
287	error = vfs_busy(mp, MBF_NOWAIT);
288	if (error)
289		return (error);
290	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
291	if (error) {
292		vfs_unbusy(mp);
293		return (error);
294	}
295	vp = NFSTOV(np);
296	mtx_lock(&nmp->nm_mtx);
297	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
298		mtx_unlock(&nmp->nm_mtx);
299		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
300		    &attrflag, NULL);
301		if (!error)
302			gotfsinfo = 1;
303	} else
304		mtx_unlock(&nmp->nm_mtx);
305	if (!error)
306		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
307		    &attrflag, NULL);
308	if (error != 0)
309		NFSCL_DEBUG(2, "statfs=%d\n", error);
310	if (attrflag == 0) {
311		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
312		    td->td_ucred, td, &nfsva, NULL, NULL);
313		if (ret) {
314			/*
315			 * Just set default values to get things going.
316			 */
317			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
318			nfsva.na_vattr.va_type = VDIR;
319			nfsva.na_vattr.va_mode = 0777;
320			nfsva.na_vattr.va_nlink = 100;
321			nfsva.na_vattr.va_uid = (uid_t)0;
322			nfsva.na_vattr.va_gid = (gid_t)0;
323			nfsva.na_vattr.va_fileid = 2;
324			nfsva.na_vattr.va_gen = 1;
325			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
326			nfsva.na_vattr.va_size = 512 * 1024;
327		}
328	}
329	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
330	if (!error) {
331	    mtx_lock(&nmp->nm_mtx);
332	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
333		nfscl_loadfsinfo(nmp, &fs);
334	    nfscl_loadsbinfo(nmp, &sb, sbp);
335	    sbp->f_iosize = newnfs_iosize(nmp);
336	    mtx_unlock(&nmp->nm_mtx);
337	    if (sbp != &mp->mnt_stat) {
338		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
339		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
340	    }
341	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
342	} else if (NFS_ISV4(vp)) {
343		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
344	}
345	vput(vp);
346	vfs_unbusy(mp);
347	return (error);
348}
349
350/*
351 * nfs version 3 fsinfo rpc call
352 */
353int
354ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
355    struct thread *td)
356{
357	struct nfsfsinfo fs;
358	struct nfsvattr nfsva;
359	int error, attrflag;
360
361	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
362	if (!error) {
363		if (attrflag)
364			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
365			    1);
366		mtx_lock(&nmp->nm_mtx);
367		nfscl_loadfsinfo(nmp, &fs);
368		mtx_unlock(&nmp->nm_mtx);
369	}
370	return (error);
371}
372
373/*
374 * Mount a remote root fs via. nfs. This depends on the info in the
375 * nfs_diskless structure that has been filled in properly by some primary
376 * bootstrap.
377 * It goes something like this:
378 * - do enough of "ifconfig" by calling ifioctl() so that the system
379 *   can talk to the server
380 * - If nfs_diskless.mygateway is filled in, use that address as
381 *   a default gateway.
382 * - build the rootfs mount point and call mountnfs() to do the rest.
383 *
384 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
385 * structure, as well as other global NFS client variables here, as
386 * nfs_mountroot() will be called once in the boot before any other NFS
387 * client activity occurs.
388 */
389static int
390nfs_mountroot(struct mount *mp)
391{
392	struct thread *td = curthread;
393	struct nfsv3_diskless *nd = &nfsv3_diskless;
394	struct socket *so;
395	struct vnode *vp;
396	struct ifreq ir;
397	int error;
398	u_long l;
399	char buf[128];
400	char *cp;
401
402#if defined(BOOTP_NFSROOT) && defined(BOOTP)
403	bootpc_init();		/* use bootp to get nfs_diskless filled in */
404#elif defined(NFS_ROOT)
405	nfs_setup_diskless();
406#endif
407
408	if (nfs_diskless_valid == 0)
409		return (-1);
410	if (nfs_diskless_valid == 1)
411		nfs_convert_diskless();
412
413	/*
414	 * XXX splnet, so networks will receive...
415	 */
416	splnet();
417
418	/*
419	 * Do enough of ifconfig(8) so that the critical net interface can
420	 * talk to the server.
421	 */
422	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
423	    td->td_ucred, td);
424	if (error)
425		panic("nfs_mountroot: socreate(%04x): %d",
426			nd->myif.ifra_addr.sa_family, error);
427
428#if 0 /* XXX Bad idea */
429	/*
430	 * We might not have been told the right interface, so we pass
431	 * over the first ten interfaces of the same kind, until we get
432	 * one of them configured.
433	 */
434
435	for (i = strlen(nd->myif.ifra_name) - 1;
436		nd->myif.ifra_name[i] >= '0' &&
437		nd->myif.ifra_name[i] <= '9';
438		nd->myif.ifra_name[i] ++) {
439		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
440		if(!error)
441			break;
442	}
443#endif
444	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
445	if (error)
446		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
447	if ((cp = getenv("boot.netif.mtu")) != NULL) {
448		ir.ifr_mtu = strtol(cp, NULL, 10);
449		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
450		freeenv(cp);
451		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
452		if (error)
453			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
454	}
455	soclose(so);
456
457	/*
458	 * If the gateway field is filled in, set it as the default route.
459	 * Note that pxeboot will set a default route of 0 if the route
460	 * is not set by the DHCP server.  Check also for a value of 0
461	 * to avoid panicking inappropriately in that situation.
462	 */
463	if (nd->mygateway.sin_len != 0 &&
464	    nd->mygateway.sin_addr.s_addr != 0) {
465		struct sockaddr_in mask, sin;
466
467		bzero((caddr_t)&mask, sizeof(mask));
468		sin = mask;
469		sin.sin_family = AF_INET;
470		sin.sin_len = sizeof(sin);
471                /* XXX MRT use table 0 for this sort of thing */
472		CURVNET_SET(TD_TO_VNET(td));
473		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
474		    (struct sockaddr *)&nd->mygateway,
475		    (struct sockaddr *)&mask,
476		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
477		CURVNET_RESTORE();
478		if (error)
479			panic("nfs_mountroot: RTM_ADD: %d", error);
480	}
481
482	/*
483	 * Create the rootfs mount point.
484	 */
485	nd->root_args.fh = nd->root_fh;
486	nd->root_args.fhsize = nd->root_fhsize;
487	l = ntohl(nd->root_saddr.sin_addr.s_addr);
488	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
489		(l >> 24) & 0xff, (l >> 16) & 0xff,
490		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
491	printf("NFS ROOT: %s\n", buf);
492	nd->root_args.hostname = buf;
493	if ((error = nfs_mountdiskless(buf,
494	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
495		return (error);
496	}
497
498	/*
499	 * This is not really an nfs issue, but it is much easier to
500	 * set hostname here and then let the "/etc/rc.xxx" files
501	 * mount the right /var based upon its preset value.
502	 */
503	mtx_lock(&prison0.pr_mtx);
504	strlcpy(prison0.pr_hostname, nd->my_hostnam,
505	    sizeof(prison0.pr_hostname));
506	mtx_unlock(&prison0.pr_mtx);
507	inittodr(ntohl(nd->root_time));
508	return (0);
509}
510
511/*
512 * Internal version of mount system call for diskless setup.
513 */
514static int
515nfs_mountdiskless(char *path,
516    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
517    struct vnode **vpp, struct mount *mp)
518{
519	struct sockaddr *nam;
520	int dirlen, error;
521	char *dirpath;
522
523	/*
524	 * Find the directory path in "path", which also has the server's
525	 * name/ip address in it.
526	 */
527	dirpath = strchr(path, ':');
528	if (dirpath != NULL)
529		dirlen = strlen(++dirpath);
530	else
531		dirlen = 0;
532	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
533	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
534	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
535	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
536		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
537		return (error);
538	}
539	return (0);
540}
541
542static void
543nfs_sec_name(char *sec, int *flagsp)
544{
545	if (!strcmp(sec, "krb5"))
546		*flagsp |= NFSMNT_KERB;
547	else if (!strcmp(sec, "krb5i"))
548		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
549	else if (!strcmp(sec, "krb5p"))
550		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
551}
552
553static void
554nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
555    const char *hostname, struct ucred *cred, struct thread *td)
556{
557	int s;
558	int adjsock;
559	char *p;
560
561	s = splnet();
562
563	/*
564	 * Set read-only flag if requested; otherwise, clear it if this is
565	 * an update.  If this is not an update, then either the read-only
566	 * flag is already clear, or this is a root mount and it was set
567	 * intentionally at some previous point.
568	 */
569	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
570		MNT_ILOCK(mp);
571		mp->mnt_flag |= MNT_RDONLY;
572		MNT_IUNLOCK(mp);
573	} else if (mp->mnt_flag & MNT_UPDATE) {
574		MNT_ILOCK(mp);
575		mp->mnt_flag &= ~MNT_RDONLY;
576		MNT_IUNLOCK(mp);
577	}
578
579	/*
580	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
581	 * no sense in that context.  Also, set up appropriate retransmit
582	 * and soft timeout behavior.
583	 */
584	if (argp->sotype == SOCK_STREAM) {
585		nmp->nm_flag &= ~NFSMNT_NOCONN;
586		nmp->nm_timeo = NFS_MAXTIMEO;
587		if ((argp->flags & NFSMNT_NFSV4) != 0)
588			nmp->nm_retry = INT_MAX;
589		else
590			nmp->nm_retry = NFS_RETRANS_TCP;
591	}
592
593	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
594	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
595		argp->flags &= ~NFSMNT_RDIRPLUS;
596		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
597	}
598
599	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
600	if (nmp->nm_minorvers == 0) {
601		argp->flags &= ~NFSMNT_ONEOPENOWN;
602		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
603	}
604
605	/* Re-bind if rsrvd port requested and wasn't on one */
606	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
607		  && (argp->flags & NFSMNT_RESVPORT);
608	/* Also re-bind if we're switching to/from a connected UDP socket */
609	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
610		    (argp->flags & NFSMNT_NOCONN));
611
612	/* Update flags atomically.  Don't change the lock bits. */
613	nmp->nm_flag = argp->flags | nmp->nm_flag;
614	splx(s);
615
616	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
617		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
618		if (nmp->nm_timeo < NFS_MINTIMEO)
619			nmp->nm_timeo = NFS_MINTIMEO;
620		else if (nmp->nm_timeo > NFS_MAXTIMEO)
621			nmp->nm_timeo = NFS_MAXTIMEO;
622	}
623
624	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
625		nmp->nm_retry = argp->retrans;
626		if (nmp->nm_retry > NFS_MAXREXMIT)
627			nmp->nm_retry = NFS_MAXREXMIT;
628	}
629
630	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
631		nmp->nm_wsize = argp->wsize;
632		/*
633		 * Clip at the power of 2 below the size. There is an
634		 * issue (not isolated) that causes intermittent page
635		 * faults if this is not done.
636		 */
637		if (nmp->nm_wsize > NFS_FABLKSIZE)
638			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
639		else
640			nmp->nm_wsize = NFS_FABLKSIZE;
641	}
642
643	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
644		nmp->nm_rsize = argp->rsize;
645		/*
646		 * Clip at the power of 2 below the size. There is an
647		 * issue (not isolated) that causes intermittent page
648		 * faults if this is not done.
649		 */
650		if (nmp->nm_rsize > NFS_FABLKSIZE)
651			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
652		else
653			nmp->nm_rsize = NFS_FABLKSIZE;
654	}
655
656	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
657		nmp->nm_readdirsize = argp->readdirsize;
658	}
659
660	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
661		nmp->nm_acregmin = argp->acregmin;
662	else
663		nmp->nm_acregmin = NFS_MINATTRTIMO;
664	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
665		nmp->nm_acregmax = argp->acregmax;
666	else
667		nmp->nm_acregmax = NFS_MAXATTRTIMO;
668	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
669		nmp->nm_acdirmin = argp->acdirmin;
670	else
671		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
672	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
673		nmp->nm_acdirmax = argp->acdirmax;
674	else
675		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
676	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
677		nmp->nm_acdirmin = nmp->nm_acdirmax;
678	if (nmp->nm_acregmin > nmp->nm_acregmax)
679		nmp->nm_acregmin = nmp->nm_acregmax;
680
681	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
682		if (argp->readahead <= NFS_MAXRAHEAD)
683			nmp->nm_readahead = argp->readahead;
684		else
685			nmp->nm_readahead = NFS_MAXRAHEAD;
686	}
687	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
688		if (argp->wcommitsize < nmp->nm_wsize)
689			nmp->nm_wcommitsize = nmp->nm_wsize;
690		else
691			nmp->nm_wcommitsize = argp->wcommitsize;
692	}
693
694	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
695		    (nmp->nm_soproto != argp->proto));
696
697	if (nmp->nm_client != NULL && adjsock) {
698		int haslock = 0, error = 0;
699
700		if (nmp->nm_sotype == SOCK_STREAM) {
701			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
702			if (!error)
703				haslock = 1;
704		}
705		if (!error) {
706		    newnfs_disconnect(&nmp->nm_sockreq);
707		    if (haslock)
708			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
709		    nmp->nm_sotype = argp->sotype;
710		    nmp->nm_soproto = argp->proto;
711		    if (nmp->nm_sotype == SOCK_DGRAM)
712			while (newnfs_connect(nmp, &nmp->nm_sockreq,
713			    cred, td, 0)) {
714				printf("newnfs_args: retrying connect\n");
715				(void) nfs_catnap(PSOCK, 0, "newnfscon");
716			}
717		}
718	} else {
719		nmp->nm_sotype = argp->sotype;
720		nmp->nm_soproto = argp->proto;
721	}
722
723	if (hostname != NULL) {
724		strlcpy(nmp->nm_hostname, hostname,
725		    sizeof(nmp->nm_hostname));
726		p = strchr(nmp->nm_hostname, ':');
727		if (p != NULL)
728			*p = '\0';
729	}
730}
731
/*
 * NULL-terminated list of option names, passed to vfs_filteropt() by
 * nfs_mount() to validate the options supplied for a mount.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union",
	"noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
	"async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard", "mntudp", "tcp", "udp",
	"wsize", "rsize",
	"retrans", "actimeo",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout",
	"addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath",
	"minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
	"pnfs", "wcommitsize", "oneopenown",
	NULL
};
743
744/*
745 * VFS Operations.
746 *
747 * mount system call
748 * It seems a bit dumb to copyinstr() the host and path here and then
749 * bcopy() them in mountnfs(), but I wanted to detect errors before
750 * doing the sockargs() call because sockargs() allocates an mbuf and
751 * an error after that means that I have to release the mbuf.
752 */
753/* ARGSUSED */
754static int
755nfs_mount(struct mount *mp)
756{
757	struct nfs_args args = {
758	    .version = NFS_ARGSVERSION,
759	    .addr = NULL,
760	    .addrlen = sizeof (struct sockaddr_in),
761	    .sotype = SOCK_STREAM,
762	    .proto = 0,
763	    .fh = NULL,
764	    .fhsize = 0,
765	    .flags = NFSMNT_RESVPORT,
766	    .wsize = NFS_WSIZE,
767	    .rsize = NFS_RSIZE,
768	    .readdirsize = NFS_READDIRSIZE,
769	    .timeo = 10,
770	    .retrans = NFS_RETRANS,
771	    .readahead = NFS_DEFRAHEAD,
772	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
773	    .hostname = NULL,
774	    .acregmin = NFS_MINATTRTIMO,
775	    .acregmax = NFS_MAXATTRTIMO,
776	    .acdirmin = NFS_MINDIRATTRTIMO,
777	    .acdirmax = NFS_MAXDIRATTRTIMO,
778	};
779	int error = 0, ret, len;
780	struct sockaddr *nam = NULL;
781	struct vnode *vp;
782	struct thread *td;
783	char hst[MNAMELEN];
784	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
785	char *cp, *opt, *name, *secname;
786	int nametimeo = NFS_DEFAULT_NAMETIMEO;
787	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
788	int minvers = 0;
789	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
790	size_t hstlen;
791
792	has_nfs_args_opt = 0;
793	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
794		error = EINVAL;
795		goto out;
796	}
797
798	td = curthread;
799	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
800		error = nfs_mountroot(mp);
801		goto out;
802	}
803
804	nfscl_init();
805
806	/*
807	 * The old mount_nfs program passed the struct nfs_args
808	 * from userspace to kernel.  The new mount_nfs program
809	 * passes string options via nmount() from userspace to kernel
810	 * and we populate the struct nfs_args in the kernel.
811	 */
812	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
813		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
814		    sizeof(args));
815		if (error != 0)
816			goto out;
817
818		if (args.version != NFS_ARGSVERSION) {
819			error = EPROGMISMATCH;
820			goto out;
821		}
822		has_nfs_args_opt = 1;
823	}
824
825	/* Handle the new style options. */
826	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
827		args.acdirmin = args.acdirmax =
828		    args.acregmin = args.acregmax = 0;
829		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
830		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
831	}
832	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
833		args.flags |= NFSMNT_NOCONN;
834	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
835		args.flags &= ~NFSMNT_NOCONN;
836	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
837		args.flags |= NFSMNT_NOLOCKD;
838	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
839		args.flags &= ~NFSMNT_NOLOCKD;
840	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
841		args.flags |= NFSMNT_INT;
842	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
843		args.flags |= NFSMNT_RDIRPLUS;
844	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
845		args.flags |= NFSMNT_RESVPORT;
846	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
847		args.flags &= ~NFSMNT_RESVPORT;
848	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
849		args.flags |= NFSMNT_SOFT;
850	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
851		args.flags &= ~NFSMNT_SOFT;
852	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
853		args.sotype = SOCK_DGRAM;
854	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
855		args.sotype = SOCK_DGRAM;
856	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
857		args.sotype = SOCK_STREAM;
858	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
859		args.flags |= NFSMNT_NFSV3;
860	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
861		args.flags |= NFSMNT_NFSV4;
862		args.sotype = SOCK_STREAM;
863	}
864	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
865		args.flags |= NFSMNT_ALLGSSNAME;
866	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
867		args.flags |= NFSMNT_NOCTO;
868	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
869		args.flags |= NFSMNT_NONCONTIGWR;
870	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
871		args.flags |= NFSMNT_PNFS;
872	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
873		args.flags |= NFSMNT_ONEOPENOWN;
874	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
875		if (opt == NULL) {
876			vfs_mount_error(mp, "illegal readdirsize");
877			error = EINVAL;
878			goto out;
879		}
880		ret = sscanf(opt, "%d", &args.readdirsize);
881		if (ret != 1 || args.readdirsize <= 0) {
882			vfs_mount_error(mp, "illegal readdirsize: %s",
883			    opt);
884			error = EINVAL;
885			goto out;
886		}
887		args.flags |= NFSMNT_READDIRSIZE;
888	}
889	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
890		if (opt == NULL) {
891			vfs_mount_error(mp, "illegal readahead");
892			error = EINVAL;
893			goto out;
894		}
895		ret = sscanf(opt, "%d", &args.readahead);
896		if (ret != 1 || args.readahead <= 0) {
897			vfs_mount_error(mp, "illegal readahead: %s",
898			    opt);
899			error = EINVAL;
900			goto out;
901		}
902		args.flags |= NFSMNT_READAHEAD;
903	}
904	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
905		if (opt == NULL) {
906			vfs_mount_error(mp, "illegal wsize");
907			error = EINVAL;
908			goto out;
909		}
910		ret = sscanf(opt, "%d", &args.wsize);
911		if (ret != 1 || args.wsize <= 0) {
912			vfs_mount_error(mp, "illegal wsize: %s",
913			    opt);
914			error = EINVAL;
915			goto out;
916		}
917		args.flags |= NFSMNT_WSIZE;
918	}
919	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
920		if (opt == NULL) {
921			vfs_mount_error(mp, "illegal rsize");
922			error = EINVAL;
923			goto out;
924		}
925		ret = sscanf(opt, "%d", &args.rsize);
926		if (ret != 1 || args.rsize <= 0) {
927			vfs_mount_error(mp, "illegal wsize: %s",
928			    opt);
929			error = EINVAL;
930			goto out;
931		}
932		args.flags |= NFSMNT_RSIZE;
933	}
934	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
935		if (opt == NULL) {
936			vfs_mount_error(mp, "illegal retrans");
937			error = EINVAL;
938			goto out;
939		}
940		ret = sscanf(opt, "%d", &args.retrans);
941		if (ret != 1 || args.retrans <= 0) {
942			vfs_mount_error(mp, "illegal retrans: %s",
943			    opt);
944			error = EINVAL;
945			goto out;
946		}
947		args.flags |= NFSMNT_RETRANS;
948	}
949	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
950		ret = sscanf(opt, "%d", &args.acregmin);
951		if (ret != 1 || args.acregmin < 0) {
952			vfs_mount_error(mp, "illegal actimeo: %s",
953			    opt);
954			error = EINVAL;
955			goto out;
956		}
957		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
958		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
959		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
960	}
961	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
962		ret = sscanf(opt, "%d", &args.acregmin);
963		if (ret != 1 || args.acregmin < 0) {
964			vfs_mount_error(mp, "illegal acregmin: %s",
965			    opt);
966			error = EINVAL;
967			goto out;
968		}
969		args.flags |= NFSMNT_ACREGMIN;
970	}
971	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
972		ret = sscanf(opt, "%d", &args.acregmax);
973		if (ret != 1 || args.acregmax < 0) {
974			vfs_mount_error(mp, "illegal acregmax: %s",
975			    opt);
976			error = EINVAL;
977			goto out;
978		}
979		args.flags |= NFSMNT_ACREGMAX;
980	}
981	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
982		ret = sscanf(opt, "%d", &args.acdirmin);
983		if (ret != 1 || args.acdirmin < 0) {
984			vfs_mount_error(mp, "illegal acdirmin: %s",
985			    opt);
986			error = EINVAL;
987			goto out;
988		}
989		args.flags |= NFSMNT_ACDIRMIN;
990	}
991	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
992		ret = sscanf(opt, "%d", &args.acdirmax);
993		if (ret != 1 || args.acdirmax < 0) {
994			vfs_mount_error(mp, "illegal acdirmax: %s",
995			    opt);
996			error = EINVAL;
997			goto out;
998		}
999		args.flags |= NFSMNT_ACDIRMAX;
1000	}
1001	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1002		ret = sscanf(opt, "%d", &args.wcommitsize);
1003		if (ret != 1 || args.wcommitsize < 0) {
1004			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1005			error = EINVAL;
1006			goto out;
1007		}
1008		args.flags |= NFSMNT_WCOMMITSIZE;
1009	}
1010	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1011		ret = sscanf(opt, "%d", &args.timeo);
1012		if (ret != 1 || args.timeo <= 0) {
1013			vfs_mount_error(mp, "illegal timeo: %s",
1014			    opt);
1015			error = EINVAL;
1016			goto out;
1017		}
1018		args.flags |= NFSMNT_TIMEO;
1019	}
1020	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1021		ret = sscanf(opt, "%d", &args.timeo);
1022		if (ret != 1 || args.timeo <= 0) {
1023			vfs_mount_error(mp, "illegal timeout: %s",
1024			    opt);
1025			error = EINVAL;
1026			goto out;
1027		}
1028		args.flags |= NFSMNT_TIMEO;
1029	}
1030	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1031		ret = sscanf(opt, "%d", &nametimeo);
1032		if (ret != 1 || nametimeo < 0) {
1033			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1034			error = EINVAL;
1035			goto out;
1036		}
1037	}
1038	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1039	    == 0) {
1040		ret = sscanf(opt, "%d", &negnametimeo);
1041		if (ret != 1 || negnametimeo < 0) {
1042			vfs_mount_error(mp, "illegal negnametimeo: %s",
1043			    opt);
1044			error = EINVAL;
1045			goto out;
1046		}
1047	}
1048	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1049	    0) {
1050		ret = sscanf(opt, "%d", &minvers);
1051		if (ret != 1 || minvers < 0 || minvers > 1 ||
1052		    (args.flags & NFSMNT_NFSV4) == 0) {
1053			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1054			error = EINVAL;
1055			goto out;
1056		}
1057	}
1058	if (vfs_getopt(mp->mnt_optnew, "sec",
1059		(void **) &secname, NULL) == 0)
1060		nfs_sec_name(secname, &args.flags);
1061
1062	if (mp->mnt_flag & MNT_UPDATE) {
1063		struct nfsmount *nmp = VFSTONFS(mp);
1064
1065		if (nmp == NULL) {
1066			error = EIO;
1067			goto out;
1068		}
1069
1070		/*
1071		 * If a change from TCP->UDP is done and there are thread(s)
1072		 * that have I/O RPC(s) in progress with a transfer size
1073		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1074		 * hung, retrying the RPC(s) forever. Usually these threads
1075		 * will be seen doing an uninterruptible sleep on wait channel
1076		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1077		 */
1078		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1079			tprintf(td->td_proc, LOG_WARNING,
1080	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1081
1082		/*
1083		 * When doing an update, we can't change version,
1084		 * security, switch lockd strategies, change cookie
1085		 * translation or switch oneopenown.
1086		 */
1087		args.flags = (args.flags &
1088		    ~(NFSMNT_NFSV3 |
1089		      NFSMNT_NFSV4 |
1090		      NFSMNT_KERB |
1091		      NFSMNT_INTEGRITY |
1092		      NFSMNT_PRIVACY |
1093		      NFSMNT_ONEOPENOWN |
1094		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1095		    (nmp->nm_flag &
1096			(NFSMNT_NFSV3 |
1097			 NFSMNT_NFSV4 |
1098			 NFSMNT_KERB |
1099			 NFSMNT_INTEGRITY |
1100			 NFSMNT_PRIVACY |
1101			 NFSMNT_ONEOPENOWN |
1102			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1103		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1104		goto out;
1105	}
1106
1107	/*
1108	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1109	 * or no-connection mode for those protocols that support
1110	 * no-connection mode (the flag will be cleared later for protocols
1111	 * that do not support no-connection mode).  This will allow a client
1112	 * to receive replies from a different IP than the request was
1113	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1114	 * not 0.
1115	 */
1116	if (nfs_ip_paranoia == 0)
1117		args.flags |= NFSMNT_NOCONN;
1118
1119	if (has_nfs_args_opt != 0) {
1120		/*
1121		 * In the 'nfs_args' case, the pointers in the args
1122		 * structure are in userland - we copy them in here.
1123		 */
1124		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1125			vfs_mount_error(mp, "Bad file handle");
1126			error = EINVAL;
1127			goto out;
1128		}
1129		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1130		    args.fhsize);
1131		if (error != 0)
1132			goto out;
1133		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1134		if (error != 0)
1135			goto out;
1136		bzero(&hst[hstlen], MNAMELEN - hstlen);
1137		args.hostname = hst;
1138		/* getsockaddr() call must be after above copyin() calls */
1139		error = getsockaddr(&nam, (caddr_t)args.addr,
1140		    args.addrlen);
1141		if (error != 0)
1142			goto out;
1143	} else {
1144		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1145		    &args.fhsize) == 0) {
1146			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1147				vfs_mount_error(mp, "Bad file handle");
1148				error = EINVAL;
1149				goto out;
1150			}
1151			bcopy(args.fh, nfh, args.fhsize);
1152		} else {
1153			args.fhsize = 0;
1154		}
1155		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1156		    (void **)&args.hostname, &len);
1157		if (args.hostname == NULL) {
1158			vfs_mount_error(mp, "Invalid hostname");
1159			error = EINVAL;
1160			goto out;
1161		}
1162		if (len >= MNAMELEN) {
1163			vfs_mount_error(mp, "Hostname too long");
1164			error = EINVAL;
1165			goto out;
1166		}
1167		bcopy(args.hostname, hst, len);
1168		hst[len] = '\0';
1169	}
1170
1171	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1172		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1173	else {
1174		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1175		cp = strchr(srvkrbname, ':');
1176		if (cp != NULL)
1177			*cp = '\0';
1178	}
1179	srvkrbnamelen = strlen(srvkrbname);
1180
1181	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1182		strlcpy(krbname, name, sizeof (krbname));
1183	else
1184		krbname[0] = '\0';
1185	krbnamelen = strlen(krbname);
1186
1187	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1188		strlcpy(dirpath, name, sizeof (dirpath));
1189	else
1190		dirpath[0] = '\0';
1191	dirlen = strlen(dirpath);
1192
1193	if (has_nfs_args_opt == 0) {
1194		if (vfs_getopt(mp->mnt_optnew, "addr",
1195		    (void **)&args.addr, &args.addrlen) == 0) {
1196			if (args.addrlen > SOCK_MAXADDRLEN) {
1197				error = ENAMETOOLONG;
1198				goto out;
1199			}
1200			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1201			bcopy(args.addr, nam, args.addrlen);
1202			nam->sa_len = args.addrlen;
1203		} else {
1204			vfs_mount_error(mp, "No server address");
1205			error = EINVAL;
1206			goto out;
1207		}
1208	}
1209
1210	args.fh = nfh;
1211	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1212	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1213	    nametimeo, negnametimeo, minvers);
1214out:
1215	if (!error) {
1216		MNT_ILOCK(mp);
1217		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1218		    MNTK_USES_BCACHE;
1219		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1220			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1221		MNT_IUNLOCK(mp);
1222	}
1223	return (error);
1224}
1225
1226
1227/*
1228 * VFS Operations.
1229 *
1230 * mount system call
1231 * It seems a bit dumb to copyinstr() the host and path here and then
1232 * bcopy() them in mountnfs(), but I wanted to detect errors before
1233 * doing the sockargs() call because sockargs() allocates an mbuf and
1234 * an error after that means that I have to release the mbuf.
1235 */
1236/* ARGSUSED */
1237static int
1238nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1239{
1240	int error;
1241	struct nfs_args args;
1242
1243	error = copyin(data, &args, sizeof (struct nfs_args));
1244	if (error)
1245		return error;
1246
1247	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1248
1249	error = kernel_mount(ma, flags);
1250	return (error);
1251}
1252
1253/*
1254 * Common code for mount and mountroot
 *
 * Allocates and initializes a struct nfsmount for mp from the arguments in
 * argp, connects to the server and obtains the root vnode.
 *
 * Ownership of nam: it is stored in nmp->nm_nam once the mount proceeds,
 * and it is released with FREE() on the MNT_UPDATE early return and on the
 * "bad" failure path.  krbname, dirpath and srvkrbname are copied into
 * storage allocated together with the nfsmount, so the caller keeps
 * ownership of those buffers.
 *
 * Return values:
 *   0      - MNT_UPDATE was set (nothing is done beyond printing a message
 *            and freeing nam; *vpp is not written), or the mount succeeded
 *            and *vpp holds the root vnode, unlocked but still referenced.
 *   EIO    - no root file handle was available (nm_fhsize == 0).
 *   other  - error from newnfs_connect(), nfscl_getcl(), the directory
 *            path lookup (mapped through nfscl_maperr()) or ncl_nget().
 * On any non-zero return the nfsmount allocated here has been freed.
1255 */
1256static int
1257mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1258    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1259    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1260    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1261    int minvers)
1262{
1263	struct nfsmount *nmp;
1264	struct nfsnode *np;
1265	int error, trycnt, ret;
1266	struct nfsvattr nfsva;
1267	struct nfsclclient *clp;
1268	struct nfsclds *dsp, *tdsp;
1269	uint32_t lease;
	/* Seeded from nfsboottime on first use, then incremented per mount. */
1270	static u_int64_t clval = 0;
1271
1272	NFSCL_DEBUG(3, "in mnt\n");
1273	clp = NULL;
1274	if (mp->mnt_flag & MNT_UPDATE) {
		/* nmp is assigned here but not used before returning. */
1275		nmp = VFSTONFS(mp);
1276		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1277		FREE(nam, M_SONAME);
1278		return (0);
1279	} else {
		/*
		 * A single zeroed allocation holds the nfsmount followed by
		 * the three names.
		 * NOTE(review): the "+ 2" covers two of the three NUL
		 * terminators written below; the third presumably comes from
		 * nm_name's own declared storage - confirm in nfsmount.h.
		 */
1280		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1281		    krbnamelen + dirlen + srvkrbnamelen + 2,
1282		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1283		TAILQ_INIT(&nmp->nm_bufq);
1284		TAILQ_INIT(&nmp->nm_sess);
1285		if (clval == 0)
1286			clval = (u_int64_t)nfsboottime.tv_sec;
1287		nmp->nm_clval = clval++;
1288		nmp->nm_krbnamelen = krbnamelen;
1289		nmp->nm_dirpathlen = dirlen;
1290		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1291		if (td->td_ucred->cr_uid != (uid_t)0) {
1292			/*
1293			 * nm_uid is used to get KerberosV credentials for
1294			 * the nfsv4 state handling operations if there is
1295			 * no host based principal set. Use the uid of
1296			 * this user if not root, since they are doing the
1297			 * mount. I don't think setting this for root will
1298			 * work, since root normally does not have user
1299			 * credentials in a credentials cache.
1300			 */
1301			nmp->nm_uid = td->td_ucred->cr_uid;
1302		} else {
1303			/*
1304			 * Just set to -1, so it won't be used.
1305			 */
1306			nmp->nm_uid = (uid_t)-1;
1307		}
1308
1309		/* Copy and null terminate all the names */
		/*
		 * The terminator offsets below place the names back to back:
		 * krbname, NUL, dirpath, NUL, srvkrbname, NUL.  Because the
		 * allocation is zeroed, a name of length 0 is already an
		 * empty string.
		 */
1310		if (nmp->nm_krbnamelen > 0) {
1311			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1312			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1313		}
1314		if (nmp->nm_dirpathlen > 0) {
1315			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1316			    nmp->nm_dirpathlen);
1317			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1318			    + 1] = '\0';
1319		}
1320		if (nmp->nm_srvkrbnamelen > 0) {
1321			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1322			    nmp->nm_srvkrbnamelen);
1323			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1324			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1325		}
		/* The credential reference taken here is dropped at "bad". */
1326		nmp->nm_sockreq.nr_cred = crhold(cred);
1327		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1328		mp->mnt_data = nmp;
1329		nmp->nm_getinfo = nfs_getnlminfo;
1330		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1331	}
1332	vfs_getnewfsid(mp);
1333	nmp->nm_mountp = mp;
1334	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1335
1336	/*
1337	 * Since nfs_decode_args() might optionally set them, these
1338	 * need to be set to defaults before the call, so that the
1339	 * optional settings aren't overwritten.
1340	 */
1341	nmp->nm_nametimeo = nametimeo;
1342	nmp->nm_negnametimeo = negnametimeo;
1343	nmp->nm_timeo = NFS_TIMEO;
1344	nmp->nm_retry = NFS_RETRANS;
1345	nmp->nm_readahead = NFS_DEFRAHEAD;
1346
1347	/* This is empirical approximation of sqrt(hibufspace) * 256. */
	/*
	 * Start at NFS_MAXBSIZE / 256 and double until the square reaches
	 * hibufspace, then scale the result by 256.
	 */
1348	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1349	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1350		nmp->nm_wcommitsize *= 2;
1351	nmp->nm_wcommitsize *= 256;
1352
	/* The minor version is only recorded for NFSv4 mounts. */
1353	if ((argp->flags & NFSMNT_NFSV4) != 0)
1354		nmp->nm_minorvers = minvers;
1355	else
1356		nmp->nm_minorvers = 0;
1357
1358	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1359
1360	/*
1361	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1362	 * high, depending on whether we end up with negative offsets in
1363	 * the client or server somewhere.  2GB-1 may be safer.
1364	 *
1365	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1366	 * that we can handle until we find out otherwise.
1367	 */
1368	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1369		nmp->nm_maxfilesize = 0xffffffffLL;
1370	else
1371		nmp->nm_maxfilesize = OFF_MAX;
1372
	/*
	 * For V2 (neither the V3 nor the V4 flag set) the transfer sizes
	 * are set to the defaults here, after nfs_decode_args() has run.
	 */
1373	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1374		nmp->nm_wsize = NFS_WSIZE;
1375		nmp->nm_rsize = NFS_RSIZE;
1376		nmp->nm_readdirsize = NFS_READDIRSIZE;
1377	}
1378	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative values of the two tprintf delay tunables are clamped to 0. */
1379	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1380	if (nmp->nm_tprintf_delay < 0)
1381		nmp->nm_tprintf_delay = 0;
1382	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1383	if (nmp->nm_tprintf_initial_delay < 0)
1384		nmp->nm_tprintf_initial_delay = 0;
1385	nmp->nm_fhsize = argp->fhsize;
1386	if (nmp->nm_fhsize > 0)
1387		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	/*
	 * Copies MNAMELEN bytes from hst.
	 * NOTE(review): assumes the caller's hst buffer is at least MNAMELEN
	 * bytes long - confirm against the callers.
	 */
1388	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1389	nmp->nm_nam = nam;
1390	/* Set up the sockets and per-host congestion */
1391	nmp->nm_sotype = argp->sotype;
1392	nmp->nm_soproto = argp->proto;
1393	nmp->nm_sockreq.nr_prog = NFS_PROG;
1394	if ((argp->flags & NFSMNT_NFSV4))
1395		nmp->nm_sockreq.nr_vers = NFS_VER4;
1396	else if ((argp->flags & NFSMNT_NFSV3))
1397		nmp->nm_sockreq.nr_vers = NFS_VER3;
1398	else
1399		nmp->nm_sockreq.nr_vers = NFS_VER2;
1400
1401
1402	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1403		goto bad;
1404	/* For NFSv4.1, get the clientid now. */
1405	if (nmp->nm_minorvers > 0) {
1406		NFSCL_DEBUG(3, "at getcl\n");
1407		error = nfscl_getcl(mp, cred, td, 0, &clp);
1408		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1409		if (error != 0)
1410			goto bad;
1411	}
1412
1413	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1414	    nmp->nm_dirpathlen > 0) {
1415		NFSCL_DEBUG(3, "in dirp\n");
1416		/*
1417		 * If the fhsize on the mount point == 0 for V4, the mount
1418		 * path needs to be looked up.
1419		 */
		/*
		 * Up to three attempts, with an nfs_catnap() call after each
		 * failure.
		 * NOTE(review): nfsrpc_getdirpath() presumably fills in
		 * nm_fh/nm_fhsize on success, which the nm_fhsize test below
		 * relies on - confirm in its definition.
		 */
1420		trycnt = 3;
1421		do {
1422			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1423			    cred, td);
1424			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1425			if (error)
1426				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1427		} while (error && --trycnt > 0);
1428		if (error) {
1429			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1430			goto bad;
1431		}
1432	}
1433
1434	/*
1435	 * A reference count is needed on the nfsnode representing the
1436	 * remote root.  If this object is not persistent, then backward
1437	 * traversals of the mount point (i.e. "..") will not work if
1438	 * the nfsnode gets flushed out of the cache. Ufs does not have
1439	 * this problem, because one can identify root inodes by their
1440	 * number == ROOTINO (2).
1441	 */
1442	if (nmp->nm_fhsize > 0) {
1443		/*
1444		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1445		 * non-zero for the root vnode. f_iosize will be set correctly
1446		 * by nfs_statfs() before any I/O occurs.
1447		 */
1448		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1449		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1450		    LK_EXCLUSIVE);
1451		if (error)
1452			goto bad;
1453		*vpp = NFSTOV(np);
1454
1455		/*
1456		 * Get file attributes and transfer parameters for the
1457		 * mountpoint.  This has the side effect of filling in
1458		 * (*vpp)->v_type with the correct value.
1459		 */
		/*
		 * A failure here (ret != 0) does not fail the mount; made-up
		 * directory attributes and a lease of 60 are used instead.
		 */
1460		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1461		    cred, td, &nfsva, NULL, &lease);
1462		if (ret) {
1463			/*
1464			 * Just set default values to get things going.
1465			 */
1466			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1467			nfsva.na_vattr.va_type = VDIR;
1468			nfsva.na_vattr.va_mode = 0777;
1469			nfsva.na_vattr.va_nlink = 100;
1470			nfsva.na_vattr.va_uid = (uid_t)0;
1471			nfsva.na_vattr.va_gid = (gid_t)0;
1472			nfsva.na_vattr.va_fileid = 2;
1473			nfsva.na_vattr.va_gen = 1;
1474			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1475			nfsva.na_vattr.va_size = 512 * 1024;
1476			lease = 60;
1477		}
1478		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		/*
		 * clp is non-NULL here because nfscl_getcl() succeeded above
		 * under the same nm_minorvers > 0 condition.
		 */
1479		if (nmp->nm_minorvers > 0) {
1480			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1481			NFSLOCKCLSTATE();
1482			clp->nfsc_renew = NFSCL_RENEW(lease);
1483			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* The revision counter skips 0 when it wraps. */
1484			clp->nfsc_clientidrev++;
1485			if (clp->nfsc_clientidrev == 0)
1486				clp->nfsc_clientidrev++;
1487			NFSUNLOCKCLSTATE();
1488			/*
1489			 * Mount will succeed, so the renew thread can be
1490			 * started now.
1491			 */
1492			nfscl_start_renewthread(clp);
1493			nfscl_clientrelease(clp);
1494		}
1495		if (argp->flags & NFSMNT_NFSV3)
1496			ncl_fsinfo(nmp, *vpp, cred, td);
1497
1498		/* Mark if the mount point supports NFSv4 ACLs. */
		/* Only when the attribute fetch above actually succeeded. */
1499		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1500		    ret == 0 &&
1501		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1502			MNT_ILOCK(mp);
1503			mp->mnt_flag |= MNT_NFS4ACLS;
1504			MNT_IUNLOCK(mp);
1505		}
1506
1507		/*
1508		 * Lose the lock but keep the ref.
1509		 */
1510		NFSVOPUNLOCK(*vpp, 0);
1511		return (0);
1512	}
	/* No root file handle is available, so the mount cannot proceed. */
1513	error = EIO;
1514
	/*
	 * Failure path: undo the setup above and free both nmp and nam.
	 * NOTE(review): mp->mnt_data was set to nmp earlier and is not
	 * cleared here - confirm no caller dereferences it after a failed
	 * mount.
	 */
1515bad:
1516	if (clp != NULL)
1517		nfscl_clientrelease(clp);
1518	newnfs_disconnect(&nmp->nm_sockreq);
1519	crfree(nmp->nm_sockreq.nr_cred);
1520	if (nmp->nm_sockreq.nr_auth != NULL)
1521		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1522	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1523	mtx_destroy(&nmp->nm_mtx);
1524	if (nmp->nm_clp != NULL) {
1525		NFSLOCKCLSTATE();
1526		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1527		NFSUNLOCKCLSTATE();
1528		free(nmp->nm_clp, M_NFSCLCLIENT);
1529	}
	/*
	 * Every nm_sess entry is freed; entries other than the first are
	 * also disconnected when they carry their own socket.
	 */
1530	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1531		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1532		    dsp->nfsclds_sockp != NULL)
1533			newnfs_disconnect(dsp->nfsclds_sockp);
1534		nfscl_freenfsclds(dsp);
1535	}
1536	FREE(nmp, M_NEWNFSMNT);
1537	FREE(nam, M_SONAME);
1538	return (error);
1539}
1540
1541/*
1542 * unmount system call
1543 */
1544static int
1545nfs_unmount(struct mount *mp, int mntflags)
1546{
1547	struct thread *td;
1548	struct nfsmount *nmp;
1549	int error, flags = 0, i, trycnt = 0;
1550	struct nfsclds *dsp, *tdsp;
1551
1552	td = curthread;
1553
1554	if (mntflags & MNT_FORCE)
1555		flags |= FORCECLOSE;
1556	nmp = VFSTONFS(mp);
1557	/*
1558	 * Goes something like this..
1559	 * - Call vflush() to clear out vnodes for this filesystem
1560	 * - Close the socket
1561	 * - Free up the data structures
1562	 */
1563	/* In the forced case, cancel any outstanding requests. */
1564	if (mntflags & MNT_FORCE) {
1565		error = newnfs_nmcancelreqs(nmp);
1566		if (error)
1567			goto out;
1568		/* For a forced close, get rid of the renew thread now */
1569		nfscl_umount(nmp, td);
1570	}
1571	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1572	do {
1573		error = vflush(mp, 1, flags, td);
1574		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1575			(void) nfs_catnap(PSOCK, error, "newndm");
1576	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1577	if (error)
1578		goto out;
1579
1580	/*
1581	 * We are now committed to the unmount.
1582	 */
1583	if ((mntflags & MNT_FORCE) == 0)
1584		nfscl_umount(nmp, td);
1585	/* Make sure no nfsiods are assigned to this mount. */
1586	mtx_lock(&ncl_iod_mutex);
1587	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1588		if (ncl_iodmount[i] == nmp) {
1589			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1590			ncl_iodmount[i] = NULL;
1591		}
1592	mtx_unlock(&ncl_iod_mutex);
1593	newnfs_disconnect(&nmp->nm_sockreq);
1594	crfree(nmp->nm_sockreq.nr_cred);
1595	FREE(nmp->nm_nam, M_SONAME);
1596	if (nmp->nm_sockreq.nr_auth != NULL)
1597		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1598	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1599	mtx_destroy(&nmp->nm_mtx);
1600	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1601		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1602		    dsp->nfsclds_sockp != NULL)
1603			newnfs_disconnect(dsp->nfsclds_sockp);
1604		nfscl_freenfsclds(dsp);
1605	}
1606	FREE(nmp, M_NEWNFSMNT);
1607out:
1608	return (error);
1609}
1610
1611/*
1612 * Return root of a filesystem
1613 */
1614static int
1615nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1616{
1617	struct vnode *vp;
1618	struct nfsmount *nmp;
1619	struct nfsnode *np;
1620	int error;
1621
1622	nmp = VFSTONFS(mp);
1623	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1624	if (error)
1625		return error;
1626	vp = NFSTOV(np);
1627	/*
1628	 * Get transfer parameters and attributes for root vnode once.
1629	 */
1630	mtx_lock(&nmp->nm_mtx);
1631	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1632		mtx_unlock(&nmp->nm_mtx);
1633		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1634	} else
1635		mtx_unlock(&nmp->nm_mtx);
1636	if (vp->v_type == VNON)
1637	    vp->v_type = VDIR;
1638	vp->v_vflag |= VV_ROOT;
1639	*vpp = vp;
1640	return (0);
1641}
1642
1643/*
1644 * Flush out the buffer cache
1645 */
1646/* ARGSUSED */
1647static int
1648nfs_sync(struct mount *mp, int waitfor)
1649{
1650	struct vnode *vp, *mvp;
1651	struct thread *td;
1652	int error, allerror = 0;
1653
1654	td = curthread;
1655
1656	MNT_ILOCK(mp);
1657	/*
1658	 * If a forced dismount is in progress, return from here so that
1659	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1660	 * calling VFS_UNMOUNT().
1661	 */
1662	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1663		MNT_IUNLOCK(mp);
1664		return (EBADF);
1665	}
1666	MNT_IUNLOCK(mp);
1667
1668	/*
1669	 * Force stale buffer cache information to be flushed.
1670	 */
1671loop:
1672	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1673		/* XXX Racy bv_cnt check. */
1674		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1675		    waitfor == MNT_LAZY) {
1676			VI_UNLOCK(vp);
1677			continue;
1678		}
1679		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1680			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1681			goto loop;
1682		}
1683		error = VOP_FSYNC(vp, waitfor, td);
1684		if (error)
1685			allerror = error;
1686		NFSVOPUNLOCK(vp, 0);
1687		vrele(vp);
1688	}
1689	return (allerror);
1690}
1691
1692static int
1693nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1694{
1695	struct nfsmount *nmp = VFSTONFS(mp);
1696	struct vfsquery vq;
1697	int error;
1698
1699	bzero(&vq, sizeof(vq));
1700	switch (op) {
1701#if 0
1702	case VFS_CTL_NOLOCKS:
1703		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1704 		if (req->oldptr != NULL) {
1705 			error = SYSCTL_OUT(req, &val, sizeof(val));
1706 			if (error)
1707 				return (error);
1708 		}
1709 		if (req->newptr != NULL) {
1710 			error = SYSCTL_IN(req, &val, sizeof(val));
1711 			if (error)
1712 				return (error);
1713			if (val)
1714				nmp->nm_flag |= NFSMNT_NOLOCKS;
1715			else
1716				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1717 		}
1718		break;
1719#endif
1720	case VFS_CTL_QUERY:
1721		mtx_lock(&nmp->nm_mtx);
1722		if (nmp->nm_state & NFSSTA_TIMEO)
1723			vq.vq_flags |= VQ_NOTRESP;
1724		mtx_unlock(&nmp->nm_mtx);
1725#if 0
1726		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1727		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1728			vq.vq_flags |= VQ_NOTRESPLOCK;
1729#endif
1730		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1731		break;
1732 	case VFS_CTL_TIMEO:
1733 		if (req->oldptr != NULL) {
1734 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1735 			    sizeof(nmp->nm_tprintf_initial_delay));
1736 			if (error)
1737 				return (error);
1738 		}
1739 		if (req->newptr != NULL) {
1740			error = vfs_suser(mp, req->td);
1741			if (error)
1742				return (error);
1743 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1744 			    sizeof(nmp->nm_tprintf_initial_delay));
1745 			if (error)
1746 				return (error);
1747 			if (nmp->nm_tprintf_initial_delay < 0)
1748 				nmp->nm_tprintf_initial_delay = 0;
1749 		}
1750		break;
1751	default:
1752		return (ENOTSUP);
1753	}
1754	return (0);
1755}
1756
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The result of newnfs_nmcancelreqs() is not reported to the caller.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1769
1770/*
1771 * Extract the information needed by the nlm from the nfs vnode.
1772 */
1773static void
1774nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1775    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1776    struct timeval *timeop)
1777{
1778	struct nfsmount *nmp;
1779	struct nfsnode *np = VTONFS(vp);
1780
1781	nmp = VFSTONFS(vp->v_mount);
1782	if (fhlenp != NULL)
1783		*fhlenp = (size_t)np->n_fhp->nfh_len;
1784	if (fhp != NULL)
1785		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1786	if (sp != NULL)
1787		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1788	if (is_v3p != NULL)
1789		*is_v3p = NFS_ISV3(vp);
1790	if (sizep != NULL)
1791		*sizep = np->n_size;
1792	if (timeop != NULL) {
1793		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1794		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1795	}
1796}
1797
/*
 * Append the option name "opt" at *buf when testval is non-zero and the
 * name plus its terminating NUL fits in the *blen bytes that remain.
 * On a write, *buf is advanced and *blen reduced by the name's length;
 * otherwise both are left untouched.  nmp is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	if (written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1815
/*
 * Append "<opt>=<optval>" at *buf, where optval is printed as a decimal
 * integer.
 *
 * Nothing is attempted unless more than strlen(opt) + 1 bytes remain.
 * When the complete text fits, *buf is advanced and *blen reduced by its
 * length.  When it does not fit, *buf and *blen are left unchanged and the
 * string is terminated again at *buf, so a clipped (and therefore wrong)
 * value such as "rsize=6553" is never left in the output.  nmp is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/* Did not fit: discard the partial text. */
			**buf = '\0';
		}
	}
}
1833
1834/*
1835 * Load the option flags and values into the buffer.
1836 */
1837void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1838{
1839	char *buf;
1840	size_t blen;
1841
1842	buf = buffer;
1843	blen = buflen;
1844	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1845	    &blen);
1846	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1847		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1848		    &blen);
1849		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1850		    &buf, &blen);
1851		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
1852		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
1853	}
1854	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1855	    &blen);
1856	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1857	    "nfsv2", &buf, &blen);
1858	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1859	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1860	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1861	    &buf, &blen);
1862	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1863	    &buf, &blen);
1864	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1865	    &blen);
1866	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1867	    &blen);
1868	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1869	    &blen);
1870	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1871	    &blen);
1872	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1873	    &blen);
1874	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1875	    ",noncontigwr", &buf, &blen);
1876	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1877	    0, ",lockd", &buf, &blen);
1878	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1879	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1880	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1881	    &buf, &blen);
1882	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1883	    &buf, &blen);
1884	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1885	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1886	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1887	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1888	    &buf, &blen);
1889	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1890	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1891	    &buf, &blen);
1892	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1893	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1894	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1895	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1896	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1897	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1898	    &blen);
1899	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1900	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1901	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1902	    &blen);
1903	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1904	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1905	    &blen);
1906	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1907	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1908}
1909
1910