nfs_clvfsops.c revision 306659
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clvfsops.c 306659 2016-10-03 22:11:45Z rmacklem $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern int nfsrv_useacl;
82extern int nfscl_debuglevel;
83extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
84extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
85extern struct mtx ncl_iod_mutex;
86NFSCLSTATEMUTEX;
87
88MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
89MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
90
91SYSCTL_DECL(_vfs_nfs);
92static int nfs_ip_paranoia = 1;
93SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
94    &nfs_ip_paranoia, 0, "");
95static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
96SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
97        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
98/* how long between console messages "nfs server foo not responding" */
99static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
100SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
101        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
102
103static int	nfs_mountroot(struct mount *);
104static void	nfs_sec_name(char *, int *);
105static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
106		    struct nfs_args *argp, const char *, struct ucred *,
107		    struct thread *);
108static int	mountnfs(struct nfs_args *, struct mount *,
109		    struct sockaddr *, char *, u_char *, int, u_char *, int,
110		    u_char *, int, struct vnode **, struct ucred *,
111		    struct thread *, int, int, int);
112static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
113		    struct sockaddr_storage *, int *, off_t *,
114		    struct timeval *);
115static vfs_mount_t nfs_mount;
116static vfs_cmount_t nfs_cmount;
117static vfs_unmount_t nfs_unmount;
118static vfs_root_t nfs_root;
119static vfs_statfs_t nfs_statfs;
120static vfs_sync_t nfs_sync;
121static vfs_sysctl_t nfs_sysctl;
122static vfs_purge_t nfs_purge;
123
124/*
125 * nfs vfs operations.
126 */
127static struct vfsops nfs_vfsops = {
128	.vfs_init =		ncl_init,
129	.vfs_mount =		nfs_mount,
130	.vfs_cmount =		nfs_cmount,
131	.vfs_root =		nfs_root,
132	.vfs_statfs =		nfs_statfs,
133	.vfs_sync =		nfs_sync,
134	.vfs_uninit =		ncl_uninit,
135	.vfs_unmount =		nfs_unmount,
136	.vfs_sysctl =		nfs_sysctl,
137	.vfs_purge =		nfs_purge,
138};
139VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
140
/* So that loader and kldload(2) can find us, wherever we are. */
142MODULE_VERSION(nfs, 1);
143MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
144MODULE_DEPEND(nfs, krpc, 1, 1, 1);
145MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
146MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
147
148/*
149 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
150 * can be shared by both NFS clients. It is declared here so that it
151 * will be defined for kernels built without NFS_ROOT, although it
152 * isn't used in that case.
153 */
154#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
155struct nfs_diskless	nfs_diskless = { { { 0 } } };
156struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
157int			nfs_diskless_valid = 0;
158#endif
159
160SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
161    &nfs_diskless_valid, 0,
162    "Has the diskless struct been filled correctly");
163
164SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
165    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
166
167SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
168    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
169    "%Ssockaddr_in", "Diskless root nfs address");
170
171
172void		newnfsargs_ntoh(struct nfs_args *);
173static int	nfs_mountdiskless(char *,
174		    struct sockaddr_in *, struct nfs_args *,
175		    struct thread *, struct vnode **, struct mount *);
176static void	nfs_convert_diskless(void);
177static void	nfs_convert_oargs(struct nfs_args *args,
178		    struct onfs_args *oargs);
179
180int
181newnfs_iosize(struct nfsmount *nmp)
182{
183	int iosize, maxio;
184
185	/* First, set the upper limit for iosize */
186	if (nmp->nm_flag & NFSMNT_NFSV4) {
187		maxio = NFS_MAXBSIZE;
188	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
189		if (nmp->nm_sotype == SOCK_DGRAM)
190			maxio = NFS_MAXDGRAMDATA;
191		else
192			maxio = NFS_MAXBSIZE;
193	} else {
194		maxio = NFS_V2MAXDATA;
195	}
196	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
197		nmp->nm_rsize = maxio;
198	if (nmp->nm_rsize > NFS_MAXBSIZE)
199		nmp->nm_rsize = NFS_MAXBSIZE;
200	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
201		nmp->nm_readdirsize = maxio;
202	if (nmp->nm_readdirsize > nmp->nm_rsize)
203		nmp->nm_readdirsize = nmp->nm_rsize;
204	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
205		nmp->nm_wsize = maxio;
206	if (nmp->nm_wsize > NFS_MAXBSIZE)
207		nmp->nm_wsize = NFS_MAXBSIZE;
208
209	/*
210	 * Calculate the size used for io buffers.  Use the larger
211	 * of the two sizes to minimise nfs requests but make sure
212	 * that it is at least one VM page to avoid wasting buffer
213	 * space.  It must also be at least NFS_DIRBLKSIZ, since
214	 * that is the buffer size used for directories.
215	 */
216	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
217	iosize = imax(iosize, PAGE_SIZE);
218	iosize = imax(iosize, NFS_DIRBLKSIZ);
219	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
220	return (iosize);
221}
222
223static void
224nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
225{
226
227	args->version = NFS_ARGSVERSION;
228	args->addr = oargs->addr;
229	args->addrlen = oargs->addrlen;
230	args->sotype = oargs->sotype;
231	args->proto = oargs->proto;
232	args->fh = oargs->fh;
233	args->fhsize = oargs->fhsize;
234	args->flags = oargs->flags;
235	args->wsize = oargs->wsize;
236	args->rsize = oargs->rsize;
237	args->readdirsize = oargs->readdirsize;
238	args->timeo = oargs->timeo;
239	args->retrans = oargs->retrans;
240	args->readahead = oargs->readahead;
241	args->hostname = oargs->hostname;
242}
243
244static void
245nfs_convert_diskless(void)
246{
247
248	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
249		sizeof(struct ifaliasreq));
250	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
251		sizeof(struct sockaddr_in));
252	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
253	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
254		nfsv3_diskless.root_fhsize = NFSX_MYFH;
255		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
256	} else {
257		nfsv3_diskless.root_fhsize = NFSX_V2FH;
258		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
259	}
260	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
261		sizeof(struct sockaddr_in));
262	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
263	nfsv3_diskless.root_time = nfs_diskless.root_time;
264	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
265		MAXHOSTNAMELEN);
266	nfs_diskless_valid = 3;
267}
268
269/*
270 * nfs statfs call
271 */
272static int
273nfs_statfs(struct mount *mp, struct statfs *sbp)
274{
275	struct vnode *vp;
276	struct thread *td;
277	struct nfsmount *nmp = VFSTONFS(mp);
278	struct nfsvattr nfsva;
279	struct nfsfsinfo fs;
280	struct nfsstatfs sb;
281	int error = 0, attrflag, gotfsinfo = 0, ret;
282	struct nfsnode *np;
283
284	td = curthread;
285
286	error = vfs_busy(mp, MBF_NOWAIT);
287	if (error)
288		return (error);
289	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
290	if (error) {
291		vfs_unbusy(mp);
292		return (error);
293	}
294	vp = NFSTOV(np);
295	mtx_lock(&nmp->nm_mtx);
296	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
297		mtx_unlock(&nmp->nm_mtx);
298		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
299		    &attrflag, NULL);
300		if (!error)
301			gotfsinfo = 1;
302	} else
303		mtx_unlock(&nmp->nm_mtx);
304	if (!error)
305		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
306		    &attrflag, NULL);
307	if (error != 0)
308		NFSCL_DEBUG(2, "statfs=%d\n", error);
309	if (attrflag == 0) {
310		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
311		    td->td_ucred, td, &nfsva, NULL, NULL);
312		if (ret) {
313			/*
314			 * Just set default values to get things going.
315			 */
316			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
317			nfsva.na_vattr.va_type = VDIR;
318			nfsva.na_vattr.va_mode = 0777;
319			nfsva.na_vattr.va_nlink = 100;
320			nfsva.na_vattr.va_uid = (uid_t)0;
321			nfsva.na_vattr.va_gid = (gid_t)0;
322			nfsva.na_vattr.va_fileid = 2;
323			nfsva.na_vattr.va_gen = 1;
324			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
325			nfsva.na_vattr.va_size = 512 * 1024;
326		}
327	}
328	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
329	if (!error) {
330	    mtx_lock(&nmp->nm_mtx);
331	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
332		nfscl_loadfsinfo(nmp, &fs);
333	    nfscl_loadsbinfo(nmp, &sb, sbp);
334	    sbp->f_iosize = newnfs_iosize(nmp);
335	    mtx_unlock(&nmp->nm_mtx);
336	    if (sbp != &mp->mnt_stat) {
337		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
338		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
339	    }
340	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
341	} else if (NFS_ISV4(vp)) {
342		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
343	}
344	vput(vp);
345	vfs_unbusy(mp);
346	return (error);
347}
348
349/*
350 * nfs version 3 fsinfo rpc call
351 */
352int
353ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
354    struct thread *td)
355{
356	struct nfsfsinfo fs;
357	struct nfsvattr nfsva;
358	int error, attrflag;
359
360	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
361	if (!error) {
362		if (attrflag)
363			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
364			    1);
365		mtx_lock(&nmp->nm_mtx);
366		nfscl_loadfsinfo(nmp, &fs);
367		mtx_unlock(&nmp->nm_mtx);
368	}
369	return (error);
370}
371
372/*
373 * Mount a remote root fs via. nfs. This depends on the info in the
374 * nfs_diskless structure that has been filled in properly by some primary
375 * bootstrap.
376 * It goes something like this:
377 * - do enough of "ifconfig" by calling ifioctl() so that the system
378 *   can talk to the server
379 * - If nfs_diskless.mygateway is filled in, use that address as
380 *   a default gateway.
381 * - build the rootfs mount point and call mountnfs() to do the rest.
382 *
383 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
384 * structure, as well as other global NFS client variables here, as
385 * nfs_mountroot() will be called once in the boot before any other NFS
386 * client activity occurs.
387 */
388static int
389nfs_mountroot(struct mount *mp)
390{
391	struct thread *td = curthread;
392	struct nfsv3_diskless *nd = &nfsv3_diskless;
393	struct socket *so;
394	struct vnode *vp;
395	struct ifreq ir;
396	int error;
397	u_long l;
398	char buf[128];
399	char *cp;
400
401#if defined(BOOTP_NFSROOT) && defined(BOOTP)
402	bootpc_init();		/* use bootp to get nfs_diskless filled in */
403#elif defined(NFS_ROOT)
404	nfs_setup_diskless();
405#endif
406
407	if (nfs_diskless_valid == 0)
408		return (-1);
409	if (nfs_diskless_valid == 1)
410		nfs_convert_diskless();
411
412	/*
413	 * XXX splnet, so networks will receive...
414	 */
415	splnet();
416
417	/*
418	 * Do enough of ifconfig(8) so that the critical net interface can
419	 * talk to the server.
420	 */
421	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
422	    td->td_ucred, td);
423	if (error)
424		panic("nfs_mountroot: socreate(%04x): %d",
425			nd->myif.ifra_addr.sa_family, error);
426
427#if 0 /* XXX Bad idea */
428	/*
429	 * We might not have been told the right interface, so we pass
430	 * over the first ten interfaces of the same kind, until we get
431	 * one of them configured.
432	 */
433
434	for (i = strlen(nd->myif.ifra_name) - 1;
435		nd->myif.ifra_name[i] >= '0' &&
436		nd->myif.ifra_name[i] <= '9';
437		nd->myif.ifra_name[i] ++) {
438		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
439		if(!error)
440			break;
441	}
442#endif
443	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
444	if (error)
445		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
446	if ((cp = getenv("boot.netif.mtu")) != NULL) {
447		ir.ifr_mtu = strtol(cp, NULL, 10);
448		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
449		freeenv(cp);
450		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
451		if (error)
452			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
453	}
454	soclose(so);
455
456	/*
457	 * If the gateway field is filled in, set it as the default route.
458	 * Note that pxeboot will set a default route of 0 if the route
459	 * is not set by the DHCP server.  Check also for a value of 0
460	 * to avoid panicking inappropriately in that situation.
461	 */
462	if (nd->mygateway.sin_len != 0 &&
463	    nd->mygateway.sin_addr.s_addr != 0) {
464		struct sockaddr_in mask, sin;
465
466		bzero((caddr_t)&mask, sizeof(mask));
467		sin = mask;
468		sin.sin_family = AF_INET;
469		sin.sin_len = sizeof(sin);
470                /* XXX MRT use table 0 for this sort of thing */
471		CURVNET_SET(TD_TO_VNET(td));
472		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
473		    (struct sockaddr *)&nd->mygateway,
474		    (struct sockaddr *)&mask,
475		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
476		CURVNET_RESTORE();
477		if (error)
478			panic("nfs_mountroot: RTM_ADD: %d", error);
479	}
480
481	/*
482	 * Create the rootfs mount point.
483	 */
484	nd->root_args.fh = nd->root_fh;
485	nd->root_args.fhsize = nd->root_fhsize;
486	l = ntohl(nd->root_saddr.sin_addr.s_addr);
487	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
488		(l >> 24) & 0xff, (l >> 16) & 0xff,
489		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
490	printf("NFS ROOT: %s\n", buf);
491	nd->root_args.hostname = buf;
492	if ((error = nfs_mountdiskless(buf,
493	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
494		return (error);
495	}
496
497	/*
498	 * This is not really an nfs issue, but it is much easier to
499	 * set hostname here and then let the "/etc/rc.xxx" files
500	 * mount the right /var based upon its preset value.
501	 */
502	mtx_lock(&prison0.pr_mtx);
503	strlcpy(prison0.pr_hostname, nd->my_hostnam,
504	    sizeof(prison0.pr_hostname));
505	mtx_unlock(&prison0.pr_mtx);
506	inittodr(ntohl(nd->root_time));
507	return (0);
508}
509
510/*
511 * Internal version of mount system call for diskless setup.
512 */
513static int
514nfs_mountdiskless(char *path,
515    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
516    struct vnode **vpp, struct mount *mp)
517{
518	struct sockaddr *nam;
519	int dirlen, error;
520	char *dirpath;
521
522	/*
523	 * Find the directory path in "path", which also has the server's
524	 * name/ip address in it.
525	 */
526	dirpath = strchr(path, ':');
527	if (dirpath != NULL)
528		dirlen = strlen(++dirpath);
529	else
530		dirlen = 0;
531	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
532	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
533	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
534	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
535		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
536		return (error);
537	}
538	return (0);
539}
540
541static void
542nfs_sec_name(char *sec, int *flagsp)
543{
544	if (!strcmp(sec, "krb5"))
545		*flagsp |= NFSMNT_KERB;
546	else if (!strcmp(sec, "krb5i"))
547		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
548	else if (!strcmp(sec, "krb5p"))
549		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
550}
551
552static void
553nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
554    const char *hostname, struct ucred *cred, struct thread *td)
555{
556	int s;
557	int adjsock;
558	char *p;
559
560	s = splnet();
561
562	/*
563	 * Set read-only flag if requested; otherwise, clear it if this is
564	 * an update.  If this is not an update, then either the read-only
565	 * flag is already clear, or this is a root mount and it was set
566	 * intentionally at some previous point.
567	 */
568	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
569		MNT_ILOCK(mp);
570		mp->mnt_flag |= MNT_RDONLY;
571		MNT_IUNLOCK(mp);
572	} else if (mp->mnt_flag & MNT_UPDATE) {
573		MNT_ILOCK(mp);
574		mp->mnt_flag &= ~MNT_RDONLY;
575		MNT_IUNLOCK(mp);
576	}
577
578	/*
579	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
580	 * no sense in that context.  Also, set up appropriate retransmit
581	 * and soft timeout behavior.
582	 */
583	if (argp->sotype == SOCK_STREAM) {
584		nmp->nm_flag &= ~NFSMNT_NOCONN;
585		nmp->nm_timeo = NFS_MAXTIMEO;
586		if ((argp->flags & NFSMNT_NFSV4) != 0)
587			nmp->nm_retry = INT_MAX;
588		else
589			nmp->nm_retry = NFS_RETRANS_TCP;
590	}
591
592	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
593	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
594		argp->flags &= ~NFSMNT_RDIRPLUS;
595		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
596	}
597
598	/* Re-bind if rsrvd port requested and wasn't on one */
599	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
600		  && (argp->flags & NFSMNT_RESVPORT);
601	/* Also re-bind if we're switching to/from a connected UDP socket */
602	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
603		    (argp->flags & NFSMNT_NOCONN));
604
605	/* Update flags atomically.  Don't change the lock bits. */
606	nmp->nm_flag = argp->flags | nmp->nm_flag;
607	splx(s);
608
609	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
610		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
611		if (nmp->nm_timeo < NFS_MINTIMEO)
612			nmp->nm_timeo = NFS_MINTIMEO;
613		else if (nmp->nm_timeo > NFS_MAXTIMEO)
614			nmp->nm_timeo = NFS_MAXTIMEO;
615	}
616
617	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
618		nmp->nm_retry = argp->retrans;
619		if (nmp->nm_retry > NFS_MAXREXMIT)
620			nmp->nm_retry = NFS_MAXREXMIT;
621	}
622
623	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
624		nmp->nm_wsize = argp->wsize;
625		/*
626		 * Clip at the power of 2 below the size. There is an
627		 * issue (not isolated) that causes intermittent page
628		 * faults if this is not done.
629		 */
630		if (nmp->nm_wsize > NFS_FABLKSIZE)
631			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
632		else
633			nmp->nm_wsize = NFS_FABLKSIZE;
634	}
635
636	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
637		nmp->nm_rsize = argp->rsize;
638		/*
639		 * Clip at the power of 2 below the size. There is an
640		 * issue (not isolated) that causes intermittent page
641		 * faults if this is not done.
642		 */
643		if (nmp->nm_rsize > NFS_FABLKSIZE)
644			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
645		else
646			nmp->nm_rsize = NFS_FABLKSIZE;
647	}
648
649	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
650		nmp->nm_readdirsize = argp->readdirsize;
651	}
652
653	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
654		nmp->nm_acregmin = argp->acregmin;
655	else
656		nmp->nm_acregmin = NFS_MINATTRTIMO;
657	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
658		nmp->nm_acregmax = argp->acregmax;
659	else
660		nmp->nm_acregmax = NFS_MAXATTRTIMO;
661	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
662		nmp->nm_acdirmin = argp->acdirmin;
663	else
664		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
665	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
666		nmp->nm_acdirmax = argp->acdirmax;
667	else
668		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
669	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
670		nmp->nm_acdirmin = nmp->nm_acdirmax;
671	if (nmp->nm_acregmin > nmp->nm_acregmax)
672		nmp->nm_acregmin = nmp->nm_acregmax;
673
674	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
675		if (argp->readahead <= NFS_MAXRAHEAD)
676			nmp->nm_readahead = argp->readahead;
677		else
678			nmp->nm_readahead = NFS_MAXRAHEAD;
679	}
680	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
681		if (argp->wcommitsize < nmp->nm_wsize)
682			nmp->nm_wcommitsize = nmp->nm_wsize;
683		else
684			nmp->nm_wcommitsize = argp->wcommitsize;
685	}
686
687	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
688		    (nmp->nm_soproto != argp->proto));
689
690	if (nmp->nm_client != NULL && adjsock) {
691		int haslock = 0, error = 0;
692
693		if (nmp->nm_sotype == SOCK_STREAM) {
694			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
695			if (!error)
696				haslock = 1;
697		}
698		if (!error) {
699		    newnfs_disconnect(&nmp->nm_sockreq);
700		    if (haslock)
701			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
702		    nmp->nm_sotype = argp->sotype;
703		    nmp->nm_soproto = argp->proto;
704		    if (nmp->nm_sotype == SOCK_DGRAM)
705			while (newnfs_connect(nmp, &nmp->nm_sockreq,
706			    cred, td, 0)) {
707				printf("newnfs_args: retrying connect\n");
708				(void) nfs_catnap(PSOCK, 0, "newnfscon");
709			}
710		}
711	} else {
712		nmp->nm_sotype = argp->sotype;
713		nmp->nm_soproto = argp->proto;
714	}
715
716	if (hostname != NULL) {
717		strlcpy(nmp->nm_hostname, hostname,
718		    sizeof(nmp->nm_hostname));
719		p = strchr(nmp->nm_hostname, ':');
720		if (p != NULL)
721			*p = '\0';
722	}
723}
724
/*
 * NULL-terminated list of the mount option names that nfs_mount()
 * hands to vfs_filteropt() as acceptable.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout",
	"addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "minorversion",
	"nametimeo", "negnametimeo", "nocto", "noncontigwr",
	"pnfs", "wcommitsize",
	NULL
};
736
737/*
738 * VFS Operations.
739 *
740 * mount system call
741 * It seems a bit dumb to copyinstr() the host and path here and then
742 * bcopy() them in mountnfs(), but I wanted to detect errors before
743 * doing the sockargs() call because sockargs() allocates an mbuf and
744 * an error after that means that I have to release the mbuf.
745 */
746/* ARGSUSED */
747static int
748nfs_mount(struct mount *mp)
749{
750	struct nfs_args args = {
751	    .version = NFS_ARGSVERSION,
752	    .addr = NULL,
753	    .addrlen = sizeof (struct sockaddr_in),
754	    .sotype = SOCK_STREAM,
755	    .proto = 0,
756	    .fh = NULL,
757	    .fhsize = 0,
758	    .flags = NFSMNT_RESVPORT,
759	    .wsize = NFS_WSIZE,
760	    .rsize = NFS_RSIZE,
761	    .readdirsize = NFS_READDIRSIZE,
762	    .timeo = 10,
763	    .retrans = NFS_RETRANS,
764	    .readahead = NFS_DEFRAHEAD,
765	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
766	    .hostname = NULL,
767	    .acregmin = NFS_MINATTRTIMO,
768	    .acregmax = NFS_MAXATTRTIMO,
769	    .acdirmin = NFS_MINDIRATTRTIMO,
770	    .acdirmax = NFS_MAXDIRATTRTIMO,
771	};
772	int error = 0, ret, len;
773	struct sockaddr *nam = NULL;
774	struct vnode *vp;
775	struct thread *td;
776	char hst[MNAMELEN];
777	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
778	char *cp, *opt, *name, *secname;
779	int nametimeo = NFS_DEFAULT_NAMETIMEO;
780	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
781	int minvers = 0;
782	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
783	size_t hstlen;
784
785	has_nfs_args_opt = 0;
786	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
787		error = EINVAL;
788		goto out;
789	}
790
791	td = curthread;
792	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
793		error = nfs_mountroot(mp);
794		goto out;
795	}
796
797	nfscl_init();
798
799	/*
800	 * The old mount_nfs program passed the struct nfs_args
801	 * from userspace to kernel.  The new mount_nfs program
802	 * passes string options via nmount() from userspace to kernel
803	 * and we populate the struct nfs_args in the kernel.
804	 */
805	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
806		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
807		    sizeof(args));
808		if (error != 0)
809			goto out;
810
811		if (args.version != NFS_ARGSVERSION) {
812			error = EPROGMISMATCH;
813			goto out;
814		}
815		has_nfs_args_opt = 1;
816	}
817
818	/* Handle the new style options. */
819	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
820		args.acdirmin = args.acdirmax =
821		    args.acregmin = args.acregmax = 0;
822		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
823		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
824	}
825	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
826		args.flags |= NFSMNT_NOCONN;
827	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
828		args.flags &= ~NFSMNT_NOCONN;
829	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
830		args.flags |= NFSMNT_NOLOCKD;
831	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
832		args.flags &= ~NFSMNT_NOLOCKD;
833	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
834		args.flags |= NFSMNT_INT;
835	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
836		args.flags |= NFSMNT_RDIRPLUS;
837	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
838		args.flags |= NFSMNT_RESVPORT;
839	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
840		args.flags &= ~NFSMNT_RESVPORT;
841	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
842		args.flags |= NFSMNT_SOFT;
843	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
844		args.flags &= ~NFSMNT_SOFT;
845	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
846		args.sotype = SOCK_DGRAM;
847	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
848		args.sotype = SOCK_DGRAM;
849	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
850		args.sotype = SOCK_STREAM;
851	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
852		args.flags |= NFSMNT_NFSV3;
853	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
854		args.flags |= NFSMNT_NFSV4;
855		args.sotype = SOCK_STREAM;
856	}
857	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
858		args.flags |= NFSMNT_ALLGSSNAME;
859	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
860		args.flags |= NFSMNT_NOCTO;
861	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
862		args.flags |= NFSMNT_NONCONTIGWR;
863	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
864		args.flags |= NFSMNT_PNFS;
865	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
866		if (opt == NULL) {
867			vfs_mount_error(mp, "illegal readdirsize");
868			error = EINVAL;
869			goto out;
870		}
871		ret = sscanf(opt, "%d", &args.readdirsize);
872		if (ret != 1 || args.readdirsize <= 0) {
873			vfs_mount_error(mp, "illegal readdirsize: %s",
874			    opt);
875			error = EINVAL;
876			goto out;
877		}
878		args.flags |= NFSMNT_READDIRSIZE;
879	}
880	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
881		if (opt == NULL) {
882			vfs_mount_error(mp, "illegal readahead");
883			error = EINVAL;
884			goto out;
885		}
886		ret = sscanf(opt, "%d", &args.readahead);
887		if (ret != 1 || args.readahead <= 0) {
888			vfs_mount_error(mp, "illegal readahead: %s",
889			    opt);
890			error = EINVAL;
891			goto out;
892		}
893		args.flags |= NFSMNT_READAHEAD;
894	}
895	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
896		if (opt == NULL) {
897			vfs_mount_error(mp, "illegal wsize");
898			error = EINVAL;
899			goto out;
900		}
901		ret = sscanf(opt, "%d", &args.wsize);
902		if (ret != 1 || args.wsize <= 0) {
903			vfs_mount_error(mp, "illegal wsize: %s",
904			    opt);
905			error = EINVAL;
906			goto out;
907		}
908		args.flags |= NFSMNT_WSIZE;
909	}
910	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
911		if (opt == NULL) {
912			vfs_mount_error(mp, "illegal rsize");
913			error = EINVAL;
914			goto out;
915		}
916		ret = sscanf(opt, "%d", &args.rsize);
917		if (ret != 1 || args.rsize <= 0) {
918			vfs_mount_error(mp, "illegal wsize: %s",
919			    opt);
920			error = EINVAL;
921			goto out;
922		}
923		args.flags |= NFSMNT_RSIZE;
924	}
925	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
926		if (opt == NULL) {
927			vfs_mount_error(mp, "illegal retrans");
928			error = EINVAL;
929			goto out;
930		}
931		ret = sscanf(opt, "%d", &args.retrans);
932		if (ret != 1 || args.retrans <= 0) {
933			vfs_mount_error(mp, "illegal retrans: %s",
934			    opt);
935			error = EINVAL;
936			goto out;
937		}
938		args.flags |= NFSMNT_RETRANS;
939	}
940	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
941		ret = sscanf(opt, "%d", &args.acregmin);
942		if (ret != 1 || args.acregmin < 0) {
943			vfs_mount_error(mp, "illegal actimeo: %s",
944			    opt);
945			error = EINVAL;
946			goto out;
947		}
948		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
949		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
950		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
951	}
952	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
953		ret = sscanf(opt, "%d", &args.acregmin);
954		if (ret != 1 || args.acregmin < 0) {
955			vfs_mount_error(mp, "illegal acregmin: %s",
956			    opt);
957			error = EINVAL;
958			goto out;
959		}
960		args.flags |= NFSMNT_ACREGMIN;
961	}
962	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
963		ret = sscanf(opt, "%d", &args.acregmax);
964		if (ret != 1 || args.acregmax < 0) {
965			vfs_mount_error(mp, "illegal acregmax: %s",
966			    opt);
967			error = EINVAL;
968			goto out;
969		}
970		args.flags |= NFSMNT_ACREGMAX;
971	}
972	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
973		ret = sscanf(opt, "%d", &args.acdirmin);
974		if (ret != 1 || args.acdirmin < 0) {
975			vfs_mount_error(mp, "illegal acdirmin: %s",
976			    opt);
977			error = EINVAL;
978			goto out;
979		}
980		args.flags |= NFSMNT_ACDIRMIN;
981	}
982	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
983		ret = sscanf(opt, "%d", &args.acdirmax);
984		if (ret != 1 || args.acdirmax < 0) {
985			vfs_mount_error(mp, "illegal acdirmax: %s",
986			    opt);
987			error = EINVAL;
988			goto out;
989		}
990		args.flags |= NFSMNT_ACDIRMAX;
991	}
992	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
993		ret = sscanf(opt, "%d", &args.wcommitsize);
994		if (ret != 1 || args.wcommitsize < 0) {
995			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
996			error = EINVAL;
997			goto out;
998		}
999		args.flags |= NFSMNT_WCOMMITSIZE;
1000	}
1001	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1002		ret = sscanf(opt, "%d", &args.timeo);
1003		if (ret != 1 || args.timeo <= 0) {
1004			vfs_mount_error(mp, "illegal timeo: %s",
1005			    opt);
1006			error = EINVAL;
1007			goto out;
1008		}
1009		args.flags |= NFSMNT_TIMEO;
1010	}
1011	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1012		ret = sscanf(opt, "%d", &args.timeo);
1013		if (ret != 1 || args.timeo <= 0) {
1014			vfs_mount_error(mp, "illegal timeout: %s",
1015			    opt);
1016			error = EINVAL;
1017			goto out;
1018		}
1019		args.flags |= NFSMNT_TIMEO;
1020	}
1021	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1022		ret = sscanf(opt, "%d", &nametimeo);
1023		if (ret != 1 || nametimeo < 0) {
1024			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1025			error = EINVAL;
1026			goto out;
1027		}
1028	}
1029	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1030	    == 0) {
1031		ret = sscanf(opt, "%d", &negnametimeo);
1032		if (ret != 1 || negnametimeo < 0) {
1033			vfs_mount_error(mp, "illegal negnametimeo: %s",
1034			    opt);
1035			error = EINVAL;
1036			goto out;
1037		}
1038	}
1039	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1040	    0) {
1041		ret = sscanf(opt, "%d", &minvers);
1042		if (ret != 1 || minvers < 0 || minvers > 1 ||
1043		    (args.flags & NFSMNT_NFSV4) == 0) {
1044			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1045			error = EINVAL;
1046			goto out;
1047		}
1048	}
1049	if (vfs_getopt(mp->mnt_optnew, "sec",
1050		(void **) &secname, NULL) == 0)
1051		nfs_sec_name(secname, &args.flags);
1052
1053	if (mp->mnt_flag & MNT_UPDATE) {
1054		struct nfsmount *nmp = VFSTONFS(mp);
1055
1056		if (nmp == NULL) {
1057			error = EIO;
1058			goto out;
1059		}
1060
1061		/*
1062		 * If a change from TCP->UDP is done and there are thread(s)
1063		 * that have I/O RPC(s) in progress with a tranfer size
1064		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1065		 * hung, retrying the RPC(s) forever. Usually these threads
1066		 * will be seen doing an uninterruptible sleep on wait channel
1067		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1068		 */
1069		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1070			tprintf(td->td_proc, LOG_WARNING,
1071	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1072
1073		/*
1074		 * When doing an update, we can't change version,
1075		 * security, switch lockd strategies or change cookie
1076		 * translation
1077		 */
1078		args.flags = (args.flags &
1079		    ~(NFSMNT_NFSV3 |
1080		      NFSMNT_NFSV4 |
1081		      NFSMNT_KERB |
1082		      NFSMNT_INTEGRITY |
1083		      NFSMNT_PRIVACY |
1084		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1085		    (nmp->nm_flag &
1086			(NFSMNT_NFSV3 |
1087			 NFSMNT_NFSV4 |
1088			 NFSMNT_KERB |
1089			 NFSMNT_INTEGRITY |
1090			 NFSMNT_PRIVACY |
1091			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1092		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1093		goto out;
1094	}
1095
1096	/*
1097	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1098	 * or no-connection mode for those protocols that support
1099	 * no-connection mode (the flag will be cleared later for protocols
1100	 * that do not support no-connection mode).  This will allow a client
1101	 * to receive replies from a different IP then the request was
1102	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1103	 * not 0.
1104	 */
1105	if (nfs_ip_paranoia == 0)
1106		args.flags |= NFSMNT_NOCONN;
1107
1108	if (has_nfs_args_opt != 0) {
1109		/*
1110		 * In the 'nfs_args' case, the pointers in the args
1111		 * structure are in userland - we copy them in here.
1112		 */
1113		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1114			vfs_mount_error(mp, "Bad file handle");
1115			error = EINVAL;
1116			goto out;
1117		}
1118		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1119		    args.fhsize);
1120		if (error != 0)
1121			goto out;
1122		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1123		if (error != 0)
1124			goto out;
1125		bzero(&hst[hstlen], MNAMELEN - hstlen);
1126		args.hostname = hst;
1127		/* sockargs() call must be after above copyin() calls */
1128		error = getsockaddr(&nam, (caddr_t)args.addr,
1129		    args.addrlen);
1130		if (error != 0)
1131			goto out;
1132	} else {
1133		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1134		    &args.fhsize) == 0) {
1135			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1136				vfs_mount_error(mp, "Bad file handle");
1137				error = EINVAL;
1138				goto out;
1139			}
1140			bcopy(args.fh, nfh, args.fhsize);
1141		} else {
1142			args.fhsize = 0;
1143		}
1144		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1145		    (void **)&args.hostname, &len);
1146		if (args.hostname == NULL) {
1147			vfs_mount_error(mp, "Invalid hostname");
1148			error = EINVAL;
1149			goto out;
1150		}
1151		bcopy(args.hostname, hst, MNAMELEN);
1152		hst[MNAMELEN - 1] = '\0';
1153	}
1154
1155	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1156		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1157	else {
1158		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1159		cp = strchr(srvkrbname, ':');
1160		if (cp != NULL)
1161			*cp = '\0';
1162	}
1163	srvkrbnamelen = strlen(srvkrbname);
1164
1165	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1166		strlcpy(krbname, name, sizeof (krbname));
1167	else
1168		krbname[0] = '\0';
1169	krbnamelen = strlen(krbname);
1170
1171	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1172		strlcpy(dirpath, name, sizeof (dirpath));
1173	else
1174		dirpath[0] = '\0';
1175	dirlen = strlen(dirpath);
1176
1177	if (has_nfs_args_opt == 0) {
1178		if (vfs_getopt(mp->mnt_optnew, "addr",
1179		    (void **)&args.addr, &args.addrlen) == 0) {
1180			if (args.addrlen > SOCK_MAXADDRLEN) {
1181				error = ENAMETOOLONG;
1182				goto out;
1183			}
1184			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1185			bcopy(args.addr, nam, args.addrlen);
1186			nam->sa_len = args.addrlen;
1187		} else {
1188			vfs_mount_error(mp, "No server address");
1189			error = EINVAL;
1190			goto out;
1191		}
1192	}
1193
1194	args.fh = nfh;
1195	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1196	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1197	    nametimeo, negnametimeo, minvers);
1198out:
1199	if (!error) {
1200		MNT_ILOCK(mp);
1201		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1202		    MNTK_USES_BCACHE;
1203		MNT_IUNLOCK(mp);
1204	}
1205	return (error);
1206}
1207
1208
1209/*
1210 * VFS Operations.
1211 *
1212 * mount system call
1213 * It seems a bit dumb to copyinstr() the host and path here and then
1214 * bcopy() them in mountnfs(), but I wanted to detect errors before
1215 * doing the sockargs() call because sockargs() allocates an mbuf and
1216 * an error after that means that I have to release the mbuf.
1217 */
1218/* ARGSUSED */
1219static int
1220nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1221{
1222	int error;
1223	struct nfs_args args;
1224
1225	error = copyin(data, &args, sizeof (struct nfs_args));
1226	if (error)
1227		return error;
1228
1229	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1230
1231	error = kernel_mount(ma, flags);
1232	return (error);
1233}
1234
1235/*
1236 * Common code for mount and mountroot
1237 */
1238static int
1239mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1240    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1241    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1242    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1243    int minvers)
1244{
1245	struct nfsmount *nmp;
1246	struct nfsnode *np;
1247	int error, trycnt, ret;
1248	struct nfsvattr nfsva;
1249	struct nfsclclient *clp;
1250	struct nfsclds *dsp, *tdsp;
1251	uint32_t lease;
1252	static u_int64_t clval = 0;
1253
1254	NFSCL_DEBUG(3, "in mnt\n");
1255	clp = NULL;
1256	if (mp->mnt_flag & MNT_UPDATE) {
1257		nmp = VFSTONFS(mp);
1258		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1259		FREE(nam, M_SONAME);
1260		return (0);
1261	} else {
1262		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1263		    krbnamelen + dirlen + srvkrbnamelen + 2,
1264		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1265		TAILQ_INIT(&nmp->nm_bufq);
1266		if (clval == 0)
1267			clval = (u_int64_t)nfsboottime.tv_sec;
1268		nmp->nm_clval = clval++;
1269		nmp->nm_krbnamelen = krbnamelen;
1270		nmp->nm_dirpathlen = dirlen;
1271		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1272		if (td->td_ucred->cr_uid != (uid_t)0) {
1273			/*
1274			 * nm_uid is used to get KerberosV credentials for
1275			 * the nfsv4 state handling operations if there is
1276			 * no host based principal set. Use the uid of
1277			 * this user if not root, since they are doing the
1278			 * mount. I don't think setting this for root will
1279			 * work, since root normally does not have user
1280			 * credentials in a credentials cache.
1281			 */
1282			nmp->nm_uid = td->td_ucred->cr_uid;
1283		} else {
1284			/*
1285			 * Just set to -1, so it won't be used.
1286			 */
1287			nmp->nm_uid = (uid_t)-1;
1288		}
1289
1290		/* Copy and null terminate all the names */
1291		if (nmp->nm_krbnamelen > 0) {
1292			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1293			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1294		}
1295		if (nmp->nm_dirpathlen > 0) {
1296			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1297			    nmp->nm_dirpathlen);
1298			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1299			    + 1] = '\0';
1300		}
1301		if (nmp->nm_srvkrbnamelen > 0) {
1302			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1303			    nmp->nm_srvkrbnamelen);
1304			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1305			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1306		}
1307		nmp->nm_sockreq.nr_cred = crhold(cred);
1308		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1309		mp->mnt_data = nmp;
1310		nmp->nm_getinfo = nfs_getnlminfo;
1311		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1312	}
1313	vfs_getnewfsid(mp);
1314	nmp->nm_mountp = mp;
1315	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1316
1317	/*
1318	 * Since nfs_decode_args() might optionally set them, these
1319	 * need to be set to defaults before the call, so that the
1320	 * optional settings aren't overwritten.
1321	 */
1322	nmp->nm_nametimeo = nametimeo;
1323	nmp->nm_negnametimeo = negnametimeo;
1324	nmp->nm_timeo = NFS_TIMEO;
1325	nmp->nm_retry = NFS_RETRANS;
1326	nmp->nm_readahead = NFS_DEFRAHEAD;
1327
1328	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1329	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1330	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1331		nmp->nm_wcommitsize *= 2;
1332	nmp->nm_wcommitsize *= 256;
1333
1334	if ((argp->flags & NFSMNT_NFSV4) != 0)
1335		nmp->nm_minorvers = minvers;
1336	else
1337		nmp->nm_minorvers = 0;
1338
1339	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1340
1341	/*
1342	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1343	 * high, depending on whether we end up with negative offsets in
1344	 * the client or server somewhere.  2GB-1 may be safer.
1345	 *
1346	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1347	 * that we can handle until we find out otherwise.
1348	 */
1349	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1350		nmp->nm_maxfilesize = 0xffffffffLL;
1351	else
1352		nmp->nm_maxfilesize = OFF_MAX;
1353
1354	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1355		nmp->nm_wsize = NFS_WSIZE;
1356		nmp->nm_rsize = NFS_RSIZE;
1357		nmp->nm_readdirsize = NFS_READDIRSIZE;
1358	}
1359	nmp->nm_numgrps = NFS_MAXGRPS;
1360	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1361	if (nmp->nm_tprintf_delay < 0)
1362		nmp->nm_tprintf_delay = 0;
1363	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1364	if (nmp->nm_tprintf_initial_delay < 0)
1365		nmp->nm_tprintf_initial_delay = 0;
1366	nmp->nm_fhsize = argp->fhsize;
1367	if (nmp->nm_fhsize > 0)
1368		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1369	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1370	nmp->nm_nam = nam;
1371	/* Set up the sockets and per-host congestion */
1372	nmp->nm_sotype = argp->sotype;
1373	nmp->nm_soproto = argp->proto;
1374	nmp->nm_sockreq.nr_prog = NFS_PROG;
1375	if ((argp->flags & NFSMNT_NFSV4))
1376		nmp->nm_sockreq.nr_vers = NFS_VER4;
1377	else if ((argp->flags & NFSMNT_NFSV3))
1378		nmp->nm_sockreq.nr_vers = NFS_VER3;
1379	else
1380		nmp->nm_sockreq.nr_vers = NFS_VER2;
1381
1382
1383	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1384		goto bad;
1385	/* For NFSv4.1, get the clientid now. */
1386	if (nmp->nm_minorvers > 0) {
1387		NFSCL_DEBUG(3, "at getcl\n");
1388		error = nfscl_getcl(mp, cred, td, 0, &clp);
1389		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1390		if (error != 0)
1391			goto bad;
1392	}
1393
1394	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1395	    nmp->nm_dirpathlen > 0) {
1396		NFSCL_DEBUG(3, "in dirp\n");
1397		/*
1398		 * If the fhsize on the mount point == 0 for V4, the mount
1399		 * path needs to be looked up.
1400		 */
1401		trycnt = 3;
1402		do {
1403			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1404			    cred, td);
1405			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1406			if (error)
1407				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1408		} while (error && --trycnt > 0);
1409		if (error) {
1410			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1411			goto bad;
1412		}
1413	}
1414
1415	/*
1416	 * A reference count is needed on the nfsnode representing the
1417	 * remote root.  If this object is not persistent, then backward
1418	 * traversals of the mount point (i.e. "..") will not work if
1419	 * the nfsnode gets flushed out of the cache. Ufs does not have
1420	 * this problem, because one can identify root inodes by their
1421	 * number == ROOTINO (2).
1422	 */
1423	if (nmp->nm_fhsize > 0) {
1424		/*
1425		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1426		 * non-zero for the root vnode. f_iosize will be set correctly
1427		 * by nfs_statfs() before any I/O occurs.
1428		 */
1429		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1430		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1431		    LK_EXCLUSIVE);
1432		if (error)
1433			goto bad;
1434		*vpp = NFSTOV(np);
1435
1436		/*
1437		 * Get file attributes and transfer parameters for the
1438		 * mountpoint.  This has the side effect of filling in
1439		 * (*vpp)->v_type with the correct value.
1440		 */
1441		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1442		    cred, td, &nfsva, NULL, &lease);
1443		if (ret) {
1444			/*
1445			 * Just set default values to get things going.
1446			 */
1447			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1448			nfsva.na_vattr.va_type = VDIR;
1449			nfsva.na_vattr.va_mode = 0777;
1450			nfsva.na_vattr.va_nlink = 100;
1451			nfsva.na_vattr.va_uid = (uid_t)0;
1452			nfsva.na_vattr.va_gid = (gid_t)0;
1453			nfsva.na_vattr.va_fileid = 2;
1454			nfsva.na_vattr.va_gen = 1;
1455			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1456			nfsva.na_vattr.va_size = 512 * 1024;
1457			lease = 60;
1458		}
1459		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1460		if (nmp->nm_minorvers > 0) {
1461			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1462			NFSLOCKCLSTATE();
1463			clp->nfsc_renew = NFSCL_RENEW(lease);
1464			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1465			clp->nfsc_clientidrev++;
1466			if (clp->nfsc_clientidrev == 0)
1467				clp->nfsc_clientidrev++;
1468			NFSUNLOCKCLSTATE();
1469			/*
1470			 * Mount will succeed, so the renew thread can be
1471			 * started now.
1472			 */
1473			nfscl_start_renewthread(clp);
1474			nfscl_clientrelease(clp);
1475		}
1476		if (argp->flags & NFSMNT_NFSV3)
1477			ncl_fsinfo(nmp, *vpp, cred, td);
1478
1479		/* Mark if the mount point supports NFSv4 ACLs. */
1480		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1481		    ret == 0 &&
1482		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1483			MNT_ILOCK(mp);
1484			mp->mnt_flag |= MNT_NFS4ACLS;
1485			MNT_IUNLOCK(mp);
1486		}
1487
1488		/*
1489		 * Lose the lock but keep the ref.
1490		 */
1491		NFSVOPUNLOCK(*vpp, 0);
1492		return (0);
1493	}
1494	error = EIO;
1495
1496bad:
1497	if (clp != NULL)
1498		nfscl_clientrelease(clp);
1499	newnfs_disconnect(&nmp->nm_sockreq);
1500	crfree(nmp->nm_sockreq.nr_cred);
1501	if (nmp->nm_sockreq.nr_auth != NULL)
1502		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1503	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1504	mtx_destroy(&nmp->nm_mtx);
1505	if (nmp->nm_clp != NULL) {
1506		NFSLOCKCLSTATE();
1507		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1508		NFSUNLOCKCLSTATE();
1509		free(nmp->nm_clp, M_NFSCLCLIENT);
1510	}
1511	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1512		nfscl_freenfsclds(dsp);
1513	FREE(nmp, M_NEWNFSMNT);
1514	FREE(nam, M_SONAME);
1515	return (error);
1516}
1517
1518/*
1519 * unmount system call
1520 */
1521static int
1522nfs_unmount(struct mount *mp, int mntflags)
1523{
1524	struct thread *td;
1525	struct nfsmount *nmp;
1526	int error, flags = 0, i, trycnt = 0;
1527	struct nfsclds *dsp, *tdsp;
1528
1529	td = curthread;
1530
1531	if (mntflags & MNT_FORCE)
1532		flags |= FORCECLOSE;
1533	nmp = VFSTONFS(mp);
1534	/*
1535	 * Goes something like this..
1536	 * - Call vflush() to clear out vnodes for this filesystem
1537	 * - Close the socket
1538	 * - Free up the data structures
1539	 */
1540	/* In the forced case, cancel any outstanding requests. */
1541	if (mntflags & MNT_FORCE) {
1542		error = newnfs_nmcancelreqs(nmp);
1543		if (error)
1544			goto out;
1545		/* For a forced close, get rid of the renew thread now */
1546		nfscl_umount(nmp, td);
1547	}
1548	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1549	do {
1550		error = vflush(mp, 1, flags, td);
1551		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1552			(void) nfs_catnap(PSOCK, error, "newndm");
1553	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1554	if (error)
1555		goto out;
1556
1557	/*
1558	 * We are now committed to the unmount.
1559	 */
1560	if ((mntflags & MNT_FORCE) == 0)
1561		nfscl_umount(nmp, td);
1562	/* Make sure no nfsiods are assigned to this mount. */
1563	mtx_lock(&ncl_iod_mutex);
1564	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1565		if (ncl_iodmount[i] == nmp) {
1566			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1567			ncl_iodmount[i] = NULL;
1568		}
1569	mtx_unlock(&ncl_iod_mutex);
1570	newnfs_disconnect(&nmp->nm_sockreq);
1571	crfree(nmp->nm_sockreq.nr_cred);
1572	FREE(nmp->nm_nam, M_SONAME);
1573	if (nmp->nm_sockreq.nr_auth != NULL)
1574		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1575	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1576	mtx_destroy(&nmp->nm_mtx);
1577	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1578		nfscl_freenfsclds(dsp);
1579	FREE(nmp, M_NEWNFSMNT);
1580out:
1581	return (error);
1582}
1583
1584/*
1585 * Return root of a filesystem
1586 */
1587static int
1588nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1589{
1590	struct vnode *vp;
1591	struct nfsmount *nmp;
1592	struct nfsnode *np;
1593	int error;
1594
1595	nmp = VFSTONFS(mp);
1596	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1597	if (error)
1598		return error;
1599	vp = NFSTOV(np);
1600	/*
1601	 * Get transfer parameters and attributes for root vnode once.
1602	 */
1603	mtx_lock(&nmp->nm_mtx);
1604	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1605		mtx_unlock(&nmp->nm_mtx);
1606		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1607	} else
1608		mtx_unlock(&nmp->nm_mtx);
1609	if (vp->v_type == VNON)
1610	    vp->v_type = VDIR;
1611	vp->v_vflag |= VV_ROOT;
1612	*vpp = vp;
1613	return (0);
1614}
1615
1616/*
1617 * Flush out the buffer cache
1618 */
1619/* ARGSUSED */
1620static int
1621nfs_sync(struct mount *mp, int waitfor)
1622{
1623	struct vnode *vp, *mvp;
1624	struct thread *td;
1625	int error, allerror = 0;
1626
1627	td = curthread;
1628
1629	MNT_ILOCK(mp);
1630	/*
1631	 * If a forced dismount is in progress, return from here so that
1632	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1633	 * calling VFS_UNMOUNT().
1634	 */
1635	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1636		MNT_IUNLOCK(mp);
1637		return (EBADF);
1638	}
1639	MNT_IUNLOCK(mp);
1640
1641	/*
1642	 * Force stale buffer cache information to be flushed.
1643	 */
1644loop:
1645	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1646		/* XXX Racy bv_cnt check. */
1647		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1648		    waitfor == MNT_LAZY) {
1649			VI_UNLOCK(vp);
1650			continue;
1651		}
1652		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1653			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1654			goto loop;
1655		}
1656		error = VOP_FSYNC(vp, waitfor, td);
1657		if (error)
1658			allerror = error;
1659		NFSVOPUNLOCK(vp, 0);
1660		vrele(vp);
1661	}
1662	return (allerror);
1663}
1664
1665static int
1666nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1667{
1668	struct nfsmount *nmp = VFSTONFS(mp);
1669	struct vfsquery vq;
1670	int error;
1671
1672	bzero(&vq, sizeof(vq));
1673	switch (op) {
1674#if 0
1675	case VFS_CTL_NOLOCKS:
1676		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1677 		if (req->oldptr != NULL) {
1678 			error = SYSCTL_OUT(req, &val, sizeof(val));
1679 			if (error)
1680 				return (error);
1681 		}
1682 		if (req->newptr != NULL) {
1683 			error = SYSCTL_IN(req, &val, sizeof(val));
1684 			if (error)
1685 				return (error);
1686			if (val)
1687				nmp->nm_flag |= NFSMNT_NOLOCKS;
1688			else
1689				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1690 		}
1691		break;
1692#endif
1693	case VFS_CTL_QUERY:
1694		mtx_lock(&nmp->nm_mtx);
1695		if (nmp->nm_state & NFSSTA_TIMEO)
1696			vq.vq_flags |= VQ_NOTRESP;
1697		mtx_unlock(&nmp->nm_mtx);
1698#if 0
1699		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1700		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1701			vq.vq_flags |= VQ_NOTRESPLOCK;
1702#endif
1703		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1704		break;
1705 	case VFS_CTL_TIMEO:
1706 		if (req->oldptr != NULL) {
1707 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1708 			    sizeof(nmp->nm_tprintf_initial_delay));
1709 			if (error)
1710 				return (error);
1711 		}
1712 		if (req->newptr != NULL) {
1713			error = vfs_suser(mp, req->td);
1714			if (error)
1715				return (error);
1716 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1717 			    sizeof(nmp->nm_tprintf_initial_delay));
1718 			if (error)
1719 				return (error);
1720 			if (nmp->nm_tprintf_initial_delay < 0)
1721 				nmp->nm_tprintf_initial_delay = 0;
1722 		}
1723		break;
1724	default:
1725		return (ENOTSUP);
1726	}
1727	return (0);
1728}
1729
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel call is not examined.
 */
static void
nfs_purge(struct mount *mp)
{

	newnfs_nmcancelreqs(VFSTONFS(mp));
}
1742
1743/*
1744 * Extract the information needed by the nlm from the nfs vnode.
1745 */
1746static void
1747nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1748    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1749    struct timeval *timeop)
1750{
1751	struct nfsmount *nmp;
1752	struct nfsnode *np = VTONFS(vp);
1753
1754	nmp = VFSTONFS(vp->v_mount);
1755	if (fhlenp != NULL)
1756		*fhlenp = (size_t)np->n_fhp->nfh_len;
1757	if (fhp != NULL)
1758		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1759	if (sp != NULL)
1760		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1761	if (is_v3p != NULL)
1762		*is_v3p = NFS_ISV3(vp);
1763	if (sizep != NULL)
1764		*sizep = np->n_size;
1765	if (timeop != NULL) {
1766		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1767		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1768	}
1769}
1770
/*
 * Append the option name "opt" to the output when "testval" is non-zero.
 *
 * *buf is the current write position and *blen the space left there.
 * The name is only written when it fits together with its terminating
 * NUL; both values are then advanced past the text.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	int written;

	/* Nothing to do when the option is off or would not fit. */
	if (testval == 0 || *blen <= strlen(opt))
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	if (written != strlen(opt))
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1788
/*
 * Append "opt=optval" (optval printed in decimal) to the output.
 *
 * *buf is the current write position and *blen the space left there.
 * Nothing is attempted unless there is room for more than the name plus
 * one further character.  When the complete text fits, *buf and *blen
 * are advanced past it.  When it does not fit, the partial text is
 * discarded again, so that a cut-off number (e.g. "rsize=65" for 65536)
 * can never be taken for a real value; the option is simply left out and
 * *buf and *blen stay unchanged.  "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/* Truncated: drop the fragment that was written. */
			**buf = '\0';
		}
	}
}
1806
1807/*
1808 * Load the option flags and values into the buffer.
1809 */
1810void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1811{
1812	char *buf;
1813	size_t blen;
1814
1815	buf = buffer;
1816	blen = buflen;
1817	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1818	    &blen);
1819	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1820		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1821		    &blen);
1822		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1823		    &buf, &blen);
1824	}
1825	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1826	    &blen);
1827	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1828	    "nfsv2", &buf, &blen);
1829	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1830	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1831	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1832	    &buf, &blen);
1833	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1834	    &buf, &blen);
1835	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1836	    &blen);
1837	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1838	    &blen);
1839	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1840	    &blen);
1841	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1842	    &blen);
1843	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1844	    &blen);
1845	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1846	    ",noncontigwr", &buf, &blen);
1847	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1848	    0, ",lockd", &buf, &blen);
1849	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1850	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1851	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1852	    &buf, &blen);
1853	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1854	    &buf, &blen);
1855	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1856	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1857	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1858	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1859	    &buf, &blen);
1860	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1861	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1862	    &buf, &blen);
1863	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1864	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1865	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1866	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1867	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1868	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1869	    &blen);
1870	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1871	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1872	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1873	    &blen);
1874	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1875	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1876	    &blen);
1877	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1878	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1879}
1880
1881