nfs_clvfsops.c revision 286141
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clvfsops.c 286141 2015-07-31 21:31:58Z rmacklem $");
37
38
39#include "opt_bootp.h"
40#include "opt_nfsroot.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/clock.h>
48#include <sys/jail.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/mount.h>
55#include <sys/proc.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/vnode.h>
61#include <sys/signalvar.h>
62
63#include <vm/vm.h>
64#include <vm/vm_extern.h>
65#include <vm/uma.h>
66
67#include <net/if.h>
68#include <net/route.h>
69#include <netinet/in.h>
70
71#include <fs/nfs/nfsport.h>
72#include <fs/nfsclient/nfsnode.h>
73#include <fs/nfsclient/nfsmount.h>
74#include <fs/nfsclient/nfs.h>
75#include <nfs/nfsdiskless.h>
76
77FEATURE(nfscl, "NFSv4 client");
78
79extern int nfscl_ticks;
80extern struct timeval nfsboottime;
81extern struct nfsstats	newnfsstats;
82extern int nfsrv_useacl;
83extern int nfscl_debuglevel;
84extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
85extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
86extern struct mtx ncl_iod_mutex;
87NFSCLSTATEMUTEX;
88
89MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
90MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
91
92SYSCTL_DECL(_vfs_nfs);
93static int nfs_ip_paranoia = 1;
94SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
95    &nfs_ip_paranoia, 0, "");
96static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
97SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
98        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
99/* how long between console messages "nfs server foo not responding" */
100static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
101SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
102        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
103
104static int	nfs_mountroot(struct mount *);
105static void	nfs_sec_name(char *, int *);
106static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
107		    struct nfs_args *argp, const char *, struct ucred *,
108		    struct thread *);
109static int	mountnfs(struct nfs_args *, struct mount *,
110		    struct sockaddr *, char *, u_char *, int, u_char *, int,
111		    u_char *, int, struct vnode **, struct ucred *,
112		    struct thread *, int, int, int);
113static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
114		    struct sockaddr_storage *, int *, off_t *,
115		    struct timeval *);
116static vfs_mount_t nfs_mount;
117static vfs_cmount_t nfs_cmount;
118static vfs_unmount_t nfs_unmount;
119static vfs_root_t nfs_root;
120static vfs_statfs_t nfs_statfs;
121static vfs_sync_t nfs_sync;
122static vfs_sysctl_t nfs_sysctl;
123static vfs_purge_t nfs_purge;
124
125/*
126 * nfs vfs operations.
127 */
128static struct vfsops nfs_vfsops = {
129	.vfs_init =		ncl_init,
130	.vfs_mount =		nfs_mount,
131	.vfs_cmount =		nfs_cmount,
132	.vfs_root =		nfs_root,
133	.vfs_statfs =		nfs_statfs,
134	.vfs_sync =		nfs_sync,
135	.vfs_uninit =		ncl_uninit,
136	.vfs_unmount =		nfs_unmount,
137	.vfs_sysctl =		nfs_sysctl,
138	.vfs_purge =		nfs_purge,
139};
140VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
141
142/* So that loader and kldload(2) can find us, wherever we are.. */
143MODULE_VERSION(nfs, 1);
144MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
145MODULE_DEPEND(nfs, krpc, 1, 1, 1);
146MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
147MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
148
149/*
150 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
151 * can be shared by both NFS clients. It is declared here so that it
152 * will be defined for kernels built without NFS_ROOT, although it
153 * isn't used in that case.
154 */
155#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
156struct nfs_diskless	nfs_diskless = { { { 0 } } };
157struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
158int			nfs_diskless_valid = 0;
159#endif
160
161SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
162    &nfs_diskless_valid, 0,
163    "Has the diskless struct been filled correctly");
164
165SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
166    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
167
168SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
169    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
170    "%Ssockaddr_in", "Diskless root nfs address");
171
172
173void		newnfsargs_ntoh(struct nfs_args *);
174static int	nfs_mountdiskless(char *,
175		    struct sockaddr_in *, struct nfs_args *,
176		    struct thread *, struct vnode **, struct mount *);
177static void	nfs_convert_diskless(void);
178static void	nfs_convert_oargs(struct nfs_args *args,
179		    struct onfs_args *oargs);
180
181int
182newnfs_iosize(struct nfsmount *nmp)
183{
184	int iosize, maxio;
185
186	/* First, set the upper limit for iosize */
187	if (nmp->nm_flag & NFSMNT_NFSV4) {
188		maxio = NFS_MAXBSIZE;
189	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
190		if (nmp->nm_sotype == SOCK_DGRAM)
191			maxio = NFS_MAXDGRAMDATA;
192		else
193			maxio = NFS_MAXBSIZE;
194	} else {
195		maxio = NFS_V2MAXDATA;
196	}
197	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
198		nmp->nm_rsize = maxio;
199	if (nmp->nm_rsize > NFS_MAXBSIZE)
200		nmp->nm_rsize = NFS_MAXBSIZE;
201	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
202		nmp->nm_readdirsize = maxio;
203	if (nmp->nm_readdirsize > nmp->nm_rsize)
204		nmp->nm_readdirsize = nmp->nm_rsize;
205	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
206		nmp->nm_wsize = maxio;
207	if (nmp->nm_wsize > NFS_MAXBSIZE)
208		nmp->nm_wsize = NFS_MAXBSIZE;
209
210	/*
211	 * Calculate the size used for io buffers.  Use the larger
212	 * of the two sizes to minimise nfs requests but make sure
213	 * that it is at least one VM page to avoid wasting buffer
214	 * space.
215	 */
216	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
217	iosize = imax(iosize, PAGE_SIZE);
218	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
219	return (iosize);
220}
221
222static void
223nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
224{
225
226	args->version = NFS_ARGSVERSION;
227	args->addr = oargs->addr;
228	args->addrlen = oargs->addrlen;
229	args->sotype = oargs->sotype;
230	args->proto = oargs->proto;
231	args->fh = oargs->fh;
232	args->fhsize = oargs->fhsize;
233	args->flags = oargs->flags;
234	args->wsize = oargs->wsize;
235	args->rsize = oargs->rsize;
236	args->readdirsize = oargs->readdirsize;
237	args->timeo = oargs->timeo;
238	args->retrans = oargs->retrans;
239	args->readahead = oargs->readahead;
240	args->hostname = oargs->hostname;
241}
242
243static void
244nfs_convert_diskless(void)
245{
246
247	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
248		sizeof(struct ifaliasreq));
249	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
250		sizeof(struct sockaddr_in));
251	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
252	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
253		nfsv3_diskless.root_fhsize = NFSX_MYFH;
254		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
255	} else {
256		nfsv3_diskless.root_fhsize = NFSX_V2FH;
257		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
258	}
259	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
260		sizeof(struct sockaddr_in));
261	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
262	nfsv3_diskless.root_time = nfs_diskless.root_time;
263	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
264		MAXHOSTNAMELEN);
265	nfs_diskless_valid = 3;
266}
267
268/*
269 * nfs statfs call
270 */
271static int
272nfs_statfs(struct mount *mp, struct statfs *sbp)
273{
274	struct vnode *vp;
275	struct thread *td;
276	struct nfsmount *nmp = VFSTONFS(mp);
277	struct nfsvattr nfsva;
278	struct nfsfsinfo fs;
279	struct nfsstatfs sb;
280	int error = 0, attrflag, gotfsinfo = 0, ret;
281	struct nfsnode *np;
282
283	td = curthread;
284
285	error = vfs_busy(mp, MBF_NOWAIT);
286	if (error)
287		return (error);
288	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
289	if (error) {
290		vfs_unbusy(mp);
291		return (error);
292	}
293	vp = NFSTOV(np);
294	mtx_lock(&nmp->nm_mtx);
295	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
296		mtx_unlock(&nmp->nm_mtx);
297		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
298		    &attrflag, NULL);
299		if (!error)
300			gotfsinfo = 1;
301	} else
302		mtx_unlock(&nmp->nm_mtx);
303	if (!error)
304		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
305		    &attrflag, NULL);
306	if (error != 0)
307		NFSCL_DEBUG(2, "statfs=%d\n", error);
308	if (attrflag == 0) {
309		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
310		    td->td_ucred, td, &nfsva, NULL, NULL);
311		if (ret) {
312			/*
313			 * Just set default values to get things going.
314			 */
315			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
316			nfsva.na_vattr.va_type = VDIR;
317			nfsva.na_vattr.va_mode = 0777;
318			nfsva.na_vattr.va_nlink = 100;
319			nfsva.na_vattr.va_uid = (uid_t)0;
320			nfsva.na_vattr.va_gid = (gid_t)0;
321			nfsva.na_vattr.va_fileid = 2;
322			nfsva.na_vattr.va_gen = 1;
323			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
324			nfsva.na_vattr.va_size = 512 * 1024;
325		}
326	}
327	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
328	if (!error) {
329	    mtx_lock(&nmp->nm_mtx);
330	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
331		nfscl_loadfsinfo(nmp, &fs);
332	    nfscl_loadsbinfo(nmp, &sb, sbp);
333	    sbp->f_iosize = newnfs_iosize(nmp);
334	    mtx_unlock(&nmp->nm_mtx);
335	    if (sbp != &mp->mnt_stat) {
336		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
337		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
338	    }
339	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
340	} else if (NFS_ISV4(vp)) {
341		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
342	}
343	vput(vp);
344	vfs_unbusy(mp);
345	return (error);
346}
347
348/*
349 * nfs version 3 fsinfo rpc call
350 */
351int
352ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
353    struct thread *td)
354{
355	struct nfsfsinfo fs;
356	struct nfsvattr nfsva;
357	int error, attrflag;
358
359	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
360	if (!error) {
361		if (attrflag)
362			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
363			    1);
364		mtx_lock(&nmp->nm_mtx);
365		nfscl_loadfsinfo(nmp, &fs);
366		mtx_unlock(&nmp->nm_mtx);
367	}
368	return (error);
369}
370
371/*
372 * Mount a remote root fs via. nfs. This depends on the info in the
373 * nfs_diskless structure that has been filled in properly by some primary
374 * bootstrap.
375 * It goes something like this:
376 * - do enough of "ifconfig" by calling ifioctl() so that the system
377 *   can talk to the server
378 * - If nfs_diskless.mygateway is filled in, use that address as
379 *   a default gateway.
380 * - build the rootfs mount point and call mountnfs() to do the rest.
381 *
382 * It is assumed to be safe to read, modify, and write the nfsv3_diskless
383 * structure, as well as other global NFS client variables here, as
384 * nfs_mountroot() will be called once in the boot before any other NFS
385 * client activity occurs.
386 */
387static int
388nfs_mountroot(struct mount *mp)
389{
390	struct thread *td = curthread;
391	struct nfsv3_diskless *nd = &nfsv3_diskless;
392	struct socket *so;
393	struct vnode *vp;
394	struct ifreq ir;
395	int error;
396	u_long l;
397	char buf[128];
398	char *cp;
399
400#if defined(BOOTP_NFSROOT) && defined(BOOTP)
401	bootpc_init();		/* use bootp to get nfs_diskless filled in */
402#elif defined(NFS_ROOT)
403	nfs_setup_diskless();
404#endif
405
406	if (nfs_diskless_valid == 0)
407		return (-1);
408	if (nfs_diskless_valid == 1)
409		nfs_convert_diskless();
410
411	/*
412	 * XXX splnet, so networks will receive...
413	 */
414	splnet();
415
416	/*
417	 * Do enough of ifconfig(8) so that the critical net interface can
418	 * talk to the server.
419	 */
420	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
421	    td->td_ucred, td);
422	if (error)
423		panic("nfs_mountroot: socreate(%04x): %d",
424			nd->myif.ifra_addr.sa_family, error);
425
426#if 0 /* XXX Bad idea */
427	/*
428	 * We might not have been told the right interface, so we pass
429	 * over the first ten interfaces of the same kind, until we get
430	 * one of them configured.
431	 */
432
433	for (i = strlen(nd->myif.ifra_name) - 1;
434		nd->myif.ifra_name[i] >= '0' &&
435		nd->myif.ifra_name[i] <= '9';
436		nd->myif.ifra_name[i] ++) {
437		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
438		if(!error)
439			break;
440	}
441#endif
442	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
443	if (error)
444		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
445	if ((cp = getenv("boot.netif.mtu")) != NULL) {
446		ir.ifr_mtu = strtol(cp, NULL, 10);
447		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
448		freeenv(cp);
449		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
450		if (error)
451			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
452	}
453	soclose(so);
454
455	/*
456	 * If the gateway field is filled in, set it as the default route.
457	 * Note that pxeboot will set a default route of 0 if the route
458	 * is not set by the DHCP server.  Check also for a value of 0
459	 * to avoid panicking inappropriately in that situation.
460	 */
461	if (nd->mygateway.sin_len != 0 &&
462	    nd->mygateway.sin_addr.s_addr != 0) {
463		struct sockaddr_in mask, sin;
464
465		bzero((caddr_t)&mask, sizeof(mask));
466		sin = mask;
467		sin.sin_family = AF_INET;
468		sin.sin_len = sizeof(sin);
469                /* XXX MRT use table 0 for this sort of thing */
470		CURVNET_SET(TD_TO_VNET(td));
471		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
472		    (struct sockaddr *)&nd->mygateway,
473		    (struct sockaddr *)&mask,
474		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
475		CURVNET_RESTORE();
476		if (error)
477			panic("nfs_mountroot: RTM_ADD: %d", error);
478	}
479
480	/*
481	 * Create the rootfs mount point.
482	 */
483	nd->root_args.fh = nd->root_fh;
484	nd->root_args.fhsize = nd->root_fhsize;
485	l = ntohl(nd->root_saddr.sin_addr.s_addr);
486	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
487		(l >> 24) & 0xff, (l >> 16) & 0xff,
488		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
489	printf("NFS ROOT: %s\n", buf);
490	nd->root_args.hostname = buf;
491	if ((error = nfs_mountdiskless(buf,
492	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
493		return (error);
494	}
495
496	/*
497	 * This is not really an nfs issue, but it is much easier to
498	 * set hostname here and then let the "/etc/rc.xxx" files
499	 * mount the right /var based upon its preset value.
500	 */
501	mtx_lock(&prison0.pr_mtx);
502	strlcpy(prison0.pr_hostname, nd->my_hostnam,
503	    sizeof(prison0.pr_hostname));
504	mtx_unlock(&prison0.pr_mtx);
505	inittodr(ntohl(nd->root_time));
506	return (0);
507}
508
509/*
510 * Internal version of mount system call for diskless setup.
511 */
512static int
513nfs_mountdiskless(char *path,
514    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
515    struct vnode **vpp, struct mount *mp)
516{
517	struct sockaddr *nam;
518	int dirlen, error;
519	char *dirpath;
520
521	/*
522	 * Find the directory path in "path", which also has the server's
523	 * name/ip address in it.
524	 */
525	dirpath = strchr(path, ':');
526	if (dirpath != NULL)
527		dirlen = strlen(++dirpath);
528	else
529		dirlen = 0;
530	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
531	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
532	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
533	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
534		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
535		return (error);
536	}
537	return (0);
538}
539
540static void
541nfs_sec_name(char *sec, int *flagsp)
542{
543	if (!strcmp(sec, "krb5"))
544		*flagsp |= NFSMNT_KERB;
545	else if (!strcmp(sec, "krb5i"))
546		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
547	else if (!strcmp(sec, "krb5p"))
548		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
549}
550
551static void
552nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
553    const char *hostname, struct ucred *cred, struct thread *td)
554{
555	int s;
556	int adjsock;
557	char *p;
558
559	s = splnet();
560
561	/*
562	 * Set read-only flag if requested; otherwise, clear it if this is
563	 * an update.  If this is not an update, then either the read-only
564	 * flag is already clear, or this is a root mount and it was set
565	 * intentionally at some previous point.
566	 */
567	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
568		MNT_ILOCK(mp);
569		mp->mnt_flag |= MNT_RDONLY;
570		MNT_IUNLOCK(mp);
571	} else if (mp->mnt_flag & MNT_UPDATE) {
572		MNT_ILOCK(mp);
573		mp->mnt_flag &= ~MNT_RDONLY;
574		MNT_IUNLOCK(mp);
575	}
576
577	/*
578	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
579	 * no sense in that context.  Also, set up appropriate retransmit
580	 * and soft timeout behavior.
581	 */
582	if (argp->sotype == SOCK_STREAM) {
583		nmp->nm_flag &= ~NFSMNT_NOCONN;
584		nmp->nm_timeo = NFS_MAXTIMEO;
585		if ((argp->flags & NFSMNT_NFSV4) != 0)
586			nmp->nm_retry = INT_MAX;
587		else
588			nmp->nm_retry = NFS_RETRANS_TCP;
589	}
590
591	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
592	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
593		argp->flags &= ~NFSMNT_RDIRPLUS;
594		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
595	}
596
597	/* Re-bind if rsrvd port requested and wasn't on one */
598	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
599		  && (argp->flags & NFSMNT_RESVPORT);
600	/* Also re-bind if we're switching to/from a connected UDP socket */
601	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
602		    (argp->flags & NFSMNT_NOCONN));
603
604	/* Update flags atomically.  Don't change the lock bits. */
605	nmp->nm_flag = argp->flags | nmp->nm_flag;
606	splx(s);
607
608	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
609		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
610		if (nmp->nm_timeo < NFS_MINTIMEO)
611			nmp->nm_timeo = NFS_MINTIMEO;
612		else if (nmp->nm_timeo > NFS_MAXTIMEO)
613			nmp->nm_timeo = NFS_MAXTIMEO;
614	}
615
616	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
617		nmp->nm_retry = argp->retrans;
618		if (nmp->nm_retry > NFS_MAXREXMIT)
619			nmp->nm_retry = NFS_MAXREXMIT;
620	}
621
622	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
623		nmp->nm_wsize = argp->wsize;
624		/*
625		 * Clip at the power of 2 below the size. There is an
626		 * issue (not isolated) that causes intermittent page
627		 * faults if this is not done.
628		 */
629		if (nmp->nm_wsize > NFS_FABLKSIZE)
630			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
631		else
632			nmp->nm_wsize = NFS_FABLKSIZE;
633	}
634
635	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
636		nmp->nm_rsize = argp->rsize;
637		/*
638		 * Clip at the power of 2 below the size. There is an
639		 * issue (not isolated) that causes intermittent page
640		 * faults if this is not done.
641		 */
642		if (nmp->nm_rsize > NFS_FABLKSIZE)
643			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
644		else
645			nmp->nm_rsize = NFS_FABLKSIZE;
646	}
647
648	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
649		nmp->nm_readdirsize = argp->readdirsize;
650	}
651
652	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
653		nmp->nm_acregmin = argp->acregmin;
654	else
655		nmp->nm_acregmin = NFS_MINATTRTIMO;
656	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
657		nmp->nm_acregmax = argp->acregmax;
658	else
659		nmp->nm_acregmax = NFS_MAXATTRTIMO;
660	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
661		nmp->nm_acdirmin = argp->acdirmin;
662	else
663		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
664	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
665		nmp->nm_acdirmax = argp->acdirmax;
666	else
667		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
668	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
669		nmp->nm_acdirmin = nmp->nm_acdirmax;
670	if (nmp->nm_acregmin > nmp->nm_acregmax)
671		nmp->nm_acregmin = nmp->nm_acregmax;
672
673	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
674		if (argp->readahead <= NFS_MAXRAHEAD)
675			nmp->nm_readahead = argp->readahead;
676		else
677			nmp->nm_readahead = NFS_MAXRAHEAD;
678	}
679	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
680		if (argp->wcommitsize < nmp->nm_wsize)
681			nmp->nm_wcommitsize = nmp->nm_wsize;
682		else
683			nmp->nm_wcommitsize = argp->wcommitsize;
684	}
685
686	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
687		    (nmp->nm_soproto != argp->proto));
688
689	if (nmp->nm_client != NULL && adjsock) {
690		int haslock = 0, error = 0;
691
692		if (nmp->nm_sotype == SOCK_STREAM) {
693			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
694			if (!error)
695				haslock = 1;
696		}
697		if (!error) {
698		    newnfs_disconnect(&nmp->nm_sockreq);
699		    if (haslock)
700			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
701		    nmp->nm_sotype = argp->sotype;
702		    nmp->nm_soproto = argp->proto;
703		    if (nmp->nm_sotype == SOCK_DGRAM)
704			while (newnfs_connect(nmp, &nmp->nm_sockreq,
705			    cred, td, 0)) {
706				printf("newnfs_args: retrying connect\n");
707				(void) nfs_catnap(PSOCK, 0, "newnfscon");
708			}
709		}
710	} else {
711		nmp->nm_sotype = argp->sotype;
712		nmp->nm_soproto = argp->proto;
713	}
714
715	if (hostname != NULL) {
716		strlcpy(nmp->nm_hostname, hostname,
717		    sizeof(nmp->nm_hostname));
718		p = strchr(nmp->nm_hostname, ':');
719		if (p != NULL)
720			*p = '\0';
721	}
722}
723
/*
 * NULL-terminated list of option names; nfs_mount() passes it to
 * vfs_filteropt() to validate the options supplied for a mount.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args", "noac", "noatime",
	"noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel",
	"acls", "force", "update", "async",
	"noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize", "soft",
	"hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans", "actimeo",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo",
	"timeout", "addr", "fh", "nfsv3",
	"sec", "principal", "nfsv4", "gssname",
	"allgssname", "dirpath", "minorversion", "nametimeo",
	"negnametimeo", "nocto", "noncontigwr", "pnfs",
	"wcommitsize",
	NULL
};
735
736/*
737 * VFS Operations.
738 *
739 * mount system call
740 * It seems a bit dumb to copyinstr() the host and path here and then
741 * bcopy() them in mountnfs(), but I wanted to detect errors before
742 * doing the sockargs() call because sockargs() allocates an mbuf and
743 * an error after that means that I have to release the mbuf.
744 */
745/* ARGSUSED */
746static int
747nfs_mount(struct mount *mp)
748{
749	struct nfs_args args = {
750	    .version = NFS_ARGSVERSION,
751	    .addr = NULL,
752	    .addrlen = sizeof (struct sockaddr_in),
753	    .sotype = SOCK_STREAM,
754	    .proto = 0,
755	    .fh = NULL,
756	    .fhsize = 0,
757	    .flags = NFSMNT_RESVPORT,
758	    .wsize = NFS_WSIZE,
759	    .rsize = NFS_RSIZE,
760	    .readdirsize = NFS_READDIRSIZE,
761	    .timeo = 10,
762	    .retrans = NFS_RETRANS,
763	    .readahead = NFS_DEFRAHEAD,
764	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
765	    .hostname = NULL,
766	    .acregmin = NFS_MINATTRTIMO,
767	    .acregmax = NFS_MAXATTRTIMO,
768	    .acdirmin = NFS_MINDIRATTRTIMO,
769	    .acdirmax = NFS_MAXDIRATTRTIMO,
770	};
771	int error = 0, ret, len;
772	struct sockaddr *nam = NULL;
773	struct vnode *vp;
774	struct thread *td;
775	char hst[MNAMELEN];
776	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
777	char *cp, *opt, *name, *secname;
778	int nametimeo = NFS_DEFAULT_NAMETIMEO;
779	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
780	int minvers = 0;
781	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
782	size_t hstlen;
783
784	has_nfs_args_opt = 0;
785	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
786		error = EINVAL;
787		goto out;
788	}
789
790	td = curthread;
791	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
792		error = nfs_mountroot(mp);
793		goto out;
794	}
795
796	nfscl_init();
797
798	/*
799	 * The old mount_nfs program passed the struct nfs_args
800	 * from userspace to kernel.  The new mount_nfs program
801	 * passes string options via nmount() from userspace to kernel
802	 * and we populate the struct nfs_args in the kernel.
803	 */
804	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
805		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
806		    sizeof(args));
807		if (error != 0)
808			goto out;
809
810		if (args.version != NFS_ARGSVERSION) {
811			error = EPROGMISMATCH;
812			goto out;
813		}
814		has_nfs_args_opt = 1;
815	}
816
817	/* Handle the new style options. */
818	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
819		args.acdirmin = args.acdirmax =
820		    args.acregmin = args.acregmax = 0;
821		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
822		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
823	}
824	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
825		args.flags |= NFSMNT_NOCONN;
826	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
827		args.flags &= ~NFSMNT_NOCONN;
828	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
829		args.flags |= NFSMNT_NOLOCKD;
830	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
831		args.flags &= ~NFSMNT_NOLOCKD;
832	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
833		args.flags |= NFSMNT_INT;
834	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
835		args.flags |= NFSMNT_RDIRPLUS;
836	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
837		args.flags |= NFSMNT_RESVPORT;
838	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
839		args.flags &= ~NFSMNT_RESVPORT;
840	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
841		args.flags |= NFSMNT_SOFT;
842	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
843		args.flags &= ~NFSMNT_SOFT;
844	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
845		args.sotype = SOCK_DGRAM;
846	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
847		args.sotype = SOCK_DGRAM;
848	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
849		args.sotype = SOCK_STREAM;
850	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
851		args.flags |= NFSMNT_NFSV3;
852	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
853		args.flags |= NFSMNT_NFSV4;
854		args.sotype = SOCK_STREAM;
855	}
856	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
857		args.flags |= NFSMNT_ALLGSSNAME;
858	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
859		args.flags |= NFSMNT_NOCTO;
860	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
861		args.flags |= NFSMNT_NONCONTIGWR;
862	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
863		args.flags |= NFSMNT_PNFS;
864	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
865		if (opt == NULL) {
866			vfs_mount_error(mp, "illegal readdirsize");
867			error = EINVAL;
868			goto out;
869		}
870		ret = sscanf(opt, "%d", &args.readdirsize);
871		if (ret != 1 || args.readdirsize <= 0) {
872			vfs_mount_error(mp, "illegal readdirsize: %s",
873			    opt);
874			error = EINVAL;
875			goto out;
876		}
877		args.flags |= NFSMNT_READDIRSIZE;
878	}
879	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
880		if (opt == NULL) {
881			vfs_mount_error(mp, "illegal readahead");
882			error = EINVAL;
883			goto out;
884		}
885		ret = sscanf(opt, "%d", &args.readahead);
886		if (ret != 1 || args.readahead <= 0) {
887			vfs_mount_error(mp, "illegal readahead: %s",
888			    opt);
889			error = EINVAL;
890			goto out;
891		}
892		args.flags |= NFSMNT_READAHEAD;
893	}
894	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
895		if (opt == NULL) {
896			vfs_mount_error(mp, "illegal wsize");
897			error = EINVAL;
898			goto out;
899		}
900		ret = sscanf(opt, "%d", &args.wsize);
901		if (ret != 1 || args.wsize <= 0) {
902			vfs_mount_error(mp, "illegal wsize: %s",
903			    opt);
904			error = EINVAL;
905			goto out;
906		}
907		args.flags |= NFSMNT_WSIZE;
908	}
909	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
910		if (opt == NULL) {
911			vfs_mount_error(mp, "illegal rsize");
912			error = EINVAL;
913			goto out;
914		}
915		ret = sscanf(opt, "%d", &args.rsize);
916		if (ret != 1 || args.rsize <= 0) {
917			vfs_mount_error(mp, "illegal wsize: %s",
918			    opt);
919			error = EINVAL;
920			goto out;
921		}
922		args.flags |= NFSMNT_RSIZE;
923	}
924	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
925		if (opt == NULL) {
926			vfs_mount_error(mp, "illegal retrans");
927			error = EINVAL;
928			goto out;
929		}
930		ret = sscanf(opt, "%d", &args.retrans);
931		if (ret != 1 || args.retrans <= 0) {
932			vfs_mount_error(mp, "illegal retrans: %s",
933			    opt);
934			error = EINVAL;
935			goto out;
936		}
937		args.flags |= NFSMNT_RETRANS;
938	}
939	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
940		ret = sscanf(opt, "%d", &args.acregmin);
941		if (ret != 1 || args.acregmin < 0) {
942			vfs_mount_error(mp, "illegal actimeo: %s",
943			    opt);
944			error = EINVAL;
945			goto out;
946		}
947		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
948		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
949		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
950	}
951	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
952		ret = sscanf(opt, "%d", &args.acregmin);
953		if (ret != 1 || args.acregmin < 0) {
954			vfs_mount_error(mp, "illegal acregmin: %s",
955			    opt);
956			error = EINVAL;
957			goto out;
958		}
959		args.flags |= NFSMNT_ACREGMIN;
960	}
961	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
962		ret = sscanf(opt, "%d", &args.acregmax);
963		if (ret != 1 || args.acregmax < 0) {
964			vfs_mount_error(mp, "illegal acregmax: %s",
965			    opt);
966			error = EINVAL;
967			goto out;
968		}
969		args.flags |= NFSMNT_ACREGMAX;
970	}
971	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
972		ret = sscanf(opt, "%d", &args.acdirmin);
973		if (ret != 1 || args.acdirmin < 0) {
974			vfs_mount_error(mp, "illegal acdirmin: %s",
975			    opt);
976			error = EINVAL;
977			goto out;
978		}
979		args.flags |= NFSMNT_ACDIRMIN;
980	}
981	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
982		ret = sscanf(opt, "%d", &args.acdirmax);
983		if (ret != 1 || args.acdirmax < 0) {
984			vfs_mount_error(mp, "illegal acdirmax: %s",
985			    opt);
986			error = EINVAL;
987			goto out;
988		}
989		args.flags |= NFSMNT_ACDIRMAX;
990	}
991	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
992		ret = sscanf(opt, "%d", &args.wcommitsize);
993		if (ret != 1 || args.wcommitsize < 0) {
994			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
995			error = EINVAL;
996			goto out;
997		}
998		args.flags |= NFSMNT_WCOMMITSIZE;
999	}
1000	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1001		ret = sscanf(opt, "%d", &args.timeo);
1002		if (ret != 1 || args.timeo <= 0) {
1003			vfs_mount_error(mp, "illegal timeo: %s",
1004			    opt);
1005			error = EINVAL;
1006			goto out;
1007		}
1008		args.flags |= NFSMNT_TIMEO;
1009	}
1010	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1011		ret = sscanf(opt, "%d", &args.timeo);
1012		if (ret != 1 || args.timeo <= 0) {
1013			vfs_mount_error(mp, "illegal timeout: %s",
1014			    opt);
1015			error = EINVAL;
1016			goto out;
1017		}
1018		args.flags |= NFSMNT_TIMEO;
1019	}
1020	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1021		ret = sscanf(opt, "%d", &nametimeo);
1022		if (ret != 1 || nametimeo < 0) {
1023			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1024			error = EINVAL;
1025			goto out;
1026		}
1027	}
1028	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1029	    == 0) {
1030		ret = sscanf(opt, "%d", &negnametimeo);
1031		if (ret != 1 || negnametimeo < 0) {
1032			vfs_mount_error(mp, "illegal negnametimeo: %s",
1033			    opt);
1034			error = EINVAL;
1035			goto out;
1036		}
1037	}
1038	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1039	    0) {
1040		ret = sscanf(opt, "%d", &minvers);
1041		if (ret != 1 || minvers < 0 || minvers > 1 ||
1042		    (args.flags & NFSMNT_NFSV4) == 0) {
1043			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1044			error = EINVAL;
1045			goto out;
1046		}
1047	}
1048	if (vfs_getopt(mp->mnt_optnew, "sec",
1049		(void **) &secname, NULL) == 0)
1050		nfs_sec_name(secname, &args.flags);
1051
1052	if (mp->mnt_flag & MNT_UPDATE) {
1053		struct nfsmount *nmp = VFSTONFS(mp);
1054
1055		if (nmp == NULL) {
1056			error = EIO;
1057			goto out;
1058		}
1059
1060		/*
1061		 * If a change from TCP->UDP is done and there are thread(s)
1062		 * that have I/O RPC(s) in progress with a transfer size
1063		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1064		 * hung, retrying the RPC(s) forever. Usually these threads
1065		 * will be seen doing an uninterruptible sleep on wait channel
1066		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1067		 */
1068		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1069			tprintf(td->td_proc, LOG_WARNING,
1070	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1071
1072		/*
1073		 * When doing an update, we can't change version,
1074		 * security, switch lockd strategies or change cookie
1075		 * translation
1076		 */
1077		args.flags = (args.flags &
1078		    ~(NFSMNT_NFSV3 |
1079		      NFSMNT_NFSV4 |
1080		      NFSMNT_KERB |
1081		      NFSMNT_INTEGRITY |
1082		      NFSMNT_PRIVACY |
1083		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1084		    (nmp->nm_flag &
1085			(NFSMNT_NFSV3 |
1086			 NFSMNT_NFSV4 |
1087			 NFSMNT_KERB |
1088			 NFSMNT_INTEGRITY |
1089			 NFSMNT_PRIVACY |
1090			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1091		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1092		goto out;
1093	}
1094
1095	/*
1096	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1097	 * or no-connection mode for those protocols that support
1098	 * no-connection mode (the flag will be cleared later for protocols
1099	 * that do not support no-connection mode).  This will allow a client
1100	 * to receive replies from a different IP than the request was
1101	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1102	 * not 0.
1103	 */
1104	if (nfs_ip_paranoia == 0)
1105		args.flags |= NFSMNT_NOCONN;
1106
1107	if (has_nfs_args_opt != 0) {
1108		/*
1109		 * In the 'nfs_args' case, the pointers in the args
1110		 * structure are in userland - we copy them in here.
1111		 */
1112		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1113			vfs_mount_error(mp, "Bad file handle");
1114			error = EINVAL;
1115			goto out;
1116		}
1117		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1118		    args.fhsize);
1119		if (error != 0)
1120			goto out;
1121		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1122		if (error != 0)
1123			goto out;
1124		bzero(&hst[hstlen], MNAMELEN - hstlen);
1125		args.hostname = hst;
1126		/* sockargs() call must be after above copyin() calls */
1127		error = getsockaddr(&nam, (caddr_t)args.addr,
1128		    args.addrlen);
1129		if (error != 0)
1130			goto out;
1131	} else {
1132		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1133		    &args.fhsize) == 0) {
1134			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1135				vfs_mount_error(mp, "Bad file handle");
1136				error = EINVAL;
1137				goto out;
1138			}
1139			bcopy(args.fh, nfh, args.fhsize);
1140		} else {
1141			args.fhsize = 0;
1142		}
1143		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1144		    (void **)&args.hostname, &len);
1145		if (args.hostname == NULL) {
1146			vfs_mount_error(mp, "Invalid hostname");
1147			error = EINVAL;
1148			goto out;
1149		}
1150		bcopy(args.hostname, hst, MNAMELEN);
1151		hst[MNAMELEN - 1] = '\0';
1152	}
1153
1154	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1155		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1156	else {
1157		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1158		cp = strchr(srvkrbname, ':');
1159		if (cp != NULL)
1160			*cp = '\0';
1161	}
1162	srvkrbnamelen = strlen(srvkrbname);
1163
1164	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1165		strlcpy(krbname, name, sizeof (krbname));
1166	else
1167		krbname[0] = '\0';
1168	krbnamelen = strlen(krbname);
1169
1170	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1171		strlcpy(dirpath, name, sizeof (dirpath));
1172	else
1173		dirpath[0] = '\0';
1174	dirlen = strlen(dirpath);
1175
1176	if (has_nfs_args_opt == 0) {
1177		if (vfs_getopt(mp->mnt_optnew, "addr",
1178		    (void **)&args.addr, &args.addrlen) == 0) {
1179			if (args.addrlen > SOCK_MAXADDRLEN) {
1180				error = ENAMETOOLONG;
1181				goto out;
1182			}
1183			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1184			bcopy(args.addr, nam, args.addrlen);
1185			nam->sa_len = args.addrlen;
1186		} else {
1187			vfs_mount_error(mp, "No server address");
1188			error = EINVAL;
1189			goto out;
1190		}
1191	}
1192
1193	args.fh = nfh;
1194	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1195	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1196	    nametimeo, negnametimeo, minvers);
1197out:
1198	if (!error) {
1199		MNT_ILOCK(mp);
1200		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1201		    MNTK_USES_BCACHE;
1202		MNT_IUNLOCK(mp);
1203	}
1204	return (error);
1205}
1206
1207
1208/*
1209 * VFS Operations.
1210 *
1211 * mount system call
1212 * It seems a bit dumb to copyinstr() the host and path here and then
1213 * bcopy() them in mountnfs(), but I wanted to detect errors before
1214 * doing the sockargs() call because sockargs() allocates an mbuf and
1215 * an error after that means that I have to release the mbuf.
1216 */
1217/* ARGSUSED */
1218static int
1219nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1220{
1221	int error;
1222	struct nfs_args args;
1223
1224	error = copyin(data, &args, sizeof (struct nfs_args));
1225	if (error)
1226		return error;
1227
1228	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1229
1230	error = kernel_mount(ma, flags);
1231	return (error);
1232}
1233
1234/*
1235 * Common code for mount and mountroot
1236 */
1237static int
1238mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1239    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1240    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1241    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1242    int minvers)
1243{
1244	struct nfsmount *nmp;
1245	struct nfsnode *np;
1246	int error, trycnt, ret;
1247	struct nfsvattr nfsva;
1248	struct nfsclclient *clp;
1249	struct nfsclds *dsp, *tdsp;
1250	uint32_t lease;
1251	static u_int64_t clval = 0;
1252
1253	NFSCL_DEBUG(3, "in mnt\n");
1254	clp = NULL;
1255	if (mp->mnt_flag & MNT_UPDATE) {
1256		nmp = VFSTONFS(mp);
1257		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1258		FREE(nam, M_SONAME);
1259		return (0);
1260	} else {
1261		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1262		    krbnamelen + dirlen + srvkrbnamelen + 2,
1263		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1264		TAILQ_INIT(&nmp->nm_bufq);
1265		if (clval == 0)
1266			clval = (u_int64_t)nfsboottime.tv_sec;
1267		nmp->nm_clval = clval++;
1268		nmp->nm_krbnamelen = krbnamelen;
1269		nmp->nm_dirpathlen = dirlen;
1270		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1271		if (td->td_ucred->cr_uid != (uid_t)0) {
1272			/*
1273			 * nm_uid is used to get KerberosV credentials for
1274			 * the nfsv4 state handling operations if there is
1275			 * no host based principal set. Use the uid of
1276			 * this user if not root, since they are doing the
1277			 * mount. I don't think setting this for root will
1278			 * work, since root normally does not have user
1279			 * credentials in a credentials cache.
1280			 */
1281			nmp->nm_uid = td->td_ucred->cr_uid;
1282		} else {
1283			/*
1284			 * Just set to -1, so it won't be used.
1285			 */
1286			nmp->nm_uid = (uid_t)-1;
1287		}
1288
1289		/* Copy and null terminate all the names */
1290		if (nmp->nm_krbnamelen > 0) {
1291			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1292			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1293		}
1294		if (nmp->nm_dirpathlen > 0) {
1295			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1296			    nmp->nm_dirpathlen);
1297			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1298			    + 1] = '\0';
1299		}
1300		if (nmp->nm_srvkrbnamelen > 0) {
1301			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1302			    nmp->nm_srvkrbnamelen);
1303			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1304			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1305		}
1306		nmp->nm_sockreq.nr_cred = crhold(cred);
1307		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1308		mp->mnt_data = nmp;
1309		nmp->nm_getinfo = nfs_getnlminfo;
1310		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1311	}
1312	vfs_getnewfsid(mp);
1313	nmp->nm_mountp = mp;
1314	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1315
1316	/*
1317	 * Since nfs_decode_args() might optionally set them, these
1318	 * need to be set to defaults before the call, so that the
1319	 * optional settings aren't overwritten.
1320	 */
1321	nmp->nm_nametimeo = nametimeo;
1322	nmp->nm_negnametimeo = negnametimeo;
1323	nmp->nm_timeo = NFS_TIMEO;
1324	nmp->nm_retry = NFS_RETRANS;
1325	nmp->nm_readahead = NFS_DEFRAHEAD;
1326
1327	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1328	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1329	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1330		nmp->nm_wcommitsize *= 2;
1331	nmp->nm_wcommitsize *= 256;
1332
1333	if ((argp->flags & NFSMNT_NFSV4) != 0)
1334		nmp->nm_minorvers = minvers;
1335	else
1336		nmp->nm_minorvers = 0;
1337
1338	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1339
1340	/*
1341	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1342	 * high, depending on whether we end up with negative offsets in
1343	 * the client or server somewhere.  2GB-1 may be safer.
1344	 *
1345	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1346	 * that we can handle until we find out otherwise.
1347	 */
1348	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1349		nmp->nm_maxfilesize = 0xffffffffLL;
1350	else
1351		nmp->nm_maxfilesize = OFF_MAX;
1352
1353	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1354		nmp->nm_wsize = NFS_WSIZE;
1355		nmp->nm_rsize = NFS_RSIZE;
1356		nmp->nm_readdirsize = NFS_READDIRSIZE;
1357	}
1358	nmp->nm_numgrps = NFS_MAXGRPS;
1359	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1360	if (nmp->nm_tprintf_delay < 0)
1361		nmp->nm_tprintf_delay = 0;
1362	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1363	if (nmp->nm_tprintf_initial_delay < 0)
1364		nmp->nm_tprintf_initial_delay = 0;
1365	nmp->nm_fhsize = argp->fhsize;
1366	if (nmp->nm_fhsize > 0)
1367		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1368	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1369	nmp->nm_nam = nam;
1370	/* Set up the sockets and per-host congestion */
1371	nmp->nm_sotype = argp->sotype;
1372	nmp->nm_soproto = argp->proto;
1373	nmp->nm_sockreq.nr_prog = NFS_PROG;
1374	if ((argp->flags & NFSMNT_NFSV4))
1375		nmp->nm_sockreq.nr_vers = NFS_VER4;
1376	else if ((argp->flags & NFSMNT_NFSV3))
1377		nmp->nm_sockreq.nr_vers = NFS_VER3;
1378	else
1379		nmp->nm_sockreq.nr_vers = NFS_VER2;
1380
1381
1382	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1383		goto bad;
1384	/* For NFSv4.1, get the clientid now. */
1385	if (nmp->nm_minorvers > 0) {
1386		NFSCL_DEBUG(3, "at getcl\n");
1387		error = nfscl_getcl(mp, cred, td, 0, &clp);
1388		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1389		if (error != 0)
1390			goto bad;
1391	}
1392
1393	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1394	    nmp->nm_dirpathlen > 0) {
1395		NFSCL_DEBUG(3, "in dirp\n");
1396		/*
1397		 * If the fhsize on the mount point == 0 for V4, the mount
1398		 * path needs to be looked up.
1399		 */
1400		trycnt = 3;
1401		do {
1402			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1403			    cred, td);
1404			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1405			if (error)
1406				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1407		} while (error && --trycnt > 0);
1408		if (error) {
1409			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1410			goto bad;
1411		}
1412	}
1413
1414	/*
1415	 * A reference count is needed on the nfsnode representing the
1416	 * remote root.  If this object is not persistent, then backward
1417	 * traversals of the mount point (i.e. "..") will not work if
1418	 * the nfsnode gets flushed out of the cache. Ufs does not have
1419	 * this problem, because one can identify root inodes by their
1420	 * number == ROOTINO (2).
1421	 */
1422	if (nmp->nm_fhsize > 0) {
1423		/*
1424		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1425		 * non-zero for the root vnode. f_iosize will be set correctly
1426		 * by nfs_statfs() before any I/O occurs.
1427		 */
1428		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1429		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1430		    LK_EXCLUSIVE);
1431		if (error)
1432			goto bad;
1433		*vpp = NFSTOV(np);
1434
1435		/*
1436		 * Get file attributes and transfer parameters for the
1437		 * mountpoint.  This has the side effect of filling in
1438		 * (*vpp)->v_type with the correct value.
1439		 */
1440		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1441		    cred, td, &nfsva, NULL, &lease);
1442		if (ret) {
1443			/*
1444			 * Just set default values to get things going.
1445			 */
1446			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1447			nfsva.na_vattr.va_type = VDIR;
1448			nfsva.na_vattr.va_mode = 0777;
1449			nfsva.na_vattr.va_nlink = 100;
1450			nfsva.na_vattr.va_uid = (uid_t)0;
1451			nfsva.na_vattr.va_gid = (gid_t)0;
1452			nfsva.na_vattr.va_fileid = 2;
1453			nfsva.na_vattr.va_gen = 1;
1454			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1455			nfsva.na_vattr.va_size = 512 * 1024;
1456			lease = 60;
1457		}
1458		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1459		if (nmp->nm_minorvers > 0) {
1460			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1461			NFSLOCKCLSTATE();
1462			clp->nfsc_renew = NFSCL_RENEW(lease);
1463			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1464			clp->nfsc_clientidrev++;
1465			if (clp->nfsc_clientidrev == 0)
1466				clp->nfsc_clientidrev++;
1467			NFSUNLOCKCLSTATE();
1468			/*
1469			 * Mount will succeed, so the renew thread can be
1470			 * started now.
1471			 */
1472			nfscl_start_renewthread(clp);
1473			nfscl_clientrelease(clp);
1474		}
1475		if (argp->flags & NFSMNT_NFSV3)
1476			ncl_fsinfo(nmp, *vpp, cred, td);
1477
1478		/* Mark if the mount point supports NFSv4 ACLs. */
1479		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1480		    ret == 0 &&
1481		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1482			MNT_ILOCK(mp);
1483			mp->mnt_flag |= MNT_NFS4ACLS;
1484			MNT_IUNLOCK(mp);
1485		}
1486
1487		/*
1488		 * Lose the lock but keep the ref.
1489		 */
1490		NFSVOPUNLOCK(*vpp, 0);
1491		return (0);
1492	}
1493	error = EIO;
1494
1495bad:
1496	if (clp != NULL)
1497		nfscl_clientrelease(clp);
1498	newnfs_disconnect(&nmp->nm_sockreq);
1499	crfree(nmp->nm_sockreq.nr_cred);
1500	if (nmp->nm_sockreq.nr_auth != NULL)
1501		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1502	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1503	mtx_destroy(&nmp->nm_mtx);
1504	if (nmp->nm_clp != NULL) {
1505		NFSLOCKCLSTATE();
1506		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1507		NFSUNLOCKCLSTATE();
1508		free(nmp->nm_clp, M_NFSCLCLIENT);
1509	}
1510	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1511		nfscl_freenfsclds(dsp);
1512	FREE(nmp, M_NEWNFSMNT);
1513	FREE(nam, M_SONAME);
1514	return (error);
1515}
1516
1517/*
1518 * unmount system call
1519 */
1520static int
1521nfs_unmount(struct mount *mp, int mntflags)
1522{
1523	struct thread *td;
1524	struct nfsmount *nmp;
1525	int error, flags = 0, i, trycnt = 0;
1526	struct nfsclds *dsp, *tdsp;
1527
1528	td = curthread;
1529
1530	if (mntflags & MNT_FORCE)
1531		flags |= FORCECLOSE;
1532	nmp = VFSTONFS(mp);
1533	/*
1534	 * Goes something like this..
1535	 * - Call vflush() to clear out vnodes for this filesystem
1536	 * - Close the socket
1537	 * - Free up the data structures
1538	 */
1539	/* In the forced case, cancel any outstanding requests. */
1540	if (mntflags & MNT_FORCE) {
1541		error = newnfs_nmcancelreqs(nmp);
1542		if (error)
1543			goto out;
1544		/* For a forced close, get rid of the renew thread now */
1545		nfscl_umount(nmp, td);
1546	}
1547	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1548	do {
1549		error = vflush(mp, 1, flags, td);
1550		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1551			(void) nfs_catnap(PSOCK, error, "newndm");
1552	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1553	if (error)
1554		goto out;
1555
1556	/*
1557	 * We are now committed to the unmount.
1558	 */
1559	if ((mntflags & MNT_FORCE) == 0)
1560		nfscl_umount(nmp, td);
1561	/* Make sure no nfsiods are assigned to this mount. */
1562	mtx_lock(&ncl_iod_mutex);
1563	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1564		if (ncl_iodmount[i] == nmp) {
1565			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1566			ncl_iodmount[i] = NULL;
1567		}
1568	mtx_unlock(&ncl_iod_mutex);
1569	newnfs_disconnect(&nmp->nm_sockreq);
1570	crfree(nmp->nm_sockreq.nr_cred);
1571	FREE(nmp->nm_nam, M_SONAME);
1572	if (nmp->nm_sockreq.nr_auth != NULL)
1573		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1574	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1575	mtx_destroy(&nmp->nm_mtx);
1576	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1577		nfscl_freenfsclds(dsp);
1578	FREE(nmp, M_NEWNFSMNT);
1579out:
1580	return (error);
1581}
1582
1583/*
1584 * Return root of a filesystem
1585 */
1586static int
1587nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1588{
1589	struct vnode *vp;
1590	struct nfsmount *nmp;
1591	struct nfsnode *np;
1592	int error;
1593
1594	nmp = VFSTONFS(mp);
1595	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1596	if (error)
1597		return error;
1598	vp = NFSTOV(np);
1599	/*
1600	 * Get transfer parameters and attributes for root vnode once.
1601	 */
1602	mtx_lock(&nmp->nm_mtx);
1603	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1604		mtx_unlock(&nmp->nm_mtx);
1605		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1606	} else
1607		mtx_unlock(&nmp->nm_mtx);
1608	if (vp->v_type == VNON)
1609	    vp->v_type = VDIR;
1610	vp->v_vflag |= VV_ROOT;
1611	*vpp = vp;
1612	return (0);
1613}
1614
1615/*
1616 * Flush out the buffer cache
1617 */
1618/* ARGSUSED */
1619static int
1620nfs_sync(struct mount *mp, int waitfor)
1621{
1622	struct vnode *vp, *mvp;
1623	struct thread *td;
1624	int error, allerror = 0;
1625
1626	td = curthread;
1627
1628	MNT_ILOCK(mp);
1629	/*
1630	 * If a forced dismount is in progress, return from here so that
1631	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1632	 * calling VFS_UNMOUNT().
1633	 */
1634	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1635		MNT_IUNLOCK(mp);
1636		return (EBADF);
1637	}
1638	MNT_IUNLOCK(mp);
1639
1640	/*
1641	 * Force stale buffer cache information to be flushed.
1642	 */
1643loop:
1644	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1645		/* XXX Racy bv_cnt check. */
1646		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1647		    waitfor == MNT_LAZY) {
1648			VI_UNLOCK(vp);
1649			continue;
1650		}
1651		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1652			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1653			goto loop;
1654		}
1655		error = VOP_FSYNC(vp, waitfor, td);
1656		if (error)
1657			allerror = error;
1658		NFSVOPUNLOCK(vp, 0);
1659		vrele(vp);
1660	}
1661	return (allerror);
1662}
1663
1664static int
1665nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1666{
1667	struct nfsmount *nmp = VFSTONFS(mp);
1668	struct vfsquery vq;
1669	int error;
1670
1671	bzero(&vq, sizeof(vq));
1672	switch (op) {
1673#if 0
1674	case VFS_CTL_NOLOCKS:
1675		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1676 		if (req->oldptr != NULL) {
1677 			error = SYSCTL_OUT(req, &val, sizeof(val));
1678 			if (error)
1679 				return (error);
1680 		}
1681 		if (req->newptr != NULL) {
1682 			error = SYSCTL_IN(req, &val, sizeof(val));
1683 			if (error)
1684 				return (error);
1685			if (val)
1686				nmp->nm_flag |= NFSMNT_NOLOCKS;
1687			else
1688				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1689 		}
1690		break;
1691#endif
1692	case VFS_CTL_QUERY:
1693		mtx_lock(&nmp->nm_mtx);
1694		if (nmp->nm_state & NFSSTA_TIMEO)
1695			vq.vq_flags |= VQ_NOTRESP;
1696		mtx_unlock(&nmp->nm_mtx);
1697#if 0
1698		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1699		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1700			vq.vq_flags |= VQ_NOTRESPLOCK;
1701#endif
1702		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1703		break;
1704 	case VFS_CTL_TIMEO:
1705 		if (req->oldptr != NULL) {
1706 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1707 			    sizeof(nmp->nm_tprintf_initial_delay));
1708 			if (error)
1709 				return (error);
1710 		}
1711 		if (req->newptr != NULL) {
1712			error = vfs_suser(mp, req->td);
1713			if (error)
1714				return (error);
1715 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1716 			    sizeof(nmp->nm_tprintf_initial_delay));
1717 			if (error)
1718 				return (error);
1719 			if (nmp->nm_tprintf_initial_delay < 0)
1720 				nmp->nm_tprintf_initial_delay = 0;
1721 		}
1722		break;
1723	default:
1724		return (ENOTSUP);
1725	}
1726	return (0);
1727}
1728
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The result of newnfs_nmcancelreqs() is not reported, since this
 * function has no way of returning it.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1741
1742/*
1743 * Extract the information needed by the nlm from the nfs vnode.
1744 */
1745static void
1746nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1747    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1748    struct timeval *timeop)
1749{
1750	struct nfsmount *nmp;
1751	struct nfsnode *np = VTONFS(vp);
1752
1753	nmp = VFSTONFS(vp->v_mount);
1754	if (fhlenp != NULL)
1755		*fhlenp = (size_t)np->n_fhp->nfh_len;
1756	if (fhp != NULL)
1757		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1758	if (sp != NULL)
1759		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1760	if (is_v3p != NULL)
1761		*is_v3p = NFS_ISV3(vp);
1762	if (sizep != NULL)
1763		*sizep = np->n_size;
1764	if (timeop != NULL) {
1765		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1766		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1767	}
1768}
1769
/*
 * This function prints out an option name, based on the conditional
 * argument.
 *
 * When testval is non-zero and the option name plus its terminating NUL
 * fits in the *blen bytes remaining at *buf, the name is copied there,
 * *buf is advanced to the new terminating NUL and *blen is reduced by
 * the name's length.  Otherwise nothing is modified.  nmp is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	/*
	 * There is room for the whole name and its NUL, so the output is
	 * never truncated and exactly optlen characters are produced.
	 */
	(void) snprintf(*buf, *blen, "%s", opt);
	*buf += optlen;
	*blen -= optlen;
}
1787
/*
 * This function prints out an option's integer value as "<opt>=<optval>".
 *
 * The option is attempted only when more than strlen(opt) + 1 bytes
 * remain at *buf.  When the whole string and its terminating NUL fit,
 * *buf is advanced to the new terminating NUL and *blen is reduced by
 * the number of characters written.  When it does not fit, the partial
 * output is discarded by writing a NUL at *buf, so that a shortened
 * (and therefore wrong) number is never left in the buffer; *buf and
 * *blen are then unchanged.  nmp is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len < 0 || (size_t)len >= *blen) {
		/* Truncated (or failed): drop this option entirely. */
		**buf = '\0';
		return;
	}
	*buf += len;
	*blen -= len;
}
1805
1806/*
1807 * Load the option flags and values into the buffer.
1808 */
1809void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1810{
1811	char *buf;
1812	size_t blen;
1813
1814	buf = buffer;
1815	blen = buflen;
1816	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1817	    &blen);
1818	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1819		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1820		    &blen);
1821		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1822		    &buf, &blen);
1823	}
1824	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1825	    &blen);
1826	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1827	    "nfsv2", &buf, &blen);
1828	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1829	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1830	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1831	    &buf, &blen);
1832	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1833	    &buf, &blen);
1834	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1835	    &blen);
1836	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1837	    &blen);
1838	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1839	    &blen);
1840	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1841	    &blen);
1842	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1843	    &blen);
1844	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1845	    ",noncontigwr", &buf, &blen);
1846	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1847	    0, ",lockd", &buf, &blen);
1848	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1849	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1850	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1851	    &buf, &blen);
1852	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1853	    &buf, &blen);
1854	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1855	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1856	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1857	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1858	    &buf, &blen);
1859	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1860	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1861	    &buf, &blen);
1862	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1863	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1864	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1865	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1866	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1867	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1868	    &blen);
1869	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1870	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1871	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1872	    &blen);
1873	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1874	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1875	    &blen);
1876	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1877	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1878}
1879
1880