nfs_clvfsops.c revision 291551
1195609Smp/*-
259243Sobrien * Copyright (c) 1989, 1993, 1995
359243Sobrien *	The Regents of the University of California.  All rights reserved.
459243Sobrien *
559243Sobrien * This code is derived from software contributed to Berkeley by
659243Sobrien * Rick Macklem at The University of Guelph.
759243Sobrien *
859243Sobrien * Redistribution and use in source and binary forms, with or without
959243Sobrien * modification, are permitted provided that the following conditions
1059243Sobrien * are met:
1159243Sobrien * 1. Redistributions of source code must retain the above copyright
1259243Sobrien *    notice, this list of conditions and the following disclaimer.
1359243Sobrien * 2. Redistributions in binary form must reproduce the above copyright
1459243Sobrien *    notice, this list of conditions and the following disclaimer in the
1559243Sobrien *    documentation and/or other materials provided with the distribution.
1659243Sobrien * 4. Neither the name of the University nor the names of its contributors
17100616Smp *    may be used to endorse or promote products derived from this software
1859243Sobrien *    without specific prior written permission.
1959243Sobrien *
2059243Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2159243Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2259243Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2359243Sobrien * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2459243Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2559243Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2659243Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2759243Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2859243Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2959243Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3059243Sobrien * SUCH DAMAGE.
3159243Sobrien *
3259243Sobrien *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
3359243Sobrien */
3459243Sobrien
3559243Sobrien#include <sys/cdefs.h>
3659243Sobrien__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clvfsops.c 291551 2015-12-01 02:30:41Z rmacklem $");
3759243Sobrien
3859243Sobrien
3959243Sobrien#include "opt_bootp.h"
4059243Sobrien#include "opt_nfsroot.h"
4159243Sobrien
4259243Sobrien#include <sys/param.h>
4359243Sobrien#include <sys/systm.h>
4459243Sobrien#include <sys/kernel.h>
4559243Sobrien#include <sys/bio.h>
4659243Sobrien#include <sys/buf.h>
4759243Sobrien#include <sys/clock.h>
4859243Sobrien#include <sys/jail.h>
4959243Sobrien#include <sys/limits.h>
5059243Sobrien#include <sys/lock.h>
5159243Sobrien#include <sys/malloc.h>
5259243Sobrien#include <sys/mbuf.h>
5359243Sobrien#include <sys/module.h>
5459243Sobrien#include <sys/mount.h>
5559243Sobrien#include <sys/proc.h>
5659243Sobrien#include <sys/socket.h>
5759243Sobrien#include <sys/socketvar.h>
5859243Sobrien#include <sys/sockio.h>
5959243Sobrien#include <sys/sysctl.h>
6059243Sobrien#include <sys/vnode.h>
6159243Sobrien#include <sys/signalvar.h>
6259243Sobrien
6359243Sobrien#include <vm/vm.h>
6459243Sobrien#include <vm/vm_extern.h>
6559243Sobrien#include <vm/uma.h>
6659243Sobrien
6759243Sobrien#include <net/if.h>
6859243Sobrien#include <net/route.h>
6959243Sobrien#include <netinet/in.h>
7059243Sobrien
7159243Sobrien#include <fs/nfs/nfsport.h>
7259243Sobrien#include <fs/nfsclient/nfsnode.h>
7359243Sobrien#include <fs/nfsclient/nfsmount.h>
7459243Sobrien#include <fs/nfsclient/nfs.h>
7559243Sobrien#include <nfs/nfsdiskless.h>
7659243Sobrien
/* Register the "nfscl" kernel feature with its description string. */
FEATURE(nfscl, "NFSv4 client");

/*
 * Objects declared extern here; their definitions are not in this part
 * of the file.
 */
extern int nfscl_ticks;
extern struct timeval nfsboottime;
extern struct nfsstats	newnfsstats;
extern int nfsrv_useacl;
extern int nfscl_debuglevel;
extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
extern struct mtx ncl_iod_mutex;
/* NOTE(review): macro from an NFS header; presumably declares the client state mutex -- confirm. */
NFSCLSTATEMUTEX;

/* malloc(9) types for NFS request headers and NFS mount structures. */
MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");

SYSCTL_DECL(_vfs_nfs);
/* Read/write integer knob under _vfs_nfs; initial value 1, no description. */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * Read/write integer knob "downdelayinitial", initialised from
 * NFS_TPRINTF_INITIAL_DELAY.
 * NOTE(review): the OID-number argument below is the delay constant
 * itself rather than OID_AUTO as used for nfs_ip_paranoia above (same
 * for downdelayinterval) -- confirm this is intended.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/* how long between console messages "nfs server foo not responding" */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
10359243Sobrien
/*
 * Forward declarations.  Everything here is static, so each function is
 * private to this translation unit.  The vfs_*_t entries are the VFS
 * entry points referenced by the nfs_vfsops table below.
 */
static int	nfs_mountroot(struct mount *);
static void	nfs_sec_name(char *, int *);
static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
		    struct nfs_args *argp, const char *, struct ucred *,
		    struct thread *);
static int	mountnfs(struct nfs_args *, struct mount *,
		    struct sockaddr *, char *, u_char *, int, u_char *, int,
		    u_char *, int, struct vnode **, struct ucred *,
		    struct thread *, int, int, int);
static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
		    struct sockaddr_storage *, int *, off_t *,
		    struct timeval *);
static vfs_mount_t nfs_mount;
static vfs_cmount_t nfs_cmount;
static vfs_unmount_t nfs_unmount;
static vfs_root_t nfs_root;
static vfs_statfs_t nfs_statfs;
static vfs_sync_t nfs_sync;
static vfs_sysctl_t nfs_sysctl;
static vfs_purge_t nfs_purge;
12459243Sobrien
/*
 * nfs vfs operations.
 *
 * Table of VFS entry points for this filesystem.  The init/uninit hooks
 * are ncl_init()/ncl_uninit(); the remaining hooks are the static nfs_*
 * functions declared above.  VFS_SET() registers the table under the
 * name "nfs" with the VFCF_NETWORK and VFCF_SBDRY flags.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
14159243Sobrien
/* So that loader and kldload(2) can find us, wherever we are.. */
/*
 * Module "nfs" is version 1 and depends on the nfscommon, krpc, nfssvc
 * and nfslock modules, each with minimum/preferred/maximum version 1.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
14859243Sobrien
/*
 * This structure is now defined in sys/nfs/nfs_diskless.c so that it
 * can be shared by both NFS clients. It is declared here so that it
 * will be defined for kernels built without NFS_ROOT, although it
 * isn't used in that case.
 */
/*
 * The zero-initialised fallback definitions below are compiled only when
 * neither NFS_ROOT nor NFSCLIENT is defined.
 */
#if !defined(NFS_ROOT) && !defined(NFSCLIENT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only view of nfs_diskless_valid. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/*
 * Read-only view of nfsv3_diskless.root_hostnam.  Despite the field
 * name, nfs_mountroot() uses this string as the part after "ip:" when
 * it builds the root mount specification.
 */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Read-only opaque view of nfsv3_diskless.root_saddr. */
SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
17159243Sobrien
17259243Sobrien
/*
 * Prototypes for the diskless-boot helpers.  newnfsargs_ntoh() has
 * external linkage; the static ones are defined later in this file.
 */
void		newnfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **, struct mount *);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);
18059243Sobrien
18159243Sobrienint
18259243Sobriennewnfs_iosize(struct nfsmount *nmp)
18359243Sobrien{
18459243Sobrien	int iosize, maxio;
18559243Sobrien
18659243Sobrien	/* First, set the upper limit for iosize */
18759243Sobrien	if (nmp->nm_flag & NFSMNT_NFSV4) {
18859243Sobrien		maxio = NFS_MAXBSIZE;
18959243Sobrien	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
19059243Sobrien		if (nmp->nm_sotype == SOCK_DGRAM)
19159243Sobrien			maxio = NFS_MAXDGRAMDATA;
19259243Sobrien		else
19359243Sobrien			maxio = NFS_MAXBSIZE;
19459243Sobrien	} else {
19559243Sobrien		maxio = NFS_V2MAXDATA;
19659243Sobrien	}
19759243Sobrien	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
19859243Sobrien		nmp->nm_rsize = maxio;
19959243Sobrien	if (nmp->nm_rsize > NFS_MAXBSIZE)
20059243Sobrien		nmp->nm_rsize = NFS_MAXBSIZE;
20159243Sobrien	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
20259243Sobrien		nmp->nm_readdirsize = maxio;
20359243Sobrien	if (nmp->nm_readdirsize > nmp->nm_rsize)
20459243Sobrien		nmp->nm_readdirsize = nmp->nm_rsize;
20559243Sobrien	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
20659243Sobrien		nmp->nm_wsize = maxio;
20759243Sobrien	if (nmp->nm_wsize > NFS_MAXBSIZE)
20859243Sobrien		nmp->nm_wsize = NFS_MAXBSIZE;
20959243Sobrien
21059243Sobrien	/*
21159243Sobrien	 * Calculate the size used for io buffers.  Use the larger
21259243Sobrien	 * of the two sizes to minimise nfs requests but make sure
21359243Sobrien	 * that it is at least one VM page to avoid wasting buffer
21459243Sobrien	 * space.  It must also be at least NFS_DIRBLKSIZ, since
21559243Sobrien	 * that is the buffer size used for directories.
21659243Sobrien	 */
21759243Sobrien	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
21859243Sobrien	iosize = imax(iosize, PAGE_SIZE);
21959243Sobrien	iosize = imax(iosize, NFS_DIRBLKSIZ);
22059243Sobrien	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
22159243Sobrien	return (iosize);
22259243Sobrien}
22359243Sobrien
22459243Sobrienstatic void
22559243Sobriennfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
22659243Sobrien{
22759243Sobrien
22859243Sobrien	args->version = NFS_ARGSVERSION;
22959243Sobrien	args->addr = oargs->addr;
23059243Sobrien	args->addrlen = oargs->addrlen;
23159243Sobrien	args->sotype = oargs->sotype;
23259243Sobrien	args->proto = oargs->proto;
23359243Sobrien	args->fh = oargs->fh;
23459243Sobrien	args->fhsize = oargs->fhsize;
23559243Sobrien	args->flags = oargs->flags;
23659243Sobrien	args->wsize = oargs->wsize;
23759243Sobrien	args->rsize = oargs->rsize;
23859243Sobrien	args->readdirsize = oargs->readdirsize;
23959243Sobrien	args->timeo = oargs->timeo;
24059243Sobrien	args->retrans = oargs->retrans;
24159243Sobrien	args->readahead = oargs->readahead;
24259243Sobrien	args->hostname = oargs->hostname;
24359243Sobrien}
24459243Sobrien
24559243Sobrienstatic void
24659243Sobriennfs_convert_diskless(void)
24759243Sobrien{
24859243Sobrien
24959243Sobrien	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
25059243Sobrien		sizeof(struct ifaliasreq));
25159243Sobrien	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
25259243Sobrien		sizeof(struct sockaddr_in));
25359243Sobrien	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
25459243Sobrien	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
25559243Sobrien		nfsv3_diskless.root_fhsize = NFSX_MYFH;
25659243Sobrien		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
25759243Sobrien	} else {
25859243Sobrien		nfsv3_diskless.root_fhsize = NFSX_V2FH;
25959243Sobrien		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
26059243Sobrien	}
26159243Sobrien	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
26259243Sobrien		sizeof(struct sockaddr_in));
26359243Sobrien	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
26459243Sobrien	nfsv3_diskless.root_time = nfs_diskless.root_time;
26559243Sobrien	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
26659243Sobrien		MAXHOSTNAMELEN);
26759243Sobrien	nfs_diskless_valid = 3;
26859243Sobrien}
26959243Sobrien
27059243Sobrien/*
27159243Sobrien * nfs statfs call
27259243Sobrien */
27359243Sobrienstatic int
27459243Sobriennfs_statfs(struct mount *mp, struct statfs *sbp)
27559243Sobrien{
27659243Sobrien	struct vnode *vp;
27759243Sobrien	struct thread *td;
27859243Sobrien	struct nfsmount *nmp = VFSTONFS(mp);
27959243Sobrien	struct nfsvattr nfsva;
28059243Sobrien	struct nfsfsinfo fs;
28159243Sobrien	struct nfsstatfs sb;
28259243Sobrien	int error = 0, attrflag, gotfsinfo = 0, ret;
28359243Sobrien	struct nfsnode *np;
28459243Sobrien
28559243Sobrien	td = curthread;
28659243Sobrien
28759243Sobrien	error = vfs_busy(mp, MBF_NOWAIT);
28859243Sobrien	if (error)
28959243Sobrien		return (error);
29059243Sobrien	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
29159243Sobrien	if (error) {
29259243Sobrien		vfs_unbusy(mp);
29359243Sobrien		return (error);
29459243Sobrien	}
29559243Sobrien	vp = NFSTOV(np);
29659243Sobrien	mtx_lock(&nmp->nm_mtx);
29759243Sobrien	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
29859243Sobrien		mtx_unlock(&nmp->nm_mtx);
29959243Sobrien		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
30059243Sobrien		    &attrflag, NULL);
30159243Sobrien		if (!error)
30259243Sobrien			gotfsinfo = 1;
30359243Sobrien	} else
30459243Sobrien		mtx_unlock(&nmp->nm_mtx);
30559243Sobrien	if (!error)
30659243Sobrien		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
30759243Sobrien		    &attrflag, NULL);
30859243Sobrien	if (error != 0)
30959243Sobrien		NFSCL_DEBUG(2, "statfs=%d\n", error);
31059243Sobrien	if (attrflag == 0) {
31159243Sobrien		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
31259243Sobrien		    td->td_ucred, td, &nfsva, NULL, NULL);
31359243Sobrien		if (ret) {
31459243Sobrien			/*
31559243Sobrien			 * Just set default values to get things going.
31659243Sobrien			 */
31759243Sobrien			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
31859243Sobrien			nfsva.na_vattr.va_type = VDIR;
31959243Sobrien			nfsva.na_vattr.va_mode = 0777;
32059243Sobrien			nfsva.na_vattr.va_nlink = 100;
32159243Sobrien			nfsva.na_vattr.va_uid = (uid_t)0;
32259243Sobrien			nfsva.na_vattr.va_gid = (gid_t)0;
32359243Sobrien			nfsva.na_vattr.va_fileid = 2;
32459243Sobrien			nfsva.na_vattr.va_gen = 1;
32559243Sobrien			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
32659243Sobrien			nfsva.na_vattr.va_size = 512 * 1024;
32759243Sobrien		}
32859243Sobrien	}
32959243Sobrien	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
33059243Sobrien	if (!error) {
33159243Sobrien	    mtx_lock(&nmp->nm_mtx);
33259243Sobrien	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
33359243Sobrien		nfscl_loadfsinfo(nmp, &fs);
33459243Sobrien	    nfscl_loadsbinfo(nmp, &sb, sbp);
33559243Sobrien	    sbp->f_iosize = newnfs_iosize(nmp);
33659243Sobrien	    mtx_unlock(&nmp->nm_mtx);
33759243Sobrien	    if (sbp != &mp->mnt_stat) {
33859243Sobrien		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
33959243Sobrien		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
34059243Sobrien	    }
34159243Sobrien	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
34259243Sobrien	} else if (NFS_ISV4(vp)) {
34359243Sobrien		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
34459243Sobrien	}
34559243Sobrien	vput(vp);
34659243Sobrien	vfs_unbusy(mp);
34759243Sobrien	return (error);
34859243Sobrien}
34959243Sobrien
35059243Sobrien/*
35159243Sobrien * nfs version 3 fsinfo rpc call
35259243Sobrien */
35359243Sobrienint
35459243Sobrienncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
35559243Sobrien    struct thread *td)
35659243Sobrien{
35759243Sobrien	struct nfsfsinfo fs;
35859243Sobrien	struct nfsvattr nfsva;
35959243Sobrien	int error, attrflag;
36059243Sobrien
36159243Sobrien	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
36259243Sobrien	if (!error) {
36359243Sobrien		if (attrflag)
36459243Sobrien			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
36559243Sobrien			    1);
36659243Sobrien		mtx_lock(&nmp->nm_mtx);
36759243Sobrien		nfscl_loadfsinfo(nmp, &fs);
36859243Sobrien		mtx_unlock(&nmp->nm_mtx);
36959243Sobrien	}
37059243Sobrien	return (error);
37159243Sobrien}
37259243Sobrien
37359243Sobrien/*
37459243Sobrien * Mount a remote root fs via. nfs. This depends on the info in the
37559243Sobrien * nfs_diskless structure that has been filled in properly by some primary
37659243Sobrien * bootstrap.
37759243Sobrien * It goes something like this:
37859243Sobrien * - do enough of "ifconfig" by calling ifioctl() so that the system
37959243Sobrien *   can talk to the server
38059243Sobrien * - If nfs_diskless.mygateway is filled in, use that address as
38159243Sobrien *   a default gateway.
38259243Sobrien * - build the rootfs mount point and call mountnfs() to do the rest.
38359243Sobrien *
38459243Sobrien * It is assumed to be safe to read, modify, and write the nfsv3_diskless
38559243Sobrien * structure, as well as other global NFS client variables here, as
38659243Sobrien * nfs_mountroot() will be called once in the boot before any other NFS
38759243Sobrien * client activity occurs.
38859243Sobrien */
38959243Sobrienstatic int
39059243Sobriennfs_mountroot(struct mount *mp)
39159243Sobrien{
39259243Sobrien	struct thread *td = curthread;
39359243Sobrien	struct nfsv3_diskless *nd = &nfsv3_diskless;
39459243Sobrien	struct socket *so;
39559243Sobrien	struct vnode *vp;
39659243Sobrien	struct ifreq ir;
39759243Sobrien	int error;
39859243Sobrien	u_long l;
39959243Sobrien	char buf[128];
40059243Sobrien	char *cp;
40159243Sobrien
40259243Sobrien#if defined(BOOTP_NFSROOT) && defined(BOOTP)
40359243Sobrien	bootpc_init();		/* use bootp to get nfs_diskless filled in */
40459243Sobrien#elif defined(NFS_ROOT)
40559243Sobrien	nfs_setup_diskless();
40659243Sobrien#endif
40759243Sobrien
40859243Sobrien	if (nfs_diskless_valid == 0)
40959243Sobrien		return (-1);
41059243Sobrien	if (nfs_diskless_valid == 1)
41159243Sobrien		nfs_convert_diskless();
41259243Sobrien
41359243Sobrien	/*
41459243Sobrien	 * XXX splnet, so networks will receive...
41559243Sobrien	 */
41659243Sobrien	splnet();
41759243Sobrien
41859243Sobrien	/*
41959243Sobrien	 * Do enough of ifconfig(8) so that the critical net interface can
42059243Sobrien	 * talk to the server.
42159243Sobrien	 */
42259243Sobrien	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
42359243Sobrien	    td->td_ucred, td);
42459243Sobrien	if (error)
42559243Sobrien		panic("nfs_mountroot: socreate(%04x): %d",
42659243Sobrien			nd->myif.ifra_addr.sa_family, error);
42759243Sobrien
42859243Sobrien#if 0 /* XXX Bad idea */
42959243Sobrien	/*
43059243Sobrien	 * We might not have been told the right interface, so we pass
43159243Sobrien	 * over the first ten interfaces of the same kind, until we get
43259243Sobrien	 * one of them configured.
43359243Sobrien	 */
43459243Sobrien
43559243Sobrien	for (i = strlen(nd->myif.ifra_name) - 1;
43659243Sobrien		nd->myif.ifra_name[i] >= '0' &&
43759243Sobrien		nd->myif.ifra_name[i] <= '9';
43859243Sobrien		nd->myif.ifra_name[i] ++) {
43959243Sobrien		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
44059243Sobrien		if(!error)
44159243Sobrien			break;
44259243Sobrien	}
44359243Sobrien#endif
44459243Sobrien	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
44559243Sobrien	if (error)
44659243Sobrien		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
44759243Sobrien	if ((cp = getenv("boot.netif.mtu")) != NULL) {
44859243Sobrien		ir.ifr_mtu = strtol(cp, NULL, 10);
44959243Sobrien		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
45059243Sobrien		freeenv(cp);
45159243Sobrien		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
45259243Sobrien		if (error)
453100616Smp			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
454100616Smp	}
45559243Sobrien	soclose(so);
456195609Smp
45759243Sobrien	/*
45859243Sobrien	 * If the gateway field is filled in, set it as the default route.
45959243Sobrien	 * Note that pxeboot will set a default route of 0 if the route
46059243Sobrien	 * is not set by the DHCP server.  Check also for a value of 0
46159243Sobrien	 * to avoid panicking inappropriately in that situation.
46259243Sobrien	 */
46359243Sobrien	if (nd->mygateway.sin_len != 0 &&
46459243Sobrien	    nd->mygateway.sin_addr.s_addr != 0) {
46559243Sobrien		struct sockaddr_in mask, sin;
46659243Sobrien
46759243Sobrien		bzero((caddr_t)&mask, sizeof(mask));
46859243Sobrien		sin = mask;
46959243Sobrien		sin.sin_family = AF_INET;
47059243Sobrien		sin.sin_len = sizeof(sin);
47159243Sobrien                /* XXX MRT use table 0 for this sort of thing */
47259243Sobrien		CURVNET_SET(TD_TO_VNET(td));
47359243Sobrien		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
47459243Sobrien		    (struct sockaddr *)&nd->mygateway,
47559243Sobrien		    (struct sockaddr *)&mask,
47659243Sobrien		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
47759243Sobrien		CURVNET_RESTORE();
47859243Sobrien		if (error)
47959243Sobrien			panic("nfs_mountroot: RTM_ADD: %d", error);
48059243Sobrien	}
48159243Sobrien
48259243Sobrien	/*
48359243Sobrien	 * Create the rootfs mount point.
48459243Sobrien	 */
48559243Sobrien	nd->root_args.fh = nd->root_fh;
48659243Sobrien	nd->root_args.fhsize = nd->root_fhsize;
48759243Sobrien	l = ntohl(nd->root_saddr.sin_addr.s_addr);
48859243Sobrien	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
489100616Smp		(l >> 24) & 0xff, (l >> 16) & 0xff,
490100616Smp		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
491100616Smp	printf("NFS ROOT: %s\n", buf);
492100616Smp	nd->root_args.hostname = buf;
493100616Smp	if ((error = nfs_mountdiskless(buf,
494100616Smp	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
495100616Smp		return (error);
496100616Smp	}
497100616Smp
498100616Smp	/*
499100616Smp	 * This is not really an nfs issue, but it is much easier to
500100616Smp	 * set hostname here and then let the "/etc/rc.xxx" files
501100616Smp	 * mount the right /var based upon its preset value.
502100616Smp	 */
503100616Smp	mtx_lock(&prison0.pr_mtx);
504100616Smp	strlcpy(prison0.pr_hostname, nd->my_hostnam,
505100616Smp	    sizeof(prison0.pr_hostname));
506100616Smp	mtx_unlock(&prison0.pr_mtx);
507100616Smp	inittodr(ntohl(nd->root_time));
508100616Smp	return (0);
509100616Smp}
510100616Smp
511100616Smp/*
512100616Smp * Internal version of mount system call for diskless setup.
513100616Smp */
514100616Smpstatic int
515100616Smpnfs_mountdiskless(char *path,
516100616Smp    struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
517100616Smp    struct vnode **vpp, struct mount *mp)
518100616Smp{
519100616Smp	struct sockaddr *nam;
520100616Smp	int dirlen, error;
521100616Smp	char *dirpath;
52259243Sobrien
523	/*
524	 * Find the directory path in "path", which also has the server's
525	 * name/ip address in it.
526	 */
527	dirpath = strchr(path, ':');
528	if (dirpath != NULL)
529		dirlen = strlen(++dirpath);
530	else
531		dirlen = 0;
532	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
533	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
534	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
535	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
536		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
537		return (error);
538	}
539	return (0);
540}
541
542static void
543nfs_sec_name(char *sec, int *flagsp)
544{
545	if (!strcmp(sec, "krb5"))
546		*flagsp |= NFSMNT_KERB;
547	else if (!strcmp(sec, "krb5i"))
548		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
549	else if (!strcmp(sec, "krb5p"))
550		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
551}
552
553static void
554nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
555    const char *hostname, struct ucred *cred, struct thread *td)
556{
557	int s;
558	int adjsock;
559	char *p;
560
561	s = splnet();
562
563	/*
564	 * Set read-only flag if requested; otherwise, clear it if this is
565	 * an update.  If this is not an update, then either the read-only
566	 * flag is already clear, or this is a root mount and it was set
567	 * intentionally at some previous point.
568	 */
569	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
570		MNT_ILOCK(mp);
571		mp->mnt_flag |= MNT_RDONLY;
572		MNT_IUNLOCK(mp);
573	} else if (mp->mnt_flag & MNT_UPDATE) {
574		MNT_ILOCK(mp);
575		mp->mnt_flag &= ~MNT_RDONLY;
576		MNT_IUNLOCK(mp);
577	}
578
579	/*
580	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
581	 * no sense in that context.  Also, set up appropriate retransmit
582	 * and soft timeout behavior.
583	 */
584	if (argp->sotype == SOCK_STREAM) {
585		nmp->nm_flag &= ~NFSMNT_NOCONN;
586		nmp->nm_timeo = NFS_MAXTIMEO;
587		if ((argp->flags & NFSMNT_NFSV4) != 0)
588			nmp->nm_retry = INT_MAX;
589		else
590			nmp->nm_retry = NFS_RETRANS_TCP;
591	}
592
593	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
594	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
595		argp->flags &= ~NFSMNT_RDIRPLUS;
596		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
597	}
598
599	/* Re-bind if rsrvd port requested and wasn't on one */
600	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
601		  && (argp->flags & NFSMNT_RESVPORT);
602	/* Also re-bind if we're switching to/from a connected UDP socket */
603	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
604		    (argp->flags & NFSMNT_NOCONN));
605
606	/* Update flags atomically.  Don't change the lock bits. */
607	nmp->nm_flag = argp->flags | nmp->nm_flag;
608	splx(s);
609
610	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
611		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
612		if (nmp->nm_timeo < NFS_MINTIMEO)
613			nmp->nm_timeo = NFS_MINTIMEO;
614		else if (nmp->nm_timeo > NFS_MAXTIMEO)
615			nmp->nm_timeo = NFS_MAXTIMEO;
616	}
617
618	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
619		nmp->nm_retry = argp->retrans;
620		if (nmp->nm_retry > NFS_MAXREXMIT)
621			nmp->nm_retry = NFS_MAXREXMIT;
622	}
623
624	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
625		nmp->nm_wsize = argp->wsize;
626		/*
627		 * Clip at the power of 2 below the size. There is an
628		 * issue (not isolated) that causes intermittent page
629		 * faults if this is not done.
630		 */
631		if (nmp->nm_wsize > NFS_FABLKSIZE)
632			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
633		else
634			nmp->nm_wsize = NFS_FABLKSIZE;
635	}
636
637	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
638		nmp->nm_rsize = argp->rsize;
639		/*
640		 * Clip at the power of 2 below the size. There is an
641		 * issue (not isolated) that causes intermittent page
642		 * faults if this is not done.
643		 */
644		if (nmp->nm_rsize > NFS_FABLKSIZE)
645			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
646		else
647			nmp->nm_rsize = NFS_FABLKSIZE;
648	}
649
650	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
651		nmp->nm_readdirsize = argp->readdirsize;
652	}
653
654	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
655		nmp->nm_acregmin = argp->acregmin;
656	else
657		nmp->nm_acregmin = NFS_MINATTRTIMO;
658	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
659		nmp->nm_acregmax = argp->acregmax;
660	else
661		nmp->nm_acregmax = NFS_MAXATTRTIMO;
662	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
663		nmp->nm_acdirmin = argp->acdirmin;
664	else
665		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
666	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
667		nmp->nm_acdirmax = argp->acdirmax;
668	else
669		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
670	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
671		nmp->nm_acdirmin = nmp->nm_acdirmax;
672	if (nmp->nm_acregmin > nmp->nm_acregmax)
673		nmp->nm_acregmin = nmp->nm_acregmax;
674
675	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
676		if (argp->readahead <= NFS_MAXRAHEAD)
677			nmp->nm_readahead = argp->readahead;
678		else
679			nmp->nm_readahead = NFS_MAXRAHEAD;
680	}
681	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
682		if (argp->wcommitsize < nmp->nm_wsize)
683			nmp->nm_wcommitsize = nmp->nm_wsize;
684		else
685			nmp->nm_wcommitsize = argp->wcommitsize;
686	}
687
688	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
689		    (nmp->nm_soproto != argp->proto));
690
691	if (nmp->nm_client != NULL && adjsock) {
692		int haslock = 0, error = 0;
693
694		if (nmp->nm_sotype == SOCK_STREAM) {
695			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
696			if (!error)
697				haslock = 1;
698		}
699		if (!error) {
700		    newnfs_disconnect(&nmp->nm_sockreq);
701		    if (haslock)
702			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
703		    nmp->nm_sotype = argp->sotype;
704		    nmp->nm_soproto = argp->proto;
705		    if (nmp->nm_sotype == SOCK_DGRAM)
706			while (newnfs_connect(nmp, &nmp->nm_sockreq,
707			    cred, td, 0)) {
708				printf("newnfs_args: retrying connect\n");
709				(void) nfs_catnap(PSOCK, 0, "newnfscon");
710			}
711		}
712	} else {
713		nmp->nm_sotype = argp->sotype;
714		nmp->nm_soproto = argp->proto;
715	}
716
717	if (hostname != NULL) {
718		strlcpy(nmp->nm_hostname, hostname,
719		    sizeof(nmp->nm_hostname));
720		p = strchr(nmp->nm_hostname, ':');
721		if (p != NULL)
722			*p = '\0';
723	}
724}
725
/*
 * Mount option names accepted by this filesystem.  nfs_mount() passes
 * the list to vfs_filteropt(); it is terminated by a NULL entry.
 */
static const char *nfs_opts[] = {
	"from", "nfs_args", "noac", "noatime",
	"noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel",
	"acls", "force", "update", "async",
	"noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize", "soft",
	"hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans", "actimeo",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo",
	"timeout", "addr", "fh", "nfsv3",
	"sec", "principal", "nfsv4", "gssname",
	"allgssname", "dirpath", "minorversion", "nametimeo",
	"negnametimeo", "nocto", "noncontigwr", "pnfs",
	"wcommitsize",
	NULL
};
737
738/*
739 * VFS Operations.
740 *
741 * mount system call
742 * It seems a bit dumb to copyinstr() the host and path here and then
743 * bcopy() them in mountnfs(), but I wanted to detect errors before
744 * doing the sockargs() call because sockargs() allocates an mbuf and
745 * an error after that means that I have to release the mbuf.
746 */
747/* ARGSUSED */
748static int
749nfs_mount(struct mount *mp)
750{
751	struct nfs_args args = {
752	    .version = NFS_ARGSVERSION,
753	    .addr = NULL,
754	    .addrlen = sizeof (struct sockaddr_in),
755	    .sotype = SOCK_STREAM,
756	    .proto = 0,
757	    .fh = NULL,
758	    .fhsize = 0,
759	    .flags = NFSMNT_RESVPORT,
760	    .wsize = NFS_WSIZE,
761	    .rsize = NFS_RSIZE,
762	    .readdirsize = NFS_READDIRSIZE,
763	    .timeo = 10,
764	    .retrans = NFS_RETRANS,
765	    .readahead = NFS_DEFRAHEAD,
766	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
767	    .hostname = NULL,
768	    .acregmin = NFS_MINATTRTIMO,
769	    .acregmax = NFS_MAXATTRTIMO,
770	    .acdirmin = NFS_MINDIRATTRTIMO,
771	    .acdirmax = NFS_MAXDIRATTRTIMO,
772	};
773	int error = 0, ret, len;
774	struct sockaddr *nam = NULL;
775	struct vnode *vp;
776	struct thread *td;
777	char hst[MNAMELEN];
778	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
779	char *cp, *opt, *name, *secname;
780	int nametimeo = NFS_DEFAULT_NAMETIMEO;
781	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
782	int minvers = 0;
783	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
784	size_t hstlen;
785
786	has_nfs_args_opt = 0;
787	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
788		error = EINVAL;
789		goto out;
790	}
791
792	td = curthread;
793	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
794		error = nfs_mountroot(mp);
795		goto out;
796	}
797
798	nfscl_init();
799
800	/*
801	 * The old mount_nfs program passed the struct nfs_args
802	 * from userspace to kernel.  The new mount_nfs program
803	 * passes string options via nmount() from userspace to kernel
804	 * and we populate the struct nfs_args in the kernel.
805	 */
806	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
807		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
808		    sizeof(args));
809		if (error != 0)
810			goto out;
811
812		if (args.version != NFS_ARGSVERSION) {
813			error = EPROGMISMATCH;
814			goto out;
815		}
816		has_nfs_args_opt = 1;
817	}
818
819	/* Handle the new style options. */
820	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
821		args.acdirmin = args.acdirmax =
822		    args.acregmin = args.acregmax = 0;
823		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
824		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
825	}
826	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
827		args.flags |= NFSMNT_NOCONN;
828	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
829		args.flags &= ~NFSMNT_NOCONN;
830	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
831		args.flags |= NFSMNT_NOLOCKD;
832	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
833		args.flags &= ~NFSMNT_NOLOCKD;
834	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
835		args.flags |= NFSMNT_INT;
836	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
837		args.flags |= NFSMNT_RDIRPLUS;
838	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
839		args.flags |= NFSMNT_RESVPORT;
840	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
841		args.flags &= ~NFSMNT_RESVPORT;
842	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
843		args.flags |= NFSMNT_SOFT;
844	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
845		args.flags &= ~NFSMNT_SOFT;
846	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
847		args.sotype = SOCK_DGRAM;
848	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
849		args.sotype = SOCK_DGRAM;
850	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
851		args.sotype = SOCK_STREAM;
852	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
853		args.flags |= NFSMNT_NFSV3;
854	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
855		args.flags |= NFSMNT_NFSV4;
856		args.sotype = SOCK_STREAM;
857	}
858	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
859		args.flags |= NFSMNT_ALLGSSNAME;
860	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
861		args.flags |= NFSMNT_NOCTO;
862	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
863		args.flags |= NFSMNT_NONCONTIGWR;
864	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
865		args.flags |= NFSMNT_PNFS;
866	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
867		if (opt == NULL) {
868			vfs_mount_error(mp, "illegal readdirsize");
869			error = EINVAL;
870			goto out;
871		}
872		ret = sscanf(opt, "%d", &args.readdirsize);
873		if (ret != 1 || args.readdirsize <= 0) {
874			vfs_mount_error(mp, "illegal readdirsize: %s",
875			    opt);
876			error = EINVAL;
877			goto out;
878		}
879		args.flags |= NFSMNT_READDIRSIZE;
880	}
881	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
882		if (opt == NULL) {
883			vfs_mount_error(mp, "illegal readahead");
884			error = EINVAL;
885			goto out;
886		}
887		ret = sscanf(opt, "%d", &args.readahead);
888		if (ret != 1 || args.readahead <= 0) {
889			vfs_mount_error(mp, "illegal readahead: %s",
890			    opt);
891			error = EINVAL;
892			goto out;
893		}
894		args.flags |= NFSMNT_READAHEAD;
895	}
896	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
897		if (opt == NULL) {
898			vfs_mount_error(mp, "illegal wsize");
899			error = EINVAL;
900			goto out;
901		}
902		ret = sscanf(opt, "%d", &args.wsize);
903		if (ret != 1 || args.wsize <= 0) {
904			vfs_mount_error(mp, "illegal wsize: %s",
905			    opt);
906			error = EINVAL;
907			goto out;
908		}
909		args.flags |= NFSMNT_WSIZE;
910	}
911	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
912		if (opt == NULL) {
913			vfs_mount_error(mp, "illegal rsize");
914			error = EINVAL;
915			goto out;
916		}
917		ret = sscanf(opt, "%d", &args.rsize);
918		if (ret != 1 || args.rsize <= 0) {
919			vfs_mount_error(mp, "illegal wsize: %s",
920			    opt);
921			error = EINVAL;
922			goto out;
923		}
924		args.flags |= NFSMNT_RSIZE;
925	}
926	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
927		if (opt == NULL) {
928			vfs_mount_error(mp, "illegal retrans");
929			error = EINVAL;
930			goto out;
931		}
932		ret = sscanf(opt, "%d", &args.retrans);
933		if (ret != 1 || args.retrans <= 0) {
934			vfs_mount_error(mp, "illegal retrans: %s",
935			    opt);
936			error = EINVAL;
937			goto out;
938		}
939		args.flags |= NFSMNT_RETRANS;
940	}
941	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
942		ret = sscanf(opt, "%d", &args.acregmin);
943		if (ret != 1 || args.acregmin < 0) {
944			vfs_mount_error(mp, "illegal actimeo: %s",
945			    opt);
946			error = EINVAL;
947			goto out;
948		}
949		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
950		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
951		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
952	}
953	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
954		ret = sscanf(opt, "%d", &args.acregmin);
955		if (ret != 1 || args.acregmin < 0) {
956			vfs_mount_error(mp, "illegal acregmin: %s",
957			    opt);
958			error = EINVAL;
959			goto out;
960		}
961		args.flags |= NFSMNT_ACREGMIN;
962	}
963	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
964		ret = sscanf(opt, "%d", &args.acregmax);
965		if (ret != 1 || args.acregmax < 0) {
966			vfs_mount_error(mp, "illegal acregmax: %s",
967			    opt);
968			error = EINVAL;
969			goto out;
970		}
971		args.flags |= NFSMNT_ACREGMAX;
972	}
973	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
974		ret = sscanf(opt, "%d", &args.acdirmin);
975		if (ret != 1 || args.acdirmin < 0) {
976			vfs_mount_error(mp, "illegal acdirmin: %s",
977			    opt);
978			error = EINVAL;
979			goto out;
980		}
981		args.flags |= NFSMNT_ACDIRMIN;
982	}
983	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
984		ret = sscanf(opt, "%d", &args.acdirmax);
985		if (ret != 1 || args.acdirmax < 0) {
986			vfs_mount_error(mp, "illegal acdirmax: %s",
987			    opt);
988			error = EINVAL;
989			goto out;
990		}
991		args.flags |= NFSMNT_ACDIRMAX;
992	}
993	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
994		ret = sscanf(opt, "%d", &args.wcommitsize);
995		if (ret != 1 || args.wcommitsize < 0) {
996			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
997			error = EINVAL;
998			goto out;
999		}
1000		args.flags |= NFSMNT_WCOMMITSIZE;
1001	}
1002	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1003		ret = sscanf(opt, "%d", &args.timeo);
1004		if (ret != 1 || args.timeo <= 0) {
1005			vfs_mount_error(mp, "illegal timeo: %s",
1006			    opt);
1007			error = EINVAL;
1008			goto out;
1009		}
1010		args.flags |= NFSMNT_TIMEO;
1011	}
1012	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1013		ret = sscanf(opt, "%d", &args.timeo);
1014		if (ret != 1 || args.timeo <= 0) {
1015			vfs_mount_error(mp, "illegal timeout: %s",
1016			    opt);
1017			error = EINVAL;
1018			goto out;
1019		}
1020		args.flags |= NFSMNT_TIMEO;
1021	}
1022	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1023		ret = sscanf(opt, "%d", &nametimeo);
1024		if (ret != 1 || nametimeo < 0) {
1025			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1026			error = EINVAL;
1027			goto out;
1028		}
1029	}
1030	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1031	    == 0) {
1032		ret = sscanf(opt, "%d", &negnametimeo);
1033		if (ret != 1 || negnametimeo < 0) {
1034			vfs_mount_error(mp, "illegal negnametimeo: %s",
1035			    opt);
1036			error = EINVAL;
1037			goto out;
1038		}
1039	}
1040	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1041	    0) {
1042		ret = sscanf(opt, "%d", &minvers);
1043		if (ret != 1 || minvers < 0 || minvers > 1 ||
1044		    (args.flags & NFSMNT_NFSV4) == 0) {
1045			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1046			error = EINVAL;
1047			goto out;
1048		}
1049	}
1050	if (vfs_getopt(mp->mnt_optnew, "sec",
1051		(void **) &secname, NULL) == 0)
1052		nfs_sec_name(secname, &args.flags);
1053
1054	if (mp->mnt_flag & MNT_UPDATE) {
1055		struct nfsmount *nmp = VFSTONFS(mp);
1056
1057		if (nmp == NULL) {
1058			error = EIO;
1059			goto out;
1060		}
1061
1062		/*
1063		 * If a change from TCP->UDP is done and there are thread(s)
1064		 * that have I/O RPC(s) in progress with a tranfer size
1065		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1066		 * hung, retrying the RPC(s) forever. Usually these threads
1067		 * will be seen doing an uninterruptible sleep on wait channel
1068		 * "newnfsreq" (truncated to "newnfsre" by procstat).
1069		 */
1070		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1071			tprintf(td->td_proc, LOG_WARNING,
1072	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1073
1074		/*
1075		 * When doing an update, we can't change version,
1076		 * security, switch lockd strategies or change cookie
1077		 * translation
1078		 */
1079		args.flags = (args.flags &
1080		    ~(NFSMNT_NFSV3 |
1081		      NFSMNT_NFSV4 |
1082		      NFSMNT_KERB |
1083		      NFSMNT_INTEGRITY |
1084		      NFSMNT_PRIVACY |
1085		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1086		    (nmp->nm_flag &
1087			(NFSMNT_NFSV3 |
1088			 NFSMNT_NFSV4 |
1089			 NFSMNT_KERB |
1090			 NFSMNT_INTEGRITY |
1091			 NFSMNT_PRIVACY |
1092			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1093		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1094		goto out;
1095	}
1096
1097	/*
1098	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1099	 * or no-connection mode for those protocols that support
1100	 * no-connection mode (the flag will be cleared later for protocols
1101	 * that do not support no-connection mode).  This will allow a client
1102	 * to receive replies from a different IP then the request was
1103	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1104	 * not 0.
1105	 */
1106	if (nfs_ip_paranoia == 0)
1107		args.flags |= NFSMNT_NOCONN;
1108
1109	if (has_nfs_args_opt != 0) {
1110		/*
1111		 * In the 'nfs_args' case, the pointers in the args
1112		 * structure are in userland - we copy them in here.
1113		 */
1114		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1115			vfs_mount_error(mp, "Bad file handle");
1116			error = EINVAL;
1117			goto out;
1118		}
1119		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1120		    args.fhsize);
1121		if (error != 0)
1122			goto out;
1123		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1124		if (error != 0)
1125			goto out;
1126		bzero(&hst[hstlen], MNAMELEN - hstlen);
1127		args.hostname = hst;
1128		/* sockargs() call must be after above copyin() calls */
1129		error = getsockaddr(&nam, (caddr_t)args.addr,
1130		    args.addrlen);
1131		if (error != 0)
1132			goto out;
1133	} else {
1134		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1135		    &args.fhsize) == 0) {
1136			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1137				vfs_mount_error(mp, "Bad file handle");
1138				error = EINVAL;
1139				goto out;
1140			}
1141			bcopy(args.fh, nfh, args.fhsize);
1142		} else {
1143			args.fhsize = 0;
1144		}
1145		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1146		    (void **)&args.hostname, &len);
1147		if (args.hostname == NULL) {
1148			vfs_mount_error(mp, "Invalid hostname");
1149			error = EINVAL;
1150			goto out;
1151		}
1152		bcopy(args.hostname, hst, MNAMELEN);
1153		hst[MNAMELEN - 1] = '\0';
1154	}
1155
1156	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1157		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1158	else {
1159		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1160		cp = strchr(srvkrbname, ':');
1161		if (cp != NULL)
1162			*cp = '\0';
1163	}
1164	srvkrbnamelen = strlen(srvkrbname);
1165
1166	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1167		strlcpy(krbname, name, sizeof (krbname));
1168	else
1169		krbname[0] = '\0';
1170	krbnamelen = strlen(krbname);
1171
1172	if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0)
1173		strlcpy(dirpath, name, sizeof (dirpath));
1174	else
1175		dirpath[0] = '\0';
1176	dirlen = strlen(dirpath);
1177
1178	if (has_nfs_args_opt == 0) {
1179		if (vfs_getopt(mp->mnt_optnew, "addr",
1180		    (void **)&args.addr, &args.addrlen) == 0) {
1181			if (args.addrlen > SOCK_MAXADDRLEN) {
1182				error = ENAMETOOLONG;
1183				goto out;
1184			}
1185			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1186			bcopy(args.addr, nam, args.addrlen);
1187			nam->sa_len = args.addrlen;
1188		} else {
1189			vfs_mount_error(mp, "No server address");
1190			error = EINVAL;
1191			goto out;
1192		}
1193	}
1194
1195	args.fh = nfh;
1196	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1197	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1198	    nametimeo, negnametimeo, minvers);
1199out:
1200	if (!error) {
1201		MNT_ILOCK(mp);
1202		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1203		    MNTK_USES_BCACHE;
1204		MNT_IUNLOCK(mp);
1205	}
1206	return (error);
1207}
1208
1209
1210/*
1211 * VFS Operations.
1212 *
1213 * mount system call
1214 * It seems a bit dumb to copyinstr() the host and path here and then
1215 * bcopy() them in mountnfs(), but I wanted to detect errors before
1216 * doing the sockargs() call because sockargs() allocates an mbuf and
1217 * an error after that means that I have to release the mbuf.
1218 */
1219/* ARGSUSED */
1220static int
1221nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1222{
1223	int error;
1224	struct nfs_args args;
1225
1226	error = copyin(data, &args, sizeof (struct nfs_args));
1227	if (error)
1228		return error;
1229
1230	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1231
1232	error = kernel_mount(ma, flags);
1233	return (error);
1234}
1235
/*
 * Common code for mount and mountroot
 *
 * Allocates and fills in the nfsmount for mp from argp and the name
 * arguments, connects to the server with newnfs_connect() and gets the
 * root vnode.
 *
 * On success 0 is returned, *vpp is the root vnode (unlocked, but still
 * referenced) and nam is kept in nmp->nm_nam.  On failure the nfsmount,
 * its credential, mutexes and nam are all released here and the error
 * is returned.  With MNT_UPDATE set nothing is done except printing a
 * message and freeing nam; 0 is returned.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/* Shared by all mounts; seeded from nfsboottime on first use. */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		FREE(nam, M_SONAME);
		return (0);
	} else {
		/*
		 * The allocation is the structure plus room for the three
		 * names and 2 more bytes; it is zero-filled (M_ZERO).
		 */
		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		/* Each new mount takes the next counter value. */
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* Reference on cred is dropped by crfree() at "bad". */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;

	/* This is empirical approximation of sqrt(hibufspace) * 256. */
	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
		nmp->nm_wcommitsize *= 2;
	nmp->nm_wcommitsize *= 256;

	/* The minor version only applies to NFSv4 mounts. */
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/* Neither V3 nor V4 requested: use the default transfer sizes. */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Take the tprintf delays from the globals, clamping negatives to 0. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	/* All MNAMELEN bytes of hst are copied, not just the string. */
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* Up to 3 attempts, with nfs_catnap() after each failure. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error) {
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
			goto bad;
		}
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if (nmp->nm_minorvers > 0) {
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping 0 if it wraps. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp, 0);
		return (0);
	}
	/* No file handle was supplied or looked up: fail the mount. */
	error = EIO;

bad:
	/*
	 * Undo everything set up above.
	 * NOTE(review): mp->mnt_data still points at nmp after it is
	 * freed below - confirm no caller uses it after a failed mount.
	 */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
		nfscl_freenfsclds(dsp);
	FREE(nmp, M_NEWNFSMNT);
	FREE(nam, M_SONAME);
	return (error);
}
1518
1519/*
1520 * unmount system call
1521 */
1522static int
1523nfs_unmount(struct mount *mp, int mntflags)
1524{
1525	struct thread *td;
1526	struct nfsmount *nmp;
1527	int error, flags = 0, i, trycnt = 0;
1528	struct nfsclds *dsp, *tdsp;
1529
1530	td = curthread;
1531
1532	if (mntflags & MNT_FORCE)
1533		flags |= FORCECLOSE;
1534	nmp = VFSTONFS(mp);
1535	/*
1536	 * Goes something like this..
1537	 * - Call vflush() to clear out vnodes for this filesystem
1538	 * - Close the socket
1539	 * - Free up the data structures
1540	 */
1541	/* In the forced case, cancel any outstanding requests. */
1542	if (mntflags & MNT_FORCE) {
1543		error = newnfs_nmcancelreqs(nmp);
1544		if (error)
1545			goto out;
1546		/* For a forced close, get rid of the renew thread now */
1547		nfscl_umount(nmp, td);
1548	}
1549	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1550	do {
1551		error = vflush(mp, 1, flags, td);
1552		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1553			(void) nfs_catnap(PSOCK, error, "newndm");
1554	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1555	if (error)
1556		goto out;
1557
1558	/*
1559	 * We are now committed to the unmount.
1560	 */
1561	if ((mntflags & MNT_FORCE) == 0)
1562		nfscl_umount(nmp, td);
1563	/* Make sure no nfsiods are assigned to this mount. */
1564	mtx_lock(&ncl_iod_mutex);
1565	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1566		if (ncl_iodmount[i] == nmp) {
1567			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1568			ncl_iodmount[i] = NULL;
1569		}
1570	mtx_unlock(&ncl_iod_mutex);
1571	newnfs_disconnect(&nmp->nm_sockreq);
1572	crfree(nmp->nm_sockreq.nr_cred);
1573	FREE(nmp->nm_nam, M_SONAME);
1574	if (nmp->nm_sockreq.nr_auth != NULL)
1575		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1576	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1577	mtx_destroy(&nmp->nm_mtx);
1578	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1579		nfscl_freenfsclds(dsp);
1580	FREE(nmp, M_NEWNFSMNT);
1581out:
1582	return (error);
1583}
1584
1585/*
1586 * Return root of a filesystem
1587 */
1588static int
1589nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1590{
1591	struct vnode *vp;
1592	struct nfsmount *nmp;
1593	struct nfsnode *np;
1594	int error;
1595
1596	nmp = VFSTONFS(mp);
1597	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1598	if (error)
1599		return error;
1600	vp = NFSTOV(np);
1601	/*
1602	 * Get transfer parameters and attributes for root vnode once.
1603	 */
1604	mtx_lock(&nmp->nm_mtx);
1605	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1606		mtx_unlock(&nmp->nm_mtx);
1607		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1608	} else
1609		mtx_unlock(&nmp->nm_mtx);
1610	if (vp->v_type == VNON)
1611	    vp->v_type = VDIR;
1612	vp->v_vflag |= VV_ROOT;
1613	*vpp = vp;
1614	return (0);
1615}
1616
1617/*
1618 * Flush out the buffer cache
1619 */
1620/* ARGSUSED */
1621static int
1622nfs_sync(struct mount *mp, int waitfor)
1623{
1624	struct vnode *vp, *mvp;
1625	struct thread *td;
1626	int error, allerror = 0;
1627
1628	td = curthread;
1629
1630	MNT_ILOCK(mp);
1631	/*
1632	 * If a forced dismount is in progress, return from here so that
1633	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1634	 * calling VFS_UNMOUNT().
1635	 */
1636	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1637		MNT_IUNLOCK(mp);
1638		return (EBADF);
1639	}
1640	MNT_IUNLOCK(mp);
1641
1642	/*
1643	 * Force stale buffer cache information to be flushed.
1644	 */
1645loop:
1646	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1647		/* XXX Racy bv_cnt check. */
1648		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1649		    waitfor == MNT_LAZY) {
1650			VI_UNLOCK(vp);
1651			continue;
1652		}
1653		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1654			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1655			goto loop;
1656		}
1657		error = VOP_FSYNC(vp, waitfor, td);
1658		if (error)
1659			allerror = error;
1660		NFSVOPUNLOCK(vp, 0);
1661		vrele(vp);
1662	}
1663	return (allerror);
1664}
1665
1666static int
1667nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1668{
1669	struct nfsmount *nmp = VFSTONFS(mp);
1670	struct vfsquery vq;
1671	int error;
1672
1673	bzero(&vq, sizeof(vq));
1674	switch (op) {
1675#if 0
1676	case VFS_CTL_NOLOCKS:
1677		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1678 		if (req->oldptr != NULL) {
1679 			error = SYSCTL_OUT(req, &val, sizeof(val));
1680 			if (error)
1681 				return (error);
1682 		}
1683 		if (req->newptr != NULL) {
1684 			error = SYSCTL_IN(req, &val, sizeof(val));
1685 			if (error)
1686 				return (error);
1687			if (val)
1688				nmp->nm_flag |= NFSMNT_NOLOCKS;
1689			else
1690				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1691 		}
1692		break;
1693#endif
1694	case VFS_CTL_QUERY:
1695		mtx_lock(&nmp->nm_mtx);
1696		if (nmp->nm_state & NFSSTA_TIMEO)
1697			vq.vq_flags |= VQ_NOTRESP;
1698		mtx_unlock(&nmp->nm_mtx);
1699#if 0
1700		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1701		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1702			vq.vq_flags |= VQ_NOTRESPLOCK;
1703#endif
1704		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1705		break;
1706 	case VFS_CTL_TIMEO:
1707 		if (req->oldptr != NULL) {
1708 			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1709 			    sizeof(nmp->nm_tprintf_initial_delay));
1710 			if (error)
1711 				return (error);
1712 		}
1713 		if (req->newptr != NULL) {
1714			error = vfs_suser(mp, req->td);
1715			if (error)
1716				return (error);
1717 			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1718 			    sizeof(nmp->nm_tprintf_initial_delay));
1719 			if (error)
1720 				return (error);
1721 			if (nmp->nm_tprintf_initial_delay < 0)
1722 				nmp->nm_tprintf_initial_delay = 0;
1723 		}
1724		break;
1725	default:
1726		return (ENOTSUP);
1727	}
1728	return (0);
1729}
1730
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel call is not reported.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1743
1744/*
1745 * Extract the information needed by the nlm from the nfs vnode.
1746 */
1747static void
1748nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1749    struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1750    struct timeval *timeop)
1751{
1752	struct nfsmount *nmp;
1753	struct nfsnode *np = VTONFS(vp);
1754
1755	nmp = VFSTONFS(vp->v_mount);
1756	if (fhlenp != NULL)
1757		*fhlenp = (size_t)np->n_fhp->nfh_len;
1758	if (fhp != NULL)
1759		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1760	if (sp != NULL)
1761		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1762	if (is_v3p != NULL)
1763		*is_v3p = NFS_ISV3(vp);
1764	if (sizep != NULL)
1765		*sizep = np->n_size;
1766	if (timeop != NULL) {
1767		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1768		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1769	}
1770}
1771
/*
 * Append an option name to the output buffer when the conditional
 * argument is non-zero.
 *
 *	nmp	unused; kept so the signature matches nfscl_printoptval()
 *	testval	the option is appended only when this is non-zero
 *	opt	option text to append (including any leading ',')
 *	buf	in/out: current write position within the output buffer
 *	blen	in/out: number of bytes left at *buf, including the byte
 *		needed for the terminating NUL
 *
 * The option is appended only if it fits completely, terminating NUL
 * included.  Otherwise *buf, *blen and the buffer are left untouched.
 * On success *buf is left pointing at the new terminating NUL.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;

	if (testval == 0)
		return;

	/* Measure the option once; it is needed for the check and update. */
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	/*
	 * The check above guarantees that the whole string plus its NUL
	 * fits, so snprintf() cannot truncate and its return value always
	 * equals optlen.
	 */
	(void)snprintf(*buf, *blen, "%s", opt);
	*buf += optlen;
	*blen -= optlen;
}
1789
/*
 * Append an option and its integer value, formatted as "opt=value", to
 * the output buffer.
 *
 *	nmp	unused
 *	optval	integer value printed after the '='
 *	opt	option name to print (including any leading ',')
 *	buf	in/out: current write position within the output buffer
 *	blen	in/out: number of bytes left at *buf, including the byte
 *		needed for the terminating NUL
 *
 * The text is appended only if all of it fits.  If it does not, the
 * buffer is re-terminated at *buf so that no partial "opt=val" fragment
 * is left behind, and *buf and *blen are not changed.  A cut-off value
 * would otherwise read as a valid but wrong number.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/*
	 * Cheap pre-check: there must be room for more than the name and
	 * the '='.  This also guarantees that **buf is writable below.
	 */
	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len < 0 || (size_t)len >= *blen) {
		/* Did not fit: discard the truncated output. */
		**buf = '\0';
		return;
	}
	*buf += len;
	*blen -= (size_t)len;
}
1807
1808/*
1809 * Load the option flags and values into the buffer.
1810 */
1811void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1812{
1813	char *buf;
1814	size_t blen;
1815
1816	buf = buffer;
1817	blen = buflen;
1818	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1819	    &blen);
1820	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1821		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1822		    &blen);
1823		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1824		    &buf, &blen);
1825	}
1826	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1827	    &blen);
1828	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1829	    "nfsv2", &buf, &blen);
1830	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1831	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1832	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1833	    &buf, &blen);
1834	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1835	    &buf, &blen);
1836	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1837	    &blen);
1838	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1839	    &blen);
1840	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1841	    &blen);
1842	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1843	    &blen);
1844	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1845	    &blen);
1846	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1847	    ",noncontigwr", &buf, &blen);
1848	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1849	    0, ",lockd", &buf, &blen);
1850	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1851	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1852	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1853	    &buf, &blen);
1854	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1855	    &buf, &blen);
1856	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1857	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1858	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1859	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1860	    &buf, &blen);
1861	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1862	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1863	    &buf, &blen);
1864	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1865	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1866	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1867	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1868	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1869	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1870	    &blen);
1871	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1872	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1873	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1874	    &blen);
1875	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1876	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1877	    &blen);
1878	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
1879	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
1880}
1881
1882