1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2009 Rick Macklem, University of Guelph
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 */
29
30#include <sys/cdefs.h>
31#include "opt_inet.h"
32#include "opt_inet6.h"
33#include <sys/extattr.h>
34#include <fs/nfs/nfsport.h>
35
/*
 * File-scope variables with external linkage defined by this file.
 */
/* When 0, nfsrv_setclient() clears lc_program so no callbacks are done. */
int nfsrv_issuedelegs = 0;
int nfsrv_dolocallocks = 0;
/*
 * Acquired via nfsv4_lock() by nfsrv_setclient(), nfsrv_getclient()
 * (Confirm case) and nfsrv_destroyclient() to lock out other nfsd threads
 * while the client hash lists are being changed.
 */
struct nfsv4lock nfsv4rootfs_lock;
time_t nfsdev_time = 0;
int nfsrv_layouthashsize;
volatile int nfsrv_layoutcnt = 0;

/* Defined here via the NFSD_VNET_DEFINE() wrapper. */
NFSD_VNET_DEFINE(struct nfsrv_stablefirst, nfsrv_stablefirst);

/* Declared here via NFSD_VNET_DECLARE(); the definitions live elsewhere. */
NFSD_VNET_DECLARE(int, nfsrv_numnfsd);
/* The srvclients counter is bumped whenever a client is linked in. */
NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p);
47
/*
 * Variables defined in other source files that this file references.
 */
extern uint32_t nfs_srvmaxio;
/* Added to NFSD_MONOSEC when computing lc_delegtime in nfsrv_getclient(). */
extern int nfsrv_lease;
extern struct timeval nfsboottime;
extern u_int32_t newnfs_true, newnfs_false;
extern struct mtx nfsrv_dslock_mtx;
extern struct mtx nfsrv_recalllock_mtx;
extern struct mtx nfsrv_dontlistlock_mtx;
extern int nfsd_debuglevel;
extern u_int nfsrv_dsdirsize;
extern struct nfsdevicehead nfsrv_devidhead;
extern int nfsrv_doflexfile;
extern int nfsrv_maxpnfsmirror;
/*
 * Macros supplied by the NFS port headers.
 * NOTE(review): presumably these declare the mutexes operated on by
 * NFSLOCKV4ROOTMUTEX()/NFSUNLOCKV4ROOTMUTEX() and
 * NFSLOCKSTATE()/NFSUNLOCKSTATE() - confirm against nfsport.h.
 */
NFSV4ROOTLOCKMUTEX;
NFSSTATESPINLOCK;
extern struct nfsdontlisthead nfsrv_dontlisthead;
extern volatile int nfsrv_devidcnt;
extern struct nfslayouthead nfsrv_recalllisthead;
extern char *nfsrv_zeropnfsdat;
66
/*
 * Knobs exported under the _vfs_nfsd sysctl node.  The hash table sizes
 * and the layout high water mark use CTLFLAG_RDTUN and, per their
 * descriptions, are set via loader.conf; the remaining knobs are
 * CTLFLAG_RW (or CTLFLAG_RWTUN) and can be changed at run time.
 */
SYSCTL_DECL(_vfs_nfsd);
/* Number of lc_stateid[] hash buckets in each struct nfsclient. */
int	nfsrv_statehashsize = NFSSTATEHASHSIZE;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
    &nfsrv_statehashsize, 0,
    "Size of state hash table set via loader.conf");

/* Number of buckets in the nfsclienthash table. */
int	nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
    &nfsrv_clienthashsize, 0,
    "Size of client hash table set via loader.conf");

/*
 * Note that the sysctl is named "fhhashsize" although the variable behind
 * it is nfsrv_lockhashsize.
 */
int	nfsrv_lockhashsize = NFSLOCKHASHSIZE;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
    &nfsrv_lockhashsize, 0,
    "Size of file handle hash table set via loader.conf");

int	nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
    &nfsrv_sessionhashsize, 0,
    "Size of session hash table set via loader.conf");

int	nfsrv_layouthighwater = NFSLAYOUTHIGHWATER;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN,
    &nfsrv_layouthighwater, 0,
    "High water mark for number of layouts set via loader.conf");

/*
 * nfsrv_setclient() replies NFSERR_RESOURCE once nfsrv_openpluslock
 * exceeds this limit.
 */
static int	nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
    &nfsrv_v4statelimit, 0,
    "High water limit for NFSv4 opens+locks+delegations");

static int	nfsrv_writedelegifpos = 0;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
    &nfsrv_writedelegifpos, 0,
    "Issue a write delegation for read opens if possible");

static int	nfsrv_allowreadforwriteopen = 1;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
    &nfsrv_allowreadforwriteopen, 0,
    "Allow Reads to be done with Write Access StateIDs");

/* Exported as "pnfsstrictatime"; the variable is named nfsrv_pnfsatime. */
int	nfsrv_pnfsatime = 0;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW,
    &nfsrv_pnfsatime, 0,
    "For pNFS service, do Getattr ops to keep atime up-to-date");

int	nfsrv_flexlinuxhack = 0;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
    &nfsrv_flexlinuxhack, 0,
    "For Linux clients, hack around Flex File Layout bug");
117
/*
 * Hash lists for nfs V4.
 * nfsclienthash is an array of nfsrv_clienthashsize list heads; it is
 * indexed directly when searching by client id string and through
 * NFSCLIENTHASH() when looking up or inserting by clientid.
 */
NFSD_VNET_DEFINE(struct nfsclienthashhead *, nfsclienthash);
NFSD_VNET_DEFINE(struct nfslockhashhead *, nfslockhash);
NFSD_VNET_DEFINE(struct nfssessionhash *, nfssessionhash);

struct nfslayouthash		*nfslayouthash;
volatile int nfsrv_dontlistlen = 0;

/*
 * Counters and flags private to this file.
 * nfsrv_openpluslock and nfsrv_clients are each incremented when
 * nfsrv_setclient() links a client structure into nfsclienthash.
 */
static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
/* When non-zero, nfsrv_setclient() disables callbacks for AUTH_GSS. */
static int nfsrv_nogsscallback = 0;
static volatile int nfsrv_writedelegcnt = 0;
static int nfsrv_faildscnt;

/*
 * Used as lval[0] of every clientid issued by nfsrv_setclient(); a
 * clientid whose lval[0] differs gets NFSERR_STALECLIENTID from
 * nfsrv_getclient() and nfsrv_destroyclient().
 */
NFSD_VNET_DEFINE_STATIC(time_t, nfsrvboottime);
136
/*
 * Prototypes for the functions local to this file.  All of them have
 * static linkage; they are declared up front so that they can be called
 * before their definitions appear.
 */
static void nfsrv_dumpaclient(struct nfsclient *clp,
    struct nfsd_dumpclients *dumpp);
static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
    NFSPROC_T *p);
static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
    NFSPROC_T *p);
static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
    NFSPROC_T *p);
static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
    int cansleep, NFSPROC_T *p);
static void nfsrv_freenfslock(struct nfslock *lop);
static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
static void nfsrv_freedeleg(struct nfsstate *);
static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
    u_int32_t flags, struct nfsstate **stpp);
static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
    struct nfsstate **stpp);
static int nfsrv_getlockfh(vnode_t vp, u_short flags,
    struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
static void nfsrv_insertlock(struct nfslock *new_lop,
    struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
    struct nfslock **other_lopp, struct nfslockfile *lfp);
static int nfsrv_getipnumber(u_char *cp);
static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
    nfsv4stateid_t *stateidp, int specialid);
static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
    u_int32_t flags);
static int nfsrv_docallback(struct nfsclient *clp, int procnum,
    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
    struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p);
static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
    int *slotposp);
static u_int32_t nfsrv_nextclientindex(void);
static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
static void nfsrv_markstable(struct nfsclient *clp);
static void nfsrv_markreclaim(struct nfsclient *clp);
static int nfsrv_checkstable(struct nfsclient *clp);
static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
    vnode *vp, NFSPROC_T *p);
static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
    NFSPROC_T *p, vnode_t vp);
static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
    struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
static int nfsrv_notsamecredname(int op, struct nfsrv_descript *nd,
    struct nfsclient *clp);
static time_t nfsrv_leaseexpiry(void);
static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
    struct nfsstate *stp, struct nfsrvcache *op);
static int nfsrv_nootherstate(struct nfsstate *stp);
static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
    uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
    int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
    NFSPROC_T *p);
static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
    NFSPROC_T *p);
static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
    uint64_t first, uint64_t end);
static void nfsrv_locklf(struct nfslockfile *lfp);
static void nfsrv_unlocklf(struct nfslockfile *lfp);
static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
static int nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
    uint8_t *sessionid);
static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
    int dont_replycache, struct nfsdsession **sepp, int *slotposp);
static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
    nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p);
static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp);
static void nfsrv_freelayoutlist(nfsquad_t clientid);
static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype,
    int iomode);
static void nfsrv_freealllayouts(void);
static void nfsrv_freedevid(struct nfsdevice *ds);
static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
    struct nfsdevice **dsp);
static void nfsrv_deleteds(struct nfsdevice *fndds);
static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost);
static void nfsrv_freealldevids(void);
static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp,
    int maxcnt, NFSPROC_T *p);
static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp,
    fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype,
    NFSPROC_T *p);
static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
    NFSPROC_T *, struct nfslayout **lypp);
static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt);
static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode,
    fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode,
    int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
static int nfsrv_dontlayout(fhandle_t *fhp);
static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
    vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
    vnode_t *tvpp);
static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
static int nfsrv_checkmachcred(int op, struct nfsrv_descript *nd,
    struct nfsclient *clp);
243
/*
 * Scan the client list for a match and either return the current one,
 * create a new entry or return an error.
 * If returning a non-error, the clp structure must either be linked into
 * the client list or free'd.
 *
 * nd - the request; nd_flag is tested for ND_GSS and ND_NFSV41.
 * new_clpp - *new_clpp is the candidate client structure.  Whenever it
 *	is linked into the client hash table, *new_clpp is set to NULL;
 *	otherwise (error returns and the NFSv4.1 "confirmed clientid
 *	found" case) it is left untouched.
 * clientidp - filled in with the clientid being replied.
 * confirmp - filled in with the confirm value being replied.  For
 *	NFSv4.1, a non-zero confirmp->lval[1] on entry is treated as an
 *	attempt to update a confirmed clientid and lval[1] is set to 1 on
 *	return if a confirmed clientid was found.
 * p - passed through to nfsrv_cleanclient() and nfsrv_zapclient().
 *
 * Returns 0, NFSERR_RESOURCE, NFSERR_NOENT or NFSERR_CLIDINUSE.
 */
int
nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
    nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
{
	struct nfsclient *clp = NULL, *new_clp = *new_clpp;
	int i, error = 0, ret;
	struct nfsstate *stp, *tstp;
#ifdef INET
	struct sockaddr_in *sin, *rin;
#endif
#ifdef INET6
	struct sockaddr_in6 *sin6, *rin6;
#endif
	struct nfsdsession *sep, *nsep;
	int zapit = 0, gotit, hasstate = 0, igotlock;
	/*
	 * Source of confirm values.  It is only incremented between the
	 * nfsv4_lock() and nfsv4_unlock() calls on nfsv4rootfs_lock below.
	 */
	static u_int64_t confirm_index = 0;

	/*
	 * Check for state resource limit exceeded.
	 */
	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
		error = NFSERR_RESOURCE;
		goto out;
	}

	if (nfsrv_issuedelegs == 0 ||
	    ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
		/*
		 * Don't do callbacks when delegations are disabled or
		 * for AUTH_GSS when nfsrv_nogsscallback is set.
		 * If establishing a callback connection is attempted
		 * when a firewall is blocking the callback path, the
		 * server may wait too long for the connect attempt to
		 * succeed during the Open. Some clients, such as Linux,
		 * may timeout and give up on the Open before the server
		 * replies. Also, since AUTH_GSS callbacks are not
		 * yet interoperability tested, they might cause the
		 * server to crap out, if they get past the Init call to
		 * the client.
		 */
		new_clp->lc_program = 0;

	/*
	 * Lock out other nfsd threads.
	 * NOTE(review): nfsv4_relref() presumably drops the reference this
	 * thread already holds on the lock, so that the lock request
	 * below can be granted - confirm against nfsv4_relref().
	 */
	NFSLOCKV4ROOTMUTEX();
	nfsv4_relref(&nfsv4rootfs_lock);
	do {
		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
		    NFSV4ROOTLOCKMUTEXPTR, NULL);
	} while (!igotlock);
	NFSUNLOCKV4ROOTMUTEX();

	/*
	 * Search for a match in the client list.
	 * The match is on the client's id string (lc_id/lc_idlen), so every
	 * hash bucket is scanned.  When the loop is done, i is the index of
	 * the bucket holding the match or nfsrv_clienthashsize if there was
	 * no match.
	 */
	gotit = i = 0;
	while (i < nfsrv_clienthashsize && !gotit) {
	    LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
		if (new_clp->lc_idlen == clp->lc_idlen &&
		    !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
			gotit = 1;
			break;
		}
	    }
	    if (gotit == 0)
		i++;
	}
	/*
	 * Case 1: there is no match or the matching entry is either not yet
	 * confirmed or has been admin revoked.
	 */
	if (!gotit ||
	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
		if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
			/*
			 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
			 * client is trying to update a confirmed clientid.
			 */
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
			confirmp->lval[1] = 0;
			error = NFSERR_NOENT;
			goto out;
		}
		/*
		 * Get rid of the old one.
		 * (i != nfsrv_clienthashsize means that a match was found.)
		 */
		if (i != nfsrv_clienthashsize) {
			LIST_REMOVE(clp, lc_hash);
			nfsrv_cleanclient(clp, p);
			nfsrv_freedeleglist(&clp->lc_deleg);
			nfsrv_freedeleglist(&clp->lc_olddeleg);
			zapit = 1;
		}
		/*
		 * Add it after assigning a client id to it.
		 */
		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
		if ((nd->nd_flag & ND_NFSV41) != 0) {
			/*
			 * The value stored in the client is one less than
			 * the value replied; nfsrv_getclient() accepts
			 * lc_confirm.lval[0] + 1 as the first confirm.
			 */
			confirmp->lval[0] = ++confirm_index;
			new_clp->lc_confirm.lval[0] = confirmp->lval[0] - 1;
		} else
			confirmp->qval = new_clp->lc_confirm.qval =
			    ++confirm_index;
		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
		    NFSD_VNET(nfsrvboottime);
		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
		    nfsrv_nextclientindex();
		new_clp->lc_stateindex = 0;
		new_clp->lc_statemaxindex = 0;
		new_clp->lc_prevsess = 0;
		new_clp->lc_cbref = 0;
		new_clp->lc_expiry = nfsrv_leaseexpiry();
		LIST_INIT(&new_clp->lc_open);
		LIST_INIT(&new_clp->lc_deleg);
		LIST_INIT(&new_clp->lc_olddeleg);
		LIST_INIT(&new_clp->lc_session);
		for (i = 0; i < nfsrv_statehashsize; i++)
			LIST_INIT(&new_clp->lc_stateid[i]);
		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
		    lc_hash);
		NFSD_VNET(nfsstatsv1_p)->srvclients++;
		nfsrv_openpluslock++;
		nfsrv_clients++;
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();
		/* The old client is zapped after the lock has been released. */
		if (zapit)
			nfsrv_zapclient(clp, p);
		/* new_clp is now linked into the client hash table. */
		*new_clpp = NULL;
		goto out;
	}

	/*
	 * Now, handle the cases where the id is already issued.
	 */
	if (nfsrv_notsamecredname(NFSV4OP_EXCHANGEID, nd, clp)) {
	    /*
	     * Check to see if there is expired state that should go away.
	     */
	    if (clp->lc_expiry < NFSD_MONOSEC &&
	        (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
		nfsrv_cleanclient(clp, p);
		nfsrv_freedeleglist(&clp->lc_deleg);
	    }

	    /*
	     * If there is outstanding state, then reply NFSERR_CLIDINUSE per
	     * RFC3530 Sec. 8.1.2 last para.
	     * A delegation, or an open owner that still has at least one
	     * open on its ls_open list, counts as outstanding state.
	     */
	    if (!LIST_EMPTY(&clp->lc_deleg)) {
		hasstate = 1;
	    } else if (LIST_EMPTY(&clp->lc_open)) {
		hasstate = 0;
	    } else {
		hasstate = 0;
		/* Look for an Open on the OpenOwner */
		LIST_FOREACH(stp, &clp->lc_open, ls_list) {
		    if (!LIST_EMPTY(&stp->ls_open)) {
			hasstate = 1;
			break;
		    }
		}
	    }
	    if (hasstate) {
		/*
		 * If the uid doesn't match, return NFSERR_CLIDINUSE after
		 * filling out the correct ipaddr and portnum.
		 * The address and port of the existing client are copied
		 * into new_clp's sockaddr; address families other than
		 * those handled below leave new_clp untouched.
		 * NOTE(review): the casts assume new_clp->lc_req.nr_nam
		 * points at storage large enough for the existing client's
		 * address family - confirm against the caller.
		 */
		switch (clp->lc_req.nr_nam->sa_family) {
#ifdef INET
		case AF_INET:
			sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam;
			rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
			sin->sin_addr.s_addr = rin->sin_addr.s_addr;
			sin->sin_port = rin->sin_port;
			break;
#endif
#ifdef INET6
		case AF_INET6:
			sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam;
			rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
			sin6->sin6_addr = rin6->sin6_addr;
			sin6->sin6_port = rin6->sin6_port;
			break;
#endif
		}
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();
		error = NFSERR_CLIDINUSE;
		goto out;
	    }
	}

	/*
	 * Case 2: the id matches a confirmed client, but the verifier
	 * differs.
	 */
	if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
		/*
		 * If the verifier has changed, the client has rebooted
		 * and a new client id is issued. The old state info
		 * can be thrown away once the SETCLIENTID_CONFIRM occurs.
		 */
		LIST_REMOVE(clp, lc_hash);

		/* Get rid of all sessions on this clientid. */
		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) {
			ret = nfsrv_freesession(NULL, sep, NULL);
			if (ret != 0)
				printf("nfsrv_setclient: verifier changed free"
				    " session failed=%d\n", ret);
		}

		/* Same confirm/clientid assignment as for a new client. */
		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
		if ((nd->nd_flag & ND_NFSV41) != 0) {
			confirmp->lval[0] = ++confirm_index;
			new_clp->lc_confirm.lval[0] = confirmp->lval[0] - 1;
		} else
			confirmp->qval = new_clp->lc_confirm.qval =
			    ++confirm_index;
		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
		    NFSD_VNET(nfsrvboottime);
		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
		    nfsrv_nextclientindex();
		new_clp->lc_stateindex = 0;
		new_clp->lc_statemaxindex = 0;
		new_clp->lc_prevsess = 0;
		new_clp->lc_cbref = 0;
		new_clp->lc_expiry = nfsrv_leaseexpiry();

		/*
		 * Save the state until confirmed.
		 * Each of the old client's state lists is handed to new_clp
		 * via LIST_NEWHEAD() and the ls_clp back pointer of every
		 * entry is repointed at new_clp.
		 */
		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
			tstp->ls_clp = new_clp;
		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
			tstp->ls_clp = new_clp;
		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
		    ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
			tstp->ls_clp = new_clp;
		for (i = 0; i < nfsrv_statehashsize; i++) {
			LIST_NEWHEAD(&new_clp->lc_stateid[i],
			    &clp->lc_stateid[i], ls_hash);
			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
				tstp->ls_clp = new_clp;
		}
		LIST_INIT(&new_clp->lc_session);
		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
		    lc_hash);
		NFSD_VNET(nfsstatsv1_p)->srvclients++;
		nfsrv_openpluslock++;
		nfsrv_clients++;
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();

		/*
		 * Must wait until any outstanding callback on the old clp
		 * completes.
		 * Each pass sets LCL_WAKEUPWANTED and sleeps on clp for at
		 * most 10 * hz ticks before lc_cbref is checked again.
		 */
		NFSLOCKSTATE();
		while (clp->lc_cbref) {
			clp->lc_flags |= LCL_WAKEUPWANTED;
			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
			    "nfsd clp", 10 * hz);
		}
		NFSUNLOCKSTATE();
		nfsrv_zapclient(clp, p);
		*new_clpp = NULL;
		goto out;
	}

	/*
	 * Case 3: both the id and the verifier match a confirmed client.
	 */
	/* For NFSv4.1, mark that we found a confirmed clientid. */
	if ((nd->nd_flag & ND_NFSV41) != 0) {
		/*
		 * The existing clp stays in place; new_clp is not linked
		 * in and *new_clpp is left non-NULL.
		 */
		clientidp->lval[0] = clp->lc_clientid.lval[0];
		clientidp->lval[1] = clp->lc_clientid.lval[1];
		confirmp->lval[0] = 0;	/* Ignored by client */
		confirmp->lval[1] = 1;
	} else {
		/*
		 * id and verifier match, so update the net address info
		 * and get rid of any existing callback authentication
		 * handle, so a new one will be acquired.
		 * new_clp replaces clp, keeping the same clientid and state
		 * indices.  LCL_DONTCLEAN is set so that the confirm done
		 * by nfsrv_getclient() does not throw the state away.
		 */
		LIST_REMOVE(clp, lc_hash);
		new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
		new_clp->lc_expiry = nfsrv_leaseexpiry();
		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
		    clp->lc_clientid.lval[0];
		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
		    clp->lc_clientid.lval[1];
		new_clp->lc_delegtime = clp->lc_delegtime;
		new_clp->lc_stateindex = clp->lc_stateindex;
		new_clp->lc_statemaxindex = clp->lc_statemaxindex;
		new_clp->lc_cbref = 0;
		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
			tstp->ls_clp = new_clp;
		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
			tstp->ls_clp = new_clp;
		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
			tstp->ls_clp = new_clp;
		for (i = 0; i < nfsrv_statehashsize; i++) {
			LIST_NEWHEAD(&new_clp->lc_stateid[i],
			    &clp->lc_stateid[i], ls_hash);
			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
				tstp->ls_clp = new_clp;
		}
		LIST_INIT(&new_clp->lc_session);
		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
		    lc_hash);
		NFSD_VNET(nfsstatsv1_p)->srvclients++;
		nfsrv_openpluslock++;
		nfsrv_clients++;
	}
	NFSLOCKV4ROOTMUTEX();
	nfsv4_unlock(&nfsv4rootfs_lock, 1);
	NFSUNLOCKV4ROOTMUTEX();

	/* Only the non-NFSv4.1 case above replaced clp with new_clp. */
	if ((nd->nd_flag & ND_NFSV41) == 0) {
		/*
		 * Must wait until any outstanding callback on the old clp
		 * completes.
		 */
		NFSLOCKSTATE();
		while (clp->lc_cbref) {
			clp->lc_flags |= LCL_WAKEUPWANTED;
			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
			    "nfsdclp", 10 * hz);
		}
		NFSUNLOCKSTATE();
		nfsrv_zapclient(clp, p);
		*new_clpp = NULL;
	}

out:
	NFSEXITCODE2(error, nd);
	return (error);
}
589
/*
 * Check to see if the client id exists and optionally confirm it.
 *
 * clientid - the clientid to look up.  lval[0] must match nfsrvboottime
 *	(except for an NFSv4.1 renew) and lval[1] is matched against
 *	lc_clientid.lval[1].
 * opflags - CLOPS_CONFIRM, CLOPS_RENEW and CLOPS_RENEWOP select the
 *	operations performed, as well as the locking (see below).
 * clpp - if non-NULL, *clpp is set to NULL on entry and to the client
 *	found once the lookup succeeded.  It is also set when a later
 *	step (confirm or principal check) fails, so callers must check
 *	the return value.
 * nsep - for NFSv4.1 Confirm, the new session to link to the client or
 *	NULL.
 * confirm - the confirm value to be checked for CLOPS_CONFIRM.
 * cbprogram - the callback program, used for NFSv4.1 Confirm.
 * nd - the request.
 *	NOTE(review): NULL is tolerated by the boot time and session
 *	checks, but nd is dereferenced without a check when CLOPS_CONFIRM
 *	is set and when the CLOPS_RENEWOP principal check compares uids -
 *	presumably callers always pass nd in those cases; confirm.
 * p - passed through to nfsrv_cleanclient().
 *
 * Returns 0, NFSERR_STALECLIENTID, NFSERR_EXPIRED, NFSERR_ADMINREVOKED,
 * NFSERR_SEQMISORDERED, NFSERR_CLIDINUSE, NFSERR_ACCES or
 * NFSERR_CBPATHDOWN.
 */
int
nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
    struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
    struct nfsrv_descript *nd, NFSPROC_T *p)
{
	struct nfsclient *clp;
	struct nfsstate *stp;
	int i;
	struct nfsclienthashhead *hp;
	int error = 0, igotlock, doneok;
	struct nfssessionhash *shp;
	struct nfsdsession *sep;
	uint64_t sessid[2];
	bool sess_replay;
	/* Only incremented with the V4 root mutex held, see below. */
	static uint64_t next_sess = 0;

	if (clpp)
		*clpp = NULL;
	/*
	 * The boot time check is skipped only for an NFSv4.1 renew, where
	 * the client is found via the session instead of the clientid.
	 */
	if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
	    opflags != CLOPS_RENEW) && NFSD_VNET(nfsrvboottime) !=
	    clientid.lval[0]) {
		error = NFSERR_STALECLIENTID;
		goto out;
	}

	/*
	 * If called with opflags == CLOPS_RENEW, the State Lock is
	 * already held. Otherwise, we need to get either that or,
	 * for the case of Confirm, lock out the nfsd threads.
	 * Note that "opflags != CLOPS_RENEW" is an equality test, so
	 * CLOPS_RENEW combined with any other flag acquires the State
	 * Lock here.
	 */
	if (opflags & CLOPS_CONFIRM) {
		NFSLOCKV4ROOTMUTEX();
		nfsv4_relref(&nfsv4rootfs_lock);
		do {
			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
			    NFSV4ROOTLOCKMUTEXPTR, NULL);
		} while (!igotlock);
		/*
		 * Create a new sessionid here, since we need to do it where
		 * there is a mutex held to serialize update of next_sess.
		 */
		if ((nd->nd_flag & ND_NFSV41) != 0) {
			sessid[0] = ++next_sess;
			sessid[1] = clientid.qval;
		}
		NFSUNLOCKV4ROOTMUTEX();
	} else if (opflags != CLOPS_RENEW) {
		NFSLOCKSTATE();
	}

	/* For NFSv4.1, the clp is acquired from the associated session. */
	if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
	    opflags == CLOPS_RENEW) {
		/* Without a Sequence op there is no session, so no client. */
		clp = NULL;
		if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
			shp = NFSSESSIONHASH(nd->nd_sessionid);
			NFSLOCKSESSION(shp);
			sep = nfsrv_findsession(nd->nd_sessionid);
			if (sep != NULL)
				clp = sep->sess_clp;
			NFSUNLOCKSESSION(shp);
		}
	} else {
		/* clp is left NULL by LIST_FOREACH() when nothing matches. */
		hp = NFSCLIENTHASH(clientid);
		LIST_FOREACH(clp, hp, lc_hash) {
			if (clp->lc_clientid.lval[1] == clientid.lval[1])
				break;
		}
	}
	if (clp == NULL) {
		if (opflags & CLOPS_CONFIRM)
			error = NFSERR_STALECLIENTID;
		else
			error = NFSERR_EXPIRED;
	} else if (clp->lc_flags & LCL_ADMINREVOKED) {
		/*
		 * If marked admin revoked, just return the error.
		 */
		error = NFSERR_ADMINREVOKED;
	}
	if (error) {
		/* Release whichever lock was acquired above. */
		if (opflags & CLOPS_CONFIRM) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		} else if (opflags != CLOPS_RENEW) {
			NFSUNLOCKSTATE();
		}
		goto out;
	}

	/*
	 * Perform any operations specified by the opflags.
	 */
	if (opflags & CLOPS_CONFIRM) {
		sess_replay = false;
		if ((nd->nd_flag & ND_NFSV41) != 0) {
		    /*
		     * For the case where lc_confirm.lval[0] == confirm.lval[0],
		     * use the new session, but with the previous sessionid.
		     * This is not exactly what the RFC describes, but should
		     * result in the same reply as the previous CreateSession.
		     *
		     * lc_confirm.lval[0] + 1 == confirm.lval[0] is the first
		     * use of this confirm value (nfsrv_setclient() stores one
		     * less than the value it replies), so advance
		     * lc_confirm.lval[0] and remember the sessionid for a
		     * possible replay.
		     */
		    if (clp->lc_confirm.lval[0] + 1 == confirm.lval[0]) {
			clp->lc_confirm.lval[0] = confirm.lval[0];
			clp->lc_prevsess = sessid[0];
		    } else if (clp->lc_confirm.lval[0] == confirm.lval[0]) {
			if (clp->lc_prevsess == 0)
			    error = NFSERR_SEQMISORDERED;
			else
			    sessid[0] = clp->lc_prevsess;
			sess_replay = true;
		    } else
			error = NFSERR_SEQMISORDERED;
		} else if ((nd->nd_flag & ND_NFSV41) == 0 &&
		     clp->lc_confirm.qval != confirm.qval)
			error = NFSERR_STALECLIENTID;
		if (error == 0 && nfsrv_notsamecredname(NFSV4OP_CREATESESSION,
		    nd, clp))
			error = NFSERR_CLIDINUSE;

		if (!error) {
		    /*
		     * Clean out the old state only when LCL_NEEDSCONFIRM is
		     * set and LCL_DONTCLEAN is clear.
		     */
		    if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
			LCL_NEEDSCONFIRM) {
			/*
			 * Hang onto the delegations (as old delegations)
			 * for an Open with CLAIM_DELEGATE_PREV unless in
			 * grace, but get rid of the rest of the state.
			 */
			nfsrv_cleanclient(clp, p);
			nfsrv_freedeleglist(&clp->lc_olddeleg);
			if (nfsrv_checkgrace(nd, clp, 0)) {
			    /* In grace, so just delete delegations */
			    nfsrv_freedeleglist(&clp->lc_deleg);
			} else {
			    LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
				stp->ls_flags |= NFSLCK_OLDDELEG;
			    clp->lc_delegtime = NFSD_MONOSEC +
				nfsrv_lease + NFSRV_LEASEDELTA;
			    LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
				ls_list);
			}
			if ((nd->nd_flag & ND_NFSV41) != 0)
			    clp->lc_program = cbprogram;
		    }
		    /* The client is now confirmed. */
		    clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
		    if (clp->lc_program)
			clp->lc_flags |= LCL_NEEDSCBNULL;
		    /* For NFSv4.1, link the session onto the client. */
		    if (nsep != NULL) {
			/* Hold a reference on the xprt for a backchannel. */
			if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
			    != 0 && !sess_replay) {
			    if (clp->lc_req.nr_client == NULL)
				clp->lc_req.nr_client = (struct __rpc_client *)
				    clnt_bck_create(nd->nd_xprt->xp_socket,
				    cbprogram, NFSV4_CBVERS);
			    if (clp->lc_req.nr_client != NULL) {
				SVC_ACQUIRE(nd->nd_xprt);
				CLNT_ACQUIRE(clp->lc_req.nr_client);
				nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
				/* Disable idle timeout. */
				nd->nd_xprt->xp_idletimeout = 0;
				nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
			    } else
				/* No client handle, so no backchannel. */
				nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
			}
			NFSBCOPY(sessid, nsep->sess_sessionid,
			    NFSX_V4SESSIONID);
			NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
			    NFSX_V4SESSIONID);
			/*
			 * For a replay, nsep is given the previous
			 * sessionid above, but it is not linked into the
			 * session hash table or onto the client.
			 */
			if (!sess_replay) {
			    shp = NFSSESSIONHASH(nsep->sess_sessionid);
			    NFSLOCKSTATE();
			    NFSLOCKSESSION(shp);
			    LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
			    LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
			    nsep->sess_clp = clp;
			    NFSUNLOCKSESSION(shp);
			    NFSUNLOCKSTATE();
			}
		    }
		}
	} else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		/* Not a Confirm and the client has not been confirmed yet. */
		error = NFSERR_EXPIRED;
	}

	/*
	 * If called by the Renew Op, we must check the principal.
	 * When nfsrv_notsamecredname() reports a mismatch, the renew is
	 * still allowed if any open state of this client has the same uid
	 * as the request's credentials.
	 */
	if (!error && (opflags & CLOPS_RENEWOP)) {
	    if (nfsrv_notsamecredname(0, nd, clp)) {
		doneok = 0;
		for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
		    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
			if ((stp->ls_flags & NFSLCK_OPEN) &&
			    stp->ls_uid == nd->nd_cred->cr_uid) {
				doneok = 1;
				break;
			}
		    }
		}
		if (!doneok)
			error = NFSERR_ACCES;
	    }
	    if (!error && (clp->lc_flags & LCL_CBDOWN))
		error = NFSERR_CBPATHDOWN;
	}
	/* The lease is renewed even when NFSERR_CBPATHDOWN is replied. */
	if ((!error || error == NFSERR_CBPATHDOWN) &&
	     (opflags & CLOPS_RENEW)) {
		clp->lc_expiry = nfsrv_leaseexpiry();
	}
	if (opflags & CLOPS_CONFIRM) {
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();
	} else if (opflags != CLOPS_RENEW) {
		NFSUNLOCKSTATE();
	}
	/* Set even if error is non-zero at this point. */
	if (clpp)
		*clpp = clp;

out:
	NFSEXITCODE2(error, nd);
	return (error);
}
819
820/*
821 * Perform the NFSv4.1 destroy clientid.
822 */
823int
824nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
825{
826	struct nfsclient *clp;
827	struct nfsclienthashhead *hp;
828	int error = 0, i, igotlock;
829
830	if (NFSD_VNET(nfsrvboottime) != clientid.lval[0]) {
831		error = NFSERR_STALECLIENTID;
832		goto out;
833	}
834
835	/* Lock out other nfsd threads */
836	NFSLOCKV4ROOTMUTEX();
837	nfsv4_relref(&nfsv4rootfs_lock);
838	do {
839		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
840		    NFSV4ROOTLOCKMUTEXPTR, NULL);
841	} while (igotlock == 0);
842	NFSUNLOCKV4ROOTMUTEX();
843
844	hp = NFSCLIENTHASH(clientid);
845	LIST_FOREACH(clp, hp, lc_hash) {
846		if (clp->lc_clientid.lval[1] == clientid.lval[1])
847			break;
848	}
849	if (clp == NULL) {
850		NFSLOCKV4ROOTMUTEX();
851		nfsv4_unlock(&nfsv4rootfs_lock, 1);
852		NFSUNLOCKV4ROOTMUTEX();
853		/* Just return ok, since it is gone. */
854		goto out;
855	}
856
857	/* Check for the SP4_MACH_CRED case. */
858	error = nfsrv_checkmachcred(NFSV4OP_DESTROYCLIENTID, nd, clp);
859	if (error != 0) {
860		NFSLOCKV4ROOTMUTEX();
861		nfsv4_unlock(&nfsv4rootfs_lock, 1);
862		NFSUNLOCKV4ROOTMUTEX();
863		goto out;
864	}
865
866	/*
867	 * Free up all layouts on the clientid.  Should the client return the
868	 * layouts?
869	 */
870	nfsrv_freelayoutlist(clientid);
871
872	/* Scan for state on the clientid. */
873	for (i = 0; i < nfsrv_statehashsize; i++)
874		if (!LIST_EMPTY(&clp->lc_stateid[i])) {
875			NFSLOCKV4ROOTMUTEX();
876			nfsv4_unlock(&nfsv4rootfs_lock, 1);
877			NFSUNLOCKV4ROOTMUTEX();
878			error = NFSERR_CLIENTIDBUSY;
879			goto out;
880		}
881	if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
882		NFSLOCKV4ROOTMUTEX();
883		nfsv4_unlock(&nfsv4rootfs_lock, 1);
884		NFSUNLOCKV4ROOTMUTEX();
885		error = NFSERR_CLIENTIDBUSY;
886		goto out;
887	}
888
889	/* Destroy the clientid and return ok. */
890	nfsrv_cleanclient(clp, p);
891	nfsrv_freedeleglist(&clp->lc_deleg);
892	nfsrv_freedeleglist(&clp->lc_olddeleg);
893	LIST_REMOVE(clp, lc_hash);
894	NFSLOCKV4ROOTMUTEX();
895	nfsv4_unlock(&nfsv4rootfs_lock, 1);
896	NFSUNLOCKV4ROOTMUTEX();
897	nfsrv_zapclient(clp, p);
898out:
899	NFSEXITCODE2(error, nd);
900	return (error);
901}
902
903/*
904 * Called from the new nfssvc syscall to admin revoke a clientid.
905 * Returns 0 for success, error otherwise.
906 */
907int
908nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
909{
910	struct nfsclient *clp = NULL;
911	int i, error = 0;
912	int gotit, igotlock;
913
914	/*
915	 * First, lock out the nfsd so that state won't change while the
916	 * revocation record is being written to the stable storage restart
917	 * file.
918	 */
919	NFSLOCKV4ROOTMUTEX();
920	do {
921		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
922		    NFSV4ROOTLOCKMUTEXPTR, NULL);
923	} while (!igotlock);
924	NFSUNLOCKV4ROOTMUTEX();
925
926	/*
927	 * Search for a match in the client list.
928	 */
929	gotit = i = 0;
930	while (i < nfsrv_clienthashsize && !gotit) {
931	    LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
932		if (revokep->nclid_idlen == clp->lc_idlen &&
933		    !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
934			gotit = 1;
935			break;
936		}
937	    }
938	    i++;
939	}
940	if (!gotit) {
941		NFSLOCKV4ROOTMUTEX();
942		nfsv4_unlock(&nfsv4rootfs_lock, 0);
943		NFSUNLOCKV4ROOTMUTEX();
944		error = EPERM;
945		goto out;
946	}
947
948	/*
949	 * Now, write out the revocation record
950	 */
951	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
952	nfsrv_backupstable();
953
954	/*
955	 * and clear out the state, marking the clientid revoked.
956	 */
957	clp->lc_flags &= ~LCL_CALLBACKSON;
958	clp->lc_flags |= LCL_ADMINREVOKED;
959	nfsrv_cleanclient(clp, p);
960	nfsrv_freedeleglist(&clp->lc_deleg);
961	nfsrv_freedeleglist(&clp->lc_olddeleg);
962	NFSLOCKV4ROOTMUTEX();
963	nfsv4_unlock(&nfsv4rootfs_lock, 0);
964	NFSUNLOCKV4ROOTMUTEX();
965
966out:
967	NFSEXITCODE(error);
968	return (error);
969}
970
971/*
972 * Dump out stats for all clients. Called from nfssvc(2), that is used
973 * nfsstatsv1.
974 */
975void
976nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
977{
978	struct nfsclient *clp;
979	int i = 0, cnt = 0;
980
981	/*
982	 * First, get a reference on the nfsv4rootfs_lock so that an
983	 * exclusive lock cannot be acquired while dumping the clients.
984	 */
985	NFSLOCKV4ROOTMUTEX();
986	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
987	NFSUNLOCKV4ROOTMUTEX();
988	NFSLOCKSTATE();
989	/*
990	 * Rattle through the client lists until done.
991	 */
992	while (i < nfsrv_clienthashsize && cnt < maxcnt) {
993	    clp = LIST_FIRST(&NFSD_VNET(nfsclienthash)[i]);
994	    while (clp != LIST_END(&NFSD_VNET(nfsclienthash)[i]) && cnt <
995		maxcnt) {
996		nfsrv_dumpaclient(clp, &dumpp[cnt]);
997		cnt++;
998		clp = LIST_NEXT(clp, lc_hash);
999	    }
1000	    i++;
1001	}
1002	if (cnt < maxcnt)
1003	    dumpp[cnt].ndcl_clid.nclid_idlen = 0;
1004	NFSUNLOCKSTATE();
1005	NFSLOCKV4ROOTMUTEX();
1006	nfsv4_relref(&nfsv4rootfs_lock);
1007	NFSUNLOCKV4ROOTMUTEX();
1008}
1009
1010/*
1011 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
1012 */
1013static void
1014nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
1015{
1016	struct nfsstate *stp, *openstp, *lckownstp;
1017	struct nfslock *lop;
1018	sa_family_t af;
1019#ifdef INET
1020	struct sockaddr_in *rin;
1021#endif
1022#ifdef INET6
1023	struct sockaddr_in6 *rin6;
1024#endif
1025
1026	dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
1027	dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
1028	dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
1029	dumpp->ndcl_flags = clp->lc_flags;
1030	dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
1031	NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
1032	af = clp->lc_req.nr_nam->sa_family;
1033	dumpp->ndcl_addrfam = af;
1034	switch (af) {
1035#ifdef INET
1036	case AF_INET:
1037		rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
1038		dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr;
1039		break;
1040#endif
1041#ifdef INET6
1042	case AF_INET6:
1043		rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
1044		dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr;
1045		break;
1046#endif
1047	}
1048
1049	/*
1050	 * Now, scan the state lists and total up the opens and locks.
1051	 */
1052	LIST_FOREACH(stp, &clp->lc_open, ls_list) {
1053	    dumpp->ndcl_nopenowners++;
1054	    LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
1055		dumpp->ndcl_nopens++;
1056		LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
1057		    dumpp->ndcl_nlockowners++;
1058		    LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
1059			dumpp->ndcl_nlocks++;
1060		    }
1061		}
1062	    }
1063	}
1064
1065	/*
1066	 * and the delegation lists.
1067	 */
1068	LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
1069	    dumpp->ndcl_ndelegs++;
1070	}
1071	LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
1072	    dumpp->ndcl_nolddelegs++;
1073	}
1074}
1075
1076/*
1077 * Dump out lock stats for a file.
1078 */
1079void
1080nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
1081    NFSPROC_T *p)
1082{
1083	struct nfsstate *stp;
1084	struct nfslock *lop;
1085	int cnt = 0;
1086	struct nfslockfile *lfp;
1087	sa_family_t af;
1088#ifdef INET
1089	struct sockaddr_in *rin;
1090#endif
1091#ifdef INET6
1092	struct sockaddr_in6 *rin6;
1093#endif
1094	int ret;
1095	fhandle_t nfh;
1096
1097	ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
1098	/*
1099	 * First, get a reference on the nfsv4rootfs_lock so that an
1100	 * exclusive lock on it cannot be acquired while dumping the locks.
1101	 */
1102	NFSLOCKV4ROOTMUTEX();
1103	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
1104	NFSUNLOCKV4ROOTMUTEX();
1105	NFSLOCKSTATE();
1106	if (!ret)
1107		ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
1108	if (ret) {
1109		ldumpp[0].ndlck_clid.nclid_idlen = 0;
1110		NFSUNLOCKSTATE();
1111		NFSLOCKV4ROOTMUTEX();
1112		nfsv4_relref(&nfsv4rootfs_lock);
1113		NFSUNLOCKV4ROOTMUTEX();
1114		return;
1115	}
1116
1117	/*
1118	 * For each open share on file, dump it out.
1119	 */
1120	stp = LIST_FIRST(&lfp->lf_open);
1121	while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
1122		ldumpp[cnt].ndlck_flags = stp->ls_flags;
1123		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1124		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1125		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1126		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1127		ldumpp[cnt].ndlck_owner.nclid_idlen =
1128		    stp->ls_openowner->ls_ownerlen;
1129		NFSBCOPY(stp->ls_openowner->ls_owner,
1130		    ldumpp[cnt].ndlck_owner.nclid_id,
1131		    stp->ls_openowner->ls_ownerlen);
1132		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1133		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1134		    stp->ls_clp->lc_idlen);
1135		af = stp->ls_clp->lc_req.nr_nam->sa_family;
1136		ldumpp[cnt].ndlck_addrfam = af;
1137		switch (af) {
1138#ifdef INET
1139		case AF_INET:
1140			rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1141			ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1142			break;
1143#endif
1144#ifdef INET6
1145		case AF_INET6:
1146			rin6 = (struct sockaddr_in6 *)
1147			    stp->ls_clp->lc_req.nr_nam;
1148			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1149			break;
1150#endif
1151		}
1152		stp = LIST_NEXT(stp, ls_file);
1153		cnt++;
1154	}
1155
1156	/*
1157	 * and all locks.
1158	 */
1159	lop = LIST_FIRST(&lfp->lf_lock);
1160	while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
1161		stp = lop->lo_stp;
1162		ldumpp[cnt].ndlck_flags = lop->lo_flags;
1163		ldumpp[cnt].ndlck_first = lop->lo_first;
1164		ldumpp[cnt].ndlck_end = lop->lo_end;
1165		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1166		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1167		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1168		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1169		ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
1170		NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
1171		    stp->ls_ownerlen);
1172		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1173		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1174		    stp->ls_clp->lc_idlen);
1175		af = stp->ls_clp->lc_req.nr_nam->sa_family;
1176		ldumpp[cnt].ndlck_addrfam = af;
1177		switch (af) {
1178#ifdef INET
1179		case AF_INET:
1180			rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1181			ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1182			break;
1183#endif
1184#ifdef INET6
1185		case AF_INET6:
1186			rin6 = (struct sockaddr_in6 *)
1187			    stp->ls_clp->lc_req.nr_nam;
1188			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1189			break;
1190#endif
1191		}
1192		lop = LIST_NEXT(lop, lo_lckfile);
1193		cnt++;
1194	}
1195
1196	/*
1197	 * and the delegations.
1198	 */
1199	stp = LIST_FIRST(&lfp->lf_deleg);
1200	while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
1201		ldumpp[cnt].ndlck_flags = stp->ls_flags;
1202		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1203		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1204		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1205		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1206		ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
1207		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1208		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1209		    stp->ls_clp->lc_idlen);
1210		af = stp->ls_clp->lc_req.nr_nam->sa_family;
1211		ldumpp[cnt].ndlck_addrfam = af;
1212		switch (af) {
1213#ifdef INET
1214		case AF_INET:
1215			rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1216			ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1217			break;
1218#endif
1219#ifdef INET6
1220		case AF_INET6:
1221			rin6 = (struct sockaddr_in6 *)
1222			    stp->ls_clp->lc_req.nr_nam;
1223			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1224			break;
1225#endif
1226		}
1227		stp = LIST_NEXT(stp, ls_file);
1228		cnt++;
1229	}
1230
1231	/*
1232	 * If list isn't full, mark end of list by setting the client name
1233	 * to zero length.
1234	 */
1235	if (cnt < maxcnt)
1236		ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1237	NFSUNLOCKSTATE();
1238	NFSLOCKV4ROOTMUTEX();
1239	nfsv4_relref(&nfsv4rootfs_lock);
1240	NFSUNLOCKV4ROOTMUTEX();
1241}
1242
1243/*
1244 * Server timer routine. It can scan any linked list, so long
1245 * as it holds the spin/mutex lock and there is no exclusive lock on
1246 * nfsv4rootfs_lock.
1247 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1248 *  to do this from a callout, since the spin locks work. For
1249 *  Darwin, I'm not sure what will work correctly yet.)
1250 * Should be called once per second.
1251 */
1252void
1253nfsrv_servertimer(void *arg __unused)
1254{
1255	struct nfsclient *clp, *nclp;
1256	struct nfsstate *stp, *nstp;
1257	int got_ref, i;
1258
1259	/*
1260	 * Make sure nfsboottime is set. This is used by V3 as well
1261	 * as V4. Note that nfsboottime is not nfsrvboottime, which is
1262	 * only used by the V4 server for leases.
1263	 */
1264	if (nfsboottime.tv_sec == 0)
1265		NFSSETBOOTTIME(nfsboottime);
1266
1267	/*
1268	 * If server hasn't started yet, just return.
1269	 */
1270	NFSLOCKSTATE();
1271	if (NFSD_VNET(nfsrv_stablefirst).nsf_eograce == 0) {
1272		NFSUNLOCKSTATE();
1273		return;
1274	}
1275	if (!(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) {
1276		if (!(NFSD_VNET(nfsrv_stablefirst).nsf_flags &
1277		      NFSNSF_GRACEOVER) &&
1278		    NFSD_MONOSEC > NFSD_VNET(nfsrv_stablefirst).nsf_eograce)
1279			NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1280			    (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1281		NFSUNLOCKSTATE();
1282		return;
1283	}
1284
1285	/*
1286	 * Try and get a reference count on the nfsv4rootfs_lock so that
1287	 * no nfsd thread can acquire an exclusive lock on it before this
1288	 * call is done. If it is already exclusively locked, just return.
1289	 */
1290	NFSLOCKV4ROOTMUTEX();
1291	got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1292	NFSUNLOCKV4ROOTMUTEX();
1293	if (got_ref == 0) {
1294		NFSUNLOCKSTATE();
1295		return;
1296	}
1297
1298	/*
1299	 * For each client...
1300	 */
1301	for (i = 0; i < nfsrv_clienthashsize; i++) {
1302	    clp = LIST_FIRST(&NFSD_VNET(nfsclienthash)[i]);
1303	    while (clp != LIST_END(&NFSD_VNET(nfsclienthash)[i])) {
1304		nclp = LIST_NEXT(clp, lc_hash);
1305		if (!(clp->lc_flags & LCL_EXPIREIT)) {
1306		    if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1307			 && ((LIST_EMPTY(&clp->lc_deleg)
1308			      && LIST_EMPTY(&clp->lc_open)) ||
1309			     nfsrv_clients > nfsrv_clienthighwater)) ||
1310			(clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1311			(clp->lc_expiry < NFSD_MONOSEC &&
1312			 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
1313			/*
1314			 * Lease has expired several nfsrv_lease times ago:
1315			 * PLUS
1316			 *    - no state is associated with it
1317			 *    OR
1318			 *    - above high water mark for number of clients
1319			 *      (nfsrv_clienthighwater should be large enough
1320			 *       that this only occurs when clients fail to
1321			 *       use the same nfs_client_id4.id. Maybe somewhat
1322			 *       higher that the maximum number of clients that
1323			 *       will mount this server?)
1324			 * OR
1325			 * Lease has expired a very long time ago
1326			 * OR
1327			 * Lease has expired PLUS the number of opens + locks
1328			 * has exceeded 90% of capacity
1329			 *
1330			 * --> Mark for expiry. The actual expiry will be done
1331			 *     by an nfsd sometime soon.
1332			 */
1333			clp->lc_flags |= LCL_EXPIREIT;
1334			NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1335			    (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1336		    } else {
1337			/*
1338			 * If there are no opens, increment no open tick cnt
1339			 * If time exceeds NFSNOOPEN, mark it to be thrown away
1340			 * otherwise, if there is an open, reset no open time
1341			 * Hopefully, this will avoid excessive re-creation
1342			 * of open owners and subsequent open confirms.
1343			 */
1344			stp = LIST_FIRST(&clp->lc_open);
1345			while (stp != LIST_END(&clp->lc_open)) {
1346				nstp = LIST_NEXT(stp, ls_list);
1347				if (LIST_EMPTY(&stp->ls_open)) {
1348					stp->ls_noopens++;
1349					if (stp->ls_noopens > NFSNOOPEN ||
1350					    (nfsrv_openpluslock * 2) >
1351					    nfsrv_v4statelimit)
1352						NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1353							NFSNSF_NOOPENS;
1354				} else {
1355					stp->ls_noopens = 0;
1356				}
1357				stp = nstp;
1358			}
1359		    }
1360		}
1361		clp = nclp;
1362	    }
1363	}
1364	NFSUNLOCKSTATE();
1365	NFSLOCKV4ROOTMUTEX();
1366	nfsv4_relref(&nfsv4rootfs_lock);
1367	NFSUNLOCKV4ROOTMUTEX();
1368}
1369
1370/*
1371 * The following set of functions free up the various data structures.
1372 */
1373/*
1374 * Clear out all open/lock state related to this nfsclient.
1375 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1376 * there are no other active nfsd threads.
1377 */
1378void
1379nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
1380{
1381	struct nfsstate *stp, *nstp;
1382	struct nfsdsession *sep, *nsep;
1383
1384	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
1385		nfsrv_freeopenowner(stp, 1, p);
1386	if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1387		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1388			(void)nfsrv_freesession(NULL, sep, NULL);
1389}
1390
1391/*
1392 * Free a client that has been cleaned. It should also already have been
1393 * removed from the lists.
1394 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
1395 *  softclock interrupts are enabled.)
1396 */
1397void
1398nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
1399{
1400
1401#ifdef notyet
1402	if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1403	     (LCL_GSS | LCL_CALLBACKSON) &&
1404	    (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1405	    clp->lc_handlelen > 0) {
1406		clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1407		clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1408		(void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1409			NULL, 0, NULL, NULL, NULL, 0, p);
1410	}
1411#endif
1412	newnfs_disconnect(NULL, &clp->lc_req);
1413	free(clp->lc_req.nr_nam, M_SONAME);
1414	NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1415	free(clp->lc_stateid, M_NFSDCLIENT);
1416	free(clp, M_NFSDCLIENT);
1417	NFSLOCKSTATE();
1418	NFSD_VNET(nfsstatsv1_p)->srvclients--;
1419	nfsrv_openpluslock--;
1420	nfsrv_clients--;
1421	NFSUNLOCKSTATE();
1422}
1423
1424/*
1425 * Free a list of delegation state structures.
1426 * (This function will also free all nfslockfile structures that no
1427 *  longer have associated state.)
1428 */
1429void
1430nfsrv_freedeleglist(struct nfsstatehead *sthp)
1431{
1432	struct nfsstate *stp, *nstp;
1433
1434	LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1435		nfsrv_freedeleg(stp);
1436	}
1437	LIST_INIT(sthp);
1438}
1439
1440/*
1441 * Free up a delegation.
1442 */
1443static void
1444nfsrv_freedeleg(struct nfsstate *stp)
1445{
1446	struct nfslockfile *lfp;
1447
1448	LIST_REMOVE(stp, ls_hash);
1449	LIST_REMOVE(stp, ls_list);
1450	LIST_REMOVE(stp, ls_file);
1451	if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
1452		nfsrv_writedelegcnt--;
1453	lfp = stp->ls_lfp;
1454	if (LIST_EMPTY(&lfp->lf_open) &&
1455	    LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1456	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1457	    lfp->lf_usecount == 0 &&
1458	    nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1459		nfsrv_freenfslockfile(lfp);
1460	free(stp, M_NFSDSTATE);
1461	NFSD_VNET(nfsstatsv1_p)->srvdelegates--;
1462	nfsrv_openpluslock--;
1463	nfsrv_delegatecnt--;
1464}
1465
1466/*
1467 * This function frees an open owner and all associated opens.
1468 */
1469static void
1470nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1471{
1472	struct nfsstate *nstp, *tstp;
1473
1474	LIST_REMOVE(stp, ls_list);
1475	/*
1476	 * Now, free all associated opens.
1477	 */
1478	nstp = LIST_FIRST(&stp->ls_open);
1479	while (nstp != LIST_END(&stp->ls_open)) {
1480		tstp = nstp;
1481		nstp = LIST_NEXT(nstp, ls_list);
1482		(void) nfsrv_freeopen(tstp, NULL, cansleep, p);
1483	}
1484	if (stp->ls_op)
1485		nfsrvd_derefcache(stp->ls_op);
1486	free(stp, M_NFSDSTATE);
1487	NFSD_VNET(nfsstatsv1_p)->srvopenowners--;
1488	nfsrv_openpluslock--;
1489}
1490
1491/*
1492 * This function frees an open (nfsstate open structure) with all associated
1493 * lock_owners and locks. It also frees the nfslockfile structure iff there
1494 * are no other opens on the file.
1495 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1496 */
1497static int
1498nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1499{
1500	struct nfsstate *nstp, *tstp;
1501	struct nfslockfile *lfp;
1502	int ret;
1503
1504	LIST_REMOVE(stp, ls_hash);
1505	LIST_REMOVE(stp, ls_list);
1506	LIST_REMOVE(stp, ls_file);
1507
1508	lfp = stp->ls_lfp;
1509	/*
1510	 * Now, free all lockowners associated with this open.
1511	 */
1512	LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1513		nfsrv_freelockowner(tstp, vp, cansleep, p);
1514
1515	/*
1516	 * The nfslockfile is freed here if there are no locks
1517	 * associated with the open.
1518	 * If there are locks associated with the open, the
1519	 * nfslockfile structure can be freed via nfsrv_freelockowner().
1520	 * Acquire the state mutex to avoid races with calls to
1521	 * nfsrv_getlockfile().
1522	 */
1523	if (cansleep != 0)
1524		NFSLOCKSTATE();
1525	if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1526	    LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1527	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1528	    lfp->lf_usecount == 0 &&
1529	    (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
1530		nfsrv_freenfslockfile(lfp);
1531		ret = 1;
1532	} else
1533		ret = 0;
1534	if (cansleep != 0)
1535		NFSUNLOCKSTATE();
1536	free(stp, M_NFSDSTATE);
1537	NFSD_VNET(nfsstatsv1_p)->srvopens--;
1538	nfsrv_openpluslock--;
1539	return (ret);
1540}
1541
1542/*
1543 * Frees a lockowner and all associated locks.
1544 */
1545static void
1546nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1547    NFSPROC_T *p)
1548{
1549
1550	LIST_REMOVE(stp, ls_hash);
1551	LIST_REMOVE(stp, ls_list);
1552	nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1553	if (stp->ls_op)
1554		nfsrvd_derefcache(stp->ls_op);
1555	free(stp, M_NFSDSTATE);
1556	NFSD_VNET(nfsstatsv1_p)->srvlockowners--;
1557	nfsrv_openpluslock--;
1558}
1559
1560/*
1561 * Free all the nfs locks on a lockowner.
1562 */
1563static void
1564nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1565    NFSPROC_T *p)
1566{
1567	struct nfslock *lop, *nlop;
1568	struct nfsrollback *rlp, *nrlp;
1569	struct nfslockfile *lfp = NULL;
1570	int gottvp = 0;
1571	vnode_t tvp = NULL;
1572	uint64_t first, end;
1573
1574	if (vp != NULL)
1575		ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1576	lop = LIST_FIRST(&stp->ls_lock);
1577	while (lop != LIST_END(&stp->ls_lock)) {
1578		nlop = LIST_NEXT(lop, lo_lckowner);
1579		/*
1580		 * Since all locks should be for the same file, lfp should
1581		 * not change.
1582		 */
1583		if (lfp == NULL)
1584			lfp = lop->lo_lfp;
1585		else if (lfp != lop->lo_lfp)
1586			panic("allnfslocks");
1587		/*
1588		 * If vp is NULL and cansleep != 0, a vnode must be acquired
1589		 * from the file handle. This only occurs when called from
1590		 * nfsrv_cleanclient().
1591		 */
1592		if (gottvp == 0) {
1593			if (nfsrv_dolocallocks == 0)
1594				tvp = NULL;
1595			else if (vp == NULL && cansleep != 0) {
1596				tvp = nfsvno_getvp(&lfp->lf_fh);
1597				if (tvp != NULL)
1598					NFSVOPUNLOCK(tvp);
1599			} else
1600				tvp = vp;
1601			gottvp = 1;
1602		}
1603
1604		if (tvp != NULL) {
1605			if (cansleep == 0)
1606				panic("allnfs2");
1607			first = lop->lo_first;
1608			end = lop->lo_end;
1609			nfsrv_freenfslock(lop);
1610			nfsrv_localunlock(tvp, lfp, first, end, p);
1611			LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1612			    nrlp)
1613				free(rlp, M_NFSDROLLBACK);
1614			LIST_INIT(&lfp->lf_rollback);
1615		} else
1616			nfsrv_freenfslock(lop);
1617		lop = nlop;
1618	}
1619	if (vp == NULL && tvp != NULL)
1620		vrele(tvp);
1621}
1622
1623/*
1624 * Free an nfslock structure.
1625 */
1626static void
1627nfsrv_freenfslock(struct nfslock *lop)
1628{
1629
1630	if (lop->lo_lckfile.le_prev != NULL) {
1631		LIST_REMOVE(lop, lo_lckfile);
1632		NFSD_VNET(nfsstatsv1_p)->srvlocks--;
1633		nfsrv_openpluslock--;
1634	}
1635	LIST_REMOVE(lop, lo_lckowner);
1636	free(lop, M_NFSDLOCK);
1637}
1638
1639/*
1640 * This function frees an nfslockfile structure.
1641 */
1642static void
1643nfsrv_freenfslockfile(struct nfslockfile *lfp)
1644{
1645
1646	LIST_REMOVE(lfp, lf_hash);
1647	free(lfp, M_NFSDLOCKFILE);
1648}
1649
1650/*
1651 * This function looks up an nfsstate structure via stateid.
1652 */
1653static int
1654nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1655    struct nfsstate **stpp)
1656{
1657	struct nfsstate *stp;
1658	struct nfsstatehead *hp;
1659	int error = 0;
1660
1661	*stpp = NULL;
1662	hp = NFSSTATEHASH(clp, *stateidp);
1663	LIST_FOREACH(stp, hp, ls_hash) {
1664		if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1665			NFSX_STATEIDOTHER))
1666			break;
1667	}
1668
1669	/*
1670	 * If no state id in list, return NFSERR_BADSTATEID.
1671	 */
1672	if (stp == LIST_END(hp)) {
1673		error = NFSERR_BADSTATEID;
1674		goto out;
1675	}
1676	*stpp = stp;
1677
1678out:
1679	NFSEXITCODE(error);
1680	return (error);
1681}
1682
1683/*
1684 * This function gets an nfsstate structure via owner string.
1685 */
1686static void
1687nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1688    struct nfsstate **stpp)
1689{
1690	struct nfsstate *stp;
1691
1692	*stpp = NULL;
1693	LIST_FOREACH(stp, hp, ls_list) {
1694		if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1695		  !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1696			*stpp = stp;
1697			return;
1698		}
1699	}
1700}
1701
1702/*
1703 * Lock control function called to update lock status.
1704 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1705 * that one isn't to be created and an NFSERR_xxx for other errors.
1706 * The structures new_stp and new_lop are passed in as pointers that should
1707 * be set to NULL if the structure is used and shouldn't be free'd.
1708 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1709 * never used and can safely be allocated on the stack. For all other
1710 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1711 * in case they are used.
1712 */
1713int
1714nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1715    struct nfslock **new_lopp, struct nfslockconflict *cfp,
1716    nfsquad_t clientid, nfsv4stateid_t *stateidp,
1717    __unused struct nfsexstuff *exp,
1718    struct nfsrv_descript *nd, NFSPROC_T *p)
1719{
1720	struct nfslock *lop;
1721	struct nfsstate *new_stp = *new_stpp;
1722	struct nfslock *new_lop = *new_lopp;
1723	struct nfsstate *tstp, *mystp, *nstp;
1724	int specialid = 0;
1725	struct nfslockfile *lfp;
1726	struct nfslock *other_lop = NULL;
1727	struct nfsstate *stp, *lckstp = NULL;
1728	struct nfsclient *clp = NULL;
1729	u_int32_t bits;
1730	int error = 0, haslock = 0, ret, reterr;
1731	int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1732	fhandle_t nfh;
1733	uint64_t first, end;
1734	uint32_t lock_flags;
1735
1736	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1737		/*
1738		 * Note the special cases of "all 1s" or "all 0s" stateids and
1739		 * let reads with all 1s go ahead.
1740		 */
1741		if (new_stp->ls_stateid.seqid == 0x0 &&
1742		    new_stp->ls_stateid.other[0] == 0x0 &&
1743		    new_stp->ls_stateid.other[1] == 0x0 &&
1744		    new_stp->ls_stateid.other[2] == 0x0)
1745			specialid = 1;
1746		else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1747		    new_stp->ls_stateid.other[0] == 0xffffffff &&
1748		    new_stp->ls_stateid.other[1] == 0xffffffff &&
1749		    new_stp->ls_stateid.other[2] == 0xffffffff)
1750			specialid = 2;
1751	}
1752
1753	/*
1754	 * Check for restart conditions (client and server).
1755	 */
1756	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1757	    &new_stp->ls_stateid, specialid);
1758	if (error)
1759		goto out;
1760
1761	/*
1762	 * Check for state resource limit exceeded.
1763	 */
1764	if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1765	    nfsrv_openpluslock > nfsrv_v4statelimit) {
1766		error = NFSERR_RESOURCE;
1767		goto out;
1768	}
1769
1770	/*
1771	 * For the lock case, get another nfslock structure,
1772	 * just in case we need it.
1773	 * Malloc now, before we start sifting through the linked lists,
1774	 * in case we have to wait for memory.
1775	 */
1776tryagain:
1777	if (new_stp->ls_flags & NFSLCK_LOCK)
1778		other_lop = malloc(sizeof (struct nfslock),
1779		    M_NFSDLOCK, M_WAITOK);
1780	filestruct_locked = 0;
1781	reterr = 0;
1782	lfp = NULL;
1783
1784	/*
1785	 * Get the lockfile structure for CFH now, so we can do a sanity
1786	 * check against the stateid, before incrementing the seqid#, since
1787	 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1788	 * shouldn't be incremented for this case.
1789	 * If nfsrv_getlockfile() returns -1, it means "not found", which
1790	 * will be handled later.
1791	 * If we are doing Lock/LockU and local locking is enabled, sleep
1792	 * lock the nfslockfile structure.
1793	 */
1794	getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1795	NFSLOCKSTATE();
1796	if (getlckret == 0) {
1797		if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1798		    nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1799			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1800			    &lfp, &nfh, 1);
1801			if (getlckret == 0)
1802				filestruct_locked = 1;
1803		} else
1804			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1805			    &lfp, &nfh, 0);
1806	}
1807	if (getlckret != 0 && getlckret != -1)
1808		reterr = getlckret;
1809
1810	if (filestruct_locked != 0) {
1811		LIST_INIT(&lfp->lf_rollback);
1812		if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1813			/*
1814			 * For local locking, do the advisory locking now, so
1815			 * that any conflict can be detected. A failure later
1816			 * can be rolled back locally. If an error is returned,
1817			 * struct nfslockfile has been unlocked and any local
1818			 * locking rolled back.
1819			 */
1820			NFSUNLOCKSTATE();
1821			if (vnode_unlocked == 0) {
1822				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1823				vnode_unlocked = 1;
1824				NFSVOPUNLOCK(vp);
1825			}
1826			reterr = nfsrv_locallock(vp, lfp,
1827			    (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1828			    new_lop->lo_first, new_lop->lo_end, cfp, p);
1829			NFSLOCKSTATE();
1830		}
1831	}
1832
1833	if (specialid == 0) {
1834	    if (new_stp->ls_flags & NFSLCK_TEST) {
1835		/*
1836		 * RFC 3530 does not list LockT as an op that renews a
1837		 * lease, but the consensus seems to be that it is ok
1838		 * for a server to do so.
1839		 */
1840		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1841		    (nfsquad_t)((u_quad_t)0), 0, nd, p);
1842
1843		/*
1844		 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1845		 * error returns for LockT, just go ahead and test for a lock,
1846		 * since there are no locks for this client, but other locks
1847		 * can conflict. (ie. same client will always be false)
1848		 */
1849		if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1850		    error = 0;
1851		lckstp = new_stp;
1852	    } else {
1853	      error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1854		(nfsquad_t)((u_quad_t)0), 0, nd, p);
1855	      if (error == 0)
1856		/*
1857		 * Look up the stateid
1858		 */
1859		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1860		  new_stp->ls_flags, &stp);
1861	      /*
1862	       * do some sanity checks for an unconfirmed open or a
1863	       * stateid that refers to the wrong file, for an open stateid
1864	       */
1865	      if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1866		  ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1867		   (getlckret == 0 && stp->ls_lfp != lfp))){
1868		      /*
1869		       * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID
1870		       * The only exception is using SETATTR with SIZE.
1871		       * */
1872                    if ((new_stp->ls_flags &
1873                         (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
1874			     error = NFSERR_BADSTATEID;
1875	      }
1876
1877		if (error == 0 &&
1878		  (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1879		  getlckret == 0 && stp->ls_lfp != lfp)
1880			error = NFSERR_BADSTATEID;
1881
1882	      /*
1883	       * If the lockowner stateid doesn't refer to the same file,
1884	       * I believe that is considered ok, since some clients will
1885	       * only create a single lockowner and use that for all locks
1886	       * on all files.
1887	       * For now, log it as a diagnostic, instead of considering it
1888	       * a BadStateid.
1889	       */
1890	      if (error == 0 && (stp->ls_flags &
1891		  (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1892		  getlckret == 0 && stp->ls_lfp != lfp) {
1893#ifdef DIAGNOSTIC
1894		  printf("Got a lock statid for different file open\n");
1895#endif
1896		  /*
1897		  error = NFSERR_BADSTATEID;
1898		  */
1899	      }
1900
1901	      if (error == 0) {
1902		    if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
1903			/*
1904			 * If haslock set, we've already checked the seqid.
1905			 */
1906			if (!haslock) {
1907			    if (stp->ls_flags & NFSLCK_OPEN)
1908				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1909				    stp->ls_openowner, new_stp->ls_op);
1910			    else
1911				error = NFSERR_BADSTATEID;
1912			}
1913			if (!error)
1914			    nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
1915			if (lckstp) {
1916			    /*
1917			     * For NFSv4.1 and NFSv4.2 allow an
1918			     * open_to_lock_owner when the lock_owner already
1919			     * exists.  Just clear NFSLCK_OPENTOLOCK so that
1920			     * a new lock_owner will not be created.
1921			     * RFC7530 states that the error for NFSv4.0
1922			     * is NFS4ERR_BAD_SEQID.
1923			     */
1924			    if ((nd->nd_flag & ND_NFSV41) != 0)
1925				new_stp->ls_flags &= ~NFSLCK_OPENTOLOCK;
1926			    else
1927				error = NFSERR_BADSEQID;
1928			} else
1929			    lckstp = new_stp;
1930		    } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
1931			/*
1932			 * If haslock set, ditto above.
1933			 */
1934			if (!haslock) {
1935			    if (stp->ls_flags & NFSLCK_OPEN)
1936				error = NFSERR_BADSTATEID;
1937			    else
1938				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1939				    stp, new_stp->ls_op);
1940			}
1941			lckstp = stp;
1942		    } else {
1943			lckstp = stp;
1944		    }
1945	      }
1946	      /*
1947	       * If the seqid part of the stateid isn't the same, return
1948	       * NFSERR_OLDSTATEID for cases other than I/O Ops.
1949	       * For I/O Ops, only return NFSERR_OLDSTATEID if
1950	       * nfsrv_returnoldstateid is set. (The consensus on the email
1951	       * list was that most clients would prefer to not receive
1952	       * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
1953	       * is what will happen, so I use the nfsrv_returnoldstateid to
1954	       * allow for either server configuration.)
1955	       */
1956	      if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
1957		  (((nd->nd_flag & ND_NFSV41) == 0 &&
1958		   (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1959		    nfsrv_returnoldstateid)) ||
1960		   ((nd->nd_flag & ND_NFSV41) != 0 &&
1961		    new_stp->ls_stateid.seqid != 0)))
1962		    error = NFSERR_OLDSTATEID;
1963	    }
1964	}
1965
1966	/*
1967	 * Now we can check for grace.
1968	 */
1969	if (!error)
1970		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
1971	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
1972		nfsrv_checkstable(clp))
1973		error = NFSERR_NOGRACE;
1974	/*
1975	 * If we successfully Reclaimed state, note that.
1976	 */
1977	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
1978		nfsrv_markstable(clp);
1979
1980	/*
1981	 * At this point, either error == NFSERR_BADSTATEID or the
1982	 * seqid# has been updated, so we can return any error.
1983	 * If error == 0, there may be an error in:
1984	 *    nd_repstat - Set by the calling function.
1985	 *    reterr - Set above, if getting the nfslockfile structure
1986	 *       or acquiring the local lock failed.
1987	 *    (If both of these are set, nd_repstat should probably be
1988	 *     returned, since that error was detected before this
1989	 *     function call.)
1990	 */
1991	if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
1992		if (error == 0) {
1993			if (nd->nd_repstat != 0)
1994				error = nd->nd_repstat;
1995			else
1996				error = reterr;
1997		}
1998		if (filestruct_locked != 0) {
1999			/* Roll back local locks. */
2000			NFSUNLOCKSTATE();
2001			if (vnode_unlocked == 0) {
2002				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
2003				vnode_unlocked = 1;
2004				NFSVOPUNLOCK(vp);
2005			}
2006			nfsrv_locallock_rollback(vp, lfp, p);
2007			NFSLOCKSTATE();
2008			nfsrv_unlocklf(lfp);
2009		}
2010		NFSUNLOCKSTATE();
2011		goto out;
2012	}
2013
2014	/*
2015	 * Check the nfsrv_getlockfile return.
2016	 * Returned -1 if no structure found.
2017	 */
2018	if (getlckret == -1) {
2019		error = NFSERR_EXPIRED;
2020		/*
2021		 * Called from lockt, so no lock is OK.
2022		 */
2023		if (new_stp->ls_flags & NFSLCK_TEST) {
2024			error = 0;
2025		} else if (new_stp->ls_flags &
2026		    (NFSLCK_CHECK | NFSLCK_SETATTR)) {
2027			/*
2028			 * Called to check for a lock, OK if the stateid is all
2029			 * 1s or all 0s, but there should be an nfsstate
2030			 * otherwise.
2031			 * (ie. If there is no open, I'll assume no share
2032			 *  deny bits.)
2033			 */
2034			if (specialid)
2035				error = 0;
2036			else
2037				error = NFSERR_BADSTATEID;
2038		}
2039		NFSUNLOCKSTATE();
2040		goto out;
2041	}
2042
2043	/*
2044	 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
2045	 * For NFSLCK_CHECK, allow a read if write access is granted,
2046	 * but check for a deny. For NFSLCK_LOCK, require correct access,
2047	 * which implies a conflicting deny can't exist.
2048	 */
2049	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
2050	    /*
2051	     * Four kinds of state id:
2052	     * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
2053	     * - stateid for an open
2054	     * - stateid for a delegation
2055	     * - stateid for a lock owner
2056	     */
2057	    if (!specialid) {
2058		if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2059		    delegation = 1;
2060		    mystp = stp;
2061		    nfsrv_delaydelegtimeout(stp);
2062	        } else if (stp->ls_flags & NFSLCK_OPEN) {
2063		    mystp = stp;
2064		} else {
2065		    mystp = stp->ls_openstp;
2066		}
2067		/*
2068		 * If locking or checking, require correct access
2069		 * bit set.
2070		 */
2071		if (((new_stp->ls_flags & NFSLCK_LOCK) &&
2072		     !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
2073		       mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
2074		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
2075		      (NFSLCK_CHECK | NFSLCK_READACCESS) &&
2076		     !(mystp->ls_flags & NFSLCK_READACCESS) &&
2077		     nfsrv_allowreadforwriteopen == 0) ||
2078		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
2079		      (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
2080		     !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
2081			if (filestruct_locked != 0) {
2082				/* Roll back local locks. */
2083				NFSUNLOCKSTATE();
2084				if (vnode_unlocked == 0) {
2085					ASSERT_VOP_ELOCKED(vp,
2086					    "nfsrv_lockctrl3");
2087					vnode_unlocked = 1;
2088					NFSVOPUNLOCK(vp);
2089				}
2090				nfsrv_locallock_rollback(vp, lfp, p);
2091				NFSLOCKSTATE();
2092				nfsrv_unlocklf(lfp);
2093			}
2094			NFSUNLOCKSTATE();
2095			error = NFSERR_OPENMODE;
2096			goto out;
2097		}
2098	    } else
2099		mystp = NULL;
2100	    if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
2101		/*
2102		 * Check for a conflicting deny bit.
2103		 */
2104		LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
2105		    if (tstp != mystp) {
2106			bits = tstp->ls_flags;
2107			bits >>= NFSLCK_SHIFT;
2108			if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
2109			    KASSERT(vnode_unlocked == 0,
2110				("nfsrv_lockctrl: vnode unlocked1"));
2111			    ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
2112				vp, p);
2113			    if (ret == 1) {
2114				/*
2115				* nfsrv_clientconflict unlocks state
2116				 * when it returns non-zero.
2117				 */
2118				lckstp = NULL;
2119				goto tryagain;
2120			    }
2121			    if (ret == 0)
2122				NFSUNLOCKSTATE();
2123			    if (ret == 2)
2124				error = NFSERR_PERM;
2125			    else
2126				error = NFSERR_OPENMODE;
2127			    goto out;
2128			}
2129		    }
2130		}
2131
2132		/* We're outta here */
2133		NFSUNLOCKSTATE();
2134		goto out;
2135	    }
2136	}
2137
2138	/*
2139	 * For setattr, just get rid of all the Delegations for other clients.
2140	 */
2141	if (new_stp->ls_flags & NFSLCK_SETATTR) {
2142		KASSERT(vnode_unlocked == 0,
2143		    ("nfsrv_lockctrl: vnode unlocked2"));
2144		ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
2145		if (ret) {
2146			/*
2147			 * nfsrv_cleandeleg() unlocks state when it
2148			 * returns non-zero.
2149			 */
2150			if (ret == -1) {
2151				lckstp = NULL;
2152				goto tryagain;
2153			}
2154			error = ret;
2155			goto out;
2156		}
2157		if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
2158		    (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
2159		     LIST_EMPTY(&lfp->lf_deleg))) {
2160			NFSUNLOCKSTATE();
2161			goto out;
2162		}
2163	}
2164
2165	/*
2166	 * Check for a conflicting delegation. If one is found, call
2167	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2168	 * been set yet, it will get the lock. Otherwise, it will recall
2169	 * the delegation. Then, we try try again...
2170	 * I currently believe the conflict algorithm to be:
2171	 * For Lock Ops (Lock/LockT/LockU)
2172	 * - there is a conflict iff a different client has a write delegation
2173	 * For Reading (Read Op)
2174	 * - there is a conflict iff a different client has a write delegation
2175	 *   (the specialids are always a different client)
2176	 * For Writing (Write/Setattr of size)
2177	 * - there is a conflict if a different client has any delegation
2178	 * - there is a conflict if the same client has a read delegation
2179	 *   (I don't understand why this isn't allowed, but that seems to be
2180	 *    the current consensus?)
2181	 */
2182	tstp = LIST_FIRST(&lfp->lf_deleg);
2183	while (tstp != LIST_END(&lfp->lf_deleg)) {
2184	    nstp = LIST_NEXT(tstp, ls_file);
2185	    if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
2186		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2187		  (new_lop->lo_flags & NFSLCK_READ))) &&
2188		  clp != tstp->ls_clp &&
2189		 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
2190		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2191		   (new_lop->lo_flags & NFSLCK_WRITE) &&
2192		  (clp != tstp->ls_clp ||
2193		   (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
2194		ret = 0;
2195		if (filestruct_locked != 0) {
2196			/* Roll back local locks. */
2197			NFSUNLOCKSTATE();
2198			if (vnode_unlocked == 0) {
2199				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
2200				NFSVOPUNLOCK(vp);
2201			}
2202			nfsrv_locallock_rollback(vp, lfp, p);
2203			NFSLOCKSTATE();
2204			nfsrv_unlocklf(lfp);
2205			NFSUNLOCKSTATE();
2206			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2207			vnode_unlocked = 0;
2208			if (VN_IS_DOOMED(vp))
2209				ret = NFSERR_SERVERFAULT;
2210			NFSLOCKSTATE();
2211		}
2212		if (ret == 0)
2213			ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
2214		if (ret) {
2215		    /*
2216		     * nfsrv_delegconflict unlocks state when it
2217		     * returns non-zero, which it always does.
2218		     */
2219		    if (other_lop) {
2220			free(other_lop, M_NFSDLOCK);
2221			other_lop = NULL;
2222		    }
2223		    if (ret == -1) {
2224			lckstp = NULL;
2225			goto tryagain;
2226		    }
2227		    error = ret;
2228		    goto out;
2229		}
2230		/* Never gets here. */
2231	    }
2232	    tstp = nstp;
2233	}
2234
2235	/*
2236	 * Handle the unlock case by calling nfsrv_updatelock().
2237	 * (Should I have done some access checking above for unlock? For now,
2238	 *  just let it happen.)
2239	 */
2240	if (new_stp->ls_flags & NFSLCK_UNLOCK) {
2241		first = new_lop->lo_first;
2242		end = new_lop->lo_end;
2243		nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2244		stateidp->seqid = ++(stp->ls_stateid.seqid);
2245		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2246			stateidp->seqid = stp->ls_stateid.seqid = 1;
2247		stateidp->other[0] = stp->ls_stateid.other[0];
2248		stateidp->other[1] = stp->ls_stateid.other[1];
2249		stateidp->other[2] = stp->ls_stateid.other[2];
2250		if (filestruct_locked != 0) {
2251			NFSUNLOCKSTATE();
2252			if (vnode_unlocked == 0) {
2253				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2254				vnode_unlocked = 1;
2255				NFSVOPUNLOCK(vp);
2256			}
2257			/* Update the local locks. */
2258			nfsrv_localunlock(vp, lfp, first, end, p);
2259			NFSLOCKSTATE();
2260			nfsrv_unlocklf(lfp);
2261		}
2262		NFSUNLOCKSTATE();
2263		goto out;
2264	}
2265
2266	/*
2267	 * Search for a conflicting lock. A lock conflicts if:
2268	 * - the lock range overlaps and
2269	 * - at least one lock is a write lock and
2270	 * - it is not owned by the same lock owner
2271	 */
2272	if (!delegation) {
2273	  LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2274	    if (new_lop->lo_end > lop->lo_first &&
2275		new_lop->lo_first < lop->lo_end &&
2276		(new_lop->lo_flags == NFSLCK_WRITE ||
2277		 lop->lo_flags == NFSLCK_WRITE) &&
2278		lckstp != lop->lo_stp &&
2279		(clp != lop->lo_stp->ls_clp ||
2280		 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2281		 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2282		    lckstp->ls_ownerlen))) {
2283		if (other_lop) {
2284		    free(other_lop, M_NFSDLOCK);
2285		    other_lop = NULL;
2286		}
2287		if (vnode_unlocked != 0)
2288		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2289			NULL, p);
2290		else
2291		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2292			vp, p);
2293		if (ret == 1) {
2294		    if (filestruct_locked != 0) {
2295			if (vnode_unlocked == 0) {
2296				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2297				NFSVOPUNLOCK(vp);
2298			}
2299			/* Roll back local locks. */
2300			nfsrv_locallock_rollback(vp, lfp, p);
2301			NFSLOCKSTATE();
2302			nfsrv_unlocklf(lfp);
2303			NFSUNLOCKSTATE();
2304			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2305			vnode_unlocked = 0;
2306			if (VN_IS_DOOMED(vp)) {
2307				error = NFSERR_SERVERFAULT;
2308				goto out;
2309			}
2310		    }
2311		    /*
2312		     * nfsrv_clientconflict() unlocks state when it
2313		     * returns non-zero.
2314		     */
2315		    lckstp = NULL;
2316		    goto tryagain;
2317		}
2318		/*
2319		 * Found a conflicting lock, so record the conflict and
2320		 * return the error.
2321		 */
2322		if (cfp != NULL && ret == 0) {
2323		    cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2324		    cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2325		    cfp->cl_first = lop->lo_first;
2326		    cfp->cl_end = lop->lo_end;
2327		    cfp->cl_flags = lop->lo_flags;
2328		    cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2329		    NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2330			cfp->cl_ownerlen);
2331		}
2332		if (ret == 2)
2333		    error = NFSERR_PERM;
2334		else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2335		    error = NFSERR_RECLAIMCONFLICT;
2336		else if (new_stp->ls_flags & NFSLCK_CHECK)
2337		    error = NFSERR_LOCKED;
2338		else
2339		    error = NFSERR_DENIED;
2340		if (filestruct_locked != 0 && ret == 0) {
2341			/* Roll back local locks. */
2342			NFSUNLOCKSTATE();
2343			if (vnode_unlocked == 0) {
2344				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2345				vnode_unlocked = 1;
2346				NFSVOPUNLOCK(vp);
2347			}
2348			nfsrv_locallock_rollback(vp, lfp, p);
2349			NFSLOCKSTATE();
2350			nfsrv_unlocklf(lfp);
2351		}
2352		if (ret == 0)
2353			NFSUNLOCKSTATE();
2354		goto out;
2355	    }
2356	  }
2357	}
2358
2359	/*
2360	 * We only get here if there was no lock that conflicted.
2361	 */
2362	if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2363		NFSUNLOCKSTATE();
2364		goto out;
2365	}
2366
2367	/*
2368	 * We only get here when we are creating or modifying a lock.
2369	 * There are two variants:
2370	 * - exist_lock_owner where lock_owner exists
2371	 * - open_to_lock_owner with new lock_owner
2372	 */
2373	first = new_lop->lo_first;
2374	end = new_lop->lo_end;
2375	lock_flags = new_lop->lo_flags;
2376	if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2377		nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2378		stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2379		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2380			stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2381		stateidp->other[0] = lckstp->ls_stateid.other[0];
2382		stateidp->other[1] = lckstp->ls_stateid.other[1];
2383		stateidp->other[2] = lckstp->ls_stateid.other[2];
2384	} else {
2385		/*
2386		 * The new open_to_lock_owner case.
2387		 * Link the new nfsstate into the lists.
2388		 */
2389		new_stp->ls_seq = new_stp->ls_opentolockseq;
2390		nfsrvd_refcache(new_stp->ls_op);
2391		stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2392		stateidp->other[0] = new_stp->ls_stateid.other[0] =
2393		    clp->lc_clientid.lval[0];
2394		stateidp->other[1] = new_stp->ls_stateid.other[1] =
2395		    clp->lc_clientid.lval[1];
2396		stateidp->other[2] = new_stp->ls_stateid.other[2] =
2397		    nfsrv_nextstateindex(clp);
2398		new_stp->ls_clp = clp;
2399		LIST_INIT(&new_stp->ls_lock);
2400		new_stp->ls_openstp = stp;
2401		new_stp->ls_lfp = lfp;
2402		nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2403		    lfp);
2404		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2405		    new_stp, ls_hash);
2406		LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2407		*new_lopp = NULL;
2408		*new_stpp = NULL;
2409		NFSD_VNET(nfsstatsv1_p)->srvlockowners++;
2410		nfsrv_openpluslock++;
2411	}
2412	if (filestruct_locked != 0) {
2413		NFSUNLOCKSTATE();
2414		nfsrv_locallock_commit(lfp, lock_flags, first, end);
2415		NFSLOCKSTATE();
2416		nfsrv_unlocklf(lfp);
2417	}
2418	NFSUNLOCKSTATE();
2419
2420out:
2421	if (haslock) {
2422		NFSLOCKV4ROOTMUTEX();
2423		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2424		NFSUNLOCKV4ROOTMUTEX();
2425	}
2426	if (vnode_unlocked != 0) {
2427		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2428		if (error == 0 && VN_IS_DOOMED(vp))
2429			error = NFSERR_SERVERFAULT;
2430	}
2431	if (other_lop)
2432		free(other_lop, M_NFSDLOCK);
2433	NFSEXITCODE2(error, nd);
2434	return (error);
2435}
2436
2437/*
2438 * Check for state errors for Open.
2439 * repstat is passed back out as an error if more critical errors
2440 * are not detected.
2441 */
2442int
2443nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
2444    struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
2445    NFSPROC_T *p, int repstat)
2446{
2447	struct nfsstate *stp, *nstp;
2448	struct nfsclient *clp;
2449	struct nfsstate *ownerstp;
2450	struct nfslockfile *lfp, *new_lfp;
2451	int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
2452
2453	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2454		readonly = 1;
2455	/*
2456	 * Check for restart conditions (client and server).
2457	 */
2458	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2459		&new_stp->ls_stateid, 0);
2460	if (error)
2461		goto out;
2462
2463	/*
2464	 * Check for state resource limit exceeded.
2465	 * Technically this should be SMP protected, but the worst
2466	 * case error is "out by one or two" on the count when it
2467	 * returns NFSERR_RESOURCE and the limit is just a rather
2468	 * arbitrary high water mark, so no harm is done.
2469	 */
2470	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
2471		error = NFSERR_RESOURCE;
2472		goto out;
2473	}
2474
2475tryagain:
2476	new_lfp = malloc(sizeof (struct nfslockfile),
2477	    M_NFSDLOCKFILE, M_WAITOK);
2478	if (vp)
2479		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2480		    NULL, p);
2481	NFSLOCKSTATE();
2482	/*
2483	 * Get the nfsclient structure.
2484	 */
2485	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2486	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2487
2488	/*
2489	 * Look up the open owner. See if it needs confirmation and
2490	 * check the seq#, as required.
2491	 */
2492	if (!error)
2493		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2494
2495	if (!error && ownerstp) {
2496		error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2497		    new_stp->ls_op);
2498		/*
2499		 * If the OpenOwner hasn't been confirmed, assume the
2500		 * old one was a replay and this one is ok.
2501		 * See: RFC3530 Sec. 14.2.18.
2502		 */
2503		if (error == NFSERR_BADSEQID &&
2504		    (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2505			error = 0;
2506	}
2507
2508	/*
2509	 * Check for grace.
2510	 */
2511	if (!error)
2512		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2513	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2514		nfsrv_checkstable(clp))
2515		error = NFSERR_NOGRACE;
2516
2517	/*
2518	 * If none of the above errors occurred, let repstat be
2519	 * returned.
2520	 */
2521	if (repstat && !error)
2522		error = repstat;
2523	if (error) {
2524		NFSUNLOCKSTATE();
2525		if (haslock) {
2526			NFSLOCKV4ROOTMUTEX();
2527			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2528			NFSUNLOCKV4ROOTMUTEX();
2529		}
2530		free(new_lfp, M_NFSDLOCKFILE);
2531		goto out;
2532	}
2533
2534	/*
2535	 * If vp == NULL, the file doesn't exist yet, so return ok.
2536	 * (This always happens on the first pass, so haslock must be 0.)
2537	 */
2538	if (vp == NULL) {
2539		NFSUNLOCKSTATE();
2540		free(new_lfp, M_NFSDLOCKFILE);
2541		goto out;
2542	}
2543
2544	/*
2545	 * Get the structure for the underlying file.
2546	 */
2547	if (getfhret)
2548		error = getfhret;
2549	else
2550		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2551		    NULL, 0);
2552	if (new_lfp)
2553		free(new_lfp, M_NFSDLOCKFILE);
2554	if (error) {
2555		NFSUNLOCKSTATE();
2556		if (haslock) {
2557			NFSLOCKV4ROOTMUTEX();
2558			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2559			NFSUNLOCKV4ROOTMUTEX();
2560		}
2561		goto out;
2562	}
2563
2564	/*
2565	 * Search for a conflicting open/share.
2566	 */
2567	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2568	    /*
2569	     * For Delegate_Cur, search for the matching Delegation,
2570	     * which indicates no conflict.
2571	     * An old delegation should have been recovered by the
2572	     * client doing a Claim_DELEGATE_Prev, so I won't let
2573	     * it match and return NFSERR_EXPIRED. Should I let it
2574	     * match?
2575	     */
2576	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2577		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2578		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2579		    stateidp->seqid == 0) ||
2580		    stateidp->seqid == stp->ls_stateid.seqid) &&
2581		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2582			  NFSX_STATEIDOTHER))
2583			break;
2584	    }
2585	    if (stp == LIST_END(&lfp->lf_deleg) ||
2586		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2587		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2588		NFSUNLOCKSTATE();
2589		if (haslock) {
2590			NFSLOCKV4ROOTMUTEX();
2591			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2592			NFSUNLOCKV4ROOTMUTEX();
2593		}
2594		error = NFSERR_EXPIRED;
2595		goto out;
2596	    }
2597	}
2598
2599	/*
2600	 * Check for access/deny bit conflicts. I check for the same
2601	 * owner as well, in case the client didn't bother.
2602	 */
2603	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2604		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2605		    (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2606		      ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2607		     ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2608		      ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2609			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2610			if (ret == 1) {
2611				/*
2612				 * nfsrv_clientconflict() unlocks
2613				 * state when it returns non-zero.
2614				 */
2615				goto tryagain;
2616			}
2617			if (ret == 2)
2618				error = NFSERR_PERM;
2619			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2620				error = NFSERR_RECLAIMCONFLICT;
2621			else
2622				error = NFSERR_SHAREDENIED;
2623			if (ret == 0)
2624				NFSUNLOCKSTATE();
2625			if (haslock) {
2626				NFSLOCKV4ROOTMUTEX();
2627				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2628				NFSUNLOCKV4ROOTMUTEX();
2629			}
2630			goto out;
2631		}
2632	}
2633
2634	/*
2635	 * Check for a conflicting delegation. If one is found, call
2636	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2637	 * been set yet, it will get the lock. Otherwise, it will recall
2638	 * the delegation. Then, we try try again...
2639	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2640	 *  isn't a conflict.)
2641	 * I currently believe the conflict algorithm to be:
2642	 * For Open with Read Access and Deny None
2643	 * - there is a conflict iff a different client has a write delegation
2644	 * For Open with other Write Access or any Deny except None
2645	 * - there is a conflict if a different client has any delegation
2646	 * - there is a conflict if the same client has a read delegation
2647	 *   (The current consensus is that this last case should be
2648	 *    considered a conflict since the client with a read delegation
2649	 *    could have done an Open with ReadAccess and WriteDeny
2650	 *    locally and then not have checked for the WriteDeny.)
2651	 * Don't check for a Reclaim, since that will be dealt with
2652	 * by nfsrv_openctrl().
2653	 */
2654	if (!(new_stp->ls_flags &
2655		(NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
2656	    stp = LIST_FIRST(&lfp->lf_deleg);
2657	    while (stp != LIST_END(&lfp->lf_deleg)) {
2658		nstp = LIST_NEXT(stp, ls_file);
2659		if ((readonly && stp->ls_clp != clp &&
2660		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2661		    (!readonly && (stp->ls_clp != clp ||
2662		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2663			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2664			if (ret) {
2665			    /*
2666			     * nfsrv_delegconflict() unlocks state
2667			     * when it returns non-zero.
2668			     */
2669			    if (ret == -1)
2670				goto tryagain;
2671			    error = ret;
2672			    goto out;
2673			}
2674		}
2675		stp = nstp;
2676	    }
2677	}
2678	NFSUNLOCKSTATE();
2679	if (haslock) {
2680		NFSLOCKV4ROOTMUTEX();
2681		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2682		NFSUNLOCKV4ROOTMUTEX();
2683	}
2684
2685out:
2686	NFSEXITCODE2(error, nd);
2687	return (error);
2688}
2689
2690/*
2691 * Open control function to create/update open state for an open.
2692 */
2693int
2694nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2695    struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2696    nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2697    NFSPROC_T *p, u_quad_t filerev)
2698{
2699	struct nfsstate *new_stp = *new_stpp;
2700	struct nfsstate *stp, *nstp;
2701	struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2702	struct nfslockfile *lfp, *new_lfp;
2703	struct nfsclient *clp;
2704	int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2705	int readonly = 0, cbret = 1, getfhret = 0;
2706	int gotstate = 0, len = 0;
2707	u_char *clidp = NULL;
2708
2709	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2710		readonly = 1;
2711	/*
2712	 * Check for restart conditions (client and server).
2713	 * (Paranoia, should have been detected by nfsrv_opencheck().)
	 * If an error does show up, return NFSERR_EXPIRED, since the
	 * seqid# has already been incremented.
2716	 */
2717	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2718	    &new_stp->ls_stateid, 0);
2719	if (error) {
2720		printf("Nfsd: openctrl unexpected restart err=%d\n",
2721		    error);
2722		error = NFSERR_EXPIRED;
2723		goto out;
2724	}
2725
2726	clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
2727tryagain:
2728	new_lfp = malloc(sizeof (struct nfslockfile),
2729	    M_NFSDLOCKFILE, M_WAITOK);
2730	new_open = malloc(sizeof (struct nfsstate),
2731	    M_NFSDSTATE, M_WAITOK);
2732	new_deleg = malloc(sizeof (struct nfsstate),
2733	    M_NFSDSTATE, M_WAITOK);
2734	getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2735	    NULL, p);
2736	NFSLOCKSTATE();
2737	/*
2738	 * Get the client structure. Since the linked lists could be changed
2739	 * by other nfsd processes if this process does a tsleep(), one of
2740	 * two things must be done.
2741	 * 1 - don't tsleep()
2742	 * or
2743	 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2744	 *     before using the lists, since this lock stops the other
2745	 *     nfsd. This should only be used for rare cases, since it
2746	 *     essentially single threads the nfsd.
2747	 *     At this time, it is only done for cases where the stable
2748	 *     storage file must be written prior to completion of state
2749	 *     expiration.
2750	 */
2751	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2752	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2753	if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2754	    clp->lc_program) {
2755		/*
2756		 * This happens on the first open for a client
2757		 * that supports callbacks.
2758		 */
2759		NFSUNLOCKSTATE();
2760		/*
2761		 * Although nfsrv_docallback() will sleep, clp won't
2762		 * go away, since they are only removed when the
2763		 * nfsv4_lock() has blocked the nfsd threads. The
2764		 * fields in clp can change, but having multiple
2765		 * threads do this Null callback RPC should be
2766		 * harmless.
2767		 */
2768		cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2769		    NULL, 0, NULL, NULL, NULL, 0, p);
2770		NFSLOCKSTATE();
2771		clp->lc_flags &= ~LCL_NEEDSCBNULL;
2772		if (!cbret)
2773			clp->lc_flags |= LCL_CALLBACKSON;
2774	}
2775
2776	/*
2777	 * Look up the open owner. See if it needs confirmation and
2778	 * check the seq#, as required.
2779	 */
2780	if (!error)
2781		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2782
2783	if (error) {
2784		NFSUNLOCKSTATE();
2785		printf("Nfsd: openctrl unexpected state err=%d\n",
2786			error);
2787		free(new_lfp, M_NFSDLOCKFILE);
2788		free(new_open, M_NFSDSTATE);
2789		free(new_deleg, M_NFSDSTATE);
2790		if (haslock) {
2791			NFSLOCKV4ROOTMUTEX();
2792			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2793			NFSUNLOCKV4ROOTMUTEX();
2794		}
2795		error = NFSERR_EXPIRED;
2796		goto out;
2797	}
2798
2799	if (new_stp->ls_flags & NFSLCK_RECLAIM)
2800		nfsrv_markstable(clp);
2801
2802	/*
2803	 * Get the structure for the underlying file.
2804	 */
2805	if (getfhret)
2806		error = getfhret;
2807	else
2808		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2809		    NULL, 0);
2810	if (new_lfp)
2811		free(new_lfp, M_NFSDLOCKFILE);
2812	if (error) {
2813		NFSUNLOCKSTATE();
2814		printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2815		    error);
2816		free(new_open, M_NFSDSTATE);
2817		free(new_deleg, M_NFSDSTATE);
2818		if (haslock) {
2819			NFSLOCKV4ROOTMUTEX();
2820			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2821			NFSUNLOCKV4ROOTMUTEX();
2822		}
2823		goto out;
2824	}
2825
2826	/*
2827	 * Search for a conflicting open/share.
2828	 */
2829	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2830	    /*
2831	     * For Delegate_Cur, search for the matching Delegation,
2832	     * which indicates no conflict.
2833	     * An old delegation should have been recovered by the
2834	     * client doing a Claim_DELEGATE_Prev, so I won't let
2835	     * it match and return NFSERR_EXPIRED. Should I let it
2836	     * match?
2837	     */
2838	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2839		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2840		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2841		    stateidp->seqid == 0) ||
2842		    stateidp->seqid == stp->ls_stateid.seqid) &&
2843		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2844			NFSX_STATEIDOTHER))
2845			break;
2846	    }
2847	    if (stp == LIST_END(&lfp->lf_deleg) ||
2848		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2849		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2850		NFSUNLOCKSTATE();
2851		printf("Nfsd openctrl unexpected expiry\n");
2852		free(new_open, M_NFSDSTATE);
2853		free(new_deleg, M_NFSDSTATE);
2854		if (haslock) {
2855			NFSLOCKV4ROOTMUTEX();
2856			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2857			NFSUNLOCKV4ROOTMUTEX();
2858		}
2859		error = NFSERR_EXPIRED;
2860		goto out;
2861	    }
2862
2863	    /*
2864	     * Don't issue a Delegation, since one already exists and
2865	     * delay delegation timeout, as required.
2866	     */
2867	    delegate = 0;
2868	    nfsrv_delaydelegtimeout(stp);
2869	}
2870
2871	/*
2872	 * Check for access/deny bit conflicts. I also check for the
2873	 * same owner, since the client might not have bothered to check.
2874	 * Also, note an open for the same file and owner, if found,
2875	 * which is all we do here for Delegate_Cur, since conflict
2876	 * checking is already done.
2877	 */
2878	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2879		if (ownerstp && stp->ls_openowner == ownerstp)
2880			openstp = stp;
2881		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2882		    /*
2883		     * If another client has the file open, the only
2884		     * delegation that can be issued is a Read delegation
2885		     * and only if it is a Read open with Deny none.
2886		     */
2887		    if (clp != stp->ls_clp) {
2888			if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2889			    NFSLCK_READACCESS)
2890			    writedeleg = 0;
2891			else
2892			    delegate = 0;
2893		    }
2894		    if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2895		        ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2896		       ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2897		        ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
2898			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2899			if (ret == 1) {
2900				/*
2901				 * nfsrv_clientconflict() unlocks state
2902				 * when it returns non-zero.
2903				 */
2904				free(new_open, M_NFSDSTATE);
2905				free(new_deleg, M_NFSDSTATE);
2906				openstp = NULL;
2907				goto tryagain;
2908			}
2909			if (ret == 2)
2910				error = NFSERR_PERM;
2911			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2912				error = NFSERR_RECLAIMCONFLICT;
2913			else
2914				error = NFSERR_SHAREDENIED;
2915			if (ret == 0)
2916				NFSUNLOCKSTATE();
2917			if (haslock) {
2918				NFSLOCKV4ROOTMUTEX();
2919				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2920				NFSUNLOCKV4ROOTMUTEX();
2921			}
2922			free(new_open, M_NFSDSTATE);
2923			free(new_deleg, M_NFSDSTATE);
2924			printf("nfsd openctrl unexpected client cnfl\n");
2925			goto out;
2926		    }
2927		}
2928	}
2929
2930	/*
2931	 * Check for a conflicting delegation. If one is found, call
2932	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2933	 * been set yet, it will get the lock. Otherwise, it will recall
2934	 * the delegation. Then, we try try again...
2935	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2936	 *  isn't a conflict.)
2937	 * I currently believe the conflict algorithm to be:
2938	 * For Open with Read Access and Deny None
2939	 * - there is a conflict iff a different client has a write delegation
2940	 * For Open with other Write Access or any Deny except None
2941	 * - there is a conflict if a different client has any delegation
2942	 * - there is a conflict if the same client has a read delegation
2943	 *   (The current consensus is that this last case should be
2944	 *    considered a conflict since the client with a read delegation
2945	 *    could have done an Open with ReadAccess and WriteDeny
2946	 *    locally and then not have checked for the WriteDeny.)
2947	 */
2948	if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
2949	    stp = LIST_FIRST(&lfp->lf_deleg);
2950	    while (stp != LIST_END(&lfp->lf_deleg)) {
2951		nstp = LIST_NEXT(stp, ls_file);
2952		if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
2953			writedeleg = 0;
2954		else
2955			delegate = 0;
2956		if ((readonly && stp->ls_clp != clp &&
2957		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2958		    (!readonly && (stp->ls_clp != clp ||
2959		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2960		    if (new_stp->ls_flags & NFSLCK_RECLAIM) {
2961			delegate = 2;
2962		    } else {
2963			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2964			if (ret) {
2965			    /*
2966			     * nfsrv_delegconflict() unlocks state
2967			     * when it returns non-zero.
2968			     */
2969			    printf("Nfsd openctrl unexpected deleg cnfl\n");
2970			    free(new_open, M_NFSDSTATE);
2971			    free(new_deleg, M_NFSDSTATE);
2972			    if (ret == -1) {
2973				openstp = NULL;
2974				goto tryagain;
2975			    }
2976			    error = ret;
2977			    goto out;
2978			}
2979		    }
2980		}
2981		stp = nstp;
2982	    }
2983	}
2984
2985	/*
2986	 * We only get here if there was no open that conflicted.
2987	 * If an open for the owner exists, or in the access/deny bits.
2988	 * Otherwise it is a new open. If the open_owner hasn't been
2989	 * confirmed, replace the open with the new one needing confirmation,
2990	 * otherwise add the open.
2991	 */
2992	if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
2993	    /*
2994	     * Handle NFSLCK_DELEGPREV by searching the old delegations for
2995	     * a match. If found, just move the old delegation to the current
2996	     * delegation list and issue open. If not found, return
2997	     * NFSERR_EXPIRED.
2998	     */
2999	    LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
3000		if (stp->ls_lfp == lfp) {
3001		    /* Found it */
3002		    if (stp->ls_clp != clp)
3003			panic("olddeleg clp");
3004		    LIST_REMOVE(stp, ls_list);
3005		    LIST_REMOVE(stp, ls_hash);
3006		    stp->ls_flags &= ~NFSLCK_OLDDELEG;
3007		    stp->ls_stateid.seqid = delegstateidp->seqid = 1;
3008		    stp->ls_stateid.other[0] = delegstateidp->other[0] =
3009			clp->lc_clientid.lval[0];
3010		    stp->ls_stateid.other[1] = delegstateidp->other[1] =
3011			clp->lc_clientid.lval[1];
3012		    stp->ls_stateid.other[2] = delegstateidp->other[2] =
3013			nfsrv_nextstateindex(clp);
3014		    stp->ls_compref = nd->nd_compref;
3015		    LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
3016		    LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3017			stp->ls_stateid), stp, ls_hash);
3018		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
3019			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3020		    else
3021			*rflagsp |= NFSV4OPEN_READDELEGATE;
3022		    clp->lc_delegtime = NFSD_MONOSEC +
3023			nfsrv_lease + NFSRV_LEASEDELTA;
3024
3025		    /*
3026		     * Now, do the associated open.
3027		     */
3028		    new_open->ls_stateid.seqid = 1;
3029		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3030		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3031		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3032		    new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
3033			NFSLCK_OPEN;
3034		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
3035			new_open->ls_flags |= (NFSLCK_READACCESS |
3036			    NFSLCK_WRITEACCESS);
3037		    else
3038			new_open->ls_flags |= NFSLCK_READACCESS;
3039		    new_open->ls_uid = new_stp->ls_uid;
3040		    new_open->ls_lfp = lfp;
3041		    new_open->ls_clp = clp;
3042		    LIST_INIT(&new_open->ls_open);
3043		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3044		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3045			new_open, ls_hash);
3046		    /*
3047		     * and handle the open owner
3048		     */
3049		    if (ownerstp) {
3050			new_open->ls_openowner = ownerstp;
3051			LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
3052		    } else {
3053			new_open->ls_openowner = new_stp;
3054			new_stp->ls_flags = 0;
3055			nfsrvd_refcache(new_stp->ls_op);
3056			new_stp->ls_noopens = 0;
3057			LIST_INIT(&new_stp->ls_open);
3058			LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3059			LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3060			*new_stpp = NULL;
3061			NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3062			nfsrv_openpluslock++;
3063		    }
3064		    openstp = new_open;
3065		    new_open = NULL;
3066		    NFSD_VNET(nfsstatsv1_p)->srvopens++;
3067		    nfsrv_openpluslock++;
3068		    break;
3069		}
3070	    }
3071	    if (stp == LIST_END(&clp->lc_olddeleg))
3072		error = NFSERR_EXPIRED;
3073	} else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
3074	    /*
3075	     * Scan to see that no delegation for this client and file
3076	     * doesn't already exist.
3077	     * There also shouldn't yet be an Open for this file and
3078	     * openowner.
3079	     */
3080	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
3081		if (stp->ls_clp == clp)
3082		    break;
3083	    }
3084	    if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
3085		/*
3086		 * This is the Claim_Previous case with a delegation
3087		 * type != Delegate_None.
3088		 */
3089		/*
3090		 * First, add the delegation. (Although we must issue the
3091		 * delegation, we can also ask for an immediate return.)
3092		 */
3093		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3094		new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
3095		    clp->lc_clientid.lval[0];
3096		new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
3097		    clp->lc_clientid.lval[1];
3098		new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
3099		    nfsrv_nextstateindex(clp);
3100		if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
3101		    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3102			NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3103		    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3104		    nfsrv_writedelegcnt++;
3105		} else {
3106		    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3107			NFSLCK_READACCESS);
3108		    *rflagsp |= NFSV4OPEN_READDELEGATE;
3109		}
3110		new_deleg->ls_uid = new_stp->ls_uid;
3111		new_deleg->ls_lfp = lfp;
3112		new_deleg->ls_clp = clp;
3113		new_deleg->ls_filerev = filerev;
3114		new_deleg->ls_compref = nd->nd_compref;
3115		new_deleg->ls_lastrecall = 0;
3116		LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3117		LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3118		    new_deleg->ls_stateid), new_deleg, ls_hash);
3119		LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3120		new_deleg = NULL;
3121		if (delegate == 2 || nfsrv_issuedelegs == 0 ||
3122		    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3123		     LCL_CALLBACKSON ||
3124		    NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
3125		    !NFSVNO_DELEGOK(vp))
3126		    *rflagsp |= NFSV4OPEN_RECALL;
3127		NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
3128		nfsrv_openpluslock++;
3129		nfsrv_delegatecnt++;
3130
3131		/*
3132		 * Now, do the associated open.
3133		 */
3134		new_open->ls_stateid.seqid = 1;
3135		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3136		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3137		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3138		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
3139		    NFSLCK_OPEN;
3140		if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
3141			new_open->ls_flags |= (NFSLCK_READACCESS |
3142			    NFSLCK_WRITEACCESS);
3143		else
3144			new_open->ls_flags |= NFSLCK_READACCESS;
3145		new_open->ls_uid = new_stp->ls_uid;
3146		new_open->ls_lfp = lfp;
3147		new_open->ls_clp = clp;
3148		LIST_INIT(&new_open->ls_open);
3149		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3150		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3151		   new_open, ls_hash);
3152		/*
3153		 * and handle the open owner
3154		 */
3155		if (ownerstp) {
3156		    new_open->ls_openowner = ownerstp;
3157		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3158		} else {
3159		    new_open->ls_openowner = new_stp;
3160		    new_stp->ls_flags = 0;
3161		    nfsrvd_refcache(new_stp->ls_op);
3162		    new_stp->ls_noopens = 0;
3163		    LIST_INIT(&new_stp->ls_open);
3164		    LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3165		    LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3166		    *new_stpp = NULL;
3167		    NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3168		    nfsrv_openpluslock++;
3169		}
3170		openstp = new_open;
3171		new_open = NULL;
3172		NFSD_VNET(nfsstatsv1_p)->srvopens++;
3173		nfsrv_openpluslock++;
3174	    } else {
3175		error = NFSERR_RECLAIMCONFLICT;
3176	    }
3177	} else if (ownerstp) {
3178		if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
3179		    /* Replace the open */
3180		    if (ownerstp->ls_op)
3181			nfsrvd_derefcache(ownerstp->ls_op);
3182		    ownerstp->ls_op = new_stp->ls_op;
3183		    nfsrvd_refcache(ownerstp->ls_op);
3184		    ownerstp->ls_seq = new_stp->ls_seq;
3185		    *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3186		    stp = LIST_FIRST(&ownerstp->ls_open);
3187		    stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3188			NFSLCK_OPEN;
3189		    stp->ls_stateid.seqid = 1;
3190		    stp->ls_uid = new_stp->ls_uid;
3191		    if (lfp != stp->ls_lfp) {
3192			LIST_REMOVE(stp, ls_file);
3193			LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
3194			stp->ls_lfp = lfp;
3195		    }
3196		    openstp = stp;
3197		} else if (openstp) {
3198		    openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
3199		    openstp->ls_stateid.seqid++;
3200		    if ((nd->nd_flag & ND_NFSV41) != 0 &&
3201			openstp->ls_stateid.seqid == 0)
3202			openstp->ls_stateid.seqid = 1;
3203
3204		    /*
3205		     * This is where we can choose to issue a delegation.
3206		     */
3207		    if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3208			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3209		    else if (nfsrv_issuedelegs == 0)
3210			*rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
3211		    else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3212			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3213		    else if (delegate == 0 || writedeleg == 0 ||
3214			NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
3215			nfsrv_writedelegifpos == 0) ||
3216			!NFSVNO_DELEGOK(vp) ||
3217			(new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
3218			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3219			 LCL_CALLBACKSON)
3220			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3221		    else {
3222			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3223			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3224			    = clp->lc_clientid.lval[0];
3225			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3226			    = clp->lc_clientid.lval[1];
3227			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3228			    = nfsrv_nextstateindex(clp);
3229			new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3230			    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3231			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3232			new_deleg->ls_uid = new_stp->ls_uid;
3233			new_deleg->ls_lfp = lfp;
3234			new_deleg->ls_clp = clp;
3235			new_deleg->ls_filerev = filerev;
3236			new_deleg->ls_compref = nd->nd_compref;
3237			new_deleg->ls_lastrecall = 0;
3238			nfsrv_writedelegcnt++;
3239			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3240			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3241			    new_deleg->ls_stateid), new_deleg, ls_hash);
3242			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3243			new_deleg = NULL;
3244			NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
3245			nfsrv_openpluslock++;
3246			nfsrv_delegatecnt++;
3247		    }
3248		} else {
3249		    new_open->ls_stateid.seqid = 1;
3250		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3251		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3252		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3253		    new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3254			NFSLCK_OPEN;
3255		    new_open->ls_uid = new_stp->ls_uid;
3256		    new_open->ls_openowner = ownerstp;
3257		    new_open->ls_lfp = lfp;
3258		    new_open->ls_clp = clp;
3259		    LIST_INIT(&new_open->ls_open);
3260		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3261		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3262		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3263			new_open, ls_hash);
3264		    openstp = new_open;
3265		    new_open = NULL;
3266		    NFSD_VNET(nfsstatsv1_p)->srvopens++;
3267		    nfsrv_openpluslock++;
3268
3269		    /*
3270		     * This is where we can choose to issue a delegation.
3271		     */
3272		    if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3273			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3274		    else if (nfsrv_issuedelegs == 0)
3275			*rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
3276		    else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3277			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3278		    else if (delegate == 0 || (writedeleg == 0 &&
3279			readonly == 0) || !NFSVNO_DELEGOK(vp) ||
3280			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3281			 LCL_CALLBACKSON)
3282			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3283		    else {
3284			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3285			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3286			    = clp->lc_clientid.lval[0];
3287			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3288			    = clp->lc_clientid.lval[1];
3289			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3290			    = nfsrv_nextstateindex(clp);
3291			if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3292			    (nfsrv_writedelegifpos || !readonly) &&
3293			    (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
3294			    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3295				NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3296			    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3297			    nfsrv_writedelegcnt++;
3298			} else {
3299			    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3300				NFSLCK_READACCESS);
3301			    *rflagsp |= NFSV4OPEN_READDELEGATE;
3302			}
3303			new_deleg->ls_uid = new_stp->ls_uid;
3304			new_deleg->ls_lfp = lfp;
3305			new_deleg->ls_clp = clp;
3306			new_deleg->ls_filerev = filerev;
3307			new_deleg->ls_compref = nd->nd_compref;
3308			new_deleg->ls_lastrecall = 0;
3309			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3310			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3311			    new_deleg->ls_stateid), new_deleg, ls_hash);
3312			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3313			new_deleg = NULL;
3314			NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
3315			nfsrv_openpluslock++;
3316			nfsrv_delegatecnt++;
3317		    }
3318		}
3319	} else {
3320		/*
3321		 * New owner case. Start the open_owner sequence with a
3322		 * Needs confirmation (unless a reclaim) and hang the
3323		 * new open off it.
3324		 */
3325		new_open->ls_stateid.seqid = 1;
3326		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3327		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3328		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3329		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3330		    NFSLCK_OPEN;
3331		new_open->ls_uid = new_stp->ls_uid;
3332		LIST_INIT(&new_open->ls_open);
3333		new_open->ls_openowner = new_stp;
3334		new_open->ls_lfp = lfp;
3335		new_open->ls_clp = clp;
3336		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3337		if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3338			new_stp->ls_flags = 0;
3339		} else if ((nd->nd_flag & ND_NFSV41) != 0) {
3340			/* NFSv4.1 never needs confirmation. */
3341			new_stp->ls_flags = 0;
3342
3343			/*
3344			 * This is where we can choose to issue a delegation.
3345			 */
3346			if (delegate && nfsrv_issuedelegs &&
3347			    (writedeleg || readonly) &&
3348			    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
3349			     LCL_CALLBACKSON &&
3350			    !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
3351			    NFSVNO_DELEGOK(vp) &&
3352			    ((nd->nd_flag & ND_NFSV41) == 0 ||
3353			     (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
3354				new_deleg->ls_stateid.seqid =
3355				    delegstateidp->seqid = 1;
3356				new_deleg->ls_stateid.other[0] =
3357				    delegstateidp->other[0]
3358				    = clp->lc_clientid.lval[0];
3359				new_deleg->ls_stateid.other[1] =
3360				    delegstateidp->other[1]
3361				    = clp->lc_clientid.lval[1];
3362				new_deleg->ls_stateid.other[2] =
3363				    delegstateidp->other[2]
3364				    = nfsrv_nextstateindex(clp);
3365				if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3366				    (nfsrv_writedelegifpos || !readonly) &&
3367				    ((nd->nd_flag & ND_NFSV41) == 0 ||
3368				     (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
3369				     0)) {
3370					new_deleg->ls_flags =
3371					    (NFSLCK_DELEGWRITE |
3372					     NFSLCK_READACCESS |
3373					     NFSLCK_WRITEACCESS);
3374					*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3375					nfsrv_writedelegcnt++;
3376				} else {
3377					new_deleg->ls_flags =
3378					    (NFSLCK_DELEGREAD |
3379					     NFSLCK_READACCESS);
3380					*rflagsp |= NFSV4OPEN_READDELEGATE;
3381				}
3382				new_deleg->ls_uid = new_stp->ls_uid;
3383				new_deleg->ls_lfp = lfp;
3384				new_deleg->ls_clp = clp;
3385				new_deleg->ls_filerev = filerev;
3386				new_deleg->ls_compref = nd->nd_compref;
3387				new_deleg->ls_lastrecall = 0;
3388				LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
3389				    ls_file);
3390				LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3391				    new_deleg->ls_stateid), new_deleg, ls_hash);
3392				LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
3393				    ls_list);
3394				new_deleg = NULL;
3395				NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
3396				nfsrv_openpluslock++;
3397				nfsrv_delegatecnt++;
3398			}
3399			/*
3400			 * Since NFSv4.1 never does an OpenConfirm, the first
3401			 * open state will be acquired here.
3402			 */
3403			if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3404				clp->lc_flags |= LCL_STAMPEDSTABLE;
3405				len = clp->lc_idlen;
3406				NFSBCOPY(clp->lc_id, clidp, len);
3407				gotstate = 1;
3408			}
3409		} else {
3410			*rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3411			new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3412		}
3413		nfsrvd_refcache(new_stp->ls_op);
3414		new_stp->ls_noopens = 0;
3415		LIST_INIT(&new_stp->ls_open);
3416		LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3417		LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3418		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3419		    new_open, ls_hash);
3420		openstp = new_open;
3421		new_open = NULL;
3422		*new_stpp = NULL;
3423		NFSD_VNET(nfsstatsv1_p)->srvopens++;
3424		nfsrv_openpluslock++;
3425		NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3426		nfsrv_openpluslock++;
3427	}
3428	if (!error) {
3429		stateidp->seqid = openstp->ls_stateid.seqid;
3430		stateidp->other[0] = openstp->ls_stateid.other[0];
3431		stateidp->other[1] = openstp->ls_stateid.other[1];
3432		stateidp->other[2] = openstp->ls_stateid.other[2];
3433	}
3434	NFSUNLOCKSTATE();
3435	if (haslock) {
3436		NFSLOCKV4ROOTMUTEX();
3437		nfsv4_unlock(&nfsv4rootfs_lock, 1);
3438		NFSUNLOCKV4ROOTMUTEX();
3439	}
3440	if (new_open)
3441		free(new_open, M_NFSDSTATE);
3442	if (new_deleg)
3443		free(new_deleg, M_NFSDSTATE);
3444
3445	/*
3446	 * If the NFSv4.1 client just acquired its first open, write a timestamp
3447	 * to the stable storage file.
3448	 */
3449	if (gotstate != 0) {
3450		nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3451		nfsrv_backupstable();
3452	}
3453
3454out:
3455	free(clidp, M_TEMP);
3456	NFSEXITCODE2(error, nd);
3457	return (error);
3458}
3459
3460/*
3461 * Open update. Does the confirm, downgrade and close.
3462 */
3463int
3464nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
3465    nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p,
3466    int *retwriteaccessp)
3467{
3468	struct nfsstate *stp;
3469	struct nfsclient *clp;
3470	struct nfslockfile *lfp;
3471	u_int32_t bits;
3472	int error = 0, gotstate = 0, len = 0;
3473	u_char *clidp = NULL;
3474
3475	/*
3476	 * Check for restart conditions (client and server).
3477	 */
3478	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3479	    &new_stp->ls_stateid, 0);
3480	if (error)
3481		goto out;
3482
3483	clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
3484	NFSLOCKSTATE();
3485	/*
3486	 * Get the open structure via clientid and stateid.
3487	 */
3488	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3489	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
3490	if (!error)
3491		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
3492		    new_stp->ls_flags, &stp);
3493
3494	/*
3495	 * Sanity check the open.
3496	 */
3497	if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
3498		(!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
3499		 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
3500		((new_stp->ls_flags & NFSLCK_CONFIRM) &&
3501		 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
3502		error = NFSERR_BADSTATEID;
3503
3504	if (!error)
3505		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
3506		    stp->ls_openowner, new_stp->ls_op);
3507	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
3508	    (((nd->nd_flag & ND_NFSV41) == 0 &&
3509	      !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
3510	     ((nd->nd_flag & ND_NFSV41) != 0 &&
3511	      new_stp->ls_stateid.seqid != 0)))
3512		error = NFSERR_OLDSTATEID;
3513	if (!error && vp->v_type != VREG) {
3514		if (vp->v_type == VDIR)
3515			error = NFSERR_ISDIR;
3516		else
3517			error = NFSERR_INVAL;
3518	}
3519
3520	if (error) {
3521		/*
3522		 * If a client tries to confirm an Open with a bad
3523		 * seqid# and there are no byte range locks or other Opens
3524		 * on the openowner, just throw it away, so the next use of the
3525		 * openowner will start a fresh seq#.
3526		 */
3527		if (error == NFSERR_BADSEQID &&
3528		    (new_stp->ls_flags & NFSLCK_CONFIRM) &&
3529		    nfsrv_nootherstate(stp))
3530			nfsrv_freeopenowner(stp->ls_openowner, 0, p);
3531		NFSUNLOCKSTATE();
3532		goto out;
3533	}
3534
3535	/*
3536	 * Set the return stateid.
3537	 */
3538	stateidp->seqid = stp->ls_stateid.seqid + 1;
3539	if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
3540		stateidp->seqid = 1;
3541	stateidp->other[0] = stp->ls_stateid.other[0];
3542	stateidp->other[1] = stp->ls_stateid.other[1];
3543	stateidp->other[2] = stp->ls_stateid.other[2];
3544	/*
3545	 * Now, handle the three cases.
3546	 */
3547	if (new_stp->ls_flags & NFSLCK_CONFIRM) {
3548		/*
3549		 * If the open doesn't need confirmation, it seems to me that
3550		 * there is a client error, but I'll just log it and keep going?
3551		 */
3552		if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
3553			printf("Nfsv4d: stray open confirm\n");
3554		stp->ls_openowner->ls_flags = 0;
3555		stp->ls_stateid.seqid++;
3556		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3557		    stp->ls_stateid.seqid == 0)
3558			stp->ls_stateid.seqid = 1;
3559		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3560			clp->lc_flags |= LCL_STAMPEDSTABLE;
3561			len = clp->lc_idlen;
3562			NFSBCOPY(clp->lc_id, clidp, len);
3563			gotstate = 1;
3564		}
3565		NFSUNLOCKSTATE();
3566	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
3567		lfp = stp->ls_lfp;
3568		if (retwriteaccessp != NULL) {
3569			if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
3570				*retwriteaccessp = 1;
3571			else
3572				*retwriteaccessp = 0;
3573		}
3574		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
3575			/* Get the lf lock */
3576			nfsrv_locklf(lfp);
3577			NFSUNLOCKSTATE();
3578			ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
3579			NFSVOPUNLOCK(vp);
3580			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
3581				NFSLOCKSTATE();
3582				nfsrv_unlocklf(lfp);
3583				NFSUNLOCKSTATE();
3584			}
3585			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
3586		} else {
3587			(void) nfsrv_freeopen(stp, NULL, 0, p);
3588			NFSUNLOCKSTATE();
3589		}
3590	} else {
3591		/*
3592		 * Update the share bits, making sure that the new set are a
3593		 * subset of the old ones.
3594		 */
3595		bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
3596		if (~(stp->ls_flags) & bits) {
3597			NFSUNLOCKSTATE();
3598			error = NFSERR_INVAL;
3599			goto out;
3600		}
3601		stp->ls_flags = (bits | NFSLCK_OPEN);
3602		stp->ls_stateid.seqid++;
3603		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3604		    stp->ls_stateid.seqid == 0)
3605			stp->ls_stateid.seqid = 1;
3606		NFSUNLOCKSTATE();
3607	}
3608
3609	/*
3610	 * If the client just confirmed its first open, write a timestamp
3611	 * to the stable storage file.
3612	 */
3613	if (gotstate != 0) {
3614		nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3615		nfsrv_backupstable();
3616	}
3617
3618out:
3619	free(clidp, M_TEMP);
3620	NFSEXITCODE2(error, nd);
3621	return (error);
3622}
3623
3624/*
3625 * Delegation update. Does the purge and return.
3626 */
3627int
3628nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3629    nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3630    NFSPROC_T *p, int *retwriteaccessp)
3631{
3632	struct nfsstate *stp;
3633	struct nfsclient *clp;
3634	int error = 0;
3635	fhandle_t fh;
3636
3637	/*
3638	 * Do a sanity check against the file handle for DelegReturn.
3639	 */
3640	if (vp) {
3641		error = nfsvno_getfh(vp, &fh, p);
3642		if (error)
3643			goto out;
3644	}
3645	/*
3646	 * Check for restart conditions (client and server).
3647	 */
3648	if (op == NFSV4OP_DELEGRETURN)
3649		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3650			stateidp, 0);
3651	else
3652		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3653			stateidp, 0);
3654
3655	NFSLOCKSTATE();
3656	/*
3657	 * Get the open structure via clientid and stateid.
3658	 */
3659	if (!error)
3660	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3661		(nfsquad_t)((u_quad_t)0), 0, nd, p);
3662	if (error) {
3663		if (error == NFSERR_CBPATHDOWN)
3664			error = 0;
3665		if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3666			error = NFSERR_STALESTATEID;
3667	}
3668	if (!error && op == NFSV4OP_DELEGRETURN) {
3669	    error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3670	    if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3671		((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3672		error = NFSERR_OLDSTATEID;
3673	}
3674	/*
3675	 * NFSERR_EXPIRED means that the state has gone away,
3676	 * so Delegations have been purged. Just return ok.
3677	 */
3678	if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3679		NFSUNLOCKSTATE();
3680		error = 0;
3681		goto out;
3682	}
3683	if (error) {
3684		NFSUNLOCKSTATE();
3685		goto out;
3686	}
3687
3688	if (op == NFSV4OP_DELEGRETURN) {
3689		if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3690		    sizeof (fhandle_t))) {
3691			NFSUNLOCKSTATE();
3692			error = NFSERR_BADSTATEID;
3693			goto out;
3694		}
3695		if (retwriteaccessp != NULL) {
3696			if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
3697				*retwriteaccessp = 1;
3698			else
3699				*retwriteaccessp = 0;
3700		}
3701		nfsrv_freedeleg(stp);
3702	} else {
3703		nfsrv_freedeleglist(&clp->lc_olddeleg);
3704	}
3705	NFSUNLOCKSTATE();
3706	error = 0;
3707
3708out:
3709	NFSEXITCODE(error);
3710	return (error);
3711}
3712
3713/*
3714 * Release lock owner.
3715 */
3716int
3717nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3718    NFSPROC_T *p)
3719{
3720	struct nfsstate *stp, *nstp, *openstp, *ownstp;
3721	struct nfsclient *clp;
3722	int error = 0;
3723
3724	/*
3725	 * Check for restart conditions (client and server).
3726	 */
3727	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3728	    &new_stp->ls_stateid, 0);
3729	if (error)
3730		goto out;
3731
3732	NFSLOCKSTATE();
3733	/*
3734	 * Get the lock owner by name.
3735	 */
3736	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3737	    (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3738	if (error) {
3739		NFSUNLOCKSTATE();
3740		goto out;
3741	}
3742	LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3743	    LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3744		stp = LIST_FIRST(&openstp->ls_open);
3745		while (stp != LIST_END(&openstp->ls_open)) {
3746		    nstp = LIST_NEXT(stp, ls_list);
3747		    /*
3748		     * If the owner matches, check for locks and
3749		     * then free or return an error.
3750		     */
3751		    if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3752			!NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3753			 stp->ls_ownerlen)){
3754			if (LIST_EMPTY(&stp->ls_lock)) {
3755			    nfsrv_freelockowner(stp, NULL, 0, p);
3756			} else {
3757			    NFSUNLOCKSTATE();
3758			    error = NFSERR_LOCKSHELD;
3759			    goto out;
3760			}
3761		    }
3762		    stp = nstp;
3763		}
3764	    }
3765	}
3766	NFSUNLOCKSTATE();
3767
3768out:
3769	NFSEXITCODE(error);
3770	return (error);
3771}
3772
3773/*
3774 * Get the file handle for a lock structure.
3775 */
3776static int
3777nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3778    fhandle_t *nfhp, NFSPROC_T *p)
3779{
3780	fhandle_t *fhp = NULL;
3781	int error;
3782
3783	/*
3784	 * For lock, use the new nfslock structure, otherwise just
3785	 * a fhandle_t on the stack.
3786	 */
3787	if (flags & NFSLCK_OPEN) {
3788		KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3789		fhp = &new_lfp->lf_fh;
3790	} else if (nfhp) {
3791		fhp = nfhp;
3792	} else {
3793		panic("nfsrv_getlockfh");
3794	}
3795	error = nfsvno_getfh(vp, fhp, p);
3796	NFSEXITCODE(error);
3797	return (error);
3798}
3799
3800/*
3801 * Get an nfs lock structure. Allocate one, as required, and return a
3802 * pointer to it.
3803 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3804 */
3805static int
3806nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3807    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3808{
3809	struct nfslockfile *lfp;
3810	fhandle_t *fhp = NULL, *tfhp;
3811	struct nfslockhashhead *hp;
3812	struct nfslockfile *new_lfp = NULL;
3813
3814	/*
3815	 * For lock, use the new nfslock structure, otherwise just
3816	 * a fhandle_t on the stack.
3817	 */
3818	if (flags & NFSLCK_OPEN) {
3819		new_lfp = *new_lfpp;
3820		fhp = &new_lfp->lf_fh;
3821	} else if (nfhp) {
3822		fhp = nfhp;
3823	} else {
3824		panic("nfsrv_getlockfile");
3825	}
3826
3827	hp = NFSLOCKHASH(fhp);
3828	LIST_FOREACH(lfp, hp, lf_hash) {
3829		tfhp = &lfp->lf_fh;
3830		if (NFSVNO_CMPFH(fhp, tfhp)) {
3831			if (lockit)
3832				nfsrv_locklf(lfp);
3833			*lfpp = lfp;
3834			return (0);
3835		}
3836	}
3837	if (!(flags & NFSLCK_OPEN))
3838		return (-1);
3839
3840	/*
3841	 * No match, so chain the new one into the list.
3842	 */
3843	LIST_INIT(&new_lfp->lf_open);
3844	LIST_INIT(&new_lfp->lf_lock);
3845	LIST_INIT(&new_lfp->lf_deleg);
3846	LIST_INIT(&new_lfp->lf_locallock);
3847	LIST_INIT(&new_lfp->lf_rollback);
3848	new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3849	new_lfp->lf_locallock_lck.nfslock_lock = 0;
3850	new_lfp->lf_usecount = 0;
3851	LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3852	*lfpp = new_lfp;
3853	*new_lfpp = NULL;
3854	return (0);
3855}
3856
3857/*
3858 * This function adds a nfslock lock structure to the list for the associated
3859 * nfsstate and nfslockfile structures. It will be inserted after the
3860 * entry pointed at by insert_lop.
3861 */
3862static void
3863nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3864    struct nfsstate *stp, struct nfslockfile *lfp)
3865{
3866	struct nfslock *lop, *nlop;
3867
3868	new_lop->lo_stp = stp;
3869	new_lop->lo_lfp = lfp;
3870
3871	if (stp != NULL) {
3872		/* Insert in increasing lo_first order */
3873		lop = LIST_FIRST(&lfp->lf_lock);
3874		if (lop == LIST_END(&lfp->lf_lock) ||
3875		    new_lop->lo_first <= lop->lo_first) {
3876			LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3877		} else {
3878			nlop = LIST_NEXT(lop, lo_lckfile);
3879			while (nlop != LIST_END(&lfp->lf_lock) &&
3880			       nlop->lo_first < new_lop->lo_first) {
3881				lop = nlop;
3882				nlop = LIST_NEXT(lop, lo_lckfile);
3883			}
3884			LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3885		}
3886	} else {
3887		new_lop->lo_lckfile.le_prev = NULL;	/* list not used */
3888	}
3889
3890	/*
3891	 * Insert after insert_lop, which is overloaded as stp or lfp for
3892	 * an empty list.
3893	 */
3894	if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3895		LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3896	else if ((struct nfsstate *)insert_lop == stp)
3897		LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3898	else
3899		LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3900	if (stp != NULL) {
3901		NFSD_VNET(nfsstatsv1_p)->srvlocks++;
3902		nfsrv_openpluslock++;
3903	}
3904}
3905
/*
 * This function updates the locking for a lock owner and given file. It
 * maintains a list of lock ranges ordered on increasing file offset that
 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
 * Unless *new_lopp is an unlock (NFSLCK_UNLOCK set in lo_flags), it is
 * always added to the list. The structure pointed at by *other_lopp is
 * sometimes used as well, for the case where an existing lock is split.
 *
 * stp        - the lock owner's state. If NULL, the lfp->lf_locallock list
 *              is updated instead of stp->ls_lock.
 * new_lopp   - the new lock/unlock range. *new_lopp is set to NULL when
 *              the structure has been put on a list.
 * other_lopp - a spare structure for the split case. *other_lopp is set to
 *              NULL when a split has been done. For an unlock, *other_lopp
 *              may be NULL, in which case *new_lopp is used for the split.
 * lfp        - the file the lock range applies to.
 */
static void
nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
    struct nfslock **other_lopp, struct nfslockfile *lfp)
{
	struct nfslock *new_lop = *new_lopp;
	struct nfslock *lop, *tlop, *ilop;
	struct nfslock *other_lop = *other_lopp;
	int unlock = 0, myfile = 0;
	u_int64_t tmp;

	/*
	 * Work down the list until the lock is merged.
	 */
	if (new_lop->lo_flags & NFSLCK_UNLOCK)
		unlock = 1;
	/*
	 * ilop tracks the entry the new lock is to be inserted after. It
	 * starts out as stp or lfp (cast to a struct nfslock *), which
	 * nfsrv_insertlock() recognizes as "insert at the head of the list".
	 */
	if (stp != NULL) {
		ilop = (struct nfslock *)stp;
		lop = LIST_FIRST(&stp->ls_lock);
	} else {
		ilop = (struct nfslock *)lfp;
		lop = LIST_FIRST(&lfp->lf_locallock);
	}
	while (lop != NULL) {
	    /*
	     * Only check locks for this file that aren't before the start of
	     * new lock's range.
	     */
	    if (lop->lo_lfp == lfp) {
	      /* Note that at least one lock for this file has been seen. */
	      myfile = 1;
	      if (lop->lo_end >= new_lop->lo_first) {
		if (new_lop->lo_end < lop->lo_first) {
			/*
			 * If the new lock ends before the start of the
			 * current lock's range, no merge, just insert
			 * the new lock.
			 */
			break;
		}
		if (new_lop->lo_flags == lop->lo_flags ||
		    (new_lop->lo_first <= lop->lo_first &&
		     new_lop->lo_end >= lop->lo_end)) {
			/*
			 * This lock can be absorbed by the new lock/unlock.
			 * This happens when it covers the entire range
			 * of the old lock or is contiguous
			 * with the old lock and is of the same type or an
			 * unlock.
			 */
			if (lop->lo_first < new_lop->lo_first)
				new_lop->lo_first = lop->lo_first;
			if (lop->lo_end > new_lop->lo_end)
				new_lop->lo_end = lop->lo_end;
			/* Step to the next entry before freeing this one. */
			tlop = lop;
			lop = LIST_NEXT(lop, lo_lckowner);
			nfsrv_freenfslock(tlop);
			continue;
		}

		/*
		 * All these cases are for contiguous locks that are not the
		 * same type, so they can't be merged.
		 */
		if (new_lop->lo_first <= lop->lo_first) {
			/*
			 * This case is where the new lock overlaps with the
			 * first part of the old lock. Move the start of the
			 * old lock to just past the end of the new lock. The
			 * new lock will be inserted in front of the old, since
			 * ilop hasn't been updated. (We are done now.)
			 */
			lop->lo_first = new_lop->lo_end;
			break;
		}
		if (new_lop->lo_end >= lop->lo_end) {
			/*
			 * This case is where the new lock overlaps with the
			 * end of the old lock's range. Move the old lock's
			 * end to just before the new lock's first and insert
			 * the new lock after the old lock.
			 * Might not be done yet, since the new lock could
			 * overlap further locks with higher ranges.
			 */
			lop->lo_end = new_lop->lo_first;
			ilop = lop;
			lop = LIST_NEXT(lop, lo_lckowner);
			continue;
		}
		/*
		 * The final case is where the new lock's range is in the
		 * middle of the current lock's and splits the current lock
		 * up. Use *other_lopp to handle the second part of the
		 * split old lock range. (We are done now.)
		 * For unlock, we use new_lop as other_lop and tmp, since
		 * other_lop and new_lop are the same for this case.
		 * We noted the unlock case above, so we don't need
		 * new_lop->lo_flags any longer.
		 */
		/* Save lo_first now, since new_lop may be overwritten below. */
		tmp = new_lop->lo_first;
		if (other_lop == NULL) {
			if (!unlock)
				panic("nfsd srv update unlock");
			other_lop = new_lop;
			*new_lopp = NULL;
		}
		other_lop->lo_first = new_lop->lo_end;
		other_lop->lo_end = lop->lo_end;
		other_lop->lo_flags = lop->lo_flags;
		other_lop->lo_stp = stp;
		other_lop->lo_lfp = lfp;
		lop->lo_end = tmp;
		nfsrv_insertlock(other_lop, lop, stp, lfp);
		*other_lopp = NULL;
		ilop = lop;
		break;
	      }
	    }
	    ilop = lop;
	    lop = LIST_NEXT(lop, lo_lckowner);
	    /*
	     * Stop once locks for this file have been seen and the next
	     * entry is the end of the list or is for a different file.
	     */
	    if (myfile && (lop == NULL || lop->lo_lfp != lfp))
		break;
	}

	/*
	 * Insert the new lock in the list at the appropriate place.
	 * (An unlock is never inserted, so *new_lopp is left for the caller
	 *  unless it was used for the split case above.)
	 */
	if (!unlock) {
		nfsrv_insertlock(new_lop, ilop, stp, lfp);
		*new_lopp = NULL;
	}
}
4043
4044/*
4045 * This function handles sequencing of locks, etc.
4046 * It returns an error that indicates what the caller should do.
4047 */
4048static int
4049nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
4050    struct nfsstate *stp, struct nfsrvcache *op)
4051{
4052	int error = 0;
4053
4054	if ((nd->nd_flag & ND_NFSV41) != 0)
4055		/* NFSv4.1 ignores the open_seqid and lock_seqid. */
4056		goto out;
4057	if (op != nd->nd_rp)
4058		panic("nfsrvstate checkseqid");
4059	if (!(op->rc_flag & RC_INPROG))
4060		panic("nfsrvstate not inprog");
4061	if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
4062		printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
4063		panic("nfsrvstate op refcnt");
4064	}
4065
4066	/* If ND_ERELOOKUP is set, the seqid has already been handled. */
4067	if ((nd->nd_flag & ND_ERELOOKUP) != 0)
4068		goto out;
4069
4070	if ((stp->ls_seq + 1) == seqid) {
4071		if (stp->ls_op)
4072			nfsrvd_derefcache(stp->ls_op);
4073		stp->ls_op = op;
4074		nfsrvd_refcache(op);
4075		stp->ls_seq = seqid;
4076		goto out;
4077	} else if (stp->ls_seq == seqid && stp->ls_op &&
4078		op->rc_xid == stp->ls_op->rc_xid &&
4079		op->rc_refcnt == 0 &&
4080		op->rc_reqlen == stp->ls_op->rc_reqlen &&
4081		op->rc_cksum == stp->ls_op->rc_cksum) {
4082		if (stp->ls_op->rc_flag & RC_INPROG) {
4083			error = NFSERR_DONTREPLY;
4084			goto out;
4085		}
4086		nd->nd_rp = stp->ls_op;
4087		nd->nd_rp->rc_flag |= RC_INPROG;
4088		nfsrvd_delcache(op);
4089		error = NFSERR_REPLYFROMCACHE;
4090		goto out;
4091	}
4092	error = NFSERR_BADSEQID;
4093
4094out:
4095	NFSEXITCODE2(error, nd);
4096	return (error);
4097}
4098
4099/*
4100 * Get the client ip address for callbacks. If the strings can't be parsed,
4101 * just set lc_program to 0 to indicate no callbacks are possible.
4102 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
4103 *  the address to the client's transport address. This won't be used
4104 *  for callbacks, but can be printed out by nfsstats for info.)
4105 * Return error if the xdr can't be parsed, 0 otherwise.
4106 */
4107int
4108nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
4109{
4110	u_int32_t *tl;
4111	u_char *cp, *cp2;
4112	int i, j, maxalen = 0, minalen = 0;
4113	sa_family_t af;
4114#ifdef INET
4115	struct sockaddr_in *rin = NULL, *sin;
4116#endif
4117#ifdef INET6
4118	struct sockaddr_in6 *rin6 = NULL, *sin6;
4119#endif
4120	u_char *addr;
4121	int error = 0, cantparse = 0;
4122	union {
4123		in_addr_t ival;
4124		u_char cval[4];
4125	} ip;
4126	union {
4127		in_port_t sval;
4128		u_char cval[2];
4129	} port;
4130
4131	/* 8 is the maximum length of the port# string. */
4132	addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK);
4133	clp->lc_req.nr_client = NULL;
4134	clp->lc_req.nr_lock = 0;
4135	af = AF_UNSPEC;
4136	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4137	i = fxdr_unsigned(int, *tl);
4138	if (i >= 3 && i <= 4) {
4139		error = nfsrv_mtostr(nd, addr, i);
4140		if (error)
4141			goto nfsmout;
4142#ifdef INET
4143		if (!strcmp(addr, "tcp")) {
4144			clp->lc_flags |= LCL_TCPCALLBACK;
4145			clp->lc_req.nr_sotype = SOCK_STREAM;
4146			clp->lc_req.nr_soproto = IPPROTO_TCP;
4147			af = AF_INET;
4148		} else if (!strcmp(addr, "udp")) {
4149			clp->lc_req.nr_sotype = SOCK_DGRAM;
4150			clp->lc_req.nr_soproto = IPPROTO_UDP;
4151			af = AF_INET;
4152		}
4153#endif
4154#ifdef INET6
4155		if (af == AF_UNSPEC) {
4156			if (!strcmp(addr, "tcp6")) {
4157				clp->lc_flags |= LCL_TCPCALLBACK;
4158				clp->lc_req.nr_sotype = SOCK_STREAM;
4159				clp->lc_req.nr_soproto = IPPROTO_TCP;
4160				af = AF_INET6;
4161			} else if (!strcmp(addr, "udp6")) {
4162				clp->lc_req.nr_sotype = SOCK_DGRAM;
4163				clp->lc_req.nr_soproto = IPPROTO_UDP;
4164				af = AF_INET6;
4165			}
4166		}
4167#endif
4168		if (af == AF_UNSPEC) {
4169			cantparse = 1;
4170		}
4171	} else {
4172		cantparse = 1;
4173		if (i > 0) {
4174			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4175			if (error)
4176				goto nfsmout;
4177		}
4178	}
4179	/*
4180	 * The caller has allocated clp->lc_req.nr_nam to be large enough
4181	 * for either AF_INET or AF_INET6 and zeroed out the contents.
4182	 * maxalen is set to the maximum length of the host IP address string
4183	 * plus 8 for the maximum length of the port#.
4184	 * minalen is set to the minimum length of the host IP address string
4185	 * plus 4 for the minimum length of the port#.
4186	 * These lengths do not include NULL termination,
4187	 * so INET[6]_ADDRSTRLEN - 1 is used in the calculations.
4188	 */
4189	switch (af) {
4190#ifdef INET
4191	case AF_INET:
4192		rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4193		rin->sin_family = AF_INET;
4194		rin->sin_len = sizeof(struct sockaddr_in);
4195		maxalen = INET_ADDRSTRLEN - 1 + 8;
4196		minalen = 7 + 4;
4197		break;
4198#endif
4199#ifdef INET6
4200	case AF_INET6:
4201		rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4202		rin6->sin6_family = AF_INET6;
4203		rin6->sin6_len = sizeof(struct sockaddr_in6);
4204		maxalen = INET6_ADDRSTRLEN - 1 + 8;
4205		minalen = 3 + 4;
4206		break;
4207#endif
4208	}
4209	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4210	i = fxdr_unsigned(int, *tl);
4211	if (i < 0) {
4212		error = NFSERR_BADXDR;
4213		goto nfsmout;
4214	} else if (i == 0) {
4215		cantparse = 1;
4216	} else if (!cantparse && i <= maxalen && i >= minalen) {
4217		error = nfsrv_mtostr(nd, addr, i);
4218		if (error)
4219			goto nfsmout;
4220
4221		/*
4222		 * Parse out the address fields. We expect 6 decimal numbers
4223		 * separated by '.'s for AF_INET and two decimal numbers
4224		 * preceeded by '.'s for AF_INET6.
4225		 */
4226		cp = NULL;
4227		switch (af) {
4228#ifdef INET6
4229		/*
4230		 * For AF_INET6, first parse the host address.
4231		 */
4232		case AF_INET6:
4233			cp = strchr(addr, '.');
4234			if (cp != NULL) {
4235				*cp++ = '\0';
4236				if (inet_pton(af, addr, &rin6->sin6_addr) == 1)
4237					i = 4;
4238				else {
4239					cp = NULL;
4240					cantparse = 1;
4241				}
4242			}
4243			break;
4244#endif
4245#ifdef INET
4246		case AF_INET:
4247			cp = addr;
4248			i = 0;
4249			break;
4250#endif
4251		}
4252		while (cp != NULL && *cp && i < 6) {
4253			cp2 = cp;
4254			while (*cp2 && *cp2 != '.')
4255				cp2++;
4256			if (*cp2)
4257				*cp2++ = '\0';
4258			else if (i != 5) {
4259				cantparse = 1;
4260				break;
4261			}
4262			j = nfsrv_getipnumber(cp);
4263			if (j >= 0) {
4264				if (i < 4)
4265					ip.cval[3 - i] = j;
4266				else
4267					port.cval[5 - i] = j;
4268			} else {
4269				cantparse = 1;
4270				break;
4271			}
4272			cp = cp2;
4273			i++;
4274		}
4275		if (!cantparse) {
4276			/*
4277			 * The host address INADDR_ANY is (mis)used to indicate
4278			 * "there is no valid callback address".
4279			 */
4280			switch (af) {
4281#ifdef INET6
4282			case AF_INET6:
4283				if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr,
4284				    &in6addr_any))
4285					rin6->sin6_port = htons(port.sval);
4286				else
4287					cantparse = 1;
4288				break;
4289#endif
4290#ifdef INET
4291			case AF_INET:
4292				if (ip.ival != INADDR_ANY) {
4293					rin->sin_addr.s_addr = htonl(ip.ival);
4294					rin->sin_port = htons(port.sval);
4295				} else {
4296					cantparse = 1;
4297				}
4298				break;
4299#endif
4300			}
4301		}
4302	} else {
4303		cantparse = 1;
4304		if (i > 0) {
4305			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4306			if (error)
4307				goto nfsmout;
4308		}
4309	}
4310	if (cantparse) {
4311		switch (nd->nd_nam->sa_family) {
4312#ifdef INET
4313		case AF_INET:
4314			sin = (struct sockaddr_in *)nd->nd_nam;
4315			rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4316			rin->sin_family = AF_INET;
4317			rin->sin_len = sizeof(struct sockaddr_in);
4318			rin->sin_addr.s_addr = sin->sin_addr.s_addr;
4319			rin->sin_port = 0x0;
4320			break;
4321#endif
4322#ifdef INET6
4323		case AF_INET6:
4324			sin6 = (struct sockaddr_in6 *)nd->nd_nam;
4325			rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4326			rin6->sin6_family = AF_INET6;
4327			rin6->sin6_len = sizeof(struct sockaddr_in6);
4328			rin6->sin6_addr = sin6->sin6_addr;
4329			rin6->sin6_port = 0x0;
4330			break;
4331#endif
4332		}
4333		clp->lc_program = 0;
4334	}
4335nfsmout:
4336	free(addr, M_TEMP);
4337	NFSEXITCODE2(error, nd);
4338	return (error);
4339}
4340
/*
 * Turn a string of one to three decimal digits into a number in the
 * range 0 to 255. Return -1 upon error, which is an empty string, more
 * than three characters, a character that is not a decimal digit or a
 * value greater than 255.
 */
static int
nfsrv_getipnumber(u_char *cp)
{
	int i = 0, j = 0;

	/*
	 * An empty field (as in "1..2") is not a number, so it must not
	 * be silently handled as 0.
	 */
	if (*cp == '\0')
		return (-1);

	while (*cp) {
		/* j counts the digits seen so far; at most 3 are allowed. */
		if (j > 2 || *cp < '0' || *cp > '9')
			return (-1);
		i *= 10;
		i += (*cp - '0');
		cp++;
		j++;
	}
	if (i < 256)
		return (i);
	return (-1);
}
4362
4363/*
4364 * This function checks for restart conditions.
4365 */
4366static int
4367nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
4368    nfsv4stateid_t *stateidp, int specialid)
4369{
4370	int ret = 0;
4371
4372	/*
4373	 * First check for a server restart. Open, LockT, ReleaseLockOwner
4374	 * and DelegPurge have a clientid, the rest a stateid.
4375	 */
4376	if (flags &
4377	    (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
4378		if (clientid.lval[0] != NFSD_VNET(nfsrvboottime)) {
4379			ret = NFSERR_STALECLIENTID;
4380			goto out;
4381		}
4382	} else if (stateidp->other[0] != NFSD_VNET(nfsrvboottime) &&
4383		specialid == 0) {
4384		ret = NFSERR_STALESTATEID;
4385		goto out;
4386	}
4387
4388	/*
4389	 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
4390	 * not use a lock/open owner seqid#, so the check can be done now.
4391	 * (The others will be checked, as required, later.)
4392	 */
4393	if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
4394		goto out;
4395
4396	NFSLOCKSTATE();
4397	ret = nfsrv_checkgrace(NULL, NULL, flags);
4398	NFSUNLOCKSTATE();
4399
4400out:
4401	NFSEXITCODE(ret);
4402	return (ret);
4403}
4404
4405/*
4406 * Check for grace.
4407 */
4408static int
4409nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
4410    u_int32_t flags)
4411{
4412	int error = 0, notreclaimed;
4413	struct nfsrv_stable *sp;
4414
4415	if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & (NFSNSF_UPDATEDONE |
4416	     NFSNSF_GRACEOVER)) == 0) {
4417		/*
4418		 * First, check to see if all of the clients have done a
4419		 * ReclaimComplete.  If so, grace can end now.
4420		 */
4421		notreclaimed = 0;
4422		LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head,
4423		    nst_list) {
4424			if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
4425				notreclaimed = 1;
4426				break;
4427			}
4428		}
4429		if (notreclaimed == 0)
4430			NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
4431			    (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4432	}
4433
4434	if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) != 0) {
4435		if (flags & NFSLCK_RECLAIM) {
4436			error = NFSERR_NOGRACE;
4437			goto out;
4438		}
4439	} else {
4440		if (!(flags & NFSLCK_RECLAIM)) {
4441			error = NFSERR_GRACE;
4442			goto out;
4443		}
4444		if (nd != NULL && clp != NULL &&
4445		    (nd->nd_flag & ND_NFSV41) != 0 &&
4446		    (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4447			error = NFSERR_NOGRACE;
4448			goto out;
4449		}
4450
4451		/*
4452		 * If grace is almost over and we are still getting Reclaims,
4453		 * extend grace a bit.
4454		 */
4455		if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4456		    NFSD_VNET(nfsrv_stablefirst).nsf_eograce)
4457			NFSD_VNET(nfsrv_stablefirst).nsf_eograce =
4458				NFSD_MONOSEC + NFSRV_LEASEDELTA;
4459	}
4460
4461out:
4462	NFSEXITCODE(error);
4463	return (error);
4464}
4465
/*
 * Do a server callback.
 * The "trunc" argument is slightly overloaded and refers to different
 * boolean arguments for CBRECALL and CBLAYOUTRECALL.
 *
 * clp      - the client to be called back. lc_cbref is held incremented
 *            while the callback is in progress.
 * procnum  - NFSV4OP_CBGETATTR, NFSV4OP_CBRECALL, NFSV4OP_CBLAYOUTRECALL
 *            or NFSV4PROC_CBNULL. Anything else fails with
 *            NFSERR_SERVERFAULT.
 * stateidp - the stateid put in the CBRECALL/CBLAYOUTRECALL arguments
 * fhp      - the file handle put in the callback arguments
 * nap      - where the reply attributes are loaded for CBGETATTR
 * attrbitp - the attribute bits requested by CBGETATTR
 * laytype  - the layout type for CBLAYOUTRECALL
 *
 * Returns 0 upon success. Otherwise the error from building or sending
 * the request or, when the RPC itself worked, the nd_repstat of the reply.
 * A failure to do the RPC sets LCL_CBDOWN in clp->lc_flags.
 */
static int
nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
    int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp,
    int laytype, NFSPROC_T *p)
{
	struct mbuf *m;
	u_int32_t *tl;
	struct nfsrv_descript *nd;
	struct ucred *cred;
	int error = 0, slotpos;
	u_int32_t callback;
	struct nfsdsession *sep = NULL;
	uint64_t tval;
	bool dotls;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	cred = newnfs_getcred();
	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		NFSUNLOCKSTATE();
		panic("docallb");
	}
	clp->lc_cbref++;

	/*
	 * Fill the callback program# and version into the request
	 * structure for newnfs_connect() to use.
	 */
	clp->lc_req.nr_prog = clp->lc_program;
#ifdef notnow
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		clp->lc_req.nr_vers = NFSV41_CBVERS;
	else
#endif
		clp->lc_req.nr_vers = NFSV4_CBVERS;

	/*
	 * First, fill in some of the fields of nd and cr.
	 */
	nd->nd_flag = ND_NFSV4;
	if (clp->lc_flags & LCL_GSS)
		nd->nd_flag |= ND_KERBV;
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		nd->nd_flag |= ND_NFSV41;
	if ((clp->lc_flags & LCL_NFSV42) != 0)
		nd->nd_flag |= ND_NFSV42;
	nd->nd_repstat = 0;
	cred->cr_uid = clp->lc_uid;
	cred->cr_gid = clp->lc_gid;
	callback = clp->lc_callback;
	NFSUNLOCKSTATE();
	cred->cr_ngroups = 1;

	/*
	 * Get the first mbuf for the request.
	 */
	MGET(m, M_WAITOK, MT_DATA);
	m->m_len = 0;
	nd->nd_mreq = nd->nd_mb = m;
	nd->nd_bpos = mtod(m, caddr_t);

	/*
	 * and build the callback request.
	 * (Whenever building fails, the request mbuf list is freed here
	 *  before going to errout.)
	 */
	if (procnum == NFSV4OP_CBGETATTR) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
		    "CB Getattr", &sep, &slotpos);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/* Arguments are the file handle and the attribute bits. */
		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
		(void)nfsrv_putattrbit(nd, attrbitp);
	} else if (procnum == NFSV4OP_CBRECALL) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
		    "CB Recall", &sep, &slotpos);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/* Arguments are the stateid, the trunc boolean and the fh. */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
		    NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		if (trunc)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
	} else if (procnum == NFSV4OP_CBLAYOUTRECALL) {
		NFSD_DEBUG(4, "docallback layout recall\n");
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback,
		    NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep, &slotpos);
		NFSD_DEBUG(4, "aft cbcallargs=%d\n", error);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/*
		 * Arguments are the layout type, iomode, the trunc boolean,
		 * NFSV4LAYOUTRET_FILE, the file handle, a byte range of
		 * offset 0 with length UINT64_MAX and the stateid.
		 */
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(laytype);
		*tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY);
		if (trunc)
			*tl++ = newnfs_true;
		else
			*tl++ = newnfs_false;
		*tl = txdr_unsigned(NFSV4LAYOUTRET_FILE);
		(void)nfsm_fhtom(NULL, nd, (uint8_t *)fhp, NFSX_MYFH, 0);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID);
		tval = 0;
		txdr_hyper(tval, tl); tl += 2;
		tval = UINT64_MAX;
		txdr_hyper(tval, tl); tl += 2;
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		NFSD_DEBUG(4, "aft args\n");
	} else if (procnum == NFSV4PROC_CBNULL) {
		nd->nd_procnum = NFSV4PROC_CBNULL;
		/* A Null RPC has no arguments, but NFSv4.1 needs a session. */
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			error = nfsv4_getcbsession(clp, &sep);
			if (error != 0) {
				m_freem(nd->nd_mreq);
				goto errout;
			}
		}
	} else {
		error = NFSERR_SERVERFAULT;
		m_freem(nd->nd_mreq);
		goto errout;
	}

	/*
	 * Call newnfs_connect(), as required, and then newnfs_request().
	 * NOTE(review): when error is set by this step (ECONNREFUSED or a
	 * newnfs_connect() failure), nothing visible here frees
	 * nd->nd_mreq before errout - confirm whether that leaks the
	 * request mbuf list.
	 */
	dotls = false;
	if ((clp->lc_flags & LCL_TLSCB) != 0)
		dotls = true;
	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
	if (clp->lc_req.nr_client == NULL) {
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			/*
			 * For NFSv4.1 no connection is made from here, so
			 * give back the slot (except for CBNULL, which
			 * did not get one) and the session reference.
			 */
			error = ECONNREFUSED;
			if (procnum != NFSV4PROC_CBNULL)
				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
				    true);
			nfsrv_freesession(NULL, sep, NULL);
		} else if (nd->nd_procnum == NFSV4PROC_CBNULL)
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 1, dotls, &clp->lc_req.nr_client);
		else
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 3, dotls, &clp->lc_req.nr_client);
	}
	newnfs_sndunlock(&clp->lc_req.nr_lock);
	NFSD_DEBUG(4, "aft sndunlock=%d\n", error);
	if (!error) {
		if ((nd->nd_flag & ND_NFSV41) != 0) {
			KASSERT(sep != NULL, ("sep NULL"));
			if (sep->sess_cbsess.nfsess_xprt != NULL)
				error = newnfs_request(nd, NULL, clp,
				    &clp->lc_req, NULL, NULL, cred,
				    clp->lc_program, clp->lc_req.nr_vers, NULL,
				    1, NULL, &sep->sess_cbsess);
			else {
				/*
				 * This should probably never occur, but if a
				 * client somehow does an RPC without a
				 * SequenceID Op that causes a callback just
				 * after the nfsd threads have been terminated
				 * and restarted we could conceivably get here
				 * without a backchannel xprt.
				 */
				printf("nfsrv_docallback: no xprt\n");
				error = ECONNREFUSED;
			}
			NFSD_DEBUG(4, "aft newnfs_request=%d\n", error);
			if (error != 0 && procnum != NFSV4PROC_CBNULL) {
				/*
				 * It is likely that the callback was never
				 * processed by the client and, as such,
				 * the sequence# for the session slot needs
				 * to be backed up by one to avoid a
				 * NFSERR_SEQMISORDERED error reply.
				 * For the unlikely case where the callback
				 * was processed by the client, this will
				 * make the next callback on the slot
				 * appear to be a retry.
				 * Since callbacks never specify that the
				 * reply be cached, this "apparent retry"
				 * should not be a problem.
				 */
				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
				    true);
			}
			nfsrv_freesession(NULL, sep, NULL);
		} else
			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
			    NULL, NULL, cred, clp->lc_program,
			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
	}
errout:
	NFSFREECRED(cred);

	/*
	 * If error is set here, the Callback path isn't working
	 * properly, so twiddle the appropriate LCL_ flags.
	 * (nd_repstat != 0 indicates the Callback path is working,
	 *  but the callback failed on the client.)
	 */
	if (error) {
		/*
		 * Mark the callback pathway down, which disabled issuing
		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
		 */
		NFSLOCKSTATE();
		clp->lc_flags |= LCL_CBDOWN;
		NFSUNLOCKSTATE();
	} else {
		/*
		 * Callback worked. If the callback path was down, disable
		 * callbacks, so no more delegations will be issued. (This
		 * is done on the assumption that the callback pathway is
		 * flakey.)
		 */
		NFSLOCKSTATE();
		if (clp->lc_flags & LCL_CBDOWN)
			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
		NFSUNLOCKSTATE();
		/*
		 * The reply status becomes the return value. Only a
		 * successful CBGETATTR reply has attributes to load into nap.
		 */
		if (nd->nd_repstat) {
			error = nd->nd_repstat;
			NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n",
			    procnum, error);
		} else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
			    p, NULL);
		m_freem(nd->nd_mrep);
	}
	/*
	 * Drop the callback reference and wake up a thread that has set
	 * LCL_WAKEUPWANTED once the last reference is gone.
	 */
	NFSLOCKSTATE();
	clp->lc_cbref--;
	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
		clp->lc_flags &= ~LCL_WAKEUPWANTED;
		wakeup(clp);
	}
	NFSUNLOCKSTATE();

	free(nd, M_TEMP);
	NFSEXITCODE(error);
	return (error);
}
4724
4725/*
4726 * Set up the compound RPC for the callback.
4727 */
4728static int
4729nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4730    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
4731    int *slotposp)
4732{
4733	uint32_t *tl;
4734	int error, len;
4735
4736	len = strlen(optag);
4737	(void)nfsm_strtom(nd, optag, len);
4738	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4739	if ((nd->nd_flag & ND_NFSV41) != 0) {
4740		if ((nd->nd_flag & ND_NFSV42) != 0)
4741			*tl++ = txdr_unsigned(NFSV42_MINORVERSION);
4742		else
4743			*tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4744		*tl++ = txdr_unsigned(callback);
4745		*tl++ = txdr_unsigned(2);
4746		*tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4747		error = nfsv4_setcbsequence(nd, clp, 1, sepp, slotposp);
4748		if (error != 0)
4749			return (error);
4750		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4751		*tl = txdr_unsigned(op);
4752	} else {
4753		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4754		*tl++ = txdr_unsigned(callback);
4755		*tl++ = txdr_unsigned(1);
4756		*tl = txdr_unsigned(op);
4757	}
4758	return (0);
4759}
4760
/*
 * Return the next index# for a clientid, which is simply the previous
 * one plus one, starting at 1.
 * Should the 32bit unsigned counter ever wrap around to 0, a message is
 * printed and 0 is returned. (The server needs a reboot at that point.)
 * At an average rate of one new client per second, that takes
 * approximately 136 years, so the server will most likely have been shut
 * down or rebooted long before then.
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
4781
4782/*
4783 * Return the next index# for a stateid. Mostly just increment and return
4784 * the next one, but... if the 32bit unsigned does actually wrap around
4785 * (will a BSD server stay up that long?), find
4786 * new start and end values.
4787 */
4788static u_int32_t
4789nfsrv_nextstateindex(struct nfsclient *clp)
4790{
4791	struct nfsstate *stp;
4792	int i;
4793	u_int32_t canuse, min_index, max_index;
4794
4795	if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4796		clp->lc_stateindex++;
4797		if (clp->lc_stateindex != clp->lc_statemaxindex)
4798			return (clp->lc_stateindex);
4799	}
4800
4801	/*
4802	 * Yuck, we've hit the end.
4803	 * Look for a new min and max.
4804	 */
4805	min_index = 0;
4806	max_index = 0xffffffff;
4807	for (i = 0; i < nfsrv_statehashsize; i++) {
4808	    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4809		if (stp->ls_stateid.other[2] > 0x80000000) {
4810		    if (stp->ls_stateid.other[2] < max_index)
4811			max_index = stp->ls_stateid.other[2];
4812		} else {
4813		    if (stp->ls_stateid.other[2] > min_index)
4814			min_index = stp->ls_stateid.other[2];
4815		}
4816	    }
4817	}
4818
4819	/*
4820	 * Yikes, highly unlikely, but I'll handle it anyhow.
4821	 */
4822	if (min_index == 0x80000000 && max_index == 0x80000001) {
4823	    canuse = 0;
4824	    /*
4825	     * Loop around until we find an unused entry. Return that
4826	     * and set LCL_INDEXNOTOK, so the search will continue next time.
4827	     * (This is one of those rare cases where a goto is the
4828	     *  cleanest way to code the loop.)
4829	     */
4830tryagain:
4831	    for (i = 0; i < nfsrv_statehashsize; i++) {
4832		LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4833		    if (stp->ls_stateid.other[2] == canuse) {
4834			canuse++;
4835			goto tryagain;
4836		    }
4837		}
4838	    }
4839	    clp->lc_flags |= LCL_INDEXNOTOK;
4840	    return (canuse);
4841	}
4842
4843	/*
4844	 * Ok to start again from min + 1.
4845	 */
4846	clp->lc_stateindex = min_index + 1;
4847	clp->lc_statemaxindex = max_index;
4848	clp->lc_flags &= ~LCL_INDEXNOTOK;
4849	return (clp->lc_stateindex);
4850}
4851
4852/*
4853 * The following functions handle the stable storage file that deals with
4854 * the edge conditions described in RFC3530 Sec. 8.6.3.
4855 * The file is as follows:
4856 * - a single record at the beginning that has the lease time of the
4857 *   previous server instance (before the last reboot) and the nfsrvboottime
4858 *   values for the previous server boots.
4859 *   These previous boot times are used to ensure that the current
4860 *   nfsrvboottime does not, somehow, get set to a previous one.
4861 *   (This is important so that Stale ClientIDs and StateIDs can
4862 *    be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
4864 * - followed by some number of appended records with:
4865 *   - client id string
4866 *   - flag that indicates it is a record revoking state via lease
4867 *     expiration or similar
4868 *     OR has successfully acquired state.
4869 * These structures vary in length, with the client string at the end, up
4870 * to NFSV4_OPAQUELIMIT in size.
4871 *
4872 * At the end of the grace period, the file is truncated, the first
4873 * record is rewritten with updated information and any acquired state
4874 * records for successful reclaims of state are written.
4875 *
4876 * Subsequent records are appended when the first state is issued to
4877 * a client and when state is revoked for a client.
4878 *
4879 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological order.
4881 * If, for some reason, the file can't be read, the grace period is
4882 * immediately terminated and all reclaims get NFSERR_NOGRACE.
4883 */
4884
/*
 * Read in the stable storage file. Called by nfssvc() before the nfsd
 * processes start servicing requests.
 * On success, the list at nsf_head holds one entry per client found in the
 * file, nsf_flags is set to NFSNSF_OK and nsf_eograce is based on the lease
 * time read from the file. On any failure the function returns early with
 * NFSNSF_GRACEOVER (set below) still in place and NFSNSF_OK clear.
 * p - passed through to NFSD_RDWR() for the file reads
 */
void
nfsrv_setupstable(NFSPROC_T *p)
{
	struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
	struct nfsrv_stable *sp, *nsp;
	struct nfst_rec *tsp;
	int error, i, tryagain;
	off_t off = 0;
	ssize_t aresid, len;

	/*
	 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
	 * a reboot, so state has not been lost.
	 */
	if (sf->nsf_flags & NFSNSF_UPDATEDONE)
		return;
	/*
	 * Set Grace over just until the file reads successfully.
	 */
	NFSD_VNET(nfsrvboottime) = time_second;
	LIST_INIT(&sf->nsf_head);
	sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
	sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
	if (sf->nsf_fp == NULL)
		return;
	/*
	 * Read the fixed size first record. A short read or a boot count
	 * outside of 1..NFSNSF_MAXNUMBOOTS means the file is not usable.
	 */
	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	    (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
	    0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	if (error || aresid || sf->nsf_numboots == 0 ||
		sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
		return;

	/*
	 * Now, read in the boottimes.
	 * One extra slot is allocated, since nfsrv_updatestable() shifts the
	 * array up by one entry to prepend the current boot time.
	 */
	sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
		sizeof(time_t), M_TEMP, M_WAITOK);
	off = sizeof (struct nfsf_rec);
	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	    (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
	    UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	if (error || aresid) {
		free(sf->nsf_bootvals, M_TEMP);
		sf->nsf_bootvals = NULL;
		return;
	}

	/*
	 * Make sure this nfsrvboottime is different from all recorded
	 * previous ones.
	 * (Each time a match is found, the boot time is bumped by one and
	 *  the whole list is rechecked from the start.)
	 */
	do {
		tryagain = 0;
		for (i = 0; i < sf->nsf_numboots; i++) {
			if (NFSD_VNET(nfsrvboottime) == sf->nsf_bootvals[i]) {
				NFSD_VNET(nfsrvboottime)++;
				tryagain = 1;
				break;
			}
		}
	} while (tryagain);

	sf->nsf_flags |= NFSNSF_OK;
	off += (sf->nsf_numboots * sizeof (time_t));

	/*
	 * Read through the file, building a list of records for grace
	 * checking.
	 * Each record is between sizeof (struct nfst_rec) and
	 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
	 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
	 * Every read asks for a maximum sized record at the current offset;
	 * the offset is then advanced by the actual size of the record.
	 */
	tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
		NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
	do {
	    error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	        (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
	        off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	    /* len is the number of bytes actually read; 0 ends the loop. */
	    len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
	    if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
		len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
		/*
		 * Yuck, the file has been corrupted, so just return
		 * after clearing out any restart state, so the grace period
		 * is over.
		 */
		LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
			LIST_REMOVE(sp, nst_list);
			free(sp, M_TEMP);
		}
		free(tsp, M_TEMP);
		sf->nsf_flags &= ~NFSNSF_OK;
		free(sf->nsf_bootvals, M_TEMP);
		sf->nsf_bootvals = NULL;
		return;
	    }
	    if (len > 0) {
		off += sizeof (struct nfst_rec) + tsp->len - 1;
		/*
		 * Search the list for a matching client.
		 */
		LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
			if (tsp->len == sp->nst_len &&
			    !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
				break;
		}
		if (sp == LIST_END(&sf->nsf_head)) {
			/*
			 * First record for this client, so add a new list
			 * entry holding a copy of the record.
			 */
			sp = (struct nfsrv_stable *)malloc(tsp->len +
				sizeof (struct nfsrv_stable) - 1, M_TEMP,
				M_WAITOK);
			NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
				sizeof (struct nfst_rec) + tsp->len - 1);
			LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
		} else {
			if (tsp->flag == NFSNST_REVOKE)
				sp->nst_flag |= NFSNST_REVOKE;
			else
				/*
				 * A subsequent timestamp indicates the client
				 * did a setclientid/confirm and any previous
				 * revoke is no longer relevant.
				 */
				sp->nst_flag &= ~NFSNST_REVOKE;
		}
	    }
	} while (len > 0);
	free(tsp, M_TEMP);
	/*
	 * The whole file was read, so replace the flags with just NFSNSF_OK
	 * (which clears NFSNSF_GRACEOVER and NFSNSF_NEEDLOCK set above) and
	 * base the end of grace on the lease time read from the file.
	 */
	sf->nsf_flags = NFSNSF_OK;
	sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
		NFSRV_LEASEDELTA;
}
5020
/*
 * Update the stable storage file, now that the grace period is over.
 * Does nothing if there is no stable storage file or the update has already
 * been done (NFSNSF_UPDATEDONE). Otherwise the file is truncated, the first
 * record and the list of boot times (with the current boot time prepended)
 * are rewritten and a record is appended for every client that reclaimed
 * state. The in-memory list of restart records is freed.
 * p - passed through to the setattr and file write calls
 */
void
nfsrv_updatestable(NFSPROC_T *p)
{
	struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
	struct nfsrv_stable *sp, *nsp;
	int i;
	struct nfsvattr nva;
	vnode_t vp;
	/*
	 * NOTE(review): mp is only declared under this #if, but it is used
	 * unconditionally by vn_start_write()/vn_finished_write() below.
	 */
#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
	mount_t mp = NULL;
#endif
	int error;

	if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
		return;
	sf->nsf_flags |= NFSNSF_UPDATEDONE;
	/*
	 * Ok, we need to rewrite the stable storage file.
	 * - truncate to 0 length
	 * - write the new first structure
	 * - loop through the data structures, writing out any that
	 *   have timestamps older than the old boot
	 */
	if (sf->nsf_bootvals) {
		/*
		 * Shift the recorded boot times up by one slot, to make
		 * room for the current boot time at index 0.
		 */
		sf->nsf_numboots++;
		for (i = sf->nsf_numboots - 2; i >= 0; i--)
			sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
	} else {
		/* No previous boot times were read in, so start a new list. */
		sf->nsf_numboots = 1;
		sf->nsf_bootvals = (time_t *)malloc(sizeof(time_t),
			M_TEMP, M_WAITOK);
	}
	sf->nsf_bootvals[0] = NFSD_VNET(nfsrvboottime);
	sf->nsf_lease = nfsrv_lease;
	/* Truncate the file by setting its size attribute to 0. */
	NFSVNO_ATTRINIT(&nva);
	NFSVNO_SETATTRVAL(&nva, size, 0);
	vp = NFSFPVNODE(sf->nsf_fp);
	vn_start_write(vp, &mp, V_WAIT);
	if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
		error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
		    NULL);
		NFSVOPUNLOCK(vp);
	} else
		error = EPERM;
	vn_finished_write(mp);
	/* Write the first record at offset 0, followed by the boot times. */
	if (!error)
	    error = NFSD_RDWR(UIO_WRITE, vp,
		(caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
	if (!error)
	    error = NFSD_RDWR(UIO_WRITE, vp,
		(caddr_t)sf->nsf_bootvals,
		sf->nsf_numboots * sizeof (time_t),
		(off_t)(sizeof (struct nfsf_rec)),
		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
	free(sf->nsf_bootvals, M_TEMP);
	sf->nsf_bootvals = NULL;
	if (error) {
		/*
		 * The restart records on nsf_head are not freed on this
		 * path.
		 */
		sf->nsf_flags &= ~NFSNSF_OK;
		printf("EEK! Can't write NfsV4 stable storage file\n");
		return;
	}
	sf->nsf_flags |= NFSNSF_OK;

	/*
	 * Loop through the list and write out timestamp records for
	 * any clients that successfully reclaimed state.
	 * (Those are the entries flagged NFSNST_GOTSTATE. Every entry is
	 *  removed from the list and freed, whether or not it was written.)
	 */
	LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
		if (sp->nst_flag & NFSNST_GOTSTATE) {
			nfsrv_writestable(sp->nst_client, sp->nst_len,
				NFSNST_NEWSTATE, p);
			sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
		}
		LIST_REMOVE(sp, nst_list);
		free(sp, M_TEMP);
	}
	nfsrv_backupstable();
}
5103
5104/*
5105 * Append a record to the stable storage file.
5106 */
5107void
5108nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
5109{
5110	struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
5111	struct nfst_rec *sp;
5112	int error;
5113
5114	if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
5115		return;
5116	sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
5117		len - 1, M_TEMP, M_WAITOK);
5118	sp->len = len;
5119	NFSBCOPY(client, sp->client, len);
5120	sp->flag = flag;
5121	error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
5122	    (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
5123	    UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
5124	free(sp, M_TEMP);
5125	if (error) {
5126		sf->nsf_flags &= ~NFSNSF_OK;
5127		printf("EEK! Can't write NfsV4 stable storage file\n");
5128	}
5129}
5130
5131/*
5132 * This function is called during the grace period to mark a client
5133 * that successfully reclaimed state.
5134 */
5135static void
5136nfsrv_markstable(struct nfsclient *clp)
5137{
5138	struct nfsrv_stable *sp;
5139
5140	/*
5141	 * First find the client structure.
5142	 */
5143	LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5144		if (sp->nst_len == clp->lc_idlen &&
5145		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5146			break;
5147	}
5148	if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head))
5149		return;
5150
5151	/*
5152	 * Now, just mark it and set the nfsclient back pointer.
5153	 */
5154	sp->nst_flag |= NFSNST_GOTSTATE;
5155	sp->nst_clp = clp;
5156}
5157
5158/*
5159 * This function is called when a NFSv4.1 client does a ReclaimComplete.
5160 * Very similar to nfsrv_markstable(), except for the flag being set.
5161 */
5162static void
5163nfsrv_markreclaim(struct nfsclient *clp)
5164{
5165	struct nfsrv_stable *sp;
5166
5167	/*
5168	 * First find the client structure.
5169	 */
5170	LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5171		if (sp->nst_len == clp->lc_idlen &&
5172		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5173			break;
5174	}
5175	if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head))
5176		return;
5177
5178	/*
5179	 * Now, just set the flag.
5180	 */
5181	sp->nst_flag |= NFSNST_RECLAIMED;
5182}
5183
5184/*
5185 * This function is called for a reclaim, to see if it gets grace.
5186 * It returns 0 if a reclaim is allowed, 1 otherwise.
5187 */
5188static int
5189nfsrv_checkstable(struct nfsclient *clp)
5190{
5191	struct nfsrv_stable *sp;
5192
5193	/*
5194	 * First, find the entry for the client.
5195	 */
5196	LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5197		if (sp->nst_len == clp->lc_idlen &&
5198		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5199			break;
5200	}
5201
5202	/*
5203	 * If not in the list, state was revoked or no state was issued
5204	 * since the previous reboot, a reclaim is denied.
5205	 */
5206	if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head) ||
5207	    (sp->nst_flag & NFSNST_REVOKE) ||
5208	    !(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_OK))
5209		return (1);
5210	return (0);
5211}
5212
/*
 * Test for and try to clear out a conflicting client. This is called by
 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
 * are found.
 * The trick here is that it can't revoke a conflicting client with an
 * expired lease unless it holds the v4root lock, so...
 * If no v4root lock, get the lock and return 1 to indicate "try again".
 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
 * the revocation worked and the conflicting client is "bye, bye", so it
 * can be tried again.
 * Return 2 to indicate that the vnode is VIRF_DOOMED after NFSVOPLOCK().
 * Unlocks State before a non-zero value is returned.
 *
 * clp - the conflicting client
 * haslockp - non-zero when the caller already holds the v4root lock; set
 *	to 1 here once the lock has been acquired
 * vp - vnode that is unlocked while waiting for the v4root lock and
 *	relocked afterwards (may be NULL)
 * p - passed through to the stable storage and client cleanup functions
 */
static int
nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
    NFSPROC_T *p)
{
	int gotlock, lktype = 0;

	/*
	 * If lease hasn't expired, we can't fix it.
	 * (The same applies until NFSNSF_UPDATEDONE has been set.)
	 */
	if (clp->lc_expiry >= NFSD_MONOSEC ||
	    !(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE))
		return (0);
	if (*haslockp == 0) {
		NFSUNLOCKSTATE();
		if (vp != NULL) {
			/* Remember the lock type, so it can be reacquired. */
			lktype = NFSVOPISLOCKED(vp);
			NFSVOPUNLOCK(vp);
		}
		/*
		 * Release the reference on the v4root lock and then loop
		 * until the lock itself has been acquired.
		 */
		NFSLOCKV4ROOTMUTEX();
		nfsv4_relref(&nfsv4rootfs_lock);
		do {
			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
			    NFSV4ROOTLOCKMUTEXPTR, NULL);
		} while (!gotlock);
		NFSUNLOCKV4ROOTMUTEX();
		*haslockp = 1;
		if (vp != NULL) {
			NFSVOPLOCK(vp, lktype | LK_RETRY);
			if (VN_IS_DOOMED(vp))
				return (2);
		}
		return (1);
	}
	NFSUNLOCKSTATE();

	/*
	 * Ok, we can expire the conflicting client.
	 * The revoke record is written to stable storage before the
	 * client's state is thrown away.
	 */
	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
	nfsrv_backupstable();
	nfsrv_cleanclient(clp, p);
	nfsrv_freedeleglist(&clp->lc_deleg);
	nfsrv_freedeleglist(&clp->lc_olddeleg);
	LIST_REMOVE(clp, lc_hash);
	nfsrv_zapclient(clp, p);
	return (1);
}
5273
/*
 * Resolve a delegation conflict.
 * Returns 0 to indicate the conflict was resolved without sleeping.
 * Return -1 to indicate that the caller should check for conflicts again.
 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
 *
 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
 * for a return of 0, since there was no sleep and it could be required
 * later. It is released for a return of NFSERR_DELAY, since the caller
 * will return that error. It is released when a sleep was done waiting
 * for the delegation to be returned or expire (so that other nfsds can
 * handle ops). Then, it must be acquired for the write to stable storage.
 * (This function is somewhat similar to nfsrv_clientconflict(), but
 *  the semantics differ in a couple of subtle ways. The return of 0
 *  indicates the conflict was resolved without sleeping here, not
 *  that the conflict can't be resolved and the handling of nfsv4root_lock
 *  differs, as noted above.)
 * Unlocks State before returning a non-zero value.
 *
 * stp - the conflicting delegation
 * haslockp - non-zero when the caller holds the nfsv4root_lock; updated
 *	here whenever the lock is acquired or released
 * p - passed through to the callback, stable storage and cleanup functions
 * vp - vnode that is unlocked while waiting for the nfsv4root_lock and
 *	relocked afterwards (may be NULL)
 */
static int
nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
    vnode_t vp)
{
	struct nfsclient *clp = stp->ls_clp;
	int gotlock, error, lktype = 0, retrycnt, zapped_clp;
	nfsv4stateid_t tstateid;
	fhandle_t tfh;

	/*
	 * If the conflict is with an old delegation...
	 */
	if (stp->ls_flags & NFSLCK_OLDDELEG) {
		/*
		 * You can delete it, if it has expired.
		 */
		if (clp->lc_delegtime < NFSD_MONOSEC) {
			nfsrv_freedeleg(stp);
			NFSUNLOCKSTATE();
			error = -1;
			goto out;
		}
		NFSUNLOCKSTATE();
		/*
		 * During this delay, the old delegation could expire or it
		 * could be recovered by the client via an Open with
		 * CLAIM_DELEGATE_PREV.
		 * Release the nfsv4root_lock, if held.
		 */
		if (*haslockp) {
			*haslockp = 0;
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		error = NFSERR_DELAY;
		goto out;
	}

	/*
	 * It's a current delegation, so:
	 * - check to see if the delegation has expired
	 *   - if so, get the v4root lock and then expire it
	 * A recall callback is done when none has been started yet, or when
	 * the time recorded in ls_lastrecall has passed while neither the
	 * lease nor the delegation has expired.
	 */
	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0 || (stp->ls_lastrecall <
	    NFSD_MONOSEC && clp->lc_expiry >= NFSD_MONOSEC &&
	    stp->ls_delegtime >= NFSD_MONOSEC)) {
		/*
		 * - do a recall callback, since not yet done
		 * For now, never allow truncate to be set. To use
		 * truncate safely, it must be guaranteed that the
		 * Remove, Rename or Setattr with size of 0 will
		 * succeed and that would require major changes to
		 * the VFS/Vnode OPs.
		 * Set the expiry time large enough so that it won't expire
		 * until after the callback, then set it correctly, once
		 * the callback is done. (The delegation will now time
		 * out whether or not the Recall worked ok. The timeout
		 * will be extended when ops are done on the delegation
		 * stateid, up to the timelimit.)
		 */
		if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) {
			stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
			    NFSRV_LEASEDELTA;
			stp->ls_delegtimelimit = NFSD_MONOSEC + (6 *
			    nfsrv_lease) + NFSRV_LEASEDELTA;
			stp->ls_flags |= NFSLCK_DELEGRECALL;
		}
		stp->ls_lastrecall = time_uptime + 1;

		/*
		 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
		 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
		 * in order to try and avoid a race that could happen
		 * when a CBRecall request passed the Open reply with
		 * the delegation in it when transitting the network.
		 * Since nfsrv_docallback will sleep, don't use stp after
		 * the call.
		 * (That is why the stateid and file handle are copied to
		 *  local variables before the State lock is released.)
		 */
		NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
		    sizeof (tstateid));
		NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
		    sizeof (tfh));
		NFSUNLOCKSTATE();
		if (*haslockp) {
			*haslockp = 0;
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		retrycnt = 0;
		do {
		    error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
			&tstateid, 0, &tfh, NULL, NULL, 0, p);
		    retrycnt++;
		} while ((error == NFSERR_BADSTATEID ||
		    error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
		/*
		 * The result of the callback is not returned; the caller
		 * always gets NFSERR_DELAY after a recall attempt.
		 */
		error = NFSERR_DELAY;
		goto out;
	}

	if (clp->lc_expiry >= NFSD_MONOSEC &&
	    stp->ls_delegtime >= NFSD_MONOSEC) {
		NFSUNLOCKSTATE();
		/*
		 * A recall has been done, but it has not yet expired.
		 * So, RETURN_DELAY.
		 */
		if (*haslockp) {
			*haslockp = 0;
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		error = NFSERR_DELAY;
		goto out;
	}

	/*
	 * If we don't yet have the lock, just get it and then return,
	 * since we need that before deleting expired state, such as
	 * this delegation.
	 * When getting the lock, unlock the vnode, so other nfsds that
	 * are in progress, won't get stuck waiting for the vnode lock.
	 */
	if (*haslockp == 0) {
		NFSUNLOCKSTATE();
		if (vp != NULL) {
			/* Remember the lock type, so it can be reacquired. */
			lktype = NFSVOPISLOCKED(vp);
			NFSVOPUNLOCK(vp);
		}
		NFSLOCKV4ROOTMUTEX();
		nfsv4_relref(&nfsv4rootfs_lock);
		do {
			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
			    NFSV4ROOTLOCKMUTEXPTR, NULL);
		} while (!gotlock);
		NFSUNLOCKV4ROOTMUTEX();
		*haslockp = 1;
		if (vp != NULL) {
			NFSVOPLOCK(vp, lktype | LK_RETRY);
			if (VN_IS_DOOMED(vp)) {
				/*
				 * The vnode is doomed after relocking it,
				 * so give the lock back and fail with
				 * NFSERR_PERM.
				 */
				*haslockp = 0;
				NFSLOCKV4ROOTMUTEX();
				nfsv4_unlock(&nfsv4rootfs_lock, 1);
				NFSUNLOCKV4ROOTMUTEX();
				error = NFSERR_PERM;
				goto out;
			}
		}
		error = -1;
		goto out;
	}

	NFSUNLOCKSTATE();
	/*
	 * Ok, we can delete the expired delegation.
	 * First, write the Revoke record to stable storage and then
	 * clear out the conflict.
	 * Since all other nfsd threads are now blocked, we can safely
	 * sleep without the state changing.
	 */
	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
	nfsrv_backupstable();
	if (clp->lc_expiry < NFSD_MONOSEC) {
		/* The lease has expired too, so the whole client goes. */
		nfsrv_cleanclient(clp, p);
		nfsrv_freedeleglist(&clp->lc_deleg);
		nfsrv_freedeleglist(&clp->lc_olddeleg);
		LIST_REMOVE(clp, lc_hash);
		zapped_clp = 1;
	} else {
		/* Only this delegation is discarded. */
		nfsrv_freedeleg(stp);
		zapped_clp = 0;
	}
	if (zapped_clp)
		nfsrv_zapclient(clp, p);
	error = -1;

out:
	NFSEXITCODE(error);
	return (error);
}
5475
5476/*
5477 * Check for a remove allowed, if remove is set to 1 and get rid of
5478 * delegations.
5479 */
5480int
5481nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd,
5482    nfsquad_t clientid, NFSPROC_T *p)
5483{
5484	struct nfsclient *clp;
5485	struct nfsstate *stp;
5486	struct nfslockfile *lfp;
5487	int error, haslock = 0;
5488	fhandle_t nfh;
5489
5490	clp = NULL;
5491	/*
5492	 * First, get the lock file structure.
5493	 * (A return of -1 means no associated state, so remove ok.)
5494	 */
5495	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5496tryagain:
5497	NFSLOCKSTATE();
5498	if (error == 0 && clientid.qval != 0)
5499		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
5500		    (nfsquad_t)((u_quad_t)0), 0, nd, p);
5501	if (!error)
5502		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5503	if (error) {
5504		NFSUNLOCKSTATE();
5505		if (haslock) {
5506			NFSLOCKV4ROOTMUTEX();
5507			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5508			NFSUNLOCKV4ROOTMUTEX();
5509		}
5510		if (error == -1)
5511			error = 0;
5512		goto out;
5513	}
5514
5515	/*
5516	 * Now, we must Recall any delegations.
5517	 */
5518	error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
5519	if (error) {
5520		/*
5521		 * nfsrv_cleandeleg() unlocks state for non-zero
5522		 * return.
5523		 */
5524		if (error == -1)
5525			goto tryagain;
5526		if (haslock) {
5527			NFSLOCKV4ROOTMUTEX();
5528			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5529			NFSUNLOCKV4ROOTMUTEX();
5530		}
5531		goto out;
5532	}
5533
5534	/*
5535	 * Now, look for a conflicting open share.
5536	 */
5537	if (remove) {
5538		/*
5539		 * If the entry in the directory was the last reference to the
5540		 * corresponding filesystem object, the object can be destroyed
5541		 * */
5542		if(lfp->lf_usecount>1)
5543			LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
5544				if (stp->ls_flags & NFSLCK_WRITEDENY) {
5545					error = NFSERR_FILEOPEN;
5546					break;
5547				}
5548			}
5549	}
5550
5551	NFSUNLOCKSTATE();
5552	if (haslock) {
5553		NFSLOCKV4ROOTMUTEX();
5554		nfsv4_unlock(&nfsv4rootfs_lock, 1);
5555		NFSUNLOCKV4ROOTMUTEX();
5556	}
5557
5558out:
5559	NFSEXITCODE(error);
5560	return (error);
5561}
5562
5563/*
5564 * Clear out all delegations for the file referred to by lfp.
5565 * May return NFSERR_DELAY, if there will be a delay waiting for
5566 * delegations to expire.
5567 * Returns -1 to indicate it slept while recalling a delegation.
5568 * This function has the side effect of deleting the nfslockfile structure,
5569 * if it no longer has associated state and didn't have to sleep.
5570 * Unlocks State before a non-zero value is returned.
5571 */
5572static int
5573nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5574    struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5575{
5576	struct nfsstate *stp, *nstp;
5577	int ret = 0;
5578
5579	stp = LIST_FIRST(&lfp->lf_deleg);
5580	while (stp != LIST_END(&lfp->lf_deleg)) {
5581		nstp = LIST_NEXT(stp, ls_file);
5582		if (stp->ls_clp != clp) {
5583			ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5584			if (ret) {
5585				/*
5586				 * nfsrv_delegconflict() unlocks state
5587				 * when it returns non-zero.
5588				 */
5589				goto out;
5590			}
5591		}
5592		stp = nstp;
5593	}
5594out:
5595	NFSEXITCODE(ret);
5596	return (ret);
5597}
5598
5599/*
5600 * There are certain operations that, when being done outside of NFSv4,
5601 * require that any NFSv4 delegation for the file be recalled.
5602 * This function is to be called for those cases:
5603 * VOP_RENAME() - When a delegation is being recalled for any reason,
5604 *	the client may have to do Opens against the server, using the file's
5605 *	final component name. If the file has been renamed on the server,
5606 *	that component name will be incorrect and the Open will fail.
5607 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5608 *	been removed on the server, if there is a delegation issued to
5609 *	that client for the file. I say "theoretically" since clients
5610 *	normally do an Access Op before the Open and that Access Op will
5611 *	fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5612 *	they will detect the file's removal in the same manner. (There is
5613 *	one case where RFC3530 allows a client to do an Open without first
5614 *	doing an Access Op, which is passage of a check against the ACE
5615 *	returned with a Write delegation, but current practice is to ignore
5616 *	the ACE and always do an Access Op.)
5617 *	Since the functions can only be called with an unlocked vnode, this
5618 *	can't be done at this time.
5619 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5620 *	locks locally in the client, which are not visible to the server. To
5621 *	deal with this, issuing of delegations for a vnode must be disabled
5622 *	and all delegations for the vnode recalled. This is done via the
5623 *	second function, using the VV_DISABLEDELEG vflag on the vnode.
5624 */
5625void
5626nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5627{
5628	time_t starttime;
5629	int error;
5630
5631	/*
5632	 * First, check to see if the server is currently running and it has
5633	 * been called for a regular file when issuing delegations.
5634	 */
5635	if (NFSD_VNET(nfsrv_numnfsd) == 0 || vp->v_type != VREG ||
5636	    nfsrv_issuedelegs == 0)
5637		return;
5638
5639	KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5640	/*
5641	 * First, get a reference on the nfsv4rootfs_lock so that an
5642	 * exclusive lock cannot be acquired by another thread.
5643	 */
5644	NFSLOCKV4ROOTMUTEX();
5645	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5646	NFSUNLOCKV4ROOTMUTEX();
5647
5648	/*
5649	 * Now, call nfsrv_checkremove() in a loop while it returns
5650	 * NFSERR_DELAY. Return upon any other error or when timed out.
5651	 */
5652	starttime = NFSD_MONOSEC;
5653	do {
5654		if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5655			error = nfsrv_checkremove(vp, 0, NULL,
5656			    (nfsquad_t)((u_quad_t)0), p);
5657			NFSVOPUNLOCK(vp);
5658		} else
5659			error = EPERM;
5660		if (error == NFSERR_DELAY) {
5661			if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5662				break;
5663			/* Sleep for a short period of time */
5664			(void) nfs_catnap(PZERO, 0, "nfsremove");
5665		}
5666	} while (error == NFSERR_DELAY);
5667	NFSLOCKV4ROOTMUTEX();
5668	nfsv4_relref(&nfsv4rootfs_lock);
5669	NFSUNLOCKV4ROOTMUTEX();
5670}
5671
/*
 * Disable the issuing of delegations for a vnode and then recall the ones
 * that have already been issued.
 * vp - the vnode to disable delegations for
 * p - passed through to nfsd_recalldelegation()
 */
void
nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
{

#ifdef VV_DISABLEDELEG
	/*
	 * First, flag issuance of delegations disabled.
	 * (Only compiled in when the VV_DISABLEDELEG vflag is defined;
	 *  otherwise this function only does the recall.)
	 */
	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
#endif

	/*
	 * Then call nfsd_recalldelegation() to get rid of all extant
	 * delegations.
	 */
	nfsd_recalldelegation(vp, p);
}
5689
5690/*
5691 * Check for conflicting locks, etc. and then get rid of delegations.
5692 * (At one point I thought that I should get rid of delegations for any
5693 *  Setattr, since it could potentially disallow the I/O op (read or write)
5694 *  allowed by the delegation. However, Setattr Ops that aren't changing
5695 *  the size get a stateid of all 0s, so you can't tell if it is a delegation
5696 *  for the same client or a different one, so I decided to only get rid
5697 *  of delegations for other clients when the size is being changed.)
5698 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5699 * as Write backs, even if there is no delegation, so it really isn't any
5700 * different?)
5701 */
5702int
5703nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5704    nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5705    struct nfsexstuff *exp, NFSPROC_T *p)
5706{
5707	struct nfsstate st, *stp = &st;
5708	struct nfslock lo, *lop = &lo;
5709	int error = 0;
5710	nfsquad_t clientid;
5711
5712	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5713		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5714		lop->lo_first = nvap->na_size;
5715	} else {
5716		stp->ls_flags = 0;
5717		lop->lo_first = 0;
5718	}
5719	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5720	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5721	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5722	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5723		stp->ls_flags |= NFSLCK_SETATTR;
5724	if (stp->ls_flags == 0)
5725		goto out;
5726	lop->lo_end = NFS64BITSSET;
5727	lop->lo_flags = NFSLCK_WRITE;
5728	stp->ls_ownerlen = 0;
5729	stp->ls_op = NULL;
5730	stp->ls_uid = nd->nd_cred->cr_uid;
5731	stp->ls_stateid.seqid = stateidp->seqid;
5732	clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5733	clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5734	stp->ls_stateid.other[2] = stateidp->other[2];
5735	error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5736	    stateidp, exp, nd, p);
5737
5738out:
5739	NFSEXITCODE2(error, nd);
5740	return (error);
5741}
5742
5743/*
5744 * Check for a write delegation and do a CBGETATTR if there is one, updating
5745 * the attributes, as required.
 * Should I return an error if I can't get the attributes? (For now, I'll
 * just return ok.)
5748 */
5749int
5750nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
5751    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
5752{
5753	struct nfsstate *stp;
5754	struct nfslockfile *lfp;
5755	struct nfsclient *clp;
5756	struct nfsvattr nva;
5757	fhandle_t nfh;
5758	int error = 0;
5759	nfsattrbit_t cbbits;
5760	u_quad_t delegfilerev;
5761
5762	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
5763	if (!NFSNONZERO_ATTRBIT(&cbbits))
5764		goto out;
5765	if (nfsrv_writedelegcnt == 0)
5766		goto out;
5767
5768	/*
5769	 * Get the lock file structure.
5770	 * (A return of -1 means no associated state, so return ok.)
5771	 */
5772	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5773	NFSLOCKSTATE();
5774	if (!error)
5775		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5776	if (error) {
5777		NFSUNLOCKSTATE();
5778		if (error == -1)
5779			error = 0;
5780		goto out;
5781	}
5782
5783	/*
5784	 * Now, look for a write delegation.
5785	 */
5786	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
5787		if (stp->ls_flags & NFSLCK_DELEGWRITE)
5788			break;
5789	}
5790	if (stp == LIST_END(&lfp->lf_deleg)) {
5791		NFSUNLOCKSTATE();
5792		goto out;
5793	}
5794	clp = stp->ls_clp;
5795
5796	/* If the clientid is not confirmed, ignore the delegation. */
5797	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
5798		NFSUNLOCKSTATE();
5799		goto out;
5800	}
5801
5802	delegfilerev = stp->ls_filerev;
5803	/*
5804	 * If the Write delegation was issued as a part of this Compound RPC
5805	 * or if we have an Implied Clientid (used in a previous Op in this
5806	 * compound) and it is the client the delegation was issued to,
5807	 * just return ok.
5808	 * I also assume that it is from the same client iff the network
5809	 * host IP address is the same as the callback address. (Not
5810	 * exactly correct by the RFC, but avoids a lot of Getattr
5811	 * callbacks.)
5812	 */
5813	if (nd->nd_compref == stp->ls_compref ||
5814	    ((nd->nd_flag & ND_IMPLIEDCLID) &&
5815	     clp->lc_clientid.qval == nd->nd_clientid.qval) ||
5816	     nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
5817		NFSUNLOCKSTATE();
5818		goto out;
5819	}
5820
5821	/*
5822	 * We are now done with the delegation state structure,
5823	 * so the statelock can be released and we can now tsleep().
5824	 */
5825
5826	/*
5827	 * Now, we must do the CB Getattr callback, to see if Change or Size
5828	 * has changed.
5829	 */
5830	if (clp->lc_expiry >= NFSD_MONOSEC) {
5831		NFSUNLOCKSTATE();
5832		NFSVNO_ATTRINIT(&nva);
5833		nva.na_filerev = NFS64BITSSET;
5834		error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
5835		    0, &nfh, &nva, &cbbits, 0, p);
5836		if (!error) {
5837			if ((nva.na_filerev != NFS64BITSSET &&
5838			    nva.na_filerev > delegfilerev) ||
5839			    (NFSVNO_ISSETSIZE(&nva) &&
5840			     nva.na_size != nvap->na_size)) {
5841				error = nfsvno_updfilerev(vp, nvap, nd, p);
5842				if (NFSVNO_ISSETSIZE(&nva))
5843					nvap->na_size = nva.na_size;
5844			}
5845		} else
5846			error = 0;	/* Ignore callback errors for now. */
5847	} else {
5848		NFSUNLOCKSTATE();
5849	}
5850
5851out:
5852	NFSEXITCODE2(error, nd);
5853	return (error);
5854}
5855
5856/*
5857 * This function looks for openowners that haven't had any opens for
5858 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5859 * is set.
5860 */
5861void
5862nfsrv_throwawayopens(NFSPROC_T *p)
5863{
5864	struct nfsclient *clp, *nclp;
5865	struct nfsstate *stp, *nstp;
5866	int i;
5867
5868	NFSLOCKSTATE();
5869	NFSD_VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NOOPENS;
5870	/*
5871	 * For each client...
5872	 */
5873	for (i = 0; i < nfsrv_clienthashsize; i++) {
5874	    LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash,
5875		nclp) {
5876		LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5877			if (LIST_EMPTY(&stp->ls_open) &&
5878			    (stp->ls_noopens > NFSNOOPEN ||
5879			     (nfsrv_openpluslock * 2) >
5880			     nfsrv_v4statelimit))
5881				nfsrv_freeopenowner(stp, 0, p);
5882		}
5883	    }
5884	}
5885	NFSUNLOCKSTATE();
5886}
5887
5888/*
5889 * This function checks to see if the credentials are the same.
5890 * The check for same credentials is needed for state management operations
5891 * for NFSv4.0 or NFSv4.1/4.2 when SP4_MACH_CRED is configured via
5892 * ExchangeID.
5893 * Returns 1 for not same, 0 otherwise.
5894 */
5895static int
5896nfsrv_notsamecredname(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
5897{
5898
5899	/* Check for the SP4_MACH_CRED case. */
5900	if (op != 0 && nfsrv_checkmachcred(op, nd, clp) != 0)
5901		return (1);
5902
5903	/* For NFSv4.1/4.2, SP4_NONE always allows this. */
5904	if ((nd->nd_flag & ND_NFSV41) != 0)
5905		return (0);
5906
5907	if (nd->nd_flag & ND_GSS) {
5908		if (!(clp->lc_flags & LCL_GSS))
5909			return (1);
5910		if (clp->lc_flags & LCL_NAME) {
5911			if (nd->nd_princlen != clp->lc_namelen ||
5912			    NFSBCMP(nd->nd_principal, clp->lc_name,
5913				clp->lc_namelen))
5914				return (1);
5915			else
5916				return (0);
5917		}
5918		if (nd->nd_cred->cr_uid == clp->lc_uid)
5919			return (0);
5920		else
5921			return (1);
5922	} else if (clp->lc_flags & LCL_GSS)
5923		return (1);
5924	/*
5925	 * For AUTH_SYS, allow the same uid or root. (This is underspecified
5926	 * in RFC3530, which talks about principals, but doesn't say anything
5927	 * about uids for AUTH_SYS.)
5928	 */
5929	if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5930		return (0);
5931	else
5932		return (1);
5933}
5934
5935/*
5936 * Calculate the lease expiry time.
5937 */
5938static time_t
5939nfsrv_leaseexpiry(void)
5940{
5941
5942	if (NFSD_VNET(nfsrv_stablefirst).nsf_eograce > NFSD_MONOSEC)
5943		return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5944	return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5945}
5946
5947/*
5948 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5949 */
5950static void
5951nfsrv_delaydelegtimeout(struct nfsstate *stp)
5952{
5953
5954	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5955		return;
5956
5957	if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5958	    stp->ls_delegtime < stp->ls_delegtimelimit) {
5959		stp->ls_delegtime += nfsrv_lease;
5960		if (stp->ls_delegtime > stp->ls_delegtimelimit)
5961			stp->ls_delegtime = stp->ls_delegtimelimit;
5962	}
5963}
5964
5965/*
5966 * This function checks to see if there is any other state associated
5967 * with the openowner for this Open.
5968 * It returns 1 if there is no other state, 0 otherwise.
5969 */
5970static int
5971nfsrv_nootherstate(struct nfsstate *stp)
5972{
5973	struct nfsstate *tstp;
5974
5975	LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5976		if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5977			return (0);
5978	}
5979	return (1);
5980}
5981
5982/*
5983 * Create a list of lock deltas (changes to local byte range locking
5984 * that can be rolled back using the list) and apply the changes via
5985 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5986 * the rollback or update function will be called after this.
5987 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5988 * call fails. If it returns an error, it will unlock the list.
5989 */
5990static int
5991nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5992    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5993{
5994	struct nfslock *lop, *nlop;
5995	int error = 0;
5996
5997	/* Loop through the list of locks. */
5998	lop = LIST_FIRST(&lfp->lf_locallock);
5999	while (first < end && lop != NULL) {
6000		nlop = LIST_NEXT(lop, lo_lckowner);
6001		if (first >= lop->lo_end) {
6002			/* not there yet */
6003			lop = nlop;
6004		} else if (first < lop->lo_first) {
6005			/* new one starts before entry in list */
6006			if (end <= lop->lo_first) {
6007				/* no overlap between old and new */
6008				error = nfsrv_dolocal(vp, lfp, flags,
6009				    NFSLCK_UNLOCK, first, end, cfp, p);
6010				if (error != 0)
6011					break;
6012				first = end;
6013			} else {
6014				/* handle fragment overlapped with new one */
6015				error = nfsrv_dolocal(vp, lfp, flags,
6016				    NFSLCK_UNLOCK, first, lop->lo_first, cfp,
6017				    p);
6018				if (error != 0)
6019					break;
6020				first = lop->lo_first;
6021			}
6022		} else {
6023			/* new one overlaps this entry in list */
6024			if (end <= lop->lo_end) {
6025				/* overlaps all of new one */
6026				error = nfsrv_dolocal(vp, lfp, flags,
6027				    lop->lo_flags, first, end, cfp, p);
6028				if (error != 0)
6029					break;
6030				first = end;
6031			} else {
6032				/* handle fragment overlapped with new one */
6033				error = nfsrv_dolocal(vp, lfp, flags,
6034				    lop->lo_flags, first, lop->lo_end, cfp, p);
6035				if (error != 0)
6036					break;
6037				first = lop->lo_end;
6038				lop = nlop;
6039			}
6040		}
6041	}
6042	if (first < end && error == 0)
6043		/* handle fragment past end of list */
6044		error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
6045		    end, cfp, p);
6046
6047	NFSEXITCODE(error);
6048	return (error);
6049}
6050
6051/*
6052 * Local lock unlock. Unlock all byte ranges that are no longer locked
6053 * by NFSv4. To do this, unlock any subranges of first-->end that
6054 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
6055 * list. This list has all locks for the file held by other
6056 * <clientid, lockowner> tuples. The list is ordered by increasing
6057 * lo_first value, but may have entries that overlap each other, for
6058 * the case of read locks.
6059 */
6060static void
6061nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
6062    uint64_t init_end, NFSPROC_T *p)
6063{
6064	struct nfslock *lop;
6065	uint64_t first, end, prevfirst __unused;
6066
6067	first = init_first;
6068	end = init_end;
6069	while (first < init_end) {
6070		/* Loop through all nfs locks, adjusting first and end */
6071		prevfirst = 0;
6072		LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
6073			KASSERT(prevfirst <= lop->lo_first,
6074			    ("nfsv4 locks out of order"));
6075			KASSERT(lop->lo_first < lop->lo_end,
6076			    ("nfsv4 bogus lock"));
6077			prevfirst = lop->lo_first;
6078			if (first >= lop->lo_first &&
6079			    first < lop->lo_end)
6080				/*
6081				 * Overlaps with initial part, so trim
6082				 * off that initial part by moving first past
6083				 * it.
6084				 */
6085				first = lop->lo_end;
6086			else if (end > lop->lo_first &&
6087			    lop->lo_first > first) {
6088				/*
6089				 * This lock defines the end of the
6090				 * segment to unlock, so set end to the
6091				 * start of it and break out of the loop.
6092				 */
6093				end = lop->lo_first;
6094				break;
6095			}
6096			if (first >= end)
6097				/*
6098				 * There is no segment left to do, so
6099				 * break out of this loop and then exit
6100				 * the outer while() since first will be set
6101				 * to end, which must equal init_end here.
6102				 */
6103				break;
6104		}
6105		if (first < end) {
6106			/* Unlock this segment */
6107			(void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
6108			    NFSLCK_READ, first, end, NULL, p);
6109			nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
6110			    first, end);
6111		}
6112		/*
6113		 * Now move past this segment and look for any further
6114		 * segment in the range, if there is one.
6115		 */
6116		first = end;
6117		end = init_end;
6118	}
6119}
6120
6121/*
6122 * Do the local lock operation and update the rollback list, as required.
6123 * Perform the rollback and return the error if nfsvno_advlock() fails.
6124 */
6125static int
6126nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
6127    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
6128{
6129	struct nfsrollback *rlp;
6130	int error = 0, ltype, oldltype;
6131
6132	if (flags & NFSLCK_WRITE)
6133		ltype = F_WRLCK;
6134	else if (flags & NFSLCK_READ)
6135		ltype = F_RDLCK;
6136	else
6137		ltype = F_UNLCK;
6138	if (oldflags & NFSLCK_WRITE)
6139		oldltype = F_WRLCK;
6140	else if (oldflags & NFSLCK_READ)
6141		oldltype = F_RDLCK;
6142	else
6143		oldltype = F_UNLCK;
6144	if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
6145		/* nothing to do */
6146		goto out;
6147	error = nfsvno_advlock(vp, ltype, first, end, p);
6148	if (error != 0) {
6149		if (cfp != NULL) {
6150			cfp->cl_clientid.lval[0] = 0;
6151			cfp->cl_clientid.lval[1] = 0;
6152			cfp->cl_first = 0;
6153			cfp->cl_end = NFS64BITSSET;
6154			cfp->cl_flags = NFSLCK_WRITE;
6155			cfp->cl_ownerlen = 5;
6156			NFSBCOPY("LOCAL", cfp->cl_owner, 5);
6157		}
6158		nfsrv_locallock_rollback(vp, lfp, p);
6159	} else if (ltype != F_UNLCK) {
6160		rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
6161		    M_WAITOK);
6162		rlp->rlck_first = first;
6163		rlp->rlck_end = end;
6164		rlp->rlck_type = oldltype;
6165		LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
6166	}
6167
6168out:
6169	NFSEXITCODE(error);
6170	return (error);
6171}
6172
6173/*
6174 * Roll back local lock changes and free up the rollback list.
6175 */
6176static void
6177nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
6178{
6179	struct nfsrollback *rlp, *nrlp;
6180
6181	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
6182		(void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
6183		    rlp->rlck_end, p);
6184		free(rlp, M_NFSDROLLBACK);
6185	}
6186	LIST_INIT(&lfp->lf_rollback);
6187}
6188
6189/*
6190 * Update local lock list and delete rollback list (ie now committed to the
6191 * local locks). Most of the work is done by the internal function.
6192 */
6193static void
6194nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
6195    uint64_t end)
6196{
6197	struct nfsrollback *rlp, *nrlp;
6198	struct nfslock *new_lop, *other_lop;
6199
6200	new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
6201	if (flags & (NFSLCK_READ | NFSLCK_WRITE))
6202		other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
6203		    M_WAITOK);
6204	else
6205		other_lop = NULL;
6206	new_lop->lo_flags = flags;
6207	new_lop->lo_first = first;
6208	new_lop->lo_end = end;
6209	nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
6210	if (new_lop != NULL)
6211		free(new_lop, M_NFSDLOCK);
6212	if (other_lop != NULL)
6213		free(other_lop, M_NFSDLOCK);
6214
6215	/* and get rid of the rollback list */
6216	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
6217		free(rlp, M_NFSDROLLBACK);
6218	LIST_INIT(&lfp->lf_rollback);
6219}
6220
6221/*
6222 * Lock the struct nfslockfile for local lock updating.
6223 */
6224static void
6225nfsrv_locklf(struct nfslockfile *lfp)
6226{
6227	int gotlock;
6228
6229	/* lf_usecount ensures *lfp won't be free'd */
6230	lfp->lf_usecount++;
6231	do {
6232		gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
6233		    NFSSTATEMUTEXPTR, NULL);
6234	} while (gotlock == 0);
6235	lfp->lf_usecount--;
6236}
6237
6238/*
6239 * Unlock the struct nfslockfile after local lock updating.
6240 */
6241static void
6242nfsrv_unlocklf(struct nfslockfile *lfp)
6243{
6244
6245	nfsv4_unlock(&lfp->lf_locallock_lck, 0);
6246}
6247
6248/*
6249 * Clear out all state for the NFSv4 server.
6250 * Must be called by a thread that can sleep when no nfsds are running.
6251 */
6252void
6253nfsrv_throwawayallstate(NFSPROC_T *p)
6254{
6255	struct nfsclient *clp, *nclp;
6256	struct nfslockfile *lfp, *nlfp;
6257	int i;
6258
6259	/*
6260	 * For each client, clean out the state and then free the structure.
6261	 */
6262	for (i = 0; i < nfsrv_clienthashsize; i++) {
6263		LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash,
6264		    nclp) {
6265			nfsrv_cleanclient(clp, p);
6266			nfsrv_freedeleglist(&clp->lc_deleg);
6267			nfsrv_freedeleglist(&clp->lc_olddeleg);
6268			free(clp->lc_stateid, M_NFSDCLIENT);
6269			free(clp, M_NFSDCLIENT);
6270		}
6271	}
6272
6273	/*
6274	 * Also, free up any remaining lock file structures.
6275	 */
6276	for (i = 0; i < nfsrv_lockhashsize; i++) {
6277		LIST_FOREACH_SAFE(lfp, &NFSD_VNET(nfslockhash)[i], lf_hash,
6278		    nlfp) {
6279			printf("nfsd unload: fnd a lock file struct\n");
6280			nfsrv_freenfslockfile(lfp);
6281		}
6282	}
6283
6284	/* And get rid of the deviceid structures and layouts. */
6285	nfsrv_freealllayoutsanddevids();
6286}
6287
6288/*
6289 * Check the sequence# for the session and slot provided as an argument.
6290 * Also, renew the lease if the session will return NFS_OK.
6291 */
6292int
6293nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
6294    uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
6295    uint32_t *sflagsp, NFSPROC_T *p)
6296{
6297	struct nfsdsession *sep;
6298	struct nfssessionhash *shp;
6299	int error;
6300
6301	shp = NFSSESSIONHASH(nd->nd_sessionid);
6302	NFSLOCKSESSION(shp);
6303	sep = nfsrv_findsession(nd->nd_sessionid);
6304	if (sep == NULL) {
6305		NFSUNLOCKSESSION(shp);
6306		return (NFSERR_BADSESSION);
6307	}
6308	error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
6309	    sep->sess_slots, NULL, NFSV4_SLOTS - 1);
6310	if (error != 0) {
6311		NFSUNLOCKSESSION(shp);
6312		return (error);
6313	}
6314	if (cache_this != 0)
6315		nd->nd_flag |= ND_SAVEREPLY;
6316	/* Renew the lease. */
6317	sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
6318	nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
6319	nd->nd_flag |= ND_IMPLIEDCLID;
6320
6321	/* Handle the SP4_MECH_CRED case for NFSv4.1/4.2. */
6322	if ((sep->sess_clp->lc_flags & LCL_MACHCRED) != 0 &&
6323	    (nd->nd_flag & (ND_GSSINTEGRITY | ND_GSSPRIVACY)) != 0 &&
6324	    nd->nd_princlen == sep->sess_clp->lc_namelen &&
6325	    !NFSBCMP(sep->sess_clp->lc_name, nd->nd_principal,
6326	    nd->nd_princlen)) {
6327		nd->nd_flag |= ND_MACHCRED;
6328		NFSSET_OPBIT(&nd->nd_allowops, &sep->sess_clp->lc_allowops);
6329	}
6330
6331	/* Save maximum request and reply sizes. */
6332	nd->nd_maxreq = sep->sess_maxreq;
6333	nd->nd_maxresp = sep->sess_maxresp;
6334
6335	*sflagsp = 0;
6336	if (sep->sess_clp->lc_req.nr_client == NULL ||
6337	    (sep->sess_clp->lc_flags & LCL_CBDOWN) != 0)
6338		*sflagsp |= NFSV4SEQ_CBPATHDOWN;
6339	NFSUNLOCKSESSION(shp);
6340	if (error == NFSERR_EXPIRED) {
6341		*sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
6342		error = 0;
6343	} else if (error == NFSERR_ADMINREVOKED) {
6344		*sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
6345		error = 0;
6346	}
6347	*highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
6348	return (0);
6349}
6350
6351/*
6352 * Check/set reclaim complete for this session/clientid.
6353 */
6354int
6355nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs)
6356{
6357	struct nfsdsession *sep;
6358	struct nfssessionhash *shp;
6359	int error = 0;
6360
6361	shp = NFSSESSIONHASH(nd->nd_sessionid);
6362	NFSLOCKSTATE();
6363	NFSLOCKSESSION(shp);
6364	sep = nfsrv_findsession(nd->nd_sessionid);
6365	if (sep == NULL) {
6366		NFSUNLOCKSESSION(shp);
6367		NFSUNLOCKSTATE();
6368		return (NFSERR_BADSESSION);
6369	}
6370
6371	if (onefs != 0)
6372		sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS;
6373		/* Check to see if reclaim complete has already happened. */
6374	else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
6375		error = NFSERR_COMPLETEALREADY;
6376	else {
6377		sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
6378		nfsrv_markreclaim(sep->sess_clp);
6379	}
6380	NFSUNLOCKSESSION(shp);
6381	NFSUNLOCKSTATE();
6382	return (error);
6383}
6384
6385/*
6386 * Cache the reply in a session slot.
6387 */
6388void
6389nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m)
6390{
6391	struct nfsdsession *sep;
6392	struct nfssessionhash *shp;
6393	char *buf, *cp;
6394#ifdef INET
6395	struct sockaddr_in *sin;
6396#endif
6397#ifdef INET6
6398	struct sockaddr_in6 *sin6;
6399#endif
6400
6401	shp = NFSSESSIONHASH(nd->nd_sessionid);
6402	NFSLOCKSESSION(shp);
6403	sep = nfsrv_findsession(nd->nd_sessionid);
6404	if (sep == NULL) {
6405		NFSUNLOCKSESSION(shp);
6406		if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags &
6407		     NFSNSF_GRACEOVER) != 0) {
6408			buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK);
6409			switch (nd->nd_nam->sa_family) {
6410#ifdef INET
6411			case AF_INET:
6412				sin = (struct sockaddr_in *)nd->nd_nam;
6413				cp = inet_ntop(sin->sin_family,
6414				    &sin->sin_addr.s_addr, buf,
6415				    INET6_ADDRSTRLEN);
6416				break;
6417#endif
6418#ifdef INET6
6419			case AF_INET6:
6420				sin6 = (struct sockaddr_in6 *)nd->nd_nam;
6421				cp = inet_ntop(sin6->sin6_family,
6422				    &sin6->sin6_addr, buf, INET6_ADDRSTRLEN);
6423				break;
6424#endif
6425			default:
6426				cp = NULL;
6427			}
6428			if (cp != NULL)
6429				printf("nfsrv_cache_session: no session "
6430				    "IPaddr=%s, check NFS clients for unique "
6431				    "/etc/hostid's\n", cp);
6432			else
6433				printf("nfsrv_cache_session: no session, "
6434				    "check NFS clients for unique "
6435				    "/etc/hostid's\n");
6436			free(buf, M_TEMP);
6437		}
6438		m_freem(*m);
6439		return;
6440	}
6441	nfsv4_seqsess_cacherep(nd->nd_slotid, sep->sess_slots, nd->nd_repstat,
6442	    m);
6443	NFSUNLOCKSESSION(shp);
6444}
6445
6446/*
6447 * Search for a session that matches the sessionid.
6448 */
6449static struct nfsdsession *
6450nfsrv_findsession(uint8_t *sessionid)
6451{
6452	struct nfsdsession *sep;
6453	struct nfssessionhash *shp;
6454
6455	shp = NFSSESSIONHASH(sessionid);
6456	LIST_FOREACH(sep, &shp->list, sess_hash) {
6457		if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
6458			break;
6459	}
6460	return (sep);
6461}
6462
6463/*
6464 * Destroy a session.
6465 */
6466int
6467nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
6468{
6469	int error, igotlock, samesess;
6470
6471	samesess = 0;
6472	if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) &&
6473	    (nd->nd_flag & ND_HASSEQUENCE) != 0) {
6474		samesess = 1;
6475		if ((nd->nd_flag & ND_LASTOP) == 0)
6476			return (NFSERR_BADSESSION);
6477	}
6478
6479	/* Lock out other nfsd threads */
6480	NFSLOCKV4ROOTMUTEX();
6481	nfsv4_relref(&nfsv4rootfs_lock);
6482	do {
6483		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
6484		    NFSV4ROOTLOCKMUTEXPTR, NULL);
6485	} while (igotlock == 0);
6486	NFSUNLOCKV4ROOTMUTEX();
6487
6488	error = nfsrv_freesession(nd, NULL, sessionid);
6489	if (error == 0 && samesess != 0)
6490		nd->nd_flag &= ~ND_HASSEQUENCE;
6491
6492	NFSLOCKV4ROOTMUTEX();
6493	nfsv4_unlock(&nfsv4rootfs_lock, 1);
6494	NFSUNLOCKV4ROOTMUTEX();
6495	return (error);
6496}
6497
6498/*
6499 * Bind a connection to a session.
6500 * For now, only certain variants are supported, since the current session
6501 * structure can only handle a single backchannel entry, which will be
6502 * applied to all connections if it is set.
6503 */
6504int
6505nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp)
6506{
6507	struct nfssessionhash *shp;
6508	struct nfsdsession *sep;
6509	struct nfsclient *clp;
6510	SVCXPRT *savxprt;
6511	int error;
6512
6513	error = 0;
6514	savxprt = NULL;
6515	shp = NFSSESSIONHASH(sessionid);
6516	NFSLOCKSTATE();
6517	NFSLOCKSESSION(shp);
6518	sep = nfsrv_findsession(sessionid);
6519	if (sep != NULL) {
6520		clp = sep->sess_clp;
6521		error = nfsrv_checkmachcred(NFSV4OP_BINDCONNTOSESS, nd, clp);
6522		if (error != 0)
6523			goto out;
6524		if (*foreaftp == NFSCDFC4_BACK ||
6525		    *foreaftp == NFSCDFC4_BACK_OR_BOTH ||
6526		    *foreaftp == NFSCDFC4_FORE_OR_BOTH) {
6527			/* Try to set up a backchannel. */
6528			if (clp->lc_req.nr_client == NULL) {
6529				NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire "
6530				    "backchannel\n");
6531				clp->lc_req.nr_client = (struct __rpc_client *)
6532				    clnt_bck_create(nd->nd_xprt->xp_socket,
6533				    sep->sess_cbprogram, NFSV4_CBVERS);
6534			}
6535			if (clp->lc_req.nr_client != NULL) {
6536				NFSD_DEBUG(2, "nfsrv_bindconnsess: set up "
6537				    "backchannel\n");
6538				savxprt = sep->sess_cbsess.nfsess_xprt;
6539				SVC_ACQUIRE(nd->nd_xprt);
6540				CLNT_ACQUIRE(clp->lc_req.nr_client);
6541				nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
6542				/* Disable idle timeout. */
6543				nd->nd_xprt->xp_idletimeout = 0;
6544				sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
6545				sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN;
6546				clp->lc_flags |= LCL_DONEBINDCONN |
6547				    LCL_NEEDSCBNULL;
6548				clp->lc_flags &= ~LCL_CBDOWN;
6549				if (*foreaftp == NFSCDFS4_BACK)
6550					*foreaftp = NFSCDFS4_BACK;
6551				else
6552					*foreaftp = NFSCDFS4_BOTH;
6553			} else if (*foreaftp != NFSCDFC4_BACK) {
6554				NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set "
6555				    "up backchannel\n");
6556				sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
6557				clp->lc_flags |= LCL_DONEBINDCONN;
6558				*foreaftp = NFSCDFS4_FORE;
6559			} else {
6560				error = NFSERR_NOTSUPP;
6561				printf("nfsrv_bindconnsess: Can't add "
6562				    "backchannel\n");
6563			}
6564		} else {
6565			NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n");
6566			clp->lc_flags |= LCL_DONEBINDCONN;
6567			*foreaftp = NFSCDFS4_FORE;
6568		}
6569	} else
6570		error = NFSERR_BADSESSION;
6571out:
6572	NFSUNLOCKSESSION(shp);
6573	NFSUNLOCKSTATE();
6574	if (savxprt != NULL)
6575		SVC_RELEASE(savxprt);
6576	return (error);
6577}
6578
6579/*
6580 * Free up a session structure.
6581 */
6582static int
6583nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
6584    uint8_t *sessionid)
6585{
6586	struct nfssessionhash *shp;
6587	int i;
6588
6589	NFSLOCKSTATE();
6590	if (sep == NULL) {
6591		shp = NFSSESSIONHASH(sessionid);
6592		NFSLOCKSESSION(shp);
6593		sep = nfsrv_findsession(sessionid);
6594	} else {
6595		shp = NFSSESSIONHASH(sep->sess_sessionid);
6596		NFSLOCKSESSION(shp);
6597	}
6598	if (sep != NULL) {
6599		/* Check for the SP4_MACH_CRED case. */
6600		if (nd != NULL && nfsrv_checkmachcred(NFSV4OP_DESTROYSESSION,
6601		    nd, sep->sess_clp) != 0) {
6602			NFSUNLOCKSESSION(shp);
6603			NFSUNLOCKSTATE();
6604			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
6605		}
6606
6607		sep->sess_refcnt--;
6608		if (sep->sess_refcnt > 0) {
6609			NFSUNLOCKSESSION(shp);
6610			NFSUNLOCKSTATE();
6611			return (NFSERR_BACKCHANBUSY);
6612		}
6613		LIST_REMOVE(sep, sess_hash);
6614		LIST_REMOVE(sep, sess_list);
6615	}
6616	NFSUNLOCKSESSION(shp);
6617	NFSUNLOCKSTATE();
6618	if (sep == NULL)
6619		return (NFSERR_BADSESSION);
6620	for (i = 0; i < NFSV4_SLOTS; i++)
6621		if (sep->sess_slots[i].nfssl_reply != NULL)
6622			m_freem(sep->sess_slots[i].nfssl_reply);
6623	if (sep->sess_cbsess.nfsess_xprt != NULL)
6624		SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
6625	free(sep, M_NFSDSESSION);
6626	return (0);
6627}
6628
6629/*
6630 * Free a stateid.
6631 * RFC5661 says that it should fail when there are associated opens, locks
6632 * or delegations. Since stateids represent opens, I don't see how you can
6633 * free an open stateid (it will be free'd when closed), so this function
6634 * only works for lock stateids (freeing the lock_owner) or delegations.
6635 */
6636int
6637nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6638    NFSPROC_T *p)
6639{
6640	struct nfsclient *clp;
6641	struct nfsstate *stp;
6642	int error;
6643
6644	NFSLOCKSTATE();
6645	/*
6646	 * Look up the stateid
6647	 */
6648	error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6649	    NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6650	if (error == 0) {
6651		/* First, check for a delegation. */
6652		LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
6653			if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
6654			    NFSX_STATEIDOTHER))
6655				break;
6656		}
6657		if (stp != NULL) {
6658			nfsrv_freedeleg(stp);
6659			NFSUNLOCKSTATE();
6660			return (error);
6661		}
6662	}
6663	/* Not a delegation, try for a lock_owner. */
6664	if (error == 0)
6665		error = nfsrv_getstate(clp, stateidp, 0, &stp);
6666	if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
6667	    NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
6668		/* Not a lock_owner stateid. */
6669		error = NFSERR_LOCKSHELD;
6670	if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
6671		error = NFSERR_LOCKSHELD;
6672	if (error == 0)
6673		nfsrv_freelockowner(stp, NULL, 0, p);
6674	NFSUNLOCKSTATE();
6675	return (error);
6676}
6677
6678/*
6679 * Test a stateid.
6680 */
6681int
6682nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6683    NFSPROC_T *p)
6684{
6685	struct nfsclient *clp;
6686	struct nfsstate *stp;
6687	int error;
6688
6689	NFSLOCKSTATE();
6690	/*
6691	 * Look up the stateid
6692	 */
6693	error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6694	    NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6695	if (error == 0)
6696		error = nfsrv_getstate(clp, stateidp, 0, &stp);
6697	if (error == 0 && stateidp->seqid != 0 &&
6698	    SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid))
6699		error = NFSERR_OLDSTATEID;
6700	NFSUNLOCKSTATE();
6701	return (error);
6702}
6703
6704/*
6705 * Generate the xdr for an NFSv4.1 CBSequence Operation.
6706 */
6707static int
6708nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
6709    int dont_replycache, struct nfsdsession **sepp, int *slotposp)
6710{
6711	struct nfsdsession *sep;
6712	uint32_t *tl, slotseq = 0;
6713	int maxslot;
6714	uint8_t sessionid[NFSX_V4SESSIONID];
6715	int error;
6716
6717	error = nfsv4_getcbsession(clp, sepp);
6718	if (error != 0)
6719		return (error);
6720	sep = *sepp;
6721	nfsv4_sequencelookup(NULL, &sep->sess_cbsess, slotposp, &maxslot,
6722	    &slotseq, sessionid, true);
6723	KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
6724
6725	/* Build the Sequence arguments. */
6726	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
6727	bcopy(sessionid, tl, NFSX_V4SESSIONID);
6728	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
6729	nd->nd_slotseq = tl;
6730	nd->nd_slotid = *slotposp;
6731	nd->nd_flag |= ND_HASSLOTID;
6732	*tl++ = txdr_unsigned(slotseq);
6733	*tl++ = txdr_unsigned(*slotposp);
6734	*tl++ = txdr_unsigned(maxslot);
6735	if (dont_replycache == 0)
6736		*tl++ = newnfs_true;
6737	else
6738		*tl++ = newnfs_false;
6739	*tl = 0;			/* No referring call list, for now. */
6740	nd->nd_flag |= ND_HASSEQUENCE;
6741	return (0);
6742}
6743
6744/*
6745 * Get a session for the callback.
6746 */
6747static int
6748nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
6749{
6750	struct nfsdsession *sep;
6751
6752	NFSLOCKSTATE();
6753	LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6754		if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
6755			break;
6756	}
6757	if (sep == NULL) {
6758		NFSUNLOCKSTATE();
6759		return (NFSERR_BADSESSION);
6760	}
6761	sep->sess_refcnt++;
6762	*sepp = sep;
6763	NFSUNLOCKSTATE();
6764	return (0);
6765}
6766
6767/*
6768 * Free up all backchannel xprts.  This needs to be done when the nfsd threads
6769 * exit, since those transports will all be going away.
6770 * This is only called after all the nfsd threads are done performing RPCs,
6771 * so locking shouldn't be an issue.
6772 */
6773void
6774nfsrv_freeallbackchannel_xprts(void)
6775{
6776	struct nfsdsession *sep;
6777	struct nfsclient *clp;
6778	SVCXPRT *xprt;
6779	int i;
6780
6781	for (i = 0; i < nfsrv_clienthashsize; i++) {
6782		LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
6783			LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6784				xprt = sep->sess_cbsess.nfsess_xprt;
6785				sep->sess_cbsess.nfsess_xprt = NULL;
6786				if (xprt != NULL)
6787					SVC_RELEASE(xprt);
6788			}
6789		}
6790	}
6791}
6792
6793/*
6794 * Do a layout commit.  Actually just call nfsrv_updatemdsattr().
6795 * I have no idea if the rest of these arguments will ever be useful?
6796 */
6797int
6798nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype,
6799    int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len,
6800    int hasnewmtime, struct timespec *newmtimep, int reclaim,
6801    nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep,
6802    uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p)
6803{
6804	struct nfsvattr na;
6805	int error;
6806
6807	error = nfsrv_updatemdsattr(vp, &na, p);
6808	if (error == 0) {
6809		*hasnewsizep = 1;
6810		*newsizep = na.na_size;
6811	}
6812	return (error);
6813}
6814
6815/*
6816 * Try and get a layout.
6817 */
6818int
6819nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp,
6820    int layouttype, int *iomode, uint64_t *offset, uint64_t *len,
6821    uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose,
6822    int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p)
6823{
6824	struct nfslayouthash *lhyp;
6825	struct nfslayout *lyp;
6826	char *devid;
6827	fhandle_t fh, *dsfhp;
6828	int error, mirrorcnt;
6829
6830	if (nfsrv_devidcnt == 0)
6831		return (NFSERR_UNKNLAYOUTTYPE);
6832
6833	if (*offset != 0)
6834		printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset,
6835		    (uintmax_t)*len);
6836	error = nfsvno_getfh(vp, &fh, p);
6837	NFSD_DEBUG(4, "layoutget getfh=%d\n", error);
6838	if (error != 0)
6839		return (error);
6840
6841	/*
6842	 * For now, all layouts are for entire files.
6843	 * Only issue Read/Write layouts if requested for a non-readonly fs.
6844	 */
6845	if (NFSVNO_EXRDONLY(exp)) {
6846		if (*iomode == NFSLAYOUTIOMODE_RW)
6847			return (NFSERR_LAYOUTTRYLATER);
6848		*iomode = NFSLAYOUTIOMODE_READ;
6849	}
6850	if (*iomode != NFSLAYOUTIOMODE_RW)
6851		*iomode = NFSLAYOUTIOMODE_READ;
6852
6853	/*
6854	 * Check to see if a write layout can be issued for this file.
6855	 * This is used during mirror recovery to avoid RW layouts being
6856	 * issued for a file while it is being copied to the recovered
6857	 * mirror.
6858	 */
6859	if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0)
6860		return (NFSERR_LAYOUTTRYLATER);
6861
6862	*retonclose = 0;
6863	*offset = 0;
6864	*len = UINT64_MAX;
6865
6866	/* First, see if a layout already exists and return if found. */
6867	lhyp = NFSLAYOUTHASH(&fh);
6868	NFSLOCKLAYOUT(lhyp);
6869	error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp);
6870	NFSD_DEBUG(4, "layoutget findlay=%d\n", error);
6871	/*
6872	 * Not sure if the seqid must be the same, so I won't check it.
6873	 */
6874	if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] ||
6875	    stateidp->other[1] != lyp->lay_stateid.other[1] ||
6876	    stateidp->other[2] != lyp->lay_stateid.other[2])) {
6877		if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
6878			NFSUNLOCKLAYOUT(lhyp);
6879			NFSD_DEBUG(1, "ret bad stateid\n");
6880			return (NFSERR_BADSTATEID);
6881		}
6882		/*
6883		 * I believe we get here because there is a race between
6884		 * the client processing the CBLAYOUTRECALL and the layout
6885		 * being deleted here on the server.
6886		 * The client has now done a LayoutGet with a non-layout
6887		 * stateid, as it would when there is no layout.
6888		 * As such, free this layout and set error == NFSERR_BADSTATEID
6889		 * so the code below will create a new layout structure as
6890		 * would happen if no layout was found.
6891		 * "lyp" will be set before being used below, but set it NULL
6892		 * as a safety belt.
6893		 */
6894		nfsrv_freelayout(&lhyp->list, lyp);
6895		lyp = NULL;
6896		error = NFSERR_BADSTATEID;
6897	}
6898	if (error == 0) {
6899		if (lyp->lay_layoutlen > maxcnt) {
6900			NFSUNLOCKLAYOUT(lhyp);
6901			NFSD_DEBUG(1, "ret layout too small\n");
6902			return (NFSERR_TOOSMALL);
6903		}
6904		if (*iomode == NFSLAYOUTIOMODE_RW) {
6905			if ((lyp->lay_flags & NFSLAY_NOSPC) != 0) {
6906				NFSUNLOCKLAYOUT(lhyp);
6907				NFSD_DEBUG(1, "ret layout nospace\n");
6908				return (NFSERR_NOSPC);
6909			}
6910			lyp->lay_flags |= NFSLAY_RW;
6911		} else
6912			lyp->lay_flags |= NFSLAY_READ;
6913		NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
6914		*layoutlenp = lyp->lay_layoutlen;
6915		if (++lyp->lay_stateid.seqid == 0)
6916			lyp->lay_stateid.seqid = 1;
6917		stateidp->seqid = lyp->lay_stateid.seqid;
6918		NFSUNLOCKLAYOUT(lhyp);
6919		NFSD_DEBUG(4, "ret fnd layout\n");
6920		return (0);
6921	}
6922	NFSUNLOCKLAYOUT(lhyp);
6923
6924	/* Find the device id and file handle. */
6925	dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
6926	devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
6927	error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid);
6928	NFSD_DEBUG(4, "layoutget devandfh=%d\n", error);
6929	if (error == 0) {
6930		if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
6931			if (NFSX_V4FILELAYOUT > maxcnt)
6932				error = NFSERR_TOOSMALL;
6933			else
6934				lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp,
6935				    devid, vp->v_mount->mnt_stat.f_fsid);
6936		} else {
6937			if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt)
6938				error = NFSERR_TOOSMALL;
6939			else
6940				lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt,
6941				    &fh, dsfhp, devid,
6942				    vp->v_mount->mnt_stat.f_fsid);
6943		}
6944	}
6945	free(dsfhp, M_TEMP);
6946	free(devid, M_TEMP);
6947	if (error != 0)
6948		return (error);
6949
6950	/*
6951	 * Now, add this layout to the list.
6952	 */
6953	error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p);
6954	NFSD_DEBUG(4, "layoutget addl=%d\n", error);
6955	/*
6956	 * The lyp will be set to NULL by nfsrv_addlayout() if it
6957	 * linked the new structure into the lists.
6958	 */
6959	free(lyp, M_NFSDSTATE);
6960	return (error);
6961}
6962
6963/*
6964 * Generate a File Layout.
6965 */
6966static struct nfslayout *
6967nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
6968    fhandle_t *dsfhp, char *devid, fsid_t fs)
6969{
6970	uint32_t *tl;
6971	struct nfslayout *lyp;
6972	uint64_t pattern_offset;
6973
6974	lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE,
6975	    M_WAITOK | M_ZERO);
6976	lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES;
6977	if (iomode == NFSLAYOUTIOMODE_RW)
6978		lyp->lay_flags = NFSLAY_RW;
6979	else
6980		lyp->lay_flags = NFSLAY_READ;
6981	NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
6982	lyp->lay_clientid.qval = nd->nd_clientid.qval;
6983	lyp->lay_fsid = fs;
6984	NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
6985
6986	/* Fill in the xdr for the files layout. */
6987	tl = (uint32_t *)lyp->lay_xdr;
6988	NFSBCOPY(devid, tl, NFSX_V4DEVICEID);		/* Device ID. */
6989	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6990
6991	/* Set the stripe size to the maximum I/O size. */
6992	*tl++ = txdr_unsigned(nfs_srvmaxio & NFSFLAYUTIL_STRIPE_MASK);
6993	*tl++ = 0;					/* 1st stripe index. */
6994	pattern_offset = 0;
6995	txdr_hyper(pattern_offset, tl); tl += 2;	/* Pattern offset. */
6996	*tl++ = txdr_unsigned(1);			/* 1 file handle. */
6997	*tl++ = txdr_unsigned(NFSX_V4PNFSFH);
6998	NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
6999	lyp->lay_layoutlen = NFSX_V4FILELAYOUT;
7000	return (lyp);
7001}
7002
7003#define	FLEX_OWNERID	"999"
7004#define	FLEX_UID0	"0"
7005/*
7006 * Generate a Flex File Layout.
7007 * The FLEX_OWNERID can be any string of 3 decimal digits. Although this
7008 * string goes on the wire, it isn't supposed to be used by the client,
7009 * since this server uses tight coupling.
7010 * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use
7011 * a string of "0". This works around the Linux Flex File Layout driver bug
7012 * which uses the synthetic uid/gid strings for the "tightly coupled" case.
7013 */
7014static struct nfslayout *
7015nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt,
7016    fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs)
7017{
7018	uint32_t *tl;
7019	struct nfslayout *lyp;
7020	uint64_t lenval;
7021	int i;
7022
7023	lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt),
7024	    M_NFSDSTATE, M_WAITOK | M_ZERO);
7025	lyp->lay_type = NFSLAYOUT_FLEXFILE;
7026	if (iomode == NFSLAYOUTIOMODE_RW)
7027		lyp->lay_flags = NFSLAY_RW;
7028	else
7029		lyp->lay_flags = NFSLAY_READ;
7030	NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
7031	lyp->lay_clientid.qval = nd->nd_clientid.qval;
7032	lyp->lay_fsid = fs;
7033	lyp->lay_mirrorcnt = mirrorcnt;
7034	NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
7035
7036	/* Fill in the xdr for the files layout. */
7037	tl = (uint32_t *)lyp->lay_xdr;
7038	lenval = 0;
7039	txdr_hyper(lenval, tl); tl += 2;		/* Stripe unit. */
7040	*tl++ = txdr_unsigned(mirrorcnt);		/* # of mirrors. */
7041	for (i = 0; i < mirrorcnt; i++) {
7042		*tl++ = txdr_unsigned(1);		/* One stripe. */
7043		NFSBCOPY(devid, tl, NFSX_V4DEVICEID);	/* Device ID. */
7044		tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7045		devid += NFSX_V4DEVICEID;
7046		*tl++ = txdr_unsigned(1);		/* Efficiency. */
7047		*tl++ = 0;				/* Proxy Stateid. */
7048		*tl++ = 0x55555555;
7049		*tl++ = 0x55555555;
7050		*tl++ = 0x55555555;
7051		*tl++ = txdr_unsigned(1);		/* 1 file handle. */
7052		*tl++ = txdr_unsigned(NFSX_V4PNFSFH);
7053		NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
7054		tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED);
7055		dsfhp++;
7056		if (nfsrv_flexlinuxhack != 0) {
7057			*tl++ = txdr_unsigned(strlen(FLEX_UID0));
7058			*tl = 0;		/* 0 pad string. */
7059			NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
7060			*tl++ = txdr_unsigned(strlen(FLEX_UID0));
7061			*tl = 0;		/* 0 pad string. */
7062			NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
7063		} else {
7064			*tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
7065			NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
7066			*tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
7067			NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
7068		}
7069	}
7070	*tl++ = txdr_unsigned(0);		/* ff_flags. */
7071	*tl = txdr_unsigned(60);		/* Status interval hint. */
7072	lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt);
7073	return (lyp);
7074}
7075
7076/*
7077 * Parse and process Flex File errors returned via LayoutReturn.
7078 */
7079static void
7080nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt,
7081    NFSPROC_T *p)
7082{
7083	uint32_t *tl;
7084	int cnt, errcnt, i, j, opnum, stat;
7085	char devid[NFSX_V4DEVICEID];
7086
7087	tl = layp;
7088	maxcnt -= NFSX_UNSIGNED;
7089	if (maxcnt > 0)
7090		cnt = fxdr_unsigned(int, *tl++);
7091	else
7092		cnt = 0;
7093	NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt);
7094	for (i = 0; i < cnt; i++) {
7095		maxcnt -= NFSX_STATEID + 2 * NFSX_HYPER +
7096		    NFSX_UNSIGNED;
7097		if (maxcnt <= 0)
7098			break;
7099		/* Skip offset, length and stateid for now. */
7100		tl += (4 + NFSX_STATEID / NFSX_UNSIGNED);
7101		errcnt = fxdr_unsigned(int, *tl++);
7102		NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt);
7103		for (j = 0; j < errcnt; j++) {
7104			maxcnt -= NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED;
7105			if (maxcnt < 0)
7106				break;
7107			NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
7108			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7109			stat = fxdr_unsigned(int, *tl++);
7110			opnum = fxdr_unsigned(int, *tl++);
7111			NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum,
7112			    stat);
7113			/*
7114			 * Except for NFSERR_ACCES, NFSERR_STALE and
7115			 * NFSERR_NOSPC errors, disable the mirror.
7116			 */
7117			if (stat != NFSERR_ACCES && stat != NFSERR_STALE &&
7118			    stat != NFSERR_NOSPC)
7119				nfsrv_delds(devid, p);
7120
7121			/* For NFSERR_NOSPC, mark all devids and layouts. */
7122			if (stat == NFSERR_NOSPC)
7123				nfsrv_marknospc(devid, true);
7124		}
7125	}
7126}
7127
7128/*
7129 * This function removes all flex file layouts which has a mirror with
7130 * a device id that matches the argument.
7131 * Called when the DS represented by the device id has failed.
7132 */
7133void
7134nfsrv_flexmirrordel(char *devid, NFSPROC_T *p)
7135{
7136	uint32_t *tl;
7137	struct nfslayout *lyp, *nlyp;
7138	struct nfslayouthash *lhyp;
7139	struct nfslayouthead loclyp;
7140	int i, j;
7141
7142	NFSD_DEBUG(4, "flexmirrordel\n");
7143	/* Move all layouts found onto a local list. */
7144	TAILQ_INIT(&loclyp);
7145	for (i = 0; i < nfsrv_layouthashsize; i++) {
7146		lhyp = &nfslayouthash[i];
7147		NFSLOCKLAYOUT(lhyp);
7148		TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7149			if (lyp->lay_type == NFSLAYOUT_FLEXFILE &&
7150			    lyp->lay_mirrorcnt > 1) {
7151				NFSD_DEBUG(4, "possible match\n");
7152				tl = lyp->lay_xdr;
7153				tl += 3;
7154				for (j = 0; j < lyp->lay_mirrorcnt; j++) {
7155					tl++;
7156					if (NFSBCMP(devid, tl, NFSX_V4DEVICEID)
7157					    == 0) {
7158						/* Found one. */
7159						NFSD_DEBUG(4, "fnd one\n");
7160						TAILQ_REMOVE(&lhyp->list, lyp,
7161						    lay_list);
7162						TAILQ_INSERT_HEAD(&loclyp, lyp,
7163						    lay_list);
7164						break;
7165					}
7166					tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED +
7167					    NFSM_RNDUP(NFSX_V4PNFSFH) /
7168					    NFSX_UNSIGNED + 11 * NFSX_UNSIGNED);
7169				}
7170			}
7171		}
7172		NFSUNLOCKLAYOUT(lhyp);
7173	}
7174
7175	/* Now, try to do a Layout recall for each one found. */
7176	TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) {
7177		NFSD_DEBUG(4, "do layout recall\n");
7178		/*
7179		 * The layout stateid.seqid needs to be incremented
7180		 * before doing a LAYOUT_RECALL callback.
7181		 */
7182		if (++lyp->lay_stateid.seqid == 0)
7183			lyp->lay_stateid.seqid = 1;
7184		nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
7185		    &lyp->lay_fh, lyp, 1, lyp->lay_type, p);
7186		nfsrv_freelayout(&loclyp, lyp);
7187	}
7188}
7189
7190/*
7191 * Do a recall callback to the client for this layout.
7192 */
7193static int
7194nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp,
7195    struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p)
7196{
7197	struct nfsclient *clp;
7198	int error;
7199
7200	NFSD_DEBUG(4, "nfsrv_recalllayout\n");
7201	error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0),
7202	    0, NULL, p);
7203	NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error);
7204	if (error != 0) {
7205		printf("nfsrv_recalllayout: getclient err=%d\n", error);
7206		return (error);
7207	}
7208	if ((clp->lc_flags & LCL_NFSV41) != 0) {
7209		error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL,
7210		    stateidp, changed, fhp, NULL, NULL, laytype, p);
7211		/* If lyp != NULL, handle an error return here. */
7212		if (error != 0 && lyp != NULL) {
7213			NFSDRECALLLOCK();
7214			/*
7215			 * Mark it returned, since no layout recall
7216			 * has been done.
7217			 * All errors seem to be non-recoverable, although
7218			 * NFSERR_NOMATCHLAYOUT is a normal event.
7219			 */
7220			if ((lyp->lay_flags & NFSLAY_RECALL) != 0) {
7221				lyp->lay_flags |= NFSLAY_RETURNED;
7222				wakeup(lyp);
7223			}
7224			NFSDRECALLUNLOCK();
7225			if (error != NFSERR_NOMATCHLAYOUT)
7226				printf("nfsrv_recalllayout: err=%d\n", error);
7227		}
7228	} else
7229		printf("nfsrv_recalllayout: clp not NFSv4.1\n");
7230	return (error);
7231}
7232
7233/*
7234 * Find a layout to recall when we exceed our high water mark.
7235 */
7236void
7237nfsrv_recalloldlayout(NFSPROC_T *p)
7238{
7239	struct nfslayouthash *lhyp;
7240	struct nfslayout *lyp;
7241	nfsquad_t clientid;
7242	nfsv4stateid_t stateid;
7243	fhandle_t fh;
7244	int error, laytype = 0, ret;
7245
7246	lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize];
7247	NFSLOCKLAYOUT(lhyp);
7248	TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) {
7249		if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
7250			lyp->lay_flags |= NFSLAY_CALLB;
7251			/*
7252			 * The layout stateid.seqid needs to be incremented
7253			 * before doing a LAYOUT_RECALL callback.
7254			 */
7255			if (++lyp->lay_stateid.seqid == 0)
7256				lyp->lay_stateid.seqid = 1;
7257			clientid = lyp->lay_clientid;
7258			stateid = lyp->lay_stateid;
7259			NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh));
7260			laytype = lyp->lay_type;
7261			break;
7262		}
7263	}
7264	NFSUNLOCKLAYOUT(lhyp);
7265	if (lyp != NULL) {
7266		error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0,
7267		    laytype, p);
7268		if (error != 0 && error != NFSERR_NOMATCHLAYOUT)
7269			NFSD_DEBUG(4, "recallold=%d\n", error);
7270		if (error != 0) {
7271			NFSLOCKLAYOUT(lhyp);
7272			/*
7273			 * Since the hash list was unlocked, we need to
7274			 * find it again.
7275			 */
7276			ret = nfsrv_findlayout(&clientid, &fh, laytype, p,
7277			    &lyp);
7278			if (ret == 0 &&
7279			    (lyp->lay_flags & NFSLAY_CALLB) != 0 &&
7280			    lyp->lay_stateid.other[0] == stateid.other[0] &&
7281			    lyp->lay_stateid.other[1] == stateid.other[1] &&
7282			    lyp->lay_stateid.other[2] == stateid.other[2]) {
7283				/*
7284				 * The client no longer knows this layout, so
7285				 * it can be free'd now.
7286				 */
7287				if (error == NFSERR_NOMATCHLAYOUT)
7288					nfsrv_freelayout(&lhyp->list, lyp);
7289				else {
7290					/*
7291					 * Leave it to be tried later by
7292					 * clearing NFSLAY_CALLB and moving
7293					 * it to the head of the list, so it
7294					 * won't be tried again for a while.
7295					 */
7296					lyp->lay_flags &= ~NFSLAY_CALLB;
7297					TAILQ_REMOVE(&lhyp->list, lyp,
7298					    lay_list);
7299					TAILQ_INSERT_HEAD(&lhyp->list, lyp,
7300					    lay_list);
7301				}
7302			}
7303			NFSUNLOCKLAYOUT(lhyp);
7304		}
7305	}
7306}
7307
7308/*
7309 * Try and return layout(s).
7310 */
7311int
7312nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp,
7313    int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim,
7314    int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp,
7315    struct ucred *cred, NFSPROC_T *p)
7316{
7317	struct nfsvattr na;
7318	struct nfslayouthash *lhyp;
7319	struct nfslayout *lyp;
7320	fhandle_t fh;
7321	int error = 0;
7322
7323	*fndp = 0;
7324	if (kind == NFSV4LAYOUTRET_FILE) {
7325		error = nfsvno_getfh(vp, &fh, p);
7326		if (error == 0) {
7327			error = nfsrv_updatemdsattr(vp, &na, p);
7328			if (error != 0)
7329				printf("nfsrv_layoutreturn: updatemdsattr"
7330				    " failed=%d\n", error);
7331		}
7332		if (error == 0) {
7333			if (reclaim == newnfs_true) {
7334				error = nfsrv_checkgrace(NULL, NULL,
7335				    NFSLCK_RECLAIM);
7336				if (error != NFSERR_NOGRACE)
7337					error = 0;
7338				return (error);
7339			}
7340			lhyp = NFSLAYOUTHASH(&fh);
7341			NFSDRECALLLOCK();
7342			NFSLOCKLAYOUT(lhyp);
7343			error = nfsrv_findlayout(&nd->nd_clientid, &fh,
7344			    layouttype, p, &lyp);
7345			NFSD_DEBUG(4, "layoutret findlay=%d\n", error);
7346			if (error == 0 &&
7347			    stateidp->other[0] == lyp->lay_stateid.other[0] &&
7348			    stateidp->other[1] == lyp->lay_stateid.other[1] &&
7349			    stateidp->other[2] == lyp->lay_stateid.other[2]) {
7350				NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d"
7351				    " %x %x %x laystateid %d %x %x %x"
7352				    " off=%ju len=%ju flgs=0x%x\n",
7353				    stateidp->seqid, stateidp->other[0],
7354				    stateidp->other[1], stateidp->other[2],
7355				    lyp->lay_stateid.seqid,
7356				    lyp->lay_stateid.other[0],
7357				    lyp->lay_stateid.other[1],
7358				    lyp->lay_stateid.other[2],
7359				    (uintmax_t)offset, (uintmax_t)len,
7360				    lyp->lay_flags);
7361				if (++lyp->lay_stateid.seqid == 0)
7362					lyp->lay_stateid.seqid = 1;
7363				stateidp->seqid = lyp->lay_stateid.seqid;
7364				if (offset == 0 && len == UINT64_MAX) {
7365					if ((iomode & NFSLAYOUTIOMODE_READ) !=
7366					    0)
7367						lyp->lay_flags &= ~NFSLAY_READ;
7368					if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
7369						lyp->lay_flags &= ~NFSLAY_RW;
7370					if ((lyp->lay_flags & (NFSLAY_READ |
7371					    NFSLAY_RW)) == 0)
7372						nfsrv_freelayout(&lhyp->list,
7373						    lyp);
7374					else
7375						*fndp = 1;
7376				} else
7377					*fndp = 1;
7378			}
7379			NFSUNLOCKLAYOUT(lhyp);
7380			/* Search the nfsrv_recalllist for a match. */
7381			TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
7382				if (NFSBCMP(&lyp->lay_fh, &fh,
7383				    sizeof(fh)) == 0 &&
7384				    lyp->lay_clientid.qval ==
7385				    nd->nd_clientid.qval &&
7386				    stateidp->other[0] ==
7387				    lyp->lay_stateid.other[0] &&
7388				    stateidp->other[1] ==
7389				    lyp->lay_stateid.other[1] &&
7390				    stateidp->other[2] ==
7391				    lyp->lay_stateid.other[2]) {
7392					lyp->lay_flags |= NFSLAY_RETURNED;
7393					wakeup(lyp);
7394					error = 0;
7395				}
7396			}
7397			NFSDRECALLUNLOCK();
7398		}
7399		if (layouttype == NFSLAYOUT_FLEXFILE && layp != NULL)
7400			nfsrv_flexlayouterr(nd, layp, maxcnt, p);
7401	} else if (kind == NFSV4LAYOUTRET_FSID)
7402		nfsrv_freelayouts(&nd->nd_clientid,
7403		    &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode);
7404	else if (kind == NFSV4LAYOUTRET_ALL)
7405		nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode);
7406	else
7407		error = NFSERR_INVAL;
7408	if (error == -1)
7409		error = 0;
7410	return (error);
7411}
7412
7413/*
7414 * Look for an existing layout.
7415 */
7416static int
7417nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
7418    NFSPROC_T *p, struct nfslayout **lypp)
7419{
7420	struct nfslayouthash *lhyp;
7421	struct nfslayout *lyp;
7422	int ret;
7423
7424	*lypp = NULL;
7425	ret = 0;
7426	lhyp = NFSLAYOUTHASH(fhp);
7427	TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
7428		if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
7429		    lyp->lay_clientid.qval == clientidp->qval &&
7430		    lyp->lay_type == laytype)
7431			break;
7432	}
7433	if (lyp != NULL)
7434		*lypp = lyp;
7435	else
7436		ret = -1;
7437	return (ret);
7438}
7439
7440/*
7441 * Add the new layout, as required.
7442 */
7443static int
7444nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
7445    nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p)
7446{
7447	struct nfsclient *clp;
7448	struct nfslayouthash *lhyp;
7449	struct nfslayout *lyp, *nlyp;
7450	fhandle_t *fhp;
7451	int error;
7452
7453	KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0,
7454	    ("nfsrv_layoutget: no nd_clientid\n"));
7455	lyp = *lypp;
7456	fhp = &lyp->lay_fh;
7457	NFSLOCKSTATE();
7458	error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
7459	    NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
7460	if (error != 0) {
7461		NFSUNLOCKSTATE();
7462		return (error);
7463	}
7464	lyp->lay_stateid.seqid = stateidp->seqid = 1;
7465	lyp->lay_stateid.other[0] = stateidp->other[0] =
7466	    clp->lc_clientid.lval[0];
7467	lyp->lay_stateid.other[1] = stateidp->other[1] =
7468	    clp->lc_clientid.lval[1];
7469	lyp->lay_stateid.other[2] = stateidp->other[2] =
7470	    nfsrv_nextstateindex(clp);
7471	NFSUNLOCKSTATE();
7472
7473	lhyp = NFSLAYOUTHASH(fhp);
7474	NFSLOCKLAYOUT(lhyp);
7475	TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) {
7476		if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
7477		    nlyp->lay_clientid.qval == nd->nd_clientid.qval)
7478			break;
7479	}
7480	if (nlyp != NULL) {
7481		/* A layout already exists, so use it. */
7482		nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW));
7483		NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen);
7484		*layoutlenp = nlyp->lay_layoutlen;
7485		if (++nlyp->lay_stateid.seqid == 0)
7486			nlyp->lay_stateid.seqid = 1;
7487		stateidp->seqid = nlyp->lay_stateid.seqid;
7488		stateidp->other[0] = nlyp->lay_stateid.other[0];
7489		stateidp->other[1] = nlyp->lay_stateid.other[1];
7490		stateidp->other[2] = nlyp->lay_stateid.other[2];
7491		NFSUNLOCKLAYOUT(lhyp);
7492		return (0);
7493	}
7494
7495	/* Insert the new layout in the lists. */
7496	*lypp = NULL;
7497	atomic_add_int(&nfsrv_layoutcnt, 1);
7498	NFSD_VNET(nfsstatsv1_p)->srvlayouts++;
7499	NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
7500	*layoutlenp = lyp->lay_layoutlen;
7501	TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list);
7502	NFSUNLOCKLAYOUT(lhyp);
7503	return (0);
7504}
7505
7506/*
7507 * Get the devinfo for a deviceid.
7508 */
7509int
7510nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt,
7511    uint32_t *notify, int *devaddrlen, char **devaddr)
7512{
7513	struct nfsdevice *ds;
7514
7515	if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype !=
7516	     NFSLAYOUT_FLEXFILE) ||
7517	    (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES))
7518		return (NFSERR_UNKNLAYOUTTYPE);
7519
7520	/*
7521	 * Now, search for the device id.  Note that the structures won't go
7522	 * away, but the order changes in the list.  As such, the lock only
7523	 * needs to be held during the search through the list.
7524	 */
7525	NFSDDSLOCK();
7526	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7527		if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 &&
7528		    ds->nfsdev_nmp != NULL)
7529			break;
7530	}
7531	NFSDDSUNLOCK();
7532	if (ds == NULL)
7533		return (NFSERR_NOENT);
7534
7535	/* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */
7536	*devaddrlen = 0;
7537	if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
7538		*devaddrlen = ds->nfsdev_fileaddrlen;
7539		*devaddr = ds->nfsdev_fileaddr;
7540	} else if (layouttype == NFSLAYOUT_FLEXFILE) {
7541		*devaddrlen = ds->nfsdev_flexaddrlen;
7542		*devaddr = ds->nfsdev_flexaddr;
7543	}
7544	if (*devaddrlen == 0)
7545		return (NFSERR_UNKNLAYOUTTYPE);
7546
7547	/*
7548	 * The XDR overhead is 3 unsigned values: layout_type,
7549	 * length_of_address and notify bitmap.
7550	 * If the notify array is changed to not all zeros, the
7551	 * count of unsigned values must be increased.
7552	 */
7553	if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) +
7554	    3 * NFSX_UNSIGNED) {
7555		*maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED;
7556		return (NFSERR_TOOSMALL);
7557	}
7558	return (0);
7559}
7560
7561/*
7562 * Free a list of layout state structures.
7563 */
7564static void
7565nfsrv_freelayoutlist(nfsquad_t clientid)
7566{
7567	struct nfslayouthash *lhyp;
7568	struct nfslayout *lyp, *nlyp;
7569	int i;
7570
7571	for (i = 0; i < nfsrv_layouthashsize; i++) {
7572		lhyp = &nfslayouthash[i];
7573		NFSLOCKLAYOUT(lhyp);
7574		TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7575			if (lyp->lay_clientid.qval == clientid.qval)
7576				nfsrv_freelayout(&lhyp->list, lyp);
7577		}
7578		NFSUNLOCKLAYOUT(lhyp);
7579	}
7580}
7581
7582/*
7583 * Free up a layout.
7584 */
7585static void
7586nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp)
7587{
7588
7589	NFSD_DEBUG(4, "Freelayout=%p\n", lyp);
7590	atomic_add_int(&nfsrv_layoutcnt, -1);
7591	NFSD_VNET(nfsstatsv1_p)->srvlayouts--;
7592	TAILQ_REMOVE(lhp, lyp, lay_list);
7593	free(lyp, M_NFSDSTATE);
7594}
7595
7596/*
7597 * Free up a device id.
7598 */
7599void
7600nfsrv_freeonedevid(struct nfsdevice *ds)
7601{
7602	int i;
7603
7604	atomic_add_int(&nfsrv_devidcnt, -1);
7605	vrele(ds->nfsdev_dvp);
7606	for (i = 0; i < nfsrv_dsdirsize; i++)
7607		if (ds->nfsdev_dsdir[i] != NULL)
7608			vrele(ds->nfsdev_dsdir[i]);
7609	free(ds->nfsdev_fileaddr, M_NFSDSTATE);
7610	free(ds->nfsdev_flexaddr, M_NFSDSTATE);
7611	free(ds->nfsdev_host, M_NFSDSTATE);
7612	free(ds, M_NFSDSTATE);
7613}
7614
7615/*
7616 * Free up a device id and its mirrors.
7617 */
7618static void
7619nfsrv_freedevid(struct nfsdevice *ds)
7620{
7621
7622	TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
7623	nfsrv_freeonedevid(ds);
7624}
7625
7626/*
7627 * Free all layouts and device ids.
7628 * Done when the nfsd threads are shut down since there may be a new
7629 * modified device id list created when the nfsd is restarted.
7630 */
7631void
7632nfsrv_freealllayoutsanddevids(void)
7633{
7634	struct nfsdontlist *mrp, *nmrp;
7635	struct nfslayout *lyp, *nlyp;
7636
7637	/* Get rid of the deviceid structures. */
7638	nfsrv_freealldevids();
7639	TAILQ_INIT(&nfsrv_devidhead);
7640	nfsrv_devidcnt = 0;
7641
7642	/* Get rid of all layouts. */
7643	nfsrv_freealllayouts();
7644
7645	/* Get rid of any nfsdontlist entries. */
7646	LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp)
7647		free(mrp, M_NFSDSTATE);
7648	LIST_INIT(&nfsrv_dontlisthead);
7649	nfsrv_dontlistlen = 0;
7650
7651	/* Free layouts in the recall list. */
7652	TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp)
7653		nfsrv_freelayout(&nfsrv_recalllisthead, lyp);
7654	TAILQ_INIT(&nfsrv_recalllisthead);
7655}
7656
7657/*
7658 * Free layouts that match the arguments.
7659 */
7660static void
7661nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode)
7662{
7663	struct nfslayouthash *lhyp;
7664	struct nfslayout *lyp, *nlyp;
7665	int i;
7666
7667	for (i = 0; i < nfsrv_layouthashsize; i++) {
7668		lhyp = &nfslayouthash[i];
7669		NFSLOCKLAYOUT(lhyp);
7670		TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7671			if (clid->qval != lyp->lay_clientid.qval)
7672				continue;
7673			if (fs != NULL && fsidcmp(fs, &lyp->lay_fsid) != 0)
7674				continue;
7675			if (laytype != lyp->lay_type)
7676				continue;
7677			if ((iomode & NFSLAYOUTIOMODE_READ) != 0)
7678				lyp->lay_flags &= ~NFSLAY_READ;
7679			if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
7680				lyp->lay_flags &= ~NFSLAY_RW;
7681			if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0)
7682				nfsrv_freelayout(&lhyp->list, lyp);
7683		}
7684		NFSUNLOCKLAYOUT(lhyp);
7685	}
7686}
7687
7688/*
7689 * Free all layouts for the argument file.
7690 */
7691void
7692nfsrv_freefilelayouts(fhandle_t *fhp)
7693{
7694	struct nfslayouthash *lhyp;
7695	struct nfslayout *lyp, *nlyp;
7696
7697	lhyp = NFSLAYOUTHASH(fhp);
7698	NFSLOCKLAYOUT(lhyp);
7699	TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7700		if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0)
7701			nfsrv_freelayout(&lhyp->list, lyp);
7702	}
7703	NFSUNLOCKLAYOUT(lhyp);
7704}
7705
7706/*
7707 * Free all layouts.
7708 */
7709static void
7710nfsrv_freealllayouts(void)
7711{
7712	struct nfslayouthash *lhyp;
7713	struct nfslayout *lyp, *nlyp;
7714	int i;
7715
7716	for (i = 0; i < nfsrv_layouthashsize; i++) {
7717		lhyp = &nfslayouthash[i];
7718		NFSLOCKLAYOUT(lhyp);
7719		TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp)
7720			nfsrv_freelayout(&lhyp->list, lyp);
7721		NFSUNLOCKLAYOUT(lhyp);
7722	}
7723}
7724
7725/*
7726 * Look up the mount path for the DS server.
7727 */
7728static int
7729nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
7730    struct nfsdevice **dsp)
7731{
7732	struct nameidata nd;
7733	struct nfsdevice *ds;
7734	struct mount *mp;
7735	int error, i;
7736	char *dsdirpath;
7737	size_t dsdirsize;
7738
7739	NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp);
7740	*dsp = NULL;
7741	if (jailed(p->td_ucred)) {
7742		printf("A pNFS nfsd cannot run in a jail\n");
7743		return (EPERM);
7744	}
7745	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
7746	    dspathp);
7747	error = namei(&nd);
7748	NFSD_DEBUG(4, "lookup=%d\n", error);
7749	if (error != 0)
7750		return (error);
7751	if (nd.ni_vp->v_type != VDIR) {
7752		vput(nd.ni_vp);
7753		NFSD_DEBUG(4, "dspath not dir\n");
7754		return (ENOTDIR);
7755	}
7756	if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
7757		vput(nd.ni_vp);
7758		NFSD_DEBUG(4, "dspath not an NFS mount\n");
7759		return (ENXIO);
7760	}
7761
7762	/*
7763	 * Allocate a DS server structure with the NFS mounted directory
7764	 * vnode reference counted, so that a non-forced dismount will
7765	 * fail with EBUSY.
7766	 * This structure is always linked into the list, even if an error
7767	 * is being returned.  The caller will free the entire list upon
7768	 * an error return.
7769	 */
7770	*dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t),
7771	    M_NFSDSTATE, M_WAITOK | M_ZERO);
7772	ds->nfsdev_dvp = nd.ni_vp;
7773	ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount);
7774	NFSVOPUNLOCK(nd.ni_vp);
7775
7776	dsdirsize = strlen(dspathp) + 16;
7777	dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK);
7778	/* Now, create the DS directory structures. */
7779	for (i = 0; i < nfsrv_dsdirsize; i++) {
7780		snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i);
7781		NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
7782		    UIO_SYSSPACE, dsdirpath);
7783		error = namei(&nd);
7784		NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error);
7785		if (error != 0)
7786			break;
7787		if (nd.ni_vp->v_type != VDIR) {
7788			vput(nd.ni_vp);
7789			error = ENOTDIR;
7790			NFSD_DEBUG(4, "dsdirpath not a VDIR\n");
7791			break;
7792		}
7793		if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
7794			vput(nd.ni_vp);
7795			error = ENXIO;
7796			NFSD_DEBUG(4, "dsdirpath not an NFS mount\n");
7797			break;
7798		}
7799		ds->nfsdev_dsdir[i] = nd.ni_vp;
7800		NFSVOPUNLOCK(nd.ni_vp);
7801	}
7802	free(dsdirpath, M_TEMP);
7803
7804	if (strlen(mdspathp) > 0) {
7805		/*
7806		 * This DS stores file for a specific MDS exported file
7807		 * system.
7808		 */
7809		NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
7810		    UIO_SYSSPACE, mdspathp);
7811		error = namei(&nd);
7812		NFSD_DEBUG(4, "mds lookup=%d\n", error);
7813		if (error != 0)
7814			goto out;
7815		if (nd.ni_vp->v_type != VDIR) {
7816			vput(nd.ni_vp);
7817			error = ENOTDIR;
7818			NFSD_DEBUG(4, "mdspath not dir\n");
7819			goto out;
7820		}
7821		mp = nd.ni_vp->v_mount;
7822		if ((mp->mnt_flag & MNT_EXPORTED) == 0) {
7823			vput(nd.ni_vp);
7824			error = ENXIO;
7825			NFSD_DEBUG(4, "mdspath not an exported fs\n");
7826			goto out;
7827		}
7828		ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
7829		ds->nfsdev_mdsisset = 1;
7830		vput(nd.ni_vp);
7831	}
7832
7833out:
7834	TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
7835	atomic_add_int(&nfsrv_devidcnt, 1);
7836	return (error);
7837}
7838
7839/*
7840 * Look up the mount path for the DS server and delete it.
7841 */
7842int
7843nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p)
7844{
7845	struct mount *mp;
7846	struct nfsmount *nmp;
7847	struct nfsdevice *ds;
7848	int error;
7849
7850	NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp);
7851	/*
7852	 * Search for the path in the mount list.  Avoid looking the path
7853	 * up, since this mount point may be hung, with associated locked
7854	 * vnodes, etc.
7855	 * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked
7856	 * until this completes.
7857	 * As noted in the man page, this should be done before any forced
7858	 * dismount on the mount point, but at least the handshake on
7859	 * NFSMNTP_CANCELRPCS should make it safe.
7860	 */
7861	error = 0;
7862	ds = NULL;
7863	nmp = NULL;
7864	mtx_lock(&mountlist_mtx);
7865	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
7866		if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 &&
7867		    strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 &&
7868		    mp->mnt_data != NULL) {
7869			nmp = VFSTONFS(mp);
7870			NFSLOCKMNT(nmp);
7871			if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
7872			     NFSMNTP_CANCELRPCS)) == 0) {
7873				nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
7874				NFSUNLOCKMNT(nmp);
7875			} else {
7876				NFSUNLOCKMNT(nmp);
7877				nmp = NULL;
7878			}
7879			break;
7880		}
7881	}
7882	mtx_unlock(&mountlist_mtx);
7883
7884	if (nmp != NULL) {
7885		ds = nfsrv_deldsnmp(op, nmp, p);
7886		NFSD_DEBUG(4, "deldsnmp=%p\n", ds);
7887		if (ds != NULL) {
7888			nfsrv_killrpcs(nmp);
7889			NFSD_DEBUG(4, "aft killrpcs\n");
7890		} else
7891			error = ENXIO;
7892		NFSLOCKMNT(nmp);
7893		nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
7894		wakeup(nmp);
7895		NFSUNLOCKMNT(nmp);
7896	} else
7897		error = EINVAL;
7898	return (error);
7899}
7900
7901/*
7902 * Search for and remove a DS entry which matches the "nmp" argument.
7903 * The nfsdevice structure pointer is returned so that the caller can
7904 * free it via nfsrv_freeonedevid().
7905 * For the forced case, do not try to do LayoutRecalls, since the server
7906 * must be shut down now anyhow.
7907 */
7908struct nfsdevice *
7909nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p)
7910{
7911	struct nfsdevice *fndds;
7912
7913	NFSD_DEBUG(4, "deldsdvp\n");
7914	NFSDDSLOCK();
7915	if (op == PNFSDOP_FORCEDELDS)
7916		fndds = nfsv4_findmirror(nmp);
7917	else
7918		fndds = nfsrv_findmirroredds(nmp);
7919	if (fndds != NULL)
7920		nfsrv_deleteds(fndds);
7921	NFSDDSUNLOCK();
7922	if (fndds != NULL) {
7923		if (op != PNFSDOP_FORCEDELDS)
7924			nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
7925		printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
7926	}
7927	return (fndds);
7928}
7929
7930/*
7931 * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid.
7932 * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount
7933 * point.
7934 * Also, returns an error instead of the nfsdevice found.
7935 */
7936int
7937nfsrv_delds(char *devid, NFSPROC_T *p)
7938{
7939	struct nfsdevice *ds, *fndds;
7940	struct nfsmount *nmp;
7941	int fndmirror;
7942
7943	NFSD_DEBUG(4, "delds\n");
7944	/*
7945	 * Search the DS server list for a match with devid.
7946	 * Remove the DS entry if found and there is a mirror.
7947	 */
7948	fndds = NULL;
7949	nmp = NULL;
7950	fndmirror = 0;
7951	NFSDDSLOCK();
7952	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7953		if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 &&
7954		    ds->nfsdev_nmp != NULL) {
7955			NFSD_DEBUG(4, "fnd main ds\n");
7956			fndds = ds;
7957			break;
7958		}
7959	}
7960	if (fndds == NULL) {
7961		NFSDDSUNLOCK();
7962		return (ENXIO);
7963	}
7964	if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
7965		fndmirror = 1;
7966	else if (fndds->nfsdev_mdsisset != 0) {
7967		/* For the fsid is set case, search for a mirror. */
7968		TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7969			if (ds != fndds && ds->nfsdev_nmp != NULL &&
7970			    ds->nfsdev_mdsisset != 0 &&
7971			    fsidcmp(&ds->nfsdev_mdsfsid,
7972			    &fndds->nfsdev_mdsfsid) == 0) {
7973				fndmirror = 1;
7974				break;
7975			}
7976		}
7977	}
7978	if (fndmirror != 0) {
7979		nmp = fndds->nfsdev_nmp;
7980		NFSLOCKMNT(nmp);
7981		if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
7982		     NFSMNTP_CANCELRPCS)) == 0) {
7983			nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
7984			NFSUNLOCKMNT(nmp);
7985			nfsrv_deleteds(fndds);
7986		} else {
7987			NFSUNLOCKMNT(nmp);
7988			nmp = NULL;
7989		}
7990	}
7991	NFSDDSUNLOCK();
7992	if (nmp != NULL) {
7993		nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
7994		printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
7995		nfsrv_killrpcs(nmp);
7996		NFSLOCKMNT(nmp);
7997		nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
7998		wakeup(nmp);
7999		NFSUNLOCKMNT(nmp);
8000		return (0);
8001	}
8002	return (ENXIO);
8003}
8004
8005/*
8006 * Mark a DS as disabled by setting nfsdev_nmp = NULL.
8007 */
8008static void
8009nfsrv_deleteds(struct nfsdevice *fndds)
8010{
8011
8012	NFSD_DEBUG(4, "deleteds: deleting a mirror\n");
8013	fndds->nfsdev_nmp = NULL;
8014	if (fndds->nfsdev_mdsisset == 0)
8015		nfsrv_faildscnt--;
8016}
8017
8018/*
8019 * Fill in the addr structures for the File and Flex File layouts.
8020 */
8021static void
8022nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
8023{
8024	uint32_t *tl;
8025	char *netprot;
8026	int addrlen;
8027	static uint64_t new_devid = 0;
8028
8029	if (strchr(addr, ':') != NULL)
8030		netprot = "tcp6";
8031	else
8032		netprot = "tcp";
8033
8034	/* Fill in the device id. */
8035	NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time));
8036	new_devid++;
8037	NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)],
8038	    sizeof(new_devid));
8039
8040	/*
8041	 * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4
8042	 * as defined in RFC5661) in XDR.
8043	 */
8044	addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
8045	    6 * NFSX_UNSIGNED;
8046	NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot);
8047	ds->nfsdev_fileaddrlen = addrlen;
8048	tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
8049	ds->nfsdev_fileaddr = (char *)tl;
8050	*tl++ = txdr_unsigned(1);		/* One stripe with index 0. */
8051	*tl++ = 0;
8052	*tl++ = txdr_unsigned(1);		/* One multipath list */
8053	*tl++ = txdr_unsigned(1);		/* with one entry in it. */
8054	/* The netaddr for this one entry. */
8055	*tl++ = txdr_unsigned(strlen(netprot));
8056	NFSBCOPY(netprot, tl, strlen(netprot));
8057	tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
8058	*tl++ = txdr_unsigned(strlen(addr));
8059	NFSBCOPY(addr, tl, strlen(addr));
8060
8061	/*
8062	 * Fill in the flex file addr (actually the ff_device_addr4
8063	 * as defined for Flexible File Layout) in XDR.
8064	 */
8065	addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
8066	    14 * NFSX_UNSIGNED;
8067	ds->nfsdev_flexaddrlen = addrlen;
8068	tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
8069	ds->nfsdev_flexaddr = (char *)tl;
8070	*tl++ = txdr_unsigned(1);		/* One multipath entry. */
8071	/* The netaddr for this one entry. */
8072	*tl++ = txdr_unsigned(strlen(netprot));
8073	NFSBCOPY(netprot, tl, strlen(netprot));
8074	tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
8075	*tl++ = txdr_unsigned(strlen(addr));
8076	NFSBCOPY(addr, tl, strlen(addr));
8077	tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED);
8078	*tl++ = txdr_unsigned(2);		/* Two NFS Versions. */
8079	*tl++ = txdr_unsigned(NFS_VER4);	/* NFSv4. */
8080	*tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */
8081	*tl++ = txdr_unsigned(nfs_srvmaxio);	/* DS max rsize. */
8082	*tl++ = txdr_unsigned(nfs_srvmaxio);	/* DS max wsize. */
8083	*tl++ = newnfs_true;			/* Tightly coupled. */
8084	*tl++ = txdr_unsigned(NFS_VER4);	/* NFSv4. */
8085	*tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
8086	*tl++ = txdr_unsigned(nfs_srvmaxio);	/* DS max rsize. */
8087	*tl++ = txdr_unsigned(nfs_srvmaxio);	/* DS max wsize. */
8088	*tl = newnfs_true;			/* Tightly coupled. */
8089
8090	ds->nfsdev_hostnamelen = strlen(dnshost);
8091	ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE,
8092	    M_WAITOK);
8093	NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1);
8094}
8095
8096/*
8097 * Create the device id list.
8098 * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument
8099 * is misconfigured.
8100 */
8101int
8102nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p)
8103{
8104	struct nfsdevice *ds;
8105	char *addrp, *dnshostp, *dspathp, *mdspathp;
8106	int error, i;
8107
8108	addrp = args->addr;
8109	dnshostp = args->dnshost;
8110	dspathp = args->dspath;
8111	mdspathp = args->mdspath;
8112	nfsrv_maxpnfsmirror = args->mirrorcnt;
8113	if (addrp == NULL || dnshostp == NULL || dspathp == NULL ||
8114	    mdspathp == NULL)
8115		return (0);
8116
8117	/*
8118	 * Loop around for each nul-terminated string in args->addr,
8119	 * args->dnshost, args->dnspath and args->mdspath.
8120	 */
8121	while (addrp < (args->addr + args->addrlen) &&
8122	    dnshostp < (args->dnshost + args->dnshostlen) &&
8123	    dspathp < (args->dspath + args->dspathlen) &&
8124	    mdspathp < (args->mdspath + args->mdspathlen)) {
8125		error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds);
8126		if (error != 0) {
8127			/* Free all DS servers. */
8128			nfsrv_freealldevids();
8129			nfsrv_devidcnt = 0;
8130			return (ENXIO);
8131		}
8132		nfsrv_allocdevid(ds, addrp, dnshostp);
8133		addrp += (strlen(addrp) + 1);
8134		dnshostp += (strlen(dnshostp) + 1);
8135		dspathp += (strlen(dspathp) + 1);
8136		mdspathp += (strlen(mdspathp) + 1);
8137	}
8138	if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) {
8139		/* Free all DS servers. */
8140		nfsrv_freealldevids();
8141		nfsrv_devidcnt = 0;
8142		nfsrv_maxpnfsmirror = 1;
8143		return (ENXIO);
8144	}
8145	/* We can fail at most one less DS than the mirror level. */
8146	nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1;
8147
8148	/*
8149	 * Allocate the nfslayout hash table now, since this is a pNFS server.
8150	 * Make it 1% of the high water mark and at least 100.
8151	 */
8152	if (nfslayouthash == NULL) {
8153		nfsrv_layouthashsize = nfsrv_layouthighwater / 100;
8154		if (nfsrv_layouthashsize < 100)
8155			nfsrv_layouthashsize = 100;
8156		nfslayouthash = mallocarray(nfsrv_layouthashsize,
8157		    sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK |
8158		    M_ZERO);
8159		for (i = 0; i < nfsrv_layouthashsize; i++) {
8160			mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF);
8161			TAILQ_INIT(&nfslayouthash[i].list);
8162		}
8163	}
8164	return (0);
8165}
8166
8167/*
8168 * Free all device ids.
8169 */
8170static void
8171nfsrv_freealldevids(void)
8172{
8173	struct nfsdevice *ds, *nds;
8174
8175	TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds)
8176		nfsrv_freedevid(ds);
8177}
8178
8179/*
8180 * Check to see if there is a Read/Write Layout plus either:
8181 * - A Write Delegation
8182 * or
8183 * - An Open with Write_access.
8184 * Return 1 if this is the case and 0 otherwise.
8185 * This function is used by nfsrv_proxyds() to decide if doing a Proxy
8186 * Getattr RPC to the Data Server (DS) is necessary.
8187 */
8188#define	NFSCLIDVECSIZE	6
8189int
8190nfsrv_checkdsattr(vnode_t vp, NFSPROC_T *p)
8191{
8192	fhandle_t fh, *tfhp;
8193	struct nfsstate *stp;
8194	struct nfslayout *lyp;
8195	struct nfslayouthash *lhyp;
8196	struct nfslockhashhead *hp;
8197	struct nfslockfile *lfp;
8198	nfsquad_t clid[NFSCLIDVECSIZE];
8199	int clidcnt, ret;
8200
8201	ret = nfsvno_getfh(vp, &fh, p);
8202	if (ret != 0)
8203		return (0);
8204
8205	/* First check for a Read/Write Layout. */
8206	clidcnt = 0;
8207	lhyp = NFSLAYOUTHASH(&fh);
8208	NFSLOCKLAYOUT(lhyp);
8209	TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
8210		if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8211		    ((lyp->lay_flags & NFSLAY_RW) != 0 ||
8212		     ((lyp->lay_flags & NFSLAY_READ) != 0 &&
8213		      nfsrv_pnfsatime != 0))) {
8214			if (clidcnt < NFSCLIDVECSIZE)
8215				clid[clidcnt].qval = lyp->lay_clientid.qval;
8216			clidcnt++;
8217		}
8218	}
8219	NFSUNLOCKLAYOUT(lhyp);
8220	if (clidcnt == 0) {
8221		/* None found, so return 0. */
8222		return (0);
8223	}
8224
8225	/* Get the nfslockfile for this fh. */
8226	NFSLOCKSTATE();
8227	hp = NFSLOCKHASH(&fh);
8228	LIST_FOREACH(lfp, hp, lf_hash) {
8229		tfhp = &lfp->lf_fh;
8230		if (NFSVNO_CMPFH(&fh, tfhp))
8231			break;
8232	}
8233	if (lfp == NULL) {
8234		/* None found, so return 0. */
8235		NFSUNLOCKSTATE();
8236		return (0);
8237	}
8238
8239	/* Now, look for a Write delegation for this clientid. */
8240	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
8241		if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
8242		    nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
8243			break;
8244	}
8245	if (stp != NULL) {
8246		/* Found one, so return 1. */
8247		NFSUNLOCKSTATE();
8248		return (1);
8249	}
8250
8251	/* No Write delegation, so look for an Open with Write_access. */
8252	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
8253		KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0,
8254		    ("nfsrv_checkdsattr: Non-open in Open list\n"));
8255		if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 &&
8256		    nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
8257			break;
8258	}
8259	NFSUNLOCKSTATE();
8260	if (stp != NULL)
8261		return (1);
8262	return (0);
8263}
8264
8265/*
8266 * Look for a matching clientid in the vector. Return 1 if one might match.
8267 */
8268static int
8269nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt)
8270{
8271	int i;
8272
8273	/* If too many for the vector, return 1 since there might be a match. */
8274	if (clidcnt > NFSCLIDVECSIZE)
8275		return (1);
8276
8277	for (i = 0; i < clidcnt; i++)
8278		if (clidvec[i].qval == clid.qval)
8279			return (1);
8280	return (0);
8281}
8282
8283/*
8284 * Check the don't list for "vp" and see if issuing an rw layout is allowed.
8285 * Return 1 if issuing an rw layout isn't allowed, 0 otherwise.
8286 */
8287static int
8288nfsrv_dontlayout(fhandle_t *fhp)
8289{
8290	struct nfsdontlist *mrp;
8291	int ret;
8292
8293	if (nfsrv_dontlistlen == 0)
8294		return (0);
8295	ret = 0;
8296	NFSDDONTLISTLOCK();
8297	LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
8298		if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 &&
8299		    (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) {
8300			ret = 1;
8301			break;
8302		}
8303	}
8304	NFSDDONTLISTUNLOCK();
8305	return (ret);
8306}
8307
8308#define	PNFSDS_COPYSIZ	65536
8309/*
8310 * Create a new file on a DS and copy the contents of an extant DS file to it.
8311 * This can be used for recovery of a DS file onto a recovered DS.
8312 * The steps are:
8313 * - When called, the MDS file's vnode is locked, blocking LayoutGet operations.
8314 * - Disable issuing of read/write layouts for the file via the nfsdontlist,
8315 *   so that they will be disabled after the MDS file's vnode is unlocked.
8316 * - Set up the nfsrv_recalllist so that recall of read/write layouts can
8317 *   be done.
8318 * - Unlock the MDS file's vnode, so that the client(s) can perform proxied
8319 *   writes, LayoutCommits and LayoutReturns for the file when completing the
8320 *   LayoutReturn requested by the LayoutRecall callback.
8321 * - Issue a LayoutRecall callback for all read/write layouts and wait for
8322 *   them to be returned. (If the LayoutRecall callback replies
8323 *   NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.)
8324 * - Exclusively lock the MDS file's vnode.  This ensures that no proxied
8325 *   writes are in progress or can occur during the DS file copy.
8326 *   It also blocks Setattr operations.
8327 * - Create the file on the recovered mirror.
8328 * - Copy the file from the operational DS.
8329 * - Copy any ACL from the MDS file to the new DS file.
8330 * - Set the modify time of the new DS file to that of the MDS file.
8331 * - Update the extended attribute for the MDS file.
8332 * - Enable issuing of rw layouts by deleting the nfsdontlist entry.
8333 * - The caller will unlock the MDS file's vnode allowing operations
8334 *   to continue normally, since it is now on the mirror again.
8335 */
8336int
8337nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds,
8338    struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt,
8339    struct ucred *cred, NFSPROC_T *p)
8340{
8341	struct nfsdontlist *mrp, *nmrp;
8342	struct nfslayouthash *lhyp;
8343	struct nfslayout *lyp, *nlyp;
8344	struct nfslayouthead thl;
8345	struct mount *mp, *tvmp;
8346	struct acl *aclp;
8347	struct vattr va;
8348	struct timespec mtime;
8349	fhandle_t fh;
8350	vnode_t tvp;
8351	off_t rdpos, wrpos;
8352	ssize_t aresid;
8353	char *dat;
8354	int didprintf, ret, retacl, xfer;
8355
8356	ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp");
8357	ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp");
8358	/*
8359	 * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag
8360	 * so that no more RW layouts will get issued.
8361	 */
8362	ret = nfsvno_getfh(vp, &fh, p);
8363	if (ret != 0) {
8364		NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret);
8365		return (ret);
8366	}
8367	nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK);
8368	nmrp->nfsmr_flags = NFSMR_DONTLAYOUT;
8369	NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh));
8370	NFSDDONTLISTLOCK();
8371	LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
8372		if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0)
8373			break;
8374	}
8375	if (mrp == NULL) {
8376		LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list);
8377		mrp = nmrp;
8378		nmrp = NULL;
8379		nfsrv_dontlistlen++;
8380		NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n");
8381	} else {
8382		NFSDDONTLISTUNLOCK();
8383		free(nmrp, M_NFSDSTATE);
8384		NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n");
8385		return (ENXIO);
8386	}
8387	NFSDDONTLISTUNLOCK();
8388
8389	/*
8390	 * Search for all RW layouts for this file.  Move them to the
8391	 * recall list, so they can be recalled and their return noted.
8392	 */
8393	lhyp = NFSLAYOUTHASH(&fh);
8394	NFSDRECALLLOCK();
8395	NFSLOCKLAYOUT(lhyp);
8396	TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
8397		if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8398		    (lyp->lay_flags & NFSLAY_RW) != 0) {
8399			TAILQ_REMOVE(&lhyp->list, lyp, lay_list);
8400			TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list);
8401			lyp->lay_trycnt = 0;
8402		}
8403	}
8404	NFSUNLOCKLAYOUT(lhyp);
8405	NFSDRECALLUNLOCK();
8406
8407	ret = 0;
8408	mp = tvmp = NULL;
8409	didprintf = 0;
8410	TAILQ_INIT(&thl);
8411	/* Unlock the MDS vp, so that a LayoutReturn can be done on it. */
8412	NFSVOPUNLOCK(vp);
8413	/* Now, do a recall for all layouts not yet recalled. */
8414tryagain:
8415	NFSDRECALLLOCK();
8416	TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
8417		if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8418		    (lyp->lay_flags & NFSLAY_RECALL) == 0) {
8419			lyp->lay_flags |= NFSLAY_RECALL;
8420			/*
8421			 * The layout stateid.seqid needs to be incremented
8422			 * before doing a LAYOUT_RECALL callback.
8423			 */
8424			if (++lyp->lay_stateid.seqid == 0)
8425				lyp->lay_stateid.seqid = 1;
8426			NFSDRECALLUNLOCK();
8427			nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
8428			    &lyp->lay_fh, lyp, 0, lyp->lay_type, p);
8429			NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n");
8430			goto tryagain;
8431		}
8432	}
8433
8434	/* Now wait for them to be returned. */
8435tryagain2:
8436	TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
8437		if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) {
8438			if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) {
8439				TAILQ_REMOVE(&nfsrv_recalllisthead, lyp,
8440				    lay_list);
8441				TAILQ_INSERT_HEAD(&thl, lyp, lay_list);
8442				NFSD_DEBUG(4,
8443				    "nfsrv_copymr: layout returned\n");
8444			} else {
8445				lyp->lay_trycnt++;
8446				ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR,
8447				    PVFS | PCATCH, "nfsmrl", hz);
8448				NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n",
8449				    ret);
8450				if (ret == EINTR || ret == ERESTART)
8451					break;
8452				if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) {
8453					/*
8454					 * Give up after 60sec and return
8455					 * ENXIO, failing the copymr.
8456					 * This layout will remain on the
8457					 * recalllist.  It can only be cleared
8458					 * by restarting the nfsd.
8459					 * This seems the safe way to handle
8460					 * it, since it cannot be safely copied
8461					 * with an outstanding RW layout.
8462					 */
8463					if (lyp->lay_trycnt >= 60) {
8464						ret = ENXIO;
8465						break;
8466					}
8467					if (didprintf == 0) {
8468						printf("nfsrv_copymr: layout "
8469						    "not returned\n");
8470						didprintf = 1;
8471					}
8472				}
8473			}
8474			goto tryagain2;
8475		}
8476	}
8477	NFSDRECALLUNLOCK();
8478	/* We can now get rid of the layouts that have been returned. */
8479	TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp)
8480		nfsrv_freelayout(&thl, lyp);
8481
8482	/*
8483	 * Do the vn_start_write() calls here, before the MDS vnode is
8484	 * locked and the tvp is created (locked) in the NFS file system
8485	 * that dvp is in.
8486	 * For tvmp, this probably isn't necessary, since it will be an
8487	 * NFS mount and they are not suspendable at this time.
8488	 */
8489	if (ret == 0)
8490		ret = vn_start_write(vp, &mp, V_WAIT | V_PCATCH);
8491	if (ret == 0) {
8492		tvmp = dvp->v_mount;
8493		ret = vn_start_write(NULL, &tvmp, V_WAIT | V_PCATCH);
8494	}
8495
8496	/*
8497	 * LK_EXCLUSIVE lock the MDS vnode, so that any
8498	 * proxied writes through the MDS will be blocked until we have
8499	 * completed the copy and update of the extended attributes.
8500	 * This will also ensure that any attributes and ACL will not be
8501	 * changed until the copy is complete.
8502	 */
8503	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
8504	if (ret == 0 && VN_IS_DOOMED(vp)) {
8505		NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n");
8506		ret = ESTALE;
8507	}
8508
8509	/* Create the data file on the recovered DS. */
8510	if (ret == 0)
8511		ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp);
8512
8513	/* Copy the DS file, if created successfully. */
8514	if (ret == 0) {
8515		/*
8516		 * Get any NFSv4 ACL on the MDS file, so that it can be set
8517		 * on the new DS file.
8518		 */
8519		aclp = acl_alloc(M_WAITOK | M_ZERO);
8520		retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
8521		if (retacl != 0 && retacl != ENOATTR)
8522			NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl);
8523		dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK);
8524		/* Malloc a block of 0s used to check for holes. */
8525		if (nfsrv_zeropnfsdat == NULL)
8526			nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP,
8527			    M_WAITOK | M_ZERO);
8528		rdpos = wrpos = 0;
8529		ret = VOP_GETATTR(fvp, &va, cred);
8530		aresid = 0;
8531		while (ret == 0 && aresid == 0) {
8532			ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ,
8533			    rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
8534			    &aresid, p);
8535			xfer = PNFSDS_COPYSIZ - aresid;
8536			if (ret == 0 && xfer > 0) {
8537				rdpos += xfer;
8538				/*
8539				 * Skip the write for holes, except for the
8540				 * last block.
8541				 */
8542				if (xfer < PNFSDS_COPYSIZ || rdpos ==
8543				    va.va_size || NFSBCMP(dat,
8544				    nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0)
8545					ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer,
8546					    wrpos, UIO_SYSSPACE, IO_NODELOCKED,
8547					    cred, NULL, NULL, p);
8548				if (ret == 0)
8549					wrpos += xfer;
8550			}
8551		}
8552
8553		/* If there is an ACL and the copy succeeded, set the ACL. */
8554		if (ret == 0 && retacl == 0) {
8555			ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p);
8556			/*
8557			 * Don't consider these as errors, since VOP_GETACL()
8558			 * can return an ACL when they are not actually
8559			 * supported.  For example, for UFS, VOP_GETACL()
8560			 * will return a trivial ACL based on the uid/gid/mode
8561			 * when there is no ACL on the file.
8562			 * This case should be recognized as a trivial ACL
8563			 * by UFS's VOP_SETACL() and succeed, but...
8564			 */
8565			if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM)
8566				ret = 0;
8567		}
8568
8569		if (ret == 0)
8570			ret = VOP_FSYNC(tvp, MNT_WAIT, p);
8571
8572		/* Set the DS data file's modify time that of the MDS file. */
8573		if (ret == 0)
8574			ret = VOP_GETATTR(vp, &va, cred);
8575		if (ret == 0) {
8576			mtime = va.va_mtime;
8577			VATTR_NULL(&va);
8578			va.va_mtime = mtime;
8579			ret = VOP_SETATTR(tvp, &va, cred);
8580		}
8581
8582		vput(tvp);
8583		acl_free(aclp);
8584		free(dat, M_TEMP);
8585	}
8586	if (tvmp != NULL)
8587		vn_finished_write(tvmp);
8588
8589	/* Update the extended attributes for the newly created DS file. */
8590	if (ret == 0)
8591		ret = vn_extattr_set(vp, IO_NODELOCKED,
8592		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
8593		    sizeof(*wpf) * mirrorcnt, (char *)wpf, p);
8594	if (mp != NULL)
8595		vn_finished_write(mp);
8596
8597	/* Get rid of the dontlist entry, so that Layouts can be issued. */
8598	NFSDDONTLISTLOCK();
8599	LIST_REMOVE(mrp, nfsmr_list);
8600	NFSDDONTLISTUNLOCK();
8601	free(mrp, M_NFSDSTATE);
8602	return (ret);
8603}
8604
8605/*
8606 * Create a data storage file on the recovered DS.
8607 */
8608static int
8609nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
8610    vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
8611    vnode_t *tvpp)
8612{
8613	struct vattr va, nva;
8614	int error;
8615
8616	/* Make data file name based on FH. */
8617	error = VOP_GETATTR(vp, &va, cred);
8618	if (error == 0) {
8619		/* Set the attributes for "vp" to Setattr the DS vp. */
8620		VATTR_NULL(&nva);
8621		nva.va_uid = va.va_uid;
8622		nva.va_gid = va.va_gid;
8623		nva.va_mode = va.va_mode;
8624		nva.va_size = 0;
8625		VATTR_NULL(&va);
8626		va.va_type = VREG;
8627		va.va_mode = nva.va_mode;
8628		NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf);
8629		error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL,
8630		    pf->dsf_filename, cred, p, tvpp);
8631	}
8632	return (error);
8633}
8634
8635/*
8636 * Look up the MDS file shared locked, and then get the extended attribute
8637 * to find the extant DS file to be copied to the new mirror.
8638 * If successful, *vpp is set to the MDS file's vp and *nvpp is
8639 * set to a DS data file for the MDS file, both exclusively locked.
8640 * The "buf" argument has the pnfsdsfile structure from the MDS file
8641 * in it and buflen is set to its length.
8642 */
8643int
8644nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf,
8645    int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp,
8646    struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp,
8647    struct nfsdevice **fdsp)
8648{
8649	struct nameidata nd;
8650	struct vnode *vp, *curvp;
8651	struct pnfsdsfile *pf;
8652	struct nfsmount *nmp, *curnmp;
8653	int dsdir, error, mirrorcnt, ippos;
8654
8655	vp = NULL;
8656	curvp = NULL;
8657	curnmp = NULL;
8658	*dsp = NULL;
8659	*fdsp = NULL;
8660	if (dspathp == NULL && curdspathp != NULL)
8661		return (EPERM);
8662
8663	/*
8664	 * Look up the MDS file shared locked.  The lock will be upgraded
8665	 * to an exclusive lock after any rw layouts have been returned.
8666	 */
8667	NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp);
8668	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
8669	    mdspathp);
8670	error = namei(&nd);
8671	NFSD_DEBUG(4, "lookup=%d\n", error);
8672	if (error != 0)
8673		return (error);
8674	if (nd.ni_vp->v_type != VREG) {
8675		vput(nd.ni_vp);
8676		NFSD_DEBUG(4, "mdspath not reg\n");
8677		return (EISDIR);
8678	}
8679	vp = nd.ni_vp;
8680
8681	if (curdspathp != NULL) {
8682		/*
8683		 * Look up the current DS path and find the nfsdev structure for
8684		 * it.
8685		 */
8686		NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp);
8687		NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
8688		    UIO_SYSSPACE, curdspathp);
8689		error = namei(&nd);
8690		NFSD_DEBUG(4, "ds lookup=%d\n", error);
8691		if (error != 0) {
8692			vput(vp);
8693			return (error);
8694		}
8695		if (nd.ni_vp->v_type != VDIR) {
8696			vput(nd.ni_vp);
8697			vput(vp);
8698			NFSD_DEBUG(4, "curdspath not dir\n");
8699			return (ENOTDIR);
8700		}
8701		if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
8702			vput(nd.ni_vp);
8703			vput(vp);
8704			NFSD_DEBUG(4, "curdspath not an NFS mount\n");
8705			return (ENXIO);
8706		}
8707		curnmp = VFSTONFS(nd.ni_vp->v_mount);
8708
8709		/* Search the nfsdev list for a match. */
8710		NFSDDSLOCK();
8711		*fdsp = nfsv4_findmirror(curnmp);
8712		NFSDDSUNLOCK();
8713		if (*fdsp == NULL)
8714			curnmp = NULL;
8715		if (curnmp == NULL) {
8716			vput(nd.ni_vp);
8717			vput(vp);
8718			NFSD_DEBUG(4, "mdscopymr: no current ds\n");
8719			return (ENXIO);
8720		}
8721		curvp = nd.ni_vp;
8722	}
8723
8724	if (dspathp != NULL) {
8725		/* Look up the nfsdev path and find the nfsdev structure. */
8726		NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp);
8727		NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
8728		    UIO_SYSSPACE, dspathp);
8729		error = namei(&nd);
8730		NFSD_DEBUG(4, "ds lookup=%d\n", error);
8731		if (error != 0) {
8732			vput(vp);
8733			if (curvp != NULL)
8734				vput(curvp);
8735			return (error);
8736		}
8737		if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) {
8738			vput(nd.ni_vp);
8739			vput(vp);
8740			if (curvp != NULL)
8741				vput(curvp);
8742			NFSD_DEBUG(4, "dspath not dir\n");
8743			if (nd.ni_vp == curvp)
8744				return (EPERM);
8745			return (ENOTDIR);
8746		}
8747		if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
8748			vput(nd.ni_vp);
8749			vput(vp);
8750			if (curvp != NULL)
8751				vput(curvp);
8752			NFSD_DEBUG(4, "dspath not an NFS mount\n");
8753			return (ENXIO);
8754		}
8755		nmp = VFSTONFS(nd.ni_vp->v_mount);
8756
8757		/*
8758		 * Search the nfsdevice list for a match.  If curnmp == NULL,
8759		 * this is a recovery and there must be a mirror.
8760		 */
8761		NFSDDSLOCK();
8762		if (curnmp == NULL)
8763			*dsp = nfsrv_findmirroredds(nmp);
8764		else
8765			*dsp = nfsv4_findmirror(nmp);
8766		NFSDDSUNLOCK();
8767		if (*dsp == NULL) {
8768			vput(nd.ni_vp);
8769			vput(vp);
8770			if (curvp != NULL)
8771				vput(curvp);
8772			NFSD_DEBUG(4, "mdscopymr: no ds\n");
8773			return (ENXIO);
8774		}
8775	} else {
8776		nd.ni_vp = NULL;
8777		nmp = NULL;
8778	}
8779
8780	/*
8781	 * Get a vp for an available DS data file using the extended
8782	 * attribute on the MDS file.
8783	 * If there is a valid entry for the new DS in the extended attribute
8784	 * on the MDS file (as checked via the nmp argument),
8785	 * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur.
8786	 */
8787	error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p,
8788	    NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir);
8789	if (curvp != NULL)
8790		vput(curvp);
8791	if (nd.ni_vp == NULL) {
8792		if (error == 0 && nmp != NULL) {
8793			/* Search the nfsdev list for a match. */
8794			NFSDDSLOCK();
8795			*dsp = nfsrv_findmirroredds(nmp);
8796			NFSDDSUNLOCK();
8797		}
8798		if (error == 0 && (nmp == NULL || *dsp == NULL)) {
8799			if (nvpp != NULL && *nvpp != NULL) {
8800				vput(*nvpp);
8801				*nvpp = NULL;
8802			}
8803			error = ENXIO;
8804		}
8805	} else
8806		vput(nd.ni_vp);
8807
8808	/*
8809	 * When dspathp != NULL and curdspathp == NULL, this is a recovery
8810	 * and is only allowed if there is a 0.0.0.0 IP address entry.
8811	 * When curdspathp != NULL, the ippos will be set to that entry.
8812	 */
8813	if (error == 0 && dspathp != NULL && ippos == -1) {
8814		if (nvpp != NULL && *nvpp != NULL) {
8815			vput(*nvpp);
8816			*nvpp = NULL;
8817		}
8818		error = ENXIO;
8819	}
8820	if (error == 0) {
8821		*vpp = vp;
8822
8823		pf = (struct pnfsdsfile *)buf;
8824		if (ippos == -1) {
8825			/* If no zeroip pnfsdsfile, add one. */
8826			ippos = *buflenp / sizeof(*pf);
8827			*buflenp += sizeof(*pf);
8828			pf += ippos;
8829			pf->dsf_dir = dsdir;
8830			strlcpy(pf->dsf_filename, fname,
8831			    sizeof(pf->dsf_filename));
8832		} else
8833			pf += ippos;
8834		*pfp = pf;
8835	} else
8836		vput(vp);
8837	return (error);
8838}
8839
8840/*
8841 * Search for a matching pnfsd mirror device structure, base on the nmp arg.
8842 * Return one if found, NULL otherwise.
8843 */
8844static struct nfsdevice *
8845nfsrv_findmirroredds(struct nfsmount *nmp)
8846{
8847	struct nfsdevice *ds, *fndds;
8848	int fndmirror;
8849
8850	mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
8851	/*
8852	 * Search the DS server list for a match with nmp.
8853	 * Remove the DS entry if found and there is a mirror.
8854	 */
8855	fndds = NULL;
8856	fndmirror = 0;
8857	if (nfsrv_devidcnt == 0)
8858		return (fndds);
8859	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8860		if (ds->nfsdev_nmp == nmp) {
8861			NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n");
8862			fndds = ds;
8863			break;
8864		}
8865	}
8866	if (fndds == NULL)
8867		return (fndds);
8868	if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
8869		fndmirror = 1;
8870	else if (fndds->nfsdev_mdsisset != 0) {
8871		/* For the fsid is set case, search for a mirror. */
8872		TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8873			if (ds != fndds && ds->nfsdev_nmp != NULL &&
8874			    ds->nfsdev_mdsisset != 0 &&
8875			    fsidcmp(&ds->nfsdev_mdsfsid,
8876			    &fndds->nfsdev_mdsfsid) == 0) {
8877				fndmirror = 1;
8878				break;
8879			}
8880		}
8881	}
8882	if (fndmirror == 0) {
8883		NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n");
8884		return (NULL);
8885	}
8886	return (fndds);
8887}
8888
8889/*
8890 * Mark the appropriate devid and all associated layout as "out of space".
8891 */
8892void
8893nfsrv_marknospc(char *devid, bool setit)
8894{
8895	struct nfsdevice *ds;
8896	struct nfslayout *lyp;
8897	struct nfslayouthash *lhyp;
8898	int i;
8899
8900	NFSDDSLOCK();
8901	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8902		if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0) {
8903			NFSD_DEBUG(1, "nfsrv_marknospc: devid %d\n", setit);
8904			ds->nfsdev_nospc = setit;
8905		}
8906	}
8907	NFSDDSUNLOCK();
8908
8909	for (i = 0; i < nfsrv_layouthashsize; i++) {
8910		lhyp = &nfslayouthash[i];
8911		NFSLOCKLAYOUT(lhyp);
8912		TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
8913			if (NFSBCMP(lyp->lay_deviceid, devid,
8914			    NFSX_V4DEVICEID) == 0) {
8915				NFSD_DEBUG(1, "nfsrv_marknospc: layout %d\n",
8916				    setit);
8917				if (setit)
8918					lyp->lay_flags |= NFSLAY_NOSPC;
8919				else
8920					lyp->lay_flags &= ~NFSLAY_NOSPC;
8921			}
8922		}
8923		NFSUNLOCKLAYOUT(lhyp);
8924	}
8925}
8926
8927/*
8928 * Check to see if SP4_MACH_CRED is in use and, if it is, check that the
8929 * correct machine credential is being used.
8930 */
8931static int
8932nfsrv_checkmachcred(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
8933{
8934
8935	if ((clp->lc_flags & LCL_MACHCRED) == 0 ||
8936	    !NFSISSET_OPBIT(&clp->lc_mustops, op))
8937		return (0);
8938	KASSERT((nd->nd_flag & ND_NFSV41) != 0,
8939	    ("nfsrv_checkmachcred: MachCred for NFSv4.0"));
8940	if ((nd->nd_flag & (ND_GSSINTEGRITY | ND_GSSPRIVACY)) != 0 &&
8941	    nd->nd_princlen == clp->lc_namelen &&
8942	    !NFSBCMP(nd->nd_principal, clp->lc_name, nd->nd_princlen))
8943		return (0);
8944	return (NFSERR_AUTHERR | AUTH_TOOWEAK);
8945}
8946