nfs_nfsdstate.c revision 299222
168651Skris/*-
268651Skris * Copyright (c) 2009 Rick Macklem, University of Guelph
368651Skris * All rights reserved.
468651Skris *
568651Skris * Redistribution and use in source and binary forms, with or without
668651Skris * modification, are permitted provided that the following conditions
768651Skris * are met:
868651Skris * 1. Redistributions of source code must retain the above copyright
968651Skris *    notice, this list of conditions and the following disclaimer.
1068651Skris * 2. Redistributions in binary form must reproduce the above copyright
1168651Skris *    notice, this list of conditions and the following disclaimer in the
1268651Skris *    documentation and/or other materials provided with the distribution.
1368651Skris *
1468651Skris * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1568651Skris * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1668651Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1768651Skris * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1868651Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1968651Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2068651Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2168651Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2268651Skris * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2368651Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2468651Skris * SUCH DAMAGE.
2568651Skris *
2668651Skris */
2768651Skris
2868651Skris#include <sys/cdefs.h>
2968651Skris__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdstate.c 299222 2016-05-07 20:09:15Z rmacklem $");
3068651Skris
3168651Skris#ifndef APPLEKEXT
3268651Skris#include <fs/nfs/nfsport.h>
3368651Skris
3468651Skrisstruct nfsrv_stablefirst nfsrv_stablefirst;
3568651Skrisint nfsrv_issuedelegs = 0;
3668651Skrisint nfsrv_dolocallocks = 0;
3768651Skrisstruct nfsv4lock nfsv4rootfs_lock;
3868651Skris
3968651Skrisextern int newnfs_numnfsd;
4068651Skrisextern struct nfsstats newnfsstats;
4168651Skrisextern int nfsrv_lease;
4268651Skrisextern struct timeval nfsboottime;
4368651Skrisextern u_int32_t newnfs_true, newnfs_false;
4468651SkrisNFSV4ROOTLOCKMUTEX;
4568651SkrisNFSSTATESPINLOCK;
4668651Skris
4768651SkrisSYSCTL_DECL(_vfs_nfsd);
4868651Skrisint	nfsrv_statehashsize = NFSSTATEHASHSIZE;
4968651SkrisTUNABLE_INT("vfs.nfsd.statehashsize", &nfsrv_statehashsize);
5068651SkrisSYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
5168651Skris    &nfsrv_statehashsize, 0,
5268651Skris    "Size of state hash table set via loader.conf");
5368651Skris
5468651Skrisint	nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
5568651SkrisTUNABLE_INT("vfs.nfsd.clienthashsize", &nfsrv_clienthashsize);
5668651SkrisSYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
5768651Skris    &nfsrv_clienthashsize, 0,
5868651Skris    "Size of client hash table set via loader.conf");
5968651Skris
6068651Skrisint	nfsrv_lockhashsize = NFSLOCKHASHSIZE;
6168651SkrisTUNABLE_INT("vfs.nfsd.fhhashsize", &nfsrv_lockhashsize);
6268651SkrisSYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
6368651Skris    &nfsrv_lockhashsize, 0,
6468651Skris    "Size of file handle hash table set via loader.conf");
6568651Skris
66int	nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
67TUNABLE_INT("vfs.nfsd.sessionhashsize", &nfsrv_sessionhashsize);
68SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
69    &nfsrv_sessionhashsize, 0,
70    "Size of session hash table set via loader.conf");
71
72static int	nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
73TUNABLE_INT("vfs.nfsd.v4statelimit", &nfsrv_v4statelimit);
74SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
75    &nfsrv_v4statelimit, 0,
76    "High water limit for NFSv4 opens+locks+delegations");
77
/*
 * Heads of the NFSv4 hash tables.  Only the pointers are defined here;
 * nfsclienthash[] is indexed from 0 to nfsrv_clienthashsize - 1 by the
 * client lookup loops in this file.
 */
struct nfsclienthashhead *nfsclienthash;
struct nfslockhashhead *nfslockhash;
struct nfssessionhash *nfssessionhash;
84#endif	/* !APPLEKEXT */
85
86static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
87static time_t nfsrvboottime;
88static int nfsrv_writedelegifpos = 1;
89static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
90static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
91static int nfsrv_nogsscallback = 0;
92
93/* local functions */
94static void nfsrv_dumpaclient(struct nfsclient *clp,
95    struct nfsd_dumpclients *dumpp);
96static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
97    NFSPROC_T *p);
98static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
99    NFSPROC_T *p);
100static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
101    NFSPROC_T *p);
102static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
103    int cansleep, NFSPROC_T *p);
104static void nfsrv_freenfslock(struct nfslock *lop);
105static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
106static void nfsrv_freedeleg(struct nfsstate *);
107static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
108    u_int32_t flags, struct nfsstate **stpp);
109static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
110    struct nfsstate **stpp);
111static int nfsrv_getlockfh(vnode_t vp, u_short flags,
112    struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
113static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
114    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
115static void nfsrv_insertlock(struct nfslock *new_lop,
116    struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
117static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
118    struct nfslock **other_lopp, struct nfslockfile *lfp);
119static int nfsrv_getipnumber(u_char *cp);
120static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
121    nfsv4stateid_t *stateidp, int specialid);
122static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
123    u_int32_t flags);
124static int nfsrv_docallback(struct nfsclient *clp, int procnum,
125    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
126    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p);
127static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
128    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp);
129static u_int32_t nfsrv_nextclientindex(void);
130static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
131static void nfsrv_markstable(struct nfsclient *clp);
132static int nfsrv_checkstable(struct nfsclient *clp);
133static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
134    vnode *vp, NFSPROC_T *p);
135static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
136    NFSPROC_T *p, vnode_t vp);
137static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
138    struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
139static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
140    struct nfsclient *clp);
141static time_t nfsrv_leaseexpiry(void);
142static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
143static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
144    struct nfsstate *stp, struct nfsrvcache *op);
145static int nfsrv_nootherstate(struct nfsstate *stp);
146static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
147    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
148static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
149    uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
150static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
151    int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
152    NFSPROC_T *p);
153static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
154    NFSPROC_T *p);
155static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
156    uint64_t first, uint64_t end);
157static void nfsrv_locklf(struct nfslockfile *lfp);
158static void nfsrv_unlocklf(struct nfslockfile *lfp);
159static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
160static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
161static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
162    int dont_replycache, struct nfsdsession **sepp);
163static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
164
165/*
166 * Scan the client list for a match and either return the current one,
167 * create a new entry or return an error.
168 * If returning a non-error, the clp structure must either be linked into
169 * the client list or free'd.
170 */
171APPLESTATIC int
172nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
173    nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
174{
175	struct nfsclient *clp = NULL, *new_clp = *new_clpp;
176	int i, error = 0;
177	struct nfsstate *stp, *tstp;
178	struct sockaddr_in *sad, *rad;
179	int zapit = 0, gotit, hasstate = 0, igotlock;
180	static u_int64_t confirm_index = 0;
181
182	/*
183	 * Check for state resource limit exceeded.
184	 */
185	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
186		error = NFSERR_RESOURCE;
187		goto out;
188	}
189
190	if (nfsrv_issuedelegs == 0 ||
191	    ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
192		/*
193		 * Don't do callbacks when delegations are disabled or
194		 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
195		 * If establishing a callback connection is attempted
196		 * when a firewall is blocking the callback path, the
197		 * server may wait too long for the connect attempt to
198		 * succeed during the Open. Some clients, such as Linux,
199		 * may timeout and give up on the Open before the server
200		 * replies. Also, since AUTH_GSS callbacks are not
201		 * yet interoperability tested, they might cause the
202		 * server to crap out, if they get past the Init call to
203		 * the client.
204		 */
205		new_clp->lc_program = 0;
206
207	/* Lock out other nfsd threads */
208	NFSLOCKV4ROOTMUTEX();
209	nfsv4_relref(&nfsv4rootfs_lock);
210	do {
211		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
212		    NFSV4ROOTLOCKMUTEXPTR, NULL);
213	} while (!igotlock);
214	NFSUNLOCKV4ROOTMUTEX();
215
216	/*
217	 * Search for a match in the client list.
218	 */
219	gotit = i = 0;
220	while (i < nfsrv_clienthashsize && !gotit) {
221	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
222		if (new_clp->lc_idlen == clp->lc_idlen &&
223		    !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
224			gotit = 1;
225			break;
226		}
227	    }
228	    if (gotit == 0)
229		i++;
230	}
231	if (!gotit ||
232	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
233		if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
234			/*
235			 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
236			 * client is trying to update a confirmed clientid.
237			 */
238			NFSLOCKV4ROOTMUTEX();
239			nfsv4_unlock(&nfsv4rootfs_lock, 1);
240			NFSUNLOCKV4ROOTMUTEX();
241			confirmp->lval[1] = 0;
242			error = NFSERR_NOENT;
243			goto out;
244		}
245		/*
246		 * Get rid of the old one.
247		 */
248		if (i != nfsrv_clienthashsize) {
249			LIST_REMOVE(clp, lc_hash);
250			nfsrv_cleanclient(clp, p);
251			nfsrv_freedeleglist(&clp->lc_deleg);
252			nfsrv_freedeleglist(&clp->lc_olddeleg);
253			zapit = 1;
254		}
255		/*
256		 * Add it after assigning a client id to it.
257		 */
258		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
259		if ((nd->nd_flag & ND_NFSV41) != 0)
260			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
261			    ++confirm_index;
262		else
263			confirmp->qval = new_clp->lc_confirm.qval =
264			    ++confirm_index;
265		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
266		    (u_int32_t)nfsrvboottime;
267		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
268		    nfsrv_nextclientindex();
269		new_clp->lc_stateindex = 0;
270		new_clp->lc_statemaxindex = 0;
271		new_clp->lc_cbref = 0;
272		new_clp->lc_expiry = nfsrv_leaseexpiry();
273		LIST_INIT(&new_clp->lc_open);
274		LIST_INIT(&new_clp->lc_deleg);
275		LIST_INIT(&new_clp->lc_olddeleg);
276		LIST_INIT(&new_clp->lc_session);
277		for (i = 0; i < nfsrv_statehashsize; i++)
278			LIST_INIT(&new_clp->lc_stateid[i]);
279		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
280		    lc_hash);
281		newnfsstats.srvclients++;
282		nfsrv_openpluslock++;
283		nfsrv_clients++;
284		NFSLOCKV4ROOTMUTEX();
285		nfsv4_unlock(&nfsv4rootfs_lock, 1);
286		NFSUNLOCKV4ROOTMUTEX();
287		if (zapit)
288			nfsrv_zapclient(clp, p);
289		*new_clpp = NULL;
290		goto out;
291	}
292
293	/*
294	 * Now, handle the cases where the id is already issued.
295	 */
296	if (nfsrv_notsamecredname(nd, clp)) {
297	    /*
298	     * Check to see if there is expired state that should go away.
299	     */
300	    if (clp->lc_expiry < NFSD_MONOSEC &&
301	        (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
302		nfsrv_cleanclient(clp, p);
303		nfsrv_freedeleglist(&clp->lc_deleg);
304	    }
305
306	    /*
307	     * If there is outstanding state, then reply NFSERR_CLIDINUSE per
308	     * RFC3530 Sec. 8.1.2 last para.
309	     */
310	    if (!LIST_EMPTY(&clp->lc_deleg)) {
311		hasstate = 1;
312	    } else if (LIST_EMPTY(&clp->lc_open)) {
313		hasstate = 0;
314	    } else {
315		hasstate = 0;
316		/* Look for an Open on the OpenOwner */
317		LIST_FOREACH(stp, &clp->lc_open, ls_list) {
318		    if (!LIST_EMPTY(&stp->ls_open)) {
319			hasstate = 1;
320			break;
321		    }
322		}
323	    }
324	    if (hasstate) {
325		/*
326		 * If the uid doesn't match, return NFSERR_CLIDINUSE after
327		 * filling out the correct ipaddr and portnum.
328		 */
329		sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *);
330		rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
331		sad->sin_addr.s_addr = rad->sin_addr.s_addr;
332		sad->sin_port = rad->sin_port;
333		NFSLOCKV4ROOTMUTEX();
334		nfsv4_unlock(&nfsv4rootfs_lock, 1);
335		NFSUNLOCKV4ROOTMUTEX();
336		error = NFSERR_CLIDINUSE;
337		goto out;
338	    }
339	}
340
341	if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
342		/*
343		 * If the verifier has changed, the client has rebooted
344		 * and a new client id is issued. The old state info
345		 * can be thrown away once the SETCLIENTID_CONFIRM occurs.
346		 */
347		LIST_REMOVE(clp, lc_hash);
348		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
349		if ((nd->nd_flag & ND_NFSV41) != 0)
350			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
351			    ++confirm_index;
352		else
353			confirmp->qval = new_clp->lc_confirm.qval =
354			    ++confirm_index;
355		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
356		    nfsrvboottime;
357		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
358		    nfsrv_nextclientindex();
359		new_clp->lc_stateindex = 0;
360		new_clp->lc_statemaxindex = 0;
361		new_clp->lc_cbref = 0;
362		new_clp->lc_expiry = nfsrv_leaseexpiry();
363
364		/*
365		 * Save the state until confirmed.
366		 */
367		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
368		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
369			tstp->ls_clp = new_clp;
370		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
371		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
372			tstp->ls_clp = new_clp;
373		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
374		    ls_list);
375		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
376			tstp->ls_clp = new_clp;
377		for (i = 0; i < nfsrv_statehashsize; i++) {
378			LIST_NEWHEAD(&new_clp->lc_stateid[i],
379			    &clp->lc_stateid[i], ls_hash);
380			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
381				tstp->ls_clp = new_clp;
382		}
383		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
384		    lc_hash);
385		newnfsstats.srvclients++;
386		nfsrv_openpluslock++;
387		nfsrv_clients++;
388		NFSLOCKV4ROOTMUTEX();
389		nfsv4_unlock(&nfsv4rootfs_lock, 1);
390		NFSUNLOCKV4ROOTMUTEX();
391
392		/*
393		 * Must wait until any outstanding callback on the old clp
394		 * completes.
395		 */
396		NFSLOCKSTATE();
397		while (clp->lc_cbref) {
398			clp->lc_flags |= LCL_WAKEUPWANTED;
399			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
400			    "nfsd clp", 10 * hz);
401		}
402		NFSUNLOCKSTATE();
403		nfsrv_zapclient(clp, p);
404		*new_clpp = NULL;
405		goto out;
406	}
407
408	/* For NFSv4.1, mark that we found a confirmed clientid. */
409	if ((nd->nd_flag & ND_NFSV41) != 0) {
410		clientidp->lval[0] = clp->lc_clientid.lval[0];
411		clientidp->lval[1] = clp->lc_clientid.lval[1];
412		confirmp->lval[0] = 0;	/* Ignored by client */
413		confirmp->lval[1] = 1;
414	} else {
415		/*
416		 * id and verifier match, so update the net address info
417		 * and get rid of any existing callback authentication
418		 * handle, so a new one will be acquired.
419		 */
420		LIST_REMOVE(clp, lc_hash);
421		new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
422		new_clp->lc_expiry = nfsrv_leaseexpiry();
423		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
424		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
425		    clp->lc_clientid.lval[0];
426		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
427		    clp->lc_clientid.lval[1];
428		new_clp->lc_delegtime = clp->lc_delegtime;
429		new_clp->lc_stateindex = clp->lc_stateindex;
430		new_clp->lc_statemaxindex = clp->lc_statemaxindex;
431		new_clp->lc_cbref = 0;
432		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
433		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
434			tstp->ls_clp = new_clp;
435		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
436		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
437			tstp->ls_clp = new_clp;
438		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
439		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
440			tstp->ls_clp = new_clp;
441		for (i = 0; i < nfsrv_statehashsize; i++) {
442			LIST_NEWHEAD(&new_clp->lc_stateid[i],
443			    &clp->lc_stateid[i], ls_hash);
444			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
445				tstp->ls_clp = new_clp;
446		}
447		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
448		    lc_hash);
449		newnfsstats.srvclients++;
450		nfsrv_openpluslock++;
451		nfsrv_clients++;
452	}
453	NFSLOCKV4ROOTMUTEX();
454	nfsv4_unlock(&nfsv4rootfs_lock, 1);
455	NFSUNLOCKV4ROOTMUTEX();
456
457	if ((nd->nd_flag & ND_NFSV41) == 0) {
458		/*
459		 * Must wait until any outstanding callback on the old clp
460		 * completes.
461		 */
462		NFSLOCKSTATE();
463		while (clp->lc_cbref) {
464			clp->lc_flags |= LCL_WAKEUPWANTED;
465			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
466			    "nfsdclp", 10 * hz);
467		}
468		NFSUNLOCKSTATE();
469		nfsrv_zapclient(clp, p);
470		*new_clpp = NULL;
471	}
472
473out:
474	NFSEXITCODE2(error, nd);
475	return (error);
476}
477
478/*
479 * Check to see if the client id exists and optionally confirm it.
480 */
481APPLESTATIC int
482nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
483    struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
484    struct nfsrv_descript *nd, NFSPROC_T *p)
485{
486	struct nfsclient *clp;
487	struct nfsstate *stp;
488	int i;
489	struct nfsclienthashhead *hp;
490	int error = 0, igotlock, doneok;
491	struct nfssessionhash *shp;
492	struct nfsdsession *sep;
493	uint64_t sessid[2];
494	static uint64_t next_sess = 0;
495
496	if (clpp)
497		*clpp = NULL;
498	if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
499	    opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
500		error = NFSERR_STALECLIENTID;
501		goto out;
502	}
503
504	/*
505	 * If called with opflags == CLOPS_RENEW, the State Lock is
506	 * already held. Otherwise, we need to get either that or,
507	 * for the case of Confirm, lock out the nfsd threads.
508	 */
509	if (opflags & CLOPS_CONFIRM) {
510		NFSLOCKV4ROOTMUTEX();
511		nfsv4_relref(&nfsv4rootfs_lock);
512		do {
513			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
514			    NFSV4ROOTLOCKMUTEXPTR, NULL);
515		} while (!igotlock);
516		/*
517		 * Create a new sessionid here, since we need to do it where
518		 * there is a mutex held to serialize update of next_sess.
519		 */
520		if ((nd->nd_flag & ND_NFSV41) != 0) {
521			sessid[0] = ++next_sess;
522			sessid[1] = clientid.qval;
523		}
524		NFSUNLOCKV4ROOTMUTEX();
525	} else if (opflags != CLOPS_RENEW) {
526		NFSLOCKSTATE();
527	}
528
529	/* For NFSv4.1, the clp is acquired from the associated session. */
530	if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
531	    opflags == CLOPS_RENEW) {
532		clp = NULL;
533		if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
534			shp = NFSSESSIONHASH(nd->nd_sessionid);
535			NFSLOCKSESSION(shp);
536			sep = nfsrv_findsession(nd->nd_sessionid);
537			if (sep != NULL)
538				clp = sep->sess_clp;
539			NFSUNLOCKSESSION(shp);
540		}
541	} else {
542		hp = NFSCLIENTHASH(clientid);
543		LIST_FOREACH(clp, hp, lc_hash) {
544			if (clp->lc_clientid.lval[1] == clientid.lval[1])
545				break;
546		}
547	}
548	if (clp == NULL) {
549		if (opflags & CLOPS_CONFIRM)
550			error = NFSERR_STALECLIENTID;
551		else
552			error = NFSERR_EXPIRED;
553	} else if (clp->lc_flags & LCL_ADMINREVOKED) {
554		/*
555		 * If marked admin revoked, just return the error.
556		 */
557		error = NFSERR_ADMINREVOKED;
558	}
559	if (error) {
560		if (opflags & CLOPS_CONFIRM) {
561			NFSLOCKV4ROOTMUTEX();
562			nfsv4_unlock(&nfsv4rootfs_lock, 1);
563			NFSUNLOCKV4ROOTMUTEX();
564		} else if (opflags != CLOPS_RENEW) {
565			NFSUNLOCKSTATE();
566		}
567		goto out;
568	}
569
570	/*
571	 * Perform any operations specified by the opflags.
572	 */
573	if (opflags & CLOPS_CONFIRM) {
574		if (((nd->nd_flag & ND_NFSV41) != 0 &&
575		     clp->lc_confirm.lval[0] != confirm.lval[0]) ||
576		    ((nd->nd_flag & ND_NFSV41) == 0 &&
577		     clp->lc_confirm.qval != confirm.qval))
578			error = NFSERR_STALECLIENTID;
579		else if (nfsrv_notsamecredname(nd, clp))
580			error = NFSERR_CLIDINUSE;
581
582		if (!error) {
583		    if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
584			LCL_NEEDSCONFIRM) {
585			/*
586			 * Hang onto the delegations (as old delegations)
587			 * for an Open with CLAIM_DELEGATE_PREV unless in
588			 * grace, but get rid of the rest of the state.
589			 */
590			nfsrv_cleanclient(clp, p);
591			nfsrv_freedeleglist(&clp->lc_olddeleg);
592			if (nfsrv_checkgrace(nd, clp, 0)) {
593			    /* In grace, so just delete delegations */
594			    nfsrv_freedeleglist(&clp->lc_deleg);
595			} else {
596			    LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
597				stp->ls_flags |= NFSLCK_OLDDELEG;
598			    clp->lc_delegtime = NFSD_MONOSEC +
599				nfsrv_lease + NFSRV_LEASEDELTA;
600			    LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
601				ls_list);
602			}
603			if ((nd->nd_flag & ND_NFSV41) != 0)
604			    clp->lc_program = cbprogram;
605		    }
606		    clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
607		    if (clp->lc_program)
608			clp->lc_flags |= LCL_NEEDSCBNULL;
609		    /* For NFSv4.1, link the session onto the client. */
610		    if (nsep != NULL) {
611			/* Hold a reference on the xprt for a backchannel. */
612			if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
613			    != 0 && clp->lc_req.nr_client == NULL) {
614			    clp->lc_req.nr_client = (struct __rpc_client *)
615				clnt_bck_create(nd->nd_xprt->xp_socket,
616				cbprogram, NFSV4_CBVERS);
617			    if (clp->lc_req.nr_client != NULL) {
618				SVC_ACQUIRE(nd->nd_xprt);
619				nd->nd_xprt->xp_p2 =
620				    clp->lc_req.nr_client->cl_private;
621				/* Disable idle timeout. */
622				nd->nd_xprt->xp_idletimeout = 0;
623				nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
624			    } else
625				nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
626			}
627			NFSBCOPY(sessid, nsep->sess_sessionid,
628			    NFSX_V4SESSIONID);
629			NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
630			    NFSX_V4SESSIONID);
631			shp = NFSSESSIONHASH(nsep->sess_sessionid);
632			NFSLOCKSTATE();
633			NFSLOCKSESSION(shp);
634			LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
635			LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
636			nsep->sess_clp = clp;
637			NFSUNLOCKSESSION(shp);
638			NFSUNLOCKSTATE();
639		    }
640		}
641	} else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
642		error = NFSERR_EXPIRED;
643	}
644
645	/*
646	 * If called by the Renew Op, we must check the principal.
647	 */
648	if (!error && (opflags & CLOPS_RENEWOP)) {
649	    if (nfsrv_notsamecredname(nd, clp)) {
650		doneok = 0;
651		for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
652		    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
653			if ((stp->ls_flags & NFSLCK_OPEN) &&
654			    stp->ls_uid == nd->nd_cred->cr_uid) {
655				doneok = 1;
656				break;
657			}
658		    }
659		}
660		if (!doneok)
661			error = NFSERR_ACCES;
662	    }
663	    if (!error && (clp->lc_flags & LCL_CBDOWN))
664		error = NFSERR_CBPATHDOWN;
665	}
666	if ((!error || error == NFSERR_CBPATHDOWN) &&
667	     (opflags & CLOPS_RENEW)) {
668		clp->lc_expiry = nfsrv_leaseexpiry();
669	}
670	if (opflags & CLOPS_CONFIRM) {
671		NFSLOCKV4ROOTMUTEX();
672		nfsv4_unlock(&nfsv4rootfs_lock, 1);
673		NFSUNLOCKV4ROOTMUTEX();
674	} else if (opflags != CLOPS_RENEW) {
675		NFSUNLOCKSTATE();
676	}
677	if (clpp)
678		*clpp = clp;
679
680out:
681	NFSEXITCODE2(error, nd);
682	return (error);
683}
684
685/*
686 * Perform the NFSv4.1 destroy clientid.
687 */
688int
689nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
690{
691	struct nfsclient *clp;
692	struct nfsclienthashhead *hp;
693	int error = 0, i, igotlock;
694
695	if (nfsrvboottime != clientid.lval[0]) {
696		error = NFSERR_STALECLIENTID;
697		goto out;
698	}
699
700	/* Lock out other nfsd threads */
701	NFSLOCKV4ROOTMUTEX();
702	nfsv4_relref(&nfsv4rootfs_lock);
703	do {
704		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
705		    NFSV4ROOTLOCKMUTEXPTR, NULL);
706	} while (igotlock == 0);
707	NFSUNLOCKV4ROOTMUTEX();
708
709	hp = NFSCLIENTHASH(clientid);
710	LIST_FOREACH(clp, hp, lc_hash) {
711		if (clp->lc_clientid.lval[1] == clientid.lval[1])
712			break;
713	}
714	if (clp == NULL) {
715		NFSLOCKV4ROOTMUTEX();
716		nfsv4_unlock(&nfsv4rootfs_lock, 1);
717		NFSUNLOCKV4ROOTMUTEX();
718		/* Just return ok, since it is gone. */
719		goto out;
720	}
721
722	/* Scan for state on the clientid. */
723	for (i = 0; i < nfsrv_statehashsize; i++)
724		if (!LIST_EMPTY(&clp->lc_stateid[i])) {
725			NFSLOCKV4ROOTMUTEX();
726			nfsv4_unlock(&nfsv4rootfs_lock, 1);
727			NFSUNLOCKV4ROOTMUTEX();
728			error = NFSERR_CLIENTIDBUSY;
729			goto out;
730		}
731	if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
732		NFSLOCKV4ROOTMUTEX();
733		nfsv4_unlock(&nfsv4rootfs_lock, 1);
734		NFSUNLOCKV4ROOTMUTEX();
735		error = NFSERR_CLIENTIDBUSY;
736		goto out;
737	}
738
739	/* Destroy the clientid and return ok. */
740	nfsrv_cleanclient(clp, p);
741	nfsrv_freedeleglist(&clp->lc_deleg);
742	nfsrv_freedeleglist(&clp->lc_olddeleg);
743	LIST_REMOVE(clp, lc_hash);
744	NFSLOCKV4ROOTMUTEX();
745	nfsv4_unlock(&nfsv4rootfs_lock, 1);
746	NFSUNLOCKV4ROOTMUTEX();
747	nfsrv_zapclient(clp, p);
748out:
749	NFSEXITCODE2(error, nd);
750	return (error);
751}
752
753/*
754 * Called from the new nfssvc syscall to admin revoke a clientid.
755 * Returns 0 for success, error otherwise.
756 */
757APPLESTATIC int
758nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
759{
760	struct nfsclient *clp = NULL;
761	int i, error = 0;
762	int gotit, igotlock;
763
764	/*
765	 * First, lock out the nfsd so that state won't change while the
766	 * revocation record is being written to the stable storage restart
767	 * file.
768	 */
769	NFSLOCKV4ROOTMUTEX();
770	do {
771		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
772		    NFSV4ROOTLOCKMUTEXPTR, NULL);
773	} while (!igotlock);
774	NFSUNLOCKV4ROOTMUTEX();
775
776	/*
777	 * Search for a match in the client list.
778	 */
779	gotit = i = 0;
780	while (i < nfsrv_clienthashsize && !gotit) {
781	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
782		if (revokep->nclid_idlen == clp->lc_idlen &&
783		    !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
784			gotit = 1;
785			break;
786		}
787	    }
788	    i++;
789	}
790	if (!gotit) {
791		NFSLOCKV4ROOTMUTEX();
792		nfsv4_unlock(&nfsv4rootfs_lock, 0);
793		NFSUNLOCKV4ROOTMUTEX();
794		error = EPERM;
795		goto out;
796	}
797
798	/*
799	 * Now, write out the revocation record
800	 */
801	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
802	nfsrv_backupstable();
803
804	/*
805	 * and clear out the state, marking the clientid revoked.
806	 */
807	clp->lc_flags &= ~LCL_CALLBACKSON;
808	clp->lc_flags |= LCL_ADMINREVOKED;
809	nfsrv_cleanclient(clp, p);
810	nfsrv_freedeleglist(&clp->lc_deleg);
811	nfsrv_freedeleglist(&clp->lc_olddeleg);
812	NFSLOCKV4ROOTMUTEX();
813	nfsv4_unlock(&nfsv4rootfs_lock, 0);
814	NFSUNLOCKV4ROOTMUTEX();
815
816out:
817	NFSEXITCODE(error);
818	return (error);
819}
820
821/*
822 * Dump out stats for all clients. Called from nfssvc(2), that is used
823 * newnfsstats.
824 */
825APPLESTATIC void
826nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
827{
828	struct nfsclient *clp;
829	int i = 0, cnt = 0;
830
831	/*
832	 * First, get a reference on the nfsv4rootfs_lock so that an
833	 * exclusive lock cannot be acquired while dumping the clients.
834	 */
835	NFSLOCKV4ROOTMUTEX();
836	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
837	NFSUNLOCKV4ROOTMUTEX();
838	NFSLOCKSTATE();
839	/*
840	 * Rattle through the client lists until done.
841	 */
842	while (i < nfsrv_clienthashsize && cnt < maxcnt) {
843	    clp = LIST_FIRST(&nfsclienthash[i]);
844	    while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
845		nfsrv_dumpaclient(clp, &dumpp[cnt]);
846		cnt++;
847		clp = LIST_NEXT(clp, lc_hash);
848	    }
849	    i++;
850	}
851	if (cnt < maxcnt)
852	    dumpp[cnt].ndcl_clid.nclid_idlen = 0;
853	NFSUNLOCKSTATE();
854	NFSLOCKV4ROOTMUTEX();
855	nfsv4_relref(&nfsv4rootfs_lock);
856	NFSUNLOCKV4ROOTMUTEX();
857}
858
859/*
860 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
861 */
862static void
863nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
864{
865	struct nfsstate *stp, *openstp, *lckownstp;
866	struct nfslock *lop;
867	struct sockaddr *sad;
868	struct sockaddr_in *rad;
869	struct sockaddr_in6 *rad6;
870
871	dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
872	dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
873	dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
874	dumpp->ndcl_flags = clp->lc_flags;
875	dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
876	NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
877	sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *);
878	dumpp->ndcl_addrfam = sad->sa_family;
879	if (sad->sa_family == AF_INET) {
880		rad = (struct sockaddr_in *)sad;
881		dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr;
882	} else {
883		rad6 = (struct sockaddr_in6 *)sad;
884		dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr;
885	}
886
887	/*
888	 * Now, scan the state lists and total up the opens and locks.
889	 */
890	LIST_FOREACH(stp, &clp->lc_open, ls_list) {
891	    dumpp->ndcl_nopenowners++;
892	    LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
893		dumpp->ndcl_nopens++;
894		LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
895		    dumpp->ndcl_nlockowners++;
896		    LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
897			dumpp->ndcl_nlocks++;
898		    }
899		}
900	    }
901	}
902
903	/*
904	 * and the delegation lists.
905	 */
906	LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
907	    dumpp->ndcl_ndelegs++;
908	}
909	LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
910	    dumpp->ndcl_nolddelegs++;
911	}
912}
913
914/*
915 * Dump out lock stats for a file.
916 */
917APPLESTATIC void
918nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
919    NFSPROC_T *p)
920{
921	struct nfsstate *stp;
922	struct nfslock *lop;
923	int cnt = 0;
924	struct nfslockfile *lfp;
925	struct sockaddr *sad;
926	struct sockaddr_in *rad;
927	struct sockaddr_in6 *rad6;
928	int ret;
929	fhandle_t nfh;
930
931	ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
932	/*
933	 * First, get a reference on the nfsv4rootfs_lock so that an
934	 * exclusive lock on it cannot be acquired while dumping the locks.
935	 */
936	NFSLOCKV4ROOTMUTEX();
937	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
938	NFSUNLOCKV4ROOTMUTEX();
939	NFSLOCKSTATE();
940	if (!ret)
941		ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
942	if (ret) {
943		ldumpp[0].ndlck_clid.nclid_idlen = 0;
944		NFSUNLOCKSTATE();
945		NFSLOCKV4ROOTMUTEX();
946		nfsv4_relref(&nfsv4rootfs_lock);
947		NFSUNLOCKV4ROOTMUTEX();
948		return;
949	}
950
951	/*
952	 * For each open share on file, dump it out.
953	 */
954	stp = LIST_FIRST(&lfp->lf_open);
955	while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
956		ldumpp[cnt].ndlck_flags = stp->ls_flags;
957		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
958		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
959		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
960		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
961		ldumpp[cnt].ndlck_owner.nclid_idlen =
962		    stp->ls_openowner->ls_ownerlen;
963		NFSBCOPY(stp->ls_openowner->ls_owner,
964		    ldumpp[cnt].ndlck_owner.nclid_id,
965		    stp->ls_openowner->ls_ownerlen);
966		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
967		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
968		    stp->ls_clp->lc_idlen);
969		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
970		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
971		if (sad->sa_family == AF_INET) {
972			rad = (struct sockaddr_in *)sad;
973			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
974		} else {
975			rad6 = (struct sockaddr_in6 *)sad;
976			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
977		}
978		stp = LIST_NEXT(stp, ls_file);
979		cnt++;
980	}
981
982	/*
983	 * and all locks.
984	 */
985	lop = LIST_FIRST(&lfp->lf_lock);
986	while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
987		stp = lop->lo_stp;
988		ldumpp[cnt].ndlck_flags = lop->lo_flags;
989		ldumpp[cnt].ndlck_first = lop->lo_first;
990		ldumpp[cnt].ndlck_end = lop->lo_end;
991		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
992		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
993		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
994		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
995		ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
996		NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
997		    stp->ls_ownerlen);
998		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
999		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1000		    stp->ls_clp->lc_idlen);
1001		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
1002		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
1003		if (sad->sa_family == AF_INET) {
1004			rad = (struct sockaddr_in *)sad;
1005			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
1006		} else {
1007			rad6 = (struct sockaddr_in6 *)sad;
1008			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
1009		}
1010		lop = LIST_NEXT(lop, lo_lckfile);
1011		cnt++;
1012	}
1013
1014	/*
1015	 * and the delegations.
1016	 */
1017	stp = LIST_FIRST(&lfp->lf_deleg);
1018	while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
1019		ldumpp[cnt].ndlck_flags = stp->ls_flags;
1020		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1021		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1022		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1023		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1024		ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
1025		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1026		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1027		    stp->ls_clp->lc_idlen);
1028		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
1029		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
1030		if (sad->sa_family == AF_INET) {
1031			rad = (struct sockaddr_in *)sad;
1032			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
1033		} else {
1034			rad6 = (struct sockaddr_in6 *)sad;
1035			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
1036		}
1037		stp = LIST_NEXT(stp, ls_file);
1038		cnt++;
1039	}
1040
1041	/*
1042	 * If list isn't full, mark end of list by setting the client name
1043	 * to zero length.
1044	 */
1045	if (cnt < maxcnt)
1046		ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1047	NFSUNLOCKSTATE();
1048	NFSLOCKV4ROOTMUTEX();
1049	nfsv4_relref(&nfsv4rootfs_lock);
1050	NFSUNLOCKV4ROOTMUTEX();
1051}
1052
1053/*
1054 * Server timer routine. It can scan any linked list, so long
1055 * as it holds the spin/mutex lock and there is no exclusive lock on
1056 * nfsv4rootfs_lock.
1057 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1058 *  to do this from a callout, since the spin locks work. For
1059 *  Darwin, I'm not sure what will work correctly yet.)
1060 * Should be called once per second.
1061 */
1062APPLESTATIC void
1063nfsrv_servertimer(void)
1064{
1065	struct nfsclient *clp, *nclp;
1066	struct nfsstate *stp, *nstp;
1067	int got_ref, i;
1068
1069	/*
1070	 * Make sure nfsboottime is set. This is used by V3 as well
1071	 * as V4. Note that nfsboottime is not nfsrvboottime, which is
1072	 * only used by the V4 server for leases.
1073	 */
1074	if (nfsboottime.tv_sec == 0)
1075		NFSSETBOOTTIME(nfsboottime);
1076
1077	/*
1078	 * If server hasn't started yet, just return.
1079	 */
1080	NFSLOCKSTATE();
1081	if (nfsrv_stablefirst.nsf_eograce == 0) {
1082		NFSUNLOCKSTATE();
1083		return;
1084	}
1085	if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
1086		if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
1087		    NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
1088			nfsrv_stablefirst.nsf_flags |=
1089			    (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1090		NFSUNLOCKSTATE();
1091		return;
1092	}
1093
1094	/*
1095	 * Try and get a reference count on the nfsv4rootfs_lock so that
1096	 * no nfsd thread can acquire an exclusive lock on it before this
1097	 * call is done. If it is already exclusively locked, just return.
1098	 */
1099	NFSLOCKV4ROOTMUTEX();
1100	got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1101	NFSUNLOCKV4ROOTMUTEX();
1102	if (got_ref == 0) {
1103		NFSUNLOCKSTATE();
1104		return;
1105	}
1106
1107	/*
1108	 * For each client...
1109	 */
1110	for (i = 0; i < nfsrv_clienthashsize; i++) {
1111	    clp = LIST_FIRST(&nfsclienthash[i]);
1112	    while (clp != LIST_END(&nfsclienthash[i])) {
1113		nclp = LIST_NEXT(clp, lc_hash);
1114		if (!(clp->lc_flags & LCL_EXPIREIT)) {
1115		    if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1116			 && ((LIST_EMPTY(&clp->lc_deleg)
1117			      && LIST_EMPTY(&clp->lc_open)) ||
1118			     nfsrv_clients > nfsrv_clienthighwater)) ||
1119			(clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1120			(clp->lc_expiry < NFSD_MONOSEC &&
1121			 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
1122			/*
1123			 * Lease has expired several nfsrv_lease times ago:
1124			 * PLUS
1125			 *    - no state is associated with it
1126			 *    OR
1127			 *    - above high water mark for number of clients
1128			 *      (nfsrv_clienthighwater should be large enough
1129			 *       that this only occurs when clients fail to
1130			 *       use the same nfs_client_id4.id. Maybe somewhat
1131			 *       higher that the maximum number of clients that
1132			 *       will mount this server?)
1133			 * OR
1134			 * Lease has expired a very long time ago
1135			 * OR
1136			 * Lease has expired PLUS the number of opens + locks
1137			 * has exceeded 90% of capacity
1138			 *
1139			 * --> Mark for expiry. The actual expiry will be done
1140			 *     by an nfsd sometime soon.
1141			 */
1142			clp->lc_flags |= LCL_EXPIREIT;
1143			nfsrv_stablefirst.nsf_flags |=
1144			    (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1145		    } else {
1146			/*
1147			 * If there are no opens, increment no open tick cnt
1148			 * If time exceeds NFSNOOPEN, mark it to be thrown away
1149			 * otherwise, if there is an open, reset no open time
1150			 * Hopefully, this will avoid excessive re-creation
1151			 * of open owners and subsequent open confirms.
1152			 */
1153			stp = LIST_FIRST(&clp->lc_open);
1154			while (stp != LIST_END(&clp->lc_open)) {
1155				nstp = LIST_NEXT(stp, ls_list);
1156				if (LIST_EMPTY(&stp->ls_open)) {
1157					stp->ls_noopens++;
1158					if (stp->ls_noopens > NFSNOOPEN ||
1159					    (nfsrv_openpluslock * 2) >
1160					    nfsrv_v4statelimit)
1161						nfsrv_stablefirst.nsf_flags |=
1162							NFSNSF_NOOPENS;
1163				} else {
1164					stp->ls_noopens = 0;
1165				}
1166				stp = nstp;
1167			}
1168		    }
1169		}
1170		clp = nclp;
1171	    }
1172	}
1173	NFSUNLOCKSTATE();
1174	NFSLOCKV4ROOTMUTEX();
1175	nfsv4_relref(&nfsv4rootfs_lock);
1176	NFSUNLOCKV4ROOTMUTEX();
1177}
1178
1179/*
1180 * The following set of functions free up the various data structures.
1181 */
1182/*
1183 * Clear out all open/lock state related to this nfsclient.
1184 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1185 * there are no other active nfsd threads.
1186 */
1187APPLESTATIC void
1188nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
1189{
1190	struct nfsstate *stp, *nstp;
1191	struct nfsdsession *sep, *nsep;
1192
1193	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
1194		nfsrv_freeopenowner(stp, 1, p);
1195	if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1196		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1197			(void)nfsrv_freesession(sep, NULL);
1198}
1199
1200/*
1201 * Free a client that has been cleaned. It should also already have been
1202 * removed from the lists.
1203 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
1204 *  softclock interrupts are enabled.)
1205 */
1206APPLESTATIC void
1207nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
1208{
1209
1210#ifdef notyet
1211	if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1212	     (LCL_GSS | LCL_CALLBACKSON) &&
1213	    (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1214	    clp->lc_handlelen > 0) {
1215		clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1216		clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1217		(void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1218			NULL, 0, NULL, NULL, NULL, p);
1219	}
1220#endif
1221	newnfs_disconnect(&clp->lc_req);
1222	NFSSOCKADDRFREE(clp->lc_req.nr_nam);
1223	NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1224	free(clp->lc_stateid, M_NFSDCLIENT);
1225	free(clp, M_NFSDCLIENT);
1226	NFSLOCKSTATE();
1227	newnfsstats.srvclients--;
1228	nfsrv_openpluslock--;
1229	nfsrv_clients--;
1230	NFSUNLOCKSTATE();
1231}
1232
1233/*
1234 * Free a list of delegation state structures.
1235 * (This function will also free all nfslockfile structures that no
1236 *  longer have associated state.)
1237 */
1238APPLESTATIC void
1239nfsrv_freedeleglist(struct nfsstatehead *sthp)
1240{
1241	struct nfsstate *stp, *nstp;
1242
1243	LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1244		nfsrv_freedeleg(stp);
1245	}
1246	LIST_INIT(sthp);
1247}
1248
1249/*
1250 * Free up a delegation.
1251 */
1252static void
1253nfsrv_freedeleg(struct nfsstate *stp)
1254{
1255	struct nfslockfile *lfp;
1256
1257	LIST_REMOVE(stp, ls_hash);
1258	LIST_REMOVE(stp, ls_list);
1259	LIST_REMOVE(stp, ls_file);
1260	lfp = stp->ls_lfp;
1261	if (LIST_EMPTY(&lfp->lf_open) &&
1262	    LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1263	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1264	    lfp->lf_usecount == 0 &&
1265	    nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1266		nfsrv_freenfslockfile(lfp);
1267	FREE((caddr_t)stp, M_NFSDSTATE);
1268	newnfsstats.srvdelegates--;
1269	nfsrv_openpluslock--;
1270	nfsrv_delegatecnt--;
1271}
1272
1273/*
1274 * This function frees an open owner and all associated opens.
1275 */
1276static void
1277nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1278{
1279	struct nfsstate *nstp, *tstp;
1280
1281	LIST_REMOVE(stp, ls_list);
1282	/*
1283	 * Now, free all associated opens.
1284	 */
1285	nstp = LIST_FIRST(&stp->ls_open);
1286	while (nstp != LIST_END(&stp->ls_open)) {
1287		tstp = nstp;
1288		nstp = LIST_NEXT(nstp, ls_list);
1289		(void) nfsrv_freeopen(tstp, NULL, cansleep, p);
1290	}
1291	if (stp->ls_op)
1292		nfsrvd_derefcache(stp->ls_op);
1293	FREE((caddr_t)stp, M_NFSDSTATE);
1294	newnfsstats.srvopenowners--;
1295	nfsrv_openpluslock--;
1296}
1297
1298/*
1299 * This function frees an open (nfsstate open structure) with all associated
1300 * lock_owners and locks. It also frees the nfslockfile structure iff there
1301 * are no other opens on the file.
1302 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1303 */
1304static int
1305nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1306{
1307	struct nfsstate *nstp, *tstp;
1308	struct nfslockfile *lfp;
1309	int ret;
1310
1311	LIST_REMOVE(stp, ls_hash);
1312	LIST_REMOVE(stp, ls_list);
1313	LIST_REMOVE(stp, ls_file);
1314
1315	lfp = stp->ls_lfp;
1316	/*
1317	 * Now, free all lockowners associated with this open.
1318	 */
1319	LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1320		nfsrv_freelockowner(tstp, vp, cansleep, p);
1321
1322	/*
1323	 * The nfslockfile is freed here if there are no locks
1324	 * associated with the open.
1325	 * If there are locks associated with the open, the
1326	 * nfslockfile structure can be freed via nfsrv_freelockowner().
1327	 * Acquire the state mutex to avoid races with calls to
1328	 * nfsrv_getlockfile().
1329	 */
1330	if (cansleep != 0)
1331		NFSLOCKSTATE();
1332	if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1333	    LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1334	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1335	    lfp->lf_usecount == 0 &&
1336	    (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
1337		nfsrv_freenfslockfile(lfp);
1338		ret = 1;
1339	} else
1340		ret = 0;
1341	if (cansleep != 0)
1342		NFSUNLOCKSTATE();
1343	FREE((caddr_t)stp, M_NFSDSTATE);
1344	newnfsstats.srvopens--;
1345	nfsrv_openpluslock--;
1346	return (ret);
1347}
1348
1349/*
1350 * Frees a lockowner and all associated locks.
1351 */
1352static void
1353nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1354    NFSPROC_T *p)
1355{
1356
1357	LIST_REMOVE(stp, ls_hash);
1358	LIST_REMOVE(stp, ls_list);
1359	nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1360	if (stp->ls_op)
1361		nfsrvd_derefcache(stp->ls_op);
1362	FREE((caddr_t)stp, M_NFSDSTATE);
1363	newnfsstats.srvlockowners--;
1364	nfsrv_openpluslock--;
1365}
1366
1367/*
1368 * Free all the nfs locks on a lockowner.
1369 */
1370static void
1371nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1372    NFSPROC_T *p)
1373{
1374	struct nfslock *lop, *nlop;
1375	struct nfsrollback *rlp, *nrlp;
1376	struct nfslockfile *lfp = NULL;
1377	int gottvp = 0;
1378	vnode_t tvp = NULL;
1379	uint64_t first, end;
1380
1381	if (vp != NULL)
1382		ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1383	lop = LIST_FIRST(&stp->ls_lock);
1384	while (lop != LIST_END(&stp->ls_lock)) {
1385		nlop = LIST_NEXT(lop, lo_lckowner);
1386		/*
1387		 * Since all locks should be for the same file, lfp should
1388		 * not change.
1389		 */
1390		if (lfp == NULL)
1391			lfp = lop->lo_lfp;
1392		else if (lfp != lop->lo_lfp)
1393			panic("allnfslocks");
1394		/*
1395		 * If vp is NULL and cansleep != 0, a vnode must be acquired
1396		 * from the file handle. This only occurs when called from
1397		 * nfsrv_cleanclient().
1398		 */
1399		if (gottvp == 0) {
1400			if (nfsrv_dolocallocks == 0)
1401				tvp = NULL;
1402			else if (vp == NULL && cansleep != 0) {
1403				tvp = nfsvno_getvp(&lfp->lf_fh);
1404				NFSVOPUNLOCK(tvp, 0);
1405			} else
1406				tvp = vp;
1407			gottvp = 1;
1408		}
1409
1410		if (tvp != NULL) {
1411			if (cansleep == 0)
1412				panic("allnfs2");
1413			first = lop->lo_first;
1414			end = lop->lo_end;
1415			nfsrv_freenfslock(lop);
1416			nfsrv_localunlock(tvp, lfp, first, end, p);
1417			LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1418			    nrlp)
1419				free(rlp, M_NFSDROLLBACK);
1420			LIST_INIT(&lfp->lf_rollback);
1421		} else
1422			nfsrv_freenfslock(lop);
1423		lop = nlop;
1424	}
1425	if (vp == NULL && tvp != NULL)
1426		vrele(tvp);
1427}
1428
1429/*
1430 * Free an nfslock structure.
1431 */
1432static void
1433nfsrv_freenfslock(struct nfslock *lop)
1434{
1435
1436	if (lop->lo_lckfile.le_prev != NULL) {
1437		LIST_REMOVE(lop, lo_lckfile);
1438		newnfsstats.srvlocks--;
1439		nfsrv_openpluslock--;
1440	}
1441	LIST_REMOVE(lop, lo_lckowner);
1442	FREE((caddr_t)lop, M_NFSDLOCK);
1443}
1444
1445/*
1446 * This function frees an nfslockfile structure.
1447 */
1448static void
1449nfsrv_freenfslockfile(struct nfslockfile *lfp)
1450{
1451
1452	LIST_REMOVE(lfp, lf_hash);
1453	FREE((caddr_t)lfp, M_NFSDLOCKFILE);
1454}
1455
1456/*
1457 * This function looks up an nfsstate structure via stateid.
1458 */
1459static int
1460nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1461    struct nfsstate **stpp)
1462{
1463	struct nfsstate *stp;
1464	struct nfsstatehead *hp;
1465	int error = 0;
1466
1467	*stpp = NULL;
1468	hp = NFSSTATEHASH(clp, *stateidp);
1469	LIST_FOREACH(stp, hp, ls_hash) {
1470		if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1471			NFSX_STATEIDOTHER))
1472			break;
1473	}
1474
1475	/*
1476	 * If no state id in list, return NFSERR_BADSTATEID.
1477	 */
1478	if (stp == LIST_END(hp)) {
1479		error = NFSERR_BADSTATEID;
1480		goto out;
1481	}
1482	*stpp = stp;
1483
1484out:
1485	NFSEXITCODE(error);
1486	return (error);
1487}
1488
1489/*
1490 * This function gets an nfsstate structure via owner string.
1491 */
1492static void
1493nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1494    struct nfsstate **stpp)
1495{
1496	struct nfsstate *stp;
1497
1498	*stpp = NULL;
1499	LIST_FOREACH(stp, hp, ls_list) {
1500		if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1501		  !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1502			*stpp = stp;
1503			return;
1504		}
1505	}
1506}
1507
1508/*
1509 * Lock control function called to update lock status.
1510 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1511 * that one isn't to be created and an NFSERR_xxx for other errors.
1512 * The structures new_stp and new_lop are passed in as pointers that should
1513 * be set to NULL if the structure is used and shouldn't be free'd.
1514 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1515 * never used and can safely be allocated on the stack. For all other
1516 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1517 * in case they are used.
1518 */
1519APPLESTATIC int
1520nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1521    struct nfslock **new_lopp, struct nfslockconflict *cfp,
1522    nfsquad_t clientid, nfsv4stateid_t *stateidp,
1523    __unused struct nfsexstuff *exp,
1524    struct nfsrv_descript *nd, NFSPROC_T *p)
1525{
1526	struct nfslock *lop;
1527	struct nfsstate *new_stp = *new_stpp;
1528	struct nfslock *new_lop = *new_lopp;
1529	struct nfsstate *tstp, *mystp, *nstp;
1530	int specialid = 0;
1531	struct nfslockfile *lfp;
1532	struct nfslock *other_lop = NULL;
1533	struct nfsstate *stp, *lckstp = NULL;
1534	struct nfsclient *clp = NULL;
1535	u_int32_t bits;
1536	int error = 0, haslock = 0, ret, reterr;
1537	int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1538	fhandle_t nfh;
1539	uint64_t first, end;
1540	uint32_t lock_flags;
1541
1542	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1543		/*
1544		 * Note the special cases of "all 1s" or "all 0s" stateids and
1545		 * let reads with all 1s go ahead.
1546		 */
1547		if (new_stp->ls_stateid.seqid == 0x0 &&
1548		    new_stp->ls_stateid.other[0] == 0x0 &&
1549		    new_stp->ls_stateid.other[1] == 0x0 &&
1550		    new_stp->ls_stateid.other[2] == 0x0)
1551			specialid = 1;
1552		else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1553		    new_stp->ls_stateid.other[0] == 0xffffffff &&
1554		    new_stp->ls_stateid.other[1] == 0xffffffff &&
1555		    new_stp->ls_stateid.other[2] == 0xffffffff)
1556			specialid = 2;
1557	}
1558
1559	/*
1560	 * Check for restart conditions (client and server).
1561	 */
1562	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1563	    &new_stp->ls_stateid, specialid);
1564	if (error)
1565		goto out;
1566
1567	/*
1568	 * Check for state resource limit exceeded.
1569	 */
1570	if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1571	    nfsrv_openpluslock > nfsrv_v4statelimit) {
1572		error = NFSERR_RESOURCE;
1573		goto out;
1574	}
1575
1576	/*
1577	 * For the lock case, get another nfslock structure,
1578	 * just in case we need it.
1579	 * Malloc now, before we start sifting through the linked lists,
1580	 * in case we have to wait for memory.
1581	 */
1582tryagain:
1583	if (new_stp->ls_flags & NFSLCK_LOCK)
1584		MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock),
1585		    M_NFSDLOCK, M_WAITOK);
1586	filestruct_locked = 0;
1587	reterr = 0;
1588	lfp = NULL;
1589
1590	/*
1591	 * Get the lockfile structure for CFH now, so we can do a sanity
1592	 * check against the stateid, before incrementing the seqid#, since
1593	 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1594	 * shouldn't be incremented for this case.
1595	 * If nfsrv_getlockfile() returns -1, it means "not found", which
1596	 * will be handled later.
1597	 * If we are doing Lock/LockU and local locking is enabled, sleep
1598	 * lock the nfslockfile structure.
1599	 */
1600	getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1601	NFSLOCKSTATE();
1602	if (getlckret == 0) {
1603		if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1604		    nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1605			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1606			    &lfp, &nfh, 1);
1607			if (getlckret == 0)
1608				filestruct_locked = 1;
1609		} else
1610			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1611			    &lfp, &nfh, 0);
1612	}
1613	if (getlckret != 0 && getlckret != -1)
1614		reterr = getlckret;
1615
1616	if (filestruct_locked != 0) {
1617		LIST_INIT(&lfp->lf_rollback);
1618		if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1619			/*
1620			 * For local locking, do the advisory locking now, so
1621			 * that any conflict can be detected. A failure later
1622			 * can be rolled back locally. If an error is returned,
1623			 * struct nfslockfile has been unlocked and any local
1624			 * locking rolled back.
1625			 */
1626			NFSUNLOCKSTATE();
1627			if (vnode_unlocked == 0) {
1628				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1629				vnode_unlocked = 1;
1630				NFSVOPUNLOCK(vp, 0);
1631			}
1632			reterr = nfsrv_locallock(vp, lfp,
1633			    (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1634			    new_lop->lo_first, new_lop->lo_end, cfp, p);
1635			NFSLOCKSTATE();
1636		}
1637	}
1638
1639	if (specialid == 0) {
1640	    if (new_stp->ls_flags & NFSLCK_TEST) {
1641		/*
1642		 * RFC 3530 does not list LockT as an op that renews a
1643		 * lease, but the concensus seems to be that it is ok
1644		 * for a server to do so.
1645		 */
1646		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1647		    (nfsquad_t)((u_quad_t)0), 0, nd, p);
1648
1649		/*
1650		 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1651		 * error returns for LockT, just go ahead and test for a lock,
1652		 * since there are no locks for this client, but other locks
1653		 * can conflict. (ie. same client will always be false)
1654		 */
1655		if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1656		    error = 0;
1657		lckstp = new_stp;
1658	    } else {
1659	      error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1660		(nfsquad_t)((u_quad_t)0), 0, nd, p);
1661	      if (error == 0)
1662		/*
1663		 * Look up the stateid
1664		 */
1665		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1666		  new_stp->ls_flags, &stp);
1667	      /*
1668	       * do some sanity checks for an unconfirmed open or a
1669	       * stateid that refers to the wrong file, for an open stateid
1670	       */
1671	      if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1672		  ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1673		   (getlckret == 0 && stp->ls_lfp != lfp)))
1674			error = NFSERR_BADSTATEID;
1675	      if (error == 0 &&
1676		  (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1677		  getlckret == 0 && stp->ls_lfp != lfp)
1678			error = NFSERR_BADSTATEID;
1679
1680	      /*
1681	       * If the lockowner stateid doesn't refer to the same file,
1682	       * I believe that is considered ok, since some clients will
1683	       * only create a single lockowner and use that for all locks
1684	       * on all files.
1685	       * For now, log it as a diagnostic, instead of considering it
1686	       * a BadStateid.
1687	       */
1688	      if (error == 0 && (stp->ls_flags &
1689		  (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1690		  getlckret == 0 && stp->ls_lfp != lfp) {
1691#ifdef DIAGNOSTIC
1692		  printf("Got a lock statid for different file open\n");
1693#endif
1694		  /*
1695		  error = NFSERR_BADSTATEID;
1696		  */
1697	      }
1698
1699	      if (error == 0) {
1700		    if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
1701			/*
1702			 * If haslock set, we've already checked the seqid.
1703			 */
1704			if (!haslock) {
1705			    if (stp->ls_flags & NFSLCK_OPEN)
1706				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1707				    stp->ls_openowner, new_stp->ls_op);
1708			    else
1709				error = NFSERR_BADSTATEID;
1710			}
1711			if (!error)
1712			    nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
1713			if (lckstp)
1714			    /*
1715			     * I believe this should be an error, but it
1716			     * isn't obvious what NFSERR_xxx would be
1717			     * appropriate, so I'll use NFSERR_INVAL for now.
1718			     */
1719			    error = NFSERR_INVAL;
1720			else
1721			    lckstp = new_stp;
1722		    } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
1723			/*
1724			 * If haslock set, ditto above.
1725			 */
1726			if (!haslock) {
1727			    if (stp->ls_flags & NFSLCK_OPEN)
1728				error = NFSERR_BADSTATEID;
1729			    else
1730				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1731				    stp, new_stp->ls_op);
1732			}
1733			lckstp = stp;
1734		    } else {
1735			lckstp = stp;
1736		    }
1737	      }
1738	      /*
1739	       * If the seqid part of the stateid isn't the same, return
1740	       * NFSERR_OLDSTATEID for cases other than I/O Ops.
1741	       * For I/O Ops, only return NFSERR_OLDSTATEID if
1742	       * nfsrv_returnoldstateid is set. (The concensus on the email
1743	       * list was that most clients would prefer to not receive
1744	       * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
1745	       * is what will happen, so I use the nfsrv_returnoldstateid to
1746	       * allow for either server configuration.)
1747	       */
1748	      if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
1749		  (((nd->nd_flag & ND_NFSV41) == 0 &&
1750		   (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1751		    nfsrv_returnoldstateid)) ||
1752		   ((nd->nd_flag & ND_NFSV41) != 0 &&
1753		    new_stp->ls_stateid.seqid != 0)))
1754		    error = NFSERR_OLDSTATEID;
1755	    }
1756	}
1757
1758	/*
1759	 * Now we can check for grace.
1760	 */
1761	if (!error)
1762		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
1763	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
1764		nfsrv_checkstable(clp))
1765		error = NFSERR_NOGRACE;
1766	/*
1767	 * If we successfully Reclaimed state, note that.
1768	 */
1769	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
1770		nfsrv_markstable(clp);
1771
1772	/*
1773	 * At this point, either error == NFSERR_BADSTATEID or the
1774	 * seqid# has been updated, so we can return any error.
1775	 * If error == 0, there may be an error in:
1776	 *    nd_repstat - Set by the calling function.
1777	 *    reterr - Set above, if getting the nfslockfile structure
1778	 *       or acquiring the local lock failed.
1779	 *    (If both of these are set, nd_repstat should probably be
1780	 *     returned, since that error was detected before this
1781	 *     function call.)
1782	 */
1783	if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
1784		if (error == 0) {
1785			if (nd->nd_repstat != 0)
1786				error = nd->nd_repstat;
1787			else
1788				error = reterr;
1789		}
1790		if (filestruct_locked != 0) {
1791			/* Roll back local locks. */
1792			NFSUNLOCKSTATE();
1793			if (vnode_unlocked == 0) {
1794				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
1795				vnode_unlocked = 1;
1796				NFSVOPUNLOCK(vp, 0);
1797			}
1798			nfsrv_locallock_rollback(vp, lfp, p);
1799			NFSLOCKSTATE();
1800			nfsrv_unlocklf(lfp);
1801		}
1802		NFSUNLOCKSTATE();
1803		goto out;
1804	}
1805
1806	/*
1807	 * Check the nfsrv_getlockfile return.
1808	 * Returned -1 if no structure found.
1809	 */
1810	if (getlckret == -1) {
1811		error = NFSERR_EXPIRED;
1812		/*
1813		 * Called from lockt, so no lock is OK.
1814		 */
1815		if (new_stp->ls_flags & NFSLCK_TEST) {
1816			error = 0;
1817		} else if (new_stp->ls_flags &
1818		    (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1819			/*
1820			 * Called to check for a lock, OK if the stateid is all
1821			 * 1s or all 0s, but there should be an nfsstate
1822			 * otherwise.
1823			 * (ie. If there is no open, I'll assume no share
1824			 *  deny bits.)
1825			 */
1826			if (specialid)
1827				error = 0;
1828			else
1829				error = NFSERR_BADSTATEID;
1830		}
1831		NFSUNLOCKSTATE();
1832		goto out;
1833	}
1834
1835	/*
1836	 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
1837	 * For NFSLCK_CHECK, allow a read if write access is granted,
1838	 * but check for a deny. For NFSLCK_LOCK, require correct access,
1839	 * which implies a conflicting deny can't exist.
1840	 */
1841	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
1842	    /*
1843	     * Four kinds of state id:
1844	     * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
1845	     * - stateid for an open
1846	     * - stateid for a delegation
1847	     * - stateid for a lock owner
1848	     */
1849	    if (!specialid) {
1850		if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
1851		    delegation = 1;
1852		    mystp = stp;
1853		    nfsrv_delaydelegtimeout(stp);
1854	        } else if (stp->ls_flags & NFSLCK_OPEN) {
1855		    mystp = stp;
1856		} else {
1857		    mystp = stp->ls_openstp;
1858		}
1859		/*
1860		 * If locking or checking, require correct access
1861		 * bit set.
1862		 */
1863		if (((new_stp->ls_flags & NFSLCK_LOCK) &&
1864		     !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
1865		       mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
1866		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
1867		      (NFSLCK_CHECK | NFSLCK_READACCESS) &&
1868		     !(mystp->ls_flags & NFSLCK_READACCESS)) ||
1869		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
1870		      (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
1871		     !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
1872			if (filestruct_locked != 0) {
1873				/* Roll back local locks. */
1874				NFSUNLOCKSTATE();
1875				if (vnode_unlocked == 0) {
1876					ASSERT_VOP_ELOCKED(vp,
1877					    "nfsrv_lockctrl3");
1878					vnode_unlocked = 1;
1879					NFSVOPUNLOCK(vp, 0);
1880				}
1881				nfsrv_locallock_rollback(vp, lfp, p);
1882				NFSLOCKSTATE();
1883				nfsrv_unlocklf(lfp);
1884			}
1885			NFSUNLOCKSTATE();
1886			error = NFSERR_OPENMODE;
1887			goto out;
1888		}
1889	    } else
1890		mystp = NULL;
1891	    if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
1892		/*
1893		 * Check for a conflicting deny bit.
1894		 */
1895		LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
1896		    if (tstp != mystp) {
1897			bits = tstp->ls_flags;
1898			bits >>= NFSLCK_SHIFT;
1899			if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
1900			    KASSERT(vnode_unlocked == 0,
1901				("nfsrv_lockctrl: vnode unlocked1"));
1902			    ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
1903				vp, p);
1904			    if (ret == 1) {
1905				/*
1906				* nfsrv_clientconflict unlocks state
1907				 * when it returns non-zero.
1908				 */
1909				lckstp = NULL;
1910				goto tryagain;
1911			    }
1912			    if (ret == 0)
1913				NFSUNLOCKSTATE();
1914			    if (ret == 2)
1915				error = NFSERR_PERM;
1916			    else
1917				error = NFSERR_OPENMODE;
1918			    goto out;
1919			}
1920		    }
1921		}
1922
1923		/* We're outta here */
1924		NFSUNLOCKSTATE();
1925		goto out;
1926	    }
1927	}
1928
1929	/*
1930	 * For setattr, just get rid of all the Delegations for other clients.
1931	 */
1932	if (new_stp->ls_flags & NFSLCK_SETATTR) {
1933		KASSERT(vnode_unlocked == 0,
1934		    ("nfsrv_lockctrl: vnode unlocked2"));
1935		ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
1936		if (ret) {
1937			/*
1938			 * nfsrv_cleandeleg() unlocks state when it
1939			 * returns non-zero.
1940			 */
1941			if (ret == -1) {
1942				lckstp = NULL;
1943				goto tryagain;
1944			}
1945			error = ret;
1946			goto out;
1947		}
1948		if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1949		    (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
1950		     LIST_EMPTY(&lfp->lf_deleg))) {
1951			NFSUNLOCKSTATE();
1952			goto out;
1953		}
1954	}
1955
1956	/*
1957	 * Check for a conflicting delegation. If one is found, call
1958	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
1959	 * been set yet, it will get the lock. Otherwise, it will recall
1960	 * the delegation. Then, we try again...
1961	 * I currently believe the conflict algorithm to be:
1962	 * For Lock Ops (Lock/LockT/LockU)
1963	 * - there is a conflict iff a different client has a write delegation
1964	 * For Reading (Read Op)
1965	 * - there is a conflict iff a different client has a write delegation
1966	 *   (the specialids are always a different client)
1967	 * For Writing (Write/Setattr of size)
1968	 * - there is a conflict if a different client has any delegation
1969	 * - there is a conflict if the same client has a read delegation
1970	 *   (I don't understand why this isn't allowed, but that seems to be
1971	 *    the current consensus?)
1972	 */
1973	tstp = LIST_FIRST(&lfp->lf_deleg);
1974	while (tstp != LIST_END(&lfp->lf_deleg)) {
1975	    nstp = LIST_NEXT(tstp, ls_file);
1976	    if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
1977		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
1978		  (new_lop->lo_flags & NFSLCK_READ))) &&
1979		  clp != tstp->ls_clp &&
1980		 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
1981		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
1982		   (new_lop->lo_flags & NFSLCK_WRITE) &&
1983		  (clp != tstp->ls_clp ||
1984		   (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
1985		ret = 0;
1986		if (filestruct_locked != 0) {
1987			/* Roll back local locks. */
1988			NFSUNLOCKSTATE();
1989			if (vnode_unlocked == 0) {
1990				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
1991				NFSVOPUNLOCK(vp, 0);
1992			}
1993			nfsrv_locallock_rollback(vp, lfp, p);
1994			NFSLOCKSTATE();
1995			nfsrv_unlocklf(lfp);
1996			NFSUNLOCKSTATE();
1997			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
1998			vnode_unlocked = 0;
1999			if ((vp->v_iflag & VI_DOOMED) != 0)
2000				ret = NFSERR_SERVERFAULT;
2001			NFSLOCKSTATE();
2002		}
2003		if (ret == 0)
2004			ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
2005		if (ret) {
2006		    /*
2007		     * nfsrv_delegconflict unlocks state when it
2008		     * returns non-zero, which it always does.
2009		     */
2010		    if (other_lop) {
2011			FREE((caddr_t)other_lop, M_NFSDLOCK);
2012			other_lop = NULL;
2013		    }
2014		    if (ret == -1) {
2015			lckstp = NULL;
2016			goto tryagain;
2017		    }
2018		    error = ret;
2019		    goto out;
2020		}
2021		/* Never gets here. */
2022	    }
2023	    tstp = nstp;
2024	}
2025
2026	/*
2027	 * Handle the unlock case by calling nfsrv_updatelock().
2028	 * (Should I have done some access checking above for unlock? For now,
2029	 *  just let it happen.)
2030	 */
2031	if (new_stp->ls_flags & NFSLCK_UNLOCK) {
2032		first = new_lop->lo_first;
2033		end = new_lop->lo_end;
2034		nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2035		stateidp->seqid = ++(stp->ls_stateid.seqid);
2036		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2037			stateidp->seqid = stp->ls_stateid.seqid = 1;
2038		stateidp->other[0] = stp->ls_stateid.other[0];
2039		stateidp->other[1] = stp->ls_stateid.other[1];
2040		stateidp->other[2] = stp->ls_stateid.other[2];
2041		if (filestruct_locked != 0) {
2042			NFSUNLOCKSTATE();
2043			if (vnode_unlocked == 0) {
2044				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2045				vnode_unlocked = 1;
2046				NFSVOPUNLOCK(vp, 0);
2047			}
2048			/* Update the local locks. */
2049			nfsrv_localunlock(vp, lfp, first, end, p);
2050			NFSLOCKSTATE();
2051			nfsrv_unlocklf(lfp);
2052		}
2053		NFSUNLOCKSTATE();
2054		goto out;
2055	}
2056
2057	/*
2058	 * Search for a conflicting lock. A lock conflicts if:
2059	 * - the lock range overlaps and
2060	 * - at least one lock is a write lock and
2061	 * - it is not owned by the same lock owner
2062	 */
2063	if (!delegation) {
2064	  LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2065	    if (new_lop->lo_end > lop->lo_first &&
2066		new_lop->lo_first < lop->lo_end &&
2067		(new_lop->lo_flags == NFSLCK_WRITE ||
2068		 lop->lo_flags == NFSLCK_WRITE) &&
2069		lckstp != lop->lo_stp &&
2070		(clp != lop->lo_stp->ls_clp ||
2071		 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2072		 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2073		    lckstp->ls_ownerlen))) {
2074		if (other_lop) {
2075		    FREE((caddr_t)other_lop, M_NFSDLOCK);
2076		    other_lop = NULL;
2077		}
2078		if (vnode_unlocked != 0)
2079		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2080			NULL, p);
2081		else
2082		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2083			vp, p);
2084		if (ret == 1) {
2085		    if (filestruct_locked != 0) {
2086			if (vnode_unlocked == 0) {
2087				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2088				NFSVOPUNLOCK(vp, 0);
2089			}
2090			/* Roll back local locks. */
2091			nfsrv_locallock_rollback(vp, lfp, p);
2092			NFSLOCKSTATE();
2093			nfsrv_unlocklf(lfp);
2094			NFSUNLOCKSTATE();
2095			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2096			vnode_unlocked = 0;
2097			if ((vp->v_iflag & VI_DOOMED) != 0) {
2098				error = NFSERR_SERVERFAULT;
2099				goto out;
2100			}
2101		    }
2102		    /*
2103		     * nfsrv_clientconflict() unlocks state when it
2104		     * returns non-zero.
2105		     */
2106		    lckstp = NULL;
2107		    goto tryagain;
2108		}
2109		/*
2110		 * Found a conflicting lock, so record the conflict and
2111		 * return the error.
2112		 */
2113		if (cfp != NULL && ret == 0) {
2114		    cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2115		    cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2116		    cfp->cl_first = lop->lo_first;
2117		    cfp->cl_end = lop->lo_end;
2118		    cfp->cl_flags = lop->lo_flags;
2119		    cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2120		    NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2121			cfp->cl_ownerlen);
2122		}
2123		if (ret == 2)
2124		    error = NFSERR_PERM;
2125		else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2126		    error = NFSERR_RECLAIMCONFLICT;
2127		else if (new_stp->ls_flags & NFSLCK_CHECK)
2128		    error = NFSERR_LOCKED;
2129		else
2130		    error = NFSERR_DENIED;
2131		if (filestruct_locked != 0 && ret == 0) {
2132			/* Roll back local locks. */
2133			NFSUNLOCKSTATE();
2134			if (vnode_unlocked == 0) {
2135				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2136				vnode_unlocked = 1;
2137				NFSVOPUNLOCK(vp, 0);
2138			}
2139			nfsrv_locallock_rollback(vp, lfp, p);
2140			NFSLOCKSTATE();
2141			nfsrv_unlocklf(lfp);
2142		}
2143		if (ret == 0)
2144			NFSUNLOCKSTATE();
2145		goto out;
2146	    }
2147	  }
2148	}
2149
2150	/*
2151	 * We only get here if there was no lock that conflicted.
2152	 */
2153	if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2154		NFSUNLOCKSTATE();
2155		goto out;
2156	}
2157
2158	/*
2159	 * We only get here when we are creating or modifying a lock.
2160	 * There are two variants:
2161	 * - exist_lock_owner where lock_owner exists
2162	 * - open_to_lock_owner with new lock_owner
2163	 */
2164	first = new_lop->lo_first;
2165	end = new_lop->lo_end;
2166	lock_flags = new_lop->lo_flags;
2167	if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2168		nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2169		stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2170		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2171			stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2172		stateidp->other[0] = lckstp->ls_stateid.other[0];
2173		stateidp->other[1] = lckstp->ls_stateid.other[1];
2174		stateidp->other[2] = lckstp->ls_stateid.other[2];
2175	} else {
2176		/*
2177		 * The new open_to_lock_owner case.
2178		 * Link the new nfsstate into the lists.
2179		 */
2180		new_stp->ls_seq = new_stp->ls_opentolockseq;
2181		nfsrvd_refcache(new_stp->ls_op);
2182		stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2183		stateidp->other[0] = new_stp->ls_stateid.other[0] =
2184		    clp->lc_clientid.lval[0];
2185		stateidp->other[1] = new_stp->ls_stateid.other[1] =
2186		    clp->lc_clientid.lval[1];
2187		stateidp->other[2] = new_stp->ls_stateid.other[2] =
2188		    nfsrv_nextstateindex(clp);
2189		new_stp->ls_clp = clp;
2190		LIST_INIT(&new_stp->ls_lock);
2191		new_stp->ls_openstp = stp;
2192		new_stp->ls_lfp = lfp;
2193		nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2194		    lfp);
2195		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2196		    new_stp, ls_hash);
2197		LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2198		*new_lopp = NULL;
2199		*new_stpp = NULL;
2200		newnfsstats.srvlockowners++;
2201		nfsrv_openpluslock++;
2202	}
2203	if (filestruct_locked != 0) {
2204		NFSUNLOCKSTATE();
2205		nfsrv_locallock_commit(lfp, lock_flags, first, end);
2206		NFSLOCKSTATE();
2207		nfsrv_unlocklf(lfp);
2208	}
2209	NFSUNLOCKSTATE();
2210
2211out:
2212	if (haslock) {
2213		NFSLOCKV4ROOTMUTEX();
2214		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2215		NFSUNLOCKV4ROOTMUTEX();
2216	}
2217	if (vnode_unlocked != 0) {
2218		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2219		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
2220			error = NFSERR_SERVERFAULT;
2221	}
2222	if (other_lop)
2223		FREE((caddr_t)other_lop, M_NFSDLOCK);
2224	NFSEXITCODE2(error, nd);
2225	return (error);
2226}
2227
2228/*
2229 * Check for state errors for Open.
2230 * repstat is passed back out as an error if more critical errors
2231 * are not detected.
 *
 * Arguments, as used in this function:
 * clientid - client id handed to nfsrv_checkrestart() and nfsrv_getclient()
 * stateidp - stateid compared against the file's delegations when
 *            new_stp has NFSLCK_DELEGCUR set
 * new_stp  - state describing the requested open; ls_flags supplies the
 *            access/deny and claim bits, ls_seq and ls_op feed the seqid
 *            check, ls_stateid is passed to nfsrv_checkrestart()
 * vp       - vnode of the file, or NULL when the file doesn't exist yet
 * nd, p    - request descriptor and process, passed through to helpers
 * repstat  - error to return when none of the restart/client/seqid/grace
 *            checks fails
 *
 * Returns 0 or an NFSERR_xxx value.
 *
 * Locking: the state lock is taken with NFSLOCKSTATE() after "tryagain"
 * and is dropped again on every return path, either here or by the
 * conflict helpers (which, per the comments below, unlock state when they
 * return non-zero). haslock records whether a helper acquired the v4root
 * lock; it is released here before returning on the paths that test it.
2232 */
2233APPLESTATIC int
2234nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
2235    struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
2236    NFSPROC_T *p, int repstat)
2237{
2238	struct nfsstate *stp, *nstp;
2239	struct nfsclient *clp;
2240	struct nfsstate *ownerstp;
2241	struct nfslockfile *lfp, *new_lfp;
2242	int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
2243
	/*
	 * readonly is set only when the share bits are exactly
	 * NFSLCK_READACCESS; the delegation conflict check at the end
	 * treats that as "Read Access and Deny None".
	 */
2244	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2245		readonly = 1;
2246	/*
2247	 * Check for restart conditions (client and server).
2248	 */
2249	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2250		&new_stp->ls_stateid, 0);
2251	if (error)
2252		goto out;
2253
2254	/*
2255	 * Check for state resource limit exceeded.
2256	 * Technically this should be SMP protected, but the worst
2257	 * case error is "out by one or two" on the count when it
2258	 * returns NFSERR_RESOURCE and the limit is just a rather
2259	 * arbitrary high water mark, so no harm is done.
2260	 */
2261	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
2262		error = NFSERR_RESOURCE;
2263		goto out;
2264	}
2265
	/*
	 * Restart point used when a conflict helper asks for a retry.
	 * A fresh nfslockfile is allocated on every pass, before the
	 * state lock is taken. getfhret stays 0 when vp is NULL.
	 */
2266tryagain:
2267	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2268	    M_NFSDLOCKFILE, M_WAITOK);
2269	if (vp)
2270		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2271		    NULL, p);
2272	NFSLOCKSTATE();
2273	/*
2274	 * Get the nfsclient structure.
2275	 */
2276	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2277	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2278
2279	/*
2280	 * Look up the open owner. See if it needs confirmation and
2281	 * check the seq#, as required.
	 * (ownerstp is only assigned when nfsrv_getclient() succeeded;
	 *  the test that follows checks error before reading it.)
2282	 */
2283	if (!error)
2284		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2285
2286	if (!error && ownerstp) {
2287		error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2288		    new_stp->ls_op);
2289		/*
2290		 * If the OpenOwner hasn't been confirmed, assume the
2291		 * old one was a replay and this one is ok.
2292		 * See: RFC3530 Sec. 14.2.18.
2293		 */
2294		if (error == NFSERR_BADSEQID &&
2295		    (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2296			error = 0;
2297	}
2298
2299	/*
2300	 * Check for grace.
2301	 */
2302	if (!error)
2303		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2304	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2305		nfsrv_checkstable(clp))
2306		error = NFSERR_NOGRACE;
2307
2308	/*
2309	 * If none of the above errors occurred, let repstat be
2310	 * returned.
2311	 */
2312	if (repstat && !error)
2313		error = repstat;
2314	if (error) {
2315		NFSUNLOCKSTATE();
2316		if (haslock) {
2317			NFSLOCKV4ROOTMUTEX();
2318			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2319			NFSUNLOCKV4ROOTMUTEX();
2320		}
		/* new_lfp was never handed to nfsrv_getlockfile(). */
2321		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2322		goto out;
2323	}
2324
2325	/*
2326	 * If vp == NULL, the file doesn't exist yet, so return ok.
2327	 * (This always happens on the first pass, so haslock must be 0.)
2328	 */
2329	if (vp == NULL) {
2330		NFSUNLOCKSTATE();
2331		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2332		goto out;
2333	}
2334
2335	/*
2336	 * Get the structure for the underlying file.
	 * A failure from the earlier nfsrv_getlockfh() call is reported
	 * here instead of calling nfsrv_getlockfile().
	 * NOTE(review): nfsrv_getlockfile() is passed &new_lfp; presumably
	 * it clears new_lfp when it keeps the structure, so a non-NULL
	 * value afterwards means it is unused and can be freed - confirm.
2337	 */
2338	if (getfhret)
2339		error = getfhret;
2340	else
2341		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2342		    NULL, 0);
2343	if (new_lfp)
2344		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2345	if (error) {
2346		NFSUNLOCKSTATE();
2347		if (haslock) {
2348			NFSLOCKV4ROOTMUTEX();
2349			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2350			NFSUNLOCKV4ROOTMUTEX();
2351		}
2352		goto out;
2353	}
2354
2355	/*
2356	 * Search for a conflicting open/share.
2357	 */
2358	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2359	    /*
2360	     * For Delegate_Cur, search for the matching Delegation,
2361	     * which indicates no conflict.
2362	     * An old delegation should have been recovered by the
2363	     * client doing a Claim_DELEGATE_Prev, so I won't let
2364	     * it match and return NFSERR_EXPIRED. Should I let it
2365	     * match?
	     * A match needs the same "other" field and either the same
	     * seqid or, for NFSv4.1 requests, a seqid of 0.
2366	     */
2367	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2368		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2369		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2370		    stateidp->seqid == 0) ||
2371		    stateidp->seqid == stp->ls_stateid.seqid) &&
2372		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2373			  NFSX_STATEIDOTHER))
2374			break;
2375	    }
	    /*
	     * Fail if the loop ran off the end of the list (no match) or
	     * if write access is requested against a read delegation.
	     */
2376	    if (stp == LIST_END(&lfp->lf_deleg) ||
2377		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2378		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2379		NFSUNLOCKSTATE();
2380		if (haslock) {
2381			NFSLOCKV4ROOTMUTEX();
2382			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2383			NFSUNLOCKV4ROOTMUTEX();
2384		}
2385		error = NFSERR_EXPIRED;
2386		goto out;
2387	    }
2388	}
2389
2390	/*
2391	 * Check for access/deny bit conflicts. I check for the same
2392	 * owner as well, in case the client didn't bother.
	 * The test is made in both directions: the new open's access
	 * bits against each existing open's flags shifted right by
	 * NFSLCK_SHIFT, and each existing open's access bits against the
	 * new open's shifted flags.
	 * NOTE(review): presumably NFSLCK_SHIFT moves the deny bits onto
	 * the access bit positions - confirm against the flag definitions.
	 * The whole check is skipped when NFSLCK_DELEGCUR is set.
2393	 */
2394	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2395		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2396		    (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2397		      ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2398		     ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2399		      ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2400			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2401			if (ret == 1) {
2402				/*
2403				 * nfsrv_clientconflict() unlocks
2404				 * state when it returns non-zero.
2405				 */
2406				goto tryagain;
2407			}
			/*
			 * ret == 2 maps to NFSERR_PERM; otherwise the
			 * conflict stands and is reported as a reclaim
			 * conflict or a share denial. State is unlocked
			 * here only for ret == 0, since the helper has
			 * already unlocked it for non-zero returns.
			 */
2408			if (ret == 2)
2409				error = NFSERR_PERM;
2410			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2411				error = NFSERR_RECLAIMCONFLICT;
2412			else
2413				error = NFSERR_SHAREDENIED;
2414			if (ret == 0)
2415				NFSUNLOCKSTATE();
2416			if (haslock) {
2417				NFSLOCKV4ROOTMUTEX();
2418				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2419				NFSUNLOCKV4ROOTMUTEX();
2420			}
2421			goto out;
2422		}
2423	}
2424
2425	/*
2426	 * Check for a conflicting delegation. If one is found, call
2427	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2428	 * been set yet, it will get the lock. Otherwise, it will recall
2429	 * the delegation. Then, we try again...
2430	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2431	 *  isn't a conflict.)
2432	 * I currently believe the conflict algorithm to be:
2433	 * For Open with Read Access and Deny None
2434	 * - there is a conflict iff a different client has a write delegation
2435	 * For Open with other Write Access or any Deny except None
2436	 * - there is a conflict if a different client has any delegation
2437	 * - there is a conflict if the same client has a read delegation
2438	 *   (The current consensus is that this last case should be
2439	 *    considered a conflict since the client with a read delegation
2440	 *    could have done an Open with ReadAccess and WriteDeny
2441	 *    locally and then not have checked for the WriteDeny.)
2442	 * Don't check for a Reclaim, since that will be dealt with
2443	 * by nfsrv_openctrl().
2444	 */
2445	if (!(new_stp->ls_flags &
2446		(NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
2447	    stp = LIST_FIRST(&lfp->lf_deleg);
2448	    while (stp != LIST_END(&lfp->lf_deleg)) {
		/* The next entry is fetched before the conflict call. */
2449		nstp = LIST_NEXT(stp, ls_file);
2450		if ((readonly && stp->ls_clp != clp &&
2451		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2452		    (!readonly && (stp->ls_clp != clp ||
2453		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2454			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2455			if (ret) {
2456			    /*
2457			     * nfsrv_delegconflict() unlocks state
2458			     * when it returns non-zero.
			     * NOTE(review): haslock is not released
			     * here before "goto out"; presumably the
			     * helper has dropped the v4root lock when
			     * it returns an error other than -1 -
			     * confirm against nfsrv_delegconflict().
2459			     */
2460			    if (ret == -1)
2461				goto tryagain;
2462			    error = ret;
2463			    goto out;
2464			}
2465		}
2466		stp = nstp;
2467	    }
2468	}
	/* No conflict found: drop the state lock and any v4root lock. */
2469	NFSUNLOCKSTATE();
2470	if (haslock) {
2471		NFSLOCKV4ROOTMUTEX();
2472		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2473		NFSUNLOCKV4ROOTMUTEX();
2474	}
2475
2476out:
2477	NFSEXITCODE2(error, nd);
2478	return (error);
2479}
2480
2481/*
2482 * Open control function to create/update open state for an open.
2483 */
2484APPLESTATIC int
2485nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2486    struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2487    nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2488    NFSPROC_T *p, u_quad_t filerev)
2489{
2490	struct nfsstate *new_stp = *new_stpp;
2491	struct nfsstate *stp, *nstp;
2492	struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2493	struct nfslockfile *lfp, *new_lfp;
2494	struct nfsclient *clp;
2495	int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2496	int readonly = 0, cbret = 1, getfhret = 0;
2497
2498	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2499		readonly = 1;
2500	/*
2501	 * Check for restart conditions (client and server).
2502	 * (Paranoia, should have been detected by nfsrv_opencheck().)
2503	 * If an error does show up, return NFSERR_EXPIRED, since the
2504	 * seqid# has already been incremented.
2505	 */
2506	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2507	    &new_stp->ls_stateid, 0);
2508	if (error) {
2509		printf("Nfsd: openctrl unexpected restart err=%d\n",
2510		    error);
2511		error = NFSERR_EXPIRED;
2512		goto out;
2513	}
2514
2515tryagain:
2516	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2517	    M_NFSDLOCKFILE, M_WAITOK);
2518	MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate),
2519	    M_NFSDSTATE, M_WAITOK);
2520	MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate),
2521	    M_NFSDSTATE, M_WAITOK);
2522	getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2523	    NULL, p);
2524	NFSLOCKSTATE();
2525	/*
2526	 * Get the client structure. Since the linked lists could be changed
2527	 * by other nfsd processes if this process does a tsleep(), one of
2528	 * two things must be done.
2529	 * 1 - don't tsleep()
2530	 * or
2531	 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2532	 *     before using the lists, since this lock stops the other
2533	 *     nfsd. This should only be used for rare cases, since it
2534	 *     essentially single threads the nfsd.
2535	 *     At this time, it is only done for cases where the stable
2536	 *     storage file must be written prior to completion of state
2537	 *     expiration.
2538	 */
2539	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2540	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2541	if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2542	    clp->lc_program) {
2543		/*
2544		 * This happens on the first open for a client
2545		 * that supports callbacks.
2546		 */
2547		NFSUNLOCKSTATE();
2548		/*
2549		 * Although nfsrv_docallback() will sleep, clp won't
2550		 * go away, since they are only removed when the
2551		 * nfsv4_lock() has blocked the nfsd threads. The
2552		 * fields in clp can change, but having multiple
2553		 * threads do this Null callback RPC should be
2554		 * harmless.
2555		 */
2556		cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2557		    NULL, 0, NULL, NULL, NULL, p);
2558		NFSLOCKSTATE();
2559		clp->lc_flags &= ~LCL_NEEDSCBNULL;
2560		if (!cbret)
2561			clp->lc_flags |= LCL_CALLBACKSON;
2562	}
2563
2564	/*
2565	 * Look up the open owner. See if it needs confirmation and
2566	 * check the seq#, as required.
2567	 */
2568	if (!error)
2569		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2570
2571	if (error) {
2572		NFSUNLOCKSTATE();
2573		printf("Nfsd: openctrl unexpected state err=%d\n",
2574			error);
2575		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2576		free((caddr_t)new_open, M_NFSDSTATE);
2577		free((caddr_t)new_deleg, M_NFSDSTATE);
2578		if (haslock) {
2579			NFSLOCKV4ROOTMUTEX();
2580			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2581			NFSUNLOCKV4ROOTMUTEX();
2582		}
2583		error = NFSERR_EXPIRED;
2584		goto out;
2585	}
2586
2587	if (new_stp->ls_flags & NFSLCK_RECLAIM)
2588		nfsrv_markstable(clp);
2589
2590	/*
2591	 * Get the structure for the underlying file.
2592	 */
2593	if (getfhret)
2594		error = getfhret;
2595	else
2596		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2597		    NULL, 0);
2598	if (new_lfp)
2599		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2600	if (error) {
2601		NFSUNLOCKSTATE();
2602		printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2603		    error);
2604		free((caddr_t)new_open, M_NFSDSTATE);
2605		free((caddr_t)new_deleg, M_NFSDSTATE);
2606		if (haslock) {
2607			NFSLOCKV4ROOTMUTEX();
2608			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2609			NFSUNLOCKV4ROOTMUTEX();
2610		}
2611		goto out;
2612	}
2613
2614	/*
2615	 * Search for a conflicting open/share.
2616	 */
2617	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2618	    /*
2619	     * For Delegate_Cur, search for the matching Delegation,
2620	     * which indicates no conflict.
2621	     * An old delegation should have been recovered by the
2622	     * client doing a Claim_DELEGATE_Prev, so I won't let
2623	     * it match and return NFSERR_EXPIRED. Should I let it
2624	     * match?
2625	     */
2626	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2627		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2628		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2629		    stateidp->seqid == 0) ||
2630		    stateidp->seqid == stp->ls_stateid.seqid) &&
2631		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2632			NFSX_STATEIDOTHER))
2633			break;
2634	    }
2635	    if (stp == LIST_END(&lfp->lf_deleg) ||
2636		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2637		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2638		NFSUNLOCKSTATE();
2639		printf("Nfsd openctrl unexpected expiry\n");
2640		free((caddr_t)new_open, M_NFSDSTATE);
2641		free((caddr_t)new_deleg, M_NFSDSTATE);
2642		if (haslock) {
2643			NFSLOCKV4ROOTMUTEX();
2644			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2645			NFSUNLOCKV4ROOTMUTEX();
2646		}
2647		error = NFSERR_EXPIRED;
2648		goto out;
2649	    }
2650
2651	    /*
2652	     * Don't issue a Delegation, since one already exists and
2653	     * delay delegation timeout, as required.
2654	     */
2655	    delegate = 0;
2656	    nfsrv_delaydelegtimeout(stp);
2657	}
2658
2659	/*
2660	 * Check for access/deny bit conflicts. I also check for the
2661	 * same owner, since the client might not have bothered to check.
2662	 * Also, note an open for the same file and owner, if found,
2663	 * which is all we do here for Delegate_Cur, since conflict
2664	 * checking is already done.
2665	 */
2666	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2667		if (ownerstp && stp->ls_openowner == ownerstp)
2668			openstp = stp;
2669		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2670		    /*
2671		     * If another client has the file open, the only
2672		     * delegation that can be issued is a Read delegation
2673		     * and only if it is a Read open with Deny none.
2674		     */
2675		    if (clp != stp->ls_clp) {
2676			if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2677			    NFSLCK_READACCESS)
2678			    writedeleg = 0;
2679			else
2680			    delegate = 0;
2681		    }
2682		    if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2683		        ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2684		       ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2685		        ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
2686			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2687			if (ret == 1) {
2688				/*
2689				 * nfsrv_clientconflict() unlocks state
2690				 * when it returns non-zero.
2691				 */
2692				free((caddr_t)new_open, M_NFSDSTATE);
2693				free((caddr_t)new_deleg, M_NFSDSTATE);
2694				openstp = NULL;
2695				goto tryagain;
2696			}
2697			if (ret == 2)
2698				error = NFSERR_PERM;
2699			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2700				error = NFSERR_RECLAIMCONFLICT;
2701			else
2702				error = NFSERR_SHAREDENIED;
2703			if (ret == 0)
2704				NFSUNLOCKSTATE();
2705			if (haslock) {
2706				NFSLOCKV4ROOTMUTEX();
2707				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2708				NFSUNLOCKV4ROOTMUTEX();
2709			}
2710			free((caddr_t)new_open, M_NFSDSTATE);
2711			free((caddr_t)new_deleg, M_NFSDSTATE);
2712			printf("nfsd openctrl unexpected client cnfl\n");
2713			goto out;
2714		    }
2715		}
2716	}
2717
2718	/*
2719	 * Check for a conflicting delegation. If one is found, call
2720	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2721	 * been set yet, it will get the lock. Otherwise, it will recall
2722	 * the delegation. Then, we try again...
2723	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2724	 *  isn't a conflict.)
2725	 * I currently believe the conflict algorithm to be:
2726	 * For Open with Read Access and Deny None
2727	 * - there is a conflict iff a different client has a write delegation
2728	 * For Open with other Write Access or any Deny except None
2729	 * - there is a conflict if a different client has any delegation
2730	 * - there is a conflict if the same client has a read delegation
2731	 *   (The current consensus is that this last case should be
2732	 *    considered a conflict since the client with a read delegation
2733	 *    could have done an Open with ReadAccess and WriteDeny
2734	 *    locally and then not have checked for the WriteDeny.)
2735	 */
2736	if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
2737	    stp = LIST_FIRST(&lfp->lf_deleg);
2738	    while (stp != LIST_END(&lfp->lf_deleg)) {
2739		nstp = LIST_NEXT(stp, ls_file);
2740		if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
2741			writedeleg = 0;
2742		else
2743			delegate = 0;
2744		if ((readonly && stp->ls_clp != clp &&
2745		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2746		    (!readonly && (stp->ls_clp != clp ||
2747		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2748		    if (new_stp->ls_flags & NFSLCK_RECLAIM) {
2749			delegate = 2;
2750		    } else {
2751			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2752			if (ret) {
2753			    /*
2754			     * nfsrv_delegconflict() unlocks state
2755			     * when it returns non-zero.
2756			     */
2757			    printf("Nfsd openctrl unexpected deleg cnfl\n");
2758			    free((caddr_t)new_open, M_NFSDSTATE);
2759			    free((caddr_t)new_deleg, M_NFSDSTATE);
2760			    if (ret == -1) {
2761				openstp = NULL;
2762				goto tryagain;
2763			    }
2764			    error = ret;
2765			    goto out;
2766			}
2767		    }
2768		}
2769		stp = nstp;
2770	    }
2771	}
2772
2773	/*
2774	 * We only get here if there was no open that conflicted.
2775	 * If an open for the owner exists, OR in the access/deny bits.
2776	 * Otherwise it is a new open. If the open_owner hasn't been
2777	 * confirmed, replace the open with the new one needing confirmation,
2778	 * otherwise add the open.
2779	 */
2780	if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
2781	    /*
2782	     * Handle NFSLCK_DELEGPREV by searching the old delegations for
2783	     * a match. If found, just move the old delegation to the current
2784	     * delegation list and issue open. If not found, return
2785	     * NFSERR_EXPIRED.
2786	     */
2787	    LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
2788		if (stp->ls_lfp == lfp) {
2789		    /* Found it */
2790		    if (stp->ls_clp != clp)
2791			panic("olddeleg clp");
2792		    LIST_REMOVE(stp, ls_list);
2793		    LIST_REMOVE(stp, ls_hash);
2794		    stp->ls_flags &= ~NFSLCK_OLDDELEG;
2795		    stp->ls_stateid.seqid = delegstateidp->seqid = 1;
2796		    stp->ls_stateid.other[0] = delegstateidp->other[0] =
2797			clp->lc_clientid.lval[0];
2798		    stp->ls_stateid.other[1] = delegstateidp->other[1] =
2799			clp->lc_clientid.lval[1];
2800		    stp->ls_stateid.other[2] = delegstateidp->other[2] =
2801			nfsrv_nextstateindex(clp);
2802		    stp->ls_compref = nd->nd_compref;
2803		    LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
2804		    LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2805			stp->ls_stateid), stp, ls_hash);
2806		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2807			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2808		    else
2809			*rflagsp |= NFSV4OPEN_READDELEGATE;
2810		    clp->lc_delegtime = NFSD_MONOSEC +
2811			nfsrv_lease + NFSRV_LEASEDELTA;
2812
2813		    /*
2814		     * Now, do the associated open.
2815		     */
2816		    new_open->ls_stateid.seqid = 1;
2817		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2818		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2819		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2820		    new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
2821			NFSLCK_OPEN;
2822		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2823			new_open->ls_flags |= (NFSLCK_READACCESS |
2824			    NFSLCK_WRITEACCESS);
2825		    else
2826			new_open->ls_flags |= NFSLCK_READACCESS;
2827		    new_open->ls_uid = new_stp->ls_uid;
2828		    new_open->ls_lfp = lfp;
2829		    new_open->ls_clp = clp;
2830		    LIST_INIT(&new_open->ls_open);
2831		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2832		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2833			new_open, ls_hash);
2834		    /*
2835		     * and handle the open owner
2836		     */
2837		    if (ownerstp) {
2838			new_open->ls_openowner = ownerstp;
2839			LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
2840		    } else {
2841			new_open->ls_openowner = new_stp;
2842			new_stp->ls_flags = 0;
2843			nfsrvd_refcache(new_stp->ls_op);
2844			new_stp->ls_noopens = 0;
2845			LIST_INIT(&new_stp->ls_open);
2846			LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2847			LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2848			*new_stpp = NULL;
2849			newnfsstats.srvopenowners++;
2850			nfsrv_openpluslock++;
2851		    }
2852		    openstp = new_open;
2853		    new_open = NULL;
2854		    newnfsstats.srvopens++;
2855		    nfsrv_openpluslock++;
2856		    break;
2857		}
2858	    }
2859	    if (stp == LIST_END(&clp->lc_olddeleg))
2860		error = NFSERR_EXPIRED;
2861	} else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2862	    /*
2863	     * Scan to see that no delegation for this client and file
2864	     * doesn't already exist.
2865	     * There also shouldn't yet be an Open for this file and
2866	     * openowner.
2867	     */
2868	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2869		if (stp->ls_clp == clp)
2870		    break;
2871	    }
2872	    if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
2873		/*
2874		 * This is the Claim_Previous case with a delegation
2875		 * type != Delegate_None.
2876		 */
2877		/*
2878		 * First, add the delegation. (Although we must issue the
2879		 * delegation, we can also ask for an immediate return.)
2880		 */
2881		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
2882		new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
2883		    clp->lc_clientid.lval[0];
2884		new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
2885		    clp->lc_clientid.lval[1];
2886		new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
2887		    nfsrv_nextstateindex(clp);
2888		if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
2889		    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
2890			NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
2891		    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2892		} else {
2893		    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
2894			NFSLCK_READACCESS);
2895		    *rflagsp |= NFSV4OPEN_READDELEGATE;
2896		}
2897		new_deleg->ls_uid = new_stp->ls_uid;
2898		new_deleg->ls_lfp = lfp;
2899		new_deleg->ls_clp = clp;
2900		new_deleg->ls_filerev = filerev;
2901		new_deleg->ls_compref = nd->nd_compref;
2902		LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
2903		LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2904		    new_deleg->ls_stateid), new_deleg, ls_hash);
2905		LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
2906		new_deleg = NULL;
2907		if (delegate == 2 || nfsrv_issuedelegs == 0 ||
2908		    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
2909		     LCL_CALLBACKSON ||
2910		    NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
2911		    !NFSVNO_DELEGOK(vp))
2912		    *rflagsp |= NFSV4OPEN_RECALL;
2913		newnfsstats.srvdelegates++;
2914		nfsrv_openpluslock++;
2915		nfsrv_delegatecnt++;
2916
2917		/*
2918		 * Now, do the associated open.
2919		 */
2920		new_open->ls_stateid.seqid = 1;
2921		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2922		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2923		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2924		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
2925		    NFSLCK_OPEN;
2926		if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
2927			new_open->ls_flags |= (NFSLCK_READACCESS |
2928			    NFSLCK_WRITEACCESS);
2929		else
2930			new_open->ls_flags |= NFSLCK_READACCESS;
2931		new_open->ls_uid = new_stp->ls_uid;
2932		new_open->ls_lfp = lfp;
2933		new_open->ls_clp = clp;
2934		LIST_INIT(&new_open->ls_open);
2935		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2936		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2937		   new_open, ls_hash);
2938		/*
2939		 * and handle the open owner
2940		 */
2941		if (ownerstp) {
2942		    new_open->ls_openowner = ownerstp;
2943		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
2944		} else {
2945		    new_open->ls_openowner = new_stp;
2946		    new_stp->ls_flags = 0;
2947		    nfsrvd_refcache(new_stp->ls_op);
2948		    new_stp->ls_noopens = 0;
2949		    LIST_INIT(&new_stp->ls_open);
2950		    LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2951		    LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2952		    *new_stpp = NULL;
2953		    newnfsstats.srvopenowners++;
2954		    nfsrv_openpluslock++;
2955		}
2956		openstp = new_open;
2957		new_open = NULL;
2958		newnfsstats.srvopens++;
2959		nfsrv_openpluslock++;
2960	    } else {
2961		error = NFSERR_RECLAIMCONFLICT;
2962	    }
2963	} else if (ownerstp) {
2964		if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
2965		    /* Replace the open */
2966		    if (ownerstp->ls_op)
2967			nfsrvd_derefcache(ownerstp->ls_op);
2968		    ownerstp->ls_op = new_stp->ls_op;
2969		    nfsrvd_refcache(ownerstp->ls_op);
2970		    ownerstp->ls_seq = new_stp->ls_seq;
2971		    *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
2972		    stp = LIST_FIRST(&ownerstp->ls_open);
2973		    stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
2974			NFSLCK_OPEN;
2975		    stp->ls_stateid.seqid = 1;
2976		    stp->ls_uid = new_stp->ls_uid;
2977		    if (lfp != stp->ls_lfp) {
2978			LIST_REMOVE(stp, ls_file);
2979			LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
2980			stp->ls_lfp = lfp;
2981		    }
2982		    openstp = stp;
2983		} else if (openstp) {
2984		    openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
2985		    openstp->ls_stateid.seqid++;
2986		    if ((nd->nd_flag & ND_NFSV41) != 0 &&
2987			openstp->ls_stateid.seqid == 0)
2988			openstp->ls_stateid.seqid = 1;
2989
2990		    /*
2991		     * This is where we can choose to issue a delegation.
2992		     */
2993		    if (delegate == 0 || writedeleg == 0 ||
2994			NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
2995			nfsrv_writedelegifpos == 0) ||
2996			!NFSVNO_DELEGOK(vp) ||
2997			(new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
2998			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
2999			 LCL_CALLBACKSON)
3000			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3001		    else if (nfsrv_issuedelegs == 0 ||
3002			NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3003			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3004		    else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3005			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3006		    else {
3007			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3008			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3009			    = clp->lc_clientid.lval[0];
3010			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3011			    = clp->lc_clientid.lval[1];
3012			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3013			    = nfsrv_nextstateindex(clp);
3014			new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3015			    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3016			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3017			new_deleg->ls_uid = new_stp->ls_uid;
3018			new_deleg->ls_lfp = lfp;
3019			new_deleg->ls_clp = clp;
3020			new_deleg->ls_filerev = filerev;
3021			new_deleg->ls_compref = nd->nd_compref;
3022			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3023			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3024			    new_deleg->ls_stateid), new_deleg, ls_hash);
3025			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3026			new_deleg = NULL;
3027			newnfsstats.srvdelegates++;
3028			nfsrv_openpluslock++;
3029			nfsrv_delegatecnt++;
3030		    }
3031		} else {
3032		    new_open->ls_stateid.seqid = 1;
3033		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3034		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3035		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3036		    new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3037			NFSLCK_OPEN;
3038		    new_open->ls_uid = new_stp->ls_uid;
3039		    new_open->ls_openowner = ownerstp;
3040		    new_open->ls_lfp = lfp;
3041		    new_open->ls_clp = clp;
3042		    LIST_INIT(&new_open->ls_open);
3043		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3044		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3045		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3046			new_open, ls_hash);
3047		    openstp = new_open;
3048		    new_open = NULL;
3049		    newnfsstats.srvopens++;
3050		    nfsrv_openpluslock++;
3051
3052		    /*
3053		     * This is where we can choose to issue a delegation.
3054		     */
3055		    if (delegate == 0 || (writedeleg == 0 && readonly == 0) ||
3056			!NFSVNO_DELEGOK(vp) ||
3057			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3058			 LCL_CALLBACKSON)
3059			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3060		    else if (nfsrv_issuedelegs == 0 ||
3061			NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3062			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3063		    else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3064			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3065		    else {
3066			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3067			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3068			    = clp->lc_clientid.lval[0];
3069			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3070			    = clp->lc_clientid.lval[1];
3071			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3072			    = nfsrv_nextstateindex(clp);
3073			if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3074			    (nfsrv_writedelegifpos || !readonly) &&
3075			    (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
3076			    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3077				NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3078			    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3079			} else {
3080			    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3081				NFSLCK_READACCESS);
3082			    *rflagsp |= NFSV4OPEN_READDELEGATE;
3083			}
3084			new_deleg->ls_uid = new_stp->ls_uid;
3085			new_deleg->ls_lfp = lfp;
3086			new_deleg->ls_clp = clp;
3087			new_deleg->ls_filerev = filerev;
3088			new_deleg->ls_compref = nd->nd_compref;
3089			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3090			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3091			    new_deleg->ls_stateid), new_deleg, ls_hash);
3092			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3093			new_deleg = NULL;
3094			newnfsstats.srvdelegates++;
3095			nfsrv_openpluslock++;
3096			nfsrv_delegatecnt++;
3097		    }
3098		}
3099	} else {
3100		/*
3101		 * New owner case. Start the open_owner sequence with a
3102		 * Needs confirmation (unless a reclaim) and hang the
3103		 * new open off it.
3104		 */
3105		new_open->ls_stateid.seqid = 1;
3106		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3107		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3108		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3109		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3110		    NFSLCK_OPEN;
3111		new_open->ls_uid = new_stp->ls_uid;
3112		LIST_INIT(&new_open->ls_open);
3113		new_open->ls_openowner = new_stp;
3114		new_open->ls_lfp = lfp;
3115		new_open->ls_clp = clp;
3116		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3117		if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3118			new_stp->ls_flags = 0;
3119		} else if ((nd->nd_flag & ND_NFSV41) != 0) {
3120			/* NFSv4.1 never needs confirmation. */
3121			new_stp->ls_flags = 0;
3122
3123			/*
3124			 * This is where we can choose to issue a delegation.
3125			 */
3126			if (delegate && nfsrv_issuedelegs &&
3127			    (writedeleg || readonly) &&
3128			    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
3129			     LCL_CALLBACKSON &&
3130			    !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
3131			    NFSVNO_DELEGOK(vp) &&
3132			    ((nd->nd_flag & ND_NFSV41) == 0 ||
3133			     (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
3134				new_deleg->ls_stateid.seqid =
3135				    delegstateidp->seqid = 1;
3136				new_deleg->ls_stateid.other[0] =
3137				    delegstateidp->other[0]
3138				    = clp->lc_clientid.lval[0];
3139				new_deleg->ls_stateid.other[1] =
3140				    delegstateidp->other[1]
3141				    = clp->lc_clientid.lval[1];
3142				new_deleg->ls_stateid.other[2] =
3143				    delegstateidp->other[2]
3144				    = nfsrv_nextstateindex(clp);
3145				if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3146				    (nfsrv_writedelegifpos || !readonly) &&
3147				    ((nd->nd_flag & ND_NFSV41) == 0 ||
3148				     (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
3149				     0)) {
3150					new_deleg->ls_flags =
3151					    (NFSLCK_DELEGWRITE |
3152					     NFSLCK_READACCESS |
3153					     NFSLCK_WRITEACCESS);
3154					*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3155				} else {
3156					new_deleg->ls_flags =
3157					    (NFSLCK_DELEGREAD |
3158					     NFSLCK_READACCESS);
3159					*rflagsp |= NFSV4OPEN_READDELEGATE;
3160				}
3161				new_deleg->ls_uid = new_stp->ls_uid;
3162				new_deleg->ls_lfp = lfp;
3163				new_deleg->ls_clp = clp;
3164				new_deleg->ls_filerev = filerev;
3165				new_deleg->ls_compref = nd->nd_compref;
3166				LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
3167				    ls_file);
3168				LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3169				    new_deleg->ls_stateid), new_deleg, ls_hash);
3170				LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
3171				    ls_list);
3172				new_deleg = NULL;
3173				newnfsstats.srvdelegates++;
3174				nfsrv_openpluslock++;
3175				nfsrv_delegatecnt++;
3176			}
3177		} else {
3178			*rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3179			new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3180		}
3181		nfsrvd_refcache(new_stp->ls_op);
3182		new_stp->ls_noopens = 0;
3183		LIST_INIT(&new_stp->ls_open);
3184		LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3185		LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3186		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3187		    new_open, ls_hash);
3188		openstp = new_open;
3189		new_open = NULL;
3190		*new_stpp = NULL;
3191		newnfsstats.srvopens++;
3192		nfsrv_openpluslock++;
3193		newnfsstats.srvopenowners++;
3194		nfsrv_openpluslock++;
3195	}
3196	if (!error) {
3197		stateidp->seqid = openstp->ls_stateid.seqid;
3198		stateidp->other[0] = openstp->ls_stateid.other[0];
3199		stateidp->other[1] = openstp->ls_stateid.other[1];
3200		stateidp->other[2] = openstp->ls_stateid.other[2];
3201	}
3202	NFSUNLOCKSTATE();
3203	if (haslock) {
3204		NFSLOCKV4ROOTMUTEX();
3205		nfsv4_unlock(&nfsv4rootfs_lock, 1);
3206		NFSUNLOCKV4ROOTMUTEX();
3207	}
3208	if (new_open)
3209		FREE((caddr_t)new_open, M_NFSDSTATE);
3210	if (new_deleg)
3211		FREE((caddr_t)new_deleg, M_NFSDSTATE);
3212
3213out:
3214	NFSEXITCODE2(error, nd);
3215	return (error);
3216}
3217
3218/*
3219 * Open update. Does the confirm, downgrade and close.
3220 */
3221APPLESTATIC int
3222nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
3223    nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p)
3224{
3225	struct nfsstate *stp, *ownerstp;
3226	struct nfsclient *clp;
3227	struct nfslockfile *lfp;
3228	u_int32_t bits;
3229	int error = 0, gotstate = 0, len = 0;
3230	u_char client[NFSV4_OPAQUELIMIT];
3231
3232	/*
3233	 * Check for restart conditions (client and server).
3234	 */
3235	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3236	    &new_stp->ls_stateid, 0);
3237	if (error)
3238		goto out;
3239
3240	NFSLOCKSTATE();
3241	/*
3242	 * Get the open structure via clientid and stateid.
3243	 */
3244	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3245	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
3246	if (!error)
3247		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
3248		    new_stp->ls_flags, &stp);
3249
3250	/*
3251	 * Sanity check the open.
3252	 */
3253	if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
3254		(!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
3255		 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
3256		((new_stp->ls_flags & NFSLCK_CONFIRM) &&
3257		 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
3258		error = NFSERR_BADSTATEID;
3259
3260	if (!error)
3261		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
3262		    stp->ls_openowner, new_stp->ls_op);
3263	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
3264	    (((nd->nd_flag & ND_NFSV41) == 0 &&
3265	      !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
3266	     ((nd->nd_flag & ND_NFSV41) != 0 &&
3267	      new_stp->ls_stateid.seqid != 0)))
3268		error = NFSERR_OLDSTATEID;
3269	if (!error && vnode_vtype(vp) != VREG) {
3270		if (vnode_vtype(vp) == VDIR)
3271			error = NFSERR_ISDIR;
3272		else
3273			error = NFSERR_INVAL;
3274	}
3275
3276	if (error) {
3277		/*
3278		 * If a client tries to confirm an Open with a bad
3279		 * seqid# and there are no byte range locks or other Opens
3280		 * on the openowner, just throw it away, so the next use of the
3281		 * openowner will start a fresh seq#.
3282		 */
3283		if (error == NFSERR_BADSEQID &&
3284		    (new_stp->ls_flags & NFSLCK_CONFIRM) &&
3285		    nfsrv_nootherstate(stp))
3286			nfsrv_freeopenowner(stp->ls_openowner, 0, p);
3287		NFSUNLOCKSTATE();
3288		goto out;
3289	}
3290
3291	/*
3292	 * Set the return stateid.
3293	 */
3294	stateidp->seqid = stp->ls_stateid.seqid + 1;
3295	if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
3296		stateidp->seqid = 1;
3297	stateidp->other[0] = stp->ls_stateid.other[0];
3298	stateidp->other[1] = stp->ls_stateid.other[1];
3299	stateidp->other[2] = stp->ls_stateid.other[2];
3300	/*
3301	 * Now, handle the three cases.
3302	 */
3303	if (new_stp->ls_flags & NFSLCK_CONFIRM) {
3304		/*
3305		 * If the open doesn't need confirmation, it seems to me that
3306		 * there is a client error, but I'll just log it and keep going?
3307		 */
3308		if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
3309			printf("Nfsv4d: stray open confirm\n");
3310		stp->ls_openowner->ls_flags = 0;
3311		stp->ls_stateid.seqid++;
3312		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3313		    stp->ls_stateid.seqid == 0)
3314			stp->ls_stateid.seqid = 1;
3315		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3316			clp->lc_flags |= LCL_STAMPEDSTABLE;
3317			len = clp->lc_idlen;
3318			NFSBCOPY(clp->lc_id, client, len);
3319			gotstate = 1;
3320		}
3321		NFSUNLOCKSTATE();
3322	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
3323		ownerstp = stp->ls_openowner;
3324		lfp = stp->ls_lfp;
3325		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
3326			/* Get the lf lock */
3327			nfsrv_locklf(lfp);
3328			NFSUNLOCKSTATE();
3329			ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
3330			NFSVOPUNLOCK(vp, 0);
3331			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
3332				NFSLOCKSTATE();
3333				nfsrv_unlocklf(lfp);
3334				NFSUNLOCKSTATE();
3335			}
3336			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
3337		} else {
3338			(void) nfsrv_freeopen(stp, NULL, 0, p);
3339			NFSUNLOCKSTATE();
3340		}
3341	} else {
3342		/*
3343		 * Update the share bits, making sure that the new set are a
3344		 * subset of the old ones.
3345		 */
3346		bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
3347		if (~(stp->ls_flags) & bits) {
3348			NFSUNLOCKSTATE();
3349			error = NFSERR_INVAL;
3350			goto out;
3351		}
3352		stp->ls_flags = (bits | NFSLCK_OPEN);
3353		stp->ls_stateid.seqid++;
3354		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3355		    stp->ls_stateid.seqid == 0)
3356			stp->ls_stateid.seqid = 1;
3357		NFSUNLOCKSTATE();
3358	}
3359
3360	/*
3361	 * If the client just confirmed its first open, write a timestamp
3362	 * to the stable storage file.
3363	 */
3364	if (gotstate != 0) {
3365		nfsrv_writestable(client, len, NFSNST_NEWSTATE, p);
3366		nfsrv_backupstable();
3367	}
3368
3369out:
3370	NFSEXITCODE2(error, nd);
3371	return (error);
3372}
3373
3374/*
3375 * Delegation update. Does the purge and return.
3376 */
3377APPLESTATIC int
3378nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3379    nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3380    NFSPROC_T *p)
3381{
3382	struct nfsstate *stp;
3383	struct nfsclient *clp;
3384	int error = 0;
3385	fhandle_t fh;
3386
3387	/*
3388	 * Do a sanity check against the file handle for DelegReturn.
3389	 */
3390	if (vp) {
3391		error = nfsvno_getfh(vp, &fh, p);
3392		if (error)
3393			goto out;
3394	}
3395	/*
3396	 * Check for restart conditions (client and server).
3397	 */
3398	if (op == NFSV4OP_DELEGRETURN)
3399		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3400			stateidp, 0);
3401	else
3402		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3403			stateidp, 0);
3404
3405	NFSLOCKSTATE();
3406	/*
3407	 * Get the open structure via clientid and stateid.
3408	 */
3409	if (!error)
3410	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3411		(nfsquad_t)((u_quad_t)0), 0, nd, p);
3412	if (error) {
3413		if (error == NFSERR_CBPATHDOWN)
3414			error = 0;
3415		if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3416			error = NFSERR_STALESTATEID;
3417	}
3418	if (!error && op == NFSV4OP_DELEGRETURN) {
3419	    error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3420	    if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3421		((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3422		error = NFSERR_OLDSTATEID;
3423	}
3424	/*
3425	 * NFSERR_EXPIRED means that the state has gone away,
3426	 * so Delegations have been purged. Just return ok.
3427	 */
3428	if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3429		NFSUNLOCKSTATE();
3430		error = 0;
3431		goto out;
3432	}
3433	if (error) {
3434		NFSUNLOCKSTATE();
3435		goto out;
3436	}
3437
3438	if (op == NFSV4OP_DELEGRETURN) {
3439		if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3440		    sizeof (fhandle_t))) {
3441			NFSUNLOCKSTATE();
3442			error = NFSERR_BADSTATEID;
3443			goto out;
3444		}
3445		nfsrv_freedeleg(stp);
3446	} else {
3447		nfsrv_freedeleglist(&clp->lc_olddeleg);
3448	}
3449	NFSUNLOCKSTATE();
3450	error = 0;
3451
3452out:
3453	NFSEXITCODE(error);
3454	return (error);
3455}
3456
3457/*
3458 * Release lock owner.
3459 */
3460APPLESTATIC int
3461nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3462    NFSPROC_T *p)
3463{
3464	struct nfsstate *stp, *nstp, *openstp, *ownstp;
3465	struct nfsclient *clp;
3466	int error = 0;
3467
3468	/*
3469	 * Check for restart conditions (client and server).
3470	 */
3471	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3472	    &new_stp->ls_stateid, 0);
3473	if (error)
3474		goto out;
3475
3476	NFSLOCKSTATE();
3477	/*
3478	 * Get the lock owner by name.
3479	 */
3480	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3481	    (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3482	if (error) {
3483		NFSUNLOCKSTATE();
3484		goto out;
3485	}
3486	LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3487	    LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3488		stp = LIST_FIRST(&openstp->ls_open);
3489		while (stp != LIST_END(&openstp->ls_open)) {
3490		    nstp = LIST_NEXT(stp, ls_list);
3491		    /*
3492		     * If the owner matches, check for locks and
3493		     * then free or return an error.
3494		     */
3495		    if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3496			!NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3497			 stp->ls_ownerlen)){
3498			if (LIST_EMPTY(&stp->ls_lock)) {
3499			    nfsrv_freelockowner(stp, NULL, 0, p);
3500			} else {
3501			    NFSUNLOCKSTATE();
3502			    error = NFSERR_LOCKSHELD;
3503			    goto out;
3504			}
3505		    }
3506		    stp = nstp;
3507		}
3508	    }
3509	}
3510	NFSUNLOCKSTATE();
3511
3512out:
3513	NFSEXITCODE(error);
3514	return (error);
3515}
3516
3517/*
3518 * Get the file handle for a lock structure.
3519 */
3520static int
3521nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3522    fhandle_t *nfhp, NFSPROC_T *p)
3523{
3524	fhandle_t *fhp = NULL;
3525	int error;
3526
3527	/*
3528	 * For lock, use the new nfslock structure, otherwise just
3529	 * a fhandle_t on the stack.
3530	 */
3531	if (flags & NFSLCK_OPEN) {
3532		KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3533		fhp = &new_lfp->lf_fh;
3534	} else if (nfhp) {
3535		fhp = nfhp;
3536	} else {
3537		panic("nfsrv_getlockfh");
3538	}
3539	error = nfsvno_getfh(vp, fhp, p);
3540	NFSEXITCODE(error);
3541	return (error);
3542}
3543
3544/*
3545 * Get an nfs lock structure. Allocate one, as required, and return a
3546 * pointer to it.
3547 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3548 */
3549static int
3550nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3551    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3552{
3553	struct nfslockfile *lfp;
3554	fhandle_t *fhp = NULL, *tfhp;
3555	struct nfslockhashhead *hp;
3556	struct nfslockfile *new_lfp = NULL;
3557
3558	/*
3559	 * For lock, use the new nfslock structure, otherwise just
3560	 * a fhandle_t on the stack.
3561	 */
3562	if (flags & NFSLCK_OPEN) {
3563		new_lfp = *new_lfpp;
3564		fhp = &new_lfp->lf_fh;
3565	} else if (nfhp) {
3566		fhp = nfhp;
3567	} else {
3568		panic("nfsrv_getlockfile");
3569	}
3570
3571	hp = NFSLOCKHASH(fhp);
3572	LIST_FOREACH(lfp, hp, lf_hash) {
3573		tfhp = &lfp->lf_fh;
3574		if (NFSVNO_CMPFH(fhp, tfhp)) {
3575			if (lockit)
3576				nfsrv_locklf(lfp);
3577			*lfpp = lfp;
3578			return (0);
3579		}
3580	}
3581	if (!(flags & NFSLCK_OPEN))
3582		return (-1);
3583
3584	/*
3585	 * No match, so chain the new one into the list.
3586	 */
3587	LIST_INIT(&new_lfp->lf_open);
3588	LIST_INIT(&new_lfp->lf_lock);
3589	LIST_INIT(&new_lfp->lf_deleg);
3590	LIST_INIT(&new_lfp->lf_locallock);
3591	LIST_INIT(&new_lfp->lf_rollback);
3592	new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3593	new_lfp->lf_locallock_lck.nfslock_lock = 0;
3594	new_lfp->lf_usecount = 0;
3595	LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3596	*lfpp = new_lfp;
3597	*new_lfpp = NULL;
3598	return (0);
3599}
3600
3601/*
3602 * This function adds a nfslock lock structure to the list for the associated
3603 * nfsstate and nfslockfile structures. It will be inserted after the
3604 * entry pointed at by insert_lop.
3605 */
3606static void
3607nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3608    struct nfsstate *stp, struct nfslockfile *lfp)
3609{
3610	struct nfslock *lop, *nlop;
3611
3612	new_lop->lo_stp = stp;
3613	new_lop->lo_lfp = lfp;
3614
3615	if (stp != NULL) {
3616		/* Insert in increasing lo_first order */
3617		lop = LIST_FIRST(&lfp->lf_lock);
3618		if (lop == LIST_END(&lfp->lf_lock) ||
3619		    new_lop->lo_first <= lop->lo_first) {
3620			LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3621		} else {
3622			nlop = LIST_NEXT(lop, lo_lckfile);
3623			while (nlop != LIST_END(&lfp->lf_lock) &&
3624			       nlop->lo_first < new_lop->lo_first) {
3625				lop = nlop;
3626				nlop = LIST_NEXT(lop, lo_lckfile);
3627			}
3628			LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3629		}
3630	} else {
3631		new_lop->lo_lckfile.le_prev = NULL;	/* list not used */
3632	}
3633
3634	/*
3635	 * Insert after insert_lop, which is overloaded as stp or lfp for
3636	 * an empty list.
3637	 */
3638	if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3639		LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3640	else if ((struct nfsstate *)insert_lop == stp)
3641		LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3642	else
3643		LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3644	if (stp != NULL) {
3645		newnfsstats.srvlocks++;
3646		nfsrv_openpluslock++;
3647	}
3648}
3649
3650/*
3651 * This function updates the locking for a lock owner and given file. It
3652 * maintains a list of lock ranges ordered on increasing file offset that
3653 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
3654 * It always adds new_lop to the list and sometimes uses the one pointed
3655 * at by other_lopp.
3656 */
3657static void
3658nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
3659    struct nfslock **other_lopp, struct nfslockfile *lfp)
3660{
3661	struct nfslock *new_lop = *new_lopp;
3662	struct nfslock *lop, *tlop, *ilop;
3663	struct nfslock *other_lop = *other_lopp;
3664	int unlock = 0, myfile = 0;
3665	u_int64_t tmp;
3666
3667	/*
3668	 * Work down the list until the lock is merged.
3669	 */
3670	if (new_lop->lo_flags & NFSLCK_UNLOCK)
3671		unlock = 1;
3672	if (stp != NULL) {
3673		ilop = (struct nfslock *)stp;
3674		lop = LIST_FIRST(&stp->ls_lock);
3675	} else {
3676		ilop = (struct nfslock *)lfp;
3677		lop = LIST_FIRST(&lfp->lf_locallock);
3678	}
3679	while (lop != NULL) {
3680	    /*
3681	     * Only check locks for this file that aren't before the start of
3682	     * new lock's range.
3683	     */
3684	    if (lop->lo_lfp == lfp) {
3685	      myfile = 1;
3686	      if (lop->lo_end >= new_lop->lo_first) {
3687		if (new_lop->lo_end < lop->lo_first) {
3688			/*
3689			 * If the new lock ends before the start of the
3690			 * current lock's range, no merge, just insert
3691			 * the new lock.
3692			 */
3693			break;
3694		}
3695		if (new_lop->lo_flags == lop->lo_flags ||
3696		    (new_lop->lo_first <= lop->lo_first &&
3697		     new_lop->lo_end >= lop->lo_end)) {
3698			/*
3699			 * This lock can be absorbed by the new lock/unlock.
3700			 * This happens when it covers the entire range
3701			 * of the old lock or is contiguous
3702			 * with the old lock and is of the same type or an
3703			 * unlock.
3704			 */
3705			if (lop->lo_first < new_lop->lo_first)
3706				new_lop->lo_first = lop->lo_first;
3707			if (lop->lo_end > new_lop->lo_end)
3708				new_lop->lo_end = lop->lo_end;
3709			tlop = lop;
3710			lop = LIST_NEXT(lop, lo_lckowner);
3711			nfsrv_freenfslock(tlop);
3712			continue;
3713		}
3714
3715		/*
3716		 * All these cases are for contiguous locks that are not the
3717		 * same type, so they can't be merged.
3718		 */
3719		if (new_lop->lo_first <= lop->lo_first) {
3720			/*
3721			 * This case is where the new lock overlaps with the
3722			 * first part of the old lock. Move the start of the
3723			 * old lock to just past the end of the new lock. The
3724			 * new lock will be inserted in front of the old, since
3725			 * ilop hasn't been updated. (We are done now.)
3726			 */
3727			lop->lo_first = new_lop->lo_end;
3728			break;
3729		}
3730		if (new_lop->lo_end >= lop->lo_end) {
3731			/*
3732			 * This case is where the new lock overlaps with the
3733			 * end of the old lock's range. Move the old lock's
3734			 * end to just before the new lock's first and insert
3735			 * the new lock after the old lock.
3736			 * Might not be done yet, since the new lock could
3737			 * overlap further locks with higher ranges.
3738			 */
3739			lop->lo_end = new_lop->lo_first;
3740			ilop = lop;
3741			lop = LIST_NEXT(lop, lo_lckowner);
3742			continue;
3743		}
3744		/*
3745		 * The final case is where the new lock's range is in the
3746		 * middle of the current lock's and splits the current lock
3747		 * up. Use *other_lopp to handle the second part of the
3748		 * split old lock range. (We are done now.)
3749		 * For unlock, we use new_lop as other_lop and tmp, since
3750		 * other_lop and new_lop are the same for this case.
3751		 * We noted the unlock case above, so we don't need
3752		 * new_lop->lo_flags any longer.
3753		 */
3754		tmp = new_lop->lo_first;
3755		if (other_lop == NULL) {
3756			if (!unlock)
3757				panic("nfsd srv update unlock");
3758			other_lop = new_lop;
3759			*new_lopp = NULL;
3760		}
3761		other_lop->lo_first = new_lop->lo_end;
3762		other_lop->lo_end = lop->lo_end;
3763		other_lop->lo_flags = lop->lo_flags;
3764		other_lop->lo_stp = stp;
3765		other_lop->lo_lfp = lfp;
3766		lop->lo_end = tmp;
3767		nfsrv_insertlock(other_lop, lop, stp, lfp);
3768		*other_lopp = NULL;
3769		ilop = lop;
3770		break;
3771	      }
3772	    }
3773	    ilop = lop;
3774	    lop = LIST_NEXT(lop, lo_lckowner);
3775	    if (myfile && (lop == NULL || lop->lo_lfp != lfp))
3776		break;
3777	}
3778
3779	/*
3780	 * Insert the new lock in the list at the appropriate place.
3781	 */
3782	if (!unlock) {
3783		nfsrv_insertlock(new_lop, ilop, stp, lfp);
3784		*new_lopp = NULL;
3785	}
3786}
3787
3788/*
3789 * This function handles sequencing of locks, etc.
3790 * It returns an error that indicates what the caller should do.
3791 */
3792static int
3793nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
3794    struct nfsstate *stp, struct nfsrvcache *op)
3795{
3796	int error = 0;
3797
3798	if ((nd->nd_flag & ND_NFSV41) != 0)
3799		/* NFSv4.1 ignores the open_seqid and lock_seqid. */
3800		goto out;
3801	if (op != nd->nd_rp)
3802		panic("nfsrvstate checkseqid");
3803	if (!(op->rc_flag & RC_INPROG))
3804		panic("nfsrvstate not inprog");
3805	if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
3806		printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
3807		panic("nfsrvstate op refcnt");
3808	}
3809	if ((stp->ls_seq + 1) == seqid) {
3810		if (stp->ls_op)
3811			nfsrvd_derefcache(stp->ls_op);
3812		stp->ls_op = op;
3813		nfsrvd_refcache(op);
3814		stp->ls_seq = seqid;
3815		goto out;
3816	} else if (stp->ls_seq == seqid && stp->ls_op &&
3817		op->rc_xid == stp->ls_op->rc_xid &&
3818		op->rc_refcnt == 0 &&
3819		op->rc_reqlen == stp->ls_op->rc_reqlen &&
3820		op->rc_cksum == stp->ls_op->rc_cksum) {
3821		if (stp->ls_op->rc_flag & RC_INPROG) {
3822			error = NFSERR_DONTREPLY;
3823			goto out;
3824		}
3825		nd->nd_rp = stp->ls_op;
3826		nd->nd_rp->rc_flag |= RC_INPROG;
3827		nfsrvd_delcache(op);
3828		error = NFSERR_REPLYFROMCACHE;
3829		goto out;
3830	}
3831	error = NFSERR_BADSEQID;
3832
3833out:
3834	NFSEXITCODE2(error, nd);
3835	return (error);
3836}
3837
3838/*
3839 * Get the client ip address for callbacks. If the strings can't be parsed,
3840 * just set lc_program to 0 to indicate no callbacks are possible.
3841 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
3842 *  the address to the client's transport address. This won't be used
3843 *  for callbacks, but can be printed out by newnfsstats for info.)
3844 * Return error if the xdr can't be parsed, 0 otherwise.
3845 */
3846APPLESTATIC int
3847nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
3848{
3849	u_int32_t *tl;
3850	u_char *cp, *cp2;
3851	int i, j;
3852	struct sockaddr_in *rad, *sad;
3853	u_char protocol[5], addr[24];
3854	int error = 0, cantparse = 0;
3855	union {
3856		u_long ival;
3857		u_char cval[4];
3858	} ip;
3859	union {
3860		u_short sval;
3861		u_char cval[2];
3862	} port;
3863
3864	rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
3865	rad->sin_family = AF_INET;
3866	rad->sin_len = sizeof (struct sockaddr_in);
3867	rad->sin_addr.s_addr = 0;
3868	rad->sin_port = 0;
3869	clp->lc_req.nr_client = NULL;
3870	clp->lc_req.nr_lock = 0;
3871	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3872	i = fxdr_unsigned(int, *tl);
3873	if (i >= 3 && i <= 4) {
3874		error = nfsrv_mtostr(nd, protocol, i);
3875		if (error)
3876			goto nfsmout;
3877		if (!strcmp(protocol, "tcp")) {
3878			clp->lc_flags |= LCL_TCPCALLBACK;
3879			clp->lc_req.nr_sotype = SOCK_STREAM;
3880			clp->lc_req.nr_soproto = IPPROTO_TCP;
3881		} else if (!strcmp(protocol, "udp")) {
3882			clp->lc_req.nr_sotype = SOCK_DGRAM;
3883			clp->lc_req.nr_soproto = IPPROTO_UDP;
3884		} else {
3885			cantparse = 1;
3886		}
3887	} else {
3888		cantparse = 1;
3889		if (i > 0) {
3890			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3891			if (error)
3892				goto nfsmout;
3893		}
3894	}
3895	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3896	i = fxdr_unsigned(int, *tl);
3897	if (i < 0) {
3898		error = NFSERR_BADXDR;
3899		goto nfsmout;
3900	} else if (i == 0) {
3901		cantparse = 1;
3902	} else if (!cantparse && i <= 23 && i >= 11) {
3903		error = nfsrv_mtostr(nd, addr, i);
3904		if (error)
3905			goto nfsmout;
3906
3907		/*
3908		 * Parse out the address fields. We expect 6 decimal numbers
3909		 * separated by '.'s.
3910		 */
3911		cp = addr;
3912		i = 0;
3913		while (*cp && i < 6) {
3914			cp2 = cp;
3915			while (*cp2 && *cp2 != '.')
3916				cp2++;
3917			if (*cp2)
3918				*cp2++ = '\0';
3919			else if (i != 5) {
3920				cantparse = 1;
3921				break;
3922			}
3923			j = nfsrv_getipnumber(cp);
3924			if (j >= 0) {
3925				if (i < 4)
3926					ip.cval[3 - i] = j;
3927				else
3928					port.cval[5 - i] = j;
3929			} else {
3930				cantparse = 1;
3931				break;
3932			}
3933			cp = cp2;
3934			i++;
3935		}
3936		if (!cantparse) {
3937			if (ip.ival != 0x0) {
3938				rad->sin_addr.s_addr = htonl(ip.ival);
3939				rad->sin_port = htons(port.sval);
3940			} else {
3941				cantparse = 1;
3942			}
3943		}
3944	} else {
3945		cantparse = 1;
3946		if (i > 0) {
3947			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3948			if (error)
3949				goto nfsmout;
3950		}
3951	}
3952	if (cantparse) {
3953		sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
3954		rad->sin_addr.s_addr = sad->sin_addr.s_addr;
3955		rad->sin_port = 0x0;
3956		clp->lc_program = 0;
3957	}
3958nfsmout:
3959	NFSEXITCODE2(error, nd);
3960	return (error);
3961}
3962
/*
 * Turn a string of one to three decimal digits into a number.
 *
 * cp - NUL terminated string to convert
 *
 * Returns the value (0..255), or -1 upon error: an empty string, more
 * than three characters, a non-digit character or a value above 255.
 */
static int
nfsrv_getipnumber(u_char *cp)
{
	int val = 0, ndigits = 0;

	/* An empty field is not a number. */
	if (*cp == '\0')
		return (-1);

	for (; *cp != '\0'; cp++) {
		if (ndigits >= 3 || *cp < '0' || *cp > '9')
			return (-1);
		val = val * 10 + (*cp - '0');
		ndigits++;
	}
	if (val < 256)
		return (val);
	return (-1);
}
3984
3985/*
3986 * This function checks for restart conditions.
3987 */
3988static int
3989nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
3990    nfsv4stateid_t *stateidp, int specialid)
3991{
3992	int ret = 0;
3993
3994	/*
3995	 * First check for a server restart. Open, LockT, ReleaseLockOwner
3996	 * and DelegPurge have a clientid, the rest a stateid.
3997	 */
3998	if (flags &
3999	    (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
4000		if (clientid.lval[0] != nfsrvboottime) {
4001			ret = NFSERR_STALECLIENTID;
4002			goto out;
4003		}
4004	} else if (stateidp->other[0] != nfsrvboottime &&
4005		specialid == 0) {
4006		ret = NFSERR_STALESTATEID;
4007		goto out;
4008	}
4009
4010	/*
4011	 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
4012	 * not use a lock/open owner seqid#, so the check can be done now.
4013	 * (The others will be checked, as required, later.)
4014	 */
4015	if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
4016		goto out;
4017
4018	NFSLOCKSTATE();
4019	ret = nfsrv_checkgrace(NULL, NULL, flags);
4020	NFSUNLOCKSTATE();
4021
4022out:
4023	NFSEXITCODE(ret);
4024	return (ret);
4025}
4026
4027/*
4028 * Check for grace.
4029 */
4030static int
4031nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
4032    u_int32_t flags)
4033{
4034	int error = 0;
4035
4036	if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
4037		if (flags & NFSLCK_RECLAIM) {
4038			error = NFSERR_NOGRACE;
4039			goto out;
4040		}
4041	} else {
4042		if (!(flags & NFSLCK_RECLAIM)) {
4043			error = NFSERR_GRACE;
4044			goto out;
4045		}
4046		if (nd != NULL && clp != NULL &&
4047		    (nd->nd_flag & ND_NFSV41) != 0 &&
4048		    (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4049			error = NFSERR_NOGRACE;
4050			goto out;
4051		}
4052
4053		/*
4054		 * If grace is almost over and we are still getting Reclaims,
4055		 * extend grace a bit.
4056		 */
4057		if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4058		    nfsrv_stablefirst.nsf_eograce)
4059			nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
4060				NFSRV_LEASEDELTA;
4061	}
4062
4063out:
4064	NFSEXITCODE(error);
4065	return (error);
4066}
4067
4068/*
4069 * Do a server callback.
4070 */
4071static int
4072nfsrv_docallback(struct nfsclient *clp, int procnum,
4073    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
4074    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
4075{
4076	mbuf_t m;
4077	u_int32_t *tl;
4078	struct nfsrv_descript nfsd, *nd = &nfsd;
4079	struct ucred *cred;
4080	int error = 0;
4081	u_int32_t callback;
4082	struct nfsdsession *sep = NULL;
4083
4084	cred = newnfs_getcred();
4085	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
4086	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
4087		NFSUNLOCKSTATE();
4088		panic("docallb");
4089	}
4090	clp->lc_cbref++;
4091
4092	/*
4093	 * Fill the callback program# and version into the request
4094	 * structure for newnfs_connect() to use.
4095	 */
4096	clp->lc_req.nr_prog = clp->lc_program;
4097#ifdef notnow
4098	if ((clp->lc_flags & LCL_NFSV41) != 0)
4099		clp->lc_req.nr_vers = NFSV41_CBVERS;
4100	else
4101#endif
4102		clp->lc_req.nr_vers = NFSV4_CBVERS;
4103
4104	/*
4105	 * First, fill in some of the fields of nd and cr.
4106	 */
4107	nd->nd_flag = ND_NFSV4;
4108	if (clp->lc_flags & LCL_GSS)
4109		nd->nd_flag |= ND_KERBV;
4110	if ((clp->lc_flags & LCL_NFSV41) != 0)
4111		nd->nd_flag |= ND_NFSV41;
4112	nd->nd_repstat = 0;
4113	cred->cr_uid = clp->lc_uid;
4114	cred->cr_gid = clp->lc_gid;
4115	callback = clp->lc_callback;
4116	NFSUNLOCKSTATE();
4117	cred->cr_ngroups = 1;
4118
4119	/*
4120	 * Get the first mbuf for the request.
4121	 */
4122	MGET(m, M_WAITOK, MT_DATA);
4123	mbuf_setlen(m, 0);
4124	nd->nd_mreq = nd->nd_mb = m;
4125	nd->nd_bpos = NFSMTOD(m, caddr_t);
4126
4127	/*
4128	 * and build the callback request.
4129	 */
4130	if (procnum == NFSV4OP_CBGETATTR) {
4131		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
4132		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
4133		    "CB Getattr", &sep);
4134		if (error != 0) {
4135			mbuf_freem(nd->nd_mreq);
4136			goto errout;
4137		}
4138		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
4139		(void)nfsrv_putattrbit(nd, attrbitp);
4140	} else if (procnum == NFSV4OP_CBRECALL) {
4141		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
4142		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
4143		    "CB Recall", &sep);
4144		if (error != 0) {
4145			mbuf_freem(nd->nd_mreq);
4146			goto errout;
4147		}
4148		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
4149		*tl++ = txdr_unsigned(stateidp->seqid);
4150		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
4151		    NFSX_STATEIDOTHER);
4152		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
4153		if (trunc)
4154			*tl = newnfs_true;
4155		else
4156			*tl = newnfs_false;
4157		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
4158	} else if (procnum == NFSV4PROC_CBNULL) {
4159		nd->nd_procnum = NFSV4PROC_CBNULL;
4160		if ((clp->lc_flags & LCL_NFSV41) != 0) {
4161			error = nfsv4_getcbsession(clp, &sep);
4162			if (error != 0) {
4163				mbuf_freem(nd->nd_mreq);
4164				goto errout;
4165			}
4166		}
4167	} else {
4168		error = NFSERR_SERVERFAULT;
4169		mbuf_freem(nd->nd_mreq);
4170		goto errout;
4171	}
4172
4173	/*
4174	 * Call newnfs_connect(), as required, and then newnfs_request().
4175	 */
4176	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
4177	if (clp->lc_req.nr_client == NULL) {
4178		if ((clp->lc_flags & LCL_NFSV41) != 0)
4179			error = ECONNREFUSED;
4180		else if (nd->nd_procnum == NFSV4PROC_CBNULL)
4181			error = newnfs_connect(NULL, &clp->lc_req, cred,
4182			    NULL, 1);
4183		else
4184			error = newnfs_connect(NULL, &clp->lc_req, cred,
4185			    NULL, 3);
4186	}
4187	newnfs_sndunlock(&clp->lc_req.nr_lock);
4188	if (!error) {
4189		if ((nd->nd_flag & ND_NFSV41) != 0) {
4190			KASSERT(sep != NULL, ("sep NULL"));
4191			if (sep->sess_cbsess.nfsess_xprt != NULL)
4192				error = newnfs_request(nd, NULL, clp,
4193				    &clp->lc_req, NULL, NULL, cred,
4194				    clp->lc_program, clp->lc_req.nr_vers, NULL,
4195				    1, NULL, &sep->sess_cbsess);
4196			else {
4197				/*
4198				 * This should probably never occur, but if a
4199				 * client somehow does an RPC without a
4200				 * SequenceID Op that causes a callback just
4201				 * after the nfsd threads have been terminated
4202				 * and restared we could conceivably get here
4203				 * without a backchannel xprt.
4204				 */
4205				printf("nfsrv_docallback: no xprt\n");
4206				error = ECONNREFUSED;
4207			}
4208			nfsrv_freesession(sep, NULL);
4209		} else
4210			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
4211			    NULL, NULL, cred, clp->lc_program,
4212			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
4213	}
4214errout:
4215	NFSFREECRED(cred);
4216
4217	/*
4218	 * If error is set here, the Callback path isn't working
4219	 * properly, so twiddle the appropriate LCL_ flags.
4220	 * (nd_repstat != 0 indicates the Callback path is working,
4221	 *  but the callback failed on the client.)
4222	 */
4223	if (error) {
4224		/*
4225		 * Mark the callback pathway down, which disabled issuing
4226		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
4227		 */
4228		NFSLOCKSTATE();
4229		clp->lc_flags |= LCL_CBDOWN;
4230		NFSUNLOCKSTATE();
4231	} else {
4232		/*
4233		 * Callback worked. If the callback path was down, disable
4234		 * callbacks, so no more delegations will be issued. (This
4235		 * is done on the assumption that the callback pathway is
4236		 * flakey.)
4237		 */
4238		NFSLOCKSTATE();
4239		if (clp->lc_flags & LCL_CBDOWN)
4240			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
4241		NFSUNLOCKSTATE();
4242		if (nd->nd_repstat)
4243			error = nd->nd_repstat;
4244		else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
4245			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4246			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
4247			    p, NULL);
4248		mbuf_freem(nd->nd_mrep);
4249	}
4250	NFSLOCKSTATE();
4251	clp->lc_cbref--;
4252	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
4253		clp->lc_flags &= ~LCL_WAKEUPWANTED;
4254		wakeup(clp);
4255	}
4256	NFSUNLOCKSTATE();
4257
4258	NFSEXITCODE(error);
4259	return (error);
4260}
4261
4262/*
4263 * Set up the compound RPC for the callback.
4264 */
4265static int
4266nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4267    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp)
4268{
4269	uint32_t *tl;
4270	int error, len;
4271
4272	len = strlen(optag);
4273	(void)nfsm_strtom(nd, optag, len);
4274	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4275	if ((nd->nd_flag & ND_NFSV41) != 0) {
4276		*tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4277		*tl++ = txdr_unsigned(callback);
4278		*tl++ = txdr_unsigned(2);
4279		*tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4280		error = nfsv4_setcbsequence(nd, clp, 1, sepp);
4281		if (error != 0)
4282			return (error);
4283		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4284		*tl = txdr_unsigned(op);
4285	} else {
4286		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4287		*tl++ = txdr_unsigned(callback);
4288		*tl++ = txdr_unsigned(1);
4289		*tl = txdr_unsigned(op);
4290	}
4291	return (0);
4292}
4293
/*
 * Return the next index# for a clientid. This is just a counter that is
 * incremented and returned, starting at 1. If the 32bit unsigned does
 * actually wrap around to 0, a message is printed (the server should be
 * rebooted) and 0 is returned.
 * At an average rate of one new client per second, it will wrap around in
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t next_index = 0;

	if (++next_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (next_index);
}
4314
4315/*
4316 * Return the next index# for a stateid. Mostly just increment and return
4317 * the next one, but... if the 32bit unsigned does actually wrap around
4318 * (will a BSD server stay up that long?), find
4319 * new start and end values.
4320 */
4321static u_int32_t
4322nfsrv_nextstateindex(struct nfsclient *clp)
4323{
4324	struct nfsstate *stp;
4325	int i;
4326	u_int32_t canuse, min_index, max_index;
4327
4328	if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4329		clp->lc_stateindex++;
4330		if (clp->lc_stateindex != clp->lc_statemaxindex)
4331			return (clp->lc_stateindex);
4332	}
4333
4334	/*
4335	 * Yuck, we've hit the end.
4336	 * Look for a new min and max.
4337	 */
4338	min_index = 0;
4339	max_index = 0xffffffff;
4340	for (i = 0; i < nfsrv_statehashsize; i++) {
4341	    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4342		if (stp->ls_stateid.other[2] > 0x80000000) {
4343		    if (stp->ls_stateid.other[2] < max_index)
4344			max_index = stp->ls_stateid.other[2];
4345		} else {
4346		    if (stp->ls_stateid.other[2] > min_index)
4347			min_index = stp->ls_stateid.other[2];
4348		}
4349	    }
4350	}
4351
4352	/*
4353	 * Yikes, highly unlikely, but I'll handle it anyhow.
4354	 */
4355	if (min_index == 0x80000000 && max_index == 0x80000001) {
4356	    canuse = 0;
4357	    /*
4358	     * Loop around until we find an unused entry. Return that
4359	     * and set LCL_INDEXNOTOK, so the search will continue next time.
4360	     * (This is one of those rare cases where a goto is the
4361	     *  cleanest way to code the loop.)
4362	     */
4363tryagain:
4364	    for (i = 0; i < nfsrv_statehashsize; i++) {
4365		LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4366		    if (stp->ls_stateid.other[2] == canuse) {
4367			canuse++;
4368			goto tryagain;
4369		    }
4370		}
4371	    }
4372	    clp->lc_flags |= LCL_INDEXNOTOK;
4373	    return (canuse);
4374	}
4375
4376	/*
4377	 * Ok to start again from min + 1.
4378	 */
4379	clp->lc_stateindex = min_index + 1;
4380	clp->lc_statemaxindex = max_index;
4381	clp->lc_flags &= ~LCL_INDEXNOTOK;
4382	return (clp->lc_stateindex);
4383}
4384
/*
 * The following functions handle the stable storage file that deals with
 * the edge conditions described in RFC3530 Sec. 8.6.3.
 * The file is as follows:
 * - a single record at the beginning that has the lease time of the
 *   previous server instance (before the last reboot) and the nfsrvboottime
 *   values for the previous server boots.
 *   These previous boot times are used to ensure that the current
 *   nfsrvboottime does not, somehow, get set to a previous one.
 *   (This is important so that Stale ClientIDs and StateIDs can
 *    be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
 * - followed by some number of appended records with:
 *   - client id string
 *   - flag that indicates it is a record revoking state via lease
 *     expiration or similar
 *     OR has successfully acquired state.
 * These structures vary in length, with the client string at the end, up
 * to NFSV4_OPAQUELIMIT in size.
 *
 * At the end of the grace period, the file is truncated, the first
 * record is rewritten with updated information and any acquired state
 * records for successful reclaims of state are written.
 *
 * Subsequent records are appended when the first state is issued to
 * a client and when state is revoked for a client.
 *
 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological
 * order.
 * If, for some reason, the file can't be read, the grace period is
 * immediately terminated and all reclaims get NFSERR_NOGRACE.
 */
4417
4418/*
4419 * Read in the stable storage file. Called by nfssvc() before the nfsd
4420 * processes start servicing requests.
4421 */
4422APPLESTATIC void
4423nfsrv_setupstable(NFSPROC_T *p)
4424{
4425	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4426	struct nfsrv_stable *sp, *nsp;
4427	struct nfst_rec *tsp;
4428	int error, i, tryagain;
4429	off_t off = 0;
4430	ssize_t aresid, len;
4431
4432	/*
4433	 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
4434	 * a reboot, so state has not been lost.
4435	 */
4436	if (sf->nsf_flags & NFSNSF_UPDATEDONE)
4437		return;
4438	/*
4439	 * Set Grace over just until the file reads successfully.
4440	 */
4441	nfsrvboottime = time_second;
4442	LIST_INIT(&sf->nsf_head);
4443	sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4444	sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
4445	if (sf->nsf_fp == NULL)
4446		return;
4447	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4448	    (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
4449	    0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4450	if (error || aresid || sf->nsf_numboots == 0 ||
4451		sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
4452		return;
4453
4454	/*
4455	 * Now, read in the boottimes.
4456	 */
4457	sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
4458		sizeof (time_t), M_TEMP, M_WAITOK);
4459	off = sizeof (struct nfsf_rec);
4460	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4461	    (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
4462	    UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4463	if (error || aresid) {
4464		free((caddr_t)sf->nsf_bootvals, M_TEMP);
4465		sf->nsf_bootvals = NULL;
4466		return;
4467	}
4468
4469	/*
4470	 * Make sure this nfsrvboottime is different from all recorded
4471	 * previous ones.
4472	 */
4473	do {
4474		tryagain = 0;
4475		for (i = 0; i < sf->nsf_numboots; i++) {
4476			if (nfsrvboottime == sf->nsf_bootvals[i]) {
4477				nfsrvboottime++;
4478				tryagain = 1;
4479				break;
4480			}
4481		}
4482	} while (tryagain);
4483
4484	sf->nsf_flags |= NFSNSF_OK;
4485	off += (sf->nsf_numboots * sizeof (time_t));
4486
4487	/*
4488	 * Read through the file, building a list of records for grace
4489	 * checking.
4490	 * Each record is between sizeof (struct nfst_rec) and
4491	 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
4492	 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
4493	 */
4494	tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4495		NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
4496	do {
4497	    error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4498	        (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
4499	        off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4500	    len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
4501	    if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
4502		len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
4503		/*
4504		 * Yuck, the file has been corrupted, so just return
4505		 * after clearing out any restart state, so the grace period
4506		 * is over.
4507		 */
4508		LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4509			LIST_REMOVE(sp, nst_list);
4510			free((caddr_t)sp, M_TEMP);
4511		}
4512		free((caddr_t)tsp, M_TEMP);
4513		sf->nsf_flags &= ~NFSNSF_OK;
4514		free((caddr_t)sf->nsf_bootvals, M_TEMP);
4515		sf->nsf_bootvals = NULL;
4516		return;
4517	    }
4518	    if (len > 0) {
4519		off += sizeof (struct nfst_rec) + tsp->len - 1;
4520		/*
4521		 * Search the list for a matching client.
4522		 */
4523		LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
4524			if (tsp->len == sp->nst_len &&
4525			    !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
4526				break;
4527		}
4528		if (sp == LIST_END(&sf->nsf_head)) {
4529			sp = (struct nfsrv_stable *)malloc(tsp->len +
4530				sizeof (struct nfsrv_stable) - 1, M_TEMP,
4531				M_WAITOK);
4532			NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
4533				sizeof (struct nfst_rec) + tsp->len - 1);
4534			LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
4535		} else {
4536			if (tsp->flag == NFSNST_REVOKE)
4537				sp->nst_flag |= NFSNST_REVOKE;
4538			else
4539				/*
4540				 * A subsequent timestamp indicates the client
4541				 * did a setclientid/confirm and any previous
4542				 * revoke is no longer relevant.
4543				 */
4544				sp->nst_flag &= ~NFSNST_REVOKE;
4545		}
4546	    }
4547	} while (len > 0);
4548	free((caddr_t)tsp, M_TEMP);
4549	sf->nsf_flags = NFSNSF_OK;
4550	sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
4551		NFSRV_LEASEDELTA;
4552}
4553
4554/*
4555 * Update the stable storage file, now that the grace period is over.
4556 */
4557APPLESTATIC void
4558nfsrv_updatestable(NFSPROC_T *p)
4559{
4560	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4561	struct nfsrv_stable *sp, *nsp;
4562	int i;
4563	struct nfsvattr nva;
4564	vnode_t vp;
4565#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
4566	mount_t mp = NULL;
4567#endif
4568	int error;
4569
4570	if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
4571		return;
4572	sf->nsf_flags |= NFSNSF_UPDATEDONE;
4573	/*
4574	 * Ok, we need to rewrite the stable storage file.
4575	 * - truncate to 0 length
4576	 * - write the new first structure
4577	 * - loop through the data structures, writing out any that
4578	 *   have timestamps older than the old boot
4579	 */
4580	if (sf->nsf_bootvals) {
4581		sf->nsf_numboots++;
4582		for (i = sf->nsf_numboots - 2; i >= 0; i--)
4583			sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
4584	} else {
4585		sf->nsf_numboots = 1;
4586		sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
4587			M_TEMP, M_WAITOK);
4588	}
4589	sf->nsf_bootvals[0] = nfsrvboottime;
4590	sf->nsf_lease = nfsrv_lease;
4591	NFSVNO_ATTRINIT(&nva);
4592	NFSVNO_SETATTRVAL(&nva, size, 0);
4593	vp = NFSFPVNODE(sf->nsf_fp);
4594	vn_start_write(vp, &mp, V_WAIT);
4595	if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4596		error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
4597		    NULL);
4598		NFSVOPUNLOCK(vp, 0);
4599	} else
4600		error = EPERM;
4601	vn_finished_write(mp);
4602	if (!error)
4603	    error = NFSD_RDWR(UIO_WRITE, vp,
4604		(caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
4605		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4606	if (!error)
4607	    error = NFSD_RDWR(UIO_WRITE, vp,
4608		(caddr_t)sf->nsf_bootvals,
4609		sf->nsf_numboots * sizeof (time_t),
4610		(off_t)(sizeof (struct nfsf_rec)),
4611		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4612	free((caddr_t)sf->nsf_bootvals, M_TEMP);
4613	sf->nsf_bootvals = NULL;
4614	if (error) {
4615		sf->nsf_flags &= ~NFSNSF_OK;
4616		printf("EEK! Can't write NfsV4 stable storage file\n");
4617		return;
4618	}
4619	sf->nsf_flags |= NFSNSF_OK;
4620
4621	/*
4622	 * Loop through the list and write out timestamp records for
4623	 * any clients that successfully reclaimed state.
4624	 */
4625	LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4626		if (sp->nst_flag & NFSNST_GOTSTATE) {
4627			nfsrv_writestable(sp->nst_client, sp->nst_len,
4628				NFSNST_NEWSTATE, p);
4629			sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
4630		}
4631		LIST_REMOVE(sp, nst_list);
4632		free((caddr_t)sp, M_TEMP);
4633	}
4634	nfsrv_backupstable();
4635}
4636
4637/*
4638 * Append a record to the stable storage file.
4639 */
4640APPLESTATIC void
4641nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
4642{
4643	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4644	struct nfst_rec *sp;
4645	int error;
4646
4647	if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
4648		return;
4649	sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4650		len - 1, M_TEMP, M_WAITOK);
4651	sp->len = len;
4652	NFSBCOPY(client, sp->client, len);
4653	sp->flag = flag;
4654	error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
4655	    (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
4656	    UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
4657	free((caddr_t)sp, M_TEMP);
4658	if (error) {
4659		sf->nsf_flags &= ~NFSNSF_OK;
4660		printf("EEK! Can't write NfsV4 stable storage file\n");
4661	}
4662}
4663
4664/*
4665 * This function is called during the grace period to mark a client
4666 * that successfully reclaimed state.
4667 */
4668static void
4669nfsrv_markstable(struct nfsclient *clp)
4670{
4671	struct nfsrv_stable *sp;
4672
4673	/*
4674	 * First find the client structure.
4675	 */
4676	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4677		if (sp->nst_len == clp->lc_idlen &&
4678		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4679			break;
4680	}
4681	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
4682		return;
4683
4684	/*
4685	 * Now, just mark it and set the nfsclient back pointer.
4686	 */
4687	sp->nst_flag |= NFSNST_GOTSTATE;
4688	sp->nst_clp = clp;
4689}
4690
4691/*
4692 * This function is called for a reclaim, to see if it gets grace.
4693 * It returns 0 if a reclaim is allowed, 1 otherwise.
4694 */
4695static int
4696nfsrv_checkstable(struct nfsclient *clp)
4697{
4698	struct nfsrv_stable *sp;
4699
4700	/*
4701	 * First, find the entry for the client.
4702	 */
4703	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4704		if (sp->nst_len == clp->lc_idlen &&
4705		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4706			break;
4707	}
4708
4709	/*
4710	 * If not in the list, state was revoked or no state was issued
4711	 * since the previous reboot, a reclaim is denied.
4712	 */
4713	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
4714	    (sp->nst_flag & NFSNST_REVOKE) ||
4715	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
4716		return (1);
4717	return (0);
4718}
4719
4720/*
4721 * Test for and try to clear out a conflicting client. This is called by
4722 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
4723 * a found.
4724 * The trick here is that it can't revoke a conflicting client with an
4725 * expired lease unless it holds the v4root lock, so...
4726 * If no v4root lock, get the lock and return 1 to indicate "try again".
4727 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
4728 * the revocation worked and the conflicting client is "bye, bye", so it
4729 * can be tried again.
4730 * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
4731 * Unlocks State before a non-zero value is returned.
4732 */
4733static int
4734nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
4735    NFSPROC_T *p)
4736{
4737	int gotlock, lktype = 0;
4738
4739	/*
4740	 * If lease hasn't expired, we can't fix it.
4741	 */
4742	if (clp->lc_expiry >= NFSD_MONOSEC ||
4743	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
4744		return (0);
4745	if (*haslockp == 0) {
4746		NFSUNLOCKSTATE();
4747		if (vp != NULL) {
4748			lktype = NFSVOPISLOCKED(vp);
4749			NFSVOPUNLOCK(vp, 0);
4750		}
4751		NFSLOCKV4ROOTMUTEX();
4752		nfsv4_relref(&nfsv4rootfs_lock);
4753		do {
4754			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4755			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4756		} while (!gotlock);
4757		NFSUNLOCKV4ROOTMUTEX();
4758		*haslockp = 1;
4759		if (vp != NULL) {
4760			NFSVOPLOCK(vp, lktype | LK_RETRY);
4761			if ((vp->v_iflag & VI_DOOMED) != 0)
4762				return (2);
4763		}
4764		return (1);
4765	}
4766	NFSUNLOCKSTATE();
4767
4768	/*
4769	 * Ok, we can expire the conflicting client.
4770	 */
4771	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4772	nfsrv_backupstable();
4773	nfsrv_cleanclient(clp, p);
4774	nfsrv_freedeleglist(&clp->lc_deleg);
4775	nfsrv_freedeleglist(&clp->lc_olddeleg);
4776	LIST_REMOVE(clp, lc_hash);
4777	nfsrv_zapclient(clp, p);
4778	return (1);
4779}
4780
4781/*
4782 * Resolve a delegation conflict.
4783 * Returns 0 to indicate the conflict was resolved without sleeping.
4784 * Return -1 to indicate that the caller should check for conflicts again.
4785 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
4786 *
4787 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
4788 * for a return of 0, since there was no sleep and it could be required
4789 * later. It is released for a return of NFSERR_DELAY, since the caller
4790 * will return that error. It is released when a sleep was done waiting
4791 * for the delegation to be returned or expire (so that other nfsds can
4792 * handle ops). Then, it must be acquired for the write to stable storage.
4793 * (This function is somewhat similar to nfsrv_clientconflict(), but
4794 *  the semantics differ in a couple of subtle ways. The return of 0
4795 *  indicates the conflict was resolved without sleeping here, not
4796 *  that the conflict can't be resolved and the handling of nfsv4root_lock
4797 *  differs, as noted above.)
4798 * Unlocks State before returning a non-zero value.
4799 */
4800static int
4801nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
4802    vnode_t vp)
4803{
4804	struct nfsclient *clp = stp->ls_clp;
4805	int gotlock, error, lktype = 0, retrycnt, zapped_clp;
4806	nfsv4stateid_t tstateid;
4807	fhandle_t tfh;
4808
4809	/*
4810	 * If the conflict is with an old delegation...
4811	 */
4812	if (stp->ls_flags & NFSLCK_OLDDELEG) {
4813		/*
4814		 * You can delete it, if it has expired.
4815		 */
4816		if (clp->lc_delegtime < NFSD_MONOSEC) {
4817			nfsrv_freedeleg(stp);
4818			NFSUNLOCKSTATE();
4819			error = -1;
4820			goto out;
4821		}
4822		NFSUNLOCKSTATE();
4823		/*
4824		 * During this delay, the old delegation could expire or it
4825		 * could be recovered by the client via an Open with
4826		 * CLAIM_DELEGATE_PREV.
4827		 * Release the nfsv4root_lock, if held.
4828		 */
4829		if (*haslockp) {
4830			*haslockp = 0;
4831			NFSLOCKV4ROOTMUTEX();
4832			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4833			NFSUNLOCKV4ROOTMUTEX();
4834		}
4835		error = NFSERR_DELAY;
4836		goto out;
4837	}
4838
4839	/*
4840	 * It's a current delegation, so:
4841	 * - check to see if the delegation has expired
4842	 *   - if so, get the v4root lock and then expire it
4843	 */
4844	if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
4845		/*
4846		 * - do a recall callback, since not yet done
4847		 * For now, never allow truncate to be set. To use
4848		 * truncate safely, it must be guaranteed that the
4849		 * Remove, Rename or Setattr with size of 0 will
4850		 * succeed and that would require major changes to
4851		 * the VFS/Vnode OPs.
4852		 * Set the expiry time large enough so that it won't expire
4853		 * until after the callback, then set it correctly, once
4854		 * the callback is done. (The delegation will now time
4855		 * out whether or not the Recall worked ok. The timeout
4856		 * will be extended when ops are done on the delegation
4857		 * stateid, up to the timelimit.)
4858		 */
4859		stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
4860		    NFSRV_LEASEDELTA;
4861		stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
4862		    NFSRV_LEASEDELTA;
4863		stp->ls_flags |= NFSLCK_DELEGRECALL;
4864
4865		/*
4866		 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
4867		 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
4868		 * in order to try and avoid a race that could happen
4869		 * when a CBRecall request passed the Open reply with
4870		 * the delegation in it when transitting the network.
4871		 * Since nfsrv_docallback will sleep, don't use stp after
4872		 * the call.
4873		 */
4874		NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
4875		    sizeof (tstateid));
4876		NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
4877		    sizeof (tfh));
4878		NFSUNLOCKSTATE();
4879		if (*haslockp) {
4880			*haslockp = 0;
4881			NFSLOCKV4ROOTMUTEX();
4882			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4883			NFSUNLOCKV4ROOTMUTEX();
4884		}
4885		retrycnt = 0;
4886		do {
4887		    error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
4888			&tstateid, 0, &tfh, NULL, NULL, p);
4889		    retrycnt++;
4890		} while ((error == NFSERR_BADSTATEID ||
4891		    error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
4892		error = NFSERR_DELAY;
4893		goto out;
4894	}
4895
4896	if (clp->lc_expiry >= NFSD_MONOSEC &&
4897	    stp->ls_delegtime >= NFSD_MONOSEC) {
4898		NFSUNLOCKSTATE();
4899		/*
4900		 * A recall has been done, but it has not yet expired.
4901		 * So, RETURN_DELAY.
4902		 */
4903		if (*haslockp) {
4904			*haslockp = 0;
4905			NFSLOCKV4ROOTMUTEX();
4906			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4907			NFSUNLOCKV4ROOTMUTEX();
4908		}
4909		error = NFSERR_DELAY;
4910		goto out;
4911	}
4912
4913	/*
4914	 * If we don't yet have the lock, just get it and then return,
4915	 * since we need that before deleting expired state, such as
4916	 * this delegation.
4917	 * When getting the lock, unlock the vnode, so other nfsds that
4918	 * are in progress, won't get stuck waiting for the vnode lock.
4919	 */
4920	if (*haslockp == 0) {
4921		NFSUNLOCKSTATE();
4922		if (vp != NULL) {
4923			lktype = NFSVOPISLOCKED(vp);
4924			NFSVOPUNLOCK(vp, 0);
4925		}
4926		NFSLOCKV4ROOTMUTEX();
4927		nfsv4_relref(&nfsv4rootfs_lock);
4928		do {
4929			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4930			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4931		} while (!gotlock);
4932		NFSUNLOCKV4ROOTMUTEX();
4933		*haslockp = 1;
4934		if (vp != NULL) {
4935			NFSVOPLOCK(vp, lktype | LK_RETRY);
4936			if ((vp->v_iflag & VI_DOOMED) != 0) {
4937				*haslockp = 0;
4938				NFSLOCKV4ROOTMUTEX();
4939				nfsv4_unlock(&nfsv4rootfs_lock, 1);
4940				NFSUNLOCKV4ROOTMUTEX();
4941				error = NFSERR_PERM;
4942				goto out;
4943			}
4944		}
4945		error = -1;
4946		goto out;
4947	}
4948
4949	NFSUNLOCKSTATE();
4950	/*
4951	 * Ok, we can delete the expired delegation.
4952	 * First, write the Revoke record to stable storage and then
4953	 * clear out the conflict.
4954	 * Since all other nfsd threads are now blocked, we can safely
4955	 * sleep without the state changing.
4956	 */
4957	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4958	nfsrv_backupstable();
4959	if (clp->lc_expiry < NFSD_MONOSEC) {
4960		nfsrv_cleanclient(clp, p);
4961		nfsrv_freedeleglist(&clp->lc_deleg);
4962		nfsrv_freedeleglist(&clp->lc_olddeleg);
4963		LIST_REMOVE(clp, lc_hash);
4964		zapped_clp = 1;
4965	} else {
4966		nfsrv_freedeleg(stp);
4967		zapped_clp = 0;
4968	}
4969	if (zapped_clp)
4970		nfsrv_zapclient(clp, p);
4971	error = -1;
4972
4973out:
4974	NFSEXITCODE(error);
4975	return (error);
4976}
4977
4978/*
4979 * Check for a remove allowed, if remove is set to 1 and get rid of
4980 * delegations.
4981 */
4982APPLESTATIC int
4983nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
4984{
4985	struct nfsstate *stp;
4986	struct nfslockfile *lfp;
4987	int error, haslock = 0;
4988	fhandle_t nfh;
4989
4990	/*
4991	 * First, get the lock file structure.
4992	 * (A return of -1 means no associated state, so remove ok.)
4993	 */
4994	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
4995tryagain:
4996	NFSLOCKSTATE();
4997	if (!error)
4998		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
4999	if (error) {
5000		NFSUNLOCKSTATE();
5001		if (haslock) {
5002			NFSLOCKV4ROOTMUTEX();
5003			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5004			NFSUNLOCKV4ROOTMUTEX();
5005		}
5006		if (error == -1)
5007			error = 0;
5008		goto out;
5009	}
5010
5011	/*
5012	 * Now, we must Recall any delegations.
5013	 */
5014	error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
5015	if (error) {
5016		/*
5017		 * nfsrv_cleandeleg() unlocks state for non-zero
5018		 * return.
5019		 */
5020		if (error == -1)
5021			goto tryagain;
5022		if (haslock) {
5023			NFSLOCKV4ROOTMUTEX();
5024			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5025			NFSUNLOCKV4ROOTMUTEX();
5026		}
5027		goto out;
5028	}
5029
5030	/*
5031	 * Now, look for a conflicting open share.
5032	 */
5033	if (remove) {
5034		LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
5035			if (stp->ls_flags & NFSLCK_WRITEDENY) {
5036				error = NFSERR_FILEOPEN;
5037				break;
5038			}
5039		}
5040	}
5041
5042	NFSUNLOCKSTATE();
5043	if (haslock) {
5044		NFSLOCKV4ROOTMUTEX();
5045		nfsv4_unlock(&nfsv4rootfs_lock, 1);
5046		NFSUNLOCKV4ROOTMUTEX();
5047	}
5048
5049out:
5050	NFSEXITCODE(error);
5051	return (error);
5052}
5053
5054/*
5055 * Clear out all delegations for the file referred to by lfp.
5056 * May return NFSERR_DELAY, if there will be a delay waiting for
5057 * delegations to expire.
5058 * Returns -1 to indicate it slept while recalling a delegation.
5059 * This function has the side effect of deleting the nfslockfile structure,
5060 * if it no longer has associated state and didn't have to sleep.
5061 * Unlocks State before a non-zero value is returned.
5062 */
5063static int
5064nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5065    struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5066{
5067	struct nfsstate *stp, *nstp;
5068	int ret = 0;
5069
5070	stp = LIST_FIRST(&lfp->lf_deleg);
5071	while (stp != LIST_END(&lfp->lf_deleg)) {
5072		nstp = LIST_NEXT(stp, ls_file);
5073		if (stp->ls_clp != clp) {
5074			ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5075			if (ret) {
5076				/*
5077				 * nfsrv_delegconflict() unlocks state
5078				 * when it returns non-zero.
5079				 */
5080				goto out;
5081			}
5082		}
5083		stp = nstp;
5084	}
5085out:
5086	NFSEXITCODE(ret);
5087	return (ret);
5088}
5089
5090/*
5091 * There are certain operations that, when being done outside of NFSv4,
5092 * require that any NFSv4 delegation for the file be recalled.
5093 * This function is to be called for those cases:
5094 * VOP_RENAME() - When a delegation is being recalled for any reason,
5095 *	the client may have to do Opens against the server, using the file's
5096 *	final component name. If the file has been renamed on the server,
5097 *	that component name will be incorrect and the Open will fail.
5098 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5099 *	been removed on the server, if there is a delegation issued to
5100 *	that client for the file. I say "theoretically" since clients
5101 *	normally do an Access Op before the Open and that Access Op will
5102 *	fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5103 *	they will detect the file's removal in the same manner. (There is
5104 *	one case where RFC3530 allows a client to do an Open without first
5105 *	doing an Access Op, which is passage of a check against the ACE
5106 *	returned with a Write delegation, but current practice is to ignore
5107 *	the ACE and always do an Access Op.)
5108 *	Since the functions can only be called with an unlocked vnode, this
5109 *	can't be done at this time.
5110 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5111 *	locks locally in the client, which are not visible to the server. To
5112 *	deal with this, issuing of delegations for a vnode must be disabled
5113 *	and all delegations for the vnode recalled. This is done via the
5114 *	second function, using the VV_DISABLEDELEG vflag on the vnode.
5115 */
5116APPLESTATIC void
5117nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5118{
5119	time_t starttime;
5120	int error;
5121
5122	/*
5123	 * First, check to see if the server is currently running and it has
5124	 * been called for a regular file when issuing delegations.
5125	 */
5126	if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
5127	    nfsrv_issuedelegs == 0)
5128		return;
5129
5130	KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5131	/*
5132	 * First, get a reference on the nfsv4rootfs_lock so that an
5133	 * exclusive lock cannot be acquired by another thread.
5134	 */
5135	NFSLOCKV4ROOTMUTEX();
5136	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5137	NFSUNLOCKV4ROOTMUTEX();
5138
5139	/*
5140	 * Now, call nfsrv_checkremove() in a loop while it returns
5141	 * NFSERR_DELAY. Return upon any other error or when timed out.
5142	 */
5143	starttime = NFSD_MONOSEC;
5144	do {
5145		if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5146			error = nfsrv_checkremove(vp, 0, p);
5147			NFSVOPUNLOCK(vp, 0);
5148		} else
5149			error = EPERM;
5150		if (error == NFSERR_DELAY) {
5151			if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5152				break;
5153			/* Sleep for a short period of time */
5154			(void) nfs_catnap(PZERO, 0, "nfsremove");
5155		}
5156	} while (error == NFSERR_DELAY);
5157	NFSLOCKV4ROOTMUTEX();
5158	nfsv4_relref(&nfsv4rootfs_lock);
5159	NFSUNLOCKV4ROOTMUTEX();
5160}
5161
5162APPLESTATIC void
5163nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
5164{
5165
5166#ifdef VV_DISABLEDELEG
5167	/*
5168	 * First, flag issuance of delegations disabled.
5169	 */
5170	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
5171#endif
5172
5173	/*
5174	 * Then call nfsd_recalldelegation() to get rid of all extant
5175	 * delegations.
5176	 */
5177	nfsd_recalldelegation(vp, p);
5178}
5179
5180/*
5181 * Check for conflicting locks, etc. and then get rid of delegations.
5182 * (At one point I thought that I should get rid of delegations for any
5183 *  Setattr, since it could potentially disallow the I/O op (read or write)
5184 *  allowed by the delegation. However, Setattr Ops that aren't changing
5185 *  the size get a stateid of all 0s, so you can't tell if it is a delegation
5186 *  for the same client or a different one, so I decided to only get rid
5187 *  of delegations for other clients when the size is being changed.)
5188 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5189 * as Write backs, even if there is no delegation, so it really isn't any
5190 * different?)
5191 */
5192APPLESTATIC int
5193nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5194    nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5195    struct nfsexstuff *exp, NFSPROC_T *p)
5196{
5197	struct nfsstate st, *stp = &st;
5198	struct nfslock lo, *lop = &lo;
5199	int error = 0;
5200	nfsquad_t clientid;
5201
5202	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5203		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5204		lop->lo_first = nvap->na_size;
5205	} else {
5206		stp->ls_flags = 0;
5207		lop->lo_first = 0;
5208	}
5209	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5210	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5211	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5212	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5213		stp->ls_flags |= NFSLCK_SETATTR;
5214	if (stp->ls_flags == 0)
5215		goto out;
5216	lop->lo_end = NFS64BITSSET;
5217	lop->lo_flags = NFSLCK_WRITE;
5218	stp->ls_ownerlen = 0;
5219	stp->ls_op = NULL;
5220	stp->ls_uid = nd->nd_cred->cr_uid;
5221	stp->ls_stateid.seqid = stateidp->seqid;
5222	clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5223	clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5224	stp->ls_stateid.other[2] = stateidp->other[2];
5225	error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5226	    stateidp, exp, nd, p);
5227
5228out:
5229	NFSEXITCODE2(error, nd);
5230	return (error);
5231}
5232
5233/*
5234 * Check for a write delegation and do a CBGETATTR if there is one, updating
5235 * the attributes, as required.
5236 * Should I return an error if I can't get the attributes? (For now, I'll
5237 * just return ok.
5238 */
5239APPLESTATIC int
5240nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
5241    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred,
5242    NFSPROC_T *p)
5243{
5244	struct nfsstate *stp;
5245	struct nfslockfile *lfp;
5246	struct nfsclient *clp;
5247	struct nfsvattr nva;
5248	fhandle_t nfh;
5249	int error = 0;
5250	nfsattrbit_t cbbits;
5251	u_quad_t delegfilerev;
5252
5253	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
5254	if (!NFSNONZERO_ATTRBIT(&cbbits))
5255		goto out;
5256
5257	/*
5258	 * Get the lock file structure.
5259	 * (A return of -1 means no associated state, so return ok.)
5260	 */
5261	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5262	NFSLOCKSTATE();
5263	if (!error)
5264		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5265	if (error) {
5266		NFSUNLOCKSTATE();
5267		if (error == -1)
5268			error = 0;
5269		goto out;
5270	}
5271
5272	/*
5273	 * Now, look for a write delegation.
5274	 */
5275	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
5276		if (stp->ls_flags & NFSLCK_DELEGWRITE)
5277			break;
5278	}
5279	if (stp == LIST_END(&lfp->lf_deleg)) {
5280		NFSUNLOCKSTATE();
5281		goto out;
5282	}
5283	clp = stp->ls_clp;
5284	delegfilerev = stp->ls_filerev;
5285
5286	/*
5287	 * If the Write delegation was issued as a part of this Compound RPC
5288	 * or if we have an Implied Clientid (used in a previous Op in this
5289	 * compound) and it is the client the delegation was issued to,
5290	 * just return ok.
5291	 * I also assume that it is from the same client iff the network
5292	 * host IP address is the same as the callback address. (Not
5293	 * exactly correct by the RFC, but avoids a lot of Getattr
5294	 * callbacks.)
5295	 */
5296	if (nd->nd_compref == stp->ls_compref ||
5297	    ((nd->nd_flag & ND_IMPLIEDCLID) &&
5298	     clp->lc_clientid.qval == nd->nd_clientid.qval) ||
5299	     nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
5300		NFSUNLOCKSTATE();
5301		goto out;
5302	}
5303
5304	/*
5305	 * We are now done with the delegation state structure,
5306	 * so the statelock can be released and we can now tsleep().
5307	 */
5308
5309	/*
5310	 * Now, we must do the CB Getattr callback, to see if Change or Size
5311	 * has changed.
5312	 */
5313	if (clp->lc_expiry >= NFSD_MONOSEC) {
5314		NFSUNLOCKSTATE();
5315		NFSVNO_ATTRINIT(&nva);
5316		nva.na_filerev = NFS64BITSSET;
5317		error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
5318		    0, &nfh, &nva, &cbbits, p);
5319		if (!error) {
5320			if ((nva.na_filerev != NFS64BITSSET &&
5321			    nva.na_filerev > delegfilerev) ||
5322			    (NFSVNO_ISSETSIZE(&nva) &&
5323			     nva.na_size != nvap->na_size)) {
5324				error = nfsvno_updfilerev(vp, nvap, cred, p);
5325				if (NFSVNO_ISSETSIZE(&nva))
5326					nvap->na_size = nva.na_size;
5327			}
5328		} else
5329			error = 0;	/* Ignore callback errors for now. */
5330	} else {
5331		NFSUNLOCKSTATE();
5332	}
5333
5334out:
5335	NFSEXITCODE2(error, nd);
5336	return (error);
5337}
5338
5339/*
5340 * This function looks for openowners that haven't had any opens for
5341 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5342 * is set.
5343 */
5344APPLESTATIC void
5345nfsrv_throwawayopens(NFSPROC_T *p)
5346{
5347	struct nfsclient *clp, *nclp;
5348	struct nfsstate *stp, *nstp;
5349	int i;
5350
5351	NFSLOCKSTATE();
5352	nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
5353	/*
5354	 * For each client...
5355	 */
5356	for (i = 0; i < nfsrv_clienthashsize; i++) {
5357	    LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5358		LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5359			if (LIST_EMPTY(&stp->ls_open) &&
5360			    (stp->ls_noopens > NFSNOOPEN ||
5361			     (nfsrv_openpluslock * 2) >
5362			     nfsrv_v4statelimit))
5363				nfsrv_freeopenowner(stp, 0, p);
5364		}
5365	    }
5366	}
5367	NFSUNLOCKSTATE();
5368}
5369
5370/*
5371 * This function checks to see if the credentials are the same.
5372 * Returns 1 for not same, 0 otherwise.
5373 */
5374static int
5375nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
5376{
5377
5378	if (nd->nd_flag & ND_GSS) {
5379		if (!(clp->lc_flags & LCL_GSS))
5380			return (1);
5381		if (clp->lc_flags & LCL_NAME) {
5382			if (nd->nd_princlen != clp->lc_namelen ||
5383			    NFSBCMP(nd->nd_principal, clp->lc_name,
5384				clp->lc_namelen))
5385				return (1);
5386			else
5387				return (0);
5388		}
5389		if (nd->nd_cred->cr_uid == clp->lc_uid)
5390			return (0);
5391		else
5392			return (1);
5393	} else if (clp->lc_flags & LCL_GSS)
5394		return (1);
5395	/*
5396	 * For AUTH_SYS, allow the same uid or root. (This is underspecified
5397	 * in RFC3530, which talks about principals, but doesn't say anything
5398	 * about uids for AUTH_SYS.)
5399	 */
5400	if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5401		return (0);
5402	else
5403		return (1);
5404}
5405
5406/*
5407 * Calculate the lease expiry time.
5408 */
5409static time_t
5410nfsrv_leaseexpiry(void)
5411{
5412
5413	if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
5414		return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5415	return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5416}
5417
5418/*
5419 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5420 */
5421static void
5422nfsrv_delaydelegtimeout(struct nfsstate *stp)
5423{
5424
5425	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5426		return;
5427
5428	if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5429	    stp->ls_delegtime < stp->ls_delegtimelimit) {
5430		stp->ls_delegtime += nfsrv_lease;
5431		if (stp->ls_delegtime > stp->ls_delegtimelimit)
5432			stp->ls_delegtime = stp->ls_delegtimelimit;
5433	}
5434}
5435
5436/*
5437 * This function checks to see if there is any other state associated
5438 * with the openowner for this Open.
5439 * It returns 1 if there is no other state, 0 otherwise.
5440 */
5441static int
5442nfsrv_nootherstate(struct nfsstate *stp)
5443{
5444	struct nfsstate *tstp;
5445
5446	LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5447		if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5448			return (0);
5449	}
5450	return (1);
5451}
5452
5453/*
5454 * Create a list of lock deltas (changes to local byte range locking
5455 * that can be rolled back using the list) and apply the changes via
5456 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5457 * the rollback or update function will be called after this.
5458 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5459 * call fails. If it returns an error, it will unlock the list.
5460 */
5461static int
5462nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5463    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5464{
5465	struct nfslock *lop, *nlop;
5466	int error = 0;
5467
5468	/* Loop through the list of locks. */
5469	lop = LIST_FIRST(&lfp->lf_locallock);
5470	while (first < end && lop != NULL) {
5471		nlop = LIST_NEXT(lop, lo_lckowner);
5472		if (first >= lop->lo_end) {
5473			/* not there yet */
5474			lop = nlop;
5475		} else if (first < lop->lo_first) {
5476			/* new one starts before entry in list */
5477			if (end <= lop->lo_first) {
5478				/* no overlap between old and new */
5479				error = nfsrv_dolocal(vp, lfp, flags,
5480				    NFSLCK_UNLOCK, first, end, cfp, p);
5481				if (error != 0)
5482					break;
5483				first = end;
5484			} else {
5485				/* handle fragment overlapped with new one */
5486				error = nfsrv_dolocal(vp, lfp, flags,
5487				    NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5488				    p);
5489				if (error != 0)
5490					break;
5491				first = lop->lo_first;
5492			}
5493		} else {
5494			/* new one overlaps this entry in list */
5495			if (end <= lop->lo_end) {
5496				/* overlaps all of new one */
5497				error = nfsrv_dolocal(vp, lfp, flags,
5498				    lop->lo_flags, first, end, cfp, p);
5499				if (error != 0)
5500					break;
5501				first = end;
5502			} else {
5503				/* handle fragment overlapped with new one */
5504				error = nfsrv_dolocal(vp, lfp, flags,
5505				    lop->lo_flags, first, lop->lo_end, cfp, p);
5506				if (error != 0)
5507					break;
5508				first = lop->lo_end;
5509				lop = nlop;
5510			}
5511		}
5512	}
5513	if (first < end && error == 0)
5514		/* handle fragment past end of list */
5515		error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
5516		    end, cfp, p);
5517
5518	NFSEXITCODE(error);
5519	return (error);
5520}
5521
5522/*
5523 * Local lock unlock. Unlock all byte ranges that are no longer locked
5524 * by NFSv4. To do this, unlock any subranges of first-->end that
5525 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
5526 * list. This list has all locks for the file held by other
5527 * <clientid, lockowner> tuples. The list is ordered by increasing
5528 * lo_first value, but may have entries that overlap each other, for
5529 * the case of read locks.
5530 */
5531static void
5532nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
5533    uint64_t init_end, NFSPROC_T *p)
5534{
5535	struct nfslock *lop;
5536	uint64_t first, end, prevfirst;
5537
5538	first = init_first;
5539	end = init_end;
5540	while (first < init_end) {
5541		/* Loop through all nfs locks, adjusting first and end */
5542		prevfirst = 0;
5543		LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
5544			KASSERT(prevfirst <= lop->lo_first,
5545			    ("nfsv4 locks out of order"));
5546			KASSERT(lop->lo_first < lop->lo_end,
5547			    ("nfsv4 bogus lock"));
5548			prevfirst = lop->lo_first;
5549			if (first >= lop->lo_first &&
5550			    first < lop->lo_end)
5551				/*
5552				 * Overlaps with initial part, so trim
5553				 * off that initial part by moving first past
5554				 * it.
5555				 */
5556				first = lop->lo_end;
5557			else if (end > lop->lo_first &&
5558			    lop->lo_first > first) {
5559				/*
5560				 * This lock defines the end of the
5561				 * segment to unlock, so set end to the
5562				 * start of it and break out of the loop.
5563				 */
5564				end = lop->lo_first;
5565				break;
5566			}
5567			if (first >= end)
5568				/*
5569				 * There is no segment left to do, so
5570				 * break out of this loop and then exit
5571				 * the outer while() since first will be set
5572				 * to end, which must equal init_end here.
5573				 */
5574				break;
5575		}
5576		if (first < end) {
5577			/* Unlock this segment */
5578			(void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
5579			    NFSLCK_READ, first, end, NULL, p);
5580			nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
5581			    first, end);
5582		}
5583		/*
5584		 * Now move past this segment and look for any further
5585		 * segment in the range, if there is one.
5586		 */
5587		first = end;
5588		end = init_end;
5589	}
5590}
5591
5592/*
5593 * Do the local lock operation and update the rollback list, as required.
5594 * Perform the rollback and return the error if nfsvno_advlock() fails.
5595 */
5596static int
5597nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
5598    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5599{
5600	struct nfsrollback *rlp;
5601	int error = 0, ltype, oldltype;
5602
5603	if (flags & NFSLCK_WRITE)
5604		ltype = F_WRLCK;
5605	else if (flags & NFSLCK_READ)
5606		ltype = F_RDLCK;
5607	else
5608		ltype = F_UNLCK;
5609	if (oldflags & NFSLCK_WRITE)
5610		oldltype = F_WRLCK;
5611	else if (oldflags & NFSLCK_READ)
5612		oldltype = F_RDLCK;
5613	else
5614		oldltype = F_UNLCK;
5615	if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
5616		/* nothing to do */
5617		goto out;
5618	error = nfsvno_advlock(vp, ltype, first, end, p);
5619	if (error != 0) {
5620		if (cfp != NULL) {
5621			cfp->cl_clientid.lval[0] = 0;
5622			cfp->cl_clientid.lval[1] = 0;
5623			cfp->cl_first = 0;
5624			cfp->cl_end = NFS64BITSSET;
5625			cfp->cl_flags = NFSLCK_WRITE;
5626			cfp->cl_ownerlen = 5;
5627			NFSBCOPY("LOCAL", cfp->cl_owner, 5);
5628		}
5629		nfsrv_locallock_rollback(vp, lfp, p);
5630	} else if (ltype != F_UNLCK) {
5631		rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
5632		    M_WAITOK);
5633		rlp->rlck_first = first;
5634		rlp->rlck_end = end;
5635		rlp->rlck_type = oldltype;
5636		LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
5637	}
5638
5639out:
5640	NFSEXITCODE(error);
5641	return (error);
5642}
5643
5644/*
5645 * Roll back local lock changes and free up the rollback list.
5646 */
5647static void
5648nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
5649{
5650	struct nfsrollback *rlp, *nrlp;
5651
5652	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
5653		(void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
5654		    rlp->rlck_end, p);
5655		free(rlp, M_NFSDROLLBACK);
5656	}
5657	LIST_INIT(&lfp->lf_rollback);
5658}
5659
5660/*
5661 * Update local lock list and delete rollback list (ie now committed to the
5662 * local locks). Most of the work is done by the internal function.
5663 */
5664static void
5665nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
5666    uint64_t end)
5667{
5668	struct nfsrollback *rlp, *nrlp;
5669	struct nfslock *new_lop, *other_lop;
5670
5671	new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
5672	if (flags & (NFSLCK_READ | NFSLCK_WRITE))
5673		other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
5674		    M_WAITOK);
5675	else
5676		other_lop = NULL;
5677	new_lop->lo_flags = flags;
5678	new_lop->lo_first = first;
5679	new_lop->lo_end = end;
5680	nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
5681	if (new_lop != NULL)
5682		free(new_lop, M_NFSDLOCK);
5683	if (other_lop != NULL)
5684		free(other_lop, M_NFSDLOCK);
5685
5686	/* and get rid of the rollback list */
5687	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
5688		free(rlp, M_NFSDROLLBACK);
5689	LIST_INIT(&lfp->lf_rollback);
5690}
5691
5692/*
5693 * Lock the struct nfslockfile for local lock updating.
5694 */
5695static void
5696nfsrv_locklf(struct nfslockfile *lfp)
5697{
5698	int gotlock;
5699
5700	/* lf_usecount ensures *lfp won't be free'd */
5701	lfp->lf_usecount++;
5702	do {
5703		gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
5704		    NFSSTATEMUTEXPTR, NULL);
5705	} while (gotlock == 0);
5706	lfp->lf_usecount--;
5707}
5708
5709/*
5710 * Unlock the struct nfslockfile after local lock updating.
5711 */
5712static void
5713nfsrv_unlocklf(struct nfslockfile *lfp)
5714{
5715
5716	nfsv4_unlock(&lfp->lf_locallock_lck, 0);
5717}
5718
5719/*
5720 * Clear out all state for the NFSv4 server.
5721 * Must be called by a thread that can sleep when no nfsds are running.
5722 */
5723void
5724nfsrv_throwawayallstate(NFSPROC_T *p)
5725{
5726	struct nfsclient *clp, *nclp;
5727	struct nfslockfile *lfp, *nlfp;
5728	int i;
5729
5730	/*
5731	 * For each client, clean out the state and then free the structure.
5732	 */
5733	for (i = 0; i < nfsrv_clienthashsize; i++) {
5734		LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5735			nfsrv_cleanclient(clp, p);
5736			nfsrv_freedeleglist(&clp->lc_deleg);
5737			nfsrv_freedeleglist(&clp->lc_olddeleg);
5738			free(clp->lc_stateid, M_NFSDCLIENT);
5739			free(clp, M_NFSDCLIENT);
5740		}
5741	}
5742
5743	/*
5744	 * Also, free up any remaining lock file structures.
5745	 */
5746	for (i = 0; i < nfsrv_lockhashsize; i++) {
5747		LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
5748			printf("nfsd unload: fnd a lock file struct\n");
5749			nfsrv_freenfslockfile(lfp);
5750		}
5751	}
5752}
5753
5754/*
5755 * Check the sequence# for the session and slot provided as an argument.
5756 * Also, renew the lease if the session will return NFS_OK.
5757 */
5758int
5759nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
5760    uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
5761    uint32_t *sflagsp, NFSPROC_T *p)
5762{
5763	struct nfsdsession *sep;
5764	struct nfssessionhash *shp;
5765	int error;
5766	SVCXPRT *savxprt;
5767
5768	shp = NFSSESSIONHASH(nd->nd_sessionid);
5769	NFSLOCKSESSION(shp);
5770	sep = nfsrv_findsession(nd->nd_sessionid);
5771	if (sep == NULL) {
5772		NFSUNLOCKSESSION(shp);
5773		return (NFSERR_BADSESSION);
5774	}
5775	error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
5776	    sep->sess_slots, NULL, NFSV4_SLOTS - 1);
5777	if (error != 0) {
5778		NFSUNLOCKSESSION(shp);
5779		return (error);
5780	}
5781	if (cache_this != 0)
5782		nd->nd_flag |= ND_SAVEREPLY;
5783	/* Renew the lease. */
5784	sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
5785	nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
5786	nd->nd_flag |= ND_IMPLIEDCLID;
5787
5788	/*
5789	 * If this session handles the backchannel, save the nd_xprt for this
5790	 * RPC, since this is the one being used.
5791	 */
5792	if (sep->sess_clp->lc_req.nr_client != NULL &&
5793	    (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) {
5794		savxprt = sep->sess_cbsess.nfsess_xprt;
5795		SVC_ACQUIRE(nd->nd_xprt);
5796		nd->nd_xprt->xp_p2 =
5797		    sep->sess_clp->lc_req.nr_client->cl_private;
5798		nd->nd_xprt->xp_idletimeout = 0;	/* Disable timeout. */
5799		sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
5800		if (savxprt != NULL)
5801			SVC_RELEASE(savxprt);
5802	}
5803
5804	*sflagsp = 0;
5805	if (sep->sess_clp->lc_req.nr_client == NULL)
5806		*sflagsp |= NFSV4SEQ_CBPATHDOWN;
5807	NFSUNLOCKSESSION(shp);
5808	if (error == NFSERR_EXPIRED) {
5809		*sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
5810		error = 0;
5811	} else if (error == NFSERR_ADMINREVOKED) {
5812		*sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
5813		error = 0;
5814	}
5815	*highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
5816	return (0);
5817}
5818
5819/*
5820 * Check/set reclaim complete for this session/clientid.
5821 */
5822int
5823nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd)
5824{
5825	struct nfsdsession *sep;
5826	struct nfssessionhash *shp;
5827	int error = 0;
5828
5829	shp = NFSSESSIONHASH(nd->nd_sessionid);
5830	NFSLOCKSTATE();
5831	NFSLOCKSESSION(shp);
5832	sep = nfsrv_findsession(nd->nd_sessionid);
5833	if (sep == NULL) {
5834		NFSUNLOCKSESSION(shp);
5835		NFSUNLOCKSTATE();
5836		return (NFSERR_BADSESSION);
5837	}
5838
5839	/* Check to see if reclaim complete has already happened. */
5840	if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
5841		error = NFSERR_COMPLETEALREADY;
5842	else
5843		sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
5844	NFSUNLOCKSESSION(shp);
5845	NFSUNLOCKSTATE();
5846	return (error);
5847}
5848
5849/*
5850 * Cache the reply in a session slot.
5851 */
5852void
5853nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat,
5854   struct mbuf **m)
5855{
5856	struct nfsdsession *sep;
5857	struct nfssessionhash *shp;
5858
5859	shp = NFSSESSIONHASH(sessionid);
5860	NFSLOCKSESSION(shp);
5861	sep = nfsrv_findsession(sessionid);
5862	if (sep == NULL) {
5863		NFSUNLOCKSESSION(shp);
5864		printf("nfsrv_cache_session: no session\n");
5865		m_freem(*m);
5866		return;
5867	}
5868	nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m);
5869	NFSUNLOCKSESSION(shp);
5870}
5871
5872/*
5873 * Search for a session that matches the sessionid.
5874 */
5875static struct nfsdsession *
5876nfsrv_findsession(uint8_t *sessionid)
5877{
5878	struct nfsdsession *sep;
5879	struct nfssessionhash *shp;
5880
5881	shp = NFSSESSIONHASH(sessionid);
5882	LIST_FOREACH(sep, &shp->list, sess_hash) {
5883		if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
5884			break;
5885	}
5886	return (sep);
5887}
5888
5889/*
5890 * Destroy a session.
5891 */
5892int
5893nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
5894{
5895	int error, samesess;
5896
5897	samesess = 0;
5898	if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID)) {
5899		samesess = 1;
5900		if ((nd->nd_flag & ND_LASTOP) == 0)
5901			return (NFSERR_BADSESSION);
5902	}
5903	error = nfsrv_freesession(NULL, sessionid);
5904	if (error == 0 && samesess != 0)
5905		nd->nd_flag &= ~ND_HASSEQUENCE;
5906	return (error);
5907}
5908
5909/*
5910 * Free up a session structure.
5911 */
5912static int
5913nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
5914{
5915	struct nfssessionhash *shp;
5916	int i;
5917
5918	NFSLOCKSTATE();
5919	if (sep == NULL) {
5920		shp = NFSSESSIONHASH(sessionid);
5921		NFSLOCKSESSION(shp);
5922		sep = nfsrv_findsession(sessionid);
5923	} else {
5924		shp = NFSSESSIONHASH(sep->sess_sessionid);
5925		NFSLOCKSESSION(shp);
5926	}
5927	if (sep != NULL) {
5928		sep->sess_refcnt--;
5929		if (sep->sess_refcnt > 0) {
5930			NFSUNLOCKSESSION(shp);
5931			NFSUNLOCKSTATE();
5932			return (0);
5933		}
5934		LIST_REMOVE(sep, sess_hash);
5935		LIST_REMOVE(sep, sess_list);
5936	}
5937	NFSUNLOCKSESSION(shp);
5938	NFSUNLOCKSTATE();
5939	if (sep == NULL)
5940		return (NFSERR_BADSESSION);
5941	for (i = 0; i < NFSV4_SLOTS; i++)
5942		if (sep->sess_slots[i].nfssl_reply != NULL)
5943			m_freem(sep->sess_slots[i].nfssl_reply);
5944	if (sep->sess_cbsess.nfsess_xprt != NULL)
5945		SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
5946	free(sep, M_NFSDSESSION);
5947	return (0);
5948}
5949
5950/*
5951 * Free a stateid.
5952 * RFC5661 says that it should fail when there are associated opens, locks
5953 * or delegations. Since stateids represent opens, I don't see how you can
5954 * free an open stateid (it will be free'd when closed), so this function
5955 * only works for lock stateids (freeing the lock_owner) or delegations.
5956 */
5957int
5958nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
5959    NFSPROC_T *p)
5960{
5961	struct nfsclient *clp;
5962	struct nfsstate *stp;
5963	int error;
5964
5965	NFSLOCKSTATE();
5966	/*
5967	 * Look up the stateid
5968	 */
5969	error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
5970	    NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
5971	if (error == 0) {
5972		/* First, check for a delegation. */
5973		LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
5974			if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
5975			    NFSX_STATEIDOTHER))
5976				break;
5977		}
5978		if (stp != NULL) {
5979			nfsrv_freedeleg(stp);
5980			NFSUNLOCKSTATE();
5981			return (error);
5982		}
5983	}
5984	/* Not a delegation, try for a lock_owner. */
5985	if (error == 0)
5986		error = nfsrv_getstate(clp, stateidp, 0, &stp);
5987	if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
5988	    NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
5989		/* Not a lock_owner stateid. */
5990		error = NFSERR_LOCKSHELD;
5991	if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
5992		error = NFSERR_LOCKSHELD;
5993	if (error == 0)
5994		nfsrv_freelockowner(stp, NULL, 0, p);
5995	NFSUNLOCKSTATE();
5996	return (error);
5997}
5998
5999/*
6000 * Generate the xdr for an NFSv4.1 CBSequence Operation.
6001 */
6002static int
6003nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
6004    int dont_replycache, struct nfsdsession **sepp)
6005{
6006	struct nfsdsession *sep;
6007	uint32_t *tl, slotseq = 0;
6008	int maxslot, slotpos;
6009	uint8_t sessionid[NFSX_V4SESSIONID];
6010	int error;
6011
6012	error = nfsv4_getcbsession(clp, sepp);
6013	if (error != 0)
6014		return (error);
6015	sep = *sepp;
6016	(void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot,
6017	    &slotseq, sessionid);
6018	KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
6019
6020	/* Build the Sequence arguments. */
6021	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
6022	bcopy(sessionid, tl, NFSX_V4SESSIONID);
6023	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
6024	nd->nd_slotseq = tl;
6025	*tl++ = txdr_unsigned(slotseq);
6026	*tl++ = txdr_unsigned(slotpos);
6027	*tl++ = txdr_unsigned(maxslot);
6028	if (dont_replycache == 0)
6029		*tl++ = newnfs_true;
6030	else
6031		*tl++ = newnfs_false;
6032	*tl = 0;			/* No referring call list, for now. */
6033	nd->nd_flag |= ND_HASSEQUENCE;
6034	return (0);
6035}
6036
6037/*
6038 * Get a session for the callback.
6039 */
6040static int
6041nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
6042{
6043	struct nfsdsession *sep;
6044
6045	NFSLOCKSTATE();
6046	LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6047		if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
6048			break;
6049	}
6050	if (sep == NULL) {
6051		NFSUNLOCKSTATE();
6052		return (NFSERR_BADSESSION);
6053	}
6054	sep->sess_refcnt++;
6055	*sepp = sep;
6056	NFSUNLOCKSTATE();
6057	return (0);
6058}
6059
6060/*
6061 * Free up all backchannel xprts.  This needs to be done when the nfsd threads
6062 * exit, since those transports will all be going away.
6063 * This is only called after all the nfsd threads are done performing RPCs,
6064 * so locking shouldn't be an issue.
6065 */
6066APPLESTATIC void
6067nfsrv_freeallbackchannel_xprts(void)
6068{
6069	struct nfsdsession *sep;
6070	struct nfsclient *clp;
6071	SVCXPRT *xprt;
6072	int i;
6073
6074	for (i = 0; i < nfsrv_clienthashsize; i++) {
6075		LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
6076			LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6077				xprt = sep->sess_cbsess.nfsess_xprt;
6078				sep->sess_cbsess.nfsess_xprt = NULL;
6079				if (xprt != NULL)
6080					SVC_RELEASE(xprt);
6081			}
6082		}
6083	}
6084}
6085
6086