nfs_nfsdstate.c revision 310303
1/*-
2 * Copyright (c) 2009 Rick Macklem, University of Guelph
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdstate.c 310303 2016-12-19 22:28:28Z rmacklem $");
30
31#ifndef APPLEKEXT
32#include <fs/nfs/nfsport.h>
33
/*
 * File-scope state for the NFSv4 server.
 * nfsrv_stablefirst is presumably the in-memory head for the stable storage
 * restart file -- TODO confirm against its definition.
 * nfsrv_issuedelegs == 0 makes nfsrv_setclient() clear the callback program.
 * nfsv4rootfs_lock is the lock used to lock out other nfsd threads.
 */
34struct nfsrv_stablefirst nfsrv_stablefirst;
35int nfsrv_issuedelegs = 0;
36int nfsrv_dolocallocks = 0;
37struct nfsv4lock nfsv4rootfs_lock;
38
/* Variables and mutexes that are defined outside this file. */
39extern int newnfs_numnfsd;
40extern struct nfsstats newnfsstats;
41extern int nfsrv_lease;
42extern struct timeval nfsboottime;
43extern u_int32_t newnfs_true, newnfs_false;
44NFSV4ROOTLOCKMUTEX;
45NFSSTATESPINLOCK;
46
/*
 * Hash table sizes.  Each one is a loader tunable that is also exported
 * as a vfs.nfsd sysctl with CTLFLAG_RDTUN.
 */
47SYSCTL_DECL(_vfs_nfsd);
48int	nfsrv_statehashsize = NFSSTATEHASHSIZE;
49TUNABLE_INT("vfs.nfsd.statehashsize", &nfsrv_statehashsize);
50SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
51    &nfsrv_statehashsize, 0,
52    "Size of state hash table set via loader.conf");
53
54int	nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
55TUNABLE_INT("vfs.nfsd.clienthashsize", &nfsrv_clienthashsize);
56SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
57    &nfsrv_clienthashsize, 0,
58    "Size of client hash table set via loader.conf");
59
/* Note that the tunable/sysctl is named "fhhashsize", not "lockhashsize". */
60int	nfsrv_lockhashsize = NFSLOCKHASHSIZE;
61TUNABLE_INT("vfs.nfsd.fhhashsize", &nfsrv_lockhashsize);
62SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
63    &nfsrv_lockhashsize, 0,
64    "Size of file handle hash table set via loader.conf");
65
66int	nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
67TUNABLE_INT("vfs.nfsd.sessionhashsize", &nfsrv_sessionhashsize);
68SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
69    &nfsrv_sessionhashsize, 0,
70    "Size of session hash table set via loader.conf");
71
/*
 * nfsrv_setclient() replies NFSERR_RESOURCE once nfsrv_openpluslock
 * exceeds this limit.
 */
72static int	nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
73TUNABLE_INT("vfs.nfsd.v4statelimit", &nfsrv_v4statelimit);
74SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
75    &nfsrv_v4statelimit, 0,
76    "High water limit for NFSv4 opens+locks+delegations");
77
78static int	nfsrv_writedelegifpos = 0;
79SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
80    &nfsrv_writedelegifpos, 0,
81    "Issue a write delegation for read opens if possible");
82
83/*
84 * Hash lists for nfs V4.
85 */
86struct nfsclienthashhead	*nfsclienthash;
87struct nfslockhashhead		*nfslockhash;
88struct nfssessionhash		*nfssessionhash;
89#endif	/* !APPLEKEXT */
90
/*
 * Private counters and settings.
 * nfsrv_openpluslock is compared against nfsrv_v4statelimit and is also
 * incremented by nfsrv_setclient() for every client it links in.
 * nfsrvboottime supplies lval[0] of every clientid issued by this file.
 * nfsrv_clients is incremented alongside newnfsstats.srvclients.
 * A non-zero nfsrv_nogsscallback makes nfsrv_setclient() clear the
 * callback program for AUTH_GSS requests.
 */
91static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
92static time_t nfsrvboottime;
93static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
94static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
95static int nfsrv_nogsscallback = 0;
96
97/* local functions */
98static void nfsrv_dumpaclient(struct nfsclient *clp,
99    struct nfsd_dumpclients *dumpp);
100static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
101    NFSPROC_T *p);
102static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
103    NFSPROC_T *p);
104static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
105    NFSPROC_T *p);
106static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
107    int cansleep, NFSPROC_T *p);
108static void nfsrv_freenfslock(struct nfslock *lop);
109static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
110static void nfsrv_freedeleg(struct nfsstate *);
111static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
112    u_int32_t flags, struct nfsstate **stpp);
113static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
114    struct nfsstate **stpp);
115static int nfsrv_getlockfh(vnode_t vp, u_short flags,
116    struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
117static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
118    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
119static void nfsrv_insertlock(struct nfslock *new_lop,
120    struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
121static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
122    struct nfslock **other_lopp, struct nfslockfile *lfp);
123static int nfsrv_getipnumber(u_char *cp);
124static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
125    nfsv4stateid_t *stateidp, int specialid);
126static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
127    u_int32_t flags);
128static int nfsrv_docallback(struct nfsclient *clp, int procnum,
129    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
130    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p);
131static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
132    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp);
133static u_int32_t nfsrv_nextclientindex(void);
134static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
135static void nfsrv_markstable(struct nfsclient *clp);
136static int nfsrv_checkstable(struct nfsclient *clp);
137static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
138    vnode *vp, NFSPROC_T *p);
139static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
140    NFSPROC_T *p, vnode_t vp);
141static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
142    struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
143static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
144    struct nfsclient *clp);
145static time_t nfsrv_leaseexpiry(void);
146static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
147static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
148    struct nfsstate *stp, struct nfsrvcache *op);
149static int nfsrv_nootherstate(struct nfsstate *stp);
150static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
151    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
152static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
153    uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
154static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
155    int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
156    NFSPROC_T *p);
157static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
158    NFSPROC_T *p);
159static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
160    uint64_t first, uint64_t end);
161static void nfsrv_locklf(struct nfslockfile *lfp);
162static void nfsrv_unlocklf(struct nfslockfile *lfp);
163static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
164static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
165static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
166    int dont_replycache, struct nfsdsession **sepp);
167static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
168
169/*
 * Scan the client list for a match and either return the current one,
 * create a new entry or return an error.
 * If returning a non-error, the clp structure must either be linked into
 * the client list or free'd.
 *
 * nd        - request descriptor; nd_flag is tested for ND_GSS and
 *             ND_NFSV41.
 * new_clpp  - candidate client structure filled in by the caller.  On the
 *             paths that link it into the client hash table, *new_clpp is
 *             set to NULL; otherwise it is left untouched.
 * clientidp - out: the clientid assigned to, or found for, the client.
 * confirmp  - in/out: the confirm value.  For NFSv4.1 a non-zero lval[1]
 *             on entry means the client is updating a confirmed clientid,
 *             and lval[1] is set to 1 on return when a confirmed clientid
 *             with a matching verifier was found.
 * p         - passed through to nfsrv_cleanclient() and nfsrv_zapclient().
 *
 * Returns 0 on success, or NFSERR_RESOURCE, NFSERR_NOENT or
 * NFSERR_CLIDINUSE.
 *
 * The function drops its reference on nfsv4rootfs_lock and then takes the
 * lock to keep other nfsd threads out; the lock is released again on
 * every return path after that point.
 */
175APPLESTATIC int
176nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
177    nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
178{
179	struct nfsclient *clp = NULL, *new_clp = *new_clpp;
180	int i, error = 0;
181	struct nfsstate *stp, *tstp;
182	struct sockaddr_in *sad, *rad;
183	int zapit = 0, gotit, hasstate = 0, igotlock;
	/* Source of the confirm values handed out below; never reset. */
184	static u_int64_t confirm_index = 0;
185
186	/*
187	 * Check for state resource limit exceeded.
188	 */
189	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
190		error = NFSERR_RESOURCE;
191		goto out;
192	}
193
194	if (nfsrv_issuedelegs == 0 ||
195	    ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
196		/*
197		 * Don't do callbacks when delegations are disabled or
198		 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
199		 * If establishing a callback connection is attempted
200		 * when a firewall is blocking the callback path, the
201		 * server may wait too long for the connect attempt to
202		 * succeed during the Open. Some clients, such as Linux,
203		 * may timeout and give up on the Open before the server
204		 * replies. Also, since AUTH_GSS callbacks are not
205		 * yet interoperability tested, they might cause the
206		 * server to crap out, if they get past the Init call to
207		 * the client.
208		 */
		/*
		 * NOTE(review): as coded, AUTH_GSS callbacks are turned
		 * off only when nfsrv_nogsscallback is non-zero.
		 */
209		new_clp->lc_program = 0;
210
211	/* Lock out other nfsd threads */
212	NFSLOCKV4ROOTMUTEX();
213	nfsv4_relref(&nfsv4rootfs_lock);
214	do {
215		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
216		    NFSV4ROOTLOCKMUTEXPTR, NULL);
217	} while (!igotlock);
218	NFSUNLOCKV4ROOTMUTEX();
219
220	/*
221	 * Search for a match in the client list.
222	 */
	/*
	 * Every bucket is walked because the match is on the client's id
	 * string, not on the clientid the table is hashed by.  On a match
	 * clp is the entry and i its bucket; otherwise clp is NULL and i
	 * ends up equal to nfsrv_clienthashsize.
	 */
223	gotit = i = 0;
224	while (i < nfsrv_clienthashsize && !gotit) {
225	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
226		if (new_clp->lc_idlen == clp->lc_idlen &&
227		    !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
228			gotit = 1;
229			break;
230		}
231	    }
232	    if (gotit == 0)
233		i++;
234	}
	/*
	 * Case 1: no entry, or one that is unconfirmed or admin revoked.
	 * A fresh clientid is issued for new_clp.
	 */
235	if (!gotit ||
236	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
237		if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
238			/*
239			 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
240			 * client is trying to update a confirmed clientid.
241			 */
242			NFSLOCKV4ROOTMUTEX();
243			nfsv4_unlock(&nfsv4rootfs_lock, 1);
244			NFSUNLOCKV4ROOTMUTEX();
245			confirmp->lval[1] = 0;
246			error = NFSERR_NOENT;
247			goto out;
248		}
249		/*
250		 * Get rid of the old one.
251		 */
		/* i stops short of the table size only when a match was found. */
252		if (i != nfsrv_clienthashsize) {
253			LIST_REMOVE(clp, lc_hash);
254			nfsrv_cleanclient(clp, p);
255			nfsrv_freedeleglist(&clp->lc_deleg);
256			nfsrv_freedeleglist(&clp->lc_olddeleg);
257			zapit = 1;
258		}
259		/*
260		 * Add it after assigning a client id to it.
261		 */
262		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
263		if ((nd->nd_flag & ND_NFSV41) != 0)
264			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
265			    ++confirm_index;
266		else
267			confirmp->qval = new_clp->lc_confirm.qval =
268			    ++confirm_index;
269		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
270		    (u_int32_t)nfsrvboottime;
271		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
272		    nfsrv_nextclientindex();
273		new_clp->lc_stateindex = 0;
274		new_clp->lc_statemaxindex = 0;
275		new_clp->lc_cbref = 0;
276		new_clp->lc_expiry = nfsrv_leaseexpiry();
277		LIST_INIT(&new_clp->lc_open);
278		LIST_INIT(&new_clp->lc_deleg);
279		LIST_INIT(&new_clp->lc_olddeleg);
280		LIST_INIT(&new_clp->lc_session);
281		for (i = 0; i < nfsrv_statehashsize; i++)
282			LIST_INIT(&new_clp->lc_stateid[i]);
283		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
284		    lc_hash);
285		newnfsstats.srvclients++;
286		nfsrv_openpluslock++;
287		nfsrv_clients++;
288		NFSLOCKV4ROOTMUTEX();
289		nfsv4_unlock(&nfsv4rootfs_lock, 1);
290		NFSUNLOCKV4ROOTMUTEX();
		/* The old entry is only zapped after the lock is released. */
291		if (zapit)
292			nfsrv_zapclient(clp, p);
		/* new_clp is now in the hash table; report that to the caller. */
293		*new_clpp = NULL;
294		goto out;
295	}
296
297	/*
298	 * Now, handle the cases where the id is already issued.
299	 */
300	if (nfsrv_notsamecredname(nd, clp)) {
301	    /*
302	     * Check to see if there is expired state that should go away.
303	     */
304	    if (clp->lc_expiry < NFSD_MONOSEC &&
305	        (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
306		nfsrv_cleanclient(clp, p);
307		nfsrv_freedeleglist(&clp->lc_deleg);
308	    }
309
310	    /*
311	     * If there is outstanding state, then reply NFSERR_CLIDINUSE per
312	     * RFC3530 Sec. 8.1.2 last para.
313	     */
314	    if (!LIST_EMPTY(&clp->lc_deleg)) {
315		hasstate = 1;
316	    } else if (LIST_EMPTY(&clp->lc_open)) {
317		hasstate = 0;
318	    } else {
319		hasstate = 0;
320		/* Look for an Open on the OpenOwner */
321		LIST_FOREACH(stp, &clp->lc_open, ls_list) {
322		    if (!LIST_EMPTY(&stp->ls_open)) {
323			hasstate = 1;
324			break;
325		    }
326		}
327	    }
328	    if (hasstate) {
329		/*
330		 * If the uid doesn't match, return NFSERR_CLIDINUSE after
331		 * filling out the correct ipaddr and portnum.
332		 */
		/*
		 * The existing client's address and port are copied into
		 * new_clp's sockaddr, which stays with the caller.  Both
		 * sockaddrs are accessed as sockaddr_in here.
		 */
333		sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *);
334		rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
335		sad->sin_addr.s_addr = rad->sin_addr.s_addr;
336		sad->sin_port = rad->sin_port;
337		NFSLOCKV4ROOTMUTEX();
338		nfsv4_unlock(&nfsv4rootfs_lock, 1);
339		NFSUNLOCKV4ROOTMUTEX();
340		error = NFSERR_CLIDINUSE;
341		goto out;
342	    }
343	}
344
	/* Case 2: confirmed entry exists but the verifier differs. */
345	if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
346		/*
347		 * If the verifier has changed, the client has rebooted
348		 * and a new client id is issued. The old state info
349		 * can be thrown away once the SETCLIENTID_CONFIRM occurs.
350		 */
351		LIST_REMOVE(clp, lc_hash);
352		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
353		if ((nd->nd_flag & ND_NFSV41) != 0)
354			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
355			    ++confirm_index;
356		else
357			confirmp->qval = new_clp->lc_confirm.qval =
358			    ++confirm_index;
359		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
360		    nfsrvboottime;
361		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
362		    nfsrv_nextclientindex();
363		new_clp->lc_stateindex = 0;
364		new_clp->lc_statemaxindex = 0;
365		new_clp->lc_cbref = 0;
366		new_clp->lc_expiry = nfsrv_leaseexpiry();
367
368		/*
369		 * Save the state until confirmed.
370		 */
		/*
		 * Each list head is moved from clp to new_clp and every
		 * entry's back pointer (ls_clp) is redirected to new_clp.
		 */
371		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
372		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
373			tstp->ls_clp = new_clp;
374		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
375		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
376			tstp->ls_clp = new_clp;
377		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
378		    ls_list);
379		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
380			tstp->ls_clp = new_clp;
381		for (i = 0; i < nfsrv_statehashsize; i++) {
382			LIST_NEWHEAD(&new_clp->lc_stateid[i],
383			    &clp->lc_stateid[i], ls_hash);
384			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
385				tstp->ls_clp = new_clp;
386		}
387		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
388		    lc_hash);
389		newnfsstats.srvclients++;
390		nfsrv_openpluslock++;
391		nfsrv_clients++;
392		NFSLOCKV4ROOTMUTEX();
393		nfsv4_unlock(&nfsv4rootfs_lock, 1);
394		NFSUNLOCKV4ROOTMUTEX();
395
396		/*
397		 * Must wait until any outstanding callback on the old clp
398		 * completes.
399		 */
		/* Re-checks lc_cbref on wakeup or after the 10 * hz timeout. */
400		NFSLOCKSTATE();
401		while (clp->lc_cbref) {
402			clp->lc_flags |= LCL_WAKEUPWANTED;
403			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
404			    "nfsd clp", 10 * hz);
405		}
406		NFSUNLOCKSTATE();
407		nfsrv_zapclient(clp, p);
408		*new_clpp = NULL;
409		goto out;
410	}
411
	/* Case 3: confirmed entry with the same verifier. */
412	/* For NFSv4.1, mark that we found a confirmed clientid. */
413	if ((nd->nd_flag & ND_NFSV41) != 0) {
		/*
		 * new_clp is not linked in on this path, so *new_clpp is
		 * left set and clp stays in the hash table.
		 */
414		clientidp->lval[0] = clp->lc_clientid.lval[0];
415		clientidp->lval[1] = clp->lc_clientid.lval[1];
416		confirmp->lval[0] = 0;	/* Ignored by client */
417		confirmp->lval[1] = 1;
418	} else {
419		/*
420		 * id and verifier match, so update the net address info
421		 * and get rid of any existing callback authentication
422		 * handle, so a new one will be acquired.
423		 */
		/*
		 * The clientid and state indices are carried over from clp.
		 * LCL_DONTCLEAN is set together with LCL_NEEDSCONFIRM;
		 * nfsrv_getclient() only discards state at confirm time when
		 * LCL_NEEDSCONFIRM is set without LCL_DONTCLEAN.
		 */
424		LIST_REMOVE(clp, lc_hash);
425		new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
426		new_clp->lc_expiry = nfsrv_leaseexpiry();
427		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
428		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
429		    clp->lc_clientid.lval[0];
430		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
431		    clp->lc_clientid.lval[1];
432		new_clp->lc_delegtime = clp->lc_delegtime;
433		new_clp->lc_stateindex = clp->lc_stateindex;
434		new_clp->lc_statemaxindex = clp->lc_statemaxindex;
435		new_clp->lc_cbref = 0;
436		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
437		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
438			tstp->ls_clp = new_clp;
439		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
440		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
441			tstp->ls_clp = new_clp;
442		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
443		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
444			tstp->ls_clp = new_clp;
445		for (i = 0; i < nfsrv_statehashsize; i++) {
446			LIST_NEWHEAD(&new_clp->lc_stateid[i],
447			    &clp->lc_stateid[i], ls_hash);
448			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
449				tstp->ls_clp = new_clp;
450		}
451		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
452		    lc_hash);
453		newnfsstats.srvclients++;
454		nfsrv_openpluslock++;
455		nfsrv_clients++;
456	}
457	NFSLOCKV4ROOTMUTEX();
458	nfsv4_unlock(&nfsv4rootfs_lock, 1);
459	NFSUNLOCKV4ROOTMUTEX();
460
	/* Only the non-NFSv4.1 branch above replaced clp with new_clp. */
461	if ((nd->nd_flag & ND_NFSV41) == 0) {
462		/*
463		 * Must wait until any outstanding callback on the old clp
464		 * completes.
465		 */
466		NFSLOCKSTATE();
467		while (clp->lc_cbref) {
468			clp->lc_flags |= LCL_WAKEUPWANTED;
469			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
470			    "nfsdclp", 10 * hz);
471		}
472		NFSUNLOCKSTATE();
473		nfsrv_zapclient(clp, p);
474		*new_clpp = NULL;
475	}
476
477out:
478	NFSEXITCODE2(error, nd);
479	return (error);
480}
481
482/*
 * Check to see if the client id exists and optionally confirm it.
 *
 * clientid  - the clientid to look up.
 * opflags   - CLOPS_* flags selecting the operation: CLOPS_CONFIRM,
 *             CLOPS_RENEW and CLOPS_RENEWOP are acted on here.
 * clpp      - if non-NULL, receives the client found.  It is set to NULL
 *             on entry and is filled in whenever the lookup succeeds and
 *             the client is not admin revoked, even if a later check
 *             makes the function return an error.
 * nsep      - for a confirm, a session to link onto the client, or NULL.
 * confirm   - confirm value compared against the client's lc_confirm.
 * cbprogram - callback program; stored in lc_program for NFSv4.1 and
 *             passed to clnt_bck_create().
 * nd        - request descriptor.  May be NULL for the initial clientid
 *             check, but it is dereferenced without a check on the
 *             CLOPS_CONFIRM path.
 * p         - passed through to nfsrv_cleanclient().
 *
 * Returns 0, or one of NFSERR_STALECLIENTID, NFSERR_EXPIRED,
 * NFSERR_ADMINREVOKED, NFSERR_CLIDINUSE, NFSERR_ACCES or
 * NFSERR_CBPATHDOWN.
 *
 * Locking: for CLOPS_CONFIRM the nfsv4rootfs_lock is taken to lock out
 * the nfsd threads; when opflags is exactly CLOPS_RENEW the caller already
 * holds the state lock; otherwise the state lock is taken and released
 * here.
 */
485APPLESTATIC int
486nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
487    struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
488    struct nfsrv_descript *nd, NFSPROC_T *p)
489{
490	struct nfsclient *clp;
491	struct nfsstate *stp;
492	int i;
493	struct nfsclienthashhead *hp;
494	int error = 0, igotlock, doneok;
495	struct nfssessionhash *shp;
496	struct nfsdsession *sep;
497	uint64_t sessid[2];
	/* Counter used to build session ids; bumped under the root mutex. */
498	static uint64_t next_sess = 0;
499
500	if (clpp)
501		*clpp = NULL;
	/*
	 * Unless this is an NFSv4.1 renew, which finds the client via its
	 * session below, lval[0] of the clientid must match nfsrvboottime.
	 */
502	if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
503	    opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
504		error = NFSERR_STALECLIENTID;
505		goto out;
506	}
507
508	/*
509	 * If called with opflags == CLOPS_RENEW, the State Lock is
510	 * already held. Otherwise, we need to get either that or,
511	 * for the case of Confirm, lock out the nfsd threads.
512	 */
513	if (opflags & CLOPS_CONFIRM) {
514		NFSLOCKV4ROOTMUTEX();
515		nfsv4_relref(&nfsv4rootfs_lock);
516		do {
517			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
518			    NFSV4ROOTLOCKMUTEXPTR, NULL);
519		} while (!igotlock);
520		/*
521		 * Create a new sessionid here, since we need to do it where
522		 * there is a mutex held to serialize update of next_sess.
523		 */
		/* nd is dereferenced unconditionally on this path. */
524		if ((nd->nd_flag & ND_NFSV41) != 0) {
525			sessid[0] = ++next_sess;
526			sessid[1] = clientid.qval;
527		}
528		NFSUNLOCKV4ROOTMUTEX();
529	} else if (opflags != CLOPS_RENEW) {
530		NFSLOCKSTATE();
531	}
532
533	/* For NFSv4.1, the clp is acquired from the associated session. */
534	if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
535	    opflags == CLOPS_RENEW) {
		/* Without ND_HASSEQUENCE no lookup is done and clp stays NULL. */
536		clp = NULL;
537		if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
538			shp = NFSSESSIONHASH(nd->nd_sessionid);
539			NFSLOCKSESSION(shp);
540			sep = nfsrv_findsession(nd->nd_sessionid);
541			if (sep != NULL)
542				clp = sep->sess_clp;
543			NFSUNLOCKSESSION(shp);
544		}
545	} else {
		/* clp is NULL after the loop when no entry matched. */
546		hp = NFSCLIENTHASH(clientid);
547		LIST_FOREACH(clp, hp, lc_hash) {
548			if (clp->lc_clientid.lval[1] == clientid.lval[1])
549				break;
550		}
551	}
552	if (clp == NULL) {
553		if (opflags & CLOPS_CONFIRM)
554			error = NFSERR_STALECLIENTID;
555		else
556			error = NFSERR_EXPIRED;
557	} else if (clp->lc_flags & LCL_ADMINREVOKED) {
558		/*
559		 * If marked admin revoked, just return the error.
560		 */
561		error = NFSERR_ADMINREVOKED;
562	}
	/* Early exit: undo whichever lock was taken above; *clpp stays NULL. */
563	if (error) {
564		if (opflags & CLOPS_CONFIRM) {
565			NFSLOCKV4ROOTMUTEX();
566			nfsv4_unlock(&nfsv4rootfs_lock, 1);
567			NFSUNLOCKV4ROOTMUTEX();
568		} else if (opflags != CLOPS_RENEW) {
569			NFSUNLOCKSTATE();
570		}
571		goto out;
572	}
573
574	/*
575	 * Perform any operations specified by the opflags.
576	 */
577	if (opflags & CLOPS_CONFIRM) {
		/* NFSv4.1 compares only lval[0]; otherwise the whole qval. */
578		if (((nd->nd_flag & ND_NFSV41) != 0 &&
579		     clp->lc_confirm.lval[0] != confirm.lval[0]) ||
580		    ((nd->nd_flag & ND_NFSV41) == 0 &&
581		     clp->lc_confirm.qval != confirm.qval))
582			error = NFSERR_STALECLIENTID;
583		else if (nfsrv_notsamecredname(nd, clp))
584			error = NFSERR_CLIDINUSE;
585
586		if (!error) {
		    /* Clean only when NEEDSCONFIRM is set without DONTCLEAN. */
587		    if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
588			LCL_NEEDSCONFIRM) {
589			/*
590			 * Hang onto the delegations (as old delegations)
591			 * for an Open with CLAIM_DELEGATE_PREV unless in
592			 * grace, but get rid of the rest of the state.
593			 */
594			nfsrv_cleanclient(clp, p);
595			nfsrv_freedeleglist(&clp->lc_olddeleg);
596			if (nfsrv_checkgrace(nd, clp, 0)) {
597			    /* In grace, so just delete delegations */
598			    nfsrv_freedeleglist(&clp->lc_deleg);
599			} else {
600			    LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
601				stp->ls_flags |= NFSLCK_OLDDELEG;
602			    clp->lc_delegtime = NFSD_MONOSEC +
603				nfsrv_lease + NFSRV_LEASEDELTA;
604			    LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
605				ls_list);
606			}
607			if ((nd->nd_flag & ND_NFSV41) != 0)
608			    clp->lc_program = cbprogram;
609		    }
610		    clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
611		    if (clp->lc_program)
612			clp->lc_flags |= LCL_NEEDSCBNULL;
613		    /* For NFSv4.1, link the session onto the client. */
614		    if (nsep != NULL) {
615			/* Hold a reference on the xprt for a backchannel. */
616			if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
617			    != 0 && clp->lc_req.nr_client == NULL) {
618			    clp->lc_req.nr_client = (struct __rpc_client *)
619				clnt_bck_create(nd->nd_xprt->xp_socket,
620				cbprogram, NFSV4_CBVERS);
621			    if (clp->lc_req.nr_client != NULL) {
622				SVC_ACQUIRE(nd->nd_xprt);
623				nd->nd_xprt->xp_p2 =
624				    clp->lc_req.nr_client->cl_private;
625				/* Disable idle timeout. */
626				nd->nd_xprt->xp_idletimeout = 0;
627				nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
628			    } else
				/* No client handle: clear the backchannel flag. */
629				nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
630			}
			/*
			 * sessid was only filled in above for ND_NFSV41
			 * requests; presumably nsep is non-NULL only for
			 * those -- TODO confirm against callers.
			 */
631			NFSBCOPY(sessid, nsep->sess_sessionid,
632			    NFSX_V4SESSIONID);
633			NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
634			    NFSX_V4SESSIONID);
635			shp = NFSSESSIONHASH(nsep->sess_sessionid);
636			NFSLOCKSTATE();
637			NFSLOCKSESSION(shp);
638			LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
639			LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
640			nsep->sess_clp = clp;
641			NFSUNLOCKSESSION(shp);
642			NFSUNLOCKSTATE();
643		    }
644		}
645	} else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		/* Not a confirm request and the clientid is still unconfirmed. */
646		error = NFSERR_EXPIRED;
647	}
648
649	/*
650	 * If called by the Renew Op, we must check the principal.
651	 */
652	if (!error && (opflags & CLOPS_RENEWOP)) {
653	    if (nfsrv_notsamecredname(nd, clp)) {
		/*
		 * A different principal is still accepted if any open
		 * state of this client belongs to the requesting uid.
		 */
654		doneok = 0;
655		for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
656		    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
657			if ((stp->ls_flags & NFSLCK_OPEN) &&
658			    stp->ls_uid == nd->nd_cred->cr_uid) {
659				doneok = 1;
660				break;
661			}
662		    }
663		}
664		if (!doneok)
665			error = NFSERR_ACCES;
666	    }
667	    if (!error && (clp->lc_flags & LCL_CBDOWN))
668		error = NFSERR_CBPATHDOWN;
669	}
	/* The lease is renewed even when the callback path is down. */
670	if ((!error || error == NFSERR_CBPATHDOWN) &&
671	     (opflags & CLOPS_RENEW)) {
672		clp->lc_expiry = nfsrv_leaseexpiry();
673	}
674	if (opflags & CLOPS_CONFIRM) {
675		NFSLOCKV4ROOTMUTEX();
676		nfsv4_unlock(&nfsv4rootfs_lock, 1);
677		NFSUNLOCKV4ROOTMUTEX();
678	} else if (opflags != CLOPS_RENEW) {
679		NFSUNLOCKSTATE();
680	}
	/* clp is handed back even if one of the checks above set error. */
681	if (clpp)
682		*clpp = clp;
683
684out:
685	NFSEXITCODE2(error, nd);
686	return (error);
687}
688
689/*
690 * Perform the NFSv4.1 destroy clientid.
691 */
692int
693nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
694{
695	struct nfsclient *clp;
696	struct nfsclienthashhead *hp;
697	int error = 0, i, igotlock;
698
699	if (nfsrvboottime != clientid.lval[0]) {
700		error = NFSERR_STALECLIENTID;
701		goto out;
702	}
703
704	/* Lock out other nfsd threads */
705	NFSLOCKV4ROOTMUTEX();
706	nfsv4_relref(&nfsv4rootfs_lock);
707	do {
708		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
709		    NFSV4ROOTLOCKMUTEXPTR, NULL);
710	} while (igotlock == 0);
711	NFSUNLOCKV4ROOTMUTEX();
712
713	hp = NFSCLIENTHASH(clientid);
714	LIST_FOREACH(clp, hp, lc_hash) {
715		if (clp->lc_clientid.lval[1] == clientid.lval[1])
716			break;
717	}
718	if (clp == NULL) {
719		NFSLOCKV4ROOTMUTEX();
720		nfsv4_unlock(&nfsv4rootfs_lock, 1);
721		NFSUNLOCKV4ROOTMUTEX();
722		/* Just return ok, since it is gone. */
723		goto out;
724	}
725
726	/* Scan for state on the clientid. */
727	for (i = 0; i < nfsrv_statehashsize; i++)
728		if (!LIST_EMPTY(&clp->lc_stateid[i])) {
729			NFSLOCKV4ROOTMUTEX();
730			nfsv4_unlock(&nfsv4rootfs_lock, 1);
731			NFSUNLOCKV4ROOTMUTEX();
732			error = NFSERR_CLIENTIDBUSY;
733			goto out;
734		}
735	if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
736		NFSLOCKV4ROOTMUTEX();
737		nfsv4_unlock(&nfsv4rootfs_lock, 1);
738		NFSUNLOCKV4ROOTMUTEX();
739		error = NFSERR_CLIENTIDBUSY;
740		goto out;
741	}
742
743	/* Destroy the clientid and return ok. */
744	nfsrv_cleanclient(clp, p);
745	nfsrv_freedeleglist(&clp->lc_deleg);
746	nfsrv_freedeleglist(&clp->lc_olddeleg);
747	LIST_REMOVE(clp, lc_hash);
748	NFSLOCKV4ROOTMUTEX();
749	nfsv4_unlock(&nfsv4rootfs_lock, 1);
750	NFSUNLOCKV4ROOTMUTEX();
751	nfsrv_zapclient(clp, p);
752out:
753	NFSEXITCODE2(error, nd);
754	return (error);
755}
756
757/*
758 * Called from the new nfssvc syscall to admin revoke a clientid.
759 * Returns 0 for success, error otherwise.
760 */
761APPLESTATIC int
762nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
763{
764	struct nfsclient *clp = NULL;
765	int i, error = 0;
766	int gotit, igotlock;
767
768	/*
769	 * First, lock out the nfsd so that state won't change while the
770	 * revocation record is being written to the stable storage restart
771	 * file.
772	 */
773	NFSLOCKV4ROOTMUTEX();
774	do {
775		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
776		    NFSV4ROOTLOCKMUTEXPTR, NULL);
777	} while (!igotlock);
778	NFSUNLOCKV4ROOTMUTEX();
779
780	/*
781	 * Search for a match in the client list.
782	 */
783	gotit = i = 0;
784	while (i < nfsrv_clienthashsize && !gotit) {
785	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
786		if (revokep->nclid_idlen == clp->lc_idlen &&
787		    !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
788			gotit = 1;
789			break;
790		}
791	    }
792	    i++;
793	}
794	if (!gotit) {
795		NFSLOCKV4ROOTMUTEX();
796		nfsv4_unlock(&nfsv4rootfs_lock, 0);
797		NFSUNLOCKV4ROOTMUTEX();
798		error = EPERM;
799		goto out;
800	}
801
802	/*
803	 * Now, write out the revocation record
804	 */
805	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
806	nfsrv_backupstable();
807
808	/*
809	 * and clear out the state, marking the clientid revoked.
810	 */
811	clp->lc_flags &= ~LCL_CALLBACKSON;
812	clp->lc_flags |= LCL_ADMINREVOKED;
813	nfsrv_cleanclient(clp, p);
814	nfsrv_freedeleglist(&clp->lc_deleg);
815	nfsrv_freedeleglist(&clp->lc_olddeleg);
816	NFSLOCKV4ROOTMUTEX();
817	nfsv4_unlock(&nfsv4rootfs_lock, 0);
818	NFSUNLOCKV4ROOTMUTEX();
819
820out:
821	NFSEXITCODE(error);
822	return (error);
823}
824
825/*
826 * Dump out stats for all clients. Called from nfssvc(2), that is used
827 * newnfsstats.
828 */
829APPLESTATIC void
830nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
831{
832	struct nfsclient *clp;
833	int i = 0, cnt = 0;
834
835	/*
836	 * First, get a reference on the nfsv4rootfs_lock so that an
837	 * exclusive lock cannot be acquired while dumping the clients.
838	 */
839	NFSLOCKV4ROOTMUTEX();
840	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
841	NFSUNLOCKV4ROOTMUTEX();
842	NFSLOCKSTATE();
843	/*
844	 * Rattle through the client lists until done.
845	 */
846	while (i < nfsrv_clienthashsize && cnt < maxcnt) {
847	    clp = LIST_FIRST(&nfsclienthash[i]);
848	    while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
849		nfsrv_dumpaclient(clp, &dumpp[cnt]);
850		cnt++;
851		clp = LIST_NEXT(clp, lc_hash);
852	    }
853	    i++;
854	}
855	if (cnt < maxcnt)
856	    dumpp[cnt].ndcl_clid.nclid_idlen = 0;
857	NFSUNLOCKSTATE();
858	NFSLOCKV4ROOTMUTEX();
859	nfsv4_relref(&nfsv4rootfs_lock);
860	NFSUNLOCKV4ROOTMUTEX();
861}
862
863/*
864 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
865 */
866static void
867nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
868{
869	struct nfsstate *stp, *openstp, *lckownstp;
870	struct nfslock *lop;
871	struct sockaddr *sad;
872	struct sockaddr_in *rad;
873	struct sockaddr_in6 *rad6;
874
875	dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
876	dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
877	dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
878	dumpp->ndcl_flags = clp->lc_flags;
879	dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
880	NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
881	sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *);
882	dumpp->ndcl_addrfam = sad->sa_family;
883	if (sad->sa_family == AF_INET) {
884		rad = (struct sockaddr_in *)sad;
885		dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr;
886	} else {
887		rad6 = (struct sockaddr_in6 *)sad;
888		dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr;
889	}
890
891	/*
892	 * Now, scan the state lists and total up the opens and locks.
893	 */
894	LIST_FOREACH(stp, &clp->lc_open, ls_list) {
895	    dumpp->ndcl_nopenowners++;
896	    LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
897		dumpp->ndcl_nopens++;
898		LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
899		    dumpp->ndcl_nlockowners++;
900		    LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
901			dumpp->ndcl_nlocks++;
902		    }
903		}
904	    }
905	}
906
907	/*
908	 * and the delegation lists.
909	 */
910	LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
911	    dumpp->ndcl_ndelegs++;
912	}
913	LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
914	    dumpp->ndcl_nolddelegs++;
915	}
916}
917
918/*
919 * Dump out lock stats for a file.
920 */
921APPLESTATIC void
922nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
923    NFSPROC_T *p)
924{
925	struct nfsstate *stp;
926	struct nfslock *lop;
927	int cnt = 0;
928	struct nfslockfile *lfp;
929	struct sockaddr *sad;
930	struct sockaddr_in *rad;
931	struct sockaddr_in6 *rad6;
932	int ret;
933	fhandle_t nfh;
934
935	ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
936	/*
937	 * First, get a reference on the nfsv4rootfs_lock so that an
938	 * exclusive lock on it cannot be acquired while dumping the locks.
939	 */
940	NFSLOCKV4ROOTMUTEX();
941	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
942	NFSUNLOCKV4ROOTMUTEX();
943	NFSLOCKSTATE();
944	if (!ret)
945		ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
946	if (ret) {
947		ldumpp[0].ndlck_clid.nclid_idlen = 0;
948		NFSUNLOCKSTATE();
949		NFSLOCKV4ROOTMUTEX();
950		nfsv4_relref(&nfsv4rootfs_lock);
951		NFSUNLOCKV4ROOTMUTEX();
952		return;
953	}
954
955	/*
956	 * For each open share on file, dump it out.
957	 */
958	stp = LIST_FIRST(&lfp->lf_open);
959	while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
960		ldumpp[cnt].ndlck_flags = stp->ls_flags;
961		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
962		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
963		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
964		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
965		ldumpp[cnt].ndlck_owner.nclid_idlen =
966		    stp->ls_openowner->ls_ownerlen;
967		NFSBCOPY(stp->ls_openowner->ls_owner,
968		    ldumpp[cnt].ndlck_owner.nclid_id,
969		    stp->ls_openowner->ls_ownerlen);
970		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
971		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
972		    stp->ls_clp->lc_idlen);
973		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
974		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
975		if (sad->sa_family == AF_INET) {
976			rad = (struct sockaddr_in *)sad;
977			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
978		} else {
979			rad6 = (struct sockaddr_in6 *)sad;
980			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
981		}
982		stp = LIST_NEXT(stp, ls_file);
983		cnt++;
984	}
985
986	/*
987	 * and all locks.
988	 */
989	lop = LIST_FIRST(&lfp->lf_lock);
990	while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
991		stp = lop->lo_stp;
992		ldumpp[cnt].ndlck_flags = lop->lo_flags;
993		ldumpp[cnt].ndlck_first = lop->lo_first;
994		ldumpp[cnt].ndlck_end = lop->lo_end;
995		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
996		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
997		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
998		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
999		ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
1000		NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
1001		    stp->ls_ownerlen);
1002		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1003		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1004		    stp->ls_clp->lc_idlen);
1005		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
1006		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
1007		if (sad->sa_family == AF_INET) {
1008			rad = (struct sockaddr_in *)sad;
1009			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
1010		} else {
1011			rad6 = (struct sockaddr_in6 *)sad;
1012			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
1013		}
1014		lop = LIST_NEXT(lop, lo_lckfile);
1015		cnt++;
1016	}
1017
1018	/*
1019	 * and the delegations.
1020	 */
1021	stp = LIST_FIRST(&lfp->lf_deleg);
1022	while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
1023		ldumpp[cnt].ndlck_flags = stp->ls_flags;
1024		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1025		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1026		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1027		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1028		ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
1029		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1030		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1031		    stp->ls_clp->lc_idlen);
1032		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
1033		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
1034		if (sad->sa_family == AF_INET) {
1035			rad = (struct sockaddr_in *)sad;
1036			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
1037		} else {
1038			rad6 = (struct sockaddr_in6 *)sad;
1039			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
1040		}
1041		stp = LIST_NEXT(stp, ls_file);
1042		cnt++;
1043	}
1044
1045	/*
1046	 * If list isn't full, mark end of list by setting the client name
1047	 * to zero length.
1048	 */
1049	if (cnt < maxcnt)
1050		ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1051	NFSUNLOCKSTATE();
1052	NFSLOCKV4ROOTMUTEX();
1053	nfsv4_relref(&nfsv4rootfs_lock);
1054	NFSUNLOCKV4ROOTMUTEX();
1055}
1056
1057/*
1058 * Server timer routine. It can scan any linked list, so long
1059 * as it holds the spin/mutex lock and there is no exclusive lock on
1060 * nfsv4rootfs_lock.
1061 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1062 *  to do this from a callout, since the spin locks work. For
1063 *  Darwin, I'm not sure what will work correctly yet.)
1064 * Should be called once per second.
1065 */
1066APPLESTATIC void
1067nfsrv_servertimer(void)
1068{
1069	struct nfsclient *clp, *nclp;
1070	struct nfsstate *stp, *nstp;
1071	int got_ref, i;
1072
1073	/*
1074	 * Make sure nfsboottime is set. This is used by V3 as well
1075	 * as V4. Note that nfsboottime is not nfsrvboottime, which is
1076	 * only used by the V4 server for leases.
1077	 */
1078	if (nfsboottime.tv_sec == 0)
1079		NFSSETBOOTTIME(nfsboottime);
1080
1081	/*
1082	 * If server hasn't started yet, just return.
1083	 */
1084	NFSLOCKSTATE();
1085	if (nfsrv_stablefirst.nsf_eograce == 0) {
1086		NFSUNLOCKSTATE();
1087		return;
1088	}
1089	if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
1090		if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
1091		    NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
1092			nfsrv_stablefirst.nsf_flags |=
1093			    (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1094		NFSUNLOCKSTATE();
1095		return;
1096	}
1097
1098	/*
1099	 * Try and get a reference count on the nfsv4rootfs_lock so that
1100	 * no nfsd thread can acquire an exclusive lock on it before this
1101	 * call is done. If it is already exclusively locked, just return.
1102	 */
1103	NFSLOCKV4ROOTMUTEX();
1104	got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1105	NFSUNLOCKV4ROOTMUTEX();
1106	if (got_ref == 0) {
1107		NFSUNLOCKSTATE();
1108		return;
1109	}
1110
1111	/*
1112	 * For each client...
1113	 */
1114	for (i = 0; i < nfsrv_clienthashsize; i++) {
1115	    clp = LIST_FIRST(&nfsclienthash[i]);
1116	    while (clp != LIST_END(&nfsclienthash[i])) {
1117		nclp = LIST_NEXT(clp, lc_hash);
1118		if (!(clp->lc_flags & LCL_EXPIREIT)) {
1119		    if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1120			 && ((LIST_EMPTY(&clp->lc_deleg)
1121			      && LIST_EMPTY(&clp->lc_open)) ||
1122			     nfsrv_clients > nfsrv_clienthighwater)) ||
1123			(clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1124			(clp->lc_expiry < NFSD_MONOSEC &&
1125			 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
1126			/*
1127			 * Lease has expired several nfsrv_lease times ago:
1128			 * PLUS
1129			 *    - no state is associated with it
1130			 *    OR
1131			 *    - above high water mark for number of clients
1132			 *      (nfsrv_clienthighwater should be large enough
1133			 *       that this only occurs when clients fail to
1134			 *       use the same nfs_client_id4.id. Maybe somewhat
1135			 *       higher that the maximum number of clients that
1136			 *       will mount this server?)
1137			 * OR
1138			 * Lease has expired a very long time ago
1139			 * OR
1140			 * Lease has expired PLUS the number of opens + locks
1141			 * has exceeded 90% of capacity
1142			 *
1143			 * --> Mark for expiry. The actual expiry will be done
1144			 *     by an nfsd sometime soon.
1145			 */
1146			clp->lc_flags |= LCL_EXPIREIT;
1147			nfsrv_stablefirst.nsf_flags |=
1148			    (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1149		    } else {
1150			/*
1151			 * If there are no opens, increment no open tick cnt
1152			 * If time exceeds NFSNOOPEN, mark it to be thrown away
1153			 * otherwise, if there is an open, reset no open time
1154			 * Hopefully, this will avoid excessive re-creation
1155			 * of open owners and subsequent open confirms.
1156			 */
1157			stp = LIST_FIRST(&clp->lc_open);
1158			while (stp != LIST_END(&clp->lc_open)) {
1159				nstp = LIST_NEXT(stp, ls_list);
1160				if (LIST_EMPTY(&stp->ls_open)) {
1161					stp->ls_noopens++;
1162					if (stp->ls_noopens > NFSNOOPEN ||
1163					    (nfsrv_openpluslock * 2) >
1164					    nfsrv_v4statelimit)
1165						nfsrv_stablefirst.nsf_flags |=
1166							NFSNSF_NOOPENS;
1167				} else {
1168					stp->ls_noopens = 0;
1169				}
1170				stp = nstp;
1171			}
1172		    }
1173		}
1174		clp = nclp;
1175	    }
1176	}
1177	NFSUNLOCKSTATE();
1178	NFSLOCKV4ROOTMUTEX();
1179	nfsv4_relref(&nfsv4rootfs_lock);
1180	NFSUNLOCKV4ROOTMUTEX();
1181}
1182
1183/*
1184 * The following set of functions free up the various data structures.
1185 */
1186/*
1187 * Clear out all open/lock state related to this nfsclient.
1188 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1189 * there are no other active nfsd threads.
1190 */
1191APPLESTATIC void
1192nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
1193{
1194	struct nfsstate *stp, *nstp;
1195	struct nfsdsession *sep, *nsep;
1196
1197	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
1198		nfsrv_freeopenowner(stp, 1, p);
1199	if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1200		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1201			(void)nfsrv_freesession(sep, NULL);
1202}
1203
1204/*
1205 * Free a client that has been cleaned. It should also already have been
1206 * removed from the lists.
1207 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
1208 *  softclock interrupts are enabled.)
1209 */
1210APPLESTATIC void
1211nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
1212{
1213
1214#ifdef notyet
1215	if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1216	     (LCL_GSS | LCL_CALLBACKSON) &&
1217	    (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1218	    clp->lc_handlelen > 0) {
1219		clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1220		clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1221		(void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1222			NULL, 0, NULL, NULL, NULL, p);
1223	}
1224#endif
1225	newnfs_disconnect(&clp->lc_req);
1226	NFSSOCKADDRFREE(clp->lc_req.nr_nam);
1227	NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1228	free(clp->lc_stateid, M_NFSDCLIENT);
1229	free(clp, M_NFSDCLIENT);
1230	NFSLOCKSTATE();
1231	newnfsstats.srvclients--;
1232	nfsrv_openpluslock--;
1233	nfsrv_clients--;
1234	NFSUNLOCKSTATE();
1235}
1236
1237/*
1238 * Free a list of delegation state structures.
1239 * (This function will also free all nfslockfile structures that no
1240 *  longer have associated state.)
1241 */
1242APPLESTATIC void
1243nfsrv_freedeleglist(struct nfsstatehead *sthp)
1244{
1245	struct nfsstate *stp, *nstp;
1246
1247	LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1248		nfsrv_freedeleg(stp);
1249	}
1250	LIST_INIT(sthp);
1251}
1252
1253/*
1254 * Free up a delegation.
1255 */
1256static void
1257nfsrv_freedeleg(struct nfsstate *stp)
1258{
1259	struct nfslockfile *lfp;
1260
1261	LIST_REMOVE(stp, ls_hash);
1262	LIST_REMOVE(stp, ls_list);
1263	LIST_REMOVE(stp, ls_file);
1264	lfp = stp->ls_lfp;
1265	if (LIST_EMPTY(&lfp->lf_open) &&
1266	    LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1267	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1268	    lfp->lf_usecount == 0 &&
1269	    nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1270		nfsrv_freenfslockfile(lfp);
1271	FREE((caddr_t)stp, M_NFSDSTATE);
1272	newnfsstats.srvdelegates--;
1273	nfsrv_openpluslock--;
1274	nfsrv_delegatecnt--;
1275}
1276
1277/*
1278 * This function frees an open owner and all associated opens.
1279 */
1280static void
1281nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1282{
1283	struct nfsstate *nstp, *tstp;
1284
1285	LIST_REMOVE(stp, ls_list);
1286	/*
1287	 * Now, free all associated opens.
1288	 */
1289	nstp = LIST_FIRST(&stp->ls_open);
1290	while (nstp != LIST_END(&stp->ls_open)) {
1291		tstp = nstp;
1292		nstp = LIST_NEXT(nstp, ls_list);
1293		(void) nfsrv_freeopen(tstp, NULL, cansleep, p);
1294	}
1295	if (stp->ls_op)
1296		nfsrvd_derefcache(stp->ls_op);
1297	FREE((caddr_t)stp, M_NFSDSTATE);
1298	newnfsstats.srvopenowners--;
1299	nfsrv_openpluslock--;
1300}
1301
1302/*
1303 * This function frees an open (nfsstate open structure) with all associated
1304 * lock_owners and locks. It also frees the nfslockfile structure iff there
1305 * are no other opens on the file.
1306 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1307 */
1308static int
1309nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1310{
1311	struct nfsstate *nstp, *tstp;
1312	struct nfslockfile *lfp;
1313	int ret;
1314
1315	LIST_REMOVE(stp, ls_hash);
1316	LIST_REMOVE(stp, ls_list);
1317	LIST_REMOVE(stp, ls_file);
1318
1319	lfp = stp->ls_lfp;
1320	/*
1321	 * Now, free all lockowners associated with this open.
1322	 */
1323	LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1324		nfsrv_freelockowner(tstp, vp, cansleep, p);
1325
1326	/*
1327	 * The nfslockfile is freed here if there are no locks
1328	 * associated with the open.
1329	 * If there are locks associated with the open, the
1330	 * nfslockfile structure can be freed via nfsrv_freelockowner().
1331	 * Acquire the state mutex to avoid races with calls to
1332	 * nfsrv_getlockfile().
1333	 */
1334	if (cansleep != 0)
1335		NFSLOCKSTATE();
1336	if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1337	    LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1338	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1339	    lfp->lf_usecount == 0 &&
1340	    (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
1341		nfsrv_freenfslockfile(lfp);
1342		ret = 1;
1343	} else
1344		ret = 0;
1345	if (cansleep != 0)
1346		NFSUNLOCKSTATE();
1347	FREE((caddr_t)stp, M_NFSDSTATE);
1348	newnfsstats.srvopens--;
1349	nfsrv_openpluslock--;
1350	return (ret);
1351}
1352
1353/*
1354 * Frees a lockowner and all associated locks.
1355 */
1356static void
1357nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1358    NFSPROC_T *p)
1359{
1360
1361	LIST_REMOVE(stp, ls_hash);
1362	LIST_REMOVE(stp, ls_list);
1363	nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1364	if (stp->ls_op)
1365		nfsrvd_derefcache(stp->ls_op);
1366	FREE((caddr_t)stp, M_NFSDSTATE);
1367	newnfsstats.srvlockowners--;
1368	nfsrv_openpluslock--;
1369}
1370
1371/*
1372 * Free all the nfs locks on a lockowner.
1373 */
1374static void
1375nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1376    NFSPROC_T *p)
1377{
1378	struct nfslock *lop, *nlop;
1379	struct nfsrollback *rlp, *nrlp;
1380	struct nfslockfile *lfp = NULL;
1381	int gottvp = 0;
1382	vnode_t tvp = NULL;
1383	uint64_t first, end;
1384
1385	if (vp != NULL)
1386		ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1387	lop = LIST_FIRST(&stp->ls_lock);
1388	while (lop != LIST_END(&stp->ls_lock)) {
1389		nlop = LIST_NEXT(lop, lo_lckowner);
1390		/*
1391		 * Since all locks should be for the same file, lfp should
1392		 * not change.
1393		 */
1394		if (lfp == NULL)
1395			lfp = lop->lo_lfp;
1396		else if (lfp != lop->lo_lfp)
1397			panic("allnfslocks");
1398		/*
1399		 * If vp is NULL and cansleep != 0, a vnode must be acquired
1400		 * from the file handle. This only occurs when called from
1401		 * nfsrv_cleanclient().
1402		 */
1403		if (gottvp == 0) {
1404			if (nfsrv_dolocallocks == 0)
1405				tvp = NULL;
1406			else if (vp == NULL && cansleep != 0) {
1407				tvp = nfsvno_getvp(&lfp->lf_fh);
1408				NFSVOPUNLOCK(tvp, 0);
1409			} else
1410				tvp = vp;
1411			gottvp = 1;
1412		}
1413
1414		if (tvp != NULL) {
1415			if (cansleep == 0)
1416				panic("allnfs2");
1417			first = lop->lo_first;
1418			end = lop->lo_end;
1419			nfsrv_freenfslock(lop);
1420			nfsrv_localunlock(tvp, lfp, first, end, p);
1421			LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1422			    nrlp)
1423				free(rlp, M_NFSDROLLBACK);
1424			LIST_INIT(&lfp->lf_rollback);
1425		} else
1426			nfsrv_freenfslock(lop);
1427		lop = nlop;
1428	}
1429	if (vp == NULL && tvp != NULL)
1430		vrele(tvp);
1431}
1432
1433/*
1434 * Free an nfslock structure.
1435 */
1436static void
1437nfsrv_freenfslock(struct nfslock *lop)
1438{
1439
1440	if (lop->lo_lckfile.le_prev != NULL) {
1441		LIST_REMOVE(lop, lo_lckfile);
1442		newnfsstats.srvlocks--;
1443		nfsrv_openpluslock--;
1444	}
1445	LIST_REMOVE(lop, lo_lckowner);
1446	FREE((caddr_t)lop, M_NFSDLOCK);
1447}
1448
1449/*
1450 * This function frees an nfslockfile structure.
1451 */
1452static void
1453nfsrv_freenfslockfile(struct nfslockfile *lfp)
1454{
1455
1456	LIST_REMOVE(lfp, lf_hash);
1457	FREE((caddr_t)lfp, M_NFSDLOCKFILE);
1458}
1459
1460/*
1461 * This function looks up an nfsstate structure via stateid.
1462 */
1463static int
1464nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1465    struct nfsstate **stpp)
1466{
1467	struct nfsstate *stp;
1468	struct nfsstatehead *hp;
1469	int error = 0;
1470
1471	*stpp = NULL;
1472	hp = NFSSTATEHASH(clp, *stateidp);
1473	LIST_FOREACH(stp, hp, ls_hash) {
1474		if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1475			NFSX_STATEIDOTHER))
1476			break;
1477	}
1478
1479	/*
1480	 * If no state id in list, return NFSERR_BADSTATEID.
1481	 */
1482	if (stp == LIST_END(hp)) {
1483		error = NFSERR_BADSTATEID;
1484		goto out;
1485	}
1486	*stpp = stp;
1487
1488out:
1489	NFSEXITCODE(error);
1490	return (error);
1491}
1492
1493/*
1494 * This function gets an nfsstate structure via owner string.
1495 */
1496static void
1497nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1498    struct nfsstate **stpp)
1499{
1500	struct nfsstate *stp;
1501
1502	*stpp = NULL;
1503	LIST_FOREACH(stp, hp, ls_list) {
1504		if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1505		  !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1506			*stpp = stp;
1507			return;
1508		}
1509	}
1510}
1511
1512/*
1513 * Lock control function called to update lock status.
1514 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1515 * that one isn't to be created and an NFSERR_xxx for other errors.
1516 * The structures new_stp and new_lop are passed in as pointers that should
1517 * be set to NULL if the structure is used and shouldn't be free'd.
1518 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1519 * never used and can safely be allocated on the stack. For all other
1520 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1521 * in case they are used.
1522 */
1523APPLESTATIC int
1524nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1525    struct nfslock **new_lopp, struct nfslockconflict *cfp,
1526    nfsquad_t clientid, nfsv4stateid_t *stateidp,
1527    __unused struct nfsexstuff *exp,
1528    struct nfsrv_descript *nd, NFSPROC_T *p)
1529{
1530	struct nfslock *lop;
1531	struct nfsstate *new_stp = *new_stpp;
1532	struct nfslock *new_lop = *new_lopp;
1533	struct nfsstate *tstp, *mystp, *nstp;
1534	int specialid = 0;
1535	struct nfslockfile *lfp;
1536	struct nfslock *other_lop = NULL;
1537	struct nfsstate *stp, *lckstp = NULL;
1538	struct nfsclient *clp = NULL;
1539	u_int32_t bits;
1540	int error = 0, haslock = 0, ret, reterr;
1541	int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1542	fhandle_t nfh;
1543	uint64_t first, end;
1544	uint32_t lock_flags;
1545
1546	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1547		/*
1548		 * Note the special cases of "all 1s" or "all 0s" stateids and
1549		 * let reads with all 1s go ahead.
1550		 */
1551		if (new_stp->ls_stateid.seqid == 0x0 &&
1552		    new_stp->ls_stateid.other[0] == 0x0 &&
1553		    new_stp->ls_stateid.other[1] == 0x0 &&
1554		    new_stp->ls_stateid.other[2] == 0x0)
1555			specialid = 1;
1556		else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1557		    new_stp->ls_stateid.other[0] == 0xffffffff &&
1558		    new_stp->ls_stateid.other[1] == 0xffffffff &&
1559		    new_stp->ls_stateid.other[2] == 0xffffffff)
1560			specialid = 2;
1561	}
1562
1563	/*
1564	 * Check for restart conditions (client and server).
1565	 */
1566	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1567	    &new_stp->ls_stateid, specialid);
1568	if (error)
1569		goto out;
1570
1571	/*
1572	 * Check for state resource limit exceeded.
1573	 */
1574	if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1575	    nfsrv_openpluslock > nfsrv_v4statelimit) {
1576		error = NFSERR_RESOURCE;
1577		goto out;
1578	}
1579
1580	/*
1581	 * For the lock case, get another nfslock structure,
1582	 * just in case we need it.
1583	 * Malloc now, before we start sifting through the linked lists,
1584	 * in case we have to wait for memory.
1585	 */
1586tryagain:
1587	if (new_stp->ls_flags & NFSLCK_LOCK)
1588		MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock),
1589		    M_NFSDLOCK, M_WAITOK);
1590	filestruct_locked = 0;
1591	reterr = 0;
1592	lfp = NULL;
1593
1594	/*
1595	 * Get the lockfile structure for CFH now, so we can do a sanity
1596	 * check against the stateid, before incrementing the seqid#, since
1597	 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1598	 * shouldn't be incremented for this case.
1599	 * If nfsrv_getlockfile() returns -1, it means "not found", which
1600	 * will be handled later.
1601	 * If we are doing Lock/LockU and local locking is enabled, sleep
1602	 * lock the nfslockfile structure.
1603	 */
1604	getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1605	NFSLOCKSTATE();
1606	if (getlckret == 0) {
1607		if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1608		    nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1609			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1610			    &lfp, &nfh, 1);
1611			if (getlckret == 0)
1612				filestruct_locked = 1;
1613		} else
1614			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1615			    &lfp, &nfh, 0);
1616	}
1617	if (getlckret != 0 && getlckret != -1)
1618		reterr = getlckret;
1619
1620	if (filestruct_locked != 0) {
1621		LIST_INIT(&lfp->lf_rollback);
1622		if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1623			/*
1624			 * For local locking, do the advisory locking now, so
1625			 * that any conflict can be detected. A failure later
1626			 * can be rolled back locally. If an error is returned,
1627			 * struct nfslockfile has been unlocked and any local
1628			 * locking rolled back.
1629			 */
1630			NFSUNLOCKSTATE();
1631			if (vnode_unlocked == 0) {
1632				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1633				vnode_unlocked = 1;
1634				NFSVOPUNLOCK(vp, 0);
1635			}
1636			reterr = nfsrv_locallock(vp, lfp,
1637			    (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1638			    new_lop->lo_first, new_lop->lo_end, cfp, p);
1639			NFSLOCKSTATE();
1640		}
1641	}
1642
1643	if (specialid == 0) {
1644	    if (new_stp->ls_flags & NFSLCK_TEST) {
1645		/*
1646		 * RFC 3530 does not list LockT as an op that renews a
1647		 * lease, but the concensus seems to be that it is ok
1648		 * for a server to do so.
1649		 */
1650		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1651		    (nfsquad_t)((u_quad_t)0), 0, nd, p);
1652
1653		/*
1654		 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1655		 * error returns for LockT, just go ahead and test for a lock,
1656		 * since there are no locks for this client, but other locks
1657		 * can conflict. (ie. same client will always be false)
1658		 */
1659		if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1660		    error = 0;
1661		lckstp = new_stp;
1662	    } else {
1663	      error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1664		(nfsquad_t)((u_quad_t)0), 0, nd, p);
1665	      if (error == 0)
1666		/*
1667		 * Look up the stateid
1668		 */
1669		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1670		  new_stp->ls_flags, &stp);
1671	      /*
1672	       * do some sanity checks for an unconfirmed open or a
1673	       * stateid that refers to the wrong file, for an open stateid
1674	       */
1675	      if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1676		  ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1677		   (getlckret == 0 && stp->ls_lfp != lfp)))
1678			error = NFSERR_BADSTATEID;
1679	      if (error == 0 &&
1680		  (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1681		  getlckret == 0 && stp->ls_lfp != lfp)
1682			error = NFSERR_BADSTATEID;
1683
1684	      /*
1685	       * If the lockowner stateid doesn't refer to the same file,
1686	       * I believe that is considered ok, since some clients will
1687	       * only create a single lockowner and use that for all locks
1688	       * on all files.
1689	       * For now, log it as a diagnostic, instead of considering it
1690	       * a BadStateid.
1691	       */
1692	      if (error == 0 && (stp->ls_flags &
1693		  (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1694		  getlckret == 0 && stp->ls_lfp != lfp) {
1695#ifdef DIAGNOSTIC
1696		  printf("Got a lock statid for different file open\n");
1697#endif
1698		  /*
1699		  error = NFSERR_BADSTATEID;
1700		  */
1701	      }
1702
1703	      if (error == 0) {
1704		    if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
1705			/*
1706			 * If haslock set, we've already checked the seqid.
1707			 */
1708			if (!haslock) {
1709			    if (stp->ls_flags & NFSLCK_OPEN)
1710				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1711				    stp->ls_openowner, new_stp->ls_op);
1712			    else
1713				error = NFSERR_BADSTATEID;
1714			}
1715			if (!error)
1716			    nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
1717			if (lckstp)
1718			    /*
1719			     * I believe this should be an error, but it
1720			     * isn't obvious what NFSERR_xxx would be
1721			     * appropriate, so I'll use NFSERR_INVAL for now.
1722			     */
1723			    error = NFSERR_INVAL;
1724			else
1725			    lckstp = new_stp;
1726		    } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
1727			/*
1728			 * If haslock set, ditto above.
1729			 */
1730			if (!haslock) {
1731			    if (stp->ls_flags & NFSLCK_OPEN)
1732				error = NFSERR_BADSTATEID;
1733			    else
1734				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1735				    stp, new_stp->ls_op);
1736			}
1737			lckstp = stp;
1738		    } else {
1739			lckstp = stp;
1740		    }
1741	      }
1742	      /*
1743	       * If the seqid part of the stateid isn't the same, return
1744	       * NFSERR_OLDSTATEID for cases other than I/O Ops.
1745	       * For I/O Ops, only return NFSERR_OLDSTATEID if
1746	       * nfsrv_returnoldstateid is set. (The concensus on the email
1747	       * list was that most clients would prefer to not receive
1748	       * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
1749	       * is what will happen, so I use the nfsrv_returnoldstateid to
1750	       * allow for either server configuration.)
1751	       */
1752	      if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
1753		  (((nd->nd_flag & ND_NFSV41) == 0 &&
1754		   (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1755		    nfsrv_returnoldstateid)) ||
1756		   ((nd->nd_flag & ND_NFSV41) != 0 &&
1757		    new_stp->ls_stateid.seqid != 0)))
1758		    error = NFSERR_OLDSTATEID;
1759	    }
1760	}
1761
1762	/*
1763	 * Now we can check for grace.
1764	 */
1765	if (!error)
1766		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
1767	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
1768		nfsrv_checkstable(clp))
1769		error = NFSERR_NOGRACE;
1770	/*
1771	 * If we successfully Reclaimed state, note that.
1772	 */
1773	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
1774		nfsrv_markstable(clp);
1775
1776	/*
1777	 * At this point, either error == NFSERR_BADSTATEID or the
1778	 * seqid# has been updated, so we can return any error.
1779	 * If error == 0, there may be an error in:
1780	 *    nd_repstat - Set by the calling function.
1781	 *    reterr - Set above, if getting the nfslockfile structure
1782	 *       or acquiring the local lock failed.
1783	 *    (If both of these are set, nd_repstat should probably be
1784	 *     returned, since that error was detected before this
1785	 *     function call.)
1786	 */
1787	if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
1788		if (error == 0) {
1789			if (nd->nd_repstat != 0)
1790				error = nd->nd_repstat;
1791			else
1792				error = reterr;
1793		}
1794		if (filestruct_locked != 0) {
1795			/* Roll back local locks. */
1796			NFSUNLOCKSTATE();
1797			if (vnode_unlocked == 0) {
1798				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
1799				vnode_unlocked = 1;
1800				NFSVOPUNLOCK(vp, 0);
1801			}
1802			nfsrv_locallock_rollback(vp, lfp, p);
1803			NFSLOCKSTATE();
1804			nfsrv_unlocklf(lfp);
1805		}
1806		NFSUNLOCKSTATE();
1807		goto out;
1808	}
1809
1810	/*
1811	 * Check the nfsrv_getlockfile return.
1812	 * Returned -1 if no structure found.
1813	 */
1814	if (getlckret == -1) {
1815		error = NFSERR_EXPIRED;
1816		/*
1817		 * Called from lockt, so no lock is OK.
1818		 */
1819		if (new_stp->ls_flags & NFSLCK_TEST) {
1820			error = 0;
1821		} else if (new_stp->ls_flags &
1822		    (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1823			/*
1824			 * Called to check for a lock, OK if the stateid is all
1825			 * 1s or all 0s, but there should be an nfsstate
1826			 * otherwise.
1827			 * (ie. If there is no open, I'll assume no share
1828			 *  deny bits.)
1829			 */
1830			if (specialid)
1831				error = 0;
1832			else
1833				error = NFSERR_BADSTATEID;
1834		}
1835		NFSUNLOCKSTATE();
1836		goto out;
1837	}
1838
1839	/*
1840	 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
1841	 * For NFSLCK_CHECK, allow a read if write access is granted,
1842	 * but check for a deny. For NFSLCK_LOCK, require correct access,
1843	 * which implies a conflicting deny can't exist.
1844	 */
1845	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
1846	    /*
1847	     * Four kinds of state id:
1848	     * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
1849	     * - stateid for an open
1850	     * - stateid for a delegation
1851	     * - stateid for a lock owner
1852	     */
1853	    if (!specialid) {
1854		if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
1855		    delegation = 1;
1856		    mystp = stp;
1857		    nfsrv_delaydelegtimeout(stp);
1858	        } else if (stp->ls_flags & NFSLCK_OPEN) {
1859		    mystp = stp;
1860		} else {
1861		    mystp = stp->ls_openstp;
1862		}
1863		/*
1864		 * If locking or checking, require correct access
1865		 * bit set.
1866		 */
1867		if (((new_stp->ls_flags & NFSLCK_LOCK) &&
1868		     !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
1869		       mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
1870		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
1871		      (NFSLCK_CHECK | NFSLCK_READACCESS) &&
1872		     !(mystp->ls_flags & NFSLCK_READACCESS)) ||
1873		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
1874		      (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
1875		     !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
1876			if (filestruct_locked != 0) {
1877				/* Roll back local locks. */
1878				NFSUNLOCKSTATE();
1879				if (vnode_unlocked == 0) {
1880					ASSERT_VOP_ELOCKED(vp,
1881					    "nfsrv_lockctrl3");
1882					vnode_unlocked = 1;
1883					NFSVOPUNLOCK(vp, 0);
1884				}
1885				nfsrv_locallock_rollback(vp, lfp, p);
1886				NFSLOCKSTATE();
1887				nfsrv_unlocklf(lfp);
1888			}
1889			NFSUNLOCKSTATE();
1890			error = NFSERR_OPENMODE;
1891			goto out;
1892		}
1893	    } else
1894		mystp = NULL;
1895	    if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
1896		/*
1897		 * Check for a conflicting deny bit.
1898		 */
1899		LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
1900		    if (tstp != mystp) {
1901			bits = tstp->ls_flags;
1902			bits >>= NFSLCK_SHIFT;
1903			if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
1904			    KASSERT(vnode_unlocked == 0,
1905				("nfsrv_lockctrl: vnode unlocked1"));
1906			    ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
1907				vp, p);
1908			    if (ret == 1) {
1909				/*
1910				* nfsrv_clientconflict unlocks state
1911				 * when it returns non-zero.
1912				 */
1913				lckstp = NULL;
1914				goto tryagain;
1915			    }
1916			    if (ret == 0)
1917				NFSUNLOCKSTATE();
1918			    if (ret == 2)
1919				error = NFSERR_PERM;
1920			    else
1921				error = NFSERR_OPENMODE;
1922			    goto out;
1923			}
1924		    }
1925		}
1926
1927		/* We're outta here */
1928		NFSUNLOCKSTATE();
1929		goto out;
1930	    }
1931	}
1932
1933	/*
1934	 * For setattr, just get rid of all the Delegations for other clients.
1935	 */
1936	if (new_stp->ls_flags & NFSLCK_SETATTR) {
1937		KASSERT(vnode_unlocked == 0,
1938		    ("nfsrv_lockctrl: vnode unlocked2"));
1939		ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
1940		if (ret) {
1941			/*
1942			 * nfsrv_cleandeleg() unlocks state when it
1943			 * returns non-zero.
1944			 */
1945			if (ret == -1) {
1946				lckstp = NULL;
1947				goto tryagain;
1948			}
1949			error = ret;
1950			goto out;
1951		}
1952		if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1953		    (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
1954		     LIST_EMPTY(&lfp->lf_deleg))) {
1955			NFSUNLOCKSTATE();
1956			goto out;
1957		}
1958	}
1959
1960	/*
1961	 * Check for a conflicting delegation. If one is found, call
1962	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
1963	 * been set yet, it will get the lock. Otherwise, it will recall
1964	 * the delegation. Then, we try try again...
1965	 * I currently believe the conflict algorithm to be:
1966	 * For Lock Ops (Lock/LockT/LockU)
1967	 * - there is a conflict iff a different client has a write delegation
1968	 * For Reading (Read Op)
1969	 * - there is a conflict iff a different client has a write delegation
1970	 *   (the specialids are always a different client)
1971	 * For Writing (Write/Setattr of size)
1972	 * - there is a conflict if a different client has any delegation
1973	 * - there is a conflict if the same client has a read delegation
1974	 *   (I don't understand why this isn't allowed, but that seems to be
1975	 *    the current consensus?)
1976	 */
1977	tstp = LIST_FIRST(&lfp->lf_deleg);
1978	while (tstp != LIST_END(&lfp->lf_deleg)) {
1979	    nstp = LIST_NEXT(tstp, ls_file);
1980	    if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
1981		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
1982		  (new_lop->lo_flags & NFSLCK_READ))) &&
1983		  clp != tstp->ls_clp &&
1984		 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
1985		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
1986		   (new_lop->lo_flags & NFSLCK_WRITE) &&
1987		  (clp != tstp->ls_clp ||
1988		   (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
1989		ret = 0;
1990		if (filestruct_locked != 0) {
1991			/* Roll back local locks. */
1992			NFSUNLOCKSTATE();
1993			if (vnode_unlocked == 0) {
1994				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
1995				NFSVOPUNLOCK(vp, 0);
1996			}
1997			nfsrv_locallock_rollback(vp, lfp, p);
1998			NFSLOCKSTATE();
1999			nfsrv_unlocklf(lfp);
2000			NFSUNLOCKSTATE();
2001			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2002			vnode_unlocked = 0;
2003			if ((vp->v_iflag & VI_DOOMED) != 0)
2004				ret = NFSERR_SERVERFAULT;
2005			NFSLOCKSTATE();
2006		}
2007		if (ret == 0)
2008			ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
2009		if (ret) {
2010		    /*
2011		     * nfsrv_delegconflict unlocks state when it
2012		     * returns non-zero, which it always does.
2013		     */
2014		    if (other_lop) {
2015			FREE((caddr_t)other_lop, M_NFSDLOCK);
2016			other_lop = NULL;
2017		    }
2018		    if (ret == -1) {
2019			lckstp = NULL;
2020			goto tryagain;
2021		    }
2022		    error = ret;
2023		    goto out;
2024		}
2025		/* Never gets here. */
2026	    }
2027	    tstp = nstp;
2028	}
2029
2030	/*
2031	 * Handle the unlock case by calling nfsrv_updatelock().
2032	 * (Should I have done some access checking above for unlock? For now,
2033	 *  just let it happen.)
2034	 */
2035	if (new_stp->ls_flags & NFSLCK_UNLOCK) {
2036		first = new_lop->lo_first;
2037		end = new_lop->lo_end;
2038		nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2039		stateidp->seqid = ++(stp->ls_stateid.seqid);
2040		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2041			stateidp->seqid = stp->ls_stateid.seqid = 1;
2042		stateidp->other[0] = stp->ls_stateid.other[0];
2043		stateidp->other[1] = stp->ls_stateid.other[1];
2044		stateidp->other[2] = stp->ls_stateid.other[2];
2045		if (filestruct_locked != 0) {
2046			NFSUNLOCKSTATE();
2047			if (vnode_unlocked == 0) {
2048				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2049				vnode_unlocked = 1;
2050				NFSVOPUNLOCK(vp, 0);
2051			}
2052			/* Update the local locks. */
2053			nfsrv_localunlock(vp, lfp, first, end, p);
2054			NFSLOCKSTATE();
2055			nfsrv_unlocklf(lfp);
2056		}
2057		NFSUNLOCKSTATE();
2058		goto out;
2059	}
2060
2061	/*
2062	 * Search for a conflicting lock. A lock conflicts if:
2063	 * - the lock range overlaps and
2064	 * - at least one lock is a write lock and
2065	 * - it is not owned by the same lock owner
2066	 */
2067	if (!delegation) {
2068	  LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2069	    if (new_lop->lo_end > lop->lo_first &&
2070		new_lop->lo_first < lop->lo_end &&
2071		(new_lop->lo_flags == NFSLCK_WRITE ||
2072		 lop->lo_flags == NFSLCK_WRITE) &&
2073		lckstp != lop->lo_stp &&
2074		(clp != lop->lo_stp->ls_clp ||
2075		 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2076		 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2077		    lckstp->ls_ownerlen))) {
2078		if (other_lop) {
2079		    FREE((caddr_t)other_lop, M_NFSDLOCK);
2080		    other_lop = NULL;
2081		}
2082		if (vnode_unlocked != 0)
2083		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2084			NULL, p);
2085		else
2086		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2087			vp, p);
2088		if (ret == 1) {
2089		    if (filestruct_locked != 0) {
2090			if (vnode_unlocked == 0) {
2091				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2092				NFSVOPUNLOCK(vp, 0);
2093			}
2094			/* Roll back local locks. */
2095			nfsrv_locallock_rollback(vp, lfp, p);
2096			NFSLOCKSTATE();
2097			nfsrv_unlocklf(lfp);
2098			NFSUNLOCKSTATE();
2099			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2100			vnode_unlocked = 0;
2101			if ((vp->v_iflag & VI_DOOMED) != 0) {
2102				error = NFSERR_SERVERFAULT;
2103				goto out;
2104			}
2105		    }
2106		    /*
2107		     * nfsrv_clientconflict() unlocks state when it
2108		     * returns non-zero.
2109		     */
2110		    lckstp = NULL;
2111		    goto tryagain;
2112		}
2113		/*
2114		 * Found a conflicting lock, so record the conflict and
2115		 * return the error.
2116		 */
2117		if (cfp != NULL && ret == 0) {
2118		    cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2119		    cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2120		    cfp->cl_first = lop->lo_first;
2121		    cfp->cl_end = lop->lo_end;
2122		    cfp->cl_flags = lop->lo_flags;
2123		    cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2124		    NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2125			cfp->cl_ownerlen);
2126		}
2127		if (ret == 2)
2128		    error = NFSERR_PERM;
2129		else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2130		    error = NFSERR_RECLAIMCONFLICT;
2131		else if (new_stp->ls_flags & NFSLCK_CHECK)
2132		    error = NFSERR_LOCKED;
2133		else
2134		    error = NFSERR_DENIED;
2135		if (filestruct_locked != 0 && ret == 0) {
2136			/* Roll back local locks. */
2137			NFSUNLOCKSTATE();
2138			if (vnode_unlocked == 0) {
2139				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2140				vnode_unlocked = 1;
2141				NFSVOPUNLOCK(vp, 0);
2142			}
2143			nfsrv_locallock_rollback(vp, lfp, p);
2144			NFSLOCKSTATE();
2145			nfsrv_unlocklf(lfp);
2146		}
2147		if (ret == 0)
2148			NFSUNLOCKSTATE();
2149		goto out;
2150	    }
2151	  }
2152	}
2153
2154	/*
2155	 * We only get here if there was no lock that conflicted.
2156	 */
2157	if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2158		NFSUNLOCKSTATE();
2159		goto out;
2160	}
2161
2162	/*
2163	 * We only get here when we are creating or modifying a lock.
2164	 * There are two variants:
2165	 * - exist_lock_owner where lock_owner exists
2166	 * - open_to_lock_owner with new lock_owner
2167	 */
2168	first = new_lop->lo_first;
2169	end = new_lop->lo_end;
2170	lock_flags = new_lop->lo_flags;
2171	if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2172		nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2173		stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2174		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2175			stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2176		stateidp->other[0] = lckstp->ls_stateid.other[0];
2177		stateidp->other[1] = lckstp->ls_stateid.other[1];
2178		stateidp->other[2] = lckstp->ls_stateid.other[2];
2179	} else {
2180		/*
2181		 * The new open_to_lock_owner case.
2182		 * Link the new nfsstate into the lists.
2183		 */
2184		new_stp->ls_seq = new_stp->ls_opentolockseq;
2185		nfsrvd_refcache(new_stp->ls_op);
2186		stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2187		stateidp->other[0] = new_stp->ls_stateid.other[0] =
2188		    clp->lc_clientid.lval[0];
2189		stateidp->other[1] = new_stp->ls_stateid.other[1] =
2190		    clp->lc_clientid.lval[1];
2191		stateidp->other[2] = new_stp->ls_stateid.other[2] =
2192		    nfsrv_nextstateindex(clp);
2193		new_stp->ls_clp = clp;
2194		LIST_INIT(&new_stp->ls_lock);
2195		new_stp->ls_openstp = stp;
2196		new_stp->ls_lfp = lfp;
2197		nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2198		    lfp);
2199		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2200		    new_stp, ls_hash);
2201		LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2202		*new_lopp = NULL;
2203		*new_stpp = NULL;
2204		newnfsstats.srvlockowners++;
2205		nfsrv_openpluslock++;
2206	}
2207	if (filestruct_locked != 0) {
2208		NFSUNLOCKSTATE();
2209		nfsrv_locallock_commit(lfp, lock_flags, first, end);
2210		NFSLOCKSTATE();
2211		nfsrv_unlocklf(lfp);
2212	}
2213	NFSUNLOCKSTATE();
2214
2215out:
2216	if (haslock) {
2217		NFSLOCKV4ROOTMUTEX();
2218		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2219		NFSUNLOCKV4ROOTMUTEX();
2220	}
2221	if (vnode_unlocked != 0) {
2222		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2223		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
2224			error = NFSERR_SERVERFAULT;
2225	}
2226	if (other_lop)
2227		FREE((caddr_t)other_lop, M_NFSDLOCK);
2228	NFSEXITCODE2(error, nd);
2229	return (error);
2230}
2231
2232/*
2233 * Check for state errors for Open.
2234 * repstat is passed back out as an error if more critical errors
2235 * are not detected.
 *
 * The checks are made in this order:
 * - client/server restart, via nfsrv_checkrestart()
 * - nfsrv_openpluslock against nfsrv_v4statelimit (NFSERR_RESOURCE)
 * - client lookup, open owner seqid and grace period
 * - repstat, if nothing above failed
 * - for NFSLCK_DELEGCUR, a matching current delegation must exist
 * - access/deny bit conflicts with the opens already on the file
 * - conflicting delegations, via nfsrv_delegconflict()
 *
 * Arguments:
 * clientid - used for the restart check and to look up the nfsclient
 * stateidp - only examined for NFSLCK_DELEGCUR, where it is matched
 *            against the stateids of the file's delegations
 * new_stp  - describes the requested open; ls_flags, ls_seq, ls_op and
 *            ls_stateid are used here
 * vp       - vnode for the file, or NULL if the file doesn't exist yet,
 *            in which case the function returns once the client, open
 *            owner, grace and repstat checks are done
 * nd       - request descriptor; nd_flag is tested for ND_NFSV41
 * p        - passed through to the helper functions
 * repstat  - status to return if none of the earlier checks failed
 *
 * Returns 0 if no error was detected, otherwise a non-zero error (an
 * NFSERR_xxx value set here, or whatever a helper function returned).
 *
 * Locking: the state lock is acquired with NFSLOCKSTATE() after the
 * nfslockfile has been allocated. Each return path after that either
 * calls NFSUNLOCKSTATE() here or relies on nfsrv_clientconflict() or
 * nfsrv_delegconflict() having unlocked state when they return non-zero,
 * as noted in the comments below.
2236 */
2237APPLESTATIC int
2238nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
2239    struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
2240    NFSPROC_T *p, int repstat)
2241{
2242	struct nfsstate *stp, *nstp;
2243	struct nfsclient *clp;
2244	struct nfsstate *ownerstp;
2245	struct nfslockfile *lfp, *new_lfp;
2246	int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
2247
	/*
	 * readonly is set when the share bits are exactly read access.
	 * The delegation conflict comment further down describes this
	 * case as an Open with Read Access and Deny None (presumably
	 * NFSLCK_SHAREBITS covers both the access and deny bits - confirm).
	 */
2248	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2249		readonly = 1;
2250	/*
2251	 * Check for restart conditions (client and server).
2252	 */
2253	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2254		&new_stp->ls_stateid, 0);
2255	if (error)
2256		goto out;
2257
2258	/*
2259	 * Check for state resource limit exceeded.
2260	 * Technically this should be SMP protected, but the worst
2261	 * case error is "out by one or two" on the count when it
2262	 * returns NFSERR_RESOURCE and the limit is just a rather
2263	 * arbitrary high water mark, so no harm is done.
2264	 */
2265	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
2266		error = NFSERR_RESOURCE;
2267		goto out;
2268	}
2269
	/*
	 * Each pass allocates a fresh nfslockfile and, if there is a vnode,
	 * gets its file handle before the state lock is taken (presumably
	 * because the M_WAITOK allocation can sleep - confirm).
	 * Control comes back here when nfsrv_clientconflict() returns 1 or
	 * nfsrv_delegconflict() returns -1.
	 */
2270tryagain:
2271	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2272	    M_NFSDLOCKFILE, M_WAITOK);
2273	if (vp)
2274		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2275		    NULL, p);
2276	NFSLOCKSTATE();
2277	/*
2278	 * Get the nfsclient structure.
2279	 */
2280	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2281	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2282
2283	/*
2284	 * Look up the open owner. See if it needs confirmation and
2285	 * check the seq#, as required.
	 * (ownerstp is only set, and only examined, when error is 0.)
2286	 */
2287	if (!error)
2288		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2289
2290	if (!error && ownerstp) {
2291		error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2292		    new_stp->ls_op);
2293		/*
2294		 * If the OpenOwner hasn't been confirmed, assume the
2295		 * old one was a replay and this one is ok.
2296		 * See: RFC3530 Sec. 14.2.18.
2297		 */
2298		if (error == NFSERR_BADSEQID &&
2299		    (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2300			error = 0;
2301	}
2302
2303	/*
2304	 * Check for grace.
	 * A reclaim for which nfsrv_checkstable() returns non-zero is
	 * answered with NFSERR_NOGRACE.
2305	 */
2306	if (!error)
2307		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2308	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2309		nfsrv_checkstable(clp))
2310		error = NFSERR_NOGRACE;
2311
2312	/*
2313	 * If none of the above errors occurred, let repstat be
2314	 * returned.
2315	 */
2316	if (repstat && !error)
2317		error = repstat;
	/*
	 * Error exit: drop the state lock, release the v4root lock if a
	 * helper acquired it on an earlier pass (haslock starts at 0 and
	 * is only changed through the &haslock arguments below) and free
	 * the nfslockfile allocated for this pass.
	 */
2318	if (error) {
2319		NFSUNLOCKSTATE();
2320		if (haslock) {
2321			NFSLOCKV4ROOTMUTEX();
2322			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2323			NFSUNLOCKV4ROOTMUTEX();
2324		}
2325		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2326		goto out;
2327	}
2328
2329	/*
2330	 * If vp == NULL, the file doesn't exist yet, so return ok.
2331	 * (This always happens on the first pass, so haslock must be 0.)
2332	 */
2333	if (vp == NULL) {
2334		NFSUNLOCKSTATE();
2335		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2336		goto out;
2337	}
2338
2339	/*
2340	 * Get the structure for the underlying file.
	 * A failure from the earlier nfsrv_getlockfh() call is reported
	 * here instead of calling nfsrv_getlockfile().
	 * new_lfp is freed if it is still non-NULL afterwards (presumably
	 * nfsrv_getlockfile() sets it to NULL when it keeps the structure -
	 * confirm).
2341	 */
2342	if (getfhret)
2343		error = getfhret;
2344	else
2345		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2346		    NULL, 0);
2347	if (new_lfp)
2348		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2349	if (error) {
2350		NFSUNLOCKSTATE();
2351		if (haslock) {
2352			NFSLOCKV4ROOTMUTEX();
2353			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2354			NFSUNLOCKV4ROOTMUTEX();
2355		}
2356		goto out;
2357	}
2358
2359	/*
2360	 * Search for a conflicting open/share.
2361	 */
2362	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2363	    /*
2364	     * For Delegate_Cur, search for the matching Delegation,
2365	     * which indicates no conflict.
2366	     * An old delegation should have been recovered by the
2367	     * client doing a Claim_DELEGATE_Prev, so I won't let
2368	     * it match and return NFSERR_EXPIRED. Should I let it
2369	     * match?
	     * A delegation matches when it is not marked NFSLCK_OLDDELEG,
	     * the "other" part of the stateid is the same and the seqid
	     * is the same (for NFSv4.1, a seqid of 0 in the request is
	     * also accepted).
2370	     */
2371	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2372		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2373		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2374		    stateidp->seqid == 0) ||
2375		    stateidp->seqid == stp->ls_stateid.seqid) &&
2376		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2377			  NFSX_STATEIDOTHER))
2378			break;
2379	    }
	    /*
	     * Fail if no delegation matched, or if write access is being
	     * requested but the matching delegation is a read delegation.
	     */
2380	    if (stp == LIST_END(&lfp->lf_deleg) ||
2381		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2382		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2383		NFSUNLOCKSTATE();
2384		if (haslock) {
2385			NFSLOCKV4ROOTMUTEX();
2386			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2387			NFSUNLOCKV4ROOTMUTEX();
2388		}
2389		error = NFSERR_EXPIRED;
2390		goto out;
2391	    }
2392	}
2393
2394	/*
2395	 * Check for access/deny bit conflicts. I check for the same
2396	 * owner as well, in case the client didn't bother.
	 * There is a conflict if the requested access bits intersect an
	 * existing open's deny bits, or an existing open's access bits
	 * intersect the requested deny bits. The check is skipped for
	 * NFSLCK_DELEGCUR.
2397	 */
2398	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2399		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2400		    (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2401		      ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2402		     ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2403		      ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2404			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2405			if (ret == 1) {
2406				/*
2407				 * nfsrv_clientconflict() unlocks
2408				 * state when it returns non-zero.
2409				 */
2410				goto tryagain;
2411			}
			/*
			 * ret == 2 is reported as NFSERR_PERM; anything
			 * else is a real conflict. The state lock is
			 * still held only when ret == 0, so that is the
			 * only case where it is released here.
			 */
2412			if (ret == 2)
2413				error = NFSERR_PERM;
2414			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2415				error = NFSERR_RECLAIMCONFLICT;
2416			else
2417				error = NFSERR_SHAREDENIED;
2418			if (ret == 0)
2419				NFSUNLOCKSTATE();
2420			if (haslock) {
2421				NFSLOCKV4ROOTMUTEX();
2422				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2423				NFSUNLOCKV4ROOTMUTEX();
2424			}
2425			goto out;
2426		}
2427	}
2428
2429	/*
2430	 * Check for a conflicting delegation. If one is found, call
2431	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2432	 * been set yet, it will get the lock. Otherwise, it will recall
2433	 * the delegation. Then, we try try again...
2434	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2435	 *  isn't a conflict.)
2436	 * I currently believe the conflict algorithm to be:
2437	 * For Open with Read Access and Deny None
2438	 * - there is a conflict iff a different client has a write delegation
2439	 * For Open with other Write Access or any Deny except None
2440	 * - there is a conflict if a different client has any delegation
2441	 * - there is a conflict if the same client has a read delegation
2442	 *   (The current consensus is that this last case should be
2443	 *    considered a conflict since the client with a read delegation
2444	 *    could have done an Open with ReadAccess and WriteDeny
2445	 *    locally and then not have checked for the WriteDeny.)
2446	 * Don't check for a Reclaim, since that will be dealt with
2447	 * by nfsrv_openctrl().
2448	 */
2449	if (!(new_stp->ls_flags &
2450		(NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
2451	    stp = LIST_FIRST(&lfp->lf_deleg);
2452	    while (stp != LIST_END(&lfp->lf_deleg)) {
		/*
		 * The next entry is fetched before nfsrv_delegconflict()
		 * is called (presumably so the walk doesn't depend on
		 * stp afterwards - confirm).
		 */
2453		nstp = LIST_NEXT(stp, ls_file);
2454		if ((readonly && stp->ls_clp != clp &&
2455		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2456		    (!readonly && (stp->ls_clp != clp ||
2457		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2458			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2459			if (ret) {
2460			    /*
2461			     * nfsrv_delegconflict() unlocks state
2462			     * when it returns non-zero.
			     * NOTE(review): for ret != -1 this path
			     * returns without releasing the v4root
			     * lock even if haslock is set; presumably
			     * nfsrv_delegconflict() has already dealt
			     * with it - confirm.
2463			     */
2464			    if (ret == -1)
2465				goto tryagain;
2466			    error = ret;
2467			    goto out;
2468			}
2469		}
2470		stp = nstp;
2471	    }
2472	}
	/*
	 * No conflict was found. Drop the state lock and, if it was
	 * acquired along the way, the v4root lock.
	 */
2473	NFSUNLOCKSTATE();
2474	if (haslock) {
2475		NFSLOCKV4ROOTMUTEX();
2476		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2477		NFSUNLOCKV4ROOTMUTEX();
2478	}
2479
2480out:
2481	NFSEXITCODE2(error, nd);
2482	return (error);
2483}
2484
2485/*
2486 * Open control function to create/update open state for an open.
2487 */
2488APPLESTATIC int
2489nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2490    struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2491    nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2492    NFSPROC_T *p, u_quad_t filerev)
2493{
2494	struct nfsstate *new_stp = *new_stpp;
2495	struct nfsstate *stp, *nstp;
2496	struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2497	struct nfslockfile *lfp, *new_lfp;
2498	struct nfsclient *clp;
2499	int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2500	int readonly = 0, cbret = 1, getfhret = 0;
2501	int gotstate = 0, len = 0;
2502	u_char *clidp = NULL;
2503
2504	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2505		readonly = 1;
2506	/*
2507	 * Check for restart conditions (client and server).
2508	 * (Paranoia, should have been detected by nfsrv_opencheck().)
2509	 * If an error does show up, return NFSERR_EXPIRED, since the
2510	 * seqid# has already been incremented.
2511	 */
2512	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2513	    &new_stp->ls_stateid, 0);
2514	if (error) {
2515		printf("Nfsd: openctrl unexpected restart err=%d\n",
2516		    error);
2517		error = NFSERR_EXPIRED;
2518		goto out;
2519	}
2520
2521	clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
2522tryagain:
2523	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2524	    M_NFSDLOCKFILE, M_WAITOK);
2525	MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate),
2526	    M_NFSDSTATE, M_WAITOK);
2527	MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate),
2528	    M_NFSDSTATE, M_WAITOK);
2529	getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2530	    NULL, p);
2531	NFSLOCKSTATE();
2532	/*
2533	 * Get the client structure. Since the linked lists could be changed
2534	 * by other nfsd processes if this process does a tsleep(), one of
2535	 * two things must be done.
2536	 * 1 - don't tsleep()
2537	 * or
2538	 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2539	 *     before using the lists, since this lock stops the other
2540	 *     nfsd. This should only be used for rare cases, since it
2541	 *     essentially single threads the nfsd.
2542	 *     At this time, it is only done for cases where the stable
2543	 *     storage file must be written prior to completion of state
2544	 *     expiration.
2545	 */
2546	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2547	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2548	if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2549	    clp->lc_program) {
2550		/*
2551		 * This happens on the first open for a client
2552		 * that supports callbacks.
2553		 */
2554		NFSUNLOCKSTATE();
2555		/*
2556		 * Although nfsrv_docallback() will sleep, clp won't
2557		 * go away, since they are only removed when the
2558		 * nfsv4_lock() has blocked the nfsd threads. The
2559		 * fields in clp can change, but having multiple
2560		 * threads do this Null callback RPC should be
2561		 * harmless.
2562		 */
2563		cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2564		    NULL, 0, NULL, NULL, NULL, p);
2565		NFSLOCKSTATE();
2566		clp->lc_flags &= ~LCL_NEEDSCBNULL;
2567		if (!cbret)
2568			clp->lc_flags |= LCL_CALLBACKSON;
2569	}
2570
2571	/*
2572	 * Look up the open owner. See if it needs confirmation and
2573	 * check the seq#, as required.
2574	 */
2575	if (!error)
2576		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2577
2578	if (error) {
2579		NFSUNLOCKSTATE();
2580		printf("Nfsd: openctrl unexpected state err=%d\n",
2581			error);
2582		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2583		free((caddr_t)new_open, M_NFSDSTATE);
2584		free((caddr_t)new_deleg, M_NFSDSTATE);
2585		if (haslock) {
2586			NFSLOCKV4ROOTMUTEX();
2587			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2588			NFSUNLOCKV4ROOTMUTEX();
2589		}
2590		error = NFSERR_EXPIRED;
2591		goto out;
2592	}
2593
2594	if (new_stp->ls_flags & NFSLCK_RECLAIM)
2595		nfsrv_markstable(clp);
2596
2597	/*
2598	 * Get the structure for the underlying file.
2599	 */
2600	if (getfhret)
2601		error = getfhret;
2602	else
2603		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2604		    NULL, 0);
2605	if (new_lfp)
2606		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2607	if (error) {
2608		NFSUNLOCKSTATE();
2609		printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2610		    error);
2611		free((caddr_t)new_open, M_NFSDSTATE);
2612		free((caddr_t)new_deleg, M_NFSDSTATE);
2613		if (haslock) {
2614			NFSLOCKV4ROOTMUTEX();
2615			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2616			NFSUNLOCKV4ROOTMUTEX();
2617		}
2618		goto out;
2619	}
2620
2621	/*
2622	 * Search for a conflicting open/share.
2623	 */
2624	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2625	    /*
2626	     * For Delegate_Cur, search for the matching Delegation,
2627	     * which indicates no conflict.
2628	     * An old delegation should have been recovered by the
2629	     * client doing a Claim_DELEGATE_Prev, so I won't let
2630	     * it match and return NFSERR_EXPIRED. Should I let it
2631	     * match?
2632	     */
2633	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2634		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2635		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2636		    stateidp->seqid == 0) ||
2637		    stateidp->seqid == stp->ls_stateid.seqid) &&
2638		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2639			NFSX_STATEIDOTHER))
2640			break;
2641	    }
2642	    if (stp == LIST_END(&lfp->lf_deleg) ||
2643		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2644		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2645		NFSUNLOCKSTATE();
2646		printf("Nfsd openctrl unexpected expiry\n");
2647		free((caddr_t)new_open, M_NFSDSTATE);
2648		free((caddr_t)new_deleg, M_NFSDSTATE);
2649		if (haslock) {
2650			NFSLOCKV4ROOTMUTEX();
2651			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2652			NFSUNLOCKV4ROOTMUTEX();
2653		}
2654		error = NFSERR_EXPIRED;
2655		goto out;
2656	    }
2657
2658	    /*
2659	     * Don't issue a Delegation, since one already exists and
2660	     * delay delegation timeout, as required.
2661	     */
2662	    delegate = 0;
2663	    nfsrv_delaydelegtimeout(stp);
2664	}
2665
2666	/*
2667	 * Check for access/deny bit conflicts. I also check for the
2668	 * same owner, since the client might not have bothered to check.
2669	 * Also, note an open for the same file and owner, if found,
2670	 * which is all we do here for Delegate_Cur, since conflict
2671	 * checking is already done.
2672	 */
2673	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2674		if (ownerstp && stp->ls_openowner == ownerstp)
2675			openstp = stp;
2676		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2677		    /*
2678		     * If another client has the file open, the only
2679		     * delegation that can be issued is a Read delegation
2680		     * and only if it is a Read open with Deny none.
2681		     */
2682		    if (clp != stp->ls_clp) {
2683			if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2684			    NFSLCK_READACCESS)
2685			    writedeleg = 0;
2686			else
2687			    delegate = 0;
2688		    }
2689		    if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2690		        ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2691		       ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2692		        ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
2693			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2694			if (ret == 1) {
2695				/*
2696				 * nfsrv_clientconflict() unlocks state
2697				 * when it returns non-zero.
2698				 */
2699				free((caddr_t)new_open, M_NFSDSTATE);
2700				free((caddr_t)new_deleg, M_NFSDSTATE);
2701				openstp = NULL;
2702				goto tryagain;
2703			}
2704			if (ret == 2)
2705				error = NFSERR_PERM;
2706			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2707				error = NFSERR_RECLAIMCONFLICT;
2708			else
2709				error = NFSERR_SHAREDENIED;
2710			if (ret == 0)
2711				NFSUNLOCKSTATE();
2712			if (haslock) {
2713				NFSLOCKV4ROOTMUTEX();
2714				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2715				NFSUNLOCKV4ROOTMUTEX();
2716			}
2717			free((caddr_t)new_open, M_NFSDSTATE);
2718			free((caddr_t)new_deleg, M_NFSDSTATE);
2719			printf("nfsd openctrl unexpected client cnfl\n");
2720			goto out;
2721		    }
2722		}
2723	}
2724
2725	/*
2726	 * Check for a conflicting delegation. If one is found, call
2727	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2728	 * been set yet, it will get the lock. Otherwise, it will recall
2729	 * the delegation. Then, we try try again...
2730	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2731	 *  isn't a conflict.)
2732	 * I currently believe the conflict algorithm to be:
2733	 * For Open with Read Access and Deny None
2734	 * - there is a conflict iff a different client has a write delegation
2735	 * For Open with other Write Access or any Deny except None
2736	 * - there is a conflict if a different client has any delegation
2737	 * - there is a conflict if the same client has a read delegation
2738	 *   (The current consensus is that this last case should be
2739	 *    considered a conflict since the client with a read delegation
2740	 *    could have done an Open with ReadAccess and WriteDeny
2741	 *    locally and then not have checked for the WriteDeny.)
2742	 */
2743	if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
2744	    stp = LIST_FIRST(&lfp->lf_deleg);
2745	    while (stp != LIST_END(&lfp->lf_deleg)) {
2746		nstp = LIST_NEXT(stp, ls_file);
2747		if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
2748			writedeleg = 0;
2749		else
2750			delegate = 0;
2751		if ((readonly && stp->ls_clp != clp &&
2752		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2753		    (!readonly && (stp->ls_clp != clp ||
2754		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2755		    if (new_stp->ls_flags & NFSLCK_RECLAIM) {
2756			delegate = 2;
2757		    } else {
2758			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2759			if (ret) {
2760			    /*
2761			     * nfsrv_delegconflict() unlocks state
2762			     * when it returns non-zero.
2763			     */
2764			    printf("Nfsd openctrl unexpected deleg cnfl\n");
2765			    free((caddr_t)new_open, M_NFSDSTATE);
2766			    free((caddr_t)new_deleg, M_NFSDSTATE);
2767			    if (ret == -1) {
2768				openstp = NULL;
2769				goto tryagain;
2770			    }
2771			    error = ret;
2772			    goto out;
2773			}
2774		    }
2775		}
2776		stp = nstp;
2777	    }
2778	}
2779
2780	/*
2781	 * We only get here if there was no open that conflicted.
2782	 * If an open for the owner exists, OR in the access/deny bits.
2783	 * Otherwise it is a new open. If the open_owner hasn't been
2784	 * confirmed, replace the open with the new one needing confirmation,
2785	 * otherwise add the open.
2786	 */
2787	if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
2788	    /*
2789	     * Handle NFSLCK_DELEGPREV by searching the old delegations for
2790	     * a match. If found, just move the old delegation to the current
2791	     * delegation list and issue open. If not found, return
2792	     * NFSERR_EXPIRED.
2793	     */
2794	    LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
2795		if (stp->ls_lfp == lfp) {
2796		    /* Found it */
2797		    if (stp->ls_clp != clp)
2798			panic("olddeleg clp");
2799		    LIST_REMOVE(stp, ls_list);
2800		    LIST_REMOVE(stp, ls_hash);
2801		    stp->ls_flags &= ~NFSLCK_OLDDELEG;
2802		    stp->ls_stateid.seqid = delegstateidp->seqid = 1;
2803		    stp->ls_stateid.other[0] = delegstateidp->other[0] =
2804			clp->lc_clientid.lval[0];
2805		    stp->ls_stateid.other[1] = delegstateidp->other[1] =
2806			clp->lc_clientid.lval[1];
2807		    stp->ls_stateid.other[2] = delegstateidp->other[2] =
2808			nfsrv_nextstateindex(clp);
2809		    stp->ls_compref = nd->nd_compref;
2810		    LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
2811		    LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2812			stp->ls_stateid), stp, ls_hash);
2813		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2814			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2815		    else
2816			*rflagsp |= NFSV4OPEN_READDELEGATE;
2817		    clp->lc_delegtime = NFSD_MONOSEC +
2818			nfsrv_lease + NFSRV_LEASEDELTA;
2819
2820		    /*
2821		     * Now, do the associated open.
2822		     */
2823		    new_open->ls_stateid.seqid = 1;
2824		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2825		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2826		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2827		    new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
2828			NFSLCK_OPEN;
2829		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2830			new_open->ls_flags |= (NFSLCK_READACCESS |
2831			    NFSLCK_WRITEACCESS);
2832		    else
2833			new_open->ls_flags |= NFSLCK_READACCESS;
2834		    new_open->ls_uid = new_stp->ls_uid;
2835		    new_open->ls_lfp = lfp;
2836		    new_open->ls_clp = clp;
2837		    LIST_INIT(&new_open->ls_open);
2838		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2839		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2840			new_open, ls_hash);
2841		    /*
2842		     * and handle the open owner
2843		     */
2844		    if (ownerstp) {
2845			new_open->ls_openowner = ownerstp;
2846			LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
2847		    } else {
2848			new_open->ls_openowner = new_stp;
2849			new_stp->ls_flags = 0;
2850			nfsrvd_refcache(new_stp->ls_op);
2851			new_stp->ls_noopens = 0;
2852			LIST_INIT(&new_stp->ls_open);
2853			LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2854			LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2855			*new_stpp = NULL;
2856			newnfsstats.srvopenowners++;
2857			nfsrv_openpluslock++;
2858		    }
2859		    openstp = new_open;
2860		    new_open = NULL;
2861		    newnfsstats.srvopens++;
2862		    nfsrv_openpluslock++;
2863		    break;
2864		}
2865	    }
2866	    if (stp == LIST_END(&clp->lc_olddeleg))
2867		error = NFSERR_EXPIRED;
2868	} else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2869	    /*
2870	     * Scan to see that no delegation for this client and file
2871	     * doesn't already exist.
2872	     * There also shouldn't yet be an Open for this file and
2873	     * openowner.
2874	     */
2875	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2876		if (stp->ls_clp == clp)
2877		    break;
2878	    }
2879	    if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
2880		/*
2881		 * This is the Claim_Previous case with a delegation
2882		 * type != Delegate_None.
2883		 */
2884		/*
2885		 * First, add the delegation. (Although we must issue the
2886		 * delegation, we can also ask for an immediate return.)
2887		 */
2888		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
2889		new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
2890		    clp->lc_clientid.lval[0];
2891		new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
2892		    clp->lc_clientid.lval[1];
2893		new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
2894		    nfsrv_nextstateindex(clp);
2895		if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
2896		    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
2897			NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
2898		    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2899		} else {
2900		    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
2901			NFSLCK_READACCESS);
2902		    *rflagsp |= NFSV4OPEN_READDELEGATE;
2903		}
2904		new_deleg->ls_uid = new_stp->ls_uid;
2905		new_deleg->ls_lfp = lfp;
2906		new_deleg->ls_clp = clp;
2907		new_deleg->ls_filerev = filerev;
2908		new_deleg->ls_compref = nd->nd_compref;
2909		LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
2910		LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2911		    new_deleg->ls_stateid), new_deleg, ls_hash);
2912		LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
2913		new_deleg = NULL;
2914		if (delegate == 2 || nfsrv_issuedelegs == 0 ||
2915		    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
2916		     LCL_CALLBACKSON ||
2917		    NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
2918		    !NFSVNO_DELEGOK(vp))
2919		    *rflagsp |= NFSV4OPEN_RECALL;
2920		newnfsstats.srvdelegates++;
2921		nfsrv_openpluslock++;
2922		nfsrv_delegatecnt++;
2923
2924		/*
2925		 * Now, do the associated open.
2926		 */
2927		new_open->ls_stateid.seqid = 1;
2928		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2929		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2930		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2931		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
2932		    NFSLCK_OPEN;
2933		if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
2934			new_open->ls_flags |= (NFSLCK_READACCESS |
2935			    NFSLCK_WRITEACCESS);
2936		else
2937			new_open->ls_flags |= NFSLCK_READACCESS;
2938		new_open->ls_uid = new_stp->ls_uid;
2939		new_open->ls_lfp = lfp;
2940		new_open->ls_clp = clp;
2941		LIST_INIT(&new_open->ls_open);
2942		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2943		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2944		   new_open, ls_hash);
2945		/*
2946		 * and handle the open owner
2947		 */
2948		if (ownerstp) {
2949		    new_open->ls_openowner = ownerstp;
2950		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
2951		} else {
2952		    new_open->ls_openowner = new_stp;
2953		    new_stp->ls_flags = 0;
2954		    nfsrvd_refcache(new_stp->ls_op);
2955		    new_stp->ls_noopens = 0;
2956		    LIST_INIT(&new_stp->ls_open);
2957		    LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2958		    LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2959		    *new_stpp = NULL;
2960		    newnfsstats.srvopenowners++;
2961		    nfsrv_openpluslock++;
2962		}
2963		openstp = new_open;
2964		new_open = NULL;
2965		newnfsstats.srvopens++;
2966		nfsrv_openpluslock++;
2967	    } else {
2968		error = NFSERR_RECLAIMCONFLICT;
2969	    }
2970	} else if (ownerstp) {
2971		if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
2972		    /* Replace the open */
2973		    if (ownerstp->ls_op)
2974			nfsrvd_derefcache(ownerstp->ls_op);
2975		    ownerstp->ls_op = new_stp->ls_op;
2976		    nfsrvd_refcache(ownerstp->ls_op);
2977		    ownerstp->ls_seq = new_stp->ls_seq;
2978		    *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
2979		    stp = LIST_FIRST(&ownerstp->ls_open);
2980		    stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
2981			NFSLCK_OPEN;
2982		    stp->ls_stateid.seqid = 1;
2983		    stp->ls_uid = new_stp->ls_uid;
2984		    if (lfp != stp->ls_lfp) {
2985			LIST_REMOVE(stp, ls_file);
2986			LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
2987			stp->ls_lfp = lfp;
2988		    }
2989		    openstp = stp;
2990		} else if (openstp) {
2991		    openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
2992		    openstp->ls_stateid.seqid++;
2993		    if ((nd->nd_flag & ND_NFSV41) != 0 &&
2994			openstp->ls_stateid.seqid == 0)
2995			openstp->ls_stateid.seqid = 1;
2996
2997		    /*
2998		     * This is where we can choose to issue a delegation.
2999		     */
3000		    if (delegate == 0 || writedeleg == 0 ||
3001			NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
3002			nfsrv_writedelegifpos == 0) ||
3003			!NFSVNO_DELEGOK(vp) ||
3004			(new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
3005			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3006			 LCL_CALLBACKSON)
3007			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3008		    else if (nfsrv_issuedelegs == 0 ||
3009			NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3010			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3011		    else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3012			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3013		    else {
3014			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3015			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3016			    = clp->lc_clientid.lval[0];
3017			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3018			    = clp->lc_clientid.lval[1];
3019			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3020			    = nfsrv_nextstateindex(clp);
3021			new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3022			    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3023			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3024			new_deleg->ls_uid = new_stp->ls_uid;
3025			new_deleg->ls_lfp = lfp;
3026			new_deleg->ls_clp = clp;
3027			new_deleg->ls_filerev = filerev;
3028			new_deleg->ls_compref = nd->nd_compref;
3029			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3030			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3031			    new_deleg->ls_stateid), new_deleg, ls_hash);
3032			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3033			new_deleg = NULL;
3034			newnfsstats.srvdelegates++;
3035			nfsrv_openpluslock++;
3036			nfsrv_delegatecnt++;
3037		    }
3038		} else {
3039		    new_open->ls_stateid.seqid = 1;
3040		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3041		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3042		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3043		    new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3044			NFSLCK_OPEN;
3045		    new_open->ls_uid = new_stp->ls_uid;
3046		    new_open->ls_openowner = ownerstp;
3047		    new_open->ls_lfp = lfp;
3048		    new_open->ls_clp = clp;
3049		    LIST_INIT(&new_open->ls_open);
3050		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3051		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3052		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3053			new_open, ls_hash);
3054		    openstp = new_open;
3055		    new_open = NULL;
3056		    newnfsstats.srvopens++;
3057		    nfsrv_openpluslock++;
3058
3059		    /*
3060		     * This is where we can choose to issue a delegation.
3061		     */
3062		    if (delegate == 0 || (writedeleg == 0 && readonly == 0) ||
3063			!NFSVNO_DELEGOK(vp) ||
3064			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3065			 LCL_CALLBACKSON)
3066			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3067		    else if (nfsrv_issuedelegs == 0 ||
3068			NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3069			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3070		    else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3071			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3072		    else {
3073			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3074			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3075			    = clp->lc_clientid.lval[0];
3076			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3077			    = clp->lc_clientid.lval[1];
3078			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3079			    = nfsrv_nextstateindex(clp);
3080			if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3081			    (nfsrv_writedelegifpos || !readonly) &&
3082			    (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
3083			    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3084				NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3085			    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3086			} else {
3087			    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3088				NFSLCK_READACCESS);
3089			    *rflagsp |= NFSV4OPEN_READDELEGATE;
3090			}
3091			new_deleg->ls_uid = new_stp->ls_uid;
3092			new_deleg->ls_lfp = lfp;
3093			new_deleg->ls_clp = clp;
3094			new_deleg->ls_filerev = filerev;
3095			new_deleg->ls_compref = nd->nd_compref;
3096			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3097			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3098			    new_deleg->ls_stateid), new_deleg, ls_hash);
3099			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3100			new_deleg = NULL;
3101			newnfsstats.srvdelegates++;
3102			nfsrv_openpluslock++;
3103			nfsrv_delegatecnt++;
3104		    }
3105		}
3106	} else {
3107		/*
3108		 * New owner case. Start the open_owner sequence with a
3109		 * Needs confirmation (unless a reclaim) and hang the
3110		 * new open off it.
3111		 */
3112		new_open->ls_stateid.seqid = 1;
3113		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3114		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3115		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3116		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3117		    NFSLCK_OPEN;
3118		new_open->ls_uid = new_stp->ls_uid;
3119		LIST_INIT(&new_open->ls_open);
3120		new_open->ls_openowner = new_stp;
3121		new_open->ls_lfp = lfp;
3122		new_open->ls_clp = clp;
3123		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3124		if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3125			new_stp->ls_flags = 0;
3126		} else if ((nd->nd_flag & ND_NFSV41) != 0) {
3127			/* NFSv4.1 never needs confirmation. */
3128			new_stp->ls_flags = 0;
3129
3130			/*
3131			 * This is where we can choose to issue a delegation.
3132			 */
3133			if (delegate && nfsrv_issuedelegs &&
3134			    (writedeleg || readonly) &&
3135			    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
3136			     LCL_CALLBACKSON &&
3137			    !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
3138			    NFSVNO_DELEGOK(vp) &&
3139			    ((nd->nd_flag & ND_NFSV41) == 0 ||
3140			     (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
3141				new_deleg->ls_stateid.seqid =
3142				    delegstateidp->seqid = 1;
3143				new_deleg->ls_stateid.other[0] =
3144				    delegstateidp->other[0]
3145				    = clp->lc_clientid.lval[0];
3146				new_deleg->ls_stateid.other[1] =
3147				    delegstateidp->other[1]
3148				    = clp->lc_clientid.lval[1];
3149				new_deleg->ls_stateid.other[2] =
3150				    delegstateidp->other[2]
3151				    = nfsrv_nextstateindex(clp);
3152				if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3153				    (nfsrv_writedelegifpos || !readonly) &&
3154				    ((nd->nd_flag & ND_NFSV41) == 0 ||
3155				     (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
3156				     0)) {
3157					new_deleg->ls_flags =
3158					    (NFSLCK_DELEGWRITE |
3159					     NFSLCK_READACCESS |
3160					     NFSLCK_WRITEACCESS);
3161					*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3162				} else {
3163					new_deleg->ls_flags =
3164					    (NFSLCK_DELEGREAD |
3165					     NFSLCK_READACCESS);
3166					*rflagsp |= NFSV4OPEN_READDELEGATE;
3167				}
3168				new_deleg->ls_uid = new_stp->ls_uid;
3169				new_deleg->ls_lfp = lfp;
3170				new_deleg->ls_clp = clp;
3171				new_deleg->ls_filerev = filerev;
3172				new_deleg->ls_compref = nd->nd_compref;
3173				LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
3174				    ls_file);
3175				LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3176				    new_deleg->ls_stateid), new_deleg, ls_hash);
3177				LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
3178				    ls_list);
3179				new_deleg = NULL;
3180				newnfsstats.srvdelegates++;
3181				nfsrv_openpluslock++;
3182				nfsrv_delegatecnt++;
3183			}
3184			/*
3185			 * Since NFSv4.1 never does an OpenConfirm, the first
3186			 * open state will be acquired here.
3187			 */
3188			if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3189				clp->lc_flags |= LCL_STAMPEDSTABLE;
3190				len = clp->lc_idlen;
3191				NFSBCOPY(clp->lc_id, clidp, len);
3192				gotstate = 1;
3193			}
3194		} else {
3195			*rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3196			new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3197		}
3198		nfsrvd_refcache(new_stp->ls_op);
3199		new_stp->ls_noopens = 0;
3200		LIST_INIT(&new_stp->ls_open);
3201		LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3202		LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3203		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3204		    new_open, ls_hash);
3205		openstp = new_open;
3206		new_open = NULL;
3207		*new_stpp = NULL;
3208		newnfsstats.srvopens++;
3209		nfsrv_openpluslock++;
3210		newnfsstats.srvopenowners++;
3211		nfsrv_openpluslock++;
3212	}
3213	if (!error) {
3214		stateidp->seqid = openstp->ls_stateid.seqid;
3215		stateidp->other[0] = openstp->ls_stateid.other[0];
3216		stateidp->other[1] = openstp->ls_stateid.other[1];
3217		stateidp->other[2] = openstp->ls_stateid.other[2];
3218	}
3219	NFSUNLOCKSTATE();
3220	if (haslock) {
3221		NFSLOCKV4ROOTMUTEX();
3222		nfsv4_unlock(&nfsv4rootfs_lock, 1);
3223		NFSUNLOCKV4ROOTMUTEX();
3224	}
3225	if (new_open)
3226		FREE((caddr_t)new_open, M_NFSDSTATE);
3227	if (new_deleg)
3228		FREE((caddr_t)new_deleg, M_NFSDSTATE);
3229
3230	/*
3231	 * If the NFSv4.1 client just acquired its first open, write a timestamp
3232	 * to the stable storage file.
3233	 */
3234	if (gotstate != 0) {
3235		nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3236		nfsrv_backupstable();
3237	}
3238
3239out:
3240	free(clidp, M_TEMP);
3241	NFSEXITCODE2(error, nd);
3242	return (error);
3243}
3244
3245/*
3246 * Open update. Does the confirm, downgrade and close.
3247 */
3248APPLESTATIC int
3249nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
3250    nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p)
3251{
3252	struct nfsstate *stp, *ownerstp;
3253	struct nfsclient *clp;
3254	struct nfslockfile *lfp;
3255	u_int32_t bits;
3256	int error = 0, gotstate = 0, len = 0;
3257	u_char *clidp = NULL;
3258
3259	/*
3260	 * Check for restart conditions (client and server).
3261	 */
3262	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3263	    &new_stp->ls_stateid, 0);
3264	if (error)
3265		goto out;
3266
3267	clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
3268	NFSLOCKSTATE();
3269	/*
3270	 * Get the open structure via clientid and stateid.
3271	 */
3272	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3273	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
3274	if (!error)
3275		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
3276		    new_stp->ls_flags, &stp);
3277
3278	/*
3279	 * Sanity check the open.
3280	 */
3281	if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
3282		(!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
3283		 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
3284		((new_stp->ls_flags & NFSLCK_CONFIRM) &&
3285		 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
3286		error = NFSERR_BADSTATEID;
3287
3288	if (!error)
3289		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
3290		    stp->ls_openowner, new_stp->ls_op);
3291	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
3292	    (((nd->nd_flag & ND_NFSV41) == 0 &&
3293	      !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
3294	     ((nd->nd_flag & ND_NFSV41) != 0 &&
3295	      new_stp->ls_stateid.seqid != 0)))
3296		error = NFSERR_OLDSTATEID;
3297	if (!error && vnode_vtype(vp) != VREG) {
3298		if (vnode_vtype(vp) == VDIR)
3299			error = NFSERR_ISDIR;
3300		else
3301			error = NFSERR_INVAL;
3302	}
3303
3304	if (error) {
3305		/*
3306		 * If a client tries to confirm an Open with a bad
3307		 * seqid# and there are no byte range locks or other Opens
3308		 * on the openowner, just throw it away, so the next use of the
3309		 * openowner will start a fresh seq#.
3310		 */
3311		if (error == NFSERR_BADSEQID &&
3312		    (new_stp->ls_flags & NFSLCK_CONFIRM) &&
3313		    nfsrv_nootherstate(stp))
3314			nfsrv_freeopenowner(stp->ls_openowner, 0, p);
3315		NFSUNLOCKSTATE();
3316		goto out;
3317	}
3318
3319	/*
3320	 * Set the return stateid.
3321	 */
3322	stateidp->seqid = stp->ls_stateid.seqid + 1;
3323	if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
3324		stateidp->seqid = 1;
3325	stateidp->other[0] = stp->ls_stateid.other[0];
3326	stateidp->other[1] = stp->ls_stateid.other[1];
3327	stateidp->other[2] = stp->ls_stateid.other[2];
3328	/*
3329	 * Now, handle the three cases.
3330	 */
3331	if (new_stp->ls_flags & NFSLCK_CONFIRM) {
3332		/*
3333		 * If the open doesn't need confirmation, it seems to me that
3334		 * there is a client error, but I'll just log it and keep going?
3335		 */
3336		if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
3337			printf("Nfsv4d: stray open confirm\n");
3338		stp->ls_openowner->ls_flags = 0;
3339		stp->ls_stateid.seqid++;
3340		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3341		    stp->ls_stateid.seqid == 0)
3342			stp->ls_stateid.seqid = 1;
3343		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3344			clp->lc_flags |= LCL_STAMPEDSTABLE;
3345			len = clp->lc_idlen;
3346			NFSBCOPY(clp->lc_id, clidp, len);
3347			gotstate = 1;
3348		}
3349		NFSUNLOCKSTATE();
3350	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
3351		ownerstp = stp->ls_openowner;
3352		lfp = stp->ls_lfp;
3353		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
3354			/* Get the lf lock */
3355			nfsrv_locklf(lfp);
3356			NFSUNLOCKSTATE();
3357			ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
3358			NFSVOPUNLOCK(vp, 0);
3359			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
3360				NFSLOCKSTATE();
3361				nfsrv_unlocklf(lfp);
3362				NFSUNLOCKSTATE();
3363			}
3364			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
3365		} else {
3366			(void) nfsrv_freeopen(stp, NULL, 0, p);
3367			NFSUNLOCKSTATE();
3368		}
3369	} else {
3370		/*
3371		 * Update the share bits, making sure that the new set are a
3372		 * subset of the old ones.
3373		 */
3374		bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
3375		if (~(stp->ls_flags) & bits) {
3376			NFSUNLOCKSTATE();
3377			error = NFSERR_INVAL;
3378			goto out;
3379		}
3380		stp->ls_flags = (bits | NFSLCK_OPEN);
3381		stp->ls_stateid.seqid++;
3382		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3383		    stp->ls_stateid.seqid == 0)
3384			stp->ls_stateid.seqid = 1;
3385		NFSUNLOCKSTATE();
3386	}
3387
3388	/*
3389	 * If the client just confirmed its first open, write a timestamp
3390	 * to the stable storage file.
3391	 */
3392	if (gotstate != 0) {
3393		nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3394		nfsrv_backupstable();
3395	}
3396
3397out:
3398	free(clidp, M_TEMP);
3399	NFSEXITCODE2(error, nd);
3400	return (error);
3401}
3402
3403/*
3404 * Delegation update. Does the purge and return.
3405 */
3406APPLESTATIC int
3407nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3408    nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3409    NFSPROC_T *p)
3410{
3411	struct nfsstate *stp;
3412	struct nfsclient *clp;
3413	int error = 0;
3414	fhandle_t fh;
3415
3416	/*
3417	 * Do a sanity check against the file handle for DelegReturn.
3418	 */
3419	if (vp) {
3420		error = nfsvno_getfh(vp, &fh, p);
3421		if (error)
3422			goto out;
3423	}
3424	/*
3425	 * Check for restart conditions (client and server).
3426	 */
3427	if (op == NFSV4OP_DELEGRETURN)
3428		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3429			stateidp, 0);
3430	else
3431		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3432			stateidp, 0);
3433
3434	NFSLOCKSTATE();
3435	/*
3436	 * Get the open structure via clientid and stateid.
3437	 */
3438	if (!error)
3439	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3440		(nfsquad_t)((u_quad_t)0), 0, nd, p);
3441	if (error) {
3442		if (error == NFSERR_CBPATHDOWN)
3443			error = 0;
3444		if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3445			error = NFSERR_STALESTATEID;
3446	}
3447	if (!error && op == NFSV4OP_DELEGRETURN) {
3448	    error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3449	    if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3450		((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3451		error = NFSERR_OLDSTATEID;
3452	}
3453	/*
3454	 * NFSERR_EXPIRED means that the state has gone away,
3455	 * so Delegations have been purged. Just return ok.
3456	 */
3457	if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3458		NFSUNLOCKSTATE();
3459		error = 0;
3460		goto out;
3461	}
3462	if (error) {
3463		NFSUNLOCKSTATE();
3464		goto out;
3465	}
3466
3467	if (op == NFSV4OP_DELEGRETURN) {
3468		if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3469		    sizeof (fhandle_t))) {
3470			NFSUNLOCKSTATE();
3471			error = NFSERR_BADSTATEID;
3472			goto out;
3473		}
3474		nfsrv_freedeleg(stp);
3475	} else {
3476		nfsrv_freedeleglist(&clp->lc_olddeleg);
3477	}
3478	NFSUNLOCKSTATE();
3479	error = 0;
3480
3481out:
3482	NFSEXITCODE(error);
3483	return (error);
3484}
3485
3486/*
3487 * Release lock owner.
3488 */
3489APPLESTATIC int
3490nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3491    NFSPROC_T *p)
3492{
3493	struct nfsstate *stp, *nstp, *openstp, *ownstp;
3494	struct nfsclient *clp;
3495	int error = 0;
3496
3497	/*
3498	 * Check for restart conditions (client and server).
3499	 */
3500	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3501	    &new_stp->ls_stateid, 0);
3502	if (error)
3503		goto out;
3504
3505	NFSLOCKSTATE();
3506	/*
3507	 * Get the lock owner by name.
3508	 */
3509	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3510	    (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3511	if (error) {
3512		NFSUNLOCKSTATE();
3513		goto out;
3514	}
3515	LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3516	    LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3517		stp = LIST_FIRST(&openstp->ls_open);
3518		while (stp != LIST_END(&openstp->ls_open)) {
3519		    nstp = LIST_NEXT(stp, ls_list);
3520		    /*
3521		     * If the owner matches, check for locks and
3522		     * then free or return an error.
3523		     */
3524		    if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3525			!NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3526			 stp->ls_ownerlen)){
3527			if (LIST_EMPTY(&stp->ls_lock)) {
3528			    nfsrv_freelockowner(stp, NULL, 0, p);
3529			} else {
3530			    NFSUNLOCKSTATE();
3531			    error = NFSERR_LOCKSHELD;
3532			    goto out;
3533			}
3534		    }
3535		    stp = nstp;
3536		}
3537	    }
3538	}
3539	NFSUNLOCKSTATE();
3540
3541out:
3542	NFSEXITCODE(error);
3543	return (error);
3544}
3545
3546/*
3547 * Get the file handle for a lock structure.
3548 */
3549static int
3550nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3551    fhandle_t *nfhp, NFSPROC_T *p)
3552{
3553	fhandle_t *fhp = NULL;
3554	int error;
3555
3556	/*
3557	 * For lock, use the new nfslock structure, otherwise just
3558	 * a fhandle_t on the stack.
3559	 */
3560	if (flags & NFSLCK_OPEN) {
3561		KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3562		fhp = &new_lfp->lf_fh;
3563	} else if (nfhp) {
3564		fhp = nfhp;
3565	} else {
3566		panic("nfsrv_getlockfh");
3567	}
3568	error = nfsvno_getfh(vp, fhp, p);
3569	NFSEXITCODE(error);
3570	return (error);
3571}
3572
3573/*
3574 * Get an nfs lock structure. Allocate one, as required, and return a
3575 * pointer to it.
3576 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3577 */
3578static int
3579nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3580    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3581{
3582	struct nfslockfile *lfp;
3583	fhandle_t *fhp = NULL, *tfhp;
3584	struct nfslockhashhead *hp;
3585	struct nfslockfile *new_lfp = NULL;
3586
3587	/*
3588	 * For lock, use the new nfslock structure, otherwise just
3589	 * a fhandle_t on the stack.
3590	 */
3591	if (flags & NFSLCK_OPEN) {
3592		new_lfp = *new_lfpp;
3593		fhp = &new_lfp->lf_fh;
3594	} else if (nfhp) {
3595		fhp = nfhp;
3596	} else {
3597		panic("nfsrv_getlockfile");
3598	}
3599
3600	hp = NFSLOCKHASH(fhp);
3601	LIST_FOREACH(lfp, hp, lf_hash) {
3602		tfhp = &lfp->lf_fh;
3603		if (NFSVNO_CMPFH(fhp, tfhp)) {
3604			if (lockit)
3605				nfsrv_locklf(lfp);
3606			*lfpp = lfp;
3607			return (0);
3608		}
3609	}
3610	if (!(flags & NFSLCK_OPEN))
3611		return (-1);
3612
3613	/*
3614	 * No match, so chain the new one into the list.
3615	 */
3616	LIST_INIT(&new_lfp->lf_open);
3617	LIST_INIT(&new_lfp->lf_lock);
3618	LIST_INIT(&new_lfp->lf_deleg);
3619	LIST_INIT(&new_lfp->lf_locallock);
3620	LIST_INIT(&new_lfp->lf_rollback);
3621	new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3622	new_lfp->lf_locallock_lck.nfslock_lock = 0;
3623	new_lfp->lf_usecount = 0;
3624	LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3625	*lfpp = new_lfp;
3626	*new_lfpp = NULL;
3627	return (0);
3628}
3629
3630/*
3631 * This function adds a nfslock lock structure to the list for the associated
3632 * nfsstate and nfslockfile structures. It will be inserted after the
3633 * entry pointed at by insert_lop.
3634 */
3635static void
3636nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3637    struct nfsstate *stp, struct nfslockfile *lfp)
3638{
3639	struct nfslock *lop, *nlop;
3640
3641	new_lop->lo_stp = stp;
3642	new_lop->lo_lfp = lfp;
3643
3644	if (stp != NULL) {
3645		/* Insert in increasing lo_first order */
3646		lop = LIST_FIRST(&lfp->lf_lock);
3647		if (lop == LIST_END(&lfp->lf_lock) ||
3648		    new_lop->lo_first <= lop->lo_first) {
3649			LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3650		} else {
3651			nlop = LIST_NEXT(lop, lo_lckfile);
3652			while (nlop != LIST_END(&lfp->lf_lock) &&
3653			       nlop->lo_first < new_lop->lo_first) {
3654				lop = nlop;
3655				nlop = LIST_NEXT(lop, lo_lckfile);
3656			}
3657			LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3658		}
3659	} else {
3660		new_lop->lo_lckfile.le_prev = NULL;	/* list not used */
3661	}
3662
3663	/*
3664	 * Insert after insert_lop, which is overloaded as stp or lfp for
3665	 * an empty list.
3666	 */
3667	if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3668		LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3669	else if ((struct nfsstate *)insert_lop == stp)
3670		LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3671	else
3672		LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3673	if (stp != NULL) {
3674		newnfsstats.srvlocks++;
3675		nfsrv_openpluslock++;
3676	}
3677}
3678
3679/*
3680 * This function updates the locking for a lock owner and given file. It
3681 * maintains a list of lock ranges ordered on increasing file offset that
3682 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
3683 * It always adds new_lop to the list and sometimes uses the one pointed
3684 * at by other_lopp.
3685 */
3686static void
3687nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
3688    struct nfslock **other_lopp, struct nfslockfile *lfp)
3689{
3690	struct nfslock *new_lop = *new_lopp;
3691	struct nfslock *lop, *tlop, *ilop;
3692	struct nfslock *other_lop = *other_lopp;
3693	int unlock = 0, myfile = 0;
3694	u_int64_t tmp;
3695
3696	/*
3697	 * Work down the list until the lock is merged.
3698	 */
3699	if (new_lop->lo_flags & NFSLCK_UNLOCK)
3700		unlock = 1;
3701	if (stp != NULL) {
3702		ilop = (struct nfslock *)stp;
3703		lop = LIST_FIRST(&stp->ls_lock);
3704	} else {
3705		ilop = (struct nfslock *)lfp;
3706		lop = LIST_FIRST(&lfp->lf_locallock);
3707	}
3708	while (lop != NULL) {
3709	    /*
3710	     * Only check locks for this file that aren't before the start of
3711	     * new lock's range.
3712	     */
3713	    if (lop->lo_lfp == lfp) {
3714	      myfile = 1;
3715	      if (lop->lo_end >= new_lop->lo_first) {
3716		if (new_lop->lo_end < lop->lo_first) {
3717			/*
3718			 * If the new lock ends before the start of the
3719			 * current lock's range, no merge, just insert
3720			 * the new lock.
3721			 */
3722			break;
3723		}
3724		if (new_lop->lo_flags == lop->lo_flags ||
3725		    (new_lop->lo_first <= lop->lo_first &&
3726		     new_lop->lo_end >= lop->lo_end)) {
3727			/*
3728			 * This lock can be absorbed by the new lock/unlock.
3729			 * This happens when it covers the entire range
3730			 * of the old lock or is contiguous
3731			 * with the old lock and is of the same type or an
3732			 * unlock.
3733			 */
3734			if (lop->lo_first < new_lop->lo_first)
3735				new_lop->lo_first = lop->lo_first;
3736			if (lop->lo_end > new_lop->lo_end)
3737				new_lop->lo_end = lop->lo_end;
3738			tlop = lop;
3739			lop = LIST_NEXT(lop, lo_lckowner);
3740			nfsrv_freenfslock(tlop);
3741			continue;
3742		}
3743
3744		/*
3745		 * All these cases are for contiguous locks that are not the
3746		 * same type, so they can't be merged.
3747		 */
3748		if (new_lop->lo_first <= lop->lo_first) {
3749			/*
3750			 * This case is where the new lock overlaps with the
3751			 * first part of the old lock. Move the start of the
3752			 * old lock to just past the end of the new lock. The
3753			 * new lock will be inserted in front of the old, since
3754			 * ilop hasn't been updated. (We are done now.)
3755			 */
3756			lop->lo_first = new_lop->lo_end;
3757			break;
3758		}
3759		if (new_lop->lo_end >= lop->lo_end) {
3760			/*
3761			 * This case is where the new lock overlaps with the
3762			 * end of the old lock's range. Move the old lock's
3763			 * end to just before the new lock's first and insert
3764			 * the new lock after the old lock.
3765			 * Might not be done yet, since the new lock could
3766			 * overlap further locks with higher ranges.
3767			 */
3768			lop->lo_end = new_lop->lo_first;
3769			ilop = lop;
3770			lop = LIST_NEXT(lop, lo_lckowner);
3771			continue;
3772		}
3773		/*
3774		 * The final case is where the new lock's range is in the
3775		 * middle of the current lock's and splits the current lock
3776		 * up. Use *other_lopp to handle the second part of the
3777		 * split old lock range. (We are done now.)
3778		 * For unlock, we use new_lop as other_lop and tmp, since
3779		 * other_lop and new_lop are the same for this case.
3780		 * We noted the unlock case above, so we don't need
3781		 * new_lop->lo_flags any longer.
3782		 */
3783		tmp = new_lop->lo_first;
3784		if (other_lop == NULL) {
3785			if (!unlock)
3786				panic("nfsd srv update unlock");
3787			other_lop = new_lop;
3788			*new_lopp = NULL;
3789		}
3790		other_lop->lo_first = new_lop->lo_end;
3791		other_lop->lo_end = lop->lo_end;
3792		other_lop->lo_flags = lop->lo_flags;
3793		other_lop->lo_stp = stp;
3794		other_lop->lo_lfp = lfp;
3795		lop->lo_end = tmp;
3796		nfsrv_insertlock(other_lop, lop, stp, lfp);
3797		*other_lopp = NULL;
3798		ilop = lop;
3799		break;
3800	      }
3801	    }
3802	    ilop = lop;
3803	    lop = LIST_NEXT(lop, lo_lckowner);
3804	    if (myfile && (lop == NULL || lop->lo_lfp != lfp))
3805		break;
3806	}
3807
3808	/*
3809	 * Insert the new lock in the list at the appropriate place.
3810	 */
3811	if (!unlock) {
3812		nfsrv_insertlock(new_lop, ilop, stp, lfp);
3813		*new_lopp = NULL;
3814	}
3815}
3816
3817/*
3818 * This function handles sequencing of locks, etc.
3819 * It returns an error that indicates what the caller should do.
3820 */
3821static int
3822nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
3823    struct nfsstate *stp, struct nfsrvcache *op)
3824{
3825	int error = 0;
3826
3827	if ((nd->nd_flag & ND_NFSV41) != 0)
3828		/* NFSv4.1 ignores the open_seqid and lock_seqid. */
3829		goto out;
3830	if (op != nd->nd_rp)
3831		panic("nfsrvstate checkseqid");
3832	if (!(op->rc_flag & RC_INPROG))
3833		panic("nfsrvstate not inprog");
3834	if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
3835		printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
3836		panic("nfsrvstate op refcnt");
3837	}
3838	if ((stp->ls_seq + 1) == seqid) {
3839		if (stp->ls_op)
3840			nfsrvd_derefcache(stp->ls_op);
3841		stp->ls_op = op;
3842		nfsrvd_refcache(op);
3843		stp->ls_seq = seqid;
3844		goto out;
3845	} else if (stp->ls_seq == seqid && stp->ls_op &&
3846		op->rc_xid == stp->ls_op->rc_xid &&
3847		op->rc_refcnt == 0 &&
3848		op->rc_reqlen == stp->ls_op->rc_reqlen &&
3849		op->rc_cksum == stp->ls_op->rc_cksum) {
3850		if (stp->ls_op->rc_flag & RC_INPROG) {
3851			error = NFSERR_DONTREPLY;
3852			goto out;
3853		}
3854		nd->nd_rp = stp->ls_op;
3855		nd->nd_rp->rc_flag |= RC_INPROG;
3856		nfsrvd_delcache(op);
3857		error = NFSERR_REPLYFROMCACHE;
3858		goto out;
3859	}
3860	error = NFSERR_BADSEQID;
3861
3862out:
3863	NFSEXITCODE2(error, nd);
3864	return (error);
3865}
3866
3867/*
3868 * Get the client ip address for callbacks. If the strings can't be parsed,
3869 * just set lc_program to 0 to indicate no callbacks are possible.
3870 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
3871 *  the address to the client's transport address. This won't be used
3872 *  for callbacks, but can be printed out by newnfsstats for info.)
3873 * Return error if the xdr can't be parsed, 0 otherwise.
3874 */
3875APPLESTATIC int
3876nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
3877{
3878	u_int32_t *tl;
3879	u_char *cp, *cp2;
3880	int i, j;
3881	struct sockaddr_in *rad, *sad;
3882	u_char protocol[5], addr[24];
3883	int error = 0, cantparse = 0;
3884	union {
3885		u_long ival;
3886		u_char cval[4];
3887	} ip;
3888	union {
3889		u_short sval;
3890		u_char cval[2];
3891	} port;
3892
3893	rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
3894	rad->sin_family = AF_INET;
3895	rad->sin_len = sizeof (struct sockaddr_in);
3896	rad->sin_addr.s_addr = 0;
3897	rad->sin_port = 0;
3898	clp->lc_req.nr_client = NULL;
3899	clp->lc_req.nr_lock = 0;
3900	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3901	i = fxdr_unsigned(int, *tl);
3902	if (i >= 3 && i <= 4) {
3903		error = nfsrv_mtostr(nd, protocol, i);
3904		if (error)
3905			goto nfsmout;
3906		if (!strcmp(protocol, "tcp")) {
3907			clp->lc_flags |= LCL_TCPCALLBACK;
3908			clp->lc_req.nr_sotype = SOCK_STREAM;
3909			clp->lc_req.nr_soproto = IPPROTO_TCP;
3910		} else if (!strcmp(protocol, "udp")) {
3911			clp->lc_req.nr_sotype = SOCK_DGRAM;
3912			clp->lc_req.nr_soproto = IPPROTO_UDP;
3913		} else {
3914			cantparse = 1;
3915		}
3916	} else {
3917		cantparse = 1;
3918		if (i > 0) {
3919			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3920			if (error)
3921				goto nfsmout;
3922		}
3923	}
3924	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3925	i = fxdr_unsigned(int, *tl);
3926	if (i < 0) {
3927		error = NFSERR_BADXDR;
3928		goto nfsmout;
3929	} else if (i == 0) {
3930		cantparse = 1;
3931	} else if (!cantparse && i <= 23 && i >= 11) {
3932		error = nfsrv_mtostr(nd, addr, i);
3933		if (error)
3934			goto nfsmout;
3935
3936		/*
3937		 * Parse out the address fields. We expect 6 decimal numbers
3938		 * separated by '.'s.
3939		 */
3940		cp = addr;
3941		i = 0;
3942		while (*cp && i < 6) {
3943			cp2 = cp;
3944			while (*cp2 && *cp2 != '.')
3945				cp2++;
3946			if (*cp2)
3947				*cp2++ = '\0';
3948			else if (i != 5) {
3949				cantparse = 1;
3950				break;
3951			}
3952			j = nfsrv_getipnumber(cp);
3953			if (j >= 0) {
3954				if (i < 4)
3955					ip.cval[3 - i] = j;
3956				else
3957					port.cval[5 - i] = j;
3958			} else {
3959				cantparse = 1;
3960				break;
3961			}
3962			cp = cp2;
3963			i++;
3964		}
3965		if (!cantparse) {
3966			if (ip.ival != 0x0) {
3967				rad->sin_addr.s_addr = htonl(ip.ival);
3968				rad->sin_port = htons(port.sval);
3969			} else {
3970				cantparse = 1;
3971			}
3972		}
3973	} else {
3974		cantparse = 1;
3975		if (i > 0) {
3976			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3977			if (error)
3978				goto nfsmout;
3979		}
3980	}
3981	if (cantparse) {
3982		sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
3983		rad->sin_addr.s_addr = sad->sin_addr.s_addr;
3984		rad->sin_port = 0x0;
3985		clp->lc_program = 0;
3986	}
3987nfsmout:
3988	NFSEXITCODE2(error, nd);
3989	return (error);
3990}
3991
/*
 * Turn a string of one to three decimal digits into a number in the
 * range 0 to 255.
 * cp - NUL terminated string holding one field of a dotted address
 * Return the value, or -1 upon error: an empty string, a character that
 * is not a decimal digit, more than three digits or a value above 255.
 */
static int
nfsrv_getipnumber(u_char *cp)
{
	int val = 0, ndigits = 0;

	for (; *cp != '\0'; cp++, ndigits++) {
		/* At most three digits, which also keeps val well in range. */
		if (ndigits > 2 || *cp < '0' || *cp > '9')
			return (-1);
		val = val * 10 + (*cp - '0');
	}

	/* An empty field (as in "1..3") is not a number. */
	if (ndigits == 0 || val > 255)
		return (-1);
	return (val);
}
4013
4014/*
4015 * This function checks for restart conditions.
4016 */
4017static int
4018nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
4019    nfsv4stateid_t *stateidp, int specialid)
4020{
4021	int ret = 0;
4022
4023	/*
4024	 * First check for a server restart. Open, LockT, ReleaseLockOwner
4025	 * and DelegPurge have a clientid, the rest a stateid.
4026	 */
4027	if (flags &
4028	    (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
4029		if (clientid.lval[0] != nfsrvboottime) {
4030			ret = NFSERR_STALECLIENTID;
4031			goto out;
4032		}
4033	} else if (stateidp->other[0] != nfsrvboottime &&
4034		specialid == 0) {
4035		ret = NFSERR_STALESTATEID;
4036		goto out;
4037	}
4038
4039	/*
4040	 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
4041	 * not use a lock/open owner seqid#, so the check can be done now.
4042	 * (The others will be checked, as required, later.)
4043	 */
4044	if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
4045		goto out;
4046
4047	NFSLOCKSTATE();
4048	ret = nfsrv_checkgrace(NULL, NULL, flags);
4049	NFSUNLOCKSTATE();
4050
4051out:
4052	NFSEXITCODE(ret);
4053	return (ret);
4054}
4055
4056/*
4057 * Check for grace.
4058 */
4059static int
4060nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
4061    u_int32_t flags)
4062{
4063	int error = 0;
4064
4065	if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
4066		if (flags & NFSLCK_RECLAIM) {
4067			error = NFSERR_NOGRACE;
4068			goto out;
4069		}
4070	} else {
4071		if (!(flags & NFSLCK_RECLAIM)) {
4072			error = NFSERR_GRACE;
4073			goto out;
4074		}
4075		if (nd != NULL && clp != NULL &&
4076		    (nd->nd_flag & ND_NFSV41) != 0 &&
4077		    (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4078			error = NFSERR_NOGRACE;
4079			goto out;
4080		}
4081
4082		/*
4083		 * If grace is almost over and we are still getting Reclaims,
4084		 * extend grace a bit.
4085		 */
4086		if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4087		    nfsrv_stablefirst.nsf_eograce)
4088			nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
4089				NFSRV_LEASEDELTA;
4090	}
4091
4092out:
4093	NFSEXITCODE(error);
4094	return (error);
4095}
4096
4097/*
4098 * Do a server callback.
4099 */
4100static int
4101nfsrv_docallback(struct nfsclient *clp, int procnum,
4102    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
4103    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
4104{
4105	mbuf_t m;
4106	u_int32_t *tl;
4107	struct nfsrv_descript nfsd, *nd = &nfsd;
4108	struct ucred *cred;
4109	int error = 0;
4110	u_int32_t callback;
4111	struct nfsdsession *sep = NULL;
4112
4113	cred = newnfs_getcred();
4114	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
4115	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
4116		NFSUNLOCKSTATE();
4117		panic("docallb");
4118	}
4119	clp->lc_cbref++;
4120
4121	/*
4122	 * Fill the callback program# and version into the request
4123	 * structure for newnfs_connect() to use.
4124	 */
4125	clp->lc_req.nr_prog = clp->lc_program;
4126#ifdef notnow
4127	if ((clp->lc_flags & LCL_NFSV41) != 0)
4128		clp->lc_req.nr_vers = NFSV41_CBVERS;
4129	else
4130#endif
4131		clp->lc_req.nr_vers = NFSV4_CBVERS;
4132
4133	/*
4134	 * First, fill in some of the fields of nd and cr.
4135	 */
4136	nd->nd_flag = ND_NFSV4;
4137	if (clp->lc_flags & LCL_GSS)
4138		nd->nd_flag |= ND_KERBV;
4139	if ((clp->lc_flags & LCL_NFSV41) != 0)
4140		nd->nd_flag |= ND_NFSV41;
4141	nd->nd_repstat = 0;
4142	cred->cr_uid = clp->lc_uid;
4143	cred->cr_gid = clp->lc_gid;
4144	callback = clp->lc_callback;
4145	NFSUNLOCKSTATE();
4146	cred->cr_ngroups = 1;
4147
4148	/*
4149	 * Get the first mbuf for the request.
4150	 */
4151	MGET(m, M_WAITOK, MT_DATA);
4152	mbuf_setlen(m, 0);
4153	nd->nd_mreq = nd->nd_mb = m;
4154	nd->nd_bpos = NFSMTOD(m, caddr_t);
4155
4156	/*
4157	 * and build the callback request.
4158	 */
4159	if (procnum == NFSV4OP_CBGETATTR) {
4160		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
4161		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
4162		    "CB Getattr", &sep);
4163		if (error != 0) {
4164			mbuf_freem(nd->nd_mreq);
4165			goto errout;
4166		}
4167		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
4168		(void)nfsrv_putattrbit(nd, attrbitp);
4169	} else if (procnum == NFSV4OP_CBRECALL) {
4170		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
4171		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
4172		    "CB Recall", &sep);
4173		if (error != 0) {
4174			mbuf_freem(nd->nd_mreq);
4175			goto errout;
4176		}
4177		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
4178		*tl++ = txdr_unsigned(stateidp->seqid);
4179		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
4180		    NFSX_STATEIDOTHER);
4181		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
4182		if (trunc)
4183			*tl = newnfs_true;
4184		else
4185			*tl = newnfs_false;
4186		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
4187	} else if (procnum == NFSV4PROC_CBNULL) {
4188		nd->nd_procnum = NFSV4PROC_CBNULL;
4189		if ((clp->lc_flags & LCL_NFSV41) != 0) {
4190			error = nfsv4_getcbsession(clp, &sep);
4191			if (error != 0) {
4192				mbuf_freem(nd->nd_mreq);
4193				goto errout;
4194			}
4195		}
4196	} else {
4197		error = NFSERR_SERVERFAULT;
4198		mbuf_freem(nd->nd_mreq);
4199		goto errout;
4200	}
4201
4202	/*
4203	 * Call newnfs_connect(), as required, and then newnfs_request().
4204	 */
4205	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
4206	if (clp->lc_req.nr_client == NULL) {
4207		if ((clp->lc_flags & LCL_NFSV41) != 0)
4208			error = ECONNREFUSED;
4209		else if (nd->nd_procnum == NFSV4PROC_CBNULL)
4210			error = newnfs_connect(NULL, &clp->lc_req, cred,
4211			    NULL, 1);
4212		else
4213			error = newnfs_connect(NULL, &clp->lc_req, cred,
4214			    NULL, 3);
4215	}
4216	newnfs_sndunlock(&clp->lc_req.nr_lock);
4217	if (!error) {
4218		if ((nd->nd_flag & ND_NFSV41) != 0) {
4219			KASSERT(sep != NULL, ("sep NULL"));
4220			if (sep->sess_cbsess.nfsess_xprt != NULL)
4221				error = newnfs_request(nd, NULL, clp,
4222				    &clp->lc_req, NULL, NULL, cred,
4223				    clp->lc_program, clp->lc_req.nr_vers, NULL,
4224				    1, NULL, &sep->sess_cbsess);
4225			else {
4226				/*
4227				 * This should probably never occur, but if a
4228				 * client somehow does an RPC without a
4229				 * SequenceID Op that causes a callback just
4230				 * after the nfsd threads have been terminated
4231				 * and restared we could conceivably get here
4232				 * without a backchannel xprt.
4233				 */
4234				printf("nfsrv_docallback: no xprt\n");
4235				error = ECONNREFUSED;
4236			}
4237			nfsrv_freesession(sep, NULL);
4238		} else
4239			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
4240			    NULL, NULL, cred, clp->lc_program,
4241			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
4242	}
4243errout:
4244	NFSFREECRED(cred);
4245
4246	/*
4247	 * If error is set here, the Callback path isn't working
4248	 * properly, so twiddle the appropriate LCL_ flags.
4249	 * (nd_repstat != 0 indicates the Callback path is working,
4250	 *  but the callback failed on the client.)
4251	 */
4252	if (error) {
4253		/*
4254		 * Mark the callback pathway down, which disabled issuing
4255		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
4256		 */
4257		NFSLOCKSTATE();
4258		clp->lc_flags |= LCL_CBDOWN;
4259		NFSUNLOCKSTATE();
4260	} else {
4261		/*
4262		 * Callback worked. If the callback path was down, disable
4263		 * callbacks, so no more delegations will be issued. (This
4264		 * is done on the assumption that the callback pathway is
4265		 * flakey.)
4266		 */
4267		NFSLOCKSTATE();
4268		if (clp->lc_flags & LCL_CBDOWN)
4269			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
4270		NFSUNLOCKSTATE();
4271		if (nd->nd_repstat)
4272			error = nd->nd_repstat;
4273		else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
4274			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4275			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
4276			    p, NULL);
4277		mbuf_freem(nd->nd_mrep);
4278	}
4279	NFSLOCKSTATE();
4280	clp->lc_cbref--;
4281	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
4282		clp->lc_flags &= ~LCL_WAKEUPWANTED;
4283		wakeup(clp);
4284	}
4285	NFSUNLOCKSTATE();
4286
4287	NFSEXITCODE(error);
4288	return (error);
4289}
4290
4291/*
4292 * Set up the compound RPC for the callback.
4293 */
4294static int
4295nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4296    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp)
4297{
4298	uint32_t *tl;
4299	int error, len;
4300
4301	len = strlen(optag);
4302	(void)nfsm_strtom(nd, optag, len);
4303	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4304	if ((nd->nd_flag & ND_NFSV41) != 0) {
4305		*tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4306		*tl++ = txdr_unsigned(callback);
4307		*tl++ = txdr_unsigned(2);
4308		*tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4309		error = nfsv4_setcbsequence(nd, clp, 1, sepp);
4310		if (error != 0)
4311			return (error);
4312		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4313		*tl = txdr_unsigned(op);
4314	} else {
4315		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4316		*tl++ = txdr_unsigned(callback);
4317		*tl++ = txdr_unsigned(1);
4318		*tl = txdr_unsigned(op);
4319	}
4320	return (0);
4321}
4322
/*
 * Hand out the next index# for a clientid. This is just a counter that is
 * bumped on every call, so the first value returned is 1.
 * Should the 32bit unsigned counter ever wrap around to 0, a message is
 * printed and 0 is returned; the server should be rebooted at that point.
 * (At an average rate of one new client per second, the wrap around takes
 *  approximately 136 years, so the server will most likely have been shut
 *  down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
4343
4344/*
4345 * Return the next index# for a stateid. Mostly just increment and return
4346 * the next one, but... if the 32bit unsigned does actually wrap around
4347 * (will a BSD server stay up that long?), find
4348 * new start and end values.
4349 */
4350static u_int32_t
4351nfsrv_nextstateindex(struct nfsclient *clp)
4352{
4353	struct nfsstate *stp;
4354	int i;
4355	u_int32_t canuse, min_index, max_index;
4356
4357	if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4358		clp->lc_stateindex++;
4359		if (clp->lc_stateindex != clp->lc_statemaxindex)
4360			return (clp->lc_stateindex);
4361	}
4362
4363	/*
4364	 * Yuck, we've hit the end.
4365	 * Look for a new min and max.
4366	 */
4367	min_index = 0;
4368	max_index = 0xffffffff;
4369	for (i = 0; i < nfsrv_statehashsize; i++) {
4370	    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4371		if (stp->ls_stateid.other[2] > 0x80000000) {
4372		    if (stp->ls_stateid.other[2] < max_index)
4373			max_index = stp->ls_stateid.other[2];
4374		} else {
4375		    if (stp->ls_stateid.other[2] > min_index)
4376			min_index = stp->ls_stateid.other[2];
4377		}
4378	    }
4379	}
4380
4381	/*
4382	 * Yikes, highly unlikely, but I'll handle it anyhow.
4383	 */
4384	if (min_index == 0x80000000 && max_index == 0x80000001) {
4385	    canuse = 0;
4386	    /*
4387	     * Loop around until we find an unused entry. Return that
4388	     * and set LCL_INDEXNOTOK, so the search will continue next time.
4389	     * (This is one of those rare cases where a goto is the
4390	     *  cleanest way to code the loop.)
4391	     */
4392tryagain:
4393	    for (i = 0; i < nfsrv_statehashsize; i++) {
4394		LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4395		    if (stp->ls_stateid.other[2] == canuse) {
4396			canuse++;
4397			goto tryagain;
4398		    }
4399		}
4400	    }
4401	    clp->lc_flags |= LCL_INDEXNOTOK;
4402	    return (canuse);
4403	}
4404
4405	/*
4406	 * Ok to start again from min + 1.
4407	 */
4408	clp->lc_stateindex = min_index + 1;
4409	clp->lc_statemaxindex = max_index;
4410	clp->lc_flags &= ~LCL_INDEXNOTOK;
4411	return (clp->lc_stateindex);
4412}
4413
4414/*
4415 * The following functions handle the stable storage file that deals with
4416 * the edge conditions described in RFC3530 Sec. 8.6.3.
4417 * The file is as follows:
4418 * - a single record at the beginning that has the lease time of the
4419 *   previous server instance (before the last reboot) and the nfsrvboottime
4420 *   values for the previous server boots.
4421 *   These previous boot times are used to ensure that the current
4422 *   nfsrvboottime does not, somehow, get set to a previous one.
4423 *   (This is important so that Stale ClientIDs and StateIDs can
4424 *    be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
4426 * - followed by some number of appended records with:
4427 *   - client id string
4428 *   - flag that indicates it is a record revoking state via lease
4429 *     expiration or similar
4430 *     OR has successfully acquired state.
4431 * These structures vary in length, with the client string at the end, up
4432 * to NFSV4_OPAQUELIMIT in size.
4433 *
4434 * At the end of the grace period, the file is truncated, the first
4435 * record is rewritten with updated information and any acquired state
4436 * records for successful reclaims of state are written.
4437 *
4438 * Subsequent records are appended when the first state is issued to
4439 * a client and when state is revoked for a client.
4440 *
4441 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological order.
4443 * If, for some reason, the file can't be read, the grace period is
4444 * immediately terminated and all reclaims get NFSERR_NOGRACE.
4445 */
4446
/*
 * Read in the stable storage file. Called by nfssvc() before the nfsd
 * processes start servicing requests.
 * p - passed through to NFSD_RDWR() for the file reads
 *
 * Nothing is done if NFSNSF_UPDATEDONE is already set. Otherwise
 * nfsrvboottime is set and, on return, nfsrv_stablefirst is in one of
 * two states:
 * - the file was read to its end: nsf_flags is NFSNSF_OK only, nsf_head
 *   holds one entry per distinct client string found in the file,
 *   nsf_bootvals holds the previous boot times and nsf_eograce is based
 *   on the lease time read from the file
 * - there is no file, or it could not be read or did not make sense:
 *   nsf_flags is still NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK, nsf_head is
 *   empty and nsf_bootvals is NULL
 */
APPLESTATIC void
nfsrv_setupstable(NFSPROC_T *p)
{
	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
	struct nfsrv_stable *sp, *nsp;
	struct nfst_rec *tsp;
	int error, i, tryagain;
	off_t off = 0;
	ssize_t aresid, len;

	/*
	 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
	 * a reboot, so state has not been lost.
	 */
	if (sf->nsf_flags & NFSNSF_UPDATEDONE)
		return;
	/*
	 * Set Grace over just until the file reads successfully.
	 */
	nfsrvboottime = time_second;
	LIST_INIT(&sf->nsf_head);
	sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
	sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
	if (sf->nsf_fp == NULL)
		return;
	/*
	 * Read the fixed size first record from the start of the file. A
	 * short read (aresid != 0) or a boot count of zero or above
	 * NFSNSF_MAXNUMBOOTS means the file can't be used.
	 */
	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	    (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
	    0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	if (error || aresid || sf->nsf_numboots == 0 ||
		sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
		return;

	/*
	 * Now, read in the boottimes.
	 * (One spare entry is allocated beyond the nsf_numboots that are
	 *  read in.)
	 */
	sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
		sizeof (time_t), M_TEMP, M_WAITOK);
	off = sizeof (struct nfsf_rec);
	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	    (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
	    UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	if (error || aresid) {
		free((caddr_t)sf->nsf_bootvals, M_TEMP);
		sf->nsf_bootvals = NULL;
		return;
	}

	/*
	 * Make sure this nfsrvboottime is different from all recorded
	 * previous ones.
	 * (Each time it is bumped, the whole list is checked again, since
	 *  the new value could match an entry that was already passed.)
	 */
	do {
		tryagain = 0;
		for (i = 0; i < sf->nsf_numboots; i++) {
			if (nfsrvboottime == sf->nsf_bootvals[i]) {
				nfsrvboottime++;
				tryagain = 1;
				break;
			}
		}
	} while (tryagain);

	sf->nsf_flags |= NFSNSF_OK;
	/* The client records start right after the boot times. */
	off += (sf->nsf_numboots * sizeof (time_t));

	/*
	 * Read through the file, building a list of records for grace
	 * checking.
	 * Each record is between sizeof (struct nfst_rec) and
	 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
	 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
	 * A maximum sized record is read at the current offset each time
	 * around and the offset is then advanced by the actual size of
	 * the record found there.
	 */
	tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
		NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
	do {
	    error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	        (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
	        off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	    /* len is the number of bytes actually read; 0 ends the loop. */
	    len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
	    /*
	     * Whatever was read must hold at least the fixed part of a
	     * record and, after that, all of the client string bytes that
	     * the record's len field claims.
	     */
	    if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
		len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
		/*
		 * Yuck, the file has been corrupted, so just return
		 * after clearing out any restart state, so the grace period
		 * is over.
		 */
		LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
			LIST_REMOVE(sp, nst_list);
			free((caddr_t)sp, M_TEMP);
		}
		free((caddr_t)tsp, M_TEMP);
		sf->nsf_flags &= ~NFSNSF_OK;
		free((caddr_t)sf->nsf_bootvals, M_TEMP);
		sf->nsf_bootvals = NULL;
		return;
	    }
	    if (len > 0) {
		off += sizeof (struct nfst_rec) + tsp->len - 1;
		/*
		 * Search the list for a matching client.
		 */
		LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
			if (tsp->len == sp->nst_len &&
			    !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
				break;
		}
		if (sp == LIST_END(&sf->nsf_head)) {
			/*
			 * First record seen for this client, so add an entry
			 * holding a copy of the record as read.
			 */
			sp = (struct nfsrv_stable *)malloc(tsp->len +
				sizeof (struct nfsrv_stable) - 1, M_TEMP,
				M_WAITOK);
			NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
				sizeof (struct nfst_rec) + tsp->len - 1);
			LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
		} else {
			/*
			 * A later record for a client already in the list
			 * only changes that entry's revoke flag.
			 */
			if (tsp->flag == NFSNST_REVOKE)
				sp->nst_flag |= NFSNST_REVOKE;
			else
				/*
				 * A subsequent timestamp indicates the client
				 * did a setclientid/confirm and any previous
				 * revoke is no longer relevant.
				 */
				sp->nst_flag &= ~NFSNST_REVOKE;
		}
	    }
	} while (len > 0);
	free((caddr_t)tsp, M_TEMP);
	/*
	 * The whole file was read. Setting the flags to just NFSNSF_OK
	 * clears NFSNSF_GRACEOVER (and NFSNSF_NEEDLOCK), and the end of
	 * grace is set from the lease time that was read from the file.
	 */
	sf->nsf_flags = NFSNSF_OK;
	sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
		NFSRV_LEASEDELTA;
}
4582
4583/*
4584 * Update the stable storage file, now that the grace period is over.
4585 */
4586APPLESTATIC void
4587nfsrv_updatestable(NFSPROC_T *p)
4588{
4589	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4590	struct nfsrv_stable *sp, *nsp;
4591	int i;
4592	struct nfsvattr nva;
4593	vnode_t vp;
4594#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
4595	mount_t mp = NULL;
4596#endif
4597	int error;
4598
4599	if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
4600		return;
4601	sf->nsf_flags |= NFSNSF_UPDATEDONE;
4602	/*
4603	 * Ok, we need to rewrite the stable storage file.
4604	 * - truncate to 0 length
4605	 * - write the new first structure
4606	 * - loop through the data structures, writing out any that
4607	 *   have timestamps older than the old boot
4608	 */
4609	if (sf->nsf_bootvals) {
4610		sf->nsf_numboots++;
4611		for (i = sf->nsf_numboots - 2; i >= 0; i--)
4612			sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
4613	} else {
4614		sf->nsf_numboots = 1;
4615		sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
4616			M_TEMP, M_WAITOK);
4617	}
4618	sf->nsf_bootvals[0] = nfsrvboottime;
4619	sf->nsf_lease = nfsrv_lease;
4620	NFSVNO_ATTRINIT(&nva);
4621	NFSVNO_SETATTRVAL(&nva, size, 0);
4622	vp = NFSFPVNODE(sf->nsf_fp);
4623	vn_start_write(vp, &mp, V_WAIT);
4624	if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4625		error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
4626		    NULL);
4627		NFSVOPUNLOCK(vp, 0);
4628	} else
4629		error = EPERM;
4630	vn_finished_write(mp);
4631	if (!error)
4632	    error = NFSD_RDWR(UIO_WRITE, vp,
4633		(caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
4634		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4635	if (!error)
4636	    error = NFSD_RDWR(UIO_WRITE, vp,
4637		(caddr_t)sf->nsf_bootvals,
4638		sf->nsf_numboots * sizeof (time_t),
4639		(off_t)(sizeof (struct nfsf_rec)),
4640		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4641	free((caddr_t)sf->nsf_bootvals, M_TEMP);
4642	sf->nsf_bootvals = NULL;
4643	if (error) {
4644		sf->nsf_flags &= ~NFSNSF_OK;
4645		printf("EEK! Can't write NfsV4 stable storage file\n");
4646		return;
4647	}
4648	sf->nsf_flags |= NFSNSF_OK;
4649
4650	/*
4651	 * Loop through the list and write out timestamp records for
4652	 * any clients that successfully reclaimed state.
4653	 */
4654	LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4655		if (sp->nst_flag & NFSNST_GOTSTATE) {
4656			nfsrv_writestable(sp->nst_client, sp->nst_len,
4657				NFSNST_NEWSTATE, p);
4658			sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
4659		}
4660		LIST_REMOVE(sp, nst_list);
4661		free((caddr_t)sp, M_TEMP);
4662	}
4663	nfsrv_backupstable();
4664}
4665
4666/*
4667 * Append a record to the stable storage file.
4668 */
4669APPLESTATIC void
4670nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
4671{
4672	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4673	struct nfst_rec *sp;
4674	int error;
4675
4676	if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
4677		return;
4678	sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4679		len - 1, M_TEMP, M_WAITOK);
4680	sp->len = len;
4681	NFSBCOPY(client, sp->client, len);
4682	sp->flag = flag;
4683	error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
4684	    (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
4685	    UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
4686	free((caddr_t)sp, M_TEMP);
4687	if (error) {
4688		sf->nsf_flags &= ~NFSNSF_OK;
4689		printf("EEK! Can't write NfsV4 stable storage file\n");
4690	}
4691}
4692
4693/*
4694 * This function is called during the grace period to mark a client
4695 * that successfully reclaimed state.
4696 */
4697static void
4698nfsrv_markstable(struct nfsclient *clp)
4699{
4700	struct nfsrv_stable *sp;
4701
4702	/*
4703	 * First find the client structure.
4704	 */
4705	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4706		if (sp->nst_len == clp->lc_idlen &&
4707		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4708			break;
4709	}
4710	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
4711		return;
4712
4713	/*
4714	 * Now, just mark it and set the nfsclient back pointer.
4715	 */
4716	sp->nst_flag |= NFSNST_GOTSTATE;
4717	sp->nst_clp = clp;
4718}
4719
4720/*
4721 * This function is called for a reclaim, to see if it gets grace.
4722 * It returns 0 if a reclaim is allowed, 1 otherwise.
4723 */
4724static int
4725nfsrv_checkstable(struct nfsclient *clp)
4726{
4727	struct nfsrv_stable *sp;
4728
4729	/*
4730	 * First, find the entry for the client.
4731	 */
4732	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4733		if (sp->nst_len == clp->lc_idlen &&
4734		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4735			break;
4736	}
4737
4738	/*
4739	 * If not in the list, state was revoked or no state was issued
4740	 * since the previous reboot, a reclaim is denied.
4741	 */
4742	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
4743	    (sp->nst_flag & NFSNST_REVOKE) ||
4744	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
4745		return (1);
4746	return (0);
4747}
4748
4749/*
4750 * Test for and try to clear out a conflicting client. This is called by
4751 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
4752 * a found.
4753 * The trick here is that it can't revoke a conflicting client with an
4754 * expired lease unless it holds the v4root lock, so...
4755 * If no v4root lock, get the lock and return 1 to indicate "try again".
4756 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
4757 * the revocation worked and the conflicting client is "bye, bye", so it
4758 * can be tried again.
4759 * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
4760 * Unlocks State before a non-zero value is returned.
4761 */
4762static int
4763nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
4764    NFSPROC_T *p)
4765{
4766	int gotlock, lktype = 0;
4767
4768	/*
4769	 * If lease hasn't expired, we can't fix it.
4770	 */
4771	if (clp->lc_expiry >= NFSD_MONOSEC ||
4772	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
4773		return (0);
4774	if (*haslockp == 0) {
4775		NFSUNLOCKSTATE();
4776		if (vp != NULL) {
4777			lktype = NFSVOPISLOCKED(vp);
4778			NFSVOPUNLOCK(vp, 0);
4779		}
4780		NFSLOCKV4ROOTMUTEX();
4781		nfsv4_relref(&nfsv4rootfs_lock);
4782		do {
4783			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4784			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4785		} while (!gotlock);
4786		NFSUNLOCKV4ROOTMUTEX();
4787		*haslockp = 1;
4788		if (vp != NULL) {
4789			NFSVOPLOCK(vp, lktype | LK_RETRY);
4790			if ((vp->v_iflag & VI_DOOMED) != 0)
4791				return (2);
4792		}
4793		return (1);
4794	}
4795	NFSUNLOCKSTATE();
4796
4797	/*
4798	 * Ok, we can expire the conflicting client.
4799	 */
4800	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4801	nfsrv_backupstable();
4802	nfsrv_cleanclient(clp, p);
4803	nfsrv_freedeleglist(&clp->lc_deleg);
4804	nfsrv_freedeleglist(&clp->lc_olddeleg);
4805	LIST_REMOVE(clp, lc_hash);
4806	nfsrv_zapclient(clp, p);
4807	return (1);
4808}
4809
4810/*
4811 * Resolve a delegation conflict.
4812 * Returns 0 to indicate the conflict was resolved without sleeping.
4813 * Return -1 to indicate that the caller should check for conflicts again.
4814 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
4815 *
4816 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
4817 * for a return of 0, since there was no sleep and it could be required
4818 * later. It is released for a return of NFSERR_DELAY, since the caller
4819 * will return that error. It is released when a sleep was done waiting
4820 * for the delegation to be returned or expire (so that other nfsds can
4821 * handle ops). Then, it must be acquired for the write to stable storage.
4822 * (This function is somewhat similar to nfsrv_clientconflict(), but
4823 *  the semantics differ in a couple of subtle ways. The return of 0
4824 *  indicates the conflict was resolved without sleeping here, not
4825 *  that the conflict can't be resolved and the handling of nfsv4root_lock
4826 *  differs, as noted above.)
4827 * Unlocks State before returning a non-zero value.
4828 */
4829static int
4830nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
4831    vnode_t vp)
4832{
4833	struct nfsclient *clp = stp->ls_clp;
4834	int gotlock, error, lktype = 0, retrycnt, zapped_clp;
4835	nfsv4stateid_t tstateid;
4836	fhandle_t tfh;
4837
4838	/*
4839	 * If the conflict is with an old delegation...
4840	 */
4841	if (stp->ls_flags & NFSLCK_OLDDELEG) {
4842		/*
4843		 * You can delete it, if it has expired.
4844		 */
4845		if (clp->lc_delegtime < NFSD_MONOSEC) {
4846			nfsrv_freedeleg(stp);
4847			NFSUNLOCKSTATE();
4848			error = -1;
4849			goto out;
4850		}
4851		NFSUNLOCKSTATE();
4852		/*
4853		 * During this delay, the old delegation could expire or it
4854		 * could be recovered by the client via an Open with
4855		 * CLAIM_DELEGATE_PREV.
4856		 * Release the nfsv4root_lock, if held.
4857		 */
4858		if (*haslockp) {
4859			*haslockp = 0;
4860			NFSLOCKV4ROOTMUTEX();
4861			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4862			NFSUNLOCKV4ROOTMUTEX();
4863		}
4864		error = NFSERR_DELAY;
4865		goto out;
4866	}
4867
4868	/*
4869	 * It's a current delegation, so:
4870	 * - check to see if the delegation has expired
4871	 *   - if so, get the v4root lock and then expire it
4872	 */
4873	if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
4874		/*
4875		 * - do a recall callback, since not yet done
4876		 * For now, never allow truncate to be set. To use
4877		 * truncate safely, it must be guaranteed that the
4878		 * Remove, Rename or Setattr with size of 0 will
4879		 * succeed and that would require major changes to
4880		 * the VFS/Vnode OPs.
4881		 * Set the expiry time large enough so that it won't expire
4882		 * until after the callback, then set it correctly, once
4883		 * the callback is done. (The delegation will now time
4884		 * out whether or not the Recall worked ok. The timeout
4885		 * will be extended when ops are done on the delegation
4886		 * stateid, up to the timelimit.)
4887		 */
4888		stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
4889		    NFSRV_LEASEDELTA;
4890		stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
4891		    NFSRV_LEASEDELTA;
4892		stp->ls_flags |= NFSLCK_DELEGRECALL;
4893
4894		/*
4895		 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
4896		 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
4897		 * in order to try and avoid a race that could happen
4898		 * when a CBRecall request passed the Open reply with
4899		 * the delegation in it when transitting the network.
4900		 * Since nfsrv_docallback will sleep, don't use stp after
4901		 * the call.
4902		 */
4903		NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
4904		    sizeof (tstateid));
4905		NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
4906		    sizeof (tfh));
4907		NFSUNLOCKSTATE();
4908		if (*haslockp) {
4909			*haslockp = 0;
4910			NFSLOCKV4ROOTMUTEX();
4911			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4912			NFSUNLOCKV4ROOTMUTEX();
4913		}
4914		retrycnt = 0;
4915		do {
4916		    error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
4917			&tstateid, 0, &tfh, NULL, NULL, p);
4918		    retrycnt++;
4919		} while ((error == NFSERR_BADSTATEID ||
4920		    error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
4921		error = NFSERR_DELAY;
4922		goto out;
4923	}
4924
4925	if (clp->lc_expiry >= NFSD_MONOSEC &&
4926	    stp->ls_delegtime >= NFSD_MONOSEC) {
4927		NFSUNLOCKSTATE();
4928		/*
4929		 * A recall has been done, but it has not yet expired.
4930		 * So, RETURN_DELAY.
4931		 */
4932		if (*haslockp) {
4933			*haslockp = 0;
4934			NFSLOCKV4ROOTMUTEX();
4935			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4936			NFSUNLOCKV4ROOTMUTEX();
4937		}
4938		error = NFSERR_DELAY;
4939		goto out;
4940	}
4941
4942	/*
4943	 * If we don't yet have the lock, just get it and then return,
4944	 * since we need that before deleting expired state, such as
4945	 * this delegation.
4946	 * When getting the lock, unlock the vnode, so other nfsds that
4947	 * are in progress, won't get stuck waiting for the vnode lock.
4948	 */
4949	if (*haslockp == 0) {
4950		NFSUNLOCKSTATE();
4951		if (vp != NULL) {
4952			lktype = NFSVOPISLOCKED(vp);
4953			NFSVOPUNLOCK(vp, 0);
4954		}
4955		NFSLOCKV4ROOTMUTEX();
4956		nfsv4_relref(&nfsv4rootfs_lock);
4957		do {
4958			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4959			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4960		} while (!gotlock);
4961		NFSUNLOCKV4ROOTMUTEX();
4962		*haslockp = 1;
4963		if (vp != NULL) {
4964			NFSVOPLOCK(vp, lktype | LK_RETRY);
4965			if ((vp->v_iflag & VI_DOOMED) != 0) {
4966				*haslockp = 0;
4967				NFSLOCKV4ROOTMUTEX();
4968				nfsv4_unlock(&nfsv4rootfs_lock, 1);
4969				NFSUNLOCKV4ROOTMUTEX();
4970				error = NFSERR_PERM;
4971				goto out;
4972			}
4973		}
4974		error = -1;
4975		goto out;
4976	}
4977
4978	NFSUNLOCKSTATE();
4979	/*
4980	 * Ok, we can delete the expired delegation.
4981	 * First, write the Revoke record to stable storage and then
4982	 * clear out the conflict.
4983	 * Since all other nfsd threads are now blocked, we can safely
4984	 * sleep without the state changing.
4985	 */
4986	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4987	nfsrv_backupstable();
4988	if (clp->lc_expiry < NFSD_MONOSEC) {
4989		nfsrv_cleanclient(clp, p);
4990		nfsrv_freedeleglist(&clp->lc_deleg);
4991		nfsrv_freedeleglist(&clp->lc_olddeleg);
4992		LIST_REMOVE(clp, lc_hash);
4993		zapped_clp = 1;
4994	} else {
4995		nfsrv_freedeleg(stp);
4996		zapped_clp = 0;
4997	}
4998	if (zapped_clp)
4999		nfsrv_zapclient(clp, p);
5000	error = -1;
5001
5002out:
5003	NFSEXITCODE(error);
5004	return (error);
5005}
5006
5007/*
5008 * Check for a remove allowed, if remove is set to 1 and get rid of
5009 * delegations.
5010 */
5011APPLESTATIC int
5012nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
5013{
5014	struct nfsstate *stp;
5015	struct nfslockfile *lfp;
5016	int error, haslock = 0;
5017	fhandle_t nfh;
5018
5019	/*
5020	 * First, get the lock file structure.
5021	 * (A return of -1 means no associated state, so remove ok.)
5022	 */
5023	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5024tryagain:
5025	NFSLOCKSTATE();
5026	if (!error)
5027		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5028	if (error) {
5029		NFSUNLOCKSTATE();
5030		if (haslock) {
5031			NFSLOCKV4ROOTMUTEX();
5032			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5033			NFSUNLOCKV4ROOTMUTEX();
5034		}
5035		if (error == -1)
5036			error = 0;
5037		goto out;
5038	}
5039
5040	/*
5041	 * Now, we must Recall any delegations.
5042	 */
5043	error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
5044	if (error) {
5045		/*
5046		 * nfsrv_cleandeleg() unlocks state for non-zero
5047		 * return.
5048		 */
5049		if (error == -1)
5050			goto tryagain;
5051		if (haslock) {
5052			NFSLOCKV4ROOTMUTEX();
5053			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5054			NFSUNLOCKV4ROOTMUTEX();
5055		}
5056		goto out;
5057	}
5058
5059	/*
5060	 * Now, look for a conflicting open share.
5061	 */
5062	if (remove) {
5063		LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
5064			if (stp->ls_flags & NFSLCK_WRITEDENY) {
5065				error = NFSERR_FILEOPEN;
5066				break;
5067			}
5068		}
5069	}
5070
5071	NFSUNLOCKSTATE();
5072	if (haslock) {
5073		NFSLOCKV4ROOTMUTEX();
5074		nfsv4_unlock(&nfsv4rootfs_lock, 1);
5075		NFSUNLOCKV4ROOTMUTEX();
5076	}
5077
5078out:
5079	NFSEXITCODE(error);
5080	return (error);
5081}
5082
5083/*
5084 * Clear out all delegations for the file referred to by lfp.
5085 * May return NFSERR_DELAY, if there will be a delay waiting for
5086 * delegations to expire.
5087 * Returns -1 to indicate it slept while recalling a delegation.
5088 * This function has the side effect of deleting the nfslockfile structure,
5089 * if it no longer has associated state and didn't have to sleep.
5090 * Unlocks State before a non-zero value is returned.
5091 */
5092static int
5093nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5094    struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5095{
5096	struct nfsstate *stp, *nstp;
5097	int ret = 0;
5098
5099	stp = LIST_FIRST(&lfp->lf_deleg);
5100	while (stp != LIST_END(&lfp->lf_deleg)) {
5101		nstp = LIST_NEXT(stp, ls_file);
5102		if (stp->ls_clp != clp) {
5103			ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5104			if (ret) {
5105				/*
5106				 * nfsrv_delegconflict() unlocks state
5107				 * when it returns non-zero.
5108				 */
5109				goto out;
5110			}
5111		}
5112		stp = nstp;
5113	}
5114out:
5115	NFSEXITCODE(ret);
5116	return (ret);
5117}
5118
5119/*
5120 * There are certain operations that, when being done outside of NFSv4,
5121 * require that any NFSv4 delegation for the file be recalled.
5122 * This function is to be called for those cases:
5123 * VOP_RENAME() - When a delegation is being recalled for any reason,
5124 *	the client may have to do Opens against the server, using the file's
5125 *	final component name. If the file has been renamed on the server,
5126 *	that component name will be incorrect and the Open will fail.
5127 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5128 *	been removed on the server, if there is a delegation issued to
5129 *	that client for the file. I say "theoretically" since clients
5130 *	normally do an Access Op before the Open and that Access Op will
5131 *	fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5132 *	they will detect the file's removal in the same manner. (There is
5133 *	one case where RFC3530 allows a client to do an Open without first
5134 *	doing an Access Op, which is passage of a check against the ACE
5135 *	returned with a Write delegation, but current practice is to ignore
5136 *	the ACE and always do an Access Op.)
5137 *	Since the functions can only be called with an unlocked vnode, this
5138 *	can't be done at this time.
5139 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5140 *	locks locally in the client, which are not visible to the server. To
5141 *	deal with this, issuing of delegations for a vnode must be disabled
5142 *	and all delegations for the vnode recalled. This is done via the
5143 *	second function, using the VV_DISABLEDELEG vflag on the vnode.
5144 */
5145APPLESTATIC void
5146nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5147{
5148	time_t starttime;
5149	int error;
5150
5151	/*
5152	 * First, check to see if the server is currently running and it has
5153	 * been called for a regular file when issuing delegations.
5154	 */
5155	if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
5156	    nfsrv_issuedelegs == 0)
5157		return;
5158
5159	KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5160	/*
5161	 * First, get a reference on the nfsv4rootfs_lock so that an
5162	 * exclusive lock cannot be acquired by another thread.
5163	 */
5164	NFSLOCKV4ROOTMUTEX();
5165	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5166	NFSUNLOCKV4ROOTMUTEX();
5167
5168	/*
5169	 * Now, call nfsrv_checkremove() in a loop while it returns
5170	 * NFSERR_DELAY. Return upon any other error or when timed out.
5171	 */
5172	starttime = NFSD_MONOSEC;
5173	do {
5174		if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5175			error = nfsrv_checkremove(vp, 0, p);
5176			NFSVOPUNLOCK(vp, 0);
5177		} else
5178			error = EPERM;
5179		if (error == NFSERR_DELAY) {
5180			if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5181				break;
5182			/* Sleep for a short period of time */
5183			(void) nfs_catnap(PZERO, 0, "nfsremove");
5184		}
5185	} while (error == NFSERR_DELAY);
5186	NFSLOCKV4ROOTMUTEX();
5187	nfsv4_relref(&nfsv4rootfs_lock);
5188	NFSUNLOCKV4ROOTMUTEX();
5189}
5190
5191APPLESTATIC void
5192nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
5193{
5194
5195#ifdef VV_DISABLEDELEG
5196	/*
5197	 * First, flag issuance of delegations disabled.
5198	 */
5199	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
5200#endif
5201
5202	/*
5203	 * Then call nfsd_recalldelegation() to get rid of all extant
5204	 * delegations.
5205	 */
5206	nfsd_recalldelegation(vp, p);
5207}
5208
5209/*
5210 * Check for conflicting locks, etc. and then get rid of delegations.
5211 * (At one point I thought that I should get rid of delegations for any
5212 *  Setattr, since it could potentially disallow the I/O op (read or write)
5213 *  allowed by the delegation. However, Setattr Ops that aren't changing
5214 *  the size get a stateid of all 0s, so you can't tell if it is a delegation
5215 *  for the same client or a different one, so I decided to only get rid
5216 *  of delegations for other clients when the size is being changed.)
5217 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5218 * as Write backs, even if there is no delegation, so it really isn't any
5219 * different?)
5220 */
5221APPLESTATIC int
5222nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5223    nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5224    struct nfsexstuff *exp, NFSPROC_T *p)
5225{
5226	struct nfsstate st, *stp = &st;
5227	struct nfslock lo, *lop = &lo;
5228	int error = 0;
5229	nfsquad_t clientid;
5230
5231	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5232		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5233		lop->lo_first = nvap->na_size;
5234	} else {
5235		stp->ls_flags = 0;
5236		lop->lo_first = 0;
5237	}
5238	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5239	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5240	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5241	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5242		stp->ls_flags |= NFSLCK_SETATTR;
5243	if (stp->ls_flags == 0)
5244		goto out;
5245	lop->lo_end = NFS64BITSSET;
5246	lop->lo_flags = NFSLCK_WRITE;
5247	stp->ls_ownerlen = 0;
5248	stp->ls_op = NULL;
5249	stp->ls_uid = nd->nd_cred->cr_uid;
5250	stp->ls_stateid.seqid = stateidp->seqid;
5251	clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5252	clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5253	stp->ls_stateid.other[2] = stateidp->other[2];
5254	error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5255	    stateidp, exp, nd, p);
5256
5257out:
5258	NFSEXITCODE2(error, nd);
5259	return (error);
5260}
5261
5262/*
5263 * Check for a write delegation and do a CBGETATTR if there is one, updating
5264 * the attributes, as required.
5265 * Should I return an error if I can't get the attributes? (For now, I'll
5266 * just return ok.
5267 */
5268APPLESTATIC int
5269nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
5270    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred,
5271    NFSPROC_T *p)
5272{
5273	struct nfsstate *stp;
5274	struct nfslockfile *lfp;
5275	struct nfsclient *clp;
5276	struct nfsvattr nva;
5277	fhandle_t nfh;
5278	int error = 0;
5279	nfsattrbit_t cbbits;
5280	u_quad_t delegfilerev;
5281
5282	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
5283	if (!NFSNONZERO_ATTRBIT(&cbbits))
5284		goto out;
5285
5286	/*
5287	 * Get the lock file structure.
5288	 * (A return of -1 means no associated state, so return ok.)
5289	 */
5290	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5291	NFSLOCKSTATE();
5292	if (!error)
5293		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5294	if (error) {
5295		NFSUNLOCKSTATE();
5296		if (error == -1)
5297			error = 0;
5298		goto out;
5299	}
5300
5301	/*
5302	 * Now, look for a write delegation.
5303	 */
5304	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
5305		if (stp->ls_flags & NFSLCK_DELEGWRITE)
5306			break;
5307	}
5308	if (stp == LIST_END(&lfp->lf_deleg)) {
5309		NFSUNLOCKSTATE();
5310		goto out;
5311	}
5312	clp = stp->ls_clp;
5313	delegfilerev = stp->ls_filerev;
5314
5315	/*
5316	 * If the Write delegation was issued as a part of this Compound RPC
5317	 * or if we have an Implied Clientid (used in a previous Op in this
5318	 * compound) and it is the client the delegation was issued to,
5319	 * just return ok.
5320	 * I also assume that it is from the same client iff the network
5321	 * host IP address is the same as the callback address. (Not
5322	 * exactly correct by the RFC, but avoids a lot of Getattr
5323	 * callbacks.)
5324	 */
5325	if (nd->nd_compref == stp->ls_compref ||
5326	    ((nd->nd_flag & ND_IMPLIEDCLID) &&
5327	     clp->lc_clientid.qval == nd->nd_clientid.qval) ||
5328	     nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
5329		NFSUNLOCKSTATE();
5330		goto out;
5331	}
5332
5333	/*
5334	 * We are now done with the delegation state structure,
5335	 * so the statelock can be released and we can now tsleep().
5336	 */
5337
5338	/*
5339	 * Now, we must do the CB Getattr callback, to see if Change or Size
5340	 * has changed.
5341	 */
5342	if (clp->lc_expiry >= NFSD_MONOSEC) {
5343		NFSUNLOCKSTATE();
5344		NFSVNO_ATTRINIT(&nva);
5345		nva.na_filerev = NFS64BITSSET;
5346		error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
5347		    0, &nfh, &nva, &cbbits, p);
5348		if (!error) {
5349			if ((nva.na_filerev != NFS64BITSSET &&
5350			    nva.na_filerev > delegfilerev) ||
5351			    (NFSVNO_ISSETSIZE(&nva) &&
5352			     nva.na_size != nvap->na_size)) {
5353				error = nfsvno_updfilerev(vp, nvap, cred, p);
5354				if (NFSVNO_ISSETSIZE(&nva))
5355					nvap->na_size = nva.na_size;
5356			}
5357		} else
5358			error = 0;	/* Ignore callback errors for now. */
5359	} else {
5360		NFSUNLOCKSTATE();
5361	}
5362
5363out:
5364	NFSEXITCODE2(error, nd);
5365	return (error);
5366}
5367
5368/*
5369 * This function looks for openowners that haven't had any opens for
5370 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5371 * is set.
5372 */
5373APPLESTATIC void
5374nfsrv_throwawayopens(NFSPROC_T *p)
5375{
5376	struct nfsclient *clp, *nclp;
5377	struct nfsstate *stp, *nstp;
5378	int i;
5379
5380	NFSLOCKSTATE();
5381	nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
5382	/*
5383	 * For each client...
5384	 */
5385	for (i = 0; i < nfsrv_clienthashsize; i++) {
5386	    LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5387		LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5388			if (LIST_EMPTY(&stp->ls_open) &&
5389			    (stp->ls_noopens > NFSNOOPEN ||
5390			     (nfsrv_openpluslock * 2) >
5391			     nfsrv_v4statelimit))
5392				nfsrv_freeopenowner(stp, 0, p);
5393		}
5394	    }
5395	}
5396	NFSUNLOCKSTATE();
5397}
5398
5399/*
5400 * This function checks to see if the credentials are the same.
5401 * Returns 1 for not same, 0 otherwise.
5402 */
5403static int
5404nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
5405{
5406
5407	if (nd->nd_flag & ND_GSS) {
5408		if (!(clp->lc_flags & LCL_GSS))
5409			return (1);
5410		if (clp->lc_flags & LCL_NAME) {
5411			if (nd->nd_princlen != clp->lc_namelen ||
5412			    NFSBCMP(nd->nd_principal, clp->lc_name,
5413				clp->lc_namelen))
5414				return (1);
5415			else
5416				return (0);
5417		}
5418		if (nd->nd_cred->cr_uid == clp->lc_uid)
5419			return (0);
5420		else
5421			return (1);
5422	} else if (clp->lc_flags & LCL_GSS)
5423		return (1);
5424	/*
5425	 * For AUTH_SYS, allow the same uid or root. (This is underspecified
5426	 * in RFC3530, which talks about principals, but doesn't say anything
5427	 * about uids for AUTH_SYS.)
5428	 */
5429	if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5430		return (0);
5431	else
5432		return (1);
5433}
5434
5435/*
5436 * Calculate the lease expiry time.
5437 */
5438static time_t
5439nfsrv_leaseexpiry(void)
5440{
5441
5442	if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
5443		return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5444	return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5445}
5446
5447/*
5448 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5449 */
5450static void
5451nfsrv_delaydelegtimeout(struct nfsstate *stp)
5452{
5453
5454	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5455		return;
5456
5457	if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5458	    stp->ls_delegtime < stp->ls_delegtimelimit) {
5459		stp->ls_delegtime += nfsrv_lease;
5460		if (stp->ls_delegtime > stp->ls_delegtimelimit)
5461			stp->ls_delegtime = stp->ls_delegtimelimit;
5462	}
5463}
5464
5465/*
5466 * This function checks to see if there is any other state associated
5467 * with the openowner for this Open.
5468 * It returns 1 if there is no other state, 0 otherwise.
5469 */
5470static int
5471nfsrv_nootherstate(struct nfsstate *stp)
5472{
5473	struct nfsstate *tstp;
5474
5475	LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5476		if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5477			return (0);
5478	}
5479	return (1);
5480}
5481
5482/*
5483 * Create a list of lock deltas (changes to local byte range locking
5484 * that can be rolled back using the list) and apply the changes via
5485 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5486 * the rollback or update function will be called after this.
5487 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5488 * call fails. If it returns an error, it will unlock the list.
5489 */
5490static int
5491nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5492    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5493{
5494	struct nfslock *lop, *nlop;
5495	int error = 0;
5496
5497	/* Loop through the list of locks. */
5498	lop = LIST_FIRST(&lfp->lf_locallock);
5499	while (first < end && lop != NULL) {
5500		nlop = LIST_NEXT(lop, lo_lckowner);
5501		if (first >= lop->lo_end) {
5502			/* not there yet */
5503			lop = nlop;
5504		} else if (first < lop->lo_first) {
5505			/* new one starts before entry in list */
5506			if (end <= lop->lo_first) {
5507				/* no overlap between old and new */
5508				error = nfsrv_dolocal(vp, lfp, flags,
5509				    NFSLCK_UNLOCK, first, end, cfp, p);
5510				if (error != 0)
5511					break;
5512				first = end;
5513			} else {
5514				/* handle fragment overlapped with new one */
5515				error = nfsrv_dolocal(vp, lfp, flags,
5516				    NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5517				    p);
5518				if (error != 0)
5519					break;
5520				first = lop->lo_first;
5521			}
5522		} else {
5523			/* new one overlaps this entry in list */
5524			if (end <= lop->lo_end) {
5525				/* overlaps all of new one */
5526				error = nfsrv_dolocal(vp, lfp, flags,
5527				    lop->lo_flags, first, end, cfp, p);
5528				if (error != 0)
5529					break;
5530				first = end;
5531			} else {
5532				/* handle fragment overlapped with new one */
5533				error = nfsrv_dolocal(vp, lfp, flags,
5534				    lop->lo_flags, first, lop->lo_end, cfp, p);
5535				if (error != 0)
5536					break;
5537				first = lop->lo_end;
5538				lop = nlop;
5539			}
5540		}
5541	}
5542	if (first < end && error == 0)
5543		/* handle fragment past end of list */
5544		error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
5545		    end, cfp, p);
5546
5547	NFSEXITCODE(error);
5548	return (error);
5549}
5550
5551/*
5552 * Local lock unlock. Unlock all byte ranges that are no longer locked
5553 * by NFSv4. To do this, unlock any subranges of first-->end that
5554 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
5555 * list. This list has all locks for the file held by other
5556 * <clientid, lockowner> tuples. The list is ordered by increasing
5557 * lo_first value, but may have entries that overlap each other, for
5558 * the case of read locks.
5559 */
5560static void
5561nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
5562    uint64_t init_end, NFSPROC_T *p)
5563{
5564	struct nfslock *lop;
5565	uint64_t first, end, prevfirst;
5566
5567	first = init_first;
5568	end = init_end;
5569	while (first < init_end) {
5570		/* Loop through all nfs locks, adjusting first and end */
5571		prevfirst = 0;
5572		LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
5573			KASSERT(prevfirst <= lop->lo_first,
5574			    ("nfsv4 locks out of order"));
5575			KASSERT(lop->lo_first < lop->lo_end,
5576			    ("nfsv4 bogus lock"));
5577			prevfirst = lop->lo_first;
5578			if (first >= lop->lo_first &&
5579			    first < lop->lo_end)
5580				/*
5581				 * Overlaps with initial part, so trim
5582				 * off that initial part by moving first past
5583				 * it.
5584				 */
5585				first = lop->lo_end;
5586			else if (end > lop->lo_first &&
5587			    lop->lo_first > first) {
5588				/*
5589				 * This lock defines the end of the
5590				 * segment to unlock, so set end to the
5591				 * start of it and break out of the loop.
5592				 */
5593				end = lop->lo_first;
5594				break;
5595			}
5596			if (first >= end)
5597				/*
5598				 * There is no segment left to do, so
5599				 * break out of this loop and then exit
5600				 * the outer while() since first will be set
5601				 * to end, which must equal init_end here.
5602				 */
5603				break;
5604		}
5605		if (first < end) {
5606			/* Unlock this segment */
5607			(void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
5608			    NFSLCK_READ, first, end, NULL, p);
5609			nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
5610			    first, end);
5611		}
5612		/*
5613		 * Now move past this segment and look for any further
5614		 * segment in the range, if there is one.
5615		 */
5616		first = end;
5617		end = init_end;
5618	}
5619}
5620
5621/*
5622 * Do the local lock operation and update the rollback list, as required.
5623 * Perform the rollback and return the error if nfsvno_advlock() fails.
5624 */
5625static int
5626nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
5627    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5628{
5629	struct nfsrollback *rlp;
5630	int error = 0, ltype, oldltype;
5631
5632	if (flags & NFSLCK_WRITE)
5633		ltype = F_WRLCK;
5634	else if (flags & NFSLCK_READ)
5635		ltype = F_RDLCK;
5636	else
5637		ltype = F_UNLCK;
5638	if (oldflags & NFSLCK_WRITE)
5639		oldltype = F_WRLCK;
5640	else if (oldflags & NFSLCK_READ)
5641		oldltype = F_RDLCK;
5642	else
5643		oldltype = F_UNLCK;
5644	if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
5645		/* nothing to do */
5646		goto out;
5647	error = nfsvno_advlock(vp, ltype, first, end, p);
5648	if (error != 0) {
5649		if (cfp != NULL) {
5650			cfp->cl_clientid.lval[0] = 0;
5651			cfp->cl_clientid.lval[1] = 0;
5652			cfp->cl_first = 0;
5653			cfp->cl_end = NFS64BITSSET;
5654			cfp->cl_flags = NFSLCK_WRITE;
5655			cfp->cl_ownerlen = 5;
5656			NFSBCOPY("LOCAL", cfp->cl_owner, 5);
5657		}
5658		nfsrv_locallock_rollback(vp, lfp, p);
5659	} else if (ltype != F_UNLCK) {
5660		rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
5661		    M_WAITOK);
5662		rlp->rlck_first = first;
5663		rlp->rlck_end = end;
5664		rlp->rlck_type = oldltype;
5665		LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
5666	}
5667
5668out:
5669	NFSEXITCODE(error);
5670	return (error);
5671}
5672
5673/*
5674 * Roll back local lock changes and free up the rollback list.
5675 */
5676static void
5677nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
5678{
5679	struct nfsrollback *rlp, *nrlp;
5680
5681	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
5682		(void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
5683		    rlp->rlck_end, p);
5684		free(rlp, M_NFSDROLLBACK);
5685	}
5686	LIST_INIT(&lfp->lf_rollback);
5687}
5688
5689/*
5690 * Update local lock list and delete rollback list (ie now committed to the
5691 * local locks). Most of the work is done by the internal function.
5692 */
5693static void
5694nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
5695    uint64_t end)
5696{
5697	struct nfsrollback *rlp, *nrlp;
5698	struct nfslock *new_lop, *other_lop;
5699
5700	new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
5701	if (flags & (NFSLCK_READ | NFSLCK_WRITE))
5702		other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
5703		    M_WAITOK);
5704	else
5705		other_lop = NULL;
5706	new_lop->lo_flags = flags;
5707	new_lop->lo_first = first;
5708	new_lop->lo_end = end;
5709	nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
5710	if (new_lop != NULL)
5711		free(new_lop, M_NFSDLOCK);
5712	if (other_lop != NULL)
5713		free(other_lop, M_NFSDLOCK);
5714
5715	/* and get rid of the rollback list */
5716	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
5717		free(rlp, M_NFSDROLLBACK);
5718	LIST_INIT(&lfp->lf_rollback);
5719}
5720
5721/*
5722 * Lock the struct nfslockfile for local lock updating.
5723 */
5724static void
5725nfsrv_locklf(struct nfslockfile *lfp)
5726{
5727	int gotlock;
5728
5729	/* lf_usecount ensures *lfp won't be free'd */
5730	lfp->lf_usecount++;
5731	do {
5732		gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
5733		    NFSSTATEMUTEXPTR, NULL);
5734	} while (gotlock == 0);
5735	lfp->lf_usecount--;
5736}
5737
5738/*
5739 * Unlock the struct nfslockfile after local lock updating.
5740 */
5741static void
5742nfsrv_unlocklf(struct nfslockfile *lfp)
5743{
5744
5745	nfsv4_unlock(&lfp->lf_locallock_lck, 0);
5746}
5747
5748/*
5749 * Clear out all state for the NFSv4 server.
5750 * Must be called by a thread that can sleep when no nfsds are running.
5751 */
5752void
5753nfsrv_throwawayallstate(NFSPROC_T *p)
5754{
5755	struct nfsclient *clp, *nclp;
5756	struct nfslockfile *lfp, *nlfp;
5757	int i;
5758
5759	/*
5760	 * For each client, clean out the state and then free the structure.
5761	 */
5762	for (i = 0; i < nfsrv_clienthashsize; i++) {
5763		LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5764			nfsrv_cleanclient(clp, p);
5765			nfsrv_freedeleglist(&clp->lc_deleg);
5766			nfsrv_freedeleglist(&clp->lc_olddeleg);
5767			free(clp->lc_stateid, M_NFSDCLIENT);
5768			free(clp, M_NFSDCLIENT);
5769		}
5770	}
5771
5772	/*
5773	 * Also, free up any remaining lock file structures.
5774	 */
5775	for (i = 0; i < nfsrv_lockhashsize; i++) {
5776		LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
5777			printf("nfsd unload: fnd a lock file struct\n");
5778			nfsrv_freenfslockfile(lfp);
5779		}
5780	}
5781}
5782
5783/*
5784 * Check the sequence# for the session and slot provided as an argument.
5785 * Also, renew the lease if the session will return NFS_OK.
5786 */
5787int
5788nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
5789    uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
5790    uint32_t *sflagsp, NFSPROC_T *p)
5791{
5792	struct nfsdsession *sep;
5793	struct nfssessionhash *shp;
5794	int error;
5795	SVCXPRT *savxprt;
5796
5797	shp = NFSSESSIONHASH(nd->nd_sessionid);
5798	NFSLOCKSESSION(shp);
5799	sep = nfsrv_findsession(nd->nd_sessionid);
5800	if (sep == NULL) {
5801		NFSUNLOCKSESSION(shp);
5802		return (NFSERR_BADSESSION);
5803	}
5804	error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
5805	    sep->sess_slots, NULL, NFSV4_SLOTS - 1);
5806	if (error != 0) {
5807		NFSUNLOCKSESSION(shp);
5808		return (error);
5809	}
5810	if (cache_this != 0)
5811		nd->nd_flag |= ND_SAVEREPLY;
5812	/* Renew the lease. */
5813	sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
5814	nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
5815	nd->nd_flag |= ND_IMPLIEDCLID;
5816
5817	/*
5818	 * If this session handles the backchannel, save the nd_xprt for this
5819	 * RPC, since this is the one being used.
5820	 */
5821	if (sep->sess_clp->lc_req.nr_client != NULL &&
5822	    (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) {
5823		savxprt = sep->sess_cbsess.nfsess_xprt;
5824		SVC_ACQUIRE(nd->nd_xprt);
5825		nd->nd_xprt->xp_p2 =
5826		    sep->sess_clp->lc_req.nr_client->cl_private;
5827		nd->nd_xprt->xp_idletimeout = 0;	/* Disable timeout. */
5828		sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
5829		if (savxprt != NULL)
5830			SVC_RELEASE(savxprt);
5831	}
5832
5833	*sflagsp = 0;
5834	if (sep->sess_clp->lc_req.nr_client == NULL)
5835		*sflagsp |= NFSV4SEQ_CBPATHDOWN;
5836	NFSUNLOCKSESSION(shp);
5837	if (error == NFSERR_EXPIRED) {
5838		*sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
5839		error = 0;
5840	} else if (error == NFSERR_ADMINREVOKED) {
5841		*sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
5842		error = 0;
5843	}
5844	*highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
5845	return (0);
5846}
5847
5848/*
5849 * Check/set reclaim complete for this session/clientid.
5850 */
5851int
5852nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd)
5853{
5854	struct nfsdsession *sep;
5855	struct nfssessionhash *shp;
5856	int error = 0;
5857
5858	shp = NFSSESSIONHASH(nd->nd_sessionid);
5859	NFSLOCKSTATE();
5860	NFSLOCKSESSION(shp);
5861	sep = nfsrv_findsession(nd->nd_sessionid);
5862	if (sep == NULL) {
5863		NFSUNLOCKSESSION(shp);
5864		NFSUNLOCKSTATE();
5865		return (NFSERR_BADSESSION);
5866	}
5867
5868	/* Check to see if reclaim complete has already happened. */
5869	if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
5870		error = NFSERR_COMPLETEALREADY;
5871	else
5872		sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
5873	NFSUNLOCKSESSION(shp);
5874	NFSUNLOCKSTATE();
5875	return (error);
5876}
5877
5878/*
5879 * Cache the reply in a session slot.
5880 */
5881void
5882nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat,
5883   struct mbuf **m)
5884{
5885	struct nfsdsession *sep;
5886	struct nfssessionhash *shp;
5887
5888	shp = NFSSESSIONHASH(sessionid);
5889	NFSLOCKSESSION(shp);
5890	sep = nfsrv_findsession(sessionid);
5891	if (sep == NULL) {
5892		NFSUNLOCKSESSION(shp);
5893		printf("nfsrv_cache_session: no session\n");
5894		m_freem(*m);
5895		return;
5896	}
5897	nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m);
5898	NFSUNLOCKSESSION(shp);
5899}
5900
5901/*
5902 * Search for a session that matches the sessionid.
5903 */
5904static struct nfsdsession *
5905nfsrv_findsession(uint8_t *sessionid)
5906{
5907	struct nfsdsession *sep;
5908	struct nfssessionhash *shp;
5909
5910	shp = NFSSESSIONHASH(sessionid);
5911	LIST_FOREACH(sep, &shp->list, sess_hash) {
5912		if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
5913			break;
5914	}
5915	return (sep);
5916}
5917
5918/*
5919 * Destroy a session.
5920 */
5921int
5922nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
5923{
5924	int error, samesess;
5925
5926	samesess = 0;
5927	if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID)) {
5928		samesess = 1;
5929		if ((nd->nd_flag & ND_LASTOP) == 0)
5930			return (NFSERR_BADSESSION);
5931	}
5932	error = nfsrv_freesession(NULL, sessionid);
5933	if (error == 0 && samesess != 0)
5934		nd->nd_flag &= ~ND_HASSEQUENCE;
5935	return (error);
5936}
5937
5938/*
5939 * Free up a session structure.
5940 */
5941static int
5942nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
5943{
5944	struct nfssessionhash *shp;
5945	int i;
5946
5947	NFSLOCKSTATE();
5948	if (sep == NULL) {
5949		shp = NFSSESSIONHASH(sessionid);
5950		NFSLOCKSESSION(shp);
5951		sep = nfsrv_findsession(sessionid);
5952	} else {
5953		shp = NFSSESSIONHASH(sep->sess_sessionid);
5954		NFSLOCKSESSION(shp);
5955	}
5956	if (sep != NULL) {
5957		sep->sess_refcnt--;
5958		if (sep->sess_refcnt > 0) {
5959			NFSUNLOCKSESSION(shp);
5960			NFSUNLOCKSTATE();
5961			return (0);
5962		}
5963		LIST_REMOVE(sep, sess_hash);
5964		LIST_REMOVE(sep, sess_list);
5965	}
5966	NFSUNLOCKSESSION(shp);
5967	NFSUNLOCKSTATE();
5968	if (sep == NULL)
5969		return (NFSERR_BADSESSION);
5970	for (i = 0; i < NFSV4_SLOTS; i++)
5971		if (sep->sess_slots[i].nfssl_reply != NULL)
5972			m_freem(sep->sess_slots[i].nfssl_reply);
5973	if (sep->sess_cbsess.nfsess_xprt != NULL)
5974		SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
5975	free(sep, M_NFSDSESSION);
5976	return (0);
5977}
5978
5979/*
5980 * Free a stateid.
5981 * RFC5661 says that it should fail when there are associated opens, locks
5982 * or delegations. Since stateids represent opens, I don't see how you can
5983 * free an open stateid (it will be free'd when closed), so this function
5984 * only works for lock stateids (freeing the lock_owner) or delegations.
5985 */
5986int
5987nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
5988    NFSPROC_T *p)
5989{
5990	struct nfsclient *clp;
5991	struct nfsstate *stp;
5992	int error;
5993
5994	NFSLOCKSTATE();
5995	/*
5996	 * Look up the stateid
5997	 */
5998	error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
5999	    NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6000	if (error == 0) {
6001		/* First, check for a delegation. */
6002		LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
6003			if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
6004			    NFSX_STATEIDOTHER))
6005				break;
6006		}
6007		if (stp != NULL) {
6008			nfsrv_freedeleg(stp);
6009			NFSUNLOCKSTATE();
6010			return (error);
6011		}
6012	}
6013	/* Not a delegation, try for a lock_owner. */
6014	if (error == 0)
6015		error = nfsrv_getstate(clp, stateidp, 0, &stp);
6016	if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
6017	    NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
6018		/* Not a lock_owner stateid. */
6019		error = NFSERR_LOCKSHELD;
6020	if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
6021		error = NFSERR_LOCKSHELD;
6022	if (error == 0)
6023		nfsrv_freelockowner(stp, NULL, 0, p);
6024	NFSUNLOCKSTATE();
6025	return (error);
6026}
6027
6028/*
6029 * Generate the xdr for an NFSv4.1 CBSequence Operation.
6030 */
6031static int
6032nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
6033    int dont_replycache, struct nfsdsession **sepp)
6034{
6035	struct nfsdsession *sep;
6036	uint32_t *tl, slotseq = 0;
6037	int maxslot, slotpos;
6038	uint8_t sessionid[NFSX_V4SESSIONID];
6039	int error;
6040
6041	error = nfsv4_getcbsession(clp, sepp);
6042	if (error != 0)
6043		return (error);
6044	sep = *sepp;
6045	(void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot,
6046	    &slotseq, sessionid);
6047	KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
6048
6049	/* Build the Sequence arguments. */
6050	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
6051	bcopy(sessionid, tl, NFSX_V4SESSIONID);
6052	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
6053	nd->nd_slotseq = tl;
6054	*tl++ = txdr_unsigned(slotseq);
6055	*tl++ = txdr_unsigned(slotpos);
6056	*tl++ = txdr_unsigned(maxslot);
6057	if (dont_replycache == 0)
6058		*tl++ = newnfs_true;
6059	else
6060		*tl++ = newnfs_false;
6061	*tl = 0;			/* No referring call list, for now. */
6062	nd->nd_flag |= ND_HASSEQUENCE;
6063	return (0);
6064}
6065
6066/*
6067 * Get a session for the callback.
6068 */
6069static int
6070nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
6071{
6072	struct nfsdsession *sep;
6073
6074	NFSLOCKSTATE();
6075	LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6076		if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
6077			break;
6078	}
6079	if (sep == NULL) {
6080		NFSUNLOCKSTATE();
6081		return (NFSERR_BADSESSION);
6082	}
6083	sep->sess_refcnt++;
6084	*sepp = sep;
6085	NFSUNLOCKSTATE();
6086	return (0);
6087}
6088
6089/*
6090 * Free up all backchannel xprts.  This needs to be done when the nfsd threads
6091 * exit, since those transports will all be going away.
6092 * This is only called after all the nfsd threads are done performing RPCs,
6093 * so locking shouldn't be an issue.
6094 */
6095APPLESTATIC void
6096nfsrv_freeallbackchannel_xprts(void)
6097{
6098	struct nfsdsession *sep;
6099	struct nfsclient *clp;
6100	SVCXPRT *xprt;
6101	int i;
6102
6103	for (i = 0; i < nfsrv_clienthashsize; i++) {
6104		LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
6105			LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6106				xprt = sep->sess_cbsess.nfsess_xprt;
6107				sep->sess_cbsess.nfsess_xprt = NULL;
6108				if (xprt != NULL)
6109					SVC_RELEASE(xprt);
6110			}
6111		}
6112	}
6113}
6114
6115