nfs_nfsdstate.c revision 308241
/*-
 * Copyright (c) 2009 Rick Macklem, University of Guelph
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdstate.c 308241 2016-11-03 00:58:50Z rmacklem $");
30
31#ifndef APPLEKEXT
32#include <fs/nfs/nfsport.h>
33
34struct nfsrv_stablefirst nfsrv_stablefirst;
35int nfsrv_issuedelegs = 0;
36int nfsrv_dolocallocks = 0;
37struct nfsv4lock nfsv4rootfs_lock;
38
39extern int newnfs_numnfsd;
40extern struct nfsstats newnfsstats;
41extern int nfsrv_lease;
42extern struct timeval nfsboottime;
43extern u_int32_t newnfs_true, newnfs_false;
44NFSV4ROOTLOCKMUTEX;
45NFSSTATESPINLOCK;
46
47SYSCTL_DECL(_vfs_nfsd);
48int	nfsrv_statehashsize = NFSSTATEHASHSIZE;
49TUNABLE_INT("vfs.nfsd.statehashsize", &nfsrv_statehashsize);
50SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
51    &nfsrv_statehashsize, 0,
52    "Size of state hash table set via loader.conf");
53
54int	nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
55TUNABLE_INT("vfs.nfsd.clienthashsize", &nfsrv_clienthashsize);
56SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
57    &nfsrv_clienthashsize, 0,
58    "Size of client hash table set via loader.conf");
59
60int	nfsrv_lockhashsize = NFSLOCKHASHSIZE;
61TUNABLE_INT("vfs.nfsd.fhhashsize", &nfsrv_lockhashsize);
62SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
63    &nfsrv_lockhashsize, 0,
64    "Size of file handle hash table set via loader.conf");
65
66int	nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
67TUNABLE_INT("vfs.nfsd.sessionhashsize", &nfsrv_sessionhashsize);
68SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
69    &nfsrv_sessionhashsize, 0,
70    "Size of session hash table set via loader.conf");
71
72static int	nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
73TUNABLE_INT("vfs.nfsd.v4statelimit", &nfsrv_v4statelimit);
74SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
75    &nfsrv_v4statelimit, 0,
76    "High water limit for NFSv4 opens+locks+delegations");
77
78static int	nfsrv_writedelegifpos = 0;
79SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
80    &nfsrv_writedelegifpos, 0,
81    "Issue a write delegation for read opens if possible");
82
83/*
84 * Hash lists for nfs V4.
85 */
86struct nfsclienthashhead	*nfsclienthash;
87struct nfslockhashhead		*nfslockhash;
88struct nfssessionhash		*nfssessionhash;
89#endif	/* !APPLEKEXT */
90
91static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
92static time_t nfsrvboottime;
93static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
94static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
95static int nfsrv_nogsscallback = 0;
96
97/* local functions */
98static void nfsrv_dumpaclient(struct nfsclient *clp,
99    struct nfsd_dumpclients *dumpp);
100static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
101    NFSPROC_T *p);
102static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
103    NFSPROC_T *p);
104static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
105    NFSPROC_T *p);
106static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
107    int cansleep, NFSPROC_T *p);
108static void nfsrv_freenfslock(struct nfslock *lop);
109static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
110static void nfsrv_freedeleg(struct nfsstate *);
111static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
112    u_int32_t flags, struct nfsstate **stpp);
113static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
114    struct nfsstate **stpp);
115static int nfsrv_getlockfh(vnode_t vp, u_short flags,
116    struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
117static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
118    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
119static void nfsrv_insertlock(struct nfslock *new_lop,
120    struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
121static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
122    struct nfslock **other_lopp, struct nfslockfile *lfp);
123static int nfsrv_getipnumber(u_char *cp);
124static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
125    nfsv4stateid_t *stateidp, int specialid);
126static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
127    u_int32_t flags);
128static int nfsrv_docallback(struct nfsclient *clp, int procnum,
129    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
130    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p);
131static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
132    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp);
133static u_int32_t nfsrv_nextclientindex(void);
134static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
135static void nfsrv_markstable(struct nfsclient *clp);
136static int nfsrv_checkstable(struct nfsclient *clp);
137static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
138    vnode *vp, NFSPROC_T *p);
139static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
140    NFSPROC_T *p, vnode_t vp);
141static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
142    struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
143static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
144    struct nfsclient *clp);
145static time_t nfsrv_leaseexpiry(void);
146static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
147static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
148    struct nfsstate *stp, struct nfsrvcache *op);
149static int nfsrv_nootherstate(struct nfsstate *stp);
150static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
151    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
152static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
153    uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
154static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
155    int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
156    NFSPROC_T *p);
157static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
158    NFSPROC_T *p);
159static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
160    uint64_t first, uint64_t end);
161static void nfsrv_locklf(struct nfslockfile *lfp);
162static void nfsrv_unlocklf(struct nfslockfile *lfp);
163static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
164static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
165static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
166    int dont_replycache, struct nfsdsession **sepp);
167static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
168
169/*
170 * Scan the client list for a match and either return the current one,
171 * create a new entry or return an error.
172 * If returning a non-error, the clp structure must either be linked into
173 * the client list or free'd.
174 */
175APPLESTATIC int
176nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
177    nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
178{
179	struct nfsclient *clp = NULL, *new_clp = *new_clpp;
180	int i, error = 0;
181	struct nfsstate *stp, *tstp;
182	struct sockaddr_in *sad, *rad;
183	int zapit = 0, gotit, hasstate = 0, igotlock;
184	static u_int64_t confirm_index = 0;
185
186	/*
187	 * Check for state resource limit exceeded.
188	 */
189	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
190		error = NFSERR_RESOURCE;
191		goto out;
192	}
193
194	if (nfsrv_issuedelegs == 0 ||
195	    ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
196		/*
197		 * Don't do callbacks when delegations are disabled or
198		 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
199		 * If establishing a callback connection is attempted
200		 * when a firewall is blocking the callback path, the
201		 * server may wait too long for the connect attempt to
202		 * succeed during the Open. Some clients, such as Linux,
203		 * may timeout and give up on the Open before the server
204		 * replies. Also, since AUTH_GSS callbacks are not
205		 * yet interoperability tested, they might cause the
206		 * server to crap out, if they get past the Init call to
207		 * the client.
208		 */
209		new_clp->lc_program = 0;
210
211	/* Lock out other nfsd threads */
212	NFSLOCKV4ROOTMUTEX();
213	nfsv4_relref(&nfsv4rootfs_lock);
214	do {
215		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
216		    NFSV4ROOTLOCKMUTEXPTR, NULL);
217	} while (!igotlock);
218	NFSUNLOCKV4ROOTMUTEX();
219
220	/*
221	 * Search for a match in the client list.
222	 */
223	gotit = i = 0;
224	while (i < nfsrv_clienthashsize && !gotit) {
225	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
226		if (new_clp->lc_idlen == clp->lc_idlen &&
227		    !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
228			gotit = 1;
229			break;
230		}
231	    }
232	    if (gotit == 0)
233		i++;
234	}
235	if (!gotit ||
236	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
237		if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
238			/*
239			 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
240			 * client is trying to update a confirmed clientid.
241			 */
242			NFSLOCKV4ROOTMUTEX();
243			nfsv4_unlock(&nfsv4rootfs_lock, 1);
244			NFSUNLOCKV4ROOTMUTEX();
245			confirmp->lval[1] = 0;
246			error = NFSERR_NOENT;
247			goto out;
248		}
249		/*
250		 * Get rid of the old one.
251		 */
252		if (i != nfsrv_clienthashsize) {
253			LIST_REMOVE(clp, lc_hash);
254			nfsrv_cleanclient(clp, p);
255			nfsrv_freedeleglist(&clp->lc_deleg);
256			nfsrv_freedeleglist(&clp->lc_olddeleg);
257			zapit = 1;
258		}
259		/*
260		 * Add it after assigning a client id to it.
261		 */
262		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
263		if ((nd->nd_flag & ND_NFSV41) != 0)
264			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
265			    ++confirm_index;
266		else
267			confirmp->qval = new_clp->lc_confirm.qval =
268			    ++confirm_index;
269		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
270		    (u_int32_t)nfsrvboottime;
271		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
272		    nfsrv_nextclientindex();
273		new_clp->lc_stateindex = 0;
274		new_clp->lc_statemaxindex = 0;
275		new_clp->lc_cbref = 0;
276		new_clp->lc_expiry = nfsrv_leaseexpiry();
277		LIST_INIT(&new_clp->lc_open);
278		LIST_INIT(&new_clp->lc_deleg);
279		LIST_INIT(&new_clp->lc_olddeleg);
280		LIST_INIT(&new_clp->lc_session);
281		for (i = 0; i < nfsrv_statehashsize; i++)
282			LIST_INIT(&new_clp->lc_stateid[i]);
283		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
284		    lc_hash);
285		newnfsstats.srvclients++;
286		nfsrv_openpluslock++;
287		nfsrv_clients++;
288		NFSLOCKV4ROOTMUTEX();
289		nfsv4_unlock(&nfsv4rootfs_lock, 1);
290		NFSUNLOCKV4ROOTMUTEX();
291		if (zapit)
292			nfsrv_zapclient(clp, p);
293		*new_clpp = NULL;
294		goto out;
295	}
296
297	/*
298	 * Now, handle the cases where the id is already issued.
299	 */
300	if (nfsrv_notsamecredname(nd, clp)) {
301	    /*
302	     * Check to see if there is expired state that should go away.
303	     */
304	    if (clp->lc_expiry < NFSD_MONOSEC &&
305	        (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
306		nfsrv_cleanclient(clp, p);
307		nfsrv_freedeleglist(&clp->lc_deleg);
308	    }
309
310	    /*
311	     * If there is outstanding state, then reply NFSERR_CLIDINUSE per
312	     * RFC3530 Sec. 8.1.2 last para.
313	     */
314	    if (!LIST_EMPTY(&clp->lc_deleg)) {
315		hasstate = 1;
316	    } else if (LIST_EMPTY(&clp->lc_open)) {
317		hasstate = 0;
318	    } else {
319		hasstate = 0;
320		/* Look for an Open on the OpenOwner */
321		LIST_FOREACH(stp, &clp->lc_open, ls_list) {
322		    if (!LIST_EMPTY(&stp->ls_open)) {
323			hasstate = 1;
324			break;
325		    }
326		}
327	    }
328	    if (hasstate) {
329		/*
330		 * If the uid doesn't match, return NFSERR_CLIDINUSE after
331		 * filling out the correct ipaddr and portnum.
332		 */
333		sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *);
334		rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
335		sad->sin_addr.s_addr = rad->sin_addr.s_addr;
336		sad->sin_port = rad->sin_port;
337		NFSLOCKV4ROOTMUTEX();
338		nfsv4_unlock(&nfsv4rootfs_lock, 1);
339		NFSUNLOCKV4ROOTMUTEX();
340		error = NFSERR_CLIDINUSE;
341		goto out;
342	    }
343	}
344
345	if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
346		/*
347		 * If the verifier has changed, the client has rebooted
348		 * and a new client id is issued. The old state info
349		 * can be thrown away once the SETCLIENTID_CONFIRM occurs.
350		 */
351		LIST_REMOVE(clp, lc_hash);
352		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
353		if ((nd->nd_flag & ND_NFSV41) != 0)
354			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
355			    ++confirm_index;
356		else
357			confirmp->qval = new_clp->lc_confirm.qval =
358			    ++confirm_index;
359		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
360		    nfsrvboottime;
361		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
362		    nfsrv_nextclientindex();
363		new_clp->lc_stateindex = 0;
364		new_clp->lc_statemaxindex = 0;
365		new_clp->lc_cbref = 0;
366		new_clp->lc_expiry = nfsrv_leaseexpiry();
367
368		/*
369		 * Save the state until confirmed.
370		 */
371		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
372		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
373			tstp->ls_clp = new_clp;
374		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
375		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
376			tstp->ls_clp = new_clp;
377		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
378		    ls_list);
379		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
380			tstp->ls_clp = new_clp;
381		for (i = 0; i < nfsrv_statehashsize; i++) {
382			LIST_NEWHEAD(&new_clp->lc_stateid[i],
383			    &clp->lc_stateid[i], ls_hash);
384			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
385				tstp->ls_clp = new_clp;
386		}
387		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
388		    lc_hash);
389		newnfsstats.srvclients++;
390		nfsrv_openpluslock++;
391		nfsrv_clients++;
392		NFSLOCKV4ROOTMUTEX();
393		nfsv4_unlock(&nfsv4rootfs_lock, 1);
394		NFSUNLOCKV4ROOTMUTEX();
395
396		/*
397		 * Must wait until any outstanding callback on the old clp
398		 * completes.
399		 */
400		NFSLOCKSTATE();
401		while (clp->lc_cbref) {
402			clp->lc_flags |= LCL_WAKEUPWANTED;
403			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
404			    "nfsd clp", 10 * hz);
405		}
406		NFSUNLOCKSTATE();
407		nfsrv_zapclient(clp, p);
408		*new_clpp = NULL;
409		goto out;
410	}
411
412	/* For NFSv4.1, mark that we found a confirmed clientid. */
413	if ((nd->nd_flag & ND_NFSV41) != 0) {
414		clientidp->lval[0] = clp->lc_clientid.lval[0];
415		clientidp->lval[1] = clp->lc_clientid.lval[1];
416		confirmp->lval[0] = 0;	/* Ignored by client */
417		confirmp->lval[1] = 1;
418	} else {
419		/*
420		 * id and verifier match, so update the net address info
421		 * and get rid of any existing callback authentication
422		 * handle, so a new one will be acquired.
423		 */
424		LIST_REMOVE(clp, lc_hash);
425		new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
426		new_clp->lc_expiry = nfsrv_leaseexpiry();
427		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
428		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
429		    clp->lc_clientid.lval[0];
430		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
431		    clp->lc_clientid.lval[1];
432		new_clp->lc_delegtime = clp->lc_delegtime;
433		new_clp->lc_stateindex = clp->lc_stateindex;
434		new_clp->lc_statemaxindex = clp->lc_statemaxindex;
435		new_clp->lc_cbref = 0;
436		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
437		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
438			tstp->ls_clp = new_clp;
439		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
440		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
441			tstp->ls_clp = new_clp;
442		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
443		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
444			tstp->ls_clp = new_clp;
445		for (i = 0; i < nfsrv_statehashsize; i++) {
446			LIST_NEWHEAD(&new_clp->lc_stateid[i],
447			    &clp->lc_stateid[i], ls_hash);
448			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
449				tstp->ls_clp = new_clp;
450		}
451		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
452		    lc_hash);
453		newnfsstats.srvclients++;
454		nfsrv_openpluslock++;
455		nfsrv_clients++;
456	}
457	NFSLOCKV4ROOTMUTEX();
458	nfsv4_unlock(&nfsv4rootfs_lock, 1);
459	NFSUNLOCKV4ROOTMUTEX();
460
461	if ((nd->nd_flag & ND_NFSV41) == 0) {
462		/*
463		 * Must wait until any outstanding callback on the old clp
464		 * completes.
465		 */
466		NFSLOCKSTATE();
467		while (clp->lc_cbref) {
468			clp->lc_flags |= LCL_WAKEUPWANTED;
469			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
470			    "nfsdclp", 10 * hz);
471		}
472		NFSUNLOCKSTATE();
473		nfsrv_zapclient(clp, p);
474		*new_clpp = NULL;
475	}
476
477out:
478	NFSEXITCODE2(error, nd);
479	return (error);
480}
481
482/*
483 * Check to see if the client id exists and optionally confirm it.
484 */
485APPLESTATIC int
486nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
487    struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
488    struct nfsrv_descript *nd, NFSPROC_T *p)
489{
490	struct nfsclient *clp;
491	struct nfsstate *stp;
492	int i;
493	struct nfsclienthashhead *hp;
494	int error = 0, igotlock, doneok;
495	struct nfssessionhash *shp;
496	struct nfsdsession *sep;
497	uint64_t sessid[2];
498	static uint64_t next_sess = 0;
499
500	if (clpp)
501		*clpp = NULL;
502	if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
503	    opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
504		error = NFSERR_STALECLIENTID;
505		goto out;
506	}
507
508	/*
509	 * If called with opflags == CLOPS_RENEW, the State Lock is
510	 * already held. Otherwise, we need to get either that or,
511	 * for the case of Confirm, lock out the nfsd threads.
512	 */
513	if (opflags & CLOPS_CONFIRM) {
514		NFSLOCKV4ROOTMUTEX();
515		nfsv4_relref(&nfsv4rootfs_lock);
516		do {
517			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
518			    NFSV4ROOTLOCKMUTEXPTR, NULL);
519		} while (!igotlock);
520		/*
521		 * Create a new sessionid here, since we need to do it where
522		 * there is a mutex held to serialize update of next_sess.
523		 */
524		if ((nd->nd_flag & ND_NFSV41) != 0) {
525			sessid[0] = ++next_sess;
526			sessid[1] = clientid.qval;
527		}
528		NFSUNLOCKV4ROOTMUTEX();
529	} else if (opflags != CLOPS_RENEW) {
530		NFSLOCKSTATE();
531	}
532
533	/* For NFSv4.1, the clp is acquired from the associated session. */
534	if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
535	    opflags == CLOPS_RENEW) {
536		clp = NULL;
537		if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
538			shp = NFSSESSIONHASH(nd->nd_sessionid);
539			NFSLOCKSESSION(shp);
540			sep = nfsrv_findsession(nd->nd_sessionid);
541			if (sep != NULL)
542				clp = sep->sess_clp;
543			NFSUNLOCKSESSION(shp);
544		}
545	} else {
546		hp = NFSCLIENTHASH(clientid);
547		LIST_FOREACH(clp, hp, lc_hash) {
548			if (clp->lc_clientid.lval[1] == clientid.lval[1])
549				break;
550		}
551	}
552	if (clp == NULL) {
553		if (opflags & CLOPS_CONFIRM)
554			error = NFSERR_STALECLIENTID;
555		else
556			error = NFSERR_EXPIRED;
557	} else if (clp->lc_flags & LCL_ADMINREVOKED) {
558		/*
559		 * If marked admin revoked, just return the error.
560		 */
561		error = NFSERR_ADMINREVOKED;
562	}
563	if (error) {
564		if (opflags & CLOPS_CONFIRM) {
565			NFSLOCKV4ROOTMUTEX();
566			nfsv4_unlock(&nfsv4rootfs_lock, 1);
567			NFSUNLOCKV4ROOTMUTEX();
568		} else if (opflags != CLOPS_RENEW) {
569			NFSUNLOCKSTATE();
570		}
571		goto out;
572	}
573
574	/*
575	 * Perform any operations specified by the opflags.
576	 */
577	if (opflags & CLOPS_CONFIRM) {
578		if (((nd->nd_flag & ND_NFSV41) != 0 &&
579		     clp->lc_confirm.lval[0] != confirm.lval[0]) ||
580		    ((nd->nd_flag & ND_NFSV41) == 0 &&
581		     clp->lc_confirm.qval != confirm.qval))
582			error = NFSERR_STALECLIENTID;
583		else if (nfsrv_notsamecredname(nd, clp))
584			error = NFSERR_CLIDINUSE;
585
586		if (!error) {
587		    if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
588			LCL_NEEDSCONFIRM) {
589			/*
590			 * Hang onto the delegations (as old delegations)
591			 * for an Open with CLAIM_DELEGATE_PREV unless in
592			 * grace, but get rid of the rest of the state.
593			 */
594			nfsrv_cleanclient(clp, p);
595			nfsrv_freedeleglist(&clp->lc_olddeleg);
596			if (nfsrv_checkgrace(nd, clp, 0)) {
597			    /* In grace, so just delete delegations */
598			    nfsrv_freedeleglist(&clp->lc_deleg);
599			} else {
600			    LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
601				stp->ls_flags |= NFSLCK_OLDDELEG;
602			    clp->lc_delegtime = NFSD_MONOSEC +
603				nfsrv_lease + NFSRV_LEASEDELTA;
604			    LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
605				ls_list);
606			}
607			if ((nd->nd_flag & ND_NFSV41) != 0)
608			    clp->lc_program = cbprogram;
609		    }
610		    clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
611		    if (clp->lc_program)
612			clp->lc_flags |= LCL_NEEDSCBNULL;
613		    /* For NFSv4.1, link the session onto the client. */
614		    if (nsep != NULL) {
615			/* Hold a reference on the xprt for a backchannel. */
616			if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
617			    != 0 && clp->lc_req.nr_client == NULL) {
618			    clp->lc_req.nr_client = (struct __rpc_client *)
619				clnt_bck_create(nd->nd_xprt->xp_socket,
620				cbprogram, NFSV4_CBVERS);
621			    if (clp->lc_req.nr_client != NULL) {
622				SVC_ACQUIRE(nd->nd_xprt);
623				nd->nd_xprt->xp_p2 =
624				    clp->lc_req.nr_client->cl_private;
625				/* Disable idle timeout. */
626				nd->nd_xprt->xp_idletimeout = 0;
627				nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
628			    } else
629				nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
630			}
631			NFSBCOPY(sessid, nsep->sess_sessionid,
632			    NFSX_V4SESSIONID);
633			NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
634			    NFSX_V4SESSIONID);
635			shp = NFSSESSIONHASH(nsep->sess_sessionid);
636			NFSLOCKSTATE();
637			NFSLOCKSESSION(shp);
638			LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
639			LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
640			nsep->sess_clp = clp;
641			NFSUNLOCKSESSION(shp);
642			NFSUNLOCKSTATE();
643		    }
644		}
645	} else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
646		error = NFSERR_EXPIRED;
647	}
648
649	/*
650	 * If called by the Renew Op, we must check the principal.
651	 */
652	if (!error && (opflags & CLOPS_RENEWOP)) {
653	    if (nfsrv_notsamecredname(nd, clp)) {
654		doneok = 0;
655		for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
656		    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
657			if ((stp->ls_flags & NFSLCK_OPEN) &&
658			    stp->ls_uid == nd->nd_cred->cr_uid) {
659				doneok = 1;
660				break;
661			}
662		    }
663		}
664		if (!doneok)
665			error = NFSERR_ACCES;
666	    }
667	    if (!error && (clp->lc_flags & LCL_CBDOWN))
668		error = NFSERR_CBPATHDOWN;
669	}
670	if ((!error || error == NFSERR_CBPATHDOWN) &&
671	     (opflags & CLOPS_RENEW)) {
672		clp->lc_expiry = nfsrv_leaseexpiry();
673	}
674	if (opflags & CLOPS_CONFIRM) {
675		NFSLOCKV4ROOTMUTEX();
676		nfsv4_unlock(&nfsv4rootfs_lock, 1);
677		NFSUNLOCKV4ROOTMUTEX();
678	} else if (opflags != CLOPS_RENEW) {
679		NFSUNLOCKSTATE();
680	}
681	if (clpp)
682		*clpp = clp;
683
684out:
685	NFSEXITCODE2(error, nd);
686	return (error);
687}
688
689/*
690 * Perform the NFSv4.1 destroy clientid.
691 */
692int
693nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
694{
695	struct nfsclient *clp;
696	struct nfsclienthashhead *hp;
697	int error = 0, i, igotlock;
698
699	if (nfsrvboottime != clientid.lval[0]) {
700		error = NFSERR_STALECLIENTID;
701		goto out;
702	}
703
704	/* Lock out other nfsd threads */
705	NFSLOCKV4ROOTMUTEX();
706	nfsv4_relref(&nfsv4rootfs_lock);
707	do {
708		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
709		    NFSV4ROOTLOCKMUTEXPTR, NULL);
710	} while (igotlock == 0);
711	NFSUNLOCKV4ROOTMUTEX();
712
713	hp = NFSCLIENTHASH(clientid);
714	LIST_FOREACH(clp, hp, lc_hash) {
715		if (clp->lc_clientid.lval[1] == clientid.lval[1])
716			break;
717	}
718	if (clp == NULL) {
719		NFSLOCKV4ROOTMUTEX();
720		nfsv4_unlock(&nfsv4rootfs_lock, 1);
721		NFSUNLOCKV4ROOTMUTEX();
722		/* Just return ok, since it is gone. */
723		goto out;
724	}
725
726	/* Scan for state on the clientid. */
727	for (i = 0; i < nfsrv_statehashsize; i++)
728		if (!LIST_EMPTY(&clp->lc_stateid[i])) {
729			NFSLOCKV4ROOTMUTEX();
730			nfsv4_unlock(&nfsv4rootfs_lock, 1);
731			NFSUNLOCKV4ROOTMUTEX();
732			error = NFSERR_CLIENTIDBUSY;
733			goto out;
734		}
735	if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
736		NFSLOCKV4ROOTMUTEX();
737		nfsv4_unlock(&nfsv4rootfs_lock, 1);
738		NFSUNLOCKV4ROOTMUTEX();
739		error = NFSERR_CLIENTIDBUSY;
740		goto out;
741	}
742
743	/* Destroy the clientid and return ok. */
744	nfsrv_cleanclient(clp, p);
745	nfsrv_freedeleglist(&clp->lc_deleg);
746	nfsrv_freedeleglist(&clp->lc_olddeleg);
747	LIST_REMOVE(clp, lc_hash);
748	NFSLOCKV4ROOTMUTEX();
749	nfsv4_unlock(&nfsv4rootfs_lock, 1);
750	NFSUNLOCKV4ROOTMUTEX();
751	nfsrv_zapclient(clp, p);
752out:
753	NFSEXITCODE2(error, nd);
754	return (error);
755}
756
757/*
758 * Called from the new nfssvc syscall to admin revoke a clientid.
759 * Returns 0 for success, error otherwise.
760 */
761APPLESTATIC int
762nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
763{
764	struct nfsclient *clp = NULL;
765	int i, error = 0;
766	int gotit, igotlock;
767
768	/*
769	 * First, lock out the nfsd so that state won't change while the
770	 * revocation record is being written to the stable storage restart
771	 * file.
772	 */
773	NFSLOCKV4ROOTMUTEX();
774	do {
775		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
776		    NFSV4ROOTLOCKMUTEXPTR, NULL);
777	} while (!igotlock);
778	NFSUNLOCKV4ROOTMUTEX();
779
780	/*
781	 * Search for a match in the client list.
782	 */
783	gotit = i = 0;
784	while (i < nfsrv_clienthashsize && !gotit) {
785	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
786		if (revokep->nclid_idlen == clp->lc_idlen &&
787		    !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
788			gotit = 1;
789			break;
790		}
791	    }
792	    i++;
793	}
794	if (!gotit) {
795		NFSLOCKV4ROOTMUTEX();
796		nfsv4_unlock(&nfsv4rootfs_lock, 0);
797		NFSUNLOCKV4ROOTMUTEX();
798		error = EPERM;
799		goto out;
800	}
801
802	/*
803	 * Now, write out the revocation record
804	 */
805	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
806	nfsrv_backupstable();
807
808	/*
809	 * and clear out the state, marking the clientid revoked.
810	 */
811	clp->lc_flags &= ~LCL_CALLBACKSON;
812	clp->lc_flags |= LCL_ADMINREVOKED;
813	nfsrv_cleanclient(clp, p);
814	nfsrv_freedeleglist(&clp->lc_deleg);
815	nfsrv_freedeleglist(&clp->lc_olddeleg);
816	NFSLOCKV4ROOTMUTEX();
817	nfsv4_unlock(&nfsv4rootfs_lock, 0);
818	NFSUNLOCKV4ROOTMUTEX();
819
820out:
821	NFSEXITCODE(error);
822	return (error);
823}
824
825/*
826 * Dump out stats for all clients. Called from nfssvc(2), that is used
827 * newnfsstats.
828 */
829APPLESTATIC void
830nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
831{
832	struct nfsclient *clp;
833	int i = 0, cnt = 0;
834
835	/*
836	 * First, get a reference on the nfsv4rootfs_lock so that an
837	 * exclusive lock cannot be acquired while dumping the clients.
838	 */
839	NFSLOCKV4ROOTMUTEX();
840	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
841	NFSUNLOCKV4ROOTMUTEX();
842	NFSLOCKSTATE();
843	/*
844	 * Rattle through the client lists until done.
845	 */
846	while (i < nfsrv_clienthashsize && cnt < maxcnt) {
847	    clp = LIST_FIRST(&nfsclienthash[i]);
848	    while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
849		nfsrv_dumpaclient(clp, &dumpp[cnt]);
850		cnt++;
851		clp = LIST_NEXT(clp, lc_hash);
852	    }
853	    i++;
854	}
855	if (cnt < maxcnt)
856	    dumpp[cnt].ndcl_clid.nclid_idlen = 0;
857	NFSUNLOCKSTATE();
858	NFSLOCKV4ROOTMUTEX();
859	nfsv4_relref(&nfsv4rootfs_lock);
860	NFSUNLOCKV4ROOTMUTEX();
861}
862
863/*
864 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
865 */
866static void
867nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
868{
869	struct nfsstate *stp, *openstp, *lckownstp;
870	struct nfslock *lop;
871	struct sockaddr *sad;
872	struct sockaddr_in *rad;
873	struct sockaddr_in6 *rad6;
874
875	dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
876	dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
877	dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
878	dumpp->ndcl_flags = clp->lc_flags;
879	dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
880	NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
881	sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *);
882	dumpp->ndcl_addrfam = sad->sa_family;
883	if (sad->sa_family == AF_INET) {
884		rad = (struct sockaddr_in *)sad;
885		dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr;
886	} else {
887		rad6 = (struct sockaddr_in6 *)sad;
888		dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr;
889	}
890
891	/*
892	 * Now, scan the state lists and total up the opens and locks.
893	 */
894	LIST_FOREACH(stp, &clp->lc_open, ls_list) {
895	    dumpp->ndcl_nopenowners++;
896	    LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
897		dumpp->ndcl_nopens++;
898		LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
899		    dumpp->ndcl_nlockowners++;
900		    LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
901			dumpp->ndcl_nlocks++;
902		    }
903		}
904	    }
905	}
906
907	/*
908	 * and the delegation lists.
909	 */
910	LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
911	    dumpp->ndcl_ndelegs++;
912	}
913	LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
914	    dumpp->ndcl_nolddelegs++;
915	}
916}
917
918/*
919 * Dump out lock stats for a file.
920 */
921APPLESTATIC void
922nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
923    NFSPROC_T *p)
924{
925	struct nfsstate *stp;
926	struct nfslock *lop;
927	int cnt = 0;
928	struct nfslockfile *lfp;
929	struct sockaddr *sad;
930	struct sockaddr_in *rad;
931	struct sockaddr_in6 *rad6;
932	int ret;
933	fhandle_t nfh;
934
935	ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
936	/*
937	 * First, get a reference on the nfsv4rootfs_lock so that an
938	 * exclusive lock on it cannot be acquired while dumping the locks.
939	 */
940	NFSLOCKV4ROOTMUTEX();
941	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
942	NFSUNLOCKV4ROOTMUTEX();
943	NFSLOCKSTATE();
944	if (!ret)
945		ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
946	if (ret) {
947		ldumpp[0].ndlck_clid.nclid_idlen = 0;
948		NFSUNLOCKSTATE();
949		NFSLOCKV4ROOTMUTEX();
950		nfsv4_relref(&nfsv4rootfs_lock);
951		NFSUNLOCKV4ROOTMUTEX();
952		return;
953	}
954
955	/*
956	 * For each open share on file, dump it out.
957	 */
958	stp = LIST_FIRST(&lfp->lf_open);
959	while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
960		ldumpp[cnt].ndlck_flags = stp->ls_flags;
961		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
962		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
963		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
964		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
965		ldumpp[cnt].ndlck_owner.nclid_idlen =
966		    stp->ls_openowner->ls_ownerlen;
967		NFSBCOPY(stp->ls_openowner->ls_owner,
968		    ldumpp[cnt].ndlck_owner.nclid_id,
969		    stp->ls_openowner->ls_ownerlen);
970		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
971		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
972		    stp->ls_clp->lc_idlen);
973		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
974		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
975		if (sad->sa_family == AF_INET) {
976			rad = (struct sockaddr_in *)sad;
977			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
978		} else {
979			rad6 = (struct sockaddr_in6 *)sad;
980			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
981		}
982		stp = LIST_NEXT(stp, ls_file);
983		cnt++;
984	}
985
986	/*
987	 * and all locks.
988	 */
989	lop = LIST_FIRST(&lfp->lf_lock);
990	while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
991		stp = lop->lo_stp;
992		ldumpp[cnt].ndlck_flags = lop->lo_flags;
993		ldumpp[cnt].ndlck_first = lop->lo_first;
994		ldumpp[cnt].ndlck_end = lop->lo_end;
995		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
996		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
997		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
998		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
999		ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
1000		NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
1001		    stp->ls_ownerlen);
1002		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1003		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1004		    stp->ls_clp->lc_idlen);
1005		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
1006		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
1007		if (sad->sa_family == AF_INET) {
1008			rad = (struct sockaddr_in *)sad;
1009			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
1010		} else {
1011			rad6 = (struct sockaddr_in6 *)sad;
1012			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
1013		}
1014		lop = LIST_NEXT(lop, lo_lckfile);
1015		cnt++;
1016	}
1017
1018	/*
1019	 * and the delegations.
1020	 */
1021	stp = LIST_FIRST(&lfp->lf_deleg);
1022	while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
1023		ldumpp[cnt].ndlck_flags = stp->ls_flags;
1024		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1025		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1026		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1027		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1028		ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
1029		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1030		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1031		    stp->ls_clp->lc_idlen);
1032		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
1033		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
1034		if (sad->sa_family == AF_INET) {
1035			rad = (struct sockaddr_in *)sad;
1036			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
1037		} else {
1038			rad6 = (struct sockaddr_in6 *)sad;
1039			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
1040		}
1041		stp = LIST_NEXT(stp, ls_file);
1042		cnt++;
1043	}
1044
1045	/*
1046	 * If list isn't full, mark end of list by setting the client name
1047	 * to zero length.
1048	 */
1049	if (cnt < maxcnt)
1050		ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1051	NFSUNLOCKSTATE();
1052	NFSLOCKV4ROOTMUTEX();
1053	nfsv4_relref(&nfsv4rootfs_lock);
1054	NFSUNLOCKV4ROOTMUTEX();
1055}
1056
1057/*
1058 * Server timer routine. It can scan any linked list, so long
1059 * as it holds the spin/mutex lock and there is no exclusive lock on
1060 * nfsv4rootfs_lock.
1061 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1062 *  to do this from a callout, since the spin locks work. For
1063 *  Darwin, I'm not sure what will work correctly yet.)
1064 * Should be called once per second.
1065 */
1066APPLESTATIC void
1067nfsrv_servertimer(void)
1068{
1069	struct nfsclient *clp, *nclp;
1070	struct nfsstate *stp, *nstp;
1071	int got_ref, i;
1072
1073	/*
1074	 * Make sure nfsboottime is set. This is used by V3 as well
1075	 * as V4. Note that nfsboottime is not nfsrvboottime, which is
1076	 * only used by the V4 server for leases.
1077	 */
1078	if (nfsboottime.tv_sec == 0)
1079		NFSSETBOOTTIME(nfsboottime);
1080
1081	/*
1082	 * If server hasn't started yet, just return.
1083	 */
1084	NFSLOCKSTATE();
1085	if (nfsrv_stablefirst.nsf_eograce == 0) {
1086		NFSUNLOCKSTATE();
1087		return;
1088	}
1089	if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
1090		if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
1091		    NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
1092			nfsrv_stablefirst.nsf_flags |=
1093			    (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1094		NFSUNLOCKSTATE();
1095		return;
1096	}
1097
1098	/*
1099	 * Try and get a reference count on the nfsv4rootfs_lock so that
1100	 * no nfsd thread can acquire an exclusive lock on it before this
1101	 * call is done. If it is already exclusively locked, just return.
1102	 */
1103	NFSLOCKV4ROOTMUTEX();
1104	got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1105	NFSUNLOCKV4ROOTMUTEX();
1106	if (got_ref == 0) {
1107		NFSUNLOCKSTATE();
1108		return;
1109	}
1110
1111	/*
1112	 * For each client...
1113	 */
1114	for (i = 0; i < nfsrv_clienthashsize; i++) {
1115	    clp = LIST_FIRST(&nfsclienthash[i]);
1116	    while (clp != LIST_END(&nfsclienthash[i])) {
1117		nclp = LIST_NEXT(clp, lc_hash);
1118		if (!(clp->lc_flags & LCL_EXPIREIT)) {
1119		    if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1120			 && ((LIST_EMPTY(&clp->lc_deleg)
1121			      && LIST_EMPTY(&clp->lc_open)) ||
1122			     nfsrv_clients > nfsrv_clienthighwater)) ||
1123			(clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1124			(clp->lc_expiry < NFSD_MONOSEC &&
1125			 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
1126			/*
1127			 * Lease has expired several nfsrv_lease times ago:
1128			 * PLUS
1129			 *    - no state is associated with it
1130			 *    OR
1131			 *    - above high water mark for number of clients
1132			 *      (nfsrv_clienthighwater should be large enough
1133			 *       that this only occurs when clients fail to
1134			 *       use the same nfs_client_id4.id. Maybe somewhat
1135			 *       higher that the maximum number of clients that
1136			 *       will mount this server?)
1137			 * OR
1138			 * Lease has expired a very long time ago
1139			 * OR
1140			 * Lease has expired PLUS the number of opens + locks
1141			 * has exceeded 90% of capacity
1142			 *
1143			 * --> Mark for expiry. The actual expiry will be done
1144			 *     by an nfsd sometime soon.
1145			 */
1146			clp->lc_flags |= LCL_EXPIREIT;
1147			nfsrv_stablefirst.nsf_flags |=
1148			    (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1149		    } else {
1150			/*
1151			 * If there are no opens, increment no open tick cnt
1152			 * If time exceeds NFSNOOPEN, mark it to be thrown away
1153			 * otherwise, if there is an open, reset no open time
1154			 * Hopefully, this will avoid excessive re-creation
1155			 * of open owners and subsequent open confirms.
1156			 */
1157			stp = LIST_FIRST(&clp->lc_open);
1158			while (stp != LIST_END(&clp->lc_open)) {
1159				nstp = LIST_NEXT(stp, ls_list);
1160				if (LIST_EMPTY(&stp->ls_open)) {
1161					stp->ls_noopens++;
1162					if (stp->ls_noopens > NFSNOOPEN ||
1163					    (nfsrv_openpluslock * 2) >
1164					    nfsrv_v4statelimit)
1165						nfsrv_stablefirst.nsf_flags |=
1166							NFSNSF_NOOPENS;
1167				} else {
1168					stp->ls_noopens = 0;
1169				}
1170				stp = nstp;
1171			}
1172		    }
1173		}
1174		clp = nclp;
1175	    }
1176	}
1177	NFSUNLOCKSTATE();
1178	NFSLOCKV4ROOTMUTEX();
1179	nfsv4_relref(&nfsv4rootfs_lock);
1180	NFSUNLOCKV4ROOTMUTEX();
1181}
1182
1183/*
1184 * The following set of functions free up the various data structures.
1185 */
1186/*
1187 * Clear out all open/lock state related to this nfsclient.
1188 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1189 * there are no other active nfsd threads.
1190 */
1191APPLESTATIC void
1192nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
1193{
1194	struct nfsstate *stp, *nstp;
1195	struct nfsdsession *sep, *nsep;
1196
1197	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
1198		nfsrv_freeopenowner(stp, 1, p);
1199	if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1200		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1201			(void)nfsrv_freesession(sep, NULL);
1202}
1203
1204/*
1205 * Free a client that has been cleaned. It should also already have been
1206 * removed from the lists.
1207 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
1208 *  softclock interrupts are enabled.)
1209 */
1210APPLESTATIC void
1211nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
1212{
1213
1214#ifdef notyet
1215	if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1216	     (LCL_GSS | LCL_CALLBACKSON) &&
1217	    (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1218	    clp->lc_handlelen > 0) {
1219		clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1220		clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1221		(void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1222			NULL, 0, NULL, NULL, NULL, p);
1223	}
1224#endif
1225	newnfs_disconnect(&clp->lc_req);
1226	NFSSOCKADDRFREE(clp->lc_req.nr_nam);
1227	NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1228	free(clp->lc_stateid, M_NFSDCLIENT);
1229	free(clp, M_NFSDCLIENT);
1230	NFSLOCKSTATE();
1231	newnfsstats.srvclients--;
1232	nfsrv_openpluslock--;
1233	nfsrv_clients--;
1234	NFSUNLOCKSTATE();
1235}
1236
1237/*
1238 * Free a list of delegation state structures.
1239 * (This function will also free all nfslockfile structures that no
1240 *  longer have associated state.)
1241 */
1242APPLESTATIC void
1243nfsrv_freedeleglist(struct nfsstatehead *sthp)
1244{
1245	struct nfsstate *stp, *nstp;
1246
1247	LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1248		nfsrv_freedeleg(stp);
1249	}
1250	LIST_INIT(sthp);
1251}
1252
1253/*
1254 * Free up a delegation.
1255 */
1256static void
1257nfsrv_freedeleg(struct nfsstate *stp)
1258{
1259	struct nfslockfile *lfp;
1260
1261	LIST_REMOVE(stp, ls_hash);
1262	LIST_REMOVE(stp, ls_list);
1263	LIST_REMOVE(stp, ls_file);
1264	lfp = stp->ls_lfp;
1265	if (LIST_EMPTY(&lfp->lf_open) &&
1266	    LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1267	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1268	    lfp->lf_usecount == 0 &&
1269	    nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1270		nfsrv_freenfslockfile(lfp);
1271	FREE((caddr_t)stp, M_NFSDSTATE);
1272	newnfsstats.srvdelegates--;
1273	nfsrv_openpluslock--;
1274	nfsrv_delegatecnt--;
1275}
1276
1277/*
1278 * This function frees an open owner and all associated opens.
1279 */
1280static void
1281nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1282{
1283	struct nfsstate *nstp, *tstp;
1284
1285	LIST_REMOVE(stp, ls_list);
1286	/*
1287	 * Now, free all associated opens.
1288	 */
1289	nstp = LIST_FIRST(&stp->ls_open);
1290	while (nstp != LIST_END(&stp->ls_open)) {
1291		tstp = nstp;
1292		nstp = LIST_NEXT(nstp, ls_list);
1293		(void) nfsrv_freeopen(tstp, NULL, cansleep, p);
1294	}
1295	if (stp->ls_op)
1296		nfsrvd_derefcache(stp->ls_op);
1297	FREE((caddr_t)stp, M_NFSDSTATE);
1298	newnfsstats.srvopenowners--;
1299	nfsrv_openpluslock--;
1300}
1301
1302/*
1303 * This function frees an open (nfsstate open structure) with all associated
1304 * lock_owners and locks. It also frees the nfslockfile structure iff there
1305 * are no other opens on the file.
1306 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1307 */
1308static int
1309nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1310{
1311	struct nfsstate *nstp, *tstp;
1312	struct nfslockfile *lfp;
1313	int ret;
1314
1315	LIST_REMOVE(stp, ls_hash);
1316	LIST_REMOVE(stp, ls_list);
1317	LIST_REMOVE(stp, ls_file);
1318
1319	lfp = stp->ls_lfp;
1320	/*
1321	 * Now, free all lockowners associated with this open.
1322	 */
1323	LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1324		nfsrv_freelockowner(tstp, vp, cansleep, p);
1325
1326	/*
1327	 * The nfslockfile is freed here if there are no locks
1328	 * associated with the open.
1329	 * If there are locks associated with the open, the
1330	 * nfslockfile structure can be freed via nfsrv_freelockowner().
1331	 * Acquire the state mutex to avoid races with calls to
1332	 * nfsrv_getlockfile().
1333	 */
1334	if (cansleep != 0)
1335		NFSLOCKSTATE();
1336	if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1337	    LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1338	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1339	    lfp->lf_usecount == 0 &&
1340	    (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
1341		nfsrv_freenfslockfile(lfp);
1342		ret = 1;
1343	} else
1344		ret = 0;
1345	if (cansleep != 0)
1346		NFSUNLOCKSTATE();
1347	FREE((caddr_t)stp, M_NFSDSTATE);
1348	newnfsstats.srvopens--;
1349	nfsrv_openpluslock--;
1350	return (ret);
1351}
1352
1353/*
1354 * Frees a lockowner and all associated locks.
1355 */
1356static void
1357nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1358    NFSPROC_T *p)
1359{
1360
1361	LIST_REMOVE(stp, ls_hash);
1362	LIST_REMOVE(stp, ls_list);
1363	nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1364	if (stp->ls_op)
1365		nfsrvd_derefcache(stp->ls_op);
1366	FREE((caddr_t)stp, M_NFSDSTATE);
1367	newnfsstats.srvlockowners--;
1368	nfsrv_openpluslock--;
1369}
1370
1371/*
1372 * Free all the nfs locks on a lockowner.
1373 */
1374static void
1375nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1376    NFSPROC_T *p)
1377{
1378	struct nfslock *lop, *nlop;
1379	struct nfsrollback *rlp, *nrlp;
1380	struct nfslockfile *lfp = NULL;
1381	int gottvp = 0;
1382	vnode_t tvp = NULL;
1383	uint64_t first, end;
1384
1385	if (vp != NULL)
1386		ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1387	lop = LIST_FIRST(&stp->ls_lock);
1388	while (lop != LIST_END(&stp->ls_lock)) {
1389		nlop = LIST_NEXT(lop, lo_lckowner);
1390		/*
1391		 * Since all locks should be for the same file, lfp should
1392		 * not change.
1393		 */
1394		if (lfp == NULL)
1395			lfp = lop->lo_lfp;
1396		else if (lfp != lop->lo_lfp)
1397			panic("allnfslocks");
1398		/*
1399		 * If vp is NULL and cansleep != 0, a vnode must be acquired
1400		 * from the file handle. This only occurs when called from
1401		 * nfsrv_cleanclient().
1402		 */
1403		if (gottvp == 0) {
1404			if (nfsrv_dolocallocks == 0)
1405				tvp = NULL;
1406			else if (vp == NULL && cansleep != 0) {
1407				tvp = nfsvno_getvp(&lfp->lf_fh);
1408				NFSVOPUNLOCK(tvp, 0);
1409			} else
1410				tvp = vp;
1411			gottvp = 1;
1412		}
1413
1414		if (tvp != NULL) {
1415			if (cansleep == 0)
1416				panic("allnfs2");
1417			first = lop->lo_first;
1418			end = lop->lo_end;
1419			nfsrv_freenfslock(lop);
1420			nfsrv_localunlock(tvp, lfp, first, end, p);
1421			LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1422			    nrlp)
1423				free(rlp, M_NFSDROLLBACK);
1424			LIST_INIT(&lfp->lf_rollback);
1425		} else
1426			nfsrv_freenfslock(lop);
1427		lop = nlop;
1428	}
1429	if (vp == NULL && tvp != NULL)
1430		vrele(tvp);
1431}
1432
1433/*
1434 * Free an nfslock structure.
1435 */
1436static void
1437nfsrv_freenfslock(struct nfslock *lop)
1438{
1439
1440	if (lop->lo_lckfile.le_prev != NULL) {
1441		LIST_REMOVE(lop, lo_lckfile);
1442		newnfsstats.srvlocks--;
1443		nfsrv_openpluslock--;
1444	}
1445	LIST_REMOVE(lop, lo_lckowner);
1446	FREE((caddr_t)lop, M_NFSDLOCK);
1447}
1448
1449/*
1450 * This function frees an nfslockfile structure.
1451 */
1452static void
1453nfsrv_freenfslockfile(struct nfslockfile *lfp)
1454{
1455
1456	LIST_REMOVE(lfp, lf_hash);
1457	FREE((caddr_t)lfp, M_NFSDLOCKFILE);
1458}
1459
1460/*
1461 * This function looks up an nfsstate structure via stateid.
1462 */
1463static int
1464nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1465    struct nfsstate **stpp)
1466{
1467	struct nfsstate *stp;
1468	struct nfsstatehead *hp;
1469	int error = 0;
1470
1471	*stpp = NULL;
1472	hp = NFSSTATEHASH(clp, *stateidp);
1473	LIST_FOREACH(stp, hp, ls_hash) {
1474		if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1475			NFSX_STATEIDOTHER))
1476			break;
1477	}
1478
1479	/*
1480	 * If no state id in list, return NFSERR_BADSTATEID.
1481	 */
1482	if (stp == LIST_END(hp)) {
1483		error = NFSERR_BADSTATEID;
1484		goto out;
1485	}
1486	*stpp = stp;
1487
1488out:
1489	NFSEXITCODE(error);
1490	return (error);
1491}
1492
1493/*
1494 * This function gets an nfsstate structure via owner string.
1495 */
1496static void
1497nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1498    struct nfsstate **stpp)
1499{
1500	struct nfsstate *stp;
1501
1502	*stpp = NULL;
1503	LIST_FOREACH(stp, hp, ls_list) {
1504		if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1505		  !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1506			*stpp = stp;
1507			return;
1508		}
1509	}
1510}
1511
1512/*
1513 * Lock control function called to update lock status.
1514 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1515 * that one isn't to be created and an NFSERR_xxx for other errors.
1516 * The structures new_stp and new_lop are passed in as pointers that should
1517 * be set to NULL if the structure is used and shouldn't be free'd.
1518 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1519 * never used and can safely be allocated on the stack. For all other
1520 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1521 * in case they are used.
1522 */
1523APPLESTATIC int
1524nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1525    struct nfslock **new_lopp, struct nfslockconflict *cfp,
1526    nfsquad_t clientid, nfsv4stateid_t *stateidp,
1527    __unused struct nfsexstuff *exp,
1528    struct nfsrv_descript *nd, NFSPROC_T *p)
1529{
1530	struct nfslock *lop;
1531	struct nfsstate *new_stp = *new_stpp;
1532	struct nfslock *new_lop = *new_lopp;
1533	struct nfsstate *tstp, *mystp, *nstp;
1534	int specialid = 0;
1535	struct nfslockfile *lfp;
1536	struct nfslock *other_lop = NULL;
1537	struct nfsstate *stp, *lckstp = NULL;
1538	struct nfsclient *clp = NULL;
1539	u_int32_t bits;
1540	int error = 0, haslock = 0, ret, reterr;
1541	int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1542	fhandle_t nfh;
1543	uint64_t first, end;
1544	uint32_t lock_flags;
1545
1546	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1547		/*
1548		 * Note the special cases of "all 1s" or "all 0s" stateids and
1549		 * let reads with all 1s go ahead.
1550		 */
1551		if (new_stp->ls_stateid.seqid == 0x0 &&
1552		    new_stp->ls_stateid.other[0] == 0x0 &&
1553		    new_stp->ls_stateid.other[1] == 0x0 &&
1554		    new_stp->ls_stateid.other[2] == 0x0)
1555			specialid = 1;
1556		else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1557		    new_stp->ls_stateid.other[0] == 0xffffffff &&
1558		    new_stp->ls_stateid.other[1] == 0xffffffff &&
1559		    new_stp->ls_stateid.other[2] == 0xffffffff)
1560			specialid = 2;
1561	}
1562
1563	/*
1564	 * Check for restart conditions (client and server).
1565	 */
1566	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1567	    &new_stp->ls_stateid, specialid);
1568	if (error)
1569		goto out;
1570
1571	/*
1572	 * Check for state resource limit exceeded.
1573	 */
1574	if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1575	    nfsrv_openpluslock > nfsrv_v4statelimit) {
1576		error = NFSERR_RESOURCE;
1577		goto out;
1578	}
1579
1580	/*
1581	 * For the lock case, get another nfslock structure,
1582	 * just in case we need it.
1583	 * Malloc now, before we start sifting through the linked lists,
1584	 * in case we have to wait for memory.
1585	 */
1586tryagain:
1587	if (new_stp->ls_flags & NFSLCK_LOCK)
1588		MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock),
1589		    M_NFSDLOCK, M_WAITOK);
1590	filestruct_locked = 0;
1591	reterr = 0;
1592	lfp = NULL;
1593
1594	/*
1595	 * Get the lockfile structure for CFH now, so we can do a sanity
1596	 * check against the stateid, before incrementing the seqid#, since
1597	 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1598	 * shouldn't be incremented for this case.
1599	 * If nfsrv_getlockfile() returns -1, it means "not found", which
1600	 * will be handled later.
1601	 * If we are doing Lock/LockU and local locking is enabled, sleep
1602	 * lock the nfslockfile structure.
1603	 */
1604	getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1605	NFSLOCKSTATE();
1606	if (getlckret == 0) {
1607		if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1608		    nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1609			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1610			    &lfp, &nfh, 1);
1611			if (getlckret == 0)
1612				filestruct_locked = 1;
1613		} else
1614			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1615			    &lfp, &nfh, 0);
1616	}
1617	if (getlckret != 0 && getlckret != -1)
1618		reterr = getlckret;
1619
1620	if (filestruct_locked != 0) {
1621		LIST_INIT(&lfp->lf_rollback);
1622		if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1623			/*
1624			 * For local locking, do the advisory locking now, so
1625			 * that any conflict can be detected. A failure later
1626			 * can be rolled back locally. If an error is returned,
1627			 * struct nfslockfile has been unlocked and any local
1628			 * locking rolled back.
1629			 */
1630			NFSUNLOCKSTATE();
1631			if (vnode_unlocked == 0) {
1632				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1633				vnode_unlocked = 1;
1634				NFSVOPUNLOCK(vp, 0);
1635			}
1636			reterr = nfsrv_locallock(vp, lfp,
1637			    (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1638			    new_lop->lo_first, new_lop->lo_end, cfp, p);
1639			NFSLOCKSTATE();
1640		}
1641	}
1642
1643	if (specialid == 0) {
1644	    if (new_stp->ls_flags & NFSLCK_TEST) {
1645		/*
1646		 * RFC 3530 does not list LockT as an op that renews a
1647		 * lease, but the concensus seems to be that it is ok
1648		 * for a server to do so.
1649		 */
1650		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1651		    (nfsquad_t)((u_quad_t)0), 0, nd, p);
1652
1653		/*
1654		 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1655		 * error returns for LockT, just go ahead and test for a lock,
1656		 * since there are no locks for this client, but other locks
1657		 * can conflict. (ie. same client will always be false)
1658		 */
1659		if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1660		    error = 0;
1661		lckstp = new_stp;
1662	    } else {
1663	      error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1664		(nfsquad_t)((u_quad_t)0), 0, nd, p);
1665	      if (error == 0)
1666		/*
1667		 * Look up the stateid
1668		 */
1669		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1670		  new_stp->ls_flags, &stp);
1671	      /*
1672	       * do some sanity checks for an unconfirmed open or a
1673	       * stateid that refers to the wrong file, for an open stateid
1674	       */
1675	      if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1676		  ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1677		   (getlckret == 0 && stp->ls_lfp != lfp)))
1678			error = NFSERR_BADSTATEID;
1679	      if (error == 0 &&
1680		  (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1681		  getlckret == 0 && stp->ls_lfp != lfp)
1682			error = NFSERR_BADSTATEID;
1683
1684	      /*
1685	       * If the lockowner stateid doesn't refer to the same file,
1686	       * I believe that is considered ok, since some clients will
1687	       * only create a single lockowner and use that for all locks
1688	       * on all files.
1689	       * For now, log it as a diagnostic, instead of considering it
1690	       * a BadStateid.
1691	       */
1692	      if (error == 0 && (stp->ls_flags &
1693		  (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1694		  getlckret == 0 && stp->ls_lfp != lfp) {
1695#ifdef DIAGNOSTIC
1696		  printf("Got a lock statid for different file open\n");
1697#endif
1698		  /*
1699		  error = NFSERR_BADSTATEID;
1700		  */
1701	      }
1702
1703	      if (error == 0) {
1704		    if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
1705			/*
1706			 * If haslock set, we've already checked the seqid.
1707			 */
1708			if (!haslock) {
1709			    if (stp->ls_flags & NFSLCK_OPEN)
1710				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1711				    stp->ls_openowner, new_stp->ls_op);
1712			    else
1713				error = NFSERR_BADSTATEID;
1714			}
1715			if (!error)
1716			    nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
1717			if (lckstp)
1718			    /*
1719			     * I believe this should be an error, but it
1720			     * isn't obvious what NFSERR_xxx would be
1721			     * appropriate, so I'll use NFSERR_INVAL for now.
1722			     */
1723			    error = NFSERR_INVAL;
1724			else
1725			    lckstp = new_stp;
1726		    } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
1727			/*
1728			 * If haslock set, ditto above.
1729			 */
1730			if (!haslock) {
1731			    if (stp->ls_flags & NFSLCK_OPEN)
1732				error = NFSERR_BADSTATEID;
1733			    else
1734				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1735				    stp, new_stp->ls_op);
1736			}
1737			lckstp = stp;
1738		    } else {
1739			lckstp = stp;
1740		    }
1741	      }
1742	      /*
1743	       * If the seqid part of the stateid isn't the same, return
1744	       * NFSERR_OLDSTATEID for cases other than I/O Ops.
1745	       * For I/O Ops, only return NFSERR_OLDSTATEID if
1746	       * nfsrv_returnoldstateid is set. (The concensus on the email
1747	       * list was that most clients would prefer to not receive
1748	       * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
1749	       * is what will happen, so I use the nfsrv_returnoldstateid to
1750	       * allow for either server configuration.)
1751	       */
1752	      if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
1753		  (((nd->nd_flag & ND_NFSV41) == 0 &&
1754		   (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1755		    nfsrv_returnoldstateid)) ||
1756		   ((nd->nd_flag & ND_NFSV41) != 0 &&
1757		    new_stp->ls_stateid.seqid != 0)))
1758		    error = NFSERR_OLDSTATEID;
1759	    }
1760	}
1761
1762	/*
1763	 * Now we can check for grace.
1764	 */
1765	if (!error)
1766		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
1767	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
1768		nfsrv_checkstable(clp))
1769		error = NFSERR_NOGRACE;
1770	/*
1771	 * If we successfully Reclaimed state, note that.
1772	 */
1773	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
1774		nfsrv_markstable(clp);
1775
1776	/*
1777	 * At this point, either error == NFSERR_BADSTATEID or the
1778	 * seqid# has been updated, so we can return any error.
1779	 * If error == 0, there may be an error in:
1780	 *    nd_repstat - Set by the calling function.
1781	 *    reterr - Set above, if getting the nfslockfile structure
1782	 *       or acquiring the local lock failed.
1783	 *    (If both of these are set, nd_repstat should probably be
1784	 *     returned, since that error was detected before this
1785	 *     function call.)
1786	 */
1787	if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
1788		if (error == 0) {
1789			if (nd->nd_repstat != 0)
1790				error = nd->nd_repstat;
1791			else
1792				error = reterr;
1793		}
1794		if (filestruct_locked != 0) {
1795			/* Roll back local locks. */
1796			NFSUNLOCKSTATE();
1797			if (vnode_unlocked == 0) {
1798				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
1799				vnode_unlocked = 1;
1800				NFSVOPUNLOCK(vp, 0);
1801			}
1802			nfsrv_locallock_rollback(vp, lfp, p);
1803			NFSLOCKSTATE();
1804			nfsrv_unlocklf(lfp);
1805		}
1806		NFSUNLOCKSTATE();
1807		goto out;
1808	}
1809
1810	/*
1811	 * Check the nfsrv_getlockfile return.
1812	 * Returned -1 if no structure found.
1813	 */
1814	if (getlckret == -1) {
1815		error = NFSERR_EXPIRED;
1816		/*
1817		 * Called from lockt, so no lock is OK.
1818		 */
1819		if (new_stp->ls_flags & NFSLCK_TEST) {
1820			error = 0;
1821		} else if (new_stp->ls_flags &
1822		    (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1823			/*
1824			 * Called to check for a lock, OK if the stateid is all
1825			 * 1s or all 0s, but there should be an nfsstate
1826			 * otherwise.
1827			 * (ie. If there is no open, I'll assume no share
1828			 *  deny bits.)
1829			 */
1830			if (specialid)
1831				error = 0;
1832			else
1833				error = NFSERR_BADSTATEID;
1834		}
1835		NFSUNLOCKSTATE();
1836		goto out;
1837	}
1838
1839	/*
1840	 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
1841	 * For NFSLCK_CHECK, allow a read if write access is granted,
1842	 * but check for a deny. For NFSLCK_LOCK, require correct access,
1843	 * which implies a conflicting deny can't exist.
1844	 */
1845	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
1846	    /*
1847	     * Four kinds of state id:
1848	     * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
1849	     * - stateid for an open
1850	     * - stateid for a delegation
1851	     * - stateid for a lock owner
1852	     */
1853	    if (!specialid) {
1854		if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
1855		    delegation = 1;
1856		    mystp = stp;
1857		    nfsrv_delaydelegtimeout(stp);
1858	        } else if (stp->ls_flags & NFSLCK_OPEN) {
1859		    mystp = stp;
1860		} else {
1861		    mystp = stp->ls_openstp;
1862		}
1863		/*
1864		 * If locking or checking, require correct access
1865		 * bit set.
1866		 */
1867		if (((new_stp->ls_flags & NFSLCK_LOCK) &&
1868		     !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
1869		       mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
1870		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
1871		      (NFSLCK_CHECK | NFSLCK_READACCESS) &&
1872		     !(mystp->ls_flags & NFSLCK_READACCESS)) ||
1873		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
1874		      (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
1875		     !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
1876			if (filestruct_locked != 0) {
1877				/* Roll back local locks. */
1878				NFSUNLOCKSTATE();
1879				if (vnode_unlocked == 0) {
1880					ASSERT_VOP_ELOCKED(vp,
1881					    "nfsrv_lockctrl3");
1882					vnode_unlocked = 1;
1883					NFSVOPUNLOCK(vp, 0);
1884				}
1885				nfsrv_locallock_rollback(vp, lfp, p);
1886				NFSLOCKSTATE();
1887				nfsrv_unlocklf(lfp);
1888			}
1889			NFSUNLOCKSTATE();
1890			error = NFSERR_OPENMODE;
1891			goto out;
1892		}
1893	    } else
1894		mystp = NULL;
1895	    if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
1896		/*
1897		 * Check for a conflicting deny bit.
1898		 */
1899		LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
1900		    if (tstp != mystp) {
1901			bits = tstp->ls_flags;
1902			bits >>= NFSLCK_SHIFT;
1903			if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
1904			    KASSERT(vnode_unlocked == 0,
1905				("nfsrv_lockctrl: vnode unlocked1"));
1906			    ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
1907				vp, p);
1908			    if (ret == 1) {
1909				/*
1910				* nfsrv_clientconflict unlocks state
1911				 * when it returns non-zero.
1912				 */
1913				lckstp = NULL;
1914				goto tryagain;
1915			    }
1916			    if (ret == 0)
1917				NFSUNLOCKSTATE();
1918			    if (ret == 2)
1919				error = NFSERR_PERM;
1920			    else
1921				error = NFSERR_OPENMODE;
1922			    goto out;
1923			}
1924		    }
1925		}
1926
1927		/* We're outta here */
1928		NFSUNLOCKSTATE();
1929		goto out;
1930	    }
1931	}
1932
1933	/*
1934	 * For setattr, just get rid of all the Delegations for other clients.
1935	 */
1936	if (new_stp->ls_flags & NFSLCK_SETATTR) {
1937		KASSERT(vnode_unlocked == 0,
1938		    ("nfsrv_lockctrl: vnode unlocked2"));
1939		ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
1940		if (ret) {
1941			/*
1942			 * nfsrv_cleandeleg() unlocks state when it
1943			 * returns non-zero.
1944			 */
1945			if (ret == -1) {
1946				lckstp = NULL;
1947				goto tryagain;
1948			}
1949			error = ret;
1950			goto out;
1951		}
1952		if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1953		    (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
1954		     LIST_EMPTY(&lfp->lf_deleg))) {
1955			NFSUNLOCKSTATE();
1956			goto out;
1957		}
1958	}
1959
1960	/*
1961	 * Check for a conflicting delegation. If one is found, call
1962	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
1963	 * been set yet, it will get the lock. Otherwise, it will recall
1964	 * the delegation. Then, we try try again...
1965	 * I currently believe the conflict algorithm to be:
1966	 * For Lock Ops (Lock/LockT/LockU)
1967	 * - there is a conflict iff a different client has a write delegation
1968	 * For Reading (Read Op)
1969	 * - there is a conflict iff a different client has a write delegation
1970	 *   (the specialids are always a different client)
1971	 * For Writing (Write/Setattr of size)
1972	 * - there is a conflict if a different client has any delegation
1973	 * - there is a conflict if the same client has a read delegation
1974	 *   (I don't understand why this isn't allowed, but that seems to be
1975	 *    the current concensus?)
1976	 */
1977	tstp = LIST_FIRST(&lfp->lf_deleg);
1978	while (tstp != LIST_END(&lfp->lf_deleg)) {
1979	    nstp = LIST_NEXT(tstp, ls_file);
1980	    if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
1981		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
1982		  (new_lop->lo_flags & NFSLCK_READ))) &&
1983		  clp != tstp->ls_clp &&
1984		 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
1985		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
1986		   (new_lop->lo_flags & NFSLCK_WRITE) &&
1987		  (clp != tstp->ls_clp ||
1988		   (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
1989		ret = 0;
1990		if (filestruct_locked != 0) {
1991			/* Roll back local locks. */
1992			NFSUNLOCKSTATE();
1993			if (vnode_unlocked == 0) {
1994				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
1995				NFSVOPUNLOCK(vp, 0);
1996			}
1997			nfsrv_locallock_rollback(vp, lfp, p);
1998			NFSLOCKSTATE();
1999			nfsrv_unlocklf(lfp);
2000			NFSUNLOCKSTATE();
2001			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2002			vnode_unlocked = 0;
2003			if ((vp->v_iflag & VI_DOOMED) != 0)
2004				ret = NFSERR_SERVERFAULT;
2005			NFSLOCKSTATE();
2006		}
2007		if (ret == 0)
2008			ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
2009		if (ret) {
2010		    /*
2011		     * nfsrv_delegconflict unlocks state when it
2012		     * returns non-zero, which it always does.
2013		     */
2014		    if (other_lop) {
2015			FREE((caddr_t)other_lop, M_NFSDLOCK);
2016			other_lop = NULL;
2017		    }
2018		    if (ret == -1) {
2019			lckstp = NULL;
2020			goto tryagain;
2021		    }
2022		    error = ret;
2023		    goto out;
2024		}
2025		/* Never gets here. */
2026	    }
2027	    tstp = nstp;
2028	}
2029
2030	/*
2031	 * Handle the unlock case by calling nfsrv_updatelock().
2032	 * (Should I have done some access checking above for unlock? For now,
2033	 *  just let it happen.)
2034	 */
2035	if (new_stp->ls_flags & NFSLCK_UNLOCK) {
2036		first = new_lop->lo_first;
2037		end = new_lop->lo_end;
2038		nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2039		stateidp->seqid = ++(stp->ls_stateid.seqid);
2040		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2041			stateidp->seqid = stp->ls_stateid.seqid = 1;
2042		stateidp->other[0] = stp->ls_stateid.other[0];
2043		stateidp->other[1] = stp->ls_stateid.other[1];
2044		stateidp->other[2] = stp->ls_stateid.other[2];
2045		if (filestruct_locked != 0) {
2046			NFSUNLOCKSTATE();
2047			if (vnode_unlocked == 0) {
2048				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2049				vnode_unlocked = 1;
2050				NFSVOPUNLOCK(vp, 0);
2051			}
2052			/* Update the local locks. */
2053			nfsrv_localunlock(vp, lfp, first, end, p);
2054			NFSLOCKSTATE();
2055			nfsrv_unlocklf(lfp);
2056		}
2057		NFSUNLOCKSTATE();
2058		goto out;
2059	}
2060
2061	/*
2062	 * Search for a conflicting lock. A lock conflicts if:
2063	 * - the lock range overlaps and
2064	 * - at least one lock is a write lock and
2065	 * - it is not owned by the same lock owner
2066	 */
2067	if (!delegation) {
2068	  LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2069	    if (new_lop->lo_end > lop->lo_first &&
2070		new_lop->lo_first < lop->lo_end &&
2071		(new_lop->lo_flags == NFSLCK_WRITE ||
2072		 lop->lo_flags == NFSLCK_WRITE) &&
2073		lckstp != lop->lo_stp &&
2074		(clp != lop->lo_stp->ls_clp ||
2075		 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2076		 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2077		    lckstp->ls_ownerlen))) {
2078		if (other_lop) {
2079		    FREE((caddr_t)other_lop, M_NFSDLOCK);
2080		    other_lop = NULL;
2081		}
2082		if (vnode_unlocked != 0)
2083		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2084			NULL, p);
2085		else
2086		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2087			vp, p);
2088		if (ret == 1) {
2089		    if (filestruct_locked != 0) {
2090			if (vnode_unlocked == 0) {
2091				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2092				NFSVOPUNLOCK(vp, 0);
2093			}
2094			/* Roll back local locks. */
2095			nfsrv_locallock_rollback(vp, lfp, p);
2096			NFSLOCKSTATE();
2097			nfsrv_unlocklf(lfp);
2098			NFSUNLOCKSTATE();
2099			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2100			vnode_unlocked = 0;
2101			if ((vp->v_iflag & VI_DOOMED) != 0) {
2102				error = NFSERR_SERVERFAULT;
2103				goto out;
2104			}
2105		    }
2106		    /*
2107		     * nfsrv_clientconflict() unlocks state when it
2108		     * returns non-zero.
2109		     */
2110		    lckstp = NULL;
2111		    goto tryagain;
2112		}
2113		/*
2114		 * Found a conflicting lock, so record the conflict and
2115		 * return the error.
2116		 */
2117		if (cfp != NULL && ret == 0) {
2118		    cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2119		    cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2120		    cfp->cl_first = lop->lo_first;
2121		    cfp->cl_end = lop->lo_end;
2122		    cfp->cl_flags = lop->lo_flags;
2123		    cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2124		    NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2125			cfp->cl_ownerlen);
2126		}
2127		if (ret == 2)
2128		    error = NFSERR_PERM;
2129		else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2130		    error = NFSERR_RECLAIMCONFLICT;
2131		else if (new_stp->ls_flags & NFSLCK_CHECK)
2132		    error = NFSERR_LOCKED;
2133		else
2134		    error = NFSERR_DENIED;
2135		if (filestruct_locked != 0 && ret == 0) {
2136			/* Roll back local locks. */
2137			NFSUNLOCKSTATE();
2138			if (vnode_unlocked == 0) {
2139				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2140				vnode_unlocked = 1;
2141				NFSVOPUNLOCK(vp, 0);
2142			}
2143			nfsrv_locallock_rollback(vp, lfp, p);
2144			NFSLOCKSTATE();
2145			nfsrv_unlocklf(lfp);
2146		}
2147		if (ret == 0)
2148			NFSUNLOCKSTATE();
2149		goto out;
2150	    }
2151	  }
2152	}
2153
2154	/*
2155	 * We only get here if there was no lock that conflicted.
2156	 */
2157	if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2158		NFSUNLOCKSTATE();
2159		goto out;
2160	}
2161
2162	/*
2163	 * We only get here when we are creating or modifying a lock.
2164	 * There are two variants:
2165	 * - exist_lock_owner where lock_owner exists
2166	 * - open_to_lock_owner with new lock_owner
2167	 */
2168	first = new_lop->lo_first;
2169	end = new_lop->lo_end;
2170	lock_flags = new_lop->lo_flags;
2171	if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2172		nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2173		stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2174		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2175			stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2176		stateidp->other[0] = lckstp->ls_stateid.other[0];
2177		stateidp->other[1] = lckstp->ls_stateid.other[1];
2178		stateidp->other[2] = lckstp->ls_stateid.other[2];
2179	} else {
2180		/*
2181		 * The new open_to_lock_owner case.
2182		 * Link the new nfsstate into the lists.
2183		 */
2184		new_stp->ls_seq = new_stp->ls_opentolockseq;
2185		nfsrvd_refcache(new_stp->ls_op);
2186		stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2187		stateidp->other[0] = new_stp->ls_stateid.other[0] =
2188		    clp->lc_clientid.lval[0];
2189		stateidp->other[1] = new_stp->ls_stateid.other[1] =
2190		    clp->lc_clientid.lval[1];
2191		stateidp->other[2] = new_stp->ls_stateid.other[2] =
2192		    nfsrv_nextstateindex(clp);
2193		new_stp->ls_clp = clp;
2194		LIST_INIT(&new_stp->ls_lock);
2195		new_stp->ls_openstp = stp;
2196		new_stp->ls_lfp = lfp;
2197		nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2198		    lfp);
2199		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2200		    new_stp, ls_hash);
2201		LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2202		*new_lopp = NULL;
2203		*new_stpp = NULL;
2204		newnfsstats.srvlockowners++;
2205		nfsrv_openpluslock++;
2206	}
2207	if (filestruct_locked != 0) {
2208		NFSUNLOCKSTATE();
2209		nfsrv_locallock_commit(lfp, lock_flags, first, end);
2210		NFSLOCKSTATE();
2211		nfsrv_unlocklf(lfp);
2212	}
2213	NFSUNLOCKSTATE();
2214
2215out:
2216	if (haslock) {
2217		NFSLOCKV4ROOTMUTEX();
2218		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2219		NFSUNLOCKV4ROOTMUTEX();
2220	}
2221	if (vnode_unlocked != 0) {
2222		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2223		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
2224			error = NFSERR_SERVERFAULT;
2225	}
2226	if (other_lop)
2227		FREE((caddr_t)other_lop, M_NFSDLOCK);
2228	NFSEXITCODE2(error, nd);
2229	return (error);
2230}
2231
2232/*
2233 * Check for state errors for Open.
2234 * repstat is passed back out as an error if more critical errors
2235 * are not detected.
2236 */
2237APPLESTATIC int
2238nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
2239    struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
2240    NFSPROC_T *p, int repstat)
2241{
2242	struct nfsstate *stp, *nstp;
2243	struct nfsclient *clp;
2244	struct nfsstate *ownerstp;
2245	struct nfslockfile *lfp, *new_lfp;
2246	int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
2247
2248	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2249		readonly = 1;
2250	/*
2251	 * Check for restart conditions (client and server).
2252	 */
2253	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2254		&new_stp->ls_stateid, 0);
2255	if (error)
2256		goto out;
2257
2258	/*
2259	 * Check for state resource limit exceeded.
2260	 * Technically this should be SMP protected, but the worst
2261	 * case error is "out by one or two" on the count when it
2262	 * returns NFSERR_RESOURCE and the limit is just a rather
2263	 * arbitrary high water mark, so no harm is done.
2264	 */
2265	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
2266		error = NFSERR_RESOURCE;
2267		goto out;
2268	}
2269
2270tryagain:
2271	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2272	    M_NFSDLOCKFILE, M_WAITOK);
2273	if (vp)
2274		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2275		    NULL, p);
2276	NFSLOCKSTATE();
2277	/*
2278	 * Get the nfsclient structure.
2279	 */
2280	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2281	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2282
2283	/*
2284	 * Look up the open owner. See if it needs confirmation and
2285	 * check the seq#, as required.
2286	 */
2287	if (!error)
2288		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2289
2290	if (!error && ownerstp) {
2291		error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2292		    new_stp->ls_op);
2293		/*
2294		 * If the OpenOwner hasn't been confirmed, assume the
2295		 * old one was a replay and this one is ok.
2296		 * See: RFC3530 Sec. 14.2.18.
2297		 */
2298		if (error == NFSERR_BADSEQID &&
2299		    (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2300			error = 0;
2301	}
2302
2303	/*
2304	 * Check for grace.
2305	 */
2306	if (!error)
2307		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2308	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2309		nfsrv_checkstable(clp))
2310		error = NFSERR_NOGRACE;
2311
2312	/*
2313	 * If none of the above errors occurred, let repstat be
2314	 * returned.
2315	 */
2316	if (repstat && !error)
2317		error = repstat;
2318	if (error) {
2319		NFSUNLOCKSTATE();
2320		if (haslock) {
2321			NFSLOCKV4ROOTMUTEX();
2322			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2323			NFSUNLOCKV4ROOTMUTEX();
2324		}
2325		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2326		goto out;
2327	}
2328
2329	/*
2330	 * If vp == NULL, the file doesn't exist yet, so return ok.
2331	 * (This always happens on the first pass, so haslock must be 0.)
2332	 */
2333	if (vp == NULL) {
2334		NFSUNLOCKSTATE();
2335		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2336		goto out;
2337	}
2338
2339	/*
2340	 * Get the structure for the underlying file.
2341	 */
2342	if (getfhret)
2343		error = getfhret;
2344	else
2345		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2346		    NULL, 0);
2347	if (new_lfp)
2348		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2349	if (error) {
2350		NFSUNLOCKSTATE();
2351		if (haslock) {
2352			NFSLOCKV4ROOTMUTEX();
2353			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2354			NFSUNLOCKV4ROOTMUTEX();
2355		}
2356		goto out;
2357	}
2358
2359	/*
2360	 * Search for a conflicting open/share.
2361	 */
2362	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2363	    /*
2364	     * For Delegate_Cur, search for the matching Delegation,
2365	     * which indicates no conflict.
2366	     * An old delegation should have been recovered by the
2367	     * client doing a Claim_DELEGATE_Prev, so I won't let
2368	     * it match and return NFSERR_EXPIRED. Should I let it
2369	     * match?
2370	     */
2371	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2372		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2373		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2374		    stateidp->seqid == 0) ||
2375		    stateidp->seqid == stp->ls_stateid.seqid) &&
2376		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2377			  NFSX_STATEIDOTHER))
2378			break;
2379	    }
2380	    if (stp == LIST_END(&lfp->lf_deleg) ||
2381		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2382		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2383		NFSUNLOCKSTATE();
2384		if (haslock) {
2385			NFSLOCKV4ROOTMUTEX();
2386			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2387			NFSUNLOCKV4ROOTMUTEX();
2388		}
2389		error = NFSERR_EXPIRED;
2390		goto out;
2391	    }
2392	}
2393
2394	/*
2395	 * Check for access/deny bit conflicts. I check for the same
2396	 * owner as well, in case the client didn't bother.
2397	 */
2398	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2399		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2400		    (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2401		      ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2402		     ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2403		      ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2404			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2405			if (ret == 1) {
2406				/*
2407				 * nfsrv_clientconflict() unlocks
2408				 * state when it returns non-zero.
2409				 */
2410				goto tryagain;
2411			}
2412			if (ret == 2)
2413				error = NFSERR_PERM;
2414			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2415				error = NFSERR_RECLAIMCONFLICT;
2416			else
2417				error = NFSERR_SHAREDENIED;
2418			if (ret == 0)
2419				NFSUNLOCKSTATE();
2420			if (haslock) {
2421				NFSLOCKV4ROOTMUTEX();
2422				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2423				NFSUNLOCKV4ROOTMUTEX();
2424			}
2425			goto out;
2426		}
2427	}
2428
2429	/*
2430	 * Check for a conflicting delegation. If one is found, call
2431	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2432	 * been set yet, it will get the lock. Otherwise, it will recall
2433	 * the delegation. Then, we try try again...
2434	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2435	 *  isn't a conflict.)
2436	 * I currently believe the conflict algorithm to be:
2437	 * For Open with Read Access and Deny None
2438	 * - there is a conflict iff a different client has a write delegation
2439	 * For Open with other Write Access or any Deny except None
2440	 * - there is a conflict if a different client has any delegation
2441	 * - there is a conflict if the same client has a read delegation
2442	 *   (The current concensus is that this last case should be
2443	 *    considered a conflict since the client with a read delegation
2444	 *    could have done an Open with ReadAccess and WriteDeny
2445	 *    locally and then not have checked for the WriteDeny.)
2446	 * Don't check for a Reclaim, since that will be dealt with
2447	 * by nfsrv_openctrl().
2448	 */
2449	if (!(new_stp->ls_flags &
2450		(NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
2451	    stp = LIST_FIRST(&lfp->lf_deleg);
2452	    while (stp != LIST_END(&lfp->lf_deleg)) {
2453		nstp = LIST_NEXT(stp, ls_file);
2454		if ((readonly && stp->ls_clp != clp &&
2455		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2456		    (!readonly && (stp->ls_clp != clp ||
2457		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2458			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2459			if (ret) {
2460			    /*
2461			     * nfsrv_delegconflict() unlocks state
2462			     * when it returns non-zero.
2463			     */
2464			    if (ret == -1)
2465				goto tryagain;
2466			    error = ret;
2467			    goto out;
2468			}
2469		}
2470		stp = nstp;
2471	    }
2472	}
2473	NFSUNLOCKSTATE();
2474	if (haslock) {
2475		NFSLOCKV4ROOTMUTEX();
2476		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2477		NFSUNLOCKV4ROOTMUTEX();
2478	}
2479
2480out:
2481	NFSEXITCODE2(error, nd);
2482	return (error);
2483}
2484
2485/*
2486 * Open control function to create/update open state for an open.
2487 */
2488APPLESTATIC int
2489nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2490    struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2491    nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2492    NFSPROC_T *p, u_quad_t filerev)
2493{
2494	struct nfsstate *new_stp = *new_stpp;
2495	struct nfsstate *stp, *nstp;
2496	struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2497	struct nfslockfile *lfp, *new_lfp;
2498	struct nfsclient *clp;
2499	int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2500	int readonly = 0, cbret = 1, getfhret = 0;
2501
2502	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2503		readonly = 1;
2504	/*
2505	 * Check for restart conditions (client and server).
2506	 * (Paranoia, should have been detected by nfsrv_opencheck().)
2507	 * If an error does show up, return NFSERR_EXPIRED, since the
2508	 * the seqid# has already been incremented.
2509	 */
2510	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2511	    &new_stp->ls_stateid, 0);
2512	if (error) {
2513		printf("Nfsd: openctrl unexpected restart err=%d\n",
2514		    error);
2515		error = NFSERR_EXPIRED;
2516		goto out;
2517	}
2518
2519tryagain:
2520	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2521	    M_NFSDLOCKFILE, M_WAITOK);
2522	MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate),
2523	    M_NFSDSTATE, M_WAITOK);
2524	MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate),
2525	    M_NFSDSTATE, M_WAITOK);
2526	getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2527	    NULL, p);
2528	NFSLOCKSTATE();
2529	/*
2530	 * Get the client structure. Since the linked lists could be changed
2531	 * by other nfsd processes if this process does a tsleep(), one of
2532	 * two things must be done.
2533	 * 1 - don't tsleep()
2534	 * or
2535	 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2536	 *     before using the lists, since this lock stops the other
2537	 *     nfsd. This should only be used for rare cases, since it
2538	 *     essentially single threads the nfsd.
2539	 *     At this time, it is only done for cases where the stable
2540	 *     storage file must be written prior to completion of state
2541	 *     expiration.
2542	 */
2543	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2544	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2545	if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2546	    clp->lc_program) {
2547		/*
2548		 * This happens on the first open for a client
2549		 * that supports callbacks.
2550		 */
2551		NFSUNLOCKSTATE();
2552		/*
2553		 * Although nfsrv_docallback() will sleep, clp won't
2554		 * go away, since they are only removed when the
2555		 * nfsv4_lock() has blocked the nfsd threads. The
2556		 * fields in clp can change, but having multiple
2557		 * threads do this Null callback RPC should be
2558		 * harmless.
2559		 */
2560		cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2561		    NULL, 0, NULL, NULL, NULL, p);
2562		NFSLOCKSTATE();
2563		clp->lc_flags &= ~LCL_NEEDSCBNULL;
2564		if (!cbret)
2565			clp->lc_flags |= LCL_CALLBACKSON;
2566	}
2567
2568	/*
2569	 * Look up the open owner. See if it needs confirmation and
2570	 * check the seq#, as required.
2571	 */
2572	if (!error)
2573		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2574
2575	if (error) {
2576		NFSUNLOCKSTATE();
2577		printf("Nfsd: openctrl unexpected state err=%d\n",
2578			error);
2579		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2580		free((caddr_t)new_open, M_NFSDSTATE);
2581		free((caddr_t)new_deleg, M_NFSDSTATE);
2582		if (haslock) {
2583			NFSLOCKV4ROOTMUTEX();
2584			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2585			NFSUNLOCKV4ROOTMUTEX();
2586		}
2587		error = NFSERR_EXPIRED;
2588		goto out;
2589	}
2590
2591	if (new_stp->ls_flags & NFSLCK_RECLAIM)
2592		nfsrv_markstable(clp);
2593
2594	/*
2595	 * Get the structure for the underlying file.
2596	 */
2597	if (getfhret)
2598		error = getfhret;
2599	else
2600		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2601		    NULL, 0);
2602	if (new_lfp)
2603		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2604	if (error) {
2605		NFSUNLOCKSTATE();
2606		printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2607		    error);
2608		free((caddr_t)new_open, M_NFSDSTATE);
2609		free((caddr_t)new_deleg, M_NFSDSTATE);
2610		if (haslock) {
2611			NFSLOCKV4ROOTMUTEX();
2612			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2613			NFSUNLOCKV4ROOTMUTEX();
2614		}
2615		goto out;
2616	}
2617
2618	/*
2619	 * Search for a conflicting open/share.
2620	 */
2621	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2622	    /*
2623	     * For Delegate_Cur, search for the matching Delegation,
2624	     * which indicates no conflict.
2625	     * An old delegation should have been recovered by the
2626	     * client doing a Claim_DELEGATE_Prev, so I won't let
2627	     * it match and return NFSERR_EXPIRED. Should I let it
2628	     * match?
2629	     */
2630	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2631		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2632		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2633		    stateidp->seqid == 0) ||
2634		    stateidp->seqid == stp->ls_stateid.seqid) &&
2635		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2636			NFSX_STATEIDOTHER))
2637			break;
2638	    }
2639	    if (stp == LIST_END(&lfp->lf_deleg) ||
2640		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2641		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2642		NFSUNLOCKSTATE();
2643		printf("Nfsd openctrl unexpected expiry\n");
2644		free((caddr_t)new_open, M_NFSDSTATE);
2645		free((caddr_t)new_deleg, M_NFSDSTATE);
2646		if (haslock) {
2647			NFSLOCKV4ROOTMUTEX();
2648			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2649			NFSUNLOCKV4ROOTMUTEX();
2650		}
2651		error = NFSERR_EXPIRED;
2652		goto out;
2653	    }
2654
2655	    /*
2656	     * Don't issue a Delegation, since one already exists and
2657	     * delay delegation timeout, as required.
2658	     */
2659	    delegate = 0;
2660	    nfsrv_delaydelegtimeout(stp);
2661	}
2662
2663	/*
2664	 * Check for access/deny bit conflicts. I also check for the
2665	 * same owner, since the client might not have bothered to check.
2666	 * Also, note an open for the same file and owner, if found,
2667	 * which is all we do here for Delegate_Cur, since conflict
2668	 * checking is already done.
2669	 */
2670	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2671		if (ownerstp && stp->ls_openowner == ownerstp)
2672			openstp = stp;
2673		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2674		    /*
2675		     * If another client has the file open, the only
2676		     * delegation that can be issued is a Read delegation
2677		     * and only if it is a Read open with Deny none.
2678		     */
2679		    if (clp != stp->ls_clp) {
2680			if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2681			    NFSLCK_READACCESS)
2682			    writedeleg = 0;
2683			else
2684			    delegate = 0;
2685		    }
2686		    if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2687		        ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2688		       ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2689		        ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
2690			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2691			if (ret == 1) {
2692				/*
2693				 * nfsrv_clientconflict() unlocks state
2694				 * when it returns non-zero.
2695				 */
2696				free((caddr_t)new_open, M_NFSDSTATE);
2697				free((caddr_t)new_deleg, M_NFSDSTATE);
2698				openstp = NULL;
2699				goto tryagain;
2700			}
2701			if (ret == 2)
2702				error = NFSERR_PERM;
2703			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2704				error = NFSERR_RECLAIMCONFLICT;
2705			else
2706				error = NFSERR_SHAREDENIED;
2707			if (ret == 0)
2708				NFSUNLOCKSTATE();
2709			if (haslock) {
2710				NFSLOCKV4ROOTMUTEX();
2711				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2712				NFSUNLOCKV4ROOTMUTEX();
2713			}
2714			free((caddr_t)new_open, M_NFSDSTATE);
2715			free((caddr_t)new_deleg, M_NFSDSTATE);
2716			printf("nfsd openctrl unexpected client cnfl\n");
2717			goto out;
2718		    }
2719		}
2720	}
2721
2722	/*
2723	 * Check for a conflicting delegation. If one is found, call
2724	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2725	 * been set yet, it will get the lock. Otherwise, it will recall
2726	 * the delegation. Then, we try try again...
2727	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2728	 *  isn't a conflict.)
2729	 * I currently believe the conflict algorithm to be:
2730	 * For Open with Read Access and Deny None
2731	 * - there is a conflict iff a different client has a write delegation
2732	 * For Open with other Write Access or any Deny except None
2733	 * - there is a conflict if a different client has any delegation
2734	 * - there is a conflict if the same client has a read delegation
2735	 *   (The current concensus is that this last case should be
2736	 *    considered a conflict since the client with a read delegation
2737	 *    could have done an Open with ReadAccess and WriteDeny
2738	 *    locally and then not have checked for the WriteDeny.)
2739	 */
2740	if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
2741	    stp = LIST_FIRST(&lfp->lf_deleg);
2742	    while (stp != LIST_END(&lfp->lf_deleg)) {
2743		nstp = LIST_NEXT(stp, ls_file);
2744		if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
2745			writedeleg = 0;
2746		else
2747			delegate = 0;
2748		if ((readonly && stp->ls_clp != clp &&
2749		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2750		    (!readonly && (stp->ls_clp != clp ||
2751		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2752		    if (new_stp->ls_flags & NFSLCK_RECLAIM) {
2753			delegate = 2;
2754		    } else {
2755			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2756			if (ret) {
2757			    /*
2758			     * nfsrv_delegconflict() unlocks state
2759			     * when it returns non-zero.
2760			     */
2761			    printf("Nfsd openctrl unexpected deleg cnfl\n");
2762			    free((caddr_t)new_open, M_NFSDSTATE);
2763			    free((caddr_t)new_deleg, M_NFSDSTATE);
2764			    if (ret == -1) {
2765				openstp = NULL;
2766				goto tryagain;
2767			    }
2768			    error = ret;
2769			    goto out;
2770			}
2771		    }
2772		}
2773		stp = nstp;
2774	    }
2775	}
2776
2777	/*
2778	 * We only get here if there was no open that conflicted.
2779	 * If an open for the owner exists, or in the access/deny bits.
2780	 * Otherwise it is a new open. If the open_owner hasn't been
2781	 * confirmed, replace the open with the new one needing confirmation,
2782	 * otherwise add the open.
2783	 */
2784	if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
2785	    /*
2786	     * Handle NFSLCK_DELEGPREV by searching the old delegations for
2787	     * a match. If found, just move the old delegation to the current
2788	     * delegation list and issue open. If not found, return
2789	     * NFSERR_EXPIRED.
2790	     */
2791	    LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
2792		if (stp->ls_lfp == lfp) {
2793		    /* Found it */
2794		    if (stp->ls_clp != clp)
2795			panic("olddeleg clp");
2796		    LIST_REMOVE(stp, ls_list);
2797		    LIST_REMOVE(stp, ls_hash);
2798		    stp->ls_flags &= ~NFSLCK_OLDDELEG;
2799		    stp->ls_stateid.seqid = delegstateidp->seqid = 1;
2800		    stp->ls_stateid.other[0] = delegstateidp->other[0] =
2801			clp->lc_clientid.lval[0];
2802		    stp->ls_stateid.other[1] = delegstateidp->other[1] =
2803			clp->lc_clientid.lval[1];
2804		    stp->ls_stateid.other[2] = delegstateidp->other[2] =
2805			nfsrv_nextstateindex(clp);
2806		    stp->ls_compref = nd->nd_compref;
2807		    LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
2808		    LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2809			stp->ls_stateid), stp, ls_hash);
2810		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2811			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2812		    else
2813			*rflagsp |= NFSV4OPEN_READDELEGATE;
2814		    clp->lc_delegtime = NFSD_MONOSEC +
2815			nfsrv_lease + NFSRV_LEASEDELTA;
2816
2817		    /*
2818		     * Now, do the associated open.
2819		     */
2820		    new_open->ls_stateid.seqid = 1;
2821		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2822		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2823		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2824		    new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
2825			NFSLCK_OPEN;
2826		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2827			new_open->ls_flags |= (NFSLCK_READACCESS |
2828			    NFSLCK_WRITEACCESS);
2829		    else
2830			new_open->ls_flags |= NFSLCK_READACCESS;
2831		    new_open->ls_uid = new_stp->ls_uid;
2832		    new_open->ls_lfp = lfp;
2833		    new_open->ls_clp = clp;
2834		    LIST_INIT(&new_open->ls_open);
2835		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2836		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2837			new_open, ls_hash);
2838		    /*
2839		     * and handle the open owner
2840		     */
2841		    if (ownerstp) {
2842			new_open->ls_openowner = ownerstp;
2843			LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
2844		    } else {
2845			new_open->ls_openowner = new_stp;
2846			new_stp->ls_flags = 0;
2847			nfsrvd_refcache(new_stp->ls_op);
2848			new_stp->ls_noopens = 0;
2849			LIST_INIT(&new_stp->ls_open);
2850			LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2851			LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2852			*new_stpp = NULL;
2853			newnfsstats.srvopenowners++;
2854			nfsrv_openpluslock++;
2855		    }
2856		    openstp = new_open;
2857		    new_open = NULL;
2858		    newnfsstats.srvopens++;
2859		    nfsrv_openpluslock++;
2860		    break;
2861		}
2862	    }
2863	    if (stp == LIST_END(&clp->lc_olddeleg))
2864		error = NFSERR_EXPIRED;
2865	} else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2866	    /*
2867	     * Scan to see that no delegation for this client and file
2868	     * doesn't already exist.
2869	     * There also shouldn't yet be an Open for this file and
2870	     * openowner.
2871	     */
2872	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2873		if (stp->ls_clp == clp)
2874		    break;
2875	    }
2876	    if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
2877		/*
2878		 * This is the Claim_Previous case with a delegation
2879		 * type != Delegate_None.
2880		 */
2881		/*
2882		 * First, add the delegation. (Although we must issue the
2883		 * delegation, we can also ask for an immediate return.)
2884		 */
2885		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
2886		new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
2887		    clp->lc_clientid.lval[0];
2888		new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
2889		    clp->lc_clientid.lval[1];
2890		new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
2891		    nfsrv_nextstateindex(clp);
2892		if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
2893		    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
2894			NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
2895		    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2896		} else {
2897		    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
2898			NFSLCK_READACCESS);
2899		    *rflagsp |= NFSV4OPEN_READDELEGATE;
2900		}
2901		new_deleg->ls_uid = new_stp->ls_uid;
2902		new_deleg->ls_lfp = lfp;
2903		new_deleg->ls_clp = clp;
2904		new_deleg->ls_filerev = filerev;
2905		new_deleg->ls_compref = nd->nd_compref;
2906		LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
2907		LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2908		    new_deleg->ls_stateid), new_deleg, ls_hash);
2909		LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
2910		new_deleg = NULL;
2911		if (delegate == 2 || nfsrv_issuedelegs == 0 ||
2912		    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
2913		     LCL_CALLBACKSON ||
2914		    NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
2915		    !NFSVNO_DELEGOK(vp))
2916		    *rflagsp |= NFSV4OPEN_RECALL;
2917		newnfsstats.srvdelegates++;
2918		nfsrv_openpluslock++;
2919		nfsrv_delegatecnt++;
2920
2921		/*
2922		 * Now, do the associated open.
2923		 */
2924		new_open->ls_stateid.seqid = 1;
2925		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2926		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2927		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2928		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
2929		    NFSLCK_OPEN;
2930		if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
2931			new_open->ls_flags |= (NFSLCK_READACCESS |
2932			    NFSLCK_WRITEACCESS);
2933		else
2934			new_open->ls_flags |= NFSLCK_READACCESS;
2935		new_open->ls_uid = new_stp->ls_uid;
2936		new_open->ls_lfp = lfp;
2937		new_open->ls_clp = clp;
2938		LIST_INIT(&new_open->ls_open);
2939		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2940		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2941		   new_open, ls_hash);
2942		/*
2943		 * and handle the open owner
2944		 */
2945		if (ownerstp) {
2946		    new_open->ls_openowner = ownerstp;
2947		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
2948		} else {
2949		    new_open->ls_openowner = new_stp;
2950		    new_stp->ls_flags = 0;
2951		    nfsrvd_refcache(new_stp->ls_op);
2952		    new_stp->ls_noopens = 0;
2953		    LIST_INIT(&new_stp->ls_open);
2954		    LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2955		    LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2956		    *new_stpp = NULL;
2957		    newnfsstats.srvopenowners++;
2958		    nfsrv_openpluslock++;
2959		}
2960		openstp = new_open;
2961		new_open = NULL;
2962		newnfsstats.srvopens++;
2963		nfsrv_openpluslock++;
2964	    } else {
2965		error = NFSERR_RECLAIMCONFLICT;
2966	    }
2967	} else if (ownerstp) {
2968		if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
2969		    /* Replace the open */
2970		    if (ownerstp->ls_op)
2971			nfsrvd_derefcache(ownerstp->ls_op);
2972		    ownerstp->ls_op = new_stp->ls_op;
2973		    nfsrvd_refcache(ownerstp->ls_op);
2974		    ownerstp->ls_seq = new_stp->ls_seq;
2975		    *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
2976		    stp = LIST_FIRST(&ownerstp->ls_open);
2977		    stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
2978			NFSLCK_OPEN;
2979		    stp->ls_stateid.seqid = 1;
2980		    stp->ls_uid = new_stp->ls_uid;
2981		    if (lfp != stp->ls_lfp) {
2982			LIST_REMOVE(stp, ls_file);
2983			LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
2984			stp->ls_lfp = lfp;
2985		    }
2986		    openstp = stp;
2987		} else if (openstp) {
2988		    openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
2989		    openstp->ls_stateid.seqid++;
2990		    if ((nd->nd_flag & ND_NFSV41) != 0 &&
2991			openstp->ls_stateid.seqid == 0)
2992			openstp->ls_stateid.seqid = 1;
2993
2994		    /*
2995		     * This is where we can choose to issue a delegation.
2996		     */
2997		    if (delegate == 0 || writedeleg == 0 ||
2998			NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
2999			nfsrv_writedelegifpos == 0) ||
3000			!NFSVNO_DELEGOK(vp) ||
3001			(new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
3002			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3003			 LCL_CALLBACKSON)
3004			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3005		    else if (nfsrv_issuedelegs == 0 ||
3006			NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3007			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3008		    else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3009			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3010		    else {
3011			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3012			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3013			    = clp->lc_clientid.lval[0];
3014			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3015			    = clp->lc_clientid.lval[1];
3016			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3017			    = nfsrv_nextstateindex(clp);
3018			new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3019			    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3020			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3021			new_deleg->ls_uid = new_stp->ls_uid;
3022			new_deleg->ls_lfp = lfp;
3023			new_deleg->ls_clp = clp;
3024			new_deleg->ls_filerev = filerev;
3025			new_deleg->ls_compref = nd->nd_compref;
3026			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3027			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3028			    new_deleg->ls_stateid), new_deleg, ls_hash);
3029			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3030			new_deleg = NULL;
3031			newnfsstats.srvdelegates++;
3032			nfsrv_openpluslock++;
3033			nfsrv_delegatecnt++;
3034		    }
3035		} else {
3036		    new_open->ls_stateid.seqid = 1;
3037		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3038		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3039		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3040		    new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3041			NFSLCK_OPEN;
3042		    new_open->ls_uid = new_stp->ls_uid;
3043		    new_open->ls_openowner = ownerstp;
3044		    new_open->ls_lfp = lfp;
3045		    new_open->ls_clp = clp;
3046		    LIST_INIT(&new_open->ls_open);
3047		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3048		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3049		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3050			new_open, ls_hash);
3051		    openstp = new_open;
3052		    new_open = NULL;
3053		    newnfsstats.srvopens++;
3054		    nfsrv_openpluslock++;
3055
3056		    /*
3057		     * This is where we can choose to issue a delegation.
3058		     */
3059		    if (delegate == 0 || (writedeleg == 0 && readonly == 0) ||
3060			!NFSVNO_DELEGOK(vp) ||
3061			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3062			 LCL_CALLBACKSON)
3063			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3064		    else if (nfsrv_issuedelegs == 0 ||
3065			NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3066			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3067		    else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3068			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3069		    else {
3070			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3071			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3072			    = clp->lc_clientid.lval[0];
3073			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3074			    = clp->lc_clientid.lval[1];
3075			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3076			    = nfsrv_nextstateindex(clp);
3077			if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3078			    (nfsrv_writedelegifpos || !readonly) &&
3079			    (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
3080			    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3081				NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3082			    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3083			} else {
3084			    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3085				NFSLCK_READACCESS);
3086			    *rflagsp |= NFSV4OPEN_READDELEGATE;
3087			}
3088			new_deleg->ls_uid = new_stp->ls_uid;
3089			new_deleg->ls_lfp = lfp;
3090			new_deleg->ls_clp = clp;
3091			new_deleg->ls_filerev = filerev;
3092			new_deleg->ls_compref = nd->nd_compref;
3093			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3094			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3095			    new_deleg->ls_stateid), new_deleg, ls_hash);
3096			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3097			new_deleg = NULL;
3098			newnfsstats.srvdelegates++;
3099			nfsrv_openpluslock++;
3100			nfsrv_delegatecnt++;
3101		    }
3102		}
3103	} else {
3104		/*
3105		 * New owner case. Start the open_owner sequence with a
3106		 * Needs confirmation (unless a reclaim) and hang the
3107		 * new open off it.
3108		 */
3109		new_open->ls_stateid.seqid = 1;
3110		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3111		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3112		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3113		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3114		    NFSLCK_OPEN;
3115		new_open->ls_uid = new_stp->ls_uid;
3116		LIST_INIT(&new_open->ls_open);
3117		new_open->ls_openowner = new_stp;
3118		new_open->ls_lfp = lfp;
3119		new_open->ls_clp = clp;
3120		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3121		if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3122			new_stp->ls_flags = 0;
3123		} else if ((nd->nd_flag & ND_NFSV41) != 0) {
3124			/* NFSv4.1 never needs confirmation. */
3125			new_stp->ls_flags = 0;
3126
3127			/*
3128			 * This is where we can choose to issue a delegation.
3129			 */
3130			if (delegate && nfsrv_issuedelegs &&
3131			    (writedeleg || readonly) &&
3132			    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
3133			     LCL_CALLBACKSON &&
3134			    !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
3135			    NFSVNO_DELEGOK(vp) &&
3136			    ((nd->nd_flag & ND_NFSV41) == 0 ||
3137			     (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
3138				new_deleg->ls_stateid.seqid =
3139				    delegstateidp->seqid = 1;
3140				new_deleg->ls_stateid.other[0] =
3141				    delegstateidp->other[0]
3142				    = clp->lc_clientid.lval[0];
3143				new_deleg->ls_stateid.other[1] =
3144				    delegstateidp->other[1]
3145				    = clp->lc_clientid.lval[1];
3146				new_deleg->ls_stateid.other[2] =
3147				    delegstateidp->other[2]
3148				    = nfsrv_nextstateindex(clp);
3149				if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3150				    (nfsrv_writedelegifpos || !readonly) &&
3151				    ((nd->nd_flag & ND_NFSV41) == 0 ||
3152				     (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
3153				     0)) {
3154					new_deleg->ls_flags =
3155					    (NFSLCK_DELEGWRITE |
3156					     NFSLCK_READACCESS |
3157					     NFSLCK_WRITEACCESS);
3158					*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3159				} else {
3160					new_deleg->ls_flags =
3161					    (NFSLCK_DELEGREAD |
3162					     NFSLCK_READACCESS);
3163					*rflagsp |= NFSV4OPEN_READDELEGATE;
3164				}
3165				new_deleg->ls_uid = new_stp->ls_uid;
3166				new_deleg->ls_lfp = lfp;
3167				new_deleg->ls_clp = clp;
3168				new_deleg->ls_filerev = filerev;
3169				new_deleg->ls_compref = nd->nd_compref;
3170				LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
3171				    ls_file);
3172				LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3173				    new_deleg->ls_stateid), new_deleg, ls_hash);
3174				LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
3175				    ls_list);
3176				new_deleg = NULL;
3177				newnfsstats.srvdelegates++;
3178				nfsrv_openpluslock++;
3179				nfsrv_delegatecnt++;
3180			}
3181		} else {
3182			*rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3183			new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3184		}
3185		nfsrvd_refcache(new_stp->ls_op);
3186		new_stp->ls_noopens = 0;
3187		LIST_INIT(&new_stp->ls_open);
3188		LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3189		LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3190		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3191		    new_open, ls_hash);
3192		openstp = new_open;
3193		new_open = NULL;
3194		*new_stpp = NULL;
3195		newnfsstats.srvopens++;
3196		nfsrv_openpluslock++;
3197		newnfsstats.srvopenowners++;
3198		nfsrv_openpluslock++;
3199	}
3200	if (!error) {
3201		stateidp->seqid = openstp->ls_stateid.seqid;
3202		stateidp->other[0] = openstp->ls_stateid.other[0];
3203		stateidp->other[1] = openstp->ls_stateid.other[1];
3204		stateidp->other[2] = openstp->ls_stateid.other[2];
3205	}
3206	NFSUNLOCKSTATE();
3207	if (haslock) {
3208		NFSLOCKV4ROOTMUTEX();
3209		nfsv4_unlock(&nfsv4rootfs_lock, 1);
3210		NFSUNLOCKV4ROOTMUTEX();
3211	}
3212	if (new_open)
3213		FREE((caddr_t)new_open, M_NFSDSTATE);
3214	if (new_deleg)
3215		FREE((caddr_t)new_deleg, M_NFSDSTATE);
3216
3217out:
3218	NFSEXITCODE2(error, nd);
3219	return (error);
3220}
3221
3222/*
3223 * Open update. Does the confirm, downgrade and close.
3224 */
3225APPLESTATIC int
3226nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
3227    nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p)
3228{
3229	struct nfsstate *stp, *ownerstp;
3230	struct nfsclient *clp;
3231	struct nfslockfile *lfp;
3232	u_int32_t bits;
3233	int error = 0, gotstate = 0, len = 0;
3234	u_char client[NFSV4_OPAQUELIMIT];
3235
3236	/*
3237	 * Check for restart conditions (client and server).
3238	 */
3239	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3240	    &new_stp->ls_stateid, 0);
3241	if (error)
3242		goto out;
3243
3244	NFSLOCKSTATE();
3245	/*
3246	 * Get the open structure via clientid and stateid.
3247	 */
3248	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3249	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
3250	if (!error)
3251		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
3252		    new_stp->ls_flags, &stp);
3253
3254	/*
3255	 * Sanity check the open.
3256	 */
3257	if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
3258		(!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
3259		 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
3260		((new_stp->ls_flags & NFSLCK_CONFIRM) &&
3261		 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
3262		error = NFSERR_BADSTATEID;
3263
3264	if (!error)
3265		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
3266		    stp->ls_openowner, new_stp->ls_op);
3267	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
3268	    (((nd->nd_flag & ND_NFSV41) == 0 &&
3269	      !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
3270	     ((nd->nd_flag & ND_NFSV41) != 0 &&
3271	      new_stp->ls_stateid.seqid != 0)))
3272		error = NFSERR_OLDSTATEID;
3273	if (!error && vnode_vtype(vp) != VREG) {
3274		if (vnode_vtype(vp) == VDIR)
3275			error = NFSERR_ISDIR;
3276		else
3277			error = NFSERR_INVAL;
3278	}
3279
3280	if (error) {
3281		/*
3282		 * If a client tries to confirm an Open with a bad
3283		 * seqid# and there are no byte range locks or other Opens
3284		 * on the openowner, just throw it away, so the next use of the
3285		 * openowner will start a fresh seq#.
3286		 */
3287		if (error == NFSERR_BADSEQID &&
3288		    (new_stp->ls_flags & NFSLCK_CONFIRM) &&
3289		    nfsrv_nootherstate(stp))
3290			nfsrv_freeopenowner(stp->ls_openowner, 0, p);
3291		NFSUNLOCKSTATE();
3292		goto out;
3293	}
3294
3295	/*
3296	 * Set the return stateid.
3297	 */
3298	stateidp->seqid = stp->ls_stateid.seqid + 1;
3299	if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
3300		stateidp->seqid = 1;
3301	stateidp->other[0] = stp->ls_stateid.other[0];
3302	stateidp->other[1] = stp->ls_stateid.other[1];
3303	stateidp->other[2] = stp->ls_stateid.other[2];
3304	/*
3305	 * Now, handle the three cases.
3306	 */
3307	if (new_stp->ls_flags & NFSLCK_CONFIRM) {
3308		/*
3309		 * If the open doesn't need confirmation, it seems to me that
3310		 * there is a client error, but I'll just log it and keep going?
3311		 */
3312		if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
3313			printf("Nfsv4d: stray open confirm\n");
3314		stp->ls_openowner->ls_flags = 0;
3315		stp->ls_stateid.seqid++;
3316		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3317		    stp->ls_stateid.seqid == 0)
3318			stp->ls_stateid.seqid = 1;
3319		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3320			clp->lc_flags |= LCL_STAMPEDSTABLE;
3321			len = clp->lc_idlen;
3322			NFSBCOPY(clp->lc_id, client, len);
3323			gotstate = 1;
3324		}
3325		NFSUNLOCKSTATE();
3326	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
3327		ownerstp = stp->ls_openowner;
3328		lfp = stp->ls_lfp;
3329		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
3330			/* Get the lf lock */
3331			nfsrv_locklf(lfp);
3332			NFSUNLOCKSTATE();
3333			ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
3334			NFSVOPUNLOCK(vp, 0);
3335			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
3336				NFSLOCKSTATE();
3337				nfsrv_unlocklf(lfp);
3338				NFSUNLOCKSTATE();
3339			}
3340			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
3341		} else {
3342			(void) nfsrv_freeopen(stp, NULL, 0, p);
3343			NFSUNLOCKSTATE();
3344		}
3345	} else {
3346		/*
3347		 * Update the share bits, making sure that the new set are a
3348		 * subset of the old ones.
3349		 */
3350		bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
3351		if (~(stp->ls_flags) & bits) {
3352			NFSUNLOCKSTATE();
3353			error = NFSERR_INVAL;
3354			goto out;
3355		}
3356		stp->ls_flags = (bits | NFSLCK_OPEN);
3357		stp->ls_stateid.seqid++;
3358		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3359		    stp->ls_stateid.seqid == 0)
3360			stp->ls_stateid.seqid = 1;
3361		NFSUNLOCKSTATE();
3362	}
3363
3364	/*
3365	 * If the client just confirmed its first open, write a timestamp
3366	 * to the stable storage file.
3367	 */
3368	if (gotstate != 0) {
3369		nfsrv_writestable(client, len, NFSNST_NEWSTATE, p);
3370		nfsrv_backupstable();
3371	}
3372
3373out:
3374	NFSEXITCODE2(error, nd);
3375	return (error);
3376}
3377
3378/*
3379 * Delegation update. Does the purge and return.
3380 */
3381APPLESTATIC int
3382nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3383    nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3384    NFSPROC_T *p)
3385{
3386	struct nfsstate *stp;
3387	struct nfsclient *clp;
3388	int error = 0;
3389	fhandle_t fh;
3390
3391	/*
3392	 * Do a sanity check against the file handle for DelegReturn.
3393	 */
3394	if (vp) {
3395		error = nfsvno_getfh(vp, &fh, p);
3396		if (error)
3397			goto out;
3398	}
3399	/*
3400	 * Check for restart conditions (client and server).
3401	 */
3402	if (op == NFSV4OP_DELEGRETURN)
3403		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3404			stateidp, 0);
3405	else
3406		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3407			stateidp, 0);
3408
3409	NFSLOCKSTATE();
3410	/*
3411	 * Get the open structure via clientid and stateid.
3412	 */
3413	if (!error)
3414	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3415		(nfsquad_t)((u_quad_t)0), 0, nd, p);
3416	if (error) {
3417		if (error == NFSERR_CBPATHDOWN)
3418			error = 0;
3419		if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3420			error = NFSERR_STALESTATEID;
3421	}
3422	if (!error && op == NFSV4OP_DELEGRETURN) {
3423	    error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3424	    if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3425		((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3426		error = NFSERR_OLDSTATEID;
3427	}
3428	/*
3429	 * NFSERR_EXPIRED means that the state has gone away,
3430	 * so Delegations have been purged. Just return ok.
3431	 */
3432	if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3433		NFSUNLOCKSTATE();
3434		error = 0;
3435		goto out;
3436	}
3437	if (error) {
3438		NFSUNLOCKSTATE();
3439		goto out;
3440	}
3441
3442	if (op == NFSV4OP_DELEGRETURN) {
3443		if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3444		    sizeof (fhandle_t))) {
3445			NFSUNLOCKSTATE();
3446			error = NFSERR_BADSTATEID;
3447			goto out;
3448		}
3449		nfsrv_freedeleg(stp);
3450	} else {
3451		nfsrv_freedeleglist(&clp->lc_olddeleg);
3452	}
3453	NFSUNLOCKSTATE();
3454	error = 0;
3455
3456out:
3457	NFSEXITCODE(error);
3458	return (error);
3459}
3460
3461/*
3462 * Release lock owner.
3463 */
3464APPLESTATIC int
3465nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3466    NFSPROC_T *p)
3467{
3468	struct nfsstate *stp, *nstp, *openstp, *ownstp;
3469	struct nfsclient *clp;
3470	int error = 0;
3471
3472	/*
3473	 * Check for restart conditions (client and server).
3474	 */
3475	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3476	    &new_stp->ls_stateid, 0);
3477	if (error)
3478		goto out;
3479
3480	NFSLOCKSTATE();
3481	/*
3482	 * Get the lock owner by name.
3483	 */
3484	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3485	    (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3486	if (error) {
3487		NFSUNLOCKSTATE();
3488		goto out;
3489	}
3490	LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3491	    LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3492		stp = LIST_FIRST(&openstp->ls_open);
3493		while (stp != LIST_END(&openstp->ls_open)) {
3494		    nstp = LIST_NEXT(stp, ls_list);
3495		    /*
3496		     * If the owner matches, check for locks and
3497		     * then free or return an error.
3498		     */
3499		    if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3500			!NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3501			 stp->ls_ownerlen)){
3502			if (LIST_EMPTY(&stp->ls_lock)) {
3503			    nfsrv_freelockowner(stp, NULL, 0, p);
3504			} else {
3505			    NFSUNLOCKSTATE();
3506			    error = NFSERR_LOCKSHELD;
3507			    goto out;
3508			}
3509		    }
3510		    stp = nstp;
3511		}
3512	    }
3513	}
3514	NFSUNLOCKSTATE();
3515
3516out:
3517	NFSEXITCODE(error);
3518	return (error);
3519}
3520
3521/*
3522 * Get the file handle for a lock structure.
3523 */
3524static int
3525nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3526    fhandle_t *nfhp, NFSPROC_T *p)
3527{
3528	fhandle_t *fhp = NULL;
3529	int error;
3530
3531	/*
3532	 * For lock, use the new nfslock structure, otherwise just
3533	 * a fhandle_t on the stack.
3534	 */
3535	if (flags & NFSLCK_OPEN) {
3536		KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3537		fhp = &new_lfp->lf_fh;
3538	} else if (nfhp) {
3539		fhp = nfhp;
3540	} else {
3541		panic("nfsrv_getlockfh");
3542	}
3543	error = nfsvno_getfh(vp, fhp, p);
3544	NFSEXITCODE(error);
3545	return (error);
3546}
3547
3548/*
3549 * Get an nfs lock structure. Allocate one, as required, and return a
3550 * pointer to it.
3551 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3552 */
3553static int
3554nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3555    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3556{
3557	struct nfslockfile *lfp;
3558	fhandle_t *fhp = NULL, *tfhp;
3559	struct nfslockhashhead *hp;
3560	struct nfslockfile *new_lfp = NULL;
3561
3562	/*
3563	 * For lock, use the new nfslock structure, otherwise just
3564	 * a fhandle_t on the stack.
3565	 */
3566	if (flags & NFSLCK_OPEN) {
3567		new_lfp = *new_lfpp;
3568		fhp = &new_lfp->lf_fh;
3569	} else if (nfhp) {
3570		fhp = nfhp;
3571	} else {
3572		panic("nfsrv_getlockfile");
3573	}
3574
3575	hp = NFSLOCKHASH(fhp);
3576	LIST_FOREACH(lfp, hp, lf_hash) {
3577		tfhp = &lfp->lf_fh;
3578		if (NFSVNO_CMPFH(fhp, tfhp)) {
3579			if (lockit)
3580				nfsrv_locklf(lfp);
3581			*lfpp = lfp;
3582			return (0);
3583		}
3584	}
3585	if (!(flags & NFSLCK_OPEN))
3586		return (-1);
3587
3588	/*
3589	 * No match, so chain the new one into the list.
3590	 */
3591	LIST_INIT(&new_lfp->lf_open);
3592	LIST_INIT(&new_lfp->lf_lock);
3593	LIST_INIT(&new_lfp->lf_deleg);
3594	LIST_INIT(&new_lfp->lf_locallock);
3595	LIST_INIT(&new_lfp->lf_rollback);
3596	new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3597	new_lfp->lf_locallock_lck.nfslock_lock = 0;
3598	new_lfp->lf_usecount = 0;
3599	LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3600	*lfpp = new_lfp;
3601	*new_lfpp = NULL;
3602	return (0);
3603}
3604
3605/*
3606 * This function adds a nfslock lock structure to the list for the associated
3607 * nfsstate and nfslockfile structures. It will be inserted after the
3608 * entry pointed at by insert_lop.
3609 */
3610static void
3611nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3612    struct nfsstate *stp, struct nfslockfile *lfp)
3613{
3614	struct nfslock *lop, *nlop;
3615
3616	new_lop->lo_stp = stp;
3617	new_lop->lo_lfp = lfp;
3618
3619	if (stp != NULL) {
3620		/* Insert in increasing lo_first order */
3621		lop = LIST_FIRST(&lfp->lf_lock);
3622		if (lop == LIST_END(&lfp->lf_lock) ||
3623		    new_lop->lo_first <= lop->lo_first) {
3624			LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3625		} else {
3626			nlop = LIST_NEXT(lop, lo_lckfile);
3627			while (nlop != LIST_END(&lfp->lf_lock) &&
3628			       nlop->lo_first < new_lop->lo_first) {
3629				lop = nlop;
3630				nlop = LIST_NEXT(lop, lo_lckfile);
3631			}
3632			LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3633		}
3634	} else {
3635		new_lop->lo_lckfile.le_prev = NULL;	/* list not used */
3636	}
3637
3638	/*
3639	 * Insert after insert_lop, which is overloaded as stp or lfp for
3640	 * an empty list.
3641	 */
3642	if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3643		LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3644	else if ((struct nfsstate *)insert_lop == stp)
3645		LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3646	else
3647		LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3648	if (stp != NULL) {
3649		newnfsstats.srvlocks++;
3650		nfsrv_openpluslock++;
3651	}
3652}
3653
3654/*
3655 * This function updates the locking for a lock owner and given file. It
3656 * maintains a list of lock ranges ordered on increasing file offset that
3657 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
3658 * It always adds new_lop to the list and sometimes uses the one pointed
3659 * at by other_lopp.
3660 */
3661static void
3662nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
3663    struct nfslock **other_lopp, struct nfslockfile *lfp)
3664{
3665	struct nfslock *new_lop = *new_lopp;
3666	struct nfslock *lop, *tlop, *ilop;
3667	struct nfslock *other_lop = *other_lopp;
3668	int unlock = 0, myfile = 0;
3669	u_int64_t tmp;
3670
3671	/*
3672	 * Work down the list until the lock is merged.
3673	 */
3674	if (new_lop->lo_flags & NFSLCK_UNLOCK)
3675		unlock = 1;
3676	if (stp != NULL) {
3677		ilop = (struct nfslock *)stp;
3678		lop = LIST_FIRST(&stp->ls_lock);
3679	} else {
3680		ilop = (struct nfslock *)lfp;
3681		lop = LIST_FIRST(&lfp->lf_locallock);
3682	}
3683	while (lop != NULL) {
3684	    /*
3685	     * Only check locks for this file that aren't before the start of
3686	     * new lock's range.
3687	     */
3688	    if (lop->lo_lfp == lfp) {
3689	      myfile = 1;
3690	      if (lop->lo_end >= new_lop->lo_first) {
3691		if (new_lop->lo_end < lop->lo_first) {
3692			/*
3693			 * If the new lock ends before the start of the
3694			 * current lock's range, no merge, just insert
3695			 * the new lock.
3696			 */
3697			break;
3698		}
3699		if (new_lop->lo_flags == lop->lo_flags ||
3700		    (new_lop->lo_first <= lop->lo_first &&
3701		     new_lop->lo_end >= lop->lo_end)) {
3702			/*
3703			 * This lock can be absorbed by the new lock/unlock.
3704			 * This happens when it covers the entire range
3705			 * of the old lock or is contiguous
3706			 * with the old lock and is of the same type or an
3707			 * unlock.
3708			 */
3709			if (lop->lo_first < new_lop->lo_first)
3710				new_lop->lo_first = lop->lo_first;
3711			if (lop->lo_end > new_lop->lo_end)
3712				new_lop->lo_end = lop->lo_end;
3713			tlop = lop;
3714			lop = LIST_NEXT(lop, lo_lckowner);
3715			nfsrv_freenfslock(tlop);
3716			continue;
3717		}
3718
3719		/*
3720		 * All these cases are for contiguous locks that are not the
3721		 * same type, so they can't be merged.
3722		 */
3723		if (new_lop->lo_first <= lop->lo_first) {
3724			/*
3725			 * This case is where the new lock overlaps with the
3726			 * first part of the old lock. Move the start of the
3727			 * old lock to just past the end of the new lock. The
3728			 * new lock will be inserted in front of the old, since
3729			 * ilop hasn't been updated. (We are done now.)
3730			 */
3731			lop->lo_first = new_lop->lo_end;
3732			break;
3733		}
3734		if (new_lop->lo_end >= lop->lo_end) {
3735			/*
3736			 * This case is where the new lock overlaps with the
3737			 * end of the old lock's range. Move the old lock's
3738			 * end to just before the new lock's first and insert
3739			 * the new lock after the old lock.
3740			 * Might not be done yet, since the new lock could
3741			 * overlap further locks with higher ranges.
3742			 */
3743			lop->lo_end = new_lop->lo_first;
3744			ilop = lop;
3745			lop = LIST_NEXT(lop, lo_lckowner);
3746			continue;
3747		}
3748		/*
3749		 * The final case is where the new lock's range is in the
3750		 * middle of the current lock's and splits the current lock
3751		 * up. Use *other_lopp to handle the second part of the
3752		 * split old lock range. (We are done now.)
3753		 * For unlock, we use new_lop as other_lop and tmp, since
3754		 * other_lop and new_lop are the same for this case.
3755		 * We noted the unlock case above, so we don't need
3756		 * new_lop->lo_flags any longer.
3757		 */
3758		tmp = new_lop->lo_first;
3759		if (other_lop == NULL) {
3760			if (!unlock)
3761				panic("nfsd srv update unlock");
3762			other_lop = new_lop;
3763			*new_lopp = NULL;
3764		}
3765		other_lop->lo_first = new_lop->lo_end;
3766		other_lop->lo_end = lop->lo_end;
3767		other_lop->lo_flags = lop->lo_flags;
3768		other_lop->lo_stp = stp;
3769		other_lop->lo_lfp = lfp;
3770		lop->lo_end = tmp;
3771		nfsrv_insertlock(other_lop, lop, stp, lfp);
3772		*other_lopp = NULL;
3773		ilop = lop;
3774		break;
3775	      }
3776	    }
3777	    ilop = lop;
3778	    lop = LIST_NEXT(lop, lo_lckowner);
3779	    if (myfile && (lop == NULL || lop->lo_lfp != lfp))
3780		break;
3781	}
3782
3783	/*
3784	 * Insert the new lock in the list at the appropriate place.
3785	 */
3786	if (!unlock) {
3787		nfsrv_insertlock(new_lop, ilop, stp, lfp);
3788		*new_lopp = NULL;
3789	}
3790}
3791
3792/*
3793 * This function handles sequencing of locks, etc.
3794 * It returns an error that indicates what the caller should do.
3795 */
3796static int
3797nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
3798    struct nfsstate *stp, struct nfsrvcache *op)
3799{
3800	int error = 0;
3801
3802	if ((nd->nd_flag & ND_NFSV41) != 0)
3803		/* NFSv4.1 ignores the open_seqid and lock_seqid. */
3804		goto out;
3805	if (op != nd->nd_rp)
3806		panic("nfsrvstate checkseqid");
3807	if (!(op->rc_flag & RC_INPROG))
3808		panic("nfsrvstate not inprog");
3809	if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
3810		printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
3811		panic("nfsrvstate op refcnt");
3812	}
3813	if ((stp->ls_seq + 1) == seqid) {
3814		if (stp->ls_op)
3815			nfsrvd_derefcache(stp->ls_op);
3816		stp->ls_op = op;
3817		nfsrvd_refcache(op);
3818		stp->ls_seq = seqid;
3819		goto out;
3820	} else if (stp->ls_seq == seqid && stp->ls_op &&
3821		op->rc_xid == stp->ls_op->rc_xid &&
3822		op->rc_refcnt == 0 &&
3823		op->rc_reqlen == stp->ls_op->rc_reqlen &&
3824		op->rc_cksum == stp->ls_op->rc_cksum) {
3825		if (stp->ls_op->rc_flag & RC_INPROG) {
3826			error = NFSERR_DONTREPLY;
3827			goto out;
3828		}
3829		nd->nd_rp = stp->ls_op;
3830		nd->nd_rp->rc_flag |= RC_INPROG;
3831		nfsrvd_delcache(op);
3832		error = NFSERR_REPLYFROMCACHE;
3833		goto out;
3834	}
3835	error = NFSERR_BADSEQID;
3836
3837out:
3838	NFSEXITCODE2(error, nd);
3839	return (error);
3840}
3841
3842/*
3843 * Get the client ip address for callbacks. If the strings can't be parsed,
3844 * just set lc_program to 0 to indicate no callbacks are possible.
3845 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
3846 *  the address to the client's transport address. This won't be used
3847 *  for callbacks, but can be printed out by newnfsstats for info.)
3848 * Return error if the xdr can't be parsed, 0 otherwise.
3849 */
3850APPLESTATIC int
3851nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
3852{
3853	u_int32_t *tl;
3854	u_char *cp, *cp2;
3855	int i, j;
3856	struct sockaddr_in *rad, *sad;
3857	u_char protocol[5], addr[24];
3858	int error = 0, cantparse = 0;
3859	union {
3860		u_long ival;
3861		u_char cval[4];
3862	} ip;
3863	union {
3864		u_short sval;
3865		u_char cval[2];
3866	} port;
3867
3868	rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
3869	rad->sin_family = AF_INET;
3870	rad->sin_len = sizeof (struct sockaddr_in);
3871	rad->sin_addr.s_addr = 0;
3872	rad->sin_port = 0;
3873	clp->lc_req.nr_client = NULL;
3874	clp->lc_req.nr_lock = 0;
3875	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3876	i = fxdr_unsigned(int, *tl);
3877	if (i >= 3 && i <= 4) {
3878		error = nfsrv_mtostr(nd, protocol, i);
3879		if (error)
3880			goto nfsmout;
3881		if (!strcmp(protocol, "tcp")) {
3882			clp->lc_flags |= LCL_TCPCALLBACK;
3883			clp->lc_req.nr_sotype = SOCK_STREAM;
3884			clp->lc_req.nr_soproto = IPPROTO_TCP;
3885		} else if (!strcmp(protocol, "udp")) {
3886			clp->lc_req.nr_sotype = SOCK_DGRAM;
3887			clp->lc_req.nr_soproto = IPPROTO_UDP;
3888		} else {
3889			cantparse = 1;
3890		}
3891	} else {
3892		cantparse = 1;
3893		if (i > 0) {
3894			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3895			if (error)
3896				goto nfsmout;
3897		}
3898	}
3899	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3900	i = fxdr_unsigned(int, *tl);
3901	if (i < 0) {
3902		error = NFSERR_BADXDR;
3903		goto nfsmout;
3904	} else if (i == 0) {
3905		cantparse = 1;
3906	} else if (!cantparse && i <= 23 && i >= 11) {
3907		error = nfsrv_mtostr(nd, addr, i);
3908		if (error)
3909			goto nfsmout;
3910
3911		/*
3912		 * Parse out the address fields. We expect 6 decimal numbers
3913		 * separated by '.'s.
3914		 */
3915		cp = addr;
3916		i = 0;
3917		while (*cp && i < 6) {
3918			cp2 = cp;
3919			while (*cp2 && *cp2 != '.')
3920				cp2++;
3921			if (*cp2)
3922				*cp2++ = '\0';
3923			else if (i != 5) {
3924				cantparse = 1;
3925				break;
3926			}
3927			j = nfsrv_getipnumber(cp);
3928			if (j >= 0) {
3929				if (i < 4)
3930					ip.cval[3 - i] = j;
3931				else
3932					port.cval[5 - i] = j;
3933			} else {
3934				cantparse = 1;
3935				break;
3936			}
3937			cp = cp2;
3938			i++;
3939		}
3940		if (!cantparse) {
3941			if (ip.ival != 0x0) {
3942				rad->sin_addr.s_addr = htonl(ip.ival);
3943				rad->sin_port = htons(port.sval);
3944			} else {
3945				cantparse = 1;
3946			}
3947		}
3948	} else {
3949		cantparse = 1;
3950		if (i > 0) {
3951			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3952			if (error)
3953				goto nfsmout;
3954		}
3955	}
3956	if (cantparse) {
3957		sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
3958		rad->sin_addr.s_addr = sad->sin_addr.s_addr;
3959		rad->sin_port = 0x0;
3960		clp->lc_program = 0;
3961	}
3962nfsmout:
3963	NFSEXITCODE2(error, nd);
3964	return (error);
3965}
3966
/*
 * Convert a nul terminated string of at most three decimal digits into
 * its value. The value (0 to 255) is returned, or -1 if the string has
 * more than three characters, has a character that is not a decimal
 * digit or represents a number greater than 255.
 * (An empty string converts to 0.)
 */
static int
nfsrv_getipnumber(u_char *cp)
{
	int ndigits, val;

	val = 0;
	for (ndigits = 0; cp[ndigits] != '\0'; ndigits++) {
		if (ndigits >= 3 || cp[ndigits] < '0' || cp[ndigits] > '9')
			return (-1);
		val = val * 10 + (cp[ndigits] - '0');
	}
	return (val > 255 ? -1 : val);
}
3988
3989/*
3990 * This function checks for restart conditions.
3991 */
3992static int
3993nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
3994    nfsv4stateid_t *stateidp, int specialid)
3995{
3996	int ret = 0;
3997
3998	/*
3999	 * First check for a server restart. Open, LockT, ReleaseLockOwner
4000	 * and DelegPurge have a clientid, the rest a stateid.
4001	 */
4002	if (flags &
4003	    (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
4004		if (clientid.lval[0] != nfsrvboottime) {
4005			ret = NFSERR_STALECLIENTID;
4006			goto out;
4007		}
4008	} else if (stateidp->other[0] != nfsrvboottime &&
4009		specialid == 0) {
4010		ret = NFSERR_STALESTATEID;
4011		goto out;
4012	}
4013
4014	/*
4015	 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
4016	 * not use a lock/open owner seqid#, so the check can be done now.
4017	 * (The others will be checked, as required, later.)
4018	 */
4019	if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
4020		goto out;
4021
4022	NFSLOCKSTATE();
4023	ret = nfsrv_checkgrace(NULL, NULL, flags);
4024	NFSUNLOCKSTATE();
4025
4026out:
4027	NFSEXITCODE(ret);
4028	return (ret);
4029}
4030
4031/*
4032 * Check for grace.
4033 */
4034static int
4035nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
4036    u_int32_t flags)
4037{
4038	int error = 0;
4039
4040	if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
4041		if (flags & NFSLCK_RECLAIM) {
4042			error = NFSERR_NOGRACE;
4043			goto out;
4044		}
4045	} else {
4046		if (!(flags & NFSLCK_RECLAIM)) {
4047			error = NFSERR_GRACE;
4048			goto out;
4049		}
4050		if (nd != NULL && clp != NULL &&
4051		    (nd->nd_flag & ND_NFSV41) != 0 &&
4052		    (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4053			error = NFSERR_NOGRACE;
4054			goto out;
4055		}
4056
4057		/*
4058		 * If grace is almost over and we are still getting Reclaims,
4059		 * extend grace a bit.
4060		 */
4061		if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4062		    nfsrv_stablefirst.nsf_eograce)
4063			nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
4064				NFSRV_LEASEDELTA;
4065	}
4066
4067out:
4068	NFSEXITCODE(error);
4069	return (error);
4070}
4071
/*
 * Do a server callback.
 * - clp is the client to call back. lc_cbref is incremented for the
 *   duration of the call and a thread waiting on clp is woken up when it
 *   drops back to 0 with LCL_WAKEUPWANTED set.
 * - procnum selects the callback: NFSV4OP_CBGETATTR, NFSV4OP_CBRECALL or
 *   NFSV4PROC_CBNULL. Anything else fails with NFSERR_SERVERFAULT.
 * - stateidp and trunc are only used to build a CB Recall.
 * - fhp is the file handle put in a CB Getattr or CB Recall.
 * - attrbitp is the set of attributes put in a CB Getattr request and
 *   nap is where the attributes in the reply are loaded.
 * - p is passed on to nfsv4_loadattr().
 *
 * Returns 0 upon success. A non-zero return is either a failure to build
 * or send the request (in which case LCL_CBDOWN gets set for the client),
 * the nd_repstat of the reply or an error from nfsv4_loadattr().
 */
static int
nfsrv_docallback(struct nfsclient *clp, int procnum,
    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
{
	mbuf_t m;
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct ucred *cred;
	int error = 0;
	u_int32_t callback;
	struct nfsdsession *sep = NULL;

	cred = newnfs_getcred();
	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
	/* Callbacks must never be done to an unconfirmed client. */
	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		NFSUNLOCKSTATE();
		panic("docallb");
	}
	clp->lc_cbref++;

	/*
	 * Fill the callback program# and version into the request
	 * structure for newnfs_connect() to use.
	 */
	clp->lc_req.nr_prog = clp->lc_program;
#ifdef notnow
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		clp->lc_req.nr_vers = NFSV41_CBVERS;
	else
#endif
		clp->lc_req.nr_vers = NFSV4_CBVERS;

	/*
	 * First, fill in some of the fields of nd and cred.
	 * (The client fields are copied while the state lock is held.)
	 */
	nd->nd_flag = ND_NFSV4;
	if (clp->lc_flags & LCL_GSS)
		nd->nd_flag |= ND_KERBV;
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		nd->nd_flag |= ND_NFSV41;
	nd->nd_repstat = 0;
	cred->cr_uid = clp->lc_uid;
	cred->cr_gid = clp->lc_gid;
	callback = clp->lc_callback;
	NFSUNLOCKSTATE();
	cred->cr_ngroups = 1;

	/*
	 * Get the first mbuf for the request.
	 */
	MGET(m, M_WAITOK, MT_DATA);
	mbuf_setlen(m, 0);
	nd->nd_mreq = nd->nd_mb = m;
	nd->nd_bpos = NFSMTOD(m, caddr_t);

	/*
	 * and build the callback request.
	 * (If building it fails, the request mbuf list is freed here
	 *  before going to errout.)
	 */
	if (procnum == NFSV4OP_CBGETATTR) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
		    "CB Getattr", &sep);
		if (error != 0) {
			mbuf_freem(nd->nd_mreq);
			goto errout;
		}
		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
		(void)nfsrv_putattrbit(nd, attrbitp);
	} else if (procnum == NFSV4OP_CBRECALL) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
		    "CB Recall", &sep);
		if (error != 0) {
			mbuf_freem(nd->nd_mreq);
			goto errout;
		}
		/* The stateid (seqid, then other), the truncate flag ... */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
		    NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		if (trunc)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
		/* ... and then the file handle. */
		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
	} else if (procnum == NFSV4PROC_CBNULL) {
		nd->nd_procnum = NFSV4PROC_CBNULL;
		/* A Null has no arguments, but NFSv4.1 needs the session. */
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			error = nfsv4_getcbsession(clp, &sep);
			if (error != 0) {
				mbuf_freem(nd->nd_mreq);
				goto errout;
			}
		}
	} else {
		error = NFSERR_SERVERFAULT;
		mbuf_freem(nd->nd_mreq);
		goto errout;
	}

	/*
	 * Call newnfs_connect(), as required, and then newnfs_request().
	 * For NFSv4.1 no connection is made here, so a NULL nr_client is
	 * an error.
	 * NOTE(review): when error gets set in this section, nd->nd_mreq
	 * is not freed and sep is not released by this function. It looks
	 * like they could be leaked for that case - confirm.
	 */
	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
	if (clp->lc_req.nr_client == NULL) {
		if ((clp->lc_flags & LCL_NFSV41) != 0)
			error = ECONNREFUSED;
		else if (nd->nd_procnum == NFSV4PROC_CBNULL)
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 1);
		else
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 3);
	}
	newnfs_sndunlock(&clp->lc_req.nr_lock);
	if (!error) {
		if ((nd->nd_flag & ND_NFSV41) != 0) {
			KASSERT(sep != NULL, ("sep NULL"));
			if (sep->sess_cbsess.nfsess_xprt != NULL)
				error = newnfs_request(nd, NULL, clp,
				    &clp->lc_req, NULL, NULL, cred,
				    clp->lc_program, clp->lc_req.nr_vers, NULL,
				    1, NULL, &sep->sess_cbsess);
			else {
				/*
				 * This should probably never occur, but if a
				 * client somehow does an RPC without a
				 * SequenceID Op that causes a callback just
				 * after the nfsd threads have been terminated
				 * and restarted we could conceivably get here
				 * without a backchannel xprt.
				 */
				printf("nfsrv_docallback: no xprt\n");
				error = ECONNREFUSED;
			}
			nfsrv_freesession(sep, NULL);
		} else
			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
			    NULL, NULL, cred, clp->lc_program,
			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
	}
errout:
	NFSFREECRED(cred);

	/*
	 * If error is set here, the Callback path isn't working
	 * properly, so twiddle the appropriate LCL_ flags.
	 * (nd_repstat != 0 indicates the Callback path is working,
	 *  but the callback failed on the client.)
	 */
	if (error) {
		/*
		 * Mark the callback pathway down, which disabled issuing
		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
		 */
		NFSLOCKSTATE();
		clp->lc_flags |= LCL_CBDOWN;
		NFSUNLOCKSTATE();
	} else {
		/*
		 * Callback worked. If the callback path was down, disable
		 * callbacks, so no more delegations will be issued. (This
		 * is done on the assumption that the callback pathway is
		 * flakey.)
		 */
		NFSLOCKSTATE();
		if (clp->lc_flags & LCL_CBDOWN)
			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
		NFSUNLOCKSTATE();
		/*
		 * error is always 0 in this branch, so it becomes the
		 * client's reply status or, for a successful CB Getattr,
		 * the result of loading the returned attributes into nap.
		 */
		if (nd->nd_repstat)
			error = nd->nd_repstat;
		else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
			    p, NULL);
		mbuf_freem(nd->nd_mrep);
	}
	/*
	 * Done with the client structure, so release the reference and
	 * wake up a thread waiting for the last callback to complete.
	 */
	NFSLOCKSTATE();
	clp->lc_cbref--;
	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
		clp->lc_flags &= ~LCL_WAKEUPWANTED;
		wakeup(clp);
	}
	NFSUNLOCKSTATE();

	NFSEXITCODE(error);
	return (error);
}
4265
4266/*
4267 * Set up the compound RPC for the callback.
4268 */
4269static int
4270nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4271    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp)
4272{
4273	uint32_t *tl;
4274	int error, len;
4275
4276	len = strlen(optag);
4277	(void)nfsm_strtom(nd, optag, len);
4278	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4279	if ((nd->nd_flag & ND_NFSV41) != 0) {
4280		*tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4281		*tl++ = txdr_unsigned(callback);
4282		*tl++ = txdr_unsigned(2);
4283		*tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4284		error = nfsv4_setcbsequence(nd, clp, 1, sepp);
4285		if (error != 0)
4286			return (error);
4287		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4288		*tl = txdr_unsigned(op);
4289	} else {
4290		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4291		*tl++ = txdr_unsigned(callback);
4292		*tl++ = txdr_unsigned(1);
4293		*tl = txdr_unsigned(op);
4294	}
4295	return (0);
4296}
4297
/*
 * Return the next index# for a clientid, which is just the previous one
 * plus one, starting at 1.
 * If the 32bit unsigned counter does actually wrap around to 0, a message
 * is printed and 0 is returned, since the server should be rebooted.
 * At an average rate of one new client per second, it will wrap around in
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
4318
4319/*
4320 * Return the next index# for a stateid. Mostly just increment and return
4321 * the next one, but... if the 32bit unsigned does actually wrap around
4322 * (will a BSD server stay up that long?), find
4323 * new start and end values.
4324 */
4325static u_int32_t
4326nfsrv_nextstateindex(struct nfsclient *clp)
4327{
4328	struct nfsstate *stp;
4329	int i;
4330	u_int32_t canuse, min_index, max_index;
4331
4332	if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4333		clp->lc_stateindex++;
4334		if (clp->lc_stateindex != clp->lc_statemaxindex)
4335			return (clp->lc_stateindex);
4336	}
4337
4338	/*
4339	 * Yuck, we've hit the end.
4340	 * Look for a new min and max.
4341	 */
4342	min_index = 0;
4343	max_index = 0xffffffff;
4344	for (i = 0; i < nfsrv_statehashsize; i++) {
4345	    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4346		if (stp->ls_stateid.other[2] > 0x80000000) {
4347		    if (stp->ls_stateid.other[2] < max_index)
4348			max_index = stp->ls_stateid.other[2];
4349		} else {
4350		    if (stp->ls_stateid.other[2] > min_index)
4351			min_index = stp->ls_stateid.other[2];
4352		}
4353	    }
4354	}
4355
4356	/*
4357	 * Yikes, highly unlikely, but I'll handle it anyhow.
4358	 */
4359	if (min_index == 0x80000000 && max_index == 0x80000001) {
4360	    canuse = 0;
4361	    /*
4362	     * Loop around until we find an unused entry. Return that
4363	     * and set LCL_INDEXNOTOK, so the search will continue next time.
4364	     * (This is one of those rare cases where a goto is the
4365	     *  cleanest way to code the loop.)
4366	     */
4367tryagain:
4368	    for (i = 0; i < nfsrv_statehashsize; i++) {
4369		LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4370		    if (stp->ls_stateid.other[2] == canuse) {
4371			canuse++;
4372			goto tryagain;
4373		    }
4374		}
4375	    }
4376	    clp->lc_flags |= LCL_INDEXNOTOK;
4377	    return (canuse);
4378	}
4379
4380	/*
4381	 * Ok to start again from min + 1.
4382	 */
4383	clp->lc_stateindex = min_index + 1;
4384	clp->lc_statemaxindex = max_index;
4385	clp->lc_flags &= ~LCL_INDEXNOTOK;
4386	return (clp->lc_stateindex);
4387}
4388
4389/*
4390 * The following functions handle the stable storage file that deals with
4391 * the edge conditions described in RFC3530 Sec. 8.6.3.
4392 * The file is as follows:
4393 * - a single record at the beginning that has the lease time of the
4394 *   previous server instance (before the last reboot) and the nfsrvboottime
4395 *   values for the previous server boots.
4396 *   These previous boot times are used to ensure that the current
4397 *   nfsrvboottime does not, somehow, get set to a previous one.
4398 *   (This is important so that Stale ClientIDs and StateIDs can
4399 *    be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
4401 * - followed by some number of appended records with:
4402 *   - client id string
4403 *   - flag that indicates it is a record revoking state via lease
4404 *     expiration or similar
4405 *     OR has successfully acquired state.
4406 * These structures vary in length, with the client string at the end, up
4407 * to NFSV4_OPAQUELIMIT in size.
4408 *
4409 * At the end of the grace period, the file is truncated, the first
4410 * record is rewritten with updated information and any acquired state
4411 * records for successful reclaims of state are written.
4412 *
4413 * Subsequent records are appended when the first state is issued to
4414 * a client and when state is revoked for a client.
4415 *
4416 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological order.
4418 * If, for some reason, the file can't be read, the grace period is
4419 * immediately terminated and all reclaims get NFSERR_NOGRACE.
4420 */
4421
/*
 * Read in the stable storage file. Called by nfssvc() before the nfsd
 * processes start servicing requests.
 * This sets nfsrvboottime and (re)initializes nfsrv_stablefirst. If the
 * file is read successfully, nsf_head ends up with one entry per client
 * found in the file, nsf_flags is set to NFSNSF_OK and the end of grace
 * is set from the lease time in the file. For any failure the function
 * returns with NFSNSF_GRACEOVER still set, so there is no grace period.
 * p is passed to NFSD_RDWR() for the reads.
 */
APPLESTATIC void
nfsrv_setupstable(NFSPROC_T *p)
{
	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
	struct nfsrv_stable *sp, *nsp;
	struct nfst_rec *tsp;
	int error, i, tryagain;
	off_t off = 0;
	ssize_t aresid, len;

	/*
	 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
	 * a reboot, so state has not been lost.
	 */
	if (sf->nsf_flags & NFSNSF_UPDATEDONE)
		return;
	/*
	 * Set Grace over just until the file reads successfully.
	 */
	nfsrvboottime = time_second;
	LIST_INIT(&sf->nsf_head);
	sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
	sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
	if (sf->nsf_fp == NULL)
		return;
	/*
	 * Read the first record. A short read or a number of boots that is
	 * out of range means the file can't be used.
	 */
	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	    (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
	    0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	if (error || aresid || sf->nsf_numboots == 0 ||
		sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
		return;

	/*
	 * Now, read in the boottimes.
	 * (The array is allocated with room for one more entry than is
	 *  read in.)
	 */
	sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
		sizeof (time_t), M_TEMP, M_WAITOK);
	off = sizeof (struct nfsf_rec);
	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	    (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
	    UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	if (error || aresid) {
		free((caddr_t)sf->nsf_bootvals, M_TEMP);
		sf->nsf_bootvals = NULL;
		return;
	}

	/*
	 * Make sure this nfsrvboottime is different from all recorded
	 * previous ones.
	 * (Each time it is bumped, the whole list is checked again.)
	 */
	do {
		tryagain = 0;
		for (i = 0; i < sf->nsf_numboots; i++) {
			if (nfsrvboottime == sf->nsf_bootvals[i]) {
				nfsrvboottime++;
				tryagain = 1;
				break;
			}
		}
	} while (tryagain);

	sf->nsf_flags |= NFSNSF_OK;
	off += (sf->nsf_numboots * sizeof (time_t));

	/*
	 * Read through the file, building a list of records for grace
	 * checking.
	 * Each record is between sizeof (struct nfst_rec) and
	 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
	 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
	 * (Each read is for the maximum record size at the current offset
	 *  and the offset is then advanced by the actual size of the record
	 *  at the start of the buffer. A read that returns no data ends the
	 *  loop.)
	 */
	tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
		NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
	do {
	    error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	        (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
	        off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	    /* len is the number of bytes actually read. */
	    len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
	    if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
		len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
		/*
		 * Yuck, the file has been corrupted, so just return
		 * after clearing out any restart state, so the grace period
		 * is over.
		 */
		LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
			LIST_REMOVE(sp, nst_list);
			free((caddr_t)sp, M_TEMP);
		}
		free((caddr_t)tsp, M_TEMP);
		sf->nsf_flags &= ~NFSNSF_OK;
		free((caddr_t)sf->nsf_bootvals, M_TEMP);
		sf->nsf_bootvals = NULL;
		return;
	    }
	    if (len > 0) {
		off += sizeof (struct nfst_rec) + tsp->len - 1;
		/*
		 * Search the list for a matching client.
		 */
		LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
			if (tsp->len == sp->nst_len &&
			    !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
				break;
		}
		if (sp == LIST_END(&sf->nsf_head)) {
			/*
			 * First record for this client, so add an entry
			 * with a copy of the record to the list.
			 */
			sp = (struct nfsrv_stable *)malloc(tsp->len +
				sizeof (struct nfsrv_stable) - 1, M_TEMP,
				M_WAITOK);
			NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
				sizeof (struct nfst_rec) + tsp->len - 1);
			LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
		} else {
			if (tsp->flag == NFSNST_REVOKE)
				sp->nst_flag |= NFSNST_REVOKE;
			else
				/*
				 * A subsequent timestamp indicates the client
				 * did a setclientid/confirm and any previous
				 * revoke is no longer relevant.
				 */
				sp->nst_flag &= ~NFSNST_REVOKE;
		}
	    }
	} while (len > 0);
	free((caddr_t)tsp, M_TEMP);
	/*
	 * The file was read successfully. This assignment also clears
	 * NFSNSF_GRACEOVER and NFSNSF_NEEDLOCK, which were set above.
	 */
	sf->nsf_flags = NFSNSF_OK;
	sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
		NFSRV_LEASEDELTA;
}
4557
/*
 * Update the stable storage file, now that the grace period is over.
 * The file is truncated and rewritten with a new first record, the list
 * of boot times (with nfsrvboottime added at the front) and a record for
 * each client on nsf_head that is marked NFSNST_GOTSTATE. All the entries
 * on nsf_head are freed.
 * This is only done once, since it sets NFSNSF_UPDATEDONE and does
 * nothing if that flag is already set or there is no stable storage file.
 * p is passed to the calls that truncate and write the file.
 */
APPLESTATIC void
nfsrv_updatestable(NFSPROC_T *p)
{
	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
	struct nfsrv_stable *sp, *nsp;
	int i;
	struct nfsvattr nva;
	vnode_t vp;
#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
	mount_t mp = NULL;
#endif
	int error;

	if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
		return;
	sf->nsf_flags |= NFSNSF_UPDATEDONE;
	/*
	 * Ok, we need to rewrite the stable storage file.
	 * - truncate to 0 length
	 * - write the new first structure
	 * - loop through the data structures, writing out any that
	 *   have timestamps older than the old boot
	 */
	if (sf->nsf_bootvals) {
		/*
		 * Shift the previous boot times up by one, to make room
		 * for this boot's time at the front.
		 * NOTE(review): this relies on nsf_bootvals having room for
		 * one more entry (nfsrv_setupstable() allocates
		 * nsf_numboots + 1 entries) - confirm there is no other
		 * allocator.
		 */
		sf->nsf_numboots++;
		for (i = sf->nsf_numboots - 2; i >= 0; i--)
			sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
	} else {
		sf->nsf_numboots = 1;
		sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
			M_TEMP, M_WAITOK);
	}
	sf->nsf_bootvals[0] = nfsrvboottime;
	sf->nsf_lease = nfsrv_lease;
	/* Truncate the file by setting its size to 0. */
	NFSVNO_ATTRINIT(&nva);
	NFSVNO_SETATTRVAL(&nva, size, 0);
	vp = NFSFPVNODE(sf->nsf_fp);
	vn_start_write(vp, &mp, V_WAIT);
	if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
		error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
		    NULL);
		NFSVOPUNLOCK(vp, 0);
	} else
		error = EPERM;
	vn_finished_write(mp);
	/* Write the first record at offset 0 ... */
	if (!error)
	    error = NFSD_RDWR(UIO_WRITE, vp,
		(caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
	/* ... followed by the list of boot times. */
	if (!error)
	    error = NFSD_RDWR(UIO_WRITE, vp,
		(caddr_t)sf->nsf_bootvals,
		sf->nsf_numboots * sizeof (time_t),
		(off_t)(sizeof (struct nfsf_rec)),
		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
	/* The boot times are freed whether or not the writes worked. */
	free((caddr_t)sf->nsf_bootvals, M_TEMP);
	sf->nsf_bootvals = NULL;
	if (error) {
		sf->nsf_flags &= ~NFSNSF_OK;
		printf("EEK! Can't write NfsV4 stable storage file\n");
		return;
	}
	sf->nsf_flags |= NFSNSF_OK;

	/*
	 * Loop through the list and write out timestamp records for
	 * any clients that successfully reclaimed state.
	 * (Every entry is removed from the list and freed, whether or
	 *  not a record is written for it.)
	 */
	LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
		if (sp->nst_flag & NFSNST_GOTSTATE) {
			nfsrv_writestable(sp->nst_client, sp->nst_len,
				NFSNST_NEWSTATE, p);
			sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
		}
		LIST_REMOVE(sp, nst_list);
		free((caddr_t)sp, M_TEMP);
	}
	nfsrv_backupstable();
}
4640
4641/*
4642 * Append a record to the stable storage file.
4643 */
4644APPLESTATIC void
4645nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
4646{
4647	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4648	struct nfst_rec *sp;
4649	int error;
4650
4651	if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
4652		return;
4653	sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4654		len - 1, M_TEMP, M_WAITOK);
4655	sp->len = len;
4656	NFSBCOPY(client, sp->client, len);
4657	sp->flag = flag;
4658	error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
4659	    (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
4660	    UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
4661	free((caddr_t)sp, M_TEMP);
4662	if (error) {
4663		sf->nsf_flags &= ~NFSNSF_OK;
4664		printf("EEK! Can't write NfsV4 stable storage file\n");
4665	}
4666}
4667
4668/*
4669 * This function is called during the grace period to mark a client
4670 * that successfully reclaimed state.
4671 */
4672static void
4673nfsrv_markstable(struct nfsclient *clp)
4674{
4675	struct nfsrv_stable *sp;
4676
4677	/*
4678	 * First find the client structure.
4679	 */
4680	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4681		if (sp->nst_len == clp->lc_idlen &&
4682		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4683			break;
4684	}
4685	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
4686		return;
4687
4688	/*
4689	 * Now, just mark it and set the nfsclient back pointer.
4690	 */
4691	sp->nst_flag |= NFSNST_GOTSTATE;
4692	sp->nst_clp = clp;
4693}
4694
4695/*
4696 * This function is called for a reclaim, to see if it gets grace.
4697 * It returns 0 if a reclaim is allowed, 1 otherwise.
4698 */
4699static int
4700nfsrv_checkstable(struct nfsclient *clp)
4701{
4702	struct nfsrv_stable *sp;
4703
4704	/*
4705	 * First, find the entry for the client.
4706	 */
4707	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4708		if (sp->nst_len == clp->lc_idlen &&
4709		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4710			break;
4711	}
4712
4713	/*
4714	 * If not in the list, state was revoked or no state was issued
4715	 * since the previous reboot, a reclaim is denied.
4716	 */
4717	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
4718	    (sp->nst_flag & NFSNST_REVOKE) ||
4719	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
4720		return (1);
4721	return (0);
4722}
4723
/*
 * Test for and try to clear out a conflicting client. This is called by
 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
 * are found.
 * The trick here is that it can't revoke a conflicting client with an
 * expired lease unless it holds the v4root lock, so...
 * If no v4root lock, get the lock and return 1 to indicate "try again".
 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
 * the revocation worked and the conflicting client is "bye, bye", so it
 * can be tried again.
 * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
 * Unlocks State before a non-zero value is returned.
 *
 * - clp is the conflicting client.
 * - *haslockp is non-zero if the caller already holds the v4root lock
 *   and is set to 1 when the lock is acquired here.
 * - vp, if not NULL, is a locked vnode that is unlocked while the v4root
 *   lock is being acquired and then locked again.
 * - p is passed to the functions that write the revoke record and
 *   throw away the client.
 */
static int
nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
    NFSPROC_T *p)
{
	int gotlock, lktype = 0;

	/*
	 * If lease hasn't expired, we can't fix it.
	 * (The same applies until the stable storage file has been
	 *  updated, as indicated by NFSNSF_UPDATEDONE.)
	 */
	if (clp->lc_expiry >= NFSD_MONOSEC ||
	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
		return (0);
	if (*haslockp == 0) {
		NFSUNLOCKSTATE();
		/* Remember how the vnode was locked, so it can be redone. */
		if (vp != NULL) {
			lktype = NFSVOPISLOCKED(vp);
			NFSVOPUNLOCK(vp, 0);
		}
		/*
		 * Release the reference on the v4root lock and then loop
		 * until the lock is acquired.
		 */
		NFSLOCKV4ROOTMUTEX();
		nfsv4_relref(&nfsv4rootfs_lock);
		do {
			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
			    NFSV4ROOTLOCKMUTEXPTR, NULL);
		} while (!gotlock);
		NFSUNLOCKV4ROOTMUTEX();
		*haslockp = 1;
		if (vp != NULL) {
			NFSVOPLOCK(vp, lktype | LK_RETRY);
			if ((vp->v_iflag & VI_DOOMED) != 0)
				return (2);
		}
		return (1);
	}
	NFSUNLOCKSTATE();

	/*
	 * Ok, we can expire the conflicting client.
	 * (The revoke is recorded in the stable storage file before the
	 *  client's state is thrown away.)
	 */
	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
	nfsrv_backupstable();
	nfsrv_cleanclient(clp, p);
	nfsrv_freedeleglist(&clp->lc_deleg);
	nfsrv_freedeleglist(&clp->lc_olddeleg);
	LIST_REMOVE(clp, lc_hash);
	nfsrv_zapclient(clp, p);
	return (1);
}
4784
4785/*
4786 * Resolve a delegation conflict.
4787 * Returns 0 to indicate the conflict was resolved without sleeping.
4788 * Return -1 to indicate that the caller should check for conflicts again.
4789 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
4790 *
4791 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
4792 * for a return of 0, since there was no sleep and it could be required
4793 * later. It is released for a return of NFSERR_DELAY, since the caller
4794 * will return that error. It is released when a sleep was done waiting
4795 * for the delegation to be returned or expire (so that other nfsds can
4796 * handle ops). Then, it must be acquired for the write to stable storage.
4797 * (This function is somewhat similar to nfsrv_clientconflict(), but
4798 *  the semantics differ in a couple of subtle ways. The return of 0
4799 *  indicates the conflict was resolved without sleeping here, not
4800 *  that the conflict can't be resolved and the handling of nfsv4root_lock
4801 *  differs, as noted above.)
4802 * Unlocks State before returning a non-zero value.
4803 */
4804static int
4805nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
4806    vnode_t vp)
4807{
4808	struct nfsclient *clp = stp->ls_clp;
4809	int gotlock, error, lktype = 0, retrycnt, zapped_clp;
4810	nfsv4stateid_t tstateid;
4811	fhandle_t tfh;
4812
4813	/*
4814	 * If the conflict is with an old delegation...
4815	 */
4816	if (stp->ls_flags & NFSLCK_OLDDELEG) {
4817		/*
4818		 * You can delete it, if it has expired.
4819		 */
4820		if (clp->lc_delegtime < NFSD_MONOSEC) {
4821			nfsrv_freedeleg(stp);
4822			NFSUNLOCKSTATE();
4823			error = -1;
4824			goto out;
4825		}
4826		NFSUNLOCKSTATE();
4827		/*
4828		 * During this delay, the old delegation could expire or it
4829		 * could be recovered by the client via an Open with
4830		 * CLAIM_DELEGATE_PREV.
4831		 * Release the nfsv4root_lock, if held.
4832		 */
4833		if (*haslockp) {
4834			*haslockp = 0;
4835			NFSLOCKV4ROOTMUTEX();
4836			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4837			NFSUNLOCKV4ROOTMUTEX();
4838		}
4839		error = NFSERR_DELAY;
4840		goto out;
4841	}
4842
4843	/*
4844	 * It's a current delegation, so:
4845	 * - check to see if the delegation has expired
4846	 *   - if so, get the v4root lock and then expire it
4847	 */
4848	if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
4849		/*
4850		 * - do a recall callback, since not yet done
4851		 * For now, never allow truncate to be set. To use
4852		 * truncate safely, it must be guaranteed that the
4853		 * Remove, Rename or Setattr with size of 0 will
4854		 * succeed and that would require major changes to
4855		 * the VFS/Vnode OPs.
4856		 * Set the expiry time large enough so that it won't expire
4857		 * until after the callback, then set it correctly, once
4858		 * the callback is done. (The delegation will now time
4859		 * out whether or not the Recall worked ok. The timeout
4860		 * will be extended when ops are done on the delegation
4861		 * stateid, up to the timelimit.)
4862		 */
4863		stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
4864		    NFSRV_LEASEDELTA;
4865		stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
4866		    NFSRV_LEASEDELTA;
4867		stp->ls_flags |= NFSLCK_DELEGRECALL;
4868
4869		/*
4870		 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
4871		 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
4872		 * in order to try and avoid a race that could happen
4873		 * when a CBRecall request passed the Open reply with
4874		 * the delegation in it when transitting the network.
4875		 * Since nfsrv_docallback will sleep, don't use stp after
4876		 * the call.
4877		 */
4878		NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
4879		    sizeof (tstateid));
4880		NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
4881		    sizeof (tfh));
4882		NFSUNLOCKSTATE();
4883		if (*haslockp) {
4884			*haslockp = 0;
4885			NFSLOCKV4ROOTMUTEX();
4886			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4887			NFSUNLOCKV4ROOTMUTEX();
4888		}
4889		retrycnt = 0;
4890		do {
4891		    error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
4892			&tstateid, 0, &tfh, NULL, NULL, p);
4893		    retrycnt++;
4894		} while ((error == NFSERR_BADSTATEID ||
4895		    error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
4896		error = NFSERR_DELAY;
4897		goto out;
4898	}
4899
4900	if (clp->lc_expiry >= NFSD_MONOSEC &&
4901	    stp->ls_delegtime >= NFSD_MONOSEC) {
4902		NFSUNLOCKSTATE();
4903		/*
4904		 * A recall has been done, but it has not yet expired.
4905		 * So, RETURN_DELAY.
4906		 */
4907		if (*haslockp) {
4908			*haslockp = 0;
4909			NFSLOCKV4ROOTMUTEX();
4910			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4911			NFSUNLOCKV4ROOTMUTEX();
4912		}
4913		error = NFSERR_DELAY;
4914		goto out;
4915	}
4916
4917	/*
4918	 * If we don't yet have the lock, just get it and then return,
4919	 * since we need that before deleting expired state, such as
4920	 * this delegation.
4921	 * When getting the lock, unlock the vnode, so other nfsds that
4922	 * are in progress, won't get stuck waiting for the vnode lock.
4923	 */
4924	if (*haslockp == 0) {
4925		NFSUNLOCKSTATE();
4926		if (vp != NULL) {
4927			lktype = NFSVOPISLOCKED(vp);
4928			NFSVOPUNLOCK(vp, 0);
4929		}
4930		NFSLOCKV4ROOTMUTEX();
4931		nfsv4_relref(&nfsv4rootfs_lock);
4932		do {
4933			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4934			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4935		} while (!gotlock);
4936		NFSUNLOCKV4ROOTMUTEX();
4937		*haslockp = 1;
4938		if (vp != NULL) {
4939			NFSVOPLOCK(vp, lktype | LK_RETRY);
4940			if ((vp->v_iflag & VI_DOOMED) != 0) {
4941				*haslockp = 0;
4942				NFSLOCKV4ROOTMUTEX();
4943				nfsv4_unlock(&nfsv4rootfs_lock, 1);
4944				NFSUNLOCKV4ROOTMUTEX();
4945				error = NFSERR_PERM;
4946				goto out;
4947			}
4948		}
4949		error = -1;
4950		goto out;
4951	}
4952
4953	NFSUNLOCKSTATE();
4954	/*
4955	 * Ok, we can delete the expired delegation.
4956	 * First, write the Revoke record to stable storage and then
4957	 * clear out the conflict.
4958	 * Since all other nfsd threads are now blocked, we can safely
4959	 * sleep without the state changing.
4960	 */
4961	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4962	nfsrv_backupstable();
4963	if (clp->lc_expiry < NFSD_MONOSEC) {
4964		nfsrv_cleanclient(clp, p);
4965		nfsrv_freedeleglist(&clp->lc_deleg);
4966		nfsrv_freedeleglist(&clp->lc_olddeleg);
4967		LIST_REMOVE(clp, lc_hash);
4968		zapped_clp = 1;
4969	} else {
4970		nfsrv_freedeleg(stp);
4971		zapped_clp = 0;
4972	}
4973	if (zapped_clp)
4974		nfsrv_zapclient(clp, p);
4975	error = -1;
4976
4977out:
4978	NFSEXITCODE(error);
4979	return (error);
4980}
4981
4982/*
4983 * Check for a remove allowed, if remove is set to 1 and get rid of
4984 * delegations.
4985 */
4986APPLESTATIC int
4987nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
4988{
4989	struct nfsstate *stp;
4990	struct nfslockfile *lfp;
4991	int error, haslock = 0;
4992	fhandle_t nfh;
4993
4994	/*
4995	 * First, get the lock file structure.
4996	 * (A return of -1 means no associated state, so remove ok.)
4997	 */
4998	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
4999tryagain:
5000	NFSLOCKSTATE();
5001	if (!error)
5002		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5003	if (error) {
5004		NFSUNLOCKSTATE();
5005		if (haslock) {
5006			NFSLOCKV4ROOTMUTEX();
5007			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5008			NFSUNLOCKV4ROOTMUTEX();
5009		}
5010		if (error == -1)
5011			error = 0;
5012		goto out;
5013	}
5014
5015	/*
5016	 * Now, we must Recall any delegations.
5017	 */
5018	error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
5019	if (error) {
5020		/*
5021		 * nfsrv_cleandeleg() unlocks state for non-zero
5022		 * return.
5023		 */
5024		if (error == -1)
5025			goto tryagain;
5026		if (haslock) {
5027			NFSLOCKV4ROOTMUTEX();
5028			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5029			NFSUNLOCKV4ROOTMUTEX();
5030		}
5031		goto out;
5032	}
5033
5034	/*
5035	 * Now, look for a conflicting open share.
5036	 */
5037	if (remove) {
5038		LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
5039			if (stp->ls_flags & NFSLCK_WRITEDENY) {
5040				error = NFSERR_FILEOPEN;
5041				break;
5042			}
5043		}
5044	}
5045
5046	NFSUNLOCKSTATE();
5047	if (haslock) {
5048		NFSLOCKV4ROOTMUTEX();
5049		nfsv4_unlock(&nfsv4rootfs_lock, 1);
5050		NFSUNLOCKV4ROOTMUTEX();
5051	}
5052
5053out:
5054	NFSEXITCODE(error);
5055	return (error);
5056}
5057
5058/*
5059 * Clear out all delegations for the file referred to by lfp.
5060 * May return NFSERR_DELAY, if there will be a delay waiting for
5061 * delegations to expire.
5062 * Returns -1 to indicate it slept while recalling a delegation.
5063 * This function has the side effect of deleting the nfslockfile structure,
5064 * if it no longer has associated state and didn't have to sleep.
5065 * Unlocks State before a non-zero value is returned.
5066 */
5067static int
5068nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5069    struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5070{
5071	struct nfsstate *stp, *nstp;
5072	int ret = 0;
5073
5074	stp = LIST_FIRST(&lfp->lf_deleg);
5075	while (stp != LIST_END(&lfp->lf_deleg)) {
5076		nstp = LIST_NEXT(stp, ls_file);
5077		if (stp->ls_clp != clp) {
5078			ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5079			if (ret) {
5080				/*
5081				 * nfsrv_delegconflict() unlocks state
5082				 * when it returns non-zero.
5083				 */
5084				goto out;
5085			}
5086		}
5087		stp = nstp;
5088	}
5089out:
5090	NFSEXITCODE(ret);
5091	return (ret);
5092}
5093
5094/*
5095 * There are certain operations that, when being done outside of NFSv4,
5096 * require that any NFSv4 delegation for the file be recalled.
5097 * This function is to be called for those cases:
5098 * VOP_RENAME() - When a delegation is being recalled for any reason,
5099 *	the client may have to do Opens against the server, using the file's
5100 *	final component name. If the file has been renamed on the server,
5101 *	that component name will be incorrect and the Open will fail.
5102 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5103 *	been removed on the server, if there is a delegation issued to
5104 *	that client for the file. I say "theoretically" since clients
5105 *	normally do an Access Op before the Open and that Access Op will
5106 *	fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5107 *	they will detect the file's removal in the same manner. (There is
5108 *	one case where RFC3530 allows a client to do an Open without first
5109 *	doing an Access Op, which is passage of a check against the ACE
5110 *	returned with a Write delegation, but current practice is to ignore
5111 *	the ACE and always do an Access Op.)
5112 *	Since the functions can only be called with an unlocked vnode, this
5113 *	can't be done at this time.
5114 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5115 *	locks locally in the client, which are not visible to the server. To
5116 *	deal with this, issuing of delegations for a vnode must be disabled
5117 *	and all delegations for the vnode recalled. This is done via the
5118 *	second function, using the VV_DISABLEDELEG vflag on the vnode.
5119 */
5120APPLESTATIC void
5121nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5122{
5123	time_t starttime;
5124	int error;
5125
5126	/*
5127	 * First, check to see if the server is currently running and it has
5128	 * been called for a regular file when issuing delegations.
5129	 */
5130	if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
5131	    nfsrv_issuedelegs == 0)
5132		return;
5133
5134	KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5135	/*
5136	 * First, get a reference on the nfsv4rootfs_lock so that an
5137	 * exclusive lock cannot be acquired by another thread.
5138	 */
5139	NFSLOCKV4ROOTMUTEX();
5140	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5141	NFSUNLOCKV4ROOTMUTEX();
5142
5143	/*
5144	 * Now, call nfsrv_checkremove() in a loop while it returns
5145	 * NFSERR_DELAY. Return upon any other error or when timed out.
5146	 */
5147	starttime = NFSD_MONOSEC;
5148	do {
5149		if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5150			error = nfsrv_checkremove(vp, 0, p);
5151			NFSVOPUNLOCK(vp, 0);
5152		} else
5153			error = EPERM;
5154		if (error == NFSERR_DELAY) {
5155			if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5156				break;
5157			/* Sleep for a short period of time */
5158			(void) nfs_catnap(PZERO, 0, "nfsremove");
5159		}
5160	} while (error == NFSERR_DELAY);
5161	NFSLOCKV4ROOTMUTEX();
5162	nfsv4_relref(&nfsv4rootfs_lock);
5163	NFSUNLOCKV4ROOTMUTEX();
5164}
5165
5166APPLESTATIC void
5167nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
5168{
5169
5170#ifdef VV_DISABLEDELEG
5171	/*
5172	 * First, flag issuance of delegations disabled.
5173	 */
5174	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
5175#endif
5176
5177	/*
5178	 * Then call nfsd_recalldelegation() to get rid of all extant
5179	 * delegations.
5180	 */
5181	nfsd_recalldelegation(vp, p);
5182}
5183
5184/*
5185 * Check for conflicting locks, etc. and then get rid of delegations.
5186 * (At one point I thought that I should get rid of delegations for any
5187 *  Setattr, since it could potentially disallow the I/O op (read or write)
5188 *  allowed by the delegation. However, Setattr Ops that aren't changing
5189 *  the size get a stateid of all 0s, so you can't tell if it is a delegation
5190 *  for the same client or a different one, so I decided to only get rid
5191 *  of delegations for other clients when the size is being changed.)
5192 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5193 * as Write backs, even if there is no delegation, so it really isn't any
5194 * different?)
5195 */
5196APPLESTATIC int
5197nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5198    nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5199    struct nfsexstuff *exp, NFSPROC_T *p)
5200{
5201	struct nfsstate st, *stp = &st;
5202	struct nfslock lo, *lop = &lo;
5203	int error = 0;
5204	nfsquad_t clientid;
5205
5206	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5207		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5208		lop->lo_first = nvap->na_size;
5209	} else {
5210		stp->ls_flags = 0;
5211		lop->lo_first = 0;
5212	}
5213	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5214	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5215	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5216	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5217		stp->ls_flags |= NFSLCK_SETATTR;
5218	if (stp->ls_flags == 0)
5219		goto out;
5220	lop->lo_end = NFS64BITSSET;
5221	lop->lo_flags = NFSLCK_WRITE;
5222	stp->ls_ownerlen = 0;
5223	stp->ls_op = NULL;
5224	stp->ls_uid = nd->nd_cred->cr_uid;
5225	stp->ls_stateid.seqid = stateidp->seqid;
5226	clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5227	clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5228	stp->ls_stateid.other[2] = stateidp->other[2];
5229	error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5230	    stateidp, exp, nd, p);
5231
5232out:
5233	NFSEXITCODE2(error, nd);
5234	return (error);
5235}
5236
5237/*
5238 * Check for a write delegation and do a CBGETATTR if there is one, updating
5239 * the attributes, as required.
5240 * Should I return an error if I can't get the attributes? (For now, I'll
5241 * just return ok.
5242 */
5243APPLESTATIC int
5244nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
5245    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred,
5246    NFSPROC_T *p)
5247{
5248	struct nfsstate *stp;
5249	struct nfslockfile *lfp;
5250	struct nfsclient *clp;
5251	struct nfsvattr nva;
5252	fhandle_t nfh;
5253	int error = 0;
5254	nfsattrbit_t cbbits;
5255	u_quad_t delegfilerev;
5256
5257	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
5258	if (!NFSNONZERO_ATTRBIT(&cbbits))
5259		goto out;
5260
5261	/*
5262	 * Get the lock file structure.
5263	 * (A return of -1 means no associated state, so return ok.)
5264	 */
5265	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5266	NFSLOCKSTATE();
5267	if (!error)
5268		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5269	if (error) {
5270		NFSUNLOCKSTATE();
5271		if (error == -1)
5272			error = 0;
5273		goto out;
5274	}
5275
5276	/*
5277	 * Now, look for a write delegation.
5278	 */
5279	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
5280		if (stp->ls_flags & NFSLCK_DELEGWRITE)
5281			break;
5282	}
5283	if (stp == LIST_END(&lfp->lf_deleg)) {
5284		NFSUNLOCKSTATE();
5285		goto out;
5286	}
5287	clp = stp->ls_clp;
5288	delegfilerev = stp->ls_filerev;
5289
5290	/*
5291	 * If the Write delegation was issued as a part of this Compound RPC
5292	 * or if we have an Implied Clientid (used in a previous Op in this
5293	 * compound) and it is the client the delegation was issued to,
5294	 * just return ok.
5295	 * I also assume that it is from the same client iff the network
5296	 * host IP address is the same as the callback address. (Not
5297	 * exactly correct by the RFC, but avoids a lot of Getattr
5298	 * callbacks.)
5299	 */
5300	if (nd->nd_compref == stp->ls_compref ||
5301	    ((nd->nd_flag & ND_IMPLIEDCLID) &&
5302	     clp->lc_clientid.qval == nd->nd_clientid.qval) ||
5303	     nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
5304		NFSUNLOCKSTATE();
5305		goto out;
5306	}
5307
5308	/*
5309	 * We are now done with the delegation state structure,
5310	 * so the statelock can be released and we can now tsleep().
5311	 */
5312
5313	/*
5314	 * Now, we must do the CB Getattr callback, to see if Change or Size
5315	 * has changed.
5316	 */
5317	if (clp->lc_expiry >= NFSD_MONOSEC) {
5318		NFSUNLOCKSTATE();
5319		NFSVNO_ATTRINIT(&nva);
5320		nva.na_filerev = NFS64BITSSET;
5321		error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
5322		    0, &nfh, &nva, &cbbits, p);
5323		if (!error) {
5324			if ((nva.na_filerev != NFS64BITSSET &&
5325			    nva.na_filerev > delegfilerev) ||
5326			    (NFSVNO_ISSETSIZE(&nva) &&
5327			     nva.na_size != nvap->na_size)) {
5328				error = nfsvno_updfilerev(vp, nvap, cred, p);
5329				if (NFSVNO_ISSETSIZE(&nva))
5330					nvap->na_size = nva.na_size;
5331			}
5332		} else
5333			error = 0;	/* Ignore callback errors for now. */
5334	} else {
5335		NFSUNLOCKSTATE();
5336	}
5337
5338out:
5339	NFSEXITCODE2(error, nd);
5340	return (error);
5341}
5342
5343/*
5344 * This function looks for openowners that haven't had any opens for
5345 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5346 * is set.
5347 */
5348APPLESTATIC void
5349nfsrv_throwawayopens(NFSPROC_T *p)
5350{
5351	struct nfsclient *clp, *nclp;
5352	struct nfsstate *stp, *nstp;
5353	int i;
5354
5355	NFSLOCKSTATE();
5356	nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
5357	/*
5358	 * For each client...
5359	 */
5360	for (i = 0; i < nfsrv_clienthashsize; i++) {
5361	    LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5362		LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5363			if (LIST_EMPTY(&stp->ls_open) &&
5364			    (stp->ls_noopens > NFSNOOPEN ||
5365			     (nfsrv_openpluslock * 2) >
5366			     nfsrv_v4statelimit))
5367				nfsrv_freeopenowner(stp, 0, p);
5368		}
5369	    }
5370	}
5371	NFSUNLOCKSTATE();
5372}
5373
5374/*
5375 * This function checks to see if the credentials are the same.
5376 * Returns 1 for not same, 0 otherwise.
5377 */
5378static int
5379nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
5380{
5381
5382	if (nd->nd_flag & ND_GSS) {
5383		if (!(clp->lc_flags & LCL_GSS))
5384			return (1);
5385		if (clp->lc_flags & LCL_NAME) {
5386			if (nd->nd_princlen != clp->lc_namelen ||
5387			    NFSBCMP(nd->nd_principal, clp->lc_name,
5388				clp->lc_namelen))
5389				return (1);
5390			else
5391				return (0);
5392		}
5393		if (nd->nd_cred->cr_uid == clp->lc_uid)
5394			return (0);
5395		else
5396			return (1);
5397	} else if (clp->lc_flags & LCL_GSS)
5398		return (1);
5399	/*
5400	 * For AUTH_SYS, allow the same uid or root. (This is underspecified
5401	 * in RFC3530, which talks about principals, but doesn't say anything
5402	 * about uids for AUTH_SYS.)
5403	 */
5404	if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5405		return (0);
5406	else
5407		return (1);
5408}
5409
5410/*
5411 * Calculate the lease expiry time.
5412 */
5413static time_t
5414nfsrv_leaseexpiry(void)
5415{
5416
5417	if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
5418		return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5419	return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5420}
5421
5422/*
5423 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5424 */
5425static void
5426nfsrv_delaydelegtimeout(struct nfsstate *stp)
5427{
5428
5429	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5430		return;
5431
5432	if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5433	    stp->ls_delegtime < stp->ls_delegtimelimit) {
5434		stp->ls_delegtime += nfsrv_lease;
5435		if (stp->ls_delegtime > stp->ls_delegtimelimit)
5436			stp->ls_delegtime = stp->ls_delegtimelimit;
5437	}
5438}
5439
5440/*
5441 * This function checks to see if there is any other state associated
5442 * with the openowner for this Open.
5443 * It returns 1 if there is no other state, 0 otherwise.
5444 */
5445static int
5446nfsrv_nootherstate(struct nfsstate *stp)
5447{
5448	struct nfsstate *tstp;
5449
5450	LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5451		if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5452			return (0);
5453	}
5454	return (1);
5455}
5456
5457/*
5458 * Create a list of lock deltas (changes to local byte range locking
5459 * that can be rolled back using the list) and apply the changes via
5460 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5461 * the rollback or update function will be called after this.
5462 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5463 * call fails. If it returns an error, it will unlock the list.
5464 */
5465static int
5466nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5467    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5468{
5469	struct nfslock *lop, *nlop;
5470	int error = 0;
5471
5472	/* Loop through the list of locks. */
5473	lop = LIST_FIRST(&lfp->lf_locallock);
5474	while (first < end && lop != NULL) {
5475		nlop = LIST_NEXT(lop, lo_lckowner);
5476		if (first >= lop->lo_end) {
5477			/* not there yet */
5478			lop = nlop;
5479		} else if (first < lop->lo_first) {
5480			/* new one starts before entry in list */
5481			if (end <= lop->lo_first) {
5482				/* no overlap between old and new */
5483				error = nfsrv_dolocal(vp, lfp, flags,
5484				    NFSLCK_UNLOCK, first, end, cfp, p);
5485				if (error != 0)
5486					break;
5487				first = end;
5488			} else {
5489				/* handle fragment overlapped with new one */
5490				error = nfsrv_dolocal(vp, lfp, flags,
5491				    NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5492				    p);
5493				if (error != 0)
5494					break;
5495				first = lop->lo_first;
5496			}
5497		} else {
5498			/* new one overlaps this entry in list */
5499			if (end <= lop->lo_end) {
5500				/* overlaps all of new one */
5501				error = nfsrv_dolocal(vp, lfp, flags,
5502				    lop->lo_flags, first, end, cfp, p);
5503				if (error != 0)
5504					break;
5505				first = end;
5506			} else {
5507				/* handle fragment overlapped with new one */
5508				error = nfsrv_dolocal(vp, lfp, flags,
5509				    lop->lo_flags, first, lop->lo_end, cfp, p);
5510				if (error != 0)
5511					break;
5512				first = lop->lo_end;
5513				lop = nlop;
5514			}
5515		}
5516	}
5517	if (first < end && error == 0)
5518		/* handle fragment past end of list */
5519		error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
5520		    end, cfp, p);
5521
5522	NFSEXITCODE(error);
5523	return (error);
5524}
5525
5526/*
5527 * Local lock unlock. Unlock all byte ranges that are no longer locked
5528 * by NFSv4. To do this, unlock any subranges of first-->end that
5529 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
5530 * list. This list has all locks for the file held by other
5531 * <clientid, lockowner> tuples. The list is ordered by increasing
5532 * lo_first value, but may have entries that overlap each other, for
5533 * the case of read locks.
5534 */
5535static void
5536nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
5537    uint64_t init_end, NFSPROC_T *p)
5538{
5539	struct nfslock *lop;
5540	uint64_t first, end, prevfirst;
5541
5542	first = init_first;
5543	end = init_end;
5544	while (first < init_end) {
5545		/* Loop through all nfs locks, adjusting first and end */
5546		prevfirst = 0;
5547		LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
5548			KASSERT(prevfirst <= lop->lo_first,
5549			    ("nfsv4 locks out of order"));
5550			KASSERT(lop->lo_first < lop->lo_end,
5551			    ("nfsv4 bogus lock"));
5552			prevfirst = lop->lo_first;
5553			if (first >= lop->lo_first &&
5554			    first < lop->lo_end)
5555				/*
5556				 * Overlaps with initial part, so trim
5557				 * off that initial part by moving first past
5558				 * it.
5559				 */
5560				first = lop->lo_end;
5561			else if (end > lop->lo_first &&
5562			    lop->lo_first > first) {
5563				/*
5564				 * This lock defines the end of the
5565				 * segment to unlock, so set end to the
5566				 * start of it and break out of the loop.
5567				 */
5568				end = lop->lo_first;
5569				break;
5570			}
5571			if (first >= end)
5572				/*
5573				 * There is no segment left to do, so
5574				 * break out of this loop and then exit
5575				 * the outer while() since first will be set
5576				 * to end, which must equal init_end here.
5577				 */
5578				break;
5579		}
5580		if (first < end) {
5581			/* Unlock this segment */
5582			(void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
5583			    NFSLCK_READ, first, end, NULL, p);
5584			nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
5585			    first, end);
5586		}
5587		/*
5588		 * Now move past this segment and look for any further
5589		 * segment in the range, if there is one.
5590		 */
5591		first = end;
5592		end = init_end;
5593	}
5594}
5595
5596/*
5597 * Do the local lock operation and update the rollback list, as required.
5598 * Perform the rollback and return the error if nfsvno_advlock() fails.
5599 */
5600static int
5601nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
5602    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5603{
5604	struct nfsrollback *rlp;
5605	int error = 0, ltype, oldltype;
5606
5607	if (flags & NFSLCK_WRITE)
5608		ltype = F_WRLCK;
5609	else if (flags & NFSLCK_READ)
5610		ltype = F_RDLCK;
5611	else
5612		ltype = F_UNLCK;
5613	if (oldflags & NFSLCK_WRITE)
5614		oldltype = F_WRLCK;
5615	else if (oldflags & NFSLCK_READ)
5616		oldltype = F_RDLCK;
5617	else
5618		oldltype = F_UNLCK;
5619	if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
5620		/* nothing to do */
5621		goto out;
5622	error = nfsvno_advlock(vp, ltype, first, end, p);
5623	if (error != 0) {
5624		if (cfp != NULL) {
5625			cfp->cl_clientid.lval[0] = 0;
5626			cfp->cl_clientid.lval[1] = 0;
5627			cfp->cl_first = 0;
5628			cfp->cl_end = NFS64BITSSET;
5629			cfp->cl_flags = NFSLCK_WRITE;
5630			cfp->cl_ownerlen = 5;
5631			NFSBCOPY("LOCAL", cfp->cl_owner, 5);
5632		}
5633		nfsrv_locallock_rollback(vp, lfp, p);
5634	} else if (ltype != F_UNLCK) {
5635		rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
5636		    M_WAITOK);
5637		rlp->rlck_first = first;
5638		rlp->rlck_end = end;
5639		rlp->rlck_type = oldltype;
5640		LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
5641	}
5642
5643out:
5644	NFSEXITCODE(error);
5645	return (error);
5646}
5647
5648/*
5649 * Roll back local lock changes and free up the rollback list.
5650 */
5651static void
5652nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
5653{
5654	struct nfsrollback *rlp, *nrlp;
5655
5656	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
5657		(void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
5658		    rlp->rlck_end, p);
5659		free(rlp, M_NFSDROLLBACK);
5660	}
5661	LIST_INIT(&lfp->lf_rollback);
5662}
5663
5664/*
5665 * Update local lock list and delete rollback list (ie now committed to the
5666 * local locks). Most of the work is done by the internal function.
5667 */
5668static void
5669nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
5670    uint64_t end)
5671{
5672	struct nfsrollback *rlp, *nrlp;
5673	struct nfslock *new_lop, *other_lop;
5674
5675	new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
5676	if (flags & (NFSLCK_READ | NFSLCK_WRITE))
5677		other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
5678		    M_WAITOK);
5679	else
5680		other_lop = NULL;
5681	new_lop->lo_flags = flags;
5682	new_lop->lo_first = first;
5683	new_lop->lo_end = end;
5684	nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
5685	if (new_lop != NULL)
5686		free(new_lop, M_NFSDLOCK);
5687	if (other_lop != NULL)
5688		free(other_lop, M_NFSDLOCK);
5689
5690	/* and get rid of the rollback list */
5691	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
5692		free(rlp, M_NFSDROLLBACK);
5693	LIST_INIT(&lfp->lf_rollback);
5694}
5695
5696/*
5697 * Lock the struct nfslockfile for local lock updating.
5698 */
5699static void
5700nfsrv_locklf(struct nfslockfile *lfp)
5701{
5702	int gotlock;
5703
5704	/* lf_usecount ensures *lfp won't be free'd */
5705	lfp->lf_usecount++;
5706	do {
5707		gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
5708		    NFSSTATEMUTEXPTR, NULL);
5709	} while (gotlock == 0);
5710	lfp->lf_usecount--;
5711}
5712
5713/*
5714 * Unlock the struct nfslockfile after local lock updating.
5715 */
5716static void
5717nfsrv_unlocklf(struct nfslockfile *lfp)
5718{
5719
5720	nfsv4_unlock(&lfp->lf_locallock_lck, 0);
5721}
5722
5723/*
5724 * Clear out all state for the NFSv4 server.
5725 * Must be called by a thread that can sleep when no nfsds are running.
5726 */
5727void
5728nfsrv_throwawayallstate(NFSPROC_T *p)
5729{
5730	struct nfsclient *clp, *nclp;
5731	struct nfslockfile *lfp, *nlfp;
5732	int i;
5733
5734	/*
5735	 * For each client, clean out the state and then free the structure.
5736	 */
5737	for (i = 0; i < nfsrv_clienthashsize; i++) {
5738		LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5739			nfsrv_cleanclient(clp, p);
5740			nfsrv_freedeleglist(&clp->lc_deleg);
5741			nfsrv_freedeleglist(&clp->lc_olddeleg);
5742			free(clp->lc_stateid, M_NFSDCLIENT);
5743			free(clp, M_NFSDCLIENT);
5744		}
5745	}
5746
5747	/*
5748	 * Also, free up any remaining lock file structures.
5749	 */
5750	for (i = 0; i < nfsrv_lockhashsize; i++) {
5751		LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
5752			printf("nfsd unload: fnd a lock file struct\n");
5753			nfsrv_freenfslockfile(lfp);
5754		}
5755	}
5756}
5757
5758/*
5759 * Check the sequence# for the session and slot provided as an argument.
5760 * Also, renew the lease if the session will return NFS_OK.
5761 */
5762int
5763nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
5764    uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
5765    uint32_t *sflagsp, NFSPROC_T *p)
5766{
5767	struct nfsdsession *sep;
5768	struct nfssessionhash *shp;
5769	int error;
5770	SVCXPRT *savxprt;
5771
5772	shp = NFSSESSIONHASH(nd->nd_sessionid);
5773	NFSLOCKSESSION(shp);
5774	sep = nfsrv_findsession(nd->nd_sessionid);
5775	if (sep == NULL) {
5776		NFSUNLOCKSESSION(shp);
5777		return (NFSERR_BADSESSION);
5778	}
5779	error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
5780	    sep->sess_slots, NULL, NFSV4_SLOTS - 1);
5781	if (error != 0) {
5782		NFSUNLOCKSESSION(shp);
5783		return (error);
5784	}
5785	if (cache_this != 0)
5786		nd->nd_flag |= ND_SAVEREPLY;
5787	/* Renew the lease. */
5788	sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
5789	nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
5790	nd->nd_flag |= ND_IMPLIEDCLID;
5791
5792	/*
5793	 * If this session handles the backchannel, save the nd_xprt for this
5794	 * RPC, since this is the one being used.
5795	 */
5796	if (sep->sess_clp->lc_req.nr_client != NULL &&
5797	    (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) {
5798		savxprt = sep->sess_cbsess.nfsess_xprt;
5799		SVC_ACQUIRE(nd->nd_xprt);
5800		nd->nd_xprt->xp_p2 =
5801		    sep->sess_clp->lc_req.nr_client->cl_private;
5802		nd->nd_xprt->xp_idletimeout = 0;	/* Disable timeout. */
5803		sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
5804		if (savxprt != NULL)
5805			SVC_RELEASE(savxprt);
5806	}
5807
5808	*sflagsp = 0;
5809	if (sep->sess_clp->lc_req.nr_client == NULL)
5810		*sflagsp |= NFSV4SEQ_CBPATHDOWN;
5811	NFSUNLOCKSESSION(shp);
5812	if (error == NFSERR_EXPIRED) {
5813		*sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
5814		error = 0;
5815	} else if (error == NFSERR_ADMINREVOKED) {
5816		*sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
5817		error = 0;
5818	}
5819	*highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
5820	return (0);
5821}
5822
5823/*
5824 * Check/set reclaim complete for this session/clientid.
5825 */
5826int
5827nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd)
5828{
5829	struct nfsdsession *sep;
5830	struct nfssessionhash *shp;
5831	int error = 0;
5832
5833	shp = NFSSESSIONHASH(nd->nd_sessionid);
5834	NFSLOCKSTATE();
5835	NFSLOCKSESSION(shp);
5836	sep = nfsrv_findsession(nd->nd_sessionid);
5837	if (sep == NULL) {
5838		NFSUNLOCKSESSION(shp);
5839		NFSUNLOCKSTATE();
5840		return (NFSERR_BADSESSION);
5841	}
5842
5843	/* Check to see if reclaim complete has already happened. */
5844	if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
5845		error = NFSERR_COMPLETEALREADY;
5846	else
5847		sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
5848	NFSUNLOCKSESSION(shp);
5849	NFSUNLOCKSTATE();
5850	return (error);
5851}
5852
5853/*
5854 * Cache the reply in a session slot.
5855 */
5856void
5857nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat,
5858   struct mbuf **m)
5859{
5860	struct nfsdsession *sep;
5861	struct nfssessionhash *shp;
5862
5863	shp = NFSSESSIONHASH(sessionid);
5864	NFSLOCKSESSION(shp);
5865	sep = nfsrv_findsession(sessionid);
5866	if (sep == NULL) {
5867		NFSUNLOCKSESSION(shp);
5868		printf("nfsrv_cache_session: no session\n");
5869		m_freem(*m);
5870		return;
5871	}
5872	nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m);
5873	NFSUNLOCKSESSION(shp);
5874}
5875
5876/*
5877 * Search for a session that matches the sessionid.
5878 */
5879static struct nfsdsession *
5880nfsrv_findsession(uint8_t *sessionid)
5881{
5882	struct nfsdsession *sep;
5883	struct nfssessionhash *shp;
5884
5885	shp = NFSSESSIONHASH(sessionid);
5886	LIST_FOREACH(sep, &shp->list, sess_hash) {
5887		if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
5888			break;
5889	}
5890	return (sep);
5891}
5892
5893/*
5894 * Destroy a session.
5895 */
5896int
5897nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
5898{
5899	int error, samesess;
5900
5901	samesess = 0;
5902	if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID)) {
5903		samesess = 1;
5904		if ((nd->nd_flag & ND_LASTOP) == 0)
5905			return (NFSERR_BADSESSION);
5906	}
5907	error = nfsrv_freesession(NULL, sessionid);
5908	if (error == 0 && samesess != 0)
5909		nd->nd_flag &= ~ND_HASSEQUENCE;
5910	return (error);
5911}
5912
5913/*
5914 * Free up a session structure.
5915 */
5916static int
5917nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
5918{
5919	struct nfssessionhash *shp;
5920	int i;
5921
5922	NFSLOCKSTATE();
5923	if (sep == NULL) {
5924		shp = NFSSESSIONHASH(sessionid);
5925		NFSLOCKSESSION(shp);
5926		sep = nfsrv_findsession(sessionid);
5927	} else {
5928		shp = NFSSESSIONHASH(sep->sess_sessionid);
5929		NFSLOCKSESSION(shp);
5930	}
5931	if (sep != NULL) {
5932		sep->sess_refcnt--;
5933		if (sep->sess_refcnt > 0) {
5934			NFSUNLOCKSESSION(shp);
5935			NFSUNLOCKSTATE();
5936			return (0);
5937		}
5938		LIST_REMOVE(sep, sess_hash);
5939		LIST_REMOVE(sep, sess_list);
5940	}
5941	NFSUNLOCKSESSION(shp);
5942	NFSUNLOCKSTATE();
5943	if (sep == NULL)
5944		return (NFSERR_BADSESSION);
5945	for (i = 0; i < NFSV4_SLOTS; i++)
5946		if (sep->sess_slots[i].nfssl_reply != NULL)
5947			m_freem(sep->sess_slots[i].nfssl_reply);
5948	if (sep->sess_cbsess.nfsess_xprt != NULL)
5949		SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
5950	free(sep, M_NFSDSESSION);
5951	return (0);
5952}
5953
5954/*
5955 * Free a stateid.
5956 * RFC5661 says that it should fail when there are associated opens, locks
5957 * or delegations. Since stateids represent opens, I don't see how you can
5958 * free an open stateid (it will be free'd when closed), so this function
5959 * only works for lock stateids (freeing the lock_owner) or delegations.
5960 */
5961int
5962nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
5963    NFSPROC_T *p)
5964{
5965	struct nfsclient *clp;
5966	struct nfsstate *stp;
5967	int error;
5968
5969	NFSLOCKSTATE();
5970	/*
5971	 * Look up the stateid
5972	 */
5973	error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
5974	    NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
5975	if (error == 0) {
5976		/* First, check for a delegation. */
5977		LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
5978			if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
5979			    NFSX_STATEIDOTHER))
5980				break;
5981		}
5982		if (stp != NULL) {
5983			nfsrv_freedeleg(stp);
5984			NFSUNLOCKSTATE();
5985			return (error);
5986		}
5987	}
5988	/* Not a delegation, try for a lock_owner. */
5989	if (error == 0)
5990		error = nfsrv_getstate(clp, stateidp, 0, &stp);
5991	if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
5992	    NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
5993		/* Not a lock_owner stateid. */
5994		error = NFSERR_LOCKSHELD;
5995	if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
5996		error = NFSERR_LOCKSHELD;
5997	if (error == 0)
5998		nfsrv_freelockowner(stp, NULL, 0, p);
5999	NFSUNLOCKSTATE();
6000	return (error);
6001}
6002
6003/*
6004 * Generate the xdr for an NFSv4.1 CBSequence Operation.
6005 */
6006static int
6007nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
6008    int dont_replycache, struct nfsdsession **sepp)
6009{
6010	struct nfsdsession *sep;
6011	uint32_t *tl, slotseq = 0;
6012	int maxslot, slotpos;
6013	uint8_t sessionid[NFSX_V4SESSIONID];
6014	int error;
6015
6016	error = nfsv4_getcbsession(clp, sepp);
6017	if (error != 0)
6018		return (error);
6019	sep = *sepp;
6020	(void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot,
6021	    &slotseq, sessionid);
6022	KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
6023
6024	/* Build the Sequence arguments. */
6025	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
6026	bcopy(sessionid, tl, NFSX_V4SESSIONID);
6027	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
6028	nd->nd_slotseq = tl;
6029	*tl++ = txdr_unsigned(slotseq);
6030	*tl++ = txdr_unsigned(slotpos);
6031	*tl++ = txdr_unsigned(maxslot);
6032	if (dont_replycache == 0)
6033		*tl++ = newnfs_true;
6034	else
6035		*tl++ = newnfs_false;
6036	*tl = 0;			/* No referring call list, for now. */
6037	nd->nd_flag |= ND_HASSEQUENCE;
6038	return (0);
6039}
6040
6041/*
6042 * Get a session for the callback.
6043 */
6044static int
6045nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
6046{
6047	struct nfsdsession *sep;
6048
6049	NFSLOCKSTATE();
6050	LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6051		if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
6052			break;
6053	}
6054	if (sep == NULL) {
6055		NFSUNLOCKSTATE();
6056		return (NFSERR_BADSESSION);
6057	}
6058	sep->sess_refcnt++;
6059	*sepp = sep;
6060	NFSUNLOCKSTATE();
6061	return (0);
6062}
6063
6064/*
6065 * Free up all backchannel xprts.  This needs to be done when the nfsd threads
6066 * exit, since those transports will all be going away.
6067 * This is only called after all the nfsd threads are done performing RPCs,
6068 * so locking shouldn't be an issue.
6069 */
6070APPLESTATIC void
6071nfsrv_freeallbackchannel_xprts(void)
6072{
6073	struct nfsdsession *sep;
6074	struct nfsclient *clp;
6075	SVCXPRT *xprt;
6076	int i;
6077
6078	for (i = 0; i < nfsrv_clienthashsize; i++) {
6079		LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
6080			LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6081				xprt = sep->sess_cbsess.nfsess_xprt;
6082				sep->sess_cbsess.nfsess_xprt = NULL;
6083				if (xprt != NULL)
6084					SVC_RELEASE(xprt);
6085			}
6086		}
6087	}
6088}
6089
6090