nfs_nfsdstate.c revision 284318
1/*-
2 * Copyright (c) 2009 Rick Macklem, University of Guelph
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdstate.c 284318 2015-06-12 13:42:53Z rmacklem $");
30
31#ifndef APPLEKEXT
32#include <fs/nfs/nfsport.h>
33
/*
 * File-scope state for the NFSv4 server.
 */
34struct nfsrv_stablefirst nfsrv_stablefirst;
/* When zero, nfsrv_setclient() clears lc_program so no callbacks are done. */
35int nfsrv_issuedelegs = 0;
36int nfsrv_dolocallocks = 0;
/* Locked exclusively in this file to lock out the other nfsd threads. */
37struct nfsv4lock nfsv4rootfs_lock;
38
39extern int newnfs_numnfsd;
40extern struct nfsstats newnfsstats;
41extern int nfsrv_lease;
42extern struct timeval nfsboottime;
43extern u_int32_t newnfs_true, newnfs_false;
44NFSV4ROOTLOCKMUTEX;
45NFSSTATESPINLOCK;
46
47SYSCTL_DECL(_vfs_nfsd);
/* Number of lc_stateid[] hash lists that are walked for each client. */
48int	nfsrv_statehashsize = NFSSTATEHASHSIZE;
49TUNABLE_INT("vfs.nfsd.statehashsize", &nfsrv_statehashsize);
50SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
51    &nfsrv_statehashsize, 0,
52    "Size of state hash table set via loader.conf");
53
/* Number of nfsclienthash[] lists that are walked when searching clients. */
54int	nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
55TUNABLE_INT("vfs.nfsd.clienthashsize", &nfsrv_clienthashsize);
56SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
57    &nfsrv_clienthashsize, 0,
58    "Size of client hash table set via loader.conf");
59
/* Note that this variable is exported under the name "fhhashsize". */
60int	nfsrv_lockhashsize = NFSLOCKHASHSIZE;
61TUNABLE_INT("vfs.nfsd.fhhashsize", &nfsrv_lockhashsize);
62SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
63    &nfsrv_lockhashsize, 0,
64    "Size of file handle hash table set via loader.conf");
65
66int	nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
67TUNABLE_INT("vfs.nfsd.sessionhashsize", &nfsrv_sessionhashsize);
68SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
69    &nfsrv_sessionhashsize, 0,
70    "Size of session hash table set via loader.conf");
71
/*
 * nfsrv_setclient() replies NFSERR_RESOURCE once nfsrv_openpluslock
 * exceeds this limit.
 */
72static int	nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
73TUNABLE_INT("vfs.nfsd.v4statelimit", &nfsrv_v4statelimit);
74SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
75    &nfsrv_v4statelimit, 0,
76    "High water limit for NFSv4 opens+locks+delegations");
77
78/*
79 * Hash lists for nfs V4.
80 */
81struct nfsclienthashhead	*nfsclienthash;
82struct nfslockhashhead		*nfslockhash;
83struct nfssessionhash		*nfssessionhash;
84#endif	/* !APPLEKEXT */
85
/*
 * nfsrv_openpluslock is compared against nfsrv_v4statelimit and is also
 * incremented each time a client structure is linked in.
 */
86static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
/* Used as lval[0] of every clientid issued by nfsrv_setclient(). */
87static time_t nfsrvboottime;
88static int nfsrv_writedelegifpos = 1;
/* nfsrv_clients is incremented each time a client structure is linked in. */
89static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
90static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
/* When non-zero, nfsrv_setclient() disables callbacks for ND_GSS requests. */
91static int nfsrv_nogsscallback = 0;
92
93/* local functions */
94static void nfsrv_dumpaclient(struct nfsclient *clp,
95    struct nfsd_dumpclients *dumpp);
96static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
97    NFSPROC_T *p);
98static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
99    NFSPROC_T *p);
100static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
101    NFSPROC_T *p);
102static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
103    int cansleep, NFSPROC_T *p);
104static void nfsrv_freenfslock(struct nfslock *lop);
105static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
106static void nfsrv_freedeleg(struct nfsstate *);
107static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
108    u_int32_t flags, struct nfsstate **stpp);
109static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
110    struct nfsstate **stpp);
111static int nfsrv_getlockfh(vnode_t vp, u_short flags,
112    struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
113static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
114    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
115static void nfsrv_insertlock(struct nfslock *new_lop,
116    struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
117static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
118    struct nfslock **other_lopp, struct nfslockfile *lfp);
119static int nfsrv_getipnumber(u_char *cp);
120static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
121    nfsv4stateid_t *stateidp, int specialid);
122static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
123    u_int32_t flags);
124static int nfsrv_docallback(struct nfsclient *clp, int procnum,
125    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
126    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p);
127static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
128    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp);
129static u_int32_t nfsrv_nextclientindex(void);
130static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
131static void nfsrv_markstable(struct nfsclient *clp);
132static int nfsrv_checkstable(struct nfsclient *clp);
133static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
134    vnode *vp, NFSPROC_T *p);
135static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
136    NFSPROC_T *p, vnode_t vp);
137static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
138    struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
139static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
140    struct nfsclient *clp);
141static time_t nfsrv_leaseexpiry(void);
142static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
143static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
144    struct nfsstate *stp, struct nfsrvcache *op);
145static int nfsrv_nootherstate(struct nfsstate *stp);
146static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
147    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
148static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
149    uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
150static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
151    int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
152    NFSPROC_T *p);
153static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
154    NFSPROC_T *p);
155static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
156    uint64_t first, uint64_t end);
157static void nfsrv_locklf(struct nfslockfile *lfp);
158static void nfsrv_unlocklf(struct nfslockfile *lfp);
159static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
160static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
161static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
162    int dont_replycache, struct nfsdsession **sepp);
163static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
164
165/*
166 * Scan the client list for a match and either return the current one,
167 * create a new entry or return an error.
168 * If returning a non-error, the clp structure must either be linked into
169 * the client list or free'd.
170 */
171APPLESTATIC int
172nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
173    nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
174{
175	struct nfsclient *clp = NULL, *new_clp = *new_clpp;
176	int i, error = 0;
177	struct nfsstate *stp, *tstp;
178	struct sockaddr_in *sad, *rad;
179	int zapit = 0, gotit, hasstate = 0, igotlock;
180	static u_int64_t confirm_index = 0;
181
182	/*
183	 * Check for state resource limit exceeded.
184	 */
185	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
186		error = NFSERR_RESOURCE;
187		goto out;
188	}
189
190	if (nfsrv_issuedelegs == 0 ||
191	    ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
192		/*
193		 * Don't do callbacks when delegations are disabled or
194		 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
195		 * If establishing a callback connection is attempted
196		 * when a firewall is blocking the callback path, the
197		 * server may wait too long for the connect attempt to
198		 * succeed during the Open. Some clients, such as Linux,
199		 * may timeout and give up on the Open before the server
200		 * replies. Also, since AUTH_GSS callbacks are not
201		 * yet interoperability tested, they might cause the
202		 * server to crap out, if they get past the Init call to
203		 * the client.
204		 */
205		new_clp->lc_program = 0;
206
207	/* Lock out other nfsd threads */
208	NFSLOCKV4ROOTMUTEX();
209	nfsv4_relref(&nfsv4rootfs_lock);
210	do {
211		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
212		    NFSV4ROOTLOCKMUTEXPTR, NULL);
213	} while (!igotlock);
214	NFSUNLOCKV4ROOTMUTEX();
215
216	/*
217	 * Search for a match in the client list.
218	 */
219	gotit = i = 0;
220	while (i < nfsrv_clienthashsize && !gotit) {
221	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
222		if (new_clp->lc_idlen == clp->lc_idlen &&
223		    !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
224			gotit = 1;
225			break;
226		}
227	    }
228	    i++;
229	}
230	if (!gotit ||
231	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
232		if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
233			/*
234			 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
235			 * client is trying to update a confirmed clientid.
236			 */
237			NFSLOCKV4ROOTMUTEX();
238			nfsv4_unlock(&nfsv4rootfs_lock, 1);
239			NFSUNLOCKV4ROOTMUTEX();
240			confirmp->lval[1] = 0;
241			error = NFSERR_NOENT;
242			goto out;
243		}
244		/*
245		 * Get rid of the old one.
246		 */
247		if (i != nfsrv_clienthashsize) {
248			LIST_REMOVE(clp, lc_hash);
249			nfsrv_cleanclient(clp, p);
250			nfsrv_freedeleglist(&clp->lc_deleg);
251			nfsrv_freedeleglist(&clp->lc_olddeleg);
252			zapit = 1;
253		}
254		/*
255		 * Add it after assigning a client id to it.
256		 */
257		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
258		if ((nd->nd_flag & ND_NFSV41) != 0)
259			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
260			    ++confirm_index;
261		else
262			confirmp->qval = new_clp->lc_confirm.qval =
263			    ++confirm_index;
264		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
265		    (u_int32_t)nfsrvboottime;
266		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
267		    nfsrv_nextclientindex();
268		new_clp->lc_stateindex = 0;
269		new_clp->lc_statemaxindex = 0;
270		new_clp->lc_cbref = 0;
271		new_clp->lc_expiry = nfsrv_leaseexpiry();
272		LIST_INIT(&new_clp->lc_open);
273		LIST_INIT(&new_clp->lc_deleg);
274		LIST_INIT(&new_clp->lc_olddeleg);
275		LIST_INIT(&new_clp->lc_session);
276		for (i = 0; i < nfsrv_statehashsize; i++)
277			LIST_INIT(&new_clp->lc_stateid[i]);
278		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
279		    lc_hash);
280		newnfsstats.srvclients++;
281		nfsrv_openpluslock++;
282		nfsrv_clients++;
283		NFSLOCKV4ROOTMUTEX();
284		nfsv4_unlock(&nfsv4rootfs_lock, 1);
285		NFSUNLOCKV4ROOTMUTEX();
286		if (zapit)
287			nfsrv_zapclient(clp, p);
288		*new_clpp = NULL;
289		goto out;
290	}
291
292	/*
293	 * Now, handle the cases where the id is already issued.
294	 */
295	if (nfsrv_notsamecredname(nd, clp)) {
296	    /*
297	     * Check to see if there is expired state that should go away.
298	     */
299	    if (clp->lc_expiry < NFSD_MONOSEC &&
300	        (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
301		nfsrv_cleanclient(clp, p);
302		nfsrv_freedeleglist(&clp->lc_deleg);
303	    }
304
305	    /*
306	     * If there is outstanding state, then reply NFSERR_CLIDINUSE per
307	     * RFC3530 Sec. 8.1.2 last para.
308	     */
309	    if (!LIST_EMPTY(&clp->lc_deleg)) {
310		hasstate = 1;
311	    } else if (LIST_EMPTY(&clp->lc_open)) {
312		hasstate = 0;
313	    } else {
314		hasstate = 0;
315		/* Look for an Open on the OpenOwner */
316		LIST_FOREACH(stp, &clp->lc_open, ls_list) {
317		    if (!LIST_EMPTY(&stp->ls_open)) {
318			hasstate = 1;
319			break;
320		    }
321		}
322	    }
323	    if (hasstate) {
324		/*
325		 * If the uid doesn't match, return NFSERR_CLIDINUSE after
326		 * filling out the correct ipaddr and portnum.
327		 */
328		sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *);
329		rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
330		sad->sin_addr.s_addr = rad->sin_addr.s_addr;
331		sad->sin_port = rad->sin_port;
332		NFSLOCKV4ROOTMUTEX();
333		nfsv4_unlock(&nfsv4rootfs_lock, 1);
334		NFSUNLOCKV4ROOTMUTEX();
335		error = NFSERR_CLIDINUSE;
336		goto out;
337	    }
338	}
339
340	if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
341		/*
342		 * If the verifier has changed, the client has rebooted
343		 * and a new client id is issued. The old state info
344		 * can be thrown away once the SETCLIENTID_CONFIRM occurs.
345		 */
346		LIST_REMOVE(clp, lc_hash);
347		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
348		if ((nd->nd_flag & ND_NFSV41) != 0)
349			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
350			    ++confirm_index;
351		else
352			confirmp->qval = new_clp->lc_confirm.qval =
353			    ++confirm_index;
354		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
355		    nfsrvboottime;
356		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
357		    nfsrv_nextclientindex();
358		new_clp->lc_stateindex = 0;
359		new_clp->lc_statemaxindex = 0;
360		new_clp->lc_cbref = 0;
361		new_clp->lc_expiry = nfsrv_leaseexpiry();
362
363		/*
364		 * Save the state until confirmed.
365		 */
366		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
367		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
368			tstp->ls_clp = new_clp;
369		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
370		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
371			tstp->ls_clp = new_clp;
372		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
373		    ls_list);
374		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
375			tstp->ls_clp = new_clp;
376		for (i = 0; i < nfsrv_statehashsize; i++) {
377			LIST_NEWHEAD(&new_clp->lc_stateid[i],
378			    &clp->lc_stateid[i], ls_hash);
379			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
380				tstp->ls_clp = new_clp;
381		}
382		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
383		    lc_hash);
384		newnfsstats.srvclients++;
385		nfsrv_openpluslock++;
386		nfsrv_clients++;
387		NFSLOCKV4ROOTMUTEX();
388		nfsv4_unlock(&nfsv4rootfs_lock, 1);
389		NFSUNLOCKV4ROOTMUTEX();
390
391		/*
392		 * Must wait until any outstanding callback on the old clp
393		 * completes.
394		 */
395		NFSLOCKSTATE();
396		while (clp->lc_cbref) {
397			clp->lc_flags |= LCL_WAKEUPWANTED;
398			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
399			    "nfsd clp", 10 * hz);
400		}
401		NFSUNLOCKSTATE();
402		nfsrv_zapclient(clp, p);
403		*new_clpp = NULL;
404		goto out;
405	}
406
407	/* For NFSv4.1, mark that we found a confirmed clientid. */
408	if ((nd->nd_flag & ND_NFSV41) != 0)
409		confirmp->lval[1] = 1;
410	else {
411		/*
412		 * id and verifier match, so update the net address info
413		 * and get rid of any existing callback authentication
414		 * handle, so a new one will be acquired.
415		 */
416		LIST_REMOVE(clp, lc_hash);
417		new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
418		new_clp->lc_expiry = nfsrv_leaseexpiry();
419		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
420		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
421		    clp->lc_clientid.lval[0];
422		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
423		    clp->lc_clientid.lval[1];
424		new_clp->lc_delegtime = clp->lc_delegtime;
425		new_clp->lc_stateindex = clp->lc_stateindex;
426		new_clp->lc_statemaxindex = clp->lc_statemaxindex;
427		new_clp->lc_cbref = 0;
428		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
429		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
430			tstp->ls_clp = new_clp;
431		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
432		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
433			tstp->ls_clp = new_clp;
434		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
435		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
436			tstp->ls_clp = new_clp;
437		for (i = 0; i < nfsrv_statehashsize; i++) {
438			LIST_NEWHEAD(&new_clp->lc_stateid[i],
439			    &clp->lc_stateid[i], ls_hash);
440			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
441				tstp->ls_clp = new_clp;
442		}
443		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
444		    lc_hash);
445		newnfsstats.srvclients++;
446		nfsrv_openpluslock++;
447		nfsrv_clients++;
448	}
449	NFSLOCKV4ROOTMUTEX();
450	nfsv4_unlock(&nfsv4rootfs_lock, 1);
451	NFSUNLOCKV4ROOTMUTEX();
452
453	if ((nd->nd_flag & ND_NFSV41) == 0) {
454		/*
455		 * Must wait until any outstanding callback on the old clp
456		 * completes.
457		 */
458		NFSLOCKSTATE();
459		while (clp->lc_cbref) {
460			clp->lc_flags |= LCL_WAKEUPWANTED;
461			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
462			    "nfsdclp", 10 * hz);
463		}
464		NFSUNLOCKSTATE();
465		nfsrv_zapclient(clp, p);
466		*new_clpp = NULL;
467	}
468
469out:
470	NFSEXITCODE2(error, nd);
471	return (error);
472}
473
474/*
475 * Check to see if the client id exists and optionally confirm it.
 *
 * clientid  - the clientid to look up; lval[0] must equal nfsrvboottime
 *             unless this is an NFSv4.1 call with opflags == CLOPS_RENEW.
 * opflags   - CLOPS_xxx flags selecting confirm and/or renew processing.
 * clpp      - if non-NULL, set to NULL on entry and to the client found
 *             once the lookup has succeeded. Note that it is also set when
 *             a later check in this function sets error.
 * nsep      - for NFSv4.1 confirm, the new session to link to the client.
 * confirm   - confirm value that is compared against lc_confirm.
 * cbprogram - callback program number (NFSv4.1).
 * nd        - request descriptor; may be NULL, but it is dereferenced
 *             without a check whenever CLOPS_CONFIRM is set.
 * p         - passed to nfsrv_cleanclient().
 *
 * Returns 0 or an NFSERR_xxx value.
476 */
477APPLESTATIC int
478nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
479    struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
480    struct nfsrv_descript *nd, NFSPROC_T *p)
481{
482	struct nfsclient *clp;
483	struct nfsstate *stp;
484	int i;
485	struct nfsclienthashhead *hp;
486	int error = 0, igotlock, doneok;
487	struct nfssessionhash *shp;
488	struct nfsdsession *sep;
489	uint64_t sessid[2];
490	static uint64_t next_sess = 0;
491
492	if (clpp)
493		*clpp = NULL;
	/*
	 * A clientid issued by a previous boot is stale. The check is
	 * skipped for an NFSv4.1 renew, where the client is found via the
	 * session instead of via the clientid.
	 */
494	if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
495	    opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
496		error = NFSERR_STALECLIENTID;
497		goto out;
498	}
499
500	/*
501	 * If called with opflags == CLOPS_RENEW, the State Lock is
502	 * already held. Otherwise, we need to get either that or,
503	 * for the case of Confirm, lock out the nfsd threads.
504	 */
505	if (opflags & CLOPS_CONFIRM) {
506		NFSLOCKV4ROOTMUTEX();
507		nfsv4_relref(&nfsv4rootfs_lock);
508		do {
509			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
510			    NFSV4ROOTLOCKMUTEXPTR, NULL);
511		} while (!igotlock);
512		/*
513		 * Create a new sessionid here, since we need to do it where
514		 * there is a mutex held to serialize update of next_sess.
515		 */
516		if ((nd->nd_flag & ND_NFSV41) != 0) {
517			sessid[0] = ++next_sess;
518			sessid[1] = clientid.qval;
519		}
520		NFSUNLOCKV4ROOTMUTEX();
521	} else if (opflags != CLOPS_RENEW) {
522		NFSLOCKSTATE();
523	}
524
525	/* For NFSv4.1, the clp is acquired from the associated session. */
526	if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
527	    opflags == CLOPS_RENEW) {
528		clp = NULL;
		/* Without a Sequence op there is no session, so clp stays NULL. */
529		if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
530			shp = NFSSESSIONHASH(nd->nd_sessionid);
531			NFSLOCKSESSION(shp);
532			sep = nfsrv_findsession(nd->nd_sessionid);
533			if (sep != NULL)
534				clp = sep->sess_clp;
535			NFSUNLOCKSESSION(shp);
536		}
537	} else {
		/* Otherwise search the hash list selected by the clientid. */
538		hp = NFSCLIENTHASH(clientid);
539		LIST_FOREACH(clp, hp, lc_hash) {
540			if (clp->lc_clientid.lval[1] == clientid.lval[1])
541				break;
542		}
543	}
544	if (clp == NULL) {
545		if (opflags & CLOPS_CONFIRM)
546			error = NFSERR_STALECLIENTID;
547		else
548			error = NFSERR_EXPIRED;
549	} else if (clp->lc_flags & LCL_ADMINREVOKED) {
550		/*
551		 * If marked admin revoked, just return the error.
552		 */
553		error = NFSERR_ADMINREVOKED;
554	}
	/* On a lookup failure, drop whichever lock was acquired above. */
555	if (error) {
556		if (opflags & CLOPS_CONFIRM) {
557			NFSLOCKV4ROOTMUTEX();
558			nfsv4_unlock(&nfsv4rootfs_lock, 1);
559			NFSUNLOCKV4ROOTMUTEX();
560		} else if (opflags != CLOPS_RENEW) {
561			NFSUNLOCKSTATE();
562		}
563		goto out;
564	}
565
566	/*
567	 * Perform any operations specified by the opflags.
568	 */
569	if (opflags & CLOPS_CONFIRM) {
		/*
		 * NFSv4.1 compares only lval[0] of the confirm value, whereas
		 * NFSv4.0 compares all of it.
		 */
570		if (((nd->nd_flag & ND_NFSV41) != 0 &&
571		     clp->lc_confirm.lval[0] != confirm.lval[0]) ||
572		    ((nd->nd_flag & ND_NFSV41) == 0 &&
573		     clp->lc_confirm.qval != confirm.qval))
574			error = NFSERR_STALECLIENTID;
575		else if (nfsrv_notsamecredname(nd, clp))
576			error = NFSERR_CLIDINUSE;
577
578		if (!error) {
		    /*
		     * The state is only cleaned when LCL_NEEDSCONFIRM is set
		     * without LCL_DONTCLEAN.
		     */
579		    if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
580			LCL_NEEDSCONFIRM) {
581			/*
582			 * Hang onto the delegations (as old delegations)
583			 * for an Open with CLAIM_DELEGATE_PREV unless in
584			 * grace, but get rid of the rest of the state.
585			 */
586			nfsrv_cleanclient(clp, p);
587			nfsrv_freedeleglist(&clp->lc_olddeleg);
588			if (nfsrv_checkgrace(nd, clp, 0)) {
589			    /* In grace, so just delete delegations */
590			    nfsrv_freedeleglist(&clp->lc_deleg);
591			} else {
592			    LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
593				stp->ls_flags |= NFSLCK_OLDDELEG;
594			    clp->lc_delegtime = NFSD_MONOSEC +
595				nfsrv_lease + NFSRV_LEASEDELTA;
596			    LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
597				ls_list);
598			}
599			if ((nd->nd_flag & ND_NFSV41) != 0)
600			    clp->lc_program = cbprogram;
601		    }
602		    clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
603		    if (clp->lc_program)
604			clp->lc_flags |= LCL_NEEDSCBNULL;
605		    /* For NFSv4.1, link the session onto the client. */
606		    if (nsep != NULL) {
607			/* Hold a reference on the xprt for a backchannel. */
608			if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
609			    != 0 && clp->lc_req.nr_client == NULL) {
610			    clp->lc_req.nr_client = (struct __rpc_client *)
611				clnt_bck_create(nd->nd_xprt->xp_socket,
612				cbprogram, NFSV4_CBVERS);
613			    if (clp->lc_req.nr_client != NULL) {
614				SVC_ACQUIRE(nd->nd_xprt);
615				nd->nd_xprt->xp_p2 =
616				    clp->lc_req.nr_client->cl_private;
617				/* Disable idle timeout. */
618				nd->nd_xprt->xp_idletimeout = 0;
619				nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
620			    } else
				/* No backchannel client, so clear the flag. */
621				nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
622			}
			/*
			 * sessid was only filled in above when ND_NFSV41 is
			 * set; presumably nsep is non-NULL only for NFSv4.1
			 * callers. TODO confirm against the callers.
			 */
623			NFSBCOPY(sessid, nsep->sess_sessionid,
624			    NFSX_V4SESSIONID);
625			NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
626			    NFSX_V4SESSIONID);
627			shp = NFSSESSIONHASH(nsep->sess_sessionid);
			/* The session lock is taken before the state lock. */
628			NFSLOCKSESSION(shp);
629			LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
630			NFSLOCKSTATE();
631			LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
632			nsep->sess_clp = clp;
633			NFSUNLOCKSTATE();
634			NFSUNLOCKSESSION(shp);
635		    }
636		}
637	} else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		/* An unconfirmed clientid cannot be used except to confirm it. */
638		error = NFSERR_EXPIRED;
639	}
640
641	/*
642	 * If called by the Renew Op, we must check the principal.
643	 */
644	if (!error && (opflags & CLOPS_RENEWOP)) {
645	    if (nfsrv_notsamecredname(nd, clp)) {
		/*
		 * The principal differs, so accept the renew only if some
		 * open state of the client has the same uid as the caller.
		 */
646		doneok = 0;
647		for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
648		    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
649			if ((stp->ls_flags & NFSLCK_OPEN) &&
650			    stp->ls_uid == nd->nd_cred->cr_uid) {
651				doneok = 1;
652				break;
653			}
654		    }
655		}
656		if (!doneok)
657			error = NFSERR_ACCES;
658	    }
659	    if (!error && (clp->lc_flags & LCL_CBDOWN))
660		error = NFSERR_CBPATHDOWN;
661	}
	/* The lease is renewed even when NFSERR_CBPATHDOWN is being returned. */
662	if ((!error || error == NFSERR_CBPATHDOWN) &&
663	     (opflags & CLOPS_RENEW)) {
664		clp->lc_expiry = nfsrv_leaseexpiry();
665	}
	/* Release whichever lock was acquired near the top of the function. */
666	if (opflags & CLOPS_CONFIRM) {
667		NFSLOCKV4ROOTMUTEX();
668		nfsv4_unlock(&nfsv4rootfs_lock, 1);
669		NFSUNLOCKV4ROOTMUTEX();
670	} else if (opflags != CLOPS_RENEW) {
671		NFSUNLOCKSTATE();
672	}
673	if (clpp)
674		*clpp = clp;
675
676out:
677	NFSEXITCODE2(error, nd);
678	return (error);
679}
680
681/*
682 * Perform the NFSv4.1 destroy clientid.
683 */
684int
685nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
686{
687	struct nfsclient *clp;
688	struct nfsclienthashhead *hp;
689	int error = 0, i, igotlock;
690
691	if (nfsrvboottime != clientid.lval[0]) {
692		error = NFSERR_STALECLIENTID;
693		goto out;
694	}
695
696	/* Lock out other nfsd threads */
697	NFSLOCKV4ROOTMUTEX();
698	nfsv4_relref(&nfsv4rootfs_lock);
699	do {
700		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
701		    NFSV4ROOTLOCKMUTEXPTR, NULL);
702	} while (igotlock == 0);
703	NFSUNLOCKV4ROOTMUTEX();
704
705	hp = NFSCLIENTHASH(clientid);
706	LIST_FOREACH(clp, hp, lc_hash) {
707		if (clp->lc_clientid.lval[1] == clientid.lval[1])
708			break;
709	}
710	if (clp == NULL) {
711		NFSLOCKV4ROOTMUTEX();
712		nfsv4_unlock(&nfsv4rootfs_lock, 1);
713		NFSUNLOCKV4ROOTMUTEX();
714		/* Just return ok, since it is gone. */
715		goto out;
716	}
717
718	/* Scan for state on the clientid. */
719	for (i = 0; i < nfsrv_statehashsize; i++)
720		if (!LIST_EMPTY(&clp->lc_stateid[i])) {
721			NFSLOCKV4ROOTMUTEX();
722			nfsv4_unlock(&nfsv4rootfs_lock, 1);
723			NFSUNLOCKV4ROOTMUTEX();
724			error = NFSERR_CLIENTIDBUSY;
725			goto out;
726		}
727	if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
728		NFSLOCKV4ROOTMUTEX();
729		nfsv4_unlock(&nfsv4rootfs_lock, 1);
730		NFSUNLOCKV4ROOTMUTEX();
731		error = NFSERR_CLIENTIDBUSY;
732		goto out;
733	}
734
735	/* Destroy the clientid and return ok. */
736	nfsrv_cleanclient(clp, p);
737	nfsrv_freedeleglist(&clp->lc_deleg);
738	nfsrv_freedeleglist(&clp->lc_olddeleg);
739	LIST_REMOVE(clp, lc_hash);
740	NFSLOCKV4ROOTMUTEX();
741	nfsv4_unlock(&nfsv4rootfs_lock, 1);
742	NFSUNLOCKV4ROOTMUTEX();
743	nfsrv_zapclient(clp, p);
744out:
745	NFSEXITCODE2(error, nd);
746	return (error);
747}
748
749/*
750 * Called from the new nfssvc syscall to admin revoke a clientid.
751 * Returns 0 for success, error otherwise.
752 */
753APPLESTATIC int
754nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
755{
756	struct nfsclient *clp = NULL;
757	int i, error = 0;
758	int gotit, igotlock;
759
760	/*
761	 * First, lock out the nfsd so that state won't change while the
762	 * revocation record is being written to the stable storage restart
763	 * file.
764	 */
765	NFSLOCKV4ROOTMUTEX();
766	do {
767		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
768		    NFSV4ROOTLOCKMUTEXPTR, NULL);
769	} while (!igotlock);
770	NFSUNLOCKV4ROOTMUTEX();
771
772	/*
773	 * Search for a match in the client list.
774	 */
775	gotit = i = 0;
776	while (i < nfsrv_clienthashsize && !gotit) {
777	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
778		if (revokep->nclid_idlen == clp->lc_idlen &&
779		    !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
780			gotit = 1;
781			break;
782		}
783	    }
784	    i++;
785	}
786	if (!gotit) {
787		NFSLOCKV4ROOTMUTEX();
788		nfsv4_unlock(&nfsv4rootfs_lock, 0);
789		NFSUNLOCKV4ROOTMUTEX();
790		error = EPERM;
791		goto out;
792	}
793
794	/*
795	 * Now, write out the revocation record
796	 */
797	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
798	nfsrv_backupstable();
799
800	/*
801	 * and clear out the state, marking the clientid revoked.
802	 */
803	clp->lc_flags &= ~LCL_CALLBACKSON;
804	clp->lc_flags |= LCL_ADMINREVOKED;
805	nfsrv_cleanclient(clp, p);
806	nfsrv_freedeleglist(&clp->lc_deleg);
807	nfsrv_freedeleglist(&clp->lc_olddeleg);
808	NFSLOCKV4ROOTMUTEX();
809	nfsv4_unlock(&nfsv4rootfs_lock, 0);
810	NFSUNLOCKV4ROOTMUTEX();
811
812out:
813	NFSEXITCODE(error);
814	return (error);
815}
816
817/*
818 * Dump out stats for all clients. Called from nfssvc(2), that is used
819 * newnfsstats.
820 */
821APPLESTATIC void
822nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
823{
824	struct nfsclient *clp;
825	int i = 0, cnt = 0;
826
827	/*
828	 * First, get a reference on the nfsv4rootfs_lock so that an
829	 * exclusive lock cannot be acquired while dumping the clients.
830	 */
831	NFSLOCKV4ROOTMUTEX();
832	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
833	NFSUNLOCKV4ROOTMUTEX();
834	NFSLOCKSTATE();
835	/*
836	 * Rattle through the client lists until done.
837	 */
838	while (i < nfsrv_clienthashsize && cnt < maxcnt) {
839	    clp = LIST_FIRST(&nfsclienthash[i]);
840	    while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
841		nfsrv_dumpaclient(clp, &dumpp[cnt]);
842		cnt++;
843		clp = LIST_NEXT(clp, lc_hash);
844	    }
845	    i++;
846	}
847	if (cnt < maxcnt)
848	    dumpp[cnt].ndcl_clid.nclid_idlen = 0;
849	NFSUNLOCKSTATE();
850	NFSLOCKV4ROOTMUTEX();
851	nfsv4_relref(&nfsv4rootfs_lock);
852	NFSUNLOCKV4ROOTMUTEX();
853}
854
855/*
856 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
857 */
858static void
859nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
860{
861	struct nfsstate *stp, *openstp, *lckownstp;
862	struct nfslock *lop;
863	struct sockaddr *sad;
864	struct sockaddr_in *rad;
865	struct sockaddr_in6 *rad6;
866
867	dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
868	dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
869	dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
870	dumpp->ndcl_flags = clp->lc_flags;
871	dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
872	NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
873	sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *);
874	dumpp->ndcl_addrfam = sad->sa_family;
875	if (sad->sa_family == AF_INET) {
876		rad = (struct sockaddr_in *)sad;
877		dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr;
878	} else {
879		rad6 = (struct sockaddr_in6 *)sad;
880		dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr;
881	}
882
883	/*
884	 * Now, scan the state lists and total up the opens and locks.
885	 */
886	LIST_FOREACH(stp, &clp->lc_open, ls_list) {
887	    dumpp->ndcl_nopenowners++;
888	    LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
889		dumpp->ndcl_nopens++;
890		LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
891		    dumpp->ndcl_nlockowners++;
892		    LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
893			dumpp->ndcl_nlocks++;
894		    }
895		}
896	    }
897	}
898
899	/*
900	 * and the delegation lists.
901	 */
902	LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
903	    dumpp->ndcl_ndelegs++;
904	}
905	LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
906	    dumpp->ndcl_nolddelegs++;
907	}
908}
909
910/*
911 * Dump out lock stats for a file.
912 */
913APPLESTATIC void
914nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
915    NFSPROC_T *p)
916{
917	struct nfsstate *stp;
918	struct nfslock *lop;
919	int cnt = 0;
920	struct nfslockfile *lfp;
921	struct sockaddr *sad;
922	struct sockaddr_in *rad;
923	struct sockaddr_in6 *rad6;
924	int ret;
925	fhandle_t nfh;
926
927	ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
928	/*
929	 * First, get a reference on the nfsv4rootfs_lock so that an
930	 * exclusive lock on it cannot be acquired while dumping the locks.
931	 */
932	NFSLOCKV4ROOTMUTEX();
933	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
934	NFSUNLOCKV4ROOTMUTEX();
935	NFSLOCKSTATE();
936	if (!ret)
937		ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
938	if (ret) {
939		ldumpp[0].ndlck_clid.nclid_idlen = 0;
940		NFSUNLOCKSTATE();
941		NFSLOCKV4ROOTMUTEX();
942		nfsv4_relref(&nfsv4rootfs_lock);
943		NFSUNLOCKV4ROOTMUTEX();
944		return;
945	}
946
947	/*
948	 * For each open share on file, dump it out.
949	 */
950	stp = LIST_FIRST(&lfp->lf_open);
951	while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
952		ldumpp[cnt].ndlck_flags = stp->ls_flags;
953		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
954		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
955		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
956		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
957		ldumpp[cnt].ndlck_owner.nclid_idlen =
958		    stp->ls_openowner->ls_ownerlen;
959		NFSBCOPY(stp->ls_openowner->ls_owner,
960		    ldumpp[cnt].ndlck_owner.nclid_id,
961		    stp->ls_openowner->ls_ownerlen);
962		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
963		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
964		    stp->ls_clp->lc_idlen);
965		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
966		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
967		if (sad->sa_family == AF_INET) {
968			rad = (struct sockaddr_in *)sad;
969			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
970		} else {
971			rad6 = (struct sockaddr_in6 *)sad;
972			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
973		}
974		stp = LIST_NEXT(stp, ls_file);
975		cnt++;
976	}
977
978	/*
979	 * and all locks.
980	 */
981	lop = LIST_FIRST(&lfp->lf_lock);
982	while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
983		stp = lop->lo_stp;
984		ldumpp[cnt].ndlck_flags = lop->lo_flags;
985		ldumpp[cnt].ndlck_first = lop->lo_first;
986		ldumpp[cnt].ndlck_end = lop->lo_end;
987		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
988		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
989		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
990		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
991		ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
992		NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
993		    stp->ls_ownerlen);
994		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
995		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
996		    stp->ls_clp->lc_idlen);
997		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
998		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
999		if (sad->sa_family == AF_INET) {
1000			rad = (struct sockaddr_in *)sad;
1001			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
1002		} else {
1003			rad6 = (struct sockaddr_in6 *)sad;
1004			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
1005		}
1006		lop = LIST_NEXT(lop, lo_lckfile);
1007		cnt++;
1008	}
1009
1010	/*
1011	 * and the delegations.
1012	 */
1013	stp = LIST_FIRST(&lfp->lf_deleg);
1014	while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
1015		ldumpp[cnt].ndlck_flags = stp->ls_flags;
1016		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1017		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1018		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1019		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1020		ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
1021		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1022		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1023		    stp->ls_clp->lc_idlen);
1024		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
1025		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
1026		if (sad->sa_family == AF_INET) {
1027			rad = (struct sockaddr_in *)sad;
1028			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
1029		} else {
1030			rad6 = (struct sockaddr_in6 *)sad;
1031			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
1032		}
1033		stp = LIST_NEXT(stp, ls_file);
1034		cnt++;
1035	}
1036
1037	/*
1038	 * If list isn't full, mark end of list by setting the client name
1039	 * to zero length.
1040	 */
1041	if (cnt < maxcnt)
1042		ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1043	NFSUNLOCKSTATE();
1044	NFSLOCKV4ROOTMUTEX();
1045	nfsv4_relref(&nfsv4rootfs_lock);
1046	NFSUNLOCKV4ROOTMUTEX();
1047}
1048
1049/*
1050 * Server timer routine. It can scan any linked list, so long
1051 * as it holds the spin/mutex lock and there is no exclusive lock on
1052 * nfsv4rootfs_lock.
1053 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1054 *  to do this from a callout, since the spin locks work. For
1055 *  Darwin, I'm not sure what will work correctly yet.)
1056 * Should be called once per second.
1057 */
1058APPLESTATIC void
1059nfsrv_servertimer(void)
1060{
1061	struct nfsclient *clp, *nclp;
1062	struct nfsstate *stp, *nstp;
1063	int got_ref, i;
1064
1065	/*
1066	 * Make sure nfsboottime is set. This is used by V3 as well
1067	 * as V4. Note that nfsboottime is not nfsrvboottime, which is
1068	 * only used by the V4 server for leases.
1069	 */
1070	if (nfsboottime.tv_sec == 0)
1071		NFSSETBOOTTIME(nfsboottime);
1072
1073	/*
1074	 * If server hasn't started yet, just return.
1075	 */
1076	NFSLOCKSTATE();
1077	if (nfsrv_stablefirst.nsf_eograce == 0) {
1078		NFSUNLOCKSTATE();
1079		return;
1080	}
1081	if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
1082		if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
1083		    NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
1084			nfsrv_stablefirst.nsf_flags |=
1085			    (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1086		NFSUNLOCKSTATE();
1087		return;
1088	}
1089
1090	/*
1091	 * Try and get a reference count on the nfsv4rootfs_lock so that
1092	 * no nfsd thread can acquire an exclusive lock on it before this
1093	 * call is done. If it is already exclusively locked, just return.
1094	 */
1095	NFSLOCKV4ROOTMUTEX();
1096	got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1097	NFSUNLOCKV4ROOTMUTEX();
1098	if (got_ref == 0) {
1099		NFSUNLOCKSTATE();
1100		return;
1101	}
1102
1103	/*
1104	 * For each client...
1105	 */
1106	for (i = 0; i < nfsrv_clienthashsize; i++) {
1107	    clp = LIST_FIRST(&nfsclienthash[i]);
1108	    while (clp != LIST_END(&nfsclienthash[i])) {
1109		nclp = LIST_NEXT(clp, lc_hash);
1110		if (!(clp->lc_flags & LCL_EXPIREIT)) {
1111		    if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1112			 && ((LIST_EMPTY(&clp->lc_deleg)
1113			      && LIST_EMPTY(&clp->lc_open)) ||
1114			     nfsrv_clients > nfsrv_clienthighwater)) ||
1115			(clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1116			(clp->lc_expiry < NFSD_MONOSEC &&
1117			 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
1118			/*
1119			 * Lease has expired several nfsrv_lease times ago:
1120			 * PLUS
1121			 *    - no state is associated with it
1122			 *    OR
1123			 *    - above high water mark for number of clients
1124			 *      (nfsrv_clienthighwater should be large enough
1125			 *       that this only occurs when clients fail to
1126			 *       use the same nfs_client_id4.id. Maybe somewhat
1127			 *       higher that the maximum number of clients that
1128			 *       will mount this server?)
1129			 * OR
1130			 * Lease has expired a very long time ago
1131			 * OR
1132			 * Lease has expired PLUS the number of opens + locks
1133			 * has exceeded 90% of capacity
1134			 *
1135			 * --> Mark for expiry. The actual expiry will be done
1136			 *     by an nfsd sometime soon.
1137			 */
1138			clp->lc_flags |= LCL_EXPIREIT;
1139			nfsrv_stablefirst.nsf_flags |=
1140			    (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1141		    } else {
1142			/*
1143			 * If there are no opens, increment no open tick cnt
1144			 * If time exceeds NFSNOOPEN, mark it to be thrown away
1145			 * otherwise, if there is an open, reset no open time
1146			 * Hopefully, this will avoid excessive re-creation
1147			 * of open owners and subsequent open confirms.
1148			 */
1149			stp = LIST_FIRST(&clp->lc_open);
1150			while (stp != LIST_END(&clp->lc_open)) {
1151				nstp = LIST_NEXT(stp, ls_list);
1152				if (LIST_EMPTY(&stp->ls_open)) {
1153					stp->ls_noopens++;
1154					if (stp->ls_noopens > NFSNOOPEN ||
1155					    (nfsrv_openpluslock * 2) >
1156					    nfsrv_v4statelimit)
1157						nfsrv_stablefirst.nsf_flags |=
1158							NFSNSF_NOOPENS;
1159				} else {
1160					stp->ls_noopens = 0;
1161				}
1162				stp = nstp;
1163			}
1164		    }
1165		}
1166		clp = nclp;
1167	    }
1168	}
1169	NFSUNLOCKSTATE();
1170	NFSLOCKV4ROOTMUTEX();
1171	nfsv4_relref(&nfsv4rootfs_lock);
1172	NFSUNLOCKV4ROOTMUTEX();
1173}
1174
1175/*
1176 * The following set of functions free up the various data structures.
1177 */
1178/*
1179 * Clear out all open/lock state related to this nfsclient.
1180 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1181 * there are no other active nfsd threads.
1182 */
1183APPLESTATIC void
1184nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
1185{
1186	struct nfsstate *stp, *nstp;
1187	struct nfsdsession *sep, *nsep;
1188
1189	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
1190		nfsrv_freeopenowner(stp, 1, p);
1191	if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1192		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1193			(void)nfsrv_freesession(sep, NULL);
1194}
1195
1196/*
1197 * Free a client that has been cleaned. It should also already have been
1198 * removed from the lists.
1199 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
1200 *  softclock interrupts are enabled.)
1201 */
1202APPLESTATIC void
1203nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
1204{
1205
1206#ifdef notyet
1207	if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1208	     (LCL_GSS | LCL_CALLBACKSON) &&
1209	    (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1210	    clp->lc_handlelen > 0) {
1211		clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1212		clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1213		(void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1214			NULL, 0, NULL, NULL, NULL, p);
1215	}
1216#endif
1217	newnfs_disconnect(&clp->lc_req);
1218	NFSSOCKADDRFREE(clp->lc_req.nr_nam);
1219	NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1220	free(clp->lc_stateid, M_NFSDCLIENT);
1221	free(clp, M_NFSDCLIENT);
1222	NFSLOCKSTATE();
1223	newnfsstats.srvclients--;
1224	nfsrv_openpluslock--;
1225	nfsrv_clients--;
1226	NFSUNLOCKSTATE();
1227}
1228
1229/*
1230 * Free a list of delegation state structures.
1231 * (This function will also free all nfslockfile structures that no
1232 *  longer have associated state.)
1233 */
1234APPLESTATIC void
1235nfsrv_freedeleglist(struct nfsstatehead *sthp)
1236{
1237	struct nfsstate *stp, *nstp;
1238
1239	LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1240		nfsrv_freedeleg(stp);
1241	}
1242	LIST_INIT(sthp);
1243}
1244
1245/*
1246 * Free up a delegation.
1247 */
1248static void
1249nfsrv_freedeleg(struct nfsstate *stp)
1250{
1251	struct nfslockfile *lfp;
1252
1253	LIST_REMOVE(stp, ls_hash);
1254	LIST_REMOVE(stp, ls_list);
1255	LIST_REMOVE(stp, ls_file);
1256	lfp = stp->ls_lfp;
1257	if (LIST_EMPTY(&lfp->lf_open) &&
1258	    LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1259	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1260	    lfp->lf_usecount == 0 &&
1261	    nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1262		nfsrv_freenfslockfile(lfp);
1263	FREE((caddr_t)stp, M_NFSDSTATE);
1264	newnfsstats.srvdelegates--;
1265	nfsrv_openpluslock--;
1266	nfsrv_delegatecnt--;
1267}
1268
1269/*
1270 * This function frees an open owner and all associated opens.
1271 */
1272static void
1273nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1274{
1275	struct nfsstate *nstp, *tstp;
1276
1277	LIST_REMOVE(stp, ls_list);
1278	/*
1279	 * Now, free all associated opens.
1280	 */
1281	nstp = LIST_FIRST(&stp->ls_open);
1282	while (nstp != LIST_END(&stp->ls_open)) {
1283		tstp = nstp;
1284		nstp = LIST_NEXT(nstp, ls_list);
1285		(void) nfsrv_freeopen(tstp, NULL, cansleep, p);
1286	}
1287	if (stp->ls_op)
1288		nfsrvd_derefcache(stp->ls_op);
1289	FREE((caddr_t)stp, M_NFSDSTATE);
1290	newnfsstats.srvopenowners--;
1291	nfsrv_openpluslock--;
1292}
1293
1294/*
1295 * This function frees an open (nfsstate open structure) with all associated
1296 * lock_owners and locks. It also frees the nfslockfile structure iff there
1297 * are no other opens on the file.
1298 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1299 */
1300static int
1301nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1302{
1303	struct nfsstate *nstp, *tstp;
1304	struct nfslockfile *lfp;
1305	int ret;
1306
1307	LIST_REMOVE(stp, ls_hash);
1308	LIST_REMOVE(stp, ls_list);
1309	LIST_REMOVE(stp, ls_file);
1310
1311	lfp = stp->ls_lfp;
1312	/*
1313	 * Now, free all lockowners associated with this open.
1314	 */
1315	LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1316		nfsrv_freelockowner(tstp, vp, cansleep, p);
1317
1318	/*
1319	 * The nfslockfile is freed here if there are no locks
1320	 * associated with the open.
1321	 * If there are locks associated with the open, the
1322	 * nfslockfile structure can be freed via nfsrv_freelockowner().
1323	 * Acquire the state mutex to avoid races with calls to
1324	 * nfsrv_getlockfile().
1325	 */
1326	if (cansleep != 0)
1327		NFSLOCKSTATE();
1328	if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1329	    LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1330	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1331	    lfp->lf_usecount == 0 &&
1332	    (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
1333		nfsrv_freenfslockfile(lfp);
1334		ret = 1;
1335	} else
1336		ret = 0;
1337	if (cansleep != 0)
1338		NFSUNLOCKSTATE();
1339	FREE((caddr_t)stp, M_NFSDSTATE);
1340	newnfsstats.srvopens--;
1341	nfsrv_openpluslock--;
1342	return (ret);
1343}
1344
1345/*
1346 * Frees a lockowner and all associated locks.
1347 */
1348static void
1349nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1350    NFSPROC_T *p)
1351{
1352
1353	LIST_REMOVE(stp, ls_hash);
1354	LIST_REMOVE(stp, ls_list);
1355	nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1356	if (stp->ls_op)
1357		nfsrvd_derefcache(stp->ls_op);
1358	FREE((caddr_t)stp, M_NFSDSTATE);
1359	newnfsstats.srvlockowners--;
1360	nfsrv_openpluslock--;
1361}
1362
1363/*
1364 * Free all the nfs locks on a lockowner.
1365 */
1366static void
1367nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1368    NFSPROC_T *p)
1369{
1370	struct nfslock *lop, *nlop;
1371	struct nfsrollback *rlp, *nrlp;
1372	struct nfslockfile *lfp = NULL;
1373	int gottvp = 0;
1374	vnode_t tvp = NULL;
1375	uint64_t first, end;
1376
1377	if (vp != NULL)
1378		ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1379	lop = LIST_FIRST(&stp->ls_lock);
1380	while (lop != LIST_END(&stp->ls_lock)) {
1381		nlop = LIST_NEXT(lop, lo_lckowner);
1382		/*
1383		 * Since all locks should be for the same file, lfp should
1384		 * not change.
1385		 */
1386		if (lfp == NULL)
1387			lfp = lop->lo_lfp;
1388		else if (lfp != lop->lo_lfp)
1389			panic("allnfslocks");
1390		/*
1391		 * If vp is NULL and cansleep != 0, a vnode must be acquired
1392		 * from the file handle. This only occurs when called from
1393		 * nfsrv_cleanclient().
1394		 */
1395		if (gottvp == 0) {
1396			if (nfsrv_dolocallocks == 0)
1397				tvp = NULL;
1398			else if (vp == NULL && cansleep != 0) {
1399				tvp = nfsvno_getvp(&lfp->lf_fh);
1400				NFSVOPUNLOCK(tvp, 0);
1401			} else
1402				tvp = vp;
1403			gottvp = 1;
1404		}
1405
1406		if (tvp != NULL) {
1407			if (cansleep == 0)
1408				panic("allnfs2");
1409			first = lop->lo_first;
1410			end = lop->lo_end;
1411			nfsrv_freenfslock(lop);
1412			nfsrv_localunlock(tvp, lfp, first, end, p);
1413			LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1414			    nrlp)
1415				free(rlp, M_NFSDROLLBACK);
1416			LIST_INIT(&lfp->lf_rollback);
1417		} else
1418			nfsrv_freenfslock(lop);
1419		lop = nlop;
1420	}
1421	if (vp == NULL && tvp != NULL)
1422		vrele(tvp);
1423}
1424
1425/*
1426 * Free an nfslock structure.
1427 */
1428static void
1429nfsrv_freenfslock(struct nfslock *lop)
1430{
1431
1432	if (lop->lo_lckfile.le_prev != NULL) {
1433		LIST_REMOVE(lop, lo_lckfile);
1434		newnfsstats.srvlocks--;
1435		nfsrv_openpluslock--;
1436	}
1437	LIST_REMOVE(lop, lo_lckowner);
1438	FREE((caddr_t)lop, M_NFSDLOCK);
1439}
1440
1441/*
1442 * This function frees an nfslockfile structure.
1443 */
1444static void
1445nfsrv_freenfslockfile(struct nfslockfile *lfp)
1446{
1447
1448	LIST_REMOVE(lfp, lf_hash);
1449	FREE((caddr_t)lfp, M_NFSDLOCKFILE);
1450}
1451
1452/*
1453 * This function looks up an nfsstate structure via stateid.
1454 */
1455static int
1456nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1457    struct nfsstate **stpp)
1458{
1459	struct nfsstate *stp;
1460	struct nfsstatehead *hp;
1461	int error = 0;
1462
1463	*stpp = NULL;
1464	hp = NFSSTATEHASH(clp, *stateidp);
1465	LIST_FOREACH(stp, hp, ls_hash) {
1466		if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1467			NFSX_STATEIDOTHER))
1468			break;
1469	}
1470
1471	/*
1472	 * If no state id in list, return NFSERR_BADSTATEID.
1473	 */
1474	if (stp == LIST_END(hp)) {
1475		error = NFSERR_BADSTATEID;
1476		goto out;
1477	}
1478	*stpp = stp;
1479
1480out:
1481	NFSEXITCODE(error);
1482	return (error);
1483}
1484
1485/*
1486 * This function gets an nfsstate structure via owner string.
1487 */
1488static void
1489nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1490    struct nfsstate **stpp)
1491{
1492	struct nfsstate *stp;
1493
1494	*stpp = NULL;
1495	LIST_FOREACH(stp, hp, ls_list) {
1496		if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1497		  !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1498			*stpp = stp;
1499			return;
1500		}
1501	}
1502}
1503
1504/*
1505 * Lock control function called to update lock status.
1506 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1507 * that one isn't to be created and an NFSERR_xxx for other errors.
1508 * The structures new_stp and new_lop are passed in as pointers that should
1509 * be set to NULL if the structure is used and shouldn't be free'd.
1510 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1511 * never used and can safely be allocated on the stack. For all other
1512 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1513 * in case they are used.
1514 */
1515APPLESTATIC int
1516nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1517    struct nfslock **new_lopp, struct nfslockconflict *cfp,
1518    nfsquad_t clientid, nfsv4stateid_t *stateidp,
1519    __unused struct nfsexstuff *exp,
1520    struct nfsrv_descript *nd, NFSPROC_T *p)
1521{
1522	struct nfslock *lop;
1523	struct nfsstate *new_stp = *new_stpp;
1524	struct nfslock *new_lop = *new_lopp;
1525	struct nfsstate *tstp, *mystp, *nstp;
1526	int specialid = 0;
1527	struct nfslockfile *lfp;
1528	struct nfslock *other_lop = NULL;
1529	struct nfsstate *stp, *lckstp = NULL;
1530	struct nfsclient *clp = NULL;
1531	u_int32_t bits;
1532	int error = 0, haslock = 0, ret, reterr;
1533	int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1534	fhandle_t nfh;
1535	uint64_t first, end;
1536	uint32_t lock_flags;
1537
1538	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1539		/*
1540		 * Note the special cases of "all 1s" or "all 0s" stateids and
1541		 * let reads with all 1s go ahead.
1542		 */
1543		if (new_stp->ls_stateid.seqid == 0x0 &&
1544		    new_stp->ls_stateid.other[0] == 0x0 &&
1545		    new_stp->ls_stateid.other[1] == 0x0 &&
1546		    new_stp->ls_stateid.other[2] == 0x0)
1547			specialid = 1;
1548		else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1549		    new_stp->ls_stateid.other[0] == 0xffffffff &&
1550		    new_stp->ls_stateid.other[1] == 0xffffffff &&
1551		    new_stp->ls_stateid.other[2] == 0xffffffff)
1552			specialid = 2;
1553	}
1554
1555	/*
1556	 * Check for restart conditions (client and server).
1557	 */
1558	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1559	    &new_stp->ls_stateid, specialid);
1560	if (error)
1561		goto out;
1562
1563	/*
1564	 * Check for state resource limit exceeded.
1565	 */
1566	if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1567	    nfsrv_openpluslock > nfsrv_v4statelimit) {
1568		error = NFSERR_RESOURCE;
1569		goto out;
1570	}
1571
1572	/*
1573	 * For the lock case, get another nfslock structure,
1574	 * just in case we need it.
1575	 * Malloc now, before we start sifting through the linked lists,
1576	 * in case we have to wait for memory.
1577	 */
1578tryagain:
1579	if (new_stp->ls_flags & NFSLCK_LOCK)
1580		MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock),
1581		    M_NFSDLOCK, M_WAITOK);
1582	filestruct_locked = 0;
1583	reterr = 0;
1584	lfp = NULL;
1585
1586	/*
1587	 * Get the lockfile structure for CFH now, so we can do a sanity
1588	 * check against the stateid, before incrementing the seqid#, since
1589	 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1590	 * shouldn't be incremented for this case.
1591	 * If nfsrv_getlockfile() returns -1, it means "not found", which
1592	 * will be handled later.
1593	 * If we are doing Lock/LockU and local locking is enabled, sleep
1594	 * lock the nfslockfile structure.
1595	 */
1596	getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1597	NFSLOCKSTATE();
1598	if (getlckret == 0) {
1599		if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1600		    nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1601			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1602			    &lfp, &nfh, 1);
1603			if (getlckret == 0)
1604				filestruct_locked = 1;
1605		} else
1606			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1607			    &lfp, &nfh, 0);
1608	}
1609	if (getlckret != 0 && getlckret != -1)
1610		reterr = getlckret;
1611
1612	if (filestruct_locked != 0) {
1613		LIST_INIT(&lfp->lf_rollback);
1614		if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1615			/*
1616			 * For local locking, do the advisory locking now, so
1617			 * that any conflict can be detected. A failure later
1618			 * can be rolled back locally. If an error is returned,
1619			 * struct nfslockfile has been unlocked and any local
1620			 * locking rolled back.
1621			 */
1622			NFSUNLOCKSTATE();
1623			if (vnode_unlocked == 0) {
1624				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1625				vnode_unlocked = 1;
1626				NFSVOPUNLOCK(vp, 0);
1627			}
1628			reterr = nfsrv_locallock(vp, lfp,
1629			    (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1630			    new_lop->lo_first, new_lop->lo_end, cfp, p);
1631			NFSLOCKSTATE();
1632		}
1633	}
1634
1635	if (specialid == 0) {
1636	    if (new_stp->ls_flags & NFSLCK_TEST) {
1637		/*
1638		 * RFC 3530 does not list LockT as an op that renews a
1639		 * lease, but the consensus seems to be that it is ok
1640		 * for a server to do so.
1641		 */
1642		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1643		    (nfsquad_t)((u_quad_t)0), 0, nd, p);
1644
1645		/*
1646		 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1647		 * error returns for LockT, just go ahead and test for a lock,
1648		 * since there are no locks for this client, but other locks
1649		 * can conflict. (ie. same client will always be false)
1650		 */
1651		if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1652		    error = 0;
1653		lckstp = new_stp;
1654	    } else {
1655	      error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1656		(nfsquad_t)((u_quad_t)0), 0, nd, p);
1657	      if (error == 0)
1658		/*
1659		 * Look up the stateid
1660		 */
1661		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1662		  new_stp->ls_flags, &stp);
1663	      /*
1664	       * do some sanity checks for an unconfirmed open or a
1665	       * stateid that refers to the wrong file, for an open stateid
1666	       */
1667	      if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1668		  ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1669		   (getlckret == 0 && stp->ls_lfp != lfp)))
1670			error = NFSERR_BADSTATEID;
1671	      if (error == 0 &&
1672		  (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1673		  getlckret == 0 && stp->ls_lfp != lfp)
1674			error = NFSERR_BADSTATEID;
1675
1676	      /*
1677	       * If the lockowner stateid doesn't refer to the same file,
1678	       * I believe that is considered ok, since some clients will
1679	       * only create a single lockowner and use that for all locks
1680	       * on all files.
1681	       * For now, log it as a diagnostic, instead of considering it
1682	       * a BadStateid.
1683	       */
1684	      if (error == 0 && (stp->ls_flags &
1685		  (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1686		  getlckret == 0 && stp->ls_lfp != lfp) {
1687#ifdef DIAGNOSTIC
1688		  printf("Got a lock statid for different file open\n");
1689#endif
1690		  /*
1691		  error = NFSERR_BADSTATEID;
1692		  */
1693	      }
1694
1695	      if (error == 0) {
1696		    if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
1697			/*
1698			 * If haslock set, we've already checked the seqid.
1699			 */
1700			if (!haslock) {
1701			    if (stp->ls_flags & NFSLCK_OPEN)
1702				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1703				    stp->ls_openowner, new_stp->ls_op);
1704			    else
1705				error = NFSERR_BADSTATEID;
1706			}
1707			if (!error)
1708			    nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
1709			if (lckstp)
1710			    /*
1711			     * I believe this should be an error, but it
1712			     * isn't obvious what NFSERR_xxx would be
1713			     * appropriate, so I'll use NFSERR_INVAL for now.
1714			     */
1715			    error = NFSERR_INVAL;
1716			else
1717			    lckstp = new_stp;
1718		    } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
1719			/*
1720			 * If haslock set, ditto above.
1721			 */
1722			if (!haslock) {
1723			    if (stp->ls_flags & NFSLCK_OPEN)
1724				error = NFSERR_BADSTATEID;
1725			    else
1726				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1727				    stp, new_stp->ls_op);
1728			}
1729			lckstp = stp;
1730		    } else {
1731			lckstp = stp;
1732		    }
1733	      }
1734	      /*
1735	       * If the seqid part of the stateid isn't the same, return
1736	       * NFSERR_OLDSTATEID for cases other than I/O Ops.
1737	       * For I/O Ops, only return NFSERR_OLDSTATEID if
1738	       * nfsrv_returnoldstateid is set. (The consensus on the email
1739	       * list was that most clients would prefer to not receive
1740	       * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
1741	       * is what will happen, so I use the nfsrv_returnoldstateid to
1742	       * allow for either server configuration.)
1743	       */
1744	      if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
1745		  (((nd->nd_flag & ND_NFSV41) == 0 &&
1746		   (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1747		    nfsrv_returnoldstateid)) ||
1748		   ((nd->nd_flag & ND_NFSV41) != 0 &&
1749		    new_stp->ls_stateid.seqid != 0)))
1750		    error = NFSERR_OLDSTATEID;
1751	    }
1752	}
1753
1754	/*
1755	 * Now we can check for grace.
1756	 */
1757	if (!error)
1758		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
1759	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
1760		nfsrv_checkstable(clp))
1761		error = NFSERR_NOGRACE;
1762	/*
1763	 * If we successfully Reclaimed state, note that.
1764	 */
1765	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
1766		nfsrv_markstable(clp);
1767
1768	/*
1769	 * At this point, either error == NFSERR_BADSTATEID or the
1770	 * seqid# has been updated, so we can return any error.
1771	 * If error == 0, there may be an error in:
1772	 *    nd_repstat - Set by the calling function.
1773	 *    reterr - Set above, if getting the nfslockfile structure
1774	 *       or acquiring the local lock failed.
1775	 *    (If both of these are set, nd_repstat should probably be
1776	 *     returned, since that error was detected before this
1777	 *     function call.)
1778	 */
1779	if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
1780		if (error == 0) {
1781			if (nd->nd_repstat != 0)
1782				error = nd->nd_repstat;
1783			else
1784				error = reterr;
1785		}
1786		if (filestruct_locked != 0) {
1787			/* Roll back local locks. */
1788			NFSUNLOCKSTATE();
1789			if (vnode_unlocked == 0) {
1790				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
1791				vnode_unlocked = 1;
1792				NFSVOPUNLOCK(vp, 0);
1793			}
1794			nfsrv_locallock_rollback(vp, lfp, p);
1795			NFSLOCKSTATE();
1796			nfsrv_unlocklf(lfp);
1797		}
1798		NFSUNLOCKSTATE();
1799		goto out;
1800	}
1801
1802	/*
1803	 * Check the nfsrv_getlockfile return.
1804	 * Returned -1 if no structure found.
1805	 */
1806	if (getlckret == -1) {
1807		error = NFSERR_EXPIRED;
1808		/*
1809		 * Called from lockt, so no lock is OK.
1810		 */
1811		if (new_stp->ls_flags & NFSLCK_TEST) {
1812			error = 0;
1813		} else if (new_stp->ls_flags &
1814		    (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1815			/*
1816			 * Called to check for a lock, OK if the stateid is all
1817			 * 1s or all 0s, but there should be an nfsstate
1818			 * otherwise.
1819			 * (ie. If there is no open, I'll assume no share
1820			 *  deny bits.)
1821			 */
1822			if (specialid)
1823				error = 0;
1824			else
1825				error = NFSERR_BADSTATEID;
1826		}
1827		NFSUNLOCKSTATE();
1828		goto out;
1829	}
1830
1831	/*
1832	 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
1833	 * For NFSLCK_CHECK, allow a read if write access is granted,
1834	 * but check for a deny. For NFSLCK_LOCK, require correct access,
1835	 * which implies a conflicting deny can't exist.
1836	 */
1837	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
1838	    /*
1839	     * Four kinds of state id:
1840	     * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
1841	     * - stateid for an open
1842	     * - stateid for a delegation
1843	     * - stateid for a lock owner
1844	     */
1845	    if (!specialid) {
1846		if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
1847		    delegation = 1;
1848		    mystp = stp;
1849		    nfsrv_delaydelegtimeout(stp);
1850	        } else if (stp->ls_flags & NFSLCK_OPEN) {
1851		    mystp = stp;
1852		} else {
1853		    mystp = stp->ls_openstp;
1854		}
1855		/*
1856		 * If locking or checking, require correct access
1857		 * bit set.
1858		 */
1859		if (((new_stp->ls_flags & NFSLCK_LOCK) &&
1860		     !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
1861		       mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
1862		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
1863		      (NFSLCK_CHECK | NFSLCK_READACCESS) &&
1864		     !(mystp->ls_flags & NFSLCK_READACCESS)) ||
1865		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
1866		      (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
1867		     !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
1868			if (filestruct_locked != 0) {
1869				/* Roll back local locks. */
1870				NFSUNLOCKSTATE();
1871				if (vnode_unlocked == 0) {
1872					ASSERT_VOP_ELOCKED(vp,
1873					    "nfsrv_lockctrl3");
1874					vnode_unlocked = 1;
1875					NFSVOPUNLOCK(vp, 0);
1876				}
1877				nfsrv_locallock_rollback(vp, lfp, p);
1878				NFSLOCKSTATE();
1879				nfsrv_unlocklf(lfp);
1880			}
1881			NFSUNLOCKSTATE();
1882			error = NFSERR_OPENMODE;
1883			goto out;
1884		}
1885	    } else
1886		mystp = NULL;
1887	    if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
1888		/*
1889		 * Check for a conflicting deny bit.
1890		 */
1891		LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
1892		    if (tstp != mystp) {
1893			bits = tstp->ls_flags;
1894			bits >>= NFSLCK_SHIFT;
1895			if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
1896			    KASSERT(vnode_unlocked == 0,
1897				("nfsrv_lockctrl: vnode unlocked1"));
1898			    ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
1899				vp, p);
1900			    if (ret == 1) {
1901				/*
1902				* nfsrv_clientconflict unlocks state
1903				 * when it returns non-zero.
1904				 */
1905				lckstp = NULL;
1906				goto tryagain;
1907			    }
1908			    if (ret == 0)
1909				NFSUNLOCKSTATE();
1910			    if (ret == 2)
1911				error = NFSERR_PERM;
1912			    else
1913				error = NFSERR_OPENMODE;
1914			    goto out;
1915			}
1916		    }
1917		}
1918
1919		/* We're outta here */
1920		NFSUNLOCKSTATE();
1921		goto out;
1922	    }
1923	}
1924
1925	/*
1926	 * For setattr, just get rid of all the Delegations for other clients.
1927	 */
1928	if (new_stp->ls_flags & NFSLCK_SETATTR) {
1929		KASSERT(vnode_unlocked == 0,
1930		    ("nfsrv_lockctrl: vnode unlocked2"));
1931		ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
1932		if (ret) {
1933			/*
1934			 * nfsrv_cleandeleg() unlocks state when it
1935			 * returns non-zero.
1936			 */
1937			if (ret == -1) {
1938				lckstp = NULL;
1939				goto tryagain;
1940			}
1941			error = ret;
1942			goto out;
1943		}
1944		if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1945		    (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
1946		     LIST_EMPTY(&lfp->lf_deleg))) {
1947			NFSUNLOCKSTATE();
1948			goto out;
1949		}
1950	}
1951
1952	/*
1953	 * Check for a conflicting delegation. If one is found, call
1954	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
1955	 * been set yet, it will get the lock. Otherwise, it will recall
1956	 * the delegation. Then, we try again...
1957	 * I currently believe the conflict algorithm to be:
1958	 * For Lock Ops (Lock/LockT/LockU)
1959	 * - there is a conflict iff a different client has a write delegation
1960	 * For Reading (Read Op)
1961	 * - there is a conflict iff a different client has a write delegation
1962	 *   (the specialids are always a different client)
1963	 * For Writing (Write/Setattr of size)
1964	 * - there is a conflict if a different client has any delegation
1965	 * - there is a conflict if the same client has a read delegation
1966	 *   (I don't understand why this isn't allowed, but that seems to be
1967	 *    the current consensus?)
1968	 */
1969	tstp = LIST_FIRST(&lfp->lf_deleg);
1970	while (tstp != LIST_END(&lfp->lf_deleg)) {
1971	    nstp = LIST_NEXT(tstp, ls_file);
1972	    if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
1973		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
1974		  (new_lop->lo_flags & NFSLCK_READ))) &&
1975		  clp != tstp->ls_clp &&
1976		 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
1977		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
1978		   (new_lop->lo_flags & NFSLCK_WRITE) &&
1979		  (clp != tstp->ls_clp ||
1980		   (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
1981		ret = 0;
1982		if (filestruct_locked != 0) {
1983			/* Roll back local locks. */
1984			NFSUNLOCKSTATE();
1985			if (vnode_unlocked == 0) {
1986				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
1987				NFSVOPUNLOCK(vp, 0);
1988			}
1989			nfsrv_locallock_rollback(vp, lfp, p);
1990			NFSLOCKSTATE();
1991			nfsrv_unlocklf(lfp);
1992			NFSUNLOCKSTATE();
1993			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
1994			vnode_unlocked = 0;
1995			if ((vp->v_iflag & VI_DOOMED) != 0)
1996				ret = NFSERR_SERVERFAULT;
1997			NFSLOCKSTATE();
1998		}
1999		if (ret == 0)
2000			ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
2001		if (ret) {
2002		    /*
2003		     * nfsrv_delegconflict unlocks state when it
2004		     * returns non-zero, which it always does.
2005		     */
2006		    if (other_lop) {
2007			FREE((caddr_t)other_lop, M_NFSDLOCK);
2008			other_lop = NULL;
2009		    }
2010		    if (ret == -1) {
2011			lckstp = NULL;
2012			goto tryagain;
2013		    }
2014		    error = ret;
2015		    goto out;
2016		}
2017		/* Never gets here. */
2018	    }
2019	    tstp = nstp;
2020	}
2021
2022	/*
2023	 * Handle the unlock case by calling nfsrv_updatelock().
2024	 * (Should I have done some access checking above for unlock? For now,
2025	 *  just let it happen.)
2026	 */
2027	if (new_stp->ls_flags & NFSLCK_UNLOCK) {
2028		first = new_lop->lo_first;
2029		end = new_lop->lo_end;
2030		nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2031		stateidp->seqid = ++(stp->ls_stateid.seqid);
2032		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2033			stateidp->seqid = stp->ls_stateid.seqid = 1;
2034		stateidp->other[0] = stp->ls_stateid.other[0];
2035		stateidp->other[1] = stp->ls_stateid.other[1];
2036		stateidp->other[2] = stp->ls_stateid.other[2];
2037		if (filestruct_locked != 0) {
2038			NFSUNLOCKSTATE();
2039			if (vnode_unlocked == 0) {
2040				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2041				vnode_unlocked = 1;
2042				NFSVOPUNLOCK(vp, 0);
2043			}
2044			/* Update the local locks. */
2045			nfsrv_localunlock(vp, lfp, first, end, p);
2046			NFSLOCKSTATE();
2047			nfsrv_unlocklf(lfp);
2048		}
2049		NFSUNLOCKSTATE();
2050		goto out;
2051	}
2052
2053	/*
2054	 * Search for a conflicting lock. A lock conflicts if:
2055	 * - the lock range overlaps and
2056	 * - at least one lock is a write lock and
2057	 * - it is not owned by the same lock owner
2058	 */
2059	if (!delegation) {
2060	  LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2061	    if (new_lop->lo_end > lop->lo_first &&
2062		new_lop->lo_first < lop->lo_end &&
2063		(new_lop->lo_flags == NFSLCK_WRITE ||
2064		 lop->lo_flags == NFSLCK_WRITE) &&
2065		lckstp != lop->lo_stp &&
2066		(clp != lop->lo_stp->ls_clp ||
2067		 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2068		 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2069		    lckstp->ls_ownerlen))) {
2070		if (other_lop) {
2071		    FREE((caddr_t)other_lop, M_NFSDLOCK);
2072		    other_lop = NULL;
2073		}
2074		if (vnode_unlocked != 0)
2075		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2076			NULL, p);
2077		else
2078		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2079			vp, p);
2080		if (ret == 1) {
2081		    if (filestruct_locked != 0) {
2082			if (vnode_unlocked == 0) {
2083				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2084				NFSVOPUNLOCK(vp, 0);
2085			}
2086			/* Roll back local locks. */
2087			nfsrv_locallock_rollback(vp, lfp, p);
2088			NFSLOCKSTATE();
2089			nfsrv_unlocklf(lfp);
2090			NFSUNLOCKSTATE();
2091			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2092			vnode_unlocked = 0;
2093			if ((vp->v_iflag & VI_DOOMED) != 0) {
2094				error = NFSERR_SERVERFAULT;
2095				goto out;
2096			}
2097		    }
2098		    /*
2099		     * nfsrv_clientconflict() unlocks state when it
2100		     * returns non-zero.
2101		     */
2102		    lckstp = NULL;
2103		    goto tryagain;
2104		}
2105		/*
2106		 * Found a conflicting lock, so record the conflict and
2107		 * return the error.
2108		 */
2109		if (cfp != NULL && ret == 0) {
2110		    cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2111		    cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2112		    cfp->cl_first = lop->lo_first;
2113		    cfp->cl_end = lop->lo_end;
2114		    cfp->cl_flags = lop->lo_flags;
2115		    cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2116		    NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2117			cfp->cl_ownerlen);
2118		}
2119		if (ret == 2)
2120		    error = NFSERR_PERM;
2121		else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2122		    error = NFSERR_RECLAIMCONFLICT;
2123		else if (new_stp->ls_flags & NFSLCK_CHECK)
2124		    error = NFSERR_LOCKED;
2125		else
2126		    error = NFSERR_DENIED;
2127		if (filestruct_locked != 0 && ret == 0) {
2128			/* Roll back local locks. */
2129			NFSUNLOCKSTATE();
2130			if (vnode_unlocked == 0) {
2131				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2132				vnode_unlocked = 1;
2133				NFSVOPUNLOCK(vp, 0);
2134			}
2135			nfsrv_locallock_rollback(vp, lfp, p);
2136			NFSLOCKSTATE();
2137			nfsrv_unlocklf(lfp);
2138		}
2139		if (ret == 0)
2140			NFSUNLOCKSTATE();
2141		goto out;
2142	    }
2143	  }
2144	}
2145
2146	/*
2147	 * We only get here if there was no lock that conflicted.
2148	 */
2149	if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2150		NFSUNLOCKSTATE();
2151		goto out;
2152	}
2153
2154	/*
2155	 * We only get here when we are creating or modifying a lock.
2156	 * There are two variants:
2157	 * - exist_lock_owner where lock_owner exists
2158	 * - open_to_lock_owner with new lock_owner
2159	 */
2160	first = new_lop->lo_first;
2161	end = new_lop->lo_end;
2162	lock_flags = new_lop->lo_flags;
2163	if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2164		nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2165		stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2166		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2167			stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2168		stateidp->other[0] = lckstp->ls_stateid.other[0];
2169		stateidp->other[1] = lckstp->ls_stateid.other[1];
2170		stateidp->other[2] = lckstp->ls_stateid.other[2];
2171	} else {
2172		/*
2173		 * The new open_to_lock_owner case.
2174		 * Link the new nfsstate into the lists.
2175		 */
2176		new_stp->ls_seq = new_stp->ls_opentolockseq;
2177		nfsrvd_refcache(new_stp->ls_op);
2178		stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2179		stateidp->other[0] = new_stp->ls_stateid.other[0] =
2180		    clp->lc_clientid.lval[0];
2181		stateidp->other[1] = new_stp->ls_stateid.other[1] =
2182		    clp->lc_clientid.lval[1];
2183		stateidp->other[2] = new_stp->ls_stateid.other[2] =
2184		    nfsrv_nextstateindex(clp);
2185		new_stp->ls_clp = clp;
2186		LIST_INIT(&new_stp->ls_lock);
2187		new_stp->ls_openstp = stp;
2188		new_stp->ls_lfp = lfp;
2189		nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2190		    lfp);
2191		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2192		    new_stp, ls_hash);
2193		LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2194		*new_lopp = NULL;
2195		*new_stpp = NULL;
2196		newnfsstats.srvlockowners++;
2197		nfsrv_openpluslock++;
2198	}
2199	if (filestruct_locked != 0) {
2200		NFSUNLOCKSTATE();
2201		nfsrv_locallock_commit(lfp, lock_flags, first, end);
2202		NFSLOCKSTATE();
2203		nfsrv_unlocklf(lfp);
2204	}
2205	NFSUNLOCKSTATE();
2206
2207out:
2208	if (haslock) {
2209		NFSLOCKV4ROOTMUTEX();
2210		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2211		NFSUNLOCKV4ROOTMUTEX();
2212	}
2213	if (vnode_unlocked != 0) {
2214		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2215		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
2216			error = NFSERR_SERVERFAULT;
2217	}
2218	if (other_lop)
2219		FREE((caddr_t)other_lop, M_NFSDLOCK);
2220	NFSEXITCODE2(error, nd);
2221	return (error);
2222}
2223
2224/*
2225 * Check for state errors for Open.
2226 * repstat is passed back out as an error if more critical errors
2227 * are not detected.
 *
 * The checks are made in this order: server/client restart, the state
 * resource high water mark, client lookup, open owner seqid, grace
 * period and, for a reclaim, nfsrv_checkstable(). If all of those pass
 * and repstat is non-zero, repstat is returned and no conflict checking
 * is done. Otherwise, when vp is non-NULL, the opens and delegations
 * recorded for the file are scanned for conflicts with the new open.
 *
 * clientid - id handed to nfsrv_checkrestart() and nfsrv_getclient()
 * stateidp - stateid compared against the file's delegations when
 *            NFSLCK_DELEGCUR is set in new_stp->ls_flags
 * new_stp  - describes the open being checked; ls_flags, ls_stateid,
 *            ls_seq and ls_op are read here and it is passed to
 *            nfsrv_getowner()
 * vp       - vnode of the file, or NULL when the file doesn't exist yet
 * nd, p    - request descriptor and process, passed through to helpers
 *            (nd->nd_flag is also tested for ND_NFSV41)
 * repstat  - status to return if none of the earlier checks fail
 *
 * Returns 0 or an NFSERR_xxx value. The state lock taken with
 * NFSLOCKSTATE() is dropped before returning, either here or (per the
 * comments in the body) by nfsrv_clientconflict()/nfsrv_delegconflict()
 * when they return non-zero. When haslock has been set, nfsv4rootfs_lock
 * is released before returning, except on the nfsrv_delegconflict()
 * error exit (see the review note at that exit).
2228 */
2229APPLESTATIC int
2230nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
2231    struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
2232    NFSPROC_T *p, int repstat)
2233{
2234	struct nfsstate *stp, *nstp;
2235	struct nfsclient *clp;
2236	struct nfsstate *ownerstp;
2237	struct nfslockfile *lfp, *new_lfp;
2238	int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
2239
	/*
	 * readonly is set when the share bits of the new open are exactly
	 * NFSLCK_READACCESS; it selects which delegation conflict rule is
	 * applied near the end of the function.
	 */
2240	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2241		readonly = 1;
2242	/*
2243	 * Check for restart conditions (client and server).
2244	 */
2245	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2246		&new_stp->ls_stateid, 0);
2247	if (error)
2248		goto out;
2249
2250	/*
2251	 * Check for state resource limit exceeded.
2252	 * Technically this should be SMP protected, but the worst
2253	 * case error is "out by one or two" on the count when it
2254	 * returns NFSERR_RESOURCE and the limit is just a rather
2255	 * arbitrary high water mark, so no harm is done.
2256	 */
2257	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
2258		error = NFSERR_RESOURCE;
2259		goto out;
2260	}
2261
2262tryagain:
	/*
	 * Restart point used when nfsrv_clientconflict() returns 1 or
	 * nfsrv_delegconflict() returns -1. A fresh new_lfp is allocated
	 * and the file handle is fetched before the state lock is taken.
	 * NOTE(review): presumably done before NFSLOCKSTATE() because an
	 * M_WAITOK allocation may sleep - confirm.
	 * getfhret is only examined after the vp == NULL early return.
	 */
2263	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2264	    M_NFSDLOCKFILE, M_WAITOK);
2265	if (vp)
2266		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2267		    NULL, p);
2268	NFSLOCKSTATE();
2269	/*
2270	 * Get the nfsclient structure.
2271	 */
2272	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2273	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2274
2275	/*
2276	 * Look up the open owner. See if it needs confirmation and
2277	 * check the seq#, as required.
2278	 */
2279	if (!error)
2280		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2281
	/*
	 * ownerstp is only passed to nfsrv_getowner() when the client
	 * lookup succeeded, so it must not be looked at unless !error.
	 */
2282	if (!error && ownerstp) {
2283		error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2284		    new_stp->ls_op);
2285		/*
2286		 * If the OpenOwner hasn't been confirmed, assume the
2287		 * old one was a replay and this one is ok.
2288		 * See: RFC3530 Sec. 14.2.18.
2289		 */
2290		if (error == NFSERR_BADSEQID &&
2291		    (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2292			error = 0;
2293	}
2294
2295	/*
2296	 * Check for grace.
2297	 */
2298	if (!error)
2299		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2300	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2301		nfsrv_checkstable(clp))
2302		error = NFSERR_NOGRACE;
2303
2304	/*
2305	 * If none of the above errors occurred, let repstat be
2306	 * returned.
2307	 */
2308	if (repstat && !error)
2309		error = repstat;
	/*
	 * Common early failure exit: drop the state lock, release the
	 * v4root lock if a previous pass acquired it and free new_lfp,
	 * which has not been given to nfsrv_getlockfile() at this point.
	 */
2310	if (error) {
2311		NFSUNLOCKSTATE();
2312		if (haslock) {
2313			NFSLOCKV4ROOTMUTEX();
2314			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2315			NFSUNLOCKV4ROOTMUTEX();
2316		}
2317		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2318		goto out;
2319	}
2320
2321	/*
2322	 * If vp == NULL, the file doesn't exist yet, so return ok.
2323	 * (This always happens on the first pass, so haslock must be 0.)
2324	 */
2325	if (vp == NULL) {
2326		NFSUNLOCKSTATE();
2327		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2328		goto out;
2329	}
2330
2331	/*
2332	 * Get the structure for the underlying file.
	 * A failure recorded earlier by nfsrv_getlockfh() takes precedence
	 * over the lookup. new_lfp is passed by reference.
	 * NOTE(review): presumably nfsrv_getlockfile() sets new_lfp to NULL
	 * when it keeps the structure - confirm. Whatever is left in new_lfp
	 * afterwards is freed here.
2333	 */
2334	if (getfhret)
2335		error = getfhret;
2336	else
2337		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2338		    NULL, 0);
2339	if (new_lfp)
2340		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2341	if (error) {
2342		NFSUNLOCKSTATE();
2343		if (haslock) {
2344			NFSLOCKV4ROOTMUTEX();
2345			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2346			NFSUNLOCKV4ROOTMUTEX();
2347		}
2348		goto out;
2349	}
2350
2351	/*
2352	 * Search for a conflicting open/share.
2353	 */
2354	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2355	    /*
2356	     * For Delegate_Cur, search for the matching Delegation,
2357	     * which indicates no conflict.
2358	     * An old delegation should have been recovered by the
2359	     * client doing a Claim_DELEGATE_Prev, so I won't let
2360	     * it match and return NFSERR_EXPIRED. Should I let it
2361	     * match?
	     * A delegation matches when it is not flagged NFSLCK_OLDDELEG,
	     * its "other" field equals the one in stateidp and the seqids
	     * are equal (for ND_NFSV41 requests a seqid of 0 in stateidp
	     * is accepted as well).
2362	     */
2363	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2364		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2365		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2366		    stateidp->seqid == 0) ||
2367		    stateidp->seqid == stp->ls_stateid.seqid) &&
2368		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2369			  NFSX_STATEIDOTHER))
2370			break;
2371	    }
	    /*
	     * Fail with NFSERR_EXPIRED when the loop ran off the end of
	     * the list (no match) or when write access is being requested
	     * and the matching delegation is a read delegation.
	     */
2372	    if (stp == LIST_END(&lfp->lf_deleg) ||
2373		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2374		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2375		NFSUNLOCKSTATE();
2376		if (haslock) {
2377			NFSLOCKV4ROOTMUTEX();
2378			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2379			NFSUNLOCKV4ROOTMUTEX();
2380		}
2381		error = NFSERR_EXPIRED;
2382		goto out;
2383	    }
2384	}
2385
2386	/*
2387	 * Check for access/deny bit conflicts. I check for the same
2388	 * owner as well, in case the client didn't bother.
	 * There is a conflict when the access bits of the new open
	 * intersect the NFSLCK_SHIFT-shifted flags of an existing open,
	 * or the other way around. Opens with NFSLCK_DELEGCUR set skip
	 * this test.
	 * nfsrv_clientconflict() results as handled here: 1 restarts at
	 * tryagain, 2 yields NFSERR_PERM, anything else yields
	 * NFSERR_RECLAIMCONFLICT for a reclaim or NFSERR_SHAREDENIED.
	 * The state lock is only dropped here for ret == 0, since the
	 * callee has already dropped it for a non-zero return.
2389	 */
2390	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2391		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2392		    (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2393		      ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2394		     ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2395		      ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2396			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2397			if (ret == 1) {
2398				/*
2399				 * nfsrv_clientconflict() unlocks
2400				 * state when it returns non-zero.
2401				 */
2402				goto tryagain;
2403			}
2404			if (ret == 2)
2405				error = NFSERR_PERM;
2406			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2407				error = NFSERR_RECLAIMCONFLICT;
2408			else
2409				error = NFSERR_SHAREDENIED;
2410			if (ret == 0)
2411				NFSUNLOCKSTATE();
2412			if (haslock) {
2413				NFSLOCKV4ROOTMUTEX();
2414				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2415				NFSUNLOCKV4ROOTMUTEX();
2416			}
2417			goto out;
2418		}
2419	}
2420
2421	/*
2422	 * Check for a conflicting delegation. If one is found, call
2423	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2424	 * been set yet, it will get the lock. Otherwise, it will recall
2425	 * the delegation. Then, we try again...
2426	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2427	 *  isn't a conflict.)
2428	 * I currently believe the conflict algorithm to be:
2429	 * For Open with Read Access and Deny None
2430	 * - there is a conflict iff a different client has a write delegation
2431	 * For Open with other Write Access or any Deny except None
2432	 * - there is a conflict if a different client has any delegation
2433	 * - there is a conflict if the same client has a read delegation
2434	 *   (The current consensus is that this last case should be
2435	 *    considered a conflict since the client with a read delegation
2436	 *    could have done an Open with ReadAccess and WriteDeny
2437	 *    locally and then not have checked for the WriteDeny.)
2438	 * Don't check for a Reclaim, since that will be dealt with
2439	 * by nfsrv_openctrl().
2440	 */
2441	if (!(new_stp->ls_flags &
2442		(NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
	    /*
	     * The successor (nstp) is fetched before the call below.
	     * NOTE(review): presumably because nfsrv_delegconflict() may
	     * take stp off the list - confirm.
	     */
2443	    stp = LIST_FIRST(&lfp->lf_deleg);
2444	    while (stp != LIST_END(&lfp->lf_deleg)) {
2445		nstp = LIST_NEXT(stp, ls_file);
2446		if ((readonly && stp->ls_clp != clp &&
2447		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2448		    (!readonly && (stp->ls_clp != clp ||
2449		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2450			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2451			if (ret) {
2452			    /*
2453			     * nfsrv_delegconflict() unlocks state
2454			     * when it returns non-zero.
			     * NOTE(review): unlike the other error exits,
			     * this one does not test haslock; presumably
			     * nfsrv_delegconflict() has already released
			     * nfsv4rootfs_lock when it returns an error
			     * other than -1 - confirm against the callee.
2455			     */
2456			    if (ret == -1)
2457				goto tryagain;
2458			    error = ret;
2459			    goto out;
2460			}
2461		}
2462		stp = nstp;
2463	    }
2464	}
	/*
	 * No conflict was found (error is still 0 here): drop the state
	 * lock and, if it was acquired, the v4root lock.
	 */
2465	NFSUNLOCKSTATE();
2466	if (haslock) {
2467		NFSLOCKV4ROOTMUTEX();
2468		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2469		NFSUNLOCKV4ROOTMUTEX();
2470	}
2471
2472out:
2473	NFSEXITCODE2(error, nd);
2474	return (error);
2475}
2476
2477/*
2478 * Open control function to create/update open state for an open.
2479 */
2480APPLESTATIC int
2481nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2482    struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2483    nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2484    NFSPROC_T *p, u_quad_t filerev)
2485{
2486	struct nfsstate *new_stp = *new_stpp;
2487	struct nfsstate *stp, *nstp;
2488	struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2489	struct nfslockfile *lfp, *new_lfp;
2490	struct nfsclient *clp;
2491	int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2492	int readonly = 0, cbret = 1, getfhret = 0;
2493
2494	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2495		readonly = 1;
2496	/*
2497	 * Check for restart conditions (client and server).
2498	 * (Paranoia, should have been detected by nfsrv_opencheck().)
2499	 * If an error does show up, return NFSERR_EXPIRED, since the
2500	 * seqid# has already been incremented.
2501	 */
2502	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2503	    &new_stp->ls_stateid, 0);
2504	if (error) {
2505		printf("Nfsd: openctrl unexpected restart err=%d\n",
2506		    error);
2507		error = NFSERR_EXPIRED;
2508		goto out;
2509	}
2510
2511tryagain:
2512	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2513	    M_NFSDLOCKFILE, M_WAITOK);
2514	MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate),
2515	    M_NFSDSTATE, M_WAITOK);
2516	MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate),
2517	    M_NFSDSTATE, M_WAITOK);
2518	getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2519	    NULL, p);
2520	NFSLOCKSTATE();
2521	/*
2522	 * Get the client structure. Since the linked lists could be changed
2523	 * by other nfsd processes if this process does a tsleep(), one of
2524	 * two things must be done.
2525	 * 1 - don't tsleep()
2526	 * or
2527	 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2528	 *     before using the lists, since this lock stops the other
2529	 *     nfsd. This should only be used for rare cases, since it
2530	 *     essentially single threads the nfsd.
2531	 *     At this time, it is only done for cases where the stable
2532	 *     storage file must be written prior to completion of state
2533	 *     expiration.
2534	 */
2535	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2536	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2537	if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2538	    clp->lc_program) {
2539		/*
2540		 * This happens on the first open for a client
2541		 * that supports callbacks.
2542		 */
2543		NFSUNLOCKSTATE();
2544		/*
2545		 * Although nfsrv_docallback() will sleep, clp won't
2546		 * go away, since they are only removed when the
2547		 * nfsv4_lock() has blocked the nfsd threads. The
2548		 * fields in clp can change, but having multiple
2549		 * threads do this Null callback RPC should be
2550		 * harmless.
2551		 */
2552		cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2553		    NULL, 0, NULL, NULL, NULL, p);
2554		NFSLOCKSTATE();
2555		clp->lc_flags &= ~LCL_NEEDSCBNULL;
2556		if (!cbret)
2557			clp->lc_flags |= LCL_CALLBACKSON;
2558	}
2559
2560	/*
2561	 * Look up the open owner. See if it needs confirmation and
2562	 * check the seq#, as required.
2563	 */
2564	if (!error)
2565		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2566
2567	if (error) {
2568		NFSUNLOCKSTATE();
2569		printf("Nfsd: openctrl unexpected state err=%d\n",
2570			error);
2571		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2572		free((caddr_t)new_open, M_NFSDSTATE);
2573		free((caddr_t)new_deleg, M_NFSDSTATE);
2574		if (haslock) {
2575			NFSLOCKV4ROOTMUTEX();
2576			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2577			NFSUNLOCKV4ROOTMUTEX();
2578		}
2579		error = NFSERR_EXPIRED;
2580		goto out;
2581	}
2582
2583	if (new_stp->ls_flags & NFSLCK_RECLAIM)
2584		nfsrv_markstable(clp);
2585
2586	/*
2587	 * Get the structure for the underlying file.
2588	 */
2589	if (getfhret)
2590		error = getfhret;
2591	else
2592		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2593		    NULL, 0);
2594	if (new_lfp)
2595		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2596	if (error) {
2597		NFSUNLOCKSTATE();
2598		printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2599		    error);
2600		free((caddr_t)new_open, M_NFSDSTATE);
2601		free((caddr_t)new_deleg, M_NFSDSTATE);
2602		if (haslock) {
2603			NFSLOCKV4ROOTMUTEX();
2604			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2605			NFSUNLOCKV4ROOTMUTEX();
2606		}
2607		goto out;
2608	}
2609
2610	/*
2611	 * Search for a conflicting open/share.
2612	 */
2613	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2614	    /*
2615	     * For Delegate_Cur, search for the matching Delegation,
2616	     * which indicates no conflict.
2617	     * An old delegation should have been recovered by the
2618	     * client doing a Claim_DELEGATE_Prev, so I won't let
2619	     * it match and return NFSERR_EXPIRED. Should I let it
2620	     * match?
2621	     */
2622	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2623		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2624		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2625		    stateidp->seqid == 0) ||
2626		    stateidp->seqid == stp->ls_stateid.seqid) &&
2627		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2628			NFSX_STATEIDOTHER))
2629			break;
2630	    }
2631	    if (stp == LIST_END(&lfp->lf_deleg) ||
2632		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2633		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2634		NFSUNLOCKSTATE();
2635		printf("Nfsd openctrl unexpected expiry\n");
2636		free((caddr_t)new_open, M_NFSDSTATE);
2637		free((caddr_t)new_deleg, M_NFSDSTATE);
2638		if (haslock) {
2639			NFSLOCKV4ROOTMUTEX();
2640			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2641			NFSUNLOCKV4ROOTMUTEX();
2642		}
2643		error = NFSERR_EXPIRED;
2644		goto out;
2645	    }
2646
2647	    /*
2648	     * Don't issue a Delegation, since one already exists and
2649	     * delay delegation timeout, as required.
2650	     */
2651	    delegate = 0;
2652	    nfsrv_delaydelegtimeout(stp);
2653	}
2654
2655	/*
2656	 * Check for access/deny bit conflicts. I also check for the
2657	 * same owner, since the client might not have bothered to check.
2658	 * Also, note an open for the same file and owner, if found,
2659	 * which is all we do here for Delegate_Cur, since conflict
2660	 * checking is already done.
2661	 */
2662	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2663		if (ownerstp && stp->ls_openowner == ownerstp)
2664			openstp = stp;
2665		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2666		    /*
2667		     * If another client has the file open, the only
2668		     * delegation that can be issued is a Read delegation
2669		     * and only if it is a Read open with Deny none.
2670		     */
2671		    if (clp != stp->ls_clp) {
2672			if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2673			    NFSLCK_READACCESS)
2674			    writedeleg = 0;
2675			else
2676			    delegate = 0;
2677		    }
2678		    if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2679		        ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2680		       ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2681		        ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
2682			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2683			if (ret == 1) {
2684				/*
2685				 * nfsrv_clientconflict() unlocks state
2686				 * when it returns non-zero.
2687				 */
2688				free((caddr_t)new_open, M_NFSDSTATE);
2689				free((caddr_t)new_deleg, M_NFSDSTATE);
2690				openstp = NULL;
2691				goto tryagain;
2692			}
2693			if (ret == 2)
2694				error = NFSERR_PERM;
2695			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2696				error = NFSERR_RECLAIMCONFLICT;
2697			else
2698				error = NFSERR_SHAREDENIED;
2699			if (ret == 0)
2700				NFSUNLOCKSTATE();
2701			if (haslock) {
2702				NFSLOCKV4ROOTMUTEX();
2703				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2704				NFSUNLOCKV4ROOTMUTEX();
2705			}
2706			free((caddr_t)new_open, M_NFSDSTATE);
2707			free((caddr_t)new_deleg, M_NFSDSTATE);
2708			printf("nfsd openctrl unexpected client cnfl\n");
2709			goto out;
2710		    }
2711		}
2712	}
2713
2714	/*
2715	 * Check for a conflicting delegation. If one is found, call
2716	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2717	 * been set yet, it will get the lock. Otherwise, it will recall
2718	 * the delegation. Then, we try again...
2719	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2720	 *  isn't a conflict.)
2721	 * I currently believe the conflict algorithm to be:
2722	 * For Open with Read Access and Deny None
2723	 * - there is a conflict iff a different client has a write delegation
2724	 * For Open with other Write Access or any Deny except None
2725	 * - there is a conflict if a different client has any delegation
2726	 * - there is a conflict if the same client has a read delegation
2727	 *   (The current consensus is that this last case should be
2728	 *    considered a conflict since the client with a read delegation
2729	 *    could have done an Open with ReadAccess and WriteDeny
2730	 *    locally and then not have checked for the WriteDeny.)
2731	 */
2732	if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
2733	    stp = LIST_FIRST(&lfp->lf_deleg);
2734	    while (stp != LIST_END(&lfp->lf_deleg)) {
2735		nstp = LIST_NEXT(stp, ls_file);
2736		if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
2737			writedeleg = 0;
2738		else
2739			delegate = 0;
2740		if ((readonly && stp->ls_clp != clp &&
2741		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2742		    (!readonly && (stp->ls_clp != clp ||
2743		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2744		    if (new_stp->ls_flags & NFSLCK_RECLAIM) {
2745			delegate = 2;
2746		    } else {
2747			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2748			if (ret) {
2749			    /*
2750			     * nfsrv_delegconflict() unlocks state
2751			     * when it returns non-zero.
2752			     */
2753			    printf("Nfsd openctrl unexpected deleg cnfl\n");
2754			    free((caddr_t)new_open, M_NFSDSTATE);
2755			    free((caddr_t)new_deleg, M_NFSDSTATE);
2756			    if (ret == -1) {
2757				openstp = NULL;
2758				goto tryagain;
2759			    }
2760			    error = ret;
2761			    goto out;
2762			}
2763		    }
2764		}
2765		stp = nstp;
2766	    }
2767	}
2768
2769	/*
2770	 * We only get here if there was no open that conflicted.
2771	 * If an open for the owner exists, or in the access/deny bits.
2772	 * Otherwise it is a new open. If the open_owner hasn't been
2773	 * confirmed, replace the open with the new one needing confirmation,
2774	 * otherwise add the open.
2775	 */
2776	if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
2777	    /*
2778	     * Handle NFSLCK_DELEGPREV by searching the old delegations for
2779	     * a match. If found, just move the old delegation to the current
2780	     * delegation list and issue open. If not found, return
2781	     * NFSERR_EXPIRED.
2782	     */
2783	    LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
2784		if (stp->ls_lfp == lfp) {
2785		    /* Found it */
2786		    if (stp->ls_clp != clp)
2787			panic("olddeleg clp");
2788		    LIST_REMOVE(stp, ls_list);
2789		    LIST_REMOVE(stp, ls_hash);
2790		    stp->ls_flags &= ~NFSLCK_OLDDELEG;
2791		    stp->ls_stateid.seqid = delegstateidp->seqid = 1;
2792		    stp->ls_stateid.other[0] = delegstateidp->other[0] =
2793			clp->lc_clientid.lval[0];
2794		    stp->ls_stateid.other[1] = delegstateidp->other[1] =
2795			clp->lc_clientid.lval[1];
2796		    stp->ls_stateid.other[2] = delegstateidp->other[2] =
2797			nfsrv_nextstateindex(clp);
2798		    stp->ls_compref = nd->nd_compref;
2799		    LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
2800		    LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2801			stp->ls_stateid), stp, ls_hash);
2802		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2803			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2804		    else
2805			*rflagsp |= NFSV4OPEN_READDELEGATE;
2806		    clp->lc_delegtime = NFSD_MONOSEC +
2807			nfsrv_lease + NFSRV_LEASEDELTA;
2808
2809		    /*
2810		     * Now, do the associated open.
2811		     */
2812		    new_open->ls_stateid.seqid = 1;
2813		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2814		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2815		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2816		    new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
2817			NFSLCK_OPEN;
2818		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2819			new_open->ls_flags |= (NFSLCK_READACCESS |
2820			    NFSLCK_WRITEACCESS);
2821		    else
2822			new_open->ls_flags |= NFSLCK_READACCESS;
2823		    new_open->ls_uid = new_stp->ls_uid;
2824		    new_open->ls_lfp = lfp;
2825		    new_open->ls_clp = clp;
2826		    LIST_INIT(&new_open->ls_open);
2827		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2828		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2829			new_open, ls_hash);
2830		    /*
2831		     * and handle the open owner
2832		     */
2833		    if (ownerstp) {
2834			new_open->ls_openowner = ownerstp;
2835			LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
2836		    } else {
2837			new_open->ls_openowner = new_stp;
2838			new_stp->ls_flags = 0;
2839			nfsrvd_refcache(new_stp->ls_op);
2840			new_stp->ls_noopens = 0;
2841			LIST_INIT(&new_stp->ls_open);
2842			LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2843			LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2844			*new_stpp = NULL;
2845			newnfsstats.srvopenowners++;
2846			nfsrv_openpluslock++;
2847		    }
2848		    openstp = new_open;
2849		    new_open = NULL;
2850		    newnfsstats.srvopens++;
2851		    nfsrv_openpluslock++;
2852		    break;
2853		}
2854	    }
2855	    if (stp == LIST_END(&clp->lc_olddeleg))
2856		error = NFSERR_EXPIRED;
2857	} else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2858	    /*
2859	     * Scan to see that no delegation for this client and file
2860	     * doesn't already exist.
2861	     * There also shouldn't yet be an Open for this file and
2862	     * openowner.
2863	     */
2864	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2865		if (stp->ls_clp == clp)
2866		    break;
2867	    }
2868	    if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
2869		/*
2870		 * This is the Claim_Previous case with a delegation
2871		 * type != Delegate_None.
2872		 */
2873		/*
2874		 * First, add the delegation. (Although we must issue the
2875		 * delegation, we can also ask for an immediate return.)
2876		 */
2877		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
2878		new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
2879		    clp->lc_clientid.lval[0];
2880		new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
2881		    clp->lc_clientid.lval[1];
2882		new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
2883		    nfsrv_nextstateindex(clp);
2884		if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
2885		    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
2886			NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
2887		    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2888		} else {
2889		    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
2890			NFSLCK_READACCESS);
2891		    *rflagsp |= NFSV4OPEN_READDELEGATE;
2892		}
2893		new_deleg->ls_uid = new_stp->ls_uid;
2894		new_deleg->ls_lfp = lfp;
2895		new_deleg->ls_clp = clp;
2896		new_deleg->ls_filerev = filerev;
2897		new_deleg->ls_compref = nd->nd_compref;
2898		LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
2899		LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2900		    new_deleg->ls_stateid), new_deleg, ls_hash);
2901		LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
2902		new_deleg = NULL;
2903		if (delegate == 2 || nfsrv_issuedelegs == 0 ||
2904		    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
2905		     LCL_CALLBACKSON ||
2906		    NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
2907		    !NFSVNO_DELEGOK(vp))
2908		    *rflagsp |= NFSV4OPEN_RECALL;
2909		newnfsstats.srvdelegates++;
2910		nfsrv_openpluslock++;
2911		nfsrv_delegatecnt++;
2912
2913		/*
2914		 * Now, do the associated open.
2915		 */
2916		new_open->ls_stateid.seqid = 1;
2917		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2918		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2919		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2920		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
2921		    NFSLCK_OPEN;
2922		if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
2923			new_open->ls_flags |= (NFSLCK_READACCESS |
2924			    NFSLCK_WRITEACCESS);
2925		else
2926			new_open->ls_flags |= NFSLCK_READACCESS;
2927		new_open->ls_uid = new_stp->ls_uid;
2928		new_open->ls_lfp = lfp;
2929		new_open->ls_clp = clp;
2930		LIST_INIT(&new_open->ls_open);
2931		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2932		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2933		   new_open, ls_hash);
2934		/*
2935		 * and handle the open owner
2936		 */
2937		if (ownerstp) {
2938		    new_open->ls_openowner = ownerstp;
2939		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
2940		} else {
2941		    new_open->ls_openowner = new_stp;
2942		    new_stp->ls_flags = 0;
2943		    nfsrvd_refcache(new_stp->ls_op);
2944		    new_stp->ls_noopens = 0;
2945		    LIST_INIT(&new_stp->ls_open);
2946		    LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2947		    LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2948		    *new_stpp = NULL;
2949		    newnfsstats.srvopenowners++;
2950		    nfsrv_openpluslock++;
2951		}
2952		openstp = new_open;
2953		new_open = NULL;
2954		newnfsstats.srvopens++;
2955		nfsrv_openpluslock++;
2956	    } else {
2957		error = NFSERR_RECLAIMCONFLICT;
2958	    }
2959	} else if (ownerstp) {
2960		if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
2961		    /* Replace the open */
2962		    if (ownerstp->ls_op)
2963			nfsrvd_derefcache(ownerstp->ls_op);
2964		    ownerstp->ls_op = new_stp->ls_op;
2965		    nfsrvd_refcache(ownerstp->ls_op);
2966		    ownerstp->ls_seq = new_stp->ls_seq;
2967		    *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
2968		    stp = LIST_FIRST(&ownerstp->ls_open);
2969		    stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
2970			NFSLCK_OPEN;
2971		    stp->ls_stateid.seqid = 1;
2972		    stp->ls_uid = new_stp->ls_uid;
2973		    if (lfp != stp->ls_lfp) {
2974			LIST_REMOVE(stp, ls_file);
2975			LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
2976			stp->ls_lfp = lfp;
2977		    }
2978		    openstp = stp;
2979		} else if (openstp) {
2980		    openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
2981		    openstp->ls_stateid.seqid++;
2982		    if ((nd->nd_flag & ND_NFSV41) != 0 &&
2983			openstp->ls_stateid.seqid == 0)
2984			openstp->ls_stateid.seqid = 1;
2985
2986		    /*
2987		     * This is where we can choose to issue a delegation.
2988		     */
2989		    if (delegate == 0 || writedeleg == 0 ||
2990			NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
2991			nfsrv_writedelegifpos == 0) ||
2992			!NFSVNO_DELEGOK(vp) ||
2993			(new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
2994			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
2995			 LCL_CALLBACKSON)
2996			*rflagsp |= NFSV4OPEN_WDCONTENTION;
2997		    else if (nfsrv_issuedelegs == 0 ||
2998			NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
2999			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3000		    else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3001			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3002		    else {
3003			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3004			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3005			    = clp->lc_clientid.lval[0];
3006			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3007			    = clp->lc_clientid.lval[1];
3008			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3009			    = nfsrv_nextstateindex(clp);
3010			new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3011			    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3012			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3013			new_deleg->ls_uid = new_stp->ls_uid;
3014			new_deleg->ls_lfp = lfp;
3015			new_deleg->ls_clp = clp;
3016			new_deleg->ls_filerev = filerev;
3017			new_deleg->ls_compref = nd->nd_compref;
3018			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3019			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3020			    new_deleg->ls_stateid), new_deleg, ls_hash);
3021			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3022			new_deleg = NULL;
3023			newnfsstats.srvdelegates++;
3024			nfsrv_openpluslock++;
3025			nfsrv_delegatecnt++;
3026		    }
3027		} else {
3028		    new_open->ls_stateid.seqid = 1;
3029		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3030		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3031		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3032		    new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3033			NFSLCK_OPEN;
3034		    new_open->ls_uid = new_stp->ls_uid;
3035		    new_open->ls_openowner = ownerstp;
3036		    new_open->ls_lfp = lfp;
3037		    new_open->ls_clp = clp;
3038		    LIST_INIT(&new_open->ls_open);
3039		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3040		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3041		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3042			new_open, ls_hash);
3043		    openstp = new_open;
3044		    new_open = NULL;
3045		    newnfsstats.srvopens++;
3046		    nfsrv_openpluslock++;
3047
3048		    /*
3049		     * This is where we can choose to issue a delegation.
3050		     */
3051		    if (delegate == 0 || (writedeleg == 0 && readonly == 0) ||
3052			!NFSVNO_DELEGOK(vp) ||
3053			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3054			 LCL_CALLBACKSON)
3055			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3056		    else if (nfsrv_issuedelegs == 0 ||
3057			NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3058			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3059		    else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3060			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3061		    else {
3062			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3063			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3064			    = clp->lc_clientid.lval[0];
3065			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3066			    = clp->lc_clientid.lval[1];
3067			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3068			    = nfsrv_nextstateindex(clp);
3069			if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3070			    (nfsrv_writedelegifpos || !readonly) &&
3071			    (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
3072			    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3073				NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3074			    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3075			} else {
3076			    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3077				NFSLCK_READACCESS);
3078			    *rflagsp |= NFSV4OPEN_READDELEGATE;
3079			}
3080			new_deleg->ls_uid = new_stp->ls_uid;
3081			new_deleg->ls_lfp = lfp;
3082			new_deleg->ls_clp = clp;
3083			new_deleg->ls_filerev = filerev;
3084			new_deleg->ls_compref = nd->nd_compref;
3085			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3086			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3087			    new_deleg->ls_stateid), new_deleg, ls_hash);
3088			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3089			new_deleg = NULL;
3090			newnfsstats.srvdelegates++;
3091			nfsrv_openpluslock++;
3092			nfsrv_delegatecnt++;
3093		    }
3094		}
3095	} else {
3096		/*
3097		 * New owner case. Start the open_owner sequence with a
3098		 * Needs confirmation (unless a reclaim) and hang the
3099		 * new open off it.
3100		 */
3101		new_open->ls_stateid.seqid = 1;
3102		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3103		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3104		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3105		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3106		    NFSLCK_OPEN;
3107		new_open->ls_uid = new_stp->ls_uid;
3108		LIST_INIT(&new_open->ls_open);
3109		new_open->ls_openowner = new_stp;
3110		new_open->ls_lfp = lfp;
3111		new_open->ls_clp = clp;
3112		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3113		if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3114			new_stp->ls_flags = 0;
3115		} else if ((nd->nd_flag & ND_NFSV41) != 0) {
3116			/* NFSv4.1 never needs confirmation. */
3117			new_stp->ls_flags = 0;
3118
3119			/*
3120			 * This is where we can choose to issue a delegation.
3121			 */
3122			if (delegate && nfsrv_issuedelegs &&
3123			    (writedeleg || readonly) &&
3124			    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
3125			     LCL_CALLBACKSON &&
3126			    !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
3127			    NFSVNO_DELEGOK(vp) &&
3128			    ((nd->nd_flag & ND_NFSV41) == 0 ||
3129			     (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
3130				new_deleg->ls_stateid.seqid =
3131				    delegstateidp->seqid = 1;
3132				new_deleg->ls_stateid.other[0] =
3133				    delegstateidp->other[0]
3134				    = clp->lc_clientid.lval[0];
3135				new_deleg->ls_stateid.other[1] =
3136				    delegstateidp->other[1]
3137				    = clp->lc_clientid.lval[1];
3138				new_deleg->ls_stateid.other[2] =
3139				    delegstateidp->other[2]
3140				    = nfsrv_nextstateindex(clp);
3141				if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3142				    (nfsrv_writedelegifpos || !readonly) &&
3143				    ((nd->nd_flag & ND_NFSV41) == 0 ||
3144				     (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
3145				     0)) {
3146					new_deleg->ls_flags =
3147					    (NFSLCK_DELEGWRITE |
3148					     NFSLCK_READACCESS |
3149					     NFSLCK_WRITEACCESS);
3150					*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3151				} else {
3152					new_deleg->ls_flags =
3153					    (NFSLCK_DELEGREAD |
3154					     NFSLCK_READACCESS);
3155					*rflagsp |= NFSV4OPEN_READDELEGATE;
3156				}
3157				new_deleg->ls_uid = new_stp->ls_uid;
3158				new_deleg->ls_lfp = lfp;
3159				new_deleg->ls_clp = clp;
3160				new_deleg->ls_filerev = filerev;
3161				new_deleg->ls_compref = nd->nd_compref;
3162				LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
3163				    ls_file);
3164				LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3165				    new_deleg->ls_stateid), new_deleg, ls_hash);
3166				LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
3167				    ls_list);
3168				new_deleg = NULL;
3169				newnfsstats.srvdelegates++;
3170				nfsrv_openpluslock++;
3171				nfsrv_delegatecnt++;
3172			}
3173		} else {
3174			*rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3175			new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3176		}
3177		nfsrvd_refcache(new_stp->ls_op);
3178		new_stp->ls_noopens = 0;
3179		LIST_INIT(&new_stp->ls_open);
3180		LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3181		LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3182		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3183		    new_open, ls_hash);
3184		openstp = new_open;
3185		new_open = NULL;
3186		*new_stpp = NULL;
3187		newnfsstats.srvopens++;
3188		nfsrv_openpluslock++;
3189		newnfsstats.srvopenowners++;
3190		nfsrv_openpluslock++;
3191	}
3192	if (!error) {
3193		stateidp->seqid = openstp->ls_stateid.seqid;
3194		stateidp->other[0] = openstp->ls_stateid.other[0];
3195		stateidp->other[1] = openstp->ls_stateid.other[1];
3196		stateidp->other[2] = openstp->ls_stateid.other[2];
3197	}
3198	NFSUNLOCKSTATE();
3199	if (haslock) {
3200		NFSLOCKV4ROOTMUTEX();
3201		nfsv4_unlock(&nfsv4rootfs_lock, 1);
3202		NFSUNLOCKV4ROOTMUTEX();
3203	}
3204	if (new_open)
3205		FREE((caddr_t)new_open, M_NFSDSTATE);
3206	if (new_deleg)
3207		FREE((caddr_t)new_deleg, M_NFSDSTATE);
3208
3209out:
3210	NFSEXITCODE2(error, nd);
3211	return (error);
3212}
3213
3214/*
3215 * Open update. Does the confirm, downgrade and close.
3216 */
3217APPLESTATIC int
3218nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
3219    nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p)
3220{
3221	struct nfsstate *stp, *ownerstp;
3222	struct nfsclient *clp;
3223	struct nfslockfile *lfp;
3224	u_int32_t bits;
3225	int error = 0, gotstate = 0, len = 0;
3226	u_char client[NFSV4_OPAQUELIMIT];
3227
3228	/*
3229	 * Check for restart conditions (client and server).
3230	 */
3231	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3232	    &new_stp->ls_stateid, 0);
3233	if (error)
3234		goto out;
3235
3236	NFSLOCKSTATE();
3237	/*
3238	 * Get the open structure via clientid and stateid.
3239	 */
3240	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3241	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
3242	if (!error)
3243		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
3244		    new_stp->ls_flags, &stp);
3245
3246	/*
3247	 * Sanity check the open.
3248	 */
3249	if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
3250		(!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
3251		 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
3252		((new_stp->ls_flags & NFSLCK_CONFIRM) &&
3253		 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
3254		error = NFSERR_BADSTATEID;
3255
3256	if (!error)
3257		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
3258		    stp->ls_openowner, new_stp->ls_op);
3259	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
3260	    (((nd->nd_flag & ND_NFSV41) == 0 &&
3261	      !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
3262	     ((nd->nd_flag & ND_NFSV41) != 0 &&
3263	      new_stp->ls_stateid.seqid != 0)))
3264		error = NFSERR_OLDSTATEID;
3265	if (!error && vnode_vtype(vp) != VREG) {
3266		if (vnode_vtype(vp) == VDIR)
3267			error = NFSERR_ISDIR;
3268		else
3269			error = NFSERR_INVAL;
3270	}
3271
3272	if (error) {
3273		/*
3274		 * If a client tries to confirm an Open with a bad
3275		 * seqid# and there are no byte range locks or other Opens
3276		 * on the openowner, just throw it away, so the next use of the
3277		 * openowner will start a fresh seq#.
3278		 */
3279		if (error == NFSERR_BADSEQID &&
3280		    (new_stp->ls_flags & NFSLCK_CONFIRM) &&
3281		    nfsrv_nootherstate(stp))
3282			nfsrv_freeopenowner(stp->ls_openowner, 0, p);
3283		NFSUNLOCKSTATE();
3284		goto out;
3285	}
3286
3287	/*
3288	 * Set the return stateid.
3289	 */
3290	stateidp->seqid = stp->ls_stateid.seqid + 1;
3291	if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
3292		stateidp->seqid = 1;
3293	stateidp->other[0] = stp->ls_stateid.other[0];
3294	stateidp->other[1] = stp->ls_stateid.other[1];
3295	stateidp->other[2] = stp->ls_stateid.other[2];
3296	/*
3297	 * Now, handle the three cases.
3298	 */
3299	if (new_stp->ls_flags & NFSLCK_CONFIRM) {
3300		/*
3301		 * If the open doesn't need confirmation, it seems to me that
3302		 * there is a client error, but I'll just log it and keep going?
3303		 */
3304		if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
3305			printf("Nfsv4d: stray open confirm\n");
3306		stp->ls_openowner->ls_flags = 0;
3307		stp->ls_stateid.seqid++;
3308		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3309		    stp->ls_stateid.seqid == 0)
3310			stp->ls_stateid.seqid = 1;
3311		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3312			clp->lc_flags |= LCL_STAMPEDSTABLE;
3313			len = clp->lc_idlen;
3314			NFSBCOPY(clp->lc_id, client, len);
3315			gotstate = 1;
3316		}
3317		NFSUNLOCKSTATE();
3318	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
3319		ownerstp = stp->ls_openowner;
3320		lfp = stp->ls_lfp;
3321		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
3322			/* Get the lf lock */
3323			nfsrv_locklf(lfp);
3324			NFSUNLOCKSTATE();
3325			ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
3326			NFSVOPUNLOCK(vp, 0);
3327			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
3328				NFSLOCKSTATE();
3329				nfsrv_unlocklf(lfp);
3330				NFSUNLOCKSTATE();
3331			}
3332			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
3333		} else {
3334			(void) nfsrv_freeopen(stp, NULL, 0, p);
3335			NFSUNLOCKSTATE();
3336		}
3337	} else {
3338		/*
3339		 * Update the share bits, making sure that the new set are a
3340		 * subset of the old ones.
3341		 */
3342		bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
3343		if (~(stp->ls_flags) & bits) {
3344			NFSUNLOCKSTATE();
3345			error = NFSERR_INVAL;
3346			goto out;
3347		}
3348		stp->ls_flags = (bits | NFSLCK_OPEN);
3349		stp->ls_stateid.seqid++;
3350		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3351		    stp->ls_stateid.seqid == 0)
3352			stp->ls_stateid.seqid = 1;
3353		NFSUNLOCKSTATE();
3354	}
3355
3356	/*
3357	 * If the client just confirmed its first open, write a timestamp
3358	 * to the stable storage file.
3359	 */
3360	if (gotstate != 0) {
3361		nfsrv_writestable(client, len, NFSNST_NEWSTATE, p);
3362		nfsrv_backupstable();
3363	}
3364
3365out:
3366	NFSEXITCODE2(error, nd);
3367	return (error);
3368}
3369
3370/*
3371 * Delegation update. Does the purge and return.
3372 */
3373APPLESTATIC int
3374nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3375    nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3376    NFSPROC_T *p)
3377{
3378	struct nfsstate *stp;
3379	struct nfsclient *clp;
3380	int error = 0;
3381	fhandle_t fh;
3382
3383	/*
3384	 * Do a sanity check against the file handle for DelegReturn.
3385	 */
3386	if (vp) {
3387		error = nfsvno_getfh(vp, &fh, p);
3388		if (error)
3389			goto out;
3390	}
3391	/*
3392	 * Check for restart conditions (client and server).
3393	 */
3394	if (op == NFSV4OP_DELEGRETURN)
3395		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3396			stateidp, 0);
3397	else
3398		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3399			stateidp, 0);
3400
3401	NFSLOCKSTATE();
3402	/*
3403	 * Get the open structure via clientid and stateid.
3404	 */
3405	if (!error)
3406	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3407		(nfsquad_t)((u_quad_t)0), 0, nd, p);
3408	if (error) {
3409		if (error == NFSERR_CBPATHDOWN)
3410			error = 0;
3411		if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3412			error = NFSERR_STALESTATEID;
3413	}
3414	if (!error && op == NFSV4OP_DELEGRETURN) {
3415	    error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3416	    if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3417		((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3418		error = NFSERR_OLDSTATEID;
3419	}
3420	/*
3421	 * NFSERR_EXPIRED means that the state has gone away,
3422	 * so Delegations have been purged. Just return ok.
3423	 */
3424	if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3425		NFSUNLOCKSTATE();
3426		error = 0;
3427		goto out;
3428	}
3429	if (error) {
3430		NFSUNLOCKSTATE();
3431		goto out;
3432	}
3433
3434	if (op == NFSV4OP_DELEGRETURN) {
3435		if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3436		    sizeof (fhandle_t))) {
3437			NFSUNLOCKSTATE();
3438			error = NFSERR_BADSTATEID;
3439			goto out;
3440		}
3441		nfsrv_freedeleg(stp);
3442	} else {
3443		nfsrv_freedeleglist(&clp->lc_olddeleg);
3444	}
3445	NFSUNLOCKSTATE();
3446	error = 0;
3447
3448out:
3449	NFSEXITCODE(error);
3450	return (error);
3451}
3452
3453/*
3454 * Release lock owner.
3455 */
3456APPLESTATIC int
3457nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3458    NFSPROC_T *p)
3459{
3460	struct nfsstate *stp, *nstp, *openstp, *ownstp;
3461	struct nfsclient *clp;
3462	int error = 0;
3463
3464	/*
3465	 * Check for restart conditions (client and server).
3466	 */
3467	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3468	    &new_stp->ls_stateid, 0);
3469	if (error)
3470		goto out;
3471
3472	NFSLOCKSTATE();
3473	/*
3474	 * Get the lock owner by name.
3475	 */
3476	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3477	    (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3478	if (error) {
3479		NFSUNLOCKSTATE();
3480		goto out;
3481	}
3482	LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3483	    LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3484		stp = LIST_FIRST(&openstp->ls_open);
3485		while (stp != LIST_END(&openstp->ls_open)) {
3486		    nstp = LIST_NEXT(stp, ls_list);
3487		    /*
3488		     * If the owner matches, check for locks and
3489		     * then free or return an error.
3490		     */
3491		    if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3492			!NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3493			 stp->ls_ownerlen)){
3494			if (LIST_EMPTY(&stp->ls_lock)) {
3495			    nfsrv_freelockowner(stp, NULL, 0, p);
3496			} else {
3497			    NFSUNLOCKSTATE();
3498			    error = NFSERR_LOCKSHELD;
3499			    goto out;
3500			}
3501		    }
3502		    stp = nstp;
3503		}
3504	    }
3505	}
3506	NFSUNLOCKSTATE();
3507
3508out:
3509	NFSEXITCODE(error);
3510	return (error);
3511}
3512
3513/*
3514 * Get the file handle for a lock structure.
3515 */
3516static int
3517nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3518    fhandle_t *nfhp, NFSPROC_T *p)
3519{
3520	fhandle_t *fhp = NULL;
3521	int error;
3522
3523	/*
3524	 * For lock, use the new nfslock structure, otherwise just
3525	 * a fhandle_t on the stack.
3526	 */
3527	if (flags & NFSLCK_OPEN) {
3528		KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3529		fhp = &new_lfp->lf_fh;
3530	} else if (nfhp) {
3531		fhp = nfhp;
3532	} else {
3533		panic("nfsrv_getlockfh");
3534	}
3535	error = nfsvno_getfh(vp, fhp, p);
3536	NFSEXITCODE(error);
3537	return (error);
3538}
3539
3540/*
3541 * Get an nfs lock structure. Allocate one, as required, and return a
3542 * pointer to it.
3543 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3544 */
3545static int
3546nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3547    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3548{
3549	struct nfslockfile *lfp;
3550	fhandle_t *fhp = NULL, *tfhp;
3551	struct nfslockhashhead *hp;
3552	struct nfslockfile *new_lfp = NULL;
3553
3554	/*
3555	 * For lock, use the new nfslock structure, otherwise just
3556	 * a fhandle_t on the stack.
3557	 */
3558	if (flags & NFSLCK_OPEN) {
3559		new_lfp = *new_lfpp;
3560		fhp = &new_lfp->lf_fh;
3561	} else if (nfhp) {
3562		fhp = nfhp;
3563	} else {
3564		panic("nfsrv_getlockfile");
3565	}
3566
3567	hp = NFSLOCKHASH(fhp);
3568	LIST_FOREACH(lfp, hp, lf_hash) {
3569		tfhp = &lfp->lf_fh;
3570		if (NFSVNO_CMPFH(fhp, tfhp)) {
3571			if (lockit)
3572				nfsrv_locklf(lfp);
3573			*lfpp = lfp;
3574			return (0);
3575		}
3576	}
3577	if (!(flags & NFSLCK_OPEN))
3578		return (-1);
3579
3580	/*
3581	 * No match, so chain the new one into the list.
3582	 */
3583	LIST_INIT(&new_lfp->lf_open);
3584	LIST_INIT(&new_lfp->lf_lock);
3585	LIST_INIT(&new_lfp->lf_deleg);
3586	LIST_INIT(&new_lfp->lf_locallock);
3587	LIST_INIT(&new_lfp->lf_rollback);
3588	new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3589	new_lfp->lf_locallock_lck.nfslock_lock = 0;
3590	new_lfp->lf_usecount = 0;
3591	LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3592	*lfpp = new_lfp;
3593	*new_lfpp = NULL;
3594	return (0);
3595}
3596
3597/*
3598 * This function adds a nfslock lock structure to the list for the associated
3599 * nfsstate and nfslockfile structures. It will be inserted after the
3600 * entry pointed at by insert_lop.
3601 */
3602static void
3603nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3604    struct nfsstate *stp, struct nfslockfile *lfp)
3605{
3606	struct nfslock *lop, *nlop;
3607
3608	new_lop->lo_stp = stp;
3609	new_lop->lo_lfp = lfp;
3610
3611	if (stp != NULL) {
3612		/* Insert in increasing lo_first order */
3613		lop = LIST_FIRST(&lfp->lf_lock);
3614		if (lop == LIST_END(&lfp->lf_lock) ||
3615		    new_lop->lo_first <= lop->lo_first) {
3616			LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3617		} else {
3618			nlop = LIST_NEXT(lop, lo_lckfile);
3619			while (nlop != LIST_END(&lfp->lf_lock) &&
3620			       nlop->lo_first < new_lop->lo_first) {
3621				lop = nlop;
3622				nlop = LIST_NEXT(lop, lo_lckfile);
3623			}
3624			LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3625		}
3626	} else {
3627		new_lop->lo_lckfile.le_prev = NULL;	/* list not used */
3628	}
3629
3630	/*
3631	 * Insert after insert_lop, which is overloaded as stp or lfp for
3632	 * an empty list.
3633	 */
3634	if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3635		LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3636	else if ((struct nfsstate *)insert_lop == stp)
3637		LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3638	else
3639		LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3640	if (stp != NULL) {
3641		newnfsstats.srvlocks++;
3642		nfsrv_openpluslock++;
3643	}
3644}
3645
3646/*
3647 * This function updates the locking for a lock owner and given file. It
3648 * maintains a list of lock ranges ordered on increasing file offset that
3649 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
3650 * It always adds new_lop to the list and sometimes uses the one pointed
3651 * at by other_lopp.
3652 */
3653static void
3654nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
3655    struct nfslock **other_lopp, struct nfslockfile *lfp)
3656{
3657	struct nfslock *new_lop = *new_lopp;
3658	struct nfslock *lop, *tlop, *ilop;
3659	struct nfslock *other_lop = *other_lopp;
3660	int unlock = 0, myfile = 0;
3661	u_int64_t tmp;
3662
3663	/*
3664	 * Work down the list until the lock is merged.
3665	 */
3666	if (new_lop->lo_flags & NFSLCK_UNLOCK)
3667		unlock = 1;
3668	if (stp != NULL) {
3669		ilop = (struct nfslock *)stp;
3670		lop = LIST_FIRST(&stp->ls_lock);
3671	} else {
3672		ilop = (struct nfslock *)lfp;
3673		lop = LIST_FIRST(&lfp->lf_locallock);
3674	}
3675	while (lop != NULL) {
3676	    /*
3677	     * Only check locks for this file that aren't before the start of
3678	     * new lock's range.
3679	     */
3680	    if (lop->lo_lfp == lfp) {
3681	      myfile = 1;
3682	      if (lop->lo_end >= new_lop->lo_first) {
3683		if (new_lop->lo_end < lop->lo_first) {
3684			/*
3685			 * If the new lock ends before the start of the
3686			 * current lock's range, no merge, just insert
3687			 * the new lock.
3688			 */
3689			break;
3690		}
3691		if (new_lop->lo_flags == lop->lo_flags ||
3692		    (new_lop->lo_first <= lop->lo_first &&
3693		     new_lop->lo_end >= lop->lo_end)) {
3694			/*
3695			 * This lock can be absorbed by the new lock/unlock.
3696			 * This happens when it covers the entire range
3697			 * of the old lock or is contiguous
3698			 * with the old lock and is of the same type or an
3699			 * unlock.
3700			 */
3701			if (lop->lo_first < new_lop->lo_first)
3702				new_lop->lo_first = lop->lo_first;
3703			if (lop->lo_end > new_lop->lo_end)
3704				new_lop->lo_end = lop->lo_end;
3705			tlop = lop;
3706			lop = LIST_NEXT(lop, lo_lckowner);
3707			nfsrv_freenfslock(tlop);
3708			continue;
3709		}
3710
3711		/*
3712		 * All these cases are for contiguous locks that are not the
3713		 * same type, so they can't be merged.
3714		 */
3715		if (new_lop->lo_first <= lop->lo_first) {
3716			/*
3717			 * This case is where the new lock overlaps with the
3718			 * first part of the old lock. Move the start of the
3719			 * old lock to just past the end of the new lock. The
3720			 * new lock will be inserted in front of the old, since
3721			 * ilop hasn't been updated. (We are done now.)
3722			 */
3723			lop->lo_first = new_lop->lo_end;
3724			break;
3725		}
3726		if (new_lop->lo_end >= lop->lo_end) {
3727			/*
3728			 * This case is where the new lock overlaps with the
3729			 * end of the old lock's range. Move the old lock's
3730			 * end to just before the new lock's first and insert
3731			 * the new lock after the old lock.
3732			 * Might not be done yet, since the new lock could
3733			 * overlap further locks with higher ranges.
3734			 */
3735			lop->lo_end = new_lop->lo_first;
3736			ilop = lop;
3737			lop = LIST_NEXT(lop, lo_lckowner);
3738			continue;
3739		}
3740		/*
3741		 * The final case is where the new lock's range is in the
3742		 * middle of the current lock's and splits the current lock
3743		 * up. Use *other_lopp to handle the second part of the
3744		 * split old lock range. (We are done now.)
3745		 * For unlock, we use new_lop as other_lop and tmp, since
3746		 * other_lop and new_lop are the same for this case.
3747		 * We noted the unlock case above, so we don't need
3748		 * new_lop->lo_flags any longer.
3749		 */
3750		tmp = new_lop->lo_first;
3751		if (other_lop == NULL) {
3752			if (!unlock)
3753				panic("nfsd srv update unlock");
3754			other_lop = new_lop;
3755			*new_lopp = NULL;
3756		}
3757		other_lop->lo_first = new_lop->lo_end;
3758		other_lop->lo_end = lop->lo_end;
3759		other_lop->lo_flags = lop->lo_flags;
3760		other_lop->lo_stp = stp;
3761		other_lop->lo_lfp = lfp;
3762		lop->lo_end = tmp;
3763		nfsrv_insertlock(other_lop, lop, stp, lfp);
3764		*other_lopp = NULL;
3765		ilop = lop;
3766		break;
3767	      }
3768	    }
3769	    ilop = lop;
3770	    lop = LIST_NEXT(lop, lo_lckowner);
3771	    if (myfile && (lop == NULL || lop->lo_lfp != lfp))
3772		break;
3773	}
3774
3775	/*
3776	 * Insert the new lock in the list at the appropriate place.
3777	 */
3778	if (!unlock) {
3779		nfsrv_insertlock(new_lop, ilop, stp, lfp);
3780		*new_lopp = NULL;
3781	}
3782}
3783
3784/*
3785 * This function handles sequencing of locks, etc.
3786 * It returns an error that indicates what the caller should do.
3787 */
3788static int
3789nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
3790    struct nfsstate *stp, struct nfsrvcache *op)
3791{
3792	int error = 0;
3793
3794	if ((nd->nd_flag & ND_NFSV41) != 0)
3795		/* NFSv4.1 ignores the open_seqid and lock_seqid. */
3796		goto out;
3797	if (op != nd->nd_rp)
3798		panic("nfsrvstate checkseqid");
3799	if (!(op->rc_flag & RC_INPROG))
3800		panic("nfsrvstate not inprog");
3801	if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
3802		printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
3803		panic("nfsrvstate op refcnt");
3804	}
3805	if ((stp->ls_seq + 1) == seqid) {
3806		if (stp->ls_op)
3807			nfsrvd_derefcache(stp->ls_op);
3808		stp->ls_op = op;
3809		nfsrvd_refcache(op);
3810		stp->ls_seq = seqid;
3811		goto out;
3812	} else if (stp->ls_seq == seqid && stp->ls_op &&
3813		op->rc_xid == stp->ls_op->rc_xid &&
3814		op->rc_refcnt == 0 &&
3815		op->rc_reqlen == stp->ls_op->rc_reqlen &&
3816		op->rc_cksum == stp->ls_op->rc_cksum) {
3817		if (stp->ls_op->rc_flag & RC_INPROG) {
3818			error = NFSERR_DONTREPLY;
3819			goto out;
3820		}
3821		nd->nd_rp = stp->ls_op;
3822		nd->nd_rp->rc_flag |= RC_INPROG;
3823		nfsrvd_delcache(op);
3824		error = NFSERR_REPLYFROMCACHE;
3825		goto out;
3826	}
3827	error = NFSERR_BADSEQID;
3828
3829out:
3830	NFSEXITCODE2(error, nd);
3831	return (error);
3832}
3833
3834/*
3835 * Get the client ip address for callbacks. If the strings can't be parsed,
3836 * just set lc_program to 0 to indicate no callbacks are possible.
3837 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
3838 *  the address to the client's transport address. This won't be used
3839 *  for callbacks, but can be printed out by newnfsstats for info.)
3840 * Return error if the xdr can't be parsed, 0 otherwise.
3841 */
3842APPLESTATIC int
3843nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
3844{
3845	u_int32_t *tl;
3846	u_char *cp, *cp2;
3847	int i, j;
3848	struct sockaddr_in *rad, *sad;
3849	u_char protocol[5], addr[24];
3850	int error = 0, cantparse = 0;
3851	union {
3852		u_long ival;
3853		u_char cval[4];
3854	} ip;
3855	union {
3856		u_short sval;
3857		u_char cval[2];
3858	} port;
3859
3860	rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
3861	rad->sin_family = AF_INET;
3862	rad->sin_len = sizeof (struct sockaddr_in);
3863	rad->sin_addr.s_addr = 0;
3864	rad->sin_port = 0;
3865	clp->lc_req.nr_client = NULL;
3866	clp->lc_req.nr_lock = 0;
3867	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3868	i = fxdr_unsigned(int, *tl);
3869	if (i >= 3 && i <= 4) {
3870		error = nfsrv_mtostr(nd, protocol, i);
3871		if (error)
3872			goto nfsmout;
3873		if (!strcmp(protocol, "tcp")) {
3874			clp->lc_flags |= LCL_TCPCALLBACK;
3875			clp->lc_req.nr_sotype = SOCK_STREAM;
3876			clp->lc_req.nr_soproto = IPPROTO_TCP;
3877		} else if (!strcmp(protocol, "udp")) {
3878			clp->lc_req.nr_sotype = SOCK_DGRAM;
3879			clp->lc_req.nr_soproto = IPPROTO_UDP;
3880		} else {
3881			cantparse = 1;
3882		}
3883	} else {
3884		cantparse = 1;
3885		if (i > 0) {
3886			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3887			if (error)
3888				goto nfsmout;
3889		}
3890	}
3891	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3892	i = fxdr_unsigned(int, *tl);
3893	if (i < 0) {
3894		error = NFSERR_BADXDR;
3895		goto nfsmout;
3896	} else if (i == 0) {
3897		cantparse = 1;
3898	} else if (!cantparse && i <= 23 && i >= 11) {
3899		error = nfsrv_mtostr(nd, addr, i);
3900		if (error)
3901			goto nfsmout;
3902
3903		/*
3904		 * Parse out the address fields. We expect 6 decimal numbers
3905		 * separated by '.'s.
3906		 */
3907		cp = addr;
3908		i = 0;
3909		while (*cp && i < 6) {
3910			cp2 = cp;
3911			while (*cp2 && *cp2 != '.')
3912				cp2++;
3913			if (*cp2)
3914				*cp2++ = '\0';
3915			else if (i != 5) {
3916				cantparse = 1;
3917				break;
3918			}
3919			j = nfsrv_getipnumber(cp);
3920			if (j >= 0) {
3921				if (i < 4)
3922					ip.cval[3 - i] = j;
3923				else
3924					port.cval[5 - i] = j;
3925			} else {
3926				cantparse = 1;
3927				break;
3928			}
3929			cp = cp2;
3930			i++;
3931		}
3932		if (!cantparse) {
3933			if (ip.ival != 0x0) {
3934				rad->sin_addr.s_addr = htonl(ip.ival);
3935				rad->sin_port = htons(port.sval);
3936			} else {
3937				cantparse = 1;
3938			}
3939		}
3940	} else {
3941		cantparse = 1;
3942		if (i > 0) {
3943			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3944			if (error)
3945				goto nfsmout;
3946		}
3947	}
3948	if (cantparse) {
3949		sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
3950		rad->sin_addr.s_addr = sad->sin_addr.s_addr;
3951		rad->sin_port = 0x0;
3952		clp->lc_program = 0;
3953	}
3954nfsmout:
3955	NFSEXITCODE2(error, nd);
3956	return (error);
3957}
3958
/*
 * Turn a string of one to three decimal digits into a number in the
 * range 0-255.
 *
 * cp - NUL terminated string to convert
 *
 * Returns the value, or -1 upon error: an empty string, more than three
 * characters, a non-digit character or a value above 255.
 */
static int
nfsrv_getipnumber(u_char *cp)
{
	int val = 0, ndigits = 0;

	/* An empty field is not a number. */
	if (*cp == '\0')
		return (-1);

	while (*cp != '\0') {
		if (ndigits > 2 || *cp < '0' || *cp > '9')
			return (-1);
		val = val * 10 + (*cp - '0');
		cp++;
		ndigits++;
	}
	if (val < 256)
		return (val);
	return (-1);
}
3980
3981/*
3982 * This function checks for restart conditions.
3983 */
3984static int
3985nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
3986    nfsv4stateid_t *stateidp, int specialid)
3987{
3988	int ret = 0;
3989
3990	/*
3991	 * First check for a server restart. Open, LockT, ReleaseLockOwner
3992	 * and DelegPurge have a clientid, the rest a stateid.
3993	 */
3994	if (flags &
3995	    (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
3996		if (clientid.lval[0] != nfsrvboottime) {
3997			ret = NFSERR_STALECLIENTID;
3998			goto out;
3999		}
4000	} else if (stateidp->other[0] != nfsrvboottime &&
4001		specialid == 0) {
4002		ret = NFSERR_STALESTATEID;
4003		goto out;
4004	}
4005
4006	/*
4007	 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
4008	 * not use a lock/open owner seqid#, so the check can be done now.
4009	 * (The others will be checked, as required, later.)
4010	 */
4011	if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
4012		goto out;
4013
4014	NFSLOCKSTATE();
4015	ret = nfsrv_checkgrace(NULL, NULL, flags);
4016	NFSUNLOCKSTATE();
4017
4018out:
4019	NFSEXITCODE(ret);
4020	return (ret);
4021}
4022
4023/*
4024 * Check for grace.
4025 */
4026static int
4027nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
4028    u_int32_t flags)
4029{
4030	int error = 0;
4031
4032	if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
4033		if (flags & NFSLCK_RECLAIM) {
4034			error = NFSERR_NOGRACE;
4035			goto out;
4036		}
4037	} else {
4038		if (!(flags & NFSLCK_RECLAIM)) {
4039			error = NFSERR_GRACE;
4040			goto out;
4041		}
4042		if (nd != NULL && clp != NULL &&
4043		    (nd->nd_flag & ND_NFSV41) != 0 &&
4044		    (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4045			error = NFSERR_NOGRACE;
4046			goto out;
4047		}
4048
4049		/*
4050		 * If grace is almost over and we are still getting Reclaims,
4051		 * extend grace a bit.
4052		 */
4053		if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4054		    nfsrv_stablefirst.nsf_eograce)
4055			nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
4056				NFSRV_LEASEDELTA;
4057	}
4058
4059out:
4060	NFSEXITCODE(error);
4061	return (error);
4062}
4063
4064/*
4065 * Do a server callback.
4066 */
4067static int
4068nfsrv_docallback(struct nfsclient *clp, int procnum,
4069    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
4070    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
4071{
4072	mbuf_t m;
4073	u_int32_t *tl;
4074	struct nfsrv_descript nfsd, *nd = &nfsd;
4075	struct ucred *cred;
4076	int error = 0;
4077	u_int32_t callback;
4078	struct nfsdsession *sep = NULL;
4079
4080	cred = newnfs_getcred();
4081	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
4082	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
4083		NFSUNLOCKSTATE();
4084		panic("docallb");
4085	}
4086	clp->lc_cbref++;
4087
4088	/*
4089	 * Fill the callback program# and version into the request
4090	 * structure for newnfs_connect() to use.
4091	 */
4092	clp->lc_req.nr_prog = clp->lc_program;
4093#ifdef notnow
4094	if ((clp->lc_flags & LCL_NFSV41) != 0)
4095		clp->lc_req.nr_vers = NFSV41_CBVERS;
4096	else
4097#endif
4098		clp->lc_req.nr_vers = NFSV4_CBVERS;
4099
4100	/*
4101	 * First, fill in some of the fields of nd and cr.
4102	 */
4103	nd->nd_flag = ND_NFSV4;
4104	if (clp->lc_flags & LCL_GSS)
4105		nd->nd_flag |= ND_KERBV;
4106	if ((clp->lc_flags & LCL_NFSV41) != 0)
4107		nd->nd_flag |= ND_NFSV41;
4108	nd->nd_repstat = 0;
4109	cred->cr_uid = clp->lc_uid;
4110	cred->cr_gid = clp->lc_gid;
4111	callback = clp->lc_callback;
4112	NFSUNLOCKSTATE();
4113	cred->cr_ngroups = 1;
4114
4115	/*
4116	 * Get the first mbuf for the request.
4117	 */
4118	MGET(m, M_WAITOK, MT_DATA);
4119	mbuf_setlen(m, 0);
4120	nd->nd_mreq = nd->nd_mb = m;
4121	nd->nd_bpos = NFSMTOD(m, caddr_t);
4122
4123	/*
4124	 * and build the callback request.
4125	 */
4126	if (procnum == NFSV4OP_CBGETATTR) {
4127		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
4128		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
4129		    "CB Getattr", &sep);
4130		if (error != 0) {
4131			mbuf_freem(nd->nd_mreq);
4132			goto errout;
4133		}
4134		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
4135		(void)nfsrv_putattrbit(nd, attrbitp);
4136	} else if (procnum == NFSV4OP_CBRECALL) {
4137		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
4138		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
4139		    "CB Recall", &sep);
4140		if (error != 0) {
4141			mbuf_freem(nd->nd_mreq);
4142			goto errout;
4143		}
4144		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
4145		*tl++ = txdr_unsigned(stateidp->seqid);
4146		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
4147		    NFSX_STATEIDOTHER);
4148		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
4149		if (trunc)
4150			*tl = newnfs_true;
4151		else
4152			*tl = newnfs_false;
4153		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
4154	} else if (procnum == NFSV4PROC_CBNULL) {
4155		nd->nd_procnum = NFSV4PROC_CBNULL;
4156		if ((clp->lc_flags & LCL_NFSV41) != 0) {
4157			error = nfsv4_getcbsession(clp, &sep);
4158			if (error != 0) {
4159				mbuf_freem(nd->nd_mreq);
4160				goto errout;
4161			}
4162		}
4163	} else {
4164		error = NFSERR_SERVERFAULT;
4165		mbuf_freem(nd->nd_mreq);
4166		goto errout;
4167	}
4168
4169	/*
4170	 * Call newnfs_connect(), as required, and then newnfs_request().
4171	 */
4172	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
4173	if (clp->lc_req.nr_client == NULL) {
4174		if ((clp->lc_flags & LCL_NFSV41) != 0)
4175			error = ECONNREFUSED;
4176		else if (nd->nd_procnum == NFSV4PROC_CBNULL)
4177			error = newnfs_connect(NULL, &clp->lc_req, cred,
4178			    NULL, 1);
4179		else
4180			error = newnfs_connect(NULL, &clp->lc_req, cred,
4181			    NULL, 3);
4182	}
4183	newnfs_sndunlock(&clp->lc_req.nr_lock);
4184	if (!error) {
4185		if ((nd->nd_flag & ND_NFSV41) != 0) {
4186			KASSERT(sep != NULL, ("sep NULL"));
4187			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
4188			    NULL, NULL, cred, clp->lc_program,
4189			    clp->lc_req.nr_vers, NULL, 1, NULL,
4190			    &sep->sess_cbsess);
4191			nfsrv_freesession(sep, NULL);
4192		} else
4193			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
4194			    NULL, NULL, cred, clp->lc_program,
4195			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
4196	}
4197errout:
4198	NFSFREECRED(cred);
4199
4200	/*
4201	 * If error is set here, the Callback path isn't working
4202	 * properly, so twiddle the appropriate LCL_ flags.
4203	 * (nd_repstat != 0 indicates the Callback path is working,
4204	 *  but the callback failed on the client.)
4205	 */
4206	if (error) {
4207		/*
4208		 * Mark the callback pathway down, which disabled issuing
4209		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
4210		 */
4211		NFSLOCKSTATE();
4212		clp->lc_flags |= LCL_CBDOWN;
4213		NFSUNLOCKSTATE();
4214	} else {
4215		/*
4216		 * Callback worked. If the callback path was down, disable
4217		 * callbacks, so no more delegations will be issued. (This
4218		 * is done on the assumption that the callback pathway is
4219		 * flakey.)
4220		 */
4221		NFSLOCKSTATE();
4222		if (clp->lc_flags & LCL_CBDOWN)
4223			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
4224		NFSUNLOCKSTATE();
4225		if (nd->nd_repstat)
4226			error = nd->nd_repstat;
4227		else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
4228			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4229			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
4230			    p, NULL);
4231		mbuf_freem(nd->nd_mrep);
4232	}
4233	NFSLOCKSTATE();
4234	clp->lc_cbref--;
4235	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
4236		clp->lc_flags &= ~LCL_WAKEUPWANTED;
4237		wakeup(clp);
4238	}
4239	NFSUNLOCKSTATE();
4240
4241	NFSEXITCODE(error);
4242	return (error);
4243}
4244
4245/*
4246 * Set up the compound RPC for the callback.
4247 */
4248static int
4249nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4250    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp)
4251{
4252	uint32_t *tl;
4253	int error, len;
4254
4255	len = strlen(optag);
4256	(void)nfsm_strtom(nd, optag, len);
4257	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4258	if ((nd->nd_flag & ND_NFSV41) != 0) {
4259		*tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4260		*tl++ = txdr_unsigned(callback);
4261		*tl++ = txdr_unsigned(2);
4262		*tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4263		error = nfsv4_setcbsequence(nd, clp, 1, sepp);
4264		if (error != 0)
4265			return (error);
4266		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4267		*tl = txdr_unsigned(op);
4268	} else {
4269		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4270		*tl++ = txdr_unsigned(callback);
4271		*tl++ = txdr_unsigned(1);
4272		*tl = txdr_unsigned(op);
4273	}
4274	return (0);
4275}
4276
/*
 * Return the next index# for a clientid.
 * This is a plain increment of a static 32bit counter. Should the
 * counter ever wrap around to 0, a message is printed and 0 is returned;
 * the server should be rebooted at that point.
 * At an average rate of one new client per second, the wrap around takes
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
4297
4298/*
4299 * Return the next index# for a stateid. Mostly just increment and return
4300 * the next one, but... if the 32bit unsigned does actually wrap around
4301 * (will a BSD server stay up that long?), find
4302 * new start and end values.
4303 */
4304static u_int32_t
4305nfsrv_nextstateindex(struct nfsclient *clp)
4306{
4307	struct nfsstate *stp;
4308	int i;
4309	u_int32_t canuse, min_index, max_index;
4310
4311	if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4312		clp->lc_stateindex++;
4313		if (clp->lc_stateindex != clp->lc_statemaxindex)
4314			return (clp->lc_stateindex);
4315	}
4316
4317	/*
4318	 * Yuck, we've hit the end.
4319	 * Look for a new min and max.
4320	 */
4321	min_index = 0;
4322	max_index = 0xffffffff;
4323	for (i = 0; i < nfsrv_statehashsize; i++) {
4324	    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4325		if (stp->ls_stateid.other[2] > 0x80000000) {
4326		    if (stp->ls_stateid.other[2] < max_index)
4327			max_index = stp->ls_stateid.other[2];
4328		} else {
4329		    if (stp->ls_stateid.other[2] > min_index)
4330			min_index = stp->ls_stateid.other[2];
4331		}
4332	    }
4333	}
4334
4335	/*
4336	 * Yikes, highly unlikely, but I'll handle it anyhow.
4337	 */
4338	if (min_index == 0x80000000 && max_index == 0x80000001) {
4339	    canuse = 0;
4340	    /*
4341	     * Loop around until we find an unused entry. Return that
4342	     * and set LCL_INDEXNOTOK, so the search will continue next time.
4343	     * (This is one of those rare cases where a goto is the
4344	     *  cleanest way to code the loop.)
4345	     */
4346tryagain:
4347	    for (i = 0; i < nfsrv_statehashsize; i++) {
4348		LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4349		    if (stp->ls_stateid.other[2] == canuse) {
4350			canuse++;
4351			goto tryagain;
4352		    }
4353		}
4354	    }
4355	    clp->lc_flags |= LCL_INDEXNOTOK;
4356	    return (canuse);
4357	}
4358
4359	/*
4360	 * Ok to start again from min + 1.
4361	 */
4362	clp->lc_stateindex = min_index + 1;
4363	clp->lc_statemaxindex = max_index;
4364	clp->lc_flags &= ~LCL_INDEXNOTOK;
4365	return (clp->lc_stateindex);
4366}
4367
/*
 * The following functions handle the stable storage file that deals with
 * the edge conditions described in RFC3530 Sec. 8.6.3.
 * The file is as follows:
 * - a single record at the beginning that has the lease time of the
 *   previous server instance (before the last reboot) and the nfsrvboottime
 *   values for the previous server boots.
 *   These previous boot times are used to ensure that the current
 *   nfsrvboottime does not, somehow, get set to a previous one.
 *   (This is important so that Stale ClientIDs and StateIDs can
 *    be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
 * - followed by some number of appended records with:
 *   - client id string
 *   - flag that indicates it is a record revoking state via lease
 *     expiration or similar
 *     OR has successfully acquired state.
 * These structures vary in length, with the client string at the end, up
 * to NFSV4_OPAQUELIMIT in size.
 *
 * At the end of the grace period, the file is truncated, the first
 * record is rewritten with updated information and any acquired state
 * records for successful reclaims of state are written.
 *
 * Subsequent records are appended when the first state is issued to
 * a client and when state is revoked for a client.
 *
 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological
 * order.
 * If, for some reason, the file can't be read, the grace period is
 * immediately terminated and all reclaims get NFSERR_NOGRACE.
 */
4400
4401/*
4402 * Read in the stable storage file. Called by nfssvc() before the nfsd
4403 * processes start servicing requests.
4404 */
4405APPLESTATIC void
4406nfsrv_setupstable(NFSPROC_T *p)
4407{
4408	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4409	struct nfsrv_stable *sp, *nsp;
4410	struct nfst_rec *tsp;
4411	int error, i, tryagain;
4412	off_t off = 0;
4413	ssize_t aresid, len;
4414
4415	/*
4416	 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
4417	 * a reboot, so state has not been lost.
4418	 */
4419	if (sf->nsf_flags & NFSNSF_UPDATEDONE)
4420		return;
4421	/*
4422	 * Set Grace over just until the file reads successfully.
4423	 */
4424	nfsrvboottime = time_second;
4425	LIST_INIT(&sf->nsf_head);
4426	sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4427	sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
4428	if (sf->nsf_fp == NULL)
4429		return;
4430	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4431	    (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
4432	    0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4433	if (error || aresid || sf->nsf_numboots == 0 ||
4434		sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
4435		return;
4436
4437	/*
4438	 * Now, read in the boottimes.
4439	 */
4440	sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
4441		sizeof (time_t), M_TEMP, M_WAITOK);
4442	off = sizeof (struct nfsf_rec);
4443	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4444	    (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
4445	    UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4446	if (error || aresid) {
4447		free((caddr_t)sf->nsf_bootvals, M_TEMP);
4448		sf->nsf_bootvals = NULL;
4449		return;
4450	}
4451
4452	/*
4453	 * Make sure this nfsrvboottime is different from all recorded
4454	 * previous ones.
4455	 */
4456	do {
4457		tryagain = 0;
4458		for (i = 0; i < sf->nsf_numboots; i++) {
4459			if (nfsrvboottime == sf->nsf_bootvals[i]) {
4460				nfsrvboottime++;
4461				tryagain = 1;
4462				break;
4463			}
4464		}
4465	} while (tryagain);
4466
4467	sf->nsf_flags |= NFSNSF_OK;
4468	off += (sf->nsf_numboots * sizeof (time_t));
4469
4470	/*
4471	 * Read through the file, building a list of records for grace
4472	 * checking.
4473	 * Each record is between sizeof (struct nfst_rec) and
4474	 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
4475	 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
4476	 */
4477	tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4478		NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
4479	do {
4480	    error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4481	        (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
4482	        off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4483	    len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
4484	    if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
4485		len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
4486		/*
4487		 * Yuck, the file has been corrupted, so just return
4488		 * after clearing out any restart state, so the grace period
4489		 * is over.
4490		 */
4491		LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4492			LIST_REMOVE(sp, nst_list);
4493			free((caddr_t)sp, M_TEMP);
4494		}
4495		free((caddr_t)tsp, M_TEMP);
4496		sf->nsf_flags &= ~NFSNSF_OK;
4497		free((caddr_t)sf->nsf_bootvals, M_TEMP);
4498		sf->nsf_bootvals = NULL;
4499		return;
4500	    }
4501	    if (len > 0) {
4502		off += sizeof (struct nfst_rec) + tsp->len - 1;
4503		/*
4504		 * Search the list for a matching client.
4505		 */
4506		LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
4507			if (tsp->len == sp->nst_len &&
4508			    !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
4509				break;
4510		}
4511		if (sp == LIST_END(&sf->nsf_head)) {
4512			sp = (struct nfsrv_stable *)malloc(tsp->len +
4513				sizeof (struct nfsrv_stable) - 1, M_TEMP,
4514				M_WAITOK);
4515			NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
4516				sizeof (struct nfst_rec) + tsp->len - 1);
4517			LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
4518		} else {
4519			if (tsp->flag == NFSNST_REVOKE)
4520				sp->nst_flag |= NFSNST_REVOKE;
4521			else
4522				/*
4523				 * A subsequent timestamp indicates the client
4524				 * did a setclientid/confirm and any previous
4525				 * revoke is no longer relevant.
4526				 */
4527				sp->nst_flag &= ~NFSNST_REVOKE;
4528		}
4529	    }
4530	} while (len > 0);
4531	free((caddr_t)tsp, M_TEMP);
4532	sf->nsf_flags = NFSNSF_OK;
4533	sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
4534		NFSRV_LEASEDELTA;
4535}
4536
4537/*
4538 * Update the stable storage file, now that the grace period is over.
4539 */
4540APPLESTATIC void
4541nfsrv_updatestable(NFSPROC_T *p)
4542{
4543	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4544	struct nfsrv_stable *sp, *nsp;
4545	int i;
4546	struct nfsvattr nva;
4547	vnode_t vp;
4548#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
4549	mount_t mp = NULL;
4550#endif
4551	int error;
4552
4553	if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
4554		return;
4555	sf->nsf_flags |= NFSNSF_UPDATEDONE;
4556	/*
4557	 * Ok, we need to rewrite the stable storage file.
4558	 * - truncate to 0 length
4559	 * - write the new first structure
4560	 * - loop through the data structures, writing out any that
4561	 *   have timestamps older than the old boot
4562	 */
4563	if (sf->nsf_bootvals) {
4564		sf->nsf_numboots++;
4565		for (i = sf->nsf_numboots - 2; i >= 0; i--)
4566			sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
4567	} else {
4568		sf->nsf_numboots = 1;
4569		sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
4570			M_TEMP, M_WAITOK);
4571	}
4572	sf->nsf_bootvals[0] = nfsrvboottime;
4573	sf->nsf_lease = nfsrv_lease;
4574	NFSVNO_ATTRINIT(&nva);
4575	NFSVNO_SETATTRVAL(&nva, size, 0);
4576	vp = NFSFPVNODE(sf->nsf_fp);
4577	vn_start_write(vp, &mp, V_WAIT);
4578	if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4579		error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
4580		    NULL);
4581		NFSVOPUNLOCK(vp, 0);
4582	} else
4583		error = EPERM;
4584	vn_finished_write(mp);
4585	if (!error)
4586	    error = NFSD_RDWR(UIO_WRITE, vp,
4587		(caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
4588		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4589	if (!error)
4590	    error = NFSD_RDWR(UIO_WRITE, vp,
4591		(caddr_t)sf->nsf_bootvals,
4592		sf->nsf_numboots * sizeof (time_t),
4593		(off_t)(sizeof (struct nfsf_rec)),
4594		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4595	free((caddr_t)sf->nsf_bootvals, M_TEMP);
4596	sf->nsf_bootvals = NULL;
4597	if (error) {
4598		sf->nsf_flags &= ~NFSNSF_OK;
4599		printf("EEK! Can't write NfsV4 stable storage file\n");
4600		return;
4601	}
4602	sf->nsf_flags |= NFSNSF_OK;
4603
4604	/*
4605	 * Loop through the list and write out timestamp records for
4606	 * any clients that successfully reclaimed state.
4607	 */
4608	LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4609		if (sp->nst_flag & NFSNST_GOTSTATE) {
4610			nfsrv_writestable(sp->nst_client, sp->nst_len,
4611				NFSNST_NEWSTATE, p);
4612			sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
4613		}
4614		LIST_REMOVE(sp, nst_list);
4615		free((caddr_t)sp, M_TEMP);
4616	}
4617	nfsrv_backupstable();
4618}
4619
4620/*
4621 * Append a record to the stable storage file.
4622 */
4623APPLESTATIC void
4624nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
4625{
4626	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4627	struct nfst_rec *sp;
4628	int error;
4629
4630	if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
4631		return;
4632	sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4633		len - 1, M_TEMP, M_WAITOK);
4634	sp->len = len;
4635	NFSBCOPY(client, sp->client, len);
4636	sp->flag = flag;
4637	error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
4638	    (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
4639	    UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
4640	free((caddr_t)sp, M_TEMP);
4641	if (error) {
4642		sf->nsf_flags &= ~NFSNSF_OK;
4643		printf("EEK! Can't write NfsV4 stable storage file\n");
4644	}
4645}
4646
4647/*
4648 * This function is called during the grace period to mark a client
4649 * that successfully reclaimed state.
4650 */
4651static void
4652nfsrv_markstable(struct nfsclient *clp)
4653{
4654	struct nfsrv_stable *sp;
4655
4656	/*
4657	 * First find the client structure.
4658	 */
4659	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4660		if (sp->nst_len == clp->lc_idlen &&
4661		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4662			break;
4663	}
4664	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
4665		return;
4666
4667	/*
4668	 * Now, just mark it and set the nfsclient back pointer.
4669	 */
4670	sp->nst_flag |= NFSNST_GOTSTATE;
4671	sp->nst_clp = clp;
4672}
4673
4674/*
4675 * This function is called for a reclaim, to see if it gets grace.
4676 * It returns 0 if a reclaim is allowed, 1 otherwise.
4677 */
4678static int
4679nfsrv_checkstable(struct nfsclient *clp)
4680{
4681	struct nfsrv_stable *sp;
4682
4683	/*
4684	 * First, find the entry for the client.
4685	 */
4686	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4687		if (sp->nst_len == clp->lc_idlen &&
4688		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4689			break;
4690	}
4691
4692	/*
4693	 * If not in the list, state was revoked or no state was issued
4694	 * since the previous reboot, a reclaim is denied.
4695	 */
4696	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
4697	    (sp->nst_flag & NFSNST_REVOKE) ||
4698	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
4699		return (1);
4700	return (0);
4701}
4702
4703/*
4704 * Test for and try to clear out a conflicting client. This is called by
4705 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
4706 * a found.
4707 * The trick here is that it can't revoke a conflicting client with an
4708 * expired lease unless it holds the v4root lock, so...
4709 * If no v4root lock, get the lock and return 1 to indicate "try again".
4710 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
4711 * the revocation worked and the conflicting client is "bye, bye", so it
4712 * can be tried again.
4713 * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
4714 * Unlocks State before a non-zero value is returned.
4715 */
4716static int
4717nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
4718    NFSPROC_T *p)
4719{
4720	int gotlock, lktype = 0;
4721
4722	/*
4723	 * If lease hasn't expired, we can't fix it.
4724	 */
4725	if (clp->lc_expiry >= NFSD_MONOSEC ||
4726	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
4727		return (0);
4728	if (*haslockp == 0) {
4729		NFSUNLOCKSTATE();
4730		if (vp != NULL) {
4731			lktype = NFSVOPISLOCKED(vp);
4732			NFSVOPUNLOCK(vp, 0);
4733		}
4734		NFSLOCKV4ROOTMUTEX();
4735		nfsv4_relref(&nfsv4rootfs_lock);
4736		do {
4737			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4738			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4739		} while (!gotlock);
4740		NFSUNLOCKV4ROOTMUTEX();
4741		*haslockp = 1;
4742		if (vp != NULL) {
4743			NFSVOPLOCK(vp, lktype | LK_RETRY);
4744			if ((vp->v_iflag & VI_DOOMED) != 0)
4745				return (2);
4746		}
4747		return (1);
4748	}
4749	NFSUNLOCKSTATE();
4750
4751	/*
4752	 * Ok, we can expire the conflicting client.
4753	 */
4754	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4755	nfsrv_backupstable();
4756	nfsrv_cleanclient(clp, p);
4757	nfsrv_freedeleglist(&clp->lc_deleg);
4758	nfsrv_freedeleglist(&clp->lc_olddeleg);
4759	LIST_REMOVE(clp, lc_hash);
4760	nfsrv_zapclient(clp, p);
4761	return (1);
4762}
4763
4764/*
4765 * Resolve a delegation conflict.
4766 * Returns 0 to indicate the conflict was resolved without sleeping.
4767 * Return -1 to indicate that the caller should check for conflicts again.
4768 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
4769 *
4770 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
4771 * for a return of 0, since there was no sleep and it could be required
4772 * later. It is released for a return of NFSERR_DELAY, since the caller
4773 * will return that error. It is released when a sleep was done waiting
4774 * for the delegation to be returned or expire (so that other nfsds can
4775 * handle ops). Then, it must be acquired for the write to stable storage.
4776 * (This function is somewhat similar to nfsrv_clientconflict(), but
4777 *  the semantics differ in a couple of subtle ways. The return of 0
4778 *  indicates the conflict was resolved without sleeping here, not
4779 *  that the conflict can't be resolved and the handling of nfsv4root_lock
4780 *  differs, as noted above.)
4781 * Unlocks State before returning a non-zero value.
4782 */
4783static int
4784nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
4785    vnode_t vp)
4786{
4787	struct nfsclient *clp = stp->ls_clp;
4788	int gotlock, error, lktype = 0, retrycnt, zapped_clp;
4789	nfsv4stateid_t tstateid;
4790	fhandle_t tfh;
4791
4792	/*
4793	 * If the conflict is with an old delegation...
4794	 */
4795	if (stp->ls_flags & NFSLCK_OLDDELEG) {
4796		/*
4797		 * You can delete it, if it has expired.
4798		 */
4799		if (clp->lc_delegtime < NFSD_MONOSEC) {
4800			nfsrv_freedeleg(stp);
4801			NFSUNLOCKSTATE();
4802			error = -1;
4803			goto out;
4804		}
4805		NFSUNLOCKSTATE();
4806		/*
4807		 * During this delay, the old delegation could expire or it
4808		 * could be recovered by the client via an Open with
4809		 * CLAIM_DELEGATE_PREV.
4810		 * Release the nfsv4root_lock, if held.
4811		 */
4812		if (*haslockp) {
4813			*haslockp = 0;
4814			NFSLOCKV4ROOTMUTEX();
4815			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4816			NFSUNLOCKV4ROOTMUTEX();
4817		}
4818		error = NFSERR_DELAY;
4819		goto out;
4820	}
4821
4822	/*
4823	 * It's a current delegation, so:
4824	 * - check to see if the delegation has expired
4825	 *   - if so, get the v4root lock and then expire it
4826	 */
4827	if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
4828		/*
4829		 * - do a recall callback, since not yet done
4830		 * For now, never allow truncate to be set. To use
4831		 * truncate safely, it must be guaranteed that the
4832		 * Remove, Rename or Setattr with size of 0 will
4833		 * succeed and that would require major changes to
4834		 * the VFS/Vnode OPs.
4835		 * Set the expiry time large enough so that it won't expire
4836		 * until after the callback, then set it correctly, once
4837		 * the callback is done. (The delegation will now time
4838		 * out whether or not the Recall worked ok. The timeout
4839		 * will be extended when ops are done on the delegation
4840		 * stateid, up to the timelimit.)
4841		 */
4842		stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
4843		    NFSRV_LEASEDELTA;
4844		stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
4845		    NFSRV_LEASEDELTA;
4846		stp->ls_flags |= NFSLCK_DELEGRECALL;
4847
4848		/*
4849		 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
4850		 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
4851		 * in order to try and avoid a race that could happen
4852		 * when a CBRecall request passed the Open reply with
4853		 * the delegation in it when transitting the network.
4854		 * Since nfsrv_docallback will sleep, don't use stp after
4855		 * the call.
4856		 */
4857		NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
4858		    sizeof (tstateid));
4859		NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
4860		    sizeof (tfh));
4861		NFSUNLOCKSTATE();
4862		if (*haslockp) {
4863			*haslockp = 0;
4864			NFSLOCKV4ROOTMUTEX();
4865			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4866			NFSUNLOCKV4ROOTMUTEX();
4867		}
4868		retrycnt = 0;
4869		do {
4870		    error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
4871			&tstateid, 0, &tfh, NULL, NULL, p);
4872		    retrycnt++;
4873		} while ((error == NFSERR_BADSTATEID ||
4874		    error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
4875		error = NFSERR_DELAY;
4876		goto out;
4877	}
4878
4879	if (clp->lc_expiry >= NFSD_MONOSEC &&
4880	    stp->ls_delegtime >= NFSD_MONOSEC) {
4881		NFSUNLOCKSTATE();
4882		/*
4883		 * A recall has been done, but it has not yet expired.
4884		 * So, RETURN_DELAY.
4885		 */
4886		if (*haslockp) {
4887			*haslockp = 0;
4888			NFSLOCKV4ROOTMUTEX();
4889			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4890			NFSUNLOCKV4ROOTMUTEX();
4891		}
4892		error = NFSERR_DELAY;
4893		goto out;
4894	}
4895
4896	/*
4897	 * If we don't yet have the lock, just get it and then return,
4898	 * since we need that before deleting expired state, such as
4899	 * this delegation.
4900	 * When getting the lock, unlock the vnode, so other nfsds that
4901	 * are in progress, won't get stuck waiting for the vnode lock.
4902	 */
4903	if (*haslockp == 0) {
4904		NFSUNLOCKSTATE();
4905		if (vp != NULL) {
4906			lktype = NFSVOPISLOCKED(vp);
4907			NFSVOPUNLOCK(vp, 0);
4908		}
4909		NFSLOCKV4ROOTMUTEX();
4910		nfsv4_relref(&nfsv4rootfs_lock);
4911		do {
4912			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4913			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4914		} while (!gotlock);
4915		NFSUNLOCKV4ROOTMUTEX();
4916		*haslockp = 1;
4917		if (vp != NULL) {
4918			NFSVOPLOCK(vp, lktype | LK_RETRY);
4919			if ((vp->v_iflag & VI_DOOMED) != 0) {
4920				*haslockp = 0;
4921				NFSLOCKV4ROOTMUTEX();
4922				nfsv4_unlock(&nfsv4rootfs_lock, 1);
4923				NFSUNLOCKV4ROOTMUTEX();
4924				error = NFSERR_PERM;
4925				goto out;
4926			}
4927		}
4928		error = -1;
4929		goto out;
4930	}
4931
4932	NFSUNLOCKSTATE();
4933	/*
4934	 * Ok, we can delete the expired delegation.
4935	 * First, write the Revoke record to stable storage and then
4936	 * clear out the conflict.
4937	 * Since all other nfsd threads are now blocked, we can safely
4938	 * sleep without the state changing.
4939	 */
4940	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4941	nfsrv_backupstable();
4942	if (clp->lc_expiry < NFSD_MONOSEC) {
4943		nfsrv_cleanclient(clp, p);
4944		nfsrv_freedeleglist(&clp->lc_deleg);
4945		nfsrv_freedeleglist(&clp->lc_olddeleg);
4946		LIST_REMOVE(clp, lc_hash);
4947		zapped_clp = 1;
4948	} else {
4949		nfsrv_freedeleg(stp);
4950		zapped_clp = 0;
4951	}
4952	if (zapped_clp)
4953		nfsrv_zapclient(clp, p);
4954	error = -1;
4955
4956out:
4957	NFSEXITCODE(error);
4958	return (error);
4959}
4960
4961/*
4962 * Check for a remove allowed, if remove is set to 1 and get rid of
4963 * delegations.
4964 */
4965APPLESTATIC int
4966nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
4967{
4968	struct nfsstate *stp;
4969	struct nfslockfile *lfp;
4970	int error, haslock = 0;
4971	fhandle_t nfh;
4972
4973	/*
4974	 * First, get the lock file structure.
4975	 * (A return of -1 means no associated state, so remove ok.)
4976	 */
4977	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
4978tryagain:
4979	NFSLOCKSTATE();
4980	if (!error)
4981		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
4982	if (error) {
4983		NFSUNLOCKSTATE();
4984		if (haslock) {
4985			NFSLOCKV4ROOTMUTEX();
4986			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4987			NFSUNLOCKV4ROOTMUTEX();
4988		}
4989		if (error == -1)
4990			error = 0;
4991		goto out;
4992	}
4993
4994	/*
4995	 * Now, we must Recall any delegations.
4996	 */
4997	error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
4998	if (error) {
4999		/*
5000		 * nfsrv_cleandeleg() unlocks state for non-zero
5001		 * return.
5002		 */
5003		if (error == -1)
5004			goto tryagain;
5005		if (haslock) {
5006			NFSLOCKV4ROOTMUTEX();
5007			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5008			NFSUNLOCKV4ROOTMUTEX();
5009		}
5010		goto out;
5011	}
5012
5013	/*
5014	 * Now, look for a conflicting open share.
5015	 */
5016	if (remove) {
5017		LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
5018			if (stp->ls_flags & NFSLCK_WRITEDENY) {
5019				error = NFSERR_FILEOPEN;
5020				break;
5021			}
5022		}
5023	}
5024
5025	NFSUNLOCKSTATE();
5026	if (haslock) {
5027		NFSLOCKV4ROOTMUTEX();
5028		nfsv4_unlock(&nfsv4rootfs_lock, 1);
5029		NFSUNLOCKV4ROOTMUTEX();
5030	}
5031
5032out:
5033	NFSEXITCODE(error);
5034	return (error);
5035}
5036
5037/*
5038 * Clear out all delegations for the file referred to by lfp.
5039 * May return NFSERR_DELAY, if there will be a delay waiting for
5040 * delegations to expire.
5041 * Returns -1 to indicate it slept while recalling a delegation.
5042 * This function has the side effect of deleting the nfslockfile structure,
5043 * if it no longer has associated state and didn't have to sleep.
5044 * Unlocks State before a non-zero value is returned.
5045 */
5046static int
5047nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5048    struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5049{
5050	struct nfsstate *stp, *nstp;
5051	int ret = 0;
5052
5053	stp = LIST_FIRST(&lfp->lf_deleg);
5054	while (stp != LIST_END(&lfp->lf_deleg)) {
5055		nstp = LIST_NEXT(stp, ls_file);
5056		if (stp->ls_clp != clp) {
5057			ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5058			if (ret) {
5059				/*
5060				 * nfsrv_delegconflict() unlocks state
5061				 * when it returns non-zero.
5062				 */
5063				goto out;
5064			}
5065		}
5066		stp = nstp;
5067	}
5068out:
5069	NFSEXITCODE(ret);
5070	return (ret);
5071}
5072
5073/*
5074 * There are certain operations that, when being done outside of NFSv4,
5075 * require that any NFSv4 delegation for the file be recalled.
5076 * This function is to be called for those cases:
5077 * VOP_RENAME() - When a delegation is being recalled for any reason,
5078 *	the client may have to do Opens against the server, using the file's
5079 *	final component name. If the file has been renamed on the server,
5080 *	that component name will be incorrect and the Open will fail.
5081 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5082 *	been removed on the server, if there is a delegation issued to
5083 *	that client for the file. I say "theoretically" since clients
5084 *	normally do an Access Op before the Open and that Access Op will
5085 *	fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5086 *	they will detect the file's removal in the same manner. (There is
5087 *	one case where RFC3530 allows a client to do an Open without first
5088 *	doing an Access Op, which is passage of a check against the ACE
5089 *	returned with a Write delegation, but current practice is to ignore
5090 *	the ACE and always do an Access Op.)
5091 *	Since the functions can only be called with an unlocked vnode, this
5092 *	can't be done at this time.
5093 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5094 *	locks locally in the client, which are not visible to the server. To
5095 *	deal with this, issuing of delegations for a vnode must be disabled
5096 *	and all delegations for the vnode recalled. This is done via the
5097 *	second function, using the VV_DISABLEDELEG vflag on the vnode.
5098 */
5099APPLESTATIC void
5100nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5101{
5102	time_t starttime;
5103	int error;
5104
5105	/*
5106	 * First, check to see if the server is currently running and it has
5107	 * been called for a regular file when issuing delegations.
5108	 */
5109	if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
5110	    nfsrv_issuedelegs == 0)
5111		return;
5112
5113	KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5114	/*
5115	 * First, get a reference on the nfsv4rootfs_lock so that an
5116	 * exclusive lock cannot be acquired by another thread.
5117	 */
5118	NFSLOCKV4ROOTMUTEX();
5119	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5120	NFSUNLOCKV4ROOTMUTEX();
5121
5122	/*
5123	 * Now, call nfsrv_checkremove() in a loop while it returns
5124	 * NFSERR_DELAY. Return upon any other error or when timed out.
5125	 */
5126	starttime = NFSD_MONOSEC;
5127	do {
5128		if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5129			error = nfsrv_checkremove(vp, 0, p);
5130			NFSVOPUNLOCK(vp, 0);
5131		} else
5132			error = EPERM;
5133		if (error == NFSERR_DELAY) {
5134			if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5135				break;
5136			/* Sleep for a short period of time */
5137			(void) nfs_catnap(PZERO, 0, "nfsremove");
5138		}
5139	} while (error == NFSERR_DELAY);
5140	NFSLOCKV4ROOTMUTEX();
5141	nfsv4_relref(&nfsv4rootfs_lock);
5142	NFSUNLOCKV4ROOTMUTEX();
5143}
5144
5145APPLESTATIC void
5146nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
5147{
5148
5149#ifdef VV_DISABLEDELEG
5150	/*
5151	 * First, flag issuance of delegations disabled.
5152	 */
5153	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
5154#endif
5155
5156	/*
5157	 * Then call nfsd_recalldelegation() to get rid of all extant
5158	 * delegations.
5159	 */
5160	nfsd_recalldelegation(vp, p);
5161}
5162
5163/*
5164 * Check for conflicting locks, etc. and then get rid of delegations.
5165 * (At one point I thought that I should get rid of delegations for any
5166 *  Setattr, since it could potentially disallow the I/O op (read or write)
5167 *  allowed by the delegation. However, Setattr Ops that aren't changing
5168 *  the size get a stateid of all 0s, so you can't tell if it is a delegation
5169 *  for the same client or a different one, so I decided to only get rid
5170 *  of delegations for other clients when the size is being changed.)
5171 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5172 * as Write backs, even if there is no delegation, so it really isn't any
5173 * different?)
5174 */
5175APPLESTATIC int
5176nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5177    nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5178    struct nfsexstuff *exp, NFSPROC_T *p)
5179{
5180	struct nfsstate st, *stp = &st;
5181	struct nfslock lo, *lop = &lo;
5182	int error = 0;
5183	nfsquad_t clientid;
5184
5185	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5186		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5187		lop->lo_first = nvap->na_size;
5188	} else {
5189		stp->ls_flags = 0;
5190		lop->lo_first = 0;
5191	}
5192	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5193	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5194	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5195	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5196		stp->ls_flags |= NFSLCK_SETATTR;
5197	if (stp->ls_flags == 0)
5198		goto out;
5199	lop->lo_end = NFS64BITSSET;
5200	lop->lo_flags = NFSLCK_WRITE;
5201	stp->ls_ownerlen = 0;
5202	stp->ls_op = NULL;
5203	stp->ls_uid = nd->nd_cred->cr_uid;
5204	stp->ls_stateid.seqid = stateidp->seqid;
5205	clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5206	clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5207	stp->ls_stateid.other[2] = stateidp->other[2];
5208	error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5209	    stateidp, exp, nd, p);
5210
5211out:
5212	NFSEXITCODE2(error, nd);
5213	return (error);
5214}
5215
5216/*
5217 * Check for a write delegation and do a CBGETATTR if there is one, updating
5218 * the attributes, as required.
5219 * Should I return an error if I can't get the attributes? (For now, I'll
5220 * just return ok.
5221 */
5222APPLESTATIC int
5223nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
5224    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred,
5225    NFSPROC_T *p)
5226{
5227	struct nfsstate *stp;
5228	struct nfslockfile *lfp;
5229	struct nfsclient *clp;
5230	struct nfsvattr nva;
5231	fhandle_t nfh;
5232	int error = 0;
5233	nfsattrbit_t cbbits;
5234	u_quad_t delegfilerev;
5235
5236	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
5237	if (!NFSNONZERO_ATTRBIT(&cbbits))
5238		goto out;
5239
5240	/*
5241	 * Get the lock file structure.
5242	 * (A return of -1 means no associated state, so return ok.)
5243	 */
5244	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5245	NFSLOCKSTATE();
5246	if (!error)
5247		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5248	if (error) {
5249		NFSUNLOCKSTATE();
5250		if (error == -1)
5251			error = 0;
5252		goto out;
5253	}
5254
5255	/*
5256	 * Now, look for a write delegation.
5257	 */
5258	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
5259		if (stp->ls_flags & NFSLCK_DELEGWRITE)
5260			break;
5261	}
5262	if (stp == LIST_END(&lfp->lf_deleg)) {
5263		NFSUNLOCKSTATE();
5264		goto out;
5265	}
5266	clp = stp->ls_clp;
5267	delegfilerev = stp->ls_filerev;
5268
5269	/*
5270	 * If the Write delegation was issued as a part of this Compound RPC
5271	 * or if we have an Implied Clientid (used in a previous Op in this
5272	 * compound) and it is the client the delegation was issued to,
5273	 * just return ok.
5274	 * I also assume that it is from the same client iff the network
5275	 * host IP address is the same as the callback address. (Not
5276	 * exactly correct by the RFC, but avoids a lot of Getattr
5277	 * callbacks.)
5278	 */
5279	if (nd->nd_compref == stp->ls_compref ||
5280	    ((nd->nd_flag & ND_IMPLIEDCLID) &&
5281	     clp->lc_clientid.qval == nd->nd_clientid.qval) ||
5282	     nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
5283		NFSUNLOCKSTATE();
5284		goto out;
5285	}
5286
5287	/*
5288	 * We are now done with the delegation state structure,
5289	 * so the statelock can be released and we can now tsleep().
5290	 */
5291
5292	/*
5293	 * Now, we must do the CB Getattr callback, to see if Change or Size
5294	 * has changed.
5295	 */
5296	if (clp->lc_expiry >= NFSD_MONOSEC) {
5297		NFSUNLOCKSTATE();
5298		NFSVNO_ATTRINIT(&nva);
5299		nva.na_filerev = NFS64BITSSET;
5300		error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
5301		    0, &nfh, &nva, &cbbits, p);
5302		if (!error) {
5303			if ((nva.na_filerev != NFS64BITSSET &&
5304			    nva.na_filerev > delegfilerev) ||
5305			    (NFSVNO_ISSETSIZE(&nva) &&
5306			     nva.na_size != nvap->na_size)) {
5307				error = nfsvno_updfilerev(vp, nvap, cred, p);
5308				if (NFSVNO_ISSETSIZE(&nva))
5309					nvap->na_size = nva.na_size;
5310			}
5311		} else
5312			error = 0;	/* Ignore callback errors for now. */
5313	} else {
5314		NFSUNLOCKSTATE();
5315	}
5316
5317out:
5318	NFSEXITCODE2(error, nd);
5319	return (error);
5320}
5321
5322/*
5323 * This function looks for openowners that haven't had any opens for
5324 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5325 * is set.
5326 */
5327APPLESTATIC void
5328nfsrv_throwawayopens(NFSPROC_T *p)
5329{
5330	struct nfsclient *clp, *nclp;
5331	struct nfsstate *stp, *nstp;
5332	int i;
5333
5334	NFSLOCKSTATE();
5335	nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
5336	/*
5337	 * For each client...
5338	 */
5339	for (i = 0; i < nfsrv_clienthashsize; i++) {
5340	    LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5341		LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5342			if (LIST_EMPTY(&stp->ls_open) &&
5343			    (stp->ls_noopens > NFSNOOPEN ||
5344			     (nfsrv_openpluslock * 2) >
5345			     nfsrv_v4statelimit))
5346				nfsrv_freeopenowner(stp, 0, p);
5347		}
5348	    }
5349	}
5350	NFSUNLOCKSTATE();
5351}
5352
5353/*
5354 * This function checks to see if the credentials are the same.
5355 * Returns 1 for not same, 0 otherwise.
5356 */
5357static int
5358nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
5359{
5360
5361	if (nd->nd_flag & ND_GSS) {
5362		if (!(clp->lc_flags & LCL_GSS))
5363			return (1);
5364		if (clp->lc_flags & LCL_NAME) {
5365			if (nd->nd_princlen != clp->lc_namelen ||
5366			    NFSBCMP(nd->nd_principal, clp->lc_name,
5367				clp->lc_namelen))
5368				return (1);
5369			else
5370				return (0);
5371		}
5372		if (nd->nd_cred->cr_uid == clp->lc_uid)
5373			return (0);
5374		else
5375			return (1);
5376	} else if (clp->lc_flags & LCL_GSS)
5377		return (1);
5378	/*
5379	 * For AUTH_SYS, allow the same uid or root. (This is underspecified
5380	 * in RFC3530, which talks about principals, but doesn't say anything
5381	 * about uids for AUTH_SYS.)
5382	 */
5383	if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5384		return (0);
5385	else
5386		return (1);
5387}
5388
5389/*
5390 * Calculate the lease expiry time.
5391 */
5392static time_t
5393nfsrv_leaseexpiry(void)
5394{
5395
5396	if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
5397		return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5398	return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5399}
5400
5401/*
5402 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5403 */
5404static void
5405nfsrv_delaydelegtimeout(struct nfsstate *stp)
5406{
5407
5408	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5409		return;
5410
5411	if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5412	    stp->ls_delegtime < stp->ls_delegtimelimit) {
5413		stp->ls_delegtime += nfsrv_lease;
5414		if (stp->ls_delegtime > stp->ls_delegtimelimit)
5415			stp->ls_delegtime = stp->ls_delegtimelimit;
5416	}
5417}
5418
5419/*
5420 * This function checks to see if there is any other state associated
5421 * with the openowner for this Open.
5422 * It returns 1 if there is no other state, 0 otherwise.
5423 */
5424static int
5425nfsrv_nootherstate(struct nfsstate *stp)
5426{
5427	struct nfsstate *tstp;
5428
5429	LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5430		if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5431			return (0);
5432	}
5433	return (1);
5434}
5435
5436/*
5437 * Create a list of lock deltas (changes to local byte range locking
5438 * that can be rolled back using the list) and apply the changes via
5439 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5440 * the rollback or update function will be called after this.
5441 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5442 * call fails. If it returns an error, it will unlock the list.
5443 */
5444static int
5445nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5446    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5447{
5448	struct nfslock *lop, *nlop;
5449	int error = 0;
5450
5451	/* Loop through the list of locks. */
5452	lop = LIST_FIRST(&lfp->lf_locallock);
5453	while (first < end && lop != NULL) {
5454		nlop = LIST_NEXT(lop, lo_lckowner);
5455		if (first >= lop->lo_end) {
5456			/* not there yet */
5457			lop = nlop;
5458		} else if (first < lop->lo_first) {
5459			/* new one starts before entry in list */
5460			if (end <= lop->lo_first) {
5461				/* no overlap between old and new */
5462				error = nfsrv_dolocal(vp, lfp, flags,
5463				    NFSLCK_UNLOCK, first, end, cfp, p);
5464				if (error != 0)
5465					break;
5466				first = end;
5467			} else {
5468				/* handle fragment overlapped with new one */
5469				error = nfsrv_dolocal(vp, lfp, flags,
5470				    NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5471				    p);
5472				if (error != 0)
5473					break;
5474				first = lop->lo_first;
5475			}
5476		} else {
5477			/* new one overlaps this entry in list */
5478			if (end <= lop->lo_end) {
5479				/* overlaps all of new one */
5480				error = nfsrv_dolocal(vp, lfp, flags,
5481				    lop->lo_flags, first, end, cfp, p);
5482				if (error != 0)
5483					break;
5484				first = end;
5485			} else {
5486				/* handle fragment overlapped with new one */
5487				error = nfsrv_dolocal(vp, lfp, flags,
5488				    lop->lo_flags, first, lop->lo_end, cfp, p);
5489				if (error != 0)
5490					break;
5491				first = lop->lo_end;
5492				lop = nlop;
5493			}
5494		}
5495	}
5496	if (first < end && error == 0)
5497		/* handle fragment past end of list */
5498		error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
5499		    end, cfp, p);
5500
5501	NFSEXITCODE(error);
5502	return (error);
5503}
5504
5505/*
5506 * Local lock unlock. Unlock all byte ranges that are no longer locked
5507 * by NFSv4. To do this, unlock any subranges of first-->end that
5508 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
5509 * list. This list has all locks for the file held by other
5510 * <clientid, lockowner> tuples. The list is ordered by increasing
5511 * lo_first value, but may have entries that overlap each other, for
5512 * the case of read locks.
5513 */
5514static void
5515nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
5516    uint64_t init_end, NFSPROC_T *p)
5517{
5518	struct nfslock *lop;
5519	uint64_t first, end, prevfirst;
5520
5521	first = init_first;
5522	end = init_end;
5523	while (first < init_end) {
5524		/* Loop through all nfs locks, adjusting first and end */
5525		prevfirst = 0;
5526		LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
5527			KASSERT(prevfirst <= lop->lo_first,
5528			    ("nfsv4 locks out of order"));
5529			KASSERT(lop->lo_first < lop->lo_end,
5530			    ("nfsv4 bogus lock"));
5531			prevfirst = lop->lo_first;
5532			if (first >= lop->lo_first &&
5533			    first < lop->lo_end)
5534				/*
5535				 * Overlaps with initial part, so trim
5536				 * off that initial part by moving first past
5537				 * it.
5538				 */
5539				first = lop->lo_end;
5540			else if (end > lop->lo_first &&
5541			    lop->lo_first > first) {
5542				/*
5543				 * This lock defines the end of the
5544				 * segment to unlock, so set end to the
5545				 * start of it and break out of the loop.
5546				 */
5547				end = lop->lo_first;
5548				break;
5549			}
5550			if (first >= end)
5551				/*
5552				 * There is no segment left to do, so
5553				 * break out of this loop and then exit
5554				 * the outer while() since first will be set
5555				 * to end, which must equal init_end here.
5556				 */
5557				break;
5558		}
5559		if (first < end) {
5560			/* Unlock this segment */
5561			(void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
5562			    NFSLCK_READ, first, end, NULL, p);
5563			nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
5564			    first, end);
5565		}
5566		/*
5567		 * Now move past this segment and look for any further
5568		 * segment in the range, if there is one.
5569		 */
5570		first = end;
5571		end = init_end;
5572	}
5573}
5574
5575/*
5576 * Do the local lock operation and update the rollback list, as required.
5577 * Perform the rollback and return the error if nfsvno_advlock() fails.
5578 */
5579static int
5580nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
5581    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5582{
5583	struct nfsrollback *rlp;
5584	int error = 0, ltype, oldltype;
5585
5586	if (flags & NFSLCK_WRITE)
5587		ltype = F_WRLCK;
5588	else if (flags & NFSLCK_READ)
5589		ltype = F_RDLCK;
5590	else
5591		ltype = F_UNLCK;
5592	if (oldflags & NFSLCK_WRITE)
5593		oldltype = F_WRLCK;
5594	else if (oldflags & NFSLCK_READ)
5595		oldltype = F_RDLCK;
5596	else
5597		oldltype = F_UNLCK;
5598	if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
5599		/* nothing to do */
5600		goto out;
5601	error = nfsvno_advlock(vp, ltype, first, end, p);
5602	if (error != 0) {
5603		if (cfp != NULL) {
5604			cfp->cl_clientid.lval[0] = 0;
5605			cfp->cl_clientid.lval[1] = 0;
5606			cfp->cl_first = 0;
5607			cfp->cl_end = NFS64BITSSET;
5608			cfp->cl_flags = NFSLCK_WRITE;
5609			cfp->cl_ownerlen = 5;
5610			NFSBCOPY("LOCAL", cfp->cl_owner, 5);
5611		}
5612		nfsrv_locallock_rollback(vp, lfp, p);
5613	} else if (ltype != F_UNLCK) {
5614		rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
5615		    M_WAITOK);
5616		rlp->rlck_first = first;
5617		rlp->rlck_end = end;
5618		rlp->rlck_type = oldltype;
5619		LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
5620	}
5621
5622out:
5623	NFSEXITCODE(error);
5624	return (error);
5625}
5626
5627/*
5628 * Roll back local lock changes and free up the rollback list.
5629 */
5630static void
5631nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
5632{
5633	struct nfsrollback *rlp, *nrlp;
5634
5635	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
5636		(void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
5637		    rlp->rlck_end, p);
5638		free(rlp, M_NFSDROLLBACK);
5639	}
5640	LIST_INIT(&lfp->lf_rollback);
5641}
5642
5643/*
5644 * Update local lock list and delete rollback list (ie now committed to the
5645 * local locks). Most of the work is done by the internal function.
5646 */
5647static void
5648nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
5649    uint64_t end)
5650{
5651	struct nfsrollback *rlp, *nrlp;
5652	struct nfslock *new_lop, *other_lop;
5653
5654	new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
5655	if (flags & (NFSLCK_READ | NFSLCK_WRITE))
5656		other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
5657		    M_WAITOK);
5658	else
5659		other_lop = NULL;
5660	new_lop->lo_flags = flags;
5661	new_lop->lo_first = first;
5662	new_lop->lo_end = end;
5663	nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
5664	if (new_lop != NULL)
5665		free(new_lop, M_NFSDLOCK);
5666	if (other_lop != NULL)
5667		free(other_lop, M_NFSDLOCK);
5668
5669	/* and get rid of the rollback list */
5670	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
5671		free(rlp, M_NFSDROLLBACK);
5672	LIST_INIT(&lfp->lf_rollback);
5673}
5674
5675/*
5676 * Lock the struct nfslockfile for local lock updating.
5677 */
5678static void
5679nfsrv_locklf(struct nfslockfile *lfp)
5680{
5681	int gotlock;
5682
5683	/* lf_usecount ensures *lfp won't be free'd */
5684	lfp->lf_usecount++;
5685	do {
5686		gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
5687		    NFSSTATEMUTEXPTR, NULL);
5688	} while (gotlock == 0);
5689	lfp->lf_usecount--;
5690}
5691
5692/*
5693 * Unlock the struct nfslockfile after local lock updating.
5694 */
5695static void
5696nfsrv_unlocklf(struct nfslockfile *lfp)
5697{
5698
5699	nfsv4_unlock(&lfp->lf_locallock_lck, 0);
5700}
5701
5702/*
5703 * Clear out all state for the NFSv4 server.
5704 * Must be called by a thread that can sleep when no nfsds are running.
5705 */
5706void
5707nfsrv_throwawayallstate(NFSPROC_T *p)
5708{
5709	struct nfsclient *clp, *nclp;
5710	struct nfslockfile *lfp, *nlfp;
5711	int i;
5712
5713	/*
5714	 * For each client, clean out the state and then free the structure.
5715	 */
5716	for (i = 0; i < nfsrv_clienthashsize; i++) {
5717		LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5718			nfsrv_cleanclient(clp, p);
5719			nfsrv_freedeleglist(&clp->lc_deleg);
5720			nfsrv_freedeleglist(&clp->lc_olddeleg);
5721			free(clp->lc_stateid, M_NFSDCLIENT);
5722			free(clp, M_NFSDCLIENT);
5723		}
5724	}
5725
5726	/*
5727	 * Also, free up any remaining lock file structures.
5728	 */
5729	for (i = 0; i < nfsrv_lockhashsize; i++) {
5730		LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
5731			printf("nfsd unload: fnd a lock file struct\n");
5732			nfsrv_freenfslockfile(lfp);
5733		}
5734	}
5735}
5736
5737/*
5738 * Check the sequence# for the session and slot provided as an argument.
5739 * Also, renew the lease if the session will return NFS_OK.
5740 */
5741int
5742nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
5743    uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
5744    uint32_t *sflagsp, NFSPROC_T *p)
5745{
5746	struct nfsdsession *sep;
5747	struct nfssessionhash *shp;
5748	int error;
5749	SVCXPRT *savxprt;
5750
5751	shp = NFSSESSIONHASH(nd->nd_sessionid);
5752	NFSLOCKSESSION(shp);
5753	sep = nfsrv_findsession(nd->nd_sessionid);
5754	if (sep == NULL) {
5755		NFSUNLOCKSESSION(shp);
5756		return (NFSERR_BADSESSION);
5757	}
5758	error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
5759	    sep->sess_slots, NULL, NFSV4_SLOTS - 1);
5760	if (error != 0) {
5761		NFSUNLOCKSESSION(shp);
5762		return (error);
5763	}
5764	if (cache_this != 0)
5765		nd->nd_flag |= ND_SAVEREPLY;
5766	/* Renew the lease. */
5767	sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
5768	nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
5769	nd->nd_flag |= ND_IMPLIEDCLID;
5770
5771	/*
5772	 * If this session handles the backchannel, save the nd_xprt for this
5773	 * RPC, since this is the one being used.
5774	 */
5775	if (sep->sess_cbsess.nfsess_xprt != NULL &&
5776	    (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) {
5777		savxprt = sep->sess_cbsess.nfsess_xprt;
5778		SVC_ACQUIRE(nd->nd_xprt);
5779		nd->nd_xprt->xp_p2 = savxprt->xp_p2;
5780		nd->nd_xprt->xp_idletimeout = 0;	/* Disable timeout. */
5781		sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
5782		SVC_RELEASE(savxprt);
5783	}
5784
5785	*sflagsp = 0;
5786	if (sep->sess_clp->lc_req.nr_client == NULL)
5787		*sflagsp |= NFSV4SEQ_CBPATHDOWN;
5788	NFSUNLOCKSESSION(shp);
5789	if (error == NFSERR_EXPIRED) {
5790		*sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
5791		error = 0;
5792	} else if (error == NFSERR_ADMINREVOKED) {
5793		*sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
5794		error = 0;
5795	}
5796	*highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
5797	return (0);
5798}
5799
5800/*
5801 * Check/set reclaim complete for this session/clientid.
5802 */
5803int
5804nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd)
5805{
5806	struct nfsdsession *sep;
5807	struct nfssessionhash *shp;
5808	int error = 0;
5809
5810	shp = NFSSESSIONHASH(nd->nd_sessionid);
5811	NFSLOCKSTATE();
5812	NFSLOCKSESSION(shp);
5813	sep = nfsrv_findsession(nd->nd_sessionid);
5814	if (sep == NULL) {
5815		NFSUNLOCKSESSION(shp);
5816		NFSUNLOCKSTATE();
5817		return (NFSERR_BADSESSION);
5818	}
5819
5820	/* Check to see if reclaim complete has already happened. */
5821	if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
5822		error = NFSERR_COMPLETEALREADY;
5823	else
5824		sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
5825	NFSUNLOCKSESSION(shp);
5826	NFSUNLOCKSTATE();
5827	return (error);
5828}
5829
5830/*
5831 * Cache the reply in a session slot.
5832 */
5833void
5834nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat,
5835   struct mbuf **m)
5836{
5837	struct nfsdsession *sep;
5838	struct nfssessionhash *shp;
5839
5840	shp = NFSSESSIONHASH(sessionid);
5841	NFSLOCKSESSION(shp);
5842	sep = nfsrv_findsession(sessionid);
5843	if (sep == NULL) {
5844		NFSUNLOCKSESSION(shp);
5845		printf("nfsrv_cache_session: no session\n");
5846		m_freem(*m);
5847		return;
5848	}
5849	nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m);
5850	NFSUNLOCKSESSION(shp);
5851}
5852
5853/*
5854 * Search for a session that matches the sessionid.
5855 */
5856static struct nfsdsession *
5857nfsrv_findsession(uint8_t *sessionid)
5858{
5859	struct nfsdsession *sep;
5860	struct nfssessionhash *shp;
5861
5862	shp = NFSSESSIONHASH(sessionid);
5863	LIST_FOREACH(sep, &shp->list, sess_hash) {
5864		if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
5865			break;
5866	}
5867	return (sep);
5868}
5869
5870/*
5871 * Destroy a session.
5872 */
5873int
5874nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
5875{
5876	int error, samesess;
5877
5878	samesess = 0;
5879	if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID)) {
5880		samesess = 1;
5881		if ((nd->nd_flag & ND_LASTOP) == 0)
5882			return (NFSERR_BADSESSION);
5883	}
5884	error = nfsrv_freesession(NULL, sessionid);
5885	if (error == 0 && samesess != 0)
5886		nd->nd_flag &= ~ND_HASSEQUENCE;
5887	return (error);
5888}
5889
5890/*
5891 * Free up a session structure.
5892 */
5893static int
5894nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
5895{
5896	struct nfssessionhash *shp;
5897	int i;
5898
5899	if (sep == NULL) {
5900		shp = NFSSESSIONHASH(sessionid);
5901		NFSLOCKSESSION(shp);
5902		sep = nfsrv_findsession(sessionid);
5903	} else {
5904		shp = NFSSESSIONHASH(sep->sess_sessionid);
5905		NFSLOCKSESSION(shp);
5906	}
5907	if (sep != NULL) {
5908		NFSLOCKSTATE();
5909		sep->sess_refcnt--;
5910		if (sep->sess_refcnt > 0) {
5911			NFSUNLOCKSTATE();
5912			NFSUNLOCKSESSION(shp);
5913			return (0);
5914		}
5915		LIST_REMOVE(sep, sess_hash);
5916		LIST_REMOVE(sep, sess_list);
5917		NFSUNLOCKSTATE();
5918	}
5919	NFSUNLOCKSESSION(shp);
5920	if (sep == NULL)
5921		return (NFSERR_BADSESSION);
5922	for (i = 0; i < NFSV4_SLOTS; i++)
5923		if (sep->sess_slots[i].nfssl_reply != NULL)
5924			m_freem(sep->sess_slots[i].nfssl_reply);
5925	if (sep->sess_cbsess.nfsess_xprt != NULL)
5926		SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
5927	free(sep, M_NFSDSESSION);
5928	return (0);
5929}
5930
5931/*
5932 * Free a stateid.
5933 * RFC5661 says that it should fail when there are associated opens, locks
5934 * or delegations. Since stateids represent opens, I don't see how you can
5935 * free an open stateid (it will be free'd when closed), so this function
5936 * only works for lock stateids (freeing the lock_owner) or delegations.
5937 */
5938int
5939nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
5940    NFSPROC_T *p)
5941{
5942	struct nfsclient *clp;
5943	struct nfsstate *stp;
5944	int error;
5945
5946	NFSLOCKSTATE();
5947	/*
5948	 * Look up the stateid
5949	 */
5950	error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
5951	    NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
5952	if (error == 0) {
5953		/* First, check for a delegation. */
5954		LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
5955			if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
5956			    NFSX_STATEIDOTHER))
5957				break;
5958		}
5959		if (stp != NULL) {
5960			nfsrv_freedeleg(stp);
5961			NFSUNLOCKSTATE();
5962			return (error);
5963		}
5964	}
5965	/* Not a delegation, try for a lock_owner. */
5966	if (error == 0)
5967		error = nfsrv_getstate(clp, stateidp, 0, &stp);
5968	if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
5969	    NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
5970		/* Not a lock_owner stateid. */
5971		error = NFSERR_LOCKSHELD;
5972	if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
5973		error = NFSERR_LOCKSHELD;
5974	if (error == 0)
5975		nfsrv_freelockowner(stp, NULL, 0, p);
5976	NFSUNLOCKSTATE();
5977	return (error);
5978}
5979
5980/*
5981 * Generate the xdr for an NFSv4.1 CBSequence Operation.
5982 */
5983static int
5984nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
5985    int dont_replycache, struct nfsdsession **sepp)
5986{
5987	struct nfsdsession *sep;
5988	uint32_t *tl, slotseq = 0;
5989	int maxslot, slotpos;
5990	uint8_t sessionid[NFSX_V4SESSIONID];
5991	int error;
5992
5993	error = nfsv4_getcbsession(clp, sepp);
5994	if (error != 0)
5995		return (error);
5996	sep = *sepp;
5997	(void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot,
5998	    &slotseq, sessionid);
5999	KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
6000
6001	/* Build the Sequence arguments. */
6002	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
6003	bcopy(sessionid, tl, NFSX_V4SESSIONID);
6004	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
6005	nd->nd_slotseq = tl;
6006	*tl++ = txdr_unsigned(slotseq);
6007	*tl++ = txdr_unsigned(slotpos);
6008	*tl++ = txdr_unsigned(maxslot);
6009	if (dont_replycache == 0)
6010		*tl++ = newnfs_true;
6011	else
6012		*tl++ = newnfs_false;
6013	*tl = 0;			/* No referring call list, for now. */
6014	nd->nd_flag |= ND_HASSEQUENCE;
6015	return (0);
6016}
6017
6018/*
6019 * Get a session for the callback.
6020 */
6021static int
6022nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
6023{
6024	struct nfsdsession *sep;
6025
6026	NFSLOCKSTATE();
6027	LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6028		if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
6029			break;
6030	}
6031	if (sep == NULL) {
6032		NFSUNLOCKSTATE();
6033		return (NFSERR_BADSESSION);
6034	}
6035	sep->sess_refcnt++;
6036	*sepp = sep;
6037	NFSUNLOCKSTATE();
6038	return (0);
6039}
6040
6041