nfs_nfsdstate.c revision 260159
1/*-
2 * Copyright (c) 2009 Rick Macklem, University of Guelph
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdstate.c 260159 2014-01-01 02:49:45Z rmacklem $");
30
31#ifndef APPLEKEXT
32#include <fs/nfs/nfsport.h>
33
34struct nfsrv_stablefirst nfsrv_stablefirst;
35int nfsrv_issuedelegs = 0;
36int nfsrv_dolocallocks = 0;
37struct nfsv4lock nfsv4rootfs_lock;
38
39extern int newnfs_numnfsd;
40extern struct nfsstats newnfsstats;
41extern int nfsrv_lease;
42extern struct timeval nfsboottime;
43extern u_int32_t newnfs_true, newnfs_false;
44NFSV4ROOTLOCKMUTEX;
45NFSSTATESPINLOCK;
46
47/*
48 * Hash lists for nfs V4.
49 * (Some would put them in the .h file, but I don't like declaring storage
50 *  in a .h)
51 */
52struct nfsclienthashhead nfsclienthash[NFSCLIENTHASHSIZE];
53struct nfslockhashhead nfslockhash[NFSLOCKHASHSIZE];
54#endif	/* !APPLEKEXT */
55
56static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
57static time_t nfsrvboottime;
58static int nfsrv_writedelegifpos = 1;
59static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
60static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
61static int nfsrv_nogsscallback = 0;
62
63/* local functions */
64static void nfsrv_dumpaclient(struct nfsclient *clp,
65    struct nfsd_dumpclients *dumpp);
66static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
67    NFSPROC_T *p);
68static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
69    NFSPROC_T *p);
70static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
71    NFSPROC_T *p);
72static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
73    int cansleep, NFSPROC_T *p);
74static void nfsrv_freenfslock(struct nfslock *lop);
75static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
76static void nfsrv_freedeleg(struct nfsstate *);
77static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
78    u_int32_t flags, struct nfsstate **stpp);
79static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
80    struct nfsstate **stpp);
81static int nfsrv_getlockfh(vnode_t vp, u_short flags,
82    struct nfslockfile **new_lfpp, fhandle_t *nfhp, NFSPROC_T *p);
83static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
84    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
85static void nfsrv_insertlock(struct nfslock *new_lop,
86    struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
87static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
88    struct nfslock **other_lopp, struct nfslockfile *lfp);
89static int nfsrv_getipnumber(u_char *cp);
90static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
91    nfsv4stateid_t *stateidp, int specialid);
92static int nfsrv_checkgrace(u_int32_t flags);
93static int nfsrv_docallback(struct nfsclient *clp, int procnum,
94    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
95    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p);
96static u_int32_t nfsrv_nextclientindex(void);
97static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
98static void nfsrv_markstable(struct nfsclient *clp);
99static int nfsrv_checkstable(struct nfsclient *clp);
100static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
101    vnode *vp, NFSPROC_T *p);
102static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
103    NFSPROC_T *p, vnode_t vp);
104static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
105    struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
106static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
107    struct nfsclient *clp);
108static time_t nfsrv_leaseexpiry(void);
109static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
110static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
111    struct nfsstate *stp, struct nfsrvcache *op);
112static int nfsrv_nootherstate(struct nfsstate *stp);
113static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
114    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
115static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
116    uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
117static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
118    int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
119    NFSPROC_T *p);
120static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
121    NFSPROC_T *p);
122static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
123    uint64_t first, uint64_t end);
124static void nfsrv_locklf(struct nfslockfile *lfp);
125static void nfsrv_unlocklf(struct nfslockfile *lfp);
126
127/*
128 * Scan the client list for a match and either return the current one,
129 * create a new entry or return an error.
130 * If returning a non-error, the clp structure must either be linked into
131 * the client list or free'd.
132 */
133APPLESTATIC int
134nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
135    nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
136{
137	struct nfsclient *clp = NULL, *new_clp = *new_clpp;
138	int i, error = 0;
139	struct nfsstate *stp, *tstp;
140	struct sockaddr_in *sad, *rad;
141	int zapit = 0, gotit, hasstate = 0, igotlock;
142	static u_int64_t confirm_index = 0;
143
144	/*
145	 * Check for state resource limit exceeded.
146	 */
147	if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
148		error = NFSERR_RESOURCE;
149		goto out;
150	}
151
152	if (nfsrv_issuedelegs == 0 ||
153	    ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
154		/*
155		 * Don't do callbacks when delegations are disabled or
156		 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
157		 * If establishing a callback connection is attempted
158		 * when a firewall is blocking the callback path, the
159		 * server may wait too long for the connect attempt to
160		 * succeed during the Open. Some clients, such as Linux,
161		 * may timeout and give up on the Open before the server
162		 * replies. Also, since AUTH_GSS callbacks are not
163		 * yet interoperability tested, they might cause the
164		 * server to crap out, if they get past the Init call to
165		 * the client.
166		 */
167		new_clp->lc_program = 0;
168
169	/* Lock out other nfsd threads */
170	NFSLOCKV4ROOTMUTEX();
171	nfsv4_relref(&nfsv4rootfs_lock);
172	do {
173		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
174		    NFSV4ROOTLOCKMUTEXPTR, NULL);
175	} while (!igotlock);
176	NFSUNLOCKV4ROOTMUTEX();
177
178	/*
179	 * Search for a match in the client list.
180	 */
181	gotit = i = 0;
182	while (i < NFSCLIENTHASHSIZE && !gotit) {
183	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
184		if (new_clp->lc_idlen == clp->lc_idlen &&
185		    !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
186			gotit = 1;
187			break;
188		}
189	    }
190	    i++;
191	}
192	if (!gotit ||
193	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
194		/*
195		 * Get rid of the old one.
196		 */
197		if (i != NFSCLIENTHASHSIZE) {
198			LIST_REMOVE(clp, lc_hash);
199			nfsrv_cleanclient(clp, p);
200			nfsrv_freedeleglist(&clp->lc_deleg);
201			nfsrv_freedeleglist(&clp->lc_olddeleg);
202			zapit = 1;
203		}
204		/*
205		 * Add it after assigning a client id to it.
206		 */
207		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
208		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
209		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
210		    (u_int32_t)nfsrvboottime;
211		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
212		    nfsrv_nextclientindex();
213		new_clp->lc_stateindex = 0;
214		new_clp->lc_statemaxindex = 0;
215		new_clp->lc_cbref = 0;
216		new_clp->lc_expiry = nfsrv_leaseexpiry();
217		LIST_INIT(&new_clp->lc_open);
218		LIST_INIT(&new_clp->lc_deleg);
219		LIST_INIT(&new_clp->lc_olddeleg);
220		for (i = 0; i < NFSSTATEHASHSIZE; i++)
221			LIST_INIT(&new_clp->lc_stateid[i]);
222		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
223		    lc_hash);
224		newnfsstats.srvclients++;
225		nfsrv_openpluslock++;
226		nfsrv_clients++;
227		NFSLOCKV4ROOTMUTEX();
228		nfsv4_unlock(&nfsv4rootfs_lock, 1);
229		NFSUNLOCKV4ROOTMUTEX();
230		if (zapit)
231			nfsrv_zapclient(clp, p);
232		*new_clpp = NULL;
233		goto out;
234	}
235
236	/*
237	 * Now, handle the cases where the id is already issued.
238	 */
239	if (nfsrv_notsamecredname(nd, clp)) {
240	    /*
241	     * Check to see if there is expired state that should go away.
242	     */
243	    if (clp->lc_expiry < NFSD_MONOSEC &&
244	        (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
245		nfsrv_cleanclient(clp, p);
246		nfsrv_freedeleglist(&clp->lc_deleg);
247	    }
248
249	    /*
250	     * If there is outstanding state, then reply NFSERR_CLIDINUSE per
251	     * RFC3530 Sec. 8.1.2 last para.
252	     */
253	    if (!LIST_EMPTY(&clp->lc_deleg)) {
254		hasstate = 1;
255	    } else if (LIST_EMPTY(&clp->lc_open)) {
256		hasstate = 0;
257	    } else {
258		hasstate = 0;
259		/* Look for an Open on the OpenOwner */
260		LIST_FOREACH(stp, &clp->lc_open, ls_list) {
261		    if (!LIST_EMPTY(&stp->ls_open)) {
262			hasstate = 1;
263			break;
264		    }
265		}
266	    }
267	    if (hasstate) {
268		/*
269		 * If the uid doesn't match, return NFSERR_CLIDINUSE after
270		 * filling out the correct ipaddr and portnum.
271		 */
272		sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *);
273		rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
274		sad->sin_addr.s_addr = rad->sin_addr.s_addr;
275		sad->sin_port = rad->sin_port;
276		NFSLOCKV4ROOTMUTEX();
277		nfsv4_unlock(&nfsv4rootfs_lock, 1);
278		NFSUNLOCKV4ROOTMUTEX();
279		error = NFSERR_CLIDINUSE;
280		goto out;
281	    }
282	}
283
284	if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
285		/*
286		 * If the verifier has changed, the client has rebooted
287		 * and a new client id is issued. The old state info
288		 * can be thrown away once the SETCLIENTID_CONFIRM occurs.
289		 */
290		LIST_REMOVE(clp, lc_hash);
291		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
292		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
293		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
294		    nfsrvboottime;
295		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
296		    nfsrv_nextclientindex();
297		new_clp->lc_stateindex = 0;
298		new_clp->lc_statemaxindex = 0;
299		new_clp->lc_cbref = 0;
300		new_clp->lc_expiry = nfsrv_leaseexpiry();
301
302		/*
303		 * Save the state until confirmed.
304		 */
305		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
306		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
307			tstp->ls_clp = new_clp;
308		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
309		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
310			tstp->ls_clp = new_clp;
311		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
312		    ls_list);
313		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
314			tstp->ls_clp = new_clp;
315		for (i = 0; i < NFSSTATEHASHSIZE; i++) {
316			LIST_NEWHEAD(&new_clp->lc_stateid[i],
317			    &clp->lc_stateid[i], ls_hash);
318			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
319				tstp->ls_clp = new_clp;
320		}
321		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
322		    lc_hash);
323		newnfsstats.srvclients++;
324		nfsrv_openpluslock++;
325		nfsrv_clients++;
326		NFSLOCKV4ROOTMUTEX();
327		nfsv4_unlock(&nfsv4rootfs_lock, 1);
328		NFSUNLOCKV4ROOTMUTEX();
329
330		/*
331		 * Must wait until any outstanding callback on the old clp
332		 * completes.
333		 */
334		NFSLOCKSTATE();
335		while (clp->lc_cbref) {
336			clp->lc_flags |= LCL_WAKEUPWANTED;
337			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
338			    "nfsd clp", 10 * hz);
339		}
340		NFSUNLOCKSTATE();
341		nfsrv_zapclient(clp, p);
342		*new_clpp = NULL;
343		goto out;
344	}
345	/*
346	 * id and verifier match, so update the net address info
347	 * and get rid of any existing callback authentication
348	 * handle, so a new one will be acquired.
349	 */
350	LIST_REMOVE(clp, lc_hash);
351	new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
352	new_clp->lc_expiry = nfsrv_leaseexpiry();
353	confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
354	clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
355	    clp->lc_clientid.lval[0];
356	clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
357	    clp->lc_clientid.lval[1];
358	new_clp->lc_delegtime = clp->lc_delegtime;
359	new_clp->lc_stateindex = clp->lc_stateindex;
360	new_clp->lc_statemaxindex = clp->lc_statemaxindex;
361	new_clp->lc_cbref = 0;
362	LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
363	LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
364		tstp->ls_clp = new_clp;
365	LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
366	LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
367		tstp->ls_clp = new_clp;
368	LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
369	LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
370		tstp->ls_clp = new_clp;
371	for (i = 0; i < NFSSTATEHASHSIZE; i++) {
372		LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i],
373		    ls_hash);
374		LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
375			tstp->ls_clp = new_clp;
376	}
377	LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
378	    lc_hash);
379	newnfsstats.srvclients++;
380	nfsrv_openpluslock++;
381	nfsrv_clients++;
382	NFSLOCKV4ROOTMUTEX();
383	nfsv4_unlock(&nfsv4rootfs_lock, 1);
384	NFSUNLOCKV4ROOTMUTEX();
385
386	/*
387	 * Must wait until any outstanding callback on the old clp
388	 * completes.
389	 */
390	NFSLOCKSTATE();
391	while (clp->lc_cbref) {
392		clp->lc_flags |= LCL_WAKEUPWANTED;
393		(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp",
394		    10 * hz);
395	}
396	NFSUNLOCKSTATE();
397	nfsrv_zapclient(clp, p);
398	*new_clpp = NULL;
399
400out:
401	NFSEXITCODE2(error, nd);
402	return (error);
403}
404
405/*
406 * Check to see if the client id exists and optionally confirm it.
407 */
408APPLESTATIC int
409nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
410    nfsquad_t confirm, struct nfsrv_descript *nd, NFSPROC_T *p)
411{
412	struct nfsclient *clp;
413	struct nfsstate *stp;
414	int i;
415	struct nfsclienthashhead *hp;
416	int error = 0, igotlock, doneok;
417
418	if (clpp)
419		*clpp = NULL;
420	if (nfsrvboottime != clientid.lval[0]) {
421		error = NFSERR_STALECLIENTID;
422		goto out;
423	}
424
425	/*
426	 * If called with opflags == CLOPS_RENEW, the State Lock is
427	 * already held. Otherwise, we need to get either that or,
428	 * for the case of Confirm, lock out the nfsd threads.
429	 */
430	if (opflags & CLOPS_CONFIRM) {
431		NFSLOCKV4ROOTMUTEX();
432		nfsv4_relref(&nfsv4rootfs_lock);
433		do {
434			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
435			    NFSV4ROOTLOCKMUTEXPTR, NULL);
436		} while (!igotlock);
437		NFSUNLOCKV4ROOTMUTEX();
438	} else if (opflags != CLOPS_RENEW) {
439		NFSLOCKSTATE();
440	}
441
442	hp = NFSCLIENTHASH(clientid);
443	LIST_FOREACH(clp, hp, lc_hash) {
444		if (clp->lc_clientid.lval[1] == clientid.lval[1])
445			break;
446	}
447	if (clp == LIST_END(hp)) {
448		if (opflags & CLOPS_CONFIRM)
449			error = NFSERR_STALECLIENTID;
450		else
451			error = NFSERR_EXPIRED;
452	} else if (clp->lc_flags & LCL_ADMINREVOKED) {
453		/*
454		 * If marked admin revoked, just return the error.
455		 */
456		error = NFSERR_ADMINREVOKED;
457	}
458	if (error) {
459		if (opflags & CLOPS_CONFIRM) {
460			NFSLOCKV4ROOTMUTEX();
461			nfsv4_unlock(&nfsv4rootfs_lock, 1);
462			NFSUNLOCKV4ROOTMUTEX();
463		} else if (opflags != CLOPS_RENEW) {
464			NFSUNLOCKSTATE();
465		}
466		goto out;
467	}
468
469	/*
470	 * Perform any operations specified by the opflags.
471	 */
472	if (opflags & CLOPS_CONFIRM) {
473		if (clp->lc_confirm.qval != confirm.qval)
474			error = NFSERR_STALECLIENTID;
475		else if (nfsrv_notsamecredname(nd, clp))
476			error = NFSERR_CLIDINUSE;
477
478		if (!error) {
479		    if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
480			LCL_NEEDSCONFIRM) {
481			/*
482			 * Hang onto the delegations (as old delegations)
483			 * for an Open with CLAIM_DELEGATE_PREV unless in
484			 * grace, but get rid of the rest of the state.
485			 */
486			nfsrv_cleanclient(clp, p);
487			nfsrv_freedeleglist(&clp->lc_olddeleg);
488			if (nfsrv_checkgrace(0)) {
489			    /* In grace, so just delete delegations */
490			    nfsrv_freedeleglist(&clp->lc_deleg);
491			} else {
492			    LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
493				stp->ls_flags |= NFSLCK_OLDDELEG;
494			    clp->lc_delegtime = NFSD_MONOSEC +
495				nfsrv_lease + NFSRV_LEASEDELTA;
496			    LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
497				ls_list);
498			}
499		    }
500		    clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
501		    if (clp->lc_program)
502			clp->lc_flags |= LCL_NEEDSCBNULL;
503		}
504	} else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
505		error = NFSERR_EXPIRED;
506	}
507
508	/*
509	 * If called by the Renew Op, we must check the principal.
510	 */
511	if (!error && (opflags & CLOPS_RENEWOP)) {
512	    if (nfsrv_notsamecredname(nd, clp)) {
513		doneok = 0;
514		for (i = 0; i < NFSSTATEHASHSIZE && doneok == 0; i++) {
515		    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
516			if ((stp->ls_flags & NFSLCK_OPEN) &&
517			    stp->ls_uid == nd->nd_cred->cr_uid) {
518				doneok = 1;
519				break;
520			}
521		    }
522		}
523		if (!doneok)
524			error = NFSERR_ACCES;
525	    }
526	    if (!error && (clp->lc_flags & LCL_CBDOWN))
527		error = NFSERR_CBPATHDOWN;
528	}
529	if ((!error || error == NFSERR_CBPATHDOWN) &&
530	     (opflags & CLOPS_RENEW)) {
531		clp->lc_expiry = nfsrv_leaseexpiry();
532	}
533	if (opflags & CLOPS_CONFIRM) {
534		NFSLOCKV4ROOTMUTEX();
535		nfsv4_unlock(&nfsv4rootfs_lock, 1);
536		NFSUNLOCKV4ROOTMUTEX();
537	} else if (opflags != CLOPS_RENEW) {
538		NFSUNLOCKSTATE();
539	}
540	if (clpp)
541		*clpp = clp;
542
543out:
544	NFSEXITCODE2(error, nd);
545	return (error);
546}
547
548/*
549 * Called from the new nfssvc syscall to admin revoke a clientid.
550 * Returns 0 for success, error otherwise.
551 */
552APPLESTATIC int
553nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
554{
555	struct nfsclient *clp = NULL;
556	int i, error = 0;
557	int gotit, igotlock;
558
559	/*
560	 * First, lock out the nfsd so that state won't change while the
561	 * revocation record is being written to the stable storage restart
562	 * file.
563	 */
564	NFSLOCKV4ROOTMUTEX();
565	do {
566		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
567		    NFSV4ROOTLOCKMUTEXPTR, NULL);
568	} while (!igotlock);
569	NFSUNLOCKV4ROOTMUTEX();
570
571	/*
572	 * Search for a match in the client list.
573	 */
574	gotit = i = 0;
575	while (i < NFSCLIENTHASHSIZE && !gotit) {
576	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
577		if (revokep->nclid_idlen == clp->lc_idlen &&
578		    !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
579			gotit = 1;
580			break;
581		}
582	    }
583	    i++;
584	}
585	if (!gotit) {
586		NFSLOCKV4ROOTMUTEX();
587		nfsv4_unlock(&nfsv4rootfs_lock, 0);
588		NFSUNLOCKV4ROOTMUTEX();
589		error = EPERM;
590		goto out;
591	}
592
593	/*
594	 * Now, write out the revocation record
595	 */
596	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
597	nfsrv_backupstable();
598
599	/*
600	 * and clear out the state, marking the clientid revoked.
601	 */
602	clp->lc_flags &= ~LCL_CALLBACKSON;
603	clp->lc_flags |= LCL_ADMINREVOKED;
604	nfsrv_cleanclient(clp, p);
605	nfsrv_freedeleglist(&clp->lc_deleg);
606	nfsrv_freedeleglist(&clp->lc_olddeleg);
607	NFSLOCKV4ROOTMUTEX();
608	nfsv4_unlock(&nfsv4rootfs_lock, 0);
609	NFSUNLOCKV4ROOTMUTEX();
610
611out:
612	NFSEXITCODE(error);
613	return (error);
614}
615
616/*
617 * Dump out stats for all clients. Called from nfssvc(2), that is used
618 * newnfsstats.
619 */
620APPLESTATIC void
621nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
622{
623	struct nfsclient *clp;
624	int i = 0, cnt = 0;
625
626	/*
627	 * First, get a reference on the nfsv4rootfs_lock so that an
628	 * exclusive lock cannot be acquired while dumping the clients.
629	 */
630	NFSLOCKV4ROOTMUTEX();
631	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
632	NFSUNLOCKV4ROOTMUTEX();
633	NFSLOCKSTATE();
634	/*
635	 * Rattle through the client lists until done.
636	 */
637	while (i < NFSCLIENTHASHSIZE && cnt < maxcnt) {
638	    clp = LIST_FIRST(&nfsclienthash[i]);
639	    while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
640		nfsrv_dumpaclient(clp, &dumpp[cnt]);
641		cnt++;
642		clp = LIST_NEXT(clp, lc_hash);
643	    }
644	    i++;
645	}
646	if (cnt < maxcnt)
647	    dumpp[cnt].ndcl_clid.nclid_idlen = 0;
648	NFSUNLOCKSTATE();
649	NFSLOCKV4ROOTMUTEX();
650	nfsv4_relref(&nfsv4rootfs_lock);
651	NFSUNLOCKV4ROOTMUTEX();
652}
653
654/*
655 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
656 */
657static void
658nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
659{
660	struct nfsstate *stp, *openstp, *lckownstp;
661	struct nfslock *lop;
662	struct sockaddr *sad;
663	struct sockaddr_in *rad;
664	struct sockaddr_in6 *rad6;
665
666	dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
667	dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
668	dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
669	dumpp->ndcl_flags = clp->lc_flags;
670	dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
671	NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
672	sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *);
673	dumpp->ndcl_addrfam = sad->sa_family;
674	if (sad->sa_family == AF_INET) {
675		rad = (struct sockaddr_in *)sad;
676		dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr;
677	} else {
678		rad6 = (struct sockaddr_in6 *)sad;
679		dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr;
680	}
681
682	/*
683	 * Now, scan the state lists and total up the opens and locks.
684	 */
685	LIST_FOREACH(stp, &clp->lc_open, ls_list) {
686	    dumpp->ndcl_nopenowners++;
687	    LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
688		dumpp->ndcl_nopens++;
689		LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
690		    dumpp->ndcl_nlockowners++;
691		    LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
692			dumpp->ndcl_nlocks++;
693		    }
694		}
695	    }
696	}
697
698	/*
699	 * and the delegation lists.
700	 */
701	LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
702	    dumpp->ndcl_ndelegs++;
703	}
704	LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
705	    dumpp->ndcl_nolddelegs++;
706	}
707}
708
709/*
710 * Dump out lock stats for a file.
711 */
712APPLESTATIC void
713nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
714    NFSPROC_T *p)
715{
716	struct nfsstate *stp;
717	struct nfslock *lop;
718	int cnt = 0;
719	struct nfslockfile *lfp;
720	struct sockaddr *sad;
721	struct sockaddr_in *rad;
722	struct sockaddr_in6 *rad6;
723	int ret;
724	fhandle_t nfh;
725
726	ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
727	/*
728	 * First, get a reference on the nfsv4rootfs_lock so that an
729	 * exclusive lock on it cannot be acquired while dumping the locks.
730	 */
731	NFSLOCKV4ROOTMUTEX();
732	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
733	NFSUNLOCKV4ROOTMUTEX();
734	NFSLOCKSTATE();
735	if (!ret)
736		ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
737	if (ret) {
738		ldumpp[0].ndlck_clid.nclid_idlen = 0;
739		NFSUNLOCKSTATE();
740		NFSLOCKV4ROOTMUTEX();
741		nfsv4_relref(&nfsv4rootfs_lock);
742		NFSUNLOCKV4ROOTMUTEX();
743		return;
744	}
745
746	/*
747	 * For each open share on file, dump it out.
748	 */
749	stp = LIST_FIRST(&lfp->lf_open);
750	while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
751		ldumpp[cnt].ndlck_flags = stp->ls_flags;
752		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
753		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
754		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
755		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
756		ldumpp[cnt].ndlck_owner.nclid_idlen =
757		    stp->ls_openowner->ls_ownerlen;
758		NFSBCOPY(stp->ls_openowner->ls_owner,
759		    ldumpp[cnt].ndlck_owner.nclid_id,
760		    stp->ls_openowner->ls_ownerlen);
761		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
762		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
763		    stp->ls_clp->lc_idlen);
764		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
765		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
766		if (sad->sa_family == AF_INET) {
767			rad = (struct sockaddr_in *)sad;
768			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
769		} else {
770			rad6 = (struct sockaddr_in6 *)sad;
771			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
772		}
773		stp = LIST_NEXT(stp, ls_file);
774		cnt++;
775	}
776
777	/*
778	 * and all locks.
779	 */
780	lop = LIST_FIRST(&lfp->lf_lock);
781	while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
782		stp = lop->lo_stp;
783		ldumpp[cnt].ndlck_flags = lop->lo_flags;
784		ldumpp[cnt].ndlck_first = lop->lo_first;
785		ldumpp[cnt].ndlck_end = lop->lo_end;
786		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
787		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
788		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
789		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
790		ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
791		NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
792		    stp->ls_ownerlen);
793		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
794		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
795		    stp->ls_clp->lc_idlen);
796		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
797		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
798		if (sad->sa_family == AF_INET) {
799			rad = (struct sockaddr_in *)sad;
800			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
801		} else {
802			rad6 = (struct sockaddr_in6 *)sad;
803			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
804		}
805		lop = LIST_NEXT(lop, lo_lckfile);
806		cnt++;
807	}
808
809	/*
810	 * and the delegations.
811	 */
812	stp = LIST_FIRST(&lfp->lf_deleg);
813	while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
814		ldumpp[cnt].ndlck_flags = stp->ls_flags;
815		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
816		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
817		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
818		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
819		ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
820		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
821		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
822		    stp->ls_clp->lc_idlen);
823		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
824		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
825		if (sad->sa_family == AF_INET) {
826			rad = (struct sockaddr_in *)sad;
827			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
828		} else {
829			rad6 = (struct sockaddr_in6 *)sad;
830			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
831		}
832		stp = LIST_NEXT(stp, ls_file);
833		cnt++;
834	}
835
836	/*
837	 * If list isn't full, mark end of list by setting the client name
838	 * to zero length.
839	 */
840	if (cnt < maxcnt)
841		ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
842	NFSUNLOCKSTATE();
843	NFSLOCKV4ROOTMUTEX();
844	nfsv4_relref(&nfsv4rootfs_lock);
845	NFSUNLOCKV4ROOTMUTEX();
846}
847
848/*
849 * Server timer routine. It can scan any linked list, so long
850 * as it holds the spin/mutex lock and there is no exclusive lock on
851 * nfsv4rootfs_lock.
852 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
853 *  to do this from a callout, since the spin locks work. For
854 *  Darwin, I'm not sure what will work correctly yet.)
855 * Should be called once per second.
856 */
857APPLESTATIC void
858nfsrv_servertimer(void)
859{
860	struct nfsclient *clp, *nclp;
861	struct nfsstate *stp, *nstp;
862	int got_ref, i;
863
864	/*
865	 * Make sure nfsboottime is set. This is used by V3 as well
866	 * as V4. Note that nfsboottime is not nfsrvboottime, which is
867	 * only used by the V4 server for leases.
868	 */
869	if (nfsboottime.tv_sec == 0)
870		NFSSETBOOTTIME(nfsboottime);
871
872	/*
873	 * If server hasn't started yet, just return.
874	 */
875	NFSLOCKSTATE();
876	if (nfsrv_stablefirst.nsf_eograce == 0) {
877		NFSUNLOCKSTATE();
878		return;
879	}
880	if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
881		if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
882		    NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
883			nfsrv_stablefirst.nsf_flags |=
884			    (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
885		NFSUNLOCKSTATE();
886		return;
887	}
888
889	/*
890	 * Try and get a reference count on the nfsv4rootfs_lock so that
891	 * no nfsd thread can acquire an exclusive lock on it before this
892	 * call is done. If it is already exclusively locked, just return.
893	 */
894	NFSLOCKV4ROOTMUTEX();
895	got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
896	NFSUNLOCKV4ROOTMUTEX();
897	if (got_ref == 0) {
898		NFSUNLOCKSTATE();
899		return;
900	}
901
902	/*
903	 * For each client...
904	 */
905	for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
906	    clp = LIST_FIRST(&nfsclienthash[i]);
907	    while (clp != LIST_END(&nfsclienthash[i])) {
908		nclp = LIST_NEXT(clp, lc_hash);
909		if (!(clp->lc_flags & LCL_EXPIREIT)) {
910		    if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
911			 && ((LIST_EMPTY(&clp->lc_deleg)
912			      && LIST_EMPTY(&clp->lc_open)) ||
913			     nfsrv_clients > nfsrv_clienthighwater)) ||
914			(clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
915			(clp->lc_expiry < NFSD_MONOSEC &&
916			 (nfsrv_openpluslock * 10 / 9) > NFSRV_V4STATELIMIT)) {
917			/*
918			 * Lease has expired several nfsrv_lease times ago:
919			 * PLUS
920			 *    - no state is associated with it
921			 *    OR
922			 *    - above high water mark for number of clients
923			 *      (nfsrv_clienthighwater should be large enough
924			 *       that this only occurs when clients fail to
925			 *       use the same nfs_client_id4.id. Maybe somewhat
926			 *       higher that the maximum number of clients that
927			 *       will mount this server?)
928			 * OR
929			 * Lease has expired a very long time ago
930			 * OR
931			 * Lease has expired PLUS the number of opens + locks
932			 * has exceeded 90% of capacity
933			 *
934			 * --> Mark for expiry. The actual expiry will be done
935			 *     by an nfsd sometime soon.
936			 */
937			clp->lc_flags |= LCL_EXPIREIT;
938			nfsrv_stablefirst.nsf_flags |=
939			    (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
940		    } else {
941			/*
942			 * If there are no opens, increment no open tick cnt
943			 * If time exceeds NFSNOOPEN, mark it to be thrown away
944			 * otherwise, if there is an open, reset no open time
945			 * Hopefully, this will avoid excessive re-creation
946			 * of open owners and subsequent open confirms.
947			 */
948			stp = LIST_FIRST(&clp->lc_open);
949			while (stp != LIST_END(&clp->lc_open)) {
950				nstp = LIST_NEXT(stp, ls_list);
951				if (LIST_EMPTY(&stp->ls_open)) {
952					stp->ls_noopens++;
953					if (stp->ls_noopens > NFSNOOPEN ||
954					    (nfsrv_openpluslock * 2) >
955					    NFSRV_V4STATELIMIT)
956						nfsrv_stablefirst.nsf_flags |=
957							NFSNSF_NOOPENS;
958				} else {
959					stp->ls_noopens = 0;
960				}
961				stp = nstp;
962			}
963		    }
964		}
965		clp = nclp;
966	    }
967	}
968	NFSUNLOCKSTATE();
969	NFSLOCKV4ROOTMUTEX();
970	nfsv4_relref(&nfsv4rootfs_lock);
971	NFSUNLOCKV4ROOTMUTEX();
972}
973
974/*
975 * The following set of functions free up the various data structures.
976 */
977/*
978 * Clear out all open/lock state related to this nfsclient.
979 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
980 * there are no other active nfsd threads.
981 */
982APPLESTATIC void
983nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
984{
985	struct nfsstate *stp, *nstp;
986
987	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
988		nfsrv_freeopenowner(stp, 1, p);
989}
990
991/*
992 * Free a client that has been cleaned. It should also already have been
993 * removed from the lists.
994 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
995 *  softclock interrupts are enabled.)
996 */
997APPLESTATIC void
998nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
999{
1000
1001#ifdef notyet
1002	if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1003	     (LCL_GSS | LCL_CALLBACKSON) &&
1004	    (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1005	    clp->lc_handlelen > 0) {
1006		clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1007		clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1008		(void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1009			NULL, 0, NULL, NULL, NULL, p);
1010	}
1011#endif
1012	newnfs_disconnect(&clp->lc_req);
1013	NFSSOCKADDRFREE(clp->lc_req.nr_nam);
1014	NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1015	free((caddr_t)clp, M_NFSDCLIENT);
1016	NFSLOCKSTATE();
1017	newnfsstats.srvclients--;
1018	nfsrv_openpluslock--;
1019	nfsrv_clients--;
1020	NFSUNLOCKSTATE();
1021}
1022
1023/*
1024 * Free a list of delegation state structures.
1025 * (This function will also free all nfslockfile structures that no
1026 *  longer have associated state.)
1027 */
1028APPLESTATIC void
1029nfsrv_freedeleglist(struct nfsstatehead *sthp)
1030{
1031	struct nfsstate *stp, *nstp;
1032
1033	LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1034		nfsrv_freedeleg(stp);
1035	}
1036	LIST_INIT(sthp);
1037}
1038
1039/*
1040 * Free up a delegation.
1041 */
1042static void
1043nfsrv_freedeleg(struct nfsstate *stp)
1044{
1045	struct nfslockfile *lfp;
1046
1047	LIST_REMOVE(stp, ls_hash);
1048	LIST_REMOVE(stp, ls_list);
1049	LIST_REMOVE(stp, ls_file);
1050	lfp = stp->ls_lfp;
1051	if (LIST_EMPTY(&lfp->lf_open) &&
1052	    LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1053	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1054	    lfp->lf_usecount == 0 &&
1055	    nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1056		nfsrv_freenfslockfile(lfp);
1057	FREE((caddr_t)stp, M_NFSDSTATE);
1058	newnfsstats.srvdelegates--;
1059	nfsrv_openpluslock--;
1060	nfsrv_delegatecnt--;
1061}
1062
1063/*
1064 * This function frees an open owner and all associated opens.
1065 */
1066static void
1067nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1068{
1069	struct nfsstate *nstp, *tstp;
1070
1071	LIST_REMOVE(stp, ls_list);
1072	/*
1073	 * Now, free all associated opens.
1074	 */
1075	nstp = LIST_FIRST(&stp->ls_open);
1076	while (nstp != LIST_END(&stp->ls_open)) {
1077		tstp = nstp;
1078		nstp = LIST_NEXT(nstp, ls_list);
1079		(void) nfsrv_freeopen(tstp, NULL, cansleep, p);
1080	}
1081	if (stp->ls_op)
1082		nfsrvd_derefcache(stp->ls_op);
1083	FREE((caddr_t)stp, M_NFSDSTATE);
1084	newnfsstats.srvopenowners--;
1085	nfsrv_openpluslock--;
1086}
1087
1088/*
1089 * This function frees an open (nfsstate open structure) with all associated
1090 * lock_owners and locks. It also frees the nfslockfile structure iff there
1091 * are no other opens on the file.
1092 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1093 */
1094static int
1095nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1096{
1097	struct nfsstate *nstp, *tstp;
1098	struct nfslockfile *lfp;
1099	int ret;
1100
1101	LIST_REMOVE(stp, ls_hash);
1102	LIST_REMOVE(stp, ls_list);
1103	LIST_REMOVE(stp, ls_file);
1104
1105	lfp = stp->ls_lfp;
1106	/*
1107	 * Now, free all lockowners associated with this open.
1108	 */
1109	LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1110		nfsrv_freelockowner(tstp, vp, cansleep, p);
1111
1112	/*
1113	 * The nfslockfile is freed here if there are no locks
1114	 * associated with the open.
1115	 * If there are locks associated with the open, the
1116	 * nfslockfile structure can be freed via nfsrv_freelockowner().
1117	 * Acquire the state mutex to avoid races with calls to
1118	 * nfsrv_getlockfile().
1119	 */
1120	if (cansleep != 0)
1121		NFSLOCKSTATE();
1122	if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1123	    LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1124	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1125	    lfp->lf_usecount == 0 &&
1126	    (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
1127		nfsrv_freenfslockfile(lfp);
1128		ret = 1;
1129	} else
1130		ret = 0;
1131	if (cansleep != 0)
1132		NFSUNLOCKSTATE();
1133	FREE((caddr_t)stp, M_NFSDSTATE);
1134	newnfsstats.srvopens--;
1135	nfsrv_openpluslock--;
1136	return (ret);
1137}
1138
1139/*
1140 * Frees a lockowner and all associated locks.
1141 */
1142static void
1143nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1144    NFSPROC_T *p)
1145{
1146
1147	LIST_REMOVE(stp, ls_hash);
1148	LIST_REMOVE(stp, ls_list);
1149	nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1150	if (stp->ls_op)
1151		nfsrvd_derefcache(stp->ls_op);
1152	FREE((caddr_t)stp, M_NFSDSTATE);
1153	newnfsstats.srvlockowners--;
1154	nfsrv_openpluslock--;
1155}
1156
1157/*
1158 * Free all the nfs locks on a lockowner.
1159 */
1160static void
1161nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1162    NFSPROC_T *p)
1163{
1164	struct nfslock *lop, *nlop;
1165	struct nfsrollback *rlp, *nrlp;
1166	struct nfslockfile *lfp = NULL;
1167	int gottvp = 0;
1168	vnode_t tvp = NULL;
1169	uint64_t first, end;
1170
1171	lop = LIST_FIRST(&stp->ls_lock);
1172	while (lop != LIST_END(&stp->ls_lock)) {
1173		nlop = LIST_NEXT(lop, lo_lckowner);
1174		/*
1175		 * Since all locks should be for the same file, lfp should
1176		 * not change.
1177		 */
1178		if (lfp == NULL)
1179			lfp = lop->lo_lfp;
1180		else if (lfp != lop->lo_lfp)
1181			panic("allnfslocks");
1182		/*
1183		 * If vp is NULL and cansleep != 0, a vnode must be acquired
1184		 * from the file handle. This only occurs when called from
1185		 * nfsrv_cleanclient().
1186		 */
1187		if (gottvp == 0) {
1188			if (nfsrv_dolocallocks == 0)
1189				tvp = NULL;
1190			else if (vp == NULL && cansleep != 0)
1191				tvp = nfsvno_getvp(&lfp->lf_fh);
1192			else
1193				tvp = vp;
1194			gottvp = 1;
1195		}
1196
1197		if (tvp != NULL) {
1198			if (cansleep == 0)
1199				panic("allnfs2");
1200			first = lop->lo_first;
1201			end = lop->lo_end;
1202			nfsrv_freenfslock(lop);
1203			nfsrv_localunlock(tvp, lfp, first, end, p);
1204			LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1205			    nrlp)
1206				free(rlp, M_NFSDROLLBACK);
1207			LIST_INIT(&lfp->lf_rollback);
1208		} else
1209			nfsrv_freenfslock(lop);
1210		lop = nlop;
1211	}
1212	if (vp == NULL && tvp != NULL)
1213		vput(tvp);
1214}
1215
1216/*
1217 * Free an nfslock structure.
1218 */
1219static void
1220nfsrv_freenfslock(struct nfslock *lop)
1221{
1222
1223	if (lop->lo_lckfile.le_prev != NULL) {
1224		LIST_REMOVE(lop, lo_lckfile);
1225		newnfsstats.srvlocks--;
1226		nfsrv_openpluslock--;
1227	}
1228	LIST_REMOVE(lop, lo_lckowner);
1229	FREE((caddr_t)lop, M_NFSDLOCK);
1230}
1231
1232/*
1233 * This function frees an nfslockfile structure.
1234 */
1235static void
1236nfsrv_freenfslockfile(struct nfslockfile *lfp)
1237{
1238
1239	LIST_REMOVE(lfp, lf_hash);
1240	FREE((caddr_t)lfp, M_NFSDLOCKFILE);
1241}
1242
1243/*
1244 * This function looks up an nfsstate structure via stateid.
1245 */
1246static int
1247nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1248    struct nfsstate **stpp)
1249{
1250	struct nfsstate *stp;
1251	struct nfsstatehead *hp;
1252	int error = 0;
1253
1254	*stpp = NULL;
1255	hp = NFSSTATEHASH(clp, *stateidp);
1256	LIST_FOREACH(stp, hp, ls_hash) {
1257		if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1258			NFSX_STATEIDOTHER))
1259			break;
1260	}
1261
1262	/*
1263	 * If no state id in list, return NFSERR_BADSTATEID.
1264	 */
1265	if (stp == LIST_END(hp)) {
1266		error = NFSERR_BADSTATEID;
1267		goto out;
1268	}
1269	*stpp = stp;
1270
1271out:
1272	NFSEXITCODE(error);
1273	return (error);
1274}
1275
1276/*
1277 * This function gets an nfsstate structure via owner string.
1278 */
1279static void
1280nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1281    struct nfsstate **stpp)
1282{
1283	struct nfsstate *stp;
1284
1285	*stpp = NULL;
1286	LIST_FOREACH(stp, hp, ls_list) {
1287		if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1288		  !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1289			*stpp = stp;
1290			return;
1291		}
1292	}
1293}
1294
1295/*
1296 * Lock control function called to update lock status.
1297 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1298 * that one isn't to be created and an NFSERR_xxx for other errors.
1299 * The structures new_stp and new_lop are passed in as pointers that should
1300 * be set to NULL if the structure is used and shouldn't be free'd.
1301 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1302 * never used and can safely be allocated on the stack. For all other
1303 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1304 * in case they are used.
 *
 * The operation is selected by the flag bits in (*new_stpp)->ls_flags
 * (NFSLCK_LOCK, NFSLCK_UNLOCK, NFSLCK_TEST, NFSLCK_CHECK, NFSLCK_SETATTR,
 * optionally with NFSLCK_OPENTOLOCK and NFSLCK_RECLAIM).
 *
 * Arguments, as used below:
 *   vp        - vnode of the file the operation applies to
 *   new_stpp  - in: stateid, flags and owner for the request; *new_stpp is
 *               set to NULL if the structure gets linked into the state
 *   new_lopp  - in: byte range and lock type; *new_lopp is set to NULL if
 *               the structure gets linked into the state
 *   cfp       - if not NULL, filled in with a description of the
 *               conflicting lock when one is found
 *   clientid  - clientid used to look up the nfsclient
 *   stateidp  - out: updated stateid after a successful lock or unlock
 *   exp       - unused
 *   nd        - request descriptor; a non-zero nd->nd_repstat set by the
 *               caller is returned once the seqid handling has been done
 *   p         - thread/process pointer handed on to helper functions
 *
 * Several helper calls drop the state lock when they find a conflict that
 * they could deal with; the function then restarts from "tryagain".
1305 */
1306APPLESTATIC int
1307nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1308    struct nfslock **new_lopp, struct nfslockconflict *cfp,
1309    nfsquad_t clientid, nfsv4stateid_t *stateidp,
1310    __unused struct nfsexstuff *exp,
1311    struct nfsrv_descript *nd, NFSPROC_T *p)
1312{
1313	struct nfslock *lop;
1314	struct nfsstate *new_stp = *new_stpp;
1315	struct nfslock *new_lop = *new_lopp;
1316	struct nfsstate *tstp, *mystp, *nstp;
1317	int specialid = 0;
1318	struct nfslockfile *lfp;
1319	struct nfslock *other_lop = NULL;
1320	struct nfsstate *stp, *lckstp = NULL;
1321	struct nfsclient *clp = NULL;
1322	u_int32_t bits;
1323	int error = 0, haslock = 0, ret, reterr;
1324	int getlckret, delegation = 0, filestruct_locked;
1325	fhandle_t nfh;
1326	uint64_t first, end;
1327	uint32_t lock_flags;
1328
1329	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1330		/*
1331		 * Note the special cases of "all 1s" or "all 0s" stateids and
1332		 * let reads with all 1s go ahead.
		 * specialid: 0 = ordinary stateid, 1 = all 0s, 2 = all 1s.
1333		 */
1334		if (new_stp->ls_stateid.seqid == 0x0 &&
1335		    new_stp->ls_stateid.other[0] == 0x0 &&
1336		    new_stp->ls_stateid.other[1] == 0x0 &&
1337		    new_stp->ls_stateid.other[2] == 0x0)
1338			specialid = 1;
1339		else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1340		    new_stp->ls_stateid.other[0] == 0xffffffff &&
1341		    new_stp->ls_stateid.other[1] == 0xffffffff &&
1342		    new_stp->ls_stateid.other[2] == 0xffffffff)
1343			specialid = 2;
1344	}
1345
1346	/*
1347	 * Check for restart conditions (client and server).
1348	 */
1349	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1350	    &new_stp->ls_stateid, specialid);
1351	if (error)
1352		goto out;
1353
1354	/*
1355	 * Check for state resource limit exceeded.
1356	 */
1357	if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1358	    nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
1359		error = NFSERR_RESOURCE;
1360		goto out;
1361	}
1362
1363	/*
1364	 * For the lock case, get another nfslock structure,
1365	 * just in case we need it.
1366	 * Malloc now, before we start sifting through the linked lists,
1367	 * in case we have to wait for memory.
	 *
	 * "tryagain" is the restart point: it is jumped to after a helper
	 * has dropped the state lock while handling a conflict, so the
	 * per-attempt variables are reset below.
1368	 */
1369tryagain:
1370	if (new_stp->ls_flags & NFSLCK_LOCK)
1371		MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock),
1372		    M_NFSDLOCK, M_WAITOK);
1373	filestruct_locked = 0;
1374	reterr = 0;
1375	lfp = NULL;
1376
1377	/*
1378	 * Get the lockfile structure for CFH now, so we can do a sanity
1379	 * check against the stateid, before incrementing the seqid#, since
1380	 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1381	 * shouldn't be incremented for this case.
1382	 * If nfsrv_getlockfile() returns -1, it means "not found", which
1383	 * will be handled later.
1384	 * If we are doing Lock/LockU and local locking is enabled, sleep
1385	 * lock the nfslockfile structure.
1386	 */
1387	getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1388	NFSLOCKSTATE();
1389	if (getlckret == 0) {
1390		if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1391		    nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1392			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1393			    &lfp, &nfh, 1);
1394			if (getlckret == 0)
1395				filestruct_locked = 1;
1396		} else
1397			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1398			    &lfp, &nfh, 0);
1399	}
	/* Any failure other than "not found" is remembered in reterr. */
1400	if (getlckret != 0 && getlckret != -1)
1401		reterr = getlckret;
1402
1403	if (filestruct_locked != 0) {
		/* Start this attempt with an empty rollback list. */
1404		LIST_INIT(&lfp->lf_rollback);
1405		if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1406			/*
1407			 * For local locking, do the advisory locking now, so
1408			 * that any conflict can be detected. A failure later
1409			 * can be rolled back locally. If an error is returned,
1410			 * struct nfslockfile has been unlocked and any local
1411			 * locking rolled back.
			 * The state lock is dropped around the call.
1412			 */
1413			NFSUNLOCKSTATE();
1414			reterr = nfsrv_locallock(vp, lfp,
1415			    (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1416			    new_lop->lo_first, new_lop->lo_end, cfp, p);
1417			NFSLOCKSTATE();
1418		}
1419	}
1420
1421	if (specialid == 0) {
1422	    if (new_stp->ls_flags & NFSLCK_TEST) {
1423		/*
1424		 * RFC 3530 does not list LockT as an op that renews a
1425		 * lease, but the consensus seems to be that it is ok
1426		 * for a server to do so.
1427		 */
1428		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
1429		    (nfsquad_t)((u_quad_t)0), NULL, p);
1430
1431		/*
1432		 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1433		 * error returns for LockT, just go ahead and test for a lock,
1434		 * since there are no locks for this client, but other locks
1435		 * can conflict. (ie. same client will always be false)
1436		 */
1437		if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1438		    error = 0;
1439		lckstp = new_stp;
1440	    } else {
1441	      error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
1442		(nfsquad_t)((u_quad_t)0), NULL, p);
1443	      if (error == 0)
1444		/*
1445		 * Look up the stateid
1446		 */
1447		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1448		  new_stp->ls_flags, &stp);
1449	      /*
1450	       * do some sanity checks for an unconfirmed open or a
1451	       * stateid that refers to the wrong file, for an open stateid
1452	       */
1453	      if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1454		  ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1455		   (getlckret == 0 && stp->ls_lfp != lfp)))
1456			error = NFSERR_BADSTATEID;
	      /* A delegation stateid must refer to this file as well. */
1457	      if (error == 0 &&
1458		  (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1459		  getlckret == 0 && stp->ls_lfp != lfp)
1460			error = NFSERR_BADSTATEID;
1461
1462	      /*
1463	       * If the lockowner stateid doesn't refer to the same file,
1464	       * I believe that is considered ok, since some clients will
1465	       * only create a single lockowner and use that for all locks
1466	       * on all files.
1467	       * For now, log it as a diagnostic, instead of considering it
1468	       * a BadStateid.
1469	       */
1470	      if (error == 0 && (stp->ls_flags &
1471		  (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1472		  getlckret == 0 && stp->ls_lfp != lfp) {
1473#ifdef DIAGNOSTIC
1474		  printf("Got a lock statid for different file open\n");
1475#endif
1476		  /*
1477		  error = NFSERR_BADSTATEID;
1478		  */
1479	      }
1480
	      /*
	       * Check the seqid and pick the lock owner state (lckstp):
	       * - open_to_lock_owner: the stateid must be for an open and
	       *   the lock owner must not exist yet, so new_stp is used
	       * - Lock/LockU with an existing lock owner: the stateid must
	       *   not be for an open
	       * - otherwise the state found via the stateid is used
	       */
1481	      if (error == 0) {
1482		    if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
1483			/*
1484			 * If haslock set, we've already checked the seqid.
1485			 */
1486			if (!haslock) {
1487			    if (stp->ls_flags & NFSLCK_OPEN)
1488				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1489				    stp->ls_openowner, new_stp->ls_op);
1490			    else
1491				error = NFSERR_BADSTATEID;
1492			}
1493			if (!error)
1494			    nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
1495			if (lckstp)
1496			    /*
1497			     * I believe this should be an error, but it
1498			     * isn't obvious what NFSERR_xxx would be
1499			     * appropriate, so I'll use NFSERR_INVAL for now.
1500			     */
1501			    error = NFSERR_INVAL;
1502			else
1503			    lckstp = new_stp;
1504		    } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
1505			/*
1506			 * If haslock set, ditto above.
1507			 */
1508			if (!haslock) {
1509			    if (stp->ls_flags & NFSLCK_OPEN)
1510				error = NFSERR_BADSTATEID;
1511			    else
1512				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1513				    stp, new_stp->ls_op);
1514			}
1515			lckstp = stp;
1516		    } else {
1517			lckstp = stp;
1518		    }
1519	      }
1520	      /*
1521	       * If the seqid part of the stateid isn't the same, return
1522	       * NFSERR_OLDSTATEID for cases other than I/O Ops.
1523	       * For I/O Ops, only return NFSERR_OLDSTATEID if
1524	       * nfsrv_returnoldstateid is set. (The consensus on the email
1525	       * list was that most clients would prefer to not receive
1526	       * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
1527	       * is what will happen, so I use the nfsrv_returnoldstateid to
1528	       * allow for either server configuration.)
1529	       */
1530	      if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
1531		  (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1532		   nfsrv_returnoldstateid))
1533		    error = NFSERR_OLDSTATEID;
1534	    }
1535	}
1536
1537	/*
1538	 * Now we can check for grace.
1539	 */
1540	if (!error)
1541		error = nfsrv_checkgrace(new_stp->ls_flags);
1542	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
1543		nfsrv_checkstable(clp))
1544		error = NFSERR_NOGRACE;
1545	/*
1546	 * If we successfully Reclaimed state, note that.
1547	 */
1548	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
1549		nfsrv_markstable(clp);
1550
1551	/*
1552	 * At this point, either error == NFSERR_BADSTATEID or the
1553	 * seqid# has been updated, so we can return any error.
1554	 * If error == 0, there may be an error in:
1555	 *    nd_repstat - Set by the calling function.
1556	 *    reterr - Set above, if getting the nfslockfile structure
1557	 *       or acquiring the local lock failed.
1558	 *    (If both of these are set, nd_repstat should probably be
1559	 *     returned, since that error was detected before this
1560	 *     function call.)
1561	 */
1562	if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
1563		if (error == 0) {
1564			if (nd->nd_repstat != 0)
1565				error = nd->nd_repstat;
1566			else
1567				error = reterr;
1568		}
1569		if (filestruct_locked != 0) {
1570			/* Roll back local locks. */
1571			NFSUNLOCKSTATE();
1572			nfsrv_locallock_rollback(vp, lfp, p);
1573			NFSLOCKSTATE();
1574			nfsrv_unlocklf(lfp);
1575		}
1576		NFSUNLOCKSTATE();
1577		goto out;
1578	}
1579
1580	/*
1581	 * Check the nfsrv_getlockfile return.
1582	 * Returned -1 if no structure found.
1583	 */
1584	if (getlckret == -1) {
		/* NFSERR_EXPIRED unless one of the cases below applies. */
1585		error = NFSERR_EXPIRED;
1586		/*
1587		 * Called from lockt, so no lock is OK.
1588		 */
1589		if (new_stp->ls_flags & NFSLCK_TEST) {
1590			error = 0;
1591		} else if (new_stp->ls_flags &
1592		    (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1593			/*
1594			 * Called to check for a lock, OK if the stateid is all
1595			 * 1s or all 0s, but there should be an nfsstate
1596			 * otherwise.
1597			 * (ie. If there is no open, I'll assume no share
1598			 *  deny bits.)
1599			 */
1600			if (specialid)
1601				error = 0;
1602			else
1603				error = NFSERR_BADSTATEID;
1604		}
1605		NFSUNLOCKSTATE();
1606		goto out;
1607	}
1608
1609	/*
1610	 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
1611	 * For NFSLCK_CHECK, allow a read if write access is granted,
1612	 * but check for a deny. For NFSLCK_LOCK, require correct access,
1613	 * which implies a conflicting deny can't exist.
1614	 */
1615	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
1616	    /*
1617	     * Four kinds of state id:
1618	     * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
1619	     * - stateid for an open
1620	     * - stateid for a delegation
1621	     * - stateid for a lock owner
	     * mystp ends up being the state whose access bits are checked
	     * (NULL for a specialid).
1622	     */
1623	    if (!specialid) {
1624		if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
1625		    delegation = 1;
1626		    mystp = stp;
1627		    nfsrv_delaydelegtimeout(stp);
1628	        } else if (stp->ls_flags & NFSLCK_OPEN) {
1629		    mystp = stp;
1630		} else {
1631		    mystp = stp->ls_openstp;
1632		}
1633		/*
1634		 * If locking or checking, require correct access
1635		 * bit set.
1636		 */
1637		if (((new_stp->ls_flags & NFSLCK_LOCK) &&
1638		     !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
1639		       mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
1640		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
1641		      (NFSLCK_CHECK | NFSLCK_READACCESS) &&
1642		     !(mystp->ls_flags & NFSLCK_READACCESS)) ||
1643		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
1644		      (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
1645		     !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
1646			if (filestruct_locked != 0) {
1647				/* Roll back local locks. */
1648				NFSUNLOCKSTATE();
1649				nfsrv_locallock_rollback(vp, lfp, p);
1650				NFSLOCKSTATE();
1651				nfsrv_unlocklf(lfp);
1652			}
1653			NFSUNLOCKSTATE();
1654			error = NFSERR_OPENMODE;
1655			goto out;
1656		}
1657	    } else
1658		mystp = NULL;
1659	    if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
1660		/*
1661		 * Check for a conflicting deny bit.
1662		 */
1663		LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
1664		    if (tstp != mystp) {
			/* Shift the deny bits down onto the access bits. */
1665			bits = tstp->ls_flags;
1666			bits >>= NFSLCK_SHIFT;
1667			if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
1668			    ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
1669				vp, p);
1670			    if (ret == 1) {
1671				/*
1672				 * nfsrv_clientconflict() unlocks state
1673				 * when it returns non-zero.
1674				 */
1675				lckstp = NULL;
1676				goto tryagain;
1677			    }
1678			    if (ret == 0)
1679				NFSUNLOCKSTATE();
1680			    if (ret == 2)
1681				error = NFSERR_PERM;
1682			    else
1683				error = NFSERR_OPENMODE;
1684			    goto out;
1685			}
1686		    }
1687		}
1688
1689		/* We're outta here */
1690		NFSUNLOCKSTATE();
1691		goto out;
1692	    }
1693	}
1694
1695	/*
1696	 * For setattr, just get rid of all the Delegations for other clients.
1697	 */
1698	if (new_stp->ls_flags & NFSLCK_SETATTR) {
1699		ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
1700		if (ret) {
1701			/*
1702			 * nfsrv_cleandeleg() unlocks state when it
1703			 * returns non-zero.
1704			 */
1705			if (ret == -1) {
1706				lckstp = NULL;
1707				goto tryagain;
1708			}
1709			error = ret;
1710			goto out;
1711		}
		/*
		 * Done unless NFSLCK_CHECK is also set and some state
		 * remains on the file.
		 */
1712		if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1713		    (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
1714		     LIST_EMPTY(&lfp->lf_deleg))) {
1715			NFSUNLOCKSTATE();
1716			goto out;
1717		}
1718	}
1719
1720	/*
1721	 * Check for a conflicting delegation. If one is found, call
1722	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
1723	 * been set yet, it will get the lock. Otherwise, it will recall
1724	 * the delegation. Then, we try again...
1725	 * I currently believe the conflict algorithm to be:
1726	 * For Lock Ops (Lock/LockT/LockU)
1727	 * - there is a conflict iff a different client has a write delegation
1728	 * For Reading (Read Op)
1729	 * - there is a conflict iff a different client has a write delegation
1730	 *   (the specialids are always a different client)
1731	 * For Writing (Write/Setattr of size)
1732	 * - there is a conflict if a different client has any delegation
1733	 * - there is a conflict if the same client has a read delegation
1734	 *   (I don't understand why this isn't allowed, but that seems to be
1735	 *    the current consensus?)
1736	 */
1737	tstp = LIST_FIRST(&lfp->lf_deleg);
1738	while (tstp != LIST_END(&lfp->lf_deleg)) {
1739	    nstp = LIST_NEXT(tstp, ls_file);
1740	    if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
1741		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
1742		  (new_lop->lo_flags & NFSLCK_READ))) &&
1743		  clp != tstp->ls_clp &&
1744		 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
1745		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
1746		   (new_lop->lo_flags & NFSLCK_WRITE) &&
1747		  (clp != tstp->ls_clp ||
1748		   (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
1749		if (filestruct_locked != 0) {
1750			/* Roll back local locks. */
1751			NFSUNLOCKSTATE();
1752			nfsrv_locallock_rollback(vp, lfp, p);
1753			NFSLOCKSTATE();
1754			nfsrv_unlocklf(lfp);
1755		}
1756		ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
1757		if (ret) {
1758		    /*
1759		     * nfsrv_delegconflict unlocks state when it
1760		     * returns non-zero, which it always does.
1761		     */
		    /* Freed here, since "tryagain" allocates a new one. */
1762		    if (other_lop) {
1763			FREE((caddr_t)other_lop, M_NFSDLOCK);
1764			other_lop = NULL;
1765		    }
1766		    if (ret == -1) {
1767			lckstp = NULL;
1768			goto tryagain;
1769		    }
1770		    error = ret;
1771		    goto out;
1772		}
1773		/* Never gets here. */
1774	    }
1775	    tstp = nstp;
1776	}
1777
1778	/*
1779	 * Handle the unlock case by calling nfsrv_updatelock().
1780	 * (Should I have done some access checking above for unlock? For now,
1781	 *  just let it happen.)
1782	 */
1783	if (new_stp->ls_flags & NFSLCK_UNLOCK) {
		/* Save the range before new_lop is handed to updatelock. */
1784		first = new_lop->lo_first;
1785		end = new_lop->lo_end;
1786		nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
		/* Bump the seqid and pass the new stateid back. */
1787		stateidp->seqid = ++(stp->ls_stateid.seqid);
1788		stateidp->other[0] = stp->ls_stateid.other[0];
1789		stateidp->other[1] = stp->ls_stateid.other[1];
1790		stateidp->other[2] = stp->ls_stateid.other[2];
1791		if (filestruct_locked != 0) {
1792			NFSUNLOCKSTATE();
1793			/* Update the local locks. */
1794			nfsrv_localunlock(vp, lfp, first, end, p);
1795			NFSLOCKSTATE();
1796			nfsrv_unlocklf(lfp);
1797		}
1798		NFSUNLOCKSTATE();
1799		goto out;
1800	}
1801
1802	/*
1803	 * Search for a conflicting lock. A lock conflicts if:
1804	 * - the lock range overlaps and
1805	 * - at least one lock is a write lock and
1806	 * - it is not owned by the same lock owner
1807	 */
1808	if (!delegation) {
1809	  LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
1810	    if (new_lop->lo_end > lop->lo_first &&
1811		new_lop->lo_first < lop->lo_end &&
1812		(new_lop->lo_flags == NFSLCK_WRITE ||
1813		 lop->lo_flags == NFSLCK_WRITE) &&
1814		lckstp != lop->lo_stp &&
1815		(clp != lop->lo_stp->ls_clp ||
1816		 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
1817		 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
1818		    lckstp->ls_ownerlen))) {
1819		if (other_lop) {
1820		    FREE((caddr_t)other_lop, M_NFSDLOCK);
1821		    other_lop = NULL;
1822		}
1823		ret = nfsrv_clientconflict(lop->lo_stp->ls_clp,&haslock,vp,p);
1824		if (ret == 1) {
		    /* State is already unlocked here (see below). */
1825		    if (filestruct_locked != 0) {
1826			/* Roll back local locks. */
1827			nfsrv_locallock_rollback(vp, lfp, p);
1828			NFSLOCKSTATE();
1829			nfsrv_unlocklf(lfp);
1830			NFSUNLOCKSTATE();
1831		    }
1832		    /*
1833		     * nfsrv_clientconflict() unlocks state when it
1834		     * returns non-zero.
1835		     */
1836		    lckstp = NULL;
1837		    goto tryagain;
1838		}
1839		/*
1840		 * Found a conflicting lock, so record the conflict and
1841		 * return the error.
1842		 */
1843		if (cfp != NULL && ret == 0) {
1844		    cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
1845		    cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
1846		    cfp->cl_first = lop->lo_first;
1847		    cfp->cl_end = lop->lo_end;
1848		    cfp->cl_flags = lop->lo_flags;
1849		    cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
1850		    NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
1851			cfp->cl_ownerlen);
1852		}
1853		if (ret == 2)
1854		    error = NFSERR_PERM;
1855		else if (new_stp->ls_flags & NFSLCK_RECLAIM)
1856		    error = NFSERR_RECLAIMCONFLICT;
1857		else if (new_stp->ls_flags & NFSLCK_CHECK)
1858		    error = NFSERR_LOCKED;
1859		else
1860		    error = NFSERR_DENIED;
		/*
		 * NOTE(review): for ret == 2 with filestruct_locked set,
		 * no rollback or nfsrv_unlocklf() is done on this path;
		 * confirm that this is intended.
		 */
1861		if (filestruct_locked != 0 && ret == 0) {
1862			/* Roll back local locks. */
1863			NFSUNLOCKSTATE();
1864			nfsrv_locallock_rollback(vp, lfp, p);
1865			NFSLOCKSTATE();
1866			nfsrv_unlocklf(lfp);
1867		}
1868		if (ret == 0)
1869			NFSUNLOCKSTATE();
1870		goto out;
1871	    }
1872	  }
1873	}
1874
1875	/*
1876	 * We only get here if there was no lock that conflicted.
1877	 */
1878	if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
1879		NFSUNLOCKSTATE();
1880		goto out;
1881	}
1882
1883	/*
1884	 * We only get here when we are creating or modifying a lock.
1885	 * There are two variants:
1886	 * - exist_lock_owner where lock_owner exists
1887	 * - open_to_lock_owner with new lock_owner
	 * The range and flags are saved first, since new_lop may be
	 * consumed below before nfsrv_locallock_commit() needs them.
1888	 */
1889	first = new_lop->lo_first;
1890	end = new_lop->lo_end;
1891	lock_flags = new_lop->lo_flags;
1892	if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
1893		nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
1894		stateidp->seqid = ++(lckstp->ls_stateid.seqid);
1895		stateidp->other[0] = lckstp->ls_stateid.other[0];
1896		stateidp->other[1] = lckstp->ls_stateid.other[1];
1897		stateidp->other[2] = lckstp->ls_stateid.other[2];
1898	} else {
1899		/*
1900		 * The new open_to_lock_owner case.
1901		 * Link the new nfsstate into the lists.
		 * The new stateid is built from the clientid plus the
		 * value from nfsrv_nextstateindex(), with a seqid of 1.
1902		 */
1903		new_stp->ls_seq = new_stp->ls_opentolockseq;
1904		nfsrvd_refcache(new_stp->ls_op);
1905		stateidp->seqid = new_stp->ls_stateid.seqid = 1;
1906		stateidp->other[0] = new_stp->ls_stateid.other[0] =
1907		    clp->lc_clientid.lval[0];
1908		stateidp->other[1] = new_stp->ls_stateid.other[1] =
1909		    clp->lc_clientid.lval[1];
1910		stateidp->other[2] = new_stp->ls_stateid.other[2] =
1911		    nfsrv_nextstateindex(clp);
1912		new_stp->ls_clp = clp;
1913		LIST_INIT(&new_stp->ls_lock);
1914		new_stp->ls_openstp = stp;
1915		new_stp->ls_lfp = lfp;
		/*
		 * NOTE(review): new_stp is cast to struct nfslock * for the
		 * insertion point argument; presumably this relies on the
		 * list entry layout of the two structures - confirm
		 * against nfsrv_insertlock().
		 */
1916		nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
1917		    lfp);
1918		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
1919		    new_stp, ls_hash);
1920		LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
		/* Both structures are now in use; the caller must not free. */
1921		*new_lopp = NULL;
1922		*new_stpp = NULL;
1923		newnfsstats.srvlockowners++;
1924		nfsrv_openpluslock++;
1925	}
1926	if (filestruct_locked != 0) {
1927		NFSUNLOCKSTATE();
1928		nfsrv_locallock_commit(lfp, lock_flags, first, end);
1929		NFSLOCKSTATE();
1930		nfsrv_unlocklf(lfp);
1931	}
1932	NFSUNLOCKSTATE();
1933
1934out:
	/*
	 * haslock starts out 0 and is only passed by address to the
	 * conflict helpers above, so non-zero here means one of them
	 * set it; release nfsv4rootfs_lock in that case.
	 */
1935	if (haslock) {
1936		NFSLOCKV4ROOTMUTEX();
1937		nfsv4_unlock(&nfsv4rootfs_lock, 1);
1938		NFSUNLOCKV4ROOTMUTEX();
1939	}
	/*
	 * Free the spare nfslock if it is still held here (presumably
	 * nfsrv_updatelock() sets other_lop to NULL when it uses it).
	 */
1940	if (other_lop)
1941		FREE((caddr_t)other_lop, M_NFSDLOCK);
1942	NFSEXITCODE2(error, nd);
1943	return (error);
1944}
1945
1946/*
1947 * Check for state errors for Open.
1948 * repstat is passed back out as an error if more critical errors
1949 * are not detected.
 *
 * The checks are made in this order: client/server restart, the count
 * of opens plus locks being above NFSRV_V4STATELIMIT, a usable client
 * for clientid, the open owner's seqid (only when the owner already
 * exists), the grace period and then, for an existing file, conflicting
 * share reservations and conflicting delegations.
 *
 * clientid - passed to nfsrv_checkrestart() and nfsrv_getclient()
 * stateidp - only looked at for NFSLCK_DELEGCUR, where it is compared
 *	against the stateids of the delegations for the file
 * new_stp - describes the requested open; ls_flags, ls_stateid, ls_seq
 *	and ls_op are read here
 * vp - vnode of the file being opened, or NULL if the file doesn't
 *	exist yet, in which case the conflict checks are skipped
 * nd - passed to nfsrv_checkseqid() and NFSEXITCODE2()
 * p - passed through to the helper functions
 * repstat - see above
 *
 * Returns 0 if no problem was found, otherwise the error from the
 * first check that failed.
1950 */
1951APPLESTATIC int
1952nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
1953    struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
1954    NFSPROC_T *p, int repstat)
1955{
1956	struct nfsstate *stp, *nstp;
1957	struct nfsclient *clp;
1958	struct nfsstate *ownerstp;
1959	struct nfslockfile *lfp, *new_lfp;
1960	int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
1961
	/*
	 * readonly is set when the share bits are exactly read access,
	 * which the delegation conflict comment further down refers to
	 * as "Read Access and Deny None".
	 */
1962	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
1963		readonly = 1;
1964	/*
1965	 * Check for restart conditions (client and server).
1966	 */
1967	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1968		&new_stp->ls_stateid, 0);
1969	if (error)
1970		goto out;
1971
1972	/*
1973	 * Check for state resource limit exceeded.
1974	 * Technically this should be SMP protected, but the worst
1975	 * case error is "out by one or two" on the count when it
1976	 * returns NFSERR_RESOURCE and the limit is just a rather
1977	 * arbitrary high water mark, so no harm is done.
1978	 */
1979	if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
1980		error = NFSERR_RESOURCE;
1981		goto out;
1982	}
1983
	/*
	 * Retry point. The conflict handling below jumps back here when
	 * nfsrv_clientconflict() returns 1 or nfsrv_delegconflict()
	 * returns -1. Each pass allocates a fresh nfslockfile and calls
	 * nfsrv_getlockfh() before the state lock is taken. The return
	 * value of nfsrv_getlockfh() is held in getfhret and only
	 * turned into an error after the client, seqid and grace checks.
	 */
1984tryagain:
1985	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
1986	    M_NFSDLOCKFILE, M_WAITOK);
1987	if (vp)
1988		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, &new_lfp,
1989		    NULL, p);
1990	NFSLOCKSTATE();
1991	/*
1992	 * Get the nfsclient structure.
1993	 */
1994	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
1995	    (nfsquad_t)((u_quad_t)0), NULL, p);
1996
1997	/*
1998	 * Look up the open owner. See if it needs confirmation and
1999	 * check the seq#, as required.
	 * (ownerstp is only assigned when there was no error, so every
	 *  later use of it is guarded by a test of error first.)
2000	 */
2001	if (!error)
2002		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2003
2004	if (!error && ownerstp) {
2005		error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2006		    new_stp->ls_op);
2007		/*
2008		 * If the OpenOwner hasn't been confirmed, assume the
2009		 * old one was a replay and this one is ok.
2010		 * See: RFC3530 Sec. 14.2.18.
2011		 */
2012		if (error == NFSERR_BADSEQID &&
2013		    (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2014			error = 0;
2015	}
2016
2017	/*
2018	 * Check for grace.
	 * A reclaim for which nfsrv_checkstable() returns non-zero is
	 * answered with NFSERR_NOGRACE.
2019	 */
2020	if (!error)
2021		error = nfsrv_checkgrace(new_stp->ls_flags);
2022	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2023		nfsrv_checkstable(clp))
2024		error = NFSERR_NOGRACE;
2025
2026	/*
2027	 * If none of the above errors occurred, let repstat be
2028	 * returned.
2029	 */
2030	if (repstat && !error)
2031		error = repstat;
2032	if (error) {
		/*
		 * new_lfp has not been passed to nfsrv_getlockfile() on
		 * this path, so it is freed here.
		 */
2033		NFSUNLOCKSTATE();
2034		if (haslock) {
2035			NFSLOCKV4ROOTMUTEX();
2036			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2037			NFSUNLOCKV4ROOTMUTEX();
2038		}
2039		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2040		goto out;
2041	}
2042
2043	/*
2044	 * If vp == NULL, the file doesn't exist yet, so return ok.
2045	 * (This always happens on the first pass, so haslock must be 0.)
2046	 */
2047	if (vp == NULL) {
2048		NFSUNLOCKSTATE();
2049		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2050		goto out;
2051	}
2052
2053	/*
2054	 * Get the structure for the underlying file.
	 * new_lfp is freed if it is still non-NULL after the call
	 * (presumably nfsrv_getlockfile() clears it when it keeps the
	 *  structure - confirm against that function).
2055	 */
2056	if (getfhret)
2057		error = getfhret;
2058	else
2059		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2060		    NULL, 0);
2061	if (new_lfp)
2062		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2063	if (error) {
2064		NFSUNLOCKSTATE();
2065		if (haslock) {
2066			NFSLOCKV4ROOTMUTEX();
2067			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2068			NFSUNLOCKV4ROOTMUTEX();
2069		}
2070		goto out;
2071	}
2072
2073	/*
2074	 * Search for a conflicting open/share.
2075	 */
2076	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2077	    /*
2078	     * For Delegate_Cur, search for the matching Delegation,
2079	     * which indicates no conflict.
2080	     * An old delegation should have been recovered by the
2081	     * client doing a Claim_DELEGATE_Prev, so I won't let
2082	     * it match and return NFSERR_EXPIRED. Should I let it
2083	     * match?
2084	     */
2085	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2086		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2087		    stateidp->seqid == stp->ls_stateid.seqid &&
2088		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2089			  NFSX_STATEIDOTHER))
2090			break;
2091	    }
	    /*
	     * Fail if no delegation matched, or if write access is wanted
	     * and the matching delegation is only a read delegation.
	     */
2092	    if (stp == LIST_END(&lfp->lf_deleg) ||
2093		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2094		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2095		NFSUNLOCKSTATE();
2096		if (haslock) {
2097			NFSLOCKV4ROOTMUTEX();
2098			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2099			NFSUNLOCKV4ROOTMUTEX();
2100		}
2101		error = NFSERR_EXPIRED;
2102		goto out;
2103	    }
2104	}
2105
2106	/*
2107	 * Check for access/deny bit conflicts. I check for the same
2108	 * owner as well, in case the client didn't bother.
	 * The test is symmetric: the new open's access bits are checked
	 * against the existing open's flags shifted right by NFSLCK_SHIFT
	 * and the existing open's access bits against the new open's
	 * shifted flags (presumably the shift lines the deny bits up
	 * with the access bits - confirm against the flag definitions).
	 * Nothing is checked here for NFSLCK_DELEGCUR.
2109	 */
2110	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2111		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2112		    (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2113		      ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2114		     ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2115		      ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2116			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2117			if (ret == 1) {
2118				/*
2119				 * nfsrv_clientconflict() unlocks
2120				 * state when it returns non-zero.
2121				 */
2122				goto tryagain;
2123			}
			/*
			 * ret == 2 maps to NFSERR_PERM; for ret == 0 the
			 * conflict stands and the state lock is still held,
			 * so it is released here.
			 */
2124			if (ret == 2)
2125				error = NFSERR_PERM;
2126			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2127				error = NFSERR_RECLAIMCONFLICT;
2128			else
2129				error = NFSERR_SHAREDENIED;
2130			if (ret == 0)
2131				NFSUNLOCKSTATE();
2132			if (haslock) {
2133				NFSLOCKV4ROOTMUTEX();
2134				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2135				NFSUNLOCKV4ROOTMUTEX();
2136			}
2137			goto out;
2138		}
2139	}
2140
2141	/*
2142	 * Check for a conflicting delegation. If one is found, call
2143	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2144	 * been set yet, it will get the lock. Otherwise, it will recall
2145	 * the delegation. Then, we try again...
2146	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2147	 *  isn't a conflict.)
2148	 * I currently believe the conflict algorithm to be:
2149	 * For Open with Read Access and Deny None
2150	 * - there is a conflict iff a different client has a write delegation
2151	 * For Open with other Write Access or any Deny except None
2152	 * - there is a conflict if a different client has any delegation
2153	 * - there is a conflict if the same client has a read delegation
2154	 *   (The current consensus is that this last case should be
2155	 *    considered a conflict since the client with a read delegation
2156	 *    could have done an Open with ReadAccess and WriteDeny
2157	 *    locally and then not have checked for the WriteDeny.)
2158	 * Don't check for a Reclaim, since that will be dealt with
2159	 * by nfsrv_openctrl().
2160	 */
2161	if (!(new_stp->ls_flags &
2162		(NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
2163	    stp = LIST_FIRST(&lfp->lf_deleg);
2164	    while (stp != LIST_END(&lfp->lf_deleg)) {
		/*
		 * The next entry is fetched before nfsrv_delegconflict()
		 * is called (presumably because that call can remove stp
		 * from the list - confirm).
		 */
2165		nstp = LIST_NEXT(stp, ls_file);
2166		if ((readonly && stp->ls_clp != clp &&
2167		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2168		    (!readonly && (stp->ls_clp != clp ||
2169		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2170			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2171			if (ret) {
2172			    /*
2173			     * nfsrv_delegconflict() unlocks state
2174			     * when it returns non-zero.
			     * NOTE(review): the v4root lock is not
			     * released here when haslock is set, so this
			     * relies on nfsrv_delegconflict() having
			     * done that before returning an error -
			     * confirm.
2175			     */
2176			    if (ret == -1)
2177				goto tryagain;
2178			    error = ret;
2179			    goto out;
2180			}
2181		}
2182		stp = nstp;
2183	    }
2184	}
	/*
	 * No conflict was found, so release the state lock and, if it
	 * was acquired along the way, the v4root lock.
	 */
2185	NFSUNLOCKSTATE();
2186	if (haslock) {
2187		NFSLOCKV4ROOTMUTEX();
2188		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2189		NFSUNLOCKV4ROOTMUTEX();
2190	}
2191
2192out:
2193	NFSEXITCODE2(error, nd);
2194	return (error);
2195}
2196
2197/*
2198 * Open control function to create/update open state for an open.
 *
 * The client, lock file, share conflict and delegation conflict checks
 * are made again here (failures are logged as "unexpected") and then
 * one of four cases is handled:
 * - NFSLCK_DELEGPREV: an old delegation of the client for this file is
 *   made current again and an open is created for it
 * - NFSLCK_DELEGREAD or NFSLCK_DELEGWRITE: a new delegation and a new
 *   open are created (the Claim_Previous case)
 * - the open owner already exists: its unconfirmed open is replaced,
 *   its existing open of this file is upgraded, or a new open is added
 * - otherwise new_stp itself becomes the new open owner
 *
 * nd - nd_compref is copied into delegations; also used by NFSEXITCODE2()
 * vp - vnode of the file being opened
 * new_stpp - *new_stpp describes the open and is the prospective open
 *	owner structure; *new_stpp is set to NULL whenever that structure
 *	has been linked into the client's open owner list
 * clientid - identifies the client
 * stateidp - compared against delegation stateids for NFSLCK_DELEGCUR;
 *	filled in with the stateid of the open when 0 is returned
 * delegstateidp - filled in with the delegation stateid whenever a
 *	delegation is issued or made current again
 * rflagsp - NFSV4OPEN_xxx result flags are or'd into *rflagsp
 * exp - only used to test for a read-only export (NFSVNO_EXRDONLY())
 * p - passed through to the helper functions
 * filerev - stored in ls_filerev of a newly created delegation
 *
 * Returns 0 upon success, otherwise an error.
2199 */
2200APPLESTATIC int
2201nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2202    struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2203    nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2204    NFSPROC_T *p, u_quad_t filerev)
2205{
2206	struct nfsstate *new_stp = *new_stpp;
2207	struct nfsstate *stp, *nstp;
2208	struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2209	struct nfslockfile *lfp, *new_lfp;
2210	struct nfsclient *clp;
2211	int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2212	int readonly = 0, cbret = 1, getfhret = 0;
	/*
	 * delegate: 1 while a delegation may still be issued, 0 once an
	 *     existing delegation or another client's open rules it out,
	 *     2 when a reclaim open conflicts with an existing delegation
	 *     (which makes the Claim_Previous case set NFSV4OPEN_RECALL).
	 * writedeleg: cleared when another client has a read-only open
	 *     or a read delegation for the file.
	 * cbret: return value of the Null callback RPC; LCL_CALLBACKSON
	 *     is set when it is 0.
	 */
2213
2214	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2215		readonly = 1;
2216	/*
2217	 * Check for restart conditions (client and server).
2218	 * (Paranoia, should have been detected by nfsrv_opencheck().)
2219	 * If an error does show up, return NFSERR_EXPIRED, since
2220	 * the seqid# has already been incremented.
2221	 */
2222	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2223	    &new_stp->ls_stateid, 0);
2224	if (error) {
2225		printf("Nfsd: openctrl unexpected restart err=%d\n",
2226		    error);
2227		error = NFSERR_EXPIRED;
2228		goto out;
2229	}
2230
	/*
	 * Retry point. The conflict handling below jumps back here when
	 * nfsrv_clientconflict() returns 1 or nfsrv_delegconflict()
	 * returns -1, after freeing new_open and new_deleg and setting
	 * openstp back to NULL. All three structures are allocated and
	 * nfsrv_getlockfh() is called before the state lock is taken.
	 */
2231tryagain:
2232	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2233	    M_NFSDLOCKFILE, M_WAITOK);
2234	MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate),
2235	    M_NFSDSTATE, M_WAITOK);
2236	MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate),
2237	    M_NFSDSTATE, M_WAITOK);
2238	getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, &new_lfp,
2239	    NULL, p);
2240	NFSLOCKSTATE();
2241	/*
2242	 * Get the client structure. Since the linked lists could be changed
2243	 * by other nfsd processes if this process does a tsleep(), one of
2244	 * two things must be done.
2245	 * 1 - don't tsleep()
2246	 * or
2247	 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2248	 *     before using the lists, since this lock stops the other
2249	 *     nfsd. This should only be used for rare cases, since it
2250	 *     essentially single threads the nfsd.
2251	 *     At this time, it is only done for cases where the stable
2252	 *     storage file must be written prior to completion of state
2253	 *     expiration.
2254	 */
2255	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
2256	    (nfsquad_t)((u_quad_t)0), NULL, p);
2257	if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2258	    clp->lc_program) {
2259		/*
2260		 * This happens on the first open for a client
2261		 * that supports callbacks.
2262		 */
2263		NFSUNLOCKSTATE();
2264		/*
2265		 * Although nfsrv_docallback() will sleep, clp won't
2266		 * go away, since they are only removed when the
2267		 * nfsv4_lock() has blocked the nfsd threads. The
2268		 * fields in clp can change, but having multiple
2269		 * threads do this Null callback RPC should be
2270		 * harmless.
2271		 */
2272		cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2273		    NULL, 0, NULL, NULL, NULL, p);
2274		NFSLOCKSTATE();
2275		clp->lc_flags &= ~LCL_NEEDSCBNULL;
2276		if (!cbret)
2277			clp->lc_flags |= LCL_CALLBACKSON;
2278	}
2279
2280	/*
2281	 * Look up the open owner. See if it needs confirmation and
2282	 * check the seq#, as required.
2283	 */
2284	if (!error)
2285		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2286
2287	if (error) {
		/*
		 * Whatever nfsrv_getclient() returned is logged and then
		 * replaced by NFSERR_EXPIRED. None of the three
		 * allocations has been used yet, so all are freed.
		 */
2288		NFSUNLOCKSTATE();
2289		printf("Nfsd: openctrl unexpected state err=%d\n",
2290			error);
2291		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2292		free((caddr_t)new_open, M_NFSDSTATE);
2293		free((caddr_t)new_deleg, M_NFSDSTATE);
2294		if (haslock) {
2295			NFSLOCKV4ROOTMUTEX();
2296			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2297			NFSUNLOCKV4ROOTMUTEX();
2298		}
2299		error = NFSERR_EXPIRED;
2300		goto out;
2301	}
2302
2303	if (new_stp->ls_flags & NFSLCK_RECLAIM)
2304		nfsrv_markstable(clp);
2305
2306	/*
2307	 * Get the structure for the underlying file.
	 * new_lfp is freed if it is still non-NULL after the call
	 * (presumably nfsrv_getlockfile() clears it when it keeps the
	 *  structure - confirm against that function).
2308	 */
2309	if (getfhret)
2310		error = getfhret;
2311	else
2312		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2313		    NULL, 0);
2314	if (new_lfp)
2315		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2316	if (error) {
2317		NFSUNLOCKSTATE();
2318		printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2319		    error);
2320		free((caddr_t)new_open, M_NFSDSTATE);
2321		free((caddr_t)new_deleg, M_NFSDSTATE);
2322		if (haslock) {
2323			NFSLOCKV4ROOTMUTEX();
2324			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2325			NFSUNLOCKV4ROOTMUTEX();
2326		}
2327		goto out;
2328	}
2329
2330	/*
2331	 * Search for a conflicting open/share.
2332	 */
2333	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2334	    /*
2335	     * For Delegate_Cur, search for the matching Delegation,
2336	     * which indicates no conflict.
2337	     * An old delegation should have been recovered by the
2338	     * client doing a Claim_DELEGATE_Prev, so I won't let
2339	     * it match and return NFSERR_EXPIRED. Should I let it
2340	     * match?
2341	     */
2342	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2343		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2344		    stateidp->seqid == stp->ls_stateid.seqid &&
2345		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2346			NFSX_STATEIDOTHER))
2347			break;
2348	    }
	    /*
	     * Fail if no delegation matched, or if write access is wanted
	     * and the matching delegation is only a read delegation.
	     */
2349	    if (stp == LIST_END(&lfp->lf_deleg) ||
2350		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2351		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2352		NFSUNLOCKSTATE();
2353		printf("Nfsd openctrl unexpected expiry\n");
2354		free((caddr_t)new_open, M_NFSDSTATE);
2355		free((caddr_t)new_deleg, M_NFSDSTATE);
2356		if (haslock) {
2357			NFSLOCKV4ROOTMUTEX();
2358			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2359			NFSUNLOCKV4ROOTMUTEX();
2360		}
2361		error = NFSERR_EXPIRED;
2362		goto out;
2363	    }
2364
2365	    /*
2366	     * Don't issue a Delegation, since one already exists and
2367	     * delay delegation timeout, as required.
2368	     */
2369	    delegate = 0;
2370	    nfsrv_delaydelegtimeout(stp);
2371	}
2372
2373	/*
2374	 * Check for access/deny bit conflicts. I also check for the
2375	 * same owner, since the client might not have bothered to check.
2376	 * Also, note an open for the same file and owner, if found,
2377	 * which is all we do here for Delegate_Cur, since conflict
2378	 * checking is already done.
	 * (That open is remembered in openstp.)
2379	 */
2380	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2381		if (ownerstp && stp->ls_openowner == ownerstp)
2382			openstp = stp;
2383		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2384		    /*
2385		     * If another client has the file open, the only
2386		     * delegation that can be issued is a Read delegation
2387		     * and only if it is a Read open with Deny none.
2388		     */
2389		    if (clp != stp->ls_clp) {
2390			if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2391			    NFSLCK_READACCESS)
2392			    writedeleg = 0;
2393			else
2394			    delegate = 0;
2395		    }
		    /*
		     * Symmetric test of each open's access bits against
		     * the other open's flags shifted right by NFSLCK_SHIFT.
		     */
2396		    if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2397		        ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2398		       ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2399		        ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
2400			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2401			if (ret == 1) {
2402				/*
2403				 * nfsrv_clientconflict() unlocks state
2404				 * when it returns non-zero.
2405				 */
2406				free((caddr_t)new_open, M_NFSDSTATE);
2407				free((caddr_t)new_deleg, M_NFSDSTATE);
2408				openstp = NULL;
2409				goto tryagain;
2410			}
			/*
			 * ret == 2 maps to NFSERR_PERM; for ret == 0 the
			 * conflict stands and the state lock is still held,
			 * so it is released here.
			 */
2411			if (ret == 2)
2412				error = NFSERR_PERM;
2413			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2414				error = NFSERR_RECLAIMCONFLICT;
2415			else
2416				error = NFSERR_SHAREDENIED;
2417			if (ret == 0)
2418				NFSUNLOCKSTATE();
2419			if (haslock) {
2420				NFSLOCKV4ROOTMUTEX();
2421				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2422				NFSUNLOCKV4ROOTMUTEX();
2423			}
2424			free((caddr_t)new_open, M_NFSDSTATE);
2425			free((caddr_t)new_deleg, M_NFSDSTATE);
2426			printf("nfsd openctrl unexpected client cnfl\n");
2427			goto out;
2428		    }
2429		}
2430	}
2431
2432	/*
2433	 * Check for a conflicting delegation. If one is found, call
2434	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2435	 * been set yet, it will get the lock. Otherwise, it will recall
2436	 * the delegation. Then, we try again...
2437	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2438	 *  isn't a conflict.)
2439	 * I currently believe the conflict algorithm to be:
2440	 * For Open with Read Access and Deny None
2441	 * - there is a conflict iff a different client has a write delegation
2442	 * For Open with other Write Access or any Deny except None
2443	 * - there is a conflict if a different client has any delegation
2444	 * - there is a conflict if the same client has a read delegation
2445	 *   (The current consensus is that this last case should be
2446	 *    considered a conflict since the client with a read delegation
2447	 *    could have done an Open with ReadAccess and WriteDeny
2448	 *    locally and then not have checked for the WriteDeny.)
	 * Every delegation found on the list also restricts what can be
	 * issued: a read delegation held by another client only clears
	 * writedeleg, any other delegation clears delegate.
	 * For a Reclaim, a conflict is not handed to nfsrv_delegconflict();
	 * delegate is set to 2 instead.
2449	 */
2450	if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
2451	    stp = LIST_FIRST(&lfp->lf_deleg);
2452	    while (stp != LIST_END(&lfp->lf_deleg)) {
		/*
		 * The next entry is fetched before nfsrv_delegconflict()
		 * is called (presumably because that call can remove stp
		 * from the list - confirm).
		 */
2453		nstp = LIST_NEXT(stp, ls_file);
2454		if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
2455			writedeleg = 0;
2456		else
2457			delegate = 0;
2458		if ((readonly && stp->ls_clp != clp &&
2459		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2460		    (!readonly && (stp->ls_clp != clp ||
2461		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2462		    if (new_stp->ls_flags & NFSLCK_RECLAIM) {
2463			delegate = 2;
2464		    } else {
2465			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2466			if (ret) {
2467			    /*
2468			     * nfsrv_delegconflict() unlocks state
2469			     * when it returns non-zero.
			     * The message below is also printed when
			     * ret == -1 and the open is simply retried.
			     * NOTE(review): the v4root lock is not
			     * released here when haslock is set, so the
			     * error return relies on
			     * nfsrv_delegconflict() having done that -
			     * confirm.
2470			     */
2471			    printf("Nfsd openctrl unexpected deleg cnfl\n");
2472			    free((caddr_t)new_open, M_NFSDSTATE);
2473			    free((caddr_t)new_deleg, M_NFSDSTATE);
2474			    if (ret == -1) {
2475				openstp = NULL;
2476				goto tryagain;
2477			    }
2478			    error = ret;
2479			    goto out;
2480			}
2481		    }
2482		}
2483		stp = nstp;
2484	    }
2485	}
2486
2487	/*
2488	 * We only get here if there was no open that conflicted.
2489	 * If an open for the owner exists, OR in the access/deny bits.
2490	 * Otherwise it is a new open. If the open_owner hasn't been
2491	 * confirmed, replace the open with the new one needing confirmation,
2492	 * otherwise add the open.
	 * (error is still 0 at this point, since every failure above
	 *  jumped to out.)
2493	 */
2494	if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
2495	    /*
2496	     * Handle NFSLCK_DELEGPREV by searching the old delegations for
2497	     * a match. If found, just move the old delegation to the current
2498	     * delegation list and issue open. If not found, return
2499	     * NFSERR_EXPIRED.
	     * The delegation is given a new stateid, which is also
	     * returned via delegstateidp.
2500	     */
2501	    LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
2502		if (stp->ls_lfp == lfp) {
2503		    /* Found it */
2504		    if (stp->ls_clp != clp)
2505			panic("olddeleg clp");
2506		    LIST_REMOVE(stp, ls_list);
2507		    LIST_REMOVE(stp, ls_hash);
2508		    stp->ls_flags &= ~NFSLCK_OLDDELEG;
2509		    stp->ls_stateid.seqid = delegstateidp->seqid = 0;
2510		    stp->ls_stateid.other[0] = delegstateidp->other[0] =
2511			clp->lc_clientid.lval[0];
2512		    stp->ls_stateid.other[1] = delegstateidp->other[1] =
2513			clp->lc_clientid.lval[1];
2514		    stp->ls_stateid.other[2] = delegstateidp->other[2] =
2515			nfsrv_nextstateindex(clp);
2516		    stp->ls_compref = nd->nd_compref;
2517		    LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
2518		    LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2519			stp->ls_stateid), stp, ls_hash);
2520		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2521			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2522		    else
2523			*rflagsp |= NFSV4OPEN_READDELEGATE;
2524		    clp->lc_delegtime = NFSD_MONOSEC +
2525			nfsrv_lease + NFSRV_LEASEDELTA;
2526
2527		    /*
2528		     * Now, do the associated open.
		     * Its access bits come from the type of the
		     * delegation, its deny bits from new_stp.
2529		     */
2530		    new_open->ls_stateid.seqid = 0;
2531		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2532		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2533		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2534		    new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
2535			NFSLCK_OPEN;
2536		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2537			new_open->ls_flags |= (NFSLCK_READACCESS |
2538			    NFSLCK_WRITEACCESS);
2539		    else
2540			new_open->ls_flags |= NFSLCK_READACCESS;
2541		    new_open->ls_uid = new_stp->ls_uid;
2542		    new_open->ls_lfp = lfp;
2543		    new_open->ls_clp = clp;
2544		    LIST_INIT(&new_open->ls_open);
2545		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2546		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2547			new_open, ls_hash);
2548		    /*
2549		     * and handle the open owner
		     * (new_stp becomes the open owner if none exists)
2550		     */
2551		    if (ownerstp) {
2552			new_open->ls_openowner = ownerstp;
2553			LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
2554		    } else {
2555			new_open->ls_openowner = new_stp;
2556			new_stp->ls_flags = 0;
2557			nfsrvd_refcache(new_stp->ls_op);
2558			new_stp->ls_noopens = 0;
2559			LIST_INIT(&new_stp->ls_open);
2560			LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2561			LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2562			*new_stpp = NULL;
2563			newnfsstats.srvopenowners++;
2564			nfsrv_openpluslock++;
2565		    }
2566		    openstp = new_open;
2567		    new_open = NULL;
2568		    newnfsstats.srvopens++;
2569		    nfsrv_openpluslock++;
2570		    break;
2571		}
2572	    }
2573	    if (stp == LIST_END(&clp->lc_olddeleg))
2574		error = NFSERR_EXPIRED;
2575	} else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2576	    /*
2577	     * Scan to see that a delegation for this client and file
2578	     * doesn't already exist.
2579	     * There also shouldn't yet be an Open for this file and
2580	     * openowner.
	     * If either exists, NFSERR_RECLAIMCONFLICT is returned.
2581	     */
2582	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2583		if (stp->ls_clp == clp)
2584		    break;
2585	    }
2586	    if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
2587		/*
2588		 * This is the Claim_Previous case with a delegation
2589		 * type != Delegate_None.
2590		 */
2591		/*
2592		 * First, add the delegation. (Although we must issue the
2593		 * delegation, we can also ask for an immediate return.)
		 * The immediate return is requested by setting
		 * NFSV4OPEN_RECALL whenever one of the conditions tested
		 * after the delegation has been linked in holds.
2594		 */
2595		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0;
2596		new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
2597		    clp->lc_clientid.lval[0];
2598		new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
2599		    clp->lc_clientid.lval[1];
2600		new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
2601		    nfsrv_nextstateindex(clp);
2602		if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
2603		    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
2604			NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
2605		    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2606		} else {
2607		    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
2608			NFSLCK_READACCESS);
2609		    *rflagsp |= NFSV4OPEN_READDELEGATE;
2610		}
2611		new_deleg->ls_uid = new_stp->ls_uid;
2612		new_deleg->ls_lfp = lfp;
2613		new_deleg->ls_clp = clp;
2614		new_deleg->ls_filerev = filerev;
2615		new_deleg->ls_compref = nd->nd_compref;
2616		LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
2617		LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2618		    new_deleg->ls_stateid), new_deleg, ls_hash);
2619		LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
2620		new_deleg = NULL;
2621		if (delegate == 2 || nfsrv_issuedelegs == 0 ||
2622		    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
2623		     LCL_CALLBACKSON ||
2624		    NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
2625		    !NFSVNO_DELEGOK(vp))
2626		    *rflagsp |= NFSV4OPEN_RECALL;
2627		newnfsstats.srvdelegates++;
2628		nfsrv_openpluslock++;
2629		nfsrv_delegatecnt++;
2630
2631		/*
2632		 * Now, do the associated open.
		 * Its access bits come from the type of delegation
		 * being claimed, its deny bits from new_stp.
2633		 */
2634		new_open->ls_stateid.seqid = 0;
2635		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2636		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2637		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2638		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
2639		    NFSLCK_OPEN;
2640		if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
2641			new_open->ls_flags |= (NFSLCK_READACCESS |
2642			    NFSLCK_WRITEACCESS);
2643		else
2644			new_open->ls_flags |= NFSLCK_READACCESS;
2645		new_open->ls_uid = new_stp->ls_uid;
2646		new_open->ls_lfp = lfp;
2647		new_open->ls_clp = clp;
2648		LIST_INIT(&new_open->ls_open);
2649		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2650		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2651		   new_open, ls_hash);
2652		/*
2653		 * and handle the open owner
		 * (new_stp becomes the open owner if none exists)
2654		 */
2655		if (ownerstp) {
2656		    new_open->ls_openowner = ownerstp;
2657		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
2658		} else {
2659		    new_open->ls_openowner = new_stp;
2660		    new_stp->ls_flags = 0;
2661		    nfsrvd_refcache(new_stp->ls_op);
2662		    new_stp->ls_noopens = 0;
2663		    LIST_INIT(&new_stp->ls_open);
2664		    LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2665		    LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2666		    *new_stpp = NULL;
2667		    newnfsstats.srvopenowners++;
2668		    nfsrv_openpluslock++;
2669		}
2670		openstp = new_open;
2671		new_open = NULL;
2672		newnfsstats.srvopens++;
2673		nfsrv_openpluslock++;
2674	    } else {
2675		error = NFSERR_RECLAIMCONFLICT;
2676	    }
2677	} else if (ownerstp) {
2678		if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
2679		    /* Replace the open */
		    /*
		     * The first open on the unconfirmed owner's list is
		     * reused: its flags, seqid and uid are reset and it
		     * is moved to this file's open list if it was for a
		     * different file.
		     * NOTE(review): stp is dereferenced without a NULL
		     * check, so this assumes an unconfirmed owner always
		     * has an open on its list - confirm.
		     */
2680		    if (ownerstp->ls_op)
2681			nfsrvd_derefcache(ownerstp->ls_op);
2682		    ownerstp->ls_op = new_stp->ls_op;
2683		    nfsrvd_refcache(ownerstp->ls_op);
2684		    ownerstp->ls_seq = new_stp->ls_seq;
2685		    *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
2686		    stp = LIST_FIRST(&ownerstp->ls_open);
2687		    stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
2688			NFSLCK_OPEN;
2689		    stp->ls_stateid.seqid = 0;
2690		    stp->ls_uid = new_stp->ls_uid;
2691		    if (lfp != stp->ls_lfp) {
2692			LIST_REMOVE(stp, ls_file);
2693			LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
2694			stp->ls_lfp = lfp;
2695		    }
2696		    openstp = stp;
2697		} else if (openstp) {
		    /*
		     * The owner already has this file open, so the share
		     * bits are or'd in and the seqid of the open's
		     * stateid is incremented.
		     */
2698		    openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
2699		    openstp->ls_stateid.seqid++;
2700
2701		    /*
2702		     * This is where we can choose to issue a delegation.
		     * (Only a write delegation is issued for this case.)
2703		     */
2704		    if (delegate && nfsrv_issuedelegs &&
2705			writedeleg && !NFSVNO_EXRDONLY(exp) &&
2706			(nfsrv_writedelegifpos || !readonly) &&
2707			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
2708			 LCL_CALLBACKSON &&
2709			!NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
2710			NFSVNO_DELEGOK(vp)) {
2711			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0;
2712			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
2713			    = clp->lc_clientid.lval[0];
2714			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
2715			    = clp->lc_clientid.lval[1];
2716			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
2717			    = nfsrv_nextstateindex(clp);
2718			new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
2719			    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
2720			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2721			new_deleg->ls_uid = new_stp->ls_uid;
2722			new_deleg->ls_lfp = lfp;
2723			new_deleg->ls_clp = clp;
2724			new_deleg->ls_filerev = filerev;
2725			new_deleg->ls_compref = nd->nd_compref;
2726			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
2727			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2728			    new_deleg->ls_stateid), new_deleg, ls_hash);
2729			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
2730			new_deleg = NULL;
2731			newnfsstats.srvdelegates++;
2732			nfsrv_openpluslock++;
2733			nfsrv_delegatecnt++;
2734		    }
2735		} else {
		    /*
		     * Confirmed owner without an open of this file, so
		     * a new open is added to the owner.
		     */
2736		    new_open->ls_stateid.seqid = 0;
2737		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2738		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2739		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2740		    new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
2741			NFSLCK_OPEN;
2742		    new_open->ls_uid = new_stp->ls_uid;
2743		    new_open->ls_openowner = ownerstp;
2744		    new_open->ls_lfp = lfp;
2745		    new_open->ls_clp = clp;
2746		    LIST_INIT(&new_open->ls_open);
2747		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2748		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
2749		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2750			new_open, ls_hash);
2751		    openstp = new_open;
2752		    new_open = NULL;
2753		    newnfsstats.srvopens++;
2754		    nfsrv_openpluslock++;
2755
2756		    /*
2757		     * This is where we can choose to issue a delegation.
		     * (A write delegation if that is permitted, otherwise
		     *  a read delegation.)
2758		     */
2759		    if (delegate && nfsrv_issuedelegs &&
2760			(writedeleg || readonly) &&
2761			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
2762			 LCL_CALLBACKSON &&
2763			!NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
2764			NFSVNO_DELEGOK(vp)) {
2765			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0;
2766			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
2767			    = clp->lc_clientid.lval[0];
2768			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
2769			    = clp->lc_clientid.lval[1];
2770			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
2771			    = nfsrv_nextstateindex(clp);
2772			if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
2773			    (nfsrv_writedelegifpos || !readonly)) {
2774			    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
2775				NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
2776			    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2777			} else {
2778			    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
2779				NFSLCK_READACCESS);
2780			    *rflagsp |= NFSV4OPEN_READDELEGATE;
2781			}
2782			new_deleg->ls_uid = new_stp->ls_uid;
2783			new_deleg->ls_lfp = lfp;
2784			new_deleg->ls_clp = clp;
2785			new_deleg->ls_filerev = filerev;
2786			new_deleg->ls_compref = nd->nd_compref;
2787			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
2788			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2789			    new_deleg->ls_stateid), new_deleg, ls_hash);
2790			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
2791			new_deleg = NULL;
2792			newnfsstats.srvdelegates++;
2793			nfsrv_openpluslock++;
2794			nfsrv_delegatecnt++;
2795		    }
2796		}
2797	} else {
2798		/*
2799		 * New owner case. Start the open_owner sequence with a
2800		 * Needs confirmation (unless a reclaim) and hang the
2801		 * new open off it.
		 * No delegation is issued for this case.
		 * nfsrv_openpluslock is incremented twice, once for the
		 * open and once for the open owner.
2802		 */
2803		new_open->ls_stateid.seqid = 0;
2804		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2805		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2806		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2807		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
2808		    NFSLCK_OPEN;
2809		new_open->ls_uid = new_stp->ls_uid;
2810		LIST_INIT(&new_open->ls_open);
2811		new_open->ls_openowner = new_stp;
2812		new_open->ls_lfp = lfp;
2813		new_open->ls_clp = clp;
2814		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2815		if (new_stp->ls_flags & NFSLCK_RECLAIM) {
2816			new_stp->ls_flags = 0;
2817		} else {
2818			*rflagsp |= NFSV4OPEN_RESULTCONFIRM;
2819			new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
2820		}
2821		nfsrvd_refcache(new_stp->ls_op);
2822		new_stp->ls_noopens = 0;
2823		LIST_INIT(&new_stp->ls_open);
2824		LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2825		LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2826		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2827		    new_open, ls_hash);
2828		openstp = new_open;
2829		new_open = NULL;
2830		*new_stpp = NULL;
2831		newnfsstats.srvopens++;
2832		nfsrv_openpluslock++;
2833		newnfsstats.srvopenowners++;
2834		nfsrv_openpluslock++;
2835	}
	/*
	 * error can only be non-zero here for the NFSLCK_DELEGPREV case
	 * (NFSERR_EXPIRED) or the Claim_Previous case
	 * (NFSERR_RECLAIMCONFLICT) above. Otherwise, openstp refers to
	 * the open and its stateid is returned to the caller.
	 */
2836	if (!error) {
2837		stateidp->seqid = openstp->ls_stateid.seqid;
2838		stateidp->other[0] = openstp->ls_stateid.other[0];
2839		stateidp->other[1] = openstp->ls_stateid.other[1];
2840		stateidp->other[2] = openstp->ls_stateid.other[2];
2841	}
2842	NFSUNLOCKSTATE();
2843	if (haslock) {
2844		NFSLOCKV4ROOTMUTEX();
2845		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2846		NFSUNLOCKV4ROOTMUTEX();
2847	}
	/*
	 * new_open and new_deleg were set to NULL above if they were
	 * linked into the state lists, so only unused ones are freed.
	 */
2848	if (new_open)
2849		FREE((caddr_t)new_open, M_NFSDSTATE);
2850	if (new_deleg)
2851		FREE((caddr_t)new_deleg, M_NFSDSTATE);
2852
2853out:
2854	NFSEXITCODE2(error, nd);
2855	return (error);
2856}
2857
2858/*
2859 * Open update. Does the confirm, downgrade and close.
2860 */
2861APPLESTATIC int
2862nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
2863    nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p)
2864{
2865	struct nfsstate *stp, *ownerstp;
2866	struct nfsclient *clp;
2867	struct nfslockfile *lfp;
2868	u_int32_t bits;
2869	int error = 0, gotstate = 0, len = 0;
2870	u_char client[NFSV4_OPAQUELIMIT];
2871
2872	/*
2873	 * Check for restart conditions (client and server).
2874	 */
2875	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2876	    &new_stp->ls_stateid, 0);
2877	if (error)
2878		goto out;
2879
2880	NFSLOCKSTATE();
2881	/*
2882	 * Get the open structure via clientid and stateid.
2883	 */
2884	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
2885	    (nfsquad_t)((u_quad_t)0), NULL, p);
2886	if (!error)
2887		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
2888		    new_stp->ls_flags, &stp);
2889
2890	/*
2891	 * Sanity check the open.
2892	 */
2893	if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
2894		(!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
2895		 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
2896		((new_stp->ls_flags & NFSLCK_CONFIRM) &&
2897		 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
2898		error = NFSERR_BADSTATEID;
2899
2900	if (!error)
2901		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
2902		    stp->ls_openowner, new_stp->ls_op);
2903	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
2904	    !(new_stp->ls_flags & NFSLCK_CONFIRM))
2905		error = NFSERR_OLDSTATEID;
2906	if (!error && vnode_vtype(vp) != VREG) {
2907		if (vnode_vtype(vp) == VDIR)
2908			error = NFSERR_ISDIR;
2909		else
2910			error = NFSERR_INVAL;
2911	}
2912
2913	if (error) {
2914		/*
2915		 * If a client tries to confirm an Open with a bad
2916		 * seqid# and there are no byte range locks or other Opens
2917		 * on the openowner, just throw it away, so the next use of the
2918		 * openowner will start a fresh seq#.
2919		 */
2920		if (error == NFSERR_BADSEQID &&
2921		    (new_stp->ls_flags & NFSLCK_CONFIRM) &&
2922		    nfsrv_nootherstate(stp))
2923			nfsrv_freeopenowner(stp->ls_openowner, 0, p);
2924		NFSUNLOCKSTATE();
2925		goto out;
2926	}
2927
2928	/*
2929	 * Set the return stateid.
2930	 */
2931	stateidp->seqid = stp->ls_stateid.seqid + 1;
2932	stateidp->other[0] = stp->ls_stateid.other[0];
2933	stateidp->other[1] = stp->ls_stateid.other[1];
2934	stateidp->other[2] = stp->ls_stateid.other[2];
2935	/*
2936	 * Now, handle the three cases.
2937	 */
2938	if (new_stp->ls_flags & NFSLCK_CONFIRM) {
2939		/*
2940		 * If the open doesn't need confirmation, it seems to me that
2941		 * there is a client error, but I'll just log it and keep going?
2942		 */
2943		if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
2944			printf("Nfsv4d: stray open confirm\n");
2945		stp->ls_openowner->ls_flags = 0;
2946		stp->ls_stateid.seqid++;
2947		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
2948			clp->lc_flags |= LCL_STAMPEDSTABLE;
2949			len = clp->lc_idlen;
2950			NFSBCOPY(clp->lc_id, client, len);
2951			gotstate = 1;
2952		}
2953		NFSUNLOCKSTATE();
2954	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
2955		ownerstp = stp->ls_openowner;
2956		lfp = stp->ls_lfp;
2957		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
2958			/* Get the lf lock */
2959			nfsrv_locklf(lfp);
2960			NFSUNLOCKSTATE();
2961			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
2962				NFSLOCKSTATE();
2963				nfsrv_unlocklf(lfp);
2964				NFSUNLOCKSTATE();
2965			}
2966		} else {
2967			(void) nfsrv_freeopen(stp, NULL, 0, p);
2968			NFSUNLOCKSTATE();
2969		}
2970	} else {
2971		/*
2972		 * Update the share bits, making sure that the new set are a
2973		 * subset of the old ones.
2974		 */
2975		bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
2976		if (~(stp->ls_flags) & bits) {
2977			NFSUNLOCKSTATE();
2978			error = NFSERR_INVAL;
2979			goto out;
2980		}
2981		stp->ls_flags = (bits | NFSLCK_OPEN);
2982		stp->ls_stateid.seqid++;
2983		NFSUNLOCKSTATE();
2984	}
2985
2986	/*
2987	 * If the client just confirmed its first open, write a timestamp
2988	 * to the stable storage file.
2989	 */
2990	if (gotstate != 0) {
2991		nfsrv_writestable(client, len, NFSNST_NEWSTATE, p);
2992		nfsrv_backupstable();
2993	}
2994
2995out:
2996	NFSEXITCODE2(error, nd);
2997	return (error);
2998}
2999
3000/*
3001 * Delegation update. Does the purge and return.
3002 */
3003APPLESTATIC int
3004nfsrv_delegupdate(nfsquad_t clientid, nfsv4stateid_t *stateidp,
3005    vnode_t vp, int op, struct ucred *cred, NFSPROC_T *p)
3006{
3007	struct nfsstate *stp;
3008	struct nfsclient *clp;
3009	int error = 0;
3010	fhandle_t fh;
3011
3012	/*
3013	 * Do a sanity check against the file handle for DelegReturn.
3014	 */
3015	if (vp) {
3016		error = nfsvno_getfh(vp, &fh, p);
3017		if (error)
3018			goto out;
3019	}
3020	/*
3021	 * Check for restart conditions (client and server).
3022	 */
3023	if (op == NFSV4OP_DELEGRETURN)
3024		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3025			stateidp, 0);
3026	else
3027		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3028			stateidp, 0);
3029
3030	NFSLOCKSTATE();
3031	/*
3032	 * Get the open structure via clientid and stateid.
3033	 */
3034	if (!error)
3035	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
3036		(nfsquad_t)((u_quad_t)0), NULL, p);
3037	if (error) {
3038		if (error == NFSERR_CBPATHDOWN)
3039			error = 0;
3040		if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3041			error = NFSERR_STALESTATEID;
3042	}
3043	if (!error && op == NFSV4OP_DELEGRETURN) {
3044	    error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3045	    if (!error && stp->ls_stateid.seqid != stateidp->seqid)
3046		error = NFSERR_OLDSTATEID;
3047	}
3048	/*
3049	 * NFSERR_EXPIRED means that the state has gone away,
3050	 * so Delegations have been purged. Just return ok.
3051	 */
3052	if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3053		NFSUNLOCKSTATE();
3054		error = 0;
3055		goto out;
3056	}
3057	if (error) {
3058		NFSUNLOCKSTATE();
3059		goto out;
3060	}
3061
3062	if (op == NFSV4OP_DELEGRETURN) {
3063		if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3064		    sizeof (fhandle_t))) {
3065			NFSUNLOCKSTATE();
3066			error = NFSERR_BADSTATEID;
3067			goto out;
3068		}
3069		nfsrv_freedeleg(stp);
3070	} else {
3071		nfsrv_freedeleglist(&clp->lc_olddeleg);
3072	}
3073	NFSUNLOCKSTATE();
3074	error = 0;
3075
3076out:
3077	NFSEXITCODE(error);
3078	return (error);
3079}
3080
3081/*
3082 * Release lock owner.
3083 */
3084APPLESTATIC int
3085nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3086    NFSPROC_T *p)
3087{
3088	struct nfsstate *stp, *nstp, *openstp, *ownstp;
3089	struct nfsclient *clp;
3090	int error = 0;
3091
3092	/*
3093	 * Check for restart conditions (client and server).
3094	 */
3095	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3096	    &new_stp->ls_stateid, 0);
3097	if (error)
3098		goto out;
3099
3100	NFSLOCKSTATE();
3101	/*
3102	 * Get the lock owner by name.
3103	 */
3104	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
3105	    (nfsquad_t)((u_quad_t)0), NULL, p);
3106	if (error) {
3107		NFSUNLOCKSTATE();
3108		goto out;
3109	}
3110	LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3111	    LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3112		stp = LIST_FIRST(&openstp->ls_open);
3113		while (stp != LIST_END(&openstp->ls_open)) {
3114		    nstp = LIST_NEXT(stp, ls_list);
3115		    /*
3116		     * If the owner matches, check for locks and
3117		     * then free or return an error.
3118		     */
3119		    if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3120			!NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3121			 stp->ls_ownerlen)){
3122			if (LIST_EMPTY(&stp->ls_lock)) {
3123			    nfsrv_freelockowner(stp, NULL, 0, p);
3124			} else {
3125			    NFSUNLOCKSTATE();
3126			    error = NFSERR_LOCKSHELD;
3127			    goto out;
3128			}
3129		    }
3130		    stp = nstp;
3131		}
3132	    }
3133	}
3134	NFSUNLOCKSTATE();
3135
3136out:
3137	NFSEXITCODE(error);
3138	return (error);
3139}
3140
3141/*
3142 * Get the file handle for a lock structure.
3143 */
3144static int
3145nfsrv_getlockfh(vnode_t vp, u_short flags,
3146    struct nfslockfile **new_lfpp, fhandle_t *nfhp, NFSPROC_T *p)
3147{
3148	fhandle_t *fhp = NULL;
3149	struct nfslockfile *new_lfp;
3150	int error;
3151
3152	/*
3153	 * For lock, use the new nfslock structure, otherwise just
3154	 * a fhandle_t on the stack.
3155	 */
3156	if (flags & NFSLCK_OPEN) {
3157		new_lfp = *new_lfpp;
3158		fhp = &new_lfp->lf_fh;
3159	} else if (nfhp) {
3160		fhp = nfhp;
3161	} else {
3162		panic("nfsrv_getlockfh");
3163	}
3164	error = nfsvno_getfh(vp, fhp, p);
3165	NFSEXITCODE(error);
3166	return (error);
3167}
3168
3169/*
3170 * Get an nfs lock structure. Allocate one, as required, and return a
3171 * pointer to it.
3172 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3173 */
3174static int
3175nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3176    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3177{
3178	struct nfslockfile *lfp;
3179	fhandle_t *fhp = NULL, *tfhp;
3180	struct nfslockhashhead *hp;
3181	struct nfslockfile *new_lfp = NULL;
3182
3183	/*
3184	 * For lock, use the new nfslock structure, otherwise just
3185	 * a fhandle_t on the stack.
3186	 */
3187	if (flags & NFSLCK_OPEN) {
3188		new_lfp = *new_lfpp;
3189		fhp = &new_lfp->lf_fh;
3190	} else if (nfhp) {
3191		fhp = nfhp;
3192	} else {
3193		panic("nfsrv_getlockfile");
3194	}
3195
3196	hp = NFSLOCKHASH(fhp);
3197	LIST_FOREACH(lfp, hp, lf_hash) {
3198		tfhp = &lfp->lf_fh;
3199		if (NFSVNO_CMPFH(fhp, tfhp)) {
3200			if (lockit)
3201				nfsrv_locklf(lfp);
3202			*lfpp = lfp;
3203			return (0);
3204		}
3205	}
3206	if (!(flags & NFSLCK_OPEN))
3207		return (-1);
3208
3209	/*
3210	 * No match, so chain the new one into the list.
3211	 */
3212	LIST_INIT(&new_lfp->lf_open);
3213	LIST_INIT(&new_lfp->lf_lock);
3214	LIST_INIT(&new_lfp->lf_deleg);
3215	LIST_INIT(&new_lfp->lf_locallock);
3216	LIST_INIT(&new_lfp->lf_rollback);
3217	new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3218	new_lfp->lf_locallock_lck.nfslock_lock = 0;
3219	new_lfp->lf_usecount = 0;
3220	LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3221	*lfpp = new_lfp;
3222	*new_lfpp = NULL;
3223	return (0);
3224}
3225
3226/*
3227 * This function adds a nfslock lock structure to the list for the associated
3228 * nfsstate and nfslockfile structures. It will be inserted after the
3229 * entry pointed at by insert_lop.
3230 */
3231static void
3232nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3233    struct nfsstate *stp, struct nfslockfile *lfp)
3234{
3235	struct nfslock *lop, *nlop;
3236
3237	new_lop->lo_stp = stp;
3238	new_lop->lo_lfp = lfp;
3239
3240	if (stp != NULL) {
3241		/* Insert in increasing lo_first order */
3242		lop = LIST_FIRST(&lfp->lf_lock);
3243		if (lop == LIST_END(&lfp->lf_lock) ||
3244		    new_lop->lo_first <= lop->lo_first) {
3245			LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3246		} else {
3247			nlop = LIST_NEXT(lop, lo_lckfile);
3248			while (nlop != LIST_END(&lfp->lf_lock) &&
3249			       nlop->lo_first < new_lop->lo_first) {
3250				lop = nlop;
3251				nlop = LIST_NEXT(lop, lo_lckfile);
3252			}
3253			LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3254		}
3255	} else {
3256		new_lop->lo_lckfile.le_prev = NULL;	/* list not used */
3257	}
3258
3259	/*
3260	 * Insert after insert_lop, which is overloaded as stp or lfp for
3261	 * an empty list.
3262	 */
3263	if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3264		LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3265	else if ((struct nfsstate *)insert_lop == stp)
3266		LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3267	else
3268		LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3269	if (stp != NULL) {
3270		newnfsstats.srvlocks++;
3271		nfsrv_openpluslock++;
3272	}
3273}
3274
/*
 * This function updates the locking for a lock owner and given file. It
 * maintains a list of lock ranges ordered on increasing file offset that
 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
 *
 * stp        - the lock owner's state, or NULL, in which case the
 *              lfp->lf_locallock list is the one that is updated.
 * new_lopp   - the new lock or unlock range. Unless it is an unlock
 *              (NFSLCK_UNLOCK set in lo_flags), it is inserted in the
 *              list and *new_lopp is set NULL.
 * other_lopp - a spare structure, used (and *other_lopp set NULL) when
 *              an existing lock has to be split in two. For an unlock
 *              with no spare (*other_lopp == NULL), new_lop itself is
 *              used for the second half of the split and *new_lopp is
 *              set NULL.
 * lfp        - the file the range applies to.
 *
 * Existing locks that are absorbed by the new range are freed via
 * nfsrv_freenfslock().
 */
static void
nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
    struct nfslock **other_lopp, struct nfslockfile *lfp)
{
	struct nfslock *new_lop = *new_lopp;
	struct nfslock *lop, *tlop, *ilop;
	struct nfslock *other_lop = *other_lopp;
	int unlock = 0, myfile = 0;
	u_int64_t tmp;

	/*
	 * Work down the list until the lock is merged.
	 */
	if (new_lop->lo_flags & NFSLCK_UNLOCK)
		unlock = 1;
	/*
	 * ilop is the entry new_lop will be inserted after. It starts out
	 * as stp or lfp cast to a lock pointer, which nfsrv_insertlock()
	 * recognizes as "insert at the head of the list".
	 */
	if (stp != NULL) {
		ilop = (struct nfslock *)stp;
		lop = LIST_FIRST(&stp->ls_lock);
	} else {
		ilop = (struct nfslock *)lfp;
		lop = LIST_FIRST(&lfp->lf_locallock);
	}
	while (lop != NULL) {
	    /*
	     * Only check locks for this file that aren't before the start of
	     * new lock's range.
	     */
	    if (lop->lo_lfp == lfp) {
	      /* Note that at least one lock for this file has been seen. */
	      myfile = 1;
	      if (lop->lo_end >= new_lop->lo_first) {
		if (new_lop->lo_end < lop->lo_first) {
			/*
			 * If the new lock ends before the start of the
			 * current lock's range, no merge, just insert
			 * the new lock.
			 */
			break;
		}
		if (new_lop->lo_flags == lop->lo_flags ||
		    (new_lop->lo_first <= lop->lo_first &&
		     new_lop->lo_end >= lop->lo_end)) {
			/*
			 * This lock can be absorbed by the new lock/unlock.
			 * This happens when it covers the entire range
			 * of the old lock or is contiguous
			 * with the old lock and is of the same type or an
			 * unlock.
			 * The new range is grown to cover the old one and
			 * the old one is freed.
			 */
			if (lop->lo_first < new_lop->lo_first)
				new_lop->lo_first = lop->lo_first;
			if (lop->lo_end > new_lop->lo_end)
				new_lop->lo_end = lop->lo_end;
			tlop = lop;
			lop = LIST_NEXT(lop, lo_lckowner);
			nfsrv_freenfslock(tlop);
			continue;
		}

		/*
		 * All these cases are for contiguous locks that are not the
		 * same type, so they can't be merged.
		 */
		if (new_lop->lo_first <= lop->lo_first) {
			/*
			 * This case is where the new lock overlaps with the
			 * first part of the old lock. Move the start of the
			 * old lock to just past the end of the new lock. The
			 * new lock will be inserted in front of the old, since
			 * ilop hasn't been updated. (We are done now.)
			 */
			lop->lo_first = new_lop->lo_end;
			break;
		}
		if (new_lop->lo_end >= lop->lo_end) {
			/*
			 * This case is where the new lock overlaps with the
			 * end of the old lock's range. Move the old lock's
			 * end to just before the new lock's first and insert
			 * the new lock after the old lock.
			 * Might not be done yet, since the new lock could
			 * overlap further locks with higher ranges.
			 */
			lop->lo_end = new_lop->lo_first;
			ilop = lop;
			lop = LIST_NEXT(lop, lo_lckowner);
			continue;
		}
		/*
		 * The final case is where the new lock's range is in the
		 * middle of the current lock's and splits the current lock
		 * up. Use *other_lopp to handle the second part of the
		 * split old lock range. (We are done now.)
		 * For unlock, we use new_lop as other_lop and tmp, since
		 * other_lop and new_lop are the same for this case.
		 * We noted the unlock case above, so we don't need
		 * new_lop->lo_flags any longer.
		 */
		/* Save lo_first, since new_lop may be overwritten below. */
		tmp = new_lop->lo_first;
		if (other_lop == NULL) {
			/* Only an unlock may be done without a spare. */
			if (!unlock)
				panic("nfsd srv update unlock");
			other_lop = new_lop;
			*new_lopp = NULL;
		}
		other_lop->lo_first = new_lop->lo_end;
		other_lop->lo_end = lop->lo_end;
		other_lop->lo_flags = lop->lo_flags;
		other_lop->lo_stp = stp;
		other_lop->lo_lfp = lfp;
		lop->lo_end = tmp;
		nfsrv_insertlock(other_lop, lop, stp, lfp);
		*other_lopp = NULL;
		ilop = lop;
		break;
	      }
	    }
	    ilop = lop;
	    lop = LIST_NEXT(lop, lo_lckowner);
	    /*
	     * Stop once the locks for this file have been passed (end of
	     * list or the next entry is for a different file).
	     */
	    if (myfile && (lop == NULL || lop->lo_lfp != lfp))
		break;
	}

	/*
	 * Insert the new lock in the list at the appropriate place.
	 * (Nothing is inserted for an unlock.)
	 */
	if (!unlock) {
		nfsrv_insertlock(new_lop, ilop, stp, lfp);
		*new_lopp = NULL;
	}
}
3412
3413/*
3414 * This function handles sequencing of locks, etc.
3415 * It returns an error that indicates what the caller should do.
3416 */
3417static int
3418nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
3419    struct nfsstate *stp, struct nfsrvcache *op)
3420{
3421	int error = 0;
3422
3423	if (op != nd->nd_rp)
3424		panic("nfsrvstate checkseqid");
3425	if (!(op->rc_flag & RC_INPROG))
3426		panic("nfsrvstate not inprog");
3427	if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
3428		printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
3429		panic("nfsrvstate op refcnt");
3430	}
3431	if ((stp->ls_seq + 1) == seqid) {
3432		if (stp->ls_op)
3433			nfsrvd_derefcache(stp->ls_op);
3434		stp->ls_op = op;
3435		nfsrvd_refcache(op);
3436		stp->ls_seq = seqid;
3437		goto out;
3438	} else if (stp->ls_seq == seqid && stp->ls_op &&
3439		op->rc_xid == stp->ls_op->rc_xid &&
3440		op->rc_refcnt == 0 &&
3441		op->rc_reqlen == stp->ls_op->rc_reqlen &&
3442		op->rc_cksum == stp->ls_op->rc_cksum) {
3443		if (stp->ls_op->rc_flag & RC_INPROG) {
3444			error = NFSERR_DONTREPLY;
3445			goto out;
3446		}
3447		nd->nd_rp = stp->ls_op;
3448		nd->nd_rp->rc_flag |= RC_INPROG;
3449		nfsrvd_delcache(op);
3450		error = NFSERR_REPLYFROMCACHE;
3451		goto out;
3452	}
3453	error = NFSERR_BADSEQID;
3454
3455out:
3456	NFSEXITCODE2(error, nd);
3457	return (error);
3458}
3459
3460/*
3461 * Get the client ip address for callbacks. If the strings can't be parsed,
3462 * just set lc_program to 0 to indicate no callbacks are possible.
3463 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
3464 *  the address to the client's transport address. This won't be used
3465 *  for callbacks, but can be printed out by newnfsstats for info.)
3466 * Return error if the xdr can't be parsed, 0 otherwise.
3467 */
3468APPLESTATIC int
3469nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
3470{
3471	u_int32_t *tl;
3472	u_char *cp, *cp2;
3473	int i, j;
3474	struct sockaddr_in *rad, *sad;
3475	u_char protocol[5], addr[24];
3476	int error = 0, cantparse = 0;
3477	union {
3478		u_long ival;
3479		u_char cval[4];
3480	} ip;
3481	union {
3482		u_short sval;
3483		u_char cval[2];
3484	} port;
3485
3486	rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
3487	rad->sin_family = AF_INET;
3488	rad->sin_len = sizeof (struct sockaddr_in);
3489	rad->sin_addr.s_addr = 0;
3490	rad->sin_port = 0;
3491	clp->lc_req.nr_client = NULL;
3492	clp->lc_req.nr_lock = 0;
3493	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3494	i = fxdr_unsigned(int, *tl);
3495	if (i >= 3 && i <= 4) {
3496		error = nfsrv_mtostr(nd, protocol, i);
3497		if (error)
3498			goto nfsmout;
3499		if (!strcmp(protocol, "tcp")) {
3500			clp->lc_flags |= LCL_TCPCALLBACK;
3501			clp->lc_req.nr_sotype = SOCK_STREAM;
3502			clp->lc_req.nr_soproto = IPPROTO_TCP;
3503		} else if (!strcmp(protocol, "udp")) {
3504			clp->lc_req.nr_sotype = SOCK_DGRAM;
3505			clp->lc_req.nr_soproto = IPPROTO_UDP;
3506		} else {
3507			cantparse = 1;
3508		}
3509	} else {
3510		cantparse = 1;
3511		if (i > 0) {
3512			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3513			if (error)
3514				goto nfsmout;
3515		}
3516	}
3517	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3518	i = fxdr_unsigned(int, *tl);
3519	if (i < 0) {
3520		error = NFSERR_BADXDR;
3521		goto nfsmout;
3522	} else if (i == 0) {
3523		cantparse = 1;
3524	} else if (!cantparse && i <= 23 && i >= 11) {
3525		error = nfsrv_mtostr(nd, addr, i);
3526		if (error)
3527			goto nfsmout;
3528
3529		/*
3530		 * Parse out the address fields. We expect 6 decimal numbers
3531		 * separated by '.'s.
3532		 */
3533		cp = addr;
3534		i = 0;
3535		while (*cp && i < 6) {
3536			cp2 = cp;
3537			while (*cp2 && *cp2 != '.')
3538				cp2++;
3539			if (*cp2)
3540				*cp2++ = '\0';
3541			else if (i != 5) {
3542				cantparse = 1;
3543				break;
3544			}
3545			j = nfsrv_getipnumber(cp);
3546			if (j >= 0) {
3547				if (i < 4)
3548					ip.cval[3 - i] = j;
3549				else
3550					port.cval[5 - i] = j;
3551			} else {
3552				cantparse = 1;
3553				break;
3554			}
3555			cp = cp2;
3556			i++;
3557		}
3558		if (!cantparse) {
3559			if (ip.ival != 0x0) {
3560				rad->sin_addr.s_addr = htonl(ip.ival);
3561				rad->sin_port = htons(port.sval);
3562			} else {
3563				cantparse = 1;
3564			}
3565		}
3566	} else {
3567		cantparse = 1;
3568		if (i > 0) {
3569			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3570			if (error)
3571				goto nfsmout;
3572		}
3573	}
3574	if (cantparse) {
3575		sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
3576		rad->sin_addr.s_addr = sad->sin_addr.s_addr;
3577		rad->sin_port = 0x0;
3578		clp->lc_program = 0;
3579	}
3580nfsmout:
3581	NFSEXITCODE2(error, nd);
3582	return (error);
3583}
3584
/*
 * Convert a nul terminated string of at most three decimal digits into a
 * number in the range 0-255.
 * Returns the number, or -1 if the string has more than three characters,
 * has a non-digit in it or the value is greater than 255. (An empty
 * string yields 0.)
 */
static int
nfsrv_getipnumber(u_char *cp)
{
	int ndigits, val = 0;

	for (ndigits = 0; cp[ndigits] != '\0'; ndigits++) {
		if (ndigits >= 3 || cp[ndigits] < '0' || cp[ndigits] > '9')
			return (-1);
		val = val * 10 + (cp[ndigits] - '0');
	}
	return (val < 256 ? val : -1);
}
3606
3607/*
3608 * This function checks for restart conditions.
3609 */
3610static int
3611nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
3612    nfsv4stateid_t *stateidp, int specialid)
3613{
3614	int ret = 0;
3615
3616	/*
3617	 * First check for a server restart. Open, LockT, ReleaseLockOwner
3618	 * and DelegPurge have a clientid, the rest a stateid.
3619	 */
3620	if (flags &
3621	    (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
3622		if (clientid.lval[0] != nfsrvboottime) {
3623			ret = NFSERR_STALECLIENTID;
3624			goto out;
3625		}
3626	} else if (stateidp->other[0] != nfsrvboottime &&
3627		specialid == 0) {
3628		ret = NFSERR_STALESTATEID;
3629		goto out;
3630	}
3631
3632	/*
3633	 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
3634	 * not use a lock/open owner seqid#, so the check can be done now.
3635	 * (The others will be checked, as required, later.)
3636	 */
3637	if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
3638		goto out;
3639
3640	NFSLOCKSTATE();
3641	ret = nfsrv_checkgrace(flags);
3642	NFSUNLOCKSTATE();
3643
3644out:
3645	NFSEXITCODE(ret);
3646	return (ret);
3647}
3648
3649/*
3650 * Check for grace.
3651 */
3652static int
3653nfsrv_checkgrace(u_int32_t flags)
3654{
3655	int error = 0;
3656
3657	if (nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) {
3658		if (flags & NFSLCK_RECLAIM) {
3659			error = NFSERR_NOGRACE;
3660			goto out;
3661		}
3662	} else {
3663		if (!(flags & NFSLCK_RECLAIM)) {
3664			error = NFSERR_GRACE;
3665			goto out;
3666		}
3667
3668		/*
3669		 * If grace is almost over and we are still getting Reclaims,
3670		 * extend grace a bit.
3671		 */
3672		if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
3673		    nfsrv_stablefirst.nsf_eograce)
3674			nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
3675				NFSRV_LEASEDELTA;
3676	}
3677
3678out:
3679	NFSEXITCODE(error);
3680	return (error);
3681}
3682
/*
 * Do a server callback.
 *
 * clp      - the client to call back; it must not be marked
 *            LCL_NEEDSCONFIRM (panics otherwise).
 * procnum  - NFSV4OP_CBGETATTR, NFSV4OP_CBRECALL or anything else, which
 *            results in a Null callback (NFSV4PROC_CBNULL).
 * stateidp - the stateid put in a CB Recall request.
 * trunc    - the truncate flag put in a CB Recall request.
 * fhp      - the file handle put in a CB Getattr or CB Recall request.
 * nap      - where the attributes from a CB Getattr reply are loaded.
 * attrbitp - the attribute bits requested by a CB Getattr.
 * p        - passed through to nfsv4_loadattr().
 *
 * clp->lc_cbref is held incremented for the duration of the call and a
 * wakeup(clp) is done at the end if LCL_WAKEUPWANTED is set and the count
 * has dropped to zero.
 *
 * Returns 0, the error from the connect/request (in which case the client
 * is marked LCL_CBDOWN), the nd_repstat from the client's reply or the
 * result of parsing a CB Getattr reply.
 */
static int
nfsrv_docallback(struct nfsclient *clp, int procnum,
    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
{
	mbuf_t m;
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct ucred *cred;
	int error = 0;
	u_int32_t callback;

	cred = newnfs_getcred();
	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		NFSUNLOCKSTATE();
		panic("docallb");
	}
	clp->lc_cbref++;

	/*
	 * Fill the callback program# and version into the request
	 * structure for newnfs_connect() to use.
	 */
	clp->lc_req.nr_prog = clp->lc_program;
	clp->lc_req.nr_vers = NFSV4_CBVERS;

	/*
	 * First, fill in some of the fields of nd and cr.
	 * (The fields of clp are copied while the state lock is held.)
	 */
	nd->nd_flag = ND_NFSV4;
	if (clp->lc_flags & LCL_GSS)
		nd->nd_flag |= ND_KERBV;
	nd->nd_repstat = 0;
	cred->cr_uid = clp->lc_uid;
	cred->cr_gid = clp->lc_gid;
	callback = clp->lc_callback;
	NFSUNLOCKSTATE();
	cred->cr_ngroups = 1;

	/*
	 * Get the first mbuf for the request.
	 */
	MGET(m, M_WAITOK, MT_DATA);
	mbuf_setlen(m, 0);
	nd->nd_mreq = nd->nd_mb = m;
	nd->nd_bpos = NFSMTOD(m, caddr_t);

	/*
	 * and build the callback request.
	 * For the two compound cases, the words built after the tag string
	 * are the minor version, the callback ident from clp->lc_callback,
	 * the number of operations (1) and the operation number.
	 */
	if (procnum == NFSV4OP_CBGETATTR) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		(void) nfsm_strtom(nd, "CB Getattr", 10);
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
		*tl++ = txdr_unsigned(callback);
		*tl++ = txdr_unsigned(1);
		*tl = txdr_unsigned(NFSV4OP_CBGETATTR);
		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
		(void) nfsrv_putattrbit(nd, attrbitp);
	} else if (procnum == NFSV4OP_CBRECALL) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		(void) nfsm_strtom(nd, "CB Recall", 9);
		NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID);
		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
		*tl++ = txdr_unsigned(callback);
		*tl++ = txdr_unsigned(1);
		*tl++ = txdr_unsigned(NFSV4OP_CBRECALL);
		/* The stateid (seqid, then "other"), then the truncate flag. */
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
		    NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		if (trunc)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
	} else {
		nd->nd_procnum = NFSV4PROC_CBNULL;
	}

	/*
	 * Call newnfs_connect(), as required, and then newnfs_request().
	 * (The connect is only done when there is no nr_client yet and is
	 *  done with the nr_lock send lock held.)
	 * NOTE(review): when newnfs_connect() fails, newnfs_request() is
	 * not called and nothing visible here frees nd->nd_mreq - confirm
	 * whether the request mbuf chain is leaked on that path.
	 */
	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
	if (clp->lc_req.nr_client == NULL) {
		if (nd->nd_procnum == NFSV4PROC_CBNULL)
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 1);
		else
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 3);
	}
	newnfs_sndunlock(&clp->lc_req.nr_lock);
	if (!error) {
		error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL,
		    NULL, cred, clp->lc_program, NFSV4_CBVERS, NULL, 1, NULL,
		    NULL);
	}
	NFSFREECRED(cred);

	/*
	 * If error is set here, the Callback path isn't working
	 * properly, so twiddle the appropriate LCL_ flags.
	 * (nd_repstat != 0 indicates the Callback path is working,
	 *  but the callback failed on the client.)
	 */
	if (error) {
		/*
		 * Mark the callback pathway down, which disabled issuing
		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
		 */
		NFSLOCKSTATE();
		clp->lc_flags |= LCL_CBDOWN;
		NFSUNLOCKSTATE();
	} else {
		/*
		 * Callback worked. If the callback path was down, disable
		 * callbacks, so no more delegations will be issued. (This
		 * is done on the assumption that the callback pathway is
		 * flakey.)
		 */
		NFSLOCKSTATE();
		if (clp->lc_flags & LCL_CBDOWN)
			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
		NFSUNLOCKSTATE();
		if (nd->nd_repstat)
			error = nd->nd_repstat;
		else if (procnum == NFSV4OP_CBGETATTR)
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
			    p, NULL);
		/* The reply is only freed on this (request worked) path. */
		mbuf_freem(nd->nd_mrep);
	}
	/*
	 * Drop the callback reference and wake up a waiter, if one is
	 * waiting and this was the last reference.
	 */
	NFSLOCKSTATE();
	clp->lc_cbref--;
	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
		clp->lc_flags &= ~LCL_WAKEUPWANTED;
		wakeup(clp);
	}
	NFSUNLOCKSTATE();

	NFSEXITCODE(error);
	return (error);
}
3832
/*
 * Return the next index# for a clientid. This just increments a static
 * 32bit counter and returns the new value, so the first value handed out
 * is 1.
 * If the counter does wrap around to 0, a message is printed and 0 is
 * returned; nothing else is done about it here.
 * At an average rate of one new client per second, it will wrap around in
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
3853
3854/*
3855 * Return the next index# for a stateid. Mostly just increment and return
3856 * the next one, but... if the 32bit unsigned does actually wrap around
3857 * (will a BSD server stay up that long?), find
3858 * new start and end values.
3859 */
3860static u_int32_t
3861nfsrv_nextstateindex(struct nfsclient *clp)
3862{
3863	struct nfsstate *stp;
3864	int i;
3865	u_int32_t canuse, min_index, max_index;
3866
3867	if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
3868		clp->lc_stateindex++;
3869		if (clp->lc_stateindex != clp->lc_statemaxindex)
3870			return (clp->lc_stateindex);
3871	}
3872
3873	/*
3874	 * Yuck, we've hit the end.
3875	 * Look for a new min and max.
3876	 */
3877	min_index = 0;
3878	max_index = 0xffffffff;
3879	for (i = 0; i < NFSSTATEHASHSIZE; i++) {
3880	    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
3881		if (stp->ls_stateid.other[2] > 0x80000000) {
3882		    if (stp->ls_stateid.other[2] < max_index)
3883			max_index = stp->ls_stateid.other[2];
3884		} else {
3885		    if (stp->ls_stateid.other[2] > min_index)
3886			min_index = stp->ls_stateid.other[2];
3887		}
3888	    }
3889	}
3890
3891	/*
3892	 * Yikes, highly unlikely, but I'll handle it anyhow.
3893	 */
3894	if (min_index == 0x80000000 && max_index == 0x80000001) {
3895	    canuse = 0;
3896	    /*
3897	     * Loop around until we find an unused entry. Return that
3898	     * and set LCL_INDEXNOTOK, so the search will continue next time.
3899	     * (This is one of those rare cases where a goto is the
3900	     *  cleanest way to code the loop.)
3901	     */
3902tryagain:
3903	    for (i = 0; i < NFSSTATEHASHSIZE; i++) {
3904		LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
3905		    if (stp->ls_stateid.other[2] == canuse) {
3906			canuse++;
3907			goto tryagain;
3908		    }
3909		}
3910	    }
3911	    clp->lc_flags |= LCL_INDEXNOTOK;
3912	    return (canuse);
3913	}
3914
3915	/*
3916	 * Ok to start again from min + 1.
3917	 */
3918	clp->lc_stateindex = min_index + 1;
3919	clp->lc_statemaxindex = max_index;
3920	clp->lc_flags &= ~LCL_INDEXNOTOK;
3921	return (clp->lc_stateindex);
3922}
3923
3924/*
3925 * The following functions handle the stable storage file that deals with
3926 * the edge conditions described in RFC3530 Sec. 8.6.3.
3927 * The file is as follows:
3928 * - a single record at the beginning that has the lease time of the
3929 *   previous server instance (before the last reboot) and the nfsrvboottime
3930 *   values for the previous server boots.
3931 *   These previous boot times are used to ensure that the current
3932 *   nfsrvboottime does not, somehow, get set to a previous one.
3933 *   (This is important so that Stale ClientIDs and StateIDs can
3934 *    be recognized.)
3935 *   The number of previous nfsrvboottime values precedes the list.
3936 * - followed by some number of appended records with:
3937 *   - client id string
3938 *   - flag that indicates it is a record revoking state via lease
3939 *     expiration or similar
3940 *     OR has successfully acquired state.
3941 * These structures vary in length, with the client string at the end, up
3942 * to NFSV4_OPAQUELIMIT in size.
3943 *
3944 * At the end of the grace period, the file is truncated, the first
3945 * record is rewritten with updated information and any acquired state
3946 * records for successful reclaims of state are written.
3947 *
3948 * Subsequent records are appended when the first state is issued to
3949 * a client and when state is revoked for a client.
3950 *
3951 * When reading the file in, state issued records that come later in
3952 * the file override older ones, since the append log is in chronological order.
3953 * If, for some reason, the file can't be read, the grace period is
3954 * immediately terminated and all reclaims get NFSERR_NOGRACE.
3955 */
3956
3957/*
3958 * Read in the stable storage file. Called by nfssvc() before the nfsd
3959 * processes start servicing requests.
3960 */
3961APPLESTATIC void
3962nfsrv_setupstable(NFSPROC_T *p)
3963{
3964	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
3965	struct nfsrv_stable *sp, *nsp;
3966	struct nfst_rec *tsp;
3967	int error, i, tryagain;
3968	off_t off = 0;
3969	ssize_t aresid, len;
3970
3971	/*
3972	 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
3973	 * a reboot, so state has not been lost.
3974	 */
3975	if (sf->nsf_flags & NFSNSF_UPDATEDONE)
3976		return;
3977	/*
3978	 * Set Grace over just until the file reads successfully.
3979	 */
3980	nfsrvboottime = time_second;
3981	LIST_INIT(&sf->nsf_head);
3982	sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
3983	sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
3984	if (sf->nsf_fp == NULL)
3985		return;
3986	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
3987	    (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
3988	    0, NFSFPCRED(sf->nsf_fp), &aresid, p);
3989	if (error || aresid || sf->nsf_numboots == 0 ||
3990		sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
3991		return;
3992
3993	/*
3994	 * Now, read in the boottimes.
3995	 */
3996	sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
3997		sizeof (time_t), M_TEMP, M_WAITOK);
3998	off = sizeof (struct nfsf_rec);
3999	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4000	    (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
4001	    UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4002	if (error || aresid) {
4003		free((caddr_t)sf->nsf_bootvals, M_TEMP);
4004		sf->nsf_bootvals = NULL;
4005		return;
4006	}
4007
4008	/*
4009	 * Make sure this nfsrvboottime is different from all recorded
4010	 * previous ones.
4011	 */
4012	do {
4013		tryagain = 0;
4014		for (i = 0; i < sf->nsf_numboots; i++) {
4015			if (nfsrvboottime == sf->nsf_bootvals[i]) {
4016				nfsrvboottime++;
4017				tryagain = 1;
4018				break;
4019			}
4020		}
4021	} while (tryagain);
4022
4023	sf->nsf_flags |= NFSNSF_OK;
4024	off += (sf->nsf_numboots * sizeof (time_t));
4025
4026	/*
4027	 * Read through the file, building a list of records for grace
4028	 * checking.
4029	 * Each record is between sizeof (struct nfst_rec) and
4030	 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
4031	 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
4032	 */
4033	tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4034		NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
4035	do {
4036	    error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4037	        (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
4038	        off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4039	    len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
4040	    if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
4041		len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
4042		/*
4043		 * Yuck, the file has been corrupted, so just return
4044		 * after clearing out any restart state, so the grace period
4045		 * is over.
4046		 */
4047		LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4048			LIST_REMOVE(sp, nst_list);
4049			free((caddr_t)sp, M_TEMP);
4050		}
4051		free((caddr_t)tsp, M_TEMP);
4052		sf->nsf_flags &= ~NFSNSF_OK;
4053		free((caddr_t)sf->nsf_bootvals, M_TEMP);
4054		sf->nsf_bootvals = NULL;
4055		return;
4056	    }
4057	    if (len > 0) {
4058		off += sizeof (struct nfst_rec) + tsp->len - 1;
4059		/*
4060		 * Search the list for a matching client.
4061		 */
4062		LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
4063			if (tsp->len == sp->nst_len &&
4064			    !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
4065				break;
4066		}
4067		if (sp == LIST_END(&sf->nsf_head)) {
4068			sp = (struct nfsrv_stable *)malloc(tsp->len +
4069				sizeof (struct nfsrv_stable) - 1, M_TEMP,
4070				M_WAITOK);
4071			NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
4072				sizeof (struct nfst_rec) + tsp->len - 1);
4073			LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
4074		} else {
4075			if (tsp->flag == NFSNST_REVOKE)
4076				sp->nst_flag |= NFSNST_REVOKE;
4077			else
4078				/*
4079				 * A subsequent timestamp indicates the client
4080				 * did a setclientid/confirm and any previous
4081				 * revoke is no longer relevant.
4082				 */
4083				sp->nst_flag &= ~NFSNST_REVOKE;
4084		}
4085	    }
4086	} while (len > 0);
4087	free((caddr_t)tsp, M_TEMP);
4088	sf->nsf_flags = NFSNSF_OK;
4089	sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
4090		NFSRV_LEASEDELTA;
4091}
4092
4093/*
4094 * Update the stable storage file, now that the grace period is over.
4095 */
4096APPLESTATIC void
4097nfsrv_updatestable(NFSPROC_T *p)
4098{
4099	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4100	struct nfsrv_stable *sp, *nsp;
4101	int i;
4102	struct nfsvattr nva;
4103	vnode_t vp;
4104#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
4105	mount_t mp = NULL;
4106#endif
4107	int error;
4108
4109	if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
4110		return;
4111	sf->nsf_flags |= NFSNSF_UPDATEDONE;
4112	/*
4113	 * Ok, we need to rewrite the stable storage file.
4114	 * - truncate to 0 length
4115	 * - write the new first structure
4116	 * - loop through the data structures, writing out any that
4117	 *   have timestamps older than the old boot
4118	 */
4119	if (sf->nsf_bootvals) {
4120		sf->nsf_numboots++;
4121		for (i = sf->nsf_numboots - 2; i >= 0; i--)
4122			sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
4123	} else {
4124		sf->nsf_numboots = 1;
4125		sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
4126			M_TEMP, M_WAITOK);
4127	}
4128	sf->nsf_bootvals[0] = nfsrvboottime;
4129	sf->nsf_lease = nfsrv_lease;
4130	NFSVNO_ATTRINIT(&nva);
4131	NFSVNO_SETATTRVAL(&nva, size, 0);
4132	vp = NFSFPVNODE(sf->nsf_fp);
4133	vn_start_write(vp, &mp, V_WAIT);
4134	if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4135		error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
4136		    NULL);
4137		NFSVOPUNLOCK(vp, 0);
4138	} else
4139		error = EPERM;
4140	vn_finished_write(mp);
4141	if (!error)
4142	    error = NFSD_RDWR(UIO_WRITE, vp,
4143		(caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
4144		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4145	if (!error)
4146	    error = NFSD_RDWR(UIO_WRITE, vp,
4147		(caddr_t)sf->nsf_bootvals,
4148		sf->nsf_numboots * sizeof (time_t),
4149		(off_t)(sizeof (struct nfsf_rec)),
4150		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4151	free((caddr_t)sf->nsf_bootvals, M_TEMP);
4152	sf->nsf_bootvals = NULL;
4153	if (error) {
4154		sf->nsf_flags &= ~NFSNSF_OK;
4155		printf("EEK! Can't write NfsV4 stable storage file\n");
4156		return;
4157	}
4158	sf->nsf_flags |= NFSNSF_OK;
4159
4160	/*
4161	 * Loop through the list and write out timestamp records for
4162	 * any clients that successfully reclaimed state.
4163	 */
4164	LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4165		if (sp->nst_flag & NFSNST_GOTSTATE) {
4166			nfsrv_writestable(sp->nst_client, sp->nst_len,
4167				NFSNST_NEWSTATE, p);
4168			sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
4169		}
4170		LIST_REMOVE(sp, nst_list);
4171		free((caddr_t)sp, M_TEMP);
4172	}
4173	nfsrv_backupstable();
4174}
4175
4176/*
4177 * Append a record to the stable storage file.
4178 */
4179APPLESTATIC void
4180nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
4181{
4182	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4183	struct nfst_rec *sp;
4184	int error;
4185
4186	if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
4187		return;
4188	sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4189		len - 1, M_TEMP, M_WAITOK);
4190	sp->len = len;
4191	NFSBCOPY(client, sp->client, len);
4192	sp->flag = flag;
4193	error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
4194	    (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
4195	    UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
4196	free((caddr_t)sp, M_TEMP);
4197	if (error) {
4198		sf->nsf_flags &= ~NFSNSF_OK;
4199		printf("EEK! Can't write NfsV4 stable storage file\n");
4200	}
4201}
4202
4203/*
4204 * This function is called during the grace period to mark a client
4205 * that successfully reclaimed state.
4206 */
4207static void
4208nfsrv_markstable(struct nfsclient *clp)
4209{
4210	struct nfsrv_stable *sp;
4211
4212	/*
4213	 * First find the client structure.
4214	 */
4215	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4216		if (sp->nst_len == clp->lc_idlen &&
4217		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4218			break;
4219	}
4220	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
4221		return;
4222
4223	/*
4224	 * Now, just mark it and set the nfsclient back pointer.
4225	 */
4226	sp->nst_flag |= NFSNST_GOTSTATE;
4227	sp->nst_clp = clp;
4228}
4229
4230/*
4231 * This function is called for a reclaim, to see if it gets grace.
4232 * It returns 0 if a reclaim is allowed, 1 otherwise.
4233 */
4234static int
4235nfsrv_checkstable(struct nfsclient *clp)
4236{
4237	struct nfsrv_stable *sp;
4238
4239	/*
4240	 * First, find the entry for the client.
4241	 */
4242	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4243		if (sp->nst_len == clp->lc_idlen &&
4244		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4245			break;
4246	}
4247
4248	/*
4249	 * If not in the list, state was revoked or no state was issued
4250	 * since the previous reboot, a reclaim is denied.
4251	 */
4252	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
4253	    (sp->nst_flag & NFSNST_REVOKE) ||
4254	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
4255		return (1);
4256	return (0);
4257}
4258
4259/*
4260 * Test for and try to clear out a conflicting client. This is called by
4261 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
4262 * a found.
4263 * The trick here is that it can't revoke a conflicting client with an
4264 * expired lease unless it holds the v4root lock, so...
4265 * If no v4root lock, get the lock and return 1 to indicate "try again".
4266 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
4267 * the revocation worked and the conflicting client is "bye, bye", so it
4268 * can be tried again.
4269 * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
4270 * Unlocks State before a non-zero value is returned.
4271 */
4272static int
4273nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
4274    NFSPROC_T *p)
4275{
4276	int gotlock, lktype;
4277
4278	/*
4279	 * If lease hasn't expired, we can't fix it.
4280	 */
4281	if (clp->lc_expiry >= NFSD_MONOSEC ||
4282	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
4283		return (0);
4284	if (*haslockp == 0) {
4285		NFSUNLOCKSTATE();
4286		lktype = NFSVOPISLOCKED(vp);
4287		NFSVOPUNLOCK(vp, 0);
4288		NFSLOCKV4ROOTMUTEX();
4289		nfsv4_relref(&nfsv4rootfs_lock);
4290		do {
4291			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4292			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4293		} while (!gotlock);
4294		NFSUNLOCKV4ROOTMUTEX();
4295		*haslockp = 1;
4296		NFSVOPLOCK(vp, lktype | LK_RETRY);
4297		if ((vp->v_iflag & VI_DOOMED) != 0)
4298			return (2);
4299		else
4300			return (1);
4301	}
4302	NFSUNLOCKSTATE();
4303
4304	/*
4305	 * Ok, we can expire the conflicting client.
4306	 */
4307	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4308	nfsrv_backupstable();
4309	nfsrv_cleanclient(clp, p);
4310	nfsrv_freedeleglist(&clp->lc_deleg);
4311	nfsrv_freedeleglist(&clp->lc_olddeleg);
4312	LIST_REMOVE(clp, lc_hash);
4313	nfsrv_zapclient(clp, p);
4314	return (1);
4315}
4316
4317/*
4318 * Resolve a delegation conflict.
4319 * Returns 0 to indicate the conflict was resolved without sleeping.
4320 * Return -1 to indicate that the caller should check for conflicts again.
4321 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
4322 *
4323 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
4324 * for a return of 0, since there was no sleep and it could be required
4325 * later. It is released for a return of NFSERR_DELAY, since the caller
4326 * will return that error. It is released when a sleep was done waiting
4327 * for the delegation to be returned or expire (so that other nfsds can
4328 * handle ops). Then, it must be acquired for the write to stable storage.
4329 * (This function is somewhat similar to nfsrv_clientconflict(), but
4330 *  the semantics differ in a couple of subtle ways. The return of 0
4331 *  indicates the conflict was resolved without sleeping here, not
4332 *  that the conflict can't be resolved and the handling of nfsv4root_lock
4333 *  differs, as noted above.)
4334 * Unlocks State before returning a non-zero value.
4335 */
4336static int
4337nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
4338    vnode_t vp)
4339{
4340	struct nfsclient *clp = stp->ls_clp;
4341	int gotlock, error, lktype, retrycnt, zapped_clp;
4342	nfsv4stateid_t tstateid;
4343	fhandle_t tfh;
4344
4345	/*
4346	 * If the conflict is with an old delegation...
4347	 */
4348	if (stp->ls_flags & NFSLCK_OLDDELEG) {
4349		/*
4350		 * You can delete it, if it has expired.
4351		 */
4352		if (clp->lc_delegtime < NFSD_MONOSEC) {
4353			nfsrv_freedeleg(stp);
4354			NFSUNLOCKSTATE();
4355			error = -1;
4356			goto out;
4357		}
4358		NFSUNLOCKSTATE();
4359		/*
4360		 * During this delay, the old delegation could expire or it
4361		 * could be recovered by the client via an Open with
4362		 * CLAIM_DELEGATE_PREV.
4363		 * Release the nfsv4root_lock, if held.
4364		 */
4365		if (*haslockp) {
4366			*haslockp = 0;
4367			NFSLOCKV4ROOTMUTEX();
4368			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4369			NFSUNLOCKV4ROOTMUTEX();
4370		}
4371		error = NFSERR_DELAY;
4372		goto out;
4373	}
4374
4375	/*
4376	 * It's a current delegation, so:
4377	 * - check to see if the delegation has expired
4378	 *   - if so, get the v4root lock and then expire it
4379	 */
4380	if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
4381		/*
4382		 * - do a recall callback, since not yet done
4383		 * For now, never allow truncate to be set. To use
4384		 * truncate safely, it must be guaranteed that the
4385		 * Remove, Rename or Setattr with size of 0 will
4386		 * succeed and that would require major changes to
4387		 * the VFS/Vnode OPs.
4388		 * Set the expiry time large enough so that it won't expire
4389		 * until after the callback, then set it correctly, once
4390		 * the callback is done. (The delegation will now time
4391		 * out whether or not the Recall worked ok. The timeout
4392		 * will be extended when ops are done on the delegation
4393		 * stateid, up to the timelimit.)
4394		 */
4395		stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
4396		    NFSRV_LEASEDELTA;
4397		stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
4398		    NFSRV_LEASEDELTA;
4399		stp->ls_flags |= NFSLCK_DELEGRECALL;
4400
4401		/*
4402		 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
4403		 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
4404		 * in order to try and avoid a race that could happen
4405		 * when a CBRecall request passed the Open reply with
4406		 * the delegation in it when transitting the network.
4407		 * Since nfsrv_docallback will sleep, don't use stp after
4408		 * the call.
4409		 */
4410		NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
4411		    sizeof (tstateid));
4412		NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
4413		    sizeof (tfh));
4414		NFSUNLOCKSTATE();
4415		if (*haslockp) {
4416			*haslockp = 0;
4417			NFSLOCKV4ROOTMUTEX();
4418			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4419			NFSUNLOCKV4ROOTMUTEX();
4420		}
4421		retrycnt = 0;
4422		do {
4423		    error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
4424			&tstateid, 0, &tfh, NULL, NULL, p);
4425		    retrycnt++;
4426		} while ((error == NFSERR_BADSTATEID ||
4427		    error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
4428		error = NFSERR_DELAY;
4429		goto out;
4430	}
4431
4432	if (clp->lc_expiry >= NFSD_MONOSEC &&
4433	    stp->ls_delegtime >= NFSD_MONOSEC) {
4434		NFSUNLOCKSTATE();
4435		/*
4436		 * A recall has been done, but it has not yet expired.
4437		 * So, RETURN_DELAY.
4438		 */
4439		if (*haslockp) {
4440			*haslockp = 0;
4441			NFSLOCKV4ROOTMUTEX();
4442			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4443			NFSUNLOCKV4ROOTMUTEX();
4444		}
4445		error = NFSERR_DELAY;
4446		goto out;
4447	}
4448
4449	/*
4450	 * If we don't yet have the lock, just get it and then return,
4451	 * since we need that before deleting expired state, such as
4452	 * this delegation.
4453	 * When getting the lock, unlock the vnode, so other nfsds that
4454	 * are in progress, won't get stuck waiting for the vnode lock.
4455	 */
4456	if (*haslockp == 0) {
4457		NFSUNLOCKSTATE();
4458		lktype = NFSVOPISLOCKED(vp);
4459		NFSVOPUNLOCK(vp, 0);
4460		NFSLOCKV4ROOTMUTEX();
4461		nfsv4_relref(&nfsv4rootfs_lock);
4462		do {
4463			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4464			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4465		} while (!gotlock);
4466		NFSUNLOCKV4ROOTMUTEX();
4467		*haslockp = 1;
4468		NFSVOPLOCK(vp, lktype | LK_RETRY);
4469		if ((vp->v_iflag & VI_DOOMED) != 0) {
4470			*haslockp = 0;
4471			NFSLOCKV4ROOTMUTEX();
4472			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4473			NFSUNLOCKV4ROOTMUTEX();
4474			error = NFSERR_PERM;
4475			goto out;
4476		}
4477		error = -1;
4478		goto out;
4479	}
4480
4481	NFSUNLOCKSTATE();
4482	/*
4483	 * Ok, we can delete the expired delegation.
4484	 * First, write the Revoke record to stable storage and then
4485	 * clear out the conflict.
4486	 * Since all other nfsd threads are now blocked, we can safely
4487	 * sleep without the state changing.
4488	 */
4489	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4490	nfsrv_backupstable();
4491	if (clp->lc_expiry < NFSD_MONOSEC) {
4492		nfsrv_cleanclient(clp, p);
4493		nfsrv_freedeleglist(&clp->lc_deleg);
4494		nfsrv_freedeleglist(&clp->lc_olddeleg);
4495		LIST_REMOVE(clp, lc_hash);
4496		zapped_clp = 1;
4497	} else {
4498		nfsrv_freedeleg(stp);
4499		zapped_clp = 0;
4500	}
4501	if (zapped_clp)
4502		nfsrv_zapclient(clp, p);
4503	error = -1;
4504
4505out:
4506	NFSEXITCODE(error);
4507	return (error);
4508}
4509
4510/*
4511 * Check for a remove allowed, if remove is set to 1 and get rid of
4512 * delegations.
4513 */
4514APPLESTATIC int
4515nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
4516{
4517	struct nfsstate *stp;
4518	struct nfslockfile *lfp;
4519	int error, haslock = 0;
4520	fhandle_t nfh;
4521
4522	/*
4523	 * First, get the lock file structure.
4524	 * (A return of -1 means no associated state, so remove ok.)
4525	 */
4526	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
4527tryagain:
4528	NFSLOCKSTATE();
4529	if (!error)
4530		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
4531	if (error) {
4532		NFSUNLOCKSTATE();
4533		if (haslock) {
4534			NFSLOCKV4ROOTMUTEX();
4535			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4536			NFSUNLOCKV4ROOTMUTEX();
4537		}
4538		if (error == -1)
4539			error = 0;
4540		goto out;
4541	}
4542
4543	/*
4544	 * Now, we must Recall any delegations.
4545	 */
4546	error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
4547	if (error) {
4548		/*
4549		 * nfsrv_cleandeleg() unlocks state for non-zero
4550		 * return.
4551		 */
4552		if (error == -1)
4553			goto tryagain;
4554		if (haslock) {
4555			NFSLOCKV4ROOTMUTEX();
4556			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4557			NFSUNLOCKV4ROOTMUTEX();
4558		}
4559		goto out;
4560	}
4561
4562	/*
4563	 * Now, look for a conflicting open share.
4564	 */
4565	if (remove) {
4566		LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
4567			if (stp->ls_flags & NFSLCK_WRITEDENY) {
4568				error = NFSERR_FILEOPEN;
4569				break;
4570			}
4571		}
4572	}
4573
4574	NFSUNLOCKSTATE();
4575	if (haslock) {
4576		NFSLOCKV4ROOTMUTEX();
4577		nfsv4_unlock(&nfsv4rootfs_lock, 1);
4578		NFSUNLOCKV4ROOTMUTEX();
4579	}
4580
4581out:
4582	NFSEXITCODE(error);
4583	return (error);
4584}
4585
4586/*
4587 * Clear out all delegations for the file referred to by lfp.
4588 * May return NFSERR_DELAY, if there will be a delay waiting for
4589 * delegations to expire.
4590 * Returns -1 to indicate it slept while recalling a delegation.
4591 * This function has the side effect of deleting the nfslockfile structure,
4592 * if it no longer has associated state and didn't have to sleep.
4593 * Unlocks State before a non-zero value is returned.
4594 */
4595static int
4596nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
4597    struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
4598{
4599	struct nfsstate *stp, *nstp;
4600	int ret = 0;
4601
4602	stp = LIST_FIRST(&lfp->lf_deleg);
4603	while (stp != LIST_END(&lfp->lf_deleg)) {
4604		nstp = LIST_NEXT(stp, ls_file);
4605		if (stp->ls_clp != clp) {
4606			ret = nfsrv_delegconflict(stp, haslockp, p, vp);
4607			if (ret) {
4608				/*
4609				 * nfsrv_delegconflict() unlocks state
4610				 * when it returns non-zero.
4611				 */
4612				goto out;
4613			}
4614		}
4615		stp = nstp;
4616	}
4617out:
4618	NFSEXITCODE(ret);
4619	return (ret);
4620}
4621
4622/*
4623 * There are certain operations that, when being done outside of NFSv4,
4624 * require that any NFSv4 delegation for the file be recalled.
4625 * This function is to be called for those cases:
4626 * VOP_RENAME() - When a delegation is being recalled for any reason,
4627 *	the client may have to do Opens against the server, using the file's
4628 *	final component name. If the file has been renamed on the server,
4629 *	that component name will be incorrect and the Open will fail.
4630 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
4631 *	been removed on the server, if there is a delegation issued to
4632 *	that client for the file. I say "theoretically" since clients
4633 *	normally do an Access Op before the Open and that Access Op will
4634 *	fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
4635 *	they will detect the file's removal in the same manner. (There is
4636 *	one case where RFC3530 allows a client to do an Open without first
4637 *	doing an Access Op, which is passage of a check against the ACE
4638 *	returned with a Write delegation, but current practice is to ignore
4639 *	the ACE and always do an Access Op.)
4640 *	Since the functions can only be called with an unlocked vnode, this
4641 *	can't be done at this time.
4642 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
4643 *	locks locally in the client, which are not visible to the server. To
4644 *	deal with this, issuing of delegations for a vnode must be disabled
4645 *	and all delegations for the vnode recalled. This is done via the
4646 *	second function, using the VV_DISABLEDELEG vflag on the vnode.
4647 */
4648APPLESTATIC void
4649nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
4650{
4651	time_t starttime;
4652	int error;
4653
4654	/*
4655	 * First, check to see if the server is currently running and it has
4656	 * been called for a regular file when issuing delegations.
4657	 */
4658	if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
4659	    nfsrv_issuedelegs == 0)
4660		return;
4661
4662	KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
4663	/*
4664	 * First, get a reference on the nfsv4rootfs_lock so that an
4665	 * exclusive lock cannot be acquired by another thread.
4666	 */
4667	NFSLOCKV4ROOTMUTEX();
4668	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
4669	NFSUNLOCKV4ROOTMUTEX();
4670
4671	/*
4672	 * Now, call nfsrv_checkremove() in a loop while it returns
4673	 * NFSERR_DELAY. Return upon any other error or when timed out.
4674	 */
4675	starttime = NFSD_MONOSEC;
4676	do {
4677		if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4678			error = nfsrv_checkremove(vp, 0, p);
4679			NFSVOPUNLOCK(vp, 0);
4680		} else
4681			error = EPERM;
4682		if (error == NFSERR_DELAY) {
4683			if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
4684				break;
4685			/* Sleep for a short period of time */
4686			(void) nfs_catnap(PZERO, 0, "nfsremove");
4687		}
4688	} while (error == NFSERR_DELAY);
4689	NFSLOCKV4ROOTMUTEX();
4690	nfsv4_relref(&nfsv4rootfs_lock);
4691	NFSUNLOCKV4ROOTMUTEX();
4692}
4693
4694APPLESTATIC void
4695nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
4696{
4697
4698#ifdef VV_DISABLEDELEG
4699	/*
4700	 * First, flag issuance of delegations disabled.
4701	 */
4702	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
4703#endif
4704
4705	/*
4706	 * Then call nfsd_recalldelegation() to get rid of all extant
4707	 * delegations.
4708	 */
4709	nfsd_recalldelegation(vp, p);
4710}
4711
4712/*
4713 * Check for conflicting locks, etc. and then get rid of delegations.
4714 * (At one point I thought that I should get rid of delegations for any
4715 *  Setattr, since it could potentially disallow the I/O op (read or write)
4716 *  allowed by the delegation. However, Setattr Ops that aren't changing
4717 *  the size get a stateid of all 0s, so you can't tell if it is a delegation
4718 *  for the same client or a different one, so I decided to only get rid
4719 *  of delegations for other clients when the size is being changed.)
4720 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
4721 * as Write backs, even if there is no delegation, so it really isn't any
4722 * different?)
4723 */
4724APPLESTATIC int
4725nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
4726    nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
4727    struct nfsexstuff *exp, NFSPROC_T *p)
4728{
4729	struct nfsstate st, *stp = &st;
4730	struct nfslock lo, *lop = &lo;
4731	int error = 0;
4732	nfsquad_t clientid;
4733
4734	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
4735		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
4736		lop->lo_first = nvap->na_size;
4737	} else {
4738		stp->ls_flags = 0;
4739		lop->lo_first = 0;
4740	}
4741	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
4742	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
4743	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
4744	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
4745		stp->ls_flags |= NFSLCK_SETATTR;
4746	if (stp->ls_flags == 0)
4747		goto out;
4748	lop->lo_end = NFS64BITSSET;
4749	lop->lo_flags = NFSLCK_WRITE;
4750	stp->ls_ownerlen = 0;
4751	stp->ls_op = NULL;
4752	stp->ls_uid = nd->nd_cred->cr_uid;
4753	stp->ls_stateid.seqid = stateidp->seqid;
4754	clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
4755	clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
4756	stp->ls_stateid.other[2] = stateidp->other[2];
4757	error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
4758	    stateidp, exp, nd, p);
4759
4760out:
4761	NFSEXITCODE2(error, nd);
4762	return (error);
4763}
4764
4765/*
4766 * Check for a write delegation and do a CBGETATTR if there is one, updating
4767 * the attributes, as required.
4768 * Should I return an error if I can't get the attributes? (For now, I'll
4769 * just return ok.
4770 */
4771APPLESTATIC int
4772nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
4773    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred,
4774    NFSPROC_T *p)
4775{
4776	struct nfsstate *stp;
4777	struct nfslockfile *lfp;
4778	struct nfsclient *clp;
4779	struct nfsvattr nva;
4780	fhandle_t nfh;
4781	int error = 0;
4782	nfsattrbit_t cbbits;
4783	u_quad_t delegfilerev;
4784
4785	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
4786	if (!NFSNONZERO_ATTRBIT(&cbbits))
4787		goto out;
4788
4789	/*
4790	 * Get the lock file structure.
4791	 * (A return of -1 means no associated state, so return ok.)
4792	 */
4793	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
4794	NFSLOCKSTATE();
4795	if (!error)
4796		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
4797	if (error) {
4798		NFSUNLOCKSTATE();
4799		if (error == -1)
4800			error = 0;
4801		goto out;
4802	}
4803
4804	/*
4805	 * Now, look for a write delegation.
4806	 */
4807	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
4808		if (stp->ls_flags & NFSLCK_DELEGWRITE)
4809			break;
4810	}
4811	if (stp == LIST_END(&lfp->lf_deleg)) {
4812		NFSUNLOCKSTATE();
4813		goto out;
4814	}
4815	clp = stp->ls_clp;
4816	delegfilerev = stp->ls_filerev;
4817
4818	/*
4819	 * If the Write delegation was issued as a part of this Compound RPC
4820	 * or if we have an Implied Clientid (used in a previous Op in this
4821	 * compound) and it is the client the delegation was issued to,
4822	 * just return ok.
4823	 * I also assume that it is from the same client iff the network
4824	 * host IP address is the same as the callback address. (Not
4825	 * exactly correct by the RFC, but avoids a lot of Getattr
4826	 * callbacks.)
4827	 */
4828	if (nd->nd_compref == stp->ls_compref ||
4829	    ((nd->nd_flag & ND_IMPLIEDCLID) &&
4830	     clp->lc_clientid.qval == nd->nd_clientid.qval) ||
4831	     nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
4832		NFSUNLOCKSTATE();
4833		goto out;
4834	}
4835
4836	/*
4837	 * We are now done with the delegation state structure,
4838	 * so the statelock can be released and we can now tsleep().
4839	 */
4840
4841	/*
4842	 * Now, we must do the CB Getattr callback, to see if Change or Size
4843	 * has changed.
4844	 */
4845	if (clp->lc_expiry >= NFSD_MONOSEC) {
4846		NFSUNLOCKSTATE();
4847		NFSVNO_ATTRINIT(&nva);
4848		nva.na_filerev = NFS64BITSSET;
4849		error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
4850		    0, &nfh, &nva, &cbbits, p);
4851		if (!error) {
4852			if ((nva.na_filerev != NFS64BITSSET &&
4853			    nva.na_filerev > delegfilerev) ||
4854			    (NFSVNO_ISSETSIZE(&nva) &&
4855			     nva.na_size != nvap->na_size)) {
4856				error = nfsvno_updfilerev(vp, nvap, cred, p);
4857				if (NFSVNO_ISSETSIZE(&nva))
4858					nvap->na_size = nva.na_size;
4859			}
4860		} else
4861			error = 0;	/* Ignore callback errors for now. */
4862	} else {
4863		NFSUNLOCKSTATE();
4864	}
4865
4866out:
4867	NFSEXITCODE2(error, nd);
4868	return (error);
4869}
4870
4871/*
4872 * This function looks for openowners that haven't had any opens for
4873 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
4874 * is set.
4875 */
4876APPLESTATIC void
4877nfsrv_throwawayopens(NFSPROC_T *p)
4878{
4879	struct nfsclient *clp, *nclp;
4880	struct nfsstate *stp, *nstp;
4881	int i;
4882
4883	NFSLOCKSTATE();
4884	nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
4885	/*
4886	 * For each client...
4887	 */
4888	for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
4889	    LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
4890		LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
4891			if (LIST_EMPTY(&stp->ls_open) &&
4892			    (stp->ls_noopens > NFSNOOPEN ||
4893			     (nfsrv_openpluslock * 2) >
4894			     NFSRV_V4STATELIMIT))
4895				nfsrv_freeopenowner(stp, 0, p);
4896		}
4897	    }
4898	}
4899	NFSUNLOCKSTATE();
4900}
4901
4902/*
4903 * This function checks to see if the credentials are the same.
4904 * Returns 1 for not same, 0 otherwise.
4905 */
4906static int
4907nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
4908{
4909
4910	if (nd->nd_flag & ND_GSS) {
4911		if (!(clp->lc_flags & LCL_GSS))
4912			return (1);
4913		if (clp->lc_flags & LCL_NAME) {
4914			if (nd->nd_princlen != clp->lc_namelen ||
4915			    NFSBCMP(nd->nd_principal, clp->lc_name,
4916				clp->lc_namelen))
4917				return (1);
4918			else
4919				return (0);
4920		}
4921		if (nd->nd_cred->cr_uid == clp->lc_uid)
4922			return (0);
4923		else
4924			return (1);
4925	} else if (clp->lc_flags & LCL_GSS)
4926		return (1);
4927	/*
4928	 * For AUTH_SYS, allow the same uid or root. (This is underspecified
4929	 * in RFC3530, which talks about principals, but doesn't say anything
4930	 * about uids for AUTH_SYS.)
4931	 */
4932	if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
4933		return (0);
4934	else
4935		return (1);
4936}
4937
4938/*
4939 * Calculate the lease expiry time.
4940 */
4941static time_t
4942nfsrv_leaseexpiry(void)
4943{
4944
4945	if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
4946		return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
4947	return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
4948}
4949
4950/*
4951 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
4952 */
4953static void
4954nfsrv_delaydelegtimeout(struct nfsstate *stp)
4955{
4956
4957	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
4958		return;
4959
4960	if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
4961	    stp->ls_delegtime < stp->ls_delegtimelimit) {
4962		stp->ls_delegtime += nfsrv_lease;
4963		if (stp->ls_delegtime > stp->ls_delegtimelimit)
4964			stp->ls_delegtime = stp->ls_delegtimelimit;
4965	}
4966}
4967
4968/*
4969 * This function checks to see if there is any other state associated
4970 * with the openowner for this Open.
4971 * It returns 1 if there is no other state, 0 otherwise.
4972 */
4973static int
4974nfsrv_nootherstate(struct nfsstate *stp)
4975{
4976	struct nfsstate *tstp;
4977
4978	LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
4979		if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
4980			return (0);
4981	}
4982	return (1);
4983}
4984
4985/*
4986 * Create a list of lock deltas (changes to local byte range locking
4987 * that can be rolled back using the list) and apply the changes via
4988 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
4989 * the rollback or update function will be called after this.
4990 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
4991 * call fails. If it returns an error, it will unlock the list.
4992 */
4993static int
4994nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
4995    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
4996{
4997	struct nfslock *lop, *nlop;
4998	int error = 0;
4999
5000	/* Loop through the list of locks. */
5001	lop = LIST_FIRST(&lfp->lf_locallock);
5002	while (first < end && lop != NULL) {
5003		nlop = LIST_NEXT(lop, lo_lckowner);
5004		if (first >= lop->lo_end) {
5005			/* not there yet */
5006			lop = nlop;
5007		} else if (first < lop->lo_first) {
5008			/* new one starts before entry in list */
5009			if (end <= lop->lo_first) {
5010				/* no overlap between old and new */
5011				error = nfsrv_dolocal(vp, lfp, flags,
5012				    NFSLCK_UNLOCK, first, end, cfp, p);
5013				if (error != 0)
5014					break;
5015				first = end;
5016			} else {
5017				/* handle fragment overlapped with new one */
5018				error = nfsrv_dolocal(vp, lfp, flags,
5019				    NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5020				    p);
5021				if (error != 0)
5022					break;
5023				first = lop->lo_first;
5024			}
5025		} else {
5026			/* new one overlaps this entry in list */
5027			if (end <= lop->lo_end) {
5028				/* overlaps all of new one */
5029				error = nfsrv_dolocal(vp, lfp, flags,
5030				    lop->lo_flags, first, end, cfp, p);
5031				if (error != 0)
5032					break;
5033				first = end;
5034			} else {
5035				/* handle fragment overlapped with new one */
5036				error = nfsrv_dolocal(vp, lfp, flags,
5037				    lop->lo_flags, first, lop->lo_end, cfp, p);
5038				if (error != 0)
5039					break;
5040				first = lop->lo_end;
5041				lop = nlop;
5042			}
5043		}
5044	}
5045	if (first < end && error == 0)
5046		/* handle fragment past end of list */
5047		error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
5048		    end, cfp, p);
5049
5050	NFSEXITCODE(error);
5051	return (error);
5052}
5053
5054/*
5055 * Local lock unlock. Unlock all byte ranges that are no longer locked
5056 * by NFSv4. To do this, unlock any subranges of first-->end that
5057 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
5058 * list. This list has all locks for the file held by other
5059 * <clientid, lockowner> tuples. The list is ordered by increasing
5060 * lo_first value, but may have entries that overlap each other, for
5061 * the case of read locks.
5062 */
5063static void
5064nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
5065    uint64_t init_end, NFSPROC_T *p)
5066{
5067	struct nfslock *lop;
5068	uint64_t first, end, prevfirst;
5069
5070	first = init_first;
5071	end = init_end;
5072	while (first < init_end) {
5073		/* Loop through all nfs locks, adjusting first and end */
5074		prevfirst = 0;
5075		LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
5076			KASSERT(prevfirst <= lop->lo_first,
5077			    ("nfsv4 locks out of order"));
5078			KASSERT(lop->lo_first < lop->lo_end,
5079			    ("nfsv4 bogus lock"));
5080			prevfirst = lop->lo_first;
5081			if (first >= lop->lo_first &&
5082			    first < lop->lo_end)
5083				/*
5084				 * Overlaps with initial part, so trim
5085				 * off that initial part by moving first past
5086				 * it.
5087				 */
5088				first = lop->lo_end;
5089			else if (end > lop->lo_first &&
5090			    lop->lo_first > first) {
5091				/*
5092				 * This lock defines the end of the
5093				 * segment to unlock, so set end to the
5094				 * start of it and break out of the loop.
5095				 */
5096				end = lop->lo_first;
5097				break;
5098			}
5099			if (first >= end)
5100				/*
5101				 * There is no segment left to do, so
5102				 * break out of this loop and then exit
5103				 * the outer while() since first will be set
5104				 * to end, which must equal init_end here.
5105				 */
5106				break;
5107		}
5108		if (first < end) {
5109			/* Unlock this segment */
5110			(void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
5111			    NFSLCK_READ, first, end, NULL, p);
5112			nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
5113			    first, end);
5114		}
5115		/*
5116		 * Now move past this segment and look for any further
5117		 * segment in the range, if there is one.
5118		 */
5119		first = end;
5120		end = init_end;
5121	}
5122}
5123
5124/*
5125 * Do the local lock operation and update the rollback list, as required.
5126 * Perform the rollback and return the error if nfsvno_advlock() fails.
5127 */
5128static int
5129nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
5130    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5131{
5132	struct nfsrollback *rlp;
5133	int error = 0, ltype, oldltype;
5134
5135	if (flags & NFSLCK_WRITE)
5136		ltype = F_WRLCK;
5137	else if (flags & NFSLCK_READ)
5138		ltype = F_RDLCK;
5139	else
5140		ltype = F_UNLCK;
5141	if (oldflags & NFSLCK_WRITE)
5142		oldltype = F_WRLCK;
5143	else if (oldflags & NFSLCK_READ)
5144		oldltype = F_RDLCK;
5145	else
5146		oldltype = F_UNLCK;
5147	if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
5148		/* nothing to do */
5149		goto out;
5150	error = nfsvno_advlock(vp, ltype, first, end, p);
5151	if (error != 0) {
5152		if (cfp != NULL) {
5153			cfp->cl_clientid.lval[0] = 0;
5154			cfp->cl_clientid.lval[1] = 0;
5155			cfp->cl_first = 0;
5156			cfp->cl_end = NFS64BITSSET;
5157			cfp->cl_flags = NFSLCK_WRITE;
5158			cfp->cl_ownerlen = 5;
5159			NFSBCOPY("LOCAL", cfp->cl_owner, 5);
5160		}
5161		nfsrv_locallock_rollback(vp, lfp, p);
5162	} else if (ltype != F_UNLCK) {
5163		rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
5164		    M_WAITOK);
5165		rlp->rlck_first = first;
5166		rlp->rlck_end = end;
5167		rlp->rlck_type = oldltype;
5168		LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
5169	}
5170
5171out:
5172	NFSEXITCODE(error);
5173	return (error);
5174}
5175
5176/*
5177 * Roll back local lock changes and free up the rollback list.
5178 */
5179static void
5180nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
5181{
5182	struct nfsrollback *rlp, *nrlp;
5183
5184	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
5185		(void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
5186		    rlp->rlck_end, p);
5187		free(rlp, M_NFSDROLLBACK);
5188	}
5189	LIST_INIT(&lfp->lf_rollback);
5190}
5191
5192/*
5193 * Update local lock list and delete rollback list (ie now committed to the
5194 * local locks). Most of the work is done by the internal function.
5195 */
5196static void
5197nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
5198    uint64_t end)
5199{
5200	struct nfsrollback *rlp, *nrlp;
5201	struct nfslock *new_lop, *other_lop;
5202
5203	new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
5204	if (flags & (NFSLCK_READ | NFSLCK_WRITE))
5205		other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
5206		    M_WAITOK);
5207	else
5208		other_lop = NULL;
5209	new_lop->lo_flags = flags;
5210	new_lop->lo_first = first;
5211	new_lop->lo_end = end;
5212	nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
5213	if (new_lop != NULL)
5214		free(new_lop, M_NFSDLOCK);
5215	if (other_lop != NULL)
5216		free(other_lop, M_NFSDLOCK);
5217
5218	/* and get rid of the rollback list */
5219	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
5220		free(rlp, M_NFSDROLLBACK);
5221	LIST_INIT(&lfp->lf_rollback);
5222}
5223
5224/*
5225 * Lock the struct nfslockfile for local lock updating.
5226 */
5227static void
5228nfsrv_locklf(struct nfslockfile *lfp)
5229{
5230	int gotlock;
5231
5232	/* lf_usecount ensures *lfp won't be free'd */
5233	lfp->lf_usecount++;
5234	do {
5235		gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
5236		    NFSSTATEMUTEXPTR, NULL);
5237	} while (gotlock == 0);
5238	lfp->lf_usecount--;
5239}
5240
5241/*
5242 * Unlock the struct nfslockfile after local lock updating.
5243 */
5244static void
5245nfsrv_unlocklf(struct nfslockfile *lfp)
5246{
5247
5248	nfsv4_unlock(&lfp->lf_locallock_lck, 0);
5249}
5250
5251/*
5252 * Clear out all state for the NFSv4 server.
5253 * Must be called by a thread that can sleep when no nfsds are running.
5254 */
5255void
5256nfsrv_throwawayallstate(NFSPROC_T *p)
5257{
5258	struct nfsclient *clp, *nclp;
5259	struct nfslockfile *lfp, *nlfp;
5260	int i;
5261
5262	/*
5263	 * For each client, clean out the state and then free the structure.
5264	 */
5265	for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
5266		LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5267			nfsrv_cleanclient(clp, p);
5268			nfsrv_freedeleglist(&clp->lc_deleg);
5269			nfsrv_freedeleglist(&clp->lc_olddeleg);
5270			free(clp, M_NFSDCLIENT);
5271		}
5272	}
5273
5274	/*
5275	 * Also, free up any remaining lock file structures.
5276	 */
5277	for (i = 0; i < NFSLOCKHASHSIZE; i++) {
5278		LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
5279			printf("nfsd unload: fnd a lock file struct\n");
5280			nfsrv_freenfslockfile(lfp);
5281		}
5282	}
5283}
5284
5285