nfs_clstate.c revision 324545
1/*-
2 * Copyright (c) 2009 Rick Macklem, University of Guelph
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clstate.c 324545 2017-10-11 23:42:29Z rmacklem $");
30
31/*
32 * These functions implement the client side state handling for NFSv4.
33 * NFSv4 state handling:
34 * - A lockowner is used to determine lock contention, so it
35 *   corresponds directly to a Posix pid. (1 to 1 mapping)
36 * - The correct granularity of an OpenOwner is not nearly so
37 *   obvious. An OpenOwner does the following:
38 *   - provides a serial sequencing of Open/Close/Lock-with-new-lockowner
39 *   - is used to check for Open/Share contention (not applicable to
40 *     this client, since all Opens are Deny_None)
 *   As such, I considered both extremes.
42 *   1 OpenOwner per ClientID - Simple to manage, but fully serializes
43 *   all Open, Close and Lock (with a new lockowner) Ops.
44 *   1 OpenOwner for each Open - This one results in an OpenConfirm for
45 *   every Open, for most servers.
 *   So, I chose to use the same mapping as I did for LockOwners.
47 *   The main concern here is that you can end up with multiple Opens
48 *   for the same File Handle, but on different OpenOwners (opens
49 *   inherited from parents, grandparents...) and you do not know
50 *   which of these the vnodeop close applies to. This is handled by
51 *   delaying the Close Op(s) until all of the Opens have been closed.
52 *   (It is not yet obvious if this is the correct granularity.)
53 * - How the code handles serialization:
54 *   - For the ClientId, it uses an exclusive lock while getting its
55 *     SetClientId and during recovery. Otherwise, it uses a shared
56 *     lock via a reference count.
57 *   - For the rest of the data structures, it uses an SMP mutex
58 *     (once the nfs client is SMP safe) and doesn't sleep while
59 *     manipulating the linked lists.
60 *   - The serialization of Open/Close/Lock/LockU falls out in the
61 *     "wash", since OpenOwners and LockOwners are both mapped from
62 *     Posix pid. In other words, there is only one Posix pid using
63 *     any given owner, so that owner is serialized. (If you change
64 *     the granularity of the OpenOwner, then code must be added to
65 *     serialize Ops on the OpenOwner.)
66 * - When to get rid of OpenOwners and LockOwners.
67 *   - The function nfscl_cleanup_common() is executed after a process exits.
68 *     It goes through the client list looking for all Open and Lock Owners.
69 *     When one is found, it is marked "defunct" or in the case of
70 *     an OpenOwner without any Opens, freed.
71 *     The renew thread scans for defunct Owners and gets rid of them,
72 *     if it can. The LockOwners will also be deleted when the
73 *     associated Open is closed.
74 *   - If the LockU or Close Op(s) fail during close in a way
75 *     that could be recovered upon retry, they are relinked to the
76 *     ClientId's defunct open list and retried by the renew thread
77 *     until they succeed or an unmount/recovery occurs.
78 *     (Since we are done with them, they do not need to be recovered.)
79 */
80
#ifndef APPLEKEXT
#include <fs/nfs/nfsport.h>

/*
 * Global variables
 *
 * The "extern" declarations refer to objects that are defined outside of
 * this file; the remaining ones are defined here.
 */
extern struct nfsstats newnfsstats;	/* statistics counters updated below */
extern struct nfsreqhead nfsd_reqq;
extern u_int32_t newnfs_false, newnfs_true;
extern int nfscl_debuglevel;
extern int nfscl_enablecallb;
extern int nfs_numnfscbd;
/*
 * NOTE(review): these two macros presumably declare the request spin lock
 * and the client state mutex that NFSLOCKCLSTATE()/NFSUNLOCKCLSTATE()
 * operate on -- confirm against nfsport.h.
 */
NFSREQSPINLOCK;
NFSCLSTATEMUTEX;
int nfscl_inited = 0;
struct nfsclhead nfsclhead;	/* Head of clientid list */
/* Initialized from the NFSCLDELEGHIGHWATER/NFSCLLAYOUTHIGHWATER constants. */
int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;
#endif	/* !APPLEKEXT */
100
/*
 * nfscl_delegcnt is incremented by nfscl_deleg() each time a new delegation
 * is linked into a client.
 * NOTE(review): nfscl_layoutcnt presumably counts layouts the same way; its
 * users are not visible in this part of the file -- confirm.
 */
static int nfscl_delegcnt = 0;
static int nfscl_layoutcnt = 0;

/* Forward declarations for the functions that are local to this file. */
static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *,
    u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen **);
static void nfscl_clrelease(struct nfsclclient *);
static void nfscl_cleanclient(struct nfsclclient *);
static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *,
    struct ucred *, NFSPROC_T *);
static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *,
    struct nfsmount *, struct ucred *, NFSPROC_T *);
static void nfscl_recover(struct nfsclclient *, struct ucred *, NFSPROC_T *);
static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *,
    struct nfscllock *, int);
static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **,
    struct nfscllock **, int);
static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *);
static u_int32_t nfscl_nextcbident(void);
static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
static struct nfsclclient *nfscl_getclnt(u_int32_t);
static struct nfsclclient *nfscl_getclntsess(uint8_t *);
static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
    int);
static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
    int, struct nfsclrecalllayout **);
static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
    int);
static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
    u_int8_t *, struct nfscllock **);
static void nfscl_freealllocks(struct nfscllockownerhead *, int);
static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int,
    struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
    struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
    struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
static int nfscl_moveopen(vnode_t , struct nfsclclient *,
    struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
    struct nfscldeleg *, struct ucred *, NFSPROC_T *);
static void nfscl_totalrecall(struct nfsclclient *);
static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *,
    struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *);
static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int,
    u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int,
    struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *);
static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *,
    int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
    struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *);
static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
    struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int);
static void nfscl_freeopenowner(struct nfsclowner *, int);
static void nfscl_cleandeleg(struct nfscldeleg *);
static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
    struct nfsmount *, NFSPROC_T *);
static void nfscl_emptylockowner(struct nfscllockowner *,
    struct nfscllockownerfhhead *);
static void nfscl_mergeflayouts(struct nfsclflayouthead *,
    struct nfsclflayouthead *);
static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
    uint64_t, uint32_t, struct nfsclrecalllayout *);
static int nfscl_seq(uint32_t, uint32_t);
static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
    struct ucred *, NFSPROC_T *);
static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
    struct ucred *, NFSPROC_T *);
171
/*
 * Error tables for the callback operations.  Each table is a list of
 * NFSERR_xxx values terminated by a 0 entry.
 * NOTE(review): these are presumably consumed by nfscl_errmap(), with the
 * first entry of a table acting as the default error and the rest being the
 * errors allowed for the operation -- confirm against that function, which
 * is not visible in this part of the file.
 */
static short nfscberr_null[] = {
	0,
	0,
};

static short nfscberr_getattr[] = {
	NFSERR_RESOURCE,
	NFSERR_BADHANDLE,
	NFSERR_BADXDR,
	NFSERR_RESOURCE,
	NFSERR_SERVERFAULT,
	0,
};

static short nfscberr_recall[] = {
	NFSERR_RESOURCE,
	NFSERR_BADHANDLE,
	NFSERR_BADSTATEID,
	NFSERR_BADXDR,
	NFSERR_RESOURCE,
	NFSERR_SERVERFAULT,
	0,
};

/*
 * Table of the above error lists: three null entries, then the getattr and
 * recall lists.
 * NOTE(review): presumably indexed by callback operation number -- confirm.
 */
static short *nfscl_cberrmap[] = {
	nfscberr_null,
	nfscberr_null,
	nfscberr_null,
	nfscberr_getattr,
	nfscberr_recall
};
203
/*
 * Address family for a client structure: AF_INET6 when NFSCLFLAGS_AFINET6
 * is set in its nfsc_flags, AF_INET otherwise.
 */
#define	NETFAMILY(clp)							\
	((((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) != 0) ? AF_INET6 : AF_INET)
206
207/*
208 * Called for an open operation.
209 * If the nfhp argument is NULL, just get an openowner.
210 */
211APPLESTATIC int
212nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
213    struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
214    struct nfsclopen **opp, int *newonep, int *retp, int lockit)
215{
216	struct nfsclclient *clp;
217	struct nfsclowner *owp, *nowp;
218	struct nfsclopen *op = NULL, *nop = NULL;
219	struct nfscldeleg *dp;
220	struct nfsclownerhead *ohp;
221	u_int8_t own[NFSV4CL_LOCKNAMELEN];
222	int ret;
223
224	if (newonep != NULL)
225		*newonep = 0;
226	if (opp != NULL)
227		*opp = NULL;
228	if (owpp != NULL)
229		*owpp = NULL;
230
231	/*
232	 * Might need one or both of these, so MALLOC them now, to
233	 * avoid a tsleep() in MALLOC later.
234	 */
235	MALLOC(nowp, struct nfsclowner *, sizeof (struct nfsclowner),
236	    M_NFSCLOWNER, M_WAITOK);
237	if (nfhp != NULL)
238	    MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) +
239		fhlen - 1, M_NFSCLOPEN, M_WAITOK);
240	ret = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
241	if (ret != 0) {
242		FREE((caddr_t)nowp, M_NFSCLOWNER);
243		if (nop != NULL)
244			FREE((caddr_t)nop, M_NFSCLOPEN);
245		return (ret);
246	}
247
248	/*
249	 * Get the Open iff it already exists.
250	 * If none found, add the new one or return error, depending upon
251	 * "create".
252	 */
253	NFSLOCKCLSTATE();
254	dp = NULL;
255	/* First check the delegation list */
256	if (nfhp != NULL && usedeleg) {
257		LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
258			if (dp->nfsdl_fhlen == fhlen &&
259			    !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
260				if (!(amode & NFSV4OPEN_ACCESSWRITE) ||
261				    (dp->nfsdl_flags & NFSCLDL_WRITE))
262					break;
263				dp = NULL;
264				break;
265			}
266		}
267	}
268
269	if (dp != NULL) {
270		nfscl_filllockowner(p->td_proc, own, F_POSIX);
271		ohp = &dp->nfsdl_owner;
272	} else {
273		/* For NFSv4.1 and this option, use a single open_owner. */
274		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
275			nfscl_filllockowner(NULL, own, F_POSIX);
276		else
277			nfscl_filllockowner(p->td_proc, own, F_POSIX);
278		ohp = &clp->nfsc_owner;
279	}
280	/* Now, search for an openowner */
281	LIST_FOREACH(owp, ohp, nfsow_list) {
282		if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
283			break;
284	}
285
286	/*
287	 * Create a new open, as required.
288	 */
289	nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
290	    cred, newonep);
291
292	/*
293	 * Now, check the mode on the open and return the appropriate
294	 * value.
295	 */
296	if (retp != NULL) {
297		if (nfhp != NULL && dp != NULL && nop == NULL)
298			/* new local open on delegation */
299			*retp = NFSCLOPEN_SETCRED;
300		else
301			*retp = NFSCLOPEN_OK;
302	}
303	if (op != NULL && (amode & ~(op->nfso_mode))) {
304		op->nfso_mode |= amode;
305		if (retp != NULL && dp == NULL)
306			*retp = NFSCLOPEN_DOOPEN;
307	}
308
309	/*
310	 * Serialize modifications to the open owner for multiple threads
311	 * within the same process using a read/write sleep lock.
312	 * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
313	 * by acquiring a shared lock.  The close operations still use an
314	 * exclusive lock for this case.
315	 */
316	if (lockit != 0) {
317		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) {
318			/*
319			 * Get a shared lock on the OpenOwner, but first
320			 * wait for any pending exclusive lock, so that the
321			 * exclusive locker gets priority.
322			 */
323			nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
324			    NFSCLSTATEMUTEXPTR, NULL);
325			nfsv4_getref(&owp->nfsow_rwlock, NULL,
326			    NFSCLSTATEMUTEXPTR, NULL);
327		} else
328			nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
329	}
330	NFSUNLOCKCLSTATE();
331	if (nowp != NULL)
332		FREE((caddr_t)nowp, M_NFSCLOWNER);
333	if (nop != NULL)
334		FREE((caddr_t)nop, M_NFSCLOPEN);
335	if (owpp != NULL)
336		*owpp = owp;
337	if (opp != NULL)
338		*opp = op;
339	return (0);
340}
341
342/*
343 * Create a new open, as required.
344 */
345static void
346nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
347    struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
348    struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
349    struct ucred *cred, int *newonep)
350{
351	struct nfsclowner *owp = *owpp, *nowp;
352	struct nfsclopen *op, *nop;
353
354	if (nowpp != NULL)
355		nowp = *nowpp;
356	else
357		nowp = NULL;
358	if (nopp != NULL)
359		nop = *nopp;
360	else
361		nop = NULL;
362	if (owp == NULL && nowp != NULL) {
363		NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
364		LIST_INIT(&nowp->nfsow_open);
365		nowp->nfsow_clp = clp;
366		nowp->nfsow_seqid = 0;
367		nowp->nfsow_defunct = 0;
368		nfscl_lockinit(&nowp->nfsow_rwlock);
369		if (dp != NULL) {
370			newnfsstats.cllocalopenowners++;
371			LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list);
372		} else {
373			newnfsstats.clopenowners++;
374			LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list);
375		}
376		owp = *owpp = nowp;
377		*nowpp = NULL;
378		if (newonep != NULL)
379			*newonep = 1;
380	}
381
382	 /* If an fhp has been specified, create an Open as well. */
383	if (fhp != NULL) {
384		/* and look for the correct open, based upon FH */
385		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
386			if (op->nfso_fhlen == fhlen &&
387			    !NFSBCMP(op->nfso_fh, fhp, fhlen))
388				break;
389		}
390		if (op == NULL && nop != NULL) {
391			nop->nfso_own = owp;
392			nop->nfso_mode = 0;
393			nop->nfso_opencnt = 0;
394			nop->nfso_posixlock = 1;
395			nop->nfso_fhlen = fhlen;
396			NFSBCOPY(fhp, nop->nfso_fh, fhlen);
397			LIST_INIT(&nop->nfso_lock);
398			nop->nfso_stateid.seqid = 0;
399			nop->nfso_stateid.other[0] = 0;
400			nop->nfso_stateid.other[1] = 0;
401			nop->nfso_stateid.other[2] = 0;
402			KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
403			newnfs_copyincred(cred, &nop->nfso_cred);
404			if (dp != NULL) {
405				TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
406				TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
407				    nfsdl_list);
408				dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
409				newnfsstats.cllocalopens++;
410			} else {
411				newnfsstats.clopens++;
412			}
413			LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list);
414			*opp = nop;
415			*nopp = NULL;
416			if (newonep != NULL)
417				*newonep = 1;
418		} else {
419			*opp = op;
420		}
421	}
422}
423
424/*
425 * Called to find/add a delegation to a client.
426 */
427APPLESTATIC int
428nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
429    int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
430{
431	struct nfscldeleg *dp = *dpp, *tdp;
432
433	/*
434	 * First, if we have received a Read delegation for a file on a
435	 * read/write file system, just return it, because they aren't
436	 * useful, imho.
437	 */
438	if (mp != NULL && dp != NULL && !NFSMNT_RDONLY(mp) &&
439	    (dp->nfsdl_flags & NFSCLDL_READ)) {
440		(void) nfscl_trydelegreturn(dp, cred, VFSTONFS(mp), p);
441		FREE((caddr_t)dp, M_NFSCLDELEG);
442		*dpp = NULL;
443		return (0);
444	}
445
446	/* Look for the correct deleg, based upon FH */
447	NFSLOCKCLSTATE();
448	tdp = nfscl_finddeleg(clp, nfhp, fhlen);
449	if (tdp == NULL) {
450		if (dp == NULL) {
451			NFSUNLOCKCLSTATE();
452			return (NFSERR_BADSTATEID);
453		}
454		*dpp = NULL;
455		TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
456		LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
457		    nfsdl_hash);
458		dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
459		newnfsstats.cldelegates++;
460		nfscl_delegcnt++;
461	} else {
462		/*
463		 * Delegation already exists, what do we do if a new one??
464		 */
465		if (dp != NULL) {
466			printf("Deleg already exists!\n");
467			FREE((caddr_t)dp, M_NFSCLDELEG);
468			*dpp = NULL;
469		} else {
470			*dpp = tdp;
471		}
472	}
473	NFSUNLOCKCLSTATE();
474	return (0);
475}
476
477/*
478 * Find a delegation for this file handle. Return NULL upon failure.
479 */
480static struct nfscldeleg *
481nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
482{
483	struct nfscldeleg *dp;
484
485	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
486	    if (dp->nfsdl_fhlen == fhlen &&
487		!NFSBCMP(dp->nfsdl_fh, fhp, fhlen))
488		break;
489	}
490	return (dp);
491}
492
493/*
494 * Get a stateid for an I/O operation. First, look for an open and iff
495 * found, return either a lockowner stateid or the open stateid.
496 * If no Open is found, just return error and the special stateid of all zeros.
497 */
498APPLESTATIC int
499nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
500    int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
501    void **lckpp)
502{
503	struct nfsclclient *clp;
504	struct nfsclowner *owp;
505	struct nfsclopen *op = NULL, *top;
506	struct nfscllockowner *lp;
507	struct nfscldeleg *dp;
508	struct nfsnode *np;
509	struct nfsmount *nmp;
510	u_int8_t own[NFSV4CL_LOCKNAMELEN];
511	int error, done;
512
513	*lckpp = NULL;
514	/*
515	 * Initially, just set the special stateid of all zeros.
516	 * (Don't do this for a DS, since the special stateid can't be used.)
517	 */
518	if (fords == 0) {
519		stateidp->seqid = 0;
520		stateidp->other[0] = 0;
521		stateidp->other[1] = 0;
522		stateidp->other[2] = 0;
523	}
524	if (vnode_vtype(vp) != VREG)
525		return (EISDIR);
526	np = VTONFS(vp);
527	nmp = VFSTONFS(vnode_mount(vp));
528	NFSLOCKCLSTATE();
529	clp = nfscl_findcl(nmp);
530	if (clp == NULL) {
531		NFSUNLOCKCLSTATE();
532		return (EACCES);
533	}
534
535	/*
536	 * Wait for recovery to complete.
537	 */
538	while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG))
539		(void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR,
540		    PZERO, "nfsrecvr", NULL);
541
542	/*
543	 * First, look for a delegation.
544	 */
545	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
546		if (dp->nfsdl_fhlen == fhlen &&
547		    !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
548			if (!(mode & NFSV4OPEN_ACCESSWRITE) ||
549			    (dp->nfsdl_flags & NFSCLDL_WRITE)) {
550				stateidp->seqid = dp->nfsdl_stateid.seqid;
551				stateidp->other[0] = dp->nfsdl_stateid.other[0];
552				stateidp->other[1] = dp->nfsdl_stateid.other[1];
553				stateidp->other[2] = dp->nfsdl_stateid.other[2];
554				if (!(np->n_flag & NDELEGRECALL)) {
555					TAILQ_REMOVE(&clp->nfsc_deleg, dp,
556					    nfsdl_list);
557					TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
558					    nfsdl_list);
559					dp->nfsdl_timestamp = NFSD_MONOSEC +
560					    120;
561					dp->nfsdl_rwlock.nfslock_usecnt++;
562					*lckpp = (void *)&dp->nfsdl_rwlock;
563				}
564				NFSUNLOCKCLSTATE();
565				return (0);
566			}
567			break;
568		}
569	}
570
571	if (p != NULL) {
572		/*
573		 * If p != NULL, we want to search the parentage tree
574		 * for a matching OpenOwner and use that.
575		 */
576		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
577			nfscl_filllockowner(NULL, own, F_POSIX);
578		else
579			nfscl_filllockowner(p->td_proc, own, F_POSIX);
580		lp = NULL;
581		error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own,
582		    mode, &lp, &op);
583		if (error == 0 && lp != NULL && fords == 0) {
584			/* Don't return a lock stateid for a DS. */
585			stateidp->seqid =
586			    lp->nfsl_stateid.seqid;
587			stateidp->other[0] =
588			    lp->nfsl_stateid.other[0];
589			stateidp->other[1] =
590			    lp->nfsl_stateid.other[1];
591			stateidp->other[2] =
592			    lp->nfsl_stateid.other[2];
593			NFSUNLOCKCLSTATE();
594			return (0);
595		}
596	}
597	if (op == NULL) {
598		/* If not found, just look for any OpenOwner that will work. */
599		top = NULL;
600		done = 0;
601		owp = LIST_FIRST(&clp->nfsc_owner);
602		while (!done && owp != NULL) {
603			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
604				if (op->nfso_fhlen == fhlen &&
605				    !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
606					if (top == NULL && (op->nfso_mode &
607					    NFSV4OPEN_ACCESSWRITE) != 0 &&
608					    (mode & NFSV4OPEN_ACCESSREAD) != 0)
609						top = op;
610					if ((mode & op->nfso_mode) == mode) {
611						done = 1;
612						break;
613					}
614				}
615			}
616			if (!done)
617				owp = LIST_NEXT(owp, nfsow_list);
618		}
619		if (!done) {
620			NFSCL_DEBUG(2, "openmode top=%p\n", top);
621			if (top == NULL || NFSHASOPENMODE(nmp)) {
622				NFSUNLOCKCLSTATE();
623				return (ENOENT);
624			} else
625				op = top;
626		}
627		/*
628		 * For read aheads or write behinds, use the open cred.
629		 * A read ahead or write behind is indicated by p == NULL.
630		 */
631		if (p == NULL)
632			newnfs_copycred(&op->nfso_cred, cred);
633	}
634
635	/*
636	 * No lock stateid, so return the open stateid.
637	 */
638	stateidp->seqid = op->nfso_stateid.seqid;
639	stateidp->other[0] = op->nfso_stateid.other[0];
640	stateidp->other[1] = op->nfso_stateid.other[1];
641	stateidp->other[2] = op->nfso_stateid.other[2];
642	NFSUNLOCKCLSTATE();
643	return (0);
644}
645
646/*
647 * Search for a matching file, mode and, optionally, lockowner.
648 */
649static int
650nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen,
651    u_int8_t *openown, u_int8_t *lockown, u_int32_t mode,
652    struct nfscllockowner **lpp, struct nfsclopen **opp)
653{
654	struct nfsclowner *owp;
655	struct nfsclopen *op, *rop, *rop2;
656	struct nfscllockowner *lp;
657	int keep_looping;
658
659	if (lpp != NULL)
660		*lpp = NULL;
661	/*
662	 * rop will be set to the open to be returned. There are three
663	 * variants of this, all for an open of the correct file:
664	 * 1 - A match of lockown.
665	 * 2 - A match of the openown, when no lockown match exists.
666	 * 3 - A match for any open, if no openown or lockown match exists.
667	 * Looking for #2 over #3 probably isn't necessary, but since
668	 * RFC3530 is vague w.r.t. the relationship between openowners and
669	 * lockowners, I think this is the safer way to go.
670	 */
671	rop = NULL;
672	rop2 = NULL;
673	keep_looping = 1;
674	/* Search the client list */
675	owp = LIST_FIRST(ohp);
676	while (owp != NULL && keep_looping != 0) {
677		/* and look for the correct open */
678		op = LIST_FIRST(&owp->nfsow_open);
679		while (op != NULL && keep_looping != 0) {
680			if (op->nfso_fhlen == fhlen &&
681			    !NFSBCMP(op->nfso_fh, nfhp, fhlen)
682			    && (op->nfso_mode & mode) == mode) {
683				if (lpp != NULL) {
684					/* Now look for a matching lockowner. */
685					LIST_FOREACH(lp, &op->nfso_lock,
686					    nfsl_list) {
687						if (!NFSBCMP(lp->nfsl_owner,
688						    lockown,
689						    NFSV4CL_LOCKNAMELEN)) {
690							*lpp = lp;
691							rop = op;
692							keep_looping = 0;
693							break;
694						}
695					}
696				}
697				if (rop == NULL && !NFSBCMP(owp->nfsow_owner,
698				    openown, NFSV4CL_LOCKNAMELEN)) {
699					rop = op;
700					if (lpp == NULL)
701						keep_looping = 0;
702				}
703				if (rop2 == NULL)
704					rop2 = op;
705			}
706			op = LIST_NEXT(op, nfso_list);
707		}
708		owp = LIST_NEXT(owp, nfsow_list);
709	}
710	if (rop == NULL)
711		rop = rop2;
712	if (rop == NULL)
713		return (EBADF);
714	*opp = rop;
715	return (0);
716}
717
718/*
719 * Release use of an open owner. Called when open operations are done
720 * with the open owner.
721 */
722APPLESTATIC void
723nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
724    __unused int error, __unused int candelete, int unlocked)
725{
726
727	if (owp == NULL)
728		return;
729	NFSLOCKCLSTATE();
730	if (unlocked == 0) {
731		if (NFSHASONEOPENOWN(nmp))
732			nfsv4_relref(&owp->nfsow_rwlock);
733		else
734			nfscl_lockunlock(&owp->nfsow_rwlock);
735	}
736	nfscl_clrelease(owp->nfsow_clp);
737	NFSUNLOCKCLSTATE();
738}
739
740/*
741 * Release use of an open structure under an open owner.
742 */
743APPLESTATIC void
744nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
745    int candelete)
746{
747	struct nfsclclient *clp;
748	struct nfsclowner *owp;
749
750	if (op == NULL)
751		return;
752	NFSLOCKCLSTATE();
753	owp = op->nfso_own;
754	if (NFSHASONEOPENOWN(nmp))
755		nfsv4_relref(&owp->nfsow_rwlock);
756	else
757		nfscl_lockunlock(&owp->nfsow_rwlock);
758	clp = owp->nfsow_clp;
759	if (error && candelete && op->nfso_opencnt == 0)
760		nfscl_freeopen(op, 0);
761	nfscl_clrelease(clp);
762	NFSUNLOCKCLSTATE();
763}
764
765/*
766 * Called to get a clientid structure. It will optionally lock the
767 * client data structures to do the SetClientId/SetClientId_confirm,
768 * but will release that lock and return the clientid with a refernce
769 * count on it.
770 * If the "cred" argument is NULL, a new clientid should not be created.
771 * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
772 * be done.
773 * The start_renewthread argument tells nfscl_getcl() to start a renew
774 * thread if this creates a new clp.
775 * It always clpp with a reference count on it, unless returning an error.
776 */
777APPLESTATIC int
778nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
779    int start_renewthread, struct nfsclclient **clpp)
780{
781	struct nfsclclient *clp;
782	struct nfsclclient *newclp = NULL;
783	struct nfsmount *nmp;
784	char uuid[HOSTUUIDLEN];
785	int igotlock = 0, error, trystalecnt, clidinusedelay, i;
786	u_int16_t idlen = 0;
787
788	nmp = VFSTONFS(mp);
789	if (cred != NULL) {
790		getcredhostuuid(cred, uuid, sizeof uuid);
791		idlen = strlen(uuid);
792		if (idlen > 0)
793			idlen += sizeof (u_int64_t);
794		else
795			idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
796		MALLOC(newclp, struct nfsclclient *,
797		    sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
798		    M_WAITOK | M_ZERO);
799	}
800	NFSLOCKCLSTATE();
801	/*
802	 * If a forced dismount is already in progress, don't
803	 * allocate a new clientid and get out now. For the case where
804	 * clp != NULL, this is a harmless optimization.
805	 */
806	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
807		NFSUNLOCKCLSTATE();
808		if (newclp != NULL)
809			free(newclp, M_NFSCLCLIENT);
810		return (EBADF);
811	}
812	clp = nmp->nm_clp;
813	if (clp == NULL) {
814		if (newclp == NULL) {
815			NFSUNLOCKCLSTATE();
816			return (EACCES);
817		}
818		clp = newclp;
819		clp->nfsc_idlen = idlen;
820		LIST_INIT(&clp->nfsc_owner);
821		TAILQ_INIT(&clp->nfsc_deleg);
822		TAILQ_INIT(&clp->nfsc_layout);
823		LIST_INIT(&clp->nfsc_devinfo);
824		for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
825			LIST_INIT(&clp->nfsc_deleghash[i]);
826		for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
827			LIST_INIT(&clp->nfsc_layouthash[i]);
828		clp->nfsc_flags = NFSCLFLAGS_INITED;
829		clp->nfsc_clientidrev = 1;
830		clp->nfsc_cbident = nfscl_nextcbident();
831		nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
832		    clp->nfsc_idlen);
833		LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list);
834		nmp->nm_clp = clp;
835		clp->nfsc_nmp = nmp;
836		NFSUNLOCKCLSTATE();
837		if (start_renewthread != 0)
838			nfscl_start_renewthread(clp);
839	} else {
840		NFSUNLOCKCLSTATE();
841		if (newclp != NULL)
842			free(newclp, M_NFSCLCLIENT);
843	}
844	NFSLOCKCLSTATE();
845	while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
846	    (mp->mnt_kern_flag & MNTK_UNMOUNTF) == 0)
847		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
848		    NFSCLSTATEMUTEXPTR, mp);
849	if (igotlock == 0) {
850		/*
851		 * Call nfsv4_lock() with "iwantlock == 0" so that it will
852		 * wait for a pending exclusive lock request.  This gives the
853		 * exclusive lock request priority over this shared lock
854		 * request.
855		 * An exclusive lock on nfsc_lock is used mainly for server
856		 * crash recoveries.
857		 */
858		nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp);
859		nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
860	}
861	if (igotlock == 0 && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
862		/*
863		 * Both nfsv4_lock() and nfsv4_getref() know to check
864		 * for MNTK_UNMOUNTF and return without sleeping to
865		 * wait for the exclusive lock to be released, since it
866		 * might be held by nfscl_umount() and we need to get out
867		 * now for that case and not wait until nfscl_umount()
868		 * releases it.
869		 */
870		NFSUNLOCKCLSTATE();
871		return (EBADF);
872	}
873	NFSUNLOCKCLSTATE();
874
875	/*
876	 * If it needs a clientid, do the setclientid now.
877	 */
878	if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) {
879		if (!igotlock)
880			panic("nfscl_clget");
881		if (p == NULL || cred == NULL) {
882			NFSLOCKCLSTATE();
883			nfsv4_unlock(&clp->nfsc_lock, 0);
884			NFSUNLOCKCLSTATE();
885			return (EACCES);
886		}
887		/*
888		 * If RFC3530 Sec. 14.2.33 is taken literally,
889		 * NFSERR_CLIDINUSE will be returned persistently for the
890		 * case where a new mount of the same file system is using
891		 * a different principal. In practice, NFSERR_CLIDINUSE is
892		 * only returned when there is outstanding unexpired state
893		 * on the clientid. As such, try for twice the lease
894		 * interval, if we know what that is. Otherwise, make a
895		 * wild ass guess.
896		 * The case of returning NFSERR_STALECLIENTID is far less
897		 * likely, but might occur if there is a significant delay
898		 * between doing the SetClientID and SetClientIDConfirm Ops,
899		 * such that the server throws away the clientid before
900		 * receiving the SetClientIDConfirm.
901		 */
902		if (clp->nfsc_renew > 0)
903			clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2;
904		else
905			clidinusedelay = 120;
906		trystalecnt = 3;
907		do {
908			error = nfsrpc_setclient(nmp, clp, 0, cred, p);
909			if (error == NFSERR_STALECLIENTID ||
910			    error == NFSERR_STALEDONTRECOVER ||
911			    error == NFSERR_BADSESSION ||
912			    error == NFSERR_CLIDINUSE) {
913				(void) nfs_catnap(PZERO, error, "nfs_setcl");
914			}
915		} while (((error == NFSERR_STALECLIENTID ||
916		     error == NFSERR_BADSESSION ||
917		     error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) ||
918		    (error == NFSERR_CLIDINUSE && --clidinusedelay > 0));
919		if (error) {
920			NFSLOCKCLSTATE();
921			nfsv4_unlock(&clp->nfsc_lock, 0);
922			NFSUNLOCKCLSTATE();
923			return (error);
924		}
925		clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
926	}
927	if (igotlock) {
928		NFSLOCKCLSTATE();
929		nfsv4_unlock(&clp->nfsc_lock, 1);
930		NFSUNLOCKCLSTATE();
931	}
932
933	*clpp = clp;
934	return (0);
935}
936
937/*
938 * Get a reference to a clientid and return it, if valid.
939 */
940APPLESTATIC struct nfsclclient *
941nfscl_findcl(struct nfsmount *nmp)
942{
943	struct nfsclclient *clp;
944
945	clp = nmp->nm_clp;
946	if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID))
947		return (NULL);
948	return (clp);
949}
950
951/*
952 * Release the clientid structure. It may be locked or reference counted.
953 */
954static void
955nfscl_clrelease(struct nfsclclient *clp)
956{
957
958	if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
959		nfsv4_unlock(&clp->nfsc_lock, 0);
960	else
961		nfsv4_relref(&clp->nfsc_lock);
962}
963
964/*
965 * External call for nfscl_clrelease.
966 */
967APPLESTATIC void
968nfscl_clientrelease(struct nfsclclient *clp)
969{
970
971	NFSLOCKCLSTATE();
972	if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
973		nfsv4_unlock(&clp->nfsc_lock, 0);
974	else
975		nfsv4_relref(&clp->nfsc_lock);
976	NFSUNLOCKCLSTATE();
977}
978
979/*
980 * Called when wanting to lock a byte region.
981 */
982APPLESTATIC int
983nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
984    short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
985    int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
986    struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
987{
988	struct nfscllockowner *lp;
989	struct nfsclopen *op;
990	struct nfsclclient *clp;
991	struct nfscllockowner *nlp;
992	struct nfscllock *nlop, *otherlop;
993	struct nfscldeleg *dp = NULL, *ldp = NULL;
994	struct nfscllockownerhead *lhp = NULL;
995	struct nfsnode *np;
996	u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN];
997	u_int8_t *openownp;
998	int error = 0, ret, donelocally = 0;
999	u_int32_t mode;
1000
1001	/* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */
1002	mode = 0;
1003	np = VTONFS(vp);
1004	*lpp = NULL;
1005	lp = NULL;
1006	*newonep = 0;
1007	*donelocallyp = 0;
1008
1009	/*
1010	 * Might need these, so MALLOC them now, to
1011	 * avoid a tsleep() in MALLOC later.
1012	 */
1013	MALLOC(nlp, struct nfscllockowner *,
1014	    sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK);
1015	MALLOC(otherlop, struct nfscllock *,
1016	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1017	MALLOC(nlop, struct nfscllock *,
1018	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1019	nlop->nfslo_type = type;
1020	nlop->nfslo_first = off;
1021	if (len == NFS64BITSSET) {
1022		nlop->nfslo_end = NFS64BITSSET;
1023	} else {
1024		nlop->nfslo_end = off + len;
1025		if (nlop->nfslo_end <= nlop->nfslo_first)
1026			error = NFSERR_INVAL;
1027	}
1028
1029	if (!error) {
1030		if (recovery)
1031			clp = rclp;
1032		else
1033			error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
1034	}
1035	if (error) {
1036		FREE((caddr_t)nlp, M_NFSCLLOCKOWNER);
1037		FREE((caddr_t)otherlop, M_NFSCLLOCK);
1038		FREE((caddr_t)nlop, M_NFSCLLOCK);
1039		return (error);
1040	}
1041
1042	op = NULL;
1043	if (recovery) {
1044		ownp = rownp;
1045		openownp = ropenownp;
1046	} else {
1047		nfscl_filllockowner(id, own, flags);
1048		ownp = own;
1049		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
1050			nfscl_filllockowner(NULL, openown, F_POSIX);
1051		else
1052			nfscl_filllockowner(p->td_proc, openown, F_POSIX);
1053		openownp = openown;
1054	}
1055	if (!recovery) {
1056		NFSLOCKCLSTATE();
1057		/*
1058		 * First, search for a delegation. If one exists for this file,
1059		 * the lock can be done locally against it, so long as there
1060		 * isn't a local lock conflict.
1061		 */
1062		ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1063		    np->n_fhp->nfh_len);
1064		/* Just sanity check for correct type of delegation */
1065		if (dp != NULL && ((dp->nfsdl_flags &
1066		    (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 ||
1067		     (type == F_WRLCK &&
1068		      (dp->nfsdl_flags & NFSCLDL_WRITE) == 0)))
1069			dp = NULL;
1070	}
1071	if (dp != NULL) {
1072		/* Now, find an open and maybe a lockowner. */
1073		ret = nfscl_getopen(&dp->nfsdl_owner, np->n_fhp->nfh_fh,
1074		    np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op);
1075		if (ret)
1076			ret = nfscl_getopen(&clp->nfsc_owner,
1077			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1078			    ownp, mode, NULL, &op);
1079		if (!ret) {
1080			lhp = &dp->nfsdl_lock;
1081			TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
1082			TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
1083			dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
1084			donelocally = 1;
1085		} else {
1086			dp = NULL;
1087		}
1088	}
1089	if (!donelocally) {
1090		/*
1091		 * Get the related Open and maybe lockowner.
1092		 */
1093		error = nfscl_getopen(&clp->nfsc_owner,
1094		    np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1095		    ownp, mode, &lp, &op);
1096		if (!error)
1097			lhp = &op->nfso_lock;
1098	}
1099	if (!error && !recovery)
1100		error = nfscl_localconflict(clp, np->n_fhp->nfh_fh,
1101		    np->n_fhp->nfh_len, nlop, ownp, ldp, NULL);
1102	if (error) {
1103		if (!recovery) {
1104			nfscl_clrelease(clp);
1105			NFSUNLOCKCLSTATE();
1106		}
1107		FREE((caddr_t)nlp, M_NFSCLLOCKOWNER);
1108		FREE((caddr_t)otherlop, M_NFSCLLOCK);
1109		FREE((caddr_t)nlop, M_NFSCLLOCK);
1110		return (error);
1111	}
1112
1113	/*
1114	 * Ok, see if a lockowner exists and create one, as required.
1115	 */
1116	if (lp == NULL)
1117		LIST_FOREACH(lp, lhp, nfsl_list) {
1118			if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN))
1119				break;
1120		}
1121	if (lp == NULL) {
1122		NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN);
1123		if (recovery)
1124			NFSBCOPY(ropenownp, nlp->nfsl_openowner,
1125			    NFSV4CL_LOCKNAMELEN);
1126		else
1127			NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner,
1128			    NFSV4CL_LOCKNAMELEN);
1129		nlp->nfsl_seqid = 0;
1130		nlp->nfsl_lockflags = flags;
1131		nlp->nfsl_inprog = NULL;
1132		nfscl_lockinit(&nlp->nfsl_rwlock);
1133		LIST_INIT(&nlp->nfsl_lock);
1134		if (donelocally) {
1135			nlp->nfsl_open = NULL;
1136			newnfsstats.cllocallockowners++;
1137		} else {
1138			nlp->nfsl_open = op;
1139			newnfsstats.cllockowners++;
1140		}
1141		LIST_INSERT_HEAD(lhp, nlp, nfsl_list);
1142		lp = nlp;
1143		nlp = NULL;
1144		*newonep = 1;
1145	}
1146
1147	/*
1148	 * Now, update the byte ranges for locks.
1149	 */
1150	ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally);
1151	if (!ret)
1152		donelocally = 1;
1153	if (donelocally) {
1154		*donelocallyp = 1;
1155		if (!recovery)
1156			nfscl_clrelease(clp);
1157	} else {
1158		/*
1159		 * Serial modifications on the lock owner for multiple threads
1160		 * for the same process using a read/write lock.
1161		 */
1162		if (!recovery)
1163			nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1164	}
1165	if (!recovery)
1166		NFSUNLOCKCLSTATE();
1167
1168	if (nlp)
1169		FREE((caddr_t)nlp, M_NFSCLLOCKOWNER);
1170	if (nlop)
1171		FREE((caddr_t)nlop, M_NFSCLLOCK);
1172	if (otherlop)
1173		FREE((caddr_t)otherlop, M_NFSCLLOCK);
1174
1175	*lpp = lp;
1176	return (0);
1177}
1178
1179/*
1180 * Called to unlock a byte range, for LockU.
1181 */
1182APPLESTATIC int
1183nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1184    __unused struct ucred *cred, NFSPROC_T *p, int callcnt,
1185    struct nfsclclient *clp, void *id, int flags,
1186    struct nfscllockowner **lpp, int *dorpcp)
1187{
1188	struct nfscllockowner *lp;
1189	struct nfsclowner *owp;
1190	struct nfsclopen *op;
1191	struct nfscllock *nlop, *other_lop = NULL;
1192	struct nfscldeleg *dp;
1193	struct nfsnode *np;
1194	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1195	int ret = 0, fnd;
1196
1197	np = VTONFS(vp);
1198	*lpp = NULL;
1199	*dorpcp = 0;
1200
1201	/*
1202	 * Might need these, so MALLOC them now, to
1203	 * avoid a tsleep() in MALLOC later.
1204	 */
1205	MALLOC(nlop, struct nfscllock *,
1206	    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1207	nlop->nfslo_type = F_UNLCK;
1208	nlop->nfslo_first = off;
1209	if (len == NFS64BITSSET) {
1210		nlop->nfslo_end = NFS64BITSSET;
1211	} else {
1212		nlop->nfslo_end = off + len;
1213		if (nlop->nfslo_end <= nlop->nfslo_first) {
1214			FREE((caddr_t)nlop, M_NFSCLLOCK);
1215			return (NFSERR_INVAL);
1216		}
1217	}
1218	if (callcnt == 0) {
1219		MALLOC(other_lop, struct nfscllock *,
1220		    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1221		*other_lop = *nlop;
1222	}
1223	nfscl_filllockowner(id, own, flags);
1224	dp = NULL;
1225	NFSLOCKCLSTATE();
1226	if (callcnt == 0)
1227		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1228		    np->n_fhp->nfh_len);
1229
1230	/*
1231	 * First, unlock any local regions on a delegation.
1232	 */
1233	if (dp != NULL) {
1234		/* Look for this lockowner. */
1235		LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1236			if (!NFSBCMP(lp->nfsl_owner, own,
1237			    NFSV4CL_LOCKNAMELEN))
1238				break;
1239		}
1240		if (lp != NULL)
1241			/* Use other_lop, so nlop is still available */
1242			(void)nfscl_updatelock(lp, &other_lop, NULL, 1);
1243	}
1244
1245	/*
1246	 * Now, find a matching open/lockowner that hasn't already been done,
1247	 * as marked by nfsl_inprog.
1248	 */
1249	lp = NULL;
1250	fnd = 0;
1251	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1252	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1253		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1254		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1255		    LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1256			if (lp->nfsl_inprog == NULL &&
1257			    !NFSBCMP(lp->nfsl_owner, own,
1258			     NFSV4CL_LOCKNAMELEN)) {
1259				fnd = 1;
1260				break;
1261			}
1262		    }
1263		    if (fnd)
1264			break;
1265		}
1266	    }
1267	    if (fnd)
1268		break;
1269	}
1270
1271	if (lp != NULL) {
1272		ret = nfscl_updatelock(lp, &nlop, NULL, 0);
1273		if (ret)
1274			*dorpcp = 1;
1275		/*
1276		 * Serial modifications on the lock owner for multiple
1277		 * threads for the same process using a read/write lock.
1278		 */
1279		lp->nfsl_inprog = p;
1280		nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1281		*lpp = lp;
1282	}
1283	NFSUNLOCKCLSTATE();
1284	if (nlop)
1285		FREE((caddr_t)nlop, M_NFSCLLOCK);
1286	if (other_lop)
1287		FREE((caddr_t)other_lop, M_NFSCLLOCK);
1288	return (0);
1289}
1290
1291/*
1292 * Release all lockowners marked in progess for this process and file.
1293 */
1294APPLESTATIC void
1295nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
1296    void *id, int flags)
1297{
1298	struct nfsclowner *owp;
1299	struct nfsclopen *op;
1300	struct nfscllockowner *lp;
1301	struct nfsnode *np;
1302	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1303
1304	np = VTONFS(vp);
1305	nfscl_filllockowner(id, own, flags);
1306	NFSLOCKCLSTATE();
1307	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1308	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1309		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1310		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1311		    LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1312			if (lp->nfsl_inprog == p &&
1313			    !NFSBCMP(lp->nfsl_owner, own,
1314			    NFSV4CL_LOCKNAMELEN)) {
1315			    lp->nfsl_inprog = NULL;
1316			    nfscl_lockunlock(&lp->nfsl_rwlock);
1317			}
1318		    }
1319		}
1320	    }
1321	}
1322	nfscl_clrelease(clp);
1323	NFSUNLOCKCLSTATE();
1324}
1325
1326/*
1327 * Called to find out if any bytes within the byte range specified are
1328 * write locked by the calling process. Used to determine if flushing
1329 * is required before a LockU.
1330 * If in doubt, return 1, so the flush will occur.
1331 */
1332APPLESTATIC int
1333nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
1334    struct ucred *cred, NFSPROC_T *p, void *id, int flags)
1335{
1336	struct nfsclowner *owp;
1337	struct nfscllockowner *lp;
1338	struct nfsclopen *op;
1339	struct nfsclclient *clp;
1340	struct nfscllock *lop;
1341	struct nfscldeleg *dp;
1342	struct nfsnode *np;
1343	u_int64_t off, end;
1344	u_int8_t own[NFSV4CL_LOCKNAMELEN];
1345	int error = 0;
1346
1347	np = VTONFS(vp);
1348	switch (fl->l_whence) {
1349	case SEEK_SET:
1350	case SEEK_CUR:
1351		/*
1352		 * Caller is responsible for adding any necessary offset
1353		 * when SEEK_CUR is used.
1354		 */
1355		off = fl->l_start;
1356		break;
1357	case SEEK_END:
1358		off = np->n_size + fl->l_start;
1359		break;
1360	default:
1361		return (1);
1362	};
1363	if (fl->l_len != 0) {
1364		end = off + fl->l_len;
1365		if (end < off)
1366			return (1);
1367	} else {
1368		end = NFS64BITSSET;
1369	}
1370
1371	error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
1372	if (error)
1373		return (1);
1374	nfscl_filllockowner(id, own, flags);
1375	NFSLOCKCLSTATE();
1376
1377	/*
1378	 * First check the delegation locks.
1379	 */
1380	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
1381	if (dp != NULL) {
1382		LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1383			if (!NFSBCMP(lp->nfsl_owner, own,
1384			    NFSV4CL_LOCKNAMELEN))
1385				break;
1386		}
1387		if (lp != NULL) {
1388			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1389				if (lop->nfslo_first >= end)
1390					break;
1391				if (lop->nfslo_end <= off)
1392					continue;
1393				if (lop->nfslo_type == F_WRLCK) {
1394					nfscl_clrelease(clp);
1395					NFSUNLOCKCLSTATE();
1396					return (1);
1397				}
1398			}
1399		}
1400	}
1401
1402	/*
1403	 * Now, check state against the server.
1404	 */
1405	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1406	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1407		if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1408		    !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1409		    LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1410			if (!NFSBCMP(lp->nfsl_owner, own,
1411			    NFSV4CL_LOCKNAMELEN))
1412			    break;
1413		    }
1414		    if (lp != NULL) {
1415			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1416			    if (lop->nfslo_first >= end)
1417				break;
1418			    if (lop->nfslo_end <= off)
1419				continue;
1420			    if (lop->nfslo_type == F_WRLCK) {
1421				nfscl_clrelease(clp);
1422				NFSUNLOCKCLSTATE();
1423				return (1);
1424			    }
1425			}
1426		    }
1427		}
1428	    }
1429	}
1430	nfscl_clrelease(clp);
1431	NFSUNLOCKCLSTATE();
1432	return (0);
1433}
1434
1435/*
1436 * Release a byte range lock owner structure.
1437 */
1438APPLESTATIC void
1439nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete)
1440{
1441	struct nfsclclient *clp;
1442
1443	if (lp == NULL)
1444		return;
1445	NFSLOCKCLSTATE();
1446	clp = lp->nfsl_open->nfso_own->nfsow_clp;
1447	if (error != 0 && candelete &&
1448	    (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0)
1449		nfscl_freelockowner(lp, 0);
1450	else
1451		nfscl_lockunlock(&lp->nfsl_rwlock);
1452	nfscl_clrelease(clp);
1453	NFSUNLOCKCLSTATE();
1454}
1455
1456/*
1457 * Free up an open structure and any associated byte range lock structures.
1458 */
1459APPLESTATIC void
1460nfscl_freeopen(struct nfsclopen *op, int local)
1461{
1462
1463	LIST_REMOVE(op, nfso_list);
1464	nfscl_freealllocks(&op->nfso_lock, local);
1465	FREE((caddr_t)op, M_NFSCLOPEN);
1466	if (local)
1467		newnfsstats.cllocalopens--;
1468	else
1469		newnfsstats.clopens--;
1470}
1471
1472/*
1473 * Free up all lock owners and associated locks.
1474 */
1475static void
1476nfscl_freealllocks(struct nfscllockownerhead *lhp, int local)
1477{
1478	struct nfscllockowner *lp, *nlp;
1479
1480	LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) {
1481		if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1482			panic("nfscllckw");
1483		nfscl_freelockowner(lp, local);
1484	}
1485}
1486
1487/*
1488 * Called for an Open when NFSERR_EXPIRED is received from the server.
1489 * If there are no byte range locks nor a Share Deny lost, try to do a
1490 * fresh Open. Otherwise, free the open.
1491 */
1492static int
1493nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
1494    struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
1495{
1496	struct nfscllockowner *lp;
1497	struct nfscldeleg *dp;
1498	int mustdelete = 0, error;
1499
1500	/*
1501	 * Look for any byte range lock(s).
1502	 */
1503	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1504		if (!LIST_EMPTY(&lp->nfsl_lock)) {
1505			mustdelete = 1;
1506			break;
1507		}
1508	}
1509
1510	/*
1511	 * If no byte range lock(s) nor a Share deny, try to re-open.
1512	 */
1513	if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) {
1514		newnfs_copycred(&op->nfso_cred, cred);
1515		dp = NULL;
1516		error = nfsrpc_reopen(nmp, op->nfso_fh,
1517		    op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p);
1518		if (error) {
1519			mustdelete = 1;
1520			if (dp != NULL) {
1521				FREE((caddr_t)dp, M_NFSCLDELEG);
1522				dp = NULL;
1523			}
1524		}
1525		if (dp != NULL)
1526			nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
1527			    op->nfso_fhlen, cred, p, &dp);
1528	}
1529
1530	/*
1531	 * If a byte range lock or Share deny or couldn't re-open, free it.
1532	 */
1533	if (mustdelete)
1534		nfscl_freeopen(op, 0);
1535	return (mustdelete);
1536}
1537
1538/*
1539 * Free up an open owner structure.
1540 */
1541static void
1542nfscl_freeopenowner(struct nfsclowner *owp, int local)
1543{
1544
1545	LIST_REMOVE(owp, nfsow_list);
1546	FREE((caddr_t)owp, M_NFSCLOWNER);
1547	if (local)
1548		newnfsstats.cllocalopenowners--;
1549	else
1550		newnfsstats.clopenowners--;
1551}
1552
1553/*
1554 * Free up a byte range lock owner structure.
1555 */
1556APPLESTATIC void
1557nfscl_freelockowner(struct nfscllockowner *lp, int local)
1558{
1559	struct nfscllock *lop, *nlop;
1560
1561	LIST_REMOVE(lp, nfsl_list);
1562	LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
1563		nfscl_freelock(lop, local);
1564	}
1565	FREE((caddr_t)lp, M_NFSCLLOCKOWNER);
1566	if (local)
1567		newnfsstats.cllocallockowners--;
1568	else
1569		newnfsstats.cllockowners--;
1570}
1571
1572/*
1573 * Free up a byte range lock structure.
1574 */
1575APPLESTATIC void
1576nfscl_freelock(struct nfscllock *lop, int local)
1577{
1578
1579	LIST_REMOVE(lop, nfslo_list);
1580	FREE((caddr_t)lop, M_NFSCLLOCK);
1581	if (local)
1582		newnfsstats.cllocallocks--;
1583	else
1584		newnfsstats.cllocks--;
1585}
1586
1587/*
1588 * Clean out the state related to a delegation.
1589 */
1590static void
1591nfscl_cleandeleg(struct nfscldeleg *dp)
1592{
1593	struct nfsclowner *owp, *nowp;
1594	struct nfsclopen *op;
1595
1596	LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
1597		op = LIST_FIRST(&owp->nfsow_open);
1598		if (op != NULL) {
1599			if (LIST_NEXT(op, nfso_list) != NULL)
1600				panic("nfscleandel");
1601			nfscl_freeopen(op, 1);
1602		}
1603		nfscl_freeopenowner(owp, 1);
1604	}
1605	nfscl_freealllocks(&dp->nfsdl_lock, 1);
1606}
1607
1608/*
1609 * Free a delegation.
1610 */
1611static void
1612nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp)
1613{
1614
1615	TAILQ_REMOVE(hdp, dp, nfsdl_list);
1616	LIST_REMOVE(dp, nfsdl_hash);
1617	FREE((caddr_t)dp, M_NFSCLDELEG);
1618	newnfsstats.cldelegates--;
1619	nfscl_delegcnt--;
1620}
1621
1622/*
1623 * Free up all state related to this client structure.
1624 */
1625static void
1626nfscl_cleanclient(struct nfsclclient *clp)
1627{
1628	struct nfsclowner *owp, *nowp;
1629	struct nfsclopen *op, *nop;
1630	struct nfscllayout *lyp, *nlyp;
1631	struct nfscldevinfo *dip, *ndip;
1632
1633	TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
1634		nfscl_freelayout(lyp);
1635
1636	LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
1637		nfscl_freedevinfo(dip);
1638
1639	/* Now, all the OpenOwners, etc. */
1640	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1641		LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1642			nfscl_freeopen(op, 0);
1643		}
1644		nfscl_freeopenowner(owp, 0);
1645	}
1646}
1647
1648/*
1649 * Called when an NFSERR_EXPIRED is received from the server.
1650 */
1651static void
1652nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp,
1653    struct ucred *cred, NFSPROC_T *p)
1654{
1655	struct nfsclowner *owp, *nowp, *towp;
1656	struct nfsclopen *op, *nop, *top;
1657	struct nfscldeleg *dp, *ndp;
1658	int ret, printed = 0;
1659
1660	/*
1661	 * First, merge locally issued Opens into the list for the server.
1662	 */
1663	dp = TAILQ_FIRST(&clp->nfsc_deleg);
1664	while (dp != NULL) {
1665	    ndp = TAILQ_NEXT(dp, nfsdl_list);
1666	    owp = LIST_FIRST(&dp->nfsdl_owner);
1667	    while (owp != NULL) {
1668		nowp = LIST_NEXT(owp, nfsow_list);
1669		op = LIST_FIRST(&owp->nfsow_open);
1670		if (op != NULL) {
1671		    if (LIST_NEXT(op, nfso_list) != NULL)
1672			panic("nfsclexp");
1673		    LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) {
1674			if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner,
1675			    NFSV4CL_LOCKNAMELEN))
1676			    break;
1677		    }
1678		    if (towp != NULL) {
1679			/* Merge opens in */
1680			LIST_FOREACH(top, &towp->nfsow_open, nfso_list) {
1681			    if (top->nfso_fhlen == op->nfso_fhlen &&
1682				!NFSBCMP(top->nfso_fh, op->nfso_fh,
1683				 op->nfso_fhlen)) {
1684				top->nfso_mode |= op->nfso_mode;
1685				top->nfso_opencnt += op->nfso_opencnt;
1686				break;
1687			    }
1688			}
1689			if (top == NULL) {
1690			    /* Just add the open to the owner list */
1691			    LIST_REMOVE(op, nfso_list);
1692			    op->nfso_own = towp;
1693			    LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list);
1694			    newnfsstats.cllocalopens--;
1695			    newnfsstats.clopens++;
1696			}
1697		    } else {
1698			/* Just add the openowner to the client list */
1699			LIST_REMOVE(owp, nfsow_list);
1700			owp->nfsow_clp = clp;
1701			LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list);
1702			newnfsstats.cllocalopenowners--;
1703			newnfsstats.clopenowners++;
1704			newnfsstats.cllocalopens--;
1705			newnfsstats.clopens++;
1706		    }
1707		}
1708		owp = nowp;
1709	    }
1710	    if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) {
1711		printed = 1;
1712		printf("nfsv4 expired locks lost\n");
1713	    }
1714	    nfscl_cleandeleg(dp);
1715	    nfscl_freedeleg(&clp->nfsc_deleg, dp);
1716	    dp = ndp;
1717	}
1718	if (!TAILQ_EMPTY(&clp->nfsc_deleg))
1719	    panic("nfsclexp");
1720
1721	/*
1722	 * Now, try and reopen against the server.
1723	 */
1724	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1725		owp->nfsow_seqid = 0;
1726		LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1727			ret = nfscl_expireopen(clp, op, nmp, cred, p);
1728			if (ret && !printed) {
1729				printed = 1;
1730				printf("nfsv4 expired locks lost\n");
1731			}
1732		}
1733		if (LIST_EMPTY(&owp->nfsow_open))
1734			nfscl_freeopenowner(owp, 0);
1735	}
1736}
1737
1738/*
1739 * This function must be called after the process represented by "own" has
1740 * exited. Must be called with CLSTATE lock held.
1741 */
1742static void
1743nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own)
1744{
1745	struct nfsclowner *owp, *nowp;
1746	struct nfscllockowner *lp, *nlp;
1747	struct nfscldeleg *dp;
1748
1749	/* First, get rid of local locks on delegations. */
1750	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1751		LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1752		    if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
1753			if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1754			    panic("nfscllckw");
1755			nfscl_freelockowner(lp, 1);
1756		    }
1757		}
1758	}
1759	owp = LIST_FIRST(&clp->nfsc_owner);
1760	while (owp != NULL) {
1761		nowp = LIST_NEXT(owp, nfsow_list);
1762		if (!NFSBCMP(owp->nfsow_owner, own,
1763		    NFSV4CL_LOCKNAMELEN)) {
1764			/*
1765			 * If there are children that haven't closed the
1766			 * file descriptors yet, the opens will still be
1767			 * here. For that case, let the renew thread clear
1768			 * out the OpenOwner later.
1769			 */
1770			if (LIST_EMPTY(&owp->nfsow_open))
1771				nfscl_freeopenowner(owp, 0);
1772			else
1773				owp->nfsow_defunct = 1;
1774		}
1775		owp = nowp;
1776	}
1777}
1778
1779/*
1780 * Find open/lock owners for processes that have exited.
1781 */
1782static void
1783nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
1784{
1785	struct nfsclowner *owp, *nowp;
1786	struct nfsclopen *op;
1787	struct nfscllockowner *lp, *nlp;
1788	struct nfscldeleg *dp;
1789
1790	NFSPROCLISTLOCK();
1791	NFSLOCKCLSTATE();
1792	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1793		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1794			LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) {
1795				if (LIST_EMPTY(&lp->nfsl_lock))
1796					nfscl_emptylockowner(lp, lhp);
1797			}
1798		}
1799		if (nfscl_procdoesntexist(owp->nfsow_owner))
1800			nfscl_cleanup_common(clp, owp->nfsow_owner);
1801	}
1802
1803	/*
1804	 * For the single open_owner case, these lock owners need to be
1805	 * checked to see if they still exist separately.
1806	 * This is because nfscl_procdoesntexist() never returns true for
1807	 * the single open_owner so that the above doesn't ever call
1808	 * nfscl_cleanup_common().
1809	 */
1810	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1811		LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1812			if (nfscl_procdoesntexist(lp->nfsl_owner))
1813				nfscl_cleanup_common(clp, lp->nfsl_owner);
1814		}
1815	}
1816	NFSUNLOCKCLSTATE();
1817	NFSPROCLISTUNLOCK();
1818}
1819
1820/*
1821 * Take the empty lock owner and move it to the local lhp list if the
1822 * associated process no longer exists.
1823 */
1824static void
1825nfscl_emptylockowner(struct nfscllockowner *lp,
1826    struct nfscllockownerfhhead *lhp)
1827{
1828	struct nfscllockownerfh *lfhp, *mylfhp;
1829	struct nfscllockowner *nlp;
1830	int fnd_it;
1831
1832	/* If not a Posix lock owner, just return. */
1833	if ((lp->nfsl_lockflags & F_POSIX) == 0)
1834		return;
1835
1836	fnd_it = 0;
1837	mylfhp = NULL;
1838	/*
1839	 * First, search to see if this lock owner is already in the list.
1840	 * If it is, then the associated process no longer exists.
1841	 */
1842	SLIST_FOREACH(lfhp, lhp, nfslfh_list) {
1843		if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen &&
1844		    !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh,
1845		    lfhp->nfslfh_len))
1846			mylfhp = lfhp;
1847		LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list)
1848			if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner,
1849			    NFSV4CL_LOCKNAMELEN))
1850				fnd_it = 1;
1851	}
1852	/* If not found, check if process still exists. */
1853	if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0)
1854		return;
1855
1856	/* Move the lock owner over to the local list. */
1857	if (mylfhp == NULL) {
1858		mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP,
1859		    M_NOWAIT);
1860		if (mylfhp == NULL)
1861			return;
1862		mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen;
1863		NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh,
1864		    mylfhp->nfslfh_len);
1865		LIST_INIT(&mylfhp->nfslfh_lock);
1866		SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list);
1867	}
1868	LIST_REMOVE(lp, nfsl_list);
1869	LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list);
1870}
1871
1872static int	fake_global;	/* Used to force visibility of MNTK_UNMOUNTF */
1873/*
1874 * Called from nfs umount to free up the clientid.
1875 */
1876APPLESTATIC void
1877nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p)
1878{
1879	struct nfsclclient *clp;
1880	struct ucred *cred;
1881	int igotlock;
1882
1883	/*
1884	 * For the case that matters, this is the thread that set
1885	 * MNTK_UNMOUNTF, so it will see it set. The code that follows is
1886	 * done to ensure that any thread executing nfscl_getcl() after
1887	 * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the
1888	 * mutex for NFSLOCKCLSTATE(), so it is "m" for the following
1889	 * explanation, courtesy of Alan Cox.
1890	 * What follows is a snippet from Alan Cox's email at:
1891	 * http://docs.FreeBSD.org/cgi/
1892	 *     mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw
1893	 *
1894	 * 1. Set MNTK_UNMOUNTF
1895	 * 2. Acquire a standard FreeBSD mutex "m".
1896	 * 3. Update some data structures.
1897	 * 4. Release mutex "m".
1898	 *
1899	 * Then, other threads that acquire "m" after step 4 has occurred will
1900	 * see MNTK_UNMOUNTF as set.  But, other threads that beat thread X to
1901	 * step 2 may or may not see MNTK_UNMOUNTF as set.
1902	 */
1903	NFSLOCKCLSTATE();
1904	if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1905		fake_global++;
1906		NFSUNLOCKCLSTATE();
1907		NFSLOCKCLSTATE();
1908	}
1909
1910	clp = nmp->nm_clp;
1911	if (clp != NULL) {
1912		if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0)
1913			panic("nfscl umount");
1914
1915		/*
1916		 * First, handshake with the nfscl renew thread, to terminate
1917		 * it.
1918		 */
1919		clp->nfsc_flags |= NFSCLFLAGS_UMOUNT;
1920		while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD)
1921			(void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT,
1922			    "nfsclumnt", hz);
1923
1924		/*
1925		 * Now, get the exclusive lock on the client state, so
1926		 * that no uses of the state are still in progress.
1927		 */
1928		do {
1929			igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
1930			    NFSCLSTATEMUTEXPTR, NULL);
1931		} while (!igotlock);
1932		NFSUNLOCKCLSTATE();
1933
1934		/*
1935		 * Free up all the state. It will expire on the server, but
1936		 * maybe we should do a SetClientId/SetClientIdConfirm so
1937		 * the server throws it away?
1938		 */
1939		LIST_REMOVE(clp, nfsc_list);
1940		nfscl_delegreturnall(clp, p);
1941		cred = newnfs_getcred();
1942		if (NFSHASNFSV4N(nmp)) {
1943			(void)nfsrpc_destroysession(nmp, clp, cred, p);
1944			(void)nfsrpc_destroyclient(nmp, clp, cred, p);
1945		} else
1946			(void)nfsrpc_setclient(nmp, clp, 0, cred, p);
1947		nfscl_cleanclient(clp);
1948		nmp->nm_clp = NULL;
1949		NFSFREECRED(cred);
1950		free(clp, M_NFSCLCLIENT);
1951	} else
1952		NFSUNLOCKCLSTATE();
1953}
1954
1955/*
1956 * This function is called when a server replies with NFSERR_STALECLIENTID
1957 * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists,
1958 * doing Opens and Locks with reclaim. If these fail, it deletes the
1959 * corresponding state.
 *
 * clp  - the client whose opens, locks and delegations are reclaimed.
 * cred - credentials used for the nfsrpc_setclient() and
 *        nfsrpc_reclaimcomplete() calls.
 * p    - passed through unchanged to every RPC helper called here.
 *
 * Nothing is returned. If a new clientid cannot be acquired, the recovery
 * flags are cleared, sleepers on nfsc_flags are woken and no Opens or
 * Locks are reclaimed (the layouts have already been freed by then).
1960 */
1961static void
1962nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p)
1963{
1964	struct nfsclowner *owp, *nowp;
1965	struct nfsclopen *op, *nop;
1966	struct nfscllockowner *lp, *nlp;
1967	struct nfscllock *lop, *nlop;
1968	struct nfscldeleg *dp, *ndp, *tdp;
1969	struct nfsmount *nmp;
1970	struct ucred *tcred;
1971	struct nfsclopenhead extra_open;
1972	struct nfscldeleghead extra_deleg;
1973	struct nfsreq *rep;
1974	u_int64_t len;
1975	u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode;
1976	int i, igotlock = 0, error, trycnt, firstlock;
1977	struct nfscllayout *lyp, *nlyp;
1978
1979	/*
1980	 * First, lock the client structure, so everyone else will
1981	 * block when trying to use state.
1982	 */
1983	NFSLOCKCLSTATE();
1984	clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
	/* Keep retrying until nfsv4_lock() reports that nfsc_lock was acquired. */
1985	do {
1986		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
1987		    NFSCLSTATEMUTEXPTR, NULL);
1988	} while (!igotlock);
1989	NFSUNLOCKCLSTATE();
1990
1991	nmp = clp->nfsc_nmp;
1992	if (nmp == NULL)
1993		panic("nfscl recover");
1994
1995	/*
1996	 * For now, just get rid of all layouts. There may be a need
1997	 * to do LayoutCommit Ops with reclaim == true later.
1998	 */
1999	TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
2000		nfscl_freelayout(lyp);
2001	TAILQ_INIT(&clp->nfsc_layout);
2002	for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
2003		LIST_INIT(&clp->nfsc_layouthash[i]);
2004
	/*
	 * Acquire a new clientid, passing 1 as the third argument
	 * (presumably the "reclaim" indication - confirm against
	 * nfsrpc_setclient()). At most 5 attempts are made, and a retry
	 * only happens for the three errors tested below.
	 */
2005	trycnt = 5;
2006	do {
2007		error = nfsrpc_setclient(nmp, clp, 1, cred, p);
2008	} while ((error == NFSERR_STALECLIENTID ||
2009	     error == NFSERR_BADSESSION ||
2010	     error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
	/* Could not get a clientid: undo the recovery flags and give up. */
2011	if (error) {
2012		NFSLOCKCLSTATE();
2013		clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER |
2014		    NFSCLFLAGS_RECVRINPROG);
2015		wakeup(&clp->nfsc_flags);
2016		nfsv4_unlock(&clp->nfsc_lock, 0);
2017		NFSUNLOCKCLSTATE();
2018		return;
2019	}
	/*
	 * NOTE(review): these two flag updates are done without
	 * NFSLOCKCLSTATE(), unlike the other nfsc_flags updates in this
	 * function - confirm that holding nfsc_lock is sufficient here.
	 */
2020	clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2021	clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2022
2023	/*
2024	 * Mark requests already queued on the server, so that they don't
2025	 * initiate another recovery cycle. Any requests already in the
2026	 * queue that handle state information will have the old stale
2027	 * clientid/stateid and will get a NFSERR_STALESTATEID,
2028	 * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server.
2029	 * This will be translated to NFSERR_STALEDONTRECOVER when
2030	 * R_DONTRECOVER is set.
2031	 */
2032	NFSLOCKREQ();
2033	TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) {
2034		if (rep->r_nmp == nmp)
2035			rep->r_flags |= R_DONTRECOVER;
2036	}
2037	NFSUNLOCKREQ();
2038
2039	/*
2040	 * Now, mark all delegations "need reclaim".
2041	 */
2042	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list)
2043		dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM;
2044
	/*
	 * extra_deleg and extra_open collect delegations and opens acquired
	 * as a side effect of reclaiming; they are returned/closed and freed
	 * near the end of this function.
	 */
2045	TAILQ_INIT(&extra_deleg);
2046	LIST_INIT(&extra_open);
2047	/*
2048	 * Now traverse the state lists, doing Open and Lock Reclaims.
2049	 */
2050	tcred = newnfs_getcred();
2051	owp = LIST_FIRST(&clp->nfsc_owner);
2052	while (owp != NULL) {
2053	    nowp = LIST_NEXT(owp, nfsow_list);
2054	    owp->nfsow_seqid = 0;
2055	    op = LIST_FIRST(&owp->nfsow_open);
2056	    while (op != NULL) {
2057		nop = LIST_NEXT(op, nfso_list);
		/*
		 * "error" is carried over from the previous iteration: once
		 * a reclaim has failed with NFSERR_NOGRACE or
		 * NFSERR_BADSESSION, no reclaim Open is attempted for the
		 * remaining opens.
		 */
2058		if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2059		    /* Search for a delegation to reclaim with the open */
2060		    TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2061			if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2062			    continue;
2063			if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2064			    mode = NFSV4OPEN_ACCESSWRITE;
2065			    delegtype = NFSV4OPEN_DELEGATEWRITE;
2066			} else {
2067			    mode = NFSV4OPEN_ACCESSREAD;
2068			    delegtype = NFSV4OPEN_DELEGATEREAD;
2069			}
2070			if ((op->nfso_mode & mode) == mode &&
2071			    op->nfso_fhlen == dp->nfsdl_fhlen &&
2072			    !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen))
2073			    break;
2074		    }
		    /*
		     * dp is NULL here when the search ran off the end of the
		     * list, i.e. no delegation needing reclaim matched this
		     * open's access mode and file handle.
		     */
2075		    ndp = dp;
2076		    if (dp == NULL)
2077			delegtype = NFSV4OPEN_DELEGATENONE;
2078		    newnfs_copycred(&op->nfso_cred, tcred);
2079		    error = nfscl_tryopen(nmp, NULL, op->nfso_fh,
2080			op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen,
2081			op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype,
2082			tcred, p);
2083		    if (!error) {
2084			/* Handle any replied delegation */
			/*
			 * NOTE(review): ndp is freed or queued below as an
			 * object separate from the list entries, so
			 * presumably nfscl_tryopen() replaces it with a newly
			 * allocated delegation or NULL - confirm.
			 * The list is only searched for a match when the
			 * replied delegation is a write delegation or the
			 * mount is read-only.
			 */
2085			if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE)
2086			    || NFSMNT_RDONLY(nmp->nm_mountp))) {
2087			    if ((ndp->nfsdl_flags & NFSCLDL_WRITE))
2088				mode = NFSV4OPEN_ACCESSWRITE;
2089			    else
2090				mode = NFSV4OPEN_ACCESSREAD;
2091			    TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2092				if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2093				    continue;
2094				if ((op->nfso_mode & mode) == mode &&
2095				    op->nfso_fhlen == dp->nfsdl_fhlen &&
2096				    !NFSBCMP(op->nfso_fh, dp->nfsdl_fh,
2097				    op->nfso_fhlen)) {
				    /*
				     * Copy the replied delegation's state
				     * into the existing entry, mark it
				     * reclaimed and free the reply copy.
				     */
2098				    dp->nfsdl_stateid = ndp->nfsdl_stateid;
2099				    dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit;
2100				    dp->nfsdl_ace = ndp->nfsdl_ace;
2101				    dp->nfsdl_change = ndp->nfsdl_change;
2102				    dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2103				    if ((ndp->nfsdl_flags & NFSCLDL_RECALL))
2104					dp->nfsdl_flags |= NFSCLDL_RECALL;
2105				    FREE((caddr_t)ndp, M_NFSCLDELEG);
2106				    ndp = NULL;
2107				    break;
2108				}
2109			    }
2110			}
			/* Not matched above: queue it to be returned later. */
2111			if (ndp != NULL)
2112			    TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list);
2113
2114			/* and reclaim all byte range locks */
2115			lp = LIST_FIRST(&op->nfso_lock);
2116			while (lp != NULL) {
2117			    nlp = LIST_NEXT(lp, nfsl_list);
2118			    lp->nfsl_seqid = 0;
			    /*
			     * firstlock stays 1 until a lock for this
			     * lockowner has been reclaimed successfully.
			     */
2119			    firstlock = 1;
2120			    lop = LIST_FIRST(&lp->nfsl_lock);
2121			    while (lop != NULL) {
2122				nlop = LIST_NEXT(lop, nfslo_list);
				/*
				 * The length is end - first, except that an
				 * end of NFS64BITSSET is passed on as a
				 * length of NFS64BITSSET.
				 */
2123				if (lop->nfslo_end == NFS64BITSSET)
2124				    len = NFS64BITSSET;
2125				else
2126				    len = lop->nfslo_end - lop->nfslo_first;
2127				error = nfscl_trylock(nmp, NULL,
2128				    op->nfso_fh, op->nfso_fhlen, lp,
2129				    firstlock, 1, lop->nfslo_first, len,
2130				    lop->nfslo_type, tcred, p);
				/* A lock that cannot be reclaimed is dropped. */
2131				if (error != 0)
2132				    nfscl_freelock(lop, 0);
2133				else
2134				    firstlock = 0;
2135				lop = nlop;
2136			    }
2137			    /* If no locks, but a lockowner, just delete it. */
2138			    if (LIST_EMPTY(&lp->nfsl_lock))
2139				nfscl_freelockowner(lp, 0);
2140			    lp = nlp;
2141			}
2142		    }
2143		}
		/*
		 * NOTE(review): "error" may hold the result of the last
		 * nfscl_trylock() call above rather than that of
		 * nfscl_tryopen(), so a failed lock reclaim also frees the
		 * open - confirm that this is intended.
		 */
2144		if (error != 0 && error != NFSERR_BADSESSION)
2145		    nfscl_freeopen(op, 0);
2146		op = nop;
2147	    }
2148	    owp = nowp;
2149	}
2150
2151	/*
2152	 * Now, try and get any delegations not yet reclaimed by cobbling
2153	 * together an appropriate open.
2154	 */
2155	nowp = NULL;
2156	dp = TAILQ_FIRST(&clp->nfsc_deleg);
2157	while (dp != NULL) {
2158	    ndp = TAILQ_NEXT(dp, nfsdl_list);
2159	    if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) {
		/* The dummy openowner is allocated once, on first use. */
2160		if (nowp == NULL) {
2161		    MALLOC(nowp, struct nfsclowner *,
2162			sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK);
2163		    /*
2164		     * Name must be as long as the largest possible
2165		     * NFSV4CL_LOCKNAMELEN. 12 for now.
2166		     */
2167		    NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner,
2168			NFSV4CL_LOCKNAMELEN);
2169		    LIST_INIT(&nowp->nfsow_open);
2170		    nowp->nfsow_clp = clp;
2171		    nowp->nfsow_seqid = 0;
2172		    nowp->nfsow_defunct = 0;
2173		    nfscl_lockinit(&nowp->nfsow_rwlock);
2174		}
		/*
		 * nop stays NULL when the reclaim is skipped because an
		 * earlier attempt failed with NFSERR_NOGRACE or
		 * NFSERR_BADSESSION; "error" is then still non-zero and the
		 * delegation is thrown away below.
		 */
2175		nop = NULL;
2176		if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
		    /*
		     * The allocation is sized so that nfso_fh can hold the
		     * delegation's file handle, which is copied in below
		     * (presumably nfso_fh is a one byte array at the end of
		     * the structure - confirm).
		     */
2177		    MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) +
2178			dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
2179		    nop->nfso_own = nowp;
2180		    if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2181			nop->nfso_mode = NFSV4OPEN_ACCESSWRITE;
2182			delegtype = NFSV4OPEN_DELEGATEWRITE;
2183		    } else {
2184			nop->nfso_mode = NFSV4OPEN_ACCESSREAD;
2185			delegtype = NFSV4OPEN_DELEGATEREAD;
2186		    }
2187		    nop->nfso_opencnt = 0;
2188		    nop->nfso_posixlock = 1;
2189		    nop->nfso_fhlen = dp->nfsdl_fhlen;
2190		    NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen);
2191		    LIST_INIT(&nop->nfso_lock);
2192		    nop->nfso_stateid.seqid = 0;
2193		    nop->nfso_stateid.other[0] = 0;
2194		    nop->nfso_stateid.other[1] = 0;
2195		    nop->nfso_stateid.other[2] = 0;
2196		    newnfs_copycred(&dp->nfsdl_cred, tcred);
2197		    newnfs_copyincred(tcred, &nop->nfso_cred);
2198		    tdp = NULL;
2199		    error = nfscl_tryopen(nmp, NULL, nop->nfso_fh,
2200			nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen,
2201			nop->nfso_mode, nop, NULL, 0, &tdp, 1,
2202			delegtype, tcred, p);
		    /*
		     * tdp was NULL before the call, so a non-NULL value is a
		     * delegation handed back by nfscl_tryopen().
		     */
2203		    if (tdp != NULL) {
2204			if ((tdp->nfsdl_flags & NFSCLDL_WRITE))
2205			    mode = NFSV4OPEN_ACCESSWRITE;
2206			else
2207			    mode = NFSV4OPEN_ACCESSREAD;
2208			if ((nop->nfso_mode & mode) == mode &&
2209			    nop->nfso_fhlen == tdp->nfsdl_fhlen &&
2210			    !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh,
2211			    nop->nfso_fhlen)) {
2212			    dp->nfsdl_stateid = tdp->nfsdl_stateid;
2213			    dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit;
2214			    dp->nfsdl_ace = tdp->nfsdl_ace;
2215			    dp->nfsdl_change = tdp->nfsdl_change;
2216			    dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2217			    if ((tdp->nfsdl_flags & NFSCLDL_RECALL))
2218				dp->nfsdl_flags |= NFSCLDL_RECALL;
2219			    FREE((caddr_t)tdp, M_NFSCLDELEG);
2220			} else {
2221			    TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list);
2222			}
2223		    }
2224		}
2225		if (error) {
2226		    if (nop != NULL)
2227			FREE((caddr_t)nop, M_NFSCLOPEN);
2228		    /*
2229		     * Couldn't reclaim it, so throw the state
2230		     * away. Ouch!!
2231		     */
2232		    nfscl_cleandeleg(dp);
2233		    nfscl_freedeleg(&clp->nfsc_deleg, dp);
2234		} else {
		    /* The reclaim Open worked; queue it to be closed below. */
2235		    LIST_INSERT_HEAD(&extra_open, nop, nfso_list);
2236		}
2237	    }
2238	    dp = ndp;
2239	}
2240
2241	/*
2242	 * Now, get rid of extra Opens and Delegations.
2243	 */
	/*
	 * Each Close (and each DelegReturn further down) is retried, after
	 * an nfs_catnap(), for as long as the reply is NFSERR_GRACE. Any
	 * other result ends the retry and the structure is freed.
	 */
2244	LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) {
2245		do {
2246			newnfs_copycred(&op->nfso_cred, tcred);
2247			error = nfscl_tryclose(op, tcred, nmp, p);
2248			if (error == NFSERR_GRACE)
2249				(void) nfs_catnap(PZERO, error, "nfsexcls");
2250		} while (error == NFSERR_GRACE);
2251		LIST_REMOVE(op, nfso_list);
2252		FREE((caddr_t)op, M_NFSCLOPEN);
2253	}
2254	if (nowp != NULL)
2255		FREE((caddr_t)nowp, M_NFSCLOWNER);
2256
2257	TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) {
2258		do {
2259			newnfs_copycred(&dp->nfsdl_cred, tcred);
2260			error = nfscl_trydelegreturn(dp, tcred, nmp, p);
2261			if (error == NFSERR_GRACE)
2262				(void) nfs_catnap(PZERO, error, "nfsexdlg");
2263		} while (error == NFSERR_GRACE);
2264		TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list);
2265		FREE((caddr_t)dp, M_NFSCLDELEG);
2266	}
2267
2268	/* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */
2269	if (NFSHASNFSV4N(nmp))
2270		(void)nfsrpc_reclaimcomplete(nmp, cred, p);
2271
	/* Recovery is done: clear the flag, wake waiters, drop nfsc_lock. */
2272	NFSLOCKCLSTATE();
2273	clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG;
2274	wakeup(&clp->nfsc_flags);
2275	nfsv4_unlock(&clp->nfsc_lock, 0);
2276	NFSUNLOCKCLSTATE();
2277	NFSFREECRED(tcred);
2278}
2279
2280/*
2281 * This function is called when a server replies with NFSERR_EXPIRED.
2282 * It deletes all state for the client and does a fresh SetClientId/confirm.
2283 * XXX Someday it should post a signal to the process(es) that hold the
2284 * state, so they know that lock state has been lost.
2285 */
2286APPLESTATIC int
2287nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p)
2288{
2289	struct nfsmount *nmp;
2290	struct ucred *cred;
2291	int igotlock = 0, error, trycnt;
2292
2293	/*
2294	 * If the clientid has gone away or a new SetClientid has already
2295	 * been done, just return ok.
2296	 */
2297	if (clp == NULL || clidrev != clp->nfsc_clientidrev)
2298		return (0);
2299
2300	/*
2301	 * First, lock the client structure, so everyone else will
2302	 * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so
2303	 * that only one thread does the work.
2304	 */
2305	NFSLOCKCLSTATE();
2306	clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT;
2307	do {
2308		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2309		    NFSCLSTATEMUTEXPTR, NULL);
2310	} while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT));
2311	if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) {
2312		if (igotlock)
2313			nfsv4_unlock(&clp->nfsc_lock, 0);
2314		NFSUNLOCKCLSTATE();
2315		return (0);
2316	}
2317	clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2318	NFSUNLOCKCLSTATE();
2319
2320	nmp = clp->nfsc_nmp;
2321	if (nmp == NULL)
2322		panic("nfscl expired");
2323	cred = newnfs_getcred();
2324	trycnt = 5;
2325	do {
2326		error = nfsrpc_setclient(nmp, clp, 0, cred, p);
2327	} while ((error == NFSERR_STALECLIENTID ||
2328	     error == NFSERR_BADSESSION ||
2329	     error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2330	if (error) {
2331		NFSLOCKCLSTATE();
2332		clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2333	} else {
2334		/*
2335		 * Expire the state for the client.
2336		 */
2337		nfscl_expireclient(clp, nmp, cred, p);
2338		NFSLOCKCLSTATE();
2339		clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2340		clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2341	}
2342	clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG);
2343	wakeup(&clp->nfsc_flags);
2344	nfsv4_unlock(&clp->nfsc_lock, 0);
2345	NFSUNLOCKCLSTATE();
2346	NFSFREECRED(cred);
2347	return (error);
2348}
2349
2350/*
2351 * This function inserts a lock in the list after insert_lop.
2352 */
2353static void
2354nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop,
2355    struct nfscllock *insert_lop, int local)
2356{
2357
2358	if ((struct nfscllockowner *)insert_lop == lp)
2359		LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list);
2360	else
2361		LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list);
2362	if (local)
2363		newnfsstats.cllocallocks++;
2364	else
2365		newnfsstats.cllocks++;
2366}
2367
2368/*
2369 * This function updates the locking for a lock owner and given file. It
2370 * maintains a list of lock ranges ordered on increasing file offset that
2371 * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style).
2372 * It always adds new_lop to the list and sometimes uses the one pointed
2373 * at by other_lopp.
2374 * Returns 1 if the locks were modified, 0 otherwise.
2375 */
2376static int
2377nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp,
2378    struct nfscllock **other_lopp, int local)
2379{
2380	struct nfscllock *new_lop = *new_lopp;
2381	struct nfscllock *lop, *tlop, *ilop;
2382	struct nfscllock *other_lop;
2383	int unlock = 0, modified = 0;
2384	u_int64_t tmp;
2385
2386	/*
2387	 * Work down the list until the lock is merged.
2388	 */
2389	if (new_lop->nfslo_type == F_UNLCK)
2390		unlock = 1;
2391	ilop = (struct nfscllock *)lp;
2392	lop = LIST_FIRST(&lp->nfsl_lock);
2393	while (lop != NULL) {
2394	    /*
2395	     * Only check locks for this file that aren't before the start of
2396	     * new lock's range.
2397	     */
2398	    if (lop->nfslo_end >= new_lop->nfslo_first) {
2399		if (new_lop->nfslo_end < lop->nfslo_first) {
2400		    /*
2401		     * If the new lock ends before the start of the
2402		     * current lock's range, no merge, just insert
2403		     * the new lock.
2404		     */
2405		    break;
2406		}
2407		if (new_lop->nfslo_type == lop->nfslo_type ||
2408		    (new_lop->nfslo_first <= lop->nfslo_first &&
2409		     new_lop->nfslo_end >= lop->nfslo_end)) {
2410		    /*
2411		     * This lock can be absorbed by the new lock/unlock.
2412		     * This happens when it covers the entire range
2413		     * of the old lock or is contiguous
2414		     * with the old lock and is of the same type or an
2415		     * unlock.
2416		     */
2417		    if (new_lop->nfslo_type != lop->nfslo_type ||
2418			new_lop->nfslo_first != lop->nfslo_first ||
2419			new_lop->nfslo_end != lop->nfslo_end)
2420			modified = 1;
2421		    if (lop->nfslo_first < new_lop->nfslo_first)
2422			new_lop->nfslo_first = lop->nfslo_first;
2423		    if (lop->nfslo_end > new_lop->nfslo_end)
2424			new_lop->nfslo_end = lop->nfslo_end;
2425		    tlop = lop;
2426		    lop = LIST_NEXT(lop, nfslo_list);
2427		    nfscl_freelock(tlop, local);
2428		    continue;
2429		}
2430
2431		/*
2432		 * All these cases are for contiguous locks that are not the
2433		 * same type, so they can't be merged.
2434		 */
2435		if (new_lop->nfslo_first <= lop->nfslo_first) {
2436		    /*
2437		     * This case is where the new lock overlaps with the
2438		     * first part of the old lock. Move the start of the
2439		     * old lock to just past the end of the new lock. The
2440		     * new lock will be inserted in front of the old, since
2441		     * ilop hasn't been updated. (We are done now.)
2442		     */
2443		    if (lop->nfslo_first != new_lop->nfslo_end) {
2444			lop->nfslo_first = new_lop->nfslo_end;
2445			modified = 1;
2446		    }
2447		    break;
2448		}
2449		if (new_lop->nfslo_end >= lop->nfslo_end) {
2450		    /*
2451		     * This case is where the new lock overlaps with the
2452		     * end of the old lock's range. Move the old lock's
2453		     * end to just before the new lock's first and insert
2454		     * the new lock after the old lock.
2455		     * Might not be done yet, since the new lock could
2456		     * overlap further locks with higher ranges.
2457		     */
2458		    if (lop->nfslo_end != new_lop->nfslo_first) {
2459			lop->nfslo_end = new_lop->nfslo_first;
2460			modified = 1;
2461		    }
2462		    ilop = lop;
2463		    lop = LIST_NEXT(lop, nfslo_list);
2464		    continue;
2465		}
2466		/*
2467		 * The final case is where the new lock's range is in the
2468		 * middle of the current lock's and splits the current lock
2469		 * up. Use *other_lopp to handle the second part of the
2470		 * split old lock range. (We are done now.)
2471		 * For unlock, we use new_lop as other_lop and tmp, since
2472		 * other_lop and new_lop are the same for this case.
2473		 * We noted the unlock case above, so we don't need
2474		 * new_lop->nfslo_type any longer.
2475		 */
2476		tmp = new_lop->nfslo_first;
2477		if (unlock) {
2478		    other_lop = new_lop;
2479		    *new_lopp = NULL;
2480		} else {
2481		    other_lop = *other_lopp;
2482		    *other_lopp = NULL;
2483		}
2484		other_lop->nfslo_first = new_lop->nfslo_end;
2485		other_lop->nfslo_end = lop->nfslo_end;
2486		other_lop->nfslo_type = lop->nfslo_type;
2487		lop->nfslo_end = tmp;
2488		nfscl_insertlock(lp, other_lop, lop, local);
2489		ilop = lop;
2490		modified = 1;
2491		break;
2492	    }
2493	    ilop = lop;
2494	    lop = LIST_NEXT(lop, nfslo_list);
2495	    if (lop == NULL)
2496		break;
2497	}
2498
2499	/*
2500	 * Insert the new lock in the list at the appropriate place.
2501	 */
2502	if (!unlock) {
2503		nfscl_insertlock(lp, new_lop, ilop, local);
2504		*new_lopp = NULL;
2505		modified = 1;
2506	}
2507	return (modified);
2508}
2509
2510/*
2511 * This function must be run as a kernel thread.
2512 * It does Renew Ops and recovery, when required.
 *
 * clp - the client this thread services.
 * p   - passed through to the RPC helpers.
 *
 * Each pass of the main loop does, in order: recovery (if flagged), lease
 * renewal for the MDS and any DS sessions, delegation recalls and trimming,
 * layout recalls and returns, device info cleanup and ReleaseLockOwner for
 * exited processes. It then sleeps on clp for up to hz ticks unless
 * recovery is pending. The function returns only once NFSCLFLAGS_UMOUNT is
 * seen, after clearing NFSCLFLAGS_HASTHREAD and waking sleepers on clp.
2513 */
2514APPLESTATIC void
2515nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
2516{
2517	struct nfsclowner *owp, *nowp;
2518	struct nfsclopen *op;
2519	struct nfscllockowner *lp, *nlp;
2520	struct nfscldeleghead dh;
2521	struct nfscldeleg *dp, *ndp;
2522	struct ucred *cred;
2523	u_int32_t clidrev;
2524	int error, cbpathdown, islept, igotlock, ret, clearok;
2525	uint32_t recover_done_time = 0;
2526	time_t mytime;
	/* Being static, prevsec is shared by every invocation of this function. */
2527	static time_t prevsec = 0;
2528	struct nfscllockownerfh *lfhp, *nlfhp;
2529	struct nfscllockownerfhhead lfh;
2530	struct nfscllayout *lyp, *nlyp;
2531	struct nfscldevinfo *dip, *ndip;
2532	struct nfscllayouthead rlh;
2533	struct nfsclrecalllayout *recallp;
2534	struct nfsclds *dsp;
2535
2536	cred = newnfs_getcred();
2537	NFSLOCKCLSTATE();
2538	clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD;
2539	NFSUNLOCKCLSTATE();
2540	for(;;) {
2541		newnfs_setroot(cred);
2542		cbpathdown = 0;
2543		if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) {
2544			/*
2545			 * Only allow one recover within 1/2 of the lease
2546			 * duration (nfsc_renew).
2547			 */
2548			if (recover_done_time < NFSD_MONOSEC) {
2549				recover_done_time = NFSD_MONOSEC +
2550				    clp->nfsc_renew;
2551				NFSCL_DEBUG(1, "Doing recovery..\n");
2552				nfscl_recover(clp, cred, p);
2553			} else {
				/* Too soon after the last one: drop the request. */
2554				NFSCL_DEBUG(1, "Clear Recovery dt=%u ms=%jd\n",
2555				    recover_done_time, (intmax_t)NFSD_MONOSEC);
2556				NFSLOCKCLSTATE();
2557				clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2558				NFSUNLOCKCLSTATE();
2559			}
2560		}
		/*
		 * Renew the lease once nfsc_expire has been reached. The
		 * next renew time is set before the RPC is sent, and the
		 * clientid revision is sampled first so that
		 * nfscl_hasexpired() does nothing if a new clientid has been
		 * acquired in the meantime.
		 */
2561		if (clp->nfsc_expire <= NFSD_MONOSEC &&
2562		    (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
2563			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
2564			clidrev = clp->nfsc_clientidrev;
2565			error = nfsrpc_renew(clp, NULL, cred, p);
2566			if (error == NFSERR_CBPATHDOWN)
2567			    cbpathdown = 1;
2568			else if (error == NFSERR_STALECLIENTID ||
2569			    error == NFSERR_BADSESSION) {
2570			    NFSLOCKCLSTATE();
2571			    clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2572			    NFSUNLOCKCLSTATE();
2573			} else if (error == NFSERR_EXPIRED)
2574			    (void) nfscl_hasexpired(clp, clidrev, p);
2575		}
2576
2577checkdsrenew:
2578		if (NFSHASNFSV4N(clp->nfsc_nmp)) {
2579			/* Do renews for any DS sessions. */
2580			NFSLOCKMNT(clp->nfsc_nmp);
2581			/* Skip first entry, since the MDS is handled above. */
2582			dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
2583			if (dsp != NULL)
2584				dsp = TAILQ_NEXT(dsp, nfsclds_list);
2585			while (dsp != NULL) {
2586				if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
2587				    dsp->nfsclds_sess.nfsess_defunct == 0) {
					/*
					 * The mount lock is dropped for the
					 * RPC, so the scan restarts from the
					 * top afterwards. The expiry time is
					 * pushed forward first, which keeps
					 * this entry from being selected
					 * again right away (as long as
					 * nfsc_renew is non-zero).
					 */
2588					dsp->nfsclds_expire = NFSD_MONOSEC +
2589					    clp->nfsc_renew;
2590					NFSUNLOCKMNT(clp->nfsc_nmp);
2591					(void)nfsrpc_renew(clp, dsp, cred, p);
2592					goto checkdsrenew;
2593				}
2594				dsp = TAILQ_NEXT(dsp, nfsclds_list);
2595			}
2596			NFSUNLOCKMNT(clp->nfsc_nmp);
2597		}
2598
		/*
		 * dh collects the delegations taken off the client's list in
		 * this pass; they are returned to the server and freed
		 * further down, after the state mutex has been released.
		 */
2599		TAILQ_INIT(&dh);
2600		NFSLOCKCLSTATE();
2601		if (cbpathdown)
2602			/* It's a Total Recall! */
2603			nfscl_totalrecall(clp);
2604
2605		/*
2606		 * Now, handle defunct owners.
2607		 */
2608		LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
2609			if (LIST_EMPTY(&owp->nfsow_open)) {
2610				if (owp->nfsow_defunct != 0)
2611					nfscl_freeopenowner(owp, 0);
2612			}
2613		}
2614
2615		/*
2616		 * Do the recall on any delegations. To avoid trouble, always
2617		 * come back up here after having slept.
2618		 */
2619		igotlock = 0;
2620tryagain:
2621		dp = TAILQ_FIRST(&clp->nfsc_deleg);
2622		while (dp != NULL) {
2623			ndp = TAILQ_NEXT(dp, nfsdl_list);
2624			if ((dp->nfsdl_flags & NFSCLDL_RECALL)) {
2625				/*
2626				 * Wait for outstanding I/O ops to be done.
2627				 */
2628				if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
				    /*
				     * nfsc_lock is released before sleeping
				     * and the list is rescanned from the
				     * start after waking up.
				     */
2629				    if (igotlock) {
2630					nfsv4_unlock(&clp->nfsc_lock, 0);
2631					igotlock = 0;
2632				    }
2633				    dp->nfsdl_rwlock.nfslock_lock |=
2634					NFSV4LOCK_WANTED;
2635				    (void) nfsmsleep(&dp->nfsdl_rwlock,
2636					NFSCLSTATEMUTEXPTR, PZERO, "nfscld",
2637					NULL);
2638				    goto tryagain;
2639				}
				/*
				 * Get nfsc_lock before doing the recall. If
				 * acquiring it involved a sleep, the list is
				 * rescanned from the start.
				 */
2640				while (!igotlock) {
2641				    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
2642					&islept, NFSCLSTATEMUTEXPTR, NULL);
2643				    if (islept)
2644					goto tryagain;
2645				}
2646				NFSUNLOCKCLSTATE();
2647				newnfs_copycred(&dp->nfsdl_cred, cred);
2648				ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
2649				    NULL, cred, p, 1);
				/*
				 * A zero return moves the delegation from the
				 * client's list and hash onto dh.
				 */
2650				if (!ret) {
2651				    nfscl_cleandeleg(dp);
2652				    TAILQ_REMOVE(&clp->nfsc_deleg, dp,
2653					nfsdl_list);
2654				    LIST_REMOVE(dp, nfsdl_hash);
2655				    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2656				    nfscl_delegcnt--;
2657				    newnfsstats.cldelegates--;
2658				}
2659				NFSLOCKCLSTATE();
2660			}
2661			dp = ndp;
2662		}
2663
2664		/*
2665		 * Clear out old delegations, if we are above the high water
2666		 * mark. Only clear out ones with no state related to them.
2667		 * The tailq list is in LRU order.
2668		 */
2669		dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
2670		while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
2671		    ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
2672		    if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
2673			dp->nfsdl_rwlock.nfslock_lock == 0 &&
2674			dp->nfsdl_timestamp < NFSD_MONOSEC &&
2675			(dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED |
2676			  NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) {
			/*
			 * clearok is dropped to 0 as soon as an open or a
			 * byte range lock is found under this delegation.
			 */
2677			clearok = 1;
2678			LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2679			    op = LIST_FIRST(&owp->nfsow_open);
2680			    if (op != NULL) {
2681				clearok = 0;
2682				break;
2683			    }
2684			}
2685			if (clearok) {
2686			    LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
2687				if (!LIST_EMPTY(&lp->nfsl_lock)) {
2688				    clearok = 0;
2689				    break;
2690				}
2691			    }
2692			}
2693			if (clearok) {
2694			    TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
2695			    LIST_REMOVE(dp, nfsdl_hash);
2696			    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2697			    nfscl_delegcnt--;
2698			    newnfsstats.cldelegates--;
2699			}
2700		    }
2701		    dp = ndp;
2702		}
2703		if (igotlock)
2704			nfsv4_unlock(&clp->nfsc_lock, 0);
2705
2706		/*
2707		 * Do the recall on any layouts. To avoid trouble, always
2708		 * come back up here after having slept.
2709		 */
		/* rlh collects the layouts to be returned further down. */
2710		TAILQ_INIT(&rlh);
2711tryagain2:
2712		TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
2713			if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
2714				/*
2715				 * Wait for outstanding I/O ops to be done.
2716				 */
2717				if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
2718				    (lyp->nfsly_lock.nfslock_lock &
2719				     NFSV4LOCK_LOCK) != 0) {
2720					lyp->nfsly_lock.nfslock_lock |=
2721					    NFSV4LOCK_WANTED;
2722					(void)nfsmsleep(&lyp->nfsly_lock,
2723					    NFSCLSTATEMUTEXPTR, PZERO, "nfslyp",
2724					    NULL);
2725					goto tryagain2;
2726				}
2727				/* Move the layout to the recall list. */
2728				TAILQ_REMOVE(&clp->nfsc_layout, lyp,
2729				    nfsly_list);
2730				LIST_REMOVE(lyp, nfsly_hash);
2731				TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);
2732
2733				/* Handle any layout commits. */
2734				if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
2735				    (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
					/*
					 * The state mutex is dropped around
					 * the layoutcommit, so the layout
					 * list is rescanned from the start
					 * afterwards.
					 */
2736					lyp->nfsly_flags &= ~NFSLY_WRITTEN;
2737					NFSUNLOCKCLSTATE();
2738					NFSCL_DEBUG(3, "do layoutcommit\n");
2739					nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
2740					    cred, p);
2741					NFSLOCKCLSTATE();
2742					goto tryagain2;
2743				}
2744			}
2745		}
2746
2747		/* Now, look for stale layouts. */
2748		lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
2749		while (lyp != NULL) {
2750			nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
2751			if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
2752			    (lyp->nfsly_flags & NFSLY_RECALL) == 0 &&
2753			    lyp->nfsly_lock.nfslock_usecnt == 0 &&
2754			    lyp->nfsly_lock.nfslock_lock == 0) {
2755				NFSCL_DEBUG(4, "ret stale lay=%d\n",
2756				    nfscl_layoutcnt);
				/*
				 * The state mutex is held here, hence the
				 * M_NOWAIT allocation. On failure the scan
				 * is abandoned for this pass.
				 */
2757				recallp = malloc(sizeof(*recallp),
2758				    M_NFSLAYRECALL, M_NOWAIT);
2759				if (recallp == NULL)
2760					break;
2761				(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
2762				    lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
2763				    lyp->nfsly_stateid.seqid, recallp);
2764			}
2765			lyp = nlyp;
2766		}
2767
2768		/*
2769		 * Free up any unreferenced device info structures.
2770		 */
2771		LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
2772			if (dip->nfsdi_layoutrefs == 0 &&
2773			    dip->nfsdi_refcnt == 0) {
2774				NFSCL_DEBUG(4, "freeing devinfo\n");
2775				LIST_REMOVE(dip, nfsdi_list);
2776				nfscl_freedevinfo(dip);
2777			}
2778		}
2779		NFSUNLOCKCLSTATE();
2780
2781		/* Do layout return(s), as required. */
2782		TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
2783			TAILQ_REMOVE(&rlh, lyp, nfsly_list);
2784			NFSCL_DEBUG(4, "ret layout\n");
2785			nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
2786			nfscl_freelayout(lyp);
2787		}
2788
2789		/*
2790		 * Delegreturn any delegations cleaned out or recalled.
2791		 */
2792		TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
2793			newnfs_copycred(&dp->nfsdl_cred, cred);
2794			(void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
2795			TAILQ_REMOVE(&dh, dp, nfsdl_list);
2796			FREE((caddr_t)dp, M_NFSCLDELEG);
2797		}
2798
2799		SLIST_INIT(&lfh);
2800		/*
2801		 * Call nfscl_cleanupkext() once per second to check for
2802		 * open/lock owners where the process has exited.
2803		 */
2804		mytime = NFSD_MONOSEC;
2805		if (prevsec != mytime) {
2806			prevsec = mytime;
2807			nfscl_cleanupkext(clp, &lfh);
2808		}
2809
2810		/*
2811		 * Do a ReleaseLockOwner for all lock owners where the
2812		 * associated process no longer exists, as found by
2813		 * nfscl_cleanupkext().
2814		 */
		/* cred was overwritten by newnfs_copycred() above; reset it. */
2815		newnfs_setroot(cred);
2816		SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) {
2817			LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list,
2818			    nlp) {
2819				(void)nfsrpc_rellockown(clp->nfsc_nmp, lp,
2820				    lfhp->nfslfh_fh, lfhp->nfslfh_len, cred,
2821				    p);
2822				nfscl_freelockowner(lp, 0);
2823			}
2824			free(lfhp, M_TEMP);
2825		}
2826		SLIST_INIT(&lfh);
2827
		/*
		 * Sleep on clp for at most hz ticks, unless recovery has been
		 * requested. nfscl_initiate_recovery() does a wakeup() on clp
		 * to cut the sleep short.
		 */
2828		NFSLOCKCLSTATE();
2829		if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0)
2830			(void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl",
2831			    hz);
		/*
		 * On unmount, clear NFSCLFLAGS_HASTHREAD, release the cred
		 * and wake any thread sleeping on clp before returning.
		 */
2832		if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) {
2833			clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD;
2834			NFSUNLOCKCLSTATE();
2835			NFSFREECRED(cred);
2836			wakeup((caddr_t)clp);
2837			return;
2838		}
2839		NFSUNLOCKCLSTATE();
2840	}
2841}
2842
2843/*
2844 * Initiate state recovery. Called when NFSERR_STALECLIENTID,
2845 * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
2846 */
2847APPLESTATIC void
2848nfscl_initiate_recovery(struct nfsclclient *clp)
2849{
2850
2851	if (clp == NULL)
2852		return;
2853	NFSLOCKCLSTATE();
2854	clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2855	NFSUNLOCKCLSTATE();
2856	wakeup((caddr_t)clp);
2857}
2858
2859/*
2860 * Dump out the state stuff for debugging.
2861 */
2862APPLESTATIC void
2863nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens,
2864    int lockowner, int locks)
2865{
2866	struct nfsclclient *clp;
2867	struct nfsclowner *owp;
2868	struct nfsclopen *op;
2869	struct nfscllockowner *lp;
2870	struct nfscllock *lop;
2871	struct nfscldeleg *dp;
2872
2873	clp = nmp->nm_clp;
2874	if (clp == NULL) {
2875		printf("nfscl dumpstate NULL clp\n");
2876		return;
2877	}
2878	NFSLOCKCLSTATE();
2879	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2880	  LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2881	    if (openowner && !LIST_EMPTY(&owp->nfsow_open))
2882		printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
2883		    owp->nfsow_owner[0], owp->nfsow_owner[1],
2884		    owp->nfsow_owner[2], owp->nfsow_owner[3],
2885		    owp->nfsow_seqid);
2886	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
2887		if (opens)
2888		    printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
2889			op->nfso_stateid.other[0], op->nfso_stateid.other[1],
2890			op->nfso_stateid.other[2], op->nfso_opencnt,
2891			op->nfso_fh[12]);
2892		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
2893		    if (lockowner)
2894			printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
2895			    lp->nfsl_owner[0], lp->nfsl_owner[1],
2896			    lp->nfsl_owner[2], lp->nfsl_owner[3],
2897			    lp->nfsl_seqid,
2898			    lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
2899			    lp->nfsl_stateid.other[2]);
2900		    LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
2901			if (locks)
2902#ifdef __FreeBSD__
2903			    printf("lck typ=%d fst=%ju end=%ju\n",
2904				lop->nfslo_type, (intmax_t)lop->nfslo_first,
2905				(intmax_t)lop->nfslo_end);
2906#else
2907			    printf("lck typ=%d fst=%qd end=%qd\n",
2908				lop->nfslo_type, lop->nfslo_first,
2909				lop->nfslo_end);
2910#endif
2911		    }
2912		}
2913	    }
2914	  }
2915	}
2916	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
2917	    if (openowner && !LIST_EMPTY(&owp->nfsow_open))
2918		printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
2919		    owp->nfsow_owner[0], owp->nfsow_owner[1],
2920		    owp->nfsow_owner[2], owp->nfsow_owner[3],
2921		    owp->nfsow_seqid);
2922	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
2923		if (opens)
2924		    printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
2925			op->nfso_stateid.other[0], op->nfso_stateid.other[1],
2926			op->nfso_stateid.other[2], op->nfso_opencnt,
2927			op->nfso_fh[12]);
2928		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
2929		    if (lockowner)
2930			printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
2931			    lp->nfsl_owner[0], lp->nfsl_owner[1],
2932			    lp->nfsl_owner[2], lp->nfsl_owner[3],
2933			    lp->nfsl_seqid,
2934			    lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
2935			    lp->nfsl_stateid.other[2]);
2936		    LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
2937			if (locks)
2938#ifdef __FreeBSD__
2939			    printf("lck typ=%d fst=%ju end=%ju\n",
2940				lop->nfslo_type, (intmax_t)lop->nfslo_first,
2941				(intmax_t)lop->nfslo_end);
2942#else
2943			    printf("lck typ=%d fst=%qd end=%qd\n",
2944				lop->nfslo_type, lop->nfslo_first,
2945				lop->nfslo_end);
2946#endif
2947		    }
2948		}
2949	    }
2950	}
2951	NFSUNLOCKCLSTATE();
2952}
2953
2954/*
2955 * Check for duplicate open owners and opens.
2956 * (Only used as a diagnostic aid.)
2957 */
2958APPLESTATIC void
2959nfscl_dupopen(vnode_t vp, int dupopens)
2960{
2961	struct nfsclclient *clp;
2962	struct nfsclowner *owp, *owp2;
2963	struct nfsclopen *op, *op2;
2964	struct nfsfh *nfhp;
2965
2966	clp = VFSTONFS(vnode_mount(vp))->nm_clp;
2967	if (clp == NULL) {
2968		printf("nfscl dupopen NULL clp\n");
2969		return;
2970	}
2971	nfhp = VTONFS(vp)->n_fhp;
2972	NFSLOCKCLSTATE();
2973
2974	/*
2975	 * First, search for duplicate owners.
2976	 * These should never happen!
2977	 */
2978	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
2979	    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
2980		if (owp != owp2 &&
2981		    !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner,
2982		    NFSV4CL_LOCKNAMELEN)) {
2983			NFSUNLOCKCLSTATE();
2984			printf("DUP OWNER\n");
2985			nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0);
2986			return;
2987		}
2988	    }
2989	}
2990
2991	/*
2992	 * Now, search for duplicate stateids.
2993	 * These shouldn't happen, either.
2994	 */
2995	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
2996	    LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
2997		LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
2998		    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
2999			if (op != op2 &&
3000			    (op->nfso_stateid.other[0] != 0 ||
3001			     op->nfso_stateid.other[1] != 0 ||
3002			     op->nfso_stateid.other[2] != 0) &&
3003			    op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] &&
3004			    op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] &&
3005			    op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) {
3006			    NFSUNLOCKCLSTATE();
3007			    printf("DUP STATEID\n");
3008			    nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0,
3009				0);
3010			    return;
3011			}
3012		    }
3013		}
3014	    }
3015	}
3016
3017	/*
3018	 * Now search for duplicate opens.
3019	 * Duplicate opens for the same owner
3020	 * should never occur. Other duplicates are
3021	 * possible and are checked for if "dupopens"
3022	 * is true.
3023	 */
3024	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3025	    LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3026		if (nfhp->nfh_len == op2->nfso_fhlen &&
3027		    !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) {
3028		    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3029			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3030			    if (op != op2 && nfhp->nfh_len == op->nfso_fhlen &&
3031				!NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) &&
3032				(!NFSBCMP(op->nfso_own->nfsow_owner,
3033				 op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) ||
3034				 dupopens)) {
3035				if (!NFSBCMP(op->nfso_own->nfsow_owner,
3036				    op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
3037				    NFSUNLOCKCLSTATE();
3038				    printf("BADDUP OPEN\n");
3039				} else {
3040				    NFSUNLOCKCLSTATE();
3041				    printf("DUP OPEN\n");
3042				}
3043				nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1,
3044				    0, 0);
3045				return;
3046			    }
3047			}
3048		    }
3049		}
3050	    }
3051	}
3052	NFSUNLOCKCLSTATE();
3053}
3054
3055/*
3056 * During close, find an open that needs to be dereferenced and
3057 * dereference it. If there are no more opens for this file,
3058 * log a message to that effect.
3059 * Opens aren't actually Close'd until VOP_INACTIVE() is performed
3060 * on the file's vnode.
3061 * This is the safe way, since it is difficult to identify
3062 * which open the close is for and I/O can be performed after the
3063 * close(2) system call when a file is mmap'd.
3064 * If it returns 0 for success, there will be a referenced
3065 * clp returned via clpp.
3066 */
3067APPLESTATIC int
3068nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
3069{
3070	struct nfsclclient *clp;
3071	struct nfsclowner *owp;
3072	struct nfsclopen *op;
3073	struct nfscldeleg *dp;
3074	struct nfsfh *nfhp;
3075	int error, notdecr;
3076
3077	error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp);
3078	if (error)
3079		return (error);
3080	*clpp = clp;
3081
3082	nfhp = VTONFS(vp)->n_fhp;
3083	notdecr = 1;
3084	NFSLOCKCLSTATE();
3085	/*
3086	 * First, look for one under a delegation that was locally issued
3087	 * and just decrement the opencnt for it. Since all my Opens against
3088	 * the server are DENY_NONE, I don't see a problem with hanging
3089	 * onto them. (It is much easier to use one of the extant Opens
3090	 * that I already have on the server when a Delegation is recalled
3091	 * than to do fresh Opens.) Someday, I might need to rethink this, but.
3092	 */
3093	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3094	if (dp != NULL) {
3095		LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3096			op = LIST_FIRST(&owp->nfsow_open);
3097			if (op != NULL) {
3098				/*
3099				 * Since a delegation is for a file, there
3100				 * should never be more than one open for
3101				 * each openowner.
3102				 */
3103				if (LIST_NEXT(op, nfso_list) != NULL)
3104					panic("nfscdeleg opens");
3105				if (notdecr && op->nfso_opencnt > 0) {
3106					notdecr = 0;
3107					op->nfso_opencnt--;
3108					break;
3109				}
3110			}
3111		}
3112	}
3113
3114	/* Now process the opens against the server. */
3115	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3116		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3117			if (op->nfso_fhlen == nfhp->nfh_len &&
3118			    !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3119			    nfhp->nfh_len)) {
3120				/* Found an open, decrement cnt if possible */
3121				if (notdecr && op->nfso_opencnt > 0) {
3122					notdecr = 0;
3123					op->nfso_opencnt--;
3124				}
3125				/*
3126				 * There are more opens, so just return.
3127				 */
3128				if (op->nfso_opencnt > 0) {
3129					NFSUNLOCKCLSTATE();
3130					return (0);
3131				}
3132			}
3133		}
3134	}
3135	NFSUNLOCKCLSTATE();
3136	if (notdecr)
3137		printf("nfscl: never fnd open\n");
3138	return (0);
3139}
3140
3141APPLESTATIC int
3142nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
3143{
3144	struct nfsclclient *clp;
3145	struct nfsclowner *owp, *nowp;
3146	struct nfsclopen *op;
3147	struct nfscldeleg *dp;
3148	struct nfsfh *nfhp;
3149	struct nfsclrecalllayout *recallp;
3150	int error;
3151
3152	error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp);
3153	if (error)
3154		return (error);
3155	*clpp = clp;
3156
3157	nfhp = VTONFS(vp)->n_fhp;
3158	recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
3159	NFSLOCKCLSTATE();
3160	/*
3161	 * First get rid of the local Open structures, which should be no
3162	 * longer in use.
3163	 */
3164	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3165	if (dp != NULL) {
3166		LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
3167			op = LIST_FIRST(&owp->nfsow_open);
3168			if (op != NULL) {
3169				KASSERT((op->nfso_opencnt == 0),
3170				    ("nfscl: bad open cnt on deleg"));
3171				nfscl_freeopen(op, 1);
3172			}
3173			nfscl_freeopenowner(owp, 1);
3174		}
3175	}
3176
3177	/* Return any layouts marked return on close. */
3178	nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp);
3179
3180	/* Now process the opens against the server. */
3181lookformore:
3182	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3183		op = LIST_FIRST(&owp->nfsow_open);
3184		while (op != NULL) {
3185			if (op->nfso_fhlen == nfhp->nfh_len &&
3186			    !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3187			    nfhp->nfh_len)) {
3188				/* Found an open, close it. */
3189				KASSERT((op->nfso_opencnt == 0),
3190				    ("nfscl: bad open cnt on server"));
3191				NFSUNLOCKCLSTATE();
3192				nfsrpc_doclose(VFSTONFS(vnode_mount(vp)), op,
3193				    p);
3194				NFSLOCKCLSTATE();
3195				goto lookformore;
3196			}
3197			op = LIST_NEXT(op, nfso_list);
3198		}
3199	}
3200	NFSUNLOCKCLSTATE();
3201	/*
3202	 * recallp has been set NULL by nfscl_retoncloselayout() if it was
3203	 * used by the function, but calling free() with a NULL pointer is ok.
3204	 */
3205	free(recallp, M_NFSLAYRECALL);
3206	return (0);
3207}
3208
3209/*
3210 * Return all delegations on this client.
3211 * (Must be called with client sleep lock.)
3212 */
3213static void
3214nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p)
3215{
3216	struct nfscldeleg *dp, *ndp;
3217	struct ucred *cred;
3218
3219	cred = newnfs_getcred();
3220	TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) {
3221		nfscl_cleandeleg(dp);
3222		(void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3223		nfscl_freedeleg(&clp->nfsc_deleg, dp);
3224	}
3225	NFSFREECRED(cred);
3226}
3227
3228/*
3229 * Do a callback RPC.
3230 */
3231APPLESTATIC void
3232nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
3233{
3234	int clist, gotseq_ok, i, j, k, op, rcalls;
3235	u_int32_t *tl;
3236	struct nfsclclient *clp;
3237	struct nfscldeleg *dp = NULL;
3238	int numops, taglen = -1, error = 0, trunc;
3239	u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
3240	u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
3241	vnode_t vp = NULL;
3242	struct nfsnode *np;
3243	struct vattr va;
3244	struct nfsfh *nfhp;
3245	mount_t mp;
3246	nfsattrbit_t attrbits, rattrbits;
3247	nfsv4stateid_t stateid;
3248	uint32_t seqid, slotid = 0, highslot, cachethis;
3249	uint8_t sessionid[NFSX_V4SESSIONID];
3250	struct mbuf *rep;
3251	struct nfscllayout *lyp;
3252	uint64_t filesid[2], len, off;
3253	int changed, gotone, laytype, recalltype;
3254	uint32_t iomode;
3255	struct nfsclrecalllayout *recallp = NULL;
3256	struct nfsclsession *tsep;
3257
3258	gotseq_ok = 0;
3259	nfsrvd_rephead(nd);
3260	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3261	taglen = fxdr_unsigned(int, *tl);
3262	if (taglen < 0) {
3263		error = EBADRPC;
3264		goto nfsmout;
3265	}
3266	if (taglen <= NFSV4_SMALLSTR)
3267		tagstr = tag;
3268	else
3269		tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
3270	error = nfsrv_mtostr(nd, tagstr, taglen);
3271	if (error) {
3272		if (taglen > NFSV4_SMALLSTR)
3273			free(tagstr, M_TEMP);
3274		taglen = -1;
3275		goto nfsmout;
3276	}
3277	(void) nfsm_strtom(nd, tag, taglen);
3278	if (taglen > NFSV4_SMALLSTR) {
3279		free(tagstr, M_TEMP);
3280	}
3281	NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
3282	NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3283	minorvers = fxdr_unsigned(u_int32_t, *tl++);
3284	if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION)
3285		nd->nd_repstat = NFSERR_MINORVERMISMATCH;
3286	cbident = fxdr_unsigned(u_int32_t, *tl++);
3287	if (nd->nd_repstat)
3288		numops = 0;
3289	else
3290		numops = fxdr_unsigned(int, *tl);
3291	/*
3292	 * Loop around doing the sub ops.
3293	 */
3294	for (i = 0; i < numops; i++) {
3295		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3296		NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
3297		*repp++ = *tl;
3298		op = fxdr_unsigned(int, *tl);
3299		if (op < NFSV4OP_CBGETATTR ||
3300		   (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
3301		   (op > NFSV4OP_CBNOTIFYDEVID &&
3302		    minorvers == NFSV41_MINORVERSION)) {
3303		    nd->nd_repstat = NFSERR_OPILLEGAL;
3304		    *repp = nfscl_errmap(nd, minorvers);
3305		    retops++;
3306		    break;
3307		}
3308		nd->nd_procnum = op;
3309		if (op < NFSV4OP_CBNOPS)
3310			newnfsstats.cbrpccnt[nd->nd_procnum]++;
3311		switch (op) {
3312		case NFSV4OP_CBGETATTR:
3313			NFSCL_DEBUG(4, "cbgetattr\n");
3314			mp = NULL;
3315			vp = NULL;
3316			error = nfsm_getfh(nd, &nfhp);
3317			if (!error)
3318				error = nfsrv_getattrbits(nd, &attrbits,
3319				    NULL, NULL);
3320			if (error == 0 && i == 0 &&
3321			    minorvers != NFSV4_MINORVERSION)
3322				error = NFSERR_OPNOTINSESS;
3323			if (!error) {
3324				mp = nfscl_getmnt(minorvers, sessionid, cbident,
3325				    &clp);
3326				if (mp == NULL)
3327					error = NFSERR_SERVERFAULT;
3328			}
3329			if (!error) {
3330				error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
3331				    nfhp->nfh_len, p, &np);
3332				if (!error)
3333					vp = NFSTOV(np);
3334			}
3335			if (!error) {
3336				NFSZERO_ATTRBIT(&rattrbits);
3337				NFSLOCKCLSTATE();
3338				dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3339				    nfhp->nfh_len);
3340				if (dp != NULL) {
3341					if (NFSISSET_ATTRBIT(&attrbits,
3342					    NFSATTRBIT_SIZE)) {
3343						if (vp != NULL)
3344							va.va_size = np->n_size;
3345						else
3346							va.va_size =
3347							    dp->nfsdl_size;
3348						NFSSETBIT_ATTRBIT(&rattrbits,
3349						    NFSATTRBIT_SIZE);
3350					}
3351					if (NFSISSET_ATTRBIT(&attrbits,
3352					    NFSATTRBIT_CHANGE)) {
3353						va.va_filerev =
3354						    dp->nfsdl_change;
3355						if (vp == NULL ||
3356						    (np->n_flag & NDELEGMOD))
3357							va.va_filerev++;
3358						NFSSETBIT_ATTRBIT(&rattrbits,
3359						    NFSATTRBIT_CHANGE);
3360					}
3361				} else
3362					error = NFSERR_SERVERFAULT;
3363				NFSUNLOCKCLSTATE();
3364			}
3365			if (vp != NULL)
3366				vrele(vp);
3367			if (mp != NULL)
3368				vfs_unbusy(mp);
3369			if (nfhp != NULL)
3370				FREE((caddr_t)nfhp, M_NFSFH);
3371			if (!error)
3372				(void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
3373				    NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
3374				    (uint64_t)0);
3375			break;
3376		case NFSV4OP_CBRECALL:
3377			NFSCL_DEBUG(4, "cbrecall\n");
3378			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
3379			    NFSX_UNSIGNED);
3380			stateid.seqid = *tl++;
3381			NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other,
3382			    NFSX_STATEIDOTHER);
3383			tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
3384			trunc = fxdr_unsigned(int, *tl);
3385			error = nfsm_getfh(nd, &nfhp);
3386			if (error == 0 && i == 0 &&
3387			    minorvers != NFSV4_MINORVERSION)
3388				error = NFSERR_OPNOTINSESS;
3389			if (!error) {
3390				NFSLOCKCLSTATE();
3391				if (minorvers == NFSV4_MINORVERSION)
3392					clp = nfscl_getclnt(cbident);
3393				else
3394					clp = nfscl_getclntsess(sessionid);
3395				if (clp != NULL) {
3396					dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3397					    nfhp->nfh_len);
3398					if (dp != NULL && (dp->nfsdl_flags &
3399					    NFSCLDL_DELEGRET) == 0) {
3400						dp->nfsdl_flags |=
3401						    NFSCLDL_RECALL;
3402						wakeup((caddr_t)clp);
3403					}
3404				} else {
3405					error = NFSERR_SERVERFAULT;
3406				}
3407				NFSUNLOCKCLSTATE();
3408			}
3409			if (nfhp != NULL)
3410				FREE((caddr_t)nfhp, M_NFSFH);
3411			break;
3412		case NFSV4OP_CBLAYOUTRECALL:
3413			NFSCL_DEBUG(4, "cblayrec\n");
3414			nfhp = NULL;
3415			NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
3416			laytype = fxdr_unsigned(int, *tl++);
3417			iomode = fxdr_unsigned(uint32_t, *tl++);
3418			if (newnfs_true == *tl++)
3419				changed = 1;
3420			else
3421				changed = 0;
3422			recalltype = fxdr_unsigned(int, *tl);
3423			recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
3424			    M_WAITOK);
3425			if (laytype != NFSLAYOUT_NFSV4_1_FILES)
3426				error = NFSERR_NOMATCHLAYOUT;
3427			else if (recalltype == NFSLAYOUTRETURN_FILE) {
3428				error = nfsm_getfh(nd, &nfhp);
3429				NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
3430				if (error != 0)
3431					goto nfsmout;
3432				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
3433				    NFSX_STATEID);
3434				off = fxdr_hyper(tl); tl += 2;
3435				len = fxdr_hyper(tl); tl += 2;
3436				stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
3437				NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
3438				if (minorvers == NFSV4_MINORVERSION)
3439					error = NFSERR_NOTSUPP;
3440				else if (i == 0)
3441					error = NFSERR_OPNOTINSESS;
3442				if (error == 0) {
3443					NFSLOCKCLSTATE();
3444					clp = nfscl_getclntsess(sessionid);
3445					NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
3446					if (clp != NULL) {
3447						lyp = nfscl_findlayout(clp,
3448						    nfhp->nfh_fh,
3449						    nfhp->nfh_len);
3450						NFSCL_DEBUG(4, "cblyp=%p\n",
3451						    lyp);
3452						if (lyp != NULL &&
3453						    (lyp->nfsly_flags &
3454						     NFSLY_FILES) != 0 &&
3455						    !NFSBCMP(stateid.other,
3456						    lyp->nfsly_stateid.other,
3457						    NFSX_STATEIDOTHER)) {
3458							error =
3459							    nfscl_layoutrecall(
3460							    recalltype,
3461							    lyp, iomode, off,
3462							    len, stateid.seqid,
3463							    recallp);
3464							recallp = NULL;
3465							wakeup(clp);
3466							NFSCL_DEBUG(4,
3467							    "aft layrcal=%d\n",
3468							    error);
3469						} else
3470							error =
3471							  NFSERR_NOMATCHLAYOUT;
3472					} else
3473						error = NFSERR_NOMATCHLAYOUT;
3474					NFSUNLOCKCLSTATE();
3475				}
3476				free(nfhp, M_NFSFH);
3477			} else if (recalltype == NFSLAYOUTRETURN_FSID) {
3478				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
3479				filesid[0] = fxdr_hyper(tl); tl += 2;
3480				filesid[1] = fxdr_hyper(tl); tl += 2;
3481				gotone = 0;
3482				NFSLOCKCLSTATE();
3483				clp = nfscl_getclntsess(sessionid);
3484				if (clp != NULL) {
3485					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3486					    nfsly_list) {
3487						if (lyp->nfsly_filesid[0] ==
3488						    filesid[0] &&
3489						    lyp->nfsly_filesid[1] ==
3490						    filesid[1]) {
3491							error =
3492							    nfscl_layoutrecall(
3493							    recalltype,
3494							    lyp, iomode, 0,
3495							    UINT64_MAX,
3496							    lyp->nfsly_stateid.seqid,
3497							    recallp);
3498							recallp = NULL;
3499							gotone = 1;
3500						}
3501					}
3502					if (gotone != 0)
3503						wakeup(clp);
3504					else
3505						error = NFSERR_NOMATCHLAYOUT;
3506				} else
3507					error = NFSERR_NOMATCHLAYOUT;
3508				NFSUNLOCKCLSTATE();
3509			} else if (recalltype == NFSLAYOUTRETURN_ALL) {
3510				gotone = 0;
3511				NFSLOCKCLSTATE();
3512				clp = nfscl_getclntsess(sessionid);
3513				if (clp != NULL) {
3514					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3515					    nfsly_list) {
3516						error = nfscl_layoutrecall(
3517						    recalltype, lyp, iomode, 0,
3518						    UINT64_MAX,
3519						    lyp->nfsly_stateid.seqid,
3520						    recallp);
3521						recallp = NULL;
3522						gotone = 1;
3523					}
3524					if (gotone != 0)
3525						wakeup(clp);
3526					else
3527						error = NFSERR_NOMATCHLAYOUT;
3528				} else
3529					error = NFSERR_NOMATCHLAYOUT;
3530				NFSUNLOCKCLSTATE();
3531			} else
3532				error = NFSERR_NOMATCHLAYOUT;
3533			if (recallp != NULL) {
3534				free(recallp, M_NFSLAYRECALL);
3535				recallp = NULL;
3536			}
3537			break;
3538		case NFSV4OP_CBSEQUENCE:
3539			NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3540			    5 * NFSX_UNSIGNED);
3541			bcopy(tl, sessionid, NFSX_V4SESSIONID);
3542			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3543			seqid = fxdr_unsigned(uint32_t, *tl++);
3544			slotid = fxdr_unsigned(uint32_t, *tl++);
3545			highslot = fxdr_unsigned(uint32_t, *tl++);
3546			cachethis = *tl++;
3547			/* Throw away the referring call stuff. */
3548			clist = fxdr_unsigned(int, *tl);
3549			for (j = 0; j < clist; j++) {
3550				NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3551				    NFSX_UNSIGNED);
3552				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3553				rcalls = fxdr_unsigned(int, *tl);
3554				for (k = 0; k < rcalls; k++) {
3555					NFSM_DISSECT(tl, uint32_t *,
3556					    2 * NFSX_UNSIGNED);
3557				}
3558			}
3559			NFSLOCKCLSTATE();
3560			if (i == 0) {
3561				clp = nfscl_getclntsess(sessionid);
3562				if (clp == NULL)
3563					error = NFSERR_SERVERFAULT;
3564			} else
3565				error = NFSERR_SEQUENCEPOS;
3566			if (error == 0) {
3567				tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3568				error = nfsv4_seqsession(seqid, slotid,
3569				    highslot, tsep->nfsess_cbslots, &rep,
3570				    tsep->nfsess_backslots);
3571			}
3572			NFSUNLOCKCLSTATE();
3573			if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
3574				gotseq_ok = 1;
3575				if (rep != NULL) {
3576					/*
3577					 * Handle a reply for a retried
3578					 * callback.  The reply will be
3579					 * re-inserted in the session cache
3580					 * by the nfsv4_seqsess_cacherep() call
3581					 * after out:
3582					 */
3583					KASSERT(error == NFSERR_REPLYFROMCACHE,
3584					    ("cbsequence: non-NULL rep"));
3585					NFSCL_DEBUG(4, "Got cbretry\n");
3586					m_freem(nd->nd_mreq);
3587					nd->nd_mreq = rep;
3588					rep = NULL;
3589					goto out;
3590				}
3591				NFSM_BUILD(tl, uint32_t *,
3592				    NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
3593				bcopy(sessionid, tl, NFSX_V4SESSIONID);
3594				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3595				*tl++ = txdr_unsigned(seqid);
3596				*tl++ = txdr_unsigned(slotid);
3597				*tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
3598				*tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
3599			}
3600			break;
3601		default:
3602			if (i == 0 && minorvers == NFSV41_MINORVERSION)
3603				error = NFSERR_OPNOTINSESS;
3604			else {
3605				NFSCL_DEBUG(1, "unsupp callback %d\n", op);
3606				error = NFSERR_NOTSUPP;
3607			}
3608			break;
3609		};
3610		if (error) {
3611			if (error == EBADRPC || error == NFSERR_BADXDR) {
3612				nd->nd_repstat = NFSERR_BADXDR;
3613			} else {
3614				nd->nd_repstat = error;
3615			}
3616			error = 0;
3617		}
3618		retops++;
3619		if (nd->nd_repstat) {
3620			*repp = nfscl_errmap(nd, minorvers);
3621			break;
3622		} else
3623			*repp = 0;	/* NFS4_OK */
3624	}
3625nfsmout:
3626	if (recallp != NULL)
3627		free(recallp, M_NFSLAYRECALL);
3628	if (error) {
3629		if (error == EBADRPC || error == NFSERR_BADXDR)
3630			nd->nd_repstat = NFSERR_BADXDR;
3631		else
3632			printf("nfsv4 comperr1=%d\n", error);
3633	}
3634	if (taglen == -1) {
3635		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3636		*tl++ = 0;
3637		*tl = 0;
3638	} else {
3639		*retopsp = txdr_unsigned(retops);
3640	}
3641	*nd->nd_errp = nfscl_errmap(nd, minorvers);
3642out:
3643	if (gotseq_ok != 0) {
3644		rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
3645		NFSLOCKCLSTATE();
3646		clp = nfscl_getclntsess(sessionid);
3647		if (clp != NULL) {
3648			tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3649			nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
3650			    NFSERR_OK, &rep);
3651			NFSUNLOCKCLSTATE();
3652		} else {
3653			NFSUNLOCKCLSTATE();
3654			m_freem(rep);
3655		}
3656	}
3657}
3658
3659/*
3660 * Generate the next cbident value. Basically just increment a static value
3661 * and then check that it isn't already in the list, if it has wrapped around.
3662 */
3663static u_int32_t
3664nfscl_nextcbident(void)
3665{
3666	struct nfsclclient *clp;
3667	int matched;
3668	static u_int32_t nextcbident = 0;
3669	static int haswrapped = 0;
3670
3671	nextcbident++;
3672	if (nextcbident == 0)
3673		haswrapped = 1;
3674	if (haswrapped) {
3675		/*
3676		 * Search the clientid list for one already using this cbident.
3677		 */
3678		do {
3679			matched = 0;
3680			NFSLOCKCLSTATE();
3681			LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3682				if (clp->nfsc_cbident == nextcbident) {
3683					matched = 1;
3684					break;
3685				}
3686			}
3687			NFSUNLOCKCLSTATE();
3688			if (matched == 1)
3689				nextcbident++;
3690		} while (matched);
3691	}
3692	return (nextcbident);
3693}
3694
3695/*
3696 * Get the mount point related to a given cbident or session and busy it.
3697 */
3698static mount_t
3699nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
3700    struct nfsclclient **clpp)
3701{
3702	struct nfsclclient *clp;
3703	mount_t mp;
3704	int error;
3705	struct nfsclsession *tsep;
3706
3707	*clpp = NULL;
3708	NFSLOCKCLSTATE();
3709	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3710		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3711		if (minorvers == NFSV4_MINORVERSION) {
3712			if (clp->nfsc_cbident == cbident)
3713				break;
3714		} else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3715		    NFSX_V4SESSIONID))
3716			break;
3717	}
3718	if (clp == NULL) {
3719		NFSUNLOCKCLSTATE();
3720		return (NULL);
3721	}
3722	mp = clp->nfsc_nmp->nm_mountp;
3723	vfs_ref(mp);
3724	NFSUNLOCKCLSTATE();
3725	error = vfs_busy(mp, 0);
3726	vfs_rel(mp);
3727	if (error != 0)
3728		return (NULL);
3729	*clpp = clp;
3730	return (mp);
3731}
3732
3733/*
3734 * Get the clientid pointer related to a given cbident.
3735 */
3736static struct nfsclclient *
3737nfscl_getclnt(u_int32_t cbident)
3738{
3739	struct nfsclclient *clp;
3740
3741	LIST_FOREACH(clp, &nfsclhead, nfsc_list)
3742		if (clp->nfsc_cbident == cbident)
3743			break;
3744	return (clp);
3745}
3746
3747/*
3748 * Get the clientid pointer related to a given sessionid.
3749 */
3750static struct nfsclclient *
3751nfscl_getclntsess(uint8_t *sessionid)
3752{
3753	struct nfsclclient *clp;
3754	struct nfsclsession *tsep;
3755
3756	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3757		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3758		if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3759		    NFSX_V4SESSIONID))
3760			break;
3761	}
3762	return (clp);
3763}
3764
3765/*
3766 * Search for a lock conflict locally on the client. A conflict occurs if
3767 * - not same owner and overlapping byte range and at least one of them is
3768 *   a write lock or this is an unlock.
3769 */
3770static int
3771nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen,
3772    struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp,
3773    struct nfscllock **lopp)
3774{
3775	struct nfsclowner *owp;
3776	struct nfsclopen *op;
3777	int ret;
3778
3779	if (dp != NULL) {
3780		ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp);
3781		if (ret)
3782			return (ret);
3783	}
3784	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3785		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3786			if (op->nfso_fhlen == fhlen &&
3787			    !NFSBCMP(op->nfso_fh, fhp, fhlen)) {
3788				ret = nfscl_checkconflict(&op->nfso_lock, nlop,
3789				    own, lopp);
3790				if (ret)
3791					return (ret);
3792			}
3793		}
3794	}
3795	return (0);
3796}
3797
3798static int
3799nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
3800    u_int8_t *own, struct nfscllock **lopp)
3801{
3802	struct nfscllockowner *lp;
3803	struct nfscllock *lop;
3804
3805	LIST_FOREACH(lp, lhp, nfsl_list) {
3806		if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
3807			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3808				if (lop->nfslo_first >= nlop->nfslo_end)
3809					break;
3810				if (lop->nfslo_end <= nlop->nfslo_first)
3811					continue;
3812				if (lop->nfslo_type == F_WRLCK ||
3813				    nlop->nfslo_type == F_WRLCK ||
3814				    nlop->nfslo_type == F_UNLCK) {
3815					if (lopp != NULL)
3816						*lopp = lop;
3817					return (NFSERR_DENIED);
3818				}
3819			}
3820		}
3821	}
3822	return (0);
3823}
3824
3825/*
3826 * Check for a local conflicting lock.
3827 */
3828APPLESTATIC int
3829nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
3830    u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
3831{
3832	struct nfscllock *lop, nlck;
3833	struct nfscldeleg *dp;
3834	struct nfsnode *np;
3835	u_int8_t own[NFSV4CL_LOCKNAMELEN];
3836	int error;
3837
3838	nlck.nfslo_type = fl->l_type;
3839	nlck.nfslo_first = off;
3840	if (len == NFS64BITSSET) {
3841		nlck.nfslo_end = NFS64BITSSET;
3842	} else {
3843		nlck.nfslo_end = off + len;
3844		if (nlck.nfslo_end <= nlck.nfslo_first)
3845			return (NFSERR_INVAL);
3846	}
3847	np = VTONFS(vp);
3848	nfscl_filllockowner(id, own, flags);
3849	NFSLOCKCLSTATE();
3850	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
3851	error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
3852	    &nlck, own, dp, &lop);
3853	if (error != 0) {
3854		fl->l_whence = SEEK_SET;
3855		fl->l_start = lop->nfslo_first;
3856		if (lop->nfslo_end == NFS64BITSSET)
3857			fl->l_len = 0;
3858		else
3859			fl->l_len = lop->nfslo_end - lop->nfslo_first;
3860		fl->l_pid = (pid_t)0;
3861		fl->l_type = lop->nfslo_type;
3862		error = -1;			/* no RPC required */
3863	} else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) ||
3864	    fl->l_type == F_RDLCK)) {
3865		/*
3866		 * The delegation ensures that there isn't a conflicting
3867		 * lock on the server, so return -1 to indicate an RPC
3868		 * isn't required.
3869		 */
3870		fl->l_type = F_UNLCK;
3871		error = -1;
3872	}
3873	NFSUNLOCKCLSTATE();
3874	return (error);
3875}
3876
3877/*
3878 * Handle Recall of a delegation.
3879 * The clp must be exclusive locked when this is called.
3880 */
3881static int
3882nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
3883    struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
3884    int called_from_renewthread)
3885{
3886	struct nfsclowner *owp, *lowp, *nowp;
3887	struct nfsclopen *op, *lop;
3888	struct nfscllockowner *lp;
3889	struct nfscllock *lckp;
3890	struct nfsnode *np;
3891	int error = 0, ret, gotvp = 0;
3892
3893	if (vp == NULL) {
3894		/*
3895		 * First, get a vnode for the file. This is needed to do RPCs.
3896		 */
3897		ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh,
3898		    dp->nfsdl_fhlen, p, &np);
3899		if (ret) {
3900			/*
3901			 * File isn't open, so nothing to move over to the
3902			 * server.
3903			 */
3904			return (0);
3905		}
3906		vp = NFSTOV(np);
3907		gotvp = 1;
3908	} else {
3909		np = VTONFS(vp);
3910	}
3911	dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET;
3912
3913	/*
3914	 * Ok, if it's a write delegation, flush data to the server, so
3915	 * that close/open consistency is retained.
3916	 */
3917	ret = 0;
3918	NFSLOCKNODE(np);
3919	if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
3920		np->n_flag |= NDELEGRECALL;
3921		NFSUNLOCKNODE(np);
3922		ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread);
3923		NFSLOCKNODE(np);
3924		np->n_flag &= ~NDELEGRECALL;
3925	}
3926	NFSINVALATTRCACHE(np);
3927	NFSUNLOCKNODE(np);
3928	if (ret == EIO && called_from_renewthread != 0) {
3929		/*
3930		 * If the flush failed with EIO for the renew thread,
3931		 * return now, so that the dirty buffer will be flushed
3932		 * later.
3933		 */
3934		if (gotvp != 0)
3935			vrele(vp);
3936		return (ret);
3937	}
3938
3939	/*
3940	 * Now, for each openowner with opens issued locally, move them
3941	 * over to state against the server.
3942	 */
3943	LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) {
3944		lop = LIST_FIRST(&lowp->nfsow_open);
3945		if (lop != NULL) {
3946			if (LIST_NEXT(lop, nfso_list) != NULL)
3947				panic("nfsdlg mult opens");
3948			/*
3949			 * Look for the same openowner against the server.
3950			 */
3951			LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3952				if (!NFSBCMP(lowp->nfsow_owner,
3953				    owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
3954					newnfs_copycred(&dp->nfsdl_cred, cred);
3955					ret = nfscl_moveopen(vp, clp, nmp, lop,
3956					    owp, dp, cred, p);
3957					if (ret == NFSERR_STALECLIENTID ||
3958					    ret == NFSERR_STALEDONTRECOVER ||
3959					    ret == NFSERR_BADSESSION) {
3960						if (gotvp)
3961							vrele(vp);
3962						return (ret);
3963					}
3964					if (ret) {
3965						nfscl_freeopen(lop, 1);
3966						if (!error)
3967							error = ret;
3968					}
3969					break;
3970				}
3971			}
3972
3973			/*
3974			 * If no openowner found, create one and get an open
3975			 * for it.
3976			 */
3977			if (owp == NULL) {
3978				MALLOC(nowp, struct nfsclowner *,
3979				    sizeof (struct nfsclowner), M_NFSCLOWNER,
3980				    M_WAITOK);
3981				nfscl_newopen(clp, NULL, &owp, &nowp, &op,
3982				    NULL, lowp->nfsow_owner, dp->nfsdl_fh,
3983				    dp->nfsdl_fhlen, NULL, NULL);
3984				newnfs_copycred(&dp->nfsdl_cred, cred);
3985				ret = nfscl_moveopen(vp, clp, nmp, lop,
3986				    owp, dp, cred, p);
3987				if (ret) {
3988					nfscl_freeopenowner(owp, 0);
3989					if (ret == NFSERR_STALECLIENTID ||
3990					    ret == NFSERR_STALEDONTRECOVER ||
3991					    ret == NFSERR_BADSESSION) {
3992						if (gotvp)
3993							vrele(vp);
3994						return (ret);
3995					}
3996					if (ret) {
3997						nfscl_freeopen(lop, 1);
3998						if (!error)
3999							error = ret;
4000					}
4001				}
4002			}
4003		}
4004	}
4005
4006	/*
4007	 * Now, get byte range locks for any locks done locally.
4008	 */
4009	LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4010		LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) {
4011			newnfs_copycred(&dp->nfsdl_cred, cred);
4012			ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p);
4013			if (ret == NFSERR_STALESTATEID ||
4014			    ret == NFSERR_STALEDONTRECOVER ||
4015			    ret == NFSERR_STALECLIENTID ||
4016			    ret == NFSERR_BADSESSION) {
4017				if (gotvp)
4018					vrele(vp);
4019				return (ret);
4020			}
4021			if (ret && !error)
4022				error = ret;
4023		}
4024	}
4025	if (gotvp)
4026		vrele(vp);
4027	return (error);
4028}
4029
4030/*
4031 * Move a locally issued open over to an owner on the state list.
4032 * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and
4033 * returns with it unlocked.
4034 */
4035static int
4036nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4037    struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp,
4038    struct ucred *cred, NFSPROC_T *p)
4039{
4040	struct nfsclopen *op, *nop;
4041	struct nfscldeleg *ndp;
4042	struct nfsnode *np;
4043	int error = 0, newone;
4044
4045	/*
4046	 * First, look for an appropriate open, If found, just increment the
4047	 * opencnt in it.
4048	 */
4049	LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
4050		if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode &&
4051		    op->nfso_fhlen == lop->nfso_fhlen &&
4052		    !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) {
4053			op->nfso_opencnt += lop->nfso_opencnt;
4054			nfscl_freeopen(lop, 1);
4055			return (0);
4056		}
4057	}
4058
4059	/* No appropriate open, so we have to do one against the server. */
4060	np = VTONFS(vp);
4061	MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) +
4062	    lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
4063	newone = 0;
4064	nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner,
4065	    lop->nfso_fh, lop->nfso_fhlen, cred, &newone);
4066	ndp = dp;
4067	error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen,
4068	    lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op,
4069	    NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p);
4070	if (error) {
4071		if (newone)
4072			nfscl_freeopen(op, 0);
4073	} else {
4074		op->nfso_mode |= lop->nfso_mode;
4075		op->nfso_opencnt += lop->nfso_opencnt;
4076		nfscl_freeopen(lop, 1);
4077	}
4078	if (nop != NULL)
4079		FREE((caddr_t)nop, M_NFSCLOPEN);
4080	if (ndp != NULL) {
4081		/*
4082		 * What should I do with the returned delegation, since the
4083		 * delegation is being recalled? For now, just printf and
4084		 * through it away.
4085		 */
4086		printf("Moveopen returned deleg\n");
4087		FREE((caddr_t)ndp, M_NFSCLDELEG);
4088	}
4089	return (error);
4090}
4091
4092/*
4093 * Recall all delegations on this client.
4094 */
4095static void
4096nfscl_totalrecall(struct nfsclclient *clp)
4097{
4098	struct nfscldeleg *dp;
4099
4100	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
4101		if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0)
4102			dp->nfsdl_flags |= NFSCLDL_RECALL;
4103	}
4104}
4105
4106/*
4107 * Relock byte ranges. Called for delegation recall and state expiry.
4108 */
4109static int
4110nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4111    struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred,
4112    NFSPROC_T *p)
4113{
4114	struct nfscllockowner *nlp;
4115	struct nfsfh *nfhp;
4116	u_int64_t off, len;
4117	u_int32_t clidrev = 0;
4118	int error, newone, donelocally;
4119
4120	off = lop->nfslo_first;
4121	len = lop->nfslo_end - lop->nfslo_first;
4122	error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
4123	    clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner,
4124	    lp->nfsl_openowner, &nlp, &newone, &donelocally);
4125	if (error || donelocally)
4126		return (error);
4127	if (nmp->nm_clp != NULL)
4128		clidrev = nmp->nm_clp->nfsc_clientidrev;
4129	else
4130		clidrev = 0;
4131	nfhp = VTONFS(vp)->n_fhp;
4132	error = nfscl_trylock(nmp, vp, nfhp->nfh_fh,
4133	    nfhp->nfh_len, nlp, newone, 0, off,
4134	    len, lop->nfslo_type, cred, p);
4135	if (error)
4136		nfscl_freelockowner(nlp, 0);
4137	return (error);
4138}
4139
4140/*
4141 * Called to re-open a file. Basically get a vnode for the file handle
4142 * and then call nfsrpc_openrpc() to do the rest.
4143 */
4144static int
4145nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen,
4146    u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp,
4147    struct ucred *cred, NFSPROC_T *p)
4148{
4149	struct nfsnode *np;
4150	vnode_t vp;
4151	int error;
4152
4153	error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np);
4154	if (error)
4155		return (error);
4156	vp = NFSTOV(np);
4157	if (np->n_v4 != NULL) {
4158		error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data,
4159		    np->n_v4->n4_fhlen, fhp, fhlen, mode, op,
4160		    NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0,
4161		    cred, p);
4162	} else {
4163		error = EINVAL;
4164	}
4165	vrele(vp);
4166	return (error);
4167}
4168
4169/*
4170 * Try an open against the server. Just call nfsrpc_openrpc(), retrying while
4171 * NFSERR_DELAY. Also, try system credentials, if the passed in credentials
4172 * fail.
4173 */
4174static int
4175nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4176    u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
4177    u_int8_t *name, int namelen, struct nfscldeleg **ndpp,
4178    int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p)
4179{
4180	int error;
4181
4182	do {
4183		error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen,
4184		    mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p,
4185		    0, 0);
4186		if (error == NFSERR_DELAY)
4187			(void) nfs_catnap(PZERO, error, "nfstryop");
4188	} while (error == NFSERR_DELAY);
4189	if (error == EAUTH || error == EACCES) {
4190		/* Try again using system credentials */
4191		newnfs_setroot(cred);
4192		do {
4193		    error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp,
4194			newfhlen, mode, op, name, namelen, ndpp, reclaim,
4195			delegtype, cred, p, 1, 0);
4196		    if (error == NFSERR_DELAY)
4197			(void) nfs_catnap(PZERO, error, "nfstryop");
4198		} while (error == NFSERR_DELAY);
4199	}
4200	return (error);
4201}
4202
4203/*
4204 * Try a byte range lock. Just loop on nfsrpc_lock() while it returns
4205 * NFSERR_DELAY. Also, retry with system credentials, if the provided
4206 * cred don't work.
4207 */
4208static int
4209nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp,
4210    int fhlen, struct nfscllockowner *nlp, int newone, int reclaim,
4211    u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p)
4212{
4213	struct nfsrv_descript nfsd, *nd = &nfsd;
4214	int error;
4215
4216	do {
4217		error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone,
4218		    reclaim, off, len, type, cred, p, 0);
4219		if (!error && nd->nd_repstat == NFSERR_DELAY)
4220			(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4221			    "nfstrylck");
4222	} while (!error && nd->nd_repstat == NFSERR_DELAY);
4223	if (!error)
4224		error = nd->nd_repstat;
4225	if (error == EAUTH || error == EACCES) {
4226		/* Try again using root credentials */
4227		newnfs_setroot(cred);
4228		do {
4229			error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp,
4230			    newone, reclaim, off, len, type, cred, p, 1);
4231			if (!error && nd->nd_repstat == NFSERR_DELAY)
4232				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4233				    "nfstrylck");
4234		} while (!error && nd->nd_repstat == NFSERR_DELAY);
4235		if (!error)
4236			error = nd->nd_repstat;
4237	}
4238	return (error);
4239}
4240
4241/*
4242 * Try a delegreturn against the server. Just call nfsrpc_delegreturn(),
4243 * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4244 * credentials fail.
4245 */
4246static int
4247nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred,
4248    struct nfsmount *nmp, NFSPROC_T *p)
4249{
4250	int error;
4251
4252	do {
4253		error = nfsrpc_delegreturn(dp, cred, nmp, p, 0);
4254		if (error == NFSERR_DELAY)
4255			(void) nfs_catnap(PZERO, error, "nfstrydp");
4256	} while (error == NFSERR_DELAY);
4257	if (error == EAUTH || error == EACCES) {
4258		/* Try again using system credentials */
4259		newnfs_setroot(cred);
4260		do {
4261			error = nfsrpc_delegreturn(dp, cred, nmp, p, 1);
4262			if (error == NFSERR_DELAY)
4263				(void) nfs_catnap(PZERO, error, "nfstrydp");
4264		} while (error == NFSERR_DELAY);
4265	}
4266	return (error);
4267}
4268
4269/*
4270 * Try a close against the server. Just call nfsrpc_closerpc(),
4271 * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4272 * credentials fail.
4273 */
4274APPLESTATIC int
4275nfscl_tryclose(struct nfsclopen *op, struct ucred *cred,
4276    struct nfsmount *nmp, NFSPROC_T *p)
4277{
4278	struct nfsrv_descript nfsd, *nd = &nfsd;
4279	int error;
4280
4281	do {
4282		error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0);
4283		if (error == NFSERR_DELAY)
4284			(void) nfs_catnap(PZERO, error, "nfstrycl");
4285	} while (error == NFSERR_DELAY);
4286	if (error == EAUTH || error == EACCES) {
4287		/* Try again using system credentials */
4288		newnfs_setroot(cred);
4289		do {
4290			error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1);
4291			if (error == NFSERR_DELAY)
4292				(void) nfs_catnap(PZERO, error, "nfstrycl");
4293		} while (error == NFSERR_DELAY);
4294	}
4295	return (error);
4296}
4297
4298/*
4299 * Decide if a delegation on a file permits close without flushing writes
4300 * to the server. This might be a big performance win in some environments.
4301 * (Not useful until the client does caching on local stable storage.)
4302 */
4303APPLESTATIC int
4304nfscl_mustflush(vnode_t vp)
4305{
4306	struct nfsclclient *clp;
4307	struct nfscldeleg *dp;
4308	struct nfsnode *np;
4309	struct nfsmount *nmp;
4310
4311	np = VTONFS(vp);
4312	nmp = VFSTONFS(vnode_mount(vp));
4313	if (!NFSHASNFSV4(nmp))
4314		return (1);
4315	NFSLOCKCLSTATE();
4316	clp = nfscl_findcl(nmp);
4317	if (clp == NULL) {
4318		NFSUNLOCKCLSTATE();
4319		return (1);
4320	}
4321	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4322	if (dp != NULL && (dp->nfsdl_flags &
4323	    (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) ==
4324	     NFSCLDL_WRITE &&
4325	    (dp->nfsdl_sizelimit >= np->n_size ||
4326	     !NFSHASSTRICT3530(nmp))) {
4327		NFSUNLOCKCLSTATE();
4328		return (0);
4329	}
4330	NFSUNLOCKCLSTATE();
4331	return (1);
4332}
4333
4334/*
4335 * See if a (write) delegation exists for this file.
4336 */
4337APPLESTATIC int
4338nfscl_nodeleg(vnode_t vp, int writedeleg)
4339{
4340	struct nfsclclient *clp;
4341	struct nfscldeleg *dp;
4342	struct nfsnode *np;
4343	struct nfsmount *nmp;
4344
4345	np = VTONFS(vp);
4346	nmp = VFSTONFS(vnode_mount(vp));
4347	if (!NFSHASNFSV4(nmp))
4348		return (1);
4349	NFSLOCKCLSTATE();
4350	clp = nfscl_findcl(nmp);
4351	if (clp == NULL) {
4352		NFSUNLOCKCLSTATE();
4353		return (1);
4354	}
4355	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4356	if (dp != NULL &&
4357	    (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 &&
4358	    (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) ==
4359	     NFSCLDL_WRITE)) {
4360		NFSUNLOCKCLSTATE();
4361		return (0);
4362	}
4363	NFSUNLOCKCLSTATE();
4364	return (1);
4365}
4366
4367/*
4368 * Look for an associated delegation that should be DelegReturned.
4369 */
4370APPLESTATIC int
4371nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
4372{
4373	struct nfsclclient *clp;
4374	struct nfscldeleg *dp;
4375	struct nfsclowner *owp;
4376	struct nfscllockowner *lp;
4377	struct nfsmount *nmp;
4378	struct ucred *cred;
4379	struct nfsnode *np;
4380	int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4381
4382	nmp = VFSTONFS(vnode_mount(vp));
4383	np = VTONFS(vp);
4384	NFSLOCKCLSTATE();
4385	/*
4386	 * Loop around waiting for:
4387	 * - outstanding I/O operations on delegations to complete
4388	 * - for a delegation on vp that has state, lock the client and
4389	 *   do a recall
4390	 * - return delegation with no state
4391	 */
4392	while (1) {
4393		clp = nfscl_findcl(nmp);
4394		if (clp == NULL) {
4395			NFSUNLOCKCLSTATE();
4396			return (retcnt);
4397		}
4398		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4399		    np->n_fhp->nfh_len);
4400		if (dp != NULL) {
4401		    /*
4402		     * Wait for outstanding I/O ops to be done.
4403		     */
4404		    if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4405			if (igotlock) {
4406			    nfsv4_unlock(&clp->nfsc_lock, 0);
4407			    igotlock = 0;
4408			}
4409			dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4410			(void) nfsmsleep(&dp->nfsdl_rwlock,
4411			    NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4412			continue;
4413		    }
4414		    needsrecall = 0;
4415		    LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4416			if (!LIST_EMPTY(&owp->nfsow_open)) {
4417			    needsrecall = 1;
4418			    break;
4419			}
4420		    }
4421		    if (!needsrecall) {
4422			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4423			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4424				needsrecall = 1;
4425				break;
4426			    }
4427			}
4428		    }
4429		    if (needsrecall && !triedrecall) {
4430			dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4431			islept = 0;
4432			while (!igotlock) {
4433			    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4434				&islept, NFSCLSTATEMUTEXPTR, NULL);
4435			    if (islept)
4436				break;
4437			}
4438			if (islept)
4439			    continue;
4440			NFSUNLOCKCLSTATE();
4441			cred = newnfs_getcred();
4442			newnfs_copycred(&dp->nfsdl_cred, cred);
4443			(void) nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0);
4444			NFSFREECRED(cred);
4445			triedrecall = 1;
4446			NFSLOCKCLSTATE();
4447			nfsv4_unlock(&clp->nfsc_lock, 0);
4448			igotlock = 0;
4449			continue;
4450		    }
4451		    *stp = dp->nfsdl_stateid;
4452		    retcnt = 1;
4453		    nfscl_cleandeleg(dp);
4454		    nfscl_freedeleg(&clp->nfsc_deleg, dp);
4455		}
4456		if (igotlock)
4457		    nfsv4_unlock(&clp->nfsc_lock, 0);
4458		NFSUNLOCKCLSTATE();
4459		return (retcnt);
4460	}
4461}
4462
4463/*
4464 * Look for associated delegation(s) that should be DelegReturned.
4465 */
4466APPLESTATIC int
4467nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
4468    nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p)
4469{
4470	struct nfsclclient *clp;
4471	struct nfscldeleg *dp;
4472	struct nfsclowner *owp;
4473	struct nfscllockowner *lp;
4474	struct nfsmount *nmp;
4475	struct ucred *cred;
4476	struct nfsnode *np;
4477	int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4478
4479	nmp = VFSTONFS(vnode_mount(fvp));
4480	*gotfdp = 0;
4481	*gottdp = 0;
4482	NFSLOCKCLSTATE();
4483	/*
4484	 * Loop around waiting for:
4485	 * - outstanding I/O operations on delegations to complete
4486	 * - for a delegation on fvp that has state, lock the client and
4487	 *   do a recall
4488	 * - return delegation(s) with no state.
4489	 */
4490	while (1) {
4491		clp = nfscl_findcl(nmp);
4492		if (clp == NULL) {
4493			NFSUNLOCKCLSTATE();
4494			return (retcnt);
4495		}
4496		np = VTONFS(fvp);
4497		dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4498		    np->n_fhp->nfh_len);
4499		if (dp != NULL && *gotfdp == 0) {
4500		    /*
4501		     * Wait for outstanding I/O ops to be done.
4502		     */
4503		    if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4504			if (igotlock) {
4505			    nfsv4_unlock(&clp->nfsc_lock, 0);
4506			    igotlock = 0;
4507			}
4508			dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4509			(void) nfsmsleep(&dp->nfsdl_rwlock,
4510			    NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4511			continue;
4512		    }
4513		    needsrecall = 0;
4514		    LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4515			if (!LIST_EMPTY(&owp->nfsow_open)) {
4516			    needsrecall = 1;
4517			    break;
4518			}
4519		    }
4520		    if (!needsrecall) {
4521			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4522			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4523				needsrecall = 1;
4524				break;
4525			    }
4526			}
4527		    }
4528		    if (needsrecall && !triedrecall) {
4529			dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4530			islept = 0;
4531			while (!igotlock) {
4532			    igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4533				&islept, NFSCLSTATEMUTEXPTR, NULL);
4534			    if (islept)
4535				break;
4536			}
4537			if (islept)
4538			    continue;
4539			NFSUNLOCKCLSTATE();
4540			cred = newnfs_getcred();
4541			newnfs_copycred(&dp->nfsdl_cred, cred);
4542			(void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0);
4543			NFSFREECRED(cred);
4544			triedrecall = 1;
4545			NFSLOCKCLSTATE();
4546			nfsv4_unlock(&clp->nfsc_lock, 0);
4547			igotlock = 0;
4548			continue;
4549		    }
4550		    *fstp = dp->nfsdl_stateid;
4551		    retcnt++;
4552		    *gotfdp = 1;
4553		    nfscl_cleandeleg(dp);
4554		    nfscl_freedeleg(&clp->nfsc_deleg, dp);
4555		}
4556		if (igotlock) {
4557		    nfsv4_unlock(&clp->nfsc_lock, 0);
4558		    igotlock = 0;
4559		}
4560		if (tvp != NULL) {
4561		    np = VTONFS(tvp);
4562		    dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4563			np->n_fhp->nfh_len);
4564		    if (dp != NULL && *gottdp == 0) {
4565			/*
4566			 * Wait for outstanding I/O ops to be done.
4567			 */
4568			if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4569			    dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4570			    (void) nfsmsleep(&dp->nfsdl_rwlock,
4571				NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4572			    continue;
4573			}
4574			LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4575			    if (!LIST_EMPTY(&owp->nfsow_open)) {
4576				NFSUNLOCKCLSTATE();
4577				return (retcnt);
4578			    }
4579			}
4580			LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4581			    if (!LIST_EMPTY(&lp->nfsl_lock)) {
4582				NFSUNLOCKCLSTATE();
4583				return (retcnt);
4584			    }
4585			}
4586			*tstp = dp->nfsdl_stateid;
4587			retcnt++;
4588			*gottdp = 1;
4589			nfscl_cleandeleg(dp);
4590			nfscl_freedeleg(&clp->nfsc_deleg, dp);
4591		    }
4592		}
4593		NFSUNLOCKCLSTATE();
4594		return (retcnt);
4595	}
4596}
4597
4598/*
4599 * Get a reference on the clientid associated with the mount point.
4600 * Return 1 if success, 0 otherwise.
4601 */
4602APPLESTATIC int
4603nfscl_getref(struct nfsmount *nmp)
4604{
4605	struct nfsclclient *clp;
4606
4607	NFSLOCKCLSTATE();
4608	clp = nfscl_findcl(nmp);
4609	if (clp == NULL) {
4610		NFSUNLOCKCLSTATE();
4611		return (0);
4612	}
4613	nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL);
4614	NFSUNLOCKCLSTATE();
4615	return (1);
4616}
4617
4618/*
4619 * Release a reference on a clientid acquired with the above call.
4620 */
4621APPLESTATIC void
4622nfscl_relref(struct nfsmount *nmp)
4623{
4624	struct nfsclclient *clp;
4625
4626	NFSLOCKCLSTATE();
4627	clp = nfscl_findcl(nmp);
4628	if (clp == NULL) {
4629		NFSUNLOCKCLSTATE();
4630		return;
4631	}
4632	nfsv4_relref(&clp->nfsc_lock);
4633	NFSUNLOCKCLSTATE();
4634}
4635
4636/*
4637 * Save the size attribute in the delegation, since the nfsnode
4638 * is going away.
4639 */
4640APPLESTATIC void
4641nfscl_reclaimnode(vnode_t vp)
4642{
4643	struct nfsclclient *clp;
4644	struct nfscldeleg *dp;
4645	struct nfsnode *np = VTONFS(vp);
4646	struct nfsmount *nmp;
4647
4648	nmp = VFSTONFS(vnode_mount(vp));
4649	if (!NFSHASNFSV4(nmp))
4650		return;
4651	NFSLOCKCLSTATE();
4652	clp = nfscl_findcl(nmp);
4653	if (clp == NULL) {
4654		NFSUNLOCKCLSTATE();
4655		return;
4656	}
4657	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4658	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4659		dp->nfsdl_size = np->n_size;
4660	NFSUNLOCKCLSTATE();
4661}
4662
4663/*
4664 * Get the saved size attribute in the delegation, since it is a
4665 * newly allocated nfsnode.
4666 */
4667APPLESTATIC void
4668nfscl_newnode(vnode_t vp)
4669{
4670	struct nfsclclient *clp;
4671	struct nfscldeleg *dp;
4672	struct nfsnode *np = VTONFS(vp);
4673	struct nfsmount *nmp;
4674
4675	nmp = VFSTONFS(vnode_mount(vp));
4676	if (!NFSHASNFSV4(nmp))
4677		return;
4678	NFSLOCKCLSTATE();
4679	clp = nfscl_findcl(nmp);
4680	if (clp == NULL) {
4681		NFSUNLOCKCLSTATE();
4682		return;
4683	}
4684	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4685	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4686		np->n_size = dp->nfsdl_size;
4687	NFSUNLOCKCLSTATE();
4688}
4689
4690/*
4691 * If there is a valid write delegation for this file, set the modtime
4692 * to the local clock time.
4693 */
4694APPLESTATIC void
4695nfscl_delegmodtime(vnode_t vp)
4696{
4697	struct nfsclclient *clp;
4698	struct nfscldeleg *dp;
4699	struct nfsnode *np = VTONFS(vp);
4700	struct nfsmount *nmp;
4701
4702	nmp = VFSTONFS(vnode_mount(vp));
4703	if (!NFSHASNFSV4(nmp))
4704		return;
4705	NFSLOCKCLSTATE();
4706	clp = nfscl_findcl(nmp);
4707	if (clp == NULL) {
4708		NFSUNLOCKCLSTATE();
4709		return;
4710	}
4711	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4712	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
4713		nanotime(&dp->nfsdl_modtime);
4714		dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
4715	}
4716	NFSUNLOCKCLSTATE();
4717}
4718
4719/*
4720 * If there is a valid write delegation for this file with a modtime set,
4721 * put that modtime in mtime.
4722 */
4723APPLESTATIC void
4724nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime)
4725{
4726	struct nfsclclient *clp;
4727	struct nfscldeleg *dp;
4728	struct nfsnode *np = VTONFS(vp);
4729	struct nfsmount *nmp;
4730
4731	nmp = VFSTONFS(vnode_mount(vp));
4732	if (!NFSHASNFSV4(nmp))
4733		return;
4734	NFSLOCKCLSTATE();
4735	clp = nfscl_findcl(nmp);
4736	if (clp == NULL) {
4737		NFSUNLOCKCLSTATE();
4738		return;
4739	}
4740	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4741	if (dp != NULL &&
4742	    (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) ==
4743	    (NFSCLDL_WRITE | NFSCLDL_MODTIMESET))
4744		*mtime = dp->nfsdl_modtime;
4745	NFSUNLOCKCLSTATE();
4746}
4747
4748static int
4749nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
4750{
4751	short *defaulterrp, *errp;
4752
4753	if (!nd->nd_repstat)
4754		return (0);
4755	if (nd->nd_procnum == NFSPROC_NOOP)
4756		return (txdr_unsigned(nd->nd_repstat & 0xffff));
4757	if (nd->nd_repstat == EBADRPC)
4758		return (txdr_unsigned(NFSERR_BADXDR));
4759	if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
4760	    nd->nd_repstat == NFSERR_OPILLEGAL)
4761		return (txdr_unsigned(nd->nd_repstat));
4762	if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
4763	    minorvers > NFSV4_MINORVERSION) {
4764		/* NFSv4.n error. */
4765		return (txdr_unsigned(nd->nd_repstat));
4766	}
4767	if (nd->nd_procnum < NFSV4OP_CBNOPS)
4768		errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
4769	else
4770		return (txdr_unsigned(nd->nd_repstat));
4771	while (*++errp)
4772		if (*errp == (short)nd->nd_repstat)
4773			return (txdr_unsigned(nd->nd_repstat));
4774	return (txdr_unsigned(*defaulterrp));
4775}
4776
4777/*
4778 * Called to find/add a layout to a client.
4779 * This function returns the layout with a refcnt (shared lock) upon
4780 * success (returns 0) or with no lock/refcnt on the layout when an
4781 * error is returned.
4782 * If a layout is passed in via lypp, it is locked (exclusively locked).
4783 */
4784APPLESTATIC int
4785nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4786    nfsv4stateid_t *stateidp, int retonclose,
4787    struct nfsclflayouthead *fhlp, struct nfscllayout **lypp,
4788    struct ucred *cred, NFSPROC_T *p)
4789{
4790	struct nfsclclient *clp;
4791	struct nfscllayout *lyp, *tlyp;
4792	struct nfsclflayout *flp;
4793	struct nfsnode *np = VTONFS(vp);
4794	mount_t mp;
4795	int layout_passed_in;
4796
4797	mp = nmp->nm_mountp;
4798	layout_passed_in = 1;
4799	tlyp = NULL;
4800	lyp = *lypp;
4801	if (lyp == NULL) {
4802		layout_passed_in = 0;
4803		tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT,
4804		    M_WAITOK | M_ZERO);
4805	}
4806
4807	NFSLOCKCLSTATE();
4808	clp = nmp->nm_clp;
4809	if (clp == NULL) {
4810		if (layout_passed_in != 0)
4811			nfsv4_unlock(&lyp->nfsly_lock, 0);
4812		NFSUNLOCKCLSTATE();
4813		if (tlyp != NULL)
4814			free(tlyp, M_NFSLAYOUT);
4815		return (EPERM);
4816	}
4817	if (lyp == NULL) {
4818		/*
4819		 * Although no lyp was passed in, another thread might have
4820		 * allocated one. If one is found, just increment it's ref
4821		 * count and return it.
4822		 */
4823		lyp = nfscl_findlayout(clp, fhp, fhlen);
4824		if (lyp == NULL) {
4825			lyp = tlyp;
4826			tlyp = NULL;
4827			lyp->nfsly_stateid.seqid = stateidp->seqid;
4828			lyp->nfsly_stateid.other[0] = stateidp->other[0];
4829			lyp->nfsly_stateid.other[1] = stateidp->other[1];
4830			lyp->nfsly_stateid.other[2] = stateidp->other[2];
4831			lyp->nfsly_lastbyte = 0;
4832			LIST_INIT(&lyp->nfsly_flayread);
4833			LIST_INIT(&lyp->nfsly_flayrw);
4834			LIST_INIT(&lyp->nfsly_recall);
4835			lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0];
4836			lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1];
4837			lyp->nfsly_clp = clp;
4838			lyp->nfsly_flags = (retonclose != 0) ?
4839			    (NFSLY_FILES | NFSLY_RETONCLOSE) : NFSLY_FILES;
4840			lyp->nfsly_fhlen = fhlen;
4841			NFSBCOPY(fhp, lyp->nfsly_fh, fhlen);
4842			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
4843			LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
4844			    nfsly_hash);
4845			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
4846			nfscl_layoutcnt++;
4847		} else {
4848			if (retonclose != 0)
4849				lyp->nfsly_flags |= NFSLY_RETONCLOSE;
4850			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
4851			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
4852			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
4853		}
4854		nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
4855		if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
4856			NFSUNLOCKCLSTATE();
4857			if (tlyp != NULL)
4858				free(tlyp, M_NFSLAYOUT);
4859			return (EPERM);
4860		}
4861		*lypp = lyp;
4862	} else
4863		lyp->nfsly_stateid.seqid = stateidp->seqid;
4864
4865	/* Merge the new list of File Layouts into the list. */
4866	flp = LIST_FIRST(fhlp);
4867	if (flp != NULL) {
4868		if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ)
4869			nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp);
4870		else
4871			nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp);
4872	}
4873	if (layout_passed_in != 0)
4874		nfsv4_unlock(&lyp->nfsly_lock, 1);
4875	NFSUNLOCKCLSTATE();
4876	if (tlyp != NULL)
4877		free(tlyp, M_NFSLAYOUT);
4878	return (0);
4879}
4880
4881/*
4882 * Search for a layout by MDS file handle.
4883 * If one is found, it is returned with a refcnt (shared lock) iff
4884 * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is
4885 * returned NULL.
4886 */
4887struct nfscllayout *
4888nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
4889    uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
4890{
4891	struct nfscllayout *lyp;
4892	mount_t mp;
4893	int error, igotlock;
4894
4895	mp = clp->nfsc_nmp->nm_mountp;
4896	*recalledp = 0;
4897	*retflpp = NULL;
4898	NFSLOCKCLSTATE();
4899	lyp = nfscl_findlayout(clp, fhp, fhlen);
4900	if (lyp != NULL) {
4901		if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
4902			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
4903			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
4904			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
4905			error = nfscl_findlayoutforio(lyp, off,
4906			    NFSV4OPEN_ACCESSREAD, retflpp);
4907			if (error == 0)
4908				nfsv4_getref(&lyp->nfsly_lock, NULL,
4909				    NFSCLSTATEMUTEXPTR, mp);
4910			else {
4911				do {
4912					igotlock = nfsv4_lock(&lyp->nfsly_lock,
4913					    1, NULL, NFSCLSTATEMUTEXPTR, mp);
4914				} while (igotlock == 0 &&
4915				    (mp->mnt_kern_flag & MNTK_UNMOUNTF) == 0);
4916				*retflpp = NULL;
4917			}
4918			if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
4919				lyp = NULL;
4920				*recalledp = 1;
4921			}
4922		} else {
4923			lyp = NULL;
4924			*recalledp = 1;
4925		}
4926	}
4927	NFSUNLOCKCLSTATE();
4928	return (lyp);
4929}
4930
4931/*
4932 * Search for a layout by MDS file handle. If one is found, mark in to be
4933 * recalled, if it already marked "return on close".
4934 */
4935static void
4936nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp,
4937    int fhlen, struct nfsclrecalllayout **recallpp)
4938{
4939	struct nfscllayout *lyp;
4940	uint32_t iomode;
4941
4942	if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vnode_mount(vp))) ||
4943	    nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
4944	    (VTONFS(vp)->n_flag & NNOLAYOUT) != 0)
4945		return;
4946	lyp = nfscl_findlayout(clp, fhp, fhlen);
4947	if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE |
4948	    NFSLY_RECALL)) == NFSLY_RETONCLOSE) {
4949		iomode = 0;
4950		if (!LIST_EMPTY(&lyp->nfsly_flayread))
4951			iomode |= NFSLAYOUTIOMODE_READ;
4952		if (!LIST_EMPTY(&lyp->nfsly_flayrw))
4953			iomode |= NFSLAYOUTIOMODE_RW;
4954		(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
4955		    0, UINT64_MAX, lyp->nfsly_stateid.seqid, *recallpp);
4956		NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode);
4957		*recallpp = NULL;
4958	}
4959}
4960
4961/*
4962 * Dereference a layout.
4963 */
4964void
4965nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
4966{
4967
4968	NFSLOCKCLSTATE();
4969	if (exclocked != 0)
4970		nfsv4_unlock(&lyp->nfsly_lock, 0);
4971	else
4972		nfsv4_relref(&lyp->nfsly_lock);
4973	NFSUNLOCKCLSTATE();
4974}
4975
4976/*
4977 * Search for a devinfo by deviceid. If one is found, return it after
4978 * acquiring a reference count on it.
4979 */
4980struct nfscldevinfo *
4981nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
4982    struct nfscldevinfo *dip)
4983{
4984
4985	NFSLOCKCLSTATE();
4986	if (dip == NULL)
4987		dip = nfscl_finddevinfo(clp, deviceid);
4988	if (dip != NULL)
4989		dip->nfsdi_refcnt++;
4990	NFSUNLOCKCLSTATE();
4991	return (dip);
4992}
4993
4994/*
4995 * Dereference a devinfo structure.
4996 */
4997static void
4998nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
4999{
5000
5001	dip->nfsdi_refcnt--;
5002	if (dip->nfsdi_refcnt == 0)
5003		wakeup(&dip->nfsdi_refcnt);
5004}
5005
/*
 * Drop a reference on a devinfo structure, acquiring the clstate lock
 * around the call to nfscl_reldevinfo_locked().
 */
void
nfscl_reldevinfo(struct nfscldevinfo *dip)
{

	NFSLOCKCLSTATE();
	nfscl_reldevinfo_locked(dip);
	NFSUNLOCKCLSTATE();
}
5017
5018/*
5019 * Find a layout for this file handle. Return NULL upon failure.
5020 */
5021static struct nfscllayout *
5022nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
5023{
5024	struct nfscllayout *lyp;
5025
5026	LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
5027		if (lyp->nfsly_fhlen == fhlen &&
5028		    !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
5029			break;
5030	return (lyp);
5031}
5032
5033/*
5034 * Find a devinfo for this deviceid. Return NULL upon failure.
5035 */
5036static struct nfscldevinfo *
5037nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
5038{
5039	struct nfscldevinfo *dip;
5040
5041	LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
5042		if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
5043		    == 0)
5044			break;
5045	return (dip);
5046}
5047
5048/*
5049 * Merge the new file layout list into the main one, maintaining it in
5050 * increasing offset order.
5051 */
5052static void
5053nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
5054    struct nfsclflayouthead *newfhlp)
5055{
5056	struct nfsclflayout *flp, *nflp, *prevflp, *tflp;
5057
5058	flp = LIST_FIRST(fhlp);
5059	prevflp = NULL;
5060	LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
5061		while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
5062			prevflp = flp;
5063			flp = LIST_NEXT(flp, nfsfl_list);
5064		}
5065		if (prevflp == NULL)
5066			LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
5067		else
5068			LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
5069		prevflp = nflp;
5070	}
5071}
5072
5073/*
5074 * Add this nfscldevinfo to the client, if it doesn't already exist.
5075 * This function consumes the structure pointed at by dip, if not NULL.
5076 */
5077APPLESTATIC int
5078nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip,
5079    struct nfsclflayout *flp)
5080{
5081	struct nfsclclient *clp;
5082	struct nfscldevinfo *tdip;
5083
5084	NFSLOCKCLSTATE();
5085	clp = nmp->nm_clp;
5086	if (clp == NULL) {
5087		NFSUNLOCKCLSTATE();
5088		if (dip != NULL)
5089			free(dip, M_NFSDEVINFO);
5090		return (ENODEV);
5091	}
5092	tdip = nfscl_finddevinfo(clp, flp->nfsfl_dev);
5093	if (tdip != NULL) {
5094		tdip->nfsdi_layoutrefs++;
5095		flp->nfsfl_devp = tdip;
5096		nfscl_reldevinfo_locked(tdip);
5097		NFSUNLOCKCLSTATE();
5098		if (dip != NULL)
5099			free(dip, M_NFSDEVINFO);
5100		return (0);
5101	}
5102	if (dip != NULL) {
5103		LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
5104		dip->nfsdi_layoutrefs = 1;
5105		flp->nfsfl_devp = dip;
5106	}
5107	NFSUNLOCKCLSTATE();
5108	if (dip == NULL)
5109		return (ENODEV);
5110	return (0);
5111}
5112
5113/*
5114 * Free up a layout structure and associated file layout structure(s).
5115 */
5116APPLESTATIC void
5117nfscl_freelayout(struct nfscllayout *layp)
5118{
5119	struct nfsclflayout *flp, *nflp;
5120	struct nfsclrecalllayout *rp, *nrp;
5121
5122	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
5123		LIST_REMOVE(flp, nfsfl_list);
5124		nfscl_freeflayout(flp);
5125	}
5126	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
5127		LIST_REMOVE(flp, nfsfl_list);
5128		nfscl_freeflayout(flp);
5129	}
5130	LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
5131		LIST_REMOVE(rp, nfsrecly_list);
5132		free(rp, M_NFSLAYRECALL);
5133	}
5134	nfscl_layoutcnt--;
5135	free(layp, M_NFSLAYOUT);
5136}
5137
5138/*
5139 * Free up a file layout structure.
5140 */
5141APPLESTATIC void
5142nfscl_freeflayout(struct nfsclflayout *flp)
5143{
5144	int i;
5145
5146	for (i = 0; i < flp->nfsfl_fhcnt; i++)
5147		free(flp->nfsfl_fh[i], M_NFSFH);
5148	if (flp->nfsfl_devp != NULL)
5149		flp->nfsfl_devp->nfsdi_layoutrefs--;
5150	free(flp, M_NFSFLAYOUT);
5151}
5152
/*
 * Free up a file layout devinfo structure.
 *
 * The memory for dip is handed back to the M_NFSDEVINFO malloc type.
 * This function does nothing else: it neither unlinks dip from a list
 * nor looks at its reference counts.
 */
APPLESTATIC void
nfscl_freedevinfo(struct nfscldevinfo *dip)
{

	free(dip, M_NFSDEVINFO);
}
5162
5163/*
5164 * Mark any layouts that match as recalled.
5165 */
5166static int
5167nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
5168    uint64_t off, uint64_t len, uint32_t stateseqid,
5169    struct nfsclrecalllayout *recallp)
5170{
5171	struct nfsclrecalllayout *rp, *orp;
5172
5173	recallp->nfsrecly_recalltype = recalltype;
5174	recallp->nfsrecly_iomode = iomode;
5175	recallp->nfsrecly_stateseqid = stateseqid;
5176	recallp->nfsrecly_off = off;
5177	recallp->nfsrecly_len = len;
5178	/*
5179	 * Order the list as file returns first, followed by fsid and any
5180	 * returns, both in increasing stateseqid order.
5181	 * Note that the seqids wrap around, so 1 is after 0xffffffff.
5182	 * (I'm not sure this is correct because I find RFC5661 confusing
5183	 *  on this, but hopefully it will work ok.)
5184	 */
5185	orp = NULL;
5186	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5187		orp = rp;
5188		if ((recalltype == NFSLAYOUTRETURN_FILE &&
5189		     (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
5190		      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
5191		    (recalltype != NFSLAYOUTRETURN_FILE &&
5192		     rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
5193		     nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
5194			LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
5195			break;
5196		}
5197	}
5198	if (rp == NULL) {
5199		if (orp == NULL)
5200			LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
5201			    nfsrecly_list);
5202		else
5203			LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
5204	}
5205	lyp->nfsly_flags |= NFSLY_RECALL;
5206	return (0);
5207}
5208
/*
 * Decide whether seqid1 is ordered before seqid2.
 *
 * Seqids are 32-bit counters that wrap around from 0xffffffff to 0, so a
 * plain comparison is only trusted while the two values are less than
 * 0x7fffffff apart.  Once the gap reaches 0x7fffffff the numerically
 * smaller value is taken to be the one that has wrapped around, and the
 * plain answer is inverted.
 *
 * Return 1 if seqid1 comes before (or equals) seqid2, 0 otherwise.
 */
static int
nfscl_seq(uint32_t seqid1, uint32_t seqid2)
{
	uint32_t gap;

	if (seqid1 == seqid2)
		return (1);

	if (seqid1 < seqid2) {
		gap = seqid2 - seqid1;
		/* A huge gap means seqid2 has wrapped around. */
		return (gap >= 0x7fffffff ? 0 : 1);
	}

	gap = seqid1 - seqid2;
	/* A huge gap means seqid1 has wrapped around. */
	return (gap >= 0x7fffffff ? 1 : 0);
}
5229
5230/*
5231 * Do a layout return for each of the recalls.
5232 */
5233static void
5234nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
5235    struct ucred *cred, NFSPROC_T *p)
5236{
5237	struct nfsclrecalllayout *rp;
5238	nfsv4stateid_t stateid;
5239
5240	NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
5241	stateid.seqid = lyp->nfsly_stateid.seqid;
5242	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5243		(void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
5244		    lyp->nfsly_fhlen, 0, NFSLAYOUT_NFSV4_1_FILES,
5245		    rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
5246		    rp->nfsrecly_off, rp->nfsrecly_len,
5247		    &stateid, 0, NULL, cred, p, NULL);
5248	}
5249}
5250
5251/*
5252 * Do the layout commit for a file layout.
5253 */
5254static void
5255nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
5256    struct ucred *cred, NFSPROC_T *p)
5257{
5258	struct nfsclflayout *flp;
5259	uint64_t len;
5260	int error;
5261
5262	LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
5263		if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
5264			len = flp->nfsfl_end - flp->nfsfl_off;
5265			error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
5266			    lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
5267			    lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
5268			    NFSLAYOUT_NFSV4_1_FILES, 0, NULL, cred, p, NULL);
5269			NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
5270			if (error == NFSERR_NOTSUPP) {
5271				/* If not supported, don't bother doing it. */
5272				NFSLOCKMNT(nmp);
5273				nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5274				NFSUNLOCKMNT(nmp);
5275				break;
5276			}
5277		}
5278	}
5279}
5280
5281/*
5282 * Commit all layouts for a file (vnode).
5283 */
5284int
5285nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
5286{
5287	struct nfsclclient *clp;
5288	struct nfscllayout *lyp;
5289	struct nfsnode *np = VTONFS(vp);
5290	mount_t mp;
5291	struct nfsmount *nmp;
5292
5293	mp = vnode_mount(vp);
5294	nmp = VFSTONFS(mp);
5295	if (NFSHASNOLAYOUTCOMMIT(nmp))
5296		return (0);
5297	NFSLOCKCLSTATE();
5298	clp = nmp->nm_clp;
5299	if (clp == NULL) {
5300		NFSUNLOCKCLSTATE();
5301		return (EPERM);
5302	}
5303	lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
5304	if (lyp == NULL) {
5305		NFSUNLOCKCLSTATE();
5306		return (EPERM);
5307	}
5308	nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
5309	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
5310		NFSUNLOCKCLSTATE();
5311		return (EPERM);
5312	}
5313tryagain:
5314	if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
5315		lyp->nfsly_flags &= ~NFSLY_WRITTEN;
5316		NFSUNLOCKCLSTATE();
5317		NFSCL_DEBUG(4, "do layoutcommit2\n");
5318		nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
5319		NFSLOCKCLSTATE();
5320		goto tryagain;
5321	}
5322	nfsv4_relref(&lyp->nfsly_lock);
5323	NFSUNLOCKCLSTATE();
5324	return (0);
5325}
5326
5327