nfs_clrpcops.c revision 291867
1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clrpcops.c 291867 2015-12-05 21:28:54Z rmacklem $");
36
37/*
38 * Rpc op calls, generally called from the vnode op calls or through the
39 * buffer cache, for NFS v2, 3 and 4.
40 * These do not normally make any changes to vnode arguments or use
41 * structures that might change between the VFS variants. The returned
42 * arguments are all at the end, after the NFSPROC_T *p one.
43 */
44
45#ifndef APPLEKEXT
46#include "opt_inet6.h"
47
48#include <fs/nfs/nfsport.h>
49#include <sys/sysctl.h>
50
51SYSCTL_DECL(_vfs_nfs);
52
53static int	nfsignore_eexist = 0;
54SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
55    &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
56
57/*
58 * Global variables
59 */
60extern int nfs_numnfscbd;
61extern struct timeval nfsboottime;
62extern u_int32_t newnfs_false, newnfs_true;
63extern nfstype nfsv34_type[9];
64extern int nfsrv_useacl;
65extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
66extern int nfscl_debuglevel;
67NFSCLSTATEMUTEX;
68int nfstest_outofseq = 0;
69int nfscl_assumeposixlocks = 1;
70int nfscl_enablecallb = 0;
71short nfsv4_cbport = NFSV4_CBPORT;
72int nfstest_openallsetattr = 0;
73#endif	/* !APPLEKEXT */
74
#define	DIRHDSIZ	(sizeof(struct dirent) - (MAXNAMLEN + 1))

/*
 * The three possible results of nfscl_getsameserver().
 */
enum nfsclds_state {
	/* Use this session for the DS. */
	NFSDSP_USETHISSESSION = 0,
	/* Use the nfsclds_sequence field of this dsp for a new session. */
	NFSDSP_SEQTHISSESSION = 1,
	/* No matching server was found. */
	NFSDSP_NOTFOUND = 2,
};
89
90static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
91    struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
92static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
93    nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
94static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
95    struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
96    void *);
97static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
98    nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
99    struct nfsvattr *, struct nfsfh **, int *, int *, void *);
100static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
101    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
102    NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
103    int *, void *, int *);
104static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
105    struct nfscllockowner *, u_int64_t, u_int64_t,
106    u_int32_t, struct ucred *, NFSPROC_T *, int);
107static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
108    struct acl *, nfsv4stateid_t *, void *);
109static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
110    uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
111    struct ucred *, NFSPROC_T *);
112static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_storage *,
113    struct nfsclds **, NFSPROC_T *);
114static void nfscl_initsessionslots(struct nfsclsession *);
115static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
116    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
117    struct nfsclflayout *, uint64_t, uint64_t, struct ucred *, NFSPROC_T *);
118static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
119    struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *,
120    NFSPROC_T *);
121static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
122    nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
123    struct nfsfh *, int, struct ucred *, NFSPROC_T *);
124static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
125    struct nfsclds *, struct nfsclds **);
126#ifdef notyet
127static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
128    struct nfsfh *, struct ucred *, NFSPROC_T *, void *);
129#endif
130
131/*
132 * nfs null call from vfs.
133 */
134APPLESTATIC int
135nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
136{
137	int error;
138	struct nfsrv_descript nfsd, *nd = &nfsd;
139
140	NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
141	error = nfscl_request(nd, vp, p, cred, NULL);
142	if (nd->nd_repstat && !error)
143		error = nd->nd_repstat;
144	mbuf_freem(nd->nd_mrep);
145	return (error);
146}
147
148/*
149 * nfs access rpc op.
150 * For nfs version 3 and 4, use the access rpc to check accessibility. If file
151 * modes are changed on the server, accesses might still fail later.
152 */
153APPLESTATIC int
154nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
155    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
156{
157	int error;
158	u_int32_t mode, rmode;
159
160	if (acmode & VREAD)
161		mode = NFSACCESS_READ;
162	else
163		mode = 0;
164	if (vnode_vtype(vp) == VDIR) {
165		if (acmode & VWRITE)
166			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
167				 NFSACCESS_DELETE);
168		if (acmode & VEXEC)
169			mode |= NFSACCESS_LOOKUP;
170	} else {
171		if (acmode & VWRITE)
172			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
173		if (acmode & VEXEC)
174			mode |= NFSACCESS_EXECUTE;
175	}
176
177	/*
178	 * Now, just call nfsrpc_accessrpc() to do the actual RPC.
179	 */
180	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
181	    NULL);
182
183	/*
184	 * The NFS V3 spec does not clarify whether or not
185	 * the returned access bits can be a superset of
186	 * the ones requested, so...
187	 */
188	if (!error && (rmode & mode) != mode)
189		error = EACCES;
190	return (error);
191}
192
193/*
194 * The actual rpc, separated out for Darwin.
195 */
196APPLESTATIC int
197nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
198    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
199    void *stuff)
200{
201	u_int32_t *tl;
202	u_int32_t supported, rmode;
203	int error;
204	struct nfsrv_descript nfsd, *nd = &nfsd;
205	nfsattrbit_t attrbits;
206
207	*attrflagp = 0;
208	supported = mode;
209	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
210	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
211	*tl = txdr_unsigned(mode);
212	if (nd->nd_flag & ND_NFSV4) {
213		/*
214		 * And do a Getattr op.
215		 */
216		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
217		*tl = txdr_unsigned(NFSV4OP_GETATTR);
218		NFSGETATTR_ATTRBIT(&attrbits);
219		(void) nfsrv_putattrbit(nd, &attrbits);
220	}
221	error = nfscl_request(nd, vp, p, cred, stuff);
222	if (error)
223		return (error);
224	if (nd->nd_flag & ND_NFSV3) {
225		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
226		if (error)
227			goto nfsmout;
228	}
229	if (!nd->nd_repstat) {
230		if (nd->nd_flag & ND_NFSV4) {
231			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
232			supported = fxdr_unsigned(u_int32_t, *tl++);
233		} else {
234			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
235		}
236		rmode = fxdr_unsigned(u_int32_t, *tl);
237		if (nd->nd_flag & ND_NFSV4)
238			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
239
240		/*
241		 * It's not obvious what should be done about
242		 * unsupported access modes. For now, be paranoid
243		 * and clear the unsupported ones.
244		 */
245		rmode &= supported;
246		*rmodep = rmode;
247	} else
248		error = nd->nd_repstat;
249nfsmout:
250	mbuf_freem(nd->nd_mrep);
251	return (error);
252}
253
254/*
255 * nfs open rpc
256 */
257APPLESTATIC int
258nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
259{
260	struct nfsclopen *op;
261	struct nfscldeleg *dp;
262	struct nfsfh *nfhp;
263	struct nfsnode *np = VTONFS(vp);
264	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
265	u_int32_t mode, clidrev;
266	int ret, newone, error, expireret = 0, retrycnt;
267
268	/*
269	 * For NFSv4, Open Ops are only done on Regular Files.
270	 */
271	if (vnode_vtype(vp) != VREG)
272		return (0);
273	mode = 0;
274	if (amode & FREAD)
275		mode |= NFSV4OPEN_ACCESSREAD;
276	if (amode & FWRITE)
277		mode |= NFSV4OPEN_ACCESSWRITE;
278	nfhp = np->n_fhp;
279
280	retrycnt = 0;
281#ifdef notdef
282{ char name[100]; int namel;
283namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
284bcopy(NFS4NODENAME(np->n_v4), name, namel);
285name[namel] = '\0';
286printf("rpcopen p=0x%x name=%s",p->p_pid,name);
287if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
288else printf(" fhl=0\n");
289}
290#endif
291	do {
292	    dp = NULL;
293	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
294		cred, p, NULL, &op, &newone, &ret, 1);
295	    if (error) {
296		return (error);
297	    }
298	    if (nmp->nm_clp != NULL)
299		clidrev = nmp->nm_clp->nfsc_clientidrev;
300	    else
301		clidrev = 0;
302	    if (ret == NFSCLOPEN_DOOPEN) {
303		if (np->n_v4 != NULL) {
304			error = nfsrpc_openrpc(nmp, vp, np->n_v4->n4_data,
305			   np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
306			   np->n_fhp->nfh_len, mode, op,
307			   NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &dp,
308			   0, 0x0, cred, p, 0, 0);
309			if (dp != NULL) {
310#ifdef APPLE
311				OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
312#else
313				NFSLOCKNODE(np);
314				np->n_flag &= ~NDELEGMOD;
315				/*
316				 * Invalidate the attribute cache, so that
317				 * attributes that pre-date the issue of a
318				 * delegation are not cached, since the
319				 * cached attributes will remain valid while
320				 * the delegation is held.
321				 */
322				NFSINVALATTRCACHE(np);
323				NFSUNLOCKNODE(np);
324#endif
325				(void) nfscl_deleg(nmp->nm_mountp,
326				    op->nfso_own->nfsow_clp,
327				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
328			}
329		} else {
330			error = EIO;
331		}
332		newnfs_copyincred(cred, &op->nfso_cred);
333	    } else if (ret == NFSCLOPEN_SETCRED)
334		/*
335		 * This is a new local open on a delegation. It needs
336		 * to have credentials so that an open can be done
337		 * against the server during recovery.
338		 */
339		newnfs_copyincred(cred, &op->nfso_cred);
340
341	    /*
342	     * nfso_opencnt is the count of how many VOP_OPEN()s have
343	     * been done on this Open successfully and a VOP_CLOSE()
344	     * is expected for each of these.
345	     * If error is non-zero, don't increment it, since the Open
346	     * hasn't succeeded yet.
347	     */
348	    if (!error)
349		op->nfso_opencnt++;
350	    nfscl_openrelease(op, error, newone);
351	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
352		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
353		error == NFSERR_BADSESSION) {
354		(void) nfs_catnap(PZERO, error, "nfs_open");
355	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
356		&& clidrev != 0) {
357		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
358		retrycnt++;
359	    }
360	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
361	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
362	    error == NFSERR_BADSESSION ||
363	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
364	     expireret == 0 && clidrev != 0 && retrycnt < 4));
365	if (error && retrycnt >= 4)
366		error = EIO;
367	return (error);
368}
369
370/*
371 * the actual open rpc
372 */
373APPLESTATIC int
374nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
375    u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
376    u_int8_t *name, int namelen, struct nfscldeleg **dpp,
377    int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
378    int syscred, int recursed)
379{
380	u_int32_t *tl;
381	struct nfsrv_descript nfsd, *nd = &nfsd;
382	struct nfscldeleg *dp, *ndp = NULL;
383	struct nfsvattr nfsva;
384	u_int32_t rflags, deleg;
385	nfsattrbit_t attrbits;
386	int error, ret, acesize, limitby;
387
388	dp = *dpp;
389	*dpp = NULL;
390	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL);
391	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
392	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
393	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
394	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
395	*tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0];
396	*tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1];
397	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
398	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
399	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
400	if (reclaim) {
401		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
402		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
403		*tl = txdr_unsigned(delegtype);
404	} else {
405		if (dp != NULL) {
406			*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
407			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
408			if (NFSHASNFSV4N(nmp))
409				*tl++ = 0;
410			else
411				*tl++ = dp->nfsdl_stateid.seqid;
412			*tl++ = dp->nfsdl_stateid.other[0];
413			*tl++ = dp->nfsdl_stateid.other[1];
414			*tl = dp->nfsdl_stateid.other[2];
415		} else {
416			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
417		}
418		(void) nfsm_strtom(nd, name, namelen);
419	}
420	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
421	*tl = txdr_unsigned(NFSV4OP_GETATTR);
422	NFSZERO_ATTRBIT(&attrbits);
423	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
424	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
425	(void) nfsrv_putattrbit(nd, &attrbits);
426	if (syscred)
427		nd->nd_flag |= ND_USEGSSNAME;
428	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
429	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
430	if (error)
431		return (error);
432	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
433	if (!nd->nd_repstat) {
434		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
435		    6 * NFSX_UNSIGNED);
436		op->nfso_stateid.seqid = *tl++;
437		op->nfso_stateid.other[0] = *tl++;
438		op->nfso_stateid.other[1] = *tl++;
439		op->nfso_stateid.other[2] = *tl;
440		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
441		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
442		if (error)
443			goto nfsmout;
444		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
445		deleg = fxdr_unsigned(u_int32_t, *tl);
446		if (deleg == NFSV4OPEN_DELEGATEREAD ||
447		    deleg == NFSV4OPEN_DELEGATEWRITE) {
448			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
449			      NFSCLFLAGS_FIRSTDELEG))
450				op->nfso_own->nfsow_clp->nfsc_flags |=
451				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
452			MALLOC(ndp, struct nfscldeleg *,
453			    sizeof (struct nfscldeleg) + newfhlen,
454			    M_NFSCLDELEG, M_WAITOK);
455			LIST_INIT(&ndp->nfsdl_owner);
456			LIST_INIT(&ndp->nfsdl_lock);
457			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
458			ndp->nfsdl_fhlen = newfhlen;
459			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
460			newnfs_copyincred(cred, &ndp->nfsdl_cred);
461			nfscl_lockinit(&ndp->nfsdl_rwlock);
462			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
463			    NFSX_UNSIGNED);
464			ndp->nfsdl_stateid.seqid = *tl++;
465			ndp->nfsdl_stateid.other[0] = *tl++;
466			ndp->nfsdl_stateid.other[1] = *tl++;
467			ndp->nfsdl_stateid.other[2] = *tl++;
468			ret = fxdr_unsigned(int, *tl);
469			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
470				ndp->nfsdl_flags = NFSCLDL_WRITE;
471				/*
472				 * Indicates how much the file can grow.
473				 */
474				NFSM_DISSECT(tl, u_int32_t *,
475				    3 * NFSX_UNSIGNED);
476				limitby = fxdr_unsigned(int, *tl++);
477				switch (limitby) {
478				case NFSV4OPEN_LIMITSIZE:
479					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
480					break;
481				case NFSV4OPEN_LIMITBLOCKS:
482					ndp->nfsdl_sizelimit =
483					    fxdr_unsigned(u_int64_t, *tl++);
484					ndp->nfsdl_sizelimit *=
485					    fxdr_unsigned(u_int64_t, *tl);
486					break;
487				default:
488					error = NFSERR_BADXDR;
489					goto nfsmout;
490				};
491			} else {
492				ndp->nfsdl_flags = NFSCLDL_READ;
493			}
494			if (ret)
495				ndp->nfsdl_flags |= NFSCLDL_RECALL;
496			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
497			    &acesize, p);
498			if (error)
499				goto nfsmout;
500		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
501			error = NFSERR_BADXDR;
502			goto nfsmout;
503		}
504		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
505		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
506		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
507		    NULL, NULL, NULL, p, cred);
508		if (error)
509			goto nfsmout;
510		if (ndp != NULL) {
511			ndp->nfsdl_change = nfsva.na_filerev;
512			ndp->nfsdl_modtime = nfsva.na_mtime;
513			ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
514		}
515		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
516		    do {
517			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
518			    cred, p);
519			if (ret == NFSERR_DELAY)
520			    (void) nfs_catnap(PZERO, ret, "nfs_open");
521		    } while (ret == NFSERR_DELAY);
522		    error = ret;
523		}
524		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
525		    nfscl_assumeposixlocks)
526		    op->nfso_posixlock = 1;
527		else
528		    op->nfso_posixlock = 0;
529
530		/*
531		 * If the server is handing out delegations, but we didn't
532		 * get one because an OpenConfirm was required, try the
533		 * Open again, to get a delegation. This is a harmless no-op,
534		 * from a server's point of view.
535		 */
536		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
537		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
538		    && !error && dp == NULL && ndp == NULL && !recursed) {
539		    do {
540			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
541			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
542			    cred, p, syscred, 1);
543			if (ret == NFSERR_DELAY)
544			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
545		    } while (ret == NFSERR_DELAY);
546		    if (ret) {
547			if (ndp != NULL)
548				FREE((caddr_t)ndp, M_NFSCLDELEG);
549			if (ret == NFSERR_STALECLIENTID ||
550			    ret == NFSERR_STALEDONTRECOVER ||
551			    ret == NFSERR_BADSESSION)
552				error = ret;
553		    }
554		}
555	}
556	if (nd->nd_repstat != 0 && error == 0)
557		error = nd->nd_repstat;
558	if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
559		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
560nfsmout:
561	if (!error)
562		*dpp = ndp;
563	else if (ndp != NULL)
564		FREE((caddr_t)ndp, M_NFSCLDELEG);
565	mbuf_freem(nd->nd_mrep);
566	return (error);
567}
568
569/*
570 * open downgrade rpc
571 */
572APPLESTATIC int
573nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
574    struct ucred *cred, NFSPROC_T *p)
575{
576	u_int32_t *tl;
577	struct nfsrv_descript nfsd, *nd = &nfsd;
578	int error;
579
580	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
581	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
582	if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp))))
583		*tl++ = 0;
584	else
585		*tl++ = op->nfso_stateid.seqid;
586	*tl++ = op->nfso_stateid.other[0];
587	*tl++ = op->nfso_stateid.other[1];
588	*tl++ = op->nfso_stateid.other[2];
589	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
590	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
591	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
592	error = nfscl_request(nd, vp, p, cred, NULL);
593	if (error)
594		return (error);
595	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
596	if (!nd->nd_repstat) {
597		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
598		op->nfso_stateid.seqid = *tl++;
599		op->nfso_stateid.other[0] = *tl++;
600		op->nfso_stateid.other[1] = *tl++;
601		op->nfso_stateid.other[2] = *tl;
602	}
603	if (nd->nd_repstat && error == 0)
604		error = nd->nd_repstat;
605	if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION)
606		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
607nfsmout:
608	mbuf_freem(nd->nd_mrep);
609	return (error);
610}
611
612/*
613 * V4 Close operation.
614 */
615APPLESTATIC int
616nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
617{
618	struct nfsclclient *clp;
619	int error;
620
621	if (vnode_vtype(vp) != VREG)
622		return (0);
623	if (doclose)
624		error = nfscl_doclose(vp, &clp, p);
625	else
626		error = nfscl_getclose(vp, &clp);
627	if (error)
628		return (error);
629
630	nfscl_clientrelease(clp);
631	return (0);
632}
633
634/*
635 * Close the open.
636 */
637APPLESTATIC void
638nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
639{
640	struct nfsrv_descript nfsd, *nd = &nfsd;
641	struct nfscllockowner *lp, *nlp;
642	struct nfscllock *lop, *nlop;
643	struct ucred *tcred;
644	u_int64_t off = 0, len = 0;
645	u_int32_t type = NFSV4LOCKT_READ;
646	int error, do_unlock, trycnt;
647
648	tcred = newnfs_getcred();
649	newnfs_copycred(&op->nfso_cred, tcred);
650	/*
651	 * (Theoretically this could be done in the same
652	 *  compound as the close, but having multiple
653	 *  sequenced Ops in the same compound might be
654	 *  too scary for some servers.)
655	 */
656	if (op->nfso_posixlock) {
657		off = 0;
658		len = NFS64BITSSET;
659		type = NFSV4LOCKT_READ;
660	}
661
662	/*
663	 * Since this function is only called from VOP_INACTIVE(), no
664	 * other thread will be manipulating this Open. As such, the
665	 * lock lists are not being changed by other threads, so it should
666	 * be safe to do this without locking.
667	 */
668	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
669		do_unlock = 1;
670		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
671			if (op->nfso_posixlock == 0) {
672				off = lop->nfslo_first;
673				len = lop->nfslo_end - lop->nfslo_first;
674				if (lop->nfslo_type == F_WRLCK)
675					type = NFSV4LOCKT_WRITE;
676				else
677					type = NFSV4LOCKT_READ;
678			}
679			if (do_unlock) {
680				trycnt = 0;
681				do {
682					error = nfsrpc_locku(nd, nmp, lp, off,
683					    len, type, tcred, p, 0);
684					if ((nd->nd_repstat == NFSERR_GRACE ||
685					    nd->nd_repstat == NFSERR_DELAY) &&
686					    error == 0)
687						(void) nfs_catnap(PZERO,
688						    (int)nd->nd_repstat,
689						    "nfs_close");
690				} while ((nd->nd_repstat == NFSERR_GRACE ||
691				    nd->nd_repstat == NFSERR_DELAY) &&
692				    error == 0 && trycnt++ < 5);
693				if (op->nfso_posixlock)
694					do_unlock = 0;
695			}
696			nfscl_freelock(lop, 0);
697		}
698		/*
699		 * Do a ReleaseLockOwner.
700		 * The lock owner name nfsl_owner may be used by other opens for
701		 * other files but the lock_owner4 name that nfsrpc_rellockown()
702		 * puts on the wire has the file handle for this file appended
703		 * to it, so it can be done now.
704		 */
705		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
706		    lp->nfsl_open->nfso_fhlen, tcred, p);
707	}
708
709	/*
710	 * There could be other Opens for different files on the same
711	 * OpenOwner, so locking is required.
712	 */
713	NFSLOCKCLSTATE();
714	nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
715	NFSUNLOCKCLSTATE();
716	do {
717		error = nfscl_tryclose(op, tcred, nmp, p);
718		if (error == NFSERR_GRACE)
719			(void) nfs_catnap(PZERO, error, "nfs_close");
720	} while (error == NFSERR_GRACE);
721	NFSLOCKCLSTATE();
722	nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
723
724	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
725		nfscl_freelockowner(lp, 0);
726	nfscl_freeopen(op, 0);
727	NFSUNLOCKCLSTATE();
728	NFSFREECRED(tcred);
729}
730
731/*
732 * The actual Close RPC.
733 */
734APPLESTATIC int
735nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
736    struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
737    int syscred)
738{
739	u_int32_t *tl;
740	int error;
741
742	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
743	    op->nfso_fhlen, NULL, NULL);
744	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
745	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
746	if (NFSHASNFSV4N(nmp))
747		*tl++ = 0;
748	else
749		*tl++ = op->nfso_stateid.seqid;
750	*tl++ = op->nfso_stateid.other[0];
751	*tl++ = op->nfso_stateid.other[1];
752	*tl = op->nfso_stateid.other[2];
753	if (syscred)
754		nd->nd_flag |= ND_USEGSSNAME;
755	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
756	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
757	if (error)
758		return (error);
759	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
760	if (nd->nd_repstat == 0)
761		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
762	error = nd->nd_repstat;
763	if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION)
764		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
765nfsmout:
766	mbuf_freem(nd->nd_mrep);
767	return (error);
768}
769
770/*
771 * V4 Open Confirm RPC.
772 */
773APPLESTATIC int
774nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
775    struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
776{
777	u_int32_t *tl;
778	struct nfsrv_descript nfsd, *nd = &nfsd;
779	struct nfsmount *nmp;
780	int error;
781
782	nmp = VFSTONFS(vnode_mount(vp));
783	if (NFSHASNFSV4N(nmp))
784		return (0);		/* No confirmation for NFSv4.1. */
785	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL);
786	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
787	*tl++ = op->nfso_stateid.seqid;
788	*tl++ = op->nfso_stateid.other[0];
789	*tl++ = op->nfso_stateid.other[1];
790	*tl++ = op->nfso_stateid.other[2];
791	*tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
792	error = nfscl_request(nd, vp, p, cred, NULL);
793	if (error)
794		return (error);
795	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
796	if (!nd->nd_repstat) {
797		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
798		op->nfso_stateid.seqid = *tl++;
799		op->nfso_stateid.other[0] = *tl++;
800		op->nfso_stateid.other[1] = *tl++;
801		op->nfso_stateid.other[2] = *tl;
802	}
803	error = nd->nd_repstat;
804	if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION)
805		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
806nfsmout:
807	mbuf_freem(nd->nd_mrep);
808	return (error);
809}
810
811/*
812 * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
813 * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
814 */
815APPLESTATIC int
816nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
817    struct ucred *cred, NFSPROC_T *p)
818{
819	u_int32_t *tl;
820	struct nfsrv_descript nfsd;
821	struct nfsrv_descript *nd = &nfsd;
822	nfsattrbit_t attrbits;
823	u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
824	u_short port;
825	int error, isinet6 = 0, callblen;
826	nfsquad_t confirm;
827	u_int32_t lease;
828	static u_int32_t rev = 0;
829	struct nfsclds *dsp, *ndsp, *tdsp;
830
831	if (nfsboottime.tv_sec == 0)
832		NFSSETBOOTTIME(nfsboottime);
833	clp->nfsc_rev = rev++;
834	if (NFSHASNFSV4N(nmp)) {
835		error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq,
836		    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p);
837		NFSCL_DEBUG(1, "aft exch=%d\n", error);
838		if (error == 0) {
839			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
840			    &nmp->nm_sockreq,
841			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
842			if (error == 0) {
843				NFSLOCKMNT(nmp);
844				TAILQ_FOREACH_SAFE(tdsp, &nmp->nm_sess,
845				    nfsclds_list, ndsp)
846					nfscl_freenfsclds(tdsp);
847				TAILQ_INIT(&nmp->nm_sess);
848				TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
849				    nfsclds_list);
850				NFSUNLOCKMNT(nmp);
851			} else
852				nfscl_freenfsclds(dsp);
853			NFSCL_DEBUG(1, "aft createsess=%d\n", error);
854		}
855		if (error == 0 && reclaim == 0) {
856			error = nfsrpc_reclaimcomplete(nmp, cred, p);
857			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
858			if (error == NFSERR_COMPLETEALREADY ||
859			    error == NFSERR_NOTSUPP)
860				/* Ignore this error. */
861				error = 0;
862		}
863		return (error);
864	}
865
866	/*
867	 * Allocate a single session structure for NFSv4.0, because some of
868	 * the fields are used by NFSv4.0 although it doesn't do a session.
869	 */
870	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
871	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
872	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
873	NFSLOCKMNT(nmp);
874	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
875	NFSUNLOCKMNT(nmp);
876
877	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL);
878	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
879	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
880	*tl = txdr_unsigned(clp->nfsc_rev);
881	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
882
883	/*
884	 * set up the callback address
885	 */
886	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
887	*tl = txdr_unsigned(NFS_CALLBCKPROG);
888	callblen = strlen(nfsv4_callbackaddr);
889	if (callblen == 0)
890		cp = nfscl_getmyip(nmp, &isinet6);
891	if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
892	    (callblen > 0 || cp != NULL)) {
893		port = htons(nfsv4_cbport);
894		cp2 = (u_int8_t *)&port;
895#ifdef INET6
896		if ((callblen > 0 &&
897		     strchr(nfsv4_callbackaddr, ':')) || isinet6) {
898			char ip6buf[INET6_ADDRSTRLEN], *ip6add;
899
900			(void) nfsm_strtom(nd, "tcp6", 4);
901			if (callblen == 0) {
902				ip6_sprintf(ip6buf, (struct in6_addr *)cp);
903				ip6add = ip6buf;
904			} else {
905				ip6add = nfsv4_callbackaddr;
906			}
907			snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
908			    ip6add, cp2[0], cp2[1]);
909		} else
910#endif
911		{
912			(void) nfsm_strtom(nd, "tcp", 3);
913			if (callblen == 0)
914				snprintf(addr, INET6_ADDRSTRLEN + 9,
915				    "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
916				    cp[2], cp[3], cp2[0], cp2[1]);
917			else
918				snprintf(addr, INET6_ADDRSTRLEN + 9,
919				    "%s.%d.%d", nfsv4_callbackaddr,
920				    cp2[0], cp2[1]);
921		}
922		(void) nfsm_strtom(nd, addr, strlen(addr));
923	} else {
924		(void) nfsm_strtom(nd, "tcp", 3);
925		(void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
926	}
927	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
928	*tl = txdr_unsigned(clp->nfsc_cbident);
929	nd->nd_flag |= ND_USEGSSNAME;
930	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
931		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
932	if (error)
933		return (error);
934	if (nd->nd_repstat == 0) {
935	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
936	    NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0] = *tl++;
937	    NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1] = *tl++;
938	    confirm.lval[0] = *tl++;
939	    confirm.lval[1] = *tl;
940	    mbuf_freem(nd->nd_mrep);
941	    nd->nd_mrep = NULL;
942
943	    /*
944	     * and confirm it.
945	     */
946	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
947		NULL);
948	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
949	    *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0];
950	    *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1];
951	    *tl++ = confirm.lval[0];
952	    *tl = confirm.lval[1];
953	    nd->nd_flag |= ND_USEGSSNAME;
954	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
955		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
956	    if (error)
957		return (error);
958	    mbuf_freem(nd->nd_mrep);
959	    nd->nd_mrep = NULL;
960	    if (nd->nd_repstat == 0) {
961		nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
962		    nmp->nm_fhsize, NULL, NULL);
963		NFSZERO_ATTRBIT(&attrbits);
964		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
965		(void) nfsrv_putattrbit(nd, &attrbits);
966		nd->nd_flag |= ND_USEGSSNAME;
967		error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
968		    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
969		if (error)
970		    return (error);
971		if (nd->nd_repstat == 0) {
972		    error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL,
973			NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred);
974		    if (error)
975			goto nfsmout;
976		    clp->nfsc_renew = NFSCL_RENEW(lease);
977		    clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
978		    clp->nfsc_clientidrev++;
979		    if (clp->nfsc_clientidrev == 0)
980			clp->nfsc_clientidrev++;
981		}
982	    }
983	}
984	error = nd->nd_repstat;
985nfsmout:
986	mbuf_freem(nd->nd_mrep);
987	return (error);
988}
989
990/*
991 * nfs getattr call.
992 */
993APPLESTATIC int
994nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
995    struct nfsvattr *nap, void *stuff)
996{
997	struct nfsrv_descript nfsd, *nd = &nfsd;
998	int error;
999	nfsattrbit_t attrbits;
1000
1001	NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
1002	if (nd->nd_flag & ND_NFSV4) {
1003		NFSGETATTR_ATTRBIT(&attrbits);
1004		(void) nfsrv_putattrbit(nd, &attrbits);
1005	}
1006	error = nfscl_request(nd, vp, p, cred, stuff);
1007	if (error)
1008		return (error);
1009	if (!nd->nd_repstat)
1010		error = nfsm_loadattr(nd, nap);
1011	else
1012		error = nd->nd_repstat;
1013	mbuf_freem(nd->nd_mrep);
1014	return (error);
1015}
1016
1017/*
1018 * nfs getattr call with non-vnode arguemnts.
1019 */
1020APPLESTATIC int
1021nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1022    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1023    uint32_t *leasep)
1024{
1025	struct nfsrv_descript nfsd, *nd = &nfsd;
1026	int error, vers = NFS_VER2;
1027	nfsattrbit_t attrbits;
1028
1029	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL);
1030	if (nd->nd_flag & ND_NFSV4) {
1031		vers = NFS_VER4;
1032		NFSGETATTR_ATTRBIT(&attrbits);
1033		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1034		(void) nfsrv_putattrbit(nd, &attrbits);
1035	} else if (nd->nd_flag & ND_NFSV3) {
1036		vers = NFS_VER3;
1037	}
1038	if (syscred)
1039		nd->nd_flag |= ND_USEGSSNAME;
1040	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1041	    NFS_PROG, vers, NULL, 1, xidp, NULL);
1042	if (error)
1043		return (error);
1044	if (nd->nd_repstat == 0) {
1045		if ((nd->nd_flag & ND_NFSV4) != 0)
1046			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1047			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1048			    NULL, NULL);
1049		else
1050			error = nfsm_loadattr(nd, nap);
1051	} else
1052		error = nd->nd_repstat;
1053	mbuf_freem(nd->nd_mrep);
1054	return (error);
1055}
1056
/*
 * Do an nfs setattr operation.
 * When vap is non-NULL the attributes in vap are set via
 * nfsrpc_setattrrpc(); otherwise the ACL in aclp is set via
 * nfsrpc_setaclrpc().
 * For an NFSv4 mount a stateid is looked up first and, for a regular
 * file with no usable stateid, the file may be opened just for the
 * duration of the RPC.
 * The RPC is retried for the recoverable NFSv4 errors listed in the
 * loop condition.  Any error still set once the loop has run four or
 * more times is returned as EIO.
 */
APPLESTATIC int
nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
    void *stuff)
{
	int error, expireret = 0, openerr, retrycnt;
	u_int32_t clidrev = 0, mode;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsfh *nfhp;
	nfsv4stateid_t stateid;
	void *lckp;

	/* Record the clientid revision; 0 means there is no client structure. */
	if (nmp->nm_clp != NULL)
		clidrev = nmp->nm_clp->nfsc_clientidrev;
	/* Ask for write access when the size is being set, else read access. */
	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
		mode = NFSV4OPEN_ACCESSWRITE;
	else
		mode = NFSV4OPEN_ACCESSREAD;
	retrycnt = 0;
	do {
		lckp = NULL;
		/* openerr stays non-zero unless the file gets opened below. */
		openerr = 1;
		if (NFSHASNFSV4(nmp)) {
			nfhp = VTONFS(vp)->n_fhp;
			error = nfscl_getstateid(vp, nfhp->nfh_fh,
			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
			if (error && vnode_vtype(vp) == VREG &&
			    (mode == NFSV4OPEN_ACCESSWRITE ||
			     nfstest_openallsetattr)) {
				/*
				 * No Open stateid, so try and open the file
				 * now.
				 */
				if (mode == NFSV4OPEN_ACCESSWRITE)
					openerr = nfsrpc_open(vp, FWRITE, cred,
					    p);
				else
					openerr = nfsrpc_open(vp, FREAD, cred,
					    p);
				/* The open worked, so fetch the stateid again. */
				if (!openerr)
					(void) nfscl_getstateid(vp,
					    nfhp->nfh_fh, nfhp->nfh_len,
					    mode, 0, cred, p, &stateid, &lckp);
			}
		}
		if (vap != NULL)
			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
			    rnap, attrflagp, stuff);
		else
			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
			    stuff);
		if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION)
			nfscl_initiate_recovery(nmp->nm_clp);
		if (lckp != NULL)
			nfscl_lockderef(lckp);
		/* Close the file again if it was opened only for this RPC. */
		if (!openerr)
			(void) nfsrpc_close(vp, 0, p);
		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
			/* Nap before the retry done by the loop condition. */
			(void) nfs_catnap(PZERO, error, "nfs_setattr");
		} else if ((error == NFSERR_EXPIRED ||
		    error == NFSERR_BADSTATEID) && clidrev != 0) {
			/* A zero return lets the loop condition retry. */
			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
		}
		retrycnt++;
	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
	    error == NFSERR_BADSESSION ||
	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/* retrycnt counts every pass, including the first one. */
	if (error && retrycnt >= 4)
		error = EIO;
	return (error);
}
1136
1137static int
1138nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1139    nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1140    struct nfsvattr *rnap, int *attrflagp, void *stuff)
1141{
1142	u_int32_t *tl;
1143	struct nfsrv_descript nfsd, *nd = &nfsd;
1144	int error;
1145	nfsattrbit_t attrbits;
1146
1147	*attrflagp = 0;
1148	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
1149	if (nd->nd_flag & ND_NFSV4)
1150		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1151	vap->va_type = vnode_vtype(vp);
1152	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1153	if (nd->nd_flag & ND_NFSV3) {
1154		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1155		*tl = newnfs_false;
1156	} else if (nd->nd_flag & ND_NFSV4) {
1157		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1158		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1159		NFSGETATTR_ATTRBIT(&attrbits);
1160		(void) nfsrv_putattrbit(nd, &attrbits);
1161	}
1162	error = nfscl_request(nd, vp, p, cred, stuff);
1163	if (error)
1164		return (error);
1165	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1166		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
1167	if ((nd->nd_flag & ND_NFSV4) && !error)
1168		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1169	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1170		error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1171	mbuf_freem(nd->nd_mrep);
1172	if (nd->nd_repstat && !error)
1173		error = nd->nd_repstat;
1174	return (error);
1175}
1176
1177/*
1178 * nfs lookup rpc
1179 */
1180APPLESTATIC int
1181nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1182    NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1183    struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
1184{
1185	u_int32_t *tl;
1186	struct nfsrv_descript nfsd, *nd = &nfsd;
1187	struct nfsmount *nmp;
1188	struct nfsnode *np;
1189	struct nfsfh *nfhp;
1190	nfsattrbit_t attrbits;
1191	int error = 0, lookupp = 0;
1192
1193	*attrflagp = 0;
1194	*dattrflagp = 0;
1195	if (vnode_vtype(dvp) != VDIR)
1196		return (ENOTDIR);
1197	nmp = VFSTONFS(vnode_mount(dvp));
1198	if (len > NFS_MAXNAMLEN)
1199		return (ENAMETOOLONG);
1200	if (NFSHASNFSV4(nmp) && len == 1 &&
1201		name[0] == '.') {
1202		/*
1203		 * Just return the current dir's fh.
1204		 */
1205		np = VTONFS(dvp);
1206		MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
1207			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1208		nfhp->nfh_len = np->n_fhp->nfh_len;
1209		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1210		*nfhpp = nfhp;
1211		return (0);
1212	}
1213	if (NFSHASNFSV4(nmp) && len == 2 &&
1214		name[0] == '.' && name[1] == '.') {
1215		lookupp = 1;
1216		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
1217	} else {
1218		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
1219		(void) nfsm_strtom(nd, name, len);
1220	}
1221	if (nd->nd_flag & ND_NFSV4) {
1222		NFSGETATTR_ATTRBIT(&attrbits);
1223		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1224		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
1225		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1226		(void) nfsrv_putattrbit(nd, &attrbits);
1227	}
1228	error = nfscl_request(nd, dvp, p, cred, stuff);
1229	if (error)
1230		return (error);
1231	if (nd->nd_repstat) {
1232		/*
1233		 * When an NFSv4 Lookupp returns ENOENT, it means that
1234		 * the lookup is at the root of an fs, so return this dir.
1235		 */
1236		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1237		    np = VTONFS(dvp);
1238		    MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
1239			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1240		    nfhp->nfh_len = np->n_fhp->nfh_len;
1241		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1242		    *nfhpp = nfhp;
1243		    mbuf_freem(nd->nd_mrep);
1244		    return (0);
1245		}
1246		if (nd->nd_flag & ND_NFSV3)
1247		    error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1248		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1249		    ND_NFSV4) {
1250			/* Load the directory attributes. */
1251			error = nfsm_loadattr(nd, dnap);
1252			if (error == 0)
1253				*dattrflagp = 1;
1254		}
1255		goto nfsmout;
1256	}
1257	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1258		/* Load the directory attributes. */
1259		error = nfsm_loadattr(nd, dnap);
1260		if (error != 0)
1261			goto nfsmout;
1262		*dattrflagp = 1;
1263		/* Skip over the Lookup and GetFH operation status values. */
1264		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1265	}
1266	error = nfsm_getfh(nd, nfhpp);
1267	if (error)
1268		goto nfsmout;
1269
1270	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1271	if ((nd->nd_flag & ND_NFSV3) && !error)
1272		error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1273nfsmout:
1274	mbuf_freem(nd->nd_mrep);
1275	if (!error && nd->nd_repstat)
1276		error = nd->nd_repstat;
1277	return (error);
1278}
1279
1280/*
1281 * Do a readlink rpc.
1282 */
1283APPLESTATIC int
1284nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1285    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1286{
1287	u_int32_t *tl;
1288	struct nfsrv_descript nfsd, *nd = &nfsd;
1289	struct nfsnode *np = VTONFS(vp);
1290	nfsattrbit_t attrbits;
1291	int error, len, cangetattr = 1;
1292
1293	*attrflagp = 0;
1294	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
1295	if (nd->nd_flag & ND_NFSV4) {
1296		/*
1297		 * And do a Getattr op.
1298		 */
1299		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1300		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1301		NFSGETATTR_ATTRBIT(&attrbits);
1302		(void) nfsrv_putattrbit(nd, &attrbits);
1303	}
1304	error = nfscl_request(nd, vp, p, cred, stuff);
1305	if (error)
1306		return (error);
1307	if (nd->nd_flag & ND_NFSV3)
1308		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1309	if (!nd->nd_repstat && !error) {
1310		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1311		/*
1312		 * This seems weird to me, but must have been added to
1313		 * FreeBSD for some reason. The only thing I can think of
1314		 * is that there was/is some server that replies with
1315		 * more link data than it should?
1316		 */
1317		if (len == NFS_MAXPATHLEN) {
1318			NFSLOCKNODE(np);
1319			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1320				len = np->n_size;
1321				cangetattr = 0;
1322			}
1323			NFSUNLOCKNODE(np);
1324		}
1325		error = nfsm_mbufuio(nd, uiop, len);
1326		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1327			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1328	}
1329	if (nd->nd_repstat && !error)
1330		error = nd->nd_repstat;
1331nfsmout:
1332	mbuf_freem(nd->nd_mrep);
1333	return (error);
1334}
1335
/*
 * Read operation.
 * Wrapper around nfsrpc_readrpc() that, for NFSv4, looks up a read
 * stateid before each attempt and uses a credential obtained from
 * NFSNEWCRED(), which is released with NFSFREECRED() before returning.
 * The RPC is retried for the recoverable NFSv4 errors listed in the
 * loop condition.  Any error still set once the loop has run four or
 * more times is returned as EIO.
 */
APPLESTATIC int
nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
{
	int error, expireret = 0, retrycnt;
	u_int32_t clidrev = 0;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsnode *np = VTONFS(vp);
	struct ucred *newcred;
	struct nfsfh *nfhp = NULL;
	nfsv4stateid_t stateid;
	void *lckp;

	/* Record the clientid revision; 0 means there is no client structure. */
	if (nmp->nm_clp != NULL)
		clidrev = nmp->nm_clp->nfsc_clientidrev;
	newcred = cred;
	if (NFSHASNFSV4(nmp)) {
		nfhp = np->n_fhp;
		newcred = NFSNEWCRED(cred);
	}
	retrycnt = 0;
	do {
		lckp = NULL;
		/* The return value is ignored; the RPC is done regardless. */
		if (NFSHASNFSV4(nmp))
			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
			    &lckp);
		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
		    attrflagp, stuff);
		if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION)
			nfscl_initiate_recovery(nmp->nm_clp);
		if (lckp != NULL)
			nfscl_lockderef(lckp);
		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
			/* Nap before the retry done by the loop condition. */
			(void) nfs_catnap(PZERO, error, "nfs_read");
		} else if ((error == NFSERR_EXPIRED ||
		    error == NFSERR_BADSTATEID) && clidrev != 0) {
			/* A zero return lets the loop condition retry. */
			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
		}
		retrycnt++;
	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
	    error == NFSERR_BADSESSION ||
	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/* retrycnt counts every pass, including the first one. */
	if (error && retrycnt >= 4)
		error = EIO;
	if (NFSHASNFSV4(nmp))
		NFSFREECRED(newcred);
	return (error);
}
1393
1394/*
1395 * The actual read RPC.
1396 */
1397static int
1398nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1399    nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1400    int *attrflagp, void *stuff)
1401{
1402	u_int32_t *tl;
1403	int error = 0, len, retlen, tsiz, eof = 0;
1404	struct nfsrv_descript nfsd;
1405	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1406	struct nfsrv_descript *nd = &nfsd;
1407	int rsize;
1408	off_t tmp_off;
1409
1410	*attrflagp = 0;
1411	tsiz = uio_uio_resid(uiop);
1412	tmp_off = uiop->uio_offset + tsiz;
1413	NFSLOCKMNT(nmp);
1414	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1415		NFSUNLOCKMNT(nmp);
1416		return (EFBIG);
1417	}
1418	rsize = nmp->nm_rsize;
1419	NFSUNLOCKMNT(nmp);
1420	nd->nd_mrep = NULL;
1421	while (tsiz > 0) {
1422		*attrflagp = 0;
1423		len = (tsiz > rsize) ? rsize : tsiz;
1424		NFSCL_REQSTART(nd, NFSPROC_READ, vp);
1425		if (nd->nd_flag & ND_NFSV4)
1426			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1427		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1428		if (nd->nd_flag & ND_NFSV2) {
1429			*tl++ = txdr_unsigned(uiop->uio_offset);
1430			*tl++ = txdr_unsigned(len);
1431			*tl = 0;
1432		} else {
1433			txdr_hyper(uiop->uio_offset, tl);
1434			*(tl + 2) = txdr_unsigned(len);
1435		}
1436		/*
1437		 * Since I can't do a Getattr for NFSv4 for Write, there
1438		 * doesn't seem any point in doing one here, either.
1439		 * (See the comment in nfsrpc_writerpc() for more info.)
1440		 */
1441		error = nfscl_request(nd, vp, p, cred, stuff);
1442		if (error)
1443			return (error);
1444		if (nd->nd_flag & ND_NFSV3) {
1445			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1446		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1447			error = nfsm_loadattr(nd, nap);
1448			if (!error)
1449				*attrflagp = 1;
1450		}
1451		if (nd->nd_repstat || error) {
1452			if (!error)
1453				error = nd->nd_repstat;
1454			goto nfsmout;
1455		}
1456		if (nd->nd_flag & ND_NFSV3) {
1457			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1458			eof = fxdr_unsigned(int, *(tl + 1));
1459		} else if (nd->nd_flag & ND_NFSV4) {
1460			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1461			eof = fxdr_unsigned(int, *tl);
1462		}
1463		NFSM_STRSIZ(retlen, len);
1464		error = nfsm_mbufuio(nd, uiop, retlen);
1465		if (error)
1466			goto nfsmout;
1467		mbuf_freem(nd->nd_mrep);
1468		nd->nd_mrep = NULL;
1469		tsiz -= retlen;
1470		if (!(nd->nd_flag & ND_NFSV2)) {
1471			if (eof || retlen == 0)
1472				tsiz = 0;
1473		} else if (retlen < len)
1474			tsiz = 0;
1475	}
1476	return (0);
1477nfsmout:
1478	if (nd->nd_mrep != NULL)
1479		mbuf_freem(nd->nd_mrep);
1480	return (error);
1481}
1482
/*
 * nfs write operation
 * When called_from_strategy != 0, it should return EIO for an error that
 * indicates recovery is in progress, so that the buffer will be left
 * dirty and be written back to the server later. If it loops around,
 * the recovery thread could get stuck waiting for the buffer and recovery
 * will then deadlock.
 * *must_commit is cleared on entry.  For NFSv4 a write stateid is looked
 * up before each attempt and a credential from NFSNEWCRED() is used; it
 * is released with NFSFREECRED() before returning.
 */
APPLESTATIC int
nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
    void *stuff, int called_from_strategy)
{
	int error, expireret = 0, retrycnt, nostateid;
	u_int32_t clidrev = 0;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsnode *np = VTONFS(vp);
	struct ucred *newcred;
	struct nfsfh *nfhp = NULL;
	nfsv4stateid_t stateid;
	void *lckp;

	*must_commit = 0;
	/* Record the clientid revision; 0 means there is no client structure. */
	if (nmp->nm_clp != NULL)
		clidrev = nmp->nm_clp->nfsc_clientidrev;
	newcred = cred;
	if (NFSHASNFSV4(nmp)) {
		newcred = NFSNEWCRED(cred);
		nfhp = np->n_fhp;
	}
	retrycnt = 0;
	do {
		lckp = NULL;
		nostateid = 0;
		if (NFSHASNFSV4(nmp)) {
			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
			    &lckp);
			/* An all zeros "other" field is treated as no stateid. */
			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
			    stateid.other[2] == 0) {
				nostateid = 1;
				NFSCL_DEBUG(1, "stateid0 in write\n");
			}
		}

		/*
		 * If there is no stateid for NFSv4, it means this is an
		 * extraneous write after close. Basically a poorly
		 * implemented buffer cache. Just don't do the write.
		 */
		if (nostateid)
			error = 0;
		else
			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
			    newcred, &stateid, p, nap, attrflagp, stuff);
		if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION)
			nfscl_initiate_recovery(nmp->nm_clp);
		if (lckp != NULL)
			nfscl_lockderef(lckp);
		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
			/* Nap even when the loop condition will not retry. */
			(void) nfs_catnap(PZERO, error, "nfs_write");
		} else if ((error == NFSERR_EXPIRED ||
		    error == NFSERR_BADSTATEID) && clidrev != 0) {
			/* A zero return lets the loop condition retry. */
			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
		}
		retrycnt++;
		/*
		 * The recovery related errors are only retried here when
		 * not called from the strategy routine.
		 */
	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/*
	 * Map to EIO after four or more passes, or for a recovery related
	 * error when called from the strategy routine.
	 */
	if (error != 0 && (retrycnt >= 4 ||
	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
		error = EIO;
	if (NFSHASNFSV4(nmp))
		NFSFREECRED(newcred);
	return (error);
}
1565
/*
 * The actual write RPC.
 * Writes the data described by uiop (which must have a single iovec),
 * issuing one Write RPC per chunk of at most nm_wsize bytes.
 * Returns EFBIG if the end offset overflows or is beyond nm_maxfilesize.
 * On the paths that reach nfsmout, *iomode is set to the lowest
 * commitment level returned by any of the RPCs; the two early returns
 * (EFBIG and an nfscl_request() failure) leave *iomode untouched.
 * *must_commit is set to 1 when a reply's write verifier differs from
 * the one saved in the mount structure.
 */
static int
nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
    int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
{
	u_int32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfsnode *np = VTONFS(vp);
	int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
	int wccflag = 0, wsize;
	int32_t backup;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	nfsattrbit_t attrbits;
	off_t tmp_off;

	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
	*attrflagp = 0;
	tsiz = uio_uio_resid(uiop);
	tmp_off = uiop->uio_offset + tsiz;
	NFSLOCKMNT(nmp);
	/* The second test catches wrap around of the end offset. */
	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
		NFSUNLOCKMNT(nmp);
		return (EFBIG);
	}
	wsize = nmp->nm_wsize;
	NFSUNLOCKMNT(nmp);
	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
	while (tsiz > 0) {
		*attrflagp = 0;
		len = (tsiz > wsize) ? wsize : tsiz;
		NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
		if (nd->nd_flag & ND_NFSV4) {
			/* NFSv4: stateid, offset, stable_how, data length. */
			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
			txdr_hyper(uiop->uio_offset, tl);
			tl += 2;
			*tl++ = txdr_unsigned(*iomode);
			*tl = txdr_unsigned(len);
		} else if (nd->nd_flag & ND_NFSV3) {
			/* NFSv3: offset, count, stable_how, data length. */
			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
			txdr_hyper(uiop->uio_offset, tl);
			tl += 2;
			*tl++ = txdr_unsigned(len);
			*tl++ = txdr_unsigned(*iomode);
			*tl = txdr_unsigned(len);
		} else {
			u_int32_t x;

			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			/*
			 * Not sure why someone changed this, since the
			 * RFC clearly states that "beginoffset" and
			 * "totalcount" are ignored, but it wouldn't
			 * surprise me if there's a busted server out there.
			 */
			/* Set both "begin" and "current" to non-garbage. */
			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
			*tl++ = x;      /* "begin offset" */
			*tl++ = x;      /* "current offset" */
			x = txdr_unsigned(len);
			*tl++ = x;      /* total to this offset */
			*tl = x;        /* size of this write */

		}
		/* Copy the data into the request; this advances uiop. */
		nfsm_uiombuf(nd, uiop, len);
		/*
		 * Although it is tempting to do a normal Getattr Op in the
		 * NFSv4 compound, the result can be a nearly hung client
		 * system if the Getattr asks for Owner and/or OwnerGroup.
		 * It occurs when the client can't map either the Owner or
		 * Owner_group name in the Getattr reply to a uid/gid. When
		 * there is a cache miss, the kernel does an upcall to the
		 * nfsuserd. Then, it can try and read the local /etc/passwd
		 * or /etc/group file. It can then block in getnewbuf(),
		 * waiting for dirty writes to be pushed to the NFS server.
		 * The only reason this doesn't result in a complete
		 * deadlock, is that the upcall times out and allows
		 * the write to complete. However, progress is so slow
		 * that it might just as well be deadlocked.
		 * As such, we get the rest of the attributes, but not
		 * Owner or Owner_group.
		 * nb: nfscl_loadattrcache() needs to be told that these
		 *     partial attributes from a write rpc are being
		 *     passed in, via an argument flag.
		 */
		if (nd->nd_flag & ND_NFSV4) {
			NFSWRITEGETATTR_ATTRBIT(&attrbits);
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_GETATTR);
			(void) nfsrv_putattrbit(nd, &attrbits);
		}
		error = nfscl_request(nd, vp, p, cred, stuff);
		if (error)
			return (error);
		if (nd->nd_repstat) {
			/*
			 * In case the rpc gets retried, roll
			 * the uio fields changed by nfsm_uiombuf()
			 * back.
			 */
			uiop->uio_offset -= len;
			uio_uio_resid_add(uiop, len);
			uio_iov_base_add(uiop, -len);
			uio_iov_len_add(uiop, len);
		}
		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
			    &wccflag, stuff);
			if (error)
				goto nfsmout;
		}
		if (!nd->nd_repstat) {
			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
				/* Count written, commit level and verifier. */
				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
					+ NFSX_VERF);
				rlen = fxdr_unsigned(int, *tl++);
				if (rlen == 0) {
					error = NFSERR_IO;
					goto nfsmout;
				} else if (rlen < len) {
					/*
					 * Short write: back the uio up by
					 * the part that was not written.
					 */
					backup = len - rlen;
					uio_iov_base_add(uiop, -(backup));
					uio_iov_len_add(uiop, backup);
					uiop->uio_offset -= backup;
					uio_uio_resid_add(uiop, backup);
					len = rlen;
				}
				commit = fxdr_unsigned(int, *tl++);

				/*
				 * Return the lowest commitment level
				 * obtained by any of the RPCs.
				 */
				if (committed == NFSWRITE_FILESYNC)
					committed = commit;
				else if (committed == NFSWRITE_DATASYNC &&
					commit == NFSWRITE_UNSTABLE)
					committed = commit;
				NFSLOCKMNT(nmp);
				/*
				 * Save the first verifier seen; if a later
				 * one differs, tell the caller to commit.
				 */
				if (!NFSHASWRITEVERF(nmp)) {
					NFSBCOPY((caddr_t)tl,
					    (caddr_t)&nmp->nm_verf[0],
					    NFSX_VERF);
					NFSSETWRITEVERF(nmp);
	    			} else if (NFSBCMP(tl, nmp->nm_verf,
				    NFSX_VERF)) {
					*must_commit = 1;
					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
				}
				NFSUNLOCKMNT(nmp);
			}
			/* NFSv4: step over two words before the attributes. */
			if (nd->nd_flag & ND_NFSV4)
				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
				error = nfsm_loadattr(nd, nap);
				if (!error)
					*attrflagp = NFS_LATTR_NOSHRINK;
			}
		} else {
			error = nd->nd_repstat;
		}
		if (error)
			goto nfsmout;
		NFSWRITERPC_SETTIME(wccflag, np, (nd->nd_flag & ND_NFSV4));
		mbuf_freem(nd->nd_mrep);
		nd->nd_mrep = NULL;
		tsiz -= len;
	}
nfsmout:
	if (nd->nd_mrep != NULL)
		mbuf_freem(nd->nd_mrep);
	*iomode = committed;
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
	return (error);
}
1747
1748/*
1749 * nfs mknod rpc
1750 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1751 * mode set to specify the file type and the size field for rdev.
1752 */
1753APPLESTATIC int
1754nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1755    u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
1756    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1757    int *attrflagp, int *dattrflagp, void *dstuff)
1758{
1759	u_int32_t *tl;
1760	int error = 0;
1761	struct nfsrv_descript nfsd, *nd = &nfsd;
1762	nfsattrbit_t attrbits;
1763
1764	*nfhpp = NULL;
1765	*attrflagp = 0;
1766	*dattrflagp = 0;
1767	if (namelen > NFS_MAXNAMLEN)
1768		return (ENAMETOOLONG);
1769	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
1770	if (nd->nd_flag & ND_NFSV4) {
1771		if (vtyp == VBLK || vtyp == VCHR) {
1772			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1773			*tl++ = vtonfsv34_type(vtyp);
1774			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
1775			*tl = txdr_unsigned(NFSMINOR(rdev));
1776		} else {
1777			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1778			*tl = vtonfsv34_type(vtyp);
1779		}
1780	}
1781	(void) nfsm_strtom(nd, name, namelen);
1782	if (nd->nd_flag & ND_NFSV3) {
1783		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1784		*tl = vtonfsv34_type(vtyp);
1785	}
1786	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1787		nfscl_fillsattr(nd, vap, dvp, 0, 0);
1788	if ((nd->nd_flag & ND_NFSV3) &&
1789	    (vtyp == VCHR || vtyp == VBLK)) {
1790		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1791		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
1792		*tl = txdr_unsigned(NFSMINOR(rdev));
1793	}
1794	if (nd->nd_flag & ND_NFSV4) {
1795		NFSGETATTR_ATTRBIT(&attrbits);
1796		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1797		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
1798		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1799		(void) nfsrv_putattrbit(nd, &attrbits);
1800	}
1801	if (nd->nd_flag & ND_NFSV2)
1802		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
1803	error = nfscl_request(nd, dvp, p, cred, dstuff);
1804	if (error)
1805		return (error);
1806	if (nd->nd_flag & ND_NFSV4)
1807		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1808	if (!nd->nd_repstat) {
1809		if (nd->nd_flag & ND_NFSV4) {
1810			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1811			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1812			if (error)
1813				goto nfsmout;
1814		}
1815		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
1816		if (error)
1817			goto nfsmout;
1818	}
1819	if (nd->nd_flag & ND_NFSV3)
1820		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1821	if (!error && nd->nd_repstat)
1822		error = nd->nd_repstat;
1823nfsmout:
1824	mbuf_freem(nd->nd_mrep);
1825	return (error);
1826}
1827
/*
 * nfs file create call
 * Mostly just call the appropriate routine. (I separated out v4, so that
 * error recovery wouldn't be as difficult.)
 * For NFSv4 an open owner is obtained via nfscl_open() before each
 * attempt and released via nfscl_ownerrelease() after it; the create is
 * retried for the recoverable errors listed in the loop condition.
 * For NFSv2/3 the work is done by nfsrpc_createv23().
 */
APPLESTATIC int
nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
    nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
    int *attrflagp, int *dattrflagp, void *dstuff)
{
	int error = 0, newone, expireret = 0, retrycnt, unlocked;
	struct nfsclowner *owp;
	struct nfscldeleg *dp;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp));
	u_int32_t clidrev;

	if (NFSHASNFSV4(nmp)) {
	    retrycnt = 0;
	    do {
		dp = NULL;
		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
		    NULL, 1);
		if (error)
			return (error);
		/* Record the clientid revision; 0 means no client structure. */
		if (nmp->nm_clp != NULL)
			clidrev = nmp->nm_clp->nfsc_clientidrev;
		else
			clidrev = 0;
		error = nfsrpc_createv4(dvp, name, namelen, vap, cverf, fmode,
		  owp, &dp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
		  dstuff, &unlocked);
		/*
		 * There is no need to invalidate cached attributes here,
		 * since new post-delegation issue attributes are always
		 * returned by nfsrpc_createv4() and these will update the
		 * attribute cache.
		 */
		/* A delegation came back with the create, so record it. */
		if (dp != NULL)
			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
		nfscl_ownerrelease(owp, error, newone, unlocked);
		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		    error == NFSERR_BADSESSION) {
			/* Nap before the retry done by the loop condition. */
			(void) nfs_catnap(PZERO, error, "nfs_open");
		} else if ((error == NFSERR_EXPIRED ||
		    error == NFSERR_BADSTATEID) && clidrev != 0) {
			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
			/* Only these retries count against the limit of 4. */
			retrycnt++;
		}
	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		error == NFSERR_BADSESSION ||
		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
		 expireret == 0 && clidrev != 0 && retrycnt < 4));
	    if (error && retrycnt >= 4)
		    error = EIO;
	} else {
		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
		    dstuff);
	}
	return (error);
}
1894
1895/*
1896 * The create rpc for v2 and 3.
1897 */
1898static int
1899nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1900    nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
1901    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1902    int *attrflagp, int *dattrflagp, void *dstuff)
1903{
1904	u_int32_t *tl;
1905	int error = 0;
1906	struct nfsrv_descript nfsd, *nd = &nfsd;
1907
1908	*nfhpp = NULL;
1909	*attrflagp = 0;
1910	*dattrflagp = 0;
1911	if (namelen > NFS_MAXNAMLEN)
1912		return (ENAMETOOLONG);
1913	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
1914	(void) nfsm_strtom(nd, name, namelen);
1915	if (nd->nd_flag & ND_NFSV3) {
1916		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1917		if (fmode & O_EXCL) {
1918			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
1919			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
1920			*tl++ = cverf.lval[0];
1921			*tl = cverf.lval[1];
1922		} else {
1923			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
1924			nfscl_fillsattr(nd, vap, dvp, 0, 0);
1925		}
1926	} else {
1927		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
1928	}
1929	error = nfscl_request(nd, dvp, p, cred, dstuff);
1930	if (error)
1931		return (error);
1932	if (nd->nd_repstat == 0) {
1933		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
1934		if (error)
1935			goto nfsmout;
1936	}
1937	if (nd->nd_flag & ND_NFSV3)
1938		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1939	if (nd->nd_repstat != 0 && error == 0)
1940		error = nd->nd_repstat;
1941nfsmout:
1942	mbuf_freem(nd->nd_mrep);
1943	return (error);
1944}
1945
1946static int
1947nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1948    nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
1949    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
1950    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
1951    int *dattrflagp, void *dstuff, int *unlockedp)
1952{
1953	u_int32_t *tl;
1954	int error = 0, deleg, newone, ret, acesize, limitby;
1955	struct nfsrv_descript nfsd, *nd = &nfsd;
1956	struct nfsclopen *op;
1957	struct nfscldeleg *dp = NULL;
1958	struct nfsnode *np;
1959	struct nfsfh *nfhp;
1960	nfsattrbit_t attrbits;
1961	nfsv4stateid_t stateid;
1962	u_int32_t rflags;
1963	struct nfsmount *nmp;
1964
1965	nmp = VFSTONFS(dvp->v_mount);
1966	np = VTONFS(dvp);
1967	*unlockedp = 0;
1968	*nfhpp = NULL;
1969	*dpp = NULL;
1970	*attrflagp = 0;
1971	*dattrflagp = 0;
1972	if (namelen > NFS_MAXNAMLEN)
1973		return (ENAMETOOLONG);
1974	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
1975	/*
1976	 * For V4, this is actually an Open op.
1977	 */
1978	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1979	*tl++ = txdr_unsigned(owp->nfsow_seqid);
1980	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
1981	    NFSV4OPEN_ACCESSREAD);
1982	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
1983	*tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0];
1984	*tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1];
1985	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
1986	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1987	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
1988	if (fmode & O_EXCL) {
1989		if (NFSHASNFSV4N(nmp)) {
1990			if (NFSHASSESSPERSIST(nmp)) {
1991				/* Use GUARDED for persistent sessions. */
1992				*tl = txdr_unsigned(NFSCREATE_GUARDED);
1993				nfscl_fillsattr(nd, vap, dvp, 0, 0);
1994			} else {
1995				/* Otherwise, use EXCLUSIVE4_1. */
1996				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
1997				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
1998				*tl++ = cverf.lval[0];
1999				*tl = cverf.lval[1];
2000				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2001			}
2002		} else {
2003			/* NFSv4.0 */
2004			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2005			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2006			*tl++ = cverf.lval[0];
2007			*tl = cverf.lval[1];
2008		}
2009	} else {
2010		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2011		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2012	}
2013	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2014	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2015	(void) nfsm_strtom(nd, name, namelen);
2016	/* Get the new file's handle and attributes. */
2017	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2018	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2019	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2020	NFSGETATTR_ATTRBIT(&attrbits);
2021	(void) nfsrv_putattrbit(nd, &attrbits);
2022	/* Get the directory's post-op attributes. */
2023	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2024	*tl = txdr_unsigned(NFSV4OP_PUTFH);
2025	(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2026	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2027	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2028	(void) nfsrv_putattrbit(nd, &attrbits);
2029	error = nfscl_request(nd, dvp, p, cred, dstuff);
2030	if (error)
2031		return (error);
2032	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2033	if (nd->nd_repstat == 0) {
2034		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2035		    6 * NFSX_UNSIGNED);
2036		stateid.seqid = *tl++;
2037		stateid.other[0] = *tl++;
2038		stateid.other[1] = *tl++;
2039		stateid.other[2] = *tl;
2040		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2041		(void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2042		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2043		deleg = fxdr_unsigned(int, *tl);
2044		if (deleg == NFSV4OPEN_DELEGATEREAD ||
2045		    deleg == NFSV4OPEN_DELEGATEWRITE) {
2046			if (!(owp->nfsow_clp->nfsc_flags &
2047			      NFSCLFLAGS_FIRSTDELEG))
2048				owp->nfsow_clp->nfsc_flags |=
2049				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2050			MALLOC(dp, struct nfscldeleg *,
2051			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2052			    M_NFSCLDELEG, M_WAITOK);
2053			LIST_INIT(&dp->nfsdl_owner);
2054			LIST_INIT(&dp->nfsdl_lock);
2055			dp->nfsdl_clp = owp->nfsow_clp;
2056			newnfs_copyincred(cred, &dp->nfsdl_cred);
2057			nfscl_lockinit(&dp->nfsdl_rwlock);
2058			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2059			    NFSX_UNSIGNED);
2060			dp->nfsdl_stateid.seqid = *tl++;
2061			dp->nfsdl_stateid.other[0] = *tl++;
2062			dp->nfsdl_stateid.other[1] = *tl++;
2063			dp->nfsdl_stateid.other[2] = *tl++;
2064			ret = fxdr_unsigned(int, *tl);
2065			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2066				dp->nfsdl_flags = NFSCLDL_WRITE;
2067				/*
2068				 * Indicates how much the file can grow.
2069				 */
2070				NFSM_DISSECT(tl, u_int32_t *,
2071				    3 * NFSX_UNSIGNED);
2072				limitby = fxdr_unsigned(int, *tl++);
2073				switch (limitby) {
2074				case NFSV4OPEN_LIMITSIZE:
2075					dp->nfsdl_sizelimit = fxdr_hyper(tl);
2076					break;
2077				case NFSV4OPEN_LIMITBLOCKS:
2078					dp->nfsdl_sizelimit =
2079					    fxdr_unsigned(u_int64_t, *tl++);
2080					dp->nfsdl_sizelimit *=
2081					    fxdr_unsigned(u_int64_t, *tl);
2082					break;
2083				default:
2084					error = NFSERR_BADXDR;
2085					goto nfsmout;
2086				};
2087			} else {
2088				dp->nfsdl_flags = NFSCLDL_READ;
2089			}
2090			if (ret)
2091				dp->nfsdl_flags |= NFSCLDL_RECALL;
2092			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
2093			    &acesize, p);
2094			if (error)
2095				goto nfsmout;
2096		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
2097			error = NFSERR_BADXDR;
2098			goto nfsmout;
2099		}
2100		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2101		if (error)
2102			goto nfsmout;
2103		/* Get rid of the PutFH and Getattr status values. */
2104		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2105		/* Load the directory attributes. */
2106		error = nfsm_loadattr(nd, dnap);
2107		if (error)
2108			goto nfsmout;
2109		*dattrflagp = 1;
2110		if (dp != NULL && *attrflagp) {
2111			dp->nfsdl_change = nnap->na_filerev;
2112			dp->nfsdl_modtime = nnap->na_mtime;
2113			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2114		}
2115		/*
2116		 * We can now complete the Open state.
2117		 */
2118		nfhp = *nfhpp;
2119		if (dp != NULL) {
2120			dp->nfsdl_fhlen = nfhp->nfh_len;
2121			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2122		}
2123		/*
2124		 * Get an Open structure that will be
2125		 * attached to the OpenOwner, acquired already.
2126		 */
2127		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
2128		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2129		    cred, p, NULL, &op, &newone, NULL, 0);
2130		if (error)
2131			goto nfsmout;
2132		op->nfso_stateid = stateid;
2133		newnfs_copyincred(cred, &op->nfso_cred);
2134		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2135		    do {
2136			ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2137			    nfhp->nfh_len, op, cred, p);
2138			if (ret == NFSERR_DELAY)
2139			    (void) nfs_catnap(PZERO, ret, "nfs_create");
2140		    } while (ret == NFSERR_DELAY);
2141		    error = ret;
2142		}
2143
2144		/*
2145		 * If the server is handing out delegations, but we didn't
2146		 * get one because an OpenConfirm was required, try the
2147		 * Open again, to get a delegation. This is a harmless no-op,
2148		 * from a server's point of view.
2149		 */
2150		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2151		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2152		    !error && dp == NULL) {
2153		    do {
2154			ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp,
2155			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2156			    nfhp->nfh_fh, nfhp->nfh_len,
2157			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2158			    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2159			if (ret == NFSERR_DELAY)
2160			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2161		    } while (ret == NFSERR_DELAY);
2162		    if (ret) {
2163			if (dp != NULL) {
2164				FREE((caddr_t)dp, M_NFSCLDELEG);
2165				dp = NULL;
2166			}
2167			if (ret == NFSERR_STALECLIENTID ||
2168			    ret == NFSERR_STALEDONTRECOVER ||
2169			    ret == NFSERR_BADSESSION)
2170				error = ret;
2171		    }
2172		}
2173		nfscl_openrelease(op, error, newone);
2174		*unlockedp = 1;
2175	}
2176	if (nd->nd_repstat != 0 && error == 0)
2177		error = nd->nd_repstat;
2178	if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
2179		nfscl_initiate_recovery(owp->nfsow_clp);
2180nfsmout:
2181	if (!error)
2182		*dpp = dp;
2183	else if (dp != NULL)
2184		FREE((caddr_t)dp, M_NFSCLDELEG);
2185	mbuf_freem(nd->nd_mrep);
2186	return (error);
2187}
2188
2189/*
2190 * Nfs remove rpc
2191 */
2192APPLESTATIC int
2193nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2194    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2195    void *dstuff)
2196{
2197	u_int32_t *tl;
2198	struct nfsrv_descript nfsd, *nd = &nfsd;
2199	struct nfsnode *np;
2200	struct nfsmount *nmp;
2201	nfsv4stateid_t dstateid;
2202	int error, ret = 0, i;
2203
2204	*dattrflagp = 0;
2205	if (namelen > NFS_MAXNAMLEN)
2206		return (ENAMETOOLONG);
2207	nmp = VFSTONFS(vnode_mount(dvp));
2208tryagain:
2209	if (NFSHASNFSV4(nmp) && ret == 0) {
2210		ret = nfscl_removedeleg(vp, p, &dstateid);
2211		if (ret == 1) {
2212			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
2213			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2214			    NFSX_UNSIGNED);
2215			if (NFSHASNFSV4N(nmp))
2216				*tl++ = 0;
2217			else
2218				*tl++ = dstateid.seqid;
2219			*tl++ = dstateid.other[0];
2220			*tl++ = dstateid.other[1];
2221			*tl++ = dstateid.other[2];
2222			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2223			np = VTONFS(dvp);
2224			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2225			    np->n_fhp->nfh_len, 0);
2226			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2227			*tl = txdr_unsigned(NFSV4OP_REMOVE);
2228		}
2229	} else {
2230		ret = 0;
2231	}
2232	if (ret == 0)
2233		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
2234	(void) nfsm_strtom(nd, name, namelen);
2235	error = nfscl_request(nd, dvp, p, cred, dstuff);
2236	if (error)
2237		return (error);
2238	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2239		/* For NFSv4, parse out any Delereturn replies. */
2240		if (ret > 0 && nd->nd_repstat != 0 &&
2241		    (nd->nd_flag & ND_NOMOREDATA)) {
2242			/*
2243			 * If the Delegreturn failed, try again without
2244			 * it. The server will Recall, as required.
2245			 */
2246			mbuf_freem(nd->nd_mrep);
2247			goto tryagain;
2248		}
2249		for (i = 0; i < (ret * 2); i++) {
2250			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2251			    ND_NFSV4) {
2252			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2253			    if (*(tl + 1))
2254				nd->nd_flag |= ND_NOMOREDATA;
2255			}
2256		}
2257		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2258	}
2259	if (nd->nd_repstat && !error)
2260		error = nd->nd_repstat;
2261nfsmout:
2262	mbuf_freem(nd->nd_mrep);
2263	return (error);
2264}
2265
2266/*
2267 * Do an nfs rename rpc.
2268 */
2269APPLESTATIC int
2270nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2271    vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2272    NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2273    int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
2274{
2275	u_int32_t *tl;
2276	struct nfsrv_descript nfsd, *nd = &nfsd;
2277	struct nfsmount *nmp;
2278	struct nfsnode *np;
2279	nfsattrbit_t attrbits;
2280	nfsv4stateid_t fdstateid, tdstateid;
2281	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2282
2283	*fattrflagp = 0;
2284	*tattrflagp = 0;
2285	nmp = VFSTONFS(vnode_mount(fdvp));
2286	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2287		return (ENAMETOOLONG);
2288tryagain:
2289	if (NFSHASNFSV4(nmp) && ret == 0) {
2290		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2291		    &tdstateid, &gottd, p);
2292		if (gotfd && gottd) {
2293			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
2294		} else if (gotfd) {
2295			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
2296		} else if (gottd) {
2297			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
2298		}
2299		if (gotfd) {
2300			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2301			if (NFSHASNFSV4N(nmp))
2302				*tl++ = 0;
2303			else
2304				*tl++ = fdstateid.seqid;
2305			*tl++ = fdstateid.other[0];
2306			*tl++ = fdstateid.other[1];
2307			*tl = fdstateid.other[2];
2308			if (gottd) {
2309				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2310				*tl = txdr_unsigned(NFSV4OP_PUTFH);
2311				np = VTONFS(tvp);
2312				(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2313				    np->n_fhp->nfh_len, 0);
2314				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2315				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2316			}
2317		}
2318		if (gottd) {
2319			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2320			if (NFSHASNFSV4N(nmp))
2321				*tl++ = 0;
2322			else
2323				*tl++ = tdstateid.seqid;
2324			*tl++ = tdstateid.other[0];
2325			*tl++ = tdstateid.other[1];
2326			*tl = tdstateid.other[2];
2327		}
2328		if (ret > 0) {
2329			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2330			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2331			np = VTONFS(fdvp);
2332			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2333			    np->n_fhp->nfh_len, 0);
2334			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2335			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
2336		}
2337	} else {
2338		ret = 0;
2339	}
2340	if (ret == 0)
2341		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
2342	if (nd->nd_flag & ND_NFSV4) {
2343		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2344		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2345		NFSWCCATTR_ATTRBIT(&attrbits);
2346		(void) nfsrv_putattrbit(nd, &attrbits);
2347		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2348		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2349		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2350		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
2351		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2352		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2353		(void) nfsrv_putattrbit(nd, &attrbits);
2354		nd->nd_flag |= ND_V4WCCATTR;
2355		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2356		*tl = txdr_unsigned(NFSV4OP_RENAME);
2357	}
2358	(void) nfsm_strtom(nd, fnameptr, fnamelen);
2359	if (!(nd->nd_flag & ND_NFSV4))
2360		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2361			VTONFS(tdvp)->n_fhp->nfh_len, 0);
2362	(void) nfsm_strtom(nd, tnameptr, tnamelen);
2363	error = nfscl_request(nd, fdvp, p, cred, fstuff);
2364	if (error)
2365		return (error);
2366	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2367		/* For NFSv4, parse out any Delereturn replies. */
2368		if (ret > 0 && nd->nd_repstat != 0 &&
2369		    (nd->nd_flag & ND_NOMOREDATA)) {
2370			/*
2371			 * If the Delegreturn failed, try again without
2372			 * it. The server will Recall, as required.
2373			 */
2374			mbuf_freem(nd->nd_mrep);
2375			goto tryagain;
2376		}
2377		for (i = 0; i < (ret * 2); i++) {
2378			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2379			    ND_NFSV4) {
2380			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2381			    if (*(tl + 1)) {
2382				if (i == 0 && ret > 1) {
2383				    /*
2384				     * If the Delegreturn failed, try again
2385				     * without it. The server will Recall, as
2386				     * required.
2387				     * If ret > 1, the first iteration of this
2388				     * loop is the second DelegReturn result.
2389				     */
2390				    mbuf_freem(nd->nd_mrep);
2391				    goto tryagain;
2392				} else {
2393				    nd->nd_flag |= ND_NOMOREDATA;
2394				}
2395			    }
2396			}
2397		}
2398		/* Now, the first wcc attribute reply. */
2399		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2400			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2401			if (*(tl + 1))
2402				nd->nd_flag |= ND_NOMOREDATA;
2403		}
2404		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
2405		    fstuff);
2406		/* and the second wcc attribute reply. */
2407		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2408		    !error) {
2409			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2410			if (*(tl + 1))
2411				nd->nd_flag |= ND_NOMOREDATA;
2412		}
2413		if (!error)
2414			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2415			    NULL, tstuff);
2416	}
2417	if (nd->nd_repstat && !error)
2418		error = nd->nd_repstat;
2419nfsmout:
2420	mbuf_freem(nd->nd_mrep);
2421	return (error);
2422}
2423
2424/*
2425 * nfs hard link create rpc
2426 */
2427APPLESTATIC int
2428nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2429    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2430    struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2431{
2432	u_int32_t *tl;
2433	struct nfsrv_descript nfsd, *nd = &nfsd;
2434	nfsattrbit_t attrbits;
2435	int error = 0;
2436
2437	*attrflagp = 0;
2438	*dattrflagp = 0;
2439	if (namelen > NFS_MAXNAMLEN)
2440		return (ENAMETOOLONG);
2441	NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
2442	if (nd->nd_flag & ND_NFSV4) {
2443		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2444		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2445	}
2446	(void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2447		VTONFS(dvp)->n_fhp->nfh_len, 0);
2448	if (nd->nd_flag & ND_NFSV4) {
2449		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2450		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2451		NFSWCCATTR_ATTRBIT(&attrbits);
2452		(void) nfsrv_putattrbit(nd, &attrbits);
2453		nd->nd_flag |= ND_V4WCCATTR;
2454		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2455		*tl = txdr_unsigned(NFSV4OP_LINK);
2456	}
2457	(void) nfsm_strtom(nd, name, namelen);
2458	error = nfscl_request(nd, vp, p, cred, dstuff);
2459	if (error)
2460		return (error);
2461	if (nd->nd_flag & ND_NFSV3) {
2462		error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2463		if (!error)
2464			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2465			    NULL, dstuff);
2466	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2467		/*
2468		 * First, parse out the PutFH and Getattr result.
2469		 */
2470		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2471		if (!(*(tl + 1)))
2472			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2473		if (*(tl + 1))
2474			nd->nd_flag |= ND_NOMOREDATA;
2475		/*
2476		 * Get the pre-op attributes.
2477		 */
2478		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2479	}
2480	if (nd->nd_repstat && !error)
2481		error = nd->nd_repstat;
2482nfsmout:
2483	mbuf_freem(nd->nd_mrep);
2484	return (error);
2485}
2486
2487/*
2488 * nfs symbolic link create rpc
2489 */
2490APPLESTATIC int
2491nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target,
2492    struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2493    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2494    int *dattrflagp, void *dstuff)
2495{
2496	u_int32_t *tl;
2497	struct nfsrv_descript nfsd, *nd = &nfsd;
2498	struct nfsmount *nmp;
2499	int slen, error = 0;
2500
2501	*nfhpp = NULL;
2502	*attrflagp = 0;
2503	*dattrflagp = 0;
2504	nmp = VFSTONFS(vnode_mount(dvp));
2505	slen = strlen(target);
2506	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
2507		return (ENAMETOOLONG);
2508	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
2509	if (nd->nd_flag & ND_NFSV4) {
2510		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2511		*tl = txdr_unsigned(NFLNK);
2512		(void) nfsm_strtom(nd, target, slen);
2513	}
2514	(void) nfsm_strtom(nd, name, namelen);
2515	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2516		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2517	if (!(nd->nd_flag & ND_NFSV4))
2518		(void) nfsm_strtom(nd, target, slen);
2519	if (nd->nd_flag & ND_NFSV2)
2520		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2521	error = nfscl_request(nd, dvp, p, cred, dstuff);
2522	if (error)
2523		return (error);
2524	if (nd->nd_flag & ND_NFSV4)
2525		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2526	if ((nd->nd_flag & ND_NFSV3) && !error) {
2527		if (!nd->nd_repstat)
2528			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2529		if (!error)
2530			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2531			    NULL, dstuff);
2532	}
2533	if (nd->nd_repstat && !error)
2534		error = nd->nd_repstat;
2535	mbuf_freem(nd->nd_mrep);
2536	/*
2537	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2538	 * Only do this if vfs.nfs.ignore_eexist is set.
2539	 * Never do this for NFSv4.1 or later minor versions, since sessions
2540	 * should guarantee "exactly once" RPC semantics.
2541	 */
2542	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2543	    nmp->nm_minorvers == 0))
2544		error = 0;
2545	return (error);
2546}
2547
2548/*
2549 * nfs make dir rpc
2550 */
2551APPLESTATIC int
2552nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2553    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2554    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2555    int *dattrflagp, void *dstuff)
2556{
2557	u_int32_t *tl;
2558	struct nfsrv_descript nfsd, *nd = &nfsd;
2559	nfsattrbit_t attrbits;
2560	int error = 0;
2561	struct nfsfh *fhp;
2562	struct nfsmount *nmp;
2563
2564	*nfhpp = NULL;
2565	*attrflagp = 0;
2566	*dattrflagp = 0;
2567	nmp = VFSTONFS(vnode_mount(dvp));
2568	fhp = VTONFS(dvp)->n_fhp;
2569	if (namelen > NFS_MAXNAMLEN)
2570		return (ENAMETOOLONG);
2571	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
2572	if (nd->nd_flag & ND_NFSV4) {
2573		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2574		*tl = txdr_unsigned(NFDIR);
2575	}
2576	(void) nfsm_strtom(nd, name, namelen);
2577	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2578	if (nd->nd_flag & ND_NFSV4) {
2579		NFSGETATTR_ATTRBIT(&attrbits);
2580		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2581		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2582		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2583		(void) nfsrv_putattrbit(nd, &attrbits);
2584		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2585		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2586		(void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
2587		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2588		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2589		(void) nfsrv_putattrbit(nd, &attrbits);
2590	}
2591	error = nfscl_request(nd, dvp, p, cred, dstuff);
2592	if (error)
2593		return (error);
2594	if (nd->nd_flag & ND_NFSV4)
2595		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2596	if (!nd->nd_repstat && !error) {
2597		if (nd->nd_flag & ND_NFSV4) {
2598			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2599			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2600		}
2601		if (!error)
2602			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2603		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
2604			/* Get rid of the PutFH and Getattr status values. */
2605			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2606			/* Load the directory attributes. */
2607			error = nfsm_loadattr(nd, dnap);
2608			if (error == 0)
2609				*dattrflagp = 1;
2610		}
2611	}
2612	if ((nd->nd_flag & ND_NFSV3) && !error)
2613		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2614	if (nd->nd_repstat && !error)
2615		error = nd->nd_repstat;
2616nfsmout:
2617	mbuf_freem(nd->nd_mrep);
2618	/*
2619	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2620	 * Only do this if vfs.nfs.ignore_eexist is set.
2621	 * Never do this for NFSv4.1 or later minor versions, since sessions
2622	 * should guarantee "exactly once" RPC semantics.
2623	 */
2624	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2625	    nmp->nm_minorvers == 0))
2626		error = 0;
2627	return (error);
2628}
2629
2630/*
2631 * nfs remove directory call
2632 */
2633APPLESTATIC int
2634nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
2635    NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
2636{
2637	struct nfsrv_descript nfsd, *nd = &nfsd;
2638	int error = 0;
2639
2640	*dattrflagp = 0;
2641	if (namelen > NFS_MAXNAMLEN)
2642		return (ENAMETOOLONG);
2643	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
2644	(void) nfsm_strtom(nd, name, namelen);
2645	error = nfscl_request(nd, dvp, p, cred, dstuff);
2646	if (error)
2647		return (error);
2648	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2649		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2650	if (nd->nd_repstat && !error)
2651		error = nd->nd_repstat;
2652	mbuf_freem(nd->nd_mrep);
2653	/*
2654	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2655	 */
2656	if (error == ENOENT)
2657		error = 0;
2658	return (error);
2659}
2660
/*
 * Readdir rpc.
 * Always returns with either uio_resid unchanged, if you are at the
 * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
 * filled in.
 * I felt this would allow caching of directory blocks more easily
 * than returning a partially filled block.
 * Directory offset cookies:
 * Oh my, what to do with them...
 * I can think of three ways to deal with them:
 * 1 - have the layer above these RPCs maintain a map between logical
 *     directory byte offsets and the NFS directory offset cookies
 * 2 - pass the opaque directory offset cookies up into userland
 *     and let the libc functions deal with them, via the system call
 * 3 - return them to userland in the "struct dirent", so future versions
 *     of libc can use them and do whatever is necessary to make things work
 *     above these rpc calls, in the meantime
 * For now, I do #3 by "hiding" the directory offset cookies after the
 * d_name field in struct dirent. This is space inside d_reclen that
 * will be ignored by anything that doesn't know about them.
 * The directory offset cookies are filled in as the last 8 bytes of
 * each directory entry, after d_name. Someday, the userland libc
 * functions may be able to use these. In the meantime, it satisfies
 * OpenBSD's requirements for cookies being returned.
 * It expects the directory offset cookie for the read to be in uio_offset
 * and returns the one for the next entry after this directory block in
 * there, as well.
 */
2689APPLESTATIC int
2690nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
2691    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
2692    int *eofp, void *stuff)
2693{
2694	int len, left;
2695	struct dirent *dp = NULL;
2696	u_int32_t *tl;
2697	nfsquad_t cookie, ncookie;
2698	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
2699	struct nfsnode *dnp = VTONFS(vp);
2700	struct nfsvattr nfsva;
2701	struct nfsrv_descript nfsd, *nd = &nfsd;
2702	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2703	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
2704	long dotfileid, dotdotfileid = 0;
2705	u_int32_t fakefileno = 0xffffffff, rderr;
2706	char *cp;
2707	nfsattrbit_t attrbits, dattrbits;
2708	u_int32_t *tl2 = NULL;
2709	size_t tresid;
2710
2711	KASSERT(uiop->uio_iovcnt == 1 &&
2712	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
2713	    ("nfs readdirrpc bad uio"));
2714
2715	/*
2716	 * There is no point in reading a lot more than uio_resid, however
2717	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
2718	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
2719	 * will never make readsize > nm_readdirsize.
2720	 */
2721	readsize = nmp->nm_readdirsize;
2722	if (readsize > uio_uio_resid(uiop))
2723		readsize = uio_uio_resid(uiop) + DIRBLKSIZ;
2724
2725	*attrflagp = 0;
2726	if (eofp)
2727		*eofp = 0;
2728	tresid = uio_uio_resid(uiop);
2729	cookie.lval[0] = cookiep->nfsuquad[0];
2730	cookie.lval[1] = cookiep->nfsuquad[1];
2731	nd->nd_mrep = NULL;
2732
2733	/*
2734	 * For NFSv4, first create the "." and ".." entries.
2735	 */
2736	if (NFSHASNFSV4(nmp)) {
2737		reqsize = 6 * NFSX_UNSIGNED;
2738		NFSGETATTR_ATTRBIT(&dattrbits);
2739		NFSZERO_ATTRBIT(&attrbits);
2740		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
2741		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
2742		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
2743		    NFSATTRBIT_MOUNTEDONFILEID)) {
2744			NFSSETBIT_ATTRBIT(&attrbits,
2745			    NFSATTRBIT_MOUNTEDONFILEID);
2746			gotmnton = 1;
2747		} else {
2748			/*
2749			 * Must fake it. Use the fileno, except when the
2750			 * fsid is != to that of the directory. For that
2751			 * case, generate a fake fileno that is not the same.
2752			 */
2753			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
2754			gotmnton = 0;
2755		}
2756
2757		/*
2758		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
2759		 */
2760		if (uiop->uio_offset == 0) {
2761			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
2762			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2763			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2764			*tl = txdr_unsigned(NFSV4OP_GETATTR);
2765			(void) nfsrv_putattrbit(nd, &attrbits);
2766			error = nfscl_request(nd, vp, p, cred, stuff);
2767			if (error)
2768			    return (error);
2769			dotfileid = 0;	/* Fake out the compiler. */
2770			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
2771			    error = nfsm_loadattr(nd, &nfsva);
2772			    if (error != 0)
2773				goto nfsmout;
2774			    dotfileid = nfsva.na_fileid;
2775			}
2776			if (nd->nd_repstat == 0) {
2777			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2778			    len = fxdr_unsigned(int, *(tl + 4));
2779			    if (len > 0 && len <= NFSX_V4FHMAX)
2780				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
2781			    else
2782				error = EPERM;
2783			    if (!error) {
2784				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
2785				nfsva.na_mntonfileno = 0xffffffff;
2786				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
2787				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
2788				    NULL, NULL, NULL, p, cred);
2789				if (error) {
2790				    dotdotfileid = dotfileid;
2791				} else if (gotmnton) {
2792				    if (nfsva.na_mntonfileno != 0xffffffff)
2793					dotdotfileid = nfsva.na_mntonfileno;
2794				    else
2795					dotdotfileid = nfsva.na_fileid;
2796				} else if (nfsva.na_filesid[0] ==
2797				    dnp->n_vattr.na_filesid[0] &&
2798				    nfsva.na_filesid[1] ==
2799				    dnp->n_vattr.na_filesid[1]) {
2800				    dotdotfileid = nfsva.na_fileid;
2801				} else {
2802				    do {
2803					fakefileno--;
2804				    } while (fakefileno ==
2805					nfsva.na_fileid);
2806				    dotdotfileid = fakefileno;
2807				}
2808			    }
2809			} else if (nd->nd_repstat == NFSERR_NOENT) {
2810			    /*
2811			     * Lookupp returns NFSERR_NOENT when we are
2812			     * at the root, so just use the current dir.
2813			     */
2814			    nd->nd_repstat = 0;
2815			    dotdotfileid = dotfileid;
2816			} else {
2817			    error = nd->nd_repstat;
2818			}
2819			mbuf_freem(nd->nd_mrep);
2820			if (error)
2821			    return (error);
2822			nd->nd_mrep = NULL;
2823			dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
2824			dp->d_type = DT_DIR;
2825			dp->d_fileno = dotfileid;
2826			dp->d_namlen = 1;
2827			dp->d_name[0] = '.';
2828			dp->d_name[1] = '\0';
2829			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
2830			/*
2831			 * Just make these offset cookie 0.
2832			 */
2833			tl = (u_int32_t *)&dp->d_name[4];
2834			*tl++ = 0;
2835			*tl = 0;
2836			blksiz += dp->d_reclen;
2837			uio_uio_resid_add(uiop, -(dp->d_reclen));
2838			uiop->uio_offset += dp->d_reclen;
2839			uio_iov_base_add(uiop, dp->d_reclen);
2840			uio_iov_len_add(uiop, -(dp->d_reclen));
2841			dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
2842			dp->d_type = DT_DIR;
2843			dp->d_fileno = dotdotfileid;
2844			dp->d_namlen = 2;
2845			dp->d_name[0] = '.';
2846			dp->d_name[1] = '.';
2847			dp->d_name[2] = '\0';
2848			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
2849			/*
2850			 * Just make these offset cookie 0.
2851			 */
2852			tl = (u_int32_t *)&dp->d_name[4];
2853			*tl++ = 0;
2854			*tl = 0;
2855			blksiz += dp->d_reclen;
2856			uio_uio_resid_add(uiop, -(dp->d_reclen));
2857			uiop->uio_offset += dp->d_reclen;
2858			uio_iov_base_add(uiop, dp->d_reclen);
2859			uio_iov_len_add(uiop, -(dp->d_reclen));
2860		}
2861		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
2862	} else {
2863		reqsize = 5 * NFSX_UNSIGNED;
2864	}
2865
2866
2867	/*
2868	 * Loop around doing readdir rpc's of size readsize.
2869	 * The stopping criteria is EOF or buffer full.
2870	 */
2871	while (more_dirs && bigenough) {
2872		*attrflagp = 0;
2873		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
2874		if (nd->nd_flag & ND_NFSV2) {
2875			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2876			*tl++ = cookie.lval[1];
2877			*tl = txdr_unsigned(readsize);
2878		} else {
2879			NFSM_BUILD(tl, u_int32_t *, reqsize);
2880			*tl++ = cookie.lval[0];
2881			*tl++ = cookie.lval[1];
2882			if (cookie.qval == 0) {
2883				*tl++ = 0;
2884				*tl++ = 0;
2885			} else {
2886				NFSLOCKNODE(dnp);
2887				*tl++ = dnp->n_cookieverf.nfsuquad[0];
2888				*tl++ = dnp->n_cookieverf.nfsuquad[1];
2889				NFSUNLOCKNODE(dnp);
2890			}
2891			if (nd->nd_flag & ND_NFSV4) {
2892				*tl++ = txdr_unsigned(readsize);
2893				*tl = txdr_unsigned(readsize);
2894				(void) nfsrv_putattrbit(nd, &attrbits);
2895				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2896				*tl = txdr_unsigned(NFSV4OP_GETATTR);
2897				(void) nfsrv_putattrbit(nd, &dattrbits);
2898			} else {
2899				*tl = txdr_unsigned(readsize);
2900			}
2901		}
2902		error = nfscl_request(nd, vp, p, cred, stuff);
2903		if (error)
2904			return (error);
2905		if (!(nd->nd_flag & ND_NFSV2)) {
2906			if (nd->nd_flag & ND_NFSV3)
2907				error = nfscl_postop_attr(nd, nap, attrflagp,
2908				    stuff);
2909			if (!nd->nd_repstat && !error) {
2910				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2911				NFSLOCKNODE(dnp);
2912				dnp->n_cookieverf.nfsuquad[0] = *tl++;
2913				dnp->n_cookieverf.nfsuquad[1] = *tl;
2914				NFSUNLOCKNODE(dnp);
2915			}
2916		}
2917		if (nd->nd_repstat || error) {
2918			if (!error)
2919				error = nd->nd_repstat;
2920			goto nfsmout;
2921		}
2922		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2923		more_dirs = fxdr_unsigned(int, *tl);
2924		if (!more_dirs)
2925			tryformoredirs = 0;
2926
2927		/* loop thru the dir entries, doctoring them to 4bsd form */
2928		while (more_dirs && bigenough) {
2929			if (nd->nd_flag & ND_NFSV4) {
2930				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
2931				ncookie.lval[0] = *tl++;
2932				ncookie.lval[1] = *tl++;
2933				len = fxdr_unsigned(int, *tl);
2934			} else if (nd->nd_flag & ND_NFSV3) {
2935				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
2936				nfsva.na_fileid = fxdr_hyper(tl);
2937				tl += 2;
2938				len = fxdr_unsigned(int, *tl);
2939			} else {
2940				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
2941				nfsva.na_fileid =
2942				    fxdr_unsigned(long, *tl++);
2943				len = fxdr_unsigned(int, *tl);
2944			}
2945			if (len <= 0 || len > NFS_MAXNAMLEN) {
2946				error = EBADRPC;
2947				goto nfsmout;
2948			}
2949			tlen = NFSM_RNDUP(len);
2950			if (tlen == len)
2951				tlen += 4;  /* To ensure null termination */
2952			left = DIRBLKSIZ - blksiz;
2953			if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > left) {
2954				dp->d_reclen += left;
2955				uio_iov_base_add(uiop, left);
2956				uio_iov_len_add(uiop, -(left));
2957				uio_uio_resid_add(uiop, -(left));
2958				uiop->uio_offset += left;
2959				blksiz = 0;
2960			}
2961			if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop))
2962				bigenough = 0;
2963			if (bigenough) {
2964				dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
2965				dp->d_namlen = len;
2966				dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER;
2967				dp->d_type = DT_UNKNOWN;
2968				blksiz += dp->d_reclen;
2969				if (blksiz == DIRBLKSIZ)
2970					blksiz = 0;
2971				uio_uio_resid_add(uiop, -(DIRHDSIZ));
2972				uiop->uio_offset += DIRHDSIZ;
2973				uio_iov_base_add(uiop, DIRHDSIZ);
2974				uio_iov_len_add(uiop, -(DIRHDSIZ));
2975				error = nfsm_mbufuio(nd, uiop, len);
2976				if (error)
2977					goto nfsmout;
2978				cp = CAST_DOWN(caddr_t, uio_iov_base(uiop));
2979				tlen -= len;
2980				*cp = '\0';	/* null terminate */
2981				cp += tlen;	/* points to cookie storage */
2982				tl2 = (u_int32_t *)cp;
2983				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
2984				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
2985				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
2986				uiop->uio_offset += (tlen + NFSX_HYPER);
2987			} else {
2988				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
2989				if (error)
2990					goto nfsmout;
2991			}
2992			if (nd->nd_flag & ND_NFSV4) {
2993				rderr = 0;
2994				nfsva.na_mntonfileno = 0xffffffff;
2995				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
2996				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
2997				    NULL, NULL, &rderr, p, cred);
2998				if (error)
2999					goto nfsmout;
3000				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3001			} else if (nd->nd_flag & ND_NFSV3) {
3002				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3003				ncookie.lval[0] = *tl++;
3004				ncookie.lval[1] = *tl++;
3005			} else {
3006				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3007				ncookie.lval[0] = 0;
3008				ncookie.lval[1] = *tl++;
3009			}
3010			if (bigenough) {
3011			    if (nd->nd_flag & ND_NFSV4) {
3012				if (rderr) {
3013				    dp->d_fileno = 0;
3014				} else {
3015				    if (gotmnton) {
3016					if (nfsva.na_mntonfileno != 0xffffffff)
3017					    dp->d_fileno = nfsva.na_mntonfileno;
3018					else
3019					    dp->d_fileno = nfsva.na_fileid;
3020				    } else if (nfsva.na_filesid[0] ==
3021					dnp->n_vattr.na_filesid[0] &&
3022					nfsva.na_filesid[1] ==
3023					dnp->n_vattr.na_filesid[1]) {
3024					dp->d_fileno = nfsva.na_fileid;
3025				    } else {
3026					do {
3027					    fakefileno--;
3028					} while (fakefileno ==
3029					    nfsva.na_fileid);
3030					dp->d_fileno = fakefileno;
3031				    }
3032				    dp->d_type = vtonfs_dtype(nfsva.na_type);
3033				}
3034			    } else {
3035				dp->d_fileno = nfsva.na_fileid;
3036			    }
3037			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3038				ncookie.lval[0];
3039			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3040				ncookie.lval[1];
3041			}
3042			more_dirs = fxdr_unsigned(int, *tl);
3043		}
3044		/*
3045		 * If at end of rpc data, get the eof boolean
3046		 */
3047		if (!more_dirs) {
3048			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3049			eof = fxdr_unsigned(int, *tl);
3050			if (tryformoredirs)
3051				more_dirs = !eof;
3052			if (nd->nd_flag & ND_NFSV4) {
3053				error = nfscl_postop_attr(nd, nap, attrflagp,
3054				    stuff);
3055				if (error)
3056					goto nfsmout;
3057			}
3058		}
3059		mbuf_freem(nd->nd_mrep);
3060		nd->nd_mrep = NULL;
3061	}
3062	/*
3063	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3064	 * by increasing d_reclen for the last record.
3065	 */
3066	if (blksiz > 0) {
3067		left = DIRBLKSIZ - blksiz;
3068		dp->d_reclen += left;
3069		uio_iov_base_add(uiop, left);
3070		uio_iov_len_add(uiop, -(left));
3071		uio_uio_resid_add(uiop, -(left));
3072		uiop->uio_offset += left;
3073	}
3074
3075	/*
3076	 * If returning no data, assume end of file.
3077	 * If not bigenough, return not end of file, since you aren't
3078	 *    returning all the data
3079	 * Otherwise, return the eof flag from the server.
3080	 */
3081	if (eofp) {
3082		if (tresid == ((size_t)(uio_uio_resid(uiop))))
3083			*eofp = 1;
3084		else if (!bigenough)
3085			*eofp = 0;
3086		else
3087			*eofp = eof;
3088	}
3089
3090	/*
3091	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3092	 */
3093	while (uio_uio_resid(uiop) > 0 && ((size_t)(uio_uio_resid(uiop))) != tresid) {
3094		dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
3095		dp->d_type = DT_UNKNOWN;
3096		dp->d_fileno = 0;
3097		dp->d_namlen = 0;
3098		dp->d_name[0] = '\0';
3099		tl = (u_int32_t *)&dp->d_name[4];
3100		*tl++ = cookie.lval[0];
3101		*tl = cookie.lval[1];
3102		dp->d_reclen = DIRBLKSIZ;
3103		uio_iov_base_add(uiop, DIRBLKSIZ);
3104		uio_iov_len_add(uiop, -(DIRBLKSIZ));
3105		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3106		uiop->uio_offset += DIRBLKSIZ;
3107	}
3108
3109nfsmout:
3110	if (nd->nd_mrep != NULL)
3111		mbuf_freem(nd->nd_mrep);
3112	return (error);
3113}
3114
3115#ifndef APPLE
3116/*
3117 * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3118 * (Also used for NFS V4 when mount flag set.)
3119 * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
 *
 * Issues NFSPROC_READDIRPLUS requests against directory "vp" until the
 * server reports end of directory or the next entry no longer fits in
 * the caller's buffer. Each entry of a reply is turned into a
 * struct dirent built in place at the uio's iovec base, with the
 * entry's directory offset cookie (NFSX_HYPER bytes) stored after the
 * name. Records are padded so that none straddles a DIRBLKSIZ boundary
 * and the data returned ends on one.
 * For an NFSv4 mount, when uio_offset is 0, "." and ".." are made up
 * first from the reply to a request started as NFSPROC_LOOKUPP with
 * Getfh and Getattr operations appended.
 * When an entry comes with a file handle (and, for V3, attributes),
 * nfscl_loadattrcache() is called on the entry's vnode and, subject to
 * the checks made just before cache_enter_time(), a name cache entry
 * is added for it.
 *
 * vp		directory vnode to read
 * uiop		destination; must have a single iovec and a resid that
 *		is a multiple of DIRBLKSIZ (KASSERT'd). Its iovec,
 *		resid and offset are advanced by hand as records are
 *		added.
 * cookiep	in: cookie to start reading at; out: cookie of the last
 *		entry copied to the buffer
 * cred, p	passed through to the RPC and attribute routines
 * nap,		handed to nfscl_postop_attr() to receive the attributes
 * attrflagp	and the "attributes present" flag; *attrflagp is zeroed
 *		on entry and before every request
 * eofp		may be NULL; otherwise set according to the rules
 *		spelled out just before it is assigned below
 * stuff	opaque argument handed to nfscl_request() and
 *		nfscl_postop_attr()
 *
 * Returns 0 or an error number: whatever a called routine failed with,
 * the server's reply status (nd_repstat), EPERM for a bad file handle
 * length in the Lookupp reply, or EBADRPC for an entry whose name
 * length is out of range.
 *
 * NOTE(review): NFSM_BUILD() and NFSM_DISSECT() are macros defined
 * elsewhere; presumably NFSM_DISSECT() sets error and jumps to nfsmout
 * when the reply is too short - confirm.
3120 */
3121APPLESTATIC int
3122nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3123    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3124    int *eofp, void *stuff)
3125{
3126	int len, left;
3127	struct dirent *dp = NULL;
3128	u_int32_t *tl;
3129	vnode_t newvp = NULLVP;
3130	struct nfsrv_descript nfsd, *nd = &nfsd;
3131	struct nameidata nami, *ndp = &nami;
3132	struct componentname *cnp = &ndp->ni_cnd;
3133	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3134	struct nfsnode *dnp = VTONFS(vp), *np;
3135	struct nfsvattr nfsva;
3136	struct nfsfh *nfhp;
3137	nfsquad_t cookie, ncookie;
	/*
	 * blksiz is the number of bytes used so far in the current
	 * DIRBLKSIZ block; bigenough is cleared once an entry does not
	 * fit in what is left of the caller's buffer.
	 */
3138	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3139	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3140	int isdotdot = 0, unlocknewvp = 0;
3141	long dotfileid, dotdotfileid = 0, fileno = 0;
3142	char *cp;
3143	nfsattrbit_t attrbits, dattrbits;
	/* resid on entry; compared later to tell whether anything was added */
3144	size_t tresid;
	/*
	 * fakefileno counts down from 0xffffffff to make up file numbers
	 * for entries whose fsid differs from the directory's.
	 * rderr is only set and only read on the NFSv4 paths.
	 */
3145	u_int32_t *tl2 = NULL, fakefileno = 0xffffffff, rderr;
	/*
	 * ctime of the directory once known (tv_sec != 0); handed to
	 * cache_enter_time() for entries that are directories.
	 */
3146	struct timespec dctime;
3147
3148	KASSERT(uiop->uio_iovcnt == 1 &&
3149	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
3150	    ("nfs readdirplusrpc bad uio"));
3151	timespecclear(&dctime);
3152	*attrflagp = 0;
3153	if (eofp != NULL)
3154		*eofp = 0;
3155	ndp->ni_dvp = vp;
3156	nd->nd_mrep = NULL;
3157	cookie.lval[0] = cookiep->nfsuquad[0];
3158	cookie.lval[1] = cookiep->nfsuquad[1];
3159	tresid = uio_uio_resid(uiop);
3160
3161	/*
3162	 * For NFSv4, first create the "." and ".." entries.
3163	 */
3164	if (NFSHASNFSV4(nmp)) {
3165		NFSGETATTR_ATTRBIT(&dattrbits);
3166		NFSZERO_ATTRBIT(&attrbits);
3167		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3168		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3169		    NFSATTRBIT_MOUNTEDONFILEID)) {
3170			NFSSETBIT_ATTRBIT(&attrbits,
3171			    NFSATTRBIT_MOUNTEDONFILEID);
3172			gotmnton = 1;
3173		} else {
3174			/*
3175			 * Must fake it. Use the fileno, except when the
3176			 * fsid is != to that of the directory. For that
3177			 * case, generate a fake fileno that is not the same.
3178			 */
3179			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3180			gotmnton = 0;
3181		}
3182
3183		/*
3184		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3185		 */
3186		if (uiop->uio_offset == 0) {
3187			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3188			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3189			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3190			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3191			(void) nfsrv_putattrbit(nd, &attrbits);
3192			error = nfscl_request(nd, vp, p, cred, stuff);
3193			if (error)
3194			    return (error);
3195			dotfileid = 0;	/* Fake out the compiler. */
			/*
			 * The attributes decoded here supply the file number
			 * used for "." and the directory ctime (dctime).
			 */
3196			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3197			    error = nfsm_loadattr(nd, &nfsva);
3198			    if (error != 0)
3199				goto nfsmout;
3200			    dctime = nfsva.na_ctime;
3201			    dotfileid = nfsva.na_fileid;
3202			}
3203			if (nd->nd_repstat == 0) {
			    /*
			     * Of these five words only the last is used; it
			     * is treated as a file handle length, checked
			     * against NFSX_V4FHMAX, and the handle itself is
			     * skipped over.
			     */
3204			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3205			    len = fxdr_unsigned(int, *(tl + 4));
3206			    if (len > 0 && len <= NFSX_V4FHMAX)
3207				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3208			    else
3209				error = EPERM;
3210			    if (!error) {
				/*
				 * The attributes decoded next pick the file
				 * number for "..", using the same rules as
				 * are applied to ordinary entries further
				 * down.
				 */
3211				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3212				nfsva.na_mntonfileno = 0xffffffff;
3213				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3214				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3215				    NULL, NULL, NULL, p, cred);
3216				if (error) {
				    /*
				     * NOTE(review): error stays set here, so
				     * the "if (error) return" after the reply
				     * is freed is taken and this value is
				     * never used - confirm intent.
				     */
3217				    dotdotfileid = dotfileid;
3218				} else if (gotmnton) {
3219				    if (nfsva.na_mntonfileno != 0xffffffff)
3220					dotdotfileid = nfsva.na_mntonfileno;
3221				    else
3222					dotdotfileid = nfsva.na_fileid;
3223				} else if (nfsva.na_filesid[0] ==
3224				    dnp->n_vattr.na_filesid[0] &&
3225				    nfsva.na_filesid[1] ==
3226				    dnp->n_vattr.na_filesid[1]) {
3227				    dotdotfileid = nfsva.na_fileid;
3228				} else {
3229				    do {
3230					fakefileno--;
3231				    } while (fakefileno ==
3232					nfsva.na_fileid);
3233				    dotdotfileid = fakefileno;
3234				}
3235			    }
3236			} else if (nd->nd_repstat == NFSERR_NOENT) {
3237			    /*
3238			     * Lookupp returns NFSERR_NOENT when we are
3239			     * at the root, so just use the current dir.
3240			     */
3241			    nd->nd_repstat = 0;
3242			    dotdotfileid = dotfileid;
3243			} else {
3244			    error = nd->nd_repstat;
3245			}
3246			mbuf_freem(nd->nd_mrep);
3247			if (error)
3248			    return (error);
3249			nd->nd_mrep = NULL;
			/* Build the "." record at the start of the buffer. */
3250			dp = (struct dirent *)uio_iov_base(uiop);
3251			dp->d_type = DT_DIR;
3252			dp->d_fileno = dotfileid;
3253			dp->d_namlen = 1;
3254			dp->d_name[0] = '.';
3255			dp->d_name[1] = '\0';
			/* NFSX_HYPER extra bytes hold the cookie. */
3256			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
3257			/*
3258			 * Just make these offset cookie 0.
3259			 */
3260			tl = (u_int32_t *)&dp->d_name[4];
3261			*tl++ = 0;
3262			*tl = 0;
3263			blksiz += dp->d_reclen;
3264			uio_uio_resid_add(uiop, -(dp->d_reclen));
3265			uiop->uio_offset += dp->d_reclen;
3266			uio_iov_base_add(uiop, dp->d_reclen);
3267			uio_iov_len_add(uiop, -(dp->d_reclen));
			/* Then the ".." record, right behind it. */
3268			dp = (struct dirent *)uio_iov_base(uiop);
3269			dp->d_type = DT_DIR;
3270			dp->d_fileno = dotdotfileid;
3271			dp->d_namlen = 2;
3272			dp->d_name[0] = '.';
3273			dp->d_name[1] = '.';
3274			dp->d_name[2] = '\0';
3275			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
3276			/*
3277			 * Just make these offset cookie 0.
3278			 */
3279			tl = (u_int32_t *)&dp->d_name[4];
3280			*tl++ = 0;
3281			*tl = 0;
3282			blksiz += dp->d_reclen;
3283			uio_uio_resid_add(uiop, -(dp->d_reclen));
3284			uiop->uio_offset += dp->d_reclen;
3285			uio_iov_base_add(uiop, dp->d_reclen);
3286			uio_iov_len_add(uiop, -(dp->d_reclen));
3287		}
		/*
		 * From here on attrbits is what gets requested for each
		 * entry in the Readdir requests built below.
		 */
3288		NFSREADDIRPLUS_ATTRBIT(&attrbits);
3289		if (gotmnton)
3290			NFSSETBIT_ATTRBIT(&attrbits,
3291			    NFSATTRBIT_MOUNTEDONFILEID);
3292	}
3293
3294	/*
3295	 * Loop around doing readdir rpc's of size nm_readdirsize.
3296	 * The stopping criteria is EOF or buffer full.
3297	 */
3298	while (more_dirs && bigenough) {
3299		*attrflagp = 0;
3300		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
		/*
		 * Six words of arguments: the cookie (2), the cookie
		 * verifier (2; zero when starting from cookie 0, else the
		 * one saved in the nfsnode) and nm_readdirsize twice.
		 */
3301 		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3302		*tl++ = cookie.lval[0];
3303		*tl++ = cookie.lval[1];
3304		if (cookie.qval == 0) {
3305			*tl++ = 0;
3306			*tl++ = 0;
3307		} else {
3308			NFSLOCKNODE(dnp);
3309			*tl++ = dnp->n_cookieverf.nfsuquad[0];
3310			*tl++ = dnp->n_cookieverf.nfsuquad[1];
3311			NFSUNLOCKNODE(dnp);
3312		}
3313		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
3314		*tl = txdr_unsigned(nmp->nm_readdirsize);
3315		if (nd->nd_flag & ND_NFSV4) {
			/*
			 * V4: per-entry attribute request, then a Getattr
			 * operation using dattrbits.
			 */
3316			(void) nfsrv_putattrbit(nd, &attrbits);
3317			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3318			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3319			(void) nfsrv_putattrbit(nd, &dattrbits);
3320		}
3321		error = nfscl_request(nd, vp, p, cred, stuff);
3322		if (error)
3323			return (error);
		/* For V3 the post-op attributes lead the reply. */
3324		if (nd->nd_flag & ND_NFSV3)
3325			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3326		if (nd->nd_repstat || error) {
3327			if (!error)
3328				error = nd->nd_repstat;
3329			goto nfsmout;
3330		}
3331		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3332			dctime = nap->na_ctime;
		/*
		 * Two words of cookie verifier, saved in the nfsnode for
		 * the next request, then the first "entry follows" flag.
		 */
3333		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3334		NFSLOCKNODE(dnp);
3335		dnp->n_cookieverf.nfsuquad[0] = *tl++;
3336		dnp->n_cookieverf.nfsuquad[1] = *tl++;
3337		NFSUNLOCKNODE(dnp);
3338		more_dirs = fxdr_unsigned(int, *tl);
		/*
		 * A reply holding no entries at all stops further requests,
		 * whatever its eof flag says.
		 */
3339		if (!more_dirs)
3340			tryformoredirs = 0;
3341
3342		/* loop thru the dir entries, doctoring them to 4bsd form */
3343		while (more_dirs && bigenough) {
3344			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3345			if (nd->nd_flag & ND_NFSV4) {
				/* V4: cookie (2 words), then name length. */
3346				ncookie.lval[0] = *tl++;
3347				ncookie.lval[1] = *tl++;
3348			} else {
				/*
				 * V3: of the two words ahead of the name
				 * length, the first is skipped and the second
				 * becomes fileno.
				 * NOTE(review): presumably the skipped word
				 * is the upper half of a 64 bit fileid -
				 * confirm.
				 */
3349				fileno = fxdr_unsigned(long, *++tl);
3350				tl++;
3351			}
3352			len = fxdr_unsigned(int, *tl);
3353			if (len <= 0 || len > NFS_MAXNAMLEN) {
3354				error = EBADRPC;
3355				goto nfsmout;
3356			}
3357			tlen = NFSM_RNDUP(len);
3358			if (tlen == len)
3359				tlen += 4;  /* To ensure null termination */
3360			left = DIRBLKSIZ - blksiz;
			/*
			 * The record will not fit in the rest of this
			 * DIRBLKSIZ block: stretch the previous record to
			 * the end of the block and start a new one.
			 */
3361			if ((tlen + DIRHDSIZ + NFSX_HYPER) > left) {
3362				dp->d_reclen += left;
3363				uio_iov_base_add(uiop, left);
3364				uio_iov_len_add(uiop, -(left));
3365				uio_uio_resid_add(uiop, -(left));
3366				uiop->uio_offset += left;
3367				blksiz = 0;
3368			}
3369			if ((tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop))
3370				bigenough = 0;
3371			if (bigenough) {
3372				dp = (struct dirent *)uio_iov_base(uiop);
3373				dp->d_namlen = len;
3374				dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER;
3375				dp->d_type = DT_UNKNOWN;
3376				blksiz += dp->d_reclen;
3377				if (blksiz == DIRBLKSIZ)
3378					blksiz = 0;
3379				uio_uio_resid_add(uiop, -(DIRHDSIZ));
3380				uiop->uio_offset += DIRHDSIZ;
3381				uio_iov_base_add(uiop, DIRHDSIZ);
3382				uio_iov_len_add(uiop, -(DIRHDSIZ));
				/*
				 * cn_nameptr is aimed at the spot in the
				 * caller's buffer the name is about to be
				 * copied to.
				 */
3383				cnp->cn_nameptr = uio_iov_base(uiop);
3384				cnp->cn_namelen = len;
3385				NFSCNHASHZERO(cnp);
3386				error = nfsm_mbufuio(nd, uiop, len);
3387				if (error)
3388					goto nfsmout;
3389				cp = uio_iov_base(uiop);
3390				tlen -= len;
3391				*cp = '\0';
3392				cp += tlen;	/* points to cookie storage */
3393				tl2 = (u_int32_t *)cp;
3394				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3395				    cnp->cn_nameptr[1] == '.')
3396					isdotdot = 1;
3397				else
3398					isdotdot = 0;
3399				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3400				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3401				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3402				uiop->uio_offset += (tlen + NFSX_HYPER);
3403			} else {
				/*
				 * No room: step over the name. The rest of
				 * the entry is still decoded below.
				 */
3404				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3405				if (error)
3406					goto nfsmout;
3407			}
3408			nfhp = NULL;
3409			if (nd->nd_flag & ND_NFSV3) {
				/*
				 * V3: cookie (2 words) and an "attributes
				 * follow" flag, the attributes if flagged,
				 * then a "handle follows" flag and the handle.
				 */
3410				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3411				ncookie.lval[0] = *tl++;
3412				ncookie.lval[1] = *tl++;
3413				attrflag = fxdr_unsigned(int, *tl);
3414				if (attrflag) {
3415				  error = nfsm_loadattr(nd, &nfsva);
3416				  if (error)
3417					goto nfsmout;
3418				}
3419				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3420				if (*tl) {
3421					error = nfsm_getfh(nd, &nfhp);
3422					if (error)
3423					    goto nfsmout;
3424				}
				/*
				 * A handle without attributes is not used
				 * below, so drop it now.
				 */
3425				if (!attrflag && nfhp != NULL) {
3426					FREE((caddr_t)nfhp, M_NFSFH);
3427					nfhp = NULL;
3428				}
3429			} else {
3430				rderr = 0;
3431				nfsva.na_mntonfileno = 0xffffffff;
3432				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3433				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3434				    NULL, NULL, &rderr, p, cred);
3435				if (error)
3436					goto nfsmout;
3437			}
3438
3439			if (bigenough) {
3440			    if (nd->nd_flag & ND_NFSV4) {
3441				if (rderr) {
3442				    dp->d_fileno = 0;
3443				} else if (gotmnton) {
3444				    if (nfsva.na_mntonfileno != 0xffffffff)
3445					dp->d_fileno = nfsva.na_mntonfileno;
3446				    else
3447					dp->d_fileno = nfsva.na_fileid;
3448				} else if (nfsva.na_filesid[0] ==
3449				    dnp->n_vattr.na_filesid[0] &&
3450				    nfsva.na_filesid[1] ==
3451				    dnp->n_vattr.na_filesid[1]) {
3452				    dp->d_fileno = nfsva.na_fileid;
3453				} else {
3454				    do {
3455					fakefileno--;
3456				    } while (fakefileno ==
3457					nfsva.na_fileid);
3458				    dp->d_fileno = fakefileno;
3459				}
3460			    } else {
3461				dp->d_fileno = fileno;
3462			    }
			    /*
			     * Record this entry's cookie in the dirent, in
			     * *cookiep for the caller and in the local copy
			     * used for the next request.
			     */
3463			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3464				ncookie.lval[0];
3465			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3466				ncookie.lval[1];
3467
3468			    if (nfhp != NULL) {
				/*
				 * NFSRV_CMPFH() presumably is true when the
				 * two handles are equal, i.e. the entry is
				 * the directory itself; a reference on vp is
				 * taken instead of calling nfscl_nget().
				 */
3469				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3470				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3471				    VREF(vp);
3472				    newvp = vp;
3473				    unlocknewvp = 0;
3474				    FREE((caddr_t)nfhp, M_NFSFH);
3475				    np = dnp;
3476				} else if (isdotdot != 0) {
3477				    /*
3478				     * Skip doing a nfscl_nget() call for "..".
3479				     * There's a race between acquiring the nfs
3480				     * node here and lookups that look for the
3481				     * directory being read (in the parent).
3482				     * It would try to get a lock on ".." here,
3483				     * owning the lock on the directory being
3484				     * read. Lookup will hold the lock on ".."
3485				     * and try to acquire the lock on the
3486				     * directory being read.
3487				     * If the directory is unlocked/relocked,
3488				     * then there is a LOR with the buflock
3489				     * vp is relocked.
3490				     */
3491				    free(nfhp, M_NFSFH);
3492				} else {
				    /*
				     * NOTE(review): nfhp is not freed on this
				     * path; presumably nfscl_nget() takes it
				     * over - confirm. A failure here only
				     * skips the caching below and is left in
				     * error until something else assigns it;
				     * for V3 on the last entry of the last
				     * reply nothing does, so it looks like it
				     * becomes the return value - confirm that
				     * is intended.
				     */
3493				    error = nfscl_nget(vnode_mount(vp), vp,
3494				      nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3495				    if (!error) {
3496					newvp = NFSTOV(np);
3497					unlocknewvp = 1;
3498				    }
3499				}
3500				nfhp = NULL;
3501				if (newvp != NULLVP) {
3502				    error = nfscl_loadattrcache(&newvp,
3503					&nfsva, NULL, NULL, 0, 0);
3504				    if (error) {
3505					if (unlocknewvp)
3506					    vput(newvp);
3507					else
3508					    vrele(newvp);
3509					goto nfsmout;
3510				    }
3511				    dp->d_type =
3512					vtonfs_dtype(np->n_vattr.na_type);
3513				    ndp->ni_vp = newvp;
3514				    NFSCNHASH(cnp, HASHINIT);
				    /*
				     * Enter the name only if it is no longer
				     * than NCHNAMLEN and, for a directory,
				     * only if the parent's ctime is known.
				     */
3515				    if (cnp->cn_namelen <= NCHNAMLEN &&
3516					(newvp->v_type != VDIR ||
3517					 dctime.tv_sec != 0)) {
3518					cache_enter_time(ndp->ni_dvp,
3519					    ndp->ni_vp, cnp,
3520					    &nfsva.na_ctime,
3521					    newvp->v_type != VDIR ? NULL :
3522					    &dctime);
3523				    }
				    /*
				     * vput() for a vnode from nfscl_nget(),
				     * vrele() for the extra reference on vp.
				     */
3524				    if (unlocknewvp)
3525					vput(newvp);
3526				    else
3527					vrele(newvp);
3528				    newvp = NULLVP;
3529				}
3530			    }
3531			} else if (nfhp != NULL) {
3532			    FREE((caddr_t)nfhp, M_NFSFH);
3533			}
			/* "Another entry follows" flag. */
3534			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3535			more_dirs = fxdr_unsigned(int, *tl);
3536		}
3537		/*
3538		 * If at end of rpc data, get the eof boolean
3539		 */
3540		if (!more_dirs) {
3541			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3542			eof = fxdr_unsigned(int, *tl);
			/* Go around for another request unless at eof. */
3543			if (tryformoredirs)
3544				more_dirs = !eof;
3545			if (nd->nd_flag & ND_NFSV4) {
3546				error = nfscl_postop_attr(nd, nap, attrflagp,
3547				    stuff);
3548				if (error)
3549					goto nfsmout;
3550			}
3551		}
3552		mbuf_freem(nd->nd_mrep);
3553		nd->nd_mrep = NULL;
3554	}
3555	/*
3556	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3557	 * by increasing d_reclen for the last record.
3558	 */
3559	if (blksiz > 0) {
3560		left = DIRBLKSIZ - blksiz;
3561		dp->d_reclen += left;
3562		uio_iov_base_add(uiop, left);
3563		uio_iov_len_add(uiop, -(left));
3564		uio_uio_resid_add(uiop, -(left));
3565		uiop->uio_offset += left;
3566	}
3567
3568	/*
3569	 * If returning no data, assume end of file.
3570	 * If not bigenough, return not end of file, since you aren't
3571	 *    returning all the data
3572	 * Otherwise, return the eof flag from the server.
3573	 */
3574	if (eofp != NULL) {
3575		if (tresid == uio_uio_resid(uiop))
3576			*eofp = 1;
3577		else if (!bigenough)
3578			*eofp = 0;
3579		else
3580			*eofp = eof;
3581	}
3582
3583	/*
3584	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3585	 */
	/*
	 * Nothing is added when no data at all was returned (resid still
	 * equals tresid). Each filler record has d_fileno 0, an empty name
	 * and carries the last cookie.
	 */
3586	while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
3587		dp = (struct dirent *)uio_iov_base(uiop);
3588		dp->d_type = DT_UNKNOWN;
3589		dp->d_fileno = 0;
3590		dp->d_namlen = 0;
3591		dp->d_name[0] = '\0';
3592		tl = (u_int32_t *)&dp->d_name[4];
3593		*tl++ = cookie.lval[0];
3594		*tl = cookie.lval[1];
3595		dp->d_reclen = DIRBLKSIZ;
3596		uio_iov_base_add(uiop, DIRBLKSIZ);
3597		uio_iov_len_add(uiop, -(DIRBLKSIZ));
3598		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3599		uiop->uio_offset += DIRBLKSIZ;
3600	}
3601
	/* Common exit: free the reply if one is still held. */
3602nfsmout:
3603	if (nd->nd_mrep != NULL)
3604		mbuf_freem(nd->nd_mrep);
3605	return (error);
3606}
3607#endif	/* !APPLE */
3608
3609/*
3610 * Nfs commit rpc
3611 */
3612APPLESTATIC int
3613nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
3614    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
3615{
3616	u_int32_t *tl;
3617	struct nfsrv_descript nfsd, *nd = &nfsd;
3618	nfsattrbit_t attrbits;
3619	int error;
3620	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3621
3622	*attrflagp = 0;
3623	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
3624	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3625	txdr_hyper(offset, tl);
3626	tl += 2;
3627	*tl = txdr_unsigned(cnt);
3628	if (nd->nd_flag & ND_NFSV4) {
3629		/*
3630		 * And do a Getattr op.
3631		 */
3632		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3633		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3634		NFSGETATTR_ATTRBIT(&attrbits);
3635		(void) nfsrv_putattrbit(nd, &attrbits);
3636	}
3637	error = nfscl_request(nd, vp, p, cred, stuff);
3638	if (error)
3639		return (error);
3640	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
3641	if (!error && !nd->nd_repstat) {
3642		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
3643		NFSLOCKMNT(nmp);
3644		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
3645			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
3646			nd->nd_repstat = NFSERR_STALEWRITEVERF;
3647		}
3648		NFSUNLOCKMNT(nmp);
3649		if (nd->nd_flag & ND_NFSV4)
3650			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3651	}
3652nfsmout:
3653	if (!error && nd->nd_repstat)
3654		error = nd->nd_repstat;
3655	mbuf_freem(nd->nd_mrep);
3656	return (error);
3657}
3658
3659/*
3660 * NFS byte range lock rpc.
3661 * (Mostly just calls one of the three lower level RPC routines.)
3662 */
3663APPLESTATIC int
3664nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
3665    int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3666{
3667	struct nfscllockowner *lp;
3668	struct nfsclclient *clp;
3669	struct nfsfh *nfhp;
3670	struct nfsrv_descript nfsd, *nd = &nfsd;
3671	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3672	u_int64_t off, len;
3673	off_t start, end;
3674	u_int32_t clidrev = 0;
3675	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
3676	int callcnt, dorpc;
3677
3678	/*
3679	 * Convert the flock structure into a start and end and do POSIX
3680	 * bounds checking.
3681	 */
3682	switch (fl->l_whence) {
3683	case SEEK_SET:
3684	case SEEK_CUR:
3685		/*
3686		 * Caller is responsible for adding any necessary offset
3687		 * when SEEK_CUR is used.
3688		 */
3689		start = fl->l_start;
3690		off = fl->l_start;
3691		break;
3692	case SEEK_END:
3693		start = size + fl->l_start;
3694		off = size + fl->l_start;
3695		break;
3696	default:
3697		return (EINVAL);
3698	};
3699	if (start < 0)
3700		return (EINVAL);
3701	if (fl->l_len != 0) {
3702		end = start + fl->l_len - 1;
3703		if (end < start)
3704			return (EINVAL);
3705	}
3706
3707	len = fl->l_len;
3708	if (len == 0)
3709		len = NFS64BITSSET;
3710	retrycnt = 0;
3711	do {
3712	    nd->nd_repstat = 0;
3713	    if (op == F_GETLK) {
3714		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
3715		if (error)
3716			return (error);
3717		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
3718		if (!error) {
3719			clidrev = clp->nfsc_clientidrev;
3720			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
3721			    p, id, flags);
3722		} else if (error == -1) {
3723			error = 0;
3724		}
3725		nfscl_clientrelease(clp);
3726	    } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
3727		/*
3728		 * We must loop around for all lockowner cases.
3729		 */
3730		callcnt = 0;
3731		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
3732		if (error)
3733			return (error);
3734		do {
3735		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
3736			clp, id, flags, &lp, &dorpc);
3737		    /*
3738		     * If it returns a NULL lp, we're done.
3739		     */
3740		    if (lp == NULL) {
3741			if (callcnt == 0)
3742			    nfscl_clientrelease(clp);
3743			else
3744			    nfscl_releasealllocks(clp, vp, p, id, flags);
3745			return (error);
3746		    }
3747		    if (nmp->nm_clp != NULL)
3748			clidrev = nmp->nm_clp->nfsc_clientidrev;
3749		    else
3750			clidrev = 0;
3751		    /*
3752		     * If the server doesn't support Posix lock semantics,
3753		     * only allow locks on the entire file, since it won't
3754		     * handle overlapping byte ranges.
3755		     * There might still be a problem when a lock
3756		     * upgrade/downgrade (read<->write) occurs, since the
3757		     * server "might" expect an unlock first?
3758		     */
3759		    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
3760			(off == 0 && len == NFS64BITSSET))) {
3761			/*
3762			 * Since the lock records will go away, we must
3763			 * wait for grace and delay here.
3764			 */
3765			do {
3766			    error = nfsrpc_locku(nd, nmp, lp, off, len,
3767				NFSV4LOCKT_READ, cred, p, 0);
3768			    if ((nd->nd_repstat == NFSERR_GRACE ||
3769				 nd->nd_repstat == NFSERR_DELAY) &&
3770				error == 0)
3771				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
3772				    "nfs_advlock");
3773			} while ((nd->nd_repstat == NFSERR_GRACE ||
3774			    nd->nd_repstat == NFSERR_DELAY) && error == 0);
3775		    }
3776		    callcnt++;
3777		} while (error == 0 && nd->nd_repstat == 0);
3778		nfscl_releasealllocks(clp, vp, p, id, flags);
3779	    } else if (op == F_SETLK) {
3780		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
3781		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
3782		if (error || donelocally) {
3783			return (error);
3784		}
3785		if (nmp->nm_clp != NULL)
3786			clidrev = nmp->nm_clp->nfsc_clientidrev;
3787		else
3788			clidrev = 0;
3789		nfhp = VTONFS(vp)->n_fhp;
3790		if (!lp->nfsl_open->nfso_posixlock &&
3791		    (off != 0 || len != NFS64BITSSET)) {
3792			error = EINVAL;
3793		} else {
3794			error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
3795			    nfhp->nfh_len, lp, newone, reclaim, off,
3796			    len, fl->l_type, cred, p, 0);
3797		}
3798		if (!error)
3799			error = nd->nd_repstat;
3800		nfscl_lockrelease(lp, error, newone);
3801	    } else {
3802		error = EINVAL;
3803	    }
3804	    if (!error)
3805	        error = nd->nd_repstat;
3806	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
3807		error == NFSERR_STALEDONTRECOVER ||
3808		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
3809		error == NFSERR_BADSESSION) {
3810		(void) nfs_catnap(PZERO, error, "nfs_advlock");
3811	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
3812		&& clidrev != 0) {
3813		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
3814		retrycnt++;
3815	    }
3816	} while (error == NFSERR_GRACE ||
3817	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
3818	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
3819	    error == NFSERR_BADSESSION ||
3820	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
3821	     expireret == 0 && clidrev != 0 && retrycnt < 4));
3822	if (error && retrycnt >= 4)
3823		error = EIO;
3824	return (error);
3825}
3826
3827/*
3828 * The lower level routine for the LockT case.
3829 */
3830APPLESTATIC int
3831nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
3832    struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
3833    struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3834{
3835	u_int32_t *tl;
3836	int error, type, size;
3837	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
3838	struct nfsnode *np;
3839	struct nfsmount *nmp;
3840
3841	nmp = VFSTONFS(vp->v_mount);
3842	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
3843	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
3844	if (fl->l_type == F_RDLCK)
3845		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
3846	else
3847		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
3848	txdr_hyper(off, tl);
3849	tl += 2;
3850	txdr_hyper(len, tl);
3851	tl += 2;
3852	*tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0];
3853	*tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1];
3854	nfscl_filllockowner(id, own, flags);
3855	np = VTONFS(vp);
3856	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
3857	    np->n_fhp->nfh_len);
3858	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
3859	error = nfscl_request(nd, vp, p, cred, NULL);
3860	if (error)
3861		return (error);
3862	if (nd->nd_repstat == 0) {
3863		fl->l_type = F_UNLCK;
3864	} else if (nd->nd_repstat == NFSERR_DENIED) {
3865		nd->nd_repstat = 0;
3866		fl->l_whence = SEEK_SET;
3867		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
3868		fl->l_start = fxdr_hyper(tl);
3869		tl += 2;
3870		len = fxdr_hyper(tl);
3871		tl += 2;
3872		if (len == NFS64BITSSET)
3873			fl->l_len = 0;
3874		else
3875			fl->l_len = len;
3876		type = fxdr_unsigned(int, *tl++);
3877		if (type == NFSV4LOCKT_WRITE)
3878			fl->l_type = F_WRLCK;
3879		else
3880			fl->l_type = F_RDLCK;
3881		/*
3882		 * XXX For now, I have no idea what to do with the
3883		 * conflicting lock_owner, so I'll just set the pid == 0
3884		 * and skip over the lock_owner.
3885		 */
3886		fl->l_pid = (pid_t)0;
3887		tl += 2;
3888		size = fxdr_unsigned(int, *tl);
3889		if (size < 0 || size > NFSV4_OPAQUELIMIT)
3890			error = EBADRPC;
3891		if (!error)
3892			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
3893	} else if (nd->nd_repstat == NFSERR_STALECLIENTID ||
3894	    nd->nd_repstat == NFSERR_BADSESSION)
3895		nfscl_initiate_recovery(clp);
3896nfsmout:
3897	mbuf_freem(nd->nd_mrep);
3898	return (error);
3899}
3900
3901/*
3902 * Lower level function that performs the LockU RPC.
3903 */
3904static int
3905nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
3906    struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
3907    u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
3908{
3909	u_int32_t *tl;
3910	int error;
3911
3912	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
3913	    lp->nfsl_open->nfso_fhlen, NULL, NULL);
3914	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
3915	*tl++ = txdr_unsigned(type);
3916	*tl = txdr_unsigned(lp->nfsl_seqid);
3917	if (nfstest_outofseq &&
3918	    (arc4random() % nfstest_outofseq) == 0)
3919		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
3920	tl++;
3921	if (NFSHASNFSV4N(nmp))
3922		*tl++ = 0;
3923	else
3924		*tl++ = lp->nfsl_stateid.seqid;
3925	*tl++ = lp->nfsl_stateid.other[0];
3926	*tl++ = lp->nfsl_stateid.other[1];
3927	*tl++ = lp->nfsl_stateid.other[2];
3928	txdr_hyper(off, tl);
3929	tl += 2;
3930	txdr_hyper(len, tl);
3931	if (syscred)
3932		nd->nd_flag |= ND_USEGSSNAME;
3933	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
3934	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
3935	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
3936	if (error)
3937		return (error);
3938	if (nd->nd_repstat == 0) {
3939		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
3940		lp->nfsl_stateid.seqid = *tl++;
3941		lp->nfsl_stateid.other[0] = *tl++;
3942		lp->nfsl_stateid.other[1] = *tl++;
3943		lp->nfsl_stateid.other[2] = *tl;
3944	} else if (nd->nd_repstat == NFSERR_STALESTATEID ||
3945	    nd->nd_repstat == NFSERR_BADSESSION)
3946		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
3947nfsmout:
3948	mbuf_freem(nd->nd_mrep);
3949	return (error);
3950}
3951
3952/*
3953 * The actual Lock RPC.
3954 */
3955APPLESTATIC int
3956nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
3957    u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
3958    int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
3959    NFSPROC_T *p, int syscred)
3960{
3961	u_int32_t *tl;
3962	int error, size;
3963	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
3964
3965	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL);
3966	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
3967	if (type == F_RDLCK)
3968		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
3969	else
3970		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
3971	*tl++ = txdr_unsigned(reclaim);
3972	txdr_hyper(off, tl);
3973	tl += 2;
3974	txdr_hyper(len, tl);
3975	tl += 2;
3976	if (newone) {
3977	    *tl = newnfs_true;
3978	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
3979		2 * NFSX_UNSIGNED + NFSX_HYPER);
3980	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
3981	    if (NFSHASNFSV4N(nmp))
3982		*tl++ = 0;
3983	    else
3984		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
3985	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
3986	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
3987	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
3988	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
3989	    *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0];
3990	    *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1];
3991	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
3992	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
3993	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
3994	} else {
3995	    *tl = newnfs_false;
3996	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
3997	    if (NFSHASNFSV4N(nmp))
3998		*tl++ = 0;
3999	    else
4000		*tl++ = lp->nfsl_stateid.seqid;
4001	    *tl++ = lp->nfsl_stateid.other[0];
4002	    *tl++ = lp->nfsl_stateid.other[1];
4003	    *tl++ = lp->nfsl_stateid.other[2];
4004	    *tl = txdr_unsigned(lp->nfsl_seqid);
4005	    if (nfstest_outofseq &&
4006		(arc4random() % nfstest_outofseq) == 0)
4007		    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4008	}
4009	if (syscred)
4010		nd->nd_flag |= ND_USEGSSNAME;
4011	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4012	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4013	if (error)
4014		return (error);
4015	if (newone)
4016	    NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4017	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4018	if (nd->nd_repstat == 0) {
4019		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4020		lp->nfsl_stateid.seqid = *tl++;
4021		lp->nfsl_stateid.other[0] = *tl++;
4022		lp->nfsl_stateid.other[1] = *tl++;
4023		lp->nfsl_stateid.other[2] = *tl;
4024	} else if (nd->nd_repstat == NFSERR_DENIED) {
4025		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4026		size = fxdr_unsigned(int, *(tl + 7));
4027		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4028			error = EBADRPC;
4029		if (!error)
4030			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4031	} else if (nd->nd_repstat == NFSERR_STALESTATEID ||
4032	    nd->nd_repstat == NFSERR_BADSESSION)
4033		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4034nfsmout:
4035	mbuf_freem(nd->nd_mrep);
4036	return (error);
4037}
4038
4039/*
4040 * nfs statfs rpc
4041 * (always called with the vp for the mount point)
4042 */
4043APPLESTATIC int
4044nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4045    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4046    void *stuff)
4047{
4048	u_int32_t *tl = NULL;
4049	struct nfsrv_descript nfsd, *nd = &nfsd;
4050	struct nfsmount *nmp;
4051	nfsattrbit_t attrbits;
4052	int error;
4053
4054	*attrflagp = 0;
4055	nmp = VFSTONFS(vnode_mount(vp));
4056	if (NFSHASNFSV4(nmp)) {
4057		/*
4058		 * For V4, you actually do a getattr.
4059		 */
4060		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4061		NFSSTATFS_GETATTRBIT(&attrbits);
4062		(void) nfsrv_putattrbit(nd, &attrbits);
4063		nd->nd_flag |= ND_USEGSSNAME;
4064		error = nfscl_request(nd, vp, p, cred, stuff);
4065		if (error)
4066			return (error);
4067		if (nd->nd_repstat == 0) {
4068			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4069			    NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
4070			    cred);
4071			if (!error) {
4072				nmp->nm_fsid[0] = nap->na_filesid[0];
4073				nmp->nm_fsid[1] = nap->na_filesid[1];
4074				NFSSETHASSETFSID(nmp);
4075				*attrflagp = 1;
4076			}
4077		} else {
4078			error = nd->nd_repstat;
4079		}
4080		if (error)
4081			goto nfsmout;
4082	} else {
4083		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
4084		error = nfscl_request(nd, vp, p, cred, stuff);
4085		if (error)
4086			return (error);
4087		if (nd->nd_flag & ND_NFSV3) {
4088			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4089			if (error)
4090				goto nfsmout;
4091		}
4092		if (nd->nd_repstat) {
4093			error = nd->nd_repstat;
4094			goto nfsmout;
4095		}
4096		NFSM_DISSECT(tl, u_int32_t *,
4097		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4098	}
4099	if (NFSHASNFSV3(nmp)) {
4100		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4101		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4102		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4103		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4104		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4105		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4106		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4107	} else if (NFSHASNFSV4(nmp) == 0) {
4108		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4109		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4110		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4111		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4112		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4113	}
4114nfsmout:
4115	mbuf_freem(nd->nd_mrep);
4116	return (error);
4117}
4118
4119/*
4120 * nfs pathconf rpc
4121 */
4122APPLESTATIC int
4123nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4124    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4125    void *stuff)
4126{
4127	struct nfsrv_descript nfsd, *nd = &nfsd;
4128	struct nfsmount *nmp;
4129	u_int32_t *tl;
4130	nfsattrbit_t attrbits;
4131	int error;
4132
4133	*attrflagp = 0;
4134	nmp = VFSTONFS(vnode_mount(vp));
4135	if (NFSHASNFSV4(nmp)) {
4136		/*
4137		 * For V4, you actually do a getattr.
4138		 */
4139		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4140		NFSPATHCONF_GETATTRBIT(&attrbits);
4141		(void) nfsrv_putattrbit(nd, &attrbits);
4142		nd->nd_flag |= ND_USEGSSNAME;
4143		error = nfscl_request(nd, vp, p, cred, stuff);
4144		if (error)
4145			return (error);
4146		if (nd->nd_repstat == 0) {
4147			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4148			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4149			    cred);
4150			if (!error)
4151				*attrflagp = 1;
4152		} else {
4153			error = nd->nd_repstat;
4154		}
4155	} else {
4156		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
4157		error = nfscl_request(nd, vp, p, cred, stuff);
4158		if (error)
4159			return (error);
4160		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4161		if (nd->nd_repstat && !error)
4162			error = nd->nd_repstat;
4163		if (!error) {
4164			NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4165			pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4166			pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4167			pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4168			pc->pc_chownrestricted =
4169			    fxdr_unsigned(u_int32_t, *tl++);
4170			pc->pc_caseinsensitive =
4171			    fxdr_unsigned(u_int32_t, *tl++);
4172			pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4173		}
4174	}
4175nfsmout:
4176	mbuf_freem(nd->nd_mrep);
4177	return (error);
4178}
4179
4180/*
4181 * nfs version 3 fsinfo rpc call
4182 */
4183APPLESTATIC int
4184nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4185    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4186{
4187	u_int32_t *tl;
4188	struct nfsrv_descript nfsd, *nd = &nfsd;
4189	int error;
4190
4191	*attrflagp = 0;
4192	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
4193	error = nfscl_request(nd, vp, p, cred, stuff);
4194	if (error)
4195		return (error);
4196	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4197	if (nd->nd_repstat && !error)
4198		error = nd->nd_repstat;
4199	if (!error) {
4200		NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4201		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4202		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4203		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4204		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4205		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4206		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4207		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4208		fsp->fs_maxfilesize = fxdr_hyper(tl);
4209		tl += 2;
4210		fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4211		tl += 2;
4212		fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4213	}
4214nfsmout:
4215	mbuf_freem(nd->nd_mrep);
4216	return (error);
4217}
4218
4219/*
4220 * This function performs the Renew RPC.
4221 */
4222APPLESTATIC int
4223nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4224    NFSPROC_T *p)
4225{
4226	u_int32_t *tl;
4227	struct nfsrv_descript nfsd;
4228	struct nfsrv_descript *nd = &nfsd;
4229	struct nfsmount *nmp;
4230	int error;
4231	struct nfssockreq *nrp;
4232
4233	nmp = clp->nfsc_nmp;
4234	if (nmp == NULL)
4235		return (0);
4236	nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4237	    &dsp->nfsclds_sess);
4238	if (!NFSHASNFSV4N(nmp)) {
4239		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
4240		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4241		*tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0];
4242		*tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1];
4243	}
4244	nrp = dsp->nfsclds_sockp;
4245	if (nrp == NULL)
4246		/* If NULL, use the MDS socket. */
4247		nrp = &nmp->nm_sockreq;
4248	nd->nd_flag |= ND_USEGSSNAME;
4249	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4250	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4251	if (error)
4252		return (error);
4253	error = nd->nd_repstat;
4254	mbuf_freem(nd->nd_mrep);
4255	return (error);
4256}
4257
4258/*
4259 * This function performs the Releaselockowner RPC.
4260 */
4261APPLESTATIC int
4262nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4263    uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4264{
4265	struct nfsrv_descript nfsd, *nd = &nfsd;
4266	u_int32_t *tl;
4267	int error;
4268	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4269
4270	if (NFSHASNFSV4N(nmp)) {
4271		/* For NFSv4.1, do a FreeStateID. */
4272		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4273		    NULL);
4274		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4275	} else {
4276		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4277		    NULL);
4278		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4279		*tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0];
4280		*tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1];
4281		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4282		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4283		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4284	}
4285	nd->nd_flag |= ND_USEGSSNAME;
4286	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4287	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4288	if (error)
4289		return (error);
4290	error = nd->nd_repstat;
4291	mbuf_freem(nd->nd_mrep);
4292	return (error);
4293}
4294
4295/*
4296 * This function performs the Compound to get the mount pt FH.
4297 */
4298APPLESTATIC int
4299nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4300    NFSPROC_T *p)
4301{
4302	u_int32_t *tl;
4303	struct nfsrv_descript nfsd;
4304	struct nfsrv_descript *nd = &nfsd;
4305	u_char *cp, *cp2;
4306	int error, cnt, len, setnil;
4307	u_int32_t *opcntp;
4308
4309	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL);
4310	cp = dirpath;
4311	cnt = 0;
4312	do {
4313		setnil = 0;
4314		while (*cp == '/')
4315			cp++;
4316		cp2 = cp;
4317		while (*cp2 != '\0' && *cp2 != '/')
4318			cp2++;
4319		if (*cp2 == '/') {
4320			setnil = 1;
4321			*cp2 = '\0';
4322		}
4323		if (cp2 != cp) {
4324			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4325			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
4326			nfsm_strtom(nd, cp, strlen(cp));
4327			cnt++;
4328		}
4329		if (setnil)
4330			*cp2++ = '/';
4331		cp = cp2;
4332	} while (*cp != '\0');
4333	if (NFSHASNFSV4N(nmp))
4334		/* Has a Sequence Op done by nfscl_reqstart(). */
4335		*opcntp = txdr_unsigned(3 + cnt);
4336	else
4337		*opcntp = txdr_unsigned(2 + cnt);
4338	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4339	*tl = txdr_unsigned(NFSV4OP_GETFH);
4340	nd->nd_flag |= ND_USEGSSNAME;
4341	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4342		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4343	if (error)
4344		return (error);
4345	if (nd->nd_repstat == 0) {
4346		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4347		tl += (2 + 2 * cnt);
4348		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4349			len > NFSX_FHMAX) {
4350			nd->nd_repstat = NFSERR_BADXDR;
4351		} else {
4352			nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
4353			if (nd->nd_repstat == 0)
4354				nmp->nm_fhsize = len;
4355		}
4356	}
4357	error = nd->nd_repstat;
4358nfsmout:
4359	mbuf_freem(nd->nd_mrep);
4360	return (error);
4361}
4362
4363/*
4364 * This function performs the Delegreturn RPC.
4365 */
4366APPLESTATIC int
4367nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4368    struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4369{
4370	u_int32_t *tl;
4371	struct nfsrv_descript nfsd;
4372	struct nfsrv_descript *nd = &nfsd;
4373	int error;
4374
4375	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4376	    dp->nfsdl_fhlen, NULL, NULL);
4377	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4378	if (NFSHASNFSV4N(nmp))
4379		*tl++ = 0;
4380	else
4381		*tl++ = dp->nfsdl_stateid.seqid;
4382	*tl++ = dp->nfsdl_stateid.other[0];
4383	*tl++ = dp->nfsdl_stateid.other[1];
4384	*tl = dp->nfsdl_stateid.other[2];
4385	if (syscred)
4386		nd->nd_flag |= ND_USEGSSNAME;
4387	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4388	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4389	if (error)
4390		return (error);
4391	error = nd->nd_repstat;
4392	mbuf_freem(nd->nd_mrep);
4393	return (error);
4394}
4395
4396/*
4397 * nfs getacl call.
4398 */
4399APPLESTATIC int
4400nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4401    struct acl *aclp, void *stuff)
4402{
4403	struct nfsrv_descript nfsd, *nd = &nfsd;
4404	int error;
4405	nfsattrbit_t attrbits;
4406	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4407
4408	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4409		return (EOPNOTSUPP);
4410	NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
4411	NFSZERO_ATTRBIT(&attrbits);
4412	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4413	(void) nfsrv_putattrbit(nd, &attrbits);
4414	error = nfscl_request(nd, vp, p, cred, stuff);
4415	if (error)
4416		return (error);
4417	if (!nd->nd_repstat)
4418		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4419		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4420	else
4421		error = nd->nd_repstat;
4422	mbuf_freem(nd->nd_mrep);
4423	return (error);
4424}
4425
4426/*
4427 * nfs setacl call.
4428 */
4429APPLESTATIC int
4430nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4431    struct acl *aclp, void *stuff)
4432{
4433	int error;
4434	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4435
4436	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4437		return (EOPNOTSUPP);
4438	error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4439	return (error);
4440}
4441
4442/*
4443 * nfs setacl call.
4444 */
4445static int
4446nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4447    struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4448{
4449	struct nfsrv_descript nfsd, *nd = &nfsd;
4450	int error;
4451	nfsattrbit_t attrbits;
4452	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4453
4454	if (!NFSHASNFSV4(nmp))
4455		return (EOPNOTSUPP);
4456	NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
4457	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4458	NFSZERO_ATTRBIT(&attrbits);
4459	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4460	(void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0,
4461	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0);
4462	error = nfscl_request(nd, vp, p, cred, stuff);
4463	if (error)
4464		return (error);
4465	/* Don't care about the pre/postop attributes */
4466	mbuf_freem(nd->nd_mrep);
4467	return (nd->nd_repstat);
4468}
4469
4470/*
4471 * Do the NFSv4.1 Exchange ID.
4472 */
4473int
4474nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4475    struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp,
4476    struct ucred *cred, NFSPROC_T *p)
4477{
4478	uint32_t *tl, v41flags;
4479	struct nfsrv_descript nfsd;
4480	struct nfsrv_descript *nd = &nfsd;
4481	struct nfsclds *dsp;
4482	struct timespec verstime;
4483	int error, len;
4484
4485	*dspp = NULL;
4486	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL);
4487	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4488	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
4489	*tl = txdr_unsigned(clp->nfsc_rev);
4490	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
4491
4492	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4493	*tl++ = txdr_unsigned(exchflags);
4494	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
4495
4496	/* Set the implementation id4 */
4497	*tl = txdr_unsigned(1);
4498	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
4499	(void) nfsm_strtom(nd, version, strlen(version));
4500	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
4501	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
4502	verstime.tv_nsec = 0;
4503	txdr_nfsv4time(&verstime, tl);
4504	nd->nd_flag |= ND_USEGSSNAME;
4505	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4506	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4507	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
4508	    (int)nd->nd_repstat);
4509	if (error != 0)
4510		return (error);
4511	if (nd->nd_repstat == 0) {
4512		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
4513		len = fxdr_unsigned(int, *(tl + 7));
4514		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
4515			error = NFSERR_BADXDR;
4516			goto nfsmout;
4517		}
4518		dsp = malloc(sizeof(struct nfsclds) + len, M_NFSCLDS,
4519		    M_WAITOK | M_ZERO);
4520		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
4521		dsp->nfsclds_servownlen = len;
4522		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
4523		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
4524		dsp->nfsclds_sess.nfsess_sequenceid =
4525		    fxdr_unsigned(uint32_t, *tl++);
4526		v41flags = fxdr_unsigned(uint32_t, *tl);
4527		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
4528		    NFSHASPNFSOPT(nmp)) {
4529			NFSCL_DEBUG(1, "set PNFS\n");
4530			NFSLOCKMNT(nmp);
4531			nmp->nm_state |= NFSSTA_PNFS;
4532			NFSUNLOCKMNT(nmp);
4533			dsp->nfsclds_flags |= NFSCLDS_MDS;
4534		}
4535		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
4536			dsp->nfsclds_flags |= NFSCLDS_DS;
4537		if (len > 0)
4538			nd->nd_repstat = nfsrv_mtostr(nd,
4539			    dsp->nfsclds_serverown, len);
4540		if (nd->nd_repstat == 0) {
4541			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
4542			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
4543			    NULL, MTX_DEF);
4544			nfscl_initsessionslots(&dsp->nfsclds_sess);
4545			*dspp = dsp;
4546		} else
4547			free(dsp, M_NFSCLDS);
4548	}
4549	error = nd->nd_repstat;
4550nfsmout:
4551	mbuf_freem(nd->nd_mrep);
4552	return (error);
4553}
4554
4555/*
4556 * Do the NFSv4.1 Create Session.
4557 */
4558int
4559nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
4560    struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
4561    NFSPROC_T *p)
4562{
4563	uint32_t crflags, *tl;
4564	struct nfsrv_descript nfsd;
4565	struct nfsrv_descript *nd = &nfsd;
4566	int error, irdcnt;
4567
4568	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL);
4569	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4570	*tl++ = sep->nfsess_clientid.lval[0];
4571	*tl++ = sep->nfsess_clientid.lval[1];
4572	*tl++ = txdr_unsigned(sequenceid);
4573	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
4574	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0)
4575		crflags |= NFSV4CRSESS_CONNBACKCHAN;
4576	*tl = txdr_unsigned(crflags);
4577
4578	/* Fill in fore channel attributes. */
4579	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4580	*tl++ = 0;				/* Header pad size */
4581	*tl++ = txdr_unsigned(100000);		/* Max request size */
4582	*tl++ = txdr_unsigned(100000);		/* Max response size */
4583	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
4584	*tl++ = txdr_unsigned(20);		/* Max operations */
4585	*tl++ = txdr_unsigned(64);		/* Max slots */
4586	*tl = 0;				/* No rdma ird */
4587
4588	/* Fill in back channel attributes. */
4589	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4590	*tl++ = 0;				/* Header pad size */
4591	*tl++ = txdr_unsigned(10000);		/* Max request size */
4592	*tl++ = txdr_unsigned(10000);		/* Max response size */
4593	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
4594	*tl++ = txdr_unsigned(4);		/* Max operations */
4595	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
4596	*tl = 0;				/* No rdma ird */
4597
4598	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
4599	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
4600
4601	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
4602	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
4603	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
4604	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
4605	*tl++ = 0;				/* Null machine name */
4606	*tl++ = 0;				/* Uid == 0 */
4607	*tl++ = 0;				/* Gid == 0 */
4608	*tl = 0;				/* No additional gids */
4609	nd->nd_flag |= ND_USEGSSNAME;
4610	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
4611	    NFS_VER4, NULL, 1, NULL, NULL);
4612	if (error != 0)
4613		return (error);
4614	if (nd->nd_repstat == 0) {
4615		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
4616		    2 * NFSX_UNSIGNED);
4617		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
4618		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
4619		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
4620		crflags = fxdr_unsigned(uint32_t, *tl);
4621		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
4622			NFSLOCKMNT(nmp);
4623			nmp->nm_state |= NFSSTA_SESSPERSIST;
4624			NFSUNLOCKMNT(nmp);
4625		}
4626
4627		/* Get the fore channel slot count. */
4628		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4629		tl += 3;		/* Skip the other counts. */
4630		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
4631		tl++;
4632		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
4633		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
4634		irdcnt = fxdr_unsigned(int, *tl);
4635		if (irdcnt > 0)
4636			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
4637
4638		/* and the back channel slot count. */
4639		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4640		tl += 5;
4641		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
4642		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
4643	}
4644	error = nd->nd_repstat;
4645nfsmout:
4646	mbuf_freem(nd->nd_mrep);
4647	return (error);
4648}
4649
4650/*
4651 * Do the NFSv4.1 Destroy Session.
4652 */
4653int
4654nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
4655    struct ucred *cred, NFSPROC_T *p)
4656{
4657	uint32_t *tl;
4658	struct nfsrv_descript nfsd;
4659	struct nfsrv_descript *nd = &nfsd;
4660	int error;
4661
4662	nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL);
4663	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
4664	bcopy(NFSMNT_MDSSESSION(nmp)->nfsess_sessionid, tl, NFSX_V4SESSIONID);
4665	nd->nd_flag |= ND_USEGSSNAME;
4666	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4667	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4668	if (error != 0)
4669		return (error);
4670	error = nd->nd_repstat;
4671	mbuf_freem(nd->nd_mrep);
4672	return (error);
4673}
4674
4675/*
4676 * Do the NFSv4.1 Destroy Client.
4677 */
4678int
4679nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
4680    struct ucred *cred, NFSPROC_T *p)
4681{
4682	uint32_t *tl;
4683	struct nfsrv_descript nfsd;
4684	struct nfsrv_descript *nd = &nfsd;
4685	int error;
4686
4687	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL);
4688	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4689	*tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0];
4690	*tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1];
4691	nd->nd_flag |= ND_USEGSSNAME;
4692	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4693	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4694	if (error != 0)
4695		return (error);
4696	error = nd->nd_repstat;
4697	mbuf_freem(nd->nd_mrep);
4698	return (error);
4699}
4700
4701/*
4702 * Do the NFSv4.1 LayoutGet.
4703 */
4704int
4705nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
4706    uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen,
4707    nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp,
4708    struct ucred *cred, NFSPROC_T *p, void *stuff)
4709{
4710	uint32_t *tl;
4711	struct nfsrv_descript nfsd, *nd = &nfsd;
4712	struct nfsfh *nfhp;
4713	struct nfsclflayout *flp, *prevflp, *tflp;
4714	int cnt, error, gotiomode, fhcnt, nfhlen, i, j;
4715	uint8_t *cp;
4716	uint64_t retlen;
4717
4718	flp = NULL;
4719	gotiomode = -1;
4720	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL);
4721	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
4722	    NFSX_STATEID);
4723	*tl++ = newnfs_false;		/* Don't signal availability. */
4724	*tl++ = txdr_unsigned(NFSLAYOUT_NFSV4_1_FILES);
4725	*tl++ = txdr_unsigned(iomode);
4726	txdr_hyper(offset, tl);
4727	tl += 2;
4728	txdr_hyper(len, tl);
4729	tl += 2;
4730	txdr_hyper(minlen, tl);
4731	tl += 2;
4732	*tl++ = txdr_unsigned(stateidp->seqid);
4733	NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
4734	*tl++ = stateidp->other[0];
4735	*tl++ = stateidp->other[1];
4736	*tl++ = stateidp->other[2];
4737	*tl = txdr_unsigned(layoutlen);
4738	nd->nd_flag |= ND_USEGSSNAME;
4739	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4740	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4741	if (error != 0)
4742		return (error);
4743	if (nd->nd_repstat == 0) {
4744		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
4745		if (*tl++ != 0)
4746			*retonclosep = 1;
4747		else
4748			*retonclosep = 0;
4749		stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
4750		NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
4751		    (int)stateidp->seqid);
4752		stateidp->other[0] = *tl++;
4753		stateidp->other[1] = *tl++;
4754		stateidp->other[2] = *tl++;
4755		cnt = fxdr_unsigned(int, *tl);
4756		NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
4757		if (cnt <= 0 || cnt > 10000) {
4758			/* Don't accept more than 10000 layouts in reply. */
4759			error = NFSERR_BADXDR;
4760			goto nfsmout;
4761		}
4762		for (i = 0; i < cnt; i++) {
4763			/* Dissect all the way to the file handle cnt. */
4764			NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_HYPER +
4765			    6 * NFSX_UNSIGNED + NFSX_V4DEVICEID);
4766			fhcnt = fxdr_unsigned(int, *(tl + 11 +
4767			    NFSX_V4DEVICEID / NFSX_UNSIGNED));
4768			NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
4769			if (fhcnt < 0 || fhcnt > 100) {
4770				/* Don't accept more than 100 file handles. */
4771				error = NFSERR_BADXDR;
4772				goto nfsmout;
4773			}
4774			if (fhcnt > 1)
4775				flp = malloc(sizeof(*flp) + (fhcnt - 1) *
4776				    sizeof(struct nfsfh *),
4777				    M_NFSFLAYOUT, M_WAITOK);
4778			else
4779				flp = malloc(sizeof(*flp),
4780				    M_NFSFLAYOUT, M_WAITOK);
4781			flp->nfsfl_flags = 0;
4782			flp->nfsfl_fhcnt = 0;
4783			flp->nfsfl_devp = NULL;
4784			flp->nfsfl_off = fxdr_hyper(tl); tl += 2;
4785			retlen = fxdr_hyper(tl); tl += 2;
4786			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
4787				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
4788			else
4789				flp->nfsfl_end = flp->nfsfl_off + retlen;
4790			flp->nfsfl_iomode = fxdr_unsigned(int, *tl++);
4791			if (gotiomode == -1)
4792				gotiomode = flp->nfsfl_iomode;
4793			NFSCL_DEBUG(4, "layg reqiom=%d retiom=%d\n", iomode,
4794			    (int)flp->nfsfl_iomode);
4795			if (fxdr_unsigned(int, *tl++) !=
4796			    NFSLAYOUT_NFSV4_1_FILES) {
4797				printf("NFSv4.1: got non-files layout\n");
4798				error = NFSERR_BADXDR;
4799				goto nfsmout;
4800			}
4801			NFSBCOPY(++tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
4802			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
4803			flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
4804			NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
4805			flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
4806			flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
4807			if (fxdr_unsigned(int, *tl) != fhcnt) {
4808				printf("EEK! bad fhcnt\n");
4809				error = NFSERR_BADXDR;
4810				goto nfsmout;
4811			}
4812			for (j = 0; j < fhcnt; j++) {
4813				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4814				nfhlen = fxdr_unsigned(int, *tl);
4815				if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
4816					error = NFSERR_BADXDR;
4817					goto nfsmout;
4818				}
4819				nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
4820				    M_NFSFH, M_WAITOK);
4821				flp->nfsfl_fh[j] = nfhp;
4822				flp->nfsfl_fhcnt++;
4823				nfhp->nfh_len = nfhlen;
4824				NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
4825				NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
4826			}
4827			if (flp->nfsfl_iomode == gotiomode) {
4828				/* Keep the list in increasing offset order. */
4829				tflp = LIST_FIRST(flhp);
4830				prevflp = NULL;
4831				while (tflp != NULL &&
4832				    tflp->nfsfl_off < flp->nfsfl_off) {
4833					prevflp = tflp;
4834					tflp = LIST_NEXT(tflp, nfsfl_list);
4835				}
4836				if (prevflp == NULL)
4837					LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
4838				else
4839					LIST_INSERT_AFTER(prevflp, flp,
4840					    nfsfl_list);
4841			} else {
4842				printf("nfscl_layoutget(): got wrong iomode\n");
4843				nfscl_freeflayout(flp);
4844			}
4845			flp = NULL;
4846		}
4847	}
4848	if (nd->nd_repstat != 0 && error == 0)
4849		error = nd->nd_repstat;
4850nfsmout:
4851	if (error != 0 && flp != NULL)
4852		nfscl_freeflayout(flp);
4853	mbuf_freem(nd->nd_mrep);
4854	return (error);
4855}
4856
4857/*
4858 * Do the NFSv4.1 Get Device Info.
4859 */
4860int
4861nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
4862    uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
4863    NFSPROC_T *p)
4864{
4865	uint32_t cnt, *tl;
4866	struct nfsrv_descript nfsd;
4867	struct nfsrv_descript *nd = &nfsd;
4868	struct sockaddr_storage ss;
4869	struct nfsclds *dsp = NULL, **dspp;
4870	struct nfscldevinfo *ndi;
4871	int addrcnt, bitcnt, error, i, isudp, j, pos, safilled, stripecnt;
4872	uint8_t stripeindex;
4873
4874	*ndip = NULL;
4875	ndi = NULL;
4876	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL);
4877	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
4878	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
4879	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
4880	*tl++ = txdr_unsigned(layouttype);
4881	*tl++ = txdr_unsigned(100000);
4882	if (notifybitsp != NULL && *notifybitsp != 0) {
4883		*tl = txdr_unsigned(1);		/* One word of bits. */
4884		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
4885		*tl = txdr_unsigned(*notifybitsp);
4886	} else
4887		*tl = txdr_unsigned(0);
4888	nd->nd_flag |= ND_USEGSSNAME;
4889	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4890	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4891	if (error != 0)
4892		return (error);
4893	if (nd->nd_repstat == 0) {
4894		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4895		if (layouttype != fxdr_unsigned(int, *tl++))
4896			printf("EEK! devinfo layout type not same!\n");
4897		stripecnt = fxdr_unsigned(int, *++tl);
4898		NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
4899		if (stripecnt < 1 || stripecnt > 4096) {
4900			printf("NFS devinfo stripecnt %d: out of range\n",
4901			    stripecnt);
4902			error = NFSERR_BADXDR;
4903			goto nfsmout;
4904		}
4905		NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED);
4906		addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
4907		NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
4908		if (addrcnt < 1 || addrcnt > 128) {
4909			printf("NFS devinfo addrcnt %d: out of range\n",
4910			    addrcnt);
4911			error = NFSERR_BADXDR;
4912			goto nfsmout;
4913		}
4914
4915		/*
4916		 * Now we know how many stripe indices and addresses, so
4917		 * we can allocate the structure the correct size.
4918		 */
4919		i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *)
4920		    + 1;
4921		NFSCL_DEBUG(4, "stripeindices=%d\n", i);
4922		ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
4923		    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO);
4924		NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID);
4925		ndi->nfsdi_refcnt = 0;
4926		ndi->nfsdi_stripecnt = stripecnt;
4927		ndi->nfsdi_addrcnt = addrcnt;
4928		/* Fill in the stripe indices. */
4929		for (i = 0; i < stripecnt; i++) {
4930			stripeindex = fxdr_unsigned(uint8_t, *tl++);
4931			NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
4932			if (stripeindex >= addrcnt) {
4933				printf("NFS devinfo stripeindex %d: too big\n",
4934				    (int)stripeindex);
4935				error = NFSERR_BADXDR;
4936				goto nfsmout;
4937			}
4938			nfsfldi_setstripeindex(ndi, i, stripeindex);
4939		}
4940
4941		/* Now, dissect the server address(es). */
4942		safilled = 0;
4943		for (i = 0; i < addrcnt; i++) {
4944			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4945			cnt = fxdr_unsigned(uint32_t, *tl);
4946			if (cnt == 0) {
4947				printf("NFS devinfo 0 len addrlist\n");
4948				error = NFSERR_BADXDR;
4949				goto nfsmout;
4950			}
4951			dspp = nfsfldi_addr(ndi, i);
4952			pos = arc4random() % cnt;	/* Choose one. */
4953			safilled = 0;
4954			for (j = 0; j < cnt; j++) {
4955				error = nfsv4_getipaddr(nd, &ss, &isudp);
4956				if (error != 0 && error != EPERM) {
4957					error = NFSERR_BADXDR;
4958					goto nfsmout;
4959				}
4960				if (error == 0 && isudp == 0) {
4961					/*
4962					 * The algorithm is:
4963					 * - use "pos" entry if it is of the
4964					 *   same af_family or none of them
4965					 *   is of the same af_family
4966					 * else
4967					 * - use the first one of the same
4968					 *   af_family.
4969					 */
4970					if ((safilled == 0 && ss.ss_family ==
4971					     nmp->nm_nam->sa_family) ||
4972					    (j == pos &&
4973					     (safilled == 0 || ss.ss_family ==
4974					      nmp->nm_nam->sa_family)) ||
4975					    (safilled == 1 && ss.ss_family ==
4976					     nmp->nm_nam->sa_family)) {
4977						error = nfsrpc_fillsa(nmp, &ss,
4978						    &dsp, p);
4979						if (error == 0) {
4980							*dspp = dsp;
4981							if (ss.ss_family ==
4982							 nmp->nm_nam->sa_family)
4983								safilled = 2;
4984							else
4985								safilled = 1;
4986						}
4987					}
4988				}
4989			}
4990			if (safilled == 0)
4991				break;
4992		}
4993
4994		/* And the notify bits. */
4995		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4996		if (safilled != 0) {
4997			bitcnt = fxdr_unsigned(int, *tl);
4998			if (bitcnt > 0) {
4999				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5000				if (notifybitsp != NULL)
5001					*notifybitsp =
5002					    fxdr_unsigned(uint32_t, *tl);
5003			}
5004			*ndip = ndi;
5005		} else
5006			error = EPERM;
5007	}
5008	if (nd->nd_repstat != 0)
5009		error = nd->nd_repstat;
5010nfsmout:
5011	if (error != 0 && ndi != NULL)
5012		nfscl_freedevinfo(ndi);
5013	mbuf_freem(nd->nd_mrep);
5014	return (error);
5015}
5016
5017/*
5018 * Do the NFSv4.1 LayoutCommit.
5019 */
5020int
5021nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5022    uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5023    int layouttype, int layoutupdatecnt, uint8_t *layp, struct ucred *cred,
5024    NFSPROC_T *p, void *stuff)
5025{
5026	uint32_t *tl;
5027	struct nfsrv_descript nfsd, *nd = &nfsd;
5028	int error, outcnt, i;
5029	uint8_t *cp;
5030
5031	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL);
5032	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5033	    NFSX_STATEID);
5034	txdr_hyper(off, tl);
5035	tl += 2;
5036	txdr_hyper(len, tl);
5037	tl += 2;
5038	if (reclaim != 0)
5039		*tl++ = newnfs_true;
5040	else
5041		*tl++ = newnfs_false;
5042	*tl++ = txdr_unsigned(stateidp->seqid);
5043	*tl++ = stateidp->other[0];
5044	*tl++ = stateidp->other[1];
5045	*tl++ = stateidp->other[2];
5046	*tl++ = newnfs_true;
5047	if (lastbyte < off)
5048		lastbyte = off;
5049	else if (lastbyte >= (off + len))
5050		lastbyte = off + len - 1;
5051	txdr_hyper(lastbyte, tl);
5052	tl += 2;
5053	*tl++ = newnfs_false;
5054	*tl++ = txdr_unsigned(layouttype);
5055	*tl = txdr_unsigned(layoutupdatecnt);
5056	if (layoutupdatecnt > 0) {
5057		KASSERT(layouttype != NFSLAYOUT_NFSV4_1_FILES,
5058		    ("Must be nil for Files Layout"));
5059		outcnt = NFSM_RNDUP(layoutupdatecnt);
5060		NFSM_BUILD(cp, uint8_t *, outcnt);
5061		NFSBCOPY(layp, cp, layoutupdatecnt);
5062		cp += layoutupdatecnt;
5063		for (i = 0; i < (outcnt - layoutupdatecnt); i++)
5064			*cp++ = 0x0;
5065	}
5066	nd->nd_flag |= ND_USEGSSNAME;
5067	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5068	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5069	if (error != 0)
5070		return (error);
5071	error = nd->nd_repstat;
5072	mbuf_freem(nd->nd_mrep);
5073	return (error);
5074}
5075
5076/*
5077 * Do the NFSv4.1 LayoutReturn.
5078 */
5079int
5080nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5081    int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5082    uint64_t len, nfsv4stateid_t *stateidp, int layoutcnt, uint32_t *layp,
5083    struct ucred *cred, NFSPROC_T *p, void *stuff)
5084{
5085	uint32_t *tl;
5086	struct nfsrv_descript nfsd, *nd = &nfsd;
5087	int error, outcnt, i;
5088	uint8_t *cp;
5089
5090	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL);
5091	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5092	if (reclaim != 0)
5093		*tl++ = newnfs_true;
5094	else
5095		*tl++ = newnfs_false;
5096	*tl++ = txdr_unsigned(layouttype);
5097	*tl++ = txdr_unsigned(iomode);
5098	*tl = txdr_unsigned(layoutreturn);
5099	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5100		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5101		    NFSX_UNSIGNED);
5102		txdr_hyper(offset, tl);
5103		tl += 2;
5104		txdr_hyper(len, tl);
5105		tl += 2;
5106		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5107		*tl++ = txdr_unsigned(stateidp->seqid);
5108		*tl++ = stateidp->other[0];
5109		*tl++ = stateidp->other[1];
5110		*tl++ = stateidp->other[2];
5111		*tl = txdr_unsigned(layoutcnt);
5112		if (layoutcnt > 0) {
5113			outcnt = NFSM_RNDUP(layoutcnt);
5114			NFSM_BUILD(cp, uint8_t *, outcnt);
5115			NFSBCOPY(layp, cp, layoutcnt);
5116			cp += layoutcnt;
5117			for (i = 0; i < (outcnt - layoutcnt); i++)
5118				*cp++ = 0x0;
5119		}
5120	}
5121	nd->nd_flag |= ND_USEGSSNAME;
5122	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5123	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5124	if (error != 0)
5125		return (error);
5126	if (nd->nd_repstat == 0) {
5127		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5128		if (*tl != 0) {
5129			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5130			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5131			stateidp->other[0] = *tl++;
5132			stateidp->other[1] = *tl++;
5133			stateidp->other[2] = *tl;
5134		}
5135	} else
5136		error = nd->nd_repstat;
5137nfsmout:
5138	mbuf_freem(nd->nd_mrep);
5139	return (error);
5140}
5141
5142/*
5143 * Acquire a layout and devinfo, if possible. The caller must have acquired
5144 * a reference count on the nfsclclient structure before calling this.
5145 * Return the layout in lypp with a reference count on it, if successful.
5146 */
5147static int
5148nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5149    int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
5150    struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5151{
5152	struct nfscllayout *lyp;
5153	struct nfsclflayout *flp, *tflp;
5154	struct nfscldevinfo *dip;
5155	struct nfsclflayouthead flh;
5156	int error = 0, islocked, layoutlen, recalled, retonclose;
5157	nfsv4stateid_t stateid;
5158
5159	*lypp = NULL;
5160	/*
5161	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5162	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5163	 * flp == NULL.
5164	 */
5165	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5166	    off, &flp, &recalled);
5167	islocked = 0;
5168	if (lyp == NULL || flp == NULL) {
5169		if (recalled != 0)
5170			return (EIO);
5171		LIST_INIT(&flh);
5172		layoutlen = NFSMNT_MDSSESSION(nmp)->nfsess_maxcache -
5173		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5174		if (lyp == NULL) {
5175			stateid.seqid = 0;
5176			stateid.other[0] = stateidp->other[0];
5177			stateid.other[1] = stateidp->other[1];
5178			stateid.other[2] = stateidp->other[2];
5179			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5180			    nfhp->nfh_len, iomode, (uint64_t)0, INT64_MAX,
5181			    (uint64_t)0, layoutlen, &stateid, &retonclose,
5182			    &flh, cred, p, NULL);
5183		} else {
5184			islocked = 1;
5185			stateid.seqid = lyp->nfsly_stateid.seqid;
5186			stateid.other[0] = lyp->nfsly_stateid.other[0];
5187			stateid.other[1] = lyp->nfsly_stateid.other[1];
5188			stateid.other[2] = lyp->nfsly_stateid.other[2];
5189			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5190			    nfhp->nfh_len, iomode, off, INT64_MAX,
5191			    (uint64_t)0, layoutlen, &stateid, &retonclose,
5192			    &flh, cred, p, NULL);
5193		}
5194		if (error == 0)
5195			LIST_FOREACH(tflp, &flh, nfsfl_list) {
5196				error = nfscl_adddevinfo(nmp, NULL, tflp);
5197				if (error != 0) {
5198					error = nfsrpc_getdeviceinfo(nmp,
5199					    tflp->nfsfl_dev,
5200					    NFSLAYOUT_NFSV4_1_FILES,
5201					    notifybitsp, &dip, cred, p);
5202					if (error != 0)
5203						break;
5204					error = nfscl_adddevinfo(nmp, dip,
5205					    tflp);
5206					if (error != 0)
5207						printf(
5208						    "getlayout: cannot add\n");
5209				}
5210			}
5211		if (error == 0) {
5212			/*
5213			 * nfscl_layout() always returns with the nfsly_lock
5214			 * set to a refcnt (shared lock).
5215			 */
5216			error = nfscl_layout(nmp, vp, nfhp->nfh_fh,
5217			    nfhp->nfh_len, &stateid, retonclose, &flh, &lyp,
5218			    cred, p);
5219			if (error == 0)
5220				*lypp = lyp;
5221		} else if (islocked != 0)
5222			nfsv4_unlock(&lyp->nfsly_lock, 0);
5223	} else
5224		*lypp = lyp;
5225	return (error);
5226}
5227
5228/*
5229 * Do a TCP connection plus exchange id and create session.
5230 * If successful, a "struct nfsclds" is linked into the list for the
5231 * mount point and a pointer to it is returned.
5232 */
5233static int
5234nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_storage *ssp,
5235    struct nfsclds **dspp, NFSPROC_T *p)
5236{
5237	struct sockaddr_in *msad, *sad, *ssd;
5238	struct sockaddr_in6 *msad6, *sad6, *ssd6;
5239	struct nfsclclient *clp;
5240	struct nfssockreq *nrp;
5241	struct nfsclds *dsp, *tdsp;
5242	int error;
5243	enum nfsclds_state retv;
5244	uint32_t sequenceid;
5245
5246	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5247	    ("nfsrpc_fillsa: NULL nr_cred"));
5248	NFSLOCKCLSTATE();
5249	clp = nmp->nm_clp;
5250	NFSUNLOCKCLSTATE();
5251	if (clp == NULL)
5252		return (EPERM);
5253	if (ssp->ss_family == AF_INET) {
5254		ssd = (struct sockaddr_in *)ssp;
5255		NFSLOCKMNT(nmp);
5256
5257		/*
5258		 * Check to see if we already have a session for this
5259		 * address that is usable for a DS.
5260		 * Note that the MDS's address is in a different place
5261		 * than the sessions already acquired for DS's.
5262		 */
5263		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5264		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5265		while (tdsp != NULL) {
5266			if (msad != NULL && msad->sin_family == AF_INET &&
5267			    ssd->sin_addr.s_addr == msad->sin_addr.s_addr &&
5268			    ssd->sin_port == msad->sin_port &&
5269			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0) {
5270				*dspp = tdsp;
5271				NFSUNLOCKMNT(nmp);
5272				NFSCL_DEBUG(4, "fnd same addr\n");
5273				return (0);
5274			}
5275			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5276			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5277				msad = (struct sockaddr_in *)
5278				    tdsp->nfsclds_sockp->nr_nam;
5279			else
5280				msad = NULL;
5281		}
5282		NFSUNLOCKMNT(nmp);
5283
5284		/* No IP address match, so look for new/trunked one. */
5285		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5286		sad->sin_len = sizeof(*sad);
5287		sad->sin_family = AF_INET;
5288		sad->sin_port = ssd->sin_port;
5289		sad->sin_addr.s_addr = ssd->sin_addr.s_addr;
5290		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5291		nrp->nr_nam = (struct sockaddr *)sad;
5292	} else if (ssp->ss_family == AF_INET6) {
5293		ssd6 = (struct sockaddr_in6 *)ssp;
5294		NFSLOCKMNT(nmp);
5295
5296		/*
5297		 * Check to see if we already have a session for this
5298		 * address that is usable for a DS.
5299		 * Note that the MDS's address is in a different place
5300		 * than the sessions already acquired for DS's.
5301		 */
5302		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5303		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5304		while (tdsp != NULL) {
5305			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5306			    IN6_ARE_ADDR_EQUAL(&ssd6->sin6_addr,
5307			    &msad6->sin6_addr) &&
5308			    ssd6->sin6_port == msad6->sin6_port &&
5309			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0) {
5310				*dspp = tdsp;
5311				NFSUNLOCKMNT(nmp);
5312				return (0);
5313			}
5314			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5315			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5316				msad6 = (struct sockaddr_in6 *)
5317				    tdsp->nfsclds_sockp->nr_nam;
5318			else
5319				msad6 = NULL;
5320		}
5321		NFSUNLOCKMNT(nmp);
5322
5323		/* No IP address match, so look for new/trunked one. */
5324		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5325		sad6->sin6_len = sizeof(*sad6);
5326		sad6->sin6_family = AF_INET6;
5327		sad6->sin6_port = ssd6->sin6_port;
5328		NFSBCOPY(&ssd6->sin6_addr, &sad6->sin6_addr,
5329		    sizeof(struct in6_addr));
5330		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5331		nrp->nr_nam = (struct sockaddr *)sad6;
5332	} else
5333		return (EPERM);
5334
5335	nrp->nr_sotype = SOCK_STREAM;
5336	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5337	nrp->nr_prog = NFS_PROG;
5338	nrp->nr_vers = NFS_VER4;
5339
5340	/*
5341	 * Use the credentials that were used for the mount, which are
5342	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5343	 * Ref. counting the credentials with crhold() is probably not
5344	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5345	 * unmount, but I did it anyhow.
5346	 */
5347	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5348	error = newnfs_connect(nmp, nrp, NULL, p, 0);
5349	NFSCL_DEBUG(3, "DS connect=%d\n", error);
5350
5351	/* Now, do the exchangeid and create session. */
5352	if (error == 0)
5353		error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS,
5354		    &dsp, nrp->nr_cred, p);
5355	NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5356	if (error == 0) {
5357		dsp->nfsclds_sockp = nrp;
5358		NFSLOCKMNT(nmp);
5359		retv = nfscl_getsameserver(nmp, dsp, &tdsp);
5360		NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5361		if (retv == NFSDSP_USETHISSESSION) {
5362			NFSUNLOCKMNT(nmp);
5363			/*
5364			 * If there is already a session for this server,
5365			 * use it.
5366			 */
5367			(void)newnfs_disconnect(nrp);
5368			nfscl_freenfsclds(dsp);
5369			*dspp = tdsp;
5370			return (0);
5371		}
5372		if (retv == NFSDSP_SEQTHISSESSION)
5373			sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid;
5374		else
5375			sequenceid = dsp->nfsclds_sess.nfsess_sequenceid;
5376		NFSUNLOCKMNT(nmp);
5377		error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5378		    nrp, sequenceid, 0, nrp->nr_cred, p);
5379		NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5380	} else {
5381		NFSFREECRED(nrp->nr_cred);
5382		NFSFREEMUTEX(&nrp->nr_mtx);
5383		free(nrp->nr_nam, M_SONAME);
5384		free(nrp, M_NFSSOCKREQ);
5385	}
5386	if (error == 0) {
5387		NFSCL_DEBUG(3, "add DS session\n");
5388		/*
5389		 * Put it at the end of the list. That way the list
5390		 * is ordered by when the entry was added. This matters
5391		 * since the one done first is the one that should be
5392		 * used for sequencid'ing any subsequent create sessions.
5393		 */
5394		NFSLOCKMNT(nmp);
5395		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5396		NFSUNLOCKMNT(nmp);
5397		*dspp = dsp;
5398	} else if (dsp != NULL)
5399		nfscl_freenfsclds(dsp);
5400	return (error);
5401}
5402
5403/*
5404 * Do the NFSv4.1 Reclaim Complete.
5405 */
5406int
5407nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5408{
5409	uint32_t *tl;
5410	struct nfsrv_descript nfsd;
5411	struct nfsrv_descript *nd = &nfsd;
5412	int error;
5413
5414	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL);
5415	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5416	*tl = newnfs_false;
5417	nd->nd_flag |= ND_USEGSSNAME;
5418	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5419	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5420	if (error != 0)
5421		return (error);
5422	error = nd->nd_repstat;
5423	mbuf_freem(nd->nd_mrep);
5424	return (error);
5425}
5426
5427/*
5428 * Initialize the slot tables for a session.
5429 */
5430static void
5431nfscl_initsessionslots(struct nfsclsession *sep)
5432{
5433	int i;
5434
5435	for (i = 0; i < NFSV4_CBSLOTS; i++) {
5436		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5437			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5438		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5439	}
5440	for (i = 0; i < 64; i++)
5441		sep->nfsess_slotseq[i] = 0;
5442	sep->nfsess_slots = 0;
5443}
5444
5445/*
5446 * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5447 */
5448int
5449nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5450    uint32_t rwaccess, struct ucred *cred, NFSPROC_T *p)
5451{
5452	struct nfsnode *np = VTONFS(vp);
5453	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5454	struct nfscllayout *layp;
5455	struct nfscldevinfo *dip;
5456	struct nfsclflayout *rflp;
5457	nfsv4stateid_t stateid;
5458	struct ucred *newcred;
5459	uint64_t lastbyte, len, off, oresid, xfer;
5460	int eof, error, iolaymode, recalled;
5461	void *lckp;
5462
5463	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5464	    (np->n_flag & NNOLAYOUT) != 0)
5465		return (EIO);
5466	/* Now, get a reference cnt on the clientid for this mount. */
5467	if (nfscl_getref(nmp) == 0)
5468		return (EIO);
5469
5470	/* Find an appropriate stateid. */
5471	newcred = NFSNEWCRED(cred);
5472	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
5473	    rwaccess, 1, newcred, p, &stateid, &lckp);
5474	if (error != 0) {
5475		NFSFREECRED(newcred);
5476		nfscl_relref(nmp);
5477		return (error);
5478	}
5479	/* Search for a layout for this file. */
5480	off = uiop->uio_offset;
5481	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
5482	    np->n_fhp->nfh_len, off, &rflp, &recalled);
5483	if (layp == NULL || rflp == NULL) {
5484		if (recalled != 0) {
5485			NFSFREECRED(newcred);
5486			nfscl_relref(nmp);
5487			return (EIO);
5488		}
5489		if (layp != NULL) {
5490			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
5491			layp = NULL;
5492		}
5493		/* Try and get a Layout, if it is supported. */
5494		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
5495		    (np->n_flag & NWRITEOPENED) != 0)
5496			iolaymode = NFSLAYOUTIOMODE_RW;
5497		else
5498			iolaymode = NFSLAYOUTIOMODE_READ;
5499		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
5500		    NULL, &stateid, off, &layp, newcred, p);
5501		if (error != 0) {
5502			NFSLOCKNODE(np);
5503			np->n_flag |= NNOLAYOUT;
5504			NFSUNLOCKNODE(np);
5505			if (lckp != NULL)
5506				nfscl_lockderef(lckp);
5507			NFSFREECRED(newcred);
5508			if (layp != NULL)
5509				nfscl_rellayout(layp, 0);
5510			nfscl_relref(nmp);
5511			return (error);
5512		}
5513	}
5514
5515	/*
5516	 * Loop around finding a layout that works for the first part of
5517	 * this I/O operation, and then call the function that actually
5518	 * does the RPC.
5519	 */
5520	eof = 0;
5521	len = (uint64_t)uiop->uio_resid;
5522	while (len > 0 && error == 0 && eof == 0) {
5523		off = uiop->uio_offset;
5524		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
5525		if (error == 0) {
5526			oresid = xfer = (uint64_t)uiop->uio_resid;
5527			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
5528				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
5529			dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev,
5530			    rflp->nfsfl_devp);
5531			if (dip != NULL) {
5532				error = nfscl_doflayoutio(vp, uiop, iomode,
5533				    must_commit, &eof, &stateid, rwaccess, dip,
5534				    layp, rflp, off, xfer, newcred, p);
5535				nfscl_reldevinfo(dip);
5536				lastbyte = off + xfer - 1;
5537				if (error == 0) {
5538					NFSLOCKCLSTATE();
5539					if (lastbyte > layp->nfsly_lastbyte)
5540						layp->nfsly_lastbyte = lastbyte;
5541					NFSUNLOCKCLSTATE();
5542				}
5543			} else
5544				error = EIO;
5545			if (error == 0)
5546				len -= (oresid - (uint64_t)uiop->uio_resid);
5547		}
5548	}
5549	if (lckp != NULL)
5550		nfscl_lockderef(lckp);
5551	NFSFREECRED(newcred);
5552	nfscl_rellayout(layp, 0);
5553	nfscl_relref(nmp);
5554	return (error);
5555}
5556
5557/*
5558 * Find a file layout that will handle the first bytes of the requested
5559 * range and return the information from it needed to to the I/O operation.
5560 */
5561int
5562nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
5563    struct nfsclflayout **retflpp)
5564{
5565	struct nfsclflayout *flp, *nflp, *rflp;
5566	uint32_t rw;
5567
5568	rflp = NULL;
5569	rw = rwaccess;
5570	/* For reading, do the Read list first and then the Write list. */
5571	do {
5572		if (rw == NFSV4OPEN_ACCESSREAD)
5573			flp = LIST_FIRST(&lyp->nfsly_flayread);
5574		else
5575			flp = LIST_FIRST(&lyp->nfsly_flayrw);
5576		while (flp != NULL) {
5577			nflp = LIST_NEXT(flp, nfsfl_list);
5578			if (flp->nfsfl_off > off)
5579				break;
5580			if (flp->nfsfl_end > off &&
5581			    (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
5582				rflp = flp;
5583			flp = nflp;
5584		}
5585		if (rw == NFSV4OPEN_ACCESSREAD)
5586			rw = NFSV4OPEN_ACCESSWRITE;
5587		else
5588			rw = 0;
5589	} while (rw != 0);
5590	if (rflp != NULL) {
5591		/* This one covers the most bytes starting at off. */
5592		*retflpp = rflp;
5593		return (0);
5594	}
5595	return (EIO);
5596}
5597
5598/*
5599 * Do I/O using an NFSv4.1 file layout.
5600 */
5601static int
5602nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5603    int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
5604    struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
5605    uint64_t len, struct ucred *cred, NFSPROC_T *p)
5606{
5607	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
5608	int commit_thru_mds, error = 0, stripe_index, stripe_pos;
5609	struct nfsnode *np;
5610	struct nfsfh *fhp;
5611	struct nfsclds **dspp;
5612
5613	np = VTONFS(vp);
5614	rel_off = off - flp->nfsfl_patoff;
5615	stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff;
5616	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
5617	    dp->nfsdi_stripecnt;
5618	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
5619
5620	/* Loop around, doing I/O for each stripe unit. */
5621	while (len > 0 && error == 0) {
5622		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
5623		dspp = nfsfldi_addr(dp, stripe_index);
5624		if (len > transfer)
5625			xfer = transfer;
5626		else
5627			xfer = len;
5628		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
5629			/* Dense layout. */
5630			if (stripe_pos >= flp->nfsfl_fhcnt)
5631				return (EIO);
5632			fhp = flp->nfsfl_fh[stripe_pos];
5633			io_off = (rel_off / (stripe_unit_size *
5634			    dp->nfsdi_stripecnt)) * stripe_unit_size +
5635			    rel_off % stripe_unit_size;
5636		} else {
5637			/* Sparse layout. */
5638			if (flp->nfsfl_fhcnt > 1) {
5639				if (stripe_index >= flp->nfsfl_fhcnt)
5640					return (EIO);
5641				fhp = flp->nfsfl_fh[stripe_index];
5642			} else if (flp->nfsfl_fhcnt == 1)
5643				fhp = flp->nfsfl_fh[0];
5644			else
5645				fhp = np->n_fhp;
5646			io_off = off;
5647		}
5648		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0)
5649			commit_thru_mds = 1;
5650		else
5651			commit_thru_mds = 0;
5652		if (rwflag == FREAD)
5653			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
5654			    io_off, xfer, fhp, cred, p);
5655		else {
5656			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
5657			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
5658			    cred, p);
5659			if (error == 0) {
5660				NFSLOCKCLSTATE();
5661				lyp->nfsly_flags |= NFSLY_WRITTEN;
5662				NFSUNLOCKCLSTATE();
5663			}
5664		}
5665		if (error == 0) {
5666			transfer = stripe_unit_size;
5667			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
5668			len -= xfer;
5669			off += xfer;
5670		}
5671	}
5672	return (error);
5673}
5674
5675/*
5676 * The actual read RPC done to a DS.
5677 */
5678static int
5679nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
5680    struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp,
5681    struct ucred *cred, NFSPROC_T *p)
5682{
5683	uint32_t *tl;
5684	int error, retlen;
5685	struct nfsrv_descript nfsd;
5686	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5687	struct nfsrv_descript *nd = &nfsd;
5688	struct nfssockreq *nrp;
5689
5690	nd->nd_mrep = NULL;
5691	nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len,
5692	    NULL, &dsp->nfsclds_sess);
5693	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
5694	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
5695	txdr_hyper(io_off, tl);
5696	*(tl + 2) = txdr_unsigned(len);
5697	nrp = dsp->nfsclds_sockp;
5698	if (nrp == NULL)
5699		/* If NULL, use the MDS socket. */
5700		nrp = &nmp->nm_sockreq;
5701	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
5702	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
5703	if (error != 0)
5704		return (error);
5705	if (nd->nd_repstat != 0) {
5706		error = nd->nd_repstat;
5707		goto nfsmout;
5708	}
5709	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5710	*eofp = fxdr_unsigned(int, *tl);
5711	NFSM_STRSIZ(retlen, len);
5712	error = nfsm_mbufuio(nd, uiop, retlen);
5713nfsmout:
5714	if (nd->nd_mrep != NULL)
5715		mbuf_freem(nd->nd_mrep);
5716	return (error);
5717}
5718
5719/*
5720 * The actual write RPC done to a DS.
5721 */
5722static int
5723nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5724    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
5725    struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p)
5726{
5727	uint32_t *tl;
5728	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5729	int error, rlen, commit, committed = NFSWRITE_FILESYNC;
5730	int32_t backup;
5731	struct nfsrv_descript nfsd;
5732	struct nfsrv_descript *nd = &nfsd;
5733	struct nfssockreq *nrp;
5734
5735	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
5736	nd->nd_mrep = NULL;
5737	nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len,
5738	    NULL, &dsp->nfsclds_sess);
5739	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
5740	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
5741	txdr_hyper(io_off, tl);
5742	tl += 2;
5743	*tl++ = txdr_unsigned(*iomode);
5744	*tl = txdr_unsigned(len);
5745	nfsm_uiombuf(nd, uiop, len);
5746	nrp = dsp->nfsclds_sockp;
5747	if (nrp == NULL)
5748		/* If NULL, use the MDS socket. */
5749		nrp = &nmp->nm_sockreq;
5750	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
5751	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
5752	if (error != 0)
5753		return (error);
5754	if (nd->nd_repstat != 0) {
5755		/*
5756		 * In case the rpc gets retried, roll
5757		 * the uio fileds changed by nfsm_uiombuf()
5758		 * back.
5759		 */
5760		uiop->uio_offset -= len;
5761		uio_uio_resid_add(uiop, len);
5762		uio_iov_base_add(uiop, -len);
5763		uio_iov_len_add(uiop, len);
5764		error = nd->nd_repstat;
5765	} else {
5766		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
5767		rlen = fxdr_unsigned(int, *tl++);
5768		if (rlen == 0) {
5769			error = NFSERR_IO;
5770			goto nfsmout;
5771		} else if (rlen < len) {
5772			backup = len - rlen;
5773			uio_iov_base_add(uiop, -(backup));
5774			uio_iov_len_add(uiop, backup);
5775			uiop->uio_offset -= backup;
5776			uio_uio_resid_add(uiop, backup);
5777			len = rlen;
5778		}
5779		commit = fxdr_unsigned(int, *tl++);
5780
5781		/*
5782		 * Return the lowest committment level
5783		 * obtained by any of the RPCs.
5784		 */
5785		if (committed == NFSWRITE_FILESYNC)
5786			committed = commit;
5787		else if (committed == NFSWRITE_DATASYNC &&
5788		    commit == NFSWRITE_UNSTABLE)
5789			committed = commit;
5790		if (commit_thru_mds != 0) {
5791			NFSLOCKMNT(nmp);
5792			if (!NFSHASWRITEVERF(nmp)) {
5793				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
5794				NFSSETWRITEVERF(nmp);
5795	    		} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
5796				*must_commit = 1;
5797				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
5798			}
5799			NFSUNLOCKMNT(nmp);
5800		} else {
5801			NFSLOCKDS(dsp);
5802			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
5803				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
5804				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
5805			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
5806				*must_commit = 1;
5807				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
5808			}
5809			NFSUNLOCKDS(dsp);
5810		}
5811	}
5812nfsmout:
5813	if (nd->nd_mrep != NULL)
5814		mbuf_freem(nd->nd_mrep);
5815	*iomode = committed;
5816	if (nd->nd_repstat != 0 && error == 0)
5817		error = nd->nd_repstat;
5818	return (error);
5819}
5820
5821/*
5822 * Free up the nfsclds structure.
5823 */
5824void
5825nfscl_freenfsclds(struct nfsclds *dsp)
5826{
5827	int i;
5828
5829	if (dsp == NULL)
5830		return;
5831	if (dsp->nfsclds_sockp != NULL) {
5832		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
5833		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
5834		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
5835		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
5836	}
5837	NFSFREEMUTEX(&dsp->nfsclds_mtx);
5838	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
5839	for (i = 0; i < NFSV4_CBSLOTS; i++) {
5840		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
5841			m_freem(
5842			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
5843	}
5844	free(dsp, M_NFSCLDS);
5845}
5846
5847static enum nfsclds_state
5848nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
5849    struct nfsclds **retdspp)
5850{
5851	struct nfsclds *dsp, *cur_dsp;
5852
5853	/*
5854	 * Search the list of nfsclds structures for one with the same
5855	 * server.
5856	 */
5857	cur_dsp = NULL;
5858	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
5859		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
5860		    dsp->nfsclds_servownlen != 0 &&
5861		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
5862		    dsp->nfsclds_servownlen)) {
5863			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
5864			    TAILQ_FIRST(&nmp->nm_sess), dsp,
5865			    dsp->nfsclds_flags);
5866			/* Server major id matches. */
5867			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
5868				*retdspp = dsp;
5869				return (NFSDSP_USETHISSESSION);
5870			}
5871
5872			/*
5873			 * Note the first match, so it can be used for
5874			 * sequence'ing new sessions.
5875			 */
5876			if (cur_dsp == NULL)
5877				cur_dsp = dsp;
5878		}
5879	}
5880	if (cur_dsp != NULL) {
5881		*retdspp = cur_dsp;
5882		return (NFSDSP_SEQTHISSESSION);
5883	}
5884	return (NFSDSP_NOTFOUND);
5885}
5886
#ifdef notyet
/*
 * NFS commit rpc to a DS.
 *
 * Sends a COMMIT for "cnt" bytes starting at "offset" using file handle
 * "fhp".  When the reply status is 0, the write verifier in the reply is
 * compared against the one recorded in dsp; if they differ the new
 * verifier is recorded and NFSERR_STALEWRITEVERF is returned.
 *
 * Returns 0 on success, otherwise the error from newnfs_request(), the
 * status carried in the reply (nd_repstat), NFSERR_STALEWRITEVERF, or an
 * error hit while parsing the reply.  The "stuff" argument is not used.
 */
static int
nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
    struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p, void *stuff)
{
	uint32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	struct nfssockreq *nrp;
	int error;

	/*
	 * Start with no reply attached, as the read and write DS RPCs do,
	 * so the cleanup at nfsmout can test for one.
	 */
	nd->nd_mrep = NULL;
	nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len,
	    NULL, &dsp->nfsclds_sess);
	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
	txdr_hyper(offset, tl);
	tl += 2;
	*tl = txdr_unsigned(cnt);
	nrp = dsp->nfsclds_sockp;
	if (nrp == NULL)
		/* If NULL, use the MDS socket. */
		nrp = &nmp->nm_sockreq;
	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
	if (error != 0)
		return (error);
	if (nd->nd_repstat == 0) {
		NFSM_DISSECT(tl, uint32_t *, NFSX_VERF);
		NFSLOCKDS(dsp);
		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
			/* Verifier changed: remember it and tell the caller. */
			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
			error = NFSERR_STALEWRITEVERF;
		}
		NFSUNLOCKDS(dsp);
	}
nfsmout:
	if (error == 0 && nd->nd_repstat != 0)
		error = nd->nd_repstat;
	if (nd->nd_mrep != NULL)
		mbuf_freem(nd->nd_mrep);
	return (error);
}
#endif
5931
5932