1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD$");
37
38/*
39 * nfs version 2 and 3 server calls to vnode ops
40 * - these routines generally have 3 phases
41 *   1 - break down and validate rpc request in mbuf list
42 *   2 - do the vnode ops for the request
43 *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
44 *   3 - build the rpc reply in an mbuf list
45 *   nb:
46 *	- do not mix the phases, since the nfsm_?? macros can return failures
47 *	  on a bad rpc or similar and do not do any vrele() or vput()'s
48 *
49 *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
50 *	error number iff error != 0 whereas
51 *	returning an error from the server function implies a fatal error
52 *	such as a badly constructed rpc request that should be dropped without
53 *	a reply.
54 *	For nfsm_reply(), the case where error == EBADRPC is treated
55 *	specially; after constructing a reply, it does an immediate
56 *	`goto nfsmout' to avoid getting any V3 post-op status appended.
57 *
58 * Other notes:
59 *	Warning: always pay careful attention to resource cleanup on return
60 *	and note that nfsm_*() macros can terminate a procedure on certain
61 *	errors.
62 *
63 *	lookup() and namei()
64 *	may return garbage in various structural fields/return elements
65 *	if an error is returned, and may garbage up nd.ni_dvp even if no
66 *	error is returned and you did not request LOCKPARENT or WANTPARENT.
67 *
68 *	We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name
69 *	buffer has been freed or not.
70 */
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/proc.h>
75#include <sys/namei.h>
76#include <sys/unistd.h>
77#include <sys/vnode.h>
78#include <sys/mount.h>
79#include <sys/socket.h>
80#include <sys/socketvar.h>
81#include <sys/malloc.h>
82#include <sys/mbuf.h>
83#include <sys/priv.h>
84#include <sys/dirent.h>
85#include <sys/stat.h>
86#include <sys/kernel.h>
87#include <sys/sysctl.h>
88#include <sys/bio.h>
89#include <sys/buf.h>
90
91#include <vm/vm.h>
92#include <vm/vm_extern.h>
93#include <vm/vm_object.h>
94
95#include <nfs/nfsproto.h>
96#include <nfsserver/nfs.h>
97#include <nfs/xdr_subs.h>
98#include <nfsserver/nfsm_subs.h>
99
100FEATURE(nfsserver, "NFS server");
101
102#ifdef NFSRV_DEBUG
103#define nfsdbprintf(info)	printf info
104#else
105#define nfsdbprintf(info)
106#endif
107
108#define MAX_COMMIT_COUNT	(1024 * 1024)
109
/*
 * Parameters of the sequential-access heuristic implemented by
 * nfsrv_sequential_heuristic() and the table it operates on.
 */
#define	MAX_REORDERED_RPC	16	/* reorder window multiplier (x I/O size) */
#define NUM_HEURISTIC		1031	/* number of slots in nfsheur[] */
#define NHUSE_INIT		64	/* nh_use assigned to a newly claimed slot */
#define NHUSE_INC		16	/* nh_use added on every heuristic call */
#define NHUSE_MAX		2048	/* upper bound applied to nh_use */

/*
 * One entry per tracked vnode.  Entries are located by hashing the vnode
 * pointer and probing forward; see nfsrv_sequential_heuristic().
 */
static struct nfsheur {
	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
	off_t nh_nextoff;	/* next offset for sequential detection */
	int nh_use;		/* use count for selection */
	int nh_seqcount;	/* heuristic */
} nfsheur[NUM_HEURISTIC];
122
123/* Global vars */
124
/*
 * NOTE(review): presumably write-gathering delays derived from
 * NFS_GATHERDELAY; the consumers are not in this part of the file.
 */
int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
int nfsrvw_procrastinate_v3 = 0;

static struct timeval	nfsver = { 0 };

/* Root of the vfs.nfsrv sysctl tree. */
SYSCTL_NODE(_vfs, OID_AUTO, nfsrv, CTLFLAG_RW, 0, "NFS server");

/*
 * nfs_async: when non-zero, nfsrv_write() treats NFSv2 writes as
 * NFSV3WRITE_UNSTABLE.
 * nfs_commit_blks / nfs_commit_miss: counters exported read/write below;
 * they are updated outside this part of the file.
 */
static int nfs_async;
static int nfs_commit_blks;
static int nfs_commit_miss;
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
    "Tell client that writes were synced even though they were not");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
    "Number of completed commits");
SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");

/* Server statistics, exported as an opaque structure. */
struct nfsrvstats nfsrvstats;
SYSCTL_STRUCT(_vfs_nfsrv, NFS_NFSRVSTATS, nfsrvstats, CTLFLAG_RW,
	&nfsrvstats, nfsrvstats, "S,nfsrvstats");

/*
 * Access check used by the service routines below; it returns non-zero
 * when the requested access mode is refused.
 */
static int	nfsrv_access(struct vnode *, accmode_t, struct ucred *,
		    int, int);
147
/*
 * Clear the nameidata fields that are tested in the nfsmout cleanup code.
 * This must happen before the first nfsm macro is used, because any of
 * those macros might jump to the cleanup code.
 */
152
153static __inline void
154ndclear(struct nameidata *nd)
155{
156
157	nd->ni_cnd.cn_flags = 0;
158	nd->ni_vp = NULL;
159	nd->ni_dvp = NULL;
160	nd->ni_startdir = NULL;
161	nd->ni_strictrelative = 0;
162}
163
164/*
165 * Takes two vfslocked integers and returns with at most one
166 * reference to giant.  The return value indicates whether giant
167 * is held by either lock.  This simplifies nfsrv ops by allowing
168 * them to track only one vfslocked var.
169 */
static __inline int
nfsrv_lockedpair(int vfs1, int vfs2)
{

	/* With at most one side set there is nothing to drop. */
	if (vfs1 == 0 || vfs2 == 0)
		return (vfs1 | vfs2);

	/* Both sides are set: release the second one. */
	VFS_UNLOCK_GIANT(vfs2);
	return (vfs1 | vfs2);
}
179
/*
 * Same as nfsrv_lockedpair(), with the second value taken from the
 * nameidata via NDHASGIANT().
 */
static __inline int
nfsrv_lockedpair_nd(int vfs1, struct nameidata *nd)
{
	int nd_locked = NDHASGIANT(nd);

	return (nfsrv_lockedpair(vfs1, nd_locked));
}
189
190/*
191 * Heuristic to detect sequential operation.
192 */
193static struct nfsheur *
194nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
195{
196	struct nfsheur *nh;
197	int hi, try;
198
199	/* Locate best candidate. */
200	try = 32;
201	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
202	nh = &nfsheur[hi];
203	while (try--) {
204		if (nfsheur[hi].nh_vp == vp) {
205			nh = &nfsheur[hi];
206			break;
207		}
208		if (nfsheur[hi].nh_use > 0)
209			--nfsheur[hi].nh_use;
210		hi = (hi + 1) % NUM_HEURISTIC;
211		if (nfsheur[hi].nh_use < nh->nh_use)
212			nh = &nfsheur[hi];
213	}
214
215	/* Initialize hint if this is a new file. */
216	if (nh->nh_vp != vp) {
217		nh->nh_vp = vp;
218		nh->nh_nextoff = uio->uio_offset;
219		nh->nh_use = NHUSE_INIT;
220		if (uio->uio_offset == 0)
221			nh->nh_seqcount = 4;
222		else
223			nh->nh_seqcount = 1;
224	}
225
226	/* Calculate heuristic. */
227	if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
228	    uio->uio_offset == nh->nh_nextoff) {
229		/* See comments in vfs_vnops.c:sequential_heuristic(). */
230		nh->nh_seqcount += howmany(uio->uio_resid, 16384);
231		if (nh->nh_seqcount > IO_SEQMAX)
232			nh->nh_seqcount = IO_SEQMAX;
233	} else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
234	    imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
235		/* Probably a reordered RPC, leave seqcount alone. */
236	} else if (nh->nh_seqcount > 1) {
237		nh->nh_seqcount /= 2;
238	} else {
239		nh->nh_seqcount = 0;
240	}
241	nh->nh_use += NHUSE_INC;
242	if (nh->nh_use > NHUSE_MAX)
243		nh->nh_use = NHUSE_MAX;
244	return (nh);
245}
246
247/*
248 * nfs v3 access service
249 */
250int
251nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
252    struct mbuf **mrq)
253{
254	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
255	struct sockaddr *nam = nfsd->nd_nam;
256	caddr_t dpos = nfsd->nd_dpos;
257	struct ucred *cred = nfsd->nd_cr;
258	struct vnode *vp = NULL;
259	nfsfh_t nfh;
260	fhandle_t *fhp;
261	u_int32_t *tl;
262	caddr_t bpos;
263	int error = 0, rdonly, getret;
264	struct mbuf *mb, *mreq;
265	struct vattr vattr, *vap = &vattr;
266	u_long testmode, nfsmode;
267	int v3 = (nfsd->nd_flag & ND_NFSV3);
268	int vfslocked;
269
270	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
271	if (!v3)
272		panic("nfsrv3_access: v3 proc called on a v2 connection");
273	vfslocked = 0;
274	fhp = &nfh.fh_generic;
275	nfsm_srvmtofh(fhp);
276	tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
277	error = nfsrv_fhtovp(fhp, 0, &vp, &vfslocked, nfsd, slp, nam, &rdonly);
278	if (error) {
279		nfsm_reply(NFSX_UNSIGNED);
280		nfsm_srvpostop_attr(1, NULL);
281		error = 0;
282		goto nfsmout;
283	}
284	nfsmode = fxdr_unsigned(u_int32_t, *tl);
285	if ((nfsmode & NFSV3ACCESS_READ) &&
286		nfsrv_access(vp, VREAD, cred, rdonly, 0))
287		nfsmode &= ~NFSV3ACCESS_READ;
288	if (vp->v_type == VDIR)
289		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
290			NFSV3ACCESS_DELETE);
291	else
292		testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
293	if ((nfsmode & testmode) &&
294		nfsrv_access(vp, VWRITE, cred, rdonly, 0))
295		nfsmode &= ~testmode;
296	if (vp->v_type == VDIR)
297		testmode = NFSV3ACCESS_LOOKUP;
298	else
299		testmode = NFSV3ACCESS_EXECUTE;
300	if ((nfsmode & testmode) &&
301		nfsrv_access(vp, VEXEC, cred, rdonly, 0))
302		nfsmode &= ~testmode;
303	getret = VOP_GETATTR(vp, vap, cred);
304	vput(vp);
305	vp = NULL;
306	nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
307	nfsm_srvpostop_attr(getret, vap);
308	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
309	*tl = txdr_unsigned(nfsmode);
310nfsmout:
311	if (vp)
312		vput(vp);
313	VFS_UNLOCK_GIANT(vfslocked);
314	return(error);
315}
316
317/*
318 * nfs getattr service
319 */
320int
321nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
322    struct mbuf **mrq)
323{
324	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
325	struct sockaddr *nam = nfsd->nd_nam;
326	caddr_t dpos = nfsd->nd_dpos;
327	struct ucred *cred = nfsd->nd_cr;
328	struct nfs_fattr *fp;
329	struct vattr va;
330	struct vattr *vap = &va;
331	struct vnode *vp = NULL;
332	nfsfh_t nfh;
333	fhandle_t *fhp;
334	caddr_t bpos;
335	int error = 0, rdonly;
336	struct mbuf *mb, *mreq;
337	int vfslocked;
338
339	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
340	vfslocked = 0;
341	fhp = &nfh.fh_generic;
342	nfsm_srvmtofh(fhp);
343	error = nfsrv_fhtovp(fhp, 0, &vp, &vfslocked, nfsd, slp, nam, &rdonly);
344	if (error) {
345		nfsm_reply(0);
346		error = 0;
347		goto nfsmout;
348	}
349	error = VOP_GETATTR(vp, vap, cred);
350	vput(vp);
351	vp = NULL;
352	nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
353	if (error) {
354		error = 0;
355		goto nfsmout;
356	}
357	fp = nfsm_build(struct nfs_fattr *,
358	    NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
359	nfsm_srvfillattr(vap, fp);
360	/* fall through */
361
362nfsmout:
363	if (vp)
364		vput(vp);
365	VFS_UNLOCK_GIANT(vfslocked);
366	return(error);
367}
368
369/*
370 * nfs setattr service
371 */
/*
 * NFS SETATTR: decode the new attributes (v3 sattr plus optional ctime
 * guard, or the v2 sattr structure), apply them with VOP_SETATTR() and
 * reply with wcc data (v3) or the resulting attributes (v2).
 *
 * There are two exit labels: `out' builds the normal reply from the current
 * error value, `nfsmout' only releases resources.  The nfsm_*() macros may
 * jump to nfsmout by themselves.
 */
int
nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
    struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct sockaddr *nam = nfsd->nd_nam;
	caddr_t dpos = nfsd->nd_dpos;
	struct ucred *cred = nfsd->nd_cr;
	struct vattr va, preat;
	struct vattr *vap = &va;
	struct nfsv2_sattr *sp;
	struct nfs_fattr *fp;
	struct vnode *vp = NULL;
	nfsfh_t nfh;
	fhandle_t *fhp;
	u_int32_t *tl;
	caddr_t bpos;
	int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
	int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
	struct mbuf *mb, *mreq;
	struct timespec guard = { 0, 0 };
	struct mount *mp = NULL;
	int tvfslocked;
	int vfslocked;

	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	vfslocked = 0;
	fhp = &nfh.fh_generic;
	nfsm_srvmtofh(fhp);
	/* An unknown filesystem id is reported as a stale handle. */
	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
		error = ESTALE;
		goto out;
	}
	vfslocked = VFS_LOCK_GIANT(mp);
	(void) vn_start_write(NULL, &mp, V_WAIT);
	vfs_rel(mp);		/* The write holds a ref. */
	VATTR_NULL(vap);
	if (v3) {
		nfsm_srvsattr(vap);
		/* Optional guard: the ctime the client expects the file to have. */
		tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
		gcheck = fxdr_unsigned(int, *tl);
		if (gcheck) {
			tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
			fxdr_nfsv3time(tl, &guard);
		}
	} else {
		sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
		/*
		 * Nah nah nah nah na nah
		 * There is a bug in the Sun client that puts 0xffff in the mode
		 * field of sattr when it should put in 0xffffffff. The u_short
		 * doesn't sign extend.
		 * --> check the low order 2 bytes for 0xffff
		 */
		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
			vap->va_mode = nfstov_mode(sp->sa_mode);
		/* Fields equal to nfsrv_nfs_xdrneg1 are left unset. */
		if (sp->sa_uid != nfsrv_nfs_xdrneg1)
			vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
		if (sp->sa_gid != nfsrv_nfs_xdrneg1)
			vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
		if (sp->sa_size != nfsrv_nfs_xdrneg1)
			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
		if (sp->sa_atime.nfsv2_sec != nfsrv_nfs_xdrneg1) {
#ifdef notyet
			fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
#else
			vap->va_atime.tv_sec =
				fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
			vap->va_atime.tv_nsec = 0;
#endif
		}
		if (sp->sa_mtime.nfsv2_sec != nfsrv_nfs_xdrneg1)
			fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);

	}

	/*
	 * Now that we have all the fields, lets do it.
	 */
	error = nfsrv_fhtovp(fhp, 0, &vp, &tvfslocked, nfsd, slp, nam, &rdonly);
	/* Fold the two Giant indications into the single vfslocked value. */
	vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
	if (error) {
		nfsm_reply(2 * NFSX_UNSIGNED);
		if (v3)
			nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
		error = 0;
		goto nfsmout;
	}

	/*
	 * vp now an active resource, pay careful attention to cleanup
	 */
	if (v3) {
		error = preat_ret = VOP_GETATTR(vp, &preat, cred);
		/* A guard that does not match the current ctime fails the request. */
		if (!error && gcheck &&
			(preat.va_ctime.tv_sec != guard.tv_sec ||
			 preat.va_ctime.tv_nsec != guard.tv_nsec))
			error = NFSERR_NOT_SYNC;
		if (error) {
			vput(vp);
			vp = NULL;
			nfsm_reply(NFSX_WCCDATA(v3));
			if (v3)
				nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
			error = 0;
			goto nfsmout;
		}
	}

	/*
	 * If the size is being changed write access is required, otherwise
	 * just check for a read only filesystem.
	 */
	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
	} else {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		} else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
		    0)) != 0)
			goto out;
	}
	error = VOP_SETATTR(vp, vap, cred);
	/* vap is reused to hold the post-operation attributes. */
	postat_ret = VOP_GETATTR(vp, vap, cred);
	if (!error)
		error = postat_ret;
out:
	if (vp != NULL)
		vput(vp);

	vp = NULL;
	nfsm_reply(NFSX_WCCORFATTR(v3));
	if (v3) {
		nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
	} else if (!error) {
		/* v2 non-error case. */
		fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
		nfsm_srvfillattr(vap, fp);
	}
	/* The status is in the reply; returning 0 keeps the RPC from being dropped. */
	error = 0;
	/* fall through */

nfsmout:
	if (vp)
		vput(vp);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return(error);
}
525
526/*
527 * nfs lookup rpc
528 */
529int
530nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
531    struct mbuf **mrq)
532{
533	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
534	struct sockaddr *nam = nfsd->nd_nam;
535	caddr_t dpos = nfsd->nd_dpos;
536	struct ucred *cred = nfsd->nd_cr;
537	struct nfs_fattr *fp;
538	struct nameidata nd, ind, *ndp = &nd;
539	struct vnode *vp, *dirp = NULL;
540	nfsfh_t nfh;
541	fhandle_t *fhp;
542	caddr_t bpos;
543	int error = 0, len, dirattr_ret = 1;
544	int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
545	struct mbuf *mb, *mreq;
546	struct vattr va, dirattr, *vap = &va;
547	int tvfslocked;
548	int vfslocked;
549
550	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
551	ndclear(&nd);
552	vfslocked = 0;
553
554	fhp = &nfh.fh_generic;
555	nfsm_srvmtofh(fhp);
556	nfsm_srvnamesiz(len);
557
558	pubflag = nfs_ispublicfh(fhp);
559
560	nd.ni_cnd.cn_cred = cred;
561	nd.ni_cnd.cn_nameiop = LOOKUP;
562	nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART | MPSAFE;
563	error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
564		&dirp, v3, &dirattr, &dirattr_ret, pubflag);
565	vfslocked = NDHASGIANT(&nd);
566
567	/*
568	 * namei failure, only dirp to cleanup.  Clear out garbarge from
569	 * structure in case macros jump to nfsmout.
570	 */
571
572	if (error) {
573		if (dirp) {
574			vrele(dirp);
575			dirp = NULL;
576		}
577		nfsm_reply(NFSX_POSTOPATTR(v3));
578		if (v3)
579			nfsm_srvpostop_attr(dirattr_ret, &dirattr);
580		error = 0;
581		goto nfsmout;
582	}
583
584	/*
585	 * Locate index file for public filehandle
586	 *
587	 * error is 0 on entry and 0 on exit from this block.
588	 */
589
590	if (pubflag) {
591		if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) {
592			/*
593			 * Setup call to lookup() to see if we can find
594			 * the index file. Arguably, this doesn't belong
595			 * in a kernel.. Ugh.  If an error occurs, do not
596			 * try to install an index file and then clear the
597			 * error.
598			 *
599			 * When we replace nd with ind and redirect ndp,
600			 * maintenance of ni_startdir and ni_vp shift to
601			 * ind and we have to clean them up in the old nd.
602			 * However, the cnd resource continues to be maintained
603			 * via the original nd.  Confused?  You aren't alone!
604			 */
605			ind = nd;
606			VOP_UNLOCK(nd.ni_vp, 0);
607			ind.ni_pathlen = strlen(nfs_pub.np_index);
608			ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf =
609			    nfs_pub.np_index;
610			ind.ni_startdir = nd.ni_vp;
611			VREF(ind.ni_startdir);
612			ind.ni_cnd.cn_flags &= ~GIANTHELD;
613			tvfslocked = VFS_LOCK_GIANT(ind.ni_startdir->v_mount);
614			if (tvfslocked)
615				nd.ni_cnd.cn_flags |= GIANTHELD;
616			error = lookup(&ind);
617			ind.ni_dvp = NULL;
618			vfslocked = nfsrv_lockedpair_nd(vfslocked, &ind);
619			ind.ni_cnd.cn_flags &= ~GIANTHELD;
620
621			if (error == 0) {
622				/*
623				 * Found an index file. Get rid of
624				 * the old references.  transfer nd.ni_vp'
625				 */
626				if (dirp)
627					vrele(dirp);
628				dirp = nd.ni_vp;
629				nd.ni_vp = NULL;
630				vrele(nd.ni_startdir);
631				nd.ni_startdir = NULL;
632				ndp = &ind;
633			}
634			error = 0;
635		}
636		/*
637		 * If the public filehandle was used, check that this lookup
638		 * didn't result in a filehandle outside the publicly exported
639		 * filesystem.  We clear the poor vp here to avoid lockups due
640		 * to NFS I/O.
641		 */
642
643		if (ndp->ni_vp->v_mount != nfs_pub.np_mount) {
644			vput(nd.ni_vp);
645			nd.ni_vp = NULL;
646			error = EPERM;
647		}
648	}
649
650	/*
651	 * Resources at this point:
652	 *	ndp->ni_vp	may not be NULL
653	 */
654
655	if (error) {
656		nfsm_reply(NFSX_POSTOPATTR(v3));
657		if (v3)
658			nfsm_srvpostop_attr(dirattr_ret, &dirattr);
659		error = 0;
660		goto nfsmout;
661	}
662
663	/*
664	 * Get underlying attribute, then release remaining resources ( for
665	 * the same potential blocking reason ) and reply.
666	 */
667	vp = ndp->ni_vp;
668	bzero((caddr_t)fhp, sizeof(nfh));
669	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
670	error = VOP_VPTOFH(vp, &fhp->fh_fid);
671	if (!error)
672		error = VOP_GETATTR(vp, vap, cred);
673
674	vput(vp);
675	vrele(ndp->ni_startdir);
676	vrele(dirp);
677	ndp->ni_vp = NULL;
678	ndp->ni_startdir = NULL;
679	dirp = NULL;
680	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
681	if (error) {
682		if (v3)
683			nfsm_srvpostop_attr(dirattr_ret, &dirattr);
684		error = 0;
685		goto nfsmout;
686	}
687	nfsm_srvfhtom(fhp, v3);
688	if (v3) {
689		nfsm_srvpostop_attr(0, vap);
690		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
691	} else {
692		fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
693		nfsm_srvfillattr(vap, fp);
694	}
695
696nfsmout:
697	if (ndp->ni_vp || dirp || ndp->ni_startdir) {
698		if (ndp->ni_vp)
699			vput(ndp->ni_vp);
700		if (dirp)
701			vrele(dirp);
702		if (ndp->ni_startdir)
703			vrele(ndp->ni_startdir);
704	}
705	NDFREE(&nd, NDF_ONLY_PNBUF);
706	VFS_UNLOCK_GIANT(vfslocked);
707	return (error);
708}
709
710/*
711 * nfs readlink service
712 */
/*
 * NFS READLINK: read the target of a symbolic link into a freshly
 * allocated mbuf chain and append that chain to the reply.
 *
 * The chain (mp3) is sized for NFS_MAXPATHLEN before the vnode is looked
 * up; it is either linked into the reply at the end or freed at nfsmout.
 */
int
nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
    struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct sockaddr *nam = nfsd->nd_nam;
	caddr_t dpos = nfsd->nd_dpos;
	struct ucred *cred = nfsd->nd_cr;
	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
	struct iovec *ivp = iv;
	struct mbuf *mp;
	u_int32_t *tl;
	caddr_t bpos;
	int error = 0, rdonly, i, tlen, len, getret;
	int v3 = (nfsd->nd_flag & ND_NFSV3);
	struct mbuf *mb, *mp3, *nmp, *mreq;
	struct vnode *vp = NULL;
	struct vattr attr;
	nfsfh_t nfh;
	fhandle_t *fhp;
	struct uio io, *uiop = &io;
	int vfslocked;

	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	vfslocked = 0;
#ifndef nolint
	mp = NULL;
#endif
	/* mp3 is the head of the data chain; NULL means nothing to free. */
	mp3 = NULL;
	fhp = &nfh.fh_generic;
	nfsm_srvmtofh(fhp);
	len = 0;
	i = 0;
	/*
	 * Build an mbuf cluster chain totalling NFS_MAXPATHLEN bytes, with
	 * one iovec per mbuf; the last mbuf is trimmed to fit.
	 */
	while (len < NFS_MAXPATHLEN) {
		MGET(nmp, M_WAIT, MT_DATA);
		MCLGET(nmp, M_WAIT);
		nmp->m_len = NFSMSIZ(nmp);
		if (len == 0)
			mp3 = mp = nmp;
		else {
			mp->m_next = nmp;
			mp = nmp;
		}
		if ((len + mp->m_len) > NFS_MAXPATHLEN) {
			mp->m_len = NFS_MAXPATHLEN - len;
			len = NFS_MAXPATHLEN;
		} else
			len += mp->m_len;
		ivp->iov_base = mtod(mp, caddr_t);
		ivp->iov_len = mp->m_len;
		i++;
		ivp++;
	}
	uiop->uio_iov = iv;
	uiop->uio_iovcnt = i;
	uiop->uio_offset = 0;
	uiop->uio_resid = len;
	uiop->uio_rw = UIO_READ;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = NULL;
	error = nfsrv_fhtovp(fhp, 0, &vp, &vfslocked, nfsd, slp, nam, &rdonly);
	if (error) {
		nfsm_reply(2 * NFSX_UNSIGNED);
		if (v3)
			nfsm_srvpostop_attr(1, NULL);
		error = 0;
		goto nfsmout;
	}
	/* Anything that is not a symlink is rejected with a per-version error. */
	if (vp->v_type != VLNK) {
		if (v3)
			error = EINVAL;
		else
			error = ENXIO;
	} else
		error = VOP_READLINK(vp, uiop, cred);
	getret = VOP_GETATTR(vp, &attr, cred);
	vput(vp);
	vp = NULL;
	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
	if (v3)
		nfsm_srvpostop_attr(getret, &attr);
	if (error) {
		error = 0;
		goto nfsmout;
	}
	/* Short read: trim the chain to the rounded-up link length. */
	if (uiop->uio_resid > 0) {
		len -= uiop->uio_resid;
		tlen = nfsm_rndup(len);
		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
	}
	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(len);
	/* Hand the data chain over to the reply; it must not be freed below. */
	mb->m_next = mp3;
	mp3 = NULL;
nfsmout:
	if (mp3)
		m_freem(mp3);
	if (vp)
		vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return(error);
}
815
816/*
817 * nfs read service
818 */
/*
 * NFS READ: read up to the requested number of bytes from a regular file
 * directly into the reply mbuf chain.
 *
 * The reply header (attributes and count words) is reserved first, then
 * the mbuf chain is extended to hold the data, an iovec array is built
 * over it, and VOP_READ() fills it in place.  The attribute and count
 * slots are written last, once the amount actually read is known.
 */
int
nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
    struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct sockaddr *nam = nfsd->nd_nam;
	caddr_t dpos = nfsd->nd_dpos;
	struct ucred *cred = nfsd->nd_cr;
	struct iovec *iv;
	struct iovec *iv2;
	struct mbuf *m;
	struct nfs_fattr *fp;
	u_int32_t *tl;
	int i;
	caddr_t bpos;
	int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
	int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
	struct mbuf *mb, *mreq;
	struct mbuf *m2;
	struct vnode *vp = NULL;
	nfsfh_t nfh;
	fhandle_t *fhp;
	struct uio io, *uiop = &io;
	struct vattr va, *vap = &va;
	struct nfsheur *nh;
	off_t off;
	int ioflag = 0;
	int vfslocked;


	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
	vfslocked = 0;
	fhp = &nfh.fh_generic;
	nfsm_srvmtofh(fhp);
	/* The offset is 64 bits wide in v3 and 32 bits wide in v2. */
	if (v3) {
		tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
		off = fxdr_hyper(tl);
	} else {
		tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
	}
	nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));

	/*
	 * Reference vp.  If an error occurs, vp will be invalid, but we
	 * have to NULL it just in case.  The macros might goto nfsmout
	 * as well.
	 */

	error = nfsrv_fhtovp(fhp, 0, &vp, &vfslocked, nfsd, slp, nam, &rdonly);
	if (error) {
		vp = NULL;
		nfsm_reply(2 * NFSX_UNSIGNED);
		if (v3)
			nfsm_srvpostop_attr(1, NULL);
		error = 0;
		goto nfsmout;
	}

	/* Only regular files can be read. */
	if (vp->v_type != VREG) {
		if (v3)
			error = EINVAL;
		else
			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
	}
	/* Execute permission is accepted as a fallback when read is refused. */
	if (!error) {
		if ((error = nfsrv_access(vp, VREAD, cred, rdonly, 1)) != 0)
			error = nfsrv_access(vp, VEXEC, cred, rdonly, 1);
	}
	getret = VOP_GETATTR(vp, vap, cred);
	if (!error)
		error = getret;
	if (error) {
		vput(vp);
		vp = NULL;
		nfsm_reply(NFSX_POSTOPATTR(v3));
		if (v3)
			nfsm_srvpostop_attr(getret, vap);
		error = 0;
		goto nfsmout;
	}

	/*
	 * Calculate byte count to read
	 */
	if (off >= vap->va_size)
		cnt = 0;
	else if ((off + reqlen) > vap->va_size)
		cnt = vap->va_size - off;
	else
		cnt = reqlen;

	nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
	/*
	 * Reserve the header now; fp and tl remember where the attributes
	 * and the count words go so they can be filled in after the read.
	 */
	if (v3) {
		tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
		*tl++ = nfsrv_nfs_true;
		fp = (struct nfs_fattr *)tl;
		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
	} else {
		tl = nfsm_build(u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
		fp = (struct nfs_fattr *)tl;
		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
	}
	len = left = nfsm_rndup(cnt);
	if (cnt > 0) {
		/*
		 * Generate the mbuf list with the uio_iov ref. to it.
		 */
		/* Pass 1: extend the chain and count the iovecs needed. */
		i = 0;
		m = m2 = mb;
		while (left > 0) {
			siz = min(M_TRAILINGSPACE(m), left);
			if (siz > 0) {
				left -= siz;
				i++;
			}
			if (left > 0) {
				MGET(m, M_WAIT, MT_DATA);
				MCLGET(m, M_WAIT);
				m->m_len = 0;
				m2->m_next = m;
				m2 = m;
			}
		}
		iv = malloc(i * sizeof (struct iovec),
		       M_TEMP, M_WAITOK);
		/* iv2 keeps the array base for free(); iv is advanced below. */
		uiop->uio_iov = iv2 = iv;
		m = mb;
		left = len;
		i = 0;
		/* Pass 2: point each iovec at the free space of its mbuf. */
		while (left > 0) {
			if (m == NULL)
				panic("nfsrv_read iov");
			siz = min(M_TRAILINGSPACE(m), left);
			if (siz > 0) {
				iv->iov_base = mtod(m, caddr_t) + m->m_len;
				iv->iov_len = siz;
				m->m_len += siz;
				left -= siz;
				iv++;
				i++;
			}
			m = m->m_next;
		}
		uiop->uio_iovcnt = i;
		uiop->uio_offset = off;
		uiop->uio_resid = len;
		uiop->uio_rw = UIO_READ;
		uiop->uio_segflg = UIO_SYSSPACE;
		uiop->uio_td = NULL;
		/* Pass the sequential-access estimate on to the filesystem. */
		nh = nfsrv_sequential_heuristic(uiop, vp);
		ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
		error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
		if (error == 0)
			nh->nh_nextoff = uiop->uio_offset;
		free((caddr_t)iv2, M_TEMP);
		if (error || (getret = VOP_GETATTR(vp, vap, cred))) {
			if (!error)
				error = getret;
			/* Discard the partially built reply and start over. */
			m_freem(mreq);
			vput(vp);
			vp = NULL;
			nfsm_reply(NFSX_POSTOPATTR(v3));
			if (v3)
				nfsm_srvpostop_attr(getret, vap);
			error = 0;
			goto nfsmout;
		}
	} else
		uiop->uio_resid = 0;
	vput(vp);
	vp = NULL;
	nfsm_srvfillattr(vap, fp);
	/* Trim the chain if fewer bytes were read than were reserved. */
	tlen = len - uiop->uio_resid;
	cnt = cnt < tlen ? cnt : tlen;
	tlen = nfsm_rndup(cnt);
	if (len != tlen || tlen != cnt)
		nfsm_adj(mb, len - tlen, tlen - cnt);
	if (v3) {
		*tl++ = txdr_unsigned(cnt);
		/* Second word is true when fewer bytes than requested are returned. */
		if (cnt < reqlen)
			*tl++ = nfsrv_nfs_true;
		else
			*tl++ = nfsrv_nfs_false;
	}
	*tl = txdr_unsigned(cnt);
nfsmout:
	if (vp)
		vput(vp);
	VFS_UNLOCK_GIANT(vfslocked);
	return(error);
}
1011
1012/*
1013 * nfs write service
1014 */
1015int
1016nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1017    struct mbuf **mrq)
1018{
1019	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1020	struct sockaddr *nam = nfsd->nd_nam;
1021	caddr_t dpos = nfsd->nd_dpos;
1022	struct ucred *cred = nfsd->nd_cr;
1023	struct iovec *ivp;
1024	int i, cnt;
1025	struct mbuf *mp;
1026	struct nfs_fattr *fp;
1027	struct iovec *iv;
1028	struct vattr va, forat;
1029	struct vattr *vap = &va;
1030	u_int32_t *tl;
1031	caddr_t bpos;
1032	int error = 0, rdonly, len, forat_ret = 1;
1033	int ioflags, aftat_ret = 1, retlen = 0, zeroing, adjust;
1034	int stable = NFSV3WRITE_FILESYNC;
1035	int v3 = (nfsd->nd_flag & ND_NFSV3);
1036	struct mbuf *mb, *mreq;
1037	struct vnode *vp = NULL;
1038	struct nfsheur *nh;
1039	nfsfh_t nfh;
1040	fhandle_t *fhp;
1041	struct uio io, *uiop = &io;
1042	off_t off;
1043	struct mount *mntp = NULL;
1044	int tvfslocked;
1045	int vfslocked;
1046
1047	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1048	vfslocked = 0;
1049	if (mrep == NULL) {
1050		*mrq = NULL;
1051		error = 0;
1052		goto nfsmout;
1053	}
1054	fhp = &nfh.fh_generic;
1055	nfsm_srvmtofh(fhp);
1056	if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
1057		error = ESTALE;
1058		goto ereply;
1059	}
1060	vfslocked = VFS_LOCK_GIANT(mntp);
1061	(void) vn_start_write(NULL, &mntp, V_WAIT);
1062	vfs_rel(mntp);		/* The write holds a ref. */
1063	if (v3) {
1064		tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
1065		off = fxdr_hyper(tl);
1066		tl += 3;
1067		stable = fxdr_unsigned(int, *tl++);
1068	} else {
1069		tl = nfsm_dissect_nonblock(u_int32_t *, 4 * NFSX_UNSIGNED);
1070		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1071		tl += 2;
1072		if (nfs_async)
1073	    		stable = NFSV3WRITE_UNSTABLE;
1074	}
1075	retlen = len = fxdr_unsigned(int32_t, *tl);
1076	cnt = i = 0;
1077
1078	/*
1079	 * For NFS Version 2, it is not obvious what a write of zero length
1080	 * should do, but I might as well be consistent with Version 3,
1081	 * which is to return ok so long as there are no permission problems.
1082	 */
1083	if (len > 0) {
1084	    zeroing = 1;
1085	    mp = mrep;
1086	    while (mp) {
1087		if (mp == md) {
1088			zeroing = 0;
1089			adjust = dpos - mtod(mp, caddr_t);
1090			mp->m_len -= adjust;
1091			if (mp->m_len > 0 && adjust > 0)
1092				mp->m_data += adjust;
1093		}
1094		if (zeroing)
1095			mp->m_len = 0;
1096		else if (mp->m_len > 0) {
1097			i += mp->m_len;
1098			if (i > len) {
1099				mp->m_len -= (i - len);
1100				zeroing	= 1;
1101			}
1102			if (mp->m_len > 0)
1103				cnt++;
1104		}
1105		mp = mp->m_next;
1106	    }
1107	}
1108	if (len > NFS_MAXDATA || len < 0 || i < len) {
1109		error = EIO;
1110		nfsm_reply(2 * NFSX_UNSIGNED);
1111		if (v3)
1112			nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1113		error = 0;
1114		goto nfsmout;
1115	}
1116	error = nfsrv_fhtovp(fhp, 0, &vp, &tvfslocked, nfsd, slp, nam, &rdonly);
1117	vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
1118	if (error) {
1119		vp = NULL;
1120		nfsm_reply(2 * NFSX_UNSIGNED);
1121		if (v3)
1122			nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1123		error = 0;
1124		goto nfsmout;
1125	}
1126	if (v3)
1127		forat_ret = VOP_GETATTR(vp, &forat, cred);
1128	if (vp->v_type != VREG) {
1129		if (v3)
1130			error = EINVAL;
1131		else
1132			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1133	}
1134	if (!error)
1135		error = nfsrv_access(vp, VWRITE, cred, rdonly, 1);
1136	if (error) {
1137		vput(vp);
1138		vp = NULL;
1139		nfsm_reply(NFSX_WCCDATA(v3));
1140		if (v3)
1141			nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1142		error = 0;
1143		goto nfsmout;
1144	}
1145
1146	if (len > 0) {
1147	    ivp = malloc(cnt * sizeof (struct iovec), M_TEMP,
1148		M_WAITOK);
1149	    uiop->uio_iov = iv = ivp;
1150	    uiop->uio_iovcnt = cnt;
1151	    mp = mrep;
1152	    while (mp) {
1153		if (mp->m_len > 0) {
1154			ivp->iov_base = mtod(mp, caddr_t);
1155			ivp->iov_len = mp->m_len;
1156			ivp++;
1157		}
1158		mp = mp->m_next;
1159	    }
1160
1161	    /*
1162	     * XXX
1163	     * The IO_METASYNC flag indicates that all metadata (and not just
	     * enough to ensure data integrity) must be written to stable storage
1165	     * synchronously.
1166	     * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1167	     */
1168	    if (stable == NFSV3WRITE_UNSTABLE)
1169		ioflags = IO_NODELOCKED;
1170	    else if (stable == NFSV3WRITE_DATASYNC)
1171		ioflags = (IO_SYNC | IO_NODELOCKED);
1172	    else
1173		ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1174	    uiop->uio_resid = len;
1175	    uiop->uio_rw = UIO_WRITE;
1176	    uiop->uio_segflg = UIO_SYSSPACE;
1177	    uiop->uio_td = NULL;
1178	    uiop->uio_offset = off;
1179	    nh = nfsrv_sequential_heuristic(uiop, vp);
1180	    ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
1181	    error = VOP_WRITE(vp, uiop, ioflags, cred);
1182	    if (error == 0)
1183		    nh->nh_nextoff = uiop->uio_offset;
1184	    /* Unlocked write. */
1185	    nfsrvstats.srvvop_writes++;
1186	    free((caddr_t)iv, M_TEMP);
1187	}
1188	aftat_ret = VOP_GETATTR(vp, vap, cred);
1189	vput(vp);
1190	vp = NULL;
1191	if (!error)
1192		error = aftat_ret;
1193ereply:
1194	nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
1195		2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
1196	if (v3) {
1197		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
1198		if (error) {
1199			error = 0;
1200			goto nfsmout;
1201		}
1202		tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
1203		*tl++ = txdr_unsigned(retlen);
1204		/*
1205		 * If nfs_async is set, then pretend the write was FILESYNC.
1206		 */
1207		if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1208			*tl++ = txdr_unsigned(stable);
1209		else
1210			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1211		/*
1212		 * Actually, there is no need to txdr these fields,
1213		 * but it may make the values more human readable,
1214		 * for debugging purposes.
1215		 */
1216		if (nfsver.tv_sec == 0)
1217			nfsver = boottime;
1218		*tl++ = txdr_unsigned(nfsver.tv_sec);
1219		*tl = txdr_unsigned(nfsver.tv_usec);
1220	} else if (!error) {
1221		/* v2 non-error case. */
1222		fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
1223		nfsm_srvfillattr(vap, fp);
1224	}
1225	error = 0;
1226nfsmout:
1227	if (vp)
1228		vput(vp);
1229	vn_finished_write(mntp);
1230	VFS_UNLOCK_GIANT(vfslocked);
1231	return(error);
1232}
1233
1234/*
1235 * nfs create service
1236 * now does a truncate to 0 length via. setattr if it already exists
1237 */
1238int
1239nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1240    struct mbuf **mrq)
1241{
1242	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1243	struct sockaddr *nam = nfsd->nd_nam;
1244	caddr_t dpos = nfsd->nd_dpos;
1245	struct ucred *cred = nfsd->nd_cr;
1246	struct nfs_fattr *fp;
1247	struct vattr va, dirfor, diraft;
1248	struct vattr *vap = &va;
1249	struct nfsv2_sattr *sp;
1250	u_int32_t *tl;
1251	struct nameidata nd;
1252	caddr_t bpos;
1253	int error = 0, rdev, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1254	int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
1255	struct mbuf *mb, *mreq;
1256	struct vnode *dirp = NULL;
1257	nfsfh_t nfh;
1258	fhandle_t *fhp;
1259	u_quad_t tempsize;
1260	struct timespec cverf;
1261	struct mount *mp = NULL;
1262	int tvfslocked;
1263	int vfslocked;
1264
1265	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1266	vfslocked = 0;
1267#ifndef nolint
1268	rdev = 0;
1269#endif
1270	ndclear(&nd);
1271
1272	fhp = &nfh.fh_generic;
1273	nfsm_srvmtofh(fhp);
1274	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
1275		error = ESTALE;
1276		goto ereply;
1277	}
1278	vfslocked = VFS_LOCK_GIANT(mp);
1279	(void) vn_start_write(NULL, &mp, V_WAIT);
1280	vfs_rel(mp);		/* The write holds a ref. */
1281	nfsm_srvnamesiz(len);
1282
1283	nd.ni_cnd.cn_cred = cred;
1284	nd.ni_cnd.cn_nameiop = CREATE;
1285	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE;
1286
1287	/*
1288	 * Call namei and do initial cleanup to get a few things
1289	 * out of the way.  If we get an initial error we cleanup
1290	 * and return here to avoid special-casing the invalid nd
1291	 * structure through the rest of the case.  dirp may be
1292	 * set even if an error occurs, but the nd structure will not
1293	 * be valid at all if an error occurs so we have to invalidate it
1294	 * prior to calling nfsm_reply ( which might goto nfsmout ).
1295	 */
1296	error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
1297		&dirp, v3, &dirfor, &dirfor_ret, FALSE);
1298	vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
1299	if (dirp && !v3) {
1300		vrele(dirp);
1301		dirp = NULL;
1302	}
1303	if (error) {
1304		nfsm_reply(NFSX_WCCDATA(v3));
1305		if (v3)
1306			nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1307		error = 0;
1308		goto nfsmout;
1309	}
1310
1311	/*
1312	 * No error.  Continue.  State:
1313	 *
1314	 *	startdir	is valid ( we release this immediately )
1315	 *	dirp 		may be valid
1316	 *	nd.ni_vp	may be valid
1317	 *	nd.ni_dvp	is valid
1318	 *
1319	 * The error state is set through the code and we may also do some
1320	 * opportunistic releasing of vnodes to avoid holding locks through
1321	 * NFS I/O.  The cleanup at the end is a catch-all
1322	 */
1323
1324	VATTR_NULL(vap);
1325	if (v3) {
1326		tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
1327		how = fxdr_unsigned(int, *tl);
1328		switch (how) {
1329		case NFSV3CREATE_GUARDED:
1330			if (nd.ni_vp) {
1331				error = EEXIST;
1332				break;
1333			}
1334			/* fall through */
1335		case NFSV3CREATE_UNCHECKED:
1336			nfsm_srvsattr(vap);
1337			break;
1338		case NFSV3CREATE_EXCLUSIVE:
1339			tl = nfsm_dissect_nonblock(u_int32_t *,
1340			    NFSX_V3CREATEVERF);
1341			/* Unique bytes, endianness is not important. */
1342			cverf.tv_sec  = (int32_t)tl[0];
1343			cverf.tv_nsec = tl[1];
1344			exclusive_flag = 1;
1345			break;
1346		};
1347		vap->va_type = VREG;
1348	} else {
1349		sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
1350		vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1351		if (vap->va_type == VNON)
1352			vap->va_type = VREG;
1353		vap->va_mode = nfstov_mode(sp->sa_mode);
1354		switch (vap->va_type) {
1355		case VREG:
1356			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1357			if (tsize != -1)
1358				vap->va_size = (u_quad_t)tsize;
1359			break;
1360		case VCHR:
1361		case VBLK:
1362		case VFIFO:
1363			rdev = fxdr_unsigned(long, sp->sa_size);
1364			break;
1365		default:
1366			break;
1367		};
1368	}
1369
1370	/*
1371	 * Iff doesn't exist, create it
1372	 * otherwise just truncate to 0 length
1373	 *   should I set the mode too ?
1374	 *
1375	 * The only possible error we can have at this point is EEXIST.
1376	 * nd.ni_vp will also be non-NULL in that case.
1377	 */
1378	if (nd.ni_vp == NULL) {
1379		if (vap->va_mode == (mode_t)VNOVAL)
1380			vap->va_mode = 0;
1381		if (vap->va_type == VREG || vap->va_type == VSOCK) {
1382			error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1383			if (error)
1384				NDFREE(&nd, NDF_ONLY_PNBUF);
1385			else {
1386				if (exclusive_flag) {
1387					exclusive_flag = 0;
1388					VATTR_NULL(vap);
1389					vap->va_atime = cverf;
1390					error = VOP_SETATTR(nd.ni_vp, vap,
1391					    cred);
1392				}
1393			}
1394		} else if (vap->va_type == VCHR || vap->va_type == VBLK ||
1395		    vap->va_type == VFIFO) {
1396			/*
1397			 * NFSv2-specific code for creating device nodes
1398			 * and fifos.
1399			 *
1400			 * Handle SysV FIFO node special cases.  All other
1401			 * devices require super user to access.
1402			 */
1403			if (vap->va_type == VCHR && rdev == 0xffffffff)
1404				vap->va_type = VFIFO;
1405                        if (vap->va_type != VFIFO &&
1406			    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV,
1407			    0))) {
1408				goto ereply;
1409                        }
1410			vap->va_rdev = rdev;
1411			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1412			if (error) {
1413				NDFREE(&nd, NDF_ONLY_PNBUF);
1414				goto ereply;
1415			}
1416			vput(nd.ni_vp);
1417			nd.ni_vp = NULL;
1418
1419			/*
1420			 * release dvp prior to lookup
1421			 */
1422			vput(nd.ni_dvp);
1423			nd.ni_dvp = NULL;
1424			/*
1425			 * Setup for lookup.
1426			 *
1427			 * Even though LOCKPARENT was cleared, ni_dvp may
1428			 * be garbage.
1429			 */
1430			nd.ni_cnd.cn_nameiop = LOOKUP;
1431			nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
1432			nd.ni_cnd.cn_thread = curthread;
1433			nd.ni_cnd.cn_cred = cred;
1434			tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
1435			if (tvfslocked)
1436				nd.ni_cnd.cn_flags |= GIANTHELD;
1437			error = lookup(&nd);
1438			nd.ni_dvp = NULL;
1439			vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
1440			nd.ni_cnd.cn_flags &= ~GIANTHELD;
1441			if (error)
1442				goto ereply;
1443
1444			if (nd.ni_cnd.cn_flags & ISSYMLINK) {
1445				error = EINVAL;
1446				goto ereply;
1447			}
1448		} else {
1449			error = ENXIO;
1450		}
1451	} else {
1452		if (vap->va_size != -1) {
1453			error = nfsrv_access(nd.ni_vp, VWRITE,
1454			    cred, (nd.ni_cnd.cn_flags & RDONLY), 0);
1455			if (!error) {
1456				tempsize = vap->va_size;
1457				VATTR_NULL(vap);
1458				vap->va_size = tempsize;
1459				error = VOP_SETATTR(nd.ni_vp, vap, cred);
1460			}
1461		}
1462	}
1463
1464	if (!error) {
1465		bzero((caddr_t)fhp, sizeof(nfh));
1466		fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
1467		error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
1468		if (!error)
1469			error = VOP_GETATTR(nd.ni_vp, vap, cred);
1470	}
1471	if (v3) {
1472		if (exclusive_flag && !error &&
1473		    bcmp(&cverf, &vap->va_atime, sizeof (cverf)))
1474			error = EEXIST;
1475		if (dirp == nd.ni_dvp)
1476			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
1477		else {
1478			/* Drop the other locks to avoid deadlock. */
1479			if (nd.ni_dvp) {
1480				if (nd.ni_dvp == nd.ni_vp)
1481					vrele(nd.ni_dvp);
1482				else
1483					vput(nd.ni_dvp);
1484			}
1485			if (nd.ni_vp)
1486				vput(nd.ni_vp);
1487			nd.ni_dvp = NULL;
1488			nd.ni_vp = NULL;
1489
1490			vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
1491			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
1492			VOP_UNLOCK(dirp, 0);
1493		}
1494	}
1495ereply:
1496	nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
1497	if (v3) {
1498		if (!error) {
1499			nfsm_srvpostop_fh(fhp);
1500			nfsm_srvpostop_attr(0, vap);
1501		}
1502		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1503	} else if (!error) {
1504		/* v2 non-error case. */
1505		nfsm_srvfhtom(fhp, v3);
1506		fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
1507		nfsm_srvfillattr(vap, fp);
1508	}
1509	error = 0;
1510
1511nfsmout:
1512	if (nd.ni_dvp) {
1513		if (nd.ni_dvp == nd.ni_vp)
1514			vrele(nd.ni_dvp);
1515		else
1516			vput(nd.ni_dvp);
1517	}
1518	if (nd.ni_vp)
1519		vput(nd.ni_vp);
1520	if (nd.ni_startdir) {
1521		vrele(nd.ni_startdir);
1522		nd.ni_startdir = NULL;
1523	}
1524	if (dirp)
1525		vrele(dirp);
1526	NDFREE(&nd, NDF_ONLY_PNBUF);
1527	vn_finished_write(mp);
1528	VFS_UNLOCK_GIANT(vfslocked);
1529	return (error);
1530}
1531
1532/*
1533 * nfs v3 mknod service
1534 */
1535int
1536nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1537    struct mbuf **mrq)
1538{
1539	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1540	struct sockaddr *nam = nfsd->nd_nam;
1541	caddr_t dpos = nfsd->nd_dpos;
1542	struct ucred *cred = nfsd->nd_cr;
1543	struct vattr va, dirfor, diraft;
1544	struct vattr *vap = &va;
1545	struct thread *td = curthread;
1546	u_int32_t *tl;
1547	struct nameidata nd;
1548	caddr_t bpos;
1549	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1550	u_int32_t major, minor;
1551	enum vtype vtyp;
1552	struct mbuf *mb, *mreq;
1553	struct vnode *vp, *dirp = NULL;
1554	nfsfh_t nfh;
1555	fhandle_t *fhp;
1556	struct mount *mp = NULL;
1557	int v3 = (nfsd->nd_flag & ND_NFSV3);
1558	int tvfslocked;
1559	int vfslocked;
1560
1561	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1562	vfslocked = 0;
1563	if (!v3)
1564		panic("nfsrv_mknod: v3 proc called on a v2 connection");
1565	ndclear(&nd);
1566
1567	fhp = &nfh.fh_generic;
1568	nfsm_srvmtofh(fhp);
1569	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
1570		error = ESTALE;
1571		goto ereply;
1572	}
1573	vfslocked = VFS_LOCK_GIANT(mp);
1574	(void) vn_start_write(NULL, &mp, V_WAIT);
1575	vfs_rel(mp);		/* The write holds a ref. */
1576	nfsm_srvnamesiz(len);
1577
1578	nd.ni_cnd.cn_cred = cred;
1579	nd.ni_cnd.cn_nameiop = CREATE;
1580	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE;
1581
1582	/*
1583	 * Handle nfs_namei() call.  If an error occurs, the nd structure
1584	 * is not valid.  However, nfsm_*() routines may still jump to
1585	 * nfsmout.
1586	 */
1587
1588	error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
1589		&dirp, v3, &dirfor, &dirfor_ret, FALSE);
1590	vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
1591	if (error) {
1592		nfsm_reply(NFSX_WCCDATA(1));
1593		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1594		error = 0;
1595		goto nfsmout;
1596	}
1597	tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
1598	vtyp = nfsv3tov_type(*tl);
1599	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1600		error = NFSERR_BADTYPE;
1601		goto out;
1602	}
1603	VATTR_NULL(vap);
1604	nfsm_srvsattr(vap);
1605	if (vtyp == VCHR || vtyp == VBLK) {
1606		tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
1607		major = fxdr_unsigned(u_int32_t, *tl++);
1608		minor = fxdr_unsigned(u_int32_t, *tl);
1609		vap->va_rdev = makedev(major, minor);
1610	}
1611
1612	/*
1613	 * Iff doesn't exist, create it.
1614	 */
1615	if (nd.ni_vp) {
1616		error = EEXIST;
1617		goto out;
1618	}
1619	vap->va_type = vtyp;
1620	if (vap->va_mode == (mode_t)VNOVAL)
1621		vap->va_mode = 0;
1622	if (vtyp == VSOCK) {
1623		vrele(nd.ni_startdir);
1624		nd.ni_startdir = NULL;
1625		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1626		if (error)
1627			NDFREE(&nd, NDF_ONLY_PNBUF);
1628	} else {
1629		if (vtyp != VFIFO && (error = priv_check_cred(cred,
1630		    PRIV_VFS_MKNOD_DEV, 0)))
1631			goto out;
1632		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
1633		if (error) {
1634			NDFREE(&nd, NDF_ONLY_PNBUF);
1635			goto out;
1636		}
1637		vput(nd.ni_vp);
1638		nd.ni_vp = NULL;
1639
1640		/*
1641		 * Release dvp prior to lookup
1642		 */
1643		vput(nd.ni_dvp);
1644		nd.ni_dvp = NULL;
1645
1646		nd.ni_cnd.cn_nameiop = LOOKUP;
1647		nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
1648		nd.ni_cnd.cn_thread = td;
1649		nd.ni_cnd.cn_cred = td->td_ucred;
1650		tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
1651		if (tvfslocked)
1652			nd.ni_cnd.cn_flags |= GIANTHELD;
1653		error = lookup(&nd);
1654		nd.ni_dvp = NULL;
1655		vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
1656		nd.ni_cnd.cn_flags &= ~GIANTHELD;
1657
1658		if (error)
1659			goto out;
1660		if (nd.ni_cnd.cn_flags & ISSYMLINK)
1661			error = EINVAL;
1662	}
1663
1664	/*
1665	 * send response, cleanup, return.
1666	 */
1667out:
1668	vp = nd.ni_vp;
1669	if (!error) {
1670		bzero((caddr_t)fhp, sizeof(nfh));
1671		fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1672		error = VOP_VPTOFH(vp, &fhp->fh_fid);
1673		if (!error)
1674			error = VOP_GETATTR(vp, vap, cred);
1675	}
1676	if (nd.ni_dvp) {
1677		if (nd.ni_dvp == nd.ni_vp)
1678			vrele(nd.ni_dvp);
1679		else
1680			vput(nd.ni_dvp);
1681		nd.ni_dvp = NULL;
1682	}
1683	if (vp) {
1684		vput(vp);
1685		vp = NULL;
1686		nd.ni_vp = NULL;
1687	}
1688	if (nd.ni_startdir) {
1689		vrele(nd.ni_startdir);
1690		nd.ni_startdir = NULL;
1691	}
1692	NDFREE(&nd, NDF_ONLY_PNBUF);
1693	if (dirp) {
1694		vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
1695		diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
1696		vput(dirp);
1697	}
1698ereply:
1699	nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
1700	if (v3) {
1701		if (!error) {
1702			nfsm_srvpostop_fh(fhp);
1703			nfsm_srvpostop_attr(0, vap);
1704		}
1705		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1706	}
1707	vn_finished_write(mp);
1708	VFS_UNLOCK_GIANT(vfslocked);
1709	return (0);
1710nfsmout:
1711	if (nd.ni_dvp) {
1712		if (nd.ni_dvp == nd.ni_vp)
1713			vrele(nd.ni_dvp);
1714		else
1715			vput(nd.ni_dvp);
1716	}
1717	if (nd.ni_vp)
1718		vput(nd.ni_vp);
1719	if (dirp)
1720		vrele(dirp);
1721	if (nd.ni_startdir)
1722		vrele(nd.ni_startdir);
1723	NDFREE(&nd, NDF_ONLY_PNBUF);
1724	vn_finished_write(mp);
1725	VFS_UNLOCK_GIANT(vfslocked);
1726	return (error);
1727}
1728
1729/*
1730 * nfs remove service
1731 */
1732int
1733nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1734    struct mbuf **mrq)
1735{
1736	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1737	struct sockaddr *nam = nfsd->nd_nam;
1738	caddr_t dpos = nfsd->nd_dpos;
1739	struct ucred *cred = nfsd->nd_cr;
1740	struct nameidata nd;
1741	caddr_t bpos;
1742	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1743	int v3 = (nfsd->nd_flag & ND_NFSV3);
1744	struct mbuf *mb, *mreq;
1745	struct vnode *dirp;
1746	struct vattr dirfor, diraft;
1747	nfsfh_t nfh;
1748	fhandle_t *fhp;
1749	struct mount *mp = NULL;
1750	int vfslocked;
1751
1752	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1753	ndclear(&nd);
1754	vfslocked = 0;
1755
1756	fhp = &nfh.fh_generic;
1757	nfsm_srvmtofh(fhp);
1758	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
1759		error = ESTALE;
1760		goto ereply;
1761	}
1762	vfslocked = VFS_LOCK_GIANT(mp);
1763	(void) vn_start_write(NULL, &mp, V_WAIT);
1764	vfs_rel(mp);		/* The write holds a ref. */
1765	nfsm_srvnamesiz(len);
1766
1767	nd.ni_cnd.cn_cred = cred;
1768	nd.ni_cnd.cn_nameiop = DELETE;
1769	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | MPSAFE;
1770	error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
1771		&dirp, v3,  &dirfor, &dirfor_ret, FALSE);
1772	vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
1773	if (dirp && !v3) {
1774		vrele(dirp);
1775		dirp = NULL;
1776	}
1777	if (error == 0) {
1778		if (nd.ni_vp->v_type == VDIR) {
1779			error = EPERM;		/* POSIX */
1780			goto out;
1781		}
1782		/*
1783		 * The root of a mounted filesystem cannot be deleted.
1784		 */
1785		if (nd.ni_vp->v_vflag & VV_ROOT) {
1786			error = EBUSY;
1787			goto out;
1788		}
1789out:
1790		if (!error) {
1791			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1792			NDFREE(&nd, NDF_ONLY_PNBUF);
1793		}
1794	}
1795	if (dirp && v3) {
1796		if (dirp == nd.ni_dvp)
1797			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
1798		else {
1799			/* Drop the other locks to avoid deadlock. */
1800			if (nd.ni_dvp) {
1801				if (nd.ni_dvp == nd.ni_vp)
1802					vrele(nd.ni_dvp);
1803				else
1804					vput(nd.ni_dvp);
1805			}
1806			if (nd.ni_vp)
1807				vput(nd.ni_vp);
1808			nd.ni_dvp = NULL;
1809			nd.ni_vp = NULL;
1810
1811			vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
1812			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
1813			VOP_UNLOCK(dirp, 0);
1814		}
1815		vrele(dirp);
1816		dirp = NULL;
1817	}
1818ereply:
1819	nfsm_reply(NFSX_WCCDATA(v3));
1820	if (v3)
1821		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1822	error = 0;
1823nfsmout:
1824	NDFREE(&nd, NDF_ONLY_PNBUF);
1825	if (nd.ni_dvp) {
1826		if (nd.ni_dvp == nd.ni_vp)
1827			vrele(nd.ni_dvp);
1828		else
1829			vput(nd.ni_dvp);
1830	}
1831	if (nd.ni_vp)
1832		vput(nd.ni_vp);
1833	vn_finished_write(mp);
1834	VFS_UNLOCK_GIANT(vfslocked);
1835	return(error);
1836}
1837
1838/*
1839 * nfs rename service
1840 */
1841int
1842nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1843    struct mbuf **mrq)
1844{
1845	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1846	struct sockaddr *nam = nfsd->nd_nam;
1847	caddr_t dpos = nfsd->nd_dpos;
1848	struct ucred *cred = nfsd->nd_cr;
1849	caddr_t bpos;
1850	int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
1851	int tdirfor_ret = 1, tdiraft_ret = 1;
1852	int v3 = (nfsd->nd_flag & ND_NFSV3);
1853	struct mbuf *mb, *mreq;
1854	struct nameidata fromnd, tond;
1855	struct vnode *fvp, *tvp, *tdvp, *fdirp = NULL;
1856	struct vnode *tdirp = NULL;
1857	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
1858	nfsfh_t fnfh, tnfh;
1859	fhandle_t *ffhp, *tfhp;
1860	uid_t saved_uid;
1861	struct mount *mp = NULL;
1862	int vfslocked;
1863
1864	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1865	vfslocked = 0;
1866#ifndef nolint
1867	fvp = NULL;
1868#endif
1869	ffhp = &fnfh.fh_generic;
1870	tfhp = &tnfh.fh_generic;
1871
1872	/*
1873	 * Clear fields incase goto nfsmout occurs from macro.
1874	 */
1875
1876	ndclear(&fromnd);
1877	ndclear(&tond);
1878
1879	nfsm_srvmtofh(ffhp);
1880	if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL) {
1881		error = ESTALE;
1882		goto out1;
1883	}
1884	vfslocked = VFS_LOCK_GIANT(mp);
1885	(void) vn_start_write(NULL, &mp, V_WAIT);
1886	vfs_rel(mp);		/* The write holds a ref. */
1887	nfsm_srvnamesiz(len);
1888	/*
1889	 * Remember our original uid so that we can reset cr_uid before
1890	 * the second nfs_namei() call, in case it is remapped.
1891	 */
1892	saved_uid = cred->cr_uid;
1893	fromnd.ni_cnd.cn_cred = cred;
1894	fromnd.ni_cnd.cn_nameiop = DELETE;
1895	fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART | MPSAFE;
1896	error = nfs_namei(&fromnd, nfsd, ffhp, len, slp, nam, &md,
1897		&dpos, &fdirp, v3, &fdirfor, &fdirfor_ret, FALSE);
1898	vfslocked = nfsrv_lockedpair_nd(vfslocked, &fromnd);
1899	if (fdirp && !v3) {
1900		vrele(fdirp);
1901		fdirp = NULL;
1902	}
1903	if (error) {
1904		nfsm_reply(2 * NFSX_WCCDATA(v3));
1905		if (v3) {
1906			nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
1907			nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
1908		}
1909		error = 0;
1910		goto nfsmout;
1911	}
1912	fvp = fromnd.ni_vp;
1913	nfsm_srvmtofh(tfhp);
1914	nfsm_srvnamesiz(len2);
1915	cred->cr_uid = saved_uid;
1916	tond.ni_cnd.cn_cred = cred;
1917	tond.ni_cnd.cn_nameiop = RENAME;
1918	tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | MPSAFE;
1919	error = nfs_namei(&tond, nfsd, tfhp, len2, slp, nam, &md,
1920		&dpos, &tdirp, v3, &tdirfor, &tdirfor_ret, FALSE);
1921	vfslocked = nfsrv_lockedpair_nd(vfslocked, &tond);
1922	if (tdirp && !v3) {
1923		vrele(tdirp);
1924		tdirp = NULL;
1925	}
1926	if (error)
1927		goto out1;
1928
1929	tdvp = tond.ni_dvp;
1930	tvp = tond.ni_vp;
1931	if (tvp != NULL) {
1932		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
1933			if (v3)
1934				error = EEXIST;
1935			else
1936				error = EISDIR;
1937			goto out;
1938		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
1939			if (v3)
1940				error = EEXIST;
1941			else
1942				error = ENOTDIR;
1943			goto out;
1944		}
1945		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
1946			if (v3)
1947				error = EXDEV;
1948			else
1949				error = ENOTEMPTY;
1950			goto out;
1951		}
1952	}
1953	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
1954		if (v3)
1955			error = EXDEV;
1956		else
1957			error = ENOTEMPTY;
1958		goto out;
1959	}
1960	if (fvp->v_mount != tdvp->v_mount) {
1961		if (v3)
1962			error = EXDEV;
1963		else
1964			error = ENOTEMPTY;
1965		goto out;
1966	}
1967	if (fvp == tdvp) {
1968		if (v3)
1969			error = EINVAL;
1970		else
1971			error = ENOTEMPTY;
1972	}
1973	/*
1974	 * If source is the same as the destination (that is the
1975	 * same vnode with the same name in the same directory),
1976	 * then there is nothing to do.
1977	 */
1978	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
1979	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
1980	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
1981	      fromnd.ni_cnd.cn_namelen))
1982		error = -1;
1983out:
1984	if (!error) {
1985		/*
1986		 * The VOP_RENAME function releases all vnode references &
1987		 * locks prior to returning so we need to clear the pointers
1988		 * to bypass cleanup code later on.
1989		 */
1990		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
1991				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
1992		fromnd.ni_dvp = NULL;
1993		fromnd.ni_vp = NULL;
1994		tond.ni_dvp = NULL;
1995		tond.ni_vp = NULL;
1996		if (error) {
1997			NDFREE(&fromnd, NDF_ONLY_PNBUF);
1998			NDFREE(&tond, NDF_ONLY_PNBUF);
1999		}
2000	} else {
2001		if (error == -1)
2002			error = 0;
2003	}
2004	/* fall through */
2005out1:
2006	nfsm_reply(2 * NFSX_WCCDATA(v3));
2007	if (v3) {
2008		/* Release existing locks to prevent deadlock. */
2009		if (tond.ni_dvp) {
2010			if (tond.ni_dvp == tond.ni_vp)
2011				vrele(tond.ni_dvp);
2012			else
2013				vput(tond.ni_dvp);
2014		}
2015		if (tond.ni_vp)
2016			vput(tond.ni_vp);
2017		tond.ni_dvp = NULL;
2018		tond.ni_vp = NULL;
2019
2020		if (fdirp) {
2021			vn_lock(fdirp, LK_EXCLUSIVE | LK_RETRY);
2022			fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred);
2023			VOP_UNLOCK(fdirp, 0);
2024		}
2025		if (tdirp) {
2026			vn_lock(tdirp, LK_EXCLUSIVE | LK_RETRY);
2027			tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred);
2028			VOP_UNLOCK(tdirp, 0);
2029		}
2030		nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2031		nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2032	}
2033	error = 0;
2034	/* fall through */
2035
2036nfsmout:
2037	/*
2038	 * Clear out tond related fields
2039	 */
2040	if (tond.ni_dvp) {
2041		if (tond.ni_dvp == tond.ni_vp)
2042			vrele(tond.ni_dvp);
2043		else
2044			vput(tond.ni_dvp);
2045	}
2046	if (tond.ni_vp)
2047		vput(tond.ni_vp);
2048	if (tdirp)
2049		vrele(tdirp);
2050	if (tond.ni_startdir)
2051		vrele(tond.ni_startdir);
2052	NDFREE(&tond, NDF_ONLY_PNBUF);
2053	/*
2054	 * Clear out fromnd related fields
2055	 */
2056	if (fdirp)
2057		vrele(fdirp);
2058	if (fromnd.ni_startdir)
2059		vrele(fromnd.ni_startdir);
2060	NDFREE(&fromnd, NDF_ONLY_PNBUF);
2061	if (fromnd.ni_dvp)
2062		vrele(fromnd.ni_dvp);
2063	if (fromnd.ni_vp)
2064		vrele(fromnd.ni_vp);
2065
2066	vn_finished_write(mp);
2067	VFS_UNLOCK_GIANT(vfslocked);
2068	return (error);
2069}
2070
2071/*
2072 * nfs link service
2073 */
2074int
2075nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2076    struct mbuf **mrq)
2077{
2078	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2079	struct sockaddr *nam = nfsd->nd_nam;
2080	caddr_t dpos = nfsd->nd_dpos;
2081	struct ucred *cred = nfsd->nd_cr;
2082	struct nameidata nd;
2083	caddr_t bpos;
2084	int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2085	int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
2086	struct mbuf *mb, *mreq;
2087	struct vnode *vp = NULL, *xp, *dirp = NULL;
2088	struct vattr dirfor, diraft, at;
2089	nfsfh_t nfh, dnfh;
2090	fhandle_t *fhp, *dfhp;
2091	struct mount *mp = NULL;
2092	int tvfslocked;
2093	int vfslocked;
2094
2095	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2096	ndclear(&nd);
2097	vfslocked = 0;
2098
2099	fhp = &nfh.fh_generic;
2100	dfhp = &dnfh.fh_generic;
2101	nfsm_srvmtofh(fhp);
2102	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2103		error = ESTALE;
2104		goto ereply;
2105	}
2106	vfslocked = VFS_LOCK_GIANT(mp);
2107	(void) vn_start_write(NULL, &mp, V_WAIT);
2108	vfs_rel(mp);		/* The write holds a ref. */
2109	nfsm_srvmtofh(dfhp);
2110	nfsm_srvnamesiz(len);
2111
2112	error = nfsrv_fhtovp(fhp, 0, &vp, &tvfslocked, nfsd, slp, nam, &rdonly);
2113	vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
2114	if (error) {
2115		nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2116		if (v3) {
2117			nfsm_srvpostop_attr(getret, &at);
2118			nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2119		}
2120		vp = NULL;
2121		error = 0;
2122		goto nfsmout;
2123	}
2124	if (v3)
2125		getret = VOP_GETATTR(vp, &at, cred);
2126	if (vp->v_type == VDIR) {
2127		error = EPERM;		/* POSIX */
2128		goto out1;
2129	}
2130	VOP_UNLOCK(vp, 0);
2131	nd.ni_cnd.cn_cred = cred;
2132	nd.ni_cnd.cn_nameiop = CREATE;
2133	nd.ni_cnd.cn_flags = LOCKPARENT | MPSAFE | MPSAFE;
2134	error = nfs_namei(&nd, nfsd, dfhp, len, slp, nam, &md, &dpos,
2135		&dirp, v3, &dirfor, &dirfor_ret, FALSE);
2136	vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
2137	if (dirp && !v3) {
2138		vrele(dirp);
2139		dirp = NULL;
2140	}
2141	if (error) {
2142		vrele(vp);
2143		vp = NULL;
2144		goto out2;
2145	}
2146	xp = nd.ni_vp;
2147	if (xp != NULL) {
2148		error = EEXIST;
2149		vrele(vp);
2150		vp = NULL;
2151		goto out2;
2152	}
2153	xp = nd.ni_dvp;
2154	if (vp->v_mount != xp->v_mount) {
2155		error = EXDEV;
2156		vrele(vp);
2157		vp = NULL;
2158		goto out2;
2159	}
2160	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2161	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2162	NDFREE(&nd, NDF_ONLY_PNBUF);
2163	/* fall through */
2164
2165out1:
2166	if (v3)
2167		getret = VOP_GETATTR(vp, &at, cred);
2168out2:
2169	if (dirp) {
2170		if (dirp == nd.ni_dvp)
2171			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
2172		else {
2173			/* Release existing locks to prevent deadlock. */
2174			if (nd.ni_dvp) {
2175				if (nd.ni_dvp == nd.ni_vp)
2176					vrele(nd.ni_dvp);
2177				else
2178					vput(nd.ni_dvp);
2179			}
2180			if (nd.ni_vp)
2181				vrele(nd.ni_vp);
2182			nd.ni_dvp = NULL;
2183			nd.ni_vp = NULL;
2184
2185			vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
2186			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
2187			VOP_UNLOCK(dirp, 0);
2188		}
2189	}
2190ereply:
2191	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2192	if (v3) {
2193		nfsm_srvpostop_attr(getret, &at);
2194		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2195	}
2196	error = 0;
2197	/* fall through */
2198
2199nfsmout:
2200	NDFREE(&nd, NDF_ONLY_PNBUF);
2201	if (vp)
2202		vput(vp);
2203	if (nd.ni_dvp) {
2204		if (nd.ni_dvp == nd.ni_vp)
2205			vrele(nd.ni_dvp);
2206		else
2207			vput(nd.ni_dvp);
2208	}
2209	if (dirp)
2210		vrele(dirp);
2211	if (nd.ni_vp)
2212		vrele(nd.ni_vp);
2213	vn_finished_write(mp);
2214	VFS_UNLOCK_GIANT(vfslocked);
2215	return(error);
2216}
2217
2218/*
2219 * nfs symbolic link service
 *
 * Creates a symbolic link, named by the request, inside the directory
 * identified by the request's file handle.  The routine:
 *   - decodes the directory file handle and the new name, then looks the
 *     name up with nameiop CREATE (LOCKPARENT and SAVESTART set);
 *   - decodes the attributes (ahead of the link text for v3, after it
 *     for v2, where only the mode is used) and copies the link text into
 *     a temporary NUL-terminated buffer;
 *   - fails with EEXIST if the name already exists, otherwise calls
 *     VOP_SYMLINK();
 *   - for v3 only, looks the new name up again to fetch the file handle
 *     and attributes that are placed in the reply.
 *
 * nfsd supplies the request mbufs, the parse position, the credential
 * and the v2/v3 flag; slp and nam are handed on to nfs_namei().
 * NOTE(review): mrq is not referenced directly in this function;
 * presumably the nfsm_reply() macro stores the reply through it --
 * confirm against the macro definition.
 *
 * Returns error, which is cleared to 0 once the reply has been built.
 * NOTE(review): no explicit "goto nfsmout" appears below, so the nfsm_*
 * parsing/reply macros presumably jump there (with error set) when they
 * fail -- confirm against their definitions.
2220 */
2221int
2222nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2223    struct mbuf **mrq)
2224{
2225	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2226	struct sockaddr *nam = nfsd->nd_nam;
2227	caddr_t dpos = nfsd->nd_dpos;
2228	struct ucred *cred = nfsd->nd_cr;
2229	struct vattr va, dirfor, diraft;
2230	struct nameidata nd;
2231	struct vattr *vap = &va;
2232	struct nfsv2_sattr *sp;
2233	char *bpos, *pathcp = NULL;
2234	struct uio io;
2235	struct iovec iv;
2236	int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2237	int v3 = (nfsd->nd_flag & ND_NFSV3);
2238	struct mbuf *mb, *mreq;
2239	struct vnode *dirp = NULL;
2240	nfsfh_t nfh;
2241	fhandle_t *fhp;
2242	struct mount *mp = NULL;
2243	int tvfslocked;
2244	int vfslocked;
2245
2246	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2247	ndclear(&nd);
2248	vfslocked = 0;
2249
2250	fhp = &nfh.fh_generic;
2251	nfsm_srvmtofh(fhp);
	/* No mount matches the fsid carried in the file handle. */
2252	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2253		error = ESTALE;
2254		goto out;
2255	}
2256	vfslocked = VFS_LOCK_GIANT(mp);
2257	(void) vn_start_write(NULL, &mp, V_WAIT);
2258	vfs_rel(mp);		/* The write holds a ref. */
2259	nfsm_srvnamesiz(len);
2260	nd.ni_cnd.cn_cred = cred;
2261	nd.ni_cnd.cn_nameiop = CREATE;
2262	nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART | MPSAFE;
2263	error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
2264		&dirp, v3, &dirfor, &dirfor_ret, FALSE);
2265	vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
	/* v3 carries the attributes before the link text; len2 is its length. */
2266	if (error == 0) {
2267		VATTR_NULL(vap);
2268		if (v3)
2269			nfsm_srvsattr(vap);
2270		nfsm_srvpathsiz(len2);
2271	}
	/* dirp is only needed for the v3 directory wcc data in the reply. */
2272	if (dirp && !v3) {
2273		vrele(dirp);
2274		dirp = NULL;
2275	}
2276	if (error)
2277		goto out;
	/* One extra byte for the terminating NUL stored after the copy. */
2278	pathcp = malloc(len2 + 1, M_TEMP, M_WAITOK);
2279	iv.iov_base = pathcp;
2280	iv.iov_len = len2;
2281	io.uio_resid = len2;
2282	io.uio_offset = 0;
2283	io.uio_iov = &iv;
2284	io.uio_iovcnt = 1;
2285	io.uio_segflg = UIO_SYSSPACE;
2286	io.uio_rw = UIO_READ;
2287	io.uio_td = NULL;
2288	nfsm_mtouio(&io, len2);
	/* For v2 the attributes follow the link text; only the mode is used. */
2289	if (!v3) {
2290		sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
2291		vap->va_mode = nfstov_mode(sp->sa_mode);
2292	}
2293	*(pathcp + len2) = '\0';
	/* The lookup found an existing entry with this name. */
2294	if (nd.ni_vp) {
2295		error = EEXIST;
2296		goto out;
2297	}
2298
2299	/*
2300	 * issue symlink op.  SAVESTART is set so the underlying path component
2301	 * is only freed by the VOP if an error occurs.
2302	 */
2303	if (vap->va_mode == (mode_t)VNOVAL)
2304		vap->va_mode = 0;
2305	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
2306	if (error)
2307		NDFREE(&nd, NDF_ONLY_PNBUF);
2308	else
2309		vput(nd.ni_vp);
2310	nd.ni_vp = NULL;
2311	/*
2312	 * releases directory prior to potential lookup op.
2313	 */
2314	vput(nd.ni_dvp);
2315	nd.ni_dvp = NULL;
2316
2317	if (error == 0) {
2318	    if (v3) {
2319		/*
2320		 * Issue lookup.  Leave SAVESTART set so we can easily free
2321		 * the name buffer later on.
2322		 *
2323		 * since LOCKPARENT is not set, ni_dvp will be garbage on
2324		 * return whether an error occurs or not.
2325		 */
2326		nd.ni_cnd.cn_nameiop = LOOKUP;
2327		nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
2328		nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
2329		nd.ni_cnd.cn_thread = curthread;
2330		nd.ni_cnd.cn_cred = cred;
2331		tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
2332		if (tvfslocked)
2333			nd.ni_cnd.cn_flags |= GIANTHELD;
2334		error = lookup(&nd);
2335		nd.ni_dvp = NULL;
2336		vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
2337		nd.ni_cnd.cn_flags &= ~GIANTHELD;
2338
		/* Build the new link's file handle and fetch its attributes. */
2339		if (error == 0) {
2340			bzero((caddr_t)fhp, sizeof(nfh));
2341			fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
2342			error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
2343			if (!error)
2344				error = VOP_GETATTR(nd.ni_vp, vap, cred);
2345			vput(nd.ni_vp);
2346			nd.ni_vp = NULL;
2347		}
2348	    }
2349	}
2350out:
2351	/*
2352	 * These releases aren't strictly required, does even doing them
2353	 * make any sense? XXX can nfsm_reply() block?
2354	 */
2355	if (pathcp) {
2356		free(pathcp, M_TEMP);
2357		pathcp = NULL;
2358	}
	/*
	 * Fetch the post-operation directory attributes for the v3 reply.
	 * NOTE(review): on the EEXIST path above nd.ni_dvp has not been
	 * released yet when dirp is locked here, whereas nfsrv_mkdir()
	 * releases it first "to prevent deadlock" -- confirm whether dirp
	 * can be the same vnode as nd.ni_dvp at this point.
	 */
2359	if (dirp) {
2360		vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
2361		diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
2362		VOP_UNLOCK(dirp, 0);
2363	}
2364	if (nd.ni_startdir) {
2365		vrele(nd.ni_startdir);
2366		nd.ni_startdir = NULL;
2367	}
2368	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
	/* Only v3 adds a body: handle + attributes on success, then wcc data. */
2369	if (v3) {
2370		if (!error) {
2371			nfsm_srvpostop_fh(fhp);
2372			nfsm_srvpostop_attr(0, vap);
2373		}
2374		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2375	}
2376	error = 0;
2377	/* fall through */
2378
	/*
	 * Common cleanup.  The pointers tested below are NULLed as they are
	 * released above, so each resource is dropped at most once; pathcp
	 * and ni_startdir can only still be set here when this label was
	 * reached without passing through "out".
	 */
2379nfsmout:
2380	NDFREE(&nd, NDF_ONLY_PNBUF);
2381	if (nd.ni_dvp) {
2382		if (nd.ni_dvp == nd.ni_vp)
2383			vrele(nd.ni_dvp);
2384		else
2385			vput(nd.ni_dvp);
2386	}
2387	if (nd.ni_vp)
2388		vrele(nd.ni_vp);
2389	if (nd.ni_startdir)
2390		vrele(nd.ni_startdir);
2391	if (dirp)
2392		vrele(dirp);
2393	if (pathcp)
2394		free(pathcp, M_TEMP);
2395
2396	vn_finished_write(mp);
2397	VFS_UNLOCK_GIANT(vfslocked);
2398	return (error);
2399}
2400
2401/*
2402 * nfs mkdir service
 *
 * Creates a directory, named by the request, inside the directory
 * identified by the request's file handle.  The routine:
 *   - decodes the directory file handle and the new name, then looks the
 *     name up with nameiop CREATE (LOCKPARENT set, SAVESTART not set);
 *   - if the lookup fails, replies immediately (v3 adds the directory
 *     wcc data);
 *   - decodes the attributes (a full sattr for v3, just the mode word
 *     for v2);
 *   - fails with EEXIST if the name already exists, otherwise calls
 *     VOP_MKDIR() and, on success, builds the new directory's file
 *     handle and fetches its attributes for the reply.
 *
 * nfsd supplies the request mbufs, the parse position, the credential
 * and the v2/v3 flag; slp and nam are handed on to nfs_namei().
 * NOTE(review): mrq is not referenced directly in this function;
 * presumably the nfsm_reply() macro stores the reply through it --
 * confirm against the macro definition.
 *
 * Returns error, which is cleared to 0 once a reply has been built.
 * NOTE(review): the nfsm_* parsing/reply macros presumably jump to
 * nfsmout (with error set) when they fail -- confirm.
2403 */
2404int
2405nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2406    struct mbuf **mrq)
2407{
2408	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2409	struct sockaddr *nam = nfsd->nd_nam;
2410	caddr_t dpos = nfsd->nd_dpos;
2411	struct ucred *cred = nfsd->nd_cr;
2412	struct vattr va, dirfor, diraft;
2413	struct vattr *vap = &va;
2414	struct nfs_fattr *fp;
2415	struct nameidata nd;
2416	u_int32_t *tl;
2417	caddr_t bpos;
2418	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2419	int v3 = (nfsd->nd_flag & ND_NFSV3);
2420	struct mbuf *mb, *mreq;
2421	struct vnode *dirp = NULL;
	/* Set once VOP_MKDIR() has run: cleanup then vput()s nd.ni_vp. */
2422	int vpexcl = 0;
2423	nfsfh_t nfh;
2424	fhandle_t *fhp;
2425	struct mount *mp = NULL;
2426	int vfslocked;
2427
2428	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2429	ndclear(&nd);
2430	vfslocked = 0;
2431
2432	fhp = &nfh.fh_generic;
2433	nfsm_srvmtofh(fhp);
	/* No mount matches the fsid carried in the file handle. */
2434	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2435		error = ESTALE;
2436		goto out;
2437	}
2438	vfslocked = VFS_LOCK_GIANT(mp);
2439	(void) vn_start_write(NULL, &mp, V_WAIT);
2440	vfs_rel(mp);		/* The write holds a ref. */
2441	nfsm_srvnamesiz(len);
2442	nd.ni_cnd.cn_cred = cred;
2443	nd.ni_cnd.cn_nameiop = CREATE;
2444	nd.ni_cnd.cn_flags = LOCKPARENT | MPSAFE;
2445
2446	error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
2447		&dirp, v3, &dirfor, &dirfor_ret, FALSE);
2448	vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
	/* dirp is only needed for the v3 directory wcc data in the reply. */
2449	if (dirp && !v3) {
2450		vrele(dirp);
2451		dirp = NULL;
2452	}
	/* Lookup failed: reply now, with only the v3 directory wcc data. */
2453	if (error) {
2454		nfsm_reply(NFSX_WCCDATA(v3));
2455		if (v3)
2456			nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2457		error = 0;
2458		goto nfsmout;
2459	}
2460	VATTR_NULL(vap);
	/* v3 sends a full sattr; for v2 only the mode word is decoded. */
2461	if (v3) {
2462		nfsm_srvsattr(vap);
2463	} else {
2464		tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
2465		vap->va_mode = nfstov_mode(*tl++);
2466	}
2467
2468	/*
2469	 * At this point nd.ni_dvp is referenced and exclusively locked and
2470	 * nd.ni_vp, if it exists, is referenced but not locked.
2471	 */
2472
2473	vap->va_type = VDIR;
	/* The lookup found an existing entry with this name. */
2474	if (nd.ni_vp != NULL) {
2475		NDFREE(&nd, NDF_ONLY_PNBUF);
2476		error = EEXIST;
2477		goto out;
2478	}
2479
2480	/*
2481	 * Issue mkdir op.  Since SAVESTART is not set, the pathname
2482	 * component is freed by the VOP call.  This will fill-in
2483	 * nd.ni_vp, reference, and exclusively lock it.
2484	 */
2485	if (vap->va_mode == (mode_t)VNOVAL)
2486		vap->va_mode = 0;
2487	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
2488	NDFREE(&nd, NDF_ONLY_PNBUF);
2489	vpexcl = 1;
2490
2491	vput(nd.ni_dvp);
2492	nd.ni_dvp = NULL;
2493
	/* Build the new directory's file handle and fetch its attributes. */
2494	if (!error) {
2495		bzero((caddr_t)fhp, sizeof(nfh));
2496		fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
2497		error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
2498		if (!error)
2499			error = VOP_GETATTR(nd.ni_vp, vap, cred);
2500	}
2501out:
	/*
	 * Fetch the post-operation attributes of the parent for the v3
	 * reply.  If dirp is the still-held nd.ni_dvp it is queried as is;
	 * otherwise everything held through nd is released before dirp is
	 * locked.
	 */
2502	if (dirp) {
2503		if (dirp == nd.ni_dvp) {
2504			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
2505		} else {
2506			/* Release existing locks to prevent deadlock. */
2507			if (nd.ni_dvp) {
2508				NDFREE(&nd, NDF_ONLY_PNBUF);
2509				if (nd.ni_dvp == nd.ni_vp && vpexcl)
2510					vrele(nd.ni_dvp);
2511				else
2512					vput(nd.ni_dvp);
2513			}
2514			if (nd.ni_vp) {
2515				if (vpexcl)
2516					vput(nd.ni_vp);
2517				else
2518					vrele(nd.ni_vp);
2519			}
2520			nd.ni_dvp = NULL;
2521			nd.ni_vp = NULL;
2522			vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
2523			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
2524			VOP_UNLOCK(dirp, 0);
2525		}
2526	}
2527	nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
	/*
	 * v3: handle + attributes on success, then wcc data in every case.
	 * v2: handle + attributes on success, nothing more on failure.
	 */
2528	if (v3) {
2529		if (!error) {
2530			nfsm_srvpostop_fh(fhp);
2531			nfsm_srvpostop_attr(0, vap);
2532		}
2533		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2534	} else if (!error) {
2535		/* v2 non-error case. */
2536		nfsm_srvfhtom(fhp, v3);
2537		fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
2538		nfsm_srvfillattr(vap, fp);
2539	}
2540	error = 0;
2541	/* fall through */
2542
	/*
	 * Common cleanup: release whatever is still held through nd, using
	 * vpexcl to choose between vput() and vrele() for nd.ni_vp.
	 */
2543nfsmout:
2544	if (nd.ni_dvp) {
2545		NDFREE(&nd, NDF_ONLY_PNBUF);
2546		if (nd.ni_dvp == nd.ni_vp && vpexcl)
2547			vrele(nd.ni_dvp);
2548		else
2549			vput(nd.ni_dvp);
2550	}
2551	if (nd.ni_vp) {
2552		if (vpexcl)
2553			vput(nd.ni_vp);
2554		else
2555			vrele(nd.ni_vp);
2556	}
2557	if (dirp)
2558		vrele(dirp);
2559	vn_finished_write(mp);
2560	VFS_UNLOCK_GIANT(vfslocked);
2561	return (error);
2562}
2563
2564/*
2565 * nfs rmdir service
 *
 * Removes the directory, named by the request, from the directory
 * identified by the request's file handle.  The routine:
 *   - decodes the directory file handle and the name, then looks the
 *     name up with nameiop DELETE (LOCKPARENT and LOCKLEAF set);
 *   - if the lookup fails, replies immediately (v3 adds the directory
 *     wcc data);
 *   - rejects a target that is not a directory (ENOTDIR), that is the
 *     parent itself, i.e. "." (EINVAL), or that is the root of a
 *     mounted filesystem (EBUSY);
 *   - otherwise calls VOP_RMDIR().
 * The reply carries only a status for v2; v3 adds the directory wcc
 * data.
 *
 * nfsd supplies the request mbufs, the parse position, the credential
 * and the v2/v3 flag; slp and nam are handed on to nfs_namei().
 * NOTE(review): mrq is not referenced directly in this function;
 * presumably the nfsm_reply() macro stores the reply through it --
 * confirm against the macro definition.
 *
 * Returns error, which is cleared to 0 once a reply has been built.
 * NOTE(review): the nfsm_* parsing/reply macros presumably jump to
 * nfsmout (with error set) when they fail -- confirm.
2566 */
2567int
2568nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2569    struct mbuf **mrq)
2570{
2571	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2572	struct sockaddr *nam = nfsd->nd_nam;
2573	caddr_t dpos = nfsd->nd_dpos;
2574	struct ucred *cred = nfsd->nd_cr;
2575	caddr_t bpos;
2576	int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2577	int v3 = (nfsd->nd_flag & ND_NFSV3);
2578	struct mbuf *mb, *mreq;
2579	struct vnode *vp, *dirp = NULL;
2580	struct vattr dirfor, diraft;
2581	nfsfh_t nfh;
2582	fhandle_t *fhp;
2583	struct nameidata nd;
2584	struct mount *mp = NULL;
2585	int vfslocked;
2586
2587	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2588	ndclear(&nd);
2589	vfslocked = 0;
2590
2591	fhp = &nfh.fh_generic;
2592	nfsm_srvmtofh(fhp);
	/* No mount matches the fsid carried in the file handle. */
2593	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
2594		error = ESTALE;
2595		goto out;
2596	}
2597	vfslocked = VFS_LOCK_GIANT(mp);
2598	(void) vn_start_write(NULL, &mp, V_WAIT);
2599	vfs_rel(mp);		/* The write holds a ref. */
2600	nfsm_srvnamesiz(len);
2601	nd.ni_cnd.cn_cred = cred;
2602	nd.ni_cnd.cn_nameiop = DELETE;
2603	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | MPSAFE;
2604	error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
2605		&dirp, v3, &dirfor, &dirfor_ret, FALSE);
2606	vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
	/* dirp is only needed for the v3 directory wcc data in the reply. */
2607	if (dirp && !v3) {
2608		vrele(dirp);
2609		dirp = NULL;
2610	}
	/* Lookup failed: reply now, with only the v3 directory wcc data. */
2611	if (error) {
2612		nfsm_reply(NFSX_WCCDATA(v3));
2613		if (v3)
2614			nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2615		error = 0;
2616		goto nfsmout;
2617	}
	/* vp is the entry to be removed; it must itself be a directory. */
2618	vp = nd.ni_vp;
2619	if (vp->v_type != VDIR) {
2620		error = ENOTDIR;
2621		goto out;
2622	}
2623	/*
2624	 * No rmdir "." please.
2625	 */
2626	if (nd.ni_dvp == vp) {
2627		error = EINVAL;
2628		goto out;
2629	}
2630	/*
2631	 * The root of a mounted filesystem cannot be deleted.
2632	 */
2633	if (vp->v_vflag & VV_ROOT)
2634		error = EBUSY;
	/*
	 * "out" is reached both by falling through after the checks above
	 * and by the error gotos; VOP_RMDIR() is only issued when error is
	 * still 0.
	 */
2635out:
2636	/*
2637	 * Issue or abort op.  Since SAVESTART is not set, path name
2638	 * component is freed by the VOP after either.
2639	 */
2640	if (!error)
2641		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2642	NDFREE(&nd, NDF_ONLY_PNBUF);
2643
	/*
	 * Fetch the post-operation attributes of the parent for the v3
	 * reply.  If dirp is the still-held nd.ni_dvp it is queried as is;
	 * otherwise everything held through nd is released before dirp is
	 * locked.
	 */
2644	if (dirp) {
2645		if (dirp == nd.ni_dvp)
2646			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
2647		else {
2648			/* Release existing locks to prevent deadlock. */
2649			if (nd.ni_dvp) {
2650				if (nd.ni_dvp == nd.ni_vp)
2651					vrele(nd.ni_dvp);
2652				else
2653					vput(nd.ni_dvp);
2654			}
2655			if (nd.ni_vp)
2656				vput(nd.ni_vp);
2657			nd.ni_dvp = NULL;
2658			nd.ni_vp = NULL;
2659			vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
2660			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
2661			VOP_UNLOCK(dirp, 0);
2662		}
2663	}
2664	nfsm_reply(NFSX_WCCDATA(v3));
2665	error = 0;
2666	if (v3)
2667		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2668	/* fall through */
2669
	/* Common cleanup: release whatever is still held through nd and dirp. */
2670nfsmout:
2671	NDFREE(&nd, NDF_ONLY_PNBUF);
2672	if (nd.ni_dvp) {
2673		if (nd.ni_dvp == nd.ni_vp)
2674			vrele(nd.ni_dvp);
2675		else
2676			vput(nd.ni_dvp);
2677	}
2678	if (nd.ni_vp)
2679		vput(nd.ni_vp);
2680	if (dirp)
2681		vrele(dirp);
2682
2683	vn_finished_write(mp);
2684	VFS_UNLOCK_GIANT(vfslocked);
2685	return(error);
2686}
2687
2688/*
2689 * nfs readdir service
2690 * - mallocs what it thinks is enough to read
2691 *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2692 * - calls VOP_READDIR()
2693 * - loops around building the reply
2694 *	if the output generated exceeds count break out of loop
2695 *	The nfsm_clget macro is used here so that the reply will be packed
2696 *	tightly in mbuf clusters.
2697 * - it only knows that it has encountered eof when the VOP_READDIR()
2698 *	reads nothing
2699 * - as such one readdir rpc will return eof false although you are there
2700 *	and then the next will return eof
2701 * - it trims out records with d_fileno == 0
2702 *	this doesn't matter for Unix clients, but they might confuse clients
2703 *	for other os'.
2704 * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2705 *	than requested, but this may not apply to all filesystems. For
2706 *	example, client NFS does not { although it is never remote mounted
2707 *	anyhow }
2708 *     The alternate call nfsrv_readdirplus() does lookups as well.
2709 * PS: The NFS protocol spec. does not clarify what the "count" byte
2710 *	argument is a count of.. just name strings and file id's or the
2711 *	entire reply rpc or ...
2712 *	I tried just file name and id sizes and it confused the Sun client,
2713 *	so I am using the full rpc size now. The "paranoia.." comment refers
2714 *	to including the status longwords that are not a part of the dir.
2715 *	"entry" structures, but are in the rpc.
2716 */
/*
 * Fixed-size trailer of one readdirplus reply entry.  nfsrv_readdirplus()
 * fills one of these per entry and copies it byte-for-byte into the reply
 * right after the (padded) entry name, so the field order and sizes are
 * the layout that goes out in the reply.
 */
2717struct flrep {
2718	nfsuint64	fl_off;		/* entry cookie; high word is stored as 0 */
2719	u_int32_t	fl_postopok;	/* set to nfsrv_nfs_true: attributes follow */
2720	u_int32_t	fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];	/* filled by nfsm_srvfillattr() */
2721	u_int32_t	fl_fhok;	/* set to nfsrv_nfs_true: file handle follows */
2722	u_int32_t	fl_fhsize;	/* txdr_unsigned(NFSX_V3FH) */
2723	u_int32_t	fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];	/* file handle, used as a fhandle_t */
2724};
2725
/*
 * nfs readdir service (v2 and v3)
 *
 * Decodes the directory file handle, the starting cookie, the v3 cookie
 * verifier and the byte count; reads directory entries with VOP_READDIR()
 * into a temporary buffer and encodes them into the reply until the
 * entries run out or the reply would exceed the requested count.  The
 * protocol caveats are described in the block comment that precedes
 * struct flrep.
 *
 * nfsd supplies the request mbufs, the parse position, the credential
 * and the v2/v3 flag; slp and nam are handed on to nfsrv_fhtovp().
 * NOTE(review): mrq is not referenced directly in this function;
 * presumably the nfsm_reply() macro stores the reply through it --
 * confirm against the macro definition.
 *
 * Returns error, which is cleared to 0 on every path below that builds a
 * reply.  NOTE(review): the nfsm_* parsing/reply macros presumably jump
 * to nfsmout (with error set) when they fail -- confirm.
 */
2726int
2727nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2728    struct mbuf **mrq)
2729{
2730	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2731	struct sockaddr *nam = nfsd->nd_nam;
2732	caddr_t dpos = nfsd->nd_dpos;
2733	struct ucred *cred = nfsd->nd_cr;
2734	char *bp, *be;
2735	struct mbuf *mp;
2736	struct dirent *dp;
2737	caddr_t cp;
2738	u_int32_t *tl;
2739	caddr_t bpos;
2740	struct mbuf *mb, *mreq;
2741	char *cpos, *cend, *rbuf;
2742	struct vnode *vp = NULL;
2743	struct vattr at;
2744	nfsfh_t nfh;
2745	fhandle_t *fhp;
2746	struct uio io;
2747	struct iovec iv;
2748	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2749	int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2750	int v3 = (nfsd->nd_flag & ND_NFSV3);
2751	u_quad_t off, toff, verf;
2752	u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
2753	int vfslocked, not_zfs;
2754
2755	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2756	vfslocked = 0;
2757	fhp = &nfh.fh_generic;
2758	nfsm_srvmtofh(fhp);
	/*
	 * v3: 64-bit cookie, 64-bit verifier, count (5 words).
	 * v2: 32-bit cookie, count (2 words).
	 * verf is only consulted by the disabled (#if 0) check below.
	 */
2759	if (v3) {
2760		tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
2761		toff = fxdr_hyper(tl);
2762		tl += 2;
2763		verf = fxdr_hyper(tl);
2764		tl += 2;
2765	} else {
2766		tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
2767		toff = fxdr_unsigned(u_quad_t, *tl++);
2768		verf = 0;	/* shut up gcc */
2769	}
2770	off = toff;
2771	cnt = fxdr_unsigned(int, *tl);
	/*
	 * The count is an unsigned 32-bit word in the request but is held in
	 * an int here, so a large client value shows up negative.  Clamp it
	 * to the server maximum before rounding, so that neither the
	 * round-up nor the malloc() size and uio_resid derived from it can
	 * become negative or overflow.
	 */
2772	xfer = NFS_SRVMAXDATA(nfsd);
2773	if (cnt < 0 || cnt > xfer)
2774		cnt = xfer;
2775	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2776	if (siz > xfer)
2777		siz = xfer;
2778	fullsiz = siz;
2779	error = nfsrv_fhtovp(fhp, 0, &vp, &vfslocked, nfsd, slp, nam, &rdonly);
2780	if (!error && vp->v_type != VDIR) {
2781		error = ENOTDIR;
2782		vput(vp);
2783		vp = NULL;
2784	}
2785	if (error) {
2786		nfsm_reply(NFSX_UNSIGNED);
2787		if (v3)
2788			nfsm_srvpostop_attr(getret, &at);
2789		error = 0;
2790		goto nfsmout;
2791	}
2792
2793	/*
2794	 * Obtain lock on vnode for this section of the code
2795	 */
2796	if (v3) {
2797		error = getret = VOP_GETATTR(vp, &at, cred);
2798#if 0
2799		/*
2800		 * XXX This check may be too strict for Solaris 2.5 clients.
2801		 */
2802		if (!error && toff && verf && verf != at.va_filerev)
2803			error = NFSERR_BAD_COOKIE;
2804#endif
2805	}
2806	if (!error)
2807		error = nfsrv_access(vp, VEXEC, cred, rdonly, 0);
2808	if (error) {
2809		vput(vp);
2810		vp = NULL;
2811		nfsm_reply(NFSX_POSTOPATTR(v3));
2812		if (v3)
2813			nfsm_srvpostop_attr(getret, &at);
2814		error = 0;
2815		goto nfsmout;
2816	}
	/* Remember whether this is ZFS; it changes the cookie skipping below. */
2817	not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") != 0;
2818	VOP_UNLOCK(vp, 0);
2819
2820	/*
2821	 * end section.  Allocate rbuf and continue
2822	 */
2823	rbuf = malloc(siz, M_TEMP, M_WAITOK);
	/*
	 * Each pass reads up to fullsiz bytes of entries starting at off;
	 * the code jumps back here when a pass yields nothing usable.
	 */
2824again:
2825	iv.iov_base = rbuf;
2826	iv.iov_len = fullsiz;
2827	io.uio_iov = &iv;
2828	io.uio_iovcnt = 1;
2829	io.uio_offset = (off_t)off;
2830	io.uio_resid = fullsiz;
2831	io.uio_segflg = UIO_SYSSPACE;
2832	io.uio_rw = UIO_READ;
2833	io.uio_td = NULL;
2834	eofflag = 0;
	/* Drop the cookie array left over from a previous pass. */
2835	if (cookies) {
2836		free((caddr_t)cookies, M_TEMP);
2837		cookies = NULL;
2838	}
2839	vn_lock(vp, LK_SHARED | LK_RETRY);
2840	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2841	off = (off_t)io.uio_offset;
	/* The reply is built from the cookies; without them, fail. */
2842	if (!cookies && !error)
2843		error = NFSERR_PERM;
2844	if (v3) {
2845		getret = VOP_GETATTR(vp, &at, cred);
2846		if (!error)
2847			error = getret;
2848	}
2849	VOP_UNLOCK(vp, 0);
2850	if (error) {
2851		vrele(vp);
2852		vp = NULL;
2853		free((caddr_t)rbuf, M_TEMP);
2854		if (cookies)
2855			free((caddr_t)cookies, M_TEMP);
2856		nfsm_reply(NFSX_POSTOPATTR(v3));
2857		if (v3)
2858			nfsm_srvpostop_attr(getret, &at);
2859		error = 0;
2860		goto nfsmout;
2861	}
	/* From here on siz is the number of bytes actually read. */
2862	if (io.uio_resid) {
2863		siz -= io.uio_resid;
2864
2865		/*
2866		 * If nothing read, return eof
2867		 * rpc reply
2868		 */
2869		if (siz == 0) {
2870			vrele(vp);
2871			vp = NULL;
2872			nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
2873				2 * NFSX_UNSIGNED);
2874			if (v3) {
2875				nfsm_srvpostop_attr(getret, &at);
2876				tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
2877				txdr_hyper(at.va_filerev, tl);
2878				tl += 2;
2879			} else
2880				tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
			/* "no entry follows", then "eof". */
2881			*tl++ = nfsrv_nfs_false;
2882			*tl = nfsrv_nfs_true;
2883			free((caddr_t)rbuf, M_TEMP);
2884			free((caddr_t)cookies, M_TEMP);
2885			error = 0;
2886			goto nfsmout;
2887		}
2888	}
2889
2890	/*
2891	 * Check for degenerate cases of nothing useful read.
2892	 * If so go try again
2893	 */
2894	cpos = rbuf;
2895	cend = rbuf + siz;
2896	dp = (struct dirent *)cpos;
2897	cookiep = cookies;
2898	/*
2899	 * For some reason FreeBSD's ufs_readdir() chooses to back the
2900	 * directory offset up to a block boundary, so it is necessary to
2901	 * skip over the records that precede the requested offset. This
2902	 * requires the assumption that file offset cookies monotonically
2903	 * increase.
2904	 * Since the offset cookies don't monotonically increase for ZFS,
2905	 * this is not done when ZFS is the file system.
2906	 */
2907	while (cpos < cend && ncookies > 0 &&
2908		(dp->d_fileno == 0 || dp->d_type == DT_WHT ||
2909		 (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
2910		cpos += dp->d_reclen;
2911		dp = (struct dirent *)cpos;
2912		cookiep++;
2913		ncookies--;
2914	}
	/* Everything read was skipped: read the next chunk, starting at off. */
2915	if (cpos >= cend || ncookies == 0) {
2916		toff = off;
2917		siz = fullsiz;
2918		goto again;
2919	}
2920
2921	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
2922	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
2923	if (v3) {
2924		nfsm_srvpostop_attr(getret, &at);
2925		tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
2926		txdr_hyper(at.va_filerev, tl);
2927	}
	/*
	 * bp is the write position in the current reply mbuf mp and be the
	 * end of its space.  NOTE(review): nfsm_clget presumably points tl
	 * at bp, moving mp/bp/be on to a fresh mbuf once bp reaches be --
	 * confirm against the macro definition.
	 */
2928	mp = mb;
2929	bp = bpos;
2930	be = bp + M_TRAILINGSPACE(mp);
2931
2932	/* Loop through the records and build reply */
2933	while (cpos < cend && ncookies > 0) {
2934		if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
2935			nlen = dp->d_namlen;
2936			rem = nfsm_rndup(nlen) - nlen;
			/* Stop, without eof, once this entry would exceed cnt. */
2937			len += (4 * NFSX_UNSIGNED + nlen + rem);
2938			if (v3)
2939				len += 2 * NFSX_UNSIGNED;
2940			if (len > cnt) {
2941				eofflag = 0;
2942				break;
2943			}
2944			/*
2945			 * Build the directory record xdr from
2946			 * the dirent entry.
2947			 */
2948			nfsm_clget;
2949			*tl = nfsrv_nfs_true;
2950			bp += NFSX_UNSIGNED;
			/* v3 file ids are 64 bits wide; the high word is 0. */
2951			if (v3) {
2952				nfsm_clget;
2953				*tl = 0;
2954				bp += NFSX_UNSIGNED;
2955			}
2956			nfsm_clget;
2957			*tl = txdr_unsigned(dp->d_fileno);
2958			bp += NFSX_UNSIGNED;
2959			nfsm_clget;
2960			*tl = txdr_unsigned(nlen);
2961			bp += NFSX_UNSIGNED;
2962
2963			/* And loop around copying the name */
2964			xfer = nlen;
2965			cp = dp->d_name;
2966			while (xfer > 0) {
2967				nfsm_clget;
2968				if ((bp+xfer) > be)
2969					tsiz = be-bp;
2970				else
2971					tsiz = xfer;
2972				bcopy(cp, bp, tsiz);
2973				bp += tsiz;
2974				xfer -= tsiz;
2975				if (xfer > 0)
2976					cp += tsiz;
2977			}
2978			/* And null pad to an int32_t boundary. */
2979			for (i = 0; i < rem; i++)
2980				*bp++ = '\0';
2981			nfsm_clget;
2982
2983			/* Finish off the record */
			/* v3 cookies are 64 bits wide; the high word is 0. */
2984			if (v3) {
2985				*tl = 0;
2986				bp += NFSX_UNSIGNED;
2987				nfsm_clget;
2988			}
2989			*tl = txdr_unsigned(*cookiep);
2990			bp += NFSX_UNSIGNED;
2991		}
2992		cpos += dp->d_reclen;
2993		dp = (struct dirent *)cpos;
2994		cookiep++;
2995		ncookies--;
2996	}
2997	vrele(vp);
2998	vp = NULL;
	/* Terminate the list ("no entry follows"), then append the eof flag. */
2999	nfsm_clget;
3000	*tl = nfsrv_nfs_false;
3001	bp += NFSX_UNSIGNED;
3002	nfsm_clget;
3003	if (eofflag)
3004		*tl = nfsrv_nfs_true;
3005	else
3006		*tl = nfsrv_nfs_false;
3007	bp += NFSX_UNSIGNED;
	/* Account for the bytes written by hand into the last mbuf. */
3008	if (mp != mb) {
3009		if (bp < be)
3010			mp->m_len = bp - mtod(mp, caddr_t);
3011	} else
3012		mp->m_len += bp - bpos;
3013	free((caddr_t)rbuf, M_TEMP);
3014	free((caddr_t)cookies, M_TEMP);
3015
3016nfsmout:
3017	if (vp)
3018		vrele(vp);
3019	VFS_UNLOCK_GIANT(vfslocked);
3020	return(error);
3021}
3022
/*
 * nfs readdirplus service (v3 only; panics if called for v2)
 *
 * Like nfsrv_readdir(), but every entry in the reply also carries the
 * entry's attributes and file handle (a struct flrep).  The vnode for
 * each entry is obtained with VFS_VGET() on its file number; the first
 * time that reports EOPNOTSUPP the routine switches to VOP_LOOKUP() by
 * name for the remaining entries.  Entries whose vnode, file handle or
 * attributes cannot be obtained are left out of the reply.
 *
 * The request carries two sizes: siz bounds the directory information
 * (checked through dirlen against fullsiz) and cnt bounds the whole
 * reply (checked through len).
 *
 * nfsd supplies the request mbufs, the parse position, the credential
 * and the v2/v3 flag; slp and nam are handed on to nfsrv_fhtovp().
 * NOTE(review): mrq is not referenced directly in this function;
 * presumably the nfsm_reply() macro stores the reply through it --
 * confirm against the macro definition.
 *
 * Returns error, which is cleared to 0 on every path below that builds a
 * reply.  NOTE(review): the nfsm_* parsing/reply macros presumably jump
 * to nfsmout (with error set) when they fail -- confirm.
 */
3023int
3024nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3025    struct mbuf **mrq)
3026{
3027	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3028	struct sockaddr *nam = nfsd->nd_nam;
3029	caddr_t dpos = nfsd->nd_dpos;
3030	struct ucred *cred = nfsd->nd_cr;
3031	char *bp, *be;
3032	struct mbuf *mp;
3033	struct dirent *dp;
3034	caddr_t cp;
3035	u_int32_t *tl;
3036	caddr_t bpos;
3037	struct mbuf *mb, *mreq;
3038	char *cpos, *cend, *rbuf;
3039	struct vnode *vp = NULL, *nvp;
3040	struct flrep fl;
3041	nfsfh_t nfh;
3042	fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3043	struct uio io;
3044	struct iovec iv;
3045	struct vattr va, at, *vap = &va;
3046	struct nfs_fattr *fp;
3047	int len, nlen, rem, xfer, tsiz, i, error = 0, error1, getret = 1;
3048	int vp_locked;
3049	int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3050	u_quad_t off, toff, verf;
3051	u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3052	int v3 = (nfsd->nd_flag & ND_NFSV3);
3053	int usevget = 1, vfslocked;
3054	struct componentname cn;
3055	struct mount *mntp = NULL;
3056	int not_zfs;
3057
3058	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3059	vfslocked = 0;
3060	vp_locked = 0;
3061	if (!v3)
3062		panic("nfsrv_readdirplus: v3 proc called on a v2 connection");
3063	fhp = &nfh.fh_generic;
3064	nfsm_srvmtofh(fhp);
	/*
	 * 64-bit cookie, 64-bit verifier, directory-size hint, reply limit.
	 * verf is only consulted by the disabled (#if 0) check below.
	 */
3065	tl = nfsm_dissect_nonblock(u_int32_t *, 6 * NFSX_UNSIGNED);
3066	toff = fxdr_hyper(tl);
3067	tl += 2;
3068	verf = fxdr_hyper(tl);
3069	tl += 2;
3070	siz = fxdr_unsigned(int, *tl++);
3071	cnt = fxdr_unsigned(int, *tl);
3072	off = toff;
	/*
	 * Both sizes are unsigned 32-bit words in the request but are held
	 * in ints here, so large client values show up negative.  Bound them
	 * before rounding, so that neither the round-up nor the malloc()
	 * size and uio_resid derived from siz can become negative or
	 * overflow.  A non-positive directory-size hint cannot be honoured
	 * (the read buffer and the dirlen limit would be zero or less), so
	 * fall back to the reply limit in that case.
	 */
3073	xfer = NFS_SRVMAXDATA(nfsd);
3074	if (cnt < 0 || cnt > xfer)
3075		cnt = xfer;
	if (siz <= 0)
		siz = cnt;
	else if (siz > xfer)
		siz = xfer;
3076	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3077	if (siz > xfer)
3078		siz = xfer;
3079	fullsiz = siz;
3080	error = nfsrv_fhtovp(fhp, NFSRV_FLAG_BUSY, &vp, &vfslocked, nfsd, slp,
3081	    nam, &rdonly);
	/*
	 * mntp is remembered so that nfsmout can vfs_unbusy() it.
	 * NOTE(review): presumably NFSRV_FLAG_BUSY makes nfsrv_fhtovp()
	 * return with the mount busied -- confirm.
	 */
3082	if (!error) {
3083		vp_locked = 1;
3084		mntp = vp->v_mount;
3085		if (vp->v_type != VDIR) {
3086			error = ENOTDIR;
3087			vput(vp);
3088			vp = NULL;
3089			vp_locked = 0;
3090		}
3091	}
3092	if (error) {
3093		nfsm_reply(NFSX_UNSIGNED);
3094		nfsm_srvpostop_attr(getret, &at);
3095		error = 0;
3096		goto nfsmout;
3097	}
3098	error = getret = VOP_GETATTR(vp, &at, cred);
3099#if 0
3100	/*
3101	 * XXX This check may be too strict for Solaris 2.5 clients.
3102	 */
3103	if (!error && toff && verf && verf != at.va_filerev)
3104		error = NFSERR_BAD_COOKIE;
3105#endif
3106	if (!error)
3107		error = nfsrv_access(vp, VEXEC, cred, rdonly, 0);
3108	if (error) {
3109		vput(vp);
3110		vp_locked = 0;
3111		vp = NULL;
3112		nfsm_reply(NFSX_V3POSTOPATTR);
3113		nfsm_srvpostop_attr(getret, &at);
3114		error = 0;
3115		goto nfsmout;
3116	}
	/* Remember whether this is ZFS; it changes the cookie skipping below. */
3117	not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") != 0;
3118	VOP_UNLOCK(vp, 0);
3119	vp_locked = 0;
3120	rbuf = malloc(siz, M_TEMP, M_WAITOK);
	/*
	 * Each pass reads up to fullsiz bytes of entries starting at off;
	 * the code jumps back here when a pass yields nothing usable.
	 */
3121again:
3122	iv.iov_base = rbuf;
3123	iv.iov_len = fullsiz;
3124	io.uio_iov = &iv;
3125	io.uio_iovcnt = 1;
3126	io.uio_offset = (off_t)off;
3127	io.uio_resid = fullsiz;
3128	io.uio_segflg = UIO_SYSSPACE;
3129	io.uio_rw = UIO_READ;
3130	io.uio_td = NULL;
3131	eofflag = 0;
3132	vp_locked = 1;
	/* Drop the cookie array left over from a previous pass. */
3133	if (cookies) {
3134		free((caddr_t)cookies, M_TEMP);
3135		cookies = NULL;
3136	}
3137	vn_lock(vp, LK_SHARED | LK_RETRY);
3138	error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3139	off = (u_quad_t)io.uio_offset;
3140	getret = VOP_GETATTR(vp, &at, cred);
3141	VOP_UNLOCK(vp, 0);
3142	vp_locked = 0;
	/* The reply is built from the cookies; without them, fail. */
3143	if (!cookies && !error)
3144		error = NFSERR_PERM;
3145	if (!error)
3146		error = getret;
3147	if (error) {
3148		vrele(vp);
3149		vp = NULL;
3150		if (cookies)
3151			free((caddr_t)cookies, M_TEMP);
3152		free((caddr_t)rbuf, M_TEMP);
3153		nfsm_reply(NFSX_V3POSTOPATTR);
3154		nfsm_srvpostop_attr(getret, &at);
3155		error = 0;
3156		goto nfsmout;
3157	}
	/* From here on siz is the number of bytes actually read. */
3158	if (io.uio_resid) {
3159		siz -= io.uio_resid;
3160
3161		/*
3162		 * If nothing read, return eof
3163		 * rpc reply
3164		 */
3165		if (siz == 0) {
3166			vrele(vp);
3167			vp = NULL;
3168			nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3169				2 * NFSX_UNSIGNED);
3170			nfsm_srvpostop_attr(getret, &at);
3171			tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
3172			txdr_hyper(at.va_filerev, tl);
3173			tl += 2;
			/* "no entry follows", then "eof". */
3174			*tl++ = nfsrv_nfs_false;
3175			*tl = nfsrv_nfs_true;
3176			free((caddr_t)cookies, M_TEMP);
3177			free((caddr_t)rbuf, M_TEMP);
3178			error = 0;
3179			goto nfsmout;
3180		}
3181	}
3182
3183	/*
3184	 * Check for degenerate cases of nothing useful read.
3185	 * If so go try again
3186	 */
3187	cpos = rbuf;
3188	cend = rbuf + siz;
3189	dp = (struct dirent *)cpos;
3190	cookiep = cookies;
3191	/*
3192	 * For some reason FreeBSD's ufs_readdir() chooses to back the
3193	 * directory offset up to a block boundary, so it is necessary to
3194	 * skip over the records that precede the requested offset. This
3195	 * requires the assumption that file offset cookies monotonically
3196	 * increase.
3197	 * Since the offset cookies don't monotonically increase for ZFS,
3198	 * this is not done when ZFS is the file system.
3199	 */
3200	while (cpos < cend && ncookies > 0 &&
3201		(dp->d_fileno == 0 || dp->d_type == DT_WHT ||
3202		 (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
3203		cpos += dp->d_reclen;
3204		dp = (struct dirent *)cpos;
3205		cookiep++;
3206		ncookies--;
3207	}
	/* Everything read was skipped: read the next chunk, starting at off. */
3208	if (cpos >= cend || ncookies == 0) {
3209		toff = off;
3210		siz = fullsiz;
3211		goto again;
3212	}
3213
	/* Both running totals start with the fixed part of the reply. */
3214	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3215	    2 * NFSX_UNSIGNED;
3216	nfsm_reply(cnt);
3217	nfsm_srvpostop_attr(getret, &at);
3218	tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
3219	txdr_hyper(at.va_filerev, tl);
	/*
	 * bp is the write position in the current reply mbuf mp and be the
	 * end of its space.  NOTE(review): nfsm_clget presumably points tl
	 * at bp, moving mp/bp/be on to a fresh mbuf once bp reaches be --
	 * confirm against the macro definition.
	 */
3220	mp = mb;
3221	bp = bpos;
3222	be = bp + M_TRAILINGSPACE(mp);
3223
3224	/* Loop through the records and build reply */
3225	while (cpos < cend && ncookies > 0) {
3226		if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
3227			nlen = dp->d_namlen;
3228			rem = nfsm_rndup(nlen)-nlen;
3229
3230			if (usevget) {
3231				/*
3232				 * For readdir_and_lookup get the vnode using
3233				 * the file number.
3234				 */
3235				error = VFS_VGET(mntp, dp->d_fileno, LK_SHARED,
3236				    &nvp);
				/* Any other failure just drops this entry. */
3237				if (error != 0 && error != EOPNOTSUPP) {
3238					error = 0;
3239					goto invalid;
3240				} else if (error == EOPNOTSUPP) {
3241					/*
3242					 * VFS_VGET() not supported?
3243					 * Let's switch to VOP_LOOKUP().
3244					 */
3245					error = 0;
3246					usevget = 0;
3247					cn.cn_nameiop = LOOKUP;
3248					cn.cn_flags = ISLASTCN | NOFOLLOW | \
3249					    LOCKSHARED | LOCKLEAF | MPSAFE;
3250					cn.cn_lkflags = LK_SHARED | LK_RETRY;
3251					cn.cn_cred = cred;
3252					cn.cn_thread = curthread;
3253				}
3254			}
3255			if (!usevget) {
3256				cn.cn_nameptr = dp->d_name;
3257				cn.cn_namelen = dp->d_namlen;
3258				if (dp->d_namlen == 2 &&
3259				    dp->d_name[0] == '.' &&
3260				    dp->d_name[1] == '.') {
3261					cn.cn_flags |= ISDOTDOT;
3262				} else {
3263					cn.cn_flags &= ~ISDOTDOT;
3264				}
				/* The directory is locked on the first lookup only. */
3265				if (!vp_locked) {
3266					vn_lock(vp, LK_SHARED | LK_RETRY);
3267					vp_locked = 1;
3268				}
				/*
				 * ".." in a directory flagged VV_ROOT is
				 * answered with the directory itself instead
				 * of calling VOP_LOOKUP().
				 */
3269				if ((vp->v_vflag & VV_ROOT) != 0 &&
3270				    (cn.cn_flags & ISDOTDOT) != 0) {
3271					vref(vp);
3272					nvp = vp;
3273				} else if (VOP_LOOKUP(vp, &nvp, &cn) != 0)
3274					goto invalid;
3275			}
3276
			/* Build the entry's file handle and fetch its attributes. */
3277			bzero((caddr_t)nfhp, NFSX_V3FH);
3278			nfhp->fh_fsid = nvp->v_mount->mnt_stat.f_fsid;
3279			if ((error1 = VOP_VPTOFH(nvp, &nfhp->fh_fid)) == 0)
3280				error1 = VOP_GETATTR(nvp, vap, cred);
			/*
			 * When nvp is the directory itself only the extra
			 * reference is dropped; NOTE(review): presumably so
			 * that vp stays locked for later lookups -- confirm
			 * vunref() semantics.
			 */
3281			if (!usevget && vp == nvp)
3282				vunref(nvp);
3283			else
3284				vput(nvp);
3285			nvp = NULL;
3286			if (error1 != 0)
3287				goto invalid;
3288
3289			/*
3290			 * If either the dircount or maxcount will be
3291			 * exceeded, get out now. Both of these lengths
3292			 * are calculated conservatively, including all
3293			 * XDR overheads.
3294			 */
3295			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3296				NFSX_V3POSTOPATTR);
3297			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3298			if (len > cnt || dirlen > fullsiz) {
3299				eofflag = 0;
3300				break;
3301			}
3302
3303			/*
3304			 * Build the directory record xdr from
3305			 * the dirent entry.
3306			 */
3307			fp = (struct nfs_fattr *)&fl.fl_fattr;
3308			nfsm_srvfillattr(vap, fp);
3309			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3310			fl.fl_fhok = nfsrv_nfs_true;
3311			fl.fl_postopok = nfsrv_nfs_true;
3312			fl.fl_off.nfsuquad[0] = 0;
3313			fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3314
			/* "entry follows", 64-bit file id (high word 0), name length. */
3315			nfsm_clget;
3316			*tl = nfsrv_nfs_true;
3317			bp += NFSX_UNSIGNED;
3318			nfsm_clget;
3319			*tl = 0;
3320			bp += NFSX_UNSIGNED;
3321			nfsm_clget;
3322			*tl = txdr_unsigned(dp->d_fileno);
3323			bp += NFSX_UNSIGNED;
3324			nfsm_clget;
3325			*tl = txdr_unsigned(nlen);
3326			bp += NFSX_UNSIGNED;
3327
3328			/* And loop around copying the name */
3329			xfer = nlen;
3330			cp = dp->d_name;
3331			while (xfer > 0) {
3332				nfsm_clget;
3333				if ((bp + xfer) > be)
3334					tsiz = be - bp;
3335				else
3336					tsiz = xfer;
3337				bcopy(cp, bp, tsiz);
3338				bp += tsiz;
3339				xfer -= tsiz;
3340				if (xfer > 0)
3341					cp += tsiz;
3342			}
3343			/* And null pad to an int32_t boundary. */
3344			for (i = 0; i < rem; i++)
3345				*bp++ = '\0';
3346
3347			/*
3348			 * Now copy the flrep structure out.
3349			 */
3350			xfer = sizeof (struct flrep);
3351			cp = (caddr_t)&fl;
3352			while (xfer > 0) {
3353				nfsm_clget;
3354				if ((bp + xfer) > be)
3355					tsiz = be - bp;
3356				else
3357					tsiz = xfer;
3358				bcopy(cp, bp, tsiz);
3359				bp += tsiz;
3360				xfer -= tsiz;
3361				if (xfer > 0)
3362					cp += tsiz;
3363			}
3364		}
		/* Skipped and emitted entries alike advance to the next record. */
3365invalid:
3366		cpos += dp->d_reclen;
3367		dp = (struct dirent *)cpos;
3368		cookiep++;
3369		ncookies--;
3370	}
	/* The VOP_LOOKUP() path leaves vp locked; unlock it with the release. */
3371	if (!usevget && vp_locked)
3372		vput(vp);
3373	else
3374		vrele(vp);
3375	vp = NULL;
	/* Terminate the list ("no entry follows"), then append the eof flag. */
3376	nfsm_clget;
3377	*tl = nfsrv_nfs_false;
3378	bp += NFSX_UNSIGNED;
3379	nfsm_clget;
3380	if (eofflag)
3381		*tl = nfsrv_nfs_true;
3382	else
3383		*tl = nfsrv_nfs_false;
3384	bp += NFSX_UNSIGNED;
	/* Account for the bytes written by hand into the last mbuf. */
3385	if (mp != mb) {
3386		if (bp < be)
3387			mp->m_len = bp - mtod(mp, caddr_t);
3388	} else
3389		mp->m_len += bp - bpos;
3390	free((caddr_t)cookies, M_TEMP);
3391	free((caddr_t)rbuf, M_TEMP);
3392nfsmout:
3393	if (vp)
3394		vrele(vp);
3395	if (mntp)
3396		vfs_unbusy(mntp);
3397	VFS_UNLOCK_GIANT(vfslocked);
3398	return(error);
3399}
3400
3401/*
3402 * nfs commit service
3403 */
3404int
3405nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3406    struct mbuf **mrq)
3407{
3408	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3409	struct sockaddr *nam = nfsd->nd_nam;
3410	caddr_t dpos = nfsd->nd_dpos;
3411	struct ucred *cred = nfsd->nd_cr;
3412	struct vattr bfor, aft;
3413	struct vnode *vp = NULL;
3414	nfsfh_t nfh;
3415	fhandle_t *fhp;
3416	u_int32_t *tl;
3417	caddr_t bpos;
3418	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3419	struct mbuf *mb, *mreq;
3420	u_quad_t off;
3421	struct mount *mp = NULL;
3422	int v3 = (nfsd->nd_flag & ND_NFSV3);
3423	int tvfslocked;
3424	int vfslocked;
3425
3426	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3427	vfslocked = 0;
3428	if (!v3)
3429		panic("nfsrv_commit: v3 proc called on a v2 connection");
3430	fhp = &nfh.fh_generic;
3431	nfsm_srvmtofh(fhp);
3432	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
3433		error = ESTALE;
3434		goto ereply;
3435	}
3436	vfslocked = VFS_LOCK_GIANT(mp);
3437	(void) vn_start_write(NULL, &mp, V_WAIT);
3438	vfs_rel(mp);		/* The write holds a ref. */
3439	tl = nfsm_dissect_nonblock(u_int32_t *, 3 * NFSX_UNSIGNED);
3440
3441	/*
3442	 * XXX At this time VOP_FSYNC() does not accept offset and byte
3443	 * count parameters, so these arguments are useless (someday maybe).
3444	 */
3445	off = fxdr_hyper(tl);
3446	tl += 2;
3447	cnt = fxdr_unsigned(int, *tl);
3448	error = nfsrv_fhtovp(fhp, 0, &vp, &tvfslocked, nfsd, slp, nam, &rdonly);
3449	vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
3450	if (error) {
3451		nfsm_reply(2 * NFSX_UNSIGNED);
3452		nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3453		error = 0;
3454		goto nfsmout;
3455	}
3456	for_ret = VOP_GETATTR(vp, &bfor, cred);
3457
3458	if (cnt > MAX_COMMIT_COUNT) {
3459		/*
3460		 * Give up and do the whole thing
3461		 */
3462		if (vp->v_object &&
3463		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3464			VM_OBJECT_LOCK(vp->v_object);
3465			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3466			VM_OBJECT_UNLOCK(vp->v_object);
3467		}
3468		error = VOP_FSYNC(vp, MNT_WAIT, curthread);
3469	} else {
3470		/*
3471		 * Locate and synchronously write any buffers that fall
3472		 * into the requested range.  Note:  we are assuming that
3473		 * f_iosize is a power of 2.
3474		 */
3475		int iosize = vp->v_mount->mnt_stat.f_iosize;
3476		int iomask = iosize - 1;
3477		struct bufobj *bo;
3478		daddr_t lblkno;
3479
3480		/*
3481		 * Align to iosize boundry, super-align to page boundry.
3482		 */
3483		if (off & iomask) {
3484			cnt += off & iomask;
3485			off &= ~(u_quad_t)iomask;
3486		}
3487		if (off & PAGE_MASK) {
3488			cnt += off & PAGE_MASK;
3489			off &= ~(u_quad_t)PAGE_MASK;
3490		}
3491		lblkno = off / iosize;
3492
3493		if (vp->v_object &&
3494		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3495			VM_OBJECT_LOCK(vp->v_object);
3496			vm_object_page_clean(vp->v_object, off, off + cnt,
3497			    OBJPC_SYNC);
3498			VM_OBJECT_UNLOCK(vp->v_object);
3499		}
3500
3501		bo = &vp->v_bufobj;
3502		BO_LOCK(bo);
3503		while (cnt > 0) {
3504			struct buf *bp;
3505
3506			/*
3507			 * If we have a buffer and it is marked B_DELWRI we
3508			 * have to lock and write it.  Otherwise the prior
3509			 * write is assumed to have already been committed.
3510			 *
3511			 * gbincore() can return invalid buffers now so we
3512			 * have to check that bit as well (though B_DELWRI
3513			 * should not be set if B_INVAL is set there could be
3514			 * a race here since we haven't locked the buffer).
3515			 */
3516			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
3517				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
3518				    LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
3519					BO_LOCK(bo);
3520					continue; /* retry */
3521				}
3522			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
3523				    B_DELWRI) {
3524					bremfree(bp);
3525					bp->b_flags &= ~B_ASYNC;
3526					bwrite(bp);
3527					++nfs_commit_miss;
3528				} else
3529					BUF_UNLOCK(bp);
3530				BO_LOCK(bo);
3531			}
3532			++nfs_commit_blks;
3533			if (cnt < iosize)
3534				break;
3535			cnt -= iosize;
3536			++lblkno;
3537		}
3538		BO_UNLOCK(bo);
3539	}
3540
3541	aft_ret = VOP_GETATTR(vp, &aft, cred);
3542	vput(vp);
3543	vp = NULL;
3544ereply:
3545	nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
3546	nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3547	if (!error) {
3548		tl = nfsm_build(u_int32_t *, NFSX_V3WRITEVERF);
3549		if (nfsver.tv_sec == 0)
3550			nfsver = boottime;
3551		*tl++ = txdr_unsigned(nfsver.tv_sec);
3552		*tl = txdr_unsigned(nfsver.tv_usec);
3553	} else {
3554		error = 0;
3555	}
3556nfsmout:
3557	if (vp)
3558		vput(vp);
3559	vn_finished_write(mp);
3560	VFS_UNLOCK_GIANT(vfslocked);
3561	return(error);
3562}
3563
3564/*
3565 * nfs statfs service
3566 */
3567int
3568nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3569    struct mbuf **mrq)
3570{
3571	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3572	struct sockaddr *nam = nfsd->nd_nam;
3573	caddr_t dpos = nfsd->nd_dpos;
3574	struct ucred *cred = nfsd->nd_cr;
3575	struct statfs *sf;
3576	struct nfs_statfs *sfp;
3577	caddr_t bpos;
3578	int error = 0, rdonly, getret = 1;
3579	int v3 = (nfsd->nd_flag & ND_NFSV3);
3580	struct mbuf *mb, *mreq;
3581	struct vnode *vp = NULL;
3582	struct vattr at;
3583	nfsfh_t nfh;
3584	fhandle_t *fhp;
3585	struct statfs statfs;
3586	u_quad_t tval;
3587	int vfslocked;
3588
3589	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3590	vfslocked = 0;
3591	fhp = &nfh.fh_generic;
3592	nfsm_srvmtofh(fhp);
3593	error = nfsrv_fhtovp(fhp, 0, &vp, &vfslocked, nfsd, slp, nam, &rdonly);
3594	if (error) {
3595		nfsm_reply(NFSX_UNSIGNED);
3596		if (v3)
3597			nfsm_srvpostop_attr(getret, &at);
3598		error = 0;
3599		goto nfsmout;
3600	}
3601	sf = &statfs;
3602	error = VFS_STATFS(vp->v_mount, sf);
3603	getret = VOP_GETATTR(vp, &at, cred);
3604	vput(vp);
3605	vp = NULL;
3606	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
3607	if (v3)
3608		nfsm_srvpostop_attr(getret, &at);
3609	if (error) {
3610		error = 0;
3611		goto nfsmout;
3612	}
3613	sfp = nfsm_build(struct nfs_statfs *, NFSX_STATFS(v3));
3614	if (v3) {
3615		tval = (u_quad_t)sf->f_blocks;
3616		tval *= (u_quad_t)sf->f_bsize;
3617		txdr_hyper(tval, &sfp->sf_tbytes);
3618		tval = (u_quad_t)sf->f_bfree;
3619		tval *= (u_quad_t)sf->f_bsize;
3620		txdr_hyper(tval, &sfp->sf_fbytes);
3621		/*
3622		 * Don't send negative values for available space,
3623		 * since this field is unsigned in the NFS protocol.
3624		 * Otherwise, the client would see absurdly high
3625		 * numbers for free space.
3626		 */
3627		if (sf->f_bavail < 0)
3628			tval = 0;
3629		else
3630			tval = (u_quad_t)sf->f_bavail;
3631		tval *= (u_quad_t)sf->f_bsize;
3632		txdr_hyper(tval, &sfp->sf_abytes);
3633		sfp->sf_tfiles.nfsuquad[0] = 0;
3634		sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3635		sfp->sf_ffiles.nfsuquad[0] = 0;
3636		sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3637		sfp->sf_afiles.nfsuquad[0] = 0;
3638		sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3639		sfp->sf_invarsec = 0;
3640	} else {
3641		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3642		sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3643		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3644		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3645		if (sf->f_bavail < 0)
3646			sfp->sf_bavail = 0;
3647		else
3648			sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3649	}
3650nfsmout:
3651	if (vp)
3652		vput(vp);
3653	VFS_UNLOCK_GIANT(vfslocked);
3654	return(error);
3655}
3656
3657/*
3658 * nfs fsinfo service
3659 */
3660int
3661nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3662    struct mbuf **mrq)
3663{
3664	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3665	struct sockaddr *nam = nfsd->nd_nam;
3666	caddr_t dpos = nfsd->nd_dpos;
3667	struct ucred *cred = nfsd->nd_cr;
3668	struct nfsv3_fsinfo *sip;
3669	caddr_t bpos;
3670	int error = 0, rdonly, getret = 1, pref;
3671	struct mbuf *mb, *mreq;
3672	struct vnode *vp = NULL;
3673	struct vattr at;
3674	nfsfh_t nfh;
3675	fhandle_t *fhp;
3676	u_quad_t maxfsize;
3677	struct statfs sb;
3678	int v3 = (nfsd->nd_flag & ND_NFSV3);
3679	int vfslocked;
3680
3681	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3682	if (!v3)
3683		panic("nfsrv_fsinfo: v3 proc called on a v2 connection");
3684	fhp = &nfh.fh_generic;
3685	vfslocked = 0;
3686	nfsm_srvmtofh(fhp);
3687	error = nfsrv_fhtovp(fhp, 0, &vp, &vfslocked, nfsd, slp, nam, &rdonly);
3688	if (error) {
3689		nfsm_reply(NFSX_UNSIGNED);
3690		nfsm_srvpostop_attr(getret, &at);
3691		error = 0;
3692		goto nfsmout;
3693	}
3694
3695	/* XXX Try to make a guess on the max file size. */
3696	VFS_STATFS(vp->v_mount, &sb);
3697	maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3698
3699	getret = VOP_GETATTR(vp, &at, cred);
3700	vput(vp);
3701	vp = NULL;
3702	nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
3703	nfsm_srvpostop_attr(getret, &at);
3704	sip = nfsm_build(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
3705
3706	/*
3707	 * XXX
3708	 * There should be filesystem VFS OP(s) to get this information.
3709	 * For now, assume ufs.
3710	 */
3711	pref = NFS_SRVMAXDATA(nfsd);
3712	sip->fs_rtmax = txdr_unsigned(pref);
3713	sip->fs_rtpref = txdr_unsigned(pref);
3714	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3715	sip->fs_wtmax = txdr_unsigned(pref);
3716	sip->fs_wtpref = txdr_unsigned(pref);
3717	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3718	sip->fs_dtpref = txdr_unsigned(pref);
3719	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3720	sip->fs_timedelta.nfsv3_sec = 0;
3721	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3722	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3723		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3724		NFSV3FSINFO_CANSETTIME);
3725nfsmout:
3726	if (vp)
3727		vput(vp);
3728	VFS_UNLOCK_GIANT(vfslocked);
3729	return(error);
3730}
3731
3732/*
3733 * nfs pathconf service
3734 */
3735int
3736nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3737    struct mbuf **mrq)
3738{
3739	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3740	struct sockaddr *nam = nfsd->nd_nam;
3741	caddr_t dpos = nfsd->nd_dpos;
3742	struct ucred *cred = nfsd->nd_cr;
3743	struct nfsv3_pathconf *pc;
3744	caddr_t bpos;
3745	int error = 0, rdonly, getret = 1;
3746	register_t linkmax, namemax, chownres, notrunc;
3747	struct mbuf *mb, *mreq;
3748	struct vnode *vp = NULL;
3749	struct vattr at;
3750	nfsfh_t nfh;
3751	fhandle_t *fhp;
3752	int v3 = (nfsd->nd_flag & ND_NFSV3);
3753	int vfslocked;
3754
3755	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3756	if (!v3)
3757		panic("nfsrv_pathconf: v3 proc called on a v2 connection");
3758	vfslocked = 0;
3759	fhp = &nfh.fh_generic;
3760	nfsm_srvmtofh(fhp);
3761	error = nfsrv_fhtovp(fhp, 0, &vp, &vfslocked, nfsd, slp, nam, &rdonly);
3762	if (error) {
3763		nfsm_reply(NFSX_UNSIGNED);
3764		nfsm_srvpostop_attr(getret, &at);
3765		error = 0;
3766		goto nfsmout;
3767	}
3768	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3769	if (!error)
3770		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3771	if (!error)
3772		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3773	if (!error)
3774		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3775	getret = VOP_GETATTR(vp, &at, cred);
3776	vput(vp);
3777	vp = NULL;
3778	nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
3779	nfsm_srvpostop_attr(getret, &at);
3780	if (error) {
3781		error = 0;
3782		goto nfsmout;
3783	}
3784	pc = nfsm_build(struct nfsv3_pathconf *, NFSX_V3PATHCONF);
3785
3786	pc->pc_linkmax = txdr_unsigned(linkmax);
3787	pc->pc_namemax = txdr_unsigned(namemax);
3788	pc->pc_notrunc = txdr_unsigned(notrunc);
3789	pc->pc_chownrestricted = txdr_unsigned(chownres);
3790
3791	/*
3792	 * These should probably be supported by VOP_PATHCONF(), but
3793	 * until msdosfs is exportable (why would you want to?), the
3794	 * Unix defaults should be ok.
3795	 */
3796	pc->pc_caseinsensitive = nfsrv_nfs_false;
3797	pc->pc_casepreserving = nfsrv_nfs_true;
3798nfsmout:
3799	if (vp)
3800		vput(vp);
3801	VFS_UNLOCK_GIANT(vfslocked);
3802	return(error);
3803}
3804
3805/*
3806 * Null operation, used by clients to ping server
3807 */
3808/* ARGSUSED */
3809int
3810nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3811    struct mbuf **mrq)
3812{
3813	struct mbuf *mrep = nfsd->nd_mrep;
3814	caddr_t bpos;
3815	int error = NFSERR_RETVOID;
3816	struct mbuf *mb, *mreq;
3817
3818	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3819	nfsm_reply(0);
3820nfsmout:
3821	return (error);
3822}
3823
3824/*
3825 * No operation, used for obsolete procedures
3826 */
3827/* ARGSUSED */
3828int
3829nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3830    struct mbuf **mrq)
3831{
3832	struct mbuf *mrep = nfsd->nd_mrep;
3833	caddr_t bpos;
3834	int error;
3835	struct mbuf *mb, *mreq;
3836
3837	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3838	if (nfsd->nd_repstat)
3839		error = nfsd->nd_repstat;
3840	else
3841		error = EPROCUNAVAIL;
3842	nfsm_reply(0);
3843	error = 0;
3844nfsmout:
3845	return (error);
3846}
3847
3848/*
3849 * Perform access checking for vnodes obtained from file handles that would
3850 * refer to files already opened by a Unix client. You cannot just use
3851 * vn_writechk() and VOP_ACCESS() for two reasons.
3852 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
3853 *     case.
3854 * 2 - The owner is to be given access irrespective of mode bits for some
3855 *     operations, so that processes that chmod after opening a file don't
3856 *     break. I don't like this because it opens a security hole, but since
3857 *     the nfs server opens a security hole the size of a barn door anyhow,
3858 *     what the heck.
3859 *
3860 * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3861 * will return EPERM instead of EACCES. EPERM is always an error.
3862 */
3863static int
3864nfsrv_access(struct vnode *vp, accmode_t accmode, struct ucred *cred,
3865    int rdonly, int override)
3866{
3867	struct vattr vattr;
3868	int error;
3869
3870	VFS_ASSERT_GIANT(vp->v_mount);
3871
3872	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3873
3874	if (accmode & VWRITE) {
3875		/* Just vn_writechk() changed to check rdonly */
3876		/*
3877		 * Disallow write attempts on read-only filesystems;
3878		 * unless the file is a socket or a block or character
3879		 * device resident on the filesystem.
3880		 */
3881		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3882			switch (vp->v_type) {
3883			case VREG:
3884			case VDIR:
3885			case VLNK:
3886				return (EROFS);
3887			default:
3888				break;
3889			}
3890		}
3891		/*
3892		 * If there's shared text associated with
3893		 * the inode, we can't allow writing.
3894		 */
3895		if (VOP_IS_TEXT(vp))
3896			return (ETXTBSY);
3897	}
3898
3899	error = VOP_GETATTR(vp, &vattr, cred);
3900	if (error)
3901		return (error);
3902	error = VOP_ACCESS(vp, accmode, cred, curthread);
3903	/*
3904	 * Allow certain operations for the owner (reads and writes
3905	 * on files that are already open).
3906	 */
3907	if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3908		error = 0;
3909	return (error);
3910}
3911