fuse_vnops.c revision 325164
1/*
2 * Copyright (c) 2007-2009 Google Inc. and Amit Singh
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met:
8 *
9 * * Redistributions of source code must retain the above copyright
10 *   notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above
12 *   copyright notice, this list of conditions and the following disclaimer
13 *   in the documentation and/or other materials provided with the
14 *   distribution.
15 * * Neither the name of Google Inc. nor the names of its
16 *   contributors may be used to endorse or promote products derived from
17 *   this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * Copyright (C) 2005 Csaba Henk.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 *
43 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55
56#include <sys/cdefs.h>
57__FBSDID("$FreeBSD: stable/10/sys/fs/fuse/fuse_vnops.c 325164 2017-10-30 20:31:48Z pfg $");
58
59#include <sys/types.h>
60#include <sys/module.h>
61#include <sys/systm.h>
62#include <sys/errno.h>
63#include <sys/param.h>
64#include <sys/kernel.h>
65#include <sys/conf.h>
66#include <sys/uio.h>
67#include <sys/malloc.h>
68#include <sys/queue.h>
69#include <sys/lock.h>
70#include <sys/rwlock.h>
71#include <sys/sx.h>
72#include <sys/proc.h>
73#include <sys/mount.h>
74#include <sys/vnode.h>
75#include <sys/namei.h>
76#include <sys/extattr.h>
77#include <sys/stat.h>
78#include <sys/unistd.h>
79#include <sys/filedesc.h>
80#include <sys/file.h>
81#include <sys/fcntl.h>
82#include <sys/dirent.h>
83#include <sys/bio.h>
84#include <sys/buf.h>
85#include <sys/sysctl.h>
86
87#include <vm/vm.h>
88#include <vm/vm_extern.h>
89#include <vm/pmap.h>
90#include <vm/vm_map.h>
91#include <vm/vm_page.h>
92#include <vm/vm_param.h>
93#include <vm/vm_object.h>
94#include <vm/vm_pager.h>
95#include <vm/vnode_pager.h>
96#include <vm/vm_object.h>
97
98#include "fuse.h"
99#include "fuse_file.h"
100#include "fuse_internal.h"
101#include "fuse_ipc.h"
102#include "fuse_node.h"
103#include "fuse_param.h"
104#include "fuse_io.h"
105
106#include <sys/priv.h>
107
108#define FUSE_DEBUG_MODULE VNOPS
109#include "fuse_debug.h"
110
/*
 * Forward declarations of the vnode operations implemented in this file.
 * Each one is typed with the matching vop_*_t function typedef and is
 * file-local; they are published only through the fuse_vnops vector.
 */
static vop_access_t fuse_vnop_access;
static vop_close_t fuse_vnop_close;
static vop_create_t fuse_vnop_create;
static vop_deleteextattr_t fuse_vnop_deleteextattr;
static vop_fsync_t fuse_vnop_fsync;
static vop_getattr_t fuse_vnop_getattr;
static vop_getextattr_t fuse_vnop_getextattr;
static vop_inactive_t fuse_vnop_inactive;
static vop_link_t fuse_vnop_link;
static vop_listextattr_t fuse_vnop_listextattr;
static vop_lookup_t fuse_vnop_lookup;
static vop_mkdir_t fuse_vnop_mkdir;
static vop_mknod_t fuse_vnop_mknod;
static vop_open_t fuse_vnop_open;
static vop_read_t fuse_vnop_read;
static vop_readdir_t fuse_vnop_readdir;
static vop_readlink_t fuse_vnop_readlink;
static vop_reclaim_t fuse_vnop_reclaim;
static vop_remove_t fuse_vnop_remove;
static vop_rename_t fuse_vnop_rename;
static vop_rmdir_t fuse_vnop_rmdir;
static vop_setattr_t fuse_vnop_setattr;
static vop_setextattr_t fuse_vnop_setextattr;
static vop_strategy_t fuse_vnop_strategy;
static vop_symlink_t fuse_vnop_symlink;
static vop_write_t fuse_vnop_write;
static vop_getpages_t fuse_vnop_getpages;
static vop_putpages_t fuse_vnop_putpages;
static vop_print_t fuse_vnop_print;
141
/*
 * Vnode operation vector for FUSE vnodes.  Operations not listed here
 * fall back to default_vnodeops through .vop_default; pathconf uses the
 * generic vop_stdpathconf implementation.
 */
struct vop_vector fuse_vnops = {
	.vop_default = &default_vnodeops,
	.vop_access = fuse_vnop_access,
	.vop_close = fuse_vnop_close,
	.vop_create = fuse_vnop_create,
	.vop_deleteextattr = fuse_vnop_deleteextattr,
	.vop_fsync = fuse_vnop_fsync,
	.vop_getattr = fuse_vnop_getattr,
	.vop_getextattr = fuse_vnop_getextattr,
	.vop_inactive = fuse_vnop_inactive,
	.vop_link = fuse_vnop_link,
	.vop_listextattr = fuse_vnop_listextattr,
	.vop_lookup = fuse_vnop_lookup,
	.vop_mkdir = fuse_vnop_mkdir,
	.vop_mknod = fuse_vnop_mknod,
	.vop_open = fuse_vnop_open,
	.vop_pathconf = vop_stdpathconf,
	.vop_read = fuse_vnop_read,
	.vop_readdir = fuse_vnop_readdir,
	.vop_readlink = fuse_vnop_readlink,
	.vop_reclaim = fuse_vnop_reclaim,
	.vop_remove = fuse_vnop_remove,
	.vop_rename = fuse_vnop_rename,
	.vop_rmdir = fuse_vnop_rmdir,
	.vop_setattr = fuse_vnop_setattr,
	.vop_setextattr = fuse_vnop_setextattr,
	.vop_strategy = fuse_vnop_strategy,
	.vop_symlink = fuse_vnop_symlink,
	.vop_write = fuse_vnop_write,
	.vop_getpages = fuse_vnop_getpages,
	.vop_putpages = fuse_vnop_putpages,
	.vop_print = fuse_vnop_print,
};
175
/*
 * vfs.fuse.lookup_cache_hits: bumped by fuse_vnop_lookup() each time
 * cache_lookup() reports a positive name-cache match.  Read-only.
 */
static u_long fuse_lookup_cache_hits = 0;

SYSCTL_ULONG(_vfs_fuse, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
    &fuse_lookup_cache_hits, 0, "");

/*
 * vfs.fuse.lookup_cache_misses: bumped by fuse_vnop_lookup() each time
 * cache_lookup() finds no entry and the daemon has to be asked.  Read-only.
 */
static u_long fuse_lookup_cache_misses = 0;

SYSCTL_ULONG(_vfs_fuse, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
    &fuse_lookup_cache_misses, 0, "");

/*
 * vfs.fuse.lookup_cache_enable: when non-zero, fuse_vnop_lookup() consults
 * the name cache before sending FUSE_LOOKUP.  Not static, so other
 * translation units may reference it.
 */
int	fuse_lookup_cache_enable = 1;

SYSCTL_INT(_vfs_fuse, OID_AUTO, lookup_cache_enable, CTLFLAG_RW,
    &fuse_lookup_cache_enable, 0, "");

/*
 * vfs.fuse.reclaim_revoked: when non-zero, fuse_vnop_inactive() calls
 * vrecycle() on vnodes flagged FN_REVOKED.
 *
 * XXX: This feature is highly experimental and can cause instability;
 * it needs revisiting before being enabled by default.
 */
static int fuse_reclaim_revoked = 0;

SYSCTL_INT(_vfs_fuse, OID_AUTO, reclaim_revoked, CTLFLAG_RW,
    &fuse_reclaim_revoked, 0, "");

/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably a free-count for pbufs used by the paging code -- confirm
 * against the getpages/putpages implementation.
 */
int	fuse_pbuf_freecnt = -1;
201
/*
 * Thin wrappers around the VM page lock primitives.
 *
 * The per-page variants forward to vm_page_lock()/vm_page_unlock(); the
 * page-queue variants expand to a no-op expression.
 *
 * None of the expansions carries a trailing semicolon, so every macro
 * behaves like an ordinary function-call expression and can be used as the
 * sole statement of an unbraced if/else arm.  Callers supply the ';'.
 */
#define fuse_vm_page_lock(m)		vm_page_lock((m))
#define fuse_vm_page_unlock(m)		vm_page_unlock((m))
#define fuse_vm_page_lock_queues()	((void)0)
#define fuse_vm_page_unlock_queues()	((void)0)
206
207/*
208    struct vnop_access_args {
209        struct vnode *a_vp;
210#if VOP_ACCESS_TAKES_ACCMODE_T
211        accmode_t a_accmode;
212#else
213        int a_mode;
214#endif
215        struct ucred *a_cred;
216        struct thread *a_td;
217    };
218*/
219static int
220fuse_vnop_access(struct vop_access_args *ap)
221{
222	struct vnode *vp = ap->a_vp;
223	int accmode = ap->a_accmode;
224	struct ucred *cred = ap->a_cred;
225
226	struct fuse_access_param facp;
227	struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
228
229	int err;
230
231	FS_DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp));
232
233	if (fuse_isdeadfs(vp)) {
234		if (vnode_isvroot(vp)) {
235			return 0;
236		}
237		return ENXIO;
238	}
239	if (!(data->dataflags & FSESS_INITED)) {
240		if (vnode_isvroot(vp)) {
241			if (priv_check_cred(cred, PRIV_VFS_ADMIN, 0) ||
242			    (fuse_match_cred(data->daemoncred, cred) == 0)) {
243				return 0;
244			}
245		}
246		return EBADF;
247	}
248	if (vnode_islnk(vp)) {
249		return 0;
250	}
251	bzero(&facp, sizeof(facp));
252
253	err = fuse_internal_access(vp, accmode, &facp, ap->a_td, ap->a_cred);
254	FS_DEBUG2G("err=%d accmode=0x%x\n", err, accmode);
255	return err;
256}
257
258/*
259    struct vnop_close_args {
260	struct vnode *a_vp;
261	int  a_fflag;
262	struct ucred *a_cred;
263	struct thread *a_td;
264    };
265*/
266static int
267fuse_vnop_close(struct vop_close_args *ap)
268{
269	struct vnode *vp = ap->a_vp;
270	struct ucred *cred = ap->a_cred;
271	int fflag = ap->a_fflag;
272	fufh_type_t fufh_type;
273
274	fuse_trace_printf_vnop();
275
276	if (fuse_isdeadfs(vp)) {
277		return 0;
278	}
279	if (vnode_isdir(vp)) {
280		if (fuse_filehandle_valid(vp, FUFH_RDONLY)) {
281			fuse_filehandle_close(vp, FUFH_RDONLY, NULL, cred);
282		}
283		return 0;
284	}
285	if (fflag & IO_NDELAY) {
286		return 0;
287	}
288	fufh_type = fuse_filehandle_xlate_from_fflags(fflag);
289
290	if (!fuse_filehandle_valid(vp, fufh_type)) {
291		int i;
292
293		for (i = 0; i < FUFH_MAXTYPE; i++)
294			if (fuse_filehandle_valid(vp, i))
295				break;
296		if (i == FUFH_MAXTYPE)
297			panic("FUSE: fufh type %d found to be invalid in close"
298			      " (fflag=0x%x)\n",
299			      fufh_type, fflag);
300	}
301	if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
302		fuse_vnode_savesize(vp, cred);
303	}
304	return 0;
305}
306
307/*
308    struct vnop_create_args {
309	struct vnode *a_dvp;
310	struct vnode **a_vpp;
311	struct componentname *a_cnp;
312	struct vattr *a_vap;
313    };
314*/
315static int
316fuse_vnop_create(struct vop_create_args *ap)
317{
318	struct vnode *dvp = ap->a_dvp;
319	struct vnode **vpp = ap->a_vpp;
320	struct componentname *cnp = ap->a_cnp;
321	struct vattr *vap = ap->a_vap;
322	struct thread *td = cnp->cn_thread;
323	struct ucred *cred = cnp->cn_cred;
324
325	struct fuse_open_in *foi;
326	struct fuse_entry_out *feo;
327	struct fuse_dispatcher fdi;
328	struct fuse_dispatcher *fdip = &fdi;
329
330	int err;
331
332	struct mount *mp = vnode_mount(dvp);
333	uint64_t parentnid = VTOFUD(dvp)->nid;
334	mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
335	uint64_t x_fh_id;
336	uint32_t x_open_flags;
337
338	fuse_trace_printf_vnop();
339
340	if (fuse_isdeadfs(dvp)) {
341		return ENXIO;
342	}
343	bzero(&fdi, sizeof(fdi));
344
345	/* XXX:	Will we ever want devices ? */
346	if ((vap->va_type != VREG)) {
347		printf("fuse_vnop_create: unsupported va_type %d\n",
348		    vap->va_type);
349		return (EINVAL);
350	}
351	debug_printf("parent nid = %ju, mode = %x\n", (uintmax_t)parentnid,
352	    mode);
353
354	fdisp_init(fdip, sizeof(*foi) + cnp->cn_namelen + 1);
355	if (!fsess_isimpl(mp, FUSE_CREATE)) {
356		debug_printf("eh, daemon doesn't implement create?\n");
357		return (EINVAL);
358	}
359	fdisp_make(fdip, FUSE_CREATE, vnode_mount(dvp), parentnid, td, cred);
360
361	foi = fdip->indata;
362	foi->mode = mode;
363	foi->flags = O_CREAT | O_RDWR;
364
365	memcpy((char *)fdip->indata + sizeof(*foi), cnp->cn_nameptr,
366	    cnp->cn_namelen);
367	((char *)fdip->indata)[sizeof(*foi) + cnp->cn_namelen] = '\0';
368
369	err = fdisp_wait_answ(fdip);
370
371	if (err) {
372		if (err == ENOSYS)
373			fsess_set_notimpl(mp, FUSE_CREATE);
374		debug_printf("create: got err=%d from daemon\n", err);
375		goto out;
376	}
377
378	feo = fdip->answ;
379
380	if ((err = fuse_internal_checkentry(feo, VREG))) {
381		goto out;
382	}
383	err = fuse_vnode_get(mp, feo->nodeid, dvp, vpp, cnp, VREG);
384	if (err) {
385		struct fuse_release_in *fri;
386		uint64_t nodeid = feo->nodeid;
387		uint64_t fh_id = ((struct fuse_open_out *)(feo + 1))->fh;
388
389		fdisp_init(fdip, sizeof(*fri));
390		fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred);
391		fri = fdip->indata;
392		fri->fh = fh_id;
393		fri->flags = OFLAGS(mode);
394		fuse_insert_callback(fdip->tick, fuse_internal_forget_callback);
395		fuse_insert_message(fdip->tick);
396		return err;
397	}
398	ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create");
399
400	fdip->answ = feo + 1;
401
402	x_fh_id = ((struct fuse_open_out *)(feo + 1))->fh;
403	x_open_flags = ((struct fuse_open_out *)(feo + 1))->open_flags;
404	fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, x_fh_id);
405	fuse_vnode_open(*vpp, x_open_flags, td);
406	cache_purge_negative(dvp);
407
408out:
409	fdisp_destroy(fdip);
410	return err;
411}
412
413/*
414 * Our vnop_fsync roughly corresponds to the FUSE_FSYNC method. The Linux
415 * version of FUSE also has a FUSE_FLUSH method.
416 *
417 * On Linux, fsync() synchronizes a file's complete in-core state with that
418 * on disk. The call is not supposed to return until the system has completed
419 * that action or until an error is detected.
420 *
421 * Linux also has an fdatasync() call that is similar to fsync() but is not
422 * required to update the metadata such as access time and modification time.
423 */
424
425/*
426    struct vnop_fsync_args {
427	struct vnodeop_desc *a_desc;
428	struct vnode * a_vp;
429	struct ucred * a_cred;
430	int  a_waitfor;
431	struct thread * a_td;
432    };
433*/
434static int
435fuse_vnop_fsync(struct vop_fsync_args *ap)
436{
437	struct vnode *vp = ap->a_vp;
438	struct thread *td = ap->a_td;
439
440	struct fuse_filehandle *fufh;
441	struct fuse_vnode_data *fvdat = VTOFUD(vp);
442
443	int type, err = 0;
444
445	fuse_trace_printf_vnop();
446
447	if (fuse_isdeadfs(vp)) {
448		return 0;
449	}
450	if ((err = vop_stdfsync(ap)))
451		return err;
452
453	if (!fsess_isimpl(vnode_mount(vp),
454	    (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
455		goto out;
456	}
457	for (type = 0; type < FUFH_MAXTYPE; type++) {
458		fufh = &(fvdat->fufh[type]);
459		if (FUFH_IS_VALID(fufh)) {
460			fuse_internal_fsync(vp, td, NULL, fufh);
461		}
462	}
463
464out:
465	return 0;
466}
467
468/*
469    struct vnop_getattr_args {
470	struct vnode *a_vp;
471	struct vattr *a_vap;
472	struct ucred *a_cred;
473	struct thread *a_td;
474    };
475*/
476static int
477fuse_vnop_getattr(struct vop_getattr_args *ap)
478{
479	struct vnode *vp = ap->a_vp;
480	struct vattr *vap = ap->a_vap;
481	struct ucred *cred = ap->a_cred;
482	struct thread *td = curthread;
483	struct fuse_vnode_data *fvdat = VTOFUD(vp);
484
485	int err = 0;
486	int dataflags;
487	struct fuse_dispatcher fdi;
488
489	FS_DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp));
490
491	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
492
493	/* Note that we are not bailing out on a dead file system just yet. */
494
495	if (!(dataflags & FSESS_INITED)) {
496		if (!vnode_isvroot(vp)) {
497			fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
498			err = ENOTCONN;
499			debug_printf("fuse_getattr b: returning ENOTCONN\n");
500			return err;
501		} else {
502			goto fake;
503		}
504	}
505	fdisp_init(&fdi, 0);
506	if ((err = fdisp_simple_putget_vp(&fdi, FUSE_GETATTR, vp, td, cred))) {
507		if ((err == ENOTCONN) && vnode_isvroot(vp)) {
508			/* see comment at similar place in fuse_statfs() */
509			fdisp_destroy(&fdi);
510			goto fake;
511		}
512		if (err == ENOENT) {
513			fuse_internal_vnode_disappear(vp);
514		}
515		goto out;
516	}
517	cache_attrs(vp, (struct fuse_attr_out *)fdi.answ);
518	if (vap != VTOVA(vp)) {
519		memcpy(vap, VTOVA(vp), sizeof(*vap));
520	}
521	if (vap->va_type != vnode_vtype(vp)) {
522		fuse_internal_vnode_disappear(vp);
523		err = ENOENT;
524		goto out;
525	}
526	if ((fvdat->flag & FN_SIZECHANGE) != 0)
527		vap->va_size = fvdat->filesize;
528
529	if (vnode_isreg(vp) && (fvdat->flag & FN_SIZECHANGE) == 0) {
530		/*
531	         * This is for those cases when the file size changed without us
532	         * knowing, and we want to catch up.
533	         */
534		off_t new_filesize = ((struct fuse_attr_out *)
535				      fdi.answ)->attr.size;
536
537		if (fvdat->filesize != new_filesize) {
538			fuse_vnode_setsize(vp, cred, new_filesize);
539		}
540	}
541	debug_printf("fuse_getattr e: returning 0\n");
542
543out:
544	fdisp_destroy(&fdi);
545	return err;
546
547fake:
548	bzero(vap, sizeof(*vap));
549	vap->va_type = vnode_vtype(vp);
550
551	return 0;
552}
553
554/*
555    struct vnop_inactive_args {
556	struct vnode *a_vp;
557	struct thread *a_td;
558    };
559*/
560static int
561fuse_vnop_inactive(struct vop_inactive_args *ap)
562{
563	struct vnode *vp = ap->a_vp;
564	struct thread *td = ap->a_td;
565
566	struct fuse_vnode_data *fvdat = VTOFUD(vp);
567	struct fuse_filehandle *fufh = NULL;
568
569	int type, need_flush = 1;
570
571	FS_DEBUG("inode=%ju\n", (uintmax_t)VTOI(vp));
572
573	for (type = 0; type < FUFH_MAXTYPE; type++) {
574		fufh = &(fvdat->fufh[type]);
575		if (FUFH_IS_VALID(fufh)) {
576			if (need_flush && vp->v_type == VREG) {
577				if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
578					fuse_vnode_savesize(vp, NULL);
579				}
580				if (fuse_data_cache_invalidate ||
581				    (fvdat->flag & FN_REVOKED) != 0)
582					fuse_io_invalbuf(vp, td);
583				else
584					fuse_io_flushbuf(vp, MNT_WAIT, td);
585				need_flush = 0;
586			}
587			fuse_filehandle_close(vp, type, td, NULL);
588		}
589	}
590
591	if ((fvdat->flag & FN_REVOKED) != 0 && fuse_reclaim_revoked) {
592		vrecycle(vp);
593	}
594	return 0;
595}
596
597/*
598    struct vnop_link_args {
599	struct vnode *a_tdvp;
600	struct vnode *a_vp;
601	struct componentname *a_cnp;
602    };
603*/
604static int
605fuse_vnop_link(struct vop_link_args *ap)
606{
607	struct vnode *vp = ap->a_vp;
608	struct vnode *tdvp = ap->a_tdvp;
609	struct componentname *cnp = ap->a_cnp;
610
611	struct vattr *vap = VTOVA(vp);
612
613	struct fuse_dispatcher fdi;
614	struct fuse_entry_out *feo;
615	struct fuse_link_in fli;
616
617	int err;
618
619	fuse_trace_printf_vnop();
620
621	if (fuse_isdeadfs(vp)) {
622		return ENXIO;
623	}
624	if (vnode_mount(tdvp) != vnode_mount(vp)) {
625		return EXDEV;
626	}
627	if (vap->va_nlink >= FUSE_LINK_MAX) {
628		return EMLINK;
629	}
630	fli.oldnodeid = VTOI(vp);
631
632	fdisp_init(&fdi, 0);
633	fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp,
634	    FUSE_LINK, &fli, sizeof(fli), &fdi);
635	if ((err = fdisp_wait_answ(&fdi))) {
636		goto out;
637	}
638	feo = fdi.answ;
639
640	err = fuse_internal_checkentry(feo, vnode_vtype(vp));
641out:
642	fdisp_destroy(&fdi);
643	return err;
644}
645
646/*
647    struct vnop_lookup_args {
648	struct vnodeop_desc *a_desc;
649	struct vnode *a_dvp;
650	struct vnode **a_vpp;
651	struct componentname *a_cnp;
652    };
653*/
654int
655fuse_vnop_lookup(struct vop_lookup_args *ap)
656{
657	struct vnode *dvp = ap->a_dvp;
658	struct vnode **vpp = ap->a_vpp;
659	struct componentname *cnp = ap->a_cnp;
660	struct thread *td = cnp->cn_thread;
661	struct ucred *cred = cnp->cn_cred;
662
663	int nameiop = cnp->cn_nameiop;
664	int flags = cnp->cn_flags;
665	int wantparent = flags & (LOCKPARENT | WANTPARENT);
666	int islastcn = flags & ISLASTCN;
667	struct mount *mp = vnode_mount(dvp);
668
669	int err = 0;
670	int lookup_err = 0;
671	struct vnode *vp = NULL;
672
673	struct fuse_dispatcher fdi;
674	enum fuse_opcode op;
675
676	uint64_t nid;
677	struct fuse_access_param facp;
678
679	FS_DEBUG2G("parent_inode=%ju - %*s\n",
680	    (uintmax_t)VTOI(dvp), (int)cnp->cn_namelen, cnp->cn_nameptr);
681
682	if (fuse_isdeadfs(dvp)) {
683		*vpp = NULL;
684		return ENXIO;
685	}
686	if (!vnode_isdir(dvp)) {
687		return ENOTDIR;
688	}
689	if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) {
690		return EROFS;
691	}
692	/*
693         * We do access check prior to doing anything else only in the case
694         * when we are at fs root (we'd like to say, "we are at the first
695         * component", but that's not exactly the same... nevermind).
696         * See further comments at further access checks.
697         */
698
699	bzero(&facp, sizeof(facp));
700	if (vnode_isvroot(dvp)) {	/* early permission check hack */
701		if ((err = fuse_internal_access(dvp, VEXEC, &facp, td, cred))) {
702			return err;
703		}
704	}
705	if (flags & ISDOTDOT) {
706		nid = VTOFUD(dvp)->parent_nid;
707		if (nid == 0) {
708			return ENOENT;
709		}
710		fdisp_init(&fdi, 0);
711		op = FUSE_GETATTR;
712		goto calldaemon;
713	} else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') {
714		nid = VTOI(dvp);
715		fdisp_init(&fdi, 0);
716		op = FUSE_GETATTR;
717		goto calldaemon;
718	} else if (fuse_lookup_cache_enable) {
719		err = cache_lookup(dvp, vpp, cnp, NULL, NULL);
720		switch (err) {
721
722		case -1:		/* positive match */
723			atomic_add_acq_long(&fuse_lookup_cache_hits, 1);
724			return 0;
725
726		case 0:		/* no match in cache */
727			atomic_add_acq_long(&fuse_lookup_cache_misses, 1);
728			break;
729
730		case ENOENT:		/* negative match */
731			/* fall through */
732		default:
733			return err;
734		}
735	}
736	nid = VTOI(dvp);
737	fdisp_init(&fdi, cnp->cn_namelen + 1);
738	op = FUSE_LOOKUP;
739
740calldaemon:
741	fdisp_make(&fdi, op, mp, nid, td, cred);
742
743	if (op == FUSE_LOOKUP) {
744		memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
745		((char *)fdi.indata)[cnp->cn_namelen] = '\0';
746	}
747	lookup_err = fdisp_wait_answ(&fdi);
748
749	if ((op == FUSE_LOOKUP) && !lookup_err) {	/* lookup call succeeded */
750		nid = ((struct fuse_entry_out *)fdi.answ)->nodeid;
751		if (!nid) {
752			/*
753	                 * zero nodeid is the same as "not found",
754	                 * but it's also cacheable (which we keep
755	                 * keep on doing not as of writing this)
756	                 */
757			lookup_err = ENOENT;
758		} else if (nid == FUSE_ROOT_ID) {
759			lookup_err = EINVAL;
760		}
761	}
762	if (lookup_err &&
763	    (!fdi.answ_stat || lookup_err != ENOENT || op != FUSE_LOOKUP)) {
764		fdisp_destroy(&fdi);
765		return lookup_err;
766	}
767	/* lookup_err, if non-zero, must be ENOENT at this point */
768
769	if (lookup_err) {
770
771		if ((nameiop == CREATE || nameiop == RENAME) && islastcn
772		     /* && directory dvp has not been removed */ ) {
773
774			if (vfs_isrdonly(mp)) {
775				err = EROFS;
776				goto out;
777			}
778#if 0 /* THINK_ABOUT_THIS */
779			if ((err = fuse_internal_access(dvp, VWRITE, cred, td, &facp))) {
780				goto out;
781			}
782#endif
783
784			/*
785	                 * Possibly record the position of a slot in the
786	                 * directory large enough for the new component name.
787	                 * This can be recorded in the vnode private data for
788	                 * dvp. Set the SAVENAME flag to hold onto the
789	                 * pathname for use later in VOP_CREATE or VOP_RENAME.
790	                 */
791			cnp->cn_flags |= SAVENAME;
792
793			err = EJUSTRETURN;
794			goto out;
795		}
796		/* Consider inserting name into cache. */
797
798		/*
799	         * No we can't use negative caching, as the fs
800	         * changes are out of our control.
801	         * False positives' falseness turns out just as things
802	         * go by, but false negatives' falseness doesn't.
803	         * (and aiding the caching mechanism with extra control
804	         * mechanisms comes quite close to beating the whole purpose
805	         * caching...)
806	         */
807#if 0
808		if ((cnp->cn_flags & MAKEENTRY) != 0) {
809			FS_DEBUG("inserting NULL into cache\n");
810			cache_enter(dvp, NULL, cnp);
811		}
812#endif
813		err = ENOENT;
814		goto out;
815
816	} else {
817
818		/* !lookup_err */
819
820		struct fuse_entry_out *feo = NULL;
821		struct fuse_attr *fattr = NULL;
822
823		if (op == FUSE_GETATTR) {
824			fattr = &((struct fuse_attr_out *)fdi.answ)->attr;
825		} else {
826			feo = (struct fuse_entry_out *)fdi.answ;
827			fattr = &(feo->attr);
828		}
829
830		/*
831	         * If deleting, and at end of pathname, return parameters
832	         * which can be used to remove file.  If the wantparent flag
833	         * isn't set, we return only the directory, otherwise we go on
834	         * and lock the inode, being careful with ".".
835	         */
836		if (nameiop == DELETE && islastcn) {
837			/*
838	                 * Check for write access on directory.
839	                 */
840			facp.xuid = fattr->uid;
841			facp.facc_flags |= FACCESS_STICKY;
842			err = fuse_internal_access(dvp, VWRITE, &facp, td, cred);
843			facp.facc_flags &= ~FACCESS_XQUERIES;
844
845			if (err) {
846				goto out;
847			}
848			if (nid == VTOI(dvp)) {
849				vref(dvp);
850				*vpp = dvp;
851			} else {
852				err = fuse_vnode_get(dvp->v_mount, nid, dvp,
853				    &vp, cnp, IFTOVT(fattr->mode));
854				if (err)
855					goto out;
856				*vpp = vp;
857			}
858
859			/*
860			 * Save the name for use in VOP_RMDIR and VOP_REMOVE
861			 * later.
862			 */
863			cnp->cn_flags |= SAVENAME;
864			goto out;
865
866		}
867		/*
868	         * If rewriting (RENAME), return the inode and the
869	         * information required to rewrite the present directory
870	         * Must get inode of directory entry to verify it's a
871	         * regular file, or empty directory.
872	         */
873		if (nameiop == RENAME && wantparent && islastcn) {
874
875#if 0 /* THINK_ABOUT_THIS */
876			if ((err = fuse_internal_access(dvp, VWRITE, cred, td, &facp))) {
877				goto out;
878			}
879#endif
880
881			/*
882	                 * Check for "."
883	                 */
884			if (nid == VTOI(dvp)) {
885				err = EISDIR;
886				goto out;
887			}
888			err = fuse_vnode_get(vnode_mount(dvp),
889			    nid,
890			    dvp,
891			    &vp,
892			    cnp,
893			    IFTOVT(fattr->mode));
894			if (err) {
895				goto out;
896			}
897			*vpp = vp;
898			/*
899	                 * Save the name for use in VOP_RENAME later.
900	                 */
901			cnp->cn_flags |= SAVENAME;
902
903			goto out;
904		}
905		if (flags & ISDOTDOT) {
906			struct mount *mp;
907			int ltype;
908
909			/*
910			 * Expanded copy of vn_vget_ino() so that
911			 * fuse_vnode_get() can be used.
912			 */
913			mp = dvp->v_mount;
914			ltype = VOP_ISLOCKED(dvp);
915			err = vfs_busy(mp, MBF_NOWAIT);
916			if (err != 0) {
917				vfs_ref(mp);
918				VOP_UNLOCK(dvp, 0);
919				err = vfs_busy(mp, 0);
920				vn_lock(dvp, ltype | LK_RETRY);
921				vfs_rel(mp);
922				if (err)
923					goto out;
924				if ((dvp->v_iflag & VI_DOOMED) != 0) {
925					err = ENOENT;
926					vfs_unbusy(mp);
927					goto out;
928				}
929			}
930			VOP_UNLOCK(dvp, 0);
931			err = fuse_vnode_get(vnode_mount(dvp),
932			    nid,
933			    NULL,
934			    &vp,
935			    cnp,
936			    IFTOVT(fattr->mode));
937			vfs_unbusy(mp);
938			vn_lock(dvp, ltype | LK_RETRY);
939			if ((dvp->v_iflag & VI_DOOMED) != 0) {
940				if (err == 0)
941					vput(vp);
942				err = ENOENT;
943			}
944			if (err)
945				goto out;
946			*vpp = vp;
947		} else if (nid == VTOI(dvp)) {
948			vref(dvp);
949			*vpp = dvp;
950		} else {
951			err = fuse_vnode_get(vnode_mount(dvp),
952			    nid,
953			    dvp,
954			    &vp,
955			    cnp,
956			    IFTOVT(fattr->mode));
957			if (err) {
958				goto out;
959			}
960			fuse_vnode_setparent(vp, dvp);
961			*vpp = vp;
962		}
963
964		if (op == FUSE_GETATTR) {
965			cache_attrs(*vpp, (struct fuse_attr_out *)fdi.answ);
966		} else {
967			cache_attrs(*vpp, (struct fuse_entry_out *)fdi.answ);
968		}
969
970		/* Insert name into cache if appropriate. */
971
972		/*
973	         * Nooo, caching is evil. With caching, we can't avoid stale
974	         * information taking over the playground (cached info is not
975	         * just positive/negative, it does have qualitative aspects,
976	         * too). And a (VOP/FUSE)_GETATTR is always thrown anyway, when
977	         * walking down along cached path components, and that's not
978	         * any cheaper than FUSE_LOOKUP. This might change with
979	         * implementing kernel side attr caching, but... In Linux,
980	         * lookup results are not cached, and the daemon is bombarded
981	         * with FUSE_LOOKUPS on and on. This shows that by design, the
982	         * daemon is expected to handle frequent lookup queries
983	         * efficiently, do its caching in userspace, and so on.
984	         *
985	         * So just leave the name cache alone.
986	         */
987
988		/*
989	         * Well, now I know, Linux caches lookups, but with a
990	         * timeout... So it's the same thing as attribute caching:
991	         * we can deal with it when implement timeouts.
992	         */
993#if 0
994		if (cnp->cn_flags & MAKEENTRY) {
995			cache_enter(dvp, *vpp, cnp);
996		}
997#endif
998	}
999out:
1000	if (!lookup_err) {
1001
1002		/* No lookup error; need to clean up. */
1003
1004		if (err) {		/* Found inode; exit with no vnode. */
1005			if (op == FUSE_LOOKUP) {
1006				fuse_internal_forget_send(vnode_mount(dvp), td, cred,
1007				    nid, 1);
1008			}
1009			fdisp_destroy(&fdi);
1010			return err;
1011		} else {
1012#ifndef NO_EARLY_PERM_CHECK_HACK
1013			if (!islastcn) {
1014				/*
1015				 * We have the attributes of the next item
1016				 * *now*, and it's a fact, and we do not
1017				 * have to do extra work for it (ie, beg the
1018				 * daemon), and it neither depends on such
1019				 * accidental things like attr caching. So
1020				 * the big idea: check credentials *now*,
1021				 * not at the beginning of the next call to
1022				 * lookup.
1023				 *
1024				 * The first item of the lookup chain (fs root)
1025				 * won't be checked then here, of course, as
1026				 * its never "the next". But go and see that
1027				 * the root is taken care about at the very
1028				 * beginning of this function.
1029				 *
1030				 * Now, given we want to do the access check
1031				 * this way, one might ask: so then why not
1032				 * do the access check just after fetching
1033				 * the inode and its attributes from the
1034				 * daemon? Why bother with producing the
1035				 * corresponding vnode at all if something
1036				 * is not OK? We know what's the deal as
1037				 * soon as we get those attrs... There is
1038				 * one bit of info though not given us by
1039				 * the daemon: whether his response is
1040				 * authorative or not... His response should
1041				 * be ignored if something is mounted over
1042				 * the dir in question. But that can be
1043				 * known only by having the vnode...
1044				 */
1045				int tmpvtype = vnode_vtype(*vpp);
1046
1047				bzero(&facp, sizeof(facp));
1048				/*the early perm check hack */
1049				    facp.facc_flags |= FACCESS_VA_VALID;
1050
1051				if ((tmpvtype != VDIR) && (tmpvtype != VLNK)) {
1052					err = ENOTDIR;
1053				}
1054				if (!err && !vnode_mountedhere(*vpp)) {
1055					err = fuse_internal_access(*vpp, VEXEC, &facp, td, cred);
1056				}
1057				if (err) {
1058					if (tmpvtype == VLNK)
1059						FS_DEBUG("weird, permission error with a symlink?\n");
1060					vput(*vpp);
1061					*vpp = NULL;
1062				}
1063			}
1064#endif
1065		}
1066	}
1067	fdisp_destroy(&fdi);
1068
1069	return err;
1070}
1071
1072/*
1073    struct vnop_mkdir_args {
1074	struct vnode *a_dvp;
1075	struct vnode **a_vpp;
1076	struct componentname *a_cnp;
1077	struct vattr *a_vap;
1078    };
1079*/
1080static int
1081fuse_vnop_mkdir(struct vop_mkdir_args *ap)
1082{
1083	struct vnode *dvp = ap->a_dvp;
1084	struct vnode **vpp = ap->a_vpp;
1085	struct componentname *cnp = ap->a_cnp;
1086	struct vattr *vap = ap->a_vap;
1087
1088	struct fuse_mkdir_in fmdi;
1089
1090	fuse_trace_printf_vnop();
1091
1092	if (fuse_isdeadfs(dvp)) {
1093		return ENXIO;
1094	}
1095	fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode);
1096
1097	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi,
1098	    sizeof(fmdi), VDIR));
1099}
1100
1101/*
1102    struct vnop_mknod_args {
1103	struct vnode *a_dvp;
1104	struct vnode **a_vpp;
1105	struct componentname *a_cnp;
1106	struct vattr *a_vap;
1107    };
1108*/
/*
 * Node creation is not implemented: the arguments are never examined
 * and every call fails with EINVAL.
 */
static int
fuse_vnop_mknod(struct vop_mknod_args *ap)
{
	(void)ap;

	return (EINVAL);
}
1115
1116
1117/*
1118    struct vnop_open_args {
1119	struct vnode *a_vp;
1120	int  a_mode;
1121	struct ucred *a_cred;
1122	struct thread *a_td;
1123	int a_fdidx; / struct file *a_fp;
1124    };
1125*/
1126static int
1127fuse_vnop_open(struct vop_open_args *ap)
1128{
1129	struct vnode *vp = ap->a_vp;
1130	int mode = ap->a_mode;
1131	struct thread *td = ap->a_td;
1132	struct ucred *cred = ap->a_cred;
1133
1134	fufh_type_t fufh_type;
1135	struct fuse_vnode_data *fvdat;
1136
1137	int error, isdir = 0;
1138	int32_t fuse_open_flags;
1139
1140	FS_DEBUG2G("inode=%ju mode=0x%x\n", (uintmax_t)VTOI(vp), mode);
1141
1142	if (fuse_isdeadfs(vp)) {
1143		return ENXIO;
1144	}
1145	fvdat = VTOFUD(vp);
1146
1147	if (vnode_isdir(vp)) {
1148		isdir = 1;
1149	}
1150	fuse_open_flags = 0;
1151	if (isdir) {
1152		fufh_type = FUFH_RDONLY;
1153	} else {
1154		fufh_type = fuse_filehandle_xlate_from_fflags(mode);
1155		/*
1156		 * For WRONLY opens, force DIRECT_IO.  This is necessary
1157		 * since writing a partial block through the buffer cache
1158		 * will result in a read of the block and that read won't
1159		 * be allowed by the WRONLY open.
1160		 */
1161		if (fufh_type == FUFH_WRONLY ||
1162		    (fvdat->flag & FN_DIRECTIO) != 0)
1163			fuse_open_flags = FOPEN_DIRECT_IO;
1164	}
1165
1166	if (fuse_filehandle_validrw(vp, fufh_type) != FUFH_INVALID) {
1167		fuse_vnode_open(vp, fuse_open_flags, td);
1168		return 0;
1169	}
1170	error = fuse_filehandle_open(vp, fufh_type, NULL, td, cred);
1171
1172	return error;
1173}
1174
1175/*
1176    struct vnop_read_args {
1177	struct vnode *a_vp;
1178	struct uio *a_uio;
1179	int  a_ioflag;
1180	struct ucred *a_cred;
1181    };
1182*/
1183static int
1184fuse_vnop_read(struct vop_read_args *ap)
1185{
1186	struct vnode *vp = ap->a_vp;
1187	struct uio *uio = ap->a_uio;
1188	int ioflag = ap->a_ioflag;
1189	struct ucred *cred = ap->a_cred;
1190
1191	FS_DEBUG2G("inode=%ju offset=%jd resid=%zd\n",
1192	    (uintmax_t)VTOI(vp), uio->uio_offset, uio->uio_resid);
1193
1194	if (fuse_isdeadfs(vp)) {
1195		return ENXIO;
1196	}
1197
1198	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
1199		ioflag |= IO_DIRECT;
1200	}
1201
1202	return fuse_io_dispatch(vp, uio, ioflag, cred);
1203}
1204
1205/*
1206    struct vnop_readdir_args {
1207	struct vnode *a_vp;
1208	struct uio *a_uio;
1209	struct ucred *a_cred;
1210	int *a_eofflag;
1211	int *ncookies;
1212	u_long **a_cookies;
1213    };
1214*/
1215static int
1216fuse_vnop_readdir(struct vop_readdir_args *ap)
1217{
1218	struct vnode *vp = ap->a_vp;
1219	struct uio *uio = ap->a_uio;
1220	struct ucred *cred = ap->a_cred;
1221
1222	struct fuse_filehandle *fufh = NULL;
1223	struct fuse_vnode_data *fvdat;
1224	struct fuse_iov cookediov;
1225
1226	int err = 0;
1227	int freefufh = 0;
1228
1229	FS_DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp));
1230
1231	if (fuse_isdeadfs(vp)) {
1232		return ENXIO;
1233	}
1234	if (				/* XXXIP ((uio_iovcnt(uio) > 1)) || */
1235	    (uio_resid(uio) < sizeof(struct dirent))) {
1236		return EINVAL;
1237	}
1238	fvdat = VTOFUD(vp);
1239
1240	if (!fuse_filehandle_valid(vp, FUFH_RDONLY)) {
1241		FS_DEBUG("calling readdir() before open()");
1242		err = fuse_filehandle_open(vp, FUFH_RDONLY, &fufh, NULL, cred);
1243		freefufh = 1;
1244	} else {
1245		err = fuse_filehandle_get(vp, FUFH_RDONLY, &fufh);
1246	}
1247	if (err) {
1248		return (err);
1249	}
1250#define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1)
1251	fiov_init(&cookediov, DIRCOOKEDSIZE);
1252
1253	err = fuse_internal_readdir(vp, uio, fufh, &cookediov);
1254
1255	fiov_teardown(&cookediov);
1256	if (freefufh) {
1257		fuse_filehandle_close(vp, FUFH_RDONLY, NULL, cred);
1258	}
1259	return err;
1260}
1261
1262/*
1263    struct vnop_readlink_args {
1264	struct vnode *a_vp;
1265	struct uio *a_uio;
1266	struct ucred *a_cred;
1267    };
1268*/
1269static int
1270fuse_vnop_readlink(struct vop_readlink_args *ap)
1271{
1272	struct vnode *vp = ap->a_vp;
1273	struct uio *uio = ap->a_uio;
1274	struct ucred *cred = ap->a_cred;
1275
1276	struct fuse_dispatcher fdi;
1277	int err;
1278
1279	FS_DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp));
1280
1281	if (fuse_isdeadfs(vp)) {
1282		return ENXIO;
1283	}
1284	if (!vnode_islnk(vp)) {
1285		return EINVAL;
1286	}
1287	fdisp_init(&fdi, 0);
1288	err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred);
1289	if (err) {
1290		goto out;
1291	}
1292	if (((char *)fdi.answ)[0] == '/' &&
1293	    fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) {
1294		char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname;
1295
1296		err = uiomove(mpth, strlen(mpth), uio);
1297	}
1298	if (!err) {
1299		err = uiomove(fdi.answ, fdi.iosize, uio);
1300	}
1301out:
1302	fdisp_destroy(&fdi);
1303	return err;
1304}
1305
1306/*
1307    struct vnop_reclaim_args {
1308	struct vnode *a_vp;
1309	struct thread *a_td;
1310    };
1311*/
1312static int
1313fuse_vnop_reclaim(struct vop_reclaim_args *ap)
1314{
1315	struct vnode *vp = ap->a_vp;
1316	struct thread *td = ap->a_td;
1317
1318	struct fuse_vnode_data *fvdat = VTOFUD(vp);
1319	struct fuse_filehandle *fufh = NULL;
1320
1321	int type;
1322
1323	if (!fvdat) {
1324		panic("FUSE: no vnode data during recycling");
1325	}
1326	FS_DEBUG("inode=%ju\n", (uintmax_t)VTOI(vp));
1327
1328	for (type = 0; type < FUFH_MAXTYPE; type++) {
1329		fufh = &(fvdat->fufh[type]);
1330		if (FUFH_IS_VALID(fufh)) {
1331			printf("FUSE: vnode being reclaimed but fufh (type=%d) is valid",
1332			    type);
1333			fuse_filehandle_close(vp, type, td, NULL);
1334		}
1335	}
1336
1337	if ((!fuse_isdeadfs(vp)) && (fvdat->nlookup)) {
1338		fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp),
1339		    fvdat->nlookup);
1340	}
1341	fuse_vnode_setparent(vp, NULL);
1342	cache_purge(vp);
1343	vfs_hash_remove(vp);
1344	vnode_destroy_vobject(vp);
1345	fuse_vnode_destroy(vp);
1346
1347	return 0;
1348}
1349
1350/*
1351    struct vnop_remove_args {
1352	struct vnode *a_dvp;
1353	struct vnode *a_vp;
1354	struct componentname *a_cnp;
1355    };
1356*/
1357static int
1358fuse_vnop_remove(struct vop_remove_args *ap)
1359{
1360	struct vnode *dvp = ap->a_dvp;
1361	struct vnode *vp = ap->a_vp;
1362	struct componentname *cnp = ap->a_cnp;
1363
1364	int err;
1365
1366	FS_DEBUG2G("inode=%ju name=%*s\n",
1367	    (uintmax_t)VTOI(vp), (int)cnp->cn_namelen, cnp->cn_nameptr);
1368
1369	if (fuse_isdeadfs(vp)) {
1370		return ENXIO;
1371	}
1372	if (vnode_isdir(vp)) {
1373		return EPERM;
1374	}
1375	cache_purge(vp);
1376
1377	err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK);
1378
1379	if (err == 0)
1380		fuse_internal_vnode_disappear(vp);
1381	return err;
1382}
1383
1384/*
1385    struct vnop_rename_args {
1386	struct vnode *a_fdvp;
1387	struct vnode *a_fvp;
1388	struct componentname *a_fcnp;
1389	struct vnode *a_tdvp;
1390	struct vnode *a_tvp;
1391	struct componentname *a_tcnp;
1392    };
1393*/
1394static int
1395fuse_vnop_rename(struct vop_rename_args *ap)
1396{
1397	struct vnode *fdvp = ap->a_fdvp;
1398	struct vnode *fvp = ap->a_fvp;
1399	struct componentname *fcnp = ap->a_fcnp;
1400	struct vnode *tdvp = ap->a_tdvp;
1401	struct vnode *tvp = ap->a_tvp;
1402	struct componentname *tcnp = ap->a_tcnp;
1403	struct fuse_data *data;
1404
1405	int err = 0;
1406
1407	FS_DEBUG2G("from: inode=%ju name=%*s -> to: inode=%ju name=%*s\n",
1408	    (uintmax_t)VTOI(fvp), (int)fcnp->cn_namelen, fcnp->cn_nameptr,
1409	    (uintmax_t)(tvp == NULL ? -1 : VTOI(tvp)),
1410	    (int)tcnp->cn_namelen, tcnp->cn_nameptr);
1411
1412	if (fuse_isdeadfs(fdvp)) {
1413		return ENXIO;
1414	}
1415	if (fvp->v_mount != tdvp->v_mount ||
1416	    (tvp && fvp->v_mount != tvp->v_mount)) {
1417		FS_DEBUG("cross-device rename: %s -> %s\n",
1418		    fcnp->cn_nameptr, (tcnp != NULL ? tcnp->cn_nameptr : "(NULL)"));
1419		err = EXDEV;
1420		goto out;
1421	}
1422	cache_purge(fvp);
1423
1424	/*
1425         * FUSE library is expected to check if target directory is not
1426         * under the source directory in the file system tree.
1427         * Linux performs this check at VFS level.
1428         */
1429	data = fuse_get_mpdata(vnode_mount(tdvp));
1430	sx_xlock(&data->rename_lock);
1431	err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
1432	if (err == 0) {
1433		if (tdvp != fdvp)
1434			fuse_vnode_setparent(fvp, tdvp);
1435		if (tvp != NULL)
1436			fuse_vnode_setparent(tvp, NULL);
1437	}
1438	sx_unlock(&data->rename_lock);
1439
1440	if (tvp != NULL && tvp != fvp) {
1441		cache_purge(tvp);
1442	}
1443	if (vnode_isdir(fvp)) {
1444		if ((tvp != NULL) && vnode_isdir(tvp)) {
1445			cache_purge(tdvp);
1446		}
1447		cache_purge(fdvp);
1448	}
1449out:
1450	if (tdvp == tvp) {
1451		vrele(tdvp);
1452	} else {
1453		vput(tdvp);
1454	}
1455	if (tvp != NULL) {
1456		vput(tvp);
1457	}
1458	vrele(fdvp);
1459	vrele(fvp);
1460
1461	return err;
1462}
1463
1464/*
1465    struct vnop_rmdir_args {
1466	    struct vnode *a_dvp;
1467	    struct vnode *a_vp;
1468	    struct componentname *a_cnp;
1469    } *ap;
1470*/
1471static int
1472fuse_vnop_rmdir(struct vop_rmdir_args *ap)
1473{
1474	struct vnode *dvp = ap->a_dvp;
1475	struct vnode *vp = ap->a_vp;
1476
1477	int err;
1478
1479	FS_DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp));
1480
1481	if (fuse_isdeadfs(vp)) {
1482		return ENXIO;
1483	}
1484	if (VTOFUD(vp) == VTOFUD(dvp)) {
1485		return EINVAL;
1486	}
1487	err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR);
1488
1489	if (err == 0)
1490		fuse_internal_vnode_disappear(vp);
1491	return err;
1492}
1493
1494/*
1495    struct vnop_setattr_args {
1496	struct vnode *a_vp;
1497	struct vattr *a_vap;
1498	struct ucred *a_cred;
1499	struct thread *a_td;
1500    };
1501*/
1502static int
1503fuse_vnop_setattr(struct vop_setattr_args *ap)
1504{
1505	struct vnode *vp = ap->a_vp;
1506	struct vattr *vap = ap->a_vap;
1507	struct ucred *cred = ap->a_cred;
1508	struct thread *td = curthread;
1509
1510	struct fuse_dispatcher fdi;
1511	struct fuse_setattr_in *fsai;
1512	struct fuse_access_param facp;
1513
1514	int err = 0;
1515	enum vtype vtyp;
1516	int sizechanged = 0;
1517	uint64_t newsize = 0;
1518
1519	FS_DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp));
1520
1521	if (fuse_isdeadfs(vp)) {
1522		return ENXIO;
1523	}
1524	fdisp_init(&fdi, sizeof(*fsai));
1525	fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
1526	fsai = fdi.indata;
1527	fsai->valid = 0;
1528
1529	bzero(&facp, sizeof(facp));
1530
1531	facp.xuid = vap->va_uid;
1532	facp.xgid = vap->va_gid;
1533
1534	if (vap->va_uid != (uid_t)VNOVAL) {
1535		facp.facc_flags |= FACCESS_CHOWN;
1536		fsai->uid = vap->va_uid;
1537		fsai->valid |= FATTR_UID;
1538	}
1539	if (vap->va_gid != (gid_t)VNOVAL) {
1540		facp.facc_flags |= FACCESS_CHOWN;
1541		fsai->gid = vap->va_gid;
1542		fsai->valid |= FATTR_GID;
1543	}
1544	if (vap->va_size != VNOVAL) {
1545
1546		struct fuse_filehandle *fufh = NULL;
1547
1548		/*Truncate to a new value. */
1549		    fsai->size = vap->va_size;
1550		sizechanged = 1;
1551		newsize = vap->va_size;
1552		fsai->valid |= FATTR_SIZE;
1553
1554		fuse_filehandle_getrw(vp, FUFH_WRONLY, &fufh);
1555		if (fufh) {
1556			fsai->fh = fufh->fh_id;
1557			fsai->valid |= FATTR_FH;
1558		}
1559	}
1560	if (vap->va_atime.tv_sec != VNOVAL) {
1561		fsai->atime = vap->va_atime.tv_sec;
1562		fsai->atimensec = vap->va_atime.tv_nsec;
1563		fsai->valid |= FATTR_ATIME;
1564	}
1565	if (vap->va_mtime.tv_sec != VNOVAL) {
1566		fsai->mtime = vap->va_mtime.tv_sec;
1567		fsai->mtimensec = vap->va_mtime.tv_nsec;
1568		fsai->valid |= FATTR_MTIME;
1569	}
1570	if (vap->va_mode != (mode_t)VNOVAL) {
1571		fsai->mode = vap->va_mode & ALLPERMS;
1572		fsai->valid |= FATTR_MODE;
1573	}
1574	if (!fsai->valid) {
1575		goto out;
1576	}
1577	vtyp = vnode_vtype(vp);
1578
1579	if (fsai->valid & FATTR_SIZE && vtyp == VDIR) {
1580		err = EISDIR;
1581		goto out;
1582	}
1583	if (vfs_isrdonly(vnode_mount(vp)) && (fsai->valid & ~FATTR_SIZE || vtyp == VREG)) {
1584		err = EROFS;
1585		goto out;
1586	}
1587	if (fsai->valid & ~FATTR_SIZE) {
1588	  /*err = fuse_internal_access(vp, VADMIN, context, &facp); */
1589	  /*XXX */
1590		    err = 0;
1591	}
1592	facp.facc_flags &= ~FACCESS_XQUERIES;
1593
1594	if (err && !(fsai->valid & ~(FATTR_ATIME | FATTR_MTIME)) &&
1595	    vap->va_vaflags & VA_UTIMES_NULL) {
1596		err = fuse_internal_access(vp, VWRITE, &facp, td, cred);
1597	}
1598	if (err)
1599		goto out;
1600	if ((err = fdisp_wait_answ(&fdi)))
1601		goto out;
1602	vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode);
1603
1604	if (vnode_vtype(vp) != vtyp) {
1605		if (vnode_vtype(vp) == VNON && vtyp != VNON) {
1606			debug_printf("FUSE: Dang! vnode_vtype is VNON and vtype isn't.\n");
1607		} else {
1608			/*
1609	                 * STALE vnode, ditch
1610	                 *
1611	                 * The vnode has changed its type "behind our back". There's
1612	                 * nothing really we can do, so let us just force an internal
1613	                 * revocation and tell the caller to try again, if interested.
1614	                 */
1615			fuse_internal_vnode_disappear(vp);
1616			err = EAGAIN;
1617		}
1618	}
1619	if (!err && !sizechanged) {
1620		cache_attrs(vp, (struct fuse_attr_out *)fdi.answ);
1621	}
1622out:
1623	fdisp_destroy(&fdi);
1624	if (!err && sizechanged) {
1625		fuse_vnode_setsize(vp, cred, newsize);
1626		VTOFUD(vp)->flag &= ~FN_SIZECHANGE;
1627	}
1628	return err;
1629}
1630
1631/*
1632    struct vnop_strategy_args {
1633	struct vnode *a_vp;
1634	struct buf *a_bp;
1635    };
1636*/
1637static int
1638fuse_vnop_strategy(struct vop_strategy_args *ap)
1639{
1640	struct vnode *vp = ap->a_vp;
1641	struct buf *bp = ap->a_bp;
1642
1643	fuse_trace_printf_vnop();
1644
1645	if (!vp || fuse_isdeadfs(vp)) {
1646		bp->b_ioflags |= BIO_ERROR;
1647		bp->b_error = ENXIO;
1648		bufdone(bp);
1649		return ENXIO;
1650	}
1651	if (bp->b_iocmd == BIO_WRITE)
1652		fuse_vnode_refreshsize(vp, NOCRED);
1653
1654	(void)fuse_io_strategy(vp, bp);
1655
1656	/*
1657	 * This is a dangerous function. If returns error, that might mean a
1658	 * panic. We prefer pretty much anything over being forced to panic
1659	 * by a malicious daemon (a demon?). So we just return 0 anyway. You
1660	 * should never mind this: this function has its own error
1661	 * propagation mechanism via the argument buffer, so
1662	 * not-that-melodramatic residents of the call chain still will be
1663	 * able to know what to do.
1664	 */
1665	return 0;
1666}
1667
1668
1669/*
1670    struct vnop_symlink_args {
1671	struct vnode *a_dvp;
1672	struct vnode **a_vpp;
1673	struct componentname *a_cnp;
1674	struct vattr *a_vap;
1675	char *a_target;
1676    };
1677*/
1678static int
1679fuse_vnop_symlink(struct vop_symlink_args *ap)
1680{
1681	struct vnode *dvp = ap->a_dvp;
1682	struct vnode **vpp = ap->a_vpp;
1683	struct componentname *cnp = ap->a_cnp;
1684	char *target = ap->a_target;
1685
1686	struct fuse_dispatcher fdi;
1687
1688	int err;
1689	size_t len;
1690
1691	FS_DEBUG2G("inode=%ju name=%*s\n",
1692	    (uintmax_t)VTOI(dvp), (int)cnp->cn_namelen, cnp->cn_nameptr);
1693
1694	if (fuse_isdeadfs(dvp)) {
1695		return ENXIO;
1696	}
1697	/*
1698         * Unlike the other creator type calls, here we have to create a message
1699         * where the name of the new entry comes first, and the data describing
1700         * the entry comes second.
1701         * Hence we can't rely on our handy fuse_internal_newentry() routine,
1702         * but put together the message manually and just call the core part.
1703         */
1704
1705	len = strlen(target) + 1;
1706	fdisp_init(&fdi, len + cnp->cn_namelen + 1);
1707	fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL);
1708
1709	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
1710	((char *)fdi.indata)[cnp->cn_namelen] = '\0';
1711	memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len);
1712
1713	err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi);
1714	fdisp_destroy(&fdi);
1715	return err;
1716}
1717
1718/*
1719    struct vnop_write_args {
1720	struct vnode *a_vp;
1721	struct uio *a_uio;
1722	int  a_ioflag;
1723	struct ucred *a_cred;
1724    };
1725*/
1726static int
1727fuse_vnop_write(struct vop_write_args *ap)
1728{
1729	struct vnode *vp = ap->a_vp;
1730	struct uio *uio = ap->a_uio;
1731	int ioflag = ap->a_ioflag;
1732	struct ucred *cred = ap->a_cred;
1733
1734	fuse_trace_printf_vnop();
1735
1736	if (fuse_isdeadfs(vp)) {
1737		return ENXIO;
1738	}
1739	fuse_vnode_refreshsize(vp, cred);
1740
1741	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
1742		ioflag |= IO_DIRECT;
1743	}
1744
1745	return fuse_io_dispatch(vp, uio, ioflag, cred);
1746}
1747
1748/*
1749    struct vnop_getpages_args {
1750        struct vnode *a_vp;
1751        vm_page_t *a_m;
1752        int a_count;
1753        int a_reqpage;
1754        vm_ooffset_t a_offset;
1755    };
1756*/
1757static int
1758fuse_vnop_getpages(struct vop_getpages_args *ap)
1759{
1760	int i, error, nextoff, size, toff, count, npages;
1761	struct uio uio;
1762	struct iovec iov;
1763	vm_offset_t kva;
1764	struct buf *bp;
1765	struct vnode *vp;
1766	struct thread *td;
1767	struct ucred *cred;
1768	vm_page_t *pages;
1769
1770	FS_DEBUG2G("heh\n");
1771
1772	vp = ap->a_vp;
1773	KASSERT(vp->v_object, ("objectless vp passed to getpages"));
1774	td = curthread;			/* XXX */
1775	cred = curthread->td_ucred;	/* XXX */
1776	pages = ap->a_m;
1777	count = ap->a_count;
1778
1779	if (!fsess_opt_mmap(vnode_mount(vp))) {
1780		FS_DEBUG("called on non-cacheable vnode??\n");
1781		return (VM_PAGER_ERROR);
1782	}
1783	npages = btoc(count);
1784
1785	/*
1786	 * If the requested page is partially valid, just return it and
1787	 * allow the pager to zero-out the blanks.  Partially valid pages
1788	 * can only occur at the file EOF.
1789	 */
1790
1791	VM_OBJECT_WLOCK(vp->v_object);
1792	fuse_vm_page_lock_queues();
1793	if (pages[ap->a_reqpage]->valid != 0) {
1794		for (i = 0; i < npages; ++i) {
1795			if (i != ap->a_reqpage) {
1796				fuse_vm_page_lock(pages[i]);
1797				vm_page_free(pages[i]);
1798				fuse_vm_page_unlock(pages[i]);
1799			}
1800		}
1801		fuse_vm_page_unlock_queues();
1802		VM_OBJECT_WUNLOCK(vp->v_object);
1803		return 0;
1804	}
1805	fuse_vm_page_unlock_queues();
1806	VM_OBJECT_WUNLOCK(vp->v_object);
1807
1808	/*
1809	 * We use only the kva address for the buffer, but this is extremely
1810	 * convienient and fast.
1811	 */
1812	bp = getpbuf(&fuse_pbuf_freecnt);
1813
1814	kva = (vm_offset_t)bp->b_data;
1815	pmap_qenter(kva, pages, npages);
1816	PCPU_INC(cnt.v_vnodein);
1817	PCPU_ADD(cnt.v_vnodepgsin, npages);
1818
1819	iov.iov_base = (caddr_t)kva;
1820	iov.iov_len = count;
1821	uio.uio_iov = &iov;
1822	uio.uio_iovcnt = 1;
1823	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
1824	uio.uio_resid = count;
1825	uio.uio_segflg = UIO_SYSSPACE;
1826	uio.uio_rw = UIO_READ;
1827	uio.uio_td = td;
1828
1829	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
1830	pmap_qremove(kva, npages);
1831
1832	relpbuf(bp, &fuse_pbuf_freecnt);
1833
1834	if (error && (uio.uio_resid == count)) {
1835		FS_DEBUG("error %d\n", error);
1836		VM_OBJECT_WLOCK(vp->v_object);
1837		fuse_vm_page_lock_queues();
1838		for (i = 0; i < npages; ++i) {
1839			if (i != ap->a_reqpage) {
1840				fuse_vm_page_lock(pages[i]);
1841				vm_page_free(pages[i]);
1842				fuse_vm_page_unlock(pages[i]);
1843			}
1844		}
1845		fuse_vm_page_unlock_queues();
1846		VM_OBJECT_WUNLOCK(vp->v_object);
1847		return VM_PAGER_ERROR;
1848	}
1849	/*
1850	 * Calculate the number of bytes read and validate only that number
1851	 * of bytes.  Note that due to pending writes, size may be 0.  This
1852	 * does not mean that the remaining data is invalid!
1853	 */
1854
1855	size = count - uio.uio_resid;
1856	VM_OBJECT_WLOCK(vp->v_object);
1857	fuse_vm_page_lock_queues();
1858	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
1859		vm_page_t m;
1860
1861		nextoff = toff + PAGE_SIZE;
1862		m = pages[i];
1863
1864		if (nextoff <= size) {
1865			/*
1866			 * Read operation filled an entire page
1867			 */
1868			m->valid = VM_PAGE_BITS_ALL;
1869			KASSERT(m->dirty == 0,
1870			    ("fuse_getpages: page %p is dirty", m));
1871		} else if (size > toff) {
1872			/*
1873			 * Read operation filled a partial page.
1874			 */
1875			m->valid = 0;
1876			vm_page_set_valid_range(m, 0, size - toff);
1877			KASSERT(m->dirty == 0,
1878			    ("fuse_getpages: page %p is dirty", m));
1879		} else {
1880			/*
1881			 * Read operation was short.  If no error occured
1882			 * we may have hit a zero-fill section.   We simply
1883			 * leave valid set to 0.
1884			 */
1885			;
1886		}
1887		if (i != ap->a_reqpage)
1888			vm_page_readahead_finish(m);
1889	}
1890	fuse_vm_page_unlock_queues();
1891	VM_OBJECT_WUNLOCK(vp->v_object);
1892	return 0;
1893}
1894
1895/*
1896    struct vnop_putpages_args {
1897        struct vnode *a_vp;
1898        vm_page_t *a_m;
1899        int a_count;
1900        int a_sync;
1901        int *a_rtvals;
1902        vm_ooffset_t a_offset;
1903    };
1904*/
1905static int
1906fuse_vnop_putpages(struct vop_putpages_args *ap)
1907{
1908	struct uio uio;
1909	struct iovec iov;
1910	vm_offset_t kva;
1911	struct buf *bp;
1912	int i, error, npages, count;
1913	off_t offset;
1914	int *rtvals;
1915	struct vnode *vp;
1916	struct thread *td;
1917	struct ucred *cred;
1918	vm_page_t *pages;
1919	vm_ooffset_t fsize;
1920
1921	FS_DEBUG2G("heh\n");
1922
1923	vp = ap->a_vp;
1924	KASSERT(vp->v_object, ("objectless vp passed to putpages"));
1925	fsize = vp->v_object->un_pager.vnp.vnp_size;
1926	td = curthread;			/* XXX */
1927	cred = curthread->td_ucred;	/* XXX */
1928	pages = ap->a_m;
1929	count = ap->a_count;
1930	rtvals = ap->a_rtvals;
1931	npages = btoc(count);
1932	offset = IDX_TO_OFF(pages[0]->pindex);
1933
1934	if (!fsess_opt_mmap(vnode_mount(vp))) {
1935		FS_DEBUG("called on non-cacheable vnode??\n");
1936	}
1937	for (i = 0; i < npages; i++)
1938		rtvals[i] = VM_PAGER_AGAIN;
1939
1940	/*
1941	 * When putting pages, do not extend file past EOF.
1942	 */
1943
1944	if (offset + count > fsize) {
1945		count = fsize - offset;
1946		if (count < 0)
1947			count = 0;
1948	}
1949	/*
1950	 * We use only the kva address for the buffer, but this is extremely
1951	 * convienient and fast.
1952	 */
1953	bp = getpbuf(&fuse_pbuf_freecnt);
1954
1955	kva = (vm_offset_t)bp->b_data;
1956	pmap_qenter(kva, pages, npages);
1957	PCPU_INC(cnt.v_vnodeout);
1958	PCPU_ADD(cnt.v_vnodepgsout, count);
1959
1960	iov.iov_base = (caddr_t)kva;
1961	iov.iov_len = count;
1962	uio.uio_iov = &iov;
1963	uio.uio_iovcnt = 1;
1964	uio.uio_offset = offset;
1965	uio.uio_resid = count;
1966	uio.uio_segflg = UIO_SYSSPACE;
1967	uio.uio_rw = UIO_WRITE;
1968	uio.uio_td = td;
1969
1970	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
1971
1972	pmap_qremove(kva, npages);
1973	relpbuf(bp, &fuse_pbuf_freecnt);
1974
1975	if (!error) {
1976		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
1977
1978		for (i = 0; i < nwritten; i++) {
1979			rtvals[i] = VM_PAGER_OK;
1980			VM_OBJECT_WLOCK(pages[i]->object);
1981			vm_page_undirty(pages[i]);
1982			VM_OBJECT_WUNLOCK(pages[i]->object);
1983		}
1984	}
1985	return rtvals[0];
1986}
1987
/*
 * Character placed between the namespace prefix and the attribute name
 * when building or parsing "<prefix>.<name>" extended attribute strings
 * in the extattr routines of this file.
 */
static const char extattr_namespace_separator = '.';
1989
1990/*
1991    struct vop_getextattr_args {
1992        struct vop_generic_args a_gen;
1993        struct vnode *a_vp;
1994        int a_attrnamespace;
1995        const char *a_name;
1996        struct uio *a_uio;
1997        size_t *a_size;
1998        struct ucred *a_cred;
1999        struct thread *a_td;
2000    };
2001*/
2002static int
2003fuse_vnop_getextattr(struct vop_getextattr_args *ap)
2004{
2005	struct vnode *vp = ap->a_vp;
2006	struct uio *uio = ap->a_uio;
2007	struct fuse_dispatcher fdi = {0};
2008	struct fuse_getxattr_in *get_xattr_in;
2009	struct fuse_getxattr_out *get_xattr_out;
2010	struct mount *mp = vnode_mount(vp);
2011	char *prefix;
2012	size_t len;
2013	char *attr_str;
2014	struct thread *td = ap->a_td;
2015	struct ucred *cred = ap->a_cred;
2016	int err = 0;
2017
2018	fuse_trace_printf_vnop();
2019
2020	if (fuse_isdeadfs(vp))
2021		return ENXIO;
2022
2023	/* Default to looking for user attributes. */
2024	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2025		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2026	else
2027		prefix = EXTATTR_NAMESPACE_USER_STRING;
2028
2029	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2030	    strlen(ap->a_name) + 1;
2031
2032	fdisp_init(&fdi, len + sizeof(*get_xattr_in));
2033	fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred);
2034
2035	get_xattr_in = fdi.indata;
2036	/*
2037	 * Check to see whether we're querying the available size or
2038	 * issuing the actual request.  If we pass in 0, we get back struct
2039	 * fuse_getxattr_out.  If we pass in a non-zero size, we get back
2040	 * that much data, without the struct fuse_getxattr_out header.
2041	 */
2042	if (ap->a_size != NULL)
2043		get_xattr_in->size = 0;
2044	else
2045		get_xattr_in->size = uio->uio_resid;
2046
2047	attr_str = (char *)fdi.indata + sizeof(*get_xattr_in);
2048	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2049	    ap->a_name);
2050
2051	err = fdisp_wait_answ(&fdi);
2052
2053	if (err != 0) {
2054		if (err == ENOSYS)
2055			fsess_set_notimpl(mp, FUSE_GETXATTR);
2056		debug_printf("getxattr: got err=%d from daemon\n", err);
2057		goto out;
2058	}
2059
2060	/*
2061	 * If we get to this point (i.e. no error), we should have a valid
2062	 * answer of some sort.  i.e. non-zero iosize and a valid pointer.
2063	 */
2064	if ((fdi.answ == NULL) || (fdi.iosize == 0)) {
2065		debug_printf("getxattr: err = 0, but answ = %p, iosize = %zu\n",
2066		    fdi.answ, fdi.iosize);
2067		err = EINVAL;
2068		goto out;
2069	}
2070	get_xattr_out = fdi.answ;
2071
2072	if (ap->a_size != NULL) {
2073		*ap->a_size = get_xattr_out->size;
2074	} else if (fdi.iosize > 0) {
2075		err = uiomove(fdi.answ, fdi.iosize, uio);
2076	} else {
2077		err = EINVAL;
2078	}
2079
2080out:
2081	fdisp_destroy(&fdi);
2082	return (err);
2083}
2084
2085/*
2086    struct vop_setextattr_args {
2087        struct vop_generic_args a_gen;
2088        struct vnode *a_vp;
2089        int a_attrnamespace;
2090        const char *a_name;
2091        struct uio *a_uio;
2092        struct ucred *a_cred;
2093        struct thread *a_td;
2094    };
2095*/
2096static int
2097fuse_vnop_setextattr(struct vop_setextattr_args *ap)
2098{
2099	struct vnode *vp = ap->a_vp;
2100	struct uio *uio = ap->a_uio;
2101	struct fuse_dispatcher fdi = {0};
2102	struct fuse_setxattr_in *set_xattr_in;
2103	struct mount *mp = vnode_mount(vp);
2104	char *prefix;
2105	size_t len;
2106	char *attr_str;
2107	struct thread *td = ap->a_td;
2108	struct ucred *cred = ap->a_cred;
2109	int err = 0;
2110
2111	fuse_trace_printf_vnop();
2112
2113	if (fuse_isdeadfs(vp))
2114		return ENXIO;
2115
2116	/* Default to looking for user attributes. */
2117	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2118		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2119	else
2120		prefix = EXTATTR_NAMESPACE_USER_STRING;
2121
2122	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2123	    strlen(ap->a_name) + 1;
2124
2125	fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid);
2126	fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred);
2127
2128	set_xattr_in = fdi.indata;
2129	set_xattr_in->size = uio->uio_resid;
2130
2131	attr_str = (char *)fdi.indata + sizeof(*set_xattr_in);
2132	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2133	    ap->a_name);
2134
2135	err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len,
2136	    uio->uio_resid, uio);
2137	if (err != 0) {
2138		debug_printf("setxattr: got error %d from uiomove\n", err);
2139		goto out;
2140	}
2141
2142	err = fdisp_wait_answ(&fdi);
2143
2144	if (err != 0) {
2145		if (err == ENOSYS)
2146			fsess_set_notimpl(mp, FUSE_SETXATTR);
2147		debug_printf("setxattr: got err=%d from daemon\n", err);
2148		goto out;
2149	}
2150
2151out:
2152	fdisp_destroy(&fdi);
2153	return (err);
2154}
2155
2156/*
2157 * The Linux / FUSE extended attribute list is simply a collection of
2158 * NUL-terminated strings.  The FreeBSD extended attribute list is a single
2159 * byte length followed by a non-NUL terminated string.  So, this allows
2160 * conversion of the Linux / FUSE format to the FreeBSD format in place.
2161 * Linux attribute names are reported with the namespace as a prefix (e.g.
2162 * "user.attribute_name"), but in FreeBSD they are reported without the
2163 * namespace prefix (e.g. "attribute_name").  So, we're going from:
2164 *
2165 * user.attr_name1\0user.attr_name2\0
2166 *
2167 * to:
2168 *
2169 * <num>attr_name1<num>attr_name2
2170 *
2171 * Where "<num>" is a single byte number of characters in the attribute name.
2172 *
2173 * Args:
 * prefix - extattr namespace prefix string
2175 * list, list_len - input list with namespace prefixes
2176 * bsd_list, bsd_list_len - output list compatible with bsd vfs
2177 */
2178static int
2179fuse_xattrlist_convert(char *prefix, const char *list, int list_len,
2180    char *bsd_list, int *bsd_list_len)
2181{
2182	int len, pos, dist_to_next, prefix_len;
2183
2184	pos = 0;
2185	*bsd_list_len = 0;
2186	prefix_len = strlen(prefix);
2187
2188	while (pos < list_len && list[pos] != '\0') {
2189		dist_to_next = strlen(&list[pos]) + 1;
2190		if (bcmp(&list[pos], prefix, prefix_len) == 0 &&
2191		    list[pos + prefix_len] == extattr_namespace_separator) {
2192			len = dist_to_next -
2193			    (prefix_len + sizeof(extattr_namespace_separator)) - 1;
2194			if (len >= EXTATTR_MAXNAMELEN)
2195				return (ENAMETOOLONG);
2196
2197			bsd_list[*bsd_list_len] = len;
2198			memcpy(&bsd_list[*bsd_list_len + 1],
2199			    &list[pos + prefix_len +
2200			    sizeof(extattr_namespace_separator)], len);
2201
2202			*bsd_list_len += len + 1;
2203		}
2204
2205		pos += dist_to_next;
2206	}
2207
2208	return (0);
2209}
2210
2211/*
2212    struct vop_listextattr_args {
2213        struct vop_generic_args a_gen;
2214        struct vnode *a_vp;
2215        int a_attrnamespace;
2216        struct uio *a_uio;
2217        size_t *a_size;
2218        struct ucred *a_cred;
2219        struct thread *a_td;
2220    };
2221*/
2222static int
2223fuse_vnop_listextattr(struct vop_listextattr_args *ap)
2224{
2225	struct vnode *vp = ap->a_vp;
2226	struct uio *uio = ap->a_uio;
2227	struct fuse_dispatcher fdi = {0};
2228	struct fuse_getxattr_in *get_xattr_in;
2229	struct fuse_getxattr_out *get_xattr_out;
2230	struct mount *mp = vnode_mount(vp);
2231	size_t len;
2232	char *prefix;
2233	char *attr_str;
2234	char *bsd_list = NULL;
2235	int bsd_list_len;
2236	struct thread *td = ap->a_td;
2237	struct ucred *cred = ap->a_cred;
2238	int err = 0;
2239
2240	fuse_trace_printf_vnop();
2241
2242	if (fuse_isdeadfs(vp))
2243		return ENXIO;
2244
2245	/*
2246	 * Add space for a NUL and the period separator if enabled.
2247	 * Default to looking for user attributes.
2248	 */
2249	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2250		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2251	else
2252		prefix = EXTATTR_NAMESPACE_USER_STRING;
2253
2254	len = strlen(prefix) + sizeof(extattr_namespace_separator) + 1;
2255
2256	fdisp_init(&fdi, sizeof(*get_xattr_in) + len);
2257	fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
2258
2259	get_xattr_in = fdi.indata;
2260	if (ap->a_size != NULL)
2261		get_xattr_in->size = 0;
2262	else
2263		get_xattr_in->size = uio->uio_resid + sizeof(*get_xattr_out);
2264
2265
2266	attr_str = (char *)fdi.indata + sizeof(*get_xattr_in);
2267	snprintf(attr_str, len, "%s%c", prefix, extattr_namespace_separator);
2268
2269	err = fdisp_wait_answ(&fdi);
2270	if (err != 0) {
2271		if (err == ENOSYS)
2272			fsess_set_notimpl(mp, FUSE_LISTXATTR);
2273		debug_printf("listextattr: got err=%d from daemon\n", err);
2274		goto out;
2275	}
2276
2277	if ((fdi.answ == NULL) || (fdi.iosize == 0)) {
2278		err = EINVAL;
2279		goto out;
2280	}
2281	get_xattr_out = fdi.answ;
2282
2283	if (ap->a_size != NULL) {
2284		*ap->a_size = get_xattr_out->size;
2285	} else if (fdi.iosize > 0) {
2286		/*
2287		 * The Linux / FUSE attribute list format isn't the same
2288		 * as FreeBSD's format.  So we need to transform it into
2289		 * FreeBSD's format before giving it to the user.
2290		 */
2291		bsd_list = malloc(fdi.iosize, M_TEMP, M_WAITOK);
2292		err = fuse_xattrlist_convert(prefix, fdi.answ, fdi.iosize,
2293		    bsd_list, &bsd_list_len);
2294		if (err != 0)
2295			goto out;
2296
2297		err = uiomove(bsd_list, bsd_list_len, uio);
2298	} else {
2299		debug_printf("listextattr: returned iosize %zu for %s attribute list is "
2300		    "too small\n", fdi.iosize, prefix);
2301		err = EINVAL;
2302	}
2303
2304out:
2305	free(bsd_list, M_TEMP);
2306	fdisp_destroy(&fdi);
2307	return (err);
2308}
2309
2310/*
2311    struct vop_deleteextattr_args {
2312        struct vop_generic_args a_gen;
2313        struct vnode *a_vp;
2314        int a_attrnamespace;
2315        const char *a_name;
2316        struct ucred *a_cred;
2317        struct thread *a_td;
2318    };
2319*/
2320static int
2321fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
2322{
2323	struct vnode *vp = ap->a_vp;
2324	struct fuse_dispatcher fdi = {0};
2325	struct mount *mp = vnode_mount(vp);
2326	char *prefix;
2327	size_t len;
2328	char *attr_str;
2329	struct thread *td = ap->a_td;
2330	struct ucred *cred = ap->a_cred;
2331	int err;
2332
2333	fuse_trace_printf_vnop();
2334
2335	if (fuse_isdeadfs(vp))
2336		return ENXIO;
2337
2338	/* Default to looking for user attributes. */
2339	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
2340		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
2341	else
2342		prefix = EXTATTR_NAMESPACE_USER_STRING;
2343
2344	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
2345	    strlen(ap->a_name) + 1;
2346
2347	fdisp_init(&fdi, len);
2348	fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred);
2349
2350	attr_str = fdi.indata;
2351	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
2352	    ap->a_name);
2353
2354	err = fdisp_wait_answ(&fdi);
2355	if (err != 0) {
2356		if (err == ENOSYS)
2357			fsess_set_notimpl(mp, FUSE_REMOVEXATTR);
2358		debug_printf("removexattr: got err=%d from daemon\n", err);
2359	}
2360
2361	fdisp_destroy(&fdi);
2362	return (err);
2363}
2364
2365/*
2366    struct vnop_print_args {
2367        struct vnode *a_vp;
2368    };
2369*/
2370static int
2371fuse_vnop_print(struct vop_print_args *ap)
2372{
2373	struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp);
2374
2375	printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n",
2376	    (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid,
2377	    (uintmax_t)fvdat->nlookup,
2378	    fvdat->flag);
2379
2380	return 0;
2381}
2382