1/*  $NetBSD$ */
2
3/*-
4 *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *     notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *     notice, this list of conditions and the following disclaimer in the
13 *     documentation and/or other materials provided with the distribution.
14 *
15 *  THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16 *  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17 *  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 *  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19 *  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 *  POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <stdio.h>
29#include <unistd.h>
30#include <stdlib.h>
31#include <libgen.h>
32#include <errno.h>
33#include <err.h>
34#include <sysexits.h>
35#include <syslog.h>
36#include <puffs.h>
37#include <sys/socket.h>
38#include <sys/socket.h>
39#include <sys/extattr.h>
40#include <sys/time.h>
41#include <machine/vmparam.h>
42
43#include "perfuse_priv.h"
44#include "fuse.h"
45
46extern int perfuse_diagflags;
47
48#if 0
49static void print_node(const char *, puffs_cookie_t);
50#endif
51#ifdef PUFFS_KFLAG_CACHE_FS_TTL
52static void perfuse_newinfo_setttl(struct puffs_newinfo *,
53    struct puffs_node *, struct fuse_entry_out *, struct fuse_attr_out *);
54#endif /* PUFFS_KFLAG_CACHE_FS_TTL */
55static int xchg_msg(struct puffs_usermount *, puffs_cookie_t,
56    perfuse_msg_t *, size_t, enum perfuse_xchg_pb_reply);
57static int mode_access(puffs_cookie_t, const struct puffs_cred *, mode_t);
58static int sticky_access(puffs_cookie_t, struct puffs_node *,
59    const struct puffs_cred *);
60static void fuse_attr_to_vap(struct perfuse_state *,
61    struct vattr *, struct fuse_attr *);
62static int node_lookup_common(struct puffs_usermount *, puffs_cookie_t,
63    struct puffs_newinfo *, const char *, const struct puffs_cred *,
64    struct puffs_node **);
65static int node_mk_common(struct puffs_usermount *, puffs_cookie_t,
66    struct puffs_newinfo *, const struct puffs_cn *pcn, perfuse_msg_t *);
67static uint64_t readdir_last_cookie(struct fuse_dirent *, size_t);
68static ssize_t fuse_to_dirent(struct puffs_usermount *, puffs_cookie_t,
69    struct fuse_dirent *, size_t);
70static void readdir_buffered(puffs_cookie_t, struct dirent *, off_t *,
71    size_t *);
72static void node_ref(puffs_cookie_t);
73static void node_rele(puffs_cookie_t);
74static void requeue_request(struct puffs_usermount *,
75    puffs_cookie_t opc, enum perfuse_qtype);
76static int dequeue_requests(puffs_cookie_t opc, enum perfuse_qtype, int);
77#define DEQUEUE_ALL 0
78
/*
 *  I/O flags copied from <sys/vnode.h>, where they are only
 *  visible inside the #ifdef _KERNEL section
 */
#define IO_DSYNC	0x00200
#define IO_SYNC		(0x40|IO_DSYNC)
#define IO_DIRECT	0x02000

/*
 *  Locking flags and open flags conversion copied from <sys/fcntl.h>,
 *  where they are only visible inside the #ifdef _KERNEL section
 */
#define F_WAIT		0x010
#define F_FLOCK		0x020
#define OFLAGS(fflags)  ((fflags) - 1)
92
93/*
94 * Borrowed from src/sys/kern/vfs_subr.c and src/sys/sys/vnode.h
95 */
96const enum vtype iftovt_tab[16] = {
97	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
98        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
99};
100const int vttoif_tab[9] = {
101	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
102        S_IFSOCK, S_IFIFO, S_IFMT,
103};
104
105#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
106#define VTTOIF(indx) (vttoif_tab[(int)(indx)])
107
#if 0
/*
 * Debugging aid: print the name, cookie, FUSE nodeid and inode
 * number of the node behind opc, prefixed by the string in func.
 */
static void
print_node(const char *func, puffs_cookie_t opc)
{
	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
	struct vattr *vap = &((struct puffs_node *)opc)->pn_va;

	printf("%s: \"%s\", opc = %p, nodeid = 0x%"PRIx64" ino = %"PRIu64"\n",
	       func, pnd->pnd_name, opc, pnd->pnd_nodeid, vap->va_fileid);
}
#endif /* 0 */
126
127int
128perfuse_node_close_common(struct puffs_usermount *pu, puffs_cookie_t opc,
129	int mode)
130{
131	struct perfuse_state *ps;
132	perfuse_msg_t *pm;
133	int op;
134	uint64_t fh;
135	struct fuse_release_in *fri;
136	struct perfuse_node_data *pnd;
137	struct puffs_node *pn;
138	int error;
139
140	ps = puffs_getspecific(pu);
141	pn = (struct puffs_node *)opc;
142	pnd = PERFUSE_NODE_DATA(pn);
143
144	if (puffs_pn_getvap(pn)->va_type == VDIR) {
145		op = FUSE_RELEASEDIR;
146		mode = FREAD;
147	} else {
148		op = FUSE_RELEASE;
149	}
150
151	/*
152	 * Destroy the filehandle before sending the
153	 * request to the FUSE filesystem, otherwise
154	 * we may get a second close() while we wait
155	 * for the reply, and we would end up closing
156	 * the same fh twice instead of closng both.
157	 */
158	fh = perfuse_get_fh(opc, mode);
159	perfuse_destroy_fh(pn, fh);
160
161	/*
162	 * release_flags may be set to FUSE_RELEASE_FLUSH
163	 * to flush locks. lock_owner must be set in that case
164	 *
165	 * ps_new_msg() is called with NULL creds, which will
166	 * be interpreted as FUSE superuser. We come here from the
167	 * inactive method, which provides no creds, but obviously
168	 * runs with kernel privilege.
169	 */
170	pm = ps->ps_new_msg(pu, opc, op, sizeof(*fri), NULL);
171	fri = GET_INPAYLOAD(ps, pm, fuse_release_in);
172	fri->fh = fh;
173	fri->flags = 0;
174	fri->release_flags = 0;
175	fri->lock_owner = pnd->pnd_lock_owner;
176	fri->flags = (fri->lock_owner != 0) ? FUSE_RELEASE_FLUSH : 0;
177
178#ifdef PERFUSE_DEBUG
179	if (perfuse_diagflags & PDF_FH)
180		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
181			 __func__, (void *)opc, pnd->pnd_nodeid, fri->fh);
182#endif
183
184	if ((error = xchg_msg(pu, opc, pm,
185			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
186		DERRX(EX_SOFTWARE, "%s: freed fh = 0x%"PRIx64" but filesystem "
187		      "returned error = %d", __func__, fh, error);
188
189	ps->ps_destroy_msg(pm);
190
191	return 0;
192}
193
194static int
195xchg_msg(struct puffs_usermount *pu, puffs_cookie_t opc, perfuse_msg_t *pm,
196	size_t len, enum perfuse_xchg_pb_reply wait)
197{
198	struct perfuse_state *ps;
199	struct perfuse_node_data *pnd;
200	struct perfuse_trace *pt = NULL;
201	int error;
202
203	ps = puffs_getspecific(pu);
204	pnd = NULL;
205	if ((struct puffs_node *)opc != NULL)
206		pnd = PERFUSE_NODE_DATA(opc);
207
208#ifdef PERFUSE_DEBUG
209	if ((perfuse_diagflags & PDF_FILENAME) && (opc != 0))
210		DPRINTF("file = \"%s\", ino = %"PRIu64" flags = 0x%x\n",
211			perfuse_node_path(ps, opc),
212			((struct puffs_node *)opc)->pn_va.va_fileid,
213			PERFUSE_NODE_DATA(opc)->pnd_flags);
214#endif
215	ps->ps_xchgcount++;
216	if (pnd)
217		pnd->pnd_inxchg++;
218
219	/*
220	 * Record FUSE call start if requested
221	 */
222	if (perfuse_diagflags & PDF_TRACE)
223		pt = perfuse_trace_begin(ps, opc, pm);
224
225	/*
226	 * Do actual FUSE exchange
227	 */
228	if ((error = ps->ps_xchg_msg(pu, pm, len, wait)) != 0)
229		ps->ps_destroy_msg(pm);
230
231	/*
232	 * Record FUSE call end if requested
233	 */
234	if (pt != NULL)
235		perfuse_trace_end(ps, pt, error);
236
237	ps->ps_xchgcount--;
238	if (pnd) {
239		pnd->pnd_inxchg--;
240		(void)dequeue_requests(opc, PCQ_AFTERXCHG, DEQUEUE_ALL);
241	}
242
243	return error;
244}
245
246static int
247mode_access(puffs_cookie_t opc, const struct puffs_cred *pcr, mode_t mode)
248{
249	struct puffs_node *pn;
250	struct vattr *va;
251
252	/*
253	 * pcr is NULL for self open through fsync or readdir.
254	 * In both case, access control is useless, as it was
255	 * done before, at open time.
256	 */
257	if (pcr == NULL)
258		return 0;
259
260	pn = (struct puffs_node *)opc;
261	va = puffs_pn_getvap(pn);
262	return puffs_access(va->va_type, va->va_mode,
263			    va->va_uid, va->va_gid,
264			    mode, pcr);
265}
266
267static int
268sticky_access(puffs_cookie_t opc, struct puffs_node *targ,
269	      const struct puffs_cred *pcr)
270{
271	uid_t uid;
272	int sticky, owner;
273
274	/*
275	 * This covers the case where the kernel requests a DELETE
276	 * or RENAME on its own, and where puffs_cred_getuid would
277	 * return -1. While such a situation should not happen,
278	 * we allow it here.
279	 *
280	 * This also allows root to tamper with other users' files
281	 * that have the sticky bit.
282	 */
283	if (puffs_cred_isjuggernaut(pcr))
284		return 0;
285
286	if (puffs_cred_getuid(pcr, &uid) != 0)
287		DERRX(EX_SOFTWARE, "puffs_cred_getuid fails in %s", __func__);
288
289	sticky = puffs_pn_getvap(opc)->va_mode & S_ISTXT;
290	owner = puffs_pn_getvap(targ)->va_uid == uid;
291
292	if (sticky && !owner)
293		return EACCES;
294
295	return 0;
296}
297
298
299static void
300fuse_attr_to_vap(struct perfuse_state *ps, struct vattr *vap,
301	struct fuse_attr *fa)
302{
303	vap->va_type = IFTOVT(fa->mode);
304	vap->va_mode = fa->mode & ALLPERMS;
305	vap->va_nlink = fa->nlink;
306	vap->va_uid = fa->uid;
307	vap->va_gid = fa->gid;
308	vap->va_fsid = (long)ps->ps_fsid;
309	vap->va_fileid = fa->ino;
310	vap->va_size = fa->size;
311	vap->va_blocksize = fa->blksize;
312	vap->va_atime.tv_sec = (time_t)fa->atime;
313	vap->va_atime.tv_nsec = (long) fa->atimensec;
314	vap->va_mtime.tv_sec = (time_t)fa->mtime;
315	vap->va_mtime.tv_nsec = (long)fa->mtimensec;
316	vap->va_ctime.tv_sec = (time_t)fa->ctime;
317	vap->va_ctime.tv_nsec = (long)fa->ctimensec;
318	vap->va_birthtime.tv_sec = 0;
319	vap->va_birthtime.tv_nsec = 0;
320	vap->va_gen = 0;
321	vap->va_flags = 0;
322	vap->va_rdev = fa->rdev;
323	vap->va_bytes = fa->size;
324	vap->va_filerev = (u_quad_t)PUFFS_VNOVAL;
325	vap->va_vaflags = 0;
326
327	if (vap->va_blocksize == 0)
328		vap->va_blocksize = DEV_BSIZE;
329
330	if (vap->va_size == (size_t)PUFFS_VNOVAL) /* XXX */
331		vap->va_size = 0;
332
333	return;
334}
335
#ifdef PUFFS_KFLAG_CACHE_FS_TTL
/*
 * Record in pni the cache time-to-live values the FUSE filesystem
 * sent in its reply.
 *
 * Exactly one of feo and fao is expected to be non NULL:
 * - fao (attribute reply) sets the attribute TTL only.
 * - feo (entry reply) sets both the attribute and the name TTL,
 *   and stores the absolute name expiration date in the
 *   perfuse data of pn.
 */
static void
perfuse_newinfo_setttl(struct puffs_newinfo *pni,
    struct puffs_node *pn, struct fuse_entry_out *feo,
    struct fuse_attr_out *fao)
{
	struct timespec attr_ttl;

#ifdef PERFUSE_DEBUG
	if ((feo == NULL) && (fao == NULL))
		DERRX(EX_SOFTWARE, "%s: feo and fao NULL", __func__);

	if ((feo != NULL) && (fao != NULL))
		DERRX(EX_SOFTWARE, "%s: feo and fao != NULL", __func__);
#endif /* PERFUSE_DEBUG */

	if (fao != NULL) {
		attr_ttl.tv_sec = fao->attr_valid;
		attr_ttl.tv_nsec = fao->attr_valid_nsec;

		puffs_newinfo_setvattl(pni, &attr_ttl);
	}

	if (feo != NULL) {
		struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(pn);
		struct timespec name_ttl;
		struct timespec now;

		attr_ttl.tv_sec = feo->attr_valid;
		attr_ttl.tv_nsec = feo->attr_valid_nsec;
		name_ttl.tv_sec = feo->entry_valid;
		name_ttl.tv_nsec = feo->entry_valid_nsec;

		puffs_newinfo_setvattl(pni, &attr_ttl);
		puffs_newinfo_setcnttl(pni, &name_ttl);

		if (clock_gettime(CLOCK_REALTIME, &now) != 0)
			DERR(EX_OSERR, "clock_gettime failed");

		/* The name expires name_ttl from now */
		timespecadd(&now, &name_ttl, &pnd->pnd_cn_expire);
	}
}
#endif /* PUFFS_KFLAG_CACHE_FS_TTL */
382
383static int
384node_lookup_common(struct puffs_usermount *pu, puffs_cookie_t opc,
385	struct puffs_newinfo *pni, const char *path,
386	const struct puffs_cred *pcr, struct puffs_node **pnp)
387{
388	struct perfuse_state *ps;
389	struct perfuse_node_data *oldpnd;
390	perfuse_msg_t *pm;
391	struct fuse_entry_out *feo;
392	struct puffs_node *pn;
393	size_t len;
394	int error;
395
396	/*
397	 * Prevent further lookups if the parent was removed
398	 */
399	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
400		return ESTALE;
401
402	if (pnp == NULL)
403		DERRX(EX_SOFTWARE, "pnp must be != NULL");
404
405	ps = puffs_getspecific(pu);
406
407#ifdef PERFUSE_DEBUG
408	if (perfuse_diagflags & PDF_FILENAME)
409		DPRINTF("%s: opc = %p, file = \"%s\" looking up \"%s\"\n",
410			__func__, (void *)opc,
411			perfuse_node_path(ps, opc), path);
412
413	if (strcmp(path, ".") == 0)
414		DERRX(EX_SOFTWARE, "unexpected dot-lookup");
415
416	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_RECLAIMED)
417		DERRX(EX_SOFTWARE,
418		      "looking up reclaimed node opc = %p, name = \"%s\"",
419		      opc, path);
420
421	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_INVALID)
422		DERRX(EX_SOFTWARE,
423		      "looking up freed node opc = %p, name = \"%s\"",
424		      opc, path);
425#endif /* PERFUSE_DEBUG */
426
427	len = strlen(path) + 1;
428	pm = ps->ps_new_msg(pu, opc, FUSE_LOOKUP, len, pcr);
429	(void)strlcpy(_GET_INPAYLOAD(ps, pm, char *), path, len);
430
431	if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
432		return error;
433
434	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
435
436	/*
437	 * Check for a known node, not reclaimed, with another name.
438	 * It may have been moved, or we can lookup ../
439	 */
440	if (((oldpnd = perfuse_node_bynodeid(ps, feo->nodeid)) != NULL) &&
441	    !(oldpnd->pnd_flags & PND_RECLAIMED)) {
442		/*
443		 * Save the new node name if not ..
444		 */
445		if (strncmp(path, "..", len) != 0)
446			(void)strlcpy(oldpnd->pnd_name,
447				      path, MAXPATHLEN);
448		pn = oldpnd->pnd_pn;
449
450	} else {
451		pn = perfuse_new_pn(pu, path, opc);
452		PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
453		perfuse_node_cache(ps, pn);
454	}
455
456#ifdef PERFUSE_DEBUG
457	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_RECLAIMED)
458		DERRX(EX_SOFTWARE,
459		      "reclaimed in lookup opc = %p, name = \"%s\", ck = %p",
460		      opc, path, pn);
461
462	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_INVALID)
463		DERRX(EX_SOFTWARE,
464		      "freed in lookup opc = %p, name = \"%s\", ck = %p",
465		      opc, path, pn);
466#endif /* PERFUSE_DEBUG */
467
468	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
469	pn->pn_va.va_gen = (u_long)(feo->generation);
470	PERFUSE_NODE_DATA(pn)->pnd_fuse_nlookup++;
471
472	*pnp = pn;
473
474#ifdef PERFUSE_DEBUG
475	if (perfuse_diagflags & PDF_FILENAME)
476		DPRINTF("%s: opc = %p, looked up opc = %p, "
477			"nodeid = 0x%"PRIx64" file = \"%s\"\n", __func__,
478			(void *)opc, pn, feo->nodeid, path);
479#endif
480
481	if (pni != NULL) {
482#ifdef PUFFS_KFLAG_CACHE_FS_TTL
483		puffs_newinfo_setva(pni, &pn->pn_va);
484		perfuse_newinfo_setttl(pni, pn, feo, NULL);
485#endif /* PUFFS_KFLAG_CACHE_FS_TTL */
486		puffs_newinfo_setcookie(pni, pn);
487		puffs_newinfo_setvtype(pni, pn->pn_va.va_type);
488		puffs_newinfo_setsize(pni, (voff_t)pn->pn_va.va_size);
489		puffs_newinfo_setrdev(pni, pn->pn_va.va_rdev);
490	}
491
492	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_NODELEAK) {
493		PERFUSE_NODE_DATA(pn)->pnd_flags &= ~PND_NODELEAK;
494		ps->ps_nodeleakcount--;
495	}
496
497	ps->ps_destroy_msg(pm);
498
499	return 0;
500}
501
502
503/*
504 * Common code for methods that create objects:
505 * perfuse_node_mkdir
506 * perfuse_node_mknod
507 * perfuse_node_symlink
508 */
509static int
510node_mk_common(struct puffs_usermount *pu, puffs_cookie_t opc,
511	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
512	perfuse_msg_t *pm)
513{
514	struct perfuse_state *ps;
515	struct puffs_node *pn;
516	struct fuse_entry_out *feo;
517	int error;
518
519	ps =  puffs_getspecific(pu);
520
521	if ((error = xchg_msg(pu, opc, pm, sizeof(*feo), wait_reply)) != 0)
522		return error;
523
524	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
525	if (feo->nodeid == PERFUSE_UNKNOWN_NODEID)
526		DERRX(EX_SOFTWARE, "%s: no nodeid", __func__);
527
528	pn = perfuse_new_pn(pu, pcn->pcn_name, opc);
529	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
530	PERFUSE_NODE_DATA(pn)->pnd_puffs_nlookup++;
531	perfuse_node_cache(ps, pn);
532
533	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
534	pn->pn_va.va_gen = (u_long)(feo->generation);
535
536	puffs_newinfo_setcookie(pni, pn);
537#ifdef PUFFS_KFLAG_CACHE_FS_TTL
538	puffs_newinfo_setva(pni, &pn->pn_va);
539	perfuse_newinfo_setttl(pni, pn, feo, NULL);
540#endif /* PUFFS_KFLAG_CACHE_FS_TTL */
541
542
543#ifdef PERFUSE_DEBUG
544	if (perfuse_diagflags & PDF_FILENAME)
545		DPRINTF("%s: opc = %p, file = \"%s\", flags = 0x%x "
546			"nodeid = 0x%"PRIx64"\n",
547			__func__, (void *)pn, pcn->pcn_name,
548			PERFUSE_NODE_DATA(pn)->pnd_flags, feo->nodeid);
549#endif
550	ps->ps_destroy_msg(pm);
551
552	/* Parents is now dirty */
553	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
554
555	return 0;
556}
557
558static uint64_t
559readdir_last_cookie(struct fuse_dirent *fd, size_t fd_len)
560{
561	size_t len;
562	size_t seen = 0;
563	char *ndp;
564
565	do {
566		len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
567		seen += len;
568
569		if (seen >= fd_len)
570			break;
571
572		ndp = (char *)(void *)fd + (size_t)len;
573		fd = (struct fuse_dirent *)(void *)ndp;
574	} while (1 /* CONSTCOND */);
575
576	return fd->off;
577}
578
579static ssize_t
580fuse_to_dirent(struct puffs_usermount *pu, puffs_cookie_t opc,
581	struct fuse_dirent *fd, size_t fd_len)
582{
583	struct dirent *dents;
584	size_t dents_len;
585	ssize_t written;
586	uint64_t fd_offset;
587	struct fuse_dirent *fd_base;
588	size_t len;
589
590	fd_base = fd;
591	fd_offset = 0;
592	written = 0;
593	dents = PERFUSE_NODE_DATA(opc)->pnd_dirent;
594	dents_len = (size_t)PERFUSE_NODE_DATA(opc)->pnd_dirent_len;
595
596	do {
597		char *ndp;
598		size_t reclen;
599
600		reclen = _DIRENT_RECLEN(dents, fd->namelen);
601
602		/*
603		 * Check we do not overflow the output buffer
604		 * struct fuse_dirent is bigger than struct dirent,
605		 * so we should always use fd_len and never reallocate
606		 * later.
607		 * If we have to reallocate,try to double the buffer
608		 * each time so that we do not have to do it too often.
609		 */
610		if (written + reclen > dents_len) {
611			if (dents_len == 0)
612				dents_len = fd_len;
613			else
614				dents_len =
615				   MAX(2 * dents_len, written + reclen);
616
617			dents = PERFUSE_NODE_DATA(opc)->pnd_dirent;
618			if ((dents = realloc(dents, dents_len)) == NULL)
619				DERR(EX_OSERR, "%s: malloc failed", __func__);
620
621			PERFUSE_NODE_DATA(opc)->pnd_dirent = dents;
622			PERFUSE_NODE_DATA(opc)->pnd_dirent_len = dents_len;
623
624			/*
625			 * (void *) for delint
626			 */
627			ndp = (char *)(void *)dents + written;
628			dents = (struct dirent *)(void *)ndp;
629		}
630
631		/*
632		 * Filesystem was mounted without -o use_ino
633		 * Perform a lookup to find it.
634		 */
635		if (fd->ino == PERFUSE_UNKNOWN_INO) {
636			struct puffs_node *pn;
637
638			if (node_lookup_common(pu, opc, NULL, fd->name,
639					       NULL, &pn) != 0) {
640				DWARNX("node_lookup_common failed");
641			} else {
642				fd->ino = pn->pn_va.va_fileid;
643				(void)perfuse_node_reclaim(pu, pn);
644			}
645		}
646
647		dents->d_fileno = fd->ino;
648		dents->d_reclen = (unsigned short)reclen;
649		dents->d_namlen = fd->namelen;
650		dents->d_type = fd->type;
651		strlcpy(dents->d_name, fd->name, fd->namelen + 1);
652
653#ifdef PERFUSE_DEBUG
654		if (perfuse_diagflags & PDF_READDIR)
655			DPRINTF("%s: translated \"%s\" ino = %"PRIu64"\n",
656				__func__, dents->d_name, dents->d_fileno);
657#endif
658
659		dents = _DIRENT_NEXT(dents);
660		written += reclen;
661
662		/*
663		 * Move to the next record.
664		 * fd->off is not the offset, it is an opaque cookie
665		 * given by the filesystem to keep state across multiple
666		 * readdir() operation.
667		 * Use record alignement instead.
668		 */
669		len = FUSE_DIRENT_ALIGN(sizeof(*fd) + fd->namelen);
670#ifdef PERFUSE_DEBUG
671		if (perfuse_diagflags & PDF_READDIR)
672			DPRINTF("%s: record at %"PRId64"/0x%"PRIx64" "
673				"length = %zd/0x%zx. "
674				"next record at %"PRId64"/0x%"PRIx64" "
675				"max %zd/0x%zx\n",
676				__func__, fd_offset, fd_offset, len, len,
677				fd_offset + len, fd_offset + len,
678				fd_len, fd_len);
679#endif
680		fd_offset += len;
681
682		/*
683		 * Check if next record is still within the packet
684		 * If it is not, we reached the end of the buffer.
685		 */
686		if (fd_offset >= fd_len)
687			break;
688
689		/*
690		 * (void *) for delint
691		 */
692		ndp = (char *)(void *)fd_base + (size_t)fd_offset;
693		fd = (struct fuse_dirent *)(void *)ndp;
694
695	} while (1 /* CONSTCOND */);
696
697	/*
698	 * Adjust the dirent output length
699	 */
700	if (written != -1)
701		PERFUSE_NODE_DATA(opc)->pnd_dirent_len = written;
702
703	return written;
704}
705
706static void
707readdir_buffered(puffs_cookie_t opc, struct dirent *dent, off_t *readoff,
708	size_t *reslen)
709{
710	struct dirent *fromdent;
711	struct perfuse_node_data *pnd;
712	char *ndp;
713
714	pnd = PERFUSE_NODE_DATA(opc);
715
716	while (*readoff < pnd->pnd_dirent_len) {
717		/*
718		 * (void *) for delint
719		 */
720		ndp = (char *)(void *)pnd->pnd_dirent + (size_t)*readoff;
721		fromdent = (struct dirent *)(void *)ndp;
722
723		if (*reslen < _DIRENT_SIZE(fromdent))
724			break;
725
726		memcpy(dent, fromdent, _DIRENT_SIZE(fromdent));
727		*readoff += _DIRENT_SIZE(fromdent);
728		*reslen -= _DIRENT_SIZE(fromdent);
729
730		dent = _DIRENT_NEXT(dent);
731	}
732
733#ifdef PERFUSE_DEBUG
734	if (perfuse_diagflags & PDF_READDIR)
735		DPRINTF("%s: readoff = %"PRId64",  "
736			"pnd->pnd_dirent_len = %"PRId64"\n",
737			__func__, *readoff, pnd->pnd_dirent_len);
738#endif
739	if (*readoff >=  pnd->pnd_dirent_len) {
740		free(pnd->pnd_dirent);
741		pnd->pnd_dirent = NULL;
742		pnd->pnd_dirent_len = 0;
743	}
744
745	return;
746}
747
748
749static void
750node_ref(puffs_cookie_t opc)
751{
752	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
753
754#ifdef PERFUSE_DEBUG
755	if (pnd->pnd_flags & PND_INVALID)
756		DERRX(EX_SOFTWARE, "Use of freed node opc = %p", opc);
757#endif /* PERFUSE_DEBUG */
758
759	pnd->pnd_ref++;
760	return;
761}
762
763static void
764node_rele(puffs_cookie_t opc)
765{
766	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
767
768#ifdef PERFUSE_DEBUG
769	if (pnd->pnd_flags & PND_INVALID)
770		DERRX(EX_SOFTWARE, "Use of freed node opc = %p", opc);
771#endif /* PERFUSE_DEBUG */
772
773	pnd->pnd_ref--;
774
775	if (pnd->pnd_ref == 0)
776		(void)dequeue_requests(opc, PCQ_REF, DEQUEUE_ALL);
777
778	return;
779}
780
781static void
782requeue_request(struct puffs_usermount *pu, puffs_cookie_t opc,
783	enum perfuse_qtype type)
784{
785	struct perfuse_cc_queue pcq;
786	struct perfuse_node_data *pnd;
787#ifdef PERFUSE_DEBUG
788	struct perfuse_state *ps;
789
790	ps = perfuse_getspecific(pu);
791#endif
792
793	pnd = PERFUSE_NODE_DATA(opc);
794	pcq.pcq_type = type;
795	pcq.pcq_cc = puffs_cc_getcc(pu);
796	TAILQ_INSERT_TAIL(&pnd->pnd_pcq, &pcq, pcq_next);
797
798#ifdef PERFUSE_DEBUG
799	if (perfuse_diagflags & PDF_REQUEUE)
800		DPRINTF("%s: REQUEUE opc = %p, pcc = %p (%s)\n",
801		        __func__, (void *)opc, pcq.pcq_cc,
802			perfuse_qtypestr[type]);
803#endif
804
805	puffs_cc_yield(pcq.pcq_cc);
806	TAILQ_REMOVE(&pnd->pnd_pcq, &pcq, pcq_next);
807
808#ifdef PERFUSE_DEBUG
809	if (perfuse_diagflags & PDF_REQUEUE)
810		DPRINTF("%s: RESUME opc = %p, pcc = %p (%s)\n",
811		        __func__, (void *)opc, pcq.pcq_cc,
812			perfuse_qtypestr[type]);
813#endif
814
815	return;
816}
817
818static int
819dequeue_requests(puffs_cookie_t opc, enum perfuse_qtype type, int max)
820{
821	struct perfuse_cc_queue *pcq;
822	struct perfuse_node_data *pnd;
823	int dequeued;
824
825	pnd = PERFUSE_NODE_DATA(opc);
826	dequeued = 0;
827	TAILQ_FOREACH(pcq, &pnd->pnd_pcq, pcq_next) {
828		if (pcq->pcq_type != type)
829			continue;
830
831#ifdef PERFUSE_DEBUG
832		if (perfuse_diagflags & PDF_REQUEUE)
833			DPRINTF("%s: SCHEDULE opc = %p, pcc = %p (%s)\n",
834				__func__, (void *)opc, pcq->pcq_cc,
835				 perfuse_qtypestr[type]);
836#endif
837		puffs_cc_schedule(pcq->pcq_cc);
838
839		if (++dequeued == max)
840			break;
841	}
842
843#ifdef PERFUSE_DEBUG
844	if (perfuse_diagflags & PDF_REQUEUE)
845		DPRINTF("%s: DONE  opc = %p\n", __func__, (void *)opc);
846#endif
847
848	return dequeued;
849}
850
851void
852perfuse_fs_init(struct puffs_usermount *pu)
853{
854	struct perfuse_state *ps;
855	perfuse_msg_t *pm;
856	struct fuse_init_in *fii;
857	struct fuse_init_out *fio;
858	int error;
859
860	ps = puffs_getspecific(pu);
861
862        if (puffs_mount(pu, ps->ps_target, ps->ps_mountflags, ps->ps_root) != 0)
863                DERR(EX_OSERR, "%s: puffs_mount failed", __func__);
864
865	/*
866	 * Linux 2.6.34.1 sends theses flags:
867	 * FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC
868	 * FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK
869	 *
870	 * Linux also sets max_readahead at 32 pages (128 kB)
871	 *
872	 * ps_new_msg() is called with NULL creds, which will
873	 * be interpreted as FUSE superuser.
874	 */
875	pm = ps->ps_new_msg(pu, 0, FUSE_INIT, sizeof(*fii), NULL);
876	fii = GET_INPAYLOAD(ps, pm, fuse_init_in);
877	fii->major = FUSE_KERNEL_VERSION;
878	fii->minor = FUSE_KERNEL_MINOR_VERSION;
879	fii->max_readahead = (unsigned int)(32 * sysconf(_SC_PAGESIZE));
880	fii->flags = (FUSE_ASYNC_READ|FUSE_POSIX_LOCKS|FUSE_ATOMIC_O_TRUNC);
881
882	if ((error = xchg_msg(pu, 0, pm, sizeof(*fio), wait_reply)) != 0)
883		DERRX(EX_SOFTWARE, "init message exchange failed (%d)", error);
884
885	fio = GET_OUTPAYLOAD(ps, pm, fuse_init_out);
886	ps->ps_max_readahead = fio->max_readahead;
887	ps->ps_max_write = fio->max_write;
888
889	ps->ps_destroy_msg(pm);
890
891	return;
892}
893
894int
895perfuse_fs_unmount(struct puffs_usermount *pu, int flags)
896{
897	perfuse_msg_t *pm;
898	struct perfuse_state *ps;
899	puffs_cookie_t opc;
900	int error;
901
902	ps = puffs_getspecific(pu);
903	opc = (puffs_cookie_t)puffs_getroot(pu);
904
905	/*
906	 * ps_new_msg() is called with NULL creds, which will
907	 * be interpreted as FUSE superuser.
908	 */
909	pm = ps->ps_new_msg(pu, opc, FUSE_DESTROY, 0, NULL);
910
911	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0){
912		DWARN("unmount %s", ps->ps_target);
913		if (!(flags & MNT_FORCE))
914			return error;
915		else
916			error = 0;
917	} else {
918		ps->ps_destroy_msg(pm);
919	}
920
921	ps->ps_umount(pu);
922
923	if (perfuse_diagflags & PDF_MISC)
924		DPRINTF("%s unmounted, exit\n", ps->ps_target);
925
926	return 0;
927}
928
929int
930perfuse_fs_statvfs(struct puffs_usermount *pu, struct statvfs *svfsb)
931{
932	struct perfuse_state *ps;
933	perfuse_msg_t *pm;
934	puffs_cookie_t opc;
935	struct fuse_statfs_out *fso;
936	int error;
937
938	ps = puffs_getspecific(pu);
939	opc = (puffs_cookie_t)puffs_getroot(pu);
940
941	/*
942	 * ps_new_msg() is called with NULL creds, which will
943	 * be interpreted as FUSE superuser.
944	 */
945	pm = ps->ps_new_msg(pu, opc, FUSE_STATFS, 0, NULL);
946
947	if ((error = xchg_msg(pu, opc, pm, sizeof(*fso), wait_reply)) != 0)
948		return error;
949
950	fso = GET_OUTPAYLOAD(ps, pm, fuse_statfs_out);
951	svfsb->f_flag = ps->ps_mountflags;
952	svfsb->f_bsize = fso->st.bsize;
953	svfsb->f_frsize = fso->st.frsize;
954	svfsb->f_iosize = ((struct puffs_node *)opc)->pn_va.va_blocksize;
955	svfsb->f_blocks = fso->st.blocks;
956	svfsb->f_bfree = fso->st.bfree;
957	svfsb->f_bavail = fso->st.bavail;
958	svfsb->f_bresvd = fso->st.bfree - fso->st.bavail;
959	svfsb->f_files = fso->st.files;
960	svfsb->f_ffree = fso->st.ffree;
961	svfsb->f_favail = fso->st.ffree;/* files not reserved for root */
962	svfsb->f_fresvd = 0;		/* files reserved for root */
963
964	svfsb->f_syncreads = ps->ps_syncreads;
965	svfsb->f_syncwrites = ps->ps_syncwrites;
966
967	svfsb->f_asyncreads = ps->ps_asyncreads;
968	svfsb->f_asyncwrites = ps->ps_asyncwrites;
969
970	(void)memcpy(&svfsb->f_fsidx, &ps->ps_fsid, sizeof(ps->ps_fsid));
971	svfsb->f_fsid = (unsigned long)ps->ps_fsid;
972	svfsb->f_namemax = MAXPATHLEN;	/* XXX */
973	svfsb->f_owner = ps->ps_owner_uid;
974
975	(void)strlcpy(svfsb->f_mntonname, ps->ps_target, _VFS_NAMELEN);
976
977	if (ps->ps_filesystemtype != NULL)
978		(void)strlcpy(svfsb->f_fstypename,
979			      ps->ps_filesystemtype, _VFS_NAMELEN);
980	else
981		(void)strlcpy(svfsb->f_fstypename, "fuse", _VFS_NAMELEN);
982
983	if (ps->ps_source != NULL)
984		strlcpy(svfsb->f_mntfromname, ps->ps_source, _VFS_NAMELEN);
985	else
986		strlcpy(svfsb->f_mntfromname, _PATH_FUSE, _VFS_NAMELEN);
987
988	ps->ps_destroy_msg(pm);
989
990	return 0;
991}
992
/*
 * Filesystem sync method. The work is delegated to the puffs
 * no-op implementation, and its result is returned.
 */
int
perfuse_fs_sync(struct puffs_usermount *pu, int waitfor,
	const struct puffs_cred *pcr)
{
	int error;

	/*
	 * FUSE does not seem to have a FS sync callback.
	 * Maybe do not even register this callback
	 */
	error = puffs_fsnop_sync(pu, waitfor, pcr);

	return error;
}
1003
/*
 * File handle to node translation is not implemented: the call
 * is reported through DERRX() as a fatal error.
 */
/* ARGSUSED0 */
int
perfuse_fs_fhtonode(struct puffs_usermount *pu, void *fid, size_t fidsize,
	struct puffs_newinfo *pni)
{
	int error = 0;

	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);

	return error;
}
1012
1013/* ARGSUSED0 */
1014int
1015perfuse_fs_nodetofh(struct puffs_usermount *pu, puffs_cookie_t cookie,
1016	void *fid, size_t *fidsize)
1017{
1018	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
1019	return 0;
1020}
1021
#if 0
/*
 * Extended attribute control is not implemented: the call is
 * reported through DERRX() as a fatal error.
 *
 * This code is currently compiled out.
 */
/* ARGSUSED0 */
void
perfuse_fs_extattrctl(struct puffs_usermount *pu, int cmd,
	puffs_cookie_t *cookie, int flags, int namespace, const char *attrname)
{
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
	/* A function returning void cannot return a value */
	return;
}
#endif /* 0 */
1032
/*
 * Filesystem suspend method: nothing is done.
 */
/* ARGSUSED0 */
void
perfuse_fs_suspend(struct puffs_usermount *pu, int status)
{
	(void)pu;
	(void)status;
}
1039
1040
1041int
1042perfuse_node_lookup(struct puffs_usermount *pu, puffs_cookie_t opc,
1043	struct puffs_newinfo *pni, const struct puffs_cn *pcn)
1044{
1045	struct perfuse_state *ps;
1046	struct puffs_node *pn;
1047	mode_t mode;
1048	int error;
1049
1050	ps = puffs_getspecific(pu);
1051	node_ref(opc);
1052
1053	/*
1054	 * Check permissions
1055	 */
1056	switch(pcn->pcn_nameiop) {
1057	case NAMEI_DELETE: /* FALLTHROUGH */
1058	case NAMEI_RENAME: /* FALLTHROUGH */
1059	case NAMEI_CREATE:
1060		if (pcn->pcn_flags & NAMEI_ISLASTCN)
1061			mode = PUFFS_VEXEC|PUFFS_VWRITE;
1062		else
1063			mode = PUFFS_VEXEC;
1064		break;
1065	case NAMEI_LOOKUP: /* FALLTHROUGH */
1066	default:
1067		mode = PUFFS_VEXEC;
1068		break;
1069	}
1070
1071	if ((error = mode_access(opc, pcn->pcn_cred, mode)) != 0)
1072		goto out;
1073
1074	error = node_lookup_common(pu, (puffs_cookie_t)opc, pni,
1075				   pcn->pcn_name, pcn->pcn_cred, &pn);
1076
1077	if (error != 0)
1078		goto out;
1079
1080	/*
1081	 * Kernel would kill us if the filesystem returned the parent
1082	 * itself. If we want to live, hide that!
1083	 */
1084	if ((opc == (puffs_cookie_t)pn) && (strcmp(pcn->pcn_name, ".") != 0)) {
1085		DERRX(EX_SOFTWARE, "lookup \"%s\" in \"%s\" returned parent",
1086		      pcn->pcn_name, perfuse_node_path(ps, opc));
1087		/* NOTREACHED */
1088		error = ESTALE;
1089		goto out;
1090	}
1091
1092	/*
1093	 * Removed node
1094	 */
1095	if (PERFUSE_NODE_DATA(pn)->pnd_flags & PND_REMOVED) {
1096		error = ENOENT;
1097		goto out;
1098	}
1099
1100	/*
1101	 * Check for sticky bit. Unfortunately there is no way to
1102	 * do this before creating the puffs_node, since we require
1103	 * this operation to get the node owner.
1104	 */
1105	switch (pcn->pcn_nameiop) {
1106	case NAMEI_DELETE: /* FALLTHROUGH */
1107	case NAMEI_RENAME:
1108		error = sticky_access(opc, pn, pcn->pcn_cred);
1109		if (error != 0) {
1110			(void)perfuse_node_reclaim(pu, pn);
1111			goto out;
1112		}
1113		break;
1114	default:
1115		break;
1116	}
1117
1118	PERFUSE_NODE_DATA(pn)->pnd_puffs_nlookup++;
1119
1120	error = 0;
1121
1122out:
1123	node_rele(opc);
1124	return error;
1125}
1126
1127int
1128perfuse_node_create(struct puffs_usermount *pu, puffs_cookie_t opc,
1129	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1130	const struct vattr *vap)
1131{
1132	perfuse_msg_t *pm;
1133	struct perfuse_state *ps;
1134	struct fuse_create_in *fci;
1135	struct fuse_entry_out *feo;
1136	struct fuse_open_out *foo;
1137	struct puffs_node *pn;
1138	const char *name;
1139	size_t namelen;
1140	size_t len;
1141	int error;
1142
1143	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1144		return ENOENT;
1145
1146	node_ref(opc);
1147
1148	/*
1149	 * If create is unimplemented: Check that it does not
1150	 * already exists, and if not, do mknod and open
1151	 */
1152	ps = puffs_getspecific(pu);
1153	if (ps->ps_flags & PS_NO_CREAT) {
1154		error = node_lookup_common(pu, opc, NULL, pcn->pcn_name,
1155					   pcn->pcn_cred, &pn);
1156		if (error == 0)	{
1157			(void)perfuse_node_reclaim(pu, pn);
1158			error = EEXIST;
1159			goto out;
1160		}
1161
1162		error = perfuse_node_mknod(pu, opc, pni, pcn, vap);
1163		if (error != 0)
1164			goto out;
1165
1166		error = node_lookup_common(pu, opc, NULL, pcn->pcn_name,
1167					   pcn->pcn_cred, &pn);
1168		if (error != 0)
1169			goto out;
1170
1171		/*
1172		 * FUSE does the open at create time, while
1173		 * NetBSD will open in a subsequent operation.
1174		 * We need to open now, in order to retain FUSE
1175		 * semantics. The calling process will not get
1176		 * a file descriptor before the kernel sends
1177		 * the open operation.
1178		 */
1179		error = perfuse_node_open(pu, (puffs_cookie_t)pn,
1180					  FWRITE, pcn->pcn_cred);
1181		goto out;
1182	}
1183
1184	name = pcn->pcn_name;
1185	namelen = pcn->pcn_namelen + 1;
1186	len = sizeof(*fci) + namelen;
1187
1188	/*
1189	 * flags should use O_WRONLY instead of O_RDWR, but it
1190	 * breaks when the caller tries to read from file.
1191	 *
1192	 * mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
1193	 */
1194	pm = ps->ps_new_msg(pu, opc, FUSE_CREATE, len, pcn->pcn_cred);
1195	fci = GET_INPAYLOAD(ps, pm, fuse_create_in);
1196	fci->flags = O_CREAT | O_TRUNC | O_RDWR;
1197	fci->mode = vap->va_mode | VTTOIF(vap->va_type);
1198	fci->umask = 0; 	/* Seems unused by libfuse */
1199	(void)strlcpy((char*)(void *)(fci + 1), name, namelen);
1200
1201	len = sizeof(*feo) + sizeof(*foo);
1202	if ((error = xchg_msg(pu, opc, pm, len, wait_reply)) != 0) {
1203		/*
1204		 * create is unimplmented, remember it for later,
1205		 * and start over using mknod and open instead.
1206		 */
1207		if (error == ENOSYS) {
1208			ps->ps_flags |= PS_NO_CREAT;
1209			error = perfuse_node_create(pu, opc, pni, pcn, vap);
1210		}
1211
1212		goto out;
1213	}
1214
1215	feo = GET_OUTPAYLOAD(ps, pm, fuse_entry_out);
1216	foo = (struct fuse_open_out *)(void *)(feo + 1);
1217	if (feo->nodeid == PERFUSE_UNKNOWN_NODEID)
1218		DERRX(EX_SOFTWARE, "%s: no nodeid", __func__);
1219
1220	/*
1221	 * Save the file handle and inode in node private data
1222	 * so that we can reuse it later
1223	 */
1224	pn = perfuse_new_pn(pu, name, opc);
1225	perfuse_new_fh((puffs_cookie_t)pn, foo->fh, FWRITE);
1226	PERFUSE_NODE_DATA(pn)->pnd_nodeid = feo->nodeid;
1227	PERFUSE_NODE_DATA(pn)->pnd_puffs_nlookup++;
1228	perfuse_node_cache(ps, pn);
1229
1230	fuse_attr_to_vap(ps, &pn->pn_va, &feo->attr);
1231	pn->pn_va.va_gen = (u_long)(feo->generation);
1232
1233	puffs_newinfo_setcookie(pni, pn);
1234#ifdef PUFFS_KFLAG_CACHE_FS_TTL
1235	puffs_newinfo_setva(pni, &pn->pn_va);
1236	perfuse_newinfo_setttl(pni, pn, feo, NULL);
1237#endif /* PUFFS_KFLAG_CACHE_FS_TTL */
1238
1239#ifdef PERFUSE_DEBUG
1240	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
1241		DPRINTF("%s: opc = %p, file = \"%s\", flags = 0x%x "
1242			"nodeid = 0x%"PRIx64", wfh = 0x%"PRIx64"\n",
1243			__func__, (void *)pn, pcn->pcn_name,
1244			PERFUSE_NODE_DATA(pn)->pnd_flags, feo->nodeid,
1245			foo->fh);
1246#endif
1247
1248	ps->ps_destroy_msg(pm);
1249	error = 0;
1250
1251out:
1252	node_rele(opc);
1253	return error;
1254}
1255
1256
1257int
1258perfuse_node_mknod(struct puffs_usermount *pu, puffs_cookie_t opc,
1259	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
1260	const struct vattr *vap)
1261{
1262	struct perfuse_state *ps;
1263	perfuse_msg_t *pm;
1264	struct fuse_mknod_in *fmi;
1265	const char* path;
1266	size_t len;
1267	int error;
1268
1269	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1270		return ENOENT;
1271
1272	node_ref(opc);
1273
1274	/*
1275	 * Only superuser can mknod objects other than
1276	 * directories, files, socks, fifo and links.
1277	 *
1278	 * Create an object require -WX permission in the parent directory
1279	 */
1280	switch (vap->va_type) {
1281	case VDIR:	/* FALLTHROUGH */
1282	case VREG:	/* FALLTHROUGH */
1283	case VFIFO:	/* FALLTHROUGH */
1284	case VSOCK:
1285		break;
1286	default:	/* VNON, VBLK, VCHR, VBAD */
1287		if (!puffs_cred_isjuggernaut(pcn->pcn_cred)) {
1288			error = EACCES;
1289			goto out;
1290		}
1291		break;
1292	}
1293
1294
1295	ps = puffs_getspecific(pu);
1296	path = pcn->pcn_name;
1297	len = sizeof(*fmi) + pcn->pcn_namelen + 1;
1298
1299	/*
1300	 * mode must contain file type (ie: S_IFREG), use VTTOIF(vap->va_type)
1301	 */
1302	pm = ps->ps_new_msg(pu, opc, FUSE_MKNOD, len, pcn->pcn_cred);
1303	fmi = GET_INPAYLOAD(ps, pm, fuse_mknod_in);
1304	fmi->mode = vap->va_mode | VTTOIF(vap->va_type);
1305	fmi->rdev = (uint32_t)vap->va_rdev;
1306	fmi->umask = 0; 	/* Seems unused bu libfuse */
1307	(void)strlcpy((char *)(void *)(fmi + 1), path, len - sizeof(*fmi));
1308
1309	error = node_mk_common(pu, opc, pni, pcn, pm);
1310
1311out:
1312	node_rele(opc);
1313	return error;
1314}
1315
1316
1317int
1318perfuse_node_open(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
1319	const struct puffs_cred *pcr)
1320{
1321	struct perfuse_state *ps;
1322	struct perfuse_node_data *pnd;
1323	perfuse_msg_t *pm;
1324	mode_t fmode;
1325	int op;
1326	struct fuse_open_in *foi;
1327	struct fuse_open_out *foo;
1328	struct puffs_node *pn;
1329	int error;
1330
1331	ps = puffs_getspecific(pu);
1332	pn = (struct puffs_node *)opc;
1333	pnd = PERFUSE_NODE_DATA(opc);
1334	error = 0;
1335
1336	if (pnd->pnd_flags & PND_REMOVED)
1337		return ENOENT;
1338
1339	node_ref(opc);
1340
1341	if (puffs_pn_getvap(pn)->va_type == VDIR)
1342		op = FUSE_OPENDIR;
1343	else
1344		op = FUSE_OPEN;
1345
1346	/*
1347	 * libfuse docs says
1348	 * - O_CREAT and O_EXCL should never be set.
1349	 * - O_TRUNC may be used if mount option atomic_o_trunc is used XXX
1350	 *
1351	 * O_APPEND makes no sense since FUSE always sends
1352	 * the file offset for write operations. If the
1353	 * filesystem uses pwrite(), O_APPEND would cause
1354	 * the offset to be ignored and cause file corruption.
1355	 */
1356	mode &= ~(O_CREAT|O_EXCL|O_APPEND);
1357
1358	/*
1359	 * Do not open twice, and do not reopen for reading
1360	 * if we already have write handle.
1361	 */
1362	if (((mode & FREAD) && (pnd->pnd_flags & PND_RFH)) ||
1363	    ((mode & FREAD) && (pnd->pnd_flags & PND_WFH)) ||
1364	    ((mode & FWRITE) && (pnd->pnd_flags & PND_WFH)))
1365		goto out;
1366
1367	/*
1368	 * Queue open on a node so that we do not open
1369	 * twice. This would be better with read and
1370	 * write distinguished.
1371	 */
1372	while (pnd->pnd_flags & PND_INOPEN)
1373		requeue_request(pu, opc, PCQ_OPEN);
1374	pnd->pnd_flags |= PND_INOPEN;
1375
1376	/*
1377	 * Convert PUFFS mode to FUSE mode: convert FREAD/FWRITE
1378	 * to O_RDONLY/O_WRONLY while perserving the other options.
1379	 */
1380	fmode = mode & ~(FREAD|FWRITE);
1381	fmode |= (mode & FWRITE) ? O_RDWR : O_RDONLY;
1382
1383	pm = ps->ps_new_msg(pu, opc, op, sizeof(*foi), pcr);
1384	foi = GET_INPAYLOAD(ps, pm, fuse_open_in);
1385	foi->flags = fmode;
1386	foi->unused = 0;
1387
1388	if ((error = xchg_msg(pu, opc, pm, sizeof(*foo), wait_reply)) != 0)
1389		goto out;
1390
1391	foo = GET_OUTPAYLOAD(ps, pm, fuse_open_out);
1392
1393	/*
1394	 * Save the file handle in node private data
1395	 * so that we can reuse it later
1396	 */
1397	perfuse_new_fh(opc, foo->fh, mode);
1398
1399#ifdef PERFUSE_DEBUG
1400	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
1401		DPRINTF("%s: opc = %p, file = \"%s\", "
1402			"nodeid = 0x%"PRIx64", %s%sfh = 0x%"PRIx64"\n",
1403			__func__, (void *)opc, perfuse_node_path(ps, opc),
1404			pnd->pnd_nodeid, mode & FREAD ? "r" : "",
1405			mode & FWRITE ? "w" : "", foo->fh);
1406#endif
1407
1408	ps->ps_destroy_msg(pm);
1409out:
1410
1411	pnd->pnd_flags &= ~PND_INOPEN;
1412	(void)dequeue_requests(opc, PCQ_OPEN, DEQUEUE_ALL);
1413
1414	node_rele(opc);
1415	return error;
1416}
1417
1418/* ARGSUSED0 */
1419int
1420perfuse_node_close(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
1421	const struct puffs_cred *pcr)
1422{
1423	struct perfuse_node_data *pnd;
1424
1425	pnd = PERFUSE_NODE_DATA(opc);
1426
1427	if (!(pnd->pnd_flags & PND_OPEN))
1428		return EBADF;
1429
1430	/*
1431	 * Actual close is postponed at inactive time.
1432	 */
1433	return 0;
1434}
1435
1436int
1437perfuse_node_access(struct puffs_usermount *pu, puffs_cookie_t opc, int mode,
1438	const struct puffs_cred *pcr)
1439{
1440	perfuse_msg_t *pm;
1441	struct perfuse_state *ps;
1442	struct fuse_access_in *fai;
1443	int error;
1444
1445	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
1446		return ENOENT;
1447
1448	node_ref(opc);
1449
1450	/*
1451	 * If we previously detected the filesystem does not
1452	 * implement access(), short-circuit the call and skip
1453	 * to libpuffs access() emulation.
1454	 */
1455	ps = puffs_getspecific(pu);
1456	if (ps->ps_flags & PS_NO_ACCESS) {
1457		const struct vattr *vap;
1458
1459		vap = puffs_pn_getvap((struct puffs_node *)opc);
1460
1461		error = puffs_access(IFTOVT(vap->va_mode),
1462				     vap->va_mode & ACCESSPERMS,
1463				     vap->va_uid, vap->va_gid,
1464				     (mode_t)mode, pcr);
1465		goto out;
1466	}
1467
1468	/*
1469	 * Plain access call
1470	 */
1471	pm = ps->ps_new_msg(pu, opc, FUSE_ACCESS, sizeof(*fai), pcr);
1472	fai = GET_INPAYLOAD(ps, pm, fuse_access_in);
1473	fai->mask = 0;
1474	fai->mask |= (mode & PUFFS_VREAD) ? R_OK : 0;
1475	fai->mask |= (mode & PUFFS_VWRITE) ? W_OK : 0;
1476	fai->mask |= (mode & PUFFS_VEXEC) ? X_OK : 0;
1477
1478	error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
1479
1480	ps->ps_destroy_msg(pm);
1481
1482	/*
1483	 * If unimplemented, start over with emulation
1484	 */
1485	if (error == ENOSYS) {
1486		ps->ps_flags |= PS_NO_ACCESS;
1487		error = perfuse_node_access(pu, opc, mode, pcr);
1488	}
1489
1490out:
1491	node_rele(opc);
1492	return error;
1493}
1494
1495int
1496perfuse_node_getattr(struct puffs_usermount *pu, puffs_cookie_t opc,
1497	struct vattr *vap, const struct puffs_cred *pcr)
1498{
1499	return perfuse_node_getattr_ttl(pu, opc, vap, pcr, NULL);
1500}
1501
1502int
1503perfuse_node_getattr_ttl(struct puffs_usermount *pu, puffs_cookie_t opc,
1504	struct vattr *vap, const struct puffs_cred *pcr,
1505	struct timespec *va_ttl)
1506{
1507	perfuse_msg_t *pm = NULL;
1508	struct perfuse_state *ps;
1509	struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
1510	struct fuse_getattr_in *fgi;
1511	struct fuse_attr_out *fao;
1512	int error = 0;
1513
1514	if (pnd->pnd_flags & PND_REMOVED)
1515		return ENOENT;
1516
1517	node_ref(opc);
1518
1519	/*
1520	 * Serialize size access, see comment in perfuse_node_setattr().
1521	 */
1522	while (pnd->pnd_flags & PND_INRESIZE)
1523		requeue_request(pu, opc, PCQ_RESIZE);
1524	pnd->pnd_flags |= PND_INRESIZE;
1525
1526	ps = puffs_getspecific(pu);
1527
1528	/*
1529	 * FUSE_GETATTR_FH must be set in fgi->flags
1530	 * if we use for fgi->fh
1531	 */
1532	pm = ps->ps_new_msg(pu, opc, FUSE_GETATTR, sizeof(*fgi), pcr);
1533	fgi = GET_INPAYLOAD(ps, pm, fuse_getattr_in);
1534	fgi->getattr_flags = 0;
1535	fgi->dummy = 0;
1536	fgi->fh = 0;
1537
1538	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_OPEN) {
1539		fgi->fh = perfuse_get_fh(opc, FREAD);
1540		fgi->getattr_flags |= FUSE_GETATTR_FH;
1541	}
1542
1543#ifdef PERFUSE_DEBUG
1544	if (perfuse_diagflags & PDF_RESIZE)
1545		DPRINTF(">> %s %p %" PRIu64 "\n", __func__, (void *)opc,
1546		    vap->va_size);
1547#endif
1548
1549	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
1550		goto out;
1551
1552	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
1553
1554#ifdef PERFUSE_DEBUG
1555	if (perfuse_diagflags & PDF_RESIZE)
1556		DPRINTF("<< %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1557		    (void *)opc, vap->va_size, fao->attr.size);
1558#endif
1559
1560	/*
1561	 * We set birthtime, flags, filerev,vaflags to 0.
1562	 * This seems the best bet, since the information is
1563	 * not available from filesystem.
1564	 */
1565	fuse_attr_to_vap(ps, vap, &fao->attr);
1566
1567	if (va_ttl != NULL) {
1568		va_ttl->tv_sec = fao->attr_valid;
1569		va_ttl->tv_nsec = fao->attr_valid_nsec;
1570	}
1571
1572	ps->ps_destroy_msg(pm);
1573	error = 0;
1574out:
1575
1576	pnd->pnd_flags &= ~PND_INRESIZE;
1577	(void)dequeue_requests(opc, PCQ_RESIZE, DEQUEUE_ALL);
1578
1579	node_rele(opc);
1580	return error;
1581}
1582
1583int
1584perfuse_node_setattr(struct puffs_usermount *pu, puffs_cookie_t opc,
1585	const struct vattr *vap, const struct puffs_cred *pcr)
1586{
1587	return perfuse_node_setattr_ttl(pu, opc,
1588					__UNCONST(vap), pcr, NULL, 0);
1589}
1590
1591int
1592perfuse_node_setattr_ttl(struct puffs_usermount *pu, puffs_cookie_t opc,
1593	struct vattr *vap, const struct puffs_cred *pcr,
1594	struct timespec *va_ttl, int xflag)
1595{
1596	perfuse_msg_t *pm;
1597	uint64_t fh;
1598	struct perfuse_state *ps;
1599	struct perfuse_node_data *pnd;
1600	struct fuse_setattr_in *fsi;
1601	struct fuse_attr_out *fao;
1602	struct vattr *old_va;
1603	enum perfuse_xchg_pb_reply reply;
1604	int error;
1605#ifdef PERFUSE_DEBUG
1606	struct vattr *old_vap;
1607	int resize_debug = 0;
1608#endif
1609	ps = puffs_getspecific(pu);
1610	pnd = PERFUSE_NODE_DATA(opc);
1611
1612	/*
1613	 * The only operation we can do once the file is removed
1614	 * is to resize it, and we can do it only if it is open.
1615	 * Do not even send the operation to the filesystem: the
1616	 * file is not there anymore.
1617	 */
1618	if (pnd->pnd_flags & PND_REMOVED) {
1619		if (!(pnd->pnd_flags & PND_OPEN))
1620			return ENOENT;
1621
1622		return 0;
1623	}
1624
1625	old_va = puffs_pn_getvap((struct puffs_node *)opc);
1626
1627	/*
1628	 * Check for permission to change size
1629	 * It is always allowed if we already have a write file handle
1630	 */
1631	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1632	    !(pnd->pnd_flags & PND_WFH) &&
1633	    (error = mode_access(opc, pcr, PUFFS_VWRITE)) != 0)
1634		return error;
1635
1636	/*
1637	 * Check for permission to change dates
1638	 */
1639	if (((vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) ||
1640	     (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL)) &&
1641	    (puffs_access_times(old_va->va_uid, old_va->va_gid,
1642				old_va->va_mode, 0, pcr) != 0))
1643		return EACCES;
1644
1645	/*
1646	 * Check for permission to change owner and group
1647	 */
1648	if (((vap->va_uid != (uid_t)PUFFS_VNOVAL) ||
1649	     (vap->va_gid != (gid_t)PUFFS_VNOVAL)) &&
1650	    (puffs_access_chown(old_va->va_uid, old_va->va_gid,
1651				vap->va_uid, vap->va_gid, pcr)) != 0)
1652		return EACCES;
1653
1654	/*
1655	 * Check for permission to change permissions
1656	 */
1657	if ((vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
1658	    (puffs_access_chmod(old_va->va_uid, old_va->va_gid,
1659				old_va->va_type, vap->va_mode, pcr)) != 0)
1660		return EACCES;
1661
1662	node_ref(opc);
1663
1664	if (pnd->pnd_flags & PND_WFH)
1665		fh = perfuse_get_fh(opc, FWRITE);
1666	else
1667		fh = FUSE_UNKNOWN_FH;
1668
1669	/*
1670	 * fchmod() sets mode and fh, and it may carry
1671	 * a resize as well. That may break if the
1672	 * filesystem does chmod then resize, and fails
1673	 * because it does not have permission anymore.
1674	 * We work this around by splitting into two setattr.
1675	 */
1676	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1677	    (vap->va_mode != (mode_t)PUFFS_VNOVAL) &&
1678	    (fh != FUSE_UNKNOWN_FH)) {
1679		struct vattr resize_va;
1680
1681		(void)memcpy(&resize_va, vap, sizeof(resize_va));
1682		resize_va.va_mode = (mode_t)PUFFS_VNOVAL;
1683		if ((error = perfuse_node_setattr_ttl(pu, opc, &resize_va,
1684						      pcr, va_ttl, xflag)) != 0)
1685			goto out2;
1686
1687		vap->va_size = (u_quad_t)PUFFS_VNOVAL;
1688	}
1689
1690	pm = ps->ps_new_msg(pu, opc, FUSE_SETATTR, sizeof(*fsi), pcr);
1691	fsi = GET_INPAYLOAD(ps, pm, fuse_setattr_in);
1692	fsi->valid = 0;
1693
1694	/*
1695	 * Get a fh if the node is open for writing
1696	 */
1697	if (fh != FUSE_UNKNOWN_FH) {
1698		fsi->fh = fh;
1699		fsi->valid |= FUSE_FATTR_FH;
1700	}
1701
1702
1703	if (vap->va_size != (u_quad_t)PUFFS_VNOVAL) {
1704		fsi->size = vap->va_size;
1705		fsi->valid |= FUSE_FATTR_SIZE;
1706
1707		/*
1708		 * Serialize anything that can touch file size
1709		 * to avoid reordered GETATTR and SETATTR.
1710		 * Out of order SETATTR can report stale size,
1711		 * which will cause the kernel to truncate the file.
1712		 * XXX Probably useless now we have a lock on GETATTR
1713		 */
1714		while (pnd->pnd_flags & PND_INRESIZE)
1715			requeue_request(pu, opc, PCQ_RESIZE);
1716		pnd->pnd_flags |= PND_INRESIZE;
1717	}
1718
1719	/*
1720 	 * Setting mtime without atime or vice versa leads to
1721	 * dates being reset to Epoch on glusterfs. If one
1722	 * is missing, use the old value.
1723 	 */
1724	if ((vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL) ||
1725	    (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL)) {
1726
1727		if (vap->va_atime.tv_sec != (time_t)PUFFS_VNOVAL) {
1728			fsi->atime = vap->va_atime.tv_sec;
1729			fsi->atimensec = (uint32_t)vap->va_atime.tv_nsec;
1730		} else {
1731			fsi->atime = old_va->va_atime.tv_sec;
1732			fsi->atimensec = (uint32_t)old_va->va_atime.tv_nsec;
1733		}
1734
1735		if (vap->va_mtime.tv_sec != (time_t)PUFFS_VNOVAL) {
1736			fsi->mtime = vap->va_mtime.tv_sec;
1737			fsi->mtimensec = (uint32_t)vap->va_mtime.tv_nsec;
1738		} else {
1739			fsi->mtime = old_va->va_mtime.tv_sec;
1740			fsi->mtimensec = (uint32_t)old_va->va_mtime.tv_nsec;
1741		}
1742
1743		fsi->valid |= (FUSE_FATTR_MTIME|FUSE_FATTR_ATIME);
1744	}
1745
1746	if (vap->va_mode != (mode_t)PUFFS_VNOVAL) {
1747		fsi->mode = vap->va_mode;
1748		fsi->valid |= FUSE_FATTR_MODE;
1749	}
1750
1751	if (vap->va_uid != (uid_t)PUFFS_VNOVAL) {
1752		fsi->uid = vap->va_uid;
1753		fsi->valid |= FUSE_FATTR_UID;
1754	}
1755
1756	if (vap->va_gid != (gid_t)PUFFS_VNOVAL) {
1757		fsi->gid = vap->va_gid;
1758		fsi->valid |= FUSE_FATTR_GID;
1759	}
1760
1761	if (pnd->pnd_lock_owner != 0) {
1762		fsi->lock_owner = pnd->pnd_lock_owner;
1763		fsi->valid |= FUSE_FATTR_LOCKOWNER;
1764	}
1765
1766	/*
1767	 * ftruncate() sends only va_size, and metadata cache
1768	 * flush adds va_atime and va_mtime. Some FUSE
1769	 * filesystems will attempt to detect ftruncate by
1770	 * checking for FATTR_SIZE being set without
1771	 * FATTR_UID|FATTR_GID|FATTR_ATIME|FATTR_MTIME|FATTR_MODE
1772	 *
1773	 * Try to adapt and remove FATTR_ATIME|FATTR_MTIME
1774	 * if we suspect a ftruncate().
1775	 */
1776	if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
1777	    ((vap->va_mode == (mode_t)PUFFS_VNOVAL) &&
1778	     (vap->va_uid == (uid_t)PUFFS_VNOVAL) &&
1779	     (vap->va_gid == (gid_t)PUFFS_VNOVAL))) {
1780		fsi->atime = 0;
1781		fsi->atimensec = 0;
1782		fsi->mtime = 0;
1783		fsi->mtimensec = 0;
1784		fsi->valid &= ~(FUSE_FATTR_ATIME|FUSE_FATTR_MTIME);
1785	}
1786
1787	/*
1788	 * If nothing remain, discard the operation.
1789	 */
1790	if (!(fsi->valid & (FUSE_FATTR_SIZE|FUSE_FATTR_ATIME|FUSE_FATTR_MTIME|
1791			    FUSE_FATTR_MODE|FUSE_FATTR_UID|FUSE_FATTR_GID))) {
1792		error = 0;
1793		ps->ps_destroy_msg(pm);
1794		goto out;
1795	}
1796
1797#ifdef PERFUSE_DEBUG
1798	old_vap = puffs_pn_getvap((struct puffs_node *)opc);
1799
1800	if ((perfuse_diagflags & PDF_RESIZE) &&
1801	    (old_vap->va_size != (u_quad_t)PUFFS_VNOVAL)) {
1802		resize_debug = 1;
1803
1804		DPRINTF(">> %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1805		    (void *)opc,
1806		    puffs_pn_getvap((struct puffs_node *)opc)->va_size,
1807		    fsi->size);
1808	}
1809#endif
1810
1811	/*
1812	 * Do not honour FAF when changing size. How do
1813	 * you want such a thing to work?
1814	 */
1815	reply = wait_reply;
1816#ifdef PUFFS_SETATTR_FAF
1817	if ((xflag & PUFFS_SETATTR_FAF) && !(fsi->valid & FUSE_FATTR_SIZE))
1818		reply = no_reply;
1819#endif
1820	if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), reply)) != 0)
1821		goto out;
1822
1823	if (reply == no_reply)
1824		goto out;
1825
1826	/*
1827	 * Copy back the new values
1828	 */
1829	fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
1830
1831#ifdef PERFUSE_DEBUG
1832	if (resize_debug)
1833		DPRINTF("<< %s %p %" PRIu64 " -> %" PRIu64 "\n", __func__,
1834		    (void *)opc, old_vap->va_size, fao->attr.size);
1835#endif
1836
1837	fuse_attr_to_vap(ps, old_va, &fao->attr);
1838
1839	if (va_ttl != NULL) {
1840		va_ttl->tv_sec = fao->attr_valid;
1841		va_ttl->tv_nsec = fao->attr_valid_nsec;
1842		(void)memcpy(vap, old_va, sizeof(*vap));
1843	}
1844
1845	ps->ps_destroy_msg(pm);
1846	error = 0;
1847
1848out:
1849	if (pnd->pnd_flags & PND_INRESIZE) {
1850		pnd->pnd_flags &= ~PND_INRESIZE;
1851		(void)dequeue_requests(opc, PCQ_RESIZE, DEQUEUE_ALL);
1852	}
1853
1854out2:
1855	node_rele(opc);
1856	return error;
1857}
1858
1859int
1860perfuse_node_poll(struct puffs_usermount *pu, puffs_cookie_t opc, int *events)
1861{
1862	struct perfuse_state *ps;
1863	perfuse_msg_t *pm;
1864	struct fuse_poll_in *fpi;
1865	struct fuse_poll_out *fpo;
1866	int error;
1867
1868	node_ref(opc);
1869	ps = puffs_getspecific(pu);
1870	/*
1871	 * kh is set if FUSE_POLL_SCHEDULE_NOTIFY is set.
1872	 *
1873	 * XXX ps_new_msg() is called with NULL creds, which will
1874	 * be interpreted as FUSE superuser. We have no way to
1875	 * know the requesting process' credential, but since poll
1876	 * is supposed to operate on a file that has been open,
1877	 * permission should have already been checked at open time.
1878	 * That still may breaks on filesystems that provides odd
1879	 * semantics.
1880 	 */
1881	pm = ps->ps_new_msg(pu, opc, FUSE_POLL, sizeof(*fpi), NULL);
1882	fpi = GET_INPAYLOAD(ps, pm, fuse_poll_in);
1883	fpi->fh = perfuse_get_fh(opc, FREAD);
1884	fpi->kh = 0;
1885	fpi->flags = 0;
1886
1887#ifdef PERFUSE_DEBUG
1888	if (perfuse_diagflags & PDF_FH)
1889		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
1890			"fh = 0x%"PRIx64"\n", __func__, (void *)opc,
1891			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fpi->fh);
1892#endif
1893	if ((error = xchg_msg(pu, opc, pm, sizeof(*fpo), wait_reply)) != 0)
1894		goto out;
1895
1896	fpo = GET_OUTPAYLOAD(ps, pm, fuse_poll_out);
1897	*events = fpo->revents;
1898
1899	ps->ps_destroy_msg(pm);
1900	error = 0;
1901
1902out:
1903	node_rele(opc);
1904	return error;
1905}
1906
1907/* ARGSUSED0 */
1908int
1909perfuse_node_mmap(struct puffs_usermount *pu, puffs_cookie_t opc, int flags,
1910	const struct puffs_cred *pcr)
1911{
1912	/*
1913	 * Not implemented anymore in libfuse
1914	 */
1915	return ENOSYS;
1916}
1917
1918/* ARGSUSED2 */
1919int
1920perfuse_node_fsync(struct puffs_usermount *pu, puffs_cookie_t opc,
1921	const struct puffs_cred *pcr, int flags, off_t offlo, off_t offhi)
1922{
1923	int op;
1924	perfuse_msg_t *pm;
1925	struct perfuse_state *ps;
1926	struct perfuse_node_data *pnd;
1927	struct fuse_fsync_in *ffi;
1928	uint64_t fh;
1929	int error = 0;
1930
1931	pm = NULL;
1932	ps = puffs_getspecific(pu);
1933	pnd = PERFUSE_NODE_DATA(opc);
1934
1935	/*
1936	 * No need to sync a removed node
1937	 */
1938	if (pnd->pnd_flags & PND_REMOVED)
1939		return 0;
1940
1941	/*
1942	 * We do not sync closed files. They have been
1943	 * sync at inactive time already.
1944	 */
1945	if (!(pnd->pnd_flags & PND_OPEN))
1946		return 0;
1947
1948	node_ref(opc);
1949
1950	if (puffs_pn_getvap((struct puffs_node *)opc)->va_type == VDIR)
1951		op = FUSE_FSYNCDIR;
1952	else 		/* VREG but also other types such as VLNK */
1953		op = FUSE_FSYNC;
1954
1955	/*
1956	 * Do not sync if there are no change to sync
1957	 * XXX remove that test on files if we implement mmap
1958	 */
1959#ifdef PERFUSE_DEBUG
1960	if (perfuse_diagflags & PDF_SYNC)
1961		DPRINTF("%s: TEST opc = %p, file = \"%s\" is %sdirty\n",
1962			__func__, (void*)opc, perfuse_node_path(ps, opc),
1963			pnd->pnd_flags & PND_DIRTY ? "" : "not ");
1964#endif
1965	if (!(pnd->pnd_flags & PND_DIRTY))
1966		goto out;
1967
1968	/*
1969	 * It seems NetBSD can call fsync without open first
1970	 * glusterfs complain in such a situation:
1971	 * "FSYNC() ERR => -1 (Invalid argument)"
1972	 * The file will be closed at inactive time.
1973	 *
1974	 * We open the directory for reading in order to sync.
1975	 * This sounds rather counterintuitive, but it works.
1976	 */
1977	if (!(pnd->pnd_flags & PND_WFH)) {
1978		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
1979			goto out;
1980	}
1981
1982	if (op == FUSE_FSYNCDIR)
1983		fh = perfuse_get_fh(opc, FREAD);
1984	else
1985		fh = perfuse_get_fh(opc, FWRITE);
1986
1987	/*
1988	 * If fsync_flags  is set, meta data should not be flushed.
1989	 */
1990	pm = ps->ps_new_msg(pu, opc, op, sizeof(*ffi), pcr);
1991	ffi = GET_INPAYLOAD(ps, pm, fuse_fsync_in);
1992	ffi->fh = fh;
1993	ffi->fsync_flags = (flags & FFILESYNC) ? 0 : 1;
1994
1995#ifdef PERFUSE_DEBUG
1996	if (perfuse_diagflags & PDF_FH)
1997		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
1998			__func__, (void *)opc,
1999			PERFUSE_NODE_DATA(opc)->pnd_nodeid, ffi->fh);
2000#endif
2001
2002	if ((error = xchg_msg(pu, opc, pm,
2003			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
2004		goto out;
2005
2006	/*
2007	 * No reply beyond fuse_out_header: nothing to do on success
2008	 * just clear the dirty flag
2009	 */
2010	pnd->pnd_flags &= ~PND_DIRTY;
2011
2012#ifdef PERFUSE_DEBUG
2013	if (perfuse_diagflags & PDF_SYNC)
2014		DPRINTF("%s: CLEAR opc = %p, file = \"%s\"\n",
2015			__func__, (void*)opc, perfuse_node_path(ps, opc));
2016#endif
2017
2018	ps->ps_destroy_msg(pm);
2019	error = 0;
2020
2021out:
2022	/*
2023	 * ENOSYS is not returned to kernel,
2024	 */
2025	if (error == ENOSYS)
2026		error = 0;
2027
2028	node_rele(opc);
2029	return error;
2030}
2031
2032/* ARGSUSED0 */
2033int
2034perfuse_node_seek(struct puffs_usermount *pu, puffs_cookie_t opc,
2035	off_t oldoff, off_t newoff, const struct puffs_cred *pcr)
2036{
2037	return 0;
2038}
2039
2040int
2041perfuse_node_remove(struct puffs_usermount *pu, puffs_cookie_t opc,
2042	puffs_cookie_t targ, const struct puffs_cn *pcn)
2043{
2044	struct perfuse_state *ps;
2045	struct perfuse_node_data *pnd;
2046	perfuse_msg_t *pm;
2047	char *path;
2048	const char *name;
2049	size_t len;
2050	int error;
2051
2052	pnd = PERFUSE_NODE_DATA(opc);
2053
2054	if ((pnd->pnd_flags & PND_REMOVED) ||
2055	    (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
2056		return ENOENT;
2057
2058#ifdef PERFUSE_DEBUG
2059	if (targ == NULL)
2060		DERRX(EX_SOFTWARE, "%s: targ is NULL", __func__);
2061
2062	if (perfuse_diagflags & (PDF_FH|PDF_FILENAME))
2063		DPRINTF("%s: opc = %p, remove opc = %p, file = \"%s\"\n",
2064			__func__, (void *)opc, (void *)targ, pcn->pcn_name);
2065#endif
2066	node_ref(opc);
2067	node_ref(targ);
2068
2069	/*
2070	 * Await for all operations on the deleted node to drain,
2071	 * as the filesystem may be confused to have it deleted
2072	 * during a getattr
2073	 */
2074	while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
2075		requeue_request(pu, targ, PCQ_AFTERXCHG);
2076
2077	ps = puffs_getspecific(pu);
2078	pnd = PERFUSE_NODE_DATA(opc);
2079	name = pcn->pcn_name;
2080	len = pcn->pcn_namelen + 1;
2081
2082	pm = ps->ps_new_msg(pu, opc, FUSE_UNLINK, len, pcn->pcn_cred);
2083	path = _GET_INPAYLOAD(ps, pm, char *);
2084	(void)strlcpy(path, name, len);
2085
2086	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2087		goto out;
2088
2089	perfuse_cache_flush(targ);
2090	PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2091
2092	if (!(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_OPEN))
2093		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
2094
2095	/*
2096	 * The parent directory needs a sync
2097	 */
2098	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2099
2100#ifdef PERFUSE_DEBUG
2101	if (perfuse_diagflags & PDF_FILENAME)
2102		DPRINTF("%s: remove nodeid = 0x%"PRIx64" file = \"%s\"\n",
2103			__func__, PERFUSE_NODE_DATA(targ)->pnd_nodeid,
2104			pcn->pcn_name);
2105#endif
2106	ps->ps_destroy_msg(pm);
2107	error = 0;
2108
2109out:
2110	node_rele(opc);
2111	node_rele(targ);
2112	return error;
2113}
2114
2115int
2116perfuse_node_link(struct puffs_usermount *pu, puffs_cookie_t opc,
2117	puffs_cookie_t targ, const struct puffs_cn *pcn)
2118{
2119	struct perfuse_state *ps;
2120	perfuse_msg_t *pm;
2121	const char *name;
2122	size_t len;
2123	struct puffs_node *pn;
2124	struct fuse_link_in *fli;
2125	int error;
2126
2127	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2128		return ENOENT;
2129
2130	node_ref(opc);
2131	node_ref(targ);
2132	ps = puffs_getspecific(pu);
2133	pn = (struct puffs_node *)targ;
2134	name = pcn->pcn_name;
2135	len =  sizeof(*fli) + pcn->pcn_namelen + 1;
2136
2137	pm = ps->ps_new_msg(pu, opc, FUSE_LINK, len, pcn->pcn_cred);
2138	fli = GET_INPAYLOAD(ps, pm, fuse_link_in);
2139	fli->oldnodeid = PERFUSE_NODE_DATA(pn)->pnd_nodeid;
2140	(void)strlcpy((char *)(void *)(fli + 1), name, len - sizeof(*fli));
2141
2142	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2143		goto out;
2144
2145	ps->ps_destroy_msg(pm);
2146	error = 0;
2147
2148out:
2149	node_rele(opc);
2150	node_rele(targ);
2151	return error;
2152}
2153
2154int
2155perfuse_node_rename(struct puffs_usermount *pu, puffs_cookie_t opc,
2156	puffs_cookie_t src, const struct puffs_cn *pcn_src,
2157	puffs_cookie_t targ_dir, puffs_cookie_t targ,
2158	const struct puffs_cn *pcn_targ)
2159{
2160	struct perfuse_state *ps;
2161	struct perfuse_node_data *dstdir_pnd;
2162	perfuse_msg_t *pm;
2163	struct fuse_rename_in *fri;
2164	const char *newname;
2165	const char *oldname;
2166	char *np;
2167	int error;
2168	size_t len;
2169	size_t newname_len;
2170	size_t oldname_len;
2171
2172	if ((PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED) ||
2173	    (PERFUSE_NODE_DATA(src)->pnd_flags & PND_REMOVED) ||
2174	    (PERFUSE_NODE_DATA(targ_dir)->pnd_flags & PND_REMOVED))
2175		return ENOENT;
2176
2177	node_ref(opc);
2178	node_ref(src);
2179
2180	/*
2181	 * Await for all operations on the deleted node to drain,
2182	 * as the filesystem may be confused to have it deleted
2183	 * during a getattr
2184	 */
2185	if ((struct puffs_node *)targ != NULL) {
2186		node_ref(targ);
2187		while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
2188			requeue_request(pu, targ, PCQ_AFTERXCHG);
2189	} else {
2190		while (PERFUSE_NODE_DATA(src)->pnd_inxchg)
2191			requeue_request(pu, src, PCQ_AFTERXCHG);
2192	}
2193
2194	ps = puffs_getspecific(pu);
2195	newname =  pcn_targ->pcn_name;
2196	newname_len = pcn_targ->pcn_namelen + 1;
2197	oldname =  pcn_src->pcn_name;
2198	oldname_len = pcn_src->pcn_namelen + 1;
2199
2200	len = sizeof(*fri) + oldname_len + newname_len;
2201	pm = ps->ps_new_msg(pu, opc, FUSE_RENAME, len, pcn_targ->pcn_cred);
2202	fri = GET_INPAYLOAD(ps, pm, fuse_rename_in);
2203	fri->newdir = PERFUSE_NODE_DATA(targ_dir)->pnd_nodeid;
2204	np = (char *)(void *)(fri + 1);
2205	(void)strlcpy(np, oldname, oldname_len);
2206	np += oldname_len;
2207	(void)strlcpy(np, newname, newname_len);
2208
2209	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2210		goto out;
2211
2212
2213	/*
2214	 * Record new parent nodeid
2215	 */
2216	dstdir_pnd = PERFUSE_NODE_DATA(targ_dir);
2217	PERFUSE_NODE_DATA(src)->pnd_parent_nodeid = dstdir_pnd->pnd_nodeid;
2218
2219	if (opc != targ_dir)
2220		dstdir_pnd->pnd_flags |= PND_DIRTY;
2221
2222	if (strcmp(newname, "..") != 0)
2223		(void)strlcpy(PERFUSE_NODE_DATA(src)->pnd_name,
2224		    newname, MAXPATHLEN);
2225	else
2226		PERFUSE_NODE_DATA(src)->pnd_name[0] = 0; /* forget name */
2227
2228	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2229
2230	if ((struct puffs_node *)targ != NULL) {
2231		perfuse_cache_flush(targ);
2232		PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2233	}
2234
2235#ifdef PERFUSE_DEBUG
2236	if (perfuse_diagflags & PDF_FILENAME)
2237		DPRINTF("%s: nodeid = 0x%"PRIx64" file = \"%s\" renamed \"%s\" "
2238			"nodeid = 0x%"PRIx64" -> nodeid = 0x%"PRIx64" \"%s\"\n",
2239	 		__func__, PERFUSE_NODE_DATA(src)->pnd_nodeid,
2240			pcn_src->pcn_name, pcn_targ->pcn_name,
2241			PERFUSE_NODE_DATA(opc)->pnd_nodeid,
2242			PERFUSE_NODE_DATA(targ_dir)->pnd_nodeid,
2243			perfuse_node_path(ps, targ_dir));
2244#endif
2245
2246	ps->ps_destroy_msg(pm);
2247	error = 0;
2248
2249out:
2250	node_rele(opc);
2251	node_rele(src);
2252	if ((struct puffs_node *)targ != NULL)
2253		node_rele(targ);
2254
2255	return error;
2256}
2257
2258int
2259perfuse_node_mkdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2260	struct puffs_newinfo *pni, const struct puffs_cn *pcn,
2261	const struct vattr *vap)
2262{
2263	struct perfuse_state *ps;
2264	perfuse_msg_t *pm;
2265	struct fuse_mkdir_in *fmi;
2266	const char *path;
2267	size_t len;
2268	int error;
2269
2270	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2271		return ENOENT;
2272
2273	node_ref(opc);
2274	ps = puffs_getspecific(pu);
2275	path = pcn->pcn_name;
2276	len = sizeof(*fmi) + pcn->pcn_namelen + 1;
2277
2278	pm = ps->ps_new_msg(pu, opc, FUSE_MKDIR, len, pcn->pcn_cred);
2279	fmi = GET_INPAYLOAD(ps, pm, fuse_mkdir_in);
2280	fmi->mode = vap->va_mode;
2281	fmi->umask = 0; 	/* Seems unused by libfuse? */
2282	(void)strlcpy((char *)(void *)(fmi + 1), path, len - sizeof(*fmi));
2283
2284	error = node_mk_common(pu, opc, pni, pcn, pm);
2285
2286	node_rele(opc);
2287	return error;
2288}
2289
2290
2291int
2292perfuse_node_rmdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2293	puffs_cookie_t targ, const struct puffs_cn *pcn)
2294{
2295	struct perfuse_state *ps;
2296	struct perfuse_node_data *pnd;
2297	perfuse_msg_t *pm;
2298	char *path;
2299	const char *name;
2300	size_t len;
2301	int error;
2302
2303	pnd = PERFUSE_NODE_DATA(opc);
2304
2305	if ((pnd->pnd_flags & PND_REMOVED) ||
2306	    (PERFUSE_NODE_DATA(targ)->pnd_flags & PND_REMOVED))
2307		return ENOENT;
2308
2309	node_ref(opc);
2310	node_ref(targ);
2311
2312	/*
2313	 * Await for all operations on the deleted node to drain,
2314	 * as the filesystem may be confused to have it deleted
2315	 * during a getattr
2316	 */
2317	while (PERFUSE_NODE_DATA(targ)->pnd_inxchg)
2318		requeue_request(pu, targ, PCQ_AFTERXCHG);
2319
2320	ps = puffs_getspecific(pu);
2321	name = pcn->pcn_name;
2322	len = pcn->pcn_namelen + 1;
2323
2324	pm = ps->ps_new_msg(pu, opc, FUSE_RMDIR, len, pcn->pcn_cred);
2325	path = _GET_INPAYLOAD(ps, pm, char *);
2326	(void)strlcpy(path, name, len);
2327
2328	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2329		goto out;
2330
2331	perfuse_cache_flush(targ);
2332	PERFUSE_NODE_DATA(targ)->pnd_flags |= PND_REMOVED;
2333
2334	if (!(PERFUSE_NODE_DATA(targ)->pnd_flags & PND_OPEN))
2335		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2);
2336
2337	/*
2338	 * The parent directory needs a sync
2339	 */
2340	PERFUSE_NODE_DATA(opc)->pnd_flags |= PND_DIRTY;
2341
2342#ifdef PERFUSE_DEBUG
2343	if (perfuse_diagflags & PDF_FILENAME)
2344		DPRINTF("%s: remove nodeid = 0x%"PRIx64" file = \"%s\"\n",
2345			__func__, PERFUSE_NODE_DATA(targ)->pnd_nodeid,
2346			perfuse_node_path(ps, targ));
2347#endif
2348	ps->ps_destroy_msg(pm);
2349	error = 0;
2350
2351out:
2352	node_rele(opc);
2353	node_rele(targ);
2354	return error;
2355}
2356
2357/* vap is unused */
2358/* ARGSUSED4 */
2359int
2360perfuse_node_symlink(struct puffs_usermount *pu, puffs_cookie_t opc,
2361	struct puffs_newinfo *pni, const struct puffs_cn *pcn_src,
2362	const struct vattr *vap, const char *link_target)
2363{
2364	struct perfuse_state *ps;
2365	perfuse_msg_t *pm;
2366	char *np;
2367	const char *path;
2368	size_t path_len;
2369	size_t linkname_len;
2370	size_t len;
2371	int error;
2372
2373	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2374		return ENOENT;
2375
2376	node_ref(opc);
2377	ps = puffs_getspecific(pu);
2378	path = pcn_src->pcn_name;
2379	path_len = pcn_src->pcn_namelen + 1;
2380	linkname_len = strlen(link_target) + 1;
2381	len = path_len + linkname_len;
2382
2383	pm = ps->ps_new_msg(pu, opc, FUSE_SYMLINK, len, pcn_src->pcn_cred);
2384	np = _GET_INPAYLOAD(ps, pm, char *);
2385	(void)strlcpy(np, path, path_len);
2386	np += path_len;
2387	(void)strlcpy(np, link_target, linkname_len);
2388
2389	error = node_mk_common(pu, opc, pni, pcn_src, pm);
2390
2391	node_rele(opc);
2392	return error;
2393}
2394
2395/* ARGSUSED4 */
2396int
2397perfuse_node_readdir(struct puffs_usermount *pu, puffs_cookie_t opc,
2398	struct dirent *dent, off_t *readoff, size_t *reslen,
2399	const struct puffs_cred *pcr, int *eofflag, off_t *cookies,
2400	size_t *ncookies)
2401{
2402	perfuse_msg_t *pm;
2403	uint64_t fh;
2404	struct perfuse_state *ps;
2405	struct perfuse_node_data *pnd;
2406	struct fuse_read_in *fri;
2407	struct fuse_out_header *foh;
2408	struct fuse_dirent *fd;
2409	size_t foh_len;
2410	int error;
2411	size_t fd_maxlen;
2412
2413	error = 0;
2414	node_ref(opc);
2415	ps = puffs_getspecific(pu);
2416
2417	/*
2418	 * readdir state is kept at node level, and several readdir
2419	 * requests can be issued at the same time on the same node.
2420	 * We need to queue requests so that only one is in readdir
2421	 * code at the same time.
2422	 */
2423	pnd = PERFUSE_NODE_DATA(opc);
2424	while (pnd->pnd_flags & PND_INREADDIR)
2425		requeue_request(pu, opc, PCQ_READDIR);
2426	pnd->pnd_flags |= PND_INREADDIR;
2427
2428#ifdef PERFUSE_DEBUG
2429	if (perfuse_diagflags & PDF_READDIR)
2430		DPRINTF("%s: READDIR opc = %p enter critical section\n",
2431			__func__, (void *)opc);
2432#endif
2433	/*
2434	 * Re-initialize pnd->pnd_fd_cookie on the first readdir for a node
2435	 */
2436	if (*readoff == 0)
2437		pnd->pnd_fd_cookie = 0;
2438
2439	/*
2440	 * Do we already have the data bufered?
2441	 */
2442	if (pnd->pnd_dirent != NULL)
2443		goto out;
2444	pnd->pnd_dirent_len = 0;
2445
2446	/*
2447	 * It seems NetBSD can call readdir without open first
2448	 * libfuse will crash if it is done that way, hence open first.
2449	 */
2450	if (!(pnd->pnd_flags & PND_OPEN)) {
2451		if ((error = perfuse_node_open(pu, opc, FREAD, pcr)) != 0)
2452			goto out;
2453	}
2454
2455	fh = perfuse_get_fh(opc, FREAD);
2456
2457#ifdef PERFUSE_DEBUG
2458	if (perfuse_diagflags & PDF_FH)
2459		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
2460			"rfh = 0x%"PRIx64"\n", __func__, (void *)opc,
2461			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fh);
2462#endif
2463
2464	pnd->pnd_all_fd = NULL;
2465	pnd->pnd_all_fd_len = 0;
2466	fd_maxlen = ps->ps_max_readahead - sizeof(*foh);
2467
2468	do {
2469		size_t fd_len;
2470		char *afdp;
2471
2472		pm = ps->ps_new_msg(pu, opc, FUSE_READDIR, sizeof(*fri), pcr);
2473
2474		/*
2475		 * read_flags, lock_owner and flags are unused in libfuse
2476		 */
2477		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
2478		fri->fh = fh;
2479		fri->offset = pnd->pnd_fd_cookie;
2480		fri->size = (uint32_t)fd_maxlen;
2481		fri->read_flags = 0;
2482		fri->lock_owner = 0;
2483		fri->flags = 0;
2484
2485		if ((error = xchg_msg(pu, opc, pm,
2486				      UNSPEC_REPLY_LEN, wait_reply)) != 0)
2487			goto out;
2488
2489		/*
2490		 * There are many puffs_framebufs calls later,
2491		 * therefore foh will not be valid for a long time.
2492		 * Just get the length and forget it.
2493		 */
2494		foh = GET_OUTHDR(ps, pm);
2495		foh_len = foh->len;
2496
2497		/*
2498		 * Empty read: we reached the end of the buffer.
2499		 */
2500		if (foh_len == sizeof(*foh)) {
2501			ps->ps_destroy_msg(pm);
2502			*eofflag = 1;
2503			break;
2504		}
2505
2506		/*
2507		 * Check for corrupted message.
2508		 */
2509		if (foh_len < sizeof(*foh) + sizeof(*fd)) {
2510			ps->ps_destroy_msg(pm);
2511			DWARNX("readdir reply too short");
2512			error = EIO;
2513			goto out;
2514		}
2515
2516
2517		fd = GET_OUTPAYLOAD(ps, pm, fuse_dirent);
2518		fd_len = foh_len - sizeof(*foh);
2519
2520		pnd->pnd_all_fd = realloc(pnd->pnd_all_fd,
2521					  pnd->pnd_all_fd_len + fd_len);
2522		if (pnd->pnd_all_fd  == NULL)
2523			DERR(EX_OSERR, "%s: malloc failed", __func__);
2524
2525		afdp = (char *)(void *)pnd->pnd_all_fd + pnd->pnd_all_fd_len;
2526		(void)memcpy(afdp, fd, fd_len);
2527
2528		pnd->pnd_all_fd_len += fd_len;
2529
2530		/*
2531		 * The fd->off field is used as a cookie for
2532		 * resuming the next readdir() where this one was left.
2533	 	 */
2534		pnd->pnd_fd_cookie = readdir_last_cookie(fd, fd_len);
2535
2536		ps->ps_destroy_msg(pm);
2537	} while (1 /* CONSTCOND */);
2538
2539	if (pnd->pnd_all_fd != NULL) {
2540		if (fuse_to_dirent(pu, opc, pnd->pnd_all_fd,
2541				   pnd->pnd_all_fd_len) == -1)
2542			error = EIO;
2543	}
2544
2545out:
2546	if (pnd->pnd_all_fd != NULL) {
2547		free(pnd->pnd_all_fd);
2548		pnd->pnd_all_fd = NULL;
2549		pnd->pnd_all_fd_len = 0;
2550	}
2551
2552	if (error == 0)
2553		readdir_buffered(opc, dent, readoff, reslen);
2554
2555	/*
2556	 * Schedule queued readdir requests
2557	 */
2558	pnd->pnd_flags &= ~PND_INREADDIR;
2559	(void)dequeue_requests(opc, PCQ_READDIR, DEQUEUE_ALL);
2560
2561#ifdef PERFUSE_DEBUG
2562	if (perfuse_diagflags & PDF_READDIR)
2563		DPRINTF("%s: READDIR opc = %p exit critical section\n",
2564			__func__, (void *)opc);
2565#endif
2566
2567	node_rele(opc);
2568	return error;
2569}
2570
2571int
2572perfuse_node_readlink(struct puffs_usermount *pu, puffs_cookie_t opc,
2573	const struct puffs_cred *pcr, char *linkname, size_t *linklen)
2574{
2575	struct perfuse_state *ps;
2576	perfuse_msg_t *pm;
2577	int error;
2578	size_t len;
2579	struct fuse_out_header *foh;
2580
2581	if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
2582		return ENOENT;
2583
2584	node_ref(opc);
2585	ps = puffs_getspecific(pu);
2586
2587	pm = ps->ps_new_msg(pu, opc, FUSE_READLINK, 0, pcr);
2588
2589	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2590		goto out;
2591
2592	foh = GET_OUTHDR(ps, pm);
2593	len = foh->len - sizeof(*foh);
2594	if (len > *linklen)
2595		DERRX(EX_PROTOCOL, "path len = %zd too long", len);
2596	if (len == 0)
2597		DERRX(EX_PROTOCOL, "path len = %zd too short", len);
2598
2599	/*
2600	 * FUSE filesystems return a NUL terminated string, we
2601	 * do not want to trailing \0
2602	 */
2603	*linklen = len - 1;
2604	(void)memcpy(linkname, _GET_OUTPAYLOAD(ps, pm, char *), len);
2605
2606	ps->ps_destroy_msg(pm);
2607	error = 0;
2608
2609out:
2610	node_rele(opc);
2611	return error;
2612}
2613
2614int
2615perfuse_node_reclaim(struct puffs_usermount *pu, puffs_cookie_t opc)
2616{
2617	struct perfuse_state *ps;
2618	perfuse_msg_t *pm;
2619	struct perfuse_node_data *pnd;
2620	struct fuse_forget_in *ffi;
2621	int nlookup;
2622	struct timespec now;
2623
2624	if (opc == 0)
2625		return 0;
2626
2627	ps = puffs_getspecific(pu);
2628	pnd = PERFUSE_NODE_DATA(opc);
2629
2630	/*
2631	 * Never forget the root.
2632	 */
2633	if (pnd->pnd_nodeid == FUSE_ROOT_ID)
2634		return 0;
2635
2636	/*
2637	 * There is a race condition between reclaim and lookup.
2638	 * When looking up an already known node, the kernel cannot
2639	 * hold a reference on the result until it gets the PUFFS
2640	 * reply. It mayy therefore reclaim the node after the
2641	 * userland looked it up, and before it gets the reply.
2642	 * On rely, the kernel re-creates the node, but at that
2643	 * time the node has been reclaimed in userland.
2644	 *
2645	 * In order to avoid this, we refuse reclaiming nodes that
2646	 * are too young since the last lookup - and that we do
2647	 * not have removed on our own, of course.
2648	 */
2649	if (clock_gettime(CLOCK_REALTIME, &now) != 0)
2650		DERR(EX_OSERR, "clock_gettime failed");
2651
2652	if (timespeccmp(&pnd->pnd_cn_expire, &now, >) &&
2653	    !(pnd->pnd_flags & PND_REMOVED)) {
2654		if (!(pnd->pnd_flags & PND_NODELEAK)) {
2655			ps->ps_nodeleakcount++;
2656			pnd->pnd_flags |= PND_NODELEAK;
2657		}
2658		DWARNX("possible leaked node:: opc = %p \"%s\"",
2659		       opc, pnd->pnd_name);
2660		return 0;
2661	}
2662
2663	node_ref(opc);
2664	pnd->pnd_flags |= PND_RECLAIMED;
2665	pnd->pnd_puffs_nlookup--;
2666	nlookup = pnd->pnd_puffs_nlookup;
2667
2668#ifdef PERFUSE_DEBUG
2669	if (perfuse_diagflags & PDF_RECLAIM)
2670		DPRINTF("%s (nodeid %"PRId64") reclaimed\n",
2671			perfuse_node_path(ps, opc), pnd->pnd_nodeid);
2672#endif
2673
2674#ifdef PERFUSE_DEBUG
2675	if (perfuse_diagflags & PDF_RECLAIM)
2676		DPRINTF("%s (nodeid %"PRId64") is %sreclaimed, nlookup = %d "
2677			"%s%s%s%s, pending ops:%s%s%s\n",
2678		        perfuse_node_path(ps, opc), pnd->pnd_nodeid,
2679		        pnd->pnd_flags & PND_RECLAIMED ? "" : "not ",
2680			pnd->pnd_puffs_nlookup,
2681			pnd->pnd_flags & PND_OPEN ? "open " : "not open",
2682			pnd->pnd_flags & PND_RFH ? "r" : "",
2683			pnd->pnd_flags & PND_WFH ? "w" : "",
2684			pnd->pnd_flags & PND_BUSY ? "" : " none",
2685			pnd->pnd_flags & PND_INREADDIR ? " readdir" : "",
2686			pnd->pnd_flags & PND_INWRITE ? " write" : "",
2687			pnd->pnd_flags & PND_INOPEN ? " open" : "");
2688#endif
2689	/*
2690	 * Make sure it is not looked up again
2691	 */
2692	if (!(pnd->pnd_flags & PND_REMOVED))
2693		perfuse_cache_flush(opc);
2694
2695	/*
2696	 * Purge any activity on the node, while checking
2697	 * that it remains eligible for a reclaim.
2698	 */
2699	while (pnd->pnd_ref > 1)
2700		requeue_request(pu, opc, PCQ_REF);
2701
2702	/*
2703	 * reclaim cancel?
2704	 */
2705	if (pnd->pnd_puffs_nlookup > nlookup) {
2706		pnd->pnd_flags &= ~PND_RECLAIMED;
2707		perfuse_node_cache(ps, opc);
2708		node_rele(opc);
2709		return 0;
2710	}
2711
2712
2713#ifdef PERFUSE_DEBUG
2714	if ((pnd->pnd_flags & PND_OPEN) ||
2715	       !TAILQ_EMPTY(&pnd->pnd_pcq))
2716		DERRX(EX_SOFTWARE, "%s: opc = %p: still open",
2717		      __func__, opc);
2718
2719	if ((pnd->pnd_flags & PND_BUSY) ||
2720	       !TAILQ_EMPTY(&pnd->pnd_pcq))
2721		DERRX(EX_SOFTWARE, "%s: opc = %p: queued operations",
2722		      __func__, opc);
2723
2724	if (pnd->pnd_inxchg != 0)
2725		DERRX(EX_SOFTWARE, "%s: opc = %p: ongoing operations",
2726		      __func__, opc);
2727#endif
2728
2729	/*
2730	 * Send the FORGET message
2731	 *
2732	 * ps_new_msg() is called with NULL creds, which will
2733	 * be interpreted as FUSE superuser. This is obviously
2734	 * fine since we operate with kernel creds here.
2735	 */
2736	pm = ps->ps_new_msg(pu, opc, FUSE_FORGET,
2737		      sizeof(*ffi), NULL);
2738	ffi = GET_INPAYLOAD(ps, pm, fuse_forget_in);
2739	ffi->nlookup = pnd->pnd_fuse_nlookup;
2740
2741	/*
2742	 * No reply is expected, pm is freed in xchg_msg
2743	 */
2744	(void)xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, no_reply);
2745
2746	perfuse_destroy_pn(pu, opc);
2747
2748	return 0;
2749}
2750
2751int
2752perfuse_node_inactive(struct puffs_usermount *pu, puffs_cookie_t opc)
2753{
2754	struct perfuse_node_data *pnd;
2755	int error;
2756
2757	if (opc == 0)
2758		return 0;
2759
2760	node_ref(opc);
2761	pnd = PERFUSE_NODE_DATA(opc);
2762
2763	if (!(pnd->pnd_flags & (PND_OPEN|PND_REMOVED)))
2764		goto out;
2765
2766	/*
2767	 * Make sure all operation are finished
2768	 * There can be an ongoing write. Other
2769	 * operation wait for all data before
2770	 * the close/inactive.
2771	 */
2772	while (pnd->pnd_flags & PND_INWRITE)
2773		requeue_request(pu, opc, PCQ_AFTERWRITE);
2774
2775	/*
2776	 * The inactive operation may be cancelled,
2777	 * If no open is in progress, set PND_INOPEN
2778	 * so that a new open will be queued.
2779	 */
2780	if (pnd->pnd_flags & PND_INOPEN)
2781		goto out;
2782
2783	pnd->pnd_flags |= PND_INOPEN;
2784
2785	/*
2786	 * Sync data
2787	 */
2788	if (pnd->pnd_flags & PND_DIRTY) {
2789		if ((error = perfuse_node_fsync(pu, opc, NULL, 0, 0, 0)) != 0)
2790			DWARN("%s: perfuse_node_fsync failed error = %d",
2791			      __func__, error);
2792	}
2793
2794
2795	/*
2796	 * Close handles
2797	 */
2798	if (pnd->pnd_flags & PND_WFH) {
2799		if ((error = perfuse_node_close_common(pu, opc, FWRITE)) != 0)
2800			DWARN("%s: close write FH failed error = %d",
2801			      __func__, error);
2802	}
2803
2804	if (pnd->pnd_flags & PND_RFH) {
2805		if ((error = perfuse_node_close_common(pu, opc, FREAD)) != 0)
2806			DWARN("%s: close read FH failed error = %d",
2807			      __func__, error);
2808	}
2809
2810	/*
2811	 * This will cause a reclaim to be sent
2812	 */
2813	if (pnd->pnd_flags & PND_REMOVED)
2814		puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N1);
2815
2816	/*
2817	 * Schedule awaiting operations
2818	 */
2819	pnd->pnd_flags &= ~PND_INOPEN;
2820	(void)dequeue_requests(opc, PCQ_OPEN, DEQUEUE_ALL);
2821
2822	/*
2823	 * errors are ignored, since the kernel ignores the return code.
2824	 */
2825out:
2826	node_rele(opc);
2827	return 0;
2828}
2829
2830
/*
 * Print operation: not implemented. Any call is reported through
 * DERRX(EX_SOFTWARE, ...); neither pu nor opc is examined.
 */
/* ARGSUSED0 */
int
perfuse_node_print(struct puffs_usermount *pu, puffs_cookie_t opc)
{
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
	return 0;
}
2838
/*
 * Pathconf operation: not implemented. Any call is reported through
 * DERRX(EX_SOFTWARE, ...); none of the arguments is examined and
 * *retval is never written.
 */
/* ARGSUSED0 */
int
perfuse_node_pathconf(struct puffs_usermount *pu, puffs_cookie_t opc,
	int name, int *retval)
{
	DERRX(EX_SOFTWARE, "%s: UNIMPLEMENTED (FATAL)", __func__);
	return 0;
}
2847
2848int
2849perfuse_node_advlock(struct puffs_usermount *pu, puffs_cookie_t opc,
2850	void *id, int op, struct flock *fl, int flags)
2851{
2852	struct perfuse_state *ps;
2853	int fop;
2854	perfuse_msg_t *pm;
2855	uint64_t fh;
2856	struct fuse_lk_in *fli;
2857	struct fuse_out_header *foh;
2858	struct fuse_lk_out *flo;
2859	uint32_t owner;
2860	size_t len;
2861	int error;
2862
2863	node_ref(opc);
2864
2865	/*
2866	 * Make sure we do have a filehandle, as the FUSE filesystem
2867	 * expect one. E.g.: if we provide none, GlusterFS logs an error
2868	 * "0-glusterfs-fuse: xl is NULL"
2869	 *
2870	 * We need the read file handle if the file is open read only,
2871	 * in order to support shared locks on read-only files.
2872	 * NB: The kernel always sends advlock for read-only
2873	 * files at exit time when the process used lock, see
2874	 * sys_exit -> exit1 -> fd_free -> fd_close -> VOP_ADVLOCK
2875	 */
2876	if ((fh = perfuse_get_fh(opc, FREAD)) == FUSE_UNKNOWN_FH) {
2877		error = EBADF;
2878		goto out;
2879	}
2880
2881	ps = puffs_getspecific(pu);
2882
2883	if (op == F_GETLK)
2884		fop = FUSE_GETLK;
2885	else
2886		fop = (flags & F_WAIT) ? FUSE_SETLKW : FUSE_SETLK;
2887
2888	/*
2889	 * XXX ps_new_msg() is called with NULL creds, which will
2890	 * be interpreted as FUSE superuser. We have no way to
2891	 * know the requesting process' credential, but since advlock()
2892	 * is supposed to operate on a file that has been open(),
2893	 * permission should have already been checked at open() time.
2894	 */
2895	pm = ps->ps_new_msg(pu, opc, fop, sizeof(*fli), NULL);
2896	fli = GET_INPAYLOAD(ps, pm, fuse_lk_in);
2897	fli->fh = fh;
2898	fli->owner = (uint64_t)(vaddr_t)id;
2899	fli->lk.start = fl->l_start;
2900	fli->lk.end = fl->l_start + fl->l_len;
2901	fli->lk.type = fl->l_type;
2902	fli->lk.pid = fl->l_pid;
2903	fli->lk_flags = (flags & F_FLOCK) ? FUSE_LK_FLOCK : 0;
2904
2905	owner = (uint32_t)(vaddr_t)id;
2906
2907#ifdef PERFUSE_DEBUG
2908	if (perfuse_diagflags & PDF_FH)
2909		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
2910			__func__, (void *)opc,
2911			PERFUSE_NODE_DATA(opc)->pnd_nodeid, fli->fh);
2912#endif
2913
2914	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
2915		goto out;
2916
2917	foh = GET_OUTHDR(ps, pm);
2918	len = foh->len - sizeof(*foh);
2919
2920	/*
2921	 * Save or clear the lock
2922	 */
2923	switch (op) {
2924	case F_GETLK:
2925		if (len != sizeof(*flo))
2926			DERRX(EX_SOFTWARE,
2927			      "%s: Unexpected lock reply len %zd",
2928			      __func__, len);
2929
2930		flo = GET_OUTPAYLOAD(ps, pm, fuse_lk_out);
2931		fl->l_start = flo->lk.start;
2932		fl->l_len = flo->lk.end - flo->lk.start;
2933		fl->l_pid = flo->lk.pid;
2934		fl->l_type = flo->lk.type;
2935		fl->l_whence = SEEK_SET;	/* libfuse hardcodes it */
2936
2937		PERFUSE_NODE_DATA(opc)->pnd_lock_owner = flo->lk.pid;
2938		break;
2939	case F_UNLCK:
2940		owner = 0;
2941		/* FALLTHROUGH */
2942	case F_SETLK:
2943		/* FALLTHROUGH */
2944	case F_SETLKW:
2945		if (error != 0)
2946			PERFUSE_NODE_DATA(opc)->pnd_lock_owner = owner;
2947
2948		if (len != 0)
2949			DERRX(EX_SOFTWARE,
2950			      "%s: Unexpected unlock reply len %zd",
2951			      __func__, len);
2952
2953		break;
2954	default:
2955		DERRX(EX_SOFTWARE, "%s: Unexpected op %d", __func__, op);
2956		break;
2957	}
2958
2959	ps->ps_destroy_msg(pm);
2960	error = 0;
2961
2962out:
2963	node_rele(opc);
2964	return error;
2965}
2966
2967int
2968perfuse_node_read(struct puffs_usermount *pu, puffs_cookie_t opc, uint8_t *buf,
2969	off_t offset, size_t *resid, const struct puffs_cred *pcr, int ioflag)
2970{
2971	struct perfuse_state *ps;
2972	struct perfuse_node_data *pnd;
2973	const struct vattr *vap;
2974	perfuse_msg_t *pm;
2975	struct fuse_read_in *fri;
2976	struct fuse_out_header *foh;
2977	size_t readen;
2978	int error;
2979
2980	ps = puffs_getspecific(pu);
2981	pnd = PERFUSE_NODE_DATA(opc);
2982	vap = puffs_pn_getvap((struct puffs_node *)opc);
2983
2984	/*
2985	 * NetBSD turns that into a getdents(2) output
2986	 * We just do a EISDIR as this feature is of little use.
2987	 */
2988	if (vap->va_type == VDIR)
2989		return EISDIR;
2990
2991	if ((u_quad_t)offset + *resid > vap->va_size)
2992		DWARNX("%s %p read %lld@%zu beyond EOF %" PRIu64 "\n",
2993		       __func__, (void *)opc, (long long)offset,
2994		       *resid, vap->va_size);
2995
2996	do {
2997		size_t max_read;
2998
2999		max_read = ps->ps_max_readahead - sizeof(*foh);
3000		/*
3001		 * flags may be set to FUSE_READ_LOCKOWNER
3002		 * if lock_owner is provided.
3003		 */
3004		pm = ps->ps_new_msg(pu, opc, FUSE_READ, sizeof(*fri), pcr);
3005		fri = GET_INPAYLOAD(ps, pm, fuse_read_in);
3006		fri->fh = perfuse_get_fh(opc, FREAD);
3007		fri->offset = offset;
3008		fri->size = (uint32_t)MIN(*resid, max_read);
3009		fri->read_flags = 0; /* XXX Unused by libfuse? */
3010		fri->lock_owner = pnd->pnd_lock_owner;
3011		fri->flags = 0;
3012		fri->flags |= (fri->lock_owner != 0) ? FUSE_READ_LOCKOWNER : 0;
3013
3014#ifdef PERFUSE_DEBUG
3015	if (perfuse_diagflags & PDF_FH)
3016		DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", fh = 0x%"PRIx64"\n",
3017			__func__, (void *)opc, pnd->pnd_nodeid, fri->fh);
3018#endif
3019		error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply);
3020		if (error  != 0)
3021			return error;
3022
3023		foh = GET_OUTHDR(ps, pm);
3024		readen = foh->len - sizeof(*foh);
3025
3026#ifdef PERFUSE_DEBUG
3027		if (readen > *resid)
3028			DERRX(EX_SOFTWARE, "%s: Unexpected big read %zd",
3029			      __func__, readen);
3030#endif
3031
3032		(void)memcpy(buf,  _GET_OUTPAYLOAD(ps, pm, char *), readen);
3033
3034		buf += readen;
3035		offset += readen;
3036		*resid -= readen;
3037
3038		ps->ps_destroy_msg(pm);
3039	} while ((*resid != 0) && (readen != 0));
3040
3041	if (ioflag & (IO_SYNC|IO_DSYNC))
3042		ps->ps_syncreads++;
3043	else
3044		ps->ps_asyncreads++;
3045
3046	return 0;
3047}
3048
3049int
3050perfuse_node_write(struct puffs_usermount *pu, puffs_cookie_t opc,
3051	uint8_t *buf, off_t offset, size_t *resid,
3052	const struct puffs_cred *pcr, int ioflag)
3053{
3054	return perfuse_node_write2(pu, opc, buf, offset, resid, pcr, ioflag, 0);
3055}
3056
3057/* ARGSUSED7 */
3058int
3059perfuse_node_write2(struct puffs_usermount *pu, puffs_cookie_t opc,
3060	uint8_t *buf, off_t offset, size_t *resid,
3061	const struct puffs_cred *pcr, int ioflag, int xflag)
3062{
3063	struct perfuse_state *ps;
3064	struct perfuse_node_data *pnd;
3065	struct vattr *vap;
3066	perfuse_msg_t *pm;
3067	struct fuse_write_in *fwi;
3068	struct fuse_write_out *fwo;
3069	size_t data_len;
3070	size_t payload_len;
3071	size_t written;
3072	int inresize;
3073	int error;
3074
3075	ps = puffs_getspecific(pu);
3076	pnd = PERFUSE_NODE_DATA(opc);
3077	vap = puffs_pn_getvap((struct puffs_node *)opc);
3078	written = 0;
3079	inresize = 0;
3080	error = 0;
3081
3082	if (vap->va_type == VDIR)
3083		return EISDIR;
3084
3085	node_ref(opc);
3086
3087	/*
3088	 * We need to queue write requests in order to avoid
3089	 * dequeueing PCQ_AFTERWRITE when there are pending writes.
3090	 */
3091	while (pnd->pnd_flags & PND_INWRITE)
3092		requeue_request(pu, opc, PCQ_WRITE);
3093	pnd->pnd_flags |= PND_INWRITE;
3094
3095	/*
3096	 * Serialize size access, see comment in perfuse_node_setattr().
3097	 */
3098	if ((u_quad_t)offset + *resid > vap->va_size) {
3099		while (pnd->pnd_flags & PND_INRESIZE)
3100			requeue_request(pu, opc, PCQ_RESIZE);
3101		pnd->pnd_flags |= PND_INRESIZE;
3102		inresize = 1;
3103	}
3104
3105	/*
3106	 * append flag: re-read the file size so that
3107	 * we get the latest value.
3108	 */
3109	if (ioflag & PUFFS_IO_APPEND) {
3110		DWARNX("%s: PUFFS_IO_APPEND set, untested code", __func__);
3111
3112		if ((error = perfuse_node_getattr(pu, opc, vap, pcr)) != 0)
3113			goto out;
3114
3115		offset = vap->va_size;
3116	}
3117
3118#ifdef PERFUSE_DEBUG
3119	if (perfuse_diagflags & PDF_RESIZE)
3120		DPRINTF(">> %s %p %" PRIu64 "\n", __func__,
3121			(void *)opc, vap->va_size);
3122#endif
3123
3124	do {
3125		size_t max_write;
3126		/*
3127		 * There is a writepage flag when data
3128		 * is aligned to page size. Use it for
3129		 * everything but the data after the last
3130		 * page boundary.
3131		 */
3132		max_write = ps->ps_max_write - sizeof(*fwi);
3133
3134		data_len = MIN(*resid, max_write);
3135		if (data_len > (size_t)sysconf(_SC_PAGESIZE))
3136			data_len = data_len & ~(sysconf(_SC_PAGESIZE) - 1);
3137
3138		payload_len = data_len + sizeof(*fwi);
3139
3140		/*
3141		 * flags may be set to FUSE_WRITE_CACHE (XXX usage?)
3142		 * or FUSE_WRITE_LOCKOWNER, if lock_owner is provided.
3143		 * write_flags is set to 1 for writepage.
3144		 */
3145		pm = ps->ps_new_msg(pu, opc, FUSE_WRITE, payload_len, pcr);
3146		fwi = GET_INPAYLOAD(ps, pm, fuse_write_in);
3147		fwi->fh = perfuse_get_fh(opc, FWRITE);
3148		fwi->offset = offset;
3149		fwi->size = (uint32_t)data_len;
3150		fwi->write_flags = (fwi->size % sysconf(_SC_PAGESIZE)) ? 0 : 1;
3151		fwi->lock_owner = pnd->pnd_lock_owner;
3152		fwi->flags = 0;
3153		fwi->flags |= (fwi->lock_owner != 0) ? FUSE_WRITE_LOCKOWNER : 0;
3154		fwi->flags |= (ioflag & IO_DIRECT) ? 0 : FUSE_WRITE_CACHE;
3155		(void)memcpy((fwi + 1), buf, data_len);
3156
3157
3158#ifdef PERFUSE_DEBUG
3159		if (perfuse_diagflags & PDF_FH)
3160			DPRINTF("%s: opc = %p, nodeid = 0x%"PRIx64", "
3161				"fh = 0x%"PRIx64"\n", __func__,
3162				(void *)opc, pnd->pnd_nodeid, fwi->fh);
3163#endif
3164		if ((error = xchg_msg(pu, opc, pm,
3165				      sizeof(*fwo), wait_reply)) != 0)
3166			goto out;
3167
3168		fwo = GET_OUTPAYLOAD(ps, pm, fuse_write_out);
3169		written = fwo->size;
3170		ps->ps_destroy_msg(pm);
3171
3172#ifdef PERFUSE_DEBUG
3173		if (written > *resid)
3174			DERRX(EX_SOFTWARE, "%s: Unexpected big write %zd",
3175			      __func__, written);
3176#endif
3177		*resid -= written;
3178		offset += written;
3179		buf += written;
3180
3181	} while (*resid != 0);
3182
3183	/*
3184	 * puffs_ops(3) says
3185	 *  "everything must be written or an error will be generated"
3186	 */
3187	if (*resid != 0)
3188		error = EFBIG;
3189
3190#ifdef PERFUSE_DEBUG
3191	if (perfuse_diagflags & PDF_RESIZE) {
3192		if (offset > (off_t)vap->va_size)
3193			DPRINTF("<< %s %p %" PRIu64 " -> %lld\n", __func__,
3194				(void *)opc, vap->va_size, (long long)offset);
3195		else
3196			DPRINTF("<< %s %p \n", __func__, (void *)opc);
3197	}
3198#endif
3199
3200	/*
3201	 * Update file size if we wrote beyond the end
3202	 */
3203	if (offset > (off_t)vap->va_size)
3204		vap->va_size = offset;
3205
3206	if (inresize) {
3207#ifdef PERFUSE_DEBUG
3208		if (!(pnd->pnd_flags & PND_INRESIZE))
3209			DERRX(EX_SOFTWARE, "file write grow without resize");
3210#endif
3211		pnd->pnd_flags &= ~PND_INRESIZE;
3212		(void)dequeue_requests(opc, PCQ_RESIZE, DEQUEUE_ALL);
3213	}
3214
3215
3216	/*
3217	 * Statistics
3218	 */
3219	if (ioflag & (IO_SYNC|IO_DSYNC))
3220		ps->ps_syncwrites++;
3221	else
3222		ps->ps_asyncwrites++;
3223
3224	/*
3225	 * Remember to sync the file
3226	 */
3227	pnd->pnd_flags |= PND_DIRTY;
3228
3229#ifdef PERFUSE_DEBUG
3230	if (perfuse_diagflags & PDF_SYNC)
3231		DPRINTF("%s: DIRTY opc = %p, file = \"%s\"\n",
3232			__func__, (void*)opc, perfuse_node_path(ps, opc));
3233#endif
3234
3235out:
3236	/*
3237	 * VOP_PUTPAGE causes FAF write where kernel does not
3238	 * check operation result. At least warn if it failed.
3239	 */
3240#ifdef PUFFS_WRITE_FAF
3241	if (error && (xflag & PUFFS_WRITE_FAF))
3242		DWARN("Data loss caused by FAF write failed on \"%s\"",
3243		      pnd->pnd_name);
3244#endif /* PUFFS_WRITE_FAF */
3245
3246	/*
3247	 * If there are no more queued write, we can resume
3248	 * an operation awaiting write completion.
3249	 */
3250	pnd->pnd_flags &= ~PND_INWRITE;
3251	if (dequeue_requests(opc, PCQ_WRITE, 1) == 0)
3252		(void)dequeue_requests(opc, PCQ_AFTERWRITE, DEQUEUE_ALL);
3253
3254	node_rele(opc);
3255	return error;
3256}
3257
3258/* ARGSUSED0 */
3259void
3260perfuse_cache_write(struct puffs_usermount *pu, puffs_cookie_t opc, size_t size,
3261	struct puffs_cacherun *runs)
3262{
3263	return;
3264}
3265
3266/* ARGSUSED4 */
3267int
3268perfuse_node_getextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3269	int attrns, const char *attrname, size_t *attrsize, uint8_t *attr,
3270	size_t *resid, const struct puffs_cred *pcr)
3271{
3272	struct perfuse_state *ps;
3273	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3274	perfuse_msg_t *pm;
3275	struct fuse_getxattr_in *fgi;
3276	struct fuse_getxattr_out *fgo;
3277	struct fuse_out_header *foh;
3278	size_t attrnamelen;
3279	size_t len;
3280	char *np;
3281	int error;
3282
3283	node_ref(opc);
3284	ps = puffs_getspecific(pu);
3285	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3286	attrnamelen = strlen(attrname) + 1;
3287	len = sizeof(*fgi) + attrnamelen;
3288
3289	pm = ps->ps_new_msg(pu, opc, FUSE_GETXATTR, len, pcr);
3290	fgi = GET_INPAYLOAD(ps, pm, fuse_getxattr_in);
3291	fgi->size = (unsigned int)((resid != NULL) ? *resid : 0);
3292	np = (char *)(void *)(fgi + 1);
3293	(void)strlcpy(np, attrname, attrnamelen);
3294
3295	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
3296		goto out;
3297
3298	/*
3299	 * We just get fuse_getattr_out with list size if we requested
3300	 * a null size.
3301	 */
3302	if (resid == NULL) {
3303		fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
3304
3305		if (attrsize != NULL)
3306			*attrsize = fgo->size;
3307
3308		ps->ps_destroy_msg(pm);
3309		error = 0;
3310		goto out;
3311	}
3312
3313	/*
3314	 * And with a non null requested size, we get the list just
3315	 * after the header
3316	 */
3317	foh = GET_OUTHDR(ps, pm);
3318	np = (char *)(void *)(foh + 1);
3319
3320	if (resid != NULL) {
3321		len = MAX(foh->len - sizeof(*foh), *resid);
3322		(void)memcpy(attr, np, len);
3323		*resid -= len;
3324	}
3325
3326	ps->ps_destroy_msg(pm);
3327	error = 0;
3328
3329out:
3330	node_rele(opc);
3331	return error;
3332}
3333
3334int
3335perfuse_node_setextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3336	int attrns, const char *attrname, uint8_t *attr, size_t *resid,
3337	const struct puffs_cred *pcr)
3338{
3339	struct perfuse_state *ps;
3340	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3341	perfuse_msg_t *pm;
3342	struct fuse_setxattr_in *fsi;
3343	size_t attrnamelen;
3344	size_t len;
3345	char *np;
3346	int error;
3347
3348	node_ref(opc);
3349	ps = puffs_getspecific(pu);
3350	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3351	attrnamelen = strlen(attrname) + 1;
3352	len = sizeof(*fsi) + attrnamelen + *resid;
3353
3354	pm = ps->ps_new_msg(pu, opc, FUSE_SETXATTR, len, pcr);
3355	fsi = GET_INPAYLOAD(ps, pm, fuse_setxattr_in);
3356	fsi->size = (unsigned int)*resid;
3357	fsi->flags = 0;
3358	np = (char *)(void *)(fsi + 1);
3359	(void)strlcpy(np, attrname, attrnamelen);
3360	np += attrnamelen;
3361	(void)memcpy(np, (char *)attr, *resid);
3362
3363	if ((error = xchg_msg(pu, opc, pm,
3364			      NO_PAYLOAD_REPLY_LEN, wait_reply)) != 0)
3365		goto out;
3366
3367	ps->ps_destroy_msg(pm);
3368	*resid = 0;
3369	error = 0;
3370
3371out:
3372	node_rele(opc);
3373	return error;
3374}
3375
3376/* ARGSUSED2 */
3377int
3378perfuse_node_listextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3379	int attrns, size_t *attrsize, uint8_t *attrs, size_t *resid, int flag,
3380	const struct puffs_cred *pcr)
3381{
3382	struct perfuse_state *ps;
3383	perfuse_msg_t *pm;
3384	struct fuse_getxattr_in *fgi;
3385	struct fuse_getxattr_out *fgo;
3386	struct fuse_out_header *foh;
3387	char *np;
3388	size_t len, puffs_len;
3389	int error;
3390
3391	node_ref(opc);
3392
3393	ps = puffs_getspecific(pu);
3394	len = sizeof(*fgi);
3395
3396	pm = ps->ps_new_msg(pu, opc, FUSE_LISTXATTR, len, pcr);
3397	fgi = GET_INPAYLOAD(ps, pm, fuse_getxattr_in);
3398	if (resid != NULL)
3399		fgi->size = (unsigned int)*resid;
3400	else
3401		fgi->size = 0;
3402
3403	if ((error = xchg_msg(pu, opc, pm, UNSPEC_REPLY_LEN, wait_reply)) != 0)
3404		goto out;
3405
3406	/*
3407	 * We just get fuse_getattr_out with list size if we requested
3408	 * a null size.
3409	 */
3410	if (resid == NULL) {
3411		fgo = GET_OUTPAYLOAD(ps, pm, fuse_getxattr_out);
3412
3413		if (attrsize != NULL)
3414			*attrsize = fgo->size;
3415
3416		ps->ps_destroy_msg(pm);
3417
3418		error = 0;
3419		goto out;
3420	}
3421
3422	/*
3423	 * And with a non null requested size, we get the list just
3424	 * after the header
3425	 */
3426	foh = GET_OUTHDR(ps, pm);
3427	np = (char *)(void *)(foh + 1);
3428	puffs_len = foh->len - sizeof(*foh);
3429
3430	if (attrs != NULL) {
3431#ifdef PUFFS_EXTATTR_LIST_LENPREFIX
3432		/*
3433		 * Convert the FUSE reply to length prefixed strings
3434		 * if this is what the kernel wants.
3435		 */
3436		if (flag & PUFFS_EXTATTR_LIST_LENPREFIX) {
3437			size_t i, attrlen;
3438
3439			for (i = 0; i < puffs_len; i += attrlen + 1) {
3440				attrlen = strlen(np + i);
3441				(void)memmove(np + i + 1, np + i, attrlen);
3442				*(np + i) = (uint8_t)attrlen;
3443			}
3444		}
3445#endif /* PUFFS_EXTATTR_LIST_LENPREFIX */
3446		(void)memcpy(attrs, np, puffs_len);
3447		*resid -= puffs_len;
3448	}
3449
3450	if (attrsize != NULL)
3451		*attrsize = puffs_len;
3452
3453	ps->ps_destroy_msg(pm);
3454	error = 0;
3455
3456out:
3457	node_rele(opc);
3458	return error;
3459}
3460
3461int
3462perfuse_node_deleteextattr(struct puffs_usermount *pu, puffs_cookie_t opc,
3463	int attrns, const char *attrname, const struct puffs_cred *pcr)
3464{
3465	struct perfuse_state *ps;
3466	char fuse_attrname[LINUX_XATTR_NAME_MAX + 1];
3467	perfuse_msg_t *pm;
3468	size_t attrnamelen;
3469	char *np;
3470	int error;
3471
3472	node_ref(opc);
3473
3474	ps = puffs_getspecific(pu);
3475	attrname = perfuse_native_ns(attrns, attrname, fuse_attrname);
3476	attrnamelen = strlen(attrname) + 1;
3477
3478	pm = ps->ps_new_msg(pu, opc, FUSE_REMOVEXATTR, attrnamelen, pcr);
3479	np = _GET_INPAYLOAD(ps, pm, char *);
3480	(void)strlcpy(np, attrname, attrnamelen);
3481
3482	error = xchg_msg(pu, opc, pm, NO_PAYLOAD_REPLY_LEN, wait_reply);
3483
3484	ps->ps_destroy_msg(pm);
3485
3486	node_rele(opc);
3487	return error;
3488}
3489