pseudofs_vnops.c revision 259506
1/*-
2 * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/fs/pseudofs/pseudofs_vnops.c 259506 2013-12-17 13:10:28Z kib $");
31
32#include "opt_pseudofs.h"
33
34#include <sys/param.h>
35#include <sys/kernel.h>
36#include <sys/systm.h>
37#include <sys/ctype.h>
38#include <sys/dirent.h>
39#include <sys/fcntl.h>
40#include <sys/limits.h>
41#include <sys/lock.h>
42#include <sys/malloc.h>
43#include <sys/mount.h>
44#include <sys/mutex.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/sbuf.h>
48#include <sys/sx.h>
49#include <sys/sysctl.h>
50#include <sys/vnode.h>
51
52#include <fs/pseudofs/pseudofs.h>
53#include <fs/pseudofs/pseudofs_internal.h>
54
/*
 * Sanity checks that the pfs_node behind a vnode has a type matching the
 * vnode's own type.  Each one is a KASSERT whose message names the calling
 * function through __func__.
 */

/* A VDIR vnode must map to a directory, procdir or root node. */
#define KASSERT_PN_IS_DIR(pn)						\
	KASSERT((pn)->pn_type == pfstype_dir ||				\
	    (pn)->pn_type == pfstype_procdir ||				\
	    (pn)->pn_type == pfstype_root,				\
	    ("%s(): VDIR vnode refers to non-directory pfs_node", __func__))

/* A VREG vnode must map to a file node. */
#define KASSERT_PN_IS_FILE(pn)						\
	KASSERT((pn)->pn_type == pfstype_file,				\
	    ("%s(): VREG vnode refers to non-file pfs_node", __func__))

/* A VLNK vnode must map to a symlink node. */
#define KASSERT_PN_IS_LINK(pn)						\
	KASSERT((pn)->pn_type == pfstype_symlink,			\
	    ("%s(): VLNK vnode refers to non-link pfs_node", __func__))
68
69/*
70 * Returns the fileno, adjusted for target pid
71 */
72static uint32_t
73pn_fileno(struct pfs_node *pn, pid_t pid)
74{
75
76	KASSERT(pn->pn_fileno > 0,
77	    ("%s(): no fileno allocated", __func__));
78	if (pid != NO_PID)
79		return (pn->pn_fileno * NO_PID + pid);
80	return (pn->pn_fileno);
81}
82
83/*
84 * Returns non-zero if given file is visible to given thread.
85 */
86static int
87pfs_visible_proc(struct thread *td, struct pfs_node *pn, struct proc *proc)
88{
89	int visible;
90
91	if (proc == NULL)
92		return (0);
93
94	PROC_LOCK_ASSERT(proc, MA_OWNED);
95
96	visible = ((proc->p_flag & P_WEXIT) == 0);
97	if (visible)
98		visible = (p_cansee(td, proc) == 0);
99	if (visible && pn->pn_vis != NULL)
100		visible = pn_vis(td, proc, pn);
101	if (!visible)
102		return (0);
103	return (1);
104}
105
106static int
107pfs_visible(struct thread *td, struct pfs_node *pn, pid_t pid, struct proc **p)
108{
109	struct proc *proc;
110
111	PFS_TRACE(("%s (pid: %d, req: %d)",
112	    pn->pn_name, pid, td->td_proc->p_pid));
113
114	if (p)
115		*p = NULL;
116	if (pid == NO_PID)
117		PFS_RETURN (1);
118	if ((proc = pfind(pid)) == NULL)
119		PFS_RETURN (0);
120	if (pfs_visible_proc(td, pn, proc)) {
121		if (p)
122			*p = proc;
123		else
124			PROC_UNLOCK(proc);
125		PFS_RETURN (1);
126	}
127	PROC_UNLOCK(proc);
128	PFS_RETURN (0);
129}
130
131/*
132 * Verify permissions
133 */
134static int
135pfs_access(struct vop_access_args *va)
136{
137	struct vnode *vn = va->a_vp;
138	struct pfs_vdata *pvd = vn->v_data;
139	struct vattr vattr;
140	int error;
141
142	PFS_TRACE(("%s", pvd->pvd_pn->pn_name));
143	(void)pvd;
144
145	error = VOP_GETATTR(vn, &vattr, va->a_cred);
146	if (error)
147		PFS_RETURN (error);
148	error = vaccess(vn->v_type, vattr.va_mode, vattr.va_uid,
149	    vattr.va_gid, va->a_accmode, va->a_cred, NULL);
150	PFS_RETURN (error);
151}
152
153/*
154 * Close a file or directory
155 */
156static int
157pfs_close(struct vop_close_args *va)
158{
159	struct vnode *vn = va->a_vp;
160	struct pfs_vdata *pvd = vn->v_data;
161	struct pfs_node *pn = pvd->pvd_pn;
162	struct proc *proc;
163	int error;
164
165	PFS_TRACE(("%s", pn->pn_name));
166	pfs_assert_not_owned(pn);
167
168	/*
169	 * Do nothing unless this is the last close and the node has a
170	 * last-close handler.
171	 */
172	if (vrefcnt(vn) > 1 || pn->pn_close == NULL)
173		PFS_RETURN (0);
174
175	if (pvd->pvd_pid != NO_PID) {
176		proc = pfind(pvd->pvd_pid);
177	} else {
178		proc = NULL;
179	}
180
181	error = pn_close(va->a_td, proc, pn);
182
183	if (proc != NULL)
184		PROC_UNLOCK(proc);
185
186	PFS_RETURN (error);
187}
188
189/*
190 * Get file attributes
191 */
192static int
193pfs_getattr(struct vop_getattr_args *va)
194{
195	struct vnode *vn = va->a_vp;
196	struct pfs_vdata *pvd = vn->v_data;
197	struct pfs_node *pn = pvd->pvd_pn;
198	struct vattr *vap = va->a_vap;
199	struct proc *proc;
200	int error = 0;
201
202	PFS_TRACE(("%s", pn->pn_name));
203	pfs_assert_not_owned(pn);
204
205	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
206		PFS_RETURN (ENOENT);
207
208	vap->va_type = vn->v_type;
209	vap->va_fileid = pn_fileno(pn, pvd->pvd_pid);
210	vap->va_flags = 0;
211	vap->va_blocksize = PAGE_SIZE;
212	vap->va_bytes = vap->va_size = 0;
213	vap->va_filerev = 0;
214	vap->va_fsid = vn->v_mount->mnt_stat.f_fsid.val[0];
215	vap->va_nlink = 1;
216	nanotime(&vap->va_ctime);
217	vap->va_atime = vap->va_mtime = vap->va_ctime;
218
219	switch (pn->pn_type) {
220	case pfstype_procdir:
221	case pfstype_root:
222	case pfstype_dir:
223#if 0
224		pfs_lock(pn);
225		/* compute link count */
226		pfs_unlock(pn);
227#endif
228		vap->va_mode = 0555;
229		break;
230	case pfstype_file:
231	case pfstype_symlink:
232		vap->va_mode = 0444;
233		break;
234	default:
235		printf("shouldn't be here!\n");
236		vap->va_mode = 0;
237		break;
238	}
239
240	if (proc != NULL) {
241		vap->va_uid = proc->p_ucred->cr_ruid;
242		vap->va_gid = proc->p_ucred->cr_rgid;
243	} else {
244		vap->va_uid = 0;
245		vap->va_gid = 0;
246	}
247
248	if (pn->pn_attr != NULL)
249		error = pn_attr(curthread, proc, pn, vap);
250
251	if(proc != NULL)
252		PROC_UNLOCK(proc);
253
254	PFS_RETURN (error);
255}
256
257/*
258 * Perform an ioctl
259 */
260static int
261pfs_ioctl(struct vop_ioctl_args *va)
262{
263	struct vnode *vn;
264	struct pfs_vdata *pvd;
265	struct pfs_node *pn;
266	struct proc *proc;
267	int error;
268
269	vn = va->a_vp;
270	vn_lock(vn, LK_SHARED | LK_RETRY);
271	if (vn->v_iflag & VI_DOOMED) {
272		VOP_UNLOCK(vn, 0);
273		return (EBADF);
274	}
275	pvd = vn->v_data;
276	pn = pvd->pvd_pn;
277
278	PFS_TRACE(("%s: %lx", pn->pn_name, va->a_command));
279	pfs_assert_not_owned(pn);
280
281	if (vn->v_type != VREG) {
282		VOP_UNLOCK(vn, 0);
283		PFS_RETURN (EINVAL);
284	}
285	KASSERT_PN_IS_FILE(pn);
286
287	if (pn->pn_ioctl == NULL) {
288		VOP_UNLOCK(vn, 0);
289		PFS_RETURN (ENOTTY);
290	}
291
292	/*
293	 * This is necessary because process' privileges may
294	 * have changed since the open() call.
295	 */
296	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) {
297		VOP_UNLOCK(vn, 0);
298		PFS_RETURN (EIO);
299	}
300
301	error = pn_ioctl(curthread, proc, pn, va->a_command, va->a_data);
302
303	if (proc != NULL)
304		PROC_UNLOCK(proc);
305
306	VOP_UNLOCK(vn, 0);
307	PFS_RETURN (error);
308}
309
310/*
311 * Perform getextattr
312 */
313static int
314pfs_getextattr(struct vop_getextattr_args *va)
315{
316	struct vnode *vn = va->a_vp;
317	struct pfs_vdata *pvd = vn->v_data;
318	struct pfs_node *pn = pvd->pvd_pn;
319	struct proc *proc;
320	int error;
321
322	PFS_TRACE(("%s", pn->pn_name));
323	pfs_assert_not_owned(pn);
324
325	/*
326	 * This is necessary because either process' privileges may
327	 * have changed since the open() call.
328	 */
329	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
330		PFS_RETURN (EIO);
331
332	if (pn->pn_getextattr == NULL)
333		error = EOPNOTSUPP;
334	else
335		error = pn_getextattr(curthread, proc, pn,
336		    va->a_attrnamespace, va->a_name, va->a_uio,
337		    va->a_size, va->a_cred);
338
339	if (proc != NULL)
340		PROC_UNLOCK(proc);
341
342	PFS_RETURN (error);
343}
344
345/*
346 * Convert a vnode to its component name
347 */
348static int
349pfs_vptocnp(struct vop_vptocnp_args *ap)
350{
351	struct vnode *vp = ap->a_vp;
352	struct vnode **dvp = ap->a_vpp;
353	struct pfs_vdata *pvd = vp->v_data;
354	struct pfs_node *pd = pvd->pvd_pn;
355	struct pfs_node *pn;
356	struct mount *mp;
357	char *buf = ap->a_buf;
358	int *buflen = ap->a_buflen;
359	char pidbuf[PFS_NAMELEN];
360	pid_t pid = pvd->pvd_pid;
361	int len, i, error, locked;
362
363	i = *buflen;
364	error = 0;
365
366	pfs_lock(pd);
367
368	if (vp->v_type == VDIR && pd->pn_type == pfstype_root) {
369		*dvp = vp;
370		vhold(*dvp);
371		pfs_unlock(pd);
372		PFS_RETURN (0);
373	} else if (vp->v_type == VDIR && pd->pn_type == pfstype_procdir) {
374		len = snprintf(pidbuf, sizeof(pidbuf), "%d", pid);
375		i -= len;
376		if (i < 0) {
377			error = ENOMEM;
378			goto failed;
379		}
380		bcopy(pidbuf, buf + i, len);
381	} else {
382		len = strlen(pd->pn_name);
383		i -= len;
384		if (i < 0) {
385			error = ENOMEM;
386			goto failed;
387		}
388		bcopy(pd->pn_name, buf + i, len);
389	}
390
391	pn = pd->pn_parent;
392	pfs_unlock(pd);
393
394	mp = vp->v_mount;
395	error = vfs_busy(mp, 0);
396	if (error)
397		return (error);
398
399	/*
400	 * vp is held by caller.
401	 */
402	locked = VOP_ISLOCKED(vp);
403	VOP_UNLOCK(vp, 0);
404
405	error = pfs_vncache_alloc(mp, dvp, pn, pid);
406	if (error) {
407		vn_lock(vp, locked | LK_RETRY);
408		vfs_unbusy(mp);
409		PFS_RETURN(error);
410	}
411
412	*buflen = i;
413	VOP_UNLOCK(*dvp, 0);
414	vn_lock(vp, locked | LK_RETRY);
415	vfs_unbusy(mp);
416
417	PFS_RETURN (0);
418failed:
419	pfs_unlock(pd);
420	PFS_RETURN(error);
421}
422
423/*
424 * Look up a file or directory
425 */
426static int
427pfs_lookup(struct vop_cachedlookup_args *va)
428{
429	struct vnode *vn = va->a_dvp;
430	struct vnode **vpp = va->a_vpp;
431	struct componentname *cnp = va->a_cnp;
432	struct pfs_vdata *pvd = vn->v_data;
433	struct pfs_node *pd = pvd->pvd_pn;
434	struct pfs_node *pn, *pdn = NULL;
435	struct mount *mp;
436	pid_t pid = pvd->pvd_pid;
437	char *pname;
438	int error, i, namelen, visible;
439
440	PFS_TRACE(("%.*s", (int)cnp->cn_namelen, cnp->cn_nameptr));
441	pfs_assert_not_owned(pd);
442
443	if (vn->v_type != VDIR)
444		PFS_RETURN (ENOTDIR);
445	KASSERT_PN_IS_DIR(pd);
446
447	error = VOP_ACCESS(vn, VEXEC, cnp->cn_cred, cnp->cn_thread);
448	if (error)
449		PFS_RETURN (error);
450
451	/*
452	 * Don't support DELETE or RENAME.  CREATE is supported so
453	 * that O_CREAT will work, but the lookup will still fail if
454	 * the file does not exist.
455	 */
456	if ((cnp->cn_flags & ISLASTCN) &&
457	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
458		PFS_RETURN (EOPNOTSUPP);
459
460	/* shortcut: check if the name is too long */
461	if (cnp->cn_namelen >= PFS_NAMELEN)
462		PFS_RETURN (ENOENT);
463
464	/* check that parent directory is visible... */
465	if (!pfs_visible(curthread, pd, pvd->pvd_pid, NULL))
466		PFS_RETURN (ENOENT);
467
468	/* self */
469	namelen = cnp->cn_namelen;
470	pname = cnp->cn_nameptr;
471	if (namelen == 1 && pname[0] == '.') {
472		pn = pd;
473		*vpp = vn;
474		VREF(vn);
475		PFS_RETURN (0);
476	}
477
478	mp = vn->v_mount;
479
480	/* parent */
481	if (cnp->cn_flags & ISDOTDOT) {
482		if (pd->pn_type == pfstype_root)
483			PFS_RETURN (EIO);
484		error = vfs_busy(mp, MBF_NOWAIT);
485		if (error != 0) {
486			vfs_ref(mp);
487			VOP_UNLOCK(vn, 0);
488			error = vfs_busy(mp, 0);
489			vn_lock(vn, LK_EXCLUSIVE | LK_RETRY);
490			vfs_rel(mp);
491			if (error != 0)
492				PFS_RETURN(ENOENT);
493			if (vn->v_iflag & VI_DOOMED) {
494				vfs_unbusy(mp);
495				PFS_RETURN(ENOENT);
496			}
497		}
498		VOP_UNLOCK(vn, 0);
499		KASSERT(pd->pn_parent != NULL,
500		    ("%s(): non-root directory has no parent", __func__));
501		/*
502		 * This one is tricky.  Descendents of procdir nodes
503		 * inherit their parent's process affinity, but
504		 * there's no easy reverse mapping.  For simplicity,
505		 * we assume that if this node is a procdir, its
506		 * parent isn't (which is correct as long as
507		 * descendents of procdir nodes are never procdir
508		 * nodes themselves)
509		 */
510		if (pd->pn_type == pfstype_procdir)
511			pid = NO_PID;
512		pfs_lock(pd);
513		pn = pd->pn_parent;
514		pfs_unlock(pd);
515		goto got_pnode;
516	}
517
518	pfs_lock(pd);
519
520	/* named node */
521	for (pn = pd->pn_nodes; pn != NULL; pn = pn->pn_next)
522		if (pn->pn_type == pfstype_procdir)
523			pdn = pn;
524		else if (pn->pn_name[namelen] == '\0' &&
525		    bcmp(pname, pn->pn_name, namelen) == 0) {
526			pfs_unlock(pd);
527			goto got_pnode;
528		}
529
530	/* process dependent node */
531	if ((pn = pdn) != NULL) {
532		pid = 0;
533		for (pid = 0, i = 0; i < namelen && isdigit(pname[i]); ++i)
534			if ((pid = pid * 10 + pname[i] - '0') > PID_MAX)
535				break;
536		if (i == cnp->cn_namelen) {
537			pfs_unlock(pd);
538			goto got_pnode;
539		}
540	}
541
542	pfs_unlock(pd);
543
544	PFS_RETURN (ENOENT);
545
546 got_pnode:
547	pfs_assert_not_owned(pd);
548	pfs_assert_not_owned(pn);
549	visible = pfs_visible(curthread, pn, pid, NULL);
550	if (!visible) {
551		error = ENOENT;
552		goto failed;
553	}
554
555	error = pfs_vncache_alloc(mp, vpp, pn, pid);
556	if (error)
557		goto failed;
558
559	if (cnp->cn_flags & ISDOTDOT) {
560		vfs_unbusy(mp);
561		vn_lock(vn, LK_EXCLUSIVE | LK_RETRY);
562		if (vn->v_iflag & VI_DOOMED) {
563			vput(*vpp);
564			*vpp = NULL;
565			PFS_RETURN(ENOENT);
566		}
567	}
568	if (cnp->cn_flags & MAKEENTRY && !(vn->v_iflag & VI_DOOMED))
569		cache_enter(vn, *vpp, cnp);
570	PFS_RETURN (0);
571 failed:
572	if (cnp->cn_flags & ISDOTDOT) {
573		vfs_unbusy(mp);
574		vn_lock(vn, LK_EXCLUSIVE | LK_RETRY);
575		*vpp = NULL;
576	}
577	PFS_RETURN(error);
578}
579
580/*
581 * Open a file or directory.
582 */
583static int
584pfs_open(struct vop_open_args *va)
585{
586	struct vnode *vn = va->a_vp;
587	struct pfs_vdata *pvd = vn->v_data;
588	struct pfs_node *pn = pvd->pvd_pn;
589	int mode = va->a_mode;
590
591	PFS_TRACE(("%s (mode 0x%x)", pn->pn_name, mode));
592	pfs_assert_not_owned(pn);
593
594	/* check if the requested mode is permitted */
595	if (((mode & FREAD) && !(mode & PFS_RD)) ||
596	    ((mode & FWRITE) && !(mode & PFS_WR)))
597		PFS_RETURN (EPERM);
598
599	/* we don't support locking */
600	if ((mode & O_SHLOCK) || (mode & O_EXLOCK))
601		PFS_RETURN (EOPNOTSUPP);
602
603	PFS_RETURN (0);
604}
605
606/*
607 * Read from a file
608 */
609static int
610pfs_read(struct vop_read_args *va)
611{
612	struct vnode *vn = va->a_vp;
613	struct pfs_vdata *pvd = vn->v_data;
614	struct pfs_node *pn = pvd->pvd_pn;
615	struct uio *uio = va->a_uio;
616	struct proc *proc;
617	struct sbuf *sb = NULL;
618	int error, locked;
619	off_t buflen;
620
621	PFS_TRACE(("%s", pn->pn_name));
622	pfs_assert_not_owned(pn);
623
624	if (vn->v_type != VREG)
625		PFS_RETURN (EINVAL);
626	KASSERT_PN_IS_FILE(pn);
627
628	if (!(pn->pn_flags & PFS_RD))
629		PFS_RETURN (EBADF);
630
631	if (pn->pn_fill == NULL)
632		PFS_RETURN (EIO);
633
634	/*
635	 * This is necessary because either process' privileges may
636	 * have changed since the open() call.
637	 */
638	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
639		PFS_RETURN (EIO);
640	if (proc != NULL) {
641		_PHOLD(proc);
642		PROC_UNLOCK(proc);
643	}
644
645	vhold(vn);
646	locked = VOP_ISLOCKED(vn);
647	VOP_UNLOCK(vn, 0);
648
649	if (pn->pn_flags & PFS_RAWRD) {
650		PFS_TRACE(("%zd resid", uio->uio_resid));
651		error = pn_fill(curthread, proc, pn, NULL, uio);
652		PFS_TRACE(("%zd resid", uio->uio_resid));
653		goto ret;
654	}
655
656	if (uio->uio_resid < 0 || uio->uio_offset < 0 ||
657	    uio->uio_resid > OFF_MAX - uio->uio_offset) {
658		error = EINVAL;
659		goto ret;
660	}
661	buflen = uio->uio_offset + uio->uio_resid;
662	if (buflen > MAXPHYS)
663		buflen = MAXPHYS;
664
665	sb = sbuf_new(sb, NULL, buflen + 1, 0);
666	if (sb == NULL) {
667		error = EIO;
668		goto ret;
669	}
670
671	error = pn_fill(curthread, proc, pn, sb, uio);
672
673	if (error) {
674		sbuf_delete(sb);
675		goto ret;
676	}
677
678	/*
679	 * XXX: If the buffer overflowed, sbuf_len() will not return
680	 * the data length. Then just use the full length because an
681	 * overflowed sbuf must be full.
682	 */
683	if (sbuf_finish(sb) == 0)
684		buflen = sbuf_len(sb);
685	error = uiomove_frombuf(sbuf_data(sb), buflen, uio);
686	sbuf_delete(sb);
687ret:
688	vn_lock(vn, locked | LK_RETRY);
689	vdrop(vn);
690	if (proc != NULL)
691		PRELE(proc);
692	PFS_RETURN (error);
693}
694
695/*
696 * Iterate through directory entries
697 */
698static int
699pfs_iterate(struct thread *td, struct proc *proc, struct pfs_node *pd,
700	    struct pfs_node **pn, struct proc **p)
701{
702	int visible;
703
704	sx_assert(&allproc_lock, SX_SLOCKED);
705	pfs_assert_owned(pd);
706 again:
707	if (*pn == NULL) {
708		/* first node */
709		*pn = pd->pn_nodes;
710	} else if ((*pn)->pn_type != pfstype_procdir) {
711		/* next node */
712		*pn = (*pn)->pn_next;
713	}
714	if (*pn != NULL && (*pn)->pn_type == pfstype_procdir) {
715		/* next process */
716		if (*p == NULL)
717			*p = LIST_FIRST(&allproc);
718		else
719			*p = LIST_NEXT(*p, p_list);
720		/* out of processes: next node */
721		if (*p == NULL)
722			*pn = (*pn)->pn_next;
723		else
724			PROC_LOCK(*p);
725	}
726
727	if ((*pn) == NULL)
728		return (-1);
729
730	if (*p != NULL) {
731		visible = pfs_visible_proc(td, *pn, *p);
732		PROC_UNLOCK(*p);
733	} else if (proc != NULL) {
734		visible = pfs_visible_proc(td, *pn, proc);
735	} else {
736		visible = 1;
737	}
738	if (!visible)
739		goto again;
740
741	return (0);
742}
743
744/* Directory entry list */
745struct pfsentry {
746	STAILQ_ENTRY(pfsentry)	link;
747	struct dirent		entry;
748};
749STAILQ_HEAD(pfsdirentlist, pfsentry);
750
751/*
752 * Return directory entries.
753 */
754static int
755pfs_readdir(struct vop_readdir_args *va)
756{
757	struct vnode *vn = va->a_vp;
758	struct pfs_vdata *pvd = vn->v_data;
759	struct pfs_node *pd = pvd->pvd_pn;
760	pid_t pid = pvd->pvd_pid;
761	struct proc *p, *proc;
762	struct pfs_node *pn;
763	struct uio *uio;
764	struct pfsentry *pfsent, *pfsent2;
765	struct pfsdirentlist lst;
766	off_t offset;
767	int error, i, resid;
768
769	STAILQ_INIT(&lst);
770	error = 0;
771	KASSERT(pd->pn_info == vn->v_mount->mnt_data,
772	    ("%s(): pn_info does not match mountpoint", __func__));
773	PFS_TRACE(("%s pid %lu", pd->pn_name, (unsigned long)pid));
774	pfs_assert_not_owned(pd);
775
776	if (vn->v_type != VDIR)
777		PFS_RETURN (ENOTDIR);
778	KASSERT_PN_IS_DIR(pd);
779	uio = va->a_uio;
780
781	/* only allow reading entire entries */
782	offset = uio->uio_offset;
783	resid = uio->uio_resid;
784	if (offset < 0 || offset % PFS_DELEN != 0 ||
785	    (resid && resid < PFS_DELEN))
786		PFS_RETURN (EINVAL);
787	if (resid == 0)
788		PFS_RETURN (0);
789
790	sx_slock(&allproc_lock);
791	pfs_lock(pd);
792
793        /* check if the directory is visible to the caller */
794        if (!pfs_visible(curthread, pd, pid, &proc)) {
795		sx_sunlock(&allproc_lock);
796		pfs_unlock(pd);
797                PFS_RETURN (ENOENT);
798	}
799	KASSERT(pid == NO_PID || proc != NULL,
800	    ("%s(): no process for pid %lu", __func__, (unsigned long)pid));
801
802	/* skip unwanted entries */
803	for (pn = NULL, p = NULL; offset > 0; offset -= PFS_DELEN) {
804		if (pfs_iterate(curthread, proc, pd, &pn, &p) == -1) {
805			/* nothing left... */
806			if (proc != NULL)
807				PROC_UNLOCK(proc);
808			pfs_unlock(pd);
809			sx_sunlock(&allproc_lock);
810			PFS_RETURN (0);
811		}
812	}
813
814	/* fill in entries */
815	while (pfs_iterate(curthread, proc, pd, &pn, &p) != -1 &&
816	    resid >= PFS_DELEN) {
817		if ((pfsent = malloc(sizeof(struct pfsentry), M_IOV,
818		    M_NOWAIT | M_ZERO)) == NULL) {
819			error = ENOMEM;
820			break;
821		}
822		pfsent->entry.d_reclen = PFS_DELEN;
823		pfsent->entry.d_fileno = pn_fileno(pn, pid);
824		/* PFS_DELEN was picked to fit PFS_NAMLEN */
825		for (i = 0; i < PFS_NAMELEN - 1 && pn->pn_name[i] != '\0'; ++i)
826			pfsent->entry.d_name[i] = pn->pn_name[i];
827		pfsent->entry.d_name[i] = 0;
828		pfsent->entry.d_namlen = i;
829		switch (pn->pn_type) {
830		case pfstype_procdir:
831			KASSERT(p != NULL,
832			    ("reached procdir node with p == NULL"));
833			pfsent->entry.d_namlen = snprintf(pfsent->entry.d_name,
834			    PFS_NAMELEN, "%d", p->p_pid);
835			/* fall through */
836		case pfstype_root:
837		case pfstype_dir:
838		case pfstype_this:
839		case pfstype_parent:
840			pfsent->entry.d_type = DT_DIR;
841			break;
842		case pfstype_file:
843			pfsent->entry.d_type = DT_REG;
844			break;
845		case pfstype_symlink:
846			pfsent->entry.d_type = DT_LNK;
847			break;
848		default:
849			panic("%s has unexpected node type: %d", pn->pn_name, pn->pn_type);
850		}
851		PFS_TRACE(("%s", pfsent->entry.d_name));
852		STAILQ_INSERT_TAIL(&lst, pfsent, link);
853		offset += PFS_DELEN;
854		resid -= PFS_DELEN;
855	}
856	if (proc != NULL)
857		PROC_UNLOCK(proc);
858	pfs_unlock(pd);
859	sx_sunlock(&allproc_lock);
860	i = 0;
861	STAILQ_FOREACH_SAFE(pfsent, &lst, link, pfsent2) {
862		if (error == 0)
863			error = uiomove(&pfsent->entry, PFS_DELEN, uio);
864		free(pfsent, M_IOV);
865		i++;
866	}
867	PFS_TRACE(("%d bytes", i * PFS_DELEN));
868	PFS_RETURN (error);
869}
870
871/*
872 * Read a symbolic link
873 */
874static int
875pfs_readlink(struct vop_readlink_args *va)
876{
877	struct vnode *vn = va->a_vp;
878	struct pfs_vdata *pvd = vn->v_data;
879	struct pfs_node *pn = pvd->pvd_pn;
880	struct uio *uio = va->a_uio;
881	struct proc *proc = NULL;
882	char buf[PATH_MAX];
883	struct sbuf sb;
884	int error, locked;
885
886	PFS_TRACE(("%s", pn->pn_name));
887	pfs_assert_not_owned(pn);
888
889	if (vn->v_type != VLNK)
890		PFS_RETURN (EINVAL);
891	KASSERT_PN_IS_LINK(pn);
892
893	if (pn->pn_fill == NULL)
894		PFS_RETURN (EIO);
895
896	if (pvd->pvd_pid != NO_PID) {
897		if ((proc = pfind(pvd->pvd_pid)) == NULL)
898			PFS_RETURN (EIO);
899		if (proc->p_flag & P_WEXIT) {
900			PROC_UNLOCK(proc);
901			PFS_RETURN (EIO);
902		}
903		_PHOLD(proc);
904		PROC_UNLOCK(proc);
905	}
906	vhold(vn);
907	locked = VOP_ISLOCKED(vn);
908	VOP_UNLOCK(vn, 0);
909
910	/* sbuf_new() can't fail with a static buffer */
911	sbuf_new(&sb, buf, sizeof buf, 0);
912
913	error = pn_fill(curthread, proc, pn, &sb, NULL);
914
915	if (proc != NULL)
916		PRELE(proc);
917	vn_lock(vn, locked | LK_RETRY);
918	vdrop(vn);
919
920	if (error) {
921		sbuf_delete(&sb);
922		PFS_RETURN (error);
923	}
924
925	if (sbuf_finish(&sb) != 0) {
926		sbuf_delete(&sb);
927		PFS_RETURN (ENAMETOOLONG);
928	}
929
930	error = uiomove_frombuf(sbuf_data(&sb), sbuf_len(&sb), uio);
931	sbuf_delete(&sb);
932	PFS_RETURN (error);
933}
934
935/*
936 * Reclaim a vnode
937 */
938static int
939pfs_reclaim(struct vop_reclaim_args *va)
940{
941	struct vnode *vn = va->a_vp;
942	struct pfs_vdata *pvd = vn->v_data;
943	struct pfs_node *pn = pvd->pvd_pn;
944
945	PFS_TRACE(("%s", pn->pn_name));
946	pfs_assert_not_owned(pn);
947
948	return (pfs_vncache_free(va->a_vp));
949}
950
951/*
952 * Set attributes
953 */
954static int
955pfs_setattr(struct vop_setattr_args *va)
956{
957	struct vnode *vn = va->a_vp;
958	struct pfs_vdata *pvd = vn->v_data;
959	struct pfs_node *pn = pvd->pvd_pn;
960
961	PFS_TRACE(("%s", pn->pn_name));
962	pfs_assert_not_owned(pn);
963
964	PFS_RETURN (EOPNOTSUPP);
965}
966
967/*
968 * Write to a file
969 */
970static int
971pfs_write(struct vop_write_args *va)
972{
973	struct vnode *vn = va->a_vp;
974	struct pfs_vdata *pvd = vn->v_data;
975	struct pfs_node *pn = pvd->pvd_pn;
976	struct uio *uio = va->a_uio;
977	struct proc *proc;
978	struct sbuf sb;
979	int error;
980
981	PFS_TRACE(("%s", pn->pn_name));
982	pfs_assert_not_owned(pn);
983
984	if (vn->v_type != VREG)
985		PFS_RETURN (EINVAL);
986	KASSERT_PN_IS_FILE(pn);
987
988	if (!(pn->pn_flags & PFS_WR))
989		PFS_RETURN (EBADF);
990
991	if (pn->pn_fill == NULL)
992		PFS_RETURN (EIO);
993
994	/*
995	 * This is necessary because either process' privileges may
996	 * have changed since the open() call.
997	 */
998	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
999		PFS_RETURN (EIO);
1000	if (proc != NULL) {
1001		_PHOLD(proc);
1002		PROC_UNLOCK(proc);
1003	}
1004
1005	if (pn->pn_flags & PFS_RAWWR) {
1006		error = pn_fill(curthread, proc, pn, NULL, uio);
1007		if (proc != NULL)
1008			PRELE(proc);
1009		PFS_RETURN (error);
1010	}
1011
1012	sbuf_uionew(&sb, uio, &error);
1013	if (error) {
1014		if (proc != NULL)
1015			PRELE(proc);
1016		PFS_RETURN (error);
1017	}
1018
1019	error = pn_fill(curthread, proc, pn, &sb, uio);
1020
1021	sbuf_delete(&sb);
1022	if (proc != NULL)
1023		PRELE(proc);
1024	PFS_RETURN (error);
1025}
1026
1027/*
1028 * Vnode operations
1029 */
1030struct vop_vector pfs_vnodeops = {
1031	.vop_default =		&default_vnodeops,
1032
1033	.vop_access =		pfs_access,
1034	.vop_cachedlookup =	pfs_lookup,
1035	.vop_close =		pfs_close,
1036	.vop_create =		VOP_EOPNOTSUPP,
1037	.vop_getattr =		pfs_getattr,
1038	.vop_getextattr =	pfs_getextattr,
1039	.vop_ioctl =		pfs_ioctl,
1040	.vop_link =		VOP_EOPNOTSUPP,
1041	.vop_lookup =		vfs_cache_lookup,
1042	.vop_mkdir =		VOP_EOPNOTSUPP,
1043	.vop_mknod =		VOP_EOPNOTSUPP,
1044	.vop_open =		pfs_open,
1045	.vop_read =		pfs_read,
1046	.vop_readdir =		pfs_readdir,
1047	.vop_readlink =		pfs_readlink,
1048	.vop_reclaim =		pfs_reclaim,
1049	.vop_remove =		VOP_EOPNOTSUPP,
1050	.vop_rename =		VOP_EOPNOTSUPP,
1051	.vop_rmdir =		VOP_EOPNOTSUPP,
1052	.vop_setattr =		pfs_setattr,
1053	.vop_symlink =		VOP_EOPNOTSUPP,
1054	.vop_vptocnp =		pfs_vptocnp,
1055	.vop_write =		pfs_write,
1056	/* XXX I've probably forgotten a few that need VOP_EOPNOTSUPP */
1057};
1058