1/*-
2 * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28/*
29 * POSIX message queue implementation.
30 *
31 * 1) A mqueue filesystem can be mounted, each message queue appears
32 *    in mounted directory, user can change queue's permission and
33 *    ownership, or remove a queue. Manually creating a file in the
34 *    directory causes a message queue to be created in the kernel with
35 *    default message queue attributes applied and same name used, this
36 *    method is not advocated since mq_open syscall allows user to specify
37 *    different attributes. Also the file system can be mounted multiple
38 *    times at different mount points but shows same contents.
39 *
40 * 2) Standard POSIX message queue API. The syscalls do not use vfs layer,
41 *    but directly operate on internal data structure, this allows user to
42 *    use the IPC facility without having to mount mqueue file system.
43 */
44
45#include <sys/cdefs.h>
46__FBSDID("$FreeBSD: stable/10/sys/kern/uipc_mqueue.c 325783 2017-11-13 23:21:17Z jamie $");
47
48#include "opt_capsicum.h"
49#include "opt_compat.h"
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/systm.h>
54#include <sys/limits.h>
55#include <sys/malloc.h>
56#include <sys/buf.h>
57#include <sys/capsicum.h>
58#include <sys/dirent.h>
59#include <sys/event.h>
60#include <sys/eventhandler.h>
61#include <sys/fcntl.h>
62#include <sys/file.h>
63#include <sys/filedesc.h>
64#include <sys/jail.h>
65#include <sys/lock.h>
66#include <sys/module.h>
67#include <sys/mount.h>
68#include <sys/mqueue.h>
69#include <sys/mutex.h>
70#include <sys/namei.h>
71#include <sys/posix4.h>
72#include <sys/poll.h>
73#include <sys/priv.h>
74#include <sys/proc.h>
75#include <sys/queue.h>
76#include <sys/sysproto.h>
77#include <sys/stat.h>
78#include <sys/syscall.h>
79#include <sys/syscallsubr.h>
80#include <sys/sysent.h>
81#include <sys/sx.h>
82#include <sys/sysctl.h>
83#include <sys/taskqueue.h>
84#include <sys/unistd.h>
85#include <sys/vnode.h>
86#include <machine/atomic.h>
87
88FEATURE(p1003_1b_mqueue, "POSIX P1003.1B message queues support");
89
90/*
91 * Limits and constants
92 */
/* Longest node name, not counting the terminating NUL (mn_name has one extra byte). */
#define	MQFS_NAMELEN		NAME_MAX
/* MQFS_NAMELEN plus 8 bytes. NOTE(review): presumably a directory-entry size; its user is outside this view -- confirm. */
#define MQFS_DELEN		(8 + MQFS_NAMELEN)
95
96/* node types */
typedef enum {
	mqfstype_none = 0,	/* unset; mqfs_allocv() treats it as a fatal error */
	mqfstype_root,		/* root directory; its vnode gets VV_ROOT and VDIR */
	mqfstype_dir,		/* directory (VDIR) */
	mqfstype_this,		/* "." entry; shares its parent's file number */
	mqfstype_parent,	/* ".." entry; uses the grandparent's (or root's) file number */
	mqfstype_file,		/* regular file (VREG); mn_data holds the message queue */
	mqfstype_symlink,	/* symbolic link (VLNK) */
} mqfs_type_t;
106
107struct mqfs_node;
108
109/*
110 * mqfs_info: describes a mqfs instance
111 */
struct mqfs_info {
	struct sx		mi_lock;	/* sx lock held around node tree and vnode list updates */
	struct mqfs_node	*mi_root;	/* root directory node, created in mqfs_init() */
	struct unrhdr		*mi_unrhdr;	/* unit-number allocator backing mn_fileno */
};
117
/*
 * mqfs_vdata: per-vnode private data (vp->v_data), tying one vnode to the
 * mqfs_node it represents.  A node has one of these per mount that has
 * instantiated a vnode for it.
 */
struct mqfs_vdata {
	LIST_ENTRY(mqfs_vdata)	mv_link;	/* linkage on the node's mn_vnodes list */
	struct mqfs_node	*mv_node;	/* node this vnode represents */
	struct vnode		*mv_vnode;	/* back pointer to the vnode */
	struct task		mv_task;	/* runs do_recycle(); queued by do_unlink() */
};
124
125/*
126 * mqfs_node: describes a node (file or directory) within a mqfs
127 */
struct mqfs_node {
	char			mn_name[MQFS_NAMELEN+1];	/* NUL-terminated entry name */
	struct mqfs_info	*mn_info;	/* owning mqfs instance */
	struct mqfs_node	*mn_parent;	/* containing directory; NULL for root and after do_unlink() */
	LIST_HEAD(,mqfs_node)	mn_children;	/* entries of this directory */
	LIST_ENTRY(mqfs_node)	mn_sibling;	/* linkage on the parent's mn_children */
	LIST_HEAD(,mqfs_vdata)	mn_vnodes;	/* vnodes currently attached to this node */
	const void		*mn_pr_root;	/* creator's prison root; mqfs_search() matches NULL or equal */
	int			mn_refcount;	/* reference count, updated with atomic_fetchadd_int() */
	mqfs_type_t		mn_type;	/* node type */
	int			mn_deleted;	/* set once the node has been unlinked */
	uint32_t		mn_fileno;	/* file number; 0 means none assigned */
	void			*mn_data;	/* struct mqueue for file nodes (see VTOMQ/FPTOMQ) */
	struct timespec		mn_birth;	/* creation time */
	struct timespec		mn_ctime;	/* reported as va_ctime */
	struct timespec		mn_atime;	/* reported as va_atime */
	struct timespec		mn_mtime;	/* reported as va_mtime */
	uid_t			mn_uid;		/* owner */
	gid_t			mn_gid;		/* group */
	int			mn_mode;	/* permission bits */
};
149
/* vnode -> mqfs_node, via the mqfs_vdata stored in v_data */
#define	VTON(vp)	(((struct mqfs_vdata *)((vp)->v_data))->mv_node)
/* vnode -> message queue stored in the node's mn_data */
#define VTOMQ(vp) 	((struct mqueue *)(VTON(vp)->mn_data))
/* mount -> mqfs instance stored in mnt_data */
#define	VFSTOMQFS(m)	((struct mqfs_info *)((m)->mnt_data))
/* file -> message queue; f_data is treated as a struct mqfs_node pointer */
#define	FPTOMQ(fp)	((struct mqueue *)(((struct mqfs_node *) \
				(fp)->f_data)->mn_data))
155
156TAILQ_HEAD(msgq, mqueue_msg);
157
158struct mqueue;
159
/*
 * mqueue_notifier: one notification registration.
 * NOTE(review): the code that fills in and consumes these fields is outside
 * this view; the field comments follow the declared types and names only.
 */
struct mqueue_notifier {
	LIST_ENTRY(mqueue_notifier)	nt_link;	/* list linkage */
	struct sigevent			nt_sigev;	/* sigevent for this registration */
	ksiginfo_t			nt_ksi;		/* kernel siginfo for the notification */
	struct proc			*nt_proc;	/* associated process */
};
166
/*
 * mqueue: one message queue, referenced from a file node's mn_data.
 * NOTE(review): the send/receive code using these fields is outside this
 * view; the field comments follow the declared types and names -- confirm.
 */
struct mqueue {
	struct mtx	mq_mutex;	/* mutex; presumably protects the fields below */
	int		mq_flags;	/* presumably MQ_RSEL / MQ_WSEL bits */
	long		mq_maxmsg;	/* presumably the queue's message-count limit */
	long		mq_msgsize;	/* presumably the per-message size limit */
	long		mq_curmsgs;	/* presumably the number of queued messages */
	long		mq_totalbytes;	/* presumably the bytes currently queued */
	struct msgq	mq_msgq;	/* TAILQ of struct mqueue_msg */
	int		mq_receivers;	/* presumably a count of waiting receivers */
	int		mq_senders;	/* presumably a count of waiting senders */
	struct selinfo	mq_rsel;	/* selinfo, read side */
	struct selinfo	mq_wsel;	/* selinfo, write side */
	struct mqueue_notifier	*mq_notifier;	/* notifier attached to the queue, if any */
};
181
/* NOTE(review): presumably mq_flags bits marking pending read/write selectors; users are outside this view. */
#define	MQ_RSEL		0x01
#define	MQ_WSEL		0x02
184
/*
 * mqueue_msg: header of one queued message, linked on a struct msgq tail queue.
 */
struct mqueue_msg {
	TAILQ_ENTRY(mqueue_msg)	msg_link;	/* tail-queue linkage */
	unsigned int	msg_prio;	/* message priority */
	unsigned int	msg_size;	/* message size */
	/* following real data... */
};
191
/* kern.mqueue sysctl subtree */
static SYSCTL_NODE(_kern, OID_AUTO, mqueue, CTLFLAG_RW, 0,
	"POSIX real time message queue");

/*
 * NOTE(review): presumably the attributes applied when a queue is created
 * without explicit attributes; the consumer is outside this view.
 */
static int	default_maxmsg  = 10;
static int	default_msgsize = 1024;

/*
 * Tunables exported under kern.mqueue.
 * NOTE(review): maxmsg/maxmsgsize look like upper limits rather than the
 * "defaults" their descriptions mention (see default_* above) -- confirm
 * against mqueue_alloc()/kmq_open before rewording the descriptions.
 */
static int	maxmsg = 100;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsg, CTLFLAG_RW,
    &maxmsg, 0, "Default maximum messages in queue");
static int	maxmsgsize = 16384;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsgsize, CTLFLAG_RW,
    &maxmsgsize, 0, "Default maximum message size");
static int	maxmq = 100;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmq, CTLFLAG_RW,
    &maxmq, 0, "maximum message queues");
/* NOTE(review): exported read-write although it is described as a current count. */
static int	curmq = 0;
SYSCTL_INT(_kern_mqueue, OID_AUTO, curmq, CTLFLAG_RW,
    &curmq, 0, "current message queue number");
/* mqfs_uninit() refuses to run (EOPNOTSUPP) while this is zero. */
static int	unloadable = 0;
static MALLOC_DEFINE(M_MQUEUEDATA, "mqdata", "mqueue data");

/* Tag of the process_exit handler registered in mqfs_init(). */
static eventhandler_tag exit_tag;

/* Only one instance per-system */
static struct mqfs_info		mqfs_data;
/* UMA zones, created in mqfs_init() and destroyed in mqfs_uninit(). */
static uma_zone_t		mqnode_zone;	/* struct mqfs_node */
static uma_zone_t		mqueue_zone;	/* struct mqueue */
static uma_zone_t		mvdata_zone;	/* struct mqfs_vdata */
static uma_zone_t		mqnoti_zone;	/* struct mqueue_notifier */
static struct vop_vector	mqfs_vnodeops;
static struct fileops		mqueueops;
/* Jail OSD slot returned by osd_jail_register() in mqfs_init(). */
static unsigned			mqfs_osd_jail_slot;
224
225/*
226 * Directory structure construction and manipulation
227 */
228#ifdef notyet
229static struct mqfs_node	*mqfs_create_dir(struct mqfs_node *parent,
230	const char *name, int namelen, struct ucred *cred, int mode);
231static struct mqfs_node	*mqfs_create_link(struct mqfs_node *parent,
232	const char *name, int namelen, struct ucred *cred, int mode);
233#endif
234
235static struct mqfs_node	*mqfs_create_file(struct mqfs_node *parent,
236	const char *name, int namelen, struct ucred *cred, int mode);
237static int	mqfs_destroy(struct mqfs_node *mn);
238static void	mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn);
239static void	mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn);
240static int	mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn);
241static int	mqfs_prison_remove(void *obj, void *data);
242
/*
 * Message queue construction and manipulation
 */
246static struct mqueue	*mqueue_alloc(const struct mq_attr *attr);
247static void	mqueue_free(struct mqueue *mq);
248static int	mqueue_send(struct mqueue *mq, const char *msg_ptr,
249			size_t msg_len, unsigned msg_prio, int waitok,
250			const struct timespec *abs_timeout);
251static int	mqueue_receive(struct mqueue *mq, char *msg_ptr,
252			size_t msg_len, unsigned *msg_prio, int waitok,
253			const struct timespec *abs_timeout);
254static int	_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg,
255			int timo);
256static int	_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg,
257			int timo);
258static void	mqueue_send_notification(struct mqueue *mq);
259static void	mqueue_fdclose(struct thread *td, int fd, struct file *fp);
260static void	mq_proc_exit(void *arg, struct proc *p);
261
262/*
263 * kqueue filters
264 */
265static void	filt_mqdetach(struct knote *kn);
266static int	filt_mqread(struct knote *kn, long hint);
267static int	filt_mqwrite(struct knote *kn, long hint);
268
/* kqueue filter operations using filt_mqread as the event routine. */
struct filterops mq_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_mqdetach,
	.f_event = filt_mqread,
};
/* kqueue filter operations using filt_mqwrite as the event routine. */
struct filterops mq_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_mqdetach,
	.f_event = filt_mqwrite,
};
279
280/*
281 * Initialize fileno bitmap
282 */
283static void
284mqfs_fileno_init(struct mqfs_info *mi)
285{
286	struct unrhdr *up;
287
288	up = new_unrhdr(1, INT_MAX, NULL);
289	mi->mi_unrhdr = up;
290}
291
292/*
293 * Tear down fileno bitmap
294 */
295static void
296mqfs_fileno_uninit(struct mqfs_info *mi)
297{
298	struct unrhdr *up;
299
300	up = mi->mi_unrhdr;
301	mi->mi_unrhdr = NULL;
302	delete_unrhdr(up);
303}
304
305/*
306 * Allocate a file number
307 */
308static void
309mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn)
310{
311	/* make sure our parent has a file number */
312	if (mn->mn_parent && !mn->mn_parent->mn_fileno)
313		mqfs_fileno_alloc(mi, mn->mn_parent);
314
315	switch (mn->mn_type) {
316	case mqfstype_root:
317	case mqfstype_dir:
318	case mqfstype_file:
319	case mqfstype_symlink:
320		mn->mn_fileno = alloc_unr(mi->mi_unrhdr);
321		break;
322	case mqfstype_this:
323		KASSERT(mn->mn_parent != NULL,
324		    ("mqfstype_this node has no parent"));
325		mn->mn_fileno = mn->mn_parent->mn_fileno;
326		break;
327	case mqfstype_parent:
328		KASSERT(mn->mn_parent != NULL,
329		    ("mqfstype_parent node has no parent"));
330		if (mn->mn_parent == mi->mi_root) {
331			mn->mn_fileno = mn->mn_parent->mn_fileno;
332			break;
333		}
334		KASSERT(mn->mn_parent->mn_parent != NULL,
335		    ("mqfstype_parent node has no grandparent"));
336		mn->mn_fileno = mn->mn_parent->mn_parent->mn_fileno;
337		break;
338	default:
339		KASSERT(0,
340		    ("mqfs_fileno_alloc() called for unknown type node: %d",
341			mn->mn_type));
342		break;
343	}
344}
345
346/*
347 * Release a file number
348 */
349static void
350mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn)
351{
352	switch (mn->mn_type) {
353	case mqfstype_root:
354	case mqfstype_dir:
355	case mqfstype_file:
356	case mqfstype_symlink:
357		free_unr(mi->mi_unrhdr, mn->mn_fileno);
358		break;
359	case mqfstype_this:
360	case mqfstype_parent:
361		/* ignore these, as they don't "own" their file number */
362		break;
363	default:
364		KASSERT(0,
365		    ("mqfs_fileno_free() called for unknown type node: %d",
366			mn->mn_type));
367		break;
368	}
369}
370
371static __inline struct mqfs_node *
372mqnode_alloc(void)
373{
374	return uma_zalloc(mqnode_zone, M_WAITOK | M_ZERO);
375}
376
377static __inline void
378mqnode_free(struct mqfs_node *node)
379{
380	uma_zfree(mqnode_zone, node);
381}
382
383static __inline void
384mqnode_addref(struct mqfs_node *node)
385{
386	atomic_fetchadd_int(&node->mn_refcount, 1);
387}
388
389static __inline void
390mqnode_release(struct mqfs_node *node)
391{
392	struct mqfs_info *mqfs;
393	int old, exp;
394
395	mqfs = node->mn_info;
396	old = atomic_fetchadd_int(&node->mn_refcount, -1);
397	if (node->mn_type == mqfstype_dir ||
398	    node->mn_type == mqfstype_root)
399		exp = 3; /* include . and .. */
400	else
401		exp = 1;
402	if (old == exp) {
403		int locked = sx_xlocked(&mqfs->mi_lock);
404		if (!locked)
405			sx_xlock(&mqfs->mi_lock);
406		mqfs_destroy(node);
407		if (!locked)
408			sx_xunlock(&mqfs->mi_lock);
409	}
410}
411
412/*
413 * Add a node to a directory
414 */
415static int
416mqfs_add_node(struct mqfs_node *parent, struct mqfs_node *node)
417{
418	KASSERT(parent != NULL, ("%s(): parent is NULL", __func__));
419	KASSERT(parent->mn_info != NULL,
420	    ("%s(): parent has no mn_info", __func__));
421	KASSERT(parent->mn_type == mqfstype_dir ||
422	    parent->mn_type == mqfstype_root,
423	    ("%s(): parent is not a directory", __func__));
424
425	node->mn_info = parent->mn_info;
426	node->mn_parent = parent;
427	LIST_INIT(&node->mn_children);
428	LIST_INIT(&node->mn_vnodes);
429	LIST_INSERT_HEAD(&parent->mn_children, node, mn_sibling);
430	mqnode_addref(parent);
431	return (0);
432}
433
434static struct mqfs_node *
435mqfs_create_node(const char *name, int namelen, struct ucred *cred, int mode,
436	int nodetype)
437{
438	struct mqfs_node *node;
439
440	node = mqnode_alloc();
441	strncpy(node->mn_name, name, namelen);
442	node->mn_pr_root = cred->cr_prison->pr_root;
443	node->mn_type = nodetype;
444	node->mn_refcount = 1;
445	vfs_timestamp(&node->mn_birth);
446	node->mn_ctime = node->mn_atime = node->mn_mtime
447		= node->mn_birth;
448	node->mn_uid = cred->cr_uid;
449	node->mn_gid = cred->cr_gid;
450	node->mn_mode = mode;
451	return (node);
452}
453
454/*
455 * Create a file
456 */
457static struct mqfs_node *
458mqfs_create_file(struct mqfs_node *parent, const char *name, int namelen,
459	struct ucred *cred, int mode)
460{
461	struct mqfs_node *node;
462
463	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_file);
464	if (mqfs_add_node(parent, node) != 0) {
465		mqnode_free(node);
466		return (NULL);
467	}
468	return (node);
469}
470
471/*
472 * Add . and .. to a directory
473 */
474static int
475mqfs_fixup_dir(struct mqfs_node *parent)
476{
477	struct mqfs_node *dir;
478
479	dir = mqnode_alloc();
480	dir->mn_name[0] = '.';
481	dir->mn_type = mqfstype_this;
482	dir->mn_refcount = 1;
483	if (mqfs_add_node(parent, dir) != 0) {
484		mqnode_free(dir);
485		return (-1);
486	}
487
488	dir = mqnode_alloc();
489	dir->mn_name[0] = dir->mn_name[1] = '.';
490	dir->mn_type = mqfstype_parent;
491	dir->mn_refcount = 1;
492
493	if (mqfs_add_node(parent, dir) != 0) {
494		mqnode_free(dir);
495		return (-1);
496	}
497
498	return (0);
499}
500
501#ifdef notyet
502
503/*
504 * Create a directory
505 */
506static struct mqfs_node *
507mqfs_create_dir(struct mqfs_node *parent, const char *name, int namelen,
508	struct ucred *cred, int mode)
509{
510	struct mqfs_node *node;
511
512	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_dir);
513	if (mqfs_add_node(parent, node) != 0) {
514		mqnode_free(node);
515		return (NULL);
516	}
517
518	if (mqfs_fixup_dir(node) != 0) {
519		mqfs_destroy(node);
520		return (NULL);
521	}
522	return (node);
523}
524
525/*
526 * Create a symlink
527 */
528static struct mqfs_node *
529mqfs_create_link(struct mqfs_node *parent, const char *name, int namelen,
530	struct ucred *cred, int mode)
531{
532	struct mqfs_node *node;
533
534	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_symlink);
535	if (mqfs_add_node(parent, node) != 0) {
536		mqnode_free(node);
537		return (NULL);
538	}
539	return (node);
540}
541
542#endif
543
544/*
545 * Destroy a node or a tree of nodes
546 */
547static int
548mqfs_destroy(struct mqfs_node *node)
549{
550	struct mqfs_node *parent;
551
552	KASSERT(node != NULL,
553	    ("%s(): node is NULL", __func__));
554	KASSERT(node->mn_info != NULL,
555	    ("%s(): node has no mn_info", __func__));
556
557	/* destroy children */
558	if (node->mn_type == mqfstype_dir || node->mn_type == mqfstype_root)
559		while (! LIST_EMPTY(&node->mn_children))
560			mqfs_destroy(LIST_FIRST(&node->mn_children));
561
562	/* unlink from parent */
563	if ((parent = node->mn_parent) != NULL) {
564		KASSERT(parent->mn_info == node->mn_info,
565		    ("%s(): parent has different mn_info", __func__));
566		LIST_REMOVE(node, mn_sibling);
567	}
568
569	if (node->mn_fileno != 0)
570		mqfs_fileno_free(node->mn_info, node);
571	if (node->mn_data != NULL)
572		mqueue_free(node->mn_data);
573	mqnode_free(node);
574	return (0);
575}
576
577/*
578 * Mount a mqfs instance
579 */
580static int
581mqfs_mount(struct mount *mp)
582{
583	struct statfs *sbp;
584
585	if (mp->mnt_flag & MNT_UPDATE)
586		return (EOPNOTSUPP);
587
588	mp->mnt_data = &mqfs_data;
589	MNT_ILOCK(mp);
590	mp->mnt_flag |= MNT_LOCAL;
591	MNT_IUNLOCK(mp);
592	vfs_getnewfsid(mp);
593
594	sbp = &mp->mnt_stat;
595	vfs_mountedfrom(mp, "mqueue");
596	sbp->f_bsize = PAGE_SIZE;
597	sbp->f_iosize = PAGE_SIZE;
598	sbp->f_blocks = 1;
599	sbp->f_bfree = 0;
600	sbp->f_bavail = 0;
601	sbp->f_files = 1;
602	sbp->f_ffree = 0;
603	return (0);
604}
605
606/*
607 * Unmount a mqfs instance
608 */
609static int
610mqfs_unmount(struct mount *mp, int mntflags)
611{
612	int error;
613
614	error = vflush(mp, 0, (mntflags & MNT_FORCE) ?  FORCECLOSE : 0,
615	    curthread);
616	return (error);
617}
618
619/*
620 * Return a root vnode
621 */
622static int
623mqfs_root(struct mount *mp, int flags, struct vnode **vpp)
624{
625	struct mqfs_info *mqfs;
626	int ret;
627
628	mqfs = VFSTOMQFS(mp);
629	ret = mqfs_allocv(mp, vpp, mqfs->mi_root);
630	return (ret);
631}
632
/*
 * Report filesystem statistics.  Currently a no-op: the values filled in
 * at mount time are left untouched.
 */
static int
mqfs_statfs(struct mount *mp, struct statfs *sbp)
{

	/* XXX update statistics */
	return (0);
}
642
643/*
644 * Initialize a mqfs instance
645 */
646static int
647mqfs_init(struct vfsconf *vfc)
648{
649	struct mqfs_node *root;
650	struct mqfs_info *mi;
651	osd_method_t methods[PR_MAXMETHOD] = {
652	    [PR_METHOD_REMOVE] = mqfs_prison_remove,
653	};
654
655	mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node),
656		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
657	mqueue_zone = uma_zcreate("mqueue", sizeof(struct mqueue),
658		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
659	mvdata_zone = uma_zcreate("mvdata",
660		sizeof(struct mqfs_vdata), NULL, NULL, NULL,
661		NULL, UMA_ALIGN_PTR, 0);
662	mqnoti_zone = uma_zcreate("mqnotifier", sizeof(struct mqueue_notifier),
663		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
664	mi = &mqfs_data;
665	sx_init(&mi->mi_lock, "mqfs lock");
666	/* set up the root diretory */
667	root = mqfs_create_node("/", 1, curthread->td_ucred, 01777,
668		mqfstype_root);
669	root->mn_info = mi;
670	LIST_INIT(&root->mn_children);
671	LIST_INIT(&root->mn_vnodes);
672	mi->mi_root = root;
673	mqfs_fileno_init(mi);
674	mqfs_fileno_alloc(mi, root);
675	mqfs_fixup_dir(root);
676	exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL,
677	    EVENTHANDLER_PRI_ANY);
678	mq_fdclose = mqueue_fdclose;
679	p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING);
680	mqfs_osd_jail_slot = osd_jail_register(NULL, methods);
681	return (0);
682}
683
684/*
685 * Destroy a mqfs instance
686 */
687static int
688mqfs_uninit(struct vfsconf *vfc)
689{
690	struct mqfs_info *mi;
691
692	if (!unloadable)
693		return (EOPNOTSUPP);
694	osd_jail_deregister(mqfs_osd_jail_slot);
695	EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
696	mi = &mqfs_data;
697	mqfs_destroy(mi->mi_root);
698	mi->mi_root = NULL;
699	mqfs_fileno_uninit(mi);
700	sx_destroy(&mi->mi_lock);
701	uma_zdestroy(mqnode_zone);
702	uma_zdestroy(mqueue_zone);
703	uma_zdestroy(mvdata_zone);
704	uma_zdestroy(mqnoti_zone);
705	return (0);
706}
707
708/*
709 * task routine
710 */
711static void
712do_recycle(void *context, int pending __unused)
713{
714	struct vnode *vp = (struct vnode *)context;
715
716	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
717	vrecycle(vp);
718	VOP_UNLOCK(vp, 0);
719	vdrop(vp);
720}
721
722/*
723 * Allocate a vnode
724 */
725static int
726mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn)
727{
728	struct mqfs_vdata *vd;
729	struct mqfs_info  *mqfs;
730	struct vnode *newvpp;
731	int error;
732
733	mqfs = pn->mn_info;
734	*vpp = NULL;
735	sx_xlock(&mqfs->mi_lock);
736	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
737		if (vd->mv_vnode->v_mount == mp) {
738			vhold(vd->mv_vnode);
739			break;
740		}
741	}
742
743	if (vd != NULL) {
744found:
745		*vpp = vd->mv_vnode;
746		sx_xunlock(&mqfs->mi_lock);
747		error = vget(*vpp, LK_RETRY | LK_EXCLUSIVE, curthread);
748		vdrop(*vpp);
749		return (error);
750	}
751	sx_xunlock(&mqfs->mi_lock);
752
753	error = getnewvnode("mqueue", mp, &mqfs_vnodeops, &newvpp);
754	if (error)
755		return (error);
756	vn_lock(newvpp, LK_EXCLUSIVE | LK_RETRY);
757	error = insmntque(newvpp, mp);
758	if (error != 0)
759		return (error);
760
761	sx_xlock(&mqfs->mi_lock);
762	/*
763	 * Check if it has already been allocated
764	 * while we were blocked.
765	 */
766	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
767		if (vd->mv_vnode->v_mount == mp) {
768			vhold(vd->mv_vnode);
769			sx_xunlock(&mqfs->mi_lock);
770
771			vgone(newvpp);
772			vput(newvpp);
773			goto found;
774		}
775	}
776
777	*vpp = newvpp;
778
779	vd = uma_zalloc(mvdata_zone, M_WAITOK);
780	(*vpp)->v_data = vd;
781	vd->mv_vnode = *vpp;
782	vd->mv_node = pn;
783	TASK_INIT(&vd->mv_task, 0, do_recycle, *vpp);
784	LIST_INSERT_HEAD(&pn->mn_vnodes, vd, mv_link);
785	mqnode_addref(pn);
786	switch (pn->mn_type) {
787	case mqfstype_root:
788		(*vpp)->v_vflag = VV_ROOT;
789		/* fall through */
790	case mqfstype_dir:
791	case mqfstype_this:
792	case mqfstype_parent:
793		(*vpp)->v_type = VDIR;
794		break;
795	case mqfstype_file:
796		(*vpp)->v_type = VREG;
797		break;
798	case mqfstype_symlink:
799		(*vpp)->v_type = VLNK;
800		break;
801	case mqfstype_none:
802		KASSERT(0, ("mqfs_allocf called for null node\n"));
803	default:
804		panic("%s has unexpected type: %d", pn->mn_name, pn->mn_type);
805	}
806	sx_xunlock(&mqfs->mi_lock);
807	return (0);
808}
809
810/*
811 * Search a directory entry
812 */
813static struct mqfs_node *
814mqfs_search(struct mqfs_node *pd, const char *name, int len, struct ucred *cred)
815{
816	struct mqfs_node *pn;
817	const void *pr_root;
818
819	sx_assert(&pd->mn_info->mi_lock, SX_LOCKED);
820	pr_root = cred->cr_prison->pr_root;
821	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
822		/* Only match names within the same prison root directory */
823		if ((pn->mn_pr_root == NULL || pn->mn_pr_root == pr_root) &&
824		    strncmp(pn->mn_name, name, len) == 0 &&
825		    pn->mn_name[len] == '\0')
826			return (pn);
827	}
828	return (NULL);
829}
830
/*
 * Look up a file or directory.
 *
 * Resolves the component in ap->a_cnp relative to directory vnode ap->a_dvp
 * and stores the result in *ap->a_vpp.
 *
 * Returns 0 with *a_vpp set on success; ENOTDIR if a_dvp is not a
 * directory; the VOP_ACCESS() error if search (or, for DELETE/CREATE/RENAME
 * of the last component, write) permission is denied; EINVAL for a
 * non-LOOKUP operation on "." or ".." as last component; EIO for ".." at
 * the root; EJUSTRETURN when the name is absent but may be created; ENOENT
 * otherwise.
 */
static int
mqfs_lookupx(struct vop_cachedlookup_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct mqfs_node *pd;
	struct mqfs_node *pn;
	struct mqfs_info *mqfs;
	int nameiop, flags, error, namelen;
	char *pname;
	struct thread *td;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	namelen = cnp->cn_namelen;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	pd = VTON(dvp);
	pn = NULL;
	mqfs = pd->mn_info;
	*vpp = NULLVP;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* The caller needs search permission on the directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
	if (error)
		return (error);

	/* shortcut: check if the name is too long */
	if (cnp->cn_namelen >= MQFS_NAMELEN)
		return (ENOENT);

	/* self */
	if (namelen == 1 && pname[0] == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		/* NOTE(review): pn is assigned but not read again on this path. */
		pn = pd;
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	/* parent */
	if (cnp->cn_flags & ISDOTDOT) {
		if (dvp->v_vflag & VV_ROOT)
			return (EIO);
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		/*
		 * Unlock the child directory while the parent's vnode is
		 * obtained (and locked) by mqfs_allocv(), then relock it.
		 */
		VOP_UNLOCK(dvp, 0);
		KASSERT(pd->mn_parent, ("non-root directory has no parent"));
		pn = pd->mn_parent;
		error = mqfs_allocv(dvp->v_mount, vpp, pn);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (error);
	}

	/* named node */
	sx_xlock(&mqfs->mi_lock);
	pn = mqfs_search(pd, pname, namelen, cnp->cn_cred);
	/* Pin the node so it survives after mi_lock is dropped. */
	if (pn != NULL)
		mqnode_addref(pn);
	sx_xunlock(&mqfs->mi_lock);

	/* found */
	if (pn != NULL) {
		/* DELETE */
		if (nameiop == DELETE && (flags & ISLASTCN)) {
			/* Removing an entry needs write permission on the directory. */
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
			if (error) {
				mqnode_release(pn);
				return (error);
			}
			/*
			 * NOTE(review): *vpp was set to NULLVP above and has
			 * not been changed on this path, so this branch looks
			 * unreachable -- confirm before relying on it.
			 */
			if (*vpp == dvp) {
				VREF(dvp);
				*vpp = dvp;
				mqnode_release(pn);
				return (0);
			}
		}

		/* allocate vnode */
		error = mqfs_allocv(dvp->v_mount, vpp, pn);
		/* Drop the reference taken under mi_lock above. */
		mqnode_release(pn);
		if (error == 0 && cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, *vpp, cnp);
		return (error);
	}

	/* not found */

	/* will create a new entry in the directory ? */
	if ((nameiop == CREATE || nameiop == RENAME) && (flags & LOCKPARENT)
	    && (flags & ISLASTCN)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		/* Ask for the name to be kept; mqfs_create() requires HASBUF. */
		cnp->cn_flags |= SAVENAME;
		return (EJUSTRETURN);
	}
	return (ENOENT);
}
939
940#if 0
941struct vop_lookup_args {
942	struct vop_generic_args a_gen;
943	struct vnode *a_dvp;
944	struct vnode **a_vpp;
945	struct componentname *a_cnp;
946};
947#endif
948
/*
 * vnode lookup operation: thin wrapper around mqfs_lookupx().
 */
static int
mqfs_lookup(struct vop_cachedlookup_args *ap)
{

	return (mqfs_lookupx(ap));
}
960
961#if 0
962struct vop_create_args {
963	struct vnode *a_dvp;
964	struct vnode **a_vpp;
965	struct componentname *a_cnp;
966	struct vattr *a_vap;
967};
968#endif
969
970/*
971 * vnode creation operation
972 */
973static int
974mqfs_create(struct vop_create_args *ap)
975{
976	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
977	struct componentname *cnp = ap->a_cnp;
978	struct mqfs_node *pd;
979	struct mqfs_node *pn;
980	struct mqueue *mq;
981	int error;
982
983	pd = VTON(ap->a_dvp);
984	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
985		return (ENOTDIR);
986	mq = mqueue_alloc(NULL);
987	if (mq == NULL)
988		return (EAGAIN);
989	sx_xlock(&mqfs->mi_lock);
990	if ((cnp->cn_flags & HASBUF) == 0)
991		panic("%s: no name", __func__);
992	pn = mqfs_create_file(pd, cnp->cn_nameptr, cnp->cn_namelen,
993		cnp->cn_cred, ap->a_vap->va_mode);
994	if (pn == NULL) {
995		sx_xunlock(&mqfs->mi_lock);
996		error = ENOSPC;
997	} else {
998		mqnode_addref(pn);
999		sx_xunlock(&mqfs->mi_lock);
1000		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1001		mqnode_release(pn);
1002		if (error)
1003			mqfs_destroy(pn);
1004		else
1005			pn->mn_data = mq;
1006	}
1007	if (error)
1008		mqueue_free(mq);
1009	return (error);
1010}
1011
1012/*
1013 * Remove an entry
1014 */
1015static
1016int do_unlink(struct mqfs_node *pn, struct ucred *ucred)
1017{
1018	struct mqfs_node *parent;
1019	struct mqfs_vdata *vd;
1020	int error = 0;
1021
1022	sx_assert(&pn->mn_info->mi_lock, SX_LOCKED);
1023
1024	if (ucred->cr_uid != pn->mn_uid &&
1025	    (error = priv_check_cred(ucred, PRIV_MQ_ADMIN, 0)) != 0)
1026		error = EACCES;
1027	else if (!pn->mn_deleted) {
1028		parent = pn->mn_parent;
1029		pn->mn_parent = NULL;
1030		pn->mn_deleted = 1;
1031		LIST_REMOVE(pn, mn_sibling);
1032		LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
1033			cache_purge(vd->mv_vnode);
1034			vhold(vd->mv_vnode);
1035			taskqueue_enqueue(taskqueue_thread, &vd->mv_task);
1036		}
1037		mqnode_release(pn);
1038		mqnode_release(parent);
1039	} else
1040		error = ENOENT;
1041	return (error);
1042}
1043
1044#if 0
1045struct vop_remove_args {
1046	struct vnode *a_dvp;
1047	struct vnode *a_vp;
1048	struct componentname *a_cnp;
1049};
1050#endif
1051
1052/*
1053 * vnode removal operation
1054 */
1055static int
1056mqfs_remove(struct vop_remove_args *ap)
1057{
1058	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1059	struct mqfs_node *pn;
1060	int error;
1061
1062	if (ap->a_vp->v_type == VDIR)
1063                return (EPERM);
1064	pn = VTON(ap->a_vp);
1065	sx_xlock(&mqfs->mi_lock);
1066	error = do_unlink(pn, ap->a_cnp->cn_cred);
1067	sx_xunlock(&mqfs->mi_lock);
1068	return (error);
1069}
1070
1071#if 0
1072struct vop_inactive_args {
1073	struct vnode *a_vp;
1074	struct thread *a_td;
1075};
1076#endif
1077
1078static int
1079mqfs_inactive(struct vop_inactive_args *ap)
1080{
1081	struct mqfs_node *pn = VTON(ap->a_vp);
1082
1083	if (pn->mn_deleted)
1084		vrecycle(ap->a_vp);
1085	return (0);
1086}
1087
1088#if 0
1089struct vop_reclaim_args {
1090	struct vop_generic_args a_gen;
1091	struct vnode *a_vp;
1092	struct thread *a_td;
1093};
1094#endif
1095
1096static int
1097mqfs_reclaim(struct vop_reclaim_args *ap)
1098{
1099	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_vp->v_mount);
1100	struct vnode *vp = ap->a_vp;
1101	struct mqfs_node *pn;
1102	struct mqfs_vdata *vd;
1103
1104	vd = vp->v_data;
1105	pn = vd->mv_node;
1106	sx_xlock(&mqfs->mi_lock);
1107	vp->v_data = NULL;
1108	LIST_REMOVE(vd, mv_link);
1109	uma_zfree(mvdata_zone, vd);
1110	mqnode_release(pn);
1111	sx_xunlock(&mqfs->mi_lock);
1112	return (0);
1113}
1114
1115#if 0
1116struct vop_open_args {
1117	struct vop_generic_args a_gen;
1118	struct vnode *a_vp;
1119	int a_mode;
1120	struct ucred *a_cred;
1121	struct thread *a_td;
1122	struct file *a_fp;
1123};
1124#endif
1125
/*
 * vnode open operation: nothing to do, always succeeds.
 */
static int
mqfs_open(struct vop_open_args *ap)
{

	return (0);
}
1131
1132#if 0
1133struct vop_close_args {
1134	struct vop_generic_args a_gen;
1135	struct vnode *a_vp;
1136	int a_fflag;
1137	struct ucred *a_cred;
1138	struct thread *a_td;
1139};
1140#endif
1141
/*
 * vnode close operation: nothing to do, always succeeds.
 */
static int
mqfs_close(struct vop_close_args *ap)
{

	return (0);
}
1147
1148#if 0
1149struct vop_access_args {
1150	struct vop_generic_args a_gen;
1151	struct vnode *a_vp;
1152	accmode_t a_accmode;
1153	struct ucred *a_cred;
1154	struct thread *a_td;
1155};
1156#endif
1157
1158/*
1159 * Verify permissions
1160 */
1161static int
1162mqfs_access(struct vop_access_args *ap)
1163{
1164	struct vnode *vp = ap->a_vp;
1165	struct vattr vattr;
1166	int error;
1167
1168	error = VOP_GETATTR(vp, &vattr, ap->a_cred);
1169	if (error)
1170		return (error);
1171	error = vaccess(vp->v_type, vattr.va_mode, vattr.va_uid,
1172	    vattr.va_gid, ap->a_accmode, ap->a_cred, NULL);
1173	return (error);
1174}
1175
1176#if 0
1177struct vop_getattr_args {
1178	struct vop_generic_args a_gen;
1179	struct vnode *a_vp;
1180	struct vattr *a_vap;
1181	struct ucred *a_cred;
1182};
1183#endif
1184
1185/*
1186 * Get file attributes
1187 */
1188static int
1189mqfs_getattr(struct vop_getattr_args *ap)
1190{
1191	struct vnode *vp = ap->a_vp;
1192	struct mqfs_node *pn = VTON(vp);
1193	struct vattr *vap = ap->a_vap;
1194	int error = 0;
1195
1196	vap->va_type = vp->v_type;
1197	vap->va_mode = pn->mn_mode;
1198	vap->va_nlink = 1;
1199	vap->va_uid = pn->mn_uid;
1200	vap->va_gid = pn->mn_gid;
1201	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
1202	vap->va_fileid = pn->mn_fileno;
1203	vap->va_size = 0;
1204	vap->va_blocksize = PAGE_SIZE;
1205	vap->va_bytes = vap->va_size = 0;
1206	vap->va_atime = pn->mn_atime;
1207	vap->va_mtime = pn->mn_mtime;
1208	vap->va_ctime = pn->mn_ctime;
1209	vap->va_birthtime = pn->mn_birth;
1210	vap->va_gen = 0;
1211	vap->va_flags = 0;
1212	vap->va_rdev = NODEV;
1213	vap->va_bytes = 0;
1214	vap->va_filerev = 0;
1215	return (error);
1216}
1217
1218#if 0
1219struct vop_setattr_args {
1220	struct vop_generic_args a_gen;
1221	struct vnode *a_vp;
1222	struct vattr *a_vap;
1223	struct ucred *a_cred;
1224};
1225#endif
1226/*
1227 * Set attributes
1228 */
1229static int
1230mqfs_setattr(struct vop_setattr_args *ap)
1231{
1232	struct mqfs_node *pn;
1233	struct vattr *vap;
1234	struct vnode *vp;
1235	struct thread *td;
1236	int c, error;
1237	uid_t uid;
1238	gid_t gid;
1239
1240	td = curthread;
1241	vap = ap->a_vap;
1242	vp = ap->a_vp;
1243	if ((vap->va_type != VNON) ||
1244	    (vap->va_nlink != VNOVAL) ||
1245	    (vap->va_fsid != VNOVAL) ||
1246	    (vap->va_fileid != VNOVAL) ||
1247	    (vap->va_blocksize != VNOVAL) ||
1248	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
1249	    (vap->va_rdev != VNOVAL) ||
1250	    ((int)vap->va_bytes != VNOVAL) ||
1251	    (vap->va_gen != VNOVAL)) {
1252		return (EINVAL);
1253	}
1254
1255	pn = VTON(vp);
1256
1257	error = c = 0;
1258	if (vap->va_uid == (uid_t)VNOVAL)
1259		uid = pn->mn_uid;
1260	else
1261		uid = vap->va_uid;
1262	if (vap->va_gid == (gid_t)VNOVAL)
1263		gid = pn->mn_gid;
1264	else
1265		gid = vap->va_gid;
1266
1267	if (uid != pn->mn_uid || gid != pn->mn_gid) {
1268		/*
1269		 * To modify the ownership of a file, must possess VADMIN
1270		 * for that file.
1271		 */
1272		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)))
1273			return (error);
1274
1275		/*
1276		 * XXXRW: Why is there a privilege check here: shouldn't the
1277		 * check in VOP_ACCESS() be enough?  Also, are the group bits
1278		 * below definitely right?
1279		 */
1280		if (((ap->a_cred->cr_uid != pn->mn_uid) || uid != pn->mn_uid ||
1281		    (gid != pn->mn_gid && !groupmember(gid, ap->a_cred))) &&
1282		    (error = priv_check(td, PRIV_MQ_ADMIN)) != 0)
1283			return (error);
1284		pn->mn_uid = uid;
1285		pn->mn_gid = gid;
1286		c = 1;
1287	}
1288
1289	if (vap->va_mode != (mode_t)VNOVAL) {
1290		if ((ap->a_cred->cr_uid != pn->mn_uid) &&
1291		    (error = priv_check(td, PRIV_MQ_ADMIN)))
1292			return (error);
1293		pn->mn_mode = vap->va_mode;
1294		c = 1;
1295	}
1296
1297	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1298		/* See the comment in ufs_vnops::ufs_setattr(). */
1299		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)) &&
1300		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
1301		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td))))
1302			return (error);
1303		if (vap->va_atime.tv_sec != VNOVAL) {
1304			pn->mn_atime = vap->va_atime;
1305		}
1306		if (vap->va_mtime.tv_sec != VNOVAL) {
1307			pn->mn_mtime = vap->va_mtime;
1308		}
1309		c = 1;
1310	}
1311	if (c) {
1312		vfs_timestamp(&pn->mn_ctime);
1313	}
1314	return (0);
1315}
1316
1317#if 0
1318struct vop_read_args {
1319	struct vop_generic_args a_gen;
1320	struct vnode *a_vp;
1321	struct uio *a_uio;
1322	int a_ioflag;
1323	struct ucred *a_cred;
1324};
1325#endif
1326
1327/*
1328 * Read from a file
1329 */
1330static int
1331mqfs_read(struct vop_read_args *ap)
1332{
1333	char buf[80];
1334	struct vnode *vp = ap->a_vp;
1335	struct uio *uio = ap->a_uio;
1336	struct mqfs_node *pn;
1337	struct mqueue *mq;
1338	int len, error;
1339
1340	if (vp->v_type != VREG)
1341		return (EINVAL);
1342
1343	pn = VTON(vp);
1344	mq = VTOMQ(vp);
1345	snprintf(buf, sizeof(buf),
1346		"QSIZE:%-10ld MAXMSG:%-10ld CURMSG:%-10ld MSGSIZE:%-10ld\n",
1347		mq->mq_totalbytes,
1348		mq->mq_maxmsg,
1349		mq->mq_curmsgs,
1350		mq->mq_msgsize);
1351	buf[sizeof(buf)-1] = '\0';
1352	len = strlen(buf);
1353	error = uiomove_frombuf(buf, len, uio);
1354	return (error);
1355}
1356
1357#if 0
1358struct vop_readdir_args {
1359	struct vop_generic_args a_gen;
1360	struct vnode *a_vp;
1361	struct uio *a_uio;
1362	struct ucred *a_cred;
1363	int *a_eofflag;
1364	int *a_ncookies;
1365	u_long **a_cookies;
1366};
1367#endif
1368
1369/*
1370 * Return directory entries.
1371 */
1372static int
1373mqfs_readdir(struct vop_readdir_args *ap)
1374{
1375	struct vnode *vp;
1376	struct mqfs_info *mi;
1377	struct mqfs_node *pd;
1378	struct mqfs_node *pn;
1379	struct dirent entry;
1380	struct uio *uio;
1381	const void *pr_root;
1382	int *tmp_ncookies = NULL;
1383	off_t offset;
1384	int error, i;
1385
1386	vp = ap->a_vp;
1387	mi = VFSTOMQFS(vp->v_mount);
1388	pd = VTON(vp);
1389	uio = ap->a_uio;
1390
1391	if (vp->v_type != VDIR)
1392		return (ENOTDIR);
1393
1394	if (uio->uio_offset < 0)
1395		return (EINVAL);
1396
1397	if (ap->a_ncookies != NULL) {
1398		tmp_ncookies = ap->a_ncookies;
1399		*ap->a_ncookies = 0;
1400		ap->a_ncookies = NULL;
1401        }
1402
1403	error = 0;
1404	offset = 0;
1405
1406	pr_root = ap->a_cred->cr_prison->pr_root;
1407	sx_xlock(&mi->mi_lock);
1408
1409	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
1410		entry.d_reclen = sizeof(entry);
1411
1412		/*
1413		 * Only show names within the same prison root directory
1414		 * (or not associated with a prison, e.g. "." and "..").
1415		 */
1416		if (pn->mn_pr_root != NULL && pn->mn_pr_root != pr_root)
1417			continue;
1418		if (!pn->mn_fileno)
1419			mqfs_fileno_alloc(mi, pn);
1420		entry.d_fileno = pn->mn_fileno;
1421		for (i = 0; i < MQFS_NAMELEN - 1 && pn->mn_name[i] != '\0'; ++i)
1422			entry.d_name[i] = pn->mn_name[i];
1423		entry.d_name[i] = 0;
1424		entry.d_namlen = i;
1425		switch (pn->mn_type) {
1426		case mqfstype_root:
1427		case mqfstype_dir:
1428		case mqfstype_this:
1429		case mqfstype_parent:
1430			entry.d_type = DT_DIR;
1431			break;
1432		case mqfstype_file:
1433			entry.d_type = DT_REG;
1434			break;
1435		case mqfstype_symlink:
1436			entry.d_type = DT_LNK;
1437			break;
1438		default:
1439			panic("%s has unexpected node type: %d", pn->mn_name,
1440				pn->mn_type);
1441		}
1442		if (entry.d_reclen > uio->uio_resid)
1443                        break;
1444		if (offset >= uio->uio_offset) {
1445			error = vfs_read_dirent(ap, &entry, offset);
1446                        if (error)
1447                                break;
1448                }
1449                offset += entry.d_reclen;
1450	}
1451	sx_xunlock(&mi->mi_lock);
1452
1453	uio->uio_offset = offset;
1454
1455	if (tmp_ncookies != NULL)
1456		ap->a_ncookies = tmp_ncookies;
1457
1458	return (error);
1459}
1460
1461#ifdef notyet
1462
1463#if 0
1464struct vop_mkdir_args {
1465	struct vnode *a_dvp;
1466	struvt vnode **a_vpp;
1467	struvt componentname *a_cnp;
1468	struct vattr *a_vap;
1469};
1470#endif
1471
1472/*
1473 * Create a directory.
1474 */
1475static int
1476mqfs_mkdir(struct vop_mkdir_args *ap)
1477{
1478	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1479	struct componentname *cnp = ap->a_cnp;
1480	struct mqfs_node *pd = VTON(ap->a_dvp);
1481	struct mqfs_node *pn;
1482	int error;
1483
1484	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
1485		return (ENOTDIR);
1486	sx_xlock(&mqfs->mi_lock);
1487	if ((cnp->cn_flags & HASBUF) == 0)
1488		panic("%s: no name", __func__);
1489	pn = mqfs_create_dir(pd, cnp->cn_nameptr, cnp->cn_namelen,
1490		ap->a_vap->cn_cred, ap->a_vap->va_mode);
1491	if (pn != NULL)
1492		mqnode_addref(pn);
1493	sx_xunlock(&mqfs->mi_lock);
1494	if (pn == NULL) {
1495		error = ENOSPC;
1496	} else {
1497		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1498		mqnode_release(pn);
1499	}
1500	return (error);
1501}
1502
1503#if 0
1504struct vop_rmdir_args {
1505	struct vnode *a_dvp;
1506	struct vnode *a_vp;
1507	struct componentname *a_cnp;
1508};
1509#endif
1510
1511/*
1512 * Remove a directory.
1513 */
1514static int
1515mqfs_rmdir(struct vop_rmdir_args *ap)
1516{
1517	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1518	struct mqfs_node *pn = VTON(ap->a_vp);
1519	struct mqfs_node *pt;
1520
1521	if (pn->mn_type != mqfstype_dir)
1522		return (ENOTDIR);
1523
1524	sx_xlock(&mqfs->mi_lock);
1525	if (pn->mn_deleted) {
1526		sx_xunlock(&mqfs->mi_lock);
1527		return (ENOENT);
1528	}
1529
1530	pt = LIST_FIRST(&pn->mn_children);
1531	pt = LIST_NEXT(pt, mn_sibling);
1532	pt = LIST_NEXT(pt, mn_sibling);
1533	if (pt != NULL) {
1534		sx_xunlock(&mqfs->mi_lock);
1535		return (ENOTEMPTY);
1536	}
1537	pt = pn->mn_parent;
1538	pn->mn_parent = NULL;
1539	pn->mn_deleted = 1;
1540	LIST_REMOVE(pn, mn_sibling);
1541	mqnode_release(pn);
1542	mqnode_release(pt);
1543	sx_xunlock(&mqfs->mi_lock);
1544	cache_purge(ap->a_vp);
1545	return (0);
1546}
1547
1548#endif /* notyet */
1549
1550/*
1551 * See if this prison root is obsolete, and clean up associated queues if it is.
1552 */
1553static int
1554mqfs_prison_remove(void *obj, void *data __unused)
1555{
1556	const struct prison *pr = obj;
1557	const struct prison *tpr;
1558	struct mqfs_node *pn, *tpn;
1559	int found;
1560
1561	found = 0;
1562	TAILQ_FOREACH(tpr, &allprison, pr_list) {
1563		if (tpr->pr_root == pr->pr_root && tpr != pr && tpr->pr_ref > 0)
1564			found = 1;
1565	}
1566	if (!found) {
1567		/*
1568		 * No jails are rooted in this directory anymore,
1569		 * so no queues should be either.
1570		 */
1571		sx_xlock(&mqfs_data.mi_lock);
1572		LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
1573		    mn_sibling, tpn) {
1574			if (pn->mn_pr_root == pr->pr_root)
1575				(void)do_unlink(pn, curthread->td_ucred);
1576		}
1577		sx_xunlock(&mqfs_data.mi_lock);
1578	}
1579	return (0);
1580}
1581
1582/*
1583 * Allocate a message queue
1584 */
1585static struct mqueue *
1586mqueue_alloc(const struct mq_attr *attr)
1587{
1588	struct mqueue *mq;
1589
1590	if (curmq >= maxmq)
1591		return (NULL);
1592	mq = uma_zalloc(mqueue_zone, M_WAITOK | M_ZERO);
1593	TAILQ_INIT(&mq->mq_msgq);
1594	if (attr != NULL) {
1595		mq->mq_maxmsg = attr->mq_maxmsg;
1596		mq->mq_msgsize = attr->mq_msgsize;
1597	} else {
1598		mq->mq_maxmsg = default_maxmsg;
1599		mq->mq_msgsize = default_msgsize;
1600	}
1601	mtx_init(&mq->mq_mutex, "mqueue lock", NULL, MTX_DEF);
1602	knlist_init_mtx(&mq->mq_rsel.si_note, &mq->mq_mutex);
1603	knlist_init_mtx(&mq->mq_wsel.si_note, &mq->mq_mutex);
1604	atomic_add_int(&curmq, 1);
1605	return (mq);
1606}
1607
1608/*
1609 * Destroy a message queue
1610 */
1611static void
1612mqueue_free(struct mqueue *mq)
1613{
1614	struct mqueue_msg *msg;
1615
1616	while ((msg = TAILQ_FIRST(&mq->mq_msgq)) != NULL) {
1617		TAILQ_REMOVE(&mq->mq_msgq, msg, msg_link);
1618		free(msg, M_MQUEUEDATA);
1619	}
1620
1621	mtx_destroy(&mq->mq_mutex);
1622	seldrain(&mq->mq_rsel);
1623	seldrain(&mq->mq_wsel);
1624	knlist_destroy(&mq->mq_rsel.si_note);
1625	knlist_destroy(&mq->mq_wsel.si_note);
1626	uma_zfree(mqueue_zone, mq);
1627	atomic_add_int(&curmq, -1);
1628}
1629
1630/*
1631 * Load a message from user space
1632 */
1633static struct mqueue_msg *
1634mqueue_loadmsg(const char *msg_ptr, size_t msg_size, int msg_prio)
1635{
1636	struct mqueue_msg *msg;
1637	size_t len;
1638	int error;
1639
1640	len = sizeof(struct mqueue_msg) + msg_size;
1641	msg = malloc(len, M_MQUEUEDATA, M_WAITOK);
1642	error = copyin(msg_ptr, ((char *)msg) + sizeof(struct mqueue_msg),
1643	    msg_size);
1644	if (error) {
1645		free(msg, M_MQUEUEDATA);
1646		msg = NULL;
1647	} else {
1648		msg->msg_size = msg_size;
1649		msg->msg_prio = msg_prio;
1650	}
1651	return (msg);
1652}
1653
1654/*
1655 * Save a message to user space
1656 */
1657static int
1658mqueue_savemsg(struct mqueue_msg *msg, char *msg_ptr, int *msg_prio)
1659{
1660	int error;
1661
1662	error = copyout(((char *)msg) + sizeof(*msg), msg_ptr,
1663		msg->msg_size);
1664	if (error == 0 && msg_prio != NULL)
1665		error = copyout(&msg->msg_prio, msg_prio, sizeof(int));
1666	return (error);
1667}
1668
1669/*
1670 * Free a message's memory
1671 */
1672static __inline void
1673mqueue_freemsg(struct mqueue_msg *msg)
1674{
1675	free(msg, M_MQUEUEDATA);
1676}
1677
1678/*
1679 * Send a message. if waitok is false, thread will not be
1680 * blocked if there is no data in queue, otherwise, absolute
1681 * time will be checked.
1682 */
1683int
1684mqueue_send(struct mqueue *mq, const char *msg_ptr,
1685	size_t msg_len, unsigned msg_prio, int waitok,
1686	const struct timespec *abs_timeout)
1687{
1688	struct mqueue_msg *msg;
1689	struct timespec ts, ts2;
1690	struct timeval tv;
1691	int error;
1692
1693	if (msg_prio >= MQ_PRIO_MAX)
1694		return (EINVAL);
1695	if (msg_len > mq->mq_msgsize)
1696		return (EMSGSIZE);
1697	msg = mqueue_loadmsg(msg_ptr, msg_len, msg_prio);
1698	if (msg == NULL)
1699		return (EFAULT);
1700
1701	/* O_NONBLOCK case */
1702	if (!waitok) {
1703		error = _mqueue_send(mq, msg, -1);
1704		if (error)
1705			goto bad;
1706		return (0);
1707	}
1708
1709	/* we allow a null timeout (wait forever) */
1710	if (abs_timeout == NULL) {
1711		error = _mqueue_send(mq, msg, 0);
1712		if (error)
1713			goto bad;
1714		return (0);
1715	}
1716
1717	/* send it before checking time */
1718	error = _mqueue_send(mq, msg, -1);
1719	if (error == 0)
1720		return (0);
1721
1722	if (error != EAGAIN)
1723		goto bad;
1724
1725	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
1726		error = EINVAL;
1727		goto bad;
1728	}
1729	for (;;) {
1730		ts2 = *abs_timeout;
1731		getnanotime(&ts);
1732		timespecsub(&ts2, &ts);
1733		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1734			error = ETIMEDOUT;
1735			break;
1736		}
1737		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1738		error = _mqueue_send(mq, msg, tvtohz(&tv));
1739		if (error != ETIMEDOUT)
1740			break;
1741	}
1742	if (error == 0)
1743		return (0);
1744bad:
1745	mqueue_freemsg(msg);
1746	return (error);
1747}
1748
1749/*
1750 * Common routine to send a message
1751 */
1752static int
1753_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, int timo)
1754{
1755	struct mqueue_msg *msg2;
1756	int error = 0;
1757
1758	mtx_lock(&mq->mq_mutex);
1759	while (mq->mq_curmsgs >= mq->mq_maxmsg && error == 0) {
1760		if (timo < 0) {
1761			mtx_unlock(&mq->mq_mutex);
1762			return (EAGAIN);
1763		}
1764		mq->mq_senders++;
1765		error = msleep(&mq->mq_senders, &mq->mq_mutex,
1766			    PCATCH, "mqsend", timo);
1767		mq->mq_senders--;
1768		if (error == EAGAIN)
1769			error = ETIMEDOUT;
1770	}
1771	if (mq->mq_curmsgs >= mq->mq_maxmsg) {
1772		mtx_unlock(&mq->mq_mutex);
1773		return (error);
1774	}
1775	error = 0;
1776	if (TAILQ_EMPTY(&mq->mq_msgq)) {
1777		TAILQ_INSERT_HEAD(&mq->mq_msgq, msg, msg_link);
1778	} else {
1779		if (msg->msg_prio <= TAILQ_LAST(&mq->mq_msgq, msgq)->msg_prio) {
1780			TAILQ_INSERT_TAIL(&mq->mq_msgq, msg, msg_link);
1781		} else {
1782			TAILQ_FOREACH(msg2, &mq->mq_msgq, msg_link) {
1783				if (msg2->msg_prio < msg->msg_prio)
1784					break;
1785			}
1786			TAILQ_INSERT_BEFORE(msg2, msg, msg_link);
1787		}
1788	}
1789	mq->mq_curmsgs++;
1790	mq->mq_totalbytes += msg->msg_size;
1791	if (mq->mq_receivers)
1792		wakeup_one(&mq->mq_receivers);
1793	else if (mq->mq_notifier != NULL)
1794		mqueue_send_notification(mq);
1795	if (mq->mq_flags & MQ_RSEL) {
1796		mq->mq_flags &= ~MQ_RSEL;
1797		selwakeup(&mq->mq_rsel);
1798	}
1799	KNOTE_LOCKED(&mq->mq_rsel.si_note, 0);
1800	mtx_unlock(&mq->mq_mutex);
1801	return (0);
1802}
1803
1804/*
1805 * Send realtime a signal to process which registered itself
1806 * successfully by mq_notify.
1807 */
1808static void
1809mqueue_send_notification(struct mqueue *mq)
1810{
1811	struct mqueue_notifier *nt;
1812	struct thread *td;
1813	struct proc *p;
1814	int error;
1815
1816	mtx_assert(&mq->mq_mutex, MA_OWNED);
1817	nt = mq->mq_notifier;
1818	if (nt->nt_sigev.sigev_notify != SIGEV_NONE) {
1819		p = nt->nt_proc;
1820		error = sigev_findtd(p, &nt->nt_sigev, &td);
1821		if (error) {
1822			mq->mq_notifier = NULL;
1823			return;
1824		}
1825		if (!KSI_ONQ(&nt->nt_ksi)) {
1826			ksiginfo_set_sigev(&nt->nt_ksi, &nt->nt_sigev);
1827			tdsendsignal(p, td, nt->nt_ksi.ksi_signo, &nt->nt_ksi);
1828		}
1829		PROC_UNLOCK(p);
1830	}
1831	mq->mq_notifier = NULL;
1832}
1833
1834/*
1835 * Get a message. if waitok is false, thread will not be
1836 * blocked if there is no data in queue, otherwise, absolute
1837 * time will be checked.
1838 */
1839int
1840mqueue_receive(struct mqueue *mq, char *msg_ptr,
1841	size_t msg_len, unsigned *msg_prio, int waitok,
1842	const struct timespec *abs_timeout)
1843{
1844	struct mqueue_msg *msg;
1845	struct timespec ts, ts2;
1846	struct timeval tv;
1847	int error;
1848
1849	if (msg_len < mq->mq_msgsize)
1850		return (EMSGSIZE);
1851
1852	/* O_NONBLOCK case */
1853	if (!waitok) {
1854		error = _mqueue_recv(mq, &msg, -1);
1855		if (error)
1856			return (error);
1857		goto received;
1858	}
1859
1860	/* we allow a null timeout (wait forever). */
1861	if (abs_timeout == NULL) {
1862		error = _mqueue_recv(mq, &msg, 0);
1863		if (error)
1864			return (error);
1865		goto received;
1866	}
1867
1868	/* try to get a message before checking time */
1869	error = _mqueue_recv(mq, &msg, -1);
1870	if (error == 0)
1871		goto received;
1872
1873	if (error != EAGAIN)
1874		return (error);
1875
1876	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
1877		error = EINVAL;
1878		return (error);
1879	}
1880
1881	for (;;) {
1882		ts2 = *abs_timeout;
1883		getnanotime(&ts);
1884		timespecsub(&ts2, &ts);
1885		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1886			error = ETIMEDOUT;
1887			return (error);
1888		}
1889		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1890		error = _mqueue_recv(mq, &msg, tvtohz(&tv));
1891		if (error == 0)
1892			break;
1893		if (error != ETIMEDOUT)
1894			return (error);
1895	}
1896
1897received:
1898	error = mqueue_savemsg(msg, msg_ptr, msg_prio);
1899	if (error == 0) {
1900		curthread->td_retval[0] = msg->msg_size;
1901		curthread->td_retval[1] = 0;
1902	}
1903	mqueue_freemsg(msg);
1904	return (error);
1905}
1906
1907/*
1908 * Common routine to receive a message
1909 */
1910static int
1911_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo)
1912{
1913	int error = 0;
1914
1915	mtx_lock(&mq->mq_mutex);
1916	while ((*msg = TAILQ_FIRST(&mq->mq_msgq)) == NULL && error == 0) {
1917		if (timo < 0) {
1918			mtx_unlock(&mq->mq_mutex);
1919			return (EAGAIN);
1920		}
1921		mq->mq_receivers++;
1922		error = msleep(&mq->mq_receivers, &mq->mq_mutex,
1923			    PCATCH, "mqrecv", timo);
1924		mq->mq_receivers--;
1925		if (error == EAGAIN)
1926			error = ETIMEDOUT;
1927	}
1928	if (*msg != NULL) {
1929		error = 0;
1930		TAILQ_REMOVE(&mq->mq_msgq, *msg, msg_link);
1931		mq->mq_curmsgs--;
1932		mq->mq_totalbytes -= (*msg)->msg_size;
1933		if (mq->mq_senders)
1934			wakeup_one(&mq->mq_senders);
1935		if (mq->mq_flags & MQ_WSEL) {
1936			mq->mq_flags &= ~MQ_WSEL;
1937			selwakeup(&mq->mq_wsel);
1938		}
1939		KNOTE_LOCKED(&mq->mq_wsel.si_note, 0);
1940	}
1941	if (mq->mq_notifier != NULL && mq->mq_receivers == 0 &&
1942	    !TAILQ_EMPTY(&mq->mq_msgq)) {
1943		mqueue_send_notification(mq);
1944	}
1945	mtx_unlock(&mq->mq_mutex);
1946	return (error);
1947}
1948
1949static __inline struct mqueue_notifier *
1950notifier_alloc(void)
1951{
1952	return (uma_zalloc(mqnoti_zone, M_WAITOK | M_ZERO));
1953}
1954
1955static __inline void
1956notifier_free(struct mqueue_notifier *p)
1957{
1958	uma_zfree(mqnoti_zone, p);
1959}
1960
1961static struct mqueue_notifier *
1962notifier_search(struct proc *p, int fd)
1963{
1964	struct mqueue_notifier *nt;
1965
1966	LIST_FOREACH(nt, &p->p_mqnotifier, nt_link) {
1967		if (nt->nt_ksi.ksi_mqd == fd)
1968			break;
1969	}
1970	return (nt);
1971}
1972
1973static __inline void
1974notifier_insert(struct proc *p, struct mqueue_notifier *nt)
1975{
1976	LIST_INSERT_HEAD(&p->p_mqnotifier, nt, nt_link);
1977}
1978
1979static __inline void
1980notifier_delete(struct proc *p, struct mqueue_notifier *nt)
1981{
1982	LIST_REMOVE(nt, nt_link);
1983	notifier_free(nt);
1984}
1985
1986static void
1987notifier_remove(struct proc *p, struct mqueue *mq, int fd)
1988{
1989	struct mqueue_notifier *nt;
1990
1991	mtx_assert(&mq->mq_mutex, MA_OWNED);
1992	PROC_LOCK(p);
1993	nt = notifier_search(p, fd);
1994	if (nt != NULL) {
1995		if (mq->mq_notifier == nt)
1996			mq->mq_notifier = NULL;
1997		sigqueue_take(&nt->nt_ksi);
1998		notifier_delete(p, nt);
1999	}
2000	PROC_UNLOCK(p);
2001}
2002
/*
 * Open, and optionally create, the message queue named by the user-space
 * path upath, and return a new descriptor for it in td->td_retval[0].
 *
 * flags are kernel-style open flags (FREAD/FWRITE/O_CREAT/O_EXCL/
 * O_NONBLOCK), mode is the creation mode (filtered through the process
 * file creation mask), and attr optionally supplies queue limits for a
 * newly created queue.
 *
 * Errors: EINVAL for out-of-range attributes or a malformed name,
 * ENOENT if the queue does not exist and O_CREAT is not set, ENFILE if
 * no queue can be allocated, ENOSPC if the node cannot be created,
 * EEXIST for O_CREAT|O_EXCL on an existing queue, plus errors from
 * copyinstr(), falloc() and vaccess().
 */
static int
kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode,
    const struct mq_attr *attr)
{
	char path[MQFS_NAMELEN + 1];
	struct mqfs_node *pn;
	struct filedesc *fdp;
	struct file *fp;
	struct mqueue *mq;
	int fd, error, len, cmode;

	fdp = td->td_proc->p_fd;
	cmode = (((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT);
	mq = NULL;
	/* Attributes are only validated when a queue may be created. */
	if ((flags & O_CREAT) != 0 && attr != NULL) {
		if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > maxmsg)
			return (EINVAL);
		if (attr->mq_msgsize <= 0 || attr->mq_msgsize > maxmsgsize)
			return (EINVAL);
	}

	error = copyinstr(upath, path, MQFS_NAMELEN + 1, NULL);
        if (error)
		return (error);

	/*
	 * The first character of name must be a slash  (/) character
	 * and the remaining characters of name cannot include any slash
	 * characters.
	 */
	len = strlen(path);
	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
		return (EINVAL);

	/* Reserve the descriptor; it is always created close-on-exec. */
	error = falloc(td, &fp, &fd, O_CLOEXEC);
	if (error)
		return (error);

	sx_xlock(&mqfs_data.mi_lock);
	/* Look the name up without its leading slash. */
	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
	if (pn == NULL) {
		if (!(flags & O_CREAT)) {
			error = ENOENT;
		} else {
			mq = mqueue_alloc(attr);
			if (mq == NULL) {
				error = ENFILE;
			} else {
				pn = mqfs_create_file(mqfs_data.mi_root,
				         path + 1, len - 1, td->td_ucred,
					 cmode);
				if (pn == NULL) {
					error = ENOSPC;
					mqueue_free(mq);
				}
			}
		}

		/* Attach the new queue to its freshly created node. */
		if (error == 0) {
			pn->mn_data = mq;
		}
	} else {
		if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) {
			error = EEXIST;
		} else {
			accmode_t accmode = 0;

			/* Existing queue: check the requested access. */
			if (flags & FREAD)
				accmode |= VREAD;
			if (flags & FWRITE)
				accmode |= VWRITE;
			error = vaccess(VREG, pn->mn_mode, pn->mn_uid,
				    pn->mn_gid, accmode, td->td_ucred, NULL);
		}
	}

	if (error) {
		/* Undo the descriptor reservation made by falloc(). */
		sx_xunlock(&mqfs_data.mi_lock);
		fdclose(td, fp, fd);
		fdrop(fp, td);
		return (error);
	}

	/* The open file keeps its own reference on the node. */
	mqnode_addref(pn);
	sx_xunlock(&mqfs_data.mi_lock);

	finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn,
	    &mqueueops);

	td->td_retval[0] = fd;
	fdrop(fp, td);
	return (0);
}
2096
2097/*
2098 * Syscall to open a message queue.
2099 */
2100int
2101sys_kmq_open(struct thread *td, struct kmq_open_args *uap)
2102{
2103	struct mq_attr attr;
2104	int flags, error;
2105
2106	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
2107		return (EINVAL);
2108	flags = FFLAGS(uap->flags);
2109	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2110		error = copyin(uap->attr, &attr, sizeof(attr));
2111		if (error)
2112			return (error);
2113	}
2114	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2115	    uap->attr != NULL ? &attr : NULL));
2116}
2117
2118/*
2119 * Syscall to unlink a message queue.
2120 */
2121int
2122sys_kmq_unlink(struct thread *td, struct kmq_unlink_args *uap)
2123{
2124	char path[MQFS_NAMELEN+1];
2125	struct mqfs_node *pn;
2126	int error, len;
2127
2128	error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL);
2129        if (error)
2130		return (error);
2131
2132	len = strlen(path);
2133	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
2134		return (EINVAL);
2135
2136	sx_xlock(&mqfs_data.mi_lock);
2137	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
2138	if (pn != NULL)
2139		error = do_unlink(pn, td->td_ucred);
2140	else
2141		error = ENOENT;
2142	sx_xunlock(&mqfs_data.mi_lock);
2143	return (error);
2144}
2145
2146typedef int (*_fgetf)(struct thread *, int, cap_rights_t *, struct file **);
2147
2148/*
2149 * Get message queue by giving file slot
2150 */
2151static int
2152_getmq(struct thread *td, int fd, cap_rights_t *rightsp, _fgetf func,
2153       struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq)
2154{
2155	struct mqfs_node *pn;
2156	int error;
2157
2158	error = func(td, fd, rightsp, fpp);
2159	if (error)
2160		return (error);
2161	if (&mqueueops != (*fpp)->f_ops) {
2162		fdrop(*fpp, td);
2163		return (EBADF);
2164	}
2165	pn = (*fpp)->f_data;
2166	if (ppn)
2167		*ppn = pn;
2168	if (pmq)
2169		*pmq = pn->mn_data;
2170	return (0);
2171}
2172
2173static __inline int
2174getmq(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn,
2175	struct mqueue **pmq)
2176{
2177	cap_rights_t rights;
2178
2179	return _getmq(td, fd, cap_rights_init(&rights, CAP_EVENT), fget,
2180	    fpp, ppn, pmq);
2181}
2182
2183static __inline int
2184getmq_read(struct thread *td, int fd, struct file **fpp,
2185	 struct mqfs_node **ppn, struct mqueue **pmq)
2186{
2187	cap_rights_t rights;
2188
2189	return _getmq(td, fd, cap_rights_init(&rights, CAP_READ), fget_read,
2190	    fpp, ppn, pmq);
2191}
2192
2193static __inline int
2194getmq_write(struct thread *td, int fd, struct file **fpp,
2195	struct mqfs_node **ppn, struct mqueue **pmq)
2196{
2197	cap_rights_t rights;
2198
2199	return _getmq(td, fd, cap_rights_init(&rights, CAP_WRITE), fget_write,
2200	    fpp, ppn, pmq);
2201}
2202
2203static int
2204kern_kmq_setattr(struct thread *td, int mqd, const struct mq_attr *attr,
2205    struct mq_attr *oattr)
2206{
2207	struct mqueue *mq;
2208	struct file *fp;
2209	u_int oflag, flag;
2210	int error;
2211
2212	if (attr != NULL && (attr->mq_flags & ~O_NONBLOCK) != 0)
2213		return (EINVAL);
2214	error = getmq(td, mqd, &fp, NULL, &mq);
2215	if (error)
2216		return (error);
2217	oattr->mq_maxmsg  = mq->mq_maxmsg;
2218	oattr->mq_msgsize = mq->mq_msgsize;
2219	oattr->mq_curmsgs = mq->mq_curmsgs;
2220	if (attr != NULL) {
2221		do {
2222			oflag = flag = fp->f_flag;
2223			flag &= ~O_NONBLOCK;
2224			flag |= (attr->mq_flags & O_NONBLOCK);
2225		} while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0);
2226	} else
2227		oflag = fp->f_flag;
2228	oattr->mq_flags = (O_NONBLOCK & oflag);
2229	fdrop(fp, td);
2230	return (error);
2231}
2232
2233int
2234sys_kmq_setattr(struct thread *td, struct kmq_setattr_args *uap)
2235{
2236	struct mq_attr attr, oattr;
2237	int error;
2238
2239	if (uap->attr != NULL) {
2240		error = copyin(uap->attr, &attr, sizeof(attr));
2241		if (error != 0)
2242			return (error);
2243	}
2244	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2245	    &oattr);
2246	if (error == 0 && uap->oattr != NULL) {
2247		bzero(oattr.__reserved, sizeof(oattr.__reserved));
2248		error = copyout(&oattr, uap->oattr, sizeof(oattr));
2249	}
2250	return (error);
2251}
2252
2253int
2254sys_kmq_timedreceive(struct thread *td, struct kmq_timedreceive_args *uap)
2255{
2256	struct mqueue *mq;
2257	struct file *fp;
2258	struct timespec *abs_timeout, ets;
2259	int error;
2260	int waitok;
2261
2262	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2263	if (error)
2264		return (error);
2265	if (uap->abs_timeout != NULL) {
2266		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2267		if (error != 0)
2268			return (error);
2269		abs_timeout = &ets;
2270	} else
2271		abs_timeout = NULL;
2272	waitok = !(fp->f_flag & O_NONBLOCK);
2273	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2274		uap->msg_prio, waitok, abs_timeout);
2275	fdrop(fp, td);
2276	return (error);
2277}
2278
2279int
2280sys_kmq_timedsend(struct thread *td, struct kmq_timedsend_args *uap)
2281{
2282	struct mqueue *mq;
2283	struct file *fp;
2284	struct timespec *abs_timeout, ets;
2285	int error, waitok;
2286
2287	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2288	if (error)
2289		return (error);
2290	if (uap->abs_timeout != NULL) {
2291		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2292		if (error != 0)
2293			return (error);
2294		abs_timeout = &ets;
2295	} else
2296		abs_timeout = NULL;
2297	waitok = !(fp->f_flag & O_NONBLOCK);
2298	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2299		uap->msg_prio, waitok, abs_timeout);
2300	fdrop(fp, td);
2301	return (error);
2302}
2303
/*
 * Register (sigev != NULL) or remove (sigev == NULL) the calling
 * process's notification request for queue descriptor mqd.
 *
 * Errors: EINVAL for an unsupported sigev_notify value or an invalid
 * signal number, EBADF if the descriptor no longer refers to the same
 * open file, EBUSY if the queue already has a notifier registered, plus
 * errors from the descriptor lookup and the capability check.
 */
static int
kern_kmq_notify(struct thread *td, int mqd, struct sigevent *sigev)
{
#ifdef CAPABILITIES
	cap_rights_t rights;
#endif
	struct filedesc *fdp;
	struct proc *p;
	struct mqueue *mq;
	struct file *fp, *fp2;
	struct mqueue_notifier *nt, *newnt = NULL;
	int error;

	if (sigev != NULL) {
		if (sigev->sigev_notify != SIGEV_SIGNAL &&
		    sigev->sigev_notify != SIGEV_THREAD_ID &&
		    sigev->sigev_notify != SIGEV_NONE)
			return (EINVAL);
		if ((sigev->sigev_notify == SIGEV_SIGNAL ||
		    sigev->sigev_notify == SIGEV_THREAD_ID) &&
		    !_SIG_VALID(sigev->sigev_signo))
			return (EINVAL);
	}
	p = td->td_proc;
	fdp = td->td_proc->p_fd;
	/* Hold the file for the duration of the call. */
	error = getmq(td, mqd, &fp, NULL, &mq);
	if (error)
		return (error);
	/*
	 * Re-entered after a notifier has been allocated with all locks
	 * dropped, so the descriptor is validated again each time.
	 */
again:
	FILEDESC_SLOCK(fdp);
	fp2 = fget_locked(fdp, mqd);
	if (fp2 == NULL) {
		FILEDESC_SUNLOCK(fdp);
		error = EBADF;
		goto out;
	}
#ifdef CAPABILITIES
	error = cap_check(cap_rights(fdp, mqd),
	    cap_rights_init(&rights, CAP_EVENT));
	if (error) {
		FILEDESC_SUNLOCK(fdp);
		goto out;
	}
#endif
	/* The slot must still hold the file looked up above. */
	if (fp2 != fp) {
		FILEDESC_SUNLOCK(fdp);
		error = EBADF;
		goto out;
	}
	/* Take the queue mutex before letting go of the descriptor table. */
	mtx_lock(&mq->mq_mutex);
	FILEDESC_SUNLOCK(fdp);
	if (sigev != NULL) {
		if (mq->mq_notifier != NULL) {
			error = EBUSY;
		} else {
			PROC_LOCK(p);
			nt = notifier_search(p, mqd);
			if (nt == NULL) {
				/*
				 * No notifier for this descriptor yet and
				 * none preallocated: drop the locks,
				 * allocate one and start over.
				 */
				if (newnt == NULL) {
					PROC_UNLOCK(p);
					mtx_unlock(&mq->mq_mutex);
					newnt = notifier_alloc();
					goto again;
				}
			}

			if (nt != NULL) {
				/*
				 * Reuse the existing notifier; a spare
				 * allocation is not needed.
				 */
				sigqueue_take(&nt->nt_ksi);
				if (newnt != NULL) {
					notifier_free(newnt);
					newnt = NULL;
				}
			} else {
				/* Install the preallocated notifier. */
				nt = newnt;
				newnt = NULL;
				ksiginfo_init(&nt->nt_ksi);
				nt->nt_ksi.ksi_flags |= KSI_INS | KSI_EXT;
				nt->nt_ksi.ksi_code = SI_MESGQ;
				nt->nt_proc = p;
				nt->nt_ksi.ksi_mqd = mqd;
				notifier_insert(p, nt);
			}
			nt->nt_sigev = *sigev;
			mq->mq_notifier = nt;
			PROC_UNLOCK(p);
			/*
			 * if there is no receivers and message queue
			 * is not empty, we should send notification
			 * as soon as possible.
			 */
			if (mq->mq_receivers == 0 &&
			    !TAILQ_EMPTY(&mq->mq_msgq))
				mqueue_send_notification(mq);
		}
	} else {
		notifier_remove(p, mq, mqd);
	}
	mtx_unlock(&mq->mq_mutex);

out:
	fdrop(fp, td);
	/* Free a preallocated notifier that ended up unused. */
	if (newnt != NULL)
		notifier_free(newnt);
	return (error);
}
2409
2410int
2411sys_kmq_notify(struct thread *td, struct kmq_notify_args *uap)
2412{
2413	struct sigevent ev, *evp;
2414	int error;
2415
2416	if (uap->sigev == NULL) {
2417		evp = NULL;
2418	} else {
2419		error = copyin(uap->sigev, &ev, sizeof(ev));
2420		if (error != 0)
2421			return (error);
2422		evp = &ev;
2423	}
2424	return (kern_kmq_notify(td, uap->mqd, evp));
2425}
2426
2427static void
2428mqueue_fdclose(struct thread *td, int fd, struct file *fp)
2429{
2430	struct filedesc *fdp;
2431	struct mqueue *mq;
2432
2433	fdp = td->td_proc->p_fd;
2434	FILEDESC_LOCK_ASSERT(fdp);
2435
2436	if (fp->f_ops == &mqueueops) {
2437		mq = FPTOMQ(fp);
2438		mtx_lock(&mq->mq_mutex);
2439		notifier_remove(td->td_proc, mq, fd);
2440
2441		/* have to wakeup thread in same process */
2442		if (mq->mq_flags & MQ_RSEL) {
2443			mq->mq_flags &= ~MQ_RSEL;
2444			selwakeup(&mq->mq_rsel);
2445		}
2446		if (mq->mq_flags & MQ_WSEL) {
2447			mq->mq_flags &= ~MQ_WSEL;
2448			selwakeup(&mq->mq_wsel);
2449		}
2450		mtx_unlock(&mq->mq_mutex);
2451	}
2452}
2453
2454static void
2455mq_proc_exit(void *arg __unused, struct proc *p)
2456{
2457	struct filedesc *fdp;
2458	struct file *fp;
2459	struct mqueue *mq;
2460	int i;
2461
2462	fdp = p->p_fd;
2463	FILEDESC_SLOCK(fdp);
2464	for (i = 0; i < fdp->fd_nfiles; ++i) {
2465		fp = fget_locked(fdp, i);
2466		if (fp != NULL && fp->f_ops == &mqueueops) {
2467			mq = FPTOMQ(fp);
2468			mtx_lock(&mq->mq_mutex);
2469			notifier_remove(p, FPTOMQ(fp), i);
2470			mtx_unlock(&mq->mq_mutex);
2471		}
2472	}
2473	FILEDESC_SUNLOCK(fdp);
2474	KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left"));
2475}
2476
/*
 * fo_read method: plain read is not supported on a message queue
 * descriptor, so every call fails with EOPNOTSUPP.
 */
static int
mqf_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}
2483
/*
 * fo_write method: plain write is not supported on a message queue
 * descriptor, so every call fails with EOPNOTSUPP.
 */
static int
mqf_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}
2490
/*
 * fo_truncate method: a message queue cannot be truncated; the request is
 * always rejected with EINVAL regardless of the requested length.
 */
static int
mqf_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{

	/* No length is acceptable. */
	return (EINVAL);
}
2498
2499static int
2500mqf_ioctl(struct file *fp, u_long cmd, void *data,
2501	struct ucred *active_cred, struct thread *td)
2502{
2503	return (ENOTTY);
2504}
2505
2506static int
2507mqf_poll(struct file *fp, int events, struct ucred *active_cred,
2508	struct thread *td)
2509{
2510	struct mqueue *mq = FPTOMQ(fp);
2511	int revents = 0;
2512
2513	mtx_lock(&mq->mq_mutex);
2514	if (events & (POLLIN | POLLRDNORM)) {
2515		if (mq->mq_curmsgs) {
2516			revents |= events & (POLLIN | POLLRDNORM);
2517		} else {
2518			mq->mq_flags |= MQ_RSEL;
2519			selrecord(td, &mq->mq_rsel);
2520 		}
2521	}
2522	if (events & POLLOUT) {
2523		if (mq->mq_curmsgs < mq->mq_maxmsg)
2524			revents |= POLLOUT;
2525		else {
2526			mq->mq_flags |= MQ_WSEL;
2527			selrecord(td, &mq->mq_wsel);
2528		}
2529	}
2530	mtx_unlock(&mq->mq_mutex);
2531	return (revents);
2532}
2533
2534static int
2535mqf_close(struct file *fp, struct thread *td)
2536{
2537	struct mqfs_node *pn;
2538
2539	fp->f_ops = &badfileops;
2540	pn = fp->f_data;
2541	fp->f_data = NULL;
2542	sx_xlock(&mqfs_data.mi_lock);
2543	mqnode_release(pn);
2544	sx_xunlock(&mqfs_data.mi_lock);
2545	return (0);
2546}
2547
2548static int
2549mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
2550	struct thread *td)
2551{
2552	struct mqfs_node *pn = fp->f_data;
2553
2554	bzero(st, sizeof *st);
2555	sx_xlock(&mqfs_data.mi_lock);
2556	st->st_atim = pn->mn_atime;
2557	st->st_mtim = pn->mn_mtime;
2558	st->st_ctim = pn->mn_ctime;
2559	st->st_birthtim = pn->mn_birth;
2560	st->st_uid = pn->mn_uid;
2561	st->st_gid = pn->mn_gid;
2562	st->st_mode = S_IFIFO | pn->mn_mode;
2563	sx_xunlock(&mqfs_data.mi_lock);
2564	return (0);
2565}
2566
2567static int
2568mqf_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
2569    struct thread *td)
2570{
2571	struct mqfs_node *pn;
2572	int error;
2573
2574	error = 0;
2575	pn = fp->f_data;
2576	sx_xlock(&mqfs_data.mi_lock);
2577	error = vaccess(VREG, pn->mn_mode, pn->mn_uid, pn->mn_gid, VADMIN,
2578	    active_cred, NULL);
2579	if (error != 0)
2580		goto out;
2581	pn->mn_mode = mode & ACCESSPERMS;
2582out:
2583	sx_xunlock(&mqfs_data.mi_lock);
2584	return (error);
2585}
2586
2587static int
2588mqf_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
2589    struct thread *td)
2590{
2591	struct mqfs_node *pn;
2592	int error;
2593
2594	error = 0;
2595	pn = fp->f_data;
2596	sx_xlock(&mqfs_data.mi_lock);
2597	if (uid == (uid_t)-1)
2598		uid = pn->mn_uid;
2599	if (gid == (gid_t)-1)
2600		gid = pn->mn_gid;
2601	if (((uid != pn->mn_uid && uid != active_cred->cr_uid) ||
2602	    (gid != pn->mn_gid && !groupmember(gid, active_cred))) &&
2603	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN, 0)))
2604		goto out;
2605	pn->mn_uid = uid;
2606	pn->mn_gid = gid;
2607out:
2608	sx_xunlock(&mqfs_data.mi_lock);
2609	return (error);
2610}
2611
2612static int
2613mqf_kqfilter(struct file *fp, struct knote *kn)
2614{
2615	struct mqueue *mq = FPTOMQ(fp);
2616	int error = 0;
2617
2618	if (kn->kn_filter == EVFILT_READ) {
2619		kn->kn_fop = &mq_rfiltops;
2620		knlist_add(&mq->mq_rsel.si_note, kn, 0);
2621	} else if (kn->kn_filter == EVFILT_WRITE) {
2622		kn->kn_fop = &mq_wfiltops;
2623		knlist_add(&mq->mq_wsel.si_note, kn, 0);
2624	} else
2625		error = EINVAL;
2626	return (error);
2627}
2628
2629static void
2630filt_mqdetach(struct knote *kn)
2631{
2632	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2633
2634	if (kn->kn_filter == EVFILT_READ)
2635		knlist_remove(&mq->mq_rsel.si_note, kn, 0);
2636	else if (kn->kn_filter == EVFILT_WRITE)
2637		knlist_remove(&mq->mq_wsel.si_note, kn, 0);
2638	else
2639		panic("filt_mqdetach");
2640}
2641
2642static int
2643filt_mqread(struct knote *kn, long hint)
2644{
2645	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2646
2647	mtx_assert(&mq->mq_mutex, MA_OWNED);
2648	return (mq->mq_curmsgs != 0);
2649}
2650
2651static int
2652filt_mqwrite(struct knote *kn, long hint)
2653{
2654	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2655
2656	mtx_assert(&mq->mq_mutex, MA_OWNED);
2657	return (mq->mq_curmsgs < mq->mq_maxmsg);
2658}
2659
2660static struct fileops mqueueops = {
2661	.fo_read		= mqf_read,
2662	.fo_write		= mqf_write,
2663	.fo_truncate		= mqf_truncate,
2664	.fo_ioctl		= mqf_ioctl,
2665	.fo_poll		= mqf_poll,
2666	.fo_kqfilter		= mqf_kqfilter,
2667	.fo_stat		= mqf_stat,
2668	.fo_chmod		= mqf_chmod,
2669	.fo_chown		= mqf_chown,
2670	.fo_close		= mqf_close,
2671	.fo_sendfile		= invfo_sendfile,
2672};
2673
2674static struct vop_vector mqfs_vnodeops = {
2675	.vop_default 		= &default_vnodeops,
2676	.vop_access		= mqfs_access,
2677	.vop_cachedlookup	= mqfs_lookup,
2678	.vop_lookup		= vfs_cache_lookup,
2679	.vop_reclaim		= mqfs_reclaim,
2680	.vop_create		= mqfs_create,
2681	.vop_remove		= mqfs_remove,
2682	.vop_inactive		= mqfs_inactive,
2683	.vop_open		= mqfs_open,
2684	.vop_close		= mqfs_close,
2685	.vop_getattr		= mqfs_getattr,
2686	.vop_setattr		= mqfs_setattr,
2687	.vop_read		= mqfs_read,
2688	.vop_write		= VOP_EOPNOTSUPP,
2689	.vop_readdir		= mqfs_readdir,
2690	.vop_mkdir		= VOP_EOPNOTSUPP,
2691	.vop_rmdir		= VOP_EOPNOTSUPP
2692};
2693
2694static struct vfsops mqfs_vfsops = {
2695	.vfs_init 		= mqfs_init,
2696	.vfs_uninit		= mqfs_uninit,
2697	.vfs_mount		= mqfs_mount,
2698	.vfs_unmount		= mqfs_unmount,
2699	.vfs_root		= mqfs_root,
2700	.vfs_statfs		= mqfs_statfs,
2701};
2702
2703static struct vfsconf mqueuefs_vfsconf = {
2704	.vfc_version = VFS_VERSION,
2705	.vfc_name = "mqueuefs",
2706	.vfc_vfsops = &mqfs_vfsops,
2707	.vfc_typenum = -1,
2708	.vfc_flags = VFCF_SYNTHETIC
2709};
2710
/*
 * Native message queue system calls.  The table is handed to
 * syscall_helper_register() in mqinit() and to syscall_helper_unregister()
 * in mqunload(); SYSCALL_INIT_LAST terminates it.
 */
static struct syscall_helper_data mq_syscalls[] = {
	SYSCALL_INIT_HELPER(kmq_open),
	SYSCALL_INIT_HELPER(kmq_setattr),
	SYSCALL_INIT_HELPER(kmq_timedsend),
	SYSCALL_INIT_HELPER(kmq_timedreceive),
	SYSCALL_INIT_HELPER(kmq_notify),
	SYSCALL_INIT_HELPER(kmq_unlink),
	SYSCALL_INIT_LAST
};
2720
2721#ifdef COMPAT_FREEBSD32
2722#include <compat/freebsd32/freebsd32.h>
2723#include <compat/freebsd32/freebsd32_proto.h>
2724#include <compat/freebsd32/freebsd32_signal.h>
2725#include <compat/freebsd32/freebsd32_syscall.h>
2726#include <compat/freebsd32/freebsd32_util.h>
2727
2728static void
2729mq_attr_from32(const struct mq_attr32 *from, struct mq_attr *to)
2730{
2731
2732	to->mq_flags = from->mq_flags;
2733	to->mq_maxmsg = from->mq_maxmsg;
2734	to->mq_msgsize = from->mq_msgsize;
2735	to->mq_curmsgs = from->mq_curmsgs;
2736}
2737
2738static void
2739mq_attr_to32(const struct mq_attr *from, struct mq_attr32 *to)
2740{
2741
2742	to->mq_flags = from->mq_flags;
2743	to->mq_maxmsg = from->mq_maxmsg;
2744	to->mq_msgsize = from->mq_msgsize;
2745	to->mq_curmsgs = from->mq_curmsgs;
2746}
2747
2748int
2749freebsd32_kmq_open(struct thread *td, struct freebsd32_kmq_open_args *uap)
2750{
2751	struct mq_attr attr;
2752	struct mq_attr32 attr32;
2753	int flags, error;
2754
2755	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
2756		return (EINVAL);
2757	flags = FFLAGS(uap->flags);
2758	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2759		error = copyin(uap->attr, &attr32, sizeof(attr32));
2760		if (error)
2761			return (error);
2762		mq_attr_from32(&attr32, &attr);
2763	}
2764	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2765	    uap->attr != NULL ? &attr : NULL));
2766}
2767
2768int
2769freebsd32_kmq_setattr(struct thread *td, struct freebsd32_kmq_setattr_args *uap)
2770{
2771	struct mq_attr attr, oattr;
2772	struct mq_attr32 attr32, oattr32;
2773	int error;
2774
2775	if (uap->attr != NULL) {
2776		error = copyin(uap->attr, &attr32, sizeof(attr32));
2777		if (error != 0)
2778			return (error);
2779		mq_attr_from32(&attr32, &attr);
2780	}
2781	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2782	    &oattr);
2783	if (error == 0 && uap->oattr != NULL) {
2784		mq_attr_to32(&oattr, &oattr32);
2785		bzero(oattr32.__reserved, sizeof(oattr32.__reserved));
2786		error = copyout(&oattr32, uap->oattr, sizeof(oattr32));
2787	}
2788	return (error);
2789}
2790
2791int
2792freebsd32_kmq_timedsend(struct thread *td,
2793    struct freebsd32_kmq_timedsend_args *uap)
2794{
2795	struct mqueue *mq;
2796	struct file *fp;
2797	struct timespec32 ets32;
2798	struct timespec *abs_timeout, ets;
2799	int error;
2800	int waitok;
2801
2802	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2803	if (error)
2804		return (error);
2805	if (uap->abs_timeout != NULL) {
2806		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2807		if (error != 0)
2808			return (error);
2809		CP(ets32, ets, tv_sec);
2810		CP(ets32, ets, tv_nsec);
2811		abs_timeout = &ets;
2812	} else
2813		abs_timeout = NULL;
2814	waitok = !(fp->f_flag & O_NONBLOCK);
2815	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2816		uap->msg_prio, waitok, abs_timeout);
2817	fdrop(fp, td);
2818	return (error);
2819}
2820
2821int
2822freebsd32_kmq_timedreceive(struct thread *td,
2823    struct freebsd32_kmq_timedreceive_args *uap)
2824{
2825	struct mqueue *mq;
2826	struct file *fp;
2827	struct timespec32 ets32;
2828	struct timespec *abs_timeout, ets;
2829	int error, waitok;
2830
2831	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2832	if (error)
2833		return (error);
2834	if (uap->abs_timeout != NULL) {
2835		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2836		if (error != 0)
2837			return (error);
2838		CP(ets32, ets, tv_sec);
2839		CP(ets32, ets, tv_nsec);
2840		abs_timeout = &ets;
2841	} else
2842		abs_timeout = NULL;
2843	waitok = !(fp->f_flag & O_NONBLOCK);
2844	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2845		uap->msg_prio, waitok, abs_timeout);
2846	fdrop(fp, td);
2847	return (error);
2848}
2849
2850int
2851freebsd32_kmq_notify(struct thread *td, struct freebsd32_kmq_notify_args *uap)
2852{
2853	struct sigevent ev, *evp;
2854	struct sigevent32 ev32;
2855	int error;
2856
2857	if (uap->sigev == NULL) {
2858		evp = NULL;
2859	} else {
2860		error = copyin(uap->sigev, &ev32, sizeof(ev32));
2861		if (error != 0)
2862			return (error);
2863		error = convert_sigevent32(&ev32, &ev);
2864		if (error != 0)
2865			return (error);
2866		evp = &ev;
2867	}
2868	return (kern_kmq_notify(td, uap->mqd, evp));
2869}
2870
/*
 * 32-bit compat message queue system calls.  The table is handed to
 * syscall32_helper_register() in mqinit() and to
 * syscall32_helper_unregister() in mqunload().  kmq_unlink has no
 * freebsd32_ wrapper in this table and is entered through
 * SYSCALL32_INIT_HELPER_COMPAT instead.
 */
static struct syscall_helper_data mq32_syscalls[] = {
	SYSCALL32_INIT_HELPER(freebsd32_kmq_open),
	SYSCALL32_INIT_HELPER(freebsd32_kmq_setattr),
	SYSCALL32_INIT_HELPER(freebsd32_kmq_timedsend),
	SYSCALL32_INIT_HELPER(freebsd32_kmq_timedreceive),
	SYSCALL32_INIT_HELPER(freebsd32_kmq_notify),
	SYSCALL32_INIT_HELPER_COMPAT(kmq_unlink),
	SYSCALL_INIT_LAST
};
2880#endif
2881
2882static int
2883mqinit(void)
2884{
2885	int error;
2886
2887	error = syscall_helper_register(mq_syscalls);
2888	if (error != 0)
2889		return (error);
2890#ifdef COMPAT_FREEBSD32
2891	error = syscall32_helper_register(mq32_syscalls);
2892	if (error != 0)
2893		return (error);
2894#endif
2895	return (0);
2896}
2897
2898static int
2899mqunload(void)
2900{
2901
2902#ifdef COMPAT_FREEBSD32
2903	syscall32_helper_unregister(mq32_syscalls);
2904#endif
2905	syscall_helper_unregister(mq_syscalls);
2906	return (0);
2907}
2908
2909static int
2910mq_modload(struct module *module, int cmd, void *arg)
2911{
2912	int error = 0;
2913
2914	error = vfs_modevent(module, cmd, arg);
2915	if (error != 0)
2916		return (error);
2917
2918	switch (cmd) {
2919	case MOD_LOAD:
2920		error = mqinit();
2921		if (error != 0)
2922			mqunload();
2923		break;
2924	case MOD_UNLOAD:
2925		error = mqunload();
2926		break;
2927	default:
2928		break;
2929	}
2930	return (error);
2931}
2932
/*
 * Module description for "mqueuefs": events are handled by mq_modload(),
 * and mqueuefs_vfsconf is supplied as the module's data pointer
 * (presumably the 'arg' that mq_modload() forwards to vfs_modevent() --
 * confirm against moduledata_t).
 */
static moduledata_t mqueuefs_mod = {
	"mqueuefs",
	mq_modload,
	&mqueuefs_vfsconf
};
/* Declared at SI_SUB_VFS / SI_ORDER_MIDDLE, module version 1. */
DECLARE_MODULE(mqueuefs, mqueuefs_mod, SI_SUB_VFS, SI_ORDER_MIDDLE);
MODULE_VERSION(mqueuefs, 1);
2940