1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
5 * Copyright (c) 2016-2017 Robert N. M. Watson
6 * All rights reserved.
7 *
8 * Portions of this software were developed by BAE Systems, the University of
9 * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL
10 * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent
11 * Computing (TC) research program.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 */
35
/*
 * POSIX message queue implementation.
 *
 * 1) A mqueue filesystem can be mounted; each message queue appears as a
 *    file in the mounted directory, and the user can change a queue's
 *    permissions and ownership, or remove a queue. Manually creating a
 *    file in the directory causes a message queue to be created in the
 *    kernel with the default message queue attributes and the same name.
 *    This method is not recommended, since the mq_open syscall allows the
 *    user to specify different attributes. The file system can also be
 *    mounted multiple times at different mount points, but every mount
 *    shows the same contents.
 *
 * 2) Standard POSIX message queue API. The syscalls do not go through the
 *    vfs layer but operate directly on the internal data structures; this
 *    allows the user to use the IPC facility without having to mount the
 *    mqueue file system.
 */
52
53#include <sys/cdefs.h>
54__FBSDID("$FreeBSD$");
55
56#include "opt_capsicum.h"
57
58#include <sys/param.h>
59#include <sys/kernel.h>
60#include <sys/systm.h>
61#include <sys/limits.h>
62#include <sys/malloc.h>
63#include <sys/buf.h>
64#include <sys/capsicum.h>
65#include <sys/dirent.h>
66#include <sys/event.h>
67#include <sys/eventhandler.h>
68#include <sys/fcntl.h>
69#include <sys/file.h>
70#include <sys/filedesc.h>
71#include <sys/jail.h>
72#include <sys/lock.h>
73#include <sys/module.h>
74#include <sys/mount.h>
75#include <sys/mqueue.h>
76#include <sys/mutex.h>
77#include <sys/namei.h>
78#include <sys/posix4.h>
79#include <sys/poll.h>
80#include <sys/priv.h>
81#include <sys/proc.h>
82#include <sys/queue.h>
83#include <sys/sysproto.h>
84#include <sys/stat.h>
85#include <sys/syscall.h>
86#include <sys/syscallsubr.h>
87#include <sys/sysent.h>
88#include <sys/sx.h>
89#include <sys/sysctl.h>
90#include <sys/taskqueue.h>
91#include <sys/unistd.h>
92#include <sys/user.h>
93#include <sys/vnode.h>
94#include <machine/atomic.h>
95
96#include <security/audit/audit.h>
97
98FEATURE(p1003_1b_mqueue, "POSIX P1003.1B message queues support");
99
100/*
101 * Limits and constants
102 */
/* Longest node name the filesystem stores; mn_name reserves one extra byte. */
#define	MQFS_NAMELEN		NAME_MAX
/* NOTE(review): not referenced in this part of the file -- presumably a directory entry size; confirm. */
#define MQFS_DELEN		(8 + MQFS_NAMELEN)
105
/*
 * Node types.  mqfs_allocv() turns root, dir, this and parent nodes into
 * VDIR vnodes, file nodes into VREG vnodes and symlink nodes into VLNK
 * vnodes; mqfstype_none is rejected there.
 */
typedef enum {
	mqfstype_none = 0,	/* not a valid node type */
	mqfstype_root,		/* root directory of the instance */
	mqfstype_dir,		/* ordinary directory */
	mqfstype_this,		/* "." entry of a directory */
	mqfstype_parent,	/* ".." entry of a directory */
	mqfstype_file,		/* file node */
	mqfstype_symlink,	/* symbolic link node */
} mqfs_type_t;
116
117struct mqfs_node;
118
/*
 * mqfs_info: describes a mqfs instance
 */
struct mqfs_info {
	/* Taken around updates of the node tree and per-node vnode lists. */
	struct sx		mi_lock;
	/* Root directory node, created in mqfs_init(). */
	struct mqfs_node	*mi_root;
	/* Unit-number allocator that hands out file numbers. */
	struct unrhdr		*mi_unrhdr;
};
127
/*
 * mqfs_vdata: ties one vnode to the mqfs node it represents.  It is stored
 * in the vnode's v_data and linked on the node's mn_vnodes list.
 */
struct mqfs_vdata {
	LIST_ENTRY(mqfs_vdata)	mv_link;	/* linkage on mn_vnodes */
	struct mqfs_node	*mv_node;	/* node the vnode refers to */
	struct vnode		*mv_vnode;	/* the vnode itself */
	struct task		mv_task;	/* runs do_recycle() on the vnode */
};
134
/*
 * mqfs_node: describes a node (file or directory) within a mqfs
 */
struct mqfs_node {
	/* NUL-terminated entry name. */
	char			mn_name[MQFS_NAMELEN+1];
	/* Instance the node belongs to. */
	struct mqfs_info	*mn_info;
	/* Containing directory; cleared by do_unlink(). */
	struct mqfs_node	*mn_parent;
	/* Entries of this node when it is a directory. */
	LIST_HEAD(,mqfs_node)	mn_children;
	/* Linkage on the parent's mn_children list. */
	LIST_ENTRY(mqfs_node)	mn_sibling;
	/* Vnodes currently referring to this node. */
	LIST_HEAD(,mqfs_vdata)	mn_vnodes;
	/*
	 * Prison root of the creating credential; mqfs_search() only
	 * matches nodes whose value is NULL or equals the caller's.
	 */
	const void		*mn_pr_root;
	/* Reference count, see mqnode_addref() and mqnode_release(). */
	int			mn_refcount;
	mqfs_type_t		mn_type;
	/* Non-zero once do_unlink() has removed the node. */
	int			mn_deleted;
	/* File number; 0 means none has been allocated. */
	uint32_t		mn_fileno;
	/* Attached struct mqueue (see VTOMQ()), released by mqfs_destroy(). */
	void			*mn_data;
	struct timespec		mn_birth;
	struct timespec		mn_ctime;
	struct timespec		mn_atime;
	struct timespec		mn_mtime;
	uid_t			mn_uid;
	gid_t			mn_gid;
	int			mn_mode;
};
159
/* vnode -> mqfs node, through the mqfs_vdata kept in v_data. */
#define	VTON(vp)	(((struct mqfs_vdata *)((vp)->v_data))->mv_node)
/* vnode -> message queue attached to its node. */
#define VTOMQ(vp) 	((struct mqueue *)(VTON(vp)->mn_data))
/* mount -> mqfs instance stored in mnt_data. */
#define	VFSTOMQFS(m)	((struct mqfs_info *)((m)->mnt_data))
/* file -> message queue; treats f_data as a struct mqfs_node pointer. */
#define	FPTOMQ(fp)	((struct mqueue *)(((struct mqfs_node *) \
				(fp)->f_data)->mn_data))
165
/* Tail queue head type for lists of struct mqueue_msg. */
TAILQ_HEAD(msgq, mqueue_msg);

struct mqueue;

/*
 * NOTE(review): the code using this structure is outside this part of the
 * file.  Judging by the field types it records a notification request
 * (sigevent), the signal information to deliver and the requesting
 * process -- confirm against the users.
 */
struct mqueue_notifier {
	LIST_ENTRY(mqueue_notifier)	nt_link;
	struct sigevent			nt_sigev;
	ksiginfo_t			nt_ksi;
	struct proc			*nt_proc;
};
176
/*
 * A message queue.  Instances are attached to file nodes through mn_data.
 * NOTE(review): the code maintaining these fields is outside this part of
 * the file; the per-field notes below are inferred from names and types
 * and should be confirmed.
 */
struct mqueue {
	struct mtx	mq_mutex;
	int		mq_flags;
	long		mq_maxmsg;	/* presumably the message count limit */
	long		mq_msgsize;	/* presumably the per-message size limit */
	long		mq_curmsgs;	/* presumably messages currently queued */
	long		mq_totalbytes;
	struct msgq	mq_msgq;	/* list of struct mqueue_msg */
	int		mq_receivers;
	int		mq_senders;
	struct selinfo	mq_rsel;
	struct selinfo	mq_wsel;
	struct mqueue_notifier	*mq_notifier;
};

/* NOTE(review): flag bits, presumably for mq_flags -- confirm against users. */
#define	MQ_RSEL		0x01
#define	MQ_WSEL		0x02
194
/*
 * Header of one queued message; per the trailing comment the payload is
 * stored immediately after this structure.
 */
struct mqueue_msg {
	TAILQ_ENTRY(mqueue_msg)	msg_link;	/* linkage on a struct msgq */
	unsigned int	msg_prio;
	unsigned int	msg_size;
	/* following real data... */
};
201
/* Root of the kern.mqueue sysctl tree. */
static SYSCTL_NODE(_kern, OID_AUTO, mqueue, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
	"POSIX real time message queue");

/* NOTE(review): presumably the attributes used when a queue is created without explicit ones -- confirm in mqueue_alloc(). */
static int	default_maxmsg  = 10;
static int	default_msgsize = 1024;

/* Tunables exported read-write below kern.mqueue. */
static int	maxmsg = 100;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsg, CTLFLAG_RW,
    &maxmsg, 0, "Default maximum messages in queue");
static int	maxmsgsize = 16384;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsgsize, CTLFLAG_RW,
    &maxmsgsize, 0, "Default maximum message size");
static int	maxmq = 100;
SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmq, CTLFLAG_RW,
    &maxmq, 0, "maximum message queues");
static int	curmq = 0;
SYSCTL_INT(_kern_mqueue, OID_AUTO, curmq, CTLFLAG_RW,
    &curmq, 0, "current message queue number");
/* mqfs_uninit() refuses to tear the filesystem down while this is zero. */
static int	unloadable = 0;
static MALLOC_DEFINE(M_MQUEUEDATA, "mqdata", "mqueue data");

/* Tag of the process_exit handler registered in mqfs_init(). */
static eventhandler_tag exit_tag;

/* Only one instance per-system */
static struct mqfs_info		mqfs_data;
/* UMA zones, created in mqfs_init() and destroyed in mqfs_uninit(). */
static uma_zone_t		mqnode_zone;
static uma_zone_t		mqueue_zone;
static uma_zone_t		mvdata_zone;
static uma_zone_t		mqnoti_zone;
static struct vop_vector	mqfs_vnodeops;
static struct fileops		mqueueops;
/* Jail OSD slot returned by osd_jail_register() in mqfs_init(). */
static unsigned			mqfs_osd_jail_slot;
234
/*
 * Directory structure construction and manipulation
 */
#ifdef notyet
/* Directory and symlink creation are compiled only when "notyet" is defined. */
static struct mqfs_node	*mqfs_create_dir(struct mqfs_node *parent,
	const char *name, int namelen, struct ucred *cred, int mode);
static struct mqfs_node	*mqfs_create_link(struct mqfs_node *parent,
	const char *name, int namelen, struct ucred *cred, int mode);
#endif

static struct mqfs_node	*mqfs_create_file(struct mqfs_node *parent,
	const char *name, int namelen, struct ucred *cred, int mode);
static int	mqfs_destroy(struct mqfs_node *mn);
static void	mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn);
static void	mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn);
static int	mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn);
static int	mqfs_prison_remove(void *obj, void *data);

/*
 * Message queue construction and manipulation
 */
static struct mqueue	*mqueue_alloc(const struct mq_attr *attr);
static void	mqueue_free(struct mqueue *mq);
static int	mqueue_send(struct mqueue *mq, const char *msg_ptr,
			size_t msg_len, unsigned msg_prio, int waitok,
			const struct timespec *abs_timeout);
static int	mqueue_receive(struct mqueue *mq, char *msg_ptr,
			size_t msg_len, unsigned *msg_prio, int waitok,
			const struct timespec *abs_timeout);
static int	_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg,
			int timo);
static int	_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg,
			int timo);
static void	mqueue_send_notification(struct mqueue *mq);
static void	mqueue_fdclose(struct thread *td, int fd, struct file *fp);
static void	mq_proc_exit(void *arg, struct proc *p);
271
/*
 * kqueue filters
 */
static void	filt_mqdetach(struct knote *kn);
static int	filt_mqread(struct knote *kn, long hint);
static int	filt_mqwrite(struct knote *kn, long hint);

/* Filter operations using filt_mqread() as the event routine. */
struct filterops mq_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_mqdetach,
	.f_event = filt_mqread,
};
/* Filter operations using filt_mqwrite() as the event routine. */
struct filterops mq_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_mqdetach,
	.f_event = filt_mqwrite,
};
289
290/*
291 * Initialize fileno bitmap
292 */
293static void
294mqfs_fileno_init(struct mqfs_info *mi)
295{
296	struct unrhdr *up;
297
298	up = new_unrhdr(1, INT_MAX, NULL);
299	mi->mi_unrhdr = up;
300}
301
302/*
303 * Tear down fileno bitmap
304 */
305static void
306mqfs_fileno_uninit(struct mqfs_info *mi)
307{
308	struct unrhdr *up;
309
310	up = mi->mi_unrhdr;
311	mi->mi_unrhdr = NULL;
312	delete_unrhdr(up);
313}
314
315/*
316 * Allocate a file number
317 */
318static void
319mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn)
320{
321	/* make sure our parent has a file number */
322	if (mn->mn_parent && !mn->mn_parent->mn_fileno)
323		mqfs_fileno_alloc(mi, mn->mn_parent);
324
325	switch (mn->mn_type) {
326	case mqfstype_root:
327	case mqfstype_dir:
328	case mqfstype_file:
329	case mqfstype_symlink:
330		mn->mn_fileno = alloc_unr(mi->mi_unrhdr);
331		break;
332	case mqfstype_this:
333		KASSERT(mn->mn_parent != NULL,
334		    ("mqfstype_this node has no parent"));
335		mn->mn_fileno = mn->mn_parent->mn_fileno;
336		break;
337	case mqfstype_parent:
338		KASSERT(mn->mn_parent != NULL,
339		    ("mqfstype_parent node has no parent"));
340		if (mn->mn_parent == mi->mi_root) {
341			mn->mn_fileno = mn->mn_parent->mn_fileno;
342			break;
343		}
344		KASSERT(mn->mn_parent->mn_parent != NULL,
345		    ("mqfstype_parent node has no grandparent"));
346		mn->mn_fileno = mn->mn_parent->mn_parent->mn_fileno;
347		break;
348	default:
349		KASSERT(0,
350		    ("mqfs_fileno_alloc() called for unknown type node: %d",
351			mn->mn_type));
352		break;
353	}
354}
355
356/*
357 * Release a file number
358 */
359static void
360mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn)
361{
362	switch (mn->mn_type) {
363	case mqfstype_root:
364	case mqfstype_dir:
365	case mqfstype_file:
366	case mqfstype_symlink:
367		free_unr(mi->mi_unrhdr, mn->mn_fileno);
368		break;
369	case mqfstype_this:
370	case mqfstype_parent:
371		/* ignore these, as they don't "own" their file number */
372		break;
373	default:
374		KASSERT(0,
375		    ("mqfs_fileno_free() called for unknown type node: %d",
376			mn->mn_type));
377		break;
378	}
379}
380
381static __inline struct mqfs_node *
382mqnode_alloc(void)
383{
384	return uma_zalloc(mqnode_zone, M_WAITOK | M_ZERO);
385}
386
387static __inline void
388mqnode_free(struct mqfs_node *node)
389{
390	uma_zfree(mqnode_zone, node);
391}
392
393static __inline void
394mqnode_addref(struct mqfs_node *node)
395{
396	atomic_add_int(&node->mn_refcount, 1);
397}
398
399static __inline void
400mqnode_release(struct mqfs_node *node)
401{
402	struct mqfs_info *mqfs;
403	int old, exp;
404
405	mqfs = node->mn_info;
406	old = atomic_fetchadd_int(&node->mn_refcount, -1);
407	if (node->mn_type == mqfstype_dir ||
408	    node->mn_type == mqfstype_root)
409		exp = 3; /* include . and .. */
410	else
411		exp = 1;
412	if (old == exp) {
413		int locked = sx_xlocked(&mqfs->mi_lock);
414		if (!locked)
415			sx_xlock(&mqfs->mi_lock);
416		mqfs_destroy(node);
417		if (!locked)
418			sx_xunlock(&mqfs->mi_lock);
419	}
420}
421
422/*
423 * Add a node to a directory
424 */
425static int
426mqfs_add_node(struct mqfs_node *parent, struct mqfs_node *node)
427{
428	KASSERT(parent != NULL, ("%s(): parent is NULL", __func__));
429	KASSERT(parent->mn_info != NULL,
430	    ("%s(): parent has no mn_info", __func__));
431	KASSERT(parent->mn_type == mqfstype_dir ||
432	    parent->mn_type == mqfstype_root,
433	    ("%s(): parent is not a directory", __func__));
434
435	node->mn_info = parent->mn_info;
436	node->mn_parent = parent;
437	LIST_INIT(&node->mn_children);
438	LIST_INIT(&node->mn_vnodes);
439	LIST_INSERT_HEAD(&parent->mn_children, node, mn_sibling);
440	mqnode_addref(parent);
441	return (0);
442}
443
444static struct mqfs_node *
445mqfs_create_node(const char *name, int namelen, struct ucred *cred, int mode,
446	int nodetype)
447{
448	struct mqfs_node *node;
449
450	node = mqnode_alloc();
451	strncpy(node->mn_name, name, namelen);
452	node->mn_pr_root = cred->cr_prison->pr_root;
453	node->mn_type = nodetype;
454	node->mn_refcount = 1;
455	vfs_timestamp(&node->mn_birth);
456	node->mn_ctime = node->mn_atime = node->mn_mtime
457		= node->mn_birth;
458	node->mn_uid = cred->cr_uid;
459	node->mn_gid = cred->cr_gid;
460	node->mn_mode = mode;
461	return (node);
462}
463
464/*
465 * Create a file
466 */
467static struct mqfs_node *
468mqfs_create_file(struct mqfs_node *parent, const char *name, int namelen,
469	struct ucred *cred, int mode)
470{
471	struct mqfs_node *node;
472
473	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_file);
474	if (mqfs_add_node(parent, node) != 0) {
475		mqnode_free(node);
476		return (NULL);
477	}
478	return (node);
479}
480
481/*
482 * Add . and .. to a directory
483 */
484static int
485mqfs_fixup_dir(struct mqfs_node *parent)
486{
487	struct mqfs_node *dir;
488
489	dir = mqnode_alloc();
490	dir->mn_name[0] = '.';
491	dir->mn_type = mqfstype_this;
492	dir->mn_refcount = 1;
493	if (mqfs_add_node(parent, dir) != 0) {
494		mqnode_free(dir);
495		return (-1);
496	}
497
498	dir = mqnode_alloc();
499	dir->mn_name[0] = dir->mn_name[1] = '.';
500	dir->mn_type = mqfstype_parent;
501	dir->mn_refcount = 1;
502
503	if (mqfs_add_node(parent, dir) != 0) {
504		mqnode_free(dir);
505		return (-1);
506	}
507
508	return (0);
509}
510
511#ifdef notyet
512
513/*
514 * Create a directory
515 */
516static struct mqfs_node *
517mqfs_create_dir(struct mqfs_node *parent, const char *name, int namelen,
518	struct ucred *cred, int mode)
519{
520	struct mqfs_node *node;
521
522	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_dir);
523	if (mqfs_add_node(parent, node) != 0) {
524		mqnode_free(node);
525		return (NULL);
526	}
527
528	if (mqfs_fixup_dir(node) != 0) {
529		mqfs_destroy(node);
530		return (NULL);
531	}
532	return (node);
533}
534
535/*
536 * Create a symlink
537 */
538static struct mqfs_node *
539mqfs_create_link(struct mqfs_node *parent, const char *name, int namelen,
540	struct ucred *cred, int mode)
541{
542	struct mqfs_node *node;
543
544	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_symlink);
545	if (mqfs_add_node(parent, node) != 0) {
546		mqnode_free(node);
547		return (NULL);
548	}
549	return (node);
550}
551
552#endif
553
554/*
555 * Destroy a node or a tree of nodes
556 */
557static int
558mqfs_destroy(struct mqfs_node *node)
559{
560	struct mqfs_node *parent;
561
562	KASSERT(node != NULL,
563	    ("%s(): node is NULL", __func__));
564	KASSERT(node->mn_info != NULL,
565	    ("%s(): node has no mn_info", __func__));
566
567	/* destroy children */
568	if (node->mn_type == mqfstype_dir || node->mn_type == mqfstype_root)
569		while (! LIST_EMPTY(&node->mn_children))
570			mqfs_destroy(LIST_FIRST(&node->mn_children));
571
572	/* unlink from parent */
573	if ((parent = node->mn_parent) != NULL) {
574		KASSERT(parent->mn_info == node->mn_info,
575		    ("%s(): parent has different mn_info", __func__));
576		LIST_REMOVE(node, mn_sibling);
577	}
578
579	if (node->mn_fileno != 0)
580		mqfs_fileno_free(node->mn_info, node);
581	if (node->mn_data != NULL)
582		mqueue_free(node->mn_data);
583	mqnode_free(node);
584	return (0);
585}
586
587/*
588 * Mount a mqfs instance
589 */
590static int
591mqfs_mount(struct mount *mp)
592{
593	struct statfs *sbp;
594
595	if (mp->mnt_flag & MNT_UPDATE)
596		return (EOPNOTSUPP);
597
598	mp->mnt_data = &mqfs_data;
599	MNT_ILOCK(mp);
600	mp->mnt_flag |= MNT_LOCAL;
601	MNT_IUNLOCK(mp);
602	vfs_getnewfsid(mp);
603
604	sbp = &mp->mnt_stat;
605	vfs_mountedfrom(mp, "mqueue");
606	sbp->f_bsize = PAGE_SIZE;
607	sbp->f_iosize = PAGE_SIZE;
608	sbp->f_blocks = 1;
609	sbp->f_bfree = 0;
610	sbp->f_bavail = 0;
611	sbp->f_files = 1;
612	sbp->f_ffree = 0;
613	return (0);
614}
615
616/*
617 * Unmount a mqfs instance
618 */
619static int
620mqfs_unmount(struct mount *mp, int mntflags)
621{
622	int error;
623
624	error = vflush(mp, 0, (mntflags & MNT_FORCE) ?  FORCECLOSE : 0,
625	    curthread);
626	return (error);
627}
628
629/*
630 * Return a root vnode
631 */
632static int
633mqfs_root(struct mount *mp, int flags, struct vnode **vpp)
634{
635	struct mqfs_info *mqfs;
636	int ret;
637
638	mqfs = VFSTOMQFS(mp);
639	ret = mqfs_allocv(mp, vpp, mqfs->mi_root);
640	return (ret);
641}
642
/*
 * Report filesystem statistics.  Nothing is updated here; the caller's
 * buffer is left untouched and success is returned.
 */
static int
mqfs_statfs(struct mount *mp, struct statfs *sbp)
{

	/* XXX update statistics */
	return (0);
}
652
653/*
654 * Initialize a mqfs instance
655 */
656static int
657mqfs_init(struct vfsconf *vfc)
658{
659	struct mqfs_node *root;
660	struct mqfs_info *mi;
661	osd_method_t methods[PR_MAXMETHOD] = {
662	    [PR_METHOD_REMOVE] = mqfs_prison_remove,
663	};
664
665	mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node),
666		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
667	mqueue_zone = uma_zcreate("mqueue", sizeof(struct mqueue),
668		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
669	mvdata_zone = uma_zcreate("mvdata",
670		sizeof(struct mqfs_vdata), NULL, NULL, NULL,
671		NULL, UMA_ALIGN_PTR, 0);
672	mqnoti_zone = uma_zcreate("mqnotifier", sizeof(struct mqueue_notifier),
673		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
674	mi = &mqfs_data;
675	sx_init(&mi->mi_lock, "mqfs lock");
676	/* set up the root diretory */
677	root = mqfs_create_node("/", 1, curthread->td_ucred, 01777,
678		mqfstype_root);
679	root->mn_info = mi;
680	LIST_INIT(&root->mn_children);
681	LIST_INIT(&root->mn_vnodes);
682	mi->mi_root = root;
683	mqfs_fileno_init(mi);
684	mqfs_fileno_alloc(mi, root);
685	mqfs_fixup_dir(root);
686	exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL,
687	    EVENTHANDLER_PRI_ANY);
688	mq_fdclose = mqueue_fdclose;
689	p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING);
690	mqfs_osd_jail_slot = osd_jail_register(NULL, methods);
691	return (0);
692}
693
694/*
695 * Destroy a mqfs instance
696 */
697static int
698mqfs_uninit(struct vfsconf *vfc)
699{
700	struct mqfs_info *mi;
701
702	if (!unloadable)
703		return (EOPNOTSUPP);
704	osd_jail_deregister(mqfs_osd_jail_slot);
705	EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
706	mi = &mqfs_data;
707	mqfs_destroy(mi->mi_root);
708	mi->mi_root = NULL;
709	mqfs_fileno_uninit(mi);
710	sx_destroy(&mi->mi_lock);
711	uma_zdestroy(mqnode_zone);
712	uma_zdestroy(mqueue_zone);
713	uma_zdestroy(mvdata_zone);
714	uma_zdestroy(mqnoti_zone);
715	return (0);
716}
717
718/*
719 * task routine
720 */
721static void
722do_recycle(void *context, int pending __unused)
723{
724	struct vnode *vp = (struct vnode *)context;
725
726	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
727	vrecycle(vp);
728	VOP_UNLOCK(vp);
729	vdrop(vp);
730}
731
732/*
733 * Allocate a vnode
734 */
735static int
736mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn)
737{
738	struct mqfs_vdata *vd;
739	struct mqfs_info  *mqfs;
740	struct vnode *newvpp;
741	int error;
742
743	mqfs = pn->mn_info;
744	*vpp = NULL;
745	sx_xlock(&mqfs->mi_lock);
746	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
747		if (vd->mv_vnode->v_mount == mp) {
748			vhold(vd->mv_vnode);
749			break;
750		}
751	}
752
753	if (vd != NULL) {
754found:
755		*vpp = vd->mv_vnode;
756		sx_xunlock(&mqfs->mi_lock);
757		error = vget(*vpp, LK_RETRY | LK_EXCLUSIVE);
758		vdrop(*vpp);
759		return (error);
760	}
761	sx_xunlock(&mqfs->mi_lock);
762
763	error = getnewvnode("mqueue", mp, &mqfs_vnodeops, &newvpp);
764	if (error)
765		return (error);
766	vn_lock(newvpp, LK_EXCLUSIVE | LK_RETRY);
767	error = insmntque(newvpp, mp);
768	if (error != 0)
769		return (error);
770
771	sx_xlock(&mqfs->mi_lock);
772	/*
773	 * Check if it has already been allocated
774	 * while we were blocked.
775	 */
776	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
777		if (vd->mv_vnode->v_mount == mp) {
778			vhold(vd->mv_vnode);
779			sx_xunlock(&mqfs->mi_lock);
780
781			vgone(newvpp);
782			vput(newvpp);
783			goto found;
784		}
785	}
786
787	*vpp = newvpp;
788
789	vd = uma_zalloc(mvdata_zone, M_WAITOK);
790	(*vpp)->v_data = vd;
791	vd->mv_vnode = *vpp;
792	vd->mv_node = pn;
793	TASK_INIT(&vd->mv_task, 0, do_recycle, *vpp);
794	LIST_INSERT_HEAD(&pn->mn_vnodes, vd, mv_link);
795	mqnode_addref(pn);
796	switch (pn->mn_type) {
797	case mqfstype_root:
798		(*vpp)->v_vflag = VV_ROOT;
799		/* fall through */
800	case mqfstype_dir:
801	case mqfstype_this:
802	case mqfstype_parent:
803		(*vpp)->v_type = VDIR;
804		break;
805	case mqfstype_file:
806		(*vpp)->v_type = VREG;
807		break;
808	case mqfstype_symlink:
809		(*vpp)->v_type = VLNK;
810		break;
811	case mqfstype_none:
812		KASSERT(0, ("mqfs_allocf called for null node\n"));
813	default:
814		panic("%s has unexpected type: %d", pn->mn_name, pn->mn_type);
815	}
816	sx_xunlock(&mqfs->mi_lock);
817	return (0);
818}
819
820/*
821 * Search a directory entry
822 */
823static struct mqfs_node *
824mqfs_search(struct mqfs_node *pd, const char *name, int len, struct ucred *cred)
825{
826	struct mqfs_node *pn;
827	const void *pr_root;
828
829	sx_assert(&pd->mn_info->mi_lock, SX_LOCKED);
830	pr_root = cred->cr_prison->pr_root;
831	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
832		/* Only match names within the same prison root directory */
833		if ((pn->mn_pr_root == NULL || pn->mn_pr_root == pr_root) &&
834		    strncmp(pn->mn_name, name, len) == 0 &&
835		    pn->mn_name[len] == '\0')
836			return (pn);
837	}
838	return (NULL);
839}
840
841/*
842 * Look up a file or directory.
843 */
844static int
845mqfs_lookupx(struct vop_cachedlookup_args *ap)
846{
847	struct componentname *cnp;
848	struct vnode *dvp, **vpp;
849	struct mqfs_node *pd;
850	struct mqfs_node *pn;
851	struct mqfs_info *mqfs;
852	int nameiop, flags, error, namelen;
853	char *pname;
854	struct thread *td;
855
856	cnp = ap->a_cnp;
857	vpp = ap->a_vpp;
858	dvp = ap->a_dvp;
859	pname = cnp->cn_nameptr;
860	namelen = cnp->cn_namelen;
861	td = cnp->cn_thread;
862	flags = cnp->cn_flags;
863	nameiop = cnp->cn_nameiop;
864	pd = VTON(dvp);
865	pn = NULL;
866	mqfs = pd->mn_info;
867	*vpp = NULLVP;
868
869	if (dvp->v_type != VDIR)
870		return (ENOTDIR);
871
872	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
873	if (error)
874		return (error);
875
876	/* shortcut: check if the name is too long */
877	if (cnp->cn_namelen >= MQFS_NAMELEN)
878		return (ENOENT);
879
880	/* self */
881	if (namelen == 1 && pname[0] == '.') {
882		if ((flags & ISLASTCN) && nameiop != LOOKUP)
883			return (EINVAL);
884		pn = pd;
885		*vpp = dvp;
886		VREF(dvp);
887		return (0);
888	}
889
890	/* parent */
891	if (cnp->cn_flags & ISDOTDOT) {
892		if (dvp->v_vflag & VV_ROOT)
893			return (EIO);
894		if ((flags & ISLASTCN) && nameiop != LOOKUP)
895			return (EINVAL);
896		VOP_UNLOCK(dvp);
897		KASSERT(pd->mn_parent, ("non-root directory has no parent"));
898		pn = pd->mn_parent;
899		error = mqfs_allocv(dvp->v_mount, vpp, pn);
900		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
901		return (error);
902	}
903
904	/* named node */
905	sx_xlock(&mqfs->mi_lock);
906	pn = mqfs_search(pd, pname, namelen, cnp->cn_cred);
907	if (pn != NULL)
908		mqnode_addref(pn);
909	sx_xunlock(&mqfs->mi_lock);
910
911	/* found */
912	if (pn != NULL) {
913		/* DELETE */
914		if (nameiop == DELETE && (flags & ISLASTCN)) {
915			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
916			if (error) {
917				mqnode_release(pn);
918				return (error);
919			}
920			if (*vpp == dvp) {
921				VREF(dvp);
922				*vpp = dvp;
923				mqnode_release(pn);
924				return (0);
925			}
926		}
927
928		/* allocate vnode */
929		error = mqfs_allocv(dvp->v_mount, vpp, pn);
930		mqnode_release(pn);
931		if (error == 0 && cnp->cn_flags & MAKEENTRY)
932			cache_enter(dvp, *vpp, cnp);
933		return (error);
934	}
935
936	/* not found */
937
938	/* will create a new entry in the directory ? */
939	if ((nameiop == CREATE || nameiop == RENAME) && (flags & LOCKPARENT)
940	    && (flags & ISLASTCN)) {
941		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
942		if (error)
943			return (error);
944		cnp->cn_flags |= SAVENAME;
945		return (EJUSTRETURN);
946	}
947	return (ENOENT);
948}
949
950#if 0
951struct vop_lookup_args {
952	struct vop_generic_args a_gen;
953	struct vnode *a_dvp;
954	struct vnode **a_vpp;
955	struct componentname *a_cnp;
956};
957#endif
958
/*
 * vnode lookup operation: a thin wrapper that hands the request to
 * mqfs_lookupx() and passes its result back unchanged.
 */
static int
mqfs_lookup(struct vop_cachedlookup_args *ap)
{

	return (mqfs_lookupx(ap));
}
970
971#if 0
972struct vop_create_args {
973	struct vnode *a_dvp;
974	struct vnode **a_vpp;
975	struct componentname *a_cnp;
976	struct vattr *a_vap;
977};
978#endif
979
980/*
981 * vnode creation operation
982 */
983static int
984mqfs_create(struct vop_create_args *ap)
985{
986	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
987	struct componentname *cnp = ap->a_cnp;
988	struct mqfs_node *pd;
989	struct mqfs_node *pn;
990	struct mqueue *mq;
991	int error;
992
993	pd = VTON(ap->a_dvp);
994	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
995		return (ENOTDIR);
996	mq = mqueue_alloc(NULL);
997	if (mq == NULL)
998		return (EAGAIN);
999	sx_xlock(&mqfs->mi_lock);
1000	if ((cnp->cn_flags & HASBUF) == 0)
1001		panic("%s: no name", __func__);
1002	pn = mqfs_create_file(pd, cnp->cn_nameptr, cnp->cn_namelen,
1003		cnp->cn_cred, ap->a_vap->va_mode);
1004	if (pn == NULL) {
1005		sx_xunlock(&mqfs->mi_lock);
1006		error = ENOSPC;
1007	} else {
1008		mqnode_addref(pn);
1009		sx_xunlock(&mqfs->mi_lock);
1010		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1011		mqnode_release(pn);
1012		if (error)
1013			mqfs_destroy(pn);
1014		else
1015			pn->mn_data = mq;
1016	}
1017	if (error)
1018		mqueue_free(mq);
1019	return (error);
1020}
1021
1022/*
1023 * Remove an entry
1024 */
1025static
1026int do_unlink(struct mqfs_node *pn, struct ucred *ucred)
1027{
1028	struct mqfs_node *parent;
1029	struct mqfs_vdata *vd;
1030	int error = 0;
1031
1032	sx_assert(&pn->mn_info->mi_lock, SX_LOCKED);
1033
1034	if (ucred->cr_uid != pn->mn_uid &&
1035	    (error = priv_check_cred(ucred, PRIV_MQ_ADMIN)) != 0)
1036		error = EACCES;
1037	else if (!pn->mn_deleted) {
1038		parent = pn->mn_parent;
1039		pn->mn_parent = NULL;
1040		pn->mn_deleted = 1;
1041		LIST_REMOVE(pn, mn_sibling);
1042		LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
1043			cache_purge(vd->mv_vnode);
1044			vhold(vd->mv_vnode);
1045			taskqueue_enqueue(taskqueue_thread, &vd->mv_task);
1046		}
1047		mqnode_release(pn);
1048		mqnode_release(parent);
1049	} else
1050		error = ENOENT;
1051	return (error);
1052}
1053
1054#if 0
1055struct vop_remove_args {
1056	struct vnode *a_dvp;
1057	struct vnode *a_vp;
1058	struct componentname *a_cnp;
1059};
1060#endif
1061
1062/*
1063 * vnode removal operation
1064 */
1065static int
1066mqfs_remove(struct vop_remove_args *ap)
1067{
1068	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1069	struct mqfs_node *pn;
1070	int error;
1071
1072	if (ap->a_vp->v_type == VDIR)
1073                return (EPERM);
1074	pn = VTON(ap->a_vp);
1075	sx_xlock(&mqfs->mi_lock);
1076	error = do_unlink(pn, ap->a_cnp->cn_cred);
1077	sx_xunlock(&mqfs->mi_lock);
1078	return (error);
1079}
1080
1081#if 0
1082struct vop_inactive_args {
1083	struct vnode *a_vp;
1084	struct thread *a_td;
1085};
1086#endif
1087
1088static int
1089mqfs_inactive(struct vop_inactive_args *ap)
1090{
1091	struct mqfs_node *pn = VTON(ap->a_vp);
1092
1093	if (pn->mn_deleted)
1094		vrecycle(ap->a_vp);
1095	return (0);
1096}
1097
1098#if 0
1099struct vop_reclaim_args {
1100	struct vop_generic_args a_gen;
1101	struct vnode *a_vp;
1102};
1103#endif
1104
1105static int
1106mqfs_reclaim(struct vop_reclaim_args *ap)
1107{
1108	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_vp->v_mount);
1109	struct vnode *vp = ap->a_vp;
1110	struct mqfs_node *pn;
1111	struct mqfs_vdata *vd;
1112
1113	vd = vp->v_data;
1114	pn = vd->mv_node;
1115	sx_xlock(&mqfs->mi_lock);
1116	vp->v_data = NULL;
1117	LIST_REMOVE(vd, mv_link);
1118	uma_zfree(mvdata_zone, vd);
1119	mqnode_release(pn);
1120	sx_xunlock(&mqfs->mi_lock);
1121	return (0);
1122}
1123
1124#if 0
1125struct vop_open_args {
1126	struct vop_generic_args a_gen;
1127	struct vnode *a_vp;
1128	int a_mode;
1129	struct ucred *a_cred;
1130	struct thread *a_td;
1131	struct file *a_fp;
1132};
1133#endif
1134
/*
 * vnode open operation: nothing to do, always succeeds.
 */
static int
mqfs_open(struct vop_open_args *ap)
{

	return (0);
}
1140
1141#if 0
1142struct vop_close_args {
1143	struct vop_generic_args a_gen;
1144	struct vnode *a_vp;
1145	int a_fflag;
1146	struct ucred *a_cred;
1147	struct thread *a_td;
1148};
1149#endif
1150
/*
 * vnode close operation: nothing to do, always succeeds.
 */
static int
mqfs_close(struct vop_close_args *ap)
{

	return (0);
}
1156
1157#if 0
1158struct vop_access_args {
1159	struct vop_generic_args a_gen;
1160	struct vnode *a_vp;
1161	accmode_t a_accmode;
1162	struct ucred *a_cred;
1163	struct thread *a_td;
1164};
1165#endif
1166
1167/*
1168 * Verify permissions
1169 */
1170static int
1171mqfs_access(struct vop_access_args *ap)
1172{
1173	struct vnode *vp = ap->a_vp;
1174	struct vattr vattr;
1175	int error;
1176
1177	error = VOP_GETATTR(vp, &vattr, ap->a_cred);
1178	if (error)
1179		return (error);
1180	error = vaccess(vp->v_type, vattr.va_mode, vattr.va_uid, vattr.va_gid,
1181	    ap->a_accmode, ap->a_cred);
1182	return (error);
1183}
1184
1185#if 0
1186struct vop_getattr_args {
1187	struct vop_generic_args a_gen;
1188	struct vnode *a_vp;
1189	struct vattr *a_vap;
1190	struct ucred *a_cred;
1191};
1192#endif
1193
1194/*
1195 * Get file attributes
1196 */
1197static int
1198mqfs_getattr(struct vop_getattr_args *ap)
1199{
1200	struct vnode *vp = ap->a_vp;
1201	struct mqfs_node *pn = VTON(vp);
1202	struct vattr *vap = ap->a_vap;
1203	int error = 0;
1204
1205	vap->va_type = vp->v_type;
1206	vap->va_mode = pn->mn_mode;
1207	vap->va_nlink = 1;
1208	vap->va_uid = pn->mn_uid;
1209	vap->va_gid = pn->mn_gid;
1210	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
1211	vap->va_fileid = pn->mn_fileno;
1212	vap->va_size = 0;
1213	vap->va_blocksize = PAGE_SIZE;
1214	vap->va_bytes = vap->va_size = 0;
1215	vap->va_atime = pn->mn_atime;
1216	vap->va_mtime = pn->mn_mtime;
1217	vap->va_ctime = pn->mn_ctime;
1218	vap->va_birthtime = pn->mn_birth;
1219	vap->va_gen = 0;
1220	vap->va_flags = 0;
1221	vap->va_rdev = NODEV;
1222	vap->va_bytes = 0;
1223	vap->va_filerev = 0;
1224	return (error);
1225}
1226
1227#if 0
1228struct vop_setattr_args {
1229	struct vop_generic_args a_gen;
1230	struct vnode *a_vp;
1231	struct vattr *a_vap;
1232	struct ucred *a_cred;
1233};
1234#endif
1235/*
1236 * Set attributes
1237 */
1238static int
1239mqfs_setattr(struct vop_setattr_args *ap)
1240{
1241	struct mqfs_node *pn;
1242	struct vattr *vap;
1243	struct vnode *vp;
1244	struct thread *td;
1245	int c, error;
1246	uid_t uid;
1247	gid_t gid;
1248
1249	td = curthread;
1250	vap = ap->a_vap;
1251	vp = ap->a_vp;
1252	if ((vap->va_type != VNON) ||
1253	    (vap->va_nlink != VNOVAL) ||
1254	    (vap->va_fsid != VNOVAL) ||
1255	    (vap->va_fileid != VNOVAL) ||
1256	    (vap->va_blocksize != VNOVAL) ||
1257	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
1258	    (vap->va_rdev != VNOVAL) ||
1259	    ((int)vap->va_bytes != VNOVAL) ||
1260	    (vap->va_gen != VNOVAL)) {
1261		return (EINVAL);
1262	}
1263
1264	pn = VTON(vp);
1265
1266	error = c = 0;
1267	if (vap->va_uid == (uid_t)VNOVAL)
1268		uid = pn->mn_uid;
1269	else
1270		uid = vap->va_uid;
1271	if (vap->va_gid == (gid_t)VNOVAL)
1272		gid = pn->mn_gid;
1273	else
1274		gid = vap->va_gid;
1275
1276	if (uid != pn->mn_uid || gid != pn->mn_gid) {
1277		/*
1278		 * To modify the ownership of a file, must possess VADMIN
1279		 * for that file.
1280		 */
1281		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)))
1282			return (error);
1283
1284		/*
1285		 * XXXRW: Why is there a privilege check here: shouldn't the
1286		 * check in VOP_ACCESS() be enough?  Also, are the group bits
1287		 * below definitely right?
1288		 */
1289		if (((ap->a_cred->cr_uid != pn->mn_uid) || uid != pn->mn_uid ||
1290		    (gid != pn->mn_gid && !groupmember(gid, ap->a_cred))) &&
1291		    (error = priv_check(td, PRIV_MQ_ADMIN)) != 0)
1292			return (error);
1293		pn->mn_uid = uid;
1294		pn->mn_gid = gid;
1295		c = 1;
1296	}
1297
1298	if (vap->va_mode != (mode_t)VNOVAL) {
1299		if ((ap->a_cred->cr_uid != pn->mn_uid) &&
1300		    (error = priv_check(td, PRIV_MQ_ADMIN)))
1301			return (error);
1302		pn->mn_mode = vap->va_mode;
1303		c = 1;
1304	}
1305
1306	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1307		/* See the comment in ufs_vnops::ufs_setattr(). */
1308		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)) &&
1309		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
1310		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td))))
1311			return (error);
1312		if (vap->va_atime.tv_sec != VNOVAL) {
1313			pn->mn_atime = vap->va_atime;
1314		}
1315		if (vap->va_mtime.tv_sec != VNOVAL) {
1316			pn->mn_mtime = vap->va_mtime;
1317		}
1318		c = 1;
1319	}
1320	if (c) {
1321		vfs_timestamp(&pn->mn_ctime);
1322	}
1323	return (0);
1324}
1325
1326#if 0
1327struct vop_read_args {
1328	struct vop_generic_args a_gen;
1329	struct vnode *a_vp;
1330	struct uio *a_uio;
1331	int a_ioflag;
1332	struct ucred *a_cred;
1333};
1334#endif
1335
1336/*
1337 * Read from a file
1338 */
1339static int
1340mqfs_read(struct vop_read_args *ap)
1341{
1342	char buf[80];
1343	struct vnode *vp = ap->a_vp;
1344	struct uio *uio = ap->a_uio;
1345	struct mqueue *mq;
1346	int len, error;
1347
1348	if (vp->v_type != VREG)
1349		return (EINVAL);
1350
1351	mq = VTOMQ(vp);
1352	snprintf(buf, sizeof(buf),
1353		"QSIZE:%-10ld MAXMSG:%-10ld CURMSG:%-10ld MSGSIZE:%-10ld\n",
1354		mq->mq_totalbytes,
1355		mq->mq_maxmsg,
1356		mq->mq_curmsgs,
1357		mq->mq_msgsize);
1358	buf[sizeof(buf)-1] = '\0';
1359	len = strlen(buf);
1360	error = uiomove_frombuf(buf, len, uio);
1361	return (error);
1362}
1363
1364#if 0
1365struct vop_readdir_args {
1366	struct vop_generic_args a_gen;
1367	struct vnode *a_vp;
1368	struct uio *a_uio;
1369	struct ucred *a_cred;
1370	int *a_eofflag;
1371	int *a_ncookies;
1372	u_long **a_cookies;
1373};
1374#endif
1375
1376/*
1377 * Return directory entries.
1378 */
1379static int
1380mqfs_readdir(struct vop_readdir_args *ap)
1381{
1382	struct vnode *vp;
1383	struct mqfs_info *mi;
1384	struct mqfs_node *pd;
1385	struct mqfs_node *pn;
1386	struct dirent entry;
1387	struct uio *uio;
1388	const void *pr_root;
1389	int *tmp_ncookies = NULL;
1390	off_t offset;
1391	int error, i;
1392
1393	vp = ap->a_vp;
1394	mi = VFSTOMQFS(vp->v_mount);
1395	pd = VTON(vp);
1396	uio = ap->a_uio;
1397
1398	if (vp->v_type != VDIR)
1399		return (ENOTDIR);
1400
1401	if (uio->uio_offset < 0)
1402		return (EINVAL);
1403
1404	if (ap->a_ncookies != NULL) {
1405		tmp_ncookies = ap->a_ncookies;
1406		*ap->a_ncookies = 0;
1407		ap->a_ncookies = NULL;
1408        }
1409
1410	error = 0;
1411	offset = 0;
1412
1413	pr_root = ap->a_cred->cr_prison->pr_root;
1414	sx_xlock(&mi->mi_lock);
1415
1416	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
1417		entry.d_reclen = sizeof(entry);
1418
1419		/*
1420		 * Only show names within the same prison root directory
1421		 * (or not associated with a prison, e.g. "." and "..").
1422		 */
1423		if (pn->mn_pr_root != NULL && pn->mn_pr_root != pr_root)
1424			continue;
1425		if (!pn->mn_fileno)
1426			mqfs_fileno_alloc(mi, pn);
1427		entry.d_fileno = pn->mn_fileno;
1428		entry.d_off = offset + entry.d_reclen;
1429		for (i = 0; i < MQFS_NAMELEN - 1 && pn->mn_name[i] != '\0'; ++i)
1430			entry.d_name[i] = pn->mn_name[i];
1431		entry.d_namlen = i;
1432		switch (pn->mn_type) {
1433		case mqfstype_root:
1434		case mqfstype_dir:
1435		case mqfstype_this:
1436		case mqfstype_parent:
1437			entry.d_type = DT_DIR;
1438			break;
1439		case mqfstype_file:
1440			entry.d_type = DT_REG;
1441			break;
1442		case mqfstype_symlink:
1443			entry.d_type = DT_LNK;
1444			break;
1445		default:
1446			panic("%s has unexpected node type: %d", pn->mn_name,
1447				pn->mn_type);
1448		}
1449		dirent_terminate(&entry);
1450		if (entry.d_reclen > uio->uio_resid)
1451                        break;
1452		if (offset >= uio->uio_offset) {
1453			error = vfs_read_dirent(ap, &entry, offset);
1454                        if (error)
1455                                break;
1456                }
1457                offset += entry.d_reclen;
1458	}
1459	sx_xunlock(&mi->mi_lock);
1460
1461	uio->uio_offset = offset;
1462
1463	if (tmp_ncookies != NULL)
1464		ap->a_ncookies = tmp_ncookies;
1465
1466	return (error);
1467}
1468
1469#ifdef notyet
1470
1471#if 0
1472struct vop_mkdir_args {
1473	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
1476	struct vattr *a_vap;
1477};
1478#endif
1479
1480/*
1481 * Create a directory.
1482 */
1483static int
1484mqfs_mkdir(struct vop_mkdir_args *ap)
1485{
1486	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1487	struct componentname *cnp = ap->a_cnp;
1488	struct mqfs_node *pd = VTON(ap->a_dvp);
1489	struct mqfs_node *pn;
1490	int error;
1491
1492	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
1493		return (ENOTDIR);
1494	sx_xlock(&mqfs->mi_lock);
1495	if ((cnp->cn_flags & HASBUF) == 0)
1496		panic("%s: no name", __func__);
1497	pn = mqfs_create_dir(pd, cnp->cn_nameptr, cnp->cn_namelen,
1498		ap->a_vap->cn_cred, ap->a_vap->va_mode);
1499	if (pn != NULL)
1500		mqnode_addref(pn);
1501	sx_xunlock(&mqfs->mi_lock);
1502	if (pn == NULL) {
1503		error = ENOSPC;
1504	} else {
1505		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1506		mqnode_release(pn);
1507	}
1508	return (error);
1509}
1510
1511#if 0
1512struct vop_rmdir_args {
1513	struct vnode *a_dvp;
1514	struct vnode *a_vp;
1515	struct componentname *a_cnp;
1516};
1517#endif
1518
1519/*
1520 * Remove a directory.
1521 */
1522static int
1523mqfs_rmdir(struct vop_rmdir_args *ap)
1524{
1525	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1526	struct mqfs_node *pn = VTON(ap->a_vp);
1527	struct mqfs_node *pt;
1528
1529	if (pn->mn_type != mqfstype_dir)
1530		return (ENOTDIR);
1531
1532	sx_xlock(&mqfs->mi_lock);
1533	if (pn->mn_deleted) {
1534		sx_xunlock(&mqfs->mi_lock);
1535		return (ENOENT);
1536	}
1537
1538	pt = LIST_FIRST(&pn->mn_children);
1539	pt = LIST_NEXT(pt, mn_sibling);
1540	pt = LIST_NEXT(pt, mn_sibling);
1541	if (pt != NULL) {
1542		sx_xunlock(&mqfs->mi_lock);
1543		return (ENOTEMPTY);
1544	}
1545	pt = pn->mn_parent;
1546	pn->mn_parent = NULL;
1547	pn->mn_deleted = 1;
1548	LIST_REMOVE(pn, mn_sibling);
1549	mqnode_release(pn);
1550	mqnode_release(pt);
1551	sx_xunlock(&mqfs->mi_lock);
1552	cache_purge(ap->a_vp);
1553	return (0);
1554}
1555
1556#endif /* notyet */
1557
1558/*
1559 * See if this prison root is obsolete, and clean up associated queues if it is.
1560 */
1561static int
1562mqfs_prison_remove(void *obj, void *data __unused)
1563{
1564	const struct prison *pr = obj;
1565	struct prison *tpr;
1566	struct mqfs_node *pn, *tpn;
1567	struct vnode *pr_root;
1568
1569	pr_root = pr->pr_root;
1570	if (pr->pr_parent->pr_root == pr_root)
1571		return (0);
1572	TAILQ_FOREACH(tpr, &allprison, pr_list) {
1573		if (tpr != pr && tpr->pr_root == pr_root)
1574			return (0);
1575	}
1576	/*
1577	 * No jails are rooted in this directory anymore,
1578	 * so no queues should be either.
1579	 */
1580	sx_xlock(&mqfs_data.mi_lock);
1581	LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
1582	    mn_sibling, tpn) {
1583		if (pn->mn_pr_root == pr_root)
1584			(void)do_unlink(pn, curthread->td_ucred);
1585	}
1586	sx_xunlock(&mqfs_data.mi_lock);
1587	return (0);
1588}
1589
1590/*
1591 * Allocate a message queue
1592 */
1593static struct mqueue *
1594mqueue_alloc(const struct mq_attr *attr)
1595{
1596	struct mqueue *mq;
1597
1598	if (curmq >= maxmq)
1599		return (NULL);
1600	mq = uma_zalloc(mqueue_zone, M_WAITOK | M_ZERO);
1601	TAILQ_INIT(&mq->mq_msgq);
1602	if (attr != NULL) {
1603		mq->mq_maxmsg = attr->mq_maxmsg;
1604		mq->mq_msgsize = attr->mq_msgsize;
1605	} else {
1606		mq->mq_maxmsg = default_maxmsg;
1607		mq->mq_msgsize = default_msgsize;
1608	}
1609	mtx_init(&mq->mq_mutex, "mqueue lock", NULL, MTX_DEF);
1610	knlist_init_mtx(&mq->mq_rsel.si_note, &mq->mq_mutex);
1611	knlist_init_mtx(&mq->mq_wsel.si_note, &mq->mq_mutex);
1612	atomic_add_int(&curmq, 1);
1613	return (mq);
1614}
1615
1616/*
1617 * Destroy a message queue
1618 */
1619static void
1620mqueue_free(struct mqueue *mq)
1621{
1622	struct mqueue_msg *msg;
1623
1624	while ((msg = TAILQ_FIRST(&mq->mq_msgq)) != NULL) {
1625		TAILQ_REMOVE(&mq->mq_msgq, msg, msg_link);
1626		free(msg, M_MQUEUEDATA);
1627	}
1628
1629	mtx_destroy(&mq->mq_mutex);
1630	seldrain(&mq->mq_rsel);
1631	seldrain(&mq->mq_wsel);
1632	knlist_destroy(&mq->mq_rsel.si_note);
1633	knlist_destroy(&mq->mq_wsel.si_note);
1634	uma_zfree(mqueue_zone, mq);
1635	atomic_add_int(&curmq, -1);
1636}
1637
1638/*
1639 * Load a message from user space
1640 */
1641static struct mqueue_msg *
1642mqueue_loadmsg(const char *msg_ptr, size_t msg_size, int msg_prio)
1643{
1644	struct mqueue_msg *msg;
1645	size_t len;
1646	int error;
1647
1648	len = sizeof(struct mqueue_msg) + msg_size;
1649	msg = malloc(len, M_MQUEUEDATA, M_WAITOK);
1650	error = copyin(msg_ptr, ((char *)msg) + sizeof(struct mqueue_msg),
1651	    msg_size);
1652	if (error) {
1653		free(msg, M_MQUEUEDATA);
1654		msg = NULL;
1655	} else {
1656		msg->msg_size = msg_size;
1657		msg->msg_prio = msg_prio;
1658	}
1659	return (msg);
1660}
1661
1662/*
1663 * Save a message to user space
1664 */
1665static int
1666mqueue_savemsg(struct mqueue_msg *msg, char *msg_ptr, int *msg_prio)
1667{
1668	int error;
1669
1670	error = copyout(((char *)msg) + sizeof(*msg), msg_ptr,
1671		msg->msg_size);
1672	if (error == 0 && msg_prio != NULL)
1673		error = copyout(&msg->msg_prio, msg_prio, sizeof(int));
1674	return (error);
1675}
1676
1677/*
1678 * Free a message's memory
1679 */
1680static __inline void
1681mqueue_freemsg(struct mqueue_msg *msg)
1682{
1683	free(msg, M_MQUEUEDATA);
1684}
1685
1686/*
1687 * Send a message. if waitok is false, thread will not be
1688 * blocked if there is no data in queue, otherwise, absolute
1689 * time will be checked.
1690 */
1691int
1692mqueue_send(struct mqueue *mq, const char *msg_ptr,
1693	size_t msg_len, unsigned msg_prio, int waitok,
1694	const struct timespec *abs_timeout)
1695{
1696	struct mqueue_msg *msg;
1697	struct timespec ts, ts2;
1698	struct timeval tv;
1699	int error;
1700
1701	if (msg_prio >= MQ_PRIO_MAX)
1702		return (EINVAL);
1703	if (msg_len > mq->mq_msgsize)
1704		return (EMSGSIZE);
1705	msg = mqueue_loadmsg(msg_ptr, msg_len, msg_prio);
1706	if (msg == NULL)
1707		return (EFAULT);
1708
1709	/* O_NONBLOCK case */
1710	if (!waitok) {
1711		error = _mqueue_send(mq, msg, -1);
1712		if (error)
1713			goto bad;
1714		return (0);
1715	}
1716
1717	/* we allow a null timeout (wait forever) */
1718	if (abs_timeout == NULL) {
1719		error = _mqueue_send(mq, msg, 0);
1720		if (error)
1721			goto bad;
1722		return (0);
1723	}
1724
1725	/* send it before checking time */
1726	error = _mqueue_send(mq, msg, -1);
1727	if (error == 0)
1728		return (0);
1729
1730	if (error != EAGAIN)
1731		goto bad;
1732
1733	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
1734		error = EINVAL;
1735		goto bad;
1736	}
1737	for (;;) {
1738		getnanotime(&ts);
1739		timespecsub(abs_timeout, &ts, &ts2);
1740		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1741			error = ETIMEDOUT;
1742			break;
1743		}
1744		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1745		error = _mqueue_send(mq, msg, tvtohz(&tv));
1746		if (error != ETIMEDOUT)
1747			break;
1748	}
1749	if (error == 0)
1750		return (0);
1751bad:
1752	mqueue_freemsg(msg);
1753	return (error);
1754}
1755
1756/*
1757 * Common routine to send a message
1758 */
1759static int
1760_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, int timo)
1761{
1762	struct mqueue_msg *msg2;
1763	int error = 0;
1764
1765	mtx_lock(&mq->mq_mutex);
1766	while (mq->mq_curmsgs >= mq->mq_maxmsg && error == 0) {
1767		if (timo < 0) {
1768			mtx_unlock(&mq->mq_mutex);
1769			return (EAGAIN);
1770		}
1771		mq->mq_senders++;
1772		error = msleep(&mq->mq_senders, &mq->mq_mutex,
1773			    PCATCH, "mqsend", timo);
1774		mq->mq_senders--;
1775		if (error == EAGAIN)
1776			error = ETIMEDOUT;
1777	}
1778	if (mq->mq_curmsgs >= mq->mq_maxmsg) {
1779		mtx_unlock(&mq->mq_mutex);
1780		return (error);
1781	}
1782	error = 0;
1783	if (TAILQ_EMPTY(&mq->mq_msgq)) {
1784		TAILQ_INSERT_HEAD(&mq->mq_msgq, msg, msg_link);
1785	} else {
1786		if (msg->msg_prio <= TAILQ_LAST(&mq->mq_msgq, msgq)->msg_prio) {
1787			TAILQ_INSERT_TAIL(&mq->mq_msgq, msg, msg_link);
1788		} else {
1789			TAILQ_FOREACH(msg2, &mq->mq_msgq, msg_link) {
1790				if (msg2->msg_prio < msg->msg_prio)
1791					break;
1792			}
1793			TAILQ_INSERT_BEFORE(msg2, msg, msg_link);
1794		}
1795	}
1796	mq->mq_curmsgs++;
1797	mq->mq_totalbytes += msg->msg_size;
1798	if (mq->mq_receivers)
1799		wakeup_one(&mq->mq_receivers);
1800	else if (mq->mq_notifier != NULL)
1801		mqueue_send_notification(mq);
1802	if (mq->mq_flags & MQ_RSEL) {
1803		mq->mq_flags &= ~MQ_RSEL;
1804		selwakeup(&mq->mq_rsel);
1805	}
1806	KNOTE_LOCKED(&mq->mq_rsel.si_note, 0);
1807	mtx_unlock(&mq->mq_mutex);
1808	return (0);
1809}
1810
1811/*
1812 * Send realtime a signal to process which registered itself
1813 * successfully by mq_notify.
1814 */
1815static void
1816mqueue_send_notification(struct mqueue *mq)
1817{
1818	struct mqueue_notifier *nt;
1819	struct thread *td;
1820	struct proc *p;
1821	int error;
1822
1823	mtx_assert(&mq->mq_mutex, MA_OWNED);
1824	nt = mq->mq_notifier;
1825	if (nt->nt_sigev.sigev_notify != SIGEV_NONE) {
1826		p = nt->nt_proc;
1827		error = sigev_findtd(p, &nt->nt_sigev, &td);
1828		if (error) {
1829			mq->mq_notifier = NULL;
1830			return;
1831		}
1832		if (!KSI_ONQ(&nt->nt_ksi)) {
1833			ksiginfo_set_sigev(&nt->nt_ksi, &nt->nt_sigev);
1834			tdsendsignal(p, td, nt->nt_ksi.ksi_signo, &nt->nt_ksi);
1835		}
1836		PROC_UNLOCK(p);
1837	}
1838	mq->mq_notifier = NULL;
1839}
1840
1841/*
1842 * Get a message. if waitok is false, thread will not be
1843 * blocked if there is no data in queue, otherwise, absolute
1844 * time will be checked.
1845 */
1846int
1847mqueue_receive(struct mqueue *mq, char *msg_ptr,
1848	size_t msg_len, unsigned *msg_prio, int waitok,
1849	const struct timespec *abs_timeout)
1850{
1851	struct mqueue_msg *msg;
1852	struct timespec ts, ts2;
1853	struct timeval tv;
1854	int error;
1855
1856	if (msg_len < mq->mq_msgsize)
1857		return (EMSGSIZE);
1858
1859	/* O_NONBLOCK case */
1860	if (!waitok) {
1861		error = _mqueue_recv(mq, &msg, -1);
1862		if (error)
1863			return (error);
1864		goto received;
1865	}
1866
1867	/* we allow a null timeout (wait forever). */
1868	if (abs_timeout == NULL) {
1869		error = _mqueue_recv(mq, &msg, 0);
1870		if (error)
1871			return (error);
1872		goto received;
1873	}
1874
1875	/* try to get a message before checking time */
1876	error = _mqueue_recv(mq, &msg, -1);
1877	if (error == 0)
1878		goto received;
1879
1880	if (error != EAGAIN)
1881		return (error);
1882
1883	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
1884		error = EINVAL;
1885		return (error);
1886	}
1887
1888	for (;;) {
1889		getnanotime(&ts);
1890		timespecsub(abs_timeout, &ts, &ts2);
1891		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1892			error = ETIMEDOUT;
1893			return (error);
1894		}
1895		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1896		error = _mqueue_recv(mq, &msg, tvtohz(&tv));
1897		if (error == 0)
1898			break;
1899		if (error != ETIMEDOUT)
1900			return (error);
1901	}
1902
1903received:
1904	error = mqueue_savemsg(msg, msg_ptr, msg_prio);
1905	if (error == 0) {
1906		curthread->td_retval[0] = msg->msg_size;
1907		curthread->td_retval[1] = 0;
1908	}
1909	mqueue_freemsg(msg);
1910	return (error);
1911}
1912
1913/*
1914 * Common routine to receive a message
1915 */
1916static int
1917_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo)
1918{
1919	int error = 0;
1920
1921	mtx_lock(&mq->mq_mutex);
1922	while ((*msg = TAILQ_FIRST(&mq->mq_msgq)) == NULL && error == 0) {
1923		if (timo < 0) {
1924			mtx_unlock(&mq->mq_mutex);
1925			return (EAGAIN);
1926		}
1927		mq->mq_receivers++;
1928		error = msleep(&mq->mq_receivers, &mq->mq_mutex,
1929			    PCATCH, "mqrecv", timo);
1930		mq->mq_receivers--;
1931		if (error == EAGAIN)
1932			error = ETIMEDOUT;
1933	}
1934	if (*msg != NULL) {
1935		error = 0;
1936		TAILQ_REMOVE(&mq->mq_msgq, *msg, msg_link);
1937		mq->mq_curmsgs--;
1938		mq->mq_totalbytes -= (*msg)->msg_size;
1939		if (mq->mq_senders)
1940			wakeup_one(&mq->mq_senders);
1941		if (mq->mq_flags & MQ_WSEL) {
1942			mq->mq_flags &= ~MQ_WSEL;
1943			selwakeup(&mq->mq_wsel);
1944		}
1945		KNOTE_LOCKED(&mq->mq_wsel.si_note, 0);
1946	}
1947	if (mq->mq_notifier != NULL && mq->mq_receivers == 0 &&
1948	    !TAILQ_EMPTY(&mq->mq_msgq)) {
1949		mqueue_send_notification(mq);
1950	}
1951	mtx_unlock(&mq->mq_mutex);
1952	return (error);
1953}
1954
1955static __inline struct mqueue_notifier *
1956notifier_alloc(void)
1957{
1958	return (uma_zalloc(mqnoti_zone, M_WAITOK | M_ZERO));
1959}
1960
1961static __inline void
1962notifier_free(struct mqueue_notifier *p)
1963{
1964	uma_zfree(mqnoti_zone, p);
1965}
1966
1967static struct mqueue_notifier *
1968notifier_search(struct proc *p, int fd)
1969{
1970	struct mqueue_notifier *nt;
1971
1972	LIST_FOREACH(nt, &p->p_mqnotifier, nt_link) {
1973		if (nt->nt_ksi.ksi_mqd == fd)
1974			break;
1975	}
1976	return (nt);
1977}
1978
1979static __inline void
1980notifier_insert(struct proc *p, struct mqueue_notifier *nt)
1981{
1982	LIST_INSERT_HEAD(&p->p_mqnotifier, nt, nt_link);
1983}
1984
1985static __inline void
1986notifier_delete(struct proc *p, struct mqueue_notifier *nt)
1987{
1988	LIST_REMOVE(nt, nt_link);
1989	notifier_free(nt);
1990}
1991
1992static void
1993notifier_remove(struct proc *p, struct mqueue *mq, int fd)
1994{
1995	struct mqueue_notifier *nt;
1996
1997	mtx_assert(&mq->mq_mutex, MA_OWNED);
1998	PROC_LOCK(p);
1999	nt = notifier_search(p, fd);
2000	if (nt != NULL) {
2001		if (mq->mq_notifier == nt)
2002			mq->mq_notifier = NULL;
2003		sigqueue_take(&nt->nt_ksi);
2004		notifier_delete(p, nt);
2005	}
2006	PROC_UNLOCK(p);
2007}
2008
2009static int
2010kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode,
2011    const struct mq_attr *attr)
2012{
2013	char path[MQFS_NAMELEN + 1];
2014	struct mqfs_node *pn;
2015	struct pwddesc *pdp;
2016	struct file *fp;
2017	struct mqueue *mq;
2018	int fd, error, len, cmode;
2019
2020	AUDIT_ARG_FFLAGS(flags);
2021	AUDIT_ARG_MODE(mode);
2022
2023	pdp = td->td_proc->p_pd;
2024	cmode = (((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT);
2025	mq = NULL;
2026	if ((flags & O_CREAT) != 0 && attr != NULL) {
2027		if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > maxmsg)
2028			return (EINVAL);
2029		if (attr->mq_msgsize <= 0 || attr->mq_msgsize > maxmsgsize)
2030			return (EINVAL);
2031	}
2032
2033	error = copyinstr(upath, path, MQFS_NAMELEN + 1, NULL);
2034        if (error)
2035		return (error);
2036
2037	/*
2038	 * The first character of name must be a slash  (/) character
2039	 * and the remaining characters of name cannot include any slash
2040	 * characters.
2041	 */
2042	len = strlen(path);
2043	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
2044		return (EINVAL);
2045	/*
2046	 * "." and ".." are magic directories, populated on the fly, and cannot
2047	 * be opened as queues.
2048	 */
2049	if (strcmp(path, "/.") == 0 || strcmp(path, "/..") == 0)
2050		return (EINVAL);
2051	AUDIT_ARG_UPATH1_CANON(path);
2052
2053	error = falloc(td, &fp, &fd, O_CLOEXEC);
2054	if (error)
2055		return (error);
2056
2057	sx_xlock(&mqfs_data.mi_lock);
2058	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
2059	if (pn == NULL) {
2060		if (!(flags & O_CREAT)) {
2061			error = ENOENT;
2062		} else {
2063			mq = mqueue_alloc(attr);
2064			if (mq == NULL) {
2065				error = ENFILE;
2066			} else {
2067				pn = mqfs_create_file(mqfs_data.mi_root,
2068				         path + 1, len - 1, td->td_ucred,
2069					 cmode);
2070				if (pn == NULL) {
2071					error = ENOSPC;
2072					mqueue_free(mq);
2073				}
2074			}
2075		}
2076
2077		if (error == 0) {
2078			pn->mn_data = mq;
2079		}
2080	} else {
2081		if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) {
2082			error = EEXIST;
2083		} else {
2084			accmode_t accmode = 0;
2085
2086			if (flags & FREAD)
2087				accmode |= VREAD;
2088			if (flags & FWRITE)
2089				accmode |= VWRITE;
2090			error = vaccess(VREG, pn->mn_mode, pn->mn_uid,
2091			    pn->mn_gid, accmode, td->td_ucred);
2092		}
2093	}
2094
2095	if (error) {
2096		sx_xunlock(&mqfs_data.mi_lock);
2097		fdclose(td, fp, fd);
2098		fdrop(fp, td);
2099		return (error);
2100	}
2101
2102	mqnode_addref(pn);
2103	sx_xunlock(&mqfs_data.mi_lock);
2104
2105	finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn,
2106	    &mqueueops);
2107
2108	td->td_retval[0] = fd;
2109	fdrop(fp, td);
2110	return (0);
2111}
2112
2113/*
2114 * Syscall to open a message queue.
2115 */
2116int
2117sys_kmq_open(struct thread *td, struct kmq_open_args *uap)
2118{
2119	struct mq_attr attr;
2120	int flags, error;
2121
2122	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
2123		return (EINVAL);
2124	flags = FFLAGS(uap->flags);
2125	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2126		error = copyin(uap->attr, &attr, sizeof(attr));
2127		if (error)
2128			return (error);
2129	}
2130	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2131	    uap->attr != NULL ? &attr : NULL));
2132}
2133
2134/*
2135 * Syscall to unlink a message queue.
2136 */
2137int
2138sys_kmq_unlink(struct thread *td, struct kmq_unlink_args *uap)
2139{
2140	char path[MQFS_NAMELEN+1];
2141	struct mqfs_node *pn;
2142	int error, len;
2143
2144	error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL);
2145        if (error)
2146		return (error);
2147
2148	len = strlen(path);
2149	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
2150		return (EINVAL);
2151	if (strcmp(path, "/.") == 0 || strcmp(path, "/..") == 0)
2152		return (EINVAL);
2153	AUDIT_ARG_UPATH1_CANON(path);
2154
2155	sx_xlock(&mqfs_data.mi_lock);
2156	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
2157	if (pn != NULL)
2158		error = do_unlink(pn, td->td_ucred);
2159	else
2160		error = ENOENT;
2161	sx_xunlock(&mqfs_data.mi_lock);
2162	return (error);
2163}
2164
2165typedef int (*_fgetf)(struct thread *, int, cap_rights_t *, struct file **);
2166
2167/*
2168 * Get message queue by giving file slot
2169 */
2170static int
2171_getmq(struct thread *td, int fd, cap_rights_t *rightsp, _fgetf func,
2172       struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq)
2173{
2174	struct mqfs_node *pn;
2175	int error;
2176
2177	error = func(td, fd, rightsp, fpp);
2178	if (error)
2179		return (error);
2180	if (&mqueueops != (*fpp)->f_ops) {
2181		fdrop(*fpp, td);
2182		return (EBADF);
2183	}
2184	pn = (*fpp)->f_data;
2185	if (ppn)
2186		*ppn = pn;
2187	if (pmq)
2188		*pmq = pn->mn_data;
2189	return (0);
2190}
2191
2192static __inline int
2193getmq(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn,
2194	struct mqueue **pmq)
2195{
2196
2197	return _getmq(td, fd, &cap_event_rights, fget,
2198	    fpp, ppn, pmq);
2199}
2200
2201static __inline int
2202getmq_read(struct thread *td, int fd, struct file **fpp,
2203	 struct mqfs_node **ppn, struct mqueue **pmq)
2204{
2205
2206	return _getmq(td, fd, &cap_read_rights, fget_read,
2207	    fpp, ppn, pmq);
2208}
2209
2210static __inline int
2211getmq_write(struct thread *td, int fd, struct file **fpp,
2212	struct mqfs_node **ppn, struct mqueue **pmq)
2213{
2214
2215	return _getmq(td, fd, &cap_write_rights, fget_write,
2216	    fpp, ppn, pmq);
2217}
2218
2219static int
2220kern_kmq_setattr(struct thread *td, int mqd, const struct mq_attr *attr,
2221    struct mq_attr *oattr)
2222{
2223	struct mqueue *mq;
2224	struct file *fp;
2225	u_int oflag, flag;
2226	int error;
2227
2228	AUDIT_ARG_FD(mqd);
2229	if (attr != NULL && (attr->mq_flags & ~O_NONBLOCK) != 0)
2230		return (EINVAL);
2231	error = getmq(td, mqd, &fp, NULL, &mq);
2232	if (error)
2233		return (error);
2234	oattr->mq_maxmsg  = mq->mq_maxmsg;
2235	oattr->mq_msgsize = mq->mq_msgsize;
2236	oattr->mq_curmsgs = mq->mq_curmsgs;
2237	if (attr != NULL) {
2238		do {
2239			oflag = flag = fp->f_flag;
2240			flag &= ~O_NONBLOCK;
2241			flag |= (attr->mq_flags & O_NONBLOCK);
2242		} while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0);
2243	} else
2244		oflag = fp->f_flag;
2245	oattr->mq_flags = (O_NONBLOCK & oflag);
2246	fdrop(fp, td);
2247	return (error);
2248}
2249
2250int
2251sys_kmq_setattr(struct thread *td, struct kmq_setattr_args *uap)
2252{
2253	struct mq_attr attr, oattr;
2254	int error;
2255
2256	if (uap->attr != NULL) {
2257		error = copyin(uap->attr, &attr, sizeof(attr));
2258		if (error != 0)
2259			return (error);
2260	}
2261	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2262	    &oattr);
2263	if (error == 0 && uap->oattr != NULL) {
2264		bzero(oattr.__reserved, sizeof(oattr.__reserved));
2265		error = copyout(&oattr, uap->oattr, sizeof(oattr));
2266	}
2267	return (error);
2268}
2269
2270int
2271sys_kmq_timedreceive(struct thread *td, struct kmq_timedreceive_args *uap)
2272{
2273	struct mqueue *mq;
2274	struct file *fp;
2275	struct timespec *abs_timeout, ets;
2276	int error;
2277	int waitok;
2278
2279	AUDIT_ARG_FD(uap->mqd);
2280	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2281	if (error)
2282		return (error);
2283	if (uap->abs_timeout != NULL) {
2284		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2285		if (error != 0)
2286			goto out;
2287		abs_timeout = &ets;
2288	} else
2289		abs_timeout = NULL;
2290	waitok = !(fp->f_flag & O_NONBLOCK);
2291	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2292		uap->msg_prio, waitok, abs_timeout);
2293out:
2294	fdrop(fp, td);
2295	return (error);
2296}
2297
2298int
2299sys_kmq_timedsend(struct thread *td, struct kmq_timedsend_args *uap)
2300{
2301	struct mqueue *mq;
2302	struct file *fp;
2303	struct timespec *abs_timeout, ets;
2304	int error, waitok;
2305
2306	AUDIT_ARG_FD(uap->mqd);
2307	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2308	if (error)
2309		return (error);
2310	if (uap->abs_timeout != NULL) {
2311		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2312		if (error != 0)
2313			goto out;
2314		abs_timeout = &ets;
2315	} else
2316		abs_timeout = NULL;
2317	waitok = !(fp->f_flag & O_NONBLOCK);
2318	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2319		uap->msg_prio, waitok, abs_timeout);
2320out:
2321	fdrop(fp, td);
2322	return (error);
2323}
2324
2325static int
2326kern_kmq_notify(struct thread *td, int mqd, struct sigevent *sigev)
2327{
2328	struct filedesc *fdp;
2329	struct proc *p;
2330	struct mqueue *mq;
2331	struct file *fp, *fp2;
2332	struct mqueue_notifier *nt, *newnt = NULL;
2333	int error;
2334
2335	AUDIT_ARG_FD(mqd);
2336	if (sigev != NULL) {
2337		if (sigev->sigev_notify != SIGEV_SIGNAL &&
2338		    sigev->sigev_notify != SIGEV_THREAD_ID &&
2339		    sigev->sigev_notify != SIGEV_NONE)
2340			return (EINVAL);
2341		if ((sigev->sigev_notify == SIGEV_SIGNAL ||
2342		    sigev->sigev_notify == SIGEV_THREAD_ID) &&
2343		    !_SIG_VALID(sigev->sigev_signo))
2344			return (EINVAL);
2345	}
2346	p = td->td_proc;
2347	fdp = td->td_proc->p_fd;
2348	error = getmq(td, mqd, &fp, NULL, &mq);
2349	if (error)
2350		return (error);
2351again:
2352	FILEDESC_SLOCK(fdp);
2353	fp2 = fget_locked(fdp, mqd);
2354	if (fp2 == NULL) {
2355		FILEDESC_SUNLOCK(fdp);
2356		error = EBADF;
2357		goto out;
2358	}
2359#ifdef CAPABILITIES
2360	error = cap_check(cap_rights(fdp, mqd), &cap_event_rights);
2361	if (error) {
2362		FILEDESC_SUNLOCK(fdp);
2363		goto out;
2364	}
2365#endif
2366	if (fp2 != fp) {
2367		FILEDESC_SUNLOCK(fdp);
2368		error = EBADF;
2369		goto out;
2370	}
2371	mtx_lock(&mq->mq_mutex);
2372	FILEDESC_SUNLOCK(fdp);
2373	if (sigev != NULL) {
2374		if (mq->mq_notifier != NULL) {
2375			error = EBUSY;
2376		} else {
2377			PROC_LOCK(p);
2378			nt = notifier_search(p, mqd);
2379			if (nt == NULL) {
2380				if (newnt == NULL) {
2381					PROC_UNLOCK(p);
2382					mtx_unlock(&mq->mq_mutex);
2383					newnt = notifier_alloc();
2384					goto again;
2385				}
2386			}
2387
2388			if (nt != NULL) {
2389				sigqueue_take(&nt->nt_ksi);
2390				if (newnt != NULL) {
2391					notifier_free(newnt);
2392					newnt = NULL;
2393				}
2394			} else {
2395				nt = newnt;
2396				newnt = NULL;
2397				ksiginfo_init(&nt->nt_ksi);
2398				nt->nt_ksi.ksi_flags |= KSI_INS | KSI_EXT;
2399				nt->nt_ksi.ksi_code = SI_MESGQ;
2400				nt->nt_proc = p;
2401				nt->nt_ksi.ksi_mqd = mqd;
2402				notifier_insert(p, nt);
2403			}
2404			nt->nt_sigev = *sigev;
2405			mq->mq_notifier = nt;
2406			PROC_UNLOCK(p);
2407			/*
2408			 * if there is no receivers and message queue
2409			 * is not empty, we should send notification
2410			 * as soon as possible.
2411			 */
2412			if (mq->mq_receivers == 0 &&
2413			    !TAILQ_EMPTY(&mq->mq_msgq))
2414				mqueue_send_notification(mq);
2415		}
2416	} else {
2417		notifier_remove(p, mq, mqd);
2418	}
2419	mtx_unlock(&mq->mq_mutex);
2420
2421out:
2422	fdrop(fp, td);
2423	if (newnt != NULL)
2424		notifier_free(newnt);
2425	return (error);
2426}
2427
2428int
2429sys_kmq_notify(struct thread *td, struct kmq_notify_args *uap)
2430{
2431	struct sigevent ev, *evp;
2432	int error;
2433
2434	if (uap->sigev == NULL) {
2435		evp = NULL;
2436	} else {
2437		error = copyin(uap->sigev, &ev, sizeof(ev));
2438		if (error != 0)
2439			return (error);
2440		evp = &ev;
2441	}
2442	return (kern_kmq_notify(td, uap->mqd, evp));
2443}
2444
2445static void
2446mqueue_fdclose(struct thread *td, int fd, struct file *fp)
2447{
2448	struct mqueue *mq;
2449#ifdef INVARIANTS
2450	struct filedesc *fdp;
2451
2452	fdp = td->td_proc->p_fd;
2453	FILEDESC_LOCK_ASSERT(fdp);
2454#endif
2455
2456	if (fp->f_ops == &mqueueops) {
2457		mq = FPTOMQ(fp);
2458		mtx_lock(&mq->mq_mutex);
2459		notifier_remove(td->td_proc, mq, fd);
2460
2461		/* have to wakeup thread in same process */
2462		if (mq->mq_flags & MQ_RSEL) {
2463			mq->mq_flags &= ~MQ_RSEL;
2464			selwakeup(&mq->mq_rsel);
2465		}
2466		if (mq->mq_flags & MQ_WSEL) {
2467			mq->mq_flags &= ~MQ_WSEL;
2468			selwakeup(&mq->mq_wsel);
2469		}
2470		mtx_unlock(&mq->mq_mutex);
2471	}
2472}
2473
2474static void
2475mq_proc_exit(void *arg __unused, struct proc *p)
2476{
2477	struct filedesc *fdp;
2478	struct file *fp;
2479	struct mqueue *mq;
2480	int i;
2481
2482	fdp = p->p_fd;
2483	FILEDESC_SLOCK(fdp);
2484	for (i = 0; i < fdp->fd_nfiles; ++i) {
2485		fp = fget_locked(fdp, i);
2486		if (fp != NULL && fp->f_ops == &mqueueops) {
2487			mq = FPTOMQ(fp);
2488			mtx_lock(&mq->mq_mutex);
2489			notifier_remove(p, FPTOMQ(fp), i);
2490			mtx_unlock(&mq->mq_mutex);
2491		}
2492	}
2493	FILEDESC_SUNLOCK(fdp);
2494	KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left"));
2495}
2496
2497static int
2498mqf_poll(struct file *fp, int events, struct ucred *active_cred,
2499	struct thread *td)
2500{
2501	struct mqueue *mq = FPTOMQ(fp);
2502	int revents = 0;
2503
2504	mtx_lock(&mq->mq_mutex);
2505	if (events & (POLLIN | POLLRDNORM)) {
2506		if (mq->mq_curmsgs) {
2507			revents |= events & (POLLIN | POLLRDNORM);
2508		} else {
2509			mq->mq_flags |= MQ_RSEL;
2510			selrecord(td, &mq->mq_rsel);
2511 		}
2512	}
2513	if (events & POLLOUT) {
2514		if (mq->mq_curmsgs < mq->mq_maxmsg)
2515			revents |= POLLOUT;
2516		else {
2517			mq->mq_flags |= MQ_WSEL;
2518			selrecord(td, &mq->mq_wsel);
2519		}
2520	}
2521	mtx_unlock(&mq->mq_mutex);
2522	return (revents);
2523}
2524
2525static int
2526mqf_close(struct file *fp, struct thread *td)
2527{
2528	struct mqfs_node *pn;
2529
2530	fp->f_ops = &badfileops;
2531	pn = fp->f_data;
2532	fp->f_data = NULL;
2533	sx_xlock(&mqfs_data.mi_lock);
2534	mqnode_release(pn);
2535	sx_xunlock(&mqfs_data.mi_lock);
2536	return (0);
2537}
2538
2539static int
2540mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
2541	struct thread *td)
2542{
2543	struct mqfs_node *pn = fp->f_data;
2544
2545	bzero(st, sizeof *st);
2546	sx_xlock(&mqfs_data.mi_lock);
2547	st->st_atim = pn->mn_atime;
2548	st->st_mtim = pn->mn_mtime;
2549	st->st_ctim = pn->mn_ctime;
2550	st->st_birthtim = pn->mn_birth;
2551	st->st_uid = pn->mn_uid;
2552	st->st_gid = pn->mn_gid;
2553	st->st_mode = S_IFIFO | pn->mn_mode;
2554	sx_xunlock(&mqfs_data.mi_lock);
2555	return (0);
2556}
2557
2558static int
2559mqf_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
2560    struct thread *td)
2561{
2562	struct mqfs_node *pn;
2563	int error;
2564
2565	error = 0;
2566	pn = fp->f_data;
2567	sx_xlock(&mqfs_data.mi_lock);
2568	error = vaccess(VREG, pn->mn_mode, pn->mn_uid, pn->mn_gid, VADMIN,
2569	    active_cred);
2570	if (error != 0)
2571		goto out;
2572	pn->mn_mode = mode & ACCESSPERMS;
2573out:
2574	sx_xunlock(&mqfs_data.mi_lock);
2575	return (error);
2576}
2577
2578static int
2579mqf_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
2580    struct thread *td)
2581{
2582	struct mqfs_node *pn;
2583	int error;
2584
2585	error = 0;
2586	pn = fp->f_data;
2587	sx_xlock(&mqfs_data.mi_lock);
2588	if (uid == (uid_t)-1)
2589		uid = pn->mn_uid;
2590	if (gid == (gid_t)-1)
2591		gid = pn->mn_gid;
2592	if (((uid != pn->mn_uid && uid != active_cred->cr_uid) ||
2593	    (gid != pn->mn_gid && !groupmember(gid, active_cred))) &&
2594	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN)))
2595		goto out;
2596	pn->mn_uid = uid;
2597	pn->mn_gid = gid;
2598out:
2599	sx_xunlock(&mqfs_data.mi_lock);
2600	return (error);
2601}
2602
2603static int
2604mqf_kqfilter(struct file *fp, struct knote *kn)
2605{
2606	struct mqueue *mq = FPTOMQ(fp);
2607	int error = 0;
2608
2609	if (kn->kn_filter == EVFILT_READ) {
2610		kn->kn_fop = &mq_rfiltops;
2611		knlist_add(&mq->mq_rsel.si_note, kn, 0);
2612	} else if (kn->kn_filter == EVFILT_WRITE) {
2613		kn->kn_fop = &mq_wfiltops;
2614		knlist_add(&mq->mq_wsel.si_note, kn, 0);
2615	} else
2616		error = EINVAL;
2617	return (error);
2618}
2619
2620static void
2621filt_mqdetach(struct knote *kn)
2622{
2623	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2624
2625	if (kn->kn_filter == EVFILT_READ)
2626		knlist_remove(&mq->mq_rsel.si_note, kn, 0);
2627	else if (kn->kn_filter == EVFILT_WRITE)
2628		knlist_remove(&mq->mq_wsel.si_note, kn, 0);
2629	else
2630		panic("filt_mqdetach");
2631}
2632
2633static int
2634filt_mqread(struct knote *kn, long hint)
2635{
2636	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2637
2638	mtx_assert(&mq->mq_mutex, MA_OWNED);
2639	return (mq->mq_curmsgs != 0);
2640}
2641
2642static int
2643filt_mqwrite(struct knote *kn, long hint)
2644{
2645	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2646
2647	mtx_assert(&mq->mq_mutex, MA_OWNED);
2648	return (mq->mq_curmsgs < mq->mq_maxmsg);
2649}
2650
2651static int
2652mqf_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
2653{
2654
2655	kif->kf_type = KF_TYPE_MQUEUE;
2656	return (0);
2657}
2658
2659static struct fileops mqueueops = {
2660	.fo_read		= invfo_rdwr,
2661	.fo_write		= invfo_rdwr,
2662	.fo_truncate		= invfo_truncate,
2663	.fo_ioctl		= invfo_ioctl,
2664	.fo_poll		= mqf_poll,
2665	.fo_kqfilter		= mqf_kqfilter,
2666	.fo_stat		= mqf_stat,
2667	.fo_close		= mqf_close,
2668	.fo_chmod		= mqf_chmod,
2669	.fo_chown		= mqf_chown,
2670	.fo_sendfile		= invfo_sendfile,
2671	.fo_fill_kinfo		= mqf_fill_kinfo,
2672	.fo_flags		= DFLAG_PASSABLE,
2673};
2674
2675static struct vop_vector mqfs_vnodeops = {
2676	.vop_default 		= &default_vnodeops,
2677	.vop_access		= mqfs_access,
2678	.vop_cachedlookup	= mqfs_lookup,
2679	.vop_lookup		= vfs_cache_lookup,
2680	.vop_reclaim		= mqfs_reclaim,
2681	.vop_create		= mqfs_create,
2682	.vop_remove		= mqfs_remove,
2683	.vop_inactive		= mqfs_inactive,
2684	.vop_open		= mqfs_open,
2685	.vop_close		= mqfs_close,
2686	.vop_getattr		= mqfs_getattr,
2687	.vop_setattr		= mqfs_setattr,
2688	.vop_read		= mqfs_read,
2689	.vop_write		= VOP_EOPNOTSUPP,
2690	.vop_readdir		= mqfs_readdir,
2691	.vop_mkdir		= VOP_EOPNOTSUPP,
2692	.vop_rmdir		= VOP_EOPNOTSUPP
2693};
2694VFS_VOP_VECTOR_REGISTER(mqfs_vnodeops);
2695
2696static struct vfsops mqfs_vfsops = {
2697	.vfs_init 		= mqfs_init,
2698	.vfs_uninit		= mqfs_uninit,
2699	.vfs_mount		= mqfs_mount,
2700	.vfs_unmount		= mqfs_unmount,
2701	.vfs_root		= mqfs_root,
2702	.vfs_statfs		= mqfs_statfs,
2703};
2704
2705static struct vfsconf mqueuefs_vfsconf = {
2706	.vfc_version = VFS_VERSION,
2707	.vfc_name = "mqueuefs",
2708	.vfc_vfsops = &mqfs_vfsops,
2709	.vfc_typenum = -1,
2710	.vfc_flags = VFCF_SYNTHETIC
2711};
2712
2713static struct syscall_helper_data mq_syscalls[] = {
2714	SYSCALL_INIT_HELPER(kmq_open),
2715	SYSCALL_INIT_HELPER_F(kmq_setattr, SYF_CAPENABLED),
2716	SYSCALL_INIT_HELPER_F(kmq_timedsend, SYF_CAPENABLED),
2717	SYSCALL_INIT_HELPER_F(kmq_timedreceive, SYF_CAPENABLED),
2718	SYSCALL_INIT_HELPER_F(kmq_notify, SYF_CAPENABLED),
2719	SYSCALL_INIT_HELPER(kmq_unlink),
2720	SYSCALL_INIT_LAST
2721};
2722
2723#ifdef COMPAT_FREEBSD32
2724#include <compat/freebsd32/freebsd32.h>
2725#include <compat/freebsd32/freebsd32_proto.h>
2726#include <compat/freebsd32/freebsd32_signal.h>
2727#include <compat/freebsd32/freebsd32_syscall.h>
2728#include <compat/freebsd32/freebsd32_util.h>
2729
2730static void
2731mq_attr_from32(const struct mq_attr32 *from, struct mq_attr *to)
2732{
2733
2734	to->mq_flags = from->mq_flags;
2735	to->mq_maxmsg = from->mq_maxmsg;
2736	to->mq_msgsize = from->mq_msgsize;
2737	to->mq_curmsgs = from->mq_curmsgs;
2738}
2739
2740static void
2741mq_attr_to32(const struct mq_attr *from, struct mq_attr32 *to)
2742{
2743
2744	to->mq_flags = from->mq_flags;
2745	to->mq_maxmsg = from->mq_maxmsg;
2746	to->mq_msgsize = from->mq_msgsize;
2747	to->mq_curmsgs = from->mq_curmsgs;
2748}
2749
2750int
2751freebsd32_kmq_open(struct thread *td, struct freebsd32_kmq_open_args *uap)
2752{
2753	struct mq_attr attr;
2754	struct mq_attr32 attr32;
2755	int flags, error;
2756
2757	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
2758		return (EINVAL);
2759	flags = FFLAGS(uap->flags);
2760	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2761		error = copyin(uap->attr, &attr32, sizeof(attr32));
2762		if (error)
2763			return (error);
2764		mq_attr_from32(&attr32, &attr);
2765	}
2766	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2767	    uap->attr != NULL ? &attr : NULL));
2768}
2769
2770int
2771freebsd32_kmq_setattr(struct thread *td, struct freebsd32_kmq_setattr_args *uap)
2772{
2773	struct mq_attr attr, oattr;
2774	struct mq_attr32 attr32, oattr32;
2775	int error;
2776
2777	if (uap->attr != NULL) {
2778		error = copyin(uap->attr, &attr32, sizeof(attr32));
2779		if (error != 0)
2780			return (error);
2781		mq_attr_from32(&attr32, &attr);
2782	}
2783	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2784	    &oattr);
2785	if (error == 0 && uap->oattr != NULL) {
2786		mq_attr_to32(&oattr, &oattr32);
2787		bzero(oattr32.__reserved, sizeof(oattr32.__reserved));
2788		error = copyout(&oattr32, uap->oattr, sizeof(oattr32));
2789	}
2790	return (error);
2791}
2792
2793int
2794freebsd32_kmq_timedsend(struct thread *td,
2795    struct freebsd32_kmq_timedsend_args *uap)
2796{
2797	struct mqueue *mq;
2798	struct file *fp;
2799	struct timespec32 ets32;
2800	struct timespec *abs_timeout, ets;
2801	int error;
2802	int waitok;
2803
2804	AUDIT_ARG_FD(uap->mqd);
2805	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2806	if (error)
2807		return (error);
2808	if (uap->abs_timeout != NULL) {
2809		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2810		if (error != 0)
2811			goto out;
2812		CP(ets32, ets, tv_sec);
2813		CP(ets32, ets, tv_nsec);
2814		abs_timeout = &ets;
2815	} else
2816		abs_timeout = NULL;
2817	waitok = !(fp->f_flag & O_NONBLOCK);
2818	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2819		uap->msg_prio, waitok, abs_timeout);
2820out:
2821	fdrop(fp, td);
2822	return (error);
2823}
2824
2825int
2826freebsd32_kmq_timedreceive(struct thread *td,
2827    struct freebsd32_kmq_timedreceive_args *uap)
2828{
2829	struct mqueue *mq;
2830	struct file *fp;
2831	struct timespec32 ets32;
2832	struct timespec *abs_timeout, ets;
2833	int error, waitok;
2834
2835	AUDIT_ARG_FD(uap->mqd);
2836	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2837	if (error)
2838		return (error);
2839	if (uap->abs_timeout != NULL) {
2840		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2841		if (error != 0)
2842			goto out;
2843		CP(ets32, ets, tv_sec);
2844		CP(ets32, ets, tv_nsec);
2845		abs_timeout = &ets;
2846	} else
2847		abs_timeout = NULL;
2848	waitok = !(fp->f_flag & O_NONBLOCK);
2849	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2850		uap->msg_prio, waitok, abs_timeout);
2851out:
2852	fdrop(fp, td);
2853	return (error);
2854}
2855
2856int
2857freebsd32_kmq_notify(struct thread *td, struct freebsd32_kmq_notify_args *uap)
2858{
2859	struct sigevent ev, *evp;
2860	struct sigevent32 ev32;
2861	int error;
2862
2863	if (uap->sigev == NULL) {
2864		evp = NULL;
2865	} else {
2866		error = copyin(uap->sigev, &ev32, sizeof(ev32));
2867		if (error != 0)
2868			return (error);
2869		error = convert_sigevent32(&ev32, &ev);
2870		if (error != 0)
2871			return (error);
2872		evp = &ev;
2873	}
2874	return (kern_kmq_notify(td, uap->mqd, evp));
2875}
2876
2877static struct syscall_helper_data mq32_syscalls[] = {
2878	SYSCALL32_INIT_HELPER(freebsd32_kmq_open),
2879	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_setattr, SYF_CAPENABLED),
2880	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_timedsend, SYF_CAPENABLED),
2881	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_timedreceive, SYF_CAPENABLED),
2882	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_notify, SYF_CAPENABLED),
2883	SYSCALL32_INIT_HELPER_COMPAT(kmq_unlink),
2884	SYSCALL_INIT_LAST
2885};
2886#endif
2887
2888static int
2889mqinit(void)
2890{
2891	int error;
2892
2893	error = syscall_helper_register(mq_syscalls, SY_THR_STATIC_KLD);
2894	if (error != 0)
2895		return (error);
2896#ifdef COMPAT_FREEBSD32
2897	error = syscall32_helper_register(mq32_syscalls, SY_THR_STATIC_KLD);
2898	if (error != 0)
2899		return (error);
2900#endif
2901	return (0);
2902}
2903
2904static int
2905mqunload(void)
2906{
2907
2908#ifdef COMPAT_FREEBSD32
2909	syscall32_helper_unregister(mq32_syscalls);
2910#endif
2911	syscall_helper_unregister(mq_syscalls);
2912	return (0);
2913}
2914
2915static int
2916mq_modload(struct module *module, int cmd, void *arg)
2917{
2918	int error = 0;
2919
2920	error = vfs_modevent(module, cmd, arg);
2921	if (error != 0)
2922		return (error);
2923
2924	switch (cmd) {
2925	case MOD_LOAD:
2926		error = mqinit();
2927		if (error != 0)
2928			mqunload();
2929		break;
2930	case MOD_UNLOAD:
2931		error = mqunload();
2932		break;
2933	default:
2934		break;
2935	}
2936	return (error);
2937}
2938
2939static moduledata_t mqueuefs_mod = {
2940	"mqueuefs",
2941	mq_modload,
2942	&mqueuefs_vfsconf
2943};
2944DECLARE_MODULE(mqueuefs, mqueuefs_mod, SI_SUB_VFS, SI_ORDER_MIDDLE);
2945MODULE_VERSION(mqueuefs, 1);
2946