/* sysv_msg.c revision 298835 */
/*-
 * Implementation of SVID messages
 *
 * Author:  Daniel Boulet
 *
 * Copyright 1993 Daniel Boulet and RTMX Inc.
 *
 * This system call was implemented by Daniel Boulet under contract from RTMX.
 *
 * Redistribution and use in source forms, with and without modification,
 * are permitted provided that this entire comment appears intact.
 *
 * Redistribution in binary form may occur without any restrictions.
 * Obviously, it would be nice if you gave credit where credit is due
 * but requiring it would be too onerous.
 *
 * This software is provided ``AS IS'' without any warranties of any kind.
 */
/*-
 * Copyright (c) 2003-2005 McAfee, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project in part by McAfee
 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
 * program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: stable/10/sys/kern/sysv_msg.c 298835 2016-04-30 04:02:32Z jamie $");
52
53#include "opt_compat.h"
54#include "opt_sysvipc.h"
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/sysproto.h>
59#include <sys/kernel.h>
60#include <sys/priv.h>
61#include <sys/proc.h>
62#include <sys/lock.h>
63#include <sys/mutex.h>
64#include <sys/module.h>
65#include <sys/mount.h>
66#include <sys/msg.h>
67#include <sys/racct.h>
68#include <sys/sx.h>
69#include <sys/syscall.h>
70#include <sys/syscallsubr.h>
71#include <sys/sysent.h>
72#include <sys/sysctl.h>
73#include <sys/malloc.h>
74#include <sys/jail.h>
75
76#include <security/mac/mac_framework.h>
77
78FEATURE(sysv_msg, "System V message queues support");
79
80static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
81
82static int msginit(void);
83static int msgunload(void);
84static int sysvmsg_modload(struct module *, int, void *);
85static void msq_remove(struct msqid_kernel *);
86static struct prison *msg_find_prison(struct ucred *);
87static int msq_prison_cansee(struct prison *, struct msqid_kernel *);
88static int msg_prison_check(void *, void *);
89static int msg_prison_set(void *, void *);
90static int msg_prison_get(void *, void *);
91static int msg_prison_remove(void *, void *);
92static void msg_prison_cleanup(struct prison *);
93
94
/*
 * Debug tracing.  DPRINTF takes a fully parenthesized printf argument list,
 * e.g. DPRINTF(("x=%d\n", x)).  Without MSG_DEBUG it expands to a no-op and
 * its arguments are not evaluated.
 */
#ifdef MSG_DEBUG
#define DPRINTF(a)	printf a
#else
#define DPRINTF(a)	(void)0
#endif

static void msg_freehdr(struct msg *msghdr);
102
/*
 * Compile-time defaults for the message queue limits.  Each one can be
 * overridden by the build; they seed the msginfo structure.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must not exceed 32767 */
#endif
/* Operands are parenthesized so that expression-valued overrides work. */
#define MSGMAX	((MSGSSZ) * (MSGSEG))
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max # of messages in the system */
#endif
119
120/*
121 * Based on the configuration parameters described in an SVR2 (yes, two)
122 * config(1m) man page.
123 *
124 * Each message is broken up and stored in segments that are msgssz bytes
125 * long.  For efficiency reasons, this should be a power of two.  Also,
126 * it doesn't make sense if it is less than 8 or greater than about 256.
127 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
128 * two between 8 and 1024 inclusive (and panic's if it isn't).
129 */
130struct msginfo msginfo = {
131                MSGMAX,         /* max chars in a message */
132                MSGMNI,         /* # of message queue identifiers */
133                MSGMNB,         /* max chars in a queue */
134                MSGTQL,         /* max messages in system */
135                MSGSSZ,         /* size of a message segment */
136                		/* (must be small power of 2 greater than 4) */
137                MSGSEG          /* number of message segments */
138};
139
/*
 * Macros to convert between msqid_ds's and msqid's.
 * (specific to this implementation)
 *
 * A msqid carries the table index in its low 16 bits and the queue's
 * sequence number in the next 16 bits.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | \
			    (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
147
/*
 * The rest of this file is specific to this particular implementation.
 */

/*
 * One entry per message segment.  Entries are chained through 'next', both
 * on the free list and within the segment chain of a message.
 */
struct msgmap {
	short	next;		/* next segment in buffer */
				/* -1 -> available */
				/* 0..(MSGSEG-1) -> index of next segment */
};
157
158#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
159
160static int nfree_msgmaps;	/* # of free map entries */
161static short free_msgmaps;	/* head of linked list of free map entries */
162static struct msg *free_msghdrs;/* list of free msg headers */
163static char *msgpool;		/* MSGMAX byte long msg buffer pool */
164static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
165static struct msg *msghdrs;	/* MSGTQL msg headers */
166static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
167static struct mtx msq_mtx;	/* global mutex for message queues. */
168static unsigned msg_prison_slot;/* prison OSD slot */
169
170static struct syscall_helper_data msg_syscalls[] = {
171	SYSCALL_INIT_HELPER(msgctl),
172	SYSCALL_INIT_HELPER(msgget),
173	SYSCALL_INIT_HELPER(msgsnd),
174	SYSCALL_INIT_HELPER(msgrcv),
175#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
176    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
177	SYSCALL_INIT_HELPER(msgsys),
178	SYSCALL_INIT_HELPER_COMPAT(freebsd7_msgctl),
179#endif
180	SYSCALL_INIT_LAST
181};
182
#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32.h>
#include <compat/freebsd32/freebsd32_ipc.h>
#include <compat/freebsd32/freebsd32_proto.h>
#include <compat/freebsd32/freebsd32_signal.h>
#include <compat/freebsd32/freebsd32_syscall.h>
#include <compat/freebsd32/freebsd32_util.h>

/*
 * 32-bit compatibility system calls; registered by msginit() and
 * unregistered by msgunload() when COMPAT_FREEBSD32 is configured.
 */
static struct syscall_helper_data msg32_syscalls[] = {
	SYSCALL32_INIT_HELPER(freebsd32_msgctl),
	SYSCALL32_INIT_HELPER(freebsd32_msgsnd),
	SYSCALL32_INIT_HELPER(freebsd32_msgrcv),
	SYSCALL32_INIT_HELPER_COMPAT(msgget),
	SYSCALL32_INIT_HELPER(freebsd32_msgsys),
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_msgctl),
#endif
	SYSCALL_INIT_LAST
};
#endif
204
205static int
206msginit()
207{
208	struct prison *pr;
209	void *rsv;
210	int i, error;
211	osd_method_t methods[PR_MAXMETHOD] = {
212	    [PR_METHOD_CHECK] =		msg_prison_check,
213	    [PR_METHOD_SET] =		msg_prison_set,
214	    [PR_METHOD_GET] =		msg_prison_get,
215	    [PR_METHOD_REMOVE] =	msg_prison_remove,
216	};
217
218	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
219	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
220	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
221	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
222	TUNABLE_INT_FETCH("kern.ipc.msgmnb", &msginfo.msgmnb);
223	TUNABLE_INT_FETCH("kern.ipc.msgtql", &msginfo.msgtql);
224
225	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
226	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
227	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
228	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
229	    M_WAITOK);
230
231	/*
232	 * msginfo.msgssz should be a power of two for efficiency reasons.
233	 * It is also pretty silly if msginfo.msgssz is less than 8
234	 * or greater than about 256 so ...
235	 */
236
237	i = 8;
238	while (i < 1024 && i != msginfo.msgssz)
239		i <<= 1;
240    	if (i != msginfo.msgssz) {
241		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
242		    msginfo.msgssz));
243		panic("msginfo.msgssz not a small power of 2");
244	}
245
246	if (msginfo.msgseg > 32767) {
247		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
248		panic("msginfo.msgseg > 32767");
249	}
250
251	for (i = 0; i < msginfo.msgseg; i++) {
252		if (i > 0)
253			msgmaps[i-1].next = i;
254		msgmaps[i].next = -1;	/* implies entry is available */
255	}
256	free_msgmaps = 0;
257	nfree_msgmaps = msginfo.msgseg;
258
259	for (i = 0; i < msginfo.msgtql; i++) {
260		msghdrs[i].msg_type = 0;
261		if (i > 0)
262			msghdrs[i-1].msg_next = &msghdrs[i];
263		msghdrs[i].msg_next = NULL;
264#ifdef MAC
265		mac_sysvmsg_init(&msghdrs[i]);
266#endif
267    	}
268	free_msghdrs = &msghdrs[0];
269
270	for (i = 0; i < msginfo.msgmni; i++) {
271		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
272		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
273		msqids[i].u.msg_perm.mode = 0;
274#ifdef MAC
275		mac_sysvmsq_init(&msqids[i]);
276#endif
277	}
278	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
279
280	/* Set current prisons according to their allow.sysvipc. */
281	msg_prison_slot = osd_jail_register(NULL, methods);
282	rsv = osd_reserve(msg_prison_slot);
283	prison_lock(&prison0);
284	(void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0);
285	prison_unlock(&prison0);
286	rsv = NULL;
287	sx_slock(&allprison_lock);
288	TAILQ_FOREACH(pr, &allprison, pr_list) {
289		if (rsv == NULL)
290			rsv = osd_reserve(msg_prison_slot);
291		prison_lock(pr);
292		if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
293			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
294			    &prison0);
295			rsv = NULL;
296		}
297		prison_unlock(pr);
298	}
299	if (rsv != NULL)
300		osd_free_reserved(rsv);
301	sx_sunlock(&allprison_lock);
302
303	error = syscall_helper_register(msg_syscalls);
304	if (error != 0)
305		return (error);
306#ifdef COMPAT_FREEBSD32
307	error = syscall32_helper_register(msg32_syscalls);
308	if (error != 0)
309		return (error);
310#endif
311	return (0);
312}
313
314static int
315msgunload()
316{
317	struct msqid_kernel *msqkptr;
318	int msqid;
319#ifdef MAC
320	int i;
321#endif
322
323	syscall_helper_unregister(msg_syscalls);
324#ifdef COMPAT_FREEBSD32
325	syscall32_helper_unregister(msg32_syscalls);
326#endif
327
328	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
329		/*
330		 * Look for an unallocated and unlocked msqid_ds.
331		 * msqid_ds's can be locked by msgsnd or msgrcv while
332		 * they are copying the message in/out.  We can't
333		 * re-use the entry until they release it.
334		 */
335		msqkptr = &msqids[msqid];
336		if (msqkptr->u.msg_qbytes != 0 ||
337		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
338			break;
339	}
340	if (msqid != msginfo.msgmni)
341		return (EBUSY);
342
343	if (msg_prison_slot != 0)
344		osd_jail_deregister(msg_prison_slot);
345#ifdef MAC
346	for (i = 0; i < msginfo.msgtql; i++)
347		mac_sysvmsg_destroy(&msghdrs[i]);
348	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
349		mac_sysvmsq_destroy(&msqids[msqid]);
350#endif
351	free(msgpool, M_MSG);
352	free(msgmaps, M_MSG);
353	free(msghdrs, M_MSG);
354	free(msqids, M_MSG);
355	mtx_destroy(&msq_mtx);
356	return (0);
357}
358
359
360static int
361sysvmsg_modload(struct module *module, int cmd, void *arg)
362{
363	int error = 0;
364
365	switch (cmd) {
366	case MOD_LOAD:
367		error = msginit();
368		if (error != 0)
369			msgunload();
370		break;
371	case MOD_UNLOAD:
372		error = msgunload();
373		break;
374	case MOD_SHUTDOWN:
375		break;
376	default:
377		error = EINVAL;
378		break;
379	}
380	return (error);
381}
382
383static moduledata_t sysvmsg_mod = {
384	"sysvmsg",
385	&sysvmsg_modload,
386	NULL
387};
388
389DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
390MODULE_VERSION(sysvmsg, 1);
391
392static void
393msg_freehdr(msghdr)
394	struct msg *msghdr;
395{
396	while (msghdr->msg_ts > 0) {
397		short next;
398		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
399			panic("msghdr->msg_spot out of range");
400		next = msgmaps[msghdr->msg_spot].next;
401		msgmaps[msghdr->msg_spot].next = free_msgmaps;
402		free_msgmaps = msghdr->msg_spot;
403		nfree_msgmaps++;
404		msghdr->msg_spot = next;
405		if (msghdr->msg_ts >= msginfo.msgssz)
406			msghdr->msg_ts -= msginfo.msgssz;
407		else
408			msghdr->msg_ts = 0;
409	}
410	if (msghdr->msg_spot != -1)
411		panic("msghdr->msg_spot != -1");
412	msghdr->msg_next = free_msghdrs;
413	free_msghdrs = msghdr;
414#ifdef MAC
415	mac_sysvmsg_cleanup(msghdr);
416#endif
417}
418
419static void
420msq_remove(struct msqid_kernel *msqkptr)
421{
422	struct msg *msghdr;
423
424	racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
425	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
426	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
427	crfree(msqkptr->cred);
428	msqkptr->cred = NULL;
429
430	/* Free the message headers */
431	msghdr = msqkptr->u.msg_first;
432	while (msghdr != NULL) {
433		struct msg *msghdr_tmp;
434
435		/* Free the segments of each message */
436		msqkptr->u.msg_cbytes -= msghdr->msg_ts;
437		msqkptr->u.msg_qnum--;
438		msghdr_tmp = msghdr;
439		msghdr = msghdr->msg_next;
440		msg_freehdr(msghdr_tmp);
441	}
442
443	if (msqkptr->u.msg_cbytes != 0)
444		panic("msg_cbytes is screwed up");
445	if (msqkptr->u.msg_qnum != 0)
446		panic("msg_qnum is screwed up");
447
448	msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
449
450#ifdef MAC
451	mac_sysvmsq_cleanup(msqkptr);
452#endif
453
454	wakeup(msqkptr);
455}
456
457static struct prison *
458msg_find_prison(struct ucred *cred)
459{
460	struct prison *pr, *rpr;
461
462	pr = cred->cr_prison;
463	prison_lock(pr);
464	rpr = osd_jail_get(pr, msg_prison_slot);
465	prison_unlock(pr);
466	return rpr;
467}
468
469static int
470msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr)
471{
472
473	if (msqkptr->cred == NULL ||
474	    !(rpr == msqkptr->cred->cr_prison ||
475	      prison_ischild(rpr, msqkptr->cred->cr_prison)))
476		return (EINVAL);
477	return (0);
478}
479
480#ifndef _SYS_SYSPROTO_H_
481struct msgctl_args {
482	int	msqid;
483	int	cmd;
484	struct	msqid_ds *buf;
485};
486#endif
487int
488sys_msgctl(td, uap)
489	struct thread *td;
490	register struct msgctl_args *uap;
491{
492	int msqid = uap->msqid;
493	int cmd = uap->cmd;
494	struct msqid_ds msqbuf;
495	int error;
496
497	DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf));
498	if (cmd == IPC_SET &&
499	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
500		return (error);
501	error = kern_msgctl(td, msqid, cmd, &msqbuf);
502	if (cmd == IPC_STAT && error == 0)
503		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
504	return (error);
505}
506
507int
508kern_msgctl(td, msqid, cmd, msqbuf)
509	struct thread *td;
510	int msqid;
511	int cmd;
512	struct msqid_ds *msqbuf;
513{
514	int rval, error, msqix;
515	register struct msqid_kernel *msqkptr;
516	struct prison *rpr;
517
518	rpr = msg_find_prison(td->td_ucred);
519	if (rpr == NULL)
520		return (ENOSYS);
521
522	msqix = IPCID_TO_IX(msqid);
523
524	if (msqix < 0 || msqix >= msginfo.msgmni) {
525		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
526		    msginfo.msgmni));
527		return (EINVAL);
528	}
529
530	msqkptr = &msqids[msqix];
531
532	mtx_lock(&msq_mtx);
533	if (msqkptr->u.msg_qbytes == 0) {
534		DPRINTF(("no such msqid\n"));
535		error = EINVAL;
536		goto done2;
537	}
538	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
539		DPRINTF(("wrong sequence number\n"));
540		error = EINVAL;
541		goto done2;
542	}
543
544	error = msq_prison_cansee(rpr, msqkptr);
545	if (error != 0) {
546		DPRINTF(("requester can't see prison\n"));
547		goto done2;
548	}
549
550#ifdef MAC
551	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
552	if (error != 0)
553		goto done2;
554#endif
555
556	error = 0;
557	rval = 0;
558
559	switch (cmd) {
560
561	case IPC_RMID:
562	{
563#ifdef MAC
564		struct msg *msghdr;
565#endif
566		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
567			goto done2;
568
569#ifdef MAC
570		/*
571		 * Check that the thread has MAC access permissions to
572		 * individual msghdrs.  Note: We need to do this in a
573		 * separate loop because the actual loop alters the
574		 * msq/msghdr info as it progresses, and there is no going
575		 * back if half the way through we discover that the
576		 * thread cannot free a certain msghdr.  The msq will get
577		 * into an inconsistent state.
578		 */
579		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
580		    msghdr = msghdr->msg_next) {
581			error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr);
582			if (error != 0)
583				goto done2;
584		}
585#endif
586
587		msq_remove(msqkptr);
588	}
589
590		break;
591
592	case IPC_SET:
593		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
594			goto done2;
595		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
596			error = priv_check(td, PRIV_IPC_MSGSIZE);
597			if (error)
598				goto done2;
599		}
600		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
601			DPRINTF(("can't increase msg_qbytes beyond %d"
602			    "(truncating)\n", msginfo.msgmnb));
603			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
604		}
605		if (msqbuf->msg_qbytes == 0) {
606			DPRINTF(("can't reduce msg_qbytes to 0\n"));
607			error = EINVAL;		/* non-standard errno! */
608			goto done2;
609		}
610		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
611		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
612		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
613		    (msqbuf->msg_perm.mode & 0777);
614		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
615		msqkptr->u.msg_ctime = time_second;
616		break;
617
618	case IPC_STAT:
619		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
620			DPRINTF(("requester doesn't have read access\n"));
621			goto done2;
622		}
623		*msqbuf = msqkptr->u;
624		if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison)
625			msqbuf->msg_perm.key = IPC_PRIVATE;
626		break;
627
628	default:
629		DPRINTF(("invalid command %d\n", cmd));
630		error = EINVAL;
631		goto done2;
632	}
633
634	if (error == 0)
635		td->td_retval[0] = rval;
636done2:
637	mtx_unlock(&msq_mtx);
638	return (error);
639}
640
641#ifndef _SYS_SYSPROTO_H_
642struct msgget_args {
643	key_t	key;
644	int	msgflg;
645};
646#endif
647
648int
649sys_msgget(td, uap)
650	struct thread *td;
651	register struct msgget_args *uap;
652{
653	int msqid, error = 0;
654	int key = uap->key;
655	int msgflg = uap->msgflg;
656	struct ucred *cred = td->td_ucred;
657	register struct msqid_kernel *msqkptr = NULL;
658
659	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
660
661	if (msg_find_prison(cred) == NULL)
662		return (ENOSYS);
663
664	mtx_lock(&msq_mtx);
665	if (key != IPC_PRIVATE) {
666		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
667			msqkptr = &msqids[msqid];
668			if (msqkptr->u.msg_qbytes != 0 &&
669			    msqkptr->cred != NULL &&
670			    msqkptr->cred->cr_prison == cred->cr_prison &&
671			    msqkptr->u.msg_perm.key == key)
672				break;
673		}
674		if (msqid < msginfo.msgmni) {
675			DPRINTF(("found public key\n"));
676			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
677				DPRINTF(("not exclusive\n"));
678				error = EEXIST;
679				goto done2;
680			}
681			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
682			    msgflg & 0700))) {
683				DPRINTF(("requester doesn't have 0%o access\n",
684				    msgflg & 0700));
685				goto done2;
686			}
687#ifdef MAC
688			error = mac_sysvmsq_check_msqget(cred, msqkptr);
689			if (error != 0)
690				goto done2;
691#endif
692			goto found;
693		}
694	}
695
696	DPRINTF(("need to allocate the msqid_ds\n"));
697	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
698		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
699			/*
700			 * Look for an unallocated and unlocked msqid_ds.
701			 * msqid_ds's can be locked by msgsnd or msgrcv while
702			 * they are copying the message in/out.  We can't
703			 * re-use the entry until they release it.
704			 */
705			msqkptr = &msqids[msqid];
706			if (msqkptr->u.msg_qbytes == 0 &&
707			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
708				break;
709		}
710		if (msqid == msginfo.msgmni) {
711			DPRINTF(("no more msqid_ds's available\n"));
712			error = ENOSPC;
713			goto done2;
714		}
715#ifdef RACCT
716		if (racct_enable) {
717			PROC_LOCK(td->td_proc);
718			error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
719			PROC_UNLOCK(td->td_proc);
720			if (error != 0) {
721				error = ENOSPC;
722				goto done2;
723			}
724		}
725#endif
726		DPRINTF(("msqid %d is available\n", msqid));
727		msqkptr->u.msg_perm.key = key;
728		msqkptr->u.msg_perm.cuid = cred->cr_uid;
729		msqkptr->u.msg_perm.uid = cred->cr_uid;
730		msqkptr->u.msg_perm.cgid = cred->cr_gid;
731		msqkptr->u.msg_perm.gid = cred->cr_gid;
732		msqkptr->u.msg_perm.mode = (msgflg & 0777);
733		msqkptr->cred = crhold(cred);
734		/* Make sure that the returned msqid is unique */
735		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
736		msqkptr->u.msg_first = NULL;
737		msqkptr->u.msg_last = NULL;
738		msqkptr->u.msg_cbytes = 0;
739		msqkptr->u.msg_qnum = 0;
740		msqkptr->u.msg_qbytes = msginfo.msgmnb;
741		msqkptr->u.msg_lspid = 0;
742		msqkptr->u.msg_lrpid = 0;
743		msqkptr->u.msg_stime = 0;
744		msqkptr->u.msg_rtime = 0;
745		msqkptr->u.msg_ctime = time_second;
746#ifdef MAC
747		mac_sysvmsq_create(cred, msqkptr);
748#endif
749	} else {
750		DPRINTF(("didn't find it and wasn't asked to create it\n"));
751		error = ENOENT;
752		goto done2;
753	}
754
755found:
756	/* Construct the unique msqid */
757	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
758done2:
759	mtx_unlock(&msq_mtx);
760	return (error);
761}
762
763#ifndef _SYS_SYSPROTO_H_
764struct msgsnd_args {
765	int	msqid;
766	const void	*msgp;
767	size_t	msgsz;
768	int	msgflg;
769};
770#endif
771int
772kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
773	struct thread *td;
774	int msqid;
775	const void *msgp;	/* XXX msgp is actually mtext. */
776	size_t msgsz;
777	int msgflg;
778	long mtype;
779{
780	int msqix, segs_needed, error = 0;
781	register struct msqid_kernel *msqkptr;
782	register struct msg *msghdr;
783	struct prison *rpr;
784	short next;
785#ifdef RACCT
786	size_t saved_msgsz;
787#endif
788
789	rpr = msg_find_prison(td->td_ucred);
790	if (rpr == NULL)
791		return (ENOSYS);
792
793	mtx_lock(&msq_mtx);
794	msqix = IPCID_TO_IX(msqid);
795
796	if (msqix < 0 || msqix >= msginfo.msgmni) {
797		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
798		    msginfo.msgmni));
799		error = EINVAL;
800		goto done2;
801	}
802
803	msqkptr = &msqids[msqix];
804	if (msqkptr->u.msg_qbytes == 0) {
805		DPRINTF(("no such message queue id\n"));
806		error = EINVAL;
807		goto done2;
808	}
809	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
810		DPRINTF(("wrong sequence number\n"));
811		error = EINVAL;
812		goto done2;
813	}
814
815	if ((error = msq_prison_cansee(rpr, msqkptr))) {
816		DPRINTF(("requester can't see prison\n"));
817		goto done2;
818	}
819
820	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
821		DPRINTF(("requester doesn't have write access\n"));
822		goto done2;
823	}
824
825#ifdef MAC
826	error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr);
827	if (error != 0)
828		goto done2;
829#endif
830
831#ifdef RACCT
832	if (racct_enable) {
833		PROC_LOCK(td->td_proc);
834		if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
835			PROC_UNLOCK(td->td_proc);
836			error = EAGAIN;
837			goto done2;
838		}
839		saved_msgsz = msgsz;
840		if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
841			racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
842			PROC_UNLOCK(td->td_proc);
843			error = EAGAIN;
844			goto done2;
845		}
846		PROC_UNLOCK(td->td_proc);
847	}
848#endif
849
850	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
851	DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz,
852	    msginfo.msgssz, segs_needed));
853	for (;;) {
854		int need_more_resources = 0;
855
856		/*
857		 * check msgsz
858		 * (inside this loop in case msg_qbytes changes while we sleep)
859		 */
860
861		if (msgsz > msqkptr->u.msg_qbytes) {
862			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
863			error = EINVAL;
864			goto done3;
865		}
866
867		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
868			DPRINTF(("msqid is locked\n"));
869			need_more_resources = 1;
870		}
871		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
872			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
873			need_more_resources = 1;
874		}
875		if (segs_needed > nfree_msgmaps) {
876			DPRINTF(("segs_needed > nfree_msgmaps\n"));
877			need_more_resources = 1;
878		}
879		if (free_msghdrs == NULL) {
880			DPRINTF(("no more msghdrs\n"));
881			need_more_resources = 1;
882		}
883
884		if (need_more_resources) {
885			int we_own_it;
886
887			if ((msgflg & IPC_NOWAIT) != 0) {
888				DPRINTF(("need more resources but caller "
889				    "doesn't want to wait\n"));
890				error = EAGAIN;
891				goto done3;
892			}
893
894			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
895				DPRINTF(("we don't own the msqid_ds\n"));
896				we_own_it = 0;
897			} else {
898				/* Force later arrivals to wait for our
899				   request */
900				DPRINTF(("we own the msqid_ds\n"));
901				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
902				we_own_it = 1;
903			}
904			DPRINTF(("msgsnd:  goodnight\n"));
905			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
906			    "msgsnd", hz);
907			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
908			if (we_own_it)
909				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
910			if (error == EWOULDBLOCK) {
911				DPRINTF(("msgsnd:  timed out\n"));
912				continue;
913			}
914			if (error != 0) {
915				DPRINTF(("msgsnd:  interrupted system call\n"));
916				error = EINTR;
917				goto done3;
918			}
919
920			/*
921			 * Make sure that the msq queue still exists
922			 */
923
924			if (msqkptr->u.msg_qbytes == 0) {
925				DPRINTF(("msqid deleted\n"));
926				error = EIDRM;
927				goto done3;
928			}
929
930		} else {
931			DPRINTF(("got all the resources that we need\n"));
932			break;
933		}
934	}
935
936	/*
937	 * We have the resources that we need.
938	 * Make sure!
939	 */
940
941	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
942		panic("msg_perm.mode & MSG_LOCKED");
943	if (segs_needed > nfree_msgmaps)
944		panic("segs_needed > nfree_msgmaps");
945	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
946		panic("msgsz + msg_cbytes > msg_qbytes");
947	if (free_msghdrs == NULL)
948		panic("no more msghdrs");
949
950	/*
951	 * Re-lock the msqid_ds in case we page-fault when copying in the
952	 * message
953	 */
954
955	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
956		panic("msqid_ds is already locked");
957	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
958
959	/*
960	 * Allocate a message header
961	 */
962
963	msghdr = free_msghdrs;
964	free_msghdrs = msghdr->msg_next;
965	msghdr->msg_spot = -1;
966	msghdr->msg_ts = msgsz;
967	msghdr->msg_type = mtype;
968#ifdef MAC
969	/*
970	 * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here
971	 * immediately?  Or, should it be checked just before the msg is
972	 * enqueued in the msgq (as it is done now)?
973	 */
974	mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr);
975#endif
976
977	/*
978	 * Allocate space for the message
979	 */
980
981	while (segs_needed > 0) {
982		if (nfree_msgmaps <= 0)
983			panic("not enough msgmaps");
984		if (free_msgmaps == -1)
985			panic("nil free_msgmaps");
986		next = free_msgmaps;
987		if (next <= -1)
988			panic("next too low #1");
989		if (next >= msginfo.msgseg)
990			panic("next out of range #1");
991		DPRINTF(("allocating segment %d to message\n", next));
992		free_msgmaps = msgmaps[next].next;
993		nfree_msgmaps--;
994		msgmaps[next].next = msghdr->msg_spot;
995		msghdr->msg_spot = next;
996		segs_needed--;
997	}
998
999	/*
1000	 * Validate the message type
1001	 */
1002
1003	if (msghdr->msg_type < 1) {
1004		msg_freehdr(msghdr);
1005		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
1006		wakeup(msqkptr);
1007		DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
1008		error = EINVAL;
1009		goto done3;
1010	}
1011
1012	/*
1013	 * Copy in the message body
1014	 */
1015
1016	next = msghdr->msg_spot;
1017	while (msgsz > 0) {
1018		size_t tlen;
1019		if (msgsz > msginfo.msgssz)
1020			tlen = msginfo.msgssz;
1021		else
1022			tlen = msgsz;
1023		if (next <= -1)
1024			panic("next too low #2");
1025		if (next >= msginfo.msgseg)
1026			panic("next out of range #2");
1027		mtx_unlock(&msq_mtx);
1028		if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz],
1029		    tlen)) != 0) {
1030			mtx_lock(&msq_mtx);
1031			DPRINTF(("error %d copying in message segment\n",
1032			    error));
1033			msg_freehdr(msghdr);
1034			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
1035			wakeup(msqkptr);
1036			goto done3;
1037		}
1038		mtx_lock(&msq_mtx);
1039		msgsz -= tlen;
1040		msgp = (const char *)msgp + tlen;
1041		next = msgmaps[next].next;
1042	}
1043	if (next != -1)
1044		panic("didn't use all the msg segments");
1045
1046	/*
1047	 * We've got the message.  Unlock the msqid_ds.
1048	 */
1049
1050	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
1051
1052	/*
1053	 * Make sure that the msqid_ds is still allocated.
1054	 */
1055
1056	if (msqkptr->u.msg_qbytes == 0) {
1057		msg_freehdr(msghdr);
1058		wakeup(msqkptr);
1059		error = EIDRM;
1060		goto done3;
1061	}
1062
1063#ifdef MAC
1064	/*
1065	 * Note: Since the task/thread allocates the msghdr and usually
1066	 * primes it with its own MAC label, for a majority of policies, it
1067	 * won't be necessary to check whether the msghdr has access
1068	 * permissions to the msgq.  The mac_sysvmsq_check_msqsnd check would
1069	 * suffice in that case.  However, this hook may be required where
1070	 * individual policies derive a non-identical label for the msghdr
1071	 * from the current thread label and may want to check the msghdr
1072	 * enqueue permissions, along with read/write permissions to the
1073	 * msgq.
1074	 */
1075	error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr);
1076	if (error != 0) {
1077		msg_freehdr(msghdr);
1078		wakeup(msqkptr);
1079		goto done3;
1080	}
1081#endif
1082
1083	/*
1084	 * Put the message into the queue
1085	 */
1086	if (msqkptr->u.msg_first == NULL) {
1087		msqkptr->u.msg_first = msghdr;
1088		msqkptr->u.msg_last = msghdr;
1089	} else {
1090		msqkptr->u.msg_last->msg_next = msghdr;
1091		msqkptr->u.msg_last = msghdr;
1092	}
1093	msqkptr->u.msg_last->msg_next = NULL;
1094
1095	msqkptr->u.msg_cbytes += msghdr->msg_ts;
1096	msqkptr->u.msg_qnum++;
1097	msqkptr->u.msg_lspid = td->td_proc->p_pid;
1098	msqkptr->u.msg_stime = time_second;
1099
1100	wakeup(msqkptr);
1101	td->td_retval[0] = 0;
1102done3:
1103#ifdef RACCT
1104	if (racct_enable && error != 0) {
1105		PROC_LOCK(td->td_proc);
1106		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
1107		racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz);
1108		PROC_UNLOCK(td->td_proc);
1109	}
1110#endif
1111done2:
1112	mtx_unlock(&msq_mtx);
1113	return (error);
1114}
1115
1116int
1117sys_msgsnd(td, uap)
1118	struct thread *td;
1119	register struct msgsnd_args *uap;
1120{
1121	int error;
1122	long mtype;
1123
1124	DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp,
1125	    uap->msgsz, uap->msgflg));
1126
1127	if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) {
1128		DPRINTF(("error %d copying the message type\n", error));
1129		return (error);
1130	}
1131	return (kern_msgsnd(td, uap->msqid,
1132	    (const char *)uap->msgp + sizeof(mtype),
1133	    uap->msgsz, uap->msgflg, mtype));
1134}
1135
/*
 * Argument structure for the msgrcv system call, declared here only when
 * _SYS_SYSPROTO_H_ is not defined (presumably the include guard of the
 * generated syscall prototype header -- confirm).
 */
1136#ifndef _SYS_SYSPROTO_H_
1137struct msgrcv_args {
1138	int	msqid;		/* message queue identifier */
1139	void	*msgp;		/* user buffer: long type followed by text */
1140	size_t	msgsz;		/* capacity of the text part, in bytes */
1141	long	msgtyp;		/* type selector (0, positive or negative) */
1142	int	msgflg;		/* IPC_NOWAIT and/or MSG_NOERROR */
1143};
1144#endif
/*
 * Kernel implementation of message receive.
 *
 * Parameters:
 *	td	calling thread; its credential is used for the prison,
 *		permission and MAC checks and td_retval[0] receives the
 *		number of text bytes copied out.
 *	msqid	queue identifier (index plus sequence number).
 *	msgp	user address that receives the message text only.
 *	msgsz	capacity of the user text buffer.
 *	msgtyp	0 selects the first message on the queue; otherwise the
 *		first message whose type equals msgtyp or is <= -msgtyp.
 *	msgflg	IPC_NOWAIT: fail with ENOMSG instead of sleeping;
 *		MSG_NOERROR: truncate an oversized message instead of
 *		failing with E2BIG.
 *	mtype	kernel address that receives the type of the dequeued message.
 *
 * Returns 0 on success, or: ENOSYS when msg_find_prison() yields no prison,
 * EINVAL for a bad index, unallocated queue or stale sequence number,
 * E2BIG, ENOMSG, EINTR when the sleep fails, EIDRM when the queue vanished
 * while sleeping, or the error from the prison/permission/MAC checks or
 * from copyout().
 *
 * msq_mtx is taken for the duration of the call and is dropped only around
 * each copyout() and while sleeping in msleep().
 */
1145int
1146kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
1147	struct thread *td;
1148	int msqid;
1149	void *msgp;	/* XXX msgp is actually mtext. */
1150	size_t msgsz;
1151	long msgtyp;
1152	int msgflg;
1153	long *mtype;
1154{
1155	size_t len;
1156	register struct msqid_kernel *msqkptr;
1157	register struct msg *msghdr;
1158	struct prison *rpr;
1159	int msqix, error = 0;
1160	short next;
1161
1162	rpr = msg_find_prison(td->td_ucred);
1163	if (rpr == NULL)
1164		return (ENOSYS);
1165
1166	msqix = IPCID_TO_IX(msqid);
1167
1168	if (msqix < 0 || msqix >= msginfo.msgmni) {
1169		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
1170		    msginfo.msgmni));
1171		return (EINVAL);
1172	}
1173
1174	msqkptr = &msqids[msqix];
1175	mtx_lock(&msq_mtx);
	/* A msg_qbytes of zero marks an unallocated queue slot. */
1176	if (msqkptr->u.msg_qbytes == 0) {
1177		DPRINTF(("no such message queue id\n"));
1178		error = EINVAL;
1179		goto done2;
1180	}
1181	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1182		DPRINTF(("wrong sequence number\n"));
1183		error = EINVAL;
1184		goto done2;
1185	}
1186
1187	if ((error = msq_prison_cansee(rpr, msqkptr))) {
1188		DPRINTF(("requester can't see prison\n"));
1189		goto done2;
1190	}
1191
1192	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1193		DPRINTF(("requester doesn't have read access\n"));
1194		goto done2;
1195	}
1196
1197#ifdef MAC
1198	error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr);
1199	if (error != 0)
1200		goto done2;
1201#endif
1202
	/*
	 * Loop until a matching message has been unlinked from the queue,
	 * sleeping between attempts unless IPC_NOWAIT is set.
	 */
1203	msghdr = NULL;
1204	while (msghdr == NULL) {
1205		if (msgtyp == 0) {
			/* Type 0: take whatever is at the head of the queue. */
1206			msghdr = msqkptr->u.msg_first;
1207			if (msghdr != NULL) {
				/*
				 * Oversized without MSG_NOERROR: fail and
				 * leave the message on the queue.
				 */
1208				if (msgsz < msghdr->msg_ts &&
1209				    (msgflg & MSG_NOERROR) == 0) {
1210					DPRINTF(("first message on the queue "
1211					    "is too big (want %zu, got %d)\n",
1212					    msgsz, msghdr->msg_ts));
1213					error = E2BIG;
1214					goto done2;
1215				}
1216#ifdef MAC
1217				error = mac_sysvmsq_check_msgrcv(td->td_ucred,
1218				    msghdr);
1219				if (error != 0)
1220					goto done2;
1221#endif
				/* Unlink the head of the queue. */
1222				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1223					msqkptr->u.msg_first = NULL;
1224					msqkptr->u.msg_last = NULL;
1225				} else {
1226					msqkptr->u.msg_first = msghdr->msg_next;
1227					if (msqkptr->u.msg_first == NULL)
1228						panic("msg_first/last screwed up #1");
1229				}
1230			}
1231		} else {
			/*
			 * Walk the list; 'prev' points at the link that
			 * references the current message and 'previous' is
			 * the message before it (NULL at the head).
			 */
1232			struct msg *previous;
1233			struct msg **prev;
1234
1235			previous = NULL;
1236			prev = &(msqkptr->u.msg_first);
1237			while ((msghdr = *prev) != NULL) {
1238				/*
1239				 * Is this message's type an exact match or is
1240				 * this message's type less than or equal to
1241				 * the absolute value of a negative msgtyp?
1242				 * Note that the second half of this test can
1243				 * NEVER be true if msgtyp is positive since
1244				 * msg_type is always positive!
1245				 */
1246
1247				if (msgtyp == msghdr->msg_type ||
1248				    msghdr->msg_type <= -msgtyp) {
1249					DPRINTF(("found message type %ld, "
1250					    "requested %ld\n",
1251					    msghdr->msg_type, msgtyp));
1252					if (msgsz < msghdr->msg_ts &&
1253					    (msgflg & MSG_NOERROR) == 0) {
1254						DPRINTF(("requested message "
1255						    "on the queue is too big "
1256						    "(want %zu, got %hu)\n",
1257						    msgsz, msghdr->msg_ts));
1258						error = E2BIG;
1259						goto done2;
1260					}
1261#ifdef MAC
1262					error = mac_sysvmsq_check_msgrcv(
1263					    td->td_ucred, msghdr);
1264					if (error != 0)
1265						goto done2;
1266#endif
					/*
					 * Unlink the match, then repair
					 * msg_last if it was the tail.
					 */
1267					*prev = msghdr->msg_next;
1268					if (msghdr == msqkptr->u.msg_last) {
1269						if (previous == NULL) {
1270							if (prev !=
1271							    &msqkptr->u.msg_first)
1272								panic("msg_first/last screwed up #2");
1273							msqkptr->u.msg_first =
1274							    NULL;
1275							msqkptr->u.msg_last =
1276							    NULL;
1277						} else {
1278							if (prev ==
1279							    &msqkptr->u.msg_first)
1280								panic("msg_first/last screwed up #3");
1281							msqkptr->u.msg_last =
1282							    previous;
1283						}
1284					}
1285					break;
1286				}
1287				previous = msghdr;
1288				prev = &(msghdr->msg_next);
1289			}
1290		}
1291
1292		/*
1293		 * We've either extracted the msghdr for the appropriate
1294		 * message or there isn't one.
1295		 * If there is one then bail out of this loop.
1296		 */
1297
1298		if (msghdr != NULL)
1299			break;
1300
1301		/*
1302		 * Hmph!  No message found.  Does the user want to wait?
1303		 */
1304
1305		if ((msgflg & IPC_NOWAIT) != 0) {
1306			DPRINTF(("no appropriate message found (msgtyp=%ld)\n",
1307			    msgtyp));
1308			/* The SVID says to return ENOMSG. */
1309			error = ENOMSG;
1310			goto done2;
1311		}
1312
1313		/*
1314		 * Wait for something to happen
1315		 */
1316
1317		DPRINTF(("msgrcv:  goodnight\n"));
1318		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1319		    "msgrcv", 0);
1320		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1321
		/*
		 * Any msleep() failure is reported as EINTR; the specific
		 * msleep() error code is discarded.
		 */
1322		if (error != 0) {
1323			DPRINTF(("msgrcv:  interrupted system call\n"));
1324			error = EINTR;
1325			goto done2;
1326		}
1327
1328		/*
1329		 * Make sure that the msq queue still exists
1330		 */
1331
1332		if (msqkptr->u.msg_qbytes == 0 ||
1333		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1334			DPRINTF(("msqid deleted\n"));
1335			error = EIDRM;
1336			goto done2;
1337		}
1338	}
1339
1340	/*
1341	 * Return the message to the user.
1342	 *
1343	 * First, do the bookkeeping (before we risk being interrupted).
1344	 */
1345
1346	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1347	msqkptr->u.msg_qnum--;
1348	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1349	msqkptr->u.msg_rtime = time_second;
1350
	/* Return the resource accounting charged against the queue's credential. */
1351	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, 1);
1352	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msghdr->msg_ts);
1353
1354	/*
1355	 * Make msgsz the actual amount that we'll be returning.
1356	 * Note that this effectively truncates the message if it is too long
1357	 * (since msgsz is never increased).
1358	 */
1359
1360	DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz,
1361	    msghdr->msg_ts));
1362	if (msgsz > msghdr->msg_ts)
1363		msgsz = msghdr->msg_ts;
1364	*mtype = msghdr->msg_type;
1365
1366	/*
1367	 * Return the segments to the user
1368	 */
1369
	/*
	 * Copy one segment (at most msginfo.msgssz bytes) per iteration,
	 * following the msgmaps[] chain that starts at msg_spot.
	 */
1370	next = msghdr->msg_spot;
1371	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1372		size_t tlen;
1373
1374		if (msgsz - len > msginfo.msgssz)
1375			tlen = msginfo.msgssz;
1376		else
1377			tlen = msgsz - len;
1378		if (next <= -1)
1379			panic("next too low #3");
1380		if (next >= msginfo.msgseg)
1381			panic("next out of range #3");
		/*
		 * msq_mtx is released around copyout() (presumably because
		 * the copy may fault and sleep -- confirm).
		 */
1382		mtx_unlock(&msq_mtx);
1383		error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen);
1384		mtx_lock(&msq_mtx);
		/*
		 * On failure the message is still freed: it was already
		 * unlinked above, so it is lost rather than requeued.
		 */
1385		if (error != 0) {
1386			DPRINTF(("error (%d) copying out message segment\n",
1387			    error));
1388			msg_freehdr(msghdr);
1389			wakeup(msqkptr);
1390			goto done2;
1391		}
1392		msgp = (char *)msgp + tlen;
1393		next = msgmaps[next].next;
1394	}
1395
1396	/*
1397	 * Done, return the actual number of bytes copied out.
1398	 */
1399
1400	msg_freehdr(msghdr);
	/* Wake any thread sleeping on this queue now that its state changed. */
1401	wakeup(msqkptr);
1402	td->td_retval[0] = msgsz;
1403done2:
1404	mtx_unlock(&msq_mtx);
1405	return (error);
1406}
1407
1408int
1409sys_msgrcv(td, uap)
1410	struct thread *td;
1411	register struct msgrcv_args *uap;
1412{
1413	int error;
1414	long mtype;
1415
1416	DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid,
1417	    uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg));
1418
1419	if ((error = kern_msgrcv(td, uap->msqid,
1420	    (char *)uap->msgp + sizeof(mtype), uap->msgsz,
1421	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1422		return (error);
1423	if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0)
1424		DPRINTF(("error %d copying the message type\n", error));
1425	return (error);
1426}
1427
1428static int
1429sysctl_msqids(SYSCTL_HANDLER_ARGS)
1430{
1431	struct msqid_kernel tmsqk;
1432	struct prison *pr, *rpr;
1433	int error, i;
1434
1435	pr = req->td->td_ucred->cr_prison;
1436	rpr = msg_find_prison(req->td->td_ucred);
1437	error = 0;
1438	for (i = 0; i < msginfo.msgmni; i++) {
1439		mtx_lock(&msq_mtx);
1440		if (msqids[i].u.msg_qbytes == 0 || rpr == NULL ||
1441		    msq_prison_cansee(rpr, &msqids[i]) != 0)
1442			bzero(&tmsqk, sizeof(tmsqk));
1443		else {
1444			tmsqk = msqids[i];
1445			if (tmsqk.cred->cr_prison != pr)
1446				tmsqk.u.msg_perm.key = IPC_PRIVATE;
1447		}
1448		mtx_unlock(&msq_mtx);
1449		error = SYSCTL_OUT(req, &tmsqk, sizeof(tmsqk));
1450		if (error != 0)
1451			break;
1452	}
1453	return (error);
1454}
1455
/*
 * kern.ipc sysctl nodes for the message facility.  Each integer node
 * exposes one field of msginfo (msgmax is CTLFLAG_RD, the others are
 * CTLFLAG_RDTUN); kern.ipc.msqids is an opaque read-only node served by
 * sysctl_msqids().
 */
1456SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1457    "Maximum message size");
1458SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
1459    "Number of message queue identifiers");
1460SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
1461    "Maximum number of bytes in a queue");
1462SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
1463    "Maximum number of messages in the system");
1464SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
1465    "Size of a message segment");
1466SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
1467    "Number of message segments");
1468SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids,
1469    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
1470    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1471
1472static int
1473msg_prison_check(void *obj, void *data)
1474{
1475	struct prison *pr = obj;
1476	struct prison *prpr;
1477	struct vfsoptlist *opts = data;
1478	int error, jsys;
1479
1480	/*
1481	 * sysvmsg is a jailsys integer.
1482	 * It must be "disable" if the parent jail is disabled.
1483	 */
1484	error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys));
1485	if (error != ENOENT) {
1486		if (error != 0)
1487			return (error);
1488		switch (jsys) {
1489		case JAIL_SYS_DISABLE:
1490			break;
1491		case JAIL_SYS_NEW:
1492		case JAIL_SYS_INHERIT:
1493			prison_lock(pr->pr_parent);
1494			prpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
1495			prison_unlock(pr->pr_parent);
1496			if (prpr == NULL)
1497				return (EPERM);
1498			break;
1499		default:
1500			return (EINVAL);
1501		}
1502	}
1503
1504	return (0);
1505}
1506
/*
 * Jail "set" method: apply the "sysvmsg" setting to prison 'obj'.
 *
 * The value comes from the "sysvmsg" option in 'data'; when that option is
 * absent it is derived from the "allow.sysvipc" / "allow.nosysvipc" flags,
 * and when neither is present jsys is -1 and nothing is changed.
 *
 * The prison's OSD slot (msg_prison_slot) holds its msg root prison:
 * NULL when disabled, the prison itself for JAIL_SYS_NEW, or the parent's
 * root for JAIL_SYS_INHERIT.  Whenever a prison stops being its own root,
 * msg_prison_cleanup() is called on it.  Descendant prisons are updated to
 * match.  Always returns 0.
 */
1507static int
1508msg_prison_set(void *obj, void *data)
1509{
1510	struct prison *pr = obj;
1511	struct prison *tpr, *orpr, *nrpr, *trpr;
1512	struct vfsoptlist *opts = data;
1513	void *rsv;
1514	int jsys, descend;
1515
1516	/*
1517	 * sysvmsg controls which jail is the root of the associated msgs (this
1518	 * jail or same as the parent), or if the feature is available at all.
1519	 */
1520	if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT)
1521		jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
1522		    ? JAIL_SYS_INHERIT
1523		    : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
1524		    ? JAIL_SYS_DISABLE
1525		    : -1;
1526	if (jsys == JAIL_SYS_DISABLE) {
		/* Drop this prison's root; orpr remembers the old one. */
1527		prison_lock(pr);
1528		orpr = osd_jail_get(pr, msg_prison_slot);
1529		if (orpr != NULL)
1530			osd_jail_del(pr, msg_prison_slot);
1531		prison_unlock(pr);
1532		if (orpr != NULL) {
1533			if (orpr == pr)
1534				msg_prison_cleanup(pr);
1535			/* Disable all child jails as well. */
1536			FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
1537				prison_lock(tpr);
1538				trpr = osd_jail_get(tpr, msg_prison_slot);
1539				if (trpr != NULL) {
1540					osd_jail_del(tpr, msg_prison_slot);
1541					prison_unlock(tpr);
1542					if (trpr == tpr)
1543						msg_prison_cleanup(tpr);
1544				} else {
					/*
					 * Already disabled; clearing descend
					 * presumably skips this subtree --
					 * confirm against the macro.
					 */
1545					prison_unlock(tpr);
1546					descend = 0;
1547				}
1548			}
1549		}
1550	} else if (jsys != -1) {
		/* Work out the new root: this prison or the parent's root. */
1551		if (jsys == JAIL_SYS_NEW)
1552			nrpr = pr;
1553		else {
1554			prison_lock(pr->pr_parent);
1555			nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
1556			prison_unlock(pr->pr_parent);
1557		}
		/*
		 * The OSD reservation is obtained before taking the prison
		 * lock and is released again if the root turns out unchanged.
		 */
1558		rsv = osd_reserve(msg_prison_slot);
1559		prison_lock(pr);
1560		orpr = osd_jail_get(pr, msg_prison_slot);
1561		if (orpr != nrpr)
1562			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
1563			    nrpr);
1564		else
1565			osd_free_reserved(rsv);
1566		prison_unlock(pr);
1567		if (orpr != nrpr) {
1568			if (orpr == pr)
1569				msg_prison_cleanup(pr);
1570			if (orpr != NULL) {
1571				/* Change child jails matching the old root. */
1572				FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
1573					prison_lock(tpr);
1574					trpr = osd_jail_get(tpr,
1575					    msg_prison_slot);
1576					if (trpr == orpr) {
1577						(void)osd_jail_set(tpr,
1578						    msg_prison_slot, nrpr);
1579						prison_unlock(tpr);
1580						if (trpr == tpr)
1581							msg_prison_cleanup(tpr);
1582					} else {
1583						prison_unlock(tpr);
1584						descend = 0;
1585					}
1586				}
1587			}
1588		}
1589	}
1590
1591	return (0);
1592}
1593
1594static int
1595msg_prison_get(void *obj, void *data)
1596{
1597	struct prison *pr = obj;
1598	struct prison *rpr;
1599	struct vfsoptlist *opts = data;
1600	int error, jsys;
1601
1602	/* Set sysvmsg based on the jail's root prison. */
1603	prison_lock(pr);
1604	rpr = osd_jail_get(pr, msg_prison_slot);
1605	prison_unlock(pr);
1606	jsys = rpr == NULL ? JAIL_SYS_DISABLE
1607	    : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
1608	error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys));
1609	if (error == ENOENT)
1610		error = 0;
1611	return (error);
1612}
1613
1614static int
1615msg_prison_remove(void *obj, void *data __unused)
1616{
1617	struct prison *pr = obj;
1618	struct prison *rpr;
1619
1620	prison_lock(pr);
1621	rpr = osd_jail_get(pr, msg_prison_slot);
1622	prison_unlock(pr);
1623	if (rpr == pr)
1624		msg_prison_cleanup(pr);
1625	return (0);
1626}
1627
1628static void
1629msg_prison_cleanup(struct prison *pr)
1630{
1631	struct msqid_kernel *msqkptr;
1632	int i;
1633
1634	/* Remove any msqs that belong to this jail. */
1635	mtx_lock(&msq_mtx);
1636	for (i = 0; i < msginfo.msgmni; i++) {
1637		msqkptr = &msqids[i];
1638		if (msqkptr->u.msg_qbytes != 0 &&
1639		    msqkptr->cred != NULL && msqkptr->cred->cr_prison == pr)
1640			msq_remove(msqkptr);
1641	}
1642	mtx_unlock(&msq_mtx);
1643}
1644
/* Declare the "sysvmsg" jail parameter handled by the msg_prison_* methods. */
1645SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues");
1646
1647#ifdef COMPAT_FREEBSD32
1648int
1649freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
1650{
1651
1652#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1653    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1654	switch (uap->which) {
1655	case 0:
1656		return (freebsd7_freebsd32_msgctl(td,
1657		    (struct freebsd7_freebsd32_msgctl_args *)&uap->a2));
1658	case 2:
1659		return (freebsd32_msgsnd(td,
1660		    (struct freebsd32_msgsnd_args *)&uap->a2));
1661	case 3:
1662		return (freebsd32_msgrcv(td,
1663		    (struct freebsd32_msgrcv_args *)&uap->a2));
1664	default:
1665		return (sys_msgsys(td, (struct msgsys_args *)uap));
1666	}
1667#else
1668	return (nosys(td, NULL));
1669#endif
1670}
1671
1672#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1673    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1674int
1675freebsd7_freebsd32_msgctl(struct thread *td,
1676    struct freebsd7_freebsd32_msgctl_args *uap)
1677{
1678	struct msqid_ds msqbuf;
1679	struct msqid_ds32_old msqbuf32;
1680	int error;
1681
1682	if (uap->cmd == IPC_SET) {
1683		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1684		if (error)
1685			return (error);
1686		freebsd32_ipcperm_old_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1687		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1688		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1689		CP(msqbuf32, msqbuf, msg_cbytes);
1690		CP(msqbuf32, msqbuf, msg_qnum);
1691		CP(msqbuf32, msqbuf, msg_qbytes);
1692		CP(msqbuf32, msqbuf, msg_lspid);
1693		CP(msqbuf32, msqbuf, msg_lrpid);
1694		CP(msqbuf32, msqbuf, msg_stime);
1695		CP(msqbuf32, msqbuf, msg_rtime);
1696		CP(msqbuf32, msqbuf, msg_ctime);
1697	}
1698	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1699	if (error)
1700		return (error);
1701	if (uap->cmd == IPC_STAT) {
1702		bzero(&msqbuf32, sizeof(msqbuf32));
1703		freebsd32_ipcperm_old_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1704		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1705		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1706		CP(msqbuf, msqbuf32, msg_cbytes);
1707		CP(msqbuf, msqbuf32, msg_qnum);
1708		CP(msqbuf, msqbuf32, msg_qbytes);
1709		CP(msqbuf, msqbuf32, msg_lspid);
1710		CP(msqbuf, msqbuf32, msg_lrpid);
1711		CP(msqbuf, msqbuf32, msg_stime);
1712		CP(msqbuf, msqbuf32, msg_rtime);
1713		CP(msqbuf, msqbuf32, msg_ctime);
1714		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1715	}
1716	return (error);
1717}
1718#endif
1719
1720int
1721freebsd32_msgctl(struct thread *td, struct freebsd32_msgctl_args *uap)
1722{
1723	struct msqid_ds msqbuf;
1724	struct msqid_ds32 msqbuf32;
1725	int error;
1726
1727	if (uap->cmd == IPC_SET) {
1728		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1729		if (error)
1730			return (error);
1731		freebsd32_ipcperm_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1732		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1733		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1734		CP(msqbuf32, msqbuf, msg_cbytes);
1735		CP(msqbuf32, msqbuf, msg_qnum);
1736		CP(msqbuf32, msqbuf, msg_qbytes);
1737		CP(msqbuf32, msqbuf, msg_lspid);
1738		CP(msqbuf32, msqbuf, msg_lrpid);
1739		CP(msqbuf32, msqbuf, msg_stime);
1740		CP(msqbuf32, msqbuf, msg_rtime);
1741		CP(msqbuf32, msqbuf, msg_ctime);
1742	}
1743	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1744	if (error)
1745		return (error);
1746	if (uap->cmd == IPC_STAT) {
1747		freebsd32_ipcperm_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1748		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1749		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1750		CP(msqbuf, msqbuf32, msg_cbytes);
1751		CP(msqbuf, msqbuf32, msg_qnum);
1752		CP(msqbuf, msqbuf32, msg_qbytes);
1753		CP(msqbuf, msqbuf32, msg_lspid);
1754		CP(msqbuf, msqbuf32, msg_lrpid);
1755		CP(msqbuf, msqbuf32, msg_stime);
1756		CP(msqbuf, msqbuf32, msg_rtime);
1757		CP(msqbuf, msqbuf32, msg_ctime);
1758		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1759	}
1760	return (error);
1761}
1762
1763int
1764freebsd32_msgsnd(struct thread *td, struct freebsd32_msgsnd_args *uap)
1765{
1766	const void *msgp;
1767	long mtype;
1768	int32_t mtype32;
1769	int error;
1770
1771	msgp = PTRIN(uap->msgp);
1772	if ((error = copyin(msgp, &mtype32, sizeof(mtype32))) != 0)
1773		return (error);
1774	mtype = mtype32;
1775	return (kern_msgsnd(td, uap->msqid,
1776	    (const char *)msgp + sizeof(mtype32),
1777	    uap->msgsz, uap->msgflg, mtype));
1778}
1779
1780int
1781freebsd32_msgrcv(struct thread *td, struct freebsd32_msgrcv_args *uap)
1782{
1783	void *msgp;
1784	long mtype;
1785	int32_t mtype32;
1786	int error;
1787
1788	msgp = PTRIN(uap->msgp);
1789	if ((error = kern_msgrcv(td, uap->msqid,
1790	    (char *)msgp + sizeof(mtype32), uap->msgsz,
1791	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1792		return (error);
1793	mtype32 = (int32_t)mtype;
1794	return (copyout(&mtype32, msgp, sizeof(mtype32)));
1795}
1796#endif
1797
1798#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1799    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1800
/*
 * Dispatch table used by sys_msgsys(), indexed by its 'which' selector:
 * 0 = freebsd7_msgctl, 1 = sys_msgget, 2 = sys_msgsnd, 3 = sys_msgrcv.
 */
1801/* XXX casting to (sy_call_t *) is bogus, as usual. */
1802static sy_call_t *msgcalls[] = {
1803	(sy_call_t *)freebsd7_msgctl, (sy_call_t *)sys_msgget,
1804	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
1805};
1806
1807/*
1808 * Entry point for all MSG calls.
1809 */
1810int
1811sys_msgsys(td, uap)
1812	struct thread *td;
1813	/* XXX actually varargs. */
1814	struct msgsys_args /* {
1815		int	which;
1816		int	a2;
1817		int	a3;
1818		int	a4;
1819		int	a5;
1820		int	a6;
1821	} */ *uap;
1822{
1823	int error;
1824
1825	if (uap->which < 0 ||
1826	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
1827		return (EINVAL);
1828	error = (*msgcalls[uap->which])(td, &uap->a2);
1829	return (error);
1830}
1831
/*
 * CP(src, dst, fld): copy member 'fld' from structure 'src' to structure
 * 'dst'.  Defined here only when no earlier definition is in effect.
 */
1832#ifndef CP
1833#define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
1834#endif
1835
/*
 * Argument structure for freebsd7_msgctl(), declared here only when
 * _SYS_SYSPROTO_H_ is not defined.
 */
1836#ifndef _SYS_SYSPROTO_H_
1837struct freebsd7_msgctl_args {
1838	int	msqid;			/* message queue identifier */
1839	int	cmd;			/* command, e.g. IPC_SET or IPC_STAT */
1840	struct	msqid_ds_old *buf;	/* user buffer in the old layout */
1841};
1842#endif
1843int
1844freebsd7_msgctl(td, uap)
1845	struct thread *td;
1846	struct freebsd7_msgctl_args *uap;
1847{
1848	struct msqid_ds_old msqold;
1849	struct msqid_ds msqbuf;
1850	int error;
1851
1852	DPRINTF(("call to freebsd7_msgctl(%d, %d, %p)\n", uap->msqid, uap->cmd,
1853	    uap->buf));
1854	if (uap->cmd == IPC_SET) {
1855		error = copyin(uap->buf, &msqold, sizeof(msqold));
1856		if (error)
1857			return (error);
1858		ipcperm_old2new(&msqold.msg_perm, &msqbuf.msg_perm);
1859		CP(msqold, msqbuf, msg_first);
1860		CP(msqold, msqbuf, msg_last);
1861		CP(msqold, msqbuf, msg_cbytes);
1862		CP(msqold, msqbuf, msg_qnum);
1863		CP(msqold, msqbuf, msg_qbytes);
1864		CP(msqold, msqbuf, msg_lspid);
1865		CP(msqold, msqbuf, msg_lrpid);
1866		CP(msqold, msqbuf, msg_stime);
1867		CP(msqold, msqbuf, msg_rtime);
1868		CP(msqold, msqbuf, msg_ctime);
1869	}
1870	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1871	if (error)
1872		return (error);
1873	if (uap->cmd == IPC_STAT) {
1874		bzero(&msqold, sizeof(msqold));
1875		ipcperm_new2old(&msqbuf.msg_perm, &msqold.msg_perm);
1876		CP(msqbuf, msqold, msg_first);
1877		CP(msqbuf, msqold, msg_last);
1878		CP(msqbuf, msqold, msg_cbytes);
1879		CP(msqbuf, msqold, msg_qnum);
1880		CP(msqbuf, msqold, msg_qbytes);
1881		CP(msqbuf, msqold, msg_lspid);
1882		CP(msqbuf, msqold, msg_lrpid);
1883		CP(msqbuf, msqold, msg_stime);
1884		CP(msqbuf, msqold, msg_rtime);
1885		CP(msqbuf, msqold, msg_ctime);
1886		error = copyout(&msqold, uap->buf, sizeof(struct msqid_ds_old));
1887	}
1888	return (error);
1889}
1890
1891#undef CP
1892
1893#endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
1894	   COMPAT_FREEBSD7 */
1895