sysv_msg.c revision 140614
1/*-
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19/*-
20 * Copyright (c) 2003-2005 McAfee, Inc.
21 * All rights reserved.
22 *
23 * This software was developed for the FreeBSD Project in part by McAfee
24 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26 * program.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 *    notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 *    notice, this list of conditions and the following disclaimer in the
35 *    documentation and/or other materials provided with the distribution.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/kern/sysv_msg.c 140614 2005-01-22 18:51:43Z rwatson $");
52
53#include "opt_sysvipc.h"
54#include "opt_mac.h"
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/sysproto.h>
59#include <sys/kernel.h>
60#include <sys/proc.h>
61#include <sys/lock.h>
62#include <sys/mac.h>
63#include <sys/mutex.h>
64#include <sys/module.h>
65#include <sys/msg.h>
66#include <sys/syscall.h>
67#include <sys/sysent.h>
68#include <sys/sysctl.h>
69#include <sys/malloc.h>
70#include <sys/jail.h>
71
72static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
73
74static void msginit(void);
75static int msgunload(void);
76static int sysvmsg_modload(struct module *, int, void *);
77
/*
 * Debug output helpers.  The single macro argument is a complete,
 * parenthesized printf argument list, e.g. DPRINTF(("x=%d\n", x)).
 * Each macro expands to nothing (its argument is not evaluated) unless
 * the corresponding *_DEBUG symbol is defined.
 */
#if defined(MSG_DEBUG)
#define DPRINTF(a)	printf a
#else
#define DPRINTF(a)
#endif

#if defined(MAC_DEBUG)
#define MPRINTF(a)	printf a
#else
#define MPRINTF(a)
#endif
88
89static void msg_freehdr(struct msg *msghdr);
90
91/* XXX casting to (sy_call_t *) is bogus, as usual. */
92static sy_call_t *msgcalls[] = {
93	(sy_call_t *)msgctl, (sy_call_t *)msgget,
94	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
95};
96
/*
 * Compile-time defaults for the message queue limits.  Every value except
 * MSGMAX may be supplied from outside (e.g. the kernel configuration);
 * MSGMAX is always derived from the segment size and segment count.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#if !defined(MSGSEG)
#define MSGSEG	2048		/* must not exceed 32767 */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)
#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#if !defined(MSGMNI)
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#if !defined(MSGTQL)
#define MSGTQL	40		/* max # of messages in the system */
#endif
113
114/*
115 * Based on the configuration parameters described in an SVR2 (yes, two)
116 * config(1m) man page.
117 *
118 * Each message is broken up and stored in segments that are msgssz bytes
119 * long.  For efficiency reasons, this should be a power of two.  Also,
120 * it doesn't make sense if it is less than 8 or greater than about 256.
121 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
122 * two between 8 and 1024 inclusive (and panic's if it isn't).
123 */
124struct msginfo msginfo = {
125                MSGMAX,         /* max chars in a message */
126                MSGMNI,         /* # of message queue identifiers */
127                MSGMNB,         /* max chars in a queue */
128                MSGTQL,         /* max messages in system */
129                MSGSSZ,         /* size of a message segment */
130                		/* (must be small power of 2 greater than 4) */
131                MSGSEG          /* number of message segments */
132};
133
/*
 * Macros to convert between msqid_ds's and msqid's
 * (specific to this implementation).
 *
 * A msqid packs the slot index into the low 16 bits and the slot's
 * msg_perm.seq into the high 16 bits:
 *	MSQID(ix, ds)	build an id from index 'ix' and descriptor 'ds'
 *	MSQID_IX(id)	extract the slot index
 *	MSQID_SEQ(id)	extract the sequence number
 *
 * The index term in MSQID() is parenthesized explicitly so the result does
 * not rely on '&' binding tighter than '|'.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
141
142/*
143 * The rest of this file is specific to this particular implementation.
144 */
145
146struct msgmap {
147	short	next;		/* next segment in buffer */
148    				/* -1 -> available */
149    				/* 0..(MSGSEG-1) -> index of next segment */
150};
151
152#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
153
154static int nfree_msgmaps;	/* # of free map entries */
155static short free_msgmaps;	/* head of linked list of free map entries */
156static struct msg *free_msghdrs;/* list of free msg headers */
157static char *msgpool;		/* MSGMAX byte long msg buffer pool */
158static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
159static struct msg *msghdrs;	/* MSGTQL msg headers */
160static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
161static struct mtx msq_mtx;	/* global mutex for message queues. */
162
163static void
164msginit()
165{
166	register int i;
167
168	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
169	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
170	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
171	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
172	TUNABLE_INT_FETCH("kern.ipc.msgmnb", &msginfo.msgmnb);
173	TUNABLE_INT_FETCH("kern.ipc.msgtql", &msginfo.msgtql);
174
175	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
176	if (msgpool == NULL)
177		panic("msgpool is NULL");
178	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
179	if (msgmaps == NULL)
180		panic("msgmaps is NULL");
181	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
182	if (msghdrs == NULL)
183		panic("msghdrs is NULL");
184	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
185	    M_WAITOK);
186	if (msqids == NULL)
187		panic("msqids is NULL");
188
189	/*
190	 * msginfo.msgssz should be a power of two for efficiency reasons.
191	 * It is also pretty silly if msginfo.msgssz is less than 8
192	 * or greater than about 256 so ...
193	 */
194
195	i = 8;
196	while (i < 1024 && i != msginfo.msgssz)
197		i <<= 1;
198    	if (i != msginfo.msgssz) {
199		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
200		    msginfo.msgssz));
201		panic("msginfo.msgssz not a small power of 2");
202	}
203
204	if (msginfo.msgseg > 32767) {
205		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
206		panic("msginfo.msgseg > 32767");
207	}
208
209	if (msgmaps == NULL)
210		panic("msgmaps is NULL");
211
212	for (i = 0; i < msginfo.msgseg; i++) {
213		if (i > 0)
214			msgmaps[i-1].next = i;
215		msgmaps[i].next = -1;	/* implies entry is available */
216	}
217	free_msgmaps = 0;
218	nfree_msgmaps = msginfo.msgseg;
219
220	if (msghdrs == NULL)
221		panic("msghdrs is NULL");
222
223	for (i = 0; i < msginfo.msgtql; i++) {
224		msghdrs[i].msg_type = 0;
225		if (i > 0)
226			msghdrs[i-1].msg_next = &msghdrs[i];
227		msghdrs[i].msg_next = NULL;
228#ifdef MAC
229		mac_init_sysv_msgmsg(&msghdrs[i]);
230#endif
231    	}
232	free_msghdrs = &msghdrs[0];
233
234	if (msqids == NULL)
235		panic("msqids is NULL");
236
237	for (i = 0; i < msginfo.msgmni; i++) {
238		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
239		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
240		msqids[i].u.msg_perm.mode = 0;
241#ifdef MAC
242		mac_init_sysv_msgqueue(&msqids[i]);
243#endif
244	}
245	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
246}
247
248static int
249msgunload()
250{
251	struct msqid_kernel *msqkptr;
252	int msqid;
253#ifdef MAC
254	int i;
255#endif
256
257	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
258		/*
259		 * Look for an unallocated and unlocked msqid_ds.
260		 * msqid_ds's can be locked by msgsnd or msgrcv while
261		 * they are copying the message in/out.  We can't
262		 * re-use the entry until they release it.
263		 */
264		msqkptr = &msqids[msqid];
265		if (msqkptr->u.msg_qbytes != 0 ||
266		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
267			break;
268	}
269	if (msqid != msginfo.msgmni)
270		return (EBUSY);
271
272#ifdef MAC
273	for (i = 0; i < msginfo.msgtql; i++)
274		mac_destroy_sysv_msgmsg(&msghdrs[i]);
275	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
276		mac_destroy_sysv_msgqueue(&msqids[msqid]);
277#endif
278	free(msgpool, M_MSG);
279	free(msgmaps, M_MSG);
280	free(msghdrs, M_MSG);
281	free(msqids, M_MSG);
282	mtx_destroy(&msq_mtx);
283	return (0);
284}
285
286
287static int
288sysvmsg_modload(struct module *module, int cmd, void *arg)
289{
290	int error = 0;
291
292	switch (cmd) {
293	case MOD_LOAD:
294		msginit();
295		break;
296	case MOD_UNLOAD:
297		error = msgunload();
298		break;
299	case MOD_SHUTDOWN:
300		break;
301	default:
302		error = EINVAL;
303		break;
304	}
305	return (error);
306}
307
308static moduledata_t sysvmsg_mod = {
309	"sysvmsg",
310	&sysvmsg_modload,
311	NULL
312};
313
314SYSCALL_MODULE_HELPER(msgsys);
315SYSCALL_MODULE_HELPER(msgctl);
316SYSCALL_MODULE_HELPER(msgget);
317SYSCALL_MODULE_HELPER(msgsnd);
318SYSCALL_MODULE_HELPER(msgrcv);
319
320DECLARE_MODULE(sysvmsg, sysvmsg_mod,
321	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
322MODULE_VERSION(sysvmsg, 1);
323
324/*
325 * Entry point for all MSG calls
326 *
327 * MPSAFE
328 */
329int
330msgsys(td, uap)
331	struct thread *td;
332	/* XXX actually varargs. */
333	struct msgsys_args /* {
334		int	which;
335		int	a2;
336		int	a3;
337		int	a4;
338		int	a5;
339		int	a6;
340	} */ *uap;
341{
342	int error;
343
344	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
345		return (ENOSYS);
346	if (uap->which < 0 ||
347	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
348		return (EINVAL);
349	error = (*msgcalls[uap->which])(td, &uap->a2);
350	return (error);
351}
352
353static void
354msg_freehdr(msghdr)
355	struct msg *msghdr;
356{
357	while (msghdr->msg_ts > 0) {
358		short next;
359		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
360			panic("msghdr->msg_spot out of range");
361		next = msgmaps[msghdr->msg_spot].next;
362		msgmaps[msghdr->msg_spot].next = free_msgmaps;
363		free_msgmaps = msghdr->msg_spot;
364		nfree_msgmaps++;
365		msghdr->msg_spot = next;
366		if (msghdr->msg_ts >= msginfo.msgssz)
367			msghdr->msg_ts -= msginfo.msgssz;
368		else
369			msghdr->msg_ts = 0;
370	}
371	if (msghdr->msg_spot != -1)
372		panic("msghdr->msg_spot != -1");
373	msghdr->msg_next = free_msghdrs;
374	free_msghdrs = msghdr;
375#ifdef MAC
376	mac_cleanup_sysv_msgmsg(msghdr);
377#endif
378}
379
380#ifndef _SYS_SYSPROTO_H_
381struct msgctl_args {
382	int	msqid;
383	int	cmd;
384	struct	msqid_ds *buf;
385};
386#endif
387
388/*
389 * MPSAFE
390 */
391int
392msgctl(td, uap)
393	struct thread *td;
394	register struct msgctl_args *uap;
395{
396	int msqid = uap->msqid;
397	int cmd = uap->cmd;
398	struct msqid_ds *user_msqptr = uap->buf;
399	int rval, error;
400	struct msqid_ds msqbuf;
401	register struct msqid_kernel *msqkptr;
402
403	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr));
404	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
405		return (ENOSYS);
406
407	msqid = IPCID_TO_IX(msqid);
408
409	if (msqid < 0 || msqid >= msginfo.msgmni) {
410		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
411		    msginfo.msgmni));
412		return (EINVAL);
413	}
414	if (cmd == IPC_SET &&
415	    (error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
416		return (error);
417
418	msqkptr = &msqids[msqid];
419
420	mtx_lock(&msq_mtx);
421	if (msqkptr->u.msg_qbytes == 0) {
422		DPRINTF(("no such msqid\n"));
423		error = EINVAL;
424		goto done2;
425	}
426	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
427		DPRINTF(("wrong sequence number\n"));
428		error = EINVAL;
429		goto done2;
430	}
431#ifdef MAC
432	error = mac_check_sysv_msqctl(td->td_ucred, msqkptr, cmd);
433	if (error != 0) {
434		MPRINTF(("mac_check_sysv_msqctl returned %d\n", error));
435		goto done2;
436	}
437#endif
438
439	error = 0;
440	rval = 0;
441
442	switch (cmd) {
443
444	case IPC_RMID:
445	{
446		struct msg *msghdr;
447		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
448			goto done2;
449
450#ifdef MAC
451		/*
452		 * Check that the thread has MAC access permissions to
453		 * individual msghdrs.  Note: We need to do this in a
454		 * separate loop because the actual loop alters the
455		 * msq/msghdr info as it progresses, and there is no going
456		 * back if half the way through we discover that the
457		 * thread cannot free a certain msghdr.  The msq will get
458		 * into an inconsistent state.
459		 */
460		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
461		    msghdr = msghdr->msg_next) {
462			error = mac_check_sysv_msgrmid(td->td_ucred, msghdr);
463			if (error != 0) {
464				MPRINTF(("mac_check_sysv_msgrmid returned %d\n",
465				    error));
466				goto done2;
467			}
468		}
469#endif
470
471		/* Free the message headers */
472		msghdr = msqkptr->u.msg_first;
473		while (msghdr != NULL) {
474			struct msg *msghdr_tmp;
475
476			/* Free the segments of each message */
477			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
478			msqkptr->u.msg_qnum--;
479			msghdr_tmp = msghdr;
480			msghdr = msghdr->msg_next;
481			msg_freehdr(msghdr_tmp);
482		}
483
484		if (msqkptr->u.msg_cbytes != 0)
485			panic("msg_cbytes is screwed up");
486		if (msqkptr->u.msg_qnum != 0)
487			panic("msg_qnum is screwed up");
488
489		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
490
491#ifdef MAC
492		mac_cleanup_sysv_msgqueue(msqkptr);
493#endif
494
495		wakeup(msqkptr);
496	}
497
498		break;
499
500	case IPC_SET:
501		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
502			goto done2;
503		if (msqbuf.msg_qbytes > msqkptr->u.msg_qbytes) {
504			error = suser(td);
505			if (error)
506				goto done2;
507		}
508		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
509			DPRINTF(("can't increase msg_qbytes beyond %d"
510			    "(truncating)\n", msginfo.msgmnb));
511			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
512		}
513		if (msqbuf.msg_qbytes == 0) {
514			DPRINTF(("can't reduce msg_qbytes to 0\n"));
515			error = EINVAL;		/* non-standard errno! */
516			goto done2;
517		}
518		msqkptr->u.msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
519		msqkptr->u.msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
520		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
521		    (msqbuf.msg_perm.mode & 0777);
522		msqkptr->u.msg_qbytes = msqbuf.msg_qbytes;
523		msqkptr->u.msg_ctime = time_second;
524		break;
525
526	case IPC_STAT:
527		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
528			DPRINTF(("requester doesn't have read access\n"));
529			goto done2;
530		}
531		break;
532
533	default:
534		DPRINTF(("invalid command %d\n", cmd));
535		error = EINVAL;
536		goto done2;
537	}
538
539	if (error == 0)
540		td->td_retval[0] = rval;
541done2:
542	mtx_unlock(&msq_mtx);
543	if (cmd == IPC_STAT && error == 0)
544		error = copyout(&(msqkptr->u), user_msqptr, sizeof(struct msqid_ds));
545	return(error);
546}
547
548#ifndef _SYS_SYSPROTO_H_
549struct msgget_args {
550	key_t	key;
551	int	msgflg;
552};
553#endif
554
555/*
556 * MPSAFE
557 */
558int
559msgget(td, uap)
560	struct thread *td;
561	register struct msgget_args *uap;
562{
563	int msqid, error = 0;
564	int key = uap->key;
565	int msgflg = uap->msgflg;
566	struct ucred *cred = td->td_ucred;
567	register struct msqid_kernel *msqkptr = NULL;
568
569	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
570
571	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
572		return (ENOSYS);
573
574	mtx_lock(&msq_mtx);
575	if (key != IPC_PRIVATE) {
576		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
577			msqkptr = &msqids[msqid];
578			if (msqkptr->u.msg_qbytes != 0 &&
579			    msqkptr->u.msg_perm.key == key)
580				break;
581		}
582		if (msqid < msginfo.msgmni) {
583			DPRINTF(("found public key\n"));
584			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
585				DPRINTF(("not exclusive\n"));
586				error = EEXIST;
587				goto done2;
588			}
589			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
590			    msgflg & 0700))) {
591				DPRINTF(("requester doesn't have 0%o access\n",
592				    msgflg & 0700));
593				goto done2;
594			}
595#ifdef MAC
596			error = mac_check_sysv_msqget(cred, msqkptr);
597			if (error != 0) {
598				MPRINTF(("mac_check_sysv_msqget returned %d\n",
599				    error));
600				goto done2;
601			}
602#endif
603			goto found;
604		}
605	}
606
607	DPRINTF(("need to allocate the msqid_ds\n"));
608	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
609		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
610			/*
611			 * Look for an unallocated and unlocked msqid_ds.
612			 * msqid_ds's can be locked by msgsnd or msgrcv while
613			 * they are copying the message in/out.  We can't
614			 * re-use the entry until they release it.
615			 */
616			msqkptr = &msqids[msqid];
617			if (msqkptr->u.msg_qbytes == 0 &&
618			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
619				break;
620		}
621		if (msqid == msginfo.msgmni) {
622			DPRINTF(("no more msqid_ds's available\n"));
623			error = ENOSPC;
624			goto done2;
625		}
626		DPRINTF(("msqid %d is available\n", msqid));
627		msqkptr->u.msg_perm.key = key;
628		msqkptr->u.msg_perm.cuid = cred->cr_uid;
629		msqkptr->u.msg_perm.uid = cred->cr_uid;
630		msqkptr->u.msg_perm.cgid = cred->cr_gid;
631		msqkptr->u.msg_perm.gid = cred->cr_gid;
632		msqkptr->u.msg_perm.mode = (msgflg & 0777);
633		/* Make sure that the returned msqid is unique */
634		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
635		msqkptr->u.msg_first = NULL;
636		msqkptr->u.msg_last = NULL;
637		msqkptr->u.msg_cbytes = 0;
638		msqkptr->u.msg_qnum = 0;
639		msqkptr->u.msg_qbytes = msginfo.msgmnb;
640		msqkptr->u.msg_lspid = 0;
641		msqkptr->u.msg_lrpid = 0;
642		msqkptr->u.msg_stime = 0;
643		msqkptr->u.msg_rtime = 0;
644		msqkptr->u.msg_ctime = time_second;
645#ifdef MAC
646		mac_create_sysv_msgqueue(cred, msqkptr);
647#endif
648	} else {
649		DPRINTF(("didn't find it and wasn't asked to create it\n"));
650		error = ENOENT;
651		goto done2;
652	}
653
654found:
655	/* Construct the unique msqid */
656	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
657done2:
658	mtx_unlock(&msq_mtx);
659	return (error);
660}
661
662#ifndef _SYS_SYSPROTO_H_
663struct msgsnd_args {
664	int	msqid;
665	const void	*msgp;
666	size_t	msgsz;
667	int	msgflg;
668};
669#endif
670
671/*
672 * MPSAFE
673 */
674int
675msgsnd(td, uap)
676	struct thread *td;
677	register struct msgsnd_args *uap;
678{
679	int msqid = uap->msqid;
680	const void *user_msgp = uap->msgp;
681	size_t msgsz = uap->msgsz;
682	int msgflg = uap->msgflg;
683	int segs_needed, error = 0;
684	register struct msqid_kernel *msqkptr;
685	register struct msg *msghdr;
686	short next;
687
688	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
689	    msgflg));
690	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
691		return (ENOSYS);
692
693	mtx_lock(&msq_mtx);
694	msqid = IPCID_TO_IX(msqid);
695
696	if (msqid < 0 || msqid >= msginfo.msgmni) {
697		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
698		    msginfo.msgmni));
699		error = EINVAL;
700		goto done2;
701	}
702
703	msqkptr = &msqids[msqid];
704	if (msqkptr->u.msg_qbytes == 0) {
705		DPRINTF(("no such message queue id\n"));
706		error = EINVAL;
707		goto done2;
708	}
709	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
710		DPRINTF(("wrong sequence number\n"));
711		error = EINVAL;
712		goto done2;
713	}
714
715	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
716		DPRINTF(("requester doesn't have write access\n"));
717		goto done2;
718	}
719
720#ifdef MAC
721	error = mac_check_sysv_msqsnd(td->td_ucred, msqkptr);
722	if (error != 0) {
723		MPRINTF(("mac_check_sysv_msqsnd returned %d\n", error));
724		goto done2;
725	}
726#endif
727
728	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
729	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
730	    segs_needed));
731	for (;;) {
732		int need_more_resources = 0;
733
734		/*
735		 * check msgsz
736		 * (inside this loop in case msg_qbytes changes while we sleep)
737		 */
738
739		if (msgsz > msqkptr->u.msg_qbytes) {
740			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
741			error = EINVAL;
742			goto done2;
743		}
744
745		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
746			DPRINTF(("msqid is locked\n"));
747			need_more_resources = 1;
748		}
749		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
750			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
751			need_more_resources = 1;
752		}
753		if (segs_needed > nfree_msgmaps) {
754			DPRINTF(("segs_needed > nfree_msgmaps\n"));
755			need_more_resources = 1;
756		}
757		if (free_msghdrs == NULL) {
758			DPRINTF(("no more msghdrs\n"));
759			need_more_resources = 1;
760		}
761
762		if (need_more_resources) {
763			int we_own_it;
764
765			if ((msgflg & IPC_NOWAIT) != 0) {
766				DPRINTF(("need more resources but caller "
767				    "doesn't want to wait\n"));
768				error = EAGAIN;
769				goto done2;
770			}
771
772			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
773				DPRINTF(("we don't own the msqid_ds\n"));
774				we_own_it = 0;
775			} else {
776				/* Force later arrivals to wait for our
777				   request */
778				DPRINTF(("we own the msqid_ds\n"));
779				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
780				we_own_it = 1;
781			}
782			DPRINTF(("goodnight\n"));
783			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
784			    "msgwait", 0);
785			DPRINTF(("good morning, error=%d\n", error));
786			if (we_own_it)
787				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
788			if (error != 0) {
789				DPRINTF(("msgsnd:  interrupted system call\n"));
790				error = EINTR;
791				goto done2;
792			}
793
794			/*
795			 * Make sure that the msq queue still exists
796			 */
797
798			if (msqkptr->u.msg_qbytes == 0) {
799				DPRINTF(("msqid deleted\n"));
800				error = EIDRM;
801				goto done2;
802			}
803
804		} else {
805			DPRINTF(("got all the resources that we need\n"));
806			break;
807		}
808	}
809
810	/*
811	 * We have the resources that we need.
812	 * Make sure!
813	 */
814
815	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
816		panic("msg_perm.mode & MSG_LOCKED");
817	if (segs_needed > nfree_msgmaps)
818		panic("segs_needed > nfree_msgmaps");
819	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
820		panic("msgsz + msg_cbytes > msg_qbytes");
821	if (free_msghdrs == NULL)
822		panic("no more msghdrs");
823
824	/*
825	 * Re-lock the msqid_ds in case we page-fault when copying in the
826	 * message
827	 */
828
829	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
830		panic("msqid_ds is already locked");
831	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
832
833	/*
834	 * Allocate a message header
835	 */
836
837	msghdr = free_msghdrs;
838	free_msghdrs = msghdr->msg_next;
839	msghdr->msg_spot = -1;
840	msghdr->msg_ts = msgsz;
841#ifdef MAC
842	/*
843	 * XXXMAC: Should the mac_check_sysv_msgmsq check follow here
844	 * immediately?  Or, should it be checked just before the msg is
845	 * enqueued in the msgq (as it is done now)?
846	 */
847	mac_create_sysv_msgmsg(td->td_ucred, msqkptr, msghdr);
848#endif
849
850	/*
851	 * Allocate space for the message
852	 */
853
854	while (segs_needed > 0) {
855		if (nfree_msgmaps <= 0)
856			panic("not enough msgmaps");
857		if (free_msgmaps == -1)
858			panic("nil free_msgmaps");
859		next = free_msgmaps;
860		if (next <= -1)
861			panic("next too low #1");
862		if (next >= msginfo.msgseg)
863			panic("next out of range #1");
864		DPRINTF(("allocating segment %d to message\n", next));
865		free_msgmaps = msgmaps[next].next;
866		nfree_msgmaps--;
867		msgmaps[next].next = msghdr->msg_spot;
868		msghdr->msg_spot = next;
869		segs_needed--;
870	}
871
872	/*
873	 * Copy in the message type
874	 */
875
876	mtx_unlock(&msq_mtx);
877	if ((error = copyin(user_msgp, &msghdr->msg_type,
878	    sizeof(msghdr->msg_type))) != 0) {
879		mtx_lock(&msq_mtx);
880		DPRINTF(("error %d copying the message type\n", error));
881		msg_freehdr(msghdr);
882		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
883		wakeup(msqkptr);
884		goto done2;
885	}
886	mtx_lock(&msq_mtx);
887	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
888
889	/*
890	 * Validate the message type
891	 */
892
893	if (msghdr->msg_type < 1) {
894		msg_freehdr(msghdr);
895		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
896		wakeup(msqkptr);
897		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
898		error = EINVAL;
899		goto done2;
900	}
901
902	/*
903	 * Copy in the message body
904	 */
905
906	next = msghdr->msg_spot;
907	while (msgsz > 0) {
908		size_t tlen;
909		if (msgsz > msginfo.msgssz)
910			tlen = msginfo.msgssz;
911		else
912			tlen = msgsz;
913		if (next <= -1)
914			panic("next too low #2");
915		if (next >= msginfo.msgseg)
916			panic("next out of range #2");
917		mtx_unlock(&msq_mtx);
918		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
919		    tlen)) != 0) {
920			mtx_lock(&msq_mtx);
921			DPRINTF(("error %d copying in message segment\n",
922			    error));
923			msg_freehdr(msghdr);
924			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
925			wakeup(msqkptr);
926			goto done2;
927		}
928		mtx_lock(&msq_mtx);
929		msgsz -= tlen;
930		user_msgp = (const char *)user_msgp + tlen;
931		next = msgmaps[next].next;
932	}
933	if (next != -1)
934		panic("didn't use all the msg segments");
935
936	/*
937	 * We've got the message.  Unlock the msqid_ds.
938	 */
939
940	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
941
942	/*
943	 * Make sure that the msqid_ds is still allocated.
944	 */
945
946	if (msqkptr->u.msg_qbytes == 0) {
947		msg_freehdr(msghdr);
948		wakeup(msqkptr);
949		error = EIDRM;
950		goto done2;
951	}
952
953#ifdef MAC
954	/*
955	 * Note: Since the task/thread allocates the msghdr and usually
956	 * primes it with its own MAC label, for a majority of policies, it
957	 * won't be necessary to check whether the msghdr has access
958	 * permissions to the msgq.  The mac_check_sysv_msqsnd check would
959	 * suffice in that case.  However, this hook may be required where
960	 * individual policies derive a non-identical label for the msghdr
961	 * from the current thread label and may want to check the msghdr
962	 * enqueue permissions, along with read/write permissions to the
963	 * msgq.
964	 */
965	error = mac_check_sysv_msgmsq(td->td_ucred, msghdr, msqkptr);
966	if (error != 0) {
967		MPRINTF(("mac_check_sysv_msqmsq returned %d\n", error));
968		msg_freehdr(msghdr);
969		wakeup(msqkptr);
970		goto done2;
971	}
972#endif
973
974	/*
975	 * Put the message into the queue
976	 */
977	if (msqkptr->u.msg_first == NULL) {
978		msqkptr->u.msg_first = msghdr;
979		msqkptr->u.msg_last = msghdr;
980	} else {
981		msqkptr->u.msg_last->msg_next = msghdr;
982		msqkptr->u.msg_last = msghdr;
983	}
984	msqkptr->u.msg_last->msg_next = NULL;
985
986	msqkptr->u.msg_cbytes += msghdr->msg_ts;
987	msqkptr->u.msg_qnum++;
988	msqkptr->u.msg_lspid = td->td_proc->p_pid;
989	msqkptr->u.msg_stime = time_second;
990
991	wakeup(msqkptr);
992	td->td_retval[0] = 0;
993done2:
994	mtx_unlock(&msq_mtx);
995	return (error);
996}
997
998#ifndef _SYS_SYSPROTO_H_
999struct msgrcv_args {
1000	int	msqid;
1001	void	*msgp;
1002	size_t	msgsz;
1003	long	msgtyp;
1004	int	msgflg;
1005};
1006#endif
1007
1008/*
1009 * MPSAFE
1010 */
1011int
1012msgrcv(td, uap)
1013	struct thread *td;
1014	register struct msgrcv_args *uap;
1015{
1016	int msqid = uap->msqid;
1017	void *user_msgp = uap->msgp;
1018	size_t msgsz = uap->msgsz;
1019	long msgtyp = uap->msgtyp;
1020	int msgflg = uap->msgflg;
1021	size_t len;
1022	register struct msqid_kernel *msqkptr;
1023	register struct msg *msghdr;
1024	int error = 0;
1025	short next;
1026
1027	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
1028	    msgsz, msgtyp, msgflg));
1029
1030	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
1031		return (ENOSYS);
1032
1033	msqid = IPCID_TO_IX(msqid);
1034
1035	if (msqid < 0 || msqid >= msginfo.msgmni) {
1036		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
1037		    msginfo.msgmni));
1038		return (EINVAL);
1039	}
1040
1041	msqkptr = &msqids[msqid];
1042	mtx_lock(&msq_mtx);
1043	if (msqkptr->u.msg_qbytes == 0) {
1044		DPRINTF(("no such message queue id\n"));
1045		error = EINVAL;
1046		goto done2;
1047	}
1048	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1049		DPRINTF(("wrong sequence number\n"));
1050		error = EINVAL;
1051		goto done2;
1052	}
1053
1054	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1055		DPRINTF(("requester doesn't have read access\n"));
1056		goto done2;
1057	}
1058
1059#ifdef MAC
1060	error = mac_check_sysv_msqrcv(td->td_ucred, msqkptr);
1061	if (error != 0) {
1062		MPRINTF(("mac_check_sysv_msqrcv returned %d\n", error));
1063		goto done2;
1064	}
1065#endif
1066
1067	msghdr = NULL;
1068	while (msghdr == NULL) {
1069		if (msgtyp == 0) {
1070			msghdr = msqkptr->u.msg_first;
1071			if (msghdr != NULL) {
1072				if (msgsz < msghdr->msg_ts &&
1073				    (msgflg & MSG_NOERROR) == 0) {
1074					DPRINTF(("first message on the queue "
1075					    "is too big (want %d, got %d)\n",
1076					    msgsz, msghdr->msg_ts));
1077					error = E2BIG;
1078					goto done2;
1079				}
1080#ifdef MAC
1081				error = mac_check_sysv_msgrcv(td->td_ucred,
1082				    msghdr);
1083				if (error != 0) {
1084					MPRINTF(("mac_check_sysv_msgrcv "
1085					    "returned %d\n", error));
1086					goto done2;
1087				}
1088#endif
1089				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1090					msqkptr->u.msg_first = NULL;
1091					msqkptr->u.msg_last = NULL;
1092				} else {
1093					msqkptr->u.msg_first = msghdr->msg_next;
1094					if (msqkptr->u.msg_first == NULL)
1095						panic("msg_first/last screwed up #1");
1096				}
1097			}
1098		} else {
1099			struct msg *previous;
1100			struct msg **prev;
1101
1102			previous = NULL;
1103			prev = &(msqkptr->u.msg_first);
1104			while ((msghdr = *prev) != NULL) {
1105				/*
1106				 * Is this message's type an exact match or is
1107				 * this message's type less than or equal to
1108				 * the absolute value of a negative msgtyp?
1109				 * Note that the second half of this test can
1110				 * NEVER be true if msgtyp is positive since
1111				 * msg_type is always positive!
1112				 */
1113
1114				if (msgtyp == msghdr->msg_type ||
1115				    msghdr->msg_type <= -msgtyp) {
1116					DPRINTF(("found message type %d, "
1117					    "requested %d\n",
1118					    msghdr->msg_type, msgtyp));
1119					if (msgsz < msghdr->msg_ts &&
1120					    (msgflg & MSG_NOERROR) == 0) {
1121						DPRINTF(("requested message "
1122						    "on the queue is too big "
1123						    "(want %d, got %d)\n",
1124						    msgsz, msghdr->msg_ts));
1125						error = E2BIG;
1126						goto done2;
1127					}
1128#ifdef MAC
1129					error = mac_check_sysv_msgrcv(
1130					    td->td_ucred, msghdr);
1131					if (error != 0) {
1132						MPRINTF(("mac_check_sysv_"
1133						    "msgrcv returned %d\n",
1134						    error));
1135						goto done2;
1136					}
1137#endif
1138					*prev = msghdr->msg_next;
1139					if (msghdr == msqkptr->u.msg_last) {
1140						if (previous == NULL) {
1141							if (prev !=
1142							    &msqkptr->u.msg_first)
1143								panic("msg_first/last screwed up #2");
1144							msqkptr->u.msg_first =
1145							    NULL;
1146							msqkptr->u.msg_last =
1147							    NULL;
1148						} else {
1149							if (prev ==
1150							    &msqkptr->u.msg_first)
1151								panic("msg_first/last screwed up #3");
1152							msqkptr->u.msg_last =
1153							    previous;
1154						}
1155					}
1156					break;
1157				}
1158				previous = msghdr;
1159				prev = &(msghdr->msg_next);
1160			}
1161		}
1162
1163		/*
1164		 * We've either extracted the msghdr for the appropriate
1165		 * message or there isn't one.
1166		 * If there is one then bail out of this loop.
1167		 */
1168
1169		if (msghdr != NULL)
1170			break;
1171
1172		/*
1173		 * Hmph!  No message found.  Does the user want to wait?
1174		 */
1175
1176		if ((msgflg & IPC_NOWAIT) != 0) {
1177			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1178			    msgtyp));
1179			/* The SVID says to return ENOMSG. */
1180			error = ENOMSG;
1181			goto done2;
1182		}
1183
1184		/*
1185		 * Wait for something to happen
1186		 */
1187
1188		DPRINTF(("msgrcv:  goodnight\n"));
1189		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1190		    "msgwait", 0);
1191		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1192
1193		if (error != 0) {
1194			DPRINTF(("msgsnd:  interrupted system call\n"));
1195			error = EINTR;
1196			goto done2;
1197		}
1198
1199		/*
1200		 * Make sure that the msq queue still exists
1201		 */
1202
1203		if (msqkptr->u.msg_qbytes == 0 ||
1204		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1205			DPRINTF(("msqid deleted\n"));
1206			error = EIDRM;
1207			goto done2;
1208		}
1209	}
1210
1211	/*
1212	 * Return the message to the user.
1213	 *
1214	 * First, do the bookkeeping (before we risk being interrupted).
1215	 */
1216
1217	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1218	msqkptr->u.msg_qnum--;
1219	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1220	msqkptr->u.msg_rtime = time_second;
1221
1222	/*
1223	 * Make msgsz the actual amount that we'll be returning.
1224	 * Note that this effectively truncates the message if it is too long
1225	 * (since msgsz is never increased).
1226	 */
1227
1228	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1229	    msghdr->msg_ts));
1230	if (msgsz > msghdr->msg_ts)
1231		msgsz = msghdr->msg_ts;
1232
1233	/*
1234	 * Return the type to the user.
1235	 */
1236
1237	mtx_unlock(&msq_mtx);
1238	error = copyout(&(msghdr->msg_type), user_msgp,
1239	    sizeof(msghdr->msg_type));
1240	mtx_lock(&msq_mtx);
1241	if (error != 0) {
1242		DPRINTF(("error (%d) copying out message type\n", error));
1243		msg_freehdr(msghdr);
1244		wakeup(msqkptr);
1245		goto done2;
1246	}
1247	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1248
1249	/*
1250	 * Return the segments to the user
1251	 */
1252
1253	next = msghdr->msg_spot;
1254	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1255		size_t tlen;
1256
1257		if (msgsz - len > msginfo.msgssz)
1258			tlen = msginfo.msgssz;
1259		else
1260			tlen = msgsz - len;
1261		if (next <= -1)
1262			panic("next too low #3");
1263		if (next >= msginfo.msgseg)
1264			panic("next out of range #3");
1265		mtx_unlock(&msq_mtx);
1266		error = copyout(&msgpool[next * msginfo.msgssz],
1267		    user_msgp, tlen);
1268		mtx_lock(&msq_mtx);
1269		if (error != 0) {
1270			DPRINTF(("error (%d) copying out message segment\n",
1271			    error));
1272			msg_freehdr(msghdr);
1273			wakeup(msqkptr);
1274			goto done2;
1275		}
1276		user_msgp = (char *)user_msgp + tlen;
1277		next = msgmaps[next].next;
1278	}
1279
1280	/*
1281	 * Done, return the actual number of bytes copied out.
1282	 */
1283
1284	msg_freehdr(msghdr);
1285	wakeup(msqkptr);
1286	td->td_retval[0] = msgsz;
1287done2:
1288	mtx_unlock(&msq_mtx);
1289	return (error);
1290}
1291
1292static int
1293sysctl_msqids(SYSCTL_HANDLER_ARGS)
1294{
1295
1296	return (SYSCTL_OUT(req, msqids,
1297	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1298}
1299
1300SYSCTL_DECL(_kern_ipc);
1301SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1302SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, "");
1303SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0, "");
1304SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0, "");
1305SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, "");
1306SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, "");
1307SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1308    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1309