sysv_msg.c revision 194832
1/*-
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19/*-
20 * Copyright (c) 2003-2005 McAfee, Inc.
21 * All rights reserved.
22 *
23 * This software was developed for the FreeBSD Project in part by McAfee
24 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26 * program.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 *    notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 *    notice, this list of conditions and the following disclaimer in the
35 *    documentation and/or other materials provided with the distribution.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/kern/sysv_msg.c 194832 2009-06-24 13:35:38Z jhb $");
52
53#include "opt_sysvipc.h"
54
55#include <sys/param.h>
56#include <sys/systm.h>
57#include <sys/sysproto.h>
58#include <sys/kernel.h>
59#include <sys/priv.h>
60#include <sys/proc.h>
61#include <sys/lock.h>
62#include <sys/mutex.h>
63#include <sys/module.h>
64#include <sys/msg.h>
65#include <sys/syscall.h>
66#include <sys/syscallsubr.h>
67#include <sys/sysent.h>
68#include <sys/sysctl.h>
69#include <sys/malloc.h>
70#include <sys/jail.h>
71
72#include <security/mac/mac_framework.h>
73
74static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
75
76static void msginit(void);
77static int msgunload(void);
78static int sysvmsg_modload(struct module *, int, void *);
79
/*
 * Debug tracing.  The argument is a complete, parenthesized printf()
 * argument list, e.g. DPRINTF(("value=%d\n", v)).  Unless MSG_DEBUG is
 * defined the macro expands to a no-op expression.
 */
#ifndef MSG_DEBUG
#define DPRINTF(a)	(void)0
#else
#define DPRINTF(a)	printf a
#endif
85
86static void msg_freehdr(struct msg *msghdr);
87
88/* XXX casting to (sy_call_t *) is bogus, as usual. */
89static sy_call_t *msgcalls[] = {
90	(sy_call_t *)msgctl, (sy_call_t *)msgget,
91	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
92};
93
/*
 * Compile-time defaults for the message queue limits.  Every limit except
 * MSGMAX may be overridden by a definition made before this point; MSGMAX
 * is always derived from MSGSSZ and MSGSEG.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#if !defined(MSGSEG)
#define MSGSEG	2048		/* must be less than 32767 */
#endif
#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#if !defined(MSGMNI)
#define MSGMNI	40
#endif
#if !defined(MSGTQL)
#define MSGTQL	40
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)
110
111/*
112 * Based on the configuration parameters described in an SVR2 (yes, two)
113 * config(1m) man page.
114 *
115 * Each message is broken up and stored in segments that are msgssz bytes
116 * long.  For efficiency reasons, this should be a power of two.  Also,
117 * it doesn't make sense if it is less than 8 or greater than about 256.
118 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
119 * two between 8 and 1024 inclusive (and panic's if it isn't).
120 */
121struct msginfo msginfo = {
122                MSGMAX,         /* max chars in a message */
123                MSGMNI,         /* # of message queue identifiers */
124                MSGMNB,         /* max chars in a queue */
125                MSGTQL,         /* max messages in system */
126                MSGSSZ,         /* size of a message segment */
127                		/* (must be small power of 2 greater than 4) */
128                MSGSEG          /* number of message segments */
129};
130
131/*
132 * macros to convert between msqid_ds's and msqid's.
133 * (specific to this implementation)
134 */
/* Low 16 bits: queue index.  High 16 bits: sequence number. */
#define MSQID(ix,ds)	\
	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
138
139/*
140 * The rest of this file is specific to this particular implementation.
141 */
142
/*
 * One map entry per message segment.  Entries are chained through 'next',
 * either on the free list or on the segment list of a message.
 */
struct msgmap {
	short	next;		/* next segment in buffer: */
				/*   -1 -> available */
				/*   0..(MSGSEG-1) -> index of next segment */
};
148
149#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
150
151static int nfree_msgmaps;	/* # of free map entries */
152static short free_msgmaps;	/* head of linked list of free map entries */
153static struct msg *free_msghdrs;/* list of free msg headers */
154static char *msgpool;		/* MSGMAX byte long msg buffer pool */
155static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
156static struct msg *msghdrs;	/* MSGTQL msg headers */
157static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
158static struct mtx msq_mtx;	/* global mutex for message queues. */
159
160static void
161msginit()
162{
163	register int i;
164
165	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
166	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
167	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
168	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
169	TUNABLE_INT_FETCH("kern.ipc.msgmnb", &msginfo.msgmnb);
170	TUNABLE_INT_FETCH("kern.ipc.msgtql", &msginfo.msgtql);
171
172	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
173	if (msgpool == NULL)
174		panic("msgpool is NULL");
175	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
176	if (msgmaps == NULL)
177		panic("msgmaps is NULL");
178	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
179	if (msghdrs == NULL)
180		panic("msghdrs is NULL");
181	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
182	    M_WAITOK);
183	if (msqids == NULL)
184		panic("msqids is NULL");
185
186	/*
187	 * msginfo.msgssz should be a power of two for efficiency reasons.
188	 * It is also pretty silly if msginfo.msgssz is less than 8
189	 * or greater than about 256 so ...
190	 */
191
192	i = 8;
193	while (i < 1024 && i != msginfo.msgssz)
194		i <<= 1;
195    	if (i != msginfo.msgssz) {
196		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
197		    msginfo.msgssz));
198		panic("msginfo.msgssz not a small power of 2");
199	}
200
201	if (msginfo.msgseg > 32767) {
202		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
203		panic("msginfo.msgseg > 32767");
204	}
205
206	if (msgmaps == NULL)
207		panic("msgmaps is NULL");
208
209	for (i = 0; i < msginfo.msgseg; i++) {
210		if (i > 0)
211			msgmaps[i-1].next = i;
212		msgmaps[i].next = -1;	/* implies entry is available */
213	}
214	free_msgmaps = 0;
215	nfree_msgmaps = msginfo.msgseg;
216
217	if (msghdrs == NULL)
218		panic("msghdrs is NULL");
219
220	for (i = 0; i < msginfo.msgtql; i++) {
221		msghdrs[i].msg_type = 0;
222		if (i > 0)
223			msghdrs[i-1].msg_next = &msghdrs[i];
224		msghdrs[i].msg_next = NULL;
225#ifdef MAC
226		mac_sysvmsg_init(&msghdrs[i]);
227#endif
228    	}
229	free_msghdrs = &msghdrs[0];
230
231	if (msqids == NULL)
232		panic("msqids is NULL");
233
234	for (i = 0; i < msginfo.msgmni; i++) {
235		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
236		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
237		msqids[i].u.msg_perm.mode = 0;
238#ifdef MAC
239		mac_sysvmsq_init(&msqids[i]);
240#endif
241	}
242	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
243}
244
245static int
246msgunload()
247{
248	struct msqid_kernel *msqkptr;
249	int msqid;
250#ifdef MAC
251	int i;
252#endif
253
254	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
255		/*
256		 * Look for an unallocated and unlocked msqid_ds.
257		 * msqid_ds's can be locked by msgsnd or msgrcv while
258		 * they are copying the message in/out.  We can't
259		 * re-use the entry until they release it.
260		 */
261		msqkptr = &msqids[msqid];
262		if (msqkptr->u.msg_qbytes != 0 ||
263		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
264			break;
265	}
266	if (msqid != msginfo.msgmni)
267		return (EBUSY);
268
269#ifdef MAC
270	for (i = 0; i < msginfo.msgtql; i++)
271		mac_sysvmsg_destroy(&msghdrs[i]);
272	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
273		mac_sysvmsq_destroy(&msqids[msqid]);
274#endif
275	free(msgpool, M_MSG);
276	free(msgmaps, M_MSG);
277	free(msghdrs, M_MSG);
278	free(msqids, M_MSG);
279	mtx_destroy(&msq_mtx);
280	return (0);
281}
282
283
284static int
285sysvmsg_modload(struct module *module, int cmd, void *arg)
286{
287	int error = 0;
288
289	switch (cmd) {
290	case MOD_LOAD:
291		msginit();
292		break;
293	case MOD_UNLOAD:
294		error = msgunload();
295		break;
296	case MOD_SHUTDOWN:
297		break;
298	default:
299		error = EINVAL;
300		break;
301	}
302	return (error);
303}
304
305static moduledata_t sysvmsg_mod = {
306	"sysvmsg",
307	&sysvmsg_modload,
308	NULL
309};
310
311SYSCALL_MODULE_HELPER(msgsys);
312SYSCALL_MODULE_HELPER(msgctl);
313SYSCALL_MODULE_HELPER(msgget);
314SYSCALL_MODULE_HELPER(msgsnd);
315SYSCALL_MODULE_HELPER(msgrcv);
316
317DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
318MODULE_VERSION(sysvmsg, 1);
319
320/*
321 * Entry point for all MSG calls.
322 */
323int
324msgsys(td, uap)
325	struct thread *td;
326	/* XXX actually varargs. */
327	struct msgsys_args /* {
328		int	which;
329		int	a2;
330		int	a3;
331		int	a4;
332		int	a5;
333		int	a6;
334	} */ *uap;
335{
336	int error;
337
338	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
339		return (ENOSYS);
340	if (uap->which < 0 ||
341	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
342		return (EINVAL);
343	error = (*msgcalls[uap->which])(td, &uap->a2);
344	return (error);
345}
346
347static void
348msg_freehdr(msghdr)
349	struct msg *msghdr;
350{
351	while (msghdr->msg_ts > 0) {
352		short next;
353		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
354			panic("msghdr->msg_spot out of range");
355		next = msgmaps[msghdr->msg_spot].next;
356		msgmaps[msghdr->msg_spot].next = free_msgmaps;
357		free_msgmaps = msghdr->msg_spot;
358		nfree_msgmaps++;
359		msghdr->msg_spot = next;
360		if (msghdr->msg_ts >= msginfo.msgssz)
361			msghdr->msg_ts -= msginfo.msgssz;
362		else
363			msghdr->msg_ts = 0;
364	}
365	if (msghdr->msg_spot != -1)
366		panic("msghdr->msg_spot != -1");
367	msghdr->msg_next = free_msghdrs;
368	free_msghdrs = msghdr;
369#ifdef MAC
370	mac_sysvmsg_cleanup(msghdr);
371#endif
372}
373
374#ifndef _SYS_SYSPROTO_H_
375struct msgctl_args {
376	int	msqid;
377	int	cmd;
378	struct	msqid_ds *buf;
379};
380#endif
381int
382msgctl(td, uap)
383	struct thread *td;
384	register struct msgctl_args *uap;
385{
386	int msqid = uap->msqid;
387	int cmd = uap->cmd;
388	struct msqid_ds msqbuf;
389	int error;
390
391	DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf));
392	if (cmd == IPC_SET &&
393	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
394		return (error);
395	error = kern_msgctl(td, msqid, cmd, &msqbuf);
396	if (cmd == IPC_STAT && error == 0)
397		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
398	return (error);
399}
400
401int
402kern_msgctl(td, msqid, cmd, msqbuf)
403	struct thread *td;
404	int msqid;
405	int cmd;
406	struct msqid_ds *msqbuf;
407{
408	int rval, error, msqix;
409	register struct msqid_kernel *msqkptr;
410
411	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
412		return (ENOSYS);
413
414	msqix = IPCID_TO_IX(msqid);
415
416	if (msqix < 0 || msqix >= msginfo.msgmni) {
417		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
418		    msginfo.msgmni));
419		return (EINVAL);
420	}
421
422	msqkptr = &msqids[msqix];
423
424	mtx_lock(&msq_mtx);
425	if (msqkptr->u.msg_qbytes == 0) {
426		DPRINTF(("no such msqid\n"));
427		error = EINVAL;
428		goto done2;
429	}
430	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
431		DPRINTF(("wrong sequence number\n"));
432		error = EINVAL;
433		goto done2;
434	}
435#ifdef MAC
436	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
437	if (error != 0)
438		goto done2;
439#endif
440
441	error = 0;
442	rval = 0;
443
444	switch (cmd) {
445
446	case IPC_RMID:
447	{
448		struct msg *msghdr;
449		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
450			goto done2;
451
452#ifdef MAC
453		/*
454		 * Check that the thread has MAC access permissions to
455		 * individual msghdrs.  Note: We need to do this in a
456		 * separate loop because the actual loop alters the
457		 * msq/msghdr info as it progresses, and there is no going
458		 * back if half the way through we discover that the
459		 * thread cannot free a certain msghdr.  The msq will get
460		 * into an inconsistent state.
461		 */
462		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
463		    msghdr = msghdr->msg_next) {
464			error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr);
465			if (error != 0)
466				goto done2;
467		}
468#endif
469
470		/* Free the message headers */
471		msghdr = msqkptr->u.msg_first;
472		while (msghdr != NULL) {
473			struct msg *msghdr_tmp;
474
475			/* Free the segments of each message */
476			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
477			msqkptr->u.msg_qnum--;
478			msghdr_tmp = msghdr;
479			msghdr = msghdr->msg_next;
480			msg_freehdr(msghdr_tmp);
481		}
482
483		if (msqkptr->u.msg_cbytes != 0)
484			panic("msg_cbytes is screwed up");
485		if (msqkptr->u.msg_qnum != 0)
486			panic("msg_qnum is screwed up");
487
488		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
489
490#ifdef MAC
491		mac_sysvmsq_cleanup(msqkptr);
492#endif
493
494		wakeup(msqkptr);
495	}
496
497		break;
498
499	case IPC_SET:
500		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
501			goto done2;
502		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
503			error = priv_check(td, PRIV_IPC_MSGSIZE);
504			if (error)
505				goto done2;
506		}
507		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
508			DPRINTF(("can't increase msg_qbytes beyond %d"
509			    "(truncating)\n", msginfo.msgmnb));
510			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
511		}
512		if (msqbuf->msg_qbytes == 0) {
513			DPRINTF(("can't reduce msg_qbytes to 0\n"));
514			error = EINVAL;		/* non-standard errno! */
515			goto done2;
516		}
517		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
518		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
519		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
520		    (msqbuf->msg_perm.mode & 0777);
521		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
522		msqkptr->u.msg_ctime = time_second;
523		break;
524
525	case IPC_STAT:
526		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
527			DPRINTF(("requester doesn't have read access\n"));
528			goto done2;
529		}
530		*msqbuf = msqkptr->u;
531		break;
532
533	default:
534		DPRINTF(("invalid command %d\n", cmd));
535		error = EINVAL;
536		goto done2;
537	}
538
539	if (error == 0)
540		td->td_retval[0] = rval;
541done2:
542	mtx_unlock(&msq_mtx);
543	return (error);
544}
545
546#ifndef _SYS_SYSPROTO_H_
547struct msgget_args {
548	key_t	key;
549	int	msgflg;
550};
551#endif
552int
553msgget(td, uap)
554	struct thread *td;
555	register struct msgget_args *uap;
556{
557	int msqid, error = 0;
558	int key = uap->key;
559	int msgflg = uap->msgflg;
560	struct ucred *cred = td->td_ucred;
561	register struct msqid_kernel *msqkptr = NULL;
562
563	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
564
565	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
566		return (ENOSYS);
567
568	mtx_lock(&msq_mtx);
569	if (key != IPC_PRIVATE) {
570		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
571			msqkptr = &msqids[msqid];
572			if (msqkptr->u.msg_qbytes != 0 &&
573			    msqkptr->u.msg_perm.key == key)
574				break;
575		}
576		if (msqid < msginfo.msgmni) {
577			DPRINTF(("found public key\n"));
578			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
579				DPRINTF(("not exclusive\n"));
580				error = EEXIST;
581				goto done2;
582			}
583			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
584			    msgflg & 0700))) {
585				DPRINTF(("requester doesn't have 0%o access\n",
586				    msgflg & 0700));
587				goto done2;
588			}
589#ifdef MAC
590			error = mac_sysvmsq_check_msqget(cred, msqkptr);
591			if (error != 0)
592				goto done2;
593#endif
594			goto found;
595		}
596	}
597
598	DPRINTF(("need to allocate the msqid_ds\n"));
599	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
600		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
601			/*
602			 * Look for an unallocated and unlocked msqid_ds.
603			 * msqid_ds's can be locked by msgsnd or msgrcv while
604			 * they are copying the message in/out.  We can't
605			 * re-use the entry until they release it.
606			 */
607			msqkptr = &msqids[msqid];
608			if (msqkptr->u.msg_qbytes == 0 &&
609			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
610				break;
611		}
612		if (msqid == msginfo.msgmni) {
613			DPRINTF(("no more msqid_ds's available\n"));
614			error = ENOSPC;
615			goto done2;
616		}
617		DPRINTF(("msqid %d is available\n", msqid));
618		msqkptr->u.msg_perm.key = key;
619		msqkptr->u.msg_perm.cuid = cred->cr_uid;
620		msqkptr->u.msg_perm.uid = cred->cr_uid;
621		msqkptr->u.msg_perm.cgid = cred->cr_gid;
622		msqkptr->u.msg_perm.gid = cred->cr_gid;
623		msqkptr->u.msg_perm.mode = (msgflg & 0777);
624		/* Make sure that the returned msqid is unique */
625		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
626		msqkptr->u.msg_first = NULL;
627		msqkptr->u.msg_last = NULL;
628		msqkptr->u.msg_cbytes = 0;
629		msqkptr->u.msg_qnum = 0;
630		msqkptr->u.msg_qbytes = msginfo.msgmnb;
631		msqkptr->u.msg_lspid = 0;
632		msqkptr->u.msg_lrpid = 0;
633		msqkptr->u.msg_stime = 0;
634		msqkptr->u.msg_rtime = 0;
635		msqkptr->u.msg_ctime = time_second;
636#ifdef MAC
637		mac_sysvmsq_create(cred, msqkptr);
638#endif
639	} else {
640		DPRINTF(("didn't find it and wasn't asked to create it\n"));
641		error = ENOENT;
642		goto done2;
643	}
644
645found:
646	/* Construct the unique msqid */
647	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
648done2:
649	mtx_unlock(&msq_mtx);
650	return (error);
651}
652
653#ifndef _SYS_SYSPROTO_H_
654struct msgsnd_args {
655	int	msqid;
656	const void	*msgp;
657	size_t	msgsz;
658	int	msgflg;
659};
660#endif
661int
662kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
663	struct thread *td;
664	int msqid;
665	const void *msgp;	/* XXX msgp is actually mtext. */
666	size_t msgsz;
667	int msgflg;
668	long mtype;
669{
670	int msqix, segs_needed, error = 0;
671	register struct msqid_kernel *msqkptr;
672	register struct msg *msghdr;
673	short next;
674
675	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
676		return (ENOSYS);
677
678	mtx_lock(&msq_mtx);
679	msqix = IPCID_TO_IX(msqid);
680
681	if (msqix < 0 || msqix >= msginfo.msgmni) {
682		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
683		    msginfo.msgmni));
684		error = EINVAL;
685		goto done2;
686	}
687
688	msqkptr = &msqids[msqix];
689	if (msqkptr->u.msg_qbytes == 0) {
690		DPRINTF(("no such message queue id\n"));
691		error = EINVAL;
692		goto done2;
693	}
694	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
695		DPRINTF(("wrong sequence number\n"));
696		error = EINVAL;
697		goto done2;
698	}
699
700	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
701		DPRINTF(("requester doesn't have write access\n"));
702		goto done2;
703	}
704
705#ifdef MAC
706	error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr);
707	if (error != 0)
708		goto done2;
709#endif
710
711	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
712	DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz,
713	    msginfo.msgssz, segs_needed));
714	for (;;) {
715		int need_more_resources = 0;
716
717		/*
718		 * check msgsz
719		 * (inside this loop in case msg_qbytes changes while we sleep)
720		 */
721
722		if (msgsz > msqkptr->u.msg_qbytes) {
723			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
724			error = EINVAL;
725			goto done2;
726		}
727
728		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
729			DPRINTF(("msqid is locked\n"));
730			need_more_resources = 1;
731		}
732		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
733			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
734			need_more_resources = 1;
735		}
736		if (segs_needed > nfree_msgmaps) {
737			DPRINTF(("segs_needed > nfree_msgmaps\n"));
738			need_more_resources = 1;
739		}
740		if (free_msghdrs == NULL) {
741			DPRINTF(("no more msghdrs\n"));
742			need_more_resources = 1;
743		}
744
745		if (need_more_resources) {
746			int we_own_it;
747
748			if ((msgflg & IPC_NOWAIT) != 0) {
749				DPRINTF(("need more resources but caller "
750				    "doesn't want to wait\n"));
751				error = EAGAIN;
752				goto done2;
753			}
754
755			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
756				DPRINTF(("we don't own the msqid_ds\n"));
757				we_own_it = 0;
758			} else {
759				/* Force later arrivals to wait for our
760				   request */
761				DPRINTF(("we own the msqid_ds\n"));
762				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
763				we_own_it = 1;
764			}
765			DPRINTF(("msgsnd:  goodnight\n"));
766			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
767			    "msgsnd", hz);
768			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
769			if (we_own_it)
770				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
771			if (error == EWOULDBLOCK) {
772				DPRINTF(("msgsnd:  timed out\n"));
773				continue;
774			}
775			if (error != 0) {
776				DPRINTF(("msgsnd:  interrupted system call\n"));
777				error = EINTR;
778				goto done2;
779			}
780
781			/*
782			 * Make sure that the msq queue still exists
783			 */
784
785			if (msqkptr->u.msg_qbytes == 0) {
786				DPRINTF(("msqid deleted\n"));
787				error = EIDRM;
788				goto done2;
789			}
790
791		} else {
792			DPRINTF(("got all the resources that we need\n"));
793			break;
794		}
795	}
796
797	/*
798	 * We have the resources that we need.
799	 * Make sure!
800	 */
801
802	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
803		panic("msg_perm.mode & MSG_LOCKED");
804	if (segs_needed > nfree_msgmaps)
805		panic("segs_needed > nfree_msgmaps");
806	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
807		panic("msgsz + msg_cbytes > msg_qbytes");
808	if (free_msghdrs == NULL)
809		panic("no more msghdrs");
810
811	/*
812	 * Re-lock the msqid_ds in case we page-fault when copying in the
813	 * message
814	 */
815
816	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
817		panic("msqid_ds is already locked");
818	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
819
820	/*
821	 * Allocate a message header
822	 */
823
824	msghdr = free_msghdrs;
825	free_msghdrs = msghdr->msg_next;
826	msghdr->msg_spot = -1;
827	msghdr->msg_ts = msgsz;
828	msghdr->msg_type = mtype;
829#ifdef MAC
830	/*
831	 * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here
832	 * immediately?  Or, should it be checked just before the msg is
833	 * enqueued in the msgq (as it is done now)?
834	 */
835	mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr);
836#endif
837
838	/*
839	 * Allocate space for the message
840	 */
841
842	while (segs_needed > 0) {
843		if (nfree_msgmaps <= 0)
844			panic("not enough msgmaps");
845		if (free_msgmaps == -1)
846			panic("nil free_msgmaps");
847		next = free_msgmaps;
848		if (next <= -1)
849			panic("next too low #1");
850		if (next >= msginfo.msgseg)
851			panic("next out of range #1");
852		DPRINTF(("allocating segment %d to message\n", next));
853		free_msgmaps = msgmaps[next].next;
854		nfree_msgmaps--;
855		msgmaps[next].next = msghdr->msg_spot;
856		msghdr->msg_spot = next;
857		segs_needed--;
858	}
859
860	/*
861	 * Validate the message type
862	 */
863
864	if (msghdr->msg_type < 1) {
865		msg_freehdr(msghdr);
866		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
867		wakeup(msqkptr);
868		DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
869		error = EINVAL;
870		goto done2;
871	}
872
873	/*
874	 * Copy in the message body
875	 */
876
877	next = msghdr->msg_spot;
878	while (msgsz > 0) {
879		size_t tlen;
880		if (msgsz > msginfo.msgssz)
881			tlen = msginfo.msgssz;
882		else
883			tlen = msgsz;
884		if (next <= -1)
885			panic("next too low #2");
886		if (next >= msginfo.msgseg)
887			panic("next out of range #2");
888		mtx_unlock(&msq_mtx);
889		if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz],
890		    tlen)) != 0) {
891			mtx_lock(&msq_mtx);
892			DPRINTF(("error %d copying in message segment\n",
893			    error));
894			msg_freehdr(msghdr);
895			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
896			wakeup(msqkptr);
897			goto done2;
898		}
899		mtx_lock(&msq_mtx);
900		msgsz -= tlen;
901		msgp = (const char *)msgp + tlen;
902		next = msgmaps[next].next;
903	}
904	if (next != -1)
905		panic("didn't use all the msg segments");
906
907	/*
908	 * We've got the message.  Unlock the msqid_ds.
909	 */
910
911	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
912
913	/*
914	 * Make sure that the msqid_ds is still allocated.
915	 */
916
917	if (msqkptr->u.msg_qbytes == 0) {
918		msg_freehdr(msghdr);
919		wakeup(msqkptr);
920		error = EIDRM;
921		goto done2;
922	}
923
924#ifdef MAC
925	/*
926	 * Note: Since the task/thread allocates the msghdr and usually
927	 * primes it with its own MAC label, for a majority of policies, it
928	 * won't be necessary to check whether the msghdr has access
929	 * permissions to the msgq.  The mac_sysvmsq_check_msqsnd check would
930	 * suffice in that case.  However, this hook may be required where
931	 * individual policies derive a non-identical label for the msghdr
932	 * from the current thread label and may want to check the msghdr
933	 * enqueue permissions, along with read/write permissions to the
934	 * msgq.
935	 */
936	error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr);
937	if (error != 0) {
938		msg_freehdr(msghdr);
939		wakeup(msqkptr);
940		goto done2;
941	}
942#endif
943
944	/*
945	 * Put the message into the queue
946	 */
947	if (msqkptr->u.msg_first == NULL) {
948		msqkptr->u.msg_first = msghdr;
949		msqkptr->u.msg_last = msghdr;
950	} else {
951		msqkptr->u.msg_last->msg_next = msghdr;
952		msqkptr->u.msg_last = msghdr;
953	}
954	msqkptr->u.msg_last->msg_next = NULL;
955
956	msqkptr->u.msg_cbytes += msghdr->msg_ts;
957	msqkptr->u.msg_qnum++;
958	msqkptr->u.msg_lspid = td->td_proc->p_pid;
959	msqkptr->u.msg_stime = time_second;
960
961	wakeup(msqkptr);
962	td->td_retval[0] = 0;
963done2:
964	mtx_unlock(&msq_mtx);
965	return (error);
966}
967
968int
969msgsnd(td, uap)
970	struct thread *td;
971	register struct msgsnd_args *uap;
972{
973	int error;
974	long mtype;
975
976	DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp,
977	    uap->msgsz, uap->msgflg));
978
979	if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) {
980		DPRINTF(("error %d copying the message type\n", error));
981		return (error);
982	}
983	return (kern_msgsnd(td, uap->msqid,
984	    (const char *)uap->msgp + sizeof(mtype),
985	    uap->msgsz, uap->msgflg, mtype));
986}
987
#if !defined(_SYS_SYSPROTO_H_)
/* Argument layout of the msgrcv system call. */
struct msgrcv_args {
	int	msqid;		/* message queue identifier */
	void	*msgp;		/* user buffer for the message */
	size_t	msgsz;		/* size argument supplied by the caller */
	long	msgtyp;		/* requested message type */
	int	msgflg;		/* flags */
};
#endif
997int
998kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
999	struct thread *td;
1000	int msqid;
1001	void *msgp;	/* XXX msgp is actually mtext. */
1002	size_t msgsz;
1003	long msgtyp;
1004	int msgflg;
1005	long *mtype;
1006{
1007	size_t len;
1008	register struct msqid_kernel *msqkptr;
1009	register struct msg *msghdr;
1010	int msqix, error = 0;
1011	short next;
1012
1013	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
1014		return (ENOSYS);
1015
1016	msqix = IPCID_TO_IX(msqid);
1017
1018	if (msqix < 0 || msqix >= msginfo.msgmni) {
1019		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
1020		    msginfo.msgmni));
1021		return (EINVAL);
1022	}
1023
1024	msqkptr = &msqids[msqix];
1025	mtx_lock(&msq_mtx);
1026	if (msqkptr->u.msg_qbytes == 0) {
1027		DPRINTF(("no such message queue id\n"));
1028		error = EINVAL;
1029		goto done2;
1030	}
1031	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1032		DPRINTF(("wrong sequence number\n"));
1033		error = EINVAL;
1034		goto done2;
1035	}
1036
1037	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1038		DPRINTF(("requester doesn't have read access\n"));
1039		goto done2;
1040	}
1041
1042#ifdef MAC
1043	error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr);
1044	if (error != 0)
1045		goto done2;
1046#endif
1047
1048	msghdr = NULL;
1049	while (msghdr == NULL) {
1050		if (msgtyp == 0) {
1051			msghdr = msqkptr->u.msg_first;
1052			if (msghdr != NULL) {
1053				if (msgsz < msghdr->msg_ts &&
1054				    (msgflg & MSG_NOERROR) == 0) {
1055					DPRINTF(("first message on the queue "
1056					    "is too big (want %zu, got %d)\n",
1057					    msgsz, msghdr->msg_ts));
1058					error = E2BIG;
1059					goto done2;
1060				}
1061#ifdef MAC
1062				error = mac_sysvmsq_check_msgrcv(td->td_ucred,
1063				    msghdr);
1064				if (error != 0)
1065					goto done2;
1066#endif
1067				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1068					msqkptr->u.msg_first = NULL;
1069					msqkptr->u.msg_last = NULL;
1070				} else {
1071					msqkptr->u.msg_first = msghdr->msg_next;
1072					if (msqkptr->u.msg_first == NULL)
1073						panic("msg_first/last screwed up #1");
1074				}
1075			}
1076		} else {
1077			struct msg *previous;
1078			struct msg **prev;
1079
1080			previous = NULL;
1081			prev = &(msqkptr->u.msg_first);
1082			while ((msghdr = *prev) != NULL) {
1083				/*
1084				 * Is this message's type an exact match or is
1085				 * this message's type less than or equal to
1086				 * the absolute value of a negative msgtyp?
1087				 * Note that the second half of this test can
1088				 * NEVER be true if msgtyp is positive since
1089				 * msg_type is always positive!
1090				 */
1091
1092				if (msgtyp == msghdr->msg_type ||
1093				    msghdr->msg_type <= -msgtyp) {
1094					DPRINTF(("found message type %ld, "
1095					    "requested %ld\n",
1096					    msghdr->msg_type, msgtyp));
1097					if (msgsz < msghdr->msg_ts &&
1098					    (msgflg & MSG_NOERROR) == 0) {
1099						DPRINTF(("requested message "
1100						    "on the queue is too big "
1101						    "(want %zu, got %hu)\n",
1102						    msgsz, msghdr->msg_ts));
1103						error = E2BIG;
1104						goto done2;
1105					}
1106#ifdef MAC
1107					error = mac_sysvmsq_check_msgrcv(
1108					    td->td_ucred, msghdr);
1109					if (error != 0)
1110						goto done2;
1111#endif
1112					*prev = msghdr->msg_next;
1113					if (msghdr == msqkptr->u.msg_last) {
1114						if (previous == NULL) {
1115							if (prev !=
1116							    &msqkptr->u.msg_first)
1117								panic("msg_first/last screwed up #2");
1118							msqkptr->u.msg_first =
1119							    NULL;
1120							msqkptr->u.msg_last =
1121							    NULL;
1122						} else {
1123							if (prev ==
1124							    &msqkptr->u.msg_first)
1125								panic("msg_first/last screwed up #3");
1126							msqkptr->u.msg_last =
1127							    previous;
1128						}
1129					}
1130					break;
1131				}
1132				previous = msghdr;
1133				prev = &(msghdr->msg_next);
1134			}
1135		}
1136
1137		/*
1138		 * We've either extracted the msghdr for the appropriate
1139		 * message or there isn't one.
1140		 * If there is one then bail out of this loop.
1141		 */
1142
1143		if (msghdr != NULL)
1144			break;
1145
1146		/*
1147		 * Hmph!  No message found.  Does the user want to wait?
1148		 */
1149
1150		if ((msgflg & IPC_NOWAIT) != 0) {
1151			DPRINTF(("no appropriate message found (msgtyp=%ld)\n",
1152			    msgtyp));
1153			/* The SVID says to return ENOMSG. */
1154			error = ENOMSG;
1155			goto done2;
1156		}
1157
1158		/*
1159		 * Wait for something to happen
1160		 */
1161
1162		DPRINTF(("msgrcv:  goodnight\n"));
1163		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1164		    "msgrcv", 0);
1165		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1166
1167		if (error != 0) {
1168			DPRINTF(("msgrcv:  interrupted system call\n"));
1169			error = EINTR;
1170			goto done2;
1171		}
1172
1173		/*
1174		 * Make sure that the msq queue still exists
1175		 */
1176
1177		if (msqkptr->u.msg_qbytes == 0 ||
1178		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1179			DPRINTF(("msqid deleted\n"));
1180			error = EIDRM;
1181			goto done2;
1182		}
1183	}
1184
1185	/*
1186	 * Return the message to the user.
1187	 *
1188	 * First, do the bookkeeping (before we risk being interrupted).
1189	 */
1190
1191	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1192	msqkptr->u.msg_qnum--;
1193	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1194	msqkptr->u.msg_rtime = time_second;
1195
1196	/*
1197	 * Make msgsz the actual amount that we'll be returning.
1198	 * Note that this effectively truncates the message if it is too long
1199	 * (since msgsz is never increased).
1200	 */
1201
1202	DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz,
1203	    msghdr->msg_ts));
1204	if (msgsz > msghdr->msg_ts)
1205		msgsz = msghdr->msg_ts;
1206	*mtype = msghdr->msg_type;
1207
1208	/*
1209	 * Return the segments to the user
1210	 */
1211
1212	next = msghdr->msg_spot;
1213	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1214		size_t tlen;
1215
1216		if (msgsz - len > msginfo.msgssz)
1217			tlen = msginfo.msgssz;
1218		else
1219			tlen = msgsz - len;
1220		if (next <= -1)
1221			panic("next too low #3");
1222		if (next >= msginfo.msgseg)
1223			panic("next out of range #3");
1224		mtx_unlock(&msq_mtx);
1225		error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen);
1226		mtx_lock(&msq_mtx);
1227		if (error != 0) {
1228			DPRINTF(("error (%d) copying out message segment\n",
1229			    error));
1230			msg_freehdr(msghdr);
1231			wakeup(msqkptr);
1232			goto done2;
1233		}
1234		msgp = (char *)msgp + tlen;
1235		next = msgmaps[next].next;
1236	}
1237
1238	/*
1239	 * Done, return the actual number of bytes copied out.
1240	 */
1241
1242	msg_freehdr(msghdr);
1243	wakeup(msqkptr);
1244	td->td_retval[0] = msgsz;
1245done2:
1246	mtx_unlock(&msq_mtx);
1247	return (error);
1248}
1249
1250int
1251msgrcv(td, uap)
1252	struct thread *td;
1253	register struct msgrcv_args *uap;
1254{
1255	int error;
1256	long mtype;
1257
1258	DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid,
1259	    uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg));
1260
1261	if ((error = kern_msgrcv(td, uap->msqid,
1262	    (char *)uap->msgp + sizeof(mtype), uap->msgsz,
1263	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1264		return (error);
1265	if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0)
1266		DPRINTF(("error %d copying the message type\n", error));
1267	return (error);
1268}
1269
1270static int
1271sysctl_msqids(SYSCTL_HANDLER_ARGS)
1272{
1273
1274	return (SYSCTL_OUT(req, msqids,
1275	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1276}
1277
1278SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1279    "Maximum message size");
1280SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
1281    "Number of message queue identifiers");
1282SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
1283    "Maximum number of bytes in a queue");
1284SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
1285    "Maximum number of messages in the system");
1286SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
1287    "Size of a message segment");
1288SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
1289    "Number of message segments");
1290SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1291    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1292