sysv_msg.c revision 167232
1/*-
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19/*-
20 * Copyright (c) 2003-2005 McAfee, Inc.
21 * All rights reserved.
22 *
23 * This software was developed for the FreeBSD Project in part by McAfee
24 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26 * program.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 *    notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 *    notice, this list of conditions and the following disclaimer in the
35 *    documentation and/or other materials provided with the distribution.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/kern/sysv_msg.c 167232 2007-03-05 13:10:58Z rwatson $");
52
53#include "opt_sysvipc.h"
54#include "opt_mac.h"
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/sysproto.h>
59#include <sys/kernel.h>
60#include <sys/priv.h>
61#include <sys/proc.h>
62#include <sys/lock.h>
63#include <sys/mutex.h>
64#include <sys/module.h>
65#include <sys/msg.h>
66#include <sys/syscall.h>
67#include <sys/syscallsubr.h>
68#include <sys/sysent.h>
69#include <sys/sysctl.h>
70#include <sys/malloc.h>
71#include <sys/jail.h>
72
73#include <security/mac/mac_framework.h>
74
75static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
76
77static void msginit(void);
78static int msgunload(void);
79static int sysvmsg_modload(struct module *, int, void *);
80
81#ifdef MSG_DEBUG
82#define DPRINTF(a)	printf a
83#else
84#define DPRINTF(a)
85#endif
86
87static void msg_freehdr(struct msg *msghdr);
88
89/* XXX casting to (sy_call_t *) is bogus, as usual. */
90static sy_call_t *msgcalls[] = {
91	(sy_call_t *)msgctl, (sy_call_t *)msgget,
92	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
93};
94
/*
 * Compile-time defaults for the message limits.  Any of them (except
 * MSGMAX, which is derived) can be overridden by an earlier definition.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* segment size in bytes; must be 2^N */
#endif
#if !defined(MSGSEG)
#define MSGSEG	2048		/* number of segments; at most 32767 */
#endif
#define MSGMAX	(MSGSSZ * MSGSEG)	/* max message size: whole pool */
#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#if !defined(MSGMNI)
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#if !defined(MSGTQL)
#define MSGTQL	40		/* max messages in system */
#endif
111
112/*
113 * Based on the configuration parameters described in an SVR2 (yes, two)
114 * config(1m) man page.
115 *
116 * Each message is broken up and stored in segments that are msgssz bytes
117 * long.  For efficiency reasons, this should be a power of two.  Also,
118 * it doesn't make sense if it is less than 8 or greater than about 256.
119 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
120 * two between 8 and 1024 inclusive (and panic's if it isn't).
121 */
122struct msginfo msginfo = {
123                MSGMAX,         /* max chars in a message */
124                MSGMNI,         /* # of message queue identifiers */
125                MSGMNB,         /* max chars in a queue */
126                MSGTQL,         /* max messages in system */
127                MSGSSZ,         /* size of a message segment */
128                		/* (must be small power of 2 greater than 4) */
129                MSGSEG          /* number of message segments */
130};
131
/*
 * Macros to convert between msqid_ds's and msqid's
 * (specific to this implementation).
 *
 * A msqid holds the table index in its low 16 bits and the queue's
 * msg_perm.seq in the 16 bits above that.
 */
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
#define MSQID(ix,ds)	(((ix) & 0xffff) | \
			    (((ds).msg_perm.seq << 16) & 0xffff0000))
139
140/*
141 * The rest of this file is specific to this particular implementation.
142 */
143
144struct msgmap {
145	short	next;		/* next segment in buffer */
146    				/* -1 -> available */
147    				/* 0..(MSGSEG-1) -> index of next segment */
148};
149
150#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
151
152static int nfree_msgmaps;	/* # of free map entries */
153static short free_msgmaps;	/* head of linked list of free map entries */
154static struct msg *free_msghdrs;/* list of free msg headers */
155static char *msgpool;		/* MSGMAX byte long msg buffer pool */
156static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
157static struct msg *msghdrs;	/* MSGTQL msg headers */
158static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
159static struct mtx msq_mtx;	/* global mutex for message queues. */
160
161static void
162msginit()
163{
164	register int i;
165
166	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
167	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
168	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
169	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
170	TUNABLE_INT_FETCH("kern.ipc.msgmnb", &msginfo.msgmnb);
171	TUNABLE_INT_FETCH("kern.ipc.msgtql", &msginfo.msgtql);
172
173	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
174	if (msgpool == NULL)
175		panic("msgpool is NULL");
176	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
177	if (msgmaps == NULL)
178		panic("msgmaps is NULL");
179	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
180	if (msghdrs == NULL)
181		panic("msghdrs is NULL");
182	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
183	    M_WAITOK);
184	if (msqids == NULL)
185		panic("msqids is NULL");
186
187	/*
188	 * msginfo.msgssz should be a power of two for efficiency reasons.
189	 * It is also pretty silly if msginfo.msgssz is less than 8
190	 * or greater than about 256 so ...
191	 */
192
193	i = 8;
194	while (i < 1024 && i != msginfo.msgssz)
195		i <<= 1;
196    	if (i != msginfo.msgssz) {
197		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
198		    msginfo.msgssz));
199		panic("msginfo.msgssz not a small power of 2");
200	}
201
202	if (msginfo.msgseg > 32767) {
203		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
204		panic("msginfo.msgseg > 32767");
205	}
206
207	if (msgmaps == NULL)
208		panic("msgmaps is NULL");
209
210	for (i = 0; i < msginfo.msgseg; i++) {
211		if (i > 0)
212			msgmaps[i-1].next = i;
213		msgmaps[i].next = -1;	/* implies entry is available */
214	}
215	free_msgmaps = 0;
216	nfree_msgmaps = msginfo.msgseg;
217
218	if (msghdrs == NULL)
219		panic("msghdrs is NULL");
220
221	for (i = 0; i < msginfo.msgtql; i++) {
222		msghdrs[i].msg_type = 0;
223		if (i > 0)
224			msghdrs[i-1].msg_next = &msghdrs[i];
225		msghdrs[i].msg_next = NULL;
226#ifdef MAC
227		mac_init_sysv_msgmsg(&msghdrs[i]);
228#endif
229    	}
230	free_msghdrs = &msghdrs[0];
231
232	if (msqids == NULL)
233		panic("msqids is NULL");
234
235	for (i = 0; i < msginfo.msgmni; i++) {
236		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
237		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
238		msqids[i].u.msg_perm.mode = 0;
239#ifdef MAC
240		mac_init_sysv_msgqueue(&msqids[i]);
241#endif
242	}
243	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
244}
245
246static int
247msgunload()
248{
249	struct msqid_kernel *msqkptr;
250	int msqid;
251#ifdef MAC
252	int i;
253#endif
254
255	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
256		/*
257		 * Look for an unallocated and unlocked msqid_ds.
258		 * msqid_ds's can be locked by msgsnd or msgrcv while
259		 * they are copying the message in/out.  We can't
260		 * re-use the entry until they release it.
261		 */
262		msqkptr = &msqids[msqid];
263		if (msqkptr->u.msg_qbytes != 0 ||
264		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
265			break;
266	}
267	if (msqid != msginfo.msgmni)
268		return (EBUSY);
269
270#ifdef MAC
271	for (i = 0; i < msginfo.msgtql; i++)
272		mac_destroy_sysv_msgmsg(&msghdrs[i]);
273	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
274		mac_destroy_sysv_msgqueue(&msqids[msqid]);
275#endif
276	free(msgpool, M_MSG);
277	free(msgmaps, M_MSG);
278	free(msghdrs, M_MSG);
279	free(msqids, M_MSG);
280	mtx_destroy(&msq_mtx);
281	return (0);
282}
283
284
285static int
286sysvmsg_modload(struct module *module, int cmd, void *arg)
287{
288	int error = 0;
289
290	switch (cmd) {
291	case MOD_LOAD:
292		msginit();
293		break;
294	case MOD_UNLOAD:
295		error = msgunload();
296		break;
297	case MOD_SHUTDOWN:
298		break;
299	default:
300		error = EINVAL;
301		break;
302	}
303	return (error);
304}
305
306static moduledata_t sysvmsg_mod = {
307	"sysvmsg",
308	&sysvmsg_modload,
309	NULL
310};
311
312SYSCALL_MODULE_HELPER(msgsys);
313SYSCALL_MODULE_HELPER(msgctl);
314SYSCALL_MODULE_HELPER(msgget);
315SYSCALL_MODULE_HELPER(msgsnd);
316SYSCALL_MODULE_HELPER(msgrcv);
317
318DECLARE_MODULE(sysvmsg, sysvmsg_mod,
319	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
320MODULE_VERSION(sysvmsg, 1);
321
322/*
323 * Entry point for all MSG calls.
324 */
325int
326msgsys(td, uap)
327	struct thread *td;
328	/* XXX actually varargs. */
329	struct msgsys_args /* {
330		int	which;
331		int	a2;
332		int	a3;
333		int	a4;
334		int	a5;
335		int	a6;
336	} */ *uap;
337{
338	int error;
339
340	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
341		return (ENOSYS);
342	if (uap->which < 0 ||
343	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
344		return (EINVAL);
345	error = (*msgcalls[uap->which])(td, &uap->a2);
346	return (error);
347}
348
349static void
350msg_freehdr(msghdr)
351	struct msg *msghdr;
352{
353	while (msghdr->msg_ts > 0) {
354		short next;
355		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
356			panic("msghdr->msg_spot out of range");
357		next = msgmaps[msghdr->msg_spot].next;
358		msgmaps[msghdr->msg_spot].next = free_msgmaps;
359		free_msgmaps = msghdr->msg_spot;
360		nfree_msgmaps++;
361		msghdr->msg_spot = next;
362		if (msghdr->msg_ts >= msginfo.msgssz)
363			msghdr->msg_ts -= msginfo.msgssz;
364		else
365			msghdr->msg_ts = 0;
366	}
367	if (msghdr->msg_spot != -1)
368		panic("msghdr->msg_spot != -1");
369	msghdr->msg_next = free_msghdrs;
370	free_msghdrs = msghdr;
371#ifdef MAC
372	mac_cleanup_sysv_msgmsg(msghdr);
373#endif
374}
375
376#ifndef _SYS_SYSPROTO_H_
377struct msgctl_args {
378	int	msqid;
379	int	cmd;
380	struct	msqid_ds *buf;
381};
382#endif
383int
384msgctl(td, uap)
385	struct thread *td;
386	register struct msgctl_args *uap;
387{
388	int msqid = uap->msqid;
389	int cmd = uap->cmd;
390	struct msqid_ds msqbuf;
391	int error;
392
393	DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf));
394	if (cmd == IPC_SET &&
395	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
396		return (error);
397	error = kern_msgctl(td, msqid, cmd, &msqbuf);
398	if (cmd == IPC_STAT && error == 0)
399		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
400	return (error);
401}
402
403int
404kern_msgctl(td, msqid, cmd, msqbuf)
405	struct thread *td;
406	int msqid;
407	int cmd;
408	struct msqid_ds *msqbuf;
409{
410	int rval, error, msqix;
411	register struct msqid_kernel *msqkptr;
412
413	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
414		return (ENOSYS);
415
416	msqix = IPCID_TO_IX(msqid);
417
418	if (msqix < 0 || msqix >= msginfo.msgmni) {
419		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
420		    msginfo.msgmni));
421		return (EINVAL);
422	}
423
424	msqkptr = &msqids[msqix];
425
426	mtx_lock(&msq_mtx);
427	if (msqkptr->u.msg_qbytes == 0) {
428		DPRINTF(("no such msqid\n"));
429		error = EINVAL;
430		goto done2;
431	}
432	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
433		DPRINTF(("wrong sequence number\n"));
434		error = EINVAL;
435		goto done2;
436	}
437#ifdef MAC
438	error = mac_check_sysv_msqctl(td->td_ucred, msqkptr, cmd);
439	if (error != 0)
440		goto done2;
441#endif
442
443	error = 0;
444	rval = 0;
445
446	switch (cmd) {
447
448	case IPC_RMID:
449	{
450		struct msg *msghdr;
451		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
452			goto done2;
453
454#ifdef MAC
455		/*
456		 * Check that the thread has MAC access permissions to
457		 * individual msghdrs.  Note: We need to do this in a
458		 * separate loop because the actual loop alters the
459		 * msq/msghdr info as it progresses, and there is no going
460		 * back if half the way through we discover that the
461		 * thread cannot free a certain msghdr.  The msq will get
462		 * into an inconsistent state.
463		 */
464		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
465		    msghdr = msghdr->msg_next) {
466			error = mac_check_sysv_msgrmid(td->td_ucred, msghdr);
467			if (error != 0)
468				goto done2;
469		}
470#endif
471
472		/* Free the message headers */
473		msghdr = msqkptr->u.msg_first;
474		while (msghdr != NULL) {
475			struct msg *msghdr_tmp;
476
477			/* Free the segments of each message */
478			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
479			msqkptr->u.msg_qnum--;
480			msghdr_tmp = msghdr;
481			msghdr = msghdr->msg_next;
482			msg_freehdr(msghdr_tmp);
483		}
484
485		if (msqkptr->u.msg_cbytes != 0)
486			panic("msg_cbytes is screwed up");
487		if (msqkptr->u.msg_qnum != 0)
488			panic("msg_qnum is screwed up");
489
490		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
491
492#ifdef MAC
493		mac_cleanup_sysv_msgqueue(msqkptr);
494#endif
495
496		wakeup(msqkptr);
497	}
498
499		break;
500
501	case IPC_SET:
502		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
503			goto done2;
504		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
505			error = priv_check_cred(td->td_ucred,
506			    PRIV_IPC_MSGSIZE, SUSER_ALLOWJAIL);
507			if (error)
508				goto done2;
509		}
510		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
511			DPRINTF(("can't increase msg_qbytes beyond %d"
512			    "(truncating)\n", msginfo.msgmnb));
513			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
514		}
515		if (msqbuf->msg_qbytes == 0) {
516			DPRINTF(("can't reduce msg_qbytes to 0\n"));
517			error = EINVAL;		/* non-standard errno! */
518			goto done2;
519		}
520		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
521		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
522		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
523		    (msqbuf->msg_perm.mode & 0777);
524		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
525		msqkptr->u.msg_ctime = time_second;
526		break;
527
528	case IPC_STAT:
529		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
530			DPRINTF(("requester doesn't have read access\n"));
531			goto done2;
532		}
533		*msqbuf = msqkptr->u;
534		break;
535
536	default:
537		DPRINTF(("invalid command %d\n", cmd));
538		error = EINVAL;
539		goto done2;
540	}
541
542	if (error == 0)
543		td->td_retval[0] = rval;
544done2:
545	mtx_unlock(&msq_mtx);
546	return (error);
547}
548
549#ifndef _SYS_SYSPROTO_H_
550struct msgget_args {
551	key_t	key;
552	int	msgflg;
553};
554#endif
555int
556msgget(td, uap)
557	struct thread *td;
558	register struct msgget_args *uap;
559{
560	int msqid, error = 0;
561	int key = uap->key;
562	int msgflg = uap->msgflg;
563	struct ucred *cred = td->td_ucred;
564	register struct msqid_kernel *msqkptr = NULL;
565
566	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
567
568	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
569		return (ENOSYS);
570
571	mtx_lock(&msq_mtx);
572	if (key != IPC_PRIVATE) {
573		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
574			msqkptr = &msqids[msqid];
575			if (msqkptr->u.msg_qbytes != 0 &&
576			    msqkptr->u.msg_perm.key == key)
577				break;
578		}
579		if (msqid < msginfo.msgmni) {
580			DPRINTF(("found public key\n"));
581			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
582				DPRINTF(("not exclusive\n"));
583				error = EEXIST;
584				goto done2;
585			}
586			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
587			    msgflg & 0700))) {
588				DPRINTF(("requester doesn't have 0%o access\n",
589				    msgflg & 0700));
590				goto done2;
591			}
592#ifdef MAC
593			error = mac_check_sysv_msqget(cred, msqkptr);
594			if (error != 0)
595				goto done2;
596#endif
597			goto found;
598		}
599	}
600
601	DPRINTF(("need to allocate the msqid_ds\n"));
602	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
603		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
604			/*
605			 * Look for an unallocated and unlocked msqid_ds.
606			 * msqid_ds's can be locked by msgsnd or msgrcv while
607			 * they are copying the message in/out.  We can't
608			 * re-use the entry until they release it.
609			 */
610			msqkptr = &msqids[msqid];
611			if (msqkptr->u.msg_qbytes == 0 &&
612			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
613				break;
614		}
615		if (msqid == msginfo.msgmni) {
616			DPRINTF(("no more msqid_ds's available\n"));
617			error = ENOSPC;
618			goto done2;
619		}
620		DPRINTF(("msqid %d is available\n", msqid));
621		msqkptr->u.msg_perm.key = key;
622		msqkptr->u.msg_perm.cuid = cred->cr_uid;
623		msqkptr->u.msg_perm.uid = cred->cr_uid;
624		msqkptr->u.msg_perm.cgid = cred->cr_gid;
625		msqkptr->u.msg_perm.gid = cred->cr_gid;
626		msqkptr->u.msg_perm.mode = (msgflg & 0777);
627		/* Make sure that the returned msqid is unique */
628		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
629		msqkptr->u.msg_first = NULL;
630		msqkptr->u.msg_last = NULL;
631		msqkptr->u.msg_cbytes = 0;
632		msqkptr->u.msg_qnum = 0;
633		msqkptr->u.msg_qbytes = msginfo.msgmnb;
634		msqkptr->u.msg_lspid = 0;
635		msqkptr->u.msg_lrpid = 0;
636		msqkptr->u.msg_stime = 0;
637		msqkptr->u.msg_rtime = 0;
638		msqkptr->u.msg_ctime = time_second;
639#ifdef MAC
640		mac_create_sysv_msgqueue(cred, msqkptr);
641#endif
642	} else {
643		DPRINTF(("didn't find it and wasn't asked to create it\n"));
644		error = ENOENT;
645		goto done2;
646	}
647
648found:
649	/* Construct the unique msqid */
650	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
651done2:
652	mtx_unlock(&msq_mtx);
653	return (error);
654}
655
656#ifndef _SYS_SYSPROTO_H_
657struct msgsnd_args {
658	int	msqid;
659	const void	*msgp;
660	size_t	msgsz;
661	int	msgflg;
662};
663#endif
664int
665kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
666	struct thread *td;
667	int msqid;
668	const void *msgp;	/* XXX msgp is actually mtext. */
669	size_t msgsz;
670	int msgflg;
671	long mtype;
672{
673	int msqix, segs_needed, error = 0;
674	register struct msqid_kernel *msqkptr;
675	register struct msg *msghdr;
676	short next;
677
678	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
679		return (ENOSYS);
680
681	mtx_lock(&msq_mtx);
682	msqix = IPCID_TO_IX(msqid);
683
684	if (msqix < 0 || msqix >= msginfo.msgmni) {
685		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
686		    msginfo.msgmni));
687		error = EINVAL;
688		goto done2;
689	}
690
691	msqkptr = &msqids[msqix];
692	if (msqkptr->u.msg_qbytes == 0) {
693		DPRINTF(("no such message queue id\n"));
694		error = EINVAL;
695		goto done2;
696	}
697	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
698		DPRINTF(("wrong sequence number\n"));
699		error = EINVAL;
700		goto done2;
701	}
702
703	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
704		DPRINTF(("requester doesn't have write access\n"));
705		goto done2;
706	}
707
708#ifdef MAC
709	error = mac_check_sysv_msqsnd(td->td_ucred, msqkptr);
710	if (error != 0)
711		goto done2;
712#endif
713
714	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
715	DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz,
716	    msginfo.msgssz, segs_needed));
717	for (;;) {
718		int need_more_resources = 0;
719
720		/*
721		 * check msgsz
722		 * (inside this loop in case msg_qbytes changes while we sleep)
723		 */
724
725		if (msgsz > msqkptr->u.msg_qbytes) {
726			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
727			error = EINVAL;
728			goto done2;
729		}
730
731		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
732			DPRINTF(("msqid is locked\n"));
733			need_more_resources = 1;
734		}
735		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
736			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
737			need_more_resources = 1;
738		}
739		if (segs_needed > nfree_msgmaps) {
740			DPRINTF(("segs_needed > nfree_msgmaps\n"));
741			need_more_resources = 1;
742		}
743		if (free_msghdrs == NULL) {
744			DPRINTF(("no more msghdrs\n"));
745			need_more_resources = 1;
746		}
747
748		if (need_more_resources) {
749			int we_own_it;
750
751			if ((msgflg & IPC_NOWAIT) != 0) {
752				DPRINTF(("need more resources but caller "
753				    "doesn't want to wait\n"));
754				error = EAGAIN;
755				goto done2;
756			}
757
758			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
759				DPRINTF(("we don't own the msqid_ds\n"));
760				we_own_it = 0;
761			} else {
762				/* Force later arrivals to wait for our
763				   request */
764				DPRINTF(("we own the msqid_ds\n"));
765				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
766				we_own_it = 1;
767			}
768			DPRINTF(("msgsnd:  goodnight\n"));
769			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
770			    "msgsnd", hz);
771			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
772			if (we_own_it)
773				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
774			if (error == EWOULDBLOCK) {
775				DPRINTF(("msgsnd:  timed out\n"));
776				continue;
777			}
778			if (error != 0) {
779				DPRINTF(("msgsnd:  interrupted system call\n"));
780				error = EINTR;
781				goto done2;
782			}
783
784			/*
785			 * Make sure that the msq queue still exists
786			 */
787
788			if (msqkptr->u.msg_qbytes == 0) {
789				DPRINTF(("msqid deleted\n"));
790				error = EIDRM;
791				goto done2;
792			}
793
794		} else {
795			DPRINTF(("got all the resources that we need\n"));
796			break;
797		}
798	}
799
800	/*
801	 * We have the resources that we need.
802	 * Make sure!
803	 */
804
805	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
806		panic("msg_perm.mode & MSG_LOCKED");
807	if (segs_needed > nfree_msgmaps)
808		panic("segs_needed > nfree_msgmaps");
809	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
810		panic("msgsz + msg_cbytes > msg_qbytes");
811	if (free_msghdrs == NULL)
812		panic("no more msghdrs");
813
814	/*
815	 * Re-lock the msqid_ds in case we page-fault when copying in the
816	 * message
817	 */
818
819	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
820		panic("msqid_ds is already locked");
821	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
822
823	/*
824	 * Allocate a message header
825	 */
826
827	msghdr = free_msghdrs;
828	free_msghdrs = msghdr->msg_next;
829	msghdr->msg_spot = -1;
830	msghdr->msg_ts = msgsz;
831	msghdr->msg_type = mtype;
832#ifdef MAC
833	/*
834	 * XXXMAC: Should the mac_check_sysv_msgmsq check follow here
835	 * immediately?  Or, should it be checked just before the msg is
836	 * enqueued in the msgq (as it is done now)?
837	 */
838	mac_create_sysv_msgmsg(td->td_ucred, msqkptr, msghdr);
839#endif
840
841	/*
842	 * Allocate space for the message
843	 */
844
845	while (segs_needed > 0) {
846		if (nfree_msgmaps <= 0)
847			panic("not enough msgmaps");
848		if (free_msgmaps == -1)
849			panic("nil free_msgmaps");
850		next = free_msgmaps;
851		if (next <= -1)
852			panic("next too low #1");
853		if (next >= msginfo.msgseg)
854			panic("next out of range #1");
855		DPRINTF(("allocating segment %d to message\n", next));
856		free_msgmaps = msgmaps[next].next;
857		nfree_msgmaps--;
858		msgmaps[next].next = msghdr->msg_spot;
859		msghdr->msg_spot = next;
860		segs_needed--;
861	}
862
863	/*
864	 * Validate the message type
865	 */
866
867	if (msghdr->msg_type < 1) {
868		msg_freehdr(msghdr);
869		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
870		wakeup(msqkptr);
871		DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
872		error = EINVAL;
873		goto done2;
874	}
875
876	/*
877	 * Copy in the message body
878	 */
879
880	next = msghdr->msg_spot;
881	while (msgsz > 0) {
882		size_t tlen;
883		if (msgsz > msginfo.msgssz)
884			tlen = msginfo.msgssz;
885		else
886			tlen = msgsz;
887		if (next <= -1)
888			panic("next too low #2");
889		if (next >= msginfo.msgseg)
890			panic("next out of range #2");
891		mtx_unlock(&msq_mtx);
892		if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz],
893		    tlen)) != 0) {
894			mtx_lock(&msq_mtx);
895			DPRINTF(("error %d copying in message segment\n",
896			    error));
897			msg_freehdr(msghdr);
898			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
899			wakeup(msqkptr);
900			goto done2;
901		}
902		mtx_lock(&msq_mtx);
903		msgsz -= tlen;
904		msgp = (const char *)msgp + tlen;
905		next = msgmaps[next].next;
906	}
907	if (next != -1)
908		panic("didn't use all the msg segments");
909
910	/*
911	 * We've got the message.  Unlock the msqid_ds.
912	 */
913
914	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
915
916	/*
917	 * Make sure that the msqid_ds is still allocated.
918	 */
919
920	if (msqkptr->u.msg_qbytes == 0) {
921		msg_freehdr(msghdr);
922		wakeup(msqkptr);
923		error = EIDRM;
924		goto done2;
925	}
926
927#ifdef MAC
928	/*
929	 * Note: Since the task/thread allocates the msghdr and usually
930	 * primes it with its own MAC label, for a majority of policies, it
931	 * won't be necessary to check whether the msghdr has access
932	 * permissions to the msgq.  The mac_check_sysv_msqsnd check would
933	 * suffice in that case.  However, this hook may be required where
934	 * individual policies derive a non-identical label for the msghdr
935	 * from the current thread label and may want to check the msghdr
936	 * enqueue permissions, along with read/write permissions to the
937	 * msgq.
938	 */
939	error = mac_check_sysv_msgmsq(td->td_ucred, msghdr, msqkptr);
940	if (error != 0) {
941		msg_freehdr(msghdr);
942		wakeup(msqkptr);
943		goto done2;
944	}
945#endif
946
947	/*
948	 * Put the message into the queue
949	 */
950	if (msqkptr->u.msg_first == NULL) {
951		msqkptr->u.msg_first = msghdr;
952		msqkptr->u.msg_last = msghdr;
953	} else {
954		msqkptr->u.msg_last->msg_next = msghdr;
955		msqkptr->u.msg_last = msghdr;
956	}
957	msqkptr->u.msg_last->msg_next = NULL;
958
959	msqkptr->u.msg_cbytes += msghdr->msg_ts;
960	msqkptr->u.msg_qnum++;
961	msqkptr->u.msg_lspid = td->td_proc->p_pid;
962	msqkptr->u.msg_stime = time_second;
963
964	wakeup(msqkptr);
965	td->td_retval[0] = 0;
966done2:
967	mtx_unlock(&msq_mtx);
968	return (error);
969}
970
971int
972msgsnd(td, uap)
973	struct thread *td;
974	register struct msgsnd_args *uap;
975{
976	int error;
977	long mtype;
978
979	DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp,
980	    uap->msgsz, uap->msgflg));
981
982	if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) {
983		DPRINTF(("error %d copying the message type\n", error));
984		return (error);
985	}
986	return (kern_msgsnd(td, uap->msqid,
987	    (const char *)uap->msgp + sizeof(mtype),
988	    uap->msgsz, uap->msgflg, mtype));
989}
990
991#ifndef _SYS_SYSPROTO_H_
992struct msgrcv_args {
993	int	msqid;
994	void	*msgp;
995	size_t	msgsz;
996	long	msgtyp;
997	int	msgflg;
998};
999#endif
1000int
1001kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
1002	struct thread *td;
1003	int msqid;
1004	void *msgp;	/* XXX msgp is actually mtext. */
1005	size_t msgsz;
1006	long msgtyp;
1007	int msgflg;
1008	long *mtype;
1009{
1010	size_t len;
1011	register struct msqid_kernel *msqkptr;
1012	register struct msg *msghdr;
1013	int msqix, error = 0;
1014	short next;
1015
1016	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
1017		return (ENOSYS);
1018
1019	msqix = IPCID_TO_IX(msqid);
1020
1021	if (msqix < 0 || msqix >= msginfo.msgmni) {
1022		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
1023		    msginfo.msgmni));
1024		return (EINVAL);
1025	}
1026
1027	msqkptr = &msqids[msqix];
1028	mtx_lock(&msq_mtx);
1029	if (msqkptr->u.msg_qbytes == 0) {
1030		DPRINTF(("no such message queue id\n"));
1031		error = EINVAL;
1032		goto done2;
1033	}
1034	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1035		DPRINTF(("wrong sequence number\n"));
1036		error = EINVAL;
1037		goto done2;
1038	}
1039
1040	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1041		DPRINTF(("requester doesn't have read access\n"));
1042		goto done2;
1043	}
1044
1045#ifdef MAC
1046	error = mac_check_sysv_msqrcv(td->td_ucred, msqkptr);
1047	if (error != 0)
1048		goto done2;
1049#endif
1050
1051	msghdr = NULL;
1052	while (msghdr == NULL) {
1053		if (msgtyp == 0) {
1054			msghdr = msqkptr->u.msg_first;
1055			if (msghdr != NULL) {
1056				if (msgsz < msghdr->msg_ts &&
1057				    (msgflg & MSG_NOERROR) == 0) {
1058					DPRINTF(("first message on the queue "
1059					    "is too big (want %zu, got %d)\n",
1060					    msgsz, msghdr->msg_ts));
1061					error = E2BIG;
1062					goto done2;
1063				}
1064#ifdef MAC
1065				error = mac_check_sysv_msgrcv(td->td_ucred,
1066				    msghdr);
1067				if (error != 0)
1068					goto done2;
1069#endif
1070				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1071					msqkptr->u.msg_first = NULL;
1072					msqkptr->u.msg_last = NULL;
1073				} else {
1074					msqkptr->u.msg_first = msghdr->msg_next;
1075					if (msqkptr->u.msg_first == NULL)
1076						panic("msg_first/last screwed up #1");
1077				}
1078			}
1079		} else {
1080			struct msg *previous;
1081			struct msg **prev;
1082
1083			previous = NULL;
1084			prev = &(msqkptr->u.msg_first);
1085			while ((msghdr = *prev) != NULL) {
1086				/*
1087				 * Is this message's type an exact match or is
1088				 * this message's type less than or equal to
1089				 * the absolute value of a negative msgtyp?
1090				 * Note that the second half of this test can
1091				 * NEVER be true if msgtyp is positive since
1092				 * msg_type is always positive!
1093				 */
1094
1095				if (msgtyp == msghdr->msg_type ||
1096				    msghdr->msg_type <= -msgtyp) {
1097					DPRINTF(("found message type %ld, "
1098					    "requested %ld\n",
1099					    msghdr->msg_type, msgtyp));
1100					if (msgsz < msghdr->msg_ts &&
1101					    (msgflg & MSG_NOERROR) == 0) {
1102						DPRINTF(("requested message "
1103						    "on the queue is too big "
1104						    "(want %zu, got %hu)\n",
1105						    msgsz, msghdr->msg_ts));
1106						error = E2BIG;
1107						goto done2;
1108					}
1109#ifdef MAC
1110					error = mac_check_sysv_msgrcv(
1111					    td->td_ucred, msghdr);
1112					if (error != 0)
1113						goto done2;
1114#endif
1115					*prev = msghdr->msg_next;
1116					if (msghdr == msqkptr->u.msg_last) {
1117						if (previous == NULL) {
1118							if (prev !=
1119							    &msqkptr->u.msg_first)
1120								panic("msg_first/last screwed up #2");
1121							msqkptr->u.msg_first =
1122							    NULL;
1123							msqkptr->u.msg_last =
1124							    NULL;
1125						} else {
1126							if (prev ==
1127							    &msqkptr->u.msg_first)
1128								panic("msg_first/last screwed up #3");
1129							msqkptr->u.msg_last =
1130							    previous;
1131						}
1132					}
1133					break;
1134				}
1135				previous = msghdr;
1136				prev = &(msghdr->msg_next);
1137			}
1138		}
1139
1140		/*
1141		 * We've either extracted the msghdr for the appropriate
1142		 * message or there isn't one.
1143		 * If there is one then bail out of this loop.
1144		 */
1145
1146		if (msghdr != NULL)
1147			break;
1148
1149		/*
1150		 * Hmph!  No message found.  Does the user want to wait?
1151		 */
1152
1153		if ((msgflg & IPC_NOWAIT) != 0) {
1154			DPRINTF(("no appropriate message found (msgtyp=%ld)\n",
1155			    msgtyp));
1156			/* The SVID says to return ENOMSG. */
1157			error = ENOMSG;
1158			goto done2;
1159		}
1160
1161		/*
1162		 * Wait for something to happen
1163		 */
1164
1165		DPRINTF(("msgrcv:  goodnight\n"));
1166		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1167		    "msgrcv", 0);
1168		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1169
1170		if (error != 0) {
1171			DPRINTF(("msgrcv:  interrupted system call\n"));
1172			error = EINTR;
1173			goto done2;
1174		}
1175
1176		/*
1177		 * Make sure that the msq queue still exists
1178		 */
1179
1180		if (msqkptr->u.msg_qbytes == 0 ||
1181		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1182			DPRINTF(("msqid deleted\n"));
1183			error = EIDRM;
1184			goto done2;
1185		}
1186	}
1187
1188	/*
1189	 * Return the message to the user.
1190	 *
1191	 * First, do the bookkeeping (before we risk being interrupted).
1192	 */
1193
1194	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1195	msqkptr->u.msg_qnum--;
1196	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1197	msqkptr->u.msg_rtime = time_second;
1198
1199	/*
1200	 * Make msgsz the actual amount that we'll be returning.
1201	 * Note that this effectively truncates the message if it is too long
1202	 * (since msgsz is never increased).
1203	 */
1204
1205	DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz,
1206	    msghdr->msg_ts));
1207	if (msgsz > msghdr->msg_ts)
1208		msgsz = msghdr->msg_ts;
1209	*mtype = msghdr->msg_type;
1210
1211	/*
1212	 * Return the segments to the user
1213	 */
1214
1215	next = msghdr->msg_spot;
1216	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1217		size_t tlen;
1218
1219		if (msgsz - len > msginfo.msgssz)
1220			tlen = msginfo.msgssz;
1221		else
1222			tlen = msgsz - len;
1223		if (next <= -1)
1224			panic("next too low #3");
1225		if (next >= msginfo.msgseg)
1226			panic("next out of range #3");
1227		mtx_unlock(&msq_mtx);
1228		error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen);
1229		mtx_lock(&msq_mtx);
1230		if (error != 0) {
1231			DPRINTF(("error (%d) copying out message segment\n",
1232			    error));
1233			msg_freehdr(msghdr);
1234			wakeup(msqkptr);
1235			goto done2;
1236		}
1237		msgp = (char *)msgp + tlen;
1238		next = msgmaps[next].next;
1239	}
1240
1241	/*
1242	 * Done, return the actual number of bytes copied out.
1243	 */
1244
1245	msg_freehdr(msghdr);
1246	wakeup(msqkptr);
1247	td->td_retval[0] = msgsz;
1248done2:
1249	mtx_unlock(&msq_mtx);
1250	return (error);
1251}
1252
1253int
1254msgrcv(td, uap)
1255	struct thread *td;
1256	register struct msgrcv_args *uap;
1257{
1258	int error;
1259	long mtype;
1260
1261	DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid,
1262	    uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg));
1263
1264	if ((error = kern_msgrcv(td, uap->msqid,
1265	    (char *)uap->msgp + sizeof(mtype), uap->msgsz,
1266	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1267		return (error);
1268	if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0)
1269		DPRINTF(("error %d copying the message type\n", error));
1270	return (error);
1271}
1272
1273static int
1274sysctl_msqids(SYSCTL_HANDLER_ARGS)
1275{
1276
1277	return (SYSCTL_OUT(req, msqids,
1278	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1279}
1280
1281SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1282    "Maximum message size");
1283SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
1284    "Number of message queue identifiers");
1285SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
1286    "Maximum number of bytes in a queue");
1287SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
1288    "Maximum number of messages in the system");
1289SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
1290    "Size of a message segment");
1291SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
1292    "Number of message segments");
1293SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1294    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1295