sysv_msg.c revision 100523
/* $FreeBSD: head/sys/kern/sysv_msg.c 100523 2002-07-22 18:27:54Z alfred $ */

/*
 * Implementation of SVID messages
 *
 * Author:  Daniel Boulet
 *
 * Copyright 1993 Daniel Boulet and RTMX Inc.
 *
 * This system call was implemented by Daniel Boulet under contract from RTMX.
 *
 * Redistribution and use in source forms, with and without modification,
 * are permitted provided that this entire comment appears intact.
 *
 * Redistribution in binary form may occur without any restrictions.
 * Obviously, it would be nice if you gave credit where credit is due
 * but requiring it would be too onerous.
 *
 * This software is provided ``AS IS'' without any warranties of any kind.
 */
21
22#include "opt_sysvipc.h"
23
24#include <sys/param.h>
25#include <sys/systm.h>
26#include <sys/sysproto.h>
27#include <sys/kernel.h>
28#include <sys/proc.h>
29#include <sys/lock.h>
30#include <sys/mutex.h>
31#include <sys/msg.h>
32#include <sys/syscall.h>
33#include <sys/sysent.h>
34#include <sys/sysctl.h>
35#include <sys/malloc.h>
36#include <sys/jail.h>
37
/* Kernel malloc type that every allocation in this file is charged to. */
static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");

/* Module life-cycle helpers, defined further down in this file. */
static void msginit(void);
static int msgunload(void);
static int sysvmsg_modload(struct module *, int, void *);

/*
 * Debug tracing.  DPRINTF takes a single, fully parenthesised printf
 * argument list, e.g. DPRINTF(("x=%d\n", x)), and expands to nothing
 * unless the file is built with MSG_DEBUG defined.
 */
#ifdef MSG_DEBUG
#define DPRINTF(a)	printf a
#else
#define DPRINTF(a)
#endif

/* Puts a message header and all of its segments back on the free lists. */
static void msg_freehdr(struct msg *msghdr);
51
/*
 * Dispatch table used by msgsys(): the 'which' argument indexes this
 * array, so the order of the entries is part of the msgsys() interface.
 *
 * XXX casting to (sy_call_t *) is bogus, as usual.
 */
static sy_call_t *msgcalls[] = {
	(sy_call_t *)msgctl, (sy_call_t *)msgget,
	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
};
57
/*
 * Header for one message.  Headers live in the msghdrs array and are
 * linked either on a queue (msg_first .. msg_last of a msqid_ds) or on
 * the free_msghdrs list.  The message body is kept in msgpool as a chain
 * of fixed-size segments threaded through the msgmaps array.
 */
struct msg {
	struct	msg *msg_next;	/* next msg in the chain */
	long	msg_type;	/* type of this message */
				/* >0 -> type of this message */
				/* 0 -> free header (as set up by msginit) */
	u_short	msg_ts;		/* size of this message, in bytes */
	short	msg_spot;	/* index of the first segment of the msg */
				/* in msgmaps/msgpool, -1 -> no segments */
};
66
67
/*
 * Compile-time defaults for the message queue limits.  Each one can be
 * overridden by defining the macro before this point; they seed the
 * msginfo structure below.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must not exceed 32767: segment indices */
				/* are stored in a short */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* size of the whole segment pool, bytes */
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max # of messages in the system */
#endif
84
/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  It makes
 * little sense for it to be less than 8 or very large, so msginit() in
 * this file checks that msgssz is a power of two between 8 and 1024
 * inclusive (and panics if it isn't).
 *
 * msgseg, msgssz and msgmni may be overridden by loader tunables in
 * msginit(); msgmax is then recomputed as msgseg * msgssz.
 */
struct msginfo msginfo = {
                MSGMAX,         /* max chars in a message */
                MSGMNI,         /* # of message queue identifiers */
                MSGMNB,         /* max chars in a queue */
                MSGTQL,         /* max messages in system */
                MSGSSZ,         /* size of a message segment */
                		/* (must be small power of 2 greater than 4) */
                MSGSEG          /* number of message segments */
};
104
/*
 * Macros to convert between msqid_ds's and msqid's
 * (specific to this implementation).
 *
 * A msqid packs the slot index into the low 16 bits and the slot's
 * sequence number into the high 16 bits:
 *
 *	MSQID(ix, ds)	build an id from index 'ix' and (ds).msg_perm.seq
 *	MSQID_IX(id)	extract the slot index from an id
 *	MSQID_SEQ(id)	extract the sequence number from an id
 *
 * The sequence number is converted to unsigned before it is shifted so
 * that a value with bit 15 set cannot overflow a signed int after the
 * usual integer promotions.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | \
	(((unsigned int)(ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
112
113/*
114 * The rest of this file is specific to this particular implementation.
115 */
116
/*
 * One entry per segment of msgpool.  Entries form singly linked chains,
 * by index, through the msgmaps array: either the chain of segments that
 * hold one message (starting at msg_spot) or the free chain (starting at
 * free_msgmaps).
 */
struct msgmap {
	short	next;		/* next segment in buffer */
				/* -1 -> end of chain */
				/* 0..(MSGSEG-1) -> index of next segment */
};
122
/*
 * Flag kept in msg_perm.mode, above the 0777 permission bits.  It is set
 * by msgsnd() while it waits for resources or copies a message in, and
 * keeps msgget() from re-using the slot in the meantime.
 */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* File-scope state; everything below is allocated and set up by msginit(). */
static int nfree_msgmaps;	/* # of free map entries */
static short free_msgmaps;	/* head of linked list of free map entries */
static struct msg *free_msghdrs;/* list of free msg headers */
static char *msgpool;		/* MSGMAX byte long msg buffer pool */
static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
static struct msg *msghdrs;	/* MSGTQL msg headers */
static struct msqid_ds *msqids;	/* MSGMNI msqid_ds struct's */
132
133static void
134msginit()
135{
136	register int i;
137
138	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
139	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
140	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
141	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
142
143	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
144	if (msgpool == NULL)
145		panic("msgpool is NULL");
146	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
147	if (msgmaps == NULL)
148		panic("msgmaps is NULL");
149	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
150	if (msghdrs == NULL)
151		panic("msghdrs is NULL");
152	msqids = malloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
153	if (msqids == NULL)
154		panic("msqids is NULL");
155
156	/*
157	 * msginfo.msgssz should be a power of two for efficiency reasons.
158	 * It is also pretty silly if msginfo.msgssz is less than 8
159	 * or greater than about 256 so ...
160	 */
161
162	i = 8;
163	while (i < 1024 && i != msginfo.msgssz)
164		i <<= 1;
165    	if (i != msginfo.msgssz) {
166		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
167		    msginfo.msgssz));
168		panic("msginfo.msgssz not a small power of 2");
169	}
170
171	if (msginfo.msgseg > 32767) {
172		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
173		panic("msginfo.msgseg > 32767");
174	}
175
176	if (msgmaps == NULL)
177		panic("msgmaps is NULL");
178
179	for (i = 0; i < msginfo.msgseg; i++) {
180		if (i > 0)
181			msgmaps[i-1].next = i;
182		msgmaps[i].next = -1;	/* implies entry is available */
183	}
184	free_msgmaps = 0;
185	nfree_msgmaps = msginfo.msgseg;
186
187	if (msghdrs == NULL)
188		panic("msghdrs is NULL");
189
190	for (i = 0; i < msginfo.msgtql; i++) {
191		msghdrs[i].msg_type = 0;
192		if (i > 0)
193			msghdrs[i-1].msg_next = &msghdrs[i];
194		msghdrs[i].msg_next = NULL;
195    	}
196	free_msghdrs = &msghdrs[0];
197
198	if (msqids == NULL)
199		panic("msqids is NULL");
200
201	for (i = 0; i < msginfo.msgmni; i++) {
202		msqids[i].msg_qbytes = 0;	/* implies entry is available */
203		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
204		msqids[i].msg_perm.mode = 0;
205	}
206}
207
208static int
209msgunload()
210{
211	struct msqid_ds *msqptr;
212	int msqid;
213
214	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
215		/*
216		 * Look for an unallocated and unlocked msqid_ds.
217		 * msqid_ds's can be locked by msgsnd or msgrcv while
218		 * they are copying the message in/out.  We can't
219		 * re-use the entry until they release it.
220		 */
221		msqptr = &msqids[msqid];
222		if (msqptr->msg_qbytes != 0 ||
223		    (msqptr->msg_perm.mode & MSG_LOCKED) != 0)
224			break;
225	}
226	if (msqid != msginfo.msgmni)
227		return (EBUSY);
228
229	free(msgpool, M_MSG);
230	free(msgmaps, M_MSG);
231	free(msghdrs, M_MSG);
232	free(msqids, M_MSG);
233	return (0);
234}
235
236
237static int
238sysvmsg_modload(struct module *module, int cmd, void *arg)
239{
240	int error = 0;
241
242	switch (cmd) {
243	case MOD_LOAD:
244		msginit();
245		break;
246	case MOD_UNLOAD:
247		error = msgunload();
248		break;
249	case MOD_SHUTDOWN:
250		break;
251	default:
252		error = EINVAL;
253		break;
254	}
255	return (error);
256}
257
/* Module description: name, event handler and (unused) handler argument. */
static moduledata_t sysvmsg_mod = {
	"sysvmsg",
	&sysvmsg_modload,
	NULL
};

/* Syscall registration glue for each entry point implemented below. */
SYSCALL_MODULE_HELPER(msgsys);
SYSCALL_MODULE_HELPER(msgctl);
SYSCALL_MODULE_HELPER(msgget);
SYSCALL_MODULE_HELPER(msgsnd);
SYSCALL_MODULE_HELPER(msgrcv);

/* Register the module itself and publish its version. */
DECLARE_MODULE(sysvmsg, sysvmsg_mod,
	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
MODULE_VERSION(sysvmsg, 1);
273
274/*
275 * Entry point for all MSG calls
276 *
277 * MPSAFE
278 */
279int
280msgsys(td, uap)
281	struct thread *td;
282	/* XXX actually varargs. */
283	struct msgsys_args /* {
284		u_int	which;
285		int	a2;
286		int	a3;
287		int	a4;
288		int	a5;
289		int	a6;
290	} */ *uap;
291{
292	int error;
293
294	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
295		return (ENOSYS);
296	if (uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
297		return (EINVAL);
298	mtx_lock(&Giant);
299	error = (*msgcalls[uap->which])(td, &uap->a2);
300	mtx_unlock(&Giant);
301	return (error);
302}
303
304static void
305msg_freehdr(msghdr)
306	struct msg *msghdr;
307{
308	while (msghdr->msg_ts > 0) {
309		short next;
310		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
311			panic("msghdr->msg_spot out of range");
312		next = msgmaps[msghdr->msg_spot].next;
313		msgmaps[msghdr->msg_spot].next = free_msgmaps;
314		free_msgmaps = msghdr->msg_spot;
315		nfree_msgmaps++;
316		msghdr->msg_spot = next;
317		if (msghdr->msg_ts >= msginfo.msgssz)
318			msghdr->msg_ts -= msginfo.msgssz;
319		else
320			msghdr->msg_ts = 0;
321	}
322	if (msghdr->msg_spot != -1)
323		panic("msghdr->msg_spot != -1");
324	msghdr->msg_next = free_msghdrs;
325	free_msghdrs = msghdr;
326}
327
/*
 * Argument layout of msgctl().  Only compiled when _SYS_SYSPROTO_H_ is
 * not defined (presumably the include guard of <sys/sysproto.h>, which
 * would otherwise supply the declaration).
 */
#ifndef _SYS_SYSPROTO_H_
struct msgctl_args {
	int	msqid;			/* queue identifier */
	int	cmd;			/* IPC_RMID, IPC_SET or IPC_STAT */
	struct	msqid_ds *buf;		/* user buffer for IPC_SET/IPC_STAT */
};
#endif
335
336/*
337 * MPSAFE
338 */
339int
340msgctl(td, uap)
341	struct thread *td;
342	register struct msgctl_args *uap;
343{
344	int msqid = uap->msqid;
345	int cmd = uap->cmd;
346	struct msqid_ds *user_msqptr = uap->buf;
347	int rval, error;
348	struct msqid_ds msqbuf;
349	register struct msqid_ds *msqptr;
350
351	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr));
352	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
353		return (ENOSYS);
354
355	mtx_lock(&Giant);
356	msqid = IPCID_TO_IX(msqid);
357
358	if (msqid < 0 || msqid >= msginfo.msgmni) {
359		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
360		    msginfo.msgmni));
361		error = EINVAL;
362		goto done2;
363	}
364
365	msqptr = &msqids[msqid];
366
367	if (msqptr->msg_qbytes == 0) {
368		DPRINTF(("no such msqid\n"));
369		error = EINVAL;
370		goto done2;
371	}
372	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
373		DPRINTF(("wrong sequence number\n"));
374		error = EINVAL;
375		goto done2;
376	}
377
378	error = 0;
379	rval = 0;
380
381	switch (cmd) {
382
383	case IPC_RMID:
384	{
385		struct msg *msghdr;
386		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
387			goto done2;
388		/* Free the message headers */
389		msghdr = msqptr->msg_first;
390		while (msghdr != NULL) {
391			struct msg *msghdr_tmp;
392
393			/* Free the segments of each message */
394			msqptr->msg_cbytes -= msghdr->msg_ts;
395			msqptr->msg_qnum--;
396			msghdr_tmp = msghdr;
397			msghdr = msghdr->msg_next;
398			msg_freehdr(msghdr_tmp);
399		}
400
401		if (msqptr->msg_cbytes != 0)
402			panic("msg_cbytes is screwed up");
403		if (msqptr->msg_qnum != 0)
404			panic("msg_qnum is screwed up");
405
406		msqptr->msg_qbytes = 0;	/* Mark it as free */
407
408		wakeup(msqptr);
409	}
410
411		break;
412
413	case IPC_SET:
414		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
415			goto done2;
416		if ((error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
417			goto done2;
418		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
419			error = suser(td);
420			if (error)
421				goto done2;
422		}
423		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
424			DPRINTF(("can't increase msg_qbytes beyond %d"
425			    "(truncating)\n", msginfo.msgmnb));
426			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
427		}
428		if (msqbuf.msg_qbytes == 0) {
429			DPRINTF(("can't reduce msg_qbytes to 0\n"));
430			error = EINVAL;		/* non-standard errno! */
431			goto done2;
432		}
433		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
434		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
435		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
436		    (msqbuf.msg_perm.mode & 0777);
437		msqptr->msg_qbytes = msqbuf.msg_qbytes;
438		msqptr->msg_ctime = time_second;
439		break;
440
441	case IPC_STAT:
442		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
443			DPRINTF(("requester doesn't have read access\n"));
444			goto done2;
445		}
446		error = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
447		break;
448
449	default:
450		DPRINTF(("invalid command %d\n", cmd));
451		error = EINVAL;
452		goto done2;
453	}
454
455	if (error == 0)
456		td->td_retval[0] = rval;
457done2:
458	mtx_unlock(&Giant);
459	return(error);
460}
461
/*
 * Argument layout of msgget().  Only compiled when _SYS_SYSPROTO_H_ is
 * not defined (presumably the include guard of <sys/sysproto.h>, which
 * would otherwise supply the declaration).
 */
#ifndef _SYS_SYSPROTO_H_
struct msgget_args {
	key_t	key;		/* IPC_PRIVATE or the key to look up/create */
	int	msgflg;		/* IPC_CREAT, IPC_EXCL and permission bits */
};
#endif
468
469/*
470 * MPSAFE
471 */
472int
473msgget(td, uap)
474	struct thread *td;
475	register struct msgget_args *uap;
476{
477	int msqid, error = 0;
478	int key = uap->key;
479	int msgflg = uap->msgflg;
480	struct ucred *cred = td->td_ucred;
481	register struct msqid_ds *msqptr = NULL;
482
483	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
484
485	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
486		return (ENOSYS);
487
488	mtx_lock(&Giant);
489	if (key != IPC_PRIVATE) {
490		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
491			msqptr = &msqids[msqid];
492			if (msqptr->msg_qbytes != 0 &&
493			    msqptr->msg_perm.key == key)
494				break;
495		}
496		if (msqid < msginfo.msgmni) {
497			DPRINTF(("found public key\n"));
498			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
499				DPRINTF(("not exclusive\n"));
500				error = EEXIST;
501				goto done2;
502			}
503			if ((error = ipcperm(td, &msqptr->msg_perm, msgflg & 0700 ))) {
504				DPRINTF(("requester doesn't have 0%o access\n",
505				    msgflg & 0700));
506				goto done2;
507			}
508			goto found;
509		}
510	}
511
512	DPRINTF(("need to allocate the msqid_ds\n"));
513	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
514		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
515			/*
516			 * Look for an unallocated and unlocked msqid_ds.
517			 * msqid_ds's can be locked by msgsnd or msgrcv while
518			 * they are copying the message in/out.  We can't
519			 * re-use the entry until they release it.
520			 */
521			msqptr = &msqids[msqid];
522			if (msqptr->msg_qbytes == 0 &&
523			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
524				break;
525		}
526		if (msqid == msginfo.msgmni) {
527			DPRINTF(("no more msqid_ds's available\n"));
528			error = ENOSPC;
529			goto done2;
530		}
531		DPRINTF(("msqid %d is available\n", msqid));
532		msqptr->msg_perm.key = key;
533		msqptr->msg_perm.cuid = cred->cr_uid;
534		msqptr->msg_perm.uid = cred->cr_uid;
535		msqptr->msg_perm.cgid = cred->cr_gid;
536		msqptr->msg_perm.gid = cred->cr_gid;
537		msqptr->msg_perm.mode = (msgflg & 0777);
538		/* Make sure that the returned msqid is unique */
539		msqptr->msg_perm.seq++;
540		msqptr->msg_first = NULL;
541		msqptr->msg_last = NULL;
542		msqptr->msg_cbytes = 0;
543		msqptr->msg_qnum = 0;
544		msqptr->msg_qbytes = msginfo.msgmnb;
545		msqptr->msg_lspid = 0;
546		msqptr->msg_lrpid = 0;
547		msqptr->msg_stime = 0;
548		msqptr->msg_rtime = 0;
549		msqptr->msg_ctime = time_second;
550	} else {
551		DPRINTF(("didn't find it and wasn't asked to create it\n"));
552		error = ENOENT;
553		goto done2;
554	}
555
556found:
557	/* Construct the unique msqid */
558	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
559done2:
560	mtx_unlock(&Giant);
561	return (error);
562}
563
/*
 * Argument layout of msgsnd().  Only compiled when _SYS_SYSPROTO_H_ is
 * not defined (presumably the include guard of <sys/sysproto.h>, which
 * would otherwise supply the declaration).
 */
#ifndef _SYS_SYSPROTO_H_
struct msgsnd_args {
	int	msqid;		/* queue identifier */
	void	*msgp;		/* user message: a long type, then the body */
	size_t	msgsz;		/* size of the body, excluding the type */
	int	msgflg;		/* IPC_NOWAIT or 0 */
};
#endif
572
573/*
574 * MPSAFE
575 */
576int
577msgsnd(td, uap)
578	struct thread *td;
579	register struct msgsnd_args *uap;
580{
581	int msqid = uap->msqid;
582	void *user_msgp = uap->msgp;
583	size_t msgsz = uap->msgsz;
584	int msgflg = uap->msgflg;
585	int segs_needed, error = 0;
586	register struct msqid_ds *msqptr;
587	register struct msg *msghdr;
588	short next;
589
590	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
591	    msgflg));
592	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
593		return (ENOSYS);
594
595	mtx_lock(&Giant);
596	msqid = IPCID_TO_IX(msqid);
597
598	if (msqid < 0 || msqid >= msginfo.msgmni) {
599		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
600		    msginfo.msgmni));
601		error = EINVAL;
602		goto done2;
603	}
604
605	msqptr = &msqids[msqid];
606	if (msqptr->msg_qbytes == 0) {
607		DPRINTF(("no such message queue id\n"));
608		error = EINVAL;
609		goto done2;
610	}
611	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
612		DPRINTF(("wrong sequence number\n"));
613		error = EINVAL;
614		goto done2;
615	}
616
617	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_W))) {
618		DPRINTF(("requester doesn't have write access\n"));
619		goto done2;
620	}
621
622	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
623	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
624	    segs_needed));
625	for (;;) {
626		int need_more_resources = 0;
627
628		/*
629		 * check msgsz
630		 * (inside this loop in case msg_qbytes changes while we sleep)
631		 */
632
633		if (msgsz > msqptr->msg_qbytes) {
634			DPRINTF(("msgsz > msqptr->msg_qbytes\n"));
635			error = EINVAL;
636			goto done2;
637		}
638
639		if (msqptr->msg_perm.mode & MSG_LOCKED) {
640			DPRINTF(("msqid is locked\n"));
641			need_more_resources = 1;
642		}
643		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
644			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
645			need_more_resources = 1;
646		}
647		if (segs_needed > nfree_msgmaps) {
648			DPRINTF(("segs_needed > nfree_msgmaps\n"));
649			need_more_resources = 1;
650		}
651		if (free_msghdrs == NULL) {
652			DPRINTF(("no more msghdrs\n"));
653			need_more_resources = 1;
654		}
655
656		if (need_more_resources) {
657			int we_own_it;
658
659			if ((msgflg & IPC_NOWAIT) != 0) {
660				DPRINTF(("need more resources but caller "
661				    "doesn't want to wait\n"));
662				error = EAGAIN;
663				goto done2;
664			}
665
666			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
667				DPRINTF(("we don't own the msqid_ds\n"));
668				we_own_it = 0;
669			} else {
670				/* Force later arrivals to wait for our
671				   request */
672				DPRINTF(("we own the msqid_ds\n"));
673				msqptr->msg_perm.mode |= MSG_LOCKED;
674				we_own_it = 1;
675			}
676			DPRINTF(("goodnight\n"));
677			error = tsleep(msqptr, (PZERO - 4) | PCATCH,
678			    "msgwait", 0);
679			DPRINTF(("good morning, error=%d\n", error));
680			if (we_own_it)
681				msqptr->msg_perm.mode &= ~MSG_LOCKED;
682			if (error != 0) {
683				DPRINTF(("msgsnd:  interrupted system call\n"));
684				error = EINTR;
685				goto done2;
686			}
687
688			/*
689			 * Make sure that the msq queue still exists
690			 */
691
692			if (msqptr->msg_qbytes == 0) {
693				DPRINTF(("msqid deleted\n"));
694				error = EIDRM;
695				goto done2;
696			}
697
698		} else {
699			DPRINTF(("got all the resources that we need\n"));
700			break;
701		}
702	}
703
704	/*
705	 * We have the resources that we need.
706	 * Make sure!
707	 */
708
709	if (msqptr->msg_perm.mode & MSG_LOCKED)
710		panic("msg_perm.mode & MSG_LOCKED");
711	if (segs_needed > nfree_msgmaps)
712		panic("segs_needed > nfree_msgmaps");
713	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
714		panic("msgsz + msg_cbytes > msg_qbytes");
715	if (free_msghdrs == NULL)
716		panic("no more msghdrs");
717
718	/*
719	 * Re-lock the msqid_ds in case we page-fault when copying in the
720	 * message
721	 */
722
723	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
724		panic("msqid_ds is already locked");
725	msqptr->msg_perm.mode |= MSG_LOCKED;
726
727	/*
728	 * Allocate a message header
729	 */
730
731	msghdr = free_msghdrs;
732	free_msghdrs = msghdr->msg_next;
733	msghdr->msg_spot = -1;
734	msghdr->msg_ts = msgsz;
735
736	/*
737	 * Allocate space for the message
738	 */
739
740	while (segs_needed > 0) {
741		if (nfree_msgmaps <= 0)
742			panic("not enough msgmaps");
743		if (free_msgmaps == -1)
744			panic("nil free_msgmaps");
745		next = free_msgmaps;
746		if (next <= -1)
747			panic("next too low #1");
748		if (next >= msginfo.msgseg)
749			panic("next out of range #1");
750		DPRINTF(("allocating segment %d to message\n", next));
751		free_msgmaps = msgmaps[next].next;
752		nfree_msgmaps--;
753		msgmaps[next].next = msghdr->msg_spot;
754		msghdr->msg_spot = next;
755		segs_needed--;
756	}
757
758	/*
759	 * Copy in the message type
760	 */
761
762	if ((error = copyin(user_msgp, &msghdr->msg_type,
763	    sizeof(msghdr->msg_type))) != 0) {
764		DPRINTF(("error %d copying the message type\n", error));
765		msg_freehdr(msghdr);
766		msqptr->msg_perm.mode &= ~MSG_LOCKED;
767		wakeup(msqptr);
768		goto done2;
769	}
770	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
771
772	/*
773	 * Validate the message type
774	 */
775
776	if (msghdr->msg_type < 1) {
777		msg_freehdr(msghdr);
778		msqptr->msg_perm.mode &= ~MSG_LOCKED;
779		wakeup(msqptr);
780		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
781		error = EINVAL;
782		goto done2;
783	}
784
785	/*
786	 * Copy in the message body
787	 */
788
789	next = msghdr->msg_spot;
790	while (msgsz > 0) {
791		size_t tlen;
792		if (msgsz > msginfo.msgssz)
793			tlen = msginfo.msgssz;
794		else
795			tlen = msgsz;
796		if (next <= -1)
797			panic("next too low #2");
798		if (next >= msginfo.msgseg)
799			panic("next out of range #2");
800		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
801		    tlen)) != 0) {
802			DPRINTF(("error %d copying in message segment\n",
803			    error));
804			msg_freehdr(msghdr);
805			msqptr->msg_perm.mode &= ~MSG_LOCKED;
806			wakeup(msqptr);
807			goto done2;
808		}
809		msgsz -= tlen;
810		user_msgp = (char *)user_msgp + tlen;
811		next = msgmaps[next].next;
812	}
813	if (next != -1)
814		panic("didn't use all the msg segments");
815
816	/*
817	 * We've got the message.  Unlock the msqid_ds.
818	 */
819
820	msqptr->msg_perm.mode &= ~MSG_LOCKED;
821
822	/*
823	 * Make sure that the msqid_ds is still allocated.
824	 */
825
826	if (msqptr->msg_qbytes == 0) {
827		msg_freehdr(msghdr);
828		wakeup(msqptr);
829		error = EIDRM;
830		goto done2;
831	}
832
833	/*
834	 * Put the message into the queue
835	 */
836
837	if (msqptr->msg_first == NULL) {
838		msqptr->msg_first = msghdr;
839		msqptr->msg_last = msghdr;
840	} else {
841		msqptr->msg_last->msg_next = msghdr;
842		msqptr->msg_last = msghdr;
843	}
844	msqptr->msg_last->msg_next = NULL;
845
846	msqptr->msg_cbytes += msghdr->msg_ts;
847	msqptr->msg_qnum++;
848	msqptr->msg_lspid = td->td_proc->p_pid;
849	msqptr->msg_stime = time_second;
850
851	wakeup(msqptr);
852	td->td_retval[0] = 0;
853done2:
854	mtx_unlock(&Giant);
855	return (error);
856}
857
/*
 * Argument layout of msgrcv().  Only compiled when _SYS_SYSPROTO_H_ is
 * not defined (presumably the include guard of <sys/sysproto.h>, which
 * would otherwise supply the declaration).
 */
#ifndef _SYS_SYSPROTO_H_
struct msgrcv_args {
	int	msqid;		/* queue identifier */
	void	*msgp;		/* user buffer: a long type, then the body */
	size_t	msgsz;		/* room for the body, excluding the type */
	long	msgtyp;		/* 0: any, >0: exact type, <0: type <= -msgtyp */
	int	msgflg;		/* IPC_NOWAIT and/or MSG_NOERROR */
};
#endif
867
868/*
869 * MPSAFE
870 */
871int
872msgrcv(td, uap)
873	struct thread *td;
874	register struct msgrcv_args *uap;
875{
876	int msqid = uap->msqid;
877	void *user_msgp = uap->msgp;
878	size_t msgsz = uap->msgsz;
879	long msgtyp = uap->msgtyp;
880	int msgflg = uap->msgflg;
881	size_t len;
882	register struct msqid_ds *msqptr;
883	register struct msg *msghdr;
884	int error = 0;
885	short next;
886
887	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
888	    msgsz, msgtyp, msgflg));
889
890	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
891		return (ENOSYS);
892
893	mtx_lock(&Giant);
894	msqid = IPCID_TO_IX(msqid);
895
896	if (msqid < 0 || msqid >= msginfo.msgmni) {
897		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
898		    msginfo.msgmni));
899		error = EINVAL;
900		goto done2;
901	}
902
903	msqptr = &msqids[msqid];
904	if (msqptr->msg_qbytes == 0) {
905		DPRINTF(("no such message queue id\n"));
906		error = EINVAL;
907		goto done2;
908	}
909	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
910		DPRINTF(("wrong sequence number\n"));
911		error = EINVAL;
912		goto done2;
913	}
914
915	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
916		DPRINTF(("requester doesn't have read access\n"));
917		goto done2;
918	}
919
920	msghdr = NULL;
921	while (msghdr == NULL) {
922		if (msgtyp == 0) {
923			msghdr = msqptr->msg_first;
924			if (msghdr != NULL) {
925				if (msgsz < msghdr->msg_ts &&
926				    (msgflg & MSG_NOERROR) == 0) {
927					DPRINTF(("first message on the queue "
928					    "is too big (want %d, got %d)\n",
929					    msgsz, msghdr->msg_ts));
930					error = E2BIG;
931					goto done2;
932				}
933				if (msqptr->msg_first == msqptr->msg_last) {
934					msqptr->msg_first = NULL;
935					msqptr->msg_last = NULL;
936				} else {
937					msqptr->msg_first = msghdr->msg_next;
938					if (msqptr->msg_first == NULL)
939						panic("msg_first/last screwed up #1");
940				}
941			}
942		} else {
943			struct msg *previous;
944			struct msg **prev;
945
946			previous = NULL;
947			prev = &(msqptr->msg_first);
948			while ((msghdr = *prev) != NULL) {
949				/*
950				 * Is this message's type an exact match or is
951				 * this message's type less than or equal to
952				 * the absolute value of a negative msgtyp?
953				 * Note that the second half of this test can
954				 * NEVER be true if msgtyp is positive since
955				 * msg_type is always positive!
956				 */
957
958				if (msgtyp == msghdr->msg_type ||
959				    msghdr->msg_type <= -msgtyp) {
960					DPRINTF(("found message type %d, "
961					    "requested %d\n",
962					    msghdr->msg_type, msgtyp));
963					if (msgsz < msghdr->msg_ts &&
964					    (msgflg & MSG_NOERROR) == 0) {
965						DPRINTF(("requested message "
966						    "on the queue is too big "
967						    "(want %d, got %d)\n",
968						    msgsz, msghdr->msg_ts));
969						error = E2BIG;
970						goto done2;
971					}
972					*prev = msghdr->msg_next;
973					if (msghdr == msqptr->msg_last) {
974						if (previous == NULL) {
975							if (prev !=
976							    &msqptr->msg_first)
977								panic("msg_first/last screwed up #2");
978							msqptr->msg_first =
979							    NULL;
980							msqptr->msg_last =
981							    NULL;
982						} else {
983							if (prev ==
984							    &msqptr->msg_first)
985								panic("msg_first/last screwed up #3");
986							msqptr->msg_last =
987							    previous;
988						}
989					}
990					break;
991				}
992				previous = msghdr;
993				prev = &(msghdr->msg_next);
994			}
995		}
996
997		/*
998		 * We've either extracted the msghdr for the appropriate
999		 * message or there isn't one.
1000		 * If there is one then bail out of this loop.
1001		 */
1002
1003		if (msghdr != NULL)
1004			break;
1005
1006		/*
1007		 * Hmph!  No message found.  Does the user want to wait?
1008		 */
1009
1010		if ((msgflg & IPC_NOWAIT) != 0) {
1011			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1012			    msgtyp));
1013			/* The SVID says to return ENOMSG. */
1014			error = ENOMSG;
1015			goto done2;
1016		}
1017
1018		/*
1019		 * Wait for something to happen
1020		 */
1021
1022		DPRINTF(("msgrcv:  goodnight\n"));
1023		error = tsleep(msqptr, (PZERO - 4) | PCATCH, "msgwait", 0);
1024		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1025
1026		if (error != 0) {
1027			DPRINTF(("msgsnd:  interrupted system call\n"));
1028			error = EINTR;
1029			goto done2;
1030		}
1031
1032		/*
1033		 * Make sure that the msq queue still exists
1034		 */
1035
1036		if (msqptr->msg_qbytes == 0 ||
1037		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1038			DPRINTF(("msqid deleted\n"));
1039			error = EIDRM;
1040			goto done2;
1041		}
1042	}
1043
1044	/*
1045	 * Return the message to the user.
1046	 *
1047	 * First, do the bookkeeping (before we risk being interrupted).
1048	 */
1049
1050	msqptr->msg_cbytes -= msghdr->msg_ts;
1051	msqptr->msg_qnum--;
1052	msqptr->msg_lrpid = td->td_proc->p_pid;
1053	msqptr->msg_rtime = time_second;
1054
1055	/*
1056	 * Make msgsz the actual amount that we'll be returning.
1057	 * Note that this effectively truncates the message if it is too long
1058	 * (since msgsz is never increased).
1059	 */
1060
1061	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1062	    msghdr->msg_ts));
1063	if (msgsz > msghdr->msg_ts)
1064		msgsz = msghdr->msg_ts;
1065
1066	/*
1067	 * Return the type to the user.
1068	 */
1069
1070	error = copyout(&(msghdr->msg_type), user_msgp,
1071	    sizeof(msghdr->msg_type));
1072	if (error != 0) {
1073		DPRINTF(("error (%d) copying out message type\n", error));
1074		msg_freehdr(msghdr);
1075		wakeup(msqptr);
1076		goto done2;
1077	}
1078	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1079
1080	/*
1081	 * Return the segments to the user
1082	 */
1083
1084	next = msghdr->msg_spot;
1085	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1086		size_t tlen;
1087
1088		if (msgsz - len > msginfo.msgssz)
1089			tlen = msginfo.msgssz;
1090		else
1091			tlen = msgsz - len;
1092		if (next <= -1)
1093			panic("next too low #3");
1094		if (next >= msginfo.msgseg)
1095			panic("next out of range #3");
1096		error = copyout(&msgpool[next * msginfo.msgssz],
1097		    user_msgp, tlen);
1098		if (error != 0) {
1099			DPRINTF(("error (%d) copying out message segment\n",
1100			    error));
1101			msg_freehdr(msghdr);
1102			wakeup(msqptr);
1103			goto done2;
1104		}
1105		user_msgp = (char *)user_msgp + tlen;
1106		next = msgmaps[next].next;
1107	}
1108
1109	/*
1110	 * Done, return the actual number of bytes copied out.
1111	 */
1112
1113	msg_freehdr(msghdr);
1114	wakeup(msqptr);
1115	td->td_retval[0] = msgsz;
1116done2:
1117	mtx_unlock(&Giant);
1118	return (error);
1119}
1120
1121static int
1122sysctl_msqids(SYSCTL_HANDLER_ARGS)
1123{
1124
1125	return (SYSCTL_OUT(req, msqids,
1126	    sizeof(struct msqid_ds) * msginfo.msgmni));
1127}
1128
/*
 * Read-only sysctl nodes under _kern_ipc: one integer per msginfo field,
 * plus msqids, which returns the raw queue descriptor table through
 * sysctl_msqids().
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "");
SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1138