/* sysv_shm.c revision 140617 */
1/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
2/*-
3 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Adam Glass and Charles
16 *	Hannum.
17 * 4. The names of the authors may not be used to endorse or promote products
18 *    derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31/*-
32 * Copyright (c) 2003-2005 McAfee, Inc.
33 * All rights reserved.
34 *
35 * This software was developed for the FreeBSD Project in part by McAfee
36 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
37 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
38 * program.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 *    notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 *    notice, this list of conditions and the following disclaimer in the
47 *    documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62#include <sys/cdefs.h>
63__FBSDID("$FreeBSD: head/sys/kern/sysv_shm.c 140617 2005-01-22 19:10:25Z rwatson $");
64
65#include "opt_compat.h"
66#include "opt_sysvipc.h"
67#include "opt_mac.h"
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/kernel.h>
72#include <sys/lock.h>
73#include <sys/sysctl.h>
74#include <sys/shm.h>
75#include <sys/proc.h>
76#include <sys/malloc.h>
77#include <sys/mman.h>
78#include <sys/module.h>
79#include <sys/mutex.h>
80#include <sys/resourcevar.h>
81#include <sys/stat.h>
82#include <sys/syscall.h>
83#include <sys/syscallsubr.h>
84#include <sys/sysent.h>
85#include <sys/sysproto.h>
86#include <sys/jail.h>
87#include <sys/mac.h>
88
89#include <vm/vm.h>
90#include <vm/vm_param.h>
91#include <vm/pmap.h>
92#include <vm/vm_object.h>
93#include <vm/vm_map.h>
94#include <vm/vm_page.h>
95#include <vm/vm_pager.h>
96
97#ifdef MAC_DEBUG
98#define MPRINTF(a)      printf a
99#else
100#define MPRINTF(a)
101#endif
102
103static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");
104
105struct oshmctl_args;
106static int oshmctl(struct thread *td, struct oshmctl_args *uap);
107
108static int shmget_allocate_segment(struct thread *td,
109    struct shmget_args *uap, int mode);
110static int shmget_existing(struct thread *td, struct shmget_args *uap,
111    int mode, int segnum);
112
113/* XXX casting to (sy_call_t *) is bogus, as usual. */
114static sy_call_t *shmcalls[] = {
115	(sy_call_t *)shmat, (sy_call_t *)oshmctl,
116	(sy_call_t *)shmdt, (sy_call_t *)shmget,
117	(sy_call_t *)shmctl
118};
119
120#define	SHMSEG_FREE     	0x0200
121#define	SHMSEG_REMOVED  	0x0400
122#define	SHMSEG_ALLOCATED	0x0800
123#define	SHMSEG_WANTED		0x1000
124
125static int shm_last_free, shm_nused, shm_committed, shmalloced;
126static struct shmid_kernel	*shmsegs;
127
128struct shmmap_state {
129	vm_offset_t va;
130	int shmid;
131};
132
133static void shm_deallocate_segment(struct shmid_kernel *);
134static int shm_find_segment_by_key(key_t);
135static struct shmid_kernel *shm_find_segment_by_shmid(int);
136static struct shmid_kernel *shm_find_segment_by_shmidx(int);
137static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
138static void shmrealloc(void);
139static void shminit(void);
140static int sysvshm_modload(struct module *, int, void *);
141static int shmunload(void);
142static void shmexit_myhook(struct vmspace *vm);
143static void shmfork_myhook(struct proc *p1, struct proc *p2);
144static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
145
146/*
147 * Tuneable values.
148 */
149#ifndef SHMMAXPGS
150#define	SHMMAXPGS	8192	/* Note: sysv shared memory is swap backed. */
151#endif
152#ifndef SHMMAX
153#define	SHMMAX	(SHMMAXPGS*PAGE_SIZE)
154#endif
155#ifndef SHMMIN
156#define	SHMMIN	1
157#endif
158#ifndef SHMMNI
159#define	SHMMNI	192
160#endif
161#ifndef SHMSEG
162#define	SHMSEG	128
163#endif
164#ifndef SHMALL
165#define	SHMALL	(SHMMAXPGS)
166#endif
167
168struct	shminfo shminfo = {
169	SHMMAX,
170	SHMMIN,
171	SHMMNI,
172	SHMSEG,
173	SHMALL
174};
175
176static int shm_use_phys;
177static int shm_allow_removed;
178
179SYSCTL_DECL(_kern_ipc);
180SYSCTL_INT(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0, "");
181SYSCTL_INT(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0, "");
182SYSCTL_INT(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0, "");
183SYSCTL_INT(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0, "");
184SYSCTL_INT(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0, "");
185SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
186    &shm_use_phys, 0, "");
187SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
188    &shm_allow_removed, 0, "");
189SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLFLAG_RD,
190    NULL, 0, sysctl_shmsegs, "", "");
191
192static int
193shm_find_segment_by_key(key)
194	key_t key;
195{
196	int i;
197
198	for (i = 0; i < shmalloced; i++)
199		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
200		    shmsegs[i].u.shm_perm.key == key)
201			return (i);
202	return (-1);
203}
204
205static struct shmid_kernel *
206shm_find_segment_by_shmid(int shmid)
207{
208	int segnum;
209	struct shmid_kernel *shmseg;
210
211	segnum = IPCID_TO_IX(shmid);
212	if (segnum < 0 || segnum >= shmalloced)
213		return (NULL);
214	shmseg = &shmsegs[segnum];
215	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
216	    (!shm_allow_removed &&
217	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
218	    shmseg->u.shm_perm.seq != IPCID_TO_SEQ(shmid))
219		return (NULL);
220	return (shmseg);
221}
222
223static struct shmid_kernel *
224shm_find_segment_by_shmidx(int segnum)
225{
226	struct shmid_kernel *shmseg;
227
228	if (segnum < 0 || segnum >= shmalloced)
229		return (NULL);
230	shmseg = &shmsegs[segnum];
231	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
232	    (!shm_allow_removed &&
233	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0))
234		return (NULL);
235	return (shmseg);
236}
237
238static void
239shm_deallocate_segment(shmseg)
240	struct shmid_kernel *shmseg;
241{
242	size_t size;
243
244	GIANT_REQUIRED;
245
246	vm_object_deallocate(shmseg->u.shm_internal);
247	shmseg->u.shm_internal = NULL;
248	size = round_page(shmseg->u.shm_segsz);
249	shm_committed -= btoc(size);
250	shm_nused--;
251	shmseg->u.shm_perm.mode = SHMSEG_FREE;
252#ifdef MAC
253	mac_cleanup_sysv_shm(shmseg);
254#endif
255}
256
257static int
258shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
259{
260	struct shmid_kernel *shmseg;
261	int segnum, result;
262	size_t size;
263
264	GIANT_REQUIRED;
265
266	segnum = IPCID_TO_IX(shmmap_s->shmid);
267	shmseg = &shmsegs[segnum];
268	size = round_page(shmseg->u.shm_segsz);
269	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
270	if (result != KERN_SUCCESS)
271		return (EINVAL);
272	shmmap_s->shmid = -1;
273	shmseg->u.shm_dtime = time_second;
274	if ((--shmseg->u.shm_nattch <= 0) &&
275	    (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
276		shm_deallocate_segment(shmseg);
277		shm_last_free = segnum;
278	}
279	return (0);
280}
281
282#ifndef _SYS_SYSPROTO_H_
283struct shmdt_args {
284	const void *shmaddr;
285};
286#endif
287
288/*
289 * MPSAFE
290 */
291int
292shmdt(td, uap)
293	struct thread *td;
294	struct shmdt_args *uap;
295{
296	struct proc *p = td->td_proc;
297	struct shmmap_state *shmmap_s;
298#ifdef MAC
299	struct shmid_kernel *shmsegptr;
300#endif
301	int i;
302	int error = 0;
303
304	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
305		return (ENOSYS);
306	mtx_lock(&Giant);
307	shmmap_s = p->p_vmspace->vm_shm;
308 	if (shmmap_s == NULL) {
309		error = EINVAL;
310		goto done2;
311	}
312	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
313		if (shmmap_s->shmid != -1 &&
314		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
315			break;
316		}
317	}
318	if (i == shminfo.shmseg) {
319		error = EINVAL;
320		goto done2;
321	}
322#ifdef MAC
323	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
324	error = mac_check_sysv_shmdt(td->td_ucred, shmsegptr);
325	if (error != 0) {
326		MPRINTF(("mac_check_sysv_shmdt returned %d\n", error));
327		goto done2;
328	}
329#endif
330	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
331done2:
332	mtx_unlock(&Giant);
333	return (error);
334}
335
336#ifndef _SYS_SYSPROTO_H_
337struct shmat_args {
338	int shmid;
339	const void *shmaddr;
340	int shmflg;
341};
342#endif
343
344/*
345 * MPSAFE
346 */
347int
348kern_shmat(td, shmid, shmaddr, shmflg)
349	struct thread *td;
350	int shmid;
351	const void *shmaddr;
352	int shmflg;
353{
354	struct proc *p = td->td_proc;
355	int i, flags;
356	struct shmid_kernel *shmseg;
357	struct shmmap_state *shmmap_s = NULL;
358	vm_offset_t attach_va;
359	vm_prot_t prot;
360	vm_size_t size;
361	int rv;
362	int error = 0;
363
364	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
365		return (ENOSYS);
366	mtx_lock(&Giant);
367	shmmap_s = p->p_vmspace->vm_shm;
368	if (shmmap_s == NULL) {
369		size = shminfo.shmseg * sizeof(struct shmmap_state);
370		shmmap_s = malloc(size, M_SHM, M_WAITOK);
371		for (i = 0; i < shminfo.shmseg; i++)
372			shmmap_s[i].shmid = -1;
373		p->p_vmspace->vm_shm = shmmap_s;
374	}
375	shmseg = shm_find_segment_by_shmid(shmid);
376	if (shmseg == NULL) {
377		error = EINVAL;
378		goto done2;
379	}
380	error = ipcperm(td, &shmseg->u.shm_perm,
381	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
382	if (error)
383		goto done2;
384#ifdef MAC
385	error = mac_check_sysv_shmat(td->td_ucred, shmseg, shmflg);
386	if (error != 0) {
387	 	MPRINTF(("mac_check_sysv_shmat returned %d\n", error));
388		goto done2;
389	}
390#endif
391	for (i = 0; i < shminfo.shmseg; i++) {
392		if (shmmap_s->shmid == -1)
393			break;
394		shmmap_s++;
395	}
396	if (i >= shminfo.shmseg) {
397		error = EMFILE;
398		goto done2;
399	}
400	size = round_page(shmseg->u.shm_segsz);
401#ifdef VM_PROT_READ_IS_EXEC
402	prot = VM_PROT_READ | VM_PROT_EXECUTE;
403#else
404	prot = VM_PROT_READ;
405#endif
406	if ((shmflg & SHM_RDONLY) == 0)
407		prot |= VM_PROT_WRITE;
408	flags = MAP_ANON | MAP_SHARED;
409	if (shmaddr) {
410		flags |= MAP_FIXED;
411		if (shmflg & SHM_RND) {
412			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
413		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
414			attach_va = (vm_offset_t)shmaddr;
415		} else {
416			error = EINVAL;
417			goto done2;
418		}
419	} else {
420		/*
421		 * This is just a hint to vm_map_find() about where to
422		 * put it.
423		 */
424		PROC_LOCK(p);
425		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
426		    lim_max(p, RLIMIT_DATA));
427		PROC_UNLOCK(p);
428	}
429
430	vm_object_reference(shmseg->u.shm_internal);
431	rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->u.shm_internal,
432		0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
433	if (rv != KERN_SUCCESS) {
434		vm_object_deallocate(shmseg->u.shm_internal);
435		error = ENOMEM;
436		goto done2;
437	}
438	vm_map_inherit(&p->p_vmspace->vm_map,
439		attach_va, attach_va + size, VM_INHERIT_SHARE);
440
441	shmmap_s->va = attach_va;
442	shmmap_s->shmid = shmid;
443	shmseg->u.shm_lpid = p->p_pid;
444	shmseg->u.shm_atime = time_second;
445	shmseg->u.shm_nattch++;
446	td->td_retval[0] = attach_va;
447done2:
448	mtx_unlock(&Giant);
449	return (error);
450}
451
452int
453shmat(td, uap)
454	struct thread *td;
455	struct shmat_args *uap;
456{
457	return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg);
458}
459
460struct oshmid_ds {
461	struct	ipc_perm shm_perm;	/* operation perms */
462	int	shm_segsz;		/* size of segment (bytes) */
463	u_short	shm_cpid;		/* pid, creator */
464	u_short	shm_lpid;		/* pid, last operation */
465	short	shm_nattch;		/* no. of current attaches */
466	time_t	shm_atime;		/* last attach time */
467	time_t	shm_dtime;		/* last detach time */
468	time_t	shm_ctime;		/* last change time */
469	void	*shm_handle;		/* internal handle for shm segment */
470};
471
472struct oshmctl_args {
473	int shmid;
474	int cmd;
475	struct oshmid_ds *ubuf;
476};
477
478/*
479 * MPSAFE
480 */
481static int
482oshmctl(td, uap)
483	struct thread *td;
484	struct oshmctl_args *uap;
485{
486#ifdef COMPAT_43
487	int error = 0;
488	struct shmid_kernel *shmseg;
489	struct oshmid_ds outbuf;
490
491	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
492		return (ENOSYS);
493	mtx_lock(&Giant);
494	shmseg = shm_find_segment_by_shmid(uap->shmid);
495	if (shmseg == NULL) {
496		error = EINVAL;
497		goto done2;
498	}
499	switch (uap->cmd) {
500	case IPC_STAT:
501		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
502		if (error)
503			goto done2;
504#ifdef MAC
505		error = mac_check_sysv_shmctl(td->td_ucred, shmseg, uap->cmd);
506		if (error != 0) {
507			MPRINTF(("mac_check_sysv_shmctl returned %d\n",
508			    error));
509			goto done2;
510		}
511#endif
512		outbuf.shm_perm = shmseg->u.shm_perm;
513		outbuf.shm_segsz = shmseg->u.shm_segsz;
514		outbuf.shm_cpid = shmseg->u.shm_cpid;
515		outbuf.shm_lpid = shmseg->u.shm_lpid;
516		outbuf.shm_nattch = shmseg->u.shm_nattch;
517		outbuf.shm_atime = shmseg->u.shm_atime;
518		outbuf.shm_dtime = shmseg->u.shm_dtime;
519		outbuf.shm_ctime = shmseg->u.shm_ctime;
520		outbuf.shm_handle = shmseg->u.shm_internal;
521		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
522		if (error)
523			goto done2;
524		break;
525	default:
526		error = shmctl(td, (struct shmctl_args *)uap);
527		break;
528	}
529done2:
530	mtx_unlock(&Giant);
531	return (error);
532#else
533	return (EINVAL);
534#endif
535}
536
537#ifndef _SYS_SYSPROTO_H_
538struct shmctl_args {
539	int shmid;
540	int cmd;
541	struct shmid_ds *buf;
542};
543#endif
544
545/*
546 * MPSAFE
547 */
548int
549kern_shmctl(td, shmid, cmd, buf, bufsz)
550	struct thread *td;
551	int shmid;
552	int cmd;
553	void *buf;
554	size_t *bufsz;
555{
556	int error = 0;
557	struct shmid_kernel *shmseg;
558
559	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
560		return (ENOSYS);
561
562	mtx_lock(&Giant);
563	switch (cmd) {
564	case IPC_INFO:
565		memcpy(buf, &shminfo, sizeof(shminfo));
566		if (bufsz)
567			*bufsz = sizeof(shminfo);
568		td->td_retval[0] = shmalloced;
569		goto done2;
570	case SHM_INFO: {
571		struct shm_info shm_info;
572		shm_info.used_ids = shm_nused;
573		shm_info.shm_rss = 0;	/*XXX where to get from ? */
574		shm_info.shm_tot = 0;	/*XXX where to get from ? */
575		shm_info.shm_swp = 0;	/*XXX where to get from ? */
576		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
577		shm_info.swap_successes = 0;	/*XXX where to get from ? */
578		memcpy(buf, &shm_info, sizeof(shm_info));
579		if (bufsz)
580			*bufsz = sizeof(shm_info);
581		td->td_retval[0] = shmalloced;
582		goto done2;
583	}
584	}
585	if (cmd == SHM_STAT)
586		shmseg = shm_find_segment_by_shmidx(shmid);
587	else
588		shmseg = shm_find_segment_by_shmid(shmid);
589	if (shmseg == NULL) {
590		error = EINVAL;
591		goto done2;
592	}
593#ifdef MAC
594	error = mac_check_sysv_shmctl(td->td_ucred, shmseg, cmd);
595	if (error != 0) {
596		MPRINTF(("mac_check_sysv_shmctl returned %d\n", error));
597		goto done2;
598	}
599#endif
600	switch (cmd) {
601	case SHM_STAT:
602	case IPC_STAT:
603		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
604		if (error)
605			goto done2;
606		memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
607		if (bufsz)
608			*bufsz = sizeof(struct shmid_ds);
609		if (cmd == SHM_STAT)
610			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm);
611		break;
612	case IPC_SET: {
613		struct shmid_ds *shmid;
614
615		shmid = (struct shmid_ds *)buf;
616		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
617		if (error)
618			goto done2;
619		shmseg->u.shm_perm.uid = shmid->shm_perm.uid;
620		shmseg->u.shm_perm.gid = shmid->shm_perm.gid;
621		shmseg->u.shm_perm.mode =
622		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
623		    (shmid->shm_perm.mode & ACCESSPERMS);
624		shmseg->u.shm_ctime = time_second;
625		break;
626	}
627	case IPC_RMID:
628		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
629		if (error)
630			goto done2;
631		shmseg->u.shm_perm.key = IPC_PRIVATE;
632		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
633		if (shmseg->u.shm_nattch <= 0) {
634			shm_deallocate_segment(shmseg);
635			shm_last_free = IPCID_TO_IX(shmid);
636		}
637		break;
638#if 0
639	case SHM_LOCK:
640	case SHM_UNLOCK:
641#endif
642	default:
643		error = EINVAL;
644		break;
645	}
646done2:
647	mtx_unlock(&Giant);
648	return (error);
649}
650
651int
652shmctl(td, uap)
653	struct thread *td;
654	struct shmctl_args *uap;
655{
656	int error = 0;
657	struct shmid_ds buf;
658	size_t bufsz;
659
660	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
661	if (uap->cmd == IPC_SET) {
662		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
663			goto done;
664	}
665
666	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
667	if (error)
668		goto done;
669
670	/* Cases in which we need to copyout */
671	switch (uap->cmd) {
672	case IPC_INFO:
673	case SHM_INFO:
674	case SHM_STAT:
675	case IPC_STAT:
676		error = copyout(&buf, uap->buf, bufsz);
677		break;
678	}
679
680done:
681	if (error) {
682		/* Invalidate the return value */
683		td->td_retval[0] = -1;
684	}
685	return (error);
686}
687
688
689#ifndef _SYS_SYSPROTO_H_
690struct shmget_args {
691	key_t key;
692	size_t size;
693	int shmflg;
694};
695#endif
696
697static int
698shmget_existing(td, uap, mode, segnum)
699	struct thread *td;
700	struct shmget_args *uap;
701	int mode;
702	int segnum;
703{
704	struct shmid_kernel *shmseg;
705	int error;
706
707	shmseg = &shmsegs[segnum];
708	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
709		/*
710		 * This segment is in the process of being allocated.  Wait
711		 * until it's done, and look the key up again (in case the
712		 * allocation failed or it was freed).
713		 */
714		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
715		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
716		if (error)
717			return (error);
718		return (EAGAIN);
719	}
720	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
721		return (EEXIST);
722	error = ipcperm(td, &shmseg->u.shm_perm, mode);
723#ifdef MAC
724	error = mac_check_sysv_shmget(td->td_ucred, shmseg, uap->shmflg);
725	if (error != 0)
726		MPRINTF(("mac_check_sysv_shmget returned %d\n", error));
727#endif
728	if (error)
729		return (error);
730	if (uap->size && uap->size > shmseg->u.shm_segsz)
731		return (EINVAL);
732	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
733	return (0);
734}
735
736static int
737shmget_allocate_segment(td, uap, mode)
738	struct thread *td;
739	struct shmget_args *uap;
740	int mode;
741{
742	int i, segnum, shmid, size;
743	struct ucred *cred = td->td_ucred;
744	struct shmid_kernel *shmseg;
745	vm_object_t shm_object;
746
747	GIANT_REQUIRED;
748
749	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
750		return (EINVAL);
751	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
752		return (ENOSPC);
753	size = round_page(uap->size);
754	if (shm_committed + btoc(size) > shminfo.shmall)
755		return (ENOMEM);
756	if (shm_last_free < 0) {
757		shmrealloc();	/* Maybe expand the shmsegs[] array. */
758		for (i = 0; i < shmalloced; i++)
759			if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE)
760				break;
761		if (i == shmalloced)
762			return (ENOSPC);
763		segnum = i;
764	} else  {
765		segnum = shm_last_free;
766		shm_last_free = -1;
767	}
768	shmseg = &shmsegs[segnum];
769	/*
770	 * In case we sleep in malloc(), mark the segment present but deleted
771	 * so that noone else tries to create the same key.
772	 */
773	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
774	shmseg->u.shm_perm.key = uap->key;
775	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
776	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
777
778	/*
779	 * We make sure that we have allocated a pager before we need
780	 * to.
781	 */
782	if (shm_use_phys) {
783		shm_object =
784		    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
785	} else {
786		shm_object =
787		    vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
788	}
789	VM_OBJECT_LOCK(shm_object);
790	vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
791	vm_object_set_flag(shm_object, OBJ_NOSPLIT);
792	VM_OBJECT_UNLOCK(shm_object);
793
794	shmseg->u.shm_internal = shm_object;
795	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid;
796	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
797	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
798	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
799	shmseg->u.shm_segsz = uap->size;
800	shmseg->u.shm_cpid = td->td_proc->p_pid;
801	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
802	shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
803#ifdef MAC
804	mac_create_sysv_shm(cred, shmseg);
805#endif
806	shmseg->u.shm_ctime = time_second;
807	shm_committed += btoc(size);
808	shm_nused++;
809	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
810		/*
811		 * Somebody else wanted this key while we were asleep.  Wake
812		 * them up now.
813		 */
814		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
815		wakeup(shmseg);
816	}
817	td->td_retval[0] = shmid;
818	return (0);
819}
820
821/*
822 * MPSAFE
823 */
824int
825shmget(td, uap)
826	struct thread *td;
827	struct shmget_args *uap;
828{
829	int segnum, mode;
830	int error;
831
832	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
833		return (ENOSYS);
834	mtx_lock(&Giant);
835	mode = uap->shmflg & ACCESSPERMS;
836	if (uap->key != IPC_PRIVATE) {
837	again:
838		segnum = shm_find_segment_by_key(uap->key);
839		if (segnum >= 0) {
840			error = shmget_existing(td, uap, mode, segnum);
841			if (error == EAGAIN)
842				goto again;
843			goto done2;
844		}
845		if ((uap->shmflg & IPC_CREAT) == 0) {
846			error = ENOENT;
847			goto done2;
848		}
849	}
850	error = shmget_allocate_segment(td, uap, mode);
851done2:
852	mtx_unlock(&Giant);
853	return (error);
854}
855
856/*
857 * MPSAFE
858 */
859int
860shmsys(td, uap)
861	struct thread *td;
862	/* XXX actually varargs. */
863	struct shmsys_args /* {
864		int	which;
865		int	a2;
866		int	a3;
867		int	a4;
868	} */ *uap;
869{
870	int error;
871
872	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
873		return (ENOSYS);
874	if (uap->which < 0 ||
875	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
876		return (EINVAL);
877	mtx_lock(&Giant);
878	error = (*shmcalls[uap->which])(td, &uap->a2);
879	mtx_unlock(&Giant);
880	return (error);
881}
882
883static void
884shmfork_myhook(p1, p2)
885	struct proc *p1, *p2;
886{
887	struct shmmap_state *shmmap_s;
888	size_t size;
889	int i;
890
891	mtx_lock(&Giant);
892	size = shminfo.shmseg * sizeof(struct shmmap_state);
893	shmmap_s = malloc(size, M_SHM, M_WAITOK);
894	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
895	p2->p_vmspace->vm_shm = shmmap_s;
896	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
897		if (shmmap_s->shmid != -1)
898			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
899	mtx_unlock(&Giant);
900}
901
902static void
903shmexit_myhook(struct vmspace *vm)
904{
905	struct shmmap_state *base, *shm;
906	int i;
907
908	if ((base = vm->vm_shm) != NULL) {
909		vm->vm_shm = NULL;
910		mtx_lock(&Giant);
911		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
912			if (shm->shmid != -1)
913				shm_delete_mapping(vm, shm);
914		}
915		mtx_unlock(&Giant);
916		free(base, M_SHM);
917	}
918}
919
920static void
921shmrealloc(void)
922{
923	int i;
924	struct shmid_kernel *newsegs;
925
926	if (shmalloced >= shminfo.shmmni)
927		return;
928
929	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
930	if (newsegs == NULL)
931		return;
932	for (i = 0; i < shmalloced; i++)
933		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
934	for (; i < shminfo.shmmni; i++) {
935		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
936		shmsegs[i].u.shm_perm.seq = 0;
937#ifdef MAC
938		mac_init_sysv_shm(&shmsegs[i]);
939#endif
940	}
941	free(shmsegs, M_SHM);
942	shmsegs = newsegs;
943	shmalloced = shminfo.shmmni;
944}
945
946static void
947shminit()
948{
949	int i;
950
951	TUNABLE_INT_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall);
952	for (i = PAGE_SIZE; i > 0; i--) {
953		shminfo.shmmax = shminfo.shmall * PAGE_SIZE;
954		if (shminfo.shmmax >= shminfo.shmall)
955			break;
956	}
957	TUNABLE_INT_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
958	TUNABLE_INT_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
959	TUNABLE_INT_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
960	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);
961
962	shmalloced = shminfo.shmmni;
963	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
964	if (shmsegs == NULL)
965		panic("cannot allocate initial memory for sysvshm");
966	for (i = 0; i < shmalloced; i++) {
967		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
968		shmsegs[i].u.shm_perm.seq = 0;
969#ifdef MAC
970		mac_init_sysv_shm(&shmsegs[i]);
971#endif
972	}
973	shm_last_free = 0;
974	shm_nused = 0;
975	shm_committed = 0;
976	shmexit_hook = &shmexit_myhook;
977	shmfork_hook = &shmfork_myhook;
978}
979
980static int
981shmunload()
982{
983#ifdef MAC
984	int i;
985#endif
986
987	if (shm_nused > 0)
988		return (EBUSY);
989
990#ifdef MAC
991	for (i = 0; i < shmalloced; i++)
992		mac_destroy_sysv_shm(&shmsegs[i]);
993#endif
994	free(shmsegs, M_SHM);
995	shmexit_hook = NULL;
996	shmfork_hook = NULL;
997	return (0);
998}
999
1000static int
1001sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
1002{
1003
1004	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
1005}
1006
1007static int
1008sysvshm_modload(struct module *module, int cmd, void *arg)
1009{
1010	int error = 0;
1011
1012	switch (cmd) {
1013	case MOD_LOAD:
1014		shminit();
1015		break;
1016	case MOD_UNLOAD:
1017		error = shmunload();
1018		break;
1019	case MOD_SHUTDOWN:
1020		break;
1021	default:
1022		error = EINVAL;
1023		break;
1024	}
1025	return (error);
1026}
1027
1028static moduledata_t sysvshm_mod = {
1029	"sysvshm",
1030	&sysvshm_modload,
1031	NULL
1032};
1033
1034SYSCALL_MODULE_HELPER(shmsys);
1035SYSCALL_MODULE_HELPER(shmat);
1036SYSCALL_MODULE_HELPER(shmctl);
1037SYSCALL_MODULE_HELPER(shmdt);
1038SYSCALL_MODULE_HELPER(shmget);
1039
1040DECLARE_MODULE(sysvshm, sysvshm_mod,
1041	SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
1042MODULE_VERSION(sysvshm, 1);
1043