sysv_shm.c revision 146164
1/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
2/*-
3 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Adam Glass and Charles
16 *	Hannum.
17 * 4. The names of the authors may not be used to endorse or promote products
18 *    derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31/*-
32 * Copyright (c) 2003-2005 McAfee, Inc.
33 * All rights reserved.
34 *
35 * This software was developed for the FreeBSD Project in part by McAfee
36 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
37 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
38 * program.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 *    notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 *    notice, this list of conditions and the following disclaimer in the
47 *    documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62#include <sys/cdefs.h>
63__FBSDID("$FreeBSD: head/sys/kern/sysv_shm.c 146164 2005-05-12 20:04:48Z jhb $");
64
65#include "opt_compat.h"
66#include "opt_sysvipc.h"
67#include "opt_mac.h"
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/kernel.h>
72#include <sys/lock.h>
73#include <sys/sysctl.h>
74#include <sys/shm.h>
75#include <sys/proc.h>
76#include <sys/malloc.h>
77#include <sys/mman.h>
78#include <sys/module.h>
79#include <sys/mutex.h>
80#include <sys/resourcevar.h>
81#include <sys/stat.h>
82#include <sys/syscall.h>
83#include <sys/syscallsubr.h>
84#include <sys/sysent.h>
85#include <sys/sysproto.h>
86#include <sys/jail.h>
87#include <sys/mac.h>
88
89#include <vm/vm.h>
90#include <vm/vm_param.h>
91#include <vm/pmap.h>
92#include <vm/vm_object.h>
93#include <vm/vm_map.h>
94#include <vm/vm_page.h>
95#include <vm/vm_pager.h>
96
/*
 * MPRINTF(()) is used in this file to report non-zero results from the
 * mac_check_sysv_shm*() hooks.  It expands to a printf call when
 * MAC_DEBUG is defined and to nothing otherwise.  Callers wrap the
 * whole printf argument list in an extra pair of parentheses so that it
 * is passed as the single macro argument `a'.
 */
#ifdef MAC_DEBUG
#define MPRINTF(a)      printf a
#else
#define MPRINTF(a)
#endif
102
103static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");
104
105struct oshmctl_args;
106static int oshmctl(struct thread *td, struct oshmctl_args *uap);
107
108static int shmget_allocate_segment(struct thread *td,
109    struct shmget_args *uap, int mode);
110static int shmget_existing(struct thread *td, struct shmget_args *uap,
111    int mode, int segnum);
112
/*
 * Dispatch table for shmsys(): the `which' argument of that system call
 * is range-checked against this array and then used as an index into
 * it.  The order of the entries is therefore part of the shmsys()
 * interface.
 */
/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *shmcalls[] = {
	(sy_call_t *)shmat, (sy_call_t *)oshmctl,
	(sy_call_t *)shmdt, (sy_call_t *)shmget,
	(sy_call_t *)shmctl
};
119
/*
 * Internal state bits kept in shm_perm.mode next to the ACCESSPERMS
 * permission bits:
 *
 *  SHMSEG_FREE		slot is unused (set by shminit(), shmrealloc()
 *			and shm_deallocate_segment()).
 *  SHMSEG_REMOVED	segment was marked by IPC_RMID and is destroyed
 *			on last detach; also set together with
 *			SHMSEG_ALLOCATED as a placeholder while
 *			shmget_allocate_segment() is still building the
 *			segment.
 *  SHMSEG_ALLOCATED	slot holds a segment.
 *  SHMSEG_WANTED	a thread is sleeping in shmget_existing() waiting
 *			for the slot to finish being set up.
 */
#define	SHMSEG_FREE     	0x0200
#define	SHMSEG_REMOVED  	0x0400
#define	SHMSEG_ALLOCATED	0x0800
#define	SHMSEG_WANTED		0x1000
124
/*
 * Global segment-table state:
 *
 *  shm_last_free	index of a slot known to be free, or -1 when
 *			shmget_allocate_segment() has to scan for one.
 *  shm_nused		number of segments currently in use.
 *  shm_committed	sum of btoc(round_page(segsz)) over segments in use.
 *  shmalloced		number of entries in shmsegs[].
 *  shmsegs		the segment table itself, allocated from M_SHM.
 */
static int shm_last_free, shm_nused, shm_committed, shmalloced;
static struct shmid_kernel	*shmsegs;
127
/*
 * One attach slot.  Each vmspace that has used shmat() owns an array of
 * shminfo.shmseg of these, reachable through vm_shm.
 */
struct shmmap_state {
	vm_offset_t va;		/* address the segment is mapped at */
	int shmid;		/* id of the attached segment, -1 if unused */
};
132
133static void shm_deallocate_segment(struct shmid_kernel *);
134static int shm_find_segment_by_key(key_t);
135static struct shmid_kernel *shm_find_segment_by_shmid(int);
136static struct shmid_kernel *shm_find_segment_by_shmidx(int);
137static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
138static void shmrealloc(void);
139static void shminit(void);
140static int sysvshm_modload(struct module *, int, void *);
141static int shmunload(void);
142static void shmexit_myhook(struct vmspace *vm);
143static void shmfork_myhook(struct proc *p1, struct proc *p2);
144static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
145
/*
 * Tunable values.
 *
 * Compile-time defaults for the limits stored in shminfo below.  Each
 * one may be overridden by defining the macro before this point.
 * shminit() additionally replaces shmall, shmmin, shmmni and shmseg
 * with loader tunables when they are set, and recomputes shmmax from
 * shmall.
 */
#ifndef SHMMAXPGS
#define	SHMMAXPGS	8192	/* Note: sysv shared memory is swap backed. */
#endif
#ifndef SHMMAX
#define	SHMMAX	(SHMMAXPGS*PAGE_SIZE)
#endif
#ifndef SHMMIN
#define	SHMMIN	1
#endif
#ifndef SHMMNI
#define	SHMMNI	192
#endif
#ifndef SHMSEG
#define	SHMSEG	128
#endif
#ifndef SHMALL
#define	SHMALL	(SHMMAXPGS)
#endif

/*
 * Run-time limits.  The initializers are positional and must stay in
 * the order shmmax, shmmin, shmmni, shmseg, shmall (the fields the
 * sysctl declarations in this file refer to).
 */
struct	shminfo shminfo = {
	SHMMAX,
	SHMMIN,
	SHMMNI,
	SHMSEG,
	SHMALL
};
175
/*
 * shm_use_phys: when non-zero, shmget_allocate_segment() backs new
 * segments with an OBJT_PHYS object instead of an OBJT_SWAP one.
 *
 * shm_allow_removed: when non-zero, the shm_find_segment_by_*() lookups
 * also return segments that carry SHMSEG_REMOVED.
 */
static int shm_use_phys;
static int shm_allow_removed;

/*
 * kern.ipc.* knobs.  shmmni and shmseg are CTLFLAG_RDTUN (read-only at
 * run time); the remaining integer knobs are read-write.
 * kern.ipc.shmsegs exports the raw shmsegs[] array through
 * sysctl_shmsegs().
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
    "Maximum shared memory segment size");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
    "Minimum shared memory segment size");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0,
    "Number of shared memory identifiers");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0,
    "Number of segments per process");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
    "Maximum number of pages available for shared memory");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
    &shm_use_phys, 0, "Enable/Disable locking of shared memory pages in core");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
    &shm_allow_removed, 0,
    "Enable/Disable attachment to attached segments marked for removal");
SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLFLAG_RD,
    NULL, 0, sysctl_shmsegs, "",
    "Current number of shared memory segments allocated");
198
199static int
200shm_find_segment_by_key(key)
201	key_t key;
202{
203	int i;
204
205	for (i = 0; i < shmalloced; i++)
206		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
207		    shmsegs[i].u.shm_perm.key == key)
208			return (i);
209	return (-1);
210}
211
212static struct shmid_kernel *
213shm_find_segment_by_shmid(int shmid)
214{
215	int segnum;
216	struct shmid_kernel *shmseg;
217
218	segnum = IPCID_TO_IX(shmid);
219	if (segnum < 0 || segnum >= shmalloced)
220		return (NULL);
221	shmseg = &shmsegs[segnum];
222	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
223	    (!shm_allow_removed &&
224	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
225	    shmseg->u.shm_perm.seq != IPCID_TO_SEQ(shmid))
226		return (NULL);
227	return (shmseg);
228}
229
230static struct shmid_kernel *
231shm_find_segment_by_shmidx(int segnum)
232{
233	struct shmid_kernel *shmseg;
234
235	if (segnum < 0 || segnum >= shmalloced)
236		return (NULL);
237	shmseg = &shmsegs[segnum];
238	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
239	    (!shm_allow_removed &&
240	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0))
241		return (NULL);
242	return (shmseg);
243}
244
245static void
246shm_deallocate_segment(shmseg)
247	struct shmid_kernel *shmseg;
248{
249	size_t size;
250
251	GIANT_REQUIRED;
252
253	vm_object_deallocate(shmseg->u.shm_internal);
254	shmseg->u.shm_internal = NULL;
255	size = round_page(shmseg->u.shm_segsz);
256	shm_committed -= btoc(size);
257	shm_nused--;
258	shmseg->u.shm_perm.mode = SHMSEG_FREE;
259#ifdef MAC
260	mac_cleanup_sysv_shm(shmseg);
261#endif
262}
263
264static int
265shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
266{
267	struct shmid_kernel *shmseg;
268	int segnum, result;
269	size_t size;
270
271	GIANT_REQUIRED;
272
273	segnum = IPCID_TO_IX(shmmap_s->shmid);
274	shmseg = &shmsegs[segnum];
275	size = round_page(shmseg->u.shm_segsz);
276	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
277	if (result != KERN_SUCCESS)
278		return (EINVAL);
279	shmmap_s->shmid = -1;
280	shmseg->u.shm_dtime = time_second;
281	if ((--shmseg->u.shm_nattch <= 0) &&
282	    (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
283		shm_deallocate_segment(shmseg);
284		shm_last_free = segnum;
285	}
286	return (0);
287}
288
289#ifndef _SYS_SYSPROTO_H_
290struct shmdt_args {
291	const void *shmaddr;
292};
293#endif
294
295/*
296 * MPSAFE
297 */
298int
299shmdt(td, uap)
300	struct thread *td;
301	struct shmdt_args *uap;
302{
303	struct proc *p = td->td_proc;
304	struct shmmap_state *shmmap_s;
305#ifdef MAC
306	struct shmid_kernel *shmsegptr;
307#endif
308	int i;
309	int error = 0;
310
311	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
312		return (ENOSYS);
313	mtx_lock(&Giant);
314	shmmap_s = p->p_vmspace->vm_shm;
315 	if (shmmap_s == NULL) {
316		error = EINVAL;
317		goto done2;
318	}
319	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
320		if (shmmap_s->shmid != -1 &&
321		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
322			break;
323		}
324	}
325	if (i == shminfo.shmseg) {
326		error = EINVAL;
327		goto done2;
328	}
329#ifdef MAC
330	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
331	error = mac_check_sysv_shmdt(td->td_ucred, shmsegptr);
332	if (error != 0) {
333		MPRINTF(("mac_check_sysv_shmdt returned %d\n", error));
334		goto done2;
335	}
336#endif
337	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
338done2:
339	mtx_unlock(&Giant);
340	return (error);
341}
342
343#ifndef _SYS_SYSPROTO_H_
344struct shmat_args {
345	int shmid;
346	const void *shmaddr;
347	int shmflg;
348};
349#endif
350
351/*
352 * MPSAFE
353 */
354int
355kern_shmat(td, shmid, shmaddr, shmflg)
356	struct thread *td;
357	int shmid;
358	const void *shmaddr;
359	int shmflg;
360{
361	struct proc *p = td->td_proc;
362	int i, flags;
363	struct shmid_kernel *shmseg;
364	struct shmmap_state *shmmap_s = NULL;
365	vm_offset_t attach_va;
366	vm_prot_t prot;
367	vm_size_t size;
368	int rv;
369	int error = 0;
370
371	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
372		return (ENOSYS);
373	mtx_lock(&Giant);
374	shmmap_s = p->p_vmspace->vm_shm;
375	if (shmmap_s == NULL) {
376		size = shminfo.shmseg * sizeof(struct shmmap_state);
377		shmmap_s = malloc(size, M_SHM, M_WAITOK);
378		for (i = 0; i < shminfo.shmseg; i++)
379			shmmap_s[i].shmid = -1;
380		p->p_vmspace->vm_shm = shmmap_s;
381	}
382	shmseg = shm_find_segment_by_shmid(shmid);
383	if (shmseg == NULL) {
384		error = EINVAL;
385		goto done2;
386	}
387	error = ipcperm(td, &shmseg->u.shm_perm,
388	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
389	if (error)
390		goto done2;
391#ifdef MAC
392	error = mac_check_sysv_shmat(td->td_ucred, shmseg, shmflg);
393	if (error != 0) {
394	 	MPRINTF(("mac_check_sysv_shmat returned %d\n", error));
395		goto done2;
396	}
397#endif
398	for (i = 0; i < shminfo.shmseg; i++) {
399		if (shmmap_s->shmid == -1)
400			break;
401		shmmap_s++;
402	}
403	if (i >= shminfo.shmseg) {
404		error = EMFILE;
405		goto done2;
406	}
407	size = round_page(shmseg->u.shm_segsz);
408#ifdef VM_PROT_READ_IS_EXEC
409	prot = VM_PROT_READ | VM_PROT_EXECUTE;
410#else
411	prot = VM_PROT_READ;
412#endif
413	if ((shmflg & SHM_RDONLY) == 0)
414		prot |= VM_PROT_WRITE;
415	flags = MAP_ANON | MAP_SHARED;
416	if (shmaddr) {
417		flags |= MAP_FIXED;
418		if (shmflg & SHM_RND) {
419			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
420		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
421			attach_va = (vm_offset_t)shmaddr;
422		} else {
423			error = EINVAL;
424			goto done2;
425		}
426	} else {
427		/*
428		 * This is just a hint to vm_map_find() about where to
429		 * put it.
430		 */
431		PROC_LOCK(p);
432		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
433		    lim_max(p, RLIMIT_DATA));
434		PROC_UNLOCK(p);
435	}
436
437	vm_object_reference(shmseg->u.shm_internal);
438	rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->u.shm_internal,
439		0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
440	if (rv != KERN_SUCCESS) {
441		vm_object_deallocate(shmseg->u.shm_internal);
442		error = ENOMEM;
443		goto done2;
444	}
445	vm_map_inherit(&p->p_vmspace->vm_map,
446		attach_va, attach_va + size, VM_INHERIT_SHARE);
447
448	shmmap_s->va = attach_va;
449	shmmap_s->shmid = shmid;
450	shmseg->u.shm_lpid = p->p_pid;
451	shmseg->u.shm_atime = time_second;
452	shmseg->u.shm_nattch++;
453	td->td_retval[0] = attach_va;
454done2:
455	mtx_unlock(&Giant);
456	return (error);
457}
458
459int
460shmat(td, uap)
461	struct thread *td;
462	struct shmat_args *uap;
463{
464	return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg);
465}
466
467struct oshmid_ds {
468	struct	ipc_perm shm_perm;	/* operation perms */
469	int	shm_segsz;		/* size of segment (bytes) */
470	u_short	shm_cpid;		/* pid, creator */
471	u_short	shm_lpid;		/* pid, last operation */
472	short	shm_nattch;		/* no. of current attaches */
473	time_t	shm_atime;		/* last attach time */
474	time_t	shm_dtime;		/* last detach time */
475	time_t	shm_ctime;		/* last change time */
476	void	*shm_handle;		/* internal handle for shm segment */
477};
478
479struct oshmctl_args {
480	int shmid;
481	int cmd;
482	struct oshmid_ds *ubuf;
483};
484
485/*
486 * MPSAFE
487 */
488static int
489oshmctl(td, uap)
490	struct thread *td;
491	struct oshmctl_args *uap;
492{
493#ifdef COMPAT_43
494	int error = 0;
495	struct shmid_kernel *shmseg;
496	struct oshmid_ds outbuf;
497
498	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
499		return (ENOSYS);
500	mtx_lock(&Giant);
501	shmseg = shm_find_segment_by_shmid(uap->shmid);
502	if (shmseg == NULL) {
503		error = EINVAL;
504		goto done2;
505	}
506	switch (uap->cmd) {
507	case IPC_STAT:
508		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
509		if (error)
510			goto done2;
511#ifdef MAC
512		error = mac_check_sysv_shmctl(td->td_ucred, shmseg, uap->cmd);
513		if (error != 0) {
514			MPRINTF(("mac_check_sysv_shmctl returned %d\n",
515			    error));
516			goto done2;
517		}
518#endif
519		outbuf.shm_perm = shmseg->u.shm_perm;
520		outbuf.shm_segsz = shmseg->u.shm_segsz;
521		outbuf.shm_cpid = shmseg->u.shm_cpid;
522		outbuf.shm_lpid = shmseg->u.shm_lpid;
523		outbuf.shm_nattch = shmseg->u.shm_nattch;
524		outbuf.shm_atime = shmseg->u.shm_atime;
525		outbuf.shm_dtime = shmseg->u.shm_dtime;
526		outbuf.shm_ctime = shmseg->u.shm_ctime;
527		outbuf.shm_handle = shmseg->u.shm_internal;
528		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
529		if (error)
530			goto done2;
531		break;
532	default:
533		error = shmctl(td, (struct shmctl_args *)uap);
534		break;
535	}
536done2:
537	mtx_unlock(&Giant);
538	return (error);
539#else
540	return (EINVAL);
541#endif
542}
543
544#ifndef _SYS_SYSPROTO_H_
545struct shmctl_args {
546	int shmid;
547	int cmd;
548	struct shmid_ds *buf;
549};
550#endif
551
552/*
553 * MPSAFE
554 */
555int
556kern_shmctl(td, shmid, cmd, buf, bufsz)
557	struct thread *td;
558	int shmid;
559	int cmd;
560	void *buf;
561	size_t *bufsz;
562{
563	int error = 0;
564	struct shmid_kernel *shmseg;
565
566	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
567		return (ENOSYS);
568
569	mtx_lock(&Giant);
570	switch (cmd) {
571	case IPC_INFO:
572		memcpy(buf, &shminfo, sizeof(shminfo));
573		if (bufsz)
574			*bufsz = sizeof(shminfo);
575		td->td_retval[0] = shmalloced;
576		goto done2;
577	case SHM_INFO: {
578		struct shm_info shm_info;
579		shm_info.used_ids = shm_nused;
580		shm_info.shm_rss = 0;	/*XXX where to get from ? */
581		shm_info.shm_tot = 0;	/*XXX where to get from ? */
582		shm_info.shm_swp = 0;	/*XXX where to get from ? */
583		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
584		shm_info.swap_successes = 0;	/*XXX where to get from ? */
585		memcpy(buf, &shm_info, sizeof(shm_info));
586		if (bufsz)
587			*bufsz = sizeof(shm_info);
588		td->td_retval[0] = shmalloced;
589		goto done2;
590	}
591	}
592	if (cmd == SHM_STAT)
593		shmseg = shm_find_segment_by_shmidx(shmid);
594	else
595		shmseg = shm_find_segment_by_shmid(shmid);
596	if (shmseg == NULL) {
597		error = EINVAL;
598		goto done2;
599	}
600#ifdef MAC
601	error = mac_check_sysv_shmctl(td->td_ucred, shmseg, cmd);
602	if (error != 0) {
603		MPRINTF(("mac_check_sysv_shmctl returned %d\n", error));
604		goto done2;
605	}
606#endif
607	switch (cmd) {
608	case SHM_STAT:
609	case IPC_STAT:
610		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
611		if (error)
612			goto done2;
613		memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
614		if (bufsz)
615			*bufsz = sizeof(struct shmid_ds);
616		if (cmd == SHM_STAT)
617			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm);
618		break;
619	case IPC_SET: {
620		struct shmid_ds *shmid;
621
622		shmid = (struct shmid_ds *)buf;
623		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
624		if (error)
625			goto done2;
626		shmseg->u.shm_perm.uid = shmid->shm_perm.uid;
627		shmseg->u.shm_perm.gid = shmid->shm_perm.gid;
628		shmseg->u.shm_perm.mode =
629		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
630		    (shmid->shm_perm.mode & ACCESSPERMS);
631		shmseg->u.shm_ctime = time_second;
632		break;
633	}
634	case IPC_RMID:
635		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
636		if (error)
637			goto done2;
638		shmseg->u.shm_perm.key = IPC_PRIVATE;
639		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
640		if (shmseg->u.shm_nattch <= 0) {
641			shm_deallocate_segment(shmseg);
642			shm_last_free = IPCID_TO_IX(shmid);
643		}
644		break;
645#if 0
646	case SHM_LOCK:
647	case SHM_UNLOCK:
648#endif
649	default:
650		error = EINVAL;
651		break;
652	}
653done2:
654	mtx_unlock(&Giant);
655	return (error);
656}
657
658int
659shmctl(td, uap)
660	struct thread *td;
661	struct shmctl_args *uap;
662{
663	int error = 0;
664	struct shmid_ds buf;
665	size_t bufsz;
666
667	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
668	if (uap->cmd == IPC_SET) {
669		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
670			goto done;
671	}
672
673	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
674	if (error)
675		goto done;
676
677	/* Cases in which we need to copyout */
678	switch (uap->cmd) {
679	case IPC_INFO:
680	case SHM_INFO:
681	case SHM_STAT:
682	case IPC_STAT:
683		error = copyout(&buf, uap->buf, bufsz);
684		break;
685	}
686
687done:
688	if (error) {
689		/* Invalidate the return value */
690		td->td_retval[0] = -1;
691	}
692	return (error);
693}
694
695
696#ifndef _SYS_SYSPROTO_H_
697struct shmget_args {
698	key_t key;
699	size_t size;
700	int shmflg;
701};
702#endif
703
704static int
705shmget_existing(td, uap, mode, segnum)
706	struct thread *td;
707	struct shmget_args *uap;
708	int mode;
709	int segnum;
710{
711	struct shmid_kernel *shmseg;
712	int error;
713
714	shmseg = &shmsegs[segnum];
715	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
716		/*
717		 * This segment is in the process of being allocated.  Wait
718		 * until it's done, and look the key up again (in case the
719		 * allocation failed or it was freed).
720		 */
721		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
722		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
723		if (error)
724			return (error);
725		return (EAGAIN);
726	}
727	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
728		return (EEXIST);
729	error = ipcperm(td, &shmseg->u.shm_perm, mode);
730#ifdef MAC
731	error = mac_check_sysv_shmget(td->td_ucred, shmseg, uap->shmflg);
732	if (error != 0)
733		MPRINTF(("mac_check_sysv_shmget returned %d\n", error));
734#endif
735	if (error)
736		return (error);
737	if (uap->size && uap->size > shmseg->u.shm_segsz)
738		return (EINVAL);
739	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
740	return (0);
741}
742
743static int
744shmget_allocate_segment(td, uap, mode)
745	struct thread *td;
746	struct shmget_args *uap;
747	int mode;
748{
749	int i, segnum, shmid, size;
750	struct ucred *cred = td->td_ucred;
751	struct shmid_kernel *shmseg;
752	vm_object_t shm_object;
753
754	GIANT_REQUIRED;
755
756	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
757		return (EINVAL);
758	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
759		return (ENOSPC);
760	size = round_page(uap->size);
761	if (shm_committed + btoc(size) > shminfo.shmall)
762		return (ENOMEM);
763	if (shm_last_free < 0) {
764		shmrealloc();	/* Maybe expand the shmsegs[] array. */
765		for (i = 0; i < shmalloced; i++)
766			if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE)
767				break;
768		if (i == shmalloced)
769			return (ENOSPC);
770		segnum = i;
771	} else  {
772		segnum = shm_last_free;
773		shm_last_free = -1;
774	}
775	shmseg = &shmsegs[segnum];
776	/*
777	 * In case we sleep in malloc(), mark the segment present but deleted
778	 * so that noone else tries to create the same key.
779	 */
780	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
781	shmseg->u.shm_perm.key = uap->key;
782	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
783	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
784
785	/*
786	 * We make sure that we have allocated a pager before we need
787	 * to.
788	 */
789	if (shm_use_phys) {
790		shm_object =
791		    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
792	} else {
793		shm_object =
794		    vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
795	}
796	VM_OBJECT_LOCK(shm_object);
797	vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
798	vm_object_set_flag(shm_object, OBJ_NOSPLIT);
799	VM_OBJECT_UNLOCK(shm_object);
800
801	shmseg->u.shm_internal = shm_object;
802	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid;
803	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
804	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
805	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
806	shmseg->u.shm_segsz = uap->size;
807	shmseg->u.shm_cpid = td->td_proc->p_pid;
808	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
809	shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
810#ifdef MAC
811	mac_create_sysv_shm(cred, shmseg);
812#endif
813	shmseg->u.shm_ctime = time_second;
814	shm_committed += btoc(size);
815	shm_nused++;
816	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
817		/*
818		 * Somebody else wanted this key while we were asleep.  Wake
819		 * them up now.
820		 */
821		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
822		wakeup(shmseg);
823	}
824	td->td_retval[0] = shmid;
825	return (0);
826}
827
828/*
829 * MPSAFE
830 */
831int
832shmget(td, uap)
833	struct thread *td;
834	struct shmget_args *uap;
835{
836	int segnum, mode;
837	int error;
838
839	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
840		return (ENOSYS);
841	mtx_lock(&Giant);
842	mode = uap->shmflg & ACCESSPERMS;
843	if (uap->key != IPC_PRIVATE) {
844	again:
845		segnum = shm_find_segment_by_key(uap->key);
846		if (segnum >= 0) {
847			error = shmget_existing(td, uap, mode, segnum);
848			if (error == EAGAIN)
849				goto again;
850			goto done2;
851		}
852		if ((uap->shmflg & IPC_CREAT) == 0) {
853			error = ENOENT;
854			goto done2;
855		}
856	}
857	error = shmget_allocate_segment(td, uap, mode);
858done2:
859	mtx_unlock(&Giant);
860	return (error);
861}
862
863/*
864 * MPSAFE
865 */
866int
867shmsys(td, uap)
868	struct thread *td;
869	/* XXX actually varargs. */
870	struct shmsys_args /* {
871		int	which;
872		int	a2;
873		int	a3;
874		int	a4;
875	} */ *uap;
876{
877	int error;
878
879	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
880		return (ENOSYS);
881	if (uap->which < 0 ||
882	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
883		return (EINVAL);
884	mtx_lock(&Giant);
885	error = (*shmcalls[uap->which])(td, &uap->a2);
886	mtx_unlock(&Giant);
887	return (error);
888}
889
890static void
891shmfork_myhook(p1, p2)
892	struct proc *p1, *p2;
893{
894	struct shmmap_state *shmmap_s;
895	size_t size;
896	int i;
897
898	mtx_lock(&Giant);
899	size = shminfo.shmseg * sizeof(struct shmmap_state);
900	shmmap_s = malloc(size, M_SHM, M_WAITOK);
901	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
902	p2->p_vmspace->vm_shm = shmmap_s;
903	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
904		if (shmmap_s->shmid != -1)
905			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
906	mtx_unlock(&Giant);
907}
908
909static void
910shmexit_myhook(struct vmspace *vm)
911{
912	struct shmmap_state *base, *shm;
913	int i;
914
915	if ((base = vm->vm_shm) != NULL) {
916		vm->vm_shm = NULL;
917		mtx_lock(&Giant);
918		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
919			if (shm->shmid != -1)
920				shm_delete_mapping(vm, shm);
921		}
922		mtx_unlock(&Giant);
923		free(base, M_SHM);
924	}
925}
926
927static void
928shmrealloc(void)
929{
930	int i;
931	struct shmid_kernel *newsegs;
932
933	if (shmalloced >= shminfo.shmmni)
934		return;
935
936	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
937	if (newsegs == NULL)
938		return;
939	for (i = 0; i < shmalloced; i++)
940		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
941	for (; i < shminfo.shmmni; i++) {
942		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
943		shmsegs[i].u.shm_perm.seq = 0;
944#ifdef MAC
945		mac_init_sysv_shm(&shmsegs[i]);
946#endif
947	}
948	free(shmsegs, M_SHM);
949	shmsegs = newsegs;
950	shmalloced = shminfo.shmmni;
951}
952
953static void
954shminit()
955{
956	int i;
957
958	TUNABLE_INT_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall);
959	for (i = PAGE_SIZE; i > 0; i--) {
960		shminfo.shmmax = shminfo.shmall * i;
961		if (shminfo.shmmax >= shminfo.shmall)
962			break;
963	}
964	TUNABLE_INT_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
965	TUNABLE_INT_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
966	TUNABLE_INT_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
967	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);
968
969	shmalloced = shminfo.shmmni;
970	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
971	if (shmsegs == NULL)
972		panic("cannot allocate initial memory for sysvshm");
973	for (i = 0; i < shmalloced; i++) {
974		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
975		shmsegs[i].u.shm_perm.seq = 0;
976#ifdef MAC
977		mac_init_sysv_shm(&shmsegs[i]);
978#endif
979	}
980	shm_last_free = 0;
981	shm_nused = 0;
982	shm_committed = 0;
983	shmexit_hook = &shmexit_myhook;
984	shmfork_hook = &shmfork_myhook;
985}
986
987static int
988shmunload()
989{
990#ifdef MAC
991	int i;
992#endif
993
994	if (shm_nused > 0)
995		return (EBUSY);
996
997#ifdef MAC
998	for (i = 0; i < shmalloced; i++)
999		mac_destroy_sysv_shm(&shmsegs[i]);
1000#endif
1001	free(shmsegs, M_SHM);
1002	shmexit_hook = NULL;
1003	shmfork_hook = NULL;
1004	return (0);
1005}
1006
1007static int
1008sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
1009{
1010
1011	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
1012}
1013
1014static int
1015sysvshm_modload(struct module *module, int cmd, void *arg)
1016{
1017	int error = 0;
1018
1019	switch (cmd) {
1020	case MOD_LOAD:
1021		shminit();
1022		break;
1023	case MOD_UNLOAD:
1024		error = shmunload();
1025		break;
1026	case MOD_SHUTDOWN:
1027		break;
1028	default:
1029		error = EINVAL;
1030		break;
1031	}
1032	return (error);
1033}
1034
/*
 * Module glue: the module is named "sysvshm", its events are handled
 * by sysvshm_modload(), and no extra argument is passed to the handler.
 */
static moduledata_t sysvshm_mod = {
	"sysvshm",
	&sysvshm_modload,
	NULL
};

/* One helper per system call implemented in this file. */
SYSCALL_MODULE_HELPER(shmsys);
SYSCALL_MODULE_HELPER(shmat);
SYSCALL_MODULE_HELPER(shmctl);
SYSCALL_MODULE_HELPER(shmdt);
SYSCALL_MODULE_HELPER(shmget);

DECLARE_MODULE(sysvshm, sysvshm_mod,
	SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
MODULE_VERSION(sysvshm, 1);
1050