sysv_shm.c revision 231195
1/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
2/*-
3 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Adam Glass and Charles
16 *	Hannum.
17 * 4. The names of the authors may not be used to endorse or promote products
18 *    derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31/*-
32 * Copyright (c) 2003-2005 McAfee, Inc.
33 * All rights reserved.
34 *
35 * This software was developed for the FreeBSD Project in part by McAfee
36 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
37 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
38 * program.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 *    notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 *    notice, this list of conditions and the following disclaimer in the
47 *    documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62#include <sys/cdefs.h>
63__FBSDID("$FreeBSD: head/sys/kern/sysv_shm.c 231195 2012-02-08 09:18:22Z pjd $");
64
65#include "opt_compat.h"
66#include "opt_sysvipc.h"
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/kernel.h>
71#include <sys/limits.h>
72#include <sys/lock.h>
73#include <sys/sysctl.h>
74#include <sys/shm.h>
75#include <sys/proc.h>
76#include <sys/malloc.h>
77#include <sys/mman.h>
78#include <sys/module.h>
79#include <sys/mutex.h>
80#include <sys/racct.h>
81#include <sys/resourcevar.h>
82#include <sys/stat.h>
83#include <sys/syscall.h>
84#include <sys/syscallsubr.h>
85#include <sys/sysent.h>
86#include <sys/sysproto.h>
87#include <sys/jail.h>
88
89#include <security/mac/mac_framework.h>
90
91#include <vm/vm.h>
92#include <vm/vm_param.h>
93#include <vm/pmap.h>
94#include <vm/vm_object.h>
95#include <vm/vm_map.h>
96#include <vm/vm_page.h>
97#include <vm/vm_pager.h>
98
99FEATURE(sysv_shm, "System V shared memory segments support");
100
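/*
 * Kernel side of the System V shared memory API (shmget, shmat, shmdt
 * and shmctl).  A minimal userland sketch of the calls serviced here,
 * for illustration only (headers, error handling and the choice of
 * "/tmp/somefile" are illustrative, not prescriptive):
 *
 *	key_t key = ftok("/tmp/somefile", 1);
 *	int id = shmget(key, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);
 *	p[0] = 'x';			/- visible to other attachers -/
 *	(void)shmdt(p);
 *	(void)shmctl(id, IPC_RMID, NULL);
 */
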
101static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");
102
103static int shmget_allocate_segment(struct thread *td,
104    struct shmget_args *uap, int mode);
105static int shmget_existing(struct thread *td, struct shmget_args *uap,
106    int mode, int segnum);
107
108#define	SHMSEG_FREE     	0x0200
109#define	SHMSEG_REMOVED  	0x0400
110#define	SHMSEG_ALLOCATED	0x0800
111#define	SHMSEG_WANTED		0x1000
112
113static int shm_last_free, shm_nused, shmalloced;
114vm_size_t shm_committed;
115static struct shmid_kernel	*shmsegs;
116
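/*
 * Per-process attachment table, hung off p_vmspace->vm_shm.  It holds
 * shminfo.shmseg entries, each recording the attach address and the
 * shmid of one attachment (shmid == -1 marks a free slot).
 */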
117struct shmmap_state {
118	vm_offset_t va;
119	int shmid;
120};
121
122static void shm_deallocate_segment(struct shmid_kernel *);
123static int shm_find_segment_by_key(key_t);
124static struct shmid_kernel *shm_find_segment_by_shmid(int);
125static struct shmid_kernel *shm_find_segment_by_shmidx(int);
126static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
127static void shmrealloc(void);
128static int shminit(void);
129static int sysvshm_modload(struct module *, int, void *);
130static int shmunload(void);
131static void shmexit_myhook(struct vmspace *vm);
132static void shmfork_myhook(struct proc *p1, struct proc *p2);
133static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
134
135/*
136 * Tuneable values.
137 */
138#ifndef SHMMAXPGS
139#define	SHMMAXPGS	131072	/* Note: sysv shared memory is swap backed. */
140#endif
141#ifndef SHMMAX
142#define	SHMMAX	(SHMMAXPGS*PAGE_SIZE)
143#endif
144#ifndef SHMMIN
145#define	SHMMIN	1
146#endif
147#ifndef SHMMNI
148#define	SHMMNI	192
149#endif
150#ifndef SHMSEG
151#define	SHMSEG	128
152#endif
153#ifndef SHMALL
154#define	SHMALL	(SHMMAXPGS)
155#endif
156
157struct	shminfo shminfo = {
158	SHMMAX,
159	SHMMIN,
160	SHMMNI,
161	SHMSEG,
162	SHMALL
163};
164
165static int shm_use_phys;
166static int shm_allow_removed;
167
168SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
169    "Maximum shared memory segment size");
170SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
171    "Minimum shared memory segment size");
172SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0,
173    "Number of shared memory identifiers");
174SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0,
175    "Number of segments per process");
176SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
177    "Maximum number of pages available for shared memory");
178SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
179    &shm_use_phys, 0, "Enable/Disable locking of shared memory pages in core");
180SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
181    &shm_allow_removed, 0,
182    "Enable/Disable attachment to attached segments marked for removal");
183SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLTYPE_OPAQUE | CTLFLAG_RD,
184    NULL, 0, sysctl_shmsegs, "",
185    "Current number of shared memory segments allocated");
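
/*
 * All of the limits above can be adjusted without recompiling.  An
 * illustrative (not prescriptive) example: the CTLFLAG_RW knobs can be
 * changed at run time with sysctl(8), e.g.
 *
 *	sysctl kern.ipc.shmmax=536870912
 *	sysctl kern.ipc.shmall=131072
 *
 * while the CTLFLAG_RDTUN knobs (kern.ipc.shmmni, kern.ipc.shmseg) can
 * only be set as loader tunables, e.g. in loader.conf(5):
 *
 *	kern.ipc.shmmni=256
 *	kern.ipc.shmseg=256
 *
 * shminit() below also fetches all of these as tunables at load time.
 */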
186
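/*
 * Linear scan of shmsegs[] for an allocated segment with the given key;
 * returns its index, or -1 if no such segment exists.
 */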
187static int
188shm_find_segment_by_key(key)
189	key_t key;
190{
191	int i;
192
193	for (i = 0; i < shmalloced; i++)
194		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
195		    shmsegs[i].u.shm_perm.key == key)
196			return (i);
197	return (-1);
198}
199
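/*
 * Translate a user-visible shmid into its shmid_kernel.  The index and
 * sequence number encoded in the id must refer to an allocated segment;
 * segments marked SHMSEG_REMOVED are rejected unless the
 * kern.ipc.shm_allow_removed sysctl is set.
 */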
200static struct shmid_kernel *
201shm_find_segment_by_shmid(int shmid)
202{
203	int segnum;
204	struct shmid_kernel *shmseg;
205
206	segnum = IPCID_TO_IX(shmid);
207	if (segnum < 0 || segnum >= shmalloced)
208		return (NULL);
209	shmseg = &shmsegs[segnum];
210	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
211	    (!shm_allow_removed &&
212	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
213	    shmseg->u.shm_perm.seq != IPCID_TO_SEQ(shmid))
214		return (NULL);
215	return (shmseg);
216}
217
218static struct shmid_kernel *
219shm_find_segment_by_shmidx(int segnum)
220{
221	struct shmid_kernel *shmseg;
222
223	if (segnum < 0 || segnum >= shmalloced)
224		return (NULL);
225	shmseg = &shmsegs[segnum];
226	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
227	    (!shm_allow_removed &&
228	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0))
229		return (NULL);
230	return (shmseg);
231}
232
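/*
 * Release a segment's backing VM object and undo its accounting
 * (committed page count, racct usage, MAC state, credential reference),
 * then mark the slot free.
 */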
233static void
234shm_deallocate_segment(shmseg)
235	struct shmid_kernel *shmseg;
236{
237	vm_size_t size;
238
239	GIANT_REQUIRED;
240
241	vm_object_deallocate(shmseg->object);
242	shmseg->object = NULL;
243	size = round_page(shmseg->u.shm_segsz);
244	shm_committed -= btoc(size);
245	shm_nused--;
246	shmseg->u.shm_perm.mode = SHMSEG_FREE;
247#ifdef MAC
248	mac_sysvshm_cleanup(shmseg);
249#endif
250	racct_sub_cred(shmseg->cred, RACCT_NSHM, 1);
251	racct_sub_cred(shmseg->cred, RACCT_SHMSIZE, size);
252	crfree(shmseg->cred);
253	shmseg->cred = NULL;
254}
255
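/*
 * Unmap a single attachment from the given vmspace and drop the
 * segment's attach count; if the segment was marked for removal and
 * this was its last attachment, tear the segment down as well.
 */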
256static int
257shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
258{
259	struct shmid_kernel *shmseg;
260	int segnum, result;
261	vm_size_t size;
262
263	GIANT_REQUIRED;
264
265	segnum = IPCID_TO_IX(shmmap_s->shmid);
266	shmseg = &shmsegs[segnum];
267	size = round_page(shmseg->u.shm_segsz);
268	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
269	if (result != KERN_SUCCESS)
270		return (EINVAL);
271	shmmap_s->shmid = -1;
272	shmseg->u.shm_dtime = time_second;
273	if ((--shmseg->u.shm_nattch <= 0) &&
274	    (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
275		shm_deallocate_segment(shmseg);
276		shm_last_free = segnum;
277	}
278	return (0);
279}
280
281#ifndef _SYS_SYSPROTO_H_
282struct shmdt_args {
283	const void *shmaddr;
284};
285#endif
286int
287sys_shmdt(td, uap)
288	struct thread *td;
289	struct shmdt_args *uap;
290{
291	struct proc *p = td->td_proc;
292	struct shmmap_state *shmmap_s;
293#ifdef MAC
294	struct shmid_kernel *shmsegptr;
295#endif
296	int i;
297	int error = 0;
298
299	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
300		return (ENOSYS);
301	mtx_lock(&Giant);
302	shmmap_s = p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
304		error = EINVAL;
305		goto done2;
306	}
307	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
308		if (shmmap_s->shmid != -1 &&
309		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
310			break;
311		}
312	}
313	if (i == shminfo.shmseg) {
314		error = EINVAL;
315		goto done2;
316	}
317#ifdef MAC
318	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
319	error = mac_sysvshm_check_shmdt(td->td_ucred, shmsegptr);
320	if (error != 0)
321		goto done2;
322#endif
323	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
324done2:
325	mtx_unlock(&Giant);
326	return (error);
327}
328
329#ifndef _SYS_SYSPROTO_H_
330struct shmat_args {
331	int shmid;
332	const void *shmaddr;
333	int shmflg;
334};
335#endif
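/*
 * Common attach path for shmat(2) and the compat wrappers: allocate the
 * per-process attachment table on first use, validate the segment and
 * the caller's permissions, pick an attach address (honoring SHM_RND
 * and SHMLBA alignment when an address was supplied) and map the
 * backing object shared into the process.
 */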
336int
337kern_shmat(td, shmid, shmaddr, shmflg)
338	struct thread *td;
339	int shmid;
340	const void *shmaddr;
341	int shmflg;
342{
343	struct proc *p = td->td_proc;
344	int i, flags;
345	struct shmid_kernel *shmseg;
346	struct shmmap_state *shmmap_s = NULL;
347	vm_offset_t attach_va;
348	vm_prot_t prot;
349	vm_size_t size;
350	int rv;
351	int error = 0;
352
353	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
354		return (ENOSYS);
355	mtx_lock(&Giant);
356	shmmap_s = p->p_vmspace->vm_shm;
357	if (shmmap_s == NULL) {
358		shmmap_s = malloc(shminfo.shmseg * sizeof(struct shmmap_state),
359		    M_SHM, M_WAITOK);
360		for (i = 0; i < shminfo.shmseg; i++)
361			shmmap_s[i].shmid = -1;
362		p->p_vmspace->vm_shm = shmmap_s;
363	}
364	shmseg = shm_find_segment_by_shmid(shmid);
365	if (shmseg == NULL) {
366		error = EINVAL;
367		goto done2;
368	}
369	error = ipcperm(td, &shmseg->u.shm_perm,
370	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
371	if (error)
372		goto done2;
373#ifdef MAC
374	error = mac_sysvshm_check_shmat(td->td_ucred, shmseg, shmflg);
375	if (error != 0)
376		goto done2;
377#endif
378	for (i = 0; i < shminfo.shmseg; i++) {
379		if (shmmap_s->shmid == -1)
380			break;
381		shmmap_s++;
382	}
383	if (i >= shminfo.shmseg) {
384		error = EMFILE;
385		goto done2;
386	}
387	size = round_page(shmseg->u.shm_segsz);
388	prot = VM_PROT_READ;
389	if ((shmflg & SHM_RDONLY) == 0)
390		prot |= VM_PROT_WRITE;
391	flags = MAP_ANON | MAP_SHARED;
392	if (shmaddr) {
393		flags |= MAP_FIXED;
394		if (shmflg & SHM_RND) {
395			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
396		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
397			attach_va = (vm_offset_t)shmaddr;
398		} else {
399			error = EINVAL;
400			goto done2;
401		}
402	} else {
403		/*
404		 * This is just a hint to vm_map_find() about where to
405		 * put it.
406		 */
407		PROC_LOCK(p);
408		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
409		    lim_max(p, RLIMIT_DATA));
410		PROC_UNLOCK(p);
411	}
412
413	vm_object_reference(shmseg->object);
414	rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->object,
415	    0, &attach_va, size, (flags & MAP_FIXED) ? VMFS_NO_SPACE :
416	    VMFS_ANY_SPACE, prot, prot, 0);
417	if (rv != KERN_SUCCESS) {
418		vm_object_deallocate(shmseg->object);
419		error = ENOMEM;
420		goto done2;
421	}
422	vm_map_inherit(&p->p_vmspace->vm_map,
423		attach_va, attach_va + size, VM_INHERIT_SHARE);
424
425	shmmap_s->va = attach_va;
426	shmmap_s->shmid = shmid;
427	shmseg->u.shm_lpid = p->p_pid;
428	shmseg->u.shm_atime = time_second;
429	shmseg->u.shm_nattch++;
430	td->td_retval[0] = attach_va;
431done2:
432	mtx_unlock(&Giant);
433	return (error);
434}
435
436int
437sys_shmat(td, uap)
438	struct thread *td;
439	struct shmat_args *uap;
440{
441	return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg);
442}
443
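/*
 * Common control path for shmctl(2), the compat wrappers and the Linux
 * ABI layer.  buf and bufsz describe a kernel-space buffer; the callers
 * are responsible for the copyin/copyout of their ABI-specific
 * structures.
 */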
444int
445kern_shmctl(td, shmid, cmd, buf, bufsz)
446	struct thread *td;
447	int shmid;
448	int cmd;
449	void *buf;
450	size_t *bufsz;
451{
452	int error = 0;
453	struct shmid_kernel *shmseg;
454
455	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
456		return (ENOSYS);
457
458	mtx_lock(&Giant);
459	switch (cmd) {
460	/*
	 * It is possible that kern_shmctl is being called from the Linux ABI
	 * layer, in which case we will need to implement IPC_INFO.  It should
	 * be noted that other shmctl calls will be funneled through here for
	 * Linux binaries as well.
465	 *
466	 * NB: The Linux ABI layer will convert this data to structure(s) more
467	 * consistent with the Linux ABI.
468	 */
469	case IPC_INFO:
470		memcpy(buf, &shminfo, sizeof(shminfo));
471		if (bufsz)
472			*bufsz = sizeof(shminfo);
473		td->td_retval[0] = shmalloced;
474		goto done2;
475	case SHM_INFO: {
476		struct shm_info shm_info;
477		shm_info.used_ids = shm_nused;
478		shm_info.shm_rss = 0;	/*XXX where to get from ? */
479		shm_info.shm_tot = 0;	/*XXX where to get from ? */
480		shm_info.shm_swp = 0;	/*XXX where to get from ? */
481		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
482		shm_info.swap_successes = 0;	/*XXX where to get from ? */
483		memcpy(buf, &shm_info, sizeof(shm_info));
484		if (bufsz)
485			*bufsz = sizeof(shm_info);
486		td->td_retval[0] = shmalloced;
487		goto done2;
488	}
489	}
490	if (cmd == SHM_STAT)
491		shmseg = shm_find_segment_by_shmidx(shmid);
492	else
493		shmseg = shm_find_segment_by_shmid(shmid);
494	if (shmseg == NULL) {
495		error = EINVAL;
496		goto done2;
497	}
498#ifdef MAC
499	error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, cmd);
500	if (error != 0)
501		goto done2;
502#endif
503	switch (cmd) {
504	case SHM_STAT:
505	case IPC_STAT:
506		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
507		if (error)
508			goto done2;
509		memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
510		if (bufsz)
511			*bufsz = sizeof(struct shmid_ds);
512		if (cmd == SHM_STAT)
513			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm);
514		break;
515	case IPC_SET: {
516		struct shmid_ds *shmid;
517
518		shmid = (struct shmid_ds *)buf;
519		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
520		if (error)
521			goto done2;
522		shmseg->u.shm_perm.uid = shmid->shm_perm.uid;
523		shmseg->u.shm_perm.gid = shmid->shm_perm.gid;
524		shmseg->u.shm_perm.mode =
525		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
526		    (shmid->shm_perm.mode & ACCESSPERMS);
527		shmseg->u.shm_ctime = time_second;
528		break;
529	}
530	case IPC_RMID:
531		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
532		if (error)
533			goto done2;
534		shmseg->u.shm_perm.key = IPC_PRIVATE;
535		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
536		if (shmseg->u.shm_nattch <= 0) {
537			shm_deallocate_segment(shmseg);
538			shm_last_free = IPCID_TO_IX(shmid);
539		}
540		break;
541#if 0
542	case SHM_LOCK:
543	case SHM_UNLOCK:
544#endif
545	default:
546		error = EINVAL;
547		break;
548	}
549done2:
550	mtx_unlock(&Giant);
551	return (error);
552}
553
554#ifndef _SYS_SYSPROTO_H_
555struct shmctl_args {
556	int shmid;
557	int cmd;
558	struct shmid_ds *buf;
559};
560#endif
561int
562sys_shmctl(td, uap)
563	struct thread *td;
564	struct shmctl_args *uap;
565{
566	int error = 0;
567	struct shmid_ds buf;
568	size_t bufsz;
569
570	/*
	 * The only reason IPC_INFO, SHM_INFO and SHM_STAT exist is to support
	 * Linux binaries.  If we see the call come through the FreeBSD ABI,
	 * return an error back to the user since we do not support this.
574	 */
575	if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO ||
576	    uap->cmd == SHM_STAT)
577		return (EINVAL);
578
579	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
580	if (uap->cmd == IPC_SET) {
581		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
582			goto done;
583	}
584
585	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
586	if (error)
587		goto done;
588
589	/* Cases in which we need to copyout */
590	switch (uap->cmd) {
591	case IPC_STAT:
592		error = copyout(&buf, uap->buf, bufsz);
593		break;
594	}
595
596done:
597	if (error) {
598		/* Invalidate the return value */
599		td->td_retval[0] = -1;
600	}
601	return (error);
602}
603
604
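/*
 * shmget(2) on a key that is already present: honor IPC_CREAT|IPC_EXCL,
 * wait out a segment that is still being set up (retrying via EAGAIN),
 * check the requested size against the existing segment and return its
 * identifier.
 */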
605static int
606shmget_existing(td, uap, mode, segnum)
607	struct thread *td;
608	struct shmget_args *uap;
609	int mode;
610	int segnum;
611{
612	struct shmid_kernel *shmseg;
613	int error;
614
615	shmseg = &shmsegs[segnum];
616	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
617		/*
618		 * This segment is in the process of being allocated.  Wait
619		 * until it's done, and look the key up again (in case the
620		 * allocation failed or it was freed).
621		 */
622		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
623		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
624		if (error)
625			return (error);
626		return (EAGAIN);
627	}
628	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
629		return (EEXIST);
630#ifdef MAC
631	error = mac_sysvshm_check_shmget(td->td_ucred, shmseg, uap->shmflg);
632	if (error != 0)
633		return (error);
634#endif
635	if (uap->size != 0 && uap->size > shmseg->u.shm_segsz)
636		return (EINVAL);
637	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
638	return (0);
639}
640
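/*
 * shmget(2) when a new segment must be created: enforce the global size
 * and count limits, pick a free slot (growing shmsegs[] if necessary),
 * charge the racct limits and allocate the swap- or phys-backed VM
 * object that will hold the segment's pages.
 */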
641static int
642shmget_allocate_segment(td, uap, mode)
643	struct thread *td;
644	struct shmget_args *uap;
645	int mode;
646{
647	int i, segnum, shmid;
648	size_t size;
649	struct ucred *cred = td->td_ucred;
650	struct shmid_kernel *shmseg;
651	vm_object_t shm_object;
652
653	GIANT_REQUIRED;
654
655	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
656		return (EINVAL);
657	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
658		return (ENOSPC);
659	size = round_page(uap->size);
660	if (shm_committed + btoc(size) > shminfo.shmall)
661		return (ENOMEM);
662	if (shm_last_free < 0) {
663		shmrealloc();	/* Maybe expand the shmsegs[] array. */
664		for (i = 0; i < shmalloced; i++)
665			if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE)
666				break;
667		if (i == shmalloced)
668			return (ENOSPC);
669		segnum = i;
670	} else  {
671		segnum = shm_last_free;
672		shm_last_free = -1;
673	}
674	shmseg = &shmsegs[segnum];
675#ifdef RACCT
676	PROC_LOCK(td->td_proc);
677	if (racct_add(td->td_proc, RACCT_NSHM, 1)) {
678		PROC_UNLOCK(td->td_proc);
679		return (ENOSPC);
680	}
681	if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) {
682		racct_sub(td->td_proc, RACCT_NSHM, 1);
683		PROC_UNLOCK(td->td_proc);
684		return (ENOMEM);
685	}
686	PROC_UNLOCK(td->td_proc);
687#endif
688	/*
	 * In case the allocation below sleeps, mark the segment present but
	 * removed so that no one else tries to create the same key.
691	 */
692	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
693	shmseg->u.shm_perm.key = uap->key;
694	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
695	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
696
697	/*
	 * We make sure that we have allocated a pager before we need it.
700	 */
701	shm_object = vm_pager_allocate(shm_use_phys ? OBJT_PHYS : OBJT_SWAP,
702	    0, size, VM_PROT_DEFAULT, 0, cred);
703	if (shm_object == NULL) {
704#ifdef RACCT
705		PROC_LOCK(td->td_proc);
706		racct_sub(td->td_proc, RACCT_NSHM, 1);
707		racct_sub(td->td_proc, RACCT_SHMSIZE, size);
708		PROC_UNLOCK(td->td_proc);
709#endif
710		return (ENOMEM);
711	}
712	VM_OBJECT_LOCK(shm_object);
713	vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
714	vm_object_set_flag(shm_object, OBJ_NOSPLIT);
715	VM_OBJECT_UNLOCK(shm_object);
716
717	shmseg->object = shm_object;
718	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid;
719	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
720	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
721	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
722	shmseg->cred = crhold(cred);
723	shmseg->u.shm_segsz = uap->size;
724	shmseg->u.shm_cpid = td->td_proc->p_pid;
725	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
726	shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
727#ifdef MAC
728	mac_sysvshm_create(cred, shmseg);
729#endif
730	shmseg->u.shm_ctime = time_second;
731	shm_committed += btoc(size);
732	shm_nused++;
733	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
734		/*
735		 * Somebody else wanted this key while we were asleep.  Wake
736		 * them up now.
737		 */
738		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
739		wakeup(shmseg);
740	}
741	td->td_retval[0] = shmid;
742	return (0);
743}
744
745#ifndef _SYS_SYSPROTO_H_
746struct shmget_args {
747	key_t key;
748	size_t size;
749	int shmflg;
750};
751#endif
752int
753sys_shmget(td, uap)
754	struct thread *td;
755	struct shmget_args *uap;
756{
757	int segnum, mode;
758	int error;
759
760	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
761		return (ENOSYS);
762	mtx_lock(&Giant);
763	mode = uap->shmflg & ACCESSPERMS;
764	if (uap->key != IPC_PRIVATE) {
765	again:
766		segnum = shm_find_segment_by_key(uap->key);
767		if (segnum >= 0) {
768			error = shmget_existing(td, uap, mode, segnum);
769			if (error == EAGAIN)
770				goto again;
771			goto done2;
772		}
773		if ((uap->shmflg & IPC_CREAT) == 0) {
774			error = ENOENT;
775			goto done2;
776		}
777	}
778	error = shmget_allocate_segment(td, uap, mode);
779done2:
780	mtx_unlock(&Giant);
781	return (error);
782}
783
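/*
 * Fork hook: give the child a copy of the parent's attachment table and
 * bump the attach count of every segment referenced by it.
 */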
784static void
785shmfork_myhook(p1, p2)
786	struct proc *p1, *p2;
787{
788	struct shmmap_state *shmmap_s;
789	size_t size;
790	int i;
791
792	mtx_lock(&Giant);
793	size = shminfo.shmseg * sizeof(struct shmmap_state);
794	shmmap_s = malloc(size, M_SHM, M_WAITOK);
795	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
796	p2->p_vmspace->vm_shm = shmmap_s;
797	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
798		if (shmmap_s->shmid != -1)
799			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
800	mtx_unlock(&Giant);
801}
802
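/*
 * Cleanup hook, reached through shmexit() when a vmspace is torn down:
 * detach every remaining attachment and free the per-process table.
 */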
803static void
804shmexit_myhook(struct vmspace *vm)
805{
806	struct shmmap_state *base, *shm;
807	int i;
808
809	if ((base = vm->vm_shm) != NULL) {
810		vm->vm_shm = NULL;
811		mtx_lock(&Giant);
812		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
813			if (shm->shmid != -1)
814				shm_delete_mapping(vm, shm);
815		}
816		mtx_unlock(&Giant);
817		free(base, M_SHM);
818	}
819}
820
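/*
 * Grow shmsegs[] up to the current kern.ipc.shmmni limit, copying the
 * existing entries and initializing the newly added slots as free.
 */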
821static void
822shmrealloc(void)
823{
824	int i;
825	struct shmid_kernel *newsegs;
826
827	if (shmalloced >= shminfo.shmmni)
828		return;
829
830	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
831	if (newsegs == NULL)
832		return;
833	for (i = 0; i < shmalloced; i++)
834		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
835	for (; i < shminfo.shmmni; i++) {
		newsegs[i].u.shm_perm.mode = SHMSEG_FREE;
		newsegs[i].u.shm_perm.seq = 0;
#ifdef MAC
		mac_sysvshm_init(&newsegs[i]);
840#endif
841	}
842	free(shmsegs, M_SHM);
843	shmsegs = newsegs;
844	shmalloced = shminfo.shmmni;
845}
846
847static struct syscall_helper_data shm_syscalls[] = {
848	SYSCALL_INIT_HELPER(shmat),
849	SYSCALL_INIT_HELPER(shmctl),
850	SYSCALL_INIT_HELPER(shmdt),
851	SYSCALL_INIT_HELPER(shmget),
852#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
853    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
854	SYSCALL_INIT_HELPER_COMPAT(freebsd7_shmctl),
855#endif
856#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
857	SYSCALL_INIT_HELPER(shmsys),
858#endif
859	SYSCALL_INIT_LAST
860};
861
862#ifdef COMPAT_FREEBSD32
863#include <compat/freebsd32/freebsd32.h>
864#include <compat/freebsd32/freebsd32_ipc.h>
865#include <compat/freebsd32/freebsd32_proto.h>
866#include <compat/freebsd32/freebsd32_signal.h>
867#include <compat/freebsd32/freebsd32_syscall.h>
868#include <compat/freebsd32/freebsd32_util.h>
869
870static struct syscall_helper_data shm32_syscalls[] = {
871	SYSCALL32_INIT_HELPER_COMPAT(shmat),
872	SYSCALL32_INIT_HELPER_COMPAT(shmdt),
873	SYSCALL32_INIT_HELPER_COMPAT(shmget),
874	SYSCALL32_INIT_HELPER(freebsd32_shmsys),
875	SYSCALL32_INIT_HELPER(freebsd32_shmctl),
876#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
877    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
878	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_shmctl),
879#endif
880	SYSCALL_INIT_LAST
881};
882#endif
883
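/*
 * Module load path: fetch the loader tunables, allocate the initial
 * shmsegs[] array, install the fork and exit hooks and register the
 * syscalls (including the 32-bit compat entries where configured).
 */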
884static int
885shminit()
886{
887	int i, error;
888
889#ifndef BURN_BRIDGES
890	if (TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall) != 0)
891		printf("kern.ipc.shmmaxpgs is now called kern.ipc.shmall!\n");
892#endif
893	TUNABLE_ULONG_FETCH("kern.ipc.shmall", &shminfo.shmall);
894	if (!TUNABLE_ULONG_FETCH("kern.ipc.shmmax", &shminfo.shmmax)) {
		/*
		 * Initialize shmmax from shmall, dealing with possible
		 * overflow: back the byte-per-page multiplier off from
		 * PAGE_SIZE until the product no longer wraps.
		 */
896		for (i = PAGE_SIZE; i > 0; i--) {
897			shminfo.shmmax = shminfo.shmall * i;
898			if (shminfo.shmmax >= shminfo.shmall)
899				break;
900		}
901	}
902	TUNABLE_ULONG_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
903	TUNABLE_ULONG_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
904	TUNABLE_ULONG_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
905	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);
906
907	shmalloced = shminfo.shmmni;
908	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
909	for (i = 0; i < shmalloced; i++) {
910		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
911		shmsegs[i].u.shm_perm.seq = 0;
912#ifdef MAC
913		mac_sysvshm_init(&shmsegs[i]);
914#endif
915	}
916	shm_last_free = 0;
917	shm_nused = 0;
918	shm_committed = 0;
919	shmexit_hook = &shmexit_myhook;
920	shmfork_hook = &shmfork_myhook;
921
922	error = syscall_helper_register(shm_syscalls);
923	if (error != 0)
924		return (error);
925#ifdef COMPAT_FREEBSD32
926	error = syscall32_helper_register(shm32_syscalls);
927	if (error != 0)
928		return (error);
929#endif
930	return (0);
931}
932
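/*
 * Module unload path: refuse to unload while any segment is in use,
 * unregister the syscalls and drop the remaining object references.
 */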
933static int
934shmunload()
935{
936	int i;
937
938	if (shm_nused > 0)
939		return (EBUSY);
940
941#ifdef COMPAT_FREEBSD32
942	syscall32_helper_unregister(shm32_syscalls);
943#endif
944	syscall_helper_unregister(shm_syscalls);
945
946	for (i = 0; i < shmalloced; i++) {
947#ifdef MAC
948		mac_sysvshm_destroy(&shmsegs[i]);
949#endif
950		/*
		 * Objects might still be mapped into process address
		 * spaces.  The actual free happens when the last mapping
		 * is destroyed.
954		 */
955		if (shmsegs[i].u.shm_perm.mode != SHMSEG_FREE)
956			vm_object_deallocate(shmsegs[i].object);
957	}
958	free(shmsegs, M_SHM);
959	shmexit_hook = NULL;
960	shmfork_hook = NULL;
961	return (0);
962}
963
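/*
 * Sysctl handler that exports the raw shmsegs[] array to userland
 * (consumed by tools such as ipcs(1)).
 */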
964static int
965sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
966{
967
968	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
969}
970
971#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
972struct oshmid_ds {
973	struct	ipc_perm_old shm_perm;	/* operation perms */
974	int	shm_segsz;		/* size of segment (bytes) */
975	u_short	shm_cpid;		/* pid, creator */
976	u_short	shm_lpid;		/* pid, last operation */
977	short	shm_nattch;		/* no. of current attaches */
978	time_t	shm_atime;		/* last attach time */
979	time_t	shm_dtime;		/* last detach time */
980	time_t	shm_ctime;		/* last change time */
981	void	*shm_handle;		/* internal handle for shm segment */
982};
983
984struct oshmctl_args {
985	int shmid;
986	int cmd;
987	struct oshmid_ds *ubuf;
988};
989
990static int
991oshmctl(struct thread *td, struct oshmctl_args *uap)
992{
993#ifdef COMPAT_43
994	int error = 0;
995	struct shmid_kernel *shmseg;
996	struct oshmid_ds outbuf;
997
998	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
999		return (ENOSYS);
1000	mtx_lock(&Giant);
1001	shmseg = shm_find_segment_by_shmid(uap->shmid);
1002	if (shmseg == NULL) {
1003		error = EINVAL;
1004		goto done2;
1005	}
1006	switch (uap->cmd) {
1007	case IPC_STAT:
1008		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
1009		if (error)
1010			goto done2;
1011#ifdef MAC
1012		error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, uap->cmd);
1013		if (error != 0)
1014			goto done2;
1015#endif
1016		ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm);
1017		outbuf.shm_segsz = shmseg->u.shm_segsz;
1018		outbuf.shm_cpid = shmseg->u.shm_cpid;
1019		outbuf.shm_lpid = shmseg->u.shm_lpid;
1020		outbuf.shm_nattch = shmseg->u.shm_nattch;
1021		outbuf.shm_atime = shmseg->u.shm_atime;
1022		outbuf.shm_dtime = shmseg->u.shm_dtime;
1023		outbuf.shm_ctime = shmseg->u.shm_ctime;
1024		outbuf.shm_handle = shmseg->object;
1025		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
1026		if (error)
1027			goto done2;
1028		break;
1029	default:
1030		error = freebsd7_shmctl(td, (struct freebsd7_shmctl_args *)uap);
1031		break;
1032	}
1033done2:
1034	mtx_unlock(&Giant);
1035	return (error);
1036#else
1037	return (EINVAL);
1038#endif
1039}
1040
1041/* XXX casting to (sy_call_t *) is bogus, as usual. */
1042static sy_call_t *shmcalls[] = {
1043	(sy_call_t *)sys_shmat, (sy_call_t *)oshmctl,
1044	(sy_call_t *)sys_shmdt, (sy_call_t *)sys_shmget,
1045	(sy_call_t *)freebsd7_shmctl
1046};
1047
1048int
1049sys_shmsys(td, uap)
1050	struct thread *td;
1051	/* XXX actually varargs. */
1052	struct shmsys_args /* {
1053		int	which;
1054		int	a2;
1055		int	a3;
1056		int	a4;
1057	} */ *uap;
1058{
1059	int error;
1060
1061	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
1062		return (ENOSYS);
1063	if (uap->which < 0 ||
1064	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
1065		return (EINVAL);
1066	mtx_lock(&Giant);
1067	error = (*shmcalls[uap->which])(td, &uap->a2);
1068	mtx_unlock(&Giant);
1069	return (error);
1070}
1071
1072#endif	/* i386 && (COMPAT_FREEBSD4 || COMPAT_43) */
1073
1074#ifdef COMPAT_FREEBSD32
1075
1076int
1077freebsd32_shmsys(struct thread *td, struct freebsd32_shmsys_args *uap)
1078{
1079
1080#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1081    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1082	switch (uap->which) {
1083	case 0:	{	/* shmat */
1084		struct shmat_args ap;
1085
1086		ap.shmid = uap->a2;
1087		ap.shmaddr = PTRIN(uap->a3);
1088		ap.shmflg = uap->a4;
1089		return (sysent[SYS_shmat].sy_call(td, &ap));
1090	}
1091	case 2: {	/* shmdt */
1092		struct shmdt_args ap;
1093
1094		ap.shmaddr = PTRIN(uap->a2);
1095		return (sysent[SYS_shmdt].sy_call(td, &ap));
1096	}
1097	case 3: {	/* shmget */
1098		struct shmget_args ap;
1099
1100		ap.key = uap->a2;
1101		ap.size = uap->a3;
1102		ap.shmflg = uap->a4;
1103		return (sysent[SYS_shmget].sy_call(td, &ap));
1104	}
1105	case 4: {	/* shmctl */
1106		struct freebsd7_freebsd32_shmctl_args ap;
1107
1108		ap.shmid = uap->a2;
1109		ap.cmd = uap->a3;
1110		ap.buf = PTRIN(uap->a4);
1111		return (freebsd7_freebsd32_shmctl(td, &ap));
1112	}
1113	case 1:		/* oshmctl */
1114	default:
1115		return (EINVAL);
1116	}
1117#else
1118	return (nosys(td, NULL));
1119#endif
1120}
1121
1122#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1123    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1124int
1125freebsd7_freebsd32_shmctl(struct thread *td,
1126    struct freebsd7_freebsd32_shmctl_args *uap)
1127{
1128	int error = 0;
1129	union {
1130		struct shmid_ds shmid_ds;
1131		struct shm_info shm_info;
1132		struct shminfo shminfo;
1133	} u;
1134	union {
1135		struct shmid_ds32_old shmid_ds32;
1136		struct shm_info32 shm_info32;
1137		struct shminfo32 shminfo32;
1138	} u32;
1139	size_t sz;
1140
1141	if (uap->cmd == IPC_SET) {
1142		if ((error = copyin(uap->buf, &u32.shmid_ds32,
1143		    sizeof(u32.shmid_ds32))))
1144			goto done;
1145		freebsd32_ipcperm_old_in(&u32.shmid_ds32.shm_perm,
1146		    &u.shmid_ds.shm_perm);
1147		CP(u32.shmid_ds32, u.shmid_ds, shm_segsz);
1148		CP(u32.shmid_ds32, u.shmid_ds, shm_lpid);
1149		CP(u32.shmid_ds32, u.shmid_ds, shm_cpid);
1150		CP(u32.shmid_ds32, u.shmid_ds, shm_nattch);
1151		CP(u32.shmid_ds32, u.shmid_ds, shm_atime);
1152		CP(u32.shmid_ds32, u.shmid_ds, shm_dtime);
1153		CP(u32.shmid_ds32, u.shmid_ds, shm_ctime);
1154	}
1155
1156	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz);
1157	if (error)
1158		goto done;
1159
1160	/* Cases in which we need to copyout */
1161	switch (uap->cmd) {
1162	case IPC_INFO:
1163		CP(u.shminfo, u32.shminfo32, shmmax);
1164		CP(u.shminfo, u32.shminfo32, shmmin);
1165		CP(u.shminfo, u32.shminfo32, shmmni);
1166		CP(u.shminfo, u32.shminfo32, shmseg);
1167		CP(u.shminfo, u32.shminfo32, shmall);
1168		error = copyout(&u32.shminfo32, uap->buf,
1169		    sizeof(u32.shminfo32));
1170		break;
1171	case SHM_INFO:
1172		CP(u.shm_info, u32.shm_info32, used_ids);
1173		CP(u.shm_info, u32.shm_info32, shm_rss);
1174		CP(u.shm_info, u32.shm_info32, shm_tot);
1175		CP(u.shm_info, u32.shm_info32, shm_swp);
1176		CP(u.shm_info, u32.shm_info32, swap_attempts);
1177		CP(u.shm_info, u32.shm_info32, swap_successes);
1178		error = copyout(&u32.shm_info32, uap->buf,
1179		    sizeof(u32.shm_info32));
1180		break;
1181	case SHM_STAT:
1182	case IPC_STAT:
1183		freebsd32_ipcperm_old_out(&u.shmid_ds.shm_perm,
1184		    &u32.shmid_ds32.shm_perm);
1185		if (u.shmid_ds.shm_segsz > INT32_MAX)
1186			u32.shmid_ds32.shm_segsz = INT32_MAX;
1187		else
1188			CP(u.shmid_ds, u32.shmid_ds32, shm_segsz);
1189		CP(u.shmid_ds, u32.shmid_ds32, shm_lpid);
1190		CP(u.shmid_ds, u32.shmid_ds32, shm_cpid);
1191		CP(u.shmid_ds, u32.shmid_ds32, shm_nattch);
1192		CP(u.shmid_ds, u32.shmid_ds32, shm_atime);
1193		CP(u.shmid_ds, u32.shmid_ds32, shm_dtime);
1194		CP(u.shmid_ds, u32.shmid_ds32, shm_ctime);
1195		u32.shmid_ds32.shm_internal = 0;
1196		error = copyout(&u32.shmid_ds32, uap->buf,
1197		    sizeof(u32.shmid_ds32));
1198		break;
1199	}
1200
1201done:
1202	if (error) {
1203		/* Invalidate the return value */
1204		td->td_retval[0] = -1;
1205	}
1206	return (error);
1207}
1208#endif
1209
1210int
1211freebsd32_shmctl(struct thread *td, struct freebsd32_shmctl_args *uap)
1212{
1213	int error = 0;
1214	union {
1215		struct shmid_ds shmid_ds;
1216		struct shm_info shm_info;
1217		struct shminfo shminfo;
1218	} u;
1219	union {
1220		struct shmid_ds32 shmid_ds32;
1221		struct shm_info32 shm_info32;
1222		struct shminfo32 shminfo32;
1223	} u32;
1224	size_t sz;
1225
1226	if (uap->cmd == IPC_SET) {
1227		if ((error = copyin(uap->buf, &u32.shmid_ds32,
1228		    sizeof(u32.shmid_ds32))))
1229			goto done;
1230		freebsd32_ipcperm_in(&u32.shmid_ds32.shm_perm,
1231		    &u.shmid_ds.shm_perm);
1232		CP(u32.shmid_ds32, u.shmid_ds, shm_segsz);
1233		CP(u32.shmid_ds32, u.shmid_ds, shm_lpid);
1234		CP(u32.shmid_ds32, u.shmid_ds, shm_cpid);
1235		CP(u32.shmid_ds32, u.shmid_ds, shm_nattch);
1236		CP(u32.shmid_ds32, u.shmid_ds, shm_atime);
1237		CP(u32.shmid_ds32, u.shmid_ds, shm_dtime);
1238		CP(u32.shmid_ds32, u.shmid_ds, shm_ctime);
1239	}
1240
1241	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz);
1242	if (error)
1243		goto done;
1244
1245	/* Cases in which we need to copyout */
1246	switch (uap->cmd) {
1247	case IPC_INFO:
1248		CP(u.shminfo, u32.shminfo32, shmmax);
1249		CP(u.shminfo, u32.shminfo32, shmmin);
1250		CP(u.shminfo, u32.shminfo32, shmmni);
1251		CP(u.shminfo, u32.shminfo32, shmseg);
1252		CP(u.shminfo, u32.shminfo32, shmall);
1253		error = copyout(&u32.shminfo32, uap->buf,
1254		    sizeof(u32.shminfo32));
1255		break;
1256	case SHM_INFO:
1257		CP(u.shm_info, u32.shm_info32, used_ids);
1258		CP(u.shm_info, u32.shm_info32, shm_rss);
1259		CP(u.shm_info, u32.shm_info32, shm_tot);
1260		CP(u.shm_info, u32.shm_info32, shm_swp);
1261		CP(u.shm_info, u32.shm_info32, swap_attempts);
1262		CP(u.shm_info, u32.shm_info32, swap_successes);
1263		error = copyout(&u32.shm_info32, uap->buf,
1264		    sizeof(u32.shm_info32));
1265		break;
1266	case SHM_STAT:
1267	case IPC_STAT:
1268		freebsd32_ipcperm_out(&u.shmid_ds.shm_perm,
1269		    &u32.shmid_ds32.shm_perm);
1270		if (u.shmid_ds.shm_segsz > INT32_MAX)
1271			u32.shmid_ds32.shm_segsz = INT32_MAX;
1272		else
1273			CP(u.shmid_ds, u32.shmid_ds32, shm_segsz);
1274		CP(u.shmid_ds, u32.shmid_ds32, shm_lpid);
1275		CP(u.shmid_ds, u32.shmid_ds32, shm_cpid);
1276		CP(u.shmid_ds, u32.shmid_ds32, shm_nattch);
1277		CP(u.shmid_ds, u32.shmid_ds32, shm_atime);
1278		CP(u.shmid_ds, u32.shmid_ds32, shm_dtime);
1279		CP(u.shmid_ds, u32.shmid_ds32, shm_ctime);
1280		error = copyout(&u32.shmid_ds32, uap->buf,
1281		    sizeof(u32.shmid_ds32));
1282		break;
1283	}
1284
1285done:
1286	if (error) {
1287		/* Invalidate the return value */
1288		td->td_retval[0] = -1;
1289	}
1290	return (error);
1291}
1292#endif
1293
1294#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1295    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1296
1297#ifndef CP
1298#define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
1299#endif
1300
1301#ifndef _SYS_SYSPROTO_H_
1302struct freebsd7_shmctl_args {
1303	int shmid;
1304	int cmd;
1305	struct shmid_ds_old *buf;
1306};
1307#endif
1308int
1309freebsd7_shmctl(td, uap)
1310	struct thread *td;
1311	struct freebsd7_shmctl_args *uap;
1312{
1313	int error = 0;
1314	struct shmid_ds_old old;
1315	struct shmid_ds buf;
1316	size_t bufsz;
1317
1318	/*
	 * The only reason IPC_INFO, SHM_INFO and SHM_STAT exist is to support
	 * Linux binaries.  If we see the call come through the FreeBSD ABI,
	 * return an error back to the user since we do not support this.
1322	 */
1323	if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO ||
1324	    uap->cmd == SHM_STAT)
1325		return (EINVAL);
1326
1327	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
1328	if (uap->cmd == IPC_SET) {
1329		if ((error = copyin(uap->buf, &old, sizeof(old))))
1330			goto done;
1331		ipcperm_old2new(&old.shm_perm, &buf.shm_perm);
1332		CP(old, buf, shm_segsz);
1333		CP(old, buf, shm_lpid);
1334		CP(old, buf, shm_cpid);
1335		CP(old, buf, shm_nattch);
1336		CP(old, buf, shm_atime);
1337		CP(old, buf, shm_dtime);
1338		CP(old, buf, shm_ctime);
1339	}
1340
1341	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
1342	if (error)
1343		goto done;
1344
1345	/* Cases in which we need to copyout */
1346	switch (uap->cmd) {
1347	case IPC_STAT:
1348		ipcperm_new2old(&buf.shm_perm, &old.shm_perm);
1349		if (buf.shm_segsz > INT_MAX)
1350			old.shm_segsz = INT_MAX;
1351		else
1352			CP(buf, old, shm_segsz);
1353		CP(buf, old, shm_lpid);
1354		CP(buf, old, shm_cpid);
1355		if (buf.shm_nattch > SHRT_MAX)
1356			old.shm_nattch = SHRT_MAX;
1357		else
1358			CP(buf, old, shm_nattch);
1359		CP(buf, old, shm_atime);
1360		CP(buf, old, shm_dtime);
1361		CP(buf, old, shm_ctime);
1362		old.shm_internal = NULL;
1363		error = copyout(&old, uap->buf, sizeof(old));
1364		break;
1365	}
1366
1367done:
1368	if (error) {
1369		/* Invalidate the return value */
1370		td->td_retval[0] = -1;
1371	}
1372	return (error);
1373}
1374
1375#endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
1376	   COMPAT_FREEBSD7 */
1377
1378static int
1379sysvshm_modload(struct module *module, int cmd, void *arg)
1380{
1381	int error = 0;
1382
1383	switch (cmd) {
1384	case MOD_LOAD:
1385		error = shminit();
1386		if (error != 0)
1387			shmunload();
1388		break;
1389	case MOD_UNLOAD:
1390		error = shmunload();
1391		break;
1392	case MOD_SHUTDOWN:
1393		break;
1394	default:
1395		error = EINVAL;
1396		break;
1397	}
1398	return (error);
1399}
1400
1401static moduledata_t sysvshm_mod = {
1402	"sysvshm",
1403	&sysvshm_modload,
1404	NULL
1405};
1406
1407DECLARE_MODULE(sysvshm, sysvshm_mod, SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
1408MODULE_VERSION(sysvshm, 1);
1409