/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
/*-
 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 2003-2005 McAfee, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project in part by McAfee
 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
 * program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_sysvipc.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/jail.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

FEATURE(sysv_shm, "System V shared memory segments support");

static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");

static int shmget_allocate_segment(struct thread *td,
    struct shmget_args *uap, int mode);
static int shmget_existing(struct thread *td, struct shmget_args *uap,
    int mode, int segnum);

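/*
 * Per-segment state flags, kept in shm_perm.mode alongside the regular
 * IPC access permission bits.
 */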
#define	SHMSEG_FREE     	0x0200
#define	SHMSEG_REMOVED  	0x0400
#define	SHMSEG_ALLOCATED	0x0800
#define	SHMSEG_WANTED		0x1000

static int shm_last_free, shm_nused, shmalloced;
vm_size_t shm_committed;
static struct shmid_kernel	*shmsegs;

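/*
 * Per-process bookkeeping: vm_shm in each vmspace points to an array of
 * shminfo.shmseg of these, one per possible attachment.  A slot with
 * shmid == -1 is unused.
 */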
struct shmmap_state {
	vm_offset_t va;
	int shmid;
};

static void shm_deallocate_segment(struct shmid_kernel *);
static int shm_find_segment_by_key(key_t);
static struct shmid_kernel *shm_find_segment_by_shmid(int);
static struct shmid_kernel *shm_find_segment_by_shmidx(int);
static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
static void shmrealloc(void);
static int shminit(void);
static int sysvshm_modload(struct module *, int, void *);
static int shmunload(void);
static void shmexit_myhook(struct vmspace *vm);
static void shmfork_myhook(struct proc *p1, struct proc *p2);
static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
/*
 * Tunable values.
 */
#ifndef SHMMAXPGS
#define	SHMMAXPGS	131072	/* Note: sysv shared memory is swap backed. */
#endif
#ifndef SHMMAX
#define	SHMMAX	(SHMMAXPGS*PAGE_SIZE)
#endif
#ifndef SHMMIN
#define	SHMMIN	1
#endif
#ifndef SHMMNI
#define	SHMMNI	192
#endif
#ifndef SHMSEG
#define	SHMSEG	128
#endif
#ifndef SHMALL
#define	SHMALL	(SHMMAXPGS)
#endif

struct	shminfo shminfo = {
	SHMMAX,
	SHMMIN,
	SHMMNI,
	SHMSEG,
	SHMALL
};

static int shm_use_phys;
static int shm_allow_removed;

SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
    "Maximum shared memory segment size");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
    "Minimum shared memory segment size");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0,
    "Number of shared memory identifiers");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0,
    "Number of segments per process");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
    "Maximum number of pages available for shared memory");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
    &shm_use_phys, 0, "Enable/Disable locking of shared memory pages in core");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
    &shm_allow_removed, 0,
    "Enable/Disable attachment to attached segments marked for removal");
SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLTYPE_OPAQUE | CTLFLAG_RD,
    NULL, 0, sysctl_shmsegs, "",
    "Current number of shared memory segments allocated");

static int
shm_find_segment_by_key(key_t key)
{
	int i;

	for (i = 0; i < shmalloced; i++)
		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[i].u.shm_perm.key == key)
			return (i);
	return (-1);
}

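/*
 * A shmid as seen by userland packs an index into shmsegs[] together with
 * a per-slot sequence number (decoded with IPCID_TO_IX() and
 * IPCID_TO_SEQ()).  The sequence check below rejects stale ids that refer
 * to a slot which has since been recycled for a new segment.
 */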
static struct shmid_kernel *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_kernel *shmseg;

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shmalloced)
		return (NULL);
	shmseg = &shmsegs[segnum];
	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
	    (!shm_allow_removed &&
	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
	    shmseg->u.shm_perm.seq != IPCID_TO_SEQ(shmid))
		return (NULL);
	return (shmseg);
}

static struct shmid_kernel *
shm_find_segment_by_shmidx(int segnum)
{
	struct shmid_kernel *shmseg;

	if (segnum < 0 || segnum >= shmalloced)
		return (NULL);
	shmseg = &shmsegs[segnum];
	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
	    (!shm_allow_removed &&
	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0))
		return (NULL);
	return (shmseg);
}

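/*
 * Drop the kernel's hold on a segment: release the backing VM object,
 * return its pages to the global accounting and free the credential
 * reference.  Called with Giant held once the segment has been removed
 * and its attach count has dropped to zero.
 */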
static void
shm_deallocate_segment(struct shmid_kernel *shmseg)
{
	vm_size_t size;

	GIANT_REQUIRED;

	vm_object_deallocate(shmseg->object);
	shmseg->object = NULL;
	size = round_page(shmseg->u.shm_segsz);
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->u.shm_perm.mode = SHMSEG_FREE;
#ifdef MAC
	mac_sysvshm_cleanup(shmseg);
#endif
	racct_sub_cred(shmseg->cred, RACCT_NSHM, 1);
	racct_sub_cred(shmseg->cred, RACCT_SHMSIZE, size);
	crfree(shmseg->cred);
	shmseg->cred = NULL;
}

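/*
 * Detach one mapping from a process: unmap the segment from the given
 * vmspace, mark the per-process slot free and, if this was the last
 * attachment of a segment already marked for removal, deallocate it.
 */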
static int
shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
{
	struct shmid_kernel *shmseg;
	int segnum, result;
	vm_size_t size;

	GIANT_REQUIRED;

	segnum = IPCID_TO_IX(shmmap_s->shmid);
	shmseg = &shmsegs[segnum];
	size = round_page(shmseg->u.shm_segsz);
	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
	if (result != KERN_SUCCESS)
		return (EINVAL);
	shmmap_s->shmid = -1;
	shmseg->u.shm_dtime = time_second;
	if ((--shmseg->u.shm_nattch <= 0) &&
	    (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
		shm_deallocate_segment(shmseg);
		shm_last_free = segnum;
	}
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct shmdt_args {
	const void *shmaddr;
};
#endif
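/*
 * shmdt(2): look up the attachment whose base address matches shmaddr in
 * the per-process table and tear it down.
 */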
int
sys_shmdt(struct thread *td, struct shmdt_args *uap)
{
	struct proc *p = td->td_proc;
	struct shmmap_state *shmmap_s;
#ifdef MAC
	struct shmid_kernel *shmsegptr;
#endif
	int i;
	int error = 0;

	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmmap_s = p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		error = EINVAL;
		goto done2;
	}
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
		if (shmmap_s->shmid != -1 &&
		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
			break;
		}
	}
	if (i == shminfo.shmseg) {
		error = EINVAL;
		goto done2;
	}
#ifdef MAC
	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
	error = mac_sysvshm_check_shmdt(td->td_ucred, shmsegptr);
	if (error != 0)
		goto done2;
#endif
	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
done2:
	mtx_unlock(&Giant);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct shmat_args {
	int shmid;
	const void *shmaddr;
	int shmflg;
};
#endif
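/*
 * Common attach path for shmat(2): validate permissions, pick (or verify)
 * an attach address, map the segment's VM object into the caller's
 * address space and record the attachment in the per-process table.
 * The attach address is returned in td_retval[0].
 */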
int
kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg)
{
	struct proc *p = td->td_proc;
	int i, flags;
	struct shmid_kernel *shmseg;
	struct shmmap_state *shmmap_s = NULL;
	vm_offset_t attach_va;
	vm_prot_t prot;
	vm_size_t size;
	int rv;
	int error = 0;

	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmmap_s = p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		shmmap_s = malloc(shminfo.shmseg * sizeof(struct shmmap_state),
		    M_SHM, M_WAITOK);
		for (i = 0; i < shminfo.shmseg; i++)
			shmmap_s[i].shmid = -1;
		p->p_vmspace->vm_shm = shmmap_s;
	}
	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
	error = ipcperm(td, &shmseg->u.shm_perm,
	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto done2;
#ifdef MAC
	error = mac_sysvshm_check_shmat(td->td_ucred, shmseg, shmflg);
	if (error != 0)
		goto done2;
#endif
	for (i = 0; i < shminfo.shmseg; i++) {
		if (shmmap_s->shmid == -1)
			break;
		shmmap_s++;
	}
	if (i >= shminfo.shmseg) {
		error = EMFILE;
		goto done2;
	}
	size = round_page(shmseg->u.shm_segsz);
	prot = VM_PROT_READ;
	if ((shmflg & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	flags = MAP_ANON | MAP_SHARED;
	if (shmaddr) {
		flags |= MAP_FIXED;
		if (shmflg & SHM_RND) {
			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
			attach_va = (vm_offset_t)shmaddr;
		} else {
			error = EINVAL;
			goto done2;
		}
	} else {
		/*
		 * This is just a hint to vm_map_find() about where to
		 * put it.
		 */
		PROC_LOCK(p);
		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
		    lim_max(p, RLIMIT_DATA));
		PROC_UNLOCK(p);
	}

	vm_object_reference(shmseg->object);
	rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->object,
	    0, &attach_va, size, 0, (flags & MAP_FIXED) ? VMFS_NO_SPACE :
	    VMFS_OPTIMAL_SPACE, prot, prot, MAP_INHERIT_SHARE);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(shmseg->object);
		error = ENOMEM;
		goto done2;
	}

	shmmap_s->va = attach_va;
	shmmap_s->shmid = shmid;
	shmseg->u.shm_lpid = p->p_pid;
	shmseg->u.shm_atime = time_second;
	shmseg->u.shm_nattch++;
	td->td_retval[0] = attach_va;
done2:
	mtx_unlock(&Giant);
	return (error);
}

int
sys_shmat(struct thread *td, struct shmat_args *uap)
{
	return (kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg));
}

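/*
 * Common control path for shmctl(2).  IPC_INFO, SHM_INFO and SHM_STAT are
 * only reachable through the Linux ABI layer; copyin/copyout of the
 * buffer passed in buf is the caller's responsibility.
 */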
int
kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz)
{
	int error = 0;
	struct shmid_kernel *shmseg;

	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
		return (ENOSYS);

	mtx_lock(&Giant);
	switch (cmd) {
	/*
	 * It is possible that kern_shmctl is being called from the Linux ABI
	 * layer, in which case we will need to implement IPC_INFO.  It should
	 * be noted that other shmctl calls will be funneled through here for
	 * Linux binaries as well.
	 *
	 * NB: The Linux ABI layer will convert this data to structure(s) more
	 * consistent with the Linux ABI.
	 */
	case IPC_INFO:
		memcpy(buf, &shminfo, sizeof(shminfo));
		if (bufsz)
			*bufsz = sizeof(shminfo);
		td->td_retval[0] = shmalloced;
		goto done2;
	case SHM_INFO: {
		struct shm_info shm_info;
		shm_info.used_ids = shm_nused;
		shm_info.shm_rss = 0;	/*XXX where to get from ? */
		shm_info.shm_tot = 0;	/*XXX where to get from ? */
		shm_info.shm_swp = 0;	/*XXX where to get from ? */
		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
		shm_info.swap_successes = 0;	/*XXX where to get from ? */
		memcpy(buf, &shm_info, sizeof(shm_info));
		if (bufsz)
			*bufsz = sizeof(shm_info);
		td->td_retval[0] = shmalloced;
		goto done2;
	}
	}
	if (cmd == SHM_STAT)
		shmseg = shm_find_segment_by_shmidx(shmid);
	else
		shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
#ifdef MAC
	error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, cmd);
	if (error != 0)
		goto done2;
#endif
	switch (cmd) {
	case SHM_STAT:
	case IPC_STAT:
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
		if (error)
			goto done2;
		memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
		if (bufsz)
			*bufsz = sizeof(struct shmid_ds);
		if (cmd == SHM_STAT)
			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm);
		break;
	case IPC_SET: {
		struct shmid_ds *shmid;

		shmid = (struct shmid_ds *)buf;
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
		if (error)
			goto done2;
		shmseg->u.shm_perm.uid = shmid->shm_perm.uid;
		shmseg->u.shm_perm.gid = shmid->shm_perm.gid;
		shmseg->u.shm_perm.mode =
		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
		    (shmid->shm_perm.mode & ACCESSPERMS);
		shmseg->u.shm_ctime = time_second;
		break;
	}
	case IPC_RMID:
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
		if (error)
			goto done2;
		shmseg->u.shm_perm.key = IPC_PRIVATE;
		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->u.shm_nattch <= 0) {
			shm_deallocate_segment(shmseg);
			shm_last_free = IPCID_TO_IX(shmid);
		}
		break;
#if 0
	case SHM_LOCK:
	case SHM_UNLOCK:
#endif
	default:
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct shmctl_args {
	int shmid;
	int cmd;
	struct shmid_ds *buf;
};
#endif
int
sys_shmctl(struct thread *td, struct shmctl_args *uap)
{
	int error = 0;
	struct shmid_ds buf;
	size_t bufsz;

	/*
	 * The only reason IPC_INFO, SHM_INFO and SHM_STAT exist is to support
	 * Linux binaries.  If we see the call come through the FreeBSD ABI,
	 * return an error back to the user since we do not support this.
	 */
	if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO ||
	    uap->cmd == SHM_STAT)
		return (EINVAL);

	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
	if (uap->cmd == IPC_SET) {
		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
			goto done;
	}

	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
	if (error)
		goto done;

	/* Cases in which we need to copyout */
	switch (uap->cmd) {
	case IPC_STAT:
		error = copyout(&buf, uap->buf, bufsz);
		break;
	}

done:
	if (error) {
		/* Invalidate the return value */
		td->td_retval[0] = -1;
	}
	return (error);
}

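/*
 * shmget(2) helper for a key that already names a segment: wait for a
 * segment that is still being set up, enforce IPC_CREAT|IPC_EXCL and the
 * requested size, and hand back the existing id.
 */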
static int
shmget_existing(struct thread *td, struct shmget_args *uap, int mode,
    int segnum)
{
	struct shmid_kernel *shmseg;
	int error;

	shmseg = &shmsegs[segnum];
	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
		if (error)
			return (error);
		return (EAGAIN);
	}
	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
		return (EEXIST);
#ifdef MAC
	error = mac_sysvshm_check_shmget(td->td_ucred, shmseg, uap->shmflg);
	if (error != 0)
		return (error);
#endif
	if (uap->size != 0 && uap->size > shmseg->u.shm_segsz)
		return (EINVAL);
	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
	return (0);
}

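/*
 * shmget(2) helper for creating a new segment: find a free slot in
 * shmsegs[] (growing the array if needed), charge the resource
 * accounting, allocate a swap- or physically-backed VM object of the
 * rounded-up size and fill in the new shmid_kernel.
 */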
static int
shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode)
{
	int i, segnum, shmid;
	size_t size;
	struct ucred *cred = td->td_ucred;
	struct shmid_kernel *shmseg;
	vm_object_t shm_object;

	GIANT_REQUIRED;

	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
		return (EINVAL);
	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
		return (ENOSPC);
	size = round_page(uap->size);
	if (shm_committed + btoc(size) > shminfo.shmall)
		return (ENOMEM);
	if (shm_last_free < 0) {
		shmrealloc();	/* Maybe expand the shmsegs[] array. */
		for (i = 0; i < shmalloced; i++)
			if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE)
				break;
		if (i == shmalloced)
			return (ENOSPC);
		segnum = i;
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}
	shmseg = &shmsegs[segnum];
#ifdef RACCT
	PROC_LOCK(td->td_proc);
	if (racct_add(td->td_proc, RACCT_NSHM, 1)) {
		PROC_UNLOCK(td->td_proc);
		return (ENOSPC);
	}
	if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) {
		racct_sub(td->td_proc, RACCT_NSHM, 1);
		PROC_UNLOCK(td->td_proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(td->td_proc);
#endif
	/*
	 * In case we sleep in malloc(), mark the segment present but deleted
	 * so that no one else tries to create the same key.
	 */
	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shmseg->u.shm_perm.key = uap->key;
	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);

	/*
	 * We make sure that we have allocated a pager before we need
	 * to.
	 */
	shm_object = vm_pager_allocate(shm_use_phys ? OBJT_PHYS : OBJT_SWAP,
	    0, size, VM_PROT_DEFAULT, 0, cred);
	if (shm_object == NULL) {
#ifdef RACCT
		PROC_LOCK(td->td_proc);
		racct_sub(td->td_proc, RACCT_NSHM, 1);
		racct_sub(td->td_proc, RACCT_SHMSIZE, size);
		PROC_UNLOCK(td->td_proc);
#endif
		return (ENOMEM);
	}
	VM_OBJECT_WLOCK(shm_object);
	vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
	vm_object_set_flag(shm_object, OBJ_NOSPLIT);
	VM_OBJECT_WUNLOCK(shm_object);

	shmseg->object = shm_object;
	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid;
	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
	shmseg->cred = crhold(cred);
	shmseg->u.shm_segsz = uap->size;
	shmseg->u.shm_cpid = td->td_proc->p_pid;
	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
	shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
#ifdef MAC
	mac_sysvshm_create(cred, shmseg);
#endif
	shmseg->u.shm_ctime = time_second;
	shm_committed += btoc(size);
	shm_nused++;
	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
		/*
		 * Somebody else wanted this key while we were asleep.  Wake
		 * them up now.
		 */
		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
		wakeup(shmseg);
	}
	td->td_retval[0] = shmid;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct shmget_args {
	key_t key;
	size_t size;
	int shmflg;
};
#endif
int
sys_shmget(struct thread *td, struct shmget_args *uap)
{
	int segnum, mode;
	int error;

	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
		return (ENOSYS);
	mtx_lock(&Giant);
	mode = uap->shmflg & ACCESSPERMS;
	if (uap->key != IPC_PRIVATE) {
	again:
		segnum = shm_find_segment_by_key(uap->key);
		if (segnum >= 0) {
			error = shmget_existing(td, uap, mode, segnum);
			if (error == EAGAIN)
				goto again;
			goto done2;
		}
		if ((uap->shmflg & IPC_CREAT) == 0) {
			error = ENOENT;
			goto done2;
		}
	}
	error = shmget_allocate_segment(td, uap, mode);
done2:
	mtx_unlock(&Giant);
	return (error);
}

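/*
 * Fork hook: give the child a copy of the parent's attachment table and
 * bump the attach count of every segment it inherits.
 */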
static void
shmfork_myhook(struct proc *p1, struct proc *p2)
{
	struct shmmap_state *shmmap_s;
	size_t size;
	int i;

	mtx_lock(&Giant);
	size = shminfo.shmseg * sizeof(struct shmmap_state);
	shmmap_s = malloc(size, M_SHM, M_WAITOK);
	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
	p2->p_vmspace->vm_shm = shmmap_s;
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
		if (shmmap_s->shmid != -1)
			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
	mtx_unlock(&Giant);
}

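/*
 * Exit/exec hook: detach every remaining attachment of the vmspace being
 * torn down and free its attachment table.
 */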
static void
shmexit_myhook(struct vmspace *vm)
{
	struct shmmap_state *base, *shm;
	int i;

	if ((base = vm->vm_shm) != NULL) {
		vm->vm_shm = NULL;
		mtx_lock(&Giant);
		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
			if (shm->shmid != -1)
				shm_delete_mapping(vm, shm);
		}
		mtx_unlock(&Giant);
		free(base, M_SHM);
	}
}

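/*
 * Grow shmsegs[] up to the current kern.ipc.shmmni setting, copying the
 * existing entries and initializing the new slots as free.
 */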
static void
shmrealloc(void)
{
	int i;
	struct shmid_kernel *newsegs;

	if (shmalloced >= shminfo.shmmni)
		return;

	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
	if (newsegs == NULL)
		return;
	for (i = 0; i < shmalloced; i++)
		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
	for (; i < shminfo.shmmni; i++) {
		newsegs[i].u.shm_perm.mode = SHMSEG_FREE;
		newsegs[i].u.shm_perm.seq = 0;
#ifdef MAC
		mac_sysvshm_init(&newsegs[i]);
#endif
	}
	free(shmsegs, M_SHM);
	shmsegs = newsegs;
	shmalloced = shminfo.shmmni;
}

static struct syscall_helper_data shm_syscalls[] = {
	SYSCALL_INIT_HELPER(shmat),
	SYSCALL_INIT_HELPER(shmctl),
	SYSCALL_INIT_HELPER(shmdt),
	SYSCALL_INIT_HELPER(shmget),
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL_INIT_HELPER_COMPAT(freebsd7_shmctl),
#endif
#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
	SYSCALL_INIT_HELPER(shmsys),
#endif
	SYSCALL_INIT_LAST
};

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32.h>
#include <compat/freebsd32/freebsd32_ipc.h>
#include <compat/freebsd32/freebsd32_proto.h>
#include <compat/freebsd32/freebsd32_signal.h>
#include <compat/freebsd32/freebsd32_syscall.h>
#include <compat/freebsd32/freebsd32_util.h>

static struct syscall_helper_data shm32_syscalls[] = {
	SYSCALL32_INIT_HELPER_COMPAT(shmat),
	SYSCALL32_INIT_HELPER_COMPAT(shmdt),
	SYSCALL32_INIT_HELPER_COMPAT(shmget),
	SYSCALL32_INIT_HELPER(freebsd32_shmsys),
	SYSCALL32_INIT_HELPER(freebsd32_shmctl),
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_shmctl),
#endif
	SYSCALL_INIT_LAST
};
#endif

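/*
 * Module initialization: pull in the loader tunables, allocate and clear
 * the segment table, install the fork/exit hooks and register the syscall
 * entries.
 */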
static int
shminit(void)
{
	int i, error;

#ifndef BURN_BRIDGES
	if (TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall) != 0)
		printf("kern.ipc.shmmaxpgs is now called kern.ipc.shmall!\n");
#endif
	TUNABLE_ULONG_FETCH("kern.ipc.shmall", &shminfo.shmall);
	if (!TUNABLE_ULONG_FETCH("kern.ipc.shmmax", &shminfo.shmmax)) {
		/* Initialize shmmax dealing with possible overflow. */
		for (i = PAGE_SIZE; i > 0; i--) {
			shminfo.shmmax = shminfo.shmall * i;
			if (shminfo.shmmax >= shminfo.shmall)
				break;
		}
	}
	TUNABLE_ULONG_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
	TUNABLE_ULONG_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
	TUNABLE_ULONG_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);

	shmalloced = shminfo.shmmni;
	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
	for (i = 0; i < shmalloced; i++) {
		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].u.shm_perm.seq = 0;
#ifdef MAC
		mac_sysvshm_init(&shmsegs[i]);
#endif
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shmexit_hook = &shmexit_myhook;
	shmfork_hook = &shmfork_myhook;

	error = syscall_helper_register(shm_syscalls);
	if (error != 0)
		return (error);
#ifdef COMPAT_FREEBSD32
	error = syscall32_helper_register(shm32_syscalls);
	if (error != 0)
		return (error);
#endif
	return (0);
}

static int
shmunload(void)
{
	int i;

	if (shm_nused > 0)
		return (EBUSY);

#ifdef COMPAT_FREEBSD32
	syscall32_helper_unregister(shm32_syscalls);
#endif
	syscall_helper_unregister(shm_syscalls);

	for (i = 0; i < shmalloced; i++) {
#ifdef MAC
		mac_sysvshm_destroy(&shmsegs[i]);
#endif
		/*
		 * Objects might still be mapped into the processes'
		 * address spaces.  The actual free happens when the
		 * last mapping is destroyed.
		 */
		if (shmsegs[i].u.shm_perm.mode != SHMSEG_FREE)
			vm_object_deallocate(shmsegs[i].object);
	}
	free(shmsegs, M_SHM);
	shmexit_hook = NULL;
	shmfork_hook = NULL;
	return (0);
}

static int
sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
{

	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
}

#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
struct oshmid_ds {
	struct	ipc_perm_old shm_perm;	/* operation perms */
	int	shm_segsz;		/* size of segment (bytes) */
	u_short	shm_cpid;		/* pid, creator */
	u_short	shm_lpid;		/* pid, last operation */
	short	shm_nattch;		/* no. of current attaches */
	time_t	shm_atime;		/* last attach time */
	time_t	shm_dtime;		/* last detach time */
	time_t	shm_ctime;		/* last change time */
	void	*shm_handle;		/* internal handle for shm segment */
};

struct oshmctl_args {
	int shmid;
	int cmd;
	struct oshmid_ds *ubuf;
};

static int
oshmctl(struct thread *td, struct oshmctl_args *uap)
{
#ifdef COMPAT_43
	int error = 0;
	struct shmid_kernel *shmseg;
	struct oshmid_ds outbuf;

	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
	switch (uap->cmd) {
	case IPC_STAT:
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
		if (error)
			goto done2;
#ifdef MAC
		error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, uap->cmd);
		if (error != 0)
			goto done2;
#endif
		ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm);
		outbuf.shm_segsz = shmseg->u.shm_segsz;
		outbuf.shm_cpid = shmseg->u.shm_cpid;
		outbuf.shm_lpid = shmseg->u.shm_lpid;
		outbuf.shm_nattch = shmseg->u.shm_nattch;
		outbuf.shm_atime = shmseg->u.shm_atime;
		outbuf.shm_dtime = shmseg->u.shm_dtime;
		outbuf.shm_ctime = shmseg->u.shm_ctime;
		outbuf.shm_handle = shmseg->object;
		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
		if (error)
			goto done2;
		break;
	default:
		error = freebsd7_shmctl(td, (struct freebsd7_shmctl_args *)uap);
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
#else
	return (EINVAL);
#endif
}

/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *shmcalls[] = {
	(sy_call_t *)sys_shmat, (sy_call_t *)oshmctl,
	(sy_call_t *)sys_shmdt, (sy_call_t *)sys_shmget,
	(sy_call_t *)freebsd7_shmctl
};

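/*
 * The old multiplexed shmsys(2) entry point: dispatch to the handler
 * selected by uap->which, passing the remaining arguments through.
 */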
/*
 * XXX actually varargs:
 * struct shmsys_args {
 *	int	which;
 *	int	a2;
 *	int	a3;
 *	int	a4;
 * };
 */
int
sys_shmsys(struct thread *td, struct shmsys_args *uap)
{
	int error;

	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
		return (ENOSYS);
	if (uap->which < 0 ||
	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
		return (EINVAL);
	mtx_lock(&Giant);
	error = (*shmcalls[uap->which])(td, &uap->a2);
	mtx_unlock(&Giant);
	return (error);
}

#endif	/* i386 && (COMPAT_FREEBSD4 || COMPAT_43) */

#ifdef COMPAT_FREEBSD32

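/*
 * 32-bit compatibility shims: translate between the ILP32 structure
 * layouts used by 32-bit binaries and the native structures before and
 * after calling the common kernel routines.
 */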
int
freebsd32_shmsys(struct thread *td, struct freebsd32_shmsys_args *uap)
{

#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	switch (uap->which) {
	case 0:	{	/* shmat */
		struct shmat_args ap;

		ap.shmid = uap->a2;
		ap.shmaddr = PTRIN(uap->a3);
		ap.shmflg = uap->a4;
		return (sysent[SYS_shmat].sy_call(td, &ap));
	}
	case 2: {	/* shmdt */
		struct shmdt_args ap;

		ap.shmaddr = PTRIN(uap->a2);
		return (sysent[SYS_shmdt].sy_call(td, &ap));
	}
	case 3: {	/* shmget */
		struct shmget_args ap;

		ap.key = uap->a2;
		ap.size = uap->a3;
		ap.shmflg = uap->a4;
		return (sysent[SYS_shmget].sy_call(td, &ap));
	}
	case 4: {	/* shmctl */
		struct freebsd7_freebsd32_shmctl_args ap;

		ap.shmid = uap->a2;
		ap.cmd = uap->a3;
		ap.buf = PTRIN(uap->a4);
		return (freebsd7_freebsd32_shmctl(td, &ap));
	}
	case 1:		/* oshmctl */
	default:
		return (EINVAL);
	}
#else
	return (nosys(td, NULL));
#endif
}

#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
int
freebsd7_freebsd32_shmctl(struct thread *td,
    struct freebsd7_freebsd32_shmctl_args *uap)
{
	int error = 0;
	union {
		struct shmid_ds shmid_ds;
		struct shm_info shm_info;
		struct shminfo shminfo;
	} u;
	union {
		struct shmid_ds32_old shmid_ds32;
		struct shm_info32 shm_info32;
		struct shminfo32 shminfo32;
	} u32;
	size_t sz;

	if (uap->cmd == IPC_SET) {
		if ((error = copyin(uap->buf, &u32.shmid_ds32,
		    sizeof(u32.shmid_ds32))))
			goto done;
		freebsd32_ipcperm_old_in(&u32.shmid_ds32.shm_perm,
		    &u.shmid_ds.shm_perm);
		CP(u32.shmid_ds32, u.shmid_ds, shm_segsz);
		CP(u32.shmid_ds32, u.shmid_ds, shm_lpid);
		CP(u32.shmid_ds32, u.shmid_ds, shm_cpid);
		CP(u32.shmid_ds32, u.shmid_ds, shm_nattch);
		CP(u32.shmid_ds32, u.shmid_ds, shm_atime);
		CP(u32.shmid_ds32, u.shmid_ds, shm_dtime);
		CP(u32.shmid_ds32, u.shmid_ds, shm_ctime);
	}

	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz);
	if (error)
		goto done;

	/* Cases in which we need to copyout */
	switch (uap->cmd) {
	case IPC_INFO:
		CP(u.shminfo, u32.shminfo32, shmmax);
		CP(u.shminfo, u32.shminfo32, shmmin);
		CP(u.shminfo, u32.shminfo32, shmmni);
		CP(u.shminfo, u32.shminfo32, shmseg);
		CP(u.shminfo, u32.shminfo32, shmall);
		error = copyout(&u32.shminfo32, uap->buf,
		    sizeof(u32.shminfo32));
		break;
	case SHM_INFO:
		CP(u.shm_info, u32.shm_info32, used_ids);
		CP(u.shm_info, u32.shm_info32, shm_rss);
		CP(u.shm_info, u32.shm_info32, shm_tot);
		CP(u.shm_info, u32.shm_info32, shm_swp);
		CP(u.shm_info, u32.shm_info32, swap_attempts);
		CP(u.shm_info, u32.shm_info32, swap_successes);
		error = copyout(&u32.shm_info32, uap->buf,
		    sizeof(u32.shm_info32));
		break;
	case SHM_STAT:
	case IPC_STAT:
		freebsd32_ipcperm_old_out(&u.shmid_ds.shm_perm,
		    &u32.shmid_ds32.shm_perm);
		if (u.shmid_ds.shm_segsz > INT32_MAX)
			u32.shmid_ds32.shm_segsz = INT32_MAX;
		else
			CP(u.shmid_ds, u32.shmid_ds32, shm_segsz);
		CP(u.shmid_ds, u32.shmid_ds32, shm_lpid);
		CP(u.shmid_ds, u32.shmid_ds32, shm_cpid);
		CP(u.shmid_ds, u32.shmid_ds32, shm_nattch);
		CP(u.shmid_ds, u32.shmid_ds32, shm_atime);
		CP(u.shmid_ds, u32.shmid_ds32, shm_dtime);
		CP(u.shmid_ds, u32.shmid_ds32, shm_ctime);
		u32.shmid_ds32.shm_internal = 0;
		error = copyout(&u32.shmid_ds32, uap->buf,
		    sizeof(u32.shmid_ds32));
		break;
	}

done:
	if (error) {
		/* Invalidate the return value */
		td->td_retval[0] = -1;
	}
	return (error);
}
#endif

int
freebsd32_shmctl(struct thread *td, struct freebsd32_shmctl_args *uap)
{
	int error = 0;
	union {
		struct shmid_ds shmid_ds;
		struct shm_info shm_info;
		struct shminfo shminfo;
	} u;
	union {
		struct shmid_ds32 shmid_ds32;
		struct shm_info32 shm_info32;
		struct shminfo32 shminfo32;
	} u32;
	size_t sz;

	if (uap->cmd == IPC_SET) {
		if ((error = copyin(uap->buf, &u32.shmid_ds32,
		    sizeof(u32.shmid_ds32))))
			goto done;
		freebsd32_ipcperm_in(&u32.shmid_ds32.shm_perm,
		    &u.shmid_ds.shm_perm);
		CP(u32.shmid_ds32, u.shmid_ds, shm_segsz);
		CP(u32.shmid_ds32, u.shmid_ds, shm_lpid);
		CP(u32.shmid_ds32, u.shmid_ds, shm_cpid);
		CP(u32.shmid_ds32, u.shmid_ds, shm_nattch);
		CP(u32.shmid_ds32, u.shmid_ds, shm_atime);
		CP(u32.shmid_ds32, u.shmid_ds, shm_dtime);
		CP(u32.shmid_ds32, u.shmid_ds, shm_ctime);
	}

	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz);
	if (error)
		goto done;

	/* Cases in which we need to copyout */
	switch (uap->cmd) {
	case IPC_INFO:
		CP(u.shminfo, u32.shminfo32, shmmax);
		CP(u.shminfo, u32.shminfo32, shmmin);
		CP(u.shminfo, u32.shminfo32, shmmni);
		CP(u.shminfo, u32.shminfo32, shmseg);
		CP(u.shminfo, u32.shminfo32, shmall);
		error = copyout(&u32.shminfo32, uap->buf,
		    sizeof(u32.shminfo32));
		break;
	case SHM_INFO:
		CP(u.shm_info, u32.shm_info32, used_ids);
		CP(u.shm_info, u32.shm_info32, shm_rss);
		CP(u.shm_info, u32.shm_info32, shm_tot);
		CP(u.shm_info, u32.shm_info32, shm_swp);
		CP(u.shm_info, u32.shm_info32, swap_attempts);
		CP(u.shm_info, u32.shm_info32, swap_successes);
		error = copyout(&u32.shm_info32, uap->buf,
		    sizeof(u32.shm_info32));
		break;
	case SHM_STAT:
	case IPC_STAT:
		freebsd32_ipcperm_out(&u.shmid_ds.shm_perm,
		    &u32.shmid_ds32.shm_perm);
		if (u.shmid_ds.shm_segsz > INT32_MAX)
			u32.shmid_ds32.shm_segsz = INT32_MAX;
		else
			CP(u.shmid_ds, u32.shmid_ds32, shm_segsz);
		CP(u.shmid_ds, u32.shmid_ds32, shm_lpid);
		CP(u.shmid_ds, u32.shmid_ds32, shm_cpid);
		CP(u.shmid_ds, u32.shmid_ds32, shm_nattch);
		CP(u.shmid_ds, u32.shmid_ds32, shm_atime);
		CP(u.shmid_ds, u32.shmid_ds32, shm_dtime);
		CP(u.shmid_ds, u32.shmid_ds32, shm_ctime);
		error = copyout(&u32.shmid_ds32, uap->buf,
		    sizeof(u32.shmid_ds32));
		break;
	}

done:
	if (error) {
		/* Invalidate the return value */
		td->td_retval[0] = -1;
	}
	return (error);
}
#endif

#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)

#ifndef CP
#define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
#endif

#ifndef _SYS_SYSPROTO_H_
struct freebsd7_shmctl_args {
	int shmid;
	int cmd;
	struct shmid_ds_old *buf;
};
#endif
int
freebsd7_shmctl(struct thread *td, struct freebsd7_shmctl_args *uap)
{
	int error = 0;
	struct shmid_ds_old old;
	struct shmid_ds buf;
	size_t bufsz;

	/*
	 * The only reason IPC_INFO, SHM_INFO and SHM_STAT exist is to support
	 * Linux binaries.  If we see the call come through the FreeBSD ABI,
	 * return an error back to the user since we do not support this.
	 */
	if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO ||
	    uap->cmd == SHM_STAT)
		return (EINVAL);

	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
	if (uap->cmd == IPC_SET) {
		if ((error = copyin(uap->buf, &old, sizeof(old))))
			goto done;
		ipcperm_old2new(&old.shm_perm, &buf.shm_perm);
		CP(old, buf, shm_segsz);
		CP(old, buf, shm_lpid);
		CP(old, buf, shm_cpid);
		CP(old, buf, shm_nattch);
		CP(old, buf, shm_atime);
		CP(old, buf, shm_dtime);
		CP(old, buf, shm_ctime);
	}

	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
	if (error)
		goto done;

	/* Cases in which we need to copyout */
	switch (uap->cmd) {
	case IPC_STAT:
		ipcperm_new2old(&buf.shm_perm, &old.shm_perm);
		if (buf.shm_segsz > INT_MAX)
			old.shm_segsz = INT_MAX;
		else
			CP(buf, old, shm_segsz);
		CP(buf, old, shm_lpid);
		CP(buf, old, shm_cpid);
		if (buf.shm_nattch > SHRT_MAX)
			old.shm_nattch = SHRT_MAX;
		else
			CP(buf, old, shm_nattch);
		CP(buf, old, shm_atime);
		CP(buf, old, shm_dtime);
		CP(buf, old, shm_ctime);
		old.shm_internal = NULL;
		error = copyout(&old, uap->buf, sizeof(old));
		break;
	}

done:
	if (error) {
		/* Invalidate the return value */
		td->td_retval[0] = -1;
	}
	return (error);
}

#endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
	   COMPAT_FREEBSD7 */

static int
sysvshm_modload(struct module *module, int cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MOD_LOAD:
		error = shminit();
		if (error != 0)
			shmunload();
		break;
	case MOD_UNLOAD:
		error = shmunload();
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

static moduledata_t sysvshm_mod = {
	"sysvshm",
	&sysvshm_modload,
	NULL
};

DECLARE_MODULE(sysvshm, sysvshm_mod, SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
MODULE_VERSION(sysvshm, 1);