sysv_shm.c revision 194941
1/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
2/*-
3 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Adam Glass and Charles
16 *	Hannum.
17 * 4. The names of the authors may not be used to endorse or promote products
18 *    derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31/*-
32 * Copyright (c) 2003-2005 McAfee, Inc.
33 * All rights reserved.
34 *
35 * This software was developed for the FreeBSD Project in part by McAfee
36 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
37 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
38 * program.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 *    notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 *    notice, this list of conditions and the following disclaimer in the
47 *    documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62#include <sys/cdefs.h>
63__FBSDID("$FreeBSD: head/sys/kern/sysv_shm.c 194941 2009-06-25 07:16:10Z rwatson $");
64
65#include "opt_compat.h"
66#include "opt_sysvipc.h"
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/kernel.h>
71#include <sys/limits.h>
72#include <sys/lock.h>
73#include <sys/sysctl.h>
74#include <sys/shm.h>
75#include <sys/proc.h>
76#include <sys/malloc.h>
77#include <sys/mman.h>
78#include <sys/module.h>
79#include <sys/mutex.h>
80#include <sys/resourcevar.h>
81#include <sys/stat.h>
82#include <sys/syscall.h>
83#include <sys/syscallsubr.h>
84#include <sys/sysent.h>
85#include <sys/sysproto.h>
86#include <sys/jail.h>
87
88#include <security/mac/mac_framework.h>
89
90#include <vm/vm.h>
91#include <vm/vm_param.h>
92#include <vm/pmap.h>
93#include <vm/vm_object.h>
94#include <vm/vm_map.h>
95#include <vm/vm_page.h>
96#include <vm/vm_pager.h>
97
98static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");
99
100static int shmget_allocate_segment(struct thread *td,
101    struct shmget_args *uap, int mode);
102static int shmget_existing(struct thread *td, struct shmget_args *uap,
103    int mode, int segnum);
104
/*
 * Private state bits stored in shm_perm.mode next to the access
 * permission bits.
 */
#define	SHMSEG_FREE		0x0200	/* slot is unused */
#define	SHMSEG_REMOVED		0x0400	/* removed, or still being set up */
#define	SHMSEG_ALLOCATED	0x0800	/* slot holds a segment */
#define	SHMSEG_WANTED		0x1000	/* a thread is sleeping on the slot */
109
110static int shm_last_free, shm_nused, shmalloced;
111vm_size_t shm_committed;
112static struct shmid_kernel	*shmsegs;
113
114struct shmmap_state {
115	vm_offset_t va;
116	int shmid;
117};
118
119#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
120struct oshmctl_args;
121static int oshmctl(struct thread *td, struct oshmctl_args *uap);
122#endif
123static void shm_deallocate_segment(struct shmid_kernel *);
124static int shm_find_segment_by_key(key_t);
125static struct shmid_kernel *shm_find_segment_by_shmid(int);
126static struct shmid_kernel *shm_find_segment_by_shmidx(int);
127static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
128static void shmrealloc(void);
129static void shminit(void);
130static int sysvshm_modload(struct module *, int, void *);
131static int shmunload(void);
132static void shmexit_myhook(struct vmspace *vm);
133static void shmfork_myhook(struct proc *p1, struct proc *p2);
134static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
135
/*
 * Tunable values.  Each default applies only when the name has not
 * already been defined before this point.
 */
#ifndef SHMMAXPGS
#define	SHMMAXPGS	8192	/* Note: sysv shared memory is swap backed. */
#endif

#ifndef SHMMAX
#define	SHMMAX		(SHMMAXPGS*PAGE_SIZE)
#endif

#ifndef SHMMIN
#define	SHMMIN		1
#endif

#ifndef SHMMNI
#define	SHMMNI		192
#endif

#ifndef SHMSEG
#define	SHMSEG		128
#endif

#ifndef SHMALL
#define	SHMALL		(SHMMAXPGS)
#endif
157
158struct	shminfo shminfo = {
159	SHMMAX,
160	SHMMIN,
161	SHMMNI,
162	SHMSEG,
163	SHMALL
164};
165
166static int shm_use_phys;
167static int shm_allow_removed;
168
169SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
170    "Maximum shared memory segment size");
171SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
172    "Minimum shared memory segment size");
173SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0,
174    "Number of shared memory identifiers");
175SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0,
176    "Number of segments per process");
177SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
178    "Maximum number of pages available for shared memory");
179SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
180    &shm_use_phys, 0, "Enable/Disable locking of shared memory pages in core");
181SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
182    &shm_allow_removed, 0,
183    "Enable/Disable attachment to attached segments marked for removal");
184SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLFLAG_RD,
185    NULL, 0, sysctl_shmsegs, "",
186    "Current number of shared memory segments allocated");
187
188static int
189shm_find_segment_by_key(key)
190	key_t key;
191{
192	int i;
193
194	for (i = 0; i < shmalloced; i++)
195		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
196		    shmsegs[i].u.shm_perm.key == key)
197			return (i);
198	return (-1);
199}
200
201static struct shmid_kernel *
202shm_find_segment_by_shmid(int shmid)
203{
204	int segnum;
205	struct shmid_kernel *shmseg;
206
207	segnum = IPCID_TO_IX(shmid);
208	if (segnum < 0 || segnum >= shmalloced)
209		return (NULL);
210	shmseg = &shmsegs[segnum];
211	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
212	    (!shm_allow_removed &&
213	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
214	    shmseg->u.shm_perm.seq != IPCID_TO_SEQ(shmid))
215		return (NULL);
216	return (shmseg);
217}
218
219static struct shmid_kernel *
220shm_find_segment_by_shmidx(int segnum)
221{
222	struct shmid_kernel *shmseg;
223
224	if (segnum < 0 || segnum >= shmalloced)
225		return (NULL);
226	shmseg = &shmsegs[segnum];
227	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
228	    (!shm_allow_removed &&
229	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0))
230		return (NULL);
231	return (shmseg);
232}
233
234static void
235shm_deallocate_segment(shmseg)
236	struct shmid_kernel *shmseg;
237{
238	vm_size_t size;
239
240	GIANT_REQUIRED;
241
242	vm_object_deallocate(shmseg->object);
243	shmseg->object = NULL;
244	size = round_page(shmseg->u.shm_segsz);
245	shm_committed -= btoc(size);
246	shm_nused--;
247	shmseg->u.shm_perm.mode = SHMSEG_FREE;
248#ifdef MAC
249	mac_sysvshm_cleanup(shmseg);
250#endif
251}
252
253static int
254shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
255{
256	struct shmid_kernel *shmseg;
257	int segnum, result;
258	vm_size_t size;
259
260	GIANT_REQUIRED;
261
262	segnum = IPCID_TO_IX(shmmap_s->shmid);
263	shmseg = &shmsegs[segnum];
264	size = round_page(shmseg->u.shm_segsz);
265	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
266	if (result != KERN_SUCCESS)
267		return (EINVAL);
268	shmmap_s->shmid = -1;
269	shmseg->u.shm_dtime = time_second;
270	if ((--shmseg->u.shm_nattch <= 0) &&
271	    (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
272		shm_deallocate_segment(shmseg);
273		shm_last_free = segnum;
274	}
275	return (0);
276}
277
278#ifndef _SYS_SYSPROTO_H_
279struct shmdt_args {
280	const void *shmaddr;
281};
282#endif
283int
284shmdt(td, uap)
285	struct thread *td;
286	struct shmdt_args *uap;
287{
288	struct proc *p = td->td_proc;
289	struct shmmap_state *shmmap_s;
290#ifdef MAC
291	struct shmid_kernel *shmsegptr;
292#endif
293	int i;
294	int error = 0;
295
296	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
297		return (ENOSYS);
298	mtx_lock(&Giant);
299	shmmap_s = p->p_vmspace->vm_shm;
300 	if (shmmap_s == NULL) {
301		error = EINVAL;
302		goto done2;
303	}
304	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
305		if (shmmap_s->shmid != -1 &&
306		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
307			break;
308		}
309	}
310	if (i == shminfo.shmseg) {
311		error = EINVAL;
312		goto done2;
313	}
314#ifdef MAC
315	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
316	error = mac_sysvshm_check_shmdt(td->td_ucred, shmsegptr);
317	if (error != 0)
318		goto done2;
319#endif
320	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
321done2:
322	mtx_unlock(&Giant);
323	return (error);
324}
325
326#ifndef _SYS_SYSPROTO_H_
327struct shmat_args {
328	int shmid;
329	const void *shmaddr;
330	int shmflg;
331};
332#endif
333int
334kern_shmat(td, shmid, shmaddr, shmflg)
335	struct thread *td;
336	int shmid;
337	const void *shmaddr;
338	int shmflg;
339{
340	struct proc *p = td->td_proc;
341	int i, flags;
342	struct shmid_kernel *shmseg;
343	struct shmmap_state *shmmap_s = NULL;
344	vm_offset_t attach_va;
345	vm_prot_t prot;
346	vm_size_t size;
347	int rv;
348	int error = 0;
349
350	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
351		return (ENOSYS);
352	mtx_lock(&Giant);
353	shmmap_s = p->p_vmspace->vm_shm;
354	if (shmmap_s == NULL) {
355		shmmap_s = malloc(shminfo.shmseg * sizeof(struct shmmap_state),
356		    M_SHM, M_WAITOK);
357		for (i = 0; i < shminfo.shmseg; i++)
358			shmmap_s[i].shmid = -1;
359		p->p_vmspace->vm_shm = shmmap_s;
360	}
361	shmseg = shm_find_segment_by_shmid(shmid);
362	if (shmseg == NULL) {
363		error = EINVAL;
364		goto done2;
365	}
366	error = ipcperm(td, &shmseg->u.shm_perm,
367	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
368	if (error)
369		goto done2;
370#ifdef MAC
371	error = mac_sysvshm_check_shmat(td->td_ucred, shmseg, shmflg);
372	if (error != 0)
373		goto done2;
374#endif
375	for (i = 0; i < shminfo.shmseg; i++) {
376		if (shmmap_s->shmid == -1)
377			break;
378		shmmap_s++;
379	}
380	if (i >= shminfo.shmseg) {
381		error = EMFILE;
382		goto done2;
383	}
384	size = round_page(shmseg->u.shm_segsz);
385	prot = VM_PROT_READ;
386	if ((shmflg & SHM_RDONLY) == 0)
387		prot |= VM_PROT_WRITE;
388	flags = MAP_ANON | MAP_SHARED;
389	if (shmaddr) {
390		flags |= MAP_FIXED;
391		if (shmflg & SHM_RND) {
392			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
393		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
394			attach_va = (vm_offset_t)shmaddr;
395		} else {
396			error = EINVAL;
397			goto done2;
398		}
399	} else {
400		/*
401		 * This is just a hint to vm_map_find() about where to
402		 * put it.
403		 */
404		PROC_LOCK(p);
405		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
406		    lim_max(p, RLIMIT_DATA));
407		PROC_UNLOCK(p);
408	}
409
410	vm_object_reference(shmseg->object);
411	rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->object,
412	    0, &attach_va, size, (flags & MAP_FIXED) ? VMFS_NO_SPACE :
413	    VMFS_ANY_SPACE, prot, prot, 0);
414	if (rv != KERN_SUCCESS) {
415		vm_object_deallocate(shmseg->object);
416		error = ENOMEM;
417		goto done2;
418	}
419	vm_map_inherit(&p->p_vmspace->vm_map,
420		attach_va, attach_va + size, VM_INHERIT_SHARE);
421
422	shmmap_s->va = attach_va;
423	shmmap_s->shmid = shmid;
424	shmseg->u.shm_lpid = p->p_pid;
425	shmseg->u.shm_atime = time_second;
426	shmseg->u.shm_nattch++;
427	td->td_retval[0] = attach_va;
428done2:
429	mtx_unlock(&Giant);
430	return (error);
431}
432
433int
434shmat(td, uap)
435	struct thread *td;
436	struct shmat_args *uap;
437{
438	return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg);
439}
440
441int
442kern_shmctl(td, shmid, cmd, buf, bufsz)
443	struct thread *td;
444	int shmid;
445	int cmd;
446	void *buf;
447	size_t *bufsz;
448{
449	int error = 0;
450	struct shmid_kernel *shmseg;
451
452	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
453		return (ENOSYS);
454
455	mtx_lock(&Giant);
456	switch (cmd) {
457	/*
458	 * It is possible that kern_shmctl is being called from the Linux ABI
459	 * layer, in which case, we will need to implement IPC_INFO.  It should
460	 * be noted that other shmctl calls will be funneled through here for
461	 * Linix binaries as well.
462	 *
463	 * NB: The Linux ABI layer will convert this data to structure(s) more
464	 * consistent with the Linux ABI.
465	 */
466	case IPC_INFO:
467		memcpy(buf, &shminfo, sizeof(shminfo));
468		if (bufsz)
469			*bufsz = sizeof(shminfo);
470		td->td_retval[0] = shmalloced;
471		goto done2;
472	case SHM_INFO: {
473		struct shm_info shm_info;
474		shm_info.used_ids = shm_nused;
475		shm_info.shm_rss = 0;	/*XXX where to get from ? */
476		shm_info.shm_tot = 0;	/*XXX where to get from ? */
477		shm_info.shm_swp = 0;	/*XXX where to get from ? */
478		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
479		shm_info.swap_successes = 0;	/*XXX where to get from ? */
480		memcpy(buf, &shm_info, sizeof(shm_info));
481		if (bufsz)
482			*bufsz = sizeof(shm_info);
483		td->td_retval[0] = shmalloced;
484		goto done2;
485	}
486	}
487	if (cmd == SHM_STAT)
488		shmseg = shm_find_segment_by_shmidx(shmid);
489	else
490		shmseg = shm_find_segment_by_shmid(shmid);
491	if (shmseg == NULL) {
492		error = EINVAL;
493		goto done2;
494	}
495#ifdef MAC
496	error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, cmd);
497	if (error != 0)
498		goto done2;
499#endif
500	switch (cmd) {
501	case SHM_STAT:
502	case IPC_STAT:
503		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
504		if (error)
505			goto done2;
506		memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
507		if (bufsz)
508			*bufsz = sizeof(struct shmid_ds);
509		if (cmd == SHM_STAT)
510			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm);
511		break;
512	case IPC_SET: {
513		struct shmid_ds *shmid;
514
515		shmid = (struct shmid_ds *)buf;
516		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
517		if (error)
518			goto done2;
519		shmseg->u.shm_perm.uid = shmid->shm_perm.uid;
520		shmseg->u.shm_perm.gid = shmid->shm_perm.gid;
521		shmseg->u.shm_perm.mode =
522		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
523		    (shmid->shm_perm.mode & ACCESSPERMS);
524		shmseg->u.shm_ctime = time_second;
525		break;
526	}
527	case IPC_RMID:
528		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
529		if (error)
530			goto done2;
531		shmseg->u.shm_perm.key = IPC_PRIVATE;
532		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
533		if (shmseg->u.shm_nattch <= 0) {
534			shm_deallocate_segment(shmseg);
535			shm_last_free = IPCID_TO_IX(shmid);
536		}
537		break;
538#if 0
539	case SHM_LOCK:
540	case SHM_UNLOCK:
541#endif
542	default:
543		error = EINVAL;
544		break;
545	}
546done2:
547	mtx_unlock(&Giant);
548	return (error);
549}
550
551#ifndef _SYS_SYSPROTO_H_
552struct shmctl_args {
553	int shmid;
554	int cmd;
555	struct shmid_ds *buf;
556};
557#endif
558int
559shmctl(td, uap)
560	struct thread *td;
561	struct shmctl_args *uap;
562{
563	int error = 0;
564	struct shmid_ds buf;
565	size_t bufsz;
566
567	/*
568	 * The only reason IPC_INFO, SHM_INFO, SHM_STAT exists is to support
569	 * Linux binaries.  If we see the call come through the FreeBSD ABI,
570	 * return an error back to the user since we do not to support this.
571	 */
572	if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO ||
573	    uap->cmd == SHM_STAT)
574		return (EINVAL);
575
576	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
577	if (uap->cmd == IPC_SET) {
578		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
579			goto done;
580	}
581
582	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
583	if (error)
584		goto done;
585
586	/* Cases in which we need to copyout */
587	switch (uap->cmd) {
588	case IPC_STAT:
589		error = copyout(&buf, uap->buf, bufsz);
590		break;
591	}
592
593done:
594	if (error) {
595		/* Invalidate the return value */
596		td->td_retval[0] = -1;
597	}
598	return (error);
599}
600
601
602static int
603shmget_existing(td, uap, mode, segnum)
604	struct thread *td;
605	struct shmget_args *uap;
606	int mode;
607	int segnum;
608{
609	struct shmid_kernel *shmseg;
610	int error;
611
612	shmseg = &shmsegs[segnum];
613	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
614		/*
615		 * This segment is in the process of being allocated.  Wait
616		 * until it's done, and look the key up again (in case the
617		 * allocation failed or it was freed).
618		 */
619		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
620		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
621		if (error)
622			return (error);
623		return (EAGAIN);
624	}
625	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
626		return (EEXIST);
627#ifdef MAC
628	error = mac_sysvshm_check_shmget(td->td_ucred, shmseg, uap->shmflg);
629	if (error != 0)
630		return (error);
631#endif
632	if (uap->size != 0 && uap->size > shmseg->u.shm_segsz)
633		return (EINVAL);
634	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
635	return (0);
636}
637
638static int
639shmget_allocate_segment(td, uap, mode)
640	struct thread *td;
641	struct shmget_args *uap;
642	int mode;
643{
644	int i, segnum, shmid;
645	size_t size;
646	struct ucred *cred = td->td_ucred;
647	struct shmid_kernel *shmseg;
648	vm_object_t shm_object;
649
650	GIANT_REQUIRED;
651
652	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
653		return (EINVAL);
654	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
655		return (ENOSPC);
656	size = round_page(uap->size);
657	if (shm_committed + btoc(size) > shminfo.shmall)
658		return (ENOMEM);
659	if (shm_last_free < 0) {
660		shmrealloc();	/* Maybe expand the shmsegs[] array. */
661		for (i = 0; i < shmalloced; i++)
662			if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE)
663				break;
664		if (i == shmalloced)
665			return (ENOSPC);
666		segnum = i;
667	} else  {
668		segnum = shm_last_free;
669		shm_last_free = -1;
670	}
671	shmseg = &shmsegs[segnum];
672	/*
673	 * In case we sleep in malloc(), mark the segment present but deleted
674	 * so that noone else tries to create the same key.
675	 */
676	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
677	shmseg->u.shm_perm.key = uap->key;
678	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
679	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
680
681	/*
682	 * We make sure that we have allocated a pager before we need
683	 * to.
684	 */
685	shm_object = vm_pager_allocate(shm_use_phys ? OBJT_PHYS : OBJT_SWAP,
686	    0, size, VM_PROT_DEFAULT, 0, cred);
687	if (shm_object == NULL)
688		return (ENOMEM);
689	VM_OBJECT_LOCK(shm_object);
690	vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
691	vm_object_set_flag(shm_object, OBJ_NOSPLIT);
692	VM_OBJECT_UNLOCK(shm_object);
693
694	shmseg->object = shm_object;
695	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid;
696	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
697	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
698	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
699	shmseg->u.shm_segsz = uap->size;
700	shmseg->u.shm_cpid = td->td_proc->p_pid;
701	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
702	shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
703#ifdef MAC
704	mac_sysvshm_create(cred, shmseg);
705#endif
706	shmseg->u.shm_ctime = time_second;
707	shm_committed += btoc(size);
708	shm_nused++;
709	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
710		/*
711		 * Somebody else wanted this key while we were asleep.  Wake
712		 * them up now.
713		 */
714		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
715		wakeup(shmseg);
716	}
717	td->td_retval[0] = shmid;
718	return (0);
719}
720
721#ifndef _SYS_SYSPROTO_H_
722struct shmget_args {
723	key_t key;
724	size_t size;
725	int shmflg;
726};
727#endif
728int
729shmget(td, uap)
730	struct thread *td;
731	struct shmget_args *uap;
732{
733	int segnum, mode;
734	int error;
735
736	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
737		return (ENOSYS);
738	mtx_lock(&Giant);
739	mode = uap->shmflg & ACCESSPERMS;
740	if (uap->key != IPC_PRIVATE) {
741	again:
742		segnum = shm_find_segment_by_key(uap->key);
743		if (segnum >= 0) {
744			error = shmget_existing(td, uap, mode, segnum);
745			if (error == EAGAIN)
746				goto again;
747			goto done2;
748		}
749		if ((uap->shmflg & IPC_CREAT) == 0) {
750			error = ENOENT;
751			goto done2;
752		}
753	}
754	error = shmget_allocate_segment(td, uap, mode);
755done2:
756	mtx_unlock(&Giant);
757	return (error);
758}
759
760static void
761shmfork_myhook(p1, p2)
762	struct proc *p1, *p2;
763{
764	struct shmmap_state *shmmap_s;
765	size_t size;
766	int i;
767
768	mtx_lock(&Giant);
769	size = shminfo.shmseg * sizeof(struct shmmap_state);
770	shmmap_s = malloc(size, M_SHM, M_WAITOK);
771	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
772	p2->p_vmspace->vm_shm = shmmap_s;
773	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
774		if (shmmap_s->shmid != -1)
775			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
776	mtx_unlock(&Giant);
777}
778
779static void
780shmexit_myhook(struct vmspace *vm)
781{
782	struct shmmap_state *base, *shm;
783	int i;
784
785	if ((base = vm->vm_shm) != NULL) {
786		vm->vm_shm = NULL;
787		mtx_lock(&Giant);
788		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
789			if (shm->shmid != -1)
790				shm_delete_mapping(vm, shm);
791		}
792		mtx_unlock(&Giant);
793		free(base, M_SHM);
794	}
795}
796
797static void
798shmrealloc(void)
799{
800	int i;
801	struct shmid_kernel *newsegs;
802
803	if (shmalloced >= shminfo.shmmni)
804		return;
805
806	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
807	if (newsegs == NULL)
808		return;
809	for (i = 0; i < shmalloced; i++)
810		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
811	for (; i < shminfo.shmmni; i++) {
812		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
813		shmsegs[i].u.shm_perm.seq = 0;
814#ifdef MAC
815		mac_sysvshm_init(&shmsegs[i]);
816#endif
817	}
818	free(shmsegs, M_SHM);
819	shmsegs = newsegs;
820	shmalloced = shminfo.shmmni;
821}
822
823static void
824shminit()
825{
826	int i;
827
828	TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall);
829	for (i = PAGE_SIZE; i > 0; i--) {
830		shminfo.shmmax = shminfo.shmall * i;
831		if (shminfo.shmmax >= shminfo.shmall)
832			break;
833	}
834	TUNABLE_ULONG_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
835	TUNABLE_ULONG_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
836	TUNABLE_ULONG_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
837	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);
838
839	shmalloced = shminfo.shmmni;
840	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
841	if (shmsegs == NULL)
842		panic("cannot allocate initial memory for sysvshm");
843	for (i = 0; i < shmalloced; i++) {
844		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
845		shmsegs[i].u.shm_perm.seq = 0;
846#ifdef MAC
847		mac_sysvshm_init(&shmsegs[i]);
848#endif
849	}
850	shm_last_free = 0;
851	shm_nused = 0;
852	shm_committed = 0;
853	shmexit_hook = &shmexit_myhook;
854	shmfork_hook = &shmfork_myhook;
855}
856
857static int
858shmunload()
859{
860#ifdef MAC
861	int i;
862#endif
863
864	if (shm_nused > 0)
865		return (EBUSY);
866
867#ifdef MAC
868	for (i = 0; i < shmalloced; i++)
869		mac_sysvshm_destroy(&shmsegs[i]);
870#endif
871	free(shmsegs, M_SHM);
872	shmexit_hook = NULL;
873	shmfork_hook = NULL;
874	return (0);
875}
876
877static int
878sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
879{
880
881	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
882}
883
884#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
885struct oshmid_ds {
886	struct	ipc_perm_old shm_perm;	/* operation perms */
887	int	shm_segsz;		/* size of segment (bytes) */
888	u_short	shm_cpid;		/* pid, creator */
889	u_short	shm_lpid;		/* pid, last operation */
890	short	shm_nattch;		/* no. of current attaches */
891	time_t	shm_atime;		/* last attach time */
892	time_t	shm_dtime;		/* last detach time */
893	time_t	shm_ctime;		/* last change time */
894	void	*shm_handle;		/* internal handle for shm segment */
895};
896
/* Argument block of oshmctl(), as dispatched by shmsys(). */
struct oshmctl_args {
	int			shmid;
	int			cmd;
	struct oshmid_ds	*ubuf;
};
902
903static int
904oshmctl(td, uap)
905	struct thread *td;
906	struct oshmctl_args *uap;
907{
908#ifdef COMPAT_43
909	int error = 0;
910	struct shmid_kernel *shmseg;
911	struct oshmid_ds outbuf;
912
913	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
914		return (ENOSYS);
915	mtx_lock(&Giant);
916	shmseg = shm_find_segment_by_shmid(uap->shmid);
917	if (shmseg == NULL) {
918		error = EINVAL;
919		goto done2;
920	}
921	switch (uap->cmd) {
922	case IPC_STAT:
923		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
924		if (error)
925			goto done2;
926#ifdef MAC
927		error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, uap->cmd);
928		if (error != 0)
929			goto done2;
930#endif
931		ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm);
932		outbuf.shm_segsz = shmseg->u.shm_segsz;
933		outbuf.shm_cpid = shmseg->u.shm_cpid;
934		outbuf.shm_lpid = shmseg->u.shm_lpid;
935		outbuf.shm_nattch = shmseg->u.shm_nattch;
936		outbuf.shm_atime = shmseg->u.shm_atime;
937		outbuf.shm_dtime = shmseg->u.shm_dtime;
938		outbuf.shm_ctime = shmseg->u.shm_ctime;
939		outbuf.shm_handle = shmseg->object;
940		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
941		if (error)
942			goto done2;
943		break;
944	default:
945		error = freebsd7_shmctl(td, (struct shmctl_args *)uap);
946		break;
947	}
948done2:
949	mtx_unlock(&Giant);
950	return (error);
951#else
952	return (EINVAL);
953#endif
954}
955
956/* XXX casting to (sy_call_t *) is bogus, as usual. */
957static sy_call_t *shmcalls[] = {
958	(sy_call_t *)shmat, (sy_call_t *)oshmctl,
959	(sy_call_t *)shmdt, (sy_call_t *)shmget,
960	(sy_call_t *)freebsd7_shmctl
961};
962
963int
964shmsys(td, uap)
965	struct thread *td;
966	/* XXX actually varargs. */
967	struct shmsys_args /* {
968		int	which;
969		int	a2;
970		int	a3;
971		int	a4;
972	} */ *uap;
973{
974	int error;
975
976	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
977		return (ENOSYS);
978	if (uap->which < 0 ||
979	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
980		return (EINVAL);
981	mtx_lock(&Giant);
982	error = (*shmcalls[uap->which])(td, &uap->a2);
983	mtx_unlock(&Giant);
984	return (error);
985}
986
987SYSCALL_MODULE_HELPER(shmsys);
988#endif	/* i386 && (COMPAT_FREEBSD4 || COMPAT_43) */
989
990#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
991    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
992
993#define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
994
995
996#ifndef _SYS_SYSPROTO_H_
997struct freebsd7_shmctl_args {
998	int shmid;
999	int cmd;
1000	struct shmid_ds_old *buf;
1001};
1002#endif
1003int
1004freebsd7_shmctl(td, uap)
1005	struct thread *td;
1006	struct freebsd7_shmctl_args *uap;
1007{
1008	int error = 0;
1009	struct shmid_ds_old old;
1010	struct shmid_ds buf;
1011	size_t bufsz;
1012
1013	/*
1014	 * The only reason IPC_INFO, SHM_INFO, SHM_STAT exists is to support
1015	 * Linux binaries.  If we see the call come through the FreeBSD ABI,
1016	 * return an error back to the user since we do not to support this.
1017	 */
1018	if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO ||
1019	    uap->cmd == SHM_STAT)
1020		return (EINVAL);
1021
1022	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
1023	if (uap->cmd == IPC_SET) {
1024		if ((error = copyin(uap->buf, &old, sizeof(old))))
1025			goto done;
1026		ipcperm_old2new(&old.shm_perm, &buf.shm_perm);
1027		CP(old, buf, shm_segsz);
1028		CP(old, buf, shm_lpid);
1029		CP(old, buf, shm_cpid);
1030		CP(old, buf, shm_nattch);
1031		CP(old, buf, shm_atime);
1032		CP(old, buf, shm_dtime);
1033		CP(old, buf, shm_ctime);
1034	}
1035
1036	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
1037	if (error)
1038		goto done;
1039
1040	/* Cases in which we need to copyout */
1041	switch (uap->cmd) {
1042	case IPC_STAT:
1043		ipcperm_new2old(&buf.shm_perm, &old.shm_perm);
1044		if (buf.shm_segsz > INT_MAX)
1045			old.shm_segsz = INT_MAX;
1046		else
1047			CP(buf, old, shm_segsz);
1048		CP(buf, old, shm_lpid);
1049		CP(buf, old, shm_cpid);
1050		if (buf.shm_nattch > SHRT_MAX)
1051			old.shm_nattch = SHRT_MAX;
1052		else
1053			CP(buf, old, shm_nattch);
1054		CP(buf, old, shm_atime);
1055		CP(buf, old, shm_dtime);
1056		CP(buf, old, shm_ctime);
1057		old.shm_internal = NULL;
1058		error = copyout(&old, uap->buf, sizeof(old));
1059		break;
1060	}
1061
1062done:
1063	if (error) {
1064		/* Invalidate the return value */
1065		td->td_retval[0] = -1;
1066	}
1067	return (error);
1068}
1069
1070SYSCALL_MODULE_HELPER(freebsd7_shmctl);
1071
1072#undef CP
1073
1074#endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
1075	   COMPAT_FREEBSD7 */
1076
1077static int
1078sysvshm_modload(struct module *module, int cmd, void *arg)
1079{
1080	int error = 0;
1081
1082	switch (cmd) {
1083	case MOD_LOAD:
1084		shminit();
1085		break;
1086	case MOD_UNLOAD:
1087		error = shmunload();
1088		break;
1089	case MOD_SHUTDOWN:
1090		break;
1091	default:
1092		error = EINVAL;
1093		break;
1094	}
1095	return (error);
1096}
1097
1098static moduledata_t sysvshm_mod = {
1099	"sysvshm",
1100	&sysvshm_modload,
1101	NULL
1102};
1103
1104SYSCALL_MODULE_HELPER(shmat);
1105SYSCALL_MODULE_HELPER(shmctl);
1106SYSCALL_MODULE_HELPER(shmdt);
1107SYSCALL_MODULE_HELPER(shmget);
1108
1109DECLARE_MODULE(sysvshm, sysvshm_mod, SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
1110MODULE_VERSION(sysvshm, 1);
1111