sysv_shm.c revision 116182
1/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
2/*
3 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Adam Glass and Charles
16 *	Hannum.
17 * 4. The names of the authors may not be used to endorse or promote products
18 *    derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/sysv_shm.c 116182 2003-06-11 00:56:59Z obrien $");
34
35#include "opt_compat.h"
36#include "opt_sysvipc.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/lock.h>
42#include <sys/sysctl.h>
43#include <sys/shm.h>
44#include <sys/proc.h>
45#include <sys/malloc.h>
46#include <sys/mman.h>
47#include <sys/mutex.h>
48#include <sys/stat.h>
49#include <sys/syscall.h>
50#include <sys/syscallsubr.h>
51#include <sys/sysent.h>
52#include <sys/sysproto.h>
53#include <sys/jail.h>
54
55#include <vm/vm.h>
56#include <vm/vm_param.h>
57#include <vm/pmap.h>
58#include <vm/vm_object.h>
59#include <vm/vm_map.h>
60#include <vm/vm_page.h>
61#include <vm/vm_pager.h>
62
63static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");
64
65struct oshmctl_args;
66static int oshmctl(struct thread *td, struct oshmctl_args *uap);
67
68static int shmget_allocate_segment(struct thread *td,
69    struct shmget_args *uap, int mode);
70static int shmget_existing(struct thread *td, struct shmget_args *uap,
71    int mode, int segnum);
72
/*
 * Dispatch table for shmsys(): the 'which' argument of that system call is
 * used as an index into this array after being bounds-checked against its
 * size.
 */
/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *shmcalls[] = {
	(sy_call_t *)shmat, (sy_call_t *)oshmctl,
	(sy_call_t *)shmdt, (sy_call_t *)shmget,
	(sy_call_t *)shmctl
};
79
/*
 * Internal state bits kept in shm_perm.mode next to the ACCESSPERMS bits.
 */
#define	SHMSEG_FREE     	0x0200	/* slot is unused */
#define	SHMSEG_REMOVED  	0x0400	/* IPC_RMID issued, or still being set up */
#define	SHMSEG_ALLOCATED	0x0800	/* slot holds a segment */
#define	SHMSEG_WANTED		0x1000	/* a shmget() sleeper wants a wakeup() */
84
85static int shm_last_free, shm_nused, shm_committed, shmalloced;
86static struct shmid_ds	*shmsegs;
87
/*
 * Per-segment private data, hung off shmid_ds.shm_internal.  It records the
 * VM object that the segment's attachments are mapped from.
 */
struct shm_handle {
	/* vm_offset_t kva; */
	vm_object_t shm_object;	/* object passed to vm_map_find() on attach */
};
92
/*
 * One attachment slot.  Each vmspace that has used shmat() owns an array of
 * shminfo.shmseg of these, reachable through vm_shm.
 */
struct shmmap_state {
	vm_offset_t va;		/* address the segment is attached at */
	int shmid;		/* attached segment id, or -1 if the slot is free */
};
97
98static void shm_deallocate_segment(struct shmid_ds *);
99static int shm_find_segment_by_key(key_t);
100static struct shmid_ds *shm_find_segment_by_shmid(int, int);
101static struct shmid_ds *shm_find_segment_by_shmidx(int, int);
102static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
103static void shmrealloc(void);
104static void shminit(void);
105static int sysvshm_modload(struct module *, int, void *);
106static int shmunload(void);
107static void shmexit_myhook(struct vmspace *vm);
108static void shmfork_myhook(struct proc *p1, struct proc *p2);
109static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
110
111/*
112 * Tuneable values.
113 */
114#ifndef SHMMAXPGS
115#define	SHMMAXPGS	8192	/* Note: sysv shared memory is swap backed. */
116#endif
117#ifndef SHMMAX
118#define	SHMMAX	(SHMMAXPGS*PAGE_SIZE)
119#endif
120#ifndef SHMMIN
121#define	SHMMIN	1
122#endif
123#ifndef SHMMNI
124#define	SHMMNI	192
125#endif
126#ifndef SHMSEG
127#define	SHMSEG	128
128#endif
129#ifndef SHMALL
130#define	SHMALL	(SHMMAXPGS)
131#endif
132
/*
 * Run-time limits, seeded positionally from the compile-time defaults
 * SHMMAX, SHMMIN, SHMMNI, SHMSEG and SHMALL.  The members are exported
 * through the kern.ipc sysctl nodes and may be overridden from tunables in
 * shminit().
 */
struct	shminfo shminfo = {
	SHMMAX,
	SHMMIN,
	SHMMNI,
	SHMSEG,
	SHMALL
};
140
141static int shm_use_phys;
142
143SYSCTL_DECL(_kern_ipc);
144SYSCTL_INT(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0, "");
145SYSCTL_INT(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0, "");
146SYSCTL_INT(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RD, &shminfo.shmmni, 0, "");
147SYSCTL_INT(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RD, &shminfo.shmseg, 0, "");
148SYSCTL_INT(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0, "");
149SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
150    &shm_use_phys, 0, "");
151SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLFLAG_RD,
152    NULL, 0, sysctl_shmsegs, "", "");
153
154static int
155shm_find_segment_by_key(key)
156	key_t key;
157{
158	int i;
159
160	for (i = 0; i < shmalloced; i++)
161		if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) &&
162		    shmsegs[i].shm_perm.key == key)
163			return (i);
164	return (-1);
165}
166
167static struct shmid_ds *
168shm_find_segment_by_shmid(int shmid, int wantrem)
169{
170	int segnum;
171	struct shmid_ds *shmseg;
172
173	segnum = IPCID_TO_IX(shmid);
174	if (segnum < 0 || segnum >= shmalloced)
175		return (NULL);
176	shmseg = &shmsegs[segnum];
177	if (!((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) ||
178  	    (wantrem && !(shmseg->shm_perm.mode & SHMSEG_REMOVED))) ||
179	    shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid))
180		return (NULL);
181	return (shmseg);
182}
183
184static struct shmid_ds *
185shm_find_segment_by_shmidx(int segnum, int wantrem)
186{
187	struct shmid_ds *shmseg;
188
189	if (segnum < 0 || segnum >= shmalloced)
190		return (NULL);
191	shmseg = &shmsegs[segnum];
192	if (!((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) ||
193  	    (wantrem && !(shmseg->shm_perm.mode & SHMSEG_REMOVED))))
194		return (NULL);
195	return (shmseg);
196}
197
198static void
199shm_deallocate_segment(shmseg)
200	struct shmid_ds *shmseg;
201{
202	struct shm_handle *shm_handle;
203	size_t size;
204
205	GIANT_REQUIRED;
206
207	shm_handle = shmseg->shm_internal;
208	vm_object_deallocate(shm_handle->shm_object);
209	free(shm_handle, M_SHM);
210	shmseg->shm_internal = NULL;
211	size = round_page(shmseg->shm_segsz);
212	shm_committed -= btoc(size);
213	shm_nused--;
214	shmseg->shm_perm.mode = SHMSEG_FREE;
215}
216
217static int
218shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
219{
220	struct shmid_ds *shmseg;
221	int segnum, result;
222	size_t size;
223
224	GIANT_REQUIRED;
225
226	segnum = IPCID_TO_IX(shmmap_s->shmid);
227	shmseg = &shmsegs[segnum];
228	size = round_page(shmseg->shm_segsz);
229	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
230	if (result != KERN_SUCCESS)
231		return (EINVAL);
232	shmmap_s->shmid = -1;
233	shmseg->shm_dtime = time_second;
234	if ((--shmseg->shm_nattch <= 0) &&
235	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
236		shm_deallocate_segment(shmseg);
237		shm_last_free = segnum;
238	}
239	return (0);
240}
241
/*
 * Argument layout of shmdt(); only compiled when _SYS_SYSPROTO_H_ is not
 * defined.
 */
#ifndef _SYS_SYSPROTO_H_
struct shmdt_args {
	const void *shmaddr;	/* attach address to detach */
};
#endif
247
248/*
249 * MPSAFE
250 */
251int
252shmdt(td, uap)
253	struct thread *td;
254	struct shmdt_args *uap;
255{
256	struct proc *p = td->td_proc;
257	struct shmmap_state *shmmap_s;
258	int i;
259	int error = 0;
260
261	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
262		return (ENOSYS);
263	mtx_lock(&Giant);
264	shmmap_s = p->p_vmspace->vm_shm;
265 	if (shmmap_s == NULL) {
266		error = EINVAL;
267		goto done2;
268	}
269	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
270		if (shmmap_s->shmid != -1 &&
271		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
272			break;
273		}
274	}
275	if (i == shminfo.shmseg) {
276		error = EINVAL;
277		goto done2;
278	}
279	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
280done2:
281	mtx_unlock(&Giant);
282	return (error);
283}
284
/*
 * Argument layout of shmat(); only compiled when _SYS_SYSPROTO_H_ is not
 * defined.
 */
#ifndef _SYS_SYSPROTO_H_
struct shmat_args {
	int shmid;		/* segment identifier */
	const void *shmaddr;	/* requested address, or NULL to let the kernel pick */
	int shmflg;		/* SHM_RDONLY / SHM_RND */
};
#endif
292
293/*
294 * MPSAFE
295 */
296int
297kern_shmat(td, shmid, shmaddr, shmflg, wantrem)
298	struct thread *td;
299	int shmid;
300	const void *shmaddr;
301	int shmflg;
302	int wantrem;
303{
304	struct proc *p = td->td_proc;
305	int i, flags;
306	struct shmid_ds *shmseg;
307	struct shmmap_state *shmmap_s = NULL;
308	struct shm_handle *shm_handle;
309	vm_offset_t attach_va;
310	vm_prot_t prot;
311	vm_size_t size;
312	int rv;
313	int error = 0;
314
315	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
316		return (ENOSYS);
317	mtx_lock(&Giant);
318	shmmap_s = p->p_vmspace->vm_shm;
319	if (shmmap_s == NULL) {
320		size = shminfo.shmseg * sizeof(struct shmmap_state);
321		shmmap_s = malloc(size, M_SHM, M_WAITOK);
322		for (i = 0; i < shminfo.shmseg; i++)
323			shmmap_s[i].shmid = -1;
324		p->p_vmspace->vm_shm = shmmap_s;
325	}
326	shmseg = shm_find_segment_by_shmid(shmid, wantrem);
327	if (shmseg == NULL) {
328		error = EINVAL;
329		goto done2;
330	}
331	error = ipcperm(td, &shmseg->shm_perm,
332	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
333	if (error)
334		goto done2;
335	for (i = 0; i < shminfo.shmseg; i++) {
336		if (shmmap_s->shmid == -1)
337			break;
338		shmmap_s++;
339	}
340	if (i >= shminfo.shmseg) {
341		error = EMFILE;
342		goto done2;
343	}
344	size = round_page(shmseg->shm_segsz);
345#ifdef VM_PROT_READ_IS_EXEC
346	prot = VM_PROT_READ | VM_PROT_EXECUTE;
347#else
348	prot = VM_PROT_READ;
349#endif
350	if ((shmflg & SHM_RDONLY) == 0)
351		prot |= VM_PROT_WRITE;
352	flags = MAP_ANON | MAP_SHARED;
353	if (shmaddr) {
354		flags |= MAP_FIXED;
355		if (shmflg & SHM_RND) {
356			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
357		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
358			attach_va = (vm_offset_t)shmaddr;
359		} else {
360			error = EINVAL;
361			goto done2;
362		}
363	} else {
364		/*
365		 * This is just a hint to vm_map_find() about where to
366		 * put it.
367		 */
368		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_taddr
369		    + maxtsiz + maxdsiz);
370	}
371
372	shm_handle = shmseg->shm_internal;
373	vm_object_reference(shm_handle->shm_object);
374	rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object,
375		0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
376	if (rv != KERN_SUCCESS) {
377		error = ENOMEM;
378		goto done2;
379	}
380	vm_map_inherit(&p->p_vmspace->vm_map,
381		attach_va, attach_va + size, VM_INHERIT_SHARE);
382
383	shmmap_s->va = attach_va;
384	shmmap_s->shmid = shmid;
385	shmseg->shm_lpid = p->p_pid;
386	shmseg->shm_atime = time_second;
387	shmseg->shm_nattch++;
388	td->td_retval[0] = attach_va;
389done2:
390	mtx_unlock(&Giant);
391	return (error);
392}
393
394int
395shmat(td, uap)
396	struct thread *td;
397	struct shmat_args *uap;
398{
399	return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg, 0);
400}
401
/*
 * Old-format segment descriptor.  oshmctl() fills one of these from the
 * current shmid_ds for IPC_STAT and copies it out to the caller.
 */
struct oshmid_ds {
	struct	ipc_perm shm_perm;	/* operation perms */
	int	shm_segsz;		/* size of segment (bytes) */
	ushort	shm_cpid;		/* pid, creator */
	ushort	shm_lpid;		/* pid, last operation */
	short	shm_nattch;		/* no. of current attaches */
	time_t	shm_atime;		/* last attach time */
	time_t	shm_dtime;		/* last detach time */
	time_t	shm_ctime;		/* last change time */
	void	*shm_handle;		/* internal handle for shm segment */
};
413
/*
 * Arguments of the old shmctl entry point, oshmctl().
 */
struct oshmctl_args {
	int shmid;		/* segment identifier */
	int cmd;		/* IPC_STAT is handled locally, others forwarded */
	struct oshmid_ds *ubuf;	/* user buffer receiving the old-format descriptor */
};
419
420/*
421 * MPSAFE
422 */
423static int
424oshmctl(td, uap)
425	struct thread *td;
426	struct oshmctl_args *uap;
427{
428#ifdef COMPAT_43
429	int error = 0;
430	struct shmid_ds *shmseg;
431	struct oshmid_ds outbuf;
432
433	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
434		return (ENOSYS);
435	mtx_lock(&Giant);
436	shmseg = shm_find_segment_by_shmid(uap->shmid, 0);
437	if (shmseg == NULL) {
438		error = EINVAL;
439		goto done2;
440	}
441	switch (uap->cmd) {
442	case IPC_STAT:
443		error = ipcperm(td, &shmseg->shm_perm, IPC_R);
444		if (error)
445			goto done2;
446		outbuf.shm_perm = shmseg->shm_perm;
447		outbuf.shm_segsz = shmseg->shm_segsz;
448		outbuf.shm_cpid = shmseg->shm_cpid;
449		outbuf.shm_lpid = shmseg->shm_lpid;
450		outbuf.shm_nattch = shmseg->shm_nattch;
451		outbuf.shm_atime = shmseg->shm_atime;
452		outbuf.shm_dtime = shmseg->shm_dtime;
453		outbuf.shm_ctime = shmseg->shm_ctime;
454		outbuf.shm_handle = shmseg->shm_internal;
455		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
456		if (error)
457			goto done2;
458		break;
459	default:
460		/* XXX casting to (sy_call_t *) is bogus, as usual. */
461		error = ((sy_call_t *)shmctl)(td, uap);
462		break;
463	}
464done2:
465	mtx_unlock(&Giant);
466	return (error);
467#else
468	return (EINVAL);
469#endif
470}
471
/*
 * Argument layout of shmctl(); only compiled when _SYS_SYSPROTO_H_ is not
 * defined.
 */
#ifndef _SYS_SYSPROTO_H_
struct shmctl_args {
	int shmid;		/* segment identifier (a slot index for SHM_STAT) */
	int cmd;		/* command to perform */
	struct shmid_ds *buf;	/* user buffer read for IPC_SET, written for *_STAT and *_INFO */
};
#endif
479
480/*
481 * MPSAFE
482 */
483int
484kern_shmctl(td, shmid, cmd, buf, bufsz, wantrem)
485	struct thread *td;
486	int shmid;
487	int cmd;
488	void *buf;
489	size_t *bufsz;
490	int wantrem;
491{
492	int error = 0;
493	struct shmid_ds *shmseg;
494
495	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
496		return (ENOSYS);
497
498	mtx_lock(&Giant);
499	switch (cmd) {
500	case IPC_INFO:
501		memcpy(buf, &shminfo, sizeof(shminfo));
502		if (bufsz)
503			*bufsz = sizeof(shminfo);
504		td->td_retval[0] = shmalloced;
505		goto done2;
506	case SHM_INFO: {
507		struct shm_info shm_info;
508		shm_info.used_ids = shm_nused;
509		shm_info.shm_rss = 0;	/*XXX where to get from ? */
510		shm_info.shm_tot = 0;	/*XXX where to get from ? */
511		shm_info.shm_swp = 0;	/*XXX where to get from ? */
512		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
513		shm_info.swap_successes = 0;	/*XXX where to get from ? */
514		memcpy(buf, &shm_info, sizeof(shm_info));
515		if (bufsz)
516			*bufsz = sizeof(shm_info);
517		td->td_retval[0] = shmalloced;
518		goto done2;
519	}
520	}
521	if (cmd == SHM_STAT)
522		shmseg = shm_find_segment_by_shmidx(shmid, wantrem);
523	else
524		shmseg = shm_find_segment_by_shmid(shmid, wantrem);
525	if (shmseg == NULL) {
526		error = EINVAL;
527		goto done2;
528	}
529	switch (cmd) {
530	case SHM_STAT:
531	case IPC_STAT:
532		error = ipcperm(td, &shmseg->shm_perm, IPC_R);
533		if (error)
534			goto done2;
535		memcpy(buf, shmseg, sizeof(struct shmid_ds));
536		if (bufsz)
537			*bufsz = sizeof(struct shmid_ds);
538		if (cmd == SHM_STAT)
539			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->shm_perm);
540		break;
541	case IPC_SET: {
542		struct shmid_ds *shmid;
543
544		shmid = (struct shmid_ds *)buf;
545		error = ipcperm(td, &shmseg->shm_perm, IPC_M);
546		if (error)
547			goto done2;
548		shmseg->shm_perm.uid = shmid->shm_perm.uid;
549		shmseg->shm_perm.gid = shmid->shm_perm.gid;
550		shmseg->shm_perm.mode =
551		    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
552		    (shmid->shm_perm.mode & ACCESSPERMS);
553		shmseg->shm_ctime = time_second;
554		break;
555	}
556	case IPC_RMID:
557		error = ipcperm(td, &shmseg->shm_perm, IPC_M);
558		if (error)
559			goto done2;
560		shmseg->shm_perm.key = IPC_PRIVATE;
561		shmseg->shm_perm.mode |= SHMSEG_REMOVED;
562		if (shmseg->shm_nattch <= 0) {
563			shm_deallocate_segment(shmseg);
564			shm_last_free = IPCID_TO_IX(shmid);
565		}
566		break;
567#if 0
568	case SHM_LOCK:
569	case SHM_UNLOCK:
570#endif
571	default:
572		error = EINVAL;
573		break;
574	}
575done2:
576	mtx_unlock(&Giant);
577	return (error);
578}
579
580int
581shmctl(td, uap)
582	struct thread *td;
583	struct shmctl_args *uap;
584{
585	int error = 0;
586	struct shmid_ds buf;
587	size_t bufsz;
588
589	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
590	if (uap->cmd == IPC_SET) {
591		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
592			goto done;
593	}
594
595	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz, 0);
596	if (error)
597		goto done;
598
599	/* Cases in which we need to copyout */
600	switch (uap->cmd) {
601	case IPC_INFO:
602	case SHM_INFO:
603	case SHM_STAT:
604	case IPC_STAT:
605		error = copyout(&buf, uap->buf, bufsz);
606		break;
607	}
608
609done:
610	if (error) {
611		/* Invalidate the return value */
612		td->td_retval[0] = -1;
613	}
614	return (error);
615}
616
617
/*
 * Argument layout of shmget(); only compiled when _SYS_SYSPROTO_H_ is not
 * defined.
 */
#ifndef _SYS_SYSPROTO_H_
struct shmget_args {
	key_t key;	/* IPC key, or IPC_PRIVATE */
	size_t size;	/* requested segment size in bytes */
	int shmflg;	/* access bits plus IPC_CREAT / IPC_EXCL */
};
#endif
625
626static int
627shmget_existing(td, uap, mode, segnum)
628	struct thread *td;
629	struct shmget_args *uap;
630	int mode;
631	int segnum;
632{
633	struct shmid_ds *shmseg;
634	int error;
635
636	shmseg = &shmsegs[segnum];
637	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
638		/*
639		 * This segment is in the process of being allocated.  Wait
640		 * until it's done, and look the key up again (in case the
641		 * allocation failed or it was freed).
642		 */
643		shmseg->shm_perm.mode |= SHMSEG_WANTED;
644		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
645		if (error)
646			return (error);
647		return (EAGAIN);
648	}
649	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
650		return (EEXIST);
651	error = ipcperm(td, &shmseg->shm_perm, mode);
652	if (error)
653		return (error);
654	if (uap->size && uap->size > shmseg->shm_segsz)
655		return (EINVAL);
656	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
657	return (0);
658}
659
660static int
661shmget_allocate_segment(td, uap, mode)
662	struct thread *td;
663	struct shmget_args *uap;
664	int mode;
665{
666	int i, segnum, shmid, size;
667	struct ucred *cred = td->td_ucred;
668	struct shmid_ds *shmseg;
669	struct shm_handle *shm_handle;
670
671	GIANT_REQUIRED;
672
673	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
674		return (EINVAL);
675	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
676		return (ENOSPC);
677	size = round_page(uap->size);
678	if (shm_committed + btoc(size) > shminfo.shmall)
679		return (ENOMEM);
680	if (shm_last_free < 0) {
681		shmrealloc();	/* Maybe expand the shmsegs[] array. */
682		for (i = 0; i < shmalloced; i++)
683			if (shmsegs[i].shm_perm.mode & SHMSEG_FREE)
684				break;
685		if (i == shmalloced)
686			return (ENOSPC);
687		segnum = i;
688	} else  {
689		segnum = shm_last_free;
690		shm_last_free = -1;
691	}
692	shmseg = &shmsegs[segnum];
693	/*
694	 * In case we sleep in malloc(), mark the segment present but deleted
695	 * so that noone else tries to create the same key.
696	 */
697	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
698	shmseg->shm_perm.key = uap->key;
699	shmseg->shm_perm.seq = (shmseg->shm_perm.seq + 1) & 0x7fff;
700	shm_handle = (struct shm_handle *)
701	    malloc(sizeof(struct shm_handle), M_SHM, M_WAITOK);
702	shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
703
704	/*
705	 * We make sure that we have allocated a pager before we need
706	 * to.
707	 */
708	if (shm_use_phys) {
709		shm_handle->shm_object =
710		    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
711	} else {
712		shm_handle->shm_object =
713		    vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
714	}
715	VM_OBJECT_LOCK(shm_handle->shm_object);
716	vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
717	vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
718	VM_OBJECT_UNLOCK(shm_handle->shm_object);
719
720	shmseg->shm_internal = shm_handle;
721	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
722	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid;
723	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
724	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
725	shmseg->shm_segsz = uap->size;
726	shmseg->shm_cpid = td->td_proc->p_pid;
727	shmseg->shm_lpid = shmseg->shm_nattch = 0;
728	shmseg->shm_atime = shmseg->shm_dtime = 0;
729	shmseg->shm_ctime = time_second;
730	shm_committed += btoc(size);
731	shm_nused++;
732	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
733		/*
734		 * Somebody else wanted this key while we were asleep.  Wake
735		 * them up now.
736		 */
737		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
738		wakeup(shmseg);
739	}
740	td->td_retval[0] = shmid;
741	return (0);
742}
743
744/*
745 * MPSAFE
746 */
747int
748shmget(td, uap)
749	struct thread *td;
750	struct shmget_args *uap;
751{
752	int segnum, mode;
753	int error;
754
755	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
756		return (ENOSYS);
757	mtx_lock(&Giant);
758	mode = uap->shmflg & ACCESSPERMS;
759	if (uap->key != IPC_PRIVATE) {
760	again:
761		segnum = shm_find_segment_by_key(uap->key);
762		if (segnum >= 0) {
763			error = shmget_existing(td, uap, mode, segnum);
764			if (error == EAGAIN)
765				goto again;
766			goto done2;
767		}
768		if ((uap->shmflg & IPC_CREAT) == 0) {
769			error = ENOENT;
770			goto done2;
771		}
772	}
773	error = shmget_allocate_segment(td, uap, mode);
774done2:
775	mtx_unlock(&Giant);
776	return (error);
777}
778
779/*
780 * MPSAFE
781 */
782int
783shmsys(td, uap)
784	struct thread *td;
785	/* XXX actually varargs. */
786	struct shmsys_args /* {
787		u_int	which;
788		int	a2;
789		int	a3;
790		int	a4;
791	} */ *uap;
792{
793	int error;
794
795	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
796		return (ENOSYS);
797	if (uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
798		return (EINVAL);
799	mtx_lock(&Giant);
800	error = (*shmcalls[uap->which])(td, &uap->a2);
801	mtx_unlock(&Giant);
802	return (error);
803}
804
805static void
806shmfork_myhook(p1, p2)
807	struct proc *p1, *p2;
808{
809	struct shmmap_state *shmmap_s;
810	size_t size;
811	int i;
812
813	size = shminfo.shmseg * sizeof(struct shmmap_state);
814	shmmap_s = malloc(size, M_SHM, M_WAITOK);
815	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
816	p2->p_vmspace->vm_shm = shmmap_s;
817	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
818		if (shmmap_s->shmid != -1)
819			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++;
820}
821
822static void
823shmexit_myhook(struct vmspace *vm)
824{
825	struct shmmap_state *base, *shm;
826	int i;
827
828	GIANT_REQUIRED;
829
830	if ((base = vm->vm_shm) != NULL) {
831		vm->vm_shm = NULL;
832		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
833			if (shm->shmid != -1)
834				shm_delete_mapping(vm, shm);
835		}
836		free(base, M_SHM);
837	}
838}
839
840static void
841shmrealloc(void)
842{
843	int i;
844	struct shmid_ds *newsegs;
845
846	if (shmalloced >= shminfo.shmmni)
847		return;
848
849	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
850	if (newsegs == NULL)
851		return;
852	for (i = 0; i < shmalloced; i++)
853		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
854	for (; i < shminfo.shmmni; i++) {
855		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
856		shmsegs[i].shm_perm.seq = 0;
857	}
858	free(shmsegs, M_SHM);
859	shmsegs = newsegs;
860	shmalloced = shminfo.shmmni;
861}
862
863static void
864shminit()
865{
866	int i;
867
868	TUNABLE_INT_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall);
869	for (i = PAGE_SIZE; i > 0; i--) {
870		shminfo.shmmax = shminfo.shmall * PAGE_SIZE;
871		if (shminfo.shmmax >= shminfo.shmall)
872			break;
873	}
874	TUNABLE_INT_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
875	TUNABLE_INT_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
876	TUNABLE_INT_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
877	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);
878
879	shmalloced = shminfo.shmmni;
880	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
881	if (shmsegs == NULL)
882		panic("cannot allocate initial memory for sysvshm");
883	for (i = 0; i < shmalloced; i++) {
884		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
885		shmsegs[i].shm_perm.seq = 0;
886	}
887	shm_last_free = 0;
888	shm_nused = 0;
889	shm_committed = 0;
890	shmexit_hook = &shmexit_myhook;
891	shmfork_hook = &shmfork_myhook;
892}
893
894static int
895shmunload()
896{
897
898	if (shm_nused > 0)
899		return (EBUSY);
900
901	free(shmsegs, M_SHM);
902	shmexit_hook = NULL;
903	shmfork_hook = NULL;
904	return (0);
905}
906
907static int
908sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
909{
910
911	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
912}
913
914static int
915sysvshm_modload(struct module *module, int cmd, void *arg)
916{
917	int error = 0;
918
919	switch (cmd) {
920	case MOD_LOAD:
921		shminit();
922		break;
923	case MOD_UNLOAD:
924		error = shmunload();
925		break;
926	case MOD_SHUTDOWN:
927		break;
928	default:
929		error = EINVAL;
930		break;
931	}
932	return (error);
933}
934
/*
 * Module description handed to DECLARE_MODULE(): the module name, its event
 * handler, and no extra argument.
 */
static moduledata_t sysvshm_mod = {
	"sysvshm",
	&sysvshm_modload,
	NULL
};
940
941SYSCALL_MODULE_HELPER(shmsys);
942SYSCALL_MODULE_HELPER(shmat);
943SYSCALL_MODULE_HELPER(shmctl);
944SYSCALL_MODULE_HELPER(shmdt);
945SYSCALL_MODULE_HELPER(shmget);
946
947DECLARE_MODULE(sysvshm, sysvshm_mod,
948	SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
949MODULE_VERSION(sysvshm, 1);
950