/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/vm/vm_mmap.c 321717 2017-07-30 10:36:20Z kib $");

#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RW | CTLFLAG_TUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");
TUNABLE_INT("vm.old_mlock", &old_mlock);

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
    int *, struct shmfd *, vm_ooffset_t, vm_object_t *);

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

int
sys_sbrk(struct thread *td, struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

int
sys_sstk(struct thread *td, struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

int
ogetpagesize(struct thread *td, struct getpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */


/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
sys_mmap(struct thread *td, struct mmap_args *uap)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_in pkm;
#endif
	struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t cap_maxprot, prot, maxprot;
	void *handle;
	objtype_t handle_type;
	int align, error, flags;
	off_t pos;
	struct vmspace *vms = td->td_proc->p_vmspace;
	cap_rights_t rights;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	pos = uap->pos;

	fp = NULL;

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * An anonymous mapping shall specify -1 as the file descriptor
	 * and zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mapping, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((uap->len == 0 && curproc->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (uap->fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((uap->fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || uap->fd != -1 ||
	    pos != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | MAP_PREFAULT |
	    MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0))
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (addr < vm_map_min(&vms->vm_map) ||
		    addr + size > vm_map_max(&vms->vm_map))
			return (EINVAL);
		if (addr + size < addr)
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		PROC_LOCK(td->td_proc);
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td->td_proc, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td->td_proc, RLIMIT_DATA));
		PROC_UNLOCK(td->td_proc);
	}
	if ((flags & MAP_GUARD) != 0) {
		handle = NULL;
		handle_type = OBJT_DEFAULT;
		maxprot = VM_PROT_NONE;
		cap_maxprot = VM_PROT_NONE;
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		handle_type = OBJT_DEFAULT;
		maxprot = VM_PROT_ALL;
		cap_maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block. Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set(&rights, CAP_MMAP_X);
		error = fget_mmap(td, uap->fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if (fp->f_type == DTYPE_SHM) {
			handle = fp->f_data;
			handle_type = OBJT_SWAP;
			maxprot = VM_PROT_NONE;

			/* FREAD should always be set. */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
			if (fp->f_flag & FWRITE)
				maxprot |= VM_PROT_WRITE;
			goto map;
		}
		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;
			goto done;
		}
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_ASYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
#endif
		vp = fp->f_vnode;
		/*
		 * Ensure that file and memory protections are
		 * compatible.  Note that we only worry about
		 * writability if mapping is shared; in this case,
		 * current and max prot are dictated by the open file.
		 * XXX use the vnode instead?  Problem is: what
		 * credentials do we use for determination? What if
		 * proc does a setuid?
		 */
		if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
			maxprot = VM_PROT_NONE;
		else
			maxprot = VM_PROT_EXECUTE;
		if (fp->f_flag & FREAD) {
			maxprot |= VM_PROT_READ;
		} else if (prot & PROT_READ) {
			error = EACCES;
			goto done;
		}
		/*
		 * If we are sharing potential changes (either via
		 * MAP_SHARED or via the implicit sharing of character
		 * device mappings), and we are trying to get write
		 * permission although we opened it without asking
		 * for it, bail out.
		 */
		if ((flags & MAP_SHARED) != 0) {
			if ((fp->f_flag & FWRITE) != 0) {
				maxprot |= VM_PROT_WRITE;
			} else if ((prot & PROT_WRITE) != 0) {
				error = EACCES;
				goto done;
			}
		} else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
			maxprot |= VM_PROT_WRITE;
			cap_maxprot |= VM_PROT_WRITE;
		}
		handle = (void *)vp;
		handle_type = OBJT_VNODE;
	}
map:
	td->td_fpop = fp;
	maxprot &= cap_maxprot;
	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
	    flags, handle_type, handle, pos);
	td->td_fpop = NULL;
#ifdef HWPMC_HOOKS
	/* inform hwpmc(4) if an executable is being mapped */
	if (error == 0 && handle_type == OBJT_VNODE &&
	    (prot & PROT_EXEC)) {
		pkm.pm_file = handle;
		pkm.pm_address = (uintptr_t) addr;
		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
	}
#endif
	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}
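
/*
 * Example: a minimal userspace sketch (not taken from this file) of the
 * alignment behavior described above.  With a NULL hint the kernel picks
 * a page-aligned address; with MAP_FIXED, addr and pos would have to
 * share the same remainder modulo PAGE_SIZE.  The path is hypothetical.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	// Map the first page of a file read-only, then unmap it.
 *	static int
 *	map_first_page(const char *path)
 *	{
 *		int fd = open(path, O_RDONLY);
 *		void *p;
 *
 *		if (fd == -1)
 *			return (-1);
 *		p = mmap(NULL, getpagesize(), PROT_READ, MAP_PRIVATE,
 *		    fd, 0);
 *		close(fd);
 *		if (p == MAP_FAILED)
 *			return (-1);
 *		return (munmap(p, getpagesize()));
 *	}
 */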

int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{
	struct mmap_args oargs;

	oargs.addr = uap->addr;
	oargs.len = uap->len;
	oargs.prot = uap->prot;
	oargs.flags = uap->flags;
	oargs.fd = uap->fd;
	oargs.pos = uap->pos;
	return (sys_mmap(td, &oargs));
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
#ifdef COMPAT_FREEBSD32
#if defined(__amd64__) || defined(__ia64__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    nargs.prot != 0)
		nargs.prot |= PROT_EXEC;
#endif
#endif
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (sys_mmap(td, &nargs));
}
#endif				/* COMPAT_43 */
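
/*
 * Worked example (added note): cvtbsdprot[] above maps the old 4.3BSD
 * protection bits (exec == 1, write == 2, read == 4) onto the modern
 * PROT_* values, so an old prot argument of 5 (read | exec) converts to
 * cvtbsdprot[5] == (PROT_EXEC | PROT_READ).
 */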


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
int
sys_msync(struct thread *td, struct msync_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}
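
/*
 * Example: a minimal userspace sketch of the msync(2) interface above.
 * MS_ASYNC and MS_INVALIDATE are mutually exclusive, as checked in
 * sys_msync().
 *
 *	#include <sys/mman.h>
 *
 *	// Flush a MAP_SHARED mapping "p" of "len" bytes to its backing
 *	// file, waiting for the writes to complete.
 *	static int
 *	flush_mapping(void *p, size_t len)
 *	{
 *		return (msync(p, len, MS_SYNC));
 *	}
 */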

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
sys_munmap(struct thread *td, struct munmap_args *uap)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
#endif
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	if (size == 0)
		return (EINVAL);

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if (addr < vm_map_min(map) || addr + size > vm_map_max(map))
		return (EINVAL);
	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	/*
	 * Inform hwpmc if the address range being unmapped contains
	 * an executable region.
	 */
	pkm.pm_address = (uintptr_t) NULL;
	if (vm_map_lookup_entry(map, addr, &entry)) {
		for (;
		     entry != &map->header && entry->start < addr + size;
		     entry = entry->next) {
			if (vm_map_check_protection(map, entry->start,
				entry->end, VM_PROT_EXECUTE) == TRUE) {
				pkm.pm_address = (uintptr_t) addr;
				pkm.pm_size = (size_t) size;
				break;
			}
		}
	}
#endif
	vm_map_delete(map, addr, addr + size);

#ifdef HWPMC_HOOKS
	/* downgrade the lock to prevent a LOR with the pmc-sx lock */
	vm_map_lock_downgrade(map);
	if (pkm.pm_address != (uintptr_t) NULL)
		PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
	vm_map_unlock_read(map);
#else
	vm_map_unlock(map);
#endif
	/* vm_map_delete returns nothing but KERN_SUCCESS anyway */
	return (0);
}
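
/*
 * Example: a minimal userspace sketch showing that munmap(2) may remove
 * any page-aligned subrange, not just a whole mapping.  Unmapping the
 * middle page of a three-page anonymous mapping leaves two independent
 * one-page mappings.
 *
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	static int
 *	punch_hole(void)
 *	{
 *		size_t ps = (size_t)getpagesize();
 *		char *p = mmap(NULL, 3 * ps, PROT_READ | PROT_WRITE,
 *		    MAP_ANON | MAP_PRIVATE, -1, 0);
 *
 *		if (p == MAP_FAILED)
 *			return (-1);
 *		return (munmap(p + ps, ps));	// unmap the middle page
 *	}
 */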

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
int
sys_mprotect(struct thread *td, struct mprotect_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, prot, FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	}
	return (EINVAL);
}
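
/*
 * Example: a minimal userspace sketch of mprotect(2).  Raising the
 * protections of an entry beyond its maximum protection fails with
 * EACCES (KERN_PROTECTION_FAILURE above).
 *
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	// Drop write access to one page at "p".
 *	static int
 *	make_readonly(void *p)
 *	{
 *		return (mprotect(p, getpagesize(), PROT_READ));
 *	}
 */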

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}
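
/*
 * Example: a minimal userspace sketch of minherit(2), which sets the
 * vm_map_entry inheritance attribute consulted at fork time.
 *
 *	#include <sys/mman.h>
 *
 *	// Keep a region out of children created by fork(2).
 *	static int
 *	hide_from_children(void *p, size_t len)
 *	{
 *		return (minherit(p, len, INHERIT_NONE));
 *	}
 */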

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{
	vm_offset_t start, end;
	vm_map_t map;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (uap->behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CORE)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	if ((vm_offset_t)uap->addr < vm_map_min(map) ||
	    (vm_offset_t)uap->addr + uap->len > vm_map_max(map))
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	if (vm_map_madvise(map, start, end, uap->behav))
		return (EINVAL);
	return (0);
}
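
/*
 * Example: a minimal userspace sketch of madvise(2), handing the advice
 * down to vm_map_madvise() above.
 *
 *	#include <sys/mman.h>
 *
 *	// Tell the VM system the buffer contents are disposable, so the
 *	// pages may be reclaimed without being paged out.
 *	static int
 *	discard_contents(void *buf, size_t len)
 *	{
 *		return (madvise(buf, len, MADV_FREE));
 *	}
 */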

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif

int
sys_mincore(struct thread *td, struct mincore_args *uap)
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error = 0;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_paddr_t locked_pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int mincoreinfo;
	unsigned int timestamp;
	boolean_t locked;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	    (current != &map->header) && (current->start < end);
	    current = current->next) {

		/*
		 * check for contiguity
		 */
		if (current->end < end &&
		    (entry->next == &map->header ||
		     current->next->start > current->end)) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
			current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			object = NULL;
			locked_pa = 0;
		retry:
			m = NULL;
			mincoreinfo = pmap_mincore(pmap, addr, &locked_pa);
			if (locked_pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.
				 */
				m = PHYS_TO_VM_PAGE(locked_pa);
				if (m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = m->object;
					locked = VM_OBJECT_TRYWLOCK(object);
					vm_page_unlock(m);
					if (!locked) {
						VM_OBJECT_WLOCK(object);
						vm_page_lock(m);
						goto retry;
					}
				} else
					vm_page_unlock(m);
				KASSERT(m->valid == VM_PAGE_BITS_ALL,
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m == NULL &&
					    vm_page_is_cached(object, pindex))
						mincoreinfo = MINCORE_INCORE;
					if (m != NULL && m->valid == 0)
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				/* Examine other mappings to the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;
				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->aflags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->aflags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}
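
/*
 * Example: a minimal userspace sketch of mincore(2).  The vector
 * receives one status byte per page, carrying bits such as
 * MINCORE_INCORE and MINCORE_MODIFIED_OTHER as assembled above.
 *
 *	#include <sys/mman.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	// Count how many pages of the mapping at "p" are resident.
 *	static int
 *	count_resident(const void *p, size_t len)
 *	{
 *		size_t ps = (size_t)getpagesize();
 *		size_t i, npages = (len + ps - 1) / ps;
 *		char *vec = malloc(npages);
 *		int resident = 0;
 *
 *		if (vec == NULL)
 *			return (-1);
 *		if (mincore(p, len, vec) == -1) {
 *			free(vec);
 *			return (-1);
 *		}
 *		for (i = 0; i < npages; i++)
 *			if (vec[i] & MINCORE_INCORE)
 *				resident++;
 *		free(vec);
 *		return (resident);
 *	}
 */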

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_mlock(struct thread *td, struct mlock_args *uap)
{

	return (vm_mlock(td->td_proc, td->td_ucred, uap->addr, uap->len));
}

int
vm_mlock(struct proc *proc, struct ucred *cred, const void *addr0, size_t len)
{
	vm_offset_t addr, end, last, start;
	vm_size_t npages, size;
	vm_map_t map;
	unsigned long nsize;
	int error;

	error = priv_check_cred(cred, PRIV_VM_MLOCK, 0);
	if (error)
		return (error);
	addr = (vm_offset_t)addr0;
	size = len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return (ENOMEM);
	map = &proc->p_vmspace->vm_map;
	PROC_LOCK(proc);
	nsize = ptoa(npages + pmap_wired_count(map->pmap));
	if (nsize > lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		return (ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(proc);
		error = racct_set(proc, RACCT_MEMLOCK, nsize);
		PROC_UNLOCK(proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif
	error = vm_map_wire(map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(proc);
		racct_set(proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
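
/*
 * Example: a minimal userspace sketch of mlock(2).  As implemented in
 * vm_mlock() above, the call fails with ENOMEM when it would push the
 * process past RLIMIT_MEMLOCK and with EAGAIN when the system-wide
 * wired-page limit would be exceeded.
 *
 *	#include <sys/mman.h>
 *
 *	// Wire a buffer so it cannot be paged out.
 *	static int
 *	pin_buffer(const void *buf, size_t len)
 *	{
 *		return (mlock(buf, len));
 *	}
 */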

#ifndef _SYS_SYSPROTO_H_
struct mlockall_args {
	int	how;
};
#endif

int
sys_mlockall(struct thread *td, struct mlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MLOCK);
	if (error)
		return (error);

	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
		return (EINVAL);

	/*
	 * If wiring all pages in the process would cause it to exceed
	 * a hard resource limit, return ENOMEM.
	 */
	if (!old_mlock && uap->how & MCL_CURRENT) {
		PROC_LOCK(td->td_proc);
		if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		PROC_UNLOCK(td->td_proc);
	}
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
		PROC_UNLOCK(td->td_proc);
		if (error != 0)
			return (ENOMEM);
	}
#endif

	if (uap->how & MCL_FUTURE) {
		vm_map_lock(map);
		vm_map_modflags(map, MAP_WIREFUTURE, 0);
		vm_map_unlock(map);
		error = 0;
	}

	if (uap->how & MCL_CURRENT) {
		/*
		 * P1003.1-2001 mandates that all currently mapped pages
		 * will be memory resident and locked (wired) upon return
		 * from mlockall(). vm_map_wire() will wire pages, by
		 * calling vm_fault_wire() for each page in the region.
		 */
		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
		error = (error == KERN_SUCCESS ? 0 : EAGAIN);
	}
#ifdef RACCT
	if (racct_enable && error != KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}
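
/*
 * Example: a minimal userspace sketch of mlockall(2).  MCL_FUTURE sets
 * MAP_WIREFUTURE on the map, which makes vm_mmap() below wire new
 * regions at creation time.
 *
 *	#include <sys/mman.h>
 *
 *	// Wire everything mapped now and everything mapped later.
 *	static int
 *	pin_everything(void)
 *	{
 *		return (mlockall(MCL_CURRENT | MCL_FUTURE));
 *	}
 */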

#ifndef _SYS_SYSPROTO_H_
struct munlockall_args {
	register_t dummy;
};
#endif

int
sys_munlockall(struct thread *td, struct munlockall_args *uap)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);

	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
	vm_map_lock(map);
	vm_map_modflags(map, 0, MAP_WIREFUTURE);
	vm_map_unlock(map);

	/* Forcibly unwire all pages. */
	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
		PROC_UNLOCK(td->td_proc);
	}
#endif

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	const void *addr;
	size_t len;
};
#endif
int
sys_munlock(struct thread *td, struct munlock_args *uap)
{
	vm_offset_t addr, end, last, start;
	vm_size_t size;
#ifdef RACCT
	vm_map_t map;
#endif
	int error;

	error = priv_check(td, PRIV_VM_MUNLOCK);
	if (error)
		return (error);
	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	last = addr + size;
	start = trunc_page(addr);
	end = round_page(last);
	if (last < addr || end < addr)
		return (EINVAL);
	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
	if (racct_enable && error == KERN_SUCCESS) {
		PROC_LOCK(td->td_proc);
		map = &td->td_proc->p_vmspace->vm_map;
		racct_set(td->td_proc, RACCT_MEMLOCK,
		    ptoa(pmap_wired_count(map->pmap)));
		PROC_UNLOCK(td->td_proc);
	}
#endif
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * vm_mmap_vnode()
 *
 * Helper function for vm_mmap().  Performs the sanity checks specific to
 * mmap operations on vnodes.
 *
 * For VCHR vnodes, the vnode lock is held over the call to
 * vm_mmap_cdev() to keep vp->v_rdev valid.
 */
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
    boolean_t *writecounted)
{
	struct vattr va;
	vm_object_t obj;
	vm_offset_t foff;
	struct ucred *cred;
	int error, flags, locktype;

	cred = td->td_ucred;
	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
		locktype = LK_EXCLUSIVE;
	else
		locktype = LK_SHARED;
	if ((error = vget(vp, locktype, td)) != 0)
		return (error);
	foff = *foffp;
	flags = *flagsp;
	obj = vp->v_object;
	if (vp->v_type == VREG) {
		/*
		 * Get the proper underlying object
		 */
		if (obj == NULL) {
			error = EINVAL;
			goto done;
		}
		if (obj->type == OBJT_VNODE && obj->handle != vp) {
			vput(vp);
			vp = (struct vnode *)obj->handle;
			/*
			 * Bypass filesystems obey the mpsafety of the
			 * underlying fs.  Tmpfs never bypasses.
			 */
			error = vget(vp, locktype, td);
			if (error != 0)
				return (error);
		}
		if (locktype == LK_EXCLUSIVE) {
			*writecounted = TRUE;
			vnode_pager_update_writecount(obj, 0, objsize);
		}
	} else if (vp->v_type == VCHR) {
		error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
		    vp->v_rdev, foffp, objp);
		if (error == 0)
			goto mark_atime;
		goto done;
	} else {
		error = EINVAL;
		goto done;
	}
	if ((error = VOP_GETATTR(vp, &va, cred)))
		goto done;
#ifdef MAC
	error = mac_vnode_check_mmap(cred, vp, prot, flags);
	if (error != 0)
		goto done;
#endif
	if ((flags & MAP_SHARED) != 0) {
		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
			if (prot & PROT_WRITE) {
				error = EPERM;
				goto done;
			}
			*maxprotp &= ~VM_PROT_WRITE;
		}
	}
	/*
	 * If it is a regular file without any references
	 * we do not need to sync it.
	 * Adjust object size to be the size of actual file.
	 */
	objsize = round_page(va.va_size);
	if (va.va_nlink == 0)
		flags |= MAP_NOSYNC;
	if (obj->type == OBJT_VNODE)
		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
		    cred);
	else {
		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
		    ("wrong object type"));
		vm_object_reference(obj);
	}
	if (obj == NULL) {
		error = ENOMEM;
		goto done;
	}
	*objp = obj;
	*flagsp = flags;

mark_atime:
	vfs_mark_atime(vp, cred);

done:
	if (error != 0 && *writecounted) {
		*writecounted = FALSE;
		vnode_pager_update_writecount(obj, objsize, 0);
	}
	vput(vp);
	return (error);
}

/*
 * vm_mmap_cdev()
 *
 * Helper function for vm_mmap().  Performs the sanity checks specific to
 * mmap operations on cdevs.
 */
int
vm_mmap_cdev(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
{
	vm_object_t obj;
	struct cdevsw *dsw;
	int error, flags, ref;

	flags = *flagsp;

	dsw = dev_refthread(cdev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	if (dsw->d_flags & D_MMAP_ANON) {
		dev_relthread(cdev, ref);
		*maxprotp = VM_PROT_ALL;
		*flagsp |= MAP_ANON;
		return (0);
	}
	/*
	 * cdevs do not provide private mappings of any kind.
	 */
	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0) {
		dev_relthread(cdev, ref);
		return (EACCES);
	}
	if (flags & (MAP_PRIVATE|MAP_COPY)) {
		dev_relthread(cdev, ref);
		return (EINVAL);
	}
	/*
	 * Force device mappings to be shared.
	 */
	flags |= MAP_SHARED;
#ifdef MAC_XXX
	error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
	if (error != 0) {
		dev_relthread(cdev, ref);
		return (error);
	}
#endif
	/*
	 * First, try d_mmap_single().  If that is not implemented
	 * (returns ENODEV), fall back to using the device pager.
	 * Note that d_mmap_single() must return a reference to the
	 * object (it needs to bump the reference count of the object
	 * it returns somehow).
	 *
	 * XXX assumes VM_PROT_* == PROT_*
	 */
	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
	dev_relthread(cdev, ref);
	if (error != ENODEV)
		return (error);
	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
	    td->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*objp = obj;
	*flagsp = flags;
	return (0);
}
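
/*
 * Example: a sketch of a hypothetical driver ("exdev") interacting with
 * the fallback logic above.  Declining d_mmap_single() with ENODEV makes
 * vm_mmap_cdev() allocate an OBJT_DEVICE object via the device pager
 * instead; the driver name and everything else here are assumptions for
 * illustration, not code from this tree.
 *
 *	#include <sys/param.h>
 *	#include <sys/systm.h>
 *	#include <sys/conf.h>
 *
 *	static d_mmap_single_t exdev_mmap_single;
 *
 *	static int
 *	exdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 *	    vm_size_t size, struct vm_object **object, int nprot)
 *	{
 *		return (ENODEV);	// fall back to the device pager
 *	}
 *
 *	static struct cdevsw exdev_cdevsw = {
 *		.d_version =	D_VERSION,
 *		.d_name =	"exdev",
 *		.d_mmap_single = exdev_mmap_single,
 *	};
 */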

/*
 * vm_mmap_shm()
 *
 * MPSAFE
 *
 * Helper function for vm_mmap().  Performs the sanity checks specific to
 * mmap operations on shm file descriptors.
 */
int
vm_mmap_shm(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
{
	int error;

	if ((*flagsp & MAP_SHARED) != 0 &&
	    (*maxprotp & VM_PROT_WRITE) == 0 &&
	    (prot & PROT_WRITE) != 0)
		return (EACCES);
#ifdef MAC
	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
	if (error != 0)
		return (error);
#endif
	error = shm_mmap(shmfd, objsize, foff, objp);
	if (error)
		return (error);
	return (0);
}

/*
 * vm_mmap()
 *
 * MPSAFE
 *
 * Internal version of mmap.  Currently used by mmap, exec, and System V
 * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags,
	objtype_t handle_type, void *handle,
	vm_ooffset_t foff)
{
	boolean_t curmap, fitit;
	vm_offset_t max_addr;
	vm_object_t object = NULL;
	struct thread *td = curthread;
	int docow, error, findspace, rv;
	boolean_t writecounted;

	if (size == 0)
		return (0);

	size = round_page(size);

	curmap = map == &td->td_proc->p_vmspace->vm_map;
	if (curmap) {
		PROC_LOCK(td->td_proc);
		if (map->size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
			PROC_UNLOCK(td->td_proc);
			return (ENOMEM);
		}
		if (!old_mlock && map->flags & MAP_WIREFUTURE) {
			if (ptoa(pmap_wired_count(map->pmap)) + size >
			    lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (ENOMEM);
			}
			error = racct_set(td->td_proc, RACCT_MEMLOCK,
			    ptoa(pmap_wired_count(map->pmap)) + size);
			if (error != 0) {
				racct_set_force(td->td_proc, RACCT_VMEM,
				    map->size);
				PROC_UNLOCK(td->td_proc);
				return (error);
			}
		}
		PROC_UNLOCK(td->td_proc);
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
	}
	writecounted = FALSE;

	/*
	 * Lookup/allocate object.
	 */
	switch (handle_type) {
	case OBJT_DEVICE:
		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object);
		break;
	case OBJT_VNODE:
		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
		    handle, &foff, &object, &writecounted);
		break;
	case OBJT_SWAP:
		error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
		    handle, foff, &object);
		break;
	case OBJT_DEFAULT:
		if (handle == NULL) {
			error = 0;
			break;
		}
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error)
		return (error);
	if (flags & MAP_ANON) {
		object = NULL;
		docow = 0;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else if (flags & MAP_PREFAULT_READ)
		docow = MAP_PREFAULT;
	else
		docow = MAP_PREFAULT_PARTIAL;

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;
	/* Shared memory is also shared with children. */
	if (flags & MAP_SHARED)
		docow |= MAP_INHERIT_SHARE;
	if (writecounted)
		docow |= MAP_VN_WRITECOUNT;
	if (flags & MAP_STACK) {
		if (object != NULL)
			return (EINVAL);
		docow |= MAP_STACK_GROWS_DOWN;
	}
	if ((flags & MAP_EXCL) != 0)
		docow |= MAP_CHECK_EXCL;
	if ((flags & MAP_GUARD) != 0)
		docow |= MAP_CREATE_GUARD;

	if (fitit) {
		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
			findspace = VMFS_SUPER_SPACE;
		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
			findspace = VMFS_ALIGNED_SPACE(flags >>
			    MAP_ALIGNMENT_SHIFT);
		else
			findspace = VMFS_OPTIMAL_SPACE;
		max_addr = 0;
#ifdef MAP_32BIT
		if ((flags & MAP_32BIT) != 0)
			max_addr = MAP_32BIT_MAX_ADDR;
#endif
		if (curmap) {
			vm_offset_t min_addr;

			PROC_LOCK(td->td_proc);
			min_addr = round_page((vm_offset_t)td->td_proc->
			    p_vmspace->vm_daddr + lim_max(td->td_proc,
			    RLIMIT_DATA));
			PROC_UNLOCK(td->td_proc);
			rv = vm_map_find_min(map, object, foff, addr, size,
			    min_addr, max_addr,
			    findspace, prot, maxprot, docow);
		} else {
			rv = vm_map_find(map, object, foff, addr, size,
			    max_addr, findspace, prot, maxprot, docow);
		}
	} else {
		rv = vm_map_fixed(map, object, foff, *addr, size,
		    prot, maxprot, docow);
	}

	if (rv == KERN_SUCCESS) {
		/*
		 * If the process has requested that all future mappings
		 * be wired, then heed this.
		 */
		if (map->flags & MAP_WIREFUTURE) {
			vm_map_wire(map, *addr, *addr + size,
			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
		}
	} else {
		/*
		 * If this mapping was accounted for in the vnode's
		 * writecount, then undo that now.
		 */
		if (writecounted)
			vnode_pager_release_writecount(object, 0, size);
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
	}
	return (vm_mmap_to_errno(rv));
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}