linux_mmap.c revision 302964
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2002 Doug Rabson
4 * Copyright (c) 2000 Marcel Moolenaar
5 * Copyright (c) 1994-1995 Søren Schmidt
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer
13 *    in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 *    derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * $FreeBSD: stable/10/sys/compat/linux/linux_mmap.c 302964 2016-07-17 15:23:32Z dchagin $
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_mmap.c 302964 2016-07-17 15:23:32Z dchagin $");
36
37#include <sys/capsicum.h>
38#include <sys/file.h>
39#include <sys/imgact.h>
40#include <sys/ktr.h>
41#include <sys/mman.h>
42#include <sys/proc.h>
43#include <sys/resourcevar.h>
44#include <sys/sysent.h>
45#include <sys/sysproto.h>
46
47#include <vm/pmap.h>
48#include <vm/vm_map.h>
49
50#include <compat/linux/linux_emul.h>
51#include <compat/linux/linux_mmap.h>
52#include <compat/linux/linux_persona.h>
53#include <compat/linux/linux_util.h>
54
55
/*
 * Sizing used by linux_mmap_common() for Linux MAP_GROWSDOWN mappings:
 * a request no larger than (STACK_SIZE - GUARD_SIZE) bytes is widened to
 * exactly that size, so the region can grow down to that limit.
 */
56#define STACK_SIZE  (2 * 1024 * 1024)
57#define GUARD_SIZE  (4 * PAGE_SIZE)
58
59#if defined(__amd64__)
/* Forward declaration; the amd64-only definition is at the end of this file. */
60static void linux_fixup_prot(struct thread *td, int *prot);
61#endif
62
63
64int
65linux_mmap_common(struct thread *td, uintptr_t addr, size_t len, int prot,
66    int flags, int fd, off_t pos)
67{
68	struct proc *p = td->td_proc;
69	struct vmspace *vms = td->td_proc->p_vmspace;
70	struct mmap_args /* {
71		caddr_t addr;
72		size_t len;
73		int prot;
74		int flags;
75		int fd;
76		off_t pos;
77	} */ bsd_args;
78	int error;
79	struct file *fp;
80
81	cap_rights_t rights;
82	LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx",
83	    addr, len, prot, flags, fd, pos);
84
85	error = 0;
86	bsd_args.flags = 0;
87	fp = NULL;
88
89	/*
90	 * Linux mmap(2):
91	 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
92	 */
93	if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
94		return (EINVAL);
95
96	if (flags & LINUX_MAP_SHARED)
97		bsd_args.flags |= MAP_SHARED;
98	if (flags & LINUX_MAP_PRIVATE)
99		bsd_args.flags |= MAP_PRIVATE;
100	if (flags & LINUX_MAP_FIXED)
101		bsd_args.flags |= MAP_FIXED;
102	if (flags & LINUX_MAP_ANON) {
103		/* Enforce pos to be on page boundary, then ignore. */
104		if ((pos & PAGE_MASK) != 0)
105			return (EINVAL);
106		pos = 0;
107		bsd_args.flags |= MAP_ANON;
108	} else
109		bsd_args.flags |= MAP_NOSYNC;
110	if (flags & LINUX_MAP_GROWSDOWN)
111		bsd_args.flags |= MAP_STACK;
112
113	/*
114	 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
115	 * on Linux/i386 if the binary requires executable stack.
116	 * We do this only for IA32 emulation as on native i386 this is does not
117	 * make sense without PAE.
118	 *
119	 * XXX. Linux checks that the file system is not mounted with noexec.
120	 */
121	bsd_args.prot = prot;
122#if defined(__amd64__)
123	linux_fixup_prot(td, &bsd_args.prot);
124#endif
125
126	/* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
127	bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
128	if (bsd_args.fd != -1) {
129		/*
130		 * Linux follows Solaris mmap(2) description:
131		 * The file descriptor fildes is opened with
132		 * read permission, regardless of the
133		 * protection options specified.
134		 */
135
136		error = fget(td, bsd_args.fd,
137		    cap_rights_init(&rights, CAP_MMAP), &fp);
138		if (error != 0)
139			return (error);
140		if (fp->f_type != DTYPE_VNODE) {
141			fdrop(fp, td);
142			return (EINVAL);
143		}
144
145		/* Linux mmap() just fails for O_WRONLY files */
146		if (!(fp->f_flag & FREAD)) {
147			fdrop(fp, td);
148			return (EACCES);
149		}
150
151		fdrop(fp, td);
152	}
153
154	if (flags & LINUX_MAP_GROWSDOWN) {
155		/*
156		 * The Linux MAP_GROWSDOWN option does not limit auto
157		 * growth of the region.  Linux mmap with this option
158		 * takes as addr the initial BOS, and as len, the initial
159		 * region size.  It can then grow down from addr without
160		 * limit.  However, Linux threads has an implicit internal
161		 * limit to stack size of STACK_SIZE.  Its just not
162		 * enforced explicitly in Linux.  But, here we impose
163		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
164		 * region, since we can do this with our mmap.
165		 *
166		 * Our mmap with MAP_STACK takes addr as the maximum
167		 * downsize limit on BOS, and as len the max size of
168		 * the region.  It then maps the top SGROWSIZ bytes,
169		 * and auto grows the region down, up to the limit
170		 * in addr.
171		 *
172		 * If we don't use the MAP_STACK option, the effect
173		 * of this code is to allocate a stack region of a
174		 * fixed size of (STACK_SIZE - GUARD_SIZE).
175		 */
176
177		if ((caddr_t)addr + len > vms->vm_maxsaddr) {
178			/*
179			 * Some Linux apps will attempt to mmap
180			 * thread stacks near the top of their
181			 * address space.  If their TOS is greater
182			 * than vm_maxsaddr, vm_map_growstack()
183			 * will confuse the thread stack with the
184			 * process stack and deliver a SEGV if they
185			 * attempt to grow the thread stack past their
186			 * current stacksize rlimit.  To avoid this,
187			 * adjust vm_maxsaddr upwards to reflect
188			 * the current stacksize rlimit rather
189			 * than the maximum possible stacksize.
190			 * It would be better to adjust the
191			 * mmap'ed region, but some apps do not check
192			 * mmap's return value.
193			 */
194			PROC_LOCK(p);
195			vms->vm_maxsaddr = (char *)p->p_sysent->sv_usrstack -
196			    lim_cur(p, RLIMIT_STACK);
197			PROC_UNLOCK(p);
198		}
199
200		/*
201		 * This gives us our maximum stack size and a new BOS.
202		 * If we're using VM_STACK, then mmap will just map
203		 * the top SGROWSIZ bytes, and let the stack grow down
204		 * to the limit at BOS.  If we're not using VM_STACK
205		 * we map the full stack, since we don't have a way
206		 * to autogrow it.
207		 */
208		if (len > STACK_SIZE - GUARD_SIZE) {
209			bsd_args.addr = (caddr_t)addr;
210			bsd_args.len = len;
211		} else {
212			bsd_args.addr = (caddr_t)addr -
213			    (STACK_SIZE - GUARD_SIZE - len);
214			bsd_args.len = STACK_SIZE - GUARD_SIZE;
215		}
216	} else {
217		bsd_args.addr = (caddr_t)addr;
218		bsd_args.len  = len;
219	}
220	bsd_args.pos = pos;
221
222	error = sys_mmap(td, &bsd_args);
223
224	LINUX_CTR2(mmap2, "return: %d (%p)", error, td->td_retval[0]);
225
226	return (error);
227}
228
229int
230linux_mprotect_common(struct thread *td, uintptr_t addr, size_t len, int prot)
231{
232	struct mprotect_args bsd_args;
233
234	bsd_args.addr = (void *)addr;
235	bsd_args.len = len;
236	bsd_args.prot = prot;
237
238#if defined(__amd64__)
239	linux_fixup_prot(td, &bsd_args.prot);
240#endif
241	return (sys_mprotect(td, &bsd_args));
242}
243
244#if defined(__amd64__)
245static void
246linux_fixup_prot(struct thread *td, int *prot)
247{
248	struct linux_pemuldata *pem;
249
250	if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && *prot & PROT_READ) {
251		pem = pem_find(td->td_proc);
252		if (pem->persona & LINUX_READ_IMPLIES_EXEC)
253			*prot |= PROT_EXEC;
254	}
255
256}
257#endif
258