linux32_sysvec.c revision 293516
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer
14 *    in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: stable/10/sys/amd64/linux32/linux32_sysvec.c 293516 2016-01-09 15:48:11Z dchagin $");
35#include "opt_compat.h"
36
37#ifndef COMPAT_FREEBSD32
38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39#endif
40
41#define	__ELF_WORD_SIZE	32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_emul.h>
80#include <compat/linux/linux_futex.h>
81#include <compat/linux/linux_ioctl.h>
82#include <compat/linux/linux_mib.h>
83#include <compat/linux/linux_misc.h>
84#include <compat/linux/linux_signal.h>
85#include <compat/linux/linux_util.h>
86#include <compat/linux/linux_vdso.h>
87
/* Declare version 1 of the "linux" module. */
MODULE_VERSION(linux, 1);

/* Define the M_LINUX malloc type ("linux" / "Linux mode structures"). */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
91
/*
 * Emit one (id, val) auxiliary-vector pair as two consecutive 32-bit
 * words via suword32() and advance 'pos' past both of them.
 *
 * 'pos' is expanded and post-incremented twice, so it must be a plain
 * pointer lvalue without side effects.  The suword32() return values
 * are discarded.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)
97
/*
 * The two bytes "#!" read as a single 16-bit value; the constant
 * depends on the host byte order.  Used by exec_linux_imgact_try() to
 * recognize interpreter scripts.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif
103
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 *
 * Both names deliberately expand to the same value, so the two
 * sendsig variants share one debug slot.
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
112
/* NOTE(review): not assigned in the visible part of this file. */
const char *linux_kplatform;
/* Signal trampoline size; its address is published as .sv_szsigcode. */
static int linux_szsigcode;
/* Presumably the VM object and kernel mapping of the shared page. */
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
/*
 * Start/end markers of the embedded linux32 locore object; the start
 * address is published as .sv_sigcode.
 */
extern char _binary_linux32_locore_o_start;
extern char _binary_linux32_locore_o_end;

/* Linux system call table, published as .sv_table. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker sets of ioctl and device handlers. */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations for the sysentvec hooks defined below. */
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td,
				   struct image_params *imgp, u_long stack);
static void	linux32_fixlimit(struct rlimit *rl, int which);
static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void	linux_vdso_install(void *param);
static void	linux_vdso_deinstall(void *param);

/* Event handler registrations; presumably set up at module load. */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_exec_tag;
static eventhandler_tag linux_thread_dtor_tag;
139
140/*
141 * Linux syscalls return negative errno's, we do positive and map them
142 * Reference:
143 *   FreeBSD: src/sys/sys/errno.h
144 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
145 *            linux-2.6.17.8/include/asm-generic/errno.h
146 */
147static int bsd_to_linux_errno[ELAST + 1] = {
148	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
149	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
150	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
151	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
152	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
153	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
154	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
155	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
156	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
157	 -72, -67, -71
158};
159
/*
 * Native-to-Linux signal translation, published as .sv_sigtbl and
 * indexed with _SIG_IDX(sig) by the sendsig routines below.
 * A 0 entry marks a native signal with no Linux equivalent listed here
 * (presumably SIGEMT and SIGINFO -- confirm against sys/signal.h).
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
170
/*
 * Linux-to-native signal translation; the inverse direction of
 * bsd_to_linux_signal.  Presumably indexed by Linux signal number
 * minus one -- confirm against the users in compat/linux.
 * NOTE(review): SIGBUS and SIGURG each appear twice, so some Linux
 * signals without a native twin are folded onto existing ones.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
181
/* Value reported for native trap codes that have no Linux mapping. */
#define LINUX_T_UNKNOWN  255
/*
 * Native trap code -> Linux trap number, indexed by the native code.
 * The per-entry comments name the native T_* constant expected at that
 * index.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup in the table above.  The comparison is made
 * against an unsigned sizeof expression, so a negative int 'code' is
 * converted to a large unsigned value and also yields LINUX_T_UNKNOWN.
 * 'code' is evaluated twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
220
/*
 * 32-bit layout of the "ps_strings" record that linux_copyout_strings()
 * fills in at LINUX32_PS_STRINGS.  The two pointer members hold 32-bit
 * user addresses.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
	u_int ps_nenvstr;	/* the number of environment strings */
};
227
/*
 * Symbols exported from the Linux vDSO image.  The first two are used
 * below as signal trampoline entry points (loaded into tf_rip), the
 * third as the AT_SYSINFO value and the last as the AT_PLATFORM string.
 * Presumably resolved when the vDSO is installed -- see linux_vdso.h.
 */
LINUX_VDSO_SYM_INTPTR(linux32_sigcode);
LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux32_vsyscall);
LINUX_VDSO_SYM_CHAR(linux_platform);
232
233/*
234 * If FreeBSD & Linux have a difference of opinion about what a trap
235 * means, deal with it here.
236 *
237 * MPSAFE
238 */
239static int
240translate_traps(int signal, int trap_code)
241{
242	if (signal != SIGBUS)
243		return signal;
244	switch (trap_code) {
245	case T_PROTFLT:
246	case T_TSSFLT:
247	case T_DOUBLEFLT:
248	case T_PAGEFLT:
249		return SIGSEGV;
250	default:
251		return signal;
252	}
253}
254
255static int
256elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
257{
258	Elf32_Auxargs *args;
259	Elf32_Addr *base;
260	Elf32_Addr *pos;
261	struct linux32_ps_strings *arginfo;
262
263	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
264
265	KASSERT(curthread->td_proc == imgp->proc,
266	    ("unsafe elf_linux_fixup(), should be curproc"));
267	base = (Elf32_Addr *)*stack_base;
268	args = (Elf32_Auxargs *)imgp->auxargs;
269	pos = base + (imgp->args->argc + imgp->args->envc + 2);
270
271	AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR,
272	    imgp->proc->p_sysent->sv_shared_page_base);
273	AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall);
274	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
275
276	/*
277	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
278	 * as it has appeared in the 2.4.0-rc7 first time.
279	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
280	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
281	 * is not present.
282	 * Also see linux_times() implementation.
283	 */
284	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
285		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
286	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
287	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
288	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
289	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
290	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
291	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
292	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
293	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
294	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
295	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
296	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
297	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
298	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
299	if (args->execfd != -1)
300		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
301	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
302
303	free(imgp->auxargs, M_TEMP);
304	imgp->auxargs = NULL;
305
306	base--;
307	suword32(base, (uint32_t)imgp->args->argc);
308	*stack_base = (register_t *)base;
309	return (0);
310}
311
/*
 * Deliver a signal to the current thread using the Linux "rt" signal
 * frame (struct l_rt_sigframe), which carries a siginfo and a ucontext.
 * linux_sendsig() dispatches here for handlers installed with
 * SA_SIGINFO.
 *
 * catcher	user handler address, recorded in the frame.
 * ksi		signal being delivered (number, code, fault address).
 * mask		signal mask recorded in the frame's ucontext.
 *
 * Entered with the process lock and the sigacts mutex held (both are
 * asserted).  Both are dropped while the frame is built and copied out
 * and both are held again on return.  If the frame cannot be copied to
 * the user stack, sigexit(td, SIGILL) is called.
 */
static void
linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;
	int sig;
	int code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	/* Remember whether we were already on the alternate stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context: at the top of
	 * the alternate stack when one is configured, not yet in use and
	 * requested for this signal; otherwise just below the current
	 * user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	/* psp is not consulted again until the mutex is retaken below. */
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.
	 * The signal number is translated through the ABI's sv_sigtbl
	 * when it falls within the table.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;
	/* These two point into the user copy of the frame, not 'frame'. */
	frame.sf_siginfo = PTROUT(&fp->sf_si);
	frame.sf_ucontext = PTROUT(&fp->sf_sc);

	/* Fill in POSIX parts */
	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);

	/*
	 * Build the signal context to be used by sigreturn
	 * and libgcc unwind.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = 0;		/* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	/* Last read of td_sigstk is done; drop the process lock. */
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Snapshot the interrupted register state into the 32-bit context. */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.uc_mcontext.sc_esp    = regs->tf_rsp;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		/* The process lock is retaken before calling sigexit(). */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in: resume at the rt signal
	 * trampoline with the frame as the new stack pointer and the
	 * 32-bit user code/data selectors loaded.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = linux32_rt_sigcode;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	/* Restore the lock state the caller expects. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
440
441
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * in u. to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * This is the .sv_sendsig hook.  Handlers installed with SA_SIGINFO are
 * handed to linux_rt_sendsig(); all others get the classic
 * struct l_sigframe built here.
 *
 * catcher	user handler address, recorded in the frame.
 * ksi		signal being delivered (number, code, fault address).
 * mask		signal mask saved in the frame for sigreturn.
 *
 * Entered with the process lock and the sigacts mutex held (both are
 * asserted); both are dropped while the frame is built and copied out
 * and both are held again on return.  If the frame cannot be copied to
 * the user stack, sigexit(td, SIGILL) is called.
 */
static void
linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;
	int sig, code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, ksi, mask);
		return;
	}

	regs = td->td_frame;
	/* Remember whether we were already on the alternate stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context: at the top of
	 * the alternate stack when one is configured, not yet in use and
	 * requested for this signal; otherwise just below the current
	 * user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	/* Both locks are released for the frame build and the copyout. */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 * The signal number is translated through the ABI's sv_sigtbl
	 * when it falls within the table.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = regs->tf_gs;
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_rdi;
	frame.sf_sc.sc_esi    = regs->tf_rsi;
	frame.sf_sc.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.sc_esp    = regs->tf_rsp;
	frame.sf_sc.sc_edx    = regs->tf_rdx;
	frame.sf_sc.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.sc_eax    = regs->tf_rax;
	frame.sf_sc.sc_eip    = regs->tf_rip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_rflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	/* Mask words beyond the first go into sf_extramask. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in: resume at the non-rt signal
	 * trampoline with the frame as the new stack pointer and the
	 * 32-bit user code/data selectors loaded.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = linux32_sigcode;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	/* Restore the lock state the caller expects. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
564
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * td		calling thread; its trapframe is overwritten on success.
 * args		args->sfp is the user address of the struct l_sigframe.
 *
 * Returns EFAULT if the frame cannot be read, EINVAL if the saved
 * eflags or %cs fail the checks below (a SIGBUS is also posted in the
 * %cs case), and EJUSTRETURN once the trapframe has been replaced.
 */
int
linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
{
	struct l_sigframe frame;
	struct trapframe *regs;
	sigset_t bmask;
	l_sigset_t lmask;
	int eflags, i;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(sigreturn))
		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations: only the bits in PSL_USERCHANGE
	 * may differ between the saved and the current flags.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/* Reassemble the Linux mask from sc_mask plus sf_extramask. */
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	linux_to_bsd_sigset(&lmask, &bmask);
	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);

	/*
	 * Restore signal context.
	 */
	regs->tf_rdi    = frame.sf_sc.sc_edi;
	regs->tf_rsi    = frame.sf_sc.sc_esi;
	regs->tf_rbp    = frame.sf_sc.sc_ebp;
	regs->tf_rbx    = frame.sf_sc.sc_ebx;
	regs->tf_rdx    = frame.sf_sc.sc_edx;
	regs->tf_rcx    = frame.sf_sc.sc_ecx;
	regs->tf_rax    = frame.sf_sc.sc_eax;
	regs->tf_rip    = frame.sf_sc.sc_eip;
	regs->tf_cs     = frame.sf_sc.sc_cs;
	regs->tf_ds     = frame.sf_sc.sc_ds;
	regs->tf_es     = frame.sf_sc.sc_es;
	regs->tf_fs     = frame.sf_sc.sc_fs;
	regs->tf_gs     = frame.sf_sc.sc_gs;
	regs->tf_rflags = eflags;
	/* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss     = frame.sf_sc.sc_ss;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);

	return (EJUSTRETURN);
}
652
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by rt_sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * td		calling thread; its trapframe is overwritten on success.
 * args		args->ucp is the user address of the struct l_ucontext.
 *
 * Returns EFAULT if the ucontext cannot be read, EINVAL if the saved
 * eflags or %cs fail the checks below (a SIGBUS is also posted in the
 * %cs case), and EJUSTRETURN once the trapframe has been replaced.
 * The result of restoring the alternate signal stack is ignored.
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct l_ucontext uc;
	struct l_sigcontext *context;
	sigset_t bmask;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/*
	 * Check for security violations: only the bits in PSL_USERCHANGE
	 * may differ between the saved and the current flags.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/* Reinstate the signal mask saved in the ucontext. */
	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);

	/*
	 * Restore signal context
	 */
	regs->tf_gs	= context->sc_gs;
	regs->tf_fs	= context->sc_fs;
	regs->tf_es	= context->sc_es;
	regs->tf_ds	= context->sc_ds;
	regs->tf_rdi    = context->sc_edi;
	regs->tf_rsi    = context->sc_esi;
	regs->tf_rbp    = context->sc_ebp;
	regs->tf_rbx    = context->sc_ebx;
	regs->tf_rdx    = context->sc_edx;
	regs->tf_rcx    = context->sc_ecx;
	regs->tf_rax    = context->sc_eax;
	regs->tf_rip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_rflags = eflags;
	/* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
	regs->tf_rsp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);

	/*
	 * call sigaltstack & ignore results..
	 */
	lss = &uc.uc_stack;
	ss.ss_sp = PTRIN(lss->ss_sp);
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	return (EJUSTRETURN);
}
756
757static int
758linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
759{
760	struct proc *p;
761	struct trapframe *frame;
762
763	p = td->td_proc;
764	frame = td->td_frame;
765
766	sa->args[0] = frame->tf_rbx;
767	sa->args[1] = frame->tf_rcx;
768	sa->args[2] = frame->tf_rdx;
769	sa->args[3] = frame->tf_rsi;
770	sa->args[4] = frame->tf_rdi;
771	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
772	sa->code = frame->tf_rax;
773
774	if (sa->code >= p->p_sysent->sv_size)
775		sa->callp = &p->p_sysent->sv_table[0];
776	else
777		sa->callp = &p->p_sysent->sv_table[sa->code];
778	sa->narg = sa->callp->sy_narg;
779
780	td->td_retval[0] = 0;
781	td->td_retval[1] = frame->tf_rdx;
782
783	return (0);
784}
785
786/*
787 * If a linux binary is exec'ing something, try this image activator
788 * first.  We override standard shell script execution in order to
789 * be able to modify the interpreter path.  We only do this if a linux
790 * binary is doing the exec, so we do not create an EXEC module for it.
791 */
792static int	exec_linux_imgact_try(struct image_params *iparams);
793
794static int
795exec_linux_imgact_try(struct image_params *imgp)
796{
797	const char *head = (const char *)imgp->image_header;
798	char *rpath;
799	int error = -1;
800
801	/*
802	* The interpreter for shell scripts run from a linux binary needs
803	* to be located in /compat/linux if possible in order to recursively
804	* maintain linux path emulation.
805	*/
806	if (((const short *)head)[0] == SHELLMAGIC) {
807		/*
808		* Run our normal shell image activator.  If it succeeds attempt
809		* to use the alternate path for the interpreter.  If an
810		* alternate * path is found, use our stringspace to store it.
811		*/
812		if ((error = exec_shell_imgact(imgp)) == 0) {
813			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
814			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
815			    AT_FDCWD);
816			if (rpath != NULL)
817				imgp->args->fname_buf =
818				    imgp->interpreter_name = rpath;
819		}
820	}
821	return (error);
822}
823
824/*
825 * Clear registers on exec
826 * XXX copied from ia32_signal.c.
827 */
828static void
829exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
830{
831	struct trapframe *regs = td->td_frame;
832	struct pcb *pcb = td->td_pcb;
833
834	mtx_lock(&dt_lock);
835	if (td->td_proc->p_md.md_ldt != NULL)
836		user_ldt_free(td);
837	else
838		mtx_unlock(&dt_lock);
839
840	critical_enter();
841	wrmsr(MSR_FSBASE, 0);
842	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
843	pcb->pcb_fsbase = 0;
844	pcb->pcb_gsbase = 0;
845	critical_exit();
846	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
847
848	bzero((char *)regs, sizeof(struct trapframe));
849	regs->tf_rip = imgp->entry_addr;
850	regs->tf_rsp = stack;
851	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
852	regs->tf_gs = _ugssel;
853	regs->tf_fs = _ufssel;
854	regs->tf_es = _udatasel;
855	regs->tf_ds = _udatasel;
856	regs->tf_ss = _udatasel;
857	regs->tf_flags = TF_HASSEGS;
858	regs->tf_cs = _ucode32sel;
859	regs->tf_rbx = imgp->ps_strings;
860
861	fpstate_drop(td);
862
863	/* Do full restore on return so that we can change to a different %cs */
864	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
865	td->td_retval[1] = 0;
866}
867
/*
 * XXX copied from ia32_sysvec.c.
 *
 * Lay out the argument and environment strings and their 32-bit pointer
 * tables on the new image's user stack, and fill in the
 * linux32_ps_strings record at LINUX32_PS_STRINGS.
 *
 * imgp		image parameters; imgp->args supplies the strings and
 *		counts.  imgp->auxarg_size is set to LINUX_AT_COUNT * 2
 *		when auxargs are present and it was 0.
 *
 * Returns the user address of the first argv slot, which is also the
 * initial stack base handed to elf_linux_fixup().  The results of
 * copyout() and suword32() are not checked.
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	int argc, envc;
	u_int32_t *vectp;
	char *stringp, *destp;
	u_int32_t *stack_base;
	struct linux32_ps_strings *arginfo;

	/*
	 * Calculate string base and vector table pointers.
	 * Also deal with signal trampoline code for this exec type.
	 */
	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
	destp =	(caddr_t)arginfo - SPARE_USRSPACE -
	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));

	/*
	 * If we have a valid auxargs ptr, prepare some room
	 * on the stack.
	 */
	if (imgp->auxargs) {
		/*
		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
		 * lower compatibility.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
		    (LINUX_AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets,and imgp->auxarg_size is room
		 * for argument of Runtime loader.
		 */
		vectp = (u_int32_t *) (destp - (imgp->args->argc +
		    imgp->args->envc + 2 + imgp->auxarg_size) *
		    sizeof(u_int32_t));

	} else
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets
		 */
		vectp = (u_int32_t *)(destp - (imgp->args->argc +
		    imgp->args->envc + 2) * sizeof(u_int32_t));

	/*
	 * vectp also becomes our initial stack base
	 */
	stack_base = vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;
	/*
	 * Copy out strings - arguments and environment.
	 * The whole used part of the string area goes out in one block.
	 */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 */
	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.
	 * stringp walks the kernel copy of the strings while destp tracks
	 * the matching user address, one NUL-terminated string at a time.
	 */
	for (; argc > 0; --argc) {
		suword32(vectp++, (uint32_t)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* a null vector table pointer separates the argp's from the envp's */
	suword32(vectp++, 0);

	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nenvstr, envc);

	/*
	 * Fill in environment portion of vector table.
	 */
	for (; envc > 0; --envc) {
		suword32(vectp++, (uint32_t)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* end of vector table is a null pointer */
	suword32(vectp, 0);

	return ((register_t *)stack_base);
}
966
967static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
968    "32-bit Linux emulation");
969
970static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
971SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
972    &linux32_maxdsiz, 0, "");
973static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
974SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
975    &linux32_maxssiz, 0, "");
976static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
977SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
978    &linux32_maxvmem, 0, "");
979
980static void
981linux32_fixlimit(struct rlimit *rl, int which)
982{
983
984	switch (which) {
985	case RLIMIT_DATA:
986		if (linux32_maxdsiz != 0) {
987			if (rl->rlim_cur > linux32_maxdsiz)
988				rl->rlim_cur = linux32_maxdsiz;
989			if (rl->rlim_max > linux32_maxdsiz)
990				rl->rlim_max = linux32_maxdsiz;
991		}
992		break;
993	case RLIMIT_STACK:
994		if (linux32_maxssiz != 0) {
995			if (rl->rlim_cur > linux32_maxssiz)
996				rl->rlim_cur = linux32_maxssiz;
997			if (rl->rlim_max > linux32_maxssiz)
998				rl->rlim_max = linux32_maxssiz;
999		}
1000		break;
1001	case RLIMIT_VMEM:
1002		if (linux32_maxvmem != 0) {
1003			if (rl->rlim_cur > linux32_maxvmem)
1004				rl->rlim_cur = linux32_maxvmem;
1005			if (rl->rlim_max > linux32_maxvmem)
1006				rl->rlim_max = linux32_maxvmem;
1007		}
1008		break;
1009	}
1010}
1011
/*
 * System vector for 32-bit Linux ELF images ("Linux ELF32").
 *
 * Both brand entries in this file (linux_brand and linux_glibc2brand) point
 * at this structure.  It is not const because linux_vdso_install() stores
 * sv_shared_page_obj into it during SYSINIT and passes it to the vdso
 * fixup/relocation helpers.
 */
struct sysentvec elf_linux_sysvec = {
	.sv_size	= LINUX_SYS_MAXSYSCALL,
	.sv_table	= linux_sysent,
	.sv_mask	= 0,
	.sv_sigsize	= LINUX_SIGTBLSZ,
	.sv_sigtbl	= bsd_to_linux_signal,
	.sv_errsize	= ELAST + 1,
	.sv_errtbl	= bsd_to_linux_errno,
	.sv_transtrap	= translate_traps,
	.sv_fixup	= elf_linux_fixup,
	.sv_sendsig	= linux_sendsig,
	.sv_sigcode	= &_binary_linux32_locore_o_start,
	.sv_szsigcode	= &linux_szsigcode,	/* set by linux_vdso_install() */
	.sv_prepsyscall	= NULL,
	.sv_name	= "Linux ELF32",
	.sv_coredump	= elf32_coredump,
	.sv_imgact_try	= exec_linux_imgact_try,
	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	.sv_pagesize	= PAGE_SIZE,
	.sv_minuser	= VM_MIN_ADDRESS,
	.sv_maxuser	= LINUX32_MAXUSER,
	.sv_usrstack	= LINUX32_USRSTACK,
	.sv_psstrings	= LINUX32_PS_STRINGS,
	.sv_stackprot	= VM_PROT_ALL,
	.sv_copyout_strings = linux_copyout_strings,
	.sv_setregs	= exec_linux_setregs,
	.sv_fixlimit	= linux32_fixlimit,	/* clamps data/stack/vmem limits */
	.sv_maxssiz	= &linux32_maxssiz,	/* compat.linux32.maxssiz */
	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
	.sv_set_syscall_retval = cpu_set_syscall_retval,
	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
	.sv_syscallnames = NULL,
	.sv_shared_page_base = LINUX32_SHAREDPAGE,
	/* linux_vdso_install() panics if the sigcode image exceeds this. */
	.sv_shared_page_len = PAGE_SIZE,
	.sv_schedtail	= linux_schedtail,
	.sv_thread_detach = linux_thread_detach,
};
1049
1050static void
1051linux_vdso_install(void *param)
1052{
1053
1054	linux_szsigcode = (&_binary_linux32_locore_o_end -
1055	    &_binary_linux32_locore_o_start);
1056
1057	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1058		panic("Linux invalid vdso size\n");
1059
1060	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1061
1062	linux_shared_page_obj = __elfN(linux_shared_page_init)
1063	    (&linux_shared_page_mapping);
1064
1065	__elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE);
1066
1067	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1068	    linux_szsigcode);
1069	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1070
1071	linux_kplatform = linux_shared_page_mapping +
1072	    (linux_platform - (caddr_t)LINUX32_SHAREDPAGE);
1073}
1074SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1075    (sysinit_cfunc_t)linux_vdso_install, NULL);
1076
1077static void
1078linux_vdso_deinstall(void *param)
1079{
1080
1081	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
1082};
1083SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1084    (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1085
/*
 * Constants describing the ABI note looked for in Linux binaries: the note
 * name ("GNU", used for linux32_brandnote.vendor and its n_namesz) and the
 * value that the first descriptor word must carry for
 * linux32_trans_osrel() to accept the note.  Neither is ever modified.
 */
static const char GNU_ABI_VENDOR[] = "GNU";
static const int GNULINUX_ABI_DESC = 0;
1088
1089static boolean_t
1090linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1091{
1092	const Elf32_Word *desc;
1093	uintptr_t p;
1094
1095	p = (uintptr_t)(note + 1);
1096	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1097
1098	desc = (const Elf32_Word *)p;
1099	if (desc[0] != GNULINUX_ABI_DESC)
1100		return (FALSE);
1101
1102	/*
1103	 * For linux we encode osrel as follows (see linux_mib.c):
1104	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1105	 */
1106	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1107
1108	return (TRUE);
1109}
1110
/*
 * Description of the ELF note that identifies a Linux binary, attached to
 * both brand entries below via .brand_note.
 *
 * n_namesz is sizeof(GNU_ABI_VENDOR), i.e. the vendor string including its
 * terminating NUL.  n_descsz is 16 because linux32_trans_osrel() reads four
 * 32-bit descriptor words.  BN_TRANSLATE_OSREL together with .trans_osrel
 * selects linux32_trans_osrel() as the osrel translator.
 * NOTE(review): how hdr/vendor are matched against an image's notes is
 * decided by the ELF image activator, outside this file.
 */
static Elf_Brandnote linux32_brandnote = {
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
	.hdr.n_descsz	= 16,	/* XXX at least 16 */
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= linux32_trans_osrel
};
1119
/*
 * Brand entry for EM_386 Linux binaries whose interpreter is
 * /lib/ld-linux.so.1.  Identical to linux_glibc2brand except for
 * .interp_path; both use elf_linux_sysvec and linux32_brandnote.
 */
static Elf32_Brandinfo linux_brand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= "/compat/linux",
	.interp_path	= "/lib/ld-linux.so.1",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux32_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
1131
/*
 * Brand entry for EM_386 Linux binaries whose interpreter is
 * /lib/ld-linux.so.2.  Identical to linux_brand except for .interp_path;
 * both use elf_linux_sysvec and linux32_brandnote.
 */
static Elf32_Brandinfo linux_glibc2brand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= "/compat/linux",
	.interp_path	= "/lib/ld-linux.so.2",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux32_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
1143
/*
 * NULL-terminated list of the brands handled by this module.
 * linux_elf_modevent() walks it to insert the brands on load and to check
 * for use and remove them on unload.
 */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};
1149
1150static int
1151linux_elf_modevent(module_t mod, int type, void *data)
1152{
1153	Elf32_Brandinfo **brandinfo;
1154	int error;
1155	struct linux_ioctl_handler **lihp;
1156	struct linux_device_handler **ldhp;
1157
1158	error = 0;
1159
1160	switch(type) {
1161	case MOD_LOAD:
1162		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1163		     ++brandinfo)
1164			if (elf32_insert_brand_entry(*brandinfo) < 0)
1165				error = EINVAL;
1166		if (error == 0) {
1167			SET_FOREACH(lihp, linux_ioctl_handler_set)
1168				linux_ioctl_register_handler(*lihp);
1169			SET_FOREACH(ldhp, linux_device_handler_set)
1170				linux_device_register_handler(*ldhp);
1171			LIST_INIT(&futex_list);
1172			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1173			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1174			    linux_proc_exit, NULL, 1000);
1175			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1176			    linux_proc_exec, NULL, 1000);
1177			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1178			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1179			linux_osd_jail_register();
1180			stclohz = (stathz ? stathz : hz);
1181			if (bootverbose)
1182				printf("Linux ELF exec handler installed\n");
1183		} else
1184			printf("cannot insert Linux ELF brand handler\n");
1185		break;
1186	case MOD_UNLOAD:
1187		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1188		     ++brandinfo)
1189			if (elf32_brand_inuse(*brandinfo))
1190				error = EBUSY;
1191		if (error == 0) {
1192			for (brandinfo = &linux_brandlist[0];
1193			     *brandinfo != NULL; ++brandinfo)
1194				if (elf32_remove_brand_entry(*brandinfo) < 0)
1195					error = EINVAL;
1196		}
1197		if (error == 0) {
1198			SET_FOREACH(lihp, linux_ioctl_handler_set)
1199				linux_ioctl_unregister_handler(*lihp);
1200			SET_FOREACH(ldhp, linux_device_handler_set)
1201				linux_device_unregister_handler(*ldhp);
1202			mtx_destroy(&futex_mtx);
1203			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1204			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1205			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1206			linux_osd_jail_deregister();
1207			if (bootverbose)
1208				printf("Linux ELF exec handler removed\n");
1209		} else
1210			printf("Could not deinstall ELF interpreter entry\n");
1211		break;
1212	default:
1213		return (EOPNOTSUPP);
1214	}
1215	return (error);
1216}
1217
1218static moduledata_t linux_elf_mod = {
1219	"linuxelf",
1220	linux_elf_modevent,
1221	0
1222};
1223
1224DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1225