linux32_sysvec.c revision 293493
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer
14 *    in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: stable/10/sys/amd64/linux32/linux32_sysvec.c 293493 2016-01-09 15:16:13Z dchagin $");
35#include "opt_compat.h"
36
37#ifndef COMPAT_FREEBSD32
38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39#endif
40
41#define	__ELF_WORD_SIZE	32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_emul.h>
80#include <compat/linux/linux_futex.h>
81#include <compat/linux/linux_ioctl.h>
82#include <compat/linux/linux_mib.h>
83#include <compat/linux/linux_misc.h>
84#include <compat/linux/linux_signal.h>
85#include <compat/linux/linux_util.h>
86
87MODULE_VERSION(linux, 1);
88
89MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
90
/*
 * Store one 32-bit auxiliary vector entry -- the (id, val) pair -- at the
 * user address 'pos' using suword32(), advancing 'pos' past both words.
 *
 * 'pos' is modified and therefore must be a modifiable lvalue.  Every
 * argument is parenthesized so that an expression such as '*pp' binds as
 * intended.  The results of the suword32() calls are not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)		\
	do {					\
		suword32((pos)++, (id));	\
		suword32((pos)++, (val));	\
	} while (0)
96
/*
 * The two bytes "#!" as they appear when the start of an image header is
 * read as a 16-bit quantity; the value depends on the host byte order.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define	SHELLMAGIC	0x2321
#else
#define	SHELLMAGIC	0x2123	/* #! */
#endif
102
/*
 * The sendsig functions are not system calls, yet they want to use the
 * ldebug() facility.  Give both of them syscall number 0, which is
 * slightly less bogus than using ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_sendsig		0
#define	LINUX_SYS_linux_rt_sendsig	0
111
112const char *linux_platform = "i686";
113static int linux_szplatform;
114extern char linux_sigcode[];
115extern int linux_szsigcode;
116
117extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
118
119SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
120SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
121
122static int	elf_linux_fixup(register_t **stack_base,
123		    struct image_params *iparams);
124static register_t *linux_copyout_strings(struct image_params *imgp);
125static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
126static void	exec_linux_setregs(struct thread *td,
127				   struct image_params *imgp, u_long stack);
128static void	linux32_fixlimit(struct rlimit *rl, int which);
129static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
130
131static eventhandler_tag linux_exit_tag;
132static eventhandler_tag linux_exec_tag;
133static eventhandler_tag linux_thread_dtor_tag;
134
135/*
136 * Linux syscalls return negative errno's, we do positive and map them
137 * Reference:
138 *   FreeBSD: src/sys/sys/errno.h
139 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
140 *            linux-2.6.17.8/include/asm-generic/errno.h
141 */
142static int bsd_to_linux_errno[ELAST + 1] = {
143	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
144	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
145	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
146	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
147	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
148	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
149	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
150	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
151	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
152	 -72, -67, -71
153};
154
155int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
156	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
157	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
158	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
159	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
160	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
161	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
162	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
163	0, LINUX_SIGUSR1, LINUX_SIGUSR2
164};
165
166int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
167	SIGHUP, SIGINT, SIGQUIT, SIGILL,
168	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
169	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
170	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
171	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
172	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
173	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
174	SIGIO, SIGURG, SIGSYS
175};
176
/* Value reported for a trap code that has no entry in the table below. */
#define	LINUX_T_UNKNOWN	255

/*
 * FreeBSD trap code -> Linux trap number.  Every slot is spelled out with
 * its index so that none is left implicitly zero.
 */
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/*
 * Translate 'code' through the table; anything outside the table yields
 * LINUX_T_UNKNOWN.  The comparison is made against an unsigned element
 * count, so a negative int 'code' also falls outside the table.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
215
216struct linux32_ps_strings {
217	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
218	u_int ps_nargvstr;	/* the number of argument strings */
219	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
220	u_int ps_nenvstr;	/* the number of environment strings */
221};
222
223/*
224 * If FreeBSD & Linux have a difference of opinion about what a trap
225 * means, deal with it here.
226 *
227 * MPSAFE
228 */
229static int
230translate_traps(int signal, int trap_code)
231{
232	if (signal != SIGBUS)
233		return signal;
234	switch (trap_code) {
235	case T_PROTFLT:
236	case T_TSSFLT:
237	case T_DOUBLEFLT:
238	case T_PAGEFLT:
239		return SIGSEGV;
240	default:
241		return signal;
242	}
243}
244
245static int
246elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
247{
248	Elf32_Auxargs *args;
249	Elf32_Addr *base;
250	Elf32_Addr *pos, *uplatform;
251	struct linux32_ps_strings *arginfo;
252
253	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
254	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
255
256	KASSERT(curthread->td_proc == imgp->proc,
257	    ("unsafe elf_linux_fixup(), should be curproc"));
258	base = (Elf32_Addr *)*stack_base;
259	args = (Elf32_Auxargs *)imgp->auxargs;
260	pos = base + (imgp->args->argc + imgp->args->envc + 2);
261
262	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
263
264	/*
265	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
266	 * as it has appeared in the 2.4.0-rc7 first time.
267	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
268	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
269	 * is not present.
270	 * Also see linux_times() implementation.
271	 */
272	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
273		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
274	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
275	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
276	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
277	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
278	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
279	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
280	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
281	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
282	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
283	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
284	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
285	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
286	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
287	if (args->execfd != -1)
288		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
289	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
290
291	free(imgp->auxargs, M_TEMP);
292	imgp->auxargs = NULL;
293
294	base--;
295	suword32(base, (uint32_t)imgp->args->argc);
296	*stack_base = (register_t *)base;
297	return 0;
298}
299
300extern unsigned long linux_sznonrtsigcode;
301
302static void
303linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
304{
305	struct thread *td = curthread;
306	struct proc *p = td->td_proc;
307	struct sigacts *psp;
308	struct trapframe *regs;
309	struct l_rt_sigframe *fp, frame;
310	int oonstack;
311	int sig;
312	int code;
313
314	sig = ksi->ksi_signo;
315	code = ksi->ksi_code;
316	PROC_LOCK_ASSERT(p, MA_OWNED);
317	psp = p->p_sigacts;
318	mtx_assert(&psp->ps_mtx, MA_OWNED);
319	regs = td->td_frame;
320	oonstack = sigonstack(regs->tf_rsp);
321
322#ifdef DEBUG
323	if (ldebug(rt_sendsig))
324		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
325		    catcher, sig, (void*)mask, code);
326#endif
327	/*
328	 * Allocate space for the signal handler context.
329	 */
330	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
331	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
332		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
333		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
334	} else
335		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
336	mtx_unlock(&psp->ps_mtx);
337
338	/*
339	 * Build the argument list for the signal handler.
340	 */
341	if (p->p_sysent->sv_sigtbl)
342		if (sig <= p->p_sysent->sv_sigsize)
343			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
344
345	bzero(&frame, sizeof(frame));
346
347	frame.sf_handler = PTROUT(catcher);
348	frame.sf_sig = sig;
349	frame.sf_siginfo = PTROUT(&fp->sf_si);
350	frame.sf_ucontext = PTROUT(&fp->sf_sc);
351
352	/* Fill in POSIX parts */
353	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
354
355	/*
356	 * Build the signal context to be used by sigreturn.
357	 */
358	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
359	frame.sf_sc.uc_link = 0;		/* XXX ??? */
360
361	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
362	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
363	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
364	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
365	PROC_UNLOCK(p);
366
367	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
368
369	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
370	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
371	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
372	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
373	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
374	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
375	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
376	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
377	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
378	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
379	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
380	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
381	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
382	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
383	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
384	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
385	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
386	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
387	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
388	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
389
390#ifdef DEBUG
391	if (ldebug(rt_sendsig))
392		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
393		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
394		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
395#endif
396
397	if (copyout(&frame, fp, sizeof(frame)) != 0) {
398		/*
399		 * Process has trashed its stack; give it an illegal
400		 * instruction to halt it in its tracks.
401		 */
402#ifdef DEBUG
403		if (ldebug(rt_sendsig))
404			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
405			    fp, oonstack);
406#endif
407		PROC_LOCK(p);
408		sigexit(td, SIGILL);
409	}
410
411	/*
412	 * Build context to run handler in.
413	 */
414	regs->tf_rsp = PTROUT(fp);
415	regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
416	regs->tf_rflags &= ~(PSL_T | PSL_D);
417	regs->tf_cs = _ucode32sel;
418	regs->tf_ss = _udatasel;
419	regs->tf_ds = _udatasel;
420	regs->tf_es = _udatasel;
421	regs->tf_fs = _ufssel;
422	regs->tf_gs = _ugssel;
423	regs->tf_flags = TF_HASSEGS;
424	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
425	PROC_LOCK(p);
426	mtx_lock(&psp->ps_mtx);
427}
428
429
430/*
431 * Send an interrupt to process.
432 *
433 * Stack is set up to allow sigcode stored
434 * in u. to call routine, followed by kcall
435 * to sigreturn routine below.  After sigreturn
436 * resets the signal mask, the stack, and the
437 * frame pointer, it returns to the user
438 * specified pc, psl.
439 */
440static void
441linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
442{
443	struct thread *td = curthread;
444	struct proc *p = td->td_proc;
445	struct sigacts *psp;
446	struct trapframe *regs;
447	struct l_sigframe *fp, frame;
448	l_sigset_t lmask;
449	int oonstack, i;
450	int sig, code;
451
452	sig = ksi->ksi_signo;
453	code = ksi->ksi_code;
454	PROC_LOCK_ASSERT(p, MA_OWNED);
455	psp = p->p_sigacts;
456	mtx_assert(&psp->ps_mtx, MA_OWNED);
457	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
458		/* Signal handler installed with SA_SIGINFO. */
459		linux_rt_sendsig(catcher, ksi, mask);
460		return;
461	}
462
463	regs = td->td_frame;
464	oonstack = sigonstack(regs->tf_rsp);
465
466#ifdef DEBUG
467	if (ldebug(sendsig))
468		printf(ARGS(sendsig, "%p, %d, %p, %u"),
469		    catcher, sig, (void*)mask, code);
470#endif
471
472	/*
473	 * Allocate space for the signal handler context.
474	 */
475	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
476	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
477		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
478		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
479	} else
480		fp = (struct l_sigframe *)regs->tf_rsp - 1;
481	mtx_unlock(&psp->ps_mtx);
482	PROC_UNLOCK(p);
483
484	/*
485	 * Build the argument list for the signal handler.
486	 */
487	if (p->p_sysent->sv_sigtbl)
488		if (sig <= p->p_sysent->sv_sigsize)
489			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
490
491	bzero(&frame, sizeof(frame));
492
493	frame.sf_handler = PTROUT(catcher);
494	frame.sf_sig = sig;
495
496	bsd_to_linux_sigset(mask, &lmask);
497
498	/*
499	 * Build the signal context to be used by sigreturn.
500	 */
501	frame.sf_sc.sc_mask   = lmask.__bits[0];
502	frame.sf_sc.sc_gs     = regs->tf_gs;
503	frame.sf_sc.sc_fs     = regs->tf_fs;
504	frame.sf_sc.sc_es     = regs->tf_es;
505	frame.sf_sc.sc_ds     = regs->tf_ds;
506	frame.sf_sc.sc_edi    = regs->tf_rdi;
507	frame.sf_sc.sc_esi    = regs->tf_rsi;
508	frame.sf_sc.sc_ebp    = regs->tf_rbp;
509	frame.sf_sc.sc_ebx    = regs->tf_rbx;
510	frame.sf_sc.sc_edx    = regs->tf_rdx;
511	frame.sf_sc.sc_ecx    = regs->tf_rcx;
512	frame.sf_sc.sc_eax    = regs->tf_rax;
513	frame.sf_sc.sc_eip    = regs->tf_rip;
514	frame.sf_sc.sc_cs     = regs->tf_cs;
515	frame.sf_sc.sc_eflags = regs->tf_rflags;
516	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
517	frame.sf_sc.sc_ss     = regs->tf_ss;
518	frame.sf_sc.sc_err    = regs->tf_err;
519	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
520	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
521
522	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
523		frame.sf_extramask[i] = lmask.__bits[i+1];
524
525	if (copyout(&frame, fp, sizeof(frame)) != 0) {
526		/*
527		 * Process has trashed its stack; give it an illegal
528		 * instruction to halt it in its tracks.
529		 */
530		PROC_LOCK(p);
531		sigexit(td, SIGILL);
532	}
533
534	/*
535	 * Build context to run handler in.
536	 */
537	regs->tf_rsp = PTROUT(fp);
538	regs->tf_rip = p->p_sysent->sv_sigcode_base;
539	regs->tf_rflags &= ~(PSL_T | PSL_D);
540	regs->tf_cs = _ucode32sel;
541	regs->tf_ss = _udatasel;
542	regs->tf_ds = _udatasel;
543	regs->tf_es = _udatasel;
544	regs->tf_fs = _ufssel;
545	regs->tf_gs = _ugssel;
546	regs->tf_flags = TF_HASSEGS;
547	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
548	PROC_LOCK(p);
549	mtx_lock(&psp->ps_mtx);
550}
551
552/*
553 * System call to cleanup state after a signal
554 * has been taken.  Reset signal mask and
555 * stack state from context left by sendsig (above).
556 * Return to previous pc and psl as specified by
557 * context left by sendsig. Check carefully to
558 * make sure that the user has not modified the
559 * psl to gain improper privileges or to cause
560 * a machine fault.
561 */
562int
563linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
564{
565	struct l_sigframe frame;
566	struct trapframe *regs;
567	sigset_t bmask;
568	l_sigset_t lmask;
569	int eflags, i;
570	ksiginfo_t ksi;
571
572	regs = td->td_frame;
573
574#ifdef DEBUG
575	if (ldebug(sigreturn))
576		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
577#endif
578	/*
579	 * The trampoline code hands us the sigframe.
580	 * It is unsafe to keep track of it ourselves, in the event that a
581	 * program jumps out of a signal handler.
582	 */
583	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
584		return (EFAULT);
585
586	/*
587	 * Check for security violations.
588	 */
589#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
590	eflags = frame.sf_sc.sc_eflags;
591	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
592		return(EINVAL);
593
594	/*
595	 * Don't allow users to load a valid privileged %cs.  Let the
596	 * hardware check for invalid selectors, excess privilege in
597	 * other selectors, invalid %eip's and invalid %esp's.
598	 */
599#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
600	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
601		ksiginfo_init_trap(&ksi);
602		ksi.ksi_signo = SIGBUS;
603		ksi.ksi_code = BUS_OBJERR;
604		ksi.ksi_trapno = T_PROTFLT;
605		ksi.ksi_addr = (void *)regs->tf_rip;
606		trapsignal(td, &ksi);
607		return(EINVAL);
608	}
609
610	lmask.__bits[0] = frame.sf_sc.sc_mask;
611	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
612		lmask.__bits[i+1] = frame.sf_extramask[i];
613	linux_to_bsd_sigset(&lmask, &bmask);
614	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
615
616	/*
617	 * Restore signal context.
618	 */
619	regs->tf_rdi    = frame.sf_sc.sc_edi;
620	regs->tf_rsi    = frame.sf_sc.sc_esi;
621	regs->tf_rbp    = frame.sf_sc.sc_ebp;
622	regs->tf_rbx    = frame.sf_sc.sc_ebx;
623	regs->tf_rdx    = frame.sf_sc.sc_edx;
624	regs->tf_rcx    = frame.sf_sc.sc_ecx;
625	regs->tf_rax    = frame.sf_sc.sc_eax;
626	regs->tf_rip    = frame.sf_sc.sc_eip;
627	regs->tf_cs     = frame.sf_sc.sc_cs;
628	regs->tf_ds     = frame.sf_sc.sc_ds;
629	regs->tf_es     = frame.sf_sc.sc_es;
630	regs->tf_fs     = frame.sf_sc.sc_fs;
631	regs->tf_gs     = frame.sf_sc.sc_gs;
632	regs->tf_rflags = eflags;
633	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
634	regs->tf_ss     = frame.sf_sc.sc_ss;
635	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
636
637	return (EJUSTRETURN);
638}
639
640/*
641 * System call to cleanup state after a signal
642 * has been taken.  Reset signal mask and
643 * stack state from context left by rt_sendsig (above).
644 * Return to previous pc and psl as specified by
645 * context left by sendsig. Check carefully to
646 * make sure that the user has not modified the
647 * psl to gain improper privileges or to cause
648 * a machine fault.
649 */
650int
651linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
652{
653	struct l_ucontext uc;
654	struct l_sigcontext *context;
655	sigset_t bmask;
656	l_stack_t *lss;
657	stack_t ss;
658	struct trapframe *regs;
659	int eflags;
660	ksiginfo_t ksi;
661
662	regs = td->td_frame;
663
664#ifdef DEBUG
665	if (ldebug(rt_sigreturn))
666		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
667#endif
668	/*
669	 * The trampoline code hands us the ucontext.
670	 * It is unsafe to keep track of it ourselves, in the event that a
671	 * program jumps out of a signal handler.
672	 */
673	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
674		return (EFAULT);
675
676	context = &uc.uc_mcontext;
677
678	/*
679	 * Check for security violations.
680	 */
681#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
682	eflags = context->sc_eflags;
683	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
684		return(EINVAL);
685
686	/*
687	 * Don't allow users to load a valid privileged %cs.  Let the
688	 * hardware check for invalid selectors, excess privilege in
689	 * other selectors, invalid %eip's and invalid %esp's.
690	 */
691#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
692	if (!CS_SECURE(context->sc_cs)) {
693		ksiginfo_init_trap(&ksi);
694		ksi.ksi_signo = SIGBUS;
695		ksi.ksi_code = BUS_OBJERR;
696		ksi.ksi_trapno = T_PROTFLT;
697		ksi.ksi_addr = (void *)regs->tf_rip;
698		trapsignal(td, &ksi);
699		return(EINVAL);
700	}
701
702	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
703	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
704
705	/*
706	 * Restore signal context
707	 */
708	regs->tf_gs	= context->sc_gs;
709	regs->tf_fs	= context->sc_fs;
710	regs->tf_es	= context->sc_es;
711	regs->tf_ds	= context->sc_ds;
712	regs->tf_rdi    = context->sc_edi;
713	regs->tf_rsi    = context->sc_esi;
714	regs->tf_rbp    = context->sc_ebp;
715	regs->tf_rbx    = context->sc_ebx;
716	regs->tf_rdx    = context->sc_edx;
717	regs->tf_rcx    = context->sc_ecx;
718	regs->tf_rax    = context->sc_eax;
719	regs->tf_rip    = context->sc_eip;
720	regs->tf_cs     = context->sc_cs;
721	regs->tf_rflags = eflags;
722	regs->tf_rsp    = context->sc_esp_at_signal;
723	regs->tf_ss     = context->sc_ss;
724	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
725
726	/*
727	 * call sigaltstack & ignore results..
728	 */
729	lss = &uc.uc_stack;
730	ss.ss_sp = PTRIN(lss->ss_sp);
731	ss.ss_size = lss->ss_size;
732	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
733
734#ifdef DEBUG
735	if (ldebug(rt_sigreturn))
736		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
737		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
738#endif
739	(void)kern_sigaltstack(td, &ss, NULL);
740
741	return (EJUSTRETURN);
742}
743
744static int
745linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
746{
747	struct proc *p;
748	struct trapframe *frame;
749
750	p = td->td_proc;
751	frame = td->td_frame;
752
753	sa->args[0] = frame->tf_rbx;
754	sa->args[1] = frame->tf_rcx;
755	sa->args[2] = frame->tf_rdx;
756	sa->args[3] = frame->tf_rsi;
757	sa->args[4] = frame->tf_rdi;
758	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
759	sa->code = frame->tf_rax;
760
761	if (sa->code >= p->p_sysent->sv_size)
762		sa->callp = &p->p_sysent->sv_table[0];
763	else
764		sa->callp = &p->p_sysent->sv_table[sa->code];
765	sa->narg = sa->callp->sy_narg;
766
767	td->td_retval[0] = 0;
768	td->td_retval[1] = frame->tf_rdx;
769
770	return (0);
771}
772
773/*
774 * If a linux binary is exec'ing something, try this image activator
775 * first.  We override standard shell script execution in order to
776 * be able to modify the interpreter path.  We only do this if a linux
777 * binary is doing the exec, so we do not create an EXEC module for it.
778 */
779static int	exec_linux_imgact_try(struct image_params *iparams);
780
781static int
782exec_linux_imgact_try(struct image_params *imgp)
783{
784	const char *head = (const char *)imgp->image_header;
785	char *rpath;
786	int error = -1;
787
788	/*
789	* The interpreter for shell scripts run from a linux binary needs
790	* to be located in /compat/linux if possible in order to recursively
791	* maintain linux path emulation.
792	*/
793	if (((const short *)head)[0] == SHELLMAGIC) {
794		/*
795		* Run our normal shell image activator.  If it succeeds attempt
796		* to use the alternate path for the interpreter.  If an
797		* alternate * path is found, use our stringspace to store it.
798		*/
799		if ((error = exec_shell_imgact(imgp)) == 0) {
800			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
801			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
802			    AT_FDCWD);
803			if (rpath != NULL)
804				imgp->args->fname_buf =
805				    imgp->interpreter_name = rpath;
806		}
807	}
808	return (error);
809}
810
811/*
812 * Clear registers on exec
813 * XXX copied from ia32_signal.c.
814 */
815static void
816exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
817{
818	struct trapframe *regs = td->td_frame;
819	struct pcb *pcb = td->td_pcb;
820
821	mtx_lock(&dt_lock);
822	if (td->td_proc->p_md.md_ldt != NULL)
823		user_ldt_free(td);
824	else
825		mtx_unlock(&dt_lock);
826
827	critical_enter();
828	wrmsr(MSR_FSBASE, 0);
829	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
830	pcb->pcb_fsbase = 0;
831	pcb->pcb_gsbase = 0;
832	critical_exit();
833	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
834
835	bzero((char *)regs, sizeof(struct trapframe));
836	regs->tf_rip = imgp->entry_addr;
837	regs->tf_rsp = stack;
838	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
839	regs->tf_gs = _ugssel;
840	regs->tf_fs = _ufssel;
841	regs->tf_es = _udatasel;
842	regs->tf_ds = _udatasel;
843	regs->tf_ss = _udatasel;
844	regs->tf_flags = TF_HASSEGS;
845	regs->tf_cs = _ucode32sel;
846	regs->tf_rbx = imgp->ps_strings;
847
848	fpstate_drop(td);
849
850	/* Do full restore on return so that we can change to a different %cs */
851	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
852	td->td_retval[1] = 0;
853}
854
855/*
856 * XXX copied from ia32_sysvec.c.
857 */
858static register_t *
859linux_copyout_strings(struct image_params *imgp)
860{
861	int argc, envc;
862	u_int32_t *vectp;
863	char *stringp, *destp;
864	u_int32_t *stack_base;
865	struct linux32_ps_strings *arginfo;
866
867	/*
868	 * Calculate string base and vector table pointers.
869	 * Also deal with signal trampoline code for this exec type.
870	 */
871	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
872	destp =	(caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
873	    roundup((ARG_MAX - imgp->args->stringspace),
874	    sizeof(char *));
875
876	/*
877	 * Install LINUX_PLATFORM
878	 */
879	copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
880	    linux_szplatform);
881
882	/*
883	 * If we have a valid auxargs ptr, prepare some room
884	 * on the stack.
885	 */
886	if (imgp->auxargs) {
887		/*
888		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
889		 * lower compatibility.
890		 */
891		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
892		    (LINUX_AT_COUNT * 2);
893		/*
894		 * The '+ 2' is for the null pointers at the end of each of
895		 * the arg and env vector sets,and imgp->auxarg_size is room
896		 * for argument of Runtime loader.
897		 */
898		vectp = (u_int32_t *) (destp - (imgp->args->argc +
899		    imgp->args->envc + 2 + imgp->auxarg_size) *
900		    sizeof(u_int32_t));
901
902	} else
903		/*
904		 * The '+ 2' is for the null pointers at the end of each of
905		 * the arg and env vector sets
906		 */
907		vectp = (u_int32_t *)(destp - (imgp->args->argc +
908		    imgp->args->envc + 2) * sizeof(u_int32_t));
909
910	/*
911	 * vectp also becomes our initial stack base
912	 */
913	stack_base = vectp;
914
915	stringp = imgp->args->begin_argv;
916	argc = imgp->args->argc;
917	envc = imgp->args->envc;
918	/*
919	 * Copy out strings - arguments and environment.
920	 */
921	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
922
923	/*
924	 * Fill in "ps_strings" struct for ps, w, etc.
925	 */
926	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
927	suword32(&arginfo->ps_nargvstr, argc);
928
929	/*
930	 * Fill in argument portion of vector table.
931	 */
932	for (; argc > 0; --argc) {
933		suword32(vectp++, (uint32_t)(intptr_t)destp);
934		while (*stringp++ != 0)
935			destp++;
936		destp++;
937	}
938
939	/* a null vector table pointer separates the argp's from the envp's */
940	suword32(vectp++, 0);
941
942	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
943	suword32(&arginfo->ps_nenvstr, envc);
944
945	/*
946	 * Fill in environment portion of vector table.
947	 */
948	for (; envc > 0; --envc) {
949		suword32(vectp++, (uint32_t)(intptr_t)destp);
950		while (*stringp++ != 0)
951			destp++;
952		destp++;
953	}
954
955	/* end of vector table is a null pointer */
956	suword32(vectp, 0);
957
958	return ((register_t *)stack_base);
959}
960
961static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
962    "32-bit Linux emulation");
963
964static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
965SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
966    &linux32_maxdsiz, 0, "");
967static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
968SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
969    &linux32_maxssiz, 0, "");
970static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
971SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
972    &linux32_maxvmem, 0, "");
973
974static void
975linux32_fixlimit(struct rlimit *rl, int which)
976{
977
978	switch (which) {
979	case RLIMIT_DATA:
980		if (linux32_maxdsiz != 0) {
981			if (rl->rlim_cur > linux32_maxdsiz)
982				rl->rlim_cur = linux32_maxdsiz;
983			if (rl->rlim_max > linux32_maxdsiz)
984				rl->rlim_max = linux32_maxdsiz;
985		}
986		break;
987	case RLIMIT_STACK:
988		if (linux32_maxssiz != 0) {
989			if (rl->rlim_cur > linux32_maxssiz)
990				rl->rlim_cur = linux32_maxssiz;
991			if (rl->rlim_max > linux32_maxssiz)
992				rl->rlim_max = linux32_maxssiz;
993		}
994		break;
995	case RLIMIT_VMEM:
996		if (linux32_maxvmem != 0) {
997			if (rl->rlim_cur > linux32_maxvmem)
998				rl->rlim_cur = linux32_maxvmem;
999			if (rl->rlim_max > linux32_maxvmem)
1000				rl->rlim_max = linux32_maxvmem;
1001		}
1002		break;
1003	}
1004}
1005
1006struct sysentvec elf_linux_sysvec = {
1007	.sv_size	= LINUX_SYS_MAXSYSCALL,
1008	.sv_table	= linux_sysent,
1009	.sv_mask	= 0,
1010	.sv_sigsize	= LINUX_SIGTBLSZ,
1011	.sv_sigtbl	= bsd_to_linux_signal,
1012	.sv_errsize	= ELAST + 1,
1013	.sv_errtbl	= bsd_to_linux_errno,
1014	.sv_transtrap	= translate_traps,
1015	.sv_fixup	= elf_linux_fixup,
1016	.sv_sendsig	= linux_sendsig,
1017	.sv_sigcode	= linux_sigcode,
1018	.sv_szsigcode	= &linux_szsigcode,
1019	.sv_prepsyscall	= NULL,
1020	.sv_name	= "Linux ELF32",
1021	.sv_coredump	= elf32_coredump,
1022	.sv_imgact_try	= exec_linux_imgact_try,
1023	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1024	.sv_pagesize	= PAGE_SIZE,
1025	.sv_minuser	= VM_MIN_ADDRESS,
1026	.sv_maxuser	= LINUX32_MAXUSER,
1027	.sv_usrstack	= LINUX32_USRSTACK,
1028	.sv_psstrings	= LINUX32_PS_STRINGS,
1029	.sv_stackprot	= VM_PROT_ALL,
1030	.sv_copyout_strings = linux_copyout_strings,
1031	.sv_setregs	= exec_linux_setregs,
1032	.sv_fixlimit	= linux32_fixlimit,
1033	.sv_maxssiz	= &linux32_maxssiz,
1034	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1035	.sv_set_syscall_retval = cpu_set_syscall_retval,
1036	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
1037	.sv_syscallnames = NULL,
1038	.sv_shared_page_base = LINUX32_SHAREDPAGE,
1039	.sv_shared_page_len = PAGE_SIZE,
1040	.sv_schedtail	= linux_schedtail,
1041	.sv_thread_detach = linux_thread_detach,
1042};
1043INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1044
1045static char GNU_ABI_VENDOR[] = "GNU";
1046static int GNULINUX_ABI_DESC = 0;
1047
1048static boolean_t
1049linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1050{
1051	const Elf32_Word *desc;
1052	uintptr_t p;
1053
1054	p = (uintptr_t)(note + 1);
1055	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1056
1057	desc = (const Elf32_Word *)p;
1058	if (desc[0] != GNULINUX_ABI_DESC)
1059		return (FALSE);
1060
1061	/*
1062	 * For linux we encode osrel as follows (see linux_mib.c):
1063	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1064	 */
1065	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1066
1067	return (TRUE);
1068}
1069
1070static Elf_Brandnote linux32_brandnote = {
1071	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1072	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1073	.hdr.n_type	= 1,
1074	.vendor		= GNU_ABI_VENDOR,
1075	.flags		= BN_TRANSLATE_OSREL,
1076	.trans_osrel	= linux32_trans_osrel
1077};
1078
1079static Elf32_Brandinfo linux_brand = {
1080	.brand		= ELFOSABI_LINUX,
1081	.machine	= EM_386,
1082	.compat_3_brand	= "Linux",
1083	.emul_path	= "/compat/linux",
1084	.interp_path	= "/lib/ld-linux.so.1",
1085	.sysvec		= &elf_linux_sysvec,
1086	.interp_newpath	= NULL,
1087	.brand_note	= &linux32_brandnote,
1088	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1089};
1090
1091static Elf32_Brandinfo linux_glibc2brand = {
1092	.brand		= ELFOSABI_LINUX,
1093	.machine	= EM_386,
1094	.compat_3_brand	= "Linux",
1095	.emul_path	= "/compat/linux",
1096	.interp_path	= "/lib/ld-linux.so.2",
1097	.sysvec		= &elf_linux_sysvec,
1098	.interp_newpath	= NULL,
1099	.brand_note	= &linux32_brandnote,
1100	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1101};
1102
1103Elf32_Brandinfo *linux_brandlist[] = {
1104	&linux_brand,
1105	&linux_glibc2brand,
1106	NULL
1107};
1108
1109static int
1110linux_elf_modevent(module_t mod, int type, void *data)
1111{
1112	Elf32_Brandinfo **brandinfo;
1113	int error;
1114	struct linux_ioctl_handler **lihp;
1115	struct linux_device_handler **ldhp;
1116
1117	error = 0;
1118
1119	switch(type) {
1120	case MOD_LOAD:
1121		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1122		     ++brandinfo)
1123			if (elf32_insert_brand_entry(*brandinfo) < 0)
1124				error = EINVAL;
1125		if (error == 0) {
1126			SET_FOREACH(lihp, linux_ioctl_handler_set)
1127				linux_ioctl_register_handler(*lihp);
1128			SET_FOREACH(ldhp, linux_device_handler_set)
1129				linux_device_register_handler(*ldhp);
1130			LIST_INIT(&futex_list);
1131			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1132			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1133			    linux_proc_exit, NULL, 1000);
1134			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1135			    linux_proc_exec, NULL, 1000);
1136			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1137			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1138			linux_szplatform = roundup(strlen(linux_platform) + 1,
1139			    sizeof(char *));
1140			linux_osd_jail_register();
1141			stclohz = (stathz ? stathz : hz);
1142			if (bootverbose)
1143				printf("Linux ELF exec handler installed\n");
1144		} else
1145			printf("cannot insert Linux ELF brand handler\n");
1146		break;
1147	case MOD_UNLOAD:
1148		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1149		     ++brandinfo)
1150			if (elf32_brand_inuse(*brandinfo))
1151				error = EBUSY;
1152		if (error == 0) {
1153			for (brandinfo = &linux_brandlist[0];
1154			     *brandinfo != NULL; ++brandinfo)
1155				if (elf32_remove_brand_entry(*brandinfo) < 0)
1156					error = EINVAL;
1157		}
1158		if (error == 0) {
1159			SET_FOREACH(lihp, linux_ioctl_handler_set)
1160				linux_ioctl_unregister_handler(*lihp);
1161			SET_FOREACH(ldhp, linux_device_handler_set)
1162				linux_device_unregister_handler(*ldhp);
1163			mtx_destroy(&futex_mtx);
1164			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1165			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1166			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1167			linux_osd_jail_deregister();
1168			if (bootverbose)
1169				printf("Linux ELF exec handler removed\n");
1170		} else
1171			printf("Could not deinstall ELF interpreter entry\n");
1172		break;
1173	default:
1174		return EOPNOTSUPP;
1175	}
1176	return error;
1177}
1178
1179static moduledata_t linux_elf_mod = {
1180	"linuxelf",
1181	linux_elf_modevent,
1182	0
1183};
1184
1185DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1186