linux32_sysvec.c revision 294905
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer
14 *    in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: releng/9.3/sys/amd64/linux32/linux32_sysvec.c 294905 2016-01-27 07:42:11Z delphij $");
35#include "opt_compat.h"
36
37#ifndef COMPAT_FREEBSD32
38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39#endif
40
41#define	__ELF_WORD_SIZE	32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_emul.h>
80#include <compat/linux/linux_futex.h>
81#include <compat/linux/linux_mib.h>
82#include <compat/linux/linux_misc.h>
83#include <compat/linux/linux_signal.h>
84#include <compat/linux/linux_util.h>
85
86MODULE_VERSION(linux, 1);
87
88MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
89
90#define	AUXARGS_ENTRY_32(pos, id, val)	\
91	do {				\
92		suword32(pos++, id);	\
93		suword32(pos++, val);	\
94	} while (0)
95
96#if BYTE_ORDER == LITTLE_ENDIAN
97#define SHELLMAGIC      0x2123 /* #! */
98#else
99#define SHELLMAGIC      0x2321
100#endif
101
102/*
103 * Allow the sendsig functions to use the ldebug() facility
104 * even though they are not syscalls themselves. Map them
105 * to syscall 0. This is slightly less bogus than using
106 * ldebug(sigreturn).
107 */
108#define	LINUX_SYS_linux_rt_sendsig	0
109#define	LINUX_SYS_linux_sendsig		0
110
111const char *linux_platform = "i686";
112static int linux_szplatform;
113extern char linux_sigcode[];
114extern int linux_szsigcode;
115
116extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
117
118SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
119SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
120
121static int	elf_linux_fixup(register_t **stack_base,
122		    struct image_params *iparams);
123static register_t *linux_copyout_strings(struct image_params *imgp);
124static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
125static void	exec_linux_setregs(struct thread *td,
126				   struct image_params *imgp, u_long stack);
127static void	linux32_fixlimit(struct rlimit *rl, int which);
128static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
129
130static eventhandler_tag linux_exit_tag;
131static eventhandler_tag linux_exec_tag;
132
133/*
134 * Linux syscalls return negative errno's, we do positive and map them
135 * Reference:
136 *   FreeBSD: src/sys/sys/errno.h
137 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
138 *            linux-2.6.17.8/include/asm-generic/errno.h
139 */
140static int bsd_to_linux_errno[ELAST + 1] = {
141	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
142	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
143	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
144	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
145	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
146	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
147	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
148	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
149	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
150	 -72, -67, -71
151};
152
153int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
154	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
155	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
156	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
157	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
158	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
159	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
160	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
161	0, LINUX_SIGUSR1, LINUX_SIGUSR2
162};
163
164int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
165	SIGHUP, SIGINT, SIGQUIT, SIGILL,
166	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
167	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
168	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
169	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
170	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
171	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
172	SIGIO, SIGURG, SIGSYS
173};
174
/* Value reported to Linux when a trap code has no translation. */
#define LINUX_T_UNKNOWN  255

/*
 * Translation table used by bsd_to_linux_trapcode().  It is indexed by
 * the trap code found in ksi_code; the name after each index is the
 * FreeBSD trap the slot is meant for.  The table is never written, so
 * it is const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Look up the Linux trap number for 'code'.  Any code that does not
 * index into the table yields LINUX_T_UNKNOWN.  The bound check compares
 * against an unsigned element count, so a negative int 'code' converts
 * to a large unsigned value and is rejected as well.
 * Note: 'code' is evaluated twice.
 */
#define bsd_to_linux_trapcode(code)					\
    ((code) < sizeof(_bsd_to_linux_trapcode) /				\
	sizeof(*_bsd_to_linux_trapcode) ?				\
     _bsd_to_linux_trapcode[(code)] :					\
     LINUX_T_UNKNOWN)
213
214struct linux32_ps_strings {
215	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
216	u_int ps_nargvstr;	/* the number of argument strings */
217	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
218	u_int ps_nenvstr;	/* the number of environment strings */
219};
220
221/*
222 * If FreeBSD & Linux have a difference of opinion about what a trap
223 * means, deal with it here.
224 *
225 * MPSAFE
226 */
227static int
228translate_traps(int signal, int trap_code)
229{
230	if (signal != SIGBUS)
231		return signal;
232	switch (trap_code) {
233	case T_PROTFLT:
234	case T_TSSFLT:
235	case T_DOUBLEFLT:
236	case T_PAGEFLT:
237		return SIGSEGV;
238	default:
239		return signal;
240	}
241}
242
243static int
244elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
245{
246	Elf32_Auxargs *args;
247	Elf32_Addr *base;
248	Elf32_Addr *pos, *uplatform;
249	struct linux32_ps_strings *arginfo;
250	int issetugid;
251
252	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
253	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
254
255	KASSERT(curthread->td_proc == imgp->proc,
256	    ("unsafe elf_linux_fixup(), should be curproc"));
257	base = (Elf32_Addr *)*stack_base;
258	args = (Elf32_Auxargs *)imgp->auxargs;
259	pos = base + (imgp->args->argc + imgp->args->envc + 2);
260
261	issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0;
262	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
263
264	/*
265	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
266	 * as it has appeared in the 2.4.0-rc7 first time.
267	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
268	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
269	 * is not present.
270	 * Also see linux_times() implementation.
271	 */
272	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
273		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
274	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
275	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
276	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
277	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
278	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
279	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
280	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
281	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, issetugid);
282	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
283	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
284	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
285	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
286	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
287	if (args->execfd != -1)
288		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
289	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
290
291	free(imgp->auxargs, M_TEMP);
292	imgp->auxargs = NULL;
293
294	base--;
295	suword32(base, (uint32_t)imgp->args->argc);
296	*stack_base = (register_t *)base;
297	return 0;
298}
299
300extern unsigned long linux_sznonrtsigcode;
301
302static void
303linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
304{
305	struct thread *td = curthread;
306	struct proc *p = td->td_proc;
307	struct sigacts *psp;
308	struct trapframe *regs;
309	struct l_rt_sigframe *fp, frame;
310	int oonstack;
311	int sig;
312	int code;
313
314	sig = ksi->ksi_signo;
315	code = ksi->ksi_code;
316	PROC_LOCK_ASSERT(p, MA_OWNED);
317	psp = p->p_sigacts;
318	mtx_assert(&psp->ps_mtx, MA_OWNED);
319	regs = td->td_frame;
320	oonstack = sigonstack(regs->tf_rsp);
321
322#ifdef DEBUG
323	if (ldebug(rt_sendsig))
324		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
325		    catcher, sig, (void*)mask, code);
326#endif
327	/*
328	 * Allocate space for the signal handler context.
329	 */
330	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
331	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
332		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
333		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
334	} else
335		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
336	mtx_unlock(&psp->ps_mtx);
337
338	/*
339	 * Build the argument list for the signal handler.
340	 */
341	if (p->p_sysent->sv_sigtbl)
342		if (sig <= p->p_sysent->sv_sigsize)
343			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
344
345	bzero(&frame, sizeof(frame));
346
347	frame.sf_handler = PTROUT(catcher);
348	frame.sf_sig = sig;
349	frame.sf_siginfo = PTROUT(&fp->sf_si);
350	frame.sf_ucontext = PTROUT(&fp->sf_sc);
351
352	/* Fill in POSIX parts */
353	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
354
355	/*
356	 * Build the signal context to be used by sigreturn.
357	 */
358	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
359	frame.sf_sc.uc_link = 0;		/* XXX ??? */
360
361	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
362	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
363	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
364	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
365	PROC_UNLOCK(p);
366
367	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
368
369	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
370	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
371	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
372	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
373	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
374	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
375	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
376	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
377	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
378	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
379	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
380	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
381	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
382	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
383	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
384	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
385	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
386	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
387	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
388	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
389
390#ifdef DEBUG
391	if (ldebug(rt_sendsig))
392		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
393		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
394		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
395#endif
396
397	if (copyout(&frame, fp, sizeof(frame)) != 0) {
398		/*
399		 * Process has trashed its stack; give it an illegal
400		 * instruction to halt it in its tracks.
401		 */
402#ifdef DEBUG
403		if (ldebug(rt_sendsig))
404			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
405			    fp, oonstack);
406#endif
407		PROC_LOCK(p);
408		sigexit(td, SIGILL);
409	}
410
411	/*
412	 * Build context to run handler in.
413	 */
414	regs->tf_rsp = PTROUT(fp);
415	regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
416	regs->tf_rflags &= ~(PSL_T | PSL_D);
417	regs->tf_cs = _ucode32sel;
418	regs->tf_ss = _udatasel;
419	regs->tf_ds = _udatasel;
420	regs->tf_es = _udatasel;
421	regs->tf_fs = _ufssel;
422	regs->tf_gs = _ugssel;
423	regs->tf_flags = TF_HASSEGS;
424	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
425	PROC_LOCK(p);
426	mtx_lock(&psp->ps_mtx);
427}
428
429
430/*
431 * Send an interrupt to process.
432 *
433 * Stack is set up to allow sigcode stored
434 * in u. to call routine, followed by kcall
435 * to sigreturn routine below.  After sigreturn
436 * resets the signal mask, the stack, and the
437 * frame pointer, it returns to the user
438 * specified pc, psl.
439 */
440static void
441linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
442{
443	struct thread *td = curthread;
444	struct proc *p = td->td_proc;
445	struct sigacts *psp;
446	struct trapframe *regs;
447	struct l_sigframe *fp, frame;
448	l_sigset_t lmask;
449	int oonstack, i;
450	int sig, code;
451
452	sig = ksi->ksi_signo;
453	code = ksi->ksi_code;
454	PROC_LOCK_ASSERT(p, MA_OWNED);
455	psp = p->p_sigacts;
456	mtx_assert(&psp->ps_mtx, MA_OWNED);
457	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
458		/* Signal handler installed with SA_SIGINFO. */
459		linux_rt_sendsig(catcher, ksi, mask);
460		return;
461	}
462
463	regs = td->td_frame;
464	oonstack = sigonstack(regs->tf_rsp);
465
466#ifdef DEBUG
467	if (ldebug(sendsig))
468		printf(ARGS(sendsig, "%p, %d, %p, %u"),
469		    catcher, sig, (void*)mask, code);
470#endif
471
472	/*
473	 * Allocate space for the signal handler context.
474	 */
475	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
476	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
477		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
478		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
479	} else
480		fp = (struct l_sigframe *)regs->tf_rsp - 1;
481	mtx_unlock(&psp->ps_mtx);
482	PROC_UNLOCK(p);
483
484	/*
485	 * Build the argument list for the signal handler.
486	 */
487	if (p->p_sysent->sv_sigtbl)
488		if (sig <= p->p_sysent->sv_sigsize)
489			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
490
491	bzero(&frame, sizeof(frame));
492
493	frame.sf_handler = PTROUT(catcher);
494	frame.sf_sig = sig;
495
496	bsd_to_linux_sigset(mask, &lmask);
497
498	/*
499	 * Build the signal context to be used by sigreturn.
500	 */
501	frame.sf_sc.sc_mask   = lmask.__bits[0];
502	frame.sf_sc.sc_gs     = regs->tf_gs;
503	frame.sf_sc.sc_fs     = regs->tf_fs;
504	frame.sf_sc.sc_es     = regs->tf_es;
505	frame.sf_sc.sc_ds     = regs->tf_ds;
506	frame.sf_sc.sc_edi    = regs->tf_rdi;
507	frame.sf_sc.sc_esi    = regs->tf_rsi;
508	frame.sf_sc.sc_ebp    = regs->tf_rbp;
509	frame.sf_sc.sc_ebx    = regs->tf_rbx;
510	frame.sf_sc.sc_edx    = regs->tf_rdx;
511	frame.sf_sc.sc_ecx    = regs->tf_rcx;
512	frame.sf_sc.sc_eax    = regs->tf_rax;
513	frame.sf_sc.sc_eip    = regs->tf_rip;
514	frame.sf_sc.sc_cs     = regs->tf_cs;
515	frame.sf_sc.sc_eflags = regs->tf_rflags;
516	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
517	frame.sf_sc.sc_ss     = regs->tf_ss;
518	frame.sf_sc.sc_err    = regs->tf_err;
519	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
520	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
521
522	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
523		frame.sf_extramask[i] = lmask.__bits[i+1];
524
525	if (copyout(&frame, fp, sizeof(frame)) != 0) {
526		/*
527		 * Process has trashed its stack; give it an illegal
528		 * instruction to halt it in its tracks.
529		 */
530		PROC_LOCK(p);
531		sigexit(td, SIGILL);
532	}
533
534	/*
535	 * Build context to run handler in.
536	 */
537	regs->tf_rsp = PTROUT(fp);
538	regs->tf_rip = p->p_sysent->sv_sigcode_base;
539	regs->tf_rflags &= ~(PSL_T | PSL_D);
540	regs->tf_cs = _ucode32sel;
541	regs->tf_ss = _udatasel;
542	regs->tf_ds = _udatasel;
543	regs->tf_es = _udatasel;
544	regs->tf_fs = _ufssel;
545	regs->tf_gs = _ugssel;
546	regs->tf_flags = TF_HASSEGS;
547	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
548	PROC_LOCK(p);
549	mtx_lock(&psp->ps_mtx);
550}
551
552/*
553 * System call to cleanup state after a signal
554 * has been taken.  Reset signal mask and
555 * stack state from context left by sendsig (above).
556 * Return to previous pc and psl as specified by
557 * context left by sendsig. Check carefully to
558 * make sure that the user has not modified the
559 * psl to gain improper privileges or to cause
560 * a machine fault.
561 */
562int
563linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
564{
565	struct l_sigframe frame;
566	struct trapframe *regs;
567	sigset_t bmask;
568	l_sigset_t lmask;
569	int eflags, i;
570	ksiginfo_t ksi;
571
572	regs = td->td_frame;
573
574#ifdef DEBUG
575	if (ldebug(sigreturn))
576		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
577#endif
578	/*
579	 * The trampoline code hands us the sigframe.
580	 * It is unsafe to keep track of it ourselves, in the event that a
581	 * program jumps out of a signal handler.
582	 */
583	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
584		return (EFAULT);
585
586	/*
587	 * Check for security violations.
588	 */
589#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
590	eflags = frame.sf_sc.sc_eflags;
591	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
592		return(EINVAL);
593
594	/*
595	 * Don't allow users to load a valid privileged %cs.  Let the
596	 * hardware check for invalid selectors, excess privilege in
597	 * other selectors, invalid %eip's and invalid %esp's.
598	 */
599#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
600	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
601		ksiginfo_init_trap(&ksi);
602		ksi.ksi_signo = SIGBUS;
603		ksi.ksi_code = BUS_OBJERR;
604		ksi.ksi_trapno = T_PROTFLT;
605		ksi.ksi_addr = (void *)regs->tf_rip;
606		trapsignal(td, &ksi);
607		return(EINVAL);
608	}
609
610	lmask.__bits[0] = frame.sf_sc.sc_mask;
611	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
612		lmask.__bits[i+1] = frame.sf_extramask[i];
613	linux_to_bsd_sigset(&lmask, &bmask);
614	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
615
616	/*
617	 * Restore signal context.
618	 */
619	regs->tf_rdi    = frame.sf_sc.sc_edi;
620	regs->tf_rsi    = frame.sf_sc.sc_esi;
621	regs->tf_rbp    = frame.sf_sc.sc_ebp;
622	regs->tf_rbx    = frame.sf_sc.sc_ebx;
623	regs->tf_rdx    = frame.sf_sc.sc_edx;
624	regs->tf_rcx    = frame.sf_sc.sc_ecx;
625	regs->tf_rax    = frame.sf_sc.sc_eax;
626	regs->tf_rip    = frame.sf_sc.sc_eip;
627	regs->tf_cs     = frame.sf_sc.sc_cs;
628	regs->tf_ds     = frame.sf_sc.sc_ds;
629	regs->tf_es     = frame.sf_sc.sc_es;
630	regs->tf_fs     = frame.sf_sc.sc_fs;
631	regs->tf_gs     = frame.sf_sc.sc_gs;
632	regs->tf_rflags = eflags;
633	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
634	regs->tf_ss     = frame.sf_sc.sc_ss;
635	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
636
637	return (EJUSTRETURN);
638}
639
640/*
641 * System call to cleanup state after a signal
642 * has been taken.  Reset signal mask and
643 * stack state from context left by rt_sendsig (above).
644 * Return to previous pc and psl as specified by
645 * context left by sendsig. Check carefully to
646 * make sure that the user has not modified the
647 * psl to gain improper privileges or to cause
648 * a machine fault.
649 */
650int
651linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
652{
653	struct l_ucontext uc;
654	struct l_sigcontext *context;
655	sigset_t bmask;
656	l_stack_t *lss;
657	stack_t ss;
658	struct trapframe *regs;
659	int eflags;
660	ksiginfo_t ksi;
661
662	regs = td->td_frame;
663
664#ifdef DEBUG
665	if (ldebug(rt_sigreturn))
666		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
667#endif
668	/*
669	 * The trampoline code hands us the ucontext.
670	 * It is unsafe to keep track of it ourselves, in the event that a
671	 * program jumps out of a signal handler.
672	 */
673	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
674		return (EFAULT);
675
676	context = &uc.uc_mcontext;
677
678	/*
679	 * Check for security violations.
680	 */
681#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
682	eflags = context->sc_eflags;
683	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
684		return(EINVAL);
685
686	/*
687	 * Don't allow users to load a valid privileged %cs.  Let the
688	 * hardware check for invalid selectors, excess privilege in
689	 * other selectors, invalid %eip's and invalid %esp's.
690	 */
691#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
692	if (!CS_SECURE(context->sc_cs)) {
693		ksiginfo_init_trap(&ksi);
694		ksi.ksi_signo = SIGBUS;
695		ksi.ksi_code = BUS_OBJERR;
696		ksi.ksi_trapno = T_PROTFLT;
697		ksi.ksi_addr = (void *)regs->tf_rip;
698		trapsignal(td, &ksi);
699		return(EINVAL);
700	}
701
702	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
703	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
704
705	/*
706	 * Restore signal context
707	 */
708	regs->tf_gs	= context->sc_gs;
709	regs->tf_fs	= context->sc_fs;
710	regs->tf_es	= context->sc_es;
711	regs->tf_ds	= context->sc_ds;
712	regs->tf_rdi    = context->sc_edi;
713	regs->tf_rsi    = context->sc_esi;
714	regs->tf_rbp    = context->sc_ebp;
715	regs->tf_rbx    = context->sc_ebx;
716	regs->tf_rdx    = context->sc_edx;
717	regs->tf_rcx    = context->sc_ecx;
718	regs->tf_rax    = context->sc_eax;
719	regs->tf_rip    = context->sc_eip;
720	regs->tf_cs     = context->sc_cs;
721	regs->tf_rflags = eflags;
722	regs->tf_rsp    = context->sc_esp_at_signal;
723	regs->tf_ss     = context->sc_ss;
724	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
725
726	/*
727	 * call sigaltstack & ignore results..
728	 */
729	lss = &uc.uc_stack;
730	ss.ss_sp = PTRIN(lss->ss_sp);
731	ss.ss_size = lss->ss_size;
732	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
733
734#ifdef DEBUG
735	if (ldebug(rt_sigreturn))
736		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
737		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
738#endif
739	(void)kern_sigaltstack(td, &ss, NULL);
740
741	return (EJUSTRETURN);
742}
743
744static int
745linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
746{
747	struct proc *p;
748	struct trapframe *frame;
749
750	p = td->td_proc;
751	frame = td->td_frame;
752
753	sa->args[0] = frame->tf_rbx;
754	sa->args[1] = frame->tf_rcx;
755	sa->args[2] = frame->tf_rdx;
756	sa->args[3] = frame->tf_rsi;
757	sa->args[4] = frame->tf_rdi;
758	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
759	sa->code = frame->tf_rax;
760
761	if (sa->code >= p->p_sysent->sv_size)
762		sa->callp = &p->p_sysent->sv_table[0];
763	else
764		sa->callp = &p->p_sysent->sv_table[sa->code];
765	sa->narg = sa->callp->sy_narg;
766
767	td->td_retval[0] = 0;
768	td->td_retval[1] = frame->tf_rdx;
769
770	return (0);
771}
772
773/*
774 * If a linux binary is exec'ing something, try this image activator
775 * first.  We override standard shell script execution in order to
776 * be able to modify the interpreter path.  We only do this if a linux
777 * binary is doing the exec, so we do not create an EXEC module for it.
778 */
779static int	exec_linux_imgact_try(struct image_params *iparams);
780
781static int
782exec_linux_imgact_try(struct image_params *imgp)
783{
784	const char *head = (const char *)imgp->image_header;
785	char *rpath;
786	int error = -1;
787
788	/*
789	* The interpreter for shell scripts run from a linux binary needs
790	* to be located in /compat/linux if possible in order to recursively
791	* maintain linux path emulation.
792	*/
793	if (((const short *)head)[0] == SHELLMAGIC) {
794		/*
795		* Run our normal shell image activator.  If it succeeds attempt
796		* to use the alternate path for the interpreter.  If an
797		* alternate * path is found, use our stringspace to store it.
798		*/
799		if ((error = exec_shell_imgact(imgp)) == 0) {
800			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
801			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
802			    AT_FDCWD);
803			if (rpath != NULL)
804				imgp->args->fname_buf =
805				    imgp->interpreter_name = rpath;
806		}
807	}
808	return (error);
809}
810
811/*
812 * Clear registers on exec
813 * XXX copied from ia32_signal.c.
814 */
815static void
816exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
817{
818	struct trapframe *regs = td->td_frame;
819	struct pcb *pcb = td->td_pcb;
820
821	mtx_lock(&dt_lock);
822	if (td->td_proc->p_md.md_ldt != NULL)
823		user_ldt_free(td);
824	else
825		mtx_unlock(&dt_lock);
826
827	critical_enter();
828	wrmsr(MSR_FSBASE, 0);
829	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
830	pcb->pcb_fsbase = 0;
831	pcb->pcb_gsbase = 0;
832	critical_exit();
833	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
834
835	bzero((char *)regs, sizeof(struct trapframe));
836	regs->tf_rip = imgp->entry_addr;
837	regs->tf_rsp = stack;
838	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
839	regs->tf_gs = _ugssel;
840	regs->tf_fs = _ufssel;
841	regs->tf_es = _udatasel;
842	regs->tf_ds = _udatasel;
843	regs->tf_ss = _udatasel;
844	regs->tf_flags = TF_HASSEGS;
845	regs->tf_cs = _ucode32sel;
846	regs->tf_rbx = imgp->ps_strings;
847
848	fpstate_drop(td);
849
850	/* Do full restore on return so that we can change to a different %cs */
851	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
852	td->td_retval[1] = 0;
853}
854
855/*
856 * XXX copied from ia32_sysvec.c.
857 */
858static register_t *
859linux_copyout_strings(struct image_params *imgp)
860{
861	int argc, envc;
862	u_int32_t *vectp;
863	char *stringp, *destp;
864	u_int32_t *stack_base;
865	struct linux32_ps_strings *arginfo;
866
867	/*
868	 * Calculate string base and vector table pointers.
869	 * Also deal with signal trampoline code for this exec type.
870	 */
871	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
872	destp =	(caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
873	    roundup((ARG_MAX - imgp->args->stringspace),
874	    sizeof(char *));
875
876	/*
877	 * Install LINUX_PLATFORM
878	 */
879	copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
880	    linux_szplatform);
881
882	/*
883	 * If we have a valid auxargs ptr, prepare some room
884	 * on the stack.
885	 */
886	if (imgp->auxargs) {
887		/*
888		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
889		 * lower compatibility.
890		 */
891		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
892		    (LINUX_AT_COUNT * 2);
893		/*
894		 * The '+ 2' is for the null pointers at the end of each of
895		 * the arg and env vector sets,and imgp->auxarg_size is room
896		 * for argument of Runtime loader.
897		 */
898		vectp = (u_int32_t *) (destp - (imgp->args->argc +
899		    imgp->args->envc + 2 + imgp->auxarg_size) *
900		    sizeof(u_int32_t));
901
902	} else
903		/*
904		 * The '+ 2' is for the null pointers at the end of each of
905		 * the arg and env vector sets
906		 */
907		vectp = (u_int32_t *)(destp - (imgp->args->argc +
908		    imgp->args->envc + 2) * sizeof(u_int32_t));
909
910	/*
911	 * vectp also becomes our initial stack base
912	 */
913	stack_base = vectp;
914
915	stringp = imgp->args->begin_argv;
916	argc = imgp->args->argc;
917	envc = imgp->args->envc;
918	/*
919	 * Copy out strings - arguments and environment.
920	 */
921	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
922
923	/*
924	 * Fill in "ps_strings" struct for ps, w, etc.
925	 */
926	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
927	suword32(&arginfo->ps_nargvstr, argc);
928
929	/*
930	 * Fill in argument portion of vector table.
931	 */
932	for (; argc > 0; --argc) {
933		suword32(vectp++, (uint32_t)(intptr_t)destp);
934		while (*stringp++ != 0)
935			destp++;
936		destp++;
937	}
938
939	/* a null vector table pointer separates the argp's from the envp's */
940	suword32(vectp++, 0);
941
942	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
943	suword32(&arginfo->ps_nenvstr, envc);
944
945	/*
946	 * Fill in environment portion of vector table.
947	 */
948	for (; envc > 0; --envc) {
949		suword32(vectp++, (uint32_t)(intptr_t)destp);
950		while (*stringp++ != 0)
951			destp++;
952		destp++;
953	}
954
955	/* end of vector table is a null pointer */
956	suword32(vectp, 0);
957
958	return ((register_t *)stack_base);
959}
960
961static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
962    "32-bit Linux emulation");
963
964static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
965SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
966    &linux32_maxdsiz, 0, "");
967static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
968SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
969    &linux32_maxssiz, 0, "");
970static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
971SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
972    &linux32_maxvmem, 0, "");
973
974static void
975linux32_fixlimit(struct rlimit *rl, int which)
976{
977
978	switch (which) {
979	case RLIMIT_DATA:
980		if (linux32_maxdsiz != 0) {
981			if (rl->rlim_cur > linux32_maxdsiz)
982				rl->rlim_cur = linux32_maxdsiz;
983			if (rl->rlim_max > linux32_maxdsiz)
984				rl->rlim_max = linux32_maxdsiz;
985		}
986		break;
987	case RLIMIT_STACK:
988		if (linux32_maxssiz != 0) {
989			if (rl->rlim_cur > linux32_maxssiz)
990				rl->rlim_cur = linux32_maxssiz;
991			if (rl->rlim_max > linux32_maxssiz)
992				rl->rlim_max = linux32_maxssiz;
993		}
994		break;
995	case RLIMIT_VMEM:
996		if (linux32_maxvmem != 0) {
997			if (rl->rlim_cur > linux32_maxvmem)
998				rl->rlim_cur = linux32_maxvmem;
999			if (rl->rlim_max > linux32_maxvmem)
1000				rl->rlim_max = linux32_maxvmem;
1001		}
1002		break;
1003	}
1004}
1005
1006struct sysentvec elf_linux_sysvec = {
1007	.sv_size	= LINUX_SYS_MAXSYSCALL,
1008	.sv_table	= linux_sysent,
1009	.sv_mask	= 0,
1010	.sv_sigsize	= LINUX_SIGTBLSZ,
1011	.sv_sigtbl	= bsd_to_linux_signal,
1012	.sv_errsize	= ELAST + 1,
1013	.sv_errtbl	= bsd_to_linux_errno,
1014	.sv_transtrap	= translate_traps,
1015	.sv_fixup	= elf_linux_fixup,
1016	.sv_sendsig	= linux_sendsig,
1017	.sv_sigcode	= linux_sigcode,
1018	.sv_szsigcode	= &linux_szsigcode,
1019	.sv_prepsyscall	= NULL,
1020	.sv_name	= "Linux ELF32",
1021	.sv_coredump	= elf32_coredump,
1022	.sv_imgact_try	= exec_linux_imgact_try,
1023	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1024	.sv_pagesize	= PAGE_SIZE,
1025	.sv_minuser	= VM_MIN_ADDRESS,
1026	.sv_maxuser	= LINUX32_MAXUSER,
1027	.sv_usrstack	= LINUX32_USRSTACK,
1028	.sv_psstrings	= LINUX32_PS_STRINGS,
1029	.sv_stackprot	= VM_PROT_ALL,
1030	.sv_copyout_strings = linux_copyout_strings,
1031	.sv_setregs	= exec_linux_setregs,
1032	.sv_fixlimit	= linux32_fixlimit,
1033	.sv_maxssiz	= &linux32_maxssiz,
1034	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1035	.sv_set_syscall_retval = cpu_set_syscall_retval,
1036	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
1037	.sv_syscallnames = NULL,
1038	.sv_shared_page_base = LINUX32_SHAREDPAGE,
1039	.sv_shared_page_len = PAGE_SIZE,
1040	.sv_schedtail	= linux_schedtail,
1041};
1042INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1043
1044static char GNU_ABI_VENDOR[] = "GNU";
1045static int GNULINUX_ABI_DESC = 0;
1046
1047static boolean_t
1048linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1049{
1050	const Elf32_Word *desc;
1051	uintptr_t p;
1052
1053	p = (uintptr_t)(note + 1);
1054	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1055
1056	desc = (const Elf32_Word *)p;
1057	if (desc[0] != GNULINUX_ABI_DESC)
1058		return (FALSE);
1059
1060	/*
1061	 * For linux we encode osrel as follows (see linux_mib.c):
1062	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1063	 */
1064	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1065
1066	return (TRUE);
1067}
1068
1069static Elf_Brandnote linux32_brandnote = {
1070	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1071	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1072	.hdr.n_type	= 1,
1073	.vendor		= GNU_ABI_VENDOR,
1074	.flags		= BN_TRANSLATE_OSREL,
1075	.trans_osrel	= linux32_trans_osrel
1076};
1077
1078static Elf32_Brandinfo linux_brand = {
1079	.brand		= ELFOSABI_LINUX,
1080	.machine	= EM_386,
1081	.compat_3_brand	= "Linux",
1082	.emul_path	= "/compat/linux",
1083	.interp_path	= "/lib/ld-linux.so.1",
1084	.sysvec		= &elf_linux_sysvec,
1085	.interp_newpath	= NULL,
1086	.brand_note	= &linux32_brandnote,
1087	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1088};
1089
1090static Elf32_Brandinfo linux_glibc2brand = {
1091	.brand		= ELFOSABI_LINUX,
1092	.machine	= EM_386,
1093	.compat_3_brand	= "Linux",
1094	.emul_path	= "/compat/linux",
1095	.interp_path	= "/lib/ld-linux.so.2",
1096	.sysvec		= &elf_linux_sysvec,
1097	.interp_newpath	= NULL,
1098	.brand_note	= &linux32_brandnote,
1099	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1100};
1101
1102Elf32_Brandinfo *linux_brandlist[] = {
1103	&linux_brand,
1104	&linux_glibc2brand,
1105	NULL
1106};
1107
1108static int
1109linux_elf_modevent(module_t mod, int type, void *data)
1110{
1111	Elf32_Brandinfo **brandinfo;
1112	int error;
1113	struct linux_ioctl_handler **lihp;
1114	struct linux_device_handler **ldhp;
1115
1116	error = 0;
1117
1118	switch(type) {
1119	case MOD_LOAD:
1120		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1121		     ++brandinfo)
1122			if (elf32_insert_brand_entry(*brandinfo) < 0)
1123				error = EINVAL;
1124		if (error == 0) {
1125			SET_FOREACH(lihp, linux_ioctl_handler_set)
1126				linux_ioctl_register_handler(*lihp);
1127			SET_FOREACH(ldhp, linux_device_handler_set)
1128				linux_device_register_handler(*ldhp);
1129			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1130			sx_init(&emul_shared_lock, "emuldata->shared lock");
1131			LIST_INIT(&futex_list);
1132			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1133			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1134			    linux_proc_exit, NULL, 1000);
1135			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1136			    linux_proc_exec, NULL, 1000);
1137			linux_szplatform = roundup(strlen(linux_platform) + 1,
1138			    sizeof(char *));
1139			linux_osd_jail_register();
1140			stclohz = (stathz ? stathz : hz);
1141			if (bootverbose)
1142				printf("Linux ELF exec handler installed\n");
1143		} else
1144			printf("cannot insert Linux ELF brand handler\n");
1145		break;
1146	case MOD_UNLOAD:
1147		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1148		     ++brandinfo)
1149			if (elf32_brand_inuse(*brandinfo))
1150				error = EBUSY;
1151		if (error == 0) {
1152			for (brandinfo = &linux_brandlist[0];
1153			     *brandinfo != NULL; ++brandinfo)
1154				if (elf32_remove_brand_entry(*brandinfo) < 0)
1155					error = EINVAL;
1156		}
1157		if (error == 0) {
1158			SET_FOREACH(lihp, linux_ioctl_handler_set)
1159				linux_ioctl_unregister_handler(*lihp);
1160			SET_FOREACH(ldhp, linux_device_handler_set)
1161				linux_device_unregister_handler(*ldhp);
1162			mtx_destroy(&emul_lock);
1163			sx_destroy(&emul_shared_lock);
1164			mtx_destroy(&futex_mtx);
1165			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1166			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1167			linux_osd_jail_deregister();
1168			if (bootverbose)
1169				printf("Linux ELF exec handler removed\n");
1170		} else
1171			printf("Could not deinstall ELF interpreter entry\n");
1172		break;
1173	default:
1174		return EOPNOTSUPP;
1175	}
1176	return error;
1177}
1178
1179static moduledata_t linux_elf_mod = {
1180	"linuxelf",
1181	linux_elf_modevent,
1182	0
1183};
1184
1185DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1186