linux_sysvec.c revision 293540
1/*-
 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/i386/linux/linux_sysvec.c 293540 2016-01-09 16:29:51Z dchagin $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/exec.h>
35#include <sys/fcntl.h>
36#include <sys/imgact.h>
37#include <sys/imgact_aout.h>
38#include <sys/imgact_elf.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <sys/proc.h>
45#include <sys/signalvar.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysctl.h>
48#include <sys/sysent.h>
49#include <sys/sysproto.h>
50#include <sys/vnode.h>
51#include <sys/eventhandler.h>
52
53#include <vm/vm.h>
54#include <vm/pmap.h>
55#include <vm/vm_extern.h>
56#include <vm/vm_map.h>
57#include <vm/vm_object.h>
58#include <vm/vm_page.h>
59#include <vm/vm_param.h>
60
61#include <machine/cpu.h>
62#include <machine/cputypes.h>
63#include <machine/md_var.h>
64#include <machine/pcb.h>
65
66#include <i386/linux/linux.h>
67#include <i386/linux/linux_proto.h>
68#include <compat/linux/linux_emul.h>
69#include <compat/linux/linux_futex.h>
70#include <compat/linux/linux_ioctl.h>
71#include <compat/linux/linux_mib.h>
72#include <compat/linux/linux_misc.h>
73#include <compat/linux/linux_signal.h>
74#include <compat/linux/linux_util.h>
75#include <compat/linux/linux_vdso.h>
76
77MODULE_VERSION(linux, 1);
78
79#if BYTE_ORDER == LITTLE_ENDIAN
80#define SHELLMAGIC      0x2123 /* #! */
81#else
82#define SHELLMAGIC      0x2321
83#endif
84
85#if defined(DEBUG)
86SYSCTL_PROC(_compat_linux, OID_AUTO, debug,
87            CTLTYPE_STRING | CTLFLAG_RW,
88            0, 0, linux_sysctl_debug, "A",
89            "Linux debugging control");
90#endif
91
92/*
93 * Allow the sendsig functions to use the ldebug() facility
94 * even though they are not syscalls themselves. Map them
95 * to syscall 0. This is slightly less bogus than using
96 * ldebug(sigreturn).
97 */
98#define	LINUX_SYS_linux_rt_sendsig	0
99#define	LINUX_SYS_linux_sendsig		0
100
101#define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
102
103static int linux_szsigcode;
104static vm_object_t linux_shared_page_obj;
105static char *linux_shared_page_mapping;
106extern char _binary_linux_locore_o_start;
107extern char _binary_linux_locore_o_end;
108
109extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
110
111SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
112
113static int	linux_fixup(register_t **stack_base,
114		    struct image_params *iparams);
115static int	elf_linux_fixup(register_t **stack_base,
116		    struct image_params *iparams);
117static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
118static void	exec_linux_setregs(struct thread *td,
119		    struct image_params *imgp, u_long stack);
120static register_t *linux_copyout_strings(struct image_params *imgp);
121static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
122static void	linux_vdso_install(void *param);
123static void	linux_vdso_deinstall(void *param);
124
125static int linux_szplatform;
126const char *linux_kplatform;
127
128static eventhandler_tag linux_exit_tag;
129static eventhandler_tag linux_exec_tag;
130static eventhandler_tag linux_thread_dtor_tag;
131
132/*
133 * Linux syscalls return negative errno's, we do positive and map them
134 * Reference:
135 *   FreeBSD: src/sys/sys/errno.h
136 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
137 *            linux-2.6.17.8/include/asm-generic/errno.h
138 */
139static int bsd_to_linux_errno[ELAST + 1] = {
140	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
141	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
142	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
143	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
144	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
145	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
146	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
147	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
148	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
149	 -72, -67, -71
150};
151
152int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
153	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
154	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
155	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
156	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
157	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
158	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
159	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
160	0, LINUX_SIGUSR1, LINUX_SIGUSR2
161};
162
163int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
164	SIGHUP, SIGINT, SIGQUIT, SIGILL,
165	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
166	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
167	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
168	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
169	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
170	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
171	SIGIO, SIGURG, SIGSYS
172};
173
/* Value reported when a FreeBSD trap type has no listed Linux number. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap type -> Linux trap number, indexed by the FreeBSD T_*
 * value named in each comment.
 */
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/*
 * Bounds-checked lookup in the table above.  The comparison is done
 * against an unsigned element count, so a negative code is converted to
 * a large unsigned value and yields LINUX_T_UNKNOWN like any other
 * out-of-range code.
 */
#define bsd_to_linux_trapcode(code)					\
    ((code) < sizeof(_bsd_to_linux_trapcode) /				\
	sizeof(_bsd_to_linux_trapcode[0]) ?				\
	_bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
212
/*
 * Integer-typed VDSO symbols.  linux_sigcode and linux_rt_sigcode are
 * loaded into %eip by the signal delivery routines below, and
 * linux_vsyscall is exported to ELF images as LINUX_AT_SYSINFO.
 */
LINUX_VDSO_SYM_INTPTR(linux_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
216
217/*
218 * If FreeBSD & Linux have a difference of opinion about what a trap
219 * means, deal with it here.
220 *
221 * MPSAFE
222 */
223static int
224translate_traps(int signal, int trap_code)
225{
226	if (signal != SIGBUS)
227		return (signal);
228	switch (trap_code) {
229	case T_PROTFLT:
230	case T_TSSFLT:
231	case T_DOUBLEFLT:
232	case T_PAGEFLT:
233		return (SIGSEGV);
234	default:
235		return (signal);
236	}
237}
238
239static int
240linux_fixup(register_t **stack_base, struct image_params *imgp)
241{
242	register_t *argv, *envp;
243
244	argv = *stack_base;
245	envp = *stack_base + (imgp->args->argc + 1);
246	(*stack_base)--;
247	suword(*stack_base, (intptr_t)(void *)envp);
248	(*stack_base)--;
249	suword(*stack_base, (intptr_t)(void *)argv);
250	(*stack_base)--;
251	suword(*stack_base, imgp->args->argc);
252	return (0);
253}
254
255static int
256elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
257{
258	struct proc *p;
259	Elf32_Auxargs *args;
260	Elf32_Addr *uplatform;
261	struct ps_strings *arginfo;
262	register_t *pos;
263
264	KASSERT(curthread->td_proc == imgp->proc,
265	    ("unsafe elf_linux_fixup(), should be curproc"));
266
267	p = imgp->proc;
268	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
269	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
270	args = (Elf32_Auxargs *)imgp->auxargs;
271	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
272
273	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
274	    imgp->proc->p_sysent->sv_shared_page_base);
275	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
276	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
277
278	/*
279	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
280	 * as it has appeared in the 2.4.0-rc7 first time.
281	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
282	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
283	 * is not present.
284	 * Also see linux_times() implementation.
285	 */
286	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
287		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
288	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
289	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
290	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
291	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
292	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
293	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
294	AUXARGS_ENTRY(pos, AT_BASE, args->base);
295	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
296	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
297	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
298	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
299	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
300	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
301	AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary);
302	if (imgp->execpathp != 0)
303		AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp);
304	if (args->execfd != -1)
305		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
306	AUXARGS_ENTRY(pos, AT_NULL, 0);
307
308	free(imgp->auxargs, M_TEMP);
309	imgp->auxargs = NULL;
310
311	(*stack_base)--;
312	suword(*stack_base, (register_t)imgp->args->argc);
313	return (0);
314}
315
316/*
317 * Copied from kern/kern_exec.c
318 */
319static register_t *
320linux_copyout_strings(struct image_params *imgp)
321{
322	int argc, envc;
323	char **vectp;
324	char *stringp, *destp;
325	register_t *stack_base;
326	struct ps_strings *arginfo;
327	char canary[LINUX_AT_RANDOM_LEN];
328	size_t execpath_len;
329	struct proc *p;
330
331	/*
332	 * Calculate string base and vector table pointers.
333	 */
334	p = imgp->proc;
335	if (imgp->execpath != NULL && imgp->auxargs != NULL)
336		execpath_len = strlen(imgp->execpath) + 1;
337	else
338		execpath_len = 0;
339	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
340	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
341	    roundup(sizeof(canary), sizeof(char *)) -
342	    roundup(execpath_len, sizeof(char *)) -
343	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
344
345	/*
346	 * install LINUX_PLATFORM
347	 */
348	copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform),
349	    linux_szplatform);
350
351	if (execpath_len != 0) {
352		imgp->execpathp = (uintptr_t)arginfo -
353		linux_szplatform - execpath_len;
354		copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len);
355	}
356
357	/*
358	 * Prepare the canary for SSP.
359	 */
360	arc4rand(canary, sizeof(canary), 0);
361	imgp->canary = (uintptr_t)arginfo - linux_szplatform -
362	    roundup(execpath_len, sizeof(char *)) -
363	    roundup(sizeof(canary), sizeof(char *));
364	copyout(canary, (void *)imgp->canary, sizeof(canary));
365
366	/*
367	 * If we have a valid auxargs ptr, prepare some room
368	 * on the stack.
369	 */
370	if (imgp->auxargs) {
371		/*
372		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
373		 * lower compatibility.
374		 */
375		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
376		    (LINUX_AT_COUNT * 2);
377		/*
378		 * The '+ 2' is for the null pointers at the end of each of
379		 * the arg and env vector sets,and imgp->auxarg_size is room
380		 * for argument of Runtime loader.
381		 */
382		vectp = (char **)(destp - (imgp->args->argc +
383		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
384	} else {
385		/*
386		 * The '+ 2' is for the null pointers at the end of each of
387		 * the arg and env vector sets
388		 */
389		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
390		    sizeof(char *));
391	}
392
393	/*
394	 * vectp also becomes our initial stack base
395	 */
396	stack_base = (register_t *)vectp;
397
398	stringp = imgp->args->begin_argv;
399	argc = imgp->args->argc;
400	envc = imgp->args->envc;
401
402	/*
403	 * Copy out strings - arguments and environment.
404	 */
405	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
406
407	/*
408	 * Fill in "ps_strings" struct for ps, w, etc.
409	 */
410	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
411	suword(&arginfo->ps_nargvstr, argc);
412
413	/*
414	 * Fill in argument portion of vector table.
415	 */
416	for (; argc > 0; --argc) {
417		suword(vectp++, (long)(intptr_t)destp);
418		while (*stringp++ != 0)
419			destp++;
420		destp++;
421	}
422
423	/* a null vector table pointer separates the argp's from the envp's */
424	suword(vectp++, 0);
425
426	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
427	suword(&arginfo->ps_nenvstr, envc);
428
429	/*
430	 * Fill in environment portion of vector table.
431	 */
432	for (; envc > 0; --envc) {
433		suword(vectp++, (long)(intptr_t)destp);
434		while (*stringp++ != 0)
435			destp++;
436		destp++;
437	}
438
439	/* end of vector table is a null pointer */
440	suword(vectp, 0);
441
442	return (stack_base);
443}
444
445static void
446linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
447{
448	struct thread *td = curthread;
449	struct proc *p = td->td_proc;
450	struct sigacts *psp;
451	struct trapframe *regs;
452	struct l_rt_sigframe *fp, frame;
453	int sig, code;
454	int oonstack;
455
456	sig = ksi->ksi_signo;
457	code = ksi->ksi_code;
458	PROC_LOCK_ASSERT(p, MA_OWNED);
459	psp = p->p_sigacts;
460	mtx_assert(&psp->ps_mtx, MA_OWNED);
461	regs = td->td_frame;
462	oonstack = sigonstack(regs->tf_esp);
463
464#ifdef DEBUG
465	if (ldebug(rt_sendsig))
466		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
467		    catcher, sig, (void*)mask, code);
468#endif
469	/*
470	 * Allocate space for the signal handler context.
471	 */
472	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
473	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
474		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
475		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
476	} else
477		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
478	mtx_unlock(&psp->ps_mtx);
479
480	/*
481	 * Build the argument list for the signal handler.
482	 */
483	sig = BSD_TO_LINUX_SIGNAL(sig);
484
485	bzero(&frame, sizeof(frame));
486
487	frame.sf_handler = catcher;
488	frame.sf_sig = sig;
489	frame.sf_siginfo = &fp->sf_si;
490	frame.sf_ucontext = &fp->sf_sc;
491
492	/* Fill in POSIX parts */
493	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
494
495	/*
496	 * Build the signal context to be used by sigreturn.
497	 */
498	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
499	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
500
501	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
502	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
503	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
504	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
505	PROC_UNLOCK(p);
506
507	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
508
509	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
510	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
511	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
512	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
513	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
514	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
515	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
516	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
517	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
518	frame.sf_sc.uc_mcontext.sc_esp    = regs->tf_esp;
519	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
520	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
521	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
522	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
523	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
524	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
525	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
526	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
527	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
528	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
529	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
530
531#ifdef DEBUG
532	if (ldebug(rt_sendsig))
533		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
534		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
535		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
536#endif
537
538	if (copyout(&frame, fp, sizeof(frame)) != 0) {
539		/*
540		 * Process has trashed its stack; give it an illegal
541		 * instruction to halt it in its tracks.
542		 */
543#ifdef DEBUG
544		if (ldebug(rt_sendsig))
545			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
546			    fp, oonstack);
547#endif
548		PROC_LOCK(p);
549		sigexit(td, SIGILL);
550	}
551
552	/*
553	 * Build context to run handler in.
554	 */
555	regs->tf_esp = (int)fp;
556	regs->tf_eip = linux_rt_sigcode;
557	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
558	regs->tf_cs = _ucodesel;
559	regs->tf_ds = _udatasel;
560	regs->tf_es = _udatasel;
561	regs->tf_fs = _udatasel;
562	regs->tf_ss = _udatasel;
563	PROC_LOCK(p);
564	mtx_lock(&psp->ps_mtx);
565}
566
567
568/*
569 * Send an interrupt to process.
570 *
571 * Stack is set up to allow sigcode stored
572 * in u. to call routine, followed by kcall
573 * to sigreturn routine below.  After sigreturn
574 * resets the signal mask, the stack, and the
575 * frame pointer, it returns to the user
576 * specified pc, psl.
577 */
578static void
579linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
580{
581	struct thread *td = curthread;
582	struct proc *p = td->td_proc;
583	struct sigacts *psp;
584	struct trapframe *regs;
585	struct l_sigframe *fp, frame;
586	l_sigset_t lmask;
587	int sig, code;
588	int oonstack, i;
589
590	PROC_LOCK_ASSERT(p, MA_OWNED);
591	psp = p->p_sigacts;
592	sig = ksi->ksi_signo;
593	code = ksi->ksi_code;
594	mtx_assert(&psp->ps_mtx, MA_OWNED);
595	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
596		/* Signal handler installed with SA_SIGINFO. */
597		linux_rt_sendsig(catcher, ksi, mask);
598		return;
599	}
600	regs = td->td_frame;
601	oonstack = sigonstack(regs->tf_esp);
602
603#ifdef DEBUG
604	if (ldebug(sendsig))
605		printf(ARGS(sendsig, "%p, %d, %p, %u"),
606		    catcher, sig, (void*)mask, code);
607#endif
608
609	/*
610	 * Allocate space for the signal handler context.
611	 */
612	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
613	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
614		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
615		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
616	} else
617		fp = (struct l_sigframe *)regs->tf_esp - 1;
618	mtx_unlock(&psp->ps_mtx);
619	PROC_UNLOCK(p);
620
621	/*
622	 * Build the argument list for the signal handler.
623	 */
624	sig = BSD_TO_LINUX_SIGNAL(sig);
625
626	bzero(&frame, sizeof(frame));
627
628	frame.sf_handler = catcher;
629	frame.sf_sig = sig;
630
631	bsd_to_linux_sigset(mask, &lmask);
632
633	/*
634	 * Build the signal context to be used by sigreturn.
635	 */
636	frame.sf_sc.sc_mask   = lmask.__bits[0];
637	frame.sf_sc.sc_gs     = rgs();
638	frame.sf_sc.sc_fs     = regs->tf_fs;
639	frame.sf_sc.sc_es     = regs->tf_es;
640	frame.sf_sc.sc_ds     = regs->tf_ds;
641	frame.sf_sc.sc_edi    = regs->tf_edi;
642	frame.sf_sc.sc_esi    = regs->tf_esi;
643	frame.sf_sc.sc_ebp    = regs->tf_ebp;
644	frame.sf_sc.sc_ebx    = regs->tf_ebx;
645	frame.sf_sc.sc_esp    = regs->tf_esp;
646	frame.sf_sc.sc_edx    = regs->tf_edx;
647	frame.sf_sc.sc_ecx    = regs->tf_ecx;
648	frame.sf_sc.sc_eax    = regs->tf_eax;
649	frame.sf_sc.sc_eip    = regs->tf_eip;
650	frame.sf_sc.sc_cs     = regs->tf_cs;
651	frame.sf_sc.sc_eflags = regs->tf_eflags;
652	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
653	frame.sf_sc.sc_ss     = regs->tf_ss;
654	frame.sf_sc.sc_err    = regs->tf_err;
655	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
656	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
657
658	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
659		frame.sf_extramask[i] = lmask.__bits[i+1];
660
661	if (copyout(&frame, fp, sizeof(frame)) != 0) {
662		/*
663		 * Process has trashed its stack; give it an illegal
664		 * instruction to halt it in its tracks.
665		 */
666		PROC_LOCK(p);
667		sigexit(td, SIGILL);
668	}
669
670	/*
671	 * Build context to run handler in.
672	 */
673	regs->tf_esp = (int)fp;
674	regs->tf_eip = linux_sigcode;
675	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
676	regs->tf_cs = _ucodesel;
677	regs->tf_ds = _udatasel;
678	regs->tf_es = _udatasel;
679	regs->tf_fs = _udatasel;
680	regs->tf_ss = _udatasel;
681	PROC_LOCK(p);
682	mtx_lock(&psp->ps_mtx);
683}
684
685/*
686 * System call to cleanup state after a signal
687 * has been taken.  Reset signal mask and
688 * stack state from context left by sendsig (above).
689 * Return to previous pc and psl as specified by
690 * context left by sendsig. Check carefully to
691 * make sure that the user has not modified the
692 * psl to gain improper privileges or to cause
693 * a machine fault.
694 */
695int
696linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
697{
698	struct l_sigframe frame;
699	struct trapframe *regs;
700	l_sigset_t lmask;
701	sigset_t bmask;
702	int eflags, i;
703	ksiginfo_t ksi;
704
705	regs = td->td_frame;
706
707#ifdef DEBUG
708	if (ldebug(sigreturn))
709		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
710#endif
711	/*
712	 * The trampoline code hands us the sigframe.
713	 * It is unsafe to keep track of it ourselves, in the event that a
714	 * program jumps out of a signal handler.
715	 */
716	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
717		return (EFAULT);
718
719	/*
720	 * Check for security violations.
721	 */
722#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
723	eflags = frame.sf_sc.sc_eflags;
724	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
725		return (EINVAL);
726
727	/*
728	 * Don't allow users to load a valid privileged %cs.  Let the
729	 * hardware check for invalid selectors, excess privilege in
730	 * other selectors, invalid %eip's and invalid %esp's.
731	 */
732#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
733	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
734		ksiginfo_init_trap(&ksi);
735		ksi.ksi_signo = SIGBUS;
736		ksi.ksi_code = BUS_OBJERR;
737		ksi.ksi_trapno = T_PROTFLT;
738		ksi.ksi_addr = (void *)regs->tf_eip;
739		trapsignal(td, &ksi);
740		return (EINVAL);
741	}
742
743	lmask.__bits[0] = frame.sf_sc.sc_mask;
744	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
745		lmask.__bits[i+1] = frame.sf_extramask[i];
746	linux_to_bsd_sigset(&lmask, &bmask);
747	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
748
749	/*
750	 * Restore signal context.
751	 */
752	/* %gs was restored by the trampoline. */
753	regs->tf_fs     = frame.sf_sc.sc_fs;
754	regs->tf_es     = frame.sf_sc.sc_es;
755	regs->tf_ds     = frame.sf_sc.sc_ds;
756	regs->tf_edi    = frame.sf_sc.sc_edi;
757	regs->tf_esi    = frame.sf_sc.sc_esi;
758	regs->tf_ebp    = frame.sf_sc.sc_ebp;
759	regs->tf_ebx    = frame.sf_sc.sc_ebx;
760	regs->tf_edx    = frame.sf_sc.sc_edx;
761	regs->tf_ecx    = frame.sf_sc.sc_ecx;
762	regs->tf_eax    = frame.sf_sc.sc_eax;
763	regs->tf_eip    = frame.sf_sc.sc_eip;
764	regs->tf_cs     = frame.sf_sc.sc_cs;
765	regs->tf_eflags = eflags;
766	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
767	regs->tf_ss     = frame.sf_sc.sc_ss;
768
769	return (EJUSTRETURN);
770}
771
772/*
773 * System call to cleanup state after a signal
774 * has been taken.  Reset signal mask and
775 * stack state from context left by rt_sendsig (above).
776 * Return to previous pc and psl as specified by
777 * context left by sendsig. Check carefully to
778 * make sure that the user has not modified the
779 * psl to gain improper privileges or to cause
780 * a machine fault.
781 */
782int
783linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
784{
785	struct l_ucontext uc;
786	struct l_sigcontext *context;
787	sigset_t bmask;
788	l_stack_t *lss;
789	stack_t ss;
790	struct trapframe *regs;
791	int eflags;
792	ksiginfo_t ksi;
793
794	regs = td->td_frame;
795
796#ifdef DEBUG
797	if (ldebug(rt_sigreturn))
798		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
799#endif
800	/*
801	 * The trampoline code hands us the ucontext.
802	 * It is unsafe to keep track of it ourselves, in the event that a
803	 * program jumps out of a signal handler.
804	 */
805	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
806		return (EFAULT);
807
808	context = &uc.uc_mcontext;
809
810	/*
811	 * Check for security violations.
812	 */
813#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
814	eflags = context->sc_eflags;
815	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
816		return (EINVAL);
817
818	/*
819	 * Don't allow users to load a valid privileged %cs.  Let the
820	 * hardware check for invalid selectors, excess privilege in
821	 * other selectors, invalid %eip's and invalid %esp's.
822	 */
823#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
824	if (!CS_SECURE(context->sc_cs)) {
825		ksiginfo_init_trap(&ksi);
826		ksi.ksi_signo = SIGBUS;
827		ksi.ksi_code = BUS_OBJERR;
828		ksi.ksi_trapno = T_PROTFLT;
829		ksi.ksi_addr = (void *)regs->tf_eip;
830		trapsignal(td, &ksi);
831		return (EINVAL);
832	}
833
834	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
835	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
836
837	/*
838	 * Restore signal context
839	 */
840	/* %gs was restored by the trampoline. */
841	regs->tf_fs     = context->sc_fs;
842	regs->tf_es     = context->sc_es;
843	regs->tf_ds     = context->sc_ds;
844	regs->tf_edi    = context->sc_edi;
845	regs->tf_esi    = context->sc_esi;
846	regs->tf_ebp    = context->sc_ebp;
847	regs->tf_ebx    = context->sc_ebx;
848	regs->tf_edx    = context->sc_edx;
849	regs->tf_ecx    = context->sc_ecx;
850	regs->tf_eax    = context->sc_eax;
851	regs->tf_eip    = context->sc_eip;
852	regs->tf_cs     = context->sc_cs;
853	regs->tf_eflags = eflags;
854	regs->tf_esp    = context->sc_esp_at_signal;
855	regs->tf_ss     = context->sc_ss;
856
857	/*
858	 * call sigaltstack & ignore results..
859	 */
860	lss = &uc.uc_stack;
861	ss.ss_sp = lss->ss_sp;
862	ss.ss_size = lss->ss_size;
863	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
864
865#ifdef DEBUG
866	if (ldebug(rt_sigreturn))
867		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
868		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
869#endif
870	(void)kern_sigaltstack(td, &ss, NULL);
871
872	return (EJUSTRETURN);
873}
874
875static int
876linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
877{
878	struct proc *p;
879	struct trapframe *frame;
880
881	p = td->td_proc;
882	frame = td->td_frame;
883
884	sa->code = frame->tf_eax;
885	sa->args[0] = frame->tf_ebx;
886	sa->args[1] = frame->tf_ecx;
887	sa->args[2] = frame->tf_edx;
888	sa->args[3] = frame->tf_esi;
889	sa->args[4] = frame->tf_edi;
890	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
891
892	if (sa->code >= p->p_sysent->sv_size)
893		sa->callp = &p->p_sysent->sv_table[0];
894 	else
895 		sa->callp = &p->p_sysent->sv_table[sa->code];
896	sa->narg = sa->callp->sy_narg;
897
898	td->td_retval[0] = 0;
899	td->td_retval[1] = frame->tf_edx;
900
901	return (0);
902}
903
904/*
905 * If a linux binary is exec'ing something, try this image activator
906 * first.  We override standard shell script execution in order to
907 * be able to modify the interpreter path.  We only do this if a linux
908 * binary is doing the exec, so we do not create an EXEC module for it.
909 */
910static int	exec_linux_imgact_try(struct image_params *iparams);
911
912static int
913exec_linux_imgact_try(struct image_params *imgp)
914{
915    const char *head = (const char *)imgp->image_header;
916    char *rpath;
917    int error = -1;
918
919    /*
920     * The interpreter for shell scripts run from a linux binary needs
921     * to be located in /compat/linux if possible in order to recursively
922     * maintain linux path emulation.
923     */
924    if (((const short *)head)[0] == SHELLMAGIC) {
925	    /*
926	     * Run our normal shell image activator.  If it succeeds attempt
927	     * to use the alternate path for the interpreter.  If an alternate
928	     * path is found, use our stringspace to store it.
929	     */
930	    if ((error = exec_shell_imgact(imgp)) == 0) {
931		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
932			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
933		    if (rpath != NULL)
934			    imgp->args->fname_buf =
935				imgp->interpreter_name = rpath;
936	    }
937    }
938    return (error);
939}
940
941/*
942 * exec_setregs may initialize some registers differently than Linux
943 * does, thus potentially confusing Linux binaries. If necessary, we
944 * override the exec_setregs default(s) here.
945 */
946static void
947exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
948{
949	struct pcb *pcb = td->td_pcb;
950
951	exec_setregs(td, imgp, stack);
952
953	/* Linux sets %gs to 0, we default to _udatasel */
954	pcb->pcb_gs = 0;
955	load_gs(0);
956
957	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
958}
959
960static void
961linux_get_machine(const char **dst)
962{
963
964	switch (cpu_class) {
965	case CPUCLASS_686:
966		*dst = "i686";
967		break;
968	case CPUCLASS_586:
969		*dst = "i586";
970		break;
971	case CPUCLASS_486:
972		*dst = "i486";
973		break;
974	default:
975		*dst = "i386";
976	}
977}
978
979struct sysentvec linux_sysvec = {
980	.sv_size	= LINUX_SYS_MAXSYSCALL,
981	.sv_table	= linux_sysent,
982	.sv_mask	= 0,
983	.sv_sigsize	= LINUX_SIGTBLSZ,
984	.sv_sigtbl	= bsd_to_linux_signal,
985	.sv_errsize	= ELAST + 1,
986	.sv_errtbl	= bsd_to_linux_errno,
987	.sv_transtrap	= translate_traps,
988	.sv_fixup	= linux_fixup,
989	.sv_sendsig	= linux_sendsig,
990	.sv_sigcode	= &_binary_linux_locore_o_start,
991	.sv_szsigcode	= &linux_szsigcode,
992	.sv_prepsyscall	= NULL,
993	.sv_name	= "Linux a.out",
994	.sv_coredump	= NULL,
995	.sv_imgact_try	= exec_linux_imgact_try,
996	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
997	.sv_pagesize	= PAGE_SIZE,
998	.sv_minuser	= VM_MIN_ADDRESS,
999	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1000	.sv_usrstack	= LINUX_USRSTACK,
1001	.sv_psstrings	= PS_STRINGS,
1002	.sv_stackprot	= VM_PROT_ALL,
1003	.sv_copyout_strings = exec_copyout_strings,
1004	.sv_setregs	= exec_linux_setregs,
1005	.sv_fixlimit	= NULL,
1006	.sv_maxssiz	= NULL,
1007	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
1008	.sv_set_syscall_retval = cpu_set_syscall_retval,
1009	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1010	.sv_syscallnames = NULL,
1011	.sv_shared_page_base = LINUX_SHAREDPAGE,
1012	.sv_shared_page_len = PAGE_SIZE,
1013	.sv_schedtail	= linux_schedtail,
1014	.sv_thread_detach = linux_thread_detach,
1015};
1016INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
1017
1018struct sysentvec elf_linux_sysvec = {
1019	.sv_size	= LINUX_SYS_MAXSYSCALL,
1020	.sv_table	= linux_sysent,
1021	.sv_mask	= 0,
1022	.sv_sigsize	= LINUX_SIGTBLSZ,
1023	.sv_sigtbl	= bsd_to_linux_signal,
1024	.sv_errsize	= ELAST + 1,
1025	.sv_errtbl	= bsd_to_linux_errno,
1026	.sv_transtrap	= translate_traps,
1027	.sv_fixup	= elf_linux_fixup,
1028	.sv_sendsig	= linux_sendsig,
1029	.sv_sigcode	= &_binary_linux_locore_o_start,
1030	.sv_szsigcode	= &linux_szsigcode,
1031	.sv_prepsyscall	= NULL,
1032	.sv_name	= "Linux ELF",
1033	.sv_coredump	= elf32_coredump,
1034	.sv_imgact_try	= exec_linux_imgact_try,
1035	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1036	.sv_pagesize	= PAGE_SIZE,
1037	.sv_minuser	= VM_MIN_ADDRESS,
1038	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1039	.sv_usrstack	= LINUX_USRSTACK,
1040	.sv_psstrings	= LINUX_PS_STRINGS,
1041	.sv_stackprot	= VM_PROT_ALL,
1042	.sv_copyout_strings = linux_copyout_strings,
1043	.sv_setregs	= exec_linux_setregs,
1044	.sv_fixlimit	= NULL,
1045	.sv_maxssiz	= NULL,
1046	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1047	.sv_set_syscall_retval = cpu_set_syscall_retval,
1048	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1049	.sv_syscallnames = NULL,
1050	.sv_shared_page_base = LINUX_SHAREDPAGE,
1051	.sv_shared_page_len = PAGE_SIZE,
1052	.sv_schedtail	= linux_schedtail,
1053	.sv_thread_detach = linux_thread_detach,
1054};
1055
1056static void
1057linux_vdso_install(void *param)
1058{
1059
1060	linux_szsigcode = (&_binary_linux_locore_o_end -
1061	    &_binary_linux_locore_o_start);
1062
1063	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1064		panic("Linux invalid vdso size\n");
1065
1066	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1067
1068	linux_shared_page_obj = __elfN(linux_shared_page_init)
1069	    (&linux_shared_page_mapping);
1070
1071	__elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE);
1072
1073	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1074	    linux_szsigcode);
1075	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1076}
1077SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1078    (sysinit_cfunc_t)linux_vdso_install, NULL);
1079
1080static void
1081linux_vdso_deinstall(void *param)
1082{
1083
1084	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
1085};
1086SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1087    (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1088
1089static char GNU_ABI_VENDOR[] = "GNU";
1090static int GNULINUX_ABI_DESC = 0;
1091
1092static boolean_t
1093linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1094{
1095	const Elf32_Word *desc;
1096	uintptr_t p;
1097
1098	p = (uintptr_t)(note + 1);
1099	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1100
1101	desc = (const Elf32_Word *)p;
1102	if (desc[0] != GNULINUX_ABI_DESC)
1103		return (FALSE);
1104
1105	/*
1106	 * For linux we encode osrel as follows (see linux_mib.c):
1107	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1108	 */
1109	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1110
1111	return (TRUE);
1112}
1113
1114static Elf_Brandnote linux_brandnote = {
1115	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1116	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1117	.hdr.n_type	= 1,
1118	.vendor		= GNU_ABI_VENDOR,
1119	.flags		= BN_TRANSLATE_OSREL,
1120	.trans_osrel	= linux_trans_osrel
1121};
1122
1123static Elf32_Brandinfo linux_brand = {
1124	.brand		= ELFOSABI_LINUX,
1125	.machine	= EM_386,
1126	.compat_3_brand	= "Linux",
1127	.emul_path	= "/compat/linux",
1128	.interp_path	= "/lib/ld-linux.so.1",
1129	.sysvec		= &elf_linux_sysvec,
1130	.interp_newpath	= NULL,
1131	.brand_note	= &linux_brandnote,
1132	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1133};
1134
1135static Elf32_Brandinfo linux_glibc2brand = {
1136	.brand		= ELFOSABI_LINUX,
1137	.machine	= EM_386,
1138	.compat_3_brand	= "Linux",
1139	.emul_path	= "/compat/linux",
1140	.interp_path	= "/lib/ld-linux.so.2",
1141	.sysvec		= &elf_linux_sysvec,
1142	.interp_newpath	= NULL,
1143	.brand_note	= &linux_brandnote,
1144	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1145};
1146
1147Elf32_Brandinfo *linux_brandlist[] = {
1148	&linux_brand,
1149	&linux_glibc2brand,
1150	NULL
1151};
1152
1153static int
1154linux_elf_modevent(module_t mod, int type, void *data)
1155{
1156	Elf32_Brandinfo **brandinfo;
1157	int error;
1158	struct linux_ioctl_handler **lihp;
1159
1160	error = 0;
1161
1162	switch(type) {
1163	case MOD_LOAD:
1164		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1165		     ++brandinfo)
1166			if (elf32_insert_brand_entry(*brandinfo) < 0)
1167				error = EINVAL;
1168		if (error == 0) {
1169			SET_FOREACH(lihp, linux_ioctl_handler_set)
1170				linux_ioctl_register_handler(*lihp);
1171			LIST_INIT(&futex_list);
1172			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1173			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1174			      NULL, 1000);
1175			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1176			      NULL, 1000);
1177			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1178			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1179			linux_get_machine(&linux_kplatform);
1180			linux_szplatform = roundup(strlen(linux_kplatform) + 1,
1181			    sizeof(char *));
1182			linux_osd_jail_register();
1183			stclohz = (stathz ? stathz : hz);
1184			if (bootverbose)
1185				printf("Linux ELF exec handler installed\n");
1186		} else
1187			printf("cannot insert Linux ELF brand handler\n");
1188		break;
1189	case MOD_UNLOAD:
1190		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1191		     ++brandinfo)
1192			if (elf32_brand_inuse(*brandinfo))
1193				error = EBUSY;
1194		if (error == 0) {
1195			for (brandinfo = &linux_brandlist[0];
1196			     *brandinfo != NULL; ++brandinfo)
1197				if (elf32_remove_brand_entry(*brandinfo) < 0)
1198					error = EINVAL;
1199		}
1200		if (error == 0) {
1201			SET_FOREACH(lihp, linux_ioctl_handler_set)
1202				linux_ioctl_unregister_handler(*lihp);
1203			mtx_destroy(&futex_mtx);
1204			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1205			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1206			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1207			linux_osd_jail_deregister();
1208			if (bootverbose)
1209				printf("Linux ELF exec handler removed\n");
1210		} else
1211			printf("Could not deinstall ELF interpreter entry\n");
1212		break;
1213	default:
1214		return (EOPNOTSUPP);
1215	}
1216	return (error);
1217}
1218
1219static moduledata_t linux_elf_mod = {
1220	"linuxelf",
1221	linux_elf_modevent,
1222	0
1223};
1224
1225DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1226