linux32_sysvec.c revision 293516
1/*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 S��ren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33#include <sys/cdefs.h> 34__FBSDID("$FreeBSD: stable/10/sys/amd64/linux32/linux32_sysvec.c 293516 2016-01-09 15:48:11Z dchagin $"); 35#include "opt_compat.h" 36 37#ifndef COMPAT_FREEBSD32 38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" 39#endif 40 41#define __ELF_WORD_SIZE 32 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/exec.h> 46#include <sys/fcntl.h> 47#include <sys/imgact.h> 48#include <sys/imgact_elf.h> 49#include <sys/kernel.h> 50#include <sys/lock.h> 51#include <sys/malloc.h> 52#include <sys/module.h> 53#include <sys/mutex.h> 54#include <sys/proc.h> 55#include <sys/resourcevar.h> 56#include <sys/signalvar.h> 57#include <sys/sysctl.h> 58#include <sys/syscallsubr.h> 59#include <sys/sysent.h> 60#include <sys/sysproto.h> 61#include <sys/vnode.h> 62#include <sys/eventhandler.h> 63 64#include <vm/vm.h> 65#include <vm/pmap.h> 66#include <vm/vm_extern.h> 67#include <vm/vm_map.h> 68#include <vm/vm_object.h> 69#include <vm/vm_page.h> 70#include <vm/vm_param.h> 71 72#include <machine/cpu.h> 73#include <machine/md_var.h> 74#include <machine/pcb.h> 75#include <machine/specialreg.h> 76 77#include <amd64/linux32/linux.h> 78#include <amd64/linux32/linux32_proto.h> 79#include <compat/linux/linux_emul.h> 80#include <compat/linux/linux_futex.h> 81#include <compat/linux/linux_ioctl.h> 82#include <compat/linux/linux_mib.h> 83#include <compat/linux/linux_misc.h> 84#include <compat/linux/linux_signal.h> 85#include <compat/linux/linux_util.h> 86#include <compat/linux/linux_vdso.h> 87 88MODULE_VERSION(linux, 1); 89 90MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 91 92#define AUXARGS_ENTRY_32(pos, id, val) \ 93 do { \ 94 suword32(pos++, id); \ 95 suword32(pos++, val); \ 96 } while (0) 97 98#if BYTE_ORDER == LITTLE_ENDIAN 99#define SHELLMAGIC 0x2123 /* #! */ 100#else 101#define SHELLMAGIC 0x2321 102#endif 103 104/* 105 * Allow the sendsig functions to use the ldebug() facility 106 * even though they are not syscalls themselves. Map them 107 * to syscall 0. This is slightly less bogus than using 108 * ldebug(sigreturn). 109 */ 110#define LINUX_SYS_linux_rt_sendsig 0 111#define LINUX_SYS_linux_sendsig 0 112 113const char *linux_kplatform; 114static int linux_szsigcode; 115static vm_object_t linux_shared_page_obj; 116static char *linux_shared_page_mapping; 117extern char _binary_linux32_locore_o_start; 118extern char _binary_linux32_locore_o_end; 119 120extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 121 122SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 123SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 124 125static int elf_linux_fixup(register_t **stack_base, 126 struct image_params *iparams); 127static register_t *linux_copyout_strings(struct image_params *imgp); 128static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 129static void exec_linux_setregs(struct thread *td, 130 struct image_params *imgp, u_long stack); 131static void linux32_fixlimit(struct rlimit *rl, int which); 132static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 133static void linux_vdso_install(void *param); 134static void linux_vdso_deinstall(void *param); 135 136static eventhandler_tag linux_exit_tag; 137static eventhandler_tag linux_exec_tag; 138static eventhandler_tag linux_thread_dtor_tag; 139 140/* 141 * Linux syscalls return negative errno's, we do positive and map them 142 * Reference: 143 * FreeBSD: src/sys/sys/errno.h 144 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 145 * linux-2.6.17.8/include/asm-generic/errno.h 146 */ 147static int bsd_to_linux_errno[ELAST + 1] = { 148 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 149 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 150 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 151 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 152 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 153 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 154 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 155 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 156 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 157 -72, -67, -71 158}; 159 160int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 161 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 162 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 163 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 164 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 165 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 166 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 167 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 168 0, LINUX_SIGUSR1, LINUX_SIGUSR2 169}; 170 171int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 172 SIGHUP, SIGINT, SIGQUIT, SIGILL, 173 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 174 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 175 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 176 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 177 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 178 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 179 SIGIO, SIGURG, SIGSYS 180}; 181 182#define LINUX_T_UNKNOWN 255 183static int _bsd_to_linux_trapcode[] = { 184 LINUX_T_UNKNOWN, /* 0 */ 185 6, /* 1 T_PRIVINFLT */ 186 LINUX_T_UNKNOWN, /* 2 */ 187 3, /* 3 T_BPTFLT */ 188 LINUX_T_UNKNOWN, /* 4 */ 189 LINUX_T_UNKNOWN, /* 5 */ 190 16, /* 6 T_ARITHTRAP */ 191 254, /* 7 T_ASTFLT */ 192 LINUX_T_UNKNOWN, /* 8 */ 193 13, /* 9 T_PROTFLT */ 194 1, /* 10 T_TRCTRAP */ 195 LINUX_T_UNKNOWN, /* 11 */ 196 14, /* 12 T_PAGEFLT */ 197 LINUX_T_UNKNOWN, /* 13 */ 198 17, /* 14 T_ALIGNFLT */ 199 LINUX_T_UNKNOWN, /* 15 */ 200 LINUX_T_UNKNOWN, /* 16 */ 201 LINUX_T_UNKNOWN, /* 17 */ 202 0, /* 18 T_DIVIDE */ 203 2, /* 19 T_NMI */ 204 4, /* 20 T_OFLOW */ 205 5, /* 21 T_BOUND */ 206 7, /* 22 T_DNA */ 207 8, /* 23 T_DOUBLEFLT */ 208 9, /* 24 T_FPOPFLT */ 209 10, /* 25 T_TSSFLT */ 210 11, /* 26 T_SEGNPFLT */ 211 12, /* 27 T_STKFLT */ 212 18, /* 28 T_MCHK */ 213 19, /* 29 T_XMMFLT */ 214 15 /* 30 T_RESERVED */ 215}; 216#define bsd_to_linux_trapcode(code) \ 217 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 218 _bsd_to_linux_trapcode[(code)]: \ 219 LINUX_T_UNKNOWN) 220 221struct linux32_ps_strings { 222 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 223 u_int ps_nargvstr; /* the number of argument strings */ 224 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 225 u_int ps_nenvstr; /* the number of environment strings */ 226}; 227 228LINUX_VDSO_SYM_INTPTR(linux32_sigcode); 229LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode); 230LINUX_VDSO_SYM_INTPTR(linux32_vsyscall); 231LINUX_VDSO_SYM_CHAR(linux_platform); 232 233/* 234 * If FreeBSD & Linux have a difference of opinion about what a trap 235 * means, deal with it here. 236 * 237 * MPSAFE 238 */ 239static int 240translate_traps(int signal, int trap_code) 241{ 242 if (signal != SIGBUS) 243 return signal; 244 switch (trap_code) { 245 case T_PROTFLT: 246 case T_TSSFLT: 247 case T_DOUBLEFLT: 248 case T_PAGEFLT: 249 return SIGSEGV; 250 default: 251 return signal; 252 } 253} 254 255static int 256elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 257{ 258 Elf32_Auxargs *args; 259 Elf32_Addr *base; 260 Elf32_Addr *pos; 261 struct linux32_ps_strings *arginfo; 262 263 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 264 265 KASSERT(curthread->td_proc == imgp->proc, 266 ("unsafe elf_linux_fixup(), should be curproc")); 267 base = (Elf32_Addr *)*stack_base; 268 args = (Elf32_Auxargs *)imgp->auxargs; 269 pos = base + (imgp->args->argc + imgp->args->envc + 2); 270 271 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, 272 imgp->proc->p_sysent->sv_shared_page_base); 273 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); 274 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 275 276 /* 277 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 278 * as it has appeared in the 2.4.0-rc7 first time. 279 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 280 * glibc falls back to the hard-coded CLK_TCK value when aux entry 281 * is not present. 282 * Also see linux_times() implementation. 283 */ 284 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 285 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 286 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 287 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 288 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 289 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 290 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 291 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 292 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 293 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 294 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 295 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 296 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 297 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 298 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); 299 if (args->execfd != -1) 300 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 301 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 302 303 free(imgp->auxargs, M_TEMP); 304 imgp->auxargs = NULL; 305 306 base--; 307 suword32(base, (uint32_t)imgp->args->argc); 308 *stack_base = (register_t *)base; 309 return (0); 310} 311 312static void 313linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 314{ 315 struct thread *td = curthread; 316 struct proc *p = td->td_proc; 317 struct sigacts *psp; 318 struct trapframe *regs; 319 struct l_rt_sigframe *fp, frame; 320 int oonstack; 321 int sig; 322 int code; 323 324 sig = ksi->ksi_signo; 325 code = ksi->ksi_code; 326 PROC_LOCK_ASSERT(p, MA_OWNED); 327 psp = p->p_sigacts; 328 mtx_assert(&psp->ps_mtx, MA_OWNED); 329 regs = td->td_frame; 330 oonstack = sigonstack(regs->tf_rsp); 331 332#ifdef DEBUG 333 if (ldebug(rt_sendsig)) 334 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 335 catcher, sig, (void*)mask, code); 336#endif 337 /* 338 * Allocate space for the signal handler context. 339 */ 340 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 341 SIGISMEMBER(psp->ps_sigonstack, sig)) { 342 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 343 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 344 } else 345 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 346 mtx_unlock(&psp->ps_mtx); 347 348 /* 349 * Build the argument list for the signal handler. 350 */ 351 if (p->p_sysent->sv_sigtbl) 352 if (sig <= p->p_sysent->sv_sigsize) 353 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 354 355 bzero(&frame, sizeof(frame)); 356 357 frame.sf_handler = PTROUT(catcher); 358 frame.sf_sig = sig; 359 frame.sf_siginfo = PTROUT(&fp->sf_si); 360 frame.sf_ucontext = PTROUT(&fp->sf_sc); 361 362 /* Fill in POSIX parts */ 363 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 364 365 /* 366 * Build the signal context to be used by sigreturn 367 * and libgcc unwind. 368 */ 369 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 370 frame.sf_sc.uc_link = 0; /* XXX ??? */ 371 372 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 373 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 374 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 375 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 376 PROC_UNLOCK(p); 377 378 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 379 380 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 381 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 382 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 383 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 384 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 385 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; 386 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 387 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 388 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 389 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 390 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 391 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 392 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 393 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 394 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 395 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 396 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 397 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 398 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 399 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 400 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 401 402#ifdef DEBUG 403 if (ldebug(rt_sendsig)) 404 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 405 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 406 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 407#endif 408 409 if (copyout(&frame, fp, sizeof(frame)) != 0) { 410 /* 411 * Process has trashed its stack; give it an illegal 412 * instruction to halt it in its tracks. 413 */ 414#ifdef DEBUG 415 if (ldebug(rt_sendsig)) 416 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 417 fp, oonstack); 418#endif 419 PROC_LOCK(p); 420 sigexit(td, SIGILL); 421 } 422 423 /* 424 * Build context to run handler in. 425 */ 426 regs->tf_rsp = PTROUT(fp); 427 regs->tf_rip = linux32_rt_sigcode; 428 regs->tf_rflags &= ~(PSL_T | PSL_D); 429 regs->tf_cs = _ucode32sel; 430 regs->tf_ss = _udatasel; 431 regs->tf_ds = _udatasel; 432 regs->tf_es = _udatasel; 433 regs->tf_fs = _ufssel; 434 regs->tf_gs = _ugssel; 435 regs->tf_flags = TF_HASSEGS; 436 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 437 PROC_LOCK(p); 438 mtx_lock(&psp->ps_mtx); 439} 440 441 442/* 443 * Send an interrupt to process. 444 * 445 * Stack is set up to allow sigcode stored 446 * in u. to call routine, followed by kcall 447 * to sigreturn routine below. After sigreturn 448 * resets the signal mask, the stack, and the 449 * frame pointer, it returns to the user 450 * specified pc, psl. 451 */ 452static void 453linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 454{ 455 struct thread *td = curthread; 456 struct proc *p = td->td_proc; 457 struct sigacts *psp; 458 struct trapframe *regs; 459 struct l_sigframe *fp, frame; 460 l_sigset_t lmask; 461 int oonstack, i; 462 int sig, code; 463 464 sig = ksi->ksi_signo; 465 code = ksi->ksi_code; 466 PROC_LOCK_ASSERT(p, MA_OWNED); 467 psp = p->p_sigacts; 468 mtx_assert(&psp->ps_mtx, MA_OWNED); 469 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 470 /* Signal handler installed with SA_SIGINFO. */ 471 linux_rt_sendsig(catcher, ksi, mask); 472 return; 473 } 474 475 regs = td->td_frame; 476 oonstack = sigonstack(regs->tf_rsp); 477 478#ifdef DEBUG 479 if (ldebug(sendsig)) 480 printf(ARGS(sendsig, "%p, %d, %p, %u"), 481 catcher, sig, (void*)mask, code); 482#endif 483 484 /* 485 * Allocate space for the signal handler context. 486 */ 487 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 488 SIGISMEMBER(psp->ps_sigonstack, sig)) { 489 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 490 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 491 } else 492 fp = (struct l_sigframe *)regs->tf_rsp - 1; 493 mtx_unlock(&psp->ps_mtx); 494 PROC_UNLOCK(p); 495 496 /* 497 * Build the argument list for the signal handler. 498 */ 499 if (p->p_sysent->sv_sigtbl) 500 if (sig <= p->p_sysent->sv_sigsize) 501 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 502 503 bzero(&frame, sizeof(frame)); 504 505 frame.sf_handler = PTROUT(catcher); 506 frame.sf_sig = sig; 507 508 bsd_to_linux_sigset(mask, &lmask); 509 510 /* 511 * Build the signal context to be used by sigreturn. 512 */ 513 frame.sf_sc.sc_mask = lmask.__bits[0]; 514 frame.sf_sc.sc_gs = regs->tf_gs; 515 frame.sf_sc.sc_fs = regs->tf_fs; 516 frame.sf_sc.sc_es = regs->tf_es; 517 frame.sf_sc.sc_ds = regs->tf_ds; 518 frame.sf_sc.sc_edi = regs->tf_rdi; 519 frame.sf_sc.sc_esi = regs->tf_rsi; 520 frame.sf_sc.sc_ebp = regs->tf_rbp; 521 frame.sf_sc.sc_ebx = regs->tf_rbx; 522 frame.sf_sc.sc_esp = regs->tf_rsp; 523 frame.sf_sc.sc_edx = regs->tf_rdx; 524 frame.sf_sc.sc_ecx = regs->tf_rcx; 525 frame.sf_sc.sc_eax = regs->tf_rax; 526 frame.sf_sc.sc_eip = regs->tf_rip; 527 frame.sf_sc.sc_cs = regs->tf_cs; 528 frame.sf_sc.sc_eflags = regs->tf_rflags; 529 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 530 frame.sf_sc.sc_ss = regs->tf_ss; 531 frame.sf_sc.sc_err = regs->tf_err; 532 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 533 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 534 535 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 536 frame.sf_extramask[i] = lmask.__bits[i+1]; 537 538 if (copyout(&frame, fp, sizeof(frame)) != 0) { 539 /* 540 * Process has trashed its stack; give it an illegal 541 * instruction to halt it in its tracks. 542 */ 543 PROC_LOCK(p); 544 sigexit(td, SIGILL); 545 } 546 547 /* 548 * Build context to run handler in. 549 */ 550 regs->tf_rsp = PTROUT(fp); 551 regs->tf_rip = linux32_sigcode; 552 regs->tf_rflags &= ~(PSL_T | PSL_D); 553 regs->tf_cs = _ucode32sel; 554 regs->tf_ss = _udatasel; 555 regs->tf_ds = _udatasel; 556 regs->tf_es = _udatasel; 557 regs->tf_fs = _ufssel; 558 regs->tf_gs = _ugssel; 559 regs->tf_flags = TF_HASSEGS; 560 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 561 PROC_LOCK(p); 562 mtx_lock(&psp->ps_mtx); 563} 564 565/* 566 * System call to cleanup state after a signal 567 * has been taken. Reset signal mask and 568 * stack state from context left by sendsig (above). 569 * Return to previous pc and psl as specified by 570 * context left by sendsig. Check carefully to 571 * make sure that the user has not modified the 572 * psl to gain improper privileges or to cause 573 * a machine fault. 574 */ 575int 576linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 577{ 578 struct l_sigframe frame; 579 struct trapframe *regs; 580 sigset_t bmask; 581 l_sigset_t lmask; 582 int eflags, i; 583 ksiginfo_t ksi; 584 585 regs = td->td_frame; 586 587#ifdef DEBUG 588 if (ldebug(sigreturn)) 589 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 590#endif 591 /* 592 * The trampoline code hands us the sigframe. 593 * It is unsafe to keep track of it ourselves, in the event that a 594 * program jumps out of a signal handler. 595 */ 596 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 597 return (EFAULT); 598 599 /* 600 * Check for security violations. 601 */ 602#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 603 eflags = frame.sf_sc.sc_eflags; 604 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 605 return(EINVAL); 606 607 /* 608 * Don't allow users to load a valid privileged %cs. Let the 609 * hardware check for invalid selectors, excess privilege in 610 * other selectors, invalid %eip's and invalid %esp's. 611 */ 612#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 613 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 614 ksiginfo_init_trap(&ksi); 615 ksi.ksi_signo = SIGBUS; 616 ksi.ksi_code = BUS_OBJERR; 617 ksi.ksi_trapno = T_PROTFLT; 618 ksi.ksi_addr = (void *)regs->tf_rip; 619 trapsignal(td, &ksi); 620 return(EINVAL); 621 } 622 623 lmask.__bits[0] = frame.sf_sc.sc_mask; 624 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 625 lmask.__bits[i+1] = frame.sf_extramask[i]; 626 linux_to_bsd_sigset(&lmask, &bmask); 627 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 628 629 /* 630 * Restore signal context. 631 */ 632 regs->tf_rdi = frame.sf_sc.sc_edi; 633 regs->tf_rsi = frame.sf_sc.sc_esi; 634 regs->tf_rbp = frame.sf_sc.sc_ebp; 635 regs->tf_rbx = frame.sf_sc.sc_ebx; 636 regs->tf_rdx = frame.sf_sc.sc_edx; 637 regs->tf_rcx = frame.sf_sc.sc_ecx; 638 regs->tf_rax = frame.sf_sc.sc_eax; 639 regs->tf_rip = frame.sf_sc.sc_eip; 640 regs->tf_cs = frame.sf_sc.sc_cs; 641 regs->tf_ds = frame.sf_sc.sc_ds; 642 regs->tf_es = frame.sf_sc.sc_es; 643 regs->tf_fs = frame.sf_sc.sc_fs; 644 regs->tf_gs = frame.sf_sc.sc_gs; 645 regs->tf_rflags = eflags; 646 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 647 regs->tf_ss = frame.sf_sc.sc_ss; 648 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 649 650 return (EJUSTRETURN); 651} 652 653/* 654 * System call to cleanup state after a signal 655 * has been taken. Reset signal mask and 656 * stack state from context left by rt_sendsig (above). 657 * Return to previous pc and psl as specified by 658 * context left by sendsig. Check carefully to 659 * make sure that the user has not modified the 660 * psl to gain improper privileges or to cause 661 * a machine fault. 662 */ 663int 664linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 665{ 666 struct l_ucontext uc; 667 struct l_sigcontext *context; 668 sigset_t bmask; 669 l_stack_t *lss; 670 stack_t ss; 671 struct trapframe *regs; 672 int eflags; 673 ksiginfo_t ksi; 674 675 regs = td->td_frame; 676 677#ifdef DEBUG 678 if (ldebug(rt_sigreturn)) 679 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 680#endif 681 /* 682 * The trampoline code hands us the ucontext. 683 * It is unsafe to keep track of it ourselves, in the event that a 684 * program jumps out of a signal handler. 685 */ 686 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 687 return (EFAULT); 688 689 context = &uc.uc_mcontext; 690 691 /* 692 * Check for security violations. 693 */ 694#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 695 eflags = context->sc_eflags; 696 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 697 return(EINVAL); 698 699 /* 700 * Don't allow users to load a valid privileged %cs. Let the 701 * hardware check for invalid selectors, excess privilege in 702 * other selectors, invalid %eip's and invalid %esp's. 703 */ 704#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 705 if (!CS_SECURE(context->sc_cs)) { 706 ksiginfo_init_trap(&ksi); 707 ksi.ksi_signo = SIGBUS; 708 ksi.ksi_code = BUS_OBJERR; 709 ksi.ksi_trapno = T_PROTFLT; 710 ksi.ksi_addr = (void *)regs->tf_rip; 711 trapsignal(td, &ksi); 712 return(EINVAL); 713 } 714 715 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 716 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 717 718 /* 719 * Restore signal context 720 */ 721 regs->tf_gs = context->sc_gs; 722 regs->tf_fs = context->sc_fs; 723 regs->tf_es = context->sc_es; 724 regs->tf_ds = context->sc_ds; 725 regs->tf_rdi = context->sc_edi; 726 regs->tf_rsi = context->sc_esi; 727 regs->tf_rbp = context->sc_ebp; 728 regs->tf_rbx = context->sc_ebx; 729 regs->tf_rdx = context->sc_edx; 730 regs->tf_rcx = context->sc_ecx; 731 regs->tf_rax = context->sc_eax; 732 regs->tf_rip = context->sc_eip; 733 regs->tf_cs = context->sc_cs; 734 regs->tf_rflags = eflags; 735 regs->tf_rsp = context->sc_esp_at_signal; 736 regs->tf_ss = context->sc_ss; 737 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 738 739 /* 740 * call sigaltstack & ignore results.. 741 */ 742 lss = &uc.uc_stack; 743 ss.ss_sp = PTRIN(lss->ss_sp); 744 ss.ss_size = lss->ss_size; 745 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 746 747#ifdef DEBUG 748 if (ldebug(rt_sigreturn)) 749 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 750 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 751#endif 752 (void)kern_sigaltstack(td, &ss, NULL); 753 754 return (EJUSTRETURN); 755} 756 757static int 758linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 759{ 760 struct proc *p; 761 struct trapframe *frame; 762 763 p = td->td_proc; 764 frame = td->td_frame; 765 766 sa->args[0] = frame->tf_rbx; 767 sa->args[1] = frame->tf_rcx; 768 sa->args[2] = frame->tf_rdx; 769 sa->args[3] = frame->tf_rsi; 770 sa->args[4] = frame->tf_rdi; 771 sa->args[5] = frame->tf_rbp; /* Unconfirmed */ 772 sa->code = frame->tf_rax; 773 774 if (sa->code >= p->p_sysent->sv_size) 775 sa->callp = &p->p_sysent->sv_table[0]; 776 else 777 sa->callp = &p->p_sysent->sv_table[sa->code]; 778 sa->narg = sa->callp->sy_narg; 779 780 td->td_retval[0] = 0; 781 td->td_retval[1] = frame->tf_rdx; 782 783 return (0); 784} 785 786/* 787 * If a linux binary is exec'ing something, try this image activator 788 * first. We override standard shell script execution in order to 789 * be able to modify the interpreter path. We only do this if a linux 790 * binary is doing the exec, so we do not create an EXEC module for it. 791 */ 792static int exec_linux_imgact_try(struct image_params *iparams); 793 794static int 795exec_linux_imgact_try(struct image_params *imgp) 796{ 797 const char *head = (const char *)imgp->image_header; 798 char *rpath; 799 int error = -1; 800 801 /* 802 * The interpreter for shell scripts run from a linux binary needs 803 * to be located in /compat/linux if possible in order to recursively 804 * maintain linux path emulation. 805 */ 806 if (((const short *)head)[0] == SHELLMAGIC) { 807 /* 808 * Run our normal shell image activator. If it succeeds attempt 809 * to use the alternate path for the interpreter. If an 810 * alternate * path is found, use our stringspace to store it. 811 */ 812 if ((error = exec_shell_imgact(imgp)) == 0) { 813 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 814 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 815 AT_FDCWD); 816 if (rpath != NULL) 817 imgp->args->fname_buf = 818 imgp->interpreter_name = rpath; 819 } 820 } 821 return (error); 822} 823 824/* 825 * Clear registers on exec 826 * XXX copied from ia32_signal.c. 827 */ 828static void 829exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 830{ 831 struct trapframe *regs = td->td_frame; 832 struct pcb *pcb = td->td_pcb; 833 834 mtx_lock(&dt_lock); 835 if (td->td_proc->p_md.md_ldt != NULL) 836 user_ldt_free(td); 837 else 838 mtx_unlock(&dt_lock); 839 840 critical_enter(); 841 wrmsr(MSR_FSBASE, 0); 842 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 843 pcb->pcb_fsbase = 0; 844 pcb->pcb_gsbase = 0; 845 critical_exit(); 846 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 847 848 bzero((char *)regs, sizeof(struct trapframe)); 849 regs->tf_rip = imgp->entry_addr; 850 regs->tf_rsp = stack; 851 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 852 regs->tf_gs = _ugssel; 853 regs->tf_fs = _ufssel; 854 regs->tf_es = _udatasel; 855 regs->tf_ds = _udatasel; 856 regs->tf_ss = _udatasel; 857 regs->tf_flags = TF_HASSEGS; 858 regs->tf_cs = _ucode32sel; 859 regs->tf_rbx = imgp->ps_strings; 860 861 fpstate_drop(td); 862 863 /* Do full restore on return so that we can change to a different %cs */ 864 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); 865 td->td_retval[1] = 0; 866} 867 868/* 869 * XXX copied from ia32_sysvec.c. 870 */ 871static register_t * 872linux_copyout_strings(struct image_params *imgp) 873{ 874 int argc, envc; 875 u_int32_t *vectp; 876 char *stringp, *destp; 877 u_int32_t *stack_base; 878 struct linux32_ps_strings *arginfo; 879 880 /* 881 * Calculate string base and vector table pointers. 882 * Also deal with signal trampoline code for this exec type. 883 */ 884 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 885 destp = (caddr_t)arginfo - SPARE_USRSPACE - 886 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 887 888 /* 889 * If we have a valid auxargs ptr, prepare some room 890 * on the stack. 891 */ 892 if (imgp->auxargs) { 893 /* 894 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 895 * lower compatibility. 896 */ 897 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 898 (LINUX_AT_COUNT * 2); 899 /* 900 * The '+ 2' is for the null pointers at the end of each of 901 * the arg and env vector sets,and imgp->auxarg_size is room 902 * for argument of Runtime loader. 903 */ 904 vectp = (u_int32_t *) (destp - (imgp->args->argc + 905 imgp->args->envc + 2 + imgp->auxarg_size) * 906 sizeof(u_int32_t)); 907 908 } else 909 /* 910 * The '+ 2' is for the null pointers at the end of each of 911 * the arg and env vector sets 912 */ 913 vectp = (u_int32_t *)(destp - (imgp->args->argc + 914 imgp->args->envc + 2) * sizeof(u_int32_t)); 915 916 /* 917 * vectp also becomes our initial stack base 918 */ 919 stack_base = vectp; 920 921 stringp = imgp->args->begin_argv; 922 argc = imgp->args->argc; 923 envc = imgp->args->envc; 924 /* 925 * Copy out strings - arguments and environment. 926 */ 927 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 928 929 /* 930 * Fill in "ps_strings" struct for ps, w, etc. 931 */ 932 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); 933 suword32(&arginfo->ps_nargvstr, argc); 934 935 /* 936 * Fill in argument portion of vector table. 937 */ 938 for (; argc > 0; --argc) { 939 suword32(vectp++, (uint32_t)(intptr_t)destp); 940 while (*stringp++ != 0) 941 destp++; 942 destp++; 943 } 944 945 /* a null vector table pointer separates the argp's from the envp's */ 946 suword32(vectp++, 0); 947 948 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); 949 suword32(&arginfo->ps_nenvstr, envc); 950 951 /* 952 * Fill in environment portion of vector table. 953 */ 954 for (; envc > 0; --envc) { 955 suword32(vectp++, (uint32_t)(intptr_t)destp); 956 while (*stringp++ != 0) 957 destp++; 958 destp++; 959 } 960 961 /* end of vector table is a null pointer */ 962 suword32(vectp, 0); 963 964 return ((register_t *)stack_base); 965} 966 967static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 968 "32-bit Linux emulation"); 969 970static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 971SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 972 &linux32_maxdsiz, 0, ""); 973static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 974SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 975 &linux32_maxssiz, 0, ""); 976static u_long linux32_maxvmem = LINUX32_MAXVMEM; 977SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 978 &linux32_maxvmem, 0, ""); 979 980static void 981linux32_fixlimit(struct rlimit *rl, int which) 982{ 983 984 switch (which) { 985 case RLIMIT_DATA: 986 if (linux32_maxdsiz != 0) { 987 if (rl->rlim_cur > linux32_maxdsiz) 988 rl->rlim_cur = linux32_maxdsiz; 989 if (rl->rlim_max > linux32_maxdsiz) 990 rl->rlim_max = linux32_maxdsiz; 991 } 992 break; 993 case RLIMIT_STACK: 994 if (linux32_maxssiz != 0) { 995 if (rl->rlim_cur > linux32_maxssiz) 996 rl->rlim_cur = linux32_maxssiz; 997 if (rl->rlim_max > linux32_maxssiz) 998 rl->rlim_max = linux32_maxssiz; 999 } 1000 break; 1001 case RLIMIT_VMEM: 1002 if (linux32_maxvmem != 0) { 1003 if (rl->rlim_cur > linux32_maxvmem) 1004 rl->rlim_cur = linux32_maxvmem; 1005 if (rl->rlim_max > linux32_maxvmem) 1006 rl->rlim_max = linux32_maxvmem; 1007 } 1008 break; 1009 } 1010} 1011 1012struct sysentvec elf_linux_sysvec = { 1013 .sv_size = LINUX_SYS_MAXSYSCALL, 1014 .sv_table = linux_sysent, 1015 .sv_mask = 0, 1016 .sv_sigsize = LINUX_SIGTBLSZ, 1017 .sv_sigtbl = bsd_to_linux_signal, 1018 .sv_errsize = ELAST + 1, 1019 .sv_errtbl = bsd_to_linux_errno, 1020 .sv_transtrap = translate_traps, 1021 .sv_fixup = elf_linux_fixup, 1022 .sv_sendsig = linux_sendsig, 1023 .sv_sigcode = &_binary_linux32_locore_o_start, 1024 .sv_szsigcode = &linux_szsigcode, 1025 .sv_prepsyscall = NULL, 1026 .sv_name = "Linux ELF32", 1027 .sv_coredump = elf32_coredump, 1028 .sv_imgact_try = exec_linux_imgact_try, 1029 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1030 .sv_pagesize = PAGE_SIZE, 1031 .sv_minuser = VM_MIN_ADDRESS, 1032 .sv_maxuser = LINUX32_MAXUSER, 1033 .sv_usrstack = LINUX32_USRSTACK, 1034 .sv_psstrings = LINUX32_PS_STRINGS, 1035 .sv_stackprot = VM_PROT_ALL, 1036 .sv_copyout_strings = linux_copyout_strings, 1037 .sv_setregs = exec_linux_setregs, 1038 .sv_fixlimit = linux32_fixlimit, 1039 .sv_maxssiz = &linux32_maxssiz, 1040 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, 1041 .sv_set_syscall_retval = cpu_set_syscall_retval, 1042 .sv_fetch_syscall_args = linux32_fetch_syscall_args, 1043 .sv_syscallnames = NULL, 1044 .sv_shared_page_base = LINUX32_SHAREDPAGE, 1045 .sv_shared_page_len = PAGE_SIZE, 1046 .sv_schedtail = linux_schedtail, 1047 .sv_thread_detach = linux_thread_detach, 1048}; 1049 1050static void 1051linux_vdso_install(void *param) 1052{ 1053 1054 linux_szsigcode = (&_binary_linux32_locore_o_end - 1055 &_binary_linux32_locore_o_start); 1056 1057 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 1058 panic("Linux invalid vdso size\n"); 1059 1060 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 1061 1062 linux_shared_page_obj = __elfN(linux_shared_page_init) 1063 (&linux_shared_page_mapping); 1064 1065 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); 1066 1067 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 1068 linux_szsigcode); 1069 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 1070 1071 linux_kplatform = linux_shared_page_mapping + 1072 (linux_platform - (caddr_t)LINUX32_SHAREDPAGE); 1073} 1074SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 1075 (sysinit_cfunc_t)linux_vdso_install, NULL); 1076 1077static void 1078linux_vdso_deinstall(void *param) 1079{ 1080 1081 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 1082}; 1083SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 1084 (sysinit_cfunc_t)linux_vdso_deinstall, NULL); 1085 1086static char GNU_ABI_VENDOR[] = "GNU"; 1087static int GNULINUX_ABI_DESC = 0; 1088 1089static boolean_t 1090linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) 1091{ 1092 const Elf32_Word *desc; 1093 uintptr_t p; 1094 1095 p = (uintptr_t)(note + 1); 1096 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1097 1098 desc = (const Elf32_Word *)p; 1099 if (desc[0] != GNULINUX_ABI_DESC) 1100 return (FALSE); 1101 1102 /* 1103 * For linux we encode osrel as follows (see linux_mib.c): 1104 * VVVMMMIII (version, major, minor), see linux_mib.c. 1105 */ 1106 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1107 1108 return (TRUE); 1109} 1110 1111static Elf_Brandnote linux32_brandnote = { 1112 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1113 .hdr.n_descsz = 16, /* XXX at least 16 */ 1114 .hdr.n_type = 1, 1115 .vendor = GNU_ABI_VENDOR, 1116 .flags = BN_TRANSLATE_OSREL, 1117 .trans_osrel = linux32_trans_osrel 1118}; 1119 1120static Elf32_Brandinfo linux_brand = { 1121 .brand = ELFOSABI_LINUX, 1122 .machine = EM_386, 1123 .compat_3_brand = "Linux", 1124 .emul_path = "/compat/linux", 1125 .interp_path = "/lib/ld-linux.so.1", 1126 .sysvec = &elf_linux_sysvec, 1127 .interp_newpath = NULL, 1128 .brand_note = &linux32_brandnote, 1129 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1130}; 1131 1132static Elf32_Brandinfo linux_glibc2brand = { 1133 .brand = ELFOSABI_LINUX, 1134 .machine = EM_386, 1135 .compat_3_brand = "Linux", 1136 .emul_path = "/compat/linux", 1137 .interp_path = "/lib/ld-linux.so.2", 1138 .sysvec = &elf_linux_sysvec, 1139 .interp_newpath = NULL, 1140 .brand_note = &linux32_brandnote, 1141 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1142}; 1143 1144Elf32_Brandinfo *linux_brandlist[] = { 1145 &linux_brand, 1146 &linux_glibc2brand, 1147 NULL 1148}; 1149 1150static int 1151linux_elf_modevent(module_t mod, int type, void *data) 1152{ 1153 Elf32_Brandinfo **brandinfo; 1154 int error; 1155 struct linux_ioctl_handler **lihp; 1156 struct linux_device_handler **ldhp; 1157 1158 error = 0; 1159 1160 switch(type) { 1161 case MOD_LOAD: 1162 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1163 ++brandinfo) 1164 if (elf32_insert_brand_entry(*brandinfo) < 0) 1165 error = EINVAL; 1166 if (error == 0) { 1167 SET_FOREACH(lihp, linux_ioctl_handler_set) 1168 linux_ioctl_register_handler(*lihp); 1169 SET_FOREACH(ldhp, linux_device_handler_set) 1170 linux_device_register_handler(*ldhp); 1171 LIST_INIT(&futex_list); 1172 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1173 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, 1174 linux_proc_exit, NULL, 1000); 1175 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, 1176 linux_proc_exec, NULL, 1000); 1177 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, 1178 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); 1179 linux_osd_jail_register(); 1180 stclohz = (stathz ? stathz : hz); 1181 if (bootverbose) 1182 printf("Linux ELF exec handler installed\n"); 1183 } else 1184 printf("cannot insert Linux ELF brand handler\n"); 1185 break; 1186 case MOD_UNLOAD: 1187 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1188 ++brandinfo) 1189 if (elf32_brand_inuse(*brandinfo)) 1190 error = EBUSY; 1191 if (error == 0) { 1192 for (brandinfo = &linux_brandlist[0]; 1193 *brandinfo != NULL; ++brandinfo) 1194 if (elf32_remove_brand_entry(*brandinfo) < 0) 1195 error = EINVAL; 1196 } 1197 if (error == 0) { 1198 SET_FOREACH(lihp, linux_ioctl_handler_set) 1199 linux_ioctl_unregister_handler(*lihp); 1200 SET_FOREACH(ldhp, linux_device_handler_set) 1201 linux_device_unregister_handler(*ldhp); 1202 mtx_destroy(&futex_mtx); 1203 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1204 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1205 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); 1206 linux_osd_jail_deregister(); 1207 if (bootverbose) 1208 printf("Linux ELF exec handler removed\n"); 1209 } else 1210 printf("Could not deinstall ELF interpreter entry\n"); 1211 break; 1212 default: 1213 return (EOPNOTSUPP); 1214 } 1215 return (error); 1216} 1217 1218static moduledata_t linux_elf_mod = { 1219 "linuxelf", 1220 linux_elf_modevent, 1221 0 1222}; 1223 1224DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1225