linux32_sysvec.c revision 293514
1/*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 S��ren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33#include <sys/cdefs.h> 34__FBSDID("$FreeBSD: stable/10/sys/amd64/linux32/linux32_sysvec.c 293514 2016-01-09 15:44:38Z dchagin $"); 35#include "opt_compat.h" 36 37#ifndef COMPAT_FREEBSD32 38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" 39#endif 40 41#define __ELF_WORD_SIZE 32 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/exec.h> 46#include <sys/fcntl.h> 47#include <sys/imgact.h> 48#include <sys/imgact_elf.h> 49#include <sys/kernel.h> 50#include <sys/lock.h> 51#include <sys/malloc.h> 52#include <sys/module.h> 53#include <sys/mutex.h> 54#include <sys/proc.h> 55#include <sys/resourcevar.h> 56#include <sys/signalvar.h> 57#include <sys/sysctl.h> 58#include <sys/syscallsubr.h> 59#include <sys/sysent.h> 60#include <sys/sysproto.h> 61#include <sys/vnode.h> 62#include <sys/eventhandler.h> 63 64#include <vm/vm.h> 65#include <vm/pmap.h> 66#include <vm/vm_extern.h> 67#include <vm/vm_map.h> 68#include <vm/vm_object.h> 69#include <vm/vm_page.h> 70#include <vm/vm_param.h> 71 72#include <machine/cpu.h> 73#include <machine/md_var.h> 74#include <machine/pcb.h> 75#include <machine/specialreg.h> 76 77#include <amd64/linux32/linux.h> 78#include <amd64/linux32/linux32_proto.h> 79#include <compat/linux/linux_emul.h> 80#include <compat/linux/linux_futex.h> 81#include <compat/linux/linux_ioctl.h> 82#include <compat/linux/linux_mib.h> 83#include <compat/linux/linux_misc.h> 84#include <compat/linux/linux_signal.h> 85#include <compat/linux/linux_util.h> 86#include <compat/linux/linux_vdso.h> 87 88MODULE_VERSION(linux, 1); 89 90MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 91 92#define AUXARGS_ENTRY_32(pos, id, val) \ 93 do { \ 94 suword32(pos++, id); \ 95 suword32(pos++, val); \ 96 } while (0) 97 98#if BYTE_ORDER == LITTLE_ENDIAN 99#define SHELLMAGIC 0x2123 /* #! */ 100#else 101#define SHELLMAGIC 0x2321 102#endif 103 104/* 105 * Allow the sendsig functions to use the ldebug() facility 106 * even though they are not syscalls themselves. Map them 107 * to syscall 0. This is slightly less bogus than using 108 * ldebug(sigreturn). 109 */ 110#define LINUX_SYS_linux_rt_sendsig 0 111#define LINUX_SYS_linux_sendsig 0 112 113const char *linux_platform = "i686"; 114static int linux_szplatform; 115static int linux_szsigcode; 116static vm_object_t linux_shared_page_obj; 117static char *linux_shared_page_mapping; 118extern char _binary_linux32_locore_o_start; 119extern char _binary_linux32_locore_o_end; 120 121extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 122 123SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 124SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 125 126static int elf_linux_fixup(register_t **stack_base, 127 struct image_params *iparams); 128static register_t *linux_copyout_strings(struct image_params *imgp); 129static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 130static void exec_linux_setregs(struct thread *td, 131 struct image_params *imgp, u_long stack); 132static void linux32_fixlimit(struct rlimit *rl, int which); 133static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 134static void linux_vdso_install(void *param); 135static void linux_vdso_deinstall(void *param); 136 137static eventhandler_tag linux_exit_tag; 138static eventhandler_tag linux_exec_tag; 139static eventhandler_tag linux_thread_dtor_tag; 140 141/* 142 * Linux syscalls return negative errno's, we do positive and map them 143 * Reference: 144 * FreeBSD: src/sys/sys/errno.h 145 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 146 * linux-2.6.17.8/include/asm-generic/errno.h 147 */ 148static int bsd_to_linux_errno[ELAST + 1] = { 149 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 150 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 151 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 152 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 153 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 154 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 155 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 156 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 157 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 158 -72, -67, -71 159}; 160 161int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 162 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 163 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 164 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 165 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 166 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 167 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 168 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 169 0, LINUX_SIGUSR1, LINUX_SIGUSR2 170}; 171 172int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 173 SIGHUP, SIGINT, SIGQUIT, SIGILL, 174 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 175 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 176 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 177 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 178 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 179 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 180 SIGIO, SIGURG, SIGSYS 181}; 182 183#define LINUX_T_UNKNOWN 255 184static int _bsd_to_linux_trapcode[] = { 185 LINUX_T_UNKNOWN, /* 0 */ 186 6, /* 1 T_PRIVINFLT */ 187 LINUX_T_UNKNOWN, /* 2 */ 188 3, /* 3 T_BPTFLT */ 189 LINUX_T_UNKNOWN, /* 4 */ 190 LINUX_T_UNKNOWN, /* 5 */ 191 16, /* 6 T_ARITHTRAP */ 192 254, /* 7 T_ASTFLT */ 193 LINUX_T_UNKNOWN, /* 8 */ 194 13, /* 9 T_PROTFLT */ 195 1, /* 10 T_TRCTRAP */ 196 LINUX_T_UNKNOWN, /* 11 */ 197 14, /* 12 T_PAGEFLT */ 198 LINUX_T_UNKNOWN, /* 13 */ 199 17, /* 14 T_ALIGNFLT */ 200 LINUX_T_UNKNOWN, /* 15 */ 201 LINUX_T_UNKNOWN, /* 16 */ 202 LINUX_T_UNKNOWN, /* 17 */ 203 0, /* 18 T_DIVIDE */ 204 2, /* 19 T_NMI */ 205 4, /* 20 T_OFLOW */ 206 5, /* 21 T_BOUND */ 207 7, /* 22 T_DNA */ 208 8, /* 23 T_DOUBLEFLT */ 209 9, /* 24 T_FPOPFLT */ 210 10, /* 25 T_TSSFLT */ 211 11, /* 26 T_SEGNPFLT */ 212 12, /* 27 T_STKFLT */ 213 18, /* 28 T_MCHK */ 214 19, /* 29 T_XMMFLT */ 215 15 /* 30 T_RESERVED */ 216}; 217#define bsd_to_linux_trapcode(code) \ 218 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 219 _bsd_to_linux_trapcode[(code)]: \ 220 LINUX_T_UNKNOWN) 221 222struct linux32_ps_strings { 223 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 224 u_int ps_nargvstr; /* the number of argument strings */ 225 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 226 u_int ps_nenvstr; /* the number of environment strings */ 227}; 228 229LINUX_VDSO_SYM_INTPTR(linux32_sigcode); 230LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode); 231LINUX_VDSO_SYM_INTPTR(linux32_vsyscall); 232 233/* 234 * If FreeBSD & Linux have a difference of opinion about what a trap 235 * means, deal with it here. 236 * 237 * MPSAFE 238 */ 239static int 240translate_traps(int signal, int trap_code) 241{ 242 if (signal != SIGBUS) 243 return signal; 244 switch (trap_code) { 245 case T_PROTFLT: 246 case T_TSSFLT: 247 case T_DOUBLEFLT: 248 case T_PAGEFLT: 249 return SIGSEGV; 250 default: 251 return signal; 252 } 253} 254 255static int 256elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 257{ 258 Elf32_Auxargs *args; 259 Elf32_Addr *base; 260 Elf32_Addr *pos, *uplatform; 261 struct linux32_ps_strings *arginfo; 262 263 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 264 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform); 265 266 KASSERT(curthread->td_proc == imgp->proc, 267 ("unsafe elf_linux_fixup(), should be curproc")); 268 base = (Elf32_Addr *)*stack_base; 269 args = (Elf32_Auxargs *)imgp->auxargs; 270 pos = base + (imgp->args->argc + imgp->args->envc + 2); 271 272 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, 273 imgp->proc->p_sysent->sv_shared_page_base); 274 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); 275 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 276 277 /* 278 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 279 * as it has appeared in the 2.4.0-rc7 first time. 280 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 281 * glibc falls back to the hard-coded CLK_TCK value when aux entry 282 * is not present. 283 * Also see linux_times() implementation. 284 */ 285 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 286 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 287 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 288 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 289 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 290 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 291 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 292 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 293 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 294 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 295 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 296 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 297 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 298 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 299 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); 300 if (args->execfd != -1) 301 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 302 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 303 304 free(imgp->auxargs, M_TEMP); 305 imgp->auxargs = NULL; 306 307 base--; 308 suword32(base, (uint32_t)imgp->args->argc); 309 *stack_base = (register_t *)base; 310 return (0); 311} 312 313static void 314linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 315{ 316 struct thread *td = curthread; 317 struct proc *p = td->td_proc; 318 struct sigacts *psp; 319 struct trapframe *regs; 320 struct l_rt_sigframe *fp, frame; 321 int oonstack; 322 int sig; 323 int code; 324 325 sig = ksi->ksi_signo; 326 code = ksi->ksi_code; 327 PROC_LOCK_ASSERT(p, MA_OWNED); 328 psp = p->p_sigacts; 329 mtx_assert(&psp->ps_mtx, MA_OWNED); 330 regs = td->td_frame; 331 oonstack = sigonstack(regs->tf_rsp); 332 333#ifdef DEBUG 334 if (ldebug(rt_sendsig)) 335 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 336 catcher, sig, (void*)mask, code); 337#endif 338 /* 339 * Allocate space for the signal handler context. 340 */ 341 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 342 SIGISMEMBER(psp->ps_sigonstack, sig)) { 343 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 344 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 345 } else 346 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 347 mtx_unlock(&psp->ps_mtx); 348 349 /* 350 * Build the argument list for the signal handler. 351 */ 352 if (p->p_sysent->sv_sigtbl) 353 if (sig <= p->p_sysent->sv_sigsize) 354 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 355 356 bzero(&frame, sizeof(frame)); 357 358 frame.sf_handler = PTROUT(catcher); 359 frame.sf_sig = sig; 360 frame.sf_siginfo = PTROUT(&fp->sf_si); 361 frame.sf_ucontext = PTROUT(&fp->sf_sc); 362 363 /* Fill in POSIX parts */ 364 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 365 366 /* 367 * Build the signal context to be used by sigreturn 368 * and libgcc unwind. 369 */ 370 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 371 frame.sf_sc.uc_link = 0; /* XXX ??? */ 372 373 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 374 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 375 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 376 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 377 PROC_UNLOCK(p); 378 379 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 380 381 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 382 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 383 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 384 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 385 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 386 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; 387 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 388 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 389 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 390 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 391 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 392 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 393 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 394 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 395 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 396 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 397 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 398 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 399 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 400 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 401 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 402 403#ifdef DEBUG 404 if (ldebug(rt_sendsig)) 405 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 406 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 407 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 408#endif 409 410 if (copyout(&frame, fp, sizeof(frame)) != 0) { 411 /* 412 * Process has trashed its stack; give it an illegal 413 * instruction to halt it in its tracks. 414 */ 415#ifdef DEBUG 416 if (ldebug(rt_sendsig)) 417 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 418 fp, oonstack); 419#endif 420 PROC_LOCK(p); 421 sigexit(td, SIGILL); 422 } 423 424 /* 425 * Build context to run handler in. 426 */ 427 regs->tf_rsp = PTROUT(fp); 428 regs->tf_rip = linux32_rt_sigcode; 429 regs->tf_rflags &= ~(PSL_T | PSL_D); 430 regs->tf_cs = _ucode32sel; 431 regs->tf_ss = _udatasel; 432 regs->tf_ds = _udatasel; 433 regs->tf_es = _udatasel; 434 regs->tf_fs = _ufssel; 435 regs->tf_gs = _ugssel; 436 regs->tf_flags = TF_HASSEGS; 437 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 438 PROC_LOCK(p); 439 mtx_lock(&psp->ps_mtx); 440} 441 442 443/* 444 * Send an interrupt to process. 445 * 446 * Stack is set up to allow sigcode stored 447 * in u. to call routine, followed by kcall 448 * to sigreturn routine below. After sigreturn 449 * resets the signal mask, the stack, and the 450 * frame pointer, it returns to the user 451 * specified pc, psl. 452 */ 453static void 454linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 455{ 456 struct thread *td = curthread; 457 struct proc *p = td->td_proc; 458 struct sigacts *psp; 459 struct trapframe *regs; 460 struct l_sigframe *fp, frame; 461 l_sigset_t lmask; 462 int oonstack, i; 463 int sig, code; 464 465 sig = ksi->ksi_signo; 466 code = ksi->ksi_code; 467 PROC_LOCK_ASSERT(p, MA_OWNED); 468 psp = p->p_sigacts; 469 mtx_assert(&psp->ps_mtx, MA_OWNED); 470 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 471 /* Signal handler installed with SA_SIGINFO. */ 472 linux_rt_sendsig(catcher, ksi, mask); 473 return; 474 } 475 476 regs = td->td_frame; 477 oonstack = sigonstack(regs->tf_rsp); 478 479#ifdef DEBUG 480 if (ldebug(sendsig)) 481 printf(ARGS(sendsig, "%p, %d, %p, %u"), 482 catcher, sig, (void*)mask, code); 483#endif 484 485 /* 486 * Allocate space for the signal handler context. 487 */ 488 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 489 SIGISMEMBER(psp->ps_sigonstack, sig)) { 490 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 491 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 492 } else 493 fp = (struct l_sigframe *)regs->tf_rsp - 1; 494 mtx_unlock(&psp->ps_mtx); 495 PROC_UNLOCK(p); 496 497 /* 498 * Build the argument list for the signal handler. 499 */ 500 if (p->p_sysent->sv_sigtbl) 501 if (sig <= p->p_sysent->sv_sigsize) 502 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 503 504 bzero(&frame, sizeof(frame)); 505 506 frame.sf_handler = PTROUT(catcher); 507 frame.sf_sig = sig; 508 509 bsd_to_linux_sigset(mask, &lmask); 510 511 /* 512 * Build the signal context to be used by sigreturn. 513 */ 514 frame.sf_sc.sc_mask = lmask.__bits[0]; 515 frame.sf_sc.sc_gs = regs->tf_gs; 516 frame.sf_sc.sc_fs = regs->tf_fs; 517 frame.sf_sc.sc_es = regs->tf_es; 518 frame.sf_sc.sc_ds = regs->tf_ds; 519 frame.sf_sc.sc_edi = regs->tf_rdi; 520 frame.sf_sc.sc_esi = regs->tf_rsi; 521 frame.sf_sc.sc_ebp = regs->tf_rbp; 522 frame.sf_sc.sc_ebx = regs->tf_rbx; 523 frame.sf_sc.sc_esp = regs->tf_rsp; 524 frame.sf_sc.sc_edx = regs->tf_rdx; 525 frame.sf_sc.sc_ecx = regs->tf_rcx; 526 frame.sf_sc.sc_eax = regs->tf_rax; 527 frame.sf_sc.sc_eip = regs->tf_rip; 528 frame.sf_sc.sc_cs = regs->tf_cs; 529 frame.sf_sc.sc_eflags = regs->tf_rflags; 530 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 531 frame.sf_sc.sc_ss = regs->tf_ss; 532 frame.sf_sc.sc_err = regs->tf_err; 533 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 534 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 535 536 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 537 frame.sf_extramask[i] = lmask.__bits[i+1]; 538 539 if (copyout(&frame, fp, sizeof(frame)) != 0) { 540 /* 541 * Process has trashed its stack; give it an illegal 542 * instruction to halt it in its tracks. 543 */ 544 PROC_LOCK(p); 545 sigexit(td, SIGILL); 546 } 547 548 /* 549 * Build context to run handler in. 550 */ 551 regs->tf_rsp = PTROUT(fp); 552 regs->tf_rip = linux32_sigcode; 553 regs->tf_rflags &= ~(PSL_T | PSL_D); 554 regs->tf_cs = _ucode32sel; 555 regs->tf_ss = _udatasel; 556 regs->tf_ds = _udatasel; 557 regs->tf_es = _udatasel; 558 regs->tf_fs = _ufssel; 559 regs->tf_gs = _ugssel; 560 regs->tf_flags = TF_HASSEGS; 561 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 562 PROC_LOCK(p); 563 mtx_lock(&psp->ps_mtx); 564} 565 566/* 567 * System call to cleanup state after a signal 568 * has been taken. Reset signal mask and 569 * stack state from context left by sendsig (above). 570 * Return to previous pc and psl as specified by 571 * context left by sendsig. Check carefully to 572 * make sure that the user has not modified the 573 * psl to gain improper privileges or to cause 574 * a machine fault. 575 */ 576int 577linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 578{ 579 struct l_sigframe frame; 580 struct trapframe *regs; 581 sigset_t bmask; 582 l_sigset_t lmask; 583 int eflags, i; 584 ksiginfo_t ksi; 585 586 regs = td->td_frame; 587 588#ifdef DEBUG 589 if (ldebug(sigreturn)) 590 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 591#endif 592 /* 593 * The trampoline code hands us the sigframe. 594 * It is unsafe to keep track of it ourselves, in the event that a 595 * program jumps out of a signal handler. 596 */ 597 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 598 return (EFAULT); 599 600 /* 601 * Check for security violations. 602 */ 603#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 604 eflags = frame.sf_sc.sc_eflags; 605 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 606 return(EINVAL); 607 608 /* 609 * Don't allow users to load a valid privileged %cs. Let the 610 * hardware check for invalid selectors, excess privilege in 611 * other selectors, invalid %eip's and invalid %esp's. 612 */ 613#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 614 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 615 ksiginfo_init_trap(&ksi); 616 ksi.ksi_signo = SIGBUS; 617 ksi.ksi_code = BUS_OBJERR; 618 ksi.ksi_trapno = T_PROTFLT; 619 ksi.ksi_addr = (void *)regs->tf_rip; 620 trapsignal(td, &ksi); 621 return(EINVAL); 622 } 623 624 lmask.__bits[0] = frame.sf_sc.sc_mask; 625 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 626 lmask.__bits[i+1] = frame.sf_extramask[i]; 627 linux_to_bsd_sigset(&lmask, &bmask); 628 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 629 630 /* 631 * Restore signal context. 632 */ 633 regs->tf_rdi = frame.sf_sc.sc_edi; 634 regs->tf_rsi = frame.sf_sc.sc_esi; 635 regs->tf_rbp = frame.sf_sc.sc_ebp; 636 regs->tf_rbx = frame.sf_sc.sc_ebx; 637 regs->tf_rdx = frame.sf_sc.sc_edx; 638 regs->tf_rcx = frame.sf_sc.sc_ecx; 639 regs->tf_rax = frame.sf_sc.sc_eax; 640 regs->tf_rip = frame.sf_sc.sc_eip; 641 regs->tf_cs = frame.sf_sc.sc_cs; 642 regs->tf_ds = frame.sf_sc.sc_ds; 643 regs->tf_es = frame.sf_sc.sc_es; 644 regs->tf_fs = frame.sf_sc.sc_fs; 645 regs->tf_gs = frame.sf_sc.sc_gs; 646 regs->tf_rflags = eflags; 647 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 648 regs->tf_ss = frame.sf_sc.sc_ss; 649 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 650 651 return (EJUSTRETURN); 652} 653 654/* 655 * System call to cleanup state after a signal 656 * has been taken. Reset signal mask and 657 * stack state from context left by rt_sendsig (above). 658 * Return to previous pc and psl as specified by 659 * context left by sendsig. Check carefully to 660 * make sure that the user has not modified the 661 * psl to gain improper privileges or to cause 662 * a machine fault. 663 */ 664int 665linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 666{ 667 struct l_ucontext uc; 668 struct l_sigcontext *context; 669 sigset_t bmask; 670 l_stack_t *lss; 671 stack_t ss; 672 struct trapframe *regs; 673 int eflags; 674 ksiginfo_t ksi; 675 676 regs = td->td_frame; 677 678#ifdef DEBUG 679 if (ldebug(rt_sigreturn)) 680 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 681#endif 682 /* 683 * The trampoline code hands us the ucontext. 684 * It is unsafe to keep track of it ourselves, in the event that a 685 * program jumps out of a signal handler. 686 */ 687 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 688 return (EFAULT); 689 690 context = &uc.uc_mcontext; 691 692 /* 693 * Check for security violations. 694 */ 695#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 696 eflags = context->sc_eflags; 697 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 698 return(EINVAL); 699 700 /* 701 * Don't allow users to load a valid privileged %cs. Let the 702 * hardware check for invalid selectors, excess privilege in 703 * other selectors, invalid %eip's and invalid %esp's. 704 */ 705#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 706 if (!CS_SECURE(context->sc_cs)) { 707 ksiginfo_init_trap(&ksi); 708 ksi.ksi_signo = SIGBUS; 709 ksi.ksi_code = BUS_OBJERR; 710 ksi.ksi_trapno = T_PROTFLT; 711 ksi.ksi_addr = (void *)regs->tf_rip; 712 trapsignal(td, &ksi); 713 return(EINVAL); 714 } 715 716 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 717 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 718 719 /* 720 * Restore signal context 721 */ 722 regs->tf_gs = context->sc_gs; 723 regs->tf_fs = context->sc_fs; 724 regs->tf_es = context->sc_es; 725 regs->tf_ds = context->sc_ds; 726 regs->tf_rdi = context->sc_edi; 727 regs->tf_rsi = context->sc_esi; 728 regs->tf_rbp = context->sc_ebp; 729 regs->tf_rbx = context->sc_ebx; 730 regs->tf_rdx = context->sc_edx; 731 regs->tf_rcx = context->sc_ecx; 732 regs->tf_rax = context->sc_eax; 733 regs->tf_rip = context->sc_eip; 734 regs->tf_cs = context->sc_cs; 735 regs->tf_rflags = eflags; 736 regs->tf_rsp = context->sc_esp_at_signal; 737 regs->tf_ss = context->sc_ss; 738 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 739 740 /* 741 * call sigaltstack & ignore results.. 742 */ 743 lss = &uc.uc_stack; 744 ss.ss_sp = PTRIN(lss->ss_sp); 745 ss.ss_size = lss->ss_size; 746 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 747 748#ifdef DEBUG 749 if (ldebug(rt_sigreturn)) 750 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 751 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 752#endif 753 (void)kern_sigaltstack(td, &ss, NULL); 754 755 return (EJUSTRETURN); 756} 757 758static int 759linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 760{ 761 struct proc *p; 762 struct trapframe *frame; 763 764 p = td->td_proc; 765 frame = td->td_frame; 766 767 sa->args[0] = frame->tf_rbx; 768 sa->args[1] = frame->tf_rcx; 769 sa->args[2] = frame->tf_rdx; 770 sa->args[3] = frame->tf_rsi; 771 sa->args[4] = frame->tf_rdi; 772 sa->args[5] = frame->tf_rbp; /* Unconfirmed */ 773 sa->code = frame->tf_rax; 774 775 if (sa->code >= p->p_sysent->sv_size) 776 sa->callp = &p->p_sysent->sv_table[0]; 777 else 778 sa->callp = &p->p_sysent->sv_table[sa->code]; 779 sa->narg = sa->callp->sy_narg; 780 781 td->td_retval[0] = 0; 782 td->td_retval[1] = frame->tf_rdx; 783 784 return (0); 785} 786 787/* 788 * If a linux binary is exec'ing something, try this image activator 789 * first. We override standard shell script execution in order to 790 * be able to modify the interpreter path. We only do this if a linux 791 * binary is doing the exec, so we do not create an EXEC module for it. 792 */ 793static int exec_linux_imgact_try(struct image_params *iparams); 794 795static int 796exec_linux_imgact_try(struct image_params *imgp) 797{ 798 const char *head = (const char *)imgp->image_header; 799 char *rpath; 800 int error = -1; 801 802 /* 803 * The interpreter for shell scripts run from a linux binary needs 804 * to be located in /compat/linux if possible in order to recursively 805 * maintain linux path emulation. 806 */ 807 if (((const short *)head)[0] == SHELLMAGIC) { 808 /* 809 * Run our normal shell image activator. If it succeeds attempt 810 * to use the alternate path for the interpreter. If an 811 * alternate * path is found, use our stringspace to store it. 812 */ 813 if ((error = exec_shell_imgact(imgp)) == 0) { 814 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 815 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 816 AT_FDCWD); 817 if (rpath != NULL) 818 imgp->args->fname_buf = 819 imgp->interpreter_name = rpath; 820 } 821 } 822 return (error); 823} 824 825/* 826 * Clear registers on exec 827 * XXX copied from ia32_signal.c. 828 */ 829static void 830exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 831{ 832 struct trapframe *regs = td->td_frame; 833 struct pcb *pcb = td->td_pcb; 834 835 mtx_lock(&dt_lock); 836 if (td->td_proc->p_md.md_ldt != NULL) 837 user_ldt_free(td); 838 else 839 mtx_unlock(&dt_lock); 840 841 critical_enter(); 842 wrmsr(MSR_FSBASE, 0); 843 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 844 pcb->pcb_fsbase = 0; 845 pcb->pcb_gsbase = 0; 846 critical_exit(); 847 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 848 849 bzero((char *)regs, sizeof(struct trapframe)); 850 regs->tf_rip = imgp->entry_addr; 851 regs->tf_rsp = stack; 852 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 853 regs->tf_gs = _ugssel; 854 regs->tf_fs = _ufssel; 855 regs->tf_es = _udatasel; 856 regs->tf_ds = _udatasel; 857 regs->tf_ss = _udatasel; 858 regs->tf_flags = TF_HASSEGS; 859 regs->tf_cs = _ucode32sel; 860 regs->tf_rbx = imgp->ps_strings; 861 862 fpstate_drop(td); 863 864 /* Do full restore on return so that we can change to a different %cs */ 865 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); 866 td->td_retval[1] = 0; 867} 868 869/* 870 * XXX copied from ia32_sysvec.c. 871 */ 872static register_t * 873linux_copyout_strings(struct image_params *imgp) 874{ 875 int argc, envc; 876 u_int32_t *vectp; 877 char *stringp, *destp; 878 u_int32_t *stack_base; 879 struct linux32_ps_strings *arginfo; 880 881 /* 882 * Calculate string base and vector table pointers. 883 * Also deal with signal trampoline code for this exec type. 884 */ 885 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 886 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform - 887 roundup((ARG_MAX - imgp->args->stringspace), 888 sizeof(char *)); 889 890 /* 891 * Install LINUX_PLATFORM 892 */ 893 copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform), 894 linux_szplatform); 895 896 /* 897 * If we have a valid auxargs ptr, prepare some room 898 * on the stack. 899 */ 900 if (imgp->auxargs) { 901 /* 902 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 903 * lower compatibility. 904 */ 905 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 906 (LINUX_AT_COUNT * 2); 907 /* 908 * The '+ 2' is for the null pointers at the end of each of 909 * the arg and env vector sets,and imgp->auxarg_size is room 910 * for argument of Runtime loader. 911 */ 912 vectp = (u_int32_t *) (destp - (imgp->args->argc + 913 imgp->args->envc + 2 + imgp->auxarg_size) * 914 sizeof(u_int32_t)); 915 916 } else 917 /* 918 * The '+ 2' is for the null pointers at the end of each of 919 * the arg and env vector sets 920 */ 921 vectp = (u_int32_t *)(destp - (imgp->args->argc + 922 imgp->args->envc + 2) * sizeof(u_int32_t)); 923 924 /* 925 * vectp also becomes our initial stack base 926 */ 927 stack_base = vectp; 928 929 stringp = imgp->args->begin_argv; 930 argc = imgp->args->argc; 931 envc = imgp->args->envc; 932 /* 933 * Copy out strings - arguments and environment. 934 */ 935 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 936 937 /* 938 * Fill in "ps_strings" struct for ps, w, etc. 939 */ 940 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); 941 suword32(&arginfo->ps_nargvstr, argc); 942 943 /* 944 * Fill in argument portion of vector table. 945 */ 946 for (; argc > 0; --argc) { 947 suword32(vectp++, (uint32_t)(intptr_t)destp); 948 while (*stringp++ != 0) 949 destp++; 950 destp++; 951 } 952 953 /* a null vector table pointer separates the argp's from the envp's */ 954 suword32(vectp++, 0); 955 956 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); 957 suword32(&arginfo->ps_nenvstr, envc); 958 959 /* 960 * Fill in environment portion of vector table. 961 */ 962 for (; envc > 0; --envc) { 963 suword32(vectp++, (uint32_t)(intptr_t)destp); 964 while (*stringp++ != 0) 965 destp++; 966 destp++; 967 } 968 969 /* end of vector table is a null pointer */ 970 suword32(vectp, 0); 971 972 return ((register_t *)stack_base); 973} 974 975static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 976 "32-bit Linux emulation"); 977 978static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 979SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 980 &linux32_maxdsiz, 0, ""); 981static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 982SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 983 &linux32_maxssiz, 0, ""); 984static u_long linux32_maxvmem = LINUX32_MAXVMEM; 985SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 986 &linux32_maxvmem, 0, ""); 987 988static void 989linux32_fixlimit(struct rlimit *rl, int which) 990{ 991 992 switch (which) { 993 case RLIMIT_DATA: 994 if (linux32_maxdsiz != 0) { 995 if (rl->rlim_cur > linux32_maxdsiz) 996 rl->rlim_cur = linux32_maxdsiz; 997 if (rl->rlim_max > linux32_maxdsiz) 998 rl->rlim_max = linux32_maxdsiz; 999 } 1000 break; 1001 case RLIMIT_STACK: 1002 if (linux32_maxssiz != 0) { 1003 if (rl->rlim_cur > linux32_maxssiz) 1004 rl->rlim_cur = linux32_maxssiz; 1005 if (rl->rlim_max > linux32_maxssiz) 1006 rl->rlim_max = linux32_maxssiz; 1007 } 1008 break; 1009 case RLIMIT_VMEM: 1010 if (linux32_maxvmem != 0) { 1011 if (rl->rlim_cur > linux32_maxvmem) 1012 rl->rlim_cur = linux32_maxvmem; 1013 if (rl->rlim_max > linux32_maxvmem) 1014 rl->rlim_max = linux32_maxvmem; 1015 } 1016 break; 1017 } 1018} 1019 1020struct sysentvec elf_linux_sysvec = { 1021 .sv_size = LINUX_SYS_MAXSYSCALL, 1022 .sv_table = linux_sysent, 1023 .sv_mask = 0, 1024 .sv_sigsize = LINUX_SIGTBLSZ, 1025 .sv_sigtbl = bsd_to_linux_signal, 1026 .sv_errsize = ELAST + 1, 1027 .sv_errtbl = bsd_to_linux_errno, 1028 .sv_transtrap = translate_traps, 1029 .sv_fixup = elf_linux_fixup, 1030 .sv_sendsig = linux_sendsig, 1031 .sv_sigcode = &_binary_linux32_locore_o_start, 1032 .sv_szsigcode = &linux_szsigcode, 1033 .sv_prepsyscall = NULL, 1034 .sv_name = "Linux ELF32", 1035 .sv_coredump = elf32_coredump, 1036 .sv_imgact_try = exec_linux_imgact_try, 1037 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1038 .sv_pagesize = PAGE_SIZE, 1039 .sv_minuser = VM_MIN_ADDRESS, 1040 .sv_maxuser = LINUX32_MAXUSER, 1041 .sv_usrstack = LINUX32_USRSTACK, 1042 .sv_psstrings = LINUX32_PS_STRINGS, 1043 .sv_stackprot = VM_PROT_ALL, 1044 .sv_copyout_strings = linux_copyout_strings, 1045 .sv_setregs = exec_linux_setregs, 1046 .sv_fixlimit = linux32_fixlimit, 1047 .sv_maxssiz = &linux32_maxssiz, 1048 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, 1049 .sv_set_syscall_retval = cpu_set_syscall_retval, 1050 .sv_fetch_syscall_args = linux32_fetch_syscall_args, 1051 .sv_syscallnames = NULL, 1052 .sv_shared_page_base = LINUX32_SHAREDPAGE, 1053 .sv_shared_page_len = PAGE_SIZE, 1054 .sv_schedtail = linux_schedtail, 1055 .sv_thread_detach = linux_thread_detach, 1056}; 1057 1058static void 1059linux_vdso_install(void *param) 1060{ 1061 1062 linux_szsigcode = (&_binary_linux32_locore_o_end - 1063 &_binary_linux32_locore_o_start); 1064 1065 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 1066 panic("Linux invalid vdso size\n"); 1067 1068 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 1069 1070 linux_shared_page_obj = __elfN(linux_shared_page_init) 1071 (&linux_shared_page_mapping); 1072 1073 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); 1074 1075 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 1076 linux_szsigcode); 1077 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 1078} 1079SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 1080 (sysinit_cfunc_t)linux_vdso_install, NULL); 1081 1082static void 1083linux_vdso_deinstall(void *param) 1084{ 1085 1086 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 1087}; 1088SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 1089 (sysinit_cfunc_t)linux_vdso_deinstall, NULL); 1090 1091static char GNU_ABI_VENDOR[] = "GNU"; 1092static int GNULINUX_ABI_DESC = 0; 1093 1094static boolean_t 1095linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) 1096{ 1097 const Elf32_Word *desc; 1098 uintptr_t p; 1099 1100 p = (uintptr_t)(note + 1); 1101 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1102 1103 desc = (const Elf32_Word *)p; 1104 if (desc[0] != GNULINUX_ABI_DESC) 1105 return (FALSE); 1106 1107 /* 1108 * For linux we encode osrel as follows (see linux_mib.c): 1109 * VVVMMMIII (version, major, minor), see linux_mib.c. 1110 */ 1111 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1112 1113 return (TRUE); 1114} 1115 1116static Elf_Brandnote linux32_brandnote = { 1117 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1118 .hdr.n_descsz = 16, /* XXX at least 16 */ 1119 .hdr.n_type = 1, 1120 .vendor = GNU_ABI_VENDOR, 1121 .flags = BN_TRANSLATE_OSREL, 1122 .trans_osrel = linux32_trans_osrel 1123}; 1124 1125static Elf32_Brandinfo linux_brand = { 1126 .brand = ELFOSABI_LINUX, 1127 .machine = EM_386, 1128 .compat_3_brand = "Linux", 1129 .emul_path = "/compat/linux", 1130 .interp_path = "/lib/ld-linux.so.1", 1131 .sysvec = &elf_linux_sysvec, 1132 .interp_newpath = NULL, 1133 .brand_note = &linux32_brandnote, 1134 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1135}; 1136 1137static Elf32_Brandinfo linux_glibc2brand = { 1138 .brand = ELFOSABI_LINUX, 1139 .machine = EM_386, 1140 .compat_3_brand = "Linux", 1141 .emul_path = "/compat/linux", 1142 .interp_path = "/lib/ld-linux.so.2", 1143 .sysvec = &elf_linux_sysvec, 1144 .interp_newpath = NULL, 1145 .brand_note = &linux32_brandnote, 1146 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1147}; 1148 1149Elf32_Brandinfo *linux_brandlist[] = { 1150 &linux_brand, 1151 &linux_glibc2brand, 1152 NULL 1153}; 1154 1155static int 1156linux_elf_modevent(module_t mod, int type, void *data) 1157{ 1158 Elf32_Brandinfo **brandinfo; 1159 int error; 1160 struct linux_ioctl_handler **lihp; 1161 struct linux_device_handler **ldhp; 1162 1163 error = 0; 1164 1165 switch(type) { 1166 case MOD_LOAD: 1167 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1168 ++brandinfo) 1169 if (elf32_insert_brand_entry(*brandinfo) < 0) 1170 error = EINVAL; 1171 if (error == 0) { 1172 SET_FOREACH(lihp, linux_ioctl_handler_set) 1173 linux_ioctl_register_handler(*lihp); 1174 SET_FOREACH(ldhp, linux_device_handler_set) 1175 linux_device_register_handler(*ldhp); 1176 LIST_INIT(&futex_list); 1177 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1178 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, 1179 linux_proc_exit, NULL, 1000); 1180 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, 1181 linux_proc_exec, NULL, 1000); 1182 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, 1183 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); 1184 linux_szplatform = roundup(strlen(linux_platform) + 1, 1185 sizeof(char *)); 1186 linux_osd_jail_register(); 1187 stclohz = (stathz ? stathz : hz); 1188 if (bootverbose) 1189 printf("Linux ELF exec handler installed\n"); 1190 } else 1191 printf("cannot insert Linux ELF brand handler\n"); 1192 break; 1193 case MOD_UNLOAD: 1194 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1195 ++brandinfo) 1196 if (elf32_brand_inuse(*brandinfo)) 1197 error = EBUSY; 1198 if (error == 0) { 1199 for (brandinfo = &linux_brandlist[0]; 1200 *brandinfo != NULL; ++brandinfo) 1201 if (elf32_remove_brand_entry(*brandinfo) < 0) 1202 error = EINVAL; 1203 } 1204 if (error == 0) { 1205 SET_FOREACH(lihp, linux_ioctl_handler_set) 1206 linux_ioctl_unregister_handler(*lihp); 1207 SET_FOREACH(ldhp, linux_device_handler_set) 1208 linux_device_unregister_handler(*ldhp); 1209 mtx_destroy(&futex_mtx); 1210 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1211 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1212 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); 1213 linux_osd_jail_deregister(); 1214 if (bootverbose) 1215 printf("Linux ELF exec handler removed\n"); 1216 } else 1217 printf("Could not deinstall ELF interpreter entry\n"); 1218 break; 1219 default: 1220 return (EOPNOTSUPP); 1221 } 1222 return (error); 1223} 1224 1225static moduledata_t linux_elf_mod = { 1226 "linuxelf", 1227 linux_elf_modevent, 1228 0 1229}; 1230 1231DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1232