linux32_sysvec.c revision 293535
1/*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 Søren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33#include <sys/cdefs.h> 34__FBSDID("$FreeBSD: stable/10/sys/amd64/linux32/linux32_sysvec.c 293535 2016-01-09 16:24:30Z dchagin $"); 35#include "opt_compat.h" 36 37#ifndef COMPAT_FREEBSD32 38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" 
39#endif 40 41#define __ELF_WORD_SIZE 32 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/exec.h> 46#include <sys/fcntl.h> 47#include <sys/imgact.h> 48#include <sys/imgact_elf.h> 49#include <sys/kernel.h> 50#include <sys/lock.h> 51#include <sys/malloc.h> 52#include <sys/module.h> 53#include <sys/mutex.h> 54#include <sys/proc.h> 55#include <sys/resourcevar.h> 56#include <sys/signalvar.h> 57#include <sys/sysctl.h> 58#include <sys/syscallsubr.h> 59#include <sys/sysent.h> 60#include <sys/sysproto.h> 61#include <sys/vnode.h> 62#include <sys/eventhandler.h> 63 64#include <vm/vm.h> 65#include <vm/pmap.h> 66#include <vm/vm_extern.h> 67#include <vm/vm_map.h> 68#include <vm/vm_object.h> 69#include <vm/vm_page.h> 70#include <vm/vm_param.h> 71 72#include <machine/cpu.h> 73#include <machine/md_var.h> 74#include <machine/pcb.h> 75#include <machine/specialreg.h> 76 77#include <amd64/linux32/linux.h> 78#include <amd64/linux32/linux32_proto.h> 79#include <compat/linux/linux_emul.h> 80#include <compat/linux/linux_futex.h> 81#include <compat/linux/linux_ioctl.h> 82#include <compat/linux/linux_mib.h> 83#include <compat/linux/linux_misc.h> 84#include <compat/linux/linux_signal.h> 85#include <compat/linux/linux_util.h> 86#include <compat/linux/linux_vdso.h> 87 88MODULE_VERSION(linux, 1); 89 90#define AUXARGS_ENTRY_32(pos, id, val) \ 91 do { \ 92 suword32(pos++, id); \ 93 suword32(pos++, val); \ 94 } while (0) 95 96#if BYTE_ORDER == LITTLE_ENDIAN 97#define SHELLMAGIC 0x2123 /* #! */ 98#else 99#define SHELLMAGIC 0x2321 100#endif 101 102/* 103 * Allow the sendsig functions to use the ldebug() facility 104 * even though they are not syscalls themselves. Map them 105 * to syscall 0. This is slightly less bogus than using 106 * ldebug(sigreturn). 
107 */ 108#define LINUX_SYS_linux_rt_sendsig 0 109#define LINUX_SYS_linux_sendsig 0 110 111const char *linux_kplatform; 112static int linux_szsigcode; 113static vm_object_t linux_shared_page_obj; 114static char *linux_shared_page_mapping; 115extern char _binary_linux32_locore_o_start; 116extern char _binary_linux32_locore_o_end; 117 118extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 119 120SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 121 122static int elf_linux_fixup(register_t **stack_base, 123 struct image_params *iparams); 124static register_t *linux_copyout_strings(struct image_params *imgp); 125static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 126static void exec_linux_setregs(struct thread *td, 127 struct image_params *imgp, u_long stack); 128static void linux32_fixlimit(struct rlimit *rl, int which); 129static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 130static void linux_vdso_install(void *param); 131static void linux_vdso_deinstall(void *param); 132 133/* 134 * Linux syscalls return negative errno's, we do positive and map them 135 * Reference: 136 * FreeBSD: src/sys/sys/errno.h 137 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 138 * linux-2.6.17.8/include/asm-generic/errno.h 139 */ 140static int bsd_to_linux_errno[ELAST + 1] = { 141 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 142 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 143 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 144 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 145 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 146 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 147 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 148 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 149 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 150 -72, -67, -71 151}; 152 153int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 154 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 155 LINUX_SIGTRAP, LINUX_SIGABRT, 0, 
LINUX_SIGFPE, 156 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 157 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 158 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 159 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 160 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 161 0, LINUX_SIGUSR1, LINUX_SIGUSR2 162}; 163 164int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 165 SIGHUP, SIGINT, SIGQUIT, SIGILL, 166 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 167 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 168 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 169 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 170 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 171 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 172 SIGIO, SIGURG, SIGSYS 173}; 174 175#define LINUX_T_UNKNOWN 255 176static int _bsd_to_linux_trapcode[] = { 177 LINUX_T_UNKNOWN, /* 0 */ 178 6, /* 1 T_PRIVINFLT */ 179 LINUX_T_UNKNOWN, /* 2 */ 180 3, /* 3 T_BPTFLT */ 181 LINUX_T_UNKNOWN, /* 4 */ 182 LINUX_T_UNKNOWN, /* 5 */ 183 16, /* 6 T_ARITHTRAP */ 184 254, /* 7 T_ASTFLT */ 185 LINUX_T_UNKNOWN, /* 8 */ 186 13, /* 9 T_PROTFLT */ 187 1, /* 10 T_TRCTRAP */ 188 LINUX_T_UNKNOWN, /* 11 */ 189 14, /* 12 T_PAGEFLT */ 190 LINUX_T_UNKNOWN, /* 13 */ 191 17, /* 14 T_ALIGNFLT */ 192 LINUX_T_UNKNOWN, /* 15 */ 193 LINUX_T_UNKNOWN, /* 16 */ 194 LINUX_T_UNKNOWN, /* 17 */ 195 0, /* 18 T_DIVIDE */ 196 2, /* 19 T_NMI */ 197 4, /* 20 T_OFLOW */ 198 5, /* 21 T_BOUND */ 199 7, /* 22 T_DNA */ 200 8, /* 23 T_DOUBLEFLT */ 201 9, /* 24 T_FPOPFLT */ 202 10, /* 25 T_TSSFLT */ 203 11, /* 26 T_SEGNPFLT */ 204 12, /* 27 T_STKFLT */ 205 18, /* 28 T_MCHK */ 206 19, /* 29 T_XMMFLT */ 207 15 /* 30 T_RESERVED */ 208}; 209#define bsd_to_linux_trapcode(code) \ 210 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? 
\ 211 _bsd_to_linux_trapcode[(code)]: \ 212 LINUX_T_UNKNOWN) 213 214struct linux32_ps_strings { 215 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 216 u_int ps_nargvstr; /* the number of argument strings */ 217 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 218 u_int ps_nenvstr; /* the number of environment strings */ 219}; 220 221LINUX_VDSO_SYM_INTPTR(linux32_sigcode); 222LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode); 223LINUX_VDSO_SYM_INTPTR(linux32_vsyscall); 224LINUX_VDSO_SYM_CHAR(linux_platform); 225 226/* 227 * If FreeBSD & Linux have a difference of opinion about what a trap 228 * means, deal with it here. 229 * 230 * MPSAFE 231 */ 232static int 233translate_traps(int signal, int trap_code) 234{ 235 if (signal != SIGBUS) 236 return signal; 237 switch (trap_code) { 238 case T_PROTFLT: 239 case T_TSSFLT: 240 case T_DOUBLEFLT: 241 case T_PAGEFLT: 242 return SIGSEGV; 243 default: 244 return signal; 245 } 246} 247 248static int 249elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 250{ 251 Elf32_Auxargs *args; 252 Elf32_Addr *base; 253 Elf32_Addr *pos; 254 struct linux32_ps_strings *arginfo; 255 256 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 257 258 KASSERT(curthread->td_proc == imgp->proc, 259 ("unsafe elf_linux_fixup(), should be curproc")); 260 base = (Elf32_Addr *)*stack_base; 261 args = (Elf32_Auxargs *)imgp->auxargs; 262 pos = base + (imgp->args->argc + imgp->args->envc + 2); 263 264 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, 265 imgp->proc->p_sysent->sv_shared_page_base); 266 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); 267 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 268 269 /* 270 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 271 * as it has appeared in the 2.4.0-rc7 first time. 272 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 273 * glibc falls back to the hard-coded CLK_TCK value when aux entry 274 * is not present. 
275 * Also see linux_times() implementation. 276 */ 277 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 278 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 279 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 280 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 281 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 282 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 283 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 284 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 285 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 286 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 287 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 288 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 289 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 290 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 291 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); 292 AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary)); 293 if (imgp->execpathp != 0) 294 AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, PTROUT(imgp->execpathp)); 295 if (args->execfd != -1) 296 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 297 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 298 299 free(imgp->auxargs, M_TEMP); 300 imgp->auxargs = NULL; 301 302 base--; 303 suword32(base, (uint32_t)imgp->args->argc); 304 *stack_base = (register_t *)base; 305 return (0); 306} 307 308static void 309linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 310{ 311 struct thread *td = curthread; 312 struct proc *p = td->td_proc; 313 struct sigacts *psp; 314 struct trapframe *regs; 315 struct l_rt_sigframe *fp, frame; 316 int oonstack; 317 int sig; 318 int code; 319 320 sig = ksi->ksi_signo; 321 code = ksi->ksi_code; 322 PROC_LOCK_ASSERT(p, MA_OWNED); 323 psp = p->p_sigacts; 324 mtx_assert(&psp->ps_mtx, MA_OWNED); 325 regs = td->td_frame; 326 oonstack = sigonstack(regs->tf_rsp); 327 328#ifdef DEBUG 329 if (ldebug(rt_sendsig)) 330 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 331 catcher, sig, (void*)mask, 
code); 332#endif 333 /* 334 * Allocate space for the signal handler context. 335 */ 336 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 337 SIGISMEMBER(psp->ps_sigonstack, sig)) { 338 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 339 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 340 } else 341 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 342 mtx_unlock(&psp->ps_mtx); 343 344 /* 345 * Build the argument list for the signal handler. 346 */ 347 if (p->p_sysent->sv_sigtbl) 348 if (sig <= p->p_sysent->sv_sigsize) 349 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 350 351 bzero(&frame, sizeof(frame)); 352 353 frame.sf_handler = PTROUT(catcher); 354 frame.sf_sig = sig; 355 frame.sf_siginfo = PTROUT(&fp->sf_si); 356 frame.sf_ucontext = PTROUT(&fp->sf_sc); 357 358 /* Fill in POSIX parts */ 359 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 360 361 /* 362 * Build the signal context to be used by sigreturn 363 * and libgcc unwind. 364 */ 365 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 366 frame.sf_sc.uc_link = 0; /* XXX ??? */ 367 368 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 369 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 370 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 371 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 372 PROC_UNLOCK(p); 373 374 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 375 376 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 377 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 378 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 379 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 380 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 381 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; 382 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 383 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 384 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 385 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 386 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 387 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 388 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 389 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 390 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 391 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 392 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 393 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 394 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 395 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 396 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 397 398#ifdef DEBUG 399 if (ldebug(rt_sendsig)) 400 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 401 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 402 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 403#endif 404 405 if (copyout(&frame, fp, sizeof(frame)) != 0) { 406 /* 407 * Process has trashed its stack; give it an illegal 408 * instruction to halt it in its tracks. 409 */ 410#ifdef DEBUG 411 if (ldebug(rt_sendsig)) 412 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 413 fp, oonstack); 414#endif 415 PROC_LOCK(p); 416 sigexit(td, SIGILL); 417 } 418 419 /* 420 * Build context to run handler in. 
421 */ 422 regs->tf_rsp = PTROUT(fp); 423 regs->tf_rip = linux32_rt_sigcode; 424 regs->tf_rflags &= ~(PSL_T | PSL_D); 425 regs->tf_cs = _ucode32sel; 426 regs->tf_ss = _udatasel; 427 regs->tf_ds = _udatasel; 428 regs->tf_es = _udatasel; 429 regs->tf_fs = _ufssel; 430 regs->tf_gs = _ugssel; 431 regs->tf_flags = TF_HASSEGS; 432 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 433 PROC_LOCK(p); 434 mtx_lock(&psp->ps_mtx); 435} 436 437 438/* 439 * Send an interrupt to process. 440 * 441 * Stack is set up to allow sigcode stored 442 * in u. to call routine, followed by kcall 443 * to sigreturn routine below. After sigreturn 444 * resets the signal mask, the stack, and the 445 * frame pointer, it returns to the user 446 * specified pc, psl. 447 */ 448static void 449linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 450{ 451 struct thread *td = curthread; 452 struct proc *p = td->td_proc; 453 struct sigacts *psp; 454 struct trapframe *regs; 455 struct l_sigframe *fp, frame; 456 l_sigset_t lmask; 457 int oonstack, i; 458 int sig, code; 459 460 sig = ksi->ksi_signo; 461 code = ksi->ksi_code; 462 PROC_LOCK_ASSERT(p, MA_OWNED); 463 psp = p->p_sigacts; 464 mtx_assert(&psp->ps_mtx, MA_OWNED); 465 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 466 /* Signal handler installed with SA_SIGINFO. */ 467 linux_rt_sendsig(catcher, ksi, mask); 468 return; 469 } 470 471 regs = td->td_frame; 472 oonstack = sigonstack(regs->tf_rsp); 473 474#ifdef DEBUG 475 if (ldebug(sendsig)) 476 printf(ARGS(sendsig, "%p, %d, %p, %u"), 477 catcher, sig, (void*)mask, code); 478#endif 479 480 /* 481 * Allocate space for the signal handler context. 
482 */ 483 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 484 SIGISMEMBER(psp->ps_sigonstack, sig)) { 485 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 486 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 487 } else 488 fp = (struct l_sigframe *)regs->tf_rsp - 1; 489 mtx_unlock(&psp->ps_mtx); 490 PROC_UNLOCK(p); 491 492 /* 493 * Build the argument list for the signal handler. 494 */ 495 if (p->p_sysent->sv_sigtbl) 496 if (sig <= p->p_sysent->sv_sigsize) 497 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 498 499 bzero(&frame, sizeof(frame)); 500 501 frame.sf_handler = PTROUT(catcher); 502 frame.sf_sig = sig; 503 504 bsd_to_linux_sigset(mask, &lmask); 505 506 /* 507 * Build the signal context to be used by sigreturn. 508 */ 509 frame.sf_sc.sc_mask = lmask.__bits[0]; 510 frame.sf_sc.sc_gs = regs->tf_gs; 511 frame.sf_sc.sc_fs = regs->tf_fs; 512 frame.sf_sc.sc_es = regs->tf_es; 513 frame.sf_sc.sc_ds = regs->tf_ds; 514 frame.sf_sc.sc_edi = regs->tf_rdi; 515 frame.sf_sc.sc_esi = regs->tf_rsi; 516 frame.sf_sc.sc_ebp = regs->tf_rbp; 517 frame.sf_sc.sc_ebx = regs->tf_rbx; 518 frame.sf_sc.sc_esp = regs->tf_rsp; 519 frame.sf_sc.sc_edx = regs->tf_rdx; 520 frame.sf_sc.sc_ecx = regs->tf_rcx; 521 frame.sf_sc.sc_eax = regs->tf_rax; 522 frame.sf_sc.sc_eip = regs->tf_rip; 523 frame.sf_sc.sc_cs = regs->tf_cs; 524 frame.sf_sc.sc_eflags = regs->tf_rflags; 525 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 526 frame.sf_sc.sc_ss = regs->tf_ss; 527 frame.sf_sc.sc_err = regs->tf_err; 528 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 529 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 530 531 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 532 frame.sf_extramask[i] = lmask.__bits[i+1]; 533 534 if (copyout(&frame, fp, sizeof(frame)) != 0) { 535 /* 536 * Process has trashed its stack; give it an illegal 537 * instruction to halt it in its tracks. 538 */ 539 PROC_LOCK(p); 540 sigexit(td, SIGILL); 541 } 542 543 /* 544 * Build context to run handler in. 
545 */ 546 regs->tf_rsp = PTROUT(fp); 547 regs->tf_rip = linux32_sigcode; 548 regs->tf_rflags &= ~(PSL_T | PSL_D); 549 regs->tf_cs = _ucode32sel; 550 regs->tf_ss = _udatasel; 551 regs->tf_ds = _udatasel; 552 regs->tf_es = _udatasel; 553 regs->tf_fs = _ufssel; 554 regs->tf_gs = _ugssel; 555 regs->tf_flags = TF_HASSEGS; 556 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 557 PROC_LOCK(p); 558 mtx_lock(&psp->ps_mtx); 559} 560 561/* 562 * System call to cleanup state after a signal 563 * has been taken. Reset signal mask and 564 * stack state from context left by sendsig (above). 565 * Return to previous pc and psl as specified by 566 * context left by sendsig. Check carefully to 567 * make sure that the user has not modified the 568 * psl to gain improper privileges or to cause 569 * a machine fault. 570 */ 571int 572linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 573{ 574 struct l_sigframe frame; 575 struct trapframe *regs; 576 sigset_t bmask; 577 l_sigset_t lmask; 578 int eflags, i; 579 ksiginfo_t ksi; 580 581 regs = td->td_frame; 582 583#ifdef DEBUG 584 if (ldebug(sigreturn)) 585 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 586#endif 587 /* 588 * The trampoline code hands us the sigframe. 589 * It is unsafe to keep track of it ourselves, in the event that a 590 * program jumps out of a signal handler. 591 */ 592 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 593 return (EFAULT); 594 595 /* 596 * Check for security violations. 597 */ 598#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 599 eflags = frame.sf_sc.sc_eflags; 600 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 601 return(EINVAL); 602 603 /* 604 * Don't allow users to load a valid privileged %cs. Let the 605 * hardware check for invalid selectors, excess privilege in 606 * other selectors, invalid %eip's and invalid %esp's. 
607 */ 608#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 609 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 610 ksiginfo_init_trap(&ksi); 611 ksi.ksi_signo = SIGBUS; 612 ksi.ksi_code = BUS_OBJERR; 613 ksi.ksi_trapno = T_PROTFLT; 614 ksi.ksi_addr = (void *)regs->tf_rip; 615 trapsignal(td, &ksi); 616 return(EINVAL); 617 } 618 619 lmask.__bits[0] = frame.sf_sc.sc_mask; 620 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 621 lmask.__bits[i+1] = frame.sf_extramask[i]; 622 linux_to_bsd_sigset(&lmask, &bmask); 623 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 624 625 /* 626 * Restore signal context. 627 */ 628 regs->tf_rdi = frame.sf_sc.sc_edi; 629 regs->tf_rsi = frame.sf_sc.sc_esi; 630 regs->tf_rbp = frame.sf_sc.sc_ebp; 631 regs->tf_rbx = frame.sf_sc.sc_ebx; 632 regs->tf_rdx = frame.sf_sc.sc_edx; 633 regs->tf_rcx = frame.sf_sc.sc_ecx; 634 regs->tf_rax = frame.sf_sc.sc_eax; 635 regs->tf_rip = frame.sf_sc.sc_eip; 636 regs->tf_cs = frame.sf_sc.sc_cs; 637 regs->tf_ds = frame.sf_sc.sc_ds; 638 regs->tf_es = frame.sf_sc.sc_es; 639 regs->tf_fs = frame.sf_sc.sc_fs; 640 regs->tf_gs = frame.sf_sc.sc_gs; 641 regs->tf_rflags = eflags; 642 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 643 regs->tf_ss = frame.sf_sc.sc_ss; 644 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 645 646 return (EJUSTRETURN); 647} 648 649/* 650 * System call to cleanup state after a signal 651 * has been taken. Reset signal mask and 652 * stack state from context left by rt_sendsig (above). 653 * Return to previous pc and psl as specified by 654 * context left by sendsig. Check carefully to 655 * make sure that the user has not modified the 656 * psl to gain improper privileges or to cause 657 * a machine fault. 
658 */ 659int 660linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 661{ 662 struct l_ucontext uc; 663 struct l_sigcontext *context; 664 sigset_t bmask; 665 l_stack_t *lss; 666 stack_t ss; 667 struct trapframe *regs; 668 int eflags; 669 ksiginfo_t ksi; 670 671 regs = td->td_frame; 672 673#ifdef DEBUG 674 if (ldebug(rt_sigreturn)) 675 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 676#endif 677 /* 678 * The trampoline code hands us the ucontext. 679 * It is unsafe to keep track of it ourselves, in the event that a 680 * program jumps out of a signal handler. 681 */ 682 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 683 return (EFAULT); 684 685 context = &uc.uc_mcontext; 686 687 /* 688 * Check for security violations. 689 */ 690#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 691 eflags = context->sc_eflags; 692 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 693 return(EINVAL); 694 695 /* 696 * Don't allow users to load a valid privileged %cs. Let the 697 * hardware check for invalid selectors, excess privilege in 698 * other selectors, invalid %eip's and invalid %esp's. 
699 */ 700#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 701 if (!CS_SECURE(context->sc_cs)) { 702 ksiginfo_init_trap(&ksi); 703 ksi.ksi_signo = SIGBUS; 704 ksi.ksi_code = BUS_OBJERR; 705 ksi.ksi_trapno = T_PROTFLT; 706 ksi.ksi_addr = (void *)regs->tf_rip; 707 trapsignal(td, &ksi); 708 return(EINVAL); 709 } 710 711 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 712 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 713 714 /* 715 * Restore signal context 716 */ 717 regs->tf_gs = context->sc_gs; 718 regs->tf_fs = context->sc_fs; 719 regs->tf_es = context->sc_es; 720 regs->tf_ds = context->sc_ds; 721 regs->tf_rdi = context->sc_edi; 722 regs->tf_rsi = context->sc_esi; 723 regs->tf_rbp = context->sc_ebp; 724 regs->tf_rbx = context->sc_ebx; 725 regs->tf_rdx = context->sc_edx; 726 regs->tf_rcx = context->sc_ecx; 727 regs->tf_rax = context->sc_eax; 728 regs->tf_rip = context->sc_eip; 729 regs->tf_cs = context->sc_cs; 730 regs->tf_rflags = eflags; 731 regs->tf_rsp = context->sc_esp_at_signal; 732 regs->tf_ss = context->sc_ss; 733 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 734 735 /* 736 * call sigaltstack & ignore results.. 
737 */ 738 lss = &uc.uc_stack; 739 ss.ss_sp = PTRIN(lss->ss_sp); 740 ss.ss_size = lss->ss_size; 741 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 742 743#ifdef DEBUG 744 if (ldebug(rt_sigreturn)) 745 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 746 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 747#endif 748 (void)kern_sigaltstack(td, &ss, NULL); 749 750 return (EJUSTRETURN); 751} 752 753static int 754linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 755{ 756 struct proc *p; 757 struct trapframe *frame; 758 759 p = td->td_proc; 760 frame = td->td_frame; 761 762 sa->args[0] = frame->tf_rbx; 763 sa->args[1] = frame->tf_rcx; 764 sa->args[2] = frame->tf_rdx; 765 sa->args[3] = frame->tf_rsi; 766 sa->args[4] = frame->tf_rdi; 767 sa->args[5] = frame->tf_rbp; /* Unconfirmed */ 768 sa->code = frame->tf_rax; 769 770 if (sa->code >= p->p_sysent->sv_size) 771 sa->callp = &p->p_sysent->sv_table[0]; 772 else 773 sa->callp = &p->p_sysent->sv_table[sa->code]; 774 sa->narg = sa->callp->sy_narg; 775 776 td->td_retval[0] = 0; 777 td->td_retval[1] = frame->tf_rdx; 778 779 return (0); 780} 781 782/* 783 * If a linux binary is exec'ing something, try this image activator 784 * first. We override standard shell script execution in order to 785 * be able to modify the interpreter path. We only do this if a linux 786 * binary is doing the exec, so we do not create an EXEC module for it. 787 */ 788static int exec_linux_imgact_try(struct image_params *iparams); 789 790static int 791exec_linux_imgact_try(struct image_params *imgp) 792{ 793 const char *head = (const char *)imgp->image_header; 794 char *rpath; 795 int error = -1; 796 797 /* 798 * The interpreter for shell scripts run from a linux binary needs 799 * to be located in /compat/linux if possible in order to recursively 800 * maintain linux path emulation. 801 */ 802 if (((const short *)head)[0] == SHELLMAGIC) { 803 /* 804 * Run our normal shell image activator. 
If it succeeds attempt 805 * to use the alternate path for the interpreter. If an 806 * alternate * path is found, use our stringspace to store it. 807 */ 808 if ((error = exec_shell_imgact(imgp)) == 0) { 809 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 810 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 811 AT_FDCWD); 812 if (rpath != NULL) 813 imgp->args->fname_buf = 814 imgp->interpreter_name = rpath; 815 } 816 } 817 return (error); 818} 819 820/* 821 * Clear registers on exec 822 * XXX copied from ia32_signal.c. 823 */ 824static void 825exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 826{ 827 struct trapframe *regs = td->td_frame; 828 struct pcb *pcb = td->td_pcb; 829 830 mtx_lock(&dt_lock); 831 if (td->td_proc->p_md.md_ldt != NULL) 832 user_ldt_free(td); 833 else 834 mtx_unlock(&dt_lock); 835 836 critical_enter(); 837 wrmsr(MSR_FSBASE, 0); 838 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 839 pcb->pcb_fsbase = 0; 840 pcb->pcb_gsbase = 0; 841 critical_exit(); 842 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 843 844 bzero((char *)regs, sizeof(struct trapframe)); 845 regs->tf_rip = imgp->entry_addr; 846 regs->tf_rsp = stack; 847 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 848 regs->tf_gs = _ugssel; 849 regs->tf_fs = _ufssel; 850 regs->tf_es = _udatasel; 851 regs->tf_ds = _udatasel; 852 regs->tf_ss = _udatasel; 853 regs->tf_flags = TF_HASSEGS; 854 regs->tf_cs = _ucode32sel; 855 regs->tf_rbx = imgp->ps_strings; 856 857 fpstate_drop(td); 858 859 /* Do full restore on return so that we can change to a different %cs */ 860 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); 861 td->td_retval[1] = 0; 862} 863 864/* 865 * XXX copied from ia32_sysvec.c. 
866 */ 867static register_t * 868linux_copyout_strings(struct image_params *imgp) 869{ 870 int argc, envc; 871 u_int32_t *vectp; 872 char *stringp, *destp; 873 u_int32_t *stack_base; 874 struct linux32_ps_strings *arginfo; 875 char canary[LINUX_AT_RANDOM_LEN]; 876 size_t execpath_len; 877 878 /* 879 * Calculate string base and vector table pointers. 880 */ 881 if (imgp->execpath != NULL && imgp->auxargs != NULL) 882 execpath_len = strlen(imgp->execpath) + 1; 883 else 884 execpath_len = 0; 885 886 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 887 destp = (caddr_t)arginfo - SPARE_USRSPACE - 888 roundup(sizeof(canary), sizeof(char *)) - 889 roundup(execpath_len, sizeof(char *)) - 890 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 891 892 if (execpath_len != 0) { 893 imgp->execpathp = (uintptr_t)arginfo - execpath_len; 894 copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); 895 } 896 897 /* 898 * Prepare the canary for SSP. 899 */ 900 arc4rand(canary, sizeof(canary), 0); 901 imgp->canary = (uintptr_t)arginfo - 902 roundup(execpath_len, sizeof(char *)) - 903 roundup(sizeof(canary), sizeof(char *)); 904 copyout(canary, (void *)imgp->canary, sizeof(canary)); 905 906 /* 907 * If we have a valid auxargs ptr, prepare some room 908 * on the stack. 909 */ 910 if (imgp->auxargs) { 911 /* 912 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 913 * lower compatibility. 914 */ 915 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 916 (LINUX_AT_COUNT * 2); 917 /* 918 * The '+ 2' is for the null pointers at the end of each of 919 * the arg and env vector sets,and imgp->auxarg_size is room 920 * for argument of Runtime loader. 
921 */ 922 vectp = (u_int32_t *) (destp - (imgp->args->argc + 923 imgp->args->envc + 2 + imgp->auxarg_size) * 924 sizeof(u_int32_t)); 925 926 } else 927 /* 928 * The '+ 2' is for the null pointers at the end of each of 929 * the arg and env vector sets 930 */ 931 vectp = (u_int32_t *)(destp - (imgp->args->argc + 932 imgp->args->envc + 2) * sizeof(u_int32_t)); 933 934 /* 935 * vectp also becomes our initial stack base 936 */ 937 stack_base = vectp; 938 939 stringp = imgp->args->begin_argv; 940 argc = imgp->args->argc; 941 envc = imgp->args->envc; 942 /* 943 * Copy out strings - arguments and environment. 944 */ 945 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 946 947 /* 948 * Fill in "ps_strings" struct for ps, w, etc. 949 */ 950 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); 951 suword32(&arginfo->ps_nargvstr, argc); 952 953 /* 954 * Fill in argument portion of vector table. 955 */ 956 for (; argc > 0; --argc) { 957 suword32(vectp++, (uint32_t)(intptr_t)destp); 958 while (*stringp++ != 0) 959 destp++; 960 destp++; 961 } 962 963 /* a null vector table pointer separates the argp's from the envp's */ 964 suword32(vectp++, 0); 965 966 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); 967 suword32(&arginfo->ps_nenvstr, envc); 968 969 /* 970 * Fill in environment portion of vector table. 
971 */ 972 for (; envc > 0; --envc) { 973 suword32(vectp++, (uint32_t)(intptr_t)destp); 974 while (*stringp++ != 0) 975 destp++; 976 destp++; 977 } 978 979 /* end of vector table is a null pointer */ 980 suword32(vectp, 0); 981 982 return ((register_t *)stack_base); 983} 984 985static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 986 "32-bit Linux emulation"); 987 988static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 989SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 990 &linux32_maxdsiz, 0, ""); 991static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 992SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 993 &linux32_maxssiz, 0, ""); 994static u_long linux32_maxvmem = LINUX32_MAXVMEM; 995SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 996 &linux32_maxvmem, 0, ""); 997 998#if defined(DEBUG) 999SYSCTL_PROC(_compat_linux32, OID_AUTO, debug, 1000 CTLTYPE_STRING | CTLFLAG_RW, 1001 0, 0, linux_sysctl_debug, "A", 1002 "Linux debugging control"); 1003#endif 1004 1005static void 1006linux32_fixlimit(struct rlimit *rl, int which) 1007{ 1008 1009 switch (which) { 1010 case RLIMIT_DATA: 1011 if (linux32_maxdsiz != 0) { 1012 if (rl->rlim_cur > linux32_maxdsiz) 1013 rl->rlim_cur = linux32_maxdsiz; 1014 if (rl->rlim_max > linux32_maxdsiz) 1015 rl->rlim_max = linux32_maxdsiz; 1016 } 1017 break; 1018 case RLIMIT_STACK: 1019 if (linux32_maxssiz != 0) { 1020 if (rl->rlim_cur > linux32_maxssiz) 1021 rl->rlim_cur = linux32_maxssiz; 1022 if (rl->rlim_max > linux32_maxssiz) 1023 rl->rlim_max = linux32_maxssiz; 1024 } 1025 break; 1026 case RLIMIT_VMEM: 1027 if (linux32_maxvmem != 0) { 1028 if (rl->rlim_cur > linux32_maxvmem) 1029 rl->rlim_cur = linux32_maxvmem; 1030 if (rl->rlim_max > linux32_maxvmem) 1031 rl->rlim_max = linux32_maxvmem; 1032 } 1033 break; 1034 } 1035} 1036 1037struct sysentvec elf_linux_sysvec = { 1038 .sv_size = LINUX_SYS_MAXSYSCALL, 1039 .sv_table = linux_sysent, 1040 .sv_mask = 0, 1041 .sv_sigsize = LINUX_SIGTBLSZ, 1042 
.sv_sigtbl = bsd_to_linux_signal, 1043 .sv_errsize = ELAST + 1, 1044 .sv_errtbl = bsd_to_linux_errno, 1045 .sv_transtrap = translate_traps, 1046 .sv_fixup = elf_linux_fixup, 1047 .sv_sendsig = linux_sendsig, 1048 .sv_sigcode = &_binary_linux32_locore_o_start, 1049 .sv_szsigcode = &linux_szsigcode, 1050 .sv_prepsyscall = NULL, 1051 .sv_name = "Linux ELF32", 1052 .sv_coredump = elf32_coredump, 1053 .sv_imgact_try = exec_linux_imgact_try, 1054 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1055 .sv_pagesize = PAGE_SIZE, 1056 .sv_minuser = VM_MIN_ADDRESS, 1057 .sv_maxuser = LINUX32_MAXUSER, 1058 .sv_usrstack = LINUX32_USRSTACK, 1059 .sv_psstrings = LINUX32_PS_STRINGS, 1060 .sv_stackprot = VM_PROT_ALL, 1061 .sv_copyout_strings = linux_copyout_strings, 1062 .sv_setregs = exec_linux_setregs, 1063 .sv_fixlimit = linux32_fixlimit, 1064 .sv_maxssiz = &linux32_maxssiz, 1065 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, 1066 .sv_set_syscall_retval = cpu_set_syscall_retval, 1067 .sv_fetch_syscall_args = linux32_fetch_syscall_args, 1068 .sv_syscallnames = NULL, 1069 .sv_shared_page_base = LINUX32_SHAREDPAGE, 1070 .sv_shared_page_len = PAGE_SIZE, 1071 .sv_schedtail = linux_schedtail, 1072 .sv_thread_detach = linux_thread_detach, 1073}; 1074 1075static void 1076linux_vdso_install(void *param) 1077{ 1078 1079 linux_szsigcode = (&_binary_linux32_locore_o_end - 1080 &_binary_linux32_locore_o_start); 1081 1082 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 1083 panic("Linux invalid vdso size\n"); 1084 1085 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 1086 1087 linux_shared_page_obj = __elfN(linux_shared_page_init) 1088 (&linux_shared_page_mapping); 1089 1090 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); 1091 1092 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 1093 linux_szsigcode); 1094 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 1095 1096 linux_kplatform = linux_shared_page_mapping + 1097 (linux_platform - 
(caddr_t)LINUX32_SHAREDPAGE); 1098} 1099SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 1100 (sysinit_cfunc_t)linux_vdso_install, NULL); 1101 1102static void 1103linux_vdso_deinstall(void *param) 1104{ 1105 1106 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 1107}; 1108SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 1109 (sysinit_cfunc_t)linux_vdso_deinstall, NULL); 1110 1111static char GNU_ABI_VENDOR[] = "GNU"; 1112static int GNULINUX_ABI_DESC = 0; 1113 1114static boolean_t 1115linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) 1116{ 1117 const Elf32_Word *desc; 1118 uintptr_t p; 1119 1120 p = (uintptr_t)(note + 1); 1121 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1122 1123 desc = (const Elf32_Word *)p; 1124 if (desc[0] != GNULINUX_ABI_DESC) 1125 return (FALSE); 1126 1127 /* 1128 * For linux we encode osrel as follows (see linux_mib.c): 1129 * VVVMMMIII (version, major, minor), see linux_mib.c. 1130 */ 1131 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1132 1133 return (TRUE); 1134} 1135 1136static Elf_Brandnote linux32_brandnote = { 1137 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1138 .hdr.n_descsz = 16, /* XXX at least 16 */ 1139 .hdr.n_type = 1, 1140 .vendor = GNU_ABI_VENDOR, 1141 .flags = BN_TRANSLATE_OSREL, 1142 .trans_osrel = linux32_trans_osrel 1143}; 1144 1145static Elf32_Brandinfo linux_brand = { 1146 .brand = ELFOSABI_LINUX, 1147 .machine = EM_386, 1148 .compat_3_brand = "Linux", 1149 .emul_path = "/compat/linux", 1150 .interp_path = "/lib/ld-linux.so.1", 1151 .sysvec = &elf_linux_sysvec, 1152 .interp_newpath = NULL, 1153 .brand_note = &linux32_brandnote, 1154 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1155}; 1156 1157static Elf32_Brandinfo linux_glibc2brand = { 1158 .brand = ELFOSABI_LINUX, 1159 .machine = EM_386, 1160 .compat_3_brand = "Linux", 1161 .emul_path = "/compat/linux", 1162 .interp_path = "/lib/ld-linux.so.2", 1163 .sysvec = &elf_linux_sysvec, 1164 .interp_newpath = NULL, 1165 .brand_note = 
&linux32_brandnote, 1166 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1167}; 1168 1169Elf32_Brandinfo *linux_brandlist[] = { 1170 &linux_brand, 1171 &linux_glibc2brand, 1172 NULL 1173}; 1174 1175static int 1176linux_elf_modevent(module_t mod, int type, void *data) 1177{ 1178 Elf32_Brandinfo **brandinfo; 1179 int error; 1180 struct linux_ioctl_handler **lihp; 1181 1182 error = 0; 1183 1184 switch(type) { 1185 case MOD_LOAD: 1186 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1187 ++brandinfo) 1188 if (elf32_insert_brand_entry(*brandinfo) < 0) 1189 error = EINVAL; 1190 if (error == 0) { 1191 SET_FOREACH(lihp, linux_ioctl_handler_set) 1192 linux_ioctl_register_handler(*lihp); 1193 LIST_INIT(&futex_list); 1194 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1195 stclohz = (stathz ? stathz : hz); 1196 if (bootverbose) 1197 printf("Linux ELF exec handler installed\n"); 1198 } else 1199 printf("cannot insert Linux ELF brand handler\n"); 1200 break; 1201 case MOD_UNLOAD: 1202 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1203 ++brandinfo) 1204 if (elf32_brand_inuse(*brandinfo)) 1205 error = EBUSY; 1206 if (error == 0) { 1207 for (brandinfo = &linux_brandlist[0]; 1208 *brandinfo != NULL; ++brandinfo) 1209 if (elf32_remove_brand_entry(*brandinfo) < 0) 1210 error = EINVAL; 1211 } 1212 if (error == 0) { 1213 SET_FOREACH(lihp, linux_ioctl_handler_set) 1214 linux_ioctl_unregister_handler(*lihp); 1215 mtx_destroy(&futex_mtx); 1216 if (bootverbose) 1217 printf("Linux ELF exec handler removed\n"); 1218 } else 1219 printf("Could not deinstall ELF interpreter entry\n"); 1220 break; 1221 default: 1222 return (EOPNOTSUPP); 1223 } 1224 return (error); 1225} 1226 1227static moduledata_t linux_elf_mod = { 1228 "linuxelf", 1229 linux_elf_modevent, 1230 0 1231}; 1232 1233DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1234MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1); 1235