linux_sysvec.c revision 293535
1131554Stjr/*- 2131554Stjr * Copyright (c) 1994-1996 Søren Schmidt 3131554Stjr * All rights reserved. 4131554Stjr * 5131554Stjr * Redistribution and use in source and binary forms, with or without 6131554Stjr * modification, are permitted provided that the following conditions 7131554Stjr * are met: 8131554Stjr * 1. Redistributions of source code must retain the above copyright 9131554Stjr * notice, this list of conditions and the following disclaimer 10131554Stjr * in this position and unchanged. 11131554Stjr * 2. Redistributions in binary form must reproduce the above copyright 12131554Stjr * notice, this list of conditions and the following disclaimer in the 13131554Stjr * documentation and/or other materials provided with the distribution. 14131554Stjr * 3. The name of the author may not be used to endorse or promote products 15131554Stjr * derived from this software without specific prior written permission 16131554Stjr * 17131554Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18131554Stjr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19131554Stjr * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20131554Stjr * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21131554Stjr * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22131554Stjr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23131554Stjr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24131554Stjr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25131554Stjr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26131554Stjr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27131554Stjr */ 28131554Stjr 29131554Stjr#include <sys/cdefs.h> 30131554Stjr__FBSDID("$FreeBSD: stable/10/sys/i386/linux/linux_sysvec.c 293535 2016-01-09 16:24:30Z dchagin $"); 31131554Stjr 32131554Stjr#include <sys/param.h> 33131554Stjr#include <sys/systm.h> 34131554Stjr#include <sys/exec.h> 35131554Stjr#include <sys/fcntl.h> 36131554Stjr#include <sys/imgact.h> 37131554Stjr#include <sys/imgact_aout.h> 38131554Stjr#include <sys/imgact_elf.h> 39131554Stjr#include <sys/kernel.h> 40131554Stjr#include <sys/lock.h> 41131554Stjr#include <sys/malloc.h> 42131554Stjr#include <sys/module.h> 43131554Stjr#include <sys/mutex.h> 44131554Stjr#include <sys/proc.h> 45131554Stjr#include <sys/signalvar.h> 46131554Stjr#include <sys/syscallsubr.h> 47131554Stjr#include <sys/sysctl.h> 48131554Stjr#include <sys/sysent.h> 49131554Stjr#include <sys/sysproto.h> 50131554Stjr#include <sys/vnode.h> 51131554Stjr#include <sys/eventhandler.h> 52131554Stjr 53131554Stjr#include <vm/vm.h> 54131554Stjr#include <vm/pmap.h> 55131554Stjr#include <vm/vm_extern.h> 56131554Stjr#include <vm/vm_map.h> 57131554Stjr#include <vm/vm_object.h> 58131554Stjr#include <vm/vm_page.h> 59131554Stjr#include <vm/vm_param.h> 60131554Stjr 61131554Stjr#include <machine/cpu.h> 62131554Stjr#include <machine/cputypes.h> 63131554Stjr#include <machine/md_var.h> 64131554Stjr#include <machine/pcb.h> 65131554Stjr 66131554Stjr#include <i386/linux/linux.h> 67131554Stjr#include <i386/linux/linux_proto.h> 68131554Stjr#include <compat/linux/linux_emul.h> 69131554Stjr#include <compat/linux/linux_futex.h> 70131554Stjr#include <compat/linux/linux_ioctl.h> 71131554Stjr#include <compat/linux/linux_mib.h> 72131554Stjr#include <compat/linux/linux_misc.h> 73131554Stjr#include <compat/linux/linux_signal.h> 74131554Stjr#include <compat/linux/linux_util.h> 75131554Stjr#include <compat/linux/linux_vdso.h> 76131554Stjr 77131554StjrMODULE_VERSION(linux, 1); 78131554Stjr 79131554Stjr#if BYTE_ORDER == LITTLE_ENDIAN 80131554Stjr#define SHELLMAGIC 0x2123 
/* #! */ 81131554Stjr#else 82131554Stjr#define SHELLMAGIC 0x2321 83131554Stjr#endif 84131554Stjr 85131554Stjr#if defined(DEBUG) 86131554StjrSYSCTL_PROC(_compat_linux, OID_AUTO, debug, 87131554Stjr CTLTYPE_STRING | CTLFLAG_RW, 88131554Stjr 0, 0, linux_sysctl_debug, "A", 89131554Stjr "Linux debugging control"); 90131554Stjr#endif 91131554Stjr 92131554Stjr/* 93131554Stjr * Allow the sendsig functions to use the ldebug() facility 94131554Stjr * even though they are not syscalls themselves. Map them 95131554Stjr * to syscall 0. This is slightly less bogus than using 96131554Stjr * ldebug(sigreturn). 97131554Stjr */ 98131554Stjr#define LINUX_SYS_linux_rt_sendsig 0 99131554Stjr#define LINUX_SYS_linux_sendsig 0 100131554Stjr 101131554Stjr#define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings)) 102131554Stjr 103131554Stjrstatic int linux_szsigcode; 104131554Stjrstatic vm_object_t linux_shared_page_obj; 105131554Stjrstatic char *linux_shared_page_mapping; 106131554Stjrextern char _binary_linux_locore_o_start; 107131554Stjrextern char _binary_linux_locore_o_end; 108131554Stjr 109131554Stjrextern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 110131554Stjr 111131554StjrSET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 112131554Stjr 113131554Stjrstatic int linux_fixup(register_t **stack_base, 114131554Stjr struct image_params *iparams); 115131554Stjrstatic int elf_linux_fixup(register_t **stack_base, 116131554Stjr struct image_params *iparams); 117131554Stjrstatic void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 118131554Stjrstatic void exec_linux_setregs(struct thread *td, 119131554Stjr struct image_params *imgp, u_long stack); 120131554Stjrstatic register_t *linux_copyout_strings(struct image_params *imgp); 121131554Stjrstatic boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel); 122131554Stjrstatic void linux_vdso_install(void *param); 123131554Stjrstatic void linux_vdso_deinstall(void *param); 124131554Stjr 
125131554Stjrstatic int linux_szplatform; 126131554Stjrconst char *linux_kplatform; 127131554Stjr 128131554Stjrstatic eventhandler_tag linux_exit_tag; 129131554Stjrstatic eventhandler_tag linux_exec_tag; 130131554Stjrstatic eventhandler_tag linux_thread_dtor_tag; 131131554Stjr 132131554Stjr/* 133131554Stjr * Linux syscalls return negative errno's, we do positive and map them 134131554Stjr * Reference: 135131554Stjr * FreeBSD: src/sys/sys/errno.h 136131554Stjr * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 137131554Stjr * linux-2.6.17.8/include/asm-generic/errno.h 138131554Stjr */ 139131554Stjrstatic int bsd_to_linux_errno[ELAST + 1] = { 140131554Stjr -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 141131554Stjr -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 142131554Stjr -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 143131554Stjr -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 144131554Stjr -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 145131554Stjr -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 146131554Stjr -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 147131554Stjr -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 148131554Stjr -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 149131554Stjr -72, -67, -71 150131554Stjr}; 151131554Stjr 152131554Stjrint bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 153131554Stjr LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 154131554Stjr LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 155131554Stjr LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 156131554Stjr LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 157131554Stjr LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 158131554Stjr LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 159131554Stjr LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 160131554Stjr 0, LINUX_SIGUSR1, LINUX_SIGUSR2 161131554Stjr}; 162131554Stjr 163131554Stjrint linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 164131554Stjr SIGHUP, SIGINT, 
SIGQUIT, SIGILL, 165131554Stjr SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 166131554Stjr SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 167131554Stjr SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 168131554Stjr SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 169131554Stjr SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 170131554Stjr SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 171131554Stjr SIGIO, SIGURG, SIGSYS 172131554Stjr}; 173131554Stjr 174131554Stjr#define LINUX_T_UNKNOWN 255 175131554Stjrstatic int _bsd_to_linux_trapcode[] = { 176131554Stjr LINUX_T_UNKNOWN, /* 0 */ 177131554Stjr 6, /* 1 T_PRIVINFLT */ 178131554Stjr LINUX_T_UNKNOWN, /* 2 */ 179131554Stjr 3, /* 3 T_BPTFLT */ 180131554Stjr LINUX_T_UNKNOWN, /* 4 */ 181131554Stjr LINUX_T_UNKNOWN, /* 5 */ 182131554Stjr 16, /* 6 T_ARITHTRAP */ 183131554Stjr 254, /* 7 T_ASTFLT */ 184131554Stjr LINUX_T_UNKNOWN, /* 8 */ 185131554Stjr 13, /* 9 T_PROTFLT */ 186131554Stjr 1, /* 10 T_TRCTRAP */ 187131554Stjr LINUX_T_UNKNOWN, /* 11 */ 188131554Stjr 14, /* 12 T_PAGEFLT */ 189131554Stjr LINUX_T_UNKNOWN, /* 13 */ 190131554Stjr 17, /* 14 T_ALIGNFLT */ 191131554Stjr LINUX_T_UNKNOWN, /* 15 */ 192131554Stjr LINUX_T_UNKNOWN, /* 16 */ 193131554Stjr LINUX_T_UNKNOWN, /* 17 */ 194131554Stjr 0, /* 18 T_DIVIDE */ 195131554Stjr 2, /* 19 T_NMI */ 196131554Stjr 4, /* 20 T_OFLOW */ 197131554Stjr 5, /* 21 T_BOUND */ 198131554Stjr 7, /* 22 T_DNA */ 199131554Stjr 8, /* 23 T_DOUBLEFLT */ 200131554Stjr 9, /* 24 T_FPOPFLT */ 201131554Stjr 10, /* 25 T_TSSFLT */ 202131554Stjr 11, /* 26 T_SEGNPFLT */ 203131554Stjr 12, /* 27 T_STKFLT */ 204131554Stjr 18, /* 28 T_MCHK */ 205131554Stjr 19, /* 29 T_XMMFLT */ 206131554Stjr 15 /* 30 T_RESERVED */ 207131554Stjr}; 208131554Stjr#define bsd_to_linux_trapcode(code) \ 209131554Stjr ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? 
\ 210131554Stjr _bsd_to_linux_trapcode[(code)]: \ 211131554Stjr LINUX_T_UNKNOWN) 212131554Stjr 213131554StjrLINUX_VDSO_SYM_INTPTR(linux_sigcode); 214131554StjrLINUX_VDSO_SYM_INTPTR(linux_rt_sigcode); 215131554StjrLINUX_VDSO_SYM_INTPTR(linux_vsyscall); 216131554Stjr 217131554Stjr/* 218131554Stjr * If FreeBSD & Linux have a difference of opinion about what a trap 219131554Stjr * means, deal with it here. 220131554Stjr * 221131554Stjr * MPSAFE 222131554Stjr */ 223131554Stjrstatic int 224131554Stjrtranslate_traps(int signal, int trap_code) 225131554Stjr{ 226131554Stjr if (signal != SIGBUS) 227131554Stjr return (signal); 228131554Stjr switch (trap_code) { 229131554Stjr case T_PROTFLT: 230131554Stjr case T_TSSFLT: 231131554Stjr case T_DOUBLEFLT: 232131554Stjr case T_PAGEFLT: 233131554Stjr return (SIGSEGV); 234131554Stjr default: 235131554Stjr return (signal); 236131554Stjr } 237131554Stjr} 238131554Stjr 239131554Stjrstatic int 240131554Stjrlinux_fixup(register_t **stack_base, struct image_params *imgp) 241131554Stjr{ 242131554Stjr register_t *argv, *envp; 243131554Stjr 244131554Stjr argv = *stack_base; 245131554Stjr envp = *stack_base + (imgp->args->argc + 1); 246131554Stjr (*stack_base)--; 247131554Stjr suword(*stack_base, (intptr_t)(void *)envp); 248131554Stjr (*stack_base)--; 249131554Stjr suword(*stack_base, (intptr_t)(void *)argv); 250131554Stjr (*stack_base)--; 251131554Stjr suword(*stack_base, imgp->args->argc); 252131554Stjr return (0); 253131554Stjr} 254131554Stjr 255131554Stjrstatic int 256131554Stjrelf_linux_fixup(register_t **stack_base, struct image_params *imgp) 257131554Stjr{ 258131554Stjr struct proc *p; 259131554Stjr Elf32_Auxargs *args; 260131554Stjr Elf32_Addr *uplatform; 261131554Stjr struct ps_strings *arginfo; 262131554Stjr register_t *pos; 263131554Stjr 264131554Stjr KASSERT(curthread->td_proc == imgp->proc, 265131554Stjr ("unsafe elf_linux_fixup(), should be curproc")); 266131554Stjr 267131554Stjr p = imgp->proc; 268131554Stjr arginfo = (struct 
ps_strings *)p->p_sysent->sv_psstrings; 269131554Stjr uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform); 270131554Stjr args = (Elf32_Auxargs *)imgp->auxargs; 271131554Stjr pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 272131554Stjr 273131554Stjr AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, 274131554Stjr imgp->proc->p_sysent->sv_shared_page_base); 275131554Stjr AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall); 276131554Stjr AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); 277131554Stjr 278131554Stjr /* 279131554Stjr * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 280131554Stjr * as it has appeared in the 2.4.0-rc7 first time. 281131554Stjr * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 282131554Stjr * glibc falls back to the hard-coded CLK_TCK value when aux entry 283131554Stjr * is not present. 284131554Stjr * Also see linux_times() implementation. 285131554Stjr */ 286131554Stjr if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 287131554Stjr AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); 288131554Stjr AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 289131554Stjr AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 290131554Stjr AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 291131554Stjr AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 292131554Stjr AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 293131554Stjr AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 294131554Stjr AUXARGS_ENTRY(pos, AT_BASE, args->base); 295131554Stjr AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0); 296131554Stjr AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 297131554Stjr AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 298131554Stjr AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 299131554Stjr AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 300131554Stjr AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); 301131554Stjr AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary); 302131554Stjr if (imgp->execpathp != 0) 
303131554Stjr AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp); 304131554Stjr if (args->execfd != -1) 305131554Stjr AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 306131554Stjr AUXARGS_ENTRY(pos, AT_NULL, 0); 307131554Stjr 308131554Stjr free(imgp->auxargs, M_TEMP); 309131554Stjr imgp->auxargs = NULL; 310131554Stjr 311131554Stjr (*stack_base)--; 312131554Stjr suword(*stack_base, (register_t)imgp->args->argc); 313131554Stjr return (0); 314131554Stjr} 315131554Stjr 316131554Stjr/* 317131554Stjr * Copied from kern/kern_exec.c 318131554Stjr */ 319131554Stjrstatic register_t * 320131554Stjrlinux_copyout_strings(struct image_params *imgp) 321131554Stjr{ 322131554Stjr int argc, envc; 323131554Stjr char **vectp; 324131554Stjr char *stringp, *destp; 325131554Stjr register_t *stack_base; 326131554Stjr struct ps_strings *arginfo; 327131554Stjr char canary[LINUX_AT_RANDOM_LEN]; 328131554Stjr size_t execpath_len; 329131554Stjr struct proc *p; 330131554Stjr 331131554Stjr /* 332131554Stjr * Calculate string base and vector table pointers. 
333131554Stjr */ 334131554Stjr p = imgp->proc; 335131554Stjr if (imgp->execpath != NULL && imgp->auxargs != NULL) 336131554Stjr execpath_len = strlen(imgp->execpath) + 1; 337131554Stjr else 338131554Stjr execpath_len = 0; 339131554Stjr arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 340131554Stjr destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform - 341131554Stjr roundup(sizeof(canary), sizeof(char *)) - 342131554Stjr roundup(execpath_len, sizeof(char *)) - 343131554Stjr roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 344131554Stjr 345131554Stjr /* 346131554Stjr * install LINUX_PLATFORM 347131554Stjr */ 348131554Stjr copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform), 349131554Stjr linux_szplatform); 350131554Stjr 351131554Stjr if (execpath_len != 0) { 352131554Stjr imgp->execpathp = (uintptr_t)arginfo - 353131554Stjr linux_szplatform - execpath_len; 354131554Stjr copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); 355131554Stjr } 356131554Stjr 357131554Stjr /* 358131554Stjr * Prepare the canary for SSP. 359131554Stjr */ 360131554Stjr arc4rand(canary, sizeof(canary), 0); 361131554Stjr imgp->canary = (uintptr_t)arginfo - linux_szplatform - 362131554Stjr roundup(execpath_len, sizeof(char *)) - 363131554Stjr roundup(sizeof(canary), sizeof(char *)); 364131554Stjr copyout(canary, (void *)imgp->canary, sizeof(canary)); 365131554Stjr 366131554Stjr /* 367131554Stjr * If we have a valid auxargs ptr, prepare some room 368131554Stjr * on the stack. 369131554Stjr */ 370131554Stjr if (imgp->auxargs) { 371131554Stjr /* 372131554Stjr * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 373131554Stjr * lower compatibility. 374131554Stjr */ 375131554Stjr imgp->auxarg_size = (imgp->auxarg_size) ? 
imgp->auxarg_size : 376131554Stjr (LINUX_AT_COUNT * 2); 377131554Stjr /* 378131554Stjr * The '+ 2' is for the null pointers at the end of each of 379131554Stjr * the arg and env vector sets,and imgp->auxarg_size is room 380131554Stjr * for argument of Runtime loader. 381131554Stjr */ 382131554Stjr vectp = (char **)(destp - (imgp->args->argc + 383131554Stjr imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); 384131554Stjr } else { 385131554Stjr /* 386131554Stjr * The '+ 2' is for the null pointers at the end of each of 387131554Stjr * the arg and env vector sets 388131554Stjr */ 389131554Stjr vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * 390131554Stjr sizeof(char *)); 391131554Stjr } 392131554Stjr 393131554Stjr /* 394131554Stjr * vectp also becomes our initial stack base 395131554Stjr */ 396131554Stjr stack_base = (register_t *)vectp; 397131554Stjr 398131554Stjr stringp = imgp->args->begin_argv; 399131554Stjr argc = imgp->args->argc; 400131554Stjr envc = imgp->args->envc; 401131554Stjr 402131554Stjr /* 403131554Stjr * Copy out strings - arguments and environment. 404131554Stjr */ 405131554Stjr copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 406131554Stjr 407131554Stjr /* 408131554Stjr * Fill in "ps_strings" struct for ps, w, etc. 409131554Stjr */ 410131554Stjr suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); 411131554Stjr suword(&arginfo->ps_nargvstr, argc); 412131554Stjr 413131554Stjr /* 414131554Stjr * Fill in argument portion of vector table. 
415131554Stjr */ 416131554Stjr for (; argc > 0; --argc) { 417131554Stjr suword(vectp++, (long)(intptr_t)destp); 418131554Stjr while (*stringp++ != 0) 419131554Stjr destp++; 420131554Stjr destp++; 421131554Stjr } 422131554Stjr 423131554Stjr /* a null vector table pointer separates the argp's from the envp's */ 424131554Stjr suword(vectp++, 0); 425131554Stjr 426131554Stjr suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); 427131554Stjr suword(&arginfo->ps_nenvstr, envc); 428131554Stjr 429131554Stjr /* 430131554Stjr * Fill in environment portion of vector table. 431131554Stjr */ 432131554Stjr for (; envc > 0; --envc) { 433131554Stjr suword(vectp++, (long)(intptr_t)destp); 434131554Stjr while (*stringp++ != 0) 435131554Stjr destp++; 436131554Stjr destp++; 437131554Stjr } 438131554Stjr 439131554Stjr /* end of vector table is a null pointer */ 440131554Stjr suword(vectp, 0); 441131554Stjr 442131554Stjr return (stack_base); 443131554Stjr} 444131554Stjr 445131554Stjrstatic void 446131554Stjrlinux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 447131554Stjr{ 448131554Stjr struct thread *td = curthread; 449131554Stjr struct proc *p = td->td_proc; 450131554Stjr struct sigacts *psp; 451131554Stjr struct trapframe *regs; 452131554Stjr struct l_rt_sigframe *fp, frame; 453131554Stjr int sig, code; 454131554Stjr int oonstack; 455131554Stjr 456131554Stjr sig = ksi->ksi_signo; 457131554Stjr code = ksi->ksi_code; 458131554Stjr PROC_LOCK_ASSERT(p, MA_OWNED); 459131554Stjr psp = p->p_sigacts; 460131554Stjr mtx_assert(&psp->ps_mtx, MA_OWNED); 461131554Stjr regs = td->td_frame; 462131554Stjr oonstack = sigonstack(regs->tf_esp); 463131554Stjr 464131554Stjr#ifdef DEBUG 465131554Stjr if (ldebug(rt_sendsig)) 466131554Stjr printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 467131554Stjr catcher, sig, (void*)mask, code); 468131554Stjr#endif 469131554Stjr /* 470131554Stjr * Allocate space for the signal handler context. 
471131554Stjr */ 472131554Stjr if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 473131554Stjr SIGISMEMBER(psp->ps_sigonstack, sig)) { 474131554Stjr fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 475131554Stjr td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 476131554Stjr } else 477131554Stjr fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 478131554Stjr mtx_unlock(&psp->ps_mtx); 479131554Stjr 480131554Stjr /* 481131554Stjr * Build the argument list for the signal handler. 482131554Stjr */ 483131554Stjr if (p->p_sysent->sv_sigtbl) 484131554Stjr if (sig <= p->p_sysent->sv_sigsize) 485131554Stjr sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 486131554Stjr 487131554Stjr bzero(&frame, sizeof(frame)); 488131554Stjr 489131554Stjr frame.sf_handler = catcher; 490131554Stjr frame.sf_sig = sig; 491131554Stjr frame.sf_siginfo = &fp->sf_si; 492131554Stjr frame.sf_ucontext = &fp->sf_sc; 493131554Stjr 494131554Stjr /* Fill in POSIX parts */ 495131554Stjr ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 496131554Stjr 497131554Stjr /* 498131554Stjr * Build the signal context to be used by sigreturn. 499131554Stjr */ 500131554Stjr frame.sf_sc.uc_flags = 0; /* XXX ??? */ 501131554Stjr frame.sf_sc.uc_link = NULL; /* XXX ??? */ 502131554Stjr 503131554Stjr frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 504131554Stjr frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 505131554Stjr frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 506131554Stjr ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 507131554Stjr PROC_UNLOCK(p); 508131554Stjr 509131554Stjr bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 510131554Stjr 511131554Stjr frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 512131554Stjr frame.sf_sc.uc_mcontext.sc_gs = rgs(); 513131554Stjr frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 514131554Stjr frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 515131554Stjr frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 516131554Stjr frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 517131554Stjr frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 518131554Stjr frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 519131554Stjr frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 520131554Stjr frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp; 521131554Stjr frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 522131554Stjr frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 523131554Stjr frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 524131554Stjr frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 525131554Stjr frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 526131554Stjr frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 527131554Stjr frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 528131554Stjr frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 529131554Stjr frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 530131554Stjr frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; 531131554Stjr frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 532131554Stjr 533131554Stjr#ifdef DEBUG 534131554Stjr if (ldebug(rt_sendsig)) 535131554Stjr printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 536131554Stjr frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 537131554Stjr td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 538131554Stjr#endif 539131554Stjr 540131554Stjr if (copyout(&frame, fp, sizeof(frame)) != 0) { 541131554Stjr /* 542131554Stjr * Process has trashed its stack; give it an illegal 543131554Stjr * 
instruction to halt it in its tracks. 544131554Stjr */ 545131554Stjr#ifdef DEBUG 546131554Stjr if (ldebug(rt_sendsig)) 547131554Stjr printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 548131554Stjr fp, oonstack); 549131554Stjr#endif 550131554Stjr PROC_LOCK(p); 551131554Stjr sigexit(td, SIGILL); 552131554Stjr } 553131554Stjr 554131554Stjr /* 555131554Stjr * Build context to run handler in. 556131554Stjr */ 557131554Stjr regs->tf_esp = (int)fp; 558131554Stjr regs->tf_eip = linux_rt_sigcode; 559131554Stjr regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 560131554Stjr regs->tf_cs = _ucodesel; 561131554Stjr regs->tf_ds = _udatasel; 562131554Stjr regs->tf_es = _udatasel; 563131554Stjr regs->tf_fs = _udatasel; 564131554Stjr regs->tf_ss = _udatasel; 565131554Stjr PROC_LOCK(p); 566131554Stjr mtx_lock(&psp->ps_mtx); 567131554Stjr} 568131554Stjr 569131554Stjr 570131554Stjr/* 571131554Stjr * Send an interrupt to process. 572131554Stjr * 573131554Stjr * Stack is set up to allow sigcode stored 574131554Stjr * in u. to call routine, followed by kcall 575131554Stjr * to sigreturn routine below. After sigreturn 576131554Stjr * resets the signal mask, the stack, and the 577131554Stjr * frame pointer, it returns to the user 578131554Stjr * specified pc, psl. 
579131554Stjr */ 580131554Stjrstatic void 581131554Stjrlinux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 582131554Stjr{ 583131554Stjr struct thread *td = curthread; 584131554Stjr struct proc *p = td->td_proc; 585131554Stjr struct sigacts *psp; 586131554Stjr struct trapframe *regs; 587131554Stjr struct l_sigframe *fp, frame; 588131554Stjr l_sigset_t lmask; 589131554Stjr int sig, code; 590131554Stjr int oonstack, i; 591131554Stjr 592131554Stjr PROC_LOCK_ASSERT(p, MA_OWNED); 593131554Stjr psp = p->p_sigacts; 594131554Stjr sig = ksi->ksi_signo; 595131554Stjr code = ksi->ksi_code; 596131554Stjr mtx_assert(&psp->ps_mtx, MA_OWNED); 597131554Stjr if (SIGISMEMBER(psp->ps_siginfo, sig)) { 598131554Stjr /* Signal handler installed with SA_SIGINFO. */ 599131554Stjr linux_rt_sendsig(catcher, ksi, mask); 600131554Stjr return; 601131554Stjr } 602131554Stjr regs = td->td_frame; 603131554Stjr oonstack = sigonstack(regs->tf_esp); 604131554Stjr 605131554Stjr#ifdef DEBUG 606131554Stjr if (ldebug(sendsig)) 607131554Stjr printf(ARGS(sendsig, "%p, %d, %p, %u"), 608131554Stjr catcher, sig, (void*)mask, code); 609131554Stjr#endif 610131554Stjr 611131554Stjr /* 612131554Stjr * Allocate space for the signal handler context. 613131554Stjr */ 614 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 615 SIGISMEMBER(psp->ps_sigonstack, sig)) { 616 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 617 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 618 } else 619 fp = (struct l_sigframe *)regs->tf_esp - 1; 620 mtx_unlock(&psp->ps_mtx); 621 PROC_UNLOCK(p); 622 623 /* 624 * Build the argument list for the signal handler. 625 */ 626 if (p->p_sysent->sv_sigtbl) 627 if (sig <= p->p_sysent->sv_sigsize) 628 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 629 630 bzero(&frame, sizeof(frame)); 631 632 frame.sf_handler = catcher; 633 frame.sf_sig = sig; 634 635 bsd_to_linux_sigset(mask, &lmask); 636 637 /* 638 * Build the signal context to be used by sigreturn. 
639 */ 640 frame.sf_sc.sc_mask = lmask.__bits[0]; 641 frame.sf_sc.sc_gs = rgs(); 642 frame.sf_sc.sc_fs = regs->tf_fs; 643 frame.sf_sc.sc_es = regs->tf_es; 644 frame.sf_sc.sc_ds = regs->tf_ds; 645 frame.sf_sc.sc_edi = regs->tf_edi; 646 frame.sf_sc.sc_esi = regs->tf_esi; 647 frame.sf_sc.sc_ebp = regs->tf_ebp; 648 frame.sf_sc.sc_ebx = regs->tf_ebx; 649 frame.sf_sc.sc_esp = regs->tf_esp; 650 frame.sf_sc.sc_edx = regs->tf_edx; 651 frame.sf_sc.sc_ecx = regs->tf_ecx; 652 frame.sf_sc.sc_eax = regs->tf_eax; 653 frame.sf_sc.sc_eip = regs->tf_eip; 654 frame.sf_sc.sc_cs = regs->tf_cs; 655 frame.sf_sc.sc_eflags = regs->tf_eflags; 656 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 657 frame.sf_sc.sc_ss = regs->tf_ss; 658 frame.sf_sc.sc_err = regs->tf_err; 659 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; 660 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 661 662 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 663 frame.sf_extramask[i] = lmask.__bits[i+1]; 664 665 if (copyout(&frame, fp, sizeof(frame)) != 0) { 666 /* 667 * Process has trashed its stack; give it an illegal 668 * instruction to halt it in its tracks. 669 */ 670 PROC_LOCK(p); 671 sigexit(td, SIGILL); 672 } 673 674 /* 675 * Build context to run handler in. 676 */ 677 regs->tf_esp = (int)fp; 678 regs->tf_eip = linux_sigcode; 679 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 680 regs->tf_cs = _ucodesel; 681 regs->tf_ds = _udatasel; 682 regs->tf_es = _udatasel; 683 regs->tf_fs = _udatasel; 684 regs->tf_ss = _udatasel; 685 PROC_LOCK(p); 686 mtx_lock(&psp->ps_mtx); 687} 688 689/* 690 * System call to cleanup state after a signal 691 * has been taken. Reset signal mask and 692 * stack state from context left by sendsig (above). 693 * Return to previous pc and psl as specified by 694 * context left by sendsig. Check carefully to 695 * make sure that the user has not modified the 696 * psl to gain improper privileges or to cause 697 * a machine fault. 
698 */ 699int 700linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 701{ 702 struct l_sigframe frame; 703 struct trapframe *regs; 704 l_sigset_t lmask; 705 sigset_t bmask; 706 int eflags, i; 707 ksiginfo_t ksi; 708 709 regs = td->td_frame; 710 711#ifdef DEBUG 712 if (ldebug(sigreturn)) 713 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 714#endif 715 /* 716 * The trampoline code hands us the sigframe. 717 * It is unsafe to keep track of it ourselves, in the event that a 718 * program jumps out of a signal handler. 719 */ 720 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 721 return (EFAULT); 722 723 /* 724 * Check for security violations. 725 */ 726#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 727 eflags = frame.sf_sc.sc_eflags; 728 if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) 729 return (EINVAL); 730 731 /* 732 * Don't allow users to load a valid privileged %cs. Let the 733 * hardware check for invalid selectors, excess privilege in 734 * other selectors, invalid %eip's and invalid %esp's. 735 */ 736#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 737 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 738 ksiginfo_init_trap(&ksi); 739 ksi.ksi_signo = SIGBUS; 740 ksi.ksi_code = BUS_OBJERR; 741 ksi.ksi_trapno = T_PROTFLT; 742 ksi.ksi_addr = (void *)regs->tf_eip; 743 trapsignal(td, &ksi); 744 return (EINVAL); 745 } 746 747 lmask.__bits[0] = frame.sf_sc.sc_mask; 748 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 749 lmask.__bits[i+1] = frame.sf_extramask[i]; 750 linux_to_bsd_sigset(&lmask, &bmask); 751 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 752 753 /* 754 * Restore signal context. 755 */ 756 /* %gs was restored by the trampoline. 
*/ 757 regs->tf_fs = frame.sf_sc.sc_fs; 758 regs->tf_es = frame.sf_sc.sc_es; 759 regs->tf_ds = frame.sf_sc.sc_ds; 760 regs->tf_edi = frame.sf_sc.sc_edi; 761 regs->tf_esi = frame.sf_sc.sc_esi; 762 regs->tf_ebp = frame.sf_sc.sc_ebp; 763 regs->tf_ebx = frame.sf_sc.sc_ebx; 764 regs->tf_edx = frame.sf_sc.sc_edx; 765 regs->tf_ecx = frame.sf_sc.sc_ecx; 766 regs->tf_eax = frame.sf_sc.sc_eax; 767 regs->tf_eip = frame.sf_sc.sc_eip; 768 regs->tf_cs = frame.sf_sc.sc_cs; 769 regs->tf_eflags = eflags; 770 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 771 regs->tf_ss = frame.sf_sc.sc_ss; 772 773 return (EJUSTRETURN); 774} 775 776/* 777 * System call to cleanup state after a signal 778 * has been taken. Reset signal mask and 779 * stack state from context left by rt_sendsig (above). 780 * Return to previous pc and psl as specified by 781 * context left by sendsig. Check carefully to 782 * make sure that the user has not modified the 783 * psl to gain improper privileges or to cause 784 * a machine fault. 785 */ 786int 787linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 788{ 789 struct l_ucontext uc; 790 struct l_sigcontext *context; 791 sigset_t bmask; 792 l_stack_t *lss; 793 stack_t ss; 794 struct trapframe *regs; 795 int eflags; 796 ksiginfo_t ksi; 797 798 regs = td->td_frame; 799 800#ifdef DEBUG 801 if (ldebug(rt_sigreturn)) 802 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 803#endif 804 /* 805 * The trampoline code hands us the ucontext. 806 * It is unsafe to keep track of it ourselves, in the event that a 807 * program jumps out of a signal handler. 808 */ 809 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 810 return (EFAULT); 811 812 context = &uc.uc_mcontext; 813 814 /* 815 * Check for security violations. 
816 */ 817#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 818 eflags = context->sc_eflags; 819 if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) 820 return (EINVAL); 821 822 /* 823 * Don't allow users to load a valid privileged %cs. Let the 824 * hardware check for invalid selectors, excess privilege in 825 * other selectors, invalid %eip's and invalid %esp's. 826 */ 827#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 828 if (!CS_SECURE(context->sc_cs)) { 829 ksiginfo_init_trap(&ksi); 830 ksi.ksi_signo = SIGBUS; 831 ksi.ksi_code = BUS_OBJERR; 832 ksi.ksi_trapno = T_PROTFLT; 833 ksi.ksi_addr = (void *)regs->tf_eip; 834 trapsignal(td, &ksi); 835 return (EINVAL); 836 } 837 838 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 839 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 840 841 /* 842 * Restore signal context 843 */ 844 /* %gs was restored by the trampoline. */ 845 regs->tf_fs = context->sc_fs; 846 regs->tf_es = context->sc_es; 847 regs->tf_ds = context->sc_ds; 848 regs->tf_edi = context->sc_edi; 849 regs->tf_esi = context->sc_esi; 850 regs->tf_ebp = context->sc_ebp; 851 regs->tf_ebx = context->sc_ebx; 852 regs->tf_edx = context->sc_edx; 853 regs->tf_ecx = context->sc_ecx; 854 regs->tf_eax = context->sc_eax; 855 regs->tf_eip = context->sc_eip; 856 regs->tf_cs = context->sc_cs; 857 regs->tf_eflags = eflags; 858 regs->tf_esp = context->sc_esp_at_signal; 859 regs->tf_ss = context->sc_ss; 860 861 /* 862 * call sigaltstack & ignore results.. 
863 */ 864 lss = &uc.uc_stack; 865 ss.ss_sp = lss->ss_sp; 866 ss.ss_size = lss->ss_size; 867 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 868 869#ifdef DEBUG 870 if (ldebug(rt_sigreturn)) 871 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 872 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 873#endif 874 (void)kern_sigaltstack(td, &ss, NULL); 875 876 return (EJUSTRETURN); 877} 878 879static int 880linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 881{ 882 struct proc *p; 883 struct trapframe *frame; 884 885 p = td->td_proc; 886 frame = td->td_frame; 887 888 sa->code = frame->tf_eax; 889 sa->args[0] = frame->tf_ebx; 890 sa->args[1] = frame->tf_ecx; 891 sa->args[2] = frame->tf_edx; 892 sa->args[3] = frame->tf_esi; 893 sa->args[4] = frame->tf_edi; 894 sa->args[5] = frame->tf_ebp; /* Unconfirmed */ 895 896 if (sa->code >= p->p_sysent->sv_size) 897 sa->callp = &p->p_sysent->sv_table[0]; 898 else 899 sa->callp = &p->p_sysent->sv_table[sa->code]; 900 sa->narg = sa->callp->sy_narg; 901 902 td->td_retval[0] = 0; 903 td->td_retval[1] = frame->tf_edx; 904 905 return (0); 906} 907 908/* 909 * If a linux binary is exec'ing something, try this image activator 910 * first. We override standard shell script execution in order to 911 * be able to modify the interpreter path. We only do this if a linux 912 * binary is doing the exec, so we do not create an EXEC module for it. 913 */ 914static int exec_linux_imgact_try(struct image_params *iparams); 915 916static int 917exec_linux_imgact_try(struct image_params *imgp) 918{ 919 const char *head = (const char *)imgp->image_header; 920 char *rpath; 921 int error = -1; 922 923 /* 924 * The interpreter for shell scripts run from a linux binary needs 925 * to be located in /compat/linux if possible in order to recursively 926 * maintain linux path emulation. 927 */ 928 if (((const short *)head)[0] == SHELLMAGIC) { 929 /* 930 * Run our normal shell image activator. 
If it succeeds attempt 931 * to use the alternate path for the interpreter. If an alternate 932 * path is found, use our stringspace to store it. 933 */ 934 if ((error = exec_shell_imgact(imgp)) == 0) { 935 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 936 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); 937 if (rpath != NULL) 938 imgp->args->fname_buf = 939 imgp->interpreter_name = rpath; 940 } 941 } 942 return (error); 943} 944 945/* 946 * exec_setregs may initialize some registers differently than Linux 947 * does, thus potentially confusing Linux binaries. If necessary, we 948 * override the exec_setregs default(s) here. 949 */ 950static void 951exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 952{ 953 struct pcb *pcb = td->td_pcb; 954 955 exec_setregs(td, imgp, stack); 956 957 /* Linux sets %gs to 0, we default to _udatasel */ 958 pcb->pcb_gs = 0; 959 load_gs(0); 960 961 pcb->pcb_initial_npxcw = __LINUX_NPXCW__; 962} 963 964static void 965linux_get_machine(const char **dst) 966{ 967 968 switch (cpu_class) { 969 case CPUCLASS_686: 970 *dst = "i686"; 971 break; 972 case CPUCLASS_586: 973 *dst = "i586"; 974 break; 975 case CPUCLASS_486: 976 *dst = "i486"; 977 break; 978 default: 979 *dst = "i386"; 980 } 981} 982 983struct sysentvec linux_sysvec = { 984 .sv_size = LINUX_SYS_MAXSYSCALL, 985 .sv_table = linux_sysent, 986 .sv_mask = 0, 987 .sv_sigsize = LINUX_SIGTBLSZ, 988 .sv_sigtbl = bsd_to_linux_signal, 989 .sv_errsize = ELAST + 1, 990 .sv_errtbl = bsd_to_linux_errno, 991 .sv_transtrap = translate_traps, 992 .sv_fixup = linux_fixup, 993 .sv_sendsig = linux_sendsig, 994 .sv_sigcode = &_binary_linux_locore_o_start, 995 .sv_szsigcode = &linux_szsigcode, 996 .sv_prepsyscall = NULL, 997 .sv_name = "Linux a.out", 998 .sv_coredump = NULL, 999 .sv_imgact_try = exec_linux_imgact_try, 1000 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1001 .sv_pagesize = PAGE_SIZE, 1002 .sv_minuser = VM_MIN_ADDRESS, 1003 .sv_maxuser = 
VM_MAXUSER_ADDRESS, 1004 .sv_usrstack = LINUX_USRSTACK, 1005 .sv_psstrings = PS_STRINGS, 1006 .sv_stackprot = VM_PROT_ALL, 1007 .sv_copyout_strings = exec_copyout_strings, 1008 .sv_setregs = exec_linux_setregs, 1009 .sv_fixlimit = NULL, 1010 .sv_maxssiz = NULL, 1011 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32, 1012 .sv_set_syscall_retval = cpu_set_syscall_retval, 1013 .sv_fetch_syscall_args = linux_fetch_syscall_args, 1014 .sv_syscallnames = NULL, 1015 .sv_shared_page_base = LINUX_SHAREDPAGE, 1016 .sv_shared_page_len = PAGE_SIZE, 1017 .sv_schedtail = linux_schedtail, 1018 .sv_thread_detach = linux_thread_detach, 1019}; 1020INIT_SYSENTVEC(aout_sysvec, &linux_sysvec); 1021 1022struct sysentvec elf_linux_sysvec = { 1023 .sv_size = LINUX_SYS_MAXSYSCALL, 1024 .sv_table = linux_sysent, 1025 .sv_mask = 0, 1026 .sv_sigsize = LINUX_SIGTBLSZ, 1027 .sv_sigtbl = bsd_to_linux_signal, 1028 .sv_errsize = ELAST + 1, 1029 .sv_errtbl = bsd_to_linux_errno, 1030 .sv_transtrap = translate_traps, 1031 .sv_fixup = elf_linux_fixup, 1032 .sv_sendsig = linux_sendsig, 1033 .sv_sigcode = &_binary_linux_locore_o_start, 1034 .sv_szsigcode = &linux_szsigcode, 1035 .sv_prepsyscall = NULL, 1036 .sv_name = "Linux ELF", 1037 .sv_coredump = elf32_coredump, 1038 .sv_imgact_try = exec_linux_imgact_try, 1039 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1040 .sv_pagesize = PAGE_SIZE, 1041 .sv_minuser = VM_MIN_ADDRESS, 1042 .sv_maxuser = VM_MAXUSER_ADDRESS, 1043 .sv_usrstack = LINUX_USRSTACK, 1044 .sv_psstrings = LINUX_PS_STRINGS, 1045 .sv_stackprot = VM_PROT_ALL, 1046 .sv_copyout_strings = linux_copyout_strings, 1047 .sv_setregs = exec_linux_setregs, 1048 .sv_fixlimit = NULL, 1049 .sv_maxssiz = NULL, 1050 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP, 1051 .sv_set_syscall_retval = cpu_set_syscall_retval, 1052 .sv_fetch_syscall_args = linux_fetch_syscall_args, 1053 .sv_syscallnames = NULL, 1054 .sv_shared_page_base = LINUX_SHAREDPAGE, 1055 .sv_shared_page_len = PAGE_SIZE, 1056 .sv_schedtail = 
linux_schedtail, 1057 .sv_thread_detach = linux_thread_detach, 1058}; 1059 1060static void 1061linux_vdso_install(void *param) 1062{ 1063 1064 linux_szsigcode = (&_binary_linux_locore_o_end - 1065 &_binary_linux_locore_o_start); 1066 1067 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 1068 panic("Linux invalid vdso size\n"); 1069 1070 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 1071 1072 linux_shared_page_obj = __elfN(linux_shared_page_init) 1073 (&linux_shared_page_mapping); 1074 1075 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE); 1076 1077 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 1078 linux_szsigcode); 1079 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 1080} 1081SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 1082 (sysinit_cfunc_t)linux_vdso_install, NULL); 1083 1084static void 1085linux_vdso_deinstall(void *param) 1086{ 1087 1088 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 1089}; 1090SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 1091 (sysinit_cfunc_t)linux_vdso_deinstall, NULL); 1092 1093static char GNU_ABI_VENDOR[] = "GNU"; 1094static int GNULINUX_ABI_DESC = 0; 1095 1096static boolean_t 1097linux_trans_osrel(const Elf_Note *note, int32_t *osrel) 1098{ 1099 const Elf32_Word *desc; 1100 uintptr_t p; 1101 1102 p = (uintptr_t)(note + 1); 1103 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1104 1105 desc = (const Elf32_Word *)p; 1106 if (desc[0] != GNULINUX_ABI_DESC) 1107 return (FALSE); 1108 1109 /* 1110 * For linux we encode osrel as follows (see linux_mib.c): 1111 * VVVMMMIII (version, major, minor), see linux_mib.c. 
1112 */ 1113 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1114 1115 return (TRUE); 1116} 1117 1118static Elf_Brandnote linux_brandnote = { 1119 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1120 .hdr.n_descsz = 16, /* XXX at least 16 */ 1121 .hdr.n_type = 1, 1122 .vendor = GNU_ABI_VENDOR, 1123 .flags = BN_TRANSLATE_OSREL, 1124 .trans_osrel = linux_trans_osrel 1125}; 1126 1127static Elf32_Brandinfo linux_brand = { 1128 .brand = ELFOSABI_LINUX, 1129 .machine = EM_386, 1130 .compat_3_brand = "Linux", 1131 .emul_path = "/compat/linux", 1132 .interp_path = "/lib/ld-linux.so.1", 1133 .sysvec = &elf_linux_sysvec, 1134 .interp_newpath = NULL, 1135 .brand_note = &linux_brandnote, 1136 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1137}; 1138 1139static Elf32_Brandinfo linux_glibc2brand = { 1140 .brand = ELFOSABI_LINUX, 1141 .machine = EM_386, 1142 .compat_3_brand = "Linux", 1143 .emul_path = "/compat/linux", 1144 .interp_path = "/lib/ld-linux.so.2", 1145 .sysvec = &elf_linux_sysvec, 1146 .interp_newpath = NULL, 1147 .brand_note = &linux_brandnote, 1148 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1149}; 1150 1151Elf32_Brandinfo *linux_brandlist[] = { 1152 &linux_brand, 1153 &linux_glibc2brand, 1154 NULL 1155}; 1156 1157static int 1158linux_elf_modevent(module_t mod, int type, void *data) 1159{ 1160 Elf32_Brandinfo **brandinfo; 1161 int error; 1162 struct linux_ioctl_handler **lihp; 1163 1164 error = 0; 1165 1166 switch(type) { 1167 case MOD_LOAD: 1168 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1169 ++brandinfo) 1170 if (elf32_insert_brand_entry(*brandinfo) < 0) 1171 error = EINVAL; 1172 if (error == 0) { 1173 SET_FOREACH(lihp, linux_ioctl_handler_set) 1174 linux_ioctl_register_handler(*lihp); 1175 LIST_INIT(&futex_list); 1176 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1177 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, 1178 NULL, 1000); 1179 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, 1180 NULL, 1000); 1181 
linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, 1182 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); 1183 linux_get_machine(&linux_kplatform); 1184 linux_szplatform = roundup(strlen(linux_kplatform) + 1, 1185 sizeof(char *)); 1186 linux_osd_jail_register(); 1187 stclohz = (stathz ? stathz : hz); 1188 if (bootverbose) 1189 printf("Linux ELF exec handler installed\n"); 1190 } else 1191 printf("cannot insert Linux ELF brand handler\n"); 1192 break; 1193 case MOD_UNLOAD: 1194 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1195 ++brandinfo) 1196 if (elf32_brand_inuse(*brandinfo)) 1197 error = EBUSY; 1198 if (error == 0) { 1199 for (brandinfo = &linux_brandlist[0]; 1200 *brandinfo != NULL; ++brandinfo) 1201 if (elf32_remove_brand_entry(*brandinfo) < 0) 1202 error = EINVAL; 1203 } 1204 if (error == 0) { 1205 SET_FOREACH(lihp, linux_ioctl_handler_set) 1206 linux_ioctl_unregister_handler(*lihp); 1207 mtx_destroy(&futex_mtx); 1208 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1209 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1210 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); 1211 linux_osd_jail_deregister(); 1212 if (bootverbose) 1213 printf("Linux ELF exec handler removed\n"); 1214 } else 1215 printf("Could not deinstall ELF interpreter entry\n"); 1216 break; 1217 default: 1218 return (EOPNOTSUPP); 1219 } 1220 return (error); 1221} 1222 1223static moduledata_t linux_elf_mod = { 1224 "linuxelf", 1225 linux_elf_modevent, 1226 0 1227}; 1228 1229DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1230