linux_machdep.c revision 283359
/*-
 * Copyright (c) 2000 Marcel Moolenaar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/i386/linux/linux_machdep.c 283359 2015-05-24 07:32:02Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/unistd.h>
#include <sys/wait.h>
#include <sys/sched.h>

#include <machine/frame.h>
#include <machine/psl.h>
#include <machine/segments.h>
#include <machine/sysarch.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#include <i386/linux/linux.h>
#include <i386/linux/linux_proto.h>
#include <compat/linux/linux_ipc.h>
#include <compat/linux/linux_misc.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>

#include <i386/include/pcb.h>	/* needed for pcb definition in linux_set_thread_area */

#include "opt_posix.h"

extern struct sysentvec elf32_freebsd_sysvec;	/* defined in i386/i386/elf_machdep.c */

78239281Sgonzostruct l_descriptor { 79239281Sgonzo l_uint entry_number; 80239281Sgonzo l_ulong base_addr; 81239281Sgonzo l_uint limit; 82239281Sgonzo l_uint seg_32bit:1; 83239281Sgonzo l_uint contents:2; 84239281Sgonzo l_uint read_exec_only:1; 85239281Sgonzo l_uint limit_in_pages:1; 86239281Sgonzo l_uint seg_not_present:1; 87239281Sgonzo l_uint useable:1; 88239281Sgonzo}; 89239281Sgonzo 90239281Sgonzostruct l_old_select_argv { 91239281Sgonzo l_int nfds; 92239281Sgonzo l_fd_set *readfds; 93239281Sgonzo l_fd_set *writefds; 94239281Sgonzo l_fd_set *exceptfds; 95239281Sgonzo struct l_timeval *timeout; 96239281Sgonzo}; 97239281Sgonzo 98239281Sgonzostatic int linux_mmap_common(struct thread *td, l_uintptr_t addr, 99239281Sgonzo l_size_t len, l_int prot, l_int flags, l_int fd, 100239281Sgonzo l_loff_t pos); 101239281Sgonzo 102239281Sgonzoint 103239281Sgonzolinux_to_bsd_sigaltstack(int lsa) 104239281Sgonzo{ 105239281Sgonzo int bsa = 0; 106239281Sgonzo 107239281Sgonzo if (lsa & LINUX_SS_DISABLE) 108239281Sgonzo bsa |= SS_DISABLE; 109239281Sgonzo if (lsa & LINUX_SS_ONSTACK) 110239281Sgonzo bsa |= SS_ONSTACK; 111239281Sgonzo return (bsa); 112239281Sgonzo} 113239281Sgonzo 114239281Sgonzoint 115239281Sgonzobsd_to_linux_sigaltstack(int bsa) 116239281Sgonzo{ 117239281Sgonzo int lsa = 0; 118239281Sgonzo 119239281Sgonzo if (bsa & SS_DISABLE) 120239281Sgonzo lsa |= LINUX_SS_DISABLE; 121239281Sgonzo if (bsa & SS_ONSTACK) 122239281Sgonzo lsa |= LINUX_SS_ONSTACK; 123239281Sgonzo return (lsa); 124239281Sgonzo} 125239281Sgonzo 126239281Sgonzoint 127239281Sgonzolinux_execve(struct thread *td, struct linux_execve_args *args) 128239281Sgonzo{ 129239281Sgonzo struct image_args eargs; 130239281Sgonzo struct vmspace *oldvmspace; 131239281Sgonzo char *newpath; 132239281Sgonzo int error; 133239281Sgonzo 134239281Sgonzo LCONVPATHEXIST(td, args->path, &newpath); 135239281Sgonzo 136239281Sgonzo#ifdef DEBUG 137239281Sgonzo if (ldebug(execve)) 138239281Sgonzo printf(ARGS(execve, "%s"), newpath); 
139239281Sgonzo#endif 140239281Sgonzo 141239281Sgonzo error = pre_execve(td, &oldvmspace); 142239281Sgonzo if (error != 0) { 143239281Sgonzo free(newpath, M_TEMP); 144239281Sgonzo return (error); 145239281Sgonzo } 146239281Sgonzo error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE, 147239281Sgonzo args->argp, args->envp); 148239281Sgonzo free(newpath, M_TEMP); 149239281Sgonzo if (error == 0) 150239281Sgonzo error = kern_execve(td, &eargs, NULL); 151239281Sgonzo if (error == 0) { 152239281Sgonzo /* linux process can exec fbsd one, dont attempt 153239281Sgonzo * to create emuldata for such process using 154239281Sgonzo * linux_proc_init, this leads to a panic on KASSERT 155239281Sgonzo * because such process has p->p_emuldata == NULL 156239281Sgonzo */ 157239281Sgonzo if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) 158245672Skientzle error = linux_proc_init(td, 0, 0); 159245672Skientzle } 160239281Sgonzo post_execve(td, error, oldvmspace); 161245672Skientzle return (error); 162239281Sgonzo} 163239281Sgonzo 164239281Sgonzostruct l_ipc_kludge { 165239281Sgonzo struct l_msgbuf *msgp; 166252229Srpaulo l_long msgtyp; 167252229Srpaulo}; 168252229Srpaulo 169239281Sgonzoint 170239281Sgonzolinux_ipc(struct thread *td, struct linux_ipc_args *args) 171239281Sgonzo{ 172239281Sgonzo 173239281Sgonzo switch (args->what & 0xFFFF) { 174239281Sgonzo case LINUX_SEMOP: { 175239281Sgonzo struct linux_semop_args a; 176239281Sgonzo 177239281Sgonzo a.semid = args->arg1; 178239281Sgonzo a.tsops = args->ptr; 179239281Sgonzo a.nsops = args->arg2; 180239281Sgonzo return (linux_semop(td, &a)); 181239281Sgonzo } 182239281Sgonzo case LINUX_SEMGET: { 183239281Sgonzo struct linux_semget_args a; 184239281Sgonzo 185239281Sgonzo a.key = args->arg1; 186239281Sgonzo a.nsems = args->arg2; 187239281Sgonzo a.semflg = args->arg3; 188239281Sgonzo return (linux_semget(td, &a)); 189239281Sgonzo } 190239281Sgonzo case LINUX_SEMCTL: { 191239281Sgonzo struct linux_semctl_args a; 192239281Sgonzo int error; 
193239281Sgonzo 194239281Sgonzo a.semid = args->arg1; 195239281Sgonzo a.semnum = args->arg2; 196239281Sgonzo a.cmd = args->arg3; 197239281Sgonzo error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 198239281Sgonzo if (error) 199239281Sgonzo return (error); 200239281Sgonzo return (linux_semctl(td, &a)); 201239281Sgonzo } 202239281Sgonzo case LINUX_MSGSND: { 203239281Sgonzo struct linux_msgsnd_args a; 204239281Sgonzo 205239281Sgonzo a.msqid = args->arg1; 206239281Sgonzo a.msgp = args->ptr; 207239281Sgonzo a.msgsz = args->arg2; 208239281Sgonzo a.msgflg = args->arg3; 209240518Seadler return (linux_msgsnd(td, &a)); 210239281Sgonzo } 211239281Sgonzo case LINUX_MSGRCV: { 212239281Sgonzo struct linux_msgrcv_args a; 213239281Sgonzo 214239281Sgonzo a.msqid = args->arg1; 215239281Sgonzo a.msgsz = args->arg2; 216239281Sgonzo a.msgflg = args->arg3; 217239281Sgonzo if ((args->what >> 16) == 0) { 218239281Sgonzo struct l_ipc_kludge tmp; 219239281Sgonzo int error; 220239281Sgonzo 221239281Sgonzo if (args->ptr == NULL) 222239281Sgonzo return (EINVAL); 223239281Sgonzo error = copyin(args->ptr, &tmp, sizeof(tmp)); 224239281Sgonzo if (error) 225239281Sgonzo return (error); 226239281Sgonzo a.msgp = tmp.msgp; 227239281Sgonzo a.msgtyp = tmp.msgtyp; 228239281Sgonzo } else { 229239281Sgonzo a.msgp = args->ptr; 230239281Sgonzo a.msgtyp = args->arg5; 231239281Sgonzo } 232239281Sgonzo return (linux_msgrcv(td, &a)); 233239281Sgonzo } 234239281Sgonzo case LINUX_MSGGET: { 235239281Sgonzo struct linux_msgget_args a; 236239281Sgonzo 237239281Sgonzo a.key = args->arg1; 238239281Sgonzo a.msgflg = args->arg2; 239239281Sgonzo return (linux_msgget(td, &a)); 240239281Sgonzo } 241239281Sgonzo case LINUX_MSGCTL: { 242239281Sgonzo struct linux_msgctl_args a; 243239281Sgonzo 244239281Sgonzo a.msqid = args->arg1; 245239281Sgonzo a.cmd = args->arg2; 246239281Sgonzo a.buf = args->ptr; 247239281Sgonzo return (linux_msgctl(td, &a)); 248239281Sgonzo } 249239281Sgonzo case LINUX_SHMAT: { 250239281Sgonzo struct 
linux_shmat_args a; 251239281Sgonzo 252239281Sgonzo a.shmid = args->arg1; 253239281Sgonzo a.shmaddr = args->ptr; 254239281Sgonzo a.shmflg = args->arg2; 255239281Sgonzo a.raddr = (l_ulong *)args->arg3; 256239281Sgonzo return (linux_shmat(td, &a)); 257239281Sgonzo } 258239281Sgonzo case LINUX_SHMDT: { 259239281Sgonzo struct linux_shmdt_args a; 260239281Sgonzo 261239281Sgonzo a.shmaddr = args->ptr; 262239281Sgonzo return (linux_shmdt(td, &a)); 263239281Sgonzo } 264239281Sgonzo case LINUX_SHMGET: { 265239281Sgonzo struct linux_shmget_args a; 266239281Sgonzo 267239281Sgonzo a.key = args->arg1; 268239281Sgonzo a.size = args->arg2; 269239281Sgonzo a.shmflg = args->arg3; 270239281Sgonzo return (linux_shmget(td, &a)); 271239281Sgonzo } 272239281Sgonzo case LINUX_SHMCTL: { 273239281Sgonzo struct linux_shmctl_args a; 274239281Sgonzo 275239281Sgonzo a.shmid = args->arg1; 276239281Sgonzo a.cmd = args->arg2; 277239281Sgonzo a.buf = args->ptr; 278239281Sgonzo return (linux_shmctl(td, &a)); 279239281Sgonzo } 280239281Sgonzo default: 281239281Sgonzo break; 282239281Sgonzo } 283239281Sgonzo 284239281Sgonzo return (EINVAL); 285239281Sgonzo} 286239281Sgonzo 287239281Sgonzoint 288239281Sgonzolinux_old_select(struct thread *td, struct linux_old_select_args *args) 289239281Sgonzo{ 290239281Sgonzo struct l_old_select_argv linux_args; 291239281Sgonzo struct linux_select_args newsel; 292239281Sgonzo int error; 293239281Sgonzo 294239281Sgonzo#ifdef DEBUG 295239281Sgonzo if (ldebug(old_select)) 296239281Sgonzo printf(ARGS(old_select, "%p"), args->ptr); 297239281Sgonzo#endif 298239281Sgonzo 299239281Sgonzo error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 300239281Sgonzo if (error) 301239281Sgonzo return (error); 302239281Sgonzo 303239281Sgonzo newsel.nfds = linux_args.nfds; 304239281Sgonzo newsel.readfds = linux_args.readfds; 305239281Sgonzo newsel.writefds = linux_args.writefds; 306239281Sgonzo newsel.exceptfds = linux_args.exceptfds; 307239281Sgonzo newsel.timeout = 
linux_args.timeout; 308239281Sgonzo return (linux_select(td, &newsel)); 309239281Sgonzo} 310239281Sgonzo 311239281Sgonzoint 312239281Sgonzolinux_set_cloned_tls(struct thread *td, void *desc) 313239281Sgonzo{ 314239281Sgonzo struct segment_descriptor sd; 315239281Sgonzo struct l_user_desc info; 316239281Sgonzo int idx, error; 317239281Sgonzo int a[2]; 318239281Sgonzo 319239281Sgonzo error = copyin(desc, &info, sizeof(struct l_user_desc)); 320239281Sgonzo if (error) { 321239281Sgonzo printf(LMSG("copyin failed!")); 322239281Sgonzo } else { 323239281Sgonzo idx = info.entry_number; 324239281Sgonzo 325239281Sgonzo /* 326239281Sgonzo * looks like we're getting the idx we returned 327239281Sgonzo * in the set_thread_area() syscall 328239281Sgonzo */ 329239281Sgonzo if (idx != 6 && idx != 3) { 330239281Sgonzo printf(LMSG("resetting idx!")); 331239281Sgonzo idx = 3; 332239281Sgonzo } 333239281Sgonzo 334239281Sgonzo /* this doesnt happen in practice */ 335239281Sgonzo if (idx == 6) { 336239281Sgonzo /* we might copy out the entry_number as 3 */ 337239281Sgonzo info.entry_number = 3; 338239281Sgonzo error = copyout(&info, desc, sizeof(struct l_user_desc)); 339239281Sgonzo if (error) 340239281Sgonzo printf(LMSG("copyout failed!")); 341239281Sgonzo } 342239281Sgonzo 343239281Sgonzo a[0] = LINUX_LDT_entry_a(&info); 344239281Sgonzo a[1] = LINUX_LDT_entry_b(&info); 345239281Sgonzo 346239281Sgonzo memcpy(&sd, &a, sizeof(a)); 347239281Sgonzo#ifdef DEBUG 348239281Sgonzo if (ldebug(clone)) 349239281Sgonzo printf("Segment created in clone with " 350239281Sgonzo "CLONE_SETTLS: lobase: %x, hibase: %x, " 351239281Sgonzo "lolimit: %x, hilimit: %x, type: %i, " 352239281Sgonzo "dpl: %i, p: %i, xx: %i, def32: %i, " 353239281Sgonzo "gran: %i\n", sd.sd_lobase, sd.sd_hibase, 354239281Sgonzo sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, 355239281Sgonzo sd.sd_dpl, sd.sd_p, sd.sd_xx, 356239281Sgonzo sd.sd_def32, sd.sd_gran); 357239281Sgonzo#endif 358239281Sgonzo 359239281Sgonzo /* set %gs */ 
360239281Sgonzo td->td_pcb->pcb_gsd = sd; 361239281Sgonzo td->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL); 362239281Sgonzo } 363239281Sgonzo 364239281Sgonzo return (error); 365239281Sgonzo} 366239281Sgonzo 367239281Sgonzoint 368239281Sgonzolinux_set_upcall_kse(struct thread *td, register_t stack) 369239281Sgonzo{ 370239281Sgonzo 371239281Sgonzo td->td_frame->tf_esp = stack; 372239281Sgonzo 373239281Sgonzo return (0); 374239281Sgonzo} 375239281Sgonzo 376239281Sgonzo#define STACK_SIZE (2 * 1024 * 1024) 377239281Sgonzo#define GUARD_SIZE (4 * PAGE_SIZE) 378239281Sgonzo 379239281Sgonzoint 380239281Sgonzolinux_mmap2(struct thread *td, struct linux_mmap2_args *args) 381239281Sgonzo{ 382239281Sgonzo 383239281Sgonzo#ifdef DEBUG 384239281Sgonzo if (ldebug(mmap2)) 385239281Sgonzo printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"), 386239281Sgonzo (void *)args->addr, args->len, args->prot, 387239281Sgonzo args->flags, args->fd, args->pgoff); 388239281Sgonzo#endif 389239281Sgonzo 390239281Sgonzo return (linux_mmap_common(td, args->addr, args->len, args->prot, 391239281Sgonzo args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 392239281Sgonzo PAGE_SIZE)); 393239281Sgonzo} 394239281Sgonzo 395239281Sgonzoint 396239281Sgonzolinux_mmap(struct thread *td, struct linux_mmap_args *args) 397239281Sgonzo{ 398245672Skientzle int error; 399239281Sgonzo struct l_mmap_argv linux_args; 400239281Sgonzo 401239281Sgonzo error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 402239281Sgonzo if (error) 403245672Skientzle return (error); 404245672Skientzle 405245672Skientzle#ifdef DEBUG 406245672Skientzle if (ldebug(mmap)) 407245672Skientzle printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"), 408239281Sgonzo (void *)linux_args.addr, linux_args.len, linux_args.prot, 409239281Sgonzo linux_args.flags, linux_args.fd, linux_args.pgoff); 410239281Sgonzo#endif 411239281Sgonzo 412239281Sgonzo return (linux_mmap_common(td, linux_args.addr, linux_args.len, 413239281Sgonzo linux_args.prot, linux_args.flags, 
linux_args.fd, 414239281Sgonzo (uint32_t)linux_args.pgoff)); 415239281Sgonzo} 416239281Sgonzo 417239281Sgonzostatic int 418239281Sgonzolinux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, 419239281Sgonzo l_int flags, l_int fd, l_loff_t pos) 420239281Sgonzo{ 421239281Sgonzo struct proc *p = td->td_proc; 422239281Sgonzo struct mmap_args /* { 423239281Sgonzo caddr_t addr; 424239281Sgonzo size_t len; 425239281Sgonzo int prot; 426239281Sgonzo int flags; 427239281Sgonzo int fd; 428239281Sgonzo long pad; 429239281Sgonzo off_t pos; 430239281Sgonzo } */ bsd_args; 431239281Sgonzo int error; 432239281Sgonzo struct file *fp; 433239281Sgonzo cap_rights_t rights; 434239281Sgonzo 435239281Sgonzo error = 0; 436239281Sgonzo bsd_args.flags = 0; 437239281Sgonzo fp = NULL; 438239281Sgonzo 439239281Sgonzo /* 440239281Sgonzo * Linux mmap(2): 441239281Sgonzo * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 442239281Sgonzo */ 443239281Sgonzo if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 444239281Sgonzo return (EINVAL); 445239281Sgonzo 446239281Sgonzo if (flags & LINUX_MAP_SHARED) 447239281Sgonzo bsd_args.flags |= MAP_SHARED; 448239281Sgonzo if (flags & LINUX_MAP_PRIVATE) 449239281Sgonzo bsd_args.flags |= MAP_PRIVATE; 450239281Sgonzo if (flags & LINUX_MAP_FIXED) 451239281Sgonzo bsd_args.flags |= MAP_FIXED; 452239281Sgonzo if (flags & LINUX_MAP_ANON) { 453239281Sgonzo /* Enforce pos to be on page boundary, then ignore. */ 454239281Sgonzo if ((pos & PAGE_MASK) != 0) 455239281Sgonzo return (EINVAL); 456239281Sgonzo pos = 0; 457239281Sgonzo bsd_args.flags |= MAP_ANON; 458239281Sgonzo } else 459239281Sgonzo bsd_args.flags |= MAP_NOSYNC; 460239281Sgonzo if (flags & LINUX_MAP_GROWSDOWN) 461239281Sgonzo bsd_args.flags |= MAP_STACK; 462239281Sgonzo 463239281Sgonzo /* 464239281Sgonzo * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 465239281Sgonzo * on Linux/i386. We do this to ensure maximum compatibility. 
466239281Sgonzo * Linux/ia64 does the same in i386 emulation mode. 467239281Sgonzo */ 468239281Sgonzo bsd_args.prot = prot; 469239281Sgonzo if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 470239281Sgonzo bsd_args.prot |= PROT_READ | PROT_EXEC; 471239281Sgonzo 472239281Sgonzo /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 473239281Sgonzo bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; 474239281Sgonzo if (bsd_args.fd != -1) { 475239281Sgonzo /* 476239281Sgonzo * Linux follows Solaris mmap(2) description: 477239281Sgonzo * The file descriptor fildes is opened with 478239281Sgonzo * read permission, regardless of the 479239281Sgonzo * protection options specified. 480239281Sgonzo * 481239281Sgonzo * Checking just CAP_MMAP is fine here, since the real work 482239281Sgonzo * is done in the FreeBSD mmap(). 483239281Sgonzo */ 484239281Sgonzo 485239281Sgonzo error = fget(td, bsd_args.fd, 486239281Sgonzo cap_rights_init(&rights, CAP_MMAP), &fp); 487239281Sgonzo if (error != 0) 488239281Sgonzo return (error); 489239281Sgonzo if (fp->f_type != DTYPE_VNODE) { 490239281Sgonzo fdrop(fp, td); 491239281Sgonzo return (EINVAL); 492239281Sgonzo } 493239281Sgonzo 494239281Sgonzo /* Linux mmap() just fails for O_WRONLY files */ 495239281Sgonzo if (!(fp->f_flag & FREAD)) { 496239281Sgonzo fdrop(fp, td); 497239281Sgonzo return (EACCES); 498239281Sgonzo } 499239281Sgonzo 500239281Sgonzo fdrop(fp, td); 501 } 502 503 if (flags & LINUX_MAP_GROWSDOWN) { 504 /* 505 * The Linux MAP_GROWSDOWN option does not limit auto 506 * growth of the region. Linux mmap with this option 507 * takes as addr the inital BOS, and as len, the initial 508 * region size. It can then grow down from addr without 509 * limit. However, linux threads has an implicit internal 510 * limit to stack size of STACK_SIZE. Its just not 511 * enforced explicitly in linux. 
But, here we impose 512 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 513 * region, since we can do this with our mmap. 514 * 515 * Our mmap with MAP_STACK takes addr as the maximum 516 * downsize limit on BOS, and as len the max size of 517 * the region. It them maps the top SGROWSIZ bytes, 518 * and auto grows the region down, up to the limit 519 * in addr. 520 * 521 * If we don't use the MAP_STACK option, the effect 522 * of this code is to allocate a stack region of a 523 * fixed size of (STACK_SIZE - GUARD_SIZE). 524 */ 525 526 if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { 527 /* 528 * Some linux apps will attempt to mmap 529 * thread stacks near the top of their 530 * address space. If their TOS is greater 531 * than vm_maxsaddr, vm_map_growstack() 532 * will confuse the thread stack with the 533 * process stack and deliver a SEGV if they 534 * attempt to grow the thread stack past their 535 * current stacksize rlimit. To avoid this, 536 * adjust vm_maxsaddr upwards to reflect 537 * the current stacksize rlimit rather 538 * than the maximum possible stacksize. 539 * It would be better to adjust the 540 * mmap'ed region, but some apps do not check 541 * mmap's return value. 542 */ 543 PROC_LOCK(p); 544 p->p_vmspace->vm_maxsaddr = (char *)USRSTACK - 545 lim_cur(p, RLIMIT_STACK); 546 PROC_UNLOCK(p); 547 } 548 549 /* 550 * This gives us our maximum stack size and a new BOS. 551 * If we're using VM_STACK, then mmap will just map 552 * the top SGROWSIZ bytes, and let the stack grow down 553 * to the limit at BOS. If we're not using VM_STACK 554 * we map the full stack, since we don't have a way 555 * to autogrow it. 
556 */ 557 if (len > STACK_SIZE - GUARD_SIZE) { 558 bsd_args.addr = (caddr_t)PTRIN(addr); 559 bsd_args.len = len; 560 } else { 561 bsd_args.addr = (caddr_t)PTRIN(addr) - 562 (STACK_SIZE - GUARD_SIZE - len); 563 bsd_args.len = STACK_SIZE - GUARD_SIZE; 564 } 565 } else { 566 bsd_args.addr = (caddr_t)PTRIN(addr); 567 bsd_args.len = len; 568 } 569 bsd_args.pos = pos; 570 571#ifdef DEBUG 572 if (ldebug(mmap)) 573 printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 574 __func__, 575 (void *)bsd_args.addr, bsd_args.len, bsd_args.prot, 576 bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 577#endif 578 error = sys_mmap(td, &bsd_args); 579#ifdef DEBUG 580 if (ldebug(mmap)) 581 printf("-> %s() return: 0x%x (0x%08x)\n", 582 __func__, error, (u_int)td->td_retval[0]); 583#endif 584 return (error); 585} 586 587int 588linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 589{ 590 struct mprotect_args bsd_args; 591 592 bsd_args.addr = uap->addr; 593 bsd_args.len = uap->len; 594 bsd_args.prot = uap->prot; 595 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 596 bsd_args.prot |= PROT_READ | PROT_EXEC; 597 return (sys_mprotect(td, &bsd_args)); 598} 599 600int 601linux_ioperm(struct thread *td, struct linux_ioperm_args *args) 602{ 603 int error; 604 struct i386_ioperm_args iia; 605 606 iia.start = args->start; 607 iia.length = args->length; 608 iia.enable = args->enable; 609 error = i386_set_ioperm(td, &iia); 610 return (error); 611} 612 613int 614linux_iopl(struct thread *td, struct linux_iopl_args *args) 615{ 616 int error; 617 618 if (args->level < 0 || args->level > 3) 619 return (EINVAL); 620 if ((error = priv_check(td, PRIV_IO)) != 0) 621 return (error); 622 if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 623 return (error); 624 td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) | 625 (args->level * (PSL_IOPL / 3)); 626 return (0); 627} 628 629int 630linux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap) 631{ 632 int error; 633 
struct i386_ldt_args ldt; 634 struct l_descriptor ld; 635 union descriptor desc; 636 int size, written; 637 638 switch (uap->func) { 639 case 0x00: /* read_ldt */ 640 ldt.start = 0; 641 ldt.descs = uap->ptr; 642 ldt.num = uap->bytecount / sizeof(union descriptor); 643 error = i386_get_ldt(td, &ldt); 644 td->td_retval[0] *= sizeof(union descriptor); 645 break; 646 case 0x02: /* read_default_ldt = 0 */ 647 size = 5*sizeof(struct l_desc_struct); 648 if (size > uap->bytecount) 649 size = uap->bytecount; 650 for (written = error = 0; written < size && error == 0; written++) 651 error = subyte((char *)uap->ptr + written, 0); 652 td->td_retval[0] = written; 653 break; 654 case 0x01: /* write_ldt */ 655 case 0x11: /* write_ldt */ 656 if (uap->bytecount != sizeof(ld)) 657 return (EINVAL); 658 659 error = copyin(uap->ptr, &ld, sizeof(ld)); 660 if (error) 661 return (error); 662 663 ldt.start = ld.entry_number; 664 ldt.descs = &desc; 665 ldt.num = 1; 666 desc.sd.sd_lolimit = (ld.limit & 0x0000ffff); 667 desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16; 668 desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff); 669 desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24; 670 desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) | 671 (ld.contents << 2); 672 desc.sd.sd_dpl = 3; 673 desc.sd.sd_p = (ld.seg_not_present ^ 1); 674 desc.sd.sd_xx = 0; 675 desc.sd.sd_def32 = ld.seg_32bit; 676 desc.sd.sd_gran = ld.limit_in_pages; 677 error = i386_set_ldt(td, &ldt, &desc); 678 break; 679 default: 680 error = ENOSYS; 681 break; 682 } 683 684 if (error == EOPNOTSUPP) { 685 printf("linux: modify_ldt needs kernel option USER_LDT\n"); 686 error = ENOSYS; 687 } 688 689 return (error); 690} 691 692int 693linux_sigaction(struct thread *td, struct linux_sigaction_args *args) 694{ 695 l_osigaction_t osa; 696 l_sigaction_t act, oact; 697 int error; 698 699#ifdef DEBUG 700 if (ldebug(sigaction)) 701 printf(ARGS(sigaction, "%d, %p, %p"), 702 args->sig, (void *)args->nsa, (void *)args->osa); 
703#endif 704 705 if (args->nsa != NULL) { 706 error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 707 if (error) 708 return (error); 709 act.lsa_handler = osa.lsa_handler; 710 act.lsa_flags = osa.lsa_flags; 711 act.lsa_restorer = osa.lsa_restorer; 712 LINUX_SIGEMPTYSET(act.lsa_mask); 713 act.lsa_mask.__bits[0] = osa.lsa_mask; 714 } 715 716 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 717 args->osa ? &oact : NULL); 718 719 if (args->osa != NULL && !error) { 720 osa.lsa_handler = oact.lsa_handler; 721 osa.lsa_flags = oact.lsa_flags; 722 osa.lsa_restorer = oact.lsa_restorer; 723 osa.lsa_mask = oact.lsa_mask.__bits[0]; 724 error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 725 } 726 727 return (error); 728} 729 730/* 731 * Linux has two extra args, restart and oldmask. We dont use these, 732 * but it seems that "restart" is actually a context pointer that 733 * enables the signal to happen with a different register set. 734 */ 735int 736linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 737{ 738 sigset_t sigmask; 739 l_sigset_t mask; 740 741#ifdef DEBUG 742 if (ldebug(sigsuspend)) 743 printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 744#endif 745 746 LINUX_SIGEMPTYSET(mask); 747 mask.__bits[0] = args->mask; 748 linux_to_bsd_sigset(&mask, &sigmask); 749 return (kern_sigsuspend(td, sigmask)); 750} 751 752int 753linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 754{ 755 l_sigset_t lmask; 756 sigset_t sigmask; 757 int error; 758 759#ifdef DEBUG 760 if (ldebug(rt_sigsuspend)) 761 printf(ARGS(rt_sigsuspend, "%p, %d"), 762 (void *)uap->newset, uap->sigsetsize); 763#endif 764 765 if (uap->sigsetsize != sizeof(l_sigset_t)) 766 return (EINVAL); 767 768 error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 769 if (error) 770 return (error); 771 772 linux_to_bsd_sigset(&lmask, &sigmask); 773 return (kern_sigsuspend(td, sigmask)); 774} 775 776int 777linux_pause(struct thread *td, 
struct linux_pause_args *args) 778{ 779 struct proc *p = td->td_proc; 780 sigset_t sigmask; 781 782#ifdef DEBUG 783 if (ldebug(pause)) 784 printf(ARGS(pause, "")); 785#endif 786 787 PROC_LOCK(p); 788 sigmask = td->td_sigmask; 789 PROC_UNLOCK(p); 790 return (kern_sigsuspend(td, sigmask)); 791} 792 793int 794linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 795{ 796 stack_t ss, oss; 797 l_stack_t lss; 798 int error; 799 800#ifdef DEBUG 801 if (ldebug(sigaltstack)) 802 printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 803#endif 804 805 if (uap->uss != NULL) { 806 error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 807 if (error) 808 return (error); 809 810 ss.ss_sp = lss.ss_sp; 811 ss.ss_size = lss.ss_size; 812 ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 813 } 814 error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 815 (uap->uoss != NULL) ? &oss : NULL); 816 if (!error && uap->uoss != NULL) { 817 lss.ss_sp = oss.ss_sp; 818 lss.ss_size = oss.ss_size; 819 lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 820 error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 821 } 822 823 return (error); 824} 825 826int 827linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 828{ 829 struct ftruncate_args sa; 830 831#ifdef DEBUG 832 if (ldebug(ftruncate64)) 833 printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 834 (intmax_t)args->length); 835#endif 836 837 sa.fd = args->fd; 838 sa.length = args->length; 839 return sys_ftruncate(td, &sa); 840} 841 842int 843linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) 844{ 845 struct l_user_desc info; 846 int error; 847 int idx; 848 int a[2]; 849 struct segment_descriptor sd; 850 851 error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 852 if (error) 853 return (error); 854 855#ifdef DEBUG 856 if (ldebug(set_thread_area)) 857 printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"), 858 info.entry_number, 859 
info.base_addr, 860 info.limit, 861 info.seg_32bit, 862 info.contents, 863 info.read_exec_only, 864 info.limit_in_pages, 865 info.seg_not_present, 866 info.useable); 867#endif 868 869 idx = info.entry_number; 870 /* 871 * Semantics of linux version: every thread in the system has array of 872 * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This 873 * syscall loads one of the selected tls decriptors with a value and 874 * also loads GDT descriptors 6, 7 and 8 with the content of the 875 * per-thread descriptors. 876 * 877 * Semantics of fbsd version: I think we can ignore that linux has 3 878 * per-thread descriptors and use just the 1st one. The tls_array[] 879 * is used only in set/get-thread_area() syscalls and for loading the 880 * GDT descriptors. In fbsd we use just one GDT descriptor for TLS so 881 * we will load just one. 882 * 883 * XXX: this doesn't work when a user space process tries to use more 884 * than 1 TLS segment. Comment in the linux sources says wine might do 885 * this. 886 */ 887 888 /* 889 * we support just GLIBC TLS now 890 * we should let 3 proceed as well because we use this segment so 891 * if code does two subsequent calls it should succeed 892 */ 893 if (idx != 6 && idx != -1 && idx != 3) 894 return (EINVAL); 895 896 /* 897 * we have to copy out the GDT entry we use 898 * FreeBSD uses GDT entry #3 for storing %gs so load that 899 * 900 * XXX: what if a user space program doesn't check this value and tries 901 * to use 6, 7 or 8? 
902 */ 903 idx = info.entry_number = 3; 904 error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 905 if (error) 906 return (error); 907 908 if (LINUX_LDT_empty(&info)) { 909 a[0] = 0; 910 a[1] = 0; 911 } else { 912 a[0] = LINUX_LDT_entry_a(&info); 913 a[1] = LINUX_LDT_entry_b(&info); 914 } 915 916 memcpy(&sd, &a, sizeof(a)); 917#ifdef DEBUG 918 if (ldebug(set_thread_area)) 919 printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase, 920 sd.sd_hibase, 921 sd.sd_lolimit, 922 sd.sd_hilimit, 923 sd.sd_type, 924 sd.sd_dpl, 925 sd.sd_p, 926 sd.sd_xx, 927 sd.sd_def32, 928 sd.sd_gran); 929#endif 930 931 /* this is taken from i386 version of cpu_set_user_tls() */ 932 critical_enter(); 933 /* set %gs */ 934 td->td_pcb->pcb_gsd = sd; 935 PCPU_GET(fsgs_gdt)[1] = sd; 936 load_gs(GSEL(GUGS_SEL, SEL_UPL)); 937 critical_exit(); 938 939 return (0); 940} 941 942int 943linux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args) 944{ 945 946 struct l_user_desc info; 947 int error; 948 int idx; 949 struct l_desc_struct desc; 950 struct segment_descriptor sd; 951 952#ifdef DEBUG 953 if (ldebug(get_thread_area)) 954 printf(ARGS(get_thread_area, "%p"), args->desc); 955#endif 956 957 error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 958 if (error) 959 return (error); 960 961 idx = info.entry_number; 962 /* XXX: I am not sure if we want 3 to be allowed too. 
*/ 963 if (idx != 6 && idx != 3) 964 return (EINVAL); 965 966 idx = 3; 967 968 memset(&info, 0, sizeof(info)); 969 970 sd = PCPU_GET(fsgs_gdt)[1]; 971 972 memcpy(&desc, &sd, sizeof(desc)); 973 974 info.entry_number = idx; 975 info.base_addr = LINUX_GET_BASE(&desc); 976 info.limit = LINUX_GET_LIMIT(&desc); 977 info.seg_32bit = LINUX_GET_32BIT(&desc); 978 info.contents = LINUX_GET_CONTENTS(&desc); 979 info.read_exec_only = !LINUX_GET_WRITABLE(&desc); 980 info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc); 981 info.seg_not_present = !LINUX_GET_PRESENT(&desc); 982 info.useable = LINUX_GET_USEABLE(&desc); 983 984 error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 985 if (error) 986 return (EFAULT); 987 988 return (0); 989} 990 991/* XXX: this wont work with module - convert it */ 992int 993linux_mq_open(struct thread *td, struct linux_mq_open_args *args) 994{ 995#ifdef P1003_1B_MQUEUE 996 return sys_kmq_open(td, (struct kmq_open_args *) args); 997#else 998 return (ENOSYS); 999#endif 1000} 1001 1002int 1003linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) 1004{ 1005#ifdef P1003_1B_MQUEUE 1006 return sys_kmq_unlink(td, (struct kmq_unlink_args *) args); 1007#else 1008 return (ENOSYS); 1009#endif 1010} 1011 1012int 1013linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) 1014{ 1015#ifdef P1003_1B_MQUEUE 1016 return sys_kmq_timedsend(td, (struct kmq_timedsend_args *) args); 1017#else 1018 return (ENOSYS); 1019#endif 1020} 1021 1022int 1023linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) 1024{ 1025#ifdef P1003_1B_MQUEUE 1026 return sys_kmq_timedreceive(td, (struct kmq_timedreceive_args *) args); 1027#else 1028 return (ENOSYS); 1029#endif 1030} 1031 1032int 1033linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) 1034{ 1035#ifdef P1003_1B_MQUEUE 1036 return sys_kmq_notify(td, (struct kmq_notify_args *) args); 1037#else 1038 return (ENOSYS); 1039#endif 1040} 1041 1042int 
1043linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) 1044{ 1045#ifdef P1003_1B_MQUEUE 1046 return sys_kmq_setattr(td, (struct kmq_setattr_args *) args); 1047#else 1048 return (ENOSYS); 1049#endif 1050} 1051 1052int 1053linux_wait4(struct thread *td, struct linux_wait4_args *args) 1054{ 1055 int error, options; 1056 struct rusage ru, *rup; 1057 1058#ifdef DEBUG 1059 if (ldebug(wait4)) 1060 printf(ARGS(wait4, "%d, %p, %d, %p"), 1061 args->pid, (void *)args->status, args->options, 1062 (void *)args->rusage); 1063#endif 1064 1065 options = (args->options & (WNOHANG | WUNTRACED)); 1066 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 1067 if (args->options & __WCLONE) 1068 options |= WLINUXCLONE; 1069 1070 if (args->rusage != NULL) 1071 rup = &ru; 1072 else 1073 rup = NULL; 1074 error = linux_common_wait(td, args->pid, args->status, options, rup); 1075 if (error) 1076 return (error); 1077 if (args->rusage != NULL) 1078 error = copyout(&ru, args->rusage, sizeof(ru)); 1079 1080 return (error); 1081} 1082