linux32_machdep.c revision 293575
/*-
 * Copyright (c) 2004 Tim J. Robbins
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 2000 Marcel Moolenaar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */ 30 31#include <sys/cdefs.h> 32__FBSDID("$FreeBSD: stable/10/sys/amd64/linux32/linux32_machdep.c 293575 2016-01-09 17:29:08Z dchagin $"); 33 34#include "opt_compat.h" 35 36#include <sys/param.h> 37#include <sys/kernel.h> 38#include <sys/systm.h> 39#include <sys/capsicum.h> 40#include <sys/file.h> 41#include <sys/fcntl.h> 42#include <sys/clock.h> 43#include <sys/imgact.h> 44#include <sys/limits.h> 45#include <sys/lock.h> 46#include <sys/malloc.h> 47#include <sys/mman.h> 48#include <sys/mutex.h> 49#include <sys/priv.h> 50#include <sys/proc.h> 51#include <sys/resource.h> 52#include <sys/resourcevar.h> 53#include <sys/syscallsubr.h> 54#include <sys/sysproto.h> 55#include <sys/unistd.h> 56#include <sys/wait.h> 57 58#include <machine/frame.h> 59#include <machine/pcb.h> 60#include <machine/psl.h> 61#include <machine/segments.h> 62#include <machine/specialreg.h> 63 64#include <vm/vm.h> 65#include <vm/pmap.h> 66#include <vm/vm_map.h> 67 68#include <compat/freebsd32/freebsd32_util.h> 69#include <amd64/linux32/linux.h> 70#include <amd64/linux32/linux32_proto.h> 71#include <compat/linux/linux_ipc.h> 72#include <compat/linux/linux_misc.h> 73#include <compat/linux/linux_signal.h> 74#include <compat/linux/linux_util.h> 75#include <compat/linux/linux_emul.h> 76 77static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru); 78 79struct l_old_select_argv { 80 l_int nfds; 81 l_uintptr_t readfds; 82 l_uintptr_t writefds; 83 l_uintptr_t exceptfds; 84 l_uintptr_t timeout; 85} __packed; 86 87static int linux_mmap_common(struct thread *td, l_uintptr_t addr, 88 l_size_t len, l_int prot, l_int flags, l_int fd, 89 l_loff_t pos); 90 91static void 92bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) 93{ 94 95 lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; 96 lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; 97 lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; 98 lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; 99 lru->ru_maxrss = ru->ru_maxrss; 100 lru->ru_ixrss = ru->ru_ixrss; 101 
lru->ru_idrss = ru->ru_idrss; 102 lru->ru_isrss = ru->ru_isrss; 103 lru->ru_minflt = ru->ru_minflt; 104 lru->ru_majflt = ru->ru_majflt; 105 lru->ru_nswap = ru->ru_nswap; 106 lru->ru_inblock = ru->ru_inblock; 107 lru->ru_oublock = ru->ru_oublock; 108 lru->ru_msgsnd = ru->ru_msgsnd; 109 lru->ru_msgrcv = ru->ru_msgrcv; 110 lru->ru_nsignals = ru->ru_nsignals; 111 lru->ru_nvcsw = ru->ru_nvcsw; 112 lru->ru_nivcsw = ru->ru_nivcsw; 113} 114 115int 116linux_copyout_rusage(struct rusage *ru, void *uaddr) 117{ 118 struct l_rusage lru; 119 120 bsd_to_linux_rusage(ru, &lru); 121 122 return (copyout(&lru, uaddr, sizeof(struct l_rusage))); 123} 124 125int 126linux_execve(struct thread *td, struct linux_execve_args *args) 127{ 128 struct image_args eargs; 129 struct vmspace *oldvmspace; 130 char *path; 131 int error; 132 133 LCONVPATHEXIST(td, args->path, &path); 134 135#ifdef DEBUG 136 if (ldebug(execve)) 137 printf(ARGS(execve, "%s"), path); 138#endif 139 140 error = pre_execve(td, &oldvmspace); 141 if (error != 0) { 142 free(path, M_TEMP); 143 return (error); 144 } 145 error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, 146 args->argp, args->envp); 147 free(path, M_TEMP); 148 if (error == 0) 149 error = kern_execve(td, &eargs, NULL); 150 if (error == 0) 151 error = linux_common_execve(td, &eargs); 152 post_execve(td, error, oldvmspace); 153 return (error); 154} 155 156CTASSERT(sizeof(struct l_iovec32) == 8); 157 158static int 159linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) 160{ 161 struct l_iovec32 iov32; 162 struct iovec *iov; 163 struct uio *uio; 164 uint32_t iovlen; 165 int error, i; 166 167 *uiop = NULL; 168 if (iovcnt > UIO_MAXIOV) 169 return (EINVAL); 170 iovlen = iovcnt * sizeof(struct iovec); 171 uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); 172 iov = (struct iovec *)(uio + 1); 173 for (i = 0; i < iovcnt; i++) { 174 error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); 175 if (error) { 176 free(uio, M_IOV); 177 
return (error); 178 } 179 iov[i].iov_base = PTRIN(iov32.iov_base); 180 iov[i].iov_len = iov32.iov_len; 181 } 182 uio->uio_iov = iov; 183 uio->uio_iovcnt = iovcnt; 184 uio->uio_segflg = UIO_USERSPACE; 185 uio->uio_offset = -1; 186 uio->uio_resid = 0; 187 for (i = 0; i < iovcnt; i++) { 188 if (iov->iov_len > INT_MAX - uio->uio_resid) { 189 free(uio, M_IOV); 190 return (EINVAL); 191 } 192 uio->uio_resid += iov->iov_len; 193 iov++; 194 } 195 *uiop = uio; 196 return (0); 197} 198 199int 200linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, 201 int error) 202{ 203 struct l_iovec32 iov32; 204 struct iovec *iov; 205 uint32_t iovlen; 206 int i; 207 208 *iovp = NULL; 209 if (iovcnt > UIO_MAXIOV) 210 return (error); 211 iovlen = iovcnt * sizeof(struct iovec); 212 iov = malloc(iovlen, M_IOV, M_WAITOK); 213 for (i = 0; i < iovcnt; i++) { 214 error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); 215 if (error) { 216 free(iov, M_IOV); 217 return (error); 218 } 219 iov[i].iov_base = PTRIN(iov32.iov_base); 220 iov[i].iov_len = iov32.iov_len; 221 } 222 *iovp = iov; 223 return(0); 224 225} 226 227int 228linux_readv(struct thread *td, struct linux_readv_args *uap) 229{ 230 struct uio *auio; 231 int error; 232 233 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 234 if (error) 235 return (error); 236 error = kern_readv(td, uap->fd, auio); 237 free(auio, M_IOV); 238 return (error); 239} 240 241int 242linux_writev(struct thread *td, struct linux_writev_args *uap) 243{ 244 struct uio *auio; 245 int error; 246 247 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 248 if (error) 249 return (error); 250 error = kern_writev(td, uap->fd, auio); 251 free(auio, M_IOV); 252 return (error); 253} 254 255struct l_ipc_kludge { 256 l_uintptr_t msgp; 257 l_long msgtyp; 258} __packed; 259 260int 261linux_ipc(struct thread *td, struct linux_ipc_args *args) 262{ 263 264 switch (args->what & 0xFFFF) { 265 case LINUX_SEMOP: { 266 struct linux_semop_args 
a; 267 268 a.semid = args->arg1; 269 a.tsops = args->ptr; 270 a.nsops = args->arg2; 271 return (linux_semop(td, &a)); 272 } 273 case LINUX_SEMGET: { 274 struct linux_semget_args a; 275 276 a.key = args->arg1; 277 a.nsems = args->arg2; 278 a.semflg = args->arg3; 279 return (linux_semget(td, &a)); 280 } 281 case LINUX_SEMCTL: { 282 struct linux_semctl_args a; 283 int error; 284 285 a.semid = args->arg1; 286 a.semnum = args->arg2; 287 a.cmd = args->arg3; 288 error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 289 if (error) 290 return (error); 291 return (linux_semctl(td, &a)); 292 } 293 case LINUX_MSGSND: { 294 struct linux_msgsnd_args a; 295 296 a.msqid = args->arg1; 297 a.msgp = args->ptr; 298 a.msgsz = args->arg2; 299 a.msgflg = args->arg3; 300 return (linux_msgsnd(td, &a)); 301 } 302 case LINUX_MSGRCV: { 303 struct linux_msgrcv_args a; 304 305 a.msqid = args->arg1; 306 a.msgsz = args->arg2; 307 a.msgflg = args->arg3; 308 if ((args->what >> 16) == 0) { 309 struct l_ipc_kludge tmp; 310 int error; 311 312 if (args->ptr == 0) 313 return (EINVAL); 314 error = copyin(args->ptr, &tmp, sizeof(tmp)); 315 if (error) 316 return (error); 317 a.msgp = PTRIN(tmp.msgp); 318 a.msgtyp = tmp.msgtyp; 319 } else { 320 a.msgp = args->ptr; 321 a.msgtyp = args->arg5; 322 } 323 return (linux_msgrcv(td, &a)); 324 } 325 case LINUX_MSGGET: { 326 struct linux_msgget_args a; 327 328 a.key = args->arg1; 329 a.msgflg = args->arg2; 330 return (linux_msgget(td, &a)); 331 } 332 case LINUX_MSGCTL: { 333 struct linux_msgctl_args a; 334 335 a.msqid = args->arg1; 336 a.cmd = args->arg2; 337 a.buf = args->ptr; 338 return (linux_msgctl(td, &a)); 339 } 340 case LINUX_SHMAT: { 341 struct linux_shmat_args a; 342 343 a.shmid = args->arg1; 344 a.shmaddr = args->ptr; 345 a.shmflg = args->arg2; 346 a.raddr = PTRIN((l_uint)args->arg3); 347 return (linux_shmat(td, &a)); 348 } 349 case LINUX_SHMDT: { 350 struct linux_shmdt_args a; 351 352 a.shmaddr = args->ptr; 353 return (linux_shmdt(td, &a)); 354 } 355 case 
LINUX_SHMGET: { 356 struct linux_shmget_args a; 357 358 a.key = args->arg1; 359 a.size = args->arg2; 360 a.shmflg = args->arg3; 361 return (linux_shmget(td, &a)); 362 } 363 case LINUX_SHMCTL: { 364 struct linux_shmctl_args a; 365 366 a.shmid = args->arg1; 367 a.cmd = args->arg2; 368 a.buf = args->ptr; 369 return (linux_shmctl(td, &a)); 370 } 371 default: 372 break; 373 } 374 375 return (EINVAL); 376} 377 378int 379linux_old_select(struct thread *td, struct linux_old_select_args *args) 380{ 381 struct l_old_select_argv linux_args; 382 struct linux_select_args newsel; 383 int error; 384 385#ifdef DEBUG 386 if (ldebug(old_select)) 387 printf(ARGS(old_select, "%p"), args->ptr); 388#endif 389 390 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 391 if (error) 392 return (error); 393 394 newsel.nfds = linux_args.nfds; 395 newsel.readfds = PTRIN(linux_args.readfds); 396 newsel.writefds = PTRIN(linux_args.writefds); 397 newsel.exceptfds = PTRIN(linux_args.exceptfds); 398 newsel.timeout = PTRIN(linux_args.timeout); 399 return (linux_select(td, &newsel)); 400} 401 402int 403linux_set_cloned_tls(struct thread *td, void *desc) 404{ 405 struct user_segment_descriptor sd; 406 struct l_user_desc info; 407 struct pcb *pcb; 408 int error; 409 int a[2]; 410 411 error = copyin(desc, &info, sizeof(struct l_user_desc)); 412 if (error) { 413 printf(LMSG("copyin failed!")); 414 } else { 415 /* We might copy out the entry_number as GUGS32_SEL. 
*/ 416 info.entry_number = GUGS32_SEL; 417 error = copyout(&info, desc, sizeof(struct l_user_desc)); 418 if (error) 419 printf(LMSG("copyout failed!")); 420 421 a[0] = LINUX_LDT_entry_a(&info); 422 a[1] = LINUX_LDT_entry_b(&info); 423 424 memcpy(&sd, &a, sizeof(a)); 425#ifdef DEBUG 426 if (ldebug(clone)) 427 printf("Segment created in clone with " 428 "CLONE_SETTLS: lobase: %x, hibase: %x, " 429 "lolimit: %x, hilimit: %x, type: %i, " 430 "dpl: %i, p: %i, xx: %i, long: %i, " 431 "def32: %i, gran: %i\n", sd.sd_lobase, 432 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, 433 sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 434 sd.sd_long, sd.sd_def32, sd.sd_gran); 435#endif 436 pcb = td->td_pcb; 437 pcb->pcb_gsbase = (register_t)info.base_addr; 438 td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); 439 set_pcb_flags(pcb, PCB_32BIT); 440 } 441 442 return (error); 443} 444 445int 446linux_set_upcall_kse(struct thread *td, register_t stack) 447{ 448 449 if (stack) 450 td->td_frame->tf_rsp = stack; 451 452 /* 453 * The newly created Linux thread returns 454 * to the user space by the same path that a parent do. 
455 */ 456 td->td_frame->tf_rax = 0; 457 return (0); 458} 459 460#define STACK_SIZE (2 * 1024 * 1024) 461#define GUARD_SIZE (4 * PAGE_SIZE) 462 463int 464linux_mmap2(struct thread *td, struct linux_mmap2_args *args) 465{ 466 467#ifdef DEBUG 468 if (ldebug(mmap2)) 469 printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"), 470 args->addr, args->len, args->prot, 471 args->flags, args->fd, args->pgoff); 472#endif 473 474 return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, 475 args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 476 PAGE_SIZE)); 477} 478 479int 480linux_mmap(struct thread *td, struct linux_mmap_args *args) 481{ 482 int error; 483 struct l_mmap_argv linux_args; 484 485 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 486 if (error) 487 return (error); 488 489#ifdef DEBUG 490 if (ldebug(mmap)) 491 printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"), 492 linux_args.addr, linux_args.len, linux_args.prot, 493 linux_args.flags, linux_args.fd, linux_args.pgoff); 494#endif 495 496 return (linux_mmap_common(td, linux_args.addr, linux_args.len, 497 linux_args.prot, linux_args.flags, linux_args.fd, 498 (uint32_t)linux_args.pgoff)); 499} 500 501static int 502linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, 503 l_int flags, l_int fd, l_loff_t pos) 504{ 505 struct proc *p = td->td_proc; 506 struct mmap_args /* { 507 caddr_t addr; 508 size_t len; 509 int prot; 510 int flags; 511 int fd; 512 long pad; 513 off_t pos; 514 } */ bsd_args; 515 int error; 516 struct file *fp; 517 cap_rights_t rights; 518 519 error = 0; 520 bsd_args.flags = 0; 521 fp = NULL; 522 523 /* 524 * Linux mmap(2): 525 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 526 */ 527 if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 528 return (EINVAL); 529 530 if (flags & LINUX_MAP_SHARED) 531 bsd_args.flags |= MAP_SHARED; 532 if (flags & LINUX_MAP_PRIVATE) 533 bsd_args.flags |= MAP_PRIVATE; 534 if (flags & 
LINUX_MAP_FIXED) 535 bsd_args.flags |= MAP_FIXED; 536 if (flags & LINUX_MAP_ANON) { 537 /* Enforce pos to be on page boundary, then ignore. */ 538 if ((pos & PAGE_MASK) != 0) 539 return (EINVAL); 540 pos = 0; 541 bsd_args.flags |= MAP_ANON; 542 } else 543 bsd_args.flags |= MAP_NOSYNC; 544 if (flags & LINUX_MAP_GROWSDOWN) 545 bsd_args.flags |= MAP_STACK; 546 547 /* 548 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 549 * on Linux/i386. We do this to ensure maximum compatibility. 550 * Linux/ia64 does the same in i386 emulation mode. 551 */ 552 bsd_args.prot = prot; 553 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 554 bsd_args.prot |= PROT_READ | PROT_EXEC; 555 556 /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 557 bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; 558 if (bsd_args.fd != -1) { 559 /* 560 * Linux follows Solaris mmap(2) description: 561 * The file descriptor fildes is opened with 562 * read permission, regardless of the 563 * protection options specified. 564 */ 565 566 error = fget(td, bsd_args.fd, 567 cap_rights_init(&rights, CAP_MMAP), &fp); 568 if (error != 0) 569 return (error); 570 if (fp->f_type != DTYPE_VNODE) { 571 fdrop(fp, td); 572 return (EINVAL); 573 } 574 575 /* Linux mmap() just fails for O_WRONLY files */ 576 if (!(fp->f_flag & FREAD)) { 577 fdrop(fp, td); 578 return (EACCES); 579 } 580 581 fdrop(fp, td); 582 } 583 584 if (flags & LINUX_MAP_GROWSDOWN) { 585 /* 586 * The Linux MAP_GROWSDOWN option does not limit auto 587 * growth of the region. Linux mmap with this option 588 * takes as addr the inital BOS, and as len, the initial 589 * region size. It can then grow down from addr without 590 * limit. However, Linux threads has an implicit internal 591 * limit to stack size of STACK_SIZE. Its just not 592 * enforced explicitly in Linux. But, here we impose 593 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 594 * region, since we can do this with our mmap. 
595 * 596 * Our mmap with MAP_STACK takes addr as the maximum 597 * downsize limit on BOS, and as len the max size of 598 * the region. It then maps the top SGROWSIZ bytes, 599 * and auto grows the region down, up to the limit 600 * in addr. 601 * 602 * If we don't use the MAP_STACK option, the effect 603 * of this code is to allocate a stack region of a 604 * fixed size of (STACK_SIZE - GUARD_SIZE). 605 */ 606 607 if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { 608 /* 609 * Some Linux apps will attempt to mmap 610 * thread stacks near the top of their 611 * address space. If their TOS is greater 612 * than vm_maxsaddr, vm_map_growstack() 613 * will confuse the thread stack with the 614 * process stack and deliver a SEGV if they 615 * attempt to grow the thread stack past their 616 * current stacksize rlimit. To avoid this, 617 * adjust vm_maxsaddr upwards to reflect 618 * the current stacksize rlimit rather 619 * than the maximum possible stacksize. 620 * It would be better to adjust the 621 * mmap'ed region, but some apps do not check 622 * mmap's return value. 623 */ 624 PROC_LOCK(p); 625 p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - 626 lim_cur(p, RLIMIT_STACK); 627 PROC_UNLOCK(p); 628 } 629 630 /* 631 * This gives us our maximum stack size and a new BOS. 632 * If we're using VM_STACK, then mmap will just map 633 * the top SGROWSIZ bytes, and let the stack grow down 634 * to the limit at BOS. If we're not using VM_STACK 635 * we map the full stack, since we don't have a way 636 * to autogrow it. 
637 */ 638 if (len > STACK_SIZE - GUARD_SIZE) { 639 bsd_args.addr = (caddr_t)PTRIN(addr); 640 bsd_args.len = len; 641 } else { 642 bsd_args.addr = (caddr_t)PTRIN(addr) - 643 (STACK_SIZE - GUARD_SIZE - len); 644 bsd_args.len = STACK_SIZE - GUARD_SIZE; 645 } 646 } else { 647 bsd_args.addr = (caddr_t)PTRIN(addr); 648 bsd_args.len = len; 649 } 650 bsd_args.pos = pos; 651 652#ifdef DEBUG 653 if (ldebug(mmap)) 654 printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 655 __func__, 656 (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, 657 bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 658#endif 659 error = sys_mmap(td, &bsd_args); 660#ifdef DEBUG 661 if (ldebug(mmap)) 662 printf("-> %s() return: 0x%x (0x%08x)\n", 663 __func__, error, (u_int)td->td_retval[0]); 664#endif 665 return (error); 666} 667 668int 669linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 670{ 671 struct mprotect_args bsd_args; 672 673 bsd_args.addr = uap->addr; 674 bsd_args.len = uap->len; 675 bsd_args.prot = uap->prot; 676 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 677 bsd_args.prot |= PROT_READ | PROT_EXEC; 678 return (sys_mprotect(td, &bsd_args)); 679} 680 681int 682linux_iopl(struct thread *td, struct linux_iopl_args *args) 683{ 684 int error; 685 686 if (args->level < 0 || args->level > 3) 687 return (EINVAL); 688 if ((error = priv_check(td, PRIV_IO)) != 0) 689 return (error); 690 if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 691 return (error); 692 td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | 693 (args->level * (PSL_IOPL / 3)); 694 695 return (0); 696} 697 698int 699linux_sigaction(struct thread *td, struct linux_sigaction_args *args) 700{ 701 l_osigaction_t osa; 702 l_sigaction_t act, oact; 703 int error; 704 705#ifdef DEBUG 706 if (ldebug(sigaction)) 707 printf(ARGS(sigaction, "%d, %p, %p"), 708 args->sig, (void *)args->nsa, (void *)args->osa); 709#endif 710 711 if (args->nsa != NULL) { 712 error = copyin(args->nsa, &osa, 
sizeof(l_osigaction_t)); 713 if (error) 714 return (error); 715 act.lsa_handler = osa.lsa_handler; 716 act.lsa_flags = osa.lsa_flags; 717 act.lsa_restorer = osa.lsa_restorer; 718 LINUX_SIGEMPTYSET(act.lsa_mask); 719 act.lsa_mask.__mask = osa.lsa_mask; 720 } 721 722 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 723 args->osa ? &oact : NULL); 724 725 if (args->osa != NULL && !error) { 726 osa.lsa_handler = oact.lsa_handler; 727 osa.lsa_flags = oact.lsa_flags; 728 osa.lsa_restorer = oact.lsa_restorer; 729 osa.lsa_mask = oact.lsa_mask.__mask; 730 error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 731 } 732 733 return (error); 734} 735 736/* 737 * Linux has two extra args, restart and oldmask. We don't use these, 738 * but it seems that "restart" is actually a context pointer that 739 * enables the signal to happen with a different register set. 740 */ 741int 742linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 743{ 744 sigset_t sigmask; 745 l_sigset_t mask; 746 747#ifdef DEBUG 748 if (ldebug(sigsuspend)) 749 printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 750#endif 751 752 LINUX_SIGEMPTYSET(mask); 753 mask.__mask = args->mask; 754 linux_to_bsd_sigset(&mask, &sigmask); 755 return (kern_sigsuspend(td, sigmask)); 756} 757 758int 759linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 760{ 761 l_sigset_t lmask; 762 sigset_t sigmask; 763 int error; 764 765#ifdef DEBUG 766 if (ldebug(rt_sigsuspend)) 767 printf(ARGS(rt_sigsuspend, "%p, %d"), 768 (void *)uap->newset, uap->sigsetsize); 769#endif 770 771 if (uap->sigsetsize != sizeof(l_sigset_t)) 772 return (EINVAL); 773 774 error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 775 if (error) 776 return (error); 777 778 linux_to_bsd_sigset(&lmask, &sigmask); 779 return (kern_sigsuspend(td, sigmask)); 780} 781 782int 783linux_pause(struct thread *td, struct linux_pause_args *args) 784{ 785 struct proc *p = td->td_proc; 786 sigset_t sigmask; 
787 788#ifdef DEBUG 789 if (ldebug(pause)) 790 printf(ARGS(pause, "")); 791#endif 792 793 PROC_LOCK(p); 794 sigmask = td->td_sigmask; 795 PROC_UNLOCK(p); 796 return (kern_sigsuspend(td, sigmask)); 797} 798 799int 800linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 801{ 802 stack_t ss, oss; 803 l_stack_t lss; 804 int error; 805 806#ifdef DEBUG 807 if (ldebug(sigaltstack)) 808 printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 809#endif 810 811 if (uap->uss != NULL) { 812 error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 813 if (error) 814 return (error); 815 816 ss.ss_sp = PTRIN(lss.ss_sp); 817 ss.ss_size = lss.ss_size; 818 ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 819 } 820 error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 821 (uap->uoss != NULL) ? &oss : NULL); 822 if (!error && uap->uoss != NULL) { 823 lss.ss_sp = PTROUT(oss.ss_sp); 824 lss.ss_size = oss.ss_size; 825 lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 826 error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 827 } 828 829 return (error); 830} 831 832int 833linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 834{ 835 struct ftruncate_args sa; 836 837#ifdef DEBUG 838 if (ldebug(ftruncate64)) 839 printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 840 (intmax_t)args->length); 841#endif 842 843 sa.fd = args->fd; 844 sa.length = args->length; 845 return sys_ftruncate(td, &sa); 846} 847 848int 849linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) 850{ 851 struct timeval atv; 852 l_timeval atv32; 853 struct timezone rtz; 854 int error = 0; 855 856 if (uap->tp) { 857 microtime(&atv); 858 atv32.tv_sec = atv.tv_sec; 859 atv32.tv_usec = atv.tv_usec; 860 error = copyout(&atv32, uap->tp, sizeof(atv32)); 861 } 862 if (error == 0 && uap->tzp != NULL) { 863 rtz.tz_minuteswest = tz_minuteswest; 864 rtz.tz_dsttime = tz_dsttime; 865 error = copyout(&rtz, uap->tzp, sizeof(rtz)); 866 } 867 return (error); 868} 869 
870int 871linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) 872{ 873 l_timeval atv32; 874 struct timeval atv, *tvp; 875 struct timezone atz, *tzp; 876 int error; 877 878 if (uap->tp) { 879 error = copyin(uap->tp, &atv32, sizeof(atv32)); 880 if (error) 881 return (error); 882 atv.tv_sec = atv32.tv_sec; 883 atv.tv_usec = atv32.tv_usec; 884 tvp = &atv; 885 } else 886 tvp = NULL; 887 if (uap->tzp) { 888 error = copyin(uap->tzp, &atz, sizeof(atz)); 889 if (error) 890 return (error); 891 tzp = &atz; 892 } else 893 tzp = NULL; 894 return (kern_settimeofday(td, tvp, tzp)); 895} 896 897int 898linux_getrusage(struct thread *td, struct linux_getrusage_args *uap) 899{ 900 struct rusage s; 901 int error; 902 903 error = kern_getrusage(td, uap->who, &s); 904 if (error != 0) 905 return (error); 906 if (uap->rusage != NULL) 907 error = linux_copyout_rusage(&s, uap->rusage); 908 return (error); 909} 910 911int 912linux_set_thread_area(struct thread *td, 913 struct linux_set_thread_area_args *args) 914{ 915 struct l_user_desc info; 916 struct user_segment_descriptor sd; 917 struct pcb *pcb; 918 int a[2]; 919 int error; 920 921 error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 922 if (error) 923 return (error); 924 925#ifdef DEBUG 926 if (ldebug(set_thread_area)) 927 printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, " 928 "%i, %i, %i"), info.entry_number, info.base_addr, 929 info.limit, info.seg_32bit, info.contents, 930 info.read_exec_only, info.limit_in_pages, 931 info.seg_not_present, info.useable); 932#endif 933 934 /* 935 * Semantics of Linux version: every thread in the system has array 936 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. 937 * This syscall loads one of the selected TLS decriptors with a value 938 * and also loads GDT descriptors 6, 7 and 8 with the content of 939 * the per-thread descriptors. 
940 * 941 * Semantics of FreeBSD version: I think we can ignore that Linux has 942 * three per-thread descriptors and use just the first one. 943 * The tls_array[] is used only in [gs]et_thread_area() syscalls and 944 * for loading the GDT descriptors. We use just one GDT descriptor 945 * for TLS, so we will load just one. 946 * 947 * XXX: This doesn't work when a user space process tries to use more 948 * than one TLS segment. Comment in the Linux source says wine might 949 * do this. 950 */ 951 952 /* 953 * GLIBC reads current %gs and call set_thread_area() with it. 954 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because 955 * we use these segments. 956 */ 957 switch (info.entry_number) { 958 case GUGS32_SEL: 959 case GUDATA_SEL: 960 case 6: 961 case -1: 962 info.entry_number = GUGS32_SEL; 963 break; 964 default: 965 return (EINVAL); 966 } 967 968 /* 969 * We have to copy out the GDT entry we use. 970 * 971 * XXX: What if a user space program does not check the return value 972 * and tries to use 6, 7 or 8? 973 */ 974 error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 975 if (error) 976 return (error); 977 978 if (LINUX_LDT_empty(&info)) { 979 a[0] = 0; 980 a[1] = 0; 981 } else { 982 a[0] = LINUX_LDT_entry_a(&info); 983 a[1] = LINUX_LDT_entry_b(&info); 984 } 985 986 memcpy(&sd, &a, sizeof(a)); 987#ifdef DEBUG 988 if (ldebug(set_thread_area)) 989 printf("Segment created in set_thread_area: " 990 "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " 991 "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " 992 "def32: %i, gran: %i\n", 993 sd.sd_lobase, 994 sd.sd_hibase, 995 sd.sd_lolimit, 996 sd.sd_hilimit, 997 sd.sd_type, 998 sd.sd_dpl, 999 sd.sd_p, 1000 sd.sd_xx, 1001 sd.sd_long, 1002 sd.sd_def32, 1003 sd.sd_gran); 1004#endif 1005 1006 pcb = td->td_pcb; 1007 pcb->pcb_gsbase = (register_t)info.base_addr; 1008 set_pcb_flags(pcb, PCB_32BIT); 1009 update_gdt_gsbase(td, info.base_addr); 1010 1011 return (0); 1012} 1013