/* linux32_machdep.c revision 293500 */
1/*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2002 Doug Rabson 4 * Copyright (c) 2000 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 */ 30 31#include <sys/cdefs.h> 32__FBSDID("$FreeBSD: stable/10/sys/amd64/linux32/linux32_machdep.c 293500 2016-01-09 15:23:54Z dchagin $"); 33 34#include <sys/param.h> 35#include <sys/kernel.h> 36#include <sys/systm.h> 37#include <sys/capsicum.h> 38#include <sys/file.h> 39#include <sys/fcntl.h> 40#include <sys/clock.h> 41#include <sys/imgact.h> 42#include <sys/limits.h> 43#include <sys/lock.h> 44#include <sys/malloc.h> 45#include <sys/mman.h> 46#include <sys/mutex.h> 47#include <sys/priv.h> 48#include <sys/proc.h> 49#include <sys/resource.h> 50#include <sys/resourcevar.h> 51#include <sys/syscallsubr.h> 52#include <sys/sysproto.h> 53#include <sys/unistd.h> 54#include <sys/wait.h> 55 56#include <machine/frame.h> 57#include <machine/pcb.h> 58#include <machine/psl.h> 59#include <machine/segments.h> 60#include <machine/specialreg.h> 61 62#include <vm/vm.h> 63#include <vm/pmap.h> 64#include <vm/vm_map.h> 65 66#include <compat/freebsd32/freebsd32_util.h> 67#include <amd64/linux32/linux.h> 68#include <amd64/linux32/linux32_proto.h> 69#include <compat/linux/linux_ipc.h> 70#include <compat/linux/linux_misc.h> 71#include <compat/linux/linux_signal.h> 72#include <compat/linux/linux_util.h> 73#include <compat/linux/linux_emul.h> 74 75static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru); 76 77struct l_old_select_argv { 78 l_int nfds; 79 l_uintptr_t readfds; 80 l_uintptr_t writefds; 81 l_uintptr_t exceptfds; 82 l_uintptr_t timeout; 83} __packed; 84 85int 86linux_to_bsd_sigaltstack(int lsa) 87{ 88 int bsa = 0; 89 90 if (lsa & LINUX_SS_DISABLE) 91 bsa |= SS_DISABLE; 92 if (lsa & LINUX_SS_ONSTACK) 93 bsa |= SS_ONSTACK; 94 return (bsa); 95} 96 97static int linux_mmap_common(struct thread *td, l_uintptr_t addr, 98 l_size_t len, l_int prot, l_int flags, l_int fd, 99 l_loff_t pos); 100 101int 102bsd_to_linux_sigaltstack(int bsa) 103{ 104 int lsa = 0; 105 106 if (bsa & SS_DISABLE) 107 lsa |= LINUX_SS_DISABLE; 108 if (bsa & SS_ONSTACK) 109 lsa |= LINUX_SS_ONSTACK; 
110 return (lsa); 111} 112 113static void 114bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) 115{ 116 117 lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; 118 lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; 119 lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; 120 lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; 121 lru->ru_maxrss = ru->ru_maxrss; 122 lru->ru_ixrss = ru->ru_ixrss; 123 lru->ru_idrss = ru->ru_idrss; 124 lru->ru_isrss = ru->ru_isrss; 125 lru->ru_minflt = ru->ru_minflt; 126 lru->ru_majflt = ru->ru_majflt; 127 lru->ru_nswap = ru->ru_nswap; 128 lru->ru_inblock = ru->ru_inblock; 129 lru->ru_oublock = ru->ru_oublock; 130 lru->ru_msgsnd = ru->ru_msgsnd; 131 lru->ru_msgrcv = ru->ru_msgrcv; 132 lru->ru_nsignals = ru->ru_nsignals; 133 lru->ru_nvcsw = ru->ru_nvcsw; 134 lru->ru_nivcsw = ru->ru_nivcsw; 135} 136 137int 138linux_copyout_rusage(struct rusage *ru, void *uaddr) 139{ 140 struct l_rusage lru; 141 142 bsd_to_linux_rusage(ru, &lru); 143 144 return (copyout(&lru, uaddr, sizeof(struct l_rusage))); 145} 146 147int 148linux_execve(struct thread *td, struct linux_execve_args *args) 149{ 150 struct image_args eargs; 151 struct vmspace *oldvmspace; 152 char *path; 153 int error; 154 155 LCONVPATHEXIST(td, args->path, &path); 156 157#ifdef DEBUG 158 if (ldebug(execve)) 159 printf(ARGS(execve, "%s"), path); 160#endif 161 162 error = pre_execve(td, &oldvmspace); 163 if (error != 0) { 164 free(path, M_TEMP); 165 return (error); 166 } 167 error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, 168 args->argp, args->envp); 169 free(path, M_TEMP); 170 if (error == 0) 171 error = kern_execve(td, &eargs, NULL); 172 if (error == 0) 173 error = linux_common_execve(td, &eargs); 174 post_execve(td, error, oldvmspace); 175 return (error); 176} 177 178CTASSERT(sizeof(struct l_iovec32) == 8); 179 180static int 181linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) 182{ 183 struct l_iovec32 iov32; 184 struct iovec *iov; 185 struct uio *uio; 186 
uint32_t iovlen; 187 int error, i; 188 189 *uiop = NULL; 190 if (iovcnt > UIO_MAXIOV) 191 return (EINVAL); 192 iovlen = iovcnt * sizeof(struct iovec); 193 uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); 194 iov = (struct iovec *)(uio + 1); 195 for (i = 0; i < iovcnt; i++) { 196 error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); 197 if (error) { 198 free(uio, M_IOV); 199 return (error); 200 } 201 iov[i].iov_base = PTRIN(iov32.iov_base); 202 iov[i].iov_len = iov32.iov_len; 203 } 204 uio->uio_iov = iov; 205 uio->uio_iovcnt = iovcnt; 206 uio->uio_segflg = UIO_USERSPACE; 207 uio->uio_offset = -1; 208 uio->uio_resid = 0; 209 for (i = 0; i < iovcnt; i++) { 210 if (iov->iov_len > INT_MAX - uio->uio_resid) { 211 free(uio, M_IOV); 212 return (EINVAL); 213 } 214 uio->uio_resid += iov->iov_len; 215 iov++; 216 } 217 *uiop = uio; 218 return (0); 219} 220 221int 222linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, 223 int error) 224{ 225 struct l_iovec32 iov32; 226 struct iovec *iov; 227 uint32_t iovlen; 228 int i; 229 230 *iovp = NULL; 231 if (iovcnt > UIO_MAXIOV) 232 return (error); 233 iovlen = iovcnt * sizeof(struct iovec); 234 iov = malloc(iovlen, M_IOV, M_WAITOK); 235 for (i = 0; i < iovcnt; i++) { 236 error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); 237 if (error) { 238 free(iov, M_IOV); 239 return (error); 240 } 241 iov[i].iov_base = PTRIN(iov32.iov_base); 242 iov[i].iov_len = iov32.iov_len; 243 } 244 *iovp = iov; 245 return(0); 246 247} 248 249int 250linux_readv(struct thread *td, struct linux_readv_args *uap) 251{ 252 struct uio *auio; 253 int error; 254 255 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 256 if (error) 257 return (error); 258 error = kern_readv(td, uap->fd, auio); 259 free(auio, M_IOV); 260 return (error); 261} 262 263int 264linux_writev(struct thread *td, struct linux_writev_args *uap) 265{ 266 struct uio *auio; 267 int error; 268 269 error = linux32_copyinuio(uap->iovp, uap->iovcnt, 
&auio); 270 if (error) 271 return (error); 272 error = kern_writev(td, uap->fd, auio); 273 free(auio, M_IOV); 274 return (error); 275} 276 277struct l_ipc_kludge { 278 l_uintptr_t msgp; 279 l_long msgtyp; 280} __packed; 281 282int 283linux_ipc(struct thread *td, struct linux_ipc_args *args) 284{ 285 286 switch (args->what & 0xFFFF) { 287 case LINUX_SEMOP: { 288 struct linux_semop_args a; 289 290 a.semid = args->arg1; 291 a.tsops = args->ptr; 292 a.nsops = args->arg2; 293 return (linux_semop(td, &a)); 294 } 295 case LINUX_SEMGET: { 296 struct linux_semget_args a; 297 298 a.key = args->arg1; 299 a.nsems = args->arg2; 300 a.semflg = args->arg3; 301 return (linux_semget(td, &a)); 302 } 303 case LINUX_SEMCTL: { 304 struct linux_semctl_args a; 305 int error; 306 307 a.semid = args->arg1; 308 a.semnum = args->arg2; 309 a.cmd = args->arg3; 310 error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 311 if (error) 312 return (error); 313 return (linux_semctl(td, &a)); 314 } 315 case LINUX_MSGSND: { 316 struct linux_msgsnd_args a; 317 318 a.msqid = args->arg1; 319 a.msgp = args->ptr; 320 a.msgsz = args->arg2; 321 a.msgflg = args->arg3; 322 return (linux_msgsnd(td, &a)); 323 } 324 case LINUX_MSGRCV: { 325 struct linux_msgrcv_args a; 326 327 a.msqid = args->arg1; 328 a.msgsz = args->arg2; 329 a.msgflg = args->arg3; 330 if ((args->what >> 16) == 0) { 331 struct l_ipc_kludge tmp; 332 int error; 333 334 if (args->ptr == 0) 335 return (EINVAL); 336 error = copyin(args->ptr, &tmp, sizeof(tmp)); 337 if (error) 338 return (error); 339 a.msgp = PTRIN(tmp.msgp); 340 a.msgtyp = tmp.msgtyp; 341 } else { 342 a.msgp = args->ptr; 343 a.msgtyp = args->arg5; 344 } 345 return (linux_msgrcv(td, &a)); 346 } 347 case LINUX_MSGGET: { 348 struct linux_msgget_args a; 349 350 a.key = args->arg1; 351 a.msgflg = args->arg2; 352 return (linux_msgget(td, &a)); 353 } 354 case LINUX_MSGCTL: { 355 struct linux_msgctl_args a; 356 357 a.msqid = args->arg1; 358 a.cmd = args->arg2; 359 a.buf = args->ptr; 360 return 
(linux_msgctl(td, &a)); 361 } 362 case LINUX_SHMAT: { 363 struct linux_shmat_args a; 364 365 a.shmid = args->arg1; 366 a.shmaddr = args->ptr; 367 a.shmflg = args->arg2; 368 a.raddr = PTRIN((l_uint)args->arg3); 369 return (linux_shmat(td, &a)); 370 } 371 case LINUX_SHMDT: { 372 struct linux_shmdt_args a; 373 374 a.shmaddr = args->ptr; 375 return (linux_shmdt(td, &a)); 376 } 377 case LINUX_SHMGET: { 378 struct linux_shmget_args a; 379 380 a.key = args->arg1; 381 a.size = args->arg2; 382 a.shmflg = args->arg3; 383 return (linux_shmget(td, &a)); 384 } 385 case LINUX_SHMCTL: { 386 struct linux_shmctl_args a; 387 388 a.shmid = args->arg1; 389 a.cmd = args->arg2; 390 a.buf = args->ptr; 391 return (linux_shmctl(td, &a)); 392 } 393 default: 394 break; 395 } 396 397 return (EINVAL); 398} 399 400int 401linux_old_select(struct thread *td, struct linux_old_select_args *args) 402{ 403 struct l_old_select_argv linux_args; 404 struct linux_select_args newsel; 405 int error; 406 407#ifdef DEBUG 408 if (ldebug(old_select)) 409 printf(ARGS(old_select, "%p"), args->ptr); 410#endif 411 412 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 413 if (error) 414 return (error); 415 416 newsel.nfds = linux_args.nfds; 417 newsel.readfds = PTRIN(linux_args.readfds); 418 newsel.writefds = PTRIN(linux_args.writefds); 419 newsel.exceptfds = PTRIN(linux_args.exceptfds); 420 newsel.timeout = PTRIN(linux_args.timeout); 421 return (linux_select(td, &newsel)); 422} 423 424int 425linux_set_cloned_tls(struct thread *td, void *desc) 426{ 427 struct user_segment_descriptor sd; 428 struct l_user_desc info; 429 struct pcb *pcb; 430 int error; 431 int a[2]; 432 433 error = copyin(desc, &info, sizeof(struct l_user_desc)); 434 if (error) { 435 printf(LMSG("copyin failed!")); 436 } else { 437 /* We might copy out the entry_number as GUGS32_SEL. 
*/ 438 info.entry_number = GUGS32_SEL; 439 error = copyout(&info, desc, sizeof(struct l_user_desc)); 440 if (error) 441 printf(LMSG("copyout failed!")); 442 443 a[0] = LINUX_LDT_entry_a(&info); 444 a[1] = LINUX_LDT_entry_b(&info); 445 446 memcpy(&sd, &a, sizeof(a)); 447#ifdef DEBUG 448 if (ldebug(clone)) 449 printf("Segment created in clone with " 450 "CLONE_SETTLS: lobase: %x, hibase: %x, " 451 "lolimit: %x, hilimit: %x, type: %i, " 452 "dpl: %i, p: %i, xx: %i, long: %i, " 453 "def32: %i, gran: %i\n", sd.sd_lobase, 454 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, 455 sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 456 sd.sd_long, sd.sd_def32, sd.sd_gran); 457#endif 458 pcb = td->td_pcb; 459 pcb->pcb_gsbase = (register_t)info.base_addr; 460 td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); 461 set_pcb_flags(pcb, PCB_32BIT); 462 } 463 464 return (error); 465} 466 467int 468linux_set_upcall_kse(struct thread *td, register_t stack) 469{ 470 471 if (stack) 472 td->td_frame->tf_rsp = stack; 473 474 /* 475 * The newly created Linux thread returns 476 * to the user space by the same path that a parent do. 
477 */ 478 td->td_frame->tf_rax = 0; 479 return (0); 480} 481 482#define STACK_SIZE (2 * 1024 * 1024) 483#define GUARD_SIZE (4 * PAGE_SIZE) 484 485int 486linux_mmap2(struct thread *td, struct linux_mmap2_args *args) 487{ 488 489#ifdef DEBUG 490 if (ldebug(mmap2)) 491 printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"), 492 args->addr, args->len, args->prot, 493 args->flags, args->fd, args->pgoff); 494#endif 495 496 return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, 497 args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 498 PAGE_SIZE)); 499} 500 501int 502linux_mmap(struct thread *td, struct linux_mmap_args *args) 503{ 504 int error; 505 struct l_mmap_argv linux_args; 506 507 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 508 if (error) 509 return (error); 510 511#ifdef DEBUG 512 if (ldebug(mmap)) 513 printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"), 514 linux_args.addr, linux_args.len, linux_args.prot, 515 linux_args.flags, linux_args.fd, linux_args.pgoff); 516#endif 517 518 return (linux_mmap_common(td, linux_args.addr, linux_args.len, 519 linux_args.prot, linux_args.flags, linux_args.fd, 520 (uint32_t)linux_args.pgoff)); 521} 522 523static int 524linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, 525 l_int flags, l_int fd, l_loff_t pos) 526{ 527 struct proc *p = td->td_proc; 528 struct mmap_args /* { 529 caddr_t addr; 530 size_t len; 531 int prot; 532 int flags; 533 int fd; 534 long pad; 535 off_t pos; 536 } */ bsd_args; 537 int error; 538 struct file *fp; 539 cap_rights_t rights; 540 541 error = 0; 542 bsd_args.flags = 0; 543 fp = NULL; 544 545 /* 546 * Linux mmap(2): 547 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 548 */ 549 if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 550 return (EINVAL); 551 552 if (flags & LINUX_MAP_SHARED) 553 bsd_args.flags |= MAP_SHARED; 554 if (flags & LINUX_MAP_PRIVATE) 555 bsd_args.flags |= MAP_PRIVATE; 556 if (flags & 
LINUX_MAP_FIXED) 557 bsd_args.flags |= MAP_FIXED; 558 if (flags & LINUX_MAP_ANON) { 559 /* Enforce pos to be on page boundary, then ignore. */ 560 if ((pos & PAGE_MASK) != 0) 561 return (EINVAL); 562 pos = 0; 563 bsd_args.flags |= MAP_ANON; 564 } else 565 bsd_args.flags |= MAP_NOSYNC; 566 if (flags & LINUX_MAP_GROWSDOWN) 567 bsd_args.flags |= MAP_STACK; 568 569 /* 570 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 571 * on Linux/i386. We do this to ensure maximum compatibility. 572 * Linux/ia64 does the same in i386 emulation mode. 573 */ 574 bsd_args.prot = prot; 575 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 576 bsd_args.prot |= PROT_READ | PROT_EXEC; 577 578 /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 579 bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; 580 if (bsd_args.fd != -1) { 581 /* 582 * Linux follows Solaris mmap(2) description: 583 * The file descriptor fildes is opened with 584 * read permission, regardless of the 585 * protection options specified. 586 */ 587 588 error = fget(td, bsd_args.fd, 589 cap_rights_init(&rights, CAP_MMAP), &fp); 590 if (error != 0) 591 return (error); 592 if (fp->f_type != DTYPE_VNODE) { 593 fdrop(fp, td); 594 return (EINVAL); 595 } 596 597 /* Linux mmap() just fails for O_WRONLY files */ 598 if (!(fp->f_flag & FREAD)) { 599 fdrop(fp, td); 600 return (EACCES); 601 } 602 603 fdrop(fp, td); 604 } 605 606 if (flags & LINUX_MAP_GROWSDOWN) { 607 /* 608 * The Linux MAP_GROWSDOWN option does not limit auto 609 * growth of the region. Linux mmap with this option 610 * takes as addr the inital BOS, and as len, the initial 611 * region size. It can then grow down from addr without 612 * limit. However, Linux threads has an implicit internal 613 * limit to stack size of STACK_SIZE. Its just not 614 * enforced explicitly in Linux. But, here we impose 615 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 616 * region, since we can do this with our mmap. 
617 * 618 * Our mmap with MAP_STACK takes addr as the maximum 619 * downsize limit on BOS, and as len the max size of 620 * the region. It then maps the top SGROWSIZ bytes, 621 * and auto grows the region down, up to the limit 622 * in addr. 623 * 624 * If we don't use the MAP_STACK option, the effect 625 * of this code is to allocate a stack region of a 626 * fixed size of (STACK_SIZE - GUARD_SIZE). 627 */ 628 629 if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { 630 /* 631 * Some Linux apps will attempt to mmap 632 * thread stacks near the top of their 633 * address space. If their TOS is greater 634 * than vm_maxsaddr, vm_map_growstack() 635 * will confuse the thread stack with the 636 * process stack and deliver a SEGV if they 637 * attempt to grow the thread stack past their 638 * current stacksize rlimit. To avoid this, 639 * adjust vm_maxsaddr upwards to reflect 640 * the current stacksize rlimit rather 641 * than the maximum possible stacksize. 642 * It would be better to adjust the 643 * mmap'ed region, but some apps do not check 644 * mmap's return value. 645 */ 646 PROC_LOCK(p); 647 p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - 648 lim_cur(p, RLIMIT_STACK); 649 PROC_UNLOCK(p); 650 } 651 652 /* 653 * This gives us our maximum stack size and a new BOS. 654 * If we're using VM_STACK, then mmap will just map 655 * the top SGROWSIZ bytes, and let the stack grow down 656 * to the limit at BOS. If we're not using VM_STACK 657 * we map the full stack, since we don't have a way 658 * to autogrow it. 
659 */ 660 if (len > STACK_SIZE - GUARD_SIZE) { 661 bsd_args.addr = (caddr_t)PTRIN(addr); 662 bsd_args.len = len; 663 } else { 664 bsd_args.addr = (caddr_t)PTRIN(addr) - 665 (STACK_SIZE - GUARD_SIZE - len); 666 bsd_args.len = STACK_SIZE - GUARD_SIZE; 667 } 668 } else { 669 bsd_args.addr = (caddr_t)PTRIN(addr); 670 bsd_args.len = len; 671 } 672 bsd_args.pos = pos; 673 674#ifdef DEBUG 675 if (ldebug(mmap)) 676 printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 677 __func__, 678 (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, 679 bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 680#endif 681 error = sys_mmap(td, &bsd_args); 682#ifdef DEBUG 683 if (ldebug(mmap)) 684 printf("-> %s() return: 0x%x (0x%08x)\n", 685 __func__, error, (u_int)td->td_retval[0]); 686#endif 687 return (error); 688} 689 690int 691linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 692{ 693 struct mprotect_args bsd_args; 694 695 bsd_args.addr = uap->addr; 696 bsd_args.len = uap->len; 697 bsd_args.prot = uap->prot; 698 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 699 bsd_args.prot |= PROT_READ | PROT_EXEC; 700 return (sys_mprotect(td, &bsd_args)); 701} 702 703int 704linux_iopl(struct thread *td, struct linux_iopl_args *args) 705{ 706 int error; 707 708 if (args->level < 0 || args->level > 3) 709 return (EINVAL); 710 if ((error = priv_check(td, PRIV_IO)) != 0) 711 return (error); 712 if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 713 return (error); 714 td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | 715 (args->level * (PSL_IOPL / 3)); 716 717 return (0); 718} 719 720int 721linux_sigaction(struct thread *td, struct linux_sigaction_args *args) 722{ 723 l_osigaction_t osa; 724 l_sigaction_t act, oact; 725 int error; 726 727#ifdef DEBUG 728 if (ldebug(sigaction)) 729 printf(ARGS(sigaction, "%d, %p, %p"), 730 args->sig, (void *)args->nsa, (void *)args->osa); 731#endif 732 733 if (args->nsa != NULL) { 734 error = copyin(args->nsa, &osa, 
sizeof(l_osigaction_t)); 735 if (error) 736 return (error); 737 act.lsa_handler = osa.lsa_handler; 738 act.lsa_flags = osa.lsa_flags; 739 act.lsa_restorer = osa.lsa_restorer; 740 LINUX_SIGEMPTYSET(act.lsa_mask); 741 act.lsa_mask.__bits[0] = osa.lsa_mask; 742 } 743 744 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 745 args->osa ? &oact : NULL); 746 747 if (args->osa != NULL && !error) { 748 osa.lsa_handler = oact.lsa_handler; 749 osa.lsa_flags = oact.lsa_flags; 750 osa.lsa_restorer = oact.lsa_restorer; 751 osa.lsa_mask = oact.lsa_mask.__bits[0]; 752 error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 753 } 754 755 return (error); 756} 757 758/* 759 * Linux has two extra args, restart and oldmask. We don't use these, 760 * but it seems that "restart" is actually a context pointer that 761 * enables the signal to happen with a different register set. 762 */ 763int 764linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 765{ 766 sigset_t sigmask; 767 l_sigset_t mask; 768 769#ifdef DEBUG 770 if (ldebug(sigsuspend)) 771 printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 772#endif 773 774 LINUX_SIGEMPTYSET(mask); 775 mask.__bits[0] = args->mask; 776 linux_to_bsd_sigset(&mask, &sigmask); 777 return (kern_sigsuspend(td, sigmask)); 778} 779 780int 781linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 782{ 783 l_sigset_t lmask; 784 sigset_t sigmask; 785 int error; 786 787#ifdef DEBUG 788 if (ldebug(rt_sigsuspend)) 789 printf(ARGS(rt_sigsuspend, "%p, %d"), 790 (void *)uap->newset, uap->sigsetsize); 791#endif 792 793 if (uap->sigsetsize != sizeof(l_sigset_t)) 794 return (EINVAL); 795 796 error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 797 if (error) 798 return (error); 799 800 linux_to_bsd_sigset(&lmask, &sigmask); 801 return (kern_sigsuspend(td, sigmask)); 802} 803 804int 805linux_pause(struct thread *td, struct linux_pause_args *args) 806{ 807 struct proc *p = td->td_proc; 808 sigset_t 
sigmask; 809 810#ifdef DEBUG 811 if (ldebug(pause)) 812 printf(ARGS(pause, "")); 813#endif 814 815 PROC_LOCK(p); 816 sigmask = td->td_sigmask; 817 PROC_UNLOCK(p); 818 return (kern_sigsuspend(td, sigmask)); 819} 820 821int 822linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 823{ 824 stack_t ss, oss; 825 l_stack_t lss; 826 int error; 827 828#ifdef DEBUG 829 if (ldebug(sigaltstack)) 830 printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 831#endif 832 833 if (uap->uss != NULL) { 834 error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 835 if (error) 836 return (error); 837 838 ss.ss_sp = PTRIN(lss.ss_sp); 839 ss.ss_size = lss.ss_size; 840 ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 841 } 842 error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 843 (uap->uoss != NULL) ? &oss : NULL); 844 if (!error && uap->uoss != NULL) { 845 lss.ss_sp = PTROUT(oss.ss_sp); 846 lss.ss_size = oss.ss_size; 847 lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 848 error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 849 } 850 851 return (error); 852} 853 854int 855linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 856{ 857 struct ftruncate_args sa; 858 859#ifdef DEBUG 860 if (ldebug(ftruncate64)) 861 printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 862 (intmax_t)args->length); 863#endif 864 865 sa.fd = args->fd; 866 sa.length = args->length; 867 return sys_ftruncate(td, &sa); 868} 869 870int 871linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) 872{ 873 struct timeval atv; 874 l_timeval atv32; 875 struct timezone rtz; 876 int error = 0; 877 878 if (uap->tp) { 879 microtime(&atv); 880 atv32.tv_sec = atv.tv_sec; 881 atv32.tv_usec = atv.tv_usec; 882 error = copyout(&atv32, uap->tp, sizeof(atv32)); 883 } 884 if (error == 0 && uap->tzp != NULL) { 885 rtz.tz_minuteswest = tz_minuteswest; 886 rtz.tz_dsttime = tz_dsttime; 887 error = copyout(&rtz, uap->tzp, sizeof(rtz)); 888 } 889 return (error); 890} 
891 892int 893linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) 894{ 895 l_timeval atv32; 896 struct timeval atv, *tvp; 897 struct timezone atz, *tzp; 898 int error; 899 900 if (uap->tp) { 901 error = copyin(uap->tp, &atv32, sizeof(atv32)); 902 if (error) 903 return (error); 904 atv.tv_sec = atv32.tv_sec; 905 atv.tv_usec = atv32.tv_usec; 906 tvp = &atv; 907 } else 908 tvp = NULL; 909 if (uap->tzp) { 910 error = copyin(uap->tzp, &atz, sizeof(atz)); 911 if (error) 912 return (error); 913 tzp = &atz; 914 } else 915 tzp = NULL; 916 return (kern_settimeofday(td, tvp, tzp)); 917} 918 919int 920linux_getrusage(struct thread *td, struct linux_getrusage_args *uap) 921{ 922 struct rusage s; 923 int error; 924 925 error = kern_getrusage(td, uap->who, &s); 926 if (error != 0) 927 return (error); 928 if (uap->rusage != NULL) 929 error = linux_copyout_rusage(&s, uap->rusage); 930 return (error); 931} 932 933int 934linux_set_thread_area(struct thread *td, 935 struct linux_set_thread_area_args *args) 936{ 937 struct l_user_desc info; 938 struct user_segment_descriptor sd; 939 struct pcb *pcb; 940 int a[2]; 941 int error; 942 943 error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 944 if (error) 945 return (error); 946 947#ifdef DEBUG 948 if (ldebug(set_thread_area)) 949 printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, " 950 "%i, %i, %i"), info.entry_number, info.base_addr, 951 info.limit, info.seg_32bit, info.contents, 952 info.read_exec_only, info.limit_in_pages, 953 info.seg_not_present, info.useable); 954#endif 955 956 /* 957 * Semantics of Linux version: every thread in the system has array 958 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. 959 * This syscall loads one of the selected TLS decriptors with a value 960 * and also loads GDT descriptors 6, 7 and 8 with the content of 961 * the per-thread descriptors. 
962 * 963 * Semantics of FreeBSD version: I think we can ignore that Linux has 964 * three per-thread descriptors and use just the first one. 965 * The tls_array[] is used only in [gs]et_thread_area() syscalls and 966 * for loading the GDT descriptors. We use just one GDT descriptor 967 * for TLS, so we will load just one. 968 * 969 * XXX: This doesn't work when a user space process tries to use more 970 * than one TLS segment. Comment in the Linux source says wine might 971 * do this. 972 */ 973 974 /* 975 * GLIBC reads current %gs and call set_thread_area() with it. 976 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because 977 * we use these segments. 978 */ 979 switch (info.entry_number) { 980 case GUGS32_SEL: 981 case GUDATA_SEL: 982 case 6: 983 case -1: 984 info.entry_number = GUGS32_SEL; 985 break; 986 default: 987 return (EINVAL); 988 } 989 990 /* 991 * We have to copy out the GDT entry we use. 992 * 993 * XXX: What if a user space program does not check the return value 994 * and tries to use 6, 7 or 8? 995 */ 996 error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 997 if (error) 998 return (error); 999 1000 if (LINUX_LDT_empty(&info)) { 1001 a[0] = 0; 1002 a[1] = 0; 1003 } else { 1004 a[0] = LINUX_LDT_entry_a(&info); 1005 a[1] = LINUX_LDT_entry_b(&info); 1006 } 1007 1008 memcpy(&sd, &a, sizeof(a)); 1009#ifdef DEBUG 1010 if (ldebug(set_thread_area)) 1011 printf("Segment created in set_thread_area: " 1012 "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " 1013 "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " 1014 "def32: %i, gran: %i\n", 1015 sd.sd_lobase, 1016 sd.sd_hibase, 1017 sd.sd_lolimit, 1018 sd.sd_hilimit, 1019 sd.sd_type, 1020 sd.sd_dpl, 1021 sd.sd_p, 1022 sd.sd_xx, 1023 sd.sd_long, 1024 sd.sd_def32, 1025 sd.sd_gran); 1026#endif 1027 1028 pcb = td->td_pcb; 1029 pcb->pcb_gsbase = (register_t)info.base_addr; 1030 set_pcb_flags(pcb, PCB_32BIT); 1031 update_gdt_gsbase(td, info.base_addr); 1032 1033 return (0); 1034} 1035