linux_misc.c revision 293500
/*-
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_misc.c 293500 2016-01-09 15:23:54Z dchagin $"); 32 33#include "opt_compat.h" 34#include "opt_kdtrace.h" 35 36#include <sys/param.h> 37#include <sys/blist.h> 38#include <sys/fcntl.h> 39#if defined(__i386__) 40#include <sys/imgact_aout.h> 41#endif 42#include <sys/jail.h> 43#include <sys/kernel.h> 44#include <sys/limits.h> 45#include <sys/lock.h> 46#include <sys/malloc.h> 47#include <sys/mman.h> 48#include <sys/mount.h> 49#include <sys/mutex.h> 50#include <sys/namei.h> 51#include <sys/priv.h> 52#include <sys/proc.h> 53#include <sys/reboot.h> 54#include <sys/racct.h> 55#include <sys/resourcevar.h> 56#include <sys/sched.h> 57#include <sys/signalvar.h> 58#include <sys/stat.h> 59#include <sys/syscallsubr.h> 60#include <sys/sysctl.h> 61#include <sys/sysproto.h> 62#include <sys/systm.h> 63#include <sys/time.h> 64#include <sys/vmmeter.h> 65#include <sys/vnode.h> 66#include <sys/wait.h> 67#include <sys/cpuset.h> 68 69#include <security/mac/mac_framework.h> 70 71#include <vm/vm.h> 72#include <vm/pmap.h> 73#include <vm/vm_kern.h> 74#include <vm/vm_map.h> 75#include <vm/vm_extern.h> 76#include <vm/vm_object.h> 77#include <vm/swap_pager.h> 78 79#ifdef COMPAT_LINUX32 80#include <machine/../linux32/linux.h> 81#include <machine/../linux32/linux32_proto.h> 82#else 83#include <machine/../linux/linux.h> 84#include <machine/../linux/linux_proto.h> 85#endif 86 87#include <compat/linux/linux_file.h> 88#include <compat/linux/linux_mib.h> 89#include <compat/linux/linux_signal.h> 90#include <compat/linux/linux_util.h> 91#include <compat/linux/linux_sysproto.h> 92#include <compat/linux/linux_emul.h> 93#include <compat/linux/linux_misc.h> 94 95int stclohz; /* Statistics clock frequency */ 96 97static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 98 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 99 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 100 RLIMIT_MEMLOCK, RLIMIT_AS 101}; 
102 103struct l_sysinfo { 104 l_long uptime; /* Seconds since boot */ 105 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 106#define LINUX_SYSINFO_LOADS_SCALE 65536 107 l_ulong totalram; /* Total usable main memory size */ 108 l_ulong freeram; /* Available memory size */ 109 l_ulong sharedram; /* Amount of shared memory */ 110 l_ulong bufferram; /* Memory used by buffers */ 111 l_ulong totalswap; /* Total swap space size */ 112 l_ulong freeswap; /* swap space still available */ 113 l_ushort procs; /* Number of current processes */ 114 l_ushort pads; 115 l_ulong totalbig; 116 l_ulong freebig; 117 l_uint mem_unit; 118 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 119}; 120int 121linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 122{ 123 struct l_sysinfo sysinfo; 124 vm_object_t object; 125 int i, j; 126 struct timespec ts; 127 128 getnanouptime(&ts); 129 if (ts.tv_nsec != 0) 130 ts.tv_sec++; 131 sysinfo.uptime = ts.tv_sec; 132 133 /* Use the information from the mib to get our load averages */ 134 for (i = 0; i < 3; i++) 135 sysinfo.loads[i] = averunnable.ldavg[i] * 136 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 137 138 sysinfo.totalram = physmem * PAGE_SIZE; 139 sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE; 140 141 sysinfo.sharedram = 0; 142 mtx_lock(&vm_object_list_mtx); 143 TAILQ_FOREACH(object, &vm_object_list, object_list) 144 if (object->shadow_count > 1) 145 sysinfo.sharedram += object->resident_page_count; 146 mtx_unlock(&vm_object_list_mtx); 147 148 sysinfo.sharedram *= PAGE_SIZE; 149 sysinfo.bufferram = 0; 150 151 swap_pager_status(&i, &j); 152 sysinfo.totalswap = i * PAGE_SIZE; 153 sysinfo.freeswap = (i - j) * PAGE_SIZE; 154 155 sysinfo.procs = nprocs; 156 157 /* The following are only present in newer Linux kernels. 
*/ 158 sysinfo.totalbig = 0; 159 sysinfo.freebig = 0; 160 sysinfo.mem_unit = 1; 161 162 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 163} 164 165int 166linux_alarm(struct thread *td, struct linux_alarm_args *args) 167{ 168 struct itimerval it, old_it; 169 u_int secs; 170 int error; 171 172#ifdef DEBUG 173 if (ldebug(alarm)) 174 printf(ARGS(alarm, "%u"), args->secs); 175#endif 176 177 secs = args->secs; 178 179 if (secs > INT_MAX) 180 secs = INT_MAX; 181 182 it.it_value.tv_sec = (long) secs; 183 it.it_value.tv_usec = 0; 184 it.it_interval.tv_sec = 0; 185 it.it_interval.tv_usec = 0; 186 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 187 if (error) 188 return (error); 189 if (timevalisset(&old_it.it_value)) { 190 if (old_it.it_value.tv_usec != 0) 191 old_it.it_value.tv_sec++; 192 td->td_retval[0] = old_it.it_value.tv_sec; 193 } 194 return (0); 195} 196 197int 198linux_brk(struct thread *td, struct linux_brk_args *args) 199{ 200 struct vmspace *vm = td->td_proc->p_vmspace; 201 vm_offset_t new, old; 202 struct obreak_args /* { 203 char * nsize; 204 } */ tmp; 205 206#ifdef DEBUG 207 if (ldebug(brk)) 208 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); 209#endif 210 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 211 new = (vm_offset_t)args->dsend; 212 tmp.nsize = (char *)new; 213 if (((caddr_t)new > vm->vm_daddr) && !sys_obreak(td, &tmp)) 214 td->td_retval[0] = (long)new; 215 else 216 td->td_retval[0] = (long)old; 217 218 return (0); 219} 220 221#if defined(__i386__) 222/* XXX: what about amd64/linux32? 
*/ 223 224int 225linux_uselib(struct thread *td, struct linux_uselib_args *args) 226{ 227 struct nameidata ni; 228 struct vnode *vp; 229 struct exec *a_out; 230 struct vattr attr; 231 vm_offset_t vmaddr; 232 unsigned long file_offset; 233 unsigned long bss_size; 234 char *library; 235 ssize_t aresid; 236 int error, locked, writecount; 237 238 LCONVPATHEXIST(td, args->library, &library); 239 240#ifdef DEBUG 241 if (ldebug(uselib)) 242 printf(ARGS(uselib, "%s"), library); 243#endif 244 245 a_out = NULL; 246 locked = 0; 247 vp = NULL; 248 249 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 250 UIO_SYSSPACE, library, td); 251 error = namei(&ni); 252 LFREEPATH(library); 253 if (error) 254 goto cleanup; 255 256 vp = ni.ni_vp; 257 NDFREE(&ni, NDF_ONLY_PNBUF); 258 259 /* 260 * From here on down, we have a locked vnode that must be unlocked. 261 * XXX: The code below largely duplicates exec_check_permissions(). 262 */ 263 locked = 1; 264 265 /* Writable? */ 266 error = VOP_GET_WRITECOUNT(vp, &writecount); 267 if (error != 0) 268 goto cleanup; 269 if (writecount != 0) { 270 error = ETXTBSY; 271 goto cleanup; 272 } 273 274 /* Executable? */ 275 error = VOP_GETATTR(vp, &attr, td->td_ucred); 276 if (error) 277 goto cleanup; 278 279 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 280 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 281 /* EACCESS is what exec(2) returns. */ 282 error = ENOEXEC; 283 goto cleanup; 284 } 285 286 /* Sensible size? */ 287 if (attr.va_size == 0) { 288 error = ENOEXEC; 289 goto cleanup; 290 } 291 292 /* Can we access it? */ 293 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 294 if (error) 295 goto cleanup; 296 297 /* 298 * XXX: This should use vn_open() so that it is properly authorized, 299 * and to reduce code redundancy all over the place here. 300 * XXX: Not really, it duplicates far more of exec_check_permissions() 301 * than vn_open(). 
302 */ 303#ifdef MAC 304 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 305 if (error) 306 goto cleanup; 307#endif 308 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 309 if (error) 310 goto cleanup; 311 312 /* Pull in executable header into exec_map */ 313 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 314 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 315 if (error) 316 goto cleanup; 317 318 /* Is it a Linux binary ? */ 319 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 320 error = ENOEXEC; 321 goto cleanup; 322 } 323 324 /* 325 * While we are here, we should REALLY do some more checks 326 */ 327 328 /* Set file/virtual offset based on a.out variant. */ 329 switch ((int)(a_out->a_magic & 0xffff)) { 330 case 0413: /* ZMAGIC */ 331 file_offset = 1024; 332 break; 333 case 0314: /* QMAGIC */ 334 file_offset = 0; 335 break; 336 default: 337 error = ENOEXEC; 338 goto cleanup; 339 } 340 341 bss_size = round_page(a_out->a_bss); 342 343 /* Check various fields in header for validity/bounds. */ 344 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 345 error = ENOEXEC; 346 goto cleanup; 347 } 348 349 /* text + data can't exceed file size */ 350 if (a_out->a_data + a_out->a_text > attr.va_size) { 351 error = EFAULT; 352 goto cleanup; 353 } 354 355 /* 356 * text/data/bss must not exceed limits 357 * XXX - this is not complete. it should check current usage PLUS 358 * the resources needed by this library. 359 */ 360 PROC_LOCK(td->td_proc); 361 if (a_out->a_text > maxtsiz || 362 a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA) || 363 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 364 bss_size) != 0) { 365 PROC_UNLOCK(td->td_proc); 366 error = ENOMEM; 367 goto cleanup; 368 } 369 PROC_UNLOCK(td->td_proc); 370 371 /* 372 * Prevent more writers. 373 * XXX: Note that if any of the VM operations fail below we don't 374 * clear this flag. 
375 */ 376 VOP_SET_TEXT(vp); 377 378 /* 379 * Lock no longer needed 380 */ 381 locked = 0; 382 VOP_UNLOCK(vp, 0); 383 384 /* 385 * Check if file_offset page aligned. Currently we cannot handle 386 * misalinged file offsets, and so we read in the entire image 387 * (what a waste). 388 */ 389 if (file_offset & PAGE_MASK) { 390#ifdef DEBUG 391 printf("uselib: Non page aligned binary %lu\n", file_offset); 392#endif 393 /* Map text+data read/write/execute */ 394 395 /* a_entry is the load address and is page aligned */ 396 vmaddr = trunc_page(a_out->a_entry); 397 398 /* get anon user mapping, read+write+execute */ 399 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 400 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 401 VM_PROT_ALL, VM_PROT_ALL, 0); 402 if (error) 403 goto cleanup; 404 405 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 406 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 407 td->td_ucred, NOCRED, &aresid, td); 408 if (error != 0) 409 goto cleanup; 410 if (aresid != 0) { 411 error = ENOEXEC; 412 goto cleanup; 413 } 414 } else { 415#ifdef DEBUG 416 printf("uselib: Page aligned binary %lu\n", file_offset); 417#endif 418 /* 419 * for QMAGIC, a_entry is 20 bytes beyond the load address 420 * to skip the executable header 421 */ 422 vmaddr = trunc_page(a_out->a_entry); 423 424 /* 425 * Map it all into the process's space as a single 426 * copy-on-write "data" segment. 
427 */ 428 error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, 429 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 430 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 431 if (error) 432 goto cleanup; 433 } 434#ifdef DEBUG 435 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], 436 ((long *)vmaddr)[1]); 437#endif 438 if (bss_size != 0) { 439 /* Calculate BSS start address */ 440 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 441 a_out->a_data; 442 443 /* allocate some 'anon' space */ 444 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 445 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 446 VM_PROT_ALL, 0); 447 if (error) 448 goto cleanup; 449 } 450 451cleanup: 452 /* Unlock vnode if needed */ 453 if (locked) 454 VOP_UNLOCK(vp, 0); 455 456 /* Release the temporary mapping. */ 457 if (a_out) 458 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 459 460 return (error); 461} 462 463#endif /* __i386__ */ 464 465int 466linux_select(struct thread *td, struct linux_select_args *args) 467{ 468 l_timeval ltv; 469 struct timeval tv0, tv1, utv, *tvp; 470 int error; 471 472#ifdef DEBUG 473 if (ldebug(select)) 474 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 475 (void *)args->readfds, (void *)args->writefds, 476 (void *)args->exceptfds, (void *)args->timeout); 477#endif 478 479 /* 480 * Store current time for computation of the amount of 481 * time left. 482 */ 483 if (args->timeout) { 484 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 485 goto select_out; 486 utv.tv_sec = ltv.tv_sec; 487 utv.tv_usec = ltv.tv_usec; 488#ifdef DEBUG 489 if (ldebug(select)) 490 printf(LMSG("incoming timeout (%jd/%ld)"), 491 (intmax_t)utv.tv_sec, utv.tv_usec); 492#endif 493 494 if (itimerfix(&utv)) { 495 /* 496 * The timeval was invalid. Convert it to something 497 * valid that will act as it does under Linux. 
498 */ 499 utv.tv_sec += utv.tv_usec / 1000000; 500 utv.tv_usec %= 1000000; 501 if (utv.tv_usec < 0) { 502 utv.tv_sec -= 1; 503 utv.tv_usec += 1000000; 504 } 505 if (utv.tv_sec < 0) 506 timevalclear(&utv); 507 } 508 microtime(&tv0); 509 tvp = &utv; 510 } else 511 tvp = NULL; 512 513 error = kern_select(td, args->nfds, args->readfds, args->writefds, 514 args->exceptfds, tvp, sizeof(l_int) * 8); 515 516#ifdef DEBUG 517 if (ldebug(select)) 518 printf(LMSG("real select returns %d"), error); 519#endif 520 if (error) 521 goto select_out; 522 523 if (args->timeout) { 524 if (td->td_retval[0]) { 525 /* 526 * Compute how much time was left of the timeout, 527 * by subtracting the current time and the time 528 * before we started the call, and subtracting 529 * that result from the user-supplied value. 530 */ 531 microtime(&tv1); 532 timevalsub(&tv1, &tv0); 533 timevalsub(&utv, &tv1); 534 if (utv.tv_sec < 0) 535 timevalclear(&utv); 536 } else 537 timevalclear(&utv); 538#ifdef DEBUG 539 if (ldebug(select)) 540 printf(LMSG("outgoing timeout (%jd/%ld)"), 541 (intmax_t)utv.tv_sec, utv.tv_usec); 542#endif 543 ltv.tv_sec = utv.tv_sec; 544 ltv.tv_usec = utv.tv_usec; 545 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 546 goto select_out; 547 } 548 549select_out: 550#ifdef DEBUG 551 if (ldebug(select)) 552 printf(LMSG("select_out -> %d"), error); 553#endif 554 return (error); 555} 556 557int 558linux_mremap(struct thread *td, struct linux_mremap_args *args) 559{ 560 struct munmap_args /* { 561 void *addr; 562 size_t len; 563 } */ bsd_args; 564 int error = 0; 565 566#ifdef DEBUG 567 if (ldebug(mremap)) 568 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 569 (void *)(uintptr_t)args->addr, 570 (unsigned long)args->old_len, 571 (unsigned long)args->new_len, 572 (unsigned long)args->flags); 573#endif 574 575 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 576 td->td_retval[0] = 0; 577 return (EINVAL); 578 } 579 580 /* 581 * Check for the page alignment. 
582 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 583 */ 584 if (args->addr & PAGE_MASK) { 585 td->td_retval[0] = 0; 586 return (EINVAL); 587 } 588 589 args->new_len = round_page(args->new_len); 590 args->old_len = round_page(args->old_len); 591 592 if (args->new_len > args->old_len) { 593 td->td_retval[0] = 0; 594 return (ENOMEM); 595 } 596 597 if (args->new_len < args->old_len) { 598 bsd_args.addr = 599 (caddr_t)((uintptr_t)args->addr + args->new_len); 600 bsd_args.len = args->old_len - args->new_len; 601 error = sys_munmap(td, &bsd_args); 602 } 603 604 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 605 return (error); 606} 607 608#define LINUX_MS_ASYNC 0x0001 609#define LINUX_MS_INVALIDATE 0x0002 610#define LINUX_MS_SYNC 0x0004 611 612int 613linux_msync(struct thread *td, struct linux_msync_args *args) 614{ 615 struct msync_args bsd_args; 616 617 bsd_args.addr = (caddr_t)(uintptr_t)args->addr; 618 bsd_args.len = (uintptr_t)args->len; 619 bsd_args.flags = args->fl & ~LINUX_MS_SYNC; 620 621 return (sys_msync(td, &bsd_args)); 622} 623 624int 625linux_time(struct thread *td, struct linux_time_args *args) 626{ 627 struct timeval tv; 628 l_time_t tm; 629 int error; 630 631#ifdef DEBUG 632 if (ldebug(time)) 633 printf(ARGS(time, "*")); 634#endif 635 636 microtime(&tv); 637 tm = tv.tv_sec; 638 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 639 return (error); 640 td->td_retval[0] = tm; 641 return (0); 642} 643 644struct l_times_argv { 645 l_clock_t tms_utime; 646 l_clock_t tms_stime; 647 l_clock_t tms_cutime; 648 l_clock_t tms_cstime; 649}; 650 651 652/* 653 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 654 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 655 * auxiliary vector entry. 
656 */ 657#define CLK_TCK 100 658 659#define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 660#define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 661 662#define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 663 CONVNTCK(r) : CONVOTCK(r)) 664 665int 666linux_times(struct thread *td, struct linux_times_args *args) 667{ 668 struct timeval tv, utime, stime, cutime, cstime; 669 struct l_times_argv tms; 670 struct proc *p; 671 int error; 672 673#ifdef DEBUG 674 if (ldebug(times)) 675 printf(ARGS(times, "*")); 676#endif 677 678 if (args->buf != NULL) { 679 p = td->td_proc; 680 PROC_LOCK(p); 681 PROC_STATLOCK(p); 682 calcru(p, &utime, &stime); 683 PROC_STATUNLOCK(p); 684 calccru(p, &cutime, &cstime); 685 PROC_UNLOCK(p); 686 687 tms.tms_utime = CONVTCK(utime); 688 tms.tms_stime = CONVTCK(stime); 689 690 tms.tms_cutime = CONVTCK(cutime); 691 tms.tms_cstime = CONVTCK(cstime); 692 693 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 694 return (error); 695 } 696 697 microuptime(&tv); 698 td->td_retval[0] = (int)CONVTCK(tv); 699 return (0); 700} 701 702int 703linux_newuname(struct thread *td, struct linux_newuname_args *args) 704{ 705 struct l_new_utsname utsname; 706 char osname[LINUX_MAX_UTSNAME]; 707 char osrelease[LINUX_MAX_UTSNAME]; 708 char *p; 709 710#ifdef DEBUG 711 if (ldebug(newuname)) 712 printf(ARGS(newuname, "*")); 713#endif 714 715 linux_get_osname(td, osname); 716 linux_get_osrelease(td, osrelease); 717 718 bzero(&utsname, sizeof(utsname)); 719 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 720 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 721 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 722 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 723 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 724 for (p = utsname.version; *p != '\0'; ++p) 725 if (*p == '\n') { 726 *p = '\0'; 727 break; 728 } 729 strlcpy(utsname.machine, linux_platform, 
LINUX_MAX_UTSNAME); 730 731 return (copyout(&utsname, args->buf, sizeof(utsname))); 732} 733 734#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 735struct l_utimbuf { 736 l_time_t l_actime; 737 l_time_t l_modtime; 738}; 739 740int 741linux_utime(struct thread *td, struct linux_utime_args *args) 742{ 743 struct timeval tv[2], *tvp; 744 struct l_utimbuf lut; 745 char *fname; 746 int error; 747 748 LCONVPATHEXIST(td, args->fname, &fname); 749 750#ifdef DEBUG 751 if (ldebug(utime)) 752 printf(ARGS(utime, "%s, *"), fname); 753#endif 754 755 if (args->times) { 756 if ((error = copyin(args->times, &lut, sizeof lut))) { 757 LFREEPATH(fname); 758 return (error); 759 } 760 tv[0].tv_sec = lut.l_actime; 761 tv[0].tv_usec = 0; 762 tv[1].tv_sec = lut.l_modtime; 763 tv[1].tv_usec = 0; 764 tvp = tv; 765 } else 766 tvp = NULL; 767 768 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 769 LFREEPATH(fname); 770 return (error); 771} 772 773int 774linux_utimes(struct thread *td, struct linux_utimes_args *args) 775{ 776 l_timeval ltv[2]; 777 struct timeval tv[2], *tvp = NULL; 778 char *fname; 779 int error; 780 781 LCONVPATHEXIST(td, args->fname, &fname); 782 783#ifdef DEBUG 784 if (ldebug(utimes)) 785 printf(ARGS(utimes, "%s, *"), fname); 786#endif 787 788 if (args->tptr != NULL) { 789 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 790 LFREEPATH(fname); 791 return (error); 792 } 793 tv[0].tv_sec = ltv[0].tv_sec; 794 tv[0].tv_usec = ltv[0].tv_usec; 795 tv[1].tv_sec = ltv[1].tv_sec; 796 tv[1].tv_usec = ltv[1].tv_usec; 797 tvp = tv; 798 } 799 800 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 801 LFREEPATH(fname); 802 return (error); 803} 804 805int 806linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 807{ 808 l_timeval ltv[2]; 809 struct timeval tv[2], *tvp = NULL; 810 char *fname; 811 int error, dfd; 812 813 dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; 814 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 815 816#ifdef DEBUG 817 if (ldebug(futimesat)) 818 printf(ARGS(futimesat, "%s, *"), fname); 819#endif 820 821 if (args->utimes != NULL) { 822 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 823 LFREEPATH(fname); 824 return (error); 825 } 826 tv[0].tv_sec = ltv[0].tv_sec; 827 tv[0].tv_usec = ltv[0].tv_usec; 828 tv[1].tv_sec = ltv[1].tv_sec; 829 tv[1].tv_usec = ltv[1].tv_usec; 830 tvp = tv; 831 } 832 833 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 834 LFREEPATH(fname); 835 return (error); 836} 837#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 838 839int 840linux_common_wait(struct thread *td, int pid, int *status, 841 int options, struct rusage *ru) 842{ 843 int error, tmpstat; 844 845 error = kern_wait(td, pid, &tmpstat, options, ru); 846 if (error) 847 return (error); 848 849 if (status) { 850 tmpstat &= 0xffff; 851 if (WIFSIGNALED(tmpstat)) 852 tmpstat = (tmpstat & 0xffffff80) | 853 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 854 else if (WIFSTOPPED(tmpstat)) 855 tmpstat = (tmpstat & 0xffff00ff) | 856 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 857 error = copyout(&tmpstat, status, sizeof(int)); 858 } 859 860 return (error); 861} 862 863int 864linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 865{ 866 int options; 867 868#ifdef DEBUG 869 if (ldebug(waitpid)) 870 printf(ARGS(waitpid, "%d, %p, %d"), 871 args->pid, (void *)args->status, args->options); 872#endif 873 /* 874 * this is necessary because the test in kern_wait doesn't work 875 * because we mess with the options here 876 */ 877 if (args->options & ~(WUNTRACED | WNOHANG | WCONTINUED | __WCLONE)) 878 return (EINVAL); 879 880 options = (args->options & (WNOHANG | WUNTRACED)); 881 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 882 if (args->options & __WCLONE) 883 options |= WLINUXCLONE; 884 885 return (linux_common_wait(td, args->pid, args->status, options, 
NULL)); 886} 887 888int 889linux_wait4(struct thread *td, struct linux_wait4_args *args) 890{ 891 int error, options; 892 struct rusage ru, *rup; 893 894#ifdef DEBUG 895 if (ldebug(wait4)) 896 printf(ARGS(wait4, "%d, %p, %d, %p"), 897 args->pid, (void *)args->status, args->options, 898 (void *)args->rusage); 899#endif 900 901 options = (args->options & (WNOHANG | WUNTRACED)); 902 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 903 if (args->options & __WCLONE) 904 options |= WLINUXCLONE; 905 906 if (args->rusage != NULL) 907 rup = &ru; 908 else 909 rup = NULL; 910 error = linux_common_wait(td, args->pid, args->status, options, rup); 911 if (error != 0) 912 return (error); 913 if (args->rusage != NULL) 914 error = linux_copyout_rusage(&ru, args->rusage); 915 return (error); 916} 917 918int 919linux_mknod(struct thread *td, struct linux_mknod_args *args) 920{ 921 char *path; 922 int error; 923 924 LCONVPATHCREAT(td, args->path, &path); 925 926#ifdef DEBUG 927 if (ldebug(mknod)) 928 printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev); 929#endif 930 931 switch (args->mode & S_IFMT) { 932 case S_IFIFO: 933 case S_IFSOCK: 934 error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode); 935 break; 936 937 case S_IFCHR: 938 case S_IFBLK: 939 error = kern_mknod(td, path, UIO_SYSSPACE, args->mode, 940 args->dev); 941 break; 942 943 case S_IFDIR: 944 error = EPERM; 945 break; 946 947 case 0: 948 args->mode |= S_IFREG; 949 /* FALLTHROUGH */ 950 case S_IFREG: 951 error = kern_open(td, path, UIO_SYSSPACE, 952 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 953 if (error == 0) 954 kern_close(td, td->td_retval[0]); 955 break; 956 957 default: 958 error = EINVAL; 959 break; 960 } 961 LFREEPATH(path); 962 return (error); 963} 964 965int 966linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 967{ 968 char *path; 969 int error, dfd; 970 971 dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; 972 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 973 974#ifdef DEBUG 975 if (ldebug(mknodat)) 976 printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); 977#endif 978 979 switch (args->mode & S_IFMT) { 980 case S_IFIFO: 981 case S_IFSOCK: 982 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); 983 break; 984 985 case S_IFCHR: 986 case S_IFBLK: 987 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, 988 args->dev); 989 break; 990 991 case S_IFDIR: 992 error = EPERM; 993 break; 994 995 case 0: 996 args->mode |= S_IFREG; 997 /* FALLTHROUGH */ 998 case S_IFREG: 999 error = kern_openat(td, dfd, path, UIO_SYSSPACE, 1000 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1001 if (error == 0) 1002 kern_close(td, td->td_retval[0]); 1003 break; 1004 1005 default: 1006 error = EINVAL; 1007 break; 1008 } 1009 LFREEPATH(path); 1010 return (error); 1011} 1012 1013/* 1014 * UGH! This is just about the dumbest idea I've ever heard!! 1015 */ 1016int 1017linux_personality(struct thread *td, struct linux_personality_args *args) 1018{ 1019#ifdef DEBUG 1020 if (ldebug(personality)) 1021 printf(ARGS(personality, "%lu"), (unsigned long)args->per); 1022#endif 1023 if (args->per != 0) 1024 return (EINVAL); 1025 1026 /* Yes Jim, it's still a Linux... 
*/ 1027 td->td_retval[0] = 0; 1028 return (0); 1029} 1030 1031struct l_itimerval { 1032 l_timeval it_interval; 1033 l_timeval it_value; 1034}; 1035 1036#define B2L_ITIMERVAL(bip, lip) \ 1037 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1038 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1039 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1040 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1041 1042int 1043linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1044{ 1045 int error; 1046 struct l_itimerval ls; 1047 struct itimerval aitv, oitv; 1048 1049#ifdef DEBUG 1050 if (ldebug(setitimer)) 1051 printf(ARGS(setitimer, "%p, %p"), 1052 (void *)uap->itv, (void *)uap->oitv); 1053#endif 1054 1055 if (uap->itv == NULL) { 1056 uap->itv = uap->oitv; 1057 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1058 } 1059 1060 error = copyin(uap->itv, &ls, sizeof(ls)); 1061 if (error != 0) 1062 return (error); 1063 B2L_ITIMERVAL(&aitv, &ls); 1064#ifdef DEBUG 1065 if (ldebug(setitimer)) { 1066 printf("setitimer: value: sec: %jd, usec: %ld\n", 1067 (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); 1068 printf("setitimer: interval: sec: %jd, usec: %ld\n", 1069 (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); 1070 } 1071#endif 1072 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1073 if (error != 0 || uap->oitv == NULL) 1074 return (error); 1075 B2L_ITIMERVAL(&ls, &oitv); 1076 1077 return (copyout(&ls, uap->oitv, sizeof(ls))); 1078} 1079 1080int 1081linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1082{ 1083 int error; 1084 struct l_itimerval ls; 1085 struct itimerval aitv; 1086 1087#ifdef DEBUG 1088 if (ldebug(getitimer)) 1089 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 1090#endif 1091 error = kern_getitimer(td, uap->which, &aitv); 1092 if (error != 0) 1093 return (error); 1094 B2L_ITIMERVAL(&ls, &aitv); 1095 return (copyout(&ls, uap->itv, sizeof(ls))); 1096} 1097 1098int 
1099linux_nice(struct thread *td, struct linux_nice_args *args) 1100{ 1101 struct setpriority_args bsd_args; 1102 1103 bsd_args.which = PRIO_PROCESS; 1104 bsd_args.who = 0; /* current process */ 1105 bsd_args.prio = args->inc; 1106 return (sys_setpriority(td, &bsd_args)); 1107} 1108 1109int 1110linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1111{ 1112 struct ucred *newcred, *oldcred; 1113 l_gid_t *linux_gidset; 1114 gid_t *bsd_gidset; 1115 int ngrp, error; 1116 struct proc *p; 1117 1118 ngrp = args->gidsetsize; 1119 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1120 return (EINVAL); 1121 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK); 1122 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1123 if (error) 1124 goto out; 1125 newcred = crget(); 1126 p = td->td_proc; 1127 PROC_LOCK(p); 1128 oldcred = crcopysafe(p, newcred); 1129 1130 /* 1131 * cr_groups[0] holds egid. Setting the whole set from 1132 * the supplied set will cause egid to be changed too. 1133 * Keep cr_groups[0] unchanged to prevent that. 
1134 */ 1135 1136 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { 1137 PROC_UNLOCK(p); 1138 crfree(newcred); 1139 goto out; 1140 } 1141 1142 if (ngrp > 0) { 1143 newcred->cr_ngroups = ngrp + 1; 1144 1145 bsd_gidset = newcred->cr_groups; 1146 ngrp--; 1147 while (ngrp >= 0) { 1148 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1149 ngrp--; 1150 } 1151 } else 1152 newcred->cr_ngroups = 1; 1153 1154 setsugid(p); 1155 p->p_ucred = newcred; 1156 PROC_UNLOCK(p); 1157 crfree(oldcred); 1158 error = 0; 1159out: 1160 free(linux_gidset, M_TEMP); 1161 return (error); 1162} 1163 1164int 1165linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1166{ 1167 struct ucred *cred; 1168 l_gid_t *linux_gidset; 1169 gid_t *bsd_gidset; 1170 int bsd_gidsetsz, ngrp, error; 1171 1172 cred = td->td_ucred; 1173 bsd_gidset = cred->cr_groups; 1174 bsd_gidsetsz = cred->cr_ngroups - 1; 1175 1176 /* 1177 * cr_groups[0] holds egid. Returning the whole set 1178 * here will cause a duplicate. Exclude cr_groups[0] 1179 * to prevent that. 
1180 */ 1181 1182 if ((ngrp = args->gidsetsize) == 0) { 1183 td->td_retval[0] = bsd_gidsetsz; 1184 return (0); 1185 } 1186 1187 if (ngrp < bsd_gidsetsz) 1188 return (EINVAL); 1189 1190 ngrp = 0; 1191 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1192 M_TEMP, M_WAITOK); 1193 while (ngrp < bsd_gidsetsz) { 1194 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1195 ngrp++; 1196 } 1197 1198 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1199 free(linux_gidset, M_TEMP); 1200 if (error) 1201 return (error); 1202 1203 td->td_retval[0] = ngrp; 1204 return (0); 1205} 1206 1207int 1208linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1209{ 1210 struct rlimit bsd_rlim; 1211 struct l_rlimit rlim; 1212 u_int which; 1213 int error; 1214 1215#ifdef DEBUG 1216 if (ldebug(setrlimit)) 1217 printf(ARGS(setrlimit, "%d, %p"), 1218 args->resource, (void *)args->rlim); 1219#endif 1220 1221 if (args->resource >= LINUX_RLIM_NLIMITS) 1222 return (EINVAL); 1223 1224 which = linux_to_bsd_resource[args->resource]; 1225 if (which == -1) 1226 return (EINVAL); 1227 1228 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1229 if (error) 1230 return (error); 1231 1232 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1233 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1234 return (kern_setrlimit(td, which, &bsd_rlim)); 1235} 1236 1237int 1238linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1239{ 1240 struct l_rlimit rlim; 1241 struct proc *p = td->td_proc; 1242 struct rlimit bsd_rlim; 1243 u_int which; 1244 1245#ifdef DEBUG 1246 if (ldebug(old_getrlimit)) 1247 printf(ARGS(old_getrlimit, "%d, %p"), 1248 args->resource, (void *)args->rlim); 1249#endif 1250 1251 if (args->resource >= LINUX_RLIM_NLIMITS) 1252 return (EINVAL); 1253 1254 which = linux_to_bsd_resource[args->resource]; 1255 if (which == -1) 1256 return (EINVAL); 1257 1258 PROC_LOCK(p); 1259 lim_rlimit(p, which, &bsd_rlim); 1260 PROC_UNLOCK(p); 1261 1262#ifdef 
COMPAT_LINUX32 1263 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1264 if (rlim.rlim_cur == UINT_MAX) 1265 rlim.rlim_cur = INT_MAX; 1266 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1267 if (rlim.rlim_max == UINT_MAX) 1268 rlim.rlim_max = INT_MAX; 1269#else 1270 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1271 if (rlim.rlim_cur == ULONG_MAX) 1272 rlim.rlim_cur = LONG_MAX; 1273 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1274 if (rlim.rlim_max == ULONG_MAX) 1275 rlim.rlim_max = LONG_MAX; 1276#endif 1277 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1278} 1279 1280int 1281linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1282{ 1283 struct l_rlimit rlim; 1284 struct proc *p = td->td_proc; 1285 struct rlimit bsd_rlim; 1286 u_int which; 1287 1288#ifdef DEBUG 1289 if (ldebug(getrlimit)) 1290 printf(ARGS(getrlimit, "%d, %p"), 1291 args->resource, (void *)args->rlim); 1292#endif 1293 1294 if (args->resource >= LINUX_RLIM_NLIMITS) 1295 return (EINVAL); 1296 1297 which = linux_to_bsd_resource[args->resource]; 1298 if (which == -1) 1299 return (EINVAL); 1300 1301 PROC_LOCK(p); 1302 lim_rlimit(p, which, &bsd_rlim); 1303 PROC_UNLOCK(p); 1304 1305 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1306 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1307 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1308} 1309 1310int 1311linux_sched_setscheduler(struct thread *td, 1312 struct linux_sched_setscheduler_args *args) 1313{ 1314 struct sched_param sched_param; 1315 struct thread *tdt; 1316 int error, policy; 1317 1318#ifdef DEBUG 1319 if (ldebug(sched_setscheduler)) 1320 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1321 args->pid, args->policy, (const void *)args->param); 1322#endif 1323 1324 switch (args->policy) { 1325 case LINUX_SCHED_OTHER: 1326 policy = SCHED_OTHER; 1327 break; 1328 case LINUX_SCHED_FIFO: 1329 policy = SCHED_FIFO; 1330 break; 1331 case LINUX_SCHED_RR: 1332 policy = SCHED_RR; 1333 break; 1334 default: 1335 return (EINVAL); 
1336 } 1337 1338 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1339 if (error) 1340 return (error); 1341 1342 tdt = linux_tdfind(td, args->pid, -1); 1343 if (tdt == NULL) 1344 return (ESRCH); 1345 1346 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1347 PROC_UNLOCK(tdt->td_proc); 1348 return (error); 1349} 1350 1351int 1352linux_sched_getscheduler(struct thread *td, 1353 struct linux_sched_getscheduler_args *args) 1354{ 1355 struct thread *tdt; 1356 int error, policy; 1357 1358#ifdef DEBUG 1359 if (ldebug(sched_getscheduler)) 1360 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1361#endif 1362 1363 tdt = linux_tdfind(td, args->pid, -1); 1364 if (tdt == NULL) 1365 return (ESRCH); 1366 1367 error = kern_sched_getscheduler(td, tdt, &policy); 1368 PROC_UNLOCK(tdt->td_proc); 1369 1370 switch (policy) { 1371 case SCHED_OTHER: 1372 td->td_retval[0] = LINUX_SCHED_OTHER; 1373 break; 1374 case SCHED_FIFO: 1375 td->td_retval[0] = LINUX_SCHED_FIFO; 1376 break; 1377 case SCHED_RR: 1378 td->td_retval[0] = LINUX_SCHED_RR; 1379 break; 1380 } 1381 return (error); 1382} 1383 1384int 1385linux_sched_get_priority_max(struct thread *td, 1386 struct linux_sched_get_priority_max_args *args) 1387{ 1388 struct sched_get_priority_max_args bsd; 1389 1390#ifdef DEBUG 1391 if (ldebug(sched_get_priority_max)) 1392 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1393#endif 1394 1395 switch (args->policy) { 1396 case LINUX_SCHED_OTHER: 1397 bsd.policy = SCHED_OTHER; 1398 break; 1399 case LINUX_SCHED_FIFO: 1400 bsd.policy = SCHED_FIFO; 1401 break; 1402 case LINUX_SCHED_RR: 1403 bsd.policy = SCHED_RR; 1404 break; 1405 default: 1406 return (EINVAL); 1407 } 1408 return (sys_sched_get_priority_max(td, &bsd)); 1409} 1410 1411int 1412linux_sched_get_priority_min(struct thread *td, 1413 struct linux_sched_get_priority_min_args *args) 1414{ 1415 struct sched_get_priority_min_args bsd; 1416 1417#ifdef DEBUG 1418 if (ldebug(sched_get_priority_min)) 1419 
printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1420#endif 1421 1422 switch (args->policy) { 1423 case LINUX_SCHED_OTHER: 1424 bsd.policy = SCHED_OTHER; 1425 break; 1426 case LINUX_SCHED_FIFO: 1427 bsd.policy = SCHED_FIFO; 1428 break; 1429 case LINUX_SCHED_RR: 1430 bsd.policy = SCHED_RR; 1431 break; 1432 default: 1433 return (EINVAL); 1434 } 1435 return (sys_sched_get_priority_min(td, &bsd)); 1436} 1437 1438#define REBOOT_CAD_ON 0x89abcdef 1439#define REBOOT_CAD_OFF 0 1440#define REBOOT_HALT 0xcdef0123 1441#define REBOOT_RESTART 0x01234567 1442#define REBOOT_RESTART2 0xA1B2C3D4 1443#define REBOOT_POWEROFF 0x4321FEDC 1444#define REBOOT_MAGIC1 0xfee1dead 1445#define REBOOT_MAGIC2 0x28121969 1446#define REBOOT_MAGIC2A 0x05121996 1447#define REBOOT_MAGIC2B 0x16041998 1448 1449int 1450linux_reboot(struct thread *td, struct linux_reboot_args *args) 1451{ 1452 struct reboot_args bsd_args; 1453 1454#ifdef DEBUG 1455 if (ldebug(reboot)) 1456 printf(ARGS(reboot, "0x%x"), args->cmd); 1457#endif 1458 1459 if (args->magic1 != REBOOT_MAGIC1) 1460 return (EINVAL); 1461 1462 switch (args->magic2) { 1463 case REBOOT_MAGIC2: 1464 case REBOOT_MAGIC2A: 1465 case REBOOT_MAGIC2B: 1466 break; 1467 default: 1468 return (EINVAL); 1469 } 1470 1471 switch (args->cmd) { 1472 case REBOOT_CAD_ON: 1473 case REBOOT_CAD_OFF: 1474 return (priv_check(td, PRIV_REBOOT)); 1475 case REBOOT_HALT: 1476 bsd_args.opt = RB_HALT; 1477 break; 1478 case REBOOT_RESTART: 1479 case REBOOT_RESTART2: 1480 bsd_args.opt = 0; 1481 break; 1482 case REBOOT_POWEROFF: 1483 bsd_args.opt = RB_POWEROFF; 1484 break; 1485 default: 1486 return (EINVAL); 1487 } 1488 return (sys_reboot(td, &bsd_args)); 1489} 1490 1491 1492/* 1493 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1494 * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that 1495 * are assumed to be preserved. The following lightweight syscalls fixes 1496 * this. 
See also linux_getgid16() and linux_getuid16() in linux_uid16.c 1497 * 1498 * linux_getpid() - MP SAFE 1499 * linux_getgid() - MP SAFE 1500 * linux_getuid() - MP SAFE 1501 */ 1502 1503int 1504linux_getpid(struct thread *td, struct linux_getpid_args *args) 1505{ 1506 1507#ifdef DEBUG 1508 if (ldebug(getpid)) 1509 printf(ARGS(getpid, "")); 1510#endif 1511 td->td_retval[0] = td->td_proc->p_pid; 1512 1513 return (0); 1514} 1515 1516int 1517linux_gettid(struct thread *td, struct linux_gettid_args *args) 1518{ 1519 struct linux_emuldata *em; 1520 1521#ifdef DEBUG 1522 if (ldebug(gettid)) 1523 printf(ARGS(gettid, "")); 1524#endif 1525 1526 em = em_find(td); 1527 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1528 1529 td->td_retval[0] = em->em_tid; 1530 1531 return (0); 1532} 1533 1534 1535int 1536linux_getppid(struct thread *td, struct linux_getppid_args *args) 1537{ 1538 1539#ifdef DEBUG 1540 if (ldebug(getppid)) 1541 printf(ARGS(getppid, "")); 1542#endif 1543 1544 PROC_LOCK(td->td_proc); 1545 td->td_retval[0] = td->td_proc->p_pptr->p_pid; 1546 PROC_UNLOCK(td->td_proc); 1547 return (0); 1548} 1549 1550int 1551linux_getgid(struct thread *td, struct linux_getgid_args *args) 1552{ 1553 1554#ifdef DEBUG 1555 if (ldebug(getgid)) 1556 printf(ARGS(getgid, "")); 1557#endif 1558 1559 td->td_retval[0] = td->td_ucred->cr_rgid; 1560 return (0); 1561} 1562 1563int 1564linux_getuid(struct thread *td, struct linux_getuid_args *args) 1565{ 1566 1567#ifdef DEBUG 1568 if (ldebug(getuid)) 1569 printf(ARGS(getuid, "")); 1570#endif 1571 1572 td->td_retval[0] = td->td_ucred->cr_ruid; 1573 return (0); 1574} 1575 1576 1577int 1578linux_getsid(struct thread *td, struct linux_getsid_args *args) 1579{ 1580 struct getsid_args bsd; 1581 1582#ifdef DEBUG 1583 if (ldebug(getsid)) 1584 printf(ARGS(getsid, "%i"), args->pid); 1585#endif 1586 1587 bsd.pid = args->pid; 1588 return (sys_getsid(td, &bsd)); 1589} 1590 1591int 1592linux_nosys(struct thread *td, struct nosys_args *ignore) 1593{ 1594 
1595 return (ENOSYS); 1596} 1597 1598int 1599linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1600{ 1601 struct getpriority_args bsd_args; 1602 int error; 1603 1604#ifdef DEBUG 1605 if (ldebug(getpriority)) 1606 printf(ARGS(getpriority, "%i, %i"), args->which, args->who); 1607#endif 1608 1609 bsd_args.which = args->which; 1610 bsd_args.who = args->who; 1611 error = sys_getpriority(td, &bsd_args); 1612 td->td_retval[0] = 20 - td->td_retval[0]; 1613 return (error); 1614} 1615 1616int 1617linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1618{ 1619 int name[2]; 1620 1621#ifdef DEBUG 1622 if (ldebug(sethostname)) 1623 printf(ARGS(sethostname, "*, %i"), args->len); 1624#endif 1625 1626 name[0] = CTL_KERN; 1627 name[1] = KERN_HOSTNAME; 1628 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1629 args->len, 0, 0)); 1630} 1631 1632int 1633linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1634{ 1635 int name[2]; 1636 1637#ifdef DEBUG 1638 if (ldebug(setdomainname)) 1639 printf(ARGS(setdomainname, "*, %i"), args->len); 1640#endif 1641 1642 name[0] = CTL_KERN; 1643 name[1] = KERN_NISDOMAINNAME; 1644 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1645 args->len, 0, 0)); 1646} 1647 1648int 1649linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1650{ 1651 1652#ifdef DEBUG 1653 if (ldebug(exit_group)) 1654 printf(ARGS(exit_group, "%i"), args->error_code); 1655#endif 1656 1657 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1658 args->error_code); 1659 1660 /* 1661 * XXX: we should send a signal to the parent if 1662 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1663 * as it doesnt occur often. 
1664 */ 1665 exit1(td, W_EXITCODE(args->error_code, 0)); 1666 /* NOTREACHED */ 1667} 1668 1669#define _LINUX_CAPABILITY_VERSION 0x19980330 1670 1671struct l_user_cap_header { 1672 l_int version; 1673 l_int pid; 1674}; 1675 1676struct l_user_cap_data { 1677 l_int effective; 1678 l_int permitted; 1679 l_int inheritable; 1680}; 1681 1682int 1683linux_capget(struct thread *td, struct linux_capget_args *args) 1684{ 1685 struct l_user_cap_header luch; 1686 struct l_user_cap_data lucd; 1687 int error; 1688 1689 if (args->hdrp == NULL) 1690 return (EFAULT); 1691 1692 error = copyin(args->hdrp, &luch, sizeof(luch)); 1693 if (error != 0) 1694 return (error); 1695 1696 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1697 luch.version = _LINUX_CAPABILITY_VERSION; 1698 error = copyout(&luch, args->hdrp, sizeof(luch)); 1699 if (error) 1700 return (error); 1701 return (EINVAL); 1702 } 1703 1704 if (luch.pid) 1705 return (EPERM); 1706 1707 if (args->datap) { 1708 /* 1709 * The current implementation doesn't support setting 1710 * a capability (it's essentially a stub) so indicate 1711 * that no capabilities are currently set or available 1712 * to request. 
1713 */ 1714 bzero (&lucd, sizeof(lucd)); 1715 error = copyout(&lucd, args->datap, sizeof(lucd)); 1716 } 1717 1718 return (error); 1719} 1720 1721int 1722linux_capset(struct thread *td, struct linux_capset_args *args) 1723{ 1724 struct l_user_cap_header luch; 1725 struct l_user_cap_data lucd; 1726 int error; 1727 1728 if (args->hdrp == NULL || args->datap == NULL) 1729 return (EFAULT); 1730 1731 error = copyin(args->hdrp, &luch, sizeof(luch)); 1732 if (error != 0) 1733 return (error); 1734 1735 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1736 luch.version = _LINUX_CAPABILITY_VERSION; 1737 error = copyout(&luch, args->hdrp, sizeof(luch)); 1738 if (error) 1739 return (error); 1740 return (EINVAL); 1741 } 1742 1743 if (luch.pid) 1744 return (EPERM); 1745 1746 error = copyin(args->datap, &lucd, sizeof(lucd)); 1747 if (error != 0) 1748 return (error); 1749 1750 /* We currently don't support setting any capabilities. */ 1751 if (lucd.effective || lucd.permitted || lucd.inheritable) { 1752 linux_msg(td, 1753 "capset effective=0x%x, permitted=0x%x, " 1754 "inheritable=0x%x is not implemented", 1755 (int)lucd.effective, (int)lucd.permitted, 1756 (int)lucd.inheritable); 1757 return (EPERM); 1758 } 1759 1760 return (0); 1761} 1762 1763int 1764linux_prctl(struct thread *td, struct linux_prctl_args *args) 1765{ 1766 int error = 0, max_size; 1767 struct proc *p = td->td_proc; 1768 char comm[LINUX_MAX_COMM_LEN]; 1769 struct linux_emuldata *em; 1770 int pdeath_signal; 1771 1772#ifdef DEBUG 1773 if (ldebug(prctl)) 1774 printf(ARGS(prctl, "%d, %d, %d, %d, %d"), args->option, 1775 args->arg2, args->arg3, args->arg4, args->arg5); 1776#endif 1777 1778 switch (args->option) { 1779 case LINUX_PR_SET_PDEATHSIG: 1780 if (!LINUX_SIG_VALID(args->arg2)) 1781 return (EINVAL); 1782 em = em_find(td); 1783 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1784 em->pdeath_signal = args->arg2; 1785 break; 1786 case LINUX_PR_GET_PDEATHSIG: 1787 em = em_find(td); 1788 KASSERT(em != NULL, 
("prctl: emuldata not found.\n")); 1789 pdeath_signal = em->pdeath_signal; 1790 error = copyout(&pdeath_signal, 1791 (void *)(register_t)args->arg2, 1792 sizeof(pdeath_signal)); 1793 break; 1794 case LINUX_PR_GET_KEEPCAPS: 1795 /* 1796 * Indicate that we always clear the effective and 1797 * permitted capability sets when the user id becomes 1798 * non-zero (actually the capability sets are simply 1799 * always zero in the current implementation). 1800 */ 1801 td->td_retval[0] = 0; 1802 break; 1803 case LINUX_PR_SET_KEEPCAPS: 1804 /* 1805 * Ignore requests to keep the effective and permitted 1806 * capability sets when the user id becomes non-zero. 1807 */ 1808 break; 1809 case LINUX_PR_SET_NAME: 1810 /* 1811 * To be on the safe side we need to make sure to not 1812 * overflow the size a linux program expects. We already 1813 * do this here in the copyin, so that we don't need to 1814 * check on copyout. 1815 */ 1816 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1817 error = copyinstr((void *)(register_t)args->arg2, comm, 1818 max_size, NULL); 1819 1820 /* Linux silently truncates the name if it is too long. */ 1821 if (error == ENAMETOOLONG) { 1822 /* 1823 * XXX: copyinstr() isn't documented to populate the 1824 * array completely, so do a copyin() to be on the 1825 * safe side. This should be changed in case 1826 * copyinstr() is changed to guarantee this. 
1827 */ 1828 error = copyin((void *)(register_t)args->arg2, comm, 1829 max_size - 1); 1830 comm[max_size - 1] = '\0'; 1831 } 1832 if (error) 1833 return (error); 1834 1835 PROC_LOCK(p); 1836 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1837 PROC_UNLOCK(p); 1838 break; 1839 case LINUX_PR_GET_NAME: 1840 PROC_LOCK(p); 1841 strlcpy(comm, p->p_comm, sizeof(comm)); 1842 PROC_UNLOCK(p); 1843 error = copyout(comm, (void *)(register_t)args->arg2, 1844 strlen(comm) + 1); 1845 break; 1846 default: 1847 error = EINVAL; 1848 break; 1849 } 1850 1851 return (error); 1852} 1853 1854int 1855linux_sched_setparam(struct thread *td, 1856 struct linux_sched_setparam_args *uap) 1857{ 1858 struct sched_param sched_param; 1859 struct thread *tdt; 1860 int error; 1861 1862#ifdef DEBUG 1863 if (ldebug(sched_setparam)) 1864 printf(ARGS(sched_setparam, "%d, *"), uap->pid); 1865#endif 1866 1867 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1868 if (error) 1869 return (error); 1870 1871 tdt = linux_tdfind(td, uap->pid, -1); 1872 if (tdt == NULL) 1873 return (ESRCH); 1874 1875 error = kern_sched_setparam(td, tdt, &sched_param); 1876 PROC_UNLOCK(tdt->td_proc); 1877 return (error); 1878} 1879 1880int 1881linux_sched_getparam(struct thread *td, 1882 struct linux_sched_getparam_args *uap) 1883{ 1884 struct sched_param sched_param; 1885 struct thread *tdt; 1886 int error; 1887 1888#ifdef DEBUG 1889 if (ldebug(sched_getparam)) 1890 printf(ARGS(sched_getparam, "%d, *"), uap->pid); 1891#endif 1892 1893 tdt = linux_tdfind(td, uap->pid, -1); 1894 if (tdt == NULL) 1895 return (ESRCH); 1896 1897 error = kern_sched_getparam(td, tdt, &sched_param); 1898 PROC_UNLOCK(tdt->td_proc); 1899 if (error == 0) 1900 error = copyout(&sched_param, uap->param, 1901 sizeof(sched_param)); 1902 return (error); 1903} 1904 1905/* 1906 * Get affinity of a process. 
1907 */ 1908int 1909linux_sched_getaffinity(struct thread *td, 1910 struct linux_sched_getaffinity_args *args) 1911{ 1912 int error; 1913 struct thread *tdt; 1914 struct cpuset_getaffinity_args cga; 1915 1916#ifdef DEBUG 1917 if (ldebug(sched_getaffinity)) 1918 printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, 1919 args->len); 1920#endif 1921 if (args->len < sizeof(cpuset_t)) 1922 return (EINVAL); 1923 1924 tdt = linux_tdfind(td, args->pid, -1); 1925 if (tdt == NULL) 1926 return (ESRCH); 1927 1928 PROC_UNLOCK(tdt->td_proc); 1929 cga.level = CPU_LEVEL_WHICH; 1930 cga.which = CPU_WHICH_TID; 1931 cga.id = tdt->td_tid; 1932 cga.cpusetsize = sizeof(cpuset_t); 1933 cga.mask = (cpuset_t *) args->user_mask_ptr; 1934 1935 if ((error = sys_cpuset_getaffinity(td, &cga)) == 0) 1936 td->td_retval[0] = sizeof(cpuset_t); 1937 1938 return (error); 1939} 1940 1941/* 1942 * Set affinity of a process. 1943 */ 1944int 1945linux_sched_setaffinity(struct thread *td, 1946 struct linux_sched_setaffinity_args *args) 1947{ 1948 struct cpuset_setaffinity_args csa; 1949 struct thread *tdt; 1950 1951#ifdef DEBUG 1952 if (ldebug(sched_setaffinity)) 1953 printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, 1954 args->len); 1955#endif 1956 if (args->len < sizeof(cpuset_t)) 1957 return (EINVAL); 1958 1959 tdt = linux_tdfind(td, args->pid, -1); 1960 if (tdt == NULL) 1961 return (ESRCH); 1962 1963 PROC_UNLOCK(tdt->td_proc); 1964 csa.level = CPU_LEVEL_WHICH; 1965 csa.which = CPU_WHICH_TID; 1966 csa.id = tdt->td_tid; 1967 csa.cpusetsize = sizeof(cpuset_t); 1968 csa.mask = (cpuset_t *) args->user_mask_ptr; 1969 1970 return (sys_cpuset_setaffinity(td, &csa)); 1971} 1972 1973int 1974linux_sched_rr_get_interval(struct thread *td, 1975 struct linux_sched_rr_get_interval_args *uap) 1976{ 1977 struct timespec ts; 1978 struct l_timespec lts; 1979 struct thread *tdt; 1980 int error; 1981 1982 tdt = linux_tdfind(td, uap->pid, -1); 1983 if (tdt == NULL) 1984 return (ESRCH); 1985 1986 error = 
kern_sched_rr_get_interval_td(td, tdt, &ts); 1987 PROC_UNLOCK(tdt->td_proc); 1988 if (error != 0) 1989 return (error); 1990 lts.tv_sec = ts.tv_sec; 1991 lts.tv_nsec = ts.tv_nsec; 1992 return (copyout(<s, uap->interval, sizeof(lts))); 1993} 1994 1995/* 1996 * In case when the Linux thread is the initial thread in 1997 * the thread group thread id is equal to the process id. 1998 * Glibc depends on this magic (assert in pthread_getattr_np.c). 1999 */ 2000struct thread * 2001linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2002{ 2003 struct linux_emuldata *em; 2004 struct thread *tdt; 2005 struct proc *p; 2006 2007 tdt = NULL; 2008 if (tid == 0 || tid == td->td_tid) { 2009 tdt = td; 2010 PROC_LOCK(tdt->td_proc); 2011 } else if (tid > PID_MAX) 2012 tdt = tdfind(tid, pid); 2013 else { 2014 /* 2015 * Initial thread where the tid equal to the pid. 2016 */ 2017 p = pfind(tid); 2018 if (p != NULL) { 2019 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 2020 /* 2021 * p is not a Linuxulator process. 2022 */ 2023 PROC_UNLOCK(p); 2024 return (NULL); 2025 } 2026 FOREACH_THREAD_IN_PROC(p, tdt) { 2027 em = em_find(tdt); 2028 if (tid == em->em_tid) 2029 return (tdt); 2030 } 2031 PROC_UNLOCK(p); 2032 } 2033 return (NULL); 2034 } 2035 2036 return (tdt); 2037} 2038 2039void 2040linux_to_bsd_waitopts(int options, int *bsdopts) 2041{ 2042 2043 if (options & LINUX_WNOHANG) 2044 *bsdopts |= WNOHANG; 2045 if (options & LINUX_WUNTRACED) 2046 *bsdopts |= WUNTRACED; 2047 if (options & LINUX_WEXITED) 2048 *bsdopts |= WEXITED; 2049 if (options & LINUX_WCONTINUED) 2050 *bsdopts |= WCONTINUED; 2051 if (options & LINUX_WNOWAIT) 2052 *bsdopts |= WNOWAIT; 2053 2054 if (options & __WCLONE) 2055 *bsdopts |= WLINUXCLONE; 2056} 2057