linux_futex.c revision 301426
1/* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */ 2 3/*- 4 * Copyright (c) 2009-2016 Dmitry Chagin 5 * Copyright (c) 2005 Emmanuel Dreyfus 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Emmanuel Dreyfus 19 * 4. The name of the author may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 25 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36#include <sys/cdefs.h> 37__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_futex.c 301426 2016-06-05 06:06:55Z dchagin $"); 38#if 0 39__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $"); 40#endif 41 42#include "opt_compat.h" 43#include "opt_kdtrace.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/imgact.h> 48#include <sys/kernel.h> 49#include <sys/ktr.h> 50#include <sys/lock.h> 51#include <sys/malloc.h> 52#include <sys/mutex.h> 53#include <sys/priv.h> 54#include <sys/proc.h> 55#include <sys/queue.h> 56#include <sys/sched.h> 57#include <sys/sdt.h> 58#include <sys/umtx.h> 59 60#include <vm/vm_extern.h> 61 62#ifdef COMPAT_LINUX32 63#include <machine/../linux32/linux.h> 64#include <machine/../linux32/linux32_proto.h> 65#else 66#include <machine/../linux/linux.h> 67#include <machine/../linux/linux_proto.h> 68#endif 69#include <compat/linux/linux_dtrace.h> 70#include <compat/linux/linux_emul.h> 71#include <compat/linux/linux_futex.h> 72#include <compat/linux/linux_timer.h> 73#include <compat/linux/linux_util.h> 74 75/* DTrace init */ 76LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); 77 78/** 79 * Futex part for the special DTrace module "locks". 80 */ 81LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *"); 82LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *"); 83 84/** 85 * Per futex probes. 86 */ 87LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *"); 88LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *"); 89 90/** 91 * DTrace probes in this module. 92 */ 93LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *", 94 "struct waiting_proc *"); 95LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t", 96 "int"); 97LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t", 98 "int"); 99LIN_SDT_PROBE_DEFINE0(futex, futex_put, return); 100LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **", 101 "uint32_t"); 102LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int"); 103LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t", 104 "int"); 105LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *"); 106LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int"); 107LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int"); 108LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *", 109 "struct waiting_proc **", "struct futex **"); 110LIN_SDT_PROBE_DEFINE0(futex, futex_get, error); 111LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int"); 112LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *", 113 "struct waiting_proc **", "struct timespec *"); 114LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *", 115 "struct waiting_proc *", "uint32_t *", "uint32_t"); 116LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *", 117 "struct waiting_proc *"); 118LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int"); 119LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int", 120 "uint32_t"); 121LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t", 122 "struct waiting_proc *", "uint32_t"); 123LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *"); 124LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int"); 125LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int", 126 "struct futex *", "int"); 127LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *"); 128LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *", 129 "struct waiting_proc *", "uint32_t"); 130LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int"); 131LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *", 132 "struct waiting_proc **", "struct timespec *", "uint32_t"); 133LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int"); 134LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int"); 135LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *", 136 "int", "uint32_t"); 137LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int", 138 "int"); 139LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check); 140LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int"); 141LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int"); 142LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int"); 143LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *", 144 "struct linux_sys_futex_args *"); 145LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch); 146LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int"); 147LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use); 148LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *", 149 "uint32_t", "uint32_t"); 150LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq, 151 "uint32_t *", "uint32_t", "int", "uint32_t"); 152LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *", 153 "uint32_t", "uint32_t"); 154LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *", 155 "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *"); 156LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq, 157 "uint32_t", "int"); 158LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *", 159 "int", "uint32_t", "uint32_t *", "uint32_t"); 160LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault); 161LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi); 162LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi); 163LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi); 164LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue); 165LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi); 166LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi); 167LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int"); 168LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int"); 169LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *", 170 "struct linux_set_robust_list_args *"); 171LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error); 172LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int"); 173LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *", 174 "struct linux_get_robust_list_args *"); 175LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int"); 176LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int"); 177LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, 178 "struct linux_emuldata *", "uint32_t *", "unsigned int"); 179LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int"); 180LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int"); 181LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry, 182 "struct linux_robust_list **", "struct linux_robust_list **", 183 "unsigned int *"); 184LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int"); 185LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int"); 186LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *", 187 "struct linux_emuldata *"); 188LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int"); 189LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return); 190 191struct futex; 192 193struct waiting_proc { 194 uint32_t wp_flags; 195 struct futex *wp_futex; 196 TAILQ_ENTRY(waiting_proc) wp_list; 197}; 198 199struct futex { 200 struct mtx f_lck; 201 uint32_t *f_uaddr; /* user-supplied value, for debug */ 202 struct umtx_key f_key; 203 uint32_t f_refcount; 204 uint32_t f_bitset; 205 LIST_ENTRY(futex) f_list; 206 TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; 207}; 208 209struct futex_list futex_list; 210 211#define FUTEX_LOCK(f) mtx_lock(&(f)->f_lck) 212#define FUTEX_LOCKED(f) mtx_owned(&(f)->f_lck) 213#define FUTEX_UNLOCK(f) mtx_unlock(&(f)->f_lck) 214#define FUTEX_INIT(f) do { \ 215 mtx_init(&(f)->f_lck, "ftlk", NULL, \ 216 MTX_DUPOK); \ 217 LIN_SDT_PROBE1(futex, futex, create, \ 218 &(f)->f_lck); \ 219 } while (0) 220#define FUTEX_DESTROY(f) do { \ 221 LIN_SDT_PROBE1(futex, futex, destroy, \ 222 &(f)->f_lck); \ 223 mtx_destroy(&(f)->f_lck); \ 224 } while (0) 225#define FUTEX_ASSERT_LOCKED(f) mtx_assert(&(f)->f_lck, MA_OWNED) 226#define FUTEX_ASSERT_UNLOCKED(f) mtx_assert(&(f)->f_lck, MA_NOTOWNED) 227 228struct mtx futex_mtx; /* protects the futex list */ 229#define FUTEXES_LOCK do { \ 230 mtx_lock(&futex_mtx); \ 231 LIN_SDT_PROBE1(locks, futex_mtx, \ 232 locked, &futex_mtx); \ 233 } while (0) 234#define FUTEXES_UNLOCK do { \ 235 LIN_SDT_PROBE1(locks, futex_mtx, \ 236 unlock, &futex_mtx); \ 237 mtx_unlock(&futex_mtx); \ 238 } while (0) 239 240/* flags for futex_get() */ 241#define FUTEX_CREATE_WP 0x1 /* create waiting_proc */ 242#define FUTEX_DONTCREATE 0x2 /* don't create futex if not exists */ 243#define FUTEX_DONTEXISTS 0x4 /* return EINVAL if futex exists */ 244#define FUTEX_SHARED 0x8 /* shared futex */ 245#define FUTEX_DONTLOCK 0x10 /* don't lock futex */ 246 247/* wp_flags */ 248#define FUTEX_WP_REQUEUED 0x1 /* wp requeued - wp moved from wp_list 249 * of futex where thread sleep to wp_list 250 * of another futex. 251 */ 252#define FUTEX_WP_REMOVED 0x2 /* wp is woken up and removed from futex 253 * wp_list to prevent double wakeup. 254 */ 255 256static void futex_put(struct futex *, struct waiting_proc *); 257static int futex_get0(uint32_t *, struct futex **f, uint32_t); 258static int futex_get(uint32_t *, struct waiting_proc **, struct futex **, 259 uint32_t); 260static int futex_sleep(struct futex *, struct waiting_proc *, struct timespec *); 261static int futex_wake(struct futex *, int, uint32_t); 262static int futex_requeue(struct futex *, int, struct futex *, int); 263static int futex_copyin_timeout(int, struct l_timespec *, int, 264 struct timespec *); 265static int futex_wait(struct futex *, struct waiting_proc *, struct timespec *, 266 uint32_t); 267static void futex_lock(struct futex *); 268static void futex_unlock(struct futex *); 269static int futex_atomic_op(struct thread *, int, uint32_t *); 270static int handle_futex_death(struct linux_emuldata *, uint32_t *, 271 unsigned int); 272static int fetch_robust_entry(struct linux_robust_list **, 273 struct linux_robust_list **, unsigned int *); 274 275/* support.s */ 276int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval); 277int futex_addl(int oparg, uint32_t *uaddr, int *oldval); 278int futex_orl(int oparg, uint32_t *uaddr, int *oldval); 279int futex_andl(int oparg, uint32_t *uaddr, int *oldval); 280int futex_xorl(int oparg, uint32_t *uaddr, int *oldval); 281 282 283static int 284futex_copyin_timeout(int op, struct l_timespec *luts, int clockrt, 285 struct timespec *ts) 286{ 287 struct l_timespec lts; 288 struct timespec kts; 289 int error; 290 291 error = copyin(luts, <s, sizeof(lts)); 292 if (error) 293 return (error); 294 295 error = linux_to_native_timespec(ts, <s); 296 if (error) 297 return (error); 298 if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000) 299 return (EINVAL); 300 301 if (clockrt) { 302 nanotime(&kts); 303 timespecsub(ts, &kts); 304 } else if (op == LINUX_FUTEX_WAIT_BITSET) { 305 nanouptime(&kts); 306 timespecsub(ts, &kts); 307 } 308 return (error); 309} 310 311static void 312futex_put(struct futex *f, struct waiting_proc *wp) 313{ 314 LIN_SDT_PROBE2(futex, futex_put, entry, f, wp); 315 316 if (wp != NULL) { 317 if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0) 318 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 319 free(wp, M_FUTEX_WP); 320 } 321 322 FUTEXES_LOCK; 323 if (--f->f_refcount == 0) { 324 LIST_REMOVE(f, f_list); 325 FUTEXES_UNLOCK; 326 if (FUTEX_LOCKED(f)) 327 futex_unlock(f); 328 329 LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr, 330 f->f_refcount, f->f_key.shared); 331 LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d " 332 "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared); 333 umtx_key_release(&f->f_key); 334 FUTEX_DESTROY(f); 335 free(f, M_FUTEX); 336 337 LIN_SDT_PROBE0(futex, futex_put, return); 338 return; 339 } 340 341 LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount, 342 f->f_key.shared); 343 LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d", 344 f->f_uaddr, f->f_refcount, f->f_key.shared); 345 FUTEXES_UNLOCK; 346 if (FUTEX_LOCKED(f)) 347 futex_unlock(f); 348 349 LIN_SDT_PROBE0(futex, futex_put, return); 350} 351 352static int 353futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) 354{ 355 struct futex *f, *tmpf; 356 struct umtx_key key; 357 int error; 358 359 LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags); 360 361 *newf = tmpf = NULL; 362 363 error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ? 364 AUTO_SHARE : THREAD_SHARE, &key); 365 if (error) { 366 LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error); 367 LIN_SDT_PROBE1(futex, futex_get0, return, error); 368 return (error); 369 } 370retry: 371 FUTEXES_LOCK; 372 LIST_FOREACH(f, &futex_list, f_list) { 373 if (umtx_key_match(&f->f_key, &key)) { 374 if (tmpf != NULL) { 375 if (FUTEX_LOCKED(tmpf)) 376 futex_unlock(tmpf); 377 FUTEX_DESTROY(tmpf); 378 free(tmpf, M_FUTEX); 379 } 380 if (flags & FUTEX_DONTEXISTS) { 381 FUTEXES_UNLOCK; 382 umtx_key_release(&key); 383 384 LIN_SDT_PROBE1(futex, futex_get0, return, 385 EINVAL); 386 return (EINVAL); 387 } 388 389 /* 390 * Increment refcount of the found futex to 391 * prevent it from deallocation before FUTEX_LOCK() 392 */ 393 ++f->f_refcount; 394 FUTEXES_UNLOCK; 395 umtx_key_release(&key); 396 397 if ((flags & FUTEX_DONTLOCK) == 0) 398 futex_lock(f); 399 *newf = f; 400 LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr, 401 f->f_refcount, f->f_key.shared); 402 LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d", 403 uaddr, f->f_refcount, f->f_key.shared); 404 405 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 406 return (0); 407 } 408 } 409 410 if (flags & FUTEX_DONTCREATE) { 411 FUTEXES_UNLOCK; 412 umtx_key_release(&key); 413 LIN_SDT_PROBE1(futex, futex_get0, null, uaddr); 414 LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr); 415 416 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 417 return (0); 418 } 419 420 if (tmpf == NULL) { 421 FUTEXES_UNLOCK; 422 tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO); 423 tmpf->f_uaddr = uaddr; 424 tmpf->f_key = key; 425 tmpf->f_refcount = 1; 426 tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY; 427 FUTEX_INIT(tmpf); 428 TAILQ_INIT(&tmpf->f_waiting_proc); 429 430 /* 431 * Lock the new futex before an insert into the futex_list 432 * to prevent futex usage by other. 433 */ 434 if ((flags & FUTEX_DONTLOCK) == 0) 435 futex_lock(tmpf); 436 goto retry; 437 } 438 439 LIST_INSERT_HEAD(&futex_list, tmpf, f_list); 440 FUTEXES_UNLOCK; 441 442 LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount, 443 tmpf->f_key.shared); 444 LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new", 445 uaddr, tmpf->f_refcount, tmpf->f_key.shared); 446 *newf = tmpf; 447 448 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 449 return (0); 450} 451 452static int 453futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f, 454 uint32_t flags) 455{ 456 int error; 457 458 LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f); 459 460 if (flags & FUTEX_CREATE_WP) { 461 *wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK); 462 (*wp)->wp_flags = 0; 463 } 464 error = futex_get0(uaddr, f, flags); 465 if (error) { 466 LIN_SDT_PROBE0(futex, futex_get, error); 467 468 if (flags & FUTEX_CREATE_WP) 469 free(*wp, M_FUTEX_WP); 470 471 LIN_SDT_PROBE1(futex, futex_get, return, error); 472 return (error); 473 } 474 if (flags & FUTEX_CREATE_WP) { 475 TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list); 476 (*wp)->wp_futex = *f; 477 } 478 479 LIN_SDT_PROBE1(futex, futex_get, return, error); 480 return (error); 481} 482 483static inline void 484futex_lock(struct futex *f) 485{ 486 487 LINUX_CTR3(sys_futex, "futex_lock uaddr %p ref %d shared %d", 488 f->f_uaddr, f->f_refcount, f->f_key.shared); 489 FUTEX_ASSERT_UNLOCKED(f); 490 FUTEX_LOCK(f); 491} 492 493static inline void 494futex_unlock(struct futex *f) 495{ 496 497 LINUX_CTR3(sys_futex, "futex_unlock uaddr %p ref %d shared %d", 498 f->f_uaddr, f->f_refcount, f->f_key.shared); 499 FUTEX_ASSERT_LOCKED(f); 500 FUTEX_UNLOCK(f); 501} 502 503static int 504futex_sleep(struct futex *f, struct waiting_proc *wp, struct timespec *ts) 505{ 506 struct timespec uts; 507 sbintime_t sbt, prec, tmp; 508 time_t over; 509 int error; 510 511 FUTEX_ASSERT_LOCKED(f); 512 if (ts != NULL) { 513 uts = *ts; 514 if (uts.tv_sec > INT32_MAX / 2) { 515 over = uts.tv_sec - INT32_MAX / 2; 516 uts.tv_sec -= over; 517 } 518 tmp = tstosbt(uts); 519 if (TIMESEL(&sbt, tmp)) 520 sbt += tc_tick_sbt; 521 sbt += tmp; 522 prec = tmp; 523 prec >>= tc_precexp; 524 } else { 525 sbt = 0; 526 prec = 0; 527 } 528 LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, sbt); 529 LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %ld ref %d", 530 f->f_uaddr, wp, sbt, f->f_refcount); 531 532 error = msleep_sbt(wp, &f->f_lck, PCATCH, "futex", sbt, prec, C_ABSOLUTE); 533 if (wp->wp_flags & FUTEX_WP_REQUEUED) { 534 KASSERT(f != wp->wp_futex, ("futex != wp_futex")); 535 536 if (error) { 537 LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error, 538 f->f_uaddr, wp, wp->wp_futex->f_uaddr, 539 wp->wp_futex->f_refcount); 540 } 541 542 LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp" 543 " %p requeued uaddr %p ref %d", 544 error, f->f_uaddr, wp, wp->wp_futex->f_uaddr, 545 wp->wp_futex->f_refcount); 546 futex_put(f, NULL); 547 f = wp->wp_futex; 548 futex_lock(f); 549 } else { 550 if (error) { 551 LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error, 552 f->f_uaddr, wp); 553 } 554 LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p", 555 error, f->f_uaddr, wp); 556 } 557 558 futex_put(f, wp); 559 560 LIN_SDT_PROBE1(futex, futex_sleep, return, error); 561 return (error); 562} 563 564static int 565futex_wake(struct futex *f, int n, uint32_t bitset) 566{ 567 struct waiting_proc *wp, *wpt; 568 int count = 0; 569 570 LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset); 571 572 if (bitset == 0) { 573 LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL); 574 return (EINVAL); 575 } 576 577 FUTEX_ASSERT_LOCKED(f); 578 TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { 579 LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp, 580 f->f_refcount); 581 LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d", 582 f->f_uaddr, wp, f->f_refcount); 583 /* 584 * Unless we find a matching bit in 585 * the bitset, continue searching. 586 */ 587 if (!(wp->wp_futex->f_bitset & bitset)) 588 continue; 589 590 wp->wp_flags |= FUTEX_WP_REMOVED; 591 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 592 LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp); 593 wakeup_one(wp); 594 if (++count == n) 595 break; 596 } 597 598 LIN_SDT_PROBE1(futex, futex_wake, return, count); 599 return (count); 600} 601 602static int 603futex_requeue(struct futex *f, int n, struct futex *f2, int n2) 604{ 605 struct waiting_proc *wp, *wpt; 606 int count = 0; 607 608 LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2); 609 610 FUTEX_ASSERT_LOCKED(f); 611 FUTEX_ASSERT_LOCKED(f2); 612 613 TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { 614 if (++count <= n) { 615 LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p", 616 f->f_uaddr, wp); 617 wp->wp_flags |= FUTEX_WP_REMOVED; 618 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 619 LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp); 620 wakeup_one(wp); 621 } else { 622 LIN_SDT_PROBE3(futex, futex_requeue, requeue, 623 f->f_uaddr, wp, f2->f_uaddr); 624 LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p", 625 f->f_uaddr, wp, f2->f_uaddr); 626 wp->wp_flags |= FUTEX_WP_REQUEUED; 627 /* Move wp to wp_list of f2 futex */ 628 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 629 TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list); 630 631 /* 632 * Thread which sleeps on wp after waking should 633 * acquire f2 lock, so increment refcount of f2 to 634 * prevent it from premature deallocation. 635 */ 636 wp->wp_futex = f2; 637 FUTEXES_LOCK; 638 ++f2->f_refcount; 639 FUTEXES_UNLOCK; 640 if (count - n >= n2) 641 break; 642 } 643 } 644 645 LIN_SDT_PROBE1(futex, futex_requeue, return, count); 646 return (count); 647} 648 649static int 650futex_wait(struct futex *f, struct waiting_proc *wp, struct timespec *ts, 651 uint32_t bitset) 652{ 653 int error; 654 655 LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, ts, bitset); 656 657 if (bitset == 0) { 658 LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL); 659 return (EINVAL); 660 } 661 662 f->f_bitset = bitset; 663 error = futex_sleep(f, wp, ts); 664 if (error) 665 LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error); 666 if (error == EWOULDBLOCK) 667 error = ETIMEDOUT; 668 669 LIN_SDT_PROBE1(futex, futex_wait, return, error); 670 return (error); 671} 672 673static int 674futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr) 675{ 676 int op = (encoded_op >> 28) & 7; 677 int cmp = (encoded_op >> 24) & 15; 678 int oparg = (encoded_op << 8) >> 20; 679 int cmparg = (encoded_op << 20) >> 20; 680 int oldval = 0, ret; 681 682 LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr); 683 684 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 685 oparg = 1 << oparg; 686 687 LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg, 688 cmparg); 689 690 /* XXX: Linux verifies access here and returns EFAULT */ 691 LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check); 692 693 switch (op) { 694 case FUTEX_OP_SET: 695 ret = futex_xchgl(oparg, uaddr, &oldval); 696 break; 697 case FUTEX_OP_ADD: 698 ret = futex_addl(oparg, uaddr, &oldval); 699 break; 700 case FUTEX_OP_OR: 701 ret = futex_orl(oparg, uaddr, &oldval); 702 break; 703 case FUTEX_OP_ANDN: 704 ret = futex_andl(~oparg, uaddr, &oldval); 705 break; 706 case FUTEX_OP_XOR: 707 ret = futex_xorl(oparg, uaddr, &oldval); 708 break; 709 default: 710 LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op); 711 ret = -ENOSYS; 712 break; 713 } 714 715 if (ret) { 716 LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); 717 return (ret); 718 } 719 720 switch (cmp) { 721 case FUTEX_OP_CMP_EQ: 722 ret = (oldval == cmparg); 723 break; 724 case FUTEX_OP_CMP_NE: 725 ret = (oldval != cmparg); 726 break; 727 case FUTEX_OP_CMP_LT: 728 ret = (oldval < cmparg); 729 break; 730 case FUTEX_OP_CMP_GE: 731 ret = (oldval >= cmparg); 732 break; 733 case FUTEX_OP_CMP_LE: 734 ret = (oldval <= cmparg); 735 break; 736 case FUTEX_OP_CMP_GT: 737 ret = (oldval > cmparg); 738 break; 739 default: 740 LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp); 741 ret = -ENOSYS; 742 } 743 744 LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); 745 return (ret); 746} 747 748int 749linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 750{ 751 int clockrt, nrwake, op_ret, ret; 752 struct linux_pemuldata *pem; 753 struct waiting_proc *wp; 754 struct futex *f, *f2; 755 struct timespec uts, *ts; 756 int error, save; 757 uint32_t flags, val; 758 759 LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args); 760 761 if (args->op & LINUX_FUTEX_PRIVATE_FLAG) { 762 flags = 0; 763 args->op &= ~LINUX_FUTEX_PRIVATE_FLAG; 764 } else 765 flags = FUTEX_SHARED; 766 767 /* 768 * Currently support for switching between CLOCK_MONOTONIC and 769 * CLOCK_REALTIME is not present. However Linux forbids the use of 770 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and 771 * FUTEX_WAIT_REQUEUE_PI. 772 */ 773 clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME; 774 args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME; 775 if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET && 776 args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) { 777 LIN_SDT_PROBE0(futex, linux_sys_futex, 778 unimplemented_clockswitch); 779 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 780 return (ENOSYS); 781 } 782 783 error = 0; 784 f = f2 = NULL; 785 786 switch (args->op) { 787 case LINUX_FUTEX_WAIT: 788 args->val3 = FUTEX_BITSET_MATCH_ANY; 789 /* FALLTHROUGH */ 790 791 case LINUX_FUTEX_WAIT_BITSET: 792 LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr, 793 args->val, args->val3); 794 LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x", 795 args->uaddr, args->val, args->val3); 796 797 if (args->timeout != NULL) { 798 error = futex_copyin_timeout(args->op, args->timeout, 799 clockrt, &uts); 800 if (error) { 801 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 802 error); 803 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 804 return (error); 805 } 806 ts = &uts; 807 } else 808 ts = NULL; 809 810retry0: 811 error = futex_get(args->uaddr, &wp, &f, 812 flags | FUTEX_CREATE_WP); 813 if (error) { 814 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 815 return (error); 816 } 817 818 error = copyin_nofault(args->uaddr, &val, sizeof(val)); 819 if (error) { 820 futex_put(f, wp); 821 error = copyin(args->uaddr, &val, sizeof(val)); 822 if (error == 0) 823 goto retry0; 824 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 825 error); 826 LINUX_CTR1(sys_futex, "WAIT copyin failed %d", 827 error); 828 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 829 return (error); 830 } 831 if (val != args->val) { 832 LIN_SDT_PROBE4(futex, linux_sys_futex, 833 debug_wait_value_neq, args->uaddr, args->val, val, 834 args->val3); 835 LINUX_CTR3(sys_futex, 836 "WAIT uaddr %p val 0x%x != uval 0x%x", 837 args->uaddr, args->val, val); 838 futex_put(f, wp); 839 840 LIN_SDT_PROBE1(futex, linux_sys_futex, return, 841 EWOULDBLOCK); 842 return (EWOULDBLOCK); 843 } 844 845 error = futex_wait(f, wp, ts, args->val3); 846 break; 847 848 case LINUX_FUTEX_WAKE: 849 args->val3 = FUTEX_BITSET_MATCH_ANY; 850 /* FALLTHROUGH */ 851 852 case LINUX_FUTEX_WAKE_BITSET: 853 LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr, 854 args->val, args->val3); 855 LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x", 856 args->uaddr, args->val, args->val3); 857 858 error = futex_get(args->uaddr, NULL, &f, 859 flags | FUTEX_DONTCREATE); 860 if (error) { 861 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 862 return (error); 863 } 864 865 if (f == NULL) { 866 td->td_retval[0] = 0; 867 868 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 869 return (error); 870 } 871 td->td_retval[0] = futex_wake(f, args->val, args->val3); 872 futex_put(f, NULL); 873 break; 874 875 case LINUX_FUTEX_CMP_REQUEUE: 876 LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue, 877 args->uaddr, args->val, args->val3, args->uaddr2, 878 args->timeout); 879 LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p " 880 "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x", 881 args->uaddr, args->val, args->val3, args->uaddr2, 882 args->timeout); 883 884 /* 885 * Linux allows this, we would not, it is an incorrect 886 * usage of declared ABI, so return EINVAL. 887 */ 888 if (args->uaddr == args->uaddr2) { 889 LIN_SDT_PROBE0(futex, linux_sys_futex, 890 invalid_cmp_requeue_use); 891 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 892 return (EINVAL); 893 } 894 895retry1: 896 error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTLOCK); 897 if (error) { 898 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 899 return (error); 900 } 901 902 /* 903 * To avoid deadlocks return EINVAL if second futex 904 * exists at this time. 905 * 906 * Glibc fall back to FUTEX_WAKE in case of any error 907 * returned by FUTEX_CMP_REQUEUE. 908 */ 909 error = futex_get(args->uaddr2, NULL, &f2, 910 flags | FUTEX_DONTEXISTS | FUTEX_DONTLOCK); 911 if (error) { 912 futex_put(f, NULL); 913 914 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 915 return (error); 916 } 917 futex_lock(f); 918 futex_lock(f2); 919 error = copyin_nofault(args->uaddr, &val, sizeof(val)); 920 if (error) { 921 futex_put(f2, NULL); 922 futex_put(f, NULL); 923 error = copyin(args->uaddr, &val, sizeof(val)); 924 if (error == 0) 925 goto retry1; 926 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 927 error); 928 LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d", 929 error); 930 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 931 return (error); 932 } 933 if (val != args->val3) { 934 LIN_SDT_PROBE2(futex, linux_sys_futex, 935 debug_cmp_requeue_value_neq, args->val, val); 936 LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x", 937 args->val, val); 938 futex_put(f2, NULL); 939 futex_put(f, NULL); 940 941 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN); 942 return (EAGAIN); 943 } 944 945 nrwake = (int)(unsigned long)args->timeout; 946 td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake); 947 futex_put(f2, NULL); 948 futex_put(f, NULL); 949 break; 950 951 case LINUX_FUTEX_WAKE_OP: 952 LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op, 953 args->uaddr, args->op, args->val, args->uaddr2, args->val3); 954 LINUX_CTR5(sys_futex, "WAKE_OP " 955 "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x", 956 args->uaddr, args->val, args->uaddr2, args->val3, 957 args->timeout); 958 959retry2: 960 error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTLOCK); 961 if (error) { 962 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 963 return (error); 964 } 965 966 if (args->uaddr != args->uaddr2) 967 error = futex_get(args->uaddr2, NULL, &f2, 968 flags | FUTEX_DONTLOCK); 969 if (error) { 970 futex_put(f, NULL); 971 972 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 973 return (error); 974 } 975 futex_lock(f); 976 futex_lock(f2); 977 978 /* 979 * This function returns positive number as results and 980 * negative as errors 981 */ 982 save = vm_fault_disable_pagefaults(); 983 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 984 vm_fault_enable_pagefaults(save); 985 986 LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x", 987 args->uaddr, op_ret); 988 989 if (op_ret < 0) { 990 if (f2 != NULL) 991 futex_put(f2, NULL); 992 futex_put(f, NULL); 993 error = copyin(args->uaddr2, &val, sizeof(val)); 994 if (error == 0) 995 goto retry2; 996 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 997 return (error); 998 } 999 1000 ret = futex_wake(f, args->val, args->val3); 1001 1002 if (op_ret > 0) { 1003 op_ret = 0; 1004 nrwake = (int)(unsigned long)args->timeout; 1005 1006 if (f2 != NULL) 1007 op_ret += futex_wake(f2, nrwake, args->val3); 1008 else 1009 op_ret += futex_wake(f, nrwake, args->val3); 1010 ret += op_ret; 1011 1012 } 1013 if (f2 != NULL) 1014 futex_put(f2, NULL); 1015 futex_put(f, NULL); 1016 td->td_retval[0] = ret; 1017 break; 1018 1019 case LINUX_FUTEX_LOCK_PI: 1020 /* not yet implemented */ 1021 pem = pem_find(td->td_proc); 1022 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1023 linux_msg(td, 1024 "linux_sys_futex: " 1025 "unsupported futex_pi op\n"); 1026 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1027 LIN_SDT_PROBE0(futex, linux_sys_futex, 1028 unimplemented_lock_pi); 1029 } 1030 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1031 return (ENOSYS); 1032 1033 case LINUX_FUTEX_UNLOCK_PI: 1034 /* not yet implemented */ 1035 pem = pem_find(td->td_proc); 1036 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1037 linux_msg(td, 1038 "linux_sys_futex: " 1039 "unsupported futex_pi op\n"); 1040 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1041 LIN_SDT_PROBE0(futex, linux_sys_futex, 1042 unimplemented_unlock_pi); 1043 } 1044 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1045 return (ENOSYS); 1046 1047 case LINUX_FUTEX_TRYLOCK_PI: 1048 /* not yet implemented */ 1049 pem = pem_find(td->td_proc); 1050 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1051 linux_msg(td, 1052 "linux_sys_futex: " 1053 "unsupported futex_pi op\n"); 1054 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1055 LIN_SDT_PROBE0(futex, linux_sys_futex, 1056 unimplemented_trylock_pi); 1057 } 1058 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1059 return (ENOSYS); 1060 1061 case LINUX_FUTEX_REQUEUE: 1062 /* 1063 * Glibc does not use this operation since version 2.3.3, 1064 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation. 1065 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when 1066 * FUTEX_REQUEUE returned EINVAL. 1067 */ 1068 pem = pem_find(td->td_proc); 1069 if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) { 1070 linux_msg(td, 1071 "linux_sys_futex: " 1072 "unsupported futex_requeue op\n"); 1073 pem->flags |= LINUX_XDEPR_REQUEUEOP; 1074 LIN_SDT_PROBE0(futex, linux_sys_futex, 1075 deprecated_requeue); 1076 } 1077 1078 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 1079 return (EINVAL); 1080 1081 case LINUX_FUTEX_WAIT_REQUEUE_PI: 1082 /* not yet implemented */ 1083 pem = pem_find(td->td_proc); 1084 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1085 linux_msg(td, 1086 "linux_sys_futex: " 1087 "unsupported futex_pi op\n"); 1088 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1089 LIN_SDT_PROBE0(futex, linux_sys_futex, 1090 unimplemented_wait_requeue_pi); 1091 } 1092 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1093 return (ENOSYS); 1094 1095 case LINUX_FUTEX_CMP_REQUEUE_PI: 1096 /* not yet implemented */ 1097 pem = pem_find(td->td_proc); 1098 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1099 linux_msg(td, 1100 "linux_sys_futex: " 1101 "unsupported futex_pi op\n"); 1102 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1103 LIN_SDT_PROBE0(futex, linux_sys_futex, 1104 unimplemented_cmp_requeue_pi); 1105 } 1106 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1107 return (ENOSYS); 1108 1109 default: 1110 linux_msg(td, 1111 "linux_sys_futex: unknown op %d\n", args->op); 1112 LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation, 1113 args->op); 1114 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1115 return (ENOSYS); 1116 } 1117 1118 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 1119 return (error); 1120} 1121 1122int 1123linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args) 1124{ 1125 struct linux_emuldata *em; 1126 1127 LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args); 1128 1129 if (args->len != sizeof(struct linux_robust_list_head)) { 1130 LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error); 1131 LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL); 1132 return (EINVAL); 1133 } 1134 1135 em = em_find(td); 1136 em->robust_futexes = args->head; 1137 1138 LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0); 1139 return (0); 1140} 1141 1142int 1143linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args) 1144{ 1145 struct linux_emuldata *em; 1146 struct linux_robust_list_head *head; 1147 l_size_t len = sizeof(struct linux_robust_list_head); 1148 struct thread *td2; 1149 int error = 0; 1150 1151 LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args); 1152 1153 if (!args->pid) { 1154 em = em_find(td); 1155 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 1156 head = em->robust_futexes; 1157 } else { 1158 td2 = tdfind(args->pid, -1); 1159 if (td2 == NULL) { 1160 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1161 ESRCH); 1162 return (ESRCH); 1163 } 1164 if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) { 1165 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1166 EPERM); 1167 PROC_UNLOCK(td2->td_proc); 1168 return (EPERM); 1169 } 1170 1171 em = em_find(td2); 1172 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 1173 /* XXX: ptrace? */ 1174 if (priv_check(td, PRIV_CRED_SETUID) || 1175 priv_check(td, PRIV_CRED_SETEUID) || 1176 p_candebug(td, td2->td_proc)) { 1177 PROC_UNLOCK(td2->td_proc); 1178 1179 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1180 EPERM); 1181 return (EPERM); 1182 } 1183 head = em->robust_futexes; 1184 1185 PROC_UNLOCK(td2->td_proc); 1186 } 1187 1188 error = copyout(&len, args->len, sizeof(l_size_t)); 1189 if (error) { 1190 LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, 1191 error); 1192 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT); 1193 return (EFAULT); 1194 } 1195 1196 error = copyout(&head, args->head, sizeof(head)); 1197 if (error) { 1198 LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, 1199 error); 1200 } 1201 1202 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error); 1203 return (error); 1204} 1205 1206static int 1207handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr, 1208 unsigned int pi) 1209{ 1210 uint32_t uval, nval, mval; 1211 struct futex *f; 1212 int error; 1213 1214 LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi); 1215 1216retry: 1217 error = copyin(uaddr, &uval, 4); 1218 if (error) { 1219 LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error); 1220 LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT); 1221 return (EFAULT); 1222 } 1223 if ((uval & FUTEX_TID_MASK) == em->em_tid) { 1224 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 1225 nval = casuword32(uaddr, uval, mval); 1226 1227 if (nval == -1) { 1228 LIN_SDT_PROBE1(futex, handle_futex_death, return, 1229 EFAULT); 1230 return (EFAULT); 1231 } 1232 1233 if (nval != uval) 1234 goto retry; 1235 1236 if (!pi && (uval & FUTEX_WAITERS)) { 1237 error = futex_get(uaddr, NULL, &f, 1238 FUTEX_DONTCREATE | FUTEX_SHARED); 1239 if (error) { 1240 LIN_SDT_PROBE1(futex, handle_futex_death, 1241 return, error); 1242 return (error); 1243 } 1244 if (f != NULL) { 1245 futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY); 1246 futex_put(f, NULL); 1247 } 1248 } 1249 } 1250 1251 LIN_SDT_PROBE1(futex, handle_futex_death, return, 0); 1252 return (0); 1253} 1254 1255static int 1256fetch_robust_entry(struct linux_robust_list **entry, 1257 struct linux_robust_list **head, unsigned int *pi) 1258{ 1259 l_ulong uentry; 1260 int error; 1261 1262 LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi); 1263 1264 error = copyin((const void *)head, &uentry, sizeof(l_ulong)); 1265 if (error) { 1266 LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error); 1267 LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT); 1268 return (EFAULT); 1269 } 1270 1271 *entry = (void *)(uentry & ~1UL); 1272 *pi = uentry & 1; 1273 1274 LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0); 1275 return (0); 1276} 1277 1278/* This walks the list of robust futexes releasing them. */ 1279void 1280release_futexes(struct thread *td, struct linux_emuldata *em) 1281{ 1282 struct linux_robust_list_head *head = NULL; 1283 struct linux_robust_list *entry, *next_entry, *pending; 1284 unsigned int limit = 2048, pi, next_pi, pip; 1285 l_long futex_offset; 1286 int rc, error; 1287 1288 LIN_SDT_PROBE2(futex, release_futexes, entry, td, em); 1289 1290 head = em->robust_futexes; 1291 1292 if (head == NULL) { 1293 LIN_SDT_PROBE0(futex, release_futexes, return); 1294 return; 1295 } 1296 1297 if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) { 1298 LIN_SDT_PROBE0(futex, release_futexes, return); 1299 return; 1300 } 1301 1302 error = copyin(&head->futex_offset, &futex_offset, 1303 sizeof(futex_offset)); 1304 if (error) { 1305 LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error); 1306 LIN_SDT_PROBE0(futex, release_futexes, return); 1307 return; 1308 } 1309 1310 if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) { 1311 LIN_SDT_PROBE0(futex, release_futexes, return); 1312 return; 1313 } 1314 1315 while (entry != &head->list) { 1316 rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi); 1317 1318 if (entry != pending) 1319 if (handle_futex_death(em, 1320 (uint32_t *)((caddr_t)entry + futex_offset), pi)) { 1321 LIN_SDT_PROBE0(futex, release_futexes, return); 1322 return; 1323 } 1324 if (rc) { 1325 LIN_SDT_PROBE0(futex, release_futexes, return); 1326 return; 1327 } 1328 1329 entry = next_entry; 1330 pi = next_pi; 1331 1332 if (!--limit) 1333 break; 1334 1335 sched_relinquish(curthread); 1336 } 1337 1338 if (pending) 1339 handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip); 1340 1341 LIN_SDT_PROBE0(futex, release_futexes, return); 1342} 1343